Skip to content

Commit

Permalink
NaN packing for TValues
Browse files Browse the repository at this point in the history
From the Lua Power Patches page:

"Use NaN packing for TValue on x86 to reduce memory usage
and tiny performance gain (same as in LuaJIT 2).
It's fully ABI compatible with standard Lua libraries.
On one test script memory consumption reduced from 28Mb
to 21Mb and performance improved about 3.5-5%"

Added support for big endian architectures and LTR compatibility.
  • Loading branch information
funny-falcon authored and bogdanm committed Apr 18, 2012
1 parent c8b89a4 commit 1a5b04e
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 31 deletions.
29 changes: 17 additions & 12 deletions SConstruct
Expand Up @@ -92,17 +92,17 @@ toolchain_list['devkitarm'] = toolchain_list['arm-eabi-gcc']
# The first toolchain in the toolchains list is the default one
# (the one that will be used if none is specified)
platform_list = {
'at91sam7x' : { 'cpus' : [ 'AT91SAM7X256', 'AT91SAM7X512' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'lm3s' : { 'cpus' : [ 'LM3S1968', 'LM3S8962', 'LM3S6965', 'LM3S6918', 'LM3S9B92', 'LM3S9D92' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'str9' : { 'cpus' : [ 'STR912FAW44' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'i386' : { 'cpus' : [ 'I386' ], 'toolchains' : [ 'i686-gcc' ] },
'sim' : { 'cpus' : [ 'LINUX' ], 'toolchains' : [ 'i686-gcc' ] },
'lpc288x' : { 'cpus' : [ 'LPC2888' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'str7' : { 'cpus' : [ 'STR711FR2' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'stm32' : { 'cpus' : [ 'STM32F103ZE', 'STM32F103RE' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'avr32' : { 'cpus' : [ 'AT32UC3A0512', 'AT32UC3A0256', 'AT32UC3A0128', 'AT32UC3B0256' ], 'toolchains' : [ 'avr32-gcc', 'avr32-unknown-none-gcc' ] },
'lpc24xx' : { 'cpus' : [ 'LPC2468' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] },
'lpc17xx' : { 'cpus' : [ 'LPC1768' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ] }
'at91sam7x' : { 'cpus' : [ 'AT91SAM7X256', 'AT91SAM7X512' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'lm3s' : { 'cpus' : [ 'LM3S1968', 'LM3S8962', 'LM3S6965', 'LM3S6918', 'LM3S9B92', 'LM3S9D92' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'str9' : { 'cpus' : [ 'STR912FAW44' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'i386' : { 'cpus' : [ 'I386' ], 'toolchains' : [ 'i686-gcc' ], 'big_endian': False },
'sim' : { 'cpus' : [ 'LINUX' ], 'toolchains' : [ 'i686-gcc' ], 'big_endian': False },
'lpc288x' : { 'cpus' : [ 'LPC2888' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'str7' : { 'cpus' : [ 'STR711FR2' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'stm32' : { 'cpus' : [ 'STM32F103ZE', 'STM32F103RE' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'avr32' : { 'cpus' : [ 'AT32UC3A0512', 'AT32UC3A0256', 'AT32UC3A0128', 'AT32UC3B0256' ], 'toolchains' : [ 'avr32-gcc', 'avr32-unknown-none-gcc' ], 'big_endian': True },
'lpc24xx' : { 'cpus' : [ 'LPC2468' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False },
'lpc17xx' : { 'cpus' : [ 'LPC1768' ], 'toolchains' : [ 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' ], 'big_endian': False }
}

# List of board/CPU combinations
Expand Down Expand Up @@ -372,7 +372,12 @@ if not GetOption( 'help' ):
conf.env.Append(CPPDEFINES = ['LUA_NUMBER_INTEGRAL'])
if comp['target'] == 'lualonglong':
conf.env.Append(CPPDEFINES = ['LUA_INTEGRAL_LONGLONG'])

if comp['target'] != 'lualong' and comp['target'] != 'lualonglong':
conf.env.Append(CPPDEFINES = ['LUA_PACK_VALUE'])
if platform_list[platform]['big_endian']:
conf.env.Append(CPPDEFINES = ['ELUA_ENDIAN_BIG'])
else:
conf.env.Append(CPPDEFINES = ['ELUA_ENDIAN_LITTLE'])
conf.env.Append(CPPPATH = ['src/modules', 'src/platform/%s' % platform])
conf.env.Append(CPPDEFINES = {"LUA_OPTIMIZE_MEMORY" : ( comp['optram'] != 0 and 2 or 0 ) } )

Expand Down
24 changes: 13 additions & 11 deletions build_elua.lua
Expand Up @@ -156,17 +156,17 @@ toolchain_list[ 'devkitarm' ] = toolchain_list[ 'arm-eabi-gcc' ]
-- (the one that will be used if none is specified)
local platform_list =
{
at91sam7x = { cpus = { 'AT91SAM7X256', 'AT91SAM7X512' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
lm3s = { cpus = { 'LM3S1968', 'LM3S8962', 'LM3S6965', 'LM3S6918', 'LM3S9B92', 'LM3S9D92' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
str9 = { cpus = { 'STR912FAW44' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
i386 = { cpus = { 'I386' }, toolchains = { 'i686-gcc' } },
sim = { cpus = { 'LINUX' }, toolchains = { 'i686-gcc' } },
lpc288x = { cpus = { 'LPC2888' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
str7 = { cpus = { 'STR711FR2' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
stm32 = { cpus = { 'STM32F103ZE', 'STM32F103RE' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
avr32 = { cpus = { 'AT32UC3A0128', 'AT32UC3A0256', 'AT32UC3A0512', 'AT32UC3B0256' }, toolchains = { 'avr32-gcc', 'avr32-unknown-none-gcc' } },
lpc24xx = { cpus = { 'LPC2468' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } },
lpc17xx = { cpus = { 'LPC1768' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' } }
at91sam7x = { cpus = { 'AT91SAM7X256', 'AT91SAM7X512' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
lm3s = { cpus = { 'LM3S1968', 'LM3S8962', 'LM3S6965', 'LM3S6918', 'LM3S9B92', 'LM3S9D92' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
str9 = { cpus = { 'STR912FAW44' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
i386 = { cpus = { 'I386' }, toolchains = { 'i686-gcc' }, big_endian = false },
sim = { cpus = { 'LINUX' }, toolchains = { 'i686-gcc' }, big_endian = false },
lpc288x = { cpus = { 'LPC2888' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
str7 = { cpus = { 'STR711FR2' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
stm32 = { cpus = { 'STM32F103ZE', 'STM32F103RE' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
avr32 = { cpus = { 'AT32UC3A0128', 'AT32UC3A0256', 'AT32UC3A0512', 'AT32UC3B0256' }, toolchains = { 'avr32-gcc', 'avr32-unknown-none-gcc' }, big_endian = true },
lpc24xx = { cpus = { 'LPC2468' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false },
lpc17xx = { cpus = { 'LPC1768' }, toolchains = { 'arm-gcc', 'codesourcery', 'devkitarm', 'arm-eabi-gcc' }, big_endian = false }
}
-- List of board/CPU combinations
Expand Down Expand Up @@ -387,6 +387,8 @@ end
if comp.boot == 'luarpc' then addm( "ELUA_BOOT_RPC" ) end
if comp.target == 'lualong' or comp.target == 'lualonglong' then addm( "LUA_NUMBER_INTEGRAL" ) end
if comp.target == 'lualonglong' then addm( "LUA_INTEGRAL_LONGLONG" ) end
if comp.target ~= 'lualong' and comp.target ~= "lualonglong" then addm( "LUA_PACK_VALUE" ) end
if platform_list[ platform ].big_endian then addm( "ELUA_ENDIAN_BIG" ) else addm( "ELUA_ENDIAN_LITTLE" ) end
-- Special macro definitions for the 'sim' (simulator) target
if platform == 'sim' then addm( { "ELUA_SIMULATOR", "ELUA_SIM_" .. cnorm( comp.cpu ) } ) end
Expand Down
2 changes: 1 addition & 1 deletion src/lua/lobject.c
Expand Up @@ -24,7 +24,7 @@



const TValue luaO_nilobject_ = {{NULL}, LUA_TNIL};
const TValue luaO_nilobject_ = {LUA_TVALUE_NIL};


/*
Expand Down
158 changes: 156 additions & 2 deletions src/lua/lobject.h
Expand Up @@ -56,25 +56,80 @@ typedef struct GCheader {
/*
** Union of all Lua values
*/
#if defined( LUA_PACK_VALUE ) && defined( ELUA_ENDIAN_BIG )
typedef union {
struct {
int _pad0;
GCObject *gc;
};
struct {
int _pad1;
void *p;
};
lua_Number n;
struct {
int _pad2;
int b;
};
} Value;
#else // #if defined( LUA_PACK_VALUE ) && defined( ELUA_ENDIAN_BIG )
typedef union {
GCObject *gc;
void *p;
lua_Number n;
int b;
} Value;

#endif // #if defined( LUA_PACK_VALUE ) && defined( ELUA_ENDIAN_BIG )

/*
** Tagged Values
*/

#ifndef LUA_PACK_VALUE
#define TValuefields Value value; int tt
#define LUA_TVALUE_NIL {NULL}, LUA_TNIL

typedef struct lua_TValue {
TValuefields;
} TValue;
#else // #ifndef LUA_PACK_VALUE
#ifdef ELUA_ENDIAN_LITTLE
#define TValuefields union { \
struct { \
int _pad0; \
int tt_sig; \
} _ts; \
struct { \
int _pad; \
short tt; \
short sig; \
} _t; \
Value value; \
}
#define LUA_TVALUE_NIL {0, add_sig(LUA_TNIL)}
#else // #ifdef ELUA_ENDIAN_LITTLE
#define TValuefields union { \
struct { \
int tt_sig; \
int _pad0; \
} _ts; \
struct { \
short sig; \
short tt; \
int _pad; \
} _t; \
Value value; \
}
#define LUA_TVALUE_NIL {add_sig(LUA_TNIL), 0}
#endif // #ifdef ELUA_ENDIAN_LITTLE
#define LUA_NOTNUMBER_SIG (-1)
#define add_sig(tt) ( 0xffff0000 | (tt) )

typedef TValuefields TValue;
#endif // #ifndef LUA_PACK_VALUE

/* Macros to test type */
#ifndef LUA_PACK_VALUE
#define ttisnil(o) (ttype(o) == LUA_TNIL)
#define ttisnumber(o) (ttype(o) == LUA_TNUMBER)
#define ttisstring(o) (ttype(o) == LUA_TSTRING)
Expand All @@ -86,9 +141,27 @@ typedef struct lua_TValue {
#define ttislightuserdata(o) (ttype(o) == LUA_TLIGHTUSERDATA)
#define ttisrotable(o) (ttype(o) == LUA_TROTABLE)
#define ttislightfunction(o) (ttype(o) == LUA_TLIGHTFUNCTION)
#else // #ifndef LUA_PACK_VALUE
#define ttisnil(o) (ttype_sig(o) == add_sig(LUA_TNIL))
#define ttisnumber(o) ((o)->_t.sig != LUA_NOTNUMBER_SIG)
#define ttisstring(o) (ttype_sig(o) == add_sig(LUA_TSTRING))
#define ttistable(o) (ttype_sig(o) == add_sig(LUA_TTABLE))
#define ttisfunction(o) (ttype_sig(o) == add_sig(LUA_TFUNCTION))
#define ttisboolean(o) (ttype_sig(o) == add_sig(LUA_TBOOLEAN))
#define ttisuserdata(o) (ttype_sig(o) == add_sig(LUA_TUSERDATA))
#define ttisthread(o) (ttype_sig(o) == add_sig(LUA_TTHREAD))
#define ttislightuserdata(o) (ttype_sig(o) == add_sig(LUA_TLIGHTUSERDATA))
#define ttisrotable(o) (ttype_sig(o) == add_sig(LUA_TROTABLE))
#define ttislightfunction(o) (ttype_sig(o) == add_sig(LUA_TLIGHTFUNCTION))
#endif // #ifndef LUA_PACK_VALUE

/* Macros to access values */
#ifndef LUA_PACK_VALUE
#define ttype(o) ((o)->tt)
#else // #ifndef LUA_PACK_VALUE
#define ttype(o) ((o)->_t.sig == LUA_NOTNUMBER_SIG ? (o)->_t.tt : LUA_TNUMBER)
#define ttype_sig(o) ((o)->_ts.tt_sig)
#endif // #ifndef LUA_PACK_VALUE
#define gcvalue(o) check_exp(iscollectable(o), (o)->value.gc)
#define pvalue(o) check_exp(ttislightuserdata(o), (o)->value.p)
#define rvalue(o) check_exp(ttisrotable(o), (o)->value.p)
Expand All @@ -108,15 +181,24 @@ typedef struct lua_TValue {
/*
** for internal debug only
*/
#ifndef LUA_PACK_VALUE
#define checkconsistency(obj) \
lua_assert(!iscollectable(obj) || (ttype(obj) == (obj)->value.gc->gch.tt))

#define checkliveness(g,obj) \
lua_assert(!iscollectable(obj) || \
((ttype(obj) == (obj)->value.gc->gch.tt) && !isdead(g, (obj)->value.gc)))
#else // #ifndef LUA_PACK_VALUE
#define checkconsistency(obj) \
lua_assert(!iscollectable(obj) || (ttype(obj) == (obj)->value.gc->gch._t.tt))

#define checkliveness(g,obj) \
lua_assert(!iscollectable(obj) || \
((ttype(obj) == (obj)->value.gc->gch._t.tt) && !isdead(g, (obj)->value.gc)))
#endif // #ifndef LUA_PACK_VALUE

/* Macros to set values */
#ifndef LUA_PACK_VALUE
#define setnilvalue(obj) ((obj)->tt=LUA_TNIL)

#define setnvalue(obj,x) \
Expand Down Expand Up @@ -177,8 +259,63 @@ typedef struct lua_TValue {
{ const TValue *o2=(obj2); TValue *o1=(obj1); \
o1->value = o2->value; o1->tt=o2->tt; \
checkliveness(G(L),o1); }
#else // #ifndef LUA_PACK_VALUE
#define setnilvalue(obj) ( ttype_sig(obj) = add_sig(LUA_TNIL) )

#define setnvalue(obj,x) \
{ TValue *i_o=(obj); i_o->value.n=(x); }

#define setpvalue(obj,x) \
{ TValue *i_o=(obj); i_o->value.p=(x); i_o->_ts.tt_sig=add_sig(LUA_TLIGHTUSERDATA);}

#define setrvalue(obj,x) \
{ TValue *i_o=(obj); i_o->value.p=(x); i_o->_ts.tt_sig=add_sig(LUA_TROTABLE);}

#define setfvalue(obj,x) \
{ TValue *i_o=(obj); i_o->value.p=(x); i_o->_ts.tt_sig=add_sig(LUA_TLIGHTFUNCTION);}

#define setbvalue(obj,x) \
{ TValue *i_o=(obj); i_o->value.b=(x); i_o->_ts.tt_sig=add_sig(LUA_TBOOLEAN);}

#define setsvalue(L,obj,x) \
{ TValue *i_o=(obj); \
i_o->value.gc=cast(GCObject *, (x)); i_o->_ts.tt_sig=add_sig(LUA_TSTRING); \
checkliveness(G(L),i_o); }

#define setuvalue(L,obj,x) \
{ TValue *i_o=(obj); \
i_o->value.gc=cast(GCObject *, (x)); i_o->_ts.tt_sig=add_sig(LUA_TUSERDATA); \
checkliveness(G(L),i_o); }

#define setthvalue(L,obj,x) \
{ TValue *i_o=(obj); \
i_o->value.gc=cast(GCObject *, (x)); i_o->_ts.tt_sig=add_sig(LUA_TTHREAD); \
checkliveness(G(L),i_o); }

#define setclvalue(L,obj,x) \
{ TValue *i_o=(obj); \
i_o->value.gc=cast(GCObject *, (x)); i_o->_ts.tt_sig=add_sig(LUA_TFUNCTION); \
checkliveness(G(L),i_o); }

#define sethvalue(L,obj,x) \
{ TValue *i_o=(obj); \
i_o->value.gc=cast(GCObject *, (x)); i_o->_ts.tt_sig=add_sig(LUA_TTABLE); \
checkliveness(G(L),i_o); }

#define setptvalue(L,obj,x) \
{ TValue *i_o=(obj); \
i_o->value.gc=cast(GCObject *, (x)); i_o->_ts.tt_sig=add_sig(LUA_TPROTO); \
checkliveness(G(L),i_o); }




#define setobj(L,obj1,obj2) \
{ const TValue *o2=(obj2); TValue *o1=(obj1); \
o1->value = o2->value; \
checkliveness(G(L),o1); }
#endif // #ifndef LUA_PACK_VALUE

/*
** different types of sets, according to destination
*/
Expand All @@ -198,8 +335,13 @@ typedef struct lua_TValue {
#define setobj2n setobj
#define setsvalue2n setsvalue

#ifndef LUA_PACK_VALUE
#define setttype(obj, tt) (ttype(obj) = (tt))

#else // #ifndef LUA_PACK_VALUE
/* Assuming setttype is used only in lgc (to set LUA_TDEADKEY), */
/* it can be defined this way. */
#define setttype(obj, _tt) ( ttype_sig(obj) = add_sig(_tt) )
#endif // #ifndef LUA_PACK_VALUE

#define iscollectable(o) (ttype(o) >= LUA_TSTRING)

Expand Down Expand Up @@ -335,6 +477,7 @@ typedef union Closure {
** Tables
*/

#ifndef LUA_PACK_VALUE
typedef union TKey {
struct {
TValuefields;
Expand All @@ -343,6 +486,17 @@ typedef union TKey {
TValue tvk;
} TKey;

#define LUA_TKEY_NIL {LUA_TVALUE_NIL, NULL}
#else // #ifndef LUA_PACK_VALUE
typedef struct TKey {
TValue tvk;
struct {
struct Node *next; /* for chaining */
} nk;
} TKey;

#define LUA_TKEY_NIL {LUA_TVALUE_NIL}, {NULL}
#endif // #ifndef LUA_PACK_VALUE

typedef struct Node {
TValue i_val;
Expand Down
15 changes: 14 additions & 1 deletion src/lua/lrotable.h
Expand Up @@ -9,10 +9,23 @@
#include "luaconf.h"

/* Macros one can use to define rotable entries */
#ifndef LUA_PACK_VALUE
#define LRO_FUNCVAL(v) {{.p = v}, LUA_TLIGHTFUNCTION}
#define LRO_NUMVAL(v) {{.n = v}, LUA_TNUMBER}
#define LRO_ROVAL(v) {{.p = ( void* )v}, LUA_TROTABLE}
#define LRO_ROVAL(v) {{.p = (void*)v}, LUA_TROTABLE}
#define LRO_NILVAL {{.p = NULL}, LUA_TNIL}
#else // #ifndef LUA_PACK_VALUE
#define LRO_NUMVAL(v) {.value.n = v}
#ifdef ELUA_ENDIAN_LITTLE
#define LRO_FUNCVAL(v) {{(int)v, add_sig(LUA_TLIGHTFUNCTION)}}
#define LRO_ROVAL(v) {{(int)v, add_sig(LUA_TROTABLE)}}
#define LRO_NILVAL {{0, add_sig(LUA_TNIL)}}
#else // #ifdef ELUA_ENDIAN_LITTLE
#define LRO_FUNCVAL(v) {{add_sig(LUA_TLIGHTFUNCTION), (int)v}}
#define LRO_ROVAL(v) {{add_sig(LUA_TROTABLE), (int)v}}
#define LRO_NILVAL {{add_sig(LUA_TNIL), 0}}
#endif // #ifdef ELUA_ENDIAN_LITTLE
#endif // #ifndef LUA_PACK_VALUE

#define LRO_STRKEY(k) {LUA_TSTRING, {.strkey = k}}
#define LRO_NUMKEY(k) {LUA_TNUMBER, {.numkey = k}}
Expand Down
6 changes: 3 additions & 3 deletions src/lua/ltable.c
Expand Up @@ -73,8 +73,8 @@
#define dummynode (&dummynode_)

static const Node dummynode_ = {
{{NULL}, LUA_TNIL}, /* value */
{{{NULL}, LUA_TNIL, NULL}} /* key */
{LUA_TVALUE_NIL}, /* value */
{LUA_TKEY_NIL} /* key */
};


Expand Down Expand Up @@ -550,7 +550,7 @@ static TValue *newkey (lua_State *L, Table *t, const TValue *key) {
mp = n;
}
}
gkey(mp)->value = key->value; gkey(mp)->tt = key->tt;
setobj2t(L, gkey(mp), key);
luaC_barriert(L, t, key);
lua_assert(ttisnil(gval(mp)));
return gval(mp);
Expand Down
2 changes: 1 addition & 1 deletion src/lua/ltable.h
Expand Up @@ -11,7 +11,7 @@


#define gnode(t,i) (&(t)->node[i])
#define gkey(n) (&(n)->i_key.nk)
#define gkey(n) (&(n)->i_key.tvk)
#define gval(n) (&(n)->i_val)
#define gnext(n) ((n)->i_key.nk.next)

Expand Down

8 comments on commit 1a5b04e

@jsnyder
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I'll have to do some testing with this one. Any initial observations?

@bogdanm
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested on the simulator, on a LM3S and on an AVR32 with a couple of scripts (life and hangman). Seems to work fine. Not sure how much it actually helps, but it can't hurt.

@jsnyder
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to make a difference on the low memory system I have here:

Running sieve.lua like this now works up to 1024 on a 32 kB platform where previously only up to 512 worked:

-- Sieve-style benchmark: marks every index in 2..num, then clears the
-- multiples of each still-marked index and counts the survivors.
-- The result is left in the global `count`, which the script prints after
-- main() returns.
-- @param num  upper bound (inclusive) of the range to sieve
function main(num)
    local flags = {}
    -- `count` is a global on purpose: the caller reads it after main() returns
    count = 0
    -- Mark every candidate 2..num
    for i=2,num do
        flags[i] = 1
    end
    for i=2,num do

        if flags[i] == 1 then
            -- NOTE: this assigns a global `k`; the numeric `for` below declares
            -- its own local `k`, so the global is never read in this function.
            k = 0
            -- Clear every multiple of i, starting at 2*i
            for k=i+i, num, i do
                flags[k] = 0
            end
            count = count + 1    
        end
    end
end

-- Take the upper bound from the first script argument when the `arg` table
-- exists and the value converts to a number; otherwise fall back to 256.
NUM = tonumber((arg and arg[1])) or 256
count = 0
main(NUM)
-- Report the global counter filled in by main() together with the bound used
io.write("Count: ", count, ", ", NUM, "\n")

@bogdanm
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow! That's quite something. I'm glad you can use this.

@jsnyder
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

memory usage with new patch

Here's a comparison with and without the patch during the filling stage of this code:

-- Instrumented sieve-style benchmark: same marking and clearing passes as the
-- plain version, but on every 10th index of each pass it forces a full
-- garbage collection and prints a line of memory statistics.
-- The result is left in the global `count`, which the script prints after
-- main() returns.
-- @param num  upper bound (inclusive) of the range to sieve
function main(num)
    local flags = {}
    -- `count` is a global on purpose: the caller reads it after main() returns
    count = 0
    io.write("Filling\n")
    -- Mark every candidate 2..num
    for i=2,num do
        flags[i] = 1
        if (i % 10) == 0 then
          -- Collect first so the figures below exclude pending garbage
          collectgarbage("collect")
          -- NOTE(review): `heap` and `inuse` are assigned as globals; presumably
          -- total heap size and bytes in use -- confirm against elua.heapstats()
          heap,inuse = elua.heapstats()
          -- Columns: collectgarbage("count") value, inuse, heap
          io.write(string.format("%2.1f, %d, %d\n",collectgarbage("count"), inuse, heap))
        end
    end
    io.write("Processing\n")
    for i=2,num do

        if flags[i] == 1 then
            -- NOTE: this assigns a global `k`; the numeric `for` below declares
            -- its own local `k`, so the global is never read in this function.
            k = 0
            -- Clear every multiple of i, starting at 2*i
            for k=i+i, num, i do
                flags[k] = 0
            end
            count = count + 1    
        end
        if (i % 10) == 0 then
          -- Same sampling as in the filling pass
          collectgarbage("collect")
          heap,inuse = elua.heapstats()
          io.write(string.format("%2.1f, %d, %d\n",collectgarbage("count"), inuse, heap))
        end
    end
end

-- Take the upper bound from the first script argument when the `arg` table
-- exists and the value converts to a number; otherwise fall back to 256.
NUM = tonumber((arg and arg[1])) or 256
count = 0
main(NUM)
-- Report the global counter filled in by main() together with the bound used
io.write("Count: ", count, ", ", NUM, "\n")

@jsnyder
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

life.lua also runs at 16x16 in 22.1 kB (as reported on the bottom line while running)

@funny-falcon
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could also check how this patch landed in Lua 5.2 (in a slightly more elegant way).

@bogdanm
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I found this one good (and elegant) enough for my needs, so I didn't even consider checking the Lua 5.2 way. I'll do that once I've cleared some of the other things I need to do.

Please sign in to comment.