Skip to content

Commit

Permalink
Fix 64-bit issues in the garbage collector
Browse files Browse the repository at this point in the history
We discovered that if a single Erlang process tried to grow above 32
GB (i.e., more 64-bit words than can be counted by a 32-bit number),
the VM failed to find the next larger heap size, even though there
were plenty more heap sizes left to pick from and even though we had a
lot more memory available on the machine. (Obviously, this is only
applicable on 64-bit Erlang.)

The cause turned out to be some 'int' variables in the heap resizing
parts of erl_gc.c that had never been updated to 'Uint' or 'Sint'. Once
that was fixed, I got segfaults instead as soon as the heap got larger
than 2^32 words, due to even more 'int' declarations in the same file,
this time in the collection code itself rather than in the resizing code.

After fixing this as well, I successfully ran an Erlang node in which
a single Erlang process had a heap so large that I'm not at liberty to
divulge the exact size, but I think the scientific term is
"humongous", and I'm confident that there are no further immediate
problems with very very large individual process heaps.
  • Loading branch information
Richard Carlsson authored and bjorng committed Aug 15, 2011
1 parent 4a5a758 commit caad36a
Showing 1 changed file with 24 additions and 16 deletions.
40 changes: 24 additions & 16 deletions erts/emulator/beam/erl_gc.c
Expand Up @@ -100,14 +100,14 @@ static Uint combined_message_size(Process* p);
static void remove_message_buffers(Process* p);
static int major_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl);
static int minor_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl);
static void do_minor(Process *p, int new_sz, Eterm* objv, int nobj);
static void do_minor(Process *p, Uint new_sz, Eterm* objv, int nobj);
static Eterm* sweep_rootset(Rootset *rootset, Eterm* htop, char* src, Uint src_size);
static Eterm* sweep_one_area(Eterm* n_hp, Eterm* n_htop, char* src, Uint src_size);
static Eterm* sweep_one_heap(Eterm* heap_ptr, Eterm* heap_end, Eterm* htop,
char* src, Uint src_size);
static Eterm* collect_heap_frags(Process* p, Eterm* heap,
Eterm* htop, Eterm* objv, int nobj);
static Uint adjust_after_fullsweep(Process *p, int size_before,
static Uint adjust_after_fullsweep(Process *p, Uint size_before,
int need, Eterm *objv, int nobj);
static void shrink_new_heap(Process *p, Uint new_sz, Eterm *objv, int nobj);
static void grow_new_heap(Process *p, Uint new_sz, Eterm* objv, int nobj);
Expand Down Expand Up @@ -441,7 +441,15 @@ erts_garbage_collect(Process* p, int need, Eterm* objv, int nobj)
p->last_old_htop = p->old_htop;
#endif

return ((int) (HEAP_TOP(p) - HEAP_START(p)) / 10);
/* FIXME: This function should really return an Sint, i.e., a possibly
64 bit wide signed integer, but that requires updating all the code
that calls it. For now, we just return INT_MAX if the result is too
large for an int. */
{
Sint result = (HEAP_TOP(p) - HEAP_START(p)) / 10;
if (result >= INT_MAX) return INT_MAX;
else return (int) result;
}
}

/*
Expand Down Expand Up @@ -599,7 +607,7 @@ erts_garbage_collect_literals(Process* p, Eterm* literals, Uint lit_size)
char* area;
Uint area_size;
Eterm* old_htop;
int n;
Uint n;

/*
* Set GC state.
Expand Down Expand Up @@ -731,7 +739,7 @@ minor_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl)
* This improved Estone by more than 1200 estones on my computer
* (Ultra Sparc 10).
*/
size_t new_sz = erts_next_heap_size(HEAP_TOP(p) - HEAP_START(p), 1);
Uint new_sz = erts_next_heap_size(HEAP_TOP(p) - HEAP_START(p), 1);

/* Create new, empty old_heap */
n_old = (Eterm *) ERTS_HEAP_ALLOC(ERTS_ALC_T_OLD_HEAP,
Expand Down Expand Up @@ -871,12 +879,12 @@ minor_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl)
#endif /* HIPE */

static void
do_minor(Process *p, int new_sz, Eterm* objv, int nobj)
do_minor(Process *p, Uint new_sz, Eterm* objv, int nobj)
{
Rootset rootset; /* Rootset for GC (stack, dictionary, etc). */
Roots* roots;
Eterm* n_htop;
int n;
Uint n;
Eterm* ptr;
Eterm val;
Eterm gval;
Expand Down Expand Up @@ -1079,14 +1087,14 @@ major_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl)
{
Rootset rootset;
Roots* roots;
int size_before;
Uint size_before;
Eterm* n_heap;
Eterm* n_htop;
char* src = (char *) HEAP_START(p);
Uint src_size = (char *) HEAP_TOP(p) - src;
char* oh = (char *) OLD_HEAP(p);
Uint oh_size = (char *) OLD_HTOP(p) - oh;
int n;
Uint n;
Uint new_sz;
Uint fragments = MBUF_SIZE(p) + combined_message_size(p);
ErlMessage *msgp;
Expand Down Expand Up @@ -1312,10 +1320,10 @@ major_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl)
}

static Uint
adjust_after_fullsweep(Process *p, int size_before, int need, Eterm *objv, int nobj)
adjust_after_fullsweep(Process *p, Uint size_before, int need, Eterm *objv, int nobj)
{
int wanted, sz, size_after, need_after;
int stack_size = STACK_SZ_ON_HEAP(p);
Uint wanted, sz, size_after, need_after;
Uint stack_size = STACK_SZ_ON_HEAP(p);
Uint reclaimed_now;

size_after = (HEAP_TOP(p) - HEAP_START(p));
Expand Down Expand Up @@ -1915,8 +1923,8 @@ static void
grow_new_heap(Process *p, Uint new_sz, Eterm* objv, int nobj)
{
Eterm* new_heap;
int heap_size = HEAP_TOP(p) - HEAP_START(p);
int stack_size = p->hend - p->stop;
Uint heap_size = HEAP_TOP(p) - HEAP_START(p);
Uint stack_size = p->hend - p->stop;
Sint offs;

ASSERT(HEAP_SIZE(p) < new_sz);
Expand Down Expand Up @@ -1954,10 +1962,10 @@ static void
shrink_new_heap(Process *p, Uint new_sz, Eterm *objv, int nobj)
{
Eterm* new_heap;
int heap_size = HEAP_TOP(p) - HEAP_START(p);
Uint heap_size = HEAP_TOP(p) - HEAP_START(p);
Sint offs;

int stack_size = p->hend - p->stop;
Uint stack_size = p->hend - p->stop;

ASSERT(new_sz < p->heap_sz);
sys_memmove(p->heap + new_sz - stack_size, p->stop, stack_size *
Expand Down

0 comments on commit caad36a

Please sign in to comment.