Skip to content

Commit

Permalink
erts: Stop overestimating external format size
Browse files Browse the repository at this point in the history
Debugging differences between the calculated and actual size is no
fun at all when certain kinds of data is overestimated, as a simple
`ASSERT(after_encode <= &before_encode[size])` check will often
succeed when it shouldn't (e.g. if part of one object is
overestimated while another part is underestimated).

This commit fixes the differences that I've noticed, and adds an
assertion that the encoded size should be exactly equal to the
calculated size so that no new differences can fly under the radar
from now on.
  • Loading branch information
jhogberg committed Nov 23, 2023
1 parent 1ba0526 commit 49024e8
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 20 deletions.
10 changes: 8 additions & 2 deletions erts/emulator/beam/erl_db_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -3384,8 +3384,14 @@ static void* copy_to_comp(int keypos, Eterm obj, DbTerm* dest,
tpl[i] = src[i];
}
else {
tpl[i] = ext2elem(tpl, top.cp);
top.cp = erts_encode_ext_ets(src[i], top.cp, &dest->first_oh);
#ifdef DEBUG
Uint encoded_size = erts_encode_ext_size_ets(src[i]);
byte *orig_cp = top.cp;
#endif
tpl[i] = ext2elem(tpl, top.cp);

top.cp = erts_encode_ext_ets(src[i], top.cp, &dest->first_oh);
ASSERT(top.cp == &orig_cp[encoded_size]);
}
}
}
Expand Down
84 changes: 66 additions & 18 deletions erts/emulator/beam/external.c
Original file line number Diff line number Diff line change
Expand Up @@ -5361,9 +5361,9 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
}

#define LIST_TAIL_OP ((0 << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER)
#define TERM_ARRAY_OP(N) (((N) << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER)
#define TERM_ARRAY_OP_DEC(OP) ((OP) - (1 << _TAG_PRIMARY_SIZE))

#define HASHMAP_NODE_OP ((1 << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER)
#define TERM_ARRAY_OP(N) (((N) << _HEADER_ARITY_OFFS) | TAG_PRIMARY_HEADER)
#define TERM_ARRAY_OP_DEC(OP) ((OP) - (1 << _HEADER_ARITY_OFFS))

for (;;) {
ASSERT(!is_header(obj));
Expand Down Expand Up @@ -5513,18 +5513,20 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
}

ptr++;
WSTACK_RESERVE(s, node_sz*2);
while(node_sz--) {
if (is_list(*ptr)) {
WSTACK_FAST_PUSH(s, CAR(list_val(*ptr)));
WSTACK_FAST_PUSH(s, CDR(list_val(*ptr)));
ptr++;
WSTACK_RESERVE(s, node_sz * 2);
while(node_sz--) {
Eterm node = *ptr++;

if (is_list(node) || is_tuple(node)) {
WSTACK_FAST_PUSH(s, (UWord)node);
WSTACK_FAST_PUSH(s, (UWord)HASHMAP_NODE_OP);
} else {
WSTACK_FAST_PUSH(s, *ptr);
}
ptr++;
}
}
ASSERT(is_map(node));
WSTACK_FAST_PUSH(s, node);
}
}
}
break;
case FLOAT_DEF:
if (dflags & DFLAG_NEW_FLOATS) {
Expand Down Expand Up @@ -5625,15 +5627,24 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
ErlFunThing *funp = (ErlFunThing *) fun_val(obj);

if (is_local_fun(funp)) {
result += 20+1+1+4; /* New ID + Tag */
result += 4; /* Length field (number of free variables */
ErlFunEntry *fe = funp->entry.fun;

result += 1 /* tag */
+ 4 /* length field (size of free variables) */
+ 1 /* arity */
+ 16 /* uniq */
+ 4 /* index */
+ 4; /* free variables */
result += encode_atom_size(acmp, fe->module, dflags);
result += encode_small_size(acmp, make_small(fe->old_index), dflags);
result += encode_small_size(acmp, make_small(fe->old_uniq), dflags);
result += encode_pid_size(acmp, erts_init_process_id, dflags);
result += encode_atom_size(acmp, funp->entry.fun->module, dflags);
result += 2 * (1+4); /* Index, Uniq */

if (fun_num_free(funp) > 1) {
WSTACK_PUSH2(s, (UWord) (funp->env + 1),
(UWord) TERM_ARRAY_OP(fun_num_free(funp)-1));
}

if (fun_num_free(funp) != 0) {
obj = funp->env[0];
continue; /* big loop */
Expand Down Expand Up @@ -5672,6 +5683,43 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
obj = CAR(cons);
}
break;
case HASHMAP_NODE_OP: {
Eterm *cons;

obj = (Eterm)WSTACK_POP(s);

if (is_tuple(obj)) {
/* Collision node */
Eterm *node_terms;
Uint node_size;

node_terms = tuple_val(obj);
node_size = arityval(*node_terms);
ASSERT(node_size >= 2);

WSTACK_RESERVE(s, node_size * 2);
for (Uint i = 1; i < node_size; i++) {
ASSERT(is_list(node_terms[i]));
WSTACK_FAST_PUSH(s, (UWord)node_terms[i]);
WSTACK_FAST_PUSH(s, (UWord)HASHMAP_NODE_OP);
}

/* The last collision leaf must be handled below, or it
* will be wrongly treated as a normal cons cell by the
* main loop. */
obj = node_terms[node_size];
ASSERT(is_list(obj));
}

if (is_list(obj)) {
/* Leaf node */
cons = list_val(obj);

WSTACK_PUSH(s, CDR(cons));
obj = CAR(cons);
}
break;
}
case TERM_ARRAY_OP(1):
obj = *(Eterm*)WSTACK_POP(s);
break;
Expand Down

0 comments on commit 49024e8

Please sign in to comment.