Skip to content

Commit

Permalink
erts: Refactor bitstring (binary) handling
Browse files Browse the repository at this point in the history
By reducing the difference between match states and sub-binaries,
this commit sets the stage for massive improvements in the bit
syntax implementation, where we plan to allow returning matched
tails from functions without any loss of performance relative to
continuation-passing-style.

This commit also simplifies the handling of off-heap Binary
objects. ProcBin (now called BinRef) is no longer exposed
directly as a term, with off-heap bitstrings instead being
represented by an ErlSubBits that references the BinRef. While
this results in slightly more on-heap usage, it reduces complexity
and makes it easy to determine which regions in a binary a process
refers to during a GC, giving us the opportunity to shed references
or shrink them to fit.
  • Loading branch information
jhogberg committed Nov 23, 2023
1 parent 49024e8 commit 24ef4cb
Show file tree
Hide file tree
Showing 73 changed files with 7,029 additions and 8,108 deletions.
4 changes: 3 additions & 1 deletion erts/emulator/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,9 @@ RUN_OBJS += \
$(OBJDIR)/beam_file.o \
$(OBJDIR)/beam_types.o \
$(OBJDIR)/erl_term_hashing.o \
$(OBJDIR)/erl_bif_coverage.o
$(OBJDIR)/erl_bif_coverage.o \
$(OBJDIR)/erl_iolist.o \
$(OBJDIR)/erl_etp.o

LTTNG_OBJS = $(OBJDIR)/erlang_lttng.o

Expand Down
3 changes: 2 additions & 1 deletion erts/emulator/beam/atom.names
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ atom is_constant
atom is_seq_trace
atom iterator
atom io
atom iolist_size_continue
atom iodata
atom iovec
atom keypos
Expand All @@ -404,7 +405,7 @@ atom line_length
atom linked_in_driver
atom links
atom list
atom list_to_binary_continue
atom list_to_bitstring_continue
atom little
atom loaded
atom load_cancelled
Expand Down
83 changes: 33 additions & 50 deletions erts/emulator/beam/beam_bif_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,45 +165,29 @@ erts_internal_beamfile_chunk_2(BIF_ALIST_2)
IFF_Chunk chunk;
IFF_File iff;

byte* temp_alloc;
byte* start;
Eterm Bin;
const byte *temp_alloc = NULL, *start;
Eterm binary;
Uint size;
Eterm res;

res = am_undefined;
temp_alloc = NULL;
Bin = BIF_ARG_1;
binary = BIF_ARG_1;

if (!read_iff_list(BIF_ARG_2, &search_iff)) {
BIF_ERROR(BIF_P, BADARG);
}

if ((start = erts_get_aligned_binary_bytes(Bin, &temp_alloc)) == NULL) {
start = erts_get_aligned_binary_bytes(binary, &size, &temp_alloc);
if (start == NULL) {
BIF_ERROR(BIF_P, BADARG);
}

if (iff_init(start, binary_size(Bin), &iff)) {
if (iff_init(start, size, &iff)) {
if (iff_read_chunk(&iff, search_iff, &chunk) && chunk.size > 0) {
Sint offset, bitoffs, bitsize;
Eterm real_bin;

ERTS_GET_REAL_BIN(Bin, real_bin, offset, bitoffs, bitsize);

if (bitoffs) {
res = new_binary(BIF_P, (byte*)chunk.data, chunk.size);
} else {
ErlSubBin *sb = (ErlSubBin*)HAlloc(BIF_P, ERL_SUB_BIN_SIZE);

sb->thing_word = HEADER_SUB_BIN;
sb->orig = real_bin;
sb->size = chunk.size;
sb->bitsize = 0;
sb->bitoffs = 0;
sb->offs = offset + (chunk.data - start);
sb->is_writable = 0;

res = make_binary(sb);
}
res = erts_make_sub_binary(BIF_P,
binary,
(chunk.data - start),
chunk.size);
}
}

Expand All @@ -216,22 +200,20 @@ erts_internal_beamfile_chunk_2(BIF_ALIST_2)
BIF_RETTYPE
erts_internal_beamfile_module_md5_1(BIF_ALIST_1)
{
byte* temp_alloc;
byte* bytes;

const byte *temp_alloc = NULL, *bytes;
BeamFile beam;
Eterm Bin;
Uint size;
Eterm res;

temp_alloc = NULL;
Bin = BIF_ARG_1;

if ((bytes = erts_get_aligned_binary_bytes(Bin, &temp_alloc)) == NULL) {
bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &size, &temp_alloc);
if (bytes == NULL) {
BIF_ERROR(BIF_P, BADARG);
}

if (beamfile_read(bytes, binary_size(Bin), &beam) == BEAMFILE_READ_SUCCESS) {
res = new_binary(BIF_P, beam.checksum, sizeof(beam.checksum));
if (beamfile_read(bytes, size, &beam) == BEAMFILE_READ_SUCCESS) {
res = erts_new_binary_from_data(BIF_P,
sizeof(beam.checksum),
beam.checksum);
beamfile_free(&beam);
} else {
res = am_undefined;
Expand All @@ -245,33 +227,34 @@ erts_internal_beamfile_module_md5_1(BIF_ALIST_1)
BIF_RETTYPE
erts_internal_prepare_loading_2(BIF_ALIST_2)
{
byte* temp_alloc = NULL;
byte* code;
Uint sz;
const byte *temp_alloc = NULL, *code;
Uint size;
Binary* magic;
Eterm reason;
Eterm* hp;
Eterm res;

if (is_not_atom(BIF_ARG_1)) {
error:
erts_free_aligned_binary_bytes(temp_alloc);
BIF_ERROR(BIF_P, BADARG);
BIF_ERROR(BIF_P, BADARG);
}
if ((code = erts_get_aligned_binary_bytes(BIF_ARG_2, &temp_alloc)) == NULL) {
goto error;

code = erts_get_aligned_binary_bytes(BIF_ARG_2, &size, &temp_alloc);
if (code == NULL) {
BIF_ERROR(BIF_P, BADARG);
}

magic = erts_alloc_loader_state();
sz = binary_size(BIF_ARG_2);
reason = erts_prepare_loading(magic, BIF_P, BIF_P->group_leader,
&BIF_ARG_1, code, sz);
&BIF_ARG_1, code, size);

erts_free_aligned_binary_bytes(temp_alloc);

if (reason != NIL) {
hp = HAlloc(BIF_P, 3);
res = TUPLE2(hp, am_error, reason);
BIF_RET(res);
hp = HAlloc(BIF_P, 3);
res = TUPLE2(hp, am_error, reason);
BIF_RET(res);
}

hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE);
res = erts_mk_magic_ref(&hp, &MSO(BIF_P), magic);
erts_refc_dec(&magic->intern.refc, 1);
Expand Down
4 changes: 3 additions & 1 deletion erts/emulator/beam/beam_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ erts_debug_disassemble_1(BIF_ALIST_1)
*/
code_ptr = 0;
}
bin = new_binary(p, (byte *) dsbufp->str, dsbufp->str_len);
bin = erts_new_binary_from_data(p,
dsbufp->str_len,
(byte *) dsbufp->str);
erts_destroy_tmp_dsbuf(dsbufp);
hsz = 4+4;
(void) erts_bld_uword(NULL, &hsz, (BeamInstr) code_ptr);
Expand Down
12 changes: 6 additions & 6 deletions erts/emulator/beam/beam_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ Eterm
erts_preload_module(Process *c_p,
ErtsProcLocks c_p_locks,
Eterm group_leader, /* Group leader or NIL if none. */
Eterm* modp, /*
Eterm *modp, /*
* Module name as an atom (NIL to not
* check). On return, contains the
* actual module name.
*/
byte* code, /* Points to the code to load */
const byte* code, /* Points to the code to load */
Uint size) /* Size of code to load. */
{
Binary* magic = erts_alloc_loader_state();
Expand All @@ -121,7 +121,7 @@ erts_preload_module(Process *c_p,

Eterm
erts_prepare_loading(Binary* magic, Process *c_p, Eterm group_leader,
Eterm* modp, byte* code, Uint unloaded_size)
Eterm* modp, const byte *code, Uint unloaded_size)
{
enum beamfile_read_result read_result;
Eterm retval = am_badfile;
Expand Down Expand Up @@ -656,10 +656,10 @@ erts_release_literal_area(ErtsLiteralArea* literal_area)

while (oh) {
switch (thing_subtag(oh->thing_word)) {
case REFC_BINARY_SUBTAG:
case BIN_REF_SUBTAG:
{
Binary* bptr = ((ProcBin*)oh)->val;
erts_bin_release(bptr);
Binary *bin = ((BinRef*)oh)->val;
erts_bin_release(bin);
break;
}
case FUN_SUBTAG:
Expand Down
Loading

0 comments on commit 24ef4cb

Please sign in to comment.