Skip to content

Commit

Permalink
Merge branch 'ps/reftable-write-optim' into next
Browse files Browse the repository at this point in the history
Code to write out reftable has seen some optimization and
simplification.

* ps/reftable-write-optim:
  reftable/block: reuse compressed array
  reftable/block: reuse zstream when writing log blocks
  reftable/writer: reset `last_key` instead of releasing it
  reftable/writer: unify releasing memory
  reftable/writer: refactorings for `writer_flush_nonempty_block()`
  reftable/writer: refactorings for `writer_add_record()`
  refs/reftable: don't recompute committer ident
  reftable: remove name checks
  refs/reftable: skip duplicate name checks
  refs/reftable: perform explicit D/F check when writing symrefs
  refs/reftable: fix D/F conflict error message on ref copy
  • Loading branch information
gitster committed Apr 30, 2024
2 parents d4cc1ec + fa74f32 commit 0667e3c
Show file tree
Hide file tree
Showing 16 changed files with 230 additions and 556 deletions.
2 changes: 0 additions & 2 deletions Makefile
Expand Up @@ -2665,7 +2665,6 @@ REFTABLE_OBJS += reftable/merged.o
REFTABLE_OBJS += reftable/pq.o
REFTABLE_OBJS += reftable/reader.o
REFTABLE_OBJS += reftable/record.o
REFTABLE_OBJS += reftable/refname.o
REFTABLE_OBJS += reftable/generic.o
REFTABLE_OBJS += reftable/stack.o
REFTABLE_OBJS += reftable/tree.o
Expand All @@ -2678,7 +2677,6 @@ REFTABLE_TEST_OBJS += reftable/merged_test.o
REFTABLE_TEST_OBJS += reftable/pq_test.o
REFTABLE_TEST_OBJS += reftable/record_test.o
REFTABLE_TEST_OBJS += reftable/readwrite_test.o
REFTABLE_TEST_OBJS += reftable/refname_test.o
REFTABLE_TEST_OBJS += reftable/stack_test.o
REFTABLE_TEST_OBJS += reftable/test_framework.o
REFTABLE_TEST_OBJS += reftable/tree_test.o
Expand Down
75 changes: 53 additions & 22 deletions refs/reftable-backend.c
Expand Up @@ -172,32 +172,30 @@ static int should_write_log(struct ref_store *refs, const char *refname)
}
}

static void fill_reftable_log_record(struct reftable_log_record *log)
static void fill_reftable_log_record(struct reftable_log_record *log, const struct ident_split *split)
{
const char *info = git_committer_info(0);
struct ident_split split = {0};
const char *tz_begin;
int sign = 1;

if (split_ident_line(&split, info, strlen(info)))
BUG("failed splitting committer info");

reftable_log_record_release(log);
log->value_type = REFTABLE_LOG_UPDATE;
log->value.update.name =
xstrndup(split.name_begin, split.name_end - split.name_begin);
xstrndup(split->name_begin, split->name_end - split->name_begin);
log->value.update.email =
xstrndup(split.mail_begin, split.mail_end - split.mail_begin);
log->value.update.time = atol(split.date_begin);
if (*split.tz_begin == '-') {
xstrndup(split->mail_begin, split->mail_end - split->mail_begin);
log->value.update.time = atol(split->date_begin);

tz_begin = split->tz_begin;
if (*tz_begin == '-') {
sign = -1;
split.tz_begin++;
tz_begin++;
}
if (*split.tz_begin == '+') {
if (*tz_begin == '+') {
sign = 1;
split.tz_begin++;
tz_begin++;
}

log->value.update.tz_offset = sign * atoi(split.tz_begin);
log->value.update.tz_offset = sign * atoi(tz_begin);
}

static int read_ref_without_reload(struct reftable_stack *stack,
Expand Down Expand Up @@ -1021,9 +1019,15 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
reftable_stack_merged_table(arg->stack);
uint64_t ts = reftable_stack_next_update_index(arg->stack);
struct reftable_log_record *logs = NULL;
struct ident_split committer_ident = {0};
size_t logs_nr = 0, logs_alloc = 0, i;
const char *committer_info;
int ret = 0;

committer_info = git_committer_info(0);
if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
BUG("failed splitting committer info");

QSORT(arg->updates, arg->updates_nr, transaction_update_cmp);

reftable_writer_set_limits(writer, ts, ts);
Expand Down Expand Up @@ -1089,7 +1093,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
log = &logs[logs_nr++];
memset(log, 0, sizeof(*log));

fill_reftable_log_record(log);
fill_reftable_log_record(log, &committer_ident);
log->update_index = ts;
log->refname = xstrdup(u->refname);
memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
Expand Down Expand Up @@ -1227,6 +1231,7 @@ static int reftable_be_pack_refs(struct ref_store *ref_store,
struct write_create_symref_arg {
struct reftable_ref_store *refs;
struct reftable_stack *stack;
struct strbuf *err;
const char *refname;
const char *target;
const char *logmsg;
Expand All @@ -1242,13 +1247,20 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
.value.symref = (char *)create->target,
.update_index = ts,
};
struct ident_split committer_ident = {0};
struct reftable_log_record log = {0};
struct object_id new_oid;
struct object_id old_oid;
const char *committer_info;
int ret;

reftable_writer_set_limits(writer, ts, ts);

ret = refs_verify_refname_available(&create->refs->base, create->refname,
NULL, NULL, create->err);
if (ret < 0)
return ret;

ret = reftable_writer_add_ref(writer, &ref);
if (ret)
return ret;
Expand All @@ -1267,7 +1279,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
!should_write_log(&create->refs->base, create->refname))
return 0;

fill_reftable_log_record(&log);
committer_info = git_committer_info(0);
if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
BUG("failed splitting committer info");

fill_reftable_log_record(&log, &committer_ident);
log.refname = xstrdup(create->refname);
log.update_index = ts;
log.value.update.message = xstrndup(create->logmsg,
Expand All @@ -1290,12 +1306,14 @@ static int reftable_be_create_symref(struct ref_store *ref_store,
struct reftable_ref_store *refs =
reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_symref");
struct reftable_stack *stack = stack_for(refs, refname, &refname);
struct strbuf err = STRBUF_INIT;
struct write_create_symref_arg arg = {
.refs = refs,
.stack = stack,
.refname = refname,
.target = target,
.logmsg = logmsg,
.err = &err,
};
int ret;

Expand All @@ -1311,9 +1329,15 @@ static int reftable_be_create_symref(struct ref_store *ref_store,

done:
assert(ret != REFTABLE_API_ERROR);
if (ret)
error("unable to write symref for %s: %s", refname,
reftable_error_str(ret));
if (ret) {
if (err.len)
error("%s", err.buf);
else
error("unable to write symref for %s: %s", refname,
reftable_error_str(ret));
}

strbuf_release(&err);
return ret;
}

Expand All @@ -1335,10 +1359,16 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
struct reftable_log_record old_log = {0}, *logs = NULL;
struct reftable_iterator it = {0};
struct string_list skip = STRING_LIST_INIT_NODUP;
struct ident_split committer_ident = {0};
struct strbuf errbuf = STRBUF_INIT;
size_t logs_nr = 0, logs_alloc = 0, i;
const char *committer_info;
int ret;

committer_info = git_committer_info(0);
if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
BUG("failed splitting committer info");

if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
ret = error(_("refname %s not found"), arg->oldname);
goto done;
Expand All @@ -1361,7 +1391,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
/*
* Verify that the new refname is available.
*/
string_list_insert(&skip, arg->oldname);
if (arg->delete_old)
string_list_insert(&skip, arg->oldname);
ret = refs_verify_refname_available(&arg->refs->base, arg->newname,
NULL, &skip, &errbuf);
if (ret < 0) {
Expand Down Expand Up @@ -1412,7 +1443,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)

ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
fill_reftable_log_record(&logs[logs_nr]);
fill_reftable_log_record(&logs[logs_nr], &committer_ident);
logs[logs_nr].refname = (char *)arg->newname;
logs[logs_nr].update_index = deletion_ts;
logs[logs_nr].value.update.message =
Expand Down Expand Up @@ -1444,7 +1475,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
*/
ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
fill_reftable_log_record(&logs[logs_nr]);
fill_reftable_log_record(&logs[logs_nr], &committer_ident);
logs[logs_nr].refname = (char *)arg->newname;
logs[logs_nr].update_index = creation_ts;
logs[logs_nr].value.update.message =
Expand Down
80 changes: 50 additions & 30 deletions reftable/block.c
Expand Up @@ -76,6 +76,10 @@ void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
bw->entries = 0;
bw->restart_len = 0;
bw->last_key.len = 0;
if (!bw->zstream) {
REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
deflateInit(bw->zstream, 9);
}
}

uint8_t block_writer_type(struct block_writer *bw)
Expand Down Expand Up @@ -139,39 +143,52 @@ int block_writer_finish(struct block_writer *w)
w->next += 2;
put_be24(w->buf + 1 + w->header_off, w->next);

/*
* Log records are stored zlib-compressed. Note that the compression
* also spans over the restart points we have just written.
*/
if (block_writer_type(w) == BLOCK_TYPE_LOG) {
int block_header_skip = 4 + w->header_off;
uLongf src_len = w->next - block_header_skip;
uLongf dest_cap = src_len * 1.001 + 12;
uint8_t *compressed;

REFTABLE_ALLOC_ARRAY(compressed, dest_cap);

while (1) {
uLongf out_dest_len = dest_cap;
int zresult = compress2(compressed, &out_dest_len,
w->buf + block_header_skip,
src_len, 9);
if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) {
dest_cap *= 2;
compressed =
reftable_realloc(compressed, dest_cap);
if (compressed)
continue;
}

if (Z_OK != zresult) {
reftable_free(compressed);
return REFTABLE_ZLIB_ERROR;
}

memcpy(w->buf + block_header_skip, compressed,
out_dest_len);
w->next = out_dest_len + block_header_skip;
reftable_free(compressed);
break;
}
uLongf src_len = w->next - block_header_skip, compressed_len;
int ret;

ret = deflateReset(w->zstream);
if (ret != Z_OK)
return REFTABLE_ZLIB_ERROR;

/*
* Precompute the upper bound of how many bytes the compressed
* data may end up with. Combined with `Z_FINISH`, `deflate()`
* is guaranteed to return `Z_STREAM_END`.
*/
compressed_len = deflateBound(w->zstream, src_len);
REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap);

w->zstream->next_out = w->compressed;
w->zstream->avail_out = compressed_len;
w->zstream->next_in = w->buf + block_header_skip;
w->zstream->avail_in = src_len;

/*
* We want to perform all decompression in a single step, which
* is why we can pass Z_FINISH here. As we have precomputed the
* deflated buffer's size via `deflateBound()` this function is
* guaranteed to succeed according to the zlib documentation.
*/
ret = deflate(w->zstream, Z_FINISH);
if (ret != Z_STREAM_END)
return REFTABLE_ZLIB_ERROR;

/*
* Overwrite the uncompressed data we have already written and
* adjust the `next` pointer to point right after the
* compressed data.
*/
memcpy(w->buf + block_header_skip, w->compressed,
w->zstream->total_out);
w->next = w->zstream->total_out + block_header_skip;
}

return w->next;
}

Expand Down Expand Up @@ -514,7 +531,10 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,

void block_writer_release(struct block_writer *bw)
{
deflateEnd(bw->zstream);
FREE_AND_NULL(bw->zstream);
FREE_AND_NULL(bw->restarts);
FREE_AND_NULL(bw->compressed);
strbuf_release(&bw->last_key);
/* the block is not owned. */
}
Expand Down
4 changes: 4 additions & 0 deletions reftable/block.h
Expand Up @@ -18,6 +18,10 @@ license that can be found in the LICENSE file or at
* allocation overhead.
*/
struct block_writer {
z_stream *zstream;
unsigned char *compressed;
size_t compressed_cap;

uint8_t *buf;
uint32_t block_size;

Expand Down
2 changes: 0 additions & 2 deletions reftable/error.c
Expand Up @@ -27,8 +27,6 @@ const char *reftable_error_str(int err)
return "misuse of the reftable API";
case REFTABLE_ZLIB_ERROR:
return "zlib failure";
case REFTABLE_NAME_CONFLICT:
return "file/directory conflict";
case REFTABLE_EMPTY_TABLE_ERROR:
return "wrote empty table";
case REFTABLE_REFNAME_ERROR:
Expand Down

0 comments on commit 0667e3c

Please sign in to comment.