Skip to content

Commit

Permalink
Merge pull request #3492 from dscho/ns/batched-fsync
Browse files Browse the repository at this point in the history
Switch to batched fsync by default
  • Loading branch information
vdye committed Nov 15, 2021
2 parents f479702 + 7708aa1 commit 009b3ea
Show file tree
Hide file tree
Showing 34 changed files with 613 additions and 49 deletions.
29 changes: 23 additions & 6 deletions Documentation/config/core.txt
Expand Up @@ -576,12 +576,29 @@ core.whitespace::
errors. The default tab width is 8. Allowed values are 1 to 63.

core.fsyncObjectFiles::
This boolean will enable 'fsync()' when writing object files.
+
This is a total waste of time and effort on a filesystem that orders
data writes properly, but can be useful for filesystems that do not use
journalling (traditional UNIX filesystems) or that only journal metadata
and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
A value indicating the level of effort Git will expend in
trying to make objects added to the repo durable in the event
of an unclean system shutdown. This setting currently only
controls loose objects in the object store, so updates to any
refs or the index may not be equally durable.
+
* `false` allows data to remain in file system caches according to
operating system policy, whence it may be lost if the system loses power
or crashes.
* `true` triggers a data integrity flush for each loose object added to the
object store. This is the safest setting that is likely to ensure durability
across all operating systems and file systems that honor the 'fsync' system
call. However, this setting comes with a significant performance cost on
common hardware. Git does not currently fsync parent directories for
newly-added files, so some filesystems may still allow data to be lost on
system crash.
* `batch` enables an experimental mode that uses interfaces available in some
operating systems to write loose object data with a minimal set of FLUSH
CACHE (or equivalent) commands sent to the storage controller. If the
operating system interfaces are not available, this mode behaves the same as
`true`. This mode is expected to be as safe as `true` on macOS for repos
stored on HFS+ or APFS filesystems and on Windows for repos stored on NTFS or
ReFS.

core.preloadIndex::
Enable parallel index preload for operations like 'git diff'
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Expand Up @@ -406,6 +406,8 @@ all::
#
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
#
# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
#
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
#
Expand Down Expand Up @@ -1901,6 +1903,10 @@ ifdef HAVE_CLOCK_MONOTONIC
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
endif

ifdef HAVE_SYNC_FILE_RANGE
BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
endif

ifdef NEEDS_LIBRT
EXTLIBS += -lrt
endif
Expand Down
23 changes: 19 additions & 4 deletions builtin/prune.c
Expand Up @@ -19,6 +19,7 @@ static int show_only;
static int verbose;
static timestamp_t expire;
static int show_progress = -1;
static struct strbuf remove_dir_buf = STRBUF_INIT;

static int prune_tmp_file(const char *fullpath)
{
Expand All @@ -27,10 +28,20 @@ static int prune_tmp_file(const char *fullpath)
return error("Could not stat '%s'", fullpath);
if (st.st_mtime > expire)
return 0;
if (show_only || verbose)
printf("Removing stale temporary file %s\n", fullpath);
if (!show_only)
unlink_or_warn(fullpath);
if (S_ISDIR(st.st_mode)) {
if (show_only || verbose)
printf("Removing stale temporary directory %s\n", fullpath);
if (!show_only) {
strbuf_reset(&remove_dir_buf);
strbuf_addstr(&remove_dir_buf, fullpath);
remove_dir_recursively(&remove_dir_buf, 0);
}
} else {
if (show_only || verbose)
printf("Removing stale temporary file %s\n", fullpath);
if (!show_only)
unlink_or_warn(fullpath);
}
return 0;
}

Expand Down Expand Up @@ -98,6 +109,9 @@ static int prune_cruft(const char *basename, const char *path, void *data)

static int prune_subdir(unsigned int nr, const char *path, void *data)
{
if (verbose)
printf("Removing directory %s\n", path);

if (!show_only)
rmdir(path);
return 0;
Expand Down Expand Up @@ -187,5 +201,6 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
}

strbuf_release(&remove_dir_buf);
return 0;
}
2 changes: 1 addition & 1 deletion builtin/receive-pack.c
Expand Up @@ -2213,7 +2213,7 @@ static const char *unpack(int err_fd, struct shallow_info *si)
strvec_push(&child.args, alt_shallow_file);
}

tmp_objdir = tmp_objdir_create();
tmp_objdir = tmp_objdir_create("incoming");
if (!tmp_objdir) {
if (err_fd > 0)
close(err_fd);
Expand Down
3 changes: 3 additions & 0 deletions builtin/unpack-objects.c
@@ -1,5 +1,6 @@
#include "builtin.h"
#include "cache.h"
#include "bulk-checkin.h"
#include "config.h"
#include "object-store.h"
#include "object.h"
Expand Down Expand Up @@ -503,10 +504,12 @@ static void unpack_all(void)
if (!quiet)
progress = start_progress(_("Unpacking objects"), nr_objects);
CALLOC_ARRAY(obj_list, nr_objects);
plug_bulk_checkin();
for (i = 0; i < nr_objects; i++) {
unpack_one(i);
display_progress(progress, i + 1);
}
unplug_bulk_checkin();
stop_progress(&progress);

if (delta_list)
Expand Down
6 changes: 6 additions & 0 deletions builtin/update-index.c
Expand Up @@ -5,6 +5,7 @@
*/
#define USE_THE_INDEX_COMPATIBILITY_MACROS
#include "cache.h"
#include "bulk-checkin.h"
#include "config.h"
#include "lockfile.h"
#include "quote.h"
Expand Down Expand Up @@ -1088,6 +1089,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)

the_index.updated_skipworktree = 1;

/* we might be adding many objects to the object database */
plug_bulk_checkin();

/*
* Custom copy of parse_options() because we want to handle
* filename arguments as they come.
Expand Down Expand Up @@ -1168,6 +1172,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
strbuf_release(&buf);
}

/* by now we must have added all of the new objects */
unplug_bulk_checkin();
if (split_index > 0) {
if (git_config_get_split_index() == 0)
warning(_("core.splitIndex is set to false; "
Expand Down
90 changes: 80 additions & 10 deletions bulk-checkin.c
Expand Up @@ -3,16 +3,22 @@
*/
#include "cache.h"
#include "bulk-checkin.h"
#include "lockfile.h"
#include "repository.h"
#include "csum-file.h"
#include "pack.h"
#include "strbuf.h"
#include "string-list.h"
#include "tmp-objdir.h"
#include "packfile.h"
#include "object-store.h"

static struct bulk_checkin_state {
unsigned plugged:1;
static int bulk_checkin_plugged;
static int needs_batch_fsync;

static struct tmp_objdir *bulk_fsync_objdir;

static struct bulk_checkin_state {
char *pack_tmp_name;
struct hashfile *f;
off_t offset;
Expand All @@ -21,7 +27,7 @@ static struct bulk_checkin_state {
struct pack_idx_entry **written;
uint32_t alloc_written;
uint32_t nr_written;
} state;
} bulk_checkin_state;

static void finish_tmp_packfile(struct strbuf *basename,
const char *pack_tmp_name,
Expand Down Expand Up @@ -79,6 +85,34 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
reprepare_packed_git(the_repository);
}

/*
* Cleanup after batch-mode fsync_object_files.
*/
static void do_batch_fsync(void)
{
/*
* Issue a full hardware flush against a temporary file to ensure
* that all objects are durable before any renames occur. The code in
* fsync_loose_object_bulk_checkin has already issued a writeout
* request, but it has not flushed any writeback cache in the storage
* hardware.
*/

if (needs_batch_fsync) {
struct strbuf temp_path = STRBUF_INIT;
struct tempfile *temp;

strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory());
temp = xmks_tempfile(temp_path.buf);
fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
delete_tempfile(&temp);
strbuf_release(&temp_path);
}

if (bulk_fsync_objdir)
tmp_objdir_migrate(bulk_fsync_objdir);
}

static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
{
int i;
Expand Down Expand Up @@ -273,25 +307,61 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
return 0;
}

void fsync_loose_object_bulk_checkin(int fd)
{
assert(fsync_object_files == FSYNC_OBJECT_FILES_BATCH);

/*
* If we have a plugged bulk checkin, we issue a call that
* cleans the filesystem page cache but avoids a hardware flush
* command. Later on we will issue a single hardware flush
* before as part of do_batch_fsync.
*/
if (bulk_checkin_plugged &&
git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0) {
if (!needs_batch_fsync)
needs_batch_fsync = 1;
} else {
fsync_or_die(fd, "loose object file");
}
}

int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags)
{
int status = deflate_to_pack(&state, oid, fd, size, type,
int status = deflate_to_pack(&bulk_checkin_state, oid, fd, size, type,
path, flags);
if (!state.plugged)
finish_bulk_checkin(&state);
if (!bulk_checkin_plugged)
finish_bulk_checkin(&bulk_checkin_state);
return status;
}

void plug_bulk_checkin(void)
{
state.plugged = 1;
assert(!bulk_checkin_plugged);

/*
* A temporary object directory is used to hold the files
* while they are not fsynced.
*/
if (fsync_object_files == FSYNC_OBJECT_FILES_BATCH) {
bulk_fsync_objdir = tmp_objdir_create("bulk-fsync");
if (!bulk_fsync_objdir)
die(_("Could not create temporary object directory for core.fsyncobjectfiles=batch"));

tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
}

bulk_checkin_plugged = 1;
}

void unplug_bulk_checkin(void)
{
state.plugged = 0;
if (state.f)
finish_bulk_checkin(&state);
assert(bulk_checkin_plugged);
bulk_checkin_plugged = 0;
if (bulk_checkin_state.f)
finish_bulk_checkin(&bulk_checkin_state);

do_batch_fsync();
}
2 changes: 2 additions & 0 deletions bulk-checkin.h
Expand Up @@ -6,6 +6,8 @@

#include "cache.h"

void fsync_loose_object_bulk_checkin(int fd);

int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags);
Expand Down
8 changes: 7 additions & 1 deletion cache.h
Expand Up @@ -985,7 +985,13 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;

extern int fsync_object_files;
enum fsync_object_files_mode {
FSYNC_OBJECT_FILES_OFF,
FSYNC_OBJECT_FILES_ON,
FSYNC_OBJECT_FILES_BATCH
};

extern enum fsync_object_files_mode fsync_object_files;
extern int core_preload_index;
extern int precomposed_unicode;
extern int protect_hfs;
Expand Down
7 changes: 5 additions & 2 deletions compat/mingw.c
Expand Up @@ -1239,8 +1239,11 @@ char *mingw_mktemp(char *template)
int offset = 0;

/* we need to return the path, thus no long paths here! */
if (xutftowcs_path(wtemplate, template) < 0)
if (xutftowcsn(wtemplate, template, MAX_PATH, -1) < 0) {
if (errno == ERANGE)
errno = ENAMETOOLONG;
return NULL;
}

if (is_dir_sep(template[0]) && !is_dir_sep(template[1]) &&
iswalpha(wtemplate[0]) && wtemplate[1] == L':') {
Expand Down Expand Up @@ -3754,7 +3757,7 @@ int wmain(int argc, const wchar_t **wargv)

maybe_redirect_std_handles();
adjust_symlink_flags();
fsync_object_files = 1;
fsync_object_files = FSYNC_OBJECT_FILES_ON;

/* determine size of argv and environ conversion buffer */
maxlen = wcslen(wargv[0]);
Expand Down
3 changes: 3 additions & 0 deletions compat/mingw.h
Expand Up @@ -332,6 +332,9 @@ int mingw_getpagesize(void);
#define getpagesize mingw_getpagesize
#endif

int win32_fsync_no_flush(int fd);
#define fsync_no_flush win32_fsync_no_flush

struct rlimit {
unsigned int rlim_cur;
};
Expand Down
28 changes: 28 additions & 0 deletions compat/win32/flush.c
@@ -0,0 +1,28 @@
#include "../../git-compat-util.h"
#include <winternl.h>
#include "lazyload.h"

int win32_fsync_no_flush(int fd)
{
IO_STATUS_BLOCK io_status;

#define FLUSH_FLAGS_FILE_DATA_ONLY 1

DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NtFlushBuffersFileEx,
HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
PIO_STATUS_BLOCK IoStatusBlock);

if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
errno = ENOSYS;
return -1;
}

memset(&io_status, 0, sizeof(io_status));
if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
NULL, 0, &io_status)) {
errno = EINVAL;
return -1;
}

return 0;
}
7 changes: 6 additions & 1 deletion config.c
Expand Up @@ -1491,7 +1491,12 @@ int git_default_core_config(const char *var, const char *value, void *cb)
}

if (!strcmp(var, "core.fsyncobjectfiles")) {
fsync_object_files = git_config_bool(var, value);
if (value && !strcmp(value, "batch"))
fsync_object_files = FSYNC_OBJECT_FILES_BATCH;
else if (git_config_bool(var, value))
fsync_object_files = FSYNC_OBJECT_FILES_ON;
else
fsync_object_files = FSYNC_OBJECT_FILES_OFF;
return 0;
}

Expand Down

0 comments on commit 009b3ea

Please sign in to comment.