Skip to content

Commit

Permalink
Merge branch 'ks/tree-diff-nway' into next
Browse files Browse the repository at this point in the history
Instead of running N pair-wise diff-trees when inspecting a
N-parent merge, find the set of paths that were touched by walking
N+1 trees in parallel.  These set of paths can then be turned into
N pair-wise diff-tree results to be processed through rename
detections and such.  And N=2 case nicely degenerates to the usual
2-way diff-tree, which is very nice.

* ks/tree-diff-nway:
  combine-diff: speed it up, by using multiparent diff tree-walker directly
  tree-diff: rework diff_tree() to generate diffs for multiparent cases as well
  Portable alloca for Git
  • Loading branch information
gitster committed Apr 8, 2014
2 parents 0306834 + 7195fbf commit 6b74773
Show file tree
Hide file tree
Showing 9 changed files with 579 additions and 71 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ all::
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header.
#
# Define NO_CURL if you do not have libcurl installed. git-http-fetch and
# git-http-push are not built, and you cannot use http:// and https://
# transports (neither smart nor dumb).
Expand Down Expand Up @@ -1111,6 +1113,10 @@ ifdef USE_LIBPCRE
EXTLIBS += -lpcre
endif

ifdef HAVE_ALLOCA_H
BASIC_CFLAGS += -DHAVE_ALLOCA_H
endif

ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =
Expand Down
15 changes: 15 additions & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
#define S_IFGITLINK 0160000
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)

/*
* Some mode bits are also used internally for computations.
*
* They *must* not overlap with any valid modes, and they *must* not be emitted
* to outside world - i.e. appear on disk or network. In other words, it's just
* temporary fields, which we internally use, but they have to stay in-house.
*
* ( such approach is valid, as standard S_IF* fits into 16 bits, and in Git
* codebase mode is `unsigned int` which is assumed to be at least 32 bits )
*/

/* used internally in tree-diff */
#define S_DIFFTREE_IFXMIN_NEQ 0x80000000


/*
* Intensive research over the course of many years has shown that
* port 9418 is totally unused by anything else. Or
Expand Down
88 changes: 83 additions & 5 deletions combine-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,7 @@ static const char *path_path(void *obj)


/* find set of paths that every parent touches */
static struct combine_diff_path *find_paths(const unsigned char *sha1,
static struct combine_diff_path *find_paths_generic(const unsigned char *sha1,
const struct sha1_array *parents, struct diff_options *opt)
{
struct combine_diff_path *paths = NULL;
Expand All @@ -1316,6 +1316,7 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1,
/* tell diff_tree to emit paths in sorted (=tree) order */
opt->orderfile = NULL;

/* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (wrt paths) */
for (i = 0; i < num_parent; i++) {
/*
* show stat against the first parent even when doing
Expand Down Expand Up @@ -1346,6 +1347,35 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1,
}


/*
* find set of paths that everybody touches, assuming diff is run without
* rename/copy detection, etc, comparing all trees simultaneously (= faster).
*/
static struct combine_diff_path *find_paths_multitree(
const unsigned char *sha1, const struct sha1_array *parents,
struct diff_options *opt)
{
int i, nparent = parents->nr;
const unsigned char **parents_sha1;
struct combine_diff_path paths_head;
struct strbuf base;

parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0]));
for (i = 0; i < nparent; i++)
parents_sha1[i] = parents->sha1[i];

/* fake list head, so worker can assume it is non-NULL */
paths_head.next = NULL;

strbuf_init(&base, PATH_MAX);
diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt);

strbuf_release(&base);
free(parents_sha1);
return paths_head.next;
}


void diff_tree_combined(const unsigned char *sha1,
const struct sha1_array *parents,
int dense,
Expand All @@ -1355,6 +1385,7 @@ void diff_tree_combined(const unsigned char *sha1,
struct diff_options diffopts;
struct combine_diff_path *p, *paths;
int i, num_paths, needsep, show_log_first, num_parent = parents->nr;
int need_generic_pathscan;

/* nothing to do, if no parents */
if (!num_parent)
Expand All @@ -1377,11 +1408,58 @@ void diff_tree_combined(const unsigned char *sha1,

/* find set of paths that everybody touches
*
* NOTE find_paths() also handles --stat, as it computes
* diff(sha1,parent_i) for all i to do the job, specifically
* for parent0.
* NOTE
*
* Diffcore transformations are bound to diff_filespec and logic
* comparing two entries - i.e. they do not apply directly to combine
* diff.
*
* If some of such transformations is requested - we launch generic
* path scanning, which works significantly slower compared to
* simultaneous all-trees-in-one-go scan in find_paths_multitree().
*
* TODO some of the filters could be ported to work on
* combine_diff_paths - i.e. all functionality that skips paths, so in
* theory, we could end up having only multitree path scanning.
*
* NOTE please keep this semantically in sync with diffcore_std()
*/
paths = find_paths(sha1, parents, &diffopts);
need_generic_pathscan = opt->skip_stat_unmatch ||
DIFF_OPT_TST(opt, FOLLOW_RENAMES) ||
opt->break_opt != -1 ||
opt->detect_rename ||
opt->pickaxe ||
opt->filter;


if (need_generic_pathscan) {
/*
* NOTE generic case also handles --stat, as it computes
* diff(sha1,parent_i) for all i to do the job, specifically
* for parent0.
*/
paths = find_paths_generic(sha1, parents, &diffopts);
}
else {
int stat_opt;
paths = find_paths_multitree(sha1, parents, &diffopts);

/*
* show stat against the first parent even
* when doing combined diff.
*/
stat_opt = (opt->output_format &
(DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT));
if (stat_opt) {
diffopts.output_format = stat_opt;

diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts);
diffcore_std(&diffopts);
if (opt->orderfile)
diffcore_order(opt->orderfile);
diff_flush(&diffopts);
}
}

/* find out number of surviving paths */
for (num_paths = 0, p = paths; p; p = p->next)
Expand Down
10 changes: 8 additions & 2 deletions config.mak.uname
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ ifeq ($(uname_S),OSF1)
NO_NSEC = YesPlease
endif
ifeq ($(uname_S),Linux)
HAVE_ALLOCA_H = YesPlease
NO_STRLCPY = YesPlease
NO_MKSTEMPS = YesPlease
HAVE_PATHS_H = YesPlease
LIBC_CONTAINS_LIBINTL = YesPlease
HAVE_DEV_TTY = YesPlease
endif
ifeq ($(uname_S),GNU/kFreeBSD)
HAVE_ALLOCA_H = YesPlease
NO_STRLCPY = YesPlease
NO_MKSTEMPS = YesPlease
HAVE_PATHS_H = YesPlease
Expand Down Expand Up @@ -103,6 +105,7 @@ ifeq ($(uname_S),SunOS)
NEEDS_NSL = YesPlease
SHELL_PATH = /bin/bash
SANE_TOOL_PATH = /usr/xpg6/bin:/usr/xpg4/bin
HAVE_ALLOCA_H = YesPlease
NO_STRCASESTR = YesPlease
NO_MEMMEM = YesPlease
NO_MKDTEMP = YesPlease
Expand Down Expand Up @@ -145,7 +148,7 @@ ifeq ($(uname_S),SunOS)
endif
INSTALL = /usr/ucb/install
TAR = gtar
BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__ -DHAVE_ALLOCA_H
BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__
endif
ifeq ($(uname_O),Cygwin)
ifeq ($(shell expr "$(uname_R)" : '1\.[1-6]\.'),4)
Expand All @@ -165,6 +168,7 @@ ifeq ($(uname_O),Cygwin)
else
NO_REGEX = UnfortunatelyYes
endif
HAVE_ALLOCA_H = YesPlease
NEEDS_LIBICONV = YesPlease
NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
Expand Down Expand Up @@ -239,6 +243,7 @@ ifeq ($(uname_S),AIX)
endif
ifeq ($(uname_S),GNU)
# GNU/Hurd
HAVE_ALLOCA_H = YesPlease
NO_STRLCPY = YesPlease
NO_MKSTEMPS = YesPlease
HAVE_PATHS_H = YesPlease
Expand Down Expand Up @@ -313,6 +318,7 @@ endif
ifeq ($(uname_S),Windows)
GIT_VERSION := $(GIT_VERSION).MSVC
pathsep = ;
HAVE_ALLOCA_H = YesPlease
NO_PREAD = YesPlease
NEEDS_CRYPTO_WITH_SSL = YesPlease
NO_LIBGEN_H = YesPlease
Expand Down Expand Up @@ -357,7 +363,7 @@ ifeq ($(uname_S),Windows)
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
compat/win32/pthread.o compat/win32/syslog.o \
compat/win32/dirent.o
COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib
EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj
PTHREAD_LIBS =
Expand Down
8 changes: 8 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,14 @@ AS_HELP_STRING([], [ARG can be also prefix for libpcre library and hea
GIT_CONF_SUBST([LIBPCREDIR])
fi)
#
# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header.
AC_FUNC_ALLOCA
case $ac_cv_working_alloca_h in
yes) HAVE_ALLOCA_H=YesPlease;;
*) HAVE_ALLOCA_H='';;
esac
GIT_CONF_SUBST([HAVE_ALLOCA_H])
#
# Define NO_CURL if you do not have curl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
Expand Down
2 changes: 2 additions & 0 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -3213,6 +3213,7 @@ void diff_setup(struct diff_options *options)
options->context = diff_context_default;
DIFF_OPT_SET(options, RENAME_EMPTY);

/* pathchange left =NULL by default */
options->change = diff_change;
options->add_remove = diff_addremove;
options->use_color = diff_use_color_default;
Expand Down Expand Up @@ -4757,6 +4758,7 @@ void diffcore_fix_diff_index(struct diff_options *options)

void diffcore_std(struct diff_options *options)
{
/* NOTE please keep the following in sync with diff_tree_combined() */
if (options->skip_stat_unmatch)
diffcore_skip_stat_unmatch(options);
if (!options->found_follow) {
Expand Down
9 changes: 9 additions & 0 deletions diff.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ struct diff_filespec;
struct userdiff_driver;
struct sha1_array;
struct commit;
struct combine_diff_path;

typedef int (*pathchange_fn_t)(struct diff_options *options,
struct combine_diff_path *path);

typedef void (*change_fn_t)(struct diff_options *options,
unsigned old_mode, unsigned new_mode,
Expand Down Expand Up @@ -157,6 +161,7 @@ struct diff_options {
int close_file;

struct pathspec pathspec;
pathchange_fn_t pathchange;
change_fn_t change;
add_remove_fn_t add_remove;
diff_format_fn_t format_callback;
Expand Down Expand Up @@ -189,6 +194,10 @@ const char *diff_line_prefix(struct diff_options *);

extern const char mime_boundary_leader[];

extern struct combine_diff_path *diff_tree_paths(
struct combine_diff_path *p, const unsigned char *sha1,
const unsigned char **parent_sha1, int nparent,
struct strbuf *base, struct diff_options *opt);
extern int diff_tree_sha1(const unsigned char *old, const unsigned char *new,
const char *base, struct diff_options *opt);
extern int diff_root_tree_sha1(const unsigned char *new, const char *base,
Expand Down
8 changes: 8 additions & 0 deletions git-compat-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,14 @@ extern void release_pack_memory(size_t);
typedef void (*try_to_free_t)(size_t);
extern try_to_free_t set_try_to_free_routine(try_to_free_t);

#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))
# define xalloca_free(p) do {} while (0)
#else
# define xalloca(size) (xmalloc(size))
# define xalloca_free(p) (free(p))
#endif
extern char *xstrdup(const char *str);
extern void *xmalloc(size_t size);
extern void *xmallocz(size_t size);
Expand Down
Loading

0 comments on commit 6b74773

Please sign in to comment.