Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge branch 'jc/fix-tree-walk'

* jc/fix-tree-walk:
  read-tree --debug-unpack
  unpack-trees.c: look ahead in the index
  unpack-trees.c: prepare for looking ahead in the index
  Aggressive three-way merge: fix D/F case
  traverse_trees(): handle D/F conflict case sanely
  more D/F conflict tests
  tests: move convenience regexp to match object names to test-lib.sh

Conflicts:
	builtin-read-tree.c
	unpack-trees.c
	unpack-trees.h
  • Loading branch information...
commit 026680f881e751311674d97d0f6ed87f06a92bfb 2 parents eca9388 + ba655da
Junio C Hamano authored
36 builtin-read-tree.c
@@ -65,6 +65,34 @@ static int exclude_per_directory_cb(const struct option *opt, const char *arg,
65 65 return 0;
66 66 }
67 67
  68 +static void debug_stage(const char *label, struct cache_entry *ce,
  69 + struct unpack_trees_options *o)
  70 +{
  71 + printf("%s ", label);
  72 + if (!ce)
  73 + printf("(missing)\n");
  74 + else if (ce == o->df_conflict_entry)
  75 + printf("(conflict)\n");
  76 + else
  77 + printf("%06o #%d %s %.8s\n",
  78 + ce->ce_mode, ce_stage(ce), ce->name,
  79 + sha1_to_hex(ce->sha1));
  80 +}
  81 +
  82 +static int debug_merge(struct cache_entry **stages, struct unpack_trees_options *o)
  83 +{
  84 + int i;
  85 +
  86 + printf("* %d-way merge\n", o->merge_size);
  87 + debug_stage("index", stages[0], o);
  88 + for (i = 1; i <= o->merge_size; i++) {
  89 + char buf[24];
  90 + sprintf(buf, "ent#%d", i);
  91 + debug_stage(buf, stages[i], o);
  92 + }
  93 + return 0;
  94 +}
  95 +
68 96 static struct lock_file lock_file;
69 97
70 98 int cmd_read_tree(int argc, const char **argv, const char *unused_prefix)
@@ -101,6 +129,8 @@ int cmd_read_tree(int argc, const char **argv, const char *unused_prefix)
101 129 "don't check the working tree after merging", 1),
102 130 OPT_SET_INT(0, "no-sparse-checkout", &opts.skip_sparse_checkout,
103 131 "skip applying sparse checkout filter", 1),
  132 + OPT_SET_INT(0, "debug-unpack", &opts.debug_unpack,
  133 + "debug unpack-trees", 1),
104 134 OPT_END()
105 135 };
106 136
@@ -169,6 +199,9 @@ int cmd_read_tree(int argc, const char **argv, const char *unused_prefix)
169 199 opts.head_idx = 1;
170 200 }
171 201
  202 + if (opts.debug_unpack)
  203 + opts.fn = debug_merge;
  204 +
172 205 cache_tree_free(&active_cache_tree);
173 206 for (i = 0; i < nr_trees; i++) {
174 207 struct tree *tree = trees[i];
@@ -178,6 +211,9 @@ int cmd_read_tree(int argc, const char **argv, const char *unused_prefix)
178 211 if (unpack_trees(nr_trees, t, &opts))
179 212 return 128;
180 213
  214 + if (opts.debug_unpack)
  215 + return 0; /* do not write the index out */
  216 +
181 217 /*
182 218 * When reading only one tree (either the most basic form,
183 219 * "-m ent" or "--reset ent" form), we can obtain a fully
2  cache.h
@@ -182,6 +182,8 @@ struct cache_entry {
182 182 /* Only remove in work directory, not index */
183 183 #define CE_WT_REMOVE (0x400000)
184 184
  185 +#define CE_UNPACKED (0x1000000)
  186 +
185 187 /*
186 188 * Extended on-disk flags
187 189 */
19 diff-lib.c
@@ -380,21 +380,6 @@ static void do_oneway_diff(struct unpack_trees_options *o,
380 380 show_modified(revs, tree, idx, 1, cached, match_missing);
381 381 }
382 382
383   -static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o)
384   -{
385   - int len = ce_namelen(ce);
386   - const struct index_state *index = o->src_index;
387   -
388   - while (o->pos < index->cache_nr) {
389   - struct cache_entry *next = index->cache[o->pos];
390   - if (len != ce_namelen(next))
391   - break;
392   - if (memcmp(ce->name, next->name, len))
393   - break;
394   - o->pos++;
395   - }
396   -}
397   -
398 383 /*
399 384 * The unpack_trees() interface is designed for merging, so
400 385 * the different source entries are designed primarily for
@@ -416,9 +401,6 @@ static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o)
416 401 struct cache_entry *tree = src[1];
417 402 struct rev_info *revs = o->unpack_data;
418 403
419   - if (idx && ce_stage(idx))
420   - skip_same_name(idx, o);
421   -
422 404 /*
423 405 * Unpack-trees generates a DF/conflict entry if
424 406 * there was a directory in the index and a tree
@@ -464,6 +446,7 @@ int run_diff_index(struct rev_info *revs, int cached)
464 446 exit(128);
465 447
466 448 diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/");
  449 + diffcore_fix_diff_index(&revs->diffopt);
467 450 diffcore_std(&revs->diffopt);
468 451 diff_flush(&revs->diffopt);
469 452 return 0;
17 diff.c
@@ -3678,6 +3678,23 @@ static void diffcore_skip_stat_unmatch(struct diff_options *diffopt)
3678 3678 *q = outq;
3679 3679 }
3680 3680
  3681 +static int diffnamecmp(const void *a_, const void *b_)
  3682 +{
  3683 + const struct diff_filepair *a = *((const struct diff_filepair **)a_);
  3684 + const struct diff_filepair *b = *((const struct diff_filepair **)b_);
  3685 + const char *name_a, *name_b;
  3686 +
  3687 + name_a = a->one ? a->one->path : a->two->path;
  3688 + name_b = b->one ? b->one->path : b->two->path;
  3689 + return strcmp(name_a, name_b);
  3690 +}
  3691 +
  3692 +void diffcore_fix_diff_index(struct diff_options *options)
  3693 +{
  3694 + struct diff_queue_struct *q = &diff_queued_diff;
  3695 + qsort(q->queue, q->nr, sizeof(q->queue[0]), diffnamecmp);
  3696 +}
  3697 +
3681 3698 void diffcore_std(struct diff_options *options)
3682 3699 {
3683 3700 if (options->skip_stat_unmatch)
1  diff.h
@@ -210,6 +210,7 @@ extern int diff_setup_done(struct diff_options *);
210 210 #define DIFF_PICKAXE_REGEX 2
211 211
212 212 extern void diffcore_std(struct diff_options *);
  213 +extern void diffcore_fix_diff_index(struct diff_options *);
213 214
214 215 #define COMMON_DIFF_OPTIONS_HELP \
215 216 "\ncommon diff options:\n" \
2  t/diff-lib.sh
... ... @@ -1,7 +1,5 @@
1 1 :
2 2
3   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
4   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
5 3 sanitize_diff_raw='/^:/s/ '"$_x40"' '"$_x40"' \([A-Z]\)[0-9]* / X X \1# /'
6 4 compare_diff_raw () {
7 5 # When heuristics are improved, the score numbers would change.
3  t/t1000-read-tree-m-3way.sh
@@ -126,9 +126,6 @@ cat >expected <<\EOF
126 126 100644 X 0 Z/NN
127 127 EOF
128 128
129   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
130   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
131   -
132 129 check_result () {
133 130 git ls-files --stage | sed -e 's/ '"$_x40"' / X /' >current &&
134 131 test_cmp expected current
2  t/t1001-read-tree-m-2way.sh
@@ -26,8 +26,6 @@ read_tree_twoway () {
26 26 git read-tree -m "$1" "$2" && git ls-files --stage
27 27 }
28 28
29   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
30   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
31 29 compare_change () {
32 30 sed -n >current \
33 31 -e '/^--- /d; /^+++ /d; /^@@ /d;' \
2  t/t1002-read-tree-m-u-2way.sh
@@ -10,8 +10,6 @@ This is identical to t1001, but uses -u to update the work tree as well.
10 10 '
11 11 . ./test-lib.sh
12 12
13   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
14   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
15 13 compare_change () {
16 14 sed >current \
17 15 -e '1{/^diff --git /d;}' \
102 t/t1012-read-tree-df.sh
... ... @@ -0,0 +1,102 @@
  1 +#!/bin/sh
  2 +
  3 +test_description='read-tree D/F conflict corner cases'
  4 +
  5 +. ./test-lib.sh
  6 +
  7 +maketree () {
  8 + (
  9 + rm -f .git/index .git/index.lock &&
  10 + git clean -d -f -f -q -x &&
  11 + name="$1" &&
  12 + shift &&
  13 + for it
  14 + do
  15 + path=$(expr "$it" : '\([^:]*\)') &&
  16 + mkdir -p $(dirname "$path") &&
  17 + echo "$it" >"$path" &&
  18 + git update-index --add "$path" || exit
  19 + done &&
  20 + git tag "$name" $(git write-tree)
  21 + )
  22 +}
  23 +
  24 +settree () {
  25 + rm -f .git/index .git/index.lock &&
  26 + git clean -d -f -f -q -x &&
  27 + git read-tree "$1" &&
  28 + git checkout-index -f -q -u -a &&
  29 + git update-index --refresh
  30 +}
  31 +
  32 +checkindex () {
  33 + git ls-files -s |
  34 + sed "s|^[0-7][0-7]* $_x40 \([0-3]\) |\1 |" >current &&
  35 + cat >expect &&
  36 + test_cmp expect current
  37 +}
  38 +
  39 +test_expect_success setup '
  40 + maketree O-000 a/b-2/c/d a/b/c/d a/x &&
  41 + maketree A-000 a/b-2/c/d a/b/c/d a/x &&
  42 + maketree A-001 a/b-2/c/d a/b/c/d a/b/c/e a/x &&
  43 + maketree B-000 a/b-2/c/d a/b a/x &&
  44 +
  45 + maketree O-010 t-0 t/1 t/2 t=3 &&
  46 + maketree A-010 t-0 t t=3 &&
  47 + maketree B-010 t/1: t=3: &&
  48 +
  49 + maketree O-020 ds/dma/ioat.c ds/dma/ioat_dca.c &&
  50 + maketree A-020 ds/dma/ioat/Makefile ds/dma/ioat/registers.h &&
  51 + :
  52 +'
  53 +
  54 +test_expect_success '3-way (1)' '
  55 + settree A-000 &&
  56 + git read-tree -m -u O-000 A-000 B-000 &&
  57 + checkindex <<-EOF
  58 + 3 a/b
  59 + 0 a/b-2/c/d
  60 + 1 a/b/c/d
  61 + 2 a/b/c/d
  62 + 0 a/x
  63 + EOF
  64 +'
  65 +
  66 +test_expect_success '3-way (2)' '
  67 + settree A-001 &&
  68 + git read-tree -m -u O-000 A-001 B-000 &&
  69 + checkindex <<-EOF
  70 + 3 a/b
  71 + 0 a/b-2/c/d
  72 + 1 a/b/c/d
  73 + 2 a/b/c/d
  74 + 2 a/b/c/e
  75 + 0 a/x
  76 + EOF
  77 +'
  78 +
  79 +test_expect_success '3-way (3)' '
  80 + settree A-010 &&
  81 + git read-tree -m -u O-010 A-010 B-010 &&
  82 + checkindex <<-EOF
  83 + 2 t
  84 + 1 t-0
  85 + 2 t-0
  86 + 1 t/1
  87 + 3 t/1
  88 + 1 t/2
  89 + 0 t=3
  90 + EOF
  91 +'
  92 +
  93 +test_expect_success '2-way (1)' '
  94 + settree O-020 &&
  95 + git read-tree -m -u O-020 A-020 &&
  96 + checkindex <<-EOF
  97 + 0 ds/dma/ioat/Makefile
  98 + 0 ds/dma/ioat/registers.h
  99 + EOF
  100 +'
  101 +
  102 +test_done
2  t/t3100-ls-tree-restrict.sh
@@ -43,8 +43,6 @@ test_expect_success \
43 43 tree=`git write-tree` &&
44 44 echo $tree'
45 45
46   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
47   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
48 46 test_output () {
49 47 sed -e "s/ $_x40 / X /" <current >check
50 48 test_cmp expected check
2  t/t3101-ls-tree-dirname.sh
@@ -39,8 +39,6 @@ test_expect_success \
39 39 tree=`git write-tree` &&
40 40 echo $tree'
41 41
42   -_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
43   -_x40="$_x05$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
44 42 test_output () {
45 43 sed -e "s/ $_x40 / X /" <current >check
46 44 test_cmp expected check
2  t/t4006-diff-mode.sh
@@ -20,8 +20,6 @@ test_expect_success \
20 20 'test_chmod +x rezrov &&
21 21 git diff-index $tree >current'
22 22
23   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
24   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
25 23 sed -e 's/\(:100644 100755\) \('"$_x40"'\) \2 /\1 X X /' <current >check
26 24 echo ":100644 100755 X X M rezrov" >expected
27 25
3  t/t6012-rev-list-simplify.sh
@@ -8,9 +8,6 @@ note () {
8 8 git tag "$1"
9 9 }
10 10
11   -_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
12   -_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
13   -
14 11 unnote () {
15 12 git name-rev --tags --stdin | sed -e "s|$_x40 (tags/\([^)]*\)) |\1 |g"
16 13 }
4 t/t6035-merge-dir-to-symlink.sh
@@ -48,7 +48,7 @@ test_expect_success 'setup for merge test' '
@@ -74,7 +74,7 @@ test_expect_success 'setup a merge where dir a/b-2 changed to symlink' '
6 t/test-lib.sh
@@ -74,6 +74,12 @@ case $(echo $GIT_TRACE |tr "[A-Z]" "[a-z]") in
74 74 ;;
75 75 esac
76 76
  77 +# Convenience
  78 +#
  79 +# A regexp to match 5 and 40 hexdigits
  80 +_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
  81 +_x40="$_x05$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
  82 +
77 83 # Each test should start with something like this, after copyright notices:
78 84 #
79 85 # test_description='Description of this test...
277 tree-walk.c
@@ -60,13 +60,6 @@ void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
60 60 return buf;
61 61 }
62 62
63   -static int entry_compare(struct name_entry *a, struct name_entry *b)
64   -{
65   - return df_name_compare(
66   - a->path, tree_entry_len(a->path, a->sha1), a->mode,
67   - b->path, tree_entry_len(b->path, b->sha1), b->mode);
68   -}
69   -
70 63 static void entry_clear(struct name_entry *a)
71 64 {
72 65 memset(a, 0, sizeof(*a));
@@ -138,66 +131,264 @@ char *make_traverse_path(char *path, const struct traverse_info *info, const str
138 131 return path;
139 132 }
140 133
  134 +struct tree_desc_skip {
  135 + struct tree_desc_skip *prev;
  136 + const void *ptr;
  137 +};
  138 +
  139 +struct tree_desc_x {
  140 + struct tree_desc d;
  141 + struct tree_desc_skip *skip;
  142 +};
  143 +
  144 +static int name_compare(const char *a, int a_len,
  145 + const char *b, int b_len)
  146 +{
  147 + int len = (a_len < b_len) ? a_len : b_len;
  148 + int cmp = memcmp(a, b, len);
  149 + if (cmp)
  150 + return cmp;
  151 + return (a_len - b_len);
  152 +}
  153 +
  154 +static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
  155 +{
  156 + /*
  157 + * The caller wants to pick *a* from a tree or nothing.
  158 + * We are looking at *b* in a tree.
  159 + *
  160 + * (0) If a and b are the same name, we are trivially happy.
  161 + *
  162 + * There are three possibilities where *a* could be hiding
  163 + * behind *b*.
  164 + *
  165 + * (1) *a* == "t", *b* == "ab" i.e. *b* sorts earlier than *a* no
  166 + * matter what.
  167 + * (2) *a* == "t", *b* == "t-2" and "t" is a subtree in the tree;
  168 + * (3) *a* == "t-2", *b* == "t" and "t-2" is a blob in the tree.
  169 + *
  170 + * Otherwise we know *a* won't appear in the tree without
  171 + * scanning further.
  172 + */
  173 +
  174 + int cmp = name_compare(a, a_len, b, b_len);
  175 +
  176 + /* Most common case first -- reading sync'd trees */
  177 + if (!cmp)
  178 + return cmp;
  179 +
  180 + if (0 < cmp) {
  181 + /* a comes after b; it does not matter if it is case (3)
  182 + if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
  183 + return 1;
  184 + */
  185 + return 1; /* keep looking */
  186 + }
  187 +
  188 + /* b comes after a; are we looking at case (2)? */
  189 + if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
  190 + return 1; /* keep looking */
  191 +
  192 + return -1; /* a cannot appear in the tree */
  193 +}
  194 +
  195 +/*
  196 + * From the extended tree_desc, extract the first name entry, while
  197 + * paying attention to the candidate "first" name. Most importantly,
  198 + * when looking for an entry, if there are entries that sorts earlier
  199 + * in the tree object representation than that name, skip them and
  200 + * process the named entry first. We will remember that we haven't
  201 + * processed the first entry yet, and in the later call skip the
  202 + * entry we processed early when update_extended_entry() is called.
  203 + *
  204 + * E.g. if the underlying tree object has these entries:
  205 + *
  206 + * blob "t-1"
  207 + * blob "t-2"
  208 + * tree "t"
  209 + * blob "t=1"
  210 + *
  211 + * and the "first" asks for "t", remember that we still need to
  212 + * process "t-1" and "t-2" but extract "t". After processing the
  213 + * entry "t" from this call, the caller will let us know by calling
  214 + * update_extended_entry() that we can remember "t" has been processed
  215 + * already.
  216 + */
  217 +
  218 +static void extended_entry_extract(struct tree_desc_x *t,
  219 + struct name_entry *a,
  220 + const char *first,
  221 + int first_len)
  222 +{
  223 + const char *path;
  224 + int len;
  225 + struct tree_desc probe;
  226 + struct tree_desc_skip *skip;
  227 +
  228 + /*
  229 + * Extract the first entry from the tree_desc, but skip the
  230 + * ones that we already returned in earlier rounds.
  231 + */
  232 + while (1) {
  233 + if (!t->d.size) {
  234 + entry_clear(a);
  235 + break; /* not found */
  236 + }
  237 + entry_extract(&t->d, a);
  238 + for (skip = t->skip; skip; skip = skip->prev)
  239 + if (a->path == skip->ptr)
  240 + break; /* found */
  241 + if (!skip)
  242 + break;
  243 + /* We have processed this entry already. */
  244 + update_tree_entry(&t->d);
  245 + }
  246 +
  247 + if (!first || !a->path)
  248 + return;
  249 +
  250 + /*
  251 + * The caller wants "first" from this tree, or nothing.
  252 + */
  253 + path = a->path;
  254 + len = tree_entry_len(a->path, a->sha1);
  255 + switch (check_entry_match(first, first_len, path, len)) {
  256 + case -1:
  257 + entry_clear(a);
  258 + case 0:
  259 + return;
  260 + default:
  261 + break;
  262 + }
  263 +
  264 + /*
  265 + * We need to look-ahead -- we suspect that a subtree whose
  266 + * name is "first" may be hiding behind the current entry "path".
  267 + */
  268 + probe = t->d;
  269 + while (probe.size) {
  270 + entry_extract(&probe, a);
  271 + path = a->path;
  272 + len = tree_entry_len(a->path, a->sha1);
  273 + switch (check_entry_match(first, first_len, path, len)) {
  274 + case -1:
  275 + entry_clear(a);
  276 + case 0:
  277 + return;
  278 + default:
  279 + update_tree_entry(&probe);
  280 + break;
  281 + }
  282 + /* keep looking */
  283 + }
  284 + entry_clear(a);
  285 +}
  286 +
  287 +static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
  288 +{
  289 + if (t->d.entry.path == a->path) {
  290 + update_tree_entry(&t->d);
  291 + } else {
  292 + /* we have returned this entry early */
  293 + struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
  294 + skip->ptr = a->path;
  295 + skip->prev = t->skip;
  296 + t->skip = skip;
  297 + }
  298 +}
  299 +
  300 +static void free_extended_entry(struct tree_desc_x *t)
  301 +{
  302 + struct tree_desc_skip *p, *s;
  303 +
  304 + for (s = t->skip; s; s = p) {
  305 + p = s->prev;
  306 + free(s);
  307 + }
  308 +}
  309 +
141 310 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
142 311 {
143 312 int ret = 0;
144 313 struct name_entry *entry = xmalloc(n*sizeof(*entry));
  314 + int i;
  315 + struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
  316 +
  317 + for (i = 0; i < n; i++)
  318 + tx[i].d = t[i];
145 319
146 320 for (;;) {
147   - unsigned long mask = 0;
148   - unsigned long dirmask = 0;
149   - int i, last;
  321 + unsigned long mask, dirmask;
  322 + const char *first = NULL;
  323 + int first_len = 0;
  324 + struct name_entry *e;
  325 + int len;
150 326
151   - last = -1;
152 327 for (i = 0; i < n; i++) {
153   - if (!t[i].size)
  328 + e = entry + i;
  329 + extended_entry_extract(tx + i, e, NULL, 0);
  330 + }
  331 +
  332 + /*
  333 + * A tree may have "t-2" at the current location even
  334 + * though it may have "t" that is a subtree behind it,
  335 + * and another tree may return "t". We want to grab
  336 + * all "t" from all trees to match in such a case.
  337 + */
  338 + for (i = 0; i < n; i++) {
  339 + e = entry + i;
  340 + if (!e->path)
154 341 continue;
155   - entry_extract(t+i, entry+i);
156   - if (last >= 0) {
157   - int cmp = entry_compare(entry+i, entry+last);
158   -
159   - /*
160   - * Is the new name bigger than the old one?
161   - * Ignore it
162   - */
163   - if (cmp > 0)
  342 + len = tree_entry_len(e->path, e->sha1);
  343 + if (!first) {
  344 + first = e->path;
  345 + first_len = len;
  346 + continue;
  347 + }
  348 + if (name_compare(e->path, len, first, first_len) < 0) {
  349 + first = e->path;
  350 + first_len = len;
  351 + }
  352 + }
  353 +
  354 + if (first) {
  355 + for (i = 0; i < n; i++) {
  356 + e = entry + i;
  357 + extended_entry_extract(tx + i, e, first, first_len);
  358 + /* Cull the ones that are not the earliest */
  359 + if (!e->path)
164 360 continue;
165   - /*
166   - * Is the new name smaller than the old one?
167   - * Ignore all old ones
168   - */
169   - if (cmp < 0)
170   - mask = 0;
  361 + len = tree_entry_len(e->path, e->sha1);
  362 + if (name_compare(e->path, len, first, first_len))
  363 + entry_clear(e);
171 364 }
  365 + }
  366 +
  367 + /* Now we have in entry[i] the earliest name from the trees */
  368 + mask = 0;
  369 + dirmask = 0;
  370 + for (i = 0; i < n; i++) {
  371 + if (!entry[i].path)
  372 + continue;
172 373 mask |= 1ul << i;
173 374 if (S_ISDIR(entry[i].mode))
174 375 dirmask |= 1ul << i;
175   - last = i;
176 376 }
177 377 if (!mask)
178 378 break;
179   - dirmask &= mask;
180   -
181   - /*
182   - * Clear all the unused name-entries.
183   - */
184   - for (i = 0; i < n; i++) {
185   - if (mask & (1ul << i))
186   - continue;
187   - entry_clear(entry + i);
188   - }
189 379 ret = info->fn(n, mask, dirmask, entry, info);
190 380 if (ret < 0)
191 381 break;
192   - if (ret)
193   - mask &= ret;
  382 + mask &= ret;
194 383 ret = 0;
195   - for (i = 0; i < n; i++) {
  384 + for (i = 0; i < n; i++)
196 385 if (mask & (1ul << i))
197   - update_tree_entry(t + i);
198   - }
  386 + update_extended_entry(tx + i, entry + i);
199 387 }
200 388 free(entry);
  389 + for (i = 0; i < n; i++)
  390 + free_extended_entry(tx + i);
  391 + free(tx);
201 392 return ret;
202 393 }
203 394
389 unpack-trees.c
@@ -198,23 +198,142 @@ static inline int call_unpack_fn(struct cache_entry **src, struct unpack_trees_o
198 198 return ret;
199 199 }
200 200
201   -static int unpack_index_entry(struct cache_entry *ce, struct unpack_trees_options *o)
  201 +static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o)
  202 +{
  203 + ce->ce_flags |= CE_UNPACKED;
  204 +
  205 + if (o->cache_bottom < o->src_index->cache_nr &&
  206 + o->src_index->cache[o->cache_bottom] == ce) {
  207 + int bottom = o->cache_bottom;
  208 + while (bottom < o->src_index->cache_nr &&
  209 + o->src_index->cache[bottom]->ce_flags & CE_UNPACKED)
  210 + bottom++;
  211 + o->cache_bottom = bottom;
  212 + }
  213 +}
  214 +
  215 +static void mark_all_ce_unused(struct index_state *index)
  216 +{
  217 + int i;
  218 + for (i = 0; i < index->cache_nr; i++)
  219 + index->cache[i]->ce_flags &= ~CE_UNPACKED;
  220 +}
  221 +
  222 +static int locate_in_src_index(struct cache_entry *ce,
  223 + struct unpack_trees_options *o)
  224 +{
  225 + struct index_state *index = o->src_index;
  226 + int len = ce_namelen(ce);
  227 + int pos = index_name_pos(index, ce->name, len);
  228 + if (pos < 0)
  229 + pos = -1 - pos;
  230 + return pos;
  231 +}
  232 +
  233 +/*
  234 + * We call unpack_index_entry() with an unmerged cache entry
  235 + * only in diff-index, and it wants a single callback. Skip
  236 + * the other unmerged entries with the same name.
  237 + */
  238 +static void mark_ce_used_same_name(struct cache_entry *ce,
  239 + struct unpack_trees_options *o)
  240 +{
  241 + struct index_state *index = o->src_index;
  242 + int len = ce_namelen(ce);
  243 + int pos;
  244 +
  245 + for (pos = locate_in_src_index(ce, o); pos < index->cache_nr; pos++) {
  246 + struct cache_entry *next = index->cache[pos];
  247 + if (len != ce_namelen(next) ||
  248 + memcmp(ce->name, next->name, len))
  249 + break;
  250 + mark_ce_used(next, o);
  251 + }
  252 +}
  253 +
  254 +static struct cache_entry *next_cache_entry(struct unpack_trees_options *o)
  255 +{
  256 + const struct index_state *index = o->src_index;
  257 + int pos = o->cache_bottom;
  258 +
  259 + while (pos < index->cache_nr) {
  260 + struct cache_entry *ce = index->cache[pos];
  261 + if (!(ce->ce_flags & CE_UNPACKED))
  262 + return ce;
  263 + pos++;
  264 + }
  265 + return NULL;
  266 +}
  267 +
  268 +static void add_same_unmerged(struct cache_entry *ce,
  269 + struct unpack_trees_options *o)
  270 +{
  271 + struct index_state *index = o->src_index;
  272 + int len = ce_namelen(ce);
  273 + int pos = index_name_pos(index, ce->name, len);
  274 +
  275 + if (0 <= pos)
  276 + die("programming error in a caller of mark_ce_used_same_name");
  277 + for (pos = -pos - 1; pos < index->cache_nr; pos++) {
  278 + struct cache_entry *next = index->cache[pos];
  279 + if (len != ce_namelen(next) ||
  280 + memcmp(ce->name, next->name, len))
  281 + break;
  282 + add_entry(o, next, 0, 0);
  283 + mark_ce_used(next, o);
  284 + }
  285 +}
  286 +
  287 +static int unpack_index_entry(struct cache_entry *ce,
  288 + struct unpack_trees_options *o)
202 289 {
203 290 struct cache_entry *src[5] = { ce, NULL, };
  291 + int ret;
204 292
205   - o->pos++;
  293 + mark_ce_used(ce, o);
206 294 if (ce_stage(ce)) {
207 295 if (o->skip_unmerged) {
208 296 add_entry(o, ce, 0, 0);
209 297 return 0;
210 298 }
211 299 }
212   - return call_unpack_fn(src, o);
  300 + ret = call_unpack_fn(src, o);
  301 + if (ce_stage(ce))
  302 + mark_ce_used_same_name(ce, o);
  303 + return ret;
  304 +}
  305 +
  306 +static int find_cache_pos(struct traverse_info *, const struct name_entry *);
  307 +
  308 +static void restore_cache_bottom(struct traverse_info *info, int bottom)
  309 +{
  310 + struct unpack_trees_options *o = info->data;
  311 +
  312 + if (o->diff_index_cached)
  313 + return;
  314 + o->cache_bottom = bottom;
  315 +}
  316 +
  317 +static int switch_cache_bottom(struct traverse_info *info)
  318 +{
  319 + struct unpack_trees_options *o = info->data;
  320 + int ret, pos;
  321 +
  322 + if (o->diff_index_cached)
  323 + return 0;
  324 + ret = o->cache_bottom;
  325 + pos = find_cache_pos(info->prev, &info->name);
  326 +
  327 + if (pos < -1)
  328 + o->cache_bottom = -2 - pos;
  329 + else if (pos < 0)
  330 + o->cache_bottom = o->src_index->cache_nr;
  331 + return ret;
213 332 }
214 333
215 334 static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long df_conflicts, struct name_entry *names, struct traverse_info *info)
216 335 {
217   - int i;
  336 + int i, ret, bottom;
218 337 struct tree_desc t[MAX_UNPACK_TREES];
219 338 struct traverse_info newinfo;
220 339 struct name_entry *p;
@@ -235,7 +354,11 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long
235 354 sha1 = names[i].sha1;
236 355 fill_tree_descriptor(t+i, sha1);
237 356 }
238   - return traverse_trees(n, t, &newinfo);
  357 +
  358 + bottom = switch_cache_bottom(&newinfo);
  359 + ret = traverse_trees(n, t, &newinfo);
  360 + restore_cache_bottom(&newinfo, bottom);
  361 + return ret;
239 362 }
240 363
241 364 /*
@@ -284,6 +407,20 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf
284 407 return ce_namelen(ce) > traverse_path_len(info, n);
285 408 }
286 409
  410 +static int ce_in_traverse_path(const struct cache_entry *ce,
  411 + const struct traverse_info *info)
  412 +{
  413 + if (!info->prev)
  414 + return 1;
  415 + if (do_compare_entry(ce, info->prev, &info->name))
  416 + return 0;
  417 + /*
  418 + * If ce (blob) is the same name as the path (which is a tree
  419 + * we will be descending into), it won't be inside it.
  420 + */
  421 + return (info->pathlen < ce_namelen(ce));
  422 +}
  423 +
287 424 static struct cache_entry *create_ce_entry(const struct traverse_info *info, const struct name_entry *n, int stage)
288 425 {
289 426 int len = traverse_path_len(info, n);
@@ -360,6 +497,114 @@ static int unpack_failed(struct unpack_trees_options *o, const char *message)
360 497 return -1;
361 498 }
362 499
  500 +/* NEEDSWORK: give this a better name and share with tree-walk.c */
  501 +static int name_compare(const char *a, int a_len,
  502 + const char *b, int b_len)
  503 +{
  504 + int len = (a_len < b_len) ? a_len : b_len;
  505 + int cmp = memcmp(a, b, len);
  506 + if (cmp)
  507 + return cmp;
  508 + return (a_len - b_len);
  509 +}
  510 +
  511 +/*
  512 + * The tree traversal is looking at name p. If we have a matching entry,
  513 + * return it. If name p is a directory in the index, do not return
  514 + * anything, as we will want to match it when the traversal descends into
  515 + * the directory.
  516 + */
  517 +static int find_cache_pos(struct traverse_info *info,
  518 + const struct name_entry *p)
  519 +{
  520 + int pos;
  521 + struct unpack_trees_options *o = info->data;
  522 + struct index_state *index = o->src_index;
  523 + int pfxlen = info->pathlen;
  524 + int p_len = tree_entry_len(p->path, p->sha1);
  525 +
  526 + for (pos = o->cache_bottom; pos < index->cache_nr; pos++) {
  527 + struct cache_entry *ce = index->cache[pos];
  528 + const char *ce_name, *ce_slash;
  529 + int cmp, ce_len;
  530 +
  531 + if (!ce_in_traverse_path(ce, info))
  532 + continue;
  533 + if (ce->ce_flags & CE_UNPACKED)
  534 + continue;
  535 + ce_name = ce->name + pfxlen;
  536 + ce_slash = strchr(ce_name, '/');
  537 + if (ce_slash)
  538 + ce_len = ce_slash - ce_name;
  539 + else
  540 + ce_len = ce_namelen(ce) - pfxlen;
  541 + cmp = name_compare(p->path, p_len, ce_name, ce_len);
  542 + /*
  543 + * Exact match; if we have a directory we need to
  544 + * delay returning it.
  545 + */
  546 + if (!cmp)
  547 + return ce_slash ? -2 - pos : pos;
  548 + if (0 < cmp)
  549 + continue; /* keep looking */
  550 + /*
  551 + * ce_name sorts after p->path; could it be that we
  552 + * have files under p->path directory in the index?
  553 + * E.g. ce_name == "t-i", and p->path == "t"; we may
  554 + * have "t/a" in the index.
  555 + */
  556 + if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) &&
  557 + ce_name[p_len] < '/')
  558 + continue; /* keep looking */
  559 + break;
  560 + }
  561 + return -1;
  562 +}
  563 +
  564 +static struct cache_entry *find_cache_entry(struct traverse_info *info,
  565 + const struct name_entry *p)
  566 +{
  567 + int pos = find_cache_pos(info, p);
  568 + struct unpack_trees_options *o = info->data;
  569 +
  570 + if (0 <= pos)
  571 + return o->src_index->cache[pos];
  572 + else
  573 + return NULL;
  574 +}
  575 +
  576 +static void debug_path(struct traverse_info *info)
  577 +{
  578 + if (info->prev) {
  579 + debug_path(info->prev);
  580 + if (*info->prev->name.path)
  581 + putchar('/');
  582 + }
  583 + printf("%s", info->name.path);
  584 +}
  585 +
  586 +static void debug_name_entry(int i, struct name_entry *n)
  587 +{
  588 + printf("ent#%d %06o %s\n", i,
  589 + n->path ? n->mode : 0,
  590 + n->path ? n->path : "(missing)");
  591 +}
  592 +
  593 +static void debug_unpack_callback(int n,
  594 + unsigned long mask,
  595 + unsigned long dirmask,
  596 + struct name_entry *names,
  597 + struct traverse_info *info)
  598 +{
  599 + int i;
  600 + printf("* unpack mask %lu, dirmask %lu, cnt %d ",
  601 + mask, dirmask, n);
  602 + debug_path(info);
  603 + putchar('\n');
  604 + for (i = 0; i < n; i++)
  605 + debug_name_entry(i, names + i);
  606 +}
  607 +
363 608 static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
364 609 {
365 610 struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
@@ -370,25 +615,38 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
370 615 while (!p->mode)
371 616 p++;
372 617
  618 + if (o->debug_unpack)
  619 + debug_unpack_callback(n, mask, dirmask, names, info);
  620 +
373 621 /* Are we supposed to look at the index too? */
374 622 if (o->merge) {
375   - while (o->pos < o->src_index->cache_nr) {
376   - struct cache_entry *ce = o->src_index->cache[o->pos];
377   - int cmp = compare_entry(ce, info, p);
  623 + while (1) {
  624 + int cmp;
  625 + struct cache_entry *ce;
  626 +
  627 + if (o->diff_index_cached)
  628 + ce = next_cache_entry(o);
  629 + else
  630 + ce = find_cache_entry(info, p);
  631 +
  632 + if (!ce)
  633 + break;
  634 + cmp = compare_entry(ce, info, p);
378 635 if (cmp < 0) {
379 636 if (unpack_index_entry(ce, o) < 0)
380 637 return unpack_failed(o, NULL);
381 638 continue;
382 639 }
383 640 if (!cmp) {
384   - o->pos++;
385 641 if (ce_stage(ce)) {
386 642 /*
387   - * If we skip unmerged index entries, we'll skip this
388   - * entry *and* the tree entries associated with it!
  643 + * If we skip unmerged index
  644 + * entries, we'll skip this
  645 + * entry *and* the tree
  646 + * entries associated with it!
389 647 */
390 648 if (o->skip_unmerged) {
391   - add_entry(o, ce, 0, 0);
  649 + add_same_unmerged(ce, o);
392 650 return mask;
393 651 }
394 652 }
@@ -401,6 +659,13 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
401 659 if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
402 660 return -1;
403 661
  662 + if (src[0]) {
  663 + if (ce_stage(src[0]))
  664 + mark_ce_used_same_name(src[0], o);
  665 + else
  666 + mark_ce_used(src[0], o);
  667 + }
  668 +
404 669 /* Now handle any directories.. */
405 670 if (dirmask) {
406 671 unsigned long conflicts = mask & ~dirmask;
@@ -417,11 +682,13 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
417 682 matches = cache_tree_matches_traversal(o->src_index->cache_tree,
418 683 names, info);
419 684 /*
420   - * Everything under the name matches. Adjust o->pos to
421   - * skip the entire hierarchy.
  685 + * Everything under the name matches; skip the
  686 + * entire hierarchy. diff_index_cached codepath
  687 + * special cases D/F conflicts in such a way that
  688 + * it does not do any look-ahead, so this is safe.
422 689 */
423 690 if (matches) {
424   - o->pos += matches;
  691 + o->cache_bottom += matches;
425 692 return mask;
426 693 }
427 694 }
@@ -465,11 +732,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
465 732
466 733 memset(&o->result, 0, sizeof(o->result));
467 734 o->result.initialized = 1;
468   - if (o->src_index) {
469   - o->result.timestamp.sec = o->src_index->timestamp.sec;
470   - o->result.timestamp.nsec = o->src_index->timestamp.nsec;
471   - }
  735 + o->result.timestamp.sec = o->src_index->timestamp.sec;
  736 + o->result.timestamp.nsec = o->src_index->timestamp.nsec;
472 737 o->merge_size = len;
  738 + mark_all_ce_unused(o->src_index);
473 739
474 740 if (!dfc)
475 741 dfc = xcalloc(1, cache_entry_size(0));
@@ -483,22 +749,38 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
483 749 info.fn = unpack_callback;
484 750 info.data = o;
485 751
486   - if (traverse_trees(len, t, &info) < 0) {
487   - ret = unpack_failed(o, NULL);
488   - goto done;
  752 + if (o->prefix) {
  753 + /*
  754 + * Unpack existing index entries that sort before the
  755 + * prefix the tree is spliced into. Note that o->merge
  756 + * is always true in this case.
  757 + */
  758 + while (1) {
  759 + struct cache_entry *ce = next_cache_entry(o);
  760 + if (!ce)
  761 + break;
  762 + if (ce_in_traverse_path(ce, &info))
  763 + break;
  764 + if (unpack_index_entry(ce, o) < 0)
  765 + goto return_failed;
  766 + }
489 767 }
  768 +
  769 + if (traverse_trees(len, t, &info) < 0)
  770 + goto return_failed;
490 771 }
491 772
492 773 /* Any left-over entries in the index? */
493 774 if (o->merge) {
494   - while (o->pos < o->src_index->cache_nr) {
495   - struct cache_entry *ce = o->src_index->cache[o->pos];
496   - if (unpack_index_entry(ce, o) < 0) {
497   - ret = unpack_failed(o, NULL);
498   - goto done;
499   - }
  775 + while (1) {
  776 + struct cache_entry *ce = next_cache_entry(o);
  777 + if (!ce)
  778 + break;
  779 + if (unpack_index_entry(ce, o) < 0)
  780 + goto return_failed;
500 781 }
501 782 }
  783 + mark_all_ce_unused(o->src_index);
502 784
503 785 if (o->trivial_merges_only && o->nontrivial_merge) {
504 786 ret = unpack_failed(o, "Merge requires file-level merging");
@@ -543,6 +825,11 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
543 825 free(el.excludes);
544 826
545 827 return ret;
  828 +
  829 +return_failed:
  830 + mark_all_ce_unused(o->src_index);
  831 + ret = unpack_failed(o, NULL);
  832 + goto done;
546 833 }
547 834
548 835 /* Here come the merge functions */
@@ -661,7 +948,9 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action,
661 948 * in that directory.
662 949 */
663 950 namelen = strlen(ce->name);
664   - for (i = o->pos; i < o->src_index->cache_nr; i++) {
  951 + for (i = locate_in_src_index(ce, o);
  952 + i < o->src_index->cache_nr;
  953 + i++) {
665 954 struct cache_entry *ce2 = o->src_index->cache[i];
666 955 int len = ce_namelen(ce2);
667 956 if (len < namelen ||
@@ -669,12 +958,14 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action,
669 958 ce2->name[namelen] != '/')
670 959 break;
671 960 /*
672   - * ce2->name is an entry in the subdirectory.
  961 + * ce2->name is an entry in the subdirectory to be
  962 + * removed.
673 963 */
674 964 if (!ce_stage(ce2)) {
675 965 if (verify_uptodate(ce2, o))
676 966 return -1;
677 967 add_entry(o, ce2, CE_REMOVE, 0);
  968 + mark_ce_used(ce2, o);
678 969 }
679 970 cnt++;
680 971 }
@@ -731,7 +1022,6 @@ static int verify_absent_1(struct cache_entry *ce, const char *action,
731 1022 return 0;
732 1023
733 1024 if (!lstat(ce->name, &st)) {
734   - int ret;
735 1025 int dtype = ce_to_dtype(ce);
736 1026 struct cache_entry *result;
737 1027
@@ -759,28 +1049,8 @@ static int verify_absent_1(struct cache_entry *ce, const char *action,
759 1049 * files that are in "foo/" we would lose
760 1050 * them.
761 1051 */
762   - ret = verify_clean_subdirectory(ce, action, o);
763   - if (ret < 0)
764   - return ret;
765   -
766   - /*
767   - * If this removed entries from the index,
768   - * what that means is:
769   - *
770   - * (1) the caller unpack_callback() saw path/foo
771   - * in the index, and it has not removed it because
772   - * it thinks it is handling 'path' as blob with
773   - * D/F conflict;
774   - * (2) we will return "ok, we placed a merged entry
775   - * in the index" which would cause o->pos to be
776   - * incremented by one;
777   - * (3) however, original o->pos now has 'path/foo'
778   - * marked with "to be removed".
779   - *
780   - * We need to increment it by the number of
781   - * deleted entries here.
782   - */
783   - o->pos += ret;
  1052 + if (verify_clean_subdirectory(ce, action, o) < 0)
  1053 + return -1;
784 1054 return 0;
785 1055 }
786 1056
@@ -927,7 +1197,8 @@ int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o)
927 1197 remote = NULL;
928 1198 }
929 1199
930   - /* First, if there's a #16 situation, note that to prevent #13
  1200 + /*
  1201 + * First, if there's a #16 situation, note that to prevent #13
931 1202 * and #14.
932 1203 */
933 1204 if (!same(remote, head)) {
@@ -941,7 +1212,8 @@ int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o)
941 1212 }
942 1213 }
943 1214
944   - /* We start with cases where the index is allowed to match
  1215 + /*
  1216 + * We start with cases where the index is allowed to match
945 1217 * something other than the head: #14(ALT) and #2ALT, where it
946 1218 * is permitted to match the result instead.
947 1219 */
@@ -971,12 +1243,13 @@ int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o)
971 1243 if (!head && !remote && any_anc_missing)
972 1244 return 0;
973 1245