Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge branch 'kb/name-hash'

The code to keep track of what directory names are known to Git on
platforms with case insensitive filesystems can get confused upon
a hash collision between these pathnames and looped forever.

* kb/name-hash:
  name-hash.c: fix endless loop with core.ignorecase=true
  • Loading branch information...
commit c044bed8f0ed0275792cf66201579e42c0de7171 2 parents e818905 + 2092678
Junio C Hamano gitster authored
17 cache.h
@@ -132,7 +132,6 @@ struct cache_entry {
132 132 unsigned int ce_namelen;
133 133 unsigned char sha1[20];
134 134 struct cache_entry *next;
135   - struct cache_entry *dir_next;
136 135 char name[FLEX_ARRAY]; /* more */
137 136 };
138 137
@@ -268,25 +267,15 @@ struct index_state {
268 267 unsigned name_hash_initialized : 1,
269 268 initialized : 1;
270 269 struct hash_table name_hash;
  270 + struct hash_table dir_hash;
271 271 };
272 272
273 273 extern struct index_state the_index;
274 274
275 275 /* Name hashing */
276 276 extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
277   -/*
278   - * We don't actually *remove* it, we can just mark it invalid so that
279   - * we won't find it in lookups.
280   - *
281   - * Not only would we have to search the lists (simple enough), but
282   - * we'd also have to rehash other hash buckets in case this makes the
283   - * hash bucket empty (common). So it's much better to just mark
284   - * it.
285   - */
286   -static inline void remove_name_hash(struct cache_entry *ce)
287   -{
288   - ce->ce_flags |= CE_UNHASHED;
289   -}
  277 +extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
  278 +extern void free_name_hash(struct index_state *istate);
290 279
291 280
292 281 #ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
182 name-hash.c
@@ -32,38 +32,96 @@ static unsigned int hash_name(const char *name, int namelen)
32 32 return hash;
33 33 }
34 34
35   -static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce)
  35 +struct dir_entry {
  36 + struct dir_entry *next;
  37 + struct dir_entry *parent;
  38 + struct cache_entry *ce;
  39 + int nr;
  40 + unsigned int namelen;
  41 +};
  42 +
  43 +static struct dir_entry *find_dir_entry(struct index_state *istate,
  44 + const char *name, unsigned int namelen)
  45 +{
  46 + unsigned int hash = hash_name(name, namelen);
  47 + struct dir_entry *dir;
  48 +
  49 + for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next)
  50 + if (dir->namelen == namelen &&
  51 + !strncasecmp(dir->ce->name, name, namelen))
  52 + return dir;
  53 + return NULL;
  54 +}
  55 +
  56 +static struct dir_entry *hash_dir_entry(struct index_state *istate,
  57 + struct cache_entry *ce, int namelen)
36 58 {
37 59 /*
38 60 * Throw each directory component in the hash for quick lookup
39 61 * during a git status. Directory components are stored with their
40 62 * closing slash. Despite submodules being a directory, they never
41 63 * reach this point, because they are stored without a closing slash
42   - * in the cache.
  64 + * in index_state.name_hash (as ordinary cache_entries).
43 65 *
44   - * Note that the cache_entry stored with the directory does not
45   - * represent the directory itself. It is a pointer to an existing
46   - * filename, and its only purpose is to represent existence of the
47   - * directory in the cache. It is very possible multiple directory
48   - * hash entries may point to the same cache_entry.
  66 + * Note that the cache_entry stored with the dir_entry merely
  67 + * supplies the name of the directory (up to dir_entry.namelen). We
  68 + * track the number of 'active' files in a directory in dir_entry.nr,
  69 + * so we can tell if the directory is still relevant, e.g. for git
  70 + * status. However, if cache_entries are removed, we cannot pinpoint
  71 + * an exact cache_entry that's still active. It is very possible that
  72 + * multiple dir_entries point to the same cache_entry.
49 73 */
50   - unsigned int hash;
51   - void **pos;
  74 + struct dir_entry *dir;
  75 +
  76 + /* get length of parent directory */
  77 + while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1]))
  78 + namelen--;
  79 + if (namelen <= 0)
  80 + return NULL;
  81 +
  82 + /* lookup existing entry for that directory */
  83 + dir = find_dir_entry(istate, ce->name, namelen);
  84 + if (!dir) {
  85 + /* not found, create it and add to hash table */
  86 + void **pdir;
  87 + unsigned int hash = hash_name(ce->name, namelen);
52 88
53   - const char *ptr = ce->name;
54   - while (*ptr) {
55   - while (*ptr && *ptr != '/')
56   - ++ptr;
57   - if (*ptr == '/') {
58   - ++ptr;
59   - hash = hash_name(ce->name, ptr - ce->name);
60   - pos = insert_hash(hash, ce, &istate->name_hash);
61   - if (pos) {
62   - ce->dir_next = *pos;
63   - *pos = ce;
64   - }
  89 + dir = xcalloc(1, sizeof(struct dir_entry));
  90 + dir->namelen = namelen;
  91 + dir->ce = ce;
  92 +
  93 + pdir = insert_hash(hash, dir, &istate->dir_hash);
  94 + if (pdir) {
  95 + dir->next = *pdir;
  96 + *pdir = dir;
65 97 }
  98 +
  99 + /* recursively add missing parent directories */
  100 + dir->parent = hash_dir_entry(istate, ce, namelen - 1);
66 101 }
  102 + return dir;
  103 +}
  104 +
  105 +static void add_dir_entry(struct index_state *istate, struct cache_entry *ce)
  106 +{
  107 + /* Add reference to the directory entry (and parents if 0). */
  108 + struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
  109 + while (dir && !(dir->nr++))
  110 + dir = dir->parent;
  111 +}
  112 +
  113 +static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
  114 +{
  115 + /*
  116 + * Release reference to the directory entry (and parents if 0).
  117 + *
  118 + * Note: we do not remove / free the entry because there's no
  119 + * hash.[ch]::remove_hash and dir->next may point to other entries
  120 + * that are still valid, so we must not free the memory.
  121 + */
  122 + struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
  123 + while (dir && dir->nr && !(--dir->nr))
  124 + dir = dir->parent;
67 125 }
68 126
69 127 static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
@@ -74,7 +132,7 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
74 132 if (ce->ce_flags & CE_HASHED)
75 133 return;
76 134 ce->ce_flags |= CE_HASHED;
77   - ce->next = ce->dir_next = NULL;
  135 + ce->next = NULL;
78 136 hash = hash_name(ce->name, ce_namelen(ce));
79 137 pos = insert_hash(hash, ce, &istate->name_hash);
80 138 if (pos) {
@@ -82,8 +140,8 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
82 140 *pos = ce;
83 141 }
84 142
85   - if (ignore_case)
86   - hash_index_entry_directories(istate, ce);
  143 + if (ignore_case && !(ce->ce_flags & CE_UNHASHED))
  144 + add_dir_entry(istate, ce);
87 145 }
88 146
89 147 static void lazy_init_name_hash(struct index_state *istate)
@@ -101,11 +159,33 @@ static void lazy_init_name_hash(struct index_state *istate)
101 159
102 160 void add_name_hash(struct index_state *istate, struct cache_entry *ce)
103 161 {
  162 + /* if already hashed, add reference to directory entries */
  163 + if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK)
  164 + add_dir_entry(istate, ce);
  165 +
104 166 ce->ce_flags &= ~CE_UNHASHED;
105 167 if (istate->name_hash_initialized)
106 168 hash_index_entry(istate, ce);
107 169 }
108 170
  171 +/*
  172 + * We don't actually *remove* it, we can just mark it invalid so that
  173 + * we won't find it in lookups.
  174 + *
  175 + * Not only would we have to search the lists (simple enough), but
  176 + * we'd also have to rehash other hash buckets in case this makes the
  177 + * hash bucket empty (common). So it's much better to just mark
  178 + * it.
  179 + */
  180 +void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
  181 +{
  182 + /* if already hashed, release reference to directory entries */
  183 + if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED)
  184 + remove_dir_entry(istate, ce);
  185 +
  186 + ce->ce_flags |= CE_UNHASHED;
  187 +}
  188 +
109 189 static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
110 190 {
111 191 if (len1 != len2)
@@ -139,18 +219,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen
139 219 if (!icase)
140 220 return 0;
141 221
142   - /*
143   - * If the entry we're comparing is a filename (no trailing slash), then compare
144   - * the lengths exactly.
145   - */
146   - if (name[namelen - 1] != '/')
147   - return slow_same_name(name, namelen, ce->name, len);
148   -
149   - /*
150   - * For a directory, we point to an arbitrary cache_entry filename. Just
151   - * make sure the directory portion matches.
152   - */
153   - return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len);
  222 + return slow_same_name(name, namelen, ce->name, len);
154 223 }
155 224
156 225 struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
@@ -166,27 +235,54 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
166 235 if (same_name(ce, name, namelen, icase))
167 236 return ce;
168 237 }
169   - if (icase && name[namelen - 1] == '/')
170   - ce = ce->dir_next;
171   - else
172   - ce = ce->next;
  238 + ce = ce->next;
173 239 }
174 240
175 241 /*
176   - * Might be a submodule. Despite submodules being directories,
  242 + * When looking for a directory (trailing '/'), it might be a
  243 + * submodule or a directory. Despite submodules being directories,
177 244 * they are stored in the name hash without a closing slash.
178   - * When ignore_case is 1, directories are stored in the name hash
179   - * with their closing slash.
  245 + * When ignore_case is 1, directories are stored in a separate hash
  246 + * table *with* their closing slash.
180 247 *
181 248 * The side effect of this storage technique is we have need to
  249 + * lookup the directory in a separate hash table, and if not found
182 250 * remove the slash from name and perform the lookup again without
183 251 * the slash. If a match is made, S_ISGITLINK(ce->mode) will be
184 252 * true.
185 253 */
186 254 if (icase && name[namelen - 1] == '/') {
  255 + struct dir_entry *dir = find_dir_entry(istate, name, namelen);
  256 + if (dir && dir->nr)
  257 + return dir->ce;
  258 +
187 259 ce = index_name_exists(istate, name, namelen - 1, icase);
188 260 if (ce && S_ISGITLINK(ce->ce_mode))
189 261 return ce;
190 262 }
191 263 return NULL;
192 264 }
  265 +
  266 +static int free_dir_entry(void *entry, void *unused)
  267 +{
  268 + struct dir_entry *dir = entry;
  269 + while (dir) {
  270 + struct dir_entry *next = dir->next;
  271 + free(dir);
  272 + dir = next;
  273 + }
  274 + return 0;
  275 +}
  276 +
  277 +void free_name_hash(struct index_state *istate)
  278 +{
  279 + if (!istate->name_hash_initialized)
  280 + return;
  281 + istate->name_hash_initialized = 0;
  282 + if (ignore_case)
  283 + /* free directory entries */
  284 + for_each_hash(&istate->dir_hash, free_dir_entry, NULL);
  285 +
  286 + free_hash(&istate->name_hash);
  287 + free_hash(&istate->dir_hash);
  288 +}
9 read-cache.c
@@ -46,7 +46,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
46 46 {
47 47 struct cache_entry *old = istate->cache[nr];
48 48
49   - remove_name_hash(old);
  49 + remove_name_hash(istate, old);
50 50 set_index_entry(istate, nr, ce);
51 51 istate->cache_changed = 1;
52 52 }
@@ -460,7 +460,7 @@ int remove_index_entry_at(struct index_state *istate, int pos)
460 460 struct cache_entry *ce = istate->cache[pos];
461 461
462 462 record_resolve_undo(istate, ce);
463   - remove_name_hash(ce);
  463 + remove_name_hash(istate, ce);
464 464 istate->cache_changed = 1;
465 465 istate->cache_nr--;
466 466 if (pos >= istate->cache_nr)
@@ -483,7 +483,7 @@ void remove_marked_cache_entries(struct index_state *istate)
483 483
484 484 for (i = j = 0; i < istate->cache_nr; i++) {
485 485 if (ce_array[i]->ce_flags & CE_REMOVE)
486   - remove_name_hash(ce_array[i]);
  486 + remove_name_hash(istate, ce_array[i]);
487 487 else
488 488 ce_array[j++] = ce_array[i];
489 489 }
@@ -1515,8 +1515,7 @@ int discard_index(struct index_state *istate)
1515 1515 istate->cache_changed = 0;
1516 1516 istate->timestamp.sec = 0;
1517 1517 istate->timestamp.nsec = 0;
1518   - istate->name_hash_initialized = 0;
1519   - free_hash(&istate->name_hash);
  1518 + free_name_hash(istate);
1520 1519 cache_tree_free(&(istate->cache_tree));
1521 1520 istate->initialized = 0;
1522 1521
20 t/t7062-wtstatus-ignorecase.sh
... ... @@ -0,0 +1,20 @@
  1 +#!/bin/sh
  2 +
  3 +test_description='git-status with core.ignorecase=true'
  4 +
  5 +. ./test-lib.sh
  6 +
  7 +test_expect_success 'status with hash collisions' '
  8 + # note: "V/", "V/XQANY/" and "WURZAUP/" produce the same hash code
  9 + # in name-hash.c::hash_name
  10 + mkdir V &&
  11 + mkdir V/XQANY &&
  12 + mkdir WURZAUP &&
  13 + touch V/XQANY/test &&
  14 + git config core.ignorecase true &&
  15 + git add . &&
  16 + # test is successful if git status completes (no endless loop)
  17 + git status
  18 +'
  19 +
  20 +test_done

0 comments on commit c044bed

Please sign in to comment.
Something went wrong with that request. Please try again.