Skip to content

Commit

Permalink
Merge branch 'sy/sparse-grep'
Browse files Browse the repository at this point in the history
"git grep" learned to expand the sparse-index more lazily and on
demand in a sparse checkout.

* sy/sparse-grep:
  builtin/grep.c: integrate with sparse index
  • Loading branch information
gitster committed Oct 10, 2022
2 parents 4b4d97c + 7cae762 commit 67bf4a8
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 3 deletions.
48 changes: 45 additions & 3 deletions builtin/grep.c
Expand Up @@ -458,6 +458,33 @@ static int grep_submodule(struct grep_opt *opt,
* subrepo's odbs to the in-memory alternates list.
*/
obj_read_lock();

/*
* NEEDSWORK: when reading a submodule, the sparsity settings in the
* superproject are incorrectly forgotten or misused. For example:
*
* 1. "command_requires_full_index"
* When this setting is turned on for `grep`, only the superproject
* knows it. All the submodules are read with their own configs
* and get prepare_repo_settings()'d. Therefore, these submodules
* "forget" the sparse-index feature switch. As a result, the index
* of each of these submodules is expanded unexpectedly.
*
* 2. "core_apply_sparse_checkout"
* When running `grep` in the superproject, this setting is
* populated using the superproject's configs. However, once
* initialized, this config is globally accessible and is read by
* prepare_repo_settings() for the submodules. For instance, if a
* submodule is using a sparse-checkout but the superproject is
* not, the config from the superproject will dictate the behavior
* for the submodule, making it "forget" its sparse-checkout state.
*
* 3. "core_sparse_checkout_cone"
* ditto.
*
* Note that this list is not exhaustive.
*/
repo_read_gitmodules(subrepo, 0);

/*
Expand Down Expand Up @@ -520,8 +547,6 @@ static int grep_cache(struct grep_opt *opt,
if (repo_read_index(repo) < 0)
die(_("index file corrupt"));

/* TODO: audit for interaction with sparse-index. */
ensure_full_index(repo->index);
for (nr = 0; nr < repo->index->cache_nr; nr++) {
const struct cache_entry *ce = repo->index->cache[nr];

Expand All @@ -530,8 +555,20 @@ static int grep_cache(struct grep_opt *opt,

strbuf_setlen(&name, name_base_len);
strbuf_addstr(&name, ce->name);
if (S_ISSPARSEDIR(ce->ce_mode)) {
enum object_type type;
struct tree_desc tree;
void *data;
unsigned long size;

if (S_ISREG(ce->ce_mode) &&
data = read_object_file(&ce->oid, &type, &size);
init_tree_desc(&tree, data, size);

hit |= grep_tree(opt, pathspec, &tree, &name, 0, 0);
strbuf_setlen(&name, name_base_len);
strbuf_addstr(&name, ce->name);
free(data);
} else if (S_ISREG(ce->ce_mode) &&
match_pathspec(repo->index, pathspec, name.buf, name.len, 0, NULL,
S_ISDIR(ce->ce_mode) ||
S_ISGITLINK(ce->ce_mode))) {
Expand Down Expand Up @@ -984,6 +1021,11 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_STOP_AT_NON_OPTION);

if (the_repository->gitdir) {
prepare_repo_settings(the_repository);
the_repository->settings.command_requires_full_index = 0;
}

if (use_index && !startup_info->have_repository) {
int fallback = 0;
git_config_get_bool("grep.fallbacktonoindex", &fallback);
Expand Down
1 change: 1 addition & 0 deletions t/perf/p2000-sparse-operations.sh
Expand Up @@ -124,5 +124,6 @@ test_perf_on_all git read-tree -mu HEAD
test_perf_on_all git checkout-index -f --all
test_perf_on_all git update-index --add --remove $SPARSE_CONE/a
test_perf_on_all "git rm -f $SPARSE_CONE/a && git checkout HEAD -- $SPARSE_CONE/a"
test_perf_on_all git grep --cached --sparse bogus -- "f2/f1/f1/*"

test_done
72 changes: 72 additions & 0 deletions t/t1092-sparse-checkout-compatibility.sh
Expand Up @@ -162,6 +162,19 @@ init_repos () {
git -C sparse-index sparse-checkout set deep
}

# Reset the current repository, rebuild the test repositories via
# init_repos, register full-checkout, sparse-checkout and sparse-index
# as submodules, and check that "git submodule status" lists each one.
init_repos_as_submodules () {
	git reset --hard &&
	init_repos &&
	for sub in full-checkout sparse-checkout sparse-index
	do
		# propagate the exit status of the failing command unchanged
		git submodule add "./$sub" || return $?
	done &&

	git submodule status >actual &&
	for sub in full-checkout sparse-checkout sparse-index
	do
		grep "$sub" actual || return $?
	done
}

run_on_sparse () {
(
cd sparse-checkout &&
Expand Down Expand Up @@ -1981,4 +1994,63 @@ test_expect_success 'sparse index is not expanded: rm' '
ensure_not_expanded rm -r deep
'

# Run "git grep --cached" with and without an out-of-cone pathspec through
# test_all_match (helper defined elsewhere; presumably it runs the command
# in every test repository and compares the results -- confirm).
test_expect_success 'grep with --cached' '
	init_repos &&
	test_all_match git grep --cached a &&
	test_all_match git grep --cached a -- "folder1/*"
'

# Check that "git grep" does not expand the sparse index for worktree and
# --cached searches, using in-cone and out-of-cone pathspecs.
# All wildcard pathspecs are quoted so that git, not the shell, interprets
# them; an unquoted glob could be expanded against the test directory first.
test_expect_success 'grep is not expanded' '
	init_repos &&
	ensure_not_expanded grep a &&
	ensure_not_expanded grep a -- "deep/*" &&
	# All files within the folder1/* pathspec are sparse,
	# so this command does not find any matches
	ensure_not_expanded ! grep a -- "folder1/*" &&
	# test out-of-cone pathspec with or without wildcard
	ensure_not_expanded grep --cached a -- "folder1/a" &&
	ensure_not_expanded grep --cached a -- "folder1/*" &&
	# test in-cone pathspec with or without wildcard
	ensure_not_expanded grep --cached a -- "deep/a" &&
	ensure_not_expanded grep --cached a -- "deep/*"
'

# NEEDSWORK: when running `grep` in the superproject with --recurse-submodules,
# Git expands the index of the submodules unexpectedly. Even though `grep`
# builtin is marked as "command_requires_full_index = 0", this config is only
# useful for the superproject. Namely, the submodules have their own configs,
# which are _not_ populated by the one-time sparse-index feature switch.
test_expect_failure 'grep within submodules is not expanded' '
init_repos_as_submodules &&
# do not use ensure_not_expanded() here, because `grep` should be
# run in the superproject, not in "./sparse-index"
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git grep --cached --recurse-submodules a -- "*/folder1/*" &&
test_region ! index ensure_full_index trace2.txt
'

# NEEDSWORK: this test is not actually testing the code. The design purpose
# of this test is to verify the grep result when the submodules are using a
# sparse-index. Namely, we want "folder1/" as a tree (a sparse directory); but
# because of the index expansion, we are now grepping the "folder1/a" blob.
# Because of the problem stated above 'grep within submodules is not expanded',
# we don't have the ideal test environment yet.
test_expect_success 'grep sparse directory within submodules' '
	init_repos_as_submodules &&
	cat >expect <<-\EOF &&
	full-checkout/folder1/a:a
	sparse-checkout/folder1/a:a
	sparse-index/folder1/a:a
	EOF
	git grep --cached --recurse-submodules a -- "*/folder1/*" >actual &&
	# test_cmp takes the expected file first, so that a failure diff
	# shows how the actual output deviates from the expectation
	test_cmp expect actual
'

test_done

0 comments on commit 67bf4a8

Please sign in to comment.