Skip to content

Commit

Permalink
[RFC] diff: introduce --scope option
Browse files Browse the repository at this point in the history
Many git commands, such as "git grep" and "git diff", operate
on the "full-tree" scope of the entire git repository,
which is reasonable under normal circumstances, but if the user
uses sparse checkout in a git monorepo, it's very possible that
he just wants to work with files within the sparse specification,
perhaps because:

* He wants to be able to focus on his subprojects, the output
of other subprojects will only interfere with him.

* He's using partial clone at the same time, and he doesn't
want the above git commands to download a large number of
blobs that are outside the sparse specification, which
is a waste of time and may cause the size of the git repository
to gradually expand.

So we need a way to restrict git commands to the sparse
specification. Implementing "diff --scope" is the first step
in this plan. We are looking for a suitable option to choose:
restrict the path scope of diff to the sparse specification
or keep the full tree scope (default action now). "--scope=sparse",
"--scope=all" are the parameters corresponding to these two
cases.

It is worth noting that the "--scope" option only affects diff
commands that specify "--cached" or "REVISION", because a plain
"git diff" already restricts the scope of diffed files to the sparse
specification by default, while "git diff --cached" or
"git diff REVISION" compares against the commit history, and
the "--scope" option can be used there to restrict the scope or not.

Add the "--scope" option to "git diff-index" and "git diff-tree"
too, because they also meet the above condition: they specify
"--cached" or "REVISION". Meanwhile, "git diff --no-index" and
"git diff-files" don't have this option.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
  • Loading branch information
adlternative committed Nov 28, 2022
1 parent 815c1e8 commit 115d128
Show file tree
Hide file tree
Showing 13 changed files with 488 additions and 6 deletions.
33 changes: 33 additions & 0 deletions Documentation/diff-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,39 @@ For instance, if you configured the `diff.algorithm` variable to a
non-default value and want to use the default one, then you
have to use `--diff-algorithm=default` option.

ifdef::git-diff[]
ifdef::git-diff-index[]
ifdef::git-diff-tree[]

--scope=[sparse|all]::
Choose whether the diff path scope is restricted to the sparse specification.
The variants are as follows:

+
--
`sparse`;;
When using diff to compare commit history, restrict the
scope of file path comparisons to the sparse specification.
See sparse specification in
link:technical/sparse-checkout.html[the sparse-checkout design document]
for more information.
`all`;;
When using diff to compare commit history, the file comparison
scope is full-tree. This is consistent with the current default
behavior.
--
+

Note that the `--scope` option only takes effect if the diff command
specifies `--cached` or `REVISION`.

The behavior of this `--scope` option is experimental and may change
in the future. See
link:technical/sparse-checkout.html[the sparse-checkout design document]
for more information.

endif::git-diff-tree[]
endif::git-diff-index[]
endif::git-diff[]

--stat[=<width>[,<name-width>[,<count>]]]::
Generate a diffstat. By default, as much space as necessary
will be used for the filename part, and the rest for the graph
Expand Down
24 changes: 21 additions & 3 deletions builtin/diff-index.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix)
int i;
int result;

struct option sparse_scope_options[] = {
OPT_SPARSE_SCOPE(&rev.diffopt.scope),
OPT_END()
};

if (argc == 2 && !strcmp(argv[1], "-h"))
usage(diff_cache_usage);

Expand All @@ -35,6 +40,13 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix)
diff_merges_suppress_m_parsing();

argc = setup_revisions(argc, argv, &rev, NULL);

argc = parse_options(argc, argv, prefix, sparse_scope_options, NULL,
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_KEEP_UNKNOWN_OPT |
PARSE_OPT_KEEP_ARGV0 |
PARSE_OPT_NO_INTERNAL_HELP);

for (i = 1; i < argc; i++) {
const char *arg = argv[i];

Expand Down Expand Up @@ -65,9 +77,15 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix)
perror("repo_read_index_preload");
return -1;
}
} else if (repo_read_index(the_repository) < 0) {
perror("repo_read_index");
return -1;
} else {
if (repo_read_index(the_repository) < 0) {
perror("read_cache");
return -1;
}
if (rev.diffopt.scope == SPARSE_SCOPE_SPARSE &&
strcmp(rev.pending.objects[0].name, "HEAD"))
diff_collect_changes_index(&rev.diffopt.pathspec,
&rev.diffopt.change_index_files);
}
result = run_diff_index(&rev, option);
result = diff_result_code(&rev.diffopt, result);
Expand Down
11 changes: 11 additions & 0 deletions builtin/diff-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
int read_stdin = 0;
int merge_base = 0;

struct option sparse_scope_options[] = {
OPT_SPARSE_SCOPE(&opt->diffopt.scope),
OPT_END()
};

if (argc == 2 && !strcmp(argv[1], "-h"))
usage(diff_tree_usage);

Expand All @@ -131,6 +136,12 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
prefix = precompose_argv_prefix(argc, argv, prefix);
argc = setup_revisions(argc, argv, opt, &s_r_opt);

argc = parse_options(argc, argv, prefix, sparse_scope_options, NULL,
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_KEEP_UNKNOWN_OPT |
PARSE_OPT_KEEP_ARGV0 |
PARSE_OPT_NO_INTERNAL_HELP);

memset(&w, 0, sizeof(w));
userformat_find_requirements(NULL, &w);

Expand Down
23 changes: 20 additions & 3 deletions builtin/diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,15 @@ static int builtin_diff_index(struct rev_info *revs,
perror("repo_read_index_preload");
return -1;
}
} else if (repo_read_index(the_repository) < 0) {
perror("repo_read_cache");
return -1;
} else {
if (repo_read_index(the_repository) < 0) {
perror("read_cache");
return -1;
}
if (revs->diffopt.scope == SPARSE_SCOPE_SPARSE &&
strcmp(revs->pending.objects[0].name, "HEAD"))
diff_collect_changes_index(&revs->diffopt.pathspec,
&revs->diffopt.change_index_files);
}
return run_diff_index(revs, option);
}
Expand Down Expand Up @@ -403,6 +409,11 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
int result = 0;
struct symdiff sdiff;

struct option sparse_scope_options[] = {
OPT_SPARSE_SCOPE(&rev.diffopt.scope),
OPT_END()
};

/*
* We could get N tree-ish in the rev.pending_objects list.
* Also there could be M blobs there, and P pathspecs. --cached may
Expand Down Expand Up @@ -507,6 +518,12 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
diff_setup_done(&rev.diffopt);
}

argc = parse_options(argc, argv, prefix, sparse_scope_options, NULL,
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_KEEP_UNKNOWN_OPT |
PARSE_OPT_KEEP_ARGV0 |
PARSE_OPT_NO_INTERNAL_HELP);

rev.diffopt.flags.recursive = 1;
rev.diffopt.rotate_to_strict = 1;

Expand Down
5 changes: 5 additions & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,11 @@ extern int core_apply_sparse_checkout;
extern int core_sparse_checkout_cone;
extern int sparse_expect_files_outside_of_patterns;

enum sparse_scope {
SPARSE_SCOPE_ALL = 0,
SPARSE_SCOPE_SPARSE,
};

/*
* Returns the boolean value of $GIT_OPTIONAL_LOCKS (or the default value).
*/
Expand Down
43 changes: 43 additions & 0 deletions diff-lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,13 @@ static void do_oneway_diff(struct unpack_trees_options *o,

match_missing = revs->match_missing;

if (revs->diffopt.scope == SPARSE_SCOPE_SPARSE &&
((o->index_only && revs->pending.objects[0].name &&
strcmp(revs->pending.objects[0].name, "HEAD") &&
!index_file_in_sparse_specification(idx ? idx : tree, &revs->diffopt.change_index_files)) ||
(!o->index_only && !worktree_file_in_sparse_specification(idx))))
return;

if (cached && idx && ce_stage(idx)) {
struct diff_filepair *pair;
pair = diff_unmerge(&revs->diffopt, idx->name);
Expand Down Expand Up @@ -598,6 +605,42 @@ void diff_get_merge_base(const struct rev_info *revs, struct object_id *mb)
free_commit_list(merge_bases);
}

/*
 * Diff format callback: add the path of every queued filepair to the
 * strset passed in `data`.
 *
 * The "two" side path is always recorded; for pairs whose status is
 * DIFF_STATUS_RENAMED the "one" side path is recorded as well.
 * `options` is not used.
 */
static void diff_collect_updated_cb(struct diff_queue_struct *q,
				    struct diff_options *options,
				    void *data)
{
	struct strset *paths = data;
	int idx = 0;

	while (idx < q->nr) {
		struct diff_filepair *pair = q->queue[idx++];

		strset_add(paths, pair->two->path);
		if (pair->status != DIFF_STATUS_RENAMED)
			continue;
		/* A rename touches two paths: remember the old one too. */
		strset_add(paths, pair->one->path);
	}
}

void diff_collect_changes_index(struct pathspec *pathspec, struct strset *change_index_files)
{
struct rev_info rev;
struct setup_revision_opt opt;

repo_init_revisions(the_repository, &rev, NULL);
memset(&opt, 0, sizeof(opt));
opt.def = "HEAD";
setup_revisions(0, NULL, &rev, &opt);

rev.diffopt.ita_invisible_in_index = 1;
rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
rev.diffopt.format_callback = diff_collect_updated_cb;
rev.diffopt.format_callback_data = change_index_files;
rev.diffopt.flags.recursive = 1;

copy_pathspec(&rev.prune_data, pathspec);
run_diff_index(&rev, 1);
release_revisions(&rev);
}

int run_diff_index(struct rev_info *revs, unsigned int option)
{
struct object_array_entry *ent;
Expand Down
2 changes: 2 additions & 0 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -4663,6 +4663,7 @@ void repo_diff_setup(struct repository *r, struct diff_options *options)
options->color_moved = diff_color_moved_default;
options->color_moved_ws_handling = diff_color_moved_ws_default;

strset_init(&options->change_index_files);
prep_parse_options(options);
}

Expand Down Expand Up @@ -6514,6 +6515,7 @@ void diff_free(struct diff_options *options)
diff_free_ignore_regex(options);
clear_pathspec(&options->pathspec);
FREE_AND_NULL(options->parseopts);
strset_clear(&options->change_index_files);
}

void diff_flush(struct diff_options *options)
Expand Down
7 changes: 7 additions & 0 deletions diff.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "pathspec.h"
#include "object.h"
#include "oidset.h"
#include "strmap.h"

/**
* The diff API is for programs that compare two sets of files (e.g. two trees,
Expand Down Expand Up @@ -285,6 +286,9 @@ struct diff_options {
/* diff-filter bits */
unsigned int filter, filter_not;

/* diff sparse-checkout scope */
enum sparse_scope scope;

int use_color;

/* Number of context lines to generate in patch output. */
Expand Down Expand Up @@ -397,6 +401,7 @@ struct diff_options {
struct option *parseopts;
struct strmap *additional_path_headers;

struct strset change_index_files;
int no_free;
};

Expand Down Expand Up @@ -696,4 +701,6 @@ void print_stat_summary(FILE *fp, int files,
int insertions, int deletions);
void setup_diff_pager(struct diff_options *);

void diff_collect_changes_index(struct pathspec *pathspec, struct strset *files);

#endif /* DIFF_H */
52 changes: 52 additions & 0 deletions dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "ewah/ewok.h"
#include "fsmonitor.h"
#include "submodule-config.h"
#include "parse-options.h"

/*
* Tells read_directory_recursive how a file or directory should be treated.
Expand Down Expand Up @@ -1503,6 +1504,57 @@ int path_in_cone_mode_sparse_checkout(const char *path,
return path_in_sparse_checkout_1(path, istate, 1);
}

/*
 * Check `path` against the sparse-checkout patterns of
 * the_repository->index by calling path_in_sparse_checkout_1() with
 * core_sparse_checkout_cone as its third argument.
 *
 * If `is_dir` is set and `path` does not already end in '/', a
 * trailing '/' is appended before matching.
 *
 * Returns 0 for an empty path or when path_in_sparse_checkout_1()
 * returns 0, and 1 otherwise.
 */
int path_in_sparse_patterns(const char *path, int is_dir)
{
	struct strbuf sb = STRBUF_INIT;
	int in_patterns;

	/* Reject the empty path before allocating anything. */
	if (!*path)
		return 0;

	strbuf_addstr(&sb, path);
	if (is_dir && sb.buf[sb.len - 1] != '/')
		strbuf_addch(&sb, '/');

	in_patterns = !!path_in_sparse_checkout_1(sb.buf,
						  the_repository->index,
						  core_sparse_checkout_cone);

	/* Release on every path; the "not matched" exit used to leak sb. */
	strbuf_release(&sb);
	return in_patterns;
}

/*
 * Sparse specification check for the worktree case: returns 1 when
 * `worktree_check_ce` is non-NULL and ce_skip_worktree() is false for
 * it, 0 otherwise.
 */
int worktree_file_in_sparse_specification(const struct cache_entry *worktree_check_ce)
{
	if (!worktree_check_ce)
		return 0;
	return !ce_skip_worktree(worktree_check_ce);
}

/*
 * Sparse specification check for the index case.
 *
 * Returns 0 for an entry with an empty name, 1 when the entry's name
 * is in `change_index_files` (which may be NULL), and otherwise the
 * result of path_in_sparse_patterns() for the name treated as a
 * non-directory.
 */
int index_file_in_sparse_specification(const struct cache_entry *ce, struct strset *change_index_files)
{
	if (ce->ce_namelen == 0)
		return 0;

	if (!change_index_files ||
	    !strset_contains(change_index_files, ce->name))
		return path_in_sparse_patterns(ce->name, 0);

	return 1;
}

int opt_sparse_scope(const struct option *option,
const char *optarg, int unset)
{
enum sparse_scope *scope = option->value;

BUG_ON_OPT_NEG_NOARG(unset, optarg);

if (!core_apply_sparse_checkout)
return error(_("this git repository don't "
"use sparse-checkout, --scope option cannot be used"));
if (!strcmp(optarg, "all"))
*scope = SPARSE_SCOPE_ALL;
else if (!strcmp(optarg, "sparse"))
*scope = SPARSE_SCOPE_SPARSE;
else
return error(_("invalid --scope value: %s"), optarg);
return 0;
}

static struct path_pattern *last_matching_pattern_from_lists(
struct dir_struct *dir, struct index_state *istate,
const char *pathname, int pathlen,
Expand Down
4 changes: 4 additions & 0 deletions dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "cache.h"
#include "hashmap.h"
#include "strbuf.h"
#include "strmap.h"

/**
* The directory listing API is used to enumerate paths in the work tree,
Expand Down Expand Up @@ -401,6 +402,9 @@ int path_in_sparse_checkout(const char *path,
struct index_state *istate);
int path_in_cone_mode_sparse_checkout(const char *path,
struct index_state *istate);
int path_in_sparse_patterns(const char *path, int is_dir);
int index_file_in_sparse_specification(const struct cache_entry *ce, struct strset *change_index_files);
int worktree_file_in_sparse_specification(const struct cache_entry *worktree_check_ce);

struct dir_entry *dir_add_ignored(struct dir_struct *dir,
struct index_state *istate,
Expand Down
7 changes: 7 additions & 0 deletions parse-options.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,13 @@ int parse_opt_passthru_argv(const struct option *, const char *, int);
/* value is enum branch_track* */
int parse_opt_tracking_mode(const struct option *, const char *, int);

/* value is enum sparse_scope* */
int opt_sparse_scope(const struct option *option,
const char *optarg, int unset);

/*
 * Declare the long-only "--scope" option taking a "[sparse|all]"
 * argument; `var` is handed to opt_sparse_scope() as the option value.
 * The option is flagged PARSE_OPT_NONEG.
 */
#define OPT_SPARSE_SCOPE(var) OPT_CALLBACK_F(0, "scope", (var), N_("[sparse|all]"), \
N_("restrict path scope in sparse specification"), \
PARSE_OPT_NONEG, opt_sparse_scope)

#define OPT__VERBOSE(var, h) OPT_COUNTUP('v', "verbose", (var), (h))
#define OPT__QUIET(var, h) OPT_COUNTUP('q', "quiet", (var), (h))
#define OPT__VERBOSITY(var) \
Expand Down

0 comments on commit 115d128

Please sign in to comment.