Skip to content

Commit

Permalink
list-objects-filter: implement filter tree:0
Browse files Browse the repository at this point in the history
Teach list-objects the "tree:0" filter which allows for filtering
out all tree and blob objects (unless other objects are explicitly
specified by the user). The purpose of this patch is to allow smaller
partial clones.

The name of this filter - tree:0 - does not explicitly specify that
it also filters out all blobs, but this should not cause much confusion
because blobs are not at all useful without the trees that refer to
them.

I also considered only:commits as a name, but it is inaccurate: it
suggests that annotated tags are omitted, whereas they are actually
included.

The name "tree:0" allows later filtering based on depth, i.e. "tree:1"
would filter out all but the root tree and blobs. In order to avoid
confusion between 0 and capital O, the documentation was worded in a
somewhat round-about way that also hints at this future improvement to
the feature.

Signed-off-by: Matthew DeVore <matvore@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
matvore authored and gitster committed Oct 6, 2018
1 parent cc0b05a commit bc5975d
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Documentation/rev-list-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,11 @@ the requested refs.
+
The form '--filter=sparse:path=<path>' similarly uses a sparse-checkout
specification contained in <path>.
+
The form '--filter=tree:<depth>' omits all blobs and trees whose depth
from the root tree is >= <depth> (minimum depth if an object is located
at multiple depths in the commits traversed). Currently, only <depth>=0
is supported, which omits all blobs and trees.

--no-filter::
Turn off any previous `--filter=` argument.
Expand Down
13 changes: 13 additions & 0 deletions list-objects-filter-options.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,19 @@ static int gently_parse_list_objects_filter(
return 0;
}

} else if (skip_prefix(arg, "tree:", &v0)) {
unsigned long depth;
if (!git_parse_ulong(v0, &depth) || depth != 0) {
if (errbuf) {
strbuf_addstr(
errbuf,
_("only 'tree:0' is supported"));
}
return 1;
}
filter_options->choice = LOFC_TREE_NONE;
return 0;

} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
struct object_context oc;
struct object_id sparse_oid;
Expand Down
1 change: 1 addition & 0 deletions list-objects-filter-options.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ enum list_objects_filter_choice {
LOFC_DISABLED = 0,
LOFC_BLOB_NONE,
LOFC_BLOB_LIMIT,
LOFC_TREE_NONE,
LOFC_SPARSE_OID,
LOFC_SPARSE_PATH,
LOFC__COUNT /* must be last */
Expand Down
49 changes: 49 additions & 0 deletions list-objects-filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,54 @@ static void *filter_blobs_none__init(
return d;
}

/*
 * State for the list-objects filter that drops every tree and every blob
 * from the traversal.
 *
 * "omits" is optional: when it is non-NULL the filter records the OID of
 * each object it drops there; when it is NULL nothing is recorded.
 */
struct filter_trees_none_data {
	struct oidset *omits;
};

/*
 * Filter callback that hard-omits all trees and blobs.
 *
 * For LOFS_BEGIN_TREE and LOFS_BLOB the object is reported as
 * LOFR_MARK_SEEN (never LOFR_DO_SHOW), and its OID is added to the
 * "omits" set when one was supplied.  For LOFS_END_TREE the callback
 * only checks that the object is a tree and returns LOFR_ZERO.  Any
 * other situation is reported through BUG().
 *
 * "pathname" and "filename" are not used by this filter.
 */
static enum list_objects_filter_result filter_trees_none(
	enum list_objects_filter_situation filter_situation,
	struct object *obj,
	const char *pathname,
	const char *filename,
	void *filter_data_)
{
	struct filter_trees_none_data *state = filter_data_;

	if (filter_situation == LOFS_END_TREE) {
		/* No action is needed here beyond the sanity check. */
		assert(obj->type == OBJ_TREE);
		return LOFR_ZERO;
	}

	if (filter_situation != LOFS_BEGIN_TREE &&
	    filter_situation != LOFS_BLOB) {
		BUG("unknown filter_situation: %d", filter_situation);
	}

	/* Record the omission only if the caller asked for the list. */
	if (state->omits)
		oidset_insert(state->omits, &obj->oid);

	/* Seen, but without LOFR_DO_SHOW: this is a hard omit. */
	return LOFR_MARK_SEEN;
}

/*
 * Set up the filter that omits all trees and blobs.
 *
 * Allocates a zeroed filter_trees_none_data, stores "omitted" in it (the
 * pointer may be NULL, in which case omissions are not recorded), and
 * hands back the callbacks through "filter_fn" and "filter_free_fn".
 * The returned state is released with free(), which is the function
 * stored in *filter_free_fn.
 *
 * "filter_options" is not consulted by this filter.
 */
static void *filter_trees_none__init(
	struct oidset *omitted,
	struct list_objects_filter_options *filter_options,
	filter_object_fn *filter_fn,
	filter_free_fn *filter_free_fn)
{
	struct filter_trees_none_data *state = xcalloc(1, sizeof(*state));

	*filter_free_fn = free;
	*filter_fn = filter_trees_none;

	state->omits = omitted;
	return state;
}

/*
* A filter for list-objects to omit large blobs.
* And to OPTIONALLY collect a list of the omitted OIDs.
Expand Down Expand Up @@ -371,6 +419,7 @@ static filter_init_fn s_filters[] = {
NULL,
filter_blobs_none__init,
filter_blobs_limit__init,
filter_trees_none__init,
filter_sparse_oid__init,
filter_sparse_path__init,
};
Expand Down
28 changes: 28 additions & 0 deletions t/t5317-pack-objects-filter-objects.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,34 @@ test_expect_success 'get an error for missing tree object' '
grep -q "bad tree object" bad_tree
'

# Create r1/subtree/file, stage it and commit it in repository r1, so that
# r1 contains a subdirectory with a file for the tree:0 tests.
test_expect_success 'setup for tests of tree:0' '
mkdir r1/subtree &&
echo "This is a file in a subtree" >r1/subtree/file &&
git -C r1 add subtree/file &&
git -C r1 commit -m subtree
'

# Pack HEAD of r1 with --filter=tree:0, index the resulting pack, and require
# that the verify-pack -v listing contains no line matching "tree" or "blob".
test_expect_success 'verify tree:0 packfile has no blobs or trees' '
git -C r1 pack-objects --rev --stdout --filter=tree:0 >commitsonly.pack <<-EOF &&
HEAD
EOF
git -C r1 index-pack ../commitsonly.pack &&
git -C r1 verify-pack -v ../commitsonly.pack >objs &&
! grep -E "tree|blob" objs
'

# Name the tree "HEAD:" explicitly on pack-objects' stdin while filtering with
# tree:0.  The first column of every verify-pack -v line matching "tree" or
# "blob" must then equal the single id printed by `rev-parse HEAD:`.
test_expect_success 'grab tree directly when using tree:0' '
# We should get the tree specified directly but not its blobs or subtrees.
git -C r1 pack-objects --rev --stdout --filter=tree:0 >commitsonly.pack <<-EOF &&
HEAD:
EOF
git -C r1 index-pack ../commitsonly.pack &&
git -C r1 verify-pack -v ../commitsonly.pack >objs &&
awk "/tree|blob/{print \$1}" objs >trees_and_blobs &&
git -C r1 rev-parse HEAD: >expected &&
test_cmp expected trees_and_blobs
'

# Test blob:limit=<n>[kmg] filter.
# We boundary test around the size parameter. The filter is strictly less than
# the value, so size 500 and 1000 should have the same results, but 1001 should
Expand Down
42 changes: 42 additions & 0 deletions t/t5616-partial-clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,48 @@ test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack -
grep "git index-pack.*--fsck-objects" trace
'

# Outline of this test:
#   1. Commit src/dir/file.txt and push it to the "srv" remote.
#   2. Re-create dst as a --no-checkout clone of srv.bare with --filter=tree:0
#      and run fsck on it.
#   3. Check that every object id listed by rev-list --missing=allow-any
#      has type "commit".
#   4. Read the subtree with cat-file, check that it lists file.txt, and run
#      fsck again.
#   5. Walk master with --missing=error, expect 70 lines of output, and expect
#      the listed objects to have exactly the types blob, commit and tree.
test_expect_success 'use fsck before and after manually fetching a missing subtree' '
# push new commit so server has a subtree
mkdir src/dir &&
echo "in dir" >src/dir/file.txt &&
git -C src add dir/file.txt &&
git -C src commit -m "file in dir" &&
git -C src push -u srv master &&
SUBTREE=$(git -C src rev-parse HEAD:dir) &&
rm -rf dst &&
git clone --no-checkout --filter=tree:0 "file://$(pwd)/srv.bare" dst &&
git -C dst fsck &&
# Make sure we only have commits, and all trees and blobs are missing.
git -C dst rev-list --missing=allow-any --objects master \
>fetched_objects &&
awk -f print_1.awk fetched_objects |
xargs -n1 git -C dst cat-file -t >fetched_types &&
sort -u fetched_types >unique_types.observed &&
echo commit >unique_types.expected &&
test_cmp unique_types.expected unique_types.observed &&
# Auto-fetch a tree with cat-file.
git -C dst cat-file -p $SUBTREE >tree_contents &&
grep file.txt tree_contents &&
# fsck still works after an auto-fetch of a tree.
git -C dst fsck &&
# Auto-fetch all remaining trees and blobs with --missing=error
git -C dst rev-list --missing=error --objects master >fetched_objects &&
test_line_count = 70 fetched_objects &&
awk -f print_1.awk fetched_objects |
xargs -n1 git -C dst cat-file -t >fetched_types &&
sort -u fetched_types >unique_types.observed &&
printf "blob\ncommit\ntree\n" >unique_types.expected &&
test_cmp unique_types.expected unique_types.observed
'

test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
rm -rf src dst &&
git init src &&
Expand Down
15 changes: 15 additions & 0 deletions t/t6112-rev-list-filters-objects.sh
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,21 @@ test_expect_success 'rev-list W/ --missing=print and --missing=allow-any for tre
test_must_be_empty rev_list_err
'

# Test tree:0 filter.

# Run rev-list on r3's HEAD with --quiet, --filter-print-omitted and
# --filter=tree:0, strip the "~" character from the ids it prints
# (presumably the marker put on omitted objects -- confirm against the
# rev-list documentation), and require that the set of object types among
# them is exactly blob and tree.
test_expect_success 'verify tree:0 includes trees in "filtered" output' '
git -C r3 rev-list --quiet --objects --filter-print-omitted \
--filter=tree:0 HEAD >revs &&
awk -f print_1.awk revs |
sed s/~// |
xargs -n1 git -C r3 cat-file -t >unsorted_filtered_types &&
sort -u unsorted_filtered_types >filtered_types &&
printf "blob\ntree\n" >expected &&
test_cmp expected filtered_types
'

# Delete some loose objects and use rev-list, but WITHOUT any filtering.
# This models previously omitted objects that we did not receive.

Expand Down

0 comments on commit bc5975d

Please sign in to comment.