Skip to content

Commit

Permalink
list-objects-filter: implement composite filters
Browse files Browse the repository at this point in the history
Allow combining filters such that only objects accepted by all filters
are shown. The motivation for this is to allow getting directory
listings without also fetching blobs. This can be done by combining
blob:none with tree:<depth>. There are massive repositories that have
larger-than-expected trees - even if you include only a single commit.

With this commit, a combined filter is passed to rev-list in the
following form:

	--filter=combine:<FILTER1>+<FILTER2>+...

Such usage is currently an error, so giving it a meaning is backwards-
compatible.

The URL-encoding scheme is being introduced before the repeated flag
logic, and the user-facing documentation for URL-encoding is being
withheld until the repeated flag feature is implemented. The
URL-encoding is in general not meant to be used directly by the user,
and it is better to describe the URL-encoding feature in terms of the
repeated flag.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
matvore authored and gitster committed Jun 18, 2019
1 parent e1ac4a4 commit 1e43301
Show file tree
Hide file tree
Showing 6 changed files with 441 additions and 6 deletions.
106 changes: 104 additions & 2 deletions list-objects-filter-options.c
Expand Up @@ -6,6 +6,12 @@
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "url.h"

/*
 * Forward declaration: gently_parse_list_objects_filter() hands any
 * filter-spec starting with "combine:" to this function, which is defined
 * further down in this file.
 */
static int parse_combine_filter(
struct list_objects_filter_options *filter_options,
const char *arg,
struct strbuf *errbuf);

/*
* Parse value of the argument to the "filter" keyword.
Expand Down Expand Up @@ -35,8 +41,6 @@ static int gently_parse_list_objects_filter(
return 1;
}

filter_options->filter_spec = strdup(arg);

if (!strcmp(arg, "blob:none")) {
filter_options->choice = LOFC_BLOB_NONE;
return 0;
Expand Down Expand Up @@ -77,6 +81,10 @@ static int gently_parse_list_objects_filter(
_("sparse:path filters support has been dropped"));
}
return 1;

} else if (skip_prefix(arg, "combine:", &v0)) {
return parse_combine_filter(filter_options, v0, errbuf);

}
/*
* Please update _git_fetch() in git-completion.bash when you
Expand All @@ -89,10 +97,95 @@ static int gently_parse_list_objects_filter(
return 1;
}

/*
 * Non-whitespace characters that has_reserved_character() refuses to
 * accept unescaped inside a sub-filter-spec.
 */
static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";

/*
 * Scan sub_spec for a character that may not appear literally in a
 * sub-filter-spec: anything that compares <= ' ' as a plain char, or any
 * character listed in RESERVED_NON_WS.
 *
 * Returns 1 and appends a message naming the first offending character to
 * errbuf when one is found; returns 0 when the whole string is clean.
 *
 * NOTE(review): the comparison is done on plain char, so on platforms
 * where char is signed, bytes with the high bit set are also rejected.
 */
static int has_reserved_character(
	struct strbuf *sub_spec, struct strbuf *errbuf)
{
	const char *pos;

	for (pos = sub_spec->buf; *pos; pos++) {
		/* Ordinary character: keep scanning. */
		if (*pos > ' ' && !strchr(RESERVED_NON_WS, *pos))
			continue;

		strbuf_addf(errbuf,
			    "must escape char in sub-filter-spec: '%c'",
			    *pos);
		return 1;
	}

	return 0;
}

static int parse_combine_subfilter(
struct list_objects_filter_options *filter_options,
struct strbuf *subspec,
struct strbuf *errbuf)
{
size_t new_index = filter_options->sub_nr++;
char *decoded;
int result;

ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
filter_options->sub_alloc);
memset(&filter_options->sub[new_index], 0,
sizeof(*filter_options->sub));

decoded = url_percent_decode(subspec->buf);

result = has_reserved_character(subspec, errbuf) ||
gently_parse_list_objects_filter(
&filter_options->sub[new_index], decoded, errbuf);

free(decoded);
return result;
}

/*
 * Parse the part of a filter-spec that follows "combine:".
 *
 * arg is split on '+' and every piece is parsed as a sub-filter; parsing
 * stops at the first piece that fails. On success filter_options->choice
 * is LOFC_COMBINE and filter_options->sub holds the parsed sub-filters.
 * On failure filter_options is released and zeroed, and errbuf carries
 * the reason.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int parse_combine_filter(
	struct list_objects_filter_options *filter_options,
	const char *arg,
	struct strbuf *errbuf)
{
	struct strbuf **pieces = strbuf_split_str(arg, '+', 0);
	struct strbuf **cur;
	int failed = 0;

	if (!pieces[0]) {
		strbuf_addf(errbuf,
			    _("expected something after combine:"));
		failed = 1;
	} else {
		for (cur = pieces; *cur && !failed; cur++) {
			if (cur[1]) {
				/*
				 * Every piece except the last still carries
				 * its "+" terminator; strip it before parsing.
				 */
				size_t plus_at = (*cur)->len - 1;
				assert((*cur)->buf[plus_at] == '+');
				strbuf_remove(*cur, plus_at, 1);
			}
			failed = parse_combine_subfilter(
				filter_options, *cur, errbuf);
		}

		filter_options->choice = LOFC_COMBINE;
	}

	strbuf_list_free(pieces);
	if (failed) {
		list_objects_filter_release(filter_options);
		memset(filter_options, 0, sizeof(*filter_options));
	}
	return failed;
}

int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
const char *arg)
{
struct strbuf buf = STRBUF_INIT;
filter_options->filter_spec = strdup(arg);
if (gently_parse_list_objects_filter(filter_options, arg, &buf))
die("%s", buf.buf);
return 0;
Expand Down Expand Up @@ -129,8 +222,15 @@ void expand_list_objects_filter_spec(
/*
 * Free everything owned by filter_options and reset it to all-zero.
 *
 * Sub-filters are released recursively before the sub array itself is
 * freed. Passing NULL is allowed and does nothing.
 */
void list_objects_filter_release(
	struct list_objects_filter_options *filter_options)
{
	size_t i = 0;

	if (!filter_options)
		return;

	free(filter_options->filter_spec);
	free(filter_options->sparse_oid_value);

	while (i < filter_options->sub_nr) {
		list_objects_filter_release(&filter_options->sub[i]);
		i++;
	}
	free(filter_options->sub);

	memset(filter_options, 0, sizeof(*filter_options));
}

Expand Down Expand Up @@ -174,6 +274,8 @@ void partial_clone_get_default_filter_spec(
*/
if (!core_partial_clone_filter_default)
return;

filter_options->filter_spec = strdup(core_partial_clone_filter_default);
gently_parse_list_objects_filter(filter_options,
core_partial_clone_filter_default,
&errbuf);
Expand Down
17 changes: 14 additions & 3 deletions list-objects-filter-options.h
Expand Up @@ -13,6 +13,7 @@ enum list_objects_filter_choice {
LOFC_BLOB_LIMIT,
LOFC_TREE_DEPTH,
LOFC_SPARSE_OID,
LOFC_COMBINE,
LOFC__COUNT /* must be last */
};

Expand All @@ -38,13 +39,23 @@ struct list_objects_filter_options {
unsigned int no_filter : 1;

/*
* Parsed values (fields) from within the filter-spec. These are
* choice-specific; not all values will be defined for any given
* choice.
* BEGIN choice-specific parsed values from within the filter-spec. Only
* some values will be defined for any given choice.
*/

struct object_id *sparse_oid_value;
unsigned long blob_limit_value;
unsigned long tree_exclude_depth;

/* LOFC_COMBINE values */

/* This array contains all the subfilters which this filter combines. */
size_t sub_nr, sub_alloc;
struct list_objects_filter_options *sub;

/*
* END choice-specific parsed values.
*/
};

/* Normalized command line arguments */
Expand Down
159 changes: 159 additions & 0 deletions list-objects-filter.c
Expand Up @@ -26,6 +26,14 @@
*/
#define FILTER_SHOWN_BUT_REVISIT (1<<21)

/*
 * Per-sub-filter bookkeeping kept by the combine filter.
 */
struct subfilter {
/* The sub-filter that objects are delegated to. */
struct filter *filter;
/* Objects for which this sub-filter has returned LOFR_MARK_SEEN. */
struct oidset seen;
/*
 * Given to the sub-filter as its omits set when the combine filter has
 * one of its own; merged into the combined omits set and cleared when
 * the combine filter's omits are finalized.
 */
struct oidset omits;
/* The tree for which the sub-filter last returned LOFR_SKIP_TREE. */
struct object_id skip_tree;
/*
 * Set when the sub-filter returns LOFR_SKIP_TREE and cleared again once
 * LOFS_END_TREE is reported for skip_tree. While it is set, objects are
 * not delegated to this sub-filter.
 */
unsigned is_skipping_tree : 1;
};

struct filter {
enum list_objects_filter_result (*filter_object_fn)(
struct repository *r,
Expand All @@ -36,6 +44,13 @@ struct filter {
struct oidset *omits,
void *filter_data);

/*
* Optional. If this function is supplied and the filter needs to
* collect omits, then this function is called once before free_fn is
* called.
*/
void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);

void (*free_fn)(void *filter_data);

void *filter_data;
Expand Down Expand Up @@ -471,6 +486,147 @@ static void filter_sparse_oid__init(
filter->free_fn = filter_sparse_free;
}

/*
 * A filter which only shows objects shown by all sub-filters.
 *
 * This is the filter_data of a combine filter: an array of "nr"
 * sub-filter records, one for each entry of the sub array in the
 * list_objects_filter_options it was initialized from.
 */
struct combine_filter_data {
/* Array of nr sub-filter records. */
struct subfilter *sub;
/* Number of elements in sub. */
size_t nr;
};

/*
 * Decide whether the current object should be handed to sub's filter.
 *
 * A sub-filter that is not skipping a tree always receives the object.
 * One that is skipping receives nothing until LOFS_END_TREE is reported
 * for the tree it asked to skip; that event is delegated and also ends
 * the skipping state.
 *
 * Returns 1 to delegate, 0 to withhold the object from this sub-filter.
 */
static int should_delegate(enum list_objects_filter_situation filter_situation,
			   struct object *obj,
			   struct subfilter *sub)
{
	if (sub->is_skipping_tree) {
		int leaving_skipped_tree =
			filter_situation == LOFS_END_TREE &&
			oideq(&obj->oid, &sub->skip_tree);

		if (!leaving_skipped_tree)
			return 0;
		sub->is_skipping_tree = 0;
	}

	return 1;
}

static enum list_objects_filter_result process_subfilter(
struct repository *r,
enum list_objects_filter_situation filter_situation,
struct object *obj,
const char *pathname,
const char *filename,
struct subfilter *sub)
{
enum list_objects_filter_result result;

/*
* Check should_delegate before oidset_contains so that
* is_skipping_tree gets unset even when the object is marked as seen.
* As of this writing, no filter uses LOFR_MARK_SEEN on trees that also
* uses LOFR_SKIP_TREE, so the ordering is only theoretically
* important. Be cautious if you change the order of the below checks
* and more filters have been added!
*/
if (!should_delegate(filter_situation, obj, sub))
return LOFR_ZERO;
if (oidset_contains(&sub->seen, &obj->oid))
return LOFR_ZERO;

result = list_objects_filter__filter_object(
r, filter_situation, obj, pathname, filename, sub->filter);

if (result & LOFR_MARK_SEEN)
oidset_insert(&sub->seen, &obj->oid);

if (result & LOFR_SKIP_TREE) {
sub->is_skipping_tree = 1;
sub->skip_tree = obj->oid;
}

return result;
}

static enum list_objects_filter_result filter_combine(
struct repository *r,
enum list_objects_filter_situation filter_situation,
struct object *obj,
const char *pathname,
const char *filename,
struct oidset *omits,
void *filter_data)
{
struct combine_filter_data *d = filter_data;
enum list_objects_filter_result combined_result =
LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
size_t sub;

for (sub = 0; sub < d->nr; sub++) {
enum list_objects_filter_result sub_result = process_subfilter(
r, filter_situation, obj, pathname, filename,
&d->sub[sub]);
if (!(sub_result & LOFR_DO_SHOW))
combined_result &= ~LOFR_DO_SHOW;
if (!(sub_result & LOFR_MARK_SEEN))
combined_result &= ~LOFR_MARK_SEEN;
if (!d->sub[sub].is_skipping_tree)
combined_result &= ~LOFR_SKIP_TREE;
}

return combined_result;
}

/*
 * free_fn of the combine filter: releases everything allocated by
 * filter_combine__init().
 *
 * Each sub-filter is freed along with its "seen" set. The per-sub-filter
 * "omits" sets are expected to be empty by now, because
 * filter_combine__finalize_omits() drains them before free_fn runs;
 * finding a non-empty one is reported as a BUG.
 *
 * filter_data itself is heap-allocated by filter_combine__init() and is
 * owned by this filter, so it must be freed here too; the caller only
 * frees the enclosing struct filter.
 */
static void filter_combine__free(void *filter_data)
{
	struct combine_filter_data *d = filter_data;
	size_t sub;

	for (sub = 0; sub < d->nr; sub++) {
		list_objects_filter__free(d->sub[sub].filter);
		oidset_clear(&d->sub[sub].seen);
		if (d->sub[sub].omits.set.size)
			BUG("expected oidset to be cleared already");
	}
	free(d->sub);
	/* Previously leaked: the combine_filter_data container itself. */
	free(d);
}

static void add_all(struct oidset *dest, struct oidset *src) {
struct oidset_iter iter;
struct object_id *src_oid;

oidset_iter_init(src, &iter);
while ((src_oid = oidset_iter_next(&iter)) != NULL)
oidset_insert(dest, src_oid);
}

/*
 * finalize_omits_fn of the combine filter: move the omits collected by
 * each sub-filter into the caller-visible omits set, leaving every
 * per-sub-filter omits set empty.
 */
static void filter_combine__finalize_omits(
	struct oidset *omits,
	void *filter_data)
{
	struct combine_filter_data *d = filter_data;
	size_t i = 0;

	while (i < d->nr) {
		struct oidset *pending = &d->sub[i++].omits;

		add_all(omits, pending);
		oidset_clear(pending);
	}
}

static void filter_combine__init(
struct list_objects_filter_options *filter_options,
struct filter* filter)
{
struct combine_filter_data *d = xcalloc(1, sizeof(*d));
size_t sub;

d->nr = filter_options->sub_nr;
d->sub = xcalloc(d->nr, sizeof(*d->sub));
for (sub = 0; sub < d->nr; sub++)
d->sub[sub].filter = list_objects_filter__init(
filter->omits ? &d->sub[sub].omits : NULL,
&filter_options->sub[sub]);

filter->filter_data = d;
filter->filter_object_fn = filter_combine;
filter->free_fn = filter_combine__free;
filter->finalize_omits_fn = filter_combine__finalize_omits;
}

/*
 * Signature of a filter initializer such as filter_combine__init(): it
 * receives the parsed filter options and the struct filter to set up.
 */
typedef void (*filter_init_fn)(
struct list_objects_filter_options *filter_options,
struct filter *filter);
Expand All @@ -484,6 +640,7 @@ static filter_init_fn s_filters[] = {
filter_blobs_limit__init,
filter_trees_depth__init,
filter_sparse_oid__init,
filter_combine__init,
};

struct filter *list_objects_filter__init(
Expand Down Expand Up @@ -535,6 +692,8 @@ void list_objects_filter__free(struct filter *filter)
{
if (!filter)
return;
if (filter->finalize_omits_fn && filter->omits)
filter->finalize_omits_fn(filter->omits, filter->filter_data);
filter->free_fn(filter->filter_data);
free(filter);
}

0 comments on commit 1e43301

Please sign in to comment.