New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deep ignores #501

Merged
merged 13 commits into from Oct 9, 2014
Copy path View file
@@ -5,6 +5,8 @@
#include <string.h>
#include <sys/stat.h>

#include <assert.h>

#include "ignore.h"
#include "log.h"
#include "options.h"
@@ -37,13 +39,30 @@ const char *ignore_pattern_files[] = {
NULL
};

ignores *init_ignore(ignores *parent) {
/*
 * Allocate a fresh, empty ignore set for one directory level.
 *
 * parent      - ignore set of the enclosing directory, or NULL for the root.
 * dirname     - name of this directory; the pointer is stored as-is, not copied.
 * dirname_len - length of dirname, excluding the terminating NUL.
 *
 * Returns a newly allocated ignores whose abs_path is also heap-allocated.
 * abs_path is derived as follows:
 *   - parent has a non-empty abs_path: "<parent abs_path>/<dirname>"
 *   - otherwise, dirname is exactly ".": the empty string
 *   - otherwise: a copy of dirname
 */
ignores *init_ignore(ignores *parent, const char *dirname, const size_t dirname_len) {
    ignores *ig = ag_malloc(sizeof(ignores));

    /* Every pattern list starts out empty. */
    ig->names = NULL;
    ig->slash_names = NULL;
    ig->regexes = NULL;
    ig->slash_regexes = NULL;
    ig->names_len = 0;
    ig->slash_names_len = 0;
    ig->regexes_len = 0;
    ig->slash_regexes_len = 0;

    ig->dirname = dirname;
    ig->dirname_len = dirname_len;
    ig->parent = parent;

    /* Nested directory: extend the parent's path by one component. */
    if (parent != NULL && parent->abs_path_len > 0) {
        ag_asprintf(&(ig->abs_path), "%s/%s", parent->abs_path, dirname);
        ig->abs_path_len = parent->abs_path_len + 1 + dirname_len;
        return ig;
    }

    /* "." with no usable parent path is represented by an empty string. */
    if (dirname_len == 1 && dirname[0] == '.') {
        ig->abs_path = ag_malloc(sizeof(char));
        *ig->abs_path = '\0';
        ig->abs_path_len = 0;
        return ig;
    }

    /* Top-level search path: the directory name is the whole path. */
    ag_asprintf(&(ig->abs_path), "%s", dirname);
    ig->abs_path_len = dirname_len;
    return ig;
}

@@ -63,6 +82,9 @@ void cleanup_ignore(ignores *ig) {
}
free(ig->names);
}
if (ig->abs_path) {
free(ig->abs_path);
}
free(ig);
}
}
@@ -88,31 +110,45 @@ void add_ignore_pattern(ignores *ig, const char *pattern) {
return;
}

/* TODO: de-dupe these patterns */
char ***patterns_p;
size_t *patterns_len;
if (is_fnmatch(pattern)) {
ig->regexes_len++;
ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char *));
/* Prepend '/' if the pattern contains '/' but doesn't start with '/' */
if ((pattern[0] != '/') && (strchr(pattern, '/') != NULL)) {
ag_asprintf(&(ig->regexes[ig->regexes_len - 1]), "/%s", pattern);
log_debug("added regex ignore pattern /%s", pattern);
if (pattern[0] == '/') {
patterns_p = &(ig->slash_regexes);
patterns_len = &(ig->slash_regexes_len);
pattern++;
pattern_len--;
} else {
ig->regexes[ig->regexes_len - 1] = ag_strndup(pattern, pattern_len);
log_debug("added regex ignore pattern %s", pattern);
patterns_p = &(ig->regexes);
patterns_len = &(ig->regexes_len);
}
} else {
/* a balanced binary tree is best for performance, but I'm lazy */
ig->names_len++;
ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char *));
for (i = ig->names_len - 1; i > 0; i--) {
if (strcmp(pattern, ig->names[i - 1]) > 0) {
break;
}
ig->names[i] = ig->names[i - 1];
if (pattern[0] == '/') {
patterns_p = &(ig->slash_names);
patterns_len = &(ig->slash_names_len);
pattern++;
pattern_len--;
} else {
patterns_p = &(ig->names);
patterns_len = &(ig->names_len);
}
}

++*patterns_len;

char **patterns;

/* a balanced binary tree is best for performance, but I'm lazy */
*patterns_p = patterns = ag_realloc(*patterns_p, (*patterns_len) * sizeof(char *));
/* TODO: de-dupe these patterns */
for (i = *patterns_len - 1; i > 0; i--) {
if (strcmp(pattern, patterns[i - 1]) > 0) {
break;
}
ig->names[i] = ag_strndup(pattern, pattern_len);
log_debug("added literal ignore pattern %s", pattern);
patterns[i] = patterns[i - 1];
}
patterns[i] = ag_strndup(pattern, pattern_len);
log_debug("added ignore pattern %s", pattern);
}

/* For loading git/hg ignore patterns */
@@ -220,7 +256,9 @@ static int ackmate_dir_match(const char *dir_name) {
return pcre_exec(opts.ackmate_dir_filter, NULL, dir_name, strlen(dir_name), 0, 0, NULL, 0);
}

static int filename_ignore_search(const ignores *ig, const char *filename) {
static int path_ignore_search(const ignores *ig, const char *path, const char *filename) {
char *temp;

size_t i;
int match_pos;

@@ -234,31 +272,58 @@ static int filename_ignore_search(const ignores *ig, const char *filename) {
return 1;
}

ag_asprintf(&temp, "%s/%s", path[0] == '.' ? path + 1 : path, filename);
log_debug("temp: %s abs path: %s", temp, ig->abs_path);

if (strncmp(temp, ig->abs_path, ig->abs_path_len) == 0) {
char *slash_filename = temp + ig->abs_path_len + 1;
match_pos = binary_search(slash_filename, ig->names, 0, ig->names_len);
if (match_pos >= 0) {
log_debug("file %s ignored because name matches static pattern %s", temp, ig->names[match_pos]);
free(temp);
return 1;
}

match_pos = binary_search(slash_filename, ig->slash_names, 0, ig->slash_names_len);
if (match_pos >= 0) {
log_debug("file %s ignored because name matches slash static pattern %s", slash_filename, ig->slash_names[match_pos]);
free(temp);
return 1;
}

for (i = 0; i < ig->names_len; i++) {
char *pos = strstr(slash_filename, ig->names[i]);
if (pos == slash_filename || (pos && *(pos - 1) == '/')) {
pos += strlen(ig->names[i]);
if (*pos == '\0' || *pos == '/') {
log_debug("file %s ignored because path somewhere matches name %s", slash_filename, ig->names[i]);
free(temp);
return 1;
}
}
log_debug("pattern %s doesn't match name %s", slash_filename, ig->names[i]);
}

for (i = 0; i < ig->slash_regexes_len; i++) {
if (fnmatch(ig->slash_regexes[i], slash_filename, fnmatch_flags) == 0) {
log_debug("file %s ignored because name matches slash regex pattern %s", slash_filename, ig->slash_regexes[i]);
free(temp);
return 1;
}
log_debug("pattern %s doesn't match slash file %s", ig->slash_regexes[i], slash_filename);
}
}

for (i = 0; i < ig->regexes_len; i++) {
if (fnmatch(ig->regexes[i], filename, fnmatch_flags) == 0) {
log_debug("file %s ignored because name matches regex pattern %s", filename, ig->regexes[i]);
free(temp);
return 1;
}
log_debug("pattern %s doesn't match file %s", ig->regexes[i], filename);
}

log_debug("file %s not ignored", filename);
return 0;
}

static int path_ignore_search(const ignores *ig, const char *path, const char *filename) {
char *temp;

if (filename_ignore_search(ig, filename)) {
return 1;
}

ag_asprintf(&temp, "%s/%s", path[0] == '.' ? path + 1 : path, filename);

if (filename_ignore_search(ig, temp)) {
free(temp);
return 1;
}

int rv = ackmate_dir_match(temp);
free(temp);
Copy path View file
@@ -11,8 +11,19 @@
/*
 * Ignore patterns collected for one directory level. Levels are chained
 * through `parent`, and each level records its own path in `abs_path`.
 * Patterns that began with '/' are kept in the slash_* lists with the
 * leading slash removed.
 */
struct ignores {
char **names; /* Non-regex ignore lines. Sorted so we can binary search them. */
size_t names_len; /* Number of entries in names */
char **slash_names; /* Same but starts with a slash */
size_t slash_names_len; /* Number of entries in slash_names */

char **regexes; /* For patterns that need fnmatch */
size_t regexes_len; /* Number of entries in regexes */
char **slash_regexes; /* fnmatch patterns that started with a slash */
size_t slash_regexes_len; /* Number of entries in slash_regexes */

const char *dirname; /* Directory name given to init_ignore(); pointer is stored, not copied */
size_t dirname_len; /* Length of dirname as given to init_ignore() */
/* Allocated by init_ignore() and freed by cleanup_ignore(): the parent's
 * abs_path joined with dirname by '/', the empty string when dirname is "."
 * and there is no parent path, or otherwise a copy of dirname. */
char *abs_path;
size_t abs_path_len; /* Length of abs_path; 0 when abs_path is empty */

struct ignores *parent; /* Ignore set of the enclosing level; NULL for root_ignores */
};
typedef struct ignores ignores;
@@ -22,7 +33,7 @@ ignores *root_ignores;
extern const char *evil_hardcoded_ignore_files[];
extern const char *ignore_pattern_files[];

ignores *init_ignore(ignores *parent);
ignores *init_ignore(ignores *parent, const char *dirname, const size_t dirname_len);
void cleanup_ignore(ignores *ig);

void add_ignore_pattern(ignores *ig, const char *pattern);
Copy path View file
@@ -74,7 +74,7 @@ lang_spec_t langs[] = {
};

/*
 * Return the number of entries in the langs[] table.
 *
 * The element size is taken from langs[0] so the count stays correct even if
 * the table's element type is ever renamed or changed.
 */
unsigned int get_lang_count() {
    return sizeof(langs) / sizeof(langs[0]);
}

char *make_lang_regex(const char **extensions) {
Copy path View file
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
work_queue = NULL;
work_queue_tail = NULL;
memset(&stats, 0, sizeof(stats));
root_ignores = init_ignore(NULL);
root_ignores = init_ignore(NULL, "", 0);
out_fd = stdout;
#ifdef USE_PCRE_JIT
int has_jit = 0;
@@ -131,7 +131,9 @@ int main(int argc, char **argv) {
for (i = 0; paths[i] != NULL; i++) {
log_debug("searching path %s for %s", paths[i], opts.query);
symhash = NULL;
search_dir(root_ignores, base_paths[i], paths[i], 0);
ignores *ig = init_ignore(root_ignores, paths[i], strlen(paths[i]));
search_dir(ig, base_paths[i], paths[i], 0);
cleanup_ignore(ig);
}
pthread_mutex_lock(&work_queue_mtx);
done_adding_files = TRUE;
Copy path View file
@@ -18,7 +18,7 @@ enum case_behavior {
};

enum path_print_behavior {
PATH_PRINT_DEFAULT, /* PRINT_TOP if > 1 file being searched, else PRINT_NOTHING */
PATH_PRINT_DEFAULT, /* PRINT_TOP if > 1 file being searched, else PRINT_NOTHING */
PATH_PRINT_DEFAULT_EACH_LINE, /* PRINT_EACH_LINE if > 1 file being searched, else PRINT_NOTHING */
PATH_PRINT_TOP,
PATH_PRINT_EACH_LINE,
@@ -68,7 +68,7 @@ typedef struct {
int search_stream; /* true if tail -F blah | ag */
int stats;
size_t stream_line_num; /* This should totally not be in here */
int match_found; /* This should totally not be in here */
int match_found; /* This should totally not be in here */
ino_t stdout_inode;
char *query;
int query_len;
Copy path View file
@@ -75,7 +75,6 @@ void search_buf(const char *buf, const size_t buf_len,
if ((size_t)matches_len + matches_spare >= matches_size) {
/* TODO: benchmark initial size of matches. 100 may be too small/big */
matches_size = matches ? matches_size * 2 : 100;
log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
matches = ag_realloc(matches, matches_size * sizeof(match_t));
}

@@ -101,7 +100,6 @@ void search_buf(const char *buf, const size_t buf_len,
/* TODO: copy-pasted from above. FIXME */
if ((size_t)matches_len + matches_spare >= matches_size) {
matches_size = matches ? matches_size * 2 : 100;
log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
matches = ag_realloc(matches, matches_size * sizeof(match_t));
}

@@ -482,7 +480,12 @@ void search_dir(ignores *ig, const char *base_path, const char *path, const int
} else if (opts.recurse_dirs) {
if (depth < opts.max_search_depth) {
log_debug("Searching dir %s", dir_full_path);
ignores *child_ig = init_ignore(ig);
ignores *child_ig;
// #if defined(__MINGW32__) || defined(__CYGWIN__)
child_ig = init_ignore(ig, dir->d_name, strlen(dir->d_name));
// #else
// child_ig = init_ignore(ig, dir->d_name, dir->d_namlen);
// #endif
search_dir(child_ig, base_path, dir_full_path, depth + 1);
cleanup_ignore(child_ig);
} else {
@@ -0,0 +1,16 @@
Setup:

$ . $TESTDIR/setup.sh
$ mkdir -p parent/multi-part
$ echo 'match1' > parent/multi-part/file1.txt
$ echo 'parent/multi-*' > .gitignore

# Ignore directory specified by glob:

# $ ag match .
# [1]

# Ignore directory specified by glob with absolute search path (#448):

# $ ag match $(pwd)
# [1]
@@ -0,0 +1,22 @@
Setup:

$ . $TESTDIR/setup.sh
$ mkdir -p subdir
$ echo 'match1' > subdir/file1.txt
$ echo 'file1.txt' > .gitignore

Ignore file specified by name:

$ ag match
[1]

# Ignore file specified by name in parent directory's .gitignore when using path (#144):

# $ ag match subdir
# [1]

# Ignore file specified by name in parent directory's .gitignore when using current directory (#144):

# $ cd subdir
# $ ag match
# [1]
@@ -0,0 +1,17 @@
Setup:

$ . $TESTDIR/setup.sh
$ mkdir subdir
$ echo 'first' > file1.txt
$ echo 'second' > subdir/file2.txt
$ echo '*.txt' > .gitignore

Ignore file based on extension match:

$ ag first
[1]

Ignore file in subdirectory based on extension match (#442):

$ ag second
[1]
Copy path View file
@@ -4,7 +4,9 @@ Setup:
$ mkdir -p ./a/b/c
$ echo 'whatever1' > ./a/b/c/blah.yml
$ echo 'whatever2' > ./a/b/foo.yml
$ echo 'a/b/*.yml' > ./.gitignore
$ echo 'a/b/foo.yml' > ./.gitignore
# TODO: have this work instead of the above
# $ echo 'a/b/*.yml' > ./.gitignore

Ignore foo.yml but not blah.yml:

ProTip! Use n and p to navigate between commits in a pull request.