Permalink
Browse files

Merge pull request #56 from ggreer/ignore_the_right_way

Ignore the right way
  • Loading branch information...
2 parents: 1c35419 + ccc3a63 · commit 6581fc1b9a0ed333813d311596930e29bc5aa316 · @ggreer committed on Aug 29, 2012
Showing with 162 additions and 138 deletions.
  1. +1 −1 Makefile.am
  2. +59 −79 src/ignore.c
  3. +20 −9 src/ignore.h
  4. +13 −15 src/main.c
  5. +1 −1 src/options.c
  6. +44 −0 src/scandir.c
  7. +12 −0 src/scandir.h
  8. +11 −24 src/search.c
  9. +1 −1 src/search.h
  10. +0 −8 src/util.h
View
2 Makefile.am
@@ -1,7 +1,7 @@
ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
bin_PROGRAMS = ag
-ag_SOURCES = src/ignore.c src/log.c src/options.c src/print.c src/search.c src/util.c src/main.c
+ag_SOURCES = src/ignore.c src/log.c src/options.c src/print.c src/scandir.c src/search.c src/util.c src/main.c
ag_LDADD = ${PCRE_LIBS}
man_MANS = doc/ag.1
View
138 src/ignore.c
@@ -25,55 +25,69 @@ const char *ignore_pattern_files[] = {
NULL
};
-/* For patterns that need fnmatch */
-char **ignore_patterns = NULL;
-int ignore_patterns_len = 0;
+const int fnmatch_flags = 0 & FNM_PATHNAME;
-/* For patterns with no regex stuff in them. Sorted for fast matching. */
-char **ignore_names = NULL;
-int ignore_names_len = 0;
+ignores *init_ignore(ignores *parent) {
+ ignores *ig = malloc(sizeof(ignores));
+ ig->names = NULL;
+ ig->names_len = 0;
+ ig->regexes = NULL;
+ ig->regexes_len = 0;
+ ig->parent = parent;
+ return ig;
+}
-const int fnmatch_flags = 0 & FNM_PATHNAME;
+void cleanup_ignore(ignores *ig) {
+ size_t i;
-void add_ignore_pattern(const char* pattern) {
+ if (ig) {
+ if (ig->regexes) {
+ for (i = 0; i < ig->regexes_len; i++) {
+ free(ig->regexes[i]);
+ }
+ free(ig->regexes);
+ }
+ if (ig->names) {
+ for (i = 0; i < ig->names_len; i++) {
+ free(ig->names[i]);
+ }
+ free(ig->names);
+ }
+ free(ig);
+ }
+}
+
+void add_ignore_pattern(ignores *ig, const char* pattern) {
int i;
+ /* Strip off the leading ./ so that matches are more likely. */
+ if (strncmp(pattern, "./", 2) == 0) {
+ pattern += 2; /* TODO: this totally breaks on systems without 1 byte chars */
+ }
+
if (is_fnmatch(pattern)) {
- ignore_patterns_len++;
- ignore_patterns = realloc(ignore_patterns, (ignore_patterns_len) * sizeof(char*));
- ignore_patterns[ignore_patterns_len - 1] = strdup(pattern);
+ ig->regexes_len++;
+ ig->regexes = realloc(ig->regexes, ig->regexes_len * sizeof(char*));
+ ig->regexes[ig->regexes_len - 1] = strdup(pattern);
log_debug("added regex ignore pattern %s", pattern);
}
else {
/* a balanced binary tree is best for performance, but I'm lazy */
- ignore_names_len++;
- ignore_names = realloc(ignore_names, ignore_names_len * sizeof(char*));
- for (i = ignore_names_len-1; i > 0; i--) {
- if (strcmp(pattern, ignore_names[i-1]) > 0) {
+ ig->names_len++;
+ ig->names = realloc(ig->names, ig->names_len * sizeof(char*));
+ for (i = ig->names_len - 1; i > 0; i--) {
+ if (strcmp(pattern, ig->names[i-1]) > 0) {
break;
}
- ignore_names[i] = ignore_names[i-1];
+ ig->names[i] = ig->names[i-1];
}
- ignore_names[i] = strdup(pattern);
+ ig->names[i] = strdup(pattern);
log_debug("added literal ignore pattern %s", pattern);
}
}
-void cleanup_ignore_patterns() {
- int i;
- for (i = 0; i<ignore_patterns_len; i++) {
- free(ignore_patterns[i]);
- }
- free(ignore_patterns);
-
- for (i = 0; i<ignore_names_len; i++) {
- free(ignore_names[i]);
- }
- free(ignore_names);
-}
-
/* For loading git/svn/hg ignore patterns */
-void load_ignore_patterns(const char *ignore_filename) {
+void load_ignore_patterns(ignores *ig, const char *ignore_filename) {
FILE *fp = NULL;
fp = fopen(ignore_filename, "r");
if (fp == NULL) {
@@ -92,15 +106,16 @@ void load_ignore_patterns(const char *ignore_filename) {
if (line[line_len-1] == '\n') {
line[line_len-1] = '\0'; /* kill the \n */
}
- add_ignore_pattern(line);
+ add_ignore_pattern(ig, line);
}
free(line);
fclose(fp);
}
-void load_svn_ignore_patterns(const char *path, const int path_len) {
+void load_svn_ignore_patterns(ignores *ig, const char *path) {
FILE *fp = NULL;
+ int path_len = strlen(path);
char *dir_prop_base = malloc(path_len + strlen(SVN_DIR_PROP_BASE) + 1);
strlcpy(dir_prop_base, path, path_len + 1);
strlcat(dir_prop_base, SVN_DIR_PROP_BASE, path_len + strlen(SVN_DIR_PROP_BASE) + 1);
@@ -157,7 +172,7 @@ void load_svn_ignore_patterns(const char *path, const int path_len) {
if (line_len > 0) {
entry_line = malloc((size_t)line_len + 1);
strlcpy(entry_line, patterns, line_len + 1);
- add_ignore_pattern(entry_line);
+ add_ignore_pattern(ig, entry_line);
free(entry_line);
entry_line = NULL;
}
@@ -186,11 +201,11 @@ int ackmate_dir_match(const char* dir_name) {
}
/* This function is REALLY HOT. It gets called for every file */
-int filename_filter(struct dirent *dir) {
+int filename_filter(const struct dirent *dir, void *baton) {
const char *filename = dir->d_name;
int match_pos;
- char *pattern = NULL;
- int i;
+ size_t i;
+ ignores *ig = (ignores*) baton;
if (!opts.follow_symlinks && dir->d_type == DT_LNK) {
log_debug("File %s ignored becaused it's a symlink", dir->d_name);
@@ -212,60 +227,25 @@ int filename_filter(struct dirent *dir) {
return 1;
}
- match_pos = binary_search(dir->d_name, ignore_names, 0, ignore_names_len);
+ match_pos = binary_search(dir->d_name, ig->names, 0, ig->names_len);
if (match_pos >= 0) {
- log_debug("file %s ignored because name matches static pattern %s", dir->d_name, ignore_names[match_pos]);
+ log_debug("file %s ignored because name matches static pattern %s", dir->d_name, ig->names[match_pos]);
return 0;
}
if (ackmate_dir_match(dir->d_name)) {
return 0;
}
- for (i = 0; i < ignore_patterns_len; i++) {
- pattern = ignore_patterns[i];
- if (fnmatch(pattern, filename, fnmatch_flags) == 0) {
- log_debug("file %s ignored because name matches regex pattern %s", dir->d_name, pattern);
+ for (i = 0; i < ig->regexes_len; i++) {
+ if (fnmatch(ig->regexes[i], filename, fnmatch_flags) == 0) {
+ log_debug("file %s ignored because name matches regex pattern %s", dir->d_name, ig->regexes[i]);
return 0;
}
}
- return 1;
-}
-
-/* Profiling shows that 70% of execution time is spent in this function.
- Most of that time is in fnmatch()
- */
-int filepath_filter(char *filepath) {
- int match_pos;
- char *pattern = NULL;
- int i;
-
- if (opts.search_all_files) {
- return 1;
- }
-
- /* Strip off the leading ./ so that matches are more likely. */
- if (strncmp(filepath, "./", 2) == 0) {
- filepath += 2; /* TODO: this totally breaks on systems without 1 byte chars */
- }
-
- match_pos = binary_search(filepath, ignore_names, 0, ignore_names_len);
- if (match_pos >= 0) {
- log_debug("file %s ignored because name matches static pattern %s", filepath, ignore_names[match_pos]);
- return 0;
- }
-
- if (ackmate_dir_match(filepath)) {
- return 0;
- }
-
- for (i = 0; i < ignore_patterns_len; i++) {
- pattern = ignore_patterns[i];
- if (fnmatch(pattern, filepath, fnmatch_flags) == 0) {
- log_debug("file %s ignored because name matches regex pattern %s", filepath, pattern);
- return 0;
- }
+ if (ig->parent != NULL) {
+ return filename_filter(dir, (void *)(ig->parent));
}
return 1;
View
29 src/ignore.h
@@ -1,26 +1,37 @@
-#include <sys/dir.h>
-#include <sys/types.h>
-
#ifndef IGNORE_H
#define IGNORE_H
+#include <sys/dir.h>
+#include <sys/types.h>
+
#define SVN_DIR_PROP_BASE "/dir-prop-base"
#define SVN_DIR ".svn"
#define SVN_PROP_IGNORE "svn:ignore"
+struct ignores {
+ char **names; /* Non-regex ignore lines. Sorted so we can binary search them. */
+ size_t names_len;
+ char **regexes; /* For patterns that need fnmatch */
+ size_t regexes_len;
+ struct ignores *parent;
+};
+typedef struct ignores ignores;
+
+ignores *root_ignores;
+
extern const char *evil_hardcoded_ignore_files[];
extern const char *ignore_pattern_files[];
-void add_ignore_pattern(const char* pattern);
+ignores *init_ignore(ignores *parent);
+void cleanup_ignore(ignores *ig);
-void cleanup_ignore_patterns();
+void add_ignore_pattern(ignores *ig, const char* pattern);
-void load_ignore_patterns(const char *ignore_filename);
-void load_svn_ignore_patterns(const char *path, const int path_len);
+void load_ignore_patterns(ignores *ig, const char *ignore_filename);
+void load_svn_ignore_patterns(ignores *ig, const char *path);
int ackmate_dir_match(const char* dir_name);
-int filename_filter(struct dirent *dir);
-int filepath_filter(char *filepath);
+int filename_filter(const struct dirent *dir, void *baton);
#endif
View
28 src/main.c
@@ -4,12 +4,12 @@
#include <string.h>
#include <sys/time.h>
+#include "config.h"
+
#include "log.h"
#include "options.h"
#include "search.h"
-#include "config.h"
-
int main(int argc, char **argv) {
set_log_level(LOG_LEVEL_WARN);
@@ -23,7 +23,16 @@ int main(int argc, char **argv) {
pcre_extra *re_extra = NULL;
double time_diff = 0.0;
- memset(&stats, 0, sizeof(stats)); /* What's the point of an init function if it's going to be this one-liner? */
+ /* What's the point of an init function if it's going to be two lines? */
+ memset(&stats, 0, sizeof(stats));
+ root_ignores = init_ignore(NULL);
+#ifdef USE_PCRE_JIT
+ int has_jit = 0;
+ pcre_config(PCRE_CONFIG_JIT, &has_jit);
+ if (has_jit) {
+ study_opts |= PCRE_STUDY_JIT_COMPILE;
+ }
+#endif
gettimeofday(&(stats.time_start), NULL);
@@ -60,15 +69,6 @@ int main(int argc, char **argv) {
log_err("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err);
exit(1);
}
-
-#ifdef USE_PCRE_JIT
- int has_jit = 0;
- pcre_config(PCRE_CONFIG_JIT, &has_jit);
- if (has_jit) {
- study_opts |= PCRE_STUDY_JIT_COMPILE;
- }
-#endif
-
re_extra = pcre_study(re, study_opts, &pcre_err);
if (re_extra == NULL) {
log_debug("pcre_study returned nothing useful. Error: %s", pcre_err);
@@ -81,7 +81,7 @@ int main(int argc, char **argv) {
else {
for (i = 0; paths[i] != NULL; i++) {
log_debug("searching path %s for %s", paths[i], opts.query);
- search_dir(re, re_extra, paths[i], 0);
+ search_dir(root_ignores, re, re_extra, paths[i], 0);
free(paths[i]);
}
}
@@ -98,7 +98,5 @@ int main(int argc, char **argv) {
pcre_free(re);
pcre_free(re_extra); /* Using pcre_free_study here segfaults on some versions of PCRE */
free(paths);
- cleanup_ignore_patterns();
-
return 0;
}
View
2 src/options.c
@@ -287,7 +287,7 @@ void parse_options(int argc, char **argv, char **paths[]) {
strlcat(ignore_file_path, "/", path_len);
strlcat(ignore_file_path, ignore_pattern_files[0], path_len);
- load_ignore_patterns(ignore_file_path);
+ load_ignore_patterns(root_ignores, ignore_file_path);
free(ignore_file_path);
}
View
44 src/scandir.c
@@ -0,0 +1,44 @@
+#include <dirent.h>
+#include <stdlib.h>
+
+#include "scandir.h"
+#include "util.h"
+
+int ag_scandir(const char *dirname,
+ struct dirent ***namelist,
+ filter_fp filter,
+ void *baton
+ ) {
+ DIR *dirp;
+ struct dirent **names;
+ struct dirent *entry, *d;
+ int names_len = 32;
+ int results_len = 0;
+
+ dirp = opendir(dirname);
+ if (dirp == NULL) {
+ return -1;
+ }
+
+ /* TODO: handle allocation failures */
+ names = malloc(sizeof(struct dirent*) * names_len);
+
+ while ((entry = readdir(dirp)) != NULL) {
+ if ((*filter)(entry, baton) == FALSE) {
+ continue;
+ }
+ if (results_len >= names_len) {
+ /* TODO: handle errors here */
+ names_len = names_len * 2;
+ names = realloc(names, sizeof(struct dirent*) * names_len);
+ }
+ d = malloc(sizeof(struct dirent));
+ memcpy(d, entry, sizeof(struct dirent));
+ names[results_len] = d;
+ results_len++;
+ }
+
+ closedir(dirp);
+ *namelist = names;
+ return results_len;
+}
View
12 src/scandir.h
@@ -0,0 +1,12 @@
+#ifndef SCANDIR_H
+#define SCANDIR_H
+
+typedef int (*filter_fp)(const struct dirent *, void *);
+
+int ag_scandir(const char *dirname,
+ struct dirent ***namelist,
+ filter_fp filter,
+ void *baton
+ );
+
+#endif
View
35 src/search.c
@@ -1,4 +1,5 @@
#include "search.h"
+#include "scandir.h"
void search_buf(const pcre *re, const pcre_extra *re_extra,
const char *buf, const int buf_len,
@@ -217,7 +218,7 @@ void search_file(const pcre *re, const pcre_extra *re_extra, const char *file_fu
/* TODO: Append matches to some data structure instead of just printing them out.
* Then ag can have sweet summaries of matches/files scanned/time/etc.
*/
-void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
+void search_dir(ignores *ig, const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
struct dirent **dir_list = NULL;
struct dirent *dir = NULL;
int results = 0;
@@ -239,27 +240,20 @@ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, co
strlcat(dir_full_path, "/", path_len);
strlcat(dir_full_path, ignore_file, path_len);
if (strcmp(SVN_DIR, ignore_file) == 0) {
- log_debug("svn ignore pattern matched for %s", dir_full_path);
- load_svn_ignore_patterns(dir_full_path, strlen(dir_full_path));
+ load_svn_ignore_patterns(ig, dir_full_path);
}
else {
- load_ignore_patterns(dir_full_path);
+ load_ignore_patterns(ig, dir_full_path);
}
free(dir_full_path);
dir_full_path = NULL;
}
-#ifdef SCANDIR_CONST
- results = scandir(path, &dir_list, &filename_filter, &alphasort);
-#else
- results = scandir(path, &dir_list, (int (*)(const struct dirent *))&filename_filter, &alphasort);
-#endif
+ results = ag_scandir(path, &dir_list, &filename_filter, ig);
if (results == 0)
{
log_debug("No results found in directory %s", path);
- free(dir_list);
- dir_list = NULL;
- return;
+ goto search_dir_cleanup;
}
else if (results == -1) {
if (errno == ENOTDIR) {
@@ -269,12 +263,11 @@ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, co
opts.print_heading = -1;
}
search_file(re, re_extra, path);
- return;
}
else {
log_err("Error opening directory %s: %s", path, strerror(errno));
- return;
}
+ goto search_dir_cleanup;
}
int offset_vector[3];
@@ -325,14 +318,6 @@ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, co
}
}
-/* TODO: this kills performance */
-/*
- log_debug("dir %s type %i", dir_full_path, dir->d_type);
- if (!filepath_filter(dir_full_path)) {
- goto cleanup;
- }
-*/
-
if (dir->d_type != DT_DIR) {
if (opts.file_search_regex) {
rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
@@ -353,7 +338,8 @@ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, co
else if (opts.recurse_dirs) {
if (depth < opts.max_search_depth) {
log_debug("Searching dir %s", dir_full_path);
- search_dir(re, re_extra, dir_full_path, depth + 1);
+ ignores *child_ig = init_ignore(ig);
+ search_dir(child_ig, re, re_extra, dir_full_path, depth + 1);
}
else {
log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
@@ -373,7 +359,8 @@ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, co
dir_full_path = NULL;
}
+ search_dir_cleanup:
free(dir_list);
dir_list = NULL;
- return;
+ cleanup_ignore(ig);
}
View
2 src/search.h
@@ -31,6 +31,6 @@ void search_buf(const pcre *re, const pcre_extra *re_extra,
void search_stdin(const pcre *re, const pcre_extra *re_extra);
void search_stream(const pcre *re, const pcre_extra *re_extra, FILE *stream, const char *path);
void search_file(const pcre *re, const pcre_extra *re_extra, const char *file_full_path);
-void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth);
+void search_dir(ignores *ig, const pcre *re, const pcre_extra *re_extra, const char* path, const int depth);
#endif
View
8 src/util.h
@@ -59,12 +59,4 @@ ssize_t getline(char **lineptr, size_t *n, FILE *stream);
char * strndup (const char *s, size_t n);
#endif
-/*
- * This is just to squelch a compiler warning.
- * Most BSDs want the 3rd parameter to scandir() to be const. Linux doesn't.
- */
-#if defined __NetBSD__ || defined __OpenBSD__ || defined __FreeBSD__ || defined __bsdi__ || defined __DragonFly__ || defined BSD || defined _SYSTYPE_BSD || defined __APPLE__
-#define SCANDIR_CONST
-#endif
-
#endif

0 comments on commit 6581fc1

Please sign in to comment.