Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipes and other improvements #16

Merged
merged 32 commits into from Mar 3, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
ff2338a
Don't build an inverse regex. Inverse regexes are slow as hell. Stub …
ggreer Jan 30, 2012
c4a4775
Progress
ggreer Feb 5, 2012
4723210
Merge branch 'master' into better_inverse_matching
ggreer Feb 16, 2012
a4d0c4a
Merge branch 'master' into better_inverse_matching
ggreer Feb 18, 2012
c7dc78c
length can change :/
ggreer Feb 18, 2012
c2a74e1
Instead of passing an int* around, just return the new length.
ggreer Feb 18, 2012
be67bd7
Yay inverted matching works way better now
ggreer Feb 18, 2012
abb4a7a
Don't check the return code, just check the number of matches
ggreer Feb 18, 2012
271bc53
Whoops extra space
ggreer Feb 18, 2012
ebb212e
Remind myself to fix this
ggreer Feb 18, 2012
7d6d362
And remind myself to fix this.
ggreer Feb 18, 2012
83fd04f
Clean up printing logic a little.
ggreer Feb 29, 2012
a23313a
The computer gods will smite me for this. Don't worry, I'll rewrite it.
ggreer Feb 29, 2012
288545d
Just pass 0 to this pcre_exec.
ggreer Mar 2, 2012
12bbfe2
Move search stuff into search.c and search.h. Stub out searching stdin.
ggreer Mar 2, 2012
f3398f6
Remove unneeded includes from search.h
ggreer Mar 2, 2012
01f7947
Remove unneeded includes from main.c
ggreer Mar 2, 2012
592b106
Whoops. I wasn't building against OS versions of strlcpy and strlcat.
ggreer Mar 3, 2012
c669012
I was overzealous removing some of these includes.
ggreer Mar 3, 2012
d9bea19
Move actual search work into search_buf. search_dir was way too huge
ggreer Mar 3, 2012
80e927e
Forgot to zero-out rc in each run of the loop. Also don't wast as muc…
ggreer Mar 3, 2012
08e959c
Whoops. Remove this debugging printf.
ggreer Mar 3, 2012
66fbfde
Move more stuff out of the huge search_dir()
ggreer Mar 3, 2012
cece178
Make the search functions void for now. Clean up compiler warnings.
ggreer Mar 3, 2012
79ed7ab
Apparently everyone uses --context so... whoops. Not using --context …
ggreer Mar 3, 2012
34324cd
Off by two. Also better variable name.
ggreer Mar 3, 2012
803a1e3
OCD
ggreer Mar 3, 2012
5e1fcc4
Searching stdin kinda-sorta works sometimes.
ggreer Mar 3, 2012
90da3b1
This branch has better inverted matching support
ggreer Mar 3, 2012
9d29f3c
Don't print a dumb newline at the start if we're searching stdin
ggreer Mar 3, 2012
f296a84
Support single file search
ggreer Mar 3, 2012
3b61cac
Fix a double-printing bug.
ggreer Mar 3, 2012
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile.am
@@ -1,7 +1,7 @@
ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}

bin_PROGRAMS = ag
ag_SOURCES = src/ignore.c src/log.c src/options.c src/print.c src/util.c src/main.c
ag_SOURCES = src/ignore.c src/log.c src/options.c src/print.c src/search.c src/util.c src/main.c
ag_LDADD = ${PCRE_LIBS}

man_MANS = doc/ag.1
Expand Down
1 change: 0 additions & 1 deletion README.md
Expand Up @@ -58,7 +58,6 @@ You can use this with [my fork](https://github.com/ggreer/AckMate) of the popula
### TODO ###
* behave better when matching in files with really long lines
* maybe say "match found at position X of line N" if line is > 10k chars
* better support for inverted matching (it's stupidly slow right now)
* support piping in stuff
* allow searching files > 1GB in size
* optimizations
Expand Down
243 changes: 7 additions & 236 deletions src/main.c
@@ -1,250 +1,16 @@
#include <dirent.h>
#include <errno.h>
#include <limits.h>
#include <fcntl.h>
#include <pcre.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/dir.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include "ignore.h"
#include "log.h"
#include "options.h"
#include "print.h"
#include "util.h"
#include "search.h"

#include "config.h"
/* #define AG_DEBUG */

/* TODO: make these configurable */
const int MAX_SEARCH_DEPTH = 25;
const int MAX_MATCHES_PER_FILE = 10000;

ag_stats stats;

size_t skip_lookup[256];

/*
 * Build "<dir>/<name>" in a freshly malloc()ed buffer.
 * Returns NULL if the allocation fails; the caller owns the result and must
 * free() it.
 */
static char *join_path(const char *dir, const char *name) {
    size_t path_length = strlen(dir) + strlen(name) + 2; /* 2 for slash and null char */
    char *full_path = malloc(path_length);

    if (full_path == NULL) {
        return NULL;
    }
    strlcpy(full_path, dir, path_length);
    strlcat(full_path, "/", path_length);
    strlcat(full_path, name, path_length);
    return full_path;
}

/* TODO: append matches to some data structure instead of just printing them out
 * then there can be sweet summaries of matches/files scanned/time/etc
 */
/*
 * Search every entry of the directory `path` for the query and print matches.
 *
 * The directory is scanned twice: first with ignorefile_filter, passing each
 * resulting path to load_ignore_patterns(); then with filename_filter to get
 * the entries to search. Sub-directories are searched recursively when
 * opts.recurse_dirs is set. Regular files are mmap()ed and searched either
 * literally (opts.literal) or with the compiled regex `re` / `re_extra`.
 *
 * re, re_extra: compiled pattern and study data handed to pcre_exec().
 * path:         directory to scan.
 * depth:        current recursion depth; the process exits with status 1 if
 *               it exceeds MAX_SEARCH_DEPTH.
 *
 * Always returns 0. Per-file failures are logged and the file is skipped.
 */
int search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
    /* TODO: don't just die. also make max depth configurable */
    if (depth > MAX_SEARCH_DEPTH) {
        log_err("Search depth greater than %i, giving up.", depth);
        exit(1);
    }

    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    char *dir_full_path = NULL;
    int results = 0;
    int i;

    /* Pass 1: load ignore patterns from the entries ignorefile_filter selects. */
    results = scandir(path, &dir_list, &ignorefile_filter, &alphasort);
    for (i = 0; i < results; i++) {
        dir = dir_list[i];
        dir_full_path = join_path(path, dir->d_name);
        if (dir_full_path != NULL) {
            load_ignore_patterns(dir_full_path);
        }
        else {
            log_err("Out of memory building path for %s/%s. Skipping...", path, dir->d_name);
        }
        free(dir);
        free(dir_full_path);
        dir_full_path = NULL;
    }
    free(dir_list); /* still NULL if scandir failed, which free() accepts */
    dir_list = NULL;

    /* Pass 2: the entries that should actually be searched. */
    results = scandir(path, &dir_list, &filename_filter, &alphasort);
    if (results == 0) {
        log_debug("No results found in directory %s", path);
        free(dir_list);
        dir_list = NULL;
        return(0);
    }
    else if (results == -1) {
        log_err("Error opening directory %s", path);
        return(0);
    }

    match matches[MAX_MATCHES_PER_FILE];
    /* Every pcre_exec() call below passes an ovector size of 3, so three
     * slots are all that is ever written. */
    int offset_vector[3];
    struct stat statbuf;

    for (i = 0; i < results; i++) {
        /* All per-file state is reset here so nothing leaks from the previous
         * entry into this one (in particular buf, fd and rc). */
        int fd = -1;
        off_t f_len = 0;
        char *buf = NULL; /* non-NULL only while a mapping is live */
        int buf_len = 0;
        int buf_offset = 0;
        int matches_len = 0;
        int binary = 0;
        int rc = 0;

        dir = dir_list[i];
        dir_full_path = join_path(path, dir->d_name);
        if (dir_full_path == NULL) {
            log_err("Out of memory building path for %s/%s. Skipping...", path, dir->d_name);
            goto cleanup;
        }

        log_debug("dir %s type %i", dir_full_path, dir->d_type);
        /* TODO: scan files in current dir before going deeper */
        if (dir->d_type == DT_DIR) {
            if (opts.recurse_dirs) {
                log_debug("Searching dir %s", dir_full_path);
                search_dir(re, re_extra, dir_full_path, depth + 1);
            }
            goto cleanup;
        }

        if (opts.file_search_regex) {
            rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, (int)strlen(dir_full_path),
                           0, 0, offset_vector, 3);
            if (rc < 0) { /* no match */
                log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                goto cleanup;
            }
            /* The filename match result must not influence the content search below. */
            rc = 0;
        }

        fd = open(dir_full_path, O_RDONLY);
        if (fd < 0) {
            log_err("Error opening file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        if (fstat(fd, &statbuf) != 0) {
            log_err("Error fstat()ing file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        f_len = statbuf.st_size;

        if (f_len == 0) {
            log_debug("File %s is empty, skipping.", dir_full_path);
            goto cleanup;
        }

        buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0);
        if (buf == MAP_FAILED) {
            log_err("File %s failed to load: %s.", dir_full_path, strerror(errno));
            buf = NULL; /* nothing to unmap in cleanup */
            goto cleanup;
        }

        buf_len = f_len;

        if (is_binary((void*)buf, buf_len)) {
            if (opts.search_binary_files) {
                binary = 1;
            }
            else {
                log_debug("File %s is binary. Skipping...", dir_full_path);
                goto cleanup;
            }
        }

        if (opts.literal) {
            char *(*ag_strncmp_fp)(const char*, const char*, size_t, size_t, size_t[]) = &boyer_moore_strnstr;

            if (opts.casing == CASE_INSENSITIVE) {
                ag_strncmp_fp = &boyer_moore_strncasestr;
            }
            while (buf_offset < buf_len) {
                /* Resume exactly at buf_offset so the start pointer and the
                 * remaining length always describe the same window, up to the
                 * end of the buffer. */
                char *match_ptr = ag_strncmp_fp(buf + buf_offset, opts.query, buf_len - buf_offset,
                                                opts.query_len, skip_lookup);
                if (match_ptr == NULL) {
                    break;
                }
                matches[matches_len].start = match_ptr - buf;
                matches[matches_len].end = matches[matches_len].start + opts.query_len;
                buf_offset = matches[matches_len].end;
                matches_len++;
                /* Don't segfault. TODO: realloc this array */
                if (matches_len >= MAX_MATCHES_PER_FILE) {
                    log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                    break;
                }
            }
        }
        else {
            /* In my profiling, most of the execution time is spent in this pcre_exec */
            while (buf_offset < buf_len &&
                   (rc = pcre_exec(re, re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
                log_debug("Match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
                matches[matches_len].start = offset_vector[0];
                matches[matches_len].end = offset_vector[1];
                matches_len++;
                buf_offset = offset_vector[1];
                if (offset_vector[0] == offset_vector[1]) {
                    /* Zero-length match: step one byte forward, otherwise the
                     * same empty match would be found again forever. */
                    buf_offset++;
                }
                /* Don't segfault. TODO: realloc this array */
                if (matches_len >= MAX_MATCHES_PER_FILE) {
                    log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                    break;
                }
            }
        }

        if (opts.stats) {
            stats.total_bytes += buf_len;
            stats.total_files++;
            stats.total_matches += matches_len;
        }

        if (rc == -1) {
            log_debug("No match in %s", dir_full_path);
        }

        if (matches_len > 0) {
            if (opts.print_filename_only) {
                print_path(dir_full_path);
                putchar('\n');
            }
            else if (binary) {
                print_binary_file_matches(dir_full_path);
            }
            else {
                print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
            }
        }

        cleanup:
        /* Only unmap a mapping that was really established for this file. */
        if (buf != NULL) {
            munmap(buf, f_len);
        }
        if (fd != -1) {
            close(fd);
        }

        free(dir);
        dir = NULL;
        free(dir_full_path);
        dir_full_path = NULL;
    }

    free(dir_list);
    dir_list = NULL;
    return(0);
}

int main(int argc, char **argv) {
set_log_level(LOG_LEVEL_WARN);

Expand Down Expand Up @@ -303,7 +69,12 @@ int main(int argc, char **argv) {
}
}

search_dir(re, re_extra, path, 0);
if (opts.search_stdin) {
search_stdin(re, re_extra);
}
else {
search_dir(re, re_extra, path, 0);
}

if (opts.stats) {
gettimeofday(&(stats.time_end), NULL);
Expand Down
17 changes: 3 additions & 14 deletions src/options.c
Expand Up @@ -126,6 +126,7 @@ void parse_options(int argc, char **argv, char **query, char **path) {

if (!isatty(fileno(stdin))) {
opts.search_stdin = 1;
group = 0;
}

/* If we're not outputting to a terminal. change output to:
Expand Down Expand Up @@ -258,20 +259,8 @@ void parse_options(int argc, char **argv, char **query, char **path) {
opts.print_break = 0;
}

/* I can't figure out how to tell pcre_exec() to invert matches,
so build an inverse regex. Yes I know this sucks.
*/
if (opts.invert_match) {
opts.query_len = strlen(argv[0]) + 11;
opts.query = malloc(opts.query_len);
strlcat(opts.query, "^((?!", opts.query_len);
strlcat(opts.query, argv[0], opts.query_len);
strlcat(opts.query, ").)*$", opts.query_len);
}
else {
opts.query = strdup(argv[0]);
opts.query_len = strlen(opts.query);
}
opts.query = strdup(argv[0]);
opts.query_len = strlen(opts.query);

*query = opts.query;

Expand Down
18 changes: 14 additions & 4 deletions src/print.c
Expand Up @@ -36,6 +36,7 @@ void print_binary_file_matches(const char* path) {

/* TODO: make print_matching_line() */

/* TODO: doesn't work for matches across lines */
void print_file_matches(const char* path, const char* buf, const int buf_len, const match matches[], const int matches_len) {
int line = 1;
int column = 0;
Expand Down Expand Up @@ -76,10 +77,19 @@ void print_file_matches(const char* path, const char* buf, const int buf_len, co
}

if (lines_since_last_match > 0) {
/* TODO: this is buggy as hell */
if (opts.before > 0 && lines_since_last_match > opts.after + opts.before) {
/* We found the start of a match. print the previous line(s) */
for (j = 0; j < opts.before; j++) {
if (opts.before > 0) {
/* TODO: this is buggy as hell */
/* TODO: yeah, it totally is */
/* print the previous line(s) */
int lines_to_print = lines_since_last_match - (opts.before + opts.after + 1);
if (lines_to_print < 0) {
lines_to_print = opts.before;
}
if (lines_to_print > opts.before) {
lines_to_print = 0;
}

for (j = lines_to_print; j < opts.before; j++) {
prev_line = (last_prev_line + j) % opts.before;
if (context_prev_lines[prev_line] != NULL) {
if (opts.print_heading == 0) {
Expand Down