Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

working brm, binary search strcoll

  • Loading branch information...
commit d4f473061f914038829d3f706fe8ab37aa72f45e 1 parent 316b819
@erikfrey authored
Showing with 115 additions and 49 deletions.
  1. +14 −7 brutils/Makefile
  2. +35 −24 brutils/brm.c
  3. +12 −11 brutils/brp.c
  4. +54 −7 brutils/brutils.h
View
21 brutils/Makefile
@@ -1,17 +1,24 @@
-CFLAGS = -O3 -Wall
-OBJS = brp.o
+CFLAGS = -ggdb -Wall
+OBJS_BRP = brp.o
+OBJS_BRM = brm.o
+HEADERS = brutils.h
LIBS =
-TARGET = brp
+TARGET_BRP = brp
+TARGET_BRM = brm
BINDIR=/usr/local/bin
-$(TARGET): $(OBJS)
- $(CXX) -o $(TARGET) $(OBJS) $(LIBS)
+all: $(TARGET_BRP) $(TARGET_BRM)
-all: $(TARGET)
+$(TARGET_BRP): $(OBJS_BRP)
+ $(CC) -o $(TARGET_BRP) $(OBJS_BRP) $(LIBS)
+
+$(TARGET_BRM): $(OBJS_BRM)
+ $(CC) -o $(TARGET_BRM) $(OBJS_BRM) $(LIBS)
+
+# brutils.h holds code that is compiled into the objects, so it is the
+# objects (not the linked binaries) that must be rebuilt when it changes.
+# Listing the header on the link rules would only relink stale objects.
+$(OBJS_BRP) $(OBJS_BRM): $(HEADERS)
clean:
- rm -f $(OBJS) $(TARGET)
+ rm -f $(OBJS_BRP) $(OBJS_BRM) $(TARGET_BRP) $(TARGET_BRM)
install: all
install -c brp $(BINDIR)
+ install -c brm $(BINDIR)
View
59 brutils/brm.c
@@ -16,35 +16,46 @@ int main(int argc, char * argv[])
{
FILE * pout = stdout;
int i, col_index;
-
+
if (argc < 4)
showusage();
if (strcmp(argv[1], "-") != 0)
pout = try_open(argv[1], "wb");
col_index = atoi(argv[2]);
-
- int pins_len = argc - 3;
- FILE ** pins = (FILE **) malloc( pins_len * sizeof(FILE *) );
- for (i = 0; i != pins_len; ++i)
- pins[i] = try_open(argv[i + 3], "rb");
-
- line_t * plines = (line_t *) malloc( pins_len * sizeof( line_t ) );
-
- char buf[8192];
- const char * col_beg, * col_end;
- while (fgets(buf, sizeof(buf), pin)) {
- // find the correct column
- col_beg = buf;
- find_col(col_index, &col_beg, &col_end);
- // write it to the correct file
- fputs(buf, pouts[fnv_hash(col_beg, col_end) % pouts_len]);
+
+ int lines_len = argc - 3;
+ line_t ** lines = (line_t **) malloc( lines_len * sizeof(line_t *) );
+ line_t ** lines_end = lines;
+ for (i = 0; i != lines_len; ++i) {
+ *lines_end = (line_t *) malloc( sizeof(line_t) );
+ (*lines_end)->pin = try_open(argv[i + 3], "rb");
+ if (read_parse(col_index, *lines_end)) {
+ ++lines_end;
+ lower_bound_move(lines, lines_end);
+ }
+ else {
+ fclose((*lines_end)->pin);
+ free(*lines_end);
+ }
+ }
+
+ // okay, merge!
+ line_t * back;
+ while (lines != lines_end) {
+ // write to out
+ back = *(lines_end - 1);
+ *back->col_end = back->col_end_val;
+ fputs(back->buf, pout);
+ if (read_parse(col_index, back))
+ lower_bound_move(lines, lines_end);
+ else {
+ fclose(back->pin);
+ --lines_end;
+ }
}
-
- if (pin != stdin)
- fclose(pin);
-
- for (i = 0; i != pouts_len; ++i)
- fclose(pouts[i]);
-
+
+ if (pout != stdout)
+ fclose(pout);
+
return 0;
}
View
23 brutils/brp.c
@@ -14,32 +14,33 @@ void showusage() {
+// brp entry point: copies each input line to one of several output files,
+// chosen by hashing a single column of the line.
+//   argv[1]   input path, or "-" to read stdin
+//   argv[2]   column index handed to find_col()
+//   argv[3..] output paths; at least one is required (argc >= 4)
int main(int argc, char * argv[])
{
- FILE * pin = stdin;
+ line_t line;
int i, col_index;
-
+
if (argc < 4)
showusage();
if (strcmp(argv[1], "-") != 0)
- pin = try_open(argv[1], "rb");
+ line.pin = try_open(argv[1], "rb");
+ else
+ line.pin = stdin;
col_index = atoi(argv[2]);
-
+
+ // one output stream per remaining command-line argument
int pouts_len = argc - 3;
FILE ** pouts = (FILE **) malloc( pouts_len * sizeof(FILE *) );
for (i = 0; i != pouts_len; ++i)
pouts[i] = try_open(argv[i + 3], "wb");
- line_t line;
- while (fgets(line.buf, sizeof(line.buf), pin)) {
+ // lines for which find_col() reports no such column are dropped, not copied
+ while (fgets(line.buf, sizeof(line.buf), line.pin)) {
if ( find_col(col_index, &line) ) // if this string has the requisite number of columns
fputs(line.buf, pouts[fnv_hash(line.col_beg, line.col_end) % pouts_len]); // write it to the correct file
}
-
- if (pin != stdin)
- fclose(pin);
-
+
+ // stdin is not ours to close
+ if (line.pin != stdin)
+ fclose(line.pin);
+
for (i = 0; i != pouts_len; ++i)
fclose(pouts[i]);
-
+
return 0;
}
View
61 brutils/brutils.h
@@ -1,6 +1,9 @@
#ifndef __BR_UTILS_H__
#define __BR_UTILS_H__
+#include <stdio.h>
+#include <string.h>
+
FILE * try_open(const char * path, const char * flags) {
FILE * p = fopen(path, flags);
if (!p) {
@@ -22,17 +25,61 @@ typedef struct
char buf[8192];
char * col_beg;
char * col_end;
+ char col_end_val;
+ FILE * pin;
} line_t;
-bool find_col(int col, line_t * pline) {
- for (pline->col_beg = pline->buf; col != 0; ++*pline->col_beg) {
- if ( isspace(*pline->col_beg) )
+// Locate column `col` (0-based) of the NUL-terminated text in line->buf.
+//
+// Every whitespace character ends one column, so two delimiters in a row
+// yield an empty column. On success line->col_beg points at the first
+// character of the column and line->col_end one past its last character
+// (at the delimiter, or at the terminating NUL when the line has no
+// trailing whitespace).
+//
+// Returns 1 if the column exists, 0 if the text ends before it starts.
+int find_col(int col, line_t * line) {
+ for (line->col_beg = line->buf; col != 0 && *line->col_beg != 0; ++line->col_beg) {
+ // <ctype.h> functions require a value representable as unsigned char
+ if ( isspace((unsigned char) *line->col_beg) )
--col;
- if ( *pline->col_beg == '\n' )
- return false;
}
- for (pline->col_end = pline->col_beg; !isspace(*pline->col_end); ++*pline->col_end) {}
- return true;
+ if (*line->col_beg == 0)
+ return 0;
+ // Stop at the terminator as well as at whitespace: a last line without a
+ // newline (or one truncated by fgets) has no whitespace after the column,
+ // and scanning on would run off the end of the buffer.
+ for (line->col_end = line->col_beg; *line->col_end != 0 && !isspace((unsigned char) *line->col_end); ++line->col_end) {}
+ return 1;
+}
+
+// Read lines from line->pin until one contains column `col`.
+//
+// Lines that lack the column are skipped. On success the character at
+// line->col_end is saved in line->col_end_val and replaced by NUL, so that
+// line->col_beg is a C string holding only the column; the saved character
+// allows the full line to be restored later.
+//
+// Returns 1 when a line was parsed, 0 once fgets() yields no more input.
+int read_parse(int col, line_t * line) {
+ for (;;) {
+ if (fgets(line->buf, sizeof(line->buf), line->pin) == NULL)
+ return 0;
+ if (!find_col(col, line))
+ continue;
+ line->col_end_val = *line->col_end;
+ *line->col_end = 0;
+ return 1;
+ }
+}
+
+// Move the element at end - 1 to its proper position in [beg, end).
+//
+// [beg, end - 1) is expected to be ordered already, with keys that collate
+// later (per strcoll on col_beg) placed earlier in the array. The last
+// element x is inserted in front of the first element that does not collate
+// after it. An empty range is left untouched.
+void lower_bound_move(line_t ** beg, line_t ** end)
+{
+ if (beg == end)
+ return;
+
+ line_t * moved = *(end - 1);
+ // non-negative because beg != end; size_t avoids narrowing the difference to int
+ size_t len = (size_t) (end - beg - 1);
+ size_t half;
+ line_t ** mid;
+
+ // [ * * * * x ]
+ // binary search the sorted prefix for the slot where x belongs
+ while (len > 0) {
+ half = len >> 1;
+ mid = beg + half;
+ if ( strcoll( (*mid)->col_beg, moved->col_beg) > 0 ) {
+ beg = mid + 1;
+ len = len - half - 1;
+ }
+ else
+ len = half;
+ }
+
+ // if beg < end - 1, shift [beg, end - 1) up one slot and drop x into the gap
+ if (beg < end - 1) {
+ // the elements are line_t *, so size the move by *beg rather than by line_t **
+ memmove(beg + 1, beg, (size_t) (end - beg - 1) * sizeof *beg);
+ *beg = moved;
+ }
}
#endif // __BR_UTILS_H__
Please sign in to comment.
Something went wrong with that request. Please try again.