Browse files

converted fastacomplement to pipeline model

  • Loading branch information...
1 parent 9b0be19 commit f5acd5bd564b13b30515a7d6e3416ce0ae0a75c5 greg committed Jun 6, 2011
Showing with 145 additions and 52 deletions.
  1. +2 −0 .bzrignore
  2. +2 −2 Makefile
  3. +1 −0 TODO
  4. +57 −0 fastacomplement.c
  5. +0 −50 fastacomplement.fl
  6. +37 −0 fastalint.c
  7. +7 −0 parameter.h
  8. +39 −0 seekable.c
View
2 .bzrignore
@@ -7,3 +7,5 @@ fastatail
fastacomplement
fastastack
fastalength
+README.branch
+pGFPe-Ler57.fasta
View
4 Makefile
@@ -5,8 +5,8 @@ LDFLAGS=`pkg-config --libs glib-2.0`
all: fastacount fastagap fastatail fastahead fastalint \
fastacomplement fastastack
-fastacomplement: fastacomplement.yy.o
- gcc -o$@ $(LDFLAGS) $^ -lfl
+fastacomplement: lexer.yy.o seekable.o fastacomplement.o firstin.o
+ gcc -o$@ $^ -lfl
fastastack: fastastack.yy.o
gcc -o$@ $(LDFLAGS) $^ -lfl
View
1 TODO
@@ -0,0 +1 @@
++ consider abstracting over fastahead, fastatail
View
57 fastacomplement.c
@@ -0,0 +1,57 @@
+/*
+ * GCB 5jun11
+ *
+ * Convert each sequence in FASTA input to its reverse complement.
+ *
+ * designed to work with seekable.o
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include "parameter.h"
+
+const char *my_optstring = "v"; /* short-option string: declares the single flag 'v'; presumably read by the shared option-parsing driver - confirm */
+const struct option my_options[] = {{0,0,0,0}}; /* long-option table containing only the all-zero terminator entry */
+
+/*
+ * Return the complement of the nucleotide code c.
+ *
+ * A<->T and G<->C are exchanged.  The case of the input is preserved, so
+ * lower-case bases are complemented to lower-case bases.  Every other
+ * character (gap '-', 'N', ...) is returned unchanged.
+ */
+static char
+complement(char c)
+{
+    switch (c) {
+    case 'G': return 'C';
+    case 'C': return 'G';
+    case 'A': return 'T';
+    case 'T': return 'A';
+    case 'g': return 'c';
+    case 'c': return 'g';
+    case 'a': return 't';
+    case 't': return 'a';
+    default:  return c;
+    }
+}
+
+/*
+ * Reverse the NUL-terminated string str in place.
+ *
+ * A NULL pointer or an empty string is left untouched; this also avoids
+ * ever forming a pointer that lies before the start of the buffer.
+ */
+static void
+reverse_string(char *str)
+{
+    char *tail;
+
+    if (!str || *str == 0) return;
+
+    /* advance tail to the last character before the terminator */
+    tail = str;
+    while (tail[1] != 0) ++tail;
+
+    /* swap from both ends until the two cursors meet */
+    while (str < tail) {
+        char tmp = *str;
+        *str = *tail;
+        *tail = tmp;
+        ++str;
+        --tail;
+    }
+}
+
+/* Option hook: this tool takes no action for any option, so both arguments are ignored. */
+void
+process_option(int code, char *optarg)
+{
+    (void)code;
+    (void)optarg;
+}
+/* End-of-run hook: intentionally does nothing. */
+void
+post_process()
+{
+}
+
+/*
+ * Print one FASTA record as its reverse complement on stdout.
+ *
+ * comment:  header text, printed after a leading '>' on its own line.
+ * sequence: NUL-terminated bases; reversed IN PLACE, so the caller's
+ *           buffer is modified.
+ *
+ * The complemented bases are written COLUMN_WIDTH per line and every
+ * line, including the last one, ends with a newline so that the next
+ * record's header always starts at the beginning of a line.
+ */
+void
+process_sequence(char *comment, char *sequence)
+{
+    int column = 0;   /* bases written on the current output line */
+    char *base;
+
+    reverse_string(sequence);
+    printf(">%s\n", comment);
+
+    for (base = sequence; *base != 0; ++base) {
+        putchar(complement(*base));
+        if (++column == COLUMN_WIDTH) {
+            putchar('\n');
+            column = 0;
+        }
+    }
+
+    /* finish a partially filled last line */
+    if (column != 0) putchar('\n');
+}
View
50 fastacomplement.fl
@@ -1,50 +0,0 @@
-/*
- * GCB 3mar11
- *
- * Replace each sequence in a FASTA file with its reverse complement.
- */
-
-%x COMMENT
-%x SEQUENCE
-
-%{
- #include <glib.h>
-
- GList *buffer = NULL;
- #define PUSH {buffer = g_list_prepend(buffer, (gpointer)(int)(*yytext));}
-
- static const int column_width = 40;
-
- static char complement(char x)
- {
- switch (x) {
- case 'G' : return 'C';
- case 'C' : return 'G';
- case 'A' : return 'T';
- case 'T' : return 'A';
- default : return '-';
- };
- }
-
- static void report_sequence()
- {
- int i = 0;
- GList *ptr;
- for (ptr = buffer; ptr != NULL; ptr = ptr->next, i++)
- {
- if ((i > 0) && ((i % column_width) == 0))
- putchar('\n');
- putchar(complement((char)GPOINTER_TO_UINT(ptr->data)));
- }
- }
-%}
-
-%%
-<INITIAL>[;>] {BEGIN(COMMENT); ECHO;}
-<COMMENT>\n {BEGIN(INITIAL); ECHO;}
-<COMMENT>. {ECHO;}
-<INITIAL>[AGTC]|- {BEGIN(SEQUENCE); g_list_free(buffer); buffer = NULL; PUSH;}
-<SEQUENCE>[AGTC]|- {PUSH;}
-<SEQUENCE>[;>] {BEGIN(COMMENT); report_sequence();}
-<SEQUENCE><<EOF>> {report_sequence(); yyterminate();}
-<*>.|\n {/* default rule - gobble */}
View
37 fastalint.c
@@ -0,0 +1,37 @@
+/*
+ * GCB 5jun11
+ *
+ * Reformat input stream to a standard column width;
+ * optionally add a comment to the first sequence
+ */
+
+#include <stdio.h>
+#include <getopt.h>
+#include "parameter.h"
+
+static int idx = 0; /* count of sequence characters written since the last init_sequence() */
+
+const char *my_optstring = "c:"; /* short options: 'c' taking a required argument */
+const struct option my_options[] = { /* long options; presumably read by the shared option-parsing driver - confirm */
+ {"comment", required_argument, NULL, 'c'}, /* --comment ARG is reported with code 'c' */
+ {0,0,0,0}}; /* all-zero terminator entry */
+
+/*
+ * Option hook.  For code 'c' the option argument is written to stdout
+ * as a comment line ("> " followed by the text); other codes are ignored.
+ */
+void
+process_option(int code, char *optarg)
+{
+    if (code != 'c')
+        return;
+
+    printf("> %s\n", optarg);
+}
+
+/* End-of-run hook: intentionally does nothing. */
+void
+post_process()
+{
+}
+
+/* Start of a sequence: reset the count of characters written. */
+void
+init_sequence()
+{
+    idx = 0;
+}
+/*
+ * Write one sequence character to stdout and break the line after every
+ * COLUMN_WIDTH characters.
+ */
+void
+handle_sequence(char c)
+{
+    idx += 1;
+    putchar(c);
+
+    if ((idx % COLUMN_WIDTH) != 0)
+        return;
+
+    putchar('\n');
+}
+
+/*
+ * End of a sequence: finish the current output line.
+ *
+ * handle_sequence() already emits a newline after every COLUMN_WIDTH
+ * characters, so a newline is written here only when the last line is
+ * partially filled.  This avoids a blank line after sequences whose
+ * length is an exact multiple of COLUMN_WIDTH (or zero).
+ */
+void
+terminate_sequence()
+{
+    if ((idx % COLUMN_WIDTH) != 0)
+        putchar('\n');
+}
+
+/* Comments are echoed to stdout: start of a comment writes the "> " prefix. */
+void
+init_comment()
+{
+    fputs("> ", stdout);
+}
+/* Echo one comment character to stdout. */
+void
+handle_comment(char c)
+{
+    putchar(c);
+}
+/* End of a comment: close the comment line with a newline. */
+void
+terminate_comment()
+{
+    putchar('\n');
+}
View
7 parameter.h
@@ -0,0 +1,7 @@
+
+/*
+ * Shared compile-time parameters.
+ *
+ * The guard macro deliberately avoids a leading underscore followed by
+ * an upper-case letter: such identifiers are reserved for the C
+ * implementation.
+ */
+#ifndef PARAMETER_H
+#define PARAMETER_H
+
+/* number of sequence characters written per output line */
+#define COLUMN_WIDTH 40
+
+#endif /* PARAMETER_H */
View
39 seekable.c
@@ -0,0 +1,39 @@
+/*
+ * GCB 5jun11
+ *
+ * Implement 'seekable' sequence data on top of stream-oriented lower layer -
+ * allow client programs to look at any part of the sequence, not just
+ * in its natural order
+ */
+
+#include <stdlib.h>
+
+static char *sequence; /* start of the growable buffer holding the current sequence */
+static char *sequence_ptr; /* write cursor: next free byte inside the sequence buffer */
+int sequence_max_length; /* allocated size of the sequence buffer in bytes (not static: has external linkage) */
+
+static char *comment; /* start of the growable buffer holding the current comment */
+static char *comment_ptr; /* write cursor: next free byte inside the comment buffer */
+int comment_max_length; /* allocated size of the comment buffer in bytes (not static: has external linkage) */
+
+#define INIT_ARRAY(name, init_length) {\
+ name##_max_length = init_length; \
+ name = (char*)malloc(name##_max_length); \
+ name##_ptr = name; \
+}
+
+#define PUSH_ARRAY(name, c) {\
+ if ((name##_ptr - name) >= (name##_max_length - 1)) \
+ {name##_max_length *= 2; name = realloc(name, name##_max_length); } \
+ *name##_ptr = c; ++name##_ptr; }
+
+extern void process_sequence(char*, char*); /* defined outside this file; called from terminate_sequence() with (comment, sequence) */
+
+/* Start of a sequence: set up the sequence buffer with an initial size of 128 bytes. */
+void
+init_sequence()
+{
+    INIT_ARRAY(sequence, 128);
+}
+/* Append one character to the sequence buffer, growing it when needed. */
+void
+handle_sequence(char c)
+{
+    PUSH_ARRAY(sequence, c);
+}
+/*
+ * End of a sequence: NUL-terminate the buffered sequence and hand it,
+ * together with the buffered comment, to process_sequence().
+ */
+void
+terminate_sequence()
+{
+    *sequence_ptr = 0;
+    process_sequence(comment, sequence);
+}
+
+/* Start of a comment: set up the comment buffer with an initial size of 32 bytes. */
+void
+init_comment()
+{
+    INIT_ARRAY(comment, 32);
+}
+/* Append one character to the comment buffer, growing it when needed. */
+void
+handle_comment(char c)
+{
+    PUSH_ARRAY(comment, c);
+}
+/* End of a comment: NUL-terminate the buffered comment text. */
+void
+terminate_comment()
+{
+    *comment_ptr = 0;
+}

0 comments on commit f5acd5b

Please sign in to comment.