Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

converted fastacount to pipeline format

  • Loading branch information...
commit e9c862701d6995267c0a278cfb8383b381fb8b94 1 parent 54e378c
greg authored
View
1  .bzrignore
@@ -6,3 +6,4 @@ fastalint
fastatail
fastacomplement
fastastack
+fastalength
View
4 Makefile
@@ -22,8 +22,8 @@ fastatail: fastatail.yy.c
fastahead: fastahead.yy.c
gcc -o$@ $< -lfl
-fastacount: fastacount.yy.c
- gcc -o$@ $< -lfl
+fastacount: lexer.yy.o fastacount.o firstin.o
+ gcc -o$@ $^ -lfl
fastagap: fastagap.yy.c
gcc -o$@ $< -lfl
View
61 fastacount.c
@@ -0,0 +1,61 @@
+
+#include <stdio.h>
+#include <getopt.h>
+
+const struct option my_options[] = {{0,0,0,0}};  /* long-option table given to getopt_long(); holds only the all-zero terminator entry */
+const char *my_optstring = "";  /* short-option string given to getopt_long(); empty */
+
+int sequence_length = 0;  /* number of A/T/G/C characters counted in the current sequence */
+int comment_length = 0;  /* number of comment characters echoed for the current comment line */
+static const int comment_max_length = 15;  /* terminate_comment() pads the echoed comment with spaces up to this width */
+
+void
+process_option(int code, char *optarg)  /* hook called by the lexer's main() with each getopt_long() result and optarg */
+{
+ /* intentionally empty: both arguments are ignored */
+}
+
+void
+post_process(void)  /* hook called by the lexer's main() after yylex() returns; nothing to do here */
+{
+}
+
+void
+init_sequence(void)  /* lexer callback: invoked when the first character of a sequence is seen */
+{
+ sequence_length = 0;  /* restart the base count for the new sequence */
+}
+
+/* Lexer callback for one sequence character: only 'A', 'T', 'G' and 'C' are counted. */
+void
+handle_sequence(char c)
+{
+ switch (c) {
+ case 'A':
+ case 'T':
+ case 'G':
+ case 'C':
+  sequence_length++;
+  break;
+ default:
+  /* any other character leaves the count unchanged */
+  break;
+ }
+}
+
+/*
+ * Lexer callback at the end of a sequence: print the number of counted
+ * bases followed by a newline.
+ *
+ * Declared with (void) so the definition is a real prototype and matches
+ * the declaration used by the lexer.
+ */
+void
+terminate_sequence(void)
+{
+ printf("%d\n", sequence_length);
+}
+
+/*
+ * Lexer callback at the start of a comment line: reset the echoed-width
+ * counter and print the "> " prefix.
+ *
+ * Declared with (void) so the definition is a real prototype and matches
+ * the declaration used by the lexer.
+ */
+void
+init_comment(void)
+{
+ comment_length = 0;
+ printf("> ");
+}
+
+/* Lexer callback for one comment character: echo it and count it. */
+void
+handle_comment(char c)
+{
+ putchar(c);
+ comment_length += 1;
+}
+
+/*
+ * Lexer callback at the end of a comment line: pad the echoed comment with
+ * spaces until comment_length reaches comment_max_length, then write one
+ * more space. Comments already at or beyond that width get only the single
+ * trailing space.
+ *
+ * Declared with (void) so the definition is a real prototype; the loop body
+ * is braced so it is clear the final putchar() runs exactly once.
+ */
+void
+terminate_comment(void)
+{
+ for (; comment_length < comment_max_length; ++comment_length) {
+  putchar(' ');
+ }
+ putchar(' ');
+}
View
34 fastacount.fl
@@ -1,34 +0,0 @@
-
-/*
- * GCB 1feb11
- *
- * Count FASTA sequences & their lengths.
- */
-
-%x COMMENT
-%x SEQUENCE
-
-%{
- int n_seq = 0;
- int n_base = 0;
- int n_skip = 0;
-
- static void report_sequence() {
- printf("Sequence %d -- length %d (%d skips)\n",
- n_seq, n_base + n_skip, n_skip);
- }
-
-%}
-
-%%
-<INITIAL>[;>] BEGIN(COMMENT);
-<COMMENT>\n BEGIN(INITIAL);
-<INITIAL>[A-Z]|- {BEGIN(SEQUENCE); ++n_seq; n_base = 0; n_skip = 0;}
-<SEQUENCE>[AGTC] {++n_base;}
-<SEQUENCE>- {++n_skip;}
-<SEQUENCE>[;>] {
- BEGIN(COMMENT);
- report_sequence();
- }
-<SEQUENCE><<EOF>> { report_sequence(); yyterminate();}
-<*>.|\n {/* default rule - gobble */}
View
1  fastagap.c
@@ -0,0 +1 @@
+
View
10 firstin.c
@@ -0,0 +1,10 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+
+extern FILE *yyin;
+
+/*
+ * Select the lexer's input stream (yyin).
+ *
+ * If argv[1] exists it is opened for reading; otherwise input comes from
+ * stdin. The lexer's main() forwards optind rather than the real argument
+ * count, so argc alone cannot tell whether argv[1] is present; the NULL
+ * entry that terminates argv is checked instead.
+ *
+ * Prints a diagnostic and exits with EXIT_FAILURE if the named file cannot
+ * be opened.
+ */
+void
+process_args(int argc, char *argv[])
+{
+ /* argc < 1 is tested first so argv[1] is never read from an empty argv. */
+ if (argc < 1 || argv[1] == NULL) {
+  yyin = stdin;
+  return;
+ }
+
+ yyin = fopen(argv[1], "r");
+ if (yyin == NULL) {
+  perror(argv[1]);
+  exit(EXIT_FAILURE);
+ }
+}
View
57 lexer.fl
@@ -0,0 +1,57 @@
+/*
+ * Lexer core reads FASTA input and passes it to handlers.
+ */
+
+%{
+ #include <getopt.h>
+
+ /* Option tables passed to getopt_long() in main(). */
+ extern const struct option my_options[];
+ /* const-qualified to match the definition: const char *my_optstring. */
+ extern const char *my_optstring;
+
+ /* Hooks called from main() around the lexer run. */
+ extern void process_args(int, char*[]);
+ extern void process_option(int, char*);
+ extern void post_process(void);
+
+ /* Callbacks invoked by the SEQUENCE rules. */
+ extern void init_sequence(void);
+ extern void handle_sequence(char);
+ extern void terminate_sequence(void);
+
+ /* Callbacks invoked by the COMMENT rules. */
+ extern void init_comment(void);
+ extern void handle_comment(char);
+ extern void terminate_comment(void);
+%}
+
+%x COMMENT
+%x SEQUENCE
+
+%%
+
+<INITIAL>[;>] { /* ';' or '>' opens a comment line */ init_comment(); BEGIN(COMMENT); }
+<COMMENT>. { /* pass each comment character to the handler */ handle_comment(*yytext); }
+<COMMENT>\n { /* newline closes the comment line */ terminate_comment(); BEGIN(INITIAL); }
+<INITIAL>[A-Z]|- { /* first sequence character: switch to SEQUENCE and push it back so the SEQUENCE rule handles it */ BEGIN(SEQUENCE); init_sequence(); unput(*yytext); }
+<SEQUENCE>[A-Z]|- { /* upper-case letter or '-' is handed to the sequence handler */ handle_sequence(*yytext); }
+<SEQUENCE>[;>] { /* a comment marker closes the current sequence and opens a comment */ terminate_sequence(); init_comment(); BEGIN(COMMENT); }
+<SEQUENCE><<EOF>> { /* end of input closes the current sequence */ terminate_sequence(); yyterminate(); }
+<*>.|\n { /* default rule - gobble */ }
+
+%%
+
+/*
+ * Driver: hand every getopt_long() result to process_option(), let
+ * process_args() set up the input, run the lexer, then call post_process().
+ */
+int
+main(int argc, char *argv[])
+{
+ for (;;)
+ {
+  int long_index = 0;
+  int opt = getopt_long(argc, argv, my_optstring, my_options, &long_index);
+
+  if (opt == -1)
+   break;
+  process_option(opt, optarg);
+ }
+
+ /* NOTE(review): optind, not argc, is forwarded as the first argument - confirm this is intended. */
+ process_args(optind, argv);
+ yylex();
+ post_process();
+
+ return 0;
+}
View
161 pGFPe-Ler.fasta
@@ -0,0 +1,161 @@
+> pGFPe/Ler
+GCGAATGGGACGCGCCCTGTAGCGGCGCATTAAGCGCGGC
+GGGTGTGGTGGTTACGCGCAGCGTGACCGCTACACTTGCC
+AGCGCCCTAGCGCCCGCTCCTTTCGCTTTCTTCCCTTCCT
+TTCTCGCCACGTTCGCCGGCTTTCCCCGTCAAGCTCTAAA
+TCGGGGGCTCCCTTTAGGGTTCCGATTTAGTGCTTTACGG
+CACCTCGACCCCAAAAAACTTGATTAGGGTGATGGTTCAC
+GTAGTGGGCCATCGCCCTGATAGACGGTTTTTCGCCCTTT
+GACGTTGGAGTCCACGTTCTTTAATAGTGGACTCTTGTTC
+CAAACTGGAACAACACTCAACCCTATCTCGGTCTATTCTT
+TTGATTTATAAGGGATTTTGCCGATTTCGGCCTATTGGTT
+AAAAAATGAGCTGATTTAACAAAAATTTAACGCGAATTTT
+AACAAAATATTAACGTTTACAATTTCAGGTGGCACTTTTC
+GGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTA
+AATACATTCAAATATGTATCCGCTCATGAATTAATTCTTA
+GAAAAACTCATCGAGCATCAAATGAAACTGCAATTTATTC
+ATATCAGGATTATCAATACCATATTTTTGAAAAAGCCGTT
+TCTGTAATGAAGGAGAAAACTCACCGAGGCAGTTCCATAG
+GATGGCAAGATCCTGGTATCGGTCTGCGATTCCGACTCGT
+CCAACATCAATACAACCTATTAATTTCCCCTCGTCAAAAA
+TAAGGTTATCAAGTGAGAAATCACCATGAGTGACGACTGA
+ATCCGGTGAGAATGGCAAAAGTTTATGCATTTCTTTCCAG
+ACTTGTTCAACAGGCCAGCCATTACGCTCGTCATCAAAAT
+CACTCGCATCAACCAAACCGTTATTCATTCGTGATTGCGC
+CTGAGCGAGACGAAATACGCGATCGCTGTTAAAAGGACAA
+TTACAAACAGGAATCGAATGCAACCGGCGCAGGAACACTG
+CCAGCGCATCAACAATATTTTCACCTGAATCAGGATATTC
+TTCTAATACCTGGAATGCTGTTTTCCCGGGGATCGCAGTG
+GTGAGTAACCATGCATCATCAGGAGTACGGATAAAATGCT
+TGATGGTCGGAAGAGGCATAAATTCCGTCAGCCAGTTTAG
+TCTGACCATCTCATCTGTAACATCATTGGCAACGCTACCT
+TTGCCATGTTTCAGAAACAACTCTGGCGCATCGGGCTTCC
+CATACAATCGATAGATTGTCGCACCTGATTGCCCGACATT
+ATCGCGAGCCCATTTATACCCATATAAATCAGCATCCATG
+TTGGAATTTAATCGCGGCCTAGAGCAAGACGTTTCCCGTT
+GAATATGGCTCATAACACCCCTTGTATTACTGTTTATGTA
+AGCAGACAGTTTTATTGTTCATGACCAAAATCCCTTAACG
+TGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAG
+ATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAA
+TCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGT
+GGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCG
+AAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTG
+TCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAA
+CTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTG
+TTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTA
+CCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCA
+GCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGC
+TTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGC
+GTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAA
+GGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGA
+GAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATC
+TTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCG
+TCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGG
+AAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCT
+TTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATC
+CCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGA
+GCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCG
+AGTCAGTGAGCGAGGAAGCGGAAGAGCGCCTGATGCGGTA
+TTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATA
+TATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATA
+GTTAAGCCAGTATACACTCCGCTATCGCTACGTGACTGGG
+TCATGGCTGCGCCCCGACACCCGCCAACACCCGCTGACGC
+GCCCTGACGGGCTTGTCTGCTCCCGGCATCCGCTTACAGA
+CAAGCTGTGACCGTCTCCGGGAGCTGCATGTGTCAGAGGT
+TTTCACCGTCATCACCGAAACGCGCGAGGCAGCTGCGGTA
+AAGCTCATCAGCGTGGTCGTGAAGCGATTCACAGATGTCT
+GCCTGTTCATCCGCGTCCAGCTCGTTGAGTTTCTCCAGAA
+GCGTTAATGTCTGGCTTCTGATAAAGCGGGCCATGTTAAG
+GGCGGTTTTTTCCTGTTTGGTCACTGATGCCTCCGTGTAA
+GGGGGATTTCTGTTCATGGGGGTAATGATACCGATGAAAC
+GAGAGAGGATGCTCACGATACGGGTTACTGATGATGAACA
+TGCCCGGTTACTGGAACGTTGTGAGGGTAAACAACTGGCG
+GTATGGATGCGGCGGGACCAGAGAAAAATCACTCAGGGTC
+AATGCCAGCGCTTCGTTAATACAGATGTAGGTGTTCCACA
+GGGTAGCCAGCAGCATCCTGCGATGCAGATCCGGAACATA
+ATGGTGCAGGGCGCTGACTTCCGCGTTTCCAGACTTTACG
+AAACACGGAAACCGAAGACCATTCATGTTGTTGCTCAGGT
+CGCAGACGTTTTGCAGCAGCAGTCGCTTCACGTTCGCTCG
+CGTATCGGTGATTCATTCTGCTAACCAGTAAGGCAACCCC
+GCCAGCCTAGCCGGGTCCTCAACGACAGGAGCACGATCAT
+GCGCACCCGTGGGGCCGCCATGCCGGCGATAATGGCCTGC
+TTCTCGCCGAAACGTTTGGTGGCGGGACCAGTGACGAAGG
+CTTGAGCGAGGGCGTGCAAGATTCCGAATACCGCAAGCGA
+CAGGCCGATCATCGTCGCGCTCCAGCGAAAGCGGTCCTCG
+CCGAAAATGACCCAGAGCGCTGCCGGCACCTGTCCTACGA
+GTTGCATGATAAAGAAGACAGTCATAAGTGCGGCGACGAT
+AGTCATGCCCCGCGCCCACCGGAAGGAGCTGACTGGGTTG
+AAGGCTCTCAAGGGCATCGGTCGAGATCCCGGTGCCTAAT
+GAGTGAGCTAACTTACATTAATTGCGTTGCGCTCACTGCC
+CGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAA
+TGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTG
+GGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACGGGCA
+ACAGCTGATTGCCCTTCACCGCCTGGCCCTGAGAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAA
+TCCTGTTTGATGGTGGTTAACGGCGGGATATAACATGAGC
+TGTCTTCGGTATCGTCGTATCCCACTACCGAGATATCCGC
+ACCAACGCGCAGCCCGGACTCGGTAATGGCGCGCATTGCG
+CCCAGCGCCATCTGATCGTTGGCAACCAGCATCGCAGTGG
+GAACGATGCCCTCATTCAGCATTTGCATGGTTTGTTGAAA
+ACCGGACATGGCACTCCAGTCGCCTTCCCGTTCCGCTATC
+GGCTGAATTTGATTGCGAGTGAGATATTTATGCCAGCCAG
+CCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGC
+TAACAGCGCGATTTGCTGGTGACCCAATGCGACCAGATGC
+TCCACGCCCAGTCGCGTACCGTCTTCATGGGAGAAAATAA
+TACTGTTGATGGGTGTCTGGTCAGAGACATCAAGAAATAA
+CGCCGGAACATTAGTGCAGGCAGCTTCCACAGCAATGGCA
+TCCTGGTCATCCAGCGGATAGTTAATGATCAGCCCACTGA
+CGCGTTGCGCGAGAAGATTGTGCACCGCCGCTTTACAGGC
+TTCGACGCCGCTTCGTTCTACCATCGACACCACCACGCTG
+GCACCCAGTTGATCGGCGCGAGATTTAATCGCCGCGACAA
+TTTGCGACGGCGCGTGCAGGGCCAGACTGGAGGTGGCAAC
+GCCAATCAGCAACGACTGTTTGCCCGCCAGTTGTTGTGCC
+ACGCGGTTGGGAATGTAATTCAGCTCCGCCATCGCCGCTT
+CCACTTTTTCCCGCGTTTTCGCAGAAACGTGGCTGGCCTG
+GTTCACCACGCGGGAAACGGTCTGATAAGAGACACCGGCA
+TACTCTGCGACATCGTATAACGTTACTGGTTTCACATTCA
+CCACCCTGAATTGACTCTCTTCCGGGCGCTATCATGCCAT
+ACCGCGAAAGGTTTTGCGCCATTCGATGGTGTCCGGGATC
+TCGACGCTCTCCCTTATGCGACTCCTGCATTAGGAAGCAG
+CCCAGTAGTAGGTTGAGGCCGTTGAGCACCGCCGCCGCAA
+GGAATGGTGCATGCAAGGAGATGGCGCCCAACAGTCCCCC
+GGCCACGGGGCCTGCCACCATACCCACGCCGAAACAAGCG
+CTCATGAGCCCGAAGTGGCGAGCCCGATCTTCCCCATCGG
+TGATGTCGGCGATATAGGCGCCAGCAACCGCACCTGTGGC
+GCCGGTGATGCCGGCCACGATGCGTCCGGCGTAGAGGATC
+GAGATCTCGATCCCGCGAAATTAATACGACTCACTATAGG
+GGAATTGTGAGCGGATAACAATTCCCCTCTAGAAATAATT
+TTGTTTAACTTTAAGAAGGAGACTCGAGATGAATATGGAA
+ACTAATTCACATACAACAAGTCCATACATTCAGCTTATAG
+AGCAAATTGCAGTTCTACAGCAGGAAGCAAAGCGACTGCG
+AGAGCAGGAAGTTCAAAGTGTAATTGAGTCGATTCAGAAG
+CAGATTACTTATTACAATATAACCTTACAAGAGCTGGGAT
+ATACTAATGTGCCTGATGATGGACTCGCTCGCCGGAACTC
+ATCGAAAGGTGTTTACTACCGCAATGAAGAAGGGCAGACC
+TGGTCGGGCGTAGGCCGACAGCCACGCTGGCTTAAAGAAG
+CACTGTTGAATGGAATGAAGAAAGAAGATTTTCTTGTGAA
+GGACACTGAAGAAGAAATAATACCGCTGAAAAATATTGGT
+ACCGGATCCGAAAACCTGTACTTCCAGGGTCAATTCAGCA
+AAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGT
+TGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTC
+AGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAACTCA
+CCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCC
+ATGGCCAACACTTGTCACTACTCTGACCTATGGTGTTCAA
+TGCTTTTCCCGTTATCCGGATCACATGAAACGGCATGACT
+TTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACG
+CACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGT
+GCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTA
+TCGAGTTAAAGGGTATTGATTTTAAAGAAGATGGAAACAT
+TCTCGGACACAAACTAGAGTACAACTATAACTCACACAAT
+GTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAG
+CTAACTTCAAAATTCGCCACAACATTGAAGATGGTTCCGT
+TCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGC
+GATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGA
+CACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGA
+CCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATT
+ACACATGGCATGGATGAGCTCTACAAAAAGCTTGCGGCCC
+ATCATCATCACCACCACCACCACTGAGATCCGGCTGCTAA
+CAAAGCCCGAAAGGAAGCTGAGTTGGCTGCTGCCACCGCT
+GAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGG
+TCTTGAGGGGTTTTTTGCTGAAAGGAGGAACTATATCCGG
+AT
Please sign in to comment.
Something went wrong with that request. Please try again.