diff --git a/.gitignore b/.gitignore index af0d0de..ea01ac8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,5 @@ # For zika-seq repo # environment* -data/libraries/ -scripts/ssw_lib.py # Jekyll # ########## diff --git a/README.md b/README.md index 626d635..8872606 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,68 @@ # Experimental protocols and bioinformatic pipelines for Zika genome sequencing +#### Allison Black1,2, Barney Potter2, Nicholas J. Loman3, Trevor Bedford2 + +1Department of Epidemiology, University of Washington, Seattle, WA, USA, 2Vaccine and Infectious Disease Division, Fred Hutchinson Cancer Research Center, Seattle, WA, USA, 3Institute of Microbiology and Infection, University of Birmingham, Birmingham, UK + ## Install -Clone the repo: +Clone the repo and load submodules: git clone https://github.com/blab/zika-seq.git + git submodule update --init --recursive -Install Python dependencies: +## Data sync - pip install -r requirements.txt +Primary sequencing data lives on the Rhino FHCRC cluster at: -Install [SSW Library](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library): + /fh/fast/bedford_t/data/ - git clone https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library.git - cd Complete-Striped-Smith-Waterman-Library/src/ - cp libssw.so /zika-seq/scripts/ - cp ssw_lib.py /zika-seq/scripts/ +And locally on Meristem drive at: -Install [marginAlign](https://github.com/benedictpaten/marginAlign): + /Volumes/Meristem/data/ - git clone https://github.com/benedictpaten/marginAlign.git - cd marginAlign - git submodule update --init --recursive - make - export PATH=/marginAlign/:$PATH +To sync Meristem to Rhino, run: -Install [samtools](https://github.com/samtools/samtools): + rsync -azP tbedford@rhino.fhcrc.org:/fh/fast/bedford_t/data/ /Volumes/Meristem/data/ - brew tap homebrew/science - brew install samtools +Replacing `tbedford` with your username. 
-## Data sync +This `data/` directory is assumed to follow [a particular schema](https://github.com/blab/zika-seq/blob/master/data-schema.md). -From `zika-seq` run: +## Bioinformatic pipeline - rsync -azP tbedford@rhino.fhcrc.org:/fh/fast/bedford_t/zika-seq/data/ data/ +Here, we use the ZiBRA project bioinformatic pipeline at [zibraproject/zika-pipeline](https://github.com/zibraproject/zika-pipeline/). This pipeline is instantiated in the Docker image [zibra/zibra](https://hub.docker.com/r/zibra/zibra/). Data processing is done using Docker. -Replacing `tbedford` with your username. +### Data volume -## Bioinformatic pipeline +Create a named data volume that mirrors local `data/` to `data/` within the container: + + docker create --name zibra-data -v /Volumes/Meristem/data:/data zibra/zibra + +This is to get data into the Docker container. Note that the path to the local directory has to be an absolute path. + +Create a named data volume for a single sample: + + docker create --name zibra-data-lb01-nb01 -v /Volumes/Meristem/data/usvi-library1-2016-12-10/basecalled_reads/pass_demultiplex/NB01:/data zibra/zibra + +### Build volume + +Create a named data volume that mirrors local `build/` to `build/` within the container: + + docker create --name zibra-build -v /Volumes/Meristem/build:/build zibra/zibra + +This is to get data out of the Docker container. Note that the path to the local directory has to be an absolute path. -Data lives in the [`data/`](data/) directory and is not versioned within the repo. Directory structure described in its [README.md](data/). 
+### Start -### Base calling +Enter docker image: -Convert raw MinION output to FAST5 + docker run -t -i --volumes-from zibra-data --volumes-from zibra-build zibra/zibra /bin/bash - metrichor-cli -a -w 1289 -f - -i -o downloads +Run single sample script within image: -### Run pipeline + ./scripts/go_single_sample_r94.sh refs/KJ776791.2.fasta NB03 metadata/v2_500.amplicons.ver2.bed -Run poretools, marginAlign, samtools: +## Results - python run.py + * [Initial coverage results from first library are here](depth-coverage/) diff --git a/data-schema.md b/data-schema.md new file mode 100644 index 0000000..bf1b524 --- /dev/null +++ b/data-schema.md @@ -0,0 +1,20 @@ +# Data schema + +## Nanopore reads + +Input data to the Zika pipeline arrives in the `data/` directory. This should be [mounted to `data/` in the Docker container](https://github.com/blab/zika-seq#data-volume). + + - `data` + - `usvi-library1-2016-12-10` - library + - `raw_reads` - squiggle graphs in fast5 format + - `pass` - contains `.fast5` files + - `basecalled_reads` - basecalled with Metrichor + - `pass_demultiplex` - demultiplexed basecalled reads + - `NB01` - contains `.fast5` files for NB01 barcode + - `NB02` - contains `.fast5` files for NB02 barcode + - etc... + - `nonNB_demultiplexed` - demultiplexed basecalled reads + - `BC01` - contains `.fast5` files for BC01 barcode + - `BC02` - contains `.fast5` files for BC02 barcode + - etc... 
+ - `fail` - contains `.fast5` files that weren't demultiplexed diff --git a/data/README.md b/data/README.md deleted file mode 100644 index 4824199..0000000 --- a/data/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Data organization - - - `libraries` - - `usvi-library1-2016-12-10` - - `raw_reads`: FAST5 reads before Metrichor processing - - `basecalled_reads`: FAST5 reads after Metrichor processing - - `pass_demultiplex`: - - `NB01`, etc...: List of `.fast5` files - - `fail`: List of `.fast5` files diff --git a/depth_coverage/README.md b/depth-coverage/README.md similarity index 100% rename from depth_coverage/README.md rename to depth-coverage/README.md diff --git a/scripts/depth_coverage.R b/depth-coverage/depth_coverage.R similarity index 100% rename from scripts/depth_coverage.R rename to depth-coverage/depth_coverage.R diff --git a/depth_coverage/figures/Coverage-Overlap-NB01-NB07.png b/depth-coverage/figures/Coverage-Overlap-NB01-NB07.png similarity index 100% rename from depth_coverage/figures/Coverage-Overlap-NB01-NB07.png rename to depth-coverage/figures/Coverage-Overlap-NB01-NB07.png diff --git a/depth_coverage/figures/Coverage-Overlap-NB02-NB08.png b/depth-coverage/figures/Coverage-Overlap-NB02-NB08.png similarity index 100% rename from depth_coverage/figures/Coverage-Overlap-NB02-NB08.png rename to depth-coverage/figures/Coverage-Overlap-NB02-NB08.png diff --git a/depth_coverage/figures/Coverage-Overlap-NB03-NB09.png b/depth-coverage/figures/Coverage-Overlap-NB03-NB09.png similarity index 100% rename from depth_coverage/figures/Coverage-Overlap-NB03-NB09.png rename to depth-coverage/figures/Coverage-Overlap-NB03-NB09.png diff --git a/depth_coverage/figures/Coverage-Overlap-NB04-NB10.png b/depth-coverage/figures/Coverage-Overlap-NB04-NB10.png similarity index 100% rename from depth_coverage/figures/Coverage-Overlap-NB04-NB10.png rename to depth-coverage/figures/Coverage-Overlap-NB04-NB10.png diff --git a/depth_coverage/figures/Coverage-Overlap-NB05-NB11.png 
b/depth-coverage/figures/Coverage-Overlap-NB05-NB11.png similarity index 100% rename from depth_coverage/figures/Coverage-Overlap-NB05-NB11.png rename to depth-coverage/figures/Coverage-Overlap-NB05-NB11.png diff --git a/depth_coverage/figures/Coverage-Overlap-NB06-NB12.png b/depth-coverage/figures/Coverage-Overlap-NB06-NB12.png similarity index 100% rename from depth_coverage/figures/Coverage-Overlap-NB06-NB12.png rename to depth-coverage/figures/Coverage-Overlap-NB06-NB12.png diff --git a/refs/Zika_Africa.fasta b/refs/Zika_Africa.fasta deleted file mode 100755 index c474f9a..0000000 --- a/refs/Zika_Africa.fasta +++ /dev/null @@ -1,2 +0,0 @@ ->gi|226377833|ref|NC_012532.1| Zika virus, complete genome -AGTTGTTGATCTGTGTGAGTCAGACTGCGACAGTTCGAGTCTGAAGCGAGAGCTAACAACAGTATCAACAGGTTTAATTTGGATTTGGAAACGAGAGTTTCTGGTCATGAAAAACCCCAAAGAAGAAATCCGGAGGATCCGGATTGTCAATATGCTAAAACGCGGAGTAGCCCGTGTAAACCCCTTGGGAGGTTTGAAGAGGTTGCCAGCCGGACTTCTGCTGGGTCATGGACCCATCAGAATGGTTTTGGCGATACTAGCCTTTTTGAGATTTACAGCAATCAAGCCATCACTGGGCCTTATCAACAGATGGGGTTCCGTGGGGAAAAAAGAGGCTATGGAAATAATAAAGAAGTTCAAGAAAGATCTTGCTGCCATGTTGAGAATAATCAATGCTAGGAAAGAGAGGAAGAGACGTGGCGCAGACACCAGCATCGGAATCATTGGCCTCCTGCTGACTACAGCCATGGCAGCAGAGATCACTAGACGCGGGAGTGCATACTACATGTACTTGGATAGGAGCGATGCCGGGAAGGCCATTTCGTTTGCTACCACATTGGGAGTGAACAAGTGCCACGTACAGATCATGGACCTCGGGCACATGTGTGACGCCACCATGAGTTATGAGTGCCCTATGCTGGATGAGGGAGTGGAACCAGATGATGTCGATTGCTGGTGCAACACGACATCAACTTGGGTTGTGTACGGAACCTGTCATCACAAAAAAGGTGAGGCACGGCGATCTAGAAGAGCCGTGACGCTCCCTTCTCACTCTACAAGGAAGTTGCAAACGCGGTCGCAGACCTGGTTAGAATCAAGAGAATACACGAAGCACTTGATCAAGGTTGAAAACTGGATATTCAGGAACCCCGGGTTTGCGCTAGTGGCCGTTGCCATTGCCTGGCTTTTGGGAAGCTCGACGAGCCAAAAAGTCATATACTTGGTCATGATACTGCTGATTGCCCCGGCATACAGTATCAGGTGCATTGGAGTCAGCAATAGAGACTTCGTGGAGGGCATGTCAGGTGGGACCTGGGTTGATGTTGTCTTGGAACATGGAGGCTGCGTTACCGTGATGGCACAGGACAAGCCAACAGTCGACATAGAGTTGGTCACGACGACGGTTAGTAACATGGCCGAGGTAAGATCCTATTGCTACGAGGCATCGATATCGGACATGGCTTCGGACAGTCGTTGCCCAACACAAGGTGAAGCCTACCTTGACAAGCAATCAGACACTCAATATGTCTGCAAAAGAACATTAGTGGACAGAGGTTGGGGAAACGGT
TGTGGACTTTTTGGCAAAGGGAGCTTGGTGACATGTGCCAAGTTTACGTGTTCTAAGAAGATGACCGGGAAGAGCATTCAACCGGAAAATCTGGAGTATCGGATAATGCTATCAGTGCATGGCTCCCAGCATAGCGGGATGATTGGATATGAAACTGACGAAGATAGAGCGAAAGTCGAGGTTACGCCTAATTCACCAAGAGCGGAAGCAACCTTGGGAGGCTTTGGAAGCTTAGGACTTGACTGTGAACCAAGGACAGGCCTTGACTTTTCAGATCTGTATTACCTGACCATGAACAATAAGCATTGGTTGGTGCACAAAGAGTGGTTTCATGACATCCCATTGCCTTGGCATGCTGGGGCAGACACCGGAACTCCACACTGGAACAACAAAGAGGCATTGGTAGAATTCAAGGATGCCCACGCCAAGAGGCAAACCGTCGTCGTTCTGGGGAGCCAGGAAGGAGCCGTTCACACGGCTCTCGCTGGAGCTCTAGAGGCTGAGATGGATGGTGCAAAGGGAAGGCTGTTCTCTGGCCATTTGAAATGCCGCCTAAAAATGGACAAGCTTAGATTGAAGGGCGTGTCATATTCCTTGTGCACTGCGGCATTCACATTCACCAAGGTCCCAGCTGAAACACTGCATGGAACAGTCACAGTGGAGGTGCAGTATGCAGGGACAGATGGACCCTGCAAGATCCCAGTCCAGATGGCGGTGGACATGCAGACCCTGACCCCAGTTGGAAGGCTGATAACCGCCAACCCCGTGATTACTGAAAGCACTGAGAACTCAAAGATGATGTTGGAGCTTGACCCACCATTTGGGGATTCTTACATTGTCATAGGAGTTGGGGACAAGAAAATCACCCACCACTGGCATAGGAGTGGTAGCACCATCGGAAAGGCATTTGAGGCCACTGTGAGAGGCGCCAAGAGAATGGCAGTCCTGGGGGATACAGCCTGGGACTTCGGATCAGTCGGGGGTGTGTTCAACTCACTGGGTAAGGGCATTCACCAGATTTTTGGAGCAGCCTTCAAATCACTGTTTGGAGGAATGTCCTGGTTCTCACAGATCCTCATAGGCACGCTGCTAGTGTGGTTAGGTTTGAACACAAAGAATGGATCTATCTCCCTCACATGCTTGGCCCTGGGGGGAGTGATGATCTTCCTCTCCACGGCTGTTTCTGCTGACGTGGGGTGCTCAGTGGACTTCTCAAAAAAGGAAACGAGATGTGGCACGGGGGTATTCATCTATAATGATGTTGAAGCCTGGAGGGACCGGTACAAGTACCATCCTGACTCCCCCCGCAGATTGGCAGCAGCAGTCAAGCAGGCCTGGGAAGAGGGGATCTGTGGGATCTCATCCGTTTCAAGAATGGAAAACATCATGTGGAAATCAGTAGAAGGGGAGCTCAATGCTATCCTAGAGGAGAATGGAGTTCAACTGACAGTTGTTGTGGGATCTGTAAAAAACCCCATGTGGAGAGGTCCACAAAGATTGCCAGTGCCTGTGAATGAGCTGCCCCATGGCTGGAAAGCCTGGGGGAAATCGTATTTTGTTAGGGCGGCAAAGACCAACAACAGTTTTGTTGTCGACGGTGACACACTGAAGGAATGTCCGCTTGAGCACAGAGCATGGAATAGTTTTCTTGTGGAGGATCACGGGTTTGGAGTCTTCCACACCAGTGTCTGGCTTAAGGTCAGAGAAGATTACTCATTAGAATGTGACCCAGCCGTCATAGGAACAGCTGTTAAGGGAAGGGAGGCCGCGCACAGTGATCTGGGCTATTGGATTGAAAGTGAAAAGAATGACACATGGAGGCTGAAGAGGGCCCACCTGATTGAGATGAAAACATGTGAATGGCCAAAGTCTCACACATTGTGGACAGATGGAGTAGAAGAAAGTGATCTTATCATACCCAAGTCTTTAGCTGGTCCACTCAGCCACCACAACACCAGAGAGGGTTACAGAACCCAAGTGAAAGGGCCATGGCACAGTGA
AGAGCTTGAAATCCGGTTTGAGGAATGTCCAGGCACCAAGGTTTACGTGGAGGAGACATGCGGAACTAGAGGACCATCTCTGAGATCAACTACTGCAAGTGGAAGGGTCATTGAGGAATGGTGCTGTAGGGAATGCACAATGCCCCCACTATCGTTTCGAGCAAAAGACGGCTGCTGGTATGGAATGGAGATAAGGCCCAGGAAAGAACCAGAGAGCAACTTAGTGAGGTCAATGGTGACAGCGGGGTCAACCGATCATATGGACCACTTCTCTCTTGGAGTGCTTGTGATTCTACTCATGGTGCAGGAGGGGTTGAAGAAGAGAATGACCACAAAGATCATCATGAGCACATCAATGGCAGTGCTGGTAGTCATGATCTTGGGAGGATTTTCAATGAGTGACCTGGCCAAGCTTGTGATCCTGATGGGTGCTACTTTCGCAGAAATGAACACTGGAGGAGATGTAGCTCACTTGGCATTGGTAGCGGCATTTAAAGTCAGACCAGCCTTGCTGGTCTCCTTCATTTTCAGAGCCAATTGGACACCCCGTGAGAGCATGCTGCTAGCCCTGGCTTCGTGTCTTCTGCAAACTGCGATCTCTGCTCTTGAAGGTGACTTGATGGTCCTCATTAATGGATTTGCTTTGGCCTGGTTGGCAATTCGAGCAATGGCCGTGCCACGCACTGACAACATCGCTCTACCAATCTTGGCTGCTCTAACACCACTAGCTCGAGGCACACTGCTCGTGGCATGGAGAGCGGGCCTGGCTACTTGTGGAGGGATCATGCTCCTCTCCCTGAAAGGGAAAGGTAGTGTGAAGAAGAACCTGCCATTTGTCATGGCCCTGGGATTGACAGCTGTGAGGGTAGTAGACCCTATTAATGTGGTAGGACTACTGTTACTCACAAGGAGTGGGAAGCGGAGCTGGCCCCCTAGTGAAGTTCTCACAGCCGTTGGCCTGATATGTGCACTGGCCGGAGGGTTTGCCAAGGCAGACATTGAGATGGCTGGACCCATGGCTGCAGTAGGCTTGCTAATTGTCAGCTATGTGGTCTCGGGAAAGAGTGTGGACATGTACATTGAAAGAGCAGGTGACATCACATGGGAAAAGGACGCGGAAGTCACTGGAAACAGTCCTCGGCTTGACGTGGCACTGGATGAGAGTGGTGACTTCTCCTTGGTAGAGGAAGATGGTCCACCCATGAGAGAGATCATACTCAAGGTGGTCCTGATGGCCATCTGTGGCATGAACCCAATAGCTATACCTTTTGCTGCAGGAGCGTGGTATGTGTATGTGAAGACTGGGAAAAGGAGTGGCGCCCTCTGGGACGTGCCTGCTCCCAAAGAAGTGAAGAAAGGAGAGACCACAGATGGAGTGTACAGAGTGATGACTCGCAGACTGCTAGGTTCAACACAGGTTGGAGTGGGAGTCATGCAAGAGGGAGTCTTCCACACCATGTGGCACGTTACAAAAGGAGCCGCACTGAGGAGCGGTGAGGGAAGACTTGATCCATACTGGGGGGATGTCAAGCAGGACTTGGTGTCATACTGTGGGCCTTGGAAGTTGGATGCAGCTTGGGATGGACTCAGCGAGGTACAGCTTTTGGCCGTACCTCCCGGAGAGAGGGCCAGAAACATTCAGACCCTGCCTGGAATATTCAAGACAAAGGACGGGGACATCGGAGCAGTTGCTCTGGACTACCCTGCAGGGACCTCAGGATCTCCGATCCTAGACAAATGTGGAAGAGTGATAGGACTCTATGGCAATGGGGTTGTGATCAAGAATGGAAGCTATGTTAGTGCTATAACCCAGGGAAAGAGGGAGGAGGAGACTCCGGTTGAATGTTTCGAACCCTCGATGCTGAAGAAGAAGCAGCTAACTGTCTTGGATCTGCATCCAGGAGCCGGAAAAACCAGGAGAGTTCTTCCTGAAATAGTCCGTGAAGCCATAAAAAAGAGACTCCGGACAGTGATCTTGGCACCAACTAGGGTTGTCGCTG
CTGAGATGGAGGAGGCCTTGAGAGGACTTCCGGTGCGTTACATGACAACAGCAGTCAACGTCACCCATTCTGGGACAGAAATCGTTGATTTGATGTGCCATGCCACTTTCACTTCACGCTTACTACAACCCATCAGAGTCCCTAATTACAATCTCAACATCATGGATGAAGCCCACTTCACAGACCCCTCAAGTATAGCTGCAAGAGGATACATATCAACAAGGGTTGAAATGGGCGAGGCGGCTGCCATTTTTATGACTGCCACACCACCAGGAACCCGTGATGCGTTTCCTGACTCTAACTCACCAATCATGGACACAGAAGTGGAAGTCCCAGAGAGAGCCTGGAGCTCAGGCTTTGATTGGGTGACAGACCATTCTGGGAAAACAGTTTGGTTCGTTCCAAGCGTGAGAAACGGAAATGAAATCGCAGCCTGTCTGACAAAGGCTGGAAAGCGGGTCATACAGCTCAGCAGGAAGACTTTTGAGACAGAATTTCAGAAAACAAAAAATCAAGAGTGGGACTTTGTCATAACAACTGACATCTCAGAGATGGGCGCCAACTTCAAGGCTGACCGGGTCATAGACTCTAGGAGATGCCTAAAACCAGTCATACTTGATGGTGAGAGAGTCATCTTGGCTGGGCCCATGCCTGTCACGCATGCTAGTGCTGCTCAGAGGAGAGGACGTATAGGCAGGAACCCTAACAAACCTGGAGATGAGTACATGTATGGAGGTGGGTGTGCAGAGACTGATGAAGGCCATGCACACTGGCTTGAAGCAAGAATGCTTCTTGACAACATCTACCTCCAGGATGGCCTCATAGCCTCGCTCTATCGGCCTGAGGCCGATAAGGTAGCCGCCATTGAGGGAGAGTTTAAGCTGAGGACAGAGCAAAGGAAGACCTTCGTGGAACTCATGAAGAGAGGAGACCTTCCCGTCTGGCTAGCCTATCAGGTTGCATCTGCCGGAATAACTTACACAGACAGAAGATGGTGCTTTGATGGCACAACCAACAACACCATAATGGAAGACAGTGTACCAGCAGAGGTTTGGACAAAGTATGGAGAGAAGAGAGTGCTCAAACCGAGATGGATGGATGCTAGGGTCTGTTCAGACCATGCGGCCCTGAAGTCGTTCAAAGAATTCGCCGCTGGAAAAAGAGGAGCGGCTTTGGGAGTAATGGAGGCCCTGGGAACACTGCCAGGACACATGACAGAGAGGTTTCAGGAAGCCATTGACAACCTCGCCGTGCTCATGCGAGCAGAGACTGGAAGCAGGCCTTATAAGGCAGCGGCAGCCCAACTGCCGGAGACCCTAGAGACCATTATGCTCTTAGGTTTGCTGGGAACAGTTTCACTGGGGATCTTCTTCGTCTTGATGCGGAATAAGGGCATCGGGAAGATGGGCTTTGGAATGGTAACCCTTGGGGCCAGTGCATGGCTCATGTGGCTTTCGGAAATTGAACCAGCCAGAATTGCATGTGTCCTCATTGTTGTGTTTTTATTACTGGTGGTGCTCATACCCGAGCCAGAGAAGCAAAGATCTCCCCAAGATAACCAGATGGCAATTATCATCATGGTGGCAGTGGGCCTTCTAGGTTTGATAACTGCAAACGAACTTGGATGGCTGGAAAGAACAAAAAATGACATAGCTCATCTAATGGGAAGGAGAGAAGAAGGAGCAACCATGGGATTCTCAATGGACATTGATCTGCGGCCAGCCTCCGCCTGGGCTATCTATGCCGCATTGACAACTCTCATCACCCCAGCTGTCCAACATGCGGTAACCACTTCATACAACAACTACTCCTTAATGGCGATGGCCACACAAGCTGGAGTGCTGTTTGGCATGGGCAAAGGGATGCCATTTATGCATGGGGACCTTGGAGTCCCGCTGCTAATGATGGGTTGCTATTCACAATTAACACCCCTGACTCTGATAGTAGCTATCATTCTGCTTGTGGCGCACTACATGTACTTGATCCCAGGCCTACAAGCG
GCAGCAGCGCGTGCTGCCCAGAAAAGGACAGCAGCTGGCATCATGAAGAATCCCGTTGTGGATGGAATAGTGGTAACTGACATTGACACAATGACAATAGACCCCCAGGTGGAGAAGAAGATGGGACAAGTGTTACTCATAGCAGTAGCCATCTCCAGTGCTGTGCTGCTGCGGACCGCCTGGGGATGGGGGGAGGCTGGAGCTCTGATCACAGCAGCGACCTCCACCTTGTGGGAAGGCTCTCCAAACAAATACTGGAACTCCTCTACAGCCACCTCACTGTGCAACATCTTCAGAGGAAGCTATCTGGCAGGAGCTTCCCTTATCTATACAGTGACGAGAAACGCTGGCCTGGTTAAGAGACGTGGAGGTGGGACGGGAGAGACTCTGGGAGAGAAGTGGAAAGCTCGTCTGAATCAGATGTCGGCCCTGGAGTTCTACTCTTATAAAAAGTCAGGTATCACTGAAGTGTGTAGAGAGGAGGCTCGCCGTGCCCTCAAGGATGGAGTGGCCACAGGAGGACATGCCGTATCCCGGGGAAGTGCAAAGATCAGATGGTTGGAGGAGAGAGGATATCTGCAGCCCTATGGGAAGGTTGTTGACCTCGGATGTGGCAGAGGGGGCTGGAGCTATTATGCCGCCACCATCCGCAAAGTGCAGGAGGTGAGAGGATACACAAAGGGAGGTCCCGGTCATGAAGAACCCATGCTGGTGCAAAGCTATGGGTGGAACATAGTTCGTCTCAAGAGTGGAGTGGACGTCTTCCACATGGCGGCTGAGCCGTGTGACACTCTGCTGTGTGACATAGGTGAGTCATCATCTAGTCCTGAAGTGGAAGAGACACGAACACTCAGAGTGCTCTCTATGGTGGGGGACTGGCTTGAAAAAAGACCAGGGGCCTTCTGTATAAAGGTGCTGTGCCCATACACCAGCACTATGATGGAAACCATGGAGCGACTGCAACGTAGGCATGGGGGAGGATTAGTCAGAGTGCCATTGTGTCGCAACTCCACACATGAGATGTACTGGGTCTCTGGGGCAAAGAGCAACATCATAAAAAGTGTGTCCACCACAAGTCAGCTCCTCCTGGGACGCATGGATGGCCCCAGGAGGCCAGTGAAATATGAGGAGGATGTGAACCTCGGCTCGGGTACACGAGCTGTGGCAAGCTGTGCTGAGGCTCCTAACATGAAAATCATCGGCAGGCGCATTGAGAGAATCCGCAATGAACATGCAGAAACATGGTTTCTTGATGAAAACCACCCATACAGGACATGGGCCTACCATGGGAGCTACGAAGCCCCCACGCAAGGATCAGCGTCTTCCCTCGTGAACGGGGTTGTTAGACTCCTGTCAAAGCCTTGGGACGTGGTGACTGGAGTTACAGGAATAGCCATGACTGACACCACACCATACGGCCAACAAAGAGTCTTCAAAGAAAAAGTGGACACCAGGGTGCCAGATCCCCAAGAAGGCACTCGCCAGGTAATGAACATAGTCTCTTCCTGGCTGTGGAAGGAGCTGGGGAAACGCAAGCGGCCACGCGTCTGCACCAAAGAAGAGTTTATCAACAAGGTGCGCAGCAATGCAGCACTGGGAGCAATATTTGAAGAGGAAAAAGAATGGAAGACGGCTGTGGAAGCTGTGAATGATCCAAGGTTTTGGGCCCTAGTGGATAGGGAGAGAGAACACCACCTGAGAGGAGAGTGTCACAGCTGTGTGTACAACATGATGGGAAAAAGAGAAAAGAAGCAAGGAGAGTTCGGGAAAGCAAAAGGTAGCCGCGCCATCTGGTACATGTGGTTGGGAGCCAGATTCTTGGAGTTTGAAGCCCTTGGATTCTTGAACGAGGACCATTGGATGGGAAGAGAAAACTCAGGAGGTGGAGTCGAAGGGTTAGGATTGCAAAGACTTGGATACATTCTAGAAGAAATGAATCGGGCACCAGGAGGAAAGATGTACGCAGATGACACTGCTGGCTGGGACACCCGCATTAGTAA
GTTTGATCTGGAGAATGAAGCTCTGATTACCAACCAAATGGAGGAAGGGCACAGAACTCTGGCGTTGGCCGTGATTAAATACACATACCAAAACAAAGTGGTGAAGGTTCTCAGACCAGCTGAAGGAGGAAAAACAGTTATGGACATCATTTCAAGACAAGACCAGAGAGGGAGTGGACAAGTTGTCACTTATGCTCTCAACACATTCACCAACTTGGTGGTGCAGCTTATCCGGAACATGGAAGCTGAGGAAGTGTTAGAGATGCAAGACTTATGGTTGTTGAGGAAGCCAGAGAAAGTGACCAGATGGTTGCAGAGCAATGGATGGGATAGACTCAAACGAATGGCGGTCAGTGGAGATGACTGCGTTGTGAAGCCAATCGATGATAGGTTTGCACATGCCCTCAGGTTCTTGAATGACATGGGAAAAGTTAGGAAAGACACACAGGAGTGGAAACCCTCGACTGGATGGAGCAATTGGGAAGAAGTCCCGTTCTGCTCCCACCACTTCAACAAGCTGTACCTCAAGGATGGGAGATCCATTGTGGTCCCTTGCCGCCACCAAGATGAACTGATTGGCCGAGCTCGCGTCTCACCAGGGGCAGGATGGAGCATCCGGGAGACTGCCTGTCTTGCAAAATCATATGCGCAGATGTGGCAGCTCCTTTATTTCCACAGAAGAGACCTTCGACTGATGGCTAATGCCATTTGCTCGGCTGTGCCAGTTGACTGGGTACCAACTGGGAGAACCACCTGGTCAATCCATGGAAAGGGAGAATGGATGACCACTGAGGACATGCTCATGGTGTGGAATAGAGTGTGGATTGAGGAGAACGACCATATGGAGGACAAGACTCCTGTAACAAAATGGACAGACATTCCCTATCTAGGAAAAAGGGAGGACTTATGGTGTGGATCCCTTATAGGGCACAGACCCCGCACCACTTGGGCTGAAAACATCAAAGACACAGTCAACATGGTGCGCAGGATCATAGGTGATGAAGAAAAGTACATGGACTATCTATCCACCCAAGTCCGCTACTTGGGTGAGGAAGGGTCCACACCCGGAGTGTTGTAAGCACCAATTTTAGTGTTGTCAGGCCTGCTAGTCAGCCACAGTTTGGGGAAAGCTGTGCAGCCTGTAACCCCCCCAGGAGAAGCTGGGAAACCAAGCTCATAGTCAGGCCGAGAACGCCATGGCACGGAAGAAGCCATGCTGCCTGTGAGCCCCTCAGAGGACACTGAGTCAAAAAACCCCACGCGCTTGGAAGCGCAGGATGGGAAAAGAAGGTGGCGACCTTCCCCACCCTTCAATCTGGGGCCTGAACTGGAGACTAGCTGTGAATCTCCAGCAGAGGGACTAGTGGTTAGAGGAGACCCCCCGGAAAACGCAAAACAGCATATTGACGTGGGAAAGACCAGAGACTCCATGAGTTTCCACCACGCTGGCCGCCAGGCACAGATCGCCGAACTTCGGCGGCCGGTGTGGGGAAATCCATGGTTTCT diff --git a/refs/Zika_Brazil.fasta b/refs/Zika_Brazil.fasta deleted file mode 100755 index 51a5d63..0000000 --- a/refs/Zika_Brazil.fasta +++ /dev/null @@ -1,155 +0,0 @@ ->gi|992324757|gb|KU707826.1| Zika virus isolate SSABR1, complete genome -GACAGTTCGAGTTTGAAGCGAAAGCTAGCAACAGTATCAACAGGTTTTATTTGGATTTGGAAACGAGAGT -TTCTGGTCATGAAAAACCCAAAAAAGAAATCCGGAGGATTCCGGATTGTCAATATGCTAAAACGCGGAGT -AGCCCGTGTGAGCCCCTTTGGGGGCTTGAAGAGGCTGCCAGCCGGACTTCTGCTGGGTCATGGGCCCATC 
-AGGATGGTCTTGGCGATTCTAGCCTTTTTGAGATTCACGGCAATCAAGCCATCACTGGGTCTCATCAATA -GATGGGGTTCAGTGGGGAAAAAAGAGGCTATGGAAATAATAAAGAAGTTCAAGAAAGATCTGGCTGCCAT -GCTGAGAATAATCAATGCTAGGAAGGAGAAGAAGAGACGAGGCGCAGATACTAGTGTCGGAATTGTTGGC -CTCCTGCTGACCACAGCTATGGCAGCGGAGGTCACTAGACGTGGGAGTGCATACTATATGTACTTGGACA -GAAACGATGCTGGGGAGGCCATATCTTTTCCAACCACATTGGGGATGAATAAGTGTTATATACAGATCAT -GGATCTTGGACACATGTGTGATGCCACCATGAGCTATGAATGCCCTATGCTGGATGAGGGGGTGGAACCA -GATGACGTCGATTGTTGGTGCAACACGACGTCAACTTGGGTTGTGTACGGAACCTGCCATCACAAAAAAG -GTGAAGCACGGAGATCTAGAAGAGCTGTGACGCTCCCCTCCCATTCCACTAGGAAGCTGCAAACGCGGTC -GCAAACCTGGTTGGAATCAAGAGAATACACAAAGCACTTGATTAGAGTCGAAAATTGGATATTCAGGAAC -CCTGGCTTCGCGTTAGCAGCAGCTGCCATCGCTTGGCTTTTGGGAAGCTCAACGAGCCAAAAAGTCATAT -ACTTGGTCATGATACTGCTGATTGCCCCGGCATACAGCATCAGGTGCATAGGAGTCAGCAATAGGGACTT -TGTGGAAGGTATGTCAGGTGGGACCTGGGTTGATGTTGTCTTGGAACATGGAGGTTGTGTCACCGTAATG -GCACAGGACAAACCGACTGTCGACATAGAGCTGGTTACAACAACAGTCAGCAACATGGCGGAGGTAAGAT -CCTACTGCTATGAGGCATCAATATCAGACATGGCTTCGGACAGCCGCTGCCCAACACAAGGTGAAGCCTA -CCTTGACAAGCAATCAGACACTCAATATGTCTGCAAAAGAACGTTAGTGGACAGAGGCTGGGGAAATGGA -TGTGGACTTTTTGGCAAAGGGAGCCTGGTGACATGCGCTAAGTTTGCATGCTCCAAGAAAATGACCGGGA -AGAGCATCCAGCCAGAGAATCTGGAGTACCGGATAATGCTGTCAGTTCATGGCTCCCAGCACAGTGGGAT -GATTGTTAATGACACAGGACATGAAACTGATGAGAATAGAGCGAAAGTTGAGATAACGCCCAATTCACCA -AGAGCCGAAGCCACCCTGGGGGGTTTTGGAAGCCTAGGACTTGATTGTGAACCGAGGACAGGCCTTGACT -TTTCAGATTTGTATTACTTGACTATGAATAACAAGCACTGGTTGGTTCACAAGGAGTGGTTCCACGACAT -TCCATTACCTTGGCACGCTGGGGCAGACACCGGAACTCCACACTGGAACAACAAAGAAGCACTGGTAGAG -TTCAAGGACGCACATGCCAAAAGGCAAACTGTCGTGGTTCTAGGGAGTCAAGAAGGAGCAGTTCACACGG -CCCTTGCTGGAGCTCTGGAGGCTGAGATGGATGGTGCAAAGGGAAGGCTGTCCTCTGGCCACTTGAAATG -TCGCCTGAAAATGGATAAACTTAGATTGAAGGGCGTGTCATACTCCTTGTGTACTGCAGCGTTCACATTC -ACCAAGATCCCGGCTGAAACACTGCACGGGACAGTCACAGTGGAGGTACAGTACGCAGGGACAGATGGAC -CTTGCAAGGTTCCAGCTCAGATGGCGGTGGACATGCAAACTCTGACCCCAGTTGGGAGGTTGATAACCGC -TAACCCCGTAATCACTGAAAGCACTGAGAACTCTAAGATGATGCTGGAACTTGATCCACCATTTGGGGAC 
-TCTTACATTGTCATAGGAGTCGGGGAGAAGAAGATCACCCACCACTGGCACAGGAGTGGCAGCACCATTG -GAAAAGCATTTGAAGCCACTGTGAGAGGTGCCAAGAGAATGGCAGTCTTGGGAGACACAGCCTGGGACTT -TGGATCAGTTGGAGGCGCTCTCAACTCATTGGGCAAGGGCATCCATCAAATTTTTGGAGCAGCTTTCAAA -TCATTGTTTGGAGGAATGTCCTGGTTCTCACAAATTCTCATTGGAACGTTGCTGATGTGGTTGGGTCTGA -ACACAAAGAATGGATCTATTTCCCTTATGTGCTTGGCCTTAGGGGGAGTGTTGATCTTCTTATCCACAGC -CGTCTCTGCTGATGTGGGGTGCTCGGTGGACTTCTCAAAGAAGGAGACGAGATGCGGTACAGGGGTGTTC -GTCTATAACGACGTTGAAGCCTGGAGGGACAGGTACAAGTACCATCCTGACTCCCCCCGTAGATTGGCAG -CAGCAGTCAAGCAAGCCTGGGAAGATGGTATCTGCGGGATCTCCTCTGTTTCAAGAATGGAAAACATCAT -GTGGAGATCAGTAGAAGGGGAGCTCAACGCAATCCTGGAAGAGAATGGAGTTCAACTGACGGTCGTTGTG -GGATCTGTAAAAAACCCCATGTGGAGAGGTCCACAGAGATTGCCCGTGCCTGTGAACGAGCTGCCCCACG -GCTGGAAGGCTTGGGGGAAATCGTACTTCGTCAGAGCAGCAAAGACAAATAACAGCTTTGTCGTGGATGG -TGACACACTGAAGGAATGCCCACTCAAACATAGAGCATGGAACAGCTTTCTTGTGGAGGATCATGGGTTC -GGGGTATTTCACACTAGTGTCTGGCTCAAGGTTAGAGAAGATTATTCATTAGAGTGTGATCCAGCCGTTA -TTGGAACAGCTGTTAAGGGAAAGGAGGCTGTACACAGTGATCTAGGCTACTGGATTGAGAGTGAGAAGAA -TGACACATGGAGGCTGAAGAGGGCCCATCTGATCGAGATGAAAACATGTGAATGGCCAAAGTCCCACACA -TTGTGGACAGATGGAATAGAAGAGAGTGATCTGATCATACCCAAGTCTTTAGCTGGGCCACTCAGCCATC -ACAATACCAGAGAGGGCTACAGGACCCAAATGAAAGGGCCATGGCACAGTGAAGAGCTTGAAATTCGGTT -TGAGGAATGCCCAGGCACTAAGGTCCACGTGGAGGAAACATGTGGAACAAGAGGACCATCTCTGAGATCA -ACCACTGCAAGCGGAAGGGTGATCGAGGAATGGTGCTGCAGGGAGTGCACAATGCCCCCACTGTCGTTCC -GGGCTAAAGATGGCTGTTGGTATGGAATGGAGATAAGGCCCAGGAAAGAACCAGAAAGCAACTTAGTAAG -GTCAATGGTGACTGCAGGATCAACTGATCACATGGACCACTTCTCCCTTGGAGTGCTTGTGATTCTGCTC -ATGGTGCAGGAAGGGCTGAAGAAGAGAATGACCACAAAGATCATCATAAGCACATCAATGGCAGTGCTGG -TAGCTATGATCCTGGGAGGATTTTCAATGAGTGACCTGGCTAAGCTTGCAATTTTGATGGGTGCCACCTT -CGCGGAAATGAACACTGGAGGAGATGTAGCTCATCTGGCGCTGATAGCGGCATTCAAAGTCAGACCAGCG -TTGCTGGTATCTTTCATCTTCAGAGCTAATTGGACACCCCGTGAAAGCATGCTGCTGGCCTTGGCCTCGT -GTCTTTTGCAAACTGCGATCTCCGCCTTGGAAGGCGACCTGATGGTTCTCATCAATGGTTTTGCTTTGGC -CTGGTTGGCAATACGAGCGATGGTTGTTCCACGCACTGATAACATCACCTTGGCAATCCTGGCTGCTCTG 
-ACACCACTGGCCCGGGGCACACTGCTTGTGGCGTGGAGAGCAGGCCTTGCTACTTGCGGGGGGTTTATGC -TCCTCTCTCTGAAGGGAAAAGGCAGTGTGAAGAAGAACTTACCATTTGTCATGGCCCTGGGACTAACCGC -TGTGAGGCTGGTCGACCCCATCAACGTGGTGGGACTGCTGTTGCTCACAAGGAGTGGGAAGCGGAGCTGG -CCCCCTAGCGAAGTACTCACAGCTGTTGGCCTGATATGCGCATTGGCTGGAGGGTTCGCCAAGGCAGATA -TAGAGATGGCTGGGCCCATGGCCGCGGTCGGTCTGCTAATTGTCAGTTACGTGGTCTCAGGAAAGAGTGT -GGACATGTACATTGAAAGAGCAGGTGACATCACATGGGAAAAAGATGCGGAAGTCACTGGAAACAGTCCC -CGGCTCGATGTGGCGCTAGATGAGAGTGGTGATTTCTCCCTGGTGGAGGATGACGGTCCCCCCATGAGAG -AGATCATACTCAAGGTGGTCCTGATGACCATCTGTGGCATGAACCCAATAGCCATACCCTTTGCAGCTGG -AGCGTGGTACGTATACGTGAAGACTGGAAAAAGGAGTGGTGCTCTATGGGATGTGCCTGCTCCCAAGGAA -GTAAAAAAGGGGGAGACCACAGATGGAGTGTACAGAGTAATGACTCGTAGACTGCTAGGTTCAACACAAG -TTGGAGTGGGAGTTATGCAAGAGGGGGTCTTTCACACTATGTGGCACGTCACAAAAGGATCCGCGCTGAG -AAGCGGTGAAGGGAGACTTGATCCATACTGGGGAGATGTCAAGCAGGATCTGGTGTCATACTGTGGTCCA -TGGAAGCTAGATGCCGCCTGGGACGGGCACAGCGAGGTGCAGCTCTTGGCCGTGCCCCCCGGAGAGAGAG -CGAGGAACATCCAGACTCTGCCCGGAATATTTAAGACAAAGGATGGGGACATTGGAGCGGTTGCGCTGGA -TTACCCAGCAGGAACTTCAGGATCTCCAATCCTAGACAAGTGTGGGAGAGTGATAGGACTTTATGGCAAT -GGGGTCGTGATCAAAAATGGGAGTTATGTTAGTGCCATCACCCAAGGGAGGAGGGAGGAAGAGACTCCTG -TTGAGTGCTTCGAGCCTTCGATGCTGAAGAAGAAGCAGCTAACTGTCTTAGACTTGCATCCTGGAGCTGG -GAAAACCAGGAGAGTTCTTCCTGAAATAGTCCGTGAAGCCATAAAAACAAGACTCCGTACTGTGATCTTA -GCTCCAACCAGGGTTGTCGCTGCTGAAATGGAGGAGGCCCTTAGAGGGCTTCCAGTGCGTTATATGACAA -CAGCAGTCAATGTCACCCACTCTGGAACAGAAATCGTCGACTTAATGTGCCATGCCACCTTCACTTCACG -TCTACTACAGCCAATCAGAGTCCCCAACTATAATCTGTATATTATGGATGAGGCCCACTTCACAGATCCC -TCAAGTATAGCAGCAAGAGGATACATTTCAACAAGGGTTGAGATGGGCGAGGCGGCTGCCATCTTCATGA -CCGCCACGCCACCAGGAACCCGTGACGCATTTCCGGACTCCAACTCACCAATTATGGACACCGAAGTGGA -AGTCCCAGAGAGAGCCTGGAGCTCAGGCTTTGATTGGGTGACGGATCATTCTGGAAAAACAGTTTGGTTT -GTTCCAAGCGTGAGGAACGGCAATGAGATCGCAGCTTGTCTGACAAAGGCTGGAAAACGGGTCATACAGC -TCAGCAGAAAGACTTTTGAGACAGAGTTCCAGAAAACAAAACATCAAGAGTGGGACTTTGTCGTGACAAC -TGACATTTCAGAGATGGGCGCCAACTTTAAAGCTGACCGTGTCATAGATTCCAGGAGATGCCTAAAGCCG 
-GTCATACTTGATGGCGAGAGAGTCATTCTGGCTGGACCCATGCCTGTCACACATGCCAGCGCTGCCCAGA -GGAGGGGGCGCATAGGCAGGAATCCCAACAAACCTGGAGATGAGTATCTGTATGGAGGTGGGTGCGCAGA -GACTGACGAAGACCATGCACACTGGCTTGAAGCAAGAATGCTCCTTGACAATATTTACCTCCAAGATGGC -CTCATAGCCTCGCTCTATCGACCTGAGGCCGACAAAGTAGCAGCCATTGAGGGAGAGTTCAAGCTTAGGA -CGGAGCAAAGGAAGACCTTTGTGGAACTCATGAAAAGAGGAGATCTTCCTGTTTGGCTGGCCTATCAGGT -TGCATCTGCCGGAATAACCTACACAGATAGAAGATGGTGCTTTGATGGCACGACCAACAACACCATAATG -GAAGACAGTGTGCCGGCAGAGGTGTGGACCAGACACGGAGAGAAAAGAGTGCTCAAACCGAGGTGGATGG -ACGCCAGAGTTTGTTCAGATCATGCGGCCCTGAAGTCATTCAAGGAGTTTGCCGCTGGGAAAAGAGGAGC -GGCTTTTGGAGTGATGGAAGCCCTGGGAACACTGCCAGGACACATGACAGAGAGATTCCAGGAAGCCATT -GACAACCTCGCTGTGCTCATGCGGGCAGAGACTGGAAGCAGGCCTTACAAAGCCGCGGCGGCCCAATTGC -CGGAGACCCTAGAGACCATTATGCTTTTGGGGTTGCTGGGAACAGTCTCGCTGGGAATCTTCTTCGTCTT -GATGAGGAACAAGGGCATAGGGAAGATGGGCTTTGGAATGGTGACTCTTGGGGCCAGCGCATGGCTCATG -TGGCTCTCGGAAATTGAGCCAGCCAGAATTGCATGTGTCCTCATTGTTGTGTTTCTATTGCTGGTGGTGC -TCATACCTGAGCCAGAAAAGCAAAGATCTCCCCAGGACAACCAAATGGCAATCATCATCATGGTAGCAGT -AGGTCTTCTGGGCTTGATTACCGCCAATGAACTCGGATGGTTGGAGAGAACAAAGAGTGACCTAAGCCAT -CTAATGGGAAGGAGAGAGGAGGGGGCAACCATAGGATTCTCAATGGACATTGACCTGCGGCCAGCCTCAG -CTTGGGCCATCTATGCTGCCTTGACAACTTTCATTACCCCAGCCGTCCAACATGCAGTGACCACTTCATA -CAACAACTACTCCTTAATGGCGATGGCCACGCAAGCTGGAGTGTTGTTTGGTATGGGCAAAGGGATGCCA -TTCTACGCATGGGACTTTGGAGTCCCGCTGCTAATGATAGGTTGCTACTCACAATTAACACCCCTGACCC -TAATAGTGGCCATCATTTTGCTCGTGGCGCACTACATGTACTTGATCCCAGGGCTGCAGGCAGCAGCTGC -GCGTGCTGCCCAGAAGAGAACGGCAGCTGGCATCATGAAGAACCCTGTTGTGGATGGAATAGTGGTGACT -GACATTGACACAATGACAATTGACCCCCAAGTGGAGAAAAAGATGGGACAGGTGCTACTCATAGCAGTAG -CCGTCTCCAGCGCCATACTGTCGCGGACCGCCTGGGGGTGGGGGGAGGCTGGGGCCCTGATCACAGCCGC -AACTTCCACTTTGTGGGAAGGCTCTCCGAACAAGTACTGGAACTCCTCTACAGCCACTTCACTGTGTAAC -ATTTTTAGGGGAAGTTACTTGGCTGGAGCTTCTCTAATCTACACAGTAACAAGAAACGCTGGCTTGGTCA -AGAGACGTGGGGGTGGAACAGGAGAGACCCTGGGAGAGAAATGGAAGGCCCGCTTGAACCAGATGTCGGC -CCTGGAGTTCTACTCCTACAAAAAGTCAGGCATCACCGAGGTGTGCAGAGAAGAGGCCCGCCGCGCCCTC 
-AAGGACGGTGTGGCAACGGGAGGCCATGCTGTGTCCCGAGGAAGTGCAAAGCTGAGATGGTTGGTGGAGC -GGGGATACCTGCAGCCCTATGGAAAGGTCATTGATCTTGGATGTGGCAGAGGGGGCTGGAGTTACTACGC -CGCCACCATCCGCAAAGTTCAAGAAGTGAAAGGATACACAAAAGGAGGCCCTGGTCATGAAGAACCCGTG -TTGGTGCAAAGCTATGGGTGGAACATAGTCCGTCTTAAGAGTGGGGTGGACGTCTTTCATATGGCGGCTG -AGCCGTGTGACACGTTGCTGTGTGACATAGGTGAGTCATCATCTAGTCCTGAAGTGGAAGAAGCACGGAC -GCTCAGAGTCCTCTCCATGGTGGGGGATTGGCTTGAAAAAAGACCAGGAGCCTTTTGTATAAAGGTGTTG -TGCCCATACACCAGCACTATGATGGAAACCCTGGAGCGACTGCAGCGTAGGTATGGGGGAGGACTGGTCA -GAGTGCCACTCTCCCGCAACTCTACACATGAGATGTATTGGGTCTCTGGAGCGAAAAGCAACACCATAAA -AAGTGTGTCCACCACGAGCCAGCTCCTCTTGGGGCGCATGGACGGGCCTAGGAGGCCAGTGAAATATGAG -GAGGATGTGAATCTCGGCTCTGGCACGCGGGCTGTGGTAAGCTGCGCTGAAGCTCCCAACATGAAGATCA -TTGGTAACCGCATTGAAAGGATCCGCAGTGAGCACGCGGAAACGTGGTTCTTTGACGAGAACCACCCATA -TAGGACATGGGCTTACCATGGAAGCTATGAGGCCCCCACACAAGGGTCAGCGTCCTCTCTAATAAACGGG -GTTGTCAGGCTCCTGTCAAAACCCTGGGATGTGGTGACTGGAGTCACAGGAATAGCCATGACCGACACCA -CACCGTATGGTCAGCAAAGAGTTTTCAAGGAAAAAGTGGACACTAGGGTGCCAGACCCCCAAGAAGGCAC -TCGTCAGGTTATGAGCATGGTCTCTTCCTGGTTGTGGAAAGAGCTAGGCAAACACAAACGGCCACGAGTC -TGTACCAAAGAAGAGTTCATCAACAAGGTTCGTAGCAATGCAGCATTAGGGGCAATATTTGAAGAGGAAA -AAGAGTGGAAGACTGCAGTGGAAGCTGTGAACGATCCAAGGTTCTGGGCTCTAGTGGATAAGGAAAGAGA -GCACCACCTGAGAGGAGAGTGCCAGAGTTGTGTGTACAACATGATGGGAAAAAGAGAAAAGAAACAAGGG -GAATTTGGAAAGGCCAAGGGCAGCCGCGCCATCTGGTATATGTGGCTAGGGGCTAGATTTCTAGAGTTCG -AAGCCCTTGGATTCTTGAACGAGGATCACTGGATGGGGAGAGAGAACTCAGGAGGTGGTGTTGAAGGGCT -GGGATTACAAAGACTCGGATATGTCCTAGAAGAGATGAGTCGTATACCAGGAGGAAGGATGTATGCAGAT -GACACTGCTGGCTGGGACACCCGCATCAGCAGGTTTGATCTGGAGAATGAAGCTCTAATCACCAACCAAA -TGGAAAAAGGGCACAGGGCCTTGGCATTGGCCATAATCAAGTACACATACCAAAACAAAGTGGTAAAGGT -CCTTAGACCAGCTGAAAAAGGGAAAACAGTTATGGACATTATTTCGAGACAAGACCAAAGGGGGAGCGGA -CAAGTTGTCACTTACGCTCTTAACACATTTACCAACCTAGTGGTGCAACTCATTCGGAATATGGAGGCTG -AGGAAGTTCTAGAGATGCAAGACTTGTGGCTGCTGCGGAGGTCAGAGAAAGTGACCAACTGGTTGCAGAG -CAACGGATGGGATAGGCTCAAACGAATGGCAGTCAGTGGAGATGATTGCGTTGTGAAGCCAATTGATGAT 
-AGGTTTGCACATGCCCTCAGGTTCTTGAATGATATGGGAAAAGTTAGGAAGGACACACAAGAGTGGAAAC -CCTCAACTGGATGGGACAACTGGGAAGAAGTTCCGTTTTGCTCCCACCACTTCAACAAGCTCCATCTCAA -GGACGGGAGGTCCATTGTGGTTCCCTGCCGCCACCAAGATGAACTGATTGGCCGGGCCCGCGTCTCTCCA -GGGGCGGGATGGAGCATCCGGGAGACTGCTTGCCTAGCAAAATCATATGCGCAAATGTGGCAGCTCCTTT -ATTTCCACAGAAGGGACCTCCGACTGATGGCCAATGCCATTTGTTCATCTGTGCCAGTTGACTGGGTTCC -AACTGGGAGAACTACCTGGTCAATCCATGGAAAGGGAGAATGGATGACCACTGAAGACATGCTTGTGGTG -TGGAACAGAGTGTGGATTGAGGAGAACGACCACATGGAAGACAAGACCCCAGTTACGAAATGGACAGACA -TCCCCTATTTGGGAAAAAGGGAAGACTTGTGGTGTGGATCTCTCATAGGGCACAGACCGCGCACCACCTG -GGCTGAGAACATTAAAAACACAGTCAACATGGTGCGCAGGATCATAGGTGATGAAGAAAAGTACATGGAC -TACCTATCCACCCAAGTTCGCTACTTGGGTGAAGAAGGGTCTACACCTGGAGTGCTGTAAGCACCAGTCT -TAATGTTGTCAGGCCTGCTAGTCAGCCACAGCTTGGGGAAAGCTGTGCAGCCTGTGACCCCCCCAGGAGA -AGCTGGGAAACCAAGCCTATAGTCAGGCCGAGAACGCCATGGCACGGAAGAAGCCATGCTGCCTGTGAGC -CCCTCAGAGGACACTGAGTCAAAAAACCCCACGCGCTTGGAGGCGCAGGATGGGAAAAGAAGGTGGCGAC -CTTCCCCACCCTTCAATCTGGGGCCTGAACTGGAGATCAGCTGTGGATCTCCAGAAGAGGGACTAGTGGT -TAGAGGAG - diff --git a/refs/Zika_FP.fasta b/refs/Zika_FP.fasta deleted file mode 100755 index 5b9a59a..0000000 --- a/refs/Zika_FP.fasta +++ /dev/null @@ -1,153 +0,0 @@ ->gi|631250742|gb|KJ776791.1| Zika virus strain H/PF/2013 polyprotein gene, complete cds -AGTATCAACAGGTTTTATTTTGGATTTGGAAACGAGAGTTTCTGGTCATGAAAAACCCAAAAAAGAAATC -CGGAGGATTCCGGATTGTCAATATGCTAAAACGCGGAGTAGCCCGTGTGAGCCCCTTTGGGGGCTTGAAG -AGGCTGCCAGCCGGACTTCTGCTGGGTCATGGGCCCATCAGGATGGTCTTGGCGATTCTAGCCTTTTTGA -GATTCACGGCAATCAAGCCATCACTGGGTCTCATCAATAGATGGGGTTCAGTGGGGAAAAAAGAGGCTAT -GGAAATAATAAAGAAGTTCAAGAAAGATCTGGCTGCCATGCTGAGAATAATCAATGCTAGGAAGGAGAAG -AAGAGACGAGGCGCAGATACTAGTGTCGGAATTGTTGGCCTCCTGCTGACCACAGCTATGGCAGCGGAGG -TCACTAGACGTGGGAGTGCATACTATATGTACTTGGACAGAAACGACGCTGGGGAGGCCATATCTTTTCC -AACCACATTGGGGATGAATAAGTGTTATATACAGATCATGGATCTTGGACACATGTGTGATGCCACCATG -AGCTATGAATGCCCTATGCTGGATGAGGGGGTGGAACCAGATGACGTCGATTGTTGGTGCAACACGACGT -CAACTTGGGTTGTGTACGGAACCTGCCATCACAAAAAAGGTGAAGCACGGAGATCTAGAAGAGCTGTGAC 
-GCTCCCCTCCCATTCCACTAGGAAGCTGCAAACGCGGTCGCAAACCTGGTTGGAATCAAGAGAATACACA -AAGCACTTGATTAGAGTCGAAAATTGGATATTCAGGAACCCTGGCTTCGCGTTAGCAGCAGCTGCCATCG -CTTGGCTTTTGGGAAGCTCAACGAGCCAAAAAGTCATATACTTGGTCATGATACTGCTGATTGCCCCGGC -ATACAGCATCAGGTGCATAGGAGTCAGCAATAGGGACTTTGTGGAAGGTATGTCAGGTGGGACTTGGGTT -GATGTTGTCTTGGAACATGGAGGTTGTGTCACCGTAATGGCACAGGACAAACCGACTGTCGACATAGAGC -TGGTTACAACAACAGTCAGCAACATGGCGGAGGTAAGATCCTACTGCTATGAGGCATCAATATCGGACAT -GGCTTCGGACAGCCGCTGCCCAACACAAGGTGAAGCCTACCTTGACAAGCAATCAGACACTCAATATGTC -TGCAAAAGAACGTTAGTGGACAGAGGCTGGGGAAATGGATGTGGACTTTTTGGCAAAGGGAGCCTGGTGA -CATGCGCTAAGTTTGCATGCTCCAAGAAAATGACCGGGAAGAGCATCCAGCCAGAGAATCTGGAGTACCG -GATAATGCTGTCAGTTCATGGCTCCCAGCACAGTGGGATGATCGTTAATGACACAGGACATGAAACTGAT -GAGAATAGAGCGAAGGTTGAGATAACGCCCAATTCACCAAGAGCCGAAGCCACCCTGGGGGGTTTTGGAA -GCCTAGGACTTGATTGTGAACCGAGGACAGGCCTTGACTTTTCAGATTTGTATTACTTGACTATGAATAA -CAAGCACTGGTTGGTTCACAAGGAGTGGTTCCACGACATTCCATTACCTTGGCACGCTGGGGCAGACACC -GGAACTCCACACTGGAACAACAAAGAAGCACTGGTAGAGTTCAAGGACGCACATGCCAAAAGGCAAACTG -TCGTGGTTCTAGGGAGTCAAGAAGGAGCAGTTCACACGGCCCTTGCTGGAGCTCTGGAGGCTGAGATGGA -TGGTGCAAAGGGAAGGCTGTCCTCTGGCCACTTGAAATGTCGCCTGAAAATGGATAAACTTAGATTGAAG -GGCGTGTCATACTCCTTGTGTACCGCAGCGTTCACATTCACCAAGATCCCGGCTGAAACACTGCACGGGA -CAGTCACAGTGGAGGTACAGTACGCAGGGACAGATGGACCTTGCAAGGTTCCAGCTCAGATGGCGGTGGA -CATGCAAACTCTGACCCCAGTTGGGAGGTTGATAACCGCTAACCCCGTAATCACTGAAAGCACTGAGAAC -TCTAAGATGATGCTGGAACTTGATCCACCATTTGGGGACTCTTACATTGTCATAGGAGTCGGGGAGAAGA -AGATCACCCACCACTGGCACAGGAGTGGCAGCACCATTGGAAAAGCATTTGAAGCCACTGTGAGAGGTGC -CAAGAGAATGGCAGTCTTGGGAGACACAGCCTGGGACTTTGGATCAGTTGGAGGCGCTCTCAACTCATTG -GGCAAGGGCATCCATCAAATTTTTGGAGCAGCTTTCAAATCATTGTTTGGAGGAATGTCCTGGTTCTCAC -AAATTCTCATTGGAACGTTGCTGATGTGGTTGGGTCTGAACACAAAGAATGGATCTATTTCCCTTATGTG -CTTGGCCTTAGGGGGAGTGTTGATCTTCTTATCCACAGCTGTCTCTGCTGATGTGGGGTGCTCGGTGGAC -TTCTCAAAGAAGGAGACGAGATGCGGTACAGGGGTGTTCGTCTATAACGACGTTGAAGCCTGGAGGGACA -GGTACAAGTACCATCCTGACTCCCCCCGTAGATTGGCAGCAGCAGTCAAGCAAGCCTGGGAAGATGGTAT 
-CTGTGGGATCTCCTCTGTTTCAAGAATGGAAAACATCATGTGGAGATCAGTAGAAGGGGAGCTCAACGCA -ATCCTGGAAGAGAATGGAGTTCAACTGACGGTCGTTGTGGGATCTGTAAAAAACCCCATGTGGAGAGGTC -CACAGAGATTGCCCGTGCCTGTGAACGAGCTGCCCCACGGCTGGAAGGCTTGGGGGAAATCGTACTTCGT -CAGAGCAGCAAAGACAAATAACAGCTTTGTCGTGGATGGTGACACACTGAAGGAATGCCCACTCAAACAT -AGAGCATGGAACAGCTTTCTTGTGGAGGATCATGGGTTCGGGGTATTTCACACTAGTGTCTGGCTCAAGG -TTAGAGAAGATTATTCATTAGAGTGTGATCCAGCCGTTATTGGAACAGCTGTTAAGGGAAAGGAGGCTGT -ACACAGTGATCTAGGCTACTGGATTGAGAGTGAGAAGAATGACACATGGAGGCTGAAGAGGGCCCATCTG -ATCGAGATGAAAACATGTGAATGGCCAAAGTCCCACACATTGTGGACAGATGGAATAGAAGAGAGTGATC -TGATCATACCCAAGTCTTTAGCTGGGCCACTCAGCCATCACAATACCAGAGAGGGCTACAGGACCCAAAT -GAAAGGGCCATGGCACAGTGAAGAGCTTGAAATTCGGTTTGAGGAATGCCCAGGCACTAAGGTCCACGTG -GAGGAAACATGTGGAACAAGAGGACCATCTCTGAGATCAACCACTGCAAGCGGAAGGGTGATCGAGGAAT -GGTGCTGCAGGGAGTGCACAATGCCCCCACTGTCGTTCCGGGCTAAAGATGGCTGTTGGTATGGAATGGA -GATAAGGCCCAGGAAAGAACCAGAAAGTAACTTAGTAAGGTCAATGGTGACTGCAGGATCAACTGATCAC -ATGGATCACTTCTCCCTTGGAGTGCTTGTGATTCTGCTCATGGTGCAGGAAGGGCTGAAGAAGAGAATGA -CCACAAAGATCATCATAAGCACATCGATGGCAGTGCTGGTAGCTATGATCCTGGGAGGATTTTCAATGAG -TGACCTGGCTAAGCTTGCAATTTTGATGGGTGCCACCTTCGCGGAAATGAACACTGGAGGAGATGTAGCT -CATCTGGCGCTGATAGCGGCATTCAAAGTCAGACCAGCGTTGCTGGTATCTTTCATCTTCAGAGCTAATT -GGACACCCCGTGAAAGCATGCTGCTGGCCTTGGCCTCGTGTCTTTTGCAAACTGCGATCTCCGCCTTGGA -AGGCGACCTGATGGTTCTCATCAATGGTTTTGCTTTGGCCTGGTTGGCAATACGAGCGATGGTTGTTCCA -CGCACTGATAACATCACCTTGGCAATCCTGGCTGCTCTGACACCACTGGCCCGGGGCACACTGCTTGTGG -CGTGGAGAGCAGGCCTTGCTACTTGCGGGGGGTTTATGCTCCTCTCTCTGAAGGGAAAAGGCAGTGTGAA -GAAGAACTTACCATTTGTCATGGCCCTGGGACTAACCGCTGTGAGGCTGGTCGACCCCATCAACGTGGTG -GGACTGCTGTTGCTCACAAGGAGTGGGAAGCGGAGCTGGCCCCCTAGCGAAGTACTCACAGCTGTTGGCC -TGATATGCGCATTGGCTGGAGGGTTCGCCAAGGCAGATATAGAGATGGCTGGGCCCATGGCCGCGGTCGG -TCTGCTAATTGTCAGTTACGTGGTCTCAGGAAAGAGTGTGGACATGTACATTGAAAGAGCAGGTGACATC -ACATGGGAAAAAGATGCGGAAGTCACTGGAAACAGTCCCCGGCTCGATGTGGCGCTAGATGAGAGTGGTG -ATTTCTCCCTGGTGGAGGATGACGGTCCCCCCATGAGAGAGATCATACTCAAGGTGGTCCTGATGACCAT 
-CTGTGGCATGAACCCAATAGCCATACCCTTTGCAGCTGGAGCGTGGTACGTATACGTGAAGACTGGAAAA -AGGAGTGGTGCTCTATGGGATGTGCCTGCTCCCAAGGAAGTAAAAAAGGGGGAGACCACAGATGGAGTGT -ACAGAGTAATGACTCGTAGACTGCTAGGTTCAACACAAGTTGGAGTGGGAGTTATGCAAGAGGGGGTCTT -TCACACTATGTGGCACGTCACAAAAGGATCCGCGCTGAGAAGCGGTGAAGGGAGACTTGATCCATACTGG -GGAGATGTCAAGCAGGATCTGGTGTCATACTGTGGTCCATGGAAGCTAGATGCCGCCTGGGACGGGCACA -GCGAGGTGCAGCTCTTGGCCGTGCCCCCCGGAGAGAGAGCGAGGAACATCCAGACTCTGCCCGGAATATT -TAAGACAAAGGATGGGGACATTGGAGCGGTTGCGCTGGATTACCCAGCAGGAACTTCAGGATCTCCAATC -CTAGACAAGTGTGGGAGAGTGATAGGACTTTATGGCAATGGGGTCGTGATCAAAAATGGGAGTTATGTTA -GTGCCATCACCCAAGGGAGGAGGGAGGAAGAGACTCCTGTTGAGTGCTTCGAGCCTTCGATGCTGAAGAA -GAAGCAGCTAACTGTCTTAGACTTGCATCCTGGAGCTGGGAAAACCAGGAGAGTTCTTCCTGAAATAGTC -CGTGAAGCCATAAAAACAAGACTCCGTACTGTGATCTTAGCTCCAACCAGGGTTGTCGCTGCTGAAATGG -AGGAAGCCCTTAGAGGGCTTCCAGTGCGTTATATGACAACAGCAGTCAATGTCACCCACTCTGGAACAGA -AATCGTCGACTTAATGTGCCATGCCACCTTCACTTCACGTCTACTACAGCCAATCAGAGTCCCCAACTAT -AATCTGTATATTATGGATGAGGCCCACTTCACAGATCCCTCAAGTATAGCAGCAAGAGGATACATTTCAA -CAAGGGTTGAGATGGGCGAGGCGGCTGCCATCTTCATGACCGCCACGCCACCAGGAACCCGTGACGCATT -TCCGGACTCCAACTCACCAATTATGGACACCGAAGTGGAAGTCCCAGAGAGAGCCTGGAGCTCAGGCTTT -GATTGGGTGACGGATCATTCTGGAAAAACAGTTTGGTTTGTTCCAAGCGTGAGGAACGGCAATGAGATCG -CAGCTTGTCTGACAAAGGCTGGAAAACGGGTCATACAGCTCAGCAGAAAGACTTTTGAGACAGAGTTCCA -GAAAACAAAACATCAAGAGTGGGACTTTGTCGTGACAACTGACATTTCAGAGATGGGCGCCAACTTTAAA -GCTGACCGTGTCATAGATTCCAGGAGATGCCTAAAGCCGGTCATACTTGATGGCGAGAGAGTCATTCTGG -CTGGACCCATGCCTGTCACACATGCCAGCGCTGCCCAGAGGAGGGGGCGCATAGGCAGGAATCCCAACAA -ACCTGGAGATGAGTATCTGTATGGAGGTGGGTGCGCAGAGACTGACGAAGACCATGCACACTGGCTTGAA -GCAAGAATGCTCCTTGACAATATTTACCTCCAAGATGGCCTCATAGCCTCGCTCTATCGACCTGAGGCCG -ACAAAGTAGCAGCCATTGAGGGAGAGTTCAAGCTTAGGACGGAGCAAAGGAAGACCTTTGTGGAACTCAT -GAAAAGAGGAGATCTTCCTGTTTGGCTGGCCTATCAGGTTGCATCTGCCGGAATAACCTACACAGATAGA -AGATGGTGCTTTGATGGCACGACCAACAACACCATAATGGAAGACAGTGTGCCGGCAGAGGTGTGGACCA -GACACGGAGAGAAAAGAGTGCTCAAACCGAGGTGGATGGACGCCAGAGTTTGTTCAGATCATGCGGCCCT 
-GAAGTCATTCAAGGAGTTTGCCGCTGGGAAAAGAGGAGCGGCTTTTGGAGTGATGGAAGCCCTGGGAACA -CTGCCAGGACACATGACAGAGAGATTCCAGGAAGCCATTGACAACCTCGCTGTGCTCATGCGGGCAGAGA -CTGGAAGCAGGCCTTACAAAGCCGCGGCGGCCCAATTGCCGGAGACCCTAGAGACCATTATGCTTTTGGG -GTTGCTGGGAACAGTCTCGCTGGGAATCTTTTTCGTCTTGATGAGGAACAAGGGCATAGGGAAGATGGGC -TTTGGAATGGTGACTCTTGGGGCCAGCGCATGGCTCATGTGGCTCTCGGAAATTGAGCCAGCCAGAATTG -CATGTGTCCTCATTGTTGTGTTCCTATTGCTGGTGGTGCTCATACCTGAGCCAGAAAAGCAAAGATCTCC -CCAGGACAACCAAATGGCAATCATCATCATGGTAGCAGTAGGTCTTCTGGGCTTGATTACCGCCAATGAA -CTCGGATGGTTGGAGAGAACAAAGAGTGACCTAAGCCATCTAATGGGAAGGAGAGAGGAGGGGGCAACCA -TAGGATTCTCAATGGACATTGACCTGCGGCCAGCCTCAGCTTGGGCCATCTATGCTGCCTTGACAACTTT -CATTACCCCAGCCGTCCAACATGCAGTGACCACTTCATACAACAACTACTCCTTAATGGCGATGGCCACG -CAAGCTGGAGTGTTGTTTGGTATGGGCAAAGGGATGCCATTCTACGCATGGGACTTTGGAGTCCCGCTGC -TAATGATAGGTTGCTACTCACAATTAACACCCCTGACCCTAATAGTGGCCATCATTTTGCTCGTGGCGCA -CTACATGTACTTGATCCCAGGGCTGCAGGCAGCAGCTGCGCGTGCTGCCCAGAAGAGAACGGCAGCTGGC -ATCATGAAGAACCCTGTTGTGGATGGAATAGTGGTGACTGACATTGACACAATGACAATTGACCCCCAAG -TGGAGAAAAAGATGGGACAGGTGCTACTCATAGCAGTAGCCGTCTCCAGCGCCATACTGTCGCGGACCGC -CTGGGGGTGGGGGGAGGCTGGGGCCCTGATCACAGCGGCAACTTCCACTTTGTGGGAAGGCTCTCCGAAC -AAGTACTGGAACTCCTCTACAGCCACTTCACTGTGTAACATTTTTAGGGGAAGTTACTTGGCTGGAGCTT -CTCTAATCTACACAGTAACAAGAAACGCTGGCTTGGTCAAGAGACGTGGGGGTGGAACAGGAGAGACCCT -GGGAGAGAAATGGAAGGCCCGCTTGAACCAGATGTCGGCCCTGGAGTTCTACTCCTACAAAAAGTCAGGC -ATCACCGAGGTGTGCAGAGAAGAGGCCCGCCGCGCCCTCAAGGACGGTGTGGCAACGGGAGGCCATGCTG -TGTCCCGAGGAAGTGCAAAGCTGAGATGGTTGGTGGAGCGGGGATACCTGCAGCCCTATGGAAAGGTCAT -TGATCTTGGATGTGGCAGAGGGGGCTGGAGTTACTACGCCGCCACCATCCGCAAAGTTCAAGAAGTGAAA -GGATACACAAAAGGAGGCCCTGGTCATGAAGAACCCATGTTGGTGCAAAGCTATGGGTGGAACATAGTCC -GTCTTAAGAGTGGGGTGGACGTCTTTCATATGGCGGCTGAGCCGTGTGACACGTTGCTGTGTGACATAGG -TGAGTCATCATCTAGTCCTGAAGTGGAAGAAGCACGGACGCTCAGAGTCCTCTCCATGGTGGGGGATTGG -CTTGAAAAAAGACCAGGAGCCTTTTGTATAAAAGTGTTGTGCCCATACACCAGCACTATGATGGAAACCC -TGGAGCGACTGCAGCGTAGGTATGGGGGAGGACTGGTCAGAGTGCCACTCTCCCGCAACTCTACACATGA 
-GATGTACTGGGTCTCTGGAGCGAAAAGCAACACCATAAAAAGTGTGTCCACCACGAGCCAGCTCCTCTTG -GGGCGCATGGACGGGCCCAGGAGGCCAGTGAAATATGAGGAGGATGTGAATCTCGGCTCTGGCACGCGGG -CTGTGGTAAGCTGCGCTGAAGCTCCCAACATGAAGATCATTGGTAACCGCATTGAAAGGATCCGCAGTGA -GCACGCGGAAACGTGGTTCTTTGACGAGAACCACCCATATAGGACATGGGCTTACCATGGAAGCTATGAG -GCCCCCACACAAGGGTCAGCGTCCTCTCTAATAAACGGGGTTGTCAGGCTCCTGTCAAAACCCTGGGATG -TGGTGACTGGAGTCACAGGAATAGCCATGACCGACACCACACCGTATGGTCAGCAAAGAGTTTTCAAGGA -AAAAGTGGACACTAGGGTGCCAGACCCCCAAGAAGGCACTCGTCAGGTTATGAGCATGGTCTCTTCCTGG -TTGTGGAAAGAGCTAGGCAAACACAAACGGCCACGAGTCTGTACCAAAGAAGAGTTCATCAACAAGGTTC -GTAGCAATGCAGCATTAGGGGCAATATTTGAAGAGGAAAAAGAGTGGAAGACTGCAGTGGAAGCTGTGAA -CGATCCAAGGTTCTGGGCTCTAGTGGACAAGGAAAGAGAGCACCACCTGAGAGGAGAGTGCCAGAGTTGT -GTGTACAACATGATGGGAAAAAGAGAAAAGAAACAAGGGGAATTTGGAAAGGCCAAGGGCAGCCGCGCCA -TCTGGTATATGTGGCTAGGGGCTAGATTTCTAGAGTTCGAAGCCCTTGGATTCTTGAACGAGGATCACTG -GATGGGGAGAGAGAACTCAGGAGGTGGTGTTGAAGGGCTGGGATTACAAAGACTCGGATATGTCCTAGAA -GAGATGAGTCGCATACCAGGAGGAAGGATGTATGCAGATGACACTGCTGGCTGGGACACCCGCATCAGCA -GGTTTGATCTGGAGAATGAAGCTCTAATCACCAACCAAATGGAGAAAGGGCACAGGGCCTTGGCATTGGC -CATAATCAAGTACACATACCAAAACAAAGTGGTAAAGGTCCTTAGACCAGCTGAAAAAGGGAAGACAGTT -ATGGACATTATTTCGAGACAAGACCAAAGGGGGAGCGGACAAGTTGTCACTTACGCTCTTAACACATTTA -CCAACCTAGTGGTGCAACTCATTCGGAATATGGAGGCTGAGGAAGTTCTAGAGATGCAAGACTTGTGGCT -GCTGCGGAGGTCAGAGAAAGTGACCAACTGGTTGCAGAGCAACGGATGGGATAGGCTCAAACGAATGGCA -GTCAGTGGAGATGATTGCGTTGTGAAGCCAATTGATGATAGGTTTGCACATGCCCTCAGGTTCTTGAATG -ATATGGGAAAAGTTAGGAAGGACACACAAGAGTGGAAACCCTCAACTGGATGGGACAACTGGGAAGAAGT -TCCGTTTTGCTCCCACCACTTCAACAAGCTCCATCTCAAGGACGGGAGGTCCATTGTGGTTCCCTGCCGC -CACCAAGATGAACTGATTGGCCGGGCCCGCGTCTCTCCAGGGGCGGGATGGAGCATCCGGGAGACTGCTT -GCCTAGCAAAATCATATGCGCAAATGTGGCAGCTCCTTTATTTCCACAGAAGGGACCTCCGACTGATGGC -CAATGCCATTTGTTCATCTGTGCCAGTTGACTGGGTTCCAACTGGGAGAACTACCTGGTCAATCCATGGA -AAGGGAGAATGGATGACCACTGAAGACATGCTTGTGGTGTGGAACAGAGTGTGGATTGAGGAGAACGACC -ACATGGAAGACAAGACCCCAGTTACGAAATGGACAGACATTCCCTATTTGGGAAAAAGGGAAGACTTGTG 
-GTGTGGATCTCTCATAGGGCACAGACCGCGCACCACCTGGGCTGAGAACATTAAAAACACAGTCAACATG -GTGCGCAGGATCATAGGTGATGAAGAAAAGTACATGGACTACCTATCCACCCAAGTTCGCTACTTGGGTG -AAGAAGGGTCTACACCTGGAGTGCTGTAAGCACCAATCTTAGTGTTGTCAGGCCTGCTAGTCAGCCACAG -CTTGGGGAAAGCTGTGCAGCCTGTGACCCCCCCAGGAGAAGCTGGGAAACCAAGCCTATAGTCAGGCCGA -GAACGCCATGGCACGGAAGAAGCCATGCTGCCTGTGAGCCCCTCAGAGGACACTGAGTCAAAAAACCCCA -CGCGCTTGGAGGCGCAGGATGGGAAAAGAAGGTGGCGACCTTCCCCACCCTTCAATCTGGGGCCTGAACT -GGAGATCAGCTGTGGATCTCCAGAAGAGGGACTAGTGGTTAGAGGAG diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e743eb2..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -poretools==0.6.0 -pysam==0.9.1.4 diff --git a/run.py b/run.py deleted file mode 100644 index 657e32b..0000000 --- a/run.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -import os, subprocess, shutil - -libraries = ['usvi-library1-2016-12-10'] -barcodes = ['NB03'] - -for library in libraries: - for barcode in barcodes: - - # poretools fast5 to fastq - input_dir = 'data/libraries/' + library + '/basecalled_reads/pass_demultiplex/' + barcode - if not os.path.exists(input_dir): - raise NotADirectoryError('Input directory', input_dir, 'does not exist') - output_dir = 'data/libraries/' + library + '/fastq_reads/' - output_file = output_dir + barcode + '.fastq' - if not os.path.exists(output_dir): - os.makedirs(output_dir) - f = open(output_file, "w") - call = map(str, ['poretools', 'fastq', '--type', '2D', input_dir]) - print('* ' + ' '.join(call)) - subprocess.call(call, stdout=f) - - # marginAlign to reference - input_file = 'data/libraries/' + library + '/fastq_reads/' + barcode + '.fastq' - output_dir = 'data/libraries/' + library + '/sam_reads/' - output_file = output_dir + barcode + '.sam' - if not os.path.exists(output_dir): - os.makedirs(output_dir) - # marginAlign input.fastq ref.fasta out.sam - call = map(str, ['marginAlign', input_file, 'refs/Zika_FP.fasta', output_file]) - if os.path.exists('jobTree'): - shutil.rmtree('jobTree') - 
print('* ' + ' '.join(call)) - subprocess.call(call) - if os.path.exists('jobTree'): - shutil.rmtree('jobTree') - - # samtools convert sam to sorted bam - input_file = 'data/libraries/' + library + '/sam_reads/' + barcode + '.sam' - output_dir = 'data/libraries/' + library + '/bam_reads/' - output_file = output_dir + barcode + '.bam' - if not os.path.exists(output_dir): - os.makedirs(output_dir) - # samtools view -bS file.sam | samtools sort -o file_sorted - call = 'samtools view -bS ' + input_file + ' | samtools sort -o ' + output_file - print('* ' + call) - subprocess.call(call, shell=True) - - # samtools create index from sorted bam - input_file = 'data/libraries/' + library + '/bam_reads/' + barcode + '.bam' - output_file = 'data/libraries/' + library + '/bam_reads/' + barcode + '.bai' - # samtools index test_sorted.bam test_sorted.bai - call = map(str, ['samtools', 'index', input_file, output_file]) - print('* ' + ' '.join(call)) - subprocess.call(call) diff --git a/scripts/demultiplex.py b/scripts/demultiplex.py deleted file mode 100644 index ce6fc2f..0000000 --- a/scripts/demultiplex.py +++ /dev/null @@ -1,295 +0,0 @@ -# Script written by Nick Loman -# https://github.com/zibraproject/zika-pipeline/blob/master/barcodes/demultiplex2.py - -#!/usr/bin/env python -import ctypes as ct -import ssw_lib -from Bio import SeqIO -import argparse -import os,sys - - -def get_parser(): - parser = argparse.ArgumentParser( - description="""A simple read demultiplexer for Oxford Nanopore data.""", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("input", action=FileExist, - help="Path to fasta file.") - parser.add_argument("--barcodes",default="NB_barcodes.fasta", type=str, - help="Relative path to fasta file describing barcodes.") - parser.add_argument("--threshold", default=90, type=int, - help="Minimum match score to accept called barcodes.") - - - return parser - -def align_seq(seq,args): - resultdict=dict() - for bc_name in barcode_dict: 
- match,score = nucl_align(seq,barcode_dict[bc_name],"query",bc_name) - resultdict[match]=dict() - resultdict[match]["score"]=score - - - results = sorted([(resultdict[x]["score"],x,resultdict[x]) for x in resultdict.keys()])[::-1] - #for result in results: - # print result - result = results[0] - score,ide,details=result - #print ide.split("_")[0],score,details - - if score >= args.threshold: - next - else: - ide = "unclassified" - - return ide.split("_")[0],score - -def nucl_align(sQSeq,sRSeq,query,target): - #pathtolibssw=pkg_resources.resource_filename('nanonet', 'libssw.so') - #ospathtolibssw=os.path.dirname(pathtolibssw) - sQId=query - sRId=target - lEle = [] - dRc = {} - dEle2Int = {} - dInt2Ele = {} - lEle = ['A', 'C', 'G', 'T', 'N'] - dRc = {'A':'C', 'C':'G', 'G':'C', 'T':'A', 'a':'C', 'c':'G', 'g':'C', 't':'A'} - for i,ele in enumerate(lEle): - dEle2Int[ele] = i - dEle2Int[ele.lower()] = i - dInt2Ele[i] = ele - nEleNum = len(lEle) - lScore = [0 for i in xrange(nEleNum**2)] - for i in xrange(nEleNum-1): - for j in xrange(nEleNum-1): - if lEle[i] == lEle[j]: - lScore[i*nEleNum+j] = 3 - else: - lScore[i*nEleNum+j] = -1 -# translate score matrix to ctypes - mat = (len(lScore) * ct.c_int8) () - mat[:] = lScore -# set flag - nFlag = 0 - # This line should be the path to libssw.so but I can't get it to work. 
- ssw = ssw_lib.CSsw("") - - -# build query profile - qNum = to_int(sQSeq, lEle, dEle2Int) - qProfile = ssw.ssw_init(qNum, ct.c_int32(len(sQSeq)), mat, len(lEle), 2) -# set mask len - if len(sQSeq) > 30: - nMaskLen = len(sQSeq) / 2 - else: - nMaskLen = 15 - -# iter target sequence - rNum = to_int(sRSeq, lEle, dEle2Int) - -# format ofres: (nScore, nScore2, nRefBeg, nRefEnd, nQryBeg, nQryEnd, nRefEnd2, nCigarLen, lCigar) - res = align_one(ssw, qProfile, rNum, len(sRSeq), 3, 1, nFlag, nMaskLen) - resRc = None -# build cigar and trace back path - strand = 0 - if resRc == None or res[0] > resRc[0]: - resPrint = res - strand = 0 - sCigar, sQ, sA, sR = buildPath(sQSeq, sRSeq, res[4], res[2], res[8]) - else: - resPrint = resRc - strand = 1 - sCigar, sQ, sA, sR = buildPath(sQRcSeq, sRSeq, resRc[4], resRc[2], resRc[8]) - #print 'target_name: {}\nquery_name: {}\noptimal_alignment_score: {}\t'.format(sRId, sQId, resPrint[0]) - #print 'suboptimal_alignment_score: {}\t'.format(resPrint[1]) - #print res - ssw.init_destroy(qProfile) - return sRId,resPrint[0] - -def to_int(seq, lEle, dEle2Int): - """ - translate a sequence into numbers - @param seq a sequence - """ - num_decl = len(seq) * ct.c_int8 - num = num_decl() - for i,ele in enumerate(seq): - try: - n = dEle2Int[ele] - except KeyError: - n = dEle2Int[lEle[-1]] - finally: - num[i] = n - - return num - -def align_one(ssw, qProfile, rNum, nRLen, nOpen, nExt, nFlag, nMaskLen): - """ - align one pair of sequences - @param qProfile query profile - @param rNum number array for reference - @param nRLen length of reference sequence - @param nFlag alignment flag - @param nMaskLen mask length - """ - res = ssw.ssw_align(qProfile, rNum, ct.c_int32(nRLen), nOpen, nExt, nFlag, 0, 0, nMaskLen) - - nScore = res.contents.nScore - nScore2 = res.contents.nScore2 - nRefBeg = res.contents.nRefBeg - nRefEnd = res.contents.nRefEnd - nQryBeg = res.contents.nQryBeg - nQryEnd = res.contents.nQryEnd - nRefEnd2 = res.contents.nRefEnd2 - lCigar = 
[res.contents.sCigar[idx] for idx in range(res.contents.nCigarLen)] - nCigarLen = res.contents.nCigarLen - ssw.align_destroy(res) - - return (nScore, nScore2, nRefBeg, nRefEnd, nQryBeg, nQryEnd, nRefEnd2, nCigarLen, lCigar) - -def buildPath(q, r, nQryBeg, nRefBeg, lCigar): - """ - build cigar string and align path based on cigar array returned by ssw_align - @param q query sequence - @param r reference sequence - @param nQryBeg begin position of query sequence - @param nRefBeg begin position of reference sequence - @param lCigar cigar array - """ - sCigarInfo = 'MIDNSHP=X' - sCigar = '' - sQ = '' - sA = '' - sR = '' - nQOff = nQryBeg - nROff = nRefBeg - for x in lCigar: - n = x >> 4 - m = x & 15 - if m > 8: - c = 'M' - else: - c = sCigarInfo[m] - sCigar += str(n) + c - - if c == 'M': - sQ += q[nQOff : nQOff+n] - sA += ''.join(['|' if q[nQOff+j] == r[nROff+j] else '*' for j in xrange(n)]) - sR += r[nROff : nROff+n] - nQOff += n - nROff += n - elif c == 'I': - sQ += q[nQOff : nQOff+n] - sA += ' ' * n - sR += '-' * n - nQOff += n - elif c == 'D': - sQ += '-' * n - sA += ' ' * n - sR += r[nROff : nROff+n] - nROff += n - - return sCigar, sQ, sA, sR - -class FileExist(argparse.Action): - """Check if the input file exist.""" - def __call__(self, parser, namespace, values, option_string=None): - if not os.path.exists(values): - raise RuntimeError("File/path for '{}' does not exist, {}".format(self.dest, values)) - setattr(namespace, self.dest, values) - -def parse_barcodes(barcode_file): - #print "parsing barcodes" - barcode_list = list() - barcode_list.append("uncalssified") - barcode_dict = dict() - barcode_sequences = SeqIO.parse(open(barcode_file),'fasta') - for barcode in barcode_sequences: - name, sequence = barcode.id, str(barcode.seq) - barcode_dict[name]=sequence - barcode_list.append(name) - barcode_dict[name+"_rev"]=str(barcode.reverse_complement().seq) - #print barcode_list - #for barcode in barcode_dict: - # print barcode, barcode_dict[barcode] - - #sys.exit() 
- return barcode_dict,barcode_list - -def main(): - - args = get_parser().parse_args() - global barcode_dict - barcode_dict,barcode_list=parse_barcodes(args.barcodes) - - """barcode_dict = { - 'NB01': 'GGTGCTGAAGAAAGTTGTCGGTGTCTTTGTGTTAACCTTT', - 'NB01_rev': 'AAGGTTAACACAAAGACACCGACAACTTTCTTCAGCACCAGGTTA', - 'NB02': 'GGTGCTGTCGATTCCGTTTGTAGTCGTCTGTTTAACCTTT', - 'NB02_rev': 'AAGGTTAAACAGACGACTACAAACGGAATCGACAGCACCAGGTTA', - 'NB03': 'GGTGCTGGAGTCTTGTGTCCCAGTTACCAGGTTAACCTTT', - 'NB03_rev': 'AAGGTTAACCTGGTAACTGGGACACAAGACTCCAGCACCAGGTTA', - 'NB04': 'GGTGCTGTTCGGATTCTATCGTGTTTCCCTATTAACCTTT', - 'NB04_rev': 'AAGGTTAATAGGGAAACACGATAGAATCCGAACAGCACCAGGTTA', - 'NB05': 'GGTGCTGCTTGTCCAGGGTTTGTGTAACCTTTTAACCTTT', - 'NB05_rev': 'AAGGTTAAAAGGTTACACAAACCCTGGACAAGCAGCACCAGGTTA', - 'NB06': 'GGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTT', - 'NB06_rev': 'AAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCAGGTTA', - 'NB07': 'GGTGCTGGTGTTACCGTGGGAATGAATCCTTTTAACCTTT', - 'NB07_rev': 'AAGGTTAAAAGGATTCATTCCCACGGTAACACCAGCACCAGGTTA', - 'NB08': 'GGTGCTGTTCAGGGAACAAACCAAGTTACGTTTAACCTTT', - 'NB08_rev': 'AAGGTTAAACGTAACTTGGTTTGTTCCCTGAACAGCACCAGGTTA', - 'NB09': 'GGTGCTGAACTAGGCACAGCGAGTCTTGGTTTTAACCTTT', - 'NB09_rev': 'AAGGTTAAAACCAAGACTCGCTGTGCCTAGTTCAGCACCAGGTTA', - 'NB10': 'GGTGCTGAAGCGTTGAAACCTTTGTCCTCTCTTAACCTTT', - 'NB10_rev': 'AAGGTTAAGAGAGGACAAAGGTTTCAACGCTTCAGCACCAGGTTA', - 'NB11': 'GGTGCTGGTTTCATCTATCGGAGGGAATGGATTAACCTTT', - 'NB11_rev': 'AAGGTTAATCCATTCCCTCCGATAGATGAAACCAGCACCAGGTTA', - 'NB12': 'GGTGCTGCAGGTAGAAAGAAGCAGAATCGGATTAACCTTT', - 'NB12_rev': 'AAGGTTAATCCGATTCTGCTTCTTTCTACCTGCAGCACCAGGTTA' - } - barcode_list = ('NB01','NB02','NB03','NB04','NB05','NB06','NB07','NB08','NB09','NB10','NB11','NB12','unclassified') - """ - resultdict=dict() - input_file = args.input - - fasta_sequences = SeqIO.parse(open(input_file),'fasta') - - for fasta in fasta_sequences: - name, sequence = fasta.id, str(fasta.seq) - #new_sequence = some_function(sequence) - #print ">"+str(name) - #print sequence - - 
id_,score=align_seq(sequence,args) - print str(name),id_,score - if id_ not in resultdict: - resultdict[id_]=dict() - resultdict[id_]["counter"]=0 - resultdict[id_]["score"]=list() - resultdict[id_]["sequences"]=list() - resultdict[id_]["counter"]+=1 - resultdict[id_]["score"].append(score) - resultdict[id_]["sequences"].append(fasta) - - ##print resultdict - print "Score Threshold:",args.threshold - for ids in barcode_list: - if ids in resultdict.keys(): - print ids, - print resultdict[ids]["counter"], - print "Mean:", (sum(resultdict[ids]["score"])/resultdict[ids]["counter"]) - output_handle=open(os.path.join(os.path.dirname(input_file),ids+"_"+os.path.basename(input_file)),"w") - SeqIO.write(resultdict[ids]["sequences"], output_handle, "fasta") - output_handle.close() - else: - print ids,"0","Mean:N/A" - - - - -if __name__ == "__main__": - main()