diff --git a/test/input/TestOrderAndOrient/contigs.ebov.ambig.fasta.gz b/test/input/TestOrderAndOrient/contigs.ebov.ambig.fasta.gz new file mode 100644 index 000000000..a53fd24da Binary files /dev/null and b/test/input/TestOrderAndOrient/contigs.ebov.ambig.fasta.gz differ diff --git a/test/input/TestOrderAndOrient/expected.ebov.ambig.fasta b/test/input/TestOrderAndOrient/expected.ebov.ambig.fasta new file mode 100644 index 000000000..39bd9e9fa --- /dev/null +++ b/test/input/TestOrderAndOrient/expected.ebov.ambig.fasta @@ -0,0 +1,311 @@ +>KJ660346.2_contigs_ordered_and_oriented +GAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGT +TACTGTAATCATACCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGG +TCTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGG +CCTTATCACATCACAAGTTCCGCCTTAAACTCTGCAGGGTGATCCAACAACCTTAATAGC +AACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAAC +TTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCAACAACCCTAAAGCCTGGGGT +AAAACATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGAT +TCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCAC +AAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCA +GTGTATCAAGTAAACAATCTTGAGGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCT +GGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCG +TACCAAGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAGGGCAC +GGGTTCCGTTTTGAAGTCAAGAAGTGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCA +GCAGTATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACG +ACTGAAGCTAATGCCGGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTA +GTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAA +GGACTGATACAATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGT +TTGATGCGAACAAATTTTTTGATCAAATTTCTTCTAATACACCAAGGGATGCACATGGTT +GCCGGACATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTTTTTCA +GGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTT +CGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTCAAGGCT +GCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTT +TCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGA +GTCGCCACAGCCCACGGGAGCACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAG +CTCAGAGAGGCAGCCACTGAGGCTGAGAAGCAACTCCAACAATATGCGGAGTCTCGTGAA +CTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATCAGAAA +AAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAACTCTAAGAAAAGAGCGCCTG +GCCAAGCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGGACATTACGAT +GATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAA +GATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTAGTTGACCCCGAT +GATGGAGGCTACGGCGAATACCAAAGTTACTCGGAAAACGGCATGAGTGCACCAGATGAC +TTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACCAAGCCAGTGCCTAACAGATCG +ACCAAGGGTGGACAACAGAAAAACAGTCAAAAGGGCCAGCATACAGAGGGCAGACAGACA +CAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAACAATCCACCATGCCAGTGCT +CCACTCACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCCTCGCATGCTG +ACCCCAATCAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTT +CCGCCCTTAGAGTCAGATGATGAAGAACAGGACAGGGACGGAACTTCTAACCGCACACCC +ACTGTCGCCCCACCGGCTCCCGTATACAGAGATCACTCCGAAAAGAAAGAACTCCCGCAA +GATGAACAACAAGATCAGGACCACATTCAAGAGGCCAGGAACCAAGACAGTGACAACACC +CAGCCAGAACATTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGCCATTT +GATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGAT +GGTAAAGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAA +AAAGAGGCCATGAATGATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGG +CCAGTAATGAATCACAGGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGC +ATGTAATAATGGGATGATTTAATCGACAAATAGCTAACATTAAATAGTCAAGGAACGCAA +ACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTAG +TTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGAGTCTT +AATGCAAATAAGCGTTAAGCCACAGTTATAGCCATAATGGTAACTCAATATCTTAGCCAG +CGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACA +TTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCAT +CCTTACGTCAATTGAATTCTCTAGCACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAG +CTGGCCTAACAAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGTGGCCACGACTCA +AAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGG +AAGGATTCCTGTAAACGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGCTACGC +ATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGCAACAGTCAAACCCAAACGGA +CCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGT +TGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAA +TGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGC +TGAGATGGTTGCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGC +TGCGGCAACTGAGGCTTATTGGGCTGAACATGGTCAACCACCACCTGGACCATCACTTTA +TGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACTGTCCCTCAAAGTGT +TAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAA +ACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGG +AACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATT +GGACATTATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGC +CCTAATTCAAATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCA +CATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTTGCGTCCAGTCCCACC +ATCACCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAAACACT +TGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAGAGGCAACTAATAGCAGAGGC +TTCAACTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGTATCAGCCCTA +GATAATATAAGTCAATTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTA +AAGATAAATGTAATAGGAGCTATACCTCTGACAGTATTATAATTAATTGTTATTAAGTAA +CCCAAACCAAAAATGATGAAGATTAAGAAAAACCTACCTCGACTGAGAGAGTGTTTTTTC +ATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATT +GCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGCCAGGTCAAATTCAACAAT +TGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGAGACAC +TCCATCGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCAGCCACACACC +AGGCAGTGTGTCATCAGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAA +AGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTA +CAGCTTTGACTCAACTACGGCCGCCATCATGCTTGCTTCATATACTATCACCCATTTCGG +CAAGGCAACCAATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCACCC +CCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCACCAGT +CCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACTCAACCACT +GCCTGCTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGTTGCGTCCAGG +AATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGG +GAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGA +CTTTAAGATCGTTCCAATTGATCCAACCAAAAATATCATGGGTATCGAAGTGCCAGAAAC +TCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCCAAAAATGGACAACCAATCATCCC +TGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACCATGGT +AATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGGTTGAGAA +GTAATTGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGATAGTGATAAC +ATCTTTTTAATAATCCGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGT +GCTTTACACCATTGTCTCTTTTCTCTCCTAAATGTAGAGCTTAACAAAAGACTCATAATA +TACCTGTTTTTAAAAGATTGATTGATGAAAGATCATGACTAATAACATTACAAACAATCC +TACTATAATCAATACGGTGATTCAAATGTCAATCTTTCTCATTGCACATACTCTTTGTCC +TTATCCTCAAATTGCCTACATGCTTACATCTGAGGACAGCCAGTGTGACTTGGATTGGAG +ATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCACAATCTAAGTACAGACATTGC +TCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCAT +CTCTCTTAGATTATTTGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAAC +CAAAATAAGCTTCACTAGAAGGATATTGTGAGGCGACAACACAATGGGTGTTACAGGAAT +ATTGCAGTTACCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCT +TTTCCAAAGAACATTTTCCATCCCGCTTGGAGTTATCCACAATAGTACATTACAGGTTAG +TGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGT +TGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGANNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGT +CTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCA +CAATACTCCGTATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATT +CAGTACTATAATCACTCTCATTTCAAATTGATAAGATATGCATAATTGCCTTAATATATA +AAGAGGTATGATATAACCCAAACATTGACCAAAGAAAATCATAATCTCGTATCGCTCGCA +ATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTGA +CTCTACAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTATGATTTACTAA +TGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGT +GGAGGAGGTCTATGGTATTCGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAAT +ATTGTTCTTGTTTCAGGAGGTATATTGTGACCGGAAAAGCTAAACTAATGATGAAGATTA +ATGCGGAGGTTTGATGAGAATAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTT +CATCTCCTTTTAGCAAAATACTATTTCAGGATAGTCCAGCTAGTGACACGTCTTTTAGCT +GTATACCAGTTGCCCCTGAGATACGCCACAAAAGTGTCTCTGAGCTAAAGTGGTCTGTAC +ACATCTCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCATTTAAAATTT +AGTGCATAAATCTGGGCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACC +TACAACGAACATTACTTTGAGCACCCTCACAATTAAAAAATAAGAGCGTCGTTCCAACAA +TCGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATCGATACTCC +AGACACCAAGCAAGACCTGAGAAAAAACCATGGCCAAAGCTACGGGACGATACAATCTAA +TATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTTCTTAG +TTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGGTATTGAGTTTGATGTGACTC +ACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAA +TGACAAGGAACCTATTTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGC +TGTGGGCACTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGTTAATTGACCAGTCTT +TGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACA +CTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGT +CGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGTCGTGA +ACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAA +CTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACC +GCAAGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTA +CACAAGGGTCCTCGACACGAATGCAAAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTA +TCTAACTAAGATGGAATACTTCATATTGGGCTAACTCATATATGCTGACTCAATAGTTAA +CTTGACATCTCTGCCTTCATAATCAGATATATAAGCATAATAAATAAATACTCATATTTC +TTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTGACACC +CTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGATTCCAAGACAACATCATAGAA +TTGCTTTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGAAATGGTTAAT +TGTAACTAAACCCGCAGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTC +CATACTCGTAACTAACATTAGATAAGTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAA +AAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGACATTCTTCCTCTTGAT +ATTAAATGGCTACACAACATACCCAATACCCAGACGCCAGGTTATCATCACCAATTGTAT +TGGACCAATGTGACCTTGTCACTAGAGCTTGCGGGTTGTATTCATCATACTCCCTTAATC +CGCAACTACGCAACTGTAAACTCCCGAAACATATATACCGTTTAAAATATGATGTAACTG +TTACCAAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAA +TTCTTCTCAAGGCACTATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGT +TCTTAGATGAAATTATTAAGTACACAATGCAAGATGCTCTCTTCCTGAAATATTATCTCA +AAAATGTGGGTGCTCAAGAAGACTGTGTTGATGACCACTTTCAAGAAAAAATCTTATCTT +CAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATGACCTGGCTATTTTAA +CTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACGTGGTTTGTTCATGATGATT +TAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCACTGTTAC +CACTGAACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACATCAGTATTCA +AAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAA +TGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGG +TTGAGGACCCATTTTGCTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGA +GCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAATCATTAAGTTTCTCG +AACCATTGTGCTTGGCTAAAATTCAATTGTGCTCAAAGTACACCGAGAGGAAGGGCCGAT +TCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGAAGAAATTACAGAAATACGTG +CACTAAAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGG +AGATGACGCCACAACAACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTG +TGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCAT +TACGCCCTATCGTGATTTTCGAGACATATTGTGTTTTTAAATATAGCATTGCAAAACATT +ATTTTGATAGTCAAGGATCTTGGTACAGTGTTACCTCAGATAGAAATCTAACACCAGGTC +TTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACTGCTAT +GGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAA +GTATTTTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGATGCAGTATTCG +AGCCTAATGTTCTGGGATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAAC +AATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCGCAAAAACTCG +AGTATCTACTACCACAATATCGGAATTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATG +TAGGTAGAACTTTCGGAAAATTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAG +CTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTACGGAAC +GTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTCG +GTGAGCATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTG +CATTTAGGTATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTA +AGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATT +ATTATAATCCACCGCATAACCTCACACTGGAAAATCGAAACAACCCCCCTGAAGGGCCTA +GTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTA +TTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTGCGCTCAGCTG +TGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTTCCCCTTAGAGACTGATGCAG +GCGAGCAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAG +TTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTA +TCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGG +CTACAAGAATGGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTA +GTATAGGTACTGCTTTTGAGCGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGAA +TAACCGCAGCTTTCCATACGTTCTTTTCGGTGAGAATCTTGCAATATCATCACCTCGGAT +TTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAA +TATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGA +AATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTT +ATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGA +ACTGCACTGCCATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTTCCTGGGTCGCAAG +ACTTAACTTCATTTCTGCGCCAGATTGTACGTAGGACTATCACCCTAAGTGCGAAAAACA +AACTTATTAATACCTTATTTCATGCATCAGCTGACTTCGAAGACGAAATGGTTTGTAAGT +GGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGCCGATATATTTTCACGCACGC +CGAGCGGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACATTATTAGCCT +CTAAGATCATCAACAATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATACATT +GCAAAGGTGGAGTCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGC +TTTAACCCAAATAACTTGCACAGTTGATTTAGCACAGATCCTGAGGGAATATTCATGGGC +ACATATTTTAGAGGGGAGACCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATT +CAAAGTGGTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCCAAGCAACC +TGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACATATTGTTAGTGCATGGCCAAA +TGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGA +AGATAAGATAGGGCAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGC +CATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCT +AATAAAACCATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGAC +CCCTTCACATTACTCGGGAAATATTGTTCATAGGTACAACGATCAATACAGTCCTCATTC +TTTCATGGCCAATCGTATGAGTAACTCAGCAACGCGATTGATTGTTTCTACAAACACTTT +AGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAGCAATATTATTTTCCAGAATGT +TATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATAT +CCAGTATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTCA +GTACTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAATGAATT +GATTTATGACAATAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTTGATAACCC +ATTTTTCCAAGGCAAACAGCTGAACATTATAGAAGATGACCTTATTCGACTGCCTCACTT +ATCTGGATGGGAGCTAGCTAAGACCATCATGCAATCAATTATTTCAGATAGCAATAATTC +GTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTA +TCCCAAAATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATAC +AATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAAT +TCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGT +TATGTCACGATTAATGAGTATTGATCCCCATTTTTCTATTTACATAGGCGGTGCTGCAGG +TGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCT +TACATTTGTAAAGGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGATAGTATA +TCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTTCCTCCATCAGATCGTAGAACT +GCTGGTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAATGATCATGTACA +TCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGTC +ATTGGCGTACTGGAGGAGCAGGCACAGAAACAGCAACCGAAAAGACTTGACAAGAAACTC +TTCAACTGGATCAAGCACAAACAACAGTGATGGTCATATTAAGAGAAGTCAAGAACAAAC +CACCAGAGATCCACATGATGGCACTGAACGGAGTCTAGTCCTGCAAATGAGCCATGAAAT +AAAAAGAACGACAATTCCACAAGAGAACACGCACCAGGGTCCGTCGTTCCAGTCATTTCT +AAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAATTTCGATAGATCGAGACACAA +TGTGAAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAAATAATCTCACA +TCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAA +TGAGTCACAAACCCAAGATGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGA +TACCACAGTTTATTGTAGGTTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGA +GGTCCTTTGGGAAATAGAGAATTTTAAGTCGGCTGTGACGCTGGCAGAGGGAGAAGGTGC +TGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTCTTCAACACGCTAGC +TACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCT +ACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAG +CCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAG +GCAAGTCGAGGTTATAACCATGGATGCAGAGACGACAGAGAATATAAACAGATCGAAATT +GTACGAAGCTGTACATAAATTGATCTTACACCATGTTGATCCCAGCGTGTTGAAAGCAGT +GGTCCTTAAAGTCTTTCTAAGTGATACCGAGGGTATGTTATGGCTAAATGATAATCTAGC +CCCGTTTTTTGCCACTGGGTATTTAATTAAGCCAATAACGTCAAGTGCCAGGTCTAGTGA +GTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCA +TCTCAGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGGAGCCCATA +CTGGCTAAGTCATTTAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCCT +TGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTTGTCGATTCAAAAAG +AGGTCCACTAGTCTCTGTCACTCAGCACTTAGCACATCTTAGGGCAGAGATTCGAGAATT +GACCAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTAC +TGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTATTGTACA +AGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAG +TGTGTGCAATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGTTTCTTAGTTCA +AACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATTGAAAGGCTGACAGG +GCTTCTGAGTTTATTTCCAGATGGTCTCTACAGGTTCGATTGAATAACCGTGCATAGTAT +TTTGATACTTGTAAAGGTTGGTTATCAACATACAGATTATAAAAAACTCATAAATTGCTC +TCATACATCATCTTGATCTGATTTCAATAAATAACTATTTAGATAACGAAAGGAGTCCTT +ACATTATACACTATATTTGGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACAATACAG +CATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAACATAATAAACTCCGCACTCTT +TATAATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATAGTAATGTTGTA +TCAATATCTTGCCAGATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAAC +TTAATCTTAAAGATCAGTTTT diff --git a/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta b/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta index f811a8e53..bcca56396 100644 --- a/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta +++ b/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta @@ -101,10 +101,10 @@ CTGTCTTTAGCTCTCTTCCTACTGTCTATCCACCGTTTNNNNNNNNNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNTCAGTTTTAACAAATTGACTTTCTATGTTCCCTTGCCTTAGTTTGTTCCTA +NNNCAACACTGCATCTGCTCACTTTGAGAGTCAGCAGCCCTTTCTTCACCAGAATCTAAA +TCTGCATATAAGAACTTAAGGACTGGAGAGGCTCTTCGGAACTGGTATGTTAACTGCTCA +ATATTGTCATCACAAGTGTCTATTTTATCACTATATAGCTTTCTGAAATCCCCTAACAGC +TTCATCTTATCAGTTTTAACAAATTGACTTTCTATGTTCCCTTGCCTTAGTTTGTTCCTA AAGATTTGATACTCCTCTTCAATTAAACTCTTGACTTCATGAGAGGTGAGCTTGTTATTA ATGCCCTGATGACAACAAGAAATTATTTCCTCAAAGTGTTTTGCACGCTTGTCAGTTAGA ACATTAATGCTTTCAATCCCGGAGAGCCTCCCAGACGTTAAGGCTAAAGATTCACAAAGT diff --git a/test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta b/test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta new file mode 100644 index 000000000..3ec1a60e7 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta @@ -0,0 +1,273 @@ +>KJ660346.2 Zaire ebolavirus isolate H.sapiens-wt/GIN/2014/Makona-Kissidougou-C15, complete genome +CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAA +TTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCATACCTGGTTTGTTT +CAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGT +TGAAAATCCCTTGTCAACATCTAGGCCTTATCACATCACAAGTTCCGCCTTAAACTCTGCAGGGTGATCC +AACAACCTTAATAGCAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAA +TTAACTTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCAACAACCCTAAAGCCTGGGGTAAAAC +ATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAA +GTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCG +TTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAGGAAATTTGCCA +ACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTT +TGTCTTCATCATGCGTACCAAGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAG +GGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGT +ATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACGACTGAAGCTAATGCC +GGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGA +AGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGT +AGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTTTGATCAAATTTCTTCTAATACACCAA +GGGATGCACATGGTTGCCGGACATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTT +TTTCAGGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCT +CCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTCAAGGCTGCACTCAGCTCCCTG +GCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATG +GTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAGTCGCCACAGCCCACGGGAGCACCCTCGCAGGAGT +AAATGTTGGAGAACAGTATCAACAGCTCAGAGAGGCAGCCACTGAGGCTGAGAAGCAACTCCAACAATAT +GCGGAGTCTCGTGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATC +AGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAACTCTAAGAAAAGAGCGCCTGGCCAA +GCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATT +CCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGG +ATACGACCATTCCCGATGTGGTAGTTGATCCCGATGATGGAGGCTACGGCGAATACCAAAGTTACTCGGA +AAACGGCATGAGTGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACCAAGCCA +GTGCCTAACAGATCGACCAAGGGTGGACAACAGAAAAACAGTCAAAAGGGCCAGCATACAGAGGGCAGAC +AGACACAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAACAATCCACCATGCCAGTGCTCCACT +CACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACCCCAATCAACGAA +GAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTTCCGCCCTTAGAGTCAGATGATGAAG +AACAGGACAGGGACGGAACTTCTAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCA +CTCCGAAAAGAAAGAACTCCCGCAAGATGAACAACAAGATCAGGACCACATTCAAGAGGCCAGGAACCAA +GACAGTGACAACACCCAGCCAGAACATTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGC +CATTTGATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGTAA +AGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCCATGAAT +GATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGGCCAGTAATGAATCACAGGAATAAAT +TCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGTAATAATGGGATGATTTAATCGACAAATAGCT +AACATTAAATAGTCAAGGAACGCAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAAT +AAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGA +GTCTTAATGCAAATAAGCGTTAAGCCACAGTTATAGCCATAATGGTAACTCAATATCTTAGCCAGCGATT +TATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTT +TATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGC +ACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGCCTAACAAGATGACAACTAGAACAAAGGGCA +GGGGCCATACTGTGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGA +GCAGCTAATGACCGGAAGGATTCCTGTAAACGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGC +TACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGCAACAGTCAAACCCAAACGGACCCAA +TTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAAC +CATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATG +GCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGA +CAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCTGAACATGGTCAACCACCACC +TGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACTGTCCCTCAA +AGTGTTAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAAACCTG +ACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCA +ATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATTGGACATTATTCATGCTGAGTTCCAG +GCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCC +AAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTT +GCGTCCAGTCCCACCATCACCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAA +ACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAGAGGCAACTAATAGCAGAGGCTTCAA +CTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGTATCAGCCCTAGATAATATAAGTCAA +TTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTAAAGATAAATGTAATAGGAGCTATAC +CTCTGACAGTATTATAATTAATTGTTATTAAGTAACCCAAACCAAAAATGATGAAGATTAAGAAAAACCT +ACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATAT +GAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGCCAGGTCAAATTCA +ACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGAGACACTCCAT +CGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATC +AGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATT +TGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTG +CTTCATATACTATCACCCATTTCGGCAAGGCAACCAATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGG +AATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCA +CCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACTCAACCACTGCCTG +CTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCC +AAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCG +GAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATA +TCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCCAAAAATGG +ACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACC +ATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGGTTGAGAAGTAAT +TGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGATAGTGATAACATCTTTTTAATAATC +CGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTCTTTTCTC +TCCTAAATGTAGAGCTTAACAAAAGACTCATAATATACCTGTTTTTAAAAGATTGATTGATGAAAGATCA +TGACTAATAACATTACAAACAATCCTACTATAATCAATACGGTGATTCAAATGTCAATCTTTCTCATTGC +ACATACTCTTTGTCCTTATCCTCAAATTGCCTACATGCTTACATCTGAGGACAGCCAGTGTGACTTGGAT +TGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCACAATCTAAGTACAGACATTGCTCTTC +TAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATT +TGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAACCAAAATAAGCTTCACTAGAAGGATA +TTGTGAGGCGACAACACAATGGGTGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACAT +CATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCGCTTGGAGTTATCCACAATAG +TACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGA +TCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTGCCATCTGCGACTAAAAGATGGGGCT +TCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCT +TGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGG +TGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTG +CTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGT +TGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAAT +GCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACTA +ATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACA +GTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAGGAGCAACACCACGGGAAAACTAATT +TGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCAC +TAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAACGGACCCAAAAACATCAGTGGTCAG +AGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAACACAACAAATGAAGACCACAAAATCATGGCTTCAG +AAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGAAAGCTGCAGTGTCGCATCTGACAACCCT +TGCCACAATCTCCACGAGTCCTCAACCTCCCACAACCAAAACAGGTCCGGACAACAGCACCCATAATACA +CCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAACATCACCGTAGAGCAGACAACGACA +GCACAGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCTTAAAAGCAGAGAACACCAACACGAG +TAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCAAAACTACAGCGAGACTGCTGGCAAC +AACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATA +CTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCA +ACCCAAATGCAACCCCAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGG +ATACCATATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGTTTAA +TCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCCAACTGTTCCTGAGAGCCACAACTGA +GCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTGCTGCAGCGATGGGGTGGCACATGC +CACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATC +AGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATG +GAGACAATGGATACCGGCAGGTATTGGAGTTACAGGTGTTATAATTGCAGTTATCGCTTTATTCTGTATA +TGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTTCACGGCAAAACTCAACCTCAAATCAATGAAACTA +GGATTTAATTATATGAATCACTTGAATCTAAGATTACTTGACAAATGATAACATAATACACTGGAGCTTC +AAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCT +AGCTATATCTTTAAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAGTC +TTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAATGAAAAAGGAAAAAAGCCTTTTTATTAAGTTG +TAATAATCAGATCTGCAAACCGGTAGAATTTAGTTGTAACCTAACACACACAAAGCATTGGTAAAAAAGT +CAATAGAAATTTAAACAGTGAGTGCAGACAACTCTTAAATGGAAGCTTCATATGAGAGAGGACGCCCCCG +AGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAAT +TATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGA +GAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTG +TGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGAAAGTTTAACTGATAGGGAATTACTCCTACTAATC +GCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAA +ATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTACCTTGTTGACACTGATCAAGACGGC +AGAACACTGGGCGAGACAAGACATCCGAACCATAGAGGATTCCAAATTAAGGGCATTGTTAACTCTATGT +GCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTGTGTGAGACACACCTAAGGCGCGAAG +GGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAG +TTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAAT +ATCGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAAT +CAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGG +CTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATC +AAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCATTTCAAATTGATAAGATATGCATAA +TTGCCTTAATATATAAAGAGGTATGATATAACCCAAACATTGACCAAAGAAAATCATAATCTCGTATCGC +TCGCAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTGACTCTA +CAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTATGATTTACTAATGATCTCTTAAAATA +TTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGTGGAGGAGGTCTATGGTATTCGCTAT +TGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAGGAGGTATATTGTGACCGGA +AAAGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATGAGAATAAACCTTATTATTCAGATTAGGCC +CCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATACTATTTCAGGATAGTCCAGCTAGTGACACGTCTTT +TAGCTGTATACCAGTTGCCCCTGAGATACGCCACAAAAGTGTCTCTGAGCTAAAGTGGTCTGTACACATC +TCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCATTTAAAATTTAGTGCATAAATCTGG +GCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACCTACAACGAACATTACTTTGAGCGCC +CTCACAATTAAAAAATAAGAGCGTCGTTCCAACAATCGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGT +CTAGACAACAAAATATCGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCCAAAGCTACGG +GACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTT +CTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAA +GGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAACCTAT +TTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGCTGTGGGCACTGAGAGTCATCCTTGC +AGCAGGGATACAGGACCAGTTAATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATC +TCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGA +GCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGT +CGTGAACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGA +ACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCAAGAAGCCTGGGC +CGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGGTCCTCGACACGAATGCA +AAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTATCTAACTAAGATGGAATACTTCATATTGGGCTAAC +TCATATATGCTGACTCAATAGTTAACTTGACATCTCTGCCTTCATAATCAGATATATAAGCATAATAAAT +AAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTG +ACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGATTCCAAGACAACATCATAGAATTGCT +TTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGAAATGGTTAATTGTAACTAAACCCGC +AGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTCCATACTCGTAACTAACATTAGATAA +GTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGA +AGCAGTTGACATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCCAGGTTAT +CATCACCAATTGTATTGGACCAATGTGACCTTGTCACTAGAGCTTGCGGGTTGTATTCATCATACTCCCT +TAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATATACCGTTTAAAATATGATGTAACTGTTACC +AAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAATTCTTCTCAAGGCAC +TATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATTATTAAGTACAC +AATGCAAGATGCTCTCTTCCTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAC +CACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATG +ACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACGTGGTTTGTTCATGA +TGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCACTGTTACCACTG +AACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACATCAGTATTCAAAGAAGCGGTTCAAG +GGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAATGTGCAAAGATTTAATTACATGTCG +ATTCAACACAACTCTAATCTCAAAAATAGCAGAGGTTGAGGACCCAGTTTGCTCTGATTATCCCAATTTT +AAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAA +TCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAATTCAATTGTGCTCAAAGTACACCGAGAGGAAGGG +CCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGAAGAAATTACAGAAATACGTGCACTA +AAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAAC +AACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCA +AAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATCGTGATTTTCGAGACATATTGTGTT +TTTAAATATAGCATTGCAAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACCTCAGATAGAA +ATCTAACACCAGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACT +GCTATGGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATT +TTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTGG +GATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTC +TATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGAGTATCTACTACCACAATATCGGAATTTTTCTTTC +TCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACTTTCGGAAAATTGCCTTATCCGACTCGCAATGTTC +AAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTAC +GGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTCGGTGAG +CATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGGTATGAGT +TTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTA +TACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCGCATAACCTCACACTGGAAAAT +CGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAA +AACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTGCGCTC +AGCTGTGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTTCCCCTTAGAGACTGATGCAGGCGAG +CAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTG +GAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTT +GAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTT +GATGATCTTCAAGGGACCCTGGCTAGTATAGGTACTGCTTTTGAGCGATCCATCTCTGAGACACGACATA +TCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGTTCTTTTCGGTGAGAATCTTGCAATATCATCACCT +CGGATTTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCA +TTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGA +ATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGA +TTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGC +GGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGTAGGACTATCACCC +TAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCATCAGCTGACTTCGAAGACGAAATGGTTTG +TAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGCCGATATATTTTCACGCACGCCGAGC +GGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACA +ATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATAACATTGCAAAGGTGGAGTCTATGGTTTAG +TTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCA +CAGATCCTGAGGGAATATTCATGGGCACATATTTTAGAGGGGAGACCTCTTATTGGAGCCACACTCCCAT +GTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCCAA +GCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACATATTGTTAGTGCATGGCCAAATGCA +TCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGGC +AACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAAC +ATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTA +AGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCGGGAAATATTGTTCATAGGTACAACGATC +AATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAACTCAGCAACGCGATTGATTGTTTCTACAAA +CACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAGCAATATTATTTTCCAGAATGTTATA +AATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAGTATAATCGTG +CTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTCAGTACTTAACATACACATCTACATT +GGATTTAGATTTAACAAGATACCGAGAAAATGAATTGATTTATGACAATAATCCTCTAAAAGGAGGACTC +AATTGCAATATCTCATTTGATAACCCATTTTTCCAAGGCAAACAGCTGAACATTATAGAAGATGACCTTA +TTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTAAGACCATCATGCAATCAATTATTTCAGATAGCAA +TAATTCGTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCC +AAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATACAATTCTTCGGACTA +AGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAATTCATAATCTACCACATCGCTCATT +GCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGATTAATGAGTATTGATCCCCATTTT +TCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGT +CCATTTCATCTTTTCTTACATTTGTAAAGGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGAT +AGTATATCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTTCCTCCATCAGATCGTAGAACTGCTG +GTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAATGATCATGTACATCCTCACGACAATC +TTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGTCATTGGCGTACTGGAGGAGCAGGCA +CAGAAACAGCAACCGAAAAGACTTGACAAGAAACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGT +CATATTAAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAGTCTAGTCCTGC +AAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAGAACACGCACCAGGGTCCGTCGTTCCAGTC +ATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAATTTCGATAGATCGAGACACAATGTG +AAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAAATAATCTCACATCGTCTAGTCCTAC +CTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGATGAGAT +ATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGGTTTACCGGTATAGTC +TCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAATTTTAAGTCGGCTGTGACGCTGG +CAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTTTTCAACAC +GCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCT +GTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACA +TAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAGGCAAGTCGAGGTTATAACCATGGA +TGCAGAGACGACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTACATAAATTGATCTTACACCAT +GTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACCGAGGGTATGTTATGGC +TAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGTATTTAATTAAGCCAATAACGTCAAGTGCCAGGTC +TAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTC +AGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGGAGCCCATACTGGCTAAGTCATT +TAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCCTTGGTTTTCCATCATTAGAGAAAGT +ACTATACCACAGGTATAACCTTGTCGATTCAAAAAGAGGTCCACTAGTCTCTGTCACTCAGCACTTAGCA +CATCTTAGGGCAGAGATTCGAGAATTGACCAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACAT +ATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTAT +TGTACAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTG +TGCAATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTAC +ATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCAGATGG +TCTCTACAGGTTCGATTGAATAACCGTGCATAGTATTTTGATACTTGTAAAGGTTGGTTATCAACATACA +GATTATAAAAAACTCATAAATTGCTCTCATACATCATCTTGATCTGATTTCAATAAATAACTATTTAGAT +AACGAAAGGAGTCCTTACATTATACACTATATTTGGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACA +ATACAGCATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAACATAATAAACTCCGCACTCTTTATA +ATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATAGTAATGTTGTATCAATATCTTGCCA +GATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAACTGATCTTTAAGATTAAGTTTTTTA +TAATTGTCATTGCTTTAATTTGTCGATTTAAAAATGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGAT +TAGGTGTAATAACTTTAACATTTTTGTCTAGTAAGCTACTATTCCATTCAGAATGATAAAATTAAAAGAA +AAGACATGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCAGACTAGATAATAATCTTCGTGTTAA +TGATAATTAAGGCATTGACCACGCTCATCAGAAGGCTCACTAGAATAAACGTTGCAAAAAGGATCCCTGG +AAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCTTTTTTGTGTGTCCA + diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index a32e0af5e..71be96d51 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -401,6 +401,23 @@ def get_seqs(fasta): return [str(s.seq) for s in Bio.SeqIO.parse(fasta, 'fasta')] self.assertEqual(get_seqs(outFasta), get_seqs(expected)) + def test_ambig_align_ebov(self): + inDir = util.file.get_test_input_path(self) + contigs_gz = os.path.join(inDir, 'contigs.ebov.ambig.fasta.gz') + contigs = util.file.mkstempfname('.fasta') + with util.file.open_or_gzopen(contigs_gz, 'rb') as f_in: + with open(contigs, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + expected = os.path.join(inDir, 'expected.ebov.ambig.fasta') + outFasta = util.file.mkstempfname('.fasta') + assembly.order_and_orient( + contigs, + os.path.join(inDir, 'ref.ebov.makona_C15.fasta'), + outFasta) + def get_seqs(fasta): + return [str(s.seq) for s in Bio.SeqIO.parse(fasta, 'fasta')] + self.assertEqual(get_seqs(outFasta), get_seqs(expected)) + def test_obscure_mummer3_bug(self): inDir = util.file.get_test_input_path(self) outFasta = util.file.mkstempfname('.fasta') diff --git a/tools/mummer.py b/tools/mummer.py index 4129f4bdc..3ac678f92 100644 --- a/tools/mummer.py +++ b/tools/mummer.py @@ -209,7 +209,8 @@ def scaffold_contigs_custom(self, refFasta, contigsFasta, outFasta, aligner='nucmer', extend=None, breaklen=None, maxgap=None, minmatch=None, mincluster=None, min_contig_coverage_diff=0.0, - min_pct_id=0.6, min_pct_contig_aligned=None, min_contig_len=200): + min_pct_id=0.6, min_pct_contig_aligned=None, min_contig_len=200, + ambig_max_aligns=2, ambig_max_lens=1, ambig_max_frac=.01): ''' Re-implement a less buggy version of MUMmer's pseudomolecule feature to scaffold contigs onto a reference genome. ''' @@ -282,17 +283,35 @@ def scaffold_contigs_custom(self, refFasta, contigsFasta, outFasta, # (# assembled segments) continue + def n_diff_vals(*vals): return len(set(vals)) + def n_diff_lens(seqs): return n_diff_vals(*map(len, seqs)) + def frac_unambig(seqs): + """Given a list of seqs of the same length, return the fraction of positions on which they all agree""" + util.misc.chk(n_diff_lens(alt_seqs_f) == 1, 'ambig_max_lens>1 not currently supported') + n_tot = len(seqs[0]) + n_unambig = list(map(n_diff_vals, *seqs)).count(1) + return float(n_unambig) / float(n_tot or 1.0) + # construct scaffolded sequence for this chromosome seq = [] for _, left, right, n_features, features in fs.get_intervals(c): # get all proposed sequences for this specific region alt_seqs = [] - for f in features: - try: - alt_seqs.append(alnReaders[(c, f[-1][0])].retrieve_alt_by_ref(left, right, aln_start=f[1], aln_stop=f[2])) - except AmbiguousAlignmentException: - log.warn("dropping ambiguous alignment to ref seq {} at [{},{}]".format(c, f[1], f[2])) - pass + for consider_ambig_aligns in (False, True): + for f in features: + alt_seqs_f = alnReaders[(c, f[-1][0])].retrieve_alts_by_ref(left, right, aln_start=f[1], aln_stop=f[2]) + if len(alt_seqs_f) == 1: + alt_seqs.append(alt_seqs_f[0]) + elif consider_ambig_aligns: + if len(alt_seqs_f) <= ambig_max_aligns and n_diff_lens(alt_seqs_f) <= ambig_max_lens and \ + frac_unambig(alt_seqs_f) > (1.0 - ambig_max_frac): + alt_seqs.append(alt_seqs_f[0]) + log.info("using ambiguous alignment to ref seq {} at [{},{}]".format(c, f[1], f[2])) + else: + log.warning("dropping ambiguous alignment to ref seq {} at [{},{}]".format(c, f[1], f[2])) + if alt_seqs: + # if have a non-unambiguous alignment, don't consider ambiguous ones + break # pick the "right" one and glue together into a chromosome ranked_unique_seqs = contig_chooser(alt_seqs, right-left+1, "%s:%d-%d" % (c, left, right)) @@ -541,8 +560,8 @@ def get_ref_seq(self, start, stop): ''' return str(self.reference_seq.seq[start-1:stop]) - def retrieve_alt_by_ref(self, start, stop, aln_start=None, aln_stop=None): - ''' Retrieve a sub-sequence from the alternate (2nd) sequence in the + def retrieve_alts_by_ref(self, start, stop, aln_start=None, aln_stop=None): + ''' Retrieve sub-sequence(s) from the alternate (2nd) sequence in the alignment using coordinates relative to the reference sequence. No gaps will be emitted. Required: start-stop interval must be wholly contained within @@ -550,49 +569,52 @@ def retrieve_alt_by_ref(self, start, stop, aln_start=None, aln_stop=None): ''' # grab the one alignment that contains this window - aln = list(a for a in self.alignments if a[1]<=start and a[2]>=stop) + alns = list(a for a in self.alignments if a[1]<=start and a[2]>=stop) if aln_start is not None and aln_stop is not None: # if specified, restrict to a specific alignment that comes from show-tiling # (sometimes show-aligns is more promiscuous than show-tiling) - new_aln = [] - for a in aln: + new_alns = [] + for a in alns: if a[1] > aln_start or a[2] < aln_stop: log.debug("dropping undesired alignment: %s(%s):%s-%s to %s(%s):%s-%s (%s:%s-%s requested)", self.seq_ids[0], a[0], a[1], a[2], self.seq_ids[1], a[3], a[4], a[5], self.seq_ids[0], aln_start, aln_stop) else: - new_aln.append(a) - aln = new_aln - if len(aln) != 1: - log.error("invalid %s:%d-%d -> %s specified, %d alignments found that contain it", - self.seq_ids[0], start, stop, self.seq_ids[1], len(aln)) - for x in aln: - log.debug("alignment: %s", str(x[:6])) - raise AmbiguousAlignmentException() - aln = aln[0] + new_alns.append(a) + alns = new_alns + if len(alns) != 1: + log.warning("invalid %s:%d-%d -> %s specified, %d alignments found that contain it", + self.seq_ids[0], start, stop, self.seq_ids[1], len(alns)) + for aln in alns: + log.debug("alignment: %s", str(aln[:6])) + + return [self._aln_to_alt_seq(aln, start, stop) for aln in alns] + + def _aln_to_alt_seq(self, aln, start, stop): + """Given an alignment of a contig to ref, return the contig sequence aligned to a given stretch of ref""" ref_l, ref_r, ref_seq, alt_seq = (aln[1], aln[2], aln[-2], aln[-1]) # convert desired start/stop relative to this reference window # such that 0 <= start <= stop <= ref_r-ref_l+1 - start = start - ref_l - stop = stop - ref_l + aln_start = start - ref_l + aln_stop = stop - ref_l # travel down alignment until we've reached the left edge # (because of gaps, you must check each position one by one) - # end loop when ref_seq[:i_left] contains {start} bases + # end loop when ref_seq[:i_left] contains {aln_start} bases n_ref_bases = 0 i_left = 0 - while n_ref_bases < start: + while n_ref_bases < aln_start: if ref_seq[i_left] != '-': n_ref_bases += 1 i_left += 1 # travel down alignment until we've reached the right edge # (because of gaps, you must check each position one by one) - # end loop when ref_seq[:i_right] contains {stop} bases + # end loop when ref_seq[:i_right] contains {aln_stop} bases i_right = i_left - while n_ref_bases < stop: + while n_ref_bases < aln_stop: if ref_seq[i_right] != '-': n_ref_bases += 1 i_right += 1 @@ -601,6 +623,6 @@ def retrieve_alt_by_ref(self, start, stop, aln_start=None, aln_stop=None): i_right += 1 # grab the alternate sequence and strip gaps - alt_seq = alt_seq[i_left:i_right+1].replace('-','') - return alt_seq + return alt_seq[i_left:i_right+1].replace('-','') +