## Nucloetide Skew

In [13]:
def skew(genome:String):List[Int] = {
    genome.foldLeft(List(0)){ case(m, x) => 
        val ret = if (x == 'G') m :+ (m.last+1) 
            else if (x == 'C') m :+ (m.last-1) 
            else m
        ret
    }
}

defined [32mfunction[39m [36mskew[39m

In [17]:
skew("GAGCCACCGCGATA")

[36mres16[39m: [32mList[39m[[32mInt[39m] = [33mList[39m([32m0[39m, [32m1[39m, [32m2[39m, [32m1[39m, [32m0[39m, [32m-1[39m, [32m-2[39m, [32m-1[39m, [32m-2[39m, [32m-1[39m)

In [18]:
val input = "TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGAT"
val s = skew(input)
val m = s.min
s.zip(0 to input.length-1).filter{ case(s, i) => s == m }

[36minput[39m: [32mString[39m = [32m"TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGAT"[39m
[36ms[39m: [32mList[39m[[32mInt[39m] = [33mList[39m(
  [32m0[39m,
  [32m1[39m,
  [32m0[39m,
  [32m1[39m,
  [32m0[39m,
  [32m-1[39m,
  [32m0[39m,
  [32m1[39m,
  [32m2[39m,
  [32m3[39m,
  [32m2[39m,
[33m...[39m
[36mm[39m: [32mInt[39m = [32m-1[39m
[36mres17_3[39m: [32mList[39m[([32mInt[39m, [32mInt[39m)] = [33mList[39m(([32m-1[39m, [32m5[39m), ([32m-1[39m, [32m13[39m))

## Hamming distance

In [38]:
def hamming(s1: String, s2: String): Int = s1.zip(s2).count(c => c._1 != c._2)

defined [32mfunction[39m [36mhamming[39m

In [39]:
hamming("GGGCCGTTGGT", "GGACCGTTGAC")

[36mres38[39m: [32mInt[39m = [32m3[39m

## Approx pattern matching

In [45]:
def approxMatch(pattern:String, input:String, k:Int=3):List[Int] = {
    input.sliding(pattern.length).zipWithIndex.
        filter{ case(kmer, i) => hamming(kmer, pattern) <= k }.
        map{ _._2 }.toList
}

defined [32mfunction[39m [36mapproxMatch[39m

In [46]:
hamming("CGCCCGAA", "ATTCTGGA")

[36mres45[39m: [32mInt[39m = [32m5[39m

In [47]:
approxMatch("ATTCTGGA", 
    "CGCCCGAATCCAGAACGCATTCCCATATTTCGGGACCACTGGCCTCCACGGTACGGACGTCAATCAAAT",
    3)

[36mres46[39m: [32mList[39m[[32mInt[39m] = [33mList[39m([32m6[39m, [32m7[39m, [32m26[39m, [32m27[39m)

In [50]:
val seq = "AACTTGCGCTG"
val input = "GCAGGTAGAACAACGCCAATGCTGGCCACAAACTTCTATCAAAACAAAAGACTGGTAGAAGCCTAGCGGAGCTTCCTCTCTACAACGAGCCTGCCTACCTTTACAATGAACAGGGGTACTCGAGCGGGAGCTTCGCAAAGTTCCATCAAACATATTTCTCTGTGCACGGTACGCACGCTCAATGGTTGTTGAAGTTTGGCCATACCAAGTCCATTTCCAACGAGCACTATCCGTGGTGGATACAGCTGGTGGTCGGCTTGTAAGACCGGAGGTAGAAAAGGCCTTCCACTAGCAGAATTGAGTAAACGATGGGAATATCTATAGTGCTTTGGAAGCAGGGGTGACAAGAGTGTCCTTACATCGTAATGGTGGCGCCCTCCGACGGTAGTGATCAATGGCATCCCCGCTCTATCCGCGACTGGGCAGGCCCAGGTGTTTAGAGTTACTGGGTTTACTAGCGCTAACGTTACCACGTCGGACACAATATTGTGACGGACCCGGCCAGCACTAACTTAAATGAACGGGTGCCCATGATTATCTTTACAAGGCTGTACCCTCACTATGGAACGACGGGACTCACACCAAATTACACACCTACGATCCTCCTAGACGGGGTACCAGAGCTTCATGCTTCGTCTCCTGACACACTCCTGCATTGTGCATAGCCACAGTTCTCGCCTCTTAAAACTTTACGATGTAGCATCGCCCCGGCGGGGGAGATAGCGAATCTGAACGGCGTCTTGACTTTGTAGCCGTCACTGAGGAGTCTAATGCCCTTTAAGAGGACCGCTAGGGGAACCTACTTGAGTCCCGAAAGCGCGGCAAAAAATCTGACTGATTTGCGCCAGCCCGCAGTAGTGGTTAAACTGAATCCATATCGGCATTGGCGGGGGCCGAAAACAATTATTCCCAGTAAACGGGTACAAACCTACGTGTCGGGCGCTTGTCTGGCTGCCTTTGACAGTATAGGTAGTTCTTGCCCATACAGAGTCTGAGAAGAGTTCTATGAAGCCTTTGGGCCAAAAAGTATTTAACAACGGAGCACTACCCGGATCTAGATCTAGAGTCCGAGCACTTCTGATACTGCCTGTAGAGTTTCTGAGGAAGCCGCTAGGGAGGTGGAGAGTCAAACGTAGGGTACAAGTCATCACCCTCTTAGCCTCACACAGTTGACGGGGGGTTCATAGTGCTTCTGTAGCTTTGGTGGACGGCTTTATCCGCGTGACGTGGTTTGAAATCACGATTAAGTCGCGTTATATAATCTTCACGAAGGAAGAGTGGGGAGTACGGCTGGACTTCCAAAACGTGTTCGCAGACGATTGTCTCCGGGCACTCGCTTTTCATGGGACCAAGGGCCATGCTTGGCCACCAGTATATCAAAGTGTTCCCACACGGTCTTTGATTCTAACACTACGCGAAGGGTCAAATGCGATCACTTCTATACGGCCCACTGTGGACCACGTCACCGCTACATTTCAACGGAGCGTTTAAACCAGAATCGTATACTCGGAAAACTGATAAGAATTCTAGGCACCTCCATCCCCCTGGGAGGGGTTGCACTTGATGAATGCATCTCGTGTCGCACGTGAATACATGGAGTGAAACTGCCTGTCAGGACTGAGGTGGTGAAGGGTACCTACACCTGCAGAAGAACGCATAAGAGTGTCGATAGTCCGACGGATTACTTCTCGCTCCAACGGGGTCACAAGTCTCTTATGTACGTAGCTCCAAGACCGAGGATTGTAATAGTCGTCTGCCCCCAGGGCGAGGAGAAAAAACGCTATACGAACTACAATGTTCTACTAGCTAAGAACGGATTTATATGTAGCTGTCATGGGTCTCCTTCTATGCATTCTATACCATGGTAAAAGGGATCGTCTTTTTCGTGTTGTAACGCAGTATTTTATATTCGCATCTCATGACTTTGGAGAATTCCACCATAAGATCGGTTGCTACGCGGTGTAAAGCGTCTCTGTCGCGATAGCTGCTGGCTTAGGAGAAAGGAGTCCGTGTGGAGGACCCACCTCAGCGTGGTTTATCGCGGTAAGCGTTGACTGGCAAAGCGGAACGTACGACCCGCACGCGACTCCCGAATCTATAACATCGTGGAAAAATGACGCAACGGCTCGCAACTTGGTTCGAGCTCACGTGAACGGATAAGAGAGTAAATTTGGCCCCGCCCTCGCTTTGGGCAGGTTTTTCACTATTAAGACTCCGGTAATGCCGTGTATCCGCTCGCAGCTCTCACAGGACTGGCTTTCCACTAGCGTTGTCTAAGGGAAGCACTATCCCATCTGAACATCGAAACGAAATTCATTTATTCATCTCCCGGACGTTAGTTCCCTGCGGCGTCGTGCCTACGGTACTACCATACCCTTGAGAGGCTCTACCAGCTTCGAGTATCGTACCCCCCCGGGCTGGACTCCAGCAGTTGGCCAACGTCAGGTACTGGGTAGATAGTTTGGGTAGTTGCAAAGGTAGACCAACGGCGGCTTAGTATAGGTGCGGTAATGGGAGACCCCGGTTCTGCATTCAAATTTCGTTGCATGATACGCCAAATCAATTACTCCCGAAATGTCCATCAGGCGCATGCAGAAAGGTGACGCTTATCCAGCTAGATGCGTAGATAACCGGCAGGACATAACATCCGCCAGAATCTTGCCTTAGACGGAGCGACCGGAATAACCGCCTCTAGCTAGAGGTAGGCGAACCCCGTACGAAATCGCGGCGCATTGGTCATCGGCGTCTCCCCCGTGATCTCTGACGCCAGCATTTTATACTCGGGCAAGTATCGTATTTCAGATCGATGTAGACTCAGCGTACTTGCTGGCGACGGGGCTTCAGATTTGTTGATAGGTGGGCGCTTCCCGAGCCCAATAGCGTAACTGACCTCGGTATAGTCAATTGCCCCGTAGCGTCTTCCATGGTGCTGTGTCCCACGAGTTACATCCTTGAGCCCGAATCGTGGGTACAACTACGCTCTAACGAAAACGGCCGTGGATCCTAAGACCCTCTGGTGGAGGCAACACCATCTAACGCGGCCAGTGGCATGTAGCAGATTTTCCTAACGTATTACATTTGGGATACGAGCCATAATAGATGGGTTGTACTCGAGGAGTACCGGGTGCGTTCAGATTTTCATATTGTTGTAGTAGTATCCCGGGAGGCCATAATAATACTCTTCAACAAATATAGCGGCTAGAGCCGAATTTCTCGACTACTATACTGGGTACCACAGTTGCTCGGTTGCACTATTTGACGAGAACGCCTTCCCGGGCCGGCGATTGACAAAAAACCGGATGTTCCGGGGCCTATCTAGTCTGTACACTGTGCATTTCTAGTCGGAATCTGATCCGAACACCGTGACTGAACTGGTACCACGCTGCGTGCCTATCCTCTTCCCATTAGAAAAAGTCAGCATTTGGGGTCCTTCTCTACTCAGAGAGGGAGTTTATACGTTCGCTGGATCTAGAGTGTGGAGAGAGAACTGTCTTTGCCGTCGGAGATCGATATGTCCCTCTGTCCGTTACCGTCATAACCGGCTCGTCTTGTGCCTGTTGAACTGTCGGGACACATCTCCACTGTGGCTGTCGCGGAGCTAAAACGGAAGCACTACAGACCCACTATCCCCAACCAGGGTATTTCTCAAAATAGGTACAGACGCCGTCTAACTCACCAGTTTCTATCGCACATCAGATGTAACTAGTTGCGCACTATAAGGAGCCCTTCCTTTGGGCTAAAACCTCACTGCAGTATTGTATCTGGCCGAGATCCTTGAACTTATCAGCCATATCAAGGCAAAATGTCGTCTCGCCATGAACTTAGAACGGGTAAGGACTGAGGGCCGACTACCATACGCGTCTATTTGTCTCTTGTATGCAGCTAGCGCCCCATCAAGAGCAAGCCGTATTCTACCTGAAGGTGTGTAGAATCAAAAATACGATTCACAACGTTGGATCACTGAAACGTGATCTGACCGCAGATCGAGAACTTTCCCCGGACCAACATACGGACGCGAAGCATTCTGGCGCAAGTTTGCATTCATTGGCCGACTAGTAACTGGGTAGGATACACGATGTAACTCGGAGTGGCGGCTCGAAACAACTGATGCTCAGGCTGCAAGGATCGTAGTGCAGACGAACAGTAGGTGATTTCGAATCAACCTACTACTCGTATCTCCTACTTGCAGTACAATAGTGGAAAGGATTCAGCCCTGGGCTGTATCGTGCATAGAGTTTTGACACAGTAATACATTTTGCAGCTGCGCGAGCGGTACCCCCCGGAGTTCCCGCTGTCGGAACTCCCACAACAAATCCGTACGGTTGTTGGGCCCCTAGGCCTTGCCTTATGTTGTGCGAACGGGCTTTTGCACCGCCTACGGACTCCCCGCGTTCCAAATACCGTTGATACTTTGGGCTAGGTAGGGAAAGTGTTCAGGACTGCACATTGATGGTCCGCGATAACAACGAGCTGTTAGATACTTCTCGTCTATAACCTACAATCTTTGCCACGTCCAATCGCAGCCCGACGAAGTTAGGCGCCAGCACCACCAACGAACCCAGATGTGACGAGAACGCTTATTATCGAAATCCAGAGATTCTGCTGCAAGAGGTACAGGACTGGACCCCCGACGGTGCACATACTTCTACAGAACTCTCTCTGATGCCGGCGAAAGTTGACCGCAAGTTAGCTCGGTATCTCATGAGAGTTACATAATCTTAGGCCCGATTAGACGTTTGGGCCCTTAGTAGCACCGCGGCCCGATGCAAGCGGGCAAACGACGTGCGGGTACGAGTGTTCACGCACGGTTTCATTAAGCCCTTGAGGCATGTACTGCGTTCGAAACAAAGAAATTACATAAGATCACTGACCCCGTTCCGGATAGACTAGATAGTACAAGAGGATTTGTACTAGTCTGAATGGTACGACAGCCGTAGTCTGGTACGCTGTAGCTGGCACGCCGCAACTGGGCCCTTTTCCTACAATGCCCGGTGTAAACTATTCTACGGTCGCGAGCTACTGTCGGTTCGTATCCACATTCGACAGCATAGACCGTCAAGGGTAAGATCCATAAGTAGGTGTCTCTAGTATCGATAGTTGTACTACGTGCCACCGTACGGTTGCGTAGTTCTTATCTCCTAGACTTCTCGATAATATCCACTGGGAGACTTGATAACCCTAACGGGGTATGGGATGGGGGGTTGGTTATTTGCGACAGTATGGATTGGTCCTCTAAATTGCCTCCTGGCTCACGCATGAGTCAATTTGGTTGTGGATACAATTACTGTCATCCGGTTCTACGGCTATGTTGAAGTAGTTTACGATCATTACGATCTATTAGTAACCTAACGCCGTTCTCCGGGATACAGACCCGGTCGGAACATATGCGCCGACACAGCTACCCACTAAGCGGGCCATTTTCTGAGCGTCCCTCGTTTAAGCAAAGAGATTACATTCTGTCAGGGGCAAGTTGATAACGCCATTCTTACGAAAAATGTTTCATAAACACGCGGATGTATACTGGGCCGCTAAGGCACGGGCGCTCCGCCAACTGATCGCCAGTGCCAATCTATCGATGACTCCTATTACCAAAAGGTACGAATAGGAACCTTCTCCCCACGCATGGAGGTAAAGCGTGCACGTTTACACAGAACTGGTGTGTAATTGTGAAGCATTTATAAGAATAGTTTCTGACCATCTCCGTTATCGCGAGGTCCACGTCAGGTCTCGATGCAGGACAGGTAATGGGACCGAGGCCAACGACTGCGCTCACTACGAATCGTAAGGTAGCTGATGACCCGTAAGCATTGGCTCTACATCGTCCCCTGCATTAAATCTTGTAGTGGTAGGTAATTAGGTAAATCGTGCCTAGCGGAGCGTACACATTGACCACCTGCCGATTGCTAATTTGTGGGCGGACTACGTCGATCCAGCTTTCTATCGTTTAGTTTTGCGGGACCTTTGTAGCACCTTTACACTATGACCTAGGTTCTGTATGGTTTGCGGCCGTGTGCCACTGCTAACCCGGACACGAAGAACCCCTTTTACGTTCTCTGGCGTATCTACTCTCTGTATGTACACGCCCTGATGAAAATGATCGAATCTAAATTGGGAGATCATCGGCCGTTCGCCACGAAATATGCAGTCTTCTAACGACCTGAGGTCGCGCGCGCCAAGATCGGTCTTGGCAGTCGCTCAAATTCGAGGGTTCAATTTCGTCATTGGGGTCGTTTCGAGGCAGTGCGCAGAATCGGAGCCAACTTCTCGATGTGTGGTAAGGAATCTAGATATCCATCCTTAAATGAAGTACCAAAGCCGTACAAACTATACCGCAGTTCGGTTGTGCGCGAAAAATATTACTCTCCACTAGGCCTGTACCCGGGCGGCACGGACCAGCTTCTCCGTCCGTCCAACACTTACTTAATCTCTGGCCGGAATTTAGCAATCGACGAGAGTGCTCGTCCTGAACTCGTTTAGTTCGCCGGTTAATAAATGTCCTGACCTCCATCGGCTTTACCGCTATGCACTGACCTGTAGATTCGGTCTGACAATACTGGTAGGCGCAAAGTTGTCGAACACCAATTAACGCCGGCGGACTTCGAAATGGGCCAGTGGAGGTGAACAGGAGATATTCCTTTTCGCTATGTGGGTCCCGCATATAAACACCCCACTGGGCAAAGGATTGTGGGAAGTGTAGCGACAAAACCCCGATACATCCGGAGTGAATTTATACCTTCATTGGCATTAGTGCGAACAGCAAGGAGCCAGCCTATCTTGGGTAGTTTCATGCCGGAGATCTAGCGCAGACCGAGCATAACAATAGGTTCAGCCTCTACTCAGTAAGCCGTACTGTCTGTTCCGCAAAGCGTGTTAGCTCGTGTCCCGTAGGATATTTTCCGTATTAGGTGAGAAAGAGCCCGACAAGGGTACTAAAACGACTTCGAATCCCCCCCACCCAATGCCATGGCTATTTCGGAACAGCAGGTAAACGGGTAGACGTGGCACCTAGCACATGCGCTGTGTCGTCGGGATTAATTGGTGTGGGCACGGATTCTTGCGTTGTATGGACGTTCTAGATAAATGAGGCGCAAGAGAGGCCTGCGACCAGCCCAAGCCTGGGGACCTTCTCTGCTGCCAGAGCGGATGGTGCGCACATTCACCTTCGCGTGCATACAGGGCCGTTTTCACAATGGGCTTGACGAGCGAACACACGCCAGCATTGAATGTCCTTAGTTTGAAGTAGACTAGCTACAGGCTCTTCAGCGCCCGCAGACCGATTAGGACATGTCTTGTTGAAAATCAGGATAGTGAAGCATCCCGCTTAGGAAATCAATAGTCTTTTCAACTTAGAGTGAATTTGGAAACGTACACTAACTGCTCACTTATCGGACTAGTTGCGCACGTTGAATAACCCCGTTGATTACCATAGCTCGCAGCAGTTTGGGCACCAAATACTAGACAACGGATTGACACCTTCATTGCCCAGAAATCTGGCACATGGAAATTGACCATTGTAGAAGTGTCGTATGTCGCTATATCACCCTACATAAGTGTCTCAGATTTTCCAGGGGTCGTCCTTCTAGGCAAATGCTAAAGTTGGTACAACCTGGAAGAAAGCCTGGTCCCTCGATCTGGCCGACTCAGATGCATTGATGTATCATCTCGGGCCATCTCTAATAAACGTGTATTCAAAGCGTTTGACTATCGCCACTCAATATGTCGCACCTACCACTTCGTAAGCTTCGCCCGGTGAAGCTAGAGTTAATTGCAGGTTTTGGATGTGAAATCCGGCGCCGGCGTGGGGGGTTCATGCCGCCCCTTTCAACGCTATCTACTGGTGTGTCCCGTGTGTCCCAGTATGTCCTGCCGTTGGGATCTTGAAATGTCATAGAACCCTCAATAGCTGCCCTCTGGAATGTTGGACGTAGACGGATGGAACTGACCAATTCTAAACGTCAGGTAGAATTTTACTTTCCTCTAAGTGGCAAAGACGTTGCAGATTGTATCGCCATACGCTCGAGTTGGTTAGACAGTACACGGGAACTTTCGCGCATTGTGCCCCATGTACGTGGCTAGGGCCGGCAAAAGATACTGCGCGGAGCCGGGCCTTTATTACTACTGGCAGCAGTAAAACCAACGTTCGTTACTGGCAACCAACATTACCCCGAGGCCACGATGCTCTGGCCAGCTGGGATTGCTGTCAGTGTACCGGGCATGGTCTTTACCCATAAAGACAACCTCTGACCTAGTTTGGCTATTGGGTGGTCAGAGGAAACGCGCAATAGGTGGGATTCGATATCTTAGTAACTAATACTGGCTAGACCATATTCCTATTCCGAGTTAGAGACTCTGGTTTATACACCCCACGCCTCTGTCGAACTGTACTACTACAATCTAAGATTGATCGATTTAACTTGGAGGGGGATAACCAAATGTAGGTACTAAGAGTTCACCGCCCTGGCTAGTATCTAAGTTTGTGACGGACGGTTTACCTGCCGTTTACGTTGATGTCGCAACGGATTACGTGTCGTGTCACCTCGCGATGATATATACTACATTCACGTTTTACTACGACCCGACCCTTTCTATCCTCTTCACTGACTATAATGCCTGGGCGGGCTACGCTACAACTATCAGTGTACAAATCATTCGGAACTATCAAAAGTATCCTGGTTATAAAATTCACCCGTGTCCTGACCGCTCGCCAAGTAGCCTGGAATGTGGCTAGCTCTACTTCCGTACTGATAGTAGCATAGCCGGGCGTTTCAAGACCTGGAGTCGTACGCGTCGCCAAGTCACCGTGGGCAAGGTGGTTAAGTCCTCGAATAATGATGTCTACGTACTAAAAACCGGTTAATAGAATCTCTACAACAACGTGTCCGGTCGCGCACGCTTGTTCTGATTCTTAAGTTAAGTCGCAGGCTATGTAGTGGGGGTCCATGTTAAGTCATAGAACCTACGGTGGGCACTGCCTCAGATACATAGTCGAAATAGTTGTTGTCAGCCCTCTTGGCTCGCGCAGTATAGGGCATTCGCGGAGTTTGCTCTCACGCCCTGACCTCTTACACGGTCTACGTGCTTGACGTCGCATTGCGGGTTAGTGGAAAAAGGGTGCGAGCTAATTAATTACCACGATTCCATTGAATTGGCTACGGGCATTAGGATAGCGCGTGGGGACACTCCGTATTGTAGGTTAACTGGCGTGATGCTGTGTTTGTCGCGAAAAGCACTTATGTTTACCACTGGCAGCATGCTGTTACGCCCGGAAGGCCGGGAGAGGTCCTGTGTAAAGTCTGGCTAAAAAATTTGTTGTAGCTCGCCCTTTGTGTCTGGATAGCGGAGCGCAGTTCAGGGCTCCAATAAAATTGAAGCGTCTTAGCCTAAATTTGGTAATGGGAAGTAAATCGTCACCGGGGGGGAATATGTGAGAAAGAAGCAACGAGTAGGACTGACAAATCCTACAAATCCAACAGTCCTTTGTGGGTGCACTGATAGACCCTACCATAAATCGGTAAGGACTGAACGAAGGTTATCTCCAGGGTCTAGGTGGATGCGTCCGCACGGGGGCTGTCCAGAACAAAATTCATAAACGTAAGGCAACGCACTCCAATCAAATTGCACCGTAGTAGCTCGAGAGACGTCGCGTGTTGTTACTCATTTCGCAATACGTCGTCCATCACTTAAGAATCAATGCGTCGGTATCGGACGCATCACCCGTGTCGCCTGACGCATCGGCCTAGGATGACAAGAGAAGTCAAGTCACAGCCTGCAGCAGCTTCCAAGGGCGTGTATTTTATGCCAGCATTTAGGTGATGACGGGAAAGTGCCTGTATTTATGAGAGGTGAGGTGACGCGTTGTCAATTACAACCTAAAGATCGGCGAAGTATAGGGTCGCCGTAGTCGCGTTCTGGGACCCCAGTACCCGTAGTTAGTCTATTTACCGGCAGTGGCGCAATGTTTGGGCTACGGACGGTTCGGCATACGAATTAAGCATAACTCAGGTATTAGGGGGCGTTGCGCGCCTTCAAAGTTGCGTCCAGCCTAAAATAGGGGGAAACGCTCCAGCTCGAGGCCAGTCGCGCCAGGCCTAACCGCGTAGATACACTATGATTAATGGCACTCTTAGACGATCTTAGATCTATGCACATTACCCAGGGCTTTGCTACAGATTCCTCGTCACACTAATTTGCCGGATTAACTTCCCATTATCGCAAAGTTTACTATTGCTGGTTACAGTAAGACCCGCCGGAACCACAACAGTCTTGCCCGAGCGTATGTAATGGCTGACTGTCCAGGGACCATGCCAAAAAAAAACATTCGTAGTGCGCCATTTAAGACATACTCTACTGAATTGCTAACCCATGGGAGGCGTATGATGTTCTGCGCTGTCTTAAATGTACCCGAGGTGGAGCCTCATGGACTTAGAGATTAACGTTGGCCAAGCGTTAGGGTAAAACTATCGCAGCGCGCACATGGCGGTAGCTCCAAGTTCCGTTGTTCTGGTGAAAAGCGAACCTCGGACGAGAACCAAGTGGCGCTTAAGGAAGCCCCCAGTGTTGCGACGTTTATGCCCCGATGAGGTAAGCCGTACTGCCAGGCAAGAAGCACCGTCCGCTTCTTATCTCCGGAGCCGTTTGCTCTACGTCTCGTCAGCCGCACCATGGGACACGTTTGTACCGGAGTAGTTTAGTCCGCTGCAAATCTAGACTAGAAGTCCCCTATCTATATAGAACTCAGCCTAGATCAAATCGAGGACTTTCTACGCGCCATCGTTAGCGTTCTCTGAAAAGAGCTCGATGACATGGTAAACCAATAGGGGTATGAAGCGTTTCACAATGTGTGCCTATAGCGATTAGTAGGTTTAACGGATGTGACTCTGCTGGCAACTGACTGTCGCTTTTTATTACAGGAGGTTCATTGGGTTCCCAGTCGCCGGCATACTATCCCCTATTGCGAGGGATCTATCCTCGGCACGAGATTAGCTAAGCTCTCGCATTTGCGTACACCATGATGATTCACGAAACTCCACTTATCGTCTCTCCTCAATAAAATCCCTCGCACGAGATACTCTACCCCGAAGTTTAATCCGGAGTTCTGCCTCAACCCTGACGTATAGCTCCACCTAGCACATGACCTTAAAATACGCAGGTCGGTCGGGCAATTCTAACTTTCGTAAAGGCGAACCCGAATTCGGGCGCCAGGACACCTCTCGCGCGCGCCTGATCAGCGATTGCCGTAATATTAGGGAGCGAGATCCGGACCAGACACCGCCATATATGGGGGATATAATCCAAGTCAGTAAACCTACCGAGAACTATCCAACTGAGCATCTCATGTGACGATCAAGCACCTGACCATACAACGCTCCGTTTGGCTGTAAAAATTGGCGCTTAGTAATCAGACCCCACCAATCCGCGGTTGTTGGTCTCAGTTATCATGATAGGTTGTATGGAGCCTCACTAAGCCCTAAGCGTACGTGTCGCTTTGTGACACACTGGTGCCTAGGCCTCCACTAATCGTCACGTTTTCATGAAGAGTGAGCCCGCAGTCTTATTTATTTCTCGACGAAATTGATGCCAGAGCTCCTGTTGACTGTTCAATAAATCGATCGTGAAGTATTGAATGAACCTGGTGTGCACGGCTTCAGTGACATTGCTATCGAGAGCCAGACCGGTTAGCGCGAGGGATAACTCAAACTACAGCGACTACCATCCTATGCCGTCCACAGGACCTACGAAAGGCAACCTAGACAGACCCACCTAACGTGATAGACCAAGTCTACCTTCTCGTAAATTCGGACGTTCAACTTGGAGGTCCAGCGGTTCTCGTATTTAAAGTATGTAGGCCGGTTGGCTCGCTGAAATACGAGGCTTAATCGTCGAATATTCAGTGGGAGATTGCAATATTGTAAATTCGACCTTGAAAGCACGTGCGAGCCTCCCGATAGGTTCCGGCTTCTAGCCGCTAACTAACCACAGTTTGGGACTCACTGTTCTGCCCCAATATGTTCAGTTCTCGCGACTCTGCAGGAGGATATCAGGCGATCTTCCGACGCATTAGTAATTCGCTAACACGACTTATACTTTATATTGTACCTCTCACTTGTTGCATCTGGAGACAAACGTGTTTTAGCTTGCTTCAATCTGTGAAGCCACTTATGACGGAAGTTTGGCAAAGAGATGTGCTCATGCACGAGCGAGGGGTGTCTTTAGGTTCTATCGTTAGGACCTTAGTTGCACGAACGATCGTTTCTTTTGTAACGCTGCATGGGAGCCTCAGGTACAGTCGCAGTAAACTTAGCCGTGGCAATGTAGTAGTATCCAGGAAAGAATAGAGCTATGAGGGCTGTCCCCTCTTATAATGCAAGCACTTCTGCCGATAATGGGGCATATGGTCCGTTAACTTACAGACCGTTGGGACCAGATGGAGAGTGTCGGATCGATTGCCACGATTGGATAGTCCGGCCAGGCTGCGTCTGCCATGACGGAACGACGCATTAAAGCAAGAGTCGACCATCTTGTCGCGTCCCTCACGTTCCCGGGCGGGCACCTTCTCTGTTCACTAGCCGCCACTCTAGGTTCTTGAATTAACGCACAGAGACCCTGCTAGTTATTTAAATCGGCAAACATAACATTGGAACGTGGTTACACAATTCTCGTTAAATAGCGTGTGGGACGAAAAGGCAGTCTGTAACATTGGCACGTCGGCTACTGCACTTCTACTGTGGTGAGTAGCCAATTGTAAACATTGCCTATTGCCCACATAGTTTTATATGGCGAGGTAATCGCGCCCGTATACAAGTACATTGAGTCCACGGACCTTAAACTTATGGTACTTGTGGGGGGGTGACTAGCTAGCTTTCTGTAGTACATATGCGTCCCGTGGAATCTTCTATCTAGCTGCAAAGCTATACACCGTGATGCATGGGCAAAAATTTAGCATGATTGACAGGCACGCAAGAATGGTATGCCGGAGTTTCTTGAGGTGGCACTCGCAAGTTGATTCAATGTTCTTGAGTCCTGTCAGTGGTGGATCAGTGGACTGACTTTAGACACTACGATGGCATTGTCAAAGTACATGTCGCTGGTGGCCCCTGGTCAATTGCCGCCCATCCTTGTTCCTAGTTCTTAGTATCACAAGACTGCTGTAGGAAGAAGTTAGACGAGAACCTGAAAGATGTAGTTGTCTGTAAAGGCTTTTCCCTACCCGAGCTGTCGAATAGAAACGCTAAAGTAGAAGTACTTATGCGCTATGAGAGTGGGAGCCCGCGCACAAGGCCCTCCCGGCGGGTCCCGGTAAAAGCGGATATCAAGGACACAGTAATCTTCAAGACGTATCCATCACGAGCGTTTTAAGTGAACGTCCCCTTACTCTTGCTGTCACTAACTAGGTGAGACGTTTCGCAATATCCGGGGGGATCGGCTATCTTGATGCGATGAGCGTTAAGTGGAATGTTACCCCCCTGATCCTATAGGAAATGCCCTATGACATTCCCCGATTCATCATGCTCATCGTTACCCATCACCTCTTCAATCATACTGCTTCGGGATTATTGAACTGCGGGCACATCACAGGAAACCTACACTTGGTTAGGGCGACCCTACAAATCTCTTCTTTTGCTAGCGTTTCTCAGGAGATTATAAGTATGACTATACCCCGCCTTTGCAGACACGGCTTATAGTCCAGGTTTTGGATATAGTTGTCTTGGTGGTGTAGACTGCGGGCCACATTACGACAGCCCATGTTCTTATAACCCTGAAATTACCGACACAATGCGGCCGGCCCGACAGAGGCAATCCGCTTCGTACCACGAGCTGCCGCCCAAACGTTGCTTTTTGGTCCTATGCAGGAAATCACATTCCGCACGGCGCGACGCTATGCGGGTGCCTGGAACGTAGTATCCTCAGATACCACTGTAGGTATTGGATCCTCACACGTGGAGTGGTTCGTTATAAGTGTACATCCATTCTACTCCGCTAATGAGACTCATGAGGAACAACCGACGAAGGTGTTCGCAACCGCCCGTTTCTGGATTGAGAGCGGGGTTATATAGAGGTTTGCCGTACGCAGTTCCCGACAGAGTATTAATTCCCCAATAGCGCGATTCCCCAATATCGGTCGAAGAGCCCGGCTTCCTGACACGAACCCAAAGGAGCTCGGGCGTTTGCGAACACCATGTTGGTGTACGCTACTTTCCCAAGGCTCTAGCTGCCCGGCAGCAGAAAGTATACCGTTCTATGATGTAACTCCTCACAAGTGTCCGCCGATGAACCGCACCGCAAGCAGAACTTATAGGAACTTGCAATGACAGGTGTGCGCTCCTTTAACGTTTCTGTTCCCAAGGGCACGCATCTTAGTAGCCCATATGCTCATTGGAACCGACCGTGGTCTTGCTAATGTTTGATCACGTGAAAGCCGTGCTCTCAATATTCGAGATAGTATATCAGCCAATCCCAGGGCGCGGTCAATAACCTTACTTGTCGCTTAATATGAGAGGTCTGCTAGGCAAAGCATAACCACAACTACGTTGCGCTCTATAGACCCCGCAGAAGATTGCGGCGGAAATTTTGTTTTTAACAACCACTTGAGATTAGGCATTGTGCACAAGTATTCTAGGGCGGGCAACGCGTCGCGCGCATTTGAATTCTAATCCCTGAAGGGGGGTGGCGGGACTTCGAGTTGAATAGCTGATACGGCACAGGCGGACTTGTGCTTACAGCGGACGACAGAGTGCTTTGGCTTGATGCGCACGAGACCAGGCTTGGGACACCCACTAAGGTATATTTCTGATTCTGCATCGTCCGAAAGGCCGAGCGTACTGCCGCAACATAGCCTCCAAAGGCTGGCCAAAGCTTAAATGAGAGTGGGAATCCTAAAACTTAATTGATTCTTGGGCGGGCCAACATGGGCGATTAATCACGAATACCCTTATGGCAGGTAGCCATTCGTCATTGGTCATCTACAACACCCGTAGAATAACTGCGCCGAGGCGGAAATTACACTCGACCCTCGGAACGCTCACTGTTCTGGGACTACCACGCATGTCGTCACCAACCCTCGCATGCTCATTCAGGAATTCTGACCTCAGACAATCATGGTCGCGGTGGTTTGCGAGCCACATGAATTAAGCGTGCTTTTAGAGCTCTAGAGAACGAGGACAGACTAGATATTGGGCGGGACGGTGGGATCGATTGATCAGAACATAGCGACCTCATAAGAGGATAGCCGGGTGTTGAGGACGTGCGTTTTGCTTATCGTAATCGCACACGCTTATAGCACAGGGCCCATGCGTTAGTCTGCACATAAGTGACGAAGGGCGATCTCCGCACGGACATCAGCTGGGTCGGAATCTGGGATATTGGTAAAGTATTATTCTCCAACATGTCAACCAAGATCGCTTTATATTGCATGTTCAGGTGGGTGCGTGGAGTATACCTATGTACCGGACGAAGTCGTTTTGCCGAAGTTTGATCCTGTTCCCGACATTCAGGACGGCAGTCGCTTATTACGTAGCAATATAGAATGGCGACAGCTGGTCTCCGCTAATTAGACACTTAACGTAAGTACGTATTGTCCACAAAGAGCTGAAGGTAGGGTGGTATCTCTTCAAGACCGCTCACAGATTACATCGGGTTTCAGAGGCTATCGCCATCTATTTAGACAGACTACTTTCGATATCCATCTGACAGACAAAGGGCAGGGCACTGGTGTGCCGAAAGCCTCTCTATCTTCGTGGGCAATGAGGTCAGCCAAGAGTCGTCTTTCGGTCTTAGTTGACAGCATCCAAGCTATCAGTCTACCTTTTGTACAACGGACATCCAATTGTGCGGTCAGACCTCCACAGTCGGGAACAGACGGCCTACGCAGGATGCCTTCTTGGCTCGGAAGGCGGACAAGGTGGGTGGCTCCACAAATAGACGGGCTGCCACTCATATGTGGTGCCATTATTGTGCTGGCAGCACTCCACCCCTTTTAGTCGTTGAACACGAGGTGCTGCGGCTCAGTAAAATACTTTCAAGAAATGCTATTCCATTTCGTCCACGGGTATTTTAGTGCAGGAGACTAAGAGCTAGTGTCGACGTCGTAGCACAGTGATGGGCTACTCCATATTGGCAATTTTCGCATGTAACCTAACTTGAGCGAGATTCAATTCAGAACCGGGCGAATCTTTGCACTATCCGGTGTGCCCGTAGAAATGACAGACTTACGCATTGGTAAGTCAATCCTTTGGAACACGGTCGTACCGGCAACCTGTGAATTTTGTAGTAAATGTATGTTAGCGGTTAGATACCGATCCGAGACTTTCCCTTCGTTAATTTTAGAGCCTCCTGTCACATATCACTTGAGACAGAAATCGCCGTACGCCCAGCTGCCCTTACCTATTGCCACTACGTCGTAGGCTAGGTGTGTAGGACGGGCCGCCCGCCTTTTATACGACGGAACGTAACTTGCGCTG"
val matches = approxMatch(seq, input, 6)

[36mseq[39m: [32mString[39m = [32m"AACTTGCGCTG"[39m
[36minput[39m: [32mString[39m = [32m"GCAGGTAGAACAACGCCAATGCTGGCCACAAACTTCTATCAAAACAAAAGACTGGTAGAAGCCTAGCGGAGCTTCCTCTCTACAACGAGCCTGCCTACCTTTACAATGAACAGGGGTACTCGAGCGGGAGCTTCGCAAAGTTCCATCAAACATATTTCTCTGTGCACGGTACGCACGCTCAATGGTTGTTGAAGTTTGGCCATACCAAGTCCATTTCCAACGAGCACTATCCGTGGTGGATACAGCTGGTGGTCGGCTTGTAAGACCGGAGGTAGAAAAGGCCTTCCACTAGCAGAATTGAGTAAACGATGGGAATATCTATAGTGCTTTGGAAGCAGGGGTGACAAGAGTGTCCTTACATCGTAATGGTGGCGCCCTCCGACGGTAGTGATCAATGGCATCCCCGCTCTATCCGCGACTGGGCAGGCCCAGGTGTTTAGAGTTACTGGGTTTACTAGCGCTAACGTTACCACGTCGGACACAATATTGTGACGGACCCGGCCAGCACTAACTTAAATGAACGGGTGCCCATGATTATCTTTACAAGGCTGTACCCTCACTATGGAACGACGGGACTCACACCAAATTACACACCTACGATCCTCCTAGACGGGGTACCAGAGCTTCATGCTTCGTCTCCTGACACACTCCTGCATTGTGCATAGCCACAGTTCTCGCCTCTTAAAACTTTACGATGTAGCATCGCCCCGGCGGGGGAGATAGCGAATCTGAACGGCGTCTTGACTTTGTAGCCGTCACTGAGGAGTCTAATGCCCTTTAAGAGGACCGCTAGGGGAACCTACTTGAGTCCCGAAAGCGCGGCAAAAAATCTGACTGATTTGCGCCAGCCCGCAGTAGTGGTTAAACTGAATCCATATCGGCATTGGCGGGGGCCGAAAA

In [51]:
println(matches.mkString(" "))

13 29 30 58 60 63 69 83 87 97 102 108 116 122 127 136 137 151 153 158 165 169 173 180 191 192 211 218 224 225 227 237 240 243 263 275 280 286 294 300 318 320 324 332 346 352 357 363 366 373 398 400 410 415 416 421 439 443 452 458 462 463 483 490 494 499 509 513 519 536 551 553 573 583 591 615 624 628 631 645 647 652 654 669 685 686 694 699 705 717 725 731 737 750 767 781 796 800 811 813 826 836 846 852 858 864 875 879 881 885 902 915 925 929 934 940 943 949 953 967 973 983 988 997 1011 1036 1046 1052 1058 1066 1072 1075 1081 1092 1093 1104 1108 1129 1146 1152 1158 1166 1167 1171 1181 1184 1187 1190 1192 1195 1196 1209 1218 1223 1234 1245 1259 1274 1282 1293 1300 1302 1304 1317 1324 1328 1330 1334 1347 1349 1354 1358 1378 1388 1390 1402 1407 1409 1423 1442 1470 1471 1478 1490 1497 1511 1536 1541 1551 1557 1571 1590 1596 1601 1602 1614 1615 1620 1633 1634 1637 1639 1640 1660 1668 1683 1685 1695 1714 1719 1732 1745 1772 1774 1786 1791 1794 1800 1820 1822 1825 1826 1839 1880 1888 1904 1920

4565 14576 14582 14588 14590 14595 14597 14611 14624 14627 14629 14637 14656 14657 14671 14672 14677 14686 14691 14697 14705 14711 14726 14728 14732 14733 14738 14741 14749 14754 14755 14759 14766 14782 14785 14790 14797 14817 14826 14831 14835 14850 14856 14857 14876 14877 14882 14888 14902 14904 14936 14964 14977 14978 14994 15000 15014 15019 15025 15031 15049 15053 15054 15059 15076 15092 15094 15096 15106 15117 15122 15123 15126 15128 15135 15151 15161 15167 15179 15189 15201 15225 15238 15243 15258 15262 15271 15284 15289 15294 15305 15312 15319 15331 15336 15337 15348 15379 15390 15392 15402 15406 15409 15418 15442 15450 15453 15465 15471 15494 15503 15508 15515 15521 15525 15530 15535 15552 15557 15577 15583 15588 15589 15608 15612 15621 15625 15626 15641 15654 15667 15675 15690 15692 15697 15703 15705 15717 15726 15727 15730 15739 15759 15787 15799 15814 15820 15822 15834 15850 15857 15861 15866 15874 15875 15895 15912 15915 15921 15931 15934 15945 15983 15986 15988 15989 15992

In [52]:
matches.length

[36mres51[39m: [32mInt[39m = [32m1975[39m