# Naive, BM: Comparing

Implementing versions of the naive exact matching and Boyer-Moore algorithms that also 
count and return:

-  character comparisons performed

-  alignments tried

We will use these measures as approximate indicators of how efficient each algorithm is.

---
### Boyer Moore Counts

In [None]:
from bm_preproc import BoyerMoore

from bm_with_counts import boyer_moore_with_counts

from geneReader import geneReader

In [None]:
# Path of the FASTA excerpt that the following cells search.
filename = 'chr1.GRCh38.excerpt.fasta'

# geneReader is given the path itself, so no separate file handle is needed.
# The earlier open()/close() pair created a handle that was never read.
reads = geneReader ( filename )

In [None]:
# Text to search: the value geneReader produced for the FASTA excerpt.
t = reads

# Pattern to look for in the text.
p = 'GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG'

# Alphabet handed to the BoyerMoore preprocessing object.
uppercase_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

p_bm = BoyerMoore ( p, uppercase_alphabet )

# Search once and reuse the result: the three printed values are elements 0, 1
# and 2 of the same return value, so repeating the call only repeats the scan.
bm_result = boyer_moore_with_counts ( p, p_bm, t )

print ( 'Boyer Moore occurrences:', bm_result [ 0 ] )

print ( 'Boyer Moore alignments:', bm_result [ 1 ] )

print ( 'Boyer Moore character comparisons:', bm_result [ 2 ] )

---
### Naive Exact Matching Counts

In [None]:
from naive_with_counts import naive_with_counts

from geneReader import geneReader

In [None]:
# Path of the FASTA excerpt that the following cells search.
filename = 'chr1.GRCh38.excerpt.fasta'

# geneReader is given the path itself, so no separate file handle is needed.
# The earlier open()/close() pair created a handle that was never read.
reads = geneReader ( filename )

In [None]:
# Text to search: the value geneReader produced for the FASTA excerpt.
t = reads

# Pattern to look for in the text.
p = 'GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG'

# Search once and reuse the result: the three printed values are elements 0, 1
# and 2 of the same return value, so repeating the call only repeats the scan.
naive_result = naive_with_counts ( p, t )

print ( 'Naive exact matching occurrences:', naive_result [ 0 ] )

print ( 'Naive exact matching alignments:', naive_result [ 1 ] )

print ( 'Naive exact matching character comparisons:', naive_result [ 2 ] )

---
### Naive Allowing Up to Two Mismatches

In [None]:
from naive_mismatches import naive_mismatches

from geneReader import geneReader

In [None]:
# Path of the FASTA excerpt that the following cells search.
filename = 'chr1.GRCh38.excerpt.fasta'

# geneReader is given the path itself, so no separate file handle is needed.
# The earlier open()/close() pair created a handle that was never read.
reads = geneReader ( filename )

In [None]:
# Text to search and the pattern to look for in it.
t = reads

p = 'GGCGCGGTGGCTCACGCCTGTAAT'

# Collect what naive_mismatches reports first, then print how many entries it holds.
mismatch_hits = naive_mismatches ( p, t )

print ( 'Occurences using naive matching with up to 2 mismatches:', len ( mismatch_hits ) )

---

### Approximate Matching an Indexed Object, Allowing 2 Mismatches

In [None]:
from kmer_index import Index

from approximate_match_idx import approximate_match_idx

In [None]:
from geneReader import geneReader

# Path of the FASTA excerpt that the following cells search.
filename = 'chr1.GRCh38.excerpt.fasta'

# geneReader is given the path itself, so no separate file handle is needed.
# The earlier open()/close() pair created a handle that was never read.
reads = geneReader ( filename )

In [None]:
# Text to search and the pattern to look for in it.
t = reads

p = 'GGCGCGGTGGCTCACGCCTGTAAT'

# Value passed as the third argument of approximate_match_idx.
max_mismatches = 2

# Kept as a bare trailing expression: in a notebook cell its value becomes the cell output.
approximate_match_idx ( p, t, max_mismatches )

---
### Approximate Matching with a Boyer Moore Object, Up to 2 Mismatches

In [None]:
from approximate_match import approximate_match

from geneReader import geneReader

In [None]:
# Path of the FASTA excerpt that the following cells search.
filename = 'chr1.GRCh38.excerpt.fasta'

# geneReader is given the path itself, so no separate file handle is needed.
# The earlier open()/close() pair created a handle that was never read.
reads = geneReader ( filename )

In [None]:
# Text to search and the pattern to look for in it.
t = reads

p = 'GGCGCGGTGGCTCACGCCTGTAAT'

# Collect what approximate_match reports first, then print how many entries it holds.
approx_hits = approximate_match ( p, t, 2 )

print ( 'Matches using Approximate Matching with Boyer Moore Object:', len ( approx_hits ) )

---

### Occurrences Using a SubseqIndex Object, Up to 2 Mismatches

In [None]:
from SubseqIndex import SubseqIndex

from approximate_match_subseq import approximate_match_subseq

from geneReader import geneReader

In [None]:
# Path of the FASTA excerpt that the following cells search.
filename = 'chr1.GRCh38.excerpt.fasta'

# geneReader is given the path itself, so no separate file handle is needed.
# The earlier open()/close() pair created a handle that was never read.
reads = geneReader ( filename )

In [None]:
# Text to search and the pattern to look for in it.
t = reads

p = 'GGCGCGGTGGCTCACGCCTGTAAT'

# Third and fourth arguments of approximate_match_subseq.
n, ival = 2, 3

# Keep the whole return value, then print its element at index 1.
subseq_result = approximate_match_subseq ( p, t, n, ival )

print ( 'Matches with an indexed object using subsequences:', subseq_result [ 1 ] )