#### Author: Ceyda Durmaz
Date: November 15, 2020

In [1]:
# Part 1: Wildcard KMP Algorithm 

# Creates prefix table for pattern input and outputs array 
def createLPS(pat):
    """Build the KMP prefix table for pat.

    Entry k of the result is the length of the longest proper prefix of
    pat[0:k+1] that is also a suffix of it. Every character is compared
    literally, so a '*' in pat is treated like any other character here.

    Parameters:
        pat: the pattern (any indexable sequence with a length).

    Returns:
        A list of len(pat) integers; an empty list for an empty pattern.
    """

    size = len(pat)

    # All entries start at 0, which is already the correct value for
    # index 0 and for every position where no prefix can be extended.
    lps = [0] * size

    i = 0  # length of the prefix currently being extended
    j = 1  # position in pat whose table entry is being computed

    while j < size:
        if pat[i] == pat[j]:
            # The prefix of length i grows by one character.
            i += 1
            lps[j] = i
            j += 1
        elif i == 0:
            # Nothing to fall back to: no prefix ends at j.
            lps[j] = 0
            j += 1
        else:
            # Fall back to the next shorter candidate prefix and retry j.
            i = lps[i-1]
    return lps


# Implementation of KMP algorithm accepting wildcards
# Note: To resolve ambiguity of leading wildcards, leading 
#       wildcards are treated as empty strings
def kmpSearch(txt, pat):
    """Search txt for the first place where pat matches, treating '*' in
    pat as a wildcard.

    Parameters:
        txt: text to search, compared character by character.
        pat: pattern; every character other than '*' has to match
             literally. A local copy of the pattern is trimmed (leading,
             trailing and doubled '*' are removed) as wildcards are reached
             during the scan; the caller's string is not modified.

    Returns:
        first_match (the text index recorded at the first character match
        of the current attempt) once the whole pattern has been consumed;
        the current text index when the pattern is, or has been trimmed
        down to, a single '*'; None (implicitly) when the text is
        exhausted first.

    NOTE(review): an empty pat with a non-empty txt raises IndexError at
    the first pat[j] lookup. The prefix table is built from the untrimmed
    pattern with '*' compared literally, and is only sliced (never
    recomputed) when the pattern is trimmed - confirm that is intended.
    """
    
    # Initialize variables for length
    m = len(pat)
    n = len(txt)
    
    # Initialize variables to keep track of indices 
    first_match = None  # text index where the current match attempt began
    wild_text = None    # text index at which a '*' was first reached
    
    # Initialize counters 
    j = 0  # position in pat
    i = 0  # position in txt
    
    # Create prefix table 
    lps = createLPS(pat)
    
    # Run through KMP 
    while i < n: 
        
        # Match
        if pat[j] == txt[i]:
            i += 1
            j += 1
            # Remember where this attempt started (i was already advanced)
            if first_match == None: 
                first_match = i-1
          
        # Pattern found! 
        if j == m:
            return first_match
        
        # If mismatch 
        # (j < m here, so pat[j] is valid; i may have reached n above)
        if i < n and pat[j] != txt[i]: 
            # Mismatch by character
            if pat[j] != "*":
                # The attempt failed, so forget its starting index
                first_match = None
                if j != 0: 
                    # Standard KMP fallback: reuse the matched prefix
                    j = lps[j-1]
                else: 
                    i +=1
            # Mismatch by wildcard
            else:
                # Set index in text when '*' encountered
                if wild_text == None: 
                    wild_text = i
                    
                # If pat is '*'
                if m == 1: 
                    return i
                # Remove trailing '*'
                # (one trailing '*' is dropped per pass through this branch)
                elif pat[m-1] == "*": 
                    pat = pat[0:m-1]
                    lps = lps[0:m-1]
                    if j == m-1: 
                        # The '*' at j was the one removed: step back one
                        # character in both pattern and text
                        j -= 1
                        i -= 1
                        m -= 1
                    else: 
                        m -= 1
                # Remove leading '*'
                elif pat[0] == "*": 
                    pat = pat[1:m]
                    lps = lps[1:m]
                    m -= 1
                # If string after '*' has no match 
                # (i is the last text index: restart the pattern from the
                #  text position where the '*' was first reached)
                elif i+1 == n:
                    j = 0
                    i = wild_text
                    wild_text = None
                # If '*' is followed by another '*'
                # (pat[j+1] exists: pat[m-1] is not '*' here, so j < m-1)
                elif pat[j+1] == "*":
                    # Drop the second '*' from both pattern and table
                    pat1 = pat[0:j+1]
                    pat2 = pat[j+2:m]
                    pat = pat1 + pat2
                    lps1 = lps[0:j+1]
                    lps2 = lps[j+2:m]
                    lps = lps1 + lps2
                    m -=1
                # '*' is empty
                # (the character after '*' matches the current text
                #  character, so the '*' consumes nothing)
                elif pat[j+1] == txt[i]: 
                    j += 1
                # '*' is not empty 
                # (txt[i+1] exists because the i+1 == n case was handled
                #  above; the '*' consumes txt[i])
                elif pat[j+1] == txt[i+1]:
                    j += 1
                    i += 1
                else: 
                    # Neither applies: the '*' consumes txt[i], keep scanning
                    i += 1

# Part 1 test driver: (text, pattern) pairs, printed in order as
# "<case number>: <result of kmpSearch>".
KMP_CASES = [
    ("WildKMP", "KMP"),
    ("foobar", "f**"),
    ("foocar", "foo*ar"),
    ("pattern in text", "t*rn"),
    ("pattern in text", "te*t"),
    ("pattern not in text", "t*st"),
    ("match on anything", "***"),
    ("find the first three letter word", " **"),
    ("find the first three letter word", "*** three"),
    ("find the first three letter word", "e *tter"),
    ("find the first three letter word", " *"),
    ("ilovealgorithmscourse", "algorithms"),
    ("ilovealgorithmscourse", "al*thms"),
    ("ilovealgorithmscourse", "al*thm*s"),
    ("ilovealgorithmscourse", "l*ve"),
    ("ilovealgorithmscourse", "lo*al*thm*s"),
    ("ilovealgorithmscourse", "*"),
    ("ilovealgorithmscourse", "al*kkkk"),
    ("ilovealgorithmscourse", "al*thms***"),
    ("ilovealgorithmscourse", "****al*****"),
    ("ilovealgorithmscourse", "a****l"),
    ("ilovealgorithmscourse", "***i****e***"),
]

print("TEST CASES")
for case_no, (text, pattern) in enumerate(KMP_CASES, start=1):
    print(str(case_no) + ": " + str(kmpSearch(text, pattern)))

TEST CASES
1: 4
2: 0
3: 0
4: 2
5: 3
6: None
7: 0
8: 4
9: 14
10: 19
11: 4
12: 5
13: 5
14: 5
15: 1
16: 1
17: 0
18: None
19: 5
20: 5
21: 5
22: 0


In [2]:
# Part 2: Cyclic Rotation Alignment
# Source for suffixArray: https://nbviewer.jupyter.org/github/BenLangmead/comp-genomics-class/blob/master/notebooks/CG_BWT_SimpleBuild.ipynb

# Generates suffix array for string (s)
def suffixArray(s):
    """Return the suffix array of s as a list.

    Element k is the start offset of the k-th smallest suffix of s in
    lexicographic order. A real list is returned (rather than a lazy,
    one-shot map object) so the result can be indexed and re-iterated,
    which is what binarySearch needs and what FMindex.suffixArray returns.

    Note: every suffix is materialised as a sort key, so this costs
    O(n^2) memory; it is meant for small inputs.
    """

    # All suffixes have different lengths, hence are distinct, so sorting
    # the offsets by their suffix gives the same order as sorting
    # (suffix, offset) pairs.
    return sorted(range(len(s)), key=lambda start: s[start:])

# Does binary search of suffix array (sa) for a pattern (t) in string (s).
# Prints the rotation of s that starts at the lexicographically smallest
# suffix having t as a prefix, plus that suffix's index in s.
# Prints "No match found." when no suffix starts with t. Always returns None.
def binarySearch(sa, t, s):
    
    # Lower-bound search: find the first row whose suffix is >= t.
    # A suffix exactly equal to t is also the smallest suffix starting
    # with t, so no separate equality case is needed.
    lo = 0
    hi = len(sa)
    while lo < hi:
        mid = (lo+hi)//2
        if s[sa[mid]:] < t: 
            lo = mid+1
        else: 
            hi = mid
    
    # lo == len(sa) means t sorts after every suffix (or sa is empty);
    # indexing sa[lo] in that case would raise IndexError.
    if lo == len(sa) or not s[sa[lo]:].startswith(t):
        print("No match found.")
        return
    
    # Report the rotation of s beginning at the first match
    sidx = sa[lo]
    s2 = s[sidx:] + s[0:sidx]
    print("Cycle of s: " + str(s2))
    print("Index in s: " + str(sidx))
    return

def cyclicSearch(t, s): 
    """Look for pattern t in string s via the suffix array of s.

    Builds the suffix array of s, hands it to binarySearch (which prints
    the outcome) and passes binarySearch's return value back to the caller.
    """
    
    suffix_order = list(suffixArray(s))
    outcome = binarySearch(suffix_order, t, s)
    return outcome

# Part 2 test driver: search one fixed string for several patterns,
# printing a header before each search and a blank separator between them.
print("TEST CASES\n")
s = "suggestingtestingtexting$"
CYCLIC_PATTERNS = ["gest", "test", "text", "ing", "k"]
for case_pos, t in enumerate(CYCLIC_PATTERNS):
    if case_pos > 0:
        print("\n")
    print("---- S = " + s + " ---- t = " + t + " ----")
    cyclicSearch(t,s)

TEST CASES

---- S = suggestingtestingtexting$ ---- t = gest ----
Cycle of s: gestingtestingtexting$sug
Index in s: 3


---- S = suggestingtestingtexting$ ---- t = test ----
Cycle of s: testingtexting$suggesting
Index in s: 10


---- S = suggestingtestingtexting$ ---- t = text ----
Cycle of s: texting$suggestingtesting
Index in s: 17


---- S = suggestingtestingtexting$ ---- t = ing ----
Cycle of s: ing$suggestingtestingtext
Index in s: 21


---- S = suggestingtestingtexting$ ---- t = k ----
No match found.


In [3]:
# Algorithm Source: http://www.cs.jhu.edu/~langmea/resources/lecture_notes/10_bwt_and_fm_index_v2.pdf

class Checkpoints(object):
    """ Periodic snapshots of the running per-character counts of a BWT
    string (L column), used to answer rank queries without scanning the
    string from its first row. """
    
    def __init__(self, bw, cInt = 5):
        """ Record, for rows 0, cInt, 2*cInt, ... of bw, how many times
        each character has occurred up to and including that row. """
        self.cInt = cInt     # spacing between snapshot rows
        self.cps = {}        # character -> list of counts, one per snapshot
        
        # Register every distinct character with a zero running count
        running = {}
        for sym in bw:
            if sym not in running:
                running[sym] = 0
                self.cps[sym] = []
        
        # Single sweep over bw, taking a snapshot of all counts at
        # every row that is a multiple of cInt
        for pos, sym in enumerate(bw):
            running[sym] += 1
            if pos % cInt != 0:
                continue
            for key, count in running.items():
                self.cps[key].append(count)
    
    def rank(self, bw, c, row):
        """ Number of occurrences of character c in bw from row 0 through
        row (inclusive). Returns 0 for a negative row or for a character
        that never occurs in the indexed string. """
        if row < 0:
            return 0
        if c not in self.cps:
            return 0
        
        # Walk back to the nearest snapshot row, counting the
        # occurrences of c passed on the way
        pos = row
        seen = 0
        while pos % self.cInt != 0:
            if bw[pos] == c:
                seen += 1
            pos -= 1
        snapshot = self.cps[c][pos // self.cInt]
        return snapshot + seen
    
class FMindex():
    """ FM index over a reference string that can be queried with reads.

    Exact matches are found by backward search over the BWT. When a read
    does not match exactly, getRange tries to rescue it with exactly one
    edit at the position where the search failed: dropping that base
    (insertion in the read), adding a base after it (deletion in the read)
    or replacing it (mismatch). Candidate bases are the DNA letters
    A, C, T and G. """
    
    @staticmethod
    def suffixArray(s):
        """ Get suffix array of s as a list of start offsets, ordered by
        the lexicographic order of the suffixes. """
        # Suffixes are pairwise distinct, so ordering the offsets by their
        # suffix equals ordering (suffix, offset) pairs.
        return sorted(range(len(s)), key=lambda start: s[start:])
    
    @staticmethod
    def bwt(t, sa = None): 
        """ Get the Burrows-Wheeler transform of t. The suffix array sa is
        computed with FMindex.suffixArray when it is not supplied. The
        character preceding offset 0 is reported as '$'. """
        if sa is None: 
            # Use the class's own builder so this method does not depend
            # on a module-level function of the same name being defined.
            sa = FMindex.suffixArray(t)
        return ''.join('$' if si == 0 else t[si-1] for si in sa)
    
    @staticmethod
    def downsampleSA(sa, n=4):
        """ Keep only the suffix-array entries whose text offset is a
        multiple of n (offsets 0, n, 2n, ...) to save space. Returns a
        dict mapping row number -> text offset. """
        return {row: suf for row, suf in enumerate(sa) if suf % n == 0}
    
    def __init__(self, t, cInt = 5, ssaInt = 5):
        """ Initializes SA, BWT, down sampled SA, FM Index Checkpoints, 
        length of BWT, dictionary of number occurrences of each character 
        in t, and indices of first occurrence of all characters in F 
        column.

        t      -- reference string; '$' is appended if it is not already
                  the last character (an empty string becomes '$')
        cInt   -- spacing of the rank checkpoints
        ssaInt -- sampling interval of the down sampled suffix array """
        
        # Append '$' to t if doesn't already exist (endswith also copes
        # with an empty reference, where t[-1] would raise IndexError)
        if not t.endswith('$'):
            t += '$' 
        self.t = t
        
        # Create SA, BWT, and FM Index Checkpoints.
        # From here on the instance attribute 'bwt' holds the transformed
        # string and hides the static method of the same name on this
        # instance; the method is therefore fetched from the class.
        sa = self.suffixArray(t)
        self.bwt = type(self).bwt(t, sa)
        self.ssa = self.downsampleSA(sa, ssaInt)
        self.slen = len(self.bwt)
        self.cps = Checkpoints(self.bwt, cInt)
        
        # Get # of occurrences of each character
        tots = dict()
        for c in self.bwt:
            tots[c] = tots.get(c, 0) + 1
        
        # For all characters in F column, store index of first occurence 
        self.first = {}
        totc = 0
        for c, count in sorted(tots.items()):
            self.first[c] = totc
            totc += count
        
        # Mismatch variables 
        self.mismatch = 0 # Mismatch counter (1 = outermost search failed)
        self.misType = None # Mismatch type of the last repaired read
        self.bases = ["A", "C", "T", "G"] # DNA Nucleotides 
        self._alignedLen = None # Length of the last adjusted read, if any
    
    def stepLeft(self, row): 
        """ LF-mapping step: from the given BWT row, return the row of the
        suffix that starts one text position earlier. """
        c = self.bwt[row]
        return self.cps.rank(self.bwt, c, row-1) + self.first[c]
    
    def resolveIdx(self, row):
        """ Find index in t where occurrence occurs starting from row. 
        Steps left until a row kept in the down sampled suffix array is
        reached, then adds the number of steps taken. """
        nsteps = 0
        while row not in self.ssa:
            row = self.stepLeft(row)
            nsteps += 1
        return self.ssa[row] + nsteps
    
    def _acceptRepair(self, misType, detail, adjusted):
        """ Record a successful single-edit repair and report it. """
        self.mismatch = 0
        self.misType = misType
        self._alignedLen = len(adjusted)
        print("\t----------READ HAS BASE DIFFERENCE----------")
        print("\t    " + detail)
        print("\t    Adjusted read: " + adjusted)
    
    def getRange(self, p):
        """ Get range of BWT rows with p as a prefix. 

        Returns a half-open pair (l, r): rows l .. r-1 match p, or a
        repaired version of p when exactly one edit rescues it. The range
        is empty (r <= l) when nothing matches. On a successful repair
        self.misType is set to "INDEL" or "MISMATCH" and the repair is
        printed. """
        l, r = 0, self.slen - 1 
        
        # Iterate through p from right to left 
        for i in range(len(p)-1, -1, -1): 
            c = p[i]
            if c in self.first:
                l = self.cps.rank(self.bwt, c, l-1) + self.first[c]
                r = self.cps.rank(self.bwt, c, r)   + self.first[c] - 1
            else:
                # Character never occurs in the reference: treat it as an
                # ordinary failed extension instead of raising KeyError.
                r = l - 1
            if r >= l:
                continue
            
            # The search failed at position i
            self.mismatch += 1
            self.bases = ["A", "C", "T", "G"]
            
            # Only the outermost search (counter == 1) may attempt a
            # repair; a failure inside a repair attempt gives up at once,
            # which limits the rescue to a single edit.
            if self.mismatch != 1:
                break
            
            if c in self.bases: 
                self.bases.remove(c)
            
            # INSERTION: check if removing mismatch aligns p
            # (skipped when that would leave an empty read, which would
            #  trivially "match" every row)
            p_ins = p[0:i] + p[i+1:]
            if p_ins:
                lo, hi = self.getRange(p_ins)
                if hi > lo:
                    self._acceptRepair(
                        "INDEL",
                        "Insertion: Removed base '" + str(c) + "' at position " + str(i),
                        p_ins)
                    return lo, hi
            
            # MISMATCH/DELETION: replace/add current base with A,C,T, or G 
            # (iterate over a snapshot: nested searches rebind self.bases)
            for b in list(self.bases): 
                
                # Deletion: Base Addition
                p_del = p[0:(i+1)] + str(b) + p[(i+1):]
                lo, hi = self.getRange(p_del)
                if hi > lo: 
                    self._acceptRepair(
                        "INDEL",
                        "Deletion: Base added '" + str(b) + "' at position " + str(i),
                        p_del)
                    return lo, hi
                
                # Mismatch: Base Replacement
                p_rep = p[0:i] + str(b) + p[i+1:]
                lo, hi = self.getRange(p_rep)
                if hi > lo:
                    self._acceptRepair(
                        "MISMATCH",
                        "Mismatch: Replaced '" + str(c) + "' with '" + str(b) + "' at position " + str(i),
                        p_rep)
                    return lo, hi
            
            # Every repair failed. Stop here with an empty range rather
            # than continuing the backward search from the (empty) bounds
            # of the last attempt, which could index past the BWT or
            # report a spurious row.
            self.mismatch = 0
            return l, l
        return l, r+1
    
    def getIdx(self, p):
        """ Gets starting indices of t where p aligns and prints each
        alignment (or a could-not-map notice). Always returns ''. """
        # Clear what the previous read left behind so a stale "INDEL"
        # cannot change the reference span printed for this read.
        self.misType = None
        self._alignedLen = None
        
        l, r = self.getRange(p)
        self.mismatch = 0
        starts = sorted(self.resolveIdx(x) for x in range(l, r))
        
        if not starts: 
            print("")
            print("\t-----READ COULD NOT MAP-----")
            return str("")
        
        # Span of reference covered: the adjusted read's length after a
        # repair (one shorter for an insertion, one longer for a
        # deletion), otherwise the read's own length.
        if self._alignedLen is None:
            lenP = len(p)
        else:
            lenP = self._alignedLen
        for s in starts: 
            end = s + lenP
            # Get substring of t
            tsub = self.t[s:end]
            print("")
            print("\tAlignment starting at " + str(s) + " and ending at " + str(end))
            print("\tREAD: " + p)
            print("\tREF:  " + tsub)
        return str("") 


def alignReads(refPath, readsPath): 
    """ Prints to console the alignment status for all reads in a read file. 
    Accepts a path to the reference file 'refPath' and the path to the reads
    file 'readsPath' as strings. 

    The reference sequence is taken from the second line of 'refPath'
    (the first line is assumed to be the FASTA header - a sequence wrapped
    over several lines is NOT joined). The reads file is consumed as
    consecutive (header line, sequence line) pairs; a dangling last line
    without a partner is ignored. Returns None. """
    
    # Read in reference file (closed even if reading fails)
    with open(refPath) as refFile:
        refLines = refFile.readlines()
    # strip() rather than strip('\n') so a '\r' from Windows line endings
    # does not end up inside the indexed reference
    ref = refLines[1].strip()
    
    # Initialize reference for alignment 
    refFM = FMindex(ref)
    
    # Read in reads file 
    with open(readsPath) as readsFile:
        reads = readsFile.readlines()
    
    # Align reads: even lines are headers, odd lines are sequences.
    # zip stops at the shorter slice, so an odd line count no longer
    # raises IndexError after the last complete pair.
    for rId, rStr in zip(reads[0::2], reads[1::2]): 
        rStr = rStr.strip()
        
        # rId keeps its trailing newline, which produces the blank line
        # after the read id in the report
        print("READ ID: " + str(rId[1:]))
        print("\tSEQUENCE: " + str(rStr) + "\n")
        print(refFM.getIdx(str(rStr)))
        
    return

# TEST ALIGNMENT ON TEST DATA 
# Runs as soon as this cell/module is executed. Both paths are relative,
# so "reference.fasta" and "reads.fasta" are looked up in the current
# working directory.
alignReads(refPath="reference.fasta", readsPath="reads.fasta")


READ ID: 1_0

	SEQUENCE: CCTTAATTCCGAGATATGACAGTGGACAGTGGGGCCGGGTGGCATTTGTA


	-----READ COULD NOT MAP-----

READ ID: 2_4

	SEQUENCE: GCGCTGCAGTCGGATCACCAAGTTGACCTAACCTTCAGGTCACGAAAAAG


	Alignment starting at 144 and ending at 194
	READ: GCGCTGCAGTCGGATCACCAAGTTGACCTAACCTTCAGGTCACGAAAAAG
	REF:  GCGCTGCAGTCGGATCACCAAGTTGACCTAACCTTCAGGTCACGAAAAAG

READ ID: 3_0

	SEQUENCE: CTAGTGTTTGTGAGTAGCCTTAATTGCTTCTGCGCCGGAGACTTGCCCCT


	-----READ COULD NOT MAP-----

READ ID: 4_3

	SEQUENCE: TTCGTGCTTTTACCACTGAGGGATGATGTGCGCAAGGAAGGGCCCTTCAT

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'T' with 'G' at position 16
	    Adjusted read: TTCGTGCTTTTACCACGGAGGGATGATGTGCGCAAGGAAGGGCCCTTCAT

	Alignment starting at 563 and ending at 613
	READ: TTCGTGCTTTTACCACTGAGGGATGATGTGCGCAAGGAAGGGCCCTTCAT
	REF:  TTCGTGCTTTTACCACGGAGGGATGATGTGCGCAAGGAAGGGCCCTTCAT

READ ID: 5_1

	SEQUENCE: CCGTATGCTCAAGAATCATTGTTGCACACCCAATATAGGCGCCTCAGGCA


	-----READ COULD NOT MAP-----

READ ID: 6_0

	SEQUENCE:

	REF:  TTATCGGATGTCGCTTACTGTAAACCCATCACAGCACGCCTTAAGAAAGA

READ ID: 199_0

	SEQUENCE: TCTAGTGAAAGCTCCGGTCGTAAGCCTACGGTGATCTATGTTCAGGAGCT


	-----READ COULD NOT MAP-----

READ ID: 200_1

	SEQUENCE: TAGATTTCATTCCATAACAACGACCTTATTACCCACGCTCGTTGTGAGGG


	-----READ COULD NOT MAP-----

READ ID: 201_4

	SEQUENCE: AGAACAGGAACTGCTTCTGCGCCGGAGACTTGCCCCTTCCTGACCTTCTG


	Alignment starting at 226 and ending at 276
	READ: AGAACAGGAACTGCTTCTGCGCCGGAGACTTGCCCCTTCCTGACCTTCTG
	REF:  AGAACAGGAACTGCTTCTGCGCCGGAGACTTGCCCCTTCCTGACCTTCTG

READ ID: 202_3

	SEQUENCE: ACCTTCCGGTCACGAAAAAGCTCAGTTTGAGTGCCCAACACCCGGTTGCG

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'C' with 'A' at position 6
	    Adjusted read: ACCTTCAGGTCACGAAAAAGCTCAGTTTGAGTGCCCAACACCCGGTTGCG

	Alignment starting at 174 and ending at 224
	READ: ACCTTCCGGTCACGAAAAAGCTCAGTTTGAGTGCCCAACACCCGGTTGCG
	REF:  ACCTTCAGGTCACGAAAAAGCTCAGTTTGAGTGCCCAACACCCGGTTGCG

READ ID: 203_3

	SEQUENCE: TACAAAGTAGTCACGTTTGGACCGGCCCACGTCTTACGGG

	Alignment starting at 932 and ending at 982
	READ: ATCGACATGTCTACGGTGATCTATGTTCAGGAGCTTCAGACCCGCCGTCA
	REF:  AGCGACATGTCTACGGTGATCTATGTTCAGGAGCTTCAGACCCGCCGTCA

READ ID: 405_3

	SEQUENCE: GATATTAACGCGTATACCAACTCGCCTACTAGACATACCCTCAACGGATG

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'C' with 'A' at position 33
	    Adjusted read: GATATTAACGCGTATACCAACTCGCCTACTAGAAATACCCTCAACGGATG

	Alignment starting at 343 and ending at 393
	READ: GATATTAACGCGTATACCAACTCGCCTACTAGACATACCCTCAACGGATG
	REF:  GATATTAACGCGTATACCAACTCGCCTACTAGAAATACCCTCAACGGATG

READ ID: 406_0

	SEQUENCE: CCGGTTGCGCCAGAACAGGAACTGCCTCCGGTCGTAAGCTTACAAAGTAG


	-----READ COULD NOT MAP-----

READ ID: 407_0

	SEQUENCE: GATCTATGTTCAGGAGCTTCAGACCCAAATAAGACCATGACTCGAATTTG


	-----READ COULD NOT MAP-----

READ ID: 408_2

	SEQUENCE: GGGATGATGTGCGCAAGGAAGGGCCCTTCATCTTCTCTCTCTCTAGTGCA


	-----READ COULD NOT MAP-----

READ ID: 409_2

	SEQUENCE: TAACGCGTATACCAACTCTCCTACTAGAAATACCCTCAACGGATGCGCGG

	----------READ

	    Mismatch: Replaced 'A' with 'C' at position 26
	    Adjusted read: TTACAAAGTAGTCACGTGTGGACCGGCCCACGTCTTACGGGCAGAGAGGC

	Alignment starting at 648 and ending at 698
	READ: TTACAAAGTAGTCACGTGTGGACCGGACCACGTCTTACGGGCAGAGAGGC
	REF:  TTACAAAGTAGTCACGTGTGGACCGGCCCACGTCTTACGGGCAGAGAGGC

READ ID: 614_3

	SEQUENCE: GTCCGGGCGTCCATGTAGCATTACACTTACTACAGCAGACTTTTCATTAT

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'T' with 'G' at position 20
	    Adjusted read: GTCCGGGCGTCCATGTAGCAGTACACTTACTACAGCAGACTTTTCATTAT

	Alignment starting at 821 and ending at 871
	READ: GTCCGGGCGTCCATGTAGCATTACACTTACTACAGCAGACTTTTCATTAT
	REF:  GTCCGGGCGTCCATGTAGCAGTACACTTACTACAGCAGACTTTTCATTAT

READ ID: 615_0

	SEQUENCE: ATTAACGCGTATACCAACTCGCCTAAGAATGTTCGTGCTTTTACCACGGA


	-----READ COULD NOT MAP-----

READ ID: 616_4

	SEQUENCE: CATTTAAAGCGATATTAACGCGTATACCAACTCGCCTACTAGAAATACCC


	Alignment starting at 333 and ending at 383
	READ: CATTTAAAGCGATATTAACGCGTATACCAACTCGCCTACTAGAAATACCC
	REF:  CA


	-----READ COULD NOT MAP-----

READ ID: 812_2

	SEQUENCE: GTCTGTTCGCTAACACAGGTGTCTGCCTCAGAGGAGGTGCAAGTGTCTAA

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'T' with 'G' at position 6
	    Adjusted read: GTCTGTGCGCTAACACAGGTGTCTGCCTCAGAGGAGGTGCAAGTGTCTAA

	Alignment starting at 415 and ending at 465
	READ: GTCTGTTCGCTAACACAGGTGTCTGCCTCAGAGGAGGTGCAAGTGTCTAA
	REF:  GTCTGTGCGCTAACACAGGTGTCTGCCTCAGAGGAGGTGCAAGTGTCTAA

READ ID: 813_4

	SEQUENCE: TGCTTCTGCGCCGGAGACTTGCCCCTTCCTGACCTTCTGTTCTCCCTAAA


	Alignment starting at 237 and ending at 287
	READ: TGCTTCTGCGCCGGAGACTTGCCCCTTCCTGACCTTCTGTTCTCCCTAAA
	REF:  TGCTTCTGCGCCGGAGACTTGCCCCTTCCTGACCTTCTGTTCTCCCTAAA

READ ID: 814_2

	SEQUENCE: TCTTGGTAGCGACATGTCTACGGTGATCTATGTGCAGGAGCTTCAGACCC

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'G' with 'T' at position 33
	    Adjusted read: TCTTGGTAGCGACATGTCTACGGTGATCTATGTTCAGGAGCTTCAGACCC

	Alignment starting at 925 and ending at 975
	READ: TCTTGGTAGCGACAT


	-----READ COULD NOT MAP-----

READ ID: 963_1

	SEQUENCE: ATTGCGCACGTCATAGAGGGGTAGACTACTTATGATTTAGATACATTGTG


	-----READ COULD NOT MAP-----

READ ID: 964_3

	SEQUENCE: GCCTCAAATTGTCTAGTGTTTGTGAGTAGCCTTAATTCCGAGATATGACC


	-----READ COULD NOT MAP-----

READ ID: 965_4

	SEQUENCE: ACTCGAATTTGCACCTCTGACCAGGTTCCATTTAAAGCGATATTAACGCG


	Alignment starting at 305 and ending at 355
	READ: ACTCGAATTTGCACCTCTGACCAGGTTCCATTTAAAGCGATATTAACGCG
	REF:  ACTCGAATTTGCACCTCTGACCAGGTTCCATTTAAAGCGATATTAACGCG

READ ID: 966_3

	SEQUENCE: CTTTTACCACGGAGGGATGATGTGCGAAAGGAAGGGCCCTTCATCTTCTC

	----------READ HAS BASE DIFFERENCE----------
	    Mismatch: Replaced 'A' with 'C' at position 26
	    Adjusted read: CTTTTACCACGGAGGGATGATGTGCGCAAGGAAGGGCCCTTCATCTTCTC

	Alignment starting at 569 and ending at 619
	READ: CTTTTACCACGGAGGGATGATGTGCGAAAGGAAGGGCCCTTCATCTTCTC
	REF:  CTTTTACCACGGAGGGATGATGTGCGCAAGGAAGGGCCCTTCATCTTCTC

READ ID: 967_3

	SEQUENCE: GCTCGTAATTCGTCAATTAGTGCAGCTACGCATATACGCTTCATATCGAT

	----------REA