# How to find overlaps between pairs of reads

### Finding overlaps

In [6]:
def overlap(a, b, min_length=4):
    """ Return length of longest suffix of 'a' matching
        a prefix of 'b' that is at least 'min_length'
        characters long.  If no such overlap exists,
        return 0.

        a, b       -- the two strings (reads) to compare
        min_length -- smallest overlap length that counts
        """
    # A prefix of 'b' can never reach 'min_length' characters when 'b'
    # itself is shorter.  Without this guard the slice below would
    # silently shrink to all of 'b' and a too-short overlap could be
    # reported.
    if len(b) < min_length:
        return 0
    # Every qualifying overlap must begin with these characters of 'b'.
    seed = b[:min_length]
    start = 0  # start all the way at the left
    while True:
        # Scanning left to right means the first suffix that matches
        # is also the longest one.
        start = a.find(seed, start)
        if start == -1:
            # no more occurrences of the seed to the right
            return 0
        # the seed occurs here; check whether the whole suffix of 'a'
        # from this position is a prefix of 'b'
        if b.startswith(a[start:]):
            return len(a) - start
        start += 1  # move just past the previous occurrence

In [7]:
# Example: the last four characters of the first read ('CGTA') are also
# the first four characters of the second read.
overlap('ACTGTCCTCCGTCTACGTA', 'CGTACCGT')


4

In [8]:
from itertools import permutations

# Evaluates to a list of every ordering of the three elements:
# [(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)]
list(permutations((1,2,3), 3))

[(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)]

In [9]:
def naive_overlap_map(reads, k):
    """ Return a dict mapping ordered pairs of reads to the
        length of their overlap.

        Every ordered pair (a, b) taken from two different
        positions in 'reads' is passed to overlap() with
        min_length=k; only pairs whose result is greater
        than zero are kept, keyed by the tuple (a, b).
        """
    # Lazily pair each (a, b) tuple with its overlap length so that
    # overlap() is called exactly once per ordered pair.
    scored_pairs = (
        ((left, right), overlap(left, right, min_length=k))
        for left, right in permutations(reads, 2)
    )
    return {pair: length for pair, length in scored_pairs if length > 0}

In [10]:
# Three short example reads; print every ordered pair of them that
# overlaps, using 3 as the minimum overlap length.
reads = ['ACGGATGATC', 'GATCAAGT', 'TTCACGGA']
print(naive_overlap_map(reads, 3))

{('ACGGATGATC', 'GATCAAGT'): 4, ('TTCACGGA', 'ACGGATGATC'): 5}
