In [62]:
def _read(filename):
    with open(filename, 'r') as sqinput:
        sqinput = sqinput.read().splitlines()
    return sqinput

def Composition(k, text):
    return [text[i:k+i] for i in range(len(text)-k+1)]

def Overlap(strings):
    final = [strings[0]]
    final += [i[-1] for i in strings[1:]]
    return ''.join(final)

def OverlapGraph(strings):
    k = len(strings[0])
    prefs = [i[:k-1] for i in strings]
    suffs = [i[-(k-1):] for i in strings]

    dic = {}

    for z, suff in enumerate(suffs):
        dic[strings[z]] = []
        for x, pref in enumerate(prefs):
            if pref == suff:
                dic[strings[z]].append(strings[x])
    
    formatted = '\n'.join([f"{key} -> {','.join(values)}" for key, values in dic.items() if dic[key] != []])
    return dic, formatted
                
def DeBruijn(k, text):
    k -= 1
    all_mers = Composition(k-1, text)
    debruijn = {}

    for i in range(len(text)-k+1):
        preffix = text[i:k+i]
        if preffix in debruijn:
            debruijn[preffix] = [debruijn[preffix], text[i+1:i+k+1]]
        else:
            debruijn[preffix] = text[i+1:i+k+1]

    return debruijn

def MerDeBruijn(strings):
    preffs = [i[:-1] for i in strings]
    suffs = [i[1:] for i in strings]

    merdebruijn = {}
    for idx, i in enumerate(preffs):
        if i in merdebruijn:
            merdebruijn[i] = [merdebruijn[i], suffs[idx]]
        else:
            merdebruijn[i] = suffs[idx]
    return dict(sorted(merdebruijn.items()))



In [63]:
print(Composition(5, 'CAATCCAAC'))
print(Overlap(['ACCGA', 'CCGAA', 'CGAAG', 'GAAGC', 'AAGCT']))
directional, p = OverlapGraph(['ATGCG', 'GCATG', 'CATGC', 'AGGCA', 'GGCAT', 'GGCAC'])
print(p)
print(DeBruijn(4, 'AAGATTCTCTAAGA'))
print(MerDeBruijn(['GAGG', 'CAGG', 'GGGG', 'GGGA', 'CAGG', 'AGGG', 'GGAG']))

['CAATC', 'AATCC', 'ATCCA', 'TCCAA', 'CCAAC']
ACCGAAGCT
GCATG -> CATGC
CATGC -> ATGCG
AGGCA -> GGCAT,GGCAC
GGCAT -> GCATG
{'AAG': ['AGA', 'AGA'], 'AGA': ['GAT', 'GA'], 'GAT': 'ATT', 'ATT': 'TTC', 'TTC': 'TCT', 'TCT': ['CTC', 'CTA'], 'CTC': 'TCT', 'CTA': 'TAA', 'TAA': 'AAG'}
{'AGG': 'GGG', 'CAG': ['AGG', 'AGG'], 'GAG': 'AGG', 'GGA': 'GAG', 'GGG': ['GGG', 'GGA']}
