In [15]:
def DeBruijnPairedKmers(paired_kmers):
    """Build the paired de Bruijn graph of a collection of read pairs.

    Each read pair ``(a, b)`` contributes one edge from the node
    ``(a[:-1], b[:-1])`` to the node ``(a[1:], b[1:])``.

    Args:
        paired_kmers: iterable of ``(a, b)`` string pairs.

    Returns:
        dict mapping each source node to the list of its target nodes, in
        the order the read pairs were encountered. Repeated edges are kept.
    """
    adjacency = {}
    for first, second in paired_kmers:
        source = (first[:-1], second[:-1])
        target = (first[1:], second[1:])
        # setdefault creates the successor list the first time a source is seen.
        adjacency.setdefault(source, []).append(target)
    return adjacency

def EulerianCycle(graph, node):
    """Walk every edge reachable from ``node`` and return the node sequence.

    This is Hierholzer's algorithm with an explicit stack, so the depth of
    the walk is not bounded by Python's recursion limit and the result is
    assembled in linear time.

    Args:
        graph: dict mapping a node to the list of its successors. The lists
            are consumed: every traversed edge is popped from ``graph``.
            Edges are taken from the end of each successor list.
        node: node at which the walk starts.

    Returns:
        List of nodes beginning with ``node``. When every reachable node has
        equal in- and out-degree the list also ends with ``node`` (a closed
        Eulerian cycle of that component).
    """
    stack = [node]
    finished = []
    while stack:
        current = stack[-1]
        # .get tolerates sink nodes that have no adjacency entry at all.
        successors = graph.get(current)
        if successors:
            stack.append(successors.pop())
        else:
            # No unused edge leaves `current`; it is final in the walk.
            finished.append(stack.pop())
    # Nodes are finished in reverse walk order.
    finished.reverse()
    return finished

#Todo: Implement recursive method. 
def EulerianPath(graph):
    """Return an Eulerian path through ``graph`` as a list of nodes.

    The path is found by adding one closing edge from the path's end node
    to its start node, computing an Eulerian cycle with ``EulerianCycle``,
    and rotating that cycle so the closing edge is cut away.

    Args:
        graph: dict mapping a node to the list of its successors. It is
            modified in place: the closing edge is added and the cycle
            search consumes the successor lists.

    Returns:
        List of nodes visiting every edge once. An empty graph yields
        ``[]``; a graph in which every node is already balanced yields its
        Eulerian cycle (first node repeated at the end).

    Raises:
        ValueError: if the degree imbalance is anything other than "one
            node with one extra outgoing edge and one node with one extra
            incoming edge", or if not every edge can be reached.
    """
    if not graph:
        return []

    # balance[node] = out-degree - in-degree
    balance = {}
    for source, targets in graph.items():
        balance[source] = balance.get(source, 0) + len(targets)
        for target in targets:
            balance[target] = balance.get(target, 0) - 1

    unbalanced = {node: b for node, b in balance.items() if b}
    if unbalanced and sorted(unbalanced.values()) != [-1, 1]:
        raise ValueError("graph has no Eulerian path: degrees are not balanced")

    end = start = None
    if unbalanced:
        # The path ends where an incoming edge is unmatched and starts where
        # an outgoing edge is unmatched; joining them balances the graph.
        end = next(node for node, b in unbalanced.items() if b == -1)
        start = next(node for node, b in unbalanced.items() if b == 1)
        graph.setdefault(end, []).append(start)

    cycle = EulerianCycle(graph, next(iter(graph)))
    if any(graph.values()):
        # Edges left over mean part of the graph was unreachable.
        raise ValueError("graph has no Eulerian path: it is not connected")

    if not unbalanced:
        return cycle

    for index in range(len(cycle) - 1):
        if cycle[index] == end and cycle[index + 1] == start:
            # cycle[0] == cycle[-1], so drop the duplicate when rotating.
            return cycle[index + 1:] + cycle[1:index + 1]
    raise ValueError("graph has no Eulerian path: closing edge not traversed")

def StringReconstructionFromPairedReads(k, d, paired_reads):
    """Reconstruct a string from its paired reads.

    Chains three steps: build the paired de Bruijn graph of the reads, find
    an Eulerian path through it, and spell the text along that path.

    Args:
        k: passed through to ``PairedPathToGenome``.
        d: passed through to ``PairedPathToGenome``.
        paired_reads: iterable of ``(a, b)`` string pairs.

    Returns:
        Whatever ``PairedPathToGenome`` produces for the Eulerian path.
    """
    return PairedPathToGenome(
        k,
        d,
        EulerianPath(DeBruijnPairedKmers(paired_reads)),
    )

def PairedPathToGenome(k, d, path):
    """Spell the string described by a path of paired nodes.

    Args:
        k: length of the reads the path was built from.
        d: gap between the two reads of a pair. ``k + d`` must be positive.
        path: sequence of ``(first, second)`` string pairs in which each
            entry extends the previous one by a single trailing character.

    Returns:
        The string spelled by the first components followed by the last
        ``k + d`` characters spelled by the second components. An empty
        path yields ``""``. The overlap between the two spelled strings is
        not verified.
    """
    if not path:
        return ""

    first_strings, second_strings = zip(*path)

    # Each string is its first node plus the last character of every
    # following node.
    prefix_text = first_strings[0] + "".join(s[-1] for s in first_strings[1:])
    suffix_text = second_strings[0] + "".join(s[-1] for s in second_strings[1:])

    # suffix_text spells the same text as prefix_text shifted by k + d
    # positions, so only its final k + d characters are new. Slicing from
    # the front (suffix_text[k + d:]) is only correct when the path happens
    # to contain exactly k + 2d + 2 nodes.
    return prefix_text + suffix_text[-(k + d):]


In [16]:
# Example usage: nine read pairs with read length 4 and gap 2.
k = 4
d = 2
paired_reads = [
    ("GAGA", "TTGA"),
    ("TCGT", "GATG"),
    ("CGTG", "ATGT"),
    ("TGGT", "TGAG"),
    ("GTGA", "TGTT"),
    ("GTGG", "GTGA"),
    ("TGAG", "GTTG"),
    ("GGTC", "GAGA"),
    ("GTCG", "AGAT"),
]

print(StringReconstructionFromPairedReads(k, d, paired_reads))

GTGGTCGTGAGATGTTGA
