In [92]:
import re
import math
import copy

In [159]:
def parse_chromosome(chr_str):
    """Parse one chromosome body such as ``"+1 -3 -6 -5"`` into signed ints.

    Parameters
    ----------
    chr_str : str
        Whitespace-separated signed block labels, without the surrounding
        parentheses.

    Returns
    -------
    list[int]
        The labels in their original order (``int`` accepts the leading
        ``+``). An empty or all-blank string gives an empty list.

    Raises
    ------
    ValueError
        If a token is not a valid integer literal.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing padding; split(" ") produced empty tokens in those
    # cases, and int("") raises ValueError.
    return [int(token) for token in chr_str.split()]

def parse_genome(genome_str):
    """Split a genome string like ``"(+1 -3)(+2 -4)"`` into chromosomes.

    Parameters
    ----------
    genome_str : str
        Zero or more parenthesised groups made of digits, ``+``, ``-`` and
        spaces. Text outside such groups is ignored.

    Returns
    -------
    list[list[int]]
        One list of signed block labels per parenthesised group, in order of
        appearance. Empty when no group matches.
    """
    # Raw string: in a normal string literal "\(", "\-" and "\)" are invalid
    # escape sequences, which current Python versions warn about.
    chromosome_bodies = re.findall(r"\(([0-9+\- ]+)\)", genome_str)
    return [parse_chromosome(body) for body in chromosome_bodies]

def genome2graph(genome):
    """Build the adjacency ("colored") edges of a genome of circular chromosomes.

    Block ``b`` is represented by the node pair ``2*|b| - 1`` and ``2*|b|``.
    A positive block is entered at the odd node and left at the even node;
    a negative block is traversed the other way round. Each returned entry
    maps the exit node of one block to the entry node of the next block on
    the same chromosome, and the last block is linked back to the first.

    Parameters
    ----------
    genome : list[list[int]]
        Chromosomes as lists of signed, non-zero block labels.

    Returns
    -------
    dict[int, int]
        ``{exit_node: next_entry_node}``, inserted in chromosome order.
        Empty chromosomes contribute no edges.
    """
    colored_edges = {}
    for chromosome in genome:
        if not chromosome:
            # Nothing to link; without this guard the wrap-around edge
            # lookup raised IndexError on an empty chromosome.
            continue

        # (entry node, exit node) for every block, honouring orientation.
        ends = [
            (2 * abs(block) - 1, 2 * abs(block)) if block > 0
            else (2 * abs(block), 2 * abs(block) - 1)
            for block in chromosome
        ]

        # Pair each block with its successor; rotating the list by one makes
        # the final pair (last block, first block), closing the cycle.
        for (_, exit_node), (entry_node, _) in zip(ends, ends[1:] + ends[:1]):
            colored_edges[exit_node] = entry_node

    return colored_edges


def two_break(q_edge, p_graph):
    """Rewire ``p_graph`` in place so that it contains the edge ``q_edge``.

    Parameters
    ----------
    q_edge : tuple
        Pair of nodes ``(a, b)`` that should become adjacent.
    p_graph : dict
        Edge dictionary ``{node: node}``; modified in place.

    Returns
    -------
    bool
        ``False`` when exactly two edges of ``p_graph`` touched ``q_edge``
        and were replaced by two new edges. ``True`` otherwise, in which
        case ``p_graph`` is left unchanged and a ``trivial cycle`` message
        with the number of touching edges is printed.
    """
    # Every stored edge that shares at least one node with q_edge.
    touching = [
        edge for edge in p_graph.items()
        if edge[0] in q_edge or edge[1] in q_edge
    ]

    if len(touching) != 2:
        print(f"trivial cycle: {len(touching)}")
        return True

    (f_key, f_val), (s_key, s_val) = touching
    p_graph.pop(f_key, None)
    p_graph.pop(s_key, None)

    # Work out which end of the first edge belongs to q_edge. ``partner`` is
    # the other q_edge node, ``leftover`` the end of the first edge that is
    # freed. The key side of the first edge is tested before its value side.
    if f_key == q_edge[0] or f_key == q_edge[1]:
        partner = q_edge[1] if f_key == q_edge[0] else q_edge[0]
        joined = (f_key, partner)
        leftover = f_val
    elif f_val == q_edge[0]:
        partner = q_edge[1]
        joined = (partner, f_val)
        leftover = f_key
    elif f_val == q_edge[1]:
        partner = q_edge[0]
        joined = (partner, f_val)
        leftover = f_key

    # The second new edge connects the two nodes that q_edge did not claim;
    # its orientation depends on which side of the second edge held partner.
    if s_key == partner:
        closing = (leftover, s_val)
    else:
        closing = (s_key, leftover)

    p_graph[joined[0]] = joined[1]
    p_graph[closing[0]] = closing[1]
    return False
            
def two_break_on_graph(p_, q_):
    """Transform edge graph ``p_`` into ``q_`` and record each intermediate genome.

    Both inputs are deep-copied, so the caller's dictionaries are not
    modified. Edges of the target are consumed with ``popitem`` and applied
    to the working copy via ``two_break``; a genome is recorded only after a
    call that actually changed the graph.

    Parameters
    ----------
    p_ : dict
        Edge dictionary of the starting genome.
    q_ : dict
        Edge dictionary of the target genome.

    Returns
    -------
    list
        Genomes as produced by ``graph2genome``: the starting genome first,
        then one entry per applied 2-break.
    """
    working = copy.deepcopy(p_)
    remaining = copy.deepcopy(q_)
    genomes = [graph2genome(working)]
    while remaining:
        target_edge = remaining.popitem()
        if two_break(target_edge, working):
            # Edge already present in the working graph; nothing to record.
            continue
        genomes.append(graph2genome(working))
    return genomes
        
def graph2genome(edges_):
    """Reconstruct signed chromosomes from an edge dictionary.

    Parameters
    ----------
    edges_ : dict[int, int]
        Edges ``{node: node}`` over nodes ``2*b - 1`` / ``2*b`` for block
        ``b``. An edge may be stored in either direction. The argument is
        not modified.

    Returns
    -------
    list[list[int]]
        One list of signed integer block labels per cycle found. Labels are
        ``int`` values; the previous true division returned floats such as
        ``1.0``.
    """
    # A shallow copy is enough: only the dictionary itself is consumed.
    edges = dict(edges_)
    # Lets an edge be followed from its value side as well as its key side.
    reverse_edges = {v: k for k, v in edges.items()}
    genome = []
    while edges:
        # Start a new chromosome at the far end of an arbitrary unused edge.
        _, node = edges.popitem()
        chromosome = []
        while node:
            if node % 2 == 1:
                # Entered at the odd node: forward block, leave via the even node.
                node += 1
                chromosome.append(node // 2)
            else:
                # Entered at the even node: reversed block, leave via the odd node.
                node -= 1
                chromosome.append(-((node + 1) // 2))
            # Follow the edge attached to the exit node, whichever way round
            # it was stored. None means the cycle is closed.
            following = edges.pop(node, None)
            if following is None:
                following = reverse_edges.pop(node, None)
                edges.pop(following, None)
            node = following
        genome.append(chromosome)
    return genome

def print_genome(genome):
    """Format a genome as text, e.g. ``"(+1 -3)(+2 -4)"``.

    Despite the name, nothing is printed; the string is returned.

    Parameters
    ----------
    genome : list
        Chromosomes, each an iterable of numeric block labels.

    Returns
    -------
    str
        Chromosomes wrapped in parentheses and concatenated. Labels are
        converted with ``int``; strictly positive ones get a leading ``+``.
    """
    def signed(label):
        # Only strictly positive labels get an explicit plus sign.
        return ("+" if label > 0 else "") + str(int(label))

    return "".join(
        "(" + " ".join(signed(label) for label in chromosome) + ")"
        for chromosome in genome
    )

def print_path(path):
    for genome in path:
        print(print_genome(genome))
        
def shortest_path(pstr, qstr):
    """Run the 2-break transformation between two genomes given as strings.

    Parameters
    ----------
    pstr : str
        Starting genome in parenthesised notation.
    qstr : str
        Target genome in the same notation.

    Returns
    -------
    list
        Whatever ``two_break_on_graph`` returns for the two edge graphs: the
        starting genome followed by each intermediate genome.
    """
    parsed = [parse_genome(text) for text in (pstr, qstr)]
    start_graph, target_graph = [genome2graph(genome) for genome in parsed]
    return two_break_on_graph(start_graph, target_graph)

In [142]:
# Example pair: starting genome (one chromosome) and target genome (two chromosomes).
pstr, qstr = "(+1 +2 +3 +4 +5 +6)", "(+1 -3 -6 -5)(+2 -4)"

In [143]:
# Parse both example strings into lists of signed-integer chromosomes.
p, q = parse_genome(pstr), parse_genome(qstr)
print(p, q)

[[1, 2, 3, 4, 5, 6]] [[1, -3, -6, -5], [2, -4]]


In [144]:
# Convert each parsed genome into its edge dictionary.
p_graph, q_graph = genome2graph(p), genome2graph(q)
print(p_graph, q_graph)

{2: 3, 4: 5, 6: 7, 8: 9, 10: 11, 12: 1} {2: 6, 5: 12, 11: 10, 9: 1, 4: 8, 7: 3}


In [145]:
# Transform p_graph into q_graph one 2-break at a time and print each
# intermediate genome; "trivial cycle" lines are emitted by two_break.
print_path(two_break_on_graph(p_graph, q_graph))

trivial cycle: 1
trivial cycle: 1
trivial cycle: 1
(+1.0 +2.0 +3.0 +4.0 +5.0 +6.0)
(-1.0 -6.0 -5.0 -4.0 +2.0 +3.0)
(+5.0 +6.0 +1.0 -3.0)(-4.0 +2.0)
(-6.0 -5.0 +1.0 -3.0)(-4.0 +2.0)


In [141]:
# Second worked example. Note that p and q are rebound here to edge
# dictionaries rather than parsed genomes.
pstr, qstr = "(+1 -2 -3 +4)", "(+1 +2 -4 -3)"
p, q = (genome2graph(parse_genome(text)) for text in (pstr, qstr))

print_path(two_break_on_graph(p, q))

trivial cycle: 1
(+1.0 -2.0 -3.0 +4.0)
(+4.0)(+1.0 -2.0 -3.0)
(+2.0 -1.0 +3.0 +4.0)
(+2.0 -4.0 -3.0 +1.0)


In [146]:
# Read the dataset: the first line is the starting genome, the second the target.
with open("../data/dataset_288_4.txt", "r") as fin:
    genomes = [line.strip() for line in fin]

p, q = parse_genome(genomes[0]), parse_genome(genomes[1])
p_graph, q_graph = genome2graph(p), genome2graph(q)
# The path includes the starting genome, so the number of steps is len - 1.
print(len(two_break_on_graph(p_graph, q_graph)) - 1)

trivial cycle: 1
trivial cycle: 1
trivial cycle: 1
8432


In [160]:
# pstr / qstr are assigned here but not read by this cell; the genomes
# processed below come from the dataset file.
pstr, qstr = (
    "(+1 +8 -12 +6 +10 +11 +3 +5 +2 -9 -7 -4)",
    "(+7 -8 +12 +3 +11 -9 +10 +1 -4 -6 -2 -5)",
)

# First line of the file is the starting genome, second line the target.
with open("../data/dataset_288_5.txt", "r") as fin:
    genomes = [line.strip() for line in fin]

p, q = parse_genome(genomes[0]), parse_genome(genomes[1])
p_graph, q_graph = genome2graph(p), genome2graph(q)
print_path(two_break_on_graph(p_graph, q_graph))

trivial cycle: 1
trivial cycle: 1
(+10 +3 +1 -5 +9 -11 +12 +13 +6 -7 -4 -8 -2)
(-7)(-4 -8 -2 +10 +3 +1 -5 +9 -11 +12 +13 +6)
(-13 -12 +11 -9 +5 -1 -3 -10 +2 +8 +4 -6 -7)
(-13 -12 +7 +6 -4 -8 -2 +10 +3 +1 -5 +9 -11)
(+13 +11 -9 +5 -1 -3 -10 -12 +7 +6 -4 -8 -2)
(+3 +1 -5 +9 -11 -13 -10 -12 +7 +6 -4 -8 -2)
(-5 +9 -11)(-13 -10 -12 +7 +6 -4 -8 -2 +3 +1)
(+9 -11 -5 +1 -13 -10 -12 +7 +6 -4 -8 -2 +3)
(+11 -9 -5 +1 -13 -10 -12 +7 +6 -4 -8 -2 +3)
(-11 -3 +2 -9 -5 +1 -13 -10 -12 +7 +6 -4 -8)
(-11 -3)(+8 +4 -6 -7 +12 +10 +13 -1 +5 +9 -2)
(+4 -6 -7 +12 +10 +13 -1 +5 +9 -2 +8 -11 -3)


In [156]:
# Third worked example, run end-to-end through shortest_path.
pstr, qstr = (
    "(+9 -8 +12 +7 +1 -14 +13 +3 -5 -11 +6 -2 +10 -4)",
    "(-11 +8 -10 -2 +3 +4 +13 +6 +12 +9 +5 +7 -14 -1)",
)
print_path(shortest_path(pstr, qstr))

trivial cycle: 1
trivial cycle: 1
trivial cycle: 1
(+9.0 -8.0 +12.0 +7.0 +1.0 -14.0 +13.0 +3.0 -5.0 -11.0 +6.0 -2.0 +10.0 -4.0)
(-7.0 -12.0 +8.0 -9.0 +4.0 -10.0 +2.0 -6.0 +11.0 +1.0 -14.0 +13.0 +3.0 -5.0)
(+13.0 +3.0 -5.0 -7.0 -12.0 +8.0 -9.0 +4.0 -10.0 +2.0 -6.0 +11.0 +1.0 +14.0)
(+13.0 +3.0 -5.0)(-7.0 -12.0 +8.0 -9.0 +4.0 -10.0 +2.0 -6.0 +11.0 +1.0 +14.0)
(-12.0 +8.0 -9.0 +4.0 -10.0 +2.0 -6.0 +11.0 +1.0 +14.0 -7.0 -5.0 +13.0 +3.0)
(+13.0 +3.0 -12.0 +8.0)(+5.0 +7.0 -14.0 -1.0 -11.0 +6.0 -2.0 +10.0 -4.0 +9.0)
(+4.0 -10.0 +2.0 -6.0 +11.0 +1.0 +14.0 -7.0 -5.0 -9.0 -12.0 +8.0 +13.0 +3.0)
(-2.0 +10.0 -4.0 -3.0 -13.0 -8.0)(+12.0 +9.0 +5.0 +7.0 -14.0 -1.0 -11.0 +6.0)
(+3.0 +4.0 -10.0 +2.0 +8.0 +13.0 +6.0 +12.0 +9.0 +5.0 +7.0 -14.0 -1.0 -11.0)
(-10.0 +2.0 +8.0)(-4.0 -3.0 +11.0 +1.0 +14.0 -7.0 -5.0 -9.0 -12.0 -6.0 -13.0)
(+10.0 -8.0 -2.0 +3.0 +4.0 +13.0 +6.0 +12.0 +9.0 +5.0 +7.0 -14.0 -1.0 -11.0)
(+8.0 -10.0 -2.0 +3.0 +4.0 +13.0 +6.0 +12.0 +9.0 +5.0 +7.0 -14.0 -1.0 -11.0)


In [None]:
(+9 -8 +12 +7 +1 -14 +13 +3 -5 -11 +6 -2 +10 -4)
(+9 -8 +12 -3 -13 +14 -1 -7 -5 -11 +6 -2 +10 -4)
(+9 -8 +12)(+7 +1 -14 +13 +3 +4 -10 +2 -6 +11 +5)
(+9 -8 +12)(+1 -14 +13 +3 +4 -10 +2 -6 +11)(+5 +7)
(+9 -8 +12)(+13 +3 +4 -10 +2 -6 +11 +1 +14)(+5 +7)
(+9 -8 +12)(+13 +3 +4 -10 +2 -6 +11 +1 +14 -7 -5)
(-8 +12 +9 +5 +7 -14 -1 -11 +6 -2 +10 -4 -3 -13)
(-8 +12 +9 +5 +7 -14 -1 -11 +6 -2 +10)(+13 +3 +4)
(+13 +3 +4)(-11 +6 +12 +9 +5 +7 -14 -1)(-2 +10 -8)
(-2 +10 -8)(+5 +7 -14 -1 -11 +3 +4 +13 +6 +12 +9)
(+5 +7 -14 -1 -11 +10 -8 -2 +3 +4 +13 +6 +12 +9)
(+5 +7 -14 -1 -11 +8 -10 -2 +3 +4 +13 +6 +12 +9)