In [1]:
import bioinfo3
import copy
import numpy as np

In [2]:
def chromosome_to_cycle(chromosome):
    nodes = []
    for j in range(0,len(chromosome)):
        i = chromosome[j]
        if i > 0:
            nodes.append(2*i - 1)
            nodes.append(2*i)
        else:
            nodes.append(-2*i)
            nodes.append(-2*i - 1)
    return nodes

In [3]:
P ="""-1 -2 -3 +4 +5 -6 +7 -8 +9 +10 +11 +12 +13 -14 +15 +16 -17 -18 +19 +20 +21 +22 +23 +24 +25 -26 +27 +28 +29 +30 -31 -32 +33 +34 +35 +36 +37 +38 -39 -40 +41 -42 -43 +44 -45 -46 -47 +48 -49 +50 -51 +52 +53 +54 +55 -56 +57 +58 +59 -60 +61 -62
"""

In [4]:
P = bioinfo3.parse_permutation(P)
P

[-1,
 -2,
 -3,
 4,
 5,
 -6,
 7,
 -8,
 9,
 10,
 11,
 12,
 13,
 -14,
 15,
 16,
 -17,
 -18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 -26,
 27,
 28,
 29,
 30,
 -31,
 -32,
 33,
 34,
 35,
 36,
 37,
 38,
 -39,
 -40,
 41,
 -42,
 -43,
 44,
 -45,
 -46,
 -47,
 48,
 -49,
 50,
 -51,
 52,
 53,
 54,
 55,
 -56,
 57,
 58,
 59,
 -60,
 61,
 -62]

In [5]:
nodes = chromosome_to_cycle(P)

In [6]:
string = " ".join(str(node) for node in nodes)
print("(", end="")
print(string, end=")")

(2 1 4 3 6 5 7 8 9 10 12 11 13 14 16 15 17 18 19 20 21 22 23 24 25 26 28 27 29 30 31 32 34 33 36 35 37 38 39 40 41 42 43 44 45 46 47 48 49 50 52 51 53 54 55 56 57 58 59 60 62 61 64 63 65 66 67 68 69 70 71 72 73 74 75 76 78 77 80 79 81 82 84 83 86 85 87 88 90 89 92 91 94 93 95 96 98 97 99 100 102 101 103 104 105 106 107 108 109 110 112 111 113 114 115 116 117 118 120 119 121 122 124 123)

In [7]:
def cycle_to_chromosome(nodes):
    chromosome = []
    for j in range(0, int(len(nodes)/2)):
        if nodes[2*j] < nodes[2*j+1]:
            chromosome.append(int(nodes[2*j+1]/2))
        else:
            chromosome.append(int(-nodes[2*j]/2))
    return chromosome

In [8]:
chromosome = cycle_to_chromosome(nodes)

In [9]:
bioinfo3.print_P(chromosome)

-1 -2 -3 +4 +5 -6 +7 -8 +9 +10 +11 +12 +13 -14 +15 +16 -17 -18 +19 +20 +21 +22 +23 +24 +25 -26 +27 +28 +29 +30 -31 -32 +33 +34 +35 +36 +37 +38 -39 -40 +41 -42 -43 +44 -45 -46 -47 +48 -49 +50 -51 +52 +53 +54 +55 -56 +57 +58 +59 -60 +61 -62


In [10]:
def parse_permutation(P):
    #convert permutation from string to int
    P = P.strip()
    if ")(" in P:
        P = P[1:-1].split(")(")
        P_int = []
        for chromosome in P:
            chromosome = chromosome.split(" ")
            chromosome_int = [int(block) for block in chromosome]
            P_int.append(chromosome_int)
        return P_int
    elif ")" in P:
        P = P[1:-1].split(" ")
        P_int = [int(pk) for pk in P]
        return [P_int]
    else:
        P = P.split(" ")
        P_int = [int(pk) for pk in P]
        return [P_int]
        

In [11]:
nodes = """(1 2 4 3 6 5 7 8)"""
nodes = parse_permutation(nodes)
nodes

[[1, 2, 4, 3, 6, 5, 7, 8]]

In [12]:
bioinfo3.print_P(cycle_to_chromosome(nodes))




In [13]:
def color_edges(P):
    edges = []
    for chromosome in P:
        nodes = chromosome_to_cycle(chromosome)
        for j in range(len(chromosome)):
            if j != len(chromosome)-1:
                edges.append((nodes[2*j+1], nodes[2*j+2]))
            else:
                edges.append((nodes[2*j+1], nodes[0]))
    return edges

In [14]:
P = """(+1 -2 +3 -4 -5 -6 -7 +8 -9 +10 +11 -12 +13 +14 -15 +16 -17 -18 -19 +20 -21 -22 +23 +24 +25 +26 -27 -28 +29 +30)(+31 -32 -33 -34 +35 -36 -37 -38 -39 +40 +41 +42 -43 +44 -45 +46 +47 +48 -49 -50 -51 -52 -53 +54)(-55 -56 +57 -58 +59 +60 +61 +62 -63 -64 +65 -66 -67 +68 +69 -70 +71 -72 -73 +74 -75)(+76 -77 -78 -79 +80 +81 -82 +83 -84 +85 -86 +87 +88 +89 -90 -91 +92 -93 -94 -95 -96 -97 -98 +99 +100 -101)(+102 -103 +104 +105 +106 +107 +108 -109 -110 -111 +112 -113 -114 +115 -116 -117 +118 -119 -120 -121 -122 -123)(+124 -125 +126 +127 +128 +129 -130 +131 -132 -133 -134 +135 +136 +137 +138 -139 -140 +141 -142 -143 -144 -145 -146 -147 +148 -149 -150 -151 +152 -153 -154)(-155 +156 +157 +158 +159 +160 -161 +162 +163 +164 +165 +166 +167 -168 -169 -170 +171 +172 -173 +174 -175 +176 +177 +178 -179 +180 +181 +182 -183 +184 -185)(-186 -187 +188 -189 -190 +191 +192 -193 +194 -195 +196 +197 -198 -199 -200 +201 +202 +203 +204 +205 -206)(-207 -208 -209 +210 -211 -212 -213 -214 +215 +216 -217 -218 -219 -220 -221 -222 +223 +224 -225 +226 +227 +228 +229 -230 -231 -232)

"""
P = parse_permutation(P)
P

[[1,
  -2,
  3,
  -4,
  -5,
  -6,
  -7,
  8,
  -9,
  10,
  11,
  -12,
  13,
  14,
  -15,
  16,
  -17,
  -18,
  -19,
  20,
  -21,
  -22,
  23,
  24,
  25,
  26,
  -27,
  -28,
  29,
  30],
 [31,
  -32,
  -33,
  -34,
  35,
  -36,
  -37,
  -38,
  -39,
  40,
  41,
  42,
  -43,
  44,
  -45,
  46,
  47,
  48,
  -49,
  -50,
  -51,
  -52,
  -53,
  54],
 [-55,
  -56,
  57,
  -58,
  59,
  60,
  61,
  62,
  -63,
  -64,
  65,
  -66,
  -67,
  68,
  69,
  -70,
  71,
  -72,
  -73,
  74,
  -75],
 [76,
  -77,
  -78,
  -79,
  80,
  81,
  -82,
  83,
  -84,
  85,
  -86,
  87,
  88,
  89,
  -90,
  -91,
  92,
  -93,
  -94,
  -95,
  -96,
  -97,
  -98,
  99,
  100,
  -101],
 [102,
  -103,
  104,
  105,
  106,
  107,
  108,
  -109,
  -110,
  -111,
  112,
  -113,
  -114,
  115,
  -116,
  -117,
  118,
  -119,
  -120,
  -121,
  -122,
  -123],
 [124,
  -125,
  126,
  127,
  128,
  129,
  -130,
  131,
  -132,
  -133,
  -134,
  135,
  136,
  137,
  138,
  -139,
  -140,
  141,
  -142,
  -143,
  -144,
  -145,
  -146,
 

In [15]:
color_edges(P)

[(2, 4),
 (3, 5),
 (6, 8),
 (7, 10),
 (9, 12),
 (11, 14),
 (13, 15),
 (16, 18),
 (17, 19),
 (20, 21),
 (22, 24),
 (23, 25),
 (26, 27),
 (28, 30),
 (29, 31),
 (32, 34),
 (33, 36),
 (35, 38),
 (37, 39),
 (40, 42),
 (41, 44),
 (43, 45),
 (46, 47),
 (48, 49),
 (50, 51),
 (52, 54),
 (53, 56),
 (55, 57),
 (58, 59),
 (60, 1),
 (62, 64),
 (63, 66),
 (65, 68),
 (67, 69),
 (70, 72),
 (71, 74),
 (73, 76),
 (75, 78),
 (77, 79),
 (80, 81),
 (82, 83),
 (84, 86),
 (85, 87),
 (88, 90),
 (89, 91),
 (92, 93),
 (94, 95),
 (96, 98),
 (97, 100),
 (99, 102),
 (101, 104),
 (103, 106),
 (105, 107),
 (108, 61),
 (109, 112),
 (111, 113),
 (114, 116),
 (115, 117),
 (118, 119),
 (120, 121),
 (122, 123),
 (124, 126),
 (125, 128),
 (127, 129),
 (130, 132),
 (131, 134),
 (133, 135),
 (136, 137),
 (138, 140),
 (139, 141),
 (142, 144),
 (143, 146),
 (145, 147),
 (148, 150),
 (149, 110),
 (152, 154),
 (153, 156),
 (155, 158),
 (157, 159),
 (160, 161),
 (162, 164),
 (163, 165),
 (166, 168),
 (167, 169),
 (170, 172),
 (1

In [16]:
print(", ".join(str(edge) for edge in color_edges(P)))

(2, 4), (3, 5), (6, 8), (7, 10), (9, 12), (11, 14), (13, 15), (16, 18), (17, 19), (20, 21), (22, 24), (23, 25), (26, 27), (28, 30), (29, 31), (32, 34), (33, 36), (35, 38), (37, 39), (40, 42), (41, 44), (43, 45), (46, 47), (48, 49), (50, 51), (52, 54), (53, 56), (55, 57), (58, 59), (60, 1), (62, 64), (63, 66), (65, 68), (67, 69), (70, 72), (71, 74), (73, 76), (75, 78), (77, 79), (80, 81), (82, 83), (84, 86), (85, 87), (88, 90), (89, 91), (92, 93), (94, 95), (96, 98), (97, 100), (99, 102), (101, 104), (103, 106), (105, 107), (108, 61), (109, 112), (111, 113), (114, 116), (115, 117), (118, 119), (120, 121), (122, 123), (124, 126), (125, 128), (127, 129), (130, 132), (131, 134), (133, 135), (136, 137), (138, 140), (139, 141), (142, 144), (143, 146), (145, 147), (148, 150), (149, 110), (152, 154), (153, 156), (155, 158), (157, 159), (160, 161), (162, 164), (163, 165), (166, 168), (167, 169), (170, 172), (171, 173), (174, 175), (176, 177), (178, 180), (179, 182), (181, 183), (184, 186), (185

In [17]:
def to_tuple(string):
    string = string.split(", ")
    return int(string[0]), int(string[1])

def parse_edges(edges):
    edges = edges[1:-1].split("), (")
    edges_tuple = [to_tuple(i)for i in edges]
    return edges_tuple

def find_cycle_from_gg(gg):
    cycle = []
    result = []
    for edge in gg:
        if edge[0] < edge[1]:
            cycle.append(edge[0])
            cycle.append(edge[1])
        else:
            cycle.append(edge[0])
            cycle.append(edge[1])
            result.append(cycle)
            cycle = []
    return result

def graph_to_genome(gg):
    #genome graph is the same as color edges 
    #genome is the same as permutation
    P = []
    if type(gg) == str:
        gg = parse_edges(gg)
    gg = find_cycle_from_gg(gg)
    
    for cycle in gg:
        nodes = [cycle[-1]] + cycle[:-1]
        chromosome = cycle_to_chromosome(nodes)
        P.append(chromosome)
    return P

In [18]:
def graph_to_genome(gg):
    #genome graph is the same as color edges 
    #genome is the same as permutation
    P = []
    if type(gg) == str:
        gg = parse_edges(gg)
    gg = find_cycle_from_gg(gg)
    
    for cycle in gg:
        nodes = [cycle[-1]] + cycle[:-1]
        chromosome = cycle_to_chromosome(nodes)
        P.append(chromosome)
    return P

In [19]:
gg = "(2, 4), (3, 6), (5, 1), (7, 9), (10, 12), (11, 8)"
gg = parse_edges(gg)
gg

[(2, 4), (3, 6), (5, 1), (7, 9), (10, 12), (11, 8)]

In [20]:
graph_to_genome(gg)

[[1, -2, -3], [-4, 5, -6]]

In [21]:
def print_genome(genome):
    #Input has format of [[1, -2, -3], [-4, 5, -6]]
    for gene in genome:
        print("(",end="")
        string = " ".join(("+" if i > 0 else "") + str(i) for i in gene )
        print(string,end=")")
    return None

In [22]:
gg = """(2, 3), (4, 5), (6, 7), (8, 9), (10, 12), (11, 13), (14, 16), (15, 18), (17, 19), (20, 22), (21, 23), (24, 26), (25, 28), (27, 30), (29, 31), (32, 34), (33, 35), (36, 38), (37, 39), (40, 41), (42, 44), (43, 46), (45, 47), (48, 49), (50, 52), (51, 54), (53, 1), (55, 58), (57, 59), (60, 62), (61, 63), (64, 65), (66, 68), (67, 70), (69, 71), (72, 73), (74, 75), (76, 78), (77, 80), (79, 81), (82, 83), (84, 86), (85, 87), (88, 89), (90, 92), (91, 93), (94, 96), (95, 98), (97, 100), (99, 101), (102, 103), (104, 106), (105, 108), (107, 110), (109, 111), (112, 56), (114, 116), (115, 118), (117, 119), (120, 121), (122, 123), (124, 125), (126, 127), (128, 130), (129, 132), (131, 133), (134, 136), (135, 137), (138, 140), (139, 141), (142, 143), (144, 146), (145, 147), (148, 150), (149, 151), (152, 154), (153, 156), (155, 158), (157, 160), (159, 162), (161, 163), (164, 166), (165, 113), (168, 170), (169, 171), (172, 174), (173, 176), (175, 178), (177, 179), (180, 182), (181, 184), (183, 186), (185, 188), (187, 190), (189, 192), (191, 193), (194, 195), (196, 197), (198, 199), (200, 202), (201, 203), (204, 205), (206, 208), (207, 209), (210, 211), (212, 214), (213, 215), (216, 217), (218, 220), (219, 167), (222, 223), (224, 226), (225, 228), (227, 230), (229, 232), (231, 234), (233, 235), (236, 238), (237, 239), (240, 242), (241, 244), (243, 245), (246, 247), (248, 250), (249, 252), (251, 254), (253, 256), (255, 257), (258, 259), (260, 262), (261, 263), (264, 266), (265, 267), (268, 270), (269, 272), (271, 274), (273, 275), (276, 221), (278, 279), (280, 282), (281, 284), (283, 285), (286, 287), (288, 290), (289, 291), (292, 294), (293, 295), (296, 297), (298, 299), (300, 302), (301, 304), (303, 306), (305, 308), (307, 309), (310, 311), (312, 314), (313, 315), (316, 318), (317, 320), (319, 321), (322, 324), (323, 326), (325, 277), (327, 329), (330, 332), (331, 334), (333, 336), (335, 338), (337, 340), (339, 341), (342, 343), (344, 345), (346, 348), (347, 349), (350, 351), (352, 353), (354, 356), (355, 357), (358, 360), (359, 362), (361, 363), (364, 365), (366, 368), (367, 370), (369, 371), (372, 373), (374, 376), (375, 328)"""
gg = parse_edges(gg)
genome = graph_to_genome(gg)
genome

[[1,
  2,
  3,
  4,
  5,
  -6,
  7,
  -8,
  -9,
  10,
  -11,
  12,
  -13,
  -14,
  -15,
  16,
  -17,
  18,
  -19,
  20,
  21,
  -22,
  -23,
  24,
  25,
  -26,
  -27],
 [-28,
  -29,
  30,
  -31,
  32,
  33,
  -34,
  -35,
  36,
  37,
  38,
  -39,
  -40,
  41,
  42,
  -43,
  44,
  45,
  -46,
  47,
  -48,
  -49,
  -50,
  51,
  52,
  -53,
  -54,
  -55,
  56],
 [57,
  -58,
  -59,
  60,
  61,
  62,
  63,
  64,
  -65,
  -66,
  67,
  -68,
  69,
  -70,
  71,
  72,
  -73,
  74,
  -75,
  76,
  -77,
  -78,
  -79,
  -80,
  -81,
  82,
  -83],
 [84,
  -85,
  86,
  -87,
  -88,
  -89,
  90,
  -91,
  -92,
  -93,
  -94,
  -95,
  -96,
  97,
  98,
  99,
  100,
  -101,
  102,
  103,
  -104,
  105,
  106,
  -107,
  108,
  109,
  -110],
 [111,
  112,
  -113,
  -114,
  -115,
  -116,
  -117,
  118,
  -119,
  120,
  -121,
  -122,
  123,
  124,
  -125,
  -126,
  -127,
  -128,
  129,
  130,
  -131,
  132,
  -133,
  134,
  -135,
  -136,
  -137,
  138],
 [139,
  140,
  -141,
  -142,
  143,
  144,
  -145,
  146,
  -14

In [23]:
print_genome(genome)

(+1 +2 +3 +4 +5 -6 +7 -8 -9 +10 -11 +12 -13 -14 -15 +16 -17 +18 -19 +20 +21 -22 -23 +24 +25 -26 -27)(-28 -29 +30 -31 +32 +33 -34 -35 +36 +37 +38 -39 -40 +41 +42 -43 +44 +45 -46 +47 -48 -49 -50 +51 +52 -53 -54 -55 +56)(+57 -58 -59 +60 +61 +62 +63 +64 -65 -66 +67 -68 +69 -70 +71 +72 -73 +74 -75 +76 -77 -78 -79 -80 -81 +82 -83)(+84 -85 +86 -87 -88 -89 +90 -91 -92 -93 -94 -95 -96 +97 +98 +99 +100 -101 +102 +103 -104 +105 +106 -107 +108 +109 -110)(+111 +112 -113 -114 -115 -116 -117 +118 -119 +120 -121 -122 +123 +124 -125 -126 -127 -128 +129 +130 -131 +132 -133 +134 -135 -136 -137 +138)(+139 +140 -141 -142 +143 +144 -145 +146 -147 +148 +149 +150 -151 -152 -153 -154 +155 +156 -157 +158 -159 -160 +161 -162 -163)(-164 +165 -166 -167 -168 -169 -170 +171 +172 +173 -174 +175 +176 +177 -178 +179 -180 -181 +182 +183 -184 -185 +186 +187 -188)

In [24]:
P = """(+1 +2 +3 +4 +5 +6)
"""
P = parse_permutation(P)
P

[[1, 2, 3, 4, 5, 6]]

In [96]:
P_ce = color_edges(P)
P_ce

[(2, 3), (4, 5), (6, 7), (8, 9), (10, 11), (12, 1)]

In [26]:
Q = """(+1 -3 -6 -5)(+2 -4)
"""
Q = parse_permutation(Q)
Q

[[1, -3, -6, -5], [2, -4]]

In [97]:
Q_ce = color_edges(Q)
Q_ce

[(2, 6), (5, 12), (11, 10), (9, 1), (4, 8), (7, 3)]

In [50]:
P_ce_copy = np.array(copy.deepcopy(P_ce))
Q_ce_copy = np.array(copy.deepcopy(Q_ce))

In [79]:
P_ce

[(2, 3), (4, 5), (6, 7), (8, 9), (10, 11), (12, 1)]

In [87]:
[] == []

True

In [104]:
P_ce = color_edges(P)
Q_ce = color_edges(Q)

In [107]:
len(cycles)

3

In [38]:
z = np.array(Q_ce_copy) == 3

In [46]:
z.any(axis = 1)

array([False, False, False, False, False,  True])

In [47]:
k = np.array(Q_ce_copy)

In [54]:
edge = k[z.any(axis = 1)][0]

In [58]:
[i if i != 3 else i+1 for i in edge]

[7, 4]

In [64]:
end_node = int(edge[edge != end_node])
lol

7

In [66]:
list(edge)

[7, 3]

In [142]:
def find_common_cyle(P_ce, Q_ce):
    P_ce = copy.deepcopy(P_ce)
    Q_ce = copy.deepcopy(Q_ce)
    P_ce_copy = np.array(copy.deepcopy(P_ce))
    Q_ce_copy = np.array(copy.deepcopy(Q_ce))
    cycles = []
    count_out = 0
    while P_ce != [] and Q_ce !=[]:
        if count_out > 1000 :
            print("TIME OUT CYCLE")
            break


        start_edge = P_ce[0]
        start_node = start_edge[0]
        end_node = start_edge[1]
        P_ce.remove(start_edge)
        cycle = [tuple(start_edge)]
        pointer = "P"


        count = 0
        while end_node != start_node and (P_ce != [] or Q_ce !=[]):
            if count > 100000 :
                print("TIME OUT NODES")
                break
            if pointer == "P":
                condition = (Q_ce_copy==end_node).any(axis=1)
                edge = Q_ce_copy[condition][0]
                cycle.append(tuple(edge))
                end_node = int(edge[edge != end_node])
                pointer = "Q"
                Q_ce.remove(tuple(edge))
            else:
                condition = (P_ce_copy==end_node).any(axis=1)
                edge = P_ce_copy[condition][0]
                cycle.append(tuple(edge))
                end_node = int(edge[edge != end_node])
                pointer = "P"
                P_ce.remove(tuple(edge))
            count +=1

        cycles.append(cycle)

        count_out += 1
    return cycles

In [151]:
P = """(+1 +2 +3 +4 +5 +6)
"""
P = parse_permutation(P)
Q = """(+1 -3 -6 -5)(+2 -4)
 """
Q = parse_permutation(Q)

In [140]:
P_ce = color_edges(P)
Q_ce = color_edges(Q)

In [143]:
cycles = find_common_cyle(P_ce, Q_ce)

In [144]:
cycles

[[(5865, 11280),
  (11280, 11952),
  (11952, 14529),
  (9691, 14529),
  (9691, 7210),
  (7210, 15540),
  (15540, 14064),
  (16070, 14064),
  (16070, 9900),
  (8881, 9900),
  (8881, 6266),
  (12348, 6266),
  (12348, 4445),
  (5589, 4445),
  (8313, 5589),
  (15353, 8313),
  (15353, 9330),
  (3361, 9330),
  (1506, 3361),
  (1506, 5684),
  (1171, 5684),
  (1171, 15980),
  (5414, 15980),
  (4333, 5414),
  (13884, 4333),
  (13884, 2126),
  (10296, 2126),
  (9180, 10296),
  (15857, 9180),
  (9982, 15857),
  (9982, 11790),
  (12662, 11790),
  (12662, 416),
  (11871, 416),
  (11871, 14155),
  (5722, 14155),
  (14126, 5722),
  (14126, 11537),
  (14627, 11537),
  (12244, 14627),
  (12244, 7464),
  (752, 7464),
  (752, 9089),
  (12171, 9089),
  (12897, 12171),
  (4998, 12897),
  (4998, 14717),
  (14717, 14875),
  (2517, 14875),
  (2517, 13776),
  (15672, 13776),
  (6441, 15672),
  (6441, 6613),
  (2139, 6613),
  (10154, 2139),
  (8901, 10154),
  (8901, 9856),
  (12678, 9856),
  (12678, 7688),
  (7

In [145]:
len(cycles)

9

In [146]:
blocks = 0
for i in Q:
    blocks += len(i)
blocks

8152

In [147]:
blocks - len(cycles)

8143

In [149]:
def distance_2breaks(P,Q):
    P_ce = color_edges(P)
    Q_ce = color_edges(Q)
    cycles = find_common_cyle(P_ce, Q_ce)
    blocks = 0
    for i in Q:
        blocks += len(i)
    distance_2breaks = blocks - len(cycles)
    return distance_2breaks

In [153]:
Q

[[1, -3, -6, -5], [2, -4]]