# Imports

In [1]:
import numpy as np
from ete3 import Tree



# Variables

In [2]:
# Substitution cost between two character states (symmetric, zero diagonal).
# NOTE(review): rows/columns presumably follow the same A, C, G, T order as the
# leaf vectors below - confirm.
step_matrix = np.array([
    [0, 3, 4, 9],
    [3, 0, 2, 4],
    [4, 2, 0, 4],
    [9, 4, 4, 0],
])

# Number of rows of the step matrix, i.e. the length of every cost vector.
n_species = step_matrix.shape[0]

# Leaf cost vectors, state order: A C G T.
# Each leaf costs 0 for its observed nucleotide and +inf for the three others.
dic1 = {
    name: np.where(np.arange(n_species) == "ACGT".index(letter), 0.0, np.inf)
    for name, letter in (
        ('Probopass', 'A'),
        ('Aggron', 'T'),
        ('Bastiodon', 'T'),
        ('Regirock', 'G'),
        ('Registeel', 'G'),
        ('Regice', 'G'),
        ('Klingklang', 'G'),
        ('Metagross', 'C'),
        ('Genesect', 'A'),
        ('Porygon=Z', 'C'),
        ('Magnezone', 'C'),
        ('Forretress', 'T'),
        ('Electrode', 'A'),
        ('Ferrothorn', 'G'),
    )
}

# The two candidate topologies, written in Newick format.
N1 = "(((( Electrode , Magnezone) ,Porygon=Z) , (((( Aggron , Bastiodon ) , Forretress ) , Ferrothorn ) , ((((( Regirock , Regice ) , Registeel ) , Metagross ) , Klingklang ) , Genesect ))) , Probopass );"
N2 = "((((( Regirock , Regice ) , Registeel ) , (( Metagross , Klingklang ) , Genesect )) , ((( Aggron , Bastiodon ) ,( Forretress , Ferrothorn )) , Probopass )) ,( Porygon=Z,( Magnezone , Electrode )));"

# Trees

<div>
<img src="tree1.png" width="500px">
<img src="tree2.png" width="500px">
</div>

# Sankoff algorithm

In [3]:
# Parse the first topology with ete3, then show the raw Newick string
# followed by the ASCII rendering of the parsed tree.
tree1 = Tree(N1)
print(N1, tree1, sep="\n")

(((( Electrode , Magnezone) ,Porygon=Z) , (((( Aggron , Bastiodon ) , Forretress ) , Ferrothorn ) , ((((( Regirock , Regice ) , Registeel ) , Metagross ) , Klingklang ) , Genesect ))) , Probopass );

            /-Electrode
         /-|
      /-|   \-Magnezone
     |  |
     |   \-Porygon=Z
     |
     |            /-Aggron
     |         /-|
     |      /-|   \-Bastiodon
   /-|     |  |
  |  |   /-|   \-Forretress
  |  |  |  |
  |  |  |   \-Ferrothorn
  |  |  |
  |  |  |               /-Regirock
  |  |  |            /-|
  |   \-|         /-|   \-Regice
--|     |        |  |
  |     |      /-|   \-Registeel
  |     |     |  |
  |     |   /-|   \-Metagross
  |     |  |  |
  |      \-|   \-Klingklang
  |        |
  |         \-Genesect
  |
   \-Probopass


In [6]:
def sankoff(tree):
    """Extract the clusters of a Newick string and score the ones that can be merged.

    The function works in two passes:

    1. Every closing parenthesis of ``tree`` (spaces and ``;`` removed) yields
       one cluster, stored as the list of leaf names found in the substring
       selected for that parenthesis.
    2. The clusters are visited in order. A two-leaf cluster is merged with
       ``add_ancester``. A larger cluster is merged only when the "-"-joined
       name of all its leaves but the last one was produced by an earlier
       merge; otherwise a placeholder message is printed.

    Merging of "big clusters" (and the final merge) is not implemented yet:
    those cases only print a message.

    Parameters
    ----------
    tree : str
        Tree in Newick format.

    Returns
    -------
    None
        Progress is printed; cost vectors are stored by ``add_ancester``.
    """
    tree = tree.replace(" ","")
    tree = tree.replace(";","")
    new_tree = []
    for i in range(len(tree)):
        if tree[i] == ")":
            cpt = 0
            # Walk backwards from i two characters at a time and count the
            # places where two closing parentheses are adjacent.
            for j in range(i, 0, -2):
                if tree[j] == ")" and tree[j-1] == ")": # Discriminate mini/big cluster and get offset
                    cpt += 1
            tmp_tree = tree[:i] # Cut right before the closing parenthesis
            n_close_par = tmp_tree.count(")") # Count closing and opening parentheses
            n_open_par = tmp_tree.count("(")
            if cpt == 1: # Big cluster
                # Known issue (unresolved): this offset is slightly off for N2.
                parenthesis_to_cut = n_open_par - n_close_par + cpt + 1
                nth = find_nth(tmp_tree, "(", parenthesis_to_cut) # Index of the opening parenthesis to cut at
                tmp_tree = tmp_tree[nth:]
            else: # Mini cluster
                index_par = tmp_tree.rfind("(") # Mini cluster: cut at the last opening parenthesis
                tmp_tree = tmp_tree[index_par:]
            tmp_tree = tmp_tree.replace("(","") # Drop every parenthesis to keep only leaf names
            tmp_tree = tmp_tree.replace(")","")
            leaves = tmp_tree.split(",")
            new_tree.append(leaves) # One list of leaves per cluster
    # Names of the ancestors created so far and not yet consumed by a merge.
    visited = []
    # The clusters have different lengths, so they are kept as plain lists:
    # converting this ragged list with np.array() raises ValueError on
    # NumPy >= 1.24, and the resulting species list was never used.

    for i in range(len(new_tree)):
        if len(new_tree[i]) == 2:
            elt1 = new_tree[i][0]
            elt2 = new_tree[i][1]
            print("---------------------------------")
            print(new_tree[i])
            print("on add", elt1, "et", elt2)
            new_ancester = add_ancester(elt1, elt2)
            visited.append(new_ancester)
        else:
            print("---------------------------------")
            # Name of the ancestor grouping every leaf of the cluster but the last one.
            tmp_str = new_tree[i][0]
            for j in range(1, len(new_tree[i])-1):
                tmp_str += "-" + new_tree[i][j]
            if tmp_str in visited:
                visited.remove(tmp_str)
                del_elt = tmp_str.split("-")
                new_elt = new_tree[i].copy()
                # Whatever remains after removing the already merged leaves
                # is the leaf to attach to the existing ancestor.
                for elt in del_elt:
                    new_elt.remove(elt)
                print(new_tree[i])
                print("on add", tmp_str, "et", new_elt[0])
                new_ancester = add_ancester(tmp_str, new_elt[0])
                visited.append(new_ancester)
            else:
                # TODO: merging two big clusters is not implemented yet.
                print("c'est la qu'il faut fusionner les big clusters")
                print(new_tree[i])
    print("---------------------------------")
    # TODO: the final merge of the remaining big clusters is not implemented yet.
    print("c'est la qu'il faut fusionner les derniers big clusters")
    
def find_nth(haystack, needle, n):
    """Return the index of the ``n``-th non-overlapping ``needle`` in ``haystack``.

    Occurrences are counted from 1; any ``n`` of 1 or less returns the first
    occurrence. The result is -1 when there are fewer than ``n`` occurrences.
    """
    needle_length = len(needle)
    position = haystack.find(needle)
    skips_left = n - 1
    # Jump from one occurrence to the next until enough have been skipped
    # or the search runs out of matches.
    while skips_left > 0 and position != -1:
        position = haystack.find(needle, position + needle_length)
        skips_left -= 1
    return position
    
def add_ancester(elt1, elt2):
    """Register the ancestor of ``elt1`` and ``elt2`` in ``dic1`` and return its name.

    The ancestor is named ``"<elt1>-<elt2>"`` and its cost vector comes from
    ``compute_new_values``.
    """
    ancestor_name = "-".join((elt1, elt2))
    dic1[ancestor_name] = compute_new_values(elt1, elt2)
    return ancestor_name
    
def compute_new_values(elt1,elt2):
    """Compute the cost vector of the parent of ``elt1`` and ``elt2``.

    For every state ``i`` the cost is
    ``min_j(step_matrix[i, j] + dic1[elt1][j]) + min_j(step_matrix[i, j] + dic1[elt2][j])``.

    Returns a new float array of length ``n_species``.
    """
    left_costs = dic1[elt1]
    right_costs = dic1[elt2]
    parent_costs = np.zeros(n_species)
    # Broadcasting adds each child's vector to every row of the step matrix;
    # the row-wise minimum is then the cheapest transition out of that state.
    parent_costs[:] = (np.min(step_matrix + left_costs, axis=1)
                       + np.min(step_matrix + right_costs, axis=1))
    return parent_costs

In [7]:
# Show the Newick string without spaces, then run the clustering pass on the
# first topology.
print(N1.replace(" ", ""))
sankoff(N1)
# print(dic1)  # debugging aid: uncomment to inspect the stored cost vectors

((((Electrode,Magnezone),Porygon=Z),((((Aggron,Bastiodon),Forretress),Ferrothorn),(((((Regirock,Regice),Registeel),Metagross),Klingklang),Genesect))),Probopass);
-------------------------------
((((Electrode,Magnezone
mini cluster
(Electrode,Magnezone
['Electrode', 'Magnezone']
-------------------------------
((((Electrode,Magnezone),Porygon=Z
mini cluster
(Electrode,Magnezone),Porygon=Z
['Electrode', 'Magnezone', 'Porygon=Z']
-------------------------------
((((Electrode,Magnezone),Porygon=Z),((((Aggron,Bastiodon
mini cluster
(Aggron,Bastiodon
['Aggron', 'Bastiodon']
-------------------------------
((((Electrode,Magnezone),Porygon=Z),((((Aggron,Bastiodon),Forretress
mini cluster
(Aggron,Bastiodon),Forretress
['Aggron', 'Bastiodon', 'Forretress']
-------------------------------
((((Electrode,Magnezone),Porygon=Z),((((Aggron,Bastiodon),Forretress),Ferrothorn
mini cluster
(Aggron,Bastiodon),Forretress),Ferrothorn
['Aggron', 'Bastiodon', 'Forretress', 'Ferrothorn']
-----------------------

  species = np.unique(np.concatenate(np.array(new_tree)).flatten())
  print(np.array(new_tree)) # Probleme ici...
