# Import

In [1]:
import numpy as np
from ete3 import Tree



# 1. UPGMA :

# Variables

In [2]:
# Pairwise distance matrix between 7 taxa (symmetric, zero diagonal, integer
# valued). Row/column i corresponds to the label chr(65 + i) assigned by
# get_dict, i.e. A, B, C, D, E, F, G in that order.
distance_matrix = np.array([[ 0, 19, 27,  8, 33, 18, 13],
                            [19,  0, 31, 18, 36,  1, 13],
                            [27, 31,  0, 26, 41, 32, 29],
                            [ 8, 18, 26,  0, 31, 17, 14],
                            [33, 36, 41, 31,  0, 35, 28],
                            [18,  1, 32, 17, 35,  0, 12],
                            [13, 13, 29, 14, 28, 12, 0]])

In [58]:
def upgma_loop(distance_matrix):
    """Run the clustering until only two clusters remain.

    Parameters
    ----------
    distance_matrix : np.ndarray
        Square pairwise distance matrix (one row/column per taxon).

    Returns
    -------
    tuple
        ``(distance_matrix, dic, save_order)`` where ``distance_matrix`` is the
        final 2x2 matrix, ``dic`` maps its row indices to cluster labels, and
        ``save_order`` is the flat list of merged labels, two entries per
        merge, ending with the two last remaining clusters.

    Raises
    ------
    ValueError
        If the matrix describes fewer than two taxa (nothing to join).
    """
    if distance_matrix.shape[0] < 2:
        raise ValueError("UPGMA needs at least two taxa in the distance matrix")

    dic = get_dict(distance_matrix)  # index -> label for every taxon
    save_order = []
    # Each pass merges the closest pair, shrinking the matrix by one row/column.
    while distance_matrix.shape[0] > 2:
        distance_matrix, dic, save_order = upgma(distance_matrix, dic, save_order)

    # The two clusters left over form the final (root) join.
    remaining = list(dic.values())
    save_order.append(remaining[0])
    save_order.append(remaining[1])
    return distance_matrix, dic, save_order

def upgma(distance_matrix, dic, save_order):
    """Perform a single merge step.

    Finds the closest pair of clusters, builds the reduced distance matrix
    (converted to float) and the re-indexed label dictionary, and records the
    merged pair in ``save_order``.

    Returns ``(new_matrix, new_dic, save_order)``.
    """
    _, first_label, second_label = get_min_v(distance_matrix, dic)
    merged_matrix = update_distance_matrix(
        distance_matrix, first_label, second_label, dic
    ).astype(float)
    merged_dic, save_order = update_dic(dic, first_label, second_label, save_order)
    return merged_matrix, merged_dic, save_order

def get_min_v(distance_matrix, dic):
    """Find the smallest off-diagonal distance and the labels of its pair.

    The matrix is scanned in row-major order and only a strictly smaller
    value replaces the current minimum, so on ties the first pair encountered
    wins.

    Parameters
    ----------
    distance_matrix : np.ndarray
        2-D distance matrix.
    dic : dict
        Maps row/column index to cluster label.

    Returns
    -------
    tuple
        ``(min_v, label_of_row, label_of_column)``.

    Raises
    ------
    ValueError
        If no off-diagonal entry is smaller than infinity (for example a 1x1
        matrix), since there is then no pair to merge.
    """
    n, m = distance_matrix.shape
    min_v = np.inf
    best = None  # (row, column) of the current minimum, None until one is found
    for i in range(n):
        for j in range(m):
            # The diagonal is the distance of a cluster to itself: skip it.
            if i != j and distance_matrix[i][j] < min_v:
                min_v = distance_matrix[i][j]
                best = (i, j)
    if best is None:
        raise ValueError("distance matrix has no finite off-diagonal entry to merge")
    return min_v, dic[best[0]], dic[best[1]]

def update_dic(dic, v1, v2, save_order):
    """Build the label dictionary that follows the merge of ``v1`` and ``v2``.

    The merged label ``v1 + v2`` takes index 0; every other label keeps its
    relative order and is re-indexed from 1. ``save_order`` is extended in
    place with ``v1`` then ``v2`` and returned alongside the new dictionary.
    """
    save_order.append(v1)
    save_order.append(v2)
    merged_label = v1 + v2
    # Walk the keys 0..len(dic)-1 so the survivors stay in matrix order.
    survivors = [
        dic[key]
        for key in range(len(dic))
        if not (dic[key] == v1 or dic[key] == v2)
    ]
    relabelled = dict(enumerate([merged_label] + survivors))
    return relabelled, save_order

def update_distance_matrix(distance_matrix, v1, v2, dic):
    """Return the distance matrix after merging clusters ``v1`` and ``v2``.

    The rows/columns of both clusters are removed and a new row/column for
    the merged cluster is placed at index 0, filled with the distances from
    ``compute_new_distances``.

    Parameters
    ----------
    distance_matrix : np.ndarray
        Current square distance matrix.
    v1, v2 : str
        Labels of the two clusters being merged (must be values of ``dic``).
    dic : dict
        Maps row/column index to cluster label.

    Returns
    -------
    np.ndarray
        Float matrix with one row and one column fewer than the input.
    """
    k1 = [k for k, v in dic.items() if v == v1][0]
    k2 = [k for k, v in dic.items() if v == v2][0]
    # Convert to float BEFORE writing the averaged distances: assigning floats
    # into an integer array silently truncates them (18.5 would become 18).
    distance_matrix_u = delete_values(distance_matrix, k1, k2).astype(float)
    new_distances = compute_new_distances(distance_matrix, v1, v2, dic)
    distance_matrix_u[0] = new_distances     # first row    = merged cluster
    distance_matrix_u[:, 0] = new_distances  # first column = merged cluster
    return distance_matrix_u

def delete_values(distance_matrix, k1, k2):
    """Drop rows/columns ``k1`` and ``k2`` and prepend a zero row and column.

    The zero row/column at index 0 is the placeholder for the merged cluster.
    Both indices are removed in a single ``np.delete`` call, so the result
    does not depend on whether ``k1`` is smaller or larger than ``k2``.
    ``k1`` and ``k2`` are expected to be distinct.

    Parameters
    ----------
    distance_matrix : np.ndarray
        2-D distance matrix; it is not modified.
    k1, k2 : int
        Indices of the two clusters being merged.

    Returns
    -------
    np.ndarray
        New array of the same dtype, one row and one column smaller than the
        input, whose first row and first column are zeros.
    """
    merged = [k1, k2]
    reduced = np.delete(distance_matrix, merged, axis=0)
    reduced = np.delete(reduced, merged, axis=1)

    rows, cols = reduced.shape
    padded = np.zeros((rows + 1, cols + 1), dtype=reduced.dtype)
    padded[1:, 1:] = reduced
    return padded

def compute_new_distances(distance_matrix, v1, v2, dic):
    """Distances from the cluster ``v1 + v2`` to every remaining cluster.

    Uses the UPGMA update rule, i.e. the mean weighted by cluster size::

        d(v1 u v2, x) = (|v1| * d(v1, x) + |v2| * d(v2, x)) / (|v1| + |v2|)

    A plain ``(d1 + d2) / 2`` is the WPGMA rule and only coincides with UPGMA
    when both clusters have the same size.

    Cluster size is taken as ``len(label)``: this assumes every original
    taxon has a one-character label and merged labels are concatenations of
    their members (which is how ``get_dict`` / ``update_dic`` build them).

    Parameters
    ----------
    distance_matrix : np.ndarray
        Current square distance matrix (before the merge).
    v1, v2 : str
        Labels of the two clusters being merged (must be values of ``dic``).
    dic : dict
        Maps row/column index to cluster label.

    Returns
    -------
    np.ndarray
        1-D float array of length ``n - 1``. Entry 0 is the zero
        self-distance of the merged cluster; the rest follow the order of
        the remaining rows of ``distance_matrix``.
    """
    n = distance_matrix.shape[0]
    k1 = [k for k, v in dic.items() if v == v1][0]
    k2 = [k for k, v in dic.items() if v == v2][0]
    size1, size2 = len(v1), len(v2)

    new_distances = np.zeros(n - 1)  # entry 0 stays 0: the diagonal
    others = [i for i in range(n) if i != k1 and i != k2]
    for index, i in enumerate(others, start=1):
        weighted_sum = size1 * distance_matrix[i][k1] + size2 * distance_matrix[i][k2]
        new_distances[index] = weighted_sum / (size1 + size2)
    return new_distances

def display_distance_matrix(distance_matrix, dic):
    """Print the distance matrix with cluster labels.

    Labels are printed as a header line, then each matrix row is printed
    followed by the label of that row.
    """
    print("\nDistance matrix :\n")
    _, column_count = distance_matrix.shape
    labels = tuple(dic.values())

    # Header: one label per column.
    for label in labels:
        print("    ", label, end="")
    print("")

    # Body: the row values, then the row's own label at the end of the line.
    for row_index, label in enumerate(labels):
        row = distance_matrix[row_index]
        for col_index in range(column_count):
            print("   ", row[col_index], end="")
        print("   ", label)

def get_dict(distance_matrix):
    """Map each row index of the matrix to a label.

    Index ``i`` is mapped to ``chr(i + 65)``, so the first indices become
    'A', 'B', 'C', ...
    """
    taxon_count = distance_matrix.shape[0]
    return {position: chr(position + 65) for position in range(taxon_count)}

def display_tree(order):
    """Print the sequence of merges and then each merged pair, last first.

    ``order`` is the flat list produced by ``upgma_loop``: two consecutive
    labels per merge. The first printed line shows every merge as ``(a,b)``
    joined by ``|``; the following lines list ``item1`` / ``item2`` of each
    merge, from the last merge back to the first.

    The pairs are taken straight from ``order`` rather than parsed back out
    of the printed string, so labels containing ``|``, ``,`` or parentheses
    are shown unaltered.

    NOTE: the original author's comment indicates the eventual goal is a
    nested form such as (((((B,F),G),(A,D)),C),E); this function does not
    build that nested form, it only prints the pairs.

    Raises
    ------
    ValueError
        If ``order`` has an odd number of labels (a merge needs two).
    """
    if len(order) % 2 != 0:
        raise ValueError("order must contain an even number of labels (two per merge)")

    pairs = [(order[i], order[i + 1]) for i in range(0, len(order), 2)]
    print("|".join("(" + left + "," + right + ")" for left, right in pairs))
    for item1, item2 in reversed(pairs):
        print("item1:", item1)
        print("item2:", item2)

In [59]:
# Driver cell: run the clustering on the module-level distance_matrix and
# print the resulting merge order.
print("----------------------------> Algorithme UPGMA : <----------------------------")
# NOTE(review): the three commented-out debug lines below reference `dic`,
# which is not defined at this scope in the visible cells; they would need
# `dic = get_dict(distance_matrix)` first.
# print("\nDictionnaire :\n\n", dic) # Display the initial dictionary
# display_distance_matrix(distance_matrix, dic) # Display the initial matrix
# print("\n")
# The final label dictionary is discarded; only the 2x2 matrix and the merge
# order are kept.
distance_matrix_final, _, order_final = upgma_loop(distance_matrix)

print("----------------------------> Fabrication de l'arbe de philogenie : <----------------------------")
# print(order_final)
# Prints the merges as (a,b)|(c,d)|... followed by each pair, last merge first.
display_tree(order_final)

----------------------------> Algorithme UPGMA : <----------------------------
----------------------------> Fabrication de l'arbe de philogenie : <----------------------------
(B,F)|(A,D)|(BF,G)|(BFG,AD)|(BFGAD,C)|(BFGADC,E)
item1: BFGADC
item2: E
item1: BFGAD
item2: C
item1: BFG
item2: AD
item1: BF
item2: G
item1: A
item2: D
item1: B
item2: F


# 2. Neighbor Joining :