Klasa **Graph** koja se koristi za predstavljanje i izgradnju filogenetskog stabla.

In [15]:
class Graph:
    #konstruktorska funkcija (metod)
    def __init__(self, adjacency_list):
        self.adjacency_list = adjacency_list
        
    #metod koji vraca stringovsku reprezentaciju grafa    
    def __str__(self):
        return f'{self.adjacency_list}'
    
    #metod koji dodaje novi cvor u graf
    def add_node(self, v):
        if v not in self.adjacency_list:
            self.adjacency_list[v] = []
    
    #metod koji vraca listu suseda cvora v u grafu
    def get_neighbors(self, v):
        return self.adjacency_list[v]
    
    #metod koji dodaje u graf suseda cvora node sa oznakom neighbor na udaljenosti distance
    def add_neighbor(self, node, neighbor, distance, undirected=True):
        self.adjacency_list[node].append((neighbor, distance))
        
        if undirected:
            if neighbor not in self.adjacency_list:
                self.adjacency_list[neighbor] = []
              
            self.adjacency_list[neighbor].append((node, distance))
        
    #metod koji uklanja iz grafa suseda cvora node sa oznakom neighbor na udaljenosti distance    
    def remove_neighbor(self, node, neighbor, distance, undirected=True):
        self.adjacency_list[node].remove((neighbor, distance))
        
        if undirected:
            self.adjacency_list[neighbor].remove((node, distance))    

Pomocna klasa za reprezentaciju klastera UPGMA algoritma.

In [61]:
class Cluster:
    #konstruktorska funkcija (metod)
    def __init__(self, elements=[], age = 0):          
        self.age = age                                
        self.elements = elements                   
        
    #metod koji vraca stringovsku reprezentaciju klastera    
    def __str__(self):
        return f'{self.elements}:{self.age}'
        
    #metod koji izracunava udaljenost datog klastera u
    #odnosu na drugi klaster prema matrici rastojanja D
    def distance(self, other_cluster, D):
        # ======== STUDENTSKI KOD ======== #
        # distance(C1, C2) = suma ( D[i][j] ) / |C1| * |C2|
        total_dist = 0
        # n = len(D[0])
        
        for el_i in self.elements:
            for el_j in other_cluster.elements:
                total_dist += D[el_i][el_j]

        total_dist = total_dist / (len(self.elements) * len(other_cluster.elements))
        return total_dist
        # ================================ #
    
    #metod koji vrsi spajanje datog klastera sa drugim klasterom u novi klaster 
    def merge(self, other_cluster, D):
        # ======== STUDENTSKI KOD ======== #
        # e ovo je zanimljivo ? 
        # kako se radi spajanje dva klastera ? 
        # kontam da se pravi novi klaster 
        new_elements = self.elements + other_cluster.elements 
        new_age = self.distance(other_cluster, D) / 2
        return Cluster(new_elements, new_age)
        # ================================ #

Funkcija **two_closest_clusters** pronalazi dva klastera iz liste klastera **clusters** koji su na najmanjoj udaljenosti prema matrici rastojanja **D**.

In [68]:
def two_closest_clusters(clusters, D):
    min_ci = None
    min_cj = None
    min_distance = float('inf')
        
    for c_i in clusters:
        for c_j in clusters:
            if c_i != c_j:
                current_distance = c_i.distance(c_j, D)
                if current_distance < min_distance:
                    min_distance = current_distance
                    min_ci = c_i
                    min_cj = c_j
    print(f'min_distance {min_distance}')
    return min_ci, min_cj

Funkcija **UPGMA** konstruise filogenetsko stablo koje odgovara matrici rastojanja **D** dimenzije **n** $\times$ **n** primenom *UPGMA algoritma*.

In [69]:
def UPGMA(D, n):
    # ======== STUDENTSKI KOD ======== #
    # na pocetku su klasteri jednoclani 
    # treba mi adj_matrix 
    # napravim klaster ? 
    clusters = [Cluster([i], 0) for i in range(n)] 
    adj_matrix = dict([(i, []) for i in range(n)])
    tree = Graph(adj_matrix)

    while len(clusters) > 1:
        # nadjem dva najbliza klastera i spojim ih 
        c_i, c_j = two_closest_clusters(clusters, D)
        print(f'c_i {c_i}, c_j {c_j}')
        
        c_new = c_i.merge(c_j, D)
        print(f'c_new {c_new}')
            
        # update za cvor i grane ?
        tree.add_node(str(c_new))
        #     def add_neighbor(self, node, neighbor, distance, undirected=True):
        tree.add_neighbor(str(c_new), str(c_i), distance=c_new.age - c_i.age, undirected=False)
        tree.add_neighbor(str(c_new), str(c_j), distance=c_new.age - c_j.age, undirected=False)
        
        # izbaci ova dva stara
        clusters.remove(c_i)
        clusters.remove(c_j)
        # dodaj novi klaster
        clusters.append(c_new) 

    
    # sta vracam uoposte ? u filogenoj je tree[0] i tree 
    root = clusters[0]
    return tree, root
    # ================================ #

In [70]:
D = [[0, 3, 4, 3],
     [3, 0, 4, 5],
     [4, 4, 0, 2],
     [3, 5, 2, 0]]
n = 4

(tree, root) = UPGMA(D, n)

print('Root = ',root)
for node in tree.adjacency_list:
    print(node)
    print('Neighbors: ', tree.adjacency_list[node])

min_distance 2.0
c_i [2]:0, c_j [3]:0
c_new [2, 3]:1.0
min_distance 3.0
c_i [0]:0, c_j [1]:0
c_new [0, 1]:1.5
min_distance 4.0
c_i [2, 3]:1.0, c_j [0, 1]:1.5
c_new [2, 3, 0, 1]:2.0
Root =  [2, 3, 0, 1]:2.0
0
Neighbors:  []
1
Neighbors:  []
2
Neighbors:  []
3
Neighbors:  []
[2, 3]:1.0
Neighbors:  [('[2]:0', 1.0), ('[3]:0', 1.0)]
[0, 1]:1.5
Neighbors:  [('[0]:0', 1.5), ('[1]:0', 1.5)]
[2, 3, 0, 1]:2.0
Neighbors:  [('[2, 3]:1.0', 1.0), ('[0, 1]:1.5', 0.5)]
