# Filogenetska stabla

In [1]:
'''Pomoćna klasa za operacije nad grafovima'''
class Graph:
    def __init__(self, adjacency_list):
        self.adjacency_list = adjacency_list
        
    def __str__(self):
        return f'{self.adjacency_list}'
    
    '''Dodavanje čvora u graf'''
    def add_node(self, v):
        if v not in self.adjacency_list:
            self.adjacency_list[v] = []
        
    '''Dodavanje suseda čvora u graf'''
    def add_neighbor(self, v, w, distance, undirected=True):
        self.adjacency_list[v].append((w, distance))
        if undirected:
            self.adjacency_list[w] = [(v, distance)]
        
    '''Izdvajanje svih suseda čvora'''
    def get_neighbors(self, v):
        return self.adjacency_list[v]
    
    '''Izdvajanje dužine grane između susednih čvorova'''
    def distance_between(self, v_i, v_j):
        for (w, dist) in self.adjacency_list[v_i]:
            if w == v_j:
                return dist
    
    '''Uklanjanje grane iz grafa'''
    def remove_edge(self, v_i, v_j, undirected=True):
        self.adjacency_list[v_i] = [neighbor for neighbor in self.adjacency_list[v_i] if neighbor[0] != v_j]
        
        if undirected:
            self.adjacency_list[v_j] = [neighbor for neighbor in self.adjacency_list[v_j] if neighbor[0] != v_i]
    
    '''Pronalaženje puta između proizvoljna dva čvora'''
    def find_path(self, source, destination):
        stack = [source]
        visited = set([source])
        
        while len(stack) > 0:
            v = stack[-1]
            
            if v == destination:
                return stack
            
            has_neighbor = False
            
            for (w, _) in self.adjacency_list[v]:
                if w not in visited:
                    visited.add(w)
                    stack.append(w)
                    has_neighbor = True
                    break
                    
            if not has_neighbor:
                stack.pop()
    
    '''Dodavanje čvora na putu između dva zadata čvora na zadatoj udaljenost od polaznog čvora'''
    def add_vertex_on_path(self, u, v, distance):
        path = self.find_path(u, v)
        i = 0
        j = 1
        
        
        v_i = path[i]
        v_j = path[j]
        current_distance = self.distance_between(v_i, v_j)
        
        while current_distance < distance:
            i += 1
            j += 1
            
            v_i = path[i]
            v_j = path[j]
            current_distance += self.distance_between(v_i, v_j)
            
        if current_distance == distance:
            return v_j
        else:
            local_distance_j = current_distance - distance
            local_distance_i = self.distance_between(v_i, v_j) - local_distance_j
            return self.add_vertex_on_edge(v_i, v_j, local_distance_i, local_distance_j)
        

        
    '''Dodavanje novog čvora na grani na zadatoj udaljenosti od tekućih čvorova grane'''
    def add_vertex_on_edge(self, v_i, v_j, distance_i, distance_j):
        new_vertex = f'X{v_i},{v_j}'
        
        self.remove_edge(v_i, v_j)
        self.add_neighbor(v_i, new_vertex, distance_i)
        self.add_neighbor(new_vertex, v_j, distance_j)
        
        return new_vertex
        

In [2]:
'''Pomoćna klasa za reprezentaciju klastera UPGMA algoritma'''
class Cluster:
    def __init__(self, elements=[], age = 0):
        self.age = age
        self.elements = elements
        
    def __str__(self):
        return f'{self.elements}:{self.age}'
        
    '''Izračunavanje udaljenosti između dva klastera'''
    def distance(self, other_cluster, D):
        total_distance = 0
        for e_i in self.elements:
            for e_j in other_cluster.elements:
                d_ij = D[e_i][e_j]
                total_distance += d_ij
                
        return total_distance / (len(self.elements) * len(other_cluster.elements))
    
    '''Spajanje dva klastera u novi klaster'''
    def merge(self, other_cluster):
        return Cluster(elements = self.elements + other_cluster.elements)

In [3]:
'''Pomoćna klasa za reprezentaciju matrice udaljenosti kao dict objekat, za potrebe NJ algoritma'''
class DMap:
    def __init__(self, D):
        self.d = D
        self.d_map = {}
        
        self.n = len(D)
        for i in range(n):
            self.d_map[i] = {}
            for j in range(n):
                self.d_map[i][j] = D[i][j]
                
    def __str__(self):
        return f'{self.d_map}'
    
    '''Ukupna udaljenost čvora i od svih ostalih čvorova'''
    def total_distance(self, i):
        dist = 0
        for j in self.d_map:
            dist += self.d_map[i][j]
            
        return dist
    
    '''Izračunavanje D* matrice (mape)'''
    def d_star(self):
        d_s = {}
        
        for i in self.d_map:
            d_s[i] = {}
            for j in self.d_map[i]:
                if i != j:
                    d_s[i][j] = (self.n - 2) * self.d_map[i][j] - self.total_distance(i) - self.total_distance(j)
                else:
                    d_s[i][j] = 0
                
        return d_s
    
    '''Pronalaženje najmanje vrednosti u D* matrici'''
    def min_d_star_dist(self):
        d_s = self.d_star()
        
        min_i = None
        min_j = None
        min_distance = float('inf')
        
        for i in d_s:
            for j in d_s:
                if i != j:
                    current_distance = d_s[i][j]
                    if current_distance < min_distance:
                        min_i = i
                        min_j = j
                        
        return min_i, min_j
    
    '''Dodavanje novog čvora i njegovih udaljenosti'''
    def add_node(self, m, i, j):
        self.d_map[m] = {}
        
        for k in self.d_map:
            if k != i and k != j:
                if k != m:
                    self.d_map[k][m] = 0.5 * (self.d_map[k][i] + self.d_map[k][j] - self.d_map[i][j])
                    self.d_map[m][k] = self.d_map[k][m]
                else:
                    self.d_map[m][k] = 0
    
    '''Uklanjanje čvora i njegovih udaljenosti'''
    def remove_node(self, i):
        del self.d_map[i]
        
        for j in self.d_map:
            if j != i and i in self.d_map[j]:
                del self.d_map[j][i]
                

In [4]:
class Phylogeny:
    '''Pronalaženje kraka čvora n i njegove udaljenosti od prvog suseda'''
    def limb(self, D, n):
        min_length = float('inf')
        min_i = None
        min_k = None
        v = n - 1
        
        for i in range(n):
            for k in range(n):
                if i != v and k != v:
                    limb_length = (D[v][k] + D[i][v] - D[i][k]) / 2
                    if limb_length < min_length:
                        min_length = limb_length
                        min_i = i
                        min_k = k
                        
        return (min_i, min_k, min_length)
                        
    
    '''
    Konstrukcija filogenetskog stabla korišćenjem algoritma
    aditivne filogenije
    '''
    def additive_phylogeny(self, D, n):
        if n == 2:
            return Graph({
                0: [(1, D[0][1])],
                1: [(0, D[0][1])]
            })
            
        (i, k, limb_length) = self.limb(D, n)
        
        for j in range(n - 1):
            D[j][n - 1] = D[j][n - 1] - limb_length
            D[n - 1][j] = D[j][n - 1]
            
        x = D[i][n - 1]
        
        T = self.additive_phylogeny(D, n - 1)
        
        v = T.add_vertex_on_path(i, k, x)
        
        T.add_neighbor(v, n-1, limb_length)
        return T
    
    
    '''Pronalaženje dva najbliža klastera'''
    def two_closest_clusters(self, clusters, D):
        min_ci = None
        min_cj = None
        min_distance = float('inf')
        
        for c_i in clusters:
            for c_j in clusters:
                if c_i != c_j:
                    current_distance = c_i.distance(c_j, D)
                    if current_distance < min_distance:
                        min_distance = current_distance
                        min_ci = c_i
                        min_cj = c_j
                        
        return min_ci, min_cj, min_distance
    
    '''
    Konstrukcija filogenetskog stabla korišćenjem 
    UPGMA algoritma
    '''
    def UPGMA(self, D, n):
        clusters = [Cluster([i], 0) for i in range(n)]
        adjacency_list = dict([(i, []) for i in range(n)])
        T = Graph(adjacency_list)
    
        while len(clusters) > 1:
            c_i, c_j, dist = self.two_closest_clusters(clusters, D)
            c_new = c_i.merge(c_j)
            c_new.age = dist / 2
            T.add_node(str(c_new))
            T.add_neighbor(v=str(c_new), w=str(c_i), distance=c_new.age - c_i.age, undirected=False)
            T.add_neighbor(v=str(c_new), w=str(c_j), distance=c_new.age - c_j.age, undirected=False)
            
            clusters = [c for c in clusters if c != c_i and c != c_j]
            clusters.append(c_new)
            
        root = str(clusters[0])
        return T, root
    
    '''
    Konstrukcija filogenetskog stabla korišćenjem algoritma
    Neighbor Joining
    '''
    def neighbor_joining(self, D, n):
        if n == 2:
            [i, j] = list((D.d_map.keys()))
            return Graph({
                i: [(j, D.d_map[i][j])],
                j: [(i, D.d_map[j][i])]
            })
        
        i, j = D.min_d_star_dist()
        delta = (D.total_distance(i) - D.total_distance(j)) / (n - 2)
        limb_length_i = 0.5 * (D.d_map[i][j] + delta)
        limb_length_j = 0.5 * (D.d_map[i][j] - delta)
        
        m = f'X{i},{j}'
        D.add_node(m, i, j)
        D.remove_node(i)
        D.remove_node(j)
        
        T = self.neighbor_joining(D, n - 1)
        T.add_neighbor(m, i, limb_length_i)
        T.add_neighbor(m, j, limb_length_j)
        
        return T

## Testovi

In [5]:
# Aditivna filogenija

ph = Phylogeny()

D = [[0, 13, 21, 22],
    [13, 0, 12, 13],
    [21, 12, 0, 13],
    [22, 13, 13, 0]]

n = len(D)
T = ph.additive_phylogeny(D, n)
print(T)




{0: [('X0,1', 11.0)], 1: [('X0,1', 2.0)], 'X0,1': [(0, 11.0), (1, 2.0), ('XX0,1,2', 4.0)], 2: [('XX0,1,2', 6.0)], 'XX0,1,2': [('X0,1', 4.0), (2, 6.0), (3, 7.0)], 3: [('XX0,1,2', 7.0)]}


In [6]:
# UPGMA

D = [[0, 3, 4, 3],
     [3, 0, 4, 5],
     [4, 4, 0, 2],
     [3, 5, 2, 0]]

T, root = ph.UPGMA(D, n)
print(T)
print()
print(root)


{0: [], 1: [], 2: [], 3: [], '[2, 3]:1.0': [('[2]:0', 1.0), ('[3]:0', 1.0)], '[0, 1]:1.5': [('[0]:0', 1.5), ('[1]:0', 1.5)], '[2, 3, 0, 1]:2.0': [('[2, 3]:1.0', 1.0), ('[0, 1]:1.5', 0.5)]}

[2, 3, 0, 1]:2.0


In [7]:
# Neighbor Joining

D = [[0, 13, 21, 22],
    [13, 0, 12, 13],
    [21, 12, 0, 13],
    [22, 13, 13, 0]]

d_map = DMap(D)
print(ph.neighbor_joining(d_map, n))

{0: [('XX3,2,1', 11.0)], 'XX3,2,1': [(0, 11.0), ('X3,2', 4.0), (1, 2.0)], 'X3,2': [('XX3,2,1', 4.0), (3, 7.0), (2, 6.0)], 1: [('XX3,2,1', 2.0)], 3: [('X3,2', 7.0)], 2: [('X3,2', 6.0)]}
