In [1]:
import numpy as np
import copy

# Week 1: Introduction to Evolutionary Tree Construction

In [2]:
'''
Distances Between Leaves Problem: Compute the distances between leaves in a weighted tree.
Input:  An integer n followed by the adjacency list of a weighted tree with n leaves.
Output: An n x n matrix (d_{i,j}), where d_{i,j} is the length of the path between leaves i and j.
'''

def Distances_Matrix(n, adjacency_list):
    """Compute the path length between every pair of leaves of a weighted tree.

    Args:
        n: Number of leaves. Leaves are the nodes labelled ``0 .. n - 1``.
        adjacency_list: Either one newline-separated string or an iterable of
            strings, each of the form ``"a->b:w"`` (an edge from node ``a``
            to node ``b`` with integer weight ``w``). Blank lines are ignored.

    Returns:
        An ``n x n`` integer numpy array whose ``[i, j]`` entry is the length
        of the path found from leaf ``i`` to leaf ``j``. Entries for leaves
        that are never reached stay 0.

    Raises:
        KeyError: If a node that has to be expanded (a start leaf or a reached
            internal node) has no outgoing edge in ``adjacency_list``.
    """
    if isinstance(adjacency_list, str):
        adjacency_list = adjacency_list.split('\n')

    # node -> list of (neighbour, edge weight)
    neighbours = {}
    for line in adjacency_list:
        if not line.strip():
            continue
        edge, weight = line.split(':')
        source, target = (int(label) for label in edge.split('->'))
        neighbours.setdefault(source, []).append((target, int(weight)))

    length_matrix = np.full([n, n], 0, int)

    for start in range(n):
        # Membership in `distance` is the "visited" marker. Using the distance
        # value itself (0 == unvisited) breaks as soon as an edge has weight 0.
        distance = {start: 0}
        frontier = [start]

        while frontier:
            next_frontier = []
            for node in frontier:
                for target, weight in neighbours[node]:
                    if target in distance:
                        continue
                    distance[target] = distance[node] + weight
                    # Leaves are end points; the search only continues
                    # through nodes outside the leaf label range.
                    if not 0 <= target < n:
                        next_frontier.append(target)
            frontier = next_frontier

        length_matrix[start, :] = [distance.get(leaf, 0) for leaf in range(n)]

    return length_matrix

# Test: a tree with n = 4 leaves (labels 0-3) and two internal nodes (4, 5).
# Every edge is listed in both directions in "a->b:w" form.
n = 4
adjacency_list = '''0->4:11
1->4:2
2->5:6
3->5:7
4->0:11
4->1:2
4->5:4
5->4:4
5->3:7
5->2:6'''

# Last expression of the cell: the notebook displays the returned matrix.
Distances_Matrix(n, adjacency_list)

array([[ 0, 13, 21, 22],
       [13,  0, 12, 13],
       [21, 12,  0, 13],
       [22, 13, 13,  0]])

In [3]:
'''
Code Challenge: Solve the Limb Length Problem.
Input: An integer n, followed by an integer j between 0 and n - 1, followed by a space-separated additive distance matrix D (whose elements are integers).
Output: The limb length of the leaf in Tree(D) corresponding to row j of this distance matrix (use 0-based indexing).
'''

def Limb_Length(j, length_matrix):
    """Return the limb length of leaf ``j`` for a distance matrix.

    The limb length is the minimum of ``(D[i, j] + D[j, k] - D[i, k]) / 2``
    over all ordered pairs of leaves ``i != k`` that both differ from ``j``.

    Args:
        j: 0-based row/column index of the leaf.
        length_matrix: A square distance matrix, either as a 2-D array-like
            or as whitespace-separated text (rows may be split by newlines).

    Returns:
        The minimum converted with ``int()``. If the matrix has fewer than
        two leaves other than ``j`` there is nothing to minimise over and
        ``float('Inf')`` is returned.

    Raises:
        ValueError: If text input does not contain a square number of values.
    """
    if isinstance(length_matrix, str):
        values = [int(token) for token in length_matrix.split()]
        # The side length comes from the data itself, not from a global.
        side = int(round(len(values) ** 0.5))
        if side * side != len(values):
            raise ValueError(
                'distance matrix text must contain a square number of values'
            )
        length_matrix = np.array(values).reshape(side, side)
    else:
        length_matrix = np.asarray(length_matrix)

    others = [leaf for leaf in range(length_matrix.shape[0]) if leaf != j]

    min_length = float('Inf')
    for i in others:
        for k in others:
            if k == i:
                continue
            length = (length_matrix[i, j] + length_matrix[j, k] - length_matrix[i, k]) / 2
            if length < min_length:
                min_length = length

    if min_length == float('Inf'):
        return min_length
    return int(min_length)

# Test: limb length of leaf j = 1 in a 4 x 4 distance matrix given as text.
n = 4
j = 1
length_matrix = '''0 13 21 22
13 0 12 13
21 12 0 13
22 13 13 0'''

# Last expression of the cell: the notebook displays the returned limb length.
Limb_Length(j, length_matrix)

2

In [4]:
'''
Code Challenge: Implement AdditivePhylogeny to solve the Distance-Based Phylogeny Problem.
Input: An integer n followed by a space-separated n x n distance matrix.
Output: A weighted adjacency list for the simple tree fitting this matrix.
'''

def Attached_Limb(length_matrix, j):
    """Find two leaves whose connecting path passes through leaf ``j``.

    Scans pairs ``(i, k)`` in row-major order and returns the first pair of
    distinct leaves, both different from ``j``, that satisfies
    ``D[i, k] == D[i, j] + D[j, k]``.

    Args:
        length_matrix: Square 2-D numpy array of distances, indexed
            ``[row, col]``.
        j: Index of the leaf whose attachment point is being located.

    Returns:
        A tuple ``(i, k)`` of leaf indices, or ``None`` if no pair satisfies
        the equality.
    """
    size = length_matrix.shape[0]
    for i in range(size):
        if i == j:
            continue
        for k in range(size):
            # i == k is excluded as well: a leaf paired with itself does not
            # describe a path on which j could be attached.
            if k == j or k == i:
                continue
            # The equality must use leaf j itself. Indexing j - 1 here makes
            # the test trivially true for k == j - 1.
            if length_matrix[i, k] == length_matrix[i, j] + length_matrix[j, k]:
                return (i, k)
    return None

def Find_Path(nodes, current, final, path, visited, final_path):
    """Depth-first search for a path from ``current`` to ``final``.

    Args:
        nodes: Adjacency mapping ``node -> {neighbour: edge length}``.
        current: Node the search is currently at.
        final: Node to reach.
        path: Nodes walked before ``current``. It is not modified; a new list
            is built for each call.
        visited: List of nodes already entered. It is appended to in place
            and shared across the whole recursion.
        final_path: Output list. When ``final`` is reached, the nodes from the
            start to ``final`` are appended to it in place.

    Returns:
        ``final_path`` itself, on every exit. It is left unchanged when
        ``final`` is not reachable from ``current``.
    """
    path = path + [current]
    visited.append(current)

    if current == final:
        final_path.extend(path)
        return final_path

    for neighbor in nodes[current]:
        # Checked at iteration time so a node reached through an earlier
        # branch is not entered a second time.
        if neighbor not in visited:
            Find_Path(nodes, int(neighbor), final, path, visited, final_path)

    return final_path
 
def add_to_graph(length_matrix,nodes,n,m,i,k,x):
    """Attach leaf ``n`` to the tree at distance ``x`` from ``i`` on the i-k path.

    Walks the path returned by ``Find_Path(nodes, i, k, ...)`` and accumulates
    edge lengths. If the running total hits ``x`` exactly at a node, leaf ``n``
    is hung on that node. If the total overshoots ``x``, the edge just walked
    is split by a new internal node labelled ``m[0]``, leaf ``n`` is hung on
    it, and ``m[0]`` is incremented. The limb length comes from
    ``Limb_Length(n, length_matrix)``.

    ``nodes`` is modified in place and also returned. If the walk ends before
    the total reaches ``x``, ``nodes`` is returned unchanged.
    """
    route = Find_Path(nodes, i, k, [], [], [])
    walked = 0

    for near, far in zip(route, route[1:]):
        edge = nodes[near][far]
        walked = walked + edge

        if walked > x:
            # The attachment point lies strictly inside the edge near-far.
            to_near = x - (walked - edge)
            to_far = walked - x
            limb = Limb_Length(n, length_matrix)
            fresh = m[0]

            # Replace the edge near-far with near-fresh and fresh-far.
            del nodes[near][far]
            del nodes[far][near]
            nodes[near][fresh] = to_near
            nodes[far][fresh] = to_far

            nodes[fresh] = {near: to_near, far: to_far, n: limb}
            nodes[n] = {fresh: limb}
            m[0] = fresh + 1
            return nodes

        if walked == x:
            # The attachment point coincides with an existing node.
            limb = Limb_Length(n, length_matrix)
            nodes[far][n] = limb
            nodes[n] = {far: limb}
            return nodes

    return nodes

def AdditivePhylogeny(length_matrix,n,m):
    """Recursively build a weighted tree from a distance matrix.

    Args:
        length_matrix: 2-D numpy array of distances; rows/columns ``0 .. n``
            are used.
        n: Index of the last leaf, i.e. the matrix holds ``n + 1`` leaves.
        m: One-element list holding the label for the next new internal node.
            It is passed on to ``add_to_graph``, which advances it.

    Returns:
        A dict ``node -> {neighbour: edge length}`` listing every edge in
        both directions.
    """
    if n == 1:
        # Two leaves: the tree is the single edge 0-1.
        return {1: {0: length_matrix[0, 1]}, 0: {1: length_matrix[0, 1]}}

    limb = Limb_Length(n, length_matrix)

    # Work on a copy so the caller's matrix is left untouched.
    bald = length_matrix.copy()
    for leaf in range(n):
        shortened = bald[leaf, n] - limb
        bald[leaf, n] = shortened
        bald[n, leaf] = shortened

    i, k = Attached_Limb(bald, n)
    x = bald[i, n]

    # Solve for the first n leaves, then hang leaf n back on the result.
    nodes = AdditivePhylogeny(bald[:-1, :-1], n - 1, m)
    return add_to_graph(length_matrix, nodes, n, m, i, k, x)

# Test: rebuild the tree for a 4 x 4 distance matrix and print its edges.
n = 4
length_matrix = '''0 13 21 22
13 0 12 13
21 12 0 13
22 13 13 0'''
if isinstance(length_matrix, str):
    # Parse the whitespace-separated text into an n x n integer array.
    length_matrix = np.array([int(token) for token in length_matrix.split()]).reshape(n, n)

# AdditivePhylogeny takes the index of the last leaf (n - 1) and a one-element
# list holding the first label to use for new internal nodes (n).
tmp = AdditivePhylogeny(length_matrix, n - 1, [n])
for key1, values1 in tmp.items():
    for key2, value2 in values1.items():
        print(f'{key1}->{key2}:{value2}')

1->4:2
0->4:11
4->0:11
4->1:2
4->5:4
2->5:6
5->4:4
5->2:6
5->3:7
3->5:7
