In [2]:
import numpy as np

# Create the distance matrix
matrix = [[0, 3, 4, 7],
          [3, 0, 5, 8],
          [4, 5, 0, 6],
          [7, 8, 6, 0]]

# Initialize the number of species
n = len(matrix)

# Create a list to store the names of the species
species = ['A', 'B', 'C', 'D']

# Create a list to store the nodes
nodes = range(n)

# Create a list to store the newick format
newick = []

# Create a function to calculate Q
def Q(D, n):
    return (n-2) * D - np.sum(D, axis=0)

while len(nodes) > 2:
    # Calculate Q for the current distance matrix
    Qmat = Q(matrix, len(nodes))
    
    # Find the minimum value and its indices in Qmat
    i, j = np.unravel_index(np.argmin(Qmat), Qmat.shape)
    
    # Calculate the distance between the new node and the other nodes
    dij = (matrix[i, n-2] + matrix[j, n-2] - matrix[i, j]) / 2
    
    # Append the new node to the newick list
    newick.append([species[i], species[j], dij])
    
    # Update the distance matrix
    matrix = np.delete(matrix, j, axis=0)
    matrix = np.delete(matrix, j, axis=1)
    matrix = np.delete(matrix, i, axis=0)
    matrix = np.delete(matrix, i, axis=1)
    newrow = (matrix[:, i] + matrix[:, j] - matrix[i, j]) / 2
    matrix = np.append(matrix, [newrow], axis=0)
    matrix = np.append(matrix, np.transpose([newrow]), axis=1)
    
    # Update the species and nodes lists
    species.append(species[i] + species[j])
    del species[j]
    del species[i]
    del nodes[j]
    del nodes[i]

# Append the last two nodes to the newick list
newick.append([species[0], species[1], matrix[0,1]])

# Print the newick format
print(newick)


TypeError: list indices must be integers or slices, not tuple

In [56]:
# Assume the input matrix is a distance matrix

def neighbor_joining(matrix, species):

    n = len(species)

    if n == 1:
        return species[0]

    newick = []

    while n > 2:
        #Qmat = (n-2) * matrix - np.sum(matrix, axis=0)[np.newaxis].T - np.sum(matrix, axis=1)[np.newaxis]
        Qmat = (n-2) * matrix - np.sum(matrix, axis=0) - np.sum(matrix, axis=1)
        i, j = np.where(Qmat == np.min(Qmat))[0][0], np.where(Qmat == np.min(Qmat))[1][0]
        dij = (matrix[i][n-2] + matrix[j][n-2] - matrix[i][j]) / 2


        # update the distance matrix
        matrix[i][:] = (matrix[i][:] + matrix[j][:] - dij) / 2
        matrix[:][i] = matrix[i][:]
        matrix = np.delete(matrix, j, 0)
        matrix = np.delete(matrix, j, 1)

        # update the species list
        species.append((species[i] + species[j], dij))
        del species[i]
        del species[j]
        
        # update the newick string
        newick.append((species[-1], dij))
        newick.append((species[-2], dij))

        n -= 1
    newick.append(species)
    newick_string = ":".join([str(x[1]) for x in newick[::-1]]) + ";"
    return newick_string


In [62]:
from scipy.spatial.distance import squareform
import pandas as pd

# names of the leaves
species = ['Aadvark', 'Alligator M', 'Alligator S', 'Anolis', 'Chelonia', 'Chrysemys', 'Croco', 'Danio', 'Devil', 'Dog', 'Fugu', 'Gecko', 'Gorilla', 'Human', 'KCobra', 'Koala', 'Monodelphis', 'Mouse', 'Orca', 'Ostrich', 'Papio', 'Pelodiscus', 'Pigeon', 'Platypus', 'Pogona', 'Python', 'Thamnophis', 'Xenopus', 'Xlaevis', 'Xtropicalis']

matrix = np.nan_to_num(pd.read_excel('..\Data\Results\Metric\metric_50_m_n.xlsx', index_col=0).to_numpy(), nan=0)
#matrix = squareform(matrix)

In [63]:
neighbor_joining(matrix, species)

"(('Orca', 0.08664094828533186, 'Orca'), node30, 0.08664094828533186)"

In [61]:
def neighbor_joining(matrix, species):
    n = len(matrix)
    newick = []
    while n > 2:
        Qmat = (n-2) * matrix - np.sum(matrix, axis=0) - np.sum(matrix, axis=1)
        i, j = np.where(Qmat == np.min(Qmat))
        i, j = i[0], j[0]
        dij = (matrix[i][n-2] + matrix[j][n-2] - matrix[i][j]) / 2
        
        # create new internal node name
        internal_node = f"node{n}"
        
        # update the newick string
        if len(newick) == 0:
            newick.append((species[i], dij, species[j]))
        else:
            newick[-1] = (newick[-1], dij, internal_node)
            newick.append((species[i], dij, species[j]))
        
        # update the distance matrix
        matrix[i][:] = (matrix[i][:] + matrix[j][:] - dij) / 2
        matrix[:,i] = matrix[i][:]
        matrix = np.delete(matrix, j, axis=0)
        matrix = np.delete(matrix, j, axis=1)
        
        # update the species list
        del species[j]
        
        # update the newick string
        newick[-1] = (newick[-1], dij, internal_node)
        newick.append(internal_node)
        n -= 1
    newick = f"({newick[0][0]}, {newick[0][2]}, {newick[0][1]})"
    return newick
