In [16]:
import os
import numpy as np
from scipy.cluster.hierarchy import linkage, to_tree

class UltrametricTree:
    def __init__(self, n):
        self.n = n
        self.adj_matrix = np.zeros((2 * n - 1, 2 * n - 1))
        self.distances = np.zeros((2 * n - 1, 2 * n - 1))
        self.node_count = n

    def add_edge(self, i, j, weight):
        self.adj_matrix[i][j] = weight
        self.adj_matrix[j][i] = weight

    def update_distances(self):
        self.distances = np.copy(self.adj_matrix)
        for k in range(self.node_count):
            for i in range(self.node_count):
                for j in range(self.node_count):
                    if self.distances[i][j] > self.distances[i][k] + self.distances[k][j]:
                        self.distances[i][j] = self.distances[i][k] + self.distances[k][j]
        
    def compute_weight(self):
        return np.sum(self.adj_matrix) / 2

    def is_ultrametric(self):
        for i in range(self.node_count):
            for j in range(self.node_count):
                for k in range(self.node_count):
                    if self.distances[i][j] > max(self.distances[i][k], self.distances[k][j]):
                        return False
        
        root = 2 * self.n - 2
        leaf_distances = [self.distances[root][i] for i in range(self.n)]
        
        if len(set(leaf_distances)) != 1:
            return False

        return True

def hierarchical_clustering_ultrametric_tree(distance_matrix):
    n = len(distance_matrix)
    tree = UltrametricTree(n)
    
    Z = linkage(distance_matrix, method='single')

    root, nodes = to_tree(Z, rd=True)
    
    def build_tree(node, current_node_idx):
        if node.is_leaf():
            return node.id
        left_idx = build_tree(node.get_left(), current_node_idx)
        right_idx = build_tree(node.get_right(), current_node_idx)
        new_node_idx = current_node_idx[0]
        current_node_idx[0] += 1
        distance = node.dist / 2
        
        tree.add_edge(new_node_idx, left_idx, distance - (0 if left_idx < n else Z[left_idx - n][2] / 2))
        tree.add_edge(new_node_idx, right_idx, distance - (0 if right_idx < n else Z[right_idx - n][2] / 2))
        
        return new_node_idx

    build_tree(root, [n])
    tree.node_count = 2 * n - 1
    tree.update_distances()
    return tree


def load_distance_matrix(file_path):
    return np.loadtxt(file_path, delimiter=' ')

def load_all_distance_matrices(directory_path):
    matrices = {}
    for file_name in os.listdir(directory_path):
        if file_name.endswith('.txt'):
            file_path = os.path.join(directory_path, file_name)
            matrices[file_name] = load_distance_matrix(file_path)
    return matrices
    
directory_path = 'tests/' 
distance_matrices = load_all_distance_matrices(directory_path)

for file_name, distance_matrix in distance_matrices.items():
    print(f"Processing matrix from file: {file_name}")
    optimal_tree = hierarchical_clustering_ultrametric_tree(distance_matrix)
    if optimal_tree.is_ultrametric():
        print(optimal_tree.adj_matrix)
        print("Weight of the optimal tree:", optimal_tree.compute_weight())
    else:
        print("No ultrametric tree found.")


Processing matrix from file: hc3.txt


  Z = linkage(distance_matrix, method='single')


No ultrametric tree found.
Processing matrix from file: matrix3.txt
[[0.         0.         0.         0.         0.         0.
  3.        ]
 [0.         0.         0.         0.         0.         2.29128785
  0.        ]
 [0.         0.         0.         0.         1.58113883 0.
  0.        ]
 [0.         0.         0.         0.         1.58113883 0.
  0.        ]
 [0.         0.         1.58113883 1.58113883 0.         0.71014902
  0.        ]
 [0.         2.29128785 0.         0.         0.71014902 0.
  0.70871215]
 [3.         0.         0.         0.         0.         0.70871215
  0.        ]]
Weight of the optimal tree: 9.87242667756211
Processing matrix from file: mst6.txt
No ultrametric tree found.
Processing matrix from file: matrix8.txt
[[ 0.          0.          0.          0.          0.          0.
   0.         14.36140662  0.        ]
 [ 0.          0.          0.          0.          0.          0.
  10.60660172  0.          0.        ]
 [ 0.          0.          0