In [65]:
from copy import deepcopy
from itertools import product

class WuPGMA:
    def __init__(self, distances, U=False):
        self.distances = distances
        self.strings = {}
        self.U = U
        self.last_key = None  # used for output
    
    def __getitem__(self, item):
        a, b = item
        if a == b:
            return 0
        try:
            dist = self.distances[a][b]
        except KeyError:
            dist = self.distances[b][a]
            self.distances[a][b] = dist
        finally:
            return dist
    
    def unify(self, a, b, dist):
        new = (a, b)
        new_dist = {}
        
        # compute ditance from new Node to other existing nodes
        for el in self.distances.keys():
            if el in (a, b):
                continue  # because we remove nodes a, b from table
            
            if self.U:
                new_dist[el] = (self[el, a] * len(a) + self[el, b] * len(b)) / (len(a) + len(b))
            else:
                new_dist[el] = (self[el, a] + self[el, b]) / 2 
            
            if a in self.distances[el].keys():  # remove a, b from table
                self.distances[el].pop(a)
            if b in self.distances[el].keys():
                self.distances[el].pop(b)
            
        for n in new:  # remove a, b form table again (because we keep both dist[a, b] and dist[b, a])
            if n in self.distances.keys():
                self.distances.pop(n)
        
        self.distances[new] = new_dist  # add new row to table. __getitem__ will take care of the rest
        
        # distance form nodes a, b to new node
        dist_a = dist_b = dist / 2
            
        # Newick formatted string of subtree with root at new = (a, b)
        dist_a, dist_b = round(dist_a, 2), round(dist_b, 2)
        
        if len(a) == 1 and len(b) == 1:
            s = '{}:{},{}:{}'.format(a, dist_a, b, dist_b)
            dist_a, dist_b = round(dist_a, 2), round(dist_b, 2)
            
        elif len(a) > 1 and len(b) == 1:
            st, d = self.strings[a]
            dist_a = round(dist_a - d, 2)
            dist_b = round(dist_b, 2)
            s = '({}):{},{}:{}'.format(st, dist_a, b, dist_b)
            
        elif len(a) == 1 and len(b) > 1:
            st, d = self.strings[b]
            dist_a = round(dist_a, 2)
            dist_b = round(dist_b - d, 2)
            
            s = '{}:{},({}):{}'.format(a, dist_a, st, dist_b)
        else:
            st_a, d_a = self.strings[a]
            st_b, d_b = self.strings[b]
            
            dist_a = round(dist_a - d_a, 2)
            dist_b = round(dist_b - d_b, 2)
            s = '({}):{},({}):{}'.format(st_a, dist_a, st_b, dist_b)
        
        self.strings[new] = (s, dist/2)
        self.last_key = new
    
    def find_min_distance(self):
        min_dist = float('inf')
        for el1, others in self.distances.items():
            for el2, dist in others.items():
                # print(el1, el2, dist)
                if el1 == el2:
                    continue
                if dist < min_dist:
                    min_dist = dist
                    a, b = el1, el2
        # print(min_dist)
        if min_dist < float('inf'):
            return a, b, min_dist     
    
    def brrr(self):
        while self.find_min_distance():
            self.unify(*self.find_min_distance())
        return '(' + self.strings[self.last_key][0] + ')'

---
---

## 1. WPGMA

In [66]:
test1 = {
    'A': { 'B': 16, 'C': 16, 'D': 10 },
    'B': { 'C': 8, 'D': 8},
    'C': { 'D': 4},
    'D': {}
}
WPGMA1 = WuPGMA(deepcopy(test1), U=False)
print(WPGMA1.brrr())

(((C:2.0,D:2.0):2.0,B:4.0):3.25,A:7.25)


In [67]:
test2 = {
    'A': {'B': 5, 'C':  4, 'D': 7, 'E': 6, 'F': 8},
    'B': {'C': 7, 'D': 10, 'E': 9, 'F': 11},
    'C': {'D': 7, 'E': 6, 'F': 8},
    'D': {'E': 5, 'F': 9},
    'E': {'F': 8},
    'F': {}
}
WPGMA2 = WuPGMA(deepcopy(test2), U=False)
print(WPGMA2.brrr())

((((A:2.0,C:2.0):1.0,B:3.0):1.0,(D:2.5,E:2.5):1.5):0.5,F:4.5)


## 2. UPGMA

In [68]:
test1 = {
    'A': { 'B': 16, 'C': 16, 'D': 10 },
    'B': { 'C': 8, 'D': 8},
    'C': { 'D': 4},
    'D': {}
}
UPGMA1 = WuPGMA(deepcopy(test1), U=True)
print(UPGMA1.brrr())

(((C:2.0,D:2.0):2.0,B:4.0):3.0,A:7.0)


In [69]:
test2 = {
    'A': {'B': 5, 'C':  4, 'D': 7, 'E': 6, 'F': 8},
    'B': {'C': 7, 'D': 10, 'E': 9, 'F': 11},
    'C': {'D': 7, 'E': 6, 'F': 8},
    'D': {'E': 5, 'F': 9},
    'E': {'F': 8},
    'F': {}
}
UPGMA2 = WuPGMA(deepcopy(test2), U=True)
print(UPGMA2.brrr())

((((A:2.0,C:2.0):1.0,B:3.0):0.75,(D:2.5,E:2.5):1.25):0.63,F:4.38)


### Проверка того, что объекты возвращают правильное расстояние

In [9]:
test1 = {
    'A': { 'B': 16, 'C': 16, 'D': 10 },
    'B': { 'C': 8, 'D': 8},
    'C': { 'D': 4},
    'D': {}
}


elements = 'ABCD'
Test1 = WuPGMA(deepcopy(test1), U=False)
for a, b in product(elements, elements):
    assert Test1[a, b] == Test1[b, a]
assert Test1['D', 'B'] == 8
assert Test1['C', 'A'] == 16
assert Test1['C', 'D'] == 4

test2 = {
    'A': {'B': 5, 'C':  4, 'D': 7, 'E': 6, 'F': 8},
    'B': {'C': 7, 'D': 10, 'E': 9, 'F': 11},
    'C': {'D': 7, 'E': 6, 'F': 8},
    'D': {'E': 5, 'F': 9},
    'E': {'F': 8},
    'F': {}
}

elements = 'ABCDEF'
Test2 = WuPGMA(deepcopy(test2), U=True)
for a, b in product(elements, elements):
    assert Test2[a, b] == Test2[b, a]
assert Test2['D', 'B'] == 10
assert Test2['C', 'A'] == 4
assert Test2['C', 'D'] == 7