In [1]:
import pandas as pd
import numpy as np

# Row labels for the toy dataset, kept in their own variable so they can be reused.
indices = ["p53", "mdm2", "bcl2", "cylinE", "Caspade"]

# Five labelled observations (rows) described by three integer features (columns).
# The labels are attached at construction time rather than by mutating the index.
df = pd.DataFrame(
    {
        "alpha": [9, 10, 1, 6, 1],
        "beta": [3, 2, 9, 5, 10],
        "gamma": [7, 9, 4, 5, 3],
    },
    index=indices,
)
df

Unnamed: 0,alpha,beta,gamma
p53,9,3,7
mdm2,10,2,9
bcl2,1,9,4
cylinE,6,5,5
Caspade,1,10,3


In [2]:
from scipy.spatial import distance_matrix


def calculate_global_min(dm):
    """Find the closest pair of clusters in a square distance matrix.

    Only the strict upper triangle (above the diagonal) is searched, so
    self-distances on the diagonal are never candidates, whatever their
    value (``0`` or ``NaN``). Zero distances between two *different*
    clusters (e.g. duplicated points) are valid candidates, and NaN
    entries are skipped.

    Parameters
    ----------
    dm : pandas.DataFrame
        Square distance matrix; row ``i`` and column ``i`` are expected to
        refer to the same cluster.

    Returns
    -------
    min_val : float
        Smallest non-NaN distance above the diagonal.
    position : list
        ``[row_label, column_label]`` of the first cell (in row-major
        order) that holds ``min_val``.

    Raises
    ------
    ValueError
        If the matrix has fewer than two clusters, or every candidate
        distance is NaN.
    """
    values = np.asarray(dm, dtype=float)

    # k=1 excludes the diagonal itself; filtering on "non-zero" instead would
    # also discard genuine zero distances between distinct clusters.
    rows, cols = np.triu_indices(values.shape[0], k=1, m=values.shape[1])
    candidates = values[rows, cols]

    if candidates.size == 0 or np.isnan(candidates).all():
        raise ValueError(
            "distance matrix has no valid pair of clusters to compare"
        )

    # nanargmin returns the first occurrence, i.e. the first pair in
    # row-major order when several pairs tie for the minimum.
    best = np.nanargmin(candidates)
    min_val = candidates[best]
    position = [dm.index[rows[best]], dm.columns[cols[best]]]
    return min_val, position


# Pairwise Euclidean (p=2) distances between the rows of df, rounded to two
# decimals. Rows and columns are labelled "0".."n-1" as *strings* because the
# merge step builds cluster names by joining labels with "," and later splits
# them again. The label count follows len(df) instead of a hard-coded 5.
point_labels = np.arange(len(df)).astype(str)
original_dm = pd.DataFrame(
    distance_matrix(df, df, p=2), index=point_labels, columns=point_labels
).round(2)
original_dm

Unnamed: 0,0,1,2,3,4
0,0.0,2.45,10.44,4.12,11.36
1,2.45,0.0,12.45,6.4,13.45
2,10.44,12.45,0.0,6.48,1.41
3,4.12,6.4,6.48,0.0,7.35
4,11.36,13.45,1.41,7.35,0.0


In [3]:
# Separate, relabelled view of the distance matrix: same values as original_dm,
# but rows and columns carry the names in `indices`. original_dm keeps its own
# labels.
data = original_dm.set_axis(indices, axis="index").set_axis(indices, axis="columns")
data

Unnamed: 0,p53,mdm2,bcl2,cylinE,Caspade
p53,0.0,2.45,10.44,4.12,11.36
mdm2,2.45,0.0,12.45,6.4,13.45
bcl2,10.44,12.45,0.0,6.48,1.41
cylinE,4.12,6.4,6.48,0.0,7.35
Caspade,11.36,13.45,1.41,7.35,0.0


In [4]:
def clean_position(position):
    """Flatten cluster labels into the individual labels they are made of.

    Each entry of ``position`` is a string that may hold several labels
    separated by commas (e.g. ``"0,1"``). The result is one flat list with
    every comma-separated piece, in the order encountered.
    """
    return [label for cluster in position for label in cluster.split(",")]

In [5]:
def new_iteration(dm, original_dm, linkage=np.nanmean):
    """Perform one agglomerative step: merge the two closest clusters of ``dm``.

    Parameters
    ----------
    dm : pandas.DataFrame
        Current cluster-to-cluster distance matrix. Labels are strings; a
        merged cluster is named by joining its members with ``","``.
    original_dm : pandas.DataFrame
        Point-to-point distance matrix, indexed by the individual point
        labels. Distances to the new cluster are always recomputed from it.
    linkage : callable, default ``np.nanmean``
        Reducer that maps a 2-D ``numpy.ndarray`` of point-to-point
        distances (points of another cluster x points of the merged
        cluster) to a single number.

    Returns
    -------
    pandas.DataFrame
        Matrix with the two merged clusters removed and one row/column
        added for the new cluster. The new cluster's self-distance is left
        as NaN.

    Notes
    -----
    Prints the minimum distance, the clusters being merged and the
    remaining clusters.
    """
    min_val, position = calculate_global_min(dm)
    print(f"El valor mínimo encontrado es: {min_val}")
    print(f"Clusters a fusionar: {position}")
    non_position = [col for col in dm.columns if col not in position]
    print(f"Clusters que no se fusionan: {non_position}")

    new_position = ",".join(position)
    clean_pos = clean_position(position)

    # Hand the reducer a plain ndarray: numpy reducers applied directly to a
    # DataFrame dispatch to the pandas method and, depending on the pandas
    # version, may reduce along one axis only and return a Series.
    values = [
        linkage(original_dm.loc[n_p.split(","), clean_pos].to_numpy())
        for n_p in non_position
    ]
    merged = pd.Series(values, index=non_position)

    new_dm = dm.copy()
    # Add the merged cluster as a column, transpose, and add it as a column
    # again: this fills both the row and the column of the new cluster.
    new_dm[new_position] = merged
    new_dm = new_dm.T
    new_dm[new_position] = merged
    return new_dm.drop(index=position, columns=position)


# First merge step: the current matrix is the point-to-point matrix itself.
dm_1 = new_iteration(dm=original_dm, original_dm=original_dm)
dm_1

El valor mínimo encontrado es: 1.41
Clusters a fusionar: ['2', '4']
Clusters que no se fusionan: ['0', '1', '3']


Unnamed: 0,0,1,3,"2,4"
0,0.0,2.45,4.12,10.9
1,2.45,0.0,6.4,12.95
3,4.12,6.4,0.0,6.915
24,10.9,12.95,6.915,


In [6]:
# Second merge step: continue from dm_1, still measuring against original_dm.
dm_2 = new_iteration(dm=dm_1, original_dm=original_dm)
dm_2

El valor mínimo encontrado es: 2.45
Clusters a fusionar: ['0', '1']
Clusters que no se fusionan: ['3', '2,4']


Unnamed: 0,3,"2,4","0,1"
3,0.0,6.915,5.26
24,6.915,,11.925
1,5.26,11.925,


In [7]:
# Third merge step: continue from dm_2, still measuring against original_dm.
dm_3 = new_iteration(dm=dm_2, original_dm=original_dm)
dm_3

El valor mínimo encontrado es: 5.26
Clusters a fusionar: ['3', '0,1']
Clusters que no se fusionan: ['2,4']


Unnamed: 0,"2,4","3,0,1"
24,,10.255
301,10.255,
