In [1]:
import h5py
import numpy as np
from itertools import combinations
from sklearn.cluster import KMeans

In [2]:
def load_h5_file(file, num_cost_matrices):
    """Load the cost matrices stored in an HDF5 file.

    Reads the datasets named ``cost_matrix_0`` through
    ``cost_matrix_{num_cost_matrices - 1}`` and copies each one into memory.

    Args:
        file: Path of the HDF5 file (anything ``h5py.File`` accepts); it is
            opened read-only.
        num_cost_matrices: Number of consecutive ``cost_matrix_{i}`` datasets
            to read, starting at index 0.

    Returns:
        dict mapping each index ``i`` to a NumPy array with the contents of
        dataset ``cost_matrix_{i}``.
    """
    with h5py.File(file, 'r') as f:
        # np.array(...) copies each dataset into memory while the file is
        # still open, so the returned arrays stay usable after it is closed.
        cost_matrices = {
            i: np.array(f[f'cost_matrix_{i}'])
            for i in range(num_cost_matrices)
        }
    return cost_matrices


In [3]:
len_dataset = 188
# Number of unordered pairs of dataset items, C(len_dataset, 2), computed in
# closed form rather than by materializing every combination just to count it.
num_cost_matrices = len_dataset * (len_dataset - 1) // 2

print(f'Number of cost matrices: {num_cost_matrices}')

# NOTE(review): presumably the file holds one cost matrix per pair — confirm
# against whatever wrote cost_matrices.h5.
cost_matrices = load_h5_file('../data/cost_matrices.h5', num_cost_matrices)

Number of cost matrices: 17578


In [7]:
# Display the cost matrix stored under key 0 (read from dataset 'cost_matrix_0').
cost_matrices[0]

array([[0.13855484, 0.13845885, 1.84178903, 8.33910386, 4.33924013,
        1.54351854, 1.62207349, 1.62207708, 4.26150579, 8.12694368,
        1.97717433, 0.70526678, 1.67934353, 1.01150449, 4.58241239,
        4.76704108, 4.76704108],
       [1.36842527, 1.36843859, 1.13961124, 7.90360381, 3.57798544,
        0.53724399, 0.73316307, 0.73315077, 3.48328601, 7.67941813,
        1.34748111, 2.10670704, 2.37059793, 0.71430031, 4.63460039,
        5.01832703, 5.01832703],
       [2.45242473, 2.45243216, 2.27469461, 7.58660316, 2.93104686,
        2.04664173, 2.10657052, 2.10656624, 2.81480159, 7.35370117,
        2.38405153, 3.00860427, 3.24260571, 2.11382245, 5.16500119,
        5.23503958, 5.23503958],
       [1.36098197, 1.36099536, 1.1403106 , 7.90370468, 3.57820825,
        0.5387259 , 0.73424967, 0.73423739, 3.48351488, 7.67952195,
        1.34807263, 2.09980534, 2.36722034, 0.71541556, 4.6336976 ,
        5.01794937, 5.01794937],
       [0.27703624, 0.27700247, 1.96837194, 8.436694

In [5]:
def sub_matrice_generator_k_means(matrix, row_num_clusters, col_num_clusters, random_state=None):
    """Split a matrix into blocks by k-means clustering its rows and columns.

    Rows are clustered with each row of ``matrix`` as one sample; columns are
    clustered independently with each row of ``matrix.T`` as one sample. One
    sub-matrix is then extracted for every (row cluster, column cluster) pair.

    Args:
        matrix: 2-D NumPy array to partition.
        row_num_clusters: Number of k-means clusters to form over the rows.
        col_num_clusters: Number of k-means clusters to form over the columns.
        random_state: Optional seed forwarded to both ``KMeans`` estimators.
            The default ``None`` leaves them unseeded, as before; pass an int
            to get the same partition on every run.

    Returns:
        list of ``row_num_clusters * col_num_clusters`` arrays. The block for
        row cluster ``r`` and column cluster ``c`` sits at index
        ``r * col_num_clusters + c``; rows and columns inside a block keep
        their original relative order.
    """
    row_labels = KMeans(n_clusters=row_num_clusters, random_state=random_state).fit(matrix).labels_
    col_labels = KMeans(n_clusters=col_num_clusters, random_state=random_state).fit(matrix.T).labels_

    # Resolve the member indices of every cluster once, instead of repeating
    # the row lookup for each column cluster inside the nested loop.
    row_groups = [np.flatnonzero(row_labels == row_id) for row_id in range(row_num_clusters)]
    col_groups = [np.flatnonzero(col_labels == col_id) for col_id in range(col_num_clusters)]

    cluster_matrices = []
    for row_indices in row_groups:
        for col_indices in col_groups:
            # np.ix_ selects the rows x columns cross product in one step.
            cluster_matrices.append(matrix[np.ix_(row_indices, col_indices)])
    return cluster_matrices

In [6]:
# For every loaded cost matrix, build its list of k-means blocks using
# 2 row clusters and 2 column clusters.
sub_matices = [
    sub_matrice_generator_k_means(cost_matrices[idx], 2, 2)
    for idx in range(num_cost_matrices)
]

In [14]:
# Shape of the first block (index 0) produced for cost matrix 0.
sub_matices[0][0].shape

(9, 15)

In [8]:
# Block 0 of cost matrix 0: rows in row cluster 0, columns in column cluster 0.
sub_matices[0][0]

array([[0.13855484, 0.13845885, 1.84178903, 4.33924013, 1.54351854,
        1.62207349, 1.62207708, 4.26150579, 1.97717433, 0.70526678,
        1.67934353, 1.01150449, 4.58241239, 4.76704108, 4.76704108],
       [1.36842527, 1.36843859, 1.13961124, 3.57798544, 0.53724399,
        0.73316307, 0.73315077, 3.48328601, 1.34748111, 2.10670704,
        2.37059793, 0.71430031, 4.63460039, 5.01832703, 5.01832703],
       [2.45242473, 2.45243216, 2.27469461, 2.93104686, 2.04664173,
        2.10657052, 2.10656624, 2.81480159, 2.38405153, 3.00860427,
        3.24260571, 2.11382245, 5.16500119, 5.23503958, 5.23503958],
       [1.36098197, 1.36099536, 1.1403106 , 3.57820825, 0.5387259 ,
        0.73424967, 0.73423739, 3.48351488, 1.34807263, 2.09980534,
        2.36722034, 0.71541556, 4.6336976 , 5.01794937, 5.01794937],
       [0.27703624, 0.27700247, 1.96837194, 4.50169187, 1.6925447 ,
        1.76448533, 1.76448758, 4.42680932, 2.09559409, 0.54436064,
        1.60346112, 1.12758955, 4.61384684, 

In [9]:
# Block 1 of cost matrix 0: rows in row cluster 0, columns in column cluster 1.
sub_matices[0][1]

array([[8.33910386, 8.12694368],
       [7.90360381, 7.67941813],
       [7.58660316, 7.35370117],
       [7.90370468, 7.67952195],
       [8.43669469, 8.22705119],
       [8.58986845, 8.38405579],
       [8.70719146, 8.50421691],
       [8.74686992, 8.54484146],
       [8.69059557, 8.48722194]])

In [10]:
# Block 2 of cost matrix 0: rows in row cluster 1, columns in column cluster 0.
sub_matices[0][2]

array([[3.8571529 , 3.85728103, 4.27279025, 5.99395358, 4.25690318,
        4.26181226, 4.26175755, 5.96715666, 4.31855516, 3.80540993,
        3.91078517, 4.06897398, 2.5126827 , 4.06436983, 4.06436983],
       [4.60078965, 4.60090267, 4.63830696, 6.05458009, 4.64374796,
        4.65495218, 4.65489949, 6.03570288, 4.65445803, 4.71905684,
        4.9471019 , 4.59811514, 0.08263569, 3.74822884, 3.74822884],
       [4.80277771, 4.80288164, 4.89554476, 5.94933366, 4.98596714,
        4.9846615 , 4.98461426, 5.95108043, 4.87904504, 4.80001732,
        5.07339182, 4.93200601, 3.7592952 , 0.04097388, 0.04097388],
       [4.80277771, 4.80288164, 4.89554476, 5.94933366, 4.98596714,
        4.9846615 , 4.98461426, 5.95108043, 4.87904504, 4.80001732,
        5.07339182, 4.93200601, 3.7592952 , 0.04097388, 0.04097388]])

In [11]:
# Block 3 of cost matrix 0: rows in row cluster 1, columns in column cluster 1.
sub_matices[0][3]

array([[9.05244965, 8.86589131],
       [8.83470472, 8.65349668],
       [8.37226051, 8.19741178],
       [8.37226051, 8.19741178]])