In [2]:
import numpy as np
import pandas as pd
def show_matrix(matrix):
    """Print a sequence of row sequences as tab-separated text.

    Each row is preceded by a newline and every value is followed by three
    tab characters, so the output starts with a blank line and does not end
    with a newline.

    Args:
        matrix: Sequence of row sequences; values are rendered with ``str``.
    """
    cell_sep = '\t\t\t'
    for row in matrix:
        # A bare print() emits the line break that opens each row.
        print()
        rendered = ''.join(str(value) + cell_sep for value in row)
        print(rendered, end='')

In [3]:
def epsilon_insensitive_loss(y_true, y_pred, epsilon):
    """Return the epsilon-insensitive penalty on the norm of the residual.

    The element-wise residual ``y_true - y_pred`` is reduced to a single
    number with ``np.linalg.norm`` (default arguments), and the tolerance is
    then subtracted once from that number. The tolerance is therefore applied
    to the overall residual magnitude, not to each component separately.

    Args:
        y_true: Array of reference values (must support ``-`` with ``y_pred``).
        y_pred: Array of values to compare against ``y_true``.
        epsilon: Tolerance below which the residual magnitude costs nothing.

    Returns:
        ``max(0, norm(|y_true - y_pred|) - epsilon)``.
    """
    residual_magnitude = np.linalg.norm(np.abs(y_true - y_pred))
    excess = residual_magnitude - epsilon
    # Magnitudes inside the epsilon tube are clipped to zero.
    return np.maximum(0, excess)

def eclidean_distance(point1, point2, epsilon):
    """Return the square root of the epsilon-insensitive loss between two points.

    The loss returned by ``epsilon_insensitive_loss`` is summed with
    ``np.sum`` and then passed through ``np.sqrt``.

    NOTE(review): despite the name, the value is the square root of a
    tolerance-adjusted loss rather than the plain Euclidean distance --
    confirm this is intended.

    Args:
        point1: First point (array-like accepted by the loss helper).
        point2: Second point.
        epsilon: Tolerance forwarded to ``epsilon_insensitive_loss``.
    """
    penalty = epsilon_insensitive_loss(point1, point2, epsilon)
    total_penalty = np.sum(penalty)
    return np.sqrt(total_penalty)

def distance(cluster1,cluster2, epsilon):
    """Return the dissimilarity between two points.

    Thin indirection over ``eclidean_distance`` so the metric can be swapped
    in one place.

    Args:
        cluster1: First point.
        cluster2: Second point.
        epsilon: Tolerance forwarded to ``eclidean_distance``.
    """
    result = eclidean_distance(point1=cluster1, point2=cluster2, epsilon=epsilon)
    return result

def min_distance(arr1, arr2, X, epsilon=.01):
    """Return the smallest pairwise distance between two groups of points.

    Every index in ``arr1`` is paired with every index in ``arr2`` and the
    minimum of ``distance(X[i], X[j], epsilon)`` over all pairs is returned
    (the single-linkage criterion).

    Args:
        arr1: Iterable of indices into ``X`` forming the first group.
        arr2: Iterable of indices into ``X`` forming the second group.
        X: Indexable collection of points.
        epsilon: Tolerance forwarded to ``distance``. Defaults to ``0.01``,
            the value that was previously hard-coded here.

    Returns:
        The minimum distance found, or ``float('inf')`` when either group is
        empty (no pair is ever evaluated).
    """
    min_dist = float('inf')

    for idx1 in arr1:
        for idx2 in arr2:
            dist = distance(X[idx1], X[idx2], epsilon=epsilon)
            # Strict comparison keeps the first minimum and skips NaN values.
            if dist < min_dist:
                min_dist = dist

    return min_dist

def distance_mat(clusters, data):
    """Build the upper-triangular matrix of inter-cluster distances.

    Entry ``[a, b]`` with ``b > a`` holds ``min_distance(clusters[a],
    clusters[b], data)``. The diagonal and the lower triangle are ``np.inf``
    so they can never be picked as the minimum.

    Args:
        clusters: List of clusters, each a list of indices into ``data``.
        data: Indexable collection of points.

    Returns:
        A square float array with one row and column per cluster.
    """
    n = len(clusters)
    # Start from all-inf and overwrite only the cells above the diagonal.
    dist_mat = np.full((n, n), np.inf)
    for row in range(n):
        for col in range(row + 1, n):
            dist_mat[row, col] = min_distance(clusters[row], clusters[col], data)
    return dist_mat

def min_matrix(matrix):
    """Locate the smallest entry of a 2-D array.

    Args:
        matrix: 2-D NumPy array.

    Returns:
        Tuple ``(row, col, value)`` for the first minimum in row-major order.
    """
    flat_position = np.argmin(matrix)
    _, n_cols = matrix.shape
    # Convert the flattened (row-major) position back to 2-D coordinates.
    row, col = divmod(flat_position, n_cols)
    return row, col, matrix[row, col]
    
def linkage(clusters, data, num_clusters):
    """Agglomeratively merge the closest clusters and record each merge.

    On every step the pair of clusters with the smallest ``distance_mat``
    entry is merged: the second cluster's indices are appended to the first
    and the second is removed. The cluster list and the distance matrix are
    printed on every step, and the merge table is printed at the end.

    Args:
        clusters: List of clusters, each a list of indices into ``data``.
            The list is modified in place; after the call it holds the
            clusters that remain.
        data: Indexable collection of points.
        num_clusters: Number of merge steps requested (despite the name).
            Each merge removes one cluster, so at most ``len(clusters) - 1``
            merges are performed; larger requests stop early instead of
            recording a self-merge at distance ``inf`` and then failing on
            an empty distance matrix.

    Returns:
        List of ``[cluster_a, cluster_b, distance]`` rows, one per merge, in
        the order the merges happened.
    """
    linkage_matrix = []
    # Fewer than two clusters leaves no pair to merge.
    n_merges = min(num_clusters, len(clusters) - 1)
    for _ in range(n_merges):
        print(clusters)
        dist_mat = distance_mat(clusters, data)
        i_min, j_min, min_data = min_matrix(dist_mat)
        # The row keeps the pre-merge lists; the merge below builds a new list.
        linkage_matrix.append([clusters[i_min], clusters[j_min], min_data])
        clusters[i_min] = clusters[i_min] + clusters[j_min]
        clusters.pop(j_min)
        print(dist_mat)
    print()
    print(pd.DataFrame(linkage_matrix))
    return linkage_matrix


def compare_arr(arr1, arr2):
    """Report whether two iterables share at least one equal element.

    Args:
        arr1: First iterable.
        arr2: Second iterable (iterated once per element of ``arr1``).

    Returns:
        ``True`` if some element of ``arr1`` equals some element of ``arr2``,
        otherwise ``False`` (including when either is empty).
    """
    return any(left == right for left in arr1 for right in arr2)



def inconsistency(linkage_matrix):
    """Print an inconsistency table for the rows of a linkage list.

    For link ``i`` the heights (third field) of link ``i`` itself and of every
    earlier link that shares at least one index with either side of link ``i``
    are collected. The printed table has one row per link with columns:

    0. mean of the collected heights
    1. standard deviation of the collected heights (``np.std`` defaults)
    2. number of collected heights
    3. ``(height_i - mean) / std``, or ``0`` when the deviation is zero

    Args:
        linkage_matrix: Sequence of ``[cluster_a, cluster_b, height]`` rows.

    Returns:
        None. The table is only printed.
    """
    n_links = len(linkage_matrix)
    inconsistency_matrix = np.zeros((n_links, 4), dtype='float64')
    for i, link in enumerate(linkage_matrix):
        left, right, height = link[0], link[1], link[2]
        # Walk from link i back to link 0, the same order as counting down.
        heights = [
            earlier[2]
            for earlier in linkage_matrix[i::-1]
            if any([
                compare_arr(left, earlier[0]),
                compare_arr(left, earlier[1]),
                compare_arr(right, earlier[0]),
                compare_arr(right, earlier[1]),
            ])
        ]

        mean_height = np.mean(heights)
        std_height = np.std(heights)
        # Guard against division by zero when all collected heights are equal.
        if std_height == 0:
            coefficient = 0
        else:
            coefficient = (height - mean_height) / std_height
        inconsistency_matrix[i] = [mean_height, std_height, len(heights), coefficient]
    print()
    print(pd.DataFrame(inconsistency_matrix))
    # NOTE(review): `return inconsistency_matrix` was left disabled in the
    # original; confirm whether callers should receive the matrix.

In [4]:
from sklearn.datasets import make_blobs

# Synthetic sample: `num_samples` points drawn around `num_clusters` blob
# centre(s) with a fixed random_state so the draw is repeatable.
num_clusters = 1
num_samples = 3
X, y = make_blobs(n_samples=num_samples, centers=num_clusters, cluster_std=2, random_state=42)
# Shift every coordinate by 10.
# NOTE(review): the next cell rebinds X to a hand-written list, so this sample
# is not what the linkage run below operates on -- confirm this cell is still needed.
X += 10

In [5]:
# Five hand-picked 4-dimensional sample points.
X = [
    [4, 4, 1, 1],
    [3, 0, 1, 1],
    [8, 2, 1, 1],
    [8, 0, 1, 1],
    [8.5, 0, .5, .5],
]
# Every point starts in its own singleton cluster, identified by its row index.
clusters = [[row_idx] for row_idx in range(len(X))]
# Each merge removes one cluster, so len(clusters) - 1 merges leave a single cluster.
lm = linkage(clusters, np.asarray(X), len(clusters) - 1)
print()
inconsistency(lm)

[[0], [1], [2], [3], [4]]
[[       inf 2.02807929 2.11237685 2.37631106 2.46011744]
 [       inf        inf 2.31844017 2.23383079 2.35271508]
 [       inf        inf        inf 1.4106736  1.47290511]
 [       inf        inf        inf        inf 0.92521641]
 [       inf        inf        inf        inf        inf]]
[[0], [1], [2], [3, 4]]
[[       inf 2.02807929 2.11237685 2.37631106]
 [       inf        inf 2.31844017 2.23383079]
 [       inf        inf        inf 1.4106736 ]
 [       inf        inf        inf        inf]]
[[0], [1], [2, 3, 4]]
[[       inf 2.02807929 2.11237685]
 [       inf        inf 2.23383079]
 [       inf        inf        inf]]
[[0, 1], [2, 3, 4]]
[[       inf 2.11237685]
 [       inf        inf]]

        0          1         2
0     [3]        [4]  0.925216
1     [2]     [3, 4]  1.410674
2     [0]        [1]  2.028079
3  [0, 1]  [2, 3, 4]  2.112377


          0         1    2         3
0  0.925216  0.000000  1.0  0.000000
1  1.167945  0.242729  2.0  1.000000