In [4]:
import numpy as np

In [9]:
def dist(row, centroid):
    return np.round(np.linalg.norm(np.array(row)-np.array(centroid)),1)


def k_means_clustering(data, k, centroids, max_iterations=100, tolerance=1e-4, log=True):



    for _ in range(max_iterations):

        if log: 
            print(f"ITERATION {_+1}")
        
        clusters = [[] for _ in range(k)]
        for i,point in enumerate(data):
            if log: 
                print(f"{i+1}:", end=' ')
            distances = [dist(point, centroid) for centroid in centroids]
            if log: 
                print(f"L2 distances: {distances}", end='--->')
            closest_centroid = np.argmin(distances)
            clusters[closest_centroid].append([i+1,point])
            if log: 
                print(f"Cluster {closest_centroid+1}")
        

        new_centroids = []
        for i in range(k):
            points_for_cluster = [lst[1] for lst in clusters[i]]
            new_centroid = np.round(np.mean(points_for_cluster, axis=0),1)
            new_centroids.append(new_centroid)


        if np.sum(np.abs(np.array(new_centroids) - np.array(centroids))) < tolerance:
            break

        centroids = new_centroids
        
        if log: 
            print()
        
    return clusters, centroids

def Q1(centroids):
    print(f"Initial {centroids1=}\n")
    clusters, centroids = k_means_clustering(data, len(centroids), centroids)
    print("\nClusters:")
    for i, cluster in enumerate(clusters):
        print(f"Colors in Cluster {i+1}:")
        for lst in cluster:
            print(f'{lst[0]}')
        print("")

    print("Centroids:")
    for centroid in centroids:
        print(centroid)

def difference(centroids1, centroids2):
    clusters1, centroids1 = k_means_clustering(data, len(centroids1), centroids1, log=False)
    clusters2, centroids2 = k_means_clustering(data, len(centroids2), centroids2, log=False)
    print(f'Colors that are in different clusters after 2 simulations:')
    
    lst1, lst2 = [], []
    for cluster1, cluster2 in zip(clusters1, clusters2):

        set1, set2 = set(), set()

        for lst in cluster1:
            set1.add(lst[0])
        for lst in cluster2:
            set2.add(lst[0])
        lst1.append(set1)
        lst2.append(set2)
    
    lst1.sort(key=len)
    lst2.sort(key=len)
    
    differences = []
    
    for set1, set2 in zip(lst1,lst2):        
        differences += list(set1^set2)
    
    print(set(differences))
    


In [10]:
data = [[172, 204, 210],
        [167, 119, 70],
        [247, 231, 184],
        [222, 181, 126],
        [98, 109, 128],
        [85, 74, 71],
        [136, 122, 103],
        [116, 80, 58],
        [207, 135, 67],
        [23, 25, 23],
        [132, 156, 179],
        [147, 101, 64],
        [125, 101, 75],
        [162, 139, 108],
        [52, 51, 50],
        [187, 161, 116]
       ]
centroids1 = [data[6], data[8], data[10]]
centroids2 = [data[0], data[1], data[2]]

In [13]:
Q1(centroids1)

Initial centroids1=[[136, 122, 103], [207, 135, 67], [132, 156, 179]]

ITERATION 1
1: L2 distances: [139.5, 162.6, 69.7]--->Cluster 3
2: L2 distances: [45.4, 43.2, 120.3]--->Cluster 2
3: L2 distances: [175.4, 156.5, 137.4]--->Cluster 3
4: L2 distances: [106.8, 76.3, 107.4]--->Cluster 2
5: L2 distances: [47.3, 127.6, 77.2]--->Cluster 1
6: L2 distances: [77.0, 136.5, 143.5]--->Cluster 1
7: L2 distances: [0.0, 80.7, 83.4]--->Cluster 1
8: L2 distances: [64.7, 106.7, 143.8]--->Cluster 1
9: L2 distances: [80.7, 0.0, 136.4]--->Cluster 2
10: L2 distances: [169.1, 218.8, 231.0]--->Cluster 1
11: L2 distances: [83.4, 136.4, 0.0]--->Cluster 3
12: L2 distances: [45.6, 69.0, 128.4]--->Cluster 1
13: L2 distances: [36.7, 89.1, 117.9]--->Cluster 1
14: L2 distances: [31.5, 61.0, 78.9]--->Cluster 1
15: L2 distances: [122.1, 177.1, 184.6]--->Cluster 1
16: L2 distances: [65.5, 59.0, 83.8]--->Cluster 2

ITERATION 2
1: L2 distances: [189.1, 129.9, 23.4]--->Cluster 3
2: L2 distances: [69.2, 48.4, 144.9]--->Cl

In [11]:
Q1(centroids2)

Initial centroids1=[[136, 122, 103], [207, 135, 67], [132, 156, 179]]

ITERATION 1
1: L2 distances: [0.0, 163.9, 83.8]--->Cluster 1
2: L2 distances: [163.9, 0.0, 178.7]--->Cluster 2
3: L2 distances: [83.8, 178.7, 0.0]--->Cluster 3
4: L2 distances: [100.4, 100.0, 80.6]--->Cluster 3
5: L2 distances: [145.7, 90.7, 200.6]--->Cluster 2
6: L2 distances: [209.3, 93.5, 252.3]--->Cluster 2
7: L2 distances: [139.5, 45.4, 175.4]--->Cluster 2
8: L2 distances: [204.0, 65.3, 236.3]--->Cluster 2
9: L2 distances: [162.6, 43.2, 156.5]--->Cluster 2
10: L2 distances: [298.7, 178.3, 344.3]--->Cluster 2
11: L2 distances: [69.7, 120.3, 137.4]--->Cluster 1
12: L2 distances: [180.4, 27.6, 203.2]--->Cluster 2
13: L2 distances: [176.2, 46.0, 209.0]--->Cluster 2
14: L2 distances: [121.4, 43.2, 146.5]--->Cluster 2
15: L2 distances: [251.8, 135.1, 297.3]--->Cluster 2
16: L2 distances: [104.5, 65.4, 114.6]--->Cluster 2

ITERATION 2
1: L2 distances: [34.9, 173.7, 83.3]--->Cluster 1
2: L2 distances: [139.4, 45.8, 139

In [12]:
difference(centroids1, centroids2)

Colors that are in different clusters after 2 simulations:
{2, 3, 7}
