In [1]:
!pip install minisom



In [2]:
from sklearn.metrics import silhouette_score
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from minisom import MiniSom
from math import exp, log

import numpy as np

In [3]:
class SOM():
  def __init__(self, features, clusters, learning_rate=5e-1, sigma=0.3):
    self._lr = learning_rate
    self._eta = learning_rate
    self._feat = features
    self._c = clusters
    self._sigma0 = sigma
    self._sigma = sigma
    self._t1 = 1000 / log(sigma)
    self._t2 = 1000
    self._weights = np.random.default_rng().standard_normal((features, clusters))
  
  def __activate(self, x):
    x = x.reshape((self._feat, 1))
    dist = np.linalg.norm(self._weights - x, axis=0)
    return dist.argmin()

  def __dw(self, x, winner):
    x = x.reshape((self._feat, 1))
    dij2 = self.__dij_square(winner)
    h = self.__gaussian(dij2)
    return self._eta * h * (x - self._weights)

  def __dij_square(self, winner):
    d = [(i - winner) ** 2 for i in range(self._c)]
    return np.array(d).reshape((1, -1))

  def __gaussian(self, dij):
    sigma2 = -1 * self._sigma ** 2
    return np.exp(dij / sigma2)

  def predict(self, X):
    clusters = list()
    for x in X:
      y = self.__activate(x)
      clusters.append(y)
    return clusters

  def fit(self, X, epochs=5000):
    for j in range(epochs):
      # p = int(j * 50 / epochs)
      # for i, x in enumerate(X):
      for x in X:
        winner = self.__activate(x)
        del_w = self.__dw(x, winner)
        self._weights += del_w
      if self._eta > 1e-2:
        self._eta = self._lr * exp(-j / self._t2)
      self._sigma = self._sigma0 * exp(-j / self._t1)
        # print(f"({i+1:3d}/{X.shape[0]}) {'='*p}>{'='*(50 - p)}", end="\r")



 ## Kmeans from previous assignment

class kMeans():
  def __init__(self, dimension, k):
    self._dimension = dimension
    self._clusters = k
    self._prototypes = np.zeros((k, dimension))

  def __sample_with_prob(self, sample_space, probability, num_samples=1):
    idx = [i for i in range(sample_space.shape[0])]
    pos = np.random.choice(idx, p=probability)

    return sample_space[pos]

  def __initialize_prototypes(self, X):
    probs = None
    center = self.__sample_with_prob(X, probs)
    self._prototypes[0, :] = center

    for i in range(1, self._clusters):
      probs = self.__nearest_cluster_dist(X, i)
      probs = [j ** 2 for j in probs]
      tot = sum(probs)
      probs = [j/tot for j in probs]
      center = self.__sample_with_prob(X, probs)
      self._prototypes[i, :] = center

  def __nearest_cluster_dist(self, X, i=None, clust_assign=False):
    if i is not None:
      prototypes = self._prototypes[0:i, :]
    else:
      prototypes = self._prototypes
    dists = list()
    for x in X:
      temp = []
      for cluster in prototypes:
        temp.append(np.linalg.norm(x - cluster))
      if clust_assign:
        dists.append(np.argmin(temp))
      else:
        dists.append(np.min(temp))
    return dists
  
  def predict(self, X):
    return self.__nearest_cluster_dist(X, clust_assign=True)

  def __cluster_means(self, X, c):
    c = np.array(c)
    centroids = np.zeros(self._prototypes.shape)
    for k in range(self._clusters):
      cluster_group = X[c == k, :]
      centroids[k, :] = cluster_group.mean(axis=0)
    return centroids

  def __SSE(self, X, c):
    sse = 0
    for i, x in enumerate(X):
      sse += np.linalg.norm(x - self._prototypes[c[i]]) ** 2
    return sse

  def fit(self, X, max_iterations=3000):
    self.__initialize_prototypes(X)
    current = self._prototypes

    for i in range(max_iterations):
    # while True:
      c = self.__nearest_cluster_dist(X, clust_assign=True)
      centroids = self.__cluster_means(X, c)
      
      if np.array_equal(centroids, self._prototypes):
        # print(f"Converged in {i+1} iterations")
        return self.__SSE(X, c)

      self._prototypes = centroids
    
    print(f"Reached Max Iterations but not converged.")
    return self.__SSE(X, c)

In [4]:
X, _ = load_iris(return_X_y=True)

In [5]:
# Custom SOM Implementation
model = SOM(X.shape[1], clusters=3)
model.fit(X)
preds = model.predict(X)
score = silhouette_score(X, preds)
print(f"SOM Custom Implementation        K: {3}, Silhouette Score: {score}")


# SOM Library Implementation
model = MiniSom(3, 1, X.shape[1], sigma=0.3, learning_rate=0.5)
model.train(X, 500)
preds = list()
for x in X:
  c, _ = model.winner(x)
  preds.append(c)
score = silhouette_score(X, preds)
print(f"SOM Library Implementation       K: {3}, Silhouette Score: {score}")


# Custom KMeans implementation from previous assignment
model = kMeans(X.shape[1], 3)
model.fit(X)
preds = model.predict(X)
score = silhouette_score(X, preds)
print(f"KMeans Custom Implementation       K: {3}, Silhouette Score: {score}")


# Sklearn KMeans Implementation
model = KMeans(n_clusters=3, random_state=42, max_iter=3000)
model.fit(X)
preds = model.labels_
score = silhouette_score(X, preds)
print(f"scikit Learn KMeans Implementation       K: {3}, Silhouette Score: {score}")

SOM Custom Implementation        K: 3, Silhouette Score: 0.5024364223221099
SOM Library Implementation       K: 3, Silhouette Score: 0.5408533757076138
KMeans Custom Implementation       K: 3, Silhouette Score: 0.5511916046195916
scikit Learn KMeans Implementation       K: 3, Silhouette Score: 0.5528190123564091
