# **SOM** - Linear map & Sampled initialization of weights

## Import libraries

In [None]:
import numpy as np
from keras.datasets import mnist
from matplotlib import pyplot as plt
import time
from math import exp



## Load the dataset

In [None]:
# Number of training epochs; read later by the learning-rate schedule and the
# training loop.
iterations = 20

# Only the MNIST test split is used; the training split is discarded.
(_, _), (test_images, test_labels) = mnist.load_data()

# Draw 100 images of every digit 0-9. `replace=False` makes the 100 samples of
# a digit distinct; np.random.choice samples WITH replacement by default, which
# would silently put duplicate images into the reduced set.
reduced_mnist_images = [
    test_images[np.random.choice(a=np.where(test_labels == j)[0], size=100, replace=False)]
    for j in range(10)
]
# Convert to float32 and divide by 255 (brings 8-bit pixel values into [0, 1]).
reduced_mnist_images = np.array(reduced_mnist_images, dtype='float32') / 255

# Flatten the per-digit stacks into one 784-dimensional row per image.
reduced_mnist_images = reduced_mnist_images.reshape((1000, 784))
# Labels in the same class-major order as the rows above: 100 zeros, 100 ones, ...
reduced_mnist_labels = np.repeat(np.arange(10, dtype='int'), 100)

# Shuffle images and labels with one shared permutation so they stay aligned.
shuffled_indices = np.random.permutation(np.arange(1000))

reduced_mnist_images = reduced_mnist_images[shuffled_indices]
reduced_mnist_labels = reduced_mnist_labels[shuffled_indices]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## Implementation

In [None]:
def learning_rate_generator_with_decay(total_iterations=None):
    """Yield a linearly decaying learning rate, one value per ``next()`` call.

    The k-th value (k = 0, 1, 2, ...) is ``1 - k / total``, clamped at 0 so the
    rate never turns negative if the generator is advanced more than ``total``
    times.

    Args:
        total_iterations: Number of steps over which the rate falls from 1 to
            0. When ``None`` (the default) the module-level ``iterations`` is
            read at every step, which is the original behaviour.

    Yields:
        float: The learning rate for the current step.
    """
    # Named `step` rather than `time` so the imported `time` module is not
    # shadowed inside this function.
    step = 0
    while True:
        total = iterations if total_iterations is None else total_iterations
        yield max(0.0, 1 - (step / total))
        step += 1

In [None]:
# Build the SOM weight matrix directly from the data:
# 625 distinct rows (no replacement) are picked from the 1000 reduced-MNIST
# samples, one per neuron/cluster, and the selection is transposed so the
# matrix is 784 (input size) x 625 (neurons/clusters).
som_weight_matrix = reduced_mnist_images[
    np.random.choice(np.arange(1000), size=625, replace=False)
].T

In [None]:
lr_with_decay = learning_rate_generator_with_decay()    # instantiate a lr generator

# Weight snapshots keyed by epoch number: key 0 holds the initial weights,
# key k the weights after k completed epochs.
weights_snapshot = {0: som_weight_matrix.copy()}

# coverage_history[k, n] is True when neuron n won at least once during the
# k-th epoch (1-based, i.e. the same key as the snapshot taken at the end of
# that epoch). Row 0 stays all-False: no training precedes the initial snapshot.
coverage_history = np.zeros((iterations + 1, som_weight_matrix.shape[1]), dtype=bool)

# Last-epoch statistics: wins per neuron, and wins per (neuron, label) pair.
winning_counts = dict()
winning_classes_counts = dict()

for epoch in range(iterations):

    lr = next(lr_with_decay)    # Get new learning rate value

    start = time.time()     # start the timer

    for x in range(len(reduced_mnist_images)):
        sample = reduced_mnist_images[x]

        # Calculate Euclidean distance for all neurons
        distance_matrix = np.sqrt(((som_weight_matrix.transpose() - sample)**2).sum(axis = 1))

        # Find index of minimum distance
        winner_unit = np.argmin(distance_matrix)
        # Record the win in the row that matches the snapshot stored at the end
        # of this epoch (key epoch + 1). Writing to row `epoch` would pair each
        # snapshot with the wins of the following epoch and leave the final
        # row empty.
        coverage_history[epoch + 1, winner_unit] = True

        # Update the neuron's weights: move the winner towards the sample by a
        # fraction lr of their difference (only the winner is updated).
        dif = sample - som_weight_matrix[:, winner_unit]
        som_weight_matrix[:, winner_unit] += (dif * lr)

        # If it is the last epoch, we want to store the winning statistics of neurons
        if epoch + 1 == iterations:
            label = reduced_mnist_labels[x]
            winning_counts[winner_unit] = winning_counts.get(winner_unit, 0) + 1
            winning_classes_counts[(winner_unit, label)] = winning_classes_counts.get((winner_unit, label), 0) + 1

    end = time.time()
    print("epoch %d - %.3f s" % (epoch + 1, end - start))

    # Copy, because som_weight_matrix keeps being updated in place.
    weights_snapshot[epoch + 1] = som_weight_matrix.copy()



epoch 1 - 1.047 s
epoch 2 - 0.859 s
epoch 3 - 0.868 s
epoch 4 - 0.874 s
epoch 5 - 0.887 s
epoch 6 - 0.894 s
epoch 7 - 0.904 s
epoch 8 - 0.920 s
epoch 9 - 0.935 s
epoch 10 - 0.955 s
epoch 11 - 0.949 s
epoch 12 - 0.962 s
epoch 13 - 0.979 s
epoch 14 - 1.002 s
epoch 15 - 1.001 s
epoch 16 - 0.999 s
epoch 17 - 1.020 s
epoch 18 - 0.900 s
epoch 19 - 0.883 s
epoch 20 - 1.031 s


### Show the twenty neurons with the most wins in the last epoch

In [None]:
import pandas as pd

# Neurons ranked by their last-epoch win count, highest first; at most the top
# twenty are kept (fewer if fewer neurons won anything).
twenty_neurons_with_most_winning = [k for k, _ in sorted(winning_counts.items(), key=lambda item: item[1], reverse = True)][:20]

# One row per selected neuron, one column per class label 0-9. The row count
# follows the list length so that the table and the DataFrame index always
# agree, even when fewer than twenty neurons have wins.
stat_table = np.zeros((len(twenty_neurons_with_most_winning), 10), dtype='int')

for i, neu in enumerate(twenty_neurons_with_most_winning):
    for class_label in range(10):
        # (neuron, label) pairs that never occurred are absent from the dict -> 0
        stat_table[i, class_label] = winning_classes_counts.get((neu, class_label), 0)

df = pd.DataFrame(stat_table, index=twenty_neurons_with_most_winning, columns=['label {0}'.format(i) for i in range(0,10)])
# Row total = number of images won by that neuron across all labels.
df['Total'] = df.sum(axis=1)
print("Total sum:  %d images" % df['Total'].sum())
df

Total sum:  106 images


Unnamed: 0,label 0,label 1,label 2,label 3,label 4,label 5,label 6,label 7,label 8,label 9,Total
516,0,0,0,0,0,0,0,0,0,10,10
271,0,0,0,0,0,0,7,0,0,0,7
597,0,0,0,0,0,0,0,0,0,6,6
207,0,6,0,0,0,0,0,0,0,0,6
552,0,0,0,0,0,0,0,0,0,6,6
123,0,0,0,0,0,0,0,0,6,0,6
614,0,5,0,0,0,0,0,0,0,0,5
527,0,0,0,5,0,0,0,0,0,0,5
188,0,0,0,0,0,5,0,0,0,0,5
235,0,0,0,0,1,0,0,0,0,4,5


### Generate and save figures

In [None]:
import os

# Create the folder that receives the per-epoch figures; an already existing
# folder is accepted silently.
os.makedirs(name='./linear-topology_sampled-weights/', exist_ok=True)

In [None]:
# Render one 25 x 25 grid figure per stored snapshot: each cell shows one
# neuron's 784 weights reshaped to a 28 x 28 image.
for epoch, weight_mat in weights_snapshot.items():
    fig = plt.figure(figsize=(17,17))
    axes = [fig.add_subplot(25,25,i+1) for i in range(625)]

    for n, ax in enumerate(axes):
        # Neurons flagged in this epoch's coverage_history row are drawn with
        # the "plasma" colormap, all others in inverted grayscale.
        colormap = "plasma" if coverage_history[epoch, n] else "gray_r"
        ax.imshow(weight_mat[:,n].reshape((28,28)), cmap=colormap)

        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        # Column numbers under the bottom row, row numbers left of the first column.
        if n >= 600: ax.set_xlabel(n%25 + 1)
        if n%25==0: ax.set_ylabel(n//25 + 1)
        ax.tick_params(axis='both', which='both', length=0)
        # Actually call the method; the bare attribute reference `ax.label_outer`
        # is a no-op.
        ax.label_outer()

    fig.subplots_adjust(wspace=0, hspace=0)
    fig.suptitle("Epoch %d - [Linear Topology , Sampled Weights Initialization]" % epoch, fontsize=18)

    # write the figure for this epoch on disk; the extension is spelled out so
    # the file name does not depend on matplotlib's default save format and
    # the log line below reports exactly the path that was written
    path = f"./linear-topology_sampled-weights/epoch-{epoch}.png"
    fig.savefig(path)
    plt.close(fig)

    print(f"Saved: {path}")

Saved: ./linear-topology_sampled-weights/epoch-0.png
Saved: ./linear-topology_sampled-weights/epoch-1.png
Saved: ./linear-topology_sampled-weights/epoch-2.png
Saved: ./linear-topology_sampled-weights/epoch-3.png
Saved: ./linear-topology_sampled-weights/epoch-4.png
Saved: ./linear-topology_sampled-weights/epoch-5.png
Saved: ./linear-topology_sampled-weights/epoch-6.png
Saved: ./linear-topology_sampled-weights/epoch-7.png
Saved: ./linear-topology_sampled-weights/epoch-8.png
Saved: ./linear-topology_sampled-weights/epoch-9.png
Saved: ./linear-topology_sampled-weights/epoch-10.png
Saved: ./linear-topology_sampled-weights/epoch-11.png
Saved: ./linear-topology_sampled-weights/epoch-12.png
Saved: ./linear-topology_sampled-weights/epoch-13.png
Saved: ./linear-topology_sampled-weights/epoch-14.png
Saved: ./linear-topology_sampled-weights/epoch-15.png
Saved: ./linear-topology_sampled-weights/epoch-16.png
Saved: ./linear-topology_sampled-weights/epoch-17.png
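Saved: ./linear-topology_sampled-weights/epoch-18.png
Saved: ./linear-topology_sampled-weights/epoch-19.png
Saved: ./linear-topology_sampled-weights/epoch-20.png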

In [None]:
# Shell escape: recursively pack the saved figures into linear_sampled.zip.
# NOTE(review): the absolute /content/... path assumes the notebook's working
# directory is /content (the figures are saved with a relative ./ path) - confirm
# before running outside that environment.
!zip -r linear_sampled.zip /content/linear-topology_sampled-weights

  adding: content/linear-topology_sampled-weights/ (stored 0%)
  adding: content/linear-topology_sampled-weights/epoch-20.png (deflated 11%)
  adding: content/linear-topology_sampled-weights/epoch-14.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-13.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-3.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-5.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-4.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-10.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-6.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-2.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-0.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-18.png (deflated 1%)
  adding: content/linear-topology_sampled-weights/epoch-11.png (deflated 1%)
  adding: content/