In [9]:
%matplotlib inline
import scipy.io
import numpy as np
from sklearn.model_selection import train_test_split

from minisom import MiniSom
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

## Data Exploration

In [3]:
# Load Data
# Each .mat file is opened with scipy.io.loadmat and indexed by the name of the
# variable stored inside it. The files are read in the order listed below.
train_x, train_y, test_x = (
    scipy.io.loadmat(mat_file)[variable]
    for mat_file, variable in (
        ('train_data.mat', 'data_train'),
        ('train_label.mat', 'label_train'),
        ('test_data.mat', 'data_test'),
    )
)

## Main functions and classes

In [4]:
# Helper functions
def exponential_decay(parameter, n, t1):
    """Exponentially decay ``parameter`` at step ``n``.

    Returns ``parameter * exp(-n / tau)``. The time constant ``tau`` is chosen
    from the magnitude of ``parameter``:

    * ``parameter > 0.1``  -> ``tau = 1000 / ln(t1)``
    * otherwise            -> ``tau = t1``

    NOTE(review): the 0.1 threshold presumably tells the SOM neighbourhood
    sigma apart from the learning rate (evaluate_network builds the SOM with
    learning_rate=0.1) -- confirm. Also confirm that ``ln(t1)`` rather than the
    log of the initial sigma is the intended denominator.
    """
    time_constant = 1000 / np.log(t1) if parameter > 0.1 else t1
    return parameter * np.exp(-n / time_constant)

def asymptotic_decay(parameter, t, max_iter):
    """Hyperbolically decay ``parameter`` at step ``t``.

    Computes ``parameter / (1 + t / (max_iter / 2))``: the value is unchanged at
    ``t == 0``, halved at ``t == max_iter / 2`` and reduced to a third at
    ``t == max_iter``.
    """
    half_point = max_iter / 2
    shrink_factor = 1 + t / half_point
    return parameter / shrink_factor

def get_distance(x1, x2):
    """Return the Euclidean distance between two arrays.

    The element-wise squared differences are summed over every element (no
    axis is given), so the result is a single scalar whatever the rank of the
    inputs. The difference must support ``.sum()``, i.e. be a NumPy value.
    """
    squared_diff = (x1 - x2) ** 2
    return np.sqrt(squared_diff.sum())

In [5]:
# Class definition for SOM network
class MiniSomv2(MiniSom):
    """MiniSom variant with custom initialisation and a two-phase training scheme.

    ``train`` drives the inherited ``update`` once per presented sample, and
    ``converge`` afterwards moves only the winning node towards each sample
    with a fixed learning rate. Sample selection uses the global ``np.random``
    state.
    """

    @staticmethod
    def _presentation_order(data, num_iteration, random):
        """Return ``num_iteration`` sample indices cycling through ``data``, optionally shuffled."""
        order = np.arange(num_iteration) % len(data)
        if random:
            np.random.shuffle(order)
        return order

    def random_weights_init(self, data):
        """Set every node's weight vector to a randomly drawn row of ``data``.

        Rows are drawn independently with ``np.random.randint``, so the same
        sample may initialise more than one node.
        """
        n_samples = len(data)
        cursor = np.nditer(self._activation_map, flags=['multi_index'])
        for _ in cursor:
            # cursor.multi_index is the grid position of the node being visited.
            self._weights[cursor.multi_index] = data[np.random.randint(n_samples)]

    def update_converge(self, x, win, lr):
        """Move only the winning node ``win`` towards sample ``x`` by a fraction ``lr``."""
        row, col = win[0], win[1]
        step = (x - self._weights[row, col]) * lr
        self._weights[row, col] += step

    def converge(self, data, num_iteration, lr = 0.01, random=False):
        """Run ``num_iteration`` winner-only updates with a constant learning rate ``lr``.

        Samples are presented cyclically (index modulo ``len(data)``); with
        ``random=True`` that presentation order is shuffled first.
        """
        for sample_idx in self._presentation_order(data, num_iteration, random):
            sample = data[sample_idx]
            self.update_converge(sample, self.winner(sample), lr = lr)

    def train(self, data, num_iteration, random=False):
        """Run ``num_iteration`` calls of the inherited ``update``, one per presented sample.

        The position in the presentation sequence (0, 1, 2, ...) is passed to
        ``update`` as the current step, together with ``num_iteration``.
        Samples are presented cyclically, shuffled first when ``random=True``.
        """
        order = self._presentation_order(data, num_iteration, random)
        for step, sample_idx in enumerate(order):
            sample = data[sample_idx]
            self.update(sample, self.winner(sample), step, num_iteration)

In [14]:
# Class Definition for RBF network
class RBF:
    """Gaussian radial-basis-function classifier with fixed centres.

    The hidden layer has one Gaussian unit per entry of ``centroids``. The
    linear output weights are solved in closed form against a two-column
    one-hot encoding of the -1/+1 labels.

    Parameters
    ----------
    X : array
        Training samples, one per row.
    y : array
        Training labels with values -1 or 1, as a column ``(n_samples, 1)``
        (``one_hot`` concatenates along axis 1, so a 2-D column is required).
    centroids : sequence of arrays
        Centre vectors of the Gaussian units.
    sigma : float
        Width shared by every Gaussian unit.
    """

    def __init__(self, X, y, centroids, sigma = 1.0):
        self.X, self.y, self.centroids, self.sigma = X, y, centroids, sigma

    def one_hot(self, x):
        """Encode a column of -1/+1 labels as rows ``[is_minus_one, is_plus_one]``."""
        arr = np.concatenate(((x == -1), (x == 1)), axis=1)
        return arr.astype(int)

    def rbf(self, x, c):
        """Gaussian activation ``exp(-0.5 * d**2 / sigma**2)`` for distance ``d`` between ``x`` and ``c``."""
        d = get_distance(x, c)
        return np.exp(-0.5 * (d**2) / self.sigma **2)

    def rbf_list(self, X, centroids):
        """Return the activation matrix: one row per sample, one column per centroid."""
        return np.array([[self.rbf(x, c) for c in centroids] for x in X])

    def fit(self):
        """Solve the output weights ``self.w`` by least squares on the training data."""
        RBF_X = self.rbf_list(self.X, self.centroids)
        # Normal-equation solution; pinv keeps it defined when RBF_X.T @ RBF_X is singular.
        self.w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ self.one_hot(self.y)

    def _accuracy(self, data_x, data_y):
        """Fraction of samples in ``data_x`` whose predicted label equals ``data_y``."""
        preds = self.predict(data_x).reshape(-1, 1)
        # Force a column so 1-D labels cannot broadcast into an (n, n) comparison.
        targets = np.asarray(data_y).reshape(-1, 1)
        return (preds == targets).sum() / len(preds)

    def evaluate(self, data_x = None, data_y = None):
        """Measure classification accuracy.

        With no arguments the training data is scored, the accuracy is printed
        and ``None`` is returned. With ``data_x`` and ``data_y`` the accuracy on
        that data is returned as a fraction in [0, 1] and nothing is printed.

        Raises
        ------
        ValueError
            If ``data_x`` is given without ``data_y``.
        """
        if data_x is None:
            # Score the training set, whatever its size.
            acc = self._accuracy(self.X, self.y)
            print(f'Accuracy on Training Data is: {acc*100:0.2f}%')
            return None
        if data_y is None:
            raise ValueError('data_y is required when data_x is given')
        return self._accuracy(data_x, data_y)

    def predict(self, data):
        """Return a 1-D array of predicted labels (-1 or 1), one per row of ``data``."""
        rbf_data = self.rbf_list(data, self.centroids)
        preds = rbf_data @ self.w

        # Column 0 of the one-hot encoding is class -1, column 1 is class +1.
        preds = np.argmax(preds, axis = 1)
        preds[preds==0] = -1

        return preds

## Setup of the looping tester

In [18]:
seed = 1337
# Hold out 30% of the labelled data for validation. The result is the list
# [X_train, X_valid, y_train, y_valid], which evaluate_network unpacks in that order.
# NOTE(review): `seed` only fixes this split; the np.random calls in MiniSomv2
# are not seeded in the cells visible here -- confirm whether that is intended.
data = train_test_split(
    train_x,
    train_y,
    test_size=0.3,
    random_state=seed,
)

In [100]:
def evaluate_network(data, trials, size, decay_fn, som_iterations, som_sigma = None, rbf_sigma = None):
    """Repeatedly train a SOM-initialised RBF network and print its mean validation accuracy.

    Parameters
    ----------
    data : sequence
        ``(X_train, X_valid, y_train, y_valid)``, unpacked in that order.
    trials : int
        Number of independent SOM + RBF trainings to average over.
    size : int
        Side length of the square SOM; the RBF network gets ``size**2`` centres.
    decay_fn : callable
        Passed to the SOM as ``decay_function``.
    som_iterations : int
        Iterations of ``som.train``; the following ``som.converge`` phase runs
        ``som_iterations * 8`` iterations with ``lr = 0.01``.
    som_sigma : float, optional
        SOM sigma. Any falsy value (``None``, 0) selects
        ``sqrt(2 * (size - 1)**2) * 0.5``.
    rbf_sigma : float, optional
        RBF width. Any falsy value selects 1.

    Prints the average accuracy over all trials; returns ``None``.
    """
    X_train, X_valid, y_train, y_valid = data

    # `or` reproduces the truthiness-based fallback to the defaults.
    som_sigma = som_sigma or np.sqrt(2*(size-1)**2) * 0.5
    rbf_sigma = rbf_sigma or 1

    scores = []
    for _ in range(trials):
        # Phase 1: ordering -- initialise nodes from random samples, then train the SOM.
        som = MiniSomv2(
            size, size,
            input_len = X_train.shape[1],
            decay_function=decay_fn,
            sigma=som_sigma,
            learning_rate=0.1,
        )
        som.random_weights_init(X_train)
        som.train(X_train, som_iterations, random = True)

        # Phase 2: convergence -- winner-only updates with a small fixed learning rate.
        som.converge(X_train, som_iterations*8, lr = 0.01, random = True)

        # Flatten the size x size grid of SOM weights into size**2 centre vectors.
        centroids = som.get_weights().reshape(size**2,-1)

        # Fit an RBF network on those centres and score it on the validation split.
        rbfnet = RBF(X_train, y_train, centroids, sigma = rbf_sigma)
        rbfnet.fit()
        scores.append(rbfnet.evaluate(X_valid, y_valid))

    accuracy = sum(scores) / trials
    print(f'Average accuracy after {trials} trials is: {accuracy*100:0.2f}%')
        
    

### Baseline

In [77]:
# BASELINE: 4x4 SOM, exponential decay, 1000 SOM iterations, RBF sigma 1,
# averaged over 100 trials. som_sigma = 2.121 is approximately the value
# evaluate_network would pick by default for size 4 (sqrt(2*(4-1)**2) * 0.5).
evaluate_network(data, 100, 4, exponential_decay, 1000, som_sigma = 2.121, rbf_sigma = 1)

Average accuracy after 100 trials is: 80.79%


### Changing SOM size

In [78]:
# 2x2 map. som_sigma is left to the size-based default (~0.707 here), so the
# SOM sigma changes together with the map size; rbf_sigma defaults to 1.
evaluate_network(data, 100, 2, exponential_decay, 1000)

Average accuracy after 100 trials is: 83.11%


In [80]:
# 3x3 map; size-based default som_sigma (~1.414), rbf_sigma defaults to 1.
evaluate_network(data, 100, 3, exponential_decay, 1000)

Average accuracy after 100 trials is: 79.70%


In [81]:
# 5x5 map; size-based default som_sigma (~2.828), rbf_sigma defaults to 1.
evaluate_network(data, 100, 5, exponential_decay, 1000)

Average accuracy after 100 trials is: 83.68%


In [82]:
# 6x6 map; size-based default som_sigma (~3.536), rbf_sigma defaults to 1.
evaluate_network(data, 100, 6, exponential_decay, 1000)

Average accuracy after 100 trials is: 83.98%


### Changing decay function

In [83]:
# Same as the baseline but with asymptotic_decay; som_sigma falls back to the
# size-4 default (~2.121) and rbf_sigma to 1.
evaluate_network(data, 100, 4, asymptotic_decay, 1000)

Average accuracy after 100 trials is: 79.89%


### Changing SOM training iterations

In [84]:
# 500 SOM training iterations, hence 500*8 = 4000 winner-only convergence updates.
evaluate_network(data, 100, 4, exponential_decay, 500)

Average accuracy after 100 trials is: 75.98%


In [85]:
# 1500 SOM training iterations, hence 1500*8 = 12000 winner-only convergence updates.
evaluate_network(data, 100, 4, exponential_decay, 1500)

Average accuracy after 100 trials is: 84.07%


In [86]:
# 2000 SOM training iterations, hence 2000*8 = 16000 winner-only convergence updates.
evaluate_network(data, 100, 4, exponential_decay, 2000)

Average accuracy after 100 trials is: 84.78%


In [87]:
# 3000 SOM training iterations, hence 3000*8 = 24000 winner-only convergence updates.
evaluate_network(data, 100, 4, exponential_decay, 3000)

Average accuracy after 100 trials is: 86.49%


### Changing SOM Sigma value

In [89]:
# Explicit SOM sigma of 0.25 instead of the ~2.121 used by the baseline; rbf_sigma defaults to 1.
evaluate_network(data, 100, 4, exponential_decay, 1000, som_sigma = 0.25)

Average accuracy after 100 trials is: 81.92%


In [90]:
# Explicit SOM sigma of 0.5; rbf_sigma defaults to 1.
evaluate_network(data, 100, 4, exponential_decay, 1000, som_sigma = 0.5)

Average accuracy after 100 trials is: 83.30%


In [91]:
# Explicit SOM sigma of 1; rbf_sigma defaults to 1.
evaluate_network(data, 100, 4, exponential_decay, 1000, som_sigma = 1)

Average accuracy after 100 trials is: 82.47%


In [92]:
# Explicit SOM sigma of 1.5; rbf_sigma defaults to 1.
evaluate_network(data, 100, 4, exponential_decay, 1000, som_sigma = 1.5)

Average accuracy after 100 trials is: 80.93%


### Changing RBF Sigma value

In [93]:
# RBF Gaussian width 0.5 (baseline uses 1); som_sigma falls back to the size-4 default (~2.121).
evaluate_network(data, 100, 4, exponential_decay, 1000, rbf_sigma = 0.5)

Average accuracy after 100 trials is: 80.12%


In [94]:
# RBF Gaussian width 2; som_sigma falls back to the size-4 default (~2.121).
evaluate_network(data, 100, 4, exponential_decay, 1000, rbf_sigma = 2)

Average accuracy after 100 trials is: 85.35%


In [95]:
# RBF Gaussian width 5; som_sigma falls back to the size-4 default (~2.121).
evaluate_network(data, 100, 4, exponential_decay, 1000, rbf_sigma = 5)

Average accuracy after 100 trials is: 88.33%


In [96]:
# RBF Gaussian width 10; som_sigma falls back to the size-4 default (~2.121).
evaluate_network(data, 100, 4, exponential_decay, 1000, rbf_sigma = 10)

Average accuracy after 100 trials is: 86.52%


### Optimal settings

In [97]:
# Combines the best-scoring value from each single-factor sweep recorded above
# (3000 SOM iterations, som_sigma 0.5, rbf_sigma 5); the map size stays at 4.
evaluate_network(data, 100, 4, exponential_decay, 3000, som_sigma = 0.5, rbf_sigma = 5)

Average accuracy after 100 trials is: 88.82%


## Data visualisation for SOM