# NNSOM (ver 0.0.3) Test Training


## Notebook Setting

In [1]:
# Mount Google Drive into the Colab VM; the dataset/model/output paths used
# later in this notebook all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install --upgrade NNSOM

Collecting NNSOM
  Downloading nnsom-0.0.3-py3-none-any.whl (9.8 kB)
Installing collected packages: NNSOM
Successfully installed NNSOM-0.0.3


In [None]:
# Standard library
import os
import pickle
from datetime import datetime

# Third-party numerical stack
import numpy as np
import pandas as pd
from numpy.random import default_rng
from scipy.spatial.distance import cdist

# Package under test
from NNSOM.som import SOM

# Print the wall-clock time at which this session's setup ran
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Current Time =", current_time)

Current Time = 01:58:50


In [None]:
# Init_Flag: True -> build a new SOM and initialize its weights from X1;
# False -> load the pickled, already-initialized SOM from Init_Som_File.
# Only consulted when Train_Flag is True.
Init_Flag = True
# Init_Save_Flag: pickle the freshly initialized SOM to Init_Som_File.
# Only consulted when both Train_Flag and Init_Flag are True.
Init_Save_Flag = True
# Train_Flag: True -> train the SOM in this session;
# False -> skip initialization/training and load Trained_SOM_File instead.
Train_Flag = True
# Save_SOM_Flag: pickle the trained SOM to Trained_SOM_File.
# Only consulted when Train_Flag is True.
Save_SOM_Flag = True

In [None]:
# Grid size and project folders
# SOM_Row_Num is the side length of the (square) map.
SOM_Row_Num = 4

# Project root on the mounted Drive; every sub-folder keeps its trailing slash
# because file names are appended to these strings directly.
abs_path = "/content/drive/MyDrive/Colab Notebooks/DATS6501_Capstone/Tests/NNSOMTest/"
data_path = f"{abs_path}dataset/"
model_path = f"{abs_path}model/"
output_path = f"{abs_path}output/"

In [None]:
# Square map: SOM_Row_Num neurons along each side
Dimensions = (SOM_Row_Num,) * 2

# Arguments handed to som_net.train together with the training data
Epochs, Steps = 200, 100
Init_neighborhood = 3

# One seeded generator drives the data shuffle, so runs are repeatable
SEED = 1234567
rng = default_rng(SEED)

In [None]:
# Pickle locations; seed, map size and (for the trained model) epoch count are
# encoded in the file name so different configurations do not overwrite each other.
Init_Som_File = f"{model_path}SOM_init_f4_ep_50_Seed_{SEED}_Size_{SOM_Row_Num}.pkl"
Trained_SOM_File = f"{model_path}SOM_Model_f4_ep_50_Epoch_{Epochs}_Seed_{SEED}_Size_{SOM_Row_Num}.pkl"

In [None]:
# Load the feature matrix that the SOM will be fitted on
input_file = f"{data_path}cv_electra_f4_ep_50_Features.npy"
X = np.load(input_file)

In [None]:
# Number of rows (first axis) in the loaded feature matrix
tot_num = X.shape[0]

# Shuffle the rows with the seeded generator.
# NOTE: re-running this cell draws a new permutation from rng and reshuffles X again.
shuffled_rows = rng.permutation(tot_num)
X = X[shuffled_rows]

In [None]:
# Initialization can take a long time on large data sets, so it runs on a
# reduced sample: X1 (the first eighth of the shuffled rows) is used for
# initialization, while the full X is used for training.
X1 = X[:tot_num // 8].T

# Both arrays are transposed before being handed to the SOM.
# NOTE: re-running this cell transposes X back to its previous orientation.
X = X.T

In [None]:
# Display the transposed training matrix as a quick sanity check
X

array([[ 0.19598798,  0.11238787,  0.03396067, ..., -0.4412961 ,
         0.30089036,  0.22426115],
       [-0.5313212 , -0.5931386 , -0.510237  , ..., -0.11582275,
        -0.5675892 , -0.46735212],
       [ 1.1355038 ,  0.95942116,  1.1728005 , ...,  1.2023962 ,
         0.8775418 ,  1.0716735 ],
       ...,
       [-0.4645274 , -0.63972855, -0.57308996, ..., -0.6743317 ,
        -0.40340135, -0.40148407],
       [ 0.12299869,  0.10662067,  0.10073099, ...,  1.0335336 ,
         0.26131585,  0.17211582],
       [-1.0855391 , -1.0736814 , -1.0800399 , ..., -0.6518752 ,
        -0.6799242 , -0.8346525 ]], dtype=float32)

In [None]:
# Train SOM, or load pretrained SOM
#
# Train_Flag   -> initialize (or load an initialized SOM), then train on X.
# otherwise    -> load the already-trained SOM from Trained_SOM_File.
# The result is always bound to `som_net`.

def _save_pickle(obj, path):
  """Pickle `obj` to `path`, creating the parent directory if it is missing.

  Saving happens only after a long initialization/training run, so a missing
  model folder must not be allowed to raise FileNotFoundError and lose the work.
  """
  os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
  with open(path, 'wb') as f:
    pickle.dump(obj, f)


if Train_Flag:

  if Init_Flag:
    # Initialize weights of SOM using the reduced sample X1
    som_net = SOM(Dimensions)
    som_net.init_w(X1)

    if Init_Save_Flag:
      # Keep the initialized (untrained) network so later runs can skip this step
      _save_pickle(som_net, Init_Som_File)
  else:
    # Read in initialized SOM
    # NOTE: pickle.load can execute arbitrary code; only load files you created.
    with open(Init_Som_File, 'rb') as f:
      som_net = pickle.load(f)

  # Train network
  som_net.train(X, Init_neighborhood, Epochs, Steps)

  if Save_SOM_Flag:
    # Save trained network
    _save_pickle(som_net, Trained_SOM_File)

else:
  # Read in trained network
  # NOTE: pickle.load can execute arbitrary code; only load files you created.
  with open(Trained_SOM_File, 'rb') as f:
    som_net = pickle.load(f)

Beginning Initialization
Current Time = 02:01:42
Ending Initialization
Current Time = 02:04:46
Beginning Training
Current Time = 02:04:46
50
Current Time = 02:05:59
100
Current Time = 02:07:13
150
Current Time = 02:08:25
200
Current Time = 02:09:37
Ending Training
Current Time = 02:09:37


In [None]:
# Compute statistics
# Euclidean distance from every SOM weight (rows) to every input (columns)
x_w_dist = cdist(som_net.w, X.T, 'euclidean')

# For each input, the index of its closest weight
ind1 = x_w_dist.argmin(axis=0)

# S = number of weights, Q = number of inputs
shapw = som_net.w.shape
shapx = X.shape
S, Q = shapw[0], shapx[1]
net_ones = np.ones(S)
same_size = 100*np.ones(S)

# Per-cluster results, indexed by weight:
#   Clust     - input indices assigned to the weight, nearest first
#   dist      - the matching distances, ascending
#   clustSize - number of inputs in the cluster
#   mdist     - largest distance in the cluster (stays 0 for an empty cluster)
Clust, dist, clustSize = [], [], []
mdist = np.zeros(S)

for i in range(S):
    # Inputs whose closest weight is i
    members = np.flatnonzero(ind1 == i)

    # Order the members by their distance to the cluster center (weight)
    nearest_first = np.argsort(x_w_dist[i, members])
    members = members[nearest_first]
    member_dist = x_w_dist[i, members]

    Clust.append(members)
    dist.append(member_dist)

    n_members = len(members)
    clustSize.append(n_members)

    # Distances are sorted ascending, so the last entry is the cluster maximum
    if n_members > 0:
        mdist[i] = member_dist[-1]


In [None]:
# Find quantization error: average the per-cluster mean distances over all
# clusters, with an empty cluster contributing 0 to that average.
cluster_means = [np.mean(item) if len(item) != 0 else 0 for item in dist]
quant_err = np.array(cluster_means).mean()
print(f'Quantization error = {quant_err}')

Quantization error = 6.923670481758034


In [None]:
# Topological Error - Percent inputs where closest center and next closest center
# are not neighbors
# ndist is presumably the grid distance between every pair of neurons - confirm
# against the NNSOM documentation.
ndist = som_net.neuron_dist

# Rank the weights for every input; row 0 is the closest, row 1 the runner-up
sort_dist = np.argsort(x_w_dist,axis=0)
top_dist = [ndist[best, second] for best, second in zip(sort_dist[0], sort_dist[1])]

# Report the share of inputs whose two closest weights are farther apart on the
# map than each threshold (1.1, then 2.1).
top_dist_arr = np.array(top_dist)
for max_gap, label in ((1.1, 'Topological Error (1st neighbor) = '),
                       (2.1, 'Topological Error (1st and 2nd neighbor) = ')):
    neighbors = np.where(top_dist_arr > max_gap)
    top_error = 100*len(neighbors[0])/x_w_dist.shape[1]
    print(label + str(top_error) + '%')

Topological Error (1st neighbor) = 4.258851211900655%
Topological Error (1st and 2nd neighbor) = 0.012961504332133564%


In [None]:
# Distortion
dd = [1, 2, 3] # neighborhood distances
ww = som_net.w
wwdist = cdist(ww, ww, 'euclidean')

# ndist value between every neuron (rows) and each input's closest neuron (columns)
sst = ndist[:, ind1]
sst_sq = sst * sst

for d in dd:
    # Gaussian weighting exp(-sst^2 / (2 d^2)) of the input-to-weight distances,
    # summed and scaled by Q * d * sqrt(2*pi)
    kernel = np.exp(-sst_sq / (2*d*d))
    scale = Q*d*np.sqrt(2*np.pi)
    distortion = np.sum(kernel * x_w_dist) / scale
    print(f'Distortion (d={d}) = {distortion}')

Distortion (d=1) = 20.50196417626579
Distortion (d=2) = 25.59142786753177
Distortion (d=3) = 22.603057573992704
