<a href="https://colab.research.google.com/github/zahraDehghanian97/Self_Organizing_Map/blob/master/SOFM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **prerequisit**

In [27]:
try :
  %tensorflow_version 2.x
except:
  pass
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing
from PIL import Image
import glob
import pickle
import random
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split


## mount google drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **make dataset ready**

In [15]:
def load_photos(dir_name):
    photo_list =[]
    labels = []
    for file_name in (glob.glob(dir_name+'/*')):
        image = Image.open(file_name)
        image = np.array(image).flatten()
        photo_list.append(image)
        x = file_name.split('/')
        labels.append(x[6].split('.')[1])
    photo_list = np.array(photo_list)
    photo_list = np.array(preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(photo_list))
    unique_value =list(set(labels))
    y = []
    for label in labels :
      y.append(unique_value.index(label))
    return photo_list , unique_value , y 
  
    

dir_name = "/content/drive/MyDrive/Colab Notebooks/yalefaces"
X , unique_value , y= load_photos(dir_name) 
n_features =len(X[0])
n_classes = len(unique_value)
print(n_classes)
print(X[0])
print(n_features)
print(y)



11
[1. 1. 1. ... 0. 0. 0.]
77760
[0, 6, 1, 7, 4, 2, 8, 9, 3, 5, 10, 6, 0, 1, 7, 4, 2, 9, 8, 5, 3, 10, 6, 1, 0, 7, 2, 4, 9, 8, 5, 3, 10, 6, 1, 0, 7, 4, 9, 2, 5, 3, 10, 6, 0, 1, 7, 4, 9, 2, 5, 8, 3, 6, 10, 0, 1, 7, 4, 9, 2, 8, 3, 5, 10, 6, 0, 1, 7, 4, 9, 2, 8, 5, 3, 10, 6, 0, 7, 1, 4, 9, 2, 8, 3, 5, 10, 0, 6, 1, 4, 7, 9, 2, 8, 5, 10, 3, 0, 6, 1, 4, 7, 2, 8, 9, 5, 10, 3, 6, 0, 1, 7, 4, 2, 8, 9, 5, 3, 10, 6, 0, 1, 7, 2, 4, 8, 9, 5, 10, 3, 6, 0, 1, 7, 2, 4, 9, 8, 5, 3, 10, 6, 1, 0, 7, 4, 2, 8, 9, 5, 10, 3, 6, 0, 1, 4, 7, 2, 8, 9, 5, 10, 3, 8]


# **SOM Class**

In [23]:
class SOM:
    def __init__(self,data, map_size,dir_map="not load from file", lr = 0.05):
        self.map = np.zeros(shape=map_size)
        ind = np.random.choice(data.shape[0],(map_size[0], map_size[1]) , replace=False)
        for i in range(map_size[0]):
          for j in range (map_size[1]):
            self.map[i,j] = data[ind[i,j]]
        self.lr0 = lr
        self.lr = self.lr0
        self.R0 = map_size[0]//2
        self.R = self.R0
        if dir_map != "not load from file":
          self.load_map(dir_map)
    
    def load_map(self,dir_map):
        with open(dir_map, "rb") as f:
            self.map = pickle.load(f)
        print("load map finished")

        
    def train(self, X, y,dir_map, T=1000, error_threshold=10**-20): 
        Js = []
        for t in range(T):
            prev_map = np.copy(self.map)
            shuffle_ind = np.random.randint(low=0, high=len(X), size=len(X))  # a vector of random indices
            for i in range(len(X)):
                x = X[shuffle_ind[i],:]
                x = np.asarray(x)
                winner = self.find_winner(x)    # winner = [1,4]
                NS = self.get_NS(winner)
                self.update_weights(x, NS, len(X))
            self.lr = self.lr0 * (1 - t/T)
            self.R = self.R0 * (1 - t/T)
            Js.append(np.linalg.norm(prev_map - self.map))     # norm of changes
            if t % 10 == 0:
                print('Iteration: %d, LR: %f, R: %f, J: %f' %(t, self.lr, self.R, Js[-1]))
                # self.visualize(X, y)
            # print('Iteration: %d, LR: %f, R: %f, J: %f' %(t, self.lr, self.R, Js[-1]))

            if Js[-1] < error_threshold:
                print('Min changes')
                break

        # save founded model
        with open(dir_map, 'wb') as f:  
          pickle.dump(self.map, f)

        return Js
            
    def find_winner(self, x):
        rep_x = np.tile(x, [self.map.shape[0], self.map.shape[1], 1])
        dists = np.sum((self.map - rep_x)**2, axis=2)   
        winner = np.unravel_index(np.argmin(dists, axis=None), dists.shape)
        return winner
    
    def get_NS(self, winner):
        # not neighbor = 0 , neighbor = 1/sqrt(euclidean_distance)
        NS = np.zeros(shape= (self.map.shape[0], self.map.shape[1]))
        iw, jw = winner[0], winner[1]
        R = int(self.R)
        for ri in range(-R, R):
            for rj in range(-R, R):
                if (0 <= iw + ri < self.map.shape[0]) and  (0 <= jw + rj < self.map.shape[1]):   
                    NS[iw + ri, jw + rj] = 0 if np.sqrt(ri**2 + rj**2) > R else 1/ ( 1+ np.sqrt(ri**2 + rj**2))  
        return NS
    
    def update_weights(self, x, n_strength, X_len):
        NS = np.tile(n_strength, [self.map.shape[2],1,1]).transpose()
        rep_x = np.tile(x, [self.map.shape[0], self.map.shape[1], 1])
        Delta = rep_x - self.map 
        self.map = self.map + (self.lr/X_len) * np.multiply(NS, Delta)

    def visualize(self, X, y):
        scores = np.zeros(shape=(self.map.shape[0], self.map.shape[1],len(list(set(y)))))  
        self.scores = np.zeros(shape=(self.map.shape[0], self.map.shape[1]))  
        for i in range(len(X)):
            x = X[i, :]
            x = np.asarray(x)
            winner = self.find_winner(x)
            iw, jw = winner[0], winner[1]
            scores[iw, jw][y[i]] += 1
        for i in range(len(scores)):
            for j in range(len(scores[0])):
              self.scores[i,j] = np.argmax(scores[i,j])
        print(self.scores)

        c = plt.imshow(self.scores, cmap='jet')
        plt.colorbar(c)
        for i in range(len(scores)):
          for j in range(len(scores[0])):
            plt.text(j,i, self.scores[i, j],ha="center", va="center", color="w")
        plt.title("Class of each Nuoron")
        plt.show()
        

    def extract_feature(self, x): # here we give a data of n feature and take a matrix of size map as output (e.g, 9*9)
        x = np.asarray(x)
        rep_x = np.tile(x, [self.map.shape[0], self.map.shape[0], 1])
        dists = np.sum((self.map - rep_x)**2, axis=2)
        return 1/ (1 + dists)
     
m_size = 6
dir_map = '/content/drive/MyDrive/Colab Notebooks/map'+str(m_size)+'*'+str(m_size)+'.txt'

som_net = SOM(data = X,dir_map = dir_map,map_size = [m_size,m_size,n_features])

# train SOM Net
Js = som_net.train(X, y,dir_map, T = 500)
plt.plot(Js)
plt.show()

# # reload saved model 
# som_net.load_map(dir_map)

# visualize founded Neuron
som_net.visualize(X, y)
print("Meaning of each number :")
s = ""
for i in range(len(unique_value)):
  s = s + str(i) + " : "+unique_value[i]+"   "
print(s)


load map finished
Iteration: 0, LR: 0.050000, R: 3.000000, J: 1.019750


KeyboardInterrupt: ignored

# **Dimension Reduction**

In [26]:
# x = X[0,:]
# features = som_net.extract_feature(new_X[0])
# plt.imshow(features)
# plt.show()


# # creating new data by using som feature
new_X = []
for i in range(X.shape[0]):
  x = X[i,:]
  features = som_net.extract_feature(x)
  tmp_data = features .flatten()
  new_X.append(tmp_data)
new_X = np.array(new_X)
print("Dimension Reduction is finished")

# **Classification with MLP**

In [29]:
X_train, X_test, y_train, y_test = train_test_split(new_X, y, test_size=0.1, random_state=42)

In [36]:
mlp = MLPClassifier(hidden_layer_sizes=(100,50,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=True, random_state=1,learning_rate_init=0.1)
mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))


Iteration 1, loss = 2.41994970
Iteration 2, loss = 2.41740431
Iteration 3, loss = 2.41417891
Iteration 4, loss = 2.41065112
Iteration 5, loss = 2.40712429
Iteration 6, loss = 2.40378145
Iteration 7, loss = 2.40083991
Iteration 8, loss = 2.39832905
Iteration 9, loss = 2.39627811
Iteration 10, loss = 2.39469162
Iteration 11, loss = 2.39354776
Iteration 12, loss = 2.39283025
Iteration 13, loss = 2.39239298
Iteration 14, loss = 2.39217082
Iteration 15, loss = 2.39209561
Iteration 16, loss = 2.39211513
Iteration 17, loss = 2.39218729
Iteration 18, loss = 2.39227948
Iteration 19, loss = 2.39236851
Iteration 20, loss = 2.39244168
Iteration 21, loss = 2.39248410
Iteration 22, loss = 2.39249903
Iteration 23, loss = 2.39248833
Iteration 24, loss = 2.39245577
Iteration 25, loss = 2.39240591
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Training set score: 0.101351
Test set score: 0.000000
