# data utils

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def load_dataset(path='data.xlsx'):
    """Load the spreadsheet and split it into a feature matrix and labels.

    The first column of the sheet is returned as the label vector. All
    remaining columns are the features, rescaled with ``(value + 1) / 2``.

    Args:
        path: Excel file to read. Defaults to ``'data.xlsx'``.

    Returns:
        Tuple ``(X, y)``: ``X`` holds the rescaled feature columns and ``y``
        the first column of the sheet.
    """
    # np.array() copies the frame, so the returned arrays are writable and
    # independent of pandas' internal buffers (later cells relabel y in place).
    data = np.array(pd.read_excel(path))
    y = data[:, 0]
    # Per the original note every rescaled feature is 0, 0.5 or 1, i.e. the
    # raw values are presumably coded as -1 / 0 / 1 -- TODO confirm.
    X = (data[:, 1:] + 1) / 2
    return X, y

# SOM Class (using all features to train)

In [None]:
# Load the initial map weights once; every experiment starts from the same ones.
X, y = load_dataset()
n_features = X.shape[1]

# The weight file has to match the map size (here 7 x 7), and the same map
# size has to be passed to SOM in the main cell.
loaded_arr = np.loadtxt("weights_7-7.txt")

# The file stores the map as a 2-D table of shape (rows, cols * n_features);
# fold it back into the 3-D (rows, cols, n_features) layout.
map_rows = loaded_arr.shape[0]
map_cols = loaded_arr.shape[1] // n_features
initial_weights = loaded_arr.reshape(map_rows, map_cols, n_features)

In [None]:
# Default: every feature is used, so the SOM starts from the full weight map.
random_weights = initial_weights

# To train on a single feature instead, uncomment the assignment below.
# Features live on the LAST axis of the (rows, cols, n_features) map, so the
# feature has to be selected there; slicing with 8:9 (rather than indexing
# with 8) keeps that axis, giving the (rows, cols, 1) array the SOM expects.
# Select the same feature index from X in the main cell.
#
# random_weights = initial_weights[:, :, 8:9]

In [None]:
import numpy as np
from matplotlib import pyplot as plt


class SOM:
    """Self-organising map over a 2-D grid of neurons.

    The map is a ``(rows, cols, n_features)`` array of neuron weights. For
    every sample the closest neuron (the winner) is located and each neuron
    inside a circular neighbourhood around it is pulled towards the sample.
    The learning rate and the neighbourhood radius shrink linearly while
    training proceeds.
    """

    def __init__(self, map_size, lr=0.1, weights=None):
        """Create the map.

        Args:
            map_size: Sequence whose first entry is the number of map rows.
                Only ``map_size[0]`` is read, to derive the initial radius.
            lr: Initial learning rate.
            weights: Optional initial weights of shape
                ``(rows, cols, n_features)``. When omitted, the module-level
                ``random_weights`` array is used, as before.

        Raises:
            ValueError: If the weights are not a 3-D array.
        """
        if weights is None:
            # Backward-compatible default: the weights prepared at module level.
            weights = random_weights

        # Work on a private float copy so the caller's array is never touched.
        self.map = np.array(weights, dtype=float)
        if self.map.ndim != 3:
            raise ValueError(
                'weights must have shape (rows, cols, n_features), got %s'
                % (self.map.shape,))

        # Initial and current learning rate.
        self.lr0 = lr
        self.lr = self.lr0

        # Initial and current neighbourhood radius (half the number of rows).
        self.R0 = map_size[0] // 2
        self.R = self.R0

    def train(self, X, y, T=1000, error_threshold=10**-20):
        """Train the map and return the per-iteration amount of change.

        Args:
            X: 2-D array of samples (features only), one sample per row.
            y: Labels; accepted for interface symmetry but not used here.
            T: Maximum number of iterations (passes of ``len(X)`` updates).
            error_threshold: Training stops early once the norm of the map
                change in one iteration falls below this value.

        Returns:
            List with, for each completed iteration, the norm of the
            difference between the map before and after that iteration.
        """
        Js = []
        n_samples = len(X)

        for t in range(T):
            prev_map = np.copy(self.map)
            # Random indices drawn WITH replacement: one iteration performs
            # len(X) updates but does not necessarily visit every sample.
            sample_idx = np.random.randint(low=0, high=n_samples, size=n_samples)
            for idx in sample_idx:
                x = np.asarray(X[idx, :])

                winner = self.find_winner(x)    # e.g. winner = (1, 4)
                NS = self.get_NS(winner)

                self.update_weights(x, NS, n_samples)

            # Linear decay of the learning rate and the neighbourhood radius.
            self.lr = self.lr0 * (1 - t/T)
            self.R = self.R0 * (1 - t/T)

            Js.append(np.linalg.norm(prev_map - self.map))     # norm of changes

            if t % 10 == 0:
                print('Iteration: %d, LR: %f, R: %f, J: %f' %(t, self.lr, self.R, Js[-1]))

            if Js[-1] < error_threshold:
                print('Min changes')
                break

        return Js

    def visualize(self, X, y):
        """Show, as an RGB image, which classes win each neuron.

        Each sample adds one count to a colour channel of its winning neuron:
        label -1 to red, label 0 to green and label 1 to blue. The counts
        are divided by their overall mean and stored in ``self.scores``.

        Args:
            X: 2-D array of samples, one per row.
            y: Labels aligned with the rows of ``X``.
        """
        # The third dimension is 3 because the scores are shown in RGB; use 1
        # instead if only plain counts are wanted.
        self.scores = np.zeros(shape=(self.map.shape[0], self.map.shape[1], 3))
        class_colours = (
            (-1, np.asarray([1, 0, 0])),
            (1, np.asarray([0, 0, 1])),
            (0, np.asarray([0, 1, 0])),
        )

        for i in range(len(X)):
            winner = self.find_winner(np.asarray(X[i, :]))
            iw, jw = winner[0], winner[1]
            for label, colour in class_colours:
                if y[i] == label:
                    self.scores[iw, jw] += colour

        # Skip the normalisation when nothing was counted (empty input or no
        # recognised labels) instead of dividing by zero.
        mean_score = np.mean(self.scores)
        if mean_score > 0:
            self.scores = self.scores / mean_score

        plt.imshow(self.scores)
        plt.show()

    def find_winner(self, x):
        """Return the ``(row, col)`` index of the neuron closest to ``x``.

        Closeness is the squared Euclidean distance over the feature axis;
        ties go to the first neuron in row-major order.
        """
        # Broadcasting compares x with every neuron, whatever the map shape.
        dists = np.sum((self.map - np.asarray(x))**2, axis=2)
        winner = np.unravel_index(np.argmin(dists, axis=None), dists.shape)

        return winner

    def get_NS(self, winner):
        """Return the neighbourhood-strength grid around ``winner``.

        Neurons whose Euclidean grid distance ``d`` to the winner is at most
        ``int(self.R)`` get strength ``1 / (1 + d)``; every other neuron gets
        0. The winner itself therefore always has strength 1, even when the
        radius has shrunk below 1.

        Args:
            winner: ``(row, col)`` index of the winning neuron.

        Returns:
            Array of shape ``(rows, cols)`` with the strengths.
        """
        iw, jw = winner[0], winner[1]
        radius = int(self.R)

        # Grid distance of every neuron to the winner. Only in-range neurons
        # exist in these arrays, so no explicit bounds check is needed.
        row_offsets = np.arange(self.map.shape[0])[:, None] - iw
        col_offsets = np.arange(self.map.shape[1])[None, :] - jw
        dist = np.sqrt(row_offsets**2 + col_offsets**2)

        # Inside the circle: inverse of (distance + 1); outside: 0.
        NS = np.where(dist <= radius, 1.0 / (1.0 + dist), 0.0)

        return NS

    def update_weights(self, x, n_strength, X_len):
        """Move every neuron towards ``x`` in proportion to its strength.

        Args:
            x: Sample (feature vector).
            n_strength: ``(rows, cols)`` neighbourhood strengths from
                ``get_NS``.
            X_len: Number of samples; the learning rate is divided by it.
        """
        # A trailing axis lets the (rows, cols) strengths broadcast over the
        # features while keeping entry [i, j] attached to neuron [i, j].
        NS = np.asarray(n_strength)[:, :, None]
        Delta = np.asarray(x) - self.map

        self.map = self.map + (self.lr/X_len) * NS * Delta

    def extract_feature(self, x):
        """Return the response of every neuron to the sample ``x``.

        Args:
            x: Sample with ``n_features`` values.

        Returns:
            Array of the map's ``(rows, cols)`` shape (e.g. 9 x 9) holding
            ``1 / (1 + squared distance)`` between ``x`` and each neuron.
        """
        dists = np.sum((self.map - np.asarray(x))**2, axis=2)
        return 1/ (1 + dists)
        
# Script entry point: train the SOM on the dataset and plot the results.
# The names bound here (X, y, som_net) are reused by the cells further down.
if __name__=="__main__":
    X, y = load_dataset()

    '''
    # if you want to train only on one of features, u should run following code otherwise dont run ]
    X = X[:,8]  
    X = np.asmatrix(X)
    X = np.transpose(X)
    '''

    # The map size given here has to match the weight file loaded earlier (7 x 7).
    som_net = SOM(map_size = [7,7, X.shape[1]])
    # Js holds, per training iteration, the norm of the change of the map.
    Js = som_net.train(X, y, T = 5000)
    plt.plot(Js)
    plt.show()
    # RGB image of which class labels win each neuron.
    som_net.visualize(X, y)
    # Response of every neuron to the first sample, shown as an image.
    x = X[0,:]
    features = som_net.extract_feature(x)
    plt.imshow(features)
    plt.show()

In [None]:
# Build a new dataset from the SOM: each sample is replaced by the flattened
# map response (one value per neuron) returned by extract_feature.
new_data_som = np.array([
    som_net.extract_feature(X[sample_idx, :]).flatten()
    for sample_idx in range(X.shape[0])
])

# NOTE: y_new_som is an alias of y, not a copy, so the relabelling below
# (-1 becomes 0) also rewrites y in place.
y_new_som = y
y_new_som[y_new_som == -1] = 0

# classification of data by MLP (Loss function: Cross entropy) 

## importing libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras 
from tensorflow.keras import layers
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error

## Splitting the data into train, test and validation sets

In [None]:
# Hold out 10% of the SOM-feature dataset for testing; the fixed random_state
# keeps the split reproducible. The labels are the relabelled 0/1 vector
# (y_new_som), which is what the sigmoid output of the classifier expects.
X_train, X_test, y_train, y_test = train_test_split(
    new_data_som, y_new_som, test_size=0.1, random_state=42)

## classification of data by MLP (Loss function: Cross entropy) 

In [None]:
# Build the classifier: one hidden ReLU layer of 80 units followed by a single
# sigmoid unit that outputs the probability of class 1.
model = keras.models.Sequential()
model.add(keras.Input(shape=(X_train.shape[1],)))
model.add(keras.layers.Dense(80, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
# Compile with the cross-entropy loss this section is about; binary
# cross-entropy is the form that matches a single sigmoid output with 0/1 labels.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Fit the network; 18% of the training split is set aside for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=5,
    epochs=30,
    validation_split=0.18,
)

In [None]:
# Plot the training and validation curves: first accuracy, then loss.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
# Evaluate on the held-out test split; the result is the loss followed by the
# metric requested at compile time (accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test, verbose=1)

In [None]:
# Predict class-1 probabilities and threshold them at 0.5 to get hard labels.
predicted_values = model.predict(X_test)
predicted_classes = np.where(predicted_values > 0.5, 1, 0)
# Confusion matrix. labels=[0, 1] forces a 2x2 result even when the test
# split or the predictions contain a single class, so unpacking into four
# counts cannot fail; the true labels are cast to int to match the labels.
tn, fp, fn, tp = confusion_matrix(
    np.asarray(y_test).astype(int), predicted_classes[:, 0], labels=[0, 1]).ravel()
print('tn = ',tn , '\t','fp = ',fp , '\t','fn = ',fn , '\t','tp = ',tp )