### Importing libraries and toolbox

In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import show
import matplotlib.pyplot as plt
#### All `sompy.` terms refer to the toolbox files

from sompy.sompy_isom import SOMFactory as smf   # (In case of iSOM)
# from sompy.sompy_csom import SOMFactory as smf  # (In case of cSOM)
    
from sompy.visualization.viz_functions import Visualization_func
from sompy.visualization.comp_planes import som_cplanes
from sompy.visualization.hitmaps import som_hitmap
from sompy.aux_fun import aux_fun

### Loading input data

1. `n_array`: Input data which is a numpy array
2. `comp_names`: Component names i.e. names of each design variable/feature

In [2]:
# file = open('spherical_data.csv')
# n_array = np.genfromtxt(file, delimiter=',',skip_header=1)
# data = pd.read_csv('spherical_data.csv')
# file.close()
# comp_names = list(data.columns)
# comp_names

In [None]:
import pandas as pd

# Location of the input CSV file.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# local copy of the dataset before running.
file_path = "D:\\Mahesh Files\\CreditCardFraudDetection File\\Credit card dataset\\creditcard.csv"

# Number of leading rows kept as SOM input.
# NOTE(review): the reason for this particular cutoff is not documented - confirm.
MAX_ROWS = 272836

# Read the CSV into a DataFrame, drop its first column (selected by position),
# then keep only the first MAX_ROWS rows.
df = pd.read_csv(file_path)
df = df.drop(df.columns[0], axis=1)
df = df.iloc[:MAX_ROWS]

print(df)

In [None]:
# Feature names are taken from the DataFrame header.
comp_names = [column for column in df.columns]
print(comp_names)

# The raw values of the DataFrame form the array that is fed to the SOM.
n_array = df.values
print(n_array)

# Last expression of the cell: the (rows, columns) shape of the input array.
n_array.shape

### Build initial SOM structure

Following attributes are set:
1. `sm._normalizer` = Instance of class `VarianceNormalizer` or `RangeNormalizer`. Default is `RangeNormalizer`. (See `normalization.py`)
2. `sm._data`: This is the normalized data in case `normalization` parameter is provided.
3. `sm._dim`: Number of features, i.e. `n_array.shape[1]` (e.g. 3 for a three-feature dataset)
4. `sm._dlen`: Number of data points, i.e. `n_array.shape[0]` (e.g. 40 for a 40-row dataset)
5. `sm.lattice`: Lattice type `hexa`. Only `hexa` allowed.
6. `sm.data_raw`: Original data without normalization.
7. `sm.neighborhood`: Only "gaussian" 
8. `sm.training`: Only "batch"
9. `sm.codebook`: Instance of class `Codebook`. (See `codebook.py`)
10. `sm._distance_matrix`: Distance matrix (size: m x m) holding the distance between every pair of map nodes, where 'm' is the total number of map nodes (not the map side length `m` used in the code cell below)

In [4]:
# Side length of the square map: ceil(sqrt(5 * sqrt(number of rows in df))).
m = int(
    np.ceil(
        np.sqrt(5 * np.sqrt(df.shape[0]))
    )
)

# Build the SOM object on the raw input array; one keyword per line for readability.
sm = smf.build(
    n_array,
    normalization='range',
    initialization='pca',
    mapshape="planar",
    mapsize=[m, m],
    lattice="hexa",
    neighborhood='gaussian',
    training='batch',
    component_names=comp_names,
)



In [None]:
# Display the codebook matrix as it stands right after `smf.build(...)`,
# i.e. before `sm.som_lininit()` is called in a later cell.
sm.codebook.matrix

### Initialization phase

Two types:
1. `sm.som_randinit()`: For random initialization (only for cSOM)
2. `sm.som_lininit()`: For PCA initialization (for both cSOM and iSOM)

At the end of this execution, following attributes will be updated:
1. `sm.codebook.matrix`: This is the initialized matrix
2. `sm.initialized_matrix`: This is also the initialized matrix

In [None]:
# Run the toolbox's linear initialization, described in this notebook as the
# PCA initialization option.
sm.som_lininit()

In [None]:
# Display the codebook matrix after `sm.som_lininit()` has run.
sm.codebook.matrix

### SOM Training phase

The following parameters are fixed here: `n_job`=1, `shared_memory`=False, `train_len_factor`=1.

User-defined parameters:
1. `verbose`: Can be `info` or `None`
2. `train_rough_len`: Rough training length (See function `rough_train()` in sompy_csom.py or sompy_isom.py)
3. `train_finetune_len`: Fine training length (See function `finetune_train()` in sompy_csom.py or sompy_isom.py)

At the end of training, following attributes will be updated:
1. `sm._bmu`: A numpy array of size (2 x dlen); First row are indices of BMU nodes, Second row are distance values to BMU nodes
2. `sm.codebook.matrix`: Trained weights of SOM after training

In [None]:


# Train the map: 10 rough iterations followed by 10 fine-tuning iterations,
# single job, no shared memory, progress logged at 'info' level.
sm.train(
    request_id=None,
    n_job=1,
    shared_memory=False,
    verbose='info',
    train_rough_len=10,
    train_finetune_len=10,
    train_len_factor=1,
)

# Last column of the codebook matrix after this training run.
a = sm.codebook.matrix[:, -1]


In [None]:
# sm.codebook.matrix 

In [9]:
# Optional extra fine-tuning rounds, driven by user input.
#
# The answer is matched case-insensitively and ignoring surrounding whitespace;
# the literal "True" that the original loop required is still accepted.
# NOTE: this cell blocks on `input()`, so it needs an interactive session.
_YES_ANSWERS = {"true", "yes", "y"}
_MORE_PROMPT = "Do you want to add more? (True/yes to continue) "

k = input(_MORE_PROMPT)
while k.strip().lower() in _YES_ANSWERS:
    try:
        train_finetune_len = int(input("Enter iterations"))
    except ValueError:
        # A non-numeric entry used to abort the cell with a traceback;
        # report it and ask again instead.
        print("Iterations must be a whole number; no training was run.")
    else:
        # Pass the current codebook as the initialized matrix, then train with
        # train_rough_len=0 and the requested fine-tuning length.
        sm.set_initialized_matrix(sm.codebook.matrix)
        sm.train(
            request_id=None,
            n_job=1,
            shared_memory=False,
            verbose='info',
            train_rough_len=0,
            train_finetune_len=train_finetune_len,
            train_len_factor=1,
        )
    k = input(_MORE_PROMPT)

In [None]:
# Last column of the codebook matrix at this point in the notebook.
b = sm.codebook.matrix[:, -1]

# This notebook describes `sm._bmu` as a (2 x dlen) array, where dlen is the
# number of data points: row 0 holds the BMU node index and row 1 the distance
# to that BMU. The values are therefore per data point, not per map node, and
# the printed labels say so.
arg_min = sm._bmu[0]
print("Index of the BMU for each data point: ", arg_min)
min_dist = sm._bmu[1]
print("Distance to the BMU for each data point:", min_dist)



In [14]:
# sm.codebook.matrix

### Visualization 

Creating a visualization instance `vis` with attributes of sm.

In [11]:
def is_classifier_output(df, max_classes=10):
    """Guess whether the last column of ``df`` is a class label.

    The last column is treated as classifier output when either

    * its name (compared case-insensitively) is one of ``classifieroutput``,
      ``label`` or ``target``; or
    * it has a numeric dtype and at most ``max_classes`` distinct values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose last column is inspected.
    max_classes : int, optional
        Largest number of distinct values a numeric column may have and
        still count as a class label. Defaults to 10.

    Returns
    -------
    bool
        ``True`` if the last column looks like a class label, else ``False``.
        A frame without any columns yields ``False``.
    """
    # No columns means there is no last column to inspect.
    if df.shape[1] == 0:
        return False

    # Column labels are not always strings (e.g. integer labels when a frame
    # is built from a bare array), so convert before lower-casing.
    last_name = str(df.columns[-1]).lower()
    if last_name in ('classifieroutput', 'label', 'target'):
        return True

    last_column = df.iloc[:, -1]
    return bool(
        pd.api.types.is_numeric_dtype(last_column)
        and last_column.nunique() <= max_classes
    )


# When the last column of `df` looks like a class label, round the matching
# codebook column to the nearest integer. Otherwise the codebook is left as is
# (the original `else` branch assigned the column to itself, which did nothing).
# NOTE: the rounding modifies `sm.codebook.matrix` in place.
if is_classifier_output(df):
    sm.codebook.matrix[:, -1] = np.round(sm.codebook.matrix[:, -1])

# Build the visualization helper from the SOM and show the component planes.
vis = Visualization_func(sm)
show(vis.plot_cplanes())

Plotting component planes of codebook matrix (See `viz_functions.py` and `comp_planes.py`)

`aux_fun()`: Auxiliary functions: To find hits, data mapped to a given node, neighborhood nodes etc.

In [None]:
# Instance of the toolbox's auxiliary-function helper.
axf = aux_fun()
# Result of `som_bmus_1` for the SOM and its `_data`; it is passed to
# `vis.plot_hitmap` in a later cell.
hits = axf.som_bmus_1(sm,sm._data)
# hits = axf.som_hits(sm, sm._data)  # sm._data here is normalized data

In [None]:
# Display the value returned by `axf.som_bmus_1`.
hits

In [18]:
# Hit maps plotting
# NOTE(review): `comp='all'` is passed explicitly here, so the default described
# in the trailing comment is not what this call uses - confirm which is intended.
show(vis.plot_hitmap(hits, comp='all' , clr='red'))   # Here, 'comp' takes index of last column of `n_array` data by default

In [19]:
from copy import deepcopy
# Work on an independent deep copy so that replacing its codebook below does
# not touch `sm`.
som_4umatrix = deepcopy(sm)
# Give the copy the codebook held by the visualization object `vis`.
som_4umatrix.set_codebook_matrix(vis.codebook)

In [20]:
# UMatrix plotting
# Compute the U-matrix from the copied SOM with comp="all", then render it
# through the visualization helper.
U = axf.som_umat(som_4umatrix, comp="all") 
show(vis.plot_umat(U))