In [177]:
# imports
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import normalize, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from scipy.stats import mode
from sklearn.metrics import accuracy_score

### Clases Neurona

In [178]:
class neurona(object):
    def __init__(self, f=0, c=0, dim=0):
        self.c = c
        self.f = f
        self.dim = dim
        
        # Inicializa los pesos aleatoriamente entre [-0.5, 0.5]
        self.w = normalize(0.5 - np.random.rand(dim).reshape(1,-1))
        
        # self.label_winner = []
            
    def predict(self, inputs):
        # Calcula la salida de una neurona ante una o más entradas. "inputs" puede ser un vector o una matriz 2D
        return inputs @ self.w.T
    
    def fit(self, input, alfa=1):
        # ajusta los pesos de una neurona (w) para aproximarlos a una entrada (input)
        self.w = normalize(self.w + (alfa*input))
            
    def neuron_labeling(self, inputs, target):
        # etiquetado por neuronas. Se le pasa la lista de entradas y la etiqueta (target) de cada una de esas muestras.
        # Devuelve la etiqueta de la ganadora
        Y = inputs @ self.w.T
        self.label = target[np.argmax(Y)]
        return self.label

### Clase SOM

In [179]:
class som():
    
    def __init__(self, filas=1, columnas=1, dim=1):
        self.lista = []
        self.filas = filas
        self.columnas = columnas
        self.dim = dim
        
        # Considera que un mapa rectangular es una lista de objetos "neurona", que viene localizado por sus atributos "fila" y "columna"
        for fila in range(self.filas):
            for columna in range(self.columnas):
                self.lista.append(neurona(f=fila, c=columna, dim=dim))
                
    def fit(self, inputs, max_epochs=1, init_radious=0, init_alfa=1):
        # método similar a otros algoritmo de ML. Recibe las entradas, el radio inicial, el factor de apendizaje inicial,
        # el máximo de épocas y devuelve los pesos ajustados
        self.radious = init_radious
        self.alfa = init_alfa
        t = 0
        P = inputs.shape[0]
        for epoch in range(max_epochs):
            for x in inputs:
                self.alfa = init_alfa/(1.0 + float(t/P))
                i_gana, y_gana = -1, float('-inf')
                for i in range(self.filas*self.columnas):
                    y_predict = self.lista[i].predict(x.reshape(1,-1))
                    if y_predict > y_gana:
                        y_gana = y_predict
                        i_gana = i
                f_gana = int(i_gana / self.columnas)
                c_gana = i_gana % self.columnas
                
                # Conjunto de vecinas para un radious
                for f in range(f_gana - self.radious, f_gana + self.radious+1):
                    if f < 0:
                        row = self.filas + f
                    else:
                        if f > self.filas-1:
                            row = f % self.filas
                        else:
                            row = f

                    for c in range(c_gana - self.radious, c_gana + self.radious+1):
                        if c < 0:
                            column = self.columnas + c 
                        else:
                            if c > self.columnas-1:
                                column = c % self.columnas
                            else:
                                column = c
                        self.lista[(row*self.columnas) + column].fit(x.reshape(1,-1), alfa=self.alfa)
                t += 1
                if (t%1000) == 0:
                    print(t, self.radious, "  ", end='')
            if self.radious > 0:
                self.radious -= 1
                            
    def neuron_labeling(self, inputs, target):
        # recorre la lista de neuronas y va llamanado a su metodo de etiquetado para cada neurona
        self.labels = []
        
        for i in range(self.filas*self.columnas):
            # print(X.shape, self.target.shape)
            self.labels.append(self.lista[i].neuron_labeling(inputs, target))
            # print(self.lista[i].labeling(X, target=y_deseada, etiquetado='neurona'))
 
    def predict(self, inputs):
        # recorre la lista de neuronas y calcula la salida de un conjunto de muestras
        # util para usar la salida del som como entrada a otrso sistemas
        output_list = []
        for x in inputs:
            for i in range(self.filas*self.columnas):
                output_list.append(self.lista[i].predict(x.reshape(1,-1)))
        return np.array(output_list).reshape(inputs.shape[0], -1)    
    
    def label_predict(self, inputs):
        # clasificación de muestras con el etiquetado de cada neurona hecho previamente
        label_list = []
        for x in inputs:
            label_list.append(self.labels[np.argmax(self.predict(x.reshape(1,-1)))])
        return np.array(label_list).reshape(inputs.shape[0], -1)

### Load data

In [180]:
mnist = fetch_openml('mnist_784', version=1)
mnist.data.shape, mnist.target.shape

((70000, 784), (70000,))

In [181]:
data = mnist.data
data

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [182]:
label = mnist.target
label

0        5
1        0
2        4
3        1
4        9
        ..
69995    2
69996    3
69997    4
69998    5
69999    6
Name: class, Length: 70000, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

### Preprocessing Data

In [183]:
# Normalizacion datos
scalar = StandardScaler()
datos_scd = scalar.fit_transform(data)
datos_scd

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [184]:
# Train data & Test data
train_data, test_data, train_label, test_label = train_test_split(datos_scd, label, test_size=0.33)

In [185]:
train_data.shape

(46900, 784)

In [186]:
test_data.shape

(23100, 784)

### Creación SOM

In [187]:
# Creacion de la red neuronal SOM
mapa = som(15,9, train_data.shape[1])

# Entrenamos la red neuronal
mapa.fit(train_data, max_epochs=20, init_radious=9, init_alfa=10)

1000 9   2000 9   3000 9   4000 9   5000 9   6000 9   7000 9   8000 9   9000 9   10000 9   11000 9   12000 9   13000 9   14000 9   15000 9   16000 9   17000 9   18000 9   19000 9   20000 9   21000 9   22000 9   23000 9   24000 9   25000 9   26000 9   27000 9   28000 9   29000 9   30000 9   31000 9   32000 9   33000 9   34000 9   35000 9   36000 9   37000 9   38000 9   39000 9   40000 9   41000 9   42000 9   43000 9   44000 9   45000 9   46000 9   47000 8   48000 8   49000 8   50000 8   51000 8   52000 8   53000 8   54000 8   55000 8   56000 8   57000 8   58000 8   59000 8   60000 8   61000 8   62000 8   63000 8   64000 8   65000 8   66000 8   67000 8   68000 8   69000 8   70000 8   71000 8   72000 8   73000 8   74000 8   75000 8   76000 8   77000 8   78000 8   79000 8   80000 8   81000 8   82000 8   83000 8   84000 8   85000 8   86000 8   87000 8   88000 8   89000 8   90000 8   91000 8   92000 8   93000 8   94000 7   95000 7   96000 7   97000 7   98000 7   99000 7   100000 7   101000 7

756000 0   757000 0   758000 0   759000 0   760000 0   761000 0   762000 0   763000 0   764000 0   765000 0   766000 0   767000 0   768000 0   769000 0   770000 0   771000 0   772000 0   773000 0   774000 0   775000 0   776000 0   777000 0   778000 0   779000 0   780000 0   781000 0   782000 0   783000 0   784000 0   785000 0   786000 0   787000 0   788000 0   789000 0   790000 0   791000 0   792000 0   793000 0   794000 0   795000 0   796000 0   797000 0   798000 0   799000 0   800000 0   801000 0   802000 0   803000 0   804000 0   805000 0   806000 0   807000 0   808000 0   809000 0   810000 0   811000 0   812000 0   813000 0   814000 0   815000 0   816000 0   817000 0   818000 0   819000 0   820000 0   821000 0   822000 0   823000 0   824000 0   825000 0   826000 0   827000 0   828000 0   829000 0   830000 0   831000 0   832000 0   833000 0   834000 0   835000 0   836000 0   837000 0   838000 0   839000 0   840000 0   841000 0   842000 0   843000 0   844000 0   845000 0   846000 0  

#### Etiquetado por neuronas

Procedemos a etiquetar las neuronas que conforman la red neuronal

In [188]:
mapa.neuron_labeling(train_data, train_label.values)

Obtenemos los labels de cada neurona

In [189]:
neuron_labels = mapa.label_predict(test_data)
neuron_labels.shape

(23100, 1)

Porcentaje de acierto utilizando etiquetado por neuronas

In [190]:
accuracy_score(test_label, neuron_labels)

0.6894805194805195

Obtenemos también las labels de las neuronas a partir de las muestras de entrenamiento

In [191]:
neuron_labels_train = mapa.label_predict(train_data)
neuron_labels_train.shape

(46900, 1)

### Con la salida del SOM, construimos un conjunto de aprendizaje y uno de test

Recorremos la lista de neuronas del mapa y calculamos la salida de un conjunto de muestras mediante el metodo **predict()**.

Util para usar la salida del SOM como entrada a otros sistemas, en nuestro caso, hacia un **MLP (135x60x10)**

#### Conjunto de Aprendizaje

In [192]:
train = mapa.predict(train_data)
train

array([[  5.77884805,   3.35548404,  -4.20348889, ...,  -2.06757109,
         -4.10180719,   1.91384614],
       [  0.97736482,  -2.85324449,  -4.58843191, ...,  -3.63068857,
         -3.84131166,  -1.10199706],
       [  0.46562347, -10.31075157,   1.9125644 , ...,  -9.88953808,
         -4.22236495,  -8.4505119 ],
       ...,
       [ -2.84295508,   2.33214141,  -2.33341898, ...,   0.78573895,
          4.03545879,  -0.70006905],
       [  3.95969068,   4.85215316,  -1.88094791, ...,   1.14477172,
         -0.96085618,   5.01590182],
       [ -4.29069942,  -2.62592409,  -1.63535628, ...,   1.23919531,
          4.94683404,  -0.64804869]])

Obtenemos un conjunto de aprendizaje con 135 columnas *(15 filas * 9 columnas)*

In [193]:
train.shape

(46900, 135)

#### Conjunto de Test

In [194]:
test = mapa.predict(test_data)
test

array([[ 0.26716462, -1.16621821, -1.51097821, ..., -1.16953037,
         0.33224332, -1.48017279],
       [-0.19533257, -1.33039494, -0.38224758, ...,  1.53238102,
        -1.32505378,  4.20364452],
       [ 3.14460384,  4.10009752, -3.67776665, ...,  0.43988146,
        -3.30925697,  3.95866123],
       ...,
       [ 1.24763835,  0.28271729, -3.77575677, ..., -2.3557496 ,
        -2.68690506,  2.77967877],
       [-0.94001418,  0.8170134 , -2.90112635, ..., -0.21170029,
        -1.55346173,  8.00656603],
       [ 1.79205457,  2.2203157 , -2.71833431, ...,  0.51110464,
        -1.44685203,  5.27386259]])

In [195]:
test.shape

(23100, 135)

### Creacion MLP (135x60x10)

Como se nos indica, debemos crear un MLP con una capa de entrada formada por 135 neuronas, una capa oculta de 60 neuronas y una capa de salida de 10 neuronas.

In [196]:
from sklearn.neural_network import MLPClassifier

In [197]:
clf = MLPClassifier(hidden_layer_sizes=(60,), activation='relu', solver='adam', random_state=1)

In [198]:
train.shape, neuron_labels_train.shape

((46900, 135), (46900, 1))

In [199]:
clf.fit(train, neuron_labels_train)

  return f(*args, **kwargs)


MLPClassifier(hidden_layer_sizes=(60,), random_state=1)

In [200]:
y_predict = clf.predict(test)

In [201]:
y_predict.shape

(23100,)

In [202]:
accuracy_score(test_label, y_predict) ## tiene que dar sobre 0.967

0.7006060606060606