In [89]:
# Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split # Hold-Out
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

#### Clase Neurona

In [30]:
class neurona(object):
    def __init__(self, f=0, c=0, dim=0):
        self.c = c
        self.f = f
        self.dim = dim
        
        # Inicializa los pesos aleatoriamente entre [-0.5, 0.5]
        self.w = normalize(0.5 - np.random.rand(dim).reshape(1,-1))
        
        # self.label_winner = []
            
    def predict(self, inputs):
        # Calcula la salida de una neurona ante una o más entradas. "inputs" puede ser un vector o una matriz 2D
        return inputs @ self.w.T
    
    def fit(self, input, alfa=1):
        # ajusta los pesos de una neurona (w) para aproximarlos a una entrada (input)
        self.w = normalize(self.w + (alfa*input))
            
    def neuron_labeling(self, inputs, target):
        # etiquetado por neuronas. Se le pasa la lista de entradas y la etiqueta (target) de cada una de esas muestras.
        # Devuelve la etiqueta de la ganadora
        Y = inputs @ self.w.T
        self.label = target[np.argmax(Y)]
        return self.label

#### Clase SOM

In [31]:
class som():
    
    def __init__(self, filas=1, columnas=1, dim=1):
        self.lista = []
        self.filas = filas
        self.columnas = columnas
        self.dim = dim
        self.labels = []
        # Considera que un mapa rectangular es una lista de objetos "neurona", que viene localizado por sus atributos "fila" y "columna"
        for fila in range(self.filas):
            for columna in range(self.columnas):
                self.lista.append(neurona(f=fila, c=columna, dim=dim))
                
    def fit(self, inputs, max_epochs=1, init_radious=0, init_alfa=1):
        # método similar a otros algoritmo de ML. Recibe las entradas, el radio inicial, el factor de apendizaje inicial,
        # el máximo de épocas y devuelve los pesos ajustados
        self.radious = init_radious
        self.alfa = init_alfa
        t = 0
        P = inputs.shape[0]
        for epoch in range(max_epochs):
            for x in inputs:
                self.alfa = init_alfa/(1.0 + float(t/P))
                i_gana, y_gana = -1, float('-inf')
                for i in range(self.filas*self.columnas):
                    y_predict = self.lista[i].predict(x.reshape(1,-1))
                    if y_predict > y_gana:
                        y_gana = y_predict
                        i_gana = i
                f_gana = int(i_gana / self.columnas)
                c_gana = i_gana % self.columnas
                
                # Conjunto de vecinas para un radious
                for f in range(f_gana - self.radious, f_gana + self.radious+1):
                    if f < 0:
                        row = self.filas + f
                    else:
                        if f > self.filas-1:
                            row = f % self.filas
                        else:
                            row = f

                    for c in range(c_gana - self.radious, c_gana + self.radious+1):
                        if c < 0:
                            column = self.columnas + c 
                        else:
                            if c > self.columnas-1:
                                column = c % self.columnas
                            else:
                                column = c
                        self.lista[(row*self.columnas) + column].fit(x.reshape(1,-1), alfa=self.alfa)
                t += 1
                if (t%1000) == 0:
                    print(t, self.radious, "  ", end='')
            if self.radious > 0:
                self.radious -= 1
                            
    def neuron_labeling(self, inputs, target):
        # recorre la lista de neuronas y va llamanado a su metodo de etiquetado para cada neurona
        
        for i in range(self.filas*self.columnas):
            # print(X.shape, self.target.shape)
            self.labels.append(self.lista[i].neuron_labeling(inputs, target))
            # print(self.lista[i].labeling(X, target=y_deseada, etiquetado='neurona'))
 
    def predict(self, inputs):
        # recorre la lista de neuronas y calcula la salida de un conjunto de muestras
        # util para usar la salida del som como entrada a otrso sistemas
        output_list = []
        for x in inputs:
            for i in range(self.filas*self.columnas):
                output_list.append(self.lista[i].predict(x.reshape(1,-1)))
        return np.array(output_list).reshape(inputs.shape[0], -1)    
    
    def label_predict(self, inputs):
        # clasificación de muestras con el etiquetado de cada neurona hecho previamente
        label_list = []
        for x in inputs:
            label_list.append(self.labels[np.argmax(self.predict(x.reshape(1,-1)))])
        return np.array(label_list).reshape(inputs.shape[0], -1)

#### Load Data

In [32]:
data = pd.read_csv('mnist_data.csv')
data.head() #70000 x 784

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [110]:
y = pd.read_csv('mnist_target.csv')
y.head() # 70000 × 1

Unnamed: 0,class
0,5
1,0
2,4
3,1
4,9


#### Preprocessing Data

In [111]:
scalar = StandardScaler()
data_std = scalar.fit_transform(data)
data_std

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [112]:
## Reduce data
X = data_std[:1500]
label = y[:1500] # Cogemos de un Dataframa, entonces pasamos a un np
X.shape, label.shape

((1500, 784), (1500, 1))

In [113]:
label = label.values

In [114]:
label.shape

(1500, 1)

#### Conjunto de entrenamiento y conjunto de Prueba

In [115]:
X_train, X_test, y_train, y_test = train_test_split(X, label, test_size=0.33, stratify=label)

In [116]:
X_train.shape, y_train.shape # Train

((1005, 784), (1005, 1))

In [117]:
X_test.shape, y_test.shape # Test

((495, 784), (495, 1))

#### Creacion del SOM

In [118]:
SOM = som(filas=15, columnas=9, dim=data.shape[1])

In [119]:
# Entranamiento red SOM
SOM.fit(inputs=X_train, max_epochs=10, init_radious=9, init_alfa=10)

1000 9   2000 8   3000 7   4000 6   5000 5   6000 4   7000 3   8000 2   9000 1   10000 0   

In [120]:
### Etiquetamos la neuronas de la red
SOM.neuron_labeling(X_train, y_train)

In [121]:
# Obtenemos las etiquetas de las neuronas
etiquetas = SOM.label_predict(X_test)
etiquetas.shape

(495, 1)

In [122]:
# Obtenemos la tasa de acierto por el etiqueta de neuronas
accuracy_score(y_test, etiquetas)

0.6444444444444445

In [123]:
# Calculamos los nuevos conjuntos de Train y Test a partir de la salida del SOM
train = SOM.predict(X_train)
test = SOM.predict(X_test)

In [124]:
train.shape, test.shape

((1005, 135), (495, 135))

In [125]:
# Normalizar los conjuntos de datos
escalar = MinMaxScaler()
escalar.fit(np.vstack((train, test))) # Concatenamos los arrays en una unica fila
train_scd = escalar.transform(train)
test_scd = escalar.transform(test)

#### Creacion MLP (135x60x10)

In [126]:
mlp = MLPClassifier(hidden_layer_sizes=(60,), verbose=True, max_iter=3000)

In [127]:
mlp.fit(train_scd, y_train)

  return f(*args, **kwargs)


Iteration 1, loss = 2.31954688
Iteration 2, loss = 2.25377734
Iteration 3, loss = 2.22624653
Iteration 4, loss = 2.19514428
Iteration 5, loss = 2.16273627
Iteration 6, loss = 2.13683963
Iteration 7, loss = 2.10052189
Iteration 8, loss = 2.06751976
Iteration 9, loss = 2.03998487
Iteration 10, loss = 2.00595884
Iteration 11, loss = 1.96704525
Iteration 12, loss = 1.93473895
Iteration 13, loss = 1.89967089
Iteration 14, loss = 1.86470185
Iteration 15, loss = 1.83278798
Iteration 16, loss = 1.80085753
Iteration 17, loss = 1.76751443
Iteration 18, loss = 1.73122769
Iteration 19, loss = 1.70442445
Iteration 20, loss = 1.67562248
Iteration 21, loss = 1.64752263
Iteration 22, loss = 1.62050133
Iteration 23, loss = 1.58914455
Iteration 24, loss = 1.56299006
Iteration 25, loss = 1.53537523
Iteration 26, loss = 1.51585792
Iteration 27, loss = 1.48746562
Iteration 28, loss = 1.46003834
Iteration 29, loss = 1.43140274
Iteration 30, loss = 1.41396424
Iteration 31, loss = 1.38984602
Iteration 32, los

Iteration 264, loss = 0.45775758
Iteration 265, loss = 0.46088729
Iteration 266, loss = 0.45869013
Iteration 267, loss = 0.45676679
Iteration 268, loss = 0.45697584
Iteration 269, loss = 0.45456583
Iteration 270, loss = 0.44988738
Iteration 271, loss = 0.44789374
Iteration 272, loss = 0.44998952
Iteration 273, loss = 0.45091271
Iteration 274, loss = 0.45111017
Iteration 275, loss = 0.44821556
Iteration 276, loss = 0.44926036
Iteration 277, loss = 0.45147388
Iteration 278, loss = 0.45021961
Iteration 279, loss = 0.44768054
Iteration 280, loss = 0.44801662
Iteration 281, loss = 0.44413771
Iteration 282, loss = 0.44272025
Iteration 283, loss = 0.44456851
Iteration 284, loss = 0.44552389
Iteration 285, loss = 0.45509359
Iteration 286, loss = 0.45027448
Iteration 287, loss = 0.44842749
Iteration 288, loss = 0.44456700
Iteration 289, loss = 0.44600672
Iteration 290, loss = 0.43935821
Iteration 291, loss = 0.43961327
Iteration 292, loss = 0.44467007
Iteration 293, loss = 0.44158360
Iteration 

MLPClassifier(hidden_layer_sizes=(60,), max_iter=3000, verbose=True)

In [128]:
mlp.predict(test_scd)

array([8, 5, 5, 4, 9, 1, 9, 8, 1, 3, 1, 6, 4, 9, 0, 1, 9, 0, 0, 6, 7, 1,
       7, 2, 7, 5, 4, 7, 1, 1, 4, 9, 3, 5, 5, 8, 8, 9, 6, 5, 1, 1, 7, 7,
       7, 6, 5, 0, 9, 6, 9, 7, 1, 6, 8, 1, 1, 0, 9, 7, 7, 6, 5, 4, 1, 5,
       5, 3, 5, 0, 6, 3, 8, 2, 8, 8, 7, 9, 4, 8, 2, 4, 2, 9, 0, 1, 4, 7,
       2, 4, 7, 7, 8, 9, 1, 6, 0, 1, 3, 6, 2, 7, 6, 2, 1, 6, 0, 5, 7, 8,
       5, 6, 0, 2, 3, 7, 5, 4, 0, 1, 3, 5, 7, 3, 3, 3, 7, 1, 5, 2, 9, 9,
       6, 9, 6, 6, 8, 1, 9, 6, 1, 1, 9, 1, 6, 9, 9, 7, 9, 3, 5, 6, 5, 2,
       4, 1, 9, 5, 3, 7, 9, 9, 0, 8, 0, 4, 0, 8, 2, 5, 3, 3, 5, 6, 7, 3,
       9, 1, 7, 5, 9, 2, 4, 7, 9, 5, 4, 9, 0, 0, 7, 5, 2, 4, 9, 8, 6, 9,
       9, 3, 6, 2, 1, 9, 6, 0, 0, 1, 2, 9, 9, 9, 0, 1, 4, 3, 1, 0, 2, 1,
       9, 4, 6, 6, 6, 6, 2, 8, 1, 1, 8, 3, 2, 9, 4, 8, 3, 0, 3, 5, 4, 0,
       0, 1, 0, 1, 5, 2, 3, 6, 4, 5, 2, 8, 6, 8, 3, 1, 9, 2, 0, 4, 0, 2,
       3, 5, 1, 7, 1, 4, 9, 8, 3, 2, 9, 7, 8, 1, 8, 4, 9, 7, 8, 9, 7, 4,
       7, 2, 1, 3, 4, 1, 6, 1, 5, 0, 0, 8, 1, 3, 8,

In [129]:
accuracy_score(y_test, mlp.predict(test_scd))

0.8363636363636363