# PRÁCTICA 5 - ANDRÉS CABERO

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.datasets import fetch_openml
from sklearn.utils import check_array
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from scipy.stats import mode

In [2]:
# Fetch the "mnist_784" dataset through scikit-learn's OpenML loader.
# The feature matrix extracted in the next cell is 70000 samples x 784 features.
datos = fetch_openml("mnist_784")

In [3]:
# Dense float feature matrix, stored column-major as requested by order='F'.
X = check_array(datos['data'], dtype="float", order='F')
# Keep the labels as a plain NumPy array. Newer scikit-learn releases may hand
# back a pandas Series here; after the shuffled train/test split its integer
# indexing is label-based, so `target[i]` would no longer mean "the i-th sample".
Y = np.asarray(datos["target"])
X.shape

(70000, 784)

In [4]:
# Extended normalisation: append a constant column of ones to the features,
# then pass the widened matrix through `normalize`.
X = normalize(np.c_[X, np.ones(len(X))])
X.shape

(70000, 785)

In [5]:
# Hold out 1/7 of the samples for testing (60000 train / 10000 test).
# A fixed random_state makes the shuffled split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=1/7, random_state=42)

In [6]:
# Sanity check: shapes of the train/test features and labels, in that order.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(60000, 785) (10000, 785) (60000,) (10000,)


In [7]:
class SOM:
    """Self-organising map whose neurons are unit vectors matched by dot product.

    The neurons form a ``rows x columns`` grid stored row-major in a flat list
    (``neurons[r * columns + c]``). Neighbourhoods wrap around the grid edges.
    The winning neuron for a sample is the one with the largest dot product
    with that sample.
    """

    def __init__(self, shape, dim, seed=None):
        """Create a map with randomly initialised, normalised neurons.

        Parameters
        ----------
        shape : tuple of int
            ``(rows, columns)`` of the neuron grid.
        dim : int
            Length of each neuron's weight vector.
        seed : int or None, optional
            When given, the initial weights are drawn from a private
            ``np.random.RandomState(seed)`` so the map is reproducible.
            With ``None`` (default) the global NumPy random state is used.
        """
        self.dim = dim
        self.shape = shape
        self.rows = self.shape[0]
        self.columns = self.shape[1]
        self.size = self.rows * self.columns

        # Without a seed this draws from the global NumPy state.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # One (1, dim) row per neuron: uniform values shifted to [-0.5, 0.5),
        # then normalised.
        self.neurons = [normalize(rng.random_sample((1, dim)) - 0.5) for _ in range(self.size)]
        # One label per neuron once label_mode / label_neurons has run.
        self.labels = []
        self.trained = False
        # Hyper-parameters of the last fit() call, kept for __str__.
        self.train_epochs = None
        self.train_radious = None
        self.train_alpha = None
        # Name of the labelling strategy currently stored in self.labels.
        self.labeling = "None"

    def __repr__(self):
        """Return a compact one-line description of the map."""
        return "<SOM shape:%s dim:%s fit:%s labeling:%s>" % (self.shape, self.dim, self.trained, self.labeling)

    def __str__(self):
        """Return a readable description, including fit parameters once trained."""
        if self.trained:
            return "SOM (Self-organizing map) shape: %s, dim: %s, fit: %s (epochs: %s, radious: %s, alpha: %s), labeling: %s." % (self.shape, self.dim, self.trained, self.train_epochs, self.train_radious, self.train_alpha, self.labeling)

        return "SOM (Self-organizing map) shape: %s, dim: %s, fit: %s, labeling: %s." % (self.shape, self.dim, self.trained, self.labeling)

    def _neighbourhood(self, r_winner, c_winner, radious):
        """Return the flat indices within ``radious`` of the winner, each exactly once."""
        # Sets collapse the duplicates produced when the wrapped window is
        # wider than the grid, so no neuron is updated twice for one sample.
        rows = {r % self.rows for r in range(r_winner - radious, r_winner + radious + 1)}
        cols = {c % self.columns for c in range(c_winner - radious, c_winner + radious + 1)}
        return [r * self.columns + c for r in sorted(rows) for c in sorted(cols)]

    @staticmethod
    def _check_labelled(data, target):
        """Return ``data`` and ``target`` as arrays, checking they have equal length."""
        samples = np.asarray(data)
        # A plain array guarantees positional indexing (a pandas Series would
        # index by label instead).
        target = np.asarray(target)
        if target.shape[0] != samples.shape[0]:
            raise ValueError(
                "data and target must have the same number of samples (%d != %d)"
                % (samples.shape[0], target.shape[0])
            )
        return samples, target

    def fit(self, data, epochs=1, radious=1, initial_alpha=1):
        """Train the map online, one sample at a time.

        For every sample the winning neuron and its wrapped square
        neighbourhood are moved towards the sample
        (``neuron <- normalize(neuron + alpha * sample)``).
        The learning rate is ``initial_alpha / (1 + t / p)`` where ``t`` is
        the number of samples processed so far and ``p`` the samples per
        epoch. The radius shrinks by one after each epoch until it reaches 0.
        One progress line ``t alpha radious`` is printed per epoch.

        Parameters
        ----------
        data : ndarray of shape (n_samples, dim)
            Training samples; rows are visited in order.
        epochs : int
            Number of passes over ``data``.
        radious : int
            Initial neighbourhood radius, in grid cells.
        initial_alpha : float
            Initial learning rate.
        """
        self.train_epochs = epochs
        self.train_radious = radious
        self.train_alpha = initial_alpha

        # p = number of iterations (samples) in one epoch
        p = data.shape[0]
        t = 0
        # Defined up front so the progress print also works for empty data.
        alpha = initial_alpha

        # For each EPOCH
        for _ in range(epochs):

            # For each SAMPLE
            for d in data:
                sample = d.reshape(1, -1)
                alpha = initial_alpha / (1.0 + t / p)

                # Flat winner index -> (row, column) on the grid.
                r_winner, c_winner = divmod(self.predict(sample)[0], self.columns)

                for idx in self._neighbourhood(r_winner, c_winner, radious):
                    self.neurons[idx] = normalize(self.neurons[idx] + alpha * sample)

                t += 1

            print(t, alpha, radious)

            if radious > 0:
                radious -= 1

            # A zero learning rate means no further update can change the map.
            if alpha == 0:
                break

        self.trained = True

    def label_neurons(self, data, target):
        """Label each neuron with the target of the sample most similar to it.

        Parameters
        ----------
        data : array-like of shape (n_samples, dim)
        target : array-like of shape (n_samples,)
            Labels aligned positionally with the rows of ``data``.

        Raises
        ------
        ValueError
            If ``data`` is empty or its length differs from ``target``.
        """
        samples, target = self._check_labelled(data, target)
        if samples.shape[0] == 0:
            raise ValueError("label_neurons needs at least one sample")

        weights = np.vstack(self.neurons)
        # Column k holds the similarity of every sample to neuron k.
        nearest = np.argmax(samples @ weights.T, axis=0)
        self.labels = [target[i] for i in nearest]

        self.labeling = "Neurons"

    def label_mode(self, data, target):
        """Label each neuron with the most frequent target among the samples it wins.

        Ties go to the smallest label. Neurons that win no sample get ``-1``.

        Parameters
        ----------
        data : array-like of shape (n_samples, dim)
        target : array-like of shape (n_samples,)
            Labels aligned positionally with the rows of ``data``.

        Raises
        ------
        ValueError
            If ``data`` and ``target`` differ in length.
        """
        samples, target = self._check_labelled(data, target)
        winners = np.asarray(self.predict(samples), dtype=int)

        labels = []
        for neuron in range(len(self.neurons)):
            hits = target[winners == neuron]
            if hits.size == 0:
                labels.append(-1)
                continue
            # np.unique returns sorted values, so argmax picks the smallest
            # label among equally frequent ones.
            values, counts = np.unique(hits, return_counts=True)
            labels.append(values[np.argmax(counts)])
        self.labels = labels

        self.labeling = "Mode"

    def predict(self, data):
        """Return the flat index of the winning neuron for every row of ``data``.

        Parameters
        ----------
        data : array-like of shape (n_samples, dim)

        Returns
        -------
        list of int
            ``row * columns + column`` of the neuron with the largest dot
            product; ties go to the lowest index.
        """
        samples = np.asarray(data)
        weights = np.vstack(self.neurons)  # (size, dim)
        return np.argmax(samples @ weights.T, axis=1).tolist()

    def predict_label(self, data):
        """Return the label of the winning neuron for every row of ``data``.

        The neurons must have been labelled first with ``label_mode`` or
        ``label_neurons``; otherwise ``self.labels`` is empty and indexing
        it raises ``IndexError``.
        """
        return [self.labels[i] for i in self.predict(data)]
        

In [8]:
# Hyper-parameters of the map and of its training run.
shape = (12,8)                    # (rows, columns) of the neuron grid
dim = X_train.shape[1]            # length of each input vector
epochs = 15
# Initial neighbourhood radius: half of the smaller grid side, derived from
# `shape` so it stays consistent if the grid is resized (3 for a 12x8 grid).
radious = (min(shape) - 1) // 2
alpha = 0.5                       # initial learning rate

In [9]:
# SOM draws its initial weights from NumPy's global random generator; seed it
# first so the initial map is the same on every run.
np.random.seed(0)
som = SOM(shape, dim)

In [10]:
# Train the map on the training split; one progress line (samples seen,
# learning rate, radius) is printed per epoch.
som.fit(X_train, epochs=epochs, radious=radious, initial_alpha=alpha)

60000 0.2500020833506946 3
120000 0.16666759259773667 2
180000 0.12500052083550348 1
240000 0.10000033333444446 0
300000 0.08333356481545781 0
360000 0.07142874149700357 0
420000 0.06250013020860459 0
480000 0.05555565843640452 0
540000 0.05000008333347222 0
600000 0.04545461432517322 0
660000 0.041666724537117415 0
720000 0.038461587771266376 0
780000 0.035714328231343136 0
840000 0.03333337037041152 0
900000 0.031250032552117245 0


## Etiquetado con moda

In [11]:
from sklearn.metrics import accuracy_score

In [12]:
# Label every neuron with the most frequent training label among the samples it wins.
som.label_mode(data=X_train, target=y_train)

In [13]:
# Show the map's summary (shape, fit parameters, active labelling strategy).
print(som)

SOM (Self-organizing map) shape: (12, 8), dim: 785, fit: True (epochs: 15, radious: 3, alpha: 0.5), labeling: Mode.


In [14]:
# Predicted label for every test sample: the label of its winning neuron.
pred = som.predict_label(X_test)

In [15]:
# Test accuracy with the mode-based neuron labels.
print(accuracy_score(y_true=y_test, y_pred=pred))

0.8921


## Etiquetado con neurona

In [16]:
from sklearn.metrics import accuracy_score

In [17]:
# Relabel every neuron with the label of the training sample most similar to it.
som.label_neurons(data=X_train, target=y_train)

In [18]:
# Show the map's summary again; the labelling field should now read "Neurons".
print(som)

SOM (Self-organizing map) shape: (12, 8), dim: 785, fit: True (epochs: 15, radious: 3, alpha: 0.5), labeling: Neurons.


In [19]:
# Recompute the test predictions with the new neuron labels (overwrites `pred`).
pred = som.predict_label(X_test)

In [20]:
# Test accuracy with the nearest-sample neuron labels.
# NOTE(review): the recorded score is identical to the mode-labelling score;
# confirm this cell was re-run after relabelling.
print(accuracy_score(y_true=y_test, y_pred=pred))

0.8921
