In [13]:
from sklearn.datasets import load_iris, fetch_openml
from sklearn.preprocessing import MinMaxScaler, normalize
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import minkowski, cosine
from sklearn.metrics import accuracy_score
from collections import Counter
import numpy as np
import math
import random

In [14]:
X, Y = load_iris(return_X_y=True)
X = MinMaxScaler().fit_transform(X)

In [15]:
class Neuron:
    def __init__(self, size, x, y):
        self.weight = np.array([random.uniform(-1, 1) for i in range(size)]).reshape(1,-1)
        self.x = x
        self.y = y
        self.label = None
        self.wins = Counter()
        self.active = True

    def predict(self, data):
        return cosine(data, self.weight)

class SOM:
    def __init__(self, rows, columns, size):
        self.network = list()
        for i in range(rows):
            for j in range(columns):
                self.network.append(Neuron(size=size, x=i, y=j))
                
    def fit(self, X, epochs, radius, alpha0):
        alpha = alpha0
        for t in range(epochs):
            D = np.copy(X)
            np.random.shuffle(D)
            for data in D:
                l = map(lambda x: x.predict(data), self.network)
                l = list(l)
                winner = self.network[np.argmax(l)]
                for neuron in self.network:
                    if winner.x-radius < neuron.x < winner.x+radius and winner.y-radius < neuron.y < winner.y+radius:
                        #p = neuron.weight+alpha*data
                        #neuron.weight = p/np.linalg.norm(p)
                        #neuron.weight += normalize(alpha*(data-neuron.weight), norm="max")
                        neuron.weight += alpha*(data-neuron.weight)
            radius -= 1
            if radius == -1:
                radius == 0
            alpha = alpha0 / (1+(t/len(D)))
    def neuron_labeling(self, X, Y):
        for neuron in self.network:
            l = map(neuron.predict, X)
            l = list(l)
            neuron.label = Y[np.argmax(l)]
            
    def mode_labeling(self, X, Y):
        for i, instance in enumerate(X):
            l = map(lambda x: x.predict(instance), filter(lambda x: x.active, self.network))
            l = list(l)
            winner = self.network[np.argmax(l)]
            winner.wins[Y[i]] += 1
            winner.label = winner.wins.most_common()[0][0]
            if len(winner.wins.keys()) > 1:
                winner.active = True
            
            
    def predict(self, X):
        output = np.zeros((X.shape[0],))
        for i,instance in enumerate(X):
            l = map(lambda x: x.predict(instance), filter(lambda x: x.active, self.network))
            l = list(l)
            output[i] = self.network[np.argmax(l)].label
        return output
            

In [16]:
X_train, X_test, Y_train, Y_test= train_test_split(X, Y, test_size=0.33, random_state=0, stratify=Y)

In [17]:
som = SOM(12, 8, 4)
som.fit(X_train, 100, 4, 0.5)
som.mode_labeling(X_train, Y_train)
Y_predict = som.predict(X_test)

In [18]:
np.sum(Y_predict == Y_test)/Y_test.shape[0]

0.82

In [19]:
# MNIST
X, Y = fetch_openml("mnist_784", return_X_y=True)
X = MinMaxScaler().fit_transform(X)

In [20]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=10000, random_state=0, stratify=Y)

In [None]:
som = SOM(12, 8, 784)
som.fit(X_train, 10, 4, 0.5)
som.mode_labeling(X_train, Y_train)
Y_predict = som.predict(X_test)
print(accuracy_score(Y_predict, Y_test, normalize=True))
som = SOM(12, 8, 784)
som.fit(X_train, 10, 4, 0.5)
som.neuron_labeling(X_train, Y_train)
Y_predict = som.predict(X_test)
print(accuracy_score(Y_predict, Y_test, normalize=True))

Los resultados con Iris dan un 25% de acierto