In [2]:
import numpy as np
from pandas import DataFrame, read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale

In [4]:
# Load the UCI Wine data set. The file has no header row, so pandas numbers
# the columns 0..N; column 0 is used as the class label further down.
url_datos = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
datos = read_csv(url_datos, header=None)

In [8]:
# Column 0 is the class label; the remaining columns are the input features.
atributos = datos.iloc[:, 1:]
clases = datos.iloc[:, 0]

# Rescale every feature column to [0, 1].
# NOTE(review): the scaling uses all rows, i.e. it runs before the
# train/test split performed in a later cell, so the test rows influence
# the min/max used for the training rows. Confirm this is intended.
X = minmax_scale(atributos)
Y = np.array(clases)

In [11]:
# Sanity check: container types and dimensions of the features and labels.
(
    type(X),
    type(Y),
    X.shape,
    Y.shape,
)

(numpy.ndarray, numpy.ndarray, (178, 13), (178,))

In [12]:
# Hold out one third of the samples for testing. The split is stratified on
# the class label and uses a fixed seed so it is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=1/3,
    stratify=Y,
    random_state=0,
)

In [13]:
from sklearn.cluster import KMeans
# Cluster the training samples into 6 groups; the centroids are used below as
# the centers of the Gaussian units of the hidden layer.
# KMeans starts from a random initialisation, so random_state is pinned to
# make the centers (and everything derived from them) reproducible under
# Restart Kernel -> Run All.
kmedias = KMeans(n_clusters=6, random_state=0).fit(X_train)
centros = kmedias.cluster_centers_

In [14]:
# Display the fitted K-means centroids (one row per cluster).
centros

array([[0.33216374, 0.63658322, 0.55496138, 0.58476518, 0.28743961,
        0.59923372, 0.52344116, 0.44025157, 0.56642131, 0.15822905,
        0.35953026, 0.66992267, 0.14098906],
       [0.30797697, 0.16606966, 0.48128342, 0.51449742, 0.24592391,
        0.39881466, 0.34236551, 0.40625   , 0.37569006, 0.14193153,
        0.46392276, 0.59375   , 0.16614658],
       [0.77539474, 0.22401186, 0.57647059, 0.31649485, 0.37880435,
        0.69465517, 0.62004219, 0.26886792, 0.57413249, 0.44001706,
        0.50406504, 0.63461538, 0.70139087],
       [0.50626566, 0.45586298, 0.52126305, 0.48576338, 0.28416149,
        0.23809524, 0.09694595, 0.71069182, 0.17425267, 0.31244921,
        0.25783972, 0.18367347, 0.22124856],
       [0.63634868, 0.5444664 , 0.59959893, 0.57313144, 0.31725543,
        0.28340517, 0.1119462 , 0.60966981, 0.32492114, 0.71096416,
        0.10670732, 0.14010989, 0.25953994],
       [0.65223684, 0.21610672, 0.5868984 , 0.39871134, 0.39130435,
        0.6012069 , 0.50685

# Matriz de distancias - SIGMA

In [15]:
from sklearn.metrics.pairwise import euclidean_distances
# Pairwise Euclidean distances between the cluster centers
# (square matrix with a zero diagonal).
distancias = euclidean_distances(centros)

# One width per center: the mean distance from that center to the other
# centers. The diagonal contributes 0 to each column sum, hence the division
# by (number of centers - 1).
# ndarray.sum(axis=0) is used instead of the builtin sum(), which walks the
# matrix row by row in Python.
sigma = distancias.sum(axis=0) / (centros.shape[0] - 1)

In [16]:
# Display the per-center widths computed in the previous cell.
sigma

array([0.85810088, 0.79314121, 0.94171946, 0.88927048, 0.97776535,
       0.76089675])

# Salida capa OCULTA datos entrenamiento

In [17]:
# Hidden-layer (Gaussian radial basis) activations for the training samples:
#   X_mlp_train[i, j] = exp(-||X_train[i] - centros[j]||^2 / (2 * sigma[j]^2))
# Broadcasting (n, 1, d) - (1, k, d) evaluates every sample/center pair in one
# shot, replacing the nested Python loops and the builtin sum().
dif_train = X_train[:, np.newaxis, :] - centros[np.newaxis, :, :]
dist2_train = (dif_train ** 2).sum(axis=2)  # squared distances, shape (n, k)
# sigma has one entry per center, so it broadcasts along the columns.
X_mlp_train = np.exp(-dist2_train / (2.0 * sigma ** 2))

# Salida capa OCULTA datos test

In [18]:
# Hidden-layer (Gaussian radial basis) activations for the test samples,
# using the same centers and widths as for the training data:
#   X_mlp_test[i, j] = exp(-||X_test[i] - centros[j]||^2 / (2 * sigma[j]^2))
# Broadcasting (n, 1, d) - (1, k, d) evaluates every sample/center pair in one
# shot, replacing the nested Python loops and the builtin sum().
dif_test = X_test[:, np.newaxis, :] - centros[np.newaxis, :, :]
dist2_test = (dif_test ** 2).sum(axis=2)  # squared distances, shape (n, k)
# sigma has one entry per center, so it broadcasts along the columns.
X_mlp_test = np.exp(-dist2_test / (2.0 * sigma ** 2))

# Binarización de las salidas

In [19]:
from sklearn.preprocessing import LabelBinarizer
# One-hot encoder for the class labels. The set of classes is learned from
# the label vector itself rather than from a hard-coded [1, 2, 3] list, so
# the cell keeps working if the labels of the data set change.
etiquetas = LabelBinarizer()
etiquetas.fit(Y)

LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [20]:
# One-hot encode the full label vector and both partitions with the encoder
# fitted above (one indicator column per class).
Ymlp, Y_mlp_train, Y_mlp_test = (
    etiquetas.transform(clases_particion)
    for clases_particion in (Y, Y_train, Y_test)
)

In [21]:
# Sanity check: dimensions of the hidden-layer outputs and one-hot targets.
tuple(
    matriz.shape
    for matriz in (X_mlp_train, X_mlp_test, Y_mlp_train, Y_mlp_test)
)

((118, 6), (60, 6), (118, 3), (60, 3))

# Adaline con SGD

In [23]:
from sklearn.linear_model import SGDRegressor
# Linear output unit trained with stochastic gradient descent.
# SGD shuffles the training samples, so random_state is pinned to make the
# fitted weights (and the reported accuracy) reproducible between runs.
adaline = SGDRegressor(max_iter=5000, random_state=0)

In [24]:
# Buffer for the predicted output-layer activations: one row per test sample
# and one column per class. The shape is taken from the one-hot test targets
# instead of hard-coding 3 classes.
Yp_test = np.zeros(Y_mlp_test.shape, dtype=float)

In [25]:
# Train one linear output unit per class (one-vs-rest regression on the
# one-hot targets) and store its predictions for the test samples.
# The number of output units comes from the target matrix instead of a
# hard-coded 3.
# The same `adaline` object is fitted again on every iteration, so only the
# predictions are kept; after the loop it holds the fit for the last column.
for neurona_salida in range(Y_mlp_train.shape[1]):
    adaline.fit(X_mlp_train, Y_mlp_train[:, neurona_salida])
    Yp_test[:, neurona_salida] = adaline.predict(X_mlp_test)

In [26]:
# The predicted class is the output unit with the largest activation; the
# true class is the position of the 1 in the one-hot target row.
clase_predicha = np.argmax(Yp_test, axis=1)
clase_real = np.argmax(Y_mlp_test, axis=1)

# Fraction of test samples whose predicted class matches the true class.
aciertos = np.mean(clase_predicha == clase_real)

In [27]:
# Report the test accuracy as a percentage.
porcentaje_aciertos = aciertos*100
print("Tasa de aciertos =", porcentaje_aciertos, "%")

Tasa de aciertos = 100.0 %
