In [60]:
# configuración para notebook con instalación LOCAL
FUENTES_DIR  = '../Fuentes'     # carpeta donde se encuentran archivos .py auxiliares
DATOS_DIR    = '../Datos/'      # carpeta donde se encuentran los datasets
MODELOS_DIR  = '../Modelos/'    # carpeta donde se encuentran los modelos
ARCHIVOs_DIR = '../Archivos/'   # carpeta recuperar o copiar archivos

# agrega ruta de busqueda donde tenemos archivos .py
import sys
sys.path.append(FUENTES_DIR)

In [61]:
import numpy as np
import pandas as pd
from matplotlib import pylab as plt

from grafica import *
from ClassRNMulticlase import RNMulticlase

In [62]:
import chardet


nombre_archivo = DATOS_DIR + 'Vinos.csv' 

#-- detectando la codificación de caracteres usada ----
with open(nombre_archivo, 'rb') as f:
    result = chardet.detect(f.read())  # or readline if the file is large

# recupera el archivo en un objeto dataframe de pandas utilizando la codificación detectada
df = pd.read_csv(nombre_archivo, sep=";")

# visualiza el dataframe

df

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315,Proline
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


In [63]:
Y = df['Class']
X = df.drop(columns=['Class'])
Y = pd.get_dummies(Y).astype(int)


In [64]:
X = np.array(X)
Y = np.array(df.iloc[:,0])    # recupera solo la primera columna (es la clase)

In [65]:
#Normalizamos
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(X)
Y = Y.reshape((-1,1))   # fuerza formato (N,1) sino es (N,)

# los target deben tener formato one hot.
# clase 1 => 100 | clase 2 => 010 | clase 3 => 001
encoder = preprocessing.OneHotEncoder(handle_unknown='ignore')
Y = encoder.fit_transform(Y.reshape(-1,1)).toarray()

In [66]:
from sklearn.model_selection import train_test_split

def ej_doce(funcion, costo, vel_apr):
    CANT_EJECUCIONES = 30
    PORCENTAJES = [0.6, 0.8]
    MAX_ITE = 400
    VELOCIDAD_APR = vel_apr
    FUNC = funcion
    COSTO = costo
    res = []

    for i in PORCENTAJES:
        ejemplos_correctos_train = []
        ejemplos_correctos_test = []
        cant_iteraciones = []

        for p in range (CANT_EJECUCIONES):
            #Primero dividimos los datos en entrenamiento y testeo:
            X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=i, random_state=None)
           
            #model
            modelo = RNMulticlase(alpha=VELOCIDAD_APR, n_iter=MAX_ITE, FUN=FUNC, COSTO=COSTO)
            #training
            modelo.fit(X_train, y_train)
            #predictions
            y_pred_train = neurona.predict(X_train)
            y_pred_test = neurona.predict(X_test)

            (W, b, iteraciones) = (modelo.w_, modelo.b_, len(modelo.errors_))
            prom_ite = prom_ite + iteraciones
            # efectividad entrenamiento
            efectividad = 100*modelo.accuracy(X_train, y_train)
            
            #sum of the correctly classified
            correct_train = np.sum(y_pred_train == y_train)
            correct_test = np.sum(y_pred_test == y_test)
            #save the info
            ejemplos_correctos_train.append(correct_train)
            ejemplos_correctos_test.append(correct_test)
            cant_iteraciones.append(len(modelo.errors_))

        prom_train = np.mean(ejemplos_correctos_train)
        prom_test = np.mean(ejemplos_correctos_test)
        prom_iter = np.mean(cant_iteraciones)
        desv_iter = np.std(cant_iteraciones)

        res.append({
            'Porcentaje': i,
            'Tasa' : VELOCIDAD_APR,
            'Promedio Correctos Train': prom_train,
            'Promedio Correctos Test': prom_test,
            'Promedio Iteraciones': prom_iter,
            'Desviación Iteraciones': desv_iter
        })

    for r in res:
        print(f"Porcentaje: {r['Porcentaje']}, Tasa: {r['Tasa']}")
        print(f"  Promedio Correctos Train: {r['Promedio Correctos Train']}")
        print(f"  Promedio Correctos Test: {r['Promedio Correctos Test']}")
        print(f"  Promedio Iteraciones: {r['Promedio Iteraciones']} +/- {r['Desviación Iteraciones']}")
        print()

In [67]:
ej_doce("sigmoid", "ECM", 0.1)

[[1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1.

ValueError: operands could not be broadcast together with shapes (106,) (106,3) 