In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from numpy import random
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from numpy import matlib
import qgrid

In [2]:
X = np.genfromtxt('Data_OpenCV/Features_X.csv',delimiter=',')
Y = np.genfromtxt('Data_OpenCV/Y_labels.csv',delimiter=',')

In [3]:
print('Tamaño X: ',X.shape)
print('Tamaño Y: ', Y.shape)

Tamaño X:  (13493, 150)
Tamaño Y:  (13493,)


In [4]:
def K_vecinos(vecinos):
    Folds = 4
    random.seed(19680801)
    EficienciaTrain = np.zeros(Folds)
    EficienciaVal = np.zeros(Folds)
    Error = np.zeros(Folds)
    skf = StratifiedKFold(n_splits=Folds)
    j = 0
        
    for train, test in skf.split(X, Y):
        Xtrain = X[train,:]
        Ytrain = Y[train]
        Xtest = X[test,:]
        Ytest = Y[test]
        
        #Se normalizan los datos
        media = np.mean(Xtrain)
        desvia = np.std(Xtrain)
        Xtrain = preprocessing.scale(Xtrain)
        Xtest = (Xtest - np.matlib.repmat(media, Xtest.shape[0], 1))/np.matlib.repmat(desvia, Xtest.shape[0], 1)
        
        
        modelo = KNeighborsClassifier(n_neighbors=vecinos)
        modelo.fit(Xtrain,Ytrain)
        
        #Validación con muestras de entrenamiento
        Ytrain_pred = modelo.predict(Xtrain)
        
        #Validación con las muestras de test    
        Yest = modelo.predict(Xtest)
        
        #Evaluamos las predicciones del modelo con los datos de test
        EficienciaTrain[j] = np.mean(Ytrain_pred == Ytrain)
        EficienciaVal[j] = np.mean(Yest == Ytest)
        j += 1
        
    print("Modelo entrenado con "+ str(vecinos)+ " vecinos")
    
    return np.mean(EficienciaVal), np.std(EficienciaVal),np.mean(EficienciaTrain),np.std(EficienciaTrain)    

In [5]:
import qgrid

randn = np.random.randn
df_types = pd.DataFrame({
    'Numero de vecinos' : pd.Series([1, 2, 3, 4, 5, 6, 7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39])})
df_types["Eficiencia en validacion"] = ""
df_types["IC Eficiencia en validacion"] = ""
df_types["Eficiencia en entrenamiento"] = ""
df_types["IC Eficiencia en entrenamiento"] = ""
df_types.set_index(['Numero de vecinos'], inplace=True)


In [6]:
i = 0
for vecinos in df_types.index:   
    eficiencia_val, ic_val,error, std_error = K_vecinos(vecinos)
    df_types["Eficiencia en validacion"][vecinos]=str(eficiencia_val)
    df_types["IC Eficiencia en validacion"][vecinos] = str(ic_val)
    df_types["Eficiencia en entrenamiento"][vecinos] = str(error)
    df_types["IC Eficiencia en entrenamiento"][vecinos] = str(std_error)
    i=i+1

Modelo entrenado con 1 vecinos
Modelo entrenado con 2 vecinos
Modelo entrenado con 3 vecinos
Modelo entrenado con 4 vecinos
Modelo entrenado con 5 vecinos
Modelo entrenado con 6 vecinos
Modelo entrenado con 7 vecinos
Modelo entrenado con 9 vecinos
Modelo entrenado con 11 vecinos
Modelo entrenado con 13 vecinos
Modelo entrenado con 15 vecinos
Modelo entrenado con 17 vecinos
Modelo entrenado con 19 vecinos
Modelo entrenado con 21 vecinos
Modelo entrenado con 23 vecinos
Modelo entrenado con 25 vecinos
Modelo entrenado con 27 vecinos
Modelo entrenado con 29 vecinos
Modelo entrenado con 31 vecinos
Modelo entrenado con 33 vecinos
Modelo entrenado con 35 vecinos
Modelo entrenado con 37 vecinos
Modelo entrenado con 39 vecinos


In [7]:
qgrid_widget = qgrid.show_grid(df_types, show_toolbar=False)
qgrid_widget
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia en validacion,IC Eficiencia en validacion,Eficiencia en entrenamiento,IC Eficiencia en entrenamiento
Numero de vecinos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.3020793393284951,0.0093757549438774,1.0,0.0
2,0.3081601121186894,0.0087409760665373,0.6526100997603721,0.0032512049712774
3,0.3263200792874087,0.0092988780615853,0.6045851136675711,0.0043856931695776
4,0.3240942550102316,0.0063067248629224,0.5712346660158023,0.0054731453830492
5,0.3250570645397755,0.0080826669256914,0.5419110447623573,0.0059492883744488
6,0.3409181211872806,0.0086746066464328,0.5216043857321897,0.0059173250955719
7,0.3434383095927317,0.0113215678425917,0.5084614923222177,0.005979090600177
9,0.3528518900996911,0.0144865375303545,0.4858816127619786,0.0038490716395136
11,0.3534449435779678,0.0110137620150833,0.475678970098119,0.0028679963923487
13,0.3593735497085867,0.0089339233134073,0.4655747656843094,0.0015775572637665


In [8]:
df_types

Unnamed: 0_level_0,Eficiencia en validacion,IC Eficiencia en validacion,Eficiencia en entrenamiento,IC Eficiencia en entrenamiento
Numero de vecinos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.3020793393284951,0.0093757549438774,1.0,0.0
2,0.3081601121186894,0.0087409760665373,0.6526100997603721,0.0032512049712774
3,0.3263200792874087,0.0092988780615853,0.6045851136675711,0.0043856931695776
4,0.3240942550102316,0.0063067248629224,0.5712346660158023,0.0054731453830492
5,0.3250570645397755,0.0080826669256914,0.5419110447623573,0.0059492883744488
6,0.3409181211872806,0.0086746066464328,0.5216043857321897,0.0059173250955719
7,0.3434383095927317,0.0113215678425917,0.5084614923222177,0.005979090600177
9,0.3528518900996911,0.0144865375303545,0.4858816127619786,0.0038490716395136
11,0.3534449435779678,0.0110137620150833,0.475678970098119,0.0028679963923487
13,0.3593735497085867,0.0089339233134073,0.4655747656843094,0.0015775572637665
