In [268]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
from scipy.spatial.distance import pdist, squareform
import numpy as np
import pandas as pd

In [271]:
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None,
                   names=['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width', 'Species'])

In [270]:
def calculo_indicadores(predicciones, etiquetas):
    matriz_confusion = confusion_matrix(etiquetas, predicciones, labels=np.unique(etiquetas))
    
    accuracy = accuracy_score(etiquetas, predicciones)
    
    precision = precision_score(etiquetas, predicciones, average='weighted', zero_division=0)
    
    recall = recall_score(etiquetas, predicciones, average='weighted', zero_division=0)
    
    f1 = f1_score(etiquetas, predicciones, average='weighted', zero_division=0)

    indicadores = {"Matriz de confusion": matriz_confusion,"Accuracy": accuracy, "Precision": precision, "Recall": recall, "F1 Score": f1}
    return indicadores

## _k_ Nearest Neighbour

### Distancias euclideas

In [272]:
X = iris[['Sepal.Length', 'Petal.Length']]
y = iris['Species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

In [273]:
param_grid = {'n_neighbors': list(range(1, 20))}

knnEucli = KNeighborsClassifier()
grid_searchEucli = GridSearchCV(knnEucli, param_grid, cv=5)

In [274]:
grid_searchEucli.fit(X_train, y_train)

In [275]:
best_modelEucli = grid_searchEucli.best_estimator_

predictionsEucli = best_modelEucli.predict(X_test)
indicadoresEucli = calculo_indicadores(predictionsEucli, y_test)

print("Best k:", best_modelEucli.get_params()['n_neighbors'])
print("Matriz de Confusion:\n", indicadoresEucli["Matriz de confusion"])
print("Accuracy:", indicadoresEucli["Accuracy"])
print("Precision:", indicadoresEucli["Precision"])
print("Recall:", indicadoresEucli["Recall"])
print("F1 Score:", indicadoresEucli["F1 Score"])

Best k: 12
Matriz de Confusion:
 [[ 9  0  0]
 [ 0  8  0]
 [ 0  2 11]]
Accuracy: 0.9333333333333333
Precision: 0.9466666666666667
Recall: 0.9333333333333333
F1 Score: 0.9342592592592592


### Distancia de Mahattan

In [276]:
knnCityBlock = KNeighborsClassifier(metric='manhattan')

grid_searchCityBlock = GridSearchCV(knnCityBlock, param_grid, cv=5)

grid_searchCityBlock.fit(X_train, y_train)

best_modelCityBlock = grid_searchCityBlock.best_estimator_

predictionsCityBlock = best_modelCityBlock.predict(X_test)
indicadoresCityBlock = calculo_indicadores(predictionsCityBlock, y_test)

accuracyCityBlock = accuracy_score(y_test, predictionsCityBlock)
precisionCityBlock = precision_score(y_test, predictionsCityBlock, average='weighted')
recallCityBlock = recall_score(y_test, predictionsCityBlock, average='weighted')
f1CityBlock = f1_score(y_test, predictionsCityBlock, average='weighted')

print("Best k:", best_modelCityBlock.get_params()['n_neighbors'])
print("Matriz de Confusion:\n", indicadoresCityBlock["Matriz de confusion"])
print("Accuracy:", accuracyCityBlock)
print("Precision:", precisionCityBlock)
print("Recall:", recallCityBlock)
print("F1 Score:", f1CityBlock)

Best k: 11
Matriz de Confusion:
 [[ 9  0  0]
 [ 0  8  0]
 [ 0  2 11]]
Accuracy: 0.9333333333333333
Precision: 0.9466666666666667
Recall: 0.9333333333333333
F1 Score: 0.9342592592592592


### Distancia de Mahalanobis

In [277]:
knnMahala = KNeighborsClassifier(metric='mahalanobis', metric_params={'V': np.cov(X_train, rowvar=False)})

grid_searchMahala = GridSearchCV(knnMahala, param_grid, cv=5)

grid_searchMahala.fit(X_train, y_train)

best_modelMahala = grid_searchMahala.best_estimator_

predictionsMahala = best_modelMahala.predict(X_test)
indicadoresMahala = calculo_indicadores(predictionsMahala, y_test)

accuracyMahala = accuracy_score(y_test, predictionsMahala)
precisionMahala = precision_score(y_test, predictionsMahala, average='weighted')
recallMahala = recall_score(y_test, predictionsMahala, average='weighted')
f1Mahala = f1_score(y_test, predictionsMahala, average='weighted')

print("Best k:", best_modelMahala.get_params()['n_neighbors'])
print("Matriz de Confusion:\n", indicadoresMahala["Matriz de confusion"])
print("Accuracy:", accuracyMahala)
print("Precision:", precisionMahala)
print("Recall:", recallMahala)
print("F1 Score:", f1Mahala)

Best k: 8
Matriz de Confusion:
 [[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]
Accuracy: 0.9666666666666667
Precision: 0.9703703703703703
Recall: 0.9666666666666667
F1 Score: 0.9669803921568628


### Distancia de Chebychev

In [278]:
knn_chebyshev = KNeighborsClassifier(metric='chebyshev')

grid_search_chebyshev = GridSearchCV(knn_chebyshev, param_grid, cv=5)

grid_search_chebyshev.fit(X_train, y_train)

best_model_chebyshev = grid_search_chebyshev.best_estimator_

predictions_chebyshev = best_model_chebyshev.predict(X_test)
indicadores_chebyshev = calculo_indicadores(predictions_chebyshev, y_test)

accuracy_chebyshev = accuracy_score(y_test, predictions_chebyshev)
precision_chebyshev = precision_score(y_test, predictions_chebyshev, average='weighted')
recall_chebyshev = recall_score(y_test, predictions_chebyshev, average='weighted')
f1_chebyshev = f1_score(y_test, predictions_chebyshev, average='weighted')

print("Best k:", best_model_chebyshev.get_params()['n_neighbors'])
print("Matriz de Confusion:\n", indicadores_chebyshev["Matriz de confusion"])
print("Accuracy:", accuracy_chebyshev)
print("Precision:", precision_chebyshev)
print("Recall:", recall_chebyshev)
print("F1 Score:", f1_chebyshev)

Best k: 12
Matriz de Confusion:
 [[ 9  0  0]
 [ 0  8  0]
 [ 0  2 11]]
Accuracy: 0.9333333333333333
Precision: 0.9466666666666667
Recall: 0.9333333333333333
F1 Score: 0.9342592592592592


### Distnacias de Canberra

In [279]:
canberra_distance = pairwise_distances(X_train, X_train, metric='canberra')

knn_canberra = KNeighborsClassifier(metric='precomputed')

grid_search_canberra = GridSearchCV(knn_canberra, param_grid, cv=5)

grid_search_canberra.fit(canberra_distance, y_train)

best_model_canberra = grid_search_canberra.best_estimator_

canberra_distance_test = pairwise_distances(X_test, X_train, metric='canberra')

predictions_canberra = best_model_canberra.predict(canberra_distance_test)
indicadores_canberra = calculo_indicadores(predictions_chebyshev, y_test)

accuracy_canberra = accuracy_score(y_test, predictions_canberra)
precision_canberra = precision_score(y_test, predictions_canberra, average='weighted')
recall_canberra = recall_score(y_test, predictions_canberra, average='weighted')
f1_canberra = f1_score(y_test, predictions_canberra, average='weighted')

print("Best k:", best_model_canberra.get_params()['n_neighbors'])
print("Matriz de Confusion:\n", indicadores_canberra["Matriz de confusion"])
print("Accuracy:", accuracy_canberra)
print("Precision:", precision_canberra)
print("Recall:", recall_canberra)
print("F1 Score:", f1_canberra)

Best k: 13
Matriz de Confusion:
 [[ 9  0  0]
 [ 0  8  0]
 [ 0  2 11]]
Accuracy: 0.9666666666666667
Precision: 0.9703703703703703
Recall: 0.9666666666666667
F1 Score: 0.9669803921568628
