# Entrenamiento de modelo Wine Classification KNN (Vecinos más cercanos)

Basado en el desarrollo que se realizó en el [libro](https://databricks-prod-cloudfront.cloud.databricks.com/public/4027ec902e239c93eaaa8714f173bcfc/1365877123506783/3295404153434753/8948153789405631/latest.html), donde se valoraron varios algoritmos, este libro creará el modelo para Nearest Neighbors.


Los valores de la clase `wine_class` se reasignan para que comiencen en 0:

- 1 se reemplaza por 0 (variedad A)
- 2 se reemplaza por 1 (variedad B)
- 3 se reemplaza por 2 (variedad C)

In [0]:
import os
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_squared_error, classification_report,confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [0]:
# Default location of the wine data set read by read_data().
_WINE_DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"

# Column names assigned to the 14 comma-separated fields, in file order.
_WINE_COLUMNS = [
    "wine_class",
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "magnesium",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
    "color_intensity",
    "hue",
    "od280_od315_of_diluted_wines",
    "proline",
]


def read_data(file_name=_WINE_DATA_URL):
    """Read the wine data set and assign its column names.

    Args:
        file_name: Path, URL or file-like object holding the header-less,
            comma-separated data. Defaults to the UCI repository URL.

    Returns:
        A pandas DataFrame with one row per line of the input and the
        columns listed in ``_WINE_COLUMNS`` (``wine_class`` first).
    """
    # The file has no header row. Without header=None pandas would consume
    # the first sample as column names and that sample would be lost.
    return pd.read_csv(file_name, sep=",", header=None, names=_WINE_COLUMNS)


In [0]:
# Train a 7-nearest-neighbours classifier on the wine data inside an MLflow run.
# mlflow.sklearn.autolog() requires mlflow 1.11.0 or above.
mlflow.sklearn.autolog()
# With autolog() enabled, all model parameters, a model score, and the fitted model are automatically logged.  
with mlflow.start_run(run_name="training-knn"):
    wine_dataframe = read_data()
    # Re-label the target so the classes are 0-based: 1 -> 0, 2 -> 1, 3 -> 2.
    wine_dataframe['wine_class'] = wine_dataframe.wine_class.replace({1: 0, 2: 1, 3: 2})
    # First column is the target; every remaining column is a feature.
    X = wine_dataframe.iloc[:, 1:].values
    y = wine_dataframe.iloc[:, 0].values
    # Hold out 20% of the samples for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Fit the scaler on the training split only and reuse those statistics
    # for the test split. Re-fitting on the test data would standardize the
    # two splits differently and distort the evaluation.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    run_id = mlflow.active_run().info.run_id

    # NOTE(review): only the classifier is fitted here, so inputs must be
    # scaled with `sc` before calling the model — confirm how the scaler is
    # persisted for serving.
    model = KNeighborsClassifier(n_neighbors=7)
    model.fit(X_train, y_train)
    # Mean accuracy and confusion matrix on the held-out split.
    score = model.score(X_test, y_test)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
