# Maestría en Ciencia de Datos e Inteligencia Artificial
#### 8. Machine Learning and Deep Learning
#### Docente: Msc. Renzo Claure Aracena.

## SELECCIÓN DE MODELOS
### Validación cruzada

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

In [None]:
# Load the dataset: fields are separated by ';' and decimals are written with ','.
base = pd.read_csv('cancer.csv', sep=';', decimal=',')

In [None]:
# Preview the first rows of the loaded DataFrame.
base.head()

In [None]:
# Take the 'Tipo' column as the target and show how many rows each class has.
y = base['Tipo']
y.value_counts()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from `y`, then apply it to get numeric targets.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [None]:
# Features: every column except the identifier and the target.
X = base.drop(columns=['ID', 'Tipo'])
# Hold out a test set; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    random_state=0,
)

In [None]:
# Fit a linear-kernel SVM on the training split (fit() returns the estimator itself).
clasif = SVC(C=1, kernel='linear').fit(X_train, y_train)
# Mean accuracy on the held-out test split.
clasif.score(X_test, y_test)

In [None]:
# 5-fold cross-validation on the training split using the estimator's default score.
print(
    'Validacion Cruzada Entrenamiento (exactitud/acierto):',
    cross_val_score(clasif, X_train, y_train, cv=5),
)

In [None]:
# Per-fold scores from 5-fold cross-validation on the training split.
cv_score = cross_val_score(estimator=clasif, X=X_train, y=y_train, cv=5)
# Average of the fold scores.
np.mean(cv_score)

In [None]:
from sklearn.metrics import roc_auc_score, f1_score

In [None]:
# Hard class predictions on the test split (kept available for later cells).
y_pred = clasif.predict(X_test)
# ROC AUC is a ranking metric: it needs continuous scores, not thresholded labels.
# Feeding it `y_pred` would evaluate a single operating point only, so use the
# signed distance to the separating hyperplane instead.
y_scores = clasif.decision_function(X_test)
auc_score = roc_auc_score(y_test, y_scores)
print(f'El indicador AUC sobre la base de comprobación = {auc_score:.4f}')

In [None]:
# Run 5-fold cross-validation on the training split once per scoring metric.
for label, metric in (
    ('Validacion Cruzada ENTRENAMIENTO (AUC):', 'roc_auc'),
    ('Validacion Cruzada ENTRENAMIENTO (Recall):', 'recall'),
    ('Validacion Cruzada ENTRENAMIENTO (Precision):', 'precision'),
    ('Validacion Cruzada ENTRENAMIENTO (F1):', 'f1'),
):
    print(label, cross_val_score(clasif, X_train, y_train, cv=5, scoring=metric))

In [None]:
# F1 score of the fitted classifier's predictions on the test split.
f1_score(y_test, clasif.predict(X_test))

In [None]:
# Show the class counts of the target again.
y.value_counts()

In [None]:
# Same reproducible split as before (fixed random_state).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    random_state=0,
)
# Linear SVM with class_weight='balanced', fitted on the training split.
clasif = SVC(C=1, kernel='linear', class_weight='balanced').fit(X_train, y_train)
# Mean accuracy on the held-out test split.
clasif.score(X_test, y_test)

In [None]:
# 5-fold cross-validation of the current classifier under three metrics.
# scoring=None makes cross_val_score fall back to the estimator's default score.
for label, metric in (
    ('Validacion Cruzada (exactitud/acierto):', None),
    ('Validacion Cruzada (AUC):', 'roc_auc'),
    ('Validacion Cruzada (Recall):', 'recall'),
):
    print(label, cross_val_score(clasif, X_train, y_train, cv=5, scoring=metric))

### Búsqueda GRID

In [None]:
import pandas as pd

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, recall_score, precision_score, accuracy_score

In [None]:
# Estimator to tune: an RBF-kernel SVM.
clasif = SVC(kernel='rbf')
# The only hyperparameter searched is gamma.
grid_val = {
    'gamma': [0.001, 0.0015, 0.01, 0.1, 1, 5, 10, 50],
}

# Grid search with 3-fold CV; no `scoring` given, so the estimator's default score is used.
grid_clas_ex = GridSearchCV(estimator=clasif, param_grid=grid_val, cv=3)
grid_clas_ex.fit(X_train, y_train)

print('Mejor parametro Grid (Max Exactitud): ', grid_clas_ex.best_params_)
print('Mejor score Grid (Max Exactitud): ', grid_clas_ex.best_score_)

In [None]:
# Tabulate the per-candidate results of the grid search.
pd.DataFrame(grid_clas_ex.cv_results_)

In [None]:
# Tune gamma of an RBF-kernel SVM, selecting the candidate with the best ROC AUC.
clasif = SVC(kernel='rbf')
grid_val = {'gamma': [0.001, 0.0015, 0.01, 0.1, 1, 5, 10, 50]}  # candidate gamma values

grid_clas_auc = GridSearchCV(clasif, param_grid=grid_val, scoring='roc_auc', cv=5)
grid_clas_auc.fit(X_train, y_train)
# Continuous decision scores on the test split; ROC AUC needs scores, not hard labels.
y_decision_fn_scores_auc = grid_clas_auc.decision_function(X_test)

print('AUC: ', roc_auc_score(y_test, y_decision_fn_scores_auc))
print('Mejor parametro Grid (Max AUC): ', grid_clas_auc.best_params_)
# best_score_ is the mean cross-validated ROC AUC here, so label it as AUC (not accuracy).
print('Mejor score Grid (Max AUC): ', grid_clas_auc.best_score_)

In [None]:
# Tabulate the per-candidate results of the AUC-scored grid search.
pd.DataFrame(grid_clas_auc.cv_results_)

In [None]:
from sklearn.metrics import get_scorer_names
# List the scorer names that can be passed as the `scoring` argument.
print(get_scorer_names())

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [None]:
# Show the column names available in the dataset.
base.columns

In [None]:
from sklearn.model_selection import GridSearchCV

# Restrict the features to two columns and redo the reproducible train/test split.
X = base[['Radio', 'Simetria']]
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, random_state=0)

# gamma only affects the 'rbf', 'poly' and 'sigmoid' kernels; with a linear kernel
# every gamma value would give the same model and the search over gamma would be
# wasted work. The estimator is left unfitted because GridSearchCV clones and fits
# it internally.
clasif = SVC(kernel='rbf')
grid_val = {
    'class_weight': ['balanced', {1: 1}, {1: 3}, {1: 4}],
    'gamma': [0.001, 0.01, 0.1, 1, 5, 10, 50],
}

# Repeat the search once per metric to see how the selected parameters change.
for eval_metric in ('precision', 'recall', 'f1', 'roc_auc'):
    grid_clas_p = GridSearchCV(clasif, param_grid=grid_val, scoring=eval_metric, cv=3)
    grid_clas_p.fit(X_train, y_train)
    print(f'Mejor Parametro Grid (max, {eval_metric}): {grid_clas_p.best_params_}')
    print(f'Mejor Score Grid (max, {eval_metric}): {grid_clas_p.best_score_}')

### Implementación de Pipelines

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
import time

In [None]:
# Pipeline: scale features to [0, 1] and then fit an SVM, so scaling is learned
# inside each cross-validation fold.
modelo = Pipeline(
    steps=[
        ('scaler', MinMaxScaler()),
        ('model', SVC()),
    ]
)
# The 'model__' prefix routes each parameter to the pipeline step named 'model'.
param_grid = {
    'model__gamma': [0.001, 0.01, 0.1, 1, 5, 10, 50],
    'model__kernel': ['rbf', 'linear'],
}
# Grid search over the whole pipeline with 5-fold CV, selecting by F1.
cv = GridSearchCV(estimator=modelo, param_grid=param_grid, scoring='f1', cv=5)
cv.fit(X_train, y_train)

In [None]:
# Parameter combination selected by the pipeline grid search.
cv.best_params_

In [None]:
# Score of the fitted grid search on the test split (the search was built with scoring='f1').
cv.score(X_test, y_test)

### EJERCICIO
REALICE UN GRID SEARCH CON VALIDACIÓN CRUZADA: UTILICE LOS KERNELS LINEAL Y RBF, PRUEBE LOS VALORES DEL PARÁMETRO C ∈ {0.1, 10} Y USE cv=5