# Model Tuning

In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from skopt import BayesSearchCV
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed

from skopt.space import Real, Integer, Categorical

from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

# Modelos de clasificación
from sklearn.linear_model import (
    LogisticRegression, RidgeClassifier, SGDClassifier, Perceptron, PassiveAggressiveClassifier
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.ensemble import (
    RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, 
    ExtraTreesClassifier, BaggingClassifier, HistGradientBoostingClassifier
)
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, ComplementNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

data_folder = "dataset_matlab"

# Load the pre-split datasets. Feature tables stay as DataFrames, while the
# label files are flattened to 1-D arrays via `.values.ravel()`.
X_train, X_test = (
    pd.read_csv(f"{data_folder}/X_{split}.csv") for split in ("train", "test")
)
y_train, y_test = (
    pd.read_csv(f"{data_folder}/y_{split}.csv").values.ravel()
    for split in ("train", "test")
)

# Available classifiers, keyed by the display name that is also used to look
# up their hyperparameter tables. The registry is assembled family by family;
# the merge at the end keeps the families in their listed order.
_linear_models = {
    "Logistic Regression": LogisticRegression(),
    "Ridge Classifier": RidgeClassifier(),
    "SGD Classifier": SGDClassifier(),
    "Perceptron": Perceptron(),
    "Passive Aggressive": PassiveAggressiveClassifier(),
}

_neighbor_and_tree_models = {
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
}

_svm_models = {
    "SVM (SVC)": SVC(),
    "SVM (NuSVC)": NuSVC(),
    "SVM (LinearSVC)": LinearSVC(),
}

_ensemble_models = {
    "Random Forest": RandomForestClassifier(),
    "Hist Gradient Boosting": HistGradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(algorithm='SAMME'),
    "Extra Trees": ExtraTreesClassifier(),
    "Bagging": BaggingClassifier(),
}

_probabilistic_models = {
    "Gaussian Naive Bayes": GaussianNB(),
    "Bernoulli Naive Bayes": BernoulliNB(),
    "Linear Discriminant Analysis": LinearDiscriminantAnalysis(),
    "Quadratic Discriminant Analysis": QuadraticDiscriminantAnalysis(),
}

_neural_models = {
    "MLP (Neural Network)": MLPClassifier(),
}

models = {
    **_linear_models,
    **_neighbor_and_tree_models,
    **_svm_models,
    **_ensemble_models,
    **_probabilistic_models,
    **_neural_models,
}

# Discrete hyperparameter grids, one entry per key of `models`.
# Every parameter name carries the 'model__' prefix so that it is routed to
# the pipeline step named 'model'. An empty dict means "no tuning": the
# search evaluates the estimator with its default settings only.
param_grids = {
    "Logistic Regression": {'model__C': [0.1, 1, 10], 'model__solver': ['liblinear', 'saga']},
    "Ridge Classifier": {'model__alpha': [0.1, 1, 10]},
    # 'log' was renamed to 'log_loss' in scikit-learn; the old spelling is
    # rejected by current releases.
    "SGD Classifier": {'model__loss': ['hinge', 'log_loss', 'squared_hinge'], 'model__alpha': [0.0001, 0.001, 0.01]},
    # alpha only scales the regularisation term, and Perceptron's default
    # penalty is None, so a penalty must be searched for alpha to matter.
    "Perceptron": {'model__penalty': ['l2', 'l1', 'elasticnet'], 'model__alpha': [0.0001, 0.001, 0.01], 'model__max_iter': [1000]},
    "Passive Aggressive": {'model__C': [0.1, 1, 10], 'model__max_iter': [1000]},
    "KNN": {'model__n_neighbors': [3, 5, 7, 9], 'model__metric': ['euclidean', 'manhattan']},
    "Decision Tree": {'model__max_depth': [3, 5, 10], 'model__min_samples_split': [2, 5, 10]},
    "SVM (SVC)": {'model__C': [0.1, 1, 10], 'model__kernel': ['linear', 'rbf'], 'model__gamma': ['scale', 'auto']},
    # NuSVC is parameterised by nu and has no C parameter; passing
    # 'model__C' makes set_params fail with "Invalid parameter".
    "SVM (NuSVC)": {'model__nu': [0.1, 0.5, 0.9]},
    "SVM (LinearSVC)": {'model__C': [0.1, 1, 10], 'model__loss': ['hinge', 'squared_hinge']},
    "Random Forest": {'model__n_estimators': [50, 100, 200], 'model__max_depth': [3, 5, 10]},
    "Hist Gradient Boosting": {'model__max_iter': [100, 200], 'model__learning_rate': [0.01, 0.1], 'model__max_depth': [3, 5, 10]},
    "AdaBoost": {'model__n_estimators': [50, 100, 200], 'model__learning_rate': [0.01, 0.1, 1]},
    "Extra Trees": {'model__n_estimators': [50, 100, 200], 'model__max_depth': [3, 5, 10]},
    "Bagging": {'model__n_estimators': [50, 100, 200], 'model__max_samples': [0.5, 0.75, 1.0]},
    "Gaussian Naive Bayes": {},
    "Bernoulli Naive Bayes": {},
    "Linear Discriminant Analysis": {'model__solver': ['svd', 'lsqr', 'eigen']},
    # reg_param must lie in [0, 1]; values above 1 are rejected by
    # scikit-learn's parameter validation.
    "Quadratic Discriminant Analysis": {'model__reg_param': [0.01, 0.1, 1]},
    "MLP (Neural Network)": {'model__hidden_layer_sizes': [(50,), (100,), (50, 50)], 'model__activation': ['relu', 'tanh'], 'model__solver': ['adam', 'sgd']},
}

# Continuous / integer / categorical search spaces for BayesSearchCV, one
# entry per key of `models`. Parameter names use the 'model__' prefix so they
# reach the pipeline step named 'model'. An empty dict means the model has
# nothing to tune.
param_spaces = {
    "Logistic Regression": {'model__C': Real(1e-3, 1e3, prior='log-uniform'), 'model__solver': Categorical(['liblinear', 'saga'])},
    "Ridge Classifier": {'model__alpha': Real(1e-3, 1e3, prior='log-uniform')},
    # 'log' was renamed to 'log_loss' in scikit-learn; the old spelling is
    # rejected by current releases.
    "SGD Classifier": {'model__loss': Categorical(['hinge', 'log_loss', 'squared_hinge']), 'model__alpha': Real(1e-6, 1e-1, prior='log-uniform')},
    # alpha only scales the regularisation term, and Perceptron's default
    # penalty is None, so a penalty must be searched for alpha to matter.
    "Perceptron": {'model__penalty': Categorical(['l2', 'l1', 'elasticnet']), 'model__alpha': Real(1e-6, 1e-1, prior='log-uniform'), 'model__max_iter': Integer(500, 2000)},
    "Passive Aggressive": {'model__C': Real(1e-3, 1e3, prior='log-uniform'), 'model__max_iter': Integer(500, 2000)},
    "KNN": {'model__n_neighbors': Integer(1, 20), 'model__metric': Categorical(['euclidean', 'manhattan'])},
    "Decision Tree": {'model__max_depth': Integer(2, 10), 'model__min_samples_split': Integer(2, 50), 'model__min_samples_leaf': Integer(1, 20), 'model__max_features': Categorical(['sqrt', 'log2', None]), 'model__criterion': Categorical(['gini', 'entropy'])},
    "SVM (SVC)": {'model__C': Real(1e-3, 1e3, prior='log-uniform'), 'model__kernel': Categorical(['linear', 'rbf']), 'model__gamma': Real(1e-6, 1e1, prior='log-uniform')},
    # NuSVC is parameterised by nu and has no C parameter; passing
    # 'model__C' makes set_params fail with "Invalid parameter".
    "SVM (NuSVC)": {'model__nu': Real(0.1, 0.9)},
    "SVM (LinearSVC)": {'model__C': Real(1e-3, 1e3, prior='log-uniform'), 'model__loss': Categorical(['hinge', 'squared_hinge'])},
    "Random Forest": {'model__n_estimators': Integer(50, 500), 'model__max_depth': Integer(3, 50)},
    "Hist Gradient Boosting": {'model__max_iter': Integer(50, 500), 'model__learning_rate': Real(1e-3, 1, prior='log-uniform'), 'model__max_depth': Integer(3, 50)},
    "AdaBoost": {'model__n_estimators': Integer(50, 500), 'model__learning_rate': Real(1e-3, 1, prior='log-uniform')},
    "Extra Trees": {'model__n_estimators': Integer(50, 500), 'model__max_depth': Integer(3, 50)},
    "Bagging": {'model__n_estimators': Integer(50, 500), 'model__max_samples': Real(0.1, 1.0)},
    "Gaussian Naive Bayes": {},
    "Bernoulli Naive Bayes": {},
    "Linear Discriminant Analysis": {'model__solver': Categorical(['svd', 'lsqr', 'eigen'])},
    # reg_param must lie in [0, 1]; the upper bound is capped accordingly.
    "Quadratic Discriminant Analysis": {'model__reg_param': Real(1e-3, 1.0, prior='log-uniform')},
    # NOTE(review): tuple-valued categories have been reported to cause
    # trouble in some scikit-optimize versions -- confirm this space works
    # with the installed release.
    "MLP (Neural Network)": {'model__hidden_layer_sizes': Categorical([(50,), (100,), (50, 50)]), 'model__activation': Categorical(['relu', 'tanh']), 'model__solver': Categorical(['adam', 'sgd'])},
}

# Hyperparameter optimisation with Grid Search, Random Search or Bayesian search
def optimize_model(model_name, optimization_mode):
    """Tune one classifier with the chosen search strategy and print its test accuracy.

    The classifier is wrapped in a pipeline (SMOTE -> StandardScaler -> model),
    tuned with 5-fold cross-validation on ``X_train``/``y_train``, and the
    refitted best pipeline is then scored on ``X_test``/``y_test``.

    Args:
        model_name: Key into ``models``, ``param_grids`` and ``param_spaces``.
        optimization_mode: One of 'Grid Search', 'Random Search' or
            'Bayesian Optimization'.

    Raises:
        ValueError: If ``optimization_mode`` is not one of the supported modes.
        KeyError: If ``model_name`` is not a registered model.
    """
    # Validate the mode up front; otherwise `search` would stay unbound and
    # the failure would surface later as a confusing UnboundLocalError.
    valid_modes = ('Grid Search', 'Random Search', 'Bayesian Optimization')
    if optimization_mode not in valid_modes:
        raise ValueError(
            f"Modo de optimización desconocido: {optimization_mode!r}. "
            f"Opciones válidas: {', '.join(valid_modes)}"
        )

    model = models[model_name]

    pipeline = Pipeline([
        ('smote', SMOTE()),  # resampling happens only while fitting, i.e. on each fold's training part
        ('scaler', StandardScaler()),
        ('model', model),  # classifier being tuned
    ])

    if optimization_mode == 'Grid Search':
        search = GridSearchCV(pipeline, param_grids[model_name], cv=5)

    elif optimization_mode == 'Random Search':
        # Samples from the same discrete grids as Grid Search.
        search = RandomizedSearchCV(pipeline, param_grids[model_name], cv=5, n_iter=30)

    else:  # 'Bayesian Optimization'
        param_space = param_spaces[model_name]
        if param_space:
            search = BayesSearchCV(pipeline, param_space, n_iter=30, cv=5)
        else:
            # BayesSearchCV raises on an empty search space. For models with
            # nothing to tune, fall back to a single-candidate grid search so
            # the default configuration is still cross-validated and refitted.
            search = GridSearchCV(pipeline, param_space, cv=5)

    # Run the search; the best configuration is refitted on the full training set
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # Evaluate on the held-out test set
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Mejor modelo ({optimization_mode}) para {model_name}:\n", best_model)
    print(f"Accuracy: {accuracy:.4f}")

# Interactive controls built with ipywidgets.

# Search strategies offered to the user; the first one is preselected.
_search_modes = ['Grid Search', 'Random Search', 'Bayesian Optimization']

# Dropdown listing every registered model name
model_selector = widgets.Dropdown(description='Modelo:', options=models.keys())

# Dropdown for the optimisation strategy
optimization_selector = widgets.Dropdown(
    description='Optimización:',
    options=_search_modes,
    value=_search_modes[0],
)

# Button that launches the optimisation
execute_button = widgets.Button(description="Ejecutar Optimización")

# Output area that receives the printed results
out = widgets.Output()

# Click handler for the execute button
def on_button_click(b):
    """Run the optimisation for the current dropdown selections.

    Everything printed while the handler runs is captured by the ``out``
    widget, which is cleared first so only the latest run is shown.

    Args:
        b: The button instance that triggered the event (unused).
    """
    chosen_model = model_selector.value
    chosen_mode = optimization_selector.value
    with out:
        out.clear_output()
        print("Ejecutando optimización...\n")
        optimize_model(chosen_model, chosen_mode)

# Register the click handler on the button
execute_button.on_click(on_button_click)

# Stack the controls vertically, then render them together with the
# output area
ui = widgets.VBox(
    children=[model_selector, optimization_selector, execute_button]
)
display(ui, out)


VBox(children=(Dropdown(description='Modelo:', options=('Logistic Regression', 'Ridge Classifier', 'SGD Classi…

Output()