In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import (
    confusion_matrix,
    ConfusionMatrixDisplay,
    classification_report)
from matplotlib.colors import LinearSegmentedColormap

import warnings
# Blanket filter: every warning category is silenced from this point on.
warnings.filterwarnings(action="ignore")

# Read the CSV from the transformed-data folder; the 'ID' column becomes the index.
df = pd.read_csv(
    "Datos/Transformados/df_limpio.csv",
    index_col="ID",
)

In [None]:
# Preview the first five rows of the loaded frame (rich display as the cell output).
df.head(n=5)

In [None]:
# Train the best model: scale -> univariate feature selection -> SMOTE -> LDA,
# tuned with a randomized search scored on balanced accuracy.
#
# NOTE(review): x_train, y_train, x_test and y_test are not defined in any cell
# visible above (train_test_split is imported but never called here). Confirm that
# a split cell runs before this one; otherwise "Restart & Run All" raises NameError.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('selector', SelectKBest(score_func=f_classif)),
    # Oversampling is a pipeline step rather than a pass over the full dataset.
    ('smote', SMOTE(random_state=6)),
    ('lda', LinearDiscriminantAnalysis())])

# 3 * 2 * 4 = 24 possible combinations; the search below samples n_iter=20 of them.
# NOTE(review): selector__k assumes the data has at least 90 features — confirm.
parametros = {
    'selector__k': [40, 60, 90],
    'lda__solver': ['lsqr', 'eigen'],
    'lda__shrinkage': ['auto', 0.1, 0.5, 0.9]}

modelo = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=parametros,
    n_iter=20,
    cv=5,
    scoring='balanced_accuracy',
    n_jobs=-1,
    random_state=6)

modelo.fit(x_train, y_train)
y_pred = modelo.best_estimator_.predict(x_test)

# Results
# Use the fitted estimator's class order for BOTH the matrix and the tick labels,
# so rows/columns stay aligned even if some class is absent from y_test / y_pred.
etiquetas = modelo.best_estimator_.classes_
cm = confusion_matrix(y_test, y_pred, labels=etiquetas)

fig, ax = plt.subplots(figsize=(8, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=etiquetas)

# Two-stop colormap going from white to dark purple.
lagun_cmap = LinearSegmentedColormap.from_list(
    "lagun_white_purple",
    ["#FFFFFF", "#5B2A57"])

disp.plot(cmap=lagun_cmap, ax=ax, values_format='d')

# best_score_ is the mean cross-validated value of the `scoring` metric configured
# above (balanced accuracy), not an F1 score, so the title names it accordingly.
ax.set_title(
    f'Mejor Matriz de Confusión (Balanced Accuracy CV: {modelo.best_score_:.2f})')

print(f"Mejores parámetros: {modelo.best_params_}")
print(classification_report(y_test, y_pred))
plt.show()