<a href="https://colab.research.google.com/github/gonzalodev15/AppBackend/blob/master/Proyecto_AprendizajeAutomatico.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Paso 1: Instalar librerias

In [2]:
!pip install mlflow optuna ucimlrepo

Collecting mlflow
  Downloading mlflow-2.13.2-py3-none-any.whl (25.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m25.0/25.0 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl (147 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython<4,>=3.1.9 (from mlflow)
  Downloading GitPython-3.1.43-py3-none

#Paso 2: Importar librerias

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
import mlflow
import mlflow.sklearn
import optuna
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from ucimlrepo import fetch_ucirepo


def load_data():
    data = fetch_ucirepo(id=468)
    X = data.data.features
    y = data.data.targets['Revenue']
    return X, y

def preprocess_data(X,y):
    categorical_features = ['Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType', 'VisitorType', 'Weekend']
    continuous_features = ['Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
                           'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates',
                           'PageValues', 'SpecialDay']

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), continuous_features),
            ('cat', OneHotEncoder(), categorical_features)])


    # Undersampling
    # sampler = RandomUnderSampler(random_state=42)
    # X_resampled, y_resampled = sampler.fit_resample(X, y)

    # X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
    # X_train_transformed = preprocessor.fit_transform(X_train).toarray()
    # X_test_transformed = preprocessor.transform(X_test).toarray()

    # return X_train_transformed, X_test_transformed, y_train, y_test, preprocessor

    # Oversampling con SMOTE
    #sampler = SMOTE(random_state=42)
    #X_resampled, y_resampled = sampler.fit_resample(X, y)

    #X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
    #X_train_transformed = preprocessor.fit_transform(X_train).toarray()
    #X_test_transformed = preprocessor.transform(X_test).toarray()

    #return X_train_transformed, X_test_transformed, y_train, y_test, preprocessor

    # Sin nada
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train_transformed = preprocessor.fit_transform(X_train).toarray()
    X_test_transformed = preprocessor.transform(X_test).toarray()

    return X_train_transformed, X_test_transformed, y_train, y_test, preprocessor


def objective(trial, X_train_transformed, y_train):
    classifier_name = trial.suggest_categorical("classifier", ["Logistic Regression", "Decision Tree", "Random Forest", "SVM", "XGBoost"])
    # classifier_name = trial.suggest_categorical("classifier", ["Logistic Regression", "XGBoost"])
    # classifier_name = trial.suggest_categorical("classifier", ["Logistic Regression", "Decision Tree", "Random Forest", "SVM", "XGBoost", "Voting", "Stacking"])

    if classifier_name == "Logistic Regression":
        model = LogisticRegression(max_iter=1000)
    elif classifier_name == "Decision Tree":
        max_depth = trial.suggest_int("max_depth", 2, 32)
        model = DecisionTreeClassifier(max_depth=max_depth)
    elif classifier_name == "Random Forest":
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
    elif classifier_name == "SVM":
        C = trial.suggest_float("C", 1e-10, 1e10, log=True)
        model = SVC(C=C, probability=True)
    elif classifier_name == "XGBoost":
        n_estimators = trial.suggest_int("n_estimators", 50, 300)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.5, log=True)
        subsample = trial.suggest_float("subsample", 0.5, 1.0)
        colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 1.0)
        model = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                              subsample=subsample, colsample_bytree=colsample_bytree, eval_metric='logloss')
    # elif classifier_name == "Voting":
    #     model1 = LogisticRegression(max_iter=1000)
    #     model2 = DecisionTreeClassifier(max_depth=trial.suggest_int("max_depth_voting", 2, 32))
    #     model3 = RandomForestClassifier(n_estimators=trial.suggest_int("n_estimators_voting", 10, 200), max_depth=trial.suggest_int("max_depth_rf_voting", 2, 32))
    #     model = VotingClassifier(estimators=[('lr', model1), ('dt', model2), ('rf', model3)], voting='soft')
    # elif classifier_name == "Stacking":
    #     model1 = LogisticRegression(max_iter=1000)
    #     model2 = DecisionTreeClassifier(max_depth=trial.suggest_int("max_depth_stacking", 2, 32))
    #     model3 = RandomForestClassifier(n_estimators=trial.suggest_int("n_estimators_stacking", 10, 200), max_depth=trial.suggest_int("max_depth_rf_stacking", 2, 32))
    #     estimators = [('lr', model1), ('dt', model2), ('rf', model3)]
    #     final_estimator = SVC(probability=True, C=trial.suggest_loguniform("C_stacking", 1e-10, 1e10))
    #     model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=3)

    scores = cross_val_score(model, X_train_transformed, y_train, cv=3, scoring='accuracy')
    return scores.mean()

def main():

    # MLFlow
    #mlflow.set_tracking_uri("http://localhost:5000")
    #experiment_name = "Proyecto ML"
    #if not mlflow.get_experiment_by_name(experiment_name):
    #    mlflow.create_experiment(experiment_name)
    #mlflow.set_experiment(experiment_name)

    # Cargar y preprocesar los datos
    X, y = load_data()
    X_train_transformed, X_test_transformed, y_train, y_test, preprocessor = preprocess_data(X, y)

    # Crear estudio de Optuna
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial, X_train_transformed, y_train), n_trials=300)

    # Obtener el mejor modelo
    print("Best trial:")
    trial = study.best_trial
    print(f"  Value: {trial.value}")
    print(f"  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # Entrenar el mejor modelo
    best_model_name = trial.params["classifier"]
    best_params = {k: v for k, v in trial.params.items() if k != "classifier"}

    if best_model_name == "Logistic Regression":
        best_model = LogisticRegression(max_iter=1000)
    elif best_model_name == "Decision Tree":
        best_model = DecisionTreeClassifier(**best_params)
    elif best_model_name == "Random Forest":
        best_model = RandomForestClassifier(**best_params)
    elif best_model_name == "SVM":
        best_model = SVC(probability=True, **best_params)
    elif best_model_name == "XGBoost":
        best_model = XGBClassifier(eval_metric='logloss', **best_params)
    # elif best_model_name == "Voting":
    #     model1 = LogisticRegression(max_iter=1000)
    #     model2 = DecisionTreeClassifier(max_depth=best_params['max_depth_voting'])
    #     model3 = RandomForestClassifier(n_estimators=best_params['n_estimators_voting'], max_depth=best_params['max_depth_rf_voting'])
    #     best_model = VotingClassifier(estimators=[('lr', model1), ('dt', model2), ('rf', model3)], voting='soft')
    # elif best_model_name == "Stacking":
    #     model1 = LogisticRegression(max_iter=1000)
    #     model2 = DecisionTreeClassifier(max_depth=best_params['max_depth_stacking'])
    #     model3 = RandomForestClassifier(n_estimators=best_params['n_estimators_stacking'], max_depth=best_params['max_depth_rf_stacking'])
    #     estimators = [('lr', model1), ('dt', model2), ('rf', model3)]
    #     final_estimator = SVC(probability=True, C=best_params['C_stacking'])
    #     best_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=3)

    best_model.fit(X_train_transformed, y_train)

    # Predicciones y evaluación
    y_pred = best_model.predict(X_test_transformed)
    y_prob = best_model.predict_proba(X_test_transformed)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_prob)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print(f"ROC AUC: {roc_auc}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(classification_report(y_test, y_pred))

    # Guardar los resultados con MLflow
    #with mlflow.start_run(run_name="Optuna Optimization"):
     #   mlflow.log_params(trial.params)
     #   mlflow.log_metric("accuracy", accuracy)
     #   mlflow.log_metric("roc_auc", roc_auc)
     #   mlflow.log_metric("precision", precision)
     #   mlflow.log_metric("recall", recall)
     #   mlflow.sklearn.log_model(best_model, "model")

        # Guardar el DataFrame en un archivo CSV y luego registrarlo como artefacto
        #X.to_csv("dataset.csv", index=False)
        #mlflow.log_artifact("dataset.csv")

if __name__ == "__main__":
    main()

[I 2024-06-17 06:00:18,360] A new study created in memory with name: no-name-9f6dd4fc-30af-4a5b-8611-9a199c70d5cf
[I 2024-06-17 06:00:19,544] Trial 0 finished with value: 0.8993309002433091 and parameters: {'classifier': 'Random Forest', 'n_estimators': 35, 'max_depth': 14}. Best is trial 0 with value: 0.8993309002433091.
[I 2024-06-17 06:00:41,265] Trial 1 finished with value: 0.8482360097323601 and parameters: {'classifier': 'SVM', 'C': 1.8951349639013766e-08}. Best is trial 0 with value: 0.8993309002433091.
[I 2024-06-17 06:01:06,940] Trial 2 finished with value: 0.893653690186537 and parameters: {'classifier': 'SVM', 'C': 0.5692610748949224}. Best is trial 0 with value: 0.8993309002433091.
[I 2024-06-17 06:01:07,406] Trial 3 finished with value: 0.8957826439578266 and parameters: {'classifier': 'Random Forest', 'n_estimators': 17, 'max_depth': 28}. Best is trial 0 with value: 0.8993309002433091.
[I 2024-06-17 06:01:07,618] Trial 4 finished with value: 0.8588807785888077 and paramet

KeyboardInterrupt: 

#Paso 3: Carga de dataset

In [None]:
# Fetch dataset
data = fetch_ucirepo(id=468)

# Data (as pandas dataframes)
X = data.data.features
y = data.data.targets['Revenue']

In [None]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12330 entries, 0 to 12329
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Administrative           12330 non-null  int64  
 1   Administrative_Duration  12330 non-null  float64
 2   Informational            12330 non-null  int64  
 3   Informational_Duration   12330 non-null  float64
 4   ProductRelated           12330 non-null  int64  
 5   ProductRelated_Duration  12330 non-null  float64
 6   BounceRates              12330 non-null  float64
 7   ExitRates                12330 non-null  float64
 8   PageValues               12330 non-null  float64
 9   SpecialDay               12330 non-null  float64
 10  Month                    12330 non-null  object 
 11  OperatingSystems         12330 non-null  int64  
 12  Browser                  12330 non-null  int64  
 13  Region                   12330 non-null  int64  
 14  TrafficType           

In [None]:
X.describe()

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,OperatingSystems,Browser,Region,TrafficType
count,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0,12330.0
mean,2.315166,80.818611,0.503569,34.472398,31.731468,1194.74622,0.022191,0.043073,5.889258,0.061427,2.124006,2.357097,3.147364,4.069586
std,3.321784,176.779107,1.270156,140.749294,44.475503,1913.669288,0.048488,0.048597,18.568437,0.198917,0.911325,1.717277,2.401591,4.025169
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
25%,0.0,0.0,0.0,0.0,7.0,184.1375,0.0,0.014286,0.0,0.0,2.0,2.0,1.0,2.0
50%,1.0,7.5,0.0,0.0,18.0,598.936905,0.003112,0.025156,0.0,0.0,2.0,2.0,3.0,2.0
75%,4.0,93.25625,0.0,0.0,38.0,1464.157214,0.016813,0.05,0.0,0.0,3.0,2.0,4.0,4.0
max,27.0,3398.75,24.0,2549.375,705.0,63973.52223,0.2,0.2,361.763742,1.0,8.0,13.0,9.0,20.0


#Paso 4: Convertir variables categoricas y continuas

In [None]:
# Variables categóricas
categorical_features = ['Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType', 'VisitorType', 'Weekend']

# Variables continuas
continuous_features = ['Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
                       'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates',
                       'PageValues', 'SpecialDay']

# Preprocesamiento
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), continuous_features),
        ('cat', OneHotEncoder(), categorical_features)])

# Dividimos los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ajustamos el preprocesador a los datos de entrenamiento y transformamos
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

In [None]:
# Convertir las matrices dispersas a matrices densas
X_train_transformed = X_train_transformed.toarray()
X_test_transformed = X_test_transformed.toarray()

In [None]:
# Comprobar la forma de los datos transformados
print(f'Forma de X_train_transformed: {X_train_transformed.shape}')
print(f'Forma de X_test_transformed: {X_test_transformed.shape}')

Forma de X_train_transformed: (9864, 75)
Forma de X_test_transformed: (2466, 75)


In [None]:
# Obtener los nombres de las columnas resultantes del OneHotEncoder
ohe_feature_names = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)

# Verificar los nombres de las columnas transformadas
print(f'Nombres de las columnas continuas: {continuous_features}')
print(f'Nombres de las columnas categóricas: {ohe_feature_names}')

Nombres de las columnas continuas: ['Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration', 'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues', 'SpecialDay']
Nombres de las columnas categóricas: ['Month_Aug' 'Month_Dec' 'Month_Feb' 'Month_Jul' 'Month_June' 'Month_Mar'
 'Month_May' 'Month_Nov' 'Month_Oct' 'Month_Sep' 'OperatingSystems_1'
 'OperatingSystems_2' 'OperatingSystems_3' 'OperatingSystems_4'
 'OperatingSystems_5' 'OperatingSystems_6' 'OperatingSystems_7'
 'OperatingSystems_8' 'Browser_1' 'Browser_2' 'Browser_3' 'Browser_4'
 'Browser_5' 'Browser_6' 'Browser_7' 'Browser_8' 'Browser_9' 'Browser_10'
 'Browser_11' 'Browser_12' 'Browser_13' 'Region_1' 'Region_2' 'Region_3'
 'Region_4' 'Region_5' 'Region_6' 'Region_7' 'Region_8' 'Region_9'
 'TrafficType_1' 'TrafficType_2' 'TrafficType_3' 'TrafficType_4'
 'TrafficType_5' 'TrafficType_6' 'TrafficType_7' 'TrafficType_8'
 'TrafficType_9' 'TrafficType_10' 'TrafficType_11' '

In [None]:
# Crear un DataFrame para ver las columnas antes y después del preprocesamiento
X_train_transformed_df = pd.DataFrame(X_train_transformed, columns=np.concatenate([continuous_features, ohe_feature_names]))
X_test_transformed_df = pd.DataFrame(X_test_transformed, columns=np.concatenate([continuous_features, ohe_feature_names]))

print("Datos después del preprocesamiento:")
X_train_transformed_df.head()

Datos después del preprocesamiento:


Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,...,TrafficType_16,TrafficType_17,TrafficType_18,TrafficType_19,TrafficType_20,VisitorType_New_Visitor,VisitorType_Other,VisitorType_Returning_Visitor,Weekend_False,Weekend_True
0,-0.698469,-0.456937,-0.397412,-0.245168,-0.555129,-0.573313,-0.155336,0.403282,-0.315593,-0.308364,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
1,-0.091848,-0.378064,-0.397412,-0.245168,1.105528,0.127204,-0.404284,-0.599555,-0.165744,-0.308364,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
2,-0.698469,-0.456937,-0.397412,-0.245168,-0.689777,-0.622722,3.757227,3.290124,-0.315593,-0.308364,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
3,0.818084,-0.179757,2.760119,2.482745,-0.600011,-0.583923,-0.456303,-0.510734,0.134924,-0.308364,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,-0.698469,-0.456937,0.391971,-0.209179,-0.510246,-0.477616,0.386403,-0.01979,-0.315593,-0.308364,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [None]:
# Mostrar los datos antes del preprocesamiento
print("Datos antes del preprocesamiento:")
X_train.head()

Datos antes del preprocesamiento:


Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend
1785,0,0.0,0,0.0,7,95.0,0.014286,0.061905,0.0,0.0,Mar,2,6,1,1,Returning_Visitor,False
10407,2,14.0,0,0.0,81,1441.910588,0.002469,0.013933,2.769599,0.0,Nov,2,2,3,2,Returning_Visitor,False
286,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Mar,2,2,1,1,Returning_Visitor,False
6520,5,49.2,4,379.0,5,74.6,0.0,0.018182,8.326728,0.0,Sep,2,2,8,2,New_Visitor,False
12251,0,0.0,1,5.0,9,279.0,0.04,0.041667,0.0,0.0,Nov,3,2,7,8,New_Visitor,True


#Paso 6: PCA

#Aplicar modelos (obtener el accuracy y auc-roc)

In [None]:
# Definimos los modelos a utilizar
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric='logloss')
}

In [None]:
# Validación cruzada y ajuste de hiperparámetros con Optuna
def objective(trial):
    classifier_name = trial.suggest_categorical("classifier", ["Logistic Regression", "Decision Tree", "Random Forest", "SVM", "XGBoost"])

    if classifier_name == "Logistic Regression":
        model = LogisticRegression(max_iter=1000)
    elif classifier_name == "Decision Tree":
        max_depth = trial.suggest_int("max_depth", 2, 32)
        model = DecisionTreeClassifier(max_depth=max_depth)
    elif classifier_name == "Random Forest":
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
    elif classifier_name == "SVM":
        C = trial.suggest_loguniform("C", 1e-10, 1e10)
        model = SVC(C=C, probability=True)
    elif classifier_name == "XGBoost":
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        learning_rate = trial.suggest_loguniform("learning_rate", 0.01, 0.1)
        model = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate, eval_metric='logloss')

    scores = cross_val_score(model, X_train_transformed, y_train, cv=3, scoring='accuracy')
    return scores.mean()

In [None]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2024-06-16 18:44:55,204] A new study created in memory with name: no-name-b3743462-4446-4eac-894d-a5607d04beca
[I 2024-06-16 18:44:56,099] Trial 0 finished with value: 0.8870640713706406 and parameters: {'classifier': 'Logistic Regression'}. Best is trial 0 with value: 0.8870640713706406.
  learning_rate = trial.suggest_loguniform("learning_rate", 0.01, 0.1)
[I 2024-06-16 18:45:03,535] Trial 1 finished with value: 0.8976074614760746 and parameters: {'classifier': 'XGBoost', 'n_estimators': 57, 'max_depth': 15, 'learning_rate': 0.08216358076784924}. Best is trial 1 with value: 0.8976074614760746.
[I 2024-06-16 18:45:03,763] Trial 2 finished with value: 0.8585766423357665 and parameters: {'classifier': 'Decision Tree', 'max_depth': 20}. Best is trial 1 with value: 0.8976074614760746.
[I 2024-06-16 18:45:04,448] Trial 3 finished with value: 0.8870640713706406 and parameters: {'classifier': 'Logistic Regression'}. Best is trial 1 with value: 0.8976074614760746.
[I 2024-06-16 18:45:05,14

##Random Forest

##Regresión logistica

##Arboles de decision

##SVM

##XGBoost

##Metodos de ensamble

#Ajuste y optimización de hiperparametros usando Grid Search y Optuna

#Utilizaremos Cross validation

#Investigar como utilizar MLFlow

#Comparar resultados

#Conclusión