# MLflow 🔄

Este notebook contiene el código usado para comparar las técnicas y los modelos generados para el TFM. 

### **Ejecutar en línea de comandos:**


cd Desktop\TFM\NEW_CONDS

mlflow ui

 http://127.0.0.1:5000

Importamos librerías:

In [1]:
import numpy as np
import pandas as pd
import os
import sys
import time

import sklearn
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras import regularizers
# from keras import layers
# from tensorflow.keras import regularizers
# import tensorflow.keras as tk
# from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, RepeatedStratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import keras
from keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

import mlflow
# Establecer la variable de entorno GIT_PYTHON_REFRESH en quiet
os.environ["GIT_PYTHON_REFRESH"] = "quiet"




Importamos los datos:

In [2]:
df_data = pd.read_csv('data.csv')
X = df_data.drop(columns=['battery','OHS','CHS', 'chemistry'])
y = df_data['OHS']

In [3]:
X.shape, y.shape

((118, 10), (118,))

# Decision Tree 🌳

In [4]:
experiment_tree = mlflow.create_experiment("tree")

In [5]:
experiment_tree

'972786867258079225'

In [6]:
with mlflow.start_run(experiment_id=experiment_tree) as run:
    seed = 10
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    dt_classifier = DecisionTreeClassifier(random_state=seed)

    param_grid = {
        'max_depth': range(3, 11),  # Profundidad máxima del árbol
        'min_samples_split': range(2, 10),  # Número mínimo de muestras requeridas para dividir un nodo interno
        'min_samples_leaf': range(1, 8)  # Número mínimo de muestras requeridas para estar en un nodo hoja
    }
    
    mlflow.log_param('param_grid', param_grid)
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = GridSearchCV(estimator=dt_classifier, param_grid=param_grid,
                                     scoring='accuracy', cv=cv)
    
    mlflow.log_param('scoring', 'accuracy')
    mlflow.log_param('cv', cv)
    
    # Entrena el modelo
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    best_params = grid_search.best_params_
    print("Mejores parámetros:", best_params)

    mlflow.log_param('best_params', best_params)
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = grid_search.predict(X_train)
    y_test_pred = grid_search.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)

    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    temp_csv_file = "cv_results.csv"
    cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(grid_search, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")




Mejores parámetros: {'max_depth': 4, 'min_samples_leaf': 4, 'min_samples_split': 2}
Precisión del conjunto de entrenamiento: 0.8409090909090909
Precisión del conjunto de prueba: 0.6333333333333333
Model path: runs:/27b5c05a8a224470b142b32be2a6e19d/model




In [28]:
experiment = mlflow.get_experiment_by_name('tree')

In [32]:
experiment.experiment_id

'972786867258079225'

In [36]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment.experiment_id) as run:
    mlflow.log_param('seed-train_test_split', 0)
    
    # Divide los datos en conjuntos de entrenamiento y prueba
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
    
    # Crea el modelo de árbol de clasificación
    mlflow.log_param('seed-DecisionTreeClassifier', 12)
    clf = DecisionTreeClassifier(random_state=12)
    
    # Entrena el modelo en los datos de entrenamiento
    clf.fit(X_train, y_train)
    
    # Realiza predicciones en los datos de prueba
    y_pred = clf.predict(X_test)
    
    # Calcula la precisión del modelo
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    
    # Registra los parámetros y métricas en MLflow
    mlflow.log_param('test_size', 0.25)
    mlflow.log_param('seed', 0)
    mlflow.log_metric('test_accuracy', accuracy)
    
    # Guarda el modelo en MLflow
    mlflow.sklearn.log_model(clf, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")


Accuracy: 0.8
Model path: runs:/751cdecd6d124593bd344900c31af917/model


In [8]:
# 

# Random Forest 🌳🌲🌴🌳

In [9]:
# experiment_rf = mlflow.create_experiment("rf")

In [15]:
experiment_rf = mlflow.get_experiment_by_name("rf")
print(experiment_rf)

<Experiment: artifact_location='file:///C:/Users/beca1/Desktop/TFM/NEW_CONDS/mlruns/153073931280205987', creation_time=1714461393030, experiment_id='153073931280205987', last_update_time=1714461393030, lifecycle_stage='active', name='rf', tags={}>


In [11]:
with mlflow.start_run(experiment_id=experiment_rf) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(bootstrap=True, class_weight='balanced_subsample', oob_score=True, random_state=seed)

    mlflow.log_param('bootstrap', True)
    mlflow.log_param('class_weight', 'balanced_subsample')
    mlflow.log_param('oob_score', True)
    
    param_grid = {
        'n_estimators': range(30,250),  # Número de árboles en el bosque
        'max_depth': range(3,15),  # Profundidad máxima del árbol
        'min_samples_split': range(2,10),  # Número mínimo de muestras requeridas para dividir un nodo interno
        'min_samples_leaf': range(1,10),  # Número mínimo de muestras requeridas para estar en un nodo hoja
    }
    
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_grid,
                                     n_iter=100, scoring='accuracy', random_state=seed,
                                     cv=cv,) 

    mlflow.log_param('param_distributions', param_grid)
    mlflow.log_param('n_iter', 100)
    mlflow.log_param('scoring', 'accuracy')
    mlflow.log_param('cv', cv)
    
    # NOTA: SI cv=None => cv=5 por defecto.
    
    # Entrena el modelo
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    best_params = grid_search.best_params_
    print("Mejores parámetros:", best_params)

    mlflow.log_param('cv', cv)
    mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = grid_search.predict(X_train)
    y_test_pred = grid_search.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    temp_csv_file = "cv_results.csv"
    cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(grid_search, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")



Mejores parámetros: {'n_estimators': 30, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 7}
Precisión del conjunto de entrenamiento: 0.9886363636363636
Precisión del conjunto de prueba: 0.7
Model path: runs:/cba67deec6034af180ad9b1c6d2655ff/model


In [4]:
df_data = pd.read_csv('data.csv')
X = df_data.drop(columns=['battery','OHS','CHS'])
y = df_data['OHS']

In [5]:
X.shape, y.shape

((118, 11), (118,))

In [32]:
experiment_rf = mlflow.get_experiment_by_name("rf")
with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(random_state=seed, bootstrap=True)

    # mlflow.log_param('bootstrap', True)
    # mlflow.log_param('class_weight', 'balanced_subsample')
    # mlflow.log_param('oob_score', True)
    
    param_grid = {
        'n_estimators': [50, 100, 200],  # Número de árboles en el bosque
        'max_depth': [2, 4, 6, None],  # Profundidad máxima del árbol
        'class_weight': ['balanced', 'balanced_subsample', None],
        'bootstrap': [True, False] 
    }
    
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid,
                                     scoring='accuracy', cv=cv) 

    mlflow.log_param('param_grid', param_grid)
    # mlflow.log_param('n_iter', 100)
    mlflow.log_param('scoring', 'accuracy')
    mlflow.log_param('cv', cv)
    
    # NOTA: SI cv=None => cv=5 por defecto.
    
    # Entrena el modelo
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    best_params = grid_search.best_params_
    print("Mejores parámetros:", best_params)

    mlflow.log_param('cv', cv)
    mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = grid_search.predict(X_train)
    y_test_pred = grid_search.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    temp_csv_file = "cv_results.csv"
    cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(grid_search, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")



Mejores parámetros: {'bootstrap': True, 'class_weight': 'balanced', 'max_depth': None, 'n_estimators': 100}
Precisión del conjunto de entrenamiento: 1.0
Precisión del conjunto de prueba: 0.8
Model path: runs:/6870bebbf0934984b5b5a4b407d5f80a/model


In [38]:
experiment_rf = mlflow.get_experiment_by_name("rf")
with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(bootstrap=True, 
                                           # class_weight='balanced_subsample', 
                                           oob_score=True, 
                                           random_state=seed)

    mlflow.log_param('bootstrap', True)
    # mlflow.log_param('class_weight', 'balanced_subsample')
    mlflow.log_param('oob_score', True)
    
    param_grid = {
        'n_estimators': range(30,250),  # Número de árboles en el bosque
        'max_depth': range(3,15),  # Profundidad máxima del árbol
        'min_samples_split': range(2,10),  # Número mínimo de muestras requeridas para dividir un nodo interno
        'min_samples_leaf': range(1,10),  # Número mínimo de muestras requeridas para estar en un nodo hoja
    }
    
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_grid,
                                     n_iter=100, scoring='accuracy', random_state=seed,
                                     cv=cv,) 

    mlflow.log_param('param_distributions', param_grid)
    mlflow.log_param('n_iter', 100)
    mlflow.log_param('scoring', 'accuracy')
    mlflow.log_param('cv', cv)
    
    # NOTA: SI cv=None => cv=5 por defecto.
    
    # Entrena el modelo
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    best_params = grid_search.best_params_
    print("Mejores parámetros:", best_params)

    mlflow.log_param('cv', cv)
    mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = grid_search.predict(X_train)
    y_test_pred = grid_search.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    temp_csv_file = "cv_results.csv"
    cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(grid_search, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")



Mejores parámetros: {'n_estimators': 113, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_depth': 8}
Precisión del conjunto de entrenamiento: 0.9545454545454546
Precisión del conjunto de prueba: 0.8
Model path: runs:/1126941f17b14796b8b520dff1588a24/model


In [38]:
experiment_rf = mlflow.get_experiment_by_name("rf")
with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(bootstrap=True, 
                                           # class_weight='balanced_subsample', 
                                           oob_score=True, 
                                           random_state=seed)

    mlflow.log_param('bootstrap', True)
    # mlflow.log_param('class_weight', 'balanced_subsample')
    mlflow.log_param('oob_score', True)
    
    param_grid = {
        'n_estimators': range(30,250),  # Número de árboles en el bosque
        'max_depth': range(3,15),  # Profundidad máxima del árbol
        'min_samples_split': range(2,10),  # Número mínimo de muestras requeridas para dividir un nodo interno
        'min_samples_leaf': range(1,10),  # Número mínimo de muestras requeridas para estar en un nodo hoja
    }
    
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_grid,
                                     n_iter=100, scoring='accuracy', random_state=seed,
                                     cv=cv,) 

    mlflow.log_param('param_distributions', param_grid)
    mlflow.log_param('n_iter', 100)
    mlflow.log_param('scoring', 'accuracy')
    mlflow.log_param('cv', cv)
    
    # NOTA: SI cv=None => cv=5 por defecto.
    
    # Entrena el modelo
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    best_params = grid_search.best_params_
    print("Mejores parámetros:", best_params)

    mlflow.log_param('cv', cv)
    mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = grid_search.predict(X_train)
    y_test_pred = grid_search.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    temp_csv_file = "cv_results.csv"
    cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(grid_search, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")



Mejores parámetros: {'n_estimators': 113, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_depth': 8}
Precisión del conjunto de entrenamiento: 0.9545454545454546
Precisión del conjunto de prueba: 0.8
Model path: runs:/1126941f17b14796b8b520dff1588a24/model


In [39]:
experiment_rf = mlflow.get_experiment_by_name("rf")
with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(bootstrap=True, class_weight='balanced_subsample', oob_score=True, random_state=seed)

    mlflow.log_param('bootstrap', True)
    mlflow.log_param('class_weight', 'balanced_subsample')
    mlflow.log_param('oob_score', True)
    
    param_grid = {
        'n_estimators': range(30,250),  # Número de árboles en el bosque
        'max_depth': range(3,15),  # Profundidad máxima del árbol
        'min_samples_split': range(2,10),  # Número mínimo de muestras requeridas para dividir un nodo interno
        'min_samples_leaf': range(1,10),  # Número mínimo de muestras requeridas para estar en un nodo hoja
    }
    
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_grid,
                                     n_iter=200, scoring='accuracy', random_state=seed,
                                     cv=cv,) 

    mlflow.log_param('param_distributions', param_grid)
    mlflow.log_param('n_iter', 200)
    mlflow.log_param('scoring', 'accuracy')
    mlflow.log_param('cv', cv)
    
    # NOTA: SI cv=None => cv=5 por defecto.
    
    # Entrena el modelo
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    best_params = grid_search.best_params_
    print("Mejores parámetros:", best_params)

    mlflow.log_param('cv', cv)
    mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = grid_search.predict(X_train)
    y_test_pred = grid_search.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    temp_csv_file = "cv_results.csv"
    cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(grid_search, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")



Mejores parámetros: {'n_estimators': 30, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 7}
Precisión del conjunto de entrenamiento: 0.9886363636363636
Precisión del conjunto de prueba: 0.7
Model path: runs:/32079ea082df4e59adbcf1de8fd3fd29/model


In [19]:
experiment_rf = mlflow.get_experiment_by_name("rf")
with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(bootstrap=True, class_weight='balanced_subsample', oob_score=True, random_state=seed)

    mlflow.log_param('bootstrap', True)
    mlflow.log_param('class_weight', 'balanced_subsample')
    mlflow.log_param('oob_score', True)
    
    # NOTA: SI cv=None => cv=5 por defecto.
    
    # Entrena el modelo
    start_time = time.time()
    rf_classifier.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    # best_params = rf_classifier.best_params_
    # print("Mejores parámetros:", best_params)

    # mlflow.log_param('cv', cv)
    # mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = rf_classifier.predict(X_train)
    y_test_pred = rf_classifier.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    # cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    # cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # temp_csv_file = "cv_results.csv"
    # cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(rf_classifier, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")

Precisión del conjunto de entrenamiento: 1.0
Precisión del conjunto de prueba: 0.8333333333333334
Model path: runs:/b05516740fe34a2997d458658d6a13f2/model


In [23]:
print(rf_classifier.n_estimators, rf_classifier.max_depth, rf_classifier.min_samples_split, rf_classifier.min_samples_leaf)

100 None 2 1


In [26]:
experiment_rf = mlflow.get_experiment_by_name("rf")
with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    rf_classifier = RandomForestClassifier(bootstrap=True, class_weight='balanced_subsample', oob_score=True, random_state=seed)

    mlflow.log_param('bootstrap', True)
    mlflow.log_param('class_weight', 'balanced_subsample')
    mlflow.log_param('oob_score', True)
    
    # NOTA: SI cv=None => cv=5 por defecto.

    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    grid_search = GridSearchCV(estimator=rf_classifier, scoring='accuracy',
                                     cv=cv,) 
    
    # Entrena el modelo
    start_time = time.time()
    rf_classifier.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Obtiene los mejores parámetros
    # best_params = rf_classifier.best_params_
    # print("Mejores parámetros:", best_params)

    # mlflow.log_param('cv', cv)
    # mlflow.log_param('best_params', best_params)
    
    
    # Predice usando el mejor modelo encontrado
    y_train_pred = rf_classifier.predict(X_train)
    y_test_pred = rf_classifier.predict(X_test)
    
    # Calcula la precisión del conjunto de entrenamiento y de prueba
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    
    print("Precisión del conjunto de entrenamiento:", train_accuracy)
    print("Precisión del conjunto de prueba:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)


    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    # cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # Convierte grid_search.cv_results_ en un DataFrame de pandas
    # cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
    # Guarda el DataFrame como un archivo CSV temporal
    # temp_csv_file = "cv_results.csv"
    # cv_results_df.to_csv(temp_csv_file, index=False)

    mlflow.sklearn.log_model(rf_classifier, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")

TypeError: GridSearchCV.__init__() missing 1 required positional argument: 'param_grid'

In [23]:
print(rf_classifier.n_estimators, rf_classifier.max_depth, rf_classifier.min_samples_split, rf_classifier.min_samples_leaf)

100 None 2 1


In [12]:
# experiment_rf = mlflow.get_experiment_by_name("rf")
# with mlflow.start_run(experiment_id=experiment_rf.experiment_id) as run:
#     seed = 0
#     mlflow.log_param('seed', seed)
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

#     rf_classifier = RandomForestClassifier(bootstrap=True, class_weight='balanced_subsample', oob_score=True, random_state=seed)

#     mlflow.log_param('bootstrap', True)
#     mlflow.log_param('class_weight', 'balanced_subsample')
#     mlflow.log_param('oob_score', True)
    
#     param_grid = {
#         'n_estimators': range(30,250),  # Número de árboles en el bosque
#         'max_depth': range(3,15),  # Profundidad máxima del árbol
#         'min_samples_split': range(2,10),  # Número mínimo de muestras requeridas para dividir un nodo interno
#         'min_samples_leaf': range(1,10),  # Número mínimo de muestras requeridas para estar en un nodo hoja
#     }
    
    
#     cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
#     grid_search = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_grid,
#                                      n_iter=100, scoring='accuracy', random_state=seed,
#                                      cv=cv,) 

#     mlflow.log_param('param_distributions', param_grid)
#     mlflow.log_param('n_iter', 100)
#     mlflow.log_param('scoring', 'accuracy')
#     mlflow.log_param('cv', cv)
    
#     # NOTA: SI cv=None => cv=5 por defecto.
    
#     # Entrena el modelo
#     start_time = time.time()
#     grid_search.fit(X_train, y_train)
#     train_time = time.time() - start_time
    
#     # Obtiene los mejores parámetros
#     best_params = grid_search.best_params_
#     print("Mejores parámetros:", best_params)

#     mlflow.log_param('cv', cv)
#     mlflow.log_param('best_params', best_params)
    
    
#     # Predice usando el mejor modelo encontrado
#     y_train_pred = grid_search.predict(X_train)
#     y_test_pred = grid_search.predict(X_test)
    
#     # Calcula la precisión del conjunto de entrenamiento y de prueba
#     train_accuracy = accuracy_score(y_train, y_train_pred)
#     test_accuracy = accuracy_score(y_test, y_test_pred)
    
#     print("Precisión del conjunto de entrenamiento:", train_accuracy)
#     print("Precisión del conjunto de prueba:", test_accuracy)

#     mlflow.log_metric('train_time', train_time)
#     mlflow.log_metric('train_accuracy', train_accuracy)
#     mlflow.log_metric('test_accuracy', test_accuracy)


#     # Convierte grid_search.cv_results_ en un DataFrame de pandas
#     cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
#     # Guarda el DataFrame como un archivo CSV temporal
#     # Convierte grid_search.cv_results_ en un DataFrame de pandas
#     cv_results_df = pd.DataFrame(grid_search.cv_results_)
    
#     # Guarda el DataFrame como un archivo CSV temporal
#     temp_csv_file = "cv_results.csv"
#     cv_results_df.to_csv(temp_csv_file, index=False)

#     mlflow.sklearn.log_model(grid_search, 'model')
    
#     print(f"Model path: runs:/{run.info.run_id}/model")

# $K$-NN 🏡🏡🏡

In [13]:
experiment_knn = mlflow.create_experiment("knn")

In [14]:
experiment_knn

'986445357258254764'

In [15]:
with mlflow.start_run(experiment_id=experiment_knn) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)
    
    # Define el pipeline con el escalado de características y el clasificador KNN
    pipeline = Pipeline([
        ('scaler', StandardScaler()),  # Escalado de características
        ('knn', KNeighborsClassifier())  # Clasificador KNN
    ])
    
    # Definir la cuadrícula de parámetros que quieres probar
    param_grid = {
        'knn__n_neighbors': [3, 4, 5, 6, 7, 8, 9],  # Prueba diferentes valores para el número de vecinos
        'knn__weights': ['uniform', 'distance'],  # Prueba diferentes métodos de ponderación
        'knn__metric': ['euclidean', 'manhattan', 'minkowski', 'chebyshev']  # Prueba diferentes métricas de distancia
    }

    mlflow.log_param('param_grid', param_grid)
    
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    mlflow.log_param('cv', cv)
    
    # Instanciar el objeto GridSearchCV
    grid_search = GridSearchCV(pipeline, param_grid, cv=cv, scoring='accuracy')

    mlflow.log_param('scoring', 'accuracy')
    
    # Ajustar el objeto GridSearchCV a los datos de entrenamiento
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Mostrar los mejores parámetros encontrados
    print("Mejores parámetros:", grid_search.best_params_)

    mlflow.log_param('best_params_', grid_search.best_params_)
    
    # Obtener el mejor modelo
    best_knn = grid_search.best_estimator_    
    
    # Hacer predicciones en el conjunto de prueba utilizando el mejor modelo
    y_train_pred = best_knn.predict(X_train)
    y_test_pred = best_knn.predict(X_test)
    
    # Calcular la precisión del modelo
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print("Precisión con los mejores parámetros:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)

    mlflow.sklearn.log_model(best_knn, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")




Mejores parámetros: {'knn__metric': 'euclidean', 'knn__n_neighbors': 9, 'knn__weights': 'distance'}
Precisión con los mejores parámetros: 0.8333333333333334
Model path: runs:/2a2c2ea8b8184d2c932dd83f8ccddf28/model


In [16]:
experiment_knn = mlflow.get_experiment_by_name("knn")
with mlflow.start_run(experiment_id=experiment_knn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)
    # Definir la cuadrícula de parámetros que quieres probar
    param_grid = {
        'n_neighbors': [3, 4, 5, 6, 7, 8, 9],  # Prueba diferentes valores para el número de vecinos
        'weights': ['uniform', 'distance'],  # Prueba diferentes métodos de ponderación
        'metric': ['euclidean', 'manhattan', 'minkowski', 'chebyshev']  # Prueba diferentes métricas de distancia
    }

    mlflow.log_param('param_grid', param_grid)
    
    # Crear un clasificador KNN
    knn = KNeighborsClassifier()

    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    mlflow.log_param('cv', cv)
    
    # Instanciar el objeto GridSearchCV
    grid_search = GridSearchCV(knn, param_grid, cv=cv, scoring='accuracy', )

    mlflow.log_param('scoring', 'accuracy')
    
    # Ajustar el objeto GridSearchCV a los datos de entrenamiento
    start_time = time.time()
    grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time
    
    # Mostrar los mejores parámetros encontrados
    print("Mejores parámetros:", grid_search.best_params_)

    mlflow.log_param('best_params_', grid_search.best_params_)
    
    # Obtener el mejor modelo
    best_knn = grid_search.best_estimator_    
    
    # Hacer predicciones en el conjunto de prueba utilizando el mejor modelo
    y_train_pred = best_knn.predict(X_train)
    y_test_pred = best_knn.predict(X_test)
    
    # Calcular la precisión del modelo
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print("Precisión con los mejores parámetros:", test_accuracy)

    mlflow.log_metric('train_time', train_time)
    mlflow.log_metric('train_accuracy', train_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)

    mlflow.sklearn.log_model(best_knn, 'model')
    
    print(f"Model path: runs:/{run.info.run_id}/model")


# NN 🧬

In [17]:
# experiment_nn = mlflow.create_experiment("nn")

In [6]:
experiment_nn = mlflow.get_experiment_by_name("nn")

In [7]:
experiment_nn

<Experiment: artifact_location='file:///C:/Users/beca1/Desktop/TFM/NEW_CONDS/mlruns/691134205547675520', creation_time=1714461740776, experiment_id='691134205547675520', last_update_time=1714461740776, lifecycle_stage='active', name='nn', tags={}>

In [20]:
# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal
    model = Sequential()
    model.add(Dense(64, input_dim=10, activation='relu'))  # Capa de entrada con 10 variables de entrada
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=60, batch_size=8, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 8)
    mlflow.log_param('epochs', 50)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    mlflow.keras.log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")




Epoch 1/60


Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60




Accuracy del modelo en el conjunto de prueba: 0.5416666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpbw6fv32u\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpbw6fv32u\model\data\model\assets


Model path: runs:/0bb40c688d294ce0ae5e1c0ca0823919/model


In [21]:
# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal
    model = Sequential()
    model.add(Dense(64, input_dim=10, activation='relu'))  # Capa de entrada con 10 variables de entrada
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 8)
    mlflow.log_param('epochs', 50)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    mlflow.keras.log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




Accuracy del modelo en el conjunto de prueba: 0.5416666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpcs0919iw\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpcs0919iw\model\data\model\assets


Model path: runs:/a45707f21bdb47f0b622d63dce2499e3/model


In [22]:
# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal
    model = Sequential()
    model.add(Dense(64, input_dim=10, activation='relu'))  # Capa de entrada con 10 variables de entrada
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 8)
    mlflow.log_param('epochs', 50)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    mlflow.keras.log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78



Accuracy del modelo en el conjunto de prueba: 0.5833333134651184
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpkf556oko\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpkf556oko\model\data\model\assets


Model path: runs:/b898440e48584b4f8c69f5aacedb8f74/model


In [23]:
# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal
    model = Sequential()
    model.add(Dense(64, input_dim=10, activation='relu'))  # Capa de entrada con 10 variables de entrada
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=100, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 8)
    mlflow.log_param('epochs', 50)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    mlflow.keras.log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78



Accuracy del modelo en el conjunto de prueba: 0.625
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmp8w0yqm84\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmp8w0yqm84\model\data\model\assets


Model path: runs:/df61aa8d5d47456c875cdea074bd374f/model


In [9]:
import mlflow
from mlflow.keras import log_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
import time

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal convolucional
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=(10, 1)))  # Capa convolucional
    model.add(MaxPooling1D(2))  # Capa de pooling
    model.add(Flatten())  # Capa de aplanamiento
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=100, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 24)
    mlflow.log_param('epochs', 100)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")





Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epo



Accuracy del modelo en el conjunto de prueba: 0.6666666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpkhpgfnbr\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpkhpgfnbr\model\data\model\assets


Model path: runs:/60ce056e81454ae2ad4a637428177173/model




In [9]:
import mlflow
from mlflow.keras import log_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
import time

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal convolucional
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=(10, 1)))  # Capa convolucional
    model.add(MaxPooling1D(2))  # Capa de pooling
    model.add(Flatten())  # Capa de aplanamiento
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=100, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 24)
    mlflow.log_param('epochs', 100)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")





Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epo



Accuracy del modelo en el conjunto de prueba: 0.6666666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpkhpgfnbr\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpkhpgfnbr\model\data\model\assets


Model path: runs:/60ce056e81454ae2ad4a637428177173/model




In [12]:
import mlflow
from mlflow.keras import log_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
import time

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal convolucional
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=(10, 1)))  # Capa convolucional
    model.add(MaxPooling1D(2))  # Capa de pooling
    model.add(Flatten())  # Capa de aplanamiento
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=200, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 24)
    mlflow.log_param('epochs', 100)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78



Accuracy del modelo en el conjunto de prueba: 0.7916666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpob3lts1u\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpob3lts1u\model\data\model\assets


Model path: runs:/6e6c27a6c51c4665b2a04aeb9d77a715/model


In [13]:
import mlflow
from mlflow.keras import log_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
import time

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal convolucional
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=(10, 1)))  # Capa convolucional
    model.add(MaxPooling1D(2))  # Capa de pooling
    model.add(Flatten())  # Capa de aplanamiento
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=500, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 24)
    mlflow.log_param('epochs', 100)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78



Accuracy del modelo en el conjunto de prueba: 0.7916666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmphv89n2zh\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmphv89n2zh\model\data\model\assets


Model path: runs:/21e0a844c523470dbb479279891f1b81/model


In [14]:
import mlflow
from mlflow.keras import log_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
import time

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal convolucional
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=(10, 1)))  # Capa convolucional
    model.add(MaxPooling1D(2))  # Capa de pooling
    model.add(Flatten())  # Capa de aplanamiento
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=1000, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 24)
    mlflow.log_param('epochs', 100)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E



Accuracy del modelo en el conjunto de prueba: 0.7916666865348816
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpwmdni4jt\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpwmdni4jt\model\data\model\assets


Model path: runs:/43d5c70afff549839095646877cabdd3/model


In [11]:
import mlflow
from mlflow.keras import log_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout
import time

# Inicia un nuevo run de MLflow
with mlflow.start_run(experiment_id=experiment_nn.experiment_id) as run:
    seed = 0
    mlflow.log_param('seed', seed)

    # Codifica las etiquetas a valores numéricos si aún no están codificadas
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=seed)

    # Define el modelo de la red neuronal con dropout
    model = Sequential()
    model.add(Dense(64, input_dim=10, activation='relu'))  # Capa de entrada con 10 variables de entrada
    model.add(Dropout(0.5))  # Capa de dropout
    model.add(Dense(32, activation='relu'))  # Capa oculta con 32 neuronas
    model.add(Dropout(0.5))  # Capa de dropout
    model.add(Dense(8, activation='softmax'))  # Capa de salida con 8 neuronas para las 8 categorías

    # Compila el modelo
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Entrena el modelo
    start_time = time.time()
    history = model.fit(X_train, y_train, epochs=200, batch_size=24, validation_data=(X_test, y_test))
    train_time = time.time() - start_time

    # Evalúa el modelo en el conjunto de prueba
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy del modelo en el conjunto de prueba:", accuracy)
    
    # Log de los parámetros y métricas en MLflow
    mlflow.log_param('batch_size', 24)
    mlflow.log_param('epochs', 100)
    mlflow.log_metric('test_loss', loss)
    mlflow.log_metric('test_accuracy', accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el modelo en MLflow
    log_model(model, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78



Accuracy del modelo en el conjunto de prueba: 0.4166666567325592
INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpoaqc136j\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\beca1\AppData\Local\Temp\tmpoaqc136j\model\data\model\assets


Model path: runs:/e94f1281e6a14d5d815bf1520331450e/model


# SVM ⚙⚙⚙

In [24]:
experiment_svm = mlflow.create_experiment("svm")

In [25]:
experiment_svm

'322919668359299300'

In [26]:
# experiment_svm = mlflow.get_experiment_by_name("svm")
# with mlflow.start_run(experiment_id=experiment_svm.experiment_id) as run:
#     seed = 0
#     mlflow.log_param('seed', seed)
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

#     # Define los parámetros de búsqueda en rejilla
#     param_grid = {
#         'C': [0.01, 0.1, 1, 5, 10, 15, 20],  # Prueba diferentes valores para el parámetro de regularización C
#         'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],  # Prueba diferentes valores para el parámetro gamma del kernel
#         'kernel': ['rbf']  # Prueba diferentes tipos de kernel
#     }

#     mlflow.log_param('param_grid', param_grid)

#     # Construye el modelo SVM
#     svm = SVC(random_state=seed)

#     cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
#     mlflow.log_param('cv', cv)
    
#     # Realiza la búsqueda en rejilla
#     grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=cv,
#                                scoring='accuracy', n_jobs=-1)

#     start_time = time.time()
#     grid_result = grid_search.fit(X_train, y_train)
#     train_time = time.time() - start_time

#     # Muestra los mejores parámetros encontrados
#     print("Mejores parámetros:", grid_result.best_params_)
#     mlflow.log_param('best_params_', grid_result.best_params_)

#     # Hacer predicciones en el conjunto de prueba utilizando el mejor modelo
#     y_test_pred = grid_result.predict(X_test)

#     # Calcular la precisión del modelo
#     test_accuracy = accuracy_score(y_test, y_test_pred)
#     print("Precisión del modelo en el conjunto de prueba:", test_accuracy)
#     mlflow.log_metric('test_accuracy', test_accuracy)
#     mlflow.log_metric('train_time', train_time)
    

#     # Guarda el mejor modelo en MLflow
#     mlflow.sklearn.log_model(grid_result.best_estimator_, "model")
#     print(f"Model path: runs:/{run.info.run_id}/model")

In [27]:
with mlflow.start_run(experiment_id=experiment_svm) as run:
    seed = 0
    mlflow.log_param('seed', seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

    # Define los parámetros de búsqueda en rejilla
    param_grid = {
        'C': [0.01, 0.1, 1, 10],  # Prueba diferentes valores para el parámetro de regularización C
        'gamma': [0.0001, 0.001, 0.01, 0.1, 1],  # Prueba diferentes valores para el parámetro gamma del kernel
        'kernel': ['linear', 'rbf']  # Prueba diferentes tipos de kernel
    }

    mlflow.log_param('param_grid', param_grid)

    # Construye el modelo SVM
    svm = SVC(random_state=seed)

    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)
    mlflow.log_param('cv', cv)

    # Realiza la búsqueda en rejilla
    grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=cv,
                               scoring='accuracy', n_jobs=-1)

    start_time = time.time()
    grid_result = grid_search.fit(X_train, y_train)
    train_time = time.time() - start_time

    # Muestra los mejores parámetros encontrados
    print("Mejores parámetros:", grid_result.best_params_)
    mlflow.log_param('best_params_', grid_result.best_params_)

    # Hacer predicciones en el conjunto de prueba utilizando el mejor modelo
    y_test_pred = grid_result.predict(X_test)

    # Calcular la precisión del modelo
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print("Precisión del modelo en el conjunto de prueba:", test_accuracy)
    mlflow.log_metric('test_accuracy', test_accuracy)
    mlflow.log_metric('train_time', train_time)
    

    # Guarda el mejor modelo en MLflow
    mlflow.sklearn.log_model(grid_result.best_estimator_, "model")
    print(f"Model path: runs:/{run.info.run_id}/model")



Mejores parámetros: {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
Precisión del modelo en el conjunto de prueba: 0.7666666666666667
Model path: runs:/e7b03dc62dc44be389f19845cf7d51e0/model
