In [2]:
import pandas as pd
import os
import yaml # Necesario para leer archivos .dvc (YAML)
import mlflow
import mlflow.data
import mlflow.sklearn

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

# --- IMPORTACIONES DE M칄TRICAS ---
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score
)

# --- CONFIGURACI칍N DE RUTAS ---
DATASET_PATH = '../data/interim/student_interim_clean_for_model_2.csv' 
DATASET_NAME = 'student_entry_clean'

# --- 1. CARGA DE DATOS (Mismos pasos) ---
try:
    df = pd.read_csv(DATASET_PATH)
    print(f"Dataset cargado desde: {DATASET_PATH}")
except FileNotFoundError:
    print(f"ERROR: Archivo no encontrado en {DATASET_PATH}. Revisa la ruta.")
    exit()

# --- 2. L칍GICA DVC: OBTENER EL HASH (Mismos pasos) ---
dvc_digest = None
dvc_file_path = DATASET_PATH + ".dvc"
if os.path.exists(dvc_file_path):
    try:
        with open(dvc_file_path, 'r') as f:
            dvc_data = yaml.safe_load(f)
        if 'outs' in dvc_data and dvc_data['outs']:
            dvc_digest = dvc_data['outs'][0].get('md5') 
            if not dvc_digest:
                dvc_digest = dvc_data['outs'][0].get('checksum') 
        print(f"DVC Digest encontrado: {dvc_digest}")
    except Exception as e:
        print(f"ADVERTENCIA: No se pudo leer el archivo DVC. Error: {e}")

# --- 3. PREPARACI칍N DE DATOS Y SPLIT (Mismos pasos) ---
X = df.drop(columns=['Performance']) 
y = df['Performance']
cat_cols = ['Gender','Caste','coaching','time','Class_ten_education','twelve_education','medium','Class_ X_Percentage','Class_XII_Percentage','Father_occupation','Mother_occupation']

le = LabelEncoder()
y_enc = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, stratify=y_enc, random_state=42)


# ---------------------------------------------------------------------
# --- 4. CONFIGURACI칍N DEL PIPELINE BASE Y LA GRILLA DE B칔SQUEDA ---
# ---------------------------------------------------------------------

# Pipeline base (solo preprocesamiento y clasificador, sin par치metros a칰n)
preprocessor = ColumnTransformer(
    transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'), cat_cols)],
    remainder='drop'
)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor), 
    ('clf', RandomForestClassifier(random_state=888))
])

# Grilla de Par치metros para GridSearchCV (춰NUEVO!)
# Nota: Los nombres deben coincidir con la convenci칩n del pipeline: <nombre_paso>__<parametro>
param_grid = {
    'clf__n_estimators': [50, 100, 200],  # Cantidad de 치rboles
    'clf__max_depth': [5, 10, None],      # Profundidad m치xima
    'clf__min_samples_split': [5, 10]     # Muestras m칤nimas para dividir
}

# Configuraci칩n del Grid Search (Usando 'f1_weighted' como m칠trica principal para multiclase)
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='f1_weighted',  # M칠trica de optimizaci칩n
    cv=5,                   # 5-fold cross-validation
    verbose=2,
    n_jobs=1
)

# --- 5. ENTRENAMIENTO Y PREDICCIONES (Ejecuci칩n del Grid Search) ---
print("\nIniciando Grid Search...")
grid_search.fit(X_train, y_train)
print("Grid Search finalizado.")


# ---------------------------------------------------------------------
# --- 6. REGISTRO DE TODAS LAS CORRIDAS EN MLFLOW ---
# ---------------------------------------------------------------------

mlflow.set_tracking_uri("http://127.0.0.1:5001")
EXPERIMENT_NAME = "randomforest_GridSearch" # Nuevo nombre de experimento para la b칰squeda
mlflow.set_experiment(EXPERIMENT_NAME)


# Iterar sobre CADA resultado de CV (Cross-Validation)
for i, (mean_score, std_score, params) in enumerate(zip(
    grid_search.cv_results_['mean_test_score'],
    grid_search.cv_results_['std_test_score'],
    grid_search.cv_results_['params']
)):
    
    # 1. Iniciar una nueva corrida para cada combinaci칩n de hiperpar치metros
    with mlflow.start_run(run_name=f"run_{i+1}_RF_GridSearch", nested=True) as run:
        print(f"Registrando corrida {i+1} con par치metros: {params}")

        # Reconstruir el pipeline para este conjunto de par치metros (para log_model)
        current_pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor), 
            ('clf', RandomForestClassifier(random_state=888, **params))
        ])
        
        # Entrenar el modelo con el conjunto de entrenamiento COMPLETO usando los mejores par치metros
        # NOTA: En la pr치ctica se usa grid_search.best_estimator_ o se reentrena. 
        # Aqu칤, para fines de registro de TODAS las combinaciones, reentrenamos con la combinaci칩n actual
        current_pipeline.fit(X_train, y_train)
        
        # --- Predicciones y C치lculo de M칠tricas (en el conjunto de prueba) ---
        y_pred_test = current_pipeline.predict(X_test)
        
        # Calcular M칠tricas (Incluyendo las de promedio)
        acc_test = accuracy_score(y_test, y_pred_test)
        f1_micro = f1_score(y_test, y_pred_test, average='micro')
        f1_macro = f1_score(y_test, y_pred_test, average='macro')
        f1_weighted = f1_score(y_test, y_pred_test, average='weighted')
        report_text = classification_report(y_test, y_pred_test, target_names=le.classes_)
        
        metrics = {
            "cv_f1_weighted_mean": mean_score, # M칠trica de CV (crucial para GridSearch)
            "cv_f1_weighted_std": std_score,
            "test_acc": acc_test,
            "test_f1_weighted": f1_weighted,
            "test_f1_macro": f1_macro,
        }
        
        # --- Registro en MLflow ---
        
        # 1. Registrar hiperpar치metros
        mlflow.log_params(params)
        
        # 2. Registrar m칠tricas
        mlflow.log_metrics(metrics)
        
        # 3. Guardar modelo
        mlflow.sklearn.log_model(current_pipeline, "random_forest_pipeline")
        
        # 4. Registrar Dataset (solo se hace una vez por experimentaci칩n, pero lo incluimos)
        mlflow_dataset = mlflow.data.from_pandas(
            df=df, source=DATASET_PATH, targets=y.name, name=DATASET_NAME, digest=dvc_digest)
        mlflow.log_input(mlflow_dataset, context="training") 

        # 5. Registrar Classification Report (como artefacto)
        temp_report_path = f"classification_report_run_{i+1}.txt"
        with open(temp_report_path, "w") as f:
            f.write(report_text)
        mlflow.log_artifact(temp_report_path, artifact_path="report")
        os.remove(temp_report_path)

        # 6. Registrar el modelo con el mejor rendimiento (opcional, solo si el F1 de CV es el mejor hasta ahora)
        if mean_score == grid_search.best_score_:
             print(f"!!! Este es el mejor modelo (F1-Weighted CV: {mean_score:.4f}) !!!")
             mlflow.set_tag("best_run", "True")


print("\n--- RESUMEN FINAL DE GRID SEARCH ---")
print(f"El mejor F1-Weighted (CV) es: {grid_search.best_score_:.4f}")
print(f"Los mejores par치metros son: {grid_search.best_params_}")

Dataset cargado desde: ../data/interim/student_interim_clean_for_model_2.csv
DVC Digest encontrado: 76db7197326a5942db9c5b100349f69b

Iniciando Grid Search...
Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=100; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=100; total time=   0.1s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=100; total time=   0.1s
[CV] END clf__max_depth=5

2025/11/02 17:57:48 INFO mlflow.tracking.fluent: Experiment with name 'randomforest_GridSearch' does not exist. Creating a new experiment.


[CV] END clf__max_depth=None, clf__min_samples_split=10, clf__n_estimators=200; total time=   0.1s
Grid Search finalizado.


2025/11/02 17:57:48 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_1_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/1e7d91931a594af2852c01f0546c487d.
2025/11/02 17:57:48 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.


Registrando corrida 1 con par치metros: {'clf__max_depth': 5, 'clf__min_samples_split': 5, 'clf__n_estimators': 50}


TypeError: RandomForestClassifier.__init__() got an unexpected keyword argument 'clf__max_depth'

In [None]:
#FUNCIONANDO
import mlflow
import mlflow.data
import mlflow.sklearn

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

# --- IMPORTACIONES DE M칄TRICAS ---
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score
)

# --- CONFIGURACI칍N DE RUTAS ---
DATASET_PATH = '../data/interim/student_interim_clean_for_model_2.csv' 
DATASET_NAME = 'student_entry_clean'

# --- 1. CARGA DE DATOS ---
try:
    df = pd.read_csv(DATASET_PATH)
    print(f"Dataset cargado desde: {DATASET_PATH}")
except FileNotFoundError:
    print(f"ERROR: Archivo no encontrado en {DATASET_PATH}. Revisa la ruta.")
    exit()

# --- 2. L칍GICA DVC: OBTENER EL HASH (DIGEST) ---
dvc_digest = None
dvc_file_path = DATASET_PATH + ".dvc"
if os.path.exists(dvc_file_path):
    try:
        with open(dvc_file_path, 'r') as f:
            dvc_data = yaml.safe_load(f)
        if 'outs' in dvc_data and dvc_data['outs']:
            dvc_digest = dvc_data['outs'][0].get('md5') 
            if not dvc_digest:
                dvc_digest = dvc_data['outs'][0].get('checksum') 
        print(f"DVC Digest encontrado: {dvc_digest}")
    except Exception as e:
        print(f"ADVERTENCIA: No se pudo leer el archivo DVC. Error: {e}")

# --- 3. PREPARACI칍N DE DATOS Y SPLIT ---
X = df.drop(columns=['Performance']) 
y = df['Performance']
cat_cols = ['Gender','Caste','coaching','time','Class_ten_education','twelve_education','medium','Class_ X_Percentage','Class_XII_Percentage','Father_occupation','Mother_occupation']

le = LabelEncoder()
y_enc = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, stratify=y_enc, random_state=42)

# ---------------------------------------------------------------------
# --- 4. CONFIGURACI칍N DEL PIPELINE BASE Y LA GRILLA DE B칔SQUEDA ---
# ---------------------------------------------------------------------

# Pipeline base (solo preprocesamiento y clasificador, sin par치metros a칰n)
preprocessor = ColumnTransformer(
    transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'), cat_cols)],
    remainder='drop'
)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor), 
    ('clf', RandomForestClassifier(random_state=888))
])

# Grilla de Par치metros para GridSearchCV
param_grid = {
    'clf__n_estimators': [50, 100, 200],  # Cantidad de 치rboles
    'clf__max_depth': [5, 10, None],      # Profundidad m치xima
    'clf__min_samples_split': [5, 10]     # Muestras m칤nimas para dividir
}

# Configuraci칩n del Grid Search
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=5,                   
    verbose=2,
    n_jobs=1  # Corregido a 1 para evitar errores de paralelizaci칩n/pickling
)

# --- 5. ENTRENAMIENTO Y PREDICCIONES (Ejecuci칩n del Grid Search) ---
print("\nIniciando Grid Search...")
grid_search.fit(X_train, y_train)
print("Grid Search finalizado.")


# ---------------------------------------------------------------------
# --- 6. REGISTRO DE TODAS LAS CORRIDAS EN MLFLOW ---
# ---------------------------------------------------------------------

MLFLOW_MODEL_NAME = "Student_Performance_RF_Model" # Nombre para el Model Registry

mlflow.set_tracking_uri("http://127.0.0.1:5001")
EXPERIMENT_NAME = "randomforest_GridSearch"
mlflow.set_experiment(EXPERIMENT_NAME)

best_run_id = None
best_model_uri = None

# Iterar sobre CADA resultado de CV (Cross-Validation)
for i, (mean_score, std_score, params) in enumerate(zip(
    grid_search.cv_results_['mean_test_score'],
    grid_search.cv_results_['std_test_score'],
    grid_search.cv_results_['params']
)):
    
    with mlflow.start_run(run_name=f"run_{i+1}_RF_GridSearch", nested=True) as run:
        print(f"Registrando corrida {i+1} con par치metros: {params}")

        # --- CORRECCI칍N: LIMPIEZA DE PAR츼METROS ---
        # Remueve el prefijo 'clf__' para que RandomForestClassifier lo acepte
        clf_params = {k.replace('clf__', ''): v for k, v in params.items()}
        # ------------------------------------------
        
        # Reconstruir el pipeline con los par치metros actuales
        current_pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor), 
            ('clf', RandomForestClassifier(random_state=888, **clf_params)) 
        ])
        
        # Reentrenar el modelo con el conjunto de entrenamiento COMPLETO para este registro
        current_pipeline.fit(X_train, y_train)
        
        # --- C치lculo de M칠tricas ---
        y_pred_test = current_pipeline.predict(X_test)
        
        acc_test = accuracy_score(y_test, y_pred_test)
        f1_micro = f1_score(y_test, y_pred_test, average='micro')
        f1_macro = f1_score(y_test, y_pred_test, average='macro')
        f1_weighted = f1_score(y_test, y_pred_test, average='weighted')
        report_text = classification_report(y_test, y_pred_test, target_names=le.classes_)
        
        metrics = {
            "cv_f1_weighted_mean": mean_score, 
            "cv_f1_weighted_std": std_score,
            "test_acc": acc_test,
            "test_f1_weighted": f1_weighted,
            "test_f1_macro": f1_macro,
        }
        
        # --- Registro en MLflow ---
        mlflow.log_params(params)
        mlflow.log_metrics(metrics)
        
        # 3. Guardar modelo (artefacto)
        model_info = mlflow.sklearn.log_model(current_pipeline, "random_forest_pipeline")
        
        # 4. Registrar Dataset y Reporte (artefactos)
        mlflow_dataset = mlflow.data.from_pandas(
            df=df, source=DATASET_PATH, targets=y.name, name=DATASET_NAME, digest=dvc_digest)
        mlflow.log_input(mlflow_dataset, context="training") 

        temp_report_path = f"classification_report_run_{i+1}.txt"
        with open(temp_report_path, "w") as f:
            f.write(report_text)
        mlflow.log_artifact(temp_report_path, artifact_path="report")
        os.remove(temp_report_path)

        # --- Identificaci칩n del Mejor Modelo ---
        if mean_score == grid_search.best_score_:
             print(f"!!! Este es el mejor modelo (F1-Weighted CV: {mean_score:.4f}) !!!")
             mlflow.set_tag("best_run", "True")
             best_run_id = run.info.run_id
             best_model_uri = model_info.model_uri # URI para el Model Registry

print("\n--- RESUMEN FINAL DE GRID SEARCH ---")
print(f"El mejor F1-Weighted (CV) es: {grid_search.best_score_:.4f}")
print(f"Los mejores par치metros son: {grid_search.best_params_}")


# ---------------------------------------------------------------------
# --- 7. REGISTRO FINAL DEL MEJOR MODELO EN MODEL REGISTRY ---
# ---------------------------------------------------------------------

if best_model_uri:
    print(f"\n--- REGISTRANDO EL MEJOR MODELO EN REGISTRY ---")
    try:
        # Registra el modelo como versi칩n 'initial' en el Registry
        model_version = mlflow.register_model(
            model_uri=best_model_uri,
            name=MLFLOW_MODEL_NAME
        )
        print(f"[OK] Modelo '{MLFLOW_MODEL_NAME}' registrado como versi칩n: {model_version.version}")
        print(f"URI del Modelo: {best_model_uri}")
    except Exception as e:
        print(f"[ERROR] No se pudo registrar el modelo en el Registry: {e}")

Dataset cargado desde: ../data/interim/student_interim_clean_for_model_2.csv
DVC Digest encontrado: 76db7197326a5942db9c5b100349f69b

Iniciando Grid Search...
Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=50; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=100; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=100; total time=   0.0s
[CV] END clf__max_depth=5, clf__min_samples_split=5, clf__n_estimators=100; total time=   0.0s
[CV] END clf__max_depth=5

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:05:01 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_1_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/490fd4560e2a47ffac3bf861e8bddb97.
2025/11/02 18:05:01 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Registrando corrida 2 con par치metros: {'clf__max_depth': 5, 'clf__min_samples_split': 5, 'clf__n_estimators': 100}


  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:07:22 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_2_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/2274bce565e0403a92509730fb5e4c3d.
2025/11/02 18:07:22 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.


Registrando corrida 3 con par치metros: {'clf__max_depth': 5, 'clf__min_samples_split': 5, 'clf__n_estimators': 200}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:09:43 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_3_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/c9abceea195a44e19eb2a69fb5726af9.
2025/11/02 18:09:43 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Registrando corrida 4 con par치metros: {'clf__max_depth': 5, 'clf__min_samples_split': 10, 'clf__n_estimators': 50}


  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:12:05 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_4_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/bb41b4a40aa74e69b80a3db3c19f3397.
2025/11/02 18:12:05 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Registrando corrida 5 con par치metros: {'clf__max_depth': 5, 'clf__min_samples_split': 10, 'clf__n_estimators': 100}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:14:25 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_5_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/b5cbef462d68479c90686439bff274d1.
2025/11/02 18:14:25 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.


Registrando corrida 6 con par치metros: {'clf__max_depth': 5, 'clf__min_samples_split': 10, 'clf__n_estimators': 200}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:16:47 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_6_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/e22ed7e21c744e1e8215ebb433d16cf7.
2025/11/02 18:16:47 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Registrando corrida 7 con par치metros: {'clf__max_depth': 10, 'clf__min_samples_split': 5, 'clf__n_estimators': 50}


  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:19:06 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_7_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/c0a74d01aa8a4b9694aae7f31a7d0b6d.
2025/11/02 18:19:06 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Registrando corrida 8 con par치metros: {'clf__max_depth': 10, 'clf__min_samples_split': 5, 'clf__n_estimators': 100}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:21:30 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_8_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/0aa81e8127014608a97ac6af755c94e7.
2025/11/02 18:21:30 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.


Registrando corrida 9 con par치metros: {'clf__max_depth': 10, 'clf__min_samples_split': 5, 'clf__n_estimators': 200}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:23:52 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_9_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/8dd5bc56da7747cbbec4c648f1438984.
2025/11/02 18:23:52 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Registrando corrida 10 con par치metros: {'clf__max_depth': 10, 'clf__min_samples_split': 10, 'clf__n_estimators': 50}


  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/11/02 18:26:29 INFO mlflow.tracking._tracking_service.client: 游끢 View run run_10_RF_GridSearch at: http://127.0.0.1:5001/#/experiments/993790272377427900/runs/4f2f958a4ffd4c20b91cace437871db0.
2025/11/02 18:26:29 INFO mlflow.tracking._tracking_service.client: 游빍 View experiment at: http://127.0.0.1:5001/#/experiments/993790272377427900.


!!! Este es el mejor modelo (F1-Weighted CV: 0.5031) !!!
Registrando corrida 11 con par치metros: {'clf__max_depth': 10, 'clf__min_samples_split': 10, 'clf__n_estimators': 100}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


: 