<a href="https://colab.research.google.com/github/dteso/AI-Mini-Trainer/blob/main/AI_mini_trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gradio scikit-learn pandas plotly atomicwrites

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting atomicwrites
  Downloading atomicwrites-1.4.1.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Download

In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, pickle, warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
import tempfile

# ---------------------------
# IMPORTACIONES DE SKLEARN
# ---------------------------
from sklearn.datasets import (
    load_iris, load_wine, load_breast_cancer, load_digits,
    load_diabetes, fetch_california_housing, load_linnerud,
    make_regression, make_friedman1
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, r2_score, mean_squared_error
from sklearn.linear_model import (
    LogisticRegression, LinearRegression, RidgeClassifier, Ridge, Lasso,
    ElasticNet, SGDClassifier, SGDRegressor
)
from sklearn.ensemble import (
    RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier,
    GradientBoostingRegressor, AdaBoostClassifier, AdaBoostRegressor,
    BaggingClassifier, BaggingRegressor, ExtraTreesClassifier, ExtraTreesRegressor,
    HistGradientBoostingClassifier, HistGradientBoostingRegressor
)
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from atomicwrites import atomic_write


# ---------------------------
# CONFIGURACIÓN: DATASETS Y MODELOS
# ---------------------------
available_datasets = {
    "Iris (Clasificación)": load_iris,
    "Wine (Clasificación)": load_wine,
    "Breast Cancer (Clasificación)": load_breast_cancer,
    "Digits (Clasificación)": load_digits,
    "Linnerud (Clasificación - multietiqueta)": load_linnerud,
    "Diabetes (Regresión)": load_diabetes,
    "California Housing (Regresión)": fetch_california_housing,
    "Friedman1 (Regresión sintética)": lambda: {
        "data": make_friedman1(n_samples=200, n_features=10, random_state=42)[0],
        "target": make_friedman1(n_samples=200, n_features=10, random_state=42)[1],
        "feature_names": [f"X{i}" for i in range(10)]
    },
    "Make Regression (Regresión sintética)": lambda: {
        "data": make_regression(n_samples=200, n_features=8, noise=0.1, random_state=42)[0],
        "target": make_regression(n_samples=200, n_features=8, noise=0.1, random_state=42)[1],
        "feature_names": [f"X{i}" for i in range(8)]
    },
}

classification_models = {
    "Logistic Regression": LogisticRegression,
    "KNN Classifier": KNeighborsClassifier,
    "Decision Tree Classifier": DecisionTreeClassifier,
    "Random Forest Classifier": RandomForestClassifier,
    "SVC": SVC,
    "Naive Bayes": GaussianNB,
}

regression_models = {
    "Linear Regression": LinearRegression,
    "Random Forest Regressor": RandomForestRegressor
}

available_models = {**classification_models, **regression_models}

# ---------------------------
# FUNCIONES DE APOYO
# ---------------------------
def load_dataset(name):
    """Carga el dataset y retorna (df, target)."""
    dataset = available_datasets[name]()
    df = pd.DataFrame(
        dataset["data"],
        columns=dataset.get("feature_names", [f"X{i}" for i in range(dataset["data"].shape[1])])
    )
    target = pd.Series(dataset["target"], name="target")
    return df, target

def is_classification_task(target):
    """Determina si el objetivo (target) es para clasificación."""
    return pd.Series(target).nunique() < 20 and pd.api.types.is_integer_dtype(target)

def show_dataset_with_target(dataset_name):
    """Para vista de ejemplo: un dataframe con columna 'class' textual, si aplica."""
    dataset = available_datasets[dataset_name]()
    df = pd.DataFrame(
        dataset["data"],
        columns=dataset.get("feature_names", [f"X{i}" for i in range(dataset["data"].shape[1])])
    )
    if "target_names" in dataset:
        try:
            labels = pd.Series(dataset["target"]).apply(lambda x: dataset["target_names"][x])
        except:
            labels = dataset["target"]
    else:
        labels = dataset["target"]
    df["class"] = labels
    label = f"Vista del Dataset ({len(df)} elementos)"
    return df, label

def update_model_choices(dataset_name):
    """Según el dataset, selecciona modelos de clasificación o regresión."""
    df, target = load_dataset(dataset_name)
    if is_classification_task(target):
        return gr.update(choices=list(classification_models.keys()),
                         value=list(classification_models.keys())[0])
    else:
        return gr.update(choices=list(regression_models.keys()),
                         value=list(regression_models.keys())[0])

def train_multiple_models(dataset_name, model_names, test_size):
    """Entrena varios modelos y retorna resultados (incluyendo métrica)."""
    df, target = load_dataset(dataset_name)
    X_train, X_test, y_train, y_test = train_test_split(df, target, test_size=test_size, random_state=42)
    results = []
    is_classif = is_classification_task(target)
    for name in model_names:
        ModelClass = available_models[name]
        model = ModelClass()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        if is_classif:
            metric = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred, output_dict=True)
        else:
            metric = r2_score(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            report = {"R2 Score": metric, "MSE": mse}
        results.append({
            "Modelo": name,
            "Precisión": metric,
            "Reporte": report,
        })
    return results

def plot_accuracy_comparison(results):
    """Barplot comparando la métrica (accuracy o R2)."""
    fig, ax = plt.subplots()
    metrics = [r["Precisión"] for r in results]
    sns.barplot(x=[r["Modelo"] for r in results], y=metrics, ax=ax)
    if all(0 <= m <= 1 for m in metrics):
        ax.set_ylim(0, 1)
        ax.set_ylabel("Precisión")
    else:
        ax.set_ylabel("R2 Score")
    ax.set_title("Comparación de Modelos")
    ax.tick_params(axis="x", rotation=45)
    plt.tight_layout()
    return fig

def export_reports_as_csv(results):
    """Devuelve un CSV con Modelo / Precisión."""
    df = pd.DataFrame([{"Modelo": r["Modelo"], "Precisión": r["Precisión"]} for r in results])
    return df.to_csv(index=False)

def full_training(dataset_name, selected_models, test_size):
    """Entrena varios modelos y retorna dataframe, gráfica y CSV."""
    results = train_multiple_models(dataset_name, selected_models, test_size)
    df_resultados = pd.DataFrame(
        [{"Modelo": r["Modelo"], "Precisión": round(r["Precisión"], 4)} for r in results]
    ).sort_values(by="Precisión", ascending=False)
    fig = plot_accuracy_comparison(results)
    csv_str = export_reports_as_csv(results)
    csv_path = "report.csv"
    with open(csv_path, "w") as f:
        f.write(csv_str)
    return df_resultados, fig, csv_path

def run_eda(dataset_name):
    """Devuelve la descripción estadística del dataset."""
    df, _ = load_dataset(dataset_name)
    return df.describe().reset_index()

def plot_eda(dataset_name):
    """Boxplot por feature."""
    df, _ = load_dataset(dataset_name)
    df_melted = df.melt(var_name="feature", value_name="valor")
    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=df_melted, x="feature", y="valor", hue="feature", palette="Set2", dodge=False, ax=ax)
    leg = ax.get_legend()
    if leg is not None:
        leg.remove()
    ax.set_title("Distribución por Feature (Boxplot)", fontsize=14, weight="bold")
    ax.set_xlabel("Feature", fontsize=12)
    ax.set_ylabel("Valor", fontsize=12)
    ax.tick_params(axis="x", labelrotation=30)
    plt.tight_layout()
    return fig

def run_pca(dataset_name):
    """PCA a 2 componentes y scatterplot."""
    df, _ = load_dataset(dataset_name)
    pca = PCA(n_components=2)
    components = pca.fit_transform(df)
    df_pca = pd.DataFrame(components, columns=["PC1", "PC2"])
    dataset = available_datasets[dataset_name]()
    if "target_names" in dataset:
        try:
            target_labels = pd.Series(dataset["target"]).apply(lambda x: dataset["target_names"][x])
        except:
            target_labels = dataset["target"]
    else:
        target_labels = dataset["target"]
    df_pca["Clase"] = target_labels
    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="Clase", palette="Set2", s=60, ax=ax)
    ax.set_title("PCA - 2 Componentes", fontsize=14, weight="bold")
    ax.set_xlabel("PC1", fontsize=12)
    ax.set_ylabel("PC2", fontsize=12)
    ax.legend(title="Clase")
    plt.tight_layout()
    return fig

# ---------------------------
# HIPERPARÁMETROS DE MODELOS
# ---------------------------
model_params_demo = {
    "Logistic Regression": {
        "C": {"type": "slider", "min": 0.01, "max": 10.0, "value": 1.0, "step": 0.01, "label": "C (Regularización)"},
        "max_iter": {"type": "slider", "min": 100, "max": 2000, "value": 1000, "step": 100, "label": "Iteraciones Máx."}
    },
    "KNN Classifier": {
        "n_neighbors": {"type": "slider", "min": 1, "max": 30, "value": 5, "step": 1, "label": "n_neighbors"}
    },
    "Decision Tree Classifier": {
        "max_depth": {"type": "slider", "min": 1, "max": 20, "value": 10, "step": 1, "label": "max_depth"},
        "min_samples_split": {"type": "slider", "min": 2, "max": 20, "value": 2, "step": 1, "label": "min_samples_split"}
    },
    "Random Forest Classifier": {
        "n_estimators": {"type": "slider", "min": 10, "max": 200, "value": 100, "step": 10, "label": "n_estimators"},
        "max_depth": {"type": "slider", "min": 1, "max": 20, "value": 10, "step": 1, "label": "max_depth"}
    },
    "SVC": {
        "C": {"type": "slider", "min": 0.1, "max": 10.0, "value": 1.0, "step": 0.1, "label": "C"},
        "gamma": {"type": "slider", "min": 0.001, "max": 1.0, "value": 0.01, "step": 0.001, "label": "gamma"}
    },
    "Random Forest Regressor": {
        "n_estimators": {"type": "slider", "min": 10, "max": 200, "value": 100, "step": 10, "label": "n_estimators"},
        "max_depth": {"type": "slider", "min": 1, "max": 20, "value": 5, "step": 1, "label": "max_depth"}
    }
}

def fix_slider_params(params):
    """Convierte 'min' y 'max' en 'minimum' y 'maximum' para Gradio."""
    params = params.copy()
    if "min" in params:
        params["minimum"] = params.pop("min")
    if "max" in params:
        params["maximum"] = params.pop("max")
    return params

# ---------------------------
# GUARDAR/REGISTRAR MODELOS
# ---------------------------
def save_model_locally(model, model_save_name, headers, target_names=None):
    model_dir = "models"
    os.makedirs(model_dir, exist_ok=True)
    if not model_save_name.endswith(".pkl"):
        model_save_name += ".pkl"
    model_path = os.path.join(model_dir, model_save_name)

    # Guarda el modelo junto con la información de cabeceras y target_names (usando pickle)
    with open(model_path, "wb") as f:
        pickle.dump({"model": model, "headers": headers, "target_names": target_names}, f)

    # Convertir target_names a lista si es necesario para JSON
    if target_names is not None:
        if isinstance(target_names, np.ndarray):
            target_names_serializable = target_names.tolist()
        elif not isinstance(target_names, (list, tuple)):
            target_names_serializable = list(target_names)
        else:
            target_names_serializable = target_names
    else:
        target_names_serializable = None

    # Cargar (o inicializar) el registro actual
    registry_path = "model_registry.json"
    if os.path.exists(registry_path):
        try:
            with open(registry_path, "r") as f:
                registry = json.load(f)
        except json.JSONDecodeError:
            registry = {}
    else:
        registry = {}

    # Actualizar el registro con el nuevo modelo
    registry[model_save_name] = {
        "model_path": model_path,
        "headers": headers,
        "target_names": target_names_serializable
    }

    # Escribir el registro de forma atómica usando atomic_write
    try:
        with atomic_write(registry_path, overwrite=True, encoding="utf-8") as f:
            json.dump(registry, f, indent=4)
    except Exception as e:
        print(f"Error al escribir el registro de forma atómica: {e}")
        raise

    return model_path, registry


# ---------------------------
# ENTRENAMIENTO INDIVIDUAL
# ---------------------------
def train_one_model_with_save(dataset_name, model_name, test_size, val1, val2, val3, val4):
    """Entrena un modelo, retorna la métrica, la figura, la tabla, el 'model_state' y los hiperparámetros."""
    df, target = load_dataset(dataset_name)
    headers = df.columns.tolist()
    # Extraer target_names si existen
    dataset = available_datasets[dataset_name]()
    target_names = dataset.get("target_names", None)  # Podría ser None

    X_train, X_test, y_train, y_test = train_test_split(df, target, test_size=test_size, random_state=42)

    # Parametrización
    if model_name in classification_models:
        if model_name == "Logistic Regression":
            hyperparams = {"C": val1, "max_iter": int(round(val2))}
        elif model_name == "KNN Classifier":
            hyperparams = {"n_neighbors": int(round(val1))}
        elif model_name == "Decision Tree Classifier":
            hyperparams = {"max_depth": int(round(val1)), "min_samples_split": int(round(val2))}
        elif model_name == "Random Forest Classifier":
            hyperparams = {"n_estimators": int(round(val1)), "max_depth": int(round(val2))}
        elif model_name == "SVC":
            hyperparams = {"C": val1, "gamma": val2}
        else:
            hyperparams = {}
    else:
        # Regresión
        if model_name == "Random Forest Regressor":
            hyperparams = {"n_estimators": int(round(val1)), "max_depth": int(round(val2))}
        else:
            hyperparams = {}

    ModelClass = available_models.get(model_name)
    if not ModelClass:
        return "Modelo no válido", None, pd.DataFrame(), None, None

    model = ModelClass(**hyperparams)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Métrica y visual
    if model_name in classification_models:
        metric = accuracy_score(y_test, y_pred)
        metric_name = "Accuracy"
        from sklearn.metrics import confusion_matrix
        cm = confusion_matrix(y_test, y_pred)
        fig, ax = plt.subplots()
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_title("Confusion Matrix")
        table_df = pd.DataFrame({"y_true": y_test, "y_pred": y_pred})
    else:
        metric = r2_score(y_test, y_pred)
        metric_name = "R2 Score"
        fig, ax = plt.subplots()
        ax.scatter(y_test, y_pred)
        ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=2)
        ax.set_xlabel("Actual")
        ax.set_ylabel("Predicted")
        ax.set_title("Actual vs Predicted")
        table_df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

    model_state = {
        "model": model,
        "headers": headers,
        "target_names": target_names
    }
    return f"{metric_name}: {metric:.4f}", fig, table_df, model_state, hyperparams

def load_model_registry():
    registry_path = "model_registry.json"
    if os.path.exists(registry_path):
        try:
            with open(registry_path, "r") as f:
                registry = json.load(f)
            print("Registro cargado:", registry)  # Debug
            return list(registry.keys())
        except json.JSONDecodeError:
            print("Error en decodificación del JSON.")
            return []
    else:
        return []

# ---------------------------
# UTILS PARA LA PREDICCIÓN
# ---------------------------
MAX_FEATURES = 20

def refresh_model_list():
    return gr.update(choices=load_model_registry())

def update_prediction_ui(dataset_name):
    """Muestra/oculta textboxes según el dataset seleccionado (para pred. individual)."""
    df, _ = load_dataset(dataset_name)
    feature_names = df.columns.tolist()
    n = min(len(feature_names), MAX_FEATURES)
    updates = []
    for i in range(MAX_FEATURES):
        if i < n:
            updates.append(gr.update(label=feature_names[i], value="", visible=True))
        else:
            updates.append(gr.update(label=f"Feature_{i}", value="", visible=False))
    return updates

def update_textboxes_from_saved_model(selected_model_name):
    """Si el modelo guardado tiene cabeceras, se muestran esas. De lo contrario, se ocultan."""
    registry_path = "model_registry.json"
    if not os.path.exists(registry_path):
        return [gr.update(visible=False) for _ in range(MAX_FEATURES)]
    with open(registry_path, "r") as f:
        registry = json.load(f)
    if selected_model_name not in registry:
        return [gr.update(visible=False) for _ in range(MAX_FEATURES)]
    entry = registry[selected_model_name]
    if isinstance(entry, dict):
        headers = entry.get("headers", [])
    else:
        headers = []
    updates = []
    n = min(len(headers), MAX_FEATURES)
    for i in range(MAX_FEATURES):
        if i < n:
            updates.append(gr.update(label=headers[i], value="", visible=True))
        else:
            updates.append(gr.update(visible=False))
    return updates

def predict_model_combined(selected_model_name, excel_file, *features):
    """Predicción combinada: si se sube un Excel -> pred masiva; si no, pred individual."""
    registry_path = "model_registry.json"
    if not os.path.exists(registry_path):
        return None, "No hay modelos guardados."

    with open(registry_path, "r") as f:
        registry = json.load(f)

    if selected_model_name not in registry:
        return None, "Modelo no encontrado en el registro."

    model_path = registry[selected_model_name]["model_path"]
    try:
        with open(model_path, "rb") as ff:
            loaded_data = pickle.load(ff)
            model = loaded_data["model"]
            saved_headers = loaded_data["headers"]
            saved_target_names = loaded_data.get("target_names", None)
    except Exception as e:
        return None, f"Error al cargar el modelo: {e}"

    # Función para mapear un índice numérico a la clase real, si target_names existen
    def map_prediction(pred):
        if saved_target_names is not None:
            try:
                return saved_target_names[int(pred)]
            except Exception:
                return pred  # Por si hay error al indexar
        else:
            return pred

    # Modo masivo (archivo Excel)
    if excel_file is not None:
        try:
            df = pd.read_excel(excel_file.name)
        except Exception as e:
            return None, f"Error al leer el Excel: {e}"

        expected = set(saved_headers)
        found = set(df.columns)
        if not expected.issubset(found):
            return None, f"El Excel debe contener al menos estas columnas: {list(expected)}"

        X_df = df[saved_headers]
        try:
            preds = model.predict(X_df)
        except Exception as e:
            return None, f"Error en la predicción: {e}"

        # Si es clasificación y hay target_names, convertimos el número a la clase
        df['Predicción'] = [map_prediction(x) for x in preds]
        return df, ""  # Retorna la tabla y deja en blanco el texto de pred individual

    else:
        # Modo individual (textboxes)
        row_values = []
        for i in range(len(saved_headers)):
            val_str = features[i] if i < len(features) else ""
            if val_str.strip() == "":
                val = 0.0
            else:
                try:
                    val = float(val_str)
                except:
                    return None, "Error: todos los features deben ser numéricos."
            row_values.append(val)

        X_df = pd.DataFrame([row_values], columns=saved_headers)
        try:
            pred = model.predict(X_df)
        except Exception as e:
            return None, f"Error en la predicción: {e}"

        predicted_class = map_prediction(pred[0])
        return None, f"Predicción: {predicted_class}"

# ---------------------------
# INTERFAZ GRADIO PRINCIPAL
# ---------------------------
with gr.Blocks() as demo:
    # ----- Pestaña 1: Dataset + Modelos ML -----
    with gr.Tab("Dataset + Modelos ML"):
        gr.Markdown("## Visualización de Dataset y Comparación de Modelos (múltiples)")

        with gr.Row():
            dataset_selector = gr.Dropdown(
                choices=list(available_datasets.keys()),
                value="Iris (Clasificación)",
                label="Selecciona un dataset"
            )
            model_checkboxes = gr.CheckboxGroup(
                choices=list(classification_models.keys()),
                value=["Logistic Regression", "Random Forest Classifier", "SVC", "KNN Classifier", "Decision Tree Classifier", "Naive Bayes"],
                label="Modelos a entrenar"
            )
            test_size_slider = gr.Slider(
                minimum=0.1, maximum=0.5, value=0.3, step=0.05,
                label="Proporción de datos de prueba"
            )
        train_btn = gr.Button("Entrenar y Comparar")

        results_table = gr.Dataframe(
            headers=["Modelo", "Precisión"],
            label="Resultados Ordenados por Precisión"
        )
        accuracy_plot = gr.Plot(label="Gráfica de Precisión")
        csv_output = gr.File(label="Reporte CSV")

        train_btn.click(
            fn=full_training,
            inputs=[dataset_selector, model_checkboxes, test_size_slider],
            outputs=[results_table, accuracy_plot, csv_output]
        )

        # Vista del dataset
        initial_df, initial_label = show_dataset_with_target("Iris (Clasificación)")
        dataset_label = gr.Markdown(value=initial_label)
        data_table = gr.Dataframe(value=initial_df, label=None)

        dataset_selector.change(
            fn=show_dataset_with_target,
            inputs=dataset_selector,
            outputs=[data_table, dataset_label]
        )
        dataset_selector.change(
            fn=update_model_choices,
            inputs=dataset_selector,
            outputs=model_checkboxes
        )

        with gr.Tabs():
            with gr.Tab("EDA"):
                eda_table_nested = gr.Dataframe(
                    value=run_eda("Iris (Clasificación)"),
                    label="Descripción Estadística"
                )
                eda_plot_nested = gr.Plot(
                    value=plot_eda("Iris (Clasificación)"),
                    label="Distribución de Features"
                )
            with gr.Tab("PCA"):
                pca_output_nested = gr.Plot(
                    value=run_pca("Iris (Clasificación)"),
                    label="Visualización PCA"
                )

        dataset_selector.change(fn=run_eda, inputs=dataset_selector, outputs=eda_table_nested)
        dataset_selector.change(fn=plot_eda, inputs=dataset_selector, outputs=eda_plot_nested)
        dataset_selector.change(fn=run_pca, inputs=dataset_selector, outputs=pca_output_nested)

    # ----- Pestaña 2: Entrenamiento (1 modelo + guardado) -----
    with gr.Tab("Entrenamiento"):
        gr.Markdown("## Entrenamiento (un solo modelo) con hiperparámetros")

        dataset_selector_train = gr.Dropdown(
            choices=list(available_datasets.keys()),
            value="Iris (Clasificación)",
            label="Selecciona un dataset"
        )
        model_selector_train = gr.Dropdown(
            choices=list(classification_models.keys()),
            value=list(classification_models.keys())[0],
            label="Selecciona un modelo"
        )

        dataset_selector_train.change(
            fn=update_model_choices,
            inputs=dataset_selector_train,
            outputs=model_selector_train
        )

        test_size_slider_train = gr.Slider(
            minimum=0.1, maximum=0.5, value=0.3, step=0.05,
            label="Proporción de datos de prueba"
        )

        # Sliders: LR por defecto
        lr_C_params = fix_slider_params(model_params_demo["Logistic Regression"]["C"])
        lr_C_params.pop("type", None)
        lr_max_iter_params = fix_slider_params(model_params_demo["Logistic Regression"]["max_iter"])
        lr_max_iter_params.pop("type", None)

        rf_n_estimators_params = fix_slider_params(model_params_demo["Random Forest Regressor"]["n_estimators"])
        rf_n_estimators_params.pop("type", None)
        rf_max_depth_params = fix_slider_params(model_params_demo["Random Forest Regressor"]["max_depth"])
        rf_max_depth_params.pop("type", None)

        c_slider = gr.Slider(
            label=lr_C_params.get("label", "C"),
            minimum=lr_C_params["minimum"],
            maximum=lr_C_params["maximum"],
            value=lr_C_params["value"],
            step=lr_C_params["step"],
            visible=True
        )
        max_iter_slider = gr.Slider(
            label=lr_max_iter_params.get("label", "Iteraciones Máx."),
            minimum=lr_max_iter_params["minimum"],
            maximum=lr_max_iter_params["maximum"],
            value=lr_max_iter_params["value"],
            step=lr_max_iter_params["step"],
            visible=True
        )
        n_estimators_slider = gr.Slider(
            label=rf_n_estimators_params.get("label", "n_estimators"),
            minimum=rf_n_estimators_params["minimum"],
            maximum=rf_n_estimators_params["maximum"],
            value=rf_n_estimators_params["value"],
            step=rf_n_estimators_params["step"],
            visible=False
        )
        max_depth_slider = gr.Slider(
            label=rf_max_depth_params.get("label", "max_depth"),
            minimum=rf_max_depth_params["minimum"],
            maximum=rf_max_depth_params["maximum"],
            value=rf_max_depth_params["value"],
            step=rf_max_depth_params["step"],
            visible=False
        )

        def show_model_params(model_name):
            """Muestra/oculta sliders según el modelo elegido."""
            if model_name == "Logistic Regression":
                return (
                    gr.update(visible=True, value=lr_C_params["value"]),
                    gr.update(visible=True, value=lr_max_iter_params["value"]),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )
            elif model_name == "KNN Classifier":
                return (
                    gr.update(visible=True, value=model_params_demo["KNN Classifier"]["n_neighbors"]["value"]),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )
            elif model_name == "Decision Tree Classifier":
                return (
                    gr.update(visible=True, value=model_params_demo["Decision Tree Classifier"]["max_depth"]["value"]),
                    gr.update(visible=True, value=model_params_demo["Decision Tree Classifier"]["min_samples_split"]["value"]),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )
            elif model_name == "Random Forest Classifier":
                return (
                    gr.update(visible=True, value=model_params_demo["Random Forest Classifier"]["n_estimators"]["value"]),
                    gr.update(visible=True, value=model_params_demo["Random Forest Classifier"]["max_depth"]["value"]),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )
            elif model_name == "SVC":
                return (
                    gr.update(visible=True, value=model_params_demo["SVC"]["C"]["value"]),
                    gr.update(visible=True, value=model_params_demo["SVC"]["gamma"]["value"]),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )
            elif model_name == "Random Forest Regressor":
                return (
                    gr.update(visible=True, value=model_params_demo["Random Forest Regressor"]["n_estimators"]["value"]),
                    gr.update(visible=True, value=model_params_demo["Random Forest Regressor"]["max_depth"]["value"]),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )
            else:
                return (
                    gr.update(visible=False), gr.update(visible=False),
                    gr.update(visible=False), gr.update(visible=False)
                )

        model_selector_train.change(
            fn=show_model_params,
            inputs=model_selector_train,
            outputs=[c_slider, max_iter_slider, n_estimators_slider, max_depth_slider]
        )

        train_button_single = gr.Button("Entrenar (1 modelo)")
        metric_output = gr.Markdown("Sin entrenamiento todavía")
        pred_plot = gr.Plot(label="Gráfica de Resultados")
        pred_table = gr.Dataframe(label="Tabla de Predicciones")
        trained_model_state = gr.State()

        def train_model(dataset_name, model_name, test_size, val1, val2, val3, val4):
            msg, fig, table, model_state, hyperparams = train_one_model_with_save(
                dataset_name, model_name, test_size, val1, val2, val3, val4
            )
            return msg, fig, table, model_state

        train_button_single.click(
            fn=train_model,
            inputs=[
                dataset_selector_train, model_selector_train, test_size_slider_train,
                c_slider, max_iter_slider, n_estimators_slider, max_depth_slider
            ],
            outputs=[metric_output, pred_plot, pred_table, trained_model_state]
        )

        with gr.Row():
            model_save_name_input = gr.Textbox(
                label="Nombre para guardar el modelo",
                placeholder="ej: mi_modelo.pkl"
            )
            save_button = gr.Button("Guardar modelo")

        def save_trained_model(model_state, save_name):
            if model_state is None:
                return "No hay modelo entrenado para guardar."
            model_obj = model_state.get("model")
            headers = model_state.get("headers")
            target_names = model_state.get("target_names")
            model_path, registry = save_model_locally(model_obj, save_name, headers, target_names)
            return f"Modelo guardado en: {model_path}\nRegistro:\n{json.dumps(registry, indent=2)}"

        save_button.click(
            fn=save_trained_model,
            inputs=[trained_model_state, model_save_name_input],
            outputs=metric_output
        )

    # ----- Pestaña 3: "Predicción" -----
    with gr.Tab("Predicción"):
        gr.Markdown("""
        ## Predicción

        1. **Selecciona un modelo guardado** (abajo).
        2. *Opcionalmente*, selecciona un dataset para ajustar los campos de la predicción individual.
        3. *Opcionalmente*, sube un archivo Excel con las columnas del modelo para predicción masiva.

        - Si subes Excel, verás la predicción en formato de tabla.
        - Si *no* subes Excel, puedes usar los inputs individuales para hacer una sola predicción.
        """)

        # --- Reorganizado: primero dataset, luego modelo ---
        dataset_selector_pred = gr.Dropdown(
            choices=list(available_datasets.keys()),
            value="Iris (Clasificación)",
            label="Selecciona un dataset (para actualizar inputs individuales)"
        )

        model_registry_dropdown = gr.Dropdown(
            choices=load_model_registry(),
            label="Modelos guardados"
        )
        refresh_models_btn = gr.Button("Refrescar modelos")

        refresh_models_btn.click(fn=refresh_model_list, inputs=[], outputs=model_registry_dropdown)

        # Inputs para predicción individual (hasta MAX_FEATURES)
        feature_textboxes = []
        for i in range(MAX_FEATURES):
            txt = gr.Textbox(label=f"Feature_{i}", visible=False)
            feature_textboxes.append(txt)

        dataset_selector_pred.change(
            fn=update_prediction_ui,
            inputs=dataset_selector_pred,
            outputs=feature_textboxes
        )


        # Al cambiar de modelo, actualizamos inputs con las cabeceras guardadas
        model_registry_dropdown.change(
            fn=update_textboxes_from_saved_model,
            inputs=model_registry_dropdown,
            outputs=feature_textboxes
        )

        # Botón para generar plantilla Excel
        plantilla_btn = gr.Button("Generar plantilla Excel")
        plantilla_output = gr.File(label="Descargar plantilla Excel", interactive=False)

        def generate_excel_template(selected_model_name):
            registry_path = "model_registry.json"
            if not os.path.exists(registry_path):
                return None
            with open(registry_path, "r") as f:
                registry = json.load(f)
            if selected_model_name not in registry:
                return None
            entry = registry[selected_model_name]
            if isinstance(entry, dict):
                headers = entry.get("headers", [])
            else:
                headers = []
            df = pd.DataFrame(columns=headers)
            temp_path = "plantilla.xlsx"
            df.to_excel(temp_path, index=False)
            return temp_path

        plantilla_btn.click(
            fn=generate_excel_template,
            inputs=model_registry_dropdown,
            outputs=plantilla_output
        )

        # Componente de carga de Excel
        excel_upload = gr.File(label="Cargar archivo Excel (opcional)", file_types=[".xlsx", ".xls"])

        predict_btn = gr.Button("Predecir")
        bulk_output = gr.Dataframe(label="Predicción en modo masivo (Excel)")
        single_output = gr.Markdown("Resultado de predicción individual")

        predict_btn.click(
            fn=predict_model_combined,
            inputs=[model_registry_dropdown, excel_upload] + feature_textboxes,
            outputs=[bulk_output, single_output]
        )

demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://f9df17f18015dfe9f7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Registro cargado: {'caner.pkl': {'model_path': 'models/caner.pkl', 'headers': ['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness', 'mean compactness', 'mean concavity', 'mean concave points', 'mean symmetry', 'mean fractal dimension', 'radius error', 'texture error', 'perimeter error', 'area error', 'smoothness error', 'compactness error', 'concavity error', 'concave points error', 'symmetry error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst perimeter', 'worst area', 'worst smoothness', 'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry', 'worst fractal dimension'], 'target_names': ['malignant', 'benign']}}
