<a href="https://colab.research.google.com/github/ivansst773/Aprendizaje_de_Maquina/blob/main/Parcial_2_TAM_2025_1/Parcial_2_TAM_2025_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Instalar dependencias en Colab
!pip install dash jupyter-dash umap-learn plotly scikit-learn tensorflow



In [None]:
# Importar bibliotecas
from dash import Dash, dcc, html, Input, Output
from jupyter_dash import JupyterDash
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import umap
import base64
from io import BytesIO
from PIL import Image

In [None]:
# Load and preprocess the digits data.
# NOTE(review): load_digits() is scikit-learn's bundled 8x8 digits set, while the
# dashboard headings say "USPS" - confirm which dataset is intended.
digits = load_digits()
# load_digits() pixel intensities are integers in 0..16, so dividing by 16 (not 255)
# is what actually scales the features to [0, 1].
X, y = digits.data / 16.0, digits.target
images = digits.images  # Raw 8x8 images, used for the thumbnails overlaid on the plots
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# PCA and UMAP projections of the full feature matrix X down to 2 components.
# X_pca is read by the PCA plot callback further down.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# UMAP with a fixed random_state; the UMAP callback below refits UMAP for the
# n_neighbors chosen in the dashboard instead of reading X_umap.
umap_model = umap.UMAP(n_components=2, n_neighbors=15, random_state=42)
X_umap = umap_model.fit_transform(X)

  warn(


In [None]:
# Train the classifiers. Each one is fitted on the training split and evaluated on the
# test split; the classification report is kept as a dict for the metrics table.

# 1. LogisticRegression
# multi_class is intentionally not passed: the parameter is deprecated in recent
# scikit-learn releases, and the lbfgs solver already fits a multinomial model for
# multiclass targets, so the fitted model is the same.
lr = LogisticRegression(C=1.0, solver='lbfgs', max_iter=1000)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
report_lr = classification_report(y_test, y_pred_lr, output_dict=True)

# 2. RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
report_rf = classification_report(y_test, y_pred_rf, output_dict=True)

# 3. CNN
# The flat 64-feature rows are reshaped back to 8x8 single-channel images.
X_train_cnn = X_train.reshape(-1, 8, 8, 1)
X_test_cnn = X_test.reshape(-1, 8, 8, 1)
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    # No second MaxPooling2D: with the default 'valid' padding the feature map is
    # already 8x8 -> 6x6 -> 3x3 -> 1x1 at this point, so there is nothing left to pool.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])
# Integer labels are used directly, hence the sparse categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, verbose=0)
# predict() returns per-class probabilities; argmax picks the predicted digit.
y_pred_cnn = model.predict(X_test_cnn).argmax(axis=1)
report_cnn = classification_report(y_test, y_pred_cnn, output_dict=True)




Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


In [None]:
# Compute ROC curves for class 0 only (one-vs-rest, as an example).
# y_test == 0 is the binary ground truth; column 0 of each model's probability
# output is used as the score for class 0.
fpr_lr, tpr_lr, _ = roc_curve(y_test == 0, lr.predict_proba(X_test)[:, 0])
roc_auc_lr = auc(fpr_lr, tpr_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test == 0, rf.predict_proba(X_test)[:, 0])
roc_auc_rf = auc(fpr_rf, tpr_rf)
# The Keras model has no predict_proba; its softmax output from predict() is used instead.
fpr_cnn, tpr_cnn, _ = roc_curve(y_test == 0, model.predict(X_test_cnn)[:, 0])
roc_auc_cnn = auc(fpr_cnn, tpr_cnn)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [None]:
# Create the dashboard
app = Dash(__name__)

# Theory blurbs (item a): model name -> short description shown below the model selector.
model_descriptions = {
    "PCA": "Modelo: Proyecta datos en un subespacio que maximiza la varianza.\nOptimización: max tr(W^T X^T X W) s.t. W^T W = I.",
    "UMAP": "Modelo: Reducción no lineal que preserva estructura topológica.\nOptimización: min divergencia de entropía cruzada.",
    "GaussianNB": "Modelo: Clasificador probabilístico con suposición de independencia.\nOptimización: max log-verosimilitud.",
    "SGDClassifier": "Modelo: Clasificador lineal optimizado por descenso estocástico.\nOptimización: min pérdida regularizada.",
    "LogisticRegression": "Modelo: Predice probabilidad con sigmoide.\nOptimización: max log-verosimilitud regularizada.",
    "LinearDiscriminantAnalysis": "Modelo: Proyecta datos maximizando separación entre clases.\nOptimización: max razón de varianzas.",
    "KNeighborsClassifier": "Modelo: Predice por mayoría de k-vecinos.\nSin optimización explícita.",
    "SVC": "Modelo: Encuentra hiperplano de máximo margen.\nOptimización: min ||w||^2 + C sum(ξ).",
    "RandomForestClassifier": "Modelo: Ensamble de árboles de decisión.\nOptimización: min impureza por nodo.",
    "GaussianProcessClassifier": "Modelo: Proceso gaussiano para clasificación.\nOptimización: max log-verosimilitud marginal.",
    "DeepLearning": "Modelo: Redes neuronales profundas.\nOptimización: min pérdida (e.g., cross-entropy)."
}

# Dashboard layout.
# Every dropdown is created with clearable=False: a cleared dropdown sends None to its
# callback, which the callbacks below do not expect (dict lookup / UMAP n_neighbors).
app.layout = html.Div([
    html.H1("Dashboard Parcial 2: Teoría de Aprendizaje de Máquina", style={'textAlign': 'center'}),

    # Theory section (item a)
    html.H2("Modelos y Optimización"),
    dcc.Dropdown(
        id='model-selector',
        options=[{'label': model, 'value': model} for model in model_descriptions.keys()],
        value='PCA',
        clearable=False,
        style={'width': '50%'}
    ),
    html.Div(id='model-description', style={'margin': '20px'}),

    # PCA and UMAP projections (item b)
    html.H2("Proyecciones del Conjunto USPS"),
    html.H3("PCA"),
    dcc.Graph(id='pca-plot'),
    html.H3("UMAP"),
    dcc.Dropdown(
        id='umap-neighbors',
        options=[{'label': str(i), 'value': i} for i in [5, 15, 50]],
        value=15,
        clearable=False,
        style={'width': '50%'}
    ),
    dcc.Graph(id='umap-plot'),

    # Classification results (item c)
    html.H2("Resultados de Clasificación"),
    dcc.Dropdown(
        id='classifier-selector',
        options=[
            {'label': 'Logistic Regression', 'value': 'lr'},
            {'label': 'Random Forest', 'value': 'rf'},
            {'label': 'CNN', 'value': 'cnn'}
        ],
        value='lr',
        clearable=False,
        style={'width': '50%'}
    ),
    html.Table(id='metrics-table', style={'margin': '20px'}),
    dcc.Graph(id='roc-plot')
])

In [None]:
# Callback that refreshes the model description
@app.callback(
    Output('model-description', 'children'),
    Input('model-selector', 'value')
)
def update_model_description(model):
    """Return the description of the selected model as a Markdown component.

    Args:
        model: Value of the 'model-selector' dropdown (a key of model_descriptions).
            An unknown or empty selection yields an empty description instead of a
            KeyError.

    Returns:
        A dcc.Markdown component with the description text.
    """
    description = model_descriptions.get(model, "")
    # Markdown collapses a single "\n" into a space; two trailing spaces before the
    # newline force a hard line break so the "Modelo" and "Optimización" lines stay apart.
    return dcc.Markdown(description.replace("\n", "  \n"))

In [None]:
# Callback that renders the PCA scatter plot
@app.callback(
    Output('pca-plot', 'figure'),
    Input('pca-plot', 'id')  # Dummy input, only there so the callback runs once to initialise the graph
)
def update_pca_plot(_):
    """Build the PCA scatter plot with one example digit image overlaid per class.

    Args:
        _: Ignored; value of the dummy input.

    Returns:
        A Plotly figure of X_pca coloured by label, with a thumbnail of the first
        sample of each digit centred on that sample's projected position.
    """
    fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], color=y, labels={'x': 'PC1', 'y': 'PC2'}, title="Proyección PCA")

    # load_digits() intensities are 0..16. The previous `img * 255` produced values up to
    # 4080, which do not fit in uint8, so the thumbnails came out corrupted.
    max_intensity = 16.0
    # Size the thumbnails relative to the plotted range so they stay small but visible
    # whatever the scale of the projection (layout image sizes are in axis units here).
    thumb_w = 0.08 * float(np.ptp(X_pca[:, 0]))
    thumb_h = 0.08 * float(np.ptp(X_pca[:, 1]))

    for digit in range(10):
        matches = np.flatnonzero(y == digit)
        if matches.size == 0:
            continue  # No sample of this class: nothing to overlay
        idx = matches[0]
        pixels = np.clip(images[idx] / max_intensity * 255.0, 0, 255).astype(np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=X_pca[idx, 0], y=X_pca[idx, 1],
                xref="x", yref="y",
                # Centre the thumbnail on the point (the default anchor is its top-left corner).
                xanchor="center", yanchor="middle",
                sizex=thumb_w, sizey=thumb_h,
                opacity=0.8
            )
        )
    return fig

In [None]:
# Callback that renders the UMAP scatter plot
@app.callback(
    Output('umap-plot', 'figure'),
    Input('umap-neighbors', 'value')
)
def update_umap_plot(n_neighbors):
    """Refit UMAP with the selected n_neighbors and plot the 2-D embedding.

    Args:
        n_neighbors: Value of the 'umap-neighbors' dropdown, passed to umap.UMAP.

    Returns:
        A Plotly figure of the embedding coloured by label, with a thumbnail of the
        first sample of each digit centred on that sample's embedded position.
    """
    # UMAP is refitted on every call; random_state keeps the result repeatable.
    embedding = umap.UMAP(n_components=2, n_neighbors=n_neighbors, random_state=42).fit_transform(X)
    fig = px.scatter(x=embedding[:, 0], y=embedding[:, 1], color=y, labels={'x': 'UMAP1', 'y': 'UMAP2'}, title=f"Proyección UMAP (n_neighbors={n_neighbors})")

    # load_digits() intensities are 0..16. The previous `img * 255` produced values up to
    # 4080, which do not fit in uint8, so the thumbnails came out corrupted.
    max_intensity = 16.0
    # Size the thumbnails relative to the plotted range (layout image sizes are in axis units here).
    thumb_w = 0.08 * float(np.ptp(embedding[:, 0]))
    thumb_h = 0.08 * float(np.ptp(embedding[:, 1]))

    for digit in range(10):
        matches = np.flatnonzero(y == digit)
        if matches.size == 0:
            continue  # No sample of this class: nothing to overlay
        idx = matches[0]
        pixels = np.clip(images[idx] / max_intensity * 255.0, 0, 255).astype(np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=embedding[idx, 0], y=embedding[idx, 1],
                xref="x", yref="y",
                # Centre the thumbnail on the point (the default anchor is its top-left corner).
                xanchor="center", yanchor="middle",
                sizex=thumb_w, sizey=thumb_h,
                opacity=0.8
            )
        )
    return fig

In [None]:
# Callback that refreshes the metrics table and the ROC curve
@app.callback(
    [Output('metrics-table', 'children'), Output('roc-plot', 'figure')],
    Input('classifier-selector', 'value')
)
def update_classifier_results(classifier):
    """Build the metrics table rows and the ROC figure for the selected classifier.

    Args:
        classifier: Value of the 'classifier-selector' dropdown ('lr', 'rf' or 'cnn').
            Any other value is treated as 'cnn', as before.

    Returns:
        A (table_rows, figure) pair: a list of html.Tr rows for the metrics table and
        a Plotly figure with the class-0 ROC curve plus the diagonal baseline.
    """
    if classifier == 'lr':
        title, report = "Logistic Regression", report_lr
        fpr, tpr, roc_auc = fpr_lr, tpr_lr, roc_auc_lr
    elif classifier == 'rf':
        title, report = "Random Forest", report_rf
        fpr, tpr, roc_auc = fpr_rf, tpr_rf, roc_auc_rf
    else:
        title, report = "CNN", report_cnn
        fpr, tpr, roc_auc = fpr_cnn, tpr_cnn, roc_auc_cnn

    # Metrics table. The first value is report['accuracy'], so it is labelled
    # "Exactitud (accuracy)"; the previous label "Precisión" reads as precision,
    # which is a different metric.
    table = [
        html.Tr([html.Td("Métrica"), html.Td("Valor")]),
        html.Tr([html.Td("Exactitud (accuracy)"), html.Td(f"{report['accuracy']:.3f}")]),
        html.Tr([html.Td("F1-Score (macro)"), html.Td(f"{report['macro avg']['f1-score']:.3f}")])
    ]

    # ROC curve with the chance-level diagonal as a dashed baseline
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {roc_auc:.2f})'))
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Línea base', line=dict(dash='dash')))
    fig.update_layout(title=f"Curva ROC - {title}", xaxis_title="Tasa de Falsos Positivos", yaxis_title="Tasa de Verdaderos Positivos")

    return table, fig

In [None]:
# Run the dashboard embedded in the notebook output.
# Dash.run() selects the notebook display mode through `jupyter_mode`; `mode` is not one
# of its parameters (it was the keyword of the older JupyterDash.run_server).
app.run(jupyter_mode='inline')

<IPython.core.display.Javascript object>

# Task
Convert the provided Python code for a Dash dashboard analyzing the USPS digits dataset, including data loading, preprocessing, PCA and UMAP projections, training of Logistic Regression, Random Forest, and CNN models, and displaying results and visualizations, into a Streamlit application, ensuring all functionalities and visualizations are preserved and the application can be run using `streamlit run`.

## Preparar el entorno streamlit

### Subtask:
Asegurarse de tener Streamlit instalado (`pip install streamlit`).


**Reasoning**:
Install the streamlit library using pip.



In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hI

## Reutilizar el código de análisis de datos

### Subtask:
Reutilizar el código de análisis de datos


**Reasoning**:
Copy the necessary code blocks for data loading, preprocessing, dimensionality reduction, model training, and ROC curve calculation from the existing notebook into a single code block for the Streamlit script.



In [None]:
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import umap
import base64
from io import BytesIO
from PIL import Image

# Load and preprocess the digits data.
# NOTE(review): load_digits() is scikit-learn's bundled 8x8 digits set, while the
# dashboard headings say "USPS" - confirm which dataset is intended.
digits = load_digits()
# load_digits() pixel intensities are integers in 0..16, so dividing by 16 (not 255)
# is what actually scales the features to [0, 1].
X, y = digits.data / 16.0, digits.target
images = digits.images  # Raw 8x8 images, used for the thumbnails overlaid on the plots
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# PCA and UMAP projections of the full feature matrix down to 2 components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

umap_model = umap.UMAP(n_components=2, n_neighbors=15, random_state=42)
X_umap = umap_model.fit_transform(X)

# Train the classifiers. Each one is fitted on the training split and evaluated on the
# test split; the classification report is kept as a dict for the metrics table.

# 1. LogisticRegression
# multi_class is intentionally not passed: the parameter is deprecated in recent
# scikit-learn releases, and the lbfgs solver already fits a multinomial model for
# multiclass targets, so the fitted model is the same.
lr = LogisticRegression(C=1.0, solver='lbfgs', max_iter=1000)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
report_lr = classification_report(y_test, y_pred_lr, output_dict=True)

# 2. RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
report_rf = classification_report(y_test, y_pred_rf, output_dict=True)

# 3. CNN
# The flat 64-feature rows are reshaped back to 8x8 single-channel images.
X_train_cnn = X_train.reshape(-1, 8, 8, 1)
X_test_cnn = X_test.reshape(-1, 8, 8, 1)
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])
# Integer labels are used directly, hence the sparse categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, verbose=0)
# predict() returns per-class probabilities; argmax picks the predicted digit.
y_pred_cnn = model.predict(X_test_cnn).argmax(axis=1)
report_cnn = classification_report(y_test, y_pred_cnn, output_dict=True)

# ROC curves for class 0 only (one-vs-rest, as an example): y_test == 0 is the binary
# ground truth and column 0 of each probability output is the score.
fpr_lr, tpr_lr, _ = roc_curve(y_test == 0, lr.predict_proba(X_test)[:, 0])
roc_auc_lr = auc(fpr_lr, tpr_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test == 0, rf.predict_proba(X_test)[:, 0])
roc_auc_rf = auc(fpr_rf, tpr_rf)
fpr_cnn, tpr_cnn, _ = roc_curve(y_test == 0, model.predict(X_test_cnn)[:, 0])
roc_auc_cnn = auc(fpr_cnn, tpr_cnn)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.




Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


## Diseñar la interfaz de usuario con streamlit

### Subtask:
Utilizar los componentes de Streamlit (`st.title`, `st.header`, `st.write`, `st.selectbox`, `st.slider`, `st.table`, `st.plotly_chart`, etc.) para recrear el diseño del dashboard de Dash (secciones teóricas, proyecciones, resultados).


**Reasoning**:
Use Streamlit components to create the layout of the dashboard, including titles, headers, selectboxes, a slider, and placeholders for plots and tables, following the provided instructions.



In [None]:
# Theory blurbs (item a): model name -> short description shown below the model selector.
model_descriptions = {
    "PCA": "Modelo: Proyecta datos en un subespacio que maximiza la varianza.\nOptimización: max tr(W^T X^T X W) s.t. W^T W = I.",
    "UMAP": "Modelo: Reducción no lineal que preserva estructura topológica.\nOptimización: min divergencia de entropía cruzada.",
    "GaussianNB": "Modelo: Clasificador probabilístico con suposición de independencia.\nOptimización: max log-verosimilitud.",
    "SGDClassifier": "Modelo: Clasificador lineal optimizado por descenso estocástico.\nOptimización: min pérdida regularizada.",
    "LogisticRegression": "Modelo: Predice probabilidad con sigmoide.\nOptimización: max log-verosimilitud regularizada.",
    "LinearDiscriminantAnalysis": "Modelo: Proyecta datos maximizando separación entre clases.\nOptimización: max razón de varianzas.",
    "KNeighborsClassifier": "Modelo: Predice por mayoría de k-vecinos.\nSin optimización explícita.",
    "SVC": "Modelo: Encuentra hiperplano de máximo margen.\nOptimización: min ||w||^2 + C sum(ξ).",
    "RandomForestClassifier": "Modelo: Ensamble de árboles de decisión.\nOptimización: min impureza por nodo.",
    "GaussianProcessClassifier": "Modelo: Proceso gaussiano para clasificación.\nOptimización: max log-verosimilitud marginal.",
    "DeepLearning": "Modelo: Redes neuronales profundas.\nOptimización: min pérdida (e.g., cross-entropy)."
}

st.title("Dashboard Parcial 2: Teoría de Aprendizaje de Máquina")

# Theory section (item a)
st.header("Modelos y Optimización")
selected_model = st.selectbox(
    "Selecciona un modelo:",
    list(model_descriptions.keys())
)
# Markdown collapses a single "\n" into a space; two trailing spaces before the newline
# force a hard line break so the "Modelo" and "Optimización" lines stay apart.
st.markdown(model_descriptions[selected_model].replace("\n", "  \n"))

# PCA and UMAP projections (item b)
st.header("Proyecciones del Conjunto USPS")
st.subheader("PCA")
# Placeholder filled later with the PCA plot
pca_plot_placeholder = st.empty()

st.subheader("UMAP")
n_neighbors_umap = st.slider(
    "Número de vecinos para UMAP:",
    min_value=5,
    max_value=50,
    value=15,
    step=1
)
# Placeholder filled later with the UMAP plot
umap_plot_placeholder = st.empty()

# Classification results (item c)
st.header("Resultados de Clasificación")
selected_classifier = st.selectbox(
    "Selecciona un clasificador:",
    ('Logistic Regression', 'Random Forest', 'CNN')
)
# Placeholder filled later with the metrics table
metrics_table_placeholder = st.empty()
# Placeholder filled later with the ROC plot
roc_plot_placeholder = st.empty()

2025-07-16 01:46:25.898 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-16 01:46:25.917 Session state does not function when running a script without `streamlit run`


## Implementar la interactividad

### Subtask:
Adaptar la lógica que en Dash se manejaba con "callbacks" para que funcione con el modelo reactivo de Streamlit. Esto implica que los cambios en los widgets de entrada (selectores, sliders) disparen la re-ejecución de partes del script para actualizar los elementos de salida (descripciones, gráficos, tablas).


**Reasoning**:
Wrap the plotting and metrics display logic in functions to handle reactively updating the Streamlit interface based on user input.



In [None]:
def generate_pca_plot():
    """Build the PCA scatter plot with one example digit image overlaid per class.

    Reads the module-level X_pca, y and images.

    Returns:
        A Plotly figure of X_pca coloured by label, with a thumbnail of the first
        sample of each digit centred on that sample's projected position.
    """
    fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], color=y, labels={'x': 'PC1', 'y': 'PC2'}, title="Proyección PCA")

    # load_digits() intensities are 0..16. The previous `img * 255` produced values up to
    # 4080, which do not fit in uint8, so the thumbnails came out corrupted.
    max_intensity = 16.0
    # Size the thumbnails relative to the plotted range (layout image sizes are in axis units here).
    thumb_w = 0.08 * float(np.ptp(X_pca[:, 0]))
    thumb_h = 0.08 * float(np.ptp(X_pca[:, 1]))

    for digit in range(10):
        matches = np.flatnonzero(y == digit)
        if matches.size == 0:
            continue  # No sample of this class: nothing to overlay
        idx = matches[0]
        pixels = np.clip(images[idx] / max_intensity * 255.0, 0, 255).astype(np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=X_pca[idx, 0], y=X_pca[idx, 1],
                xref="x", yref="y",
                # Centre the thumbnail on the point (the default anchor is its top-left corner).
                xanchor="center", yanchor="middle",
                sizex=thumb_w, sizey=thumb_h,
                opacity=0.8
            )
        )
    return fig

def generate_umap_plot(n_neighbors):
    """Fit UMAP with the given n_neighbors and plot the 2-D embedding.

    Reads the module-level X, y and images.

    Args:
        n_neighbors: Neighbourhood size passed to umap.UMAP.

    Returns:
        A Plotly figure of the embedding coloured by label, with a thumbnail of the
        first sample of each digit centred on that sample's embedded position.
    """
    # UMAP is refitted on every call; random_state keeps the result repeatable.
    embedding = umap.UMAP(n_components=2, n_neighbors=n_neighbors, random_state=42).fit_transform(X)
    fig = px.scatter(x=embedding[:, 0], y=embedding[:, 1], color=y, labels={'x': 'UMAP1', 'y': 'UMAP2'}, title=f"Proyección UMAP (n_neighbors={n_neighbors})")

    # load_digits() intensities are 0..16. The previous `img * 255` produced values up to
    # 4080, which do not fit in uint8, so the thumbnails came out corrupted.
    max_intensity = 16.0
    # Size the thumbnails relative to the plotted range (layout image sizes are in axis units here).
    thumb_w = 0.08 * float(np.ptp(embedding[:, 0]))
    thumb_h = 0.08 * float(np.ptp(embedding[:, 1]))

    for digit in range(10):
        matches = np.flatnonzero(y == digit)
        if matches.size == 0:
            continue  # No sample of this class: nothing to overlay
        idx = matches[0]
        pixels = np.clip(images[idx] / max_intensity * 255.0, 0, 255).astype(np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=embedding[idx, 0], y=embedding[idx, 1],
                xref="x", yref="y",
                # Centre the thumbnail on the point (the default anchor is its top-left corner).
                xanchor="center", yanchor="middle",
                sizex=thumb_w, sizey=thumb_h,
                opacity=0.8
            )
        )
    return fig

def generate_classifier_results(classifier):
    """Build the HTML metrics table and the ROC figure for one classifier.

    Reads the module-level reports and ROC arrays (report_*, fpr_*, tpr_*, roc_auc_*).

    Args:
        classifier: 'Logistic Regression' or 'Random Forest'; any other value
            selects the CNN results.

    Returns:
        A (metrics_html, figure) pair: an HTML <table> string with accuracy and
        macro F1, and a Plotly figure with the ROC curve and a dashed diagonal.
    """
    # Pick the report / ROC data that belong to the requested classifier.
    if classifier == 'Logistic Regression':
        title, report = "Logistic Regression", report_lr
        fpr, tpr, roc_auc = fpr_lr, tpr_lr, roc_auc_lr
    elif classifier == 'Random Forest':
        title, report = "Random Forest", report_rf
        fpr, tpr, roc_auc = fpr_rf, tpr_rf, roc_auc_rf
    else:  # CNN
        title, report = "CNN", report_cnn
        fpr, tpr, roc_auc = fpr_cnn, tpr_cnn, roc_auc_cnn

    # Metrics table: header row followed by one row per metric.
    rows = [
        "<tr><td>Metric</td><td>Value</td></tr>",
        f"<tr><td>Accuracy</td><td>{report['accuracy']:.3f}</td></tr>",
        f"<tr><td>F1-Score (macro)</td><td>{report['macro avg']['f1-score']:.3f}</td></tr>",
    ]
    metrics_html = "<table>" + "".join(rows) + "</table>"

    # ROC curve plus the diagonal baseline.
    roc_trace = go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {roc_auc:.2f})')
    baseline_trace = go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Baseline', line=dict(dash='dash'))
    fig = go.Figure(data=[roc_trace, baseline_trace])
    fig.update_layout(title=f"Curva ROC - {title}", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")

    return metrics_html, fig

# Display PCA plot: fill the placeholder created in the layout cell
pca_plot_placeholder.plotly_chart(generate_pca_plot())

# Display UMAP plot for the number of neighbours currently set on the slider
umap_plot_placeholder.plotly_chart(generate_umap_plot(n_neighbors_umap))

# Display classifier results (metrics table and ROC plot) for the selected classifier.
# The table is an HTML string, hence unsafe_allow_html=True.
metrics_html, roc_fig = generate_classifier_results(selected_classifier)
metrics_table_placeholder.markdown(metrics_html, unsafe_allow_html=True)
roc_plot_placeholder.plotly_chart(roc_fig)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



DeltaGenerator()

## Integrar las visualizaciones de plotly

### Subtask:
Integrar las visualizaciones de Plotly (PCA, UMAP y curva ROC) en la aplicación mediante `st.plotly_chart`.


**Reasoning**:
Integrate the plotly visualizations into the Streamlit application using `st.plotly_chart()` for the PCA, UMAP, and ROC curve plots, ensuring they are displayed in their respective placeholders using the figures generated by the helper functions.



In [None]:
# Display PCA plot: build the figure, then fill the placeholder created in the layout cell
pca_fig = generate_pca_plot()
pca_plot_placeholder.plotly_chart(pca_fig)

# Display UMAP plot for the number of neighbours currently set on the slider
umap_fig = generate_umap_plot(n_neighbors_umap)
umap_plot_placeholder.plotly_chart(umap_fig)

# Display classifier results (metrics table and ROC plot)
# The table is an HTML string, hence unsafe_allow_html=True.
metrics_html, roc_fig = generate_classifier_results(selected_classifier)
metrics_table_placeholder.markdown(metrics_html, unsafe_allow_html=True)
roc_plot_placeholder.plotly_chart(roc_fig)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



DeltaGenerator()

## Calcular y mostrar métricas/roc

### Subtask:
Calcular y mostrar las métricas de clasificación y las curvas ROC de cada clasificador.


## Estructurar el script de streamlit

### Subtask:
Organizar el código en un único script Python (`.py`) que Streamlit pueda ejecutar.


**Reasoning**:
Combine all the code into a single Python script file.



In [None]:
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import umap
import base64
from io import BytesIO
from PIL import Image

# Load and preprocess the digits data
@st.cache_resource
def load_data():
    """Load the scikit-learn digits dataset, scale it and split it.

    NOTE(review): load_digits() is scikit-learn's bundled 8x8 digits set, while the
    dashboard headings say "USPS" - confirm which dataset is intended.

    Returns:
        A tuple (X, y, images, X_train, X_test, y_train, y_test): the scaled flat
        features, the labels, the raw 8x8 images and an 80/20 train/test split of
        X and y (random_state=42).
    """
    digits = load_digits()
    # load_digits() pixel intensities are integers in 0..16, so dividing by 16 (not 255)
    # is what actually scales the features to [0, 1].
    X, y = digits.data / 16.0, digits.target
    images = digits.images  # Raw 8x8 images, used for the thumbnails overlaid on the plots
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X, y, images, X_train, X_test, y_train, y_test

X, y, images, X_train, X_test, y_train, y_test = load_data()

# PCA and UMAP projections
@st.cache_resource
def perform_dimensionality_reduction(X, n_neighbors_umap=15):
    """Project X to two dimensions with PCA and with UMAP.

    Args:
        X: Feature matrix to project.
        n_neighbors_umap: n_neighbors passed to umap.UMAP (default 15).

    Returns:
        A (pca_coords, umap_coords) pair with the 2-component PCA projection and
        the 2-component UMAP embedding (random_state=42) of X.
    """
    pca_coords = PCA(n_components=2).fit_transform(X)
    umap_coords = umap.UMAP(
        n_components=2,
        n_neighbors=n_neighbors_umap,
        random_state=42,
    ).fit_transform(X)
    return pca_coords, umap_coords

# Train the classifiers
@st.cache_resource
def train_classifiers(X_train, X_test, y_train, y_test):
    """Train Logistic Regression, Random Forest and a small CNN, and evaluate each.

    Args:
        X_train, X_test: Flat feature rows (reshaped to 8x8x1 for the CNN).
        y_train, y_test: Integer class labels.

    Returns:
        A tuple (lr, report_lr, rf, report_rf, model, report_cnn): each fitted model
        followed by its classification report (dict) on the test split.
    """
    # 1. LogisticRegression
    # multi_class is intentionally not passed: the parameter is deprecated in recent
    # scikit-learn releases, and the lbfgs solver already fits a multinomial model for
    # multiclass targets, so the fitted model is the same.
    lr = LogisticRegression(C=1.0, solver='lbfgs', max_iter=1000)
    lr.fit(X_train, y_train)
    y_pred_lr = lr.predict(X_test)
    report_lr = classification_report(y_test, y_pred_lr, output_dict=True)

    # 2. RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)
    report_rf = classification_report(y_test, y_pred_rf, output_dict=True)

    # 3. CNN
    # The flat 64-feature rows are reshaped back to 8x8 single-channel images.
    X_train_cnn = X_train.reshape(-1, 8, 8, 1)
    X_test_cnn = X_test.reshape(-1, 8, 8, 1)
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    # Integer labels are used directly, hence the sparse categorical cross-entropy loss.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, verbose=0)
    # predict() returns per-class probabilities; argmax picks the predicted digit.
    y_pred_cnn = model.predict(X_test_cnn).argmax(axis=1)
    report_cnn = classification_report(y_test, y_pred_cnn, output_dict=True)

    return lr, report_lr, rf, report_rf, model, report_cnn

lr, report_lr, rf, report_rf, cnn_model, report_cnn = train_classifiers(X_train, X_test, y_train, y_test)


# ROC curves (class 0 only, as an example)
@st.cache_resource
def calculate_roc_curves(lr, rf, cnn_model, X_test, y_test):
    """Compute the class-0 ROC curve and its AUC for the three classifiers.

    Args:
        lr, rf: Fitted models exposing predict_proba.
        cnn_model: Fitted model whose predict() output is indexed per class.
        X_test: Flat test features (reshaped to 8x8x1 for the CNN).
        y_test: Test labels; `y_test == 0` is used as the binary ground truth.

    Returns:
        A 9-tuple (fpr, tpr, auc) for lr, then rf, then the CNN, in that order.
    """
    is_class_zero = y_test == 0

    def _curve(class_zero_scores):
        # ROC points and area under the curve for one set of class-0 scores.
        fpr, tpr, _ = roc_curve(is_class_zero, class_zero_scores)
        return fpr, tpr, auc(fpr, tpr)

    lr_curve = _curve(lr.predict_proba(X_test)[:, 0])
    rf_curve = _curve(rf.predict_proba(X_test)[:, 0])
    cnn_curve = _curve(cnn_model.predict(X_test.reshape(-1, 8, 8, 1))[:, 0])
    return (*lr_curve, *rf_curve, *cnn_curve)

fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn = calculate_roc_curves(lr, rf, cnn_model, X_test, y_test)

# Theory blurbs (item a): model name -> short text rendered with st.markdown when the
# model is picked in the selectbox below. Each value is a model line and an
# optimisation line separated by a single "\n".
model_descriptions = {
    "PCA": "Modelo: Proyecta datos en un subespacio que maximiza la varianza.\nOptimización: max tr(W^T X^T X W) s.t. W^T W = I.",
    "UMAP": "Modelo: Reducción no lineal que preserva estructura topológica.\nOptimización: min divergencia de entropía cruzada.",
    "GaussianNB": "Modelo: Clasificador probabilístico con suposición de independencia.\nOptimización: max log-verosimilitud.",
    "SGDClassifier": "Modelo: Clasificador lineal optimizado por descenso estocástico.\nOptimización: min pérdida regularizada.",
    "LogisticRegression": "Modelo: Predice probabilidad con sigmoide.\nOptimización: max log-verosimilitud regularizada.",
    "LinearDiscriminantAnalysis": "Modelo: Proyecta datos maximizando separación entre clases.\nOptimización: max razón de varianzas.",
    "KNeighborsClassifier": "Modelo: Predice por mayoría de k-vecinos.\nSin optimización explícita.",
    "SVC": "Modelo: Encuentra hiperplano de máximo margen.\nOptimización: min ||w||^2 + C sum(ξ).",
    "RandomForestClassifier": "Modelo: Ensamble de árboles de decisión.\nOptimización: min impureza por nodo.",
    "GaussianProcessClassifier": "Modelo: Proceso gaussiano para clasificación.\nOptimización: max log-verosimilitud marginal.",
    "DeepLearning": "Modelo: Redes neuronales profundas.\nOptimización: min pérdida (e.g., cross-entropy)."
}

def generate_pca_plot(X_pca, y, images):
    """Build the PCA scatter plot with one example digit image overlaid per class.

    Args:
        X_pca: 2-D PCA coordinates; columns 0 and 1 are plotted as PC1 and PC2.
        y: Class labels, used for colouring and to pick one sample per digit.
        images: Per-sample 2-D images indexed like y. Assumed to use the 0..16
            intensity range of load_digits() - TODO confirm if another source is used.

    Returns:
        A Plotly figure with a thumbnail of the first sample of each digit centred
        on that sample's projected position.
    """
    fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], color=y, labels={'x': 'PC1', 'y': 'PC2'}, title="Proyección PCA")

    # The previous `img * 255` produced values up to 4080 for 0..16 intensities, which
    # do not fit in uint8, so the thumbnails came out corrupted.
    max_intensity = 16.0
    # Size the thumbnails relative to the plotted range (layout image sizes are in axis units here).
    thumb_w = 0.08 * float(np.ptp(X_pca[:, 0]))
    thumb_h = 0.08 * float(np.ptp(X_pca[:, 1]))

    for digit in range(10):
        matches = np.flatnonzero(y == digit)
        if matches.size == 0:
            continue  # No sample of this class: nothing to overlay
        idx = matches[0]
        pixels = np.clip(images[idx] / max_intensity * 255.0, 0, 255).astype(np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=X_pca[idx, 0], y=X_pca[idx, 1],
                xref="x", yref="y",
                # Centre the thumbnail on the point (the default anchor is its top-left corner).
                xanchor="center", yanchor="middle",
                sizex=thumb_w, sizey=thumb_h,
                opacity=0.8
            )
        )
    return fig

def generate_umap_plot(X_umap, y, images, n_neighbors):
    """Build the UMAP scatter plot with one example digit image overlaid per class.

    Args:
        X_umap: 2-D UMAP coordinates; columns 0 and 1 are plotted as UMAP1 and UMAP2.
        y: Class labels, used for colouring and to pick one sample per digit.
        images: Per-sample 2-D images indexed like y. Assumed to use the 0..16
            intensity range of load_digits() - TODO confirm if another source is used.
        n_neighbors: Only shown in the plot title; the embedding is not recomputed here.

    Returns:
        A Plotly figure with a thumbnail of the first sample of each digit centred
        on that sample's embedded position.
    """
    fig = px.scatter(x=X_umap[:, 0], y=X_umap[:, 1], color=y, labels={'x': 'UMAP1', 'y': 'UMAP2'}, title=f"Proyección UMAP (n_neighbors={n_neighbors})")

    # The previous `img * 255` produced values up to 4080 for 0..16 intensities, which
    # do not fit in uint8, so the thumbnails came out corrupted.
    max_intensity = 16.0
    # Size the thumbnails relative to the plotted range (layout image sizes are in axis units here).
    thumb_w = 0.08 * float(np.ptp(X_umap[:, 0]))
    thumb_h = 0.08 * float(np.ptp(X_umap[:, 1]))

    for digit in range(10):
        matches = np.flatnonzero(y == digit)
        if matches.size == 0:
            continue  # No sample of this class: nothing to overlay
        idx = matches[0]
        pixels = np.clip(images[idx] / max_intensity * 255.0, 0, 255).astype(np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=X_umap[idx, 0], y=X_umap[idx, 1],
                xref="x", yref="y",
                # Centre the thumbnail on the point (the default anchor is its top-left corner).
                xanchor="center", yanchor="middle",
                sizex=thumb_w, sizey=thumb_h,
                opacity=0.8
            )
        )
    return fig

def generate_classifier_results(classifier, report_lr, report_rf, report_cnn, fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn):
    """Build the HTML metrics table and the ROC figure for one classifier.

    ``classifier`` selects which group of inputs is used: 'Logistic Regression'
    and 'Random Forest' pick their own report and ROC data; any other value
    falls back to the CNN data.

    Returns:
        Tuple ``(metrics_html, fig)``: an HTML ``<table>`` string with the
        accuracy and macro F1-score taken from the selected report, and a
        Plotly figure with the ROC curve plus a dashed diagonal baseline.
    """
    named_results = (
        ("Logistic Regression", report_lr, fpr_lr, tpr_lr, roc_auc_lr),
        ("Random Forest", report_rf, fpr_rf, tpr_rf, roc_auc_rf),
    )
    cnn_results = ("CNN", report_cnn, fpr_cnn, tpr_cnn, roc_auc_cnn)
    # First entry whose name matches wins; anything unmatched is treated as CNN.
    title, report, fpr, tpr, roc_auc = next(
        (entry for entry in named_results if classifier == entry[0]),
        cnn_results,
    )

    # Metrics table: header row followed by one row per metric.
    table_rows = [
        "<tr><td>Metric</td><td>Value</td></tr>",
        f"<tr><td>Accuracy</td><td>{report['accuracy']:.3f}</td></tr>",
        f"<tr><td>F1-Score (macro)</td><td>{report['macro avg']['f1-score']:.3f}</td></tr>",
    ]
    metrics_html = "<table>" + "".join(table_rows) + "</table>"

    # ROC curve with the chance-level diagonal for reference.
    roc_trace = go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {roc_auc:.2f})')
    baseline_trace = go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Baseline', line=dict(dash='dash'))
    fig = go.Figure(data=[roc_trace, baseline_trace])
    fig.update_layout(title=f"Curva ROC - {title}", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")

    return metrics_html, fig

# Page layout. Streamlit renders widgets in the order these statements run.
st.title("Dashboard Parcial 2: Teoría de Aprendizaje de Máquina")

# Theory section (item a): pick a model and show its description text.
st.header("Modelos y Optimización")
selected_model = st.selectbox(
    "Selecciona un modelo:",
    list(model_descriptions.keys())
)
st.markdown(model_descriptions[selected_model])

# PCA and UMAP projections (item b)
st.header("Proyecciones del Conjunto USPS")
st.subheader("PCA")
# Display the PCA plot built from the X_pca projection.
pca_fig = generate_pca_plot(X_pca, y, images)
st.plotly_chart(pca_fig)

st.subheader("UMAP")
n_neighbors_umap = st.slider(
    "Número de vecinos para UMAP:",
    min_value=5,
    max_value=50,
    value=15,
    step=1
)
# Run the reduction with the selected neighbour count and display the UMAP
# plot. The PCA half of the returned pair (X_pca_updated) is not used below.
X_pca_updated, X_umap_updated = perform_dimensionality_reduction(X, n_neighbors_umap)
umap_fig = generate_umap_plot(X_umap_updated, y, images, n_neighbors_umap)
st.plotly_chart(umap_fig)

# Classification results (item c)
st.header("Resultado de Clasificación")
selected_classifier = st.selectbox(
    "Selecciona un clasificador:",
    ('Logistic Regression', 'Random Forest', 'CNN')
)

# Display the metrics table and ROC curve of the selected classifier.
# metrics_html is an HTML <table> string, hence unsafe_allow_html=True.
metrics_html, roc_fig = generate_classifier_results(selected_classifier, report_lr, report_rf, report_cnn, fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn)
st.markdown(metrics_html, unsafe_allow_html=True)
st.plotly_chart(roc_fig)




Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step




UnhashableParamError: Cannot hash argument 'lr' (of type `sklearn.linear_model._logistic.LogisticRegression`) in 'calculate_roc_curves'.

To address this, you can tell Streamlit not to hash this argument by adding a
leading underscore to the argument's name in the function signature:

```
@st.cache_resource
def calculate_roc_curves(_lr, ...):
    ...
```
            

**Reasoning**:
The error message indicates that Streamlit cannot hash the `lr` object (a LogisticRegression model) within the `calculate_roc_curves` function when using `@st.cache_resource`. To fix this, I need to follow the suggestion in the error message and add a leading underscore to the `lr`, `rf`, and `cnn_model` arguments in the `calculate_roc_curves` function signature to tell Streamlit not to hash these objects.



In [None]:
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import umap
import base64
from io import BytesIO
from PIL import Image

# Cargar y preprocesar datos USPS
@st.cache_resource
def load_data():
    """Load scikit-learn's 8x8 digits dataset and split it for training.

    Pixel intensities in ``load_digits`` range from 0 to 16 (not 0 to 255), so
    both the flattened features and the 2-D images are divided by 16 to land
    in [0, 1]. Scaling the images as well keeps them consistent with the
    plotting code, which multiplies them by 255 before casting to uint8.

    Returns:
        Tuple ``(X, y, images, X_train, X_test, y_train, y_test)``: the scaled
        flattened samples, their digit labels, the scaled 2-D images aligned
        with ``X``, and an 80/20 train/test split of ``X``/``y`` made with
        ``random_state=42``.
    """
    digits = load_digits()
    max_intensity = 16.0  # documented upper bound of load_digits pixel values
    X, y = digits.data / max_intensity, digits.target
    images = digits.images / max_intensity  # used for the thumbnail overlays
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X, y, images, X_train, X_test, y_train, y_test

X, y, images, X_train, X_test, y_train, y_test = load_data()

# Proyecciones PCA y UMAP
@st.cache_resource
def perform_dimensionality_reduction(_X, n_neighbors_umap=15):
    """Project the samples to two dimensions with both PCA and UMAP.

    The leading underscore on ``_X`` tells Streamlit not to hash that
    argument, so cached results are distinguished by ``n_neighbors_umap``.

    Args:
        _X: Feature matrix to project.
        n_neighbors_umap: ``n_neighbors`` passed to UMAP.

    Returns:
        Tuple ``(X_pca, X_umap)`` with the two 2-component projections.
    """
    projection_pca = PCA(n_components=2).fit_transform(_X)

    reducer = umap.UMAP(n_components=2, n_neighbors=n_neighbors_umap, random_state=42)
    projection_umap = reducer.fit_transform(_X)

    return projection_pca, projection_umap

# Entrenar clasificadores
@st.cache_resource
def train_classifiers(_X_train, _X_test, _y_train, _y_test):
    """Train Logistic Regression, Random Forest and CNN classifiers.

    Each model is fitted on the training split and evaluated on the test
    split with ``classification_report(..., output_dict=True)``. The leading
    underscores tell Streamlit not to hash the array arguments.

    Args:
        _X_train, _X_test: Flattened samples; reshaped to ``(-1, 8, 8, 1)``
            for the CNN.
        _y_train, _y_test: Integer class labels.

    Returns:
        Tuple ``(lr, report_lr, rf, report_rf, model, report_cnn)`` holding
        each fitted model followed by its report dictionary.
    """
    # Imported here because the notebook's import cell does not expose Input.
    from tensorflow.keras.layers import Input

    # 1. LogisticRegression
    # `multi_class` is deprecated in recent scikit-learn releases; with the
    # lbfgs solver the multinomial formulation is already the default.
    lr = LogisticRegression(C=1.0, solver='lbfgs', max_iter=1000)
    lr.fit(_X_train, _y_train)
    y_pred_lr = lr.predict(_X_test)
    report_lr = classification_report(_y_test, y_pred_lr, output_dict=True)

    # 2. RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    rf.fit(_X_train, _y_train)
    y_pred_rf = rf.predict(_X_test)
    report_rf = classification_report(_y_test, y_pred_rf, output_dict=True)

    # 3. CNN
    X_train_cnn = _X_train.reshape(-1, 8, 8, 1)
    X_test_cnn = _X_test.reshape(-1, 8, 8, 1)
    model = Sequential([
        # An explicit Input layer replaces `input_shape=` on the first Conv2D,
        # which Keras warns against for Sequential models.
        Input(shape=(8, 8, 1)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train_cnn, _y_train, epochs=10, batch_size=32, verbose=0)
    # verbose=0 keeps the Keras progress bar out of the app's output.
    y_pred_cnn = model.predict(X_test_cnn, verbose=0).argmax(axis=1)
    report_cnn = classification_report(_y_test, y_pred_cnn, output_dict=True)

    return lr, report_lr, rf, report_rf, model, report_cnn

lr, report_lr, rf, report_rf, cnn_model, report_cnn = train_classifiers(X_train, X_test, y_train, y_test)


# Calcular curvas ROC (para clase 0 como ejemplo)
@st.cache_resource
def calculate_roc_curves(_lr, _rf, _cnn_model, _X_test, _y_test):
    """Compute the class-0 one-vs-rest ROC curve and AUC of each model.

    Column 0 of every model's predicted probabilities is used as the score
    for "is class 0". The leading underscores tell Streamlit not to hash the
    model and array arguments.

    Returns:
        Flat 9-tuple ``(fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf,
        roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn)``.
    """
    is_class_zero = _y_test == 0
    cnn_input = _X_test.reshape(-1, 8, 8, 1)

    # Class-0 scores, in the order the results are returned: LR, RF, CNN.
    class_zero_scores = (
        _lr.predict_proba(_X_test)[:, 0],
        _rf.predict_proba(_X_test)[:, 0],
        _cnn_model.predict(cnn_input)[:, 0],
    )

    curves = []
    for scores in class_zero_scores:
        fpr, tpr, _thresholds = roc_curve(is_class_zero, scores)
        curves.extend((fpr, tpr, auc(fpr, tpr)))
    return tuple(curves)

fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn = calculate_roc_curves(lr, rf, cnn_model, X_test, y_test)

# Theoretical descriptions (item a): maps each model name offered in the
# "Selecciona un modelo" dropdown to a short text giving the model summary and
# its optimization objective; the selected entry is rendered with st.markdown.
model_descriptions = {
    "PCA": "Modelo: Proyecta datos en un subespacio que maximiza la varianza.\nOptimización: max tr(W^T X^T X W) s.t. W^T W = I.",
    "UMAP": "Modelo: Reducción no lineal que preserva estructura topológica.\nOptimización: min divergencia de entropía cruzada.",
    "GaussianNB": "Modelo: Clasificador probabilístico con suposición de independencia.\nOptimización: max log-verosimilitud.",
    "SGDClassifier": "Modelo: Clasificador lineal optimizado por descenso estocástico.\nOptimización: min pérdida regularizada.",
    "LogisticRegression": "Modelo: Predice probabilidad con sigmoide.\nOptimización: max log-verosimilitud regularizada.",
    "LinearDiscriminantAnalysis": "Modelo: Proyecta datos maximizando separación entre clases.\nOptimización: max razón de varianzas.",
    "KNeighborsClassifier": "Modelo: Predice por mayoría de k-vecinos.\nSin optimización explícita.",
    "SVC": "Modelo: Encuentra hiperplano de máximo margen.\nOptimización: min ||w||^2 + C sum(ξ).",
    "RandomForestClassifier": "Modelo: Ensamble de árboles de decisión.\nOptimización: min impureza por nodo.",
    "GaussianProcessClassifier": "Modelo: Proceso gaussiano para clasificación.\nOptimización: max log-verosimilitud marginal.",
    "DeepLearning": "Modelo: Redes neuronales profundas.\nOptimización: min pérdida (e.g., cross-entropy)."
}

def _add_class_thumbnails(fig, coords, labels, images, rel_size=0.1):
    """Overlay one representative image per class onto a 2-D scatter figure.

    For every distinct label, the first sample carrying that label is encoded
    as a PNG thumbnail and centred on that sample's projected coordinates.

    Args:
        fig: Plotly figure whose x/y axes hold the projection.
        coords: Array indexed as ``coords[:, 0]`` / ``coords[:, 1]`` holding
            the projected points.
        labels: Per-sample class labels, aligned with ``coords``.
        images: Per-sample 2-D grayscale arrays, aligned with ``coords``.
        rel_size: Thumbnail width/height as a fraction of each axis' data span.

    Returns:
        The same figure, so the call can be used in a ``return`` statement.
    """
    coords = np.asarray(coords)
    labels = np.asarray(labels)
    if labels.size == 0:
        return fig

    # Layout images are sized in data units; deriving the size from the extent
    # of the projection keeps thumbnails readable whatever the feature scale.
    size_x = (float(np.ptp(coords[:, 0])) or 1.0) * rel_size
    size_y = (float(np.ptp(coords[:, 1])) or 1.0) * rel_size

    # Iterate over the labels that are actually present instead of assuming
    # classes 0..9, so a missing class cannot raise IndexError.
    for label in np.unique(labels):
        idx = int(np.flatnonzero(labels == label)[0])
        img = np.asarray(images[idx], dtype=float)
        peak = img.max()
        # Stretch to 0..255 *before* casting: multiplying raw intensities by
        # 255 overflows uint8 whenever the image is not already in [0, 1].
        if peak > 0:
            pixels = np.clip(img / peak * 255, 0, 255).astype(np.uint8)
        else:
            pixels = np.zeros(img.shape, dtype=np.uint8)
        buffered = BytesIO()
        Image.fromarray(pixels).save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=float(coords[idx, 0]), y=float(coords[idx, 1]),
                xref="x", yref="y",
                # Centre the thumbnail on its sample (the default anchor is
                # the image's top-left corner).
                xanchor="center", yanchor="middle",
                sizex=size_x, sizey=size_y,
                opacity=0.8
            )
        )
    return fig


def generate_pca_plot(_X_pca, _y, _images):
    """Build the PCA scatter plot with one representative image per class.

    Args:
        _X_pca: 2-D PCA projection, one row per sample.
        _y: Class label of each sample (used for the point colour).
        _images: Per-sample images, aligned with ``_X_pca``.

    Returns:
        The Plotly figure.
    """
    fig = px.scatter(x=_X_pca[:, 0], y=_X_pca[:, 1], color=_y, labels={'x': 'PC1', 'y': 'PC2'}, title="Proyección PCA")
    return _add_class_thumbnails(fig, _X_pca, _y, _images)


def generate_umap_plot(_X_umap, _y, _images, n_neighbors):
    """Build the UMAP scatter plot with one representative image per class.

    Args:
        _X_umap: 2-D UMAP projection, one row per sample.
        _y: Class label of each sample (used for the point colour).
        _images: Per-sample images, aligned with ``_X_umap``.
        n_neighbors: Neighbour count used for the projection; only shown in
            the plot title.

    Returns:
        The Plotly figure.
    """
    fig = px.scatter(x=_X_umap[:, 0], y=_X_umap[:, 1], color=_y, labels={'x': 'UMAP1', 'y': 'UMAP2'}, title=f"Proyección UMAP (n_neighbors={n_neighbors})")
    return _add_class_thumbnails(fig, _X_umap, _y, _images)

def generate_classifier_results(classifier, _report_lr, _report_rf, _report_cnn, _fpr_lr, _tpr_lr, _roc_auc_lr, _fpr_rf, _tpr_rf, _roc_auc_rf, _fpr_cnn, _tpr_cnn, _roc_auc_cnn):
    """Build the HTML metrics table and the ROC figure for one classifier.

    ``classifier`` selects which group of inputs is used: 'Logistic Regression'
    and 'Random Forest' pick their own report and ROC data; any other value
    falls back to the CNN data.

    Returns:
        Tuple ``(metrics_html, fig)``: an HTML ``<table>`` string with the
        accuracy and macro F1-score taken from the selected report, and a
        Plotly figure with the ROC curve plus a dashed diagonal baseline.
    """
    named_results = (
        ("Logistic Regression", _report_lr, _fpr_lr, _tpr_lr, _roc_auc_lr),
        ("Random Forest", _report_rf, _fpr_rf, _tpr_rf, _roc_auc_rf),
    )
    cnn_results = ("CNN", _report_cnn, _fpr_cnn, _tpr_cnn, _roc_auc_cnn)
    # First entry whose name matches wins; anything unmatched is treated as CNN.
    title, report, fpr, tpr, roc_auc = next(
        (entry for entry in named_results if classifier == entry[0]),
        cnn_results,
    )

    # Metrics table: header row followed by one row per metric.
    table_rows = [
        "<tr><td>Metric</td><td>Value</td></tr>",
        f"<tr><td>Accuracy</td><td>{report['accuracy']:.3f}</td></tr>",
        f"<tr><td>F1-Score (macro)</td><td>{report['macro avg']['f1-score']:.3f}</td></tr>",
    ]
    metrics_html = "<table>" + "".join(table_rows) + "</table>"

    # ROC curve with the chance-level diagonal for reference.
    roc_trace = go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {roc_auc:.2f})')
    baseline_trace = go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Baseline', line=dict(dash='dash'))
    fig = go.Figure(data=[roc_trace, baseline_trace])
    fig.update_layout(title=f"Curva ROC - {title}", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")

    return metrics_html, fig

# Page layout. Streamlit renders widgets in the order these statements run.
st.title("Dashboard Parcial 2: Teoría de Aprendizaje de Máquina")

# Theory section (item a): pick a model and show its description text.
st.header("Modelos y Optimización")
selected_model = st.selectbox(
    "Selecciona un modelo:",
    list(model_descriptions.keys())
)
st.markdown(model_descriptions[selected_model])

# PCA and UMAP projections (item b)
st.header("Proyecciones del Conjunto USPS")
st.subheader("PCA")
# Run the reduction once with n_neighbors=15 and plot only its PCA half;
# X_umap_initial is not used further down.
X_pca_updated, X_umap_initial = perform_dimensionality_reduction(X, 15) # 15 matches the slider's initial value
pca_fig = generate_pca_plot(X_pca_updated, y, images)
st.plotly_chart(pca_fig)

st.subheader("UMAP")
n_neighbors_umap = st.slider(
    "Número de vecinos para UMAP:",
    min_value=5,
    max_value=50,
    value=15,
    step=1
)
# Run the reduction again with the selected neighbour count and plot the UMAP
# half. The helper returns PCA as well, so X_pca_updated is reassigned here
# but not used afterwards.
X_pca_updated, X_umap_updated = perform_dimensionality_reduction(X, n_neighbors_umap)
umap_fig = generate_umap_plot(X_umap_updated, y, images, n_neighbors_umap)
st.plotly_chart(umap_fig)

# Classification results (item c)
st.header("Resultado de Clasificación")
selected_classifier = st.selectbox(
    "Selecciona un clasificador:",
    ('Logistic Regression', 'Random Forest', 'CNN')
)

# Display the metrics table and ROC curve of the selected classifier.
# metrics_html is an HTML <table> string, hence unsafe_allow_html=True.
metrics_html, roc_fig = generate_classifier_results(selected_classifier, report_lr, report_rf, report_cnn, fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn)
st.markdown(metrics_html, unsafe_allow_html=True)
st.plotly_chart(roc_fig)




Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 



n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



DeltaGenerator()

## Ejecutar la aplicación streamlit

### Subtask:
Correr la aplicación desde la terminal usando `streamlit run dashboard_app.py` (el archivo que genera la celda siguiente).


**Reasoning**:
Save the complete Python script into a `.py` file in the current directory.



In [None]:
# Write the complete Streamlit dashboard to dashboard_app.py so it can be
# launched with `streamlit run dashboard_app.py`.
#
# The script is kept in a RAW triple-single-quoted string on purpose: in a
# regular string every "\n" inside the description literals would turn into a
# real line break and the written file would not be valid Python. The raw
# string also lets the embedded docstrings use plain triple double quotes.
streamlit_script_content = r'''
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import umap
import base64
from io import BytesIO
from PIL import Image

# Cargar y preprocesar datos USPS
@st.cache_resource
def load_data():
    """Loads and preprocesses the USPS digits dataset."""
    digits = load_digits()
    # load_digits pixel values range from 0 to 16, so divide by 16 to get [0, 1].
    X, y = digits.data / 16.0, digits.target
    # Scaled as well: the plotting code multiplies images by 255 before the uint8 cast.
    images = digits.images / 16.0
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X, y, images, X_train, X_test, y_train, y_test

X, y, images, X_train, X_test, y_train, y_test = load_data()

# Proyecciones PCA y UMAP
@st.cache_resource
def perform_dimensionality_reduction(_X, n_neighbors_umap=15):
    """Performs PCA and UMAP dimensionality reduction."""
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(_X)

    umap_model = umap.UMAP(n_components=2, n_neighbors=n_neighbors_umap, random_state=42)
    X_umap = umap_model.fit_transform(_X)
    return X_pca, X_umap

# Entrenar clasificadores
@st.cache_resource
def train_classifiers(_X_train, _X_test, _y_train, _y_test):
    """Trains Logistic Regression, Random Forest, and CNN classifiers."""
    # 1. LogisticRegression
    lr = LogisticRegression(C=1.0, multi_class='multinomial', solver='lbfgs', max_iter=1000)
    lr.fit(_X_train, _y_train)
    y_pred_lr = lr.predict(_X_test)
    report_lr = classification_report(_y_test, y_pred_lr, output_dict=True)

    # 2. RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    rf.fit(_X_train, _y_train)
    y_pred_rf = rf.predict(_X_test)
    report_rf = classification_report(_y_test, y_pred_rf, output_dict=True)

    # 3. CNN
    X_train_cnn = _X_train.reshape(-1, 8, 8, 1)
    X_test_cnn = _X_test.reshape(-1, 8, 8, 1)
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train_cnn, _y_train, epochs=10, batch_size=32, verbose=0)
    y_pred_cnn = model.predict(X_test_cnn).argmax(axis=1)
    report_cnn = classification_report(_y_test, y_pred_cnn, output_dict=True)

    return lr, report_lr, rf, report_rf, model, report_cnn

lr, report_lr, rf, report_rf, cnn_model, report_cnn = train_classifiers(X_train, X_test, y_train, y_test)


# Calcular curvas ROC (para clase 0 como ejemplo)
@st.cache_resource
def calculate_roc_curves(_lr, _rf, _cnn_model, _X_test, _y_test):
    """Calculates ROC curves and AUC for class 0."""
    X_test_cnn = _X_test.reshape(-1, 8, 8, 1)
    fpr_lr, tpr_lr, _ = roc_curve(_y_test == 0, _lr.predict_proba(_X_test)[:, 0])
    roc_auc_lr = auc(fpr_lr, tpr_lr)
    fpr_rf, tpr_rf, _ = roc_curve(_y_test == 0, _rf.predict_proba(_X_test)[:, 0])
    roc_auc_rf = auc(fpr_rf, tpr_rf)
    fpr_cnn, tpr_cnn, _ = roc_curve(_y_test == 0, _cnn_model.predict(X_test_cnn)[:, 0])
    roc_auc_cnn = auc(fpr_cnn, tpr_cnn)
    return fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn

fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn = calculate_roc_curves(lr, rf, cnn_model, X_test, y_test)


# Descripciones teóricas (punto a)
model_descriptions = {
    "PCA": "Modelo: Proyecta datos en un subespacio que maximiza la varianza.\nOptimización: max tr(W^T X^T X W) s.t. W^T W = I.",
    "UMAP": "Modelo: Reducción no lineal que preserva estructura topológica.\nOptimización: min divergencia de entropía cruzada.",
    "GaussianNB": "Modelo: Clasificador probabilístico con suposición de independencia.\nOptimización: max log-verosimilitud.",
    "SGDClassifier": "Modelo: Clasificador lineal optimizado por descenso estocástico.\nOptimización: min pérdida regularizada.",
    "LogisticRegression": "Modelo: Predice probabilidad con sigmoide.\nOptimización: max log-verosimilitud regularizada.",
    "LinearDiscriminantAnalysis": "Modelo: Proyecta datos maximizando separación entre clases.\nOptimización: max razón de varianzas.",
    "KNeighborsClassifier": "Modelo: Predice por mayoría de k-vecinos.\nSin optimización explícita.",
    "SVC": "Modelo: Encuentra hiperplano de máximo margen.\nOptimización: min ||w||^2 + C sum(ξ).",
    "RandomForestClassifier": "Modelo: Ensamble de árboles de decisión.\nOptimización: min impureza por nodo.",
    "GaussianProcessClassifier": "Modelo: Proceso gaussiano para clasificación.\nOptimización: max log-verosimilitud marginal.",
    "DeepLearning": "Modelo: Redes neuronales profundas.\nOptimización: min pérdida (e.g., cross-entropy)."
}

def generate_pca_plot(_X_pca, _y, _images):
    """Generates the PCA plot with representative images."""
    fig = px.scatter(x=_X_pca[:, 0], y=_X_pca[:, 1], color=_y, labels={'x': 'PC1', 'y': 'PC2'}, title="Proyección PCA")
    for i in range(10):
        idx = np.where(_y == i)[0][0]
        img = _images[idx]
        img_pil = Image.fromarray((img * 255).astype(np.uint8))
        buffered = BytesIO()
        img_pil.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=_X_pca[idx, 0], y=_X_pca[idx, 1],
                xref="x", yref="y",
                sizex=0.5, sizey=0.5,
                opacity=0.8
            )
        )
    return fig

def generate_umap_plot(_X_umap, _y, _images, n_neighbors):
    """Generates the UMAP plot with representative images for a given number of neighbors."""
    fig = px.scatter(x=_X_umap[:, 0], y=_X_umap[:, 1], color=_y, labels={'x': 'UMAP1', 'y': 'UMAP2'}, title=f"Proyección UMAP (n_neighbors={n_neighbors})")
    for i in range(10):
        idx = np.where(_y == i)[0][0]
        img = _images[idx]
        img_pil = Image.fromarray((img * 255).astype(np.uint8))
        buffered = BytesIO()
        img_pil.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{img_str}",
                x=_X_umap[idx, 0], y=_X_umap[idx, 1],
                xref="x", yref="y",
                sizex=0.5, sizey=0.5,
                opacity=0.8
            )
        )
    return fig


def generate_classifier_results(classifier, _report_lr, _report_rf, _report_cnn, _fpr_lr, _tpr_lr, _roc_auc_lr, _fpr_rf, _tpr_rf, _roc_auc_rf, _fpr_cnn, _tpr_cnn, _roc_auc_cnn):
    """Generates the metrics table and ROC plot for a given classifier."""
    if classifier == 'Logistic Regression':
        report = _report_lr
        fpr, tpr, roc_auc = _fpr_lr, _tpr_lr, _roc_auc_lr
        title = "Logistic Regression"
    elif classifier == 'Random Forest':
        report = _report_rf
        fpr, tpr, roc_auc = _fpr_rf, _tpr_rf, _roc_auc_rf
        title = "Random Forest"
    else: # CNN
        report = _report_cnn
        fpr, tpr, roc_auc = _fpr_cnn, _tpr_cnn, _roc_auc_cnn
        title = "CNN"

    # Create metrics table
    metrics_html = "<table>"
    metrics_html += "<tr><td>Metric</td><td>Value</td></tr>"
    metrics_html += f"<tr><td>Accuracy</td><td>{report['accuracy']:.3f}</td></tr>"
    metrics_html += f"<tr><td>F1-Score (macro)</td><td>{report['macro avg']['f1-score']:.3f}</td></tr>"
    metrics_html += "</table>"


    # Create ROC curve
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {roc_auc:.2f})'))
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Baseline', line=dict(dash='dash')))
    fig.update_layout(title=f"Curva ROC - {title}", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")

    return metrics_html, fig

st.title("Dashboard Parcial 2: Teoría de Aprendizaje de Máquina")

# Sección teórica (punto a)
st.header("Modelos y Optimización")
selected_model = st.selectbox(
    "Selecciona un modelo:",
    list(model_descriptions.keys())
)
st.markdown(model_descriptions[selected_model])

# Proyecciones PCA y UMAP (punto b)
st.header("Proyecciones del Conjunto USPS")
st.subheader("PCA")
# Perform PCA and display plot
X_pca_updated, X_umap_initial = perform_dimensionality_reduction(X, 15) # Use default 15 for initial UMAP
pca_fig = generate_pca_plot(X_pca_updated, y, images)
st.plotly_chart(pca_fig)

st.subheader("UMAP")
n_neighbors_umap = st.slider(
    "Número de vecinos para UMAP:",
    min_value=5,
    max_value=50,
    value=15,
    step=1
)
# Perform UMAP with selected neighbors and display plot
X_pca_updated, X_umap_updated = perform_dimensionality_reduction(X, n_neighbors_umap)
umap_fig = generate_umap_plot(X_umap_updated, y, images, n_neighbors_umap)
st.plotly_chart(umap_fig)


# Resultados de clasificación (punto c)
st.header("Resultado de Clasificación")
selected_classifier = st.selectbox(
    "Selecciona un clasificador:",
    ('Logistic Regression', 'Random Forest', 'CNN')
)

# Display classifier results
metrics_html, roc_fig = generate_classifier_results(selected_classifier, report_lr, report_rf, report_cnn, fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn)
st.markdown(metrics_html, unsafe_allow_html=True)
st.plotly_chart(roc_fig)
'''

# The script contains non-ASCII text (accents, ξ), so pin the encoding rather
# than relying on the platform default.
with open("dashboard_app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_script_content)

## Summary:

### Data Analysis Key Findings

*   The Streamlit application successfully loads and preprocesses the handwritten-digits dataset (scikit-learn's `load_digits`, referred to as USPS throughout this notebook), including splitting it into training and testing sets.
*   Both PCA and UMAP dimensionality reduction techniques were applied to the dataset. The UMAP projection is interactive, allowing the user to adjust the number of neighbors.
*   Three classification models (Logistic Regression, Random Forest, and CNN) were trained on the dataset.
*   The application displays theoretical descriptions of various machine learning models, including PCA, UMAP, and the trained classifiers.
*   Classification performance metrics (Accuracy and Macro Avg F1-Score) and ROC curves (specifically for class 0) are calculated and displayed for the selected classifier.
*   The use of `@st.cache_resource` was implemented to optimize the loading, dimensionality reduction, model training, and ROC curve calculation steps by caching their results, improving application performance on subsequent runs.

### Insights or Next Steps

*   While the application successfully displays the ROC curve for a single class (class 0), implementing multiclass ROC analysis or displaying confusion matrices would provide a more comprehensive evaluation of the classifiers' performance across all digits.
*   Adding the ability for users to upload their own image data for prediction would enhance the practical utility of the dashboard.


In [None]:
# Assuming the complete Streamlit script is in a string variable named `streamlit_script_content`
# In a real scenario, this would be the combined code from the previous steps.
streamlit_script_content = """
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import umap
import base64
from io import BytesIO
from PIL import Image

# Cargar y preprocesar datos USPS
@st.cache_resource
def load_data():
    \"\"\"Loads and preprocesses the USPS digits dataset.\"\"\"
    digits = load_digits()
    X, y = digits.data / 255.0, digits.target  # Normalizar
    images = digits.images  # Para superponer imágenes
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X, y, images, X_train, X_test, y_train, y_test

X, y, images, X_train, X_test, y_train, y_test = load_data()

# Proyecciones PCA y UMAP
@st.cache_resource
def perform_dimensionality_reduction(_X, n_neighbors_umap=15):
    \"\"\"Performs PCA and UMAP dimensionality reduction.\"\"\"
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(_X)

    umap_model = umap.UMAP(n_components=2, n_neighbors=n_neighbors_umap, random_state=42)
    X_umap = umap_model.fit_transform(_X)
    return X_pca, X_umap

# Entrenar clasificadores
@st.cache_resource
def train_classifiers(_X_train, _X_test, _y_train, _y_test):
    \"\"\"Trains Logistic Regression, Random Forest, and CNN classifiers.\"\"\"
    # 1. LogisticRegression
    lr = LogisticRegression(C=1.0, multi_class='multinomial', solver='lbfgs', max_iter=1000)
    lr.fit(_X_train, _y_train)
    y_pred_lr = lr.predict(_X_test)
    report_lr = classification_report(_y_test, y_pred_lr, output_dict=True)

    # 2. RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    rf.fit(_X_train, _y_train)
    y_pred_rf = rf.predict(_X_test)
    report_rf = classification_report(_y_test, y_pred_rf, output_dict=True)

    # 3. CNN
    X_train_cnn = _X_train.reshape(-1, 8, 8, 1)
    X_test_cnn = _X_test.reshape(-1, 8, 8, 1)
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train_cnn, _y_train, epochs=10, batch_size=32, verbose=0)
    y_pred_cnn = model.predict(X_test_cnn).argmax(axis=1)
    report_cnn = classification_report(_y_test, y_pred_cnn, output_dict=True)

    return lr, report_lr, rf, report_rf, model, report_cnn

lr, report_lr, rf, report_rf, cnn_model, report_cnn = train_classifiers(X_train, X_test, y_train, y_test)


# Calcular curvas ROC (para clase 0 como ejemplo)
@st.cache_resource
def calculate_roc_curves(_lr, _rf, _cnn_model, _X_test, _y_test):
    \"\"\"Calculates ROC curves and AUC for class 0.\"\"\"
    X_test_cnn = _X_test.reshape(-1, 8, 8, 1)
    fpr_lr, tpr_lr, _ = roc_curve(_y_test == 0, _lr.predict_proba(_X_test)[:, 0])
    roc_auc_lr = auc(fpr_lr, tpr_lr)
    fpr_rf, tpr_rf, _ = roc_curve(_y_test == 0, _rf.predict_proba(_X_test)[:, 0])
    roc_auc_rf = auc(fpr_rf, tpr_rf)
    fpr_cnn, tpr_cnn, _ = roc_curve(_y_test == 0, _cnn_model.predict(X_test_cnn)[:, 0])
    roc_auc_cnn = auc(fpr_cnn, tpr_cnn)
    return fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn

fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn = calculate_roc_curves(lr, rf, cnn_model, X_test, y_test)


# Descripciones teóricas (punto a)
model_descriptions = {
    "PCA": "Modelo: Proyecta datos en un subespacio que maximiza la varianza.\nOptimización: max tr(W^T X^T X W) s.t. W^T W = I.",
    "UMAP": "Modelo: Reducción no lineal que preserva estructura topológica.\nOptimización: min divergencia de entropía cruzada.",
    "GaussianNB": "Modelo: Clasificador probabilístico con suposición de independencia.\nOptimización: max log-verosimilitud.",
    "SGDClassifier": "Modelo: Clasificador lineal optimizado por descenso estocástico.\nOptimización: min pérdida regularizada.",
    "LogisticRegression": "Modelo: Predice probabilidad con sigmoide.\nOptimización: max log-verosimilitud regularizada.",
    "LinearDiscriminantAnalysis": "Modelo: Proyecta datos maximizando separación entre clases.\nOptimización: max razón de varianzas.",
    "KNeighborsClassifier": "Modelo: Predice por mayoría de k-vecinos.\nSin optimización explícita.",
    "SVC": "Modelo: Encuentra hiperplano de máximo margen.\nOptimización: min ||w||^2 + C sum(ξ).",
    "RandomForestClassifier": "Modelo: Ensamble de árboles de decisión.\nOptimización: min impureza por nodo.",
    "GaussianProcessClassifier": "Modelo: Proceso gaussiano para clasificación.\nOptimización: max log-verosimilitud marginal.",
    "DeepLearning": "Modelo: Redes neuronales profundas.\nOptimización: min pérdida (e.g., cross-entropy)."
}

def _thumbnail_data_uri(img):
    '''Encode a 2-D grayscale array as a base64 PNG data URI.

    Arrays whose values lie in [0, 1] are scaled by 255. Arrays whose maximum
    exceeds 1 (for example 0-16 pixel intensities) are first divided by that
    maximum: multiplying them by 255 directly would wrap around when cast to
    uint8 and scramble the picture. Values are clipped to [0, 1] before the cast.
    '''
    pixels = np.asarray(img, dtype=float)
    peak = pixels.max() if pixels.size else 0.0
    if peak > 1.0:
        pixels = pixels / peak
    pixels = (np.clip(pixels, 0.0, 1.0) * 255).astype(np.uint8)
    buffered = BytesIO()
    Image.fromarray(pixels).save(buffered, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode()


def _add_class_thumbnails(fig, coords, labels, images, image_size):
    '''Overlay, for each label present, its first sample image at that sample's 2-D position.

    Iterating over the labels actually present (instead of a fixed 0-9 range)
    avoids an IndexError when a class has no samples.
    '''
    labels = np.asarray(labels)
    for label in np.unique(labels):
        idx = np.flatnonzero(labels == label)[0]
        fig.add_layout_image(
            dict(
                source=_thumbnail_data_uri(images[idx]),
                x=coords[idx, 0], y=coords[idx, 1],
                xref="x", yref="y",
                sizex=image_size, sizey=image_size,
                opacity=0.8
            )
        )
    return fig


def generate_pca_plot(_X_pca, _y, _images, image_size=0.5):
    '''Build the PCA scatter plot with one representative image per class.

    Args:
        _X_pca: 2-D array indexed as [sample, component]; columns 0 and 1 are plotted.
        _y: class label per sample, used for colouring and to pick thumbnails.
        _images: per-sample grayscale images, indexed like the rows of _X_pca.
        image_size: thumbnail width and height in axis data units (default 0.5).

    Returns:
        The figure returned by px.scatter with the thumbnails added.
    '''
    fig = px.scatter(x=_X_pca[:, 0], y=_X_pca[:, 1], color=_y, labels={'x': 'PC1', 'y': 'PC2'}, title="Proyección PCA")
    return _add_class_thumbnails(fig, _X_pca, _y, _images, image_size)


def generate_umap_plot(_X_umap, _y, _images, n_neighbors, image_size=0.5):
    '''Build the UMAP scatter plot with one representative image per class.

    Args:
        _X_umap: 2-D array indexed as [sample, component]; columns 0 and 1 are plotted.
        _y: class label per sample, used for colouring and to pick thumbnails.
        _images: per-sample grayscale images, indexed like the rows of _X_umap.
        n_neighbors: neighbour count shown in the plot title.
        image_size: thumbnail width and height in axis data units (default 0.5).

    Returns:
        The figure returned by px.scatter with the thumbnails added.
    '''
    fig = px.scatter(x=_X_umap[:, 0], y=_X_umap[:, 1], color=_y, labels={'x': 'UMAP1', 'y': 'UMAP2'}, title=f"Proyección UMAP (n_neighbors={n_neighbors})")
    return _add_class_thumbnails(fig, _X_umap, _y, _images, image_size)


def generate_classifier_results(classifier, _report_lr, _report_rf, _report_cnn, _fpr_lr, _tpr_lr, _roc_auc_lr, _fpr_rf, _tpr_rf, _roc_auc_rf, _fpr_cnn, _tpr_cnn, _roc_auc_cnn):
    '''Build the metrics table and the ROC figure for the chosen classifier.

    Args:
        classifier: 'Logistic Regression' or 'Random Forest'; any other value
            selects the CNN results.
        _report_*: report mappings read via the 'accuracy' key and the
            'macro avg' -> 'f1-score' entry.
        _fpr_*, _tpr_*, _roc_auc_*: ROC curve points and AUC per model.

    Returns:
        (metrics_html, fig): an HTML table string and a plotly Figure.
    '''
    # Lookup table instead of an if/elif chain; unknown names fall back to CNN.
    per_model = {
        'Logistic Regression': ("Logistic Regression", _report_lr, _fpr_lr, _tpr_lr, _roc_auc_lr),
        'Random Forest': ("Random Forest", _report_rf, _fpr_rf, _tpr_rf, _roc_auc_rf),
    }
    cnn_entry = ("CNN", _report_cnn, _fpr_cnn, _tpr_cnn, _roc_auc_cnn)
    title, report, fpr, tpr, roc_auc = per_model.get(classifier, cnn_entry)

    # Metrics table: header row, accuracy, macro-averaged F1.
    table_rows = [
        "<tr><td>Metric</td><td>Value</td></tr>",
        f"<tr><td>Accuracy</td><td>{report['accuracy']:.3f}</td></tr>",
        f"<tr><td>F1-Score (macro)</td><td>{report['macro avg']['f1-score']:.3f}</td></tr>",
    ]
    metrics_html = "<table>" + "".join(table_rows) + "</table>"

    # ROC curve plus the dashed diagonal baseline.
    roc_trace = go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {roc_auc:.2f})')
    baseline_trace = go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Baseline', line=dict(dash='dash'))
    fig = go.Figure()
    fig.add_trace(roc_trace)
    fig.add_trace(baseline_trace)
    fig.update_layout(title=f"Curva ROC - {title}", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")

    return metrics_html, fig

st.title("Dashboard Parcial 2: Teoría de Aprendizaje de Máquina")

# Theory section (item a): choose a model and render its stored description.
st.header("Modelos y Optimización")
selected_model = st.selectbox("Selecciona un modelo:", list(model_descriptions))
st.markdown(model_descriptions[selected_model])

# PCA and UMAP projections (item b)
st.header("Proyecciones del Conjunto USPS")
st.subheader("PCA")
# The reduction call is unpacked as a (PCA, UMAP) pair; this first call uses the
# slider default of 15 neighbours and only its PCA half is plotted here.
X_pca_updated, X_umap_initial = perform_dimensionality_reduction(X, 15)
st.plotly_chart(generate_pca_plot(X_pca_updated, y, images))

st.subheader("UMAP")
n_neighbors_umap = st.slider("Número de vecinos para UMAP:", min_value=5, max_value=50, value=15, step=1)
# Run the reduction again with the neighbour count chosen on the slider.
X_pca_updated, X_umap_updated = perform_dimensionality_reduction(X, n_neighbors_umap)
st.plotly_chart(generate_umap_plot(X_umap_updated, y, images, n_neighbors_umap))

# Classification results (item c)
st.header("Resultado de Clasificación")
classifier_options = ('Logistic Regression', 'Random Forest', 'CNN')
selected_classifier = st.selectbox("Selecciona un clasificador:", classifier_options)

# Metrics table (rendered as raw HTML) and ROC figure for the selection.
roc_inputs = (fpr_lr, tpr_lr, roc_auc_lr, fpr_rf, tpr_rf, roc_auc_rf, fpr_cnn, tpr_cnn, roc_auc_cnn)
metrics_html, roc_fig = generate_classifier_results(selected_classifier, report_lr, report_rf, report_cnn, *roc_inputs)
st.markdown(metrics_html, unsafe_allow_html=True)
st.plotly_chart(roc_fig)
"""

# Write the generated Streamlit script to disk.
# The script text contains non-ASCII characters (accented Spanish text and a
# Greek letter), and Python source files are decoded as UTF-8 by default, so the
# encoding is set explicitly instead of relying on the platform default.
with open("dashboard_app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_script_content)

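Ahora que el script `dashboard_app.py` ha sido creado, puedes ejecutarlo con el comando `streamlit run dashboard_app.py` en la terminal de Colab (o en tu terminal local si has descargado el archivo). Las celdas siguientes instalan `ngrok` y lanzan la aplicación desde el propio cuaderno: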

In [None]:
!pip install ngrok

Collecting ngrok
  Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ngrok
Successfully installed ngrok-1.4.0


In [None]:
%%bash
# NOTE(review): installs the same package as the preceding install cell; this
# looks redundant -- confirm whether both cells are needed.
pip install ngrok



In [None]:
%%bash
# Start Streamlit in the background and return immediately.
# stdin/stdout/stderr are redirected so the background process does not inherit
# the cell's pipes; while it holds them open the cell keeps waiting and has to be
# interrupted by hand. Server output is kept in streamlit.log for inspection.
python -m streamlit run dashboard_app.py > streamlit.log 2>&1 < /dev/null &

KeyboardInterrupt: 