## Dashboard Resultados TFT

In [8]:
import pandas as pd
import dash
from dash import dcc, html, Input, Output, dash_table
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import io
import base64
from prophet import Prophet
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from dash import no_update
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import root_mean_squared_error
import shap

# Load the dataset and parse the 'date' column as datetimes.
df = pd.read_csv('df_final_prueba.csv')
# Disabled filter (kept for reference): would drop rows whose 'tmed' equals 0.
#df = df[df["tmed"] != 0]
df['date'] = pd.to_datetime(df['date'])

# Identify product columns and feature columns.
# Product columns are selected positionally: every column from index 37 onward.
# NOTE(review): this relies on the CSV column order — confirm against the file schema.
producto_cols = df.columns[37:]
# Features are all remaining columns except 'date'. Both indexes are captured
# before 'cluster', 'PCA1' and 'PCA2' are appended below, so those three
# columns belong to neither producto_cols nor feature_cols.
feature_cols = df.columns.difference(producto_cols.union(['date']))

# Weather variables offered in the climate-impact selector.
clima_cols = ['tmed', 'tmin', 'tmax', 'prec', 'dir', 'velmedia', 'racha', 'presMax', 'presMin', 'hrMedia', 'sol']

# Customer-profile columns used for clustering: the three party-size columns
# plus every column whose name starts with 'people_'.
perfil_cols = ['adults', 'children', 'babies'] + [col for col in df.columns if col.startswith('people_')]
X_perfil = df[perfil_cols]

# Scale the profile columns and project them onto two principal components.
scaler = RobustScaler()
X_scaled = scaler.fit_transform(X_perfil)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# KMeans clustering (3 clusters) on the 2-D PCA projection; the label and the
# two components are stored back on df for the dashboard callbacks.
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(X_pca)
df['cluster'] = clusters
df['PCA1'] = X_pca[:, 0]
df['PCA2'] = X_pca[:, 1]

# Initialise the Dash app.
# suppress_callback_exceptions=True is needed because the callbacks below target
# component ids that only exist after render_tab has injected a tab's content.
app = dash.Dash(__name__, suppress_callback_exceptions=True)

# Static page skeleton: header (logo + title), the tab selector and an empty
# container ('tab-content') that render_tab fills for the selected tab.
app.layout = html.Div(style={'fontFamily': 'Arial, sans-serif', 'backgroundColor': '#f7f7f7', 'padding': '20px'}, children=[
    html.Div([
        html.Img(src='https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRFZy_SrbT2vGKakrE7lUj70sqTyn5QR4xBbQ&s', style={'height': '60px', 'marginRight': '20px'}),
        html.H1("Dashboard de Análisis de Sensibilidad por Producto", style={'display': 'inline-block', 'verticalAlign': 'middle'})
    ], style={'display': 'flex', 'alignItems': 'center', 'marginBottom': '30px'}),

    # Tab values are the keys render_tab dispatches on; 'main' is shown first.
    dcc.Tabs(id='tabs', value='main', children=[
        dcc.Tab(label='Análisis de Sensibilidad', value='main'),
        dcc.Tab(label='Análisis SHAP values', value='shap'),
        dcc.Tab(label='Análisis de Productos', value='productos'),
        dcc.Tab(label='Predicción de Ventas', value='prediccion')
    ]),

    html.Div(id='tab-content')
])

@app.callback(Output('tab-content', 'children'), Input('tabs', 'value'))
def render_tab(tab):
    """Build the content shown under the tab bar for the selected tab.

    Args:
        tab: Value of the active tab ('main', 'shap', 'productos' or
            'prediccion').

    Returns:
        An ``html.Div`` holding the tab's selectors and graph placeholders,
        or ``None`` when ``tab`` is none of the four known values.
    """
    if tab not in ('main', 'shap', 'productos', 'prediccion'):
        return None

    # Every product selector offers the same choices and the same default.
    opciones_producto = [{'label': nombre, 'value': nombre} for nombre in producto_cols]
    producto_inicial = producto_cols[0]

    def _tab_main():
        # Six model-importance graphs share the same bottom margin.
        ids_importancia = (
            'importancia-lineal',
            'importancia-lineal-inversa',
            'importancia-nolineal',
            'importancia-xgboosts',
            'importancia-lightGMb',
            'grafico-mlp',
        )
        graficos = [dcc.Graph(id=gid, style={'marginBottom': '30px'}) for gid in ids_importancia]

        selector_producto = html.Div([
            html.Label("Selecciona un producto:"),
            dcc.Dropdown(
                id='producto-dropdown',
                options=opciones_producto,
                value=producto_inicial
            ),
        ], style={'width': '48%', 'display': 'inline-block', 'paddingRight': '2%'})

        selector_cluster = html.Div([
            html.Label("Selecciona un cluster de clientes:"),
            dcc.Dropdown(
                id='cluster-dropdown',
                options=[{'label': f'Cluster {i}', 'value': i} for i in sorted(df['cluster'].unique())],
                value=None, placeholder="Todos los clusters"
            ),
        ], style={'width': '48%', 'display': 'inline-block'})

        return html.Div([
            html.H3("Análisis de variables más relevantes"),
            html.Div([selector_producto, selector_cluster], style={'marginBottom': '30px'}),
            *graficos,
            dcc.Graph(id='cluster-pca'),
            html.Div(id='score-modelos', style={'marginTop': '20px', 'fontSize': '18px', 'color': '#333'})
        ])

    def _tab_shap():
        # One heading + image placeholder per tree model.
        secciones = (
            ("SHAP Summary Plot - Random Forest", 'shap-img-rf'),
            ("SHAP Summary Plot - XGBoost", 'shap-img-xgb'),
            ("SHAP Summary Plot - LightGBM", 'shap-img-lgbm'),
        )
        bloques = []
        for titulo, img_id in secciones:
            bloques.append(html.H4(titulo))
            bloques.append(html.Img(id=img_id, style={
                'width': '60%', 'margin': 'auto', 'display': 'block', 'marginBottom': '30px'
            }))

        return html.Div([
            html.H3("Análisis por modelo de SHAP Values"),
            html.Label("Selecciona un producto para ver SHAP values:"),
            dcc.Dropdown(
                id='producto-shap-dropdown',
                options=opciones_producto,
                value=producto_inicial,
                style={'width': '60%', 'marginBottom': '30px'}
            ),
            html.Div(bloques)
        ])

    def _tab_productos():
        selector_evolucion = html.Div([
            html.Label("Selecciona un producto:"),
            dcc.Dropdown(
                id='producto-evolucion-dropdown',
                options=opciones_producto,
                value=producto_inicial,
                style={'width': '60%', 'marginBottom': '20px'}
            )
        ])

        # Both static figures are computed when the tab is rendered.
        grafico_segmentacion = dcc.Graph(id='cluster-productos', figure=segmentacion_productos())
        grafico_top = dcc.Graph(figure=top_productos())

        selectores_comparativa = html.Div([
            html.Label("Producto 1:"),
            dcc.Dropdown(
                id='producto1',
                options=opciones_producto,
                value=producto_inicial,
                style={'width': '45%', 'display': 'inline-block', 'marginRight': '5%'}
            ),
            html.Label("Producto 2:"),
            dcc.Dropdown(
                id='producto2',
                options=opciones_producto,
                value=producto_cols[1],
                style={'width': '45%', 'display': 'inline-block'}
            ),
        ], style={'marginBottom': '20px'})

        selector_clima_producto = html.Div([
            html.Label("Selecciona un producto para análisis estacional y climático:"),
            dcc.Dropdown(
                id='producto-clima-dropdown',
                options=opciones_producto,
                value=producto_inicial
            )
        ], style={'width': '60%', 'marginBottom': '30px'})

        selector_variable_clima = html.Div([
            html.Label("Selecciona una variable climatológica para analizar su impacto en las ventas:"),
            dcc.Dropdown(
                id='clima-variable-dropdown',
                options=[{'label': var, 'value': var} for var in clima_cols],
                value='sol'
            )
        ], style={'width': '60%', 'marginTop': '30px'})

        return html.Div([
            html.H3("Análisis de Ventas por Producto"),
            selector_evolucion,
            dcc.Graph(id='evolucion-ventas-producto'),

            html.H3("Segmentación de Productos por Comportamiento"),
            grafico_segmentacion,

            html.H3("Top Productos por Ventas Totales"),
            grafico_top,

            html.H3("Comparativa entre Productos"),
            selectores_comparativa,
            dcc.Graph(id='comparativa-productos'),

            selector_clima_producto,
            dcc.Graph(id='grafico-estacionalidad'),

            selector_variable_clima,
            dcc.Graph(id='grafico-impacto-clima')
        ])

    def _tab_prediccion():
        modelos = (
            ('Prophet', 'prophet'),
            ('Ridge', 'ridge'),
            ('Random Forest', 'rf'),
            ('XGBoost', 'xgb'),
            ('LightGBM', 'lgbm'),
            ('MLP', 'mlp'),
        )
        return html.Div([
            html.H3("Análisis predictivo de ventas futuras"),

            html.Label("Selecciona un producto:"),
            dcc.Dropdown(
                id='producto-prediccion-dropdown',
                options=opciones_producto,
                value=producto_inicial,
                style={'width': '60%', 'marginBottom': '20px'}
            ),

            html.Label("Selecciona el modelo de predicción:"),
            dcc.Dropdown(
                id='modelo-prediccion-dropdown',
                options=[{'label': etiqueta, 'value': clave} for etiqueta, clave in modelos],
                value='prophet',
                style={'width': '60%', 'marginBottom': '30px'}
            ),

            dcc.Graph(id='prediccion-ventas'),

            html.Div(id='tabla-predicciones-container')
        ])

    if tab == 'main':
        return _tab_main()
    if tab == 'shap':
        return _tab_shap()
    if tab == 'productos':
        return _tab_productos()
    return _tab_prediccion()


def segmentacion_productos():
    """Cluster products by their sales series and return a scatter figure.

    The sales matrix is transposed so each product is one sample, standardised,
    reduced to two principal components and grouped with KMeans (4 clusters).

    Returns:
        A Plotly scatter of the two components, coloured by cluster label and
        with the product name as hover text.
    """
    # One row per product, one column per observation in df.
    series_por_producto = df[producto_cols].T
    series_normalizadas = StandardScaler().fit_transform(series_por_producto)

    componentes = PCA(n_components=2).fit_transform(series_normalizadas)
    agrupador = KMeans(n_clusters=4, random_state=0).fit(componentes)

    tabla = pd.DataFrame(componentes, columns=['PCA1', 'PCA2']).assign(
        cluster=agrupador.labels_,
        producto=producto_cols,
    )
    return px.scatter(
        tabla,
        x='PCA1',
        y='PCA2',
        color='cluster',
        hover_name='producto',
        title="Cluster de productos por comportamiento de ventas",
    )

def top_productos():
    """Return a horizontal bar chart of total sales per product.

    Totals are sorted in ascending order before plotting.
    """
    totales = df[producto_cols].sum()
    totales_ordenados = totales.sort_values(ascending=True)
    etiquetas = {'value': 'Ventas Totales', 'index': 'Producto'}
    return px.bar(
        totales_ordenados,
        orientation='h',
        title="Top productos por ventas totales",
        labels=etiquetas,
    )

@app.callback(
    Output('comparativa-productos', 'figure'),
    [Input('producto1', 'value'), Input('producto2', 'value')]
)
def comparar(producto1, producto2):
    """Plot the sales series of two products on the same time axis.

    Args:
        producto1: Column name of the first product, or ``None`` when its
            dropdown has been cleared.
        producto2: Column name of the second product, or ``None`` when its
            dropdown has been cleared.

    Returns:
        A Plotly figure with one line per product, or ``no_update`` (keeping
        the current figure) while either selection is empty.
    """
    # A cleared dropdown sends None; df[None] would raise KeyError.
    if producto1 is None or producto2 is None:
        return no_update

    fig = go.Figure()
    for nombre in (producto1, producto2):
        fig.add_trace(go.Scatter(x=df['date'], y=df[nombre], mode='lines', name=nombre))
    fig.update_layout(title=f"Comparativa entre {producto1} y {producto2}", xaxis_title="Fecha", yaxis_title="Ventas")
    return fig

@app.callback(
    [Output('importancia-lineal', 'figure'),
     Output('importancia-lineal-inversa', 'figure'),
     Output('importancia-nolineal', 'figure'),
     Output('importancia-xgboosts', 'figure'),
     Output('importancia-lightGMb', 'figure'),
     Output('grafico-mlp', 'figure'),
     Output('cluster-pca', 'figure'),
     Output('score-modelos', 'children')],
    [Input('producto-dropdown', 'value'),
     Input('cluster-dropdown', 'value')]
)
def update_main_tab(producto, cluster):
    """Fit five regressors for one product and report their feature importances.

    Rows are optionally restricted to one customer cluster, then rows whose
    target lies outside the 1.5 * IQR fences are dropped. A linear regression,
    random forest, XGBoost, LightGBM and a PyTorch MLP are trained on the same
    80/20 split of the robust-scaled features.

    Args:
        producto: Name of the product column used as regression target, or
            ``None`` when the dropdown has been cleared.
        cluster: Customer cluster label to filter on, or ``None`` for all rows.

    Returns:
        A tuple of eight values matching the callback outputs: the positive and
        negative linear-coefficient charts, the random-forest importance chart,
        the XGBoost and LightGBM SHAP charts, the MLP SHAP chart, the customer
        cluster scatter, and a Div with the R²/MAE/RMSE table. All eight are
        ``no_update`` when no product is selected.
    """
    # A cleared product dropdown sends None; keep the current figures.
    if producto is None:
        return (no_update,) * 8

    df_filtered = df if cluster is None else df[df['cluster'] == cluster]

    # Drop target outliers using the 1.5 * IQR rule.
    q1, q3 = df_filtered[producto].quantile([0.25, 0.75])
    iqr = q3 - q1
    df_filtered = df_filtered[(df_filtered[producto] >= q1 - 1.5 * iqr) & (df_filtered[producto] <= q3 + 1.5 * iqr)]

    X = df_filtered[feature_cols]
    y = df_filtered[producto]
    feature_names = X.columns

    def _fila_metricas(nombre, y_true, y_pred):
        """Return one row of the metrics table for a fitted model."""
        return {
            "Modelo": nombre,
            "R²": round(r2_score(y_true, y_pred), 4),
            "MAE": round(mean_absolute_error(y_true, y_pred), 2),
            "RMSE": round(root_mean_squared_error(y_true, y_pred), 2),
        }

    def _importancia_shap(modelo_arbol):
        """Return the mean absolute SHAP value per feature on the test split."""
        valores = shap.Explainer(modelo_arbol)(X_test)
        return pd.Series(np.abs(valores.values).mean(axis=0), index=feature_names)

    # Scale and split once: the scaler, the data and random_state are the same
    # for every model, so all of them see an identical train/test partition.
    X_scaled = RobustScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Linear regression: signed coefficients as importances.
    linreg = LinearRegression()
    linreg.fit(X_train, y_train)
    fila_lr = _fila_metricas("Regresión Lineal", y_test, linreg.predict(X_test))

    coef_importancias = pd.Series(linreg.coef_, index=feature_names)

    top_positivos = coef_importancias.sort_values(ascending=False).head(10)
    fig_lineal = px.bar(top_positivos, title=f"Importancia Lineal (Positivos) para: {producto}",
                        labels={'value': 'Coeficiente', 'index': 'Variable'})

    # The ten most negative coefficients, sign-flipped so the bars point up.
    top_negativos = coef_importancias.sort_values().head(10)
    fig_lineal_inversa = px.bar(-top_negativos, title=f"Importancia Lineal Inversa (Negativos) para: {producto}",
                                 labels={'value': 'Coeficiente invertido', 'index': 'Variable'}, color_discrete_sequence=['red'])

    # Random forest: impurity-based importances.
    model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
    model_rf.fit(X_train, y_train)
    fila_rf = _fila_metricas("Random Forest", y_test, model_rf.predict(X_test))

    rf_importancias = pd.Series(model_rf.feature_importances_, index=feature_names)
    fig_nolineal = px.bar(rf_importancias.sort_values(ascending=False).head(10),
                          title=f"Importancia No Lineal (Random Forest) para: {producto}",
                          labels={'value': 'Importancia', 'index': 'Variable'})

    # The customer-cluster scatter always shows the full dataset.
    fig_cluster = px.scatter(df, x='PCA1', y='PCA2', color='cluster',
                             title="Clusters de Perfiles de Clientes (PCA)",
                             labels={'PCA1': 'Componente Principal 1', 'PCA2': 'Componente Principal 2'})

    # XGBoost: SHAP-based importances.
    model_xgb = XGBRegressor(n_estimators=100, random_state=42)
    model_xgb.fit(X_train, y_train)
    fila_xgb = _fila_metricas("XGBoost", y_test, model_xgb.predict(X_test))

    fig_xgBoost = px.bar(_importancia_shap(model_xgb).sort_values(ascending=False).head(10),
                         title=f"Importancia No Lineal (XGBoost - SHAP) para: {producto}",
                         labels={'value': 'Importancia Media Absoluta', 'index': 'Variable'})

    # LightGBM: SHAP-based importances.
    model_lgbm = LGBMRegressor(n_estimators=100, random_state=42)
    model_lgbm.fit(X_train, y_train)
    fila_lgbm = _fila_metricas("LightGBM", y_test, model_lgbm.predict(X_test))

    fig_lightGBM = px.bar(_importancia_shap(model_lgbm).sort_values(ascending=False).head(10),
                          title=f"Importancia No Lineal (LightGBM - SHAP) para: {producto}",
                          labels={'value': 'Importancia Media Absoluta', 'index': 'Variable'})

    # MLP: trained on all features, with the target robust-scaled as well.
    scaler_y = RobustScaler()
    y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

    X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
    y_tensor = torch.tensor(y_scaled, dtype=torch.float32)

    X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = train_test_split(
        X_tensor, y_tensor, test_size=0.2, random_state=42
    )

    batch_size = 32
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    # BatchNorm1d cannot run in training mode on a batch holding one sample,
    # so the trailing batch is dropped only when it would have exactly one row.
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=len(train_dataset) % batch_size == 1,
    )

    class MLP(nn.Module):
        """Three-hidden-layer regressor with batch norm and dropout up front."""

        def __init__(self, input_dim):
            super().__init__()
            self.model = nn.Sequential(
                nn.Linear(input_dim, 128),
                nn.BatchNorm1d(128),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(128, 64),
                nn.ReLU(),
                nn.Linear(64, 32),
                nn.ReLU(),
                nn.Linear(32, 1)
            )

        def forward(self, x):
            return self.model(x)

    def _snapshot(net):
        """Return an independent copy of the network's parameters and buffers."""
        # state_dict().copy() is shallow: its tensors alias the live weights and
        # would keep changing as training continues, so each one is cloned.
        return {clave: tensor.detach().clone() for clave, tensor in net.state_dict().items()}

    model_mlp = MLP(X_train_tensor.shape[1])
    optimizer = optim.Adam(model_mlp.parameters(), lr=0.0005)
    criterion = nn.MSELoss()

    best_r2 = -np.inf
    # Defined up front so a restore is always possible, even if no epoch ever
    # yields a comparable (non-NaN) R².
    best_model_state = _snapshot(model_mlp)
    patience = 20
    trigger_times = 0

    # The held-out targets do not change between epochs.
    y_true_inv = scaler_y.inverse_transform(y_test_tensor.numpy())

    for epoch in range(300):
        model_mlp.train()
        for xb, yb in train_loader:
            loss = criterion(model_mlp(xb), yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Early stopping on the R² of the held-out split, in original units.
        model_mlp.eval()
        with torch.no_grad():
            y_pred_inv = scaler_y.inverse_transform(model_mlp(X_test_tensor).numpy())
        current_r2 = r2_score(y_true_inv, y_pred_inv)

        if current_r2 > best_r2:
            best_r2 = current_r2
            best_model_state = _snapshot(model_mlp)
            trigger_times = 0
        else:
            trigger_times += 1
            if trigger_times >= patience:
                break

    model_mlp.load_state_dict(best_model_state)
    model_mlp.eval()
    with torch.no_grad():
        y_pred_inv = scaler_y.inverse_transform(model_mlp(X_test_tensor).numpy())
    fila_mlp = _fila_metricas("MLP", y_true_inv, y_pred_inv)

    def predict_wrapper(X_numpy):
        """Adapt the network to the numpy-in / 1-D-numpy-out API SHAP expects."""
        entrada = torch.tensor(X_numpy, dtype=torch.float32)
        with torch.no_grad():
            preds = model_mlp(entrada).detach().numpy()
        return preds.flatten()

    # KernelExplainer is model-agnostic; the background set and the explained
    # rows are capped at 100 and 300 samples respectively.
    np.random.seed(42)
    n_filas = X_scaled.shape[0]
    X_background = X_scaled[np.random.choice(n_filas, size=min(100, n_filas), replace=False)]
    X_shap = X_scaled[:min(300, n_filas)]

    explainer = shap.KernelExplainer(predict_wrapper, X_background)
    shap_values = explainer.shap_values(X_shap)

    if isinstance(shap_values, list):
        shap_values = shap_values[0]

    shap_df = pd.DataFrame({
        "Variable": feature_names.tolist(),
        "Importancia": np.abs(shap_values).mean(axis=0)
    }).sort_values(by="Importancia", ascending=False)

    fig_mlp = px.bar(
        shap_df.head(15),
        x="Variable", y="Importancia",
        title=f"Importancia SHAP (MLP) para: {producto}",
        labels={'Importancia': 'Importancia Media Absoluta', 'Variable': 'Variable'}
    )

    # Regression metrics of every model, one table row each.
    metricas_data = [fila_lr, fila_rf, fila_xgb, fila_lgbm, fila_mlp]

    score_text = html.Div([
        html.H4("Métricas de Evaluación de Modelos", style={'marginBottom': '15px'}),
        dash_table.DataTable(
            columns=[
                {"name": "Modelo", "id": "Modelo"},
                {"name": "R²", "id": "R²"},
                {"name": "MAE", "id": "MAE"},
                {"name": "RMSE", "id": "RMSE"}
            ],
            data=metricas_data,
            style_cell={'textAlign': 'center', 'fontFamily': 'Arial'},
            style_header={'backgroundColor': '#f0f0f0', 'fontWeight': 'bold'},
            style_table={'overflowX': 'auto'},
            page_size=5
        )
    ])

    return fig_lineal, fig_lineal_inversa, fig_nolineal, fig_xgBoost, fig_lightGBM, fig_mlp, fig_cluster, score_text


@app.callback(
    [Output('shap-img-rf', 'src'), Output('shap-img-xgb', 'src'), Output('shap-img-lgbm', 'src')],
    Input('producto-shap-dropdown', 'value')
)
def update_shap_tab_all(producto):
    """Render SHAP summary plots for three tree models as inline PNG images.

    Rows whose target lies outside the 1.5 * IQR fences are dropped, the
    features are standardised and split 80/20, and each model is fitted on the
    training part and explained on the test part.

    Args:
        producto: Name of the product column used as regression target.

    Returns:
        Three ``data:image/png;base64,...`` strings, in the order random
        forest, XGBoost, LightGBM.
    """
    objetivo = df[producto]
    cuartiles = objetivo.quantile([0.25, 0.75])
    q_inferior, q_superior = cuartiles.iloc[0], cuartiles.iloc[1]
    rango = q_superior - q_inferior
    datos = df[objetivo.between(q_inferior - 1.5 * rango, q_superior + 1.5 * rango)]

    X = datos[feature_cols]
    y = datos[producto]
    nombres = X.columns.tolist()

    X_norm = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=42)

    def _resumen_png(estimador):
        """Fit the estimator, draw its SHAP summary and return it as a data URI."""
        estimador.fit(X_train, y_train)
        valores = shap.Explainer(estimador)(X_test)

        # summary_plot draws on the current figure, so it is created first and
        # closed explicitly once the PNG bytes have been captured.
        figura, _ = plt.subplots(figsize=(10, 6))
        shap.summary_plot(valores, X_test, feature_names=nombres, max_display=10, show=False)
        lienzo = io.BytesIO()
        plt.savefig(lienzo, format="png", bbox_inches='tight')
        plt.close(figura)

        return "data:image/png;base64," + base64.b64encode(lienzo.getvalue()).decode("utf-8")

    return (
        _resumen_png(RandomForestRegressor(n_estimators=100, random_state=42)),
        _resumen_png(XGBRegressor(n_estimators=100, random_state=42)),
        _resumen_png(LGBMRegressor(n_estimators=100, random_state=42)),
    )



@app.callback(
    Output('grafico-estacionalidad', 'figure'),
    Input('producto-clima-dropdown', 'value')
)
def update_estacionalidad(producto):
    """Plot accumulated sales of a product per day of the week.

    Each ``nombre_dia_*`` indicator column is cast to int and multiplied by
    the product's sales, then summed, giving one total per indicator column.

    Args:
        producto: Name of the product column, or ``None`` when the dropdown
            has been cleared.

    Returns:
        A bar chart of the totals sorted in descending order, or ``no_update``
        (keeping the current figure) while no product is selected.
    """
    # A cleared dropdown sends None; indexing df with it would raise KeyError.
    if producto is None:
        return no_update

    dias_semana = [col for col in df.columns if col.startswith('nombre_dia_')]
    # Cast all indicator columns at once instead of one column at a time.
    indicadores = df[dias_semana].astype(int)
    df_estacionalidad = indicadores.multiply(df[producto], axis=0).sum().sort_values(ascending=False)
    fig = px.bar(df_estacionalidad, title=f"Ventas por Día de la Semana para: {producto}",
                 labels={'value': 'Ventas acumuladas', 'index': 'Día de la semana'})
    return fig

@app.callback(
    Output('grafico-impacto-clima', 'figure'),
    [Input('producto-clima-dropdown', 'value'),
     Input('clima-variable-dropdown', 'value')]
)
def update_impacto_clima(producto, variable):
    """Scatter a product's sales against one weather variable with an OLS trend.

    Args:
        producto: Name of the product column (y axis), or ``None`` when its
            dropdown has been cleared.
        variable: Name of the weather column (x axis), or ``None`` when its
            dropdown has been cleared.

    Returns:
        A Plotly scatter with an ordinary-least-squares trendline, or
        ``no_update`` (keeping the current figure) while either selection is
        empty.
    """
    # Either dropdown can be cleared, which sends None for that input.
    if producto is None or variable is None:
        return no_update

    return px.scatter(
        df,
        x=variable,
        y=producto,
        trendline='ols',
        title=f"Impacto de {variable} en ventas de {producto}",
        labels={variable: variable, producto: 'Ventas'},
    )

@app.callback(
    [Output('prediccion-ventas', 'figure'),
     Output('tabla-predicciones-container', 'children')],
    [Input('producto-prediccion-dropdown', 'value'),
     Input('modelo-prediccion-dropdown', 'value')]
)
def update_prediccion(producto, modelo):
    """Forecast 30 future days of sales for one product with the chosen model.

    Prophet is fitted on the dated series directly. Every other model is a
    regression of robust-scaled sales on a single robust-scaled feature: the
    number of days elapsed since the first observation.

    Args:
        producto: Name of the product column, or ``None`` when the dropdown
            has been cleared.
        modelo: One of 'prophet', 'ridge', 'rf', 'xgb', 'lgbm' or 'mlp', or
            ``None`` when the dropdown has been cleared.

    Returns:
        A ``(figure, table)`` pair. The table is a DataTable with the last
        seven forecast rows for Prophet and an explanatory message otherwise.
        Both entries are ``no_update`` when a selection is missing or the
        model key is not recognised.
    """
    # Both dropdowns can be cleared; without this guard the lookup of the
    # product column or the later modelo.upper() call would raise.
    if producto is None or modelo is None:
        return no_update, no_update

    df_model = df[['date', producto]].dropna().copy()
    df_model['date'] = pd.to_datetime(df_model['date'])
    df_model = df_model.sort_values('date')
    df_model['day'] = (df_model['date'] - df_model['date'].min()).dt.days

    future_days = 30
    future_dates = pd.date_range(df_model['date'].max() + pd.Timedelta(days=1), periods=future_days)

    if modelo == 'prophet':
        df_prophet = df_model.rename(columns={'date': 'ds', producto: 'y'})
        modelo_prophet = Prophet()
        modelo_prophet.fit(df_prophet)
        future = modelo_prophet.make_future_dataframe(periods=future_days)
        forecast = modelo_prophet.predict(future)

        fig = px.line(forecast, x='ds', y='yhat', title=f"Predicción de Ventas - {producto} (Prophet)")
        fig.add_scatter(x=df_prophet['ds'], y=df_prophet['y'], mode='markers', name='Histórico')

        tabla = dash_table.DataTable(
            columns=[
                {'name': 'Fecha', 'id': 'ds'},
                {'name': 'Predicción', 'id': 'yhat'},
                {'name': 'Inferior', 'id': 'yhat_lower'},
                {'name': 'Superior', 'id': 'yhat_upper'}
            ],
            data=forecast.tail(7)[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].round(2).to_dict('records'),
            style_table={'overflowX': 'auto'},
            style_cell={'textAlign': 'center'},
            style_header={'fontWeight': 'bold'},
            page_size=7
        )

        return fig, tabla

    # Estimators sharing the scikit-learn fit/predict interface.
    regresores = {
        'ridge': Ridge,
        'rf': RandomForestRegressor,
        'xgb': XGBRegressor,
        'lgbm': LGBMRegressor,
    }
    if modelo not in regresores and modelo != 'mlp':
        # Unknown model key: leave the current outputs untouched.
        return no_update, no_update

    X = df_model[['day']]
    y = df_model[[producto]]

    scaler_X = RobustScaler()
    scaler_y = RobustScaler()
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # The 30 day indices that follow the last observed one.
    ultimo_dia = X['day'].max()
    X_future = pd.DataFrame({'day': np.arange(ultimo_dia + 1, ultimo_dia + future_days + 1)})
    X_future_scaled = scaler_X.transform(X_future)

    if modelo in regresores:
        estimador = regresores[modelo]()
        estimador.fit(X_scaled, y_scaled.ravel())
        y_pred = estimador.predict(X_future_scaled).reshape(-1, 1)
    else:
        # Small fully connected network on the single 'day' feature.
        red = nn.Sequential(
            nn.Linear(1, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

        X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
        y_tensor = torch.tensor(y_scaled, dtype=torch.float32)
        loader = DataLoader(TensorDataset(X_tensor, y_tensor), batch_size=32, shuffle=True)

        optimizer = torch.optim.Adam(red.parameters(), lr=0.001)
        loss_fn = nn.MSELoss()

        for _ in range(150):
            for xb, yb in loader:
                optimizer.zero_grad()
                loss = loss_fn(red(xb), yb)
                loss.backward()
                optimizer.step()

        red.eval()
        with torch.no_grad():
            X_future_tensor = torch.tensor(X_future_scaled, dtype=torch.float32)
            y_pred = red(X_future_tensor).numpy()

    # Bring the predictions back to the original sales units.
    y_pred_inv = scaler_y.inverse_transform(y_pred)

    fig = px.line(df_model, x='date', y=producto, title=f"Predicción de Ventas - {producto} ({modelo.upper()})")
    fig.add_scatter(x=future_dates, y=y_pred_inv.flatten(), mode='lines+markers', name='Predicción futura')

    tabla = html.Div("Tabla solo disponible para el modelo Prophet.", style={'marginTop': '10px', 'color': 'gray'})

    return fig, tabla


@app.callback(
    Output('evolucion-ventas-producto', 'figure'),
    Input('producto-evolucion-dropdown', 'value')
)
def update_evolucion_ventas_producto(producto):
    """Plot the sales of one product over time.

    Args:
        producto: Name of the product column, or ``None`` when the dropdown
            has been cleared.

    Returns:
        A Plotly line chart of the product against 'date', or ``no_update``
        (keeping the current figure) while no product is selected.
    """
    # A cleared dropdown sends None, which is not a valid column for px.line.
    if producto is None:
        return no_update

    return px.line(df, x='date', y=producto, title=f"Evolución de Ventas - {producto}")

# Start the Dash server in debug mode only when this file is run as a script,
# not when it is imported.
if __name__ == '__main__':
    app.run(debug=True)


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]