# Dashboard dos Resultados Finais da Modelagem

## Preparação do Ambiente

In [None]:
import ast
from dash import Dash, dcc, html, Input, Output
import lightgbm as lgb
import logging
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import os
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sktime.performance_metrics.forecasting import mean_absolute_scaled_error
from typing import Dict, List, Optional, Tuple
import warnings

mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)
warnings.filterwarnings("ignore", module="matplotlib.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

## Carregamento dos dados

In [None]:
def calculate_metrics(y_true, y_pred, y_train):
    """
    Compute MAE, RMSE, R², MAPE and MASE for a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned with ``y_true``.
        y_train: Training target values, forwarded to the MASE computation
            as its ``y_train`` argument.

    Returns:
        dict: Keys 'mae', 'rmse', 'r2', 'mape' (in percent) and 'mase',
        in that order.
    """
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    # Zero targets are swapped for a tiny constant so the division below is
    # always defined.
    denominator = np.where(y_true == 0, 1e-10, y_true)
    abs_pct_error = np.abs((y_true - y_pred) / denominator)
    mape = np.mean(abs_pct_error) * 100

    # MASE scales the model error by the error of a naive forecast
    # (next value equals the previous one) measured on y_train.
    mase = mean_absolute_scaled_error(y_true, y_pred, y_train=y_train)

    return {
        'mae': mae,
        'rmse': rmse,
        'r2': r2,
        'mape': mape,
        'mase': mase,
    }

# --- Avaliação para modelo LGBM Global ---
def evaluate_lgbm_global_model(model_global, df_test, exog_cols, df_train, categ_cols_global):
    """
    Evaluate the global LightGBM model on the test set.

    Args:
        model_global: Fitted model exposing ``predict``.
        df_test: Test DataFrame. Must contain 'total_consumption',
            'client_id' and every column listed in ``exog_cols``.
        exog_cols: Feature columns passed to the model, in this order.
        df_train: Training DataFrame; only its 'total_consumption' column is
            read (it provides the scale for MASE).
        categ_cols_global: Columns of ``exog_cols`` cast to the pandas
            'category' dtype before predicting.

    Returns:
        pd.DataFrame: A single row with the columns produced by
        ``calculate_metrics`` plus 'y_true_std', 'client_id' (always
        'Global') and 'model' (always 'LGBM Global').
    """
    # Work on a copy so the caller's frame is not modified; missing numeric
    # values (target included) are replaced by 0.
    df_test_clean = df_test.copy()
    numeric_cols = df_test_clean.select_dtypes(include=['number']).columns
    df_test_clean[numeric_cols] = df_test_clean[numeric_cols].fillna(0)

    y_true = df_test_clean['total_consumption']

    # Explicit copy: assigning into a bare column selection triggers pandas'
    # SettingWithCopyWarning.
    X_test = df_test_clean[exog_cols].copy()
    X_test[categ_cols_global] = X_test[categ_cols_global].astype('category')

    # Training target, used only as the MASE scale. It is read, never
    # modified, so no copy of df_train is needed.
    # NOTE(review): if df_train stacks several clients' series, the naive
    # forecast used by MASE presumably jumps between clients — confirm that a
    # pooled (rather than per-client) MASE is what is intended.
    y_train = df_train['total_consumption']

    y_pred = model_global.predict(X_test)

    metrics = calculate_metrics(y_true, y_pred, y_train=y_train)

    # Mean, across clients, of each client's standard deviation of actual
    # consumption (not the standard deviation of the pooled test set).
    client_std = df_test_clean.groupby('client_id')['total_consumption'].std()
    metrics['y_true_std'] = client_std.mean()

    metrics['client_id'] = 'Global'
    metrics['model'] = 'LGBM Global'
    return pd.DataFrame([metrics])

In [None]:
def load_all_artifacts(
    experiment_name: str,
    run_name: str,
    model_artifact_path: str = "lgbm_model",
    df_artifact_names: Tuple[str, str, str] = ("df_train_global.csv", "df_val_global.csv", "df_test_global.csv")
) -> Tuple[Optional[lgb.LGBMRegressor], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[List[str]]]:
    """
    Find an MLflow run by name and load its model, DataFrames and feature list.

    When several runs share ``run_name``, the most recently started one is
    used.

    Args:
        experiment_name (str): Name of the MLflow experiment.
        run_name (str): Name of the run that holds the artifacts.
        model_artifact_path (str): Artifact path of the model
            (default: "lgbm_model").
        df_artifact_names (Tuple[str, str, str]): File names of the train,
            validation and test DataFrame artifacts, in that order.

    Returns:
        A 5-tuple ``(model, df_train, df_val, df_test, exog_cols)``.
        All five entries are None when the experiment or run is not found or
        when any step raises. ``exog_cols`` alone is None when the run has no
        'exog_cols' parameter.
    """
    print(f"--- Tentando carregar todos os artefatos do run '{run_name}' ---")

    # Every failure path returns the same shape the caller unpacks.
    failure = (None, None, None, None, None)

    try:
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            print(f"❌ Experimento '{experiment_name}' não encontrado.")
            return failure

        # Newest run first, so index 0 is the latest run with this name.
        runs = mlflow.search_runs(
            experiment_ids=[experiment.experiment_id],
            filter_string=f"tags.mlflow.runName = '{run_name}'",
            order_by=["start_time DESC"]
        )

        if runs.empty:
            print(f"❌ Run com nome '{run_name}' não encontrado no experimento '{experiment_name}'.")
            return failure

        run_id = runs.iloc[0].run_id
        print(f"✅ Run ID encontrado: {run_id}")

        print("\n⏳ Carregando os parâmetros...")
        run = mlflow.get_run(run_id)
        params: Dict = run.data.params

        # 'exog_cols' is stored as the string form of a list; parse it back.
        # It stays None when the parameter is absent, so the return below
        # never references an unbound name.
        exog_cols_loaded: Optional[List[str]] = None
        exog_cols_str = params.get("exog_cols")
        if exog_cols_str:
            exog_cols_loaded = ast.literal_eval(exog_cols_str)
            print(f"✅ Parâmetro 'exog_cols' carregado: {exog_cols_loaded}")
        else:
            print(f"❌ Parâmetro 'exog_cols' não encontrado no run.")

        print("\n⏳ Carregando o modelo LGBM...")
        loaded_model_uri = f"runs:/{run_id}/{model_artifact_path}"
        model = mlflow.lightgbm.load_model(loaded_model_uri)
        print(f"✅ Modelo LGBM carregado com sucesso!")

        print("\n⏳ Baixando e carregando os DataFrames...")

        # Local directory that receives the downloaded artifacts.
        download_dir = "./mlflow_downloaded_artifacts"
        os.makedirs(download_dir, exist_ok=True)

        client = mlflow.tracking.MlflowClient()

        # Download each CSV and read it with its first column as the index,
        # asking pandas to parse that index as dates.
        frames = []
        for artifact_name in df_artifact_names:
            client.download_artifacts(run_id=run_id, path=artifact_name, dst_path=download_dir)
            local_path = os.path.join(download_dir, artifact_name)
            frames.append(pd.read_csv(local_path, index_col=0, parse_dates=True))
        df_train, df_val, df_test = frames

        print("✅ DataFrames carregados com sucesso!")

        return model, df_train, df_val, df_test, exog_cols_loaded

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and hand back
        # the all-None tuple instead of raising.
        print(f"❌ Falha inesperada ao carregar artefatos: {e}")
        return failure

In [None]:
# NOTE(review): leftover scratch cell — it only prints a fixed test string
# and can likely be removed.
print("teste")

In [None]:
# File-based MLflow tracking store (the path is presumably a Google Drive
# mount inside Colab — confirm before running elsewhere).
mlflow_drive_path = "file:/content/drive/MyDrive/CaseTécnico_Aquarela_TimeSeries/mlruns"
mlflow.set_tracking_uri(mlflow_drive_path)

# Fetch the model, the three data splits and the feature list of the run.
model_global, df_train, df_val, df_test, exog_lgbm_global = load_all_artifacts(
    experiment_name="Treinamento LightGBM Global_v3",
    run_name='Treino Inicial',
)

# Bare expression so the notebook displays the loaded model.
model_global

--- Tentando carregar todos os artefatos do run 'Treino Inicial' ---
✅ Run ID encontrado: 98d0e176421d4a76ae606eb133fc9b68

⏳ Carregando os parâmetros...
✅ Parâmetro 'exog_cols' carregado: ['client_id', 'avg_temperature', 'avg_humidity', 'temp_humid_interaction', 'day_counter', 'consumption_lag_1', 'consumption_lag_2', 'consumption_lag_3', 'consumption_lag_4', 'consumption_lag_7', 'consumption_lag_15', 'rolling_mean_3', 'rolling_std_3', 'rolling_mean_7', 'rolling_std_7', 'diff_lag_1', 'tipo_feriado', 'day_of_week_name', 'month']

⏳ Carregando o modelo LGBM...


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

✅ Modelo LGBM carregado com sucesso!

⏳ Baixando e carregando os DataFrames...


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

✅ DataFrames carregados com sucesso!


In [None]:
# Preview the first rows of the training split returned by load_all_artifacts.
df_train.head()

Unnamed: 0_level_0,client_id,total_consumption,avg_temperature,avg_humidity,temp_humid_interaction,day_counter,consumption_lag_1,consumption_lag_2,consumption_lag_3,consumption_lag_4,consumption_lag_7,consumption_lag_15,rolling_mean_3,rolling_std_3,rolling_mean_7,rolling_std_7,diff_lag_1,day_of_week_name,month,tipo_feriado
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-01-01,C0000,18.64,27.090625,59.06875,1600.209355,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,Feriado
2023-01-01,C0003,13.66,27.090625,59.06875,1600.209355,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,Feriado
2023-01-01,C0033,14.37,27.511111,58.372222,1605.884691,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,Feriado
2023-01-01,C0059,17.9,27.511111,58.372222,1605.884691,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,Feriado
2023-01-01,C0096,16.72,25.994444,64.755556,1683.284691,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,Feriado


In [None]:
# Preview the first rows of the validation split returned by load_all_artifacts.
df_val.head()

Unnamed: 0_level_0,client_id,total_consumption,avg_temperature,avg_humidity,temp_humid_interaction,day_counter,consumption_lag_1,consumption_lag_2,consumption_lag_3,consumption_lag_4,consumption_lag_7,consumption_lag_15,rolling_mean_3,rolling_std_3,rolling_mean_7,rolling_std_7,diff_lag_1,day_of_week_name,month,tipo_feriado
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-04-30,C0003,12.69,25.034375,60.3125,1509.885742,120,17.97,18.78,14.98,18.35,20.08,11.7,17.243333,2.001508,17.108571,2.202154,-5.28,0,4,Nenhum
2023-04-30,C0049,20.4,26.611765,61.829412,1645.389758,120,21.61,17.45,16.05,15.63,20.05,22.0,18.37,2.89192,17.738571,2.586036,-1.21,0,4,Nenhum
2023-04-30,C0071,19.28,25.034375,60.3125,1509.885742,120,15.39,18.12,15.23,16.75,15.07,21.13,16.246667,1.624326,16.684286,1.479187,3.89,0,4,Nenhum
2023-04-30,C0023,20.56,24.519444,62.805556,1539.95733,120,18.63,20.18,16.74,21.1,16.64,13.89,18.516667,1.722798,17.401429,2.698391,1.93,0,4,Nenhum
2023-04-30,C0085,8.24,25.034375,60.3125,1509.885742,120,13.39,16.52,10.62,10.97,13.73,13.41,13.51,2.95183,12.4,2.271549,-5.15,0,4,Nenhum


In [None]:
# Preview the first rows of the test split returned by load_all_artifacts.
df_test.head()

Unnamed: 0_level_0,client_id,total_consumption,avg_temperature,avg_humidity,temp_humid_interaction,day_counter,consumption_lag_1,consumption_lag_2,consumption_lag_3,consumption_lag_4,consumption_lag_7,consumption_lag_15,rolling_mean_3,rolling_std_3,rolling_mean_7,rolling_std_7,diff_lag_1,day_of_week_name,month,tipo_feriado
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-05-30,C0011,16.52,25.682353,54.864706,1409.05474,150,21.35,13.51,16.9,19.99,16.83,19.94,17.253333,3.931925,16.572857,3.902037,-4.83,2,5,Nenhum
2023-05-30,C0069,15.83,24.46875,60.990625,1492.364355,150,17.82,13.2,20.18,18.26,16.19,19.72,17.066667,3.550455,16.484286,2.492648,-1.99,2,5,Nenhum
2023-05-30,C0096,21.56,24.588889,62.2,1529.428889,150,21.45,17.83,20.43,18.11,15.72,20.76,19.903333,1.866583,19.161429,2.20226,0.11,2,5,Nenhum
2023-05-30,C0022,18.86,24.588889,62.2,1529.428889,150,13.83,16.54,18.25,16.1,16.63,16.34,16.206667,2.228774,16.994286,1.79797,5.03,2,5,Nenhum
2023-05-30,C0032,16.45,26.406667,63.826667,1685.449511,150,19.37,14.92,16.3,22.62,18.26,13.23,16.863333,2.277857,17.707143,2.835899,-2.92,2,5,Nenhum


In [None]:
# Feature columns that are cast to the pandas 'category' dtype before
# prediction.
categ_cols_global = ['client_id', 'tipo_feriado', 'day_of_week_name', 'month']

# Score the global model on the test split; the training split is passed
# along because the MASE computation needs it.
lgbm_results = evaluate_lgbm_global_model(
    model_global,
    df_test,
    exog_lgbm_global,
    df_train,
    categ_cols_global,
)

# Bare expression so the notebook displays the one-row metrics table.
lgbm_results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[categ_cols_global] = X_test[categ_cols_global].astype('category')


Unnamed: 0,mae,rmse,r2,mape,mase,y_true_std,client_id,model
0,0.153935,0.232982,0.996145,1.150145,0.036002,2.2498,Global,LGBM Global


In [None]:
# Cast the categorical feature columns in place on df_test.
df_test[categ_cols_global] = df_test[categ_cols_global].astype('category')

# Select the features by name, in the order logged with the run, exactly as
# evaluate_lgbm_global_model does. Dropping the target instead would feed the
# columns in CSV order, which differs from the logged 'exog_cols' order.
# NOTE(review): assumes the model was fitted on df[exog_cols] — confirm
# against the training notebook.
y_pred = model_global.predict(df_test[exog_lgbm_global])
y_pred

array([16.29552816, 15.92334054, 21.61056403, ..., 15.40555786,
       18.39320363, 16.42805013])

## Dashboard

In [None]:
# Sorted list of client ids. sorted() works whether or not 'client_id' has
# already been cast to a categorical dtype by an earlier cell.
client_ids = sorted(df_test['client_id'].unique())

# NOTE(review): num_days_test and dates are not referenced by any of the
# cells visible here; kept in case other cells rely on them.
num_days_test = 30
dates = df_test.index.unique()

# One row per (date, client) holding the actual value and the prediction;
# reset_index() turns the date index into a regular column for plotting.
df_test_with_predictions = df_test.loc[:, ['client_id', 'total_consumption']]
df_test_with_predictions['prediction'] = y_pred
df_test_with_predictions = df_test_with_predictions.reset_index()

# Per-client metrics, computed once up front so the callback only looks
# them up.
metrics_by_client = {}
for client_id in client_ids:
    df_client = df_test_with_predictions[df_test_with_predictions['client_id'] == client_id]
    df_client = df_client.dropna(subset=['total_consumption', 'prediction'])

    if df_client.empty:
        # No usable rows for this client: record NaN instead of failing.
        metrics_by_client[client_id] = {
            'MAE': np.nan,
            'RMSE': np.nan,
            'R²': np.nan,
        }
        continue

    # Loop-local names on purpose: reusing `y_pred` here would overwrite the
    # full prediction array assigned above and break a re-run of this cell.
    client_actual = df_client['total_consumption']
    client_predicted = df_client['prediction']
    metrics_by_client[client_id] = {
        'MAE': mean_absolute_error(client_actual, client_predicted),
        'RMSE': np.sqrt(mean_squared_error(client_actual, client_predicted)),
        'R²': r2_score(client_actual, client_predicted),
    }


# INICIALIZAÇÃO E LAYOUT DO DASHBOARD ---
app = Dash(__name__)

# Page layout: title, client selector, then a two-column grid with the
# metrics on the left and the chart on the right.
# Style keys are camelCase ('fontFamily', not 'font-family'), matching the
# other style dictionaries below and the convention Dash documents.
app.layout = html.Div(style={'fontFamily': 'Arial, sans-serif', 'padding': '20px'}, children=[
    html.H1("Dashboard de Previsões de Consumo", style={'textAlign': 'center', 'color': '#333'}),

    html.Div([
        html.H2("Selecione o Cliente:", style={'fontSize': '18px', 'marginBottom': '10px'}),
        dcc.Dropdown(
            id='client-dropdown',
            options=[{'label': client, 'value': client} for client in client_ids],
            value=client_ids[0],  # Default selection: first client id
            clearable=False,
            style={'width': '50%'}
        )
    ], style={'marginBottom': '20px'}),

    html.Hr(),

    # Grid container that positions the two columns.
    # Author's note: grid was chosen over flex to avoid a growth bug seen in
    # environments such as Colab.
    html.Div(style={'display': 'grid', 'gridTemplateColumns': '1fr 2fr', 'gap': '20px'}, children=[
        # Metrics section (left column)
        html.Div(style={'minWidth': '300px'}, children=[
            html.H2("Métricas de Desempenho", style={'fontSize': '18px', 'marginBottom': '10px'}),
            html.Div(id='metrics-output', style={'fontSize': '16px', 'color': '#555', 'padding': '10px', 'borderLeft': '3px solid #007bff'}),
        ]),

        # Chart section (right column)
        html.Div(children=[
            # Fixed height so the chart does not keep growing
            dcc.Graph(id='consumption-prediction-graph', style={'height': '500px'})
        ])
    ])
])

# --- 3. CALLBACKS DO DASHBOARD (Lógica de Interatividade) ---

# O callback atualiza os gráficos e métricas com base na seleção do dropdown
@app.callback(
    Output('consumption-prediction-graph', 'figure'),
    Output('metrics-output', 'children'),
    Input('client-dropdown', 'value')
)
def update_dashboard(selected_client_id):
    """
    Rebuild the chart and the metric paragraphs for the selected client.

    Args:
        selected_client_id: Current value of the 'client-dropdown' component.

    Returns:
        tuple: The Plotly figure for 'consumption-prediction-graph' and a
        list of ``html.P`` elements for 'metrics-output'.
    """
    print(f"💻 Atualizando dashboard para o cliente: {selected_client_id}")

    # Rows that belong to the chosen client
    is_selected = df_test_with_predictions['client_id'] == selected_client_id
    client_rows = df_test_with_predictions[is_selected]

    # Actual vs. predicted consumption over time
    figure = px.line(
        client_rows,
        x='date',
        y=['total_consumption', 'prediction'],
        title=f"Previsões vs. Real para o Cliente {selected_client_id}",
        labels={'value': 'Consumo (kWh)', 'variable': 'Tipo de Dado'},
    )
    figure.update_layout(legend_title_text='Legenda')
    figure.update_traces(hovertemplate='Data: %{x}<br>Consumo: %{y:.2f} kWh')

    # One paragraph per metric; a metric missing from the lookup is shown
    # as 0.
    client_metrics = metrics_by_client.get(selected_client_id, {})
    metric_paragraphs = [
        html.P(f"{label}: {client_metrics.get(label, 0):.4f}")
        for label in ('MAE', 'RMSE', 'R²')
    ]

    return figure, metric_paragraphs


# --- 4. EXECUÇÃO DO APLICATIVO ---
# Runs only when this module is the entry point (not when it is imported).
if __name__ == '__main__':
    # Announce startup, then start the Dash app with debug mode enabled.
    print("\n🚀 Iniciando o aplicativo Dash. Clique no link abaixo para visualizá-lo.")
    app.run(debug=True)


🚀 Iniciando o aplicativo Dash. Clique no link abaixo para visualizá-lo.


<IPython.core.display.Javascript object>