# Preparacion de datos

In [None]:
import os
import pandas as pd
import numpy as np

# Folder that holds dataset.csv. The default is the original Colab/Drive location;
# set the RUTA_DATA environment variable to run the notebook from another machine.
RUTA_DATA = os.environ.get(
    "RUTA_DATA",
    "/content/drive/MyDrive/Proyecto_UTN_FRBA_Becarios/Códigos/2024/01 - Pronostico de demanda (experimento 1)/Datasets/",
)

# Read the raw series with the 'fecha' column parsed as a DatetimeIndex.
data = pd.read_csv(os.path.join(RUTA_DATA, "dataset.csv"), index_col='fecha', parse_dates=['fecha'])
# Aggregate to hourly means. With closed='left' / label='right' each bin [t-1h, t)
# is stamped with its right edge t. 'h' is the non-deprecated spelling of the hourly alias.
data = data.resample(rule='h', closed='left', label='right').mean()
# Fill gaps in temperature with the last available value (bracket assignment
# instead of attribute assignment, which is the explicit way to write a column).
data['temp'] = data['temp'].ffill()
data.tail()

Unnamed: 0_level_0,demanda,no_habil,temp,es_dia_calido,es_dia_frio
fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-31 21:00:00,23976.806,0.0,31.8,1.0,0.0
2024-01-31 22:00:00,22933.568,0.0,30.85,1.0,0.0
2024-01-31 23:00:00,23470.519,0.0,30.25,1.0,0.0
2024-02-01 00:00:00,24386.953,0.0,29.1,0.5,0.0
2024-02-01 01:00:00,24334.169,0.0,28.7,0.5,0.0


## Librerias necesarias

In [None]:
# instalo skforecast
#!pip install skforecast
#!pip install ordpy

from skforecast.model_selection import backtesting_forecaster
from sklearn.linear_model import LinearRegression
from skforecast.ForecasterAutoreg import ForecasterAutoreg
import ordpy
import plotly.graph_objects as go

In [None]:
# Training and forecasting intervals.
# The index is hourly, so the 23:59 cut-off never coincides with a row and the two
# label-based slices below cannot share an observation.
end_train = '2023-12-31 23:59:00'
data_train = data.loc[:end_train].copy()
data_test = data.loc[end_train:].copy()

# Report the date span and row count of each partition.
for etiqueta, tramo in (("Train dates      ", data_train), ("Test dates       ", data_test)):
    print(f"{etiqueta}: {tramo.index.min()} --- {tramo.index.max()}  (n={len(tramo)})")

Train dates      : 2021-01-01 03:00:00 --- 2023-12-31 23:00:00  (n=26277)
Test dates       : 2024-01-01 00:00:00 --- 2024-02-01 01:00:00  (n=746)


# Modelo 1

In [None]:
# Model 1: plain linear autoregression.
# The look-back window is one week of hourly observations (7 days x 24 hours).
forecaster = ForecasterAutoreg(
    regressor=LinearRegression(),
    lags=7 * 24,
)

In [None]:
# Backtest Model 1 on the demand series alone (no exogenous variables).
# The initial training size is the number of rows up to end_train, and the
# forecast is produced in chunks of 24 steps (one day of hourly data).
n_obs_train = len(data.loc[:end_train])

metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['demanda'],
    steps=24,
    metric='mean_squared_error',
    initial_train_size=n_obs_train,
    refit=False,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

print(f"MSE: {metric:.2f}")

  0%|          | 0/32 [00:00<?, ?it/s]

MSE: 1530491.74


In [None]:
# Overlay the observed test demand and the backtest predictions on one chart.
fig = go.Figure(
    data=[
        go.Scatter(x=data_test.index, y=data_test['demanda'], name="test", mode="lines"),
        go.Scatter(x=predictions.index, y=predictions['pred'], name="prediction", mode="lines"),
    ]
)
fig.update_layout(title="Demanda y pronostico", xaxis_title="Tiempo", yaxis_title="Demanda")
fig.show()

# Modelo 2
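Se calcula la entropía y la complejidad de la demanda sobre una ventana móvil de una semana (24*7 horas) y se usa la entropía como peso de cada muestra de entrenamiento; además se agrega la temperatura como variable exógena.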

In [None]:
# Rolling ordinal statistics of the demand.
# For every row i from the second week on, ordpy.complexity_entropy is evaluated on
# the previous 24*7 rows (positions [i - 168, i), row i itself excluded) with dx=3.
# The first 24*7 rows have no complete window and keep the placeholder value 0.
#
# NOTE(review): the value stored at row i depends on demand up to row i-1. When this
# column is later passed as an exogenous variable to a 24-step forecast, steps 2..24
# would need demand from inside the forecast horizon -- confirm this is intended.
ventana = 24 * 7
pares = [
    ordpy.complexity_entropy(data.demanda[fin - ventana:fin], dx=3)
    for fin in range(ventana, len(data.demanda))
]
entropia = [0] * ventana + [h for h, _ in pares]
complejidad = [0] * ventana + [c for _, c in pares]
data['entropia'] = entropia
data['complejidad'] = complejidad

In [None]:
# Demand against its rolling entropy and complexity.
# Demand is drawn on the primary y-axis; entropy and complexity share the
# secondary y-axis.
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=data.index, y=data['demanda'], name="demanda", mode="lines"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=data.index, y=data['entropia'], name="entropia", mode="lines"),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(x=data.index, y=data['complejidad'], name="complejidad", mode="lines"),
    secondary_y=True,
)
fig.update_layout(
    title="Demanda y entropia",
    xaxis_title="Tiempo",
)
# Label both y-axes so the reader can tell which scale belongs to which series.
fig.update_yaxes(title_text="Demanda", secondary_y=False)
fig.update_yaxes(title_text="Entropia / complejidad", secondary_y=True)
fig.show()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Weight function handed to the forecaster through its weight_func argument.
def weight_entropia(index):
    """Return the 'entropia' values of the global ``data`` frame for ``index``.

    ``index`` is used as a row label selector on ``data``; the result is
    whatever ``.loc`` yields for those labels in the 'entropia' column.
    """
    columna_entropia = data['entropia']
    return columna_entropia.loc[index]

In [None]:
# Model 2: same linear autoregression over a one-week window (7 days x 24 hours),
# with weight_entropia supplied as the weight function.
forecaster = ForecasterAutoreg(
    regressor=LinearRegression(),
    lags=7 * 24,
    weight_func=weight_entropia,
)

In [None]:
# Backtest Model 2: demand with temperature as the only exogenous variable.
# The initial training size is the number of rows up to end_train, and the
# forecast is produced in chunks of 24 steps (one day of hourly data).
n_obs_train = len(data.loc[:end_train])

metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['demanda'],
    exog=data['temp'],
    steps=24,
    metric='mean_squared_error',
    initial_train_size=n_obs_train,
    refit=False,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

print(f"MSE: {metric:.2f}")

  0%|          | 0/32 [00:00<?, ?it/s]

MSE: 1475451.38


In [None]:
# Overlay the observed test demand and the backtest predictions on one chart.
fig = go.Figure(
    data=[
        go.Scatter(x=data_test.index, y=data_test['demanda'], name="test", mode="lines"),
        go.Scatter(x=predictions.index, y=predictions['pred'], name="prediction", mode="lines"),
    ]
)
fig.update_layout(title="Demanda y pronostico", xaxis_title="Tiempo", yaxis_title="Demanda")
fig.show()

# Modelo 3
La entropia como variable exogena

In [None]:
# Model 3: unweighted linear autoregression.
# The look-back window is one week of hourly observations (7 days x 24 hours).
forecaster = ForecasterAutoreg(
    regressor=LinearRegression(),
    lags=7 * 24,
)

In [None]:
# Backtest Model 3: temperature and rolling entropy as exogenous variables.
# The initial training size is the number of rows up to end_train, and the
# forecast is produced in chunks of 24 steps (one day of hourly data).
#
# NOTE(review): 'entropia' at a given row is derived from demand up to the previous
# row, so within a 24-step chunk it relies on demand that is being forecast --
# confirm this lookahead is acceptable for the experiment.
n_obs_train = len(data.loc[:end_train])
exog_cols = ['temp', 'entropia']

metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['demanda'],
    exog=data[exog_cols],
    steps=24,
    metric='mean_squared_error',
    initial_train_size=n_obs_train,
    refit=False,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

print(f"MSE: {metric:.2f}")

  0%|          | 0/32 [00:00<?, ?it/s]

MSE: 1493297.11


In [None]:
# Overlay the observed test demand and the backtest predictions on one chart.
fig = go.Figure(
    data=[
        go.Scatter(x=data_test.index, y=data_test['demanda'], name="test", mode="lines"),
        go.Scatter(x=predictions.index, y=predictions['pred'], name="prediction", mode="lines"),
    ]
)
fig.update_layout(title="Demanda y pronostico", xaxis_title="Tiempo", yaxis_title="Demanda")
fig.show()

# Modelo 4

In [70]:
# Model 4: unweighted linear autoregression.
# The look-back window is one week of hourly observations (7 days x 24 hours).
forecaster = ForecasterAutoreg(
    regressor=LinearRegression(),
    lags=7 * 24,
)

In [73]:
# Backtest Model 4: temperature and the 'no_habil' column as exogenous variables.
# The initial training size is the number of rows up to end_train, and the
# forecast is produced in chunks of 24 steps (one day of hourly data).
n_obs_train = len(data.loc[:end_train])
exog_cols = ['temp', 'no_habil']

metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['demanda'],
    exog=data[exog_cols],
    steps=24,
    metric='mean_squared_error',
    initial_train_size=n_obs_train,
    refit=False,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

print(f"MSE: {metric:.2f}")

  0%|          | 0/32 [00:00<?, ?it/s]

MSE: 1394660.23


In [72]:
# Overlay the observed test demand and the backtest predictions on one chart.
fig = go.Figure(
    data=[
        go.Scatter(x=data_test.index, y=data_test['demanda'], name="test", mode="lines"),
        go.Scatter(x=predictions.index, y=predictions['pred'], name="prediction", mode="lines"),
    ]
)
fig.update_layout(title="Demanda y pronostico", xaxis_title="Tiempo", yaxis_title="Demanda")
fig.show()

# Modelo 5
Se aplica un threshold de entropia como indicador binario

In [98]:
# Entropy level above which a row is flagged as atypical. The flag is computed
# per hourly row, even though the column is named 'dia_raro'.
UMBRAL_ENTROPIA = 0.70
data['dia_raro'] = np.where(data['entropia'] > UMBRAL_ENTROPIA, 1, 0)

# Show the binary flag (bars) together with the entropy it is derived from (line).
fig = go.Figure(
    data=[
        go.Bar(x=data.index, y=data['dia_raro'], name="dia_raro", marker_color="red"),
        go.Scatter(x=data.index, y=data['entropia'], name="entropia"),
    ]
)
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="Entropia e indicador dia_raro",
    xaxis_title="Tiempo",
    yaxis_title="Entropia / dia_raro",
)
fig.show()

In [99]:
# Model 5: unweighted linear autoregression.
# The look-back window is one week of hourly observations (7 days x 24 hours).
forecaster = ForecasterAutoreg(
    regressor=LinearRegression(),
    lags=7 * 24,
)

In [100]:
# Backtest Model 5: temperature, rolling entropy, 'no_habil' and the thresholded
# 'dia_raro' flag as exogenous variables.
# The initial training size is the number of rows up to end_train, and the
# forecast is produced in chunks of 24 steps (one day of hourly data).
#
# NOTE(review): 'entropia' (and 'dia_raro', which is derived from it) at a given row
# comes from demand up to the previous row, so within a 24-step chunk these columns
# rely on demand that is being forecast -- confirm this lookahead is acceptable.
n_obs_train = len(data.loc[:end_train])
exog_cols = ['temp', 'entropia', 'no_habil', 'dia_raro']

metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['demanda'],
    exog=data[exog_cols],
    steps=24,
    metric='mean_squared_error',
    initial_train_size=n_obs_train,
    refit=False,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

print(f"MSE: {metric:.2f}")

  0%|          | 0/32 [00:00<?, ?it/s]

MSE: 1395036.91


In [None]:
# Overlay the observed test demand and the backtest predictions on one chart.
fig = go.Figure(
    data=[
        go.Scatter(x=data_test.index, y=data_test['demanda'], name="test", mode="lines"),
        go.Scatter(x=predictions.index, y=predictions['pred'], name="prediction", mode="lines"),
    ]
)
fig.update_layout(title="Demanda y pronostico", xaxis_title="Tiempo", yaxis_title="Demanda")
fig.show()

# Modelo 6
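Vamos a probar solamente con la temperatura y el indicador de día no hábil como variables exógenas (sin entropía). Es la misma configuración que el Modelo 4, por lo que el MSE coincide.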

In [101]:
# Model 6: unweighted linear autoregression.
# The look-back window is one week of hourly observations (7 days x 24 hours).
forecaster = ForecasterAutoreg(
    regressor=LinearRegression(),
    lags=7 * 24,
)

In [102]:
# Backtest Model 6: temperature and the 'no_habil' column as exogenous variables.
# These are the same inputs and settings as the Model 4 backtest.
# The initial training size is the number of rows up to end_train, and the
# forecast is produced in chunks of 24 steps (one day of hourly data).
n_obs_train = len(data.loc[:end_train])
exog_cols = ['temp', 'no_habil']

metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['demanda'],
    exog=data[exog_cols],
    steps=24,
    metric='mean_squared_error',
    initial_train_size=n_obs_train,
    refit=False,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

print(f"MSE: {metric:.2f}")

  0%|          | 0/32 [00:00<?, ?it/s]

MSE: 1394660.23


In [103]:
# Overlay the observed test demand and the backtest predictions on one chart.
fig = go.Figure(
    data=[
        go.Scatter(x=data_test.index, y=data_test['demanda'], name="test", mode="lines"),
        go.Scatter(x=predictions.index, y=predictions['pred'], name="prediction", mode="lines"),
    ]
)
fig.update_layout(title="Demanda y pronostico", xaxis_title="Tiempo", yaxis_title="Demanda")
fig.show()