In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm

from datetime import datetime

# NOTE(review): star import pulls every public pycaret.time_series name into the
# notebook namespace; the pip-install cell for pycaret appears further down, after this import.
from pycaret.time_series import *

import matplotlib.pyplot as plt
# Default figure size for every plot that does not set its own.
plt.rcParams['figure.figsize'] = [10, 5]


## To extract the components of the time series
################################################################
from statsmodels.tsa.seasonal import seasonal_decompose


## To test the stationarity of our time series
################################################################
from statsmodels.tsa.stattools import adfuller

## To compute the autocorrelation of the time series
################################################################
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

## To fit the predictive models
################################################################
from statsmodels.tsa.arima.model import ARIMA
from itertools import product

## For the model metrics
################################################################
from sklearn.metrics import mean_squared_error

## To silence warnings
################################################################
import warnings
# Suppresses every warning category for the rest of the session.
warnings.filterwarnings("ignore")

import pickle


In [2]:
# Load the demand export, parse the timestamps as UTC, and keep only the readings
# whose minute is a multiple of 10, with just the timestamp and actual-demand columns.
# The rows are selected with a boolean mask instead of a temporary "drop" sentinel
# column, so no object-dtype column mixing timestamps and strings is ever created.
df = pd.read_csv("../data/scrap/demanda_tiempo_real.csv")
df["datetime"] = pd.to_datetime(df["datetime"], utc=True, format='%Y-%m-%d %H:%M:%S%z')
df = (
    df.loc[df["datetime"].dt.minute % 10 == 0, ["datetime", "value_Demanda real"]]
      .rename(columns={"value_Demanda real": "demanda_real"})
)

In [3]:
# Overwrite the parsed timestamps with a strictly regular 10-minute grid starting at
# 2013-12-31 23:00. The number of periods follows the frame length instead of a
# hard-coded row count, so the cell keeps working when the CSV grows.
# NOTE(review): this discards the original timestamps; presumably the source has no
# missing 10-minute readings, otherwise values are shifted in time - confirm.
df["datetime"] = pd.date_range(datetime(2013, 12, 31, hour=23, minute=0), periods=len(df), freq='10min')

In [4]:
# Preview the first rows of the filtered, re-timestamped frame.
df.head()

Unnamed: 0,datetime,demanda_real
0,2013-12-31 23:00:00,24546
1,2013-12-31 23:10:00,24309
2,2013-12-31 23:20:00,24348
3,2013-12-31 23:30:00,24321
4,2013-12-31 23:40:00,24194


In [5]:
# Use the timestamp column as the row index (the column itself is kept for now).
df = df.set_index(pd.to_datetime(df["datetime"]))

In [6]:
# Infer the sampling frequency from the index values and attach it to the index;
# the trailing expression displays the resulting DatetimeIndex.
df.index.freq = pd.infer_freq(df.index)
df.index

DatetimeIndex(['2013-12-31 23:00:00', '2013-12-31 23:10:00',
               '2013-12-31 23:20:00', '2013-12-31 23:30:00',
               '2013-12-31 23:40:00', '2013-12-31 23:50:00',
               '2014-01-01 00:00:00', '2014-01-01 00:10:00',
               '2014-01-01 00:20:00', '2014-01-01 00:30:00',
               ...
               '2023-03-31 11:20:00', '2023-03-31 11:30:00',
               '2023-03-31 11:40:00', '2023-03-31 11:50:00',
               '2023-03-31 12:00:00', '2023-03-31 12:10:00',
               '2023-03-31 12:20:00', '2023-03-31 12:30:00',
               '2023-03-31 12:40:00', '2023-03-31 12:50:00'],
              dtype='datetime64[ns]', name='datetime', length=486228, freq='10T')

In [7]:
# The timestamps now live in the index, so the duplicate column is removed.
# errors='ignore' keeps the cell re-runnable: a second execution finds no
# 'datetime' column and would otherwise raise KeyError.
df = df.drop(columns=['datetime'], errors='ignore')
df.head()

Unnamed: 0_level_0,demanda_real
datetime,Unnamed: 1_level_1
2013-12-31 23:00:00,24546
2013-12-31 23:10:00,24309
2013-12-31 23:20:00,24348
2013-12-31 23:30:00,24321
2013-12-31 23:40:00,24194


In [8]:
# Count missing values per column.
df.isnull().sum()

demanda_real    0
dtype: int64

In [9]:
# Rename the single remaining column to "valores"; later cells refer to it by this name.
df.columns = ["valores"]

In [10]:
!pip install pycaret

In [None]:
from pycaret.time_series import *
# Initialise the PyCaret time-series experiment on the demand series: forecast horizon
# of 3 steps, 5 cross-validation folds, fixed session id for reproducibility.
# The original passed an undefined name `data`; the series prepared above lives in `df`.
s = setup(data=df, target="valores", fh=3, fold=5, session_id=123)

In [None]:
# Seasonality: work on a copy so columns added to df2 do not touch df.
df2 = df.copy()
df2.head()

In [None]:
# Derive calendar columns from the datetime index for the per-year seasonal plot.
df2 = df2.assign(year=df2.index.year, month=df2.index.month)
df2.head()

In [None]:
# Monthly profile of the demand, one line per year, to eyeball yearly seasonality.
# Labels previously referred to temperature; the plotted series is the demand.
fig, ax = plt.subplots(figsize=(15, 6))

# Draw on the axes created above explicitly instead of relying on the current-axes state.
sns.lineplot(x='month', y='valores', hue='year', data=df2, ax=ax)
ax.set_title('Estacionalidad de la demanda', fontsize=20, loc='center')
ax.set_xlabel('Mes')
ax.set_ylabel('Evolución de la demanda');

In [None]:
# Stationarity: plot the full series with its overall mean as a horizontal reference line.
# The title previously referred to temperature; the plotted series is the demand.
ax = df.plot(figsize=(15, 6))
ax.set_title("Evolución de la demanda")
ax.axhline(df["valores"].mean(), c="g", label="mean")
ax.legend();

In [None]:
# Raw series (thick, light line) overlaid with its 12-sample rolling mean (thin red line).
# Title and legend previously referred to temperature; the plotted series is the demand.
fig, ax = plt.subplots(figsize=(15, 6))
ax.set_title("Evolución de la demanda")

ax.plot(df["valores"], label="Demanda real", linewidth=8, c="skyblue")
ax.plot(df["valores"].rolling(window=12).mean(), label="rolling window", c="r", linewidth=0.5)

ax.legend();

In [None]:
# Augmented Dickey-Fuller test on the series; the raw result tuple is displayed as-is.
res_ad = adfuller(df["valores"])
res_ad

In [None]:
# Report the headline ADF numbers, then each critical value rounded to two decimals.
for label, position in (('ADF Statistic:', 0), ('p-value:', 1)):
    print(label, res_ad[position])
print('Critical Values:')
critical_values = res_ad[4]
for level in critical_values:
    print("\t", level, round(critical_values[level], 2))

In [None]:
# NOTE(review): presumably candidate lags read off the ACF / PACF plots - confirm.
# Neither list is referenced by any other cell visible in this notebook.
auto = [90, 180]
partial = [14, 40]

In [None]:
# Autocorrelation (up to lag 200) on top, partial autocorrelation (up to lag 40) below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
acf_ax, pacf_ax = axes
plot_acf(df, ax=acf_ax, lags=200)
plot_pacf(df, ax=pacf_ax, lags=40);

In [None]:
# Hold out the last 30 observations for evaluation; everything before them is training data.
y_train, y_test = df["valores"].iloc[:-30], df["valores"].iloc[-30:]

In [None]:
# Candidate AR orders (p) and MA orders (q) for the ARIMA grid search.
ps = range(90)
qs = range(14)

# Every (p, q) pair, with p varying slowest.
parameters = list(product(ps, qs))
# Print a summary rather than the whole list, which would flood the cell output.
print(f"{len(parameters)} (p, q) combinations: {parameters[:3]} ... {parameters[-1]}")

In [None]:
%%time
rmse = []
order = []
for p,q in tqdm(parameters):
    try:
        modelo=ARIMA(y_train, order=(p, 0, q)).fit()
        pred = modelo.predict(start=len(y_train), end=len(df) -1)
        error = np.sqrt(np.mean(mean_squared_error(y_test, pred)))
        rmse.append(error)
        order.append((p,q))
    except: 
        continue