In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'

# Exploración de Datos

In [None]:
# Name of the dataset under analysis; reused for the workbook file name and plot titles.
dfToUse = "consumo"


def returnQuantRows(dfToUse):
    """Return the number of rows associated with a dataset name.

    Args:
        dfToUse: Dataset name.

    Returns:
        269 when the name is 'consumo', 257 for any other name.
    """
    return 269 if dfToUse == 'consumo' else 257


# Row count for the dataset selected above.
toUse = returnQuantRows(dfToUse)

In [None]:
# Read the workbook named after the selected dataset and keep only the date
# column plus the three fuel series.
df = pd.read_excel(dfToUse+'.xlsx', engine='openpyxl')[
    ['Fecha', 'Gasolina superior', 'Gasolina regular', 'Diesel']
]

In [None]:
# Keep only the leading rows that belong to the selected dataset. The count
# comes from toUse (returnQuantRows(dfToUse)) rather than a hard-coded 257, so
# 'consumo' keeps the 269 rows that returnQuantRows assigns to it.
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of the loaded data.
df = df[:toUse].copy()
# Parse the date column into datetime values (needed for .dt access and date axes).
df['Fecha'] = pd.to_datetime(df['Fecha'])

In [None]:
# Show the current frame as the cell's output for a quick visual check.
df

In [None]:
# Columns that hold the fuel series used by the analysis cells.
quant_vars = ['Gasolina superior', 'Gasolina regular', 'Diesel']

# Summary statistics of those columns, computed on a float-cast copy.
(
    df[quant_vars]
    .astype(float)
    .describe()
)

In [None]:
# For every fuel series: draw its distribution and report its shape statistics.
for var in quant_vars:
    # Work on the series with missing observations removed.
    data = df[var].dropna(how='all', axis=0)

    # Plot: histogram with a kernel-density overlay.
    sns.displot(data, kde=True)

    # Series name in bold (ANSI escape codes), followed by kurtosis and skewness.
    print(
        '\033[1m' + var + '\033[0m' + ': Kurtosis:',
        stats.kurtosis(data),
        'Skewness:',
        stats.skew(data),
        '\n',
    )


# Por año

In [None]:
# Use a wide figure for every plot drawn from here on (rcParams is global state).
plt.rcParams["figure.figsize"] = (20,5.5)

# One line chart per fuel series: values of the series against the date column.
for gas in quant_vars:
    x = df['Fecha']
    y = df[gas]

    # The title combines the dataset name with a fixed year range.
    plt.title(dfToUse+" 2000-2022")
    # beautify the x-labels
    plt.gcf().autofmt_xdate()
    # The x-axis label carries the fuel name, identifying which series is shown.
    plt.xlabel(gas)


    plt.plot(x, y)
    # Render now so each fuel gets its own figure.
    plt.show()

# Por mes

In [None]:
# Total of each fuel series per calendar month number, pooled across all years.
# Only the fuel columns are aggregated: 'Fecha' is a datetime column, and
# summing it raises TypeError on pandas >= 2.0, where non-summable columns are
# no longer dropped silently from groupby reductions.
dfPerMonth = df.groupby(df['Fecha'].dt.month)[quant_vars].sum()

In [None]:
# Use a wide figure for the plots below (rcParams is global state).
plt.rcParams["figure.figsize"] = (20,5.5)

# One bar chart per fuel series: the per-month totals held in dfPerMonth.
for gas in quant_vars:
    # dfPerMonth is indexed by the month number it was grouped on.
    x = dfPerMonth.index
    y = dfPerMonth[gas]


    plt.title(dfToUse+" por mes")
    # beautify the x-labels
    plt.gcf().autofmt_xdate()
    # The x-axis label carries the fuel name, identifying which series is shown.
    plt.xlabel(gas)


    plt.bar(x, y)
    # Render now so each fuel gets its own figure.
    plt.show()

# Comportamiento en la pandemia

In [None]:
# Use a wide figure for the plots below (rcParams is global state).
plt.rcParams["figure.figsize"] = (20,5.5)
# Base unit for the window size; the loop plots the last quant_to_have * 3 rows.
# NOTE(review): presumably rows are monthly, making this the last three years — confirm.
quant_to_have = 12

# One line chart per fuel series, restricted to the trailing rows of the frame.
for gas in quant_vars:
    x = df['Fecha'].tail(quant_to_have * 3)
    y = df[gas].tail(quant_to_have * 3)

    plt.title(dfToUse+" durante pandemia")
    # beautify the x-labels
    plt.gcf().autofmt_xdate()
    # The x-axis label carries the fuel name, identifying which series is shown.
    plt.xlabel(gas)


    plt.plot(x, y)
    # Render now so each fuel gets its own figure.
    plt.show()

# Separando test y train

In [None]:
# Chronological hold-out: the final 17 rows form the test set, the rest is training data.
rows = len(df)
train_df, test_df = df.iloc[:rows - 17], df.iloc[rows - 17:]
# Report both sizes as a sanity check on the split.
print(len(train_df), len(test_df))

## Pasos para construir modelos ARIMA
1. Identificación
2. Estimación
3. Validación
4. Predicción

In [None]:
def make_timeline(column):
    """Plot a training series together with its rolling mean and rolling std.

    Reads the module-level ``train_df`` and ``dfToUse``. The rolling statistics
    use a window of 12 rows. The raw values are drawn in blue, the rolling mean
    in red and the rolling standard deviation in black; the figure is then
    shown without blocking.

    Args:
        column: Name of the ``train_df`` column to plot.
    """
    plt.rcParams["figure.figsize"] = (20,5.5)

    series = train_df[column]
    window = series.rolling(window=12)
    rolling_mean, rolling_std = window.mean(), window.std()

    plt.plot(series, color="blue", label="Original")
    plt.plot(rolling_mean, color='red', label='Media ' + dfToUse)
    plt.plot(rolling_std, color='black', label='Desviación Estándar ' + dfToUse)
    plt.legend(loc='best')
    plt.title('Media y desviación estándar ' + column)
    plt.show(block=False)

In [None]:
# Rolling mean / standard deviation chart for the regular gasoline training series.
make_timeline('Gasolina regular')

In [None]:
# Rolling mean / standard deviation chart for the superior gasoline training series.
make_timeline('Gasolina superior')

In [None]:
# Rolling mean / standard deviation chart for the diesel training series.
make_timeline('Diesel')

In [None]:
# One two-column training frame (date + a single fuel series) per fuel.
train_regular, train_superior, train_diesel = (
    train_df[['Fecha', fuel]]
    for fuel in ('Gasolina regular', 'Gasolina superior', 'Diesel')
)

In [None]:
# For each fuel: cast its value column to float, then build a copy of the
# frame indexed by the date column.

# Regular gasoline
train_regular = train_regular.astype({'Gasolina regular': float})
train_regular_indexed = train_regular.set_index('Fecha')

# Superior gasoline
train_superior = train_superior.astype({'Gasolina superior': float})
train_superior_indexed = train_superior.set_index('Fecha')

# Diesel
train_diesel = train_diesel.astype({'Diesel': float})
train_diesel_indexed = train_diesel.set_index('Fecha')

In [None]:
# Seasonal decomposition of the regular gasoline training series.
descomposicion = seasonal_decompose(train_regular_indexed)
# plot() returns the Figure; the trailing semicolon keeps the notebook from
# rendering that return value as a second copy of the chart.
descomposicion.plot();

In [None]:
# Seasonal decomposition of the superior gasoline training series.
descomposicion = seasonal_decompose(train_superior_indexed)
# plot() returns the Figure; the trailing semicolon keeps the notebook from
# rendering that return value as a second copy of the chart.
descomposicion.plot();

In [None]:
# Seasonal decomposition of the diesel training series.
descomposicion = seasonal_decompose(train_diesel_indexed)
# plot() returns the Figure; the trailing semicolon keeps the notebook from
# rendering that return value as a second copy of the chart.
descomposicion.plot();