### Analysis using SARIMAX model for Time Series

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.dates as mdates
import warnings

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

In [None]:
# Ignoring warnings
warnings.filterwarnings("ignore")

# Matplotlib styles
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (15, 4),
    'axes.prop_cycle': plt.cycler(color=["#4C72B0", "#C44E52", "#55A868", "#8172B2", "#CCB974", "#64B5CD"]),
    'axes.facecolor': "#EAEAF2"
})
%matplotlib inline

#### Importing already collected data

In [None]:
# Both archives are read with the same options: the first column becomes the
# index and the "date" column is parsed into datetimes.
read_options = {"index_col": 0, "parse_dates": ["date"]}
df_daily = pd.read_csv("./datasets/proc/daily_variables.csv.zip", **read_options)
df_monthly = pd.read_csv("./datasets/proc/monthly_variables.csv.zip", **read_options)

#### Preprocessing for the analysis

In [None]:
# Min-max scale every column except "date", in place, with a fresh scaler
# fitted separately on each frame (daily first, then monthly)
for frame in (df_daily, df_monthly):
    value_columns = frame.set_index('date').columns
    frame[value_columns] = MinMaxScaler().fit_transform(frame[value_columns])

In [None]:
# 7x2 grid: autocorrelation of each series (left) and of its first
# difference (right). Column names are taken from df_daily, values from
# df_monthly.
figure, axs = plt.subplots(7, 2, figsize=(15, 30))
column_names = df_daily.set_index("date").columns

for row in range(7):
    name = column_names[row]
    levels = np.asarray(df_monthly[name])
    # Difference between consecutive observations along the time axis
    first_diff = np.diff(levels, axis=0)
    left_ax, right_ax = axs[row, 0], axs[row, 1]
    plot_acf(levels, lags=100, ax=left_ax, title=f"Autocorrelation for {name}")
    plot_acf(first_diff, lags=100, ax=right_ax, title=f"Autocorrelation for differentiated {name}")

plt.show()

In [None]:
# 7x2 grid: partial autocorrelation of each series (left) and of its first
# difference (right). Column names are taken from df_daily, values from
# df_monthly.
figure, axs = plt.subplots(7, 2, figsize=(15, 30))
column_names = df_daily.set_index("date").columns

for row in range(7):
    name = column_names[row]
    levels = np.asarray(df_monthly[name])
    # Difference between consecutive observations along the time axis
    first_diff = np.diff(levels, axis=0)
    left_ax, right_ax = axs[row, 0], axs[row, 1]
    plot_pacf(levels, lags=77, ax=left_ax, title=f"PACF {name}")
    plot_pacf(first_diff, lags=77, ax=right_ax, title=f"PACF differentiated {name}")

plt.show()

In [None]:
# One panel per series: the monthly SELIC curve overlaid with the monthly
# values of the i-th column (names taken from df_daily), x ticks every six
# months formatted as month-year.
figure, axs = plt.subplots(7, 1, figsize=(15, 45))
column_names = df_daily.set_index("date").columns
x_dates = df_monthly["date"]

for idx in range(7):
    name = column_names[idx]
    panel = axs[idx]
    panel.plot(x_dates, df_monthly["SELIC"], label="SELIC")
    panel.plot(x_dates, df_monthly[name], label=f"{name}")
    panel.set_title(f"SELIC and {name}")
    panel.legend()
    panel.tick_params(axis='x', labelrotation=45)
    panel.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
    panel.xaxis.set_major_formatter(mdates.DateFormatter("%m-%Y"))
plt.show()

### Performing Time Series Analysis

### ICON

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for ICON with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["ICON"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())

### IEEX

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for IEEX with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["IEEX"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())

### IFNC

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for IFNC with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["IFNC"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())

### IMAT

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for IMAT with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["IMAT"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())

### IMOB

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for IMOB with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["IMOB"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())

### INDX

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for INDX with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["INDX"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())

### UTIL

In [None]:
# Grid search over the non-seasonal (p, q) and seasonal (P, Q) orders of a
# SARIMAX(p,1,q)x(P,1,Q,12) model for UTIL with DOLLAR, GDP, IPCA, SELIC and
# VIX as exogenous regressors. The fit with the lowest AIC is kept.
ps = range(3)
qs = range(3)

ps_season = range(3)
qs_season = range(3)

minAIC, best_parameters, best_model = (float('inf'), (-10,-10), None)
# Seasonal (P, Q) of the best fit; kept separate so best_parameters keeps its
# original (p, q) shape.
best_seasonal_parameters = (-10, -10)

for p,q,p_season,q_season in [(i, j, k, l) for i in ps for j in qs for k in ps_season for l in qs_season]:

    model = sm.tsa.statespace.SARIMAX(
        df_monthly["UTIL"],
        df_monthly[["DOLLAR", "GDP", "IPCA", "SELIC", "VIX"]],
        order=(p, 1, q),
        seasonal_order=(p_season, 1, q_season, 12),
        trend='t',
        # The documented keyword is `enforce_invertibility`; the previous
        # spelling (`enforce_inversibility`) is not a SARIMAX parameter.
        enforce_invertibility=False,
        enforce_stationarity=False,
        simple_differencing=True,
    )
    result = model.fit()
    # Report the seasonal orders actually used, not the non-seasonal ones
    print(f"\rFor parameters ({p},1,{q})x({p_season},1,{q_season},12) -> AIC: {result.aic}", end="")
    if result.aic < minAIC:
        minAIC = result.aic
        best_parameters = (p,q)
        best_seasonal_parameters = (p_season, q_season)
        best_model = result

print()
print(best_model.summary())