## **1.0 Imports**

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.stats.diagnostic import acorr_ljungbox, het_arch, het_breuschpagan
from statsmodels.tsa.arima.model import ARIMA

ModuleNotFoundError: No module named 'statsmodels'

In [3]:
!pip install statsmodels



You should consider upgrading via the 'C:\Users\victo\Music\Projetos\Python\tcc\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [None]:
#pd.set_option('display.float_format', lambda x: '%.2f' % x)
def jupyter_settings():
    %matplotlib inline
    
    plt.style.use( 'ggplot')
    plt.rcParams['figure.figsize'] = [13, 6]
    plt.rcParams['font.size'] = 20
    
    #display( HTML( '<style>.container { width:100% !important; }</style>') )
    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option( 'display.expand_frame_repr', False )
    
    sns.set()
    
jupyter_settings()

##### **1.1 Load Data and Treatment**

In [None]:
# Read the exchange-rate workbook from the project-relative data folder.
data = pd.read_excel('./data/exchange_rate.xlsx')
# Show (n_rows, n_columns) as a quick sanity check of what was loaded.
data.shape

In [None]:
# Preview the first 15 rows to check how the sheet was parsed.
data.head(15)

##### **1.2 Rename columns**

In [None]:
# Replace the source headers with the short names used in the rest of the notebook.
data = data.rename(columns={'JBDCBRL Index': 'date', 'PX_LAST': 'value'})

##### **1.3 Data types**

In [None]:
# Inspect the dtype pandas inferred for each column.
data.dtypes

##### **1.4 Check NaN**

In [None]:
# Count the missing values in each column.
data.isna().sum()

##### **1.5 Numerical Analysis**

In [None]:
# Keep only the floating-point columns; they feed the descriptive statistics.
num_attributes = data.select_dtypes(include='float')

In [None]:
# Every statistic is computed column-wise and kept as a one-row frame so the
# pieces can be stacked and transposed into a single summary table.

# Central tendency
ct1 = num_attributes.apply(np.mean).to_frame().T    # mean
ct2 = num_attributes.apply(np.median).to_frame().T  # median

# Dispersion and shape
d1 = num_attributes.apply(np.std).to_frame().T  # standard deviation
d2 = num_attributes.apply(np.min).to_frame().T  # minimum
d3 = num_attributes.apply(np.max).to_frame().T  # maximum
d4 = num_attributes.apply(lambda col: col.max() - col.min()).to_frame().T  # range
d5 = num_attributes.apply(lambda col: col.skew()).to_frame().T             # skewness
d6 = num_attributes.apply(lambda col: col.kurtosis()).to_frame().T         # kurtosis

# One row per attribute; the stacking order must match the labels assigned below.
h = pd.concat([d2, d3, d4, ct1, ct2, d1, d5, d6]).T.reset_index()
h.columns = [
    'attributes',
    'min',
    'max',
    'range',
    'mean',
    'median',
    'std',
    'skew',
    'kurtosis',
]

In [None]:
# Display the descriptive-statistics summary table `h`.
h

##### **2.0 Data Visualization**

In [None]:
# Work on a copy restricted to observations strictly between 2008-01-01 and
# 2021-12-31 (both boundary dates are excluded by the strict comparisons).
df2 = data.copy().loc[
    lambda frame: (frame['date'] > '2008-01-01') & (frame['date'] < '2021-12-31')
]
df2.head(10)

In [None]:
# Line chart of the series in levels over the selected window.
sns.lineplot(data=df2, x='date', y='value')
plt.ylabel('Price')
plt.title('JBDCBRL Index')
plt.xlabel('Data')

In [None]:
# Autocorrelation of the level series up to 60 lags. The trailing semicolon
# suppresses the returned Figure, which Jupyter would otherwise render a second time.
plot_acf(df2['value'], lags=60);

##### **2.1 ADF Test - In level**

In [None]:
def adf_test(timeseries):
    """Run the Augmented Dickey-Fuller test on ``timeseries`` and print a summary.

    The lag length is chosen by AIC. The printed table lists the test
    statistic, the p-value, the number of lags used, the number of
    observations used and every critical value reported by ``adfuller``.
    Nothing is returned.
    """
    print("Results of Dickey-Fuller Test:")
    # Only the first five fields are needed; the trailing information-criterion
    # value returned by adfuller is ignored.
    statistic, p_value, used_lags, n_obs, critical_values = adfuller(
        timeseries, autolag="AIC"
    )[:5]
    summary = pd.Series(
        [statistic, p_value, used_lags, n_obs],
        index=[
            "Test Statistic",
            "p-value",
            "#Lags Used",
            "Number of Observations Used",
        ],
    )
    # Append one labelled row per significance level.
    for level, threshold in critical_values.items():
        summary[f"Critical Value ({level})"] = threshold
    print(summary)

In [None]:
# ADF on the series in levels (null hypothesis: the series has a unit root).
adf_test(df2['value'])

##### **2.2 KPSS Test - In level**

In [None]:
def kpss_test(timeseries):
    """Run the KPSS test on ``timeseries`` and print a summary.

    The test is run with a constant-only regression (``regression="c"``) and
    automatic lag selection. The printed table lists the test statistic, the
    p-value, the number of lags used and every critical value reported by
    ``kpss``. Nothing is returned.
    """
    print("Results of KPSS Test:")
    statistic, p_value, used_lags, critical_values = kpss(
        timeseries, regression="c", nlags="auto"
    )[:4]
    kpss_output = pd.Series(
        [statistic, p_value, used_lags],
        index=["Test Statistic", "p-value", "Lags Used"],
    )
    # Append one labelled row per significance level.
    for level, threshold in critical_values.items():
        kpss_output[f"Critical Value ({level})"] = threshold
    print(kpss_output)

In [None]:
# KPSS on the series in levels (null hypothesis: the series is stationary around a constant).
kpss_test(df2['value'])

#### **3.0 Diferenciação e modelagem**

In [None]:
# Separate copy so the columns added for differencing do not alter df2.
df3 = df2.copy()

In [None]:
# First difference of the series, plus a constant-zero column that serves as
# a horizontal reference line in the chart.
df3['diff_1'] = df3['value'].diff()
df3['zero'] = 0

# Differenced series drawn against the zero baseline.
sns.lineplot(data=df3, x='date', y='diff_1')
sns.lineplot(data=df3, x='date', y='zero')

plt.title('JBDCBRL Index')
plt.xlabel('Data')


##### **3.1 Testes em 1° diferença**

In [None]:
# Remove rows with missing values - at minimum the first row, whose diff_1 is
# NaN because it has no previous observation to subtract.
df3 = df3.dropna()

In [None]:
# ADF on the first-differenced series (null hypothesis: unit root).
adf_test(df3['diff_1'])

In [None]:
# KPSS on the first-differenced series (null hypothesis: stationary around a constant).
kpss_test(df3['diff_1'])

In [None]:
# Repeated dropna: a no-op when the cells run in order, because the rows with
# missing values were already removed before the stationarity tests.
df3 = df3.dropna()

In [None]:
# Partial autocorrelation of the differenced series with the default number of
# lags. The trailing semicolon suppresses the returned Figure, which Jupyter
# would otherwise render a second time.
plot_pacf(df3['diff_1']);

In [None]:
# Ljung-Box test for autocorrelation in the differenced series, evaluated at
# lag 30 and returned as a DataFrame.
acorr_ljungbox(df3['diff_1'], lags=[30], return_df = True)

In [None]:
# Autocorrelation of the differenced series up to 35 lags. The trailing
# semicolon suppresses the returned Figure, which Jupyter would otherwise
# render a second time.
plot_acf(df3['diff_1'], lags=35);

In [None]:
# Partial autocorrelation of the differenced series up to 60 lags. The trailing
# semicolon suppresses the returned Figure, which Jupyter would otherwise
# render a second time.
plot_pacf(df3['diff_1'], lags=60);

##### **3.2 Ajuste do modelo ARIMA**

In [None]:
# ARIMA(0,1,1) for the level series.
# df3['diff_1'] is already the first difference, so the integration order is 0
# here; keeping d=1 would difference the data a second time and actually fit an
# ARIMA(0,2,1). trend='n' keeps the model without a constant, as it was under
# the integrated specification. The summary therefore reports ARIMA(0, 0, 1).
# NOTE(review): confirm that a second difference was not intended.
model = ARIMA(df3['diff_1'], order=(0, 0, 1), trend='n')
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# ARIMA(1,1,0) for the level series.
# df3['diff_1'] is already the first difference, so the integration order is 0
# here; keeping d=1 would difference the data a second time and actually fit an
# ARIMA(1,2,0). trend='n' keeps the model without a constant, as it was under
# the integrated specification. The summary therefore reports ARIMA(1, 0, 0).
# NOTE(review): confirm that a second difference was not intended.
model = ARIMA(df3['diff_1'], order=(1, 0, 0), trend='n')
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# ARIMA(1,1,1) for the level series.
# df3['diff_1'] is already the first difference, so the integration order is 0
# here; keeping d=1 would difference the data a second time and actually fit an
# ARIMA(1,2,1). trend='n' keeps the model without a constant, as it was under
# the integrated specification. The summary therefore reports ARIMA(1, 0, 1).
# NOTE(review): confirm that a second difference was not intended.
model = ARIMA(df3['diff_1'], order=(1, 0, 1), trend='n')
model_fit_111 = model.fit()
print(model_fit_111.summary())

##### **3.3 Análise dos resíduos**

In [None]:
# Empty single-column frame that receives the model residuals in the next cell.
cols = ['resid']
residuals = pd.DataFrame(columns = cols)

In [None]:
# Store the residuals of the fitted model held in `model_fit_111`.
residuals['resid'] = model_fit_111.resid

In [None]:
# Preview the first residuals.
residuals.head()

In [None]:
# Ljung-Box test for remaining autocorrelation in the model residuals,
# evaluated at lag 30 and returned as a DataFrame.
acorr_ljungbox(residuals['resid'], lags=[30], return_df=True)

In [None]:
# Partial autocorrelation of the model residuals. The trailing semicolon
# suppresses the returned Figure, which Jupyter would otherwise render a second time.
# NOTE(review): this cell was labelled "Correlogram squared", but the residuals
# are plotted as-is; square them first if the aim is to check for ARCH effects - confirm.
plot_pacf(residuals['resid']);

In [None]:
# Autocorrelation of the model residuals. The trailing semicolon suppresses the
# returned Figure, which Jupyter would otherwise render a second time.
plot_acf(residuals['resid']);