In [1]:
import pandas as pd
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import grangercausalitytests

In [2]:
# Read the Brazilian and US PVC price sheets, one workbook per market.
# Each (workbook, sheet) pair is loaded in the same order as the names on the left.
pvc_brl, pvc_usa = (
    pd.read_excel(workbook, sheet_name=sheet)
    for workbook, sheet in (
        ('PVC_BRL.xlsx', 'PVC_BRL'),
        ('PVC_USA.xlsx', 'PVC_USA'),
    )
)

In [3]:
from Utils.Interpolate import Interpolate

# Name of the price column that is differenced in both frames.
price_col = 'PVC BRL/tonne'

# First-difference each price series (change versus the previous row).
# `.assign` returns a new frame, so the raw `pvc_brl` / `pvc_usa` frames stay
# untouched and re-running this cell cannot difference the data a second time.
# The first row of each differenced column is NaN (it has no previous value);
# calling `.dropna()` on the Series before assigning it back would be undone by
# index alignment, so it is omitted here.
data_dict = {
    'pvc_usa': pvc_usa.assign(**{price_col: pvc_usa[price_col].diff()}),
    'pvc_brl': pvc_brl.assign(**{price_col: pvc_brl[price_col].diff()}),
}

# Index every frame by its "Date" column before handing the dict to Interpolate.
data_dict = {name: frame.set_index("Date") for name, frame in data_dict.items()}

In [4]:
# Combine the per-series frames into one table with the project's Interpolate
# helper (presumably aligning them on a shared date index -- TODO confirm in
# Utils.Interpolate).
interp = Interpolate(data_dict)

# Back-fill each gap from the next available observation, then drop any rows
# that still contain missing values. `DataFrame.bfill()` replaces the deprecated
# `fillna(method='bfill')`, and chaining avoids mutating the frame in place.
df = interp.process_data().bfill().dropna()


In [5]:
# Display the cleaned frame that is passed to the VAR model and the Granger test.
df

Unnamed: 0_level_0,pvc_usa,pvc_brl
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-04-03,151.534000,-28.582
2000-04-10,151.534000,-28.582
2000-04-17,151.534000,-28.582
2000-04-24,151.534000,-28.582
2000-05-01,151.534000,-28.582
...,...,...
2024-01-01,-119.597000,12.968
2024-01-08,2.940258,-26.289
2024-01-15,125.477516,-202.763
2024-01-22,248.014774,-13.096


In [6]:
# Estimate a vector autoregression on the combined frame, allowing at most one lag.
model = VAR(df)
results = model.fit(maxlags=1)

# Run the Granger causality tests at lag 1 without printing the per-test report.
# statsmodels tests whether the SECOND column of `df` helps predict the FIRST
# column, so the column order of `df` decides the direction being tested.
granger_test_results = grangercausalitytests(df, maxlag=1, verbose=False)

# For every tested lag keep the p-value (position 1) of the SSR-based F test.
# Each entry of `granger_test_results` is indexed at 0 to reach the dict of tests.
p_values = {}
for lag, outcome in granger_test_results.items():
    test_stats = outcome[0]
    p_values[lag] = test_stats['ssr_ftest'][1]
print(p_values)


{1: 9.88963056798517e-07}




In [13]:
# Forecasting
# Reload the raw price sheets and rebuild `df` WITHOUT differencing, so `df`
# holds price levels from this cell onwards.
from Utils.Interpolate import Interpolate

pvc_brl = pd.read_excel('PVC_BRL.xlsx', sheet_name='PVC_BRL')
pvc_usa = pd.read_excel('PVC_USA.xlsx', sheet_name='PVC_USA')

data_dict = {
    'pvc_usa': pvc_usa,
    'pvc_brl': pvc_brl,
}

# Index every frame by its "Date" column before handing the dict to Interpolate.
for name in data_dict:
    data_dict[name] = data_dict[name].set_index("Date")

interp = Interpolate(data_dict)

# Back-fill each gap from the next available observation, then drop any rows
# that still contain missing values. `DataFrame.bfill()` replaces the deprecated
# `fillna(method='bfill')`, and chaining avoids mutating the frame in place.
df = interp.process_data().bfill().dropna()


In [14]:
# Display the frame rebuilt in the previous cell (no differencing was applied there).
df

Unnamed: 0_level_0,pvc_usa,pvc_brl
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-04-03,1688.259267,1842.534
2000-04-10,1723.617200,1842.534
2000-04-17,1758.975133,1842.534
2000-04-24,1794.333067,1842.534
2000-05-01,1829.691000,1842.534
...,...,...
2024-01-01,7328.679000,5650.607
2024-01-08,7424.210484,5624.318
2024-01-15,7519.741968,5421.555
2024-01-22,7615.273452,5408.459


In [ ]:

# Make the forecasts
lag_order = results.k_ar
forecast_steps = 5

# Seed the forecast with the last `lag_order` observations the model was
# actually fitted on. `results` was estimated on the first-differenced frame,
# while `df` now holds price levels, so `df.values` must not be used as the seed.
forecasted_values = results.forecast(results.endog[-lag_order:], steps=forecast_steps)

# Build the forecast dates with the same spacing as the observed rows, starting
# one step AFTER the last observation (the previous month-end index neither
# matched the row spacing nor skipped the last observed date).
observed_dates = pd.to_datetime(df.index)
step = observed_dates[-1] - observed_dates[-2]
forecast_index = pd.date_range(
    start=observed_dates[-1] + step,
    periods=forecast_steps,
    freq=step,
)

# Convert forecasted values to a DataFrame.
# NOTE(review): the values are forecasts of the differenced series the model was
# fitted on, not price levels -- converting them back to levels is still to do.
df_forecast = pd.DataFrame(forecasted_values, index=forecast_index, columns=df.columns)

print(df_forecast)

In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt

# `granger_test_results` maps each tested lag to a tuple whose first element is
# a dict of {test name: result tuple}; the p-value sits at position 1 of every
# result tuple (the same position used for 'ssr_ftest' when building `p_values`).
# Passing the raw dictionary to DataFrame.from_dict yields object columns that
# a heatmap cannot draw, so a numeric lag x test table of p-values is built first.
p_value_rows = {
    lag: {test_name: stats[1] for test_name, stats in outcome[0].items()}
    for lag, outcome in granger_test_results.items()
}

# One row per lag, one column per test
df_results = pd.DataFrame.from_dict(p_value_rows, orient='index')

# Set up the matplotlib figure
plt.figure(figsize=(12, 8))

# Draw the heatmap; ".2g" keeps very small p-values readable instead of
# rounding them to 0.00
sns.heatmap(df_results, annot=True, fmt=".2g", linewidths=.5, cmap='coolwarm')

# Add labels and a title
plt.title('Granger Causality Test Results (p-values)')
plt.xlabel('Test')
plt.ylabel('Lag')

# Show the plot
plt.show()
