In [4]:
import pandas as pd

# Load the monthly gold-price dataset from a path relative to the notebook.
csv_path = 'dataset/gold_dataset_monthly.csv'
dataset = pd.read_csv(csv_path)

In [5]:
# Preview the first five rows to check that the columns were parsed as expected.
dataset.head(n=5)

Unnamed: 0,ano,mes,preco_ouro,ipc,indice_nyse
0,2007,1,631.06,202.42,9132.09
1,2007,2,666.44,203.5,9346.47
2,2007,3,654.19,205.35,9120.57
3,2007,4,679.64,206.69,9555.98
4,2007,5,667.59,207.95,9822.99


In [7]:
# Summary statistics for the numeric columns (quartiles spelled out explicitly;
# these are the pandas defaults).
# NOTE(review): the saved output below reports count = 16 and has no `mes`
# column, while the OLS summary further down reports 192 observations -- the
# saved output looks stale; re-run this cell on a fresh kernel to confirm.
dataset.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,ano,preco_ouro,ipc,indice_nyse
count,16.0,16.0,16.0,16.0
mean,2014.5,1336.895,239.42625,10609.211875
std,4.760952,328.244801,22.635042,2890.463599
min,2007.0,697.4,207.34,6100.64
25%,2010.75,1210.4675,223.22,8025.205
50%,2014.5,1267.58,236.88,10523.5
75%,2018.25,1596.7275,252.2475,12631.53
max,2022.0,1801.92,292.65,16262.1


In [6]:
import statsmodels.api as sm
import numpy as np

# Response variable: the gold price in levels.
# Alternative specification (not used here): regress on the log price,
# i.e. np.log(dataset['preco_ouro']).
y = dataset['preco_ouro']

# Design matrix: the two explanatory variables plus an intercept column.
X = sm.add_constant(dataset[['ipc', 'indice_nyse']])

# Fit the baseline OLS regression.
model = sm.OLS(endog=y, exog=X).fit()

# Print the full regression report.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             preco_ouro   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.578
Method:                 Least Squares   F-statistic:                     131.8
Date:                Thu, 11 Sep 2025   Prob (F-statistic):           1.43e-36
Time:                        13:08:03   Log-Likelihood:                -1301.4
No. Observations:                 192   AIC:                             2609.
Df Residuals:                     189   BIC:                             2619.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const       -2237.9571    258.867     -8.645      

In [None]:
from statsmodels.stats.diagnostic import het_breuschpagan

# Breusch-Pagan heteroskedasticity test on the fitted model: it takes the
# residuals and the design matrix (including the constant) used for the fit.
bp_test = het_breuschpagan(model.resid, model.model.exog)

# Pair each returned statistic with a readable label and print the result.
labels = ['LM Stat', 'LM p-value', 'F-Stat', 'F p-value']
bp_report = {label: value for label, value in zip(labels, bp_test)}
print(bp_report)

{'LM Stat': np.float64(40.594605892392394), 'LM p-value': np.float64(1.5310639574441887e-09), 'F-Stat': np.float64(25.337209941837372), 'F p-value': np.float64(1.7839853136886386e-10)}


In [15]:
# Correlation between the two regressors, as a quick collinearity check.
ipc_series = dataset['ipc']
nyse_series = dataset['indice_nyse']
ipc_series.corr(nyse_series)

np.float64(0.9095396849841281)

In [32]:
# Re-express the fitted model with HC0 heteroskedasticity-robust standard errors.
# In the saved summaries the coefficient is unchanged and only the standard
# error / t statistic differ from the non-robust fit.
robust_model = model.get_robustcov_results(cov_type='HC0')

robust_summary = robust_model.summary()
print(robust_summary)

                            OLS Regression Results                            
Dep. Variable:             preco_ouro   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.578
Method:                 Least Squares   F-statistic:                     96.36
Date:                Tue, 09 Sep 2025   Prob (F-statistic):           1.42e-29
Time:                        20:31:00   Log-Likelihood:                -1301.4
No. Observations:                 192   AIC:                             2609.
Df Residuals:                     189   BIC:                             2619.
Df Model:                           2                                         
Covariance Type:                  HC0                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const       -2237.9571    302.429     -7.400      

In [33]:
from statsmodels.stats.diagnostic import het_breuschpagan

# Breusch-Pagan test using the residuals and design matrix of the robust results.
# NOTE(review): the saved output is identical to the test run on `model`
# (same LM and F statistics), which is consistent with the robust results
# carrying the same residuals -- this cell does not add new information.
robust_residuals = robust_model.resid
robust_exog = robust_model.model.exog
bp_test = het_breuschpagan(robust_residuals, robust_exog)

# Label the returned statistics and print them.
labels = ['LM Stat', 'LM p-value', 'F-Stat', 'F p-value']
print({label: stat for label, stat in zip(labels, bp_test)})

{'LM Stat': np.float64(40.594605892392394), 'LM p-value': np.float64(1.5310639574441887e-09), 'F-Stat': np.float64(25.337209941837372), 'F p-value': np.float64(1.7839853136886386e-10)}


In [7]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Design matrix for the VIF computation: both regressors plus the constant.
X_VIF = sm.add_constant(dataset[['ipc', 'indice_nyse']])

# One VIF per column of the design matrix (the constant's row is included too).
vif_values = [
    variance_inflation_factor(X_VIF.values, col_idx)
    for col_idx in range(X_VIF.shape[1])
]
vif_data = pd.DataFrame({'Variavel': X_VIF.columns, 'VIF': vif_values})

vif_data

Unnamed: 0,Variavel,VIF
0,const,280.201462
1,ipc,4.619637
2,indice_nyse,4.619637


In [4]:
import pandas as pd
import statsmodels.api as sm

# Create lagged variables.
# One-period lag of the gold price (the previous row's value). This assumes the
# rows are already in chronological order, as the head() preview suggests.
dataset['preco_ouro_lag1'] = dataset['preco_ouro'].shift(1)

# Lags of the explanatory variables can be added the same way if desired:
# dataset['ipc_lag1'] = dataset['ipc'].shift(1)
# dataset['indice_nyse_lag1'] = dataset['indice_nyse'].shift(1)

# Columns that actually enter the regression.
lag_target = 'preco_ouro'
lag_regressors = ['ipc', 'indice_nyse', 'preco_ouro_lag1']

# Drop only the rows that are missing a value in a model column (the first row
# has no lag). A bare dropna() would also discard rows whose NaN sits in a
# column the regression never uses.
dataset_lagged = dataset.dropna(subset=[lag_target, *lag_regressors])

# Define X and y.
# NOTE: this rebinds X, y and model, which earlier cells also assign; re-running
# those cells afterwards will operate on the lagged model.
X = sm.add_constant(dataset_lagged[lag_regressors])  # adds the intercept
y = dataset_lagged[lag_target]

# Fit the OLS model that includes the lagged gold price.
model = sm.OLS(y, X).fit()

# Print the model summary.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             preco_ouro   R-squared:                       0.979
Model:                            OLS   Adj. R-squared:                  0.978
Method:                 Least Squares   F-statistic:                     2868.
Date:                Wed, 10 Sep 2025   Prob (F-statistic):          4.89e-156
Time:                        16:58:28   Log-Likelihood:                -1008.5
No. Observations:                 191   AIC:                             2025.
Df Residuals:                     187   BIC:                             2038.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const             -13.6050     69.629     