In [13]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller


In [14]:
cpi_df = pd.read_csv('CPIAUCSL.csv')
unrate_df = pd.read_csv('UNRATE.csv')

cpi_df['DATE'] = pd.to_datetime(cpi_df['DATE'])
unrate_df['DATE'] = pd.to_datetime(unrate_df['DATE'])

merged_df = pd.merge(cpi_df, unrate_df, on='DATE', how='inner')

merged_df['CPI_RATE'] = merged_df['CPIAUCSL'].pct_change() * 100
merged_df = merged_df.dropna()

In [15]:
def identify_outliers_zscore(df, column, threshold=3):
    mean = df[column].mean()
    std = df[column].std()
    return df[np.abs((df[column] - mean) / std) > threshold].index

cpi_outliers = identify_outliers_zscore(merged_df, 'CPI_RATE')
unrate_outliers = identify_outliers_zscore(merged_df, 'UNRATE')

merged_df.loc[cpi_outliers, 'CPI_RATE'] = np.nan
merged_df.loc[unrate_outliers, 'UNRATE'] = np.nan

merged_df['CPI_RATE'] = merged_df['CPI_RATE'].interpolate()
merged_df['UNRATE'] = merged_df['UNRATE'].interpolate()

In [16]:

Y = merged_df['CPI_RATE']
X = merged_df['UNRATE']
X_with_const = sm.add_constant(X)


cointegration_model = sm.OLS(Y, X_with_const).fit()
print("Cointegration regression summary:")
print(cointegration_model.summary())

Cointegration regression summary:
                            OLS Regression Results                            
Dep. Variable:               CPI_RATE   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     4.825
Date:                Sat, 02 Nov 2024   Prob (F-statistic):             0.0283
Time:                        02:15:48   Log-Likelihood:                -172.07
No. Observations:                 918   AIC:                             348.1
Df Residuals:                     916   BIC:                             357.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2

In [17]:
# extract residuals and test for stationarity (ADF test on residuals)
residuals = cointegration_model.resid
adf_test = adfuller(residuals)


print("\nADF Test on Residuals:")
print(f"ADF Statistic: {adf_test[0]}")
print(f"p-value: {adf_test[1]}")
print("Critical Values:", adf_test[4])


if adf_test[1] < 0.05:
    print("The residuals are stationary (reject the null of unit root). Y and X are likely cointegrated.")
else:
    print("The residuals are not stationary (do not reject the null of unit root). Y and X are not cointegrated.")


ADF Test on Residuals:
ADF Statistic: -3.723605127592001
p-value: 0.003793115547989496
Critical Values: {'1%': -3.43761244359048, '5%': -2.8647460220589736, '10%': -2.568477099382028}
The residuals are stationary (reject the null of unit root). Y and X are likely cointegrated.
