In [74]:
import pandas as pd
import numpy as np
import statsmodels.api as sm


In [75]:
cpi_df = pd.read_csv('CPIAUCSL.csv')
unrate_df = pd.read_csv('UNRATE.csv')

cpi_df['DATE'] = pd.to_datetime(cpi_df['DATE'])
unrate_df['DATE'] = pd.to_datetime(unrate_df['DATE'])

merged_df = pd.merge(cpi_df, unrate_df, on='DATE', how='inner')

merged_df['CPI_RATE'] = merged_df['CPIAUCSL'].pct_change() * 100
merged_df = merged_df.dropna()


In [76]:
def identify_outliers_zscore(df, column, threshold=3):
    mean = df[column].mean()
    std = df[column].std()
    return df[np.abs((df[column] - mean) / std) > threshold].index

cpi_outliers = identify_outliers_zscore(merged_df, 'CPI_RATE')
unrate_outliers = identify_outliers_zscore(merged_df, 'UNRATE')

merged_df.loc[cpi_outliers, 'CPI_RATE'] = np.nan
merged_df.loc[unrate_outliers, 'UNRATE'] = np.nan

merged_df['CPI_RATE'] = merged_df['CPI_RATE'].interpolate()
merged_df['UNRATE'] = merged_df['UNRATE'].interpolate()

In [77]:
p=4

data = pd.DataFrame({
    'Y': merged_df['CPI_RATE'],
    'X': merged_df['UNRATE']
})

for i in range(1, p+1):
    data[f'Y_lag{i}'] = data['Y'].shift(i)
    data[f'X_lag{i}'] = data['X'].shift(i)

data = data.dropna()

In [78]:
Y = data['Y']
X = data[['X'] + [f'Y_lag{i}' for i in range(1, p+1)] + [f'X_lag{i}' for i in range(1, p+1)]]

#fit the ADL(p, p) model
adl_model = sm.OLS(Y, sm.add_constant(X)).fit()

In [79]:
iterative_forecasts = []
current_data = data.iloc[-1].copy()  # last known values as starting point

print("Iterative forecasts for each period up to 10:")

for i in range(10):
    # predict next value
    forecast = adl_model.predict(sm.add_constant(current_data.values.reshape(1, -1)))[0]
    iterative_forecasts.append(forecast)
    
    
    print(f"Forecast for period {i+1}: {forecast}")

    #shift lags for next iteration
    for lag in range(p, 1, -1):
        current_data[f'Y_lag{lag}'] = current_data[f'Y_lag{lag - 1}']
        current_data[f'X_lag{lag}'] = current_data[f'X_lag{lag - 1}']
    current_data['Y_lag1'] = forecast
    current_data['X_lag1'] = current_data['X']

Iterative forecasts for each period up to 10:
Forecast for period 1: 0.6438940684869734
Forecast for period 2: 0.9742853510259678
Forecast for period 3: 1.2376939640147266
Forecast for period 4: 1.4815386494497025
Forecast for period 5: 1.6175285622769646
Forecast for period 6: 1.7058627854608908
Forecast for period 7: 1.7665282571790932
Forecast for period 8: 1.7977701885929511
Forecast for period 9: 1.8164323411463004
Forecast for period 10: 1.8275743586728377


In [82]:
direct_forecasts = []

forecast_horizon = 10 

for i in range(1, forecast_horizon + 1):
    #shift data to predict for each horizon directly
    shifted_data = data.iloc[-1:].copy()  # use last row as base
    for lag in range(1, p + 1):
        if lag <= i:
            # shift the 'Y' and 'X' lags forward by `i` steps
            shifted_data[f'Y_lag{lag}'] = shifted_data['Y'].values[0]  
            shifted_data[f'X_lag{lag}'] = shifted_data['X'].values[0]
    
   
    forecast = adl_model.predict(sm.add_constant(shifted_data.values.reshape(1, -1)))[0]
    direct_forecasts.append(forecast)


for i, forecast in enumerate(direct_forecasts, start=1):
    print(f"Direct forecast for period {i}: {forecast}")


Direct forecast for period 1: 0.7597756225694081
Direct forecast for period 2: 0.8123719550266331
Direct forecast for period 3: 0.7575013636679808
Direct forecast for period 4: 0.827096576709365
Direct forecast for period 5: 0.827096576709365
Direct forecast for period 6: 0.827096576709365
Direct forecast for period 7: 0.827096576709365
Direct forecast for period 8: 0.827096576709365
Direct forecast for period 9: 0.827096576709365
Direct forecast for period 10: 0.827096576709365
