In [1]:
import linearmodels as lm
import pandas as pd
import numpy as np

Collecting linearmodels
  Downloading linearmodels-5.1-cp39-cp39-win_amd64.whl (1.9 MB)
     ---------------------------------------- 1.9/1.9 MB 13.7 MB/s eta 0:00:00
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.1.2-py3-none-any.whl (18 kB)
Collecting formulaic>=0.6.1 (from linearmodels)
  Downloading formulaic-0.6.2-py3-none-any.whl (82 kB)
     ---------------------------------------- 82.4/82.4 kB ? eta 0:00:00
Collecting setuptools-scm[toml]<8.0.0,>=7.0.0 (from linearmodels)
  Downloading setuptools_scm-7.1.0-py3-none-any.whl (43 kB)
     ---------------------------------------- 43.8/43.8 kB 2.2 MB/s eta 0:00:00
Collecting astor>=0.8 (from formulaic>=0.6.1->linearmodels)
  Using cached astor-0.8.1-py2.py3-none-any.whl (27 kB)
Collecting interface-meta>=1.2.0 (from formulaic>=0.6.1->linearmodels)
  Downloading interface_meta-1.3.0-py3-none-any.whl (14

In [None]:
# Put all the data in a single DataFrame: one row per
# (target, horizon, scaling, dim_red, regression) combination. The 'forecast'
# and 'actual' cells each hold the full array loaded from the .npy file.
results_dir = 'resources/results/forecasts_good'
forecast_columns = ['forecast', 'actual', 'target', 'horizon', 'scaling', 'dim_red', 'regression']

# Rows are collected in a plain list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
forecast_records = []
for target in ['inflation', 'ip_growth', 'unemployment']:
    for horizon in [1, 6, 12]:
        # The realised series depends only on target and horizon, so load it
        # once here; every model row of this group references the same array.
        actual = np.load('{}/{}_actual_h{}.npy'.format(results_dir, target, horizon))
        for scaling in ['none', 'regression', 'distance_correlation']:
            for dim_red in ['pca', 'rbf', 'ae_bayes']:
                for regression in ['ols', 'rf', 'svr']:
                    forecast = np.load('{}/{}_{}_{}_{}_h{}.npy'.format(results_dir, target, dim_red, scaling, regression, horizon))
                    forecast_records.append({
                        'forecast': forecast,
                        'actual': actual,
                        'target': target,
                        'horizon': horizon,
                        'scaling': scaling,
                        'dim_red': dim_red,
                        'regression': regression,
                    })

# `columns=` (not `colnames=`) is the DataFrame keyword that fixes column order.
forecasts = pd.DataFrame(forecast_records, columns=forecast_columns)

In [None]:
# Create a boolean dummy column for each combination of target and horizon.
# NOTE(review): the original comment also mentioned "time", but no time
# dummies are built in this cell -- confirm whether they are still needed.
for target in ['inflation', 'ip_growth', 'unemployment']:
    for horizon in [1, 6, 12]:
        forecasts['{}_h{}'.format(target, horizon)] = (forecasts['target'] == target) & (forecasts['horizon'] == horizon)

# Create a dummy variable for nonlinearity in scaling, dimensionality reduction, and regression.
# Series.isin gives the element-wise membership test; Python's `in` operator
# applied to a Series raises "truth value of a Series is ambiguous".
forecasts['scaling_nonlinearity'] = forecasts['scaling'].isin(['distance_correlation'])
forecasts['dim_red_nonlinearity'] = forecasts['dim_red'].isin(['rbf', 'ae_bayes'])
forecasts['regression_nonlinearity'] = forecasts['regression'].isin(['rf', 'svr'])

In [None]:
def get_R2(forecast, actual):
    """Return the element-wise R2 of `forecast` relative to a mean benchmark.

    Each squared forecast error is divided by the mean squared deviation of
    `actual` from its own mean (a single scalar), and the ratio is subtracted
    from one. The result has the same shape as `actual - forecast`.
    """
    # Benchmark: mean squared error of predicting `actual` by its mean
    benchmark_mse = np.mean((actual - np.mean(actual)) ** 2)

    # Squared forecast errors, expressed as a share of the benchmark MSE
    scaled_sq_error = ((actual - forecast) ** 2) / benchmark_mse

    return 1 + (-1 * scaled_sq_error)

In [None]:
# Compute the R2 vector of every forecast and store it in the column that
# belongs to its own target/horizon combination. Rows of other combinations
# are left as NaN in that column (the assignment aligns on the index).
for target in ['inflation', 'ip_growth', 'unemployment']:
    for horizon in [1, 6, 12]:
        # Restrict to the rows of this target/horizon; previously the loop
        # variables only named the column, so all nine columns were identical.
        in_group = (forecasts['target'] == target) & (forecasts['horizon'] == horizon)
        group = forecasts.loc[in_group]

        # get_R2 works on one forecast/actual array pair, so call it row by
        # row rather than on the whole object-dtype columns.
        r2_vectors = [get_R2(fc, ac) for fc, ac in zip(group['forecast'], group['actual'])]

        # dtype=object keeps each R2 array as a single cell value.
        forecasts['{}_h{}_R2'.format(target, horizon)] = pd.Series(r2_vectors, index=group.index, dtype=object)

In [None]:
# Run fixed effects regressions
