In [2]:
# General libraries
# import jax
import sys


import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
from IPython.display import clear_output

# Own code
# Add the parent directory to the import path so the `utils` imports below can be
# found (assumes the notebook is run from a sub-directory of the project root).
sys.path.append("../")
from utils.data_utils import create_data, create_dgp_data, transformation, standardize
from utils.tvp_models import TVPVARModel

# Suppress scientific notation in numpy
np.set_printoptions(suppress=True)

# Seed NumPy's legacy global RNG so that any later np.random draws are reproducible.
np.random.seed(12345)

In [3]:
# Load the raw dataset (FRED-QD, going by the file name); the path is relative to
# the notebook's working directory.
ds = pd.read_csv("data/fred_qd.csv")

# Number of leading observations in the initial estimation sample: later cells use
# X[:train] for the first fit and X[:train + i] for the expanding-window re-fits.
train = 181

In [4]:
# Flag forwarded unchanged as the third argument of every transformation() call.
transform = True

# (column in `ds`, transformation code) for each series, in model order.
# NOTE(review): the integer codes are presumably FRED-style tcodes interpreted by
# utils.data_utils.transformation -- confirm against that helper.
fred_series_spec = [
    ("GDPC1", 5),
    ("CPIAUCSL", 6),
    ("FEDFUNDS", 2),
    ("PPIACO", 6),
    ("TOTRESNS", 6),
    ("S&P 500", 5),
    ("M2REAL", 5),
]

# The first two rows of each column are skipped before transforming
# (presumably non-data header rows in the csv -- confirm).
series_total = [
    transformation(ds[column].iloc[2:].to_numpy(), code, transform)
    for column, code in fred_series_spec
]

# Keep the individual series available under their original names.
# For the small 3-variable system use [gdp, cpi, fedfund] instead of series_total.
gdp, cpi, fedfund, compi, borrowings, sp500, m2 = series_total

In [5]:
# Standardisation is currently disabled:
# standardized_series = standardize(series_total, train)

# Stack the series and transpose so that, assuming each series is 1-D, rows are
# observations and columns are series.
X = np.transpose(np.asarray(series_total))

# Initial estimation sample: the first `train` observations.
X_train = X[:train]

In [6]:
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

# Set up a VAR on the initial training sample only; it is estimated in the next cell.
# NOTE(review): `var_ols` is reassigned inside the forecasting loop further down,
# so its value depends on which cells have been run.
var_ols = VAR(X_train)

In [89]:
# Estimate the VAR, passing 4 as the lag argument of fit().
# NOTE(review): `var_results` is overwritten by every iteration of the forecasting
# loop further down; after that loop it holds the last expanding-window fit.
var_results = var_ols.fit(4)

In [94]:
# Expanding-window out-of-sample exercise: for each forecast origin the VAR is
# re-estimated on all observations up to that origin and used to forecast ahead.
N_FORECAST_ORIGINS = 62  # number of expanding-window re-estimations
MAX_HORIZON = 8          # forecast steps produced at each origin
VAR_LAGS = 4             # lag argument passed to VAR.fit

# forecasting_results[i, :, h] holds the forecast of every series made at origin i
# for step h (0-based) ahead.
forecasting_results = np.zeros((N_FORECAST_ORIGINS, len(series_total), MAX_HORIZON))

for i in range(N_FORECAST_ORIGINS):
    # Estimation sample grows by one observation per origin.
    sample = X[:train + i]

    # The names var_ols / var_results are kept so that, after the loop, they refer
    # to the last fit exactly as before.
    var_ols = VAR(sample)
    var_results = var_ols.fit(VAR_LAGS)

    # forecast() yields one row per step; transpose to (n_series, steps) so it fits
    # the [i, :, :] slot of forecasting_results.
    forecasting_results[i] = var_results.forecast(sample, MAX_HORIZON).T

In [95]:
def calculate_metrics_default(y_pred, y_true, total_h):
    """Compute per-horizon MSFE and average log predictive likelihood (ALPL).

    For horizon index ``h`` the forecast ``y_pred[t, :, h]`` is compared with the
    realisation ``y_true[t + h]``, so only the first ``len(y_pred) - h`` forecast
    origins are used at that horizon.

    The predictive density at each horizon is a multivariate normal centred on the
    point forecast, with covariance equal to the sample covariance of the point
    forecasts at that horizon (``np.cov`` across forecast origins).
    NOTE(review): this is the covariance of the forecasts themselves, not a
    forecast-error covariance -- confirm this is the intended density.

    Parameters
    ----------
    y_pred : array-like, shape (n_forecasts, n_series, n_horizons)
        Point forecasts, indexed by forecast origin, series and horizon.
    y_true : array-like, shape (n_forecasts, n_series)
        Realised values, aligned so that ``y_true[t]`` is the target of
        ``y_pred[t, :, 0]``.
    total_h : int
        Number of horizons to evaluate (indices ``0 .. total_h - 1``).

    Returns
    -------
    msfe : np.ndarray, shape (total_h,)
        Mean squared forecast error, averaged over origins and series.
    alpl : np.ndarray, shape (total_h,)
        Mean of the log predictive density over the usable origins. Horizons
        with fewer than two usable forecasts get ``nan`` because no covariance
        can be estimated from them.

    Notes
    -----
    Earlier revisions averaged ``multivariate_normal.pdf`` (raw densities) despite
    the ``lpl`` / ``alpl`` naming; the log density is used now, so ``alpl`` values
    differ from those produced before. ``msfe`` is unchanged.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)

    msfe = np.zeros(total_h)
    alpl = np.zeros(total_h)
    number_of_predictions = len(y_pred)

    for h in range(total_h):
        n_eval = number_of_predictions - h
        if n_eval < 1:
            # No forecast origin has a realisation this far ahead.
            msfe[h] = np.nan
            alpl[h] = np.nan
            continue

        # Slice by explicit length: `y_pred[:-h]` would be empty for h == 0, which
        # is the only reason the original needed a separate h == 0 branch.
        y_pred_h = y_pred[:n_eval, :, h]
        errors = y_true[h:] - y_pred_h

        msfe[h] = np.mean(errors ** 2)

        if n_eval < 2:
            # A covariance cannot be estimated from a single forecast.
            alpl[h] = np.nan
            continue

        # Loop-invariant: one covariance per horizon, not one per observation.
        cov = np.cov(y_pred_h.T)

        # N(y_true | y_pred, cov) equals N(y_true - y_pred | 0, cov), which lets
        # every origin be scored in a single vectorised call (mean defaults to 0).
        log_scores = multivariate_normal.logpdf(errors, cov=cov, allow_singular=True)
        alpl[h] = np.mean(log_scores)

    return msfe, alpl

In [96]:
# Score the expanding-window VAR forecasts at horizon indices 0..7.
# NOTE(review): X[train:-2] must have exactly as many rows as forecasting_results
# for the h == 0 comparison inside calculate_metrics_default to line up; the
# hard-coded -2 presumably trims the sample to that length -- confirm.
msfe_var_ols, alpl_var_ols = calculate_metrics_default(forecasting_results, X[train:-2], 8)

# Only the MSFE is shown; alpl_var_ols is computed but not displayed in this cell.
# NOTE(review): the printed MSFE grows roughly geometrically with the horizon --
# worth checking whether the fitted VARs are stable.
print(msfe_var_ols)

[  0.09375606   0.3845755    1.18235727   3.88065218  13.30131582
  46.09510715 158.95827955 548.489638  ]


In [266]:
# Columns of `ds` that make up the 7-variable system, in model order.
fred_columns = [
    "GDPC1",
    "CPIAUCSL",
    "FEDFUNDS",
    "PPIACO",
    "TOTRESNS",
    "S&P 500",
    "M2REAL",
]

# Raw (untransformed) values of those columns, with the first two rows dropped.
df_for_csv = pd.DataFrame(ds[fred_columns].iloc[2:])

In [268]:
# Replace the integer row index with quarter-end dates, in place. The generated
# range must contain exactly as many dates as df_for_csv has rows, otherwise
# set_index raises.
# NOTE(review): newer pandas releases deprecate the 'Q' alias in favour of 'QE';
# revisit when upgrading.
df_for_csv.set_index(pd.date_range('03-01-1959', '06-01-2020', freq='Q'), inplace=True)
# Disabled rename: the frame's columns already carry the FRED mnemonics, so the
# integer keys below would not match any column.
#df_for_csv.rename(columns={0: 'GDP', 1: 'CPI', 2: 'FEDFUNDS', 3: 'COMPI', 4: 'BORROWINGS', 5: 'SP500', 6: 'M2'}, inplace=True)

In [271]:
# Write the untransformed 7-series frame to disk; with to_csv's defaults the index
# is written as the first column. The path is relative to the working directory.
df_for_csv.to_csv('data/normal_7_fred.csv')

In [272]:
# Display the frame to eyeball the index and the column values.
df_for_csv

Unnamed: 0,GDPC1,CPIAUCSL,FEDFUNDS,PPIACO,TOTRESNS,S&P 500,M2REAL
1959-03-31,3121.936,28.9933,2.5700,31.7000,18.6300,55.5167,992.7667
1959-06-30,3192.380,29.0433,3.0833,31.7667,18.5613,57.5067,1005.8333
1959-09-30,3194.653,29.1933,3.5767,31.6667,18.6320,58.7300,1014.3000
1959-12-31,3203.759,29.3700,3.9900,31.5333,18.7280,57.7633,1011.7000
1960-03-31,3275.757,29.3967,3.9333,31.6667,18.3640,56.2767,1016.1000
...,...,...,...,...,...,...,...
2019-03-31,18927.281,253.2753,2.4033,199.7000,1715.6257,2722.0767,5713.2667
2019-06-30,19021.860,255.1707,2.3967,201.3667,1600.5073,2882.8933,5746.7333
2019-09-30,19121.112,256.3247,2.1900,199.4333,1556.7547,2958.5900,5827.4667
2019-12-31,19221.970,257.8323,1.6433,198.8667,1613.5673,3086.4433,5907.8333


Unnamed: 0,GDPC1,CPIAUCSL,FEDFUNDS,PPIACO,TOTRESNS,S&P 500,M2REAL
2,3121.936,28.9933,2.5700,31.7000,18.6300,55.5167,992.7667
3,3192.380,29.0433,3.0833,31.7667,18.5613,57.5067,1005.8333
4,3194.653,29.1933,3.5767,31.6667,18.6320,58.7300,1014.3000
5,3203.759,29.3700,3.9900,31.5333,18.7280,57.7633,1011.7000
6,3275.757,29.3967,3.9333,31.6667,18.3640,56.2767,1016.1000
...,...,...,...,...,...,...,...
242,18927.281,253.2753,2.4033,199.7000,1715.6257,2722.0767,5713.2667
243,19021.860,255.1707,2.3967,201.3667,1600.5073,2882.8933,5746.7333
244,19121.112,256.3247,2.1900,199.4333,1556.7547,2958.5900,5827.4667
245,19221.970,257.8323,1.6433,198.8667,1613.5673,3086.4433,5907.8333


In [22]:
# Scratch lookup: which quarter-end date sits at position 178 of a quarterly range
# starting in September 1960.
# NOTE(review): the start date differs from the '03-01-1959' used for the index of
# df_for_csv -- confirm which origin is intended.
pd.date_range('09-01-1960', '06-01-2020', freq='Q')[178]

Timestamp('2005-03-31 00:00:00', freq='Q-DEC')