In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load data

In [2]:
# The path is relative to the notebook's working directory.
macro_csv_path = "datasets/Macro1.csv"
df = pd.read_csv(macro_csv_path)

In [3]:
# Remove the row labelled 0 (presumably a non-observation header row such as
# transformation codes — TODO confirm against the CSV). errors="ignore" keeps
# this cell re-runnable: once the row is gone, a second execution would
# otherwise raise KeyError.
df = df.drop(index=0, errors="ignore")
# Parse the date column into datetime values.
df['sasdate'] = pd.to_datetime(df['sasdate'])
df.head(3)

Unnamed: 0,sasdate,RPI,W875RX1,DPCERA3M086SBEA,CMRMTSPLx,RETAILx,INDPRO,IPFPNSS,IPFINAL,IPCONGD,...,DSERRG3M086SBEA,CES0600000008,CES2000000008,CES3000000008,UMCSENTx,MZMSL,DTCOLNVHFNM,DTCTHFNM,INVEST,VXOCLSx
1,1959-01-01,2289.8,2151.9,18.191,253747.578885,18234.44037,21.9289,21.5499,20.9407,28.483,...,12.133,2.13,2.45,2.04,,274.9,6476.0,12298.0,84.2,
2,1959-02-01,2299.6,2160.2,18.38,255653.461901,18368.21974,22.3584,21.8408,21.1221,28.6919,...,12.149,2.13,2.46,2.05,,276.0,6476.0,12298.0,83.5,
3,1959-03-01,2314.4,2176.1,18.555,254743.765035,18521.70306,22.6805,21.973,21.2257,28.6919,...,12.169,2.15,2.45,2.07,,277.4,6508.0,12349.0,81.6,


In [4]:
# Count the NaNs once and reuse the figure for both the absolute and the
# relative report.
n_missing = df.isna().sum().sum()
n_cells = df.shape[0] * df.shape[1]
missing_pct = round(100 * n_missing / n_cells, 3)
print(f'There are {n_missing} missing values in the dataset'
      f'\nThis represents {missing_pct}% of the whole dataset.')

There are 944 missing values in the dataset
This represents 1.108% of the whole dataset.


# Preprocessing

### Filling in missing values

In [5]:
from sklearn.impute import KNNImputer

In [6]:
# Impute with KNNImputer's default settings. 'sasdate' is excluded from the
# imputer input; df_filled holds the imputed values of the remaining columns.
numeric_columns = df.drop(columns=['sasdate'])
imputer = KNNImputer()
df_filled = imputer.fit_transform(numeric_columns)

In [7]:
# Write the imputed values back into every column except 'sasdate'. Selecting
# the target columns by name mirrors df.drop(columns=['sasdate']), which built
# the imputer input, so the columns line up with df_filled regardless of where
# 'sasdate' sits in the frame.
imputed_columns = list(df.columns.drop('sasdate'))
df[imputed_columns] = df_filled

In [8]:
# Sanity check: recount the NaNs after imputation.
remaining_missing = df.isna().sum().sum()
print(f'There are now {remaining_missing} missing values in the dataset.')

There are now 0 missing values in the dataset.


### Scale data

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Fit the scaler on every column except 'sasdate' and keep the transformed
# values in `scaled`; `df` itself is not modified by this cell.
feature_frame = df.drop(columns=['sasdate'])
scaler = StandardScaler()
scaled = scaler.fit_transform(feature_frame)

In [11]:
# Preview the frame after imputation. The scaler output was stored in `scaled`
# and not written back, so `df` still shows values on their original scale.
df.head(3)

Unnamed: 0,sasdate,RPI,W875RX1,DPCERA3M086SBEA,CMRMTSPLx,RETAILx,INDPRO,IPFPNSS,IPFINAL,IPCONGD,...,DSERRG3M086SBEA,CES0600000008,CES2000000008,CES3000000008,UMCSENTx,MZMSL,DTCOLNVHFNM,DTCTHFNM,INVEST,VXOCLSx
1,1959-01-01,2289.8,2151.9,18.191,253747.578885,18234.44037,21.9289,21.5499,20.9407,28.483,...,12.133,2.13,2.45,2.04,94.42,274.9,6476.0,12298.0,84.2,16.6388
2,1959-02-01,2299.6,2160.2,18.38,255653.461901,18368.21974,22.3584,21.8408,21.1221,28.6919,...,12.149,2.13,2.46,2.05,95.5,276.0,6476.0,12298.0,83.5,16.6388
3,1959-03-01,2314.4,2176.1,18.555,254743.765035,18521.70306,22.6805,21.973,21.2257,28.6919,...,12.169,2.15,2.45,2.07,94.42,277.4,6508.0,12349.0,81.6,16.6388


# Gibbs Sampler

### Import and define functions

In [12]:
# Execute GibbsSampler.ipynb. The helpers called below (draw_initial_q,
# draw_initial_R2, draw_initial_beta, draw_inital_sigma_squared, compute_gamma2,
# gibbs_sampling) are not defined in this notebook and presumably come from
# that file — confirm.
%run GibbsSampler.ipynb

Nb CPU cores: 12


In [13]:
# NOTE(review): `l` is not referenced in the visible part of this notebook —
# confirm it is still needed.
l = 0
# Default `T` for the functions defined below; it is forwarded to
# draw_inital_sigma_squared and gibbs_sampling.
# NOTE(review): presumably the number of observations — confirm whether it
# should equal X.shape[0] for this dataset.
T = 200

In [14]:
# Defaults forwarded to draw_initial_q (a, b) and draw_initial_R2 (A, B);
# presumably prior hyperparameters for q and R2 — TODO confirm against
# GibbsSampler.ipynb.
a, b, A, B = 1, 1, 1, 1

In [15]:
def draw_initial_parameters(X, Y, Ry, s, a=a, b=b, A=A, B=B, T=T):
    """Draw the starting values that are handed to the Gibbs sampler.

    Parameters
    ----------
    X : numpy.ndarray
        2-D predictor matrix with one column per predictor
        (``k = X.shape[1]``).
    Y : array-like
        Not used by this function; accepted so the call signature parallels
        ``run_gibbs_sampler``.
    Ry
        Forwarded unchanged to ``draw_inital_sigma_squared``.
    s
        Forwarded unchanged to ``draw_initial_beta``.
    a, b
        Forwarded to ``draw_initial_q``.
    A, B
        Forwarded to ``draw_initial_R2``.
    T
        Forwarded to ``draw_inital_sigma_squared``.

    Returns
    -------
    tuple
        ``(q, r2, beta, sigma_squared, z, v_x_bar, gamma2, q_grid)``.
    """
    q = draw_initial_q(a=a, b=b)
    r2 = draw_initial_R2(A=A, B=B)
    k = X.shape[1]
    beta, z = draw_initial_beta(s, k=k)
    sigma_squared = draw_inital_sigma_squared(X, beta, Ry, T=T)
    # Average of the per-predictor (column-wise) variances. Iterating
    # `for x in X` walks the rows of a 2-D array, which summed the variance of
    # each observation across predictors while still dividing by k.
    v_x_bar = np.var(X, axis=0).mean()
    gamma2 = compute_gamma2(X, r2, q, v_x_bar, k=k)
    # Grid of candidate q values: step 0.001 near 0 and near 1, step 0.01 in
    # between.
    q_grid = np.concatenate((
        np.arange(0.001, 0.1, 0.001),
        np.arange(0.1, 0.9, 0.01),
        np.arange(0.9, 1, 0.001),
    ))
    return q, r2, beta, sigma_squared, z, v_x_bar, gamma2, q_grid

In [16]:
def run_gibbs_sampler(X, Y, Ry, s, n_iter=11_000, burn_in=1000, a=a, b=b, A=A, B=B, T=T, display_=True):
    """Initialise the sampler state and run ``gibbs_sampling`` on it.

    The starting values come from ``draw_initial_parameters``; ``n_iter``,
    ``burn_in``, ``T`` and ``display_`` are passed straight through to
    ``gibbs_sampling``.

    Returns
    -------
    tuple
        The two objects returned by ``gibbs_sampling``, in the same order.
    """
    initial_state = draw_initial_parameters(X, Y, Ry, s, a=a, b=b, A=A, B=B, T=T)
    q0, r2_0, beta0, sigma2_0, z0, v_x_bar, gamma2_0, q_grid = initial_state
    sampled_params, sampled_beta = gibbs_sampling(
        q0, r2_0, Y, X, beta0, sigma2_0, z0, v_x_bar, gamma2_0, q_grid,
        k=X.shape[1],
        n_iter=n_iter,
        burn_in=burn_in,
        T=T,
        display_=display_,
    )
    return sampled_params, sampled_beta

### Run Gibbs sampler

In [17]:
# Feed the sampler the standardized values. `scaled` was built from `df` with
# 'sasdate' dropped, so its column 0 is the first data column (the target) and
# the remaining columns are the predictors. Slicing X and Y from `df` instead
# would pass raw, unscaled levels and leave the StandardScaler output unused.
scaled_values = np.asarray(scaled)
X = scaled_values[:, 1:]
Y = scaled_values[:, :1]  # kept 2-D, one column, like df[[target]].values

In [19]:
# Bind the sampler output to names so the long run (about 17 minutes in the
# recorded output) does not have to be repeated to inspect the draws.
# NOTE(review): no random seed is set in the visible cells, so results may
# differ between runs unless GibbsSampler.ipynb seeds the generator — confirm.
posterior_params, posterior_beta = run_gibbs_sampler(
    X, Y, Ry=.25, s=10,
    n_iter=11_000, burn_in=1000,
    a=a, b=b, A=A, B=B, T=T,
    display_=True,
)

100%|████████████████████████████████████████████████████████████████████████████| 11000/11000 [17:22<00:00, 10.55it/s]
