In [None]:
from state_space_estimation.dag import dag
from state_space_estimation.roles import roles
from state_space_estimation.estimation import estimation
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
%load_ext autoreload

In [None]:
# Read the RBC data set and discard the 'Unnamed: 0', 'eps_g' and 'eps_z' columns.
rbc = pd.read_csv('../data/rbc_100k.csv').drop(columns=['Unnamed: 0', 'eps_g', 'eps_z'])
# Strip spaces out of the remaining column labels.
rbc = rbc.rename(columns=lambda label: label.replace(" ", ""))

# Append a one-period lag of every variable, labelled with a '_1' suffix.
shift_vars = rbc.columns.values
shift = rbc.loc[:, shift_vars].shift().add_suffix('_1')
rbc = pd.concat([rbc, shift], axis=1)

# Skip the first row (its lagged values are NaN) and keep the next 1000 rows.
rbc = rbc.iloc[1:1001]
# Demean every column using the mean of the retained rows.
rbc = rbc.apply(lambda column: column - column.mean(), axis=0)

In [None]:
%autoreload
# Try increasing numbers of states, starting from zero, and stop at the first
# count for which the returned results contain at least one row flagged 'valid'.
rbc_est = estimation(rbc)
max_states = len(rbc.columns.values) // 2 - 1
for i in range(max_states):
    print('Evaluating models with {} states'.format(i))
    rbc_results = rbc_est.choose_states_parallel(i)
    n_valid = rbc_results[rbc_results['valid']].shape[0]
    if n_valid > 0:
        print('Found valid model with {} states'.format(i))
        break

In [None]:
# Show only the rows of the last evaluated result set that are flagged 'valid'.
rbc_results.loc[rbc_results['valid']]

In [None]:
# Columns removed from the gali.csv data. The names are written without spaces
# because spaces are stripped from the labels before the drop.
nk_excluded = [
    'Unnamed:0',
    'eps_a', 'eps_z', 'eps_nu',
    'pi_ann', 'r_nat_ann', 'r_real_ann', 'm_growth_ann', 'i_ann',
    'y_gap', 'mu_hat', 'yhat',
    'm_nominal',
]
nk = pd.read_csv('../data/gali.csv')
nk = nk.rename(columns=lambda label: label.replace(" ", ""))
nk = nk.drop(columns=nk_excluded)

# Append a one-period lag of every variable, labelled with a '_1' suffix.
shift_vars = nk.columns.values
shift = nk.loc[:, shift_vars].shift().add_suffix('_1')
nk = pd.concat([nk, shift], axis=1)

# Skip the first row (its lagged values are NaN) and keep the next 1000 rows.
nk = nk.iloc[1:1001]
# Demean every column using the mean of the retained rows.
nk = nk.apply(lambda column: column - column.mean(), axis=0)

In [None]:
%autoreload
# Try increasing numbers of states, starting from zero, and stop at the first
# count for which the returned results contain at least one row flagged 'valid'.
nk_est = estimation(nk)
max_states = len(nk.columns.values) // 2 - 1
for i in range(max_states):
    print('Evaluating models with {} states'.format(i))
    nk_results = nk_est.choose_states_parallel(i)
    n_valid = nk_results[nk_results['valid']].shape[0]
    if n_valid > 0:
        print('Found valid model with {} states'.format(i))
        break

In [None]:
# Show only the rows of the last evaluated result set that are flagged 'valid'.
nk_results.loc[nk_results['valid']]

In [None]:
def _read_series(path):
    """Load one CSV series as a float64 frame with a datetime index.

    The 'DATE' column becomes the index, '.' entries are replaced by NaN
    before the cast to float64, and the index is converted with
    ``pd.to_datetime``.
    """
    frame = pd.read_csv(path, index_col='DATE').replace('.', np.nan).astype(np.float64)
    frame.index = pd.to_datetime(frame.index)
    return frame


cpi = _read_series('../data/real_data/CPIAUCSL.csv')
i__ = _read_series('../data/real_data/DFF.csv')
rb_ = _read_series('../data/real_data/DGS10.csv')
gr_ = _read_series('../data/real_data/GCEC1.csv')
yr_ = _read_series('../data/real_data/GDPC1.csv')
ir_ = _read_series('../data/real_data/GPDIC1.csv')
wr_ = _read_series('../data/real_data/LES1252881600Q.csv')
n__ = _read_series('../data/real_data/LFWA64TTUSM647S.csv')
rk_ = _read_series('../data/real_data/NASDAQCOM.csv')
kr_ = _read_series('../data/real_data/RKNANPUSA666NRUG.csv')
tfp = _read_series('../data/real_data/RTFPNAUSA632NRUG.csv')
une = _read_series('../data/real_data/UNRATE.csv')
l__ = _read_series('../data/real_data/CLF16OV.csv')
cr_ = _read_series('../data/real_data/PCEPI.csv')
ts = [cpi, i__, rb_, gr_, yr_, ir_, wr_, n__, rk_, kr_, tfp, une, l__, cr_]
    
# Calculate inflation from CPI: the period-over-period growth rate of the index
# at the frequency of the CSV (presumably monthly -- confirm against the file).
# Column arithmetic replaces the row-wise x[0] / x[1] lookup, which relied on
# positional indexing of a label-indexed Series.
cpi_level = cpi.iloc[:, 0]
cpi_previous = cpi_level.shift()
cpi = ((cpi_level - cpi_previous) / cpi_previous).to_frame('pi')
# Convert to quarterly inflation. The rolling sum must run on the native
# frequency series and only then be sampled at the quarterly dates of yr_:
# reindexing first leaves a single observation inside each right-closed 90-day
# window, so the sum would just return the one-period rate unchanged.
cpi = cpi.rolling('90D').sum().reindex(yr_.index)
def _named(frame, label):
    """Set the single column label of `frame` to `label` (in place) and return it."""
    frame.columns = [label]
    return frame


# Every series is sampled on the dates of the real GDP frame.
quarters = yr_.index

# Average federal funds rate over a trailing 90-day window
i__ = _named(i__.rolling('90D').mean().reindex(quarters), 'rm')
# Average 10y bond rate over a trailing 90-day window, after forward-filling to daily
rb_ = _named(rb_.resample('D').ffill().rolling('90D').mean().reindex(quarters), 'rb')
# Real government expenditure
gr_ = _named(gr_.reindex(quarters), 'g')
# Real GDP (relabelled in place; its index defines `quarters`)
yr_ = _named(yr_, 'y')
# Real investment
ir_ = _named(ir_.reindex(quarters), 'i')
# Real (median) wages
wr_ = _named(wr_.reindex(quarters), 'w')
# Population
# NOTE(review): this is a trailing 90-day *sum*, whereas the other rolling
# series below use a mean -- confirm that summing is intended here.
n__ = _named(n__.rolling('90D').sum().reindex(quarters), 'n')

# Real return to capital (estimated via NASDAQ): one-day NASDAQ return on each
# date of `cpi`, minus that date's inflation figure divided by 90.
nasdaq_daily = rk_.resample('D').ffill()
nasdaq_pair = nasdaq_daily.join(nasdaq_daily.shift(), rsuffix='_1').join(cpi, how='right')
nominal_return = (nasdaq_pair['NASDAQCOM'] - nasdaq_pair['NASDAQCOM_1']) / nasdaq_pair['NASDAQCOM_1']
nasdaq_pair['rk'] = nominal_return - (cpi['pi'] / 90)
rk_ = nasdaq_pair[['rk']].reindex(quarters)

# Real capital stock, filled between observations with a 2nd-order polynomial.
# (A disabled alternative used the first difference of the capital stock instead.)
kr_ = _named(kr_.reindex(quarters).interpolate(method='polynomial', order=2), 'k')
# Total factor productivity, filled the same way
tfp = _named(tfp.reindex(quarters).interpolate(method='polynomial', order=2), 'z')
# Unemployment
une = _named(une.rolling('90D').mean().reindex(quarters), 'u')
# Total labor
l__ = _named(l__.rolling('90D').mean().reindex(quarters), 'l')
# Real consumption
cr_ = _named(cr_.rolling('90D').mean().reindex(quarters), 'c')

ts = [cpi, i__, rb_, gr_, yr_, ir_, wr_, n__, rk_, kr_, tfp, une, l__, cr_]

# Transform each series to deviations from a trend fitted on its own lags:
# regress the series on its first `lags` lags (no intercept) and keep the
# residuals, indexed by the dates that have a full set of lags.
lags = 1
for i in range(len(ts)):
    t = ts[i].dropna()
    # Work in logs whenever every remaining observation is strictly positive.
    if (t.iloc[:, 0] > 0).all():
        t = np.log(t)
    values = t.iloc[:, 0]
    # Design matrix with one row per observation and one column per lag.
    # column_stack keeps each lag in its own column for any `lags`; flattening
    # and reshaping the stacked lags is only correct when lags == 1.
    X = np.column_stack([values.shift(lag).iloc[lags:].values
                         for lag in range(1, lags + 1)])
    y = values.iloc[lags:].values.reshape(-1, 1)
    model = LinearRegression(fit_intercept=False)
    model.fit(X, y)
    ts[i] = pd.DataFrame(y - model.predict(X),
                         index=t.index[lags:],
                         columns=t.columns)
    
# Left-join every series in `ts` onto the date index of yr_.
real_data = pd.DataFrame(index=yr_.index)
for series in ts:
    real_data = real_data.join(series, how='left')

# Fill gaps by interpolation, restrict the sample to 1985-01-01 through
# 2005-01-01, then demean each column over that window.
real_data = real_data.interpolate()
real_data = real_data.loc['1985-01-01':'2005-01-01']
real_data = real_data.apply(lambda column: column - column.mean())

# Append a one-period lag of every variable ('_1' suffix) and drop the first
# row, whose lagged values are NaN.
shift_vars = real_data.columns.values
shift = real_data.loc[:, shift_vars].shift().add_suffix('_1')
real_data = pd.concat([real_data, shift], axis=1).iloc[1:]

In [None]:
%autoreload
# Try increasing numbers of states, starting from zero, and stop at the first
# count for which the returned results contain at least one row flagged 'valid'.
real_est = estimation(real_data)
max_states = len(real_data.columns.values) // 2 - 1
for i in range(max_states):
    print('Evaluating models with {} states'.format(i))
    real_results = real_est.choose_states_parallel(i)
    n_valid = real_results[real_results['valid']].shape[0]
    if n_valid > 0:
        print('Found valid model with {} states'.format(i))
        break