In [None]:
import pandas as pd
import numpy as np
import csv
from statsmodels.api import OLS
from state_space_estimation.dag import dag
from sklearn.linear_model import LinearRegression
%load_ext autoreload

In [None]:
# Load the RBC full-sample results table from CSV (path is relative to the notebook's directory).
rbc_results = pd.read_csv('../data/rbc_fullsample_0.05_results.csv')

In [None]:
# Show the loaded RBC results table as this cell's output.
rbc_results

In [None]:
# Load the RBC win counts and reshape them into the table written for the LaTeX text.
rbc_wins_raw = pd.read_csv('../data/rbc_100_0.05_1000iter_wins.csv')

rbc_wins = rbc_wins_raw.assign(
    # 1-based row number, written out as an ordinary column named 'index'.
    index=np.arange(1, len(rbc_wins_raw) + 1),
    # State names are '_'-separated in the CSV; widen each separator to three spaces.
    # Missing entries become empty strings (same guard as the NK cell) instead of
    # failing on a float NaN.
    exostates=rbc_wins_raw['exo_states'].fillna('').str.replace('_', '   ', regex=False),
    endostates=rbc_wins_raw['endo_states'].fillna('').str.replace('_', '   ', regex=False),
)[['index', 'exostates', 'endostates', 'wins', 'valid']]
# Selecting the output columns above also discards the raw 'exo_states'/'endo_states'
# columns, so no separate drop is needed.

# QUOTE_NONE writes fields unquoted; escapechar handles any delimiter inside a field.
rbc_wins.to_csv('../text/latex/algo/files/rbc_wins.csv', index=False, quoting=csv.QUOTE_NONE, escapechar='\\')

In [None]:
# Load the NK win counts and reshape them into the table written for the LaTeX text.
nk_wins_raw = pd.read_csv('../data/nk_100_0.05_1000iter_wins.csv')

nk_wins = nk_wins_raw.assign(
    # 1-based row number, written out as an ordinary column named 'index'.
    index=np.arange(1, len(nk_wins_raw) + 1),
    # State names are '_'-separated in the CSV; widen each separator to three spaces.
    # Missing entries are treated as empty strings.
    exostates=nk_wins_raw['exo_states'].fillna('').str.replace('_', '   ', regex=False),
    endostates=nk_wins_raw['endo_states'].fillna('').str.replace('_', '   ', regex=False),
)[['index', 'exostates', 'endostates', 'wins', 'valid']]
# Selecting the output columns above also discards the raw 'exo_states'/'endo_states'
# columns, so no separate drop is needed.

# Keep only rows with at least one win. Row numbers were assigned before this filter,
# so surviving rows keep their original numbering.
nk_wins = nk_wins[nk_wins['wins'] > 0]

# QUOTE_NONE writes fields unquoted; escapechar handles any delimiter inside a field.
nk_wins.to_csv('../text/latex/algo/files/nk_wins.csv', index=False, quoting=csv.QUOTE_NONE, escapechar='\\')

In [None]:
# Show the filtered NK wins table as this cell's output.
nk_wins

In [None]:
# Load the real-data results and reshape them into the table written for the LaTeX text.
real_results_raw = pd.read_csv('../data/real_0.05_results.csv')


def _flatten_list_repr(column):
    """Strip surrounding brackets and remove quotes and commas, e.g. "['a', 'b']" -> 'a b'."""
    return (
        column.str.strip('[]')
        .str.replace("'", '', regex=False)
        .str.replace(',', '', regex=False)
    )


# Build the output frame in one step from the raw frame rather than assigning columns
# onto a column-subset slice (which triggers SettingWithCopyWarning). Any other column in
# the CSV (such as a saved 'Unnamed: 0' index) is simply not selected.
real_results = real_results_raw.assign(
    # 1-based row number, written out as an ordinary column named 'index'.
    index=np.arange(1, len(real_results_raw) + 1),
    exostates=_flatten_list_repr(real_results_raw['exo_states']),
    endostates=_flatten_list_repr(real_results_raw['endo_states']),
    controls=_flatten_list_repr(real_results_raw['controls']),
)[['index', 'exostates', 'endostates', 'controls']]

# QUOTE_NONE writes fields unquoted; escapechar handles any delimiter inside a field.
real_results.to_csv('../text/latex/algo/files/realresults.csv', index=False, quoting=csv.QUOTE_NONE, escapechar='\\')

In [None]:
# Show the cleaned real-data results table as this cell's output.
real_results

In [None]:
# Load the real data set, using its 'DATE' column as the row index.
# NOTE: make_adjacency() later drops columns from this frame in place.
real_data = pd.read_csv("../data/real_data.csv", index_col='DATE')

In [None]:
def make_adjacency(exo_states, endo_states, controls, data, var=False):
    """Estimate a weighted adjacency matrix over the current and lagged columns of ``data``.

    Entry ``[source, target]`` of the result holds the coefficient of ``source`` in a
    no-intercept linear regression explaining ``target``; all other entries are zero.

    Parameters
    ----------
    exo_states : list of str
        Exogenous state names. With ``var=False`` each one is regressed on its own lag only.
    endo_states : list of str
        Endogenous state names.
    controls : list of str
        Control variable names.
    data : pandas.DataFrame
        Must contain every listed name and its first lag, named ``<name>_1``.
        NOTE: columns whose name contains ``'_2'`` are dropped from ``data`` IN PLACE.
        This side effect is kept because cells in this notebook read positions from the
        caller's frame (``real_data.columns``) after calling this function.
    var : bool, default False
        If True, regress all current columns jointly on all lag columns. If False, regress
        each exogenous state on its own lag, and each endogenous state and control on the
        lagged endogenous states plus the current exogenous states.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are the (remaining) column names of ``data``.

    Raises
    ------
    ValueError
        If a listed name or its lag is missing from ``data``, or if ``data`` has a current
        column that is not listed in any role.
    """
    data.drop([name for name in data.columns.values.tolist() if '_2' in name], axis=1, inplace=True)
    data_names = data.columns.values.tolist()
    # A column counts as a lag when its name contains '_1'; everything else is "current".
    data_current = [name for name in data_names if '_1' not in name]
    data_lag = [name for name in data_names if '_1' in name]
    implied_names = exo_states + endo_states + controls

    # Validate every name first so the error identifies the offending variable.
    for name in implied_names:
        if name not in data_names:
            raise ValueError('Name {} missing from data'.format(name))
        if str(name) + '_1' not in data_names:
            raise ValueError('Lag of name {} missing from data'.format(name))

    # Every current column must have a role. The check is skipped when no names were
    # given at all, which matches the earlier behaviour of checking inside the loop.
    unassigned = [name for name in data_current if name not in implied_names]
    if implied_names and unassigned:
        print(data_current)
        print(implied_names)
        raise ValueError('Implied names and data do not align')

    n_names = len(data_names)
    result = pd.DataFrame(np.zeros((n_names, n_names)), columns=data_names, index=data_names)

    if var:
        model = LinearRegression(fit_intercept=False)
        model.fit(data[data_lag], data[data_current])
        # coef_ is (n_targets, n_features); transpose to rows=lag sources, cols=current
        # targets. Assigning by label keeps each coefficient under the right names
        # whatever the column order of ``data`` is.
        result.loc[data_lag, data_current] = model.coef_.T
    else:
        for exo_state in exo_states:
            lag_name = exo_state + '_1'
            model = LinearRegression(fit_intercept=False)
            model.fit(data[[lag_name]], data[exo_state])
            result.loc[lag_name, exo_state] = model.coef_[0]

        # Same regressor set for every endogenous state and control.
        regressors = [es + '_1' for es in endo_states] + exo_states
        for endo in endo_states + controls:
            model = LinearRegression(fit_intercept=False)
            model.fit(data[regressors], data[endo])
            for regressor, coef in zip(regressors, model.coef_):
                result.loc[regressor, endo] = coef

    return result


def make_roles(states, data):
    """Split state names into exogenous and endogenous roles and derive the controls.

    Parameters
    ----------
    states : iterable
        State names. A name containing ``'_1'`` is treated as a lagged variable and
        recorded as an endogenous state with a trailing ``'_1'`` suffix removed; any
        other name is recorded as an exogenous state.
    data : pandas.DataFrame
        Only its column names are read.

    Returns
    -------
    tuple of (list, list, list)
        ``(exo_names, endo_names, controls)``, where ``controls`` are the columns of
        ``data`` that contain no ``'_1'`` and appear in neither of the other two lists.
    """
    lag_suffix = '_1'
    exo_names = []
    endo_names = []
    for state in states:
        state_name = str(state)
        if lag_suffix in state_name:
            # Remove exactly one trailing '_1'. str.rstrip('_1') would strip any run of
            # '_' and '1' characters, turning 'c1_1' into 'c'.
            if state_name.endswith(lag_suffix):
                state_name = state_name[:-len(lag_suffix)]
            endo_names.append(state_name)
        else:
            exo_names.append(state_name)

    assigned = set(exo_names) | set(endo_names)
    controls = [
        name for name in data.columns.values
        if lag_suffix not in str(name) and name not in assigned
    ]

    return exo_names, endo_names, controls

In [None]:
# Settings shared by the impulse-response cells below.
irf_length = 20   # passed as T to calculate_irf
shock_amt = -1    # value written into the initial state vector at the shocked variable
shock_var = 'rm'  # name of the shocked variable; must be an exogenous or endogenous state

In [None]:
%autoreload
exo_states = ['y', 'z', 'u']
endo_states = ['pi', 'k', 'c', 'rm']
controls = ['g', 'i', 'w', 'rk', 'l']

m = make_adjacency(exo_states, endo_states, controls, data=real_data, var=False)
real_dag = dag(m)

x_0 = pd.Series(np.full(len(real_dag.nodes), np.nan), index=real_dag.nodes)
if shock_var in exo_states or shock_var in endo_states:
    shock_index = np.where(real_data.columns == shock_var)[0]
else:
    raise ValueError('Cannot shock control variable {}'.format(shock_var))

x_0[int(len(real_dag.nodes)/2):] = 0
x_0[shock_index] = shock_amt

real_irf = real_dag.calculate_irf(x_0, T=irf_length)
plt = real_dag.plot_irf(real_irf)
plt.savefig('../text/latex/algo/images/real_data_{}_irf.png'.format(shock_var), bbox_inches='tight')
plt.show()

In [None]:
%autoreload
exo_states = ['y', 'z', 'u']
endo_states = ['pi', 'k', 'c', 'rm']
controls = ['g', 'i', 'w', 'rk', 'l']

m = make_adjacency(exo_states, endo_states, controls, data=real_data, var=True)
real_dag = dag(m)

x_0 = pd.Series(np.full(len(real_dag.nodes), np.nan), index=real_dag.nodes)
if shock_var in exo_states or shock_var in endo_states:
    shock_index = np.where(real_data.columns == shock_var)[0]
else:
    raise ValueError('Cannot shock control variable {}'.format(shock_var))

x_0[int(len(real_dag.nodes)/2):] = 0
x_0[shock_index] = shock_amt

real_irf = real_dag.calculate_irf(x_0, T=irf_length)
plt = real_dag.plot_irf(real_irf)
plt.savefig('../text/latex/algo/images/real_data_{}_var_irf.png'.format(shock_var), bbox_inches='tight')
plt.show()