#### load libraries

In [4]:
# The program runs on Python 3.6
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook
pd.set_option('mode.chained_assignment', None)

#### preambles

In [28]:
# Names of the observables to process. Each name is appended to the quarterly
# sheet of the observables list and is later selected from the merged data
# frame, so it must match a column created in the transformation loop.
# Earlier selections are kept below (commented out) for reference.
# observables = ['xgdp_q_obs_frbedo', 'pgdp_q_obs_frbedo', 'rff_q_obs_frbedo', 'pecnn_q_obs_frbedo', 'pecd_q_obs_frbedo',
#                'per_q_obs_frbedo', 'penr_q_obs_frbedo', 'paipc_q_obs_frbedo', 'paipk_q_obs_frbedo', 'hours_obs_frbedo',
#                'wage_obs_frbedo', 'dlnl_q_obs']
# observables = ['dlnl_q_obs']
observables = ['dlndin_obs']

#### declare observables in 'List_Observables_US.xls'

In [6]:
# Workbook holding the list of declared observables (path is relative to the
# current working directory).
filepath = '../../EstimationInterface/List_Observables_US.xls'
# Passing a list of sheet names makes read_excel return a dict of DataFrames
# keyed by sheet name, here 'q' and 'a'.
list_obs = pd.read_excel(filepath, sheet_name=['q', 'a'])

In [7]:
# Append one row per new observable to the 'q' sheet. The new rows are numbered
# consecutively, starting at the current row count of that sheet, and the
# observable name is used for both remaining columns.
quarterly_sheet = list_obs['q']
first_number = quarterly_sheet.shape[0]
numbers = list(range(first_number, first_number + len(observables)))
# Stacking ints and strings in one array turns the numbers into strings.
new_rows = np.array([numbers, observables, observables]).T
newlist_obs = pd.DataFrame(new_rows, columns=quarterly_sheet.columns)
list_obs['q'] = pd.concat([quarterly_sheet, newlist_obs])

In [8]:
def to_integer(x):
    '''Convert ``x`` to an integer wherever possible.

    Parameters
    ----------
    x : object
        Any value, e.g. a number or a numeric string.

    Returns
    -------
    int or object
        ``int(x)`` when the conversion succeeds; otherwise ``x`` itself,
        unchanged (e.g. non-numeric strings, ``None``, NaN or infinity).
    '''
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Not convertible: hand the value back instead of silently
        # replacing it with None.
        return x

In [9]:
# Cast the 'Number' column of the 'q' sheet through to_integer, entry by entry.
quarterly_obs = list_obs['q']
quarterly_obs['Number'] = quarterly_obs['Number'].map(to_integer)

In [10]:
# Excel writer that receives the updated observable sheets; the sheets are
# added and the file is finalised in the following cells.
writer = pd.ExcelWriter('test.xls')

In [11]:
# Write every sheet of the observables list to the workbook, without the row index.
for sheet_name, sheet in list_obs.items():
    sheet.to_excel(writer, sheet_name=sheet_name, index=False)

In [12]:
# Flush the workbook to disk and release the file handle. close() is used
# instead of save() because ExcelWriter.save() is deprecated and no longer
# available in pandas 2.0; close() performs the save itself.
writer.close()

#### load raw data

In [13]:
# Load the two raw data sets. index_col=0 uses the first CSV column as the
# initial row index; the 'date' column is promoted to the index afterwards.
df_alfred = pd.read_csv('alfred_raw.csv', index_col=0)
df_spf = pd.read_csv('spf_raw.csv', index_col=0)

In [14]:
# Use 'date' as the row index of both raw frames.
for raw_frame in (df_alfred, df_spf):
    raw_frame.set_index('date', inplace=True)

# Split every ALFRED column name into two levels: everything except the last
# seven characters (level 0) and the last six characters (level 1, later used
# as the vintage label). The character in between is dropped.
# NOTE(review): presumably a one-character separator such as '_' - confirm against alfred_raw.csv.
series_level = [column[:-7] for column in df_alfred.columns]
vintage_level = [column[-6:] for column in df_alfred.columns]
df_alfred.columns = pd.MultiIndex.from_arrays([series_level, vintage_level])

In [15]:
# GDPC1 nowcast = RGDP2
# GDPCTPI nowcast = PGDP2
# PRFI nowcast = RRESINV2(t)*GDPCTPI(t)/RRESINV2(t-1)/GDPCTPI(t-1)*PRFI(t-1)

In [19]:
# Working copy of the ALFRED data. The commented-out loop below would overwrite
# entries with SPF-based nowcasts; while it stays disabled, df_alfredspf is an
# unmodified copy of df_alfred.
df_alfredspf = df_alfred.copy()
# for index in df_alfredspf.index:
#     if int(index.replace(':Q','')) >= 19684:
#         df_alfredspf.loc[index, ('GDPC1', index.replace(':', ''))] = df_spf.loc[index, 'RGDP2']
#         df_alfredspf.loc[index, ('GDPCTPI', index.replace(':', ''))] = df_spf.loc[index, 'PGDP2']
        
#     if int(index.replace(':Q','')) >= 19814:
#         A = df_spf.loc[index, 'RRESINV2']*df_alfredspf.loc[index, ('GDPCTPI', index.replace(':', ''))]
#         B = df_spf.loc[index, 'RRESINV1']*df_alfredspf.loc[last_index, ('GDPCTPI', index.replace(':', ''))]
#         C = df_alfredspf.loc[last_index, ('PRFI', index.replace(':', ''))]
#         prfi_nowcast = A/B*C
#         df_alfredspf.loc[index, ('PRFI', index.replace(':', ''))] = prfi_nowcast
        
#         A = df_spf.loc[index, 'RNRESIN2']*df_alfredspf.loc[index, ('GDPCTPI', index.replace(':', ''))]
#         B = df_spf.loc[index, 'RNRESIN1']*df_alfredspf.loc[last_index, ('GDPCTPI', index.replace(':', ''))]
#         C = df_alfredspf.loc[last_index, ('PNFI', index.replace(':', ''))]
#         pnfi_nowcast = A/B*C
#         df_alfredspf.loc[index, ('PNFI', index.replace(':', ''))] = pnfi_nowcast
        
#     last_index = index

#### transform data and generate observables

In [17]:
# PECNN = [PCND(t) + PCESV(t)]/[PCNDGC96(t) + PCESVC96(t)]
# PECD = PCDG(t)/PCDGCC96(t)
# HOURS = AWHNONAG(t)*CE16OV(t)/CNP16OV(t)

# xgdp_q_obs_frbedo = GDPC1(t)/GDPC1(t-1)
# pgdp_q_obs_frbedo = GDPCTPI(t)/GDPCTPI(t-1)
# rff_q_obs_frbedo = FEDFUNDS(t)/400 + 1
# pecnn_q_obs_frbedo = [PCND(t) + PCESV(t)]/[PCND(t-1) + PCESV(t-1)]
# pecd_q_obs_frbedo = PCDG(t)/PCDG(t-1)
# per_q_obs_frbedo = PRFI(t)/PRFI(t-1)
# penr_q_obs_frbedo = PNFI(t)/PNFI(t-1)
# paipc_q_obs_frbedo = PECNN(t) / PECNN(t-1)
# paipk_q_obs_frbedo = PECD(t) / PECD(t-1)
# hours_obs_frbedo = HOURS(t) / AVERAGE(HOURS, 1968Q1-most recent observation)
# wage_obs_frbedo = [COMPNFB(t) / GDPCTPI(t)] / [COMPNFB(t-1) / GDPCTPI(t-1)]

In [21]:
# Build one wide frame covering all vintages: for each vintage take its
# cross-section, add the derived observable, label the columns with the vintage
# again and merge the result into the running frame on 'date'.
vintages = sorted(set(df_alfredspf.columns.get_level_values(1)))
for position, vintage in tqdm_notebook(enumerate(vintages), total=len(vintages)):

    # cross-section of this vintage (the vintage level is dropped from the columns)
    vdf = df_alfredspf.xs(vintage, axis=1, level=1)

#     # generate intermediate series
#     vdf.loc[:, 'PECNN'] = (vdf['PCND'].values + vdf['PCESV'].values) / (vdf['PCNDGC96'].values + vdf['PCESVC96'].values)
#     vdf.loc[:, 'PECD'] = vdf['PCDG'].values / vdf['PCDGCC96'].values
#     vdf.loc[:, 'HOURS'] = vdf['AWHNONAG'].values * vdf['CE16OV'].values / vdf['CNP16OV'].values

#     # generate observed series
#     vdf.loc[:, 'xgdp_q_obs_frbedo'] = vdf['GDPC1'].values/vdf['GDPC1'].shift().values
#     vdf.loc[:, 'pgdp_q_obs_frbedo'] = vdf['GDPCTPI'].values/vdf['GDPCTPI'].shift().values
#     vdf.loc[:, 'rff_q_obs_frbedo'] = vdf['FEDFUNDS'].values/400 + 1
#     vdf.loc[:, 'pecnn_q_obs_frbedo'] = (vdf['PCND'].values + vdf['PCESV'].values) / (vdf['PCND'].shift().values + vdf['PCESV'].shift().values)
#     vdf.loc[:, 'pecd_q_obs_frbedo'] = vdf['PCDG'].values/vdf['PCDG'].shift().values
#     vdf.loc[:, 'per_q_obs_frbedo'] = vdf['PRFI'].values/vdf['PRFI'].shift().values
#     vdf.loc[:, 'penr_q_obs_frbedo'] = vdf['PNFI'].values/vdf['PNFI'].shift().values
#     vdf.loc[:, 'paipc_q_obs_frbedo'] = vdf['PECNN'].values/vdf['PECNN'].shift().values
#     vdf.loc[:, 'paipk_q_obs_frbedo'] = vdf['PECD'].values/vdf['PECD'].shift().values
#     vdf.loc[:, 'hours_obs_frbedo'] = vdf['HOURS'].values/vdf.loc['1968:Q1':, 'HOURS'].mean()
#     vdf.loc[:, 'wage_obs_frbedo'] = (vdf['COMPNFB'].values/vdf['GDPCTPI'].values) / (vdf['COMPNFB'].shift().values/vdf['GDPCTPI'].shift().values)
#     vdf.loc[:, 'dlnl_obs'] = np.log(vdf['BCNSDODNS'].values/vdf['BCNSDODNS'].shift().values)
    # log of the ratio of ASHMA to its value one row earlier
    ashma = vdf['ASHMA']
    vdf.loc[:, 'dlndin_obs'] = np.log(ashma.values / ashma.shift().values)

    # restore the (series, vintage) column levels and expose 'date' as a column
    vintage_labels = [vintage] * vdf.shape[1]
    vdf.columns = pd.MultiIndex.from_arrays([vdf.columns, vintage_labels])
    vdf.reset_index(drop=False, inplace=True)

    # the first vintage starts the result; later ones are merged in on 'date'
    if position == 0:
        df = vdf
    else:
        df = pd.merge(df, vdf, on='date')

HBox(children=(IntProgress(value=0, max=289), HTML(value='')))




In [22]:
# Move the 'date' column (turned into a column by reset_index in the merge
# loop) back into the row index of the merged frame, modifying df in place.
df.set_index('date', inplace=True)

In [23]:
# Vintage labels '1947Q1' through '1965Q3': every quarter of 1947-1965 except
# 1965Q4. These columns are removed from each observable before export.
dropped_columns = [
    f'{year}Q{quarter}'
    for year in range(1947, 1966)
    for quarter in range(1, 5)
    if (year, quarter) != (1965, 4)
]

#### with SPF nowcast

In [24]:
# Separate copy of the merged frame for the "with SPF nowcast" export.
df_nowcast = df.copy()

In [29]:
# Export one workbook per observable (rows: dates, columns: vintages).
# Missing values are coded as -999 where the vintage is not later than the
# row's date and as -99 everywhere else.
for observable in tqdm_notebook(observables):
    # Selecting on the first column level leaves one column per vintage.
    # drop()/fillna() return new frames, so df_nowcast itself is never
    # touched and nothing relies on silenced chained-assignment warnings.
    df_obs = df_nowcast[observable].drop(dropped_columns, axis=1).drop(['1947:Q1'])
    df_obs = df_obs.fillna(-99)

    # Encode quarters as integers ('1968:Q4' and '1968Q4' -> 19684) so dates
    # and vintages can be compared numerically.
    date_codes = np.array(
        [int(date.replace(':Q', '')) for date in df_obs.index], dtype=int)
    vintage_codes = np.array(
        [int(vintage.replace('Q', '')) for vintage in df_obs.columns], dtype=int)

    # One vectorised pass instead of a per-cell .loc write inside iterrows():
    # recode -99 to -999 wherever vintage <= date.
    vintage_not_after_date = vintage_codes[np.newaxis, :] <= date_codes[:, np.newaxis]
    df_obs = df_obs.mask((df_obs == -99).values & vintage_not_after_date, -999)

    # Column names become <observable> + last four characters of the vintage.
    df_obs.columns = [observable + vintage[-4:] for vintage in df_obs.columns]
    df_obs.index.name = 'DATE'
    # NOTE(review): 'Tranformed' is kept as written; presumably it matches the
    # directory name on disk - confirm before renaming.
    path = '../../DATA/USDATA/Tranformed_Data_SPF/'
    df_obs.to_excel(path + observable + '.xls')

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




#### without SPF nowcast

In [30]:
# Separate copy of the merged frame for the "without SPF nowcast" export.
df_nonowcast = df.copy()

In [31]:
# For every date, blank out each observable's value in the vintage carrying the
# same quarter label as the date (the date label with ':' removed).
for date_label in df_nonowcast.index:
    same_quarter_vintage = date_label.replace(':', '')
    for name in observables:
        df_nonowcast.loc[date_label, (name, same_quarter_vintage)] = np.nan

In [32]:
# Export one workbook per observable (rows: dates, columns: vintages).
# Missing values are coded as -999 where the vintage is not later than the
# row's date and as -99 everywhere else.
for observable in tqdm_notebook(observables):
    # Selecting on the first column level leaves one column per vintage.
    # drop()/fillna() return new frames, so df_nonowcast itself is never
    # touched and nothing relies on silenced chained-assignment warnings.
    df_obs = df_nonowcast[observable].drop(dropped_columns, axis=1).drop(['1947:Q1'])
    df_obs = df_obs.fillna(-99)

    # Encode quarters as integers ('1968:Q4' and '1968Q4' -> 19684) so dates
    # and vintages can be compared numerically.
    date_codes = np.array(
        [int(date.replace(':Q', '')) for date in df_obs.index], dtype=int)
    vintage_codes = np.array(
        [int(vintage.replace('Q', '')) for vintage in df_obs.columns], dtype=int)

    # One vectorised pass instead of a per-cell .loc write inside iterrows():
    # recode -99 to -999 wherever vintage <= date.
    vintage_not_after_date = vintage_codes[np.newaxis, :] <= date_codes[:, np.newaxis]
    df_obs = df_obs.mask((df_obs == -99).values & vintage_not_after_date, -999)

    # Column names become <observable> + last four characters of the vintage.
    df_obs.columns = [observable + vintage[-4:] for vintage in df_obs.columns]
    df_obs.index.name = 'DATE'
    # NOTE(review): 'Tranformed' is kept as written; presumably it matches the
    # directory name on disk - confirm before renaming.
    path = '../../DATA/USDATA/Tranformed_Data/'
    df_obs.to_excel(path + observable + '.xls')

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [None]:
# # observations that have current-period values
# for rowindex, row in df.iterrows():
#     for colindex, value in row[:, rowindex.replace(':', '')].items():
#         if colindex.endswith('frbedo') and not np.isnan(value):
#             print(f'{colindex}: {value}')