### Combining GDP data with the jobs report

In [1]:
%matplotlib inline
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

# Calendar month number (1-12) -> quarter number (1-4).
qtrs = {month: (month - 1) // 3 + 1 for month in range(1, 13)}

In [None]:
# Monthly aggregates built from the yearly CPS feather files, keyed by
# '<year>-<month>-01' strings:
#   aah  - weighted average of HRSACTT over rows with LFS == "Employed"
#   epop - weighted share of rows with LFS == "Employed" among all rows
aah = {}
epop = {}
cols = ['HRSACTT', 'LFS', 'BASICWGT', 'MONTH', 'PWSSWGT']
for year in range(1989, 2020):
    # PWSSWGT is used from 1998 onward, BASICWGT for earlier years.
    wgt = 'PWSSWGT' if year >= 1998 else 'BASICWGT'
    df = pd.read_feather(cps_dir / f'cps{year}.ft', columns=cols)
    # Filter once; both aggregates below need the employed subset.
    employed = df.query('LFS == "Employed"')

    # Weight hours with the same year-specific weight as the employment
    # rate. Previously BASICWGT was hard-coded here, so from 1998 onward the
    # two series were computed with different weights.
    ah = employed.groupby('MONTH').apply(
        lambda x: np.average(x.HRSACTT, weights=x[wgt]))
    aah.update({f'{year}-{month}-01': value
                for month, value in zip(ah.index, ah.values)})

    ep = (employed.groupby('MONTH')[wgt].sum() /
          df.groupby('MONTH')[wgt].sum())
    epop.update({f'{year}-{month}-01': value
                 for month, value in zip(ep.index, ep.values)})

In [None]:
# Write each unadjusted series to its own CSV (index column labelled
# 'date', header row included) so it can be seasonally adjusted externally.
for csv_name, monthly_values in (('epop.csv', epop), ('aah.csv', aah)):
    pd.Series(monthly_values).to_csv(csv_name, index_label='date', header=True)

I can't seem to get X-13ARIMA-SEATS running on Linux, so I used a virtual machine to convert epop.csv and aah.csv to epop_sa.csv and aah_sa.csv using x13as with default settings.

In [2]:
def _read_quarterly_mean(path):
    """Read a headerless two-column (date, value) CSV and return its
    quarter-start ('QS') mean, indexed by date."""
    series = pd.read_csv(path, header=None, names=['date', 'value'],
                         parse_dates=[0])
    return series.set_index('date').resample('QS').mean()


epop_sa = _read_quarterly_mean('raw/epop_sa.csv')

aah_sa = _read_quarterly_mean('raw/aah_sa.csv')

In [3]:
# (table id, [series code]) pairs passed to retrieve_table / nipa_df.
gdp_code = ('T10106', ['A191RX'])
pop_code = ('T70100', ['B230RC'])
gdp, pop = (nipa_df(retrieve_table(table)['Data'], series).sort_index()
            for table, series in (gdp_code, pop_code))

# Starting from an empty frame, the first Series assigned fixes the index;
# each later Series is aligned to that index on assignment.
df = pd.DataFrame()
for column, values in (('epop', epop_sa['value']),
                       ('pop', pop['B230RC']),
                       ('hours', aah_sa['value'] * 52),  # presumably weekly hours scaled to a year - confirm
                       ('gdp', gdp['A191RX'])):
    df[column] = values

# Total input = pop * epop * hours; gdpinp is GDP per unit of that input.
labor_input = (df['pop'] * df['epop']) * df['hours']
df['input'] = labor_input
df['gdpinp'] = df['gdp'] / labor_input

# Counterfactual GDP paths: hold one factor at its sample mean while the
# other two (and gdpinp) follow their actual values.
for held in ('epop', 'pop', 'hours'):
    terms = {factor: (df[factor].mean() if factor == held else df[factor])
             for factor in ('pop', 'epop', 'hours')}
    df[f'{held}_c'] = (((terms['pop'] * terms['epop']) * terms['hours'])
                       * df['gdpinp'])

df_g = growth_rate(df)

# A factor's contribution is the gap between actual GDP growth and growth
# of the counterfactual path with that factor held fixed.
for factor in ('pop', 'epop', 'hours'):
    df_g[f'{factor}_contr'] = df_g['gdp'] - df_g[f'{factor}_c']

# Whatever GDP growth the three factor contributions leave unexplained.
df_g['prod'] = (df_g['gdp'] - df_g['pop_contr']
                - df_g['epop_contr'] - df_g['hours_contr'])

In [4]:
# Keep only the four contribution columns, rounded to two decimals.
contribution_cols = ['pop_contr', 'epop_contr', 'hours_contr', 'prod']
result = df_g[contribution_cols].round(2)

In [5]:
# Save the contribution table under data_dir, labelling the index 'date'.
gdpjobs_path = data_dir / 'gdpjobs.csv'
result.to_csv(gdpjobs_path, index_label='date')