In [1]:
import sys
sys.path.append('../src')

import requests

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

In [None]:
# Directory holding the cleaned annual CPS feather files. The trailing slash
# matters: file names are appended to this string directly.
path = '/home/brian/Documents/CPS/data/clean/'

# Columns read from each annual file for the labor force status series.
cols = [
    'LFS', 'BASICWGT', 'PWSSWGT',
    'AGE', 'MONTH', 'YEAR', 'FEMALE',
]

In [None]:
# Stack the annual CPS extracts for 1989 through 2019, reading only `cols`.
df = pd.concat(
    [pd.read_feather(f'{path}cps{yr}.ft', columns=cols)
     for yr in range(1989, 2020)],
    sort=False,
)

# Single weight column: PWSSWGT where YEAR is after 1993, BASICWGT otherwise.
df['WGT'] = np.where(df['YEAR'] > 1993, df['PWSSWGT'], df['BASICWGT'])
# Timestamp for the first day of each record's YEAR/MONTH.
df['DATE'] = pd.to_datetime({'year': df.YEAR, 'month': df.MONTH, 'day': 1})

# Sum of weights by date (rows) and LFS value (columns), divided by 1,000,000.
# The column labelled 'nan' is renamed to 'Children'.
weight_by_status = df.groupby(['DATE', 'LFS']).WGT.sum().unstack()
data = weight_by_status.rename(columns={'nan': 'Children'}) / 1000000

file = '/home/brian/Documents/uschartbook/chartbook/data/cps_lfs.csv'
data.to_csv(file, index_label='date')

In [None]:
# Label for the last date in `data`, formatted as full month name and year.
latest_date = data.index[-1]
datelt = latest_date.strftime('%B %Y')

In [None]:
# Employment rate for the last row of `data`: employed as a percent of the
# sum across every column in that row.
latest = data.iloc[-1]
epop = latest["Employed"] / latest.sum() * 100

In [None]:
# Sentence reporting the latest employment level, written out for the chartbook.
employed_latest = data["Employed"].iloc[-1]
text = (f'As of {datelt}, {employed_latest:.1f} million '
        'people are employed (including self-employment). ')

file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs0.txt'
with open(file, mode='w') as wf:
    wf.write(text)


# Paragraph on household labor and the employment rate. Only the string is
# built here; nothing is written to disk for it in this cell.
text = ('Labor provided within a household is not captured by GDP compilation '
        'methods (expenditures, output, or income), though household surveys '
        'offer some insight into this important category of labor. The '
        'number of people who are considered employed divided by the total '
        'population is the employment rate or employment-to-population ratio, '
        f'which is {epop:.1f} percent as of {datelt}.')

In [None]:
# Save the current `text` string as cps_lfs.txt in the chartbook text folder.
file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs.txt'
with open(file, mode='w') as wf:
    wf.write(text)

In [None]:
# Rates for the last row of `data`. The labor force is unemployed plus
# employed; the participation rate divides it by the sum of every column.
latest = data.iloc[-1]
labor_force = latest["Unemployed"] + latest["Employed"]
unemp = (latest["Unemployed"] / labor_force) * 100
lfpr = labor_force / latest.sum() * 100

In [None]:
# Paragraph defining the labor force, the unemployment rate and the
# participation rate, filled in with the latest values.
unemployed_latest = data["Unemployed"].iloc[-1]
text = (f'As of {datelt}, there are {unemployed_latest:.1f} million '
        'unemployed people. The combined group of employed and unemployed people '
        'is the labor force. The number of unemployed people divided by the number '
        f'of people in the labor force is the unemployment rate, currently {unemp:.1f} percent. '
        'The number of people in the labor force divided by the total population is the '
        f'labor force participation rate, currently {lfpr:.1f} percent.')

In [None]:
# Show the generated paragraph as this cell's output for a visual check.
text

In [None]:
# Save the current `text` string as cps_lfs2.txt in the chartbook text folder.
file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs2.txt'
with open(file, mode='w') as wf:
    wf.write(text)

In [None]:
# Same columns as `cols`, plus the not-in-labor-force reason.
cols2 = [*cols, 'NILFREASON']

In [None]:
# 2019 extract with the NILFREASON column included.
datalt = pd.read_feather(path + 'cps2019.ft', columns=cols2)

In [None]:
# PWSSWGT totals, divided by 1,000,000, for people outside the labor force by
# stated reason. The month comes from the last date in `data` -- the same
# observation that `datelt` and the NILF total describe -- instead of being
# fixed at October, so the components and the total reported in the generated
# text refer to the same month.
# NOTE(review): `datalt` holds only the 2019 file; this assumes the last date
# in `data` falls in 2019 -- confirm if the year range is extended.
latest_month = data.index[-1].month
nilf_latest = datalt[datalt['MONTH'] == latest_month]


def _nilf_millions(reason):
    '''Return the latest-month PWSSWGT total, in millions, for one NILFREASON.'''
    rows = nilf_latest[nilf_latest['NILFREASON'] == reason]
    return rows['PWSSWGT'].sum() / 1000000


student = _nilf_millions('School')
care = _nilf_millions('Family')
disill = _nilf_millions('Disabled/Ill')
disc = _nilf_millions('Discouraged')
ret = _nilf_millions('Retired')

In [None]:
# Paragraph breaking the not-in-labor-force total down by reason.
nilf_latest_total = data["NILF"].iloc[-1]
children_latest = data["Children"].iloc[-1]
text = ('This category is about half of the population, on average, and '
        f'totals {nilf_latest_total:.1f} million in {datelt}. The '
        f'category is comprised of children ({children_latest:.1f}'
        f' million), students ({student:.1f} million), unpaid caregivers '
        f'({care:.1f} million), those unable to work due to disability or illness '
        f'({disill:.1f} million), those who want a job but have given up looking ({disc:.1f} '
        f'million), and retirees and the elderly ({ret:.1f} million).')

In [None]:
# Show the generated paragraph as this cell's output for a visual check.
text

In [None]:
# Save the current `text` string as cps_lfs3.txt in the chartbook text folder.
file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs3.txt'
with open(file, mode='w') as wf:
    wf.write(text)

In [None]:
# Row-wise total of the three labor force status columns (Children excluded).
lfs_columns = ['Employed', 'Unemployed', 'NILF']
data['Total'] = data[lfs_columns].sum(axis='columns')

In [None]:
# Quarterly means (bins labelled by quarter start) of every column except
# Children.
without_children = data.drop(columns='Children')
result = without_children.resample('QS').mean()

In [None]:
def growth_contrib_ann(df, srs):
    '''Calculate each df column's contribution to the growth of column srs.

    Growth is measured over four rows (one year when the rows are quarters).
    Each column's four-row change is expressed as a percent of the value of
    ``srs`` four rows earlier, so columns that add up to ``srs`` have
    contributions that add up to its percent change.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric columns, including the column named by ``srs``.
    srs : str
        Label of the column whose growth is being decomposed.

    Returns
    -------
    pandas.DataFrame
        Same shape and labels as ``df``; values are percentage points rounded
        to two decimals. The first four rows are NaN.
    '''
    change = df.diff(4)
    # Divide by the earlier level directly. Computing (share of the change)
    # times (percent change) instead yields 0/0 = NaN whenever srs is
    # unchanged over the four rows, although each contribution is well defined.
    base = df[srs].shift(4)
    return change.div(base, axis=0).multiply(100).round(2)

In [None]:
# Contributions of each column to growth in Total, saved for the chartbook.
contrib = growth_contrib_ann(result, 'Total')
file = '/home/brian/Documents/uschartbook/chartbook/data/cps_lfs2.csv'
contrib.to_csv(file, index_label='date')

### LFS Table

In [None]:
import itertools

In [None]:
# Columns needed for the labor force status table. PWSSWGT is listed once:
# the cells below use it as a single column (`.PWSSWGT.sum()`), and a
# repeated name in the column list was presumably unintended.
cols = ['LFS', 'PWSSWGT', 'AGE', 'MONTH', 'YEAR', 'MJH',
        'FEMALE', 'NILFREASON', 'UNEMPTYPE', 'PTECON', 'WORKFT']

# November records for ages 15 and over: 2019 in `df`, 2018 in `df2`.
table_filter = 'AGE >= 15 and MONTH == 11'
df = pd.read_feather(f'{path}cps2019.ft', columns=cols).query(table_filter)
df2 = pd.read_feather(f'{path}cps2018.ft', columns=cols).query(table_filter)

In [None]:
age_groups = [(15, 30), (30, 60), (60, 86)]  # (min, max) with max exclusive
sex = [0, 1]  # values of the FEMALE column

# Table row label -> {'query': pandas query selecting that row}. Formatted
# values are added to the same inner dict below, keyed by column heading.
# Labels carry LaTeX indentation; raw strings keep the backslash literal
# without relying on the invalid escape sequence '\h'.
d = {'Population': {'query': 'PWSSWGT > 0'},
     r'\hspace{2mm}Employed': {'query': 'LFS == "Employed"'},
     r'\hspace{4mm}Multiple jobs': {'query': 'MJH == 1'},
     r'\hspace{4mm}Full-time': {'query': 'WORKFT == 1'},
     r'\hspace{4mm}Part-time': {'query': 'WORKFT == 0'},
     r'\hspace{6mm}Economic reasons': {'query': 'PTECON == 1'},
     r'\hspace{2mm}Unemployed': {'query': 'LFS == "Unemployed"'},
     r'\hspace{2mm}Not in Labor Force': {'query': 'LFS == "NILF"'},
     r'\hspace{4mm}Discouraged': {'query': 'NILFREASON == "Discouraged"'},
     r'\hspace{4mm}Disabled/Ill': {'query': 'NILFREASON == "Disabled/Ill"'},
     r'\hspace{4mm}Family/Care': {'query': 'NILFREASON == "Family"'},
     r'\hspace{4mm}School': {'query': 'NILFREASON == "School"'},
     r'\hspace{4mm}Retirement': {'query': 'NILFREASON == "Retired"'}}

# Same row labels; holds the difference between `df` and `df2` for each cell.
d2 = {label: {} for label in d}

# (label, query) pairs, fixed before result keys are added to `d`.
lf_groups = [(label, spec['query']) for label, spec in d.items()]

# First column: everyone in the frames. Values are PWSSWGT sums divided by 1,000.
for name, query in lf_groups:
    totval = df.query(query).PWSSWGT.sum() / 1000
    chval = totval - df2.query(query).PWSSWGT.sum() / 1000
    d[name]['Total, 15+'] = f'{totval:,.0f}'
    d2[name]['Total, 15+'] = f'{chval:,.0f}'

# Remaining columns: one per sex and age group. The loop variable has its own
# name so the `sex` list is not overwritten, and the subsets are not stored
# in `data`, which holds the labor force status series built earlier.
for female, (agemin, agemax) in itertools.product(sex, age_groups):
    group_filter = '@agemin <= AGE < @agemax and FEMALE == @female'
    grp = df.query(group_filter)
    grp_prev = df2.query(group_filter)
    sex_label = 'Women' if female == 1 else 'Men'
    # The top age group ends at 85 and is shown as open-ended, e.g. '60+'.
    age_label = f'{agemin}--{agemax-1}'.replace('--85', '+')
    group = f'{sex_label}, {age_label}'
    for name, query in lf_groups:
        level = grp.query(query).PWSSWGT.sum() / 1000
        level_prev = grp_prev.query(query).PWSSWGT.sum() / 1000
        d[name][group] = f'{level:,.0f}'
        d2[name][group] = f'{level - level_prev:,.0f}'

In [None]:
# Write both tables as LaTeX table bodies: '&' between cells, and two
# backslashes plus a space ending each row. The terminator is spelled with
# fully escaped backslashes; the characters written are unchanged, but the
# literal no longer contains an invalid escape sequence.
# NOTE(review): later pandas releases rename `line_terminator` to
# `lineterminator` -- confirm against the installed version.
tex_opts = dict(sep='&', line_terminator='\\\\ ', quotechar=' ')

# Levels table; the 'query' column is bookkeeping and is dropped before writing.
file = '/home/brian/Documents/uschartbook/chartbook/data/lfs_table1.tex'
pd.DataFrame(d).T.drop('query', axis=1).to_csv(file, **tex_opts)

# Table of differences between the two frames.
file = '/home/brian/Documents/uschartbook/chartbook/data/lfs_table2.tex'
pd.DataFrame(d2).T.to_csv(file, **tex_opts)

### Employment rates by age, gender, education

In [41]:
cols = ['MONTH', 'YEAR', 'LFS', 'PWSSWGT', 'AGE', 'EDUC', 'FEMALE']

# Baseline: all of 2000, keeping records with a positive PWSSWGT.
df1 = pd.read_feather(cps_dir / 'cps2000.ft', columns=cols).query('PWSSWGT > 0')

# Latest: December 2018 followed by everything in the 2019 file, again
# keeping positive weights. pd.concat replaces DataFrame.append, which newer
# pandas releases no longer provide; the rows and their order are the same.
df2 = pd.concat([
    pd.read_feather(cps_dir / 'cps2018.ft', columns=cols).query('MONTH == 12'),
    pd.read_feather(cps_dir / 'cps2019.ft', columns=cols),
]).query('PWSSWGT > 0')

In [42]:
# Age bands as (min, max), both ends inclusive in the query that uses them.
ages = [
    (16, 24), (25, 34), (35, 44),
    (45, 54), (55, 64), (65, 74),
]

# Groups of EDUC codes; the last group contains every code.
educ = [
    ['LTHS', 'HS'],
    ['SC'],
    ['COLL', 'ADV'],
    ['LTHS', 'HS', 'SC', 'COLL', 'ADV'],
]

# Values of the FEMALE column.
sex = [0, 1]

In [45]:
# Employment rate (employed weight as a percent of total weight) for each age
# band (rows) and each period / education group / sex combination (columns).
# Loop variables have their own names so the notebook-level `df` and `data`
# are not rebound by this cell.
results = pd.DataFrame()
for (age_min, age_max), e, s in itertools.product(ages, educ, sex):
    row_name = f'{age_min}-{age_max}'
    sex_label = 'Female' if s == 1 else 'Male'
    group_filter = '@age_min <= AGE <= @age_max and EDUC in @e and FEMALE == @s'
    for yr, frame in [('2000', df1), ('Latest', df2)]:
        col_name = f'{yr} {" or ".join(e)} {sex_label}'
        # PWSSWGT total for each LFS value within the selected group.
        weights = frame.query(group_filter).groupby('LFS').PWSSWGT.sum()
        results.loc[row_name, col_name] = (weights['Employed'] / weights.sum()) * 100

In [46]:
# The last four columns of `results` go to empgroups2.csv with every row.
# The remaining columns go to empgroups.csv without the first row.
by_education = results.iloc[1:, :-4]
last_four = results.iloc[:, -4:]

by_education.to_csv(data_dir / 'empgroups.csv', index_label='name')
last_four.to_csv(data_dir / 'empgroups2.csv', index_label='name')