In [36]:
import pandas as pd
import numpy as np

In [37]:
path = '/home/brian/Documents/CPS/data/clean/'

cols = ['LFS', 'BASICWGT', 'PWSSWGT', 'AGE', 'MONTH', 'YEAR', 'FEMALE']

In [38]:
df = pd.concat([pd.read_feather(f'{path}cps{year}.ft', columns=cols) 
                for year in range(1989, 2020)], sort=False)

df['WGT'] = np.where(df['YEAR'] > 1993, df['PWSSWGT'], df['BASICWGT'])
df['DATE'] = pd.to_datetime(dict(year=df.YEAR, month=df.MONTH, day=1))
data = df.groupby(['DATE', 'LFS']).WGT.sum().unstack().rename({'nan': 'Children'}, axis=1) / 1000000
file = '/home/brian/Documents/uschartbook/chartbook/data/cps_lfs.csv'
data.to_csv(file, index_label='date')

In [39]:
datelt = data.index[-1].strftime('%B %Y')

In [40]:
epop = data["Employed"].iloc[-1] / data.iloc[-1].sum() * 100

In [41]:
text = (f'As of {datelt}, {data["Employed"].iloc[-1]:.1f} million '+
        'people are employed (including self-employment). ')

file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs0.txt'
with open(file, 'w') as wf:
    wf.write(text)


text = ('Labor provided within a household is not captured by GDP compilation '+
        'methods (expenditures, output, or income), though household surveys '+
        'offer some insight into this important category of labor. The '+
        'number of people who are considered employed divided by the total '+
        'population is the employment rate or employment-to-population ratio, '+
        f'which is {epop:.1f} percent as of {datelt}.')

In [42]:
file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs.txt'
with open(file, 'w') as wf:
    wf.write(text)

In [43]:
unemp = (data["Unemployed"].iloc[-1] / (data["Unemployed"].iloc[-1] + data["Employed"].iloc[-1])) * 100
lfpr = (data["Unemployed"].iloc[-1] + data["Employed"].iloc[-1]) / data.iloc[-1].sum() * 100

In [44]:
text = (f'As of {datelt}, there are {data["Unemployed"].iloc[-1]:.1f} million '+
        'unemployed people. The combined group of employed and unemployed people '+
        'is the labor force. The number of unemployed people divided by the number '+
        f'of people in the labor force is the unemployment rate, currently {unemp:.1f} percent. '+
        'The number of people in the labor force divided by the total population is the '+
        f'labor force participation rate, currently {lfpr:.1f} percent.')

In [45]:
text

'As of October 2019, there are 5.6 million unemployed people. The combined group of employed and unemployed people is the labor force. The number of unemployed people divided by the number of people in the labor force is the unemployment rate, currently 3.4 percent. The number of people in the labor force divided by the total population is the labor force participation rate, currently 51.1 percent.'

In [46]:
file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs2.txt'
with open(file, 'w') as wf:
    wf.write(text)

In [47]:
cols2 = cols + ['NILFREASON']

In [48]:
datalt = pd.read_feather(f'{path}cps2019.ft', columns=cols2)

In [49]:
student = (datalt.query('MONTH == 10 and NILFREASON == "School"').PWSSWGT.sum() / 1000000)
care = (datalt.query('MONTH == 10 and NILFREASON == "Family"').PWSSWGT.sum() / 1000000)
disill = (datalt.query('MONTH == 10 and NILFREASON == "Disabled/Ill"').PWSSWGT.sum() / 1000000)
disc = (datalt.query('MONTH == 10 and NILFREASON == "Discouraged"').PWSSWGT.sum() / 1000000)
ret = (datalt.query('MONTH == 10 and NILFREASON == "Retired"').PWSSWGT.sum() / 1000000)

In [50]:
text = ('This category is about half of the population, on average, and '+
        f'totals {data["NILF"].iloc[-1]:.1f} million in {datelt}. The '+
        f'category is comprised of children ({data["Children"].iloc[-1]:.1f}'+
        f' million), students ({student:.1f} million), unpaid caregivers '+
        f'({care:.1f} million), those unable to work due to disability or illness '+
        f'({disill:.1f} million), those who want a job but have given up looking ({disc:.1f} '+
        f'million), and retirees and the elderly ({ret:.1f} million).')

In [51]:
text

'This category is about half of the population, on average, and totals 98.1 million in October 2019. The category is comprised of children (60.7 million), students (18.5 million), unpaid caregivers (12.2 million), those unable to work due to disability or illness (14.2 million), those who want a job but have given up looking (4.1 million), and retirees and the elderly (47.3 million).'

In [52]:
file = '/home/brian/Documents/uschartbook/chartbook/text/cps_lfs3.txt'
with open(file, 'w') as wf:
    wf.write(text)

In [53]:
data['Total'] = data[['Employed', 'Unemployed', 'NILF']].sum(axis=1)

In [54]:
result = data.drop('Children', axis=1).resample('QS').mean()

In [55]:
def growth_contrib_ann(df, srs):
    '''Calculate df column contribution to srs growth'''
    dft = df.diff(4)
    dft = dft.div(dft[srs], axis=0)
    c = dft.multiply(df[srs].pct_change(4) * 100, axis=0)
    return c.round(2)

In [56]:
file = '/home/brian/Documents/uschartbook/chartbook/data/cps_lfs2.csv'
growth_contrib_ann(result, 'Total').to_csv(file, index_label='date')

### LFS Table

In [12]:
import itertools

In [113]:
cols = ['LFS', 'PWSSWGT', 'PWSSWGT', 'AGE', 'MONTH', 'YEAR', 'MJH',
        'FEMALE', 'NILFREASON', 'UNEMPTYPE', 'PTECON', 'WORKFT']

df = pd.read_feather(f'{path}cps2019.ft', columns=cols).query('AGE >= 15 and MONTH == 10')
df2 = pd.read_feather(f'{path}cps2018.ft', columns=cols).query('AGE >= 15 and MONTH == 10')

In [115]:
age_groups = [(15, 30), (30, 60), (60, 86)]
sex = [0, 1]
d = {'Population': {'query': 'PWSSWGT > 0'}, 
     '\hspace{2mm}Employed': {'query': 'LFS == "Employed"'}, 
     '\hspace{4mm}Multiple jobs': {'query': 'MJH == 1'},     
     '\hspace{4mm}Full-time': {'query': 'WORKFT == 1'},  
     '\hspace{4mm}Part-time': {'query': 'WORKFT == 0'}, 
     '\hspace{6mm}Economic reasons': {'query': 'PTECON == 1'},
     '\hspace{2mm}Unemployed': {'query': 'LFS == "Unemployed"'}, 
     '\hspace{2mm}Not in Labor Force': {'query': 'LFS == "NILF"'}, 
     '\hspace{4mm}Discouraged': {'query': 'NILFREASON == "Discouraged"'}, 
     '\hspace{4mm}Disabled/Ill': {'query': 'NILFREASON == "Disabled/Ill"'}, 
     '\hspace{4mm}Family/Care': {'query': 'NILFREASON == "Family"'}, 
     '\hspace{4mm}School': {'query': 'NILFREASON == "School"'}, 
     '\hspace{4mm}Retirement': {'query': 'NILFREASON == "Retired"'}}

d2 = {k: {} for k, v in d.items()}

lf_groups = list(zip(d.keys(), [d[i]['query'] for i in d.keys()])) 

for name, query in lf_groups:
    totval = df.query(query).PWSSWGT.sum()/1000
    d[name]['Total, 15+'] = f'{totval:,.0f}'
    chval = totval - df2.query(query).PWSSWGT.sum()/1000
    d2[name]['Total, 15+'] = f'{chval:,.0f}'

for sex, (agemin, agemax) in itertools.product(sex, age_groups):
    data = df.query('@agemin <= AGE < @agemax and FEMALE == @sex')
    data2 = df2.query('@agemin <= AGE < @agemax and FEMALE == @sex')
    group = f'{["Women" if sex==1 else "Men"][0]}, {agemin}--{agemax-1}'.replace('--85', '+')
    for name, query in lf_groups:
        tmp = data.query(query).PWSSWGT.sum()/1000
        tmp2 = data2.query(query).PWSSWGT.sum()/1000
        d[name][group] = f'{tmp:,.0f}'
        d2[name][group] = f'{tmp - tmp2:,.0f}'

In [116]:
file = '/home/brian/Documents/uschartbook/chartbook/data/lfs_table1.tex'
(pd.DataFrame(d).T.drop('query', axis=1).to_csv(file, 
    sep='&', line_terminator='\\\ ', quotechar=' '))
file = '/home/brian/Documents/uschartbook/chartbook/data/lfs_table2.tex'
pd.DataFrame(d2).T.to_csv(file, sep='&', line_terminator='\\\ ', quotechar=' ')

In [74]:
f'{["Women" if sex==1 else "Men"][0]}, {agemin}--{agemax-1}'.replace('--85', '+')

'Women, 60+'

In [110]:
pd.DataFrame(d).T.drop('query', axis=1).reset_index()

Unnamed: 0,index,Total,"Men, 15--29","Men, 30--59","Men, 60+","Women, 15--29","Women, 30--59","Women, 60+"
0,Population,259845,30426,61334,33927,30205,63629,40324
1,\hspace{2mm}Employed,159067,19337,52443,12138,18481,46423,10245
2,\hspace{4mm}Multiple jobs,8350,844,2682,535,1123,2654,512
3,\hspace{4mm}Full-time,120787,13678,46108,8623,11211,35143,6025
4,\hspace{4mm}Part-time,38280,5658,6336,3515,7270,11281,4220
5,\hspace{6mm}Economic reasons,4057,831,1076,212,689,1035,213
6,\hspace{2mm}Unemployed,5510,1366,1269,271,1012,1316,275
7,\hspace{2mm}Not in Labor Force,95269,9723,7621,21518,10713,15889,29804
8,\hspace{4mm}Discouraged,4025,712,733,465,640,950,524
9,\hspace{4mm}Disabled/Ill,14275,846,3724,2366,524,3964,2851


In [46]:
age_groups = [(15, 30), (30, 60), (60, 86)]
sex = [0, 1]
d = {'Population': {'query': 'BASCICWGT > 0'}, '\hspace{2mm}Employed': {'query': 'LFS == "Employed"'}, 
     '\hspace{4mm}Full-time': {'query': 'WORKFT == 1'}, 
     '\hspace{4mm}Multiple jobs': {'query': 'MJH == 1'}, 
     '\hspace{4mm}Part-time': {'query': 'WORKFT == 0'}, 
     '\hspace{6mm}Economic reasons': {'query': 'PTECON == 1'},
     '\hspace{2mm}Unemployed': {'query': 'LFS == "Unemployed"'}, 
     '\hspace{2mm}Not in Labor Force': {'query': 'LFS == "NILF"'}, 
     '\hspace{4mm}Discouraged': {'query': 'NILFREASON == "Discouraged"'}, 
     '\hspace{4mm}Disabled/Ill': {'query': 'NILFREASON == "Disabled/Ill"'}, 
     '\hspace{4mm}Family/Care': {'query': 'NILFREASON == "Family"'}, 
     '\hspace{4mm}School': {'query': 'NILFREASON == "School"'}, 
     '\hspace{4mm}Retirement': {'query': 'NILFREASON == "Retired"'}}
for sex, (agemin, agemax) in itertools.product(sex, age_groups):
    data = df.query('@agemin <= AGE < @agemax and FEMALE == @sex')
    group = f'{["Women" if sex==1 else "Men"][0]}, {agemin}--{agemax-1}'
    for name, query in zip(d.keys(), [d[i]['query'] for i in d.keys()]):
        tmp = data.query(query).BASICWGT.sum()
        d[name][group] = f'{tmp},.0f'
    d['Population'][group] = data.BASICWGT.sum()
    
    
    d['\hspace{2mm}Employed'][group] = data.query('LFS == "Employed"').BASICWGT.sum()    
    d['\hspace{2mm}Unemployed'][group] = data.query('LFS == "Unemployed"').BASICWGT.sum() 
    d['\hspace{2mm}Not in Labor Force'][group] = data.query('LFS == "NILF"').BASICWGT.sum() 
    d['\hspace{4mm}Multiple jobs'][group] = data.query('MJH == 1').BASICWGT.sum() 
    d['\hspace{4mm}Full-time'][group] = data.query('WORKFT == 1').BASICWGT.sum() 
    d['\hspace{4mm}Part-time'][group] = data.query('WORKFT == 0').BASICWGT.sum() 
    d['\hspace{6mm}Economic reasons'][group] = data.query('PTECON == 1').BASICWGT.sum()
    d['\hspace{4mm}Discouraged'][group] = data.query('NILFREASON == "Discouraged"').BASICWGT.sum() 
    d['\hspace{4mm}Disabled/Ill'][group] = data.query('NILFREASON == "Disabled/Ill"').BASICWGT.sum() 
    d['\hspace{4mm}Family/Care'][group] = data.query('NILFREASON == "Family"').BASICWGT.sum() 
    d['\hspace{4mm}School'][group] = data.query('NILFREASON == "School"').BASICWGT.sum() 
    d['\hspace{4mm}Retirement'][group] = data.query('NILFREASON == "Retired"').BASICWGT.sum() 

In [49]:
file = '/home/brian/Documents/uschartbook/chartbook/data/lfs_table1.tex'
pd.DataFrame(d).T.divide(1000).round().to_csv(file, sep='&', line_terminator='\\\ ', quotechar=' ')

In [70]:
file = '/home/brian/Documents/uschartbook/chartbook/data/lfs_table1.tex'
pd.DataFrame(d).T.drop('query', axis=1).to_csv(file, sep='&', line_terminator='\\\ ', quotechar=' ')

In [None]:
d[][]

In [55]:
list(zip(d.keys(), [d[i]['query'] for i in d.keys()]))

[('Population', 'BASCICWGT > 0'),
 ('\\hspace{2mm}Employed', 'LFS == "Employed"'),
 ('\\hspace{4mm}Full-time', 'WORKFT == 1'),
 ('\\hspace{4mm}Multiple jobs', 'MJH == 1'),
 ('\\hspace{4mm}Part-time', 'WORKFT == 0'),
 ('\\hspace{6mm}Economic reasons', 'PTECON == 1'),
 ('\\hspace{2mm}Unemployed', 'LFS == "Unemployed"'),
 ('\\hspace{2mm}Not in Labor Force', 'LFS == "NILF"'),
 ('\\hspace{4mm}Discouraged', 'NILFREASON == "Discouraged"'),
 ('\\hspace{4mm}Disabled/Ill', 'NILFREASON == "Disabled/Ill"'),
 ('\\hspace{4mm}Family/Care', 'NILFREASON == "Family"'),
 ('\\hspace{4mm}School', 'NILFREASON == "School"'),
 ('\\hspace{4mm}Retirement', 'NILFREASON == "Retired"')]

In [56]:
for name, query in zip(d.keys(), [d[i]['query'] for i in d.keys()]):
    tmp = data.query(query).BASICWGT.sum()
    d[name][group] = f'{tmp},.0f'
    




Population
BASCICWGT > 0
\hspace{2mm}Employed
LFS == "Employed"
\hspace{4mm}Full-time
WORKFT == 1
\hspace{4mm}Multiple jobs
MJH == 1
\hspace{4mm}Part-time
WORKFT == 0
\hspace{6mm}Economic reasons
PTECON == 1
\hspace{2mm}Unemployed
LFS == "Unemployed"
\hspace{2mm}Not in Labor Force
LFS == "NILF"
\hspace{4mm}Discouraged
NILFREASON == "Discouraged"
\hspace{4mm}Disabled/Ill
NILFREASON == "Disabled/Ill"
\hspace{4mm}Family/Care
NILFREASON == "Family"
\hspace{4mm}School
NILFREASON == "School"
\hspace{4mm}Retirement
NILFREASON == "Retired"


In [None]:
for sex, age_group in zip([], [(16, 24), (25, 54), (55, 80)]):
    data = 

In [50]:
d_tmp = d