## CPS Demographics

Data on headship, age, and education, calculated from the basic monthly Current Population Survey (CPS) microdata files.

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

### Headship

In [2]:
# Columns read from each annual file: household id, age, survey year and
# month, and the household and person weights.
cols = ['QSTNUM', 'AGE', 'YEAR', 'MONTH', 'HHWGT', 'PWSSWGT']

# Read every annual file from 1996 through the year of the latest CPS month.
# Deriving the end year from cps_date() (instead of hard-coding it) keeps
# the "currently ... as of" sentence below from silently going stale.
latest_year = cps_date().year
df = pd.concat([pd.read_feather(f'{cps_dir}/cps{year}.ft', columns=cols)
                  .query('AGE > 15')
                for year in range(1996, latest_year + 1)])


def headship_rate(grp):
    """Return the headship rate for one month of person records.

    The numerator counts each household once: the first HHWGT within every
    QSTNUM, summed. The denominator is the sum of the person weights
    (PWSSWGT) of all rows in ``grp``.
    """
    households = grp.groupby('QSTNUM').HHWGT.first().sum()
    return households / grp.PWSSWGT.sum()


# One rate per (YEAR, MONTH). Only the columns the function needs are passed
# to apply, and the result is stored under an explicit column name rather
# than the implicit column 0.
data = (df.groupby(['YEAR', 'MONTH'])[['QSTNUM', 'HHWGT', 'PWSSWGT']]
          .apply(headship_rate)
          .rename('rate')
          .reset_index())
data['DATE'] = pd.to_datetime(dict(year=data.YEAR, month=data.MONTH, day=1))
data = data.set_index('DATE').drop(['YEAR', 'MONTH'], axis=1)

# Take the adjusted series (`seasadj`) from the X-13 result and express it
# in percent.
adjusted = x13_arima_analysis(data['rate'])
result = adjusted.seasadj * 100
result.name = 'value'
result.to_csv(data_dir / 'headship.csv', index_label='date', header=True)

node = end_node3(result, 'red')
write_txt(text_dir / 'headship_node.txt', node)

# Values quoted in the summary sentence: series minimum, latest value, and
# the February 2020 value.
low = result.min()
low_dt = dtxt(result.idxmin())['mon1']
lt = result.iloc[-1]
lt_dt = dtxt(result.index[-1])['mon1']
feb = result.loc['2020-02-01']

text = (f'The headship rate reached a low of {low:.2f} percent during '+
        f'{low_dt}, and is currently {lt:.2f} percent, as of {lt_dt}. '+
        f'In February 2020, the headship rate was {feb:.2f} percent.')
write_txt(text_dir / 'headship.txt', text)
print(text)

The headship rate reached a low of 49.18 percent during May 2020, and is currently 50.39 percent, as of December 2021. In February 2020, the headship rate was 50.07 percent.


### Age

In [3]:
write_txt(text_dir / 'cps_age_dt.txt', dtxt(cps_date())['mon2'])

# Reference periods: the latest CPS month, one year before it, and
# December 1989.
dates = [('Latest', cps_date()),
         ('Prev', cps_date() - pd.DateOffset(years=1)),
         ('1989', pd.to_datetime('1989-12-01'))]

# Inclusive (low, high) age bounds for each group; infinities mark the
# open-ended first and last groups.
age_groups = {'0--17': (float('-inf'), 17),
              '18--29': (18, 29),
              '30--44': (30, 44),
              '45--64': (45, 64),
              '65+': (65, float('inf'))}

d = {name: {} for name, _ in dates}  # share of the weighted total, by age group
t = {name: {} for name, _ in dates}  # weighted totals

for name, date in dates:
    # Dates after 1993 use PWSSWGT; earlier dates use BASICWGT.
    wgt = 'PWSSWGT' if date.year > 1993 else 'BASICWGT'
    cols = ['AGE', wgt]
    df = cps_12mo(cps_dir, date, cols)

    data = df.groupby('AGE')[wgt].sum()
    # Compare ages numerically. Slicing the numeric AGE index with string
    # labels such as '18':'29' depends on accidental str/int comparison
    # inside the index lookup and is not guaranteed to work.
    ages = data.index.astype(int)
    tot = data.sum()
    # NOTE(review): cps_12mo presumably pools twelve monthly samples, hence
    # the division by 12 to get an average monthly level -- confirm.
    t[name]['tot'] = tot / 12
    for label, (lo, hi) in age_groups.items():
        in_group = (ages >= lo) & (ages <= hi)
        d[name][label] = data[in_group].sum() / tot

result = pd.DataFrame(d) * 100
result[['1989', 'Latest']].to_csv(data_dir / 'cps_age.csv', index_label='name')

pop = f'{t["Latest"]["tot"] / 1_000_000:.0f}'
popp = f'{t["1989"]["tot"] / 1_000_000:.0f}'
ldate = dtxt(cps_date())['mon1']
popgr = f'{((t["Latest"]["tot"] / t["Prev"]["tot"]) - 1) * 100:.1f}'

u18 = d['Latest']['0--17'] * 100
prer = d['Latest']['45--64'] * 100
o64 = d['Latest']['65+'] * 100
u18p = d['1989']['0--17'] * 100
prerp = d['1989']['45--64'] * 100
o64p = d['1989']['65+'] * 100

# Connector between the CPS growth rate and the 0.6 percent benchmark quoted
# in the text. The f-string below already supplies the following space, so
# the connector must not end in one (the old '; ' produced a double space).
qual = ';' if popgr == '0.6' else ', though'

text = (f'The CPS civilian non-institutionalized population is {pop} '+
        f'million in the year ending {ldate}, with '+
        f'growth of {popgr} percent over the past year{qual} other Census '+
        f'population growth estimates are around 0.6 percent. By age, {u18:.1f} '+
        f'percent are under the age of 18 and {o64:.1f} percent are age '+
        f'65 or older. In 1989, the US population was {popp} million, '+
        f'with {u18p:.1f} percent under 18 and {o64p:.1f} percent 65 or older. The '+
        'pre-retirement age (45--64) share of the population has increased '+
        f'to {prer:.1f} percent in the year ending {ldate} from {prerp:.1f} '+
        'percent in 1989. ')

write_txt(text_dir / 'cps_age.txt', text)
print(text)

The CPS civilian non-institutionalized population is 325 million in the year ending December 2021, with growth of 0.2 percent over the past year, though other Census population growth estimates are around 0.6 percent. By age, 22.3 percent are under the age of 18 and 17.3 percent are age 65 or older. In 1989, the US population was 244 million, with 26.3 percent under 18 and 12.0 percent 65 or older. The pre-retirement age (45--64) share of the population has increased to 25.0 percent in the year ending December 2021 from 18.9 percent in 1989. 


### Education

In [4]:
cols = ['EDUCDT', 'EDUC', 'BASICWGT', 'AGE', 'MONTH', 'YEAR', 'LFS', 'HRSACTT', 'FEMALE']


def educdt2(x):
    """Return education codes with three EDUCDT categories broken out.

    Rows whose EDUCDT is 'Some college but no degree',
    'Associate degree-occupational/vocational', or
    'Associate degree-academic program' are coded 'SCND', 'VOC', and 'AAD'
    respectively; every other row keeps its EDUC value.
    """
    return np.where(x.EDUCDT == 'Some college but no degree', 'SCND',
           np.where(x.EDUCDT == 'Associate degree-occupational/vocational', 'VOC',
           np.where(x.EDUCDT == 'Associate degree-academic program', 'AAD', x.EDUC)))


def educ_share(frame):
    """Return each EDUCDT2 group's percent share of total BASICWGT in ``frame``."""
    return frame.groupby('EDUCDT2').BASICWGT.sum() / frame.BASICWGT.sum() * 100


# Samples restricted to AGE > 24: the latest 12-month window and the one
# ending December 2000.
df = cps_12mo(cps_dir, cps_date(), cols).query('AGE > 24').assign(EDUCDT2 = educdt2)
date2000 = pd.to_datetime('2000-12-01')
df2 = cps_12mo(cps_dir, date2000, cols).query('AGE > 24').assign(EDUCDT2 = educdt2)

# Employed subsets of the two samples
df3 = df.query('LFS == "Employed"')
df4 = df2.query('LFS == "Employed"')

# Columns are added one at a time, in this order, so every later column is
# aligned to the index established by the first one.
data = pd.DataFrame()
for column, frame in [('2000', df2), ('latest', df),
                      ('2000_emp', df4), ('latest_emp', df3)]:
    data[column] = educ_share(frame)

data = data.loc[['LTHS', 'HS', 'SCND', 'VOC', 'AAD', 'COLL', 'ADV']]

# Raw strings keep the two literal backslashes before '*' without relying on
# the invalid escape sequence '\*' (a warning on current Python versions).
# NOTE(review): '\\*' is presumably a LaTeX line break for the chart label.
data.index = ['No High School Diploma', 'High School Diploma', 'Some College, No Degree',
              r'Associate Degree,\\*Vocational',
              r'Associate Degree,\\*Academic', "Bachelor's Degree", 'Advanced Degree']

data.to_csv(data_dir / 'cps_educ.csv', sep=';', index_label='name')

In [5]:
# Weighted counts by EDUC group, divided by 12 and by one million.
# NOTE(review): the 12 presumably averages the twelve pooled monthly samples
# and the 1,000,000 converts to millions, matching the "million" wording in
# the text below -- confirm.
tot = (df.groupby('EDUC').BASICWGT.sum() / (12 * 1_000_000)).loc[['LTHS', 'HS', 'SC', 'COLL', 'ADV']]

tot.index = ['No High School Diploma', 'High School Diploma', 'Some College or Associate Degree',
             "Bachelor's Degree", 'Advanced Degree']

tot.to_csv(data_dir / 'cps_educ_tot.csv', index_label='name', header=True)

ltdate = dtxt(cps_date())['mon1']

# Levels (millions) come from `tot`; shares (percent) come from the 'latest'
# column of `data`.
ba_adv_tot = tot["Bachelor's Degree"] + tot['Advanced Degree']
ba_adv_sh = data.loc["Bachelor's Degree", 'latest'] + data.loc['Advanced Degree', 'latest']

adv_tot = tot['Advanced Degree']
adv_sh = data.loc['Advanced Degree', 'latest']

sc_tot = tot['Some College or Associate Degree']

hs_tot = tot['High School Diploma']

lths_tot = tot['No High School Diploma']

# `df` is filtered with AGE > 24, so the population described is age 25 and
# older (not strictly "over 25"); the sentence says so explicitly.
text = (f'Over the year ending {ltdate}, {ba_adv_tot:.1f} million '+
        f'people age 25 or older, or {ba_adv_sh:.1f} percent of the total, '+
        f"have at least a bachelor's degree, with {adv_tot:.1f} million of "+
        f'those, or {adv_sh:.1f} percent of the total, holding '+
        "an advanced degree such as a master's degree, medical or law degree, or PhD. "+
        f'An additional {sc_tot:.1f} million people have some college coursework '+
        f'but no degree or have an associate degree. A total of {hs_tot:.1f} million '+
        f'have a high school diploma but no college, while {lths_tot:.1f} million '+
        f'have no high school diploma.')
write_txt(text_dir / 'cps_educ.txt', text)
write_txt(text_dir / 'cps_ltdate.txt', ltdate)
print(text)

Over the year ending December 2021, 84.3 million people over the age of 25, or 37.6 percent of the total, have at least a bachelor's degree, with 31.9 million of those, or 14.2 percent of the total, holding an advanced degree such as a master's degree, medical or law degree, or PhD. An additional 56.7 million people have some college coursework but no degree or have an associate degree. A total of 63.2 million have a high school diploma but no college, while 20.0 million have no high school diploma.


In [6]:
def _ba_or_higher(column):
    """Return the bachelor's plus advanced degree share from one column of ``data``."""
    return data.loc["Bachelor's Degree", column] + data.loc['Advanced Degree', column]


# Whole population: share in 2000 and the change to the latest period
# (`ba_adv_sh` is the latest-period share computed in the previous cell).
ba_adv_sh_pr = _ba_or_higher('2000')
ba_adv_sh_ch = ba_adv_sh - ba_adv_sh_pr

# Employed only: latest share, 2000 share, and the change between them
ba_adv_sh_emp = _ba_or_higher('latest_emp')
ba_adv_sh_emp_pr = _ba_or_higher('2000_emp')
ba_adv_sh_emp_ch = ba_adv_sh_emp - ba_adv_sh_emp_pr

sentences = [
    "The share of the population with a bachelor's degree or advanced degree ",
    f"increased by {ba_adv_sh_ch:.1f} percentage points since 2000. The increase ",
    "is even more pronounced among those who are employed; ",
    f"{ba_adv_sh_emp:.1f} percent have a college degree or advanced degree in {ltdate}, an ",
    f"increase of {ba_adv_sh_emp_ch:.1f} percentage points since 2000. ",
]
text = ''.join(sentences)

# Write the paragraph and the date label it refers to
for filename, contents in (('cps_educ2.txt', text), ('cps_ltdate.txt', ltdate)):
    write_txt(text_dir / filename, contents)