## CPS Demographics

Data on headship, age, and education, calculated from basic monthly CPS microdata files

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

### Headship

In [2]:
cols = ['QSTNUM', 'AGE', 'YEAR', 'MONTH', 'HHWGT', 'PWSSWGT']
df = pd.concat([pd.read_feather(f'{cps_dir}/cps{year}.ft', columns=cols)
                  .query('AGE > 15') 
                for year in range(1996, 2023)])

headship_rate = (lambda grp: grp.groupby('QSTNUM').HHWGT.first().sum()
                 / grp.PWSSWGT.sum())

data = (df.groupby(['YEAR', 'MONTH']).apply(headship_rate)).reset_index()
data['DATE'] = pd.to_datetime(dict(year=data.YEAR, month=data.MONTH, day=1))
data = data.set_index('DATE').drop(['YEAR', 'MONTH'], axis=1)

sm = x13_arima_analysis(data[0])
result = sm.seasadj * 100
result.name = 'value'
result.to_csv(data_dir / 'headship.csv', index_label='date', header=True)

color = 'purple!70!violet'
node = end_node(result, color, date='m', digits=2, full_year=True)
write_txt(text_dir / 'headship_node.txt', node)

low = result.min()
low_dt = dtxt(result.idxmin())['mon1']
lt = result.iloc[-1]
lt_dt = dtxt(result.index[-1])['mon1']
feb = result.loc['2020-02-01']

cl = c_line(color)
text = (f'The headship rate reached a low of {low:.2f} percent during '+
        f'{low_dt}, and is currently {lt:.2f} percent, as of {lt_dt} '+
        f'{cl}. ')
write_txt(text_dir / 'headship.txt', text)
print(text)

The headship rate reached a low of 49.17 percent during May 2020, and is currently 50.05 percent, as of December 2022 (see {\color{purple!70!violet}\textbf{---}}). 


          found in one or more of the estimated spectra.


### Median Age

In [3]:
cols = ['YEAR', 'MONTH', 'AGE', 'BASICWGT', 'LFS']
dfe = pd.concat([pd.read_feather(f'{cps_dir}/cps{year}.ft', 
                                columns=cols)
                for year in range(1989, 1994)])
dfl = pd.concat([pd.read_feather(f'{cps_dir}/cps{year}.ft', 
                                columns=cols + ['PWSSWGT'])
                for year in range(1994, 2023)])
df = pd.concat([dfe, dfl])

grps = [('AGE16PLUS', 'BASICWGT', 'YEAR > 1988 and AGE > 15'), 
        ('PWSSWGT', 'PWSSWGT', 'YEAR > 1988'), 
        ('BASICWGT', 'BASICWGT', 'YEAR < 1998'), 
        ('EMP', 'BASICWGT', 'LFS == "Employed"')]
date = lambda x: pd.to_datetime(dict(year=x.YEAR, 
                                     month=x.MONTH, day=1))
data = pd.concat(
    [df.query(query)
       .groupby(['YEAR', 'MONTH'])
       .apply(lambda x: median_age(x, wgt))
       .rename(name).reset_index()
       .assign(date = date).set_index('date')
       .drop(['YEAR', 'MONTH'], axis=1) 
     for name, wgt, query in grps], axis=1)
data['ALL'] = pd.concat([data.loc['1989':'1993', 'BASICWGT'], 
                         data.loc['1994':, 'PWSSWGT']])
res = data.loc['1990':, ['AGE16PLUS', 'EMP', 'ALL']]
res.to_csv(data_dir / 'median_age.csv', 
           index_label='date', float_format='%g')

In [4]:
res = pd.read_csv(data_dir / 'median_age.csv', 
                  index_col='date', parse_dates=True)
allcol = 'blue!60!cyan'
a16col = 'violet'
empcol = 'red'
grps = [('ALL', allcol, None, 0), ('AGE16PLUS', a16col, 'm', 0.17),
        ('EMP', empcol, None, 0)]

nodes = '\n'.join([end_node(res[srs], col, date=dt, loc=loc, 
                            offset=offset,
                            full_year=True, colon=False) 
                   for (srs, col, dt, offset), loc 
                   in itertools.product(grps, ['end', 'start'])])
write_txt(text_dir / 'med_age_node.txt', nodes) 

def drop_zero(value):
    '''
    Return text string with value but drop .0 if value ends in .0
    '''
    return (f'{value:.0f}' if round(value, 1) % 1 == 0.0 
            else f'{value:.1f}')

ltval = drop_zero(res.ALL.iloc[-1])
ltdt = dtxt(res.index[-1])['mon1']
prval = drop_zero(res.ALL.iloc[0])
prdt = dtxt(res.index[0])['mon1']
ltwval = drop_zero(res.EMP.iloc[-1])
prwval = drop_zero(res.EMP.iloc[0])
text = ('The \\textbf{median age} is the midpoint for the age of a '+
        'group; half of the group is older and half is younger. '+
        'Tracking this point over time summarizes the age composition '+
        'of the group. As a population ages, the median age will '+
        'increase.\n\nThe median age of the overall civilian non'+
        'institutionalized population, calculated from the Current '+
        f'Population Survey (CPS), is {ltval}, as of {ltdt}, compared '+
        f'to {prval} in {prdt} {c_line(allcol)}. The median worker '+
        f'is {ltwval} in {ltdt}, and {prwval} in {prdt} '+
        f'{c_line(empcol)}.')
write_txt(text_dir / 'med_age.txt', text) 
print(text)

The \textbf{median age} is the midpoint for the age of a group; half of the group is older and half is younger. Tracking this point over time summarizes the age composition of the group. As a population ages, the median age will increase.

The median age of the overall civilian noninstitutionalized population, calculated from the Current Population Survey (CPS), is 38.2, as of December 2022, compared to 31.3 in January 1990 (see {\color{blue!60!cyan}\textbf{---}}). The median worker is 41 in December 2022, and 35.5 in January 1990 (see {\color{red}\textbf{---}}).


### Age Groups / Composition

In [5]:
write_txt(text_dir / 'cps_age_dt.txt', dtxt(cps_date())['mon2'])
grps = [('0--15', '0', '15'), ('16--24', '16', '24'),
        ('25--34', '25', '34'), ('35--44', '35', '44'),
        ('45--54', '45', '54'), ('55--64', '55', '64'),
        ('65--74', '65', '74'), ('0--17', '0', '17'),
        ('25--54', '25', '54'), ('65+', '65', '85')]
col = ['AGE', 'PWSSWGT']
lt = (cps_1mo(cps_dir, cps_date(), col)
      .groupby('AGE')[col[-1]].sum())
col = ['AGE', 'BASICWGT']
py = '1989'
pr = (cps_1mo(cps_dir, pd.to_datetime(f'{py}-01-01'), col)
      .groupby('AGE')[col[-1]].sum())
sh, tot = pd.DataFrame(), pd.DataFrame()
totlt, totpr = lt.sum() / 1_000_000, pr.sum() / 1_000_000
tot['Latest'] = (pd.Series({name: lt.loc[a1:a2].sum() 
                          for name, a1, a2 in grps}) 
                 / 1_000_000)
sh['Latest'] = (tot.Latest / totlt) * 100
tot['Prev'] = (pd.Series({name: pr.loc[a1:a2].sum() 
                          for name, a1, a2 in grps})
               / 1_000_000)
sh['Prev'] = (tot.Prev / totpr) * 100
sh.to_csv(data_dir / 'cps_age.csv', index_label='name', 
          float_format='%g')
ltdt = dtxt(cps_date())['mon1']

In [6]:
sht = sh.applymap('{:.1f} percent'.format)
tott = tot.applymap('{:.1f} million'.format)
text = ('The noninstitutionalized civilian population '+
        'used in most labor statistics totals '+
        f'{totlt:.1f} million in {ltdt}. '+
        f'Of this, {sht.loc["0--15","Latest"]} '+
        'are under the working age of 16, '+
        f'equivalent to {tott.loc["0--15","Latest"]} '+
        'people. In 1989, the under-16 population '+
        f'was {sht.loc["0--15","Prev"]} '+
        'of the total. The juvenile population, those '+
        f'under 18, is {tott.loc["0--17","Latest"]}, '+
        f'equivalent to {sht.loc["0--17","Latest"]} of '+
        f'the population in {ltdt}, and compared to '+
        f'{sht.loc["0--17","Prev"]} in 1989.\n\n'+
        'Traditionally, the prime working age is '+
        f'between 25 and 54. In {ltdt}, '+
        f'{tott.loc["25--54", "Latest"]} people, '+
        f'{sht.loc["25--54", "Latest"]} of '+
        f'the population, are age 25 to 54. In 1989, '+
        f'{sht.loc["25--54", "Prev"]} of the '+
        'population is age 25 to 54. The age 55 to 64 group '+
        f'is {sht.loc["55--64", "Latest"]} of '+
        'the population in the latest data and '+
        f'{sht.loc["55--64", "Prev"]} in 1989. Those '+
        'above the age of 65 comprise '+
        f'{sht.loc["65+", "Latest"]} in {ltdt} and '+
        f'{sht.loc["65+", "Prev"]} in 1989. ')
write_txt(text_dir / 'cps_age.txt', text)    
print(text)

The noninstitutionalized civilian population used in most labor statistics totals 329.5 million in January 2023. Of this, 19.3 percent are under the working age of 16, equivalent to 63.6 million people. In 1989, the under-16 population was 23.4 percent of the total. The juvenile population, those under 18, is 72.7 million, equivalent to 22.1 percent of the population in January 2023, and compared to 26.3 percent in 1989.

Traditionally, the prime working age is between 25 and 54. In January 2023, 127.7 million people, 38.7 percent of the population, are age 25 to 54. In 1989, 42.3 percent of the population is age 25 to 54. The age 55 to 64 group is 12.6 percent of the population in the latest data and 8.9 percent in 1989. Those above the age of 65 comprise 17.4 percent in January 2023 and 11.9 percent in 1989. 


### Education

In [7]:
cols = ['EDUCDT', 'EDUC', 'BASICWGT', 'AGE', 'MONTH', 'YEAR', 'LFS', 'HRSACTT', 'FEMALE']

educdt2 = lambda x: np.where(x.EDUCDT == 'Some college but no degree', 'SCND', 
                    np.where(x.EDUCDT == 'Associate degree-occupational/vocational', 'VOC',
                    np.where(x.EDUCDT == 'Associate degree-academic program', 'AAD', x.EDUC)))

df = cps_12mo(cps_dir, cps_date(), cols).query('AGE > 24').assign(EDUCDT2 = educdt2)
date2000 = pd.to_datetime('2000-12-01')
df2 = cps_12mo(cps_dir, date2000, cols).query('AGE > 24').assign(EDUCDT2 = educdt2)

df3 = df.query('LFS == "Employed"')
df4 = df2.query('LFS == "Employed"')

data = pd.DataFrame()

data['2000'] = df2.groupby('EDUCDT2').BASICWGT.sum() / df2.BASICWGT.sum() * 100
data['latest'] = df.groupby('EDUCDT2').BASICWGT.sum() / df.BASICWGT.sum() * 100

data['2000_emp'] = df4.groupby('EDUCDT2').BASICWGT.sum() / df4.BASICWGT.sum() * 100
data['latest_emp'] = df3.groupby('EDUCDT2').BASICWGT.sum() / df3.BASICWGT.sum() * 100

data = data.loc[['LTHS', 'HS', 'SCND', 'VOC', 'AAD', 'COLL', 'ADV']]

data.index = ['No High School Diploma', 'High School Diploma', 'Some College, No Degree', 
              'Associate Degree,\\\*Vocational',
              'Associate Degree,\\\*Academic', "Bachelor's Degree", 'Advanced Degree']

data.to_csv(data_dir / 'cps_educ.csv', sep=';', index_label='name')

  df = (pd.read_feather(cps_dir / f'cps{cps_year1}.ft', columns=cols)


In [8]:
tot = (df.groupby('EDUC').BASICWGT.sum() / 12_000_000).loc[['LTHS', 'HS', 'SC', 'COLL', 'ADV']]
tot.index = ['No High School Diploma', 'High School Diploma', 'Some College or Associate Degree', 
             "Bachelor's Degree", 'Advanced Degree']
tot.to_csv(data_dir / 'cps_educ_tot.csv', index_label='name', header=True)

ltdate = dtxt(cps_date())['mon1']
write_txt(text_dir / 'cps_ltdate.txt', ltdate)
yrdate = dtxt(cps_date() - pd.DateOffset(years=1))['mon1']
write_txt(text_dir / 'cps_yrdate.txt', yrdate)
ba_adv_tot = tot["Bachelor's Degree"] + tot['Advanced Degree']
ba_adv_sh = (data.loc["Bachelor's Degree", 'latest'] + 
             data.loc['Advanced Degree', 'latest'])

adv_tot = tot['Advanced Degree']
adv_sh = data.loc['Advanced Degree', 'latest']

sc_tot = tot['Some College or Associate Degree']

hs_tot = tot['High School Diploma']

lths_tot = tot['No High School Diploma']

text = (f'Over the year ending {ltdate}, {ba_adv_tot:.1f} million '+
        f'people over the age of 25, or {ba_adv_sh:.1f} percent of the total, '+
        f"have at least a bachelor's degree, with {adv_tot:.1f} million of "+
        f'those, or {adv_sh:.1f} percent of the total, holding '+
        "an advanced degree such as a master's degree, medical or law degree, or PhD. "+
        f'An additional {sc_tot:.1f} million people have some college coursework '+
        f'but no degree or have an associate degree. A total of {hs_tot:.1f} million '+
        f'have a high school diploma but no college, while {lths_tot:.1f} million '+
        f'have no high school diploma.')
write_txt(text_dir / 'cps_educ.txt', text)
print(text)

Over the year ending January 2023, 86.1 million people over the age of 25, or 38.1 percent of the total, have at least a bachelor's degree, with 32.9 million of those, or 14.5 percent of the total, holding an advanced degree such as a master's degree, medical or law degree, or PhD. An additional 56.6 million people have some college coursework but no degree or have an associate degree. A total of 63.7 million have a high school diploma but no college, while 19.7 million have no high school diploma.


In [9]:
ba_adv_sh_pr = (data.loc["Bachelor's Degree", '2000'] + 
                data.loc['Advanced Degree', '2000'])
ba_adv_sh_ch = ba_adv_sh - ba_adv_sh_pr
ba_adv_sh_emp = (data.loc["Bachelor's Degree", 'latest_emp'] + 
                 data.loc['Advanced Degree', 'latest_emp'])
ba_adv_sh_emp_pr = (data.loc["Bachelor's Degree", '2000_emp'] + 
                    data.loc['Advanced Degree', '2000_emp'])
ba_adv_sh_emp_ch = ba_adv_sh_emp - ba_adv_sh_emp_pr

text = ("The share of the population with a bachelor's degree or advanced degree "+
        f"increased by {ba_adv_sh_ch:.1f} percentage points since 2000. The increase "+
        "is even more pronounced among those who are employed; "+
        f"{ba_adv_sh_emp:.1f} percent have a college degree or advanced degree in {ltdate}, an "+
        f"increase of {ba_adv_sh_emp_ch:.1f} percentage points since 2000. ")
write_txt(text_dir / 'cps_educ2.txt', text)
print(text)

The share of the population with a bachelor's degree or advanced degree increased by 12.2 percentage points since 2000. The increase is even more pronounced among those who are employed; 44.2 percent have a college degree or advanced degree in January 2023, an increase of 13.3 percentage points since 2000. 
