### Census Population Estimates and Projections

February 22, 2022

Brian W. Dew

In [2]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

### Population Estimates

In [2]:
# April 2020 onward
url = ('https://api.census.gov/data/2021/pep/natmonthly?'+
       'get=NAME,POP,UNIVERSE,MONTHLY,LASTUPDATE&'+
       f'for=us:1&key={census_key}')
r = requests.get(url).json()
df = (pd.DataFrame(r[1:], columns=r[0]).astype({'MONTHLY': 'int'})
        .query('UNIVERSE == "R" and MONTHLY > 1')
        .sort_values('MONTHLY'))
rel_dt = dtxt(pd.to_datetime(df['LASTUPDATE'].iloc[-1]))['day1']
write_txt(text_dir / 'pop_est_reldt.txt', rel_dt)
df.index = pd.date_range('04-01-2020', '12-01-2022', freq='MS')
data = df['POP'].astype('int')

# 2010 to 2020 data
url = ('https://api.census.gov/data/2019/pep/natmonthly?'+
       'get=NAME,POP,UNIVERSE,MONTHLY&'+
       f'for=us:1&key={census_key}')
r = requests.get(url).json()
df10 = (pd.DataFrame(r[1:], columns=r[0]).astype({'MONTHLY': 'int'})
          .query('UNIVERSE == "R" and MONTHLY > 1')
          .sort_values('MONTHLY'))
df10.index = pd.date_range('04-01-2010', '12-01-2020', freq='MS')
data10 = df10['POP'].astype('int').loc['2010-05-01':'2020-03-01']

# 2000 to 2010 data
url = ('https://api.census.gov/data/2000/pep/int_natmonthly?'+
       f'get=POP,UNIVERSE,MONTHLY&for=us:1&key={census_key}')
r = requests.get(url).json()
df00 = (pd.DataFrame(r[1:], columns=r[0]).query('UNIVERSE == "R"')
          .astype({'MONTHLY': 'int'}).sort_values('MONTHLY'))
df00.index = pd.date_range('04-01-2000', '04-01-2010', freq='MS')
data00 = df00['POP'].astype('int').loc['2000-05-01':]

# 1990 to 2000 data
url = ('https://www2.census.gov/programs-surveys/popest/datasets/'+
       '1990-2000/intercensal/national/us-est90int-07.csv')
r = pd.read_csv(url, header=None, skiprows=1)
r.columns = ['date', 'age', 'TOTAL', 'men', 'women']
df90 = r.query('age == "All Age"')
df90.index = pd.to_datetime(df90.date)
data90 = df90['TOTAL'].astype('int').loc['1990-05-01':]

res = data.append(data10).append(data00).append(data90).sort_index()
res.to_csv(data_dir / 'pop_est_raw.csv', index_label='date')

In [3]:
rel_dt

'December 21, 2021'

### Under 65 Population

In [4]:
# 2010 to 2020 data
url = ('https://api.census.gov/data/2019/pep/charagegroups?'+
       'get=NAME,POP,DATE_CODE,DATE_DESC,AGEGROUP,UNIVERSE&'+
       f'for=us:1&key={census_key}')
r = requests.get(url).json()
df10 = (pd.DataFrame(r[1:], columns=r[0]).astype({'DATE_CODE': 'int'})
          .query('UNIVERSE == "R" and AGEGROUP in ["0","26"] and DATE_CODE > 2'))
df10.index = [pd.to_datetime(df10.DATE_DESC.str[:8]), 
              df10.AGEGROUP]
data10 = df10['POP'].unstack().sort_index().astype(int)

# 2000 to 2010 data
url = ('https://api.census.gov/data/2000/pep/int_charagegroups?'+
       'get=POP,DATE_,DATE_DESC,AGEGROUP,UNIVERSE&'+
       f'for=us:1&key={census_key}')
r = requests.get(url).json()
df00 = (pd.DataFrame(r[1:], columns=r[0]).astype({'DATE_': 'int'})
          .query('UNIVERSE == "R" and AGEGROUP in ["0","26"] and 1 < DATE_ < 12'))
df00.index = [pd.to_datetime(df00.DATE_DESC.str[:8]), 
              df00.AGEGROUP]
data00 = df00['POP'].unstack().sort_index().astype(int)

# 1990 to 2000 data
url = ('https://api.census.gov/data/1990/pep/int_natrespop?'+
       f'get=TOT_POP,YEAR,AGE&key={census_key}')
r = requests.get(url).json()
t = pd.DataFrame(r[1:], columns=r[0]).astype(int)
data90 = t.query('AGE < 65').groupby('YEAR').TOT_POP.sum()
data90.index = pd.to_datetime([f'{i}-07-01' for i in data90.index])

data10['TOT_POP'] = data10['0'] - data10['26']
data00['TOT_POP'] = data00['0'] - data00['26']
result = data90.append(data00.TOT_POP).append(data10.TOT_POP)
result.to_csv(data_dir / 'pop_u65_raw.csv', index_label='date')

### Projections 2017

In [None]:
main = [273.884, 276.587, 279.008]
years = [2016, 2020, 2025]
index = pd.to_datetime([f'{y}-07-01' for y in years])
pr = pd.Series(data=main, index=index, name='PROJ_POP')
pr.to_csv(data_dir / 'pop_u65_proj.csv', index_label='date')

In [None]:
main = [325.511, 327.892, 330.269, 332.639, 334.998,
        337.342, 339.665, 341.963, 344.234, 346.481,
        348.695]
years = [2017, 2018, 2019, 2020, 2021, 2022, 2023,
        2024, 2025, 2026, 2027]
index = pd.to_datetime([f'{y}-07-01' for y in years])
pr = pd.Series(data=main, index=index, name='PROJ_POP')
pr.to_csv(data_dir / 'pop_est_proj.csv', index_label='date')

In [96]:
# Cleaned results and text
d65 = pd.read_csv(data_dir / 'pop_u65_raw.csv', index_col='date', 
                 parse_dates=True)
d65 = d65[~d65.index.duplicated(keep='last')] / 1_000_000
d65.to_csv(data_dir / 'pop_u65.csv', index_label='date')
df = pd.read_csv(data_dir / 'pop_est_raw.csv', index_col='date', 
                 parse_dates=True).rename({'0':'TOT_POP'}, axis=1)
df = df[~df.index.duplicated(keep='last')] / 1_000_000
df.to_csv(data_dir / 'pop_est.csv', index_label='date')
pr = pd.read_csv(data_dir / 'pop_est_proj.csv', index_col='date', 
                 parse_dates=True)
p65 = pd.read_csv(data_dir / 'pop_u65_proj.csv', index_col='date', 
                 parse_dates=True)
pop = int(df.loc[cps_date(), 'TOT_POP'])
ltdt = dtxt(cps_date())['mon1']
clt = c_line('violet')
cl65 = c_line('green!70!blue')
clp = ('(see \\begin{tikzpicture} \draw[magenta!80!white] (0pt,5pt) -- '+
         '(10pt,5pt); \\fill[magenta!80!white] (1ex,1ex) circle (0.5ex);'+
         '\end{tikzpicture})')
cl65p = ('(see \\begin{tikzpicture}[baseline=-2.5pt] \draw[cyan!80!white] '+
         '(-5pt,0pt) -- (5pt,0pt); \color{cyan!80!white}{\pgfuseplotmark'+
         '{triangle}};\end{tikzpicture})')
pyear = '2017'
proj = pr.loc['2025-07-01', 'PROJ_POP']
u65 = d65.TOT_POP.iloc[-1]
u65dt = dtxt(d65.index[-1])['year']
u65pr = p65.loc['2025-07-01', 'PROJ_POP']
text = ('The latest population estimates, released on '+
        '\input{text/pop_est_reldt.txt}\\unskip, show the US '+
        f'resident population is {pop:.1f} million in {ltdt} '+
        f'{clt}. The {pyear}-based projections '+
        'of the future US resident population show a 2025 '+
        f'population of {proj:.1f} million people '+
        f'{clp}. The resident population under age 65 was '+
        f'estimated to be {u65:.1f} million in {u65dt} '+
        f'{cl65} and is projected to be {u65pr:.1f} million '+
        f'in 2025 {cl65p}.')
write_txt(text_dir / 'pop_est.txt', text)
print(text)
dt = dtxt(cps_date())['datetime']
proj_bar = (f'\draw [dashed] (axis cs:{{{dt}}},'+
            '\pgfkeysvalueof{/pgfplots/ymin}) -- '+
            f'(axis cs:{{{dt}}}, \pgfkeysvalueof'+
            f'{{/pgfplots/ymax}}); \n \\absnode{{{{{dt}}}}}'+
            '{228}{\scriptsize \color{black!50}{Projected}}')
write_txt(text_dir / 'pop_est_projbar.txt', proj_bar)

pjdt = '2025-07-01'
pjdt2 = pd.to_datetime(pjdt)
ltdt = cps_date()
lt65 = d65.index[-1]
pr65 = d65.index[-2]
dates = ['2010-07-01', '2000-07-01', '1990-07-01']

tbl = pd.DataFrame({'Tot': (df.loc[[lt65, pr65], 'TOT_POP']
                            .append(df.TOT_POP.loc[dates])),
                    'U65': (d65.loc[[lt65, pr65], 'TOT_POP']
                            .append(d65.TOT_POP.loc[dates]))})
tbl.loc[ltdt, 'Tot'] = df.loc[ltdt, 'TOT_POP']
tbl = tbl.sort_index(ascending=False)
tbl.loc[pjdt2, 'U65'] = p65.loc[pjdt, 'PROJ_POP']
tbl.loc[pjdt2, 'Tot'] = pr.loc[pjdt, 'PROJ_POP']
tbl['O65'] = tbl['Tot'] - tbl['U65']
rn = {'Tot': 'Total Resident Population',
      'U65': '\\hspace{2mm} Under Age 65',
      'O65': '\\hspace{2mm} Over Age 65'}
tbl = tbl.T.rename(rn)
tbl.columns = ([dtxt(tbl.columns[0])['mon2']] + 
               [dtxt(i)['year'] for i in tbl.columns[1:]])
tbl = tbl.applymap('{:,.1f}'.format).replace('nan', '--')
tbl.to_csv(data_dir/'pop_table.tex', sep='&', 
           line_terminator='\\\ ', quotechar=' ')

The latest population estimates, released on \input{text/pop_est_reldt.txt}\unskip, show the US resident population is 332.0 million in January 2022 (see {\color{violet}\textbf{---}}). The 2017-based projections of the future US resident population show a 2025 population of 344.2 million people (see \begin{tikzpicture} \draw[magenta!80!white] (0pt,5pt) -- (10pt,5pt); \fill[magenta!80!white] (1ex,1ex) circle (0.5ex);\end{tikzpicture}). The resident population under age 65 was estimated to be 274.2 million in 2019 (see {\color{green!70!blue}\textbf{---}}) and is projected to be 279.0 million in 2025 (see \begin{tikzpicture}[baseline=-2.5pt] \draw[cyan!80!white] (-5pt,0pt) -- (5pt,0pt); \color{cyan!80!white}{\pgfuseplotmark{triangle}};\end{tikzpicture}).


### Related Measures

In [102]:
url = ('https://api.census.gov/data/2021/pep/natmonthly?'+
       'get=NAME,POP,UNIVERSE,MONTHLY,LASTUPDATE&'+
       f'for=us:1&key={census_key}')
r = requests.get(url).json()
df = (pd.DataFrame(r[1:], columns=r[0])
        .pivot(index='MONTHLY', columns= 'UNIVERSE', 
               values='POP')).sort_index()
df.index = pd.date_range('04-01-2020', '12-01-2022', 
                         freq='MS')
df.to_csv(data_dir / 'pop_universe_raw.csv', 
          index_label='date')

In [155]:
dt = cps_date()
df = pd.read_csv(data_dir / 'pop_universe_raw.csv', 
                 index_col='date', parse_dates=True)
ltdt = dtxt(dt)['mon1']
rpop = df.loc[dt, 'R'] / 1_000_000
ppop = df.loc[dt, 'P'] / 1_000_000
npop = df.loc[dt, 'N'] / 1_000_000
text = (f'As of {ltdt}, the \\textbf{{resident}} population '+
        f'is {rpop:.1f} million, while the more-comprehensive '+
        'resident population \\textbf{including armed forces overseas} '+
        f'is {ppop:.1f} million, and the more-narrow '+
        '\\textbf{civilian noninstitutionalized} population, '+
        f'which is used in labor statistics, is {npop:.1f} '+
        'million.')
write_txt(text_dir / 'pop_universe.txt', text)
print(text, '\n')
hpop = df.loc[dt, 'H'] / 1_000_000
hsh = (hpop / rpop) * 100
txt1 = (f'As of {ltdt}, the \\textbf{{household}} population '+
        f'is {hpop:.1f} million, or {hsh:.1f} percent of the '+
        'total resident population. ')
print(txt1)

As of January 2022, the \textbf{resident} population is 331.4 million, while the more-comprehensive resident population \textbf{including armed forces overseas} is 331.7 million, and the more-narrow \textbf{civilian noninstitutionalized} population, which is used in labor statistics, is 326.7 million. 

As of January 2022, the \textbf{household} population is 323.7 million, or 97.7 percent of the total resident population. 


In [156]:
# From 2020: DEC Redistricting Data (PL 94-171)
d = {'Total': 8239016, 'Institutionalized population': 3753401,
     'Correctional facilities for adults': 1967297,
     'Juvenile facilities': 88115,
     'Nursing facilities/Skilled-nursing facilities': 1627046,
     'Other institutional facilities': 70943,
     'Noninstitutionalized population': 4485615,
     'College/University student housing': 2792097,
     'Military quarters': 328372,
     'Other noninstitutional facilities': 1365146}

In [178]:
s = pd.Series(d)
ts = s[s < 1_000_000].round(-3).apply('{:,.0f}'.format)
def number_rep(value):
    '''Replace some numbers with text'''
    res = str(round(value, 1))
    if (value < 10) & (round(value, 1) % 1 == 0.0):
        res = numbers[res]
    return res
ms = (s[s >= 1_000_000].divide(1_000_000)
      .apply(lambda x: f'{number_rep(x)} million'))
sf = ts.append(ms)
ipop = sf['Institutionalized population']
corpop = sf['Correctional facilities for adults']
ncpop = sf['Nursing facilities/Skilled-nursing facilities']
stpop = sf['College/University student housing']
bpop = sf['Military quarters']
opop = sf['Other noninstitutional facilities']
url2 = ('https://www.census.gov/library/stories/2021/08/'+
        'united-states-group-quarters-in-2020-census.html')

txt2 = ('The \\textbf{group quarters} population is '+
        f'\href{{{url2}}}{{measured}} in depth as part of '+
        'the 2020 Census. The 2020 group quarters population '+
        f'is {sf.Total}, of which {ipop} are institutionalized. '+
        f'Of these, {corpop} are in prisons and jails, and '+
        f'{ncpop} are in nursing and skilled-care facilities. '+
        f'An additional {stpop} people live in dormitories or '+
        f'student housing, {bpop} live in barracks, and '+
        f'{opop} live in other noninstitutional facilities '+
        'such as shelters and group homes.')
write_txt(text_dir / 'pop_hh.txt', txt1+txt2)
print(txt2)

The \textbf{group quarters} population is \href{https://www.census.gov/library/stories/2021/08/united-states-group-quarters-in-2020-census.html}{measured} in depth as part of the 2020 Census. The 2020 group quarters population is 8.2 million, of which 3.8 million are institutionalized. Of these, two million are in prisons and jails, and 1.6 million are in nursing and skilled-care facilities. An additional 2.8 million people live in dormitories or student housing, 328,000 live in barracks, and 1.4 million live in other noninstitutional facilities such as shelters and group homes.


### Growth Rate in Percent

In [98]:
url = ('https://api.census.gov/data/2021/pep/population?'+
       'get=NAME,PPOPCHG_2021,POP_2021,UNIVERSE&for=us:1&'+
       f'key={census_key}')
r = requests.get(url).json()
pc = pd.Series(r[1], index=r[0])['PPOPCHG_2021']
gr = f'{float(pc):.1f} percent'
write_txt(text_dir / 'pop_growth_percent.txt', gr)

### Components of Growth

In [None]:
#Pre-1991 data from Births and Deaths and population 
# https://www.census.gov/library/publications/2006/
# compendia/statab/126ed/vital-statistics.html
# and 
# https://www2.census.gov/programs-surveys/popest/
# tables/1900-1980/national/totals/popclockest.txt
years = [1988, 1989, 1990, 1991]
births88 = [3809000, 3910000, 4041000, 4158000]
deaths88 = [2123000, 2168000, 2150000, 2148000]
total88 = [2210064, 2320248, 2645166, 2688696]
df88 = pd.DataFrame()
df88['BIRTHS'] = pd.Series(data=births88, index=years)
df88['DEATHS'] = pd.Series(data=deaths88, index=years)
df88['TOTAL'] = pd.Series(data=total88, index=years)
df88['NATURALINC'] = df88['BIRTHS'] - df88['DEATHS']
df88['NETMIG'] = df88['TOTAL'] - df88['NATURALINC']

# Copied these manually from here: https://www2.census.gov/programs-surveys/
# popest/tables/1990-2000/estimates-and-change-1990-2000/2000c8_00.txt
years = [2000, 1999, 1998, 1997, 1996, 1995, 1994, 1993, 1992, 1991]
births90 = ['3,966,059',  '3,949,171',  '3,909,345',  '3,892,431',  
            '3,881,967',  '3,926,652',  '3,971,136',  '4,027,125',  
            '4,105,689',  '4,133,265']
deaths90 = ['2,386,995',  '2,359,088',  '2,330,759',  '2,321,933',  
            '2,317,918',  '2,284,363',  '2,282,854',  '2,226,027',  
            '2,180,115',  '2,138,906']
netmig90 = ['878,119', '862,845', '884,272', '930,821', '862,794', 
            '783,884', '763,264', '826,566', '792,335', '698,732']
df90 = pd.DataFrame()
df90['BIRTHS'] = (pd.Series(data=births90, index=years)
                    .str.replace(',', '').astype('int'))
df90['DEATHS'] = (pd.Series(data=deaths90, index=years)
                    .str.replace(',', '').astype('int'))
df90['NETMIG'] = (pd.Series(data=netmig90, index=years)
                    .str.replace(',', '').astype('int'))
df90['NATURALINC'] = df90['BIRTHS'] - df90['DEATHS']

In [None]:
# 2001 to 2010 data csv
url = ('https://www2.census.gov/programs-surveys/popest/datasets/'+
       '2010/2010-eval-estimates/co-est2010-alldata.csv')
df00 = pd.read_csv(url, encoding='iso-8859-1')

# 2011 to 2020 data csv
url = ('https://www2.census.gov/programs-surveys/popest/'+
       'datasets/2010-2020/state/totals/nst-est2020-alldata.csv')
df10 = pd.read_csv(url)

# 2021 file
file = ('https://www2.census.gov/programs-surveys/popest/tables/'+
        '2020-2021/state/totals/NST-EST2021-COMP.xlsx')
df = pd.read_excel(file, index_col=0, usecols='A:F', header=[3])
cols = ['Total', 'NATURALINC', 'BIRTHS', 'DEATHS', 'NETMIG']
df.columns = cols
df21 = df.loc['United States', cols[1:]]

cats = ['BIRTHS', 'DEATHS', 'NATURALINC', 'NETMIG']
res = pd.DataFrame()
for cat, year in itertools.product(cats, range(2001, 2011)):
    data = df00.query('COUNTY == 0').sum().loc[f'{cat}{year}']
    res.at[year, cat] = data
for cat, year in itertools.product(cats, range(2011, 2021)):
    data = df10.query('NAME == "United States"').loc[0, f'{cat}{year}']
    res.at[year, cat] = data
res = df88.append(df90).append(res).append(df21.rename(2021))
res.index = pd.to_datetime([f'{y}-07-01' for y in res.index])
res['TOTAL'] = res['NATURALINC'] + res['NETMIG']
res = res.sort_index()
res.to_csv(data_dir / 'popcomp_raw.csv', index_label='date')

In [13]:
res = pd.read_csv(data_dir / 'popcomp_raw.csv', index_col='date', 
                  parse_dates=True)
(res / 1_000_000).to_csv(data_dir / 'popcomp.csv', index_label='date')
ltdt = dtxt(res.index[-1])['mon1']
ltval = f'{round(res.TOTAL.iloc[-1],-2):,.0f}'
ltb = f'{res.BIRTHS.iloc[-1] / 1_000_000:,.2f}'
ltd = f'{res.DEATHS.iloc[-1] / 1_000_000:,.2f}'
ltni = f'{round(res.NATURALINC.iloc[-1],-2):,.0f}'
ltnm = f'{round(res.NETMIG.iloc[-1],-2):,.0f}'
nii = 'increase' if res.NATURALINC.iloc[-1] > 0 else 'decrease'
nmi = 'increased' if res.NETMIG.iloc[-1] > 0 else 'decreased'
b89 = f'{res.loc["1989-07-01", "BIRTHS"] / 1_000_000:,.2f}'
d89 = f'{res.loc["1989-07-01", "DEATHS"] / 1_000_000:,.2f}'
nm89 = f'{round(res.loc["1989-07-01", "NETMIG"], -2):,.0f}'
cld = c_line('orange!80!red')
clb = c_line('green!90!black')
cbnm = c_box('blue!75!black')
sq = ('(see \\begin{tikzpicture}\draw [pattern=crosshatch, pattern '+
      'color=cyan!80!blue, draw=cyan!80!blue](0,0) rectangle '+
      '(.2,.2); \end{tikzpicture})')
# Population growth estimate retrieved from Census API
gr = Path(text_dir / 'pop_growth_percent.txt').read_text()
text = ('Population growth comes from two sources, natural '+
        'increases (births minus deaths) and net migration. '+
        'In the latest estimate, the US population added '+
        f'{ltval} people over the year ending {ltdt}, a '+
        f'population growth rate of {gr}. There were a '+
        f'total of {ltb} million births {clb}, and {ltd} '+
        f'million deaths {cld}, resulting in a natural '+
        f'{nii} of {ltni} people {sq}. In the same period, '+
        f'net migration from abroad {nmi} the resident '+
        f'population by {ltnm} people {cbnm}. For comparison, '+
        f'in 1989, there were {b89} million births, {d89} '+
        f'million deaths, and {nm89} net migrants to the US. ')
write_txt(text_dir / 'pop_comp.txt', text)
print(text)

Population growth comes from two sources, natural increases (births minus deaths) and net migration. In the latest estimate, the US population added 392,700 people over the year ending July 2021, a population growth rate of 0.1 percent. There were a total of 3.58 million births (see {\color{green!90!black}\textbf{---}}), and 3.43 million deaths (see {\color{orange!80!red}\textbf{---}}), resulting in a natural increase of 148,000 people (see \begin{tikzpicture}\draw [pattern=crosshatch, pattern color=cyan!80!blue, draw=cyan!80!blue](0,0) rectangle (.2,.2); \end{tikzpicture}). In the same period, net migration from abroad increased the resident population by 244,600 people (see\cbox{blue!75!black}). For comparison, in 1989, there were 3.91 million births, 2.17 million deaths, and 578,200 net migrants to the US. 
