The goal of this notebook is to take each of the census variables I will be using in my model and find which macroeconomic variables I can use to grow the variables used in the model for 2020-2022.

In [1]:
import pandas as pd
import numpy as np

%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns

# Load data

In [2]:
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC"
}

## BEA Data for Estimates
https://www.bea.gov/data/economic-accounts/regional

https://www.bea.gov/data/economic-accounts/national

### Employment by State

In [3]:
employment_state = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/bea/total_employment_state.csv')

In [4]:
employment_state.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   GeoName  51 non-null     object
 1   2017     51 non-null     int64 
 2   2018     51 non-null     int64 
 3   2019     51 non-null     int64 
 4   2020     51 non-null     int64 
dtypes: int64(4), object(1)
memory usage: 2.1+ KB


In [5]:
employment_state = employment_state.add_prefix('emp_')

In [7]:
employment_state['emp_pct_chg'] = (employment_state['emp_2020'] - employment_state['emp_2019']) / employment_state['emp_2019']

In [9]:
employment_state['stabb'] = ''
for i in range(0, len(employment_state)):
    employment_state.loc[i, 'stabb'] = us_state_to_abbrev[employment_state.loc[i, 'emp_GeoName']]

In [10]:
employment_state

Unnamed: 0,emp_GeoName,emp_2017,emp_2018,emp_2019,emp_2020,emp_pct_chg,stabb
0,Alabama,2649812,2692383,2712704,2612469,-0.03695,AL
1,Alaska,455705,454871,454742,425953,-0.063308,AK
2,Arizona,3740003,3854506,3929402,3826162,-0.026274,AZ
3,Arkansas,1641045,1658921,1664839,1608843,-0.033634,AR
4,California,23549113,24078517,24227563,22743902,-0.061239,CA
5,Colorado,3760540,3857800,3893927,3737075,-0.040281,CO
6,Connecticut,2300011,2314371,2298480,2162427,-0.059193,CT
7,Delaware,584854,597018,602908,570469,-0.053804,DE
8,District of Columbia,903304,914601,915470,860807,-0.05971,DC
9,Florida,12097885,12555591,12761508,12148603,-0.048028,FL


### GDP by State

In [54]:
gdp = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/bea/gdp_state.csv')

In [55]:
gdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   GeoName  51 non-null     object 
 1   2018:Q1  51 non-null     float64
 2   2019:Q1  51 non-null     float64
 3   2020:Q1  51 non-null     float64
 4   2021:Q1  51 non-null     float64
 5   2022:Q1  51 non-null     float64
dtypes: float64(5), object(1)
memory usage: 2.5+ KB


In [56]:
gdp = gdp.add_prefix('gdp_')

In [57]:
gdp['pct_chg_19_20'] = (gdp['gdp_2020:Q1'] - gdp['gdp_2019:Q1']) / gdp['gdp_2019:Q1']
gdp['pct_chg_20_21'] = (gdp['gdp_2021:Q1'] - gdp['gdp_2020:Q1']) / gdp['gdp_2020:Q1']
gdp['pct_chg_21_22'] = (gdp['gdp_2020:Q1'] - gdp['gdp_2019:Q1']) / gdp['gdp_2019:Q1']

In [65]:
gdp['stabb'] = ''
for i in range(0, len(gdp)):
    gdp.loc[i, 'stabb'] = us_state_to_abbrev[gdp.loc[i, 'gdp_GeoName']]

In [69]:
gdp

Unnamed: 0,gdp_GeoName,gdp_2018:Q1,gdp_2019:Q1,gdp_2020:Q1,gdp_2021:Q1,gdp_2022:Q1,pct_chg_19_20,pct_chg_20_21,pct_chg_21_22,stabb
0,Alabama,200256.3,202231.1,202069.1,202598.8,206879.7,-0.000801,0.002621,-0.000801,AL
1,Alaska,53399.8,52889.5,52876.1,49820.2,49643.0,-0.000253,-0.057794,-0.000253,AK
2,Arizona,311176.5,319443.4,324209.9,331004.1,341753.8,0.014921,0.020956,0.014921,AZ
3,Arkansas,115453.5,116073.3,116945.8,119313.5,121991.1,0.007517,0.020246,0.007517,AR
4,California,2601862.5,2693526.5,2739612.3,2799129.2,2939039.8,0.01711,0.021725,0.01711,CA
5,Colorado,338845.7,349998.6,357983.5,357382.7,371304.9,0.022814,-0.001678,0.022814,CO
6,Connecticut,249498.2,251236.3,245664.6,240712.2,250200.7,-0.022177,-0.020159,-0.022177,CT
7,Delaware,60748.3,63493.3,63227.4,62865.4,65644.9,-0.004188,-0.005725,-0.004188,DE
8,District of Columbia,122470.1,123440.2,125651.9,123514.9,127280.3,0.017917,-0.017007,0.017917,DC
9,Florida,936276.2,963421.6,968139.8,978444.2,1031018.3,0.004897,0.010644,0.004897,FL


## Census data

### Business patterns by ZIP

In [13]:
bp_zip = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/business_patterns_zip/zbp20totals.txt', delimiter=',', encoding = 'unicode_escape', engine ='python')

In [14]:
bp_zip1 = bp_zip.rename(columns = {"emp": "num_employees20", "est": "num_establishments20", "ap": "annual_payroll_1000s20"})
bp_zip2 = bp_zip1[['stabbr', 'num_employees20', 'num_establishments20', 'annual_payroll_1000s20']]
bp_zip2 = bp_zip2.iloc[:-1, :]

### Total Population

In [11]:
tot_pop = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/total_population/tot_population_2020.csv')

In [12]:
tot_pop1 = tot_pop.rename(columns = {"B01003_001E": "total_pop", "NAME": "zip"})
tot_pop2 = tot_pop1.iloc[1:, :].drop(axis=1, columns=['B01003_001M','GEO_ID'])

# Remove ZCTA5 string and convert total_pop to int
tot_pop2['zip'] = tot_pop2['zip'].replace('ZCTA5 ', '', regex=True)
tot_pop2['total_pop'] = tot_pop2['total_pop'].astype(int)

In [15]:
# https://www.census.gov/library/stories/2021/12/us-population-grew-in-2021-slowest-rate-since-founding-of-the-nation.html

tot_pop2['pop_pct_chg_20_21'] = .001

In [16]:
tot_pop2

Unnamed: 0,total_pop,zip,pop_pct_chg_20_21
1,16773,00601,0.001
2,37083,00602,0.001
3,45652,00603,0.001
4,6231,00606,0.001
5,26502,00610,0.001
...,...,...,...
33116,12,99923,0.001
33117,990,99925,0.001
33118,1582,99926,0.001
33119,0,99927,0.001


### Employment Status

In [17]:
emp_status = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/employment_status/employment2020.csv', header = 1)

In [18]:
emp_status_1 = emp_status[['Geographic Area Name', 
                           'Estimate!!Occupied housing units!!Occupied housing units!!HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2020 INFLATION-ADJUSTED DOLLARS)!!Median household income (dollars)',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!Less than $20,000!!Less than 20 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!Less than $20,000!!20 to 29 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!Less than $20,000!!30 percent or more',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$20,000 to $34,999!!Less than 20 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$20,000 to $34,999!!20 to 29 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$20,000 to $34,999!!30 percent or more',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$35,000 to $49,999!!Less than 20 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$35,000 to $49,999!!20 to 29 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$35,000 to $49,999!!30 percent or more',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$50,000 to $74,999!!Less than 20 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$50,000 to $74,999!!20 to 29 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$50,000 to $74,999!!30 percent or more',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$75,000 or more!!Less than 20 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$75,000 or more!!20 to 29 percent',
                           'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$75,000 or more!!30 percent or more'
                          ]]

In [19]:
emp_status_2 = emp_status_1.rename(columns = {'Geographic Area Name': 'zip', 
                                              'Estimate!!Occupied housing units!!Occupied housing units!!HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2020 INFLATION-ADJUSTED DOLLARS)!!Median household income (dollars)': 'houshold_income_2020_median',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!Less than $20,000!!Less than 20 percent': 'incm_l_20k_house_pct_l_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!Less than $20,000!!20 to 29 percent': 'incm_l_20k_house_pct_l_30_gt_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!Less than $20,000!!30 percent or more': 'incm_l_20k_house_pct_gt_30',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$20,000 to $34,999!!Less than 20 percent': 'incm_gt_20k_l_35k_house_pct_l_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$20,000 to $34,999!!20 to 29 percent': 'incm_gt_20k_l_35k_house_pct_l_30_gt_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$20,000 to $34,999!!30 percent or more': 'incm_gt_20k_l_35k_house_pct_gt_30',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$35,000 to $49,999!!Less than 20 percent': 'incm_gt_35k_l_50k_house_pct_l_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$35,000 to $49,999!!20 to 29 percent': 'incm_gt_35k_l_50k_house_pct_l_30_gt_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$35,000 to $49,999!!30 percent or more': 'incm_gt_35k_l_50k_house_pct_gt_30',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$50,000 to $74,999!!Less than 20 percent': 'incm_gt_50k_l_75k_house_pct_l_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$50,000 to $74,999!!20 to 29 percent': 'incm_gt_50k_l_75k_house_pct_l_30_gt_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$50,000 to $74,999!!30 percent or more': 'incm_gt_50k_l_75k_house_pct_gt_30',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$75,000 or more!!Less than 20 percent': 'incm_gt_75k_house_pct_l_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$75,000 or more!!20 to 29 percent': 'incm_gt_75k_house_pct_l_30_gt_20',
                                              'Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS!!$75,000 or more!!30 percent or more': 'incm_gt_75k_house_pct_gt_30'})

In [20]:
# Remove ZCTA5 string and convert houshold_income_2020_median to int. For non-disclosed figures due to masking concerns, I use the median value of the median household income
emp_status_2['zip'] = emp_status_2['zip'].replace('ZCTA5 ', '', regex=True)

emp_status_2['houshold_income_2020_median'] = emp_status_2['houshold_income_2020_median'].replace('250,000+', '250000')
emp_status_2['houshold_income_2020_median'] = emp_status_2['houshold_income_2020_median'].replace('2,500-', np.nan, regex=True)
emp_status_2['houshold_income_2020_median'] = emp_status_2['houshold_income_2020_median'].replace('-', np.nan, regex=True)

emp_status_2['houshold_income_2020_median'] = emp_status_2['houshold_income_2020_median'].astype(float)
emp_status_2.loc[emp_status_2['houshold_income_2020_median'] == np.nan, 'houshold_income_2020_median'] = emp_status_2['houshold_income_2020_median'].median

### Nonemployer

In [None]:
nonemployer = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/nonemployer/nonemp18st.txt', delimiter=',', encoding = 'unicode_escape', engine ='python')

In [None]:
nonemployer1 = nonemployer[['ST', 'NAICS', 'RCPTOT']]
nonemployer2 = nonemployer1.rename(columns = {"ST": "state_fips", "NAICS": "naics", "RCPTOT": "revenue_total"})

In [None]:
# Subset to if only the naics is three digits long 
nonemployer3_list = []

for i in range(0, len(nonemployer2)):
    if len(nonemployer2.loc[i, 'naics']) == 3:
        temp_list = list(nonemployer2.iloc[i, :])
        nonemployer3_list.append(temp_list)
        
nonemployer3 = pd.DataFrame(nonemployer3_list, columns=['state_fips', 'naics', 'revenue_total'])

In [None]:
# adjust naics to only two digits
nonemployer3['naics'] = nonemployer3['naics'].astype(str)
nonemployer3['naics'] = nonemployer3['naics'].str.slice(0, 2)

In [None]:
# Merge in state abbreviation and fips

In [None]:
# Calculate % Change

### Manufacturing

### Retail Trade

In [25]:
retail_trade = pd.read_csv('https://raw.githubusercontent.com/jhancuch/sba-loan-credit-analysis/main/data/retail_trade/sales.csv', header = 3, encoding = 'unicode_escape', engine ='python')

In [32]:
retail_trade_1 = retail_trade.iloc[3:71, :]
retail_trade_2 = retail_trade_1[['NAICS Code','2020','2019r','2018r','2017r','2016r','2015r','2014r','2013r','2012','2011', '2010']]
retail_trade_3 = retail_trade_2.rename(columns = {'NAICS Code': 'naics',
                                                 '2019r': '2019',
                                                 '2018r': '2018',
                                                 '2017r': '2017',
                                                 '2016r': '2016',
                                                 '2015r': '2015',
                                                 '2014r': '2014',
                                                 '2013r': '2013'}).reset_index(drop=True)
# Subset to if only the naics is three digits long 
retail_trade_4_list = []

for i in range(0, len(retail_trade_3)):
    if len(retail_trade_3.loc[i, 'naics']) == 3:
        temp_list = list(retail_trade_3.iloc[i, :])
        retail_trade_4_list.append(temp_list)
        
retail_trade_4 = pd.DataFrame(retail_trade_4_list, columns=['naics', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010'])

# Convert variables to integers and subset naics code to only two digits
retail_trade_4['naics'] = retail_trade_4['naics'].str.slice(0, 2)
for i in retail_trade_4.columns:
    retail_trade_4[i] = retail_trade_4[i].str.replace(',', '')
    retail_trade_4[i] = retail_trade_4[i].astype(int)

retail_trade_5 = retail_trade_4.groupby(['naics'])[['2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010']].sum().reset_index()

In [46]:
retail_trade_6 = pd.DataFrame(retail_trade_5.iloc[0, 1:] + retail_trade_5.iloc[1, 1:]).reset_index()

In [53]:
pd.pivot(retail_trade_6, columns='index', values=0)

index,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,,,,,,,,,,,5570393.0
1,,,,,,,,,,5402272.0,
2,,,,,,,,,5255425.0,,
3,,,,,,,,5040214.0,,,
4,,,,,,,4848096.0,,,,
5,,,,,,4726111.0,,,,,
6,,,,,4640651.0,,,,,,
7,,,,4459238.0,,,,,,,
8,,,4302229.0,,,,,,,,
9,,4102952.0,,,,,,,,,


In [51]:
retail_trade_6.columns

Index(['index', 0], dtype='object')

### Services

### Wholesale Trade

# Growing data

## Business Patterns by Zip

In [110]:
for i in range(0, len(bp_zip2)):
    bp_zip2.loc[i, 'num_employees21'] = (bp_zip2.loc[i, 'num_employees20'] * (1 + gdp.loc[gdp['stabb'] == bp_zip2.loc[i, 'stabbr'], ['pct_chg_20_21']])).values[0]
    bp_zip2.loc[i, 'num_establishments21'] = (bp_zip2.loc[i, 'num_establishments20'] * (1 + gdp.loc[gdp['stabb'] == bp_zip2.loc[i, 'stabbr'], ['pct_chg_20_21']])).values[0]
    bp_zip2.loc[i, 'annual_payroll_1000s21'] = (bp_zip2.loc[i, 'annual_payroll_1000s20'] * (1 + gdp.loc[gdp['stabb'] == bp_zip2.loc[i, 'stabbr'], ['pct_chg_20_21']])).values[0]

In [112]:
for i in range(0, len(bp_zip2)):
    bp_zip2.loc[i, 'num_employees22'] = (bp_zip2.loc[i, 'num_employees21'] * (1 + gdp.loc[gdp['stabb'] == bp_zip2.loc[i, 'stabbr'], ['pct_chg_21_22']])).values[0]
    bp_zip2.loc[i, 'num_establishments22'] = (bp_zip2.loc[i, 'num_establishments21'] * (1 + gdp.loc[gdp['stabb'] == bp_zip2.loc[i, 'stabbr'], ['pct_chg_21_22']])).values[0]
    bp_zip2.loc[i, 'annual_payroll_1000s22'] = (bp_zip2.loc[i, 'annual_payroll_1000s21'] * (1 + gdp.loc[gdp['stabb'] == bp_zip2.loc[i, 'stabbr'], ['pct_chg_21_22']])).values[0]

## Total Population

## Employment Status

In [None]:
# only grow median income, copy over 2020 distribution to 2021

## Retail Trade