This notebook gathers PPP loan data (2021 loans through March 31), joins it with establishment counts (described here as BLS QCEW data; note that the cells below read US Census CBP data), and calculates PPP penetration for display on ArcGIS, as requested by the SBA.

Initial settings; Load PPP data

In [1]:
import loans_common2021 as co

import pandas as pd
import zipfile
#from plotnine import *    # python lib to use ggplot
from io import BytesIO
from urllib.request import urlopen

# Show up to 100 columns when displaying wide DataFrames.
# The fully qualified option name is required: on pandas >= 1.4 the bare
# pattern 'max_columns' also matches 'styler.render.max_columns', so
# set_option raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 100)

# Analysis settings passed to the loans_common2021 helpers throughout the notebook.
excsole = False     # presumably "exclude sole proprietors" -- TODO confirm in loans_common2021
naics = 'NAICS2'    # also used further down as the name of the NAICS column

# Folder holding the SBA FOIA release (2021-04-01) of the PPP loan CSV files.
# The trailing slash is kept exactly as in the original string.
fpath = (
    '/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/'
    'FOIA 2021-04-01/'
)

# Read the PPP loan files found under fpath (the helper prints each file it reads).
loansNE = co.ReadPPPdata(naics, fpath)

/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_9.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_8.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_150k_plus.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_3.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_2.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_1.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_5.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_4.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k_6.csv
/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/FOIA 2021-04-01/public_up_to_150k

## Join County info into each loan 

In [5]:
# MatchCounties returns a pair, unpacked here as:
#   loans_y    -- passed on to AddManualCounties in the next step
#   countyfips -- joined on its 'COUNTY' column to add state/county names later
# NOTE(review): presumably loans_y is the loan table with county info attached -- confirm.
loans_y, countyfips = co.MatchCounties(loansNE, excsole, naics)

unique cities in loan dataset: 3408
Unique matches: 1438
Unique non-matches: 1970




loans in loan dataset: 416286
loans with matches: 361698
loans non-matches: 54588
zip-county pairs in New Englad: 2496
zip-county pairs in New England, deduplicated: 2206
loans in loan dataset: 416286
1st draw loans:  300860
2nd draw loans:  115426
After ZIP match: loans with matches:  416257
After ZIP match: loans non-matches:  29
MANUALLY FIX THE COUNTIES IN EXCEL FILE /Users/aligo/Downloads/FEMA recovery data/ppp addresses MISSwithall2021.xlsx
 AND CHANGE ITS NAME FROM MISS TO MANUAL


### Execute the next cell ONLY after manually fixing the counties in the Excel file of unmatched loans (in Downloads/) and renaming that file from MISS to MANUAL

In [6]:
# Produce the final loan table (loansc) used for the penetration calculation.
# NOTE(review): presumably this reads the manually corrected Excel file
# (renamed from MISS to MANUAL) and fills in counties for the loans that
# MatchCounties could not match -- confirm in loans_common2021.
loansc = co.AddManualCounties(loans_y, excsole, naics)

loans in loan dataset: 416286
loans with matches: 416257
loans non-matches: 29


## TOTAL NUMBER OF BUSINESSES - read total number of businesses from US Census CBP

In [14]:
# Download the 2018 County Business Patterns (CBP) county file from the US Census
# and load it into a DataFrame. Context managers close the HTTP response, the
# archive and the archive member as soon as the data has been read.
cbp_url = "https://www2.census.gov/programs-surveys/cbp/datasets/2018/cbp18co.zip"
with urlopen(cbp_url) as response:
    cbp_archive = BytesIO(response.read())
# The archive handle is deliberately NOT named `zipfile`: rebinding that name hides
# the imported module, so re-running this cell would fail with AttributeError.
with zipfile.ZipFile(cbp_archive) as cbp_zip, cbp_zip.open('cbp18co.txt') as cbp_txt:
    cbp = pd.read_csv(cbp_txt, na_values='N')    # cells containing 'N' are read as missing

# Zero-padded FIPS codes: 2-character state + 3-character county = area_fips
cbp['FIPST'] = cbp['fipstate'].astype(str).str.zfill(2)
cbp['area_fips'] = cbp['FIPST'] + cbp['fipscty'].astype(str).str.zfill(3)
cnt = cbp[cbp['FIPST'].isin(co.NEstfips)    # New England counties
          & ~cbp['fipscty'].eq(999)      # exclude fipscty = 999 that are "statewide" totals
          & cbp['naics'].str.contains('^[0-9][0-9]----',regex=True)]    # 2-digit NAICS codes
cnt = cnt.assign( industry_code = cnt['naics'].str.slice(start=0,stop=2) )    # 2-digit NAICS codes, cleaned
# Sum the establishment-count columns for every size class below 500 employees;
# missing values are skipped, i.e. treated as zero in the sum.
cnt['NEstabs'] = cnt[['n<5','n5_9','n10_19','n20_49','n50_99','n100_249','n250_499']
                     ].sum(axis=1, skipna=True)         # num Establishments with < 500 employees
cnt = cnt[['area_fips','industry_code','NEstabs']].set_index('area_fips')
# Add state and county name
cnt = cnt.join(countyfips.set_index('COUNTY'))

# Join Total Num businesses + Num of loans
cnt = cnt.reset_index()
# NOTE(review): positional rename -- assumes the join added exactly two columns,
# state then county name, in that order; verify against countyfips.
cnt.columns = ['COUNTYfips',naics,'NEstabs','State','COUNTYName']
# adjusts 2-digit NAICS that are joint, e.g. NAICS 31-33 Manufacturing
cnt = co.OverrideNAICS2(cnt)

### Subset scope of loans draws and calculate penetration; change from 1st draw (2020+2021) to 2nd draw

In [16]:
# Compute PPP penetration from the county-matched loans (loansc) and the
# establishment counts (cnt); the result is written to CSV in the next cell.
# NOTE(review): judging by the printed output, the helper reports several draw
# scopes (first draw 2020+2021, first draw 2020 only, second draw, both draws)
# -- confirm in loans_common2021 how these appear in the returned table.
pen = co.CalcPenforScopes(loansc, naics, cnt)

*** First draw (2020 and 2021) ***
Total Number of PPP loans to New England excluding loans with missing info: 295651.0
Total Count of businesses in NE: 393718.0
Total Number of County-NAICS pairs in NE with existing businesses: 1251
Previous Number with penetration > 1: 302
Previous Number with penetration = 0: 24
*** First draw (2020 only) ***
Total Number of PPP loans to New England excluding loans with missing info: 254611.0
Total Count of businesses in NE: 393718.0
Total Number of County-NAICS pairs in NE with existing businesses: 1251
Previous Number with penetration > 1: 195
Previous Number with penetration = 0: 23
*** Second draw (2021) ***
Total Number of PPP loans to New England excluding loans with missing info: 113282.0
Total Count of businesses in NE: 393718.0
Total Number of County-NAICS pairs in NE with existing businesses: 1251
Previous Number with penetration > 1: 46
Previous Number with penetration = 0: 12
*** Both draws (2021) ***
Total Number of PPP loans to New Eng

In [17]:
# Write the penetration table to CSV; the file name carries the NAICS setting.
pen_csv_path = f'/Users/aligo/Downloads/FEMA recovery data/PPP_loans_from_SBA/PPPpenetration_2021_CBP_{naics}.csv'
pen.to_csv(pen_csv_path)