In [1]:
#dependencies
import re
from bs4 import BeautifulSoup 
from splinter import Browser
import requests
import pandas as pd
from selenium import webdriver 
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
import json
import time
from tqdm.notebook import tqdm 

<h4>Start cells here if {state}_geo file has not been FIPS encoded

In [2]:
fips_check = pd.read_csv("data/state_data/geo/geocoded/geo_me.csv") #- original geocoded file
fips_check = fips_check.drop(["Unnamed: 0.1"], axis =1) 
fips_check.head() #10912

Unnamed: 0.1,Unnamed: 0,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,BorrowerState,BorrowerZip,...,LongName,TRIIndustrySector,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long
0,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,SOUTH PORTLAND,ME,4106.0,...,Offices of Physicians (except Mental Health Sp...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718
1,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,Saco,ME,4072.0,...,Offices of Mental Health Practitioners (except...,999 Other,6213 Offices of Other Health Practitioners,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568
2,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,BANGOR,ME,4401.0,...,Offices of Physicians (except Mental Health Sp...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735
3,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,Portland,ME,4103.0,...,Other Residential Care Facilities,999 Other,6239 Other Residential Care Facilities,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333
4,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",SOUTH PORTLAND,ME,4106.0,...,Offices of Physicians (except Mental Health Sp...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535


In [3]:
columns = ['LoanNumber','full_add', 'Lat', 'Long']
fips_check = fips_check[columns]

In [4]:
fips_check_nan = fips_check[fips_check["Lat"].isnull()] # check to make sure no nulls in coords
fips_check_nan 

Unnamed: 0,LoanNumber,full_add,Lat,Long
1469,3886797304,"P.O. Box 207, Oxford, ME",,
3497,1916827107,"PO BOX 116, BERNARD, ME",,


In [5]:
fips_check_notna = fips_check[fips_check["Lat"].notna()] # check to make sure no nulls in coords
fips_check_notna #5078

Unnamed: 0,LoanNumber,full_add,Lat,Long
0,5508457004,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718
1,8722078706,"50 Moody St, Saco, ME",43.517756,-70.435568
2,4478017005,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735
3,4752408810,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333
4,4593917108,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535
...,...,...,...,...
4180,7989807001,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805
4181,7994057006,"2046 State Road, Eliot, ME",43.164896,-70.810369
4182,9416187007,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973
4183,9616798809,"1349 Washington Ave, Portland, ME",43.698961,-70.286492


<h4> Start cells here if partial FIPS already available in {state}_fips_scraped file

In [2]:
fips_check_scraped = pd.read_csv("data/state_data/geo/geo_fips/ME_fips_scraped.csv") 
fips_check_scraped.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,...,TRIIndustrySector,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z
0,0,444832,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,SOUTH PORTLAND,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,230050000000000.0
1,1,444833,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,Saco,...,999 Other,6213 Offices of Other Health Practitioners,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568,230310100000000.0
2,2,444834,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,BANGOR,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,230190000000000.0
3,3,444835,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,Portland,...,999 Other,6239 Other Residential Care Facilities,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,230050000000000.0
4,4,444836,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",SOUTH PORTLAND,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,230050000000000.0


In [3]:
columns = ['LoanNumber','full_add', 'Lat', 'Long', 'FIPS_z']
fips_check_scraped = fips_check_scraped[columns]

In [4]:
#pd.set_option('display.float_format', '{:.0f}'.format)

In [5]:
#Latitude, Longitude, not included
fips_check_scraped_notna = fips_check_scraped[fips_check_scraped["Lat"].notna()] # check to make sure no nulls in coords
fips_check_scraped_notna #40813

Unnamed: 0,LoanNumber,full_add,Lat,Long,FIPS_z
0,5508457004,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,2.300500e+14
1,8722078706,"50 Moody St, Saco, ME",43.517756,-70.435568,2.303101e+14
2,4478017005,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,2.301900e+14
3,4752408810,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,2.300500e+14
4,4593917108,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,2.300500e+14
...,...,...,...,...,...
4180,7989807001,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,
4181,7994057006,"2046 State Road, Eliot, ME",43.164896,-70.810369,
4182,9416187007,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,
4183,9616798809,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,


In [6]:
#FIPS not included, of Lat, Long available
fips_check_scraped_null = fips_check_scraped_notna[fips_check_scraped_notna["FIPS_z"].isnull()] # check to make sure no nulls in coords
fips_check_scraped_null #1239

Unnamed: 0,LoanNumber,full_add,Lat,Long,FIPS_z
2110,7910567010,"728 MAIN ST, RICHMOND, ME",37.538949,-77.437307,
2129,7939628407,"728 Main St Ste 1, Richmond, ME",40.726457,-73.821711,
3087,2383838510,"37 Atlantic Ave, Wells, ME",43.303921,-70.566396,
3088,4556708308,"1601 Congress St Ste 6, Portland, ME",43.660240,-70.307278,
3089,8890588607,"70 Camden St, Rockport, ME",44.193849,-69.076697,
...,...,...,...,...,...
4180,7989807001,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,
4181,7994057006,"2046 State Road, Eliot, ME",43.164896,-70.810369,
4182,9416187007,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,
4183,9616798809,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,


<h4>FIPS Script

In [7]:
#if geocode file
#lat = fips_check_notna["Lat"].astype(float).astype(str).to_list()
#long = fips_check_notna["Long"].astype(float).astype(str).to_list()

#if FIPS_z file 
lat = fips_check_scraped_null["Lat"].astype(float).astype(str).to_list()
long = fips_check_scraped_null["Long"].astype(float).astype(str).to_list()

In [8]:
#before running script, check for any NaN within Lat, Long fields. 
def sleep():
    time.sleep(1)
    
executable_path = {'executable_path': ChromeDriverManager().install()}
driver = Browser('chrome', **executable_path, headless=False)    
url = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates?form'
driver.visit(url)

not_found = []
geo_id = []

i=0
for i in tqdm(range(len(lat))):
    #while i < len(lat):
        driver.find_by_id('_x_id').clear()
        driver.find_by_id('_y_id').clear()
        driver.find_by_id('_x_id').fill(long[i])
        driver.find_by_id('_y_id').fill(lat[i])
        driver.find_by_id('proceed').click()
        #sleep()
        #html = driver.html
        soup = BeautifulSoup(driver.html, 'html.parser')


        results = soup.find_all('div', id='pl_gov_census_geo_geocoder_domain_GeographyResult')
        for result in results:
            result_text = result.text

            result_digits = re.findall('\d+', result_text)
            startswith = [s for s in result_digits if s.startswith('23')] #ME
            fips_elements =[element for element in startswith if len(element)>=15]
            
            try:
                #print(f"{i},{lat[i]},{long[i]} is {fips_elements}")
                geo_id.append(fips_elements)
            except (KeyError, IndexError):
                print(f"{i},{lat[i]},{long[i]} Not Found")
                notfound.append(i)
                geo_id.append(None)  
            except (TimeoutException):
                driver.navigate.refresh()
    
        #sleep()
        i=i+1



Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome
Driver [/Users/ciaraspencer/.wdm/drivers/chromedriver/mac64/99.0.4844.51/chromedriver] found in cache


HBox(children=(FloatProgress(value=0.0, max=1099.0), HTML(value='')))




https://iqss.github.io/dss-webscrape/filling-in-web-forms.html

In [9]:
geo_id_list =geo_id 

Length Check

In [10]:
print(len(geo_id_list))

1099


Matching FIPS to DataFrame

In [11]:
#for geocode file
#fips_fill = fips_check_notna.iloc[:3086] #confirm pair to notna DataFrame
#if FIPS available
fips_fill = fips_check_scraped_null.iloc[:1099]

In [12]:
fips_fill["FIPS_z"] = geo_id_list
fips_fill

Unnamed: 0,LoanNumber,full_add,Lat,Long,FIPS_z
2110,7910567010,"728 MAIN ST, RICHMOND, ME",37.538949,-77.437307,[]
2129,7939628407,"728 Main St Ste 1, Richmond, ME",40.726457,-73.821711,[]
3087,2383838510,"37 Atlantic Ave, Wells, ME",43.303921,-70.566396,[230310340052017]
3088,4556708308,"1601 Congress St Ste 6, Portland, ME",43.660240,-70.307278,[230050020021016]
3089,8890588607,"70 Camden St, Rockport, ME",44.193849,-69.076697,[230139705002006]
...,...,...,...,...,...
4180,7989807001,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,[230079701011047]
4181,7994057006,"2046 State Road, Eliot, ME",43.164896,-70.810369,[230310370003021]
4182,9416187007,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,[230050173053044]
4183,9616798809,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,[230050022002005]


Remove brackets

In [13]:
fips_fill['FIPS_z'] = fips_fill['FIPS_z'].str[0]
fips_fill

Unnamed: 0,LoanNumber,full_add,Lat,Long,FIPS_z
2110,7910567010,"728 MAIN ST, RICHMOND, ME",37.538949,-77.437307,
2129,7939628407,"728 Main St Ste 1, Richmond, ME",40.726457,-73.821711,
3087,2383838510,"37 Atlantic Ave, Wells, ME",43.303921,-70.566396,230310340052017
3088,4556708308,"1601 Congress St Ste 6, Portland, ME",43.660240,-70.307278,230050020021016
3089,8890588607,"70 Camden St, Rockport, ME",44.193849,-69.076697,230139705002006
...,...,...,...,...,...
4180,7989807001,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,230079701011047
4181,7994057006,"2046 State Road, Eliot, ME",43.164896,-70.810369,230310370003021
4182,9416187007,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,230050173053044
4183,9616798809,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,230050022002005


In [14]:
fips_fill.loc[fips_fill["FIPS_z"].isnull()] #check

Unnamed: 0,LoanNumber,full_add,Lat,Long,FIPS_z
2110,7910567010,"728 MAIN ST, RICHMOND, ME",37.538949,-77.437307,
2129,7939628407,"728 Main St Ste 1, Richmond, ME",40.726457,-73.821711,
3240,7713547005,"159 Flight Dr., Auburn, ME",42.886902,-85.774071,
3272,6202277109,"65 Main St, Richmond, ME",37.542424,-77.443738,


Parse for Merge

In [15]:
cols= ["LoanNumber", "FIPS_z"]
fips_fill_parsed = fips_fill[cols]
fips_fill_parsed

Unnamed: 0,LoanNumber,FIPS_z
2110,7910567010,
2129,7939628407,
3087,2383838510,230310340052017
3088,4556708308,230050020021016
3089,8890588607,230139705002006
...,...,...
4180,7989807001,230079701011047
4181,7994057006,230310370003021
4182,9416187007,230050173053044
4183,9616798809,230050022002005


Read in base data

In [16]:
#if from geocode file
#fips_check = pd.read_csv("data/state_data/geo/geocoded/geo_me.csv")
#fips_check = fips_check.drop(["Unnamed: 0.1", "Unnamed: 0.1.1"], axis =1) 
#fips_check

#if from FIPS file
fips_check = pd.read_csv("data/state_data/geo/geo_fips/ME_fips_scraped.csv") 
#fips_check = fips_check.drop(["Unnamed: 0.1", "Unnamed: 0.1.1", "Unnamed: 0"], axis =1) 
fips_check.head() #15796


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,...,TRIIndustrySector,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z
0,0,444832,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,SOUTH PORTLAND,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,230050000000000.0
1,1,444833,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,Saco,...,999 Other,6213 Offices of Other Health Practitioners,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568,230310100000000.0
2,2,444834,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,BANGOR,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,230190000000000.0
3,3,444835,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,Portland,...,999 Other,6239 Other Residential Care Facilities,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,230050000000000.0
4,4,444836,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",SOUTH PORTLAND,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,230050000000000.0


Merge with MainFrame

In [17]:
fips_merge = fips_check.merge(fips_fill_parsed, on="LoanNumber", how="left")
fips_merge

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,...,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z_x,FIPS_z_y
0,0,444832,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,SOUTH PORTLAND,...,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,2.300500e+14,
1,1,444833,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,Saco,...,6213 Offices of Other Health Practitioners,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568,2.303101e+14,
2,2,444834,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,BANGOR,...,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,2.301900e+14,
3,3,444835,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,Portland,...,6239 Other Residential Care Facilities,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,2.300500e+14,
4,4,444836,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",SOUTH PORTLAND,...,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,2.300500e+14,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4180,4180,449012,449012,7989807001,4/8/20,172,PPP,VALLEY GAS COMPANY,103 Main Street,KINGFIELD,...,4543 Direct Selling Establishments,4543.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,,230079701011047
4181,4181,449013,449013,7994057006,4/8/20,172,PPP,LEAPS AND BOUNDS,2046 State Road,Eliot,...,6244 Child Day Care Services,6244.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"2046 State Road, Eliot, ME",43.164896,-70.810369,,230310370003021
4182,4182,449014,449014,9416187007,4/9/20,172,PPP,"SCARBOROUGH ANIMAL HOSPITAL, P.A.",29 First Street,SCARBOROUGH,...,"5419 Other Professional, Scientific, and Techn...",5419.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,,230050173053044
4183,4183,449015,449015,9616798809,4/23/21,172,PPS,PARKER'S RESTAURANT INC.,1349 Washington Ave,Portland,...,7225 Restaurants and Other Eating Places,7225.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,,230050022002005


If FIPS Encoded

In [18]:
fips_merge["FIPS_z"] = fips_merge["FIPS_z_y"].fillna(fips_merge["FIPS_z_x"])
fips_merge

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,...,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z_x,FIPS_z_y,FIPS_z
0,0,444832,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,SOUTH PORTLAND,...,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,2.300500e+14,,2.3005e+14
1,1,444833,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,Saco,...,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568,2.303101e+14,,2.3031e+14
2,2,444834,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,BANGOR,...,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,2.301900e+14,,2.3019e+14
3,3,444835,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,Portland,...,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,2.300500e+14,,2.3005e+14
4,4,444836,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",SOUTH PORTLAND,...,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,2.300500e+14,,2.3005e+14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4180,4180,449012,449012,7989807001,4/8/20,172,PPP,VALLEY GAS COMPANY,103 Main Street,KINGFIELD,...,4543.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,,230079701011047,230079701011047
4181,4181,449013,449013,7994057006,4/8/20,172,PPP,LEAPS AND BOUNDS,2046 State Road,Eliot,...,6244.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"2046 State Road, Eliot, ME",43.164896,-70.810369,,230310370003021,230310370003021
4182,4182,449014,449014,9416187007,4/9/20,172,PPP,"SCARBOROUGH ANIMAL HOSPITAL, P.A.",29 First Street,SCARBOROUGH,...,5419.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,,230050173053044,230050173053044
4183,4183,449015,449015,9616798809,4/23/21,172,PPS,PARKER'S RESTAURANT INC.,1349 Washington Ave,Portland,...,7225.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,,230050022002005,230050022002005


Drop Extra FIPS Columns

In [19]:
fips_merge_drop = fips_merge.drop(["FIPS_z_x", "FIPS_z_y"], axis=1)
fips_merge_drop

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,...,TRIIndustrySector,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z
0,0,444832,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,SOUTH PORTLAND,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,2.3005e+14
1,1,444833,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,Saco,...,999 Other,6213 Offices of Other Health Practitioners,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568,2.3031e+14
2,2,444834,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,BANGOR,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,2.3019e+14
3,3,444835,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,Portland,...,999 Other,6239 Other Residential Care Facilities,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,2.3005e+14
4,4,444836,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",SOUTH PORTLAND,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,2.3005e+14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4180,4180,449012,449012,7989807001,4/8/20,172,PPP,VALLEY GAS COMPANY,103 Main Street,KINGFIELD,...,999 Other,4543 Direct Selling Establishments,4543.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,230079701011047
4181,4181,449013,449013,7994057006,4/8/20,172,PPP,LEAPS AND BOUNDS,2046 State Road,Eliot,...,999 Other,6244 Child Day Care Services,6244.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"2046 State Road, Eliot, ME",43.164896,-70.810369,230310370003021
4182,4182,449014,449014,9416187007,4/9/20,172,PPP,"SCARBOROUGH ANIMAL HOSPITAL, P.A.",29 First Street,SCARBOROUGH,...,999 Other,"5419 Other Professional, Scientific, and Techn...",5419.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,230050173053044
4183,4183,449015,449015,9616798809,4/23/21,172,PPS,PARKER'S RESTAURANT INC.,1349 Washington Ave,Portland,...,999 Other,7225 Restaurants and Other Eating Places,7225.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,230050022002005


In [20]:
fips_merge.loc[fips_merge["FIPS_z"].isnull()] #6

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,...,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z_x,FIPS_z_y,FIPS_z
1469,1469,446301,446301,3886797304,4/29/20,172,PPP,"PATRIOT PRECAST, LLC",P.O. Box 207,Oxford,...,2381.0,MAINE DISTRICT OFFICE,"b $350,000-1 million",20 to 49,"P.O. Box 207, Oxford, ME",,,,,
2110,2110,446942,446942,7910567010,4/8/20,172,PPP,"TRI-STONE INDUSTRIES, LLC",728 MAIN ST,RICHMOND,...,2362.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"728 MAIN ST, RICHMOND, ME",37.538949,-77.437307,,,
2129,2129,446961,446961,7939628407,2/12/21,172,PPS,TRI-STONE INDUSTRIES LLC,728 Main St Ste 1,Richmond,...,2362.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"728 Main St Ste 1, Richmond, ME",40.726457,-73.821711,,,
3240,3240,448072,448072,7713547005,4/8/20,172,PPP,FIRESAFE EQUIPMENT CO,159 Flight Dr.,Auburn,...,4239.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"159 Flight Dr., Auburn, ME",42.886902,-85.774071,,,
3272,3272,448104,448104,6202277109,4/14/20,172,PPP,MAIN STREET FUEL,65 Main St,Richmond,...,4247.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"65 Main St, Richmond, ME",37.542424,-77.443738,,,
3497,3497,448329,448329,1916827107,4/10/20,172,PPP,A.C. PARSONS LANDSCAPING & GARDEN CENTER,PO BOX 116,BERNARD,...,5617.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"PO BOX 116, BERNARD, ME",,,,,


In [21]:
geo_ME_fips = fips_merge_drop

Convert to File

In [22]:
geo_ME_fips.to_csv("data/state_data/geo/geo_fips/ME_fips_scraped.csv") 

Review & Compare

In [23]:
geo_ME_fips = pd.read_csv("data/state_data/geo/geo_fips/ME_fips_scraped.csv") 
geo_ME_fips

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,...,TRIIndustrySector,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z
0,0,0,444832,444832,5508457004,4/5/20,172,PPP,"INTERMED, P.A.",100 Gannett Dr Suite C,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"100 Gannett Dr Suite C, SOUTH PORTLAND, ME",43.636154,-70.355718,2.300500e+14
1,1,1,444833,444833,8722078706,4/8/21,172,PPP,SWEETSER,50 Moody St,...,999 Other,6213 Offices of Other Health Practitioners,6213.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"50 Moody St, Saco, ME",43.517756,-70.435568,2.303101e+14
2,2,2,444834,444834,4478017005,4/3/20,172,PPP,PENOBSCOT COMMUNITY HEALTH CENTER,103 Maine Avenue,...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,500 or more,"103 Maine Avenue, BANGOR, ME",44.801223,-68.807735,2.301900e+14
3,3,3,444835,444835,4752408810,4/16/21,172,PPP,SPURWINK SERVICES INCORPORATED,901 Washington Ave Ste 100,...,999 Other,6239 Other Residential Care Facilities,6239.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"901 Washington Ave Ste 100, Portland, ME",43.689191,-70.272333,2.300500e+14
4,4,4,444836,444836,4593917108,4/13/20,172,PPP,"SPECTRUM HEALTHCARE PARTNERS, P.A.","324 Gannett Drive,",...,999 Other,6211 Offices of Physicians,6211.0,MAINE DISTRICT OFFICE,e $5-10 million,250 to 499,"324 Gannett Drive,, SOUTH PORTLAND, ME",43.641674,-70.354535,2.300500e+14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4180,4180,4180,449012,449012,7989807001,4/8/20,172,PPP,VALLEY GAS COMPANY,103 Main Street,...,999 Other,4543 Direct Selling Establishments,4543.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"103 Main Street, KINGFIELD, ME",44.950061,-70.149805,2.300797e+14
4181,4181,4181,449013,449013,7994057006,4/8/20,172,PPP,LEAPS AND BOUNDS,2046 State Road,...,999 Other,6244 Child Day Care Services,6244.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"2046 State Road, Eliot, ME",43.164896,-70.810369,2.303104e+14
4182,4182,4182,449014,449014,9416187007,4/9/20,172,PPP,"SCARBOROUGH ANIMAL HOSPITAL, P.A.",29 First Street,...,999 Other,"5419 Other Professional, Scientific, and Techn...",5419.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"29 First Street, SCARBOROUGH, ME",43.600102,-70.321973,2.300502e+14
4183,4183,4183,449015,449015,9616798809,4/23/21,172,PPS,PARKER'S RESTAURANT INC.,1349 Washington Ave,...,999 Other,7225 Restaurants and Other Eating Places,7225.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"1349 Washington Ave, Portland, ME",43.698961,-70.286492,2.300500e+14


In [24]:
#fill in missing Lat, Long, FIPS
geo_ME_fips.loc[geo_ME_fips["FIPS_z"].isnull()] #6

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,...,TRIIndustrySector,IndustrySubsector,4DigitNAICS,SBAOfficeLabel,Loan_Range,job_Range,full_add,Lat,Long,FIPS_z
1469,1469,1469,446301,446301,3886797304,4/29/20,172,PPP,"PATRIOT PRECAST, LLC",P.O. Box 207,...,999 Other,"2381 Foundation, Structure, and Building Exter...",2381.0,MAINE DISTRICT OFFICE,"b $350,000-1 million",20 to 49,"P.O. Box 207, Oxford, ME",,,
2110,2110,2110,446942,446942,7910567010,4/8/20,172,PPP,"TRI-STONE INDUSTRIES, LLC",728 MAIN ST,...,999 Other,2362 Nonresidential Building Construction,2362.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"728 MAIN ST, RICHMOND, ME",37.538949,-77.437307,
2129,2129,2129,446961,446961,7939628407,2/12/21,172,PPS,TRI-STONE INDUSTRIES LLC,728 Main St Ste 1,...,999 Other,2362 Nonresidential Building Construction,2362.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"728 Main St Ste 1, Richmond, ME",40.726457,-73.821711,
3240,3240,3240,448072,448072,7713547005,4/8/20,172,PPP,FIRESAFE EQUIPMENT CO,159 Flight Dr.,...,999 Other,4239 Miscellaneous Durable Goods Merchant Whol...,4239.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",10 to 19,"159 Flight Dr., Auburn, ME",42.886902,-85.774071,
3272,3272,3272,448104,448104,6202277109,4/14/20,172,PPP,MAIN STREET FUEL,65 Main St,...,999 Other,4247 Petroleum and Petroleum Products Merchant...,4247.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"65 Main St, Richmond, ME",37.542424,-77.443738,
3497,3497,3497,448329,448329,1916827107,4/10/20,172,PPP,A.C. PARSONS LANDSCAPING & GARDEN CENTER,PO BOX 116,...,999 Other,5617 Services to Buildings and Dwellings,5617.0,MAINE DISTRICT OFFICE,"a $150,000-350,000",20 to 49,"PO BOX 116, BERNARD, ME",,,
