Get the county FIPS code for each patient location's latitude/longitude coordinate using the FCC Area API
<br>https://geo.fcc.gov/api/census/#!/area/get_area

In [16]:
import json
import math
import urllib
import urllib.request

import numpy as np
import pandas as pd
import requests

In [3]:
# Load the patient location table and preview its first five rows.
points = pd.read_csv(filepath_or_buffer="data/patient_points.csv")
points.head(n=5)

Unnamed: 0,LOC_ID,Latitude,Longitude
0,L0,40.533103,-74.205761
1,L1,40.658644,-73.854179
2,L2,40.854103,-74.148751
3,L3,40.818172,-73.051483
4,L4,42.139831,-76.81781


In [6]:
# Pair each latitude with its longitude, row by row.
# gps_lst is a materialised list of (lat, lon) tuples; gps_zip is a lazy,
# single-use iterator over the same pairs.
gps_lst = list(zip(points["Latitude"].values, points["Longitude"].values))
gps_zip = zip(points["Latitude"].values, points["Longitude"].values)

# Will hold one FCC request URL per coordinate pair.
url_lst = list()

In [7]:
# Build one FCC Area API request URL per (latitude, longitude) pair.
# The list is rebuilt from gps_lst (a re-iterable list) instead of appending
# while consuming the one-shot gps_zip iterator, so re-running this cell always
# yields exactly one URL per row rather than nothing or duplicates.
url_lst = [
    f"https://geo.fcc.gov/api/census/area?lat={lat}&lon={lon}&format=json"
    for lat, lon in gps_lst
]

In [9]:
# Accumulator for the county FIPS codes returned by the API,
# filled in the same order as url_lst by the request loop.
fips_lst = []

In [10]:
# Query the FCC API once per URL and keep the county FIPS code of the first
# result. fips_lst is reset here so that re-running the cell cannot append
# duplicates and break its positional alignment with the rows of `points`.
fips_lst = []
for request_url in url_lst:
    # A timeout keeps a single stalled request from hanging the whole loop.
    with urllib.request.urlopen(request_url, timeout=30) as response:
        payload = json.loads(response.read().decode())
    results = payload.get('results') or []
    # NOTE(review): the API presumably returns an empty result list for points
    # it cannot place in a county — confirm. A None placeholder is stored in
    # that case so later entries still line up with their rows.
    fips_lst.append(results[0]['county_fips'] if results else None)

In [13]:
# Attach the fetched county FIPS codes as a new column; this relies on
# fips_lst having one entry per row of `points`, in row order.
points['fips'] = np.asarray(fips_lst)

In [14]:
# Preview the first rows to confirm the new fips column is present.
points.head()

Unnamed: 0,LOC_ID,Latitude,Longitude,fips
0,L0,40.533103,-74.205761,36085
1,L1,40.658644,-73.854179,36081
2,L2,40.854103,-74.148751,34031
3,L3,40.818172,-73.051483,36103
4,L4,42.139831,-76.81781,36015


In [15]:
# Persist the locations with their FIPS codes so the API calls need not be repeated.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; confirm no downstream reader depends on it before adding index=False.
points.to_csv("data/patient_loc_fips.csv")

Add a rural indicator (`is_rural`) to each `LOC_ID`, based on the population density of its county

In [28]:
# Reload the saved locations, reading fips as text rather than a number,
# keep only the four working columns, and restrict to the first 1000 rows.
points = (
    pd.read_csv("data/patient_loc_fips.csv", dtype={"fips": object})
    .loc[:, ['LOC_ID', "Latitude", 'Longitude', 'fips']]
    .iloc[:1000]
)

In [29]:
# County-level population table; fips is read as text so it can be joined
# against the text fips column of `points`.
# NOTE(review): the displayed fips values have no leading zero (e.g. '9001'),
# so they may not match 5-character county FIPS strings — verify before joining.
pop = pd.read_csv("data/cleaned_county_pop_density.csv", dtype={"fips": object})

In [30]:
# Preview the reloaded patient locations (fips is kept as text).
points.head()

Unnamed: 0,LOC_ID,Latitude,Longitude,fips
0,L0,40.533103,-74.205761,36085
1,L1,40.658644,-73.854179,36081
2,L2,40.854103,-74.148751,34031
3,L3,40.818172,-73.051483,36103
4,L4,42.139831,-76.81781,36015


In [31]:
# Preview the county population table that is joined on fips below.
pop.head()

Unnamed: 0,fips,state,county,pop,land_area,pop_density
0,9001,Connecticut,Fairfield County,943332,624.89,1509.596889
1,9003,Connecticut,Hartford County,891720,735.1,1213.059448
2,9005,Connecticut,Litchfield County,180333,920.56,195.89489
3,9007,Connecticut,Middlesex County,162436,369.3,439.848362
4,9009,Connecticut,New Haven County,854757,604.51,1413.966684


In [34]:
# Merge county population data onto each patient location.
# Both fips keys are text, but the displayed `pop` values have lost their
# leading zero ('9001' rather than '09001'). Each key is therefore left-padded
# to the 5-character county FIPS width before joining; without this, locations
# in states whose code starts with 0 would never match and would silently get
# NaN population fields.
# how='left' keeps every row of `points`, matched or not.
merged = points.assign(fips=points["fips"].str.zfill(5)).merge(
    pop.assign(fips=pop["fips"].str.zfill(5)),
    how='left',
    on="fips",
)

In [35]:
# Inspect the first merged rows: each location should now carry its county's population fields.
merged.head()

Unnamed: 0,LOC_ID,Latitude,Longitude,fips,state,county,pop,land_area,pop_density
0,L0,40.533103,-74.205761,36085,New York,Richmond County,476143,58.37,8157.323968
1,L1,40.658644,-73.854179,36081,New York,Queens County,2253858,108.53,20767.142726
2,L2,40.854103,-74.148751,34031,New Jersey,Passaic County,501826,184.59,2718.597974
3,L3,40.818172,-73.051483,36103,New York,Suffolk County,1476601,912.05,1618.991283
4,L4,42.139831,-76.81781,36015,New York,Chemung County,83456,407.35,204.875414


In [36]:
# Sanity check: a left merge keeps every row of `points`, so the row count
# should equal len(points); extra rows would point to duplicate fips keys in `pop`.
merged.shape

(1000, 9)

In [37]:
# Keep only the identifier and the density used for the rural label.
# .copy() makes this an independent frame, so adding a column to it later
# does not raise SettingWithCopyWarning.
merged = merged[["LOC_ID", "pop_density"]].copy()

In [39]:
def is_rural(row, threshold=500):
    """Return 1 if the row's population density is below ``threshold``, else 0.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'pop_density'`` entry.
    threshold : float, optional
        Density cut-off below which a location counts as rural. Defaults to
        500, the value this notebook has always used. The unit is that of
        ``pop_density`` (presumably people per unit of land area — confirm).

    Returns
    -------
    int
        1 for rural, 0 otherwise. A missing (NaN) density compares as not
        below the threshold and therefore yields 0.
    """
    return int(row['pop_density'] < threshold)

In [40]:
# Label every location row with is_rural (1 = rural, 0 = not rural).
# assign() returns a new frame instead of writing a column into a sliced one,
# which avoids SettingWithCopyWarning; is_rural is passed directly because
# wrapping it in a lambda adds nothing.
merged = merged.assign(is_rural=merged.apply(is_rural, axis=1))

In [41]:
# Spot-check the is_rural labels against pop_density.
merged.head()

Unnamed: 0,LOC_ID,pop_density,is_rural
0,L0,8157.323968,0
1,L1,20767.142726,0
2,L2,2718.597974,0
3,L3,1618.991283,0
4,L4,204.875414,1


In [42]:
# Reduce to the two columns that make up the output lookup table.
output_columns = ['LOC_ID', 'is_rural']
final = merged.loc[:, output_columns]

In [43]:
# Preview the LOC_ID / is_rural lookup table.
final.head()

Unnamed: 0,LOC_ID,is_rural
0,L0,0
1,L1,0
2,L2,0
3,L3,0
4,L4,1


In [44]:
# Save the LOC_ID / is_rural lookup table.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; confirm no downstream reader depends on it before adding index=False.
final.to_csv("data/cleaned_loc_rural.csv")