In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from skimpy import clean_columns
import requests
import re

## County of Los Angeles Fire Department (LACoFD) Fire Station Locations

In [14]:
# read in LACoFD station locations from the ArcGIS FeatureServer (GeoJSON response)
url = 'https://services.arcgis.com/RmCCgQtiZLDCtblq/arcgis/rest/services/LACoFD_Fire_Station_Locations/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
# a timeout stops the cell from hanging forever on a stalled connection
data = requests.get(url, timeout = 60)
# fail loudly on a 4xx/5xx response instead of handing an error page to the parser
data.raise_for_status()
stations = gpd.GeoDataFrame.from_features(data.json())

In [15]:
# label the coordinates as WGS 84 lon/lat (set_crs assigns a CRS, it does not reproject)
stations = stations.set_crs(crs = 'EPSG:4326')

In [16]:
# show the first five raw station records
stations.head(n = 5)

Unnamed: 0,geometry,OBJECTID,STANUM,CITYNAME_A,ADDRESS,CITY,STATEZIP,BN,DIV,REGION,GlobalID,SD,SAND,TELEPHONE,BAGS
0,POINT (-118.79741 34.75939),1,77,COUNTY,46833 Peace Valley Road,Gorman,CA 93243-0002,6,III,North,67422530-bb34-4f72-9cd2-783cacd4858a,5.0,Yes,661-248-5284,Yes
1,POINT (-118.28819 34.75441),2,112,COUNTY,8812 W. Avenue E-8,Lancaster,CA 93535-9662,11,V,North,a369b226-5f3c-45e5-a75f-96225a06faba,,,,
2,POINT (-118.07747 34.69867),3,117,LANCASTER,44851 30th Street East,Lancaster,CA 93535-1338,11,V,North,fe24a921-2033-4a6b-b3ee-b29ac7b1c10a,5.0,No,661-946-0471,Yes
3,POINT (-118.14139 34.69939),4,33,LANCASTER,44947 Date Avenue,Lancaster,CA 93534-3213,11,V,North,9d7ae295-4c19-4ddf-ae80-adb3bc28d4c9,5.0,No,661-948-1180,Yes
4,POINT (-118.2011 34.69282),5,130,LANCASTER,44558 40th Street West,Lancaster,CA 93536-9575,11,V,North,ba30861a-9219-4c88-9ec7-3856e21716c8,5.0,No,661-945-5788,Yes


In [17]:
# remove unnecessary columns/variables
stations = stations.drop(
    ['OBJECTID', 'CITYNAME_A', 'GlobalID', 'TELEPHONE', 'SD', 'SAND', 'BAGS', 'BN'],
    axis = 'columns',
)

In [18]:
# normalise every entry pandas already treats as missing to NaN
stations = stations.fillna(np.nan)

In [19]:
# the literal string 'None' also stands for a missing value; turn it into NaN
stations = stations.replace(to_replace = 'None', value = np.nan)

In [20]:
# build ZIPCODE by stripping the 'CA ' prefix from STATEZIP, then discard STATEZIP
stations = (
    stations
    .assign(ZIPCODE = stations['STATEZIP'].str.replace(pat = r'CA\s+', repl = '', regex = True))
    .drop(columns = ['STATEZIP'])
)

In [21]:
# column names to snake case (skimpy's clean_columns; e.g. STANUM -> stanum in the
# preview below)
stations = clean_columns(stations)

In [22]:
# show the first five rows of the cleaned station table
stations.head(n = 5)

Unnamed: 0,geometry,stanum,address,city,div,region,zipcode
0,POINT (-118.79741 34.75939),77,46833 Peace Valley Road,Gorman,III,North,93243-0002
1,POINT (-118.28819 34.75441),112,8812 W. Avenue E-8,Lancaster,V,North,93535-9662
2,POINT (-118.07747 34.69867),117,44851 30th Street East,Lancaster,V,North,93535-1338
3,POINT (-118.14139 34.69939),33,44947 Date Avenue,Lancaster,V,North,93534-3213
4,POINT (-118.2011 34.69282),130,44558 40th Street West,Lancaster,V,North,93536-9575


In [23]:
# write the cleaned station locations to disk (no driver is passed, so the output
# format is left to geopandas to infer from the file extension)
stations.to_file(filename = 'LACoFD_stations.json')

## LACoFD Station Boundary Data

In [37]:
# read in LACoFD station boundaries from the ArcGIS FeatureServer (GeoJSON response)
url = 'https://services.arcgis.com/RmCCgQtiZLDCtblq/arcgis/rest/services/LACoFD_Fire_Station_Boundaries/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
# a timeout stops the cell from hanging forever on a stalled connection
data = requests.get(url, timeout = 60)
# fail loudly on a 4xx/5xx response instead of handing an error page to the parser
data.raise_for_status()
lacofd_bounds = gpd.GeoDataFrame.from_features(data.json())

In [38]:
# label the coordinates as WGS 84 lon/lat (set_crs assigns a CRS, it does not reproject)
lacofd_bounds = lacofd_bounds.set_crs(crs = 'EPSG:4326')

In [39]:
# show the first five raw boundary records
lacofd_bounds.head(n = 5)

Unnamed: 0,geometry,OBJECTID,STANUM,ADDRESS,CITY,STATEZIP,BATTID,DIV,Region,Shape__Area,Shape__Length,GlobalID
0,"POLYGON ((-117.80128 34.12354, -117.80075 34.1...",532,64,164 S. Walnut,San Dimas,CA 91773-2620,2,II,East,108202900.0,70429.794192,165df40f-0c64-4a2b-bd2b-71568ec25341
1,"POLYGON ((-117.79084 34.09064, -117.7904 34.09...",533,184,1980 W. Orange Grove Ave.,Pomona,CA 91768-2046,15,VIII,East,121372900.0,62874.156832,5b21df6a-13f9-47ea-850d-ce8079fcaa82
2,"POLYGON ((-117.72208 34.05206, -117.72237 34.0...",534,185,925 Lexington Ave.,Pomona,CA 91766-3256,15,VIII,East,115640400.0,48140.592963,8d70fcbd-facd-4496-85cf-fb49dac29194
3,"POLYGON ((-117.72816 34.08692, -117.72814 34.0...",535,183,710 N. San Antonio Ave,Pomona,CA 91767-4910,15,VIII,East,98384290.0,46557.865399,2ad2aa00-b1b6-4f80-bdff-67ae897a1889
4,"POLYGON ((-117.7454 34.11083, -117.74542 34.10...",536,186,280 E. Bonita Ave.,Pomona,CA 91767-1924,15,VIII,East,64484700.0,50207.948639,174d1800-9d54-4ad6-8865-db98d4bdf3f9


In [40]:
# remove unnecessary columns/variables
lacofd_bounds = lacofd_bounds.drop(
    ['OBJECTID', 'GlobalID', 'Shape__Length', 'Shape__Area'],
    axis = 'columns',
)

In [41]:
# build ZIPCODE by stripping the 'CA ' prefix from STATEZIP, then discard STATEZIP
lacofd_bounds = (
    lacofd_bounds
    .assign(ZIPCODE = lacofd_bounds['STATEZIP'].str.replace(pat = r'CA\s+', repl = '', regex = True))
    .drop(columns = ['STATEZIP'])
)

In [42]:
# column names to snake case (skimpy's clean_columns; e.g. BATTID -> battid in the
# preview below)
lacofd_bounds = clean_columns(lacofd_bounds)

In [43]:
# show the first five rows of the cleaned boundary table
lacofd_bounds.head(n = 5)

Unnamed: 0,geometry,stanum,address,city,battid,div,region,zipcode
0,"POLYGON ((-117.80128 34.12354, -117.80075 34.1...",64,164 S. Walnut,San Dimas,2,II,East,91773-2620
1,"POLYGON ((-117.79084 34.09064, -117.7904 34.09...",184,1980 W. Orange Grove Ave.,Pomona,15,VIII,East,91768-2046
2,"POLYGON ((-117.72208 34.05206, -117.72237 34.0...",185,925 Lexington Ave.,Pomona,15,VIII,East,91766-3256
3,"POLYGON ((-117.72816 34.08692, -117.72814 34.0...",183,710 N. San Antonio Ave,Pomona,15,VIII,East,91767-4910
4,"POLYGON ((-117.7454 34.11083, -117.74542 34.10...",186,280 E. Bonita Ave.,Pomona,15,VIII,East,91767-1924


In [79]:
# write the cleaned station boundaries to disk (no driver is passed, so the output
# format is left to geopandas to infer from the file extension)
lacofd_bounds.to_file(filename = 'LACoFD_station_boundaries.json')

## Los Angeles Fire Department (LAFD) Station Locations

In [56]:
# read in LAFD station locations from the ArcGIS FeatureServer (GeoJSON response)
url = 'https://services5.arcgis.com/7nsPwEMP38bSkCjy/arcgis/rest/services/LAFD_FireStations/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
# a timeout stops the cell from hanging forever on a stalled connection
data = requests.get(url, timeout = 60)
# fail loudly on a 4xx/5xx response instead of handing an error page to the parser
data.raise_for_status()
lafd_stations = gpd.GeoDataFrame.from_features(data.json())

In [57]:
# label the coordinates as WGS 84 lon/lat (set_crs assigns a CRS, it does not reproject)
lafd_stations = lafd_stations.set_crs(crs = 'EPSG:4326')

In [58]:
# show the first five raw LAFD station records
lafd_stations.head(n = 5)

Unnamed: 0,geometry,FID,OBJECTID,FS_CD,SHP_ADDR,ADDRESS,ZIP,DEPTNAME,MAILSTOP,HSENO,...,DATE_,FireStationFirstinDate,September4,September5,September6,September7,September8,September9,September10,WeeklyTotals
0,POINT (-118.50059 34.16105),1,1,83,Fire Station 083,4960 BALBOA BLVD,91436,VALLEY BUREAU,MS-880,4960,...,1358381000000.0,83.0,13.0,15.0,13.0,11.0,13.0,7.0,5.0,77.0
1,POINT (-118.45144 34.24239),2,2,7,Fire Station 007,14630 PLUMMER ST,91402,FIRE,MS-878,14630,...,1423613000000.0,7.0,21.0,26.0,30.0,23.0,19.0,30.0,19.0,168.0
2,POINT (-118.25685 34.01275),3,3,14,Fire Station 014,3401 S CENTRAL AV,90011,FIRE,MS-798,3401,...,,14.0,21.0,13.0,20.0,18.0,20.0,22.0,21.0,135.0
3,POINT (-118.55892 34.2768),4,4,8,Fire Station 008,11351 TAMPA AVE,91326,FIRE,MS-806,11351,...,,8.0,4.0,3.0,7.0,2.0,3.0,4.0,5.0,28.0
4,POINT (-118.29344 33.75309),5,5,36,Fire Station 036,1005 N GAFFEY ST,90732,FIRE,MS-,1005,...,,36.0,7.0,7.0,7.0,5.0,8.0,5.0,12.0,51.0


In [63]:
# drop unnecessary columns: identifiers, address components, coordinates, dates,
# and the daily September4..September10 counts with their weekly total
lafd_stations = lafd_stations.drop(
    [
        'FID', 'OBJECTID', 'SHP_ADDR', 'MAILSTOP', 'HSENO', 'PREF_DIR',
        'STNAME', 'STTYPE', 'SUFF_DIR', 'UNITDESC', 'TYPE', 'CLASS',
        'X_COORD', 'Y_COORD', 'DATE_', 'FireStationFirstinDate',
        *[f'September{day}' for day in range(4, 11)],
        'WeeklyTotals',
    ],
    axis = 'columns',
)

In [65]:
# give the station-number and ZIP columns the names used by the LACoFD tables
lafd_stations = lafd_stations.rename({'FS_CD': 'stanum', 'ZIP': 'zipcode'}, axis = 'columns')

In [66]:
# column names to snake case (skimpy's clean_columns; e.g. DEPTNAME -> deptname in
# the preview below)
lafd_stations = clean_columns(lafd_stations)

In [67]:
# show the first five rows of the cleaned LAFD station table
lafd_stations.head(n = 5)

Unnamed: 0,geometry,stanum,address,zipcode,deptname
0,POINT (-118.50059 34.16105),83,4960 BALBOA BLVD,91436,VALLEY BUREAU
1,POINT (-118.45144 34.24239),7,14630 PLUMMER ST,91402,FIRE
2,POINT (-118.25685 34.01275),14,3401 S CENTRAL AV,90011,FIRE
3,POINT (-118.55892 34.2768),8,11351 TAMPA AVE,91326,FIRE
4,POINT (-118.29344 33.75309),36,1005 N GAFFEY ST,90732,FIRE


In [82]:
# write the cleaned LAFD station locations to disk (no driver is passed, so the
# output format is left to geopandas to infer from the file extension)
lafd_stations.to_file(filename = 'lafd_stations.json')

## Los Angeles Fire Department (LAFD) Station Boundaries

In [70]:
# read in LAFD first-in district boundaries from the ArcGIS FeatureServer (GeoJSON response)
url = 'https://services5.arcgis.com/7nsPwEMP38bSkCjy/arcgis/rest/services/LAFD_FirstIn_Districts/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
# a timeout stops the cell from hanging forever on a stalled connection
data = requests.get(url, timeout = 60)
# fail loudly on a 4xx/5xx response instead of handing an error page to the parser
data.raise_for_status()
lafd_bounds = gpd.GeoDataFrame.from_features(data.json())

In [71]:
# label the coordinates as WGS 84 lon/lat (set_crs assigns a CRS, it does not reproject)
lafd_bounds = lafd_bounds.set_crs(crs = 'EPSG:4326')

In [72]:
# show the first five raw LAFD boundary records
lafd_bounds.head(n = 5)

Unnamed: 0,geometry,FID,BATTALION_,DIVISION_N,FIRSTIN_DI,RFSNUM,AREA_SQMI,BUREAU,SHAPE_AREA,SHAPE_LEN,Shape__Area,Shape__Length
0,"POLYGON ((-118.41879 33.98934, -118.41565 33.9...",1,4,0,67,67,4.40334,WEST BUREAU,122756900.0,0,16623580.0,29504.527048
1,"POLYGON ((-118.27011 33.99069, -118.26519 33.9...",2,13,0,33,33,3.29332,SOUTH BUREAU,91418660.0,0,12381460.0,16729.625795
2,"MULTIPOLYGON (((-118.43221 34.02402, -118.4289...",3,4,0,62,62,4.073171,WEST BUREAU,113390900.0,0,15367400.0,31401.262229
3,"POLYGON ((-118.24738 34.00565, -118.24738 34.0...",4,13,0,21,21,1.975591,SOUTH BUREAU,54887370.0,0,7437202.0,12915.021283
4,"POLYGON ((-118.30891 34.00846, -118.30779 34.0...",5,13,0,66,66,5.321798,SOUTH BUREAU,148554500.0,0,20124680.0,27210.886991


In [74]:
# drop unnecessary columns
lafd_bounds = lafd_bounds.drop(
    [
        'FID',
        'BATTALION_',
        'DIVISION_N',
        'FIRSTIN_DI',
        'AREA_SQMI',
        'BUREAU',
        'SHAPE_AREA',
        'SHAPE_LEN',
        'Shape__Area',
        'Shape__Length',
    ],
    axis = 'columns',
)

In [75]:
# call the station-number column 'stanum', as in the other station tables
lafd_bounds = lafd_bounds.rename({'RFSNUM': 'stanum'}, axis = 'columns')

In [76]:
# show the first five rows of the cleaned LAFD boundary table
lafd_bounds.head(n = 5)

Unnamed: 0,geometry,stanum
0,"POLYGON ((-118.41879 33.98934, -118.41565 33.9...",67
1,"POLYGON ((-118.27011 33.99069, -118.26519 33.9...",33
2,"MULTIPOLYGON (((-118.43221 34.02402, -118.4289...",62
3,"POLYGON ((-118.24738 34.00565, -118.24738 34.0...",21
4,"POLYGON ((-118.30891 34.00846, -118.30779 34.0...",66


In [83]:
# write the cleaned LAFD boundaries to disk (no driver is passed, so the output
# format is left to geopandas to infer from the file extension)
lafd_bounds.to_file(filename = 'lafd_station_boundaries.json')

## All fire stations in LA County

In [None]:
# LA County Points of Interest (ArcGIS FeatureServer layer 5, GeoJSON response)
url = 'https://services.arcgis.com/RmCCgQtiZLDCtblq/arcgis/rest/services/Points_of_Interest/FeatureServer/5/query?outFields=*&where=1%3D1&f=geojson'
# a timeout stops the cell from hanging forever on a stalled connection
data = requests.get(url, timeout = 60)
# fail loudly on a 4xx/5xx response instead of handing an error page to the parser
data.raise_for_status()
poi = gpd.GeoDataFrame.from_features(data.json())

# All fire stations in LA County
# .copy() makes the subset independent of `poi`, so later column assignments
# cannot trigger SettingWithCopyWarning; the original `poi` index labels are kept
all_stations = poi[poi['cat2'] == 'Fire Stations'].copy()

In [None]:
# show the first five fire-station points of interest
all_stations.head(n = 5)

Unnamed: 0,geometry,OBJECTID,name,label,cat2,cat1,cat3,organization,addrln1,addrln2,city,state,zip,source,source_id,source_date,display,last_update
0,POINT (-118.32898 33.33947),8663,Avalon Fire Department,Avalon Fire Department,Fire Stations,Public Safety,,HIFLD,420 Avalon Canyon Road,,Avalon,CA,90704,U.S. Geological Survey,b414f166-6883-4537-aeae-3152b990fc09,2018-07-19 20:17:43,Y,
1,POINT (-118.50039 33.44118),8664,Los Angeles County Fire Department Station 155,Los Angeles County Fire Department Station 155,Fire Stations,Public Safety,,HIFLD,1 Banning House Road,,Two Harbors,CA,90704,U.S. Geological Survey,{00839135-9AEA-4215-A974-79A27E1DDE89},2017-07-11 19:53:07,Y,
2,POINT (-118.33544 33.33308),8665,Los Angeles County Fire Department Station 55,Los Angeles County Fire Department Station 55,Fire Stations,Public Safety,,HIFLD,945 Avalon Canyon Road,,Avalon,CA,90704,U.S. Geological Survey,{874A7F0C-158D-4456-86A4-B18EB9038C27},2017-07-05 21:27:31,Y,
3,POINT (-118.26386 34.10765),8666,City of Los Angeles Fire Department Station 56,City of Los Angeles Fire Department Station 56,Fire Stations,Public Safety,,HIFLD,2759 Rowena Avenue,,Los Angeles,CA,90039,U.S. Geological Survey,411fe03d-aa99-48ee-bb06-3e0f4619eb8c,2016-08-12 13:19:58,Y,
4,POINT (-117.75956 34.06769),8667,Los Angeles County Fire Department Station 182,Los Angeles County Fire Department Station 182,Fire Stations,Public Safety,,HIFLD,1059 North White Avenue,,Pomona,CA,91768,U.S. Geological Survey,{9BEFFFCD-3F8B-4325-8959-B9D310E2EB79},2011-03-03 18:02:46,Y,


In [None]:
# drop unnecessary columns ('label' is kept: later cells parse it)
all_stations = all_stations.drop(
    [
        'OBJECTID', 'name', 'cat1', 'cat2', 'cat3', 'organization', 'addrln2',
        'state', 'source', 'source_id', 'source_date', 'display', 'last_update',
    ],
    axis = 'columns',
)

In [None]:
# the first address line becomes 'address', matching the other station tables
all_stations = all_stations.rename({'addrln1': 'address'}, axis = 'columns')

In [None]:
# department name = everything in the label up to and including the last
# 'Department'; labels without that word yield NaN
all_stations = all_stations.assign(
    department = all_stations['label'].str.extract(r'^(.*Department)', expand=False)
)

In [None]:
# station number = the digits following 'Station ' in the label (NaN when the label
# carries no such number); the value stays text, it is not cast to a number here
all_stations = all_stations.assign(
    stanum = all_stations['label'].str.extract(r'Station\s+(\d+)', expand = False)
)

In [None]:
# strip a leading 'City of ' (any letter case) from department names
all_stations['department'] = all_stations['department'].str.replace(
    pat = r'(?i)^city of\s*',
    repl = '',
    regex = True,
)

In [None]:
# abbreviate the two large departments: the county department becomes 'LACoFD',
# then the city department becomes 'LAFD'
all_stations['department'] = (
    all_stations['department']
    .str.replace('Los Angeles County Fire Department', 'LACoFD')
    .str.replace('Los Angeles Fire Department', 'LAFD')
)

In [None]:
# fix pesky observations by hand
# 14 and 353 are index labels carried over from the Points of Interest query.
# .loc would silently append a brand-new row for a label that is not in the index,
# so make sure both rows exist first. (This cannot detect rows that merely shifted
# position upstream -- re-check these labels if the source data changes.)
department_fixes = {
    14: 'Glendora Fire Department',
    353: 'Torrance Refinery Fire Department',  # spelling corrected from 'Torrence'
}
absent_labels = [row_label for row_label in department_fixes if row_label not in all_stations.index]
if absent_labels:
    raise KeyError(f'rows {absent_labels} not found in all_stations; update the hard-coded fixes')
for row_label, department_name in department_fixes.items():
    all_stations.loc[row_label, 'department'] = department_name

# na = False keeps the mask strictly boolean even if some label is missing
is_forest_service = all_stations['label'].str.contains('United States Forest Service', case = False, na = False)
all_stations.loc[is_forest_service, 'department'] = 'United States Forest Service Angeles National Forest'

In [None]:
# 'label' has been parsed into 'department' and 'stanum'; remove it
all_stations = all_stations.drop('label', axis = 'columns')

In [None]:
# label the coordinates as WGS 84 lon/lat (set_crs assigns a CRS, it does not reproject)
all_stations = all_stations.set_crs(crs = 'EPSG:4326')

In [None]:
# list the distinct department names to eyeball the clean-up result
all_stations.loc[:, 'department'].unique()

array(['Avalon Fire Department', 'LACoFD', 'LAFD',
       'Pasadena Fire Department', 'Long Beach Fire Department',
       'Hermosa Beach Fire Department', 'Glendora Fire Department',
       'United States Forest Service Angeles National Forest',
       'Vernon Fire Department', 'Alhambra Fire Department',
       'Santa Monica Fire Department', 'Beverly Hills Fire Department',
       'La Habra Heights Fire Department', 'Burbank Fire Department',
       'Arcadia Fire Department',
       'NASA Jet Propulsion Lab Fire Department',
       'Bob Hope Airport Fire Department', 'Monrovia Fire Department',
       'Santa Fe Springs Fire Department', 'Boeing Fire Department',
       'Glendale Fire Department', 'San Gabriel Fire Department',
       'West Covina Fire Department', 'Downey Fire Department',
       'Monterey Park Fire Department', 'EL Segundo Fire Department',
       'La Verne Fire Department', 'Culver City Fire Department',
       'Walt Disney Studios Fire Department',
       'Manhat

In [None]:
# show the first five rows of the cleaned all-stations table
all_stations.head(n = 5)

Unnamed: 0,geometry,address,city,zip,department,stanum
0,POINT (-118.32898 33.33947),420 Avalon Canyon Road,Avalon,90704,Avalon Fire Department,
1,POINT (-118.50039 33.44118),1 Banning House Road,Two Harbors,90704,LACoFD,155.0
2,POINT (-118.33544 33.33308),945 Avalon Canyon Road,Avalon,90704,LACoFD,55.0
3,POINT (-118.26386 34.10765),2759 Rowena Avenue,Los Angeles,90039,LAFD,56.0
4,POINT (-117.75956 34.06769),1059 North White Avenue,Pomona,91768,LACoFD,182.0


In [None]:
# write the cleaned all-stations table to disk (no driver is passed, so the output
# format is left to geopandas to infer from the file extension)
all_stations.to_file(filename = 'all_stations.json')

## LA County 2020 Census Tracts Data

In [None]:
# load the LA County 2020 census tract polygons from a local GeoJSON file
# NOTE(review): relative path -- presumably expects Google Drive to be mounted
# under the working directory; confirm before running elsewhere
cts = gpd.read_file(filename = 'drive/MyDrive/2020_Census_Tracts.geojson')

In [None]:
# show the first five raw census tract records
cts.head(n = 5)

Unnamed: 0,OBJECTID,CT20,LABEL,ShapeSTArea,ShapeSTLength,geometry
0,4992,101110,1011.1,12295620.0,15083.854287,"POLYGON ((-118.29793 34.26323, -118.30082 34.2..."
1,4993,101122,1011.22,28457740.0,31671.455844,"POLYGON ((-118.27743 34.25991, -118.27743 34.2..."
2,4994,101220,1012.2,7522093.0,12698.78381,"POLYGON ((-118.27818 34.25577, -118.27887 34.2..."
3,4995,101221,1012.21,3812000.0,9161.710543,"POLYGON ((-118.28735 34.25591, -118.28863 34.2..."
4,4996,101222,1012.22,3191371.0,9980.600461,"POLYGON ((-118.28594 34.2559, -118.28697 34.25..."


In [None]:
# check crs: display the CRS read from the file (no set_crs call is made for this
# table, unlike the ArcGIS downloads above)
cts.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [None]:
# drop unnecessary columns
cts = cts.drop(
    ['OBJECTID', 'CT20', 'ShapeSTLength', 'ShapeSTArea'],
    axis = 'columns',
)

In [None]:
# LABEL holds the tract number; name it TRACT so it can serve as the merge key
cts = cts.rename({'LABEL': 'TRACT'}, axis = 'columns')

In [None]:
# cast TRACT to a floating-point number (the SVI table's TRACT is cast the same way
# before the two are merged)
cts['TRACT'] = cts['TRACT'].astype('float64')

In [None]:
# show the first five rows of the cleaned census tract table
cts.head(n = 5)

Unnamed: 0,TRACT,geometry
0,1011.1,"POLYGON ((-118.29793 34.26323, -118.30082 34.2..."
1,1011.22,"POLYGON ((-118.27743 34.25991, -118.27743 34.2..."
2,1012.2,"POLYGON ((-118.27818 34.25577, -118.27887 34.2..."
3,1012.21,"POLYGON ((-118.28735 34.25591, -118.28863 34.2..."
4,1012.22,"POLYGON ((-118.28594 34.2559, -118.28697 34.25..."


## California SVI Data

In [None]:
# load the California SVI table from a local CSV file
# NOTE(review): absolute '/content/...' path, unlike the relative path used for the
# census tracts file -- confirm both resolve in the target environment
ca_svi = pd.read_csv(filepath_or_buffer = '/content/drive/MyDrive/California_svi.csv')

In [None]:
# show the first five raw SVI records
ca_svi.head(n = 5)

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,EP_ASIAN,MP_ASIAN,EP_AIAN,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE
0,6,California,CA,6001,Alameda County,6001400100,Census Tract 4001; Alameda County; California,2.681809,3269,452,...,14.6,3.7,0.0,1.2,0.0,1.2,5.3,2.7,0.8,0.9
1,6,California,CA,6001,Alameda County,6001400200,Census Tract 4002; Alameda County; California,0.226472,2147,201,...,11.7,4.5,0.4,0.7,0.0,1.8,8.5,3.4,0.4,0.5
2,6,California,CA,6001,Alameda County,6001400300,Census Tract 4003; Alameda County; California,0.428898,5619,571,...,11.3,3.3,0.5,0.5,0.0,0.7,9.1,4.8,1.1,1.0
3,6,California,CA,6001,Alameda County,6001400400,Census Tract 4004; Alameda County; California,0.276502,4278,598,...,8.7,2.7,0.5,0.7,0.0,0.9,7.7,3.1,0.2,0.2
4,6,California,CA,6001,Alameda County,6001400500,Census Tract 4005; Alameda County; California,0.22835,3949,737,...,9.4,4.5,0.1,0.2,0.2,0.3,8.3,5.0,0.0,1.0


In [None]:
# restrict the statewide table to rows whose COUNTY is exactly 'Los Angeles County'
svi = ca_svi[ca_svi['COUNTY'].eq('Los Angeles County')]

In [None]:
# reset index & drop old index: after the county filter the rows keep their
# statewide row labels; renumber them from 0 and discard the old labels
svi = svi.reset_index(drop = True)

In [None]:
# drop unnecessary columns ('LOCATION' is kept: the tract number is parsed from it)
svi = svi.drop(
    ['ST', 'STATE', 'ST_ABBR', 'STCNTY', 'COUNTY', 'FIPS', 'AREA_SQMI'],
    axis = 'columns',
)

In [None]:
# retrieve census tract number
# LOCATION reads like 'Census Tract 4001; Alameda County; California'; the first
# integer or decimal number found in it is taken as the tract number
svi['LOCATION'] = svi['LOCATION'].astype(str)
# expand = False returns a Series (not a one-column DataFrame), so the column
# assignment is explicit and matches the other str.extract calls in this notebook
svi['TRACT'] = svi['LOCATION'].str.extract(r'(\d+\.\d+|\d+)', expand = False).astype(float)

In [None]:
# LOCATION has been parsed into TRACT and is no longer needed
svi = svi.drop('LOCATION', axis = 'columns')

In [None]:
# show the first five rows of the cleaned LA County SVI table
svi.head(n = 5)

Unnamed: 0,E_TOTPOP,M_TOTPOP,E_HU,M_HU,E_HH,M_HH,E_POV150,M_POV150,E_UNEMP,M_UNEMP,...,MP_ASIAN,EP_AIAN,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE,TRACT
0,4014,473,1652,97,1551,130,586,214,177,85,...,3.8,0.2,0.4,0.0,1.0,4.2,2.8,0.2,0.3,1011.1
1,4164,822,1427,170,1383,160,404,236,220,95,...,5.2,0.0,1.0,0.0,1.0,6.0,3.1,0.3,0.5,1011.22
2,3481,467,1372,207,1349,208,768,286,247,117,...,6.0,1.3,2.1,0.0,1.1,1.4,1.1,0.1,0.2,1012.2
3,3756,687,1549,285,1424,293,1034,576,79,51,...,5.5,0.0,1.1,0.0,1.1,1.6,1.9,2.8,3.0,1012.21
4,2808,424,1006,138,928,146,874,398,164,157,...,5.7,0.0,1.4,1.3,2.2,1.6,3.1,0.0,1.4,1012.22


## Merging Census Tract Data & SVI Data

In [None]:
# join tract polygons with SVI attributes on the numeric tract key; the outer join
# keeps tracts that appear in only one of the two tables
svi_tracts = cts.merge(right = svi, on = 'TRACT', how = 'outer')

In [None]:
# list every column name of the merged table as an array
svi_tracts.columns.to_numpy()

array(['TRACT', 'geometry', 'E_TOTPOP', 'M_TOTPOP', 'E_HU', 'M_HU',
       'E_HH', 'M_HH', 'E_POV150', 'M_POV150', 'E_UNEMP', 'M_UNEMP',
       'E_HBURD', 'M_HBURD', 'E_NOHSDP', 'M_NOHSDP', 'E_UNINSUR',
       'M_UNINSUR', 'E_AGE65', 'M_AGE65', 'E_AGE17', 'M_AGE17',
       'E_DISABL', 'M_DISABL', 'E_SNGPNT', 'M_SNGPNT', 'E_LIMENG',
       'M_LIMENG', 'E_MINRTY', 'M_MINRTY', 'E_MUNIT', 'M_MUNIT',
       'E_MOBILE', 'M_MOBILE', 'E_CROWD', 'M_CROWD', 'E_NOVEH', 'M_NOVEH',
       'E_GROUPQ', 'M_GROUPQ', 'EP_POV150', 'MP_POV150', 'EP_UNEMP',
       'MP_UNEMP', 'EP_HBURD', 'MP_HBURD', 'EP_NOHSDP', 'MP_NOHSDP',
       'EP_UNINSUR', 'MP_UNINSUR', 'EP_AGE65', 'MP_AGE65', 'EP_AGE17',
       'MP_AGE17', 'EP_DISABL', 'MP_DISABL', 'EP_SNGPNT', 'MP_SNGPNT',
       'EP_LIMENG', 'MP_LIMENG', 'EP_MINRTY', 'MP_MINRTY', 'EP_MUNIT',
       'MP_MUNIT', 'EP_MOBILE', 'MP_MOBILE', 'EP_CROWD', 'MP_CROWD',
       'EP_NOVEH', 'MP_NOVEH', 'EP_GROUPQ', 'MP_GROUPQ', 'EPL_POV150',
       'EPL_UNEMP', 'EPL_HBURD', 'E

In [None]:
# show the first five rows of the merged table
svi_tracts.head(n = 5)

Unnamed: 0,TRACT,geometry,E_TOTPOP,M_TOTPOP,E_HU,M_HU,E_HH,M_HH,E_POV150,M_POV150,...,EP_ASIAN,MP_ASIAN,EP_AIAN,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE
0,1011.1,"POLYGON ((-118.29793 34.26323, -118.30082 34.2...",4014.0,473.0,1652.0,97.0,1551.0,130.0,586.0,214.0,...,9.7,3.8,0.2,0.4,0.0,1.0,4.2,2.8,0.2,0.3
1,1011.22,"POLYGON ((-118.27743 34.25991, -118.27743 34.2...",4164.0,822.0,1427.0,170.0,1383.0,160.0,404.0,236.0,...,14.0,5.2,0.0,1.0,0.0,1.0,6.0,3.1,0.3,0.5
2,1012.2,"POLYGON ((-118.27818 34.25577, -118.27887 34.2...",3481.0,467.0,1372.0,207.0,1349.0,208.0,768.0,286.0,...,12.4,6.0,1.3,2.1,0.0,1.1,1.4,1.1,0.1,0.2
3,1012.21,"POLYGON ((-118.28735 34.25591, -118.28863 34.2...",3756.0,687.0,1549.0,285.0,1424.0,293.0,1034.0,576.0,...,8.4,5.5,0.0,1.1,0.0,1.1,1.6,1.9,2.8,3.0
4,1012.22,"POLYGON ((-118.28594 34.2559, -118.28697 34.25...",2808.0,424.0,1006.0,138.0,928.0,146.0,874.0,398.0,...,4.9,5.7,0.0,1.4,1.3,2.2,1.6,3.1,0.0,1.4


In [None]:
# -999 is treated as a missing-value marker; turn every occurrence into NaN
svi_tracts = svi_tracts.replace(to_replace = -999, value = np.nan)

In [None]:
# column names to snake case (skimpy's clean_columns; note it also separates digit
# runs, e.g. E_POV150 -> e_pov_150 in the preview below)
svi_tracts = clean_columns(svi_tracts)

In [None]:
# show the first five rows of the cleaned merged table
svi_tracts.head(n = 5)

Unnamed: 0,tract,geometry,e_totpop,m_totpop,e_hu,m_hu,e_hh,m_hh,e_pov_150,m_pov_150,...,ep_asian,mp_asian,ep_aian,mp_aian,ep_nhpi,mp_nhpi,ep_twomore,mp_twomore,ep_otherrace,mp_otherrace
0,1011.1,"POLYGON ((-118.29793 34.26323, -118.30082 34.2...",4014.0,473.0,1652.0,97.0,1551.0,130.0,586.0,214.0,...,9.7,3.8,0.2,0.4,0.0,1.0,4.2,2.8,0.2,0.3
1,1011.22,"POLYGON ((-118.27743 34.25991, -118.27743 34.2...",4164.0,822.0,1427.0,170.0,1383.0,160.0,404.0,236.0,...,14.0,5.2,0.0,1.0,0.0,1.0,6.0,3.1,0.3,0.5
2,1012.2,"POLYGON ((-118.27818 34.25577, -118.27887 34.2...",3481.0,467.0,1372.0,207.0,1349.0,208.0,768.0,286.0,...,12.4,6.0,1.3,2.1,0.0,1.1,1.4,1.1,0.1,0.2
3,1012.21,"POLYGON ((-118.28735 34.25591, -118.28863 34.2...",3756.0,687.0,1549.0,285.0,1424.0,293.0,1034.0,576.0,...,8.4,5.5,0.0,1.1,0.0,1.1,1.6,1.9,2.8,3.0
4,1012.22,"POLYGON ((-118.28594 34.2559, -118.28697 34.25...",2808.0,424.0,1006.0,138.0,928.0,146.0,874.0,398.0,...,4.9,5.7,0.0,1.4,1.3,2.2,1.6,3.1,0.0,1.4


In [None]:
# write the merged tract/SVI table to disk (no driver is passed, so the output
# format is left to geopandas to infer from the file extension)
svi_tracts.to_file(filename = 'svi_tracts.json')