# For Issue 1279 | 311 Geospatial Analysis
---
Identify addresses or small areas that could benefit from more signage, increased community assistance, or other actions

[Neighborhood Council Shapefile Source](https://data.lacity.org/City-Infrastructure-Service-Requests/Neighborhood-Councils-Certified-/fu65-dz2f) | [LA Blocks Census Shapefile Source](https://www2.census.gov/geo/tiger/TIGER2020PL/STATE/06_CALIFORNIA/06037/)

# Table of Contents
---
### [Package & Data Imports  ](#Package-&-Data-Imports)
### [Data Cleaning & Geopandas Prep  ](#Data-Cleaning)
### Folium Maps
> #### [Neighborhood Council Choropleth  ](#Neighborhood-Council-Choropleth)
>> #### [Type of Request by Neighborhood Council Layered Choropleths    ](#Type-of-Request-by-Neighborhood-Council-Layered-Choropleths)

> #### [Block Choropleth  ](#Block-Choropleth)

> #### [Neighborhood Council and Block-by-Block Layered Map  ](#Neighborhood-Council-and-Block-by-Block-Layered-Map)

# Package & Data Imports
---

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# https://stackoverflow.com/questions/13440102/getting-bounding-box-of-city
import geopandas as gpd

# https://towardsdatascience.com/geopandas-101-plot-any-data-with-a-latitude-and-longitude-on-a-map-98e01944b972
from shapely.geometry import Point, Polygon

# https://www.analyticsvidhya.com/blog/2020/06/guide-geospatial-analysis-folium-python/
import folium
from branca.element import Figure
import re
import matplotlib
from folium.plugins import TimeSliderChoropleth

from titlecase import titlecase

%matplotlib inline
import warnings
warnings.simplefilter("ignore")
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
# 311 Data 10/01/2021-10/01/2022 from API see 01 notebook
raw_df = pd.read_csv('./data/clean_01Oct21_01Oct22_api.csv')
df = raw_df.copy()

# https://geopandas.org/en/stable/getting_started/introduction.html
# NC boundaries: https://data.lacity.org/City-Infrastructure-Service-Requests/Neighborhood-Councils-Certified-/fu65-dz2f
raw_gdf_nc = gpd.read_file('./data/Neighborhood Councils (Certified)/geo_export_88bb18d9-f96c-4351-8be9-594f258ed0d3.shp')
gdf_nc = raw_gdf_nc.copy()

# Block boundaries: https://www2.census.gov/geo/tiger/TIGER2020PL/STATE/06_CALIFORNIA/06037/
# tl_2020_06037_tabblock20.zip
raw_gdf_blk = gpd.read_file("./data/tl_2020_06037_tabblock20/tl_2020_06037_tabblock20.shp")
gdf_blk = raw_gdf_blk.copy()

In [3]:
print(df.shape)
df.head(1)

(1042208, 16)


Unnamed: 0,requestId,createdDate,closedDate,typeId,typeName,address,lat_lon,latitude,longitude,agencyId,agencyName,sourceId,srnumber,sourceName,councilId,councilName
0,8610592,2021-10-01 00:01:14,2021-10-08 09:24:53,4,Bulky Items,"11614 N HERRICK AVE, 91340","(34.2813123418, -118.425453201)",34.281312,-118.425453,3,Sanitation Bureau,8,1-2079512481,Phone Call,64,Pacoima


In [4]:
print(gdf_nc.shape)
gdf_nc.head(1)

(99, 9)


Unnamed: 0,date_certi,time_certi,dwebsite,name,nc_id,objectid,service_re,waddress,geometry
0,2002-10-22,00:00:00.000,http://empowerla.org/ANC,ARLETA NC,6.0,1.0,REGION 1 - NORTH EAST VALLEY,http://www.arletanc.org/,"POLYGON ((-118.45005 34.24992, -118.45055 34.2..."


In [5]:
print(gdf_blk.shape)
gdf_blk.head(1)

(91626, 16)


Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,geometry
0,6,37,109300,2015,60371093002015,Block 2015,G5040,,,,S,17913,0,34.2673969,-118.479707,"POLYGON ((-118.48022 34.26741, -118.48021 34.2..."


# Data Cleaning
---

## Checking Consistency Between gdf_nc and df
#### To Note:
- df['councilID'] == gdf['objectiid']
- df contains a 0 ID for No Council

In [6]:
# gdf_nc

In [7]:
# gdf_nc.sort_values(by = 'objectid')

In [8]:
# df[df['councilId'] == 27]

In [9]:
# gdf_nc[gdf_nc['objectid'] == 27]

## Geopandas - Neighborhood Council and Blocks - Spatial Join

In [10]:
# Neighborhood Council
# get necessary columns; objectid = councilId
gdf_nc = gdf_nc[['name', 'objectid', 'geometry']].sort_values(by = 'objectid').reset_index(drop = True)

# format data
gdf_nc['name'] = gdf_nc['name'].apply(lambda x: titlecase(x.strip(' NC')))
gdf_nc['objectid'] = gdf_nc['objectid'].apply(lambda x: int(x))

print(gdf_nc.shape)
gdf_nc.head()

(99, 3)


Unnamed: 0,name,objectid,geometry
0,Arleta,1,"POLYGON ((-118.45005 34.24992, -118.45055 34.2..."
1,Arroyo Seco,2,"POLYGON ((-118.22325 34.10393, -118.22367 34.1..."
2,Atwater Village,3,"POLYGON ((-118.27576 34.15376, -118.26184 34.1..."
3,Bel Air-Beverly Crest,4,"POLYGON ((-118.47485 34.12634, -118.47411 34.1..."
4,Boyle Heights,5,"POLYGON ((-118.21439 34.06063, -118.21303 34.0..."


In [11]:
# Blocks
gdf_blk = gdf_blk[['GEOID20', 'geometry', 'NAME20', 'TRACTCE20', 'COUNTYFP20']]
print(gdf_blk.shape)
gdf_blk.head(1)

(91626, 5)


Unnamed: 0,GEOID20,geometry,NAME20,TRACTCE20,COUNTYFP20
0,60371093002015,"POLYGON ((-118.48022 34.26741, -118.48021 34.2...",Block 2015,109300,37


In [12]:
# confirming unique ID is 'GEOID20'
gdf_blk['GEOID20'].value_counts(ascending = False).head()

060371093002015    1
060379203421001    1
060379203261025    1
060379203031015    1
060379304002025    1
Name: GEOID20, dtype: int64

In [13]:
# spatial join of blocks and neighborhood councils
gdf_blk_nc = gpd.sjoin(gdf_blk, gdf_nc, how = 'inner', op = 'within')
gdf_blk_nc = gdf_blk_nc[['objectid', 'name', 'GEOID20', 'geometry']]
gdf_blk_nc['objectid'] = gdf_blk_nc['objectid'].apply(lambda x: int(x))
gdf_blk_nc.rename(columns = {'objectid': 'councilId'}, inplace = True)

In [14]:
print(gdf_blk_nc.shape)
gdf_blk_nc.head(1)

(23836, 4)


Unnamed: 0,councilId,name,GEOID20,geometry
0,27,Granada Hills North,60371093002015,"POLYGON ((-118.48022 34.26741, -118.48021 34.2..."


In [34]:
# Save request data with block IDs csv and shp
gdf_blk_nc.to_csv('data/clean_01Oct21_01Oct22_nc_blk.csv', index = False)
gdf_blk_nc.to_file('data/SHAPE-clean_01Oct21_01Oct22_nc_blk/clean_01Oct21_01Oct22_nc_blk.geojson', driver='GeoJSON')

## Spatial Join of 311 Request Data and Block-Neighborhood Council

In [16]:
# https://towardsdatascience.com/geopandas-101-plot-any-data-with-a-latitude-and-longitude-on-a-map-98e01944b972

In [17]:
df.head(1)

Unnamed: 0,requestId,createdDate,closedDate,typeId,typeName,address,lat_lon,latitude,longitude,agencyId,agencyName,sourceId,srnumber,sourceName,councilId,councilName
0,8610592,2021-10-01 00:01:14,2021-10-08 09:24:53,4,Bulky Items,"11614 N HERRICK AVE, 91340","(34.2813123418, -118.425453201)",34.281312,-118.425453,3,Sanitation Bureau,8,1-2079512481,Phone Call,64,Pacoima


In [18]:
# get points from longitude, latitude in df
geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
geometry[:3]

[<shapely.geometry.point.Point at 0x1505d3190>,
 <shapely.geometry.point.Point at 0x1505d3220>,
 <shapely.geometry.point.Point at 0x1505d3280>]

In [19]:
# assign crs value and create GeoDataframe from 311 Requests df with Points geometry variable
crs = {'init': 'epsg:4326'}
df_geo = gpd.GeoDataFrame(df, crs = crs, geometry = geometry)

In [20]:
# Check for duplicates (duplicates exist in original dataframe)
df_geo.drop_duplicates(subset = ['requestId']).shape

(1042202, 17)

In [21]:
# spatial join of addresses and blocks (and nc)
df_blk_nc = gpd.sjoin(df_geo, gdf_blk_nc, how="inner", op='within')
df_blk_nc = df_blk_nc[['requestId', 'createdDate', 'closedDate', 'typeId', 'typeName',
       'address', 'councilId_left', 'councilName', 'geometry','GEOID20']]
df_blk_nc.rename(columns = {'councilId_left': 'councilId'}, inplace = True)
df_blk_nc.reset_index(drop = True, inplace = True)

In [22]:
print(f'df_blk_nc shape: {df_blk_nc.shape}')
print(f'df shape: {df.shape}')
df_blk_nc.head()

df_blk_nc shape: (881694, 10)
df shape: (1042208, 17)


Unnamed: 0,requestId,createdDate,closedDate,typeId,typeName,address,councilId,councilName,geometry,GEOID20
0,8610592,2021-10-01 00:01:14,2021-10-08 09:24:53,4,Bulky Items,"11614 N HERRICK AVE, 91340",64,Pacoima,POINT (-118.42545 34.28131),60371043101005
1,8610569,2021-10-01 00:02:19,2021-10-08 13:36:33,5,Electronic Waste,"11614 N HERRICK AVE, 91340",64,Pacoima,POINT (-118.42545 34.28131),60371043101005
2,8610587,2021-10-01 00:05:25,2021-10-08 14:16:32,7,Metal/Appliances,"11614 N HERRICK AVE, 91340",64,Pacoima,POINT (-118.42545 34.28131),60371043101005
3,8611847,2021-10-01 07:22:13,2021-10-08 09:24:45,6,Illegal Dumping,"11614 N HERRICK AVE, 91340",64,Pacoima,POINT (-118.42545 34.28131),60371043101005
4,8658809,2021-10-06 21:11:45,2021-10-08 11:11:52,7,Metal/Appliances,"13333 W DEL SUR ST, 91340",64,Pacoima,POINT (-118.42380 34.28216),60371043101005


In [23]:
######################## Check on duplicates in next team meeting
print(df_blk_nc.duplicated(subset = ['requestId']).any())
df_blk_nc.drop_duplicates(subset = ['requestId']).shape

True


(881492, 10)

In [33]:
# Save request data with block IDs csv and shp
df_blk_nc.to_csv('data/clean_01Oct21_01Oct22_nc_blk_req.csv', index = False)
df_blk_nc.to_file('data/SHAPE-clean_01Oct21_01Oct22_nc_blk_req /clean_01Oct21_01Oct22_nc_blk_req.shp') 

In [27]:
df_blk_nc['GEOID20'].value_counts()

060371911102000    886
060371271022000    611
060372060501000    601
060371272103008    507
060371911201001    498
                  ... 
060372380003014      1
060371066452007      1
060371284001000      1
060372611021010      1
060372117013003      1
Name: GEOID20, Length: 22263, dtype: int64