In [1]:
# --- run configuration ---
# 311 service-request export (CSV produced by the API-to-csv code)
req_path = '../data/01Oct21_01Oct22_api.csv'
# 2022 Neighborhood Council boundaries (shapefile)
nc_geo_path = '2022_neighborhood_councils/geo_export_88bb18d9-f96c-4351-8be9-594f258ed0d3.shp'
# 2020 Census block boundaries (shapefile)
blk_geo_path = '2020_census_blocks/tl_2020_06037_tabblock20.shp'
# Neighborhood Council to extract. It is compared for exact equality against
# the 311 `councilName` column and is used as the prefix of the output CSV,
# so the spelling must match the data (previously misspelled 'Boyle Heightss').
NC = 'Boyle Heights'

# package imports
import argparse
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from titlecase import titlecase

# --- load inputs ---
# 311 service requests, previously exported from the API to CSV
df = pd.read_csv(req_path)
# boundary shapefiles: 2022 Neighborhood Councils, then 2020 Census blocks
gdf_nc, gdf_blk = (gpd.read_file(shp_path) for shp_path in (nc_geo_path, blk_geo_path))

In [2]:
### 311 Request Data cleaning
# columns kept for the rest of the notebook
request_cols = ['requestId', 'createdDate', 'closedDate', 'typeId', 'typeName', 'address', 'latitude',
                'longitude', 'agencyId', 'agencyName', 'sourceId', 'srnumber', 'sourceName', 'councilId', 'councilName']

# Build the cleaned frame as one chain of non-mutating steps, so nothing is
# modified in place on a slice of the loaded data (avoids SettingWithCopyWarning).
df = (
    df[request_cols]
    # drop rows with a null in any of the kept columns
    .dropna()
    # keep the first row for each requestId
    .drop_duplicates(subset = 'requestId')
    # councilId to object (string). The original wrote this to a new,
    # misspelled 'councilID' column and left 'councilId' unconverted.
    .assign(councilId = lambda d: d['councilId'].map(str))
)

# Filter DF for NC (exact match on the council name); copy so later cells
# work on an independent frame rather than a view of the filtered data
df = df[df['councilName'] == NC].copy()

In [3]:
# Preview the first rows of the Neighborhood Council layer as loaded,
# to see which attribute columns are available before trimming them.
gdf_nc.head()

Unnamed: 0,date_certi,time_certi,dwebsite,name,nc_id,objectid,service_re,waddress,geometry
0,2002-10-22,00:00:00.000,http://empowerla.org/ANC,ARLETA NC,6.0,1.0,REGION 1 - NORTH EAST VALLEY,http://www.arletanc.org/,"POLYGON ((-118.45005 34.24992, -118.45055 34.2..."
1,2002-10-02,00:00:00.000,http://empowerla.org/ASNC,ARROYO SECO NC,42.0,2.0,REGION 8 - NORTH EAST LA,http://www.asnc.us/,"POLYGON ((-118.22325 34.10393, -118.22367 34.1..."
2,2003-02-11,00:00:00.000,http://empowerla.org/AVNC,ATWATER VILLAGE NC,37.0,3.0,REGION 7 - EAST,http://www.atwatervillage.org/,"POLYGON ((-118.27576 34.15376, -118.26184 34.1..."
3,2002-10-08,00:00:00.000,http://empowerla.org/BABCNC,BEL AIR-BEVERLY CREST NC,64.0,4.0,REGION 11 - WEST LA,http://babcnc.org/,"POLYGON ((-118.47485 34.12634, -118.47411 34.1..."
4,2002-05-21,00:00:00.000,http://empowerla.org/BHNC,BOYLE HEIGHTS NC,50.0,5.0,REGION 8 - NORTH EAST LA,http://bhnc.net/,"POLYGON ((-118.21439 34.06063, -118.21303 34.0..."


In [4]:
### Neighborhood Council cleaning
# Keep only the columns used downstream, ordered by objectid with a fresh
# 0..n-1 index. objectid is the column later renamed to councilId after the
# block join.
# Reproject to EPSG:4269 so this layer shares a CRS with the other layers.
gdf_nc = (
    gdf_nc[['name', 'objectid', 'geometry']]
    .sort_values(by = 'objectid')
    .reset_index(drop = True)
    .to_crs('EPSG:4269')
)

# format data
# Drop only a trailing " NC" suffix, then title-case the name.
# The original used x.strip(' NC'), which treats ' NC' as a *set of characters*
# and removes any leading/trailing space, 'N' or 'C' -- e.g. a name starting
# with "NORTH" lost its first letter. Other suffixes are left untouched.
gdf_nc['name'] = (
    gdf_nc['name']
    .str.strip()
    .str.replace(r'\s+NC$', '', regex = True)
    .map(titlecase)
)
# objectid is read as a float (e.g. 5.0); store it as an integer id
gdf_nc['objectid'] = gdf_nc['objectid'].astype(int)

In [5]:
### 2020 Census Block cleaning
# keep the block identifier and geometry plus the name/tract/county fields,
# then reproject to the shared CRS in the same expression
block_cols = ['GEOID20', 'geometry', 'NAME20', 'TRACTCE20', 'COUNTYFP20']
gdf_blk = gdf_blk[block_cols].to_crs('EPSG:4269')

In [6]:
### Spatial join of blocks and neighborhood councils
# Attach council attributes to every census block that lies within a council
# polygon (inner join, so blocks matching no council are dropped), keep the
# id/name/block-id/geometry columns, and expose objectid as an integer councilId.
gdf_blk_nc = (
    gpd.sjoin(gdf_blk, gdf_nc, how = 'inner', predicate = 'within')
    [['objectid', 'name', 'GEOID20', 'geometry']]
    .assign(objectid = lambda blocks: blocks['objectid'].apply(int))
    .rename(columns = {'objectid': 'councilId'})
)

In [7]:
### Create geometry points from 311 request lat/lon
# shapely points are (x, y), i.e. (longitude, latitude)
geometry = [Point(lon, lat) for lon, lat in zip(df['longitude'], df['latitude'])]

# Wrap the 311 requests in a GeoDataFrame carrying the point geometry so they
# can be spatially joined; the CRS is declared as EPSG:4269 like the boundary layers.
# NOTE(review): this labels the coordinates rather than reprojecting them --
# confirm the datum of the source latitude/longitude.
df_geo = gpd.GeoDataFrame(df, geometry = geometry, crs = 'EPSG:4269')

In [8]:
### Spatial join of 311 points with NC-Block
# Both inputs carry a councilId column, so the join suffixes them; the
# request-side value (councilId_left) is the one kept and renamed back.
output_cols = ['requestId', 'createdDate', 'closedDate', 'typeId', 'typeName',
               'address', 'councilId_left', 'councilName', 'GEOID20', 'geometry']

df_blk_nc = (
    # left join: every request is kept, with block fields where it falls within one
    gpd.sjoin(df_geo, gdf_blk_nc, how = "left", predicate = 'within')
    [output_cols]
    .rename(columns = {'councilId_left': 'councilId'})
    # one row per request
    .drop_duplicates(subset = ['requestId'])
    # drop rows with any null, including requests that matched no block
    .dropna()
    # set crs
    .to_crs('EPSG:4269')
    .reset_index(drop = True)
)

In [9]:
### SAVE CSV
# Write the block-tagged requests to a CSV at a relative path. The file name
# is prefixed with the NC constant and the DataFrame index is not written.
df_blk_nc.to_csv(f'{NC}_nc_blk_req.csv', index = False)