In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.wkt import loads
import geotable
import os

### Script Objectives:

This script filters our two main camera datasets (Amnesty International and Hikvision) via spatial joins with Brooklyn's boundary geometry. This returns the subset of the original camera datasets that we are interested in for the surveillance metric we will create for the neighborhoods across Brooklyn.

We look to create a function that inputs:

* GeoDataFrame containing geometry of desired U.S. Census geographies (i.e. tracts, blocks, block groups)

* Camera datasets

* dictionary of columns of interest within camera datasets (e.g. n_cameras_median, hikvision_cameras, etc.)

* dictionary of geometry columns within geographic datasets

and outputs a GeoDataFrame that contains:

* aggregated camera counts for each unique geographic unit

* associated population counts from the U.S. Census and ACS tables

* geometry column for choropleth creation

#### Input Files:

 **Cameras**

 * Amnesty International Camera Dataset: `os.path.dirname(os.path.dirname(path))+'/CapstoneData/amnesty_camera_counts_per_intersections.csv'`

 * Hikvision Camera Dataset: `../Data/NYC_Surveillance_Map_Hikvision.csv`


 **Shapefiles**

 * NYC Census Blocks: `../Data/2020_Census_Blocks_-_Tabular.csv`
 
 * NYC Census Tracts: `../Data/2020_Census_Tracts_-_Tabular.csv`
 
 * NYC Block Groups: `../Data/tl_2017_36_bg/tl_2017_36_bg.shp`

### Step 1: Load Input Data

In [2]:
# --- Camera datasets ---
# The Amnesty CSV is read from a CapstoneData folder located two directory
# levels above the current working directory; all other inputs are read
# from ../Data relative to the working directory.
path = os.getcwd()
parent = os.path.dirname(os.path.dirname(path))
amnesty = pd.read_csv(
    parent + '/CapstoneData/amnesty_camera_counts_per_intersections.csv'
)
# index_col=0 makes the first CSV column the row index
hikvision = pd.read_csv(
    '../Data/NYC_Surveillance_Map_Hikvision.csv',
    index_col=0,
)

# --- Census geographies ---
# Blocks and tracts are tabular CSV exports, read here as plain DataFrames;
# block groups are read from a shapefile with GeoPandas.
nyc_blks = pd.read_csv('../Data/2020_Census_Blocks_-_Tabular.csv')
nyc_tracts = pd.read_csv('../Data/2020_Census_Tracts_-_Tabular.csv')
nyc_blkgp = gpd.read_file('../Data/tl_2017_36_bg/tl_2017_36_bg.shp')

### Step 2: Investigate which Columns we need for each dataset

We should tell the function which columns are important to consider for each camera dataset, as well as which columns hold geometry values, so we can easily conduct spatial joins and convert datasets to GeoDataFrames if needed. I am thinking we should have the function take two dictionaries as input: one for camera columns and the other for geometry columns.

In [4]:
# List the Amnesty column names so the camera-count and location fields can be picked out
amnesty.columns

Index(['PanoramaId', 'n_cameras_median', 'attached_street_median',
       'attached_building_median', 'attached_unknown_median',
       'type_dome_median', 'type_bullet_median', 'type_unknown_median',
       'n_cameras_agreement', 'attached_street_agreement',
       'attached_building_agreement', 'attached_unknown_agreement',
       'type_dome_agreement', 'type_bullet_agreement',
       'type_unknown_agreement', 'Lat', 'Long', 'geometry_pano', 'BoroName',
       'URL', 'ImageDate'],
      dtype='object')

In [5]:
# List the Hikvision column names so the camera and location fields can be picked out
hikvision.columns

Index(['Name', 'Latitude', 'Longitude', 'Zip_Code', 'Borough',
       'geometry_layer', 'wkt'],
      dtype='object')

In [6]:
# Preview the first rows to see what each Hikvision column actually contains
hikvision.head()

Unnamed: 0,Name,Latitude,Longitude,Zip_Code,Borough,geometry_layer,wkt
0,100.12.128.86,40.630798,-74.098297,10301.0,Staten Island,Hikvision: Staten Island,POINT Z (-74.09829712 40.63079834 0)
1,100.12.139.126,40.604191,-74.107681,10301.0,Staten Island,Hikvision: Staten Island,POINT Z (-74.10768127 40.60419083 0)
2,100.12.139.79,40.60553,-74.102753,10301.0,Staten Island,Hikvision: Staten Island,POINT Z (-74.10275269 40.60552979 0)
3,100.12.141.104,40.627338,-74.103592,10301.0,Staten Island,Hikvision: Staten Island,POINT Z (-74.10359192 40.62733841 0)
4,100.12.152.177,40.624729,-74.094543,10301.0,Staten Island,Hikvision: Staten Island,POINT Z (-74.09454346 40.62472916 0)


In [9]:
# Camera-related columns of interest in each camera dataset.
# Amnesty: the "*_median" columns.
amnesty_camera_cols = [
    'n_cameras_median',
    'attached_street_median',
    'attached_building_median',
    'attached_unknown_median',
    'type_dome_median',
    'type_bullet_median',
    'type_unknown_median',
]
# Hikvision: 'Name' is the only column listed
# (presumably one row per camera -- TODO confirm).
hikvision_camera_cols = ['Name']

# Lookup table: dataset name -> list of its camera columns
camera_cols = dict(
    amnesty=amnesty_camera_cols,
    hikvision=hikvision_camera_cols,
)

In [11]:
# List the census-block column names to locate the geometry column
nyc_blks.columns

Index(['the_geom', 'CB2020', 'BoroCode', 'BoroName', 'CT2020', 'BCTCB2020',
       'GEOID', 'Shape_Leng', 'Shape_Area'],
      dtype='object')

In [12]:
# List the census-tract column names to locate the geometry column
nyc_tracts.columns

Index(['the_geom', 'CTLabel', 'BoroCode', 'BoroName', 'CT2020', 'BoroCT2020',
       'CDEligibil', 'NTAName', 'NTA2020', 'CDTA2020', 'CDTANAME', 'GEOID',
       'Shape_Leng', 'Shape_Area'],
      dtype='object')

In [13]:
# List the block-group column names to locate the geometry column
nyc_blkgp.columns

Index(['STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE', 'GEOID', 'NAMELSAD',
       'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER', 'INTPTLAT', 'INTPTLON',
       'geometry'],
      dtype='object')

In [16]:
# Columns that hold location information in each dataset.
# The camera datasets list a pair of latitude/longitude columns; the census
# geographies each list a single geometry column.
amnesty_geo_cols = ['Lat', 'Long']
hikvision_geo_cols = ['Latitude', 'Longitude']
nyc_blks_geo_cols = ['the_geom']
nyc_tracts_geo_cols = ['the_geom']
nyc_blkgp_geo_cols = ['geometry']

# Lookup table: dataset name -> list of its geometry columns
geo_cols = dict(
    amnesty=amnesty_geo_cols,
    hikvision=hikvision_geo_cols,
    nyc_blks=nyc_blks_geo_cols,
    nyc_tracts=nyc_tracts_geo_cols,
    nyc_blkgp=nyc_blkgp_geo_cols,
)

In [17]:
# Display the mapping to verify each dataset's geometry columns
geo_cols

{'amnesty': ['Lat', 'Long'],
 'hikvision': ['Latitude', 'Longitude'],
 'nyc_blks': ['the_geom'],
 'nyc_tracts': ['the_geom'],
 'nyc_blkgp': ['geometry']}

### Step 3: Define our function

In [None]:
def agg_cameras_to_geo():
    