# This notebook takes police call data for the city of Seattle and Seattle ArcGIS data and transforms them into the dataset used in the Power BI visualization

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

## Import Call data

In [2]:
# Call Data is from https://data.seattle.gov/Public-Safety/Call-Data/33kz-ixgy/about_data
# The source is updated on a monthly basis, so the row counts and derived numbers may differ
# slightly depending on when the file was downloaded.
# NOTE(review): the file name looks like it encodes the download date (2024-10-27) — confirm for your copy.

df = pd.read_csv('Call_Data_20241027.csv')

In [3]:
# (rows, columns) of the call data as loaded from the CSV
df.shape

(5900962, 13)

In [4]:
# Column names of the call data, shown as a NumPy array
df.columns.values

array(['CAD Event Number', 'Event Clearance Description', 'Call Type',
       'Priority', 'Initial Call Type', 'Final Call Type',
       'Original Time Queued', 'Arrived Time', 'Precinct', 'Sector',
       'Beat', 'Blurred_Longitude', 'Blurred_Latitude'], dtype=object)

In [5]:
# Parse the two timestamp columns into datetimes.
df['call_time'] = pd.to_datetime(df['Original Time Queued'])
df['arrived_time'] = pd.to_datetime(df['Arrived Time'])

# Break each timestamp into day / month / year columns named
# '<timestamp>_day', '<timestamp>_month' and '<timestamp>_year'.
for stamp_col in ('call_time', 'arrived_time'):
    stamp_parts = df[stamp_col].dt
    df[f'{stamp_col}_day'] = stamp_parts.day
    df[f'{stamp_col}_month'] = stamp_parts.month
    df[f'{stamp_col}_year'] = stamp_parts.year


  df['call_time'] = pd.to_datetime(df['Original Time Queued'])
  df['arrived_time'] = pd.to_datetime(df['Arrived Time'])


In [9]:
# Keep only the two coordinate columns and give them short names.
coordinate_columns = {
    'Blurred_Longitude': 'longitude',
    'Blurred_Latitude': 'latitude',
}
data = df[list(coordinate_columns)].rename(columns=coordinate_columns)


## Import ArcGIS data

In [10]:
# Data is from https://data-seattlecitygis.opendata.arcgis.com/datasets/SeattleCityGIS::community-reporting-areas-3/explore
# select the geojson file
# The neighborhood polygons are reprojected to WGS84 (EPSG:4326) so they share a
# coordinate reference system with the call points built below.
neighborhoods_gdf = gpd.read_file('CITYPLAN_CRA_-8527542012581552321.geojson').to_crs('EPSG:4326')

# Build one point geometry per call from the longitude/latitude columns of `data`.
# points_from_xy is vectorized, which avoids constructing millions of Point objects
# one at a time in a Python loop.
geometry = gpd.points_from_xy(data['longitude'], data['latitude'])

# Wrap the coordinates in a GeoDataFrame, declaring the CRS (WGS84) at construction time.
geo_data = gpd.GeoDataFrame(data, geometry=geometry, crs='EPSG:4326')


In [11]:
# Perform a spatial join to associate latitude/longitude points with neighborhoods
geo_data_with_neighborhoods = gpd.sjoin(geo_data, neighborhoods_gdf, how='left', op='intersects')

# Display the results
geo_data_with_neighborhoods.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,longitude,latitude,geometry,index_right,OBJECTID,CRA_NO,CRA_GRP,GEN_ALIAS,DETL_NAMES,NEIGHDIST,AREA_ACRES,AREA_SQMI,SE_ANNO_CAD_DATA,DISPLAY_NAME,WATER
0,-122.366238,47.643115,POINT (-122.36624 47.64311),83.0,321.0,12.3,12.0,Queen Anne,"Queen Anne, Lower Queen Anne, Uptown, Seattle Center, Westlake",Magnolia/Queen Anne,1882.098639,2.940779,,CRA - Queen Anne,0.0
1,0.000000,0.000000,POINT (0.00000 0.00000),,,,,,,,,,,,
2,-122.333291,47.706846,POINT (-122.33329 47.70685),50.0,51.0,8.1,8.0,Northgate/Maple Leaf,"Maple Leaf, Northgate, Pinehurst",North,1143.757068,1.787120,,CRA - Northgate-Maple Leaf,0.0
3,-122.378878,47.649401,POINT (-122.37888 47.64940),74.0,76.0,12.2,12.0,Interbay,Interbay,Magnolia/Queen Anne,1232.751616,1.926174,,CRA - Interbay,0.0
4,-122.376383,47.648473,POINT (-122.37638 47.64847),74.0,76.0,12.2,12.0,Interbay,Interbay,Magnolia/Queen Anne,1232.751616,1.926174,,CRA - Interbay,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5900957,-122.328165,47.616002,POINT (-122.32817 47.61600),36.0,37.0,6.2,6.0,Capitol Hill,"Capitol Hill, Pike/Pine, Broadway, First Hill",East,433.172611,0.676832,,CRA - Capitol Hill,0.0
5900958,-122.317526,47.616427,POINT (-122.31753 47.61643),36.0,37.0,6.2,6.0,Capitol Hill,"Capitol Hill, Pike/Pine, Broadway, First Hill",East,433.172611,0.676832,,CRA - Capitol Hill,0.0
5900959,-122.325552,47.617041,POINT (-122.32555 47.61704),36.0,37.0,6.2,6.0,Capitol Hill,"Capitol Hill, Pike/Pine, Broadway, First Hill",East,433.172611,0.676832,,CRA - Capitol Hill,0.0
5900960,47.623699,-122.320933,POINT (47.62370 -122.32093),,,,,,,,,,,,


## Data Cleaning

In [14]:
# Drop rows whose Priority is NaN, then show the remaining (rows, columns)
final_df = final_df.dropna(subset=['Priority'])
final_df.shape

(5900250, 36)

In [15]:
# Filter out NaN and OOJ Sectors
# Both masks are built from final_df itself, so each mask has exactly the index of
# the frame being filtered instead of relying on alignment with a different,
# larger frame.
final_df = final_df.loc[final_df.Sector.notna()]
final_df = final_df.loc[final_df.Sector != 'OOJ']
final_df.shape

(5848263, 36)

In [16]:
# Filter out unknown Precincts
final_df = final_df.loc[final_df.Precinct != 'UNKNOWN']
final_df.shape

(5848240, 36)

In [17]:
# Find points that have lat and long reversed and swap them back
# about ~14k discovered Nov 2024

# Inclusive bounds that a valid coordinate must fall inside
# (presumably a bounding box around the Seattle area — TODO confirm the source of these values).
LONGITUDE_BOUNDS = (-122.460027, -122.218169)
LATITUDE_BOUNDS = (47.314028, 47.874756)
# 0 and -1 are accepted as-is; they look like "no location" placeholders — TODO confirm.
PLACEHOLDER_VALUES = (0, -1)


def _coordinate_ok(values, bounds):
    """Flag coordinates that are either inside ``bounds`` or a placeholder value.

    Parameters
    ----------
    values : pd.Series
        Coordinate values (one axis only: all longitudes or all latitudes).
    bounds : tuple
        ``(low, high)`` limits; both ends are inclusive.

    Returns
    -------
    pd.Series
        Boolean Series, True where the value is acceptable.
    """
    low, high = bounds
    return values.between(low, high) | values.isin(PLACEHOLDER_VALUES)


# Each axis is validated against its OWN column. The latitude check looks for the
# -1 placeholder in latitude, not in longitude.
long_ok = _coordinate_ok(final_df.longitude, LONGITUDE_BOUNDS)
lat_ok = _coordinate_ok(final_df.latitude, LATITUDE_BOUNDS)

# Only rows where both axes fail are treated as reversed; .values drops the column
# labels so the assignment swaps positions instead of re-aligning by name.
swap_condition = ~long_ok & ~lat_ok
final_df.loc[swap_condition, ['longitude', 'latitude']] = final_df.loc[swap_condition, ['latitude', 'longitude']].values

# Re-validate after the swap and report how many longitudes are now acceptable.
long_ok = _coordinate_ok(final_df.longitude, LONGITUDE_BOUNDS).rename('long_ok')
lat_ok = _coordinate_ok(final_df.latitude, LATITUDE_BOUNDS).rename('lat_ok')

print(long_ok.value_counts(dropna=False))

long_ok
True    5848240
Name: count, dtype: int64


In [18]:
# Rows whose longitude is -1 get BOTH coordinates set to 0.
# The mask is computed once, before any assignment: if longitude were overwritten
# first, a second `longitude == -1` test would match nothing and latitude would
# never be updated.
# NOTE(review): -1 and 0 appear to be "no location" placeholders — confirm.
placeholder_rows = final_df.longitude == -1
final_df.loc[placeholder_rows, ['longitude', 'latitude']] = 0

In [22]:
# Checkpoint: write the cleaned frame to disk and read it straight back, so the
# cells above do not have to be re-run to continue from here.
# From this point on `df` refers to the reloaded checkpoint, not the raw call data.
# NOTE(review): to_csv writes the index as a column by default, so the reloaded
# frame gains an extra unnamed index column — confirm downstream code expects it.
checkpoint_csv = '20241116-Seattle-neighborhoods.csv'
final_df.to_csv(checkpoint_csv)
df = pd.read_csv(checkpoint_csv)

## Map Police Beats to Neighborhoods

In [24]:
# Police beats that sit entirely inside one neighborhood. A row on such a beat
# that has no neighborhood (GEN_ALIAS is NaN) can be labelled from the beat alone.
# Author's note: this reduces the number of unknown locations from 8% to 6%.
dictionary = dict([
    ('B1', 'Ballard'),
    ('E1', 'Capitol Hill'),
    ('G1', 'First Hill'),
    ('K1', 'Downtown Commercial Core'),
    ('K3', 'Pioneer Square/International District'),
    ('M3', 'Downtown Commercial Core'),
    ('O1', 'Duwamish/SODO'),
    ('Q1', 'Magnolia'),
    ('Q2', 'Queen Anne'),
    ('U2', 'University District'),
])

for beat, neighborhood in dictionary.items():
    print(beat, neighborhood)
    # Only fill rows on this beat that still lack a neighborhood; existing labels are kept.
    needs_label = df['Beat'].eq(beat) & df['GEN_ALIAS'].isna()
    df.loc[needs_label, 'GEN_ALIAS'] = neighborhood

B1 Ballard
E1 Capitol Hill
G1 First Hill
K1 Downtown Commercial Core
K3 Pioneer Square/International District
M3 Downtown Commercial Core
O1 Duwamish/SODO
Q1 Magnolia 
Q2 Queen Anne
U2 University District


In [26]:
# Write the frame, including the beat-based neighborhood labels, to CSV.
# NOTE(review): the index is written as a column because index=False is not passed — confirm the consumer expects it.
df.to_csv('20241116-Seattle-new-neighborhoods.csv')
# file is used in 2_agg_metrics.ipynb

In [27]:
# Completion marker: the cell output shows 'done' once execution reaches this point
'done'

'done'