In [27]:
# Dependencies and Setup
import numpy as np
import pandas as pd
import pytz

import os 
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import re
import hvplot
from scipy.stats import linregress
import folium
from folium.plugins import HeatMap


import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import warnings
warnings.filterwarnings("ignore")


In [3]:
# Load the CSV file located on BPD website into a DataFrame
arrest_data_df = pd.read_csv("Data/BPD_Arrests.csv")
# Display data
arrest_data_df.head()

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,chargedescription,district,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape
0,1401347.0,608148.870493,23000037.0,39.0,M,B,2022/12/31 23:50:00+00,4000 OAKFORD ST,Unknown Offense,4000 OAKFORD ST,...,HGV,Northwest,621.0,West Arlington,39.3361,-76.6853,"(39.3361,-76.6853)",22L09338,1,
1,1401347.0,608148.870493,23000039.0,50.0,M,B,2022/12/31 23:50:00+00,4000 OAKFORD ST,Unknown Offense,4000 OAKFORD ST,...,HGV,Northwest,621.0,West Arlington,39.3361,-76.6853,"(39.3361,-76.6853)",22L09338,2,
2,,,23000010.0,27.0,M,B,2022/12/31 23:40:00+00,,Unknown Offense,,...,FAILURE TO APPEAR,,,,,,"(,)",,3,
3,1417636.0,595206.835862,23000050.0,42.0,M,B,2022/12/31 23:15:00+00,500 DOLPHIN ST,Unknown Offense,500 DOLPHIN ST,...,HGV,Central,123.0,Upton,39.3004,-76.6279,"(39.3004,-76.6279)",22L09343,4,
4,,,22157188.0,31.0,M,B,2022/12/31 21:00:00+00,,Unknown Offense,,...,FAILURE TO APPEAR,,,,,,"(,)",,5,


In [4]:
# Define the desired time frame
start_date = pd.Timestamp("2018-01-01", tz="US/Eastern")
end_date = pd.Timestamp("2022-12-31", tz="US/Eastern")

# Convert date values to EST timezone
arrest_data_df["arrestdatetime"] = pd.to_datetime(arrest_data_df["arrestdatetime"], utc=False)

# Filter the data based on the time frame
sample_arrests = (arrest_data_df["arrestdatetime"] >= start_date) & (arrest_data_df["arrestdatetime"] <= end_date)

# Apply the filter to get the narrowed down data
narrowed_data = arrest_data_df[sample_arrests]
narrowed_data

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,chargedescription,district,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape
18,,,22157121.0,32.0,F,W,2022-12-31 03:53:00+00:00,,Unknown Offense,,...,FAILURE TO APPEAR,,,,,,"(,)",,19,
19,1.432299e+06,594505.863803,22157116.0,19.0,M,B,2022-12-31 00:09:00+00:00,600 CURLY ST,Unknown Offense,600 CURLY ST,...,MOTOR VEHICLE THEFT,Southeast,224,Ellwood Park/Monument,39.2983,-76.5761,"(39.2983,-76.5761)",22L07944,20,
20,1.407598e+06,592946.031158,22000022.0,18.0,M,B,2021-12-31 23:14:00+00:00,2800 EDMONDSON AVE,Unknown Offense,2800 EDMONDSON AVE,...,HGV,Western,721,Penrose/Fayette Street Outreach,39.2943,-76.6634,"(39.2943,-76.6634)",,21,
21,1.425747e+06,617095.607199,22000010.0,26.0,M,B,2021-12-31 22:09:00+00:00,5700 NORTHWOOD DR,Unknown Offense,5700 NORTHWOOD DR,...,HGV,Northern,523,Chinquapin Park,39.3604,-76.5989,"(39.3604,-76.5989)",,22,
22,,,21168317.0,24.0,M,B,2021-12-31 21:00:00+00:00,,Unknown Offense,,...,RAPE SECOND DEGREE,,,,,,"(,)",,23,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361706,1.427588e+06,603736.077990,18000078.0,23.0,F,B,2018-01-01 10:00:00+00:00,1600 E. 28TH ST,4ECOMMON ASSAULT,1600 E. 28TH ST,...,ASSAULT,Northeast,411,Coldstream Homestead Montebello,39.3237,-76.5926,"(39.3237,-76.5926)",18A00107,361707,
361707,,,18000050.0,29.0,F,W,2018-01-01 09:45:00+00:00,,Unknown Offense,,...,CDS:POSSESS-NOT MARIJUANA,,,,,,"(,)",,361708,
361708,1.432485e+06,603102.765099,18000045.0,46.0,M,B,2018-01-01 08:00:00+00:00,3200 HARWELL AVE,79OTHER,3200 HARWELL AVE,...,AGG. ASSAULT,Northeast,432,Belair-Edison,39.3219,-76.5753,"(39.3219,-76.5753)",18A00083,361709,
361709,1.420414e+06,600936.840992,18000038.0,58.0,M,B,2018-01-01 05:30:00+00:00,2300 MARYLAND AVE,5DBURG. OTH. (FORCE),2300 MARYLAND AVE,...,COMMON ASSAULT,Northern,514,Old Goucher,39.3161,-76.6180,"(39.3161,-76.618)",18E00079,361710,


In [5]:
####Geographical Analysis

In [6]:
# Extract month and year from the arrestdatetime column
narrowed_data["arrest_month"] = narrowed_data["arrestdatetime"].dt.month
narrowed_data["arrest_year"] = narrowed_data["arrestdatetime"].dt.year
pd.set_option('display.precision', 0)
narrowed_data

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
18,,,2e+07,32,F,W,2022-12-31 03:53:00+00:00,,Unknown Offense,,...,,,,,"(,)",,19,,12,2022
19,1e+06,594506,2e+07,19,M,B,2022-12-31 00:09:00+00:00,600 CURLY ST,Unknown Offense,600 CURLY ST,...,224,Ellwood Park/Monument,39,-77,"(39.2983,-76.5761)",22L07944,20,,12,2022
20,1e+06,592946,2e+07,18,M,B,2021-12-31 23:14:00+00:00,2800 EDMONDSON AVE,Unknown Offense,2800 EDMONDSON AVE,...,721,Penrose/Fayette Street Outreach,39,-77,"(39.2943,-76.6634)",,21,,12,2021
21,1e+06,617096,2e+07,26,M,B,2021-12-31 22:09:00+00:00,5700 NORTHWOOD DR,Unknown Offense,5700 NORTHWOOD DR,...,523,Chinquapin Park,39,-77,"(39.3604,-76.5989)",,22,,12,2021
22,,,2e+07,24,M,B,2021-12-31 21:00:00+00:00,,Unknown Offense,,...,,,,,"(,)",,23,,12,2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361706,1e+06,603736,2e+07,23,F,B,2018-01-01 10:00:00+00:00,1600 E. 28TH ST,4ECOMMON ASSAULT,1600 E. 28TH ST,...,411,Coldstream Homestead Montebello,39,-77,"(39.3237,-76.5926)",18A00107,361707,,1,2018
361707,,,2e+07,29,F,W,2018-01-01 09:45:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361708,,1,2018
361708,1e+06,603103,2e+07,46,M,B,2018-01-01 08:00:00+00:00,3200 HARWELL AVE,79OTHER,3200 HARWELL AVE,...,432,Belair-Edison,39,-77,"(39.3219,-76.5753)",18A00083,361709,,1,2018
361709,1e+06,600937,2e+07,58,M,B,2018-01-01 05:30:00+00:00,2300 MARYLAND AVE,5DBURG. OTH. (FORCE),2300 MARYLAND AVE,...,514,Old Goucher,39,-77,"(39.3161,-76.618)",18E00079,361710,,1,2018


In [8]:
# Isolating the 2018 data for mapping
data_2018 = narrowed_data[narrowed_data['arrest_year'] == 2018]


Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
102,1e+06,617759,2e+07,69,M,B,2018-12-31 23:30:00+00:00,6200 LAURELTON AVE,Unknown Offense,6200 LAURELTON AVE,...,423,Hamilton Hills,39,-77,"(39.3621,-76.5651)",,103,,12,2018
103,1e+06,594281,2e+07,17,M,B,2018-12-31 23:30:00+00:00,500 LUZERNE ST,3AFROBB HWY-FIREARM,500 LUZERNE ST,...,221,Mcelderry Park,39,-77,"(39.2977,-76.581)",18L11147,104,,12,2018
104,1e+06,592802,2e+07,41,M,W,2018-12-31 22:00:00+00:00,200 SPRING CT,Unknown Offense,200 SPRING CT,...,212,Dunbar-Broadway,39,-77,"(39.2937,-76.5979)",,105,,12,2018
105,1e+06,612194,2e+07,42,M,B,2018-12-31 21:30:00+00:00,5300 CORDELIA AVE,87NARCOTICS,5300 CORDELIA AVE,...,633,Arlington,39,-77,"(39.3472,-76.6832)",18L11226,106,,12,2018
106,,,2e+07,39,F,B,2018-12-31 19:40:00+00:00,,Unknown Offense,,...,,,,,"(,)",,107,,12,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361706,1e+06,603736,2e+07,23,F,B,2018-01-01 10:00:00+00:00,1600 E. 28TH ST,4ECOMMON ASSAULT,1600 E. 28TH ST,...,411,Coldstream Homestead Montebello,39,-77,"(39.3237,-76.5926)",18A00107,361707,,1,2018
361707,,,2e+07,29,F,W,2018-01-01 09:45:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361708,,1,2018
361708,1e+06,603103,2e+07,46,M,B,2018-01-01 08:00:00+00:00,3200 HARWELL AVE,79OTHER,3200 HARWELL AVE,...,432,Belair-Edison,39,-77,"(39.3219,-76.5753)",18A00083,361709,,1,2018
361709,1e+06,600937,2e+07,58,M,B,2018-01-01 05:30:00+00:00,2300 MARYLAND AVE,5DBURG. OTH. (FORCE),2300 MARYLAND AVE,...,514,Old Goucher,39,-77,"(39.3161,-76.618)",18E00079,361710,,1,2018


In [45]:
# Define latitutde and longitude and remove the nulls
latitudes = data_2018['latitude'].dropna()  
longitudes = data_2018['longitude'].dropna()  

# Create a map
map_obj = folium.Map(location=[latitudes.mean(), longitudes.mean()], zoom_start=5)

# Combine latitudes and longitudes into list
points = list(zip(latitudes, longitudes))

# Display the map
HeatMap(points, radius=6, blur=4).add_to(map_obj)
map_obj


In [9]:
# Isolating the 2019 data for mapping
data_2019 = narrowed_data[narrowed_data['arrest_year'] == 2019]


Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
74,1e+06,596833,2e+07,35,M,B,2019-12-31 23:56:00+00:00,3300 BRIGHTON ST,4ECOMMON ASSAULT,3300 BRIGHTON ST,...,813,Rosemont,39,-77,"(39.305,-76.6731)",20A00003,75,,12,2019
75,,,2e+07,55,M,B,2019-12-31 23:27:00+00:00,,Unknown Offense,,...,,,,,"(,)",,76,,12,2019
76,3e+07,-948896,2e+07,48,M,B,2019-12-31 22:21:00+00:00,0 HOWARD ST,81RECOVERED PROPERTY,0 HOWARD ST,...,,,0,0,"(0,0)",19L09538,77,,12,2019
77,1e+06,593194,2e+07,17,M,B,2019-12-31 21:30:00+00:00,2900 FAYETTE ST,54ARMED PERSON,2900 FAYETTE ST,...,224,Ellwood Park/Monument,39,-77,"(39.2947,-76.5763)",19L09488,78,,12,2019
78,1e+06,593194,2e+07,16,M,B,2019-12-31 21:30:00+00:00,2900 FAYETTE ST,54ARMED PERSON,2900 FAYETTE ST,...,224,Ellwood Park/Monument,39,-77,"(39.2947,-76.5763)",19L09488,79,,12,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361677,1e+06,593520,2e+07,42,M,U,2019-01-01 00:20:00+00:00,200 KENWOOD ST,4ECOMMON ASSAULT,200 KENWOOD ST,...,221,Mcelderry Park,39,-77,"(39.2956,-76.578)",19A00022,361678,,1,2019
361678,1e+06,596939,2e+07,44,M,B,2019-01-01 00:11:00+00:00,1200 LUZERNE ST,54ARMED PERSON,1200 LUZERNE ST,...,332,Berea,39,-77,"(39.305,-76.5815)",19A00005,361679,,1,2019
361679,3e+07,-948896,2e+07,31,F,B,2019-01-01 00:10:00+00:00,400 KENWOOD ST,Unknown Offense,400 KENWOOD ST,...,,,0,0,"(0,0)",19A00011,361680,,1,2019
361680,,,2e+07,49,M,B,2019-01-01 00:08:00+00:00,,54ARMED PERSON,,...,,,,,"(,)",19A00012,361681,,1,2019


In [43]:
# Define latitutde and longitude and remove the nulls
latitudes = data_2019['latitude'].dropna()  
longitudes = data_2019['longitude'].dropna()  

# Create a map
map_obj = folium.Map(location=[latitudes.mean(), longitudes.mean()], zoom_start=5)

# Combine latitudes and longitudes ito list
points = list(zip(latitudes, longitudes))

# Display the map
HeatMap(points, radius=6, blur=4).add_to(map_obj)
map_obj


In [10]:
# Isolating the 2020 data for mapping
data_2020 = narrowed_data[narrowed_data['arrest_year'] == 2020]

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
42,1e+06,597715,2e+07,27,M,B,2020-12-31 23:50:00+00:00,2900 PRESBURY ST,96BINVESTIGATIVE STOP,2900 PRESBURY ST,...,813,Northwest Community Action,39,-77,"(39.3074,-76.6661)",20A08559,43,,12,2020
43,1e+06,594711,2e+07,45,F,B,2020-12-31 23:45:00+00:00,900 MONROE ST,5FBURG. OTH. (NOFORCE),900 MONROE ST,...,722,Midtown-Edmondson,39,-77,"(39.2991,-76.6477)",21L07949,44,,12,2020
44,1e+06,583937,2e+07,42,F,W,2020-12-31 23:40:00+00:00,1000 PINE HEIGHTS AVE,Unknown Offense,1000 PINE HEIGHTS AVE,...,832,Violetville,39,-77,"(39.2696,-76.6755)",,45,,12,2020
45,1e+06,576818,2e+07,26,M,B,2020-12-31 21:45:00+00:00,700 CHERRY HILL RD,Unknown Offense,700 CHERRY HILL RD,...,922,Cherry Hill,39,-77,"(39.2499,-76.6241)",20L08536,46,,12,2020
46,1e+06,576818,2e+07,19,M,B,2020-12-31 21:45:00+00:00,700 CHERRY HILL RD,Unknown Offense,700 CHERRY HILL RD,...,922,Cherry Hill,39,-77,"(39.2499,-76.6241)",20L08536,47,,12,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361622,1e+06,605459,,31,M,B,2020-01-01 02:15:00+00:00,300 33RD ST,Unknown Offense,300 33RD ST,...,512,Oakenshawe,39,-77,"(39.3285,-76.6134)",,361623,,1,2020
361623,,,2e+07,51,M,B,2020-01-01 01:00:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361624,,1,2020
361624,,,2e+07,42,M,B,2020-01-01 00:43:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361625,,1,2020
361625,1e+06,587893,2e+07,27,M,B,2020-01-01 00:30:00+00:00,2400 CHRISTIAN ST,4CAGG. ASSLT.- OTH.,2400 CHRISTIAN ST,...,834,Millhill,39,-77,"(39.2804,-76.6539)",20A00013,361626,,1,2020


In [42]:
# Define latitutde and longitude and remove the nulls
latitudes = data_2020['latitude'].dropna()  
longitudes = data_2020['longitude'].dropna()  

# Create a map
map_obj = folium.Map(location=[latitudes.mean(), longitudes.mean()], zoom_start=5)

# Combine latitudes and longitudes ito list
points = list(zip(latitudes, longitudes))

# Display the map
HeatMap(points, radius=6, blur=4).add_to(map_obj)
map_obj


In [11]:
# Isolating the 2021 data for mapping
data_2021 = narrowed_data[narrowed_data['arrest_year'] == 2021]

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
20,1e+06,592946,2e+07,18,M,B,2021-12-31 23:14:00+00:00,2800 EDMONDSON AVE,Unknown Offense,2800 EDMONDSON AVE,...,721,Penrose/Fayette Street Outreach,39,-77,"(39.2943,-76.6634)",,21,,12,2021
21,1e+06,617096,2e+07,26,M,B,2021-12-31 22:09:00+00:00,5700 NORTHWOOD DR,Unknown Offense,5700 NORTHWOOD DR,...,523,Chinquapin Park,39,-77,"(39.3604,-76.5989)",,22,,12,2021
22,,,2e+07,24,M,B,2021-12-31 21:00:00+00:00,,Unknown Offense,,...,,,,,"(,)",,23,,12,2021
23,,,2e+07,28,M,B,2021-12-31 18:21:00+00:00,,Unknown Offense,,...,,,,,"(,)",,24,,12,2021
24,,,2e+07,28,M,B,2021-12-31 18:21:00+00:00,,Unknown Offense,,...,,,,,"(,)",,25,,12,2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361590,1e+06,597715,2e+07,22,F,B,2021-01-01 02:13:00+00:00,2900 PRESBURY ST,Unknown Offense,2900 PRESBURY ST,...,813,Northwest Community Action,39,-77,"(39.3074,-76.6661)",,361591,,1,2021
361591,1e+06,589289,2e+07,31,M,W,2021-01-01 02:12:00+00:00,2400 FOSTER AVE,4ECOMMON ASSAULT,2400 FOSTER AVE,...,214,Canton,39,-77,"(39.284,-76.5827)",21A00040,361592,,1,2021
361592,,,2e+07,27,M,B,2021-01-01 01:31:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361593,,1,2021
361593,1e+06,600082,2e+07,51,M,B,2021-01-01 00:50:00+00:00,800 NEWINGTON AVE,49FAMILY DISTURBANCE,800 NEWINGTON AVE,...,133,Reservoir Hill,39,-77,"(39.3138,-76.6325)",21A00015,361594,,1,2021


In [41]:
# Define latitutde and longitude and remove the nulls
latitudes = data_2021['latitude'].dropna()  
longitudes = data_2021['longitude'].dropna()  

# Create a map
map_obj = folium.Map(location=[latitudes.mean(), longitudes.mean()], zoom_start=5)

# Combine latitudes and longitudes ito list
points = list(zip(latitudes, longitudes))

# Display the map
HeatMap(points, radius=6, blur=4).add_to(map_obj)
map_obj


In [12]:
# Isolating the 2022 data for mapping
data_2022 = narrowed_data[narrowed_data['arrest_year'] == 2022]
data_2022

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
18,,,2e+07,32,F,W,2022-12-31 03:53:00+00:00,,Unknown Offense,,...,,,,,"(,)",,19,,12,2022
19,1e+06,594506,2e+07,19,M,B,2022-12-31 00:09:00+00:00,600 CURLY ST,Unknown Offense,600 CURLY ST,...,224,Ellwood Park/Monument,39,-77,"(39.2983,-76.5761)",22L07944,20,,12,2022
682,1e+06,595270,2e+07,24,F,B,2022-12-30 23:25:00+00:00,800 ASHLAND CT,Unknown Offense,800 ASHLAND CT,...,324,Oldtown,39,-77,"(39.3005,-76.6053)",22L09103,683,,12,2022
683,,,2e+07,70,M,B,2022-12-30 23:09:00+00:00,,Unknown Offense,,...,,,,,"(,)",,684,,12,2022
684,,,2e+07,18,M,B,2022-12-30 23:00:00+00:00,,Unknown Offense,,...,,,,,"(,)",,685,,12,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361561,1e+06,602440,2e+07,41,M,A,2022-01-01 06:15:00+00:00,400 LORRAINE ST,4ECOMMON ASSAULT,400 LORRAINE ST,...,513,Harwood,39,-77,"(39.3202,-76.6097)",22A00098,361562,,1,2022
361562,,,2e+07,53,F,B,2022-01-01 05:25:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361563,,1,2022
361563,1e+06,612483,2e+07,39,M,B,2022-01-01 04:36:00+00:00,5300 NELSON AVE,4BAGG. ASSLT.- CUT,5300 NELSON AVE,...,633,Arlington,39,-77,"(39.348,-76.6855)",22A00045,361564,,1,2022
361564,1e+06,595461,2e+07,21,M,B,2022-01-01 01:18:00+00:00,1200 ETTING ST,Unknown Offense,1200 ETTING ST,...,123,Upton,39,-77,"(39.3011,-76.6287)",22A00025,361565,,1,2022


In [46]:
# Define latitutde and longitude and remove the nulls
latitudes = data_2022['latitude'].dropna()  
longitudes = data_2022['longitude'].dropna()  

# Create a map
map_obj = folium.Map(location=[latitudes.mean(), longitudes.mean()], zoom_start=5)

# Combine latitudes and longitudes ito list
points = list(zip(latitudes, longitudes))

# Display the map
HeatMap(points, radius=6, blur=4).add_to(map_obj)
map_obj

