In [2]:
# Dependencies and Setup
import numpy as np
import pandas as pd
import pytz

import os 
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import re
import hvplot
from scipy.stats import linregress

import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import warnings
warnings.filterwarnings("ignore")


In [3]:
# Load the CSV file located on BPD website into a Pandas DataFrame
arrest_data_df = pd.read_csv("Data/BPD_Arrests.csv")
# Display data
arrest_data_df.head()

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,chargedescription,district,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape
0,1401347.0,608148.870493,23000037.0,39.0,M,B,2022/12/31 23:50:00+00,4000 OAKFORD ST,Unknown Offense,4000 OAKFORD ST,...,HGV,Northwest,621.0,West Arlington,39.3361,-76.6853,"(39.3361,-76.6853)",22L09338,1,
1,1401347.0,608148.870493,23000039.0,50.0,M,B,2022/12/31 23:50:00+00,4000 OAKFORD ST,Unknown Offense,4000 OAKFORD ST,...,HGV,Northwest,621.0,West Arlington,39.3361,-76.6853,"(39.3361,-76.6853)",22L09338,2,
2,,,23000010.0,27.0,M,B,2022/12/31 23:40:00+00,,Unknown Offense,,...,FAILURE TO APPEAR,,,,,,"(,)",,3,
3,1417636.0,595206.835862,23000050.0,42.0,M,B,2022/12/31 23:15:00+00,500 DOLPHIN ST,Unknown Offense,500 DOLPHIN ST,...,HGV,Central,123.0,Upton,39.3004,-76.6279,"(39.3004,-76.6279)",22L09343,4,
4,,,22157188.0,31.0,M,B,2022/12/31 21:00:00+00,,Unknown Offense,,...,FAILURE TO APPEAR,,,,,,"(,)",,5,


In [4]:
# Define the desired time frame
start_date = pd.Timestamp("2018-01-01", tz="US/Eastern")
end_date = pd.Timestamp("2022-12-31", tz="US/Eastern")

# Convert date values to EST timezone
arrest_data_df["arrestdatetime"] = pd.to_datetime(arrest_data_df["arrestdatetime"], utc=False)

# Filter the data based on the time frame
sample_arrests = (arrest_data_df["arrestdatetime"] >= start_date) & (arrest_data_df["arrestdatetime"] <= end_date)

# Apply the filter to get the narrowed down data
narrowed_data = arrest_data_df[sample_arrests]
narrowed_data

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,chargedescription,district,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape
18,,,22157121.0,32.0,F,W,2022-12-31 03:53:00+00:00,,Unknown Offense,,...,FAILURE TO APPEAR,,,,,,"(,)",,19,
19,1.432299e+06,594505.863803,22157116.0,19.0,M,B,2022-12-31 00:09:00+00:00,600 CURLY ST,Unknown Offense,600 CURLY ST,...,MOTOR VEHICLE THEFT,Southeast,224,Ellwood Park/Monument,39.2983,-76.5761,"(39.2983,-76.5761)",22L07944,20,
20,1.407598e+06,592946.031158,22000022.0,18.0,M,B,2021-12-31 23:14:00+00:00,2800 EDMONDSON AVE,Unknown Offense,2800 EDMONDSON AVE,...,HGV,Western,721,Penrose/Fayette Street Outreach,39.2943,-76.6634,"(39.2943,-76.6634)",,21,
21,1.425747e+06,617095.607199,22000010.0,26.0,M,B,2021-12-31 22:09:00+00:00,5700 NORTHWOOD DR,Unknown Offense,5700 NORTHWOOD DR,...,HGV,Northern,523,Chinquapin Park,39.3604,-76.5989,"(39.3604,-76.5989)",,22,
22,,,21168317.0,24.0,M,B,2021-12-31 21:00:00+00:00,,Unknown Offense,,...,RAPE SECOND DEGREE,,,,,,"(,)",,23,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361706,1.427588e+06,603736.077990,18000078.0,23.0,F,B,2018-01-01 10:00:00+00:00,1600 E. 28TH ST,4ECOMMON ASSAULT,1600 E. 28TH ST,...,ASSAULT,Northeast,411,Coldstream Homestead Montebello,39.3237,-76.5926,"(39.3237,-76.5926)",18A00107,361707,
361707,,,18000050.0,29.0,F,W,2018-01-01 09:45:00+00:00,,Unknown Offense,,...,CDS:POSSESS-NOT MARIJUANA,,,,,,"(,)",,361708,
361708,1.432485e+06,603102.765099,18000045.0,46.0,M,B,2018-01-01 08:00:00+00:00,3200 HARWELL AVE,79OTHER,3200 HARWELL AVE,...,AGG. ASSAULT,Northeast,432,Belair-Edison,39.3219,-76.5753,"(39.3219,-76.5753)",18A00083,361709,
361709,1.420414e+06,600936.840992,18000038.0,58.0,M,B,2018-01-01 05:30:00+00:00,2300 MARYLAND AVE,5DBURG. OTH. (FORCE),2300 MARYLAND AVE,...,COMMON ASSAULT,Northern,514,Old Goucher,39.3161,-76.6180,"(39.3161,-76.618)",18E00079,361710,


In [5]:
####Geographical Analysis

In [6]:
# Extract month and year from the `arrestdatetime` column
narrowed_data["arrest_month"] = narrowed_data["arrestdatetime"].dt.month
narrowed_data["arrest_year"] = narrowed_data["arrestdatetime"].dt.year
pd.set_option('display.precision', 0)
narrowed_data

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
18,,,2e+07,32,F,W,2022-12-31 03:53:00+00:00,,Unknown Offense,,...,,,,,"(,)",,19,,12,2022
19,1e+06,594506,2e+07,19,M,B,2022-12-31 00:09:00+00:00,600 CURLY ST,Unknown Offense,600 CURLY ST,...,224,Ellwood Park/Monument,39,-77,"(39.2983,-76.5761)",22L07944,20,,12,2022
20,1e+06,592946,2e+07,18,M,B,2021-12-31 23:14:00+00:00,2800 EDMONDSON AVE,Unknown Offense,2800 EDMONDSON AVE,...,721,Penrose/Fayette Street Outreach,39,-77,"(39.2943,-76.6634)",,21,,12,2021
21,1e+06,617096,2e+07,26,M,B,2021-12-31 22:09:00+00:00,5700 NORTHWOOD DR,Unknown Offense,5700 NORTHWOOD DR,...,523,Chinquapin Park,39,-77,"(39.3604,-76.5989)",,22,,12,2021
22,,,2e+07,24,M,B,2021-12-31 21:00:00+00:00,,Unknown Offense,,...,,,,,"(,)",,23,,12,2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361706,1e+06,603736,2e+07,23,F,B,2018-01-01 10:00:00+00:00,1600 E. 28TH ST,4ECOMMON ASSAULT,1600 E. 28TH ST,...,411,Coldstream Homestead Montebello,39,-77,"(39.3237,-76.5926)",18A00107,361707,,1,2018
361707,,,2e+07,29,F,W,2018-01-01 09:45:00+00:00,,Unknown Offense,,...,,,,,"(,)",,361708,,1,2018
361708,1e+06,603103,2e+07,46,M,B,2018-01-01 08:00:00+00:00,3200 HARWELL AVE,79OTHER,3200 HARWELL AVE,...,432,Belair-Edison,39,-77,"(39.3219,-76.5753)",18A00083,361709,,1,2018
361709,1e+06,600937,2e+07,58,M,B,2018-01-01 05:30:00+00:00,2300 MARYLAND AVE,5DBURG. OTH. (FORCE),2300 MARYLAND AVE,...,514,Old Goucher,39,-77,"(39.3161,-76.618)",18E00079,361710,,1,2018


In [7]:
#data_2018 = narrowed_data.loc[narrowed_data['arrest_year'].isin(['2018'])] 
data_2018 = narrowed_data[narrowed_data['arrest_year'] == '2018']
#Handgun_arrests=filtered_data.loc[filtered_data['chargedescription'].isin(['Handgun Charges'])]
data_2018

Unnamed: 0,X,Y,arrestnumber,age,gender,race,arrestdatetime,arrestlocation,incidentoffence,incidentlocation,...,post,neighborhood,latitude,longitude,geolocation,incidentnumber,objectid,shape,arrest_month,arrest_year
