# Exploratory Data Analysis of Crime/Arrest Data

Source:
https://data.cityofnewyork.us/Public-Safety/NYC-crime/qb7u-rbmr/data (crime)
https://data.cityofnewyork.us/Public-Safety/NYPD-Arrest-Data-Year-to-Date-/uip8-fykc/about_data (arrest)

EDA Rundown
- Decide which dataset to use
- Filter out unnecessary columns
- Address invalid/missing values
- Manipulate values
- Analyze trends between certain features
- Visualize these trends

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the NYPD complaint (crime) CSV export into a DataFrame; the path is
# relative to the directory the notebook is run from.
crime_unclean = pd.read_csv(
    "Datasets/CrimeData/NYPD_Complaint_Data_Current__Year_To_Date_.csv"
)

# Print the column index followed by the (rows, columns) shape.
print(crime_unclean.columns, crime_unclean.shape)

Index(['CMPLNT_NUM', 'ADDR_PCT_CD', 'BORO_NM', 'CMPLNT_FR_DT', 'CMPLNT_FR_TM',
       'CMPLNT_TO_DT', 'CMPLNT_TO_TM', 'CRM_ATPT_CPTD_CD', 'HADEVELOPT',
       'HOUSING_PSA', 'JURISDICTION_CODE', 'JURIS_DESC', 'KY_CD', 'LAW_CAT_CD',
       'LOC_OF_OCCUR_DESC', 'OFNS_DESC', 'PARKS_NM', 'PATROL_BORO', 'PD_CD',
       'PD_DESC', 'PREM_TYP_DESC', 'RPT_DT', 'STATION_NAME', 'SUSP_AGE_GROUP',
       'SUSP_RACE', 'SUSP_SEX', 'TRANSIT_DISTRICT', 'VIC_AGE_GROUP',
       'VIC_RACE', 'VIC_SEX', 'X_COORD_CD', 'Y_COORD_CD', 'Latitude',
       'Longitude', 'Lat_Lon', 'New Georeferenced Column'],
      dtype='object') (433281, 36)


# Columns to remove

- HADEVELOPT
- HOUSING_PSA
- JURISDICTION_CODE, JURIS_DESC
- KY_CD
- PARKS_NM
- RPT_DT
- STATION_NAME
- TRANSIT_DISTRICT
- PATROL_BORO
- PD_CD, PD_DESC


In [3]:
# Columns judged unnecessary for this analysis.
# NOTE(review): the markdown list above also names RPT_DT, STATION_NAME, PD_CD
# and PD_DESC, which are not dropped here -- confirm whether they should be.
remove_col = [
    'HADEVELOPT',
    'HOUSING_PSA',
    'JURISDICTION_CODE',
    'JURIS_DESC',
    'KY_CD',
    'PARKS_NM',
    'PATROL_BORO',
    'TRANSIT_DISTRICT',
]

# Reassign instead of dropping in place, and ignore labels that are already
# gone, so re-running this cell is a no-op rather than a KeyError.
crime_unclean = crime_unclean.drop(columns=remove_col, errors="ignore")

# Show the (rows, columns) shape after the drop.
crime_unclean.shape

(433281, 28)

In [4]:
# List the distinct values that occur in the ADDR_PCT_CD column.
crime_unclean.loc[:, "ADDR_PCT_CD"].unique()

array([ 47,  61, 123, 120, 121, 122,   1,  68,  72,  62,  76,  10,   6,
        66,  60,   5,  84,  13,  14,  18,  78,   9,   7,  20,  63,  75,
        34,  17,  88,  24,  22,  70,  33,  19,  26,  77,  90,  71,  94,
        79, 108,  28,  67,  30,  23, 114,  32, 109,  25,  83,  81,  44,
        40, 103, 113,  73,  46, 104, 100,  52,  69,  50,  45,  42,  48,
        43,  41, 115, 110, 112,  49, 102, 106, 107, 101, 111, 105])

In [5]:
# Sanity check: print the frame's current (rows, columns) shape.
print(crime_unclean.shape)


(433281, 28)


In [6]:
# Inspect the dtype pandas assigned to each remaining column.
crime_unclean.dtypes

CMPLNT_NUM                   object
ADDR_PCT_CD                   int64
BORO_NM                      object
CMPLNT_FR_DT                 object
CMPLNT_FR_TM                 object
CMPLNT_TO_DT                 object
CMPLNT_TO_TM                 object
CRM_ATPT_CPTD_CD             object
LAW_CAT_CD                   object
LOC_OF_OCCUR_DESC            object
OFNS_DESC                    object
PD_CD                       float64
PD_DESC                      object
PREM_TYP_DESC                object
RPT_DT                       object
STATION_NAME                 object
SUSP_AGE_GROUP               object
SUSP_RACE                    object
SUSP_SEX                     object
VIC_AGE_GROUP                object
VIC_RACE                     object
VIC_SEX                      object
X_COORD_CD                  float64
Y_COORD_CD                  float64
Latitude                    float64
Longitude                   float64
Lat_Lon                      object
New Georeferenced Column     object
dtype: object

In [7]:
# Report how many missing (NaN) values each column holds. This must be printed
# explicitly: only the last expression of a cell is displayed automatically,
# and the last statement here is an assignment.
print(crime_unclean.isna().sum())

# Drop every row that contains a NaN in any of the remaining columns.
# NOTE(review): placeholder strings such as "(null)" are not NaN, so rows
# holding them are kept by dropna() -- confirm whether those should be
# treated as missing as well.
crime_unclean = crime_unclean.dropna()



In [8]:
# Display the frame as it stands after the previous cell's dropna(); pandas
# abbreviates the rendering of long/wide frames with "..." rows and columns.
crime_unclean

Unnamed: 0,CMPLNT_NUM,ADDR_PCT_CD,BORO_NM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,CRM_ATPT_CPTD_CD,LAW_CAT_CD,LOC_OF_OCCUR_DESC,...,SUSP_SEX,VIC_AGE_GROUP,VIC_RACE,VIC_SEX,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lat_Lon,New Georeferenced Column
2,283436472,123,STATEN ISLAND,03/08/2024,21:20:00,03/08/2024,21:21:00,COMPLETED,VIOLATION,FRONT OF,...,F,25-44,WHITE HISPANIC,F,934640.0,143161.0,40.559481,-74.178539,"(40.559481, -74.178539)",POINT (-74.178539 40.559481)
3,284809655,120,STATEN ISLAND,04/04/2024,19:50:00,04/04/2024,20:02:00,COMPLETED,FELONY,INSIDE,...,M,UNKNOWN,UNKNOWN,D,949942.0,170296.0,40.634036,-74.123607,"(40.634036, -74.123607)",POINT (-74.123607 40.634036)
4,288019777,123,STATEN ISLAND,06/05/2024,15:00:00,06/05/2024,15:30:00,COMPLETED,MISDEMEANOR,FRONT OF,...,U,25-44,WHITE,F,924768.0,134938.0,40.536852,-74.213994,"(40.536852, -74.213994)",POINT (-74.213994 40.536852)
5,291485031,123,STATEN ISLAND,08/09/2024,12:00:00,08/10/2024,13:30:00,COMPLETED,FELONY,INSIDE,...,U,25-44,ASIAN / PACIFIC ISLANDER,M,924010.0,137020.0,40.542560,-74.216738,"(40.54256, -74.216738)",POINT (-74.216738 40.54256)
6,281987340,123,STATEN ISLAND,02/10/2024,01:10:00,02/10/2024,01:49:00,COMPLETED,FELONY,FRONT OF,...,M,25-44,WHITE,F,918928.0,129743.0,40.522550,-74.234952,"(40.52255, -74.234952)",POINT (-74.234952 40.52255)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433276,281094253,105,QUEENS,11/25/2023,12:00:00,11/25/2023,18:20:00,COMPLETED,MISDEMEANOR,(null),...,U,25-44,BLACK,M,1056672.0,194269.0,40.699606,-73.738811,"(40.699606, -73.738811)",POINT (-73.738811 40.699606)
433277,285839553,105,QUEENS,08/26/2023,06:59:00,04/23/2024,20:28:00,COMPLETED,MISDEMEANOR,FRONT OF,...,F,25-44,ASIAN / PACIFIC ISLANDER,F,1055701.0,201413.0,40.719221,-73.742240,"(40.719221, -73.74224)",POINT (-73.74224 40.719221)
433278,291946590,105,QUEENS,08/19/2024,20:42:00,08/19/2024,20:50:00,COMPLETED,MISDEMEANOR,(null),...,M,45-64,BLACK,M,1057637.0,201157.0,40.718502,-73.735259,"(40.718501800070065, -73.73525943158317)",POINT (-73.73525943158317 40.718501800070065)
433279,287632400,105,QUEENS,05/28/2024,16:20:00,05/28/2024,16:25:00,COMPLETED,MISDEMEANOR,(null),...,M,UNKNOWN,WHITE,E,1057566.0,206607.0,40.733461,-73.735456,"(40.733461245596075, -73.73545618456804)",POINT (-73.73545618456804 40.733461245596075)
