In [1]:
import pandas as pd
import numpy as np

In [None]:
import warnings
# Ignore every FutureWarning from here on (presumably to keep library
# deprecation notices out of the cell outputs -- confirm this is still wanted,
# since it also hides warnings about calls that will break on upgrade).
warnings.simplefilter(action='ignore', category=FutureWarning)

**Cleaning**

In [2]:
# Load the CAD call data; the path is relative to the notebook's working directory.
cad = pd.read_csv("data/call_data_from_CAD.csv")

In [3]:
# Preview the first rows of the raw table to see which columns are available.
cad.head()

Unnamed: 0.1,Unnamed: 0,IncidentNumber,Call_Created_Time,Call_First_Dispatched_Time,Call_First_On_Scene,Call_Cleared,Call_Zipcode,Beat,Call_Source,Call_Priority,InitialIncidentTypeDescription,IsPrimary,PrimaryUnitCallSign,RespondingUnitCallSign,Unit_Dispatched_Time,Unit_OnScene_Time,Unit_Cleared_Time,Disposition
0,1,OR-2016-01-01-16000001,01/01/2016 00:00:04,01/01/2016 00:04:58,01/01/2016 00:09:41,01/01/2016 00:54:19,97402.0,EP05,E911,3,ASSAULT,1,5E57,5E57,01/01/2016 00:04:58,01/01/2016 00:09:56,01/01/2016 00:54:19,ADVISED
1,2,OR-2016-01-01-16000001,01/01/2016 00:00:04,01/01/2016 00:04:58,01/01/2016 00:09:41,01/01/2016 00:54:19,97402.0,EP05,E911,3,ASSAULT,0,5E57,4X40,01/01/2016 00:09:41,01/01/2016 00:09:41,01/01/2016 00:46:59,ADVISED
2,3,OR-2016-01-01-16000001,01/01/2016 00:00:04,01/01/2016 00:04:58,01/01/2016 00:09:41,01/01/2016 00:54:19,97402.0,EP05,E911,3,ASSAULT,0,5E57,4E53,01/01/2016 00:04:58,01/01/2016 00:12:26,01/01/2016 00:51:58,ADVISED
3,4,OR-2016-01-01-16000003,01/01/2016 00:00:24,01/01/2016 00:00:25,01/01/2016 00:00:25,01/01/2016 00:02:41,97401.0,EP02,SELF,6,TRAFFIC STOP,1,5T81,5T81,01/01/2016 00:00:25,01/01/2016 00:00:25,01/01/2016 00:02:41,ADVISED
4,5,OR-2016-01-01-16000004,01/01/2016 00:02:45,01/01/2016 00:04:05,01/01/2016 00:04:05,01/01/2016 00:18:22,97401.0,EP02,E911,3,CHECK WELFARE,0,3X90,3F61,01/01/2016 00:04:12,,01/01/2016 00:08:13,ASSISTED


In [4]:
# Strip the raw table down to the creation time, zip code and the two call-sign
# columns, then discard every row that has a missing value in any of them.
cad_clean = cad.drop(
    labels=[
        "Unnamed: 0",
        "IncidentNumber",
        "Call_First_Dispatched_Time",
        "Call_First_On_Scene",
        "Call_Cleared",
        "Beat",
        "Call_Source",
        "Call_Priority",
        "InitialIncidentTypeDescription",
        "IsPrimary",
        "Unit_Dispatched_Time",
        "Unit_OnScene_Time",
        "Unit_Cleared_Time",
        "Disposition",
    ],
    axis="columns",
).dropna()

In [5]:
# Zip codes are read in as floats (e.g. 97402.0); cast the column to integers.
cad_clean = cad_clean.astype({"Call_Zipcode": int})

In [6]:
# Give the remaining columns shorter names for the rest of the notebook.
cad_clean = cad_clean.rename(
    mapper={
        "Call_Zipcode": "Zipcode",
        "PrimaryUnitCallSign": "Called",
        "RespondingUnitCallSign": "Responded",
    },
    axis="columns",
)

In [7]:
# Zip codes treated as Eugene; the table is restricted to these further down.
zips = [
    97401,
    97402,
    97403,
    97404,
    97405,
    97408,
    97440,
]

In [8]:
# Restrict the table to calls whose zip code appears in the Eugene list.
cad_clean = cad_clean.loc[lambda frame: frame["Zipcode"].isin(zips)]

In [9]:
# Call signs that get recoded to "CAHOOTS" below.
# `ch` holds the signs matched exactly; `ch_r` is a pattern matching any run of
# word characters that contains a capital "J".
ch = [
    "1J77",
    "3J78",
    "4J79",
    "CAHO",
    "CAHOT",
]
ch_r = r"\w*J\w*"

In [10]:
def _as_cahoots(signs):
    """Return `signs` with CAHOOTS call signs rewritten to "CAHOOTS".

    Values listed in `ch` are replaced outright; afterwards every substring
    matching the `ch_r` pattern is replaced as well.
    """
    exact_hits = signs.mask(signs.isin(ch), "CAHOOTS")
    return exact_hits.str.replace(ch_r, "CAHOOTS", regex=True)


# Apply the same recoding to the primary ("Called") and responding unit columns.
cad_clean = cad_clean.assign(
    Called=_as_cahoots(cad_clean["Called"]),
    Responded=_as_cahoots(cad_clean["Responded"]),
)

In [11]:
# Inspect the first rows after renaming, zip filtering and call-sign recoding.
cad_clean.head()

Unnamed: 0,Call_Created_Time,Zipcode,Called,Responded
0,01/01/2016 00:00:04,97402,5E57,5E57
1,01/01/2016 00:00:04,97402,5E57,4X40
2,01/01/2016 00:00:04,97402,5E57,4E53
3,01/01/2016 00:00:24,97401,5T81,5T81
4,01/01/2016 00:02:45,97401,3X90,3F61


In [12]:
# Reduce the call-creation timestamp to a calendar date and drop the original
# string column.
#
# The format is spelled out instead of being inferred: it matches the
# month-first reading pandas applies to these strings by default
# (e.g. "01/01/2016 00:00:04"), avoids slow element-by-element parsing, and
# makes a malformed timestamp fail loudly rather than be guessed at.
# `.dt.normalize()` sets the time part to midnight, the same result as flooring
# to the day, without relying on the lowercase "d" frequency alias.
cad_clean = cad_clean.assign(
    Date=pd.to_datetime(
        cad_clean["Call_Created_Time"], format="%m/%d/%Y %H:%M:%S"
    ).dt.normalize()
).drop(columns=["Call_Created_Time"])

In [13]:
# Inspect the first rows now that the timestamp has been replaced by a Date column.
cad_clean.head()

Unnamed: 0,Zipcode,Called,Responded,Date
0,97402,5E57,5E57,2016-01-01
1,97402,5E57,4X40,2016-01-01
2,97402,5E57,4E53,2016-01-01
3,97401,5T81,5T81,2016-01-01
4,97401,3X90,3F61,2016-01-01


In [14]:
# Write the cleaned table to disk as CSV; the row index is left out of the file.
file_path = 'data/cad_clean.csv'
cad_clean.to_csv(path_or_buf=file_path, index=False)