In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from scipy.stats import norm

**Cleaning**

In [2]:
cad = pd.read_csv("call_data_from_CAD.csv")

In [3]:
cad.head()

Unnamed: 0.1,Unnamed: 0,IncidentNumber,Call_Created_Time,Call_First_Dispatched_Time,Call_First_On_Scene,Call_Cleared,Call_Zipcode,Beat,Call_Source,Call_Priority,InitialIncidentTypeDescription,IsPrimary,PrimaryUnitCallSign,RespondingUnitCallSign,Unit_Dispatched_Time,Unit_OnScene_Time,Unit_Cleared_Time,Disposition
0,1,OR-2016-01-01-16000001,01/01/2016 00:00:04,01/01/2016 00:04:58,01/01/2016 00:09:41,01/01/2016 00:54:19,97402.0,EP05,E911,3,ASSAULT,1,5E57,5E57,01/01/2016 00:04:58,01/01/2016 00:09:56,01/01/2016 00:54:19,ADVISED
1,2,OR-2016-01-01-16000001,01/01/2016 00:00:04,01/01/2016 00:04:58,01/01/2016 00:09:41,01/01/2016 00:54:19,97402.0,EP05,E911,3,ASSAULT,0,5E57,4X40,01/01/2016 00:09:41,01/01/2016 00:09:41,01/01/2016 00:46:59,ADVISED
2,3,OR-2016-01-01-16000001,01/01/2016 00:00:04,01/01/2016 00:04:58,01/01/2016 00:09:41,01/01/2016 00:54:19,97402.0,EP05,E911,3,ASSAULT,0,5E57,4E53,01/01/2016 00:04:58,01/01/2016 00:12:26,01/01/2016 00:51:58,ADVISED
3,4,OR-2016-01-01-16000003,01/01/2016 00:00:24,01/01/2016 00:00:25,01/01/2016 00:00:25,01/01/2016 00:02:41,97401.0,EP02,SELF,6,TRAFFIC STOP,1,5T81,5T81,01/01/2016 00:00:25,01/01/2016 00:00:25,01/01/2016 00:02:41,ADVISED
4,5,OR-2016-01-01-16000004,01/01/2016 00:02:45,01/01/2016 00:04:05,01/01/2016 00:04:05,01/01/2016 00:18:22,97401.0,EP02,E911,3,CHECK WELFARE,0,3X90,3F61,01/01/2016 00:04:12,,01/01/2016 00:08:13,ASSISTED


In [4]:
#drop unneeded columns and NA values
cad_clean = cad.drop(columns=["Call_First_Dispatched_Time", "Call_First_On_Scene", "Call_Cleared", "Call_Source",
                             "Call_Priority", "IsPrimary", "Unit_Dispatched_Time", "Unit_OnScene_Time", 
                              "Unit_Cleared_Time", "Disposition", "Unnamed: 0", "Beat", "IncidentNumber", 
                              "InitialIncidentTypeDescription"]).dropna()

In [5]:
#convert zipcodes to integers
cad_clean["Call_Zipcode"] = cad_clean["Call_Zipcode"].astype(int)

In [6]:
#renaming columns
cad_clean = cad_clean.rename(columns={"Call_Zipcode": "Zipcode", "PrimaryUnitCallSign" : "Called",
                                      "RespondingUnitCallSign": "Responded"})

In [7]:
#Eugene zipcodes
zips = [97401, 97402, 97403, 97404, 97405, 97408, 97440]

In [8]:
#only keep data in Eugene zips
cad_clean = cad_clean[cad_clean["Zipcode"].isin(zips)]

In [9]:
#call signs
ch = ["1J77", "3J78", "4J79", "CAHO", "CAHOT"]
ch_r = r"\w*J\w*"

In [10]:
#convert codes
cad_clean.loc[cad_clean["Called"].isin(ch), "Called"] = "CAHOOTS"
cad_clean.loc[cad_clean["Responded"].isin(ch), "Responded"] = "CAHOOTS"

cad_clean["Called"] = cad_clean["Called"].str.replace(ch_r, "CAHOOTS", regex=True)
cad_clean["Responded"] = cad_clean["Responded"].str.replace(ch_r, "CAHOOTS", regex=True)

In [11]:
cad_clean.head()

Unnamed: 0,Call_Created_Time,Zipcode,Called,Responded
0,01/01/2016 00:00:04,97402,5E57,5E57
1,01/01/2016 00:00:04,97402,5E57,4X40
2,01/01/2016 00:00:04,97402,5E57,4E53
3,01/01/2016 00:00:24,97401,5T81,5T81
4,01/01/2016 00:02:45,97401,3X90,3F61


In [12]:
#clean up date column
cad_clean[["Date", "Time"]] = cad_clean["Call_Created_Time"].str.split(" ",expand=True)
cad_clean = cad_clean.drop(columns=["Call_Created_Time", "Time"])
cad_clean['Date']= pd.to_datetime(cad_clean['Date'])

In [13]:
cad_clean.head()

Unnamed: 0,Zipcode,Called,Responded,Date
0,97402,5E57,5E57,2016-01-01
1,97402,5E57,4X40,2016-01-01
2,97402,5E57,4E53,2016-01-01
3,97401,5T81,5T81,2016-01-01
4,97401,3X90,3F61,2016-01-01


In [14]:
#update dates
service = pd.to_datetime("2017-01-01")

In [15]:
#year before and after van addition
before = cad_clean[(cad_clean["Date"] > (service - pd.DateOffset(years=1))) & (cad_clean["Date"] < service)]
after = cad_clean[(cad_clean["Date"] >= service) & (cad_clean["Date"] <= service + pd.DateOffset(years=1))]
before_size = before.shape[0]
after_size = after.shape[0]

In [16]:
#CH calls before additions
ch_before = before[(before["Called"] == "CAHOOTS") & (before["Responded"] == "CAHOOTS")]
calls_before = ch_before.shape[0]

In [17]:
#diverted to CH before additions
divert_before = before[(before["Called"] != "CAHOOTS") & (before["Responded"] == "CAHOOTS")]
divert_before_num = divert_before.shape[0]

In [18]:
#CH calls after additions
ch_after = after[(after["Called"] == "CAHOOTS") & (after["Responded"] == "CAHOOTS")]
calls_after = ch_after.shape[0]

In [19]:
#diverted to CH after additions
divert_after = after[(after["Called"] != "CAHOOTS") & (after["Responded"] == "CAHOOTS")]
divert_after_num = divert_after.shape[0]

**Analysis**

In [20]:
#calculate z-score

def z_score(c1, c2, n1, n2):
    p1 = c1 / n1
    p2 = c2 / n2
    p = (p1 * n1 + p2 * n2) / (n1 + n2)
    z = (p1 - p2) / np.sqrt(p * (1 - p) * (1 / n1 + 1 / n2)) 
    p_value = 2 * (norm.cdf(-1 * abs(z)))
    return z, p_value

chcalls_z = z_score(calls_before, calls_after, before_size, after_size)
divert_z = z_score(divert_before_num, divert_after_num, before_size, after_size)

chcalls_z, divert_z

((-21.665763042901837, 4.317004685227364e-104),
 (-3.1576848418874293, 0.0015902738162257967))

In [21]:
#seperating into different years

ch_calls = cad_clean[(cad_clean["Responded"] == "CAHOOTS")]

#2016
calls_16 = cad_clean[cad_clean["Date"].dt.year == 2016]
ch_16 = ch_calls[ch_calls["Date"].dt.year == 2016]
prop_16 = ch_16.shape[0] / calls_16.shape[0]

#2017
calls_17 = cad_clean[cad_clean["Date"].dt.year == 2017]
ch_17 = ch_calls[ch_calls["Date"].dt.year == 2017]
prop_17 = ch_17.shape[0] / calls_17.shape[0]

#2018
calls_18 = cad_clean[cad_clean["Date"].dt.year == 2018]
ch_18 = ch_calls[ch_calls["Date"].dt.year == 2018]
prop_18 = ch_18.shape[0] / calls_18.shape[0]

#2019
calls_19 = cad_clean[cad_clean["Date"].dt.year == 2019]
ch_19 = ch_calls[ch_calls["Date"].dt.year == 2019]
prop_19 = ch_19.shape[0] / calls_19.shape[0]

#2020
calls_20 = cad_clean[cad_clean["Date"].dt.year == 2020]
ch_20 = ch_calls[ch_calls["Date"].dt.year == 2020]
prop_20 = ch_20.shape[0] / calls_20.shape[0]

#2021
calls_21 = cad_clean[cad_clean["Date"].dt.year == 2021]
ch_21 = ch_calls[ch_calls["Date"].dt.year == 2021]
prop_21 = ch_21.shape[0] / calls_21.shape[0]

calls_over_time = pd.DataFrame({"Year" : [2016, 2017, 2018, 2019, 2020, 2021], "Call Proportion" : 
                               [prop_16, prop_17, prop_18, prop_19, prop_20, prop_21]})

calls_over_time

Unnamed: 0,Year,Call Proportion
0,2016,0.082797
1,2017,0.104751
2,2018,0.112356
3,2019,0.113502
4,2020,0.107523
5,2021,0.110617


In [22]:
#linear regression

X = calls_over_time[["Year"]]
y = calls_over_time["Call Proportion"]

model = LinearRegression()
model.fit(X, y)

In [23]:
#predictions

future_years = [2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035]

year_predictions = pd.DataFrame({'Year': future_years})
props_predictions = model.predict(year_predictions)
year_predictions["Proportion Predictions"] = props_predictions

year_predictions

Unnamed: 0,Year,Proportion Predictions
0,2025,0.132848
1,2026,0.137092
2,2027,0.141337
3,2028,0.145581
4,2029,0.149826
5,2030,0.15407
6,2031,0.158315
7,2032,0.16256
8,2033,0.166804
9,2034,0.171049
