In [1]:
library(plyr)
library(tidyverse)
library(infer)
library(janitor)
library(repr)
library(stringr)
library(ggthemes)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.6     [32m✔[39m [34mdplyr  [39m 1.0.4
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32marrange()[39m   masks [34mplyr[39m::arrange()
[31m✖[39m [34mpurrr[39m::[32mcompact()[39m   masks [34mplyr[39m::compact()
[31m✖[39m [34mdplyr[39m::[32mcount()[39m     masks [34mplyr[39m::count()
[31m✖[39m [34mdplyr[39m::[32mfailwith()[39m  masks [34mplyr[39m::failwith()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m    masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mid()[39m        masks [34mplyr[39m::id()
[31m✖[39m [34mdplyr

In [5]:
# One-time setup (left disabled): fetch the zipped VPD open-data export and
# unpack it into the working directory.
# url <- "https://geodash.vpd.ca/opendata/crimedata_download/crimedata_csv_all_years.zip"
# filename <- "crime_data.zip"
# download.file(url, destfile = filename)
# unzip("crime_data.zip")

# Load the extracted CSV; readr guesses the column types and prints the spec.
crime_data <- read_csv(file = "crimedata_csv_all_years.csv")



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  TYPE = [31mcol_character()[39m,
  YEAR = [32mcol_double()[39m,
  MONTH = [32mcol_double()[39m,
  DAY = [32mcol_double()[39m,
  HOUR = [32mcol_double()[39m,
  MINUTE = [32mcol_double()[39m,
  HUNDRED_BLOCK = [31mcol_character()[39m,
  NEIGHBOURHOOD = [31mcol_character()[39m,
  X = [32mcol_double()[39m,
  Y = [32mcol_double()[39m
)




In [18]:
# Crime types that are counted as "theft" in this analysis.
theft_crimes <- c(
    "Other Theft",
    "Theft from Vehicle",
    "Theft of Bicycle",
    "Theft of Vehicle"
)

# Normalise the column names with janitor, then flag each record whose type is
# one of the theft categories above.
crime_data <- crime_data %>%
    clean_names() %>%
    mutate(is_theft = type %in% theft_crimes)

In [19]:
# Years treated as the pre-COVID baseline (2017, 2018, 2019), stored as doubles.
pre_years <- seq(2017, 2019, by = 1)


In [20]:
# One-column tibble of the distinct, non-missing neighbourhood names, in order
# of first appearance in the data.
neighbourhoods <- tibble(
    neighbourhood = crime_data$neighbourhood %>%
        unique() %>%
        discard(is.na) %>%
        as.character()
)

# Column layout of the per-neighbourhood results table.
columns <- c(
    "neighbourhood",
    "past_prop",
    "current_prop",
    "diff_in_props",
    "p_value"
)

In [21]:
# Keep only the columns the analysis uses, restrict to years 2017 through 2020,
# and label each record with its period: years listed in `pre_years` become
# "pre_covid", every other remaining year becomes "covid".
crime_data_processed <- crime_data %>%
    select(neighbourhood, year, is_theft) %>%
    filter(year >= 2017, year < 2021) %>%
    mutate(
        period = as.factor(ifelse(year %in% pre_years, "pre_covid", "covid"))
    )
head(crime_data_processed)

neighbourhood,year,is_theft,period
<chr>,<dbl>,<lgl>,<fct>
Fairview,2019,False,pre_covid
West End,2019,False,pre_covid
West End,2020,False,covid
West End,2017,False,pre_covid
West End,2018,False,pre_covid
West End,2020,False,covid


In [22]:
# Empty results table: zero rows, one column per entry of `columns`.
results <- setNames(
    data.frame(matrix(nrow = 0, ncol = length(columns))),
    columns
)

In [23]:
# For every neighbourhood, compare the proportion of records flagged as theft
# in the "covid" period with the proportion in the "pre_covid" period, and run
# a two-sided permutation test (1000 permutations) on the difference in
# proportions. One row per neighbourhood is appended to `results`.

# Fixed seed so the permutation-based p-values are reproducible.
set.seed(12345)

# Rows are collected in a preallocated list and bound once after the loop,
# instead of growing `results` with rbind() on every iteration.
neighbourhood_rows <- vector("list", nrow(neighbourhoods))

# seq_len() gives an empty sequence when there are no neighbourhoods, whereas
# 1:nrow(...) would iterate over c(1, 0).
for (i in seq_len(nrow(neighbourhoods))) {
    n <- neighbourhoods$neighbourhood[i]
    neighbourhood_data <- crime_data_processed %>% filter(neighbourhood == n)

    # Observed theft proportion in each period; the mean of a logical vector
    # is the share of TRUE values.
    pre_covid <- neighbourhood_data %>%
        filter(period == "pre_covid")
    pre_covid_prop <- mean(pre_covid$is_theft)

    covid <- neighbourhood_data %>%
        filter(period == "covid")
    covid_prop <- mean(covid$is_theft)

    obs_diff_in_props <- covid_prop - pre_covid_prop

    # Null distribution of (covid - pre_covid) under independence of theft
    # status and period, obtained by permuting the labels.
    neighbourhood_infer <- neighbourhood_data %>%
        specify(is_theft ~ period, success = "TRUE") %>%
        hypothesise(null = "independence") %>%
        generate(reps = 1000, type = "permute") %>%
        calculate(stat = "diff in props", order = c("covid", "pre_covid"))

    # NOTE: with 1000 reps a reported p-value of 0 only means "below 1/1000";
    # infer warns about this for each affected neighbourhood.
    p_value <- neighbourhood_infer %>%
        get_p_value(obs_stat = obs_diff_in_props, direction = "two_sided") %>%
        pull()

    new_obs <- data.frame(n, pre_covid_prop, covid_prop, obs_diff_in_props, p_value)
    names(new_obs) <- columns
    neighbourhood_rows[[i]] <- new_obs
}

# Bind all per-neighbourhood rows at once; do.call(rbind, list()) is NULL, so
# an empty neighbourhood table leaves `results` untouched.
results <- rbind(results, do.call(rbind, neighbourhood_rows))

“Please be cautious in reporting a p-value of 0. This result is an approximation based on the number of `reps` chosen in the `generate()` step. See `?get_p_value()` for more information.”
“Please be cautious in reporting a p-value of 0. This result is an approximation based on the number of `reps` chosen in the `generate()` step. See `?get_p_value()` for more information.”
“Please be cautious in reporting a p-value of 0. This result is an approximation based on the number of `reps` chosen in the `generate()` step. See `?get_p_value()` for more information.”
“Please be cautious in reporting a p-value of 0. This result is an approximation based on the number of `reps` chosen in the `generate()` step. See `?get_p_value()` for more information.”
“Please be cautious in reporting a p-value of 0. This result is an approximation based on the number of `reps` chosen in the `generate()` step. See `?get_p_value()` for more information.”
“Please be cautious in reporting a p-value of 0. This result

In [24]:
# Flag which neighbourhoods reject the null hypothesis at the 5% and 10%
# significance levels, then display the table.
results <- results %>%
    mutate(
        reject5 = p_value < 0.05,
        reject10 = p_value < 0.10
    )
results

neighbourhood,past_prop,current_prop,diff_in_props,p_value,reject5,reject10
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>
Oakridge,0.5432873,0.4968354,-0.046451884,0.176,False,False
Fairview,0.6891537,0.619474,-0.069679692,0.0,True,True
West End,0.7351588,0.633131,-0.102027854,0.0,True,True
Central Business District,0.7533982,0.6046934,-0.148704859,0.0,True,True
Hastings-Sunrise,0.6451613,0.5693642,-0.075797128,0.0,True,True
Strathcona,0.5577118,0.4385246,-0.119187188,0.0,True,True
Grandview-Woodland,0.6237392,0.5506849,-0.073054262,0.0,True,True
Kitsilano,0.6463512,0.6171575,-0.029193753,0.052,False,True
Kensington-Cedar Cottage,0.6043016,0.6161616,0.011860044,0.444,False,False
Sunset,0.589672,0.5651163,-0.024555738,0.246,False,False
