/
hartford-traffic-stops.Rmd
345 lines (230 loc) · 11.9 KB
/
hartford-traffic-stops.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
---
title: "Hartford Traffic Stops"
author: "Céline Vendler"
output: html_document
---
Data courtesy of [The Stanford Open Policing Project](https://openpolicing.stanford.edu/)
```{r setup, include=FALSE}
# Set document settings
knitr::opts_chunk$set(echo = FALSE,
message = FALSE,
error = FALSE,
warning = FALSE)
# Load necessary libraries
library(lubridate)
library(ggthemes)
library(fs)
library(sf)
library(tidyverse)
```
```{r hartford_download, cache=TRUE}
# Read in the rds file containing Hartford's data directly from the link address
# housed within https://openpolicing.stanford.edu/data/; store as new dataset
# called "hartford"
hartford <- read_rds(url("https://stacks.stanford.edu/file/druid:tr137st9964/tr137st9964_ct_hartford_2019_02_25.rds"))
```
<br>
```{r frequency_graph, fig.width=10}
# I perform these calculations here so as to avoid using unexplained, "magic
# numbers" in my code Create object "minutes_in_a_day", the value of which is
# the number of minutes in a day
minutes_in_a_day <- 60*24
# Create object "five_minutes_in_seconds", the value of which is five minutes in
# seconds (the number of seconds in five minutes)
five_minutes_in_seconds <- 60*5
# Create object `00:00:00`, the value of which is the number of seconds that has
# elapsed in the day at midnight (0, as it is the beginning of the day: when the
# clock starts, so to speak)
`00:00:00` <- 0
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 3AM
`03:00:00` <- 60*60*3
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 6AM
`06:00:00` <- 60*60*6
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 9AM
`09:00:00` <- 60*60*9
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at noon
`12:00:00` <- 60*60*12
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 3PM
`15:00:00` <- 60*60*15
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 6PM
`18:00:00` <- 60*60*18
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 9PM
`21:00:00` <- 60*60*21
# Create object `03:00:00`, the value of which is the number of seconds that has
# elapsed in the day at 11:59:59PM, the last second before the next day begins
`23:59:59` <- 60*60*24-1
# To create the graphic, begin with the "hartford" dataset
hartford %>%
# Drop all rows within the variable "time" that contain NA values
drop_na(time) %>%
# Create new variable called "nice_time": the value of "nice_time" is TRUE
# whenever "time"--as an integer--is a multiple of five minutes (using
# "five_minutes_in_seconds", created above, to perform this modulo operation);
# the value of "nice_time" is FALSE whenever it is not
mutate(nice_time = case_when(as.integer(time) %% five_minutes_in_seconds == 0 ~ TRUE,
TRUE ~ FALSE)) %>%
# Plot this data using ggplot, setting "time" as the x-axis variable and fill
# equal to "nice_time" so that bins are filled according to whether or not
# they correspond to "nice_times"
ggplot(aes(x = time, fill = nice_time)) +
# Make the plot a histogram plot, setting the number of bins to be equal to
# "minutes_in_a_day" (created above)
geom_histogram(bins = minutes_in_a_day) +
# Create a temporal scale, with breaks at all the times created above (from
# midnight--00:00:00--to 11:59:59 at intervals of 3 hours)
scale_x_time(breaks = c(`00:00:00`,
`03:00:00`,
`06:00:00`,
`09:00:00`,
`12:00:00`,
`15:00:00`,
`18:00:00`,
`21:00:00`,
`23:59:59`)) +
# Give the x- and y-axes new labels; give the plot a title, subtitle, and
# caption; and give the legend a new title
labs(x = "Time of Day/Night (Hours:Minutes:Seconds)",
y = "Number of Traffic Stops",
title = "Frequency of Traffic Stops per Time of Day/Night in Hartford, CT",
subtitle = "Officers favor times in intervals of 5 minutes (12:30:00, 12:35:00) over other exact times (12:31:00, 12:36:00).",
caption = "Source: The Stanford Open Policing Project",
fill = "Type of Time") +
# Create my own discrete scale, reversing the order of the legend so that
# "5-Minute Interval" is above "Non-5-Minute" interval to match the appearance
# of the plot and labeling and assigning colors to the legend-items
scale_fill_manual(guide = guide_legend(reverse = TRUE),
labels = c("Non-5-Minute Interval", "5-Minute Interval"),
values = c("lightgoldenrod", "darkorchid4"))
```
<br>
```{r shapes_download}
# Download the shapefile for Hartford directly from the link address housed
# within https://openpolicing.stanford.edu/data/
download.file(url = "https://stacks.stanford.edu/file/druid:tr137st9964/tr137st9964_ct_hartford_shapefiles_2019_02_25.tgz",
destfile = "hartford_shapefiles.tgz",
quiet = TRUE)
# Untar or extract the contents of "hartford_shapefiles.tgz"
untar("hartford_shapefiles.tgz")
# Read in the shapefile "Hartford_Neighborhoods.shp" housed within the folder
# "ct_hartford_shapefiles/"; store as new dataset called "hartford_shapes"
hartford_shapes <- read_sf("ct_hartford_shapefiles/Hartford_Neighborhoods.shp")
# Delete the files "hartford_shapefiles.tgz" and the folder
# "ct_hartford_shapefiles/"
file_delete(c("hartford_shapefiles.tgz", "ct_hartford_shapefiles/"))
```
```{r hartford_shapes_data_manipulation}
# Create a new dataset called "black_vs_white_arrest_rates"; begin with the
# "hartford" dataset
black_vs_white_arrest_rates <- hartford %>%
# Drop all rows within the variables "lng", "lat", and "district" that contain
# NA values
drop_na(lng, lat, district) %>%
# Filter the data to exclude observations that lie beyond Hartford's borders
# (I used Google Maps to determine these numbers); filter the data to include
# only observations where race is either "black" or "white"
filter(! lng > -72.64,
! lng < -72.72,
! lat > 41.81,
! lat < 41.72,
subject_race %in% c("black", "white")) %>%
# Group the data by the variables "district" and "subject_race" in order to
# calculate arrest rates by combination of district and race
group_by(district, subject_race) %>%
# First calculate the number of arrests by combination of district and race;
# then calculate the number of stops by combination of district and race; and
# finally calculate, using the calculated "arrests" and "stops", the arrest
# rates by combination of district and race
summarize(arrests = sum(arrest_made),
stops = n(),
arrest_rate = arrests / stops) %>%
# Deselect the variables "arrests" and "stops" in order to be able to spread
# the data properly (having black and white arrest rates align)
select(-c(arrests, stops)) %>%
# Spread the data using the variable "subject_race" as the key and the
# variable "arrest_rate" as the value in order to be able to calculate the
# arrest rate ratio between black subjects and white subjects below (in order
# to be able to do "black / white" below)
spread(subject_race, arrest_rate) %>%
# Calculate the ratio between the arrest rate for black subjects and the
# arrest rate for white subjects for each district (the black to white arrest
# rates ratio)
summarize(black_to_white_ratio = black / white) %>%
# Filter the data to exclude the black to white ratio with a value of "Inf",
# as it is not feasibly plotted
filter(black_to_white_ratio != "Inf") %>%
# Rename the variable "district" to "NAME" in order to be able to left join
# this dataset with "hartford_shapes"
rename(NAME = district)
# Update "hartford_shapes" by left joining "black_vs_white_arrest_rates" to
# "hartford_shapes"
hartford_shapes <- left_join(hartford_shapes, black_vs_white_arrest_rates)
```
```{r black_and_white_arrest_locations_data_manipulation}
# Create a new dataset called "black_and_white_arrests"; begin with the
# "hartford" dataset
black_and_white_arrests <- hartford %>%
# Drop all rows within the variables "lng", "lat", and "district" that contain
# NA values
drop_na(lng, lat, district) %>%
# Filter the data to exclude observations that lie beyond Hartford's borders
# (I used Google Maps to determine these numbers); filter the data to include
# only observations where race is either "black" or "white"; filter the data
# to include only observations where an arrest was made
filter(! lng > -72.64,
! lng < -72.72,
! lat > 41.81,
! lat < 41.72,
subject_race %in% c("black", "white"),
arrest_made)
# Use st_as_sf to turn the "black_and_white_arrests" dataset into the proper
# format to be used by geom_sf; store as "black_and_white_arrest_locatiions"
black_and_white_arrest_locatiions <- st_as_sf(black_and_white_arrests,
coords = c("lng", "lat"),
crs = 4326)
```
```{r ratio_map, fig.height=7, fig.width=7}
# Create a ggplot graphic using the "hartford_shapes" data
ggplot(data = hartford_shapes) +
# Apply geom_sf to the "hartford_shapes" data so that Hartford is properly
# displayed in the graphic; fill the neighborhoods according to their
# respective black to white arrest rate ratio so as to be able to compare
# these ratios across neighborhoods, comparing (the ratios of) the rates at
# which black people are arrested at traffic stops and the rates at which
# white people are within and across neighborhoods
geom_sf(mapping = aes(fill = black_to_white_ratio)) +
# Call geom_sf on the "black_and_white_arrest_locatiions" dataset so that
# locations of arrests of both black and white people appear as black dots on
# the map (I include these points so as to give the viewer a sense of where
# arrests are most or least concentrated); slightly reduce the opacity of
# these points; and remove the legend (which includes the alpha information)
# from view
geom_sf(data = black_and_white_arrest_locatiions, mapping = aes(alpha = 0.7), show.legend = FALSE) +
# Use theme_map to get rid of the x- and y-axis ticks and perform other
# actions to get rid of characterisitcs that would unnecessarily complicate
# the graphic
theme_map() +
# Apply the continuous viridis scales in filling in the neighborhoods for
# aesthetic purposes
scale_fill_viridis_c() +
# Give the graphic a title, subtitle, and caption; give the legend a new title
labs(title = "Black to White Arrest Rate Ratios across Hartford, CT Neighborhoods",
subtitle = "In Downtown and its neighboring areas, where arrests tend to be more concentrated,\nblack people are arrested at higher rates than white people at traffic stops.",
caption = "Source: The Stanford Open Policing Project.\nNB: The black points represent arrests of both black and white people.",
fill = "Black to White Arrest Rate Ratio") +
# Adjust the position of the legend so that it does not block any of the
# graphic, adjust the caption's position so that it appears in a more
# aesthetically pleasing place, and change the color of the grid in the
# background to white to match the actual background and appear invisible, as
# Kieran Healy writes that such grids "aren't really needed"
theme(legend.position = "right",
plot.caption = element_text(hjust = 0),
panel.grid = element_line(color = "white"))
```
<br>