In [None]:
library(data.table)
library(dplyr)
library(rgdal)

In [None]:
# Read the crime records from "chicago_crimes.csv" on the user's Desktop.
# USERPROFILE is normally only defined on Windows, so fall back to the
# home directory when it is unset; the path is assembled component by
# component so no platform-specific separator is hard-coded.
home_dir <- Sys.getenv("USERPROFILE", unset = path.expand("~"))
file_name <- file.path(home_dir, "Desktop", "chicago_crimes.csv")
crimes <- fread(file_name)

In [None]:
## Keep only rows whose Latitude and Longitude are both non-zero.
## Rows where the comparison evaluates to NA are dropped as well, because
## data.table treats NA in a logical `i` as FALSE.
crimes <- crimes[Latitude != 0 & Longitude != 0]

In [None]:
# Keep only crimes dated strictly after the day the trail opened
# (2015-06-06; the opening day itself is excluded) and export them
# for all beats.
trail_opened <- as.Date("2015-06-06")
# Only the leading month/day/year part of the Date text is parsed.
crimes$Date <- as.Date(crimes$Date, format = "%m/%d/%Y")
crimes_post <- crimes[crimes$Date > trail_opened, ]
write.csv(crimes_post, file = "crime_by_all_beats.csv", row.names = FALSE)

In [None]:
# Restrict the post-opening crimes to the police beats adjacent to the
# trail and export that subset.
beat_areas <- c(1421, 1422, 1433, 1434, 2535)
in_adjacent_beat <- crimes_post$Beat %in% beat_areas
crimes_post_beat <- crimes_post[in_adjacent_beat, ]
write.csv(crimes_post_beat, file = "crime_by_beat.csv", row.names = FALSE)

In [None]:
# Widen the selection to the police beats near the trail (the last five
# entries are the adjacent beats) and export that subset. This overwrites
# both beat_areas and crimes_post_beat.
beat_areas <- c(
  1111, 1211, 1121, 1423, 1112, 1213, 1432, 1414,
  1431, 1413, 1811, 2525, 2534, 1822, 1424,
  1421, 1422, 1433, 1434, 2535
)
in_nearby_beat <- crimes_post$Beat %in% beat_areas
crimes_post_beat <- crimes_post[in_nearby_beat, ]
write.csv(
  crimes_post_beat,
  file = "crime_by_beat_plus.csv",
  row.names = FALSE
)

In [None]:
# Tally the selected crimes by primary type, then show the size of the
# selection and the most frequent types (ties keep their table order).
counts <- as.data.frame(table(crimes_post_beat[["Primary Type"]]))
dim(crimes_post_beat)
by_frequency <- order(-counts$Freq)
head(counts[by_frequency, ])

In [None]:
# Count the selected crime records per beat: one row per beat with the
# number of records in that beat.
crimes_beat <- setNames(
  as.data.frame(table(crimes_post_beat$Beat)),
  c("Beat", "Crimes")
)

In [None]:
# Convert the Beat factor to numeric by going through its labels.
# as.numeric(levels(f)) would return the level set rather than the per-row
# values, and fails once the column is no longer a factor; converting via
# as.character() is correct for any factor and safe to re-run.
crimes_beat$Beat <- as.numeric(as.character(crimes_beat$Beat))

In [None]:
# Export the per-beat crime counts without row names.
write.csv(
  x = crimes_beat,
  file = "crime_by_beat_plus_agg.csv",
  row.names = FALSE
)

In [None]:
# Load the beat polygons and keep only the beats present in crimes_beat.
beats_file <- "chicago_beats.geojson"
# The GeoJSON layer name depends on the GDAL version ("OGRGeoJSON" in older
# releases, derived from the file name in newer ones), so ask the driver
# for the available layers instead of hard-coding one.
beat_layers <- ogrListLayers(beats_file)
beat_shapes <- readOGR(beats_file, beat_layers[1])
beat_shapes <- beat_shapes[beat_shapes$beat_num %in% crimes_beat$Beat, ]