In [None]:
library(tidyverse)
library(magrittr)
library(ggmap)
library(DT)

In [None]:
# Cap how many rows and columns of a table are shown when it is printed
# (repr display options).
options(
  repr.matrix.max.rows = 20,
  repr.matrix.max.cols = 30
)

In [None]:
# Preview the first rows of the crime data.
crime %>% head()

In [None]:
# Print the structure of the crime data: column names, types, sample values.
crime %>% str()

In [None]:
# Show which class(es) the crime object has.
crime %>% class()

In [None]:
# Count the missing values in each column.
crime %>%
  is.na() %>%
  colSums()

In [None]:
# Keep only the rows of `crime` that have no missing value in any column.
crime_clean <- tidyr::drop_na(crime)

In [None]:
# Incidents reported in the evening: hour 18 through hour 23, inclusive.
# `hour` is coerced to numeric inside the filter because the notebook only
# converts the column in a later cell; without the coercion a non-numeric
# `hour` would be compared against 18 and 23 as text rather than as a number.
crime_evening <- crime %>%
  filter(between(as.numeric(hour), 18, 23))

In [None]:
# Store the hour column as numeric.
crime[["hour"]] <- as.numeric(crime[["hour"]])

In [None]:
#We can use the select() function to select specific columns.

In [None]:
# Keep only the time, date, offense and location columns, in that order.
crime_selected <- select(
  crime,
  all_of(c("time", "date", "offense", "location"))
)

# Preview the reduced table.
crime_selected %>% head()

In [None]:
#We can filter specific rows from the dataset.

In [None]:
# Keep only the rows whose offense is "aggravated assault"; rows with a
# missing offense are dropped as well.
crime_filtered <- filter(crime, offense %in% "aggravated assault")

# Preview the matching rows.
crime_filtered %>% head()

In [None]:
# We can create new columns with the mutate() function.

In [None]:
# Add an `is_night` flag: "No" for hours from 6 up to (but not including) 18,
# "Yes" for every other hour; a missing hour yields NA.
crime <- mutate(
  crime,
  is_night = ifelse(hour >= 6 & hour < 18, "No", "Yes")
)

# Preview the table with the new column.
crime %>% head()

In [None]:
#We can use the group_by() and summarize() functions to group
#and summarize the dataset according to a specific criterion

In [None]:
# Number of incidents per offense type, one row per offense.
crime_summary <- crime %>%
  group_by(offense) %>%
  tally(name = "total_incidents")

# Display the summary table.
crime_summary

In [None]:
# Number of crimes committed in each month, one row per month.
crime %>%
  group_by(month) %>%
  summarise(count = n(), .groups = "drop")

In [None]:
# The pivot_longer() function is used to convert a wide data set to a long format.
# For example, let's turn the lon and lat columns into a longer format.

In [None]:
# Reshape the lon/lat columns to long format: each incident becomes two rows,
# one per coordinate, with the coordinate name in `coordinate_type` and its
# number in `value`.
#
# An `incident_id` (the original row number) is added first. Without it,
# incidents that agree on every remaining column cannot be told apart after
# the pivot, so widening the data again would have to merge their coordinates
# instead of restoring the original rows.
crime_longer <- crime %>%
  mutate(incident_id = row_number()) %>%
  pivot_longer(
    cols = c(lon, lat),
    names_to = "coordinate_type",
    values_to = "value"
  )

# Preview the long-format table.
head(crime_longer)

In [None]:
# The pivot_wider() function is used to convert a long data set to a wide format.

In [None]:
# Spread `coordinate_type` back into separate columns filled from `value`.
# `values_fn = mean` means that rows sharing every other column are combined
# by averaging their values into a single cell.
crime_wider <- pivot_wider(
  crime_longer,
  names_from = coordinate_type,
  values_from = value,
  values_fn = mean
)

# Preview the wide-format table.
crime_wider %>% head()

In [None]:
# Find repeated rows: count how often each combination of the listed columns
# occurs in the long table and keep the combinations seen more than once.
crime_longer %>%
  summarise(
    n = n(),
    .by = all_of(c(
      "time", "date", "hour", "premise", "offense",
      "beat", "block", "street", "type", "suffix",
      "number", "month", "day", "location", "address",
      "is_night", "coordinate_type"
    ))
  ) %>%
  filter(n >= 2L)

In [None]:
# First, build a small second data set: one row per distinct
# (beat, location) pair found in `crime`.
crime_subset <- crime %>%
  distinct(beat, location)

# Now combine the two tables with a left join on `beat`.
# NOTE: a beat can appear in several rows of both tables, so the relationship
# is declared as many-to-many. Both tables also carry a `location` column, so
# the result holds one copy from each side (`location.x` and `location.y`).
crime_joined <- left_join(
  crime,
  crime_subset,
  by = join_by(beat),
  relationship = "many-to-many"
)

# Preview the joined table.
crime_joined %>% head()