In [None]:
library(data.table)
library(DT)

In [None]:
library(ggmap)
library(ggplot2)

In [None]:
# Limit how much of a table the notebook renders: 15 rows and 20 columns.
options(
  repr.matrix.max.rows = 15,
  repr.matrix.max.cols = 20
)

In [None]:
# Preview the first rows of `crime`.
# NOTE(review): `crime` is not created anywhere above this line; presumably it
# is the dataset shipped with ggmap -- confirm.
head(crime)

In [None]:
# Show the structure of `crime`: its columns, their types, and sample values.
str(crime)

In [None]:
# Take a private copy of `crime` and turn that copy into a data.table, so the
# by-reference updates used further down never modify the source object.
crime <- setDT(copy(crime))

In [None]:
# Per-column summary statistics for `crime`.
summary(crime)

In [None]:
# Count the missing values in each column. vapply() pins the result to exactly
# one integer per column, so the output shape cannot change silently.
vapply(crime, function(x) sum(is.na(x)), integer(1))

# Drop every row that has a missing value in any column.
# Zero-filling is intentionally NOT applied as well: once na.omit() has run
# there is nothing left to fill, and 0 is not a meaningful replacement for
# non-numeric columns. Pick one strategy, not both.
crime <- na.omit(crime)

In [None]:
# Parse the `date` and `time` columns into Date / POSIXct. `:=` updates
# `crime` by reference, matching the rest of this notebook and avoiding the
# extra table copies that `crime$col <- ...` assignment can incur.
# Values that do not match the given format become NA.
crime[, `:=`(
  date = as.Date(date, format = "%m/%d/%Y"),
  time = as.POSIXct(time, format = "%Y-%m-%d %H:%M:%S")
)]

# Derive zero-padded character year / month / day from the parsed date.
# This has to be a second step: within one `:=` call every right-hand side is
# evaluated against the columns as they were before that call.
crime[, `:=`(
  year = format(date, "%Y"),
  month = format(date, "%m"),
  day = format(date, "%d")
)]

In [None]:
# Add the weekday name of each crime date.
# The English name is looked up from the numeric day of week (POSIXlt `wday`,
# 0 = Sunday) instead of calling weekdays(), whose output follows the session
# locale. This keeps comparisons such as `weekday == "Friday"` working on
# machines that do not run an English locale.
crime[, weekday := c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)[as.POSIXlt(date)$wday + 1L]]

In [None]:
# Number of crimes recorded in each year (count column is named `N`)
crime_summary <- crime[, .(N = .N), by = "year"]

# Show the yearly counts
print(crime_summary)

In [None]:
# Number of crimes for each offense type
crime_by_offense <- crime[, .(N = .N), by = "offense"]
print(crime_by_offense)

In [None]:
# Bar chart of the yearly crime counts held in `crime_summary`;
# stat = "identity" makes each bar as tall as its `N` value.
ggplot(data = crime_summary, mapping = aes(x = year, y = N)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Number of Crimes per Year",
    x = "Year",
    y = "Number of Crimes"
  )

In [None]:
# Label the part of the day from `hour`:
#   hour < 12        -> "Morning"
#   12 <= hour < 18  -> "Afternoon"
#   hour >= 18       -> "Evening"
# findInterval() yields 0, 1 or 2 for those ranges (NA stays NA); adding 1
# turns that into an index into the label vector.
crime[, time_of_day := c("Morning", "Afternoon", "Evening")[
  findInterval(hour, c(12, 18)) + 1L
]]

# Inspect the new column
head(crime)

In [None]:
# Average hour of occurrence for each offense type (missing hours ignored)
crime[, list(mean_hour = mean(hour, na.rm = TRUE)),
      by = "offense"]

In [None]:
# Crime counts per month, sorted by month
crime_count_by_month <- crime[, .(N = .N), by = "month"][order(month)]
print(crime_count_by_month)


In [None]:
# Keep only the rows from 2010 whose offense is "robbery"
filtered_crime <- crime[year == "2010"][offense == "robbery"]


In [None]:
# (Re)compute the weekday name of each crime date.
# The English name is looked up from the numeric day of week (POSIXlt `wday`,
# 0 = Sunday) instead of calling weekdays(), whose output follows the session
# locale. This keeps comparisons such as `weekday == "Friday"` working on
# machines that do not run an English locale.
crime[, weekday := c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)[as.POSIXlt(as.Date(date))$wday + 1L]]

# Split the address into street name and street number.
# Both patterns are anchored at the start of the string, so digits that occur
# later in the address (e.g. "100 45th st", "1200 highway 6") are left alone.

# Street name: the address with a leading "<digits><whitespace>" removed;
# addresses that do not start that way are kept unchanged.
crime[, street_name := sub("^\\d+\\s", "", address)]

# Street number: only the leading run of digits; "" when the address does not
# start with a digit.
crime[, street_number := sub("^(\\d*).*$", "\\1", address)]


In [None]:
# Number of crimes for every year / offense combination
crime_summary <- crime[, .(N = .N), by = c("year", "offense")]

# Show the counts
print(crime_summary)

In [None]:
# Mean longitude / latitude of the crimes whose weekday is "Friday"
# (missing coordinates are ignored)
crime[
  weekday == "Friday",
  list(
    avg_lon = mean(lon, na.rm = TRUE),
    avg_lat = mean(lat, na.rm = TRUE)
  )
]

In [None]:
# Mean longitude / latitude of the 2010 robberies.
# The filter and the aggregation run in a single `[i, j]` call, and
# na.rm = TRUE (as in the Friday summary) keeps one missing coordinate from
# turning the whole mean into NA.
crime[
  offense == "robbery" & year == "2010",
  .(
    mean_lon = mean(lon, na.rm = TRUE),
    mean_lat = mean(lat, na.rm = TRUE)
  )
]

In [None]:
# Give every distinct (offense, location) combination its own group number
# and store it on each row belonging to that combination
crime[, grp_id := .GRP, by = c("offense", "location")]

In [None]:
# Store the overall max, min and mean of longitude AND latitude as columns.
# There is no `by`, so each statistic is a single value repeated on every row.
# Missing coordinates are ignored.
crime[, `:=`(
  max_lon = max(lon, na.rm = TRUE),
  min_lon = min(lon, na.rm = TRUE),
  avg_lon = mean(lon, na.rm = TRUE),
  max_lat = max(lat, na.rm = TRUE),
  min_lat = min(lat, na.rm = TRUE),
  avg_lat = mean(lat, na.rm = TRUE)
)]

In [None]:
# Sort `crime` in place by address
setorderv(crime, cols = "address")

# Number the runs of consecutive rows that share the same offense: the ID
# increases by one every time the offense changes from one row to the next
crime[, `:=`(rleid_id = rleid(offense))]

In [None]:
# Blank out (set to NA) the longitude of every row whose location is
# "residence / house"; all other rows are left untouched
crime[location == "residence / house", `:=`(lon = NA)]

In [None]:
# Wide -> long: stack the offense, premise and beat columns into
# variable / value pairs, carrying time, date, location and the coordinates
# along as identifier columns
crime_long <- melt(
  crime,
  id.vars = c("time", "date", "location", "lon", "lat"),
  measure.vars = c("offense", "premise", "beat"),
  variable.name = "variable",
  value.name = "value"
)

# Preview the long table
head(crime_long)

In [None]:
# Long -> wide: one row per location, one column per offense, each cell
# holding the number of crimes for that pair (0 when the pair never occurs).
# The counts are computed explicitly and cast with a named `value.var`, so
# dcast() does not have to guess which column to aggregate.
crime_wide <- dcast(
  crime[, .N, by = .(location, offense)],
  location ~ offense,
  value.var = "N",
  fill = 0L
)

# View the reshaped data
head(crime_wide)

In [None]:
# Count the crimes for every month / offense pair
crime_summary <- crime[, .(N = .N), by = c("month", "offense")]

# Pivot the counts: one row per month, one column per offense type,
# with 0 for pairs that never occur
crime_summary_wide <- dcast(
  crime_summary,
  formula = month ~ offense,
  value.var = "N",
  fill = 0
)

# Show the wide table
print(crime_summary_wide)