In [1]:
# Load all required libraries for the notebook, including data package
if(!require("opendatatoronto")) {
    install.packages("opendatatoronto")
    library(opendatatoronto)
}
library(dplyr)
library(stringr)

Loading required package: opendatatoronto


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




In [2]:
# Get Data - Traffice
# output Data Description:
# Dataframe with all intersection and daily count ( peak 4 hours), including lng / lat

# ? and todo:
# is separate street name needed
# direction of the street to be determined. How?


# package_traffic <- show_package("traffic-volumes-at-intersections-for-all-modes")

# get all resources for this package
resources <- list_package_resources("traffic-volumes-at-intersections-for-all-modes")

# identify datastore resources; by default, Toronto Open Data sets datastore resource format to CSV for non-geospatial and GeoJSON for geospatial resources
datastore_resources <- filter(resources, tolower(format) %in% c("csv", "geojson"))

# load data
location <- filter(datastore_resources, row_number() == 1) %>% get_resource()
traffic1 <- filter(datastore_resources, row_number() == 3) %>% get_resource()
traffic2 <- filter(datastore_resources, row_number() == 4) %>% get_resource()
traffic3 <- filter(datastore_resources, row_number() == 5) %>% get_resource()
traffic4 <- filter(datastore_resources, row_number() == 6) %>% get_resource()
traffic5 <- filter(datastore_resources, row_number() == 7) %>% get_resource()


In [3]:
# clean and transform load - Traffic Data
# Output data for modelling CleanTraffic
# define parameters for cleaning
peakhours <- 4 # number of peak hours of data per day. value should be between 1 and 10


clean_T1 <- traffic1 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume
clean_T2 <- traffic2 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggsregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume

clean_T3 <- traffic3 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume

clean_T4 <- traffic4 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume

clean_T5 <- traffic5 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume


CleanTraffic <- bind_rows(clean_T1, clean_T2, clean_T3, clean_T4, clean_T5)
head(CleanTraffic)


[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` ar

count_date,location_id,location,lng,lat,total_int_traffic,nb_exit_traffic,sb_exit_traffic,wb_exit_traffic,eb_exit_traffic
<chr>,<int>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>
1984-01-03,4167,DUNDAS ST AT SPADINA AVE (PX 277),-79.39805,43.65294,8407,3286,2540,1545,1036
1984-01-03,4386,DUNDAS ST AT SCARLETT RD (PX 496),-79.49921,43.66552,9414,1845,0,4736,2833
1984-01-03,5292,BAY ST AT DUNDAS ST W (PX 66),-79.38377,43.65571,8238,2545,2046,1643,2004
1984-01-04,4993,EGLINTON AVE AT SPADINA RD (PX 99),-79.41911,43.70235,7868,0,1539,4076,2253
1984-01-04,5349,DUFFERIN ST AT DUNDAS ST W (PX 190),-79.43145,43.64965,6514,1685,1084,1820,1925
1984-01-04,5589,DIXON AVE AT DUNDAS ST & KINGSTON RD (PX 163),-79.31118,43.66997,4625,2348,1075,1202,0


In [4]:
# install.packages("tidyjson") 
# install.packages("tidygeocoder")
# install.packages("sf")
# install.packages("mapview")
# install.packages("ggspatial")

library(ggplot2)
library(ggspatial)
library(tidyjson)
library(dplyr)
library(tidyverse)
library(tidygeocoder)
library(sf)
library(mapview)
library(stringr)
library(opendatatoronto)


# Get Green P Parking package from Open Data-Toronto
package <- show_package("b66466c3-69c8-4825-9c8b-04b270069193")

data=as.data.frame('Green P Parking')  # read the dataset Green P Parking in the package 
data<-show_package(package)

resources<-list_package_resources(package)

# Identify resources
data_resources <- filter(resources, tolower(format) %in% c("csv", "json"))

# Load Green P Parking 2019 data
data <- filter(data_resources, row_number() == 1) %>% get_resource()

# Extract required columns from main data
data<- data.frame(address=data$carparks$address,
                                     lat=data$carparks$lat,
                                     lng=data$carparks$lng,
                                      carpark_type=data$carparks$carpark_type_str,
                                      rate_half_hr=data$carparks$rate_half_hour,
                                      capacity=data$carparks$capacity
                  )

# Check class of each attribute
sapply(data, class) 

# Convert char to numeric class
data$lat<-as.numeric(data$lat)
data$lng<-as.numeric(data$lng)
data$rate_half_hr<-as.numeric(data$rate_half_hr)
data$capacity<-as.numeric(data$capacity)

sapply(data, class) 

# Check for missing values
any(is.na(data))

# Extract street name from address
data <- data %>%
  mutate(extracted_address = str_replace_all(data$address, "\\(.*?\\)",""))

data$extracted_address<-str_replace_all(data$extracted_address, "-.*", "")
data$extracted_address<-str_replace_all(data$extracted_address, ",.*", "")

# Extract data with carpark_type as 'Surface'
data <- data %>%
  filter(carpark_type == "Surface")

# Convert Lat/Lng to address
geo_rev_data<-data %>%
  tidygeocoder::reverse_geocode(
    lat=lat,
    long=lng,
    method="osm")

# Plot map
map<-data %>%
  st_as_sf(
    coords=c("lng","lat"),
    crs=4326
  )

map %>% mapview()

# # Filter data to extract only parking spots with M5C pin code
# filtered_df <- geo_rev_data %>%
#   filter(grepl("M5C", address...8))
# 
# # Print the filtered data frame
# print(filtered_df)
# 
# # Convert Lat/Lng to address and plot map
# filtered_df_map<-filtered_df %>%
#   st_as_sf(
#     coords=c("lng","lat"),
#     crs=4326
#   )
# 
# filtered_df_map %>% mapview()

# # Extract street names
# 
# filtered_df_map$streets <- gsub("\\d+", "", filtered_df_map$extracted_address) # Remove numbers # nolint # nolint
# filtered_df_map$streets <- gsub("\\.$", "", filtered_df_map$streets) # Remove trailing periods # nolint
# 
# filtered_df_map$lat <- filtered_df$lat
# 
# filtered_df_map$lng <- filtered_df$lng


# Define the polygon coordinates
polygon_coords <- matrix(c(
  -79.4289964889767, 43.6700360241176, -79.4226792245877, 43.6543887000655, -79.4000484228339, 43.657948514946, -79.4070875693025, 43.6748646586934,-79.4289964889767, 43.6700360241176  # Repeat first point to close the polygon exactly
), ncol = 2, byrow = TRUE)

# Create a polygon geometry
polygon <- st_polygon(list(polygon_coords))

# Convert the polygon object to an 'sf' object with a specified CRS (Coordinate Reference System)
polygon <- st_sfc(polygon, crs = 4326)

# Convert the data frame 'data' to an 'sf' object, specifying the columns containing longitude and latitude as coordinates, and set the CRS
data_sf <- st_as_sf(data, coords = c("lng", "lat"), crs = 4326)

# Find the indices of the rows in 'data_sf' that fall within the polygon
indices <- st_within(data_sf, polygon)

# Convert the indices to a data frame
indices_df<-data.frame(indices)

selected_rows <- data_sf[match(indices_df$row.id, seq_len(nrow(data))), ]

# Print the selected rows
print(selected_rows) 

# Plot selected rows on map
map_df <- selected_rows %>%
  st_as_sf() %>%
  st_set_crs(4326) %>%
  fortify()   # Convert the 'sf' object to a format suitable for use with 'ggplot2'

# Display the spatial object using 'mapview'
mapview(map_df)




Attaching package: 'tidyjson'


The following object is masked from 'package:stats':

    filter


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyjson[39m::[32mfilter()[39m masks [34mdplyr[39m::filter(), [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m       masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE



Passing 220 coordinates to the Nominatim single coordinate geocoder



In [None]:
# Get Data - Intersection
# output Data Description:


In [None]:
# Get Data - Business
# output Data Description: 

In [None]:
# Define Region of Interest - Boundary
## coordinates manually looked up from location dataset
# 1406	5370	DUPONT ST AT OSSINGTON AVE (PX 842)	-79.429019	43.670031996501194
# 251	4180	DUPONT ST AT SPADINA RD (PX 840)	-79.407122	43.67485699954096
# 1885	5864	COLLEGE ST AT OSSINGTON AVE (PX 829)	-79.422705	43.65439999619167
# 241	4170	COLLEGE ST AT SPADINA AVE (PX 279)	-79.400048	43.65794800150128

# Input
# Output

boundary <- location %>%
  select(location_id, location, lng, lat) %>%
  filter(location_id %in% list(5370, 4180, 5864, 4170)) # boundary intersection ID

lng_min <- min(boundary$lng) # west most value since it's negative
lng_max <- max(boundary$lng) # east most value
lat_min <- min(boundary$lat) # south most value
lat_max <- max(boundary$lat) # north most value


In [None]:
# Combine Data for model building
# what is the expected output of data structure


In [None]:
# Model 1 - Time Series Forecast
# additional data processing needed before modelling


In [None]:
# Model 2 - Regression Model

# Load required libraries
library(sf)
library(mapview)
library(ggplot2)
library(ggspatial)

# Read data
parking<-read.csv("Parking.csv")
business<-read.csv("business.csv")

# Convert the business dataset to a spatial object
business_sf <- st_as_sf(business, coords = c("long", "lat"), crs = 4326)

# Convert the parking dataset to a spatial object
parking_sf <- st_as_sf(parking, coords = c("lng", "lat"), crs = 4326)

# Perform a spatial join to find the nearest business for each parking space
nearest_business <- st_nearest_feature(parking_sf, business_sf)

# Add the nearest business information to the parking dataset
parking_data_with_nearest_business <- cbind(parking, nearest_business)

#Display data on map
map<-parking_data_with_nearest_business %>%
  st_as_sf(
    coords=c("lng","lat"),
    crs=4326
  )

map %>% mapview()


parking_data_with_nearest_business$traffic_volume = c(100, 150, 90, 50, 250, 75, 65, 40)


# Define a scoring function
calculate_score <- function(traffic_volume, lat, lng, capacity, nearest_businesses) {
  # Define weights for each factor
  weights <- c(0.3, 0.1, 0.1, 0.2, 0.3)
  
  # Normalize each factor
  normalized_traffic <- (traffic_volume - min(traffic_volume)) / (max(traffic_volume) - min(traffic_volume))
  normalized_capacity <- (capacity - min(capacity)) / (max(capacity) - min(capacity))
  normalized_distance <- (nearest_businesses - min(nearest_businesses)) / (max(nearest_businesses) - min(nearest_businesses))
  
  # Calculate the score
  score <- weights[1] * normalized_traffic +
    weights[2] * normalized_capacity +
    weights[3] * normalized_distance +
    weights[4] * lat +
    weights[5] * lng
  
  return(abs(score))
}

# Calculate score for each parking spot
parking_data_with_nearest_business$score <- calculate_score(parking_data_with_nearest_business$traffic_volume, parking_data_with_nearest_business$lat, parking_data_with_nearest_business$lng, parking_data_with_nearest_business$capacity, parking_data_with_nearest_business$nearest_business)

# Rank the parking spots based on the score
ranked_data <- parking_data_with_nearest_business[order(-parking_data_with_nearest_business$score),]

highest_score_index <- which.max(ranked_data$score)
highest_score_parking_spot <- parking_data_with_nearest_business[highest_score_index, ]

# Print the parking spot with the highest score
print(highest_score_parking_spot)


In [None]:
# Result and Discussion
