In [None]:
# Load all required libraries for the notebook, including data package
if(!require("opendatatoronto")) {
    install.packages("opendatatoronto")
    library(opendatatoronto)
}
library(dplyr)
library(stringr)
library(ggplot2)
library(ggspatial)
library(tidyjson)
library(tidyverse)
library(tidygeocoder)
library(sf)
library(mapview)
library(opendatatoronto)

In [None]:
# Get Data - Traffic
# Output data description:
# Data frame with every intersection and its daily count (peak 4 hours), including lng / lat

# Open questions / TODO:
# Is a separate street-name column needed?
# How should the direction of each street be determined?


# package_traffic <- show_package("traffic-volumes-at-intersections-for-all-modes")

# get all resources for this package
# Every resource published under the intersection traffic-volume package
resources <- "traffic-volumes-at-intersections-for-all-modes" %>%
  list_package_resources()

# Keep datastore resources only; by default, Toronto Open Data sets the
# datastore resource format to CSV for non-geospatial and GeoJSON for
# geospatial resources
datastore_resources <- resources %>%
  filter(tolower(format) %in% c("csv", "geojson"))

# Download by position in the filtered listing: row 1 is stored as the
# location table and rows 3-7 as the traffic extracts (row 2 is not loaded).
# NOTE(review): positions depend on the portal's listing order - confirm they
# still point at the intended resources.
location <- datastore_resources %>% slice(1) %>% get_resource()
traffic1 <- datastore_resources %>% slice(3) %>% get_resource()
traffic2 <- datastore_resources %>% slice(4) %>% get_resource()
traffic3 <- datastore_resources %>% slice(5) %>% get_resource()
traffic4 <- datastore_resources %>% slice(6) %>% get_resource()
traffic5 <- datastore_resources %>% slice(7) %>% get_resource()


In [None]:
# Clean and transform - Traffic Data
# Output data for modelling: CleanTraffic
# Define parameters for cleaning
peakhours <- 4 # number of peak hours of data per day. value should be between 1 and 10

#' Summarise one raw traffic-count extract into daily peak-hour volumes.
#'
#' Keeps intersection records (`centreline_type == 2`), totals the car
#' movements per clock hour, keeps the `n_peak_hours` busiest hours of every
#' count date / location, and sums those hours.
#'
#' @param raw_counts Data frame holding the identifier columns,
#'   `centreline_type`, `time_start` and the twelve `*_cars_{r,t,l}` columns
#'   selected below.
#' @param n_peak_hours Number of busiest hours to keep per date and location.
#' @return Ungrouped data frame with one row per count date and location and
#'   the columns `total_int_traffic`, `nb_exit_traffic`, `sb_exit_traffic`,
#'   `wb_exit_traffic`, `eb_exit_traffic`.
clean_traffic_counts <- function(raw_counts, n_peak_hours = peakhours) {
  id_cols <- c("count_date", "location_id", "location", "lng", "lat")
  movement_cols <- c(
    "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l",
    "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  )
  volume_cols <- c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic",
    "wb_exit_traffic", "eb_exit_traffic"
  )

  raw_counts %>%
    # select needed attributes
    select(all_of(c(id_cols, "centreline_type", "time_start", movement_cols))) %>%
    # only need intersection data
    filter(centreline_type == 2) %>%
    mutate(
      # hour = digits between the "T" and the first ":" of time_start
      counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)"),
      # total of all twelve car movements
      total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
        nb_cars_r + nb_cars_t + nb_cars_l +
        wb_cars_r + wb_cars_t + wb_cars_l +
        eb_cars_r + eb_cars_t + eb_cars_l,
      # exit volume per heading: through movement plus the two turns that
      # leave the intersection in the same direction
      nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r,
      sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l,
      wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r,
      eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l
    ) %>%
    # aggregate hourly volume
    group_by(across(all_of(c(id_cols, "counthour")))) %>%
    summarise(across(all_of(volume_cols), sum), .groups = "drop") %>%
    # keep exactly the n busiest hours; with the default with_ties = TRUE a
    # tie on the last rank would let extra hours into the daily sum
    group_by(across(all_of(id_cols))) %>%
    slice_max(
      order_by = total_int_traffic, n = n_peak_hours, with_ties = FALSE
    ) %>%
    # aggregate daily peak hour volume
    summarise(across(all_of(volume_cols), sum), .groups = "drop")
}

clean_T1 <- clean_traffic_counts(traffic1)
clean_T2 <- clean_traffic_counts(traffic2)
clean_T3 <- clean_traffic_counts(traffic3)
clean_T4 <- clean_traffic_counts(traffic4)
clean_T5 <- clean_traffic_counts(traffic5)


CleanTraffic <- bind_rows(clean_T1, clean_T2, clean_T3, clean_T4, clean_T5)
head(CleanTraffic)


In [None]:
# Get Green P Parking package from Open Data-Toronto
package <- show_package("b66466c3-69c8-4825-9c8b-04b270069193")

resources <- list_package_resources(package)

# Identify resources
data_resources <- filter(resources, tolower(format) %in% c("csv", "json"))

# Load Green P Parking 2019 data (first CSV/JSON resource of the package)
green_p_raw <- filter(data_resources, row_number() == 1) %>% get_resource()
carparks <- green_p_raw$carparks

# Extract required columns from main data.
# `rate` duplicates `rate_half_hr` (same source field) and is kept only so the
# exported column set stays unchanged.
data <- data.frame(
  address = carparks$address,
  lat = carparks$lat,
  lng = carparks$lng,
  carpark_type = carparks$carpark_type_str,
  rate_half_hr = carparks$rate_half_hour,
  capacity = carparks$capacity,
  rate = carparks$rate_half_hour
)

# Check class of each attribute
sapply(data, class)

# Convert char to numeric class
data <- data %>%
  mutate(across(all_of(c("lat", "lng", "rate_half_hr", "capacity")), as.numeric))

sapply(data, class)

# Check for missing values
any(is.na(data))

# Extract street name from address: drop any "(...)" group, then everything
# from the first "-" onward, then everything from the first "," onward
data <- data %>%
  mutate(
    extracted_address = address %>%
      str_replace_all("\\(.*?\\)", "") %>%
      str_replace_all("-.*", "") %>%
      str_replace_all(",.*", "")
  )

# Extract data with carpark_type as 'Surface'
data <- data %>%
  filter(carpark_type == "Surface")

# Convert Lat/Lng to address (one OSM lookup per row)
geo_rev_data <- data %>%
  tidygeocoder::reverse_geocode(
    lat = lat,
    long = lng,
    method = "osm"
  )

# Convert the data frame 'data' to an 'sf' object, using the longitude and
# latitude columns as coordinates, and set the CRS
data_sf <- st_as_sf(data, coords = c("lng", "lat"), crs = 4326)

# Plot map
map <- data_sf

map %>% mapview()

# Define the polygon coordinates (lng, lat pairs)
polygon_coords <- matrix(c(
  -79.4289964889767, 43.6700360241176,
  -79.4226792245877, 43.6543887000655,
  -79.4000484228339, 43.657948514946,
  -79.4070875693025, 43.6748646586934,
  -79.4289964889767, 43.6700360241176 # Repeat first point to close the polygon exactly
), ncol = 2, byrow = TRUE)

# Create a polygon geometry
polygon <- st_polygon(list(polygon_coords))

# Wrap the polygon in a geometry column with a specified CRS (Coordinate Reference System)
polygon <- st_sfc(polygon, crs = 4326)

# Find the indices of the rows in 'data_sf' that fall within the polygon
indices <- st_within(data_sf, polygon)

# Convert the indices to a data frame
indices_df <- data.frame(indices)




In [None]:
# Row positions of the car parks that fell inside the polygon
rows_inside <- match(indices_df$row.id, seq_len(nrow(data)))
selected_rows <- data_sf[rows_inside, ]

# Prepare the selected rows for mapping: ensure an 'sf' object in CRS 4326,
# then pass it through ggplot2's fortify()
map_df <- fortify(st_set_crs(st_as_sf(selected_rows), 4326))

# Show the selected car parks with 'mapview'
mapview(map_df)



In [None]:
# Persist the selected car parks to disk, then echo them
write.csv(x = selected_rows, file = "Parking.csv", row.names = FALSE)
print(selected_rows)

In [None]:
# Flatten the selected car parks into a plain data frame with explicit
# lng / lat columns.
# st_coordinates() is applied to the sf object itself, so the geometry column
# does not have to be called "geometry".
coords <- st_coordinates(selected_rows)
selected_rows1 <- cbind(selected_rows, lng = coords[, "X"], lat = coords[, "Y"])

# Geometry is sticky under `[` on an sf object, so column subsetting cannot
# remove it; st_drop_geometry() is the step that actually drops it.
sf_df <- st_drop_geometry(selected_rows1)


parking_data <- sf_df
print(parking_data)


In [None]:
# Read the exported file back to confirm what was written
p <- read.csv("Parking.csv")
print(p)

# Rebuild the lng / lat columns from `selected_rows`, not from
# `selected_rows1`: the latter already carries lng / lat, so binding them
# again produced duplicate `lng.1` / `lat.1` columns on every run of this cell.
coords <- st_coordinates(selected_rows)
selected_rows1 <- cbind(selected_rows, lng = coords[, "X"], lat = coords[, "Y"])

# Drop the geometry column to get a plain data frame
sf_df <- st_drop_geometry(selected_rows1)

In [None]:
# Get Data - Intersection
# output Data Description:


In [None]:
# Get Data - Business
# output Data Description: 

In [None]:
# Define Region of Interest - Boundary
## coordinates manually looked up from location dataset
# 1406	5370	DUPONT ST AT OSSINGTON AVE (PX 842)	-79.429019	43.670031996501194
# 251	4180	DUPONT ST AT SPADINA RD (PX 840)	-79.407122	43.67485699954096
# 1885	5864	COLLEGE ST AT OSSINGTON AVE (PX 829)	-79.422705	43.65439999619167
# 241	4170	COLLEGE ST AT SPADINA AVE (PX 279)	-79.400048	43.65794800150128

# Input
# Output

# IDs of the four corner intersections listed above.
# An atomic vector keeps the comparison numeric; `%in%` on a list falls back
# to a character-coerced match.
boundary_ids <- c(5370, 4180, 5864, 4170)

boundary <- location %>%
  select(location_id, location, lng, lat) %>%
  filter(location_id %in% boundary_ids) # boundary intersection ID

# Bounding box of the region of interest
lng_min <- min(boundary$lng) # west most value since it's negative
lng_max <- max(boundary$lng) # east most value
lat_min <- min(boundary$lat) # south most value
lat_max <- max(boundary$lat) # north most value


In [None]:
# Combine Data for model building
# what is the expected output of data structure


In [None]:
# Model 1 - Time Series Forecast
# additional data processing needed before modelling


In [None]:
# Save the flattened parking table; it is read back by the regression cell
write.csv(x = parking_data, file = "Parking_data.csv", row.names = FALSE)


In [None]:
## Model 2 - Regression Model

# Read data
parking <- read.csv("Parking_data.csv")
business <- read.csv("business.csv")

# Convert the business dataset to a spatial object
business_sf <- st_as_sf(business, coords = c("long", "lat"), crs = 4326)

# Convert the parking dataset to a spatial object
parking_sf <- st_as_sf(parking, coords = c("lng", "lat"), crs = 4326)

# For each parking lot, the row index (into business_sf) of its nearest business
nearest_business <- st_nearest_feature(parking_sf, business_sf)

# Attach that index to the parking dataset. Only the index is added; no
# business attributes are joined here.
parking_data_with_nearest_business <- cbind(parking, nearest_business)

# Placeholder traffic volume - to be replaced with traffic data.
# NOTE(review): values are drawn at random without a seed, so every run gives
# different model inputs; sampling without replacement also fails when there
# are more than 199 parking rows.
parking_data_with_nearest_business$traffic_volume <-
  sample(2:200, nrow(parking), replace = FALSE)

# An empty list to store the results
result_list <- list()

#' Great-circle distance between two points on Earth (haversine formula).
#'
#' Vectorised: arguments may be vectors and are combined element-wise with the
#' usual recycling of arithmetic operators.
#'
#' @param lon1,lat1 Longitude and latitude of the first point, decimal degrees.
#' @param lon2,lat2 Longitude and latitude of the second point, decimal degrees.
#' @return Distance in kilometres, using an Earth radius of 6371 km.
haversine_distance <- function(lon1, lat1, lon2, lat2) {
  earth_radius_km <- 6371
  deg_to_rad <- pi / 180

  dlat <- (lat2 - lat1) * deg_to_rad
  dlon <- (lon2 - lon1) * deg_to_rad

  a <- sin(dlat / 2)^2 +
    cos(lat1 * deg_to_rad) * cos(lat2 * deg_to_rad) * sin(dlon / 2)^2
  # Rounding can push `a` marginally outside [0, 1] (near-antipodal points),
  # which would make sqrt(1 - a) NaN; clamp it back into range.
  a <- pmin(pmax(a, 0), 1)

  central_angle <- 2 * atan2(sqrt(a), sqrt(1 - a))
  earth_radius_km * central_angle
}



In [None]:
## Calculate distance between each parking spot and each business

# Distances come from haversine_distance() and are in kilometres.
# st_distance() on bare st_point() objects carries no CRS, so it returned
# planar distance in degrees, which made a 0.5 radius filter meaningless.
n_business <- nrow(business)

# Build, for parking spot `i`, one row per business: the spot's own attributes
# repeated, plus that business's distance and customer figures.
pair_with_businesses <- function(i) {
  lot <- parking_data_with_nearest_business[i, ]
  repeat_lot <- function(column) rep(lot[[column]], n_business)

  data.frame(
    address = repeat_lot("address"),
    lat = repeat_lot("lat"),
    lng = repeat_lot("lng"),
    # vectorised over all businesses at once
    distance = haversine_distance(
      lot[["lng"]], lot[["lat"]], business[["long"]], business[["lat"]]
    ),
    capacity = repeat_lot("capacity"),
    traffic_volume = repeat_lot("traffic_volume"),
    rate_half_hr = repeat_lot("rate_half_hr"),
    n_customers = business[["qCustomer"]],
    time_spent = business[["tCustomer"]],
    category = business[["Category"]],
    interaction_term = business[["qCustomer"]] * business[["tCustomer"]]
  )
}

# One data frame per parking spot; seq_len() stays empty when there are no
# parking rows (1:nrow() would iterate over c(1, 0))
result_list <- lapply(
  seq_len(nrow(parking_data_with_nearest_business)),
  pair_with_businesses
)

# Combine all the results into a single data frame
result_df <- do.call(rbind, result_list)


In [None]:
# Filter rows according to radius

# Maximum parking-to-business distance to keep, in the same units as
# result_df$distance
search_radius <- 0.50

filtered_result_df <- result_df %>%
  # filter() drops rows whose distance is NA; base `[` with a logical NA
  # would have inserted all-NA rows instead
  filter(distance <= search_radius) %>%
  # number of in-radius businesses per parking address
  add_count(address, name = "n_businesses") %>%
  # Select desired columns
  select(
    address, lat, lng, distance, rate_half_hr, n_businesses, capacity,
    n_customers, time_spent, traffic_volume, interaction_term
  )



In [None]:

# Model 1: capacity regressed on every candidate predictor
model1 <- lm(
  formula = capacity ~ traffic_volume + distance + n_customers + time_spent +
    interaction_term + rate_half_hr,
  data = filtered_result_df
)

summary(model1)

# Model 2: reduced specification without n_customers and time_spent
model2 <- lm(
  formula = capacity ~ traffic_volume + distance + rate_half_hr +
    interaction_term,
  data = filtered_result_df
)
summary(model2)

In [None]:

# Build the prediction input for one scenario: the filtered observations with
# traffic_volume replaced by the scaled values.
scenario_newdata <- function(traffic_volume) {
  data.frame(
    traffic_volume = traffic_volume,
    address = filtered_result_df$address,
    distance = filtered_result_df$distance,
    n_customers = filtered_result_df$n_customers,
    time_spent = filtered_result_df$time_spent,
    interaction_term = filtered_result_df$interaction_term,
    rate_half_hr = filtered_result_df$rate_half_hr
  )
}

# ----------------------------For 0.4 * existing traffic_volume -----------------------------------

x_percent <- 0.4
new_traffic_scenario <- filtered_result_df$traffic_volume * x_percent
newdata <- scenario_newdata(new_traffic_scenario)

# Make predictions
predicted_capacity <- predict(model2, newdata)

# Determine conversion needs: round predictions above 1 up, otherwise 0
capacity_needed <- ifelse(predicted_capacity > 1, ceiling(predicted_capacity), 0)

# Store results for this scenario
results <- data.frame(
  filtered_result_df$address, filtered_result_df$lat, filtered_result_df$lng,
  filtered_result_df$capacity, capacity_needed,
  filtered_result_df$n_businesses, filtered_result_df$n_customers,
  filtered_result_df$time_spent
)


# ----------------------------For different % of traffic_volumes -----------------------------------

# Iterate for different EV adoption rates
x_percent_values <- seq(from = 0.1, to = 1, by = 0.4)
scenario_results <- vector("list", length(x_percent_values))

for (k in seq_along(x_percent_values)) {
  x_percent <- x_percent_values[k]

  # Create new traffic scenario. Scale the *filtered* traffic volumes so the
  # vector lines up with the other filtered columns; using result_df here
  # mixed unfiltered and filtered rows.
  new_traffic_scenario <- filtered_result_df$traffic_volume * x_percent
  newdata <- scenario_newdata(new_traffic_scenario)

  # Make predictions
  # NOTE(review): the single scenario above predicts with model2 while this
  # loop uses model1 - confirm which model is intended.
  predicted_capacity <- predict(model1, newdata)
  # Determine conversion needs
  capacity_needed <- ifelse(predicted_capacity > 1, ceiling(predicted_capacity), 0)

  # Store results for this scenario, tagged with the scenario rate so the
  # combined table can be sorted and read per rate
  results <- data.frame(
    x_percent = rep(x_percent, nrow(filtered_result_df)),
    filtered_result_df$address, filtered_result_df$lat, filtered_result_df$lng,
    filtered_result_df$capacity, capacity_needed,
    filtered_result_df$n_businesses, filtered_result_df$n_customers,
    filtered_result_df$time_spent
  )
  scenario_results[[k]] <- results
}

# Combine once after the loop instead of growing a data frame per iteration
all_results <- do.call(rbind, scenario_results)

# Sort combined results by scenario rate (now a real column of all_results)
all_results <- arrange(all_results, x_percent)

# Create table
library(kableExtra)
table <- kable(all_results, caption = "Conversion Needs by EV Adoption Rate")
print(table)




In [None]:
# # Scoring function
# calculate_score <- function(traffic_volume, capacity, rate_half_hr, nearest_businesses, customers, c_time, distance) {
#   # Define weights for each factor
#   weights <- c(0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.1)
  
#   # Normalize each factor
#   normalized_traffic <- (traffic_volume - min(traffic_volume)) / (max(traffic_volume) - min(traffic_volume))
#   normalized_capacity <- (capacity - min(capacity)) / (max(capacity) - min(capacity))
#   normalized_rate <- (rate_half_hr - min(rate_half_hr)) / (max(rate_half_hr) - min(rate_half_hr))
#   normalized_n_businesses <- (nearest_businesses - min(nearest_businesses)) / (max(nearest_businesses) - min(nearest_businesses))
#   normalized_customers <-(customers - min(customers)) / (max(customers) - min(customers))
#   normalized_time <- (c_time - min(c_time)) / (max(c_time) - min(c_time))
#   normalized_distance <- (distance - min(distance)) / (max(distance) - min(distance))

#   # Calculate the score
#   score <- weights[1] * normalized_traffic +
#     weights[2] * normalized_capacity +
#     weights[3] * normalized_distance +
#     weights[4] * normalized_n_businesses + 
#     weights[5] * (normalized_customers * normalized_time) +
#     weights[6] * (normalized_rate)

#   return(abs(score))
# }

# # Calculate score for each parking spot
# filtered_result_df$score <- calculate_score(filtered_result_df$traffic_volume, filtered_result_df$capacity, filtered_result_df$rate_half_hr, filtered_result_df$n_businesses, filtered_result_df$n_customers, filtered_result_df$time_spent,filtered_result_df$distance)
# # Rank the parking spots based on the score
# ranked_data <- filtered_result_df[order(-filtered_result_df$score),]

# # Find highest scored parking spot
# highest_score_index <- which.max(ranked_data$score)
# highest_score_parking_spot <- filtered_result_df[highest_score_index, ]

# # Print the result
# print(paste0("Address of parking spot: ",highest_score_parking_spot$address))
# print(paste0("Latitude of parking spot: ",highest_score_parking_spot$lat))
# print(paste0("Longitude of parking spot: ",highest_score_parking_spot$lng))


In [None]:
# Result and Discussion
