In [4]:
# Load all required libraries for the notebook, including data package
if(!require("opendatatoronto")) {
    install.packages("opendatatoronto")
    library(opendatatoronto) # data source
}
if(!require("tidyjson")) {
    install.packages("tidyjson")
    library(tidyjson)
}
if(!require("tidygeocoder")) {
    install.packages("tidygeocoder")
    library(tidygeocoder)
}
if(!require("sf")) {
    install.packages("sf")
    library(sf)
}

if(!require("ggspatial")) {
    install.packages("ggspatial")
    library(ggspatial)
}
if(!require("stringr")) {
    install.packages("stringr")
    library(stringr)
}
if(!require("dplyr")) {
    install.packages("dplyr")
    library(dplyr)
}
if(!require("ggplot2")) {
    install.packages("ggplot2")
    library(ggplot2)
}
if(!require("tidyverse")) {
    install.packages("tidyverse")
    library(tidyverse)
}
if(!require("FNN")) {
    install.packages("FNN")
    library(FNN)
}

if(!require("mapview")) {
    install.packages("mapview")
    library(mapview)
}

Loading required package: opendatatoronto



Loading required package: tidyjson


Attaching package: 'tidyjson'


The following object is masked from 'package:stats':

    filter


Loading required package: tidygeocoder

Loading required package: sf

Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE

Loading required package: ggspatial

Loading required package: stringr

Loading required package: dplyr


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Loading required package: ggplot2

Loading required package: tidyverse

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.0
── [1mConflicts

In [5]:
# Get Data - Traffic
# output Data Description:
# Dataframe with all intersection and daily count ( peak 4 hours), including lng / lat

# # ? and todo:
# # is separate street name needed
# # direction of the street to be determined. How?


# # package_traffic <- show_package("traffic-volumes-at-intersections-for-all-modes")

# # get all resources for this package
# resources <- list_package_resources("traffic-volumes-at-intersections-for-all-modes")

# # identify datastore resources; by default, Toronto Open Data sets datastore resource format to CSV for non-geospatial and GeoJSON for geospatial resources
# datastore_resources <- filter(resources, tolower(format) %in% c("csv", "geojson"))

# # load data # The method of loading data directly using turns out to be insufficient as the get_resource() only returns first 32000 rows of record. 
# location <- filter(datastore_resources, row_number() == 1) %>% get_resource()
# traffic1 <- filter(datastore_resources, row_number() == 3) %>% get_resource()
# traffic2 <- filter(datastore_resources, row_number() == 4) %>% get_resource()
# traffic3 <- filter(datastore_resources, row_number() == 5) %>% get_resource()
# traffic4 <- filter(datastore_resources, row_number() == 6) %>% get_resource()
# traffic5 <- filter(datastore_resources, row_number() == 7) %>% get_resource()

# To Run the code download the raw files from 
# https://open.toronto.ca/dataset/traffic-volumes-at-intersections-for-all-modes/ 
# and save the files in .csv format to the path: ../Data/Toronto/Traffic, 7 Files below. 
# count_metadata.csv
# locations.csv
# raw-data-1980-1989.csv
# raw-data-1990-1999.csv
# raw-data-2000-2009.csv
# raw-data-2010-2019.csv
# raw-data-2020-2029.csv
location <- read.csv("../Data/Toronto/Traffic/locations.csv") # ensure these path are correct if you have to download the files manually. 
traffic1 <- read.csv("../Data/Toronto/Traffic/raw-data-1980-1989.csv")
traffic2 <- read.csv("../Data/Toronto/Traffic/raw-data-1990-1999.csv")
traffic3 <- read.csv("../Data/Toronto/Traffic/raw-data-2000-2009.csv")
traffic4 <- read.csv("../Data/Toronto/Traffic/raw-data-2010-2019.csv")
traffic5 <- read.csv("../Data/Toronto/Traffic/raw-data-2020-2029.csv")
all_traffic = bind_rows(traffic1,traffic2,traffic3,traffic4,traffic5) # combine all raw data into one data frame

In [6]:
# clean and transform load - Traffic Data
# Output data for modelling CleanTraffic
# define parameters for cleaning
peakhours <- 4 # number of peak hours of data per day. value should be between 1 and 10

# get full intersection volume for each intersection based on peak hours of each day. 
# get average vol per intersection per year. 
# get number of years list, sort from low to high

# 

CleanTraffic <- all_traffic %>%
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise_at("total_int_traffic",sum) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise_at("total_int_traffic", sum)%>% # aggregate daily peak hour volume
  mutate(count_date= as.Date(count_date, format("%Y-%m-%d"))) %>% 
  mutate(year = as.numeric(format(count_date,'%Y'))) %>% #add year
  group_by(across(all_of(c("year", "location_id", "location", "lat","lng")))) %>% # group by year to get the average per intersection per year
  summarise(AvgTotal = mean(total_int_traffic), .groups = "drop")  # average traffic volume for that year and location
traffic_years <- unique(CleanTraffic$year) # get number of years list, sort from low to high

In [7]:


# Get Green P Parking package from Open Data-Toronto
package <- show_package("b66466c3-69c8-4825-9c8b-04b270069193")

data=as.data.frame('Green P Parking')  # read the dataset Green P Parking in the package 
data<-show_package(package)

resources<-list_package_resources(package)

# Identify resources
data_resources <- filter(resources, tolower(format) %in% c("csv", "json"))

# Load Green P Parking 2019 data
data <- filter(data_resources, row_number() == 1) %>% get_resource()

# Extract required columns from main data
data<- data.frame(address=data$carparks$address,
                                     lat=data$carparks$lat,
                                     lng=data$carparks$lng,
                                      carpark_type=data$carparks$carpark_type_str,
                                      rate_half_hr=data$carparks$rate_half_hour,
                                      capacity=data$carparks$capacity,
                                      rate=data$carparks$rate_half_hour
                  )

# Check class of each attribute
sapply(data, class) 

# Convert char to numeric class
data$lat<-as.numeric(data$lat)
data$lng<-as.numeric(data$lng)
data$rate_half_hr<-as.numeric(data$rate_half_hr)
data$capacity<-as.numeric(data$capacity)

sapply(data, class) 

# Check for missing values
any(is.na(data))

# Extract street name from address
data <- data %>%
  mutate(extracted_address = str_replace_all(data$address, "\\(.*?\\)",""))

data$extracted_address<-str_replace_all(data$extracted_address, "-.*", "")
data$extracted_address<-str_replace_all(data$extracted_address, ",.*", "")

# Extract data with carpark_type as 'Surface'
data <- data %>%
  filter(carpark_type == "Surface")



In [8]:
# Graphical View of Parking Data
# Convert Lat/Lng to address
geo_rev_data<-data %>%
  tidygeocoder::reverse_geocode(
    lat=lat,
    long=lng,
    method="osm")

# Plot map
map<-data %>%
  st_as_sf(
    coords=c("lng","lat"),
    crs=4326
  )

map %>% mapview()

# Define the polygon coordinates
polygon_coords <- matrix(c(
  -79.4289964889767, 43.6700360241176, -79.4226792245877, 43.6543887000655, -79.4000484228339, 43.657948514946, -79.4070875693025, 43.6748646586934,-79.4289964889767, 43.6700360241176  # Repeat first point to close the polygon exactly
), ncol = 2, byrow = TRUE)

# Create a polygon geometry
polygon <- st_polygon(list(polygon_coords))

# Convert the polygon object to an 'sf' object with a specified CRS (Coordinate Reference System)
polygon <- st_sfc(polygon, crs = 4326)

# Convert the data frame 'data' to an 'sf' object, specifying the columns containing longitude and latitude as coordinates, and set the CRS
data_sf <- st_as_sf(data, coords = c("lng", "lat"), crs = 4326)

# Find the indices of the rows in 'data_sf' that fall within the polygon
indices <- st_within(data_sf, polygon)

# Convert the indices to a data frame
indices_df<-data.frame(indices)

selected_rows <- data_sf[match(indices_df$row.id, seq_len(nrow(data))), ]


Passing 220 coordinates to the Nominatim single coordinate geocoder

Query completed in: 227 seconds

[1m[22mNew names:
[36m•[39m `address` -> `address...1`
[36m•[39m `address` -> `address...9`


In [9]:

# Plot selected rows on map
map_df <- selected_rows %>%
  st_as_sf() %>%
  st_set_crs(4326) %>%
  fortify()   # Convert the 'sf' object to a format suitable for use with 'ggplot2'

# Display the spatial object using 'mapview'
mapview(map_df)




In [10]:
write.csv(selected_rows, "Parking.csv", row.names = FALSE)


In [11]:
# Add lat, lng and remove geometry column
selected_rows1<-selected_rows
coords <- st_coordinates(selected_rows1$geometry)
selected_rows1 <- cbind(selected_rows1, lng = coords[, "X"], lat = coords[, "Y"])
parking_data <- st_drop_geometry(selected_rows1)
write.csv(parking_data, "Parking_data.csv", row.names = FALSE)


In [12]:
# Get Data - Intersection
# output Data Description:


In [13]:
# Get Data - Business
# output Data Description: 

In [14]:
# Define Region of Interest - Boundary
## coordinates manually looked up from location dataset
# 1406	5370	DUPONT ST AT OSSINGTON AVE (PX 842)	-79.429019	43.670031996501194
# 251	4180	DUPONT ST AT SPADINA RD (PX 840)	-79.407122	43.67485699954096
# 1885	5864	COLLEGE ST AT OSSINGTON AVE (PX 829)	-79.422705	43.65439999619167
# 241	4170	COLLEGE ST AT SPADINA AVE (PX 279)	-79.400048	43.65794800150128

# Input
# Output

boundary <- location %>%
  select(location_id, location, lng, lat) %>%
  filter(location_id %in% list(5370, 4180, 5864, 4170)) # boundary intersection ID

lng_min <- min(boundary$lng) # west most value since it's negative
lng_max <- max(boundary$lng) # east most value
lat_min <- min(boundary$lat) # south most value
lat_max <- max(boundary$lat) # north most value


In [15]:
# Combine Parking Data and Clean Traffic Data to prepare input data for Time Series Model to predict traffic / EV charging demand

# loop through low to high year number
# filter each year, to list of unique intersections with traffic volume
# run knn algo against parking lot, find k nearst intersection
# use knn nn.index to find the k intersection vol, get average, creating a vector
# add vector to parking data as new column with year and volume
# next loop for the next year

#Output TS_Input

K <- 5 # Parameter used in knn calculation. 
All_park <- data %>%
    select(one_of(c("lat","lng"))) # location coordinates of all surface car parks. 

TS_Input <- data # TS_Input is the input data for the time series of traffic volume forecast. 

for (i in traffic_years) {
    # print(paste("This is year" ,i))
    year_traffic<- filter(CleanTraffic, year == i) # get the traffic volume, intersection for each year.
    All_int <-select(year_traffic,one_of(c("lat","lng"))) # extract just the coordinates for K nearest neighter calculation
    knn_dist <- get.knnx(All_int, All_park, k=K, algorithm="kd_tree") # get the K nearest intersection to the parking lot and their index list
    
    get.mean <- function(x) { # custom function defined to calculate the average of the K nearest intersection volume
    mean(slice(year_traffic, c(x))$AvgTotal)
    }

    TS_Input[paste0("NearbyTraffic_",i)]  <- apply(knn_dist$nn.index,1,get.mean) # calculate the avaerage per row of indices

}

In [26]:
write.csv(TS_Input, "TS_Input.csv", row.names = FALSE)


In [27]:
# Convert the data frame 'TS_Input' to an 'sf' object, specifying the columns containing longitude and latitude as coordinates, and set the CRS
TS_Input_sf <- st_as_sf(TS_Input, coords = c("lng", "lat"), crs = 4326)

# Find the indices of the rows in 'data_sf' that fall within the polygon
t_indices <- st_within(TS_Input_sf, polygon)

# Convert the indices to a data frame
t_indices_df<-data.frame(t_indices)

t_selected_rows <- TS_Input_sf[match(t_indices_df$row.id, seq_len(nrow(TS_Input))), ]

In [29]:
print(t_selected_rows)

Simple feature collection with 8 features and 46 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -79.42565 ymin: 43.65635 xmax: -79.40412 ymax: 43.66688
Geodetic CRS:  WGS 84
                                    address carpark_type rate_half_hr capacity
16                    365 Lippincott Street      Surface          2.0      144
27                    557 Palmerston Avenue      Surface          2.0       55
33                       675 Manning Avenue      Surface          2.0       49
34  745 Ossington Avenue, 16 Carling Avenue      Surface          2.0       45
48                       376 Clinton Street      Surface          1.5       33
90                        80 Clinton Street      Surface          1.5       25
110                          4 Spadina Road      Surface          1.5       51
121                    292 Brunswick Avenue      Surface          2.0       19
    rate     extracted_address NearbyTraffic_1984 NearbyTraffic_1985
16  2.00 365 Lippincott St

In [17]:
# Model 1 - Time Series Forecast
# additional data processing needed before modelling


In [34]:
t_selected_rows$NearbyTraffic_1985

In [137]:
## Model 2 - Regression Model

# Read data
parking<-read.csv("Parking_data.csv")
business<-read.csv("business.csv")

# Convert the business dataset to a spatial object
business_sf <- st_as_sf(business, coords = c("long", "lat"), crs = 4326)

# Convert the parking dataset to a spatial object
parking_sf <- st_as_sf(parking, coords = c("lng", "lat"), crs = 4326)

# Perform a spatial join to find the nearest business for each parking space
nearest_business <- st_nearest_feature(parking_sf, business_sf)

# Add the nearest business information to the parking dataset
parking_data_with_nearest_business <- cbind(parking, nearest_business)
#parking_data_with_nearest_business$traffic_volume= sample(2:200,nrow(parking), replace=F)  # To be replaced with traffic data
parking_data_with_nearest_business$traffic_volume=t_selected_rows$NearbyTraffic_2024

# An empty list to store the results
result_list <- list()
#parking_data_with_nearest_business <- parking


# An empty list to store the results
result_list <- list()

# Function to calculate angular distance between two points on Earth
haversine_distance <- function(lon1, lat1, lon2, lat2) {
  R <- 6371 # Earth radius in km
  dlat <- (lat2 - lat1) * pi / 180
  dlon <- (lon2 - lon1) * pi / 180
  a <- sin(dlat/2)^2 + cos(lat1 * pi / 180) * cos(lat2 * pi / 180) * sin(dlon/2)^2
  c <- 2 * atan2(sqrt(a), sqrt(1 - a))
  distance <- R * c
  return(distance) # Distance in km
}



In [138]:
## Calculate distance between each parking spot and each business
suppressWarnings({

# Create an empty list to store the results
result_list <- list()

for (i in 1:nrow(parking_data_with_nearest_business)){
  
  # Create a temporary data frame to store the results for this parking spot
  temp_df <- data.frame(
    address = character(),
    lat = double(),
    lng = double(),
    distance = double(),
    capacity = integer(),
    traffic_volume = integer(),
    rate_half_hr= double(),
    #n_business = integer(),
    n_customers =integer(),
    time_spent = double(),
    category = character(),
    interaction_term = integer()
  )
  
  for( j in 1:nrow(business)){
    lon1 <- parking_data_with_nearest_business[i,"lng"]
    lat1 <- parking_data_with_nearest_business[i,"lat"]
    lon2 <- business[j, "long"]
    lat2 <- business[j, "lat"]
 

    # Calculate distance
    distance <- st_distance(st_point(c(lon1, lat1)), st_point(c(lon2, lat2)))
    
    # Store the results in the temporary data frame
    temp_df[nrow(temp_df) + 1, ] <- list(
      address = parking_data_with_nearest_business[i,"address"],
      lat = parking_data_with_nearest_business[i,"lat"],
      lng = parking_data_with_nearest_business[i,"lng"],
      distance = distance,
      capacity = parking_data_with_nearest_business[i,"capacity"],
      traffic_volume = parking_data_with_nearest_business[i,"traffic_volume"],
      #n_business = parking_data_with_nearest_business[i,"nearest_business"],
      rate_half_hr = parking_data_with_nearest_business[i,"rate_half_hr"],
      n_customers = business[j,"qCustomer"],
      time_spent = business[j,"tCustomer"],
      category = business[j,"Category"],
      interaction_term = business[j,"qCustomer"] * business[j,"tCustomer"]
    )
  }
  
  # Append the temporary data frame to the result list
  result_list[[i]] <- temp_df
}

# Combine all the results into a single data frame
result_df <- do.call(rbind, result_list)
})


ERROR: Error in x[[jj]][iseq] <- vjj: replacement has length zero


In [139]:
# Filter rows according to radius

filtered_result_df <- result_df[result_df$distance <= 0.50, ]
a=filtered_result_df %>% count(address)


filtered_result_df <- filtered_result_df %>%
  left_join(a, by = "address")

# Select desired columns and rename the "n" column
filtered_result_df <- filtered_result_df %>%
  select(address, lat,lng, distance,rate_half_hr, n_businesses = n, capacity, n_customers, time_spent, traffic_volume, interaction_term) 


print(head(filtered_result_df))

                address      lat       lng   distance rate_half_hr n_businesses
1 365 Lippincott Street 43.66505 -79.40966 0.02237103            2          404
2 365 Lippincott Street 43.66505 -79.40966 0.01805960            2          404
3 365 Lippincott Street 43.66505 -79.40966 0.01440829            2          404
4 365 Lippincott Street 43.66505 -79.40966 0.02334676            2          404
5 365 Lippincott Street 43.66505 -79.40966 0.01839793            2          404
6 365 Lippincott Street 43.66505 -79.40966 0.03068990            2          404
  capacity n_customers time_spent traffic_volume interaction_term
1      144           5  0.4698662           3171         2.349331
2      144           2  4.9004879           3171         9.800976
3      144           6  0.7990269           3171         4.794162
4      144           2  0.7700175           3171         1.540035
5      144           2  1.0064248           3171         2.012850
6      144           4  1.1079138           

In [140]:
# filtered_result_df2 <- filtered_result_df %>%
#   group_by(address, lat, lng) %>%
#   summarise(sum_n_customers = sum(n_customers),
#             sum_time_spent = sum(time_spent),
#             sum_traffic_volume =sum(traffic_volume)) %>%
#   select(address, lat, lng, sum_n_customers, sum_time_spent, sum_traffic_volume)

# print(head(filtered_result_df2))

In [141]:
# Normalize predictors
filtered_result_df$traffic_volume <- scale(filtered_result_df$traffic_volume)
filtered_result_df$distance <- scale(filtered_result_df$distance)
filtered_result_df$n_customers <- scale(filtered_result_df$n_customers)
filtered_result_df$time_spent <- scale(filtered_result_df$time_spent)
filtered_result_df$interaction_term <- scale(filtered_result_df$interaction_term)
filtered_result_df$rate_half_hr <- scale(filtered_result_df$rate_half_hr)




In [145]:

model1 <- lm(capacity ~ traffic_volume  + distance + n_customers + time_spent + interaction_term  + rate_half_hr, data = filtered_result_df)

summary(model1)

model2 <- lm(capacity ~ traffic_volume  + distance  + rate_half_hr + interaction_term, data = filtered_result_df)
summary(model2)


Call:
lm(formula = capacity ~ traffic_volume + distance + n_customers + 
    time_spent + interaction_term + rate_half_hr, data = filtered_result_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-51.105 -12.217  -5.173   2.246  79.441 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      52.62500    0.58264  90.322   <2e-16 ***
traffic_volume   10.13521    0.64507  15.712   <2e-16 ***
distance         -1.20104    0.58582  -2.050   0.0404 *  
n_customers       0.03416    0.80525   0.042   0.9662    
time_spent        0.15736    2.07850   0.076   0.9397    
interaction_term -0.24799    2.13773  -0.116   0.9077    
rate_half_hr     16.99202    0.64527  26.333   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 33.12 on 3225 degrees of freedom
Multiple R-squared:  0.1821,	Adjusted R-squared:  0.1806 
F-statistic: 119.6 on 6 and 3225 DF,  p-value: < 2.2e-16



Call:
lm(formula = capacity ~ traffic_volume + distance + rate_half_hr + 
    interaction_term, data = filtered_result_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-51.089 -12.207  -5.156   2.237  79.427 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)       52.6250     0.5825  90.350   <2e-16 ***
traffic_volume    10.1351     0.6449  15.717   <2e-16 ***
distance          -1.1994     0.5852  -2.049   0.0405 *  
rate_half_hr      16.9919     0.6451  26.341   <2e-16 ***
interaction_term  -0.0942     0.5844  -0.161   0.8719    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 33.11 on 3227 degrees of freedom
Multiple R-squared:  0.1821,	Adjusted R-squared:  0.1811 
F-statistic: 179.6 on 4 and 3227 DF,  p-value: < 2.2e-16


In [143]:

# ----------------------------For 0.4 * existing traffic_volume -----------------------------------

all_results <- data.frame()

x_percent=0.4
new_traffic_scenario = filtered_result_df$traffic_volume * x_percent
newdata = data.frame(traffic_volume = new_traffic_scenario,
                     address=filtered_result_df$address,
                     distance = filtered_result_df$distance,
                     n_customers = filtered_result_df$n_customers,
                     time_spent = filtered_result_df$time_spent,
                     interaction_term = filtered_result_df$interaction_term,
                     rate_half_hr= filtered_result_df$rate_half_hr)

# Make predictions
predicted_capacity <- predict(model2, newdata)

# Determine conversion needs
capacity_needed <- ifelse(predicted_capacity > 1, ceiling(predicted_capacity), 0)

# Store results for this scenario 
results <- data.frame(filtered_result_df$address, filtered_result_df$lat, filtered_result_df$lng, filtered_result_df$capacity,capacity_needed, filtered_result_df$n_businesses, filtered_result_df$n_customers, filtered_result_df$time_spent)


# ----------------------------For different % of traffic_volumes -----------------------------------

# Iterate for different EV adoption rates
x_percent_values <- seq(from = 0.1, to = 1, by = 0.4)  
all_results <- data.frame()

for (x_percent in x_percent_values) {
  # Create new traffic scenario
  new_traffic_scenario = result_df$traffic_volume * x_percent
  newdata = data.frame(traffic_volume = new_traffic_scenario,
                     address=filtered_result_df$address,
                     distance = filtered_result_df$distance,
                     n_customers = filtered_result_df$n_customers,
                     time_spent = filtered_result_df$time_spent,
                     interaction_term = filtered_result_df$interaction_term,
                     rate_half_hr= filtered_result_df$rate_half_hr)

  # Make predictions
  predicted_capacity <- predict(model1, newdata)
  # Determine conversion needs
  capacity_needed <- ifelse(predicted_capacity > 1, ceiling(predicted_capacity), 0)

  # Store results for this scenario 
  results <- data.frame(data.frame(filtered_result_df$address, filtered_result_df$lat, filtered_result_df$lng, filtered_result_df$capacity,capacity_needed, filtered_result_df$n_businesses, filtered_result_df$n_customers, filtered_result_df$time_spent))
  all_results <- rbind(all_results, results)
}

# Sort combined results
all_results <- arrange(all_results, x_percent)

# Create table
library(kableExtra)  
table <- kable(all_results, caption = "Conversion Needs by EV Adoption Rate")
print(table)




ERROR: Error: variables 'traffic_volume', 'distance', 'rate_half_hr', 'interaction_term' were specified with different types from the fit


In [23]:
# # Scoring function
# calculate_score <- function(traffic_volume, capacity, rate_half_hr, nearest_businesses, customers, c_time, distance) {
#   # Define weights for each factor
#   weights <- c(0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.1)
  
#   # Normalize each factor
#   normalized_traffic <- (traffic_volume - min(traffic_volume)) / (max(traffic_volume) - min(traffic_volume))
#   normalized_capacity <- (capacity - min(capacity)) / (max(capacity) - min(capacity))
#   normalized_rate <- (rate_half_hr - min(rate_half_hr)) / (max(rate_half_hr) - min(rate_half_hr))
#   normalized_n_businesses <- (nearest_businesses - min(nearest_businesses)) / (max(nearest_businesses) - min(nearest_businesses))
#   normalized_customers <-(customers - min(customers)) / (max(customers) - min(customers))
#   normalized_time <- (c_time - min(c_time)) / (max(c_time) - min(c_time))
#   normalized_distance <- (distance - min(distance)) / (max(distance) - min(distance))

#   # Calculate the score
#   score <- weights[1] * normalized_traffic +
#     weights[2] * normalized_capacity +
#     weights[3] * normalized_distance +
#     weights[4] * normalized_n_businesses + 
#     weights[5] * (normalized_customers * normalized_time) +
#     weights[6] * (normalized_rate)

#   return(abs(score))
# }

# # Calculate score for each parking spot
# filtered_result_df$score <- calculate_score(filtered_result_df$traffic_volume, filtered_result_df$capacity, filtered_result_df$rate_half_hr, filtered_result_df$n_businesses, filtered_result_df$n_customers, filtered_result_df$time_spent,filtered_result_df$distance)
# # Rank the parking spots based on the score
# ranked_data <- filtered_result_df[order(-filtered_result_df$score),]

# # Find highest scored parking spot
# highest_score_index <- which.max(ranked_data$score)
# highest_score_parking_spot <- filtered_result_df[highest_score_index, ]

# # Print the result
# print(paste0("Address of parking spot: ",highest_score_parking_spot$address))
# print(paste0("Latitude of parking spot: ",highest_score_parking_spot$lat))
# print(paste0("Longitude of parking spot: ",highest_score_parking_spot$lng))


In [24]:
# Result and Discussion
