In [33]:
# Load all required libraries for the notebook, including data package
if(!require("opendatatoronto")) {
    install.packages("opendatatoronto")
    library(opendatatoronto)
}
library(dplyr)
library(stringr)
library(ggplot2)
library(ggspatial)
library(tidyjson)
library(tidyverse)
library(tidygeocoder)
library(sf)
library(mapview)
library(opendatatoronto)

In [34]:
# Get Data - Traffice
# output Data Description:
# Dataframe with all intersection and daily count ( peak 4 hours), including lng / lat

# ? and todo:
# is separate street name needed
# direction of the street to be determined. How?


# package_traffic <- show_package("traffic-volumes-at-intersections-for-all-modes")

# get all resources for this package
resources <- list_package_resources("traffic-volumes-at-intersections-for-all-modes")

# identify datastore resources; by default, Toronto Open Data sets datastore resource format to CSV for non-geospatial and GeoJSON for geospatial resources
datastore_resources <- filter(resources, tolower(format) %in% c("csv", "geojson"))

# load data
location <- filter(datastore_resources, row_number() == 1) %>% get_resource()
traffic1 <- filter(datastore_resources, row_number() == 3) %>% get_resource()
traffic2 <- filter(datastore_resources, row_number() == 4) %>% get_resource()
traffic3 <- filter(datastore_resources, row_number() == 5) %>% get_resource()
traffic4 <- filter(datastore_resources, row_number() == 6) %>% get_resource()
traffic5 <- filter(datastore_resources, row_number() == 7) %>% get_resource()


In [35]:
# clean and transform load - Traffic Data
# Output data for modelling CleanTraffic
# define parameters for cleaning
peakhours <- 4 # number of peak hours of data per day. value should be between 1 and 10


clean_T1 <- traffic1 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume
clean_T2 <- traffic2 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggsregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume

clean_T3 <- traffic3 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume

clean_T4 <- traffic4 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume

clean_T5 <- traffic5 %>%
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "centreline_type",
    "time_start", "sb_cars_r", "sb_cars_t", "sb_cars_l",
    "nb_cars_r", "nb_cars_t", "nb_cars_l", "wb_cars_r", "wb_cars_t", "wb_cars_l",
    "eb_cars_r", "eb_cars_t", "eb_cars_l"
  ))) %>% # select needed attributes
  filter(centreline_type == 2) %>% # only need intersection data
  mutate(counthour = str_extract(time_start, "(?<=T)(\\d+)(?=\\:)")) %>% # extract hour
  mutate(total_int_traffic = sb_cars_r + sb_cars_t + sb_cars_l +
    nb_cars_r + nb_cars_t + nb_cars_l + wb_cars_r + wb_cars_t +
    wb_cars_l + eb_cars_r + eb_cars_t + eb_cars_l) %>% # get total sum
  mutate(nb_exit_traffic = nb_cars_t + eb_cars_l + wb_cars_r) %>% # get north bound exit volume
  mutate(sb_exit_traffic = sb_cars_t + eb_cars_r + wb_cars_l) %>% # get south bound exit volume
  mutate(wb_exit_traffic = wb_cars_t + nb_cars_l + sb_cars_r) %>% # get west bound exit volume
  mutate(eb_exit_traffic = eb_cars_t + nb_cars_r + sb_cars_l) %>% # get east bound exit volume
  select(one_of(c(
    "count_date", "location_id", "location", "lng", "lat", "counthour",
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  ))) %>% # remove raw attributes, retain aggregate only
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat", "counthour")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) %>% # agregate hourly volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  slice_max(order_by = total_int_traffic, n = peakhours) %>% # filter top peak hour volume
  group_by(across(all_of(c("count_date", "location_id", "location", "lng", "lat")))) %>%
  summarise(across(any_of(c(
    "total_int_traffic", "nb_exit_traffic", "sb_exit_traffic", "wb_exit_traffic",
    "eb_exit_traffic"
  )), sum)) # aggregate daily peak hour volume


CleanTraffic <- bind_rows(clean_T1, clean_T2, clean_T3, clean_T4, clean_T5)
head(CleanTraffic)


[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'count_date', 'location_id', 'location',
'lng', 'lat'. You can override using the `.groups` ar

count_date,location_id,location,lng,lat,total_int_traffic,nb_exit_traffic,sb_exit_traffic,wb_exit_traffic,eb_exit_traffic
<chr>,<int>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>
1984-01-03,4167,DUNDAS ST AT SPADINA AVE (PX 277),-79.39805,43.65294,8407,3286,2540,1545,1036
1984-01-03,4386,DUNDAS ST AT SCARLETT RD (PX 496),-79.49921,43.66552,9414,1845,0,4736,2833
1984-01-03,5292,BAY ST AT DUNDAS ST W (PX 66),-79.38377,43.65571,8238,2545,2046,1643,2004
1984-01-04,4993,EGLINTON AVE AT SPADINA RD (PX 99),-79.41911,43.70235,7868,0,1539,4076,2253
1984-01-04,5349,DUFFERIN ST AT DUNDAS ST W (PX 190),-79.43145,43.64965,6514,1685,1084,1820,1925
1984-01-04,5589,DIXON AVE AT DUNDAS ST & KINGSTON RD (PX 163),-79.31118,43.66997,4625,2348,1075,1202,0


In [36]:
# Get Green P Parking package from Open Data-Toronto
package <- show_package("b66466c3-69c8-4825-9c8b-04b270069193")

data=as.data.frame('Green P Parking')  # read the dataset Green P Parking in the package 
data<-show_package(package)

resources<-list_package_resources(package)

# Identify resources
data_resources <- filter(resources, tolower(format) %in% c("csv", "json"))

# Load Green P Parking 2019 data
data <- filter(data_resources, row_number() == 1) %>% get_resource()

# Extract required columns from main data
data<- data.frame(address=data$carparks$address,
                                     lat=data$carparks$lat,
                                     lng=data$carparks$lng,
                                      carpark_type=data$carparks$carpark_type_str,
                                      rate_half_hr=data$carparks$rate_half_hour,
                                      capacity=data$carparks$capacity
                  )

# Check class of each attribute
sapply(data, class) 

# Convert char to numeric class
data$lat<-as.numeric(data$lat)
data$lng<-as.numeric(data$lng)
data$rate_half_hr<-as.numeric(data$rate_half_hr)
data$capacity<-as.numeric(data$capacity)

sapply(data, class) 

# Check for missing values
any(is.na(data))

# Extract street name from address
data <- data %>%
  mutate(extracted_address = str_replace_all(data$address, "\\(.*?\\)",""))

data$extracted_address<-str_replace_all(data$extracted_address, "-.*", "")
data$extracted_address<-str_replace_all(data$extracted_address, ",.*", "")

# Extract data with carpark_type as 'Surface'
data <- data %>%
  filter(carpark_type == "Surface")

# Convert Lat/Lng to address
geo_rev_data<-data %>%
  tidygeocoder::reverse_geocode(
    lat=lat,
    long=lng,
    method="osm")

# Plot map
map<-data %>%
  st_as_sf(
    coords=c("lng","lat"),
    crs=4326
  )

map %>% mapview()

# Define the polygon coordinates
polygon_coords <- matrix(c(
  -79.4289964889767, 43.6700360241176, -79.4226792245877, 43.6543887000655, -79.4000484228339, 43.657948514946, -79.4070875693025, 43.6748646586934,-79.4289964889767, 43.6700360241176  # Repeat first point to close the polygon exactly
), ncol = 2, byrow = TRUE)

# Create a polygon geometry
polygon <- st_polygon(list(polygon_coords))

# Convert the polygon object to an 'sf' object with a specified CRS (Coordinate Reference System)
polygon <- st_sfc(polygon, crs = 4326)

# Convert the data frame 'data' to an 'sf' object, specifying the columns containing longitude and latitude as coordinates, and set the CRS
data_sf <- st_as_sf(data, coords = c("lng", "lat"), crs = 4326)

# Find the indices of the rows in 'data_sf' that fall within the polygon
indices <- st_within(data_sf, polygon)

# Convert the indices to a data frame
indices_df<-data.frame(indices)

selected_rows <- data_sf[match(indices_df$row.id, seq_len(nrow(data))), ]

# Print the selected rows
print(selected_rows) 

# Plot selected rows on map
map_df <- selected_rows %>%
  st_as_sf() %>%
  st_set_crs(4326) %>%
  fortify()   # Convert the 'sf' object to a format suitable for use with 'ggplot2'

# Display the spatial object using 'mapview'
mapview(map_df)



Passing 220 coordinates to the Nominatim single coordinate geocoder

Query completed in: 224.3 seconds

[1m[22mNew names:
[36m•[39m `address` -> `address...1`
[36m•[39m `address` -> `address...8`


Simple feature collection with 8 features and 5 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -79.42565 ymin: 43.65635 xmax: -79.40412 ymax: 43.66688
Geodetic CRS:  WGS 84
                                    address carpark_type rate_half_hr capacity
16                    365 Lippincott Street      Surface          2.0      144
27                    557 Palmerston Avenue      Surface          2.0       55
33                       675 Manning Avenue      Surface          2.0       49
34  745 Ossington Avenue, 16 Carling Avenue      Surface          2.0       45
48                       376 Clinton Street      Surface          1.5       33
90                        80 Clinton Street      Surface          1.5       25
110                          4 Spadina Road      Surface          1.5       51
121                    292 Brunswick Avenue      Surface          2.0       19
        extracted_address                   geometry
16  365 Lippincott Street POINT (-79.40966 

In [37]:
# Get Data - Intersection
# output Data Description:


In [38]:
# Get Data - Business
# output Data Description: 

In [39]:
# Define Region of Interest - Boundary
## coordinates manually looked up from location dataset
# 1406	5370	DUPONT ST AT OSSINGTON AVE (PX 842)	-79.429019	43.670031996501194
# 251	4180	DUPONT ST AT SPADINA RD (PX 840)	-79.407122	43.67485699954096
# 1885	5864	COLLEGE ST AT OSSINGTON AVE (PX 829)	-79.422705	43.65439999619167
# 241	4170	COLLEGE ST AT SPADINA AVE (PX 279)	-79.400048	43.65794800150128

# Input
# Output

boundary <- location %>%
  select(location_id, location, lng, lat) %>%
  filter(location_id %in% list(5370, 4180, 5864, 4170)) # boundary intersection ID

lng_min <- min(boundary$lng) # west most value since it's negative
lng_max <- max(boundary$lng) # east most value
lat_min <- min(boundary$lat) # south most value
lat_max <- max(boundary$lat) # north most value


In [40]:
# Combine Data for model building
# what is the expected output of data structure


In [41]:
# Model 1 - Time Series Forecast
# additional data processing needed before modelling


In [42]:
# Model 2 - Regression Model

# Read data
parking<-read.csv("Parking.csv")
business<-read.csv("business.csv")

# Convert the business dataset to a spatial object
business_sf <- st_as_sf(business, coords = c("long", "lat"), crs = 4326)

# Convert the parking dataset to a spatial object
parking_sf <- st_as_sf(parking, coords = c("lng", "lat"), crs = 4326)

# Perform a spatial join to find the nearest business for each parking space
nearest_business <- st_nearest_feature(parking_sf, business_sf)

# Add the nearest business information to the parking dataset
parking_data_with_nearest_business <- cbind(parking, nearest_business)

#Display data on map
map<-parking_data_with_nearest_business %>% 
st_as_sf(
    coords=c("lng","lat"),
    crs=4326
  )

map %>% mapview()

# (To be replaced with traffic volume data)
parking_data_with_nearest_business$traffic_volume = sample(2:200,length(parking_data_with_nearest_business), replace=F)  #c(100, 150, 90, 50, 250, 75, 65, 40)

# An empty list to store the results
result_list <- list()

# Iterate through each parking spot and business data
for (i in 1:nrow(parking_data_with_nearest_business)){
  
  # A temporary data frame to store the results for this parking spot
  temp_df <- data.frame(
    address = character(),
    lat = double(),
    lng = double(),
    distance = double(),
    capacity = integer(),
    traffic_volume = integer(),
    n_business = integer(),
    n_customers =integer(),
    time_spent = double(),
    category = character(),
    interaction_term= integer(),
    
    stringsAsFactors = FALSE
  )
  
  for( j in 1:nrow(business)){
    lon1 <- parking_data_with_nearest_business[i,"lng"]
    lat1 <- parking_data_with_nearest_business[i,"lat"]
    lon2 <- business[j, "long"]
    lat2 <- business[j, "lat"]
    
    # Function to calculate angular distance between two points on Earth
    haversine_distance <- function(lon1, lat1, lon2, lat2) {
      R <- 6371 # Earth radius in km
      dlat <- (lat2 - lat1) * pi / 180
      dlon <- (lon2 - lon1) * pi / 180
      a <- sin(dlat/2)^2 + cos(lat1 * pi / 180) * cos(lat2 * pi / 180) * sin(dlon/2)^2
      c <- 2 * atan2(sqrt(a), sqrt(1 - a))
      distance <- R * c
      return(distance) # Distance in km
    }
    
    distance <- haversine_distance(lon1, lat1, lon2, lat2)

    # Store the results in the temporary data frame
    temp_df[nrow(temp_df) + 1, ] <- list(
      address = parking_data_with_nearest_business[i,"address"],
      lat = parking_data_with_nearest_business[i,"lat"],
      lng = parking_data_with_nearest_business[i,"lng"],
      distance = distance,
      capacity = parking_data_with_nearest_business[i,"capacity"],
      traffic_volume = parking_data_with_nearest_business[i,"traffic_volume"],
      n_business = parking_data_with_nearest_business[i,"nearest_business"],
      n_customers = business[j,"qCustomer"],
      time_spent = business[j,"tCustomer"],
      category = business[j,"Category"],
      interaction_term = business[j,"qCustomer"] * business[j,"tCustomer"]
    )
  }
  
  # Append the temporary data frame to the result list
  result_list[[i]] <- temp_df
}

# Combine all the results into a single data frame
result_df <- do.call(rbind, result_list)
#result_df

# Filter data to have rows that have distance less than 50 meters between parking spot and business
filtered_result_df <- result_df %>%
  filter(distance <= 0.05)


# Define a scoring function
calculate_score <- function(traffic_volume, capacity, nearest_businesses, customers, c_time, distance) {
  # Define weights for each factor
  weights <- c(0.2, 0.2, 0.2, 0.2, 0.2, 0.1)
  
  # Normalize each factor
  normalized_traffic <- (traffic_volume - min(traffic_volume)) / (max(traffic_volume) - min(traffic_volume))
  normalized_capacity <- (capacity - min(capacity)) / (max(capacity) - min(capacity))
  normalized_n_businesses <- (nearest_businesses - min(nearest_businesses)) / (max(nearest_businesses) - min(nearest_businesses))
  normalized_customers <-(customers - min(customers)) / (max(customers) - min(customers))
  normalized_distance <- (distance - min(distance)) / (max(distance) - min(distance))
  normalized_time <- (c_time - min(c_time)) / (max(c_time) - min(c_time))

  # Calculate the score
  score <- weights[1] * normalized_traffic +
    weights[2] * normalized_capacity +
    weights[3] * normalized_distance +
    weights[4] * normalized_n_businesses + 
    weights[5] *(normalized_customers * normalized_time)

  return(abs(score))
}

# Calculate score for each parking spot
filtered_result_df$score <- calculate_score(filtered_result_df$traffic_volume, filtered_result_df$capacity, filtered_result_df$n_busines, filtered_result_df$n_customers, filtered_result_df$time_spent,filtered_result_df$distance)

# Rank the parking spots based on the score
ranked_data <- filtered_result_df[order(-filtered_result_df$score),]


# Find highest scored parking spot
highest_score_index <- which.max(ranked_data$score)
highest_score_parking_spot <- filtered_result_df[highest_score_index, ]

# Print the result
print(paste0("Address of parking spot: ",highest_score_parking_spot$address))
print(paste0("Latitude of parking spot: ",highest_score_parking_spot$lat))
print(paste0("Longitude of parking spot: ",highest_score_parking_spot$lng))



[1] "Address of parking spot: 365 Lippincott Street"
[1] "Latitude of parking spot: 43.665054"
[1] "Longitude of parking spot: -79.409662"


In [43]:
filtered_result_df

address,lat,lng,distance,capacity,traffic_volume,n_business,n_customers,time_spent,category,interaction_term,score
<chr>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<dbl>,<chr>,<dbl>,<dbl>
365 Lippincott Street,43.66505,-79.40966,0.03082028,144,160,267,32,0.47953392,RETAIL STORE (FOOD),15.345086,0.6188305
365 Lippincott Street,43.66505,-79.40966,0.0319679,144,160,267,29,0.45823541,RETAIL STORE (FOOD),13.288827,0.6107672
365 Lippincott Street,43.66505,-79.40966,0.02894485,144,160,267,24,1.3096895,RETAIL STORE (FOOD),31.432548,0.5536481
557 Palmerston Avenue,43.66504,-79.41358,0.02930292,55,120,88,24,0.55180162,RETAIL STORE (FOOD),13.243239,0.1890022
675 Manning Avenue,43.66465,-79.41601,0.04955736,49,49,321,35,0.08341729,RETAIL STORE (FOOD),2.919605,0.448
675 Manning Avenue,43.66465,-79.41601,0.04653679,49,49,321,33,0.33700847,RETAIL STORE (FOOD),11.12128,0.4525317
292 Brunswick Avenue,43.66537,-79.40764,0.04700399,19,150,143,29,0.64970145,RETAIL STORE (FOOD),18.841342,0.4463985


In [44]:
filtered_result_df =filtered_result_df %>% mutate(interaction_term = n_customers * time_spent)

# Filter test data for distances less than 50
test_data_filtered <- data %>% filter(distance < 50)

model <- lm(capacity ~ traffic_volume  + distance + n_customers + time_spent + interaction_term, data = filtered_result_df)

summary(model)





Call:
lm(formula = capacity ~ traffic_volume + distance + n_customers + 
    time_spent + interaction_term, data = filtered_result_df)

Residuals:
      1       2       3       4       5       6       7 
-17.226  27.522   2.307 -10.876  -4.539   9.017  -6.206 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)
(Intercept)       -232.6392   280.0263  -0.831    0.559
traffic_volume       0.3149     0.6158   0.511    0.699
distance         -6296.0360  2717.8341  -2.317    0.259
n_customers         16.7011     8.2198   2.032    0.291
time_spent         302.4683   385.5076   0.785    0.576
interaction_term    -9.2512    16.0736  -0.576    0.668

Residual standard error: 36.31 on 1 degrees of freedom
Multiple R-squared:  0.9279,	Adjusted R-squared:  0.5673 
F-statistic: 2.573 on 5 and 1 DF,  p-value: 0.4396


In [61]:
# Iterate for different EV adoption rates
x_percent_values <- seq(from = 0.1, to = 1, by = 0.4)  
all_results <- data.frame()

for (x_percent in x_percent_values) {
  # Create new traffic scenario
  new_traffic_scenario = result_df$traffic_volume * x_percent
  # Make predictions
  predicted_capacity <- predict(model, newdata = data.frame(traffic_volume = new_traffic_scenario,
                                                             distance = result_df$distance,
                                                             n_customers = result_df$n_customers,
                                                             time_spent = result_df$time_spent,
                                                             interaction_term = result_df$interaction_term))
  # Determine conversion needs
  conversion_needed <- ifelse(predicted_capacity > 1, ceiling(predicted_capacity), 0)

  # Store results for this scenario 
  results <- data.frame(x_percent, conversion_needed)
  all_results <- rbind(all_results, results)
}

# Sort combined results
all_results <- arrange(all_results, x_percent)

# Create table
library(kableExtra)  
table <- kable(all_results, caption = "Conversion Needs by EV Adoption Rate (Distance < 50)")





Table: Conversion Needs by EV Adoption Rate (Distance < 50)

|      | x_percent| conversion_needed|
|:-----|---------:|-----------------:|
|1     |       0.1|                 0|
|2     |       0.1|                 0|
|3     |       0.1|                 0|
|4     |       0.1|                 0|
|5     |       0.1|                 0|
|6     |       0.1|                 0|
|7     |       0.1|                 0|
|8     |       0.1|                 0|
|9     |       0.1|                 0|
|10    |       0.1|                 0|
|11    |       0.1|                 0|
|12    |       0.1|                 0|
|13    |       0.1|                 0|
|14    |       0.1|                 0|
|15    |       0.1|                 0|
|16    |       0.1|                 0|
|17    |       0.1|                 0|
|18    |       0.1|                 0|
|19    |       0.1|                 0|
|20    |       0.1|                 0|
|21    |       0.1|                 0|
|22    |       0.1|                 0|
|

In [None]:
# Result and Discussion
