In [1]:
import pandas as pd
from haversine import haversine, Unit
import matplotlib.pyplot as plt
import folium
import json
import numpy as np
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
from shapely.geometry import Point, shape
from shapely.geometry.polygon import Polygon

# Load the data

In [2]:
# Read the three source CSVs from the notebook's working directory, in order:
#   bus_stops - bus stops in Kingston
#   voting    - municipal voting locations
#   census    - census income data
bus_stops, voting, census = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./bus-stops.csv",
        "./municipal-voting-locations.csv",
        "./neighbourhood-census-profiles-income-occupation-education.csv",
    )
)

# Perform Preprocessing

## Bus Stops

In [3]:
# For the bus stops only the ID (to help with debugging) and the coordinates are needed.
# Selecting the wanted columns, instead of dropping the unwanted ones, keeps this cell
# re-runnable: a second drop() would raise KeyError because the columns are already gone.
# .copy() gives an independent frame so later column assignments do not warn.
bus_stops = bus_stops[["ID", "Coordinates"]].copy()

In [4]:
# Peek at one raw coordinate value: it is stored as a comma-separated string,
# whereas we want a list of floats (or a dictionary) to compute with.
print(bus_stops.loc[0, "Coordinates"])

44.25722, -76.57395


In [5]:
# With only the relevant columns left, check each one for missing values
# (the output shows there are none).
bus_stops.isna().any()

ID             False
Coordinates    False
dtype: bool

In [6]:
# Last bus-stop step: each coordinate is still a comma-separated string, so split it
# on the comma and convert every piece to float, giving a list of floats per stop.
bus_stops["Coordinates"] = bus_stops["Coordinates"].map(
    lambda raw_text: list(map(float, raw_text.split(",")))
)

## Census

In [7]:
# Both geo columns arrive as strings: convert the comma-separated point into a list
# of floats, and decode the GeoJSON text into a dictionary.
census["geo_point_2d"] = census["geo_point_2d"].map(
    lambda point_text: list(map(float, point_text.split(",")))
)
census["GeoJSON"] = census["GeoJSON"].map(json.loads)

## Voting

In [8]:
# Check every voting column for missing values before converting the geo columns.
voting.isna().any()

Election Year                False
Electoral District Name      False
Electoral District Number    False
Voting Location Name         False
Voting Location Address      False
Voting Location Type         False
Voting Location Opens        False
Voting Location Number        True
Voting Location Closes       False
GeoJSON                      False
geo_point_2d                 False
dtype: bool

In [9]:
# Both geo columns arrive as strings: convert the comma-separated point into a list
# of floats, and decode the GeoJSON text into a dictionary.
voting["geo_point_2d"] = voting["geo_point_2d"].map(
    lambda point_text: list(map(float, point_text.split(",")))
)
voting["GeoJSON"] = voting["GeoJSON"].map(json.loads)

In [10]:
# Compute how many bus stops are within MAX_STOP_DISTANCE_M metres of each voting location.
# `counts` ends up with one integer per row of `voting`, in row order.
MAX_STOP_DISTANCE_M = 250  # a stop strictly closer than this counts as "nearby"

# Iterate over the coordinate values directly rather than with nested iterrows():
# iterrows() builds a full Series for every row, which is wasted work when only one
# field per row is read, and this loop visits every (voting location, bus stop) pair.
stop_coords = bus_stops["Coordinates"].tolist()
counts = [
    sum(
        haversine(voting_coords, stop, unit=Unit.METERS) < MAX_STOP_DISTANCE_M
        for stop in stop_coords
    )
    for voting_coords in voting["geo_point_2d"]
]

In [11]:
# Attach the per-location bus-stop counts to the voting data as a "stops_nearby" column.
voting = voting.assign(stops_nearby=counts)

In [12]:
# Plot every voting location on a map: a red marker means no bus stop was counted
# nearby, a green marker means at least one was.
m = folium.Map(
    location=[44.255457210079605, -76.57542256523625],
    zoom_start=13,
    tiles="OpenStreetMap",
)

for coords, n_stops in zip(voting["geo_point_2d"], voting["stops_nearby"]):
    marker_color = "red" if n_stops == 0 else "green"
    folium.Marker(location=coords, icon=folium.Icon(color=marker_color)).add_to(m)

In [13]:
# leave the map as the cell's last expression so the notebook renders it
m