## Bikespace Analysis - Damaged Bicycle Parking Reports

This notebook takes user-submitted reports of damaged bicycle parking from the BikeSpace app and, for each report, finds up to 5 of the nearest City of Toronto bicycle parking features within a configurable search radius. The goal is to identify City bicycle parking that may need to be replaced or repaired.


In [1]:
# imports
from datetime import datetime
from zoneinfo import ZoneInfo
from pathlib import Path
import json

import contextily as cx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import pytz
import requests


In [2]:
# OPTIONS - settings a reader may want to tune before running the notebook
output_excel_path = "damage_bikespace_city_matches.xlsx"  # workbook written at the end
thumbnail_folder_path = "thumbnails"  # folder where thumbnail map images are saved
search_radius = 30  # distance around each report to search, in metres

### Get Data - BikeSpace Reports

The [BikeSpace app](https://bikespace.ca/) allows users to report issues with bicycle parking in Toronto, including parking features that are damaged. User reports can be viewed on the [BikeSpace dashboard](https://dashboard.bikespace.ca/) or downloaded via the API.

Details on the BikeSpace API can be found at [api-dev.bikespace.ca](https://api-dev.bikespace.ca/api/v2/docs).


In [3]:
# get bikespace reports
# NOTE(review): only up to `report_limit` submissions are requested; raise the
# limit if the API ever holds more reports than this - TODO confirm API paging
report_limit = 5000
bikespace_request = requests.get(
  "https://api-dev.bikespace.ca/api/v2/submissions",
  params={"limit": report_limit},
  timeout=60,  # seconds; without a timeout a stalled connection hangs the cell
)
# stop here with a clear HTTP error instead of a KeyError on 'submissions' below
bikespace_request.raise_for_status()
bikespace_response = bikespace_request.json()

# one row per submission, indexed by the submission id
bikespace_reports_data = pd.DataFrame(
  bikespace_response['submissions']
).set_index('id')

In [4]:
# convert to geodataframe: one point per report from its longitude/latitude
report_points = gpd.points_from_xy(
  bikespace_reports_data['longitude'],
  bikespace_reports_data['latitude'],
)
bikespace_reports = gpd.GeoDataFrame(
  bikespace_reports_data,
  geometry=report_points,
  crs="EPSG:4326",
)

# show a quick summary of issue types
# (a report can list several issues, so explode to one row per issue first)
bikespace_reports['issues'].explode().value_counts()

issues
not_provided    513
damaged         400
full            359
other           158
abandoned        16
Name: count, dtype: int64

In [5]:
# improve data display for "parking_time" field
def _parse_parking_time(dt_string):
  """Parse a "parking_time" string, label it as GMT, and convert it to
  America/Toronto time
  """
  parsed_gmt = datetime.strptime(
    dt_string, "%a, %d %b %Y %H:%M:%S %Z"
  ).replace(tzinfo=ZoneInfo("GMT"))
  return parsed_gmt.astimezone(ZoneInfo("America/Toronto"))


bikespace_reports = (
  bikespace_reports
  .assign(
    parking_dt=lambda r: [
      _parse_parking_time(dt_string) for dt_string in r['parking_time']
    ]
  )
  # split the local timestamp into separate date and time columns
  .assign(
    report_date=lambda r: r['parking_dt'].dt.date,
    report_time=lambda r: r['parking_dt'].dt.time,
  )
  # sort by date desc
  .sort_values(by="parking_dt", axis=0, ascending=False)
)

### BikeSpace Reports - Quality Check

Source Google sheet: [BikeSpace Data Notes and Cleanup](https://docs.google.com/spreadsheets/d/137S4d4zLhj49rEWIaaVB67UxMSU5LKMt5kIjvgYsQOU/edit?usp=sharing)

In [6]:
# load the manual quality-check notes (exported from the Google sheet) and
# attach the review columns to each report by matching on report id
quality_check_columns = ["Status", "Notes", "Survey Date"]

bs_quality_check = pd.read_csv("BikeSpace Data Notes and Cleanup - Data.csv")
bs_quality_check = bs_quality_check.set_index("id")

bikespace_reports = bikespace_reports.join(
  bs_quality_check[quality_check_columns],
  on="id",
  how="left",  # keep every report, whether or not it has a quality-check row
)

In [7]:
# quality-check statuses that remove a report from this analysis
statuses_to_drop = (
  "Resolved",
  "Invalid",
  "Needs Checking",
  "Resolution Unclear",
  "Private Property",
  "Caution",
)

# boolean mask: True means the report is KEPT (its status is not listed above)
exclude_by_status = [
  status not in statuses_to_drop
  for status in bikespace_reports['Status']
]

# apply the mask, then drop the Status column now that it has been used
bikespace_reports = (
  bikespace_reports
  .loc[exclude_by_status]
  .drop(columns=["Status"])
)

In [8]:
# get toronto ward boundaries
# https://open.toronto.ca/dataset/city-wards/
toronto_wards_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/5e7a8234-f805-43ac-820f-03d7c360b588/resource/737b29e0-8329-4260-b6af-21555ab24f28/download/City%20Wards%20Data.geojson"

toronto_wards = gpd.read_file(toronto_wards_url)
# the AREA_DESC column is used as the ward label in later cells
toronto_wards = toronto_wards.rename(columns={"AREA_DESC": "WARD"})

In [9]:
# bikespace reports within Toronto only
# (inner spatial join: reports that intersect no ward are dropped, and each
# remaining report picks up the WARD label of the ward it falls in)
ward_lookup = toronto_wards[["geometry", "WARD"]]
br_toronto = (
  bikespace_reports
  .sjoin(ward_lookup, how="inner", predicate="intersects")
  .drop(columns=["index_right"])
)

### City Bicycle Parking Data

Source datasets from [open.toronto.ca](https://open.toronto.ca/)

In [10]:
# read source urls and other metadata from open_toronto_ca_sources.json
city_sources_path = Path("open_toronto_ca_sources.json")
city_sources = json.loads(city_sources_path.read_text())


In [11]:
# get city bicycle parking data
# each dataset is downloaded and tagged with a leading "source" column holding
# its dataset name, so rows can be traced back after everything is combined
city_data = {}
for source in city_sources['datasets']:
  dataset_name = source['dataset_name']
  dataset = gpd.read_file(source['download_url'])
  dataset.insert(0, "source", dataset_name)
  city_data[dataset_name] = dataset

# stack all datasets into one table
city_data_all = pd.concat(city_data.values())


### Damage reports and closest parking feature

In [12]:
# keep only the reports whose list of issues includes "damaged"
has_damaged_issue = [
  "damaged" in report_issues
  for report_issues in br_toronto['issues']
]
br_toronto_damaged = br_toronto[has_damaged_issue]

In [13]:
# convert crs to allow for distance calculations in metres
utm17n_crs = "32617"  # UTM 17N
city_data_all_utm17n = city_data_all.to_crs(utm17n_crs)
br_toronto_damaged_utm17n = br_toronto_damaged.to_crs(utm17n_crs)


In [14]:
# draw a circle of `search_radius` around each damage report
br_toronto_damaged_utm17n = br_toronto_damaged_utm17n.assign(
  geometry_buffered=lambda reports: reports.buffer(search_radius)
)

# the search circles on their own, set as the geometry to join against
report_search_areas = (
  br_toronto_damaged_utm17n[["geometry_buffered"]]
  .set_geometry("geometry_buffered")
)

# every city feature that touches a search circle; "index_right" holds the
# index of the report whose circle was matched
data_matches = city_data_all_utm17n.sjoin(
  df=report_search_areas,
  how='inner',
  predicate='intersects',
)

In [15]:
# one report row per matched city feature, in the same row order as
# data_matches, so the two tables can be compared position by position
matched_report_rows = [
  br_toronto_damaged_utm17n.loc[report_id]
  for report_id in data_matches['index_right']
]
report_matches = gpd.GeoDataFrame(
  matched_report_rows,
  crs="32617", # UTM 17N
)

# distance from each city feature to its matched report
# (align=False pairs rows by position instead of by index label)
distances = data_matches['geometry'].distance(report_matches, align=False)
data_matches = data_matches.assign(distance=distances)

# reorder columns: the ones listed here first, everything else after
left_columns = [
  'distance',
  'index_right',
  'source',
  'ID',
  'OBJECTID',
  'ADDRESSNUMBERTEXT',
  'ADDRESSSTREET',
  'FRONTINGSTREET',
  'SIDE',
  'FROMSTREET',
  'DIRECTION',
  'SITEID',
  'WARD',
  'BIA',
  'ASSETTYPE',
  'STATUS',
  'SDE_STATE_ID',
]
remaining_columns = [
  col for col in data_matches.columns
  if col not in left_columns
]
data_matches = data_matches[left_columns + remaining_columns]

data_matches = (
  data_matches
  .to_crs(4326) # WGS 84
  .explode(index_parts=False) # convert multipoint to point
  .assign(
    latitude=lambda r: r.geometry.y.tolist(),
    longitude=lambda r: r.geometry.x.tolist(),
  )
  .drop(columns=["_id"])
  .rename(columns={"index_right": "bikespace_id"})
)

In [16]:
# report_matches repeats a report once per matched city feature;
# keep only the first copy of each report
is_first_occurrence = ~report_matches.index.duplicated(keep='first')

report_matches_unique = (
  report_matches
  .loc[is_first_occurrence]
  .drop(columns=["geometry_buffered"])
  .sort_values(by="parking_dt", axis=0, ascending=False)
  # link to the report on the BikeSpace dashboard
  .assign(url=lambda reports: [
    f"https://dashboard.bikespace.ca/#feed?view_all=1&submission_id={id}"
    for id in reports.index
  ])
  .to_crs(4326) # WGS 84
)

In [17]:
# organize BikeSpace damage reports alongside top 5 nearest city parking features
# each entry holds the one-row report table and its (up to 5) closest matches
report_city_matches = [
  {
    "report": report_matches_unique.loc[[report_id]],
    "city_features": (
      data_matches
      .loc[data_matches['bikespace_id'] == report_id]
      .nsmallest(n=5, columns="distance")
    ),
  }
  for report_id in report_matches_unique.index
]



### Generate Thumbnail Maps

In [18]:
def save_thumbnail(entry, folder):
  """Saves a thumbnail map for one report using geodataframe.plot(), unless
  the thumbnail file already exists

  entry: dict with a one-row "report" geodataframe and a "city_features"
    geodataframe of the parking features matched to that report
  folder: folder to save the thumbnail in (created if it doesn't exist)

  The image is named after the report's index label (`<label>.jpg`). It shows
  the city features as dots and the report as a triangle over an
  OpenStreetMap basemap, centred on the report.

  returns the posix-style path to the thumbnail (`folder/<label>.jpg`)
  """
  bs_id = entry["report"].index[0]
  thumbnail_path = Path(folder) / f"{bs_id}.jpg"
  if thumbnail_path.exists():
    # already rendered on a previous run: skip reprojection and plotting
    return thumbnail_path.as_posix()

  with plt.ioff(): # turn off matplotlib output
    # get geodataframes and convert to Web mercator (unit = metres)
    gdf_bs = entry["report"].to_crs(epsg=3857)
    gdf_city = entry["city_features"].to_crs(epsg=3857)

    # city features first; this call creates the figure
    ax = gdf_city.plot(
      figsize=(8, 8),
      markersize=100,
      edgecolor="white",
      linewidth=2,
    )
    try:
      # centre the view on the report, with padding on every side
      pad = 75 # additional metres of map to show around the report
      report_point = gdf_bs.geometry.iloc[0]
      ax.set_xlim(report_point.x - pad, report_point.x + pad)
      ax.set_ylim(report_point.y - pad, report_point.y + pad)

      # report location drawn on the same axes
      gdf_bs.plot(
        ax=ax,
        marker="^", # triangle
        markersize=200,
        edgecolor="white",
        linewidth=2,
      )

      # add basemap and remove axis labels
      cx.add_basemap(ax, crs=gdf_bs.crs, zoom=19,
        source=cx.providers.OpenStreetMap.Mapnik
      )
      ax.set_axis_off() # remove x and y axes from plot

      # save to file; parents=True so a nested folder path also works
      thumbnail_path.parent.mkdir(parents=True, exist_ok=True)
      ax.figure.savefig(thumbnail_path, bbox_inches="tight")
    finally:
      # always release this specific figure, even if the basemap download or
      # the save fails, so figures don't pile up across the loop
      plt.close(ax.figure)

  return thumbnail_path.as_posix()


In [19]:
# render (or reuse) a thumbnail for every report and store its path on the entry
for match in report_city_matches:
  match.update(thumbnail=save_thumbnail(match, thumbnail_folder_path))

In [20]:
# stack the per-report city feature tables into one table for the CityFeatures tab
matched_feature_tables = [
  match["city_features"] for match in report_city_matches
]
matched_city_features_unique = pd.concat(matched_feature_tables)

### Output to Excel

In [21]:
# set up output excel sheet
writer = pd.ExcelWriter(output_excel_path, engine='xlsxwriter')
workbook = writer.book  # underlying xlsxwriter workbook, used for formatting

# cell formats shared by the worksheet tabs below
bold_props = {'bold': True, 'text_wrap': False}
wrap_props = {'text_wrap': True}
no_wrap_props = {'text_wrap': False}

bold = workbook.add_format(bold_props)
text_wrap = workbook.add_format(wrap_props)
no_text_wrap = workbook.add_format(no_wrap_props)



In [22]:
# TAB 0 - INSTRUCTIONS PAGE
worksheet = workbook.add_worksheet('Notes')
# register the sheet under its own name
writer.sheets['Notes'] = worksheet

# One entry per worksheet row. An entry is either a (text, cell_format) tuple
# or a plain string, which is written with no explicit format.
# The first three entries are reused as the header of the Matches tab, so
# they must stay (text, cell_format) tuples.
content = [
  ("Bikespace Analysis - Damaged Bicycle Parking Reports", bold),
  (f"Updated {datetime.today().strftime('%B %d %Y')}", no_text_wrap),
  (f"{len(report_city_matches)} BikeSpace damage reports with nearby City bicycle parking features", no_text_wrap),
  "",
  ("NOTES", bold),
  " • Coordinate reference system for lat/long values is WGS84 (EPSG:4326)",
  " • For any questions about this sheet, please contact bikespaceto@gmail.com",
  "",
  ("TABS", bold),
  " • Matches: display of damaged bicycle parking reports alongside nearby City of Toronto parking features",
  f"    City features include top 5 nearest bicycle parking features within a {search_radius}m radius",
  "    Damage report listed first, then data from applicable City features",
  "    Thumbnail maps: orange triangle is location of damage report",
  "    blue dots are locations of nearest 5 City bicycle parking features",
  " • DamageReports: data table for damaged bicycle parking reports",
  " • CityFeatures: data table for City of Toronto parking features matched with damage reports",
  "    (Note that the same features may be listed more than once)",
  "",
  ("SOURCES", bold),
  "User reports of damaged bicycle parking are from the BikeSpace app (bikespace.ca)",
  "City of Toronto bicycle parking features are from the following datasets:",
  *[(" • " + source['dataset_title']) for source in city_sources['datasets']],
]

# write one entry per row in column A
for i, line in enumerate(content):
  # tell the two entry shapes apart explicitly rather than by catching a
  # failed unpack, which would also split any 2-character string in two
  if isinstance(line, tuple):
    text, cell_format = line
  else:
    text, cell_format = line, None
  worksheet.write(i, 0, text, cell_format)

worksheet.fit_to_pages(1, 0)

In [23]:
# TAB 1 - DISPLAY OF REPORTS AND MATCHES
worksheet = workbook.add_worksheet('Matches')
writer.sheets['Matches'] = worksheet
pagebreaks = []
images = []

# set column widths to 18 and format to word wrap
worksheet.set_column("A:F", 18, text_wrap)

# write header content (the first three lines of the Notes tab)
for header_cell, header_line in zip(('A1', 'A2', 'A3'), content):
  worksheet.write(header_cell, *header_line)

# write data tables: for each report, a thumbnail, then the report's fields,
# then the matched city features (tables are transposed: one field per row)
image_height_rows = 15  # approx size of image, in worksheet rows
write_row = 4
for pair in report_city_matches:
  worksheet.insert_image(write_row, 0, pair["thumbnail"],
    {"x_scale": 0.5, "y_scale": 0.5, "object_position": 2}
  )
  write_row += image_height_rows + 1  # plus 1 blank row

  report = (
    pair["report"]
    .reset_index(names=["id"])
    .drop(columns=["geometry", "parking_time", "parking_dt"])
    .T
    .dropna(how="all")
  )
  city_features = (
    pair["city_features"]
    .drop(columns=["bikespace_id", "geometry"])
    .T
    # treat 0 and empty strings as missing so all-empty fields are dropped
    .replace(0, np.nan)
    .replace("", np.nan)
    .dropna(how="all")
  )

  # report table, 1 blank row, city features table, then 2 blank rows
  for table, blank_rows_after in ((report, 1), (city_features, 2)):
    table.to_excel(
      writer,
      sheet_name='Matches',
      startrow=write_row,
      startcol=0,
      header=False,
    )
    write_row += len(table) + blank_rows_after

  pagebreaks.append(write_row)


# print formatting
# a page break after every report except the last one
worksheet.set_h_pagebreaks(pagebreaks[:-1])
worksheet.print_area(0, 0, write_row, 5)
worksheet.fit_to_pages(1, 0)  # fit to one column

In [24]:
# TAB 2 - BIKESPACE REPORTS
worksheet = workbook.add_worksheet('DamageReports')
writer.sheets['DamageReports'] = worksheet

# write header content
worksheet.write('A1', "Damaged Bicycle Parking Reports", bold)

# flat table of the reports, with the index turned into a regular column
columns_to_omit = ["geometry", "parking_time", "parking_dt"]
damage_reports = (
  report_matches_unique
  .drop(columns=columns_to_omit)
  .reset_index()
)
table_start_row = 2  # leave row 0 for the title and row 1 blank
damage_reports.to_excel(
  writer,
  sheet_name='DamageReports',
  startrow=table_start_row,
  startcol=0,
  header=True,
  index=False,
)

# formatting: wrap the written range (header row + data rows) in an Excel table
(max_row, max_col) = damage_reports.shape
column_settings = [{"header": column} for column in damage_reports.columns]
table_options = {
  "name": "T_DamageReports",
  "columns": column_settings,
  "style": "Table Style Light 8",
}
worksheet.add_table(
  table_start_row, 0, max_row + table_start_row, max_col - 1, table_options
)

# default column width first, then wider overrides for specific columns
worksheet.set_column(0, max_col, 15, text_wrap)
worksheet.set_column("B:B", 40, text_wrap)
worksheet.set_column("H:I", 20, text_wrap)


0

In [25]:
# TAB 3 - MATCHED CITY BICYCLE PARKING FEATURES
worksheet = workbook.add_worksheet('CityFeatures')
writer.sheets['CityFeatures'] = worksheet

# write header content
worksheet.write('A1', "Matched City Bicycle Parking Features", bold)

# flat table of matched features; 0 and empty strings are treated as missing
# so that columns with no real values are dropped
city_features_output = (
  matched_city_features_unique
  .drop(columns=["geometry"])
  .replace(0, np.nan)
  .replace("", np.nan)
  .dropna(axis=1, how="all")
)
table_start_row = 2  # leave row 0 for the title and row 1 blank
city_features_output.to_excel(
  writer,
  sheet_name='CityFeatures',
  startrow=table_start_row,
  startcol=0,
  header=True,
  index=False,
)

# formatting: wrap the written range (header row + data rows) in an Excel table
(max_row, max_col) = city_features_output.shape
column_settings = [
  {"header": column} for column in city_features_output.columns
]
table_options = {
  "name": "T_CityFeatures",
  "columns": column_settings,
  "style": "Table Style Light 8",
}
worksheet.add_table(
  table_start_row, 0, max_row + table_start_row, max_col - 1, table_options
)

worksheet.set_column(0, max_col, 15, text_wrap)

0

In [26]:
# Close through the pandas writer rather than the xlsxwriter workbook:
# writer.close() saves the workbook and also closes any file handle the
# writer opened, which workbook.close() alone does not do.
writer.close()