In [None]:
import math
import os
import sys
import logging

import numpy as np
import pandas as pd
import geopandas as gpd
import torch
import torch.nn as nn
import networkx as nx

from torch.utils.data import DataLoader, Dataset
from node2vec import Node2Vec
from shapely.geometry import Point, MultiPolygon
from shapely.wkt import loads
from pandarallel import pandarallel

In [None]:
# Send log records at INFO and above to stdout so they show up in cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)

# Start pandarallel with 8 workers and a progress bar; this enables the
# `parallel_apply` method on pandas objects.
pandarallel.initialize(progress_bar=True, nb_workers=8)

**Load Dataset Features**

In [None]:
# Resolve the dataset directory: Google Drive when the notebook runs on Colab,
# otherwise a directory relative to the notebook.
try:
  from google.colab import drive
except ImportError:
  # Only a missing `google.colab` package means "not on Colab". Any other
  # failure (e.g. while mounting the drive) should surface instead of being
  # silently treated as a local run.
  logger.info("Running locally, reading dataset from local file system")
  DATASET_PATH = "./dataset/EdmontonFireRescueServicesData"
else:
  logger.info("Running on Google Colab, reading dataset from drive")
  drive.mount("/content/drive")
  DATASET_PATH = "/content/drive/MyDrive/ECE2500/EdmontonFireRescueServicesData"

# Validate the directory in both environments, and raise rather than calling
# exit(1) so the failure shows up as an ordinary traceback in the notebook.
if not os.path.exists(DATASET_PATH):
  logger.critical(f"Cannot find dataset directory, place dataset in {DATASET_PATH}")
  raise FileNotFoundError(
      f"Cannot find dataset directory, place dataset in {DATASET_PATH}")

# Input files, all expected directly inside DATASET_PATH.
UNIT_TRIP_PATH = os.path.join(DATASET_PATH, "EFRS_Unit_Trip_Summary.csv")
EVENT_TRIP_PATH = os.path.join(DATASET_PATH, "EFRS_Event_Trip_Summary.csv")
UNIT_HISTORY_2023_PATH = os.path.join(DATASET_PATH, "UN_HI_2023.csv")
NEIGHBOURHOOD_PATH = os.path.join(DATASET_PATH, "City_of_Edmonton_-_Neighbourhoods_20241022.csv")
FIRE_STATION_PATH = os.path.join(DATASET_PATH, "Fire_Stations_20241027.csv")
NEIGHBOURHOOD_FEATURES_PATH = os.path.join(DATASET_PATH, "neighbourhood_static_data_with_five_years_events.csv")

logger.debug(f"Unit Trip: {UNIT_TRIP_PATH}")
logger.debug(f"Event Trip: {EVENT_TRIP_PATH}")
logger.debug(f"Unit History 2023: {UNIT_HISTORY_2023_PATH}")
logger.debug(f"Neighbourhood: {NEIGHBOURHOOD_PATH}")
logger.debug(f"Fire Stations: {FIRE_STATION_PATH}")
logger.debug(f"Neighbourhood Features: {NEIGHBOURHOOD_FEATURES_PATH}")

# Load every raw table into memory.
unit_trip_df = pd.read_csv(UNIT_TRIP_PATH)
event_trip_df = pd.read_csv(EVENT_TRIP_PATH)
unit_history_2023_df = pd.read_csv(UNIT_HISTORY_2023_PATH)
neighbourhood_df = pd.read_csv(NEIGHBOURHOOD_PATH)
station_df = pd.read_csv(FIRE_STATION_PATH)
neighbourhood_feature_df = pd.read_csv(NEIGHBOURHOOD_FEATURES_PATH)

**Data and Embedding**

In [None]:
def find_neighbourhood(latitude, longitude, neighbourhood_info_df):
  """Finds the Neighbourhood Number for a given latitude and longitude.

  Args:
    latitude: The latitude of the location.
    longitude: The longitude of the location.
    neighbourhood_info_df: DataFrame with a 'MultiPolygon_obj' column holding
      geometry objects that expose `contains(point)`, and a
      'Neighbourhood Number' column.

  Returns:
    The Neighbourhood Number of the first row whose geometry contains the
    point, or None if no row matches or either coordinate is missing.
  """
  # A missing coordinate cannot fall inside any polygon, so skip the scan.
  if pd.isna(latitude) or pd.isna(longitude):
    return None

  point = Point(longitude, latitude)  # x = longitude, y = latitude

  # Walk only the two needed columns instead of `iterrows()`, which builds a
  # full Series for every row on every call.
  candidates = zip(
      neighbourhood_info_df['MultiPolygon_obj'],
      neighbourhood_info_df['Neighbourhood Number'])
  for multipolygon, neighbourhood_number in candidates:
    if multipolygon.contains(point):
      return neighbourhood_number
  return None

**Feature Extraction**

In [None]:
# Residential building categories, one name per entry.
build_type_list = [
    'Apartment_Condo_1_to_4_stories',
    'Apartment_Condo_5_or_more_stories',
    'Duplex_Fourplex',
    'Hotel_Motel',
    'Institution_Collective_Residence',
    'Manufactured_Mobile_Home',
    'RV_Tent_Other',
    'Row_House',
    'Single_Detached_House',
]

# Distinct event descriptions and unit types present in the trip summaries.
event_type_list = event_trip_df['Rc_description'].unique()
unit_type_list = unit_trip_df['unityp'].unique()

# Size of each category set.
num_neighborhoods = len(neighbourhood_df)
num_building_types, num_event_types, num_equipment_types = (
    len(build_type_list),
    len(event_type_list),
    len(unit_type_list),
)

In [None]:
# Note: the point-in-polygon lookup below takes ~80 mins to complete on 1 CPU.
# Alternatively, assign 0 to all rows to test with dummy data.

# Parse each WKT string into a geometry object once, up front.
neighbourhood_df['MultiPolygon_obj'] = (
    neighbourhood_df['Geometry Multipolygon'].parallel_apply(loads))

# Tag every event with the neighbourhood whose polygon contains its
# coordinates (None when no polygon does).
event_trip_df['Neighbourhood Number'] = event_trip_df.parallel_apply(
    lambda event: find_neighbourhood(
        event['Latitude'], event['Longitude'], neighbourhood_df),
    axis=1)

# Calendar features derived from the parsed `Sd_date` timestamp.
event_trip_df["Sd_date_dt"] = pd.to_datetime(event_trip_df["Sd_date"])
sd_dt_accessor = event_trip_df["Sd_date_dt"].dt
event_trip_df["date"] = sd_dt_accessor.date
event_trip_df["day_of_year"] = sd_dt_accessor.dayofyear
event_trip_df["day_of_week"] = sd_dt_accessor.dayofweek
event_trip_df["hour_of_day"] = sd_dt_accessor.hour
# "%V" is the ISO 8601 week number, produced here as a string.
# NOTE(review): the ISO week is stored next to the calendar year below; the
# two disagree for a few days around New Year. Confirm this is acceptable
# for anything that keys on (year, week_of_year).
event_trip_df["week_of_year"] = sd_dt_accessor.strftime("%V")
event_trip_df["year"] = sd_dt_accessor.year


In [None]:
# Neighbourhood Feature Cleaning

# Join the static feature table with the neighbourhood table on the
# neighbourhood number (the key column is spelled differently on each side).
neighbourhood_feature_full_df = neighbourhood_feature_df.merge(
    neighbourhood_df,
    how='inner',
    left_on='Neighbourhood_Number',
    right_on='Neighbourhood Number',
)
# Note: some neighbourhoods are missing from neighbourhood_feature_df.
# Row counts recorded by the author:
#   len(neighbourhood_feature_df)       # 377
#   len(neighbourhood_df)               # 403
#   len(neighbourhood_feature_full_df)  # 403
# To list the missing neighbourhoods:
#   set(neighbourhood_df['Neighbourhood Number'].unique()) - set(neighbourhood_feature_df['Neighbourhood_Number'].unique())
# NOTE(review): an inner join should not yield more rows than the smaller
# table unless keys repeat -- confirm the recorded 403 is still accurate.

# Unused exploratory snippets, kept for reference:
# Neighborhood
# neighborhood_mapping = neighbourhood_feature_full_df[['Neighbourhood Number', 'Neighbourhood Name']].drop_duplicates() # mapping between index to neighborhood
#
# Building features
# building_type_mapping = pd.DataFrame(build_type_list)
# building_counts_np = neighbourhood_feature_full_df[build_type_list]
# building_counts_np = building_counts_np.astype(int)
# building_counts_np = building_counts_np.to_numpy()

# Demographic features: population = area (sq km) x population density.
neighbourhood_feature_full_df['Population'] = (
    neighbourhood_feature_full_df['Area_Sq_Km']
    * neighbourhood_feature_full_df['Population_per_Sq_km']
)
# population_np = neighbourhood_feature_full_df['Population']
# population_np = population_np.astype(int)
# population_np = population_np.to_numpy()

In [None]:
# Columns kept from the merged neighbourhood table, grouped by theme.
neighbourhood_selected_columns = (
    # identity and size
    ['Neighbourhood_Number', 'Neighbourhood_Name', 'Ward', 'Population']
    # building type
    + [
        'Apartment_Condo_1_to_4_stories',
        'Apartment_Condo_5_or_more_stories',
        'Duplex_Fourplex',
        'Hotel_Motel',
        'Institution_Collective_Residence',
        'Manufactured_Mobile_Home',
        'RV_Tent_Other',
        'Row_House',
        'Single_Detached_House',
    ]
    # duration
    + [
        'five_Years_or_More',
        'three_Years_to_Less_than_five_Years',
        'one_Year_to_Less_than_three_Years',
        'Less_than_one_Year',
    ]
    # income
    + ['Low_Income', 'Low_medium_Income', 'Medium_Income', 'High_Income']
    # education
    + [
        'No_Certificate_Diploma_or_Degree',
        'High_School_Trades_or_Apprenticeship_Certificate',
        'College_or_University_Certificate_or_Diploma',
        'University_Bachelor_and_Medical_Degree',
        'Master_and_Doctorate_Degree',
    ]
    # age
    + ['Children', 'Youth', 'Adults', 'Seniors']
    # zoning
    + [
        'No_traffic_lights',
        'No_bus_stops',
        'Food',
        'Education',
        'Healthcare',
        'Entertainment',
        'Public_Service',
        'commercial',
        'retail',
    ]
    # event type
    + [
        'ALARMS',
        'CITIZEN_ASSIST',
        'COMMUNITY_EVENT',
        'FIRE',
        'HAZARDOUS_MATERIALS',
        'MEDICAL',
        'MOTOR_VEHICLE_INCIDENT',
        'OUTSIDE_FIRE',
        'RESCUE',
        'TRAINING_MAINTENANCE',
        'VEHICLE_FIRE',
    ]
)

# Columns kept from the event trip table.
event_selected_columns = (
    # event identity
    ['Eid', 'Rc', 'Rc_description']
    # time
    + ['day_of_year', 'day_of_week', 'hour_of_day', 'week_of_year', 'year']
    # neighbourhood
    + ['Neighbourhood Number']
)

def _to_numeric_if_possible(column):
  """Converts a column to a numeric dtype, returning it unchanged on failure.

  Gives the same result as `pd.to_numeric(column, errors='ignore')` without
  using the `errors='ignore'` option, which pandas has deprecated.

  Args:
    column: The Series to convert.

  Returns:
    The numeric Series, or `column` itself if it cannot be parsed as numbers.
  """
  try:
    return pd.to_numeric(column)
  except (ValueError, TypeError):
    return column


# Keep only the selected columns and coerce each one to numeric where it parses.
neighbourhood_df_slim = neighbourhood_feature_full_df[neighbourhood_selected_columns]
neighbourhood_df_slim = neighbourhood_df_slim.apply(_to_numeric_if_possible)

event_trip_df_slim = event_trip_df[event_selected_columns]
event_trip_df_slim = event_trip_df_slim.apply(_to_numeric_if_possible)

In [40]:
# Count events per (year, ISO week, neighbourhood, event type).
# `week_of_year` is an ISO 8601 week number, so it must be paired with the ISO
# year rather than the calendar year. Otherwise the first days of January that
# belong to the previous ISO year's last week are counted together with the
# following December. The ISO year is taken from the parsed timestamp on
# `event_trip_df` and aligned to the slim frame by index.
weekly_event_df = (
    event_trip_df_slim
    .assign(year=event_trip_df["Sd_date_dt"].dt.isocalendar().year)
    .groupby(['year', 'week_of_year', 'Neighbourhood Number', 'Rc_description'])
    .size()
    .reset_index(name='event_count')
)

In [41]:
def _export_csv(frame, filename):
  """Writes `frame` to DATASET_PATH/filename with newlines in text replaced by spaces.

  Args:
    frame: The DataFrame to export.
    filename: Name of the CSV file to create inside DATASET_PATH.

  Returns:
    The newline-free copy of `frame` that was written.
  """
  flattened = frame.replace(to_replace=r'\n', value=' ', regex=True)
  flattened.to_csv(os.path.join(DATASET_PATH, filename))
  return flattened


# Persist the three derived tables next to the raw dataset.
neighbourhood_df_save = _export_csv(neighbourhood_df_slim, "neighbourhood_features.csv")

event_df_save = _export_csv(event_trip_df_slim, "event_trip_features.csv")

weekly_event_df_save = _export_csv(weekly_event_df, "weekly_events.csv")