In [None]:
!pip install networkx node2vec
!pip install numpy pandas scipy
!pip install shapely geopandas contextily
!pip install torch seaborn matplotlib

In [None]:
import math
import os
import sys
import logging

import numpy as np
import pandas as pd
import geopandas as gpd
import torch
import torch.nn as nn
import networkx as nx

from torch.utils.data import DataLoader, Dataset
from node2vec import Node2Vec

In [None]:
# Send log records of level INFO and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Logger shared by every cell of this notebook.
logger = logging.getLogger(__name__)

**Load Dataset Features**

In [None]:
# Resolve DATASET_PATH: Google Drive when the Colab package is importable, local folder otherwise.
try:
  from google.colab import drive
except ImportError:
  # Only a failed import is treated as "not on Colab"; any other error (for example a
  # failed drive mount) is allowed to surface instead of silently falling back.
  logger.info("Running locally, reading dataset from local file system")
  DATASET_PATH = "./dataset/EdmontonFireRescueServicesData"
else:
  logger.info("Running on Google Colab, reading dataset from drive")
  drive.mount("/content/drive")
  DATASET_PATH = "/content/drive/MyDrive/ECE2500/EdmontonFireRescueServicesData"

# Fail fast with a regular exception: it stops the current cell (and "Run All")
# with a readable traceback, which exit(1) does not do in a notebook.
if not os.path.exists(DATASET_PATH):
  logger.critical(f"Cannot find dataset directory, place dataset in {DATASET_PATH}")
  raise FileNotFoundError(f"Dataset directory not found: {DATASET_PATH}")

def _dataset_file(filename):
  """Return the path of `filename` inside DATASET_PATH."""
  return os.path.join(DATASET_PATH, filename)


# Locations of the individual CSV files.
UNIT_TRIP_PATH = _dataset_file("EFRS_Unit_Trip_Summary.csv")
EVENT_TRIP_PATH = _dataset_file("EFRS_Event_Trip_Summary.csv")
UNIT_HISTORY_2023_PATH = _dataset_file("UN_HI_2023.csv")
NEIGHBOURHOOD_PATH = _dataset_file("City_of_Edmonton_-_Neighbourhoods_20241022.csv")
FIRE_STATION_PATH = _dataset_file("Fire_Stations_20241027.csv")
NEIGHBOURHOOD_FEATURES_PATH = _dataset_file("neighbourhood_static_data_with_five_years_events.csv")

# Trace the resolved locations (emitted only when the logger level is DEBUG).
logger.debug(f"Unit Trip: {UNIT_TRIP_PATH}")
logger.debug(f"Event Trip: {EVENT_TRIP_PATH}")
logger.debug(f"Unit History 2023: {UNIT_HISTORY_2023_PATH}")
logger.debug(f"Neighbourhood: {NEIGHBOURHOOD_PATH}")
logger.debug(f"Fire Stations: {FIRE_STATION_PATH}")
logger.debug(f"Neighbourhood Features: {NEIGHBOURHOOD_FEATURES_PATH}")

# Read every CSV into its own DataFrame.
unit_trip_df = pd.read_csv(UNIT_TRIP_PATH)
event_trip_df = pd.read_csv(EVENT_TRIP_PATH)
unit_history_2023_df = pd.read_csv(UNIT_HISTORY_2023_PATH)
neighbourhood_df = pd.read_csv(NEIGHBOURHOOD_PATH)
station_df = pd.read_csv(FIRE_STATION_PATH)
neighbourhood_feature_df = pd.read_csv(NEIGHBOURHOOD_FEATURES_PATH)

**Data and Embedding**

In [None]:
def generate_node2vec_embeddings(neighbourhood_df, num_neighborhoods, node2vec_dim=32):
  """
  Builds a neighbourhood adjacency graph and returns its Node2Vec embeddings as a frozen layer.

  Node i is the neighbourhood at row position i of neighbourhood_df. Two nodes are connected
  when their polygons intersect.

  neighbourhood_df: DataFrame with a WKT column named 'Geometry Multipolygon'.
                    Side effect: the columns 'geometry' and 'nid' are added to (or
                    overwritten on) this frame.
  num_neighborhoods: Number of neighborhoods (nodes in the graph). Must equal
                     len(neighbourhood_df), otherwise the 'nid' assignment fails.
  node2vec_dim: Dimension of the embeddings to be generated.

  Returns an nn.Embedding created with freeze=True whose row i is the embedding of node i.
  """

  # Convert "Geometry Multipolygon" column to GeoSeries
  neighbourhood_df['geometry'] = gpd.GeoSeries.from_wkt(neighbourhood_df['Geometry Multipolygon'])
  neighbourhood_df['nid'] = range(num_neighborhoods)

  G = nx.Graph()
  # Register every neighbourhood up front. add_edge() alone only creates nodes that have at
  # least one neighbour, so an isolated neighbourhood would be missing from the graph and
  # the embedding lookup below would raise KeyError for it.
  G.add_nodes_from(range(num_neighborhoods))

  # Pull the geometries out once instead of indexing the column inside the O(n^2) loop.
  geometries = list(neighbourhood_df['geometry'])
  for i in range(num_neighborhoods):
    geometry_i = geometries[i]
    for j in range(i + 1, num_neighborhoods):
      if geometry_i.intersects(geometries[j]):
        G.add_edge(i, j)

  node2vec = Node2Vec(G, dimensions=node2vec_dim, walk_length=10, num_walks=100, p=1, q=1)
  node2vec_model = node2vec.fit()

  # The fitted model is looked up by the string form of the node id.
  node2vec_embeddings_np = np.array([node2vec_model.wv[str(i)] for i in range(num_neighborhoods)])
  node2vec_embeddings = torch.from_numpy(node2vec_embeddings_np)
  node2vec_emb_layer = nn.Embedding.from_pretrained(node2vec_embeddings, freeze=True)

  return node2vec_emb_layer


class Time2Vec(nn.Module):
  """
  Time2Vec encoder for temporal features.

  Every time feature gets its own learnable affine map; the first output of that map is
  kept as-is (linear term) and the remaining outputs go through `act_function`
  (periodic terms).

  input_dim: number of time features in the last axis of the input.
  embed_dim: total embedding size; each feature receives embed_dim // input_dim values.
  act_function: element-wise function applied to the periodic terms (torch.sin by default).
  """
  def __init__(self, input_dim, embed_dim, act_function=torch.sin):
    super().__init__()
    per_feature_dim = embed_dim // input_dim  # floor division: any remainder is dropped
    self.embed_dim = per_feature_dim
    self.act_function = act_function
    # weight is drawn before bias; the order matters for seeded initialisation.
    self.weight = nn.Parameter(torch.randn(input_dim, per_feature_dim))
    self.bias = nn.Parameter(torch.randn(input_dim, per_feature_dim))

  def forward(self, x):
    # Place each feature value on a diagonal so that the matmul multiplies feature k
    # with row k of the weight matrix only.
    diagonal = torch.diag_embed(x)
    affine = torch.matmul(diagonal, self.weight) + self.bias
    linear_term, periodic_term = affine.split([1, self.embed_dim - 1], dim=-1)
    encoded = torch.cat([linear_term, self.act_function(periodic_term)], dim=-1)
    # Merge the per-feature embeddings into one vector per (dim 0, dim 1) position.
    return encoded.view(diagonal.size(0), diagonal.size(1), -1)


class NeighborhoodDataset(Dataset):
    """
    Dataset that bundles index-aligned model inputs and targets.

    All eight containers are indexed with the same `idx`; the dataset length is taken
    from `neighborhood_ids`.
    """

    def __init__(self, neighborhood_ids, time_features, building_type_ids, building_counts,
                 population, event_type_ids, equipment_ids, targets):
        # Model inputs
        self.neighborhood_ids = neighborhood_ids
        self.time_features = time_features
        self.building_type_ids = building_type_ids
        self.building_counts = building_counts
        self.population = population
        self.event_type_ids = event_type_ids
        self.equipment_ids = equipment_ids
        # Values the model is trained to predict
        self.targets = targets

    def __len__(self):
        # One sample per entry of neighborhood_ids
        return len(self.neighborhood_ids)

    def __getitem__(self, idx):
        # Same order as the constructor arguments, targets last.
        fields = (
            self.neighborhood_ids,
            self.time_features,
            self.building_type_ids,
            self.building_counts,
            self.population,
            self.event_type_ids,
            self.equipment_ids,
            self.targets,
        )
        return tuple(field[idx] for field in fields)


class CombinedEmbedding(nn.Module):
  """
  Combined Embedding Module

  Combines embeddings from Node2Vec, Time2Vec, building type/counts, population, event type, and equipment.
  Projects the combined embedding to a target dimension (e.g., 64) for compatibility with transformer layers.

  node2vec_emb_layer: nn.Embedding holding the precomputed Node2Vec vectors.
  time2vec_embed_dim: requested total size of the Time2Vec embedding.
  time_feature_dim: number of time features in the last axis of `time_features`.
  num_building_types / building_type_embed_dim: vocabulary size and width of the building type embedding.
  population_embed_dim: output width of the linear layer applied to the scalar population.
  num_event_types / event_type_embed_dim: vocabulary size and width of the event type embedding.
  num_equipment_types / equipment_embed_dim: vocabulary size and width of the equipment embedding.
  target_embed_dim: width of the projected output.
  """
  def __init__(self, node2vec_emb_layer, time2vec_embed_dim, time_feature_dim,
          num_building_types, building_type_embed_dim, population_embed_dim,
          num_event_types, event_type_embed_dim, num_equipment_types, equipment_embed_dim,
          target_embed_dim=64):  # Add target_embed_dim for projection
    super(CombinedEmbedding, self).__init__()

    # Embedding initialization code
    self.node2vec_emb_layer = node2vec_emb_layer  # Precomputed Node2Vec embeddings
    self.time2vec = Time2Vec(input_dim=time_feature_dim, embed_dim=time2vec_embed_dim)
    self.building_type_embedding = nn.Embedding(num_building_types, building_type_embed_dim)
    self.population_embedding = nn.Linear(1, population_embed_dim)
    self.event_type_embedding = nn.Embedding(num_event_types, event_type_embed_dim)
    self.equipment_embedding = nn.Embedding(num_equipment_types, equipment_embed_dim)

    # Time2Vec produces (time2vec_embed_dim // time_feature_dim) values per time feature,
    # so its real output width equals time2vec_embed_dim only when that division is exact.
    # Use the real width, otherwise the projection layer would not match the concatenation.
    time2vec_out_dim = self.time2vec.embed_dim * time_feature_dim

    # Compute the combined embedding dimension before projection
    self.projection_dim = (node2vec_emb_layer.embedding_dim + time2vec_out_dim +
                    building_type_embed_dim + population_embed_dim +
                    event_type_embed_dim + equipment_embed_dim)

    # Projection layer to reduce to target_embed_dim
    self.projection_layer = nn.Linear(self.projection_dim, target_embed_dim)

  def forward(self, neighborhood_ids, time_features, building_type_ids, building_counts,
              population, event_type_ids, equipment_ids):
    """
    Embeds every input, concatenates the results along the last axis and projects them.

    The operations below expect `neighborhood_ids` and `population` to carry one value per
    sample, and the remaining inputs to carry a sequence axis at dim 1 (building inputs
    additionally a building-type axis at dim 2).
    Returns a tensor whose last axis has size target_embed_dim.
    """
    # Generate and combine embeddings
    temporal_embeddings = self.time2vec(time_features)
    logger.debug(f"Temporal Embedding Shape: {temporal_embeddings.shape}")
    # One Node2Vec vector per sample, copied to every step of the sequence:
    # [batch_size, sequence_length, node2vec embedding dim]
    spatial_embeddings = self.node2vec_emb_layer(neighborhood_ids).unsqueeze(1).repeat(1, temporal_embeddings.size(1), 1)
    logger.debug(f"Spatial Embedding Shape: {spatial_embeddings.shape}")
    # Count-weighted sum of the building type vectors over the building-type axis (dim 2)
    building_embeddings = (self.building_type_embedding(building_type_ids) * building_counts.unsqueeze(-1)).sum(dim=2)
    logger.debug(f"Building Embedding Shape: {building_embeddings.shape}")
    # Scalar population -> vector, then broadcast (without copying) along the sequence axis
    population_embeddings = self.population_embedding(population.unsqueeze(-1)).unsqueeze(1).expand(-1, spatial_embeddings.size(1), -1)
    logger.debug(f"Population Embedding Shape: {population_embeddings.shape}")
    event_type_embeddings = self.event_type_embedding(event_type_ids)
    logger.debug(f"Event Type Embedding Shape: {event_type_embeddings.shape}")
    equipment_embeddings = self.equipment_embedding(equipment_ids)
    logger.debug(f"Equipment Embedding Shape: {equipment_embeddings.shape}")

    # Concatenate all embeddings into a single combined embedding
    combined_embedding = torch.cat([spatial_embeddings, temporal_embeddings,
                                    building_embeddings, population_embeddings,
                                    event_type_embeddings, equipment_embeddings], dim=-1)

    logger.debug(f"Combined Embedding Shape before Projection: {combined_embedding.shape}")

    # Project to target dimension for compatibility (e.g., 64)
    combined_embedding = self.projection_layer(combined_embedding)

    return combined_embedding


class PositionalEncoding(nn.Module):
  """
  Positional Encoding Module

  Adds a fixed sinusoidal encoding to the input: even feature indices receive a sine,
  odd feature indices the matching cosine.

  embed_dim: size of the last axis of the input (even or odd).
  max_len: number of positions for which an encoding is precomputed.
  """
  def __init__(self, embed_dim, max_len=7):  # 7 days in a week
    super(PositionalEncoding, self).__init__()
    position = torch.arange(0, max_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, embed_dim, 2) * -(math.log(10000.0) / embed_dim))
    angles = position * div_term  # one column per even feature index
    pe = torch.zeros(max_len, embed_dim)
    pe[:, 0::2] = torch.sin(angles)
    # For an odd embed_dim there is one odd index fewer than even indices, so the cosine
    # part uses only the first embed_dim // 2 columns (all of them when embed_dim is even).
    pe[:, 1::2] = torch.cos(angles[:, :embed_dim // 2])
    # Non-persistent buffer: follows the module across .to()/.cuda()/.half() calls but is
    # kept out of state_dict, so existing checkpoints keep loading.
    self.register_buffer("pe", pe.unsqueeze(0), persistent=False)  # Shape: (1, max_len, embed_dim)

  def forward(self, x):
    # x: the sequence axis is dim 1; only the first x.size(1) positions are used.
    x = x + self.pe[:, :x.size(1), :].to(x.device)
    return x

**Transformer Model**

In [None]:
# Transformer-based Emergency Event Predictor
class EmergencyEventPredictor(nn.Module):
  """
  Embeds the inputs, adds positional information, runs a transformer encoder over the
  sequence axis and maps every sequence element to one scalar.

  embedding_module: module returning [batch_size, sequence_length, embed_dim] embeddings.
  embed_dim: model width; must match the output width of `embedding_module`.
  num_heads: number of attention heads per encoder layer.
  num_layers: number of stacked encoder layers.
  max_len: number of positions covered by the positional encoding.
  dim_feedforward: hidden width of each encoder layer's feed-forward block.
  dropout: dropout probability inside the encoder layers.
  """
  def __init__(self, embedding_module, embed_dim, num_heads, num_layers, max_len=7,
               dim_feedforward=512, dropout=0.1):
    super(EmergencyEventPredictor, self).__init__()

    # Embedding module (CombinedEmbedding) and positional encoding
    self.embedding_module = embedding_module
    self.positional_encoding = PositionalEncoding(embed_dim, max_len)

    # Transformer Encoder
    # batch_first=True: the embeddings arrive as [batch, sequence, feature]. With the
    # default (sequence first) layout, attention would mix the samples of a mini-batch
    # with each other instead of the time steps of one sample.
    encoder_layer = nn.TransformerEncoderLayer(
        d_model=embed_dim, nhead=num_heads, dim_feedforward=dim_feedforward, dropout=dropout,
        batch_first=True
    )
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    # Prediction head
    self.fc_out = nn.Linear(embed_dim, 1)  # Output: one scalar per sequence element

  def forward(self, neighborhood_ids, time_features, building_type_ids, building_counts,
              population, event_type_ids, equipment_ids):
    """Returns one raw (unbounded) score per sequence element: [batch_size, sequence_length]."""

    # Generate combined embeddings from the embedding module
    x = self.embedding_module(neighborhood_ids, time_features, building_type_ids, building_counts,
                              population, event_type_ids, equipment_ids)

    # Apply positional encoding
    x = self.positional_encoding(x)

    # Pass through transformer encoder
    x = self.transformer_encoder(x)

    # Prediction layer (we apply it to each element in the sequence)
    predictions = self.fc_out(x).squeeze(-1)  # Shape: [batch_size, sequence_length]

    return predictions

**Feature Extraction**

In [None]:
# Building categories; used further down as column names of the neighbourhood feature table.
build_type_list = [
    'Apartment_Condo_1_to_4_stories',
    'Apartment_Condo_5_or_more_stories',
    'Duplex_Fourplex',
    'Hotel_Motel',
    'Institution_Collective_Residence',
    'Manufactured_Mobile_Home',
    'RV_Tent_Other',
    'Row_House',
    'Single_Detached_House',
]
num_building_types = len(build_type_list)

# Distinct event descriptions found in the event trip table.
event_type_list = event_trip_df['Rc_description'].unique()
num_event_types = len(event_type_list)

# Distinct unit types found in the unit trip table.
unit_type_list = unit_trip_df['unityp'].unique()
num_equipment_types = len(unit_type_list)

# One graph node / dataset sample per row of the neighbourhood table.
num_neighborhoods = len(neighbourhood_df)

In [None]:
# Neighbourhood Feature Cleaning

# Attach the static features to every neighbourhood while keeping the rows AND the row order
# of neighbourhood_df (how='right'). Row position i is used elsewhere in this notebook as the
# neighbourhood id (Node2Vec node i, torch.arange(num_neighborhoods)), so the feature rows
# must line up with neighbourhood_df; an outer merge re-orders the rows by key instead.
neighbourhood_feature_full_df = pd.merge(neighbourhood_feature_df, neighbourhood_df, left_on='Neighbourhood_Number', right_on='Neighbourhood Number', how='right')
# Duplicate keys in neighbourhood_feature_df would add rows and break the alignment.
assert len(neighbourhood_feature_full_df) == len(neighbourhood_df), "merge changed the number of neighbourhoods"
# Note: some neighbourhoods are missing from neighbourhood_feature_df
# (377 rows there vs. 403 rows in neighbourhood_df); their feature columns are NaN after
# the merge and are replaced by 0 below.

# Neighborhood IDs
neighborhood_mapping = neighbourhood_feature_full_df['Neighbourhood Number'].unique() # mapping between index to neighborhood number

# Building features
# One column per entry of build_type_list, in that order; missing counts become 0.
building_counts_np = (
    neighbourhood_feature_full_df[build_type_list]
    .fillna(0)
    .astype(int)
    .to_numpy()
)

# Demographic features
# Population = area * density; neighbourhoods without feature data get 0.
neighbourhood_feature_full_df['Population'] = (
    neighbourhood_feature_full_df['Area_Sq_Km'] * neighbourhood_feature_full_df['Population_per_Sq_km']
).fillna(0)
population_np = neighbourhood_feature_full_df['Population'].astype(int).to_numpy()

**Transformer Model Initialization**

In [None]:
# Embedding Parameter and Module

# Time features: day of year, day of week, hour
time_feature_dim = 3
time2vec_embed_dim = 63  # 63 // 3 = 21 values per time feature

# Widths of the remaining embeddings
node2vec_dim = 32
building_type_embed_dim = 16
population_embed_dim = 8
event_type_embed_dim = 16
equipment_embed_dim = 16

# Width after projecting the concatenated embeddings
target_embed_dim = 64

# Frozen spatial embedding built from the neighbourhood polygons
node2vec_emb_layer = generate_node2vec_embeddings(
    neighbourhood_df=neighbourhood_df,
    num_neighborhoods=num_neighborhoods,
    node2vec_dim=node2vec_dim,
)

embedding_module = CombinedEmbedding(
    node2vec_emb_layer=node2vec_emb_layer,
    time_feature_dim=time_feature_dim,
    time2vec_embed_dim=time2vec_embed_dim,
    num_building_types=num_building_types,
    building_type_embed_dim=building_type_embed_dim,
    population_embed_dim=population_embed_dim,
    num_event_types=num_event_types,
    event_type_embed_dim=event_type_embed_dim,
    num_equipment_types=num_equipment_types,
    equipment_embed_dim=equipment_embed_dim,
    target_embed_dim=target_embed_dim,
)

In [None]:
# Define batch and mini-batch

# One sample per neighbourhood, one sequence step per day of the predicted week
spatial_dimension = num_neighborhoods
temporal_dimension = 7

# Samples per mini-batch and number of mini-batches
mini_batch_size = 13
batch_size = 31

# The samples must split exactly into batch_size mini-batches of mini_batch_size samples
assert batch_size * mini_batch_size == spatial_dimension

In [None]:
# Neighborhood Features
# Sample i is neighbourhood i; the id is also the row index into the Node2Vec embedding table.
neighborhood_ids = torch.arange(num_neighborhoods)
logger.info(f"neighborhood_ids shape {neighborhood_ids.shape}")

# Time Feature
# TODO: use real data
time_features = torch.zeros(spatial_dimension, temporal_dimension, time_feature_dim)
time_features[:, :, 0] = torch.randint(1, 366, (spatial_dimension, temporal_dimension)) # Day of year (1–365)
time_features[:, :, 1] = torch.randint(1, 8, (spatial_dimension, temporal_dimension)) # Day of week (1–7)
time_features[:, :, 2] = torch.randint(0, 24, (spatial_dimension, temporal_dimension)) # Hour of the day (0–23)
logger.info(f"time_features shape {time_features.shape}")

# Building Features
# Column k of building_counts_np is the count for build_type_list[k], so the type id paired
# with column k has to be k. Random ids would weight each count with an unrelated embedding.
building_type_ids = torch.arange(num_building_types).repeat(spatial_dimension, temporal_dimension, 1)
logger.info(f"building_type_ids shape {building_type_ids.shape}")

# TODO: reduce building_counts size from [403, 7, 9] to [403, 9]
# The counts are static, so the same row is repeated for every time step.
building_counts = torch.from_numpy(building_counts_np).unsqueeze(1).repeat(1, temporal_dimension, 1)
logger.info(f"building_counts shape {building_counts.shape}")

# Demographic Features
population = torch.from_numpy(population_np).float()
logger.info(f"population shape {population.shape}")

# Event Features
# TODO: use real data
event_type_ids = torch.randint(0, num_event_types, (spatial_dimension, temporal_dimension))
logger.info(f"event_type_ids shape {event_type_ids.shape}")

# TODO: use real data
equipment_ids = torch.randint(0, num_equipment_types, (spatial_dimension, temporal_dimension))
logger.info(f"equipment_ids shape {equipment_ids.shape}")

# Target Values
# TODO: use real data
targets = torch.randint(0, 2, (spatial_dimension, temporal_dimension))
logger.info(f"targets shape {targets.shape}")

**Transformer Model Training Loop**

In [None]:
# Instantiate the DataLoader and EmergencyEventPredictor
dataset = NeighborhoodDataset(neighborhood_ids, time_features, building_type_ids, building_counts,
                              population, event_type_ids, equipment_ids, targets)
dataloader = DataLoader(dataset, batch_size=mini_batch_size, shuffle=True)

model = EmergencyEventPredictor(
    embedding_module=embedding_module,
    embed_dim=target_embed_dim,  # must equal the output width of embedding_module
    num_heads=4,
    num_layers=2
)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# With its default log_input=True, PoissonNLLLoss treats the model output as log(rate).
criterion = nn.PoissonNLLLoss()

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
  model.train()
  epoch_loss = 0.0

  # The loop variables carry a batch_ prefix on purpose: reusing the notebook-level names
  # (neighborhood_ids, time_features, ...) would overwrite the full tensors with the last
  # mini-batch and silently change what every later cell sees.
  for step, (batch_neighborhood_ids, batch_time_features, batch_building_type_ids,
             batch_building_counts, batch_population, batch_event_type_ids,
             batch_equipment_ids, batch_targets) in enumerate(dataloader, start=1):
    optimizer.zero_grad()

    # Forward pass
    batch_predictions = model(batch_neighborhood_ids, batch_time_features, batch_building_type_ids,
                              batch_building_counts, batch_population, batch_event_type_ids,
                              batch_equipment_ids)

    # Compute loss (targets as float so that they share the dtype of the predictions)
    loss = criterion(batch_predictions, batch_targets.float())

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    mini_batch_loss = loss.item()
    epoch_loss += mini_batch_loss
    logger.info(f"Epoch [{epoch+1}/{num_epochs}], Mini-batch [{step}/{len(dataloader)}], Loss: {mini_batch_loss:.4f}")

  logger.info(f"Epoch [{epoch+1}/{num_epochs}] complete, Epoch Loss: {(epoch_loss / len(dataloader)):.4f}")

# # Save the trained model
# torch.save(model.state_dict(), "transformer_model.pth")

**Transformer Model Prediction**

In [None]:
# Hyper-parameters of the model trained in the training cell (kept as notebook-level names)
embed_dim = target_embed_dim  # Same as output dimension of CombinedEmbedding
num_heads = 4
num_layers = 2
max_len = 7  # Sequence length (e.g., 7 days for a weekly prediction)

# Predict with the model that was just trained. Building a new EmergencyEventPredictor here
# would replace the trained encoder and prediction head with randomly initialised ones.
model.eval()  # switch off dropout

# Take the inputs from `dataset`, which holds the tensors for every neighbourhood,
# and skip gradient tracking since nothing is optimised here.
with torch.no_grad():
  predictions = model(
      neighborhood_ids=dataset.neighborhood_ids,
      time_features=dataset.time_features,
      building_type_ids=dataset.building_type_ids,
      building_counts=dataset.building_counts,
      population=dataset.population,
      event_type_ids=dataset.event_type_ids,
      equipment_ids=dataset.equipment_ids
  )

# The values are raw model outputs; with the PoissonNLLLoss used for training
# (log_input=True by default) they are log-rates, not event counts.
logger.info(f"Predictions Shape: {predictions.shape}")
logger.info(f"Predictions: {predictions}")

**Output Visualization**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Detach predictions and convert to NumPy for visualization (.cpu() is a no-op on CPU tensors)
predictions_np = predictions.detach().cpu().numpy()

# The model is trained with nn.PoissonNLLLoss, whose default log_input=True interprets the
# model output as log(expected count). Exponentiate to plot expected event counts.
expected_events_np = np.exp(predictions_np)

n_rows, n_days = expected_events_np.shape
# Cell annotations and one label per row are only readable for a small number of rows.
show_details = n_rows <= 40
if show_details:
  row_labels = [f"Neighborhood {i+1}" for i in range(n_rows)]
else:
  row_labels = max(1, n_rows // 20)  # an integer makes seaborn label only every n-th row

# Define the plot
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(expected_events_np, annot=show_details, cmap="coolwarm", cbar=True, fmt=".2f",
            xticklabels=[f"Day {d+1}" for d in range(n_days)],
            yticklabels=row_labels,
            ax=ax)

# Add titles and labels
ax.set_title("Predicted Number of Events per Day for Each Neighborhood")
ax.set_xlabel("Day of the Week")
ax.set_ylabel("Neighborhood")
plt.show()