# EFRS Dataset Normalization

Datasets used in this project:
- **EFRS dataset** (*event_trip_summary, unit_trip_summary, unit_history*): http://ftp.safecity.savitestbed.ca/
- **Neighbourhood** (*City_of_Edmonton_-_Neighbourhoods_20241022*): https://data.edmonton.ca/City-Administration/City-of-Edmonton-Neighbourhoods/65fr-66s6/about_data
- **Fire Stations** (*Fire_Stations_20241027*): https://data.edmonton.ca/Emergency-Services/Fire-Stations/b4y7-zhnz/about_data
- **Neighbourhood Features** (*neighbourhood_static_data_with_five_years_events*): received by email from Dilli (subject: "Static features data by neighborhood of the city of Edmonton")

In [None]:
!pip install -r ../requirements.txt

In [None]:
import copy
import math
import datetime
import time
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import contextily as ctx

from scipy import stats
from shapely.geometry import Point, MultiPolygon
from shapely.wkt import loads

In [None]:
# Choose where the dataset lives: Google Drive when the google.colab package
# can be imported, otherwise a directory relative to this notebook.
try:
  from google.colab import drive
except ImportError:
  # Only a missing google.colab package means "running locally"; any other
  # error (e.g. a failed Drive mount or a keyboard interrupt) is no longer
  # silently turned into the local fallback.
  print("Running locally, reading dataset from local file system")
  DATASET_PATH = "../dataset/EdmontonFireRescueServicesData"
  if not os.path.exists(DATASET_PATH):
    print(f"Cannot find dataset directory, place dataset in {DATASET_PATH}")
    exit(1)
else:
  print("Running on Google Colab, reading dataset from drive")
  drive.mount("/content/drive")
  DATASET_PATH = "/content/drive/MyDrive/ECE2500/EdmontonFireRescueServicesData"

# Every input file lives directly under DATASET_PATH.
UNIT_TRIP_PATH = os.path.join(DATASET_PATH, "EFRS_Unit_Trip_Summary.csv")
EVENT_TRIP_PATH = os.path.join(DATASET_PATH, "EFRS_Event_Trip_Summary.csv")
UNIT_HISTORY_2023_PATH = os.path.join(DATASET_PATH, "UN_HI_2023.csv")
NEIGHBOURHOOD_PATH = os.path.join(DATASET_PATH, "City_of_Edmonton_-_Neighbourhoods_20241022.csv")
FIRE_STATION_PATH = os.path.join(DATASET_PATH, "Fire_Stations_20241027.csv")
NEIGHBOURHOOD_FEATURES_PATH = os.path.join(DATASET_PATH, "neighbourhood_static_data_with_five_years_events.csv")

# Echo the resolved locations so a wrong DATASET_PATH is easy to spot.
for label, path in (
    ("Unit Trip", UNIT_TRIP_PATH),
    ("Event Trip", EVENT_TRIP_PATH),
    ("Unit History 2023", UNIT_HISTORY_2023_PATH),
    ("Neighbourhood", NEIGHBOURHOOD_PATH),
    ("Fire Stations", FIRE_STATION_PATH),
    ("Neighbourhood Features", NEIGHBOURHOOD_FEATURES_PATH),
):
  print(f"{label}: {path}")

## 1 Event Trip Data

In [None]:
event_trip_df = pd.read_csv(EVENT_TRIP_PATH)

event_trip_df.columns

In [None]:
event_trip_df.head(5)

In [None]:
# Convert 'Sd_date' to datetime objects and derive the calendar month
event_trip_df['Sd_date'] = pd.to_datetime(event_trip_df['Sd_date'])
event_trip_df['month'] = event_trip_df['Sd_date'].dt.month

# One sub-frame per calendar month (1-12); a month without events maps to an
# empty frame.
monthly_buckets = {}
for month in range(1, 13):
  monthly_buckets[month] = event_trip_df[event_trip_df['month'] == month]
  # len() is the number of rows (events); DataFrame.size would report
  # rows * columns, which is not what the "len" label promises.
  print(f"month {month}, len {len(monthly_buckets[month])}")

january_df = monthly_buckets[1]
january_df.head()

In [None]:
# Derive calendar features from the event timestamp 'Sd_date'.
event_trip_df['year'] = event_trip_df['Sd_date'].dt.year
event_trip_df['month'] = event_trip_df['Sd_date'].dt.month
# strftime returns text, so 'week' holds string labels rather than integers
# (presumably "%V" is the ISO 8601 week number — confirm for the target platform).
event_trip_df['week'] = event_trip_df['Sd_date'].dt.strftime("%V")
event_trip_df['dayofweek'] = event_trip_df['Sd_date'].dt.dayofweek

In [None]:
# Overall frequency of each card description, followed by one column per
# year from 2015 to 2023 (labelled with the year).
card_columns = [event_trip_df['card_description'].value_counts()]
for year in range(2015, 2024):
  cards_in_year = event_trip_df.loc[event_trip_df['year'] == year, 'card_description']
  card_columns.append(cards_in_year.value_counts().rename(year))

card_count_df = pd.concat(card_columns, axis=1)

card_count_df.head(30)

In [None]:
event_trip_df['hour'] = event_trip_df['Sd_date'].dt.hour
event_trip_df.head(5)

In [None]:
# Number of distinct event ids for each day of the week.
eid_counts_buckets = event_trip_df.groupby('dayofweek')['Eid'].nunique()
# eid_counts_buckets = eid_counts_buckets.drop(['53']) # for weekly event count

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(eid_counts_buckets.index, eid_counts_buckets.values)
ax.set_xlabel('Day of Week')
ax.set_ylabel('Number of Events')
ax.set_title('Number of Events by Day of Week')
# ax.grid(True)
plt.show()

In [None]:
event_trip_df['Rc_description'].unique()

## 2 Unit Trip Data

In [None]:
# Load the unit trips and rename EID to Eid so the key column carries the same
# name as in event_trip_df (needed to correlate the two tables).
unit_trip_df = pd.read_csv(UNIT_TRIP_PATH).rename(columns={'EID': 'Eid'})
unit_trip_df.columns

In [None]:
event_unit_trip_df = pd.merge(event_trip_df, unit_trip_df, on='Eid', how='inner')
event_unit_trip_df.head(5)

In [None]:
# Recompute the calendar columns on the merged frame from its event timestamp.
merged_dates = event_unit_trip_df['Sd_date'].dt
event_unit_trip_df['year'] = merged_dates.year
event_unit_trip_df['month'] = merged_dates.month

# Overall frequency of each unit type, followed by one column per year from
# 2015 to 2023 (labelled with the year).
unit_type_columns = [event_unit_trip_df['unityp'].value_counts()]
for year in range(2015, 2024):
  units_in_year = event_unit_trip_df.loc[event_unit_trip_df['year'] == year, 'unityp']
  unit_type_columns.append(units_in_year.value_counts().rename(year))

unit_type_count_df = pd.concat(unit_type_columns, axis=1)

unit_type_count_df.head(10)

## 3 Unit History Data

In [None]:
unit_history_2023_df = pd.read_csv(UNIT_HISTORY_2023_PATH)
unit_history_2023_df.head()

## 4 Neighbourhood & Stations

In [None]:
neighbourhood_df = pd.read_csv(NEIGHBOURHOOD_PATH)

neighbourhood_df.head(5)

In [None]:
neighbourhood_feature_df = pd.read_csv(NEIGHBOURHOOD_FEATURES_PATH)

neighbourhood_feature_df.head(5)

In [None]:
neighbourhood_feature_joined = pd.merge(neighbourhood_feature_df, neighbourhood_df, left_on='Neighbourhood_Number', right_on='Neighbourhood Number', how='inner')

neighbourhood_feature_joined.columns

In [None]:
station_df = pd.read_csv(FIRE_STATION_PATH)

station_df.head(5)

In [None]:
neighbourhood_df['multipolygon object'] = neighbourhood_df['Geometry Multipolygon'].apply(loads)
station_df['point object'] = station_df['POINT LOCATION'].apply(loads)

# # List of neighbourhoods
# neighbourhood_list = list()
# for index, row in neighbourhood_df.iterrows():
#   try:
#     multipolygon = loads(row['Geometry Multipolygon'])
#     neighbourhood_list.append(multipolygon)
#   except Exception as e:
#     print(f"Error processing row {index}: {e}")
#     continue

def find_neighbourhood(latitude, longitude, neighbourhood_df):
  """Look up which neighbourhood contains a coordinate.

  Rows are checked in DataFrame order and the first neighbourhood whose
  'multipolygon object' contains the point wins.

  Args:
    latitude: Latitude of the location.
    longitude: Longitude of the location.
    neighbourhood_df: DataFrame with a 'multipolygon object' column (geometry
      objects exposing `contains`) and a 'Neighbourhood Number' column.

  Returns:
    The 'Neighbourhood Number' of the first matching row, or None when no
    row's geometry contains the point.
  """
  # The point is built as (x, y) = (longitude, latitude).
  location = Point(longitude, latitude)
  matching_numbers = (
      record['Neighbourhood Number']
      for _, record in neighbourhood_df.iterrows()
      if record['multipolygon object'].contains(location)
  )
  return next(matching_numbers, None)

In [None]:
print(f"length of neighbourhood_df {len(neighbourhood_df)}")
print(f"length of station_df {len(station_df)}")
print(f"length of unit_history_2023_df {len(unit_history_2023_df)}")
print(f"length of event_trip_df {len(event_trip_df)}")
print(f"length of unit_trip_df {len(unit_trip_df)}")
print(f"length of event_unit_trip_df {len(event_unit_trip_df)}")

In [None]:
# # Apply the function to each row in unit_history_2023_df
# unit_history_2023_df['Neighbourhood Number'] = unit_history_2023_df.apply(
#     lambda row: find_neighbourhood(row['latitude'], row['longitude'], neighbourhood_df), axis=1)
# unit_history_2023_df.head(10)

# Take an independent copy of the first 403 rows: assigning a new column to a
# bare head() slice triggers pandas' SettingWithCopyWarning, because the slice
# may still be tied to unit_history_2023_df.
unit_history_2023_df_head = unit_history_2023_df.head(403).copy()
# Row-wise lookup of the neighbourhood containing each recorded position.
unit_history_2023_df_head['Neighbourhood Number'] = unit_history_2023_df_head.apply(
    lambda row: find_neighbourhood(row['latitude'], row['longitude'], neighbourhood_df), axis=1)
unit_history_2023_df_head.head(10)

In [None]:
# Parse the 'Geometry Multipolygon' WKT strings into a 'geometry' column and
# wrap the frame as a GeoDataFrame. This runs unconditionally: an existing
# 'geometry' column on neighbourhood_df is overwritten.
neighbourhood_df['geometry'] = gpd.GeoSeries.from_wkt(neighbourhood_df['Geometry Multipolygon'])
neighbourhood_gdf = gpd.GeoDataFrame(neighbourhood_df, geometry='geometry')

# Tag the geometries with a coordinate reference system (always assigned here).
# NOTE(review): assumes the source coordinates are lon/lat in WGS 84 — confirm against the dataset.
neighbourhood_gdf.crs = "EPSG:4326" # WGS 84

# Draw the neighbourhood polygons on a fresh 10x10 figure
fig, ax = plt.subplots(figsize=(10, 10))
neighbourhood_gdf.plot(ax=ax, color='lightblue', edgecolor='black')

# Add a basemap underneath, using the same CRS as the polygons
ctx.add_basemap(ax, crs=neighbourhood_gdf.crs, zoom=11)

# Customize plot labels and title
ax.set_title('Edmonton Neighbourhoods')

# Display the plot
plt.show()

In [None]:
# Only the first 500 events are drawn.
event_trip_df_head = event_trip_df.head(500)

# Point layer for the sampled events.
event_points = gpd.points_from_xy(event_trip_df_head['Longitude'], event_trip_df_head['Latitude'])
event_trip_gdf = gpd.GeoDataFrame(event_trip_df_head, geometry=event_points)
event_trip_gdf.crs = "EPSG:4326"

# Point layer for the fire stations.
station_points = gpd.points_from_xy(station_df['LONGITUDE'], station_df['LATITUDE'])
station_gdf = gpd.GeoDataFrame(station_df, geometry=station_points)
station_gdf.crs = "EPSG:4326"

# Neighbourhood polygons form the base layer.
fig, ax = plt.subplots(figsize=(10, 10))
neighbourhood_gdf.plot(ax=ax, color='lightblue', edgecolor='black')

# Overlays as (layer, marker size, colour, legend label), drawn in this order.
for layer, size, colour, legend_label in (
    (event_trip_gdf, 3, 'red', 'Events'),
    (station_gdf, 5, 'blue', 'Stations'),
):
  layer.plot(ax=ax, markersize=size, color=colour, label=legend_label)

# Basemap in the same CRS as the neighbourhood polygons.
ctx.add_basemap(ax, crs=neighbourhood_gdf.crs, zoom=11)

ax.set_title('Edmonton Neighbourhoods with Events and Stations')
ax.legend()

plt.show()

## 5 Understanding Events

We select a few response codes (`Rc_description`) and, for each of them, one example event from 2023:
- Alarms
- Non-Structural Fire
- Structural Fire
- Major Event
- Rescue

In [None]:
event_trip_df['Rc_description'].unique()

# event_trip_df[event_trip_df['Rc_description'] == "Alarms"]

In [None]:
# Rc_description: Alarms
interested_eid = 2890339

# Rc_description: Non-Structural Fire
# interested_eid = 2890362

# Rc_description: Structural Fire
# interested_eid = 2890174

# Rc_description: Major Event
# interested_eid = 2887326

# Rc_description: Rescue
# interested_eid = 2890322


### Select Alarm Event

In [None]:
# Event Trip

alarm_event_trip = event_trip_df[event_trip_df['Eid'] == interested_eid]

In [None]:
alarm_event_trip

In [None]:
# Unit History

alarm_unit_history = unit_history_2023_df[unit_history_2023_df['eid'] == interested_eid]

print(f"Unit IDs: {alarm_unit_history['unid'].unique()}")
print(f"Unit Types: {alarm_unit_history['unityp'].unique()}")
print(f"Unit Status: {alarm_unit_history['unit_status'].unique()}")

alarm_unit_history['unid'].value_counts().sort_index()

In [None]:
unit_history_DC3 = alarm_unit_history[alarm_unit_history['unid'] == "DC3"]
unit_history_DC3

In [None]:
unit_history_LD16 = alarm_unit_history[alarm_unit_history['unid'] == "LD16"]
unit_history_LD16

In [None]:
unit_history_P16 = alarm_unit_history[alarm_unit_history['unid'] == "P16"]
unit_history_P16

In [None]:
unit_history_P26 = alarm_unit_history[alarm_unit_history['unid'] == "P26"]
unit_history_P26

In [None]:
unit_history_R20 = alarm_unit_history[alarm_unit_history['unid'] == "R20"]
unit_history_R20

In [None]:
unit_history_TK26 = alarm_unit_history[alarm_unit_history['unid'] == "TK26"]
unit_history_TK26

In [None]:
alarm_unit_history

In [None]:
# Unit Trip

alarm_unit_trip = unit_trip_df[unit_trip_df['Eid'] == interested_eid]

print(f"Unit IDs: {alarm_unit_trip['unid'].unique()}")
print(f"CAD Unit IDs: {alarm_unit_trip['cad_unid'].unique()}")
print(f"Stations: {alarm_unit_trip['station'].unique()}")

alarm_unit_trip['unid'].value_counts().sort_index()

In [None]:
alarm_unit_trip

In [None]:
selected_stations = [24, 16, 26, 20]
event_stations = station_df[station_df['STATION_NAME'].isin(selected_stations)]

In [None]:
import math

import matplotlib.pyplot as plt

import matplotlib.colors as mcolors
from matplotlib.patches import Rectangle


def plot_colortable(colors, *, ncols=4, sort_colors=True):
    """Draw a table of named colour swatches and return its figure.

    Names fill the table column by column: each column is filled from top
    to bottom before the next one starts.

    Args:
        colors: Mapping from colour name to colour. It is iterated for the
            names and indexed by name for each swatch's face colour.
        ncols: Number of columns in the table.
        sort_colors: When exactly ``True``, names are ordered by the HSV
            tuple obtained from the name itself; otherwise the mapping's
            iteration order is kept.

    Returns:
        The Matplotlib figure containing the table.
    """
    # Layout constants, in pixels at the figure's dpi.
    cell_width, cell_height = 212, 22
    swatch_width = 48
    margin = 12
    dpi = 72

    if sort_colors is True:
        def hsv_key(colour_name):
            return tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(colour_name)))

        names = sorted(colors, key=hsv_key)
    else:
        names = list(colors)

    nrows = math.ceil(len(names) / ncols)

    width = cell_width * ncols + 2 * margin
    height = cell_height * nrows + 2 * margin

    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
    # Leave a `margin`-pixel border around the axes on every side.
    fig.subplots_adjust(left=margin/width, bottom=margin/height,
                        right=(width-margin)/width, top=(height-margin)/height)
    ax.set_xlim(0, cell_width * ncols)
    # Inverted y-limits so that row 0 is drawn at the top.
    ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.set_axis_off()

    for position, name in enumerate(names):
        col, row = divmod(position, nrows)
        y = row * cell_height
        cell_left = cell_width * col

        ax.text(cell_left + swatch_width + 7, y, name, fontsize=14,
                horizontalalignment='left',
                verticalalignment='center')

        swatch = Rectangle(xy=(cell_left, y-9), width=swatch_width,
                           height=18, facecolor=colors[name], edgecolor='0.7')
        ax.add_patch(swatch)

    return fig

# plot_colortable(mcolors.CSS4_COLORS)
# plt.show()

In [None]:
def _points_gdf(frame, lon_col, lat_col, crs="EPSG:4326"):
  """Wrap `frame` in a GeoDataFrame with one point per row.

  Args:
    frame: DataFrame holding one coordinate pair per row.
    lon_col: Name of the column used as the x coordinate (longitude).
    lat_col: Name of the column used as the y coordinate (latitude).
    crs: Coordinate reference system assigned to the points.

  Returns:
    A GeoDataFrame over `frame` whose geometry is built from the two columns.
  """
  return gpd.GeoDataFrame(
      frame,
      geometry=gpd.points_from_xy(frame[lon_col], frame[lat_col]),
      crs=crs,
  )


# Event location and the stations involved in the response.
event_trip_gdf = _points_gdf(alarm_event_trip, 'Longitude', 'Latitude')
station_gdf = _points_gdf(event_stations, 'LONGITUDE', 'LATITUDE')

# One layer per responding unit. The individual names are kept because
# later cells refer to them (e.g. unit_history_TK26_gdf).
unit_history_LD16_gdf = _points_gdf(unit_history_LD16, 'longitude', 'latitude')
unit_history_P16_gdf = _points_gdf(unit_history_P16, 'longitude', 'latitude')
unit_history_P26_gdf = _points_gdf(unit_history_P26, 'longitude', 'latitude')
unit_history_R20_gdf = _points_gdf(unit_history_R20, 'longitude', 'latitude')
unit_history_TK26_gdf = _points_gdf(unit_history_TK26, 'longitude', 'latitude')
unit_history_DC3_gdf = _points_gdf(unit_history_DC3, 'longitude', 'latitude')

# Neighbourhood polygons form the base layer.
fig, ax = plt.subplots(figsize=(10, 10))
neighbourhood_gdf.plot(ax=ax, color='lightblue', edgecolor='black')

# Overlays as (layer, marker size, colour, legend label), drawn in this order.
overlays = [
    (event_trip_gdf, 25, 'red', 'Events'),
    (station_gdf, 15, 'blue', 'Stations'),
    (unit_history_LD16_gdf, 10, 'orange', 'Unit LD16'),
    (unit_history_P16_gdf, 10, 'indigo', 'Unit P16'),
    (unit_history_P26_gdf, 10, 'gold', 'Unit P26'),
    (unit_history_R20_gdf, 10, 'cyan', 'Unit R20'),
    (unit_history_TK26_gdf, 10, 'mediumpurple', 'Unit TK26'),
    (unit_history_DC3_gdf, 10, 'peachpuff', 'Unit DC3'),
]
for layer, size, colour, legend_label in overlays:
  layer.plot(ax=ax, markersize=size, color=colour, label=legend_label)

# Basemap in the same CRS as the neighbourhood polygons.
ctx.add_basemap(ax, crs=neighbourhood_gdf.crs, zoom=11)

# Customize plot labels and title
ax.set_title('Responding Units to an Alarm Event')
ax.legend()

# Display the plot
plt.show()

In [None]:
unit_history_TK26_gdf