# Graph-learning Preprocessing
### Preprocessing the road network for doing graph learning
*Written by - Rasmus Bergman rbvp20@student.aau.dk*

This is a preprocessing pipeline that builds a dataset for graph learning on the accident data from vejman.dk.

### **Prerequisites**
- Running mastra.ipynb
- Running vejman.ipynb

In [109]:
# Import libraries
import geopandas as gpd
import os
import osmnx as ox
from pyproj import Transformer
import constants as c
# Parameters
# Radius in metres: used both to buffer every accident and to normalise the
# node-to-accident distances further down.
ACCIDENT_RADIUS_METERS = 50

# Config (osmnx settings)
ox.settings.use_cache = True
ox.settings.console_log = True

In [110]:
# Load the accident data from vejman.dk
accidents = gpd.read_file(c.VEJMAN_PATH)

# The buffer radius is given in metres, and the nodes and traffic counts are
# reprojected to EPSG:25832 before they are joined, so bring the accidents
# into that same projected CRS first. This changes nothing when the file is
# already stored in EPSG:25832; a file without CRS metadata is left untouched
# because it cannot be reprojected.
if accidents.crs is not None:
    accidents = accidents.to_crs("EPSG:25832")

# Keep the original geometry in "old_geometry" (needed later to measure the
# node-to-accident distance) and make the geometry buffered by
# ACCIDENT_RADIUS_METERS the active one used for the spatial join.
accidents_buffered = accidents.buffer(ACCIDENT_RADIUS_METERS)
accidents["old_geometry"] = accidents["geometry"]
accidents["geometry"] = accidents_buffered

In [111]:
# Read the OSM network nodes and reproject them to EPSG:25832
gdf_nodes = gpd.read_file(c.NODE_GDF_PATH).to_crs("EPSG:25832")

In [112]:
# Pair every node with each buffered accident geometry it intersects, i.e.
# every accident at most ACCIDENT_RADIUS_METERS away. A node that matches
# several accidents appears once per accident.
all_accident_nodes = gdf_nodes.sjoin(accidents, how="inner", predicate="intersects")


In [113]:
# Distance from each node to the original (unbuffered) geometry of the
# accident it was matched with
node_to_accident = all_accident_nodes["geometry"].distance(all_accident_nodes["old_geometry"])

# Store a proximity weight in the "distance" column: 1 when the node lies on
# the accident, falling linearly to 0 at ACCIDENT_RADIUS_METERS
all_accident_nodes["distance"] = 1 - node_to_accident / ACCIDENT_RADIUS_METERS


In [114]:
# Add up the proximity weights of all accidents matched to each node
all_accident_nodes_grouped = (
    all_accident_nodes.groupby("osmid")["distance"].sum().reset_index()
)

# Divide by the largest per-node total so that the highest score becomes 1
highest_total = all_accident_nodes_grouped["distance"].max()
all_accident_nodes_grouped["distance"] = all_accident_nodes_grouped["distance"] / highest_total

In [115]:
# NOTE: plain assignment - accident_nodes is the same object as gdf_nodes,
# so the column added below shows up on gdf_nodes as well
accident_nodes = gdf_nodes

# Look up each node's rescaled score by osmid; nodes that were not matched
# to any accident get a score of 0
score_by_osmid = all_accident_nodes_grouped.set_index("osmid")["distance"]
accident_nodes["accident_score"] = accident_nodes["osmid"].map(score_by_osmid).fillna(0)

In [116]:
# Load the mastra traffic counts and keep only the rows whose vehicle type
# is "MOTORKTJ" and whose year (AAR) is 2018 or later
traffic_flow = (
    gpd.read_file(c.MASTRA_PATH)
    .loc[lambda counts: counts["KOERETOEJSART"] == "MOTORKTJ"]
    .loc[lambda counts: counts["AAR"] >= 2018]
)

In [118]:
# Work in the same projected CRS as the nodes
traffic_flow = traffic_flow.to_crs("EPSG:25832")

# Only the location, the AADT value and the year are needed; newest years first
traffic_flow_simple = traffic_flow[["geometry", "AADT", "AAR"]].sort_values("AAR", ascending=False)

# Attach every traffic count to its nearest node, then keep a single count
# per node and index the result by osmid.
# NOTE(review): keep="first" picks the most recent year only if sjoin_nearest
# returns rows in the order of traffic_flow_simple - confirm.
traffic_flow_joined = (
    gpd.sjoin_nearest(traffic_flow_simple, accident_nodes, how="inner")
    .drop_duplicates(subset=["osmid"], keep="first")
    .set_index("osmid")
)

In [119]:
# NOTE: plain assignment - accident_flow_nodes is the same object as
# accident_nodes, so the new column is visible through both names
accident_flow_nodes = accident_nodes

# Look up the AADT value joined to each node by osmid; nodes without a
# joined traffic count get 0
aadt_by_osmid = traffic_flow_joined["AADT"]
accident_flow_nodes["traffic_flow"] = accident_flow_nodes["osmid"].map(aadt_by_osmid).fillna(0)

In [72]:
# RGB channels of the two ends of the colour ramp
color_start = [0x27, 0x7d, 0xa1]  # 277da1
color_end = [0xf9, 0x41, 0x44]  # f94144


def gradient(value):
    """Return the ramp colour for *value* as an ``rgb(r, g, b)`` string.

    *value* is clamped to the range 0..1 and each channel is linearly
    interpolated between ``color_start`` (at 0) and ``color_end`` (at 1).
    The channel numbers are not rounded, so fractional inputs produce
    fractional channel values in the returned string.
    """
    t = max(0, min(1, value))
    channels = [low + (high - low) * t for low, high in zip(color_start, color_end)]
    return "rgb({}, {}, {})".format(*channels)

# Two opposite corners of the test area in EPSG:4326
# (the pairs look like (longitude, latitude) - they are swapped before transforming)
test_bbox = [
    (9.790373, 56.967101),
    (10.072584, 57.119877),
]

transformer = Transformer.from_crs("epsg:4326", "epsg:25832")

# Project both corners to EPSG:25832, passing the second element of each pair first
test_bbox_new = [transformer.transform(second, first) for first, second in test_bbox]

# Slice the nodes to the rectangle spanned by the two projected corners
(x_from, y_from), (x_to, y_to) = test_bbox_new
test_nodes = accident_nodes.cx[x_from:x_to, y_from:y_to]

In [None]:
# Show the bounding-box subset of nodes with geopandas' explore() view.
# Left as a bare expression so the result is displayed as the cell output.
test_nodes.explore()

In [124]:
# Write the nodes, including the accident_score and traffic_flow columns,
# to GeoJSON. The target folder is created first so the export does not fail
# on a fresh checkout where ../data/graph does not exist yet.
output_path = "../data/graph/accident_flow_nodes.geojson"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
accident_flow_nodes.to_file(output_path, driver="GeoJSON")