In [None]:
import os
import json
import fiona
import networkx as nx
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from shapely import wkt
from uoapi import api_client
from gnn_package import data_utils

In [None]:
import zipfile


def unzip_files(zip_file, output_folder):
    """Extract every member of the archive at ``zip_file`` into ``output_folder``."""
    with zipfile.ZipFile(zip_file, mode="r") as archive:
        archive.extractall(path=output_folder)


# unzip_files('data/os/mastermap_roads_and_paths.zip', 'data/os')

In [None]:
# Shared root of the packaged data, relative to the working directory.
_package_data_dir = "../gnn_package/data/"

# Data directories. Each one ends with "/" because file names are appended to
# them by plain string concatenation throughout this notebook.
os_raw_data_dir = _package_data_dir + "ordinance_survey/raw/"
os_processed_data_dir = _package_data_dir + "ordinance_survey/processed/"
urban_observatory_data_dir = _package_data_dir + "urban_observatory/"

# OS open-data GML files, kept for testing (not as good as MasterMap)
open_roads_file = "OSOpenRoads_NZ.gml"
openmap_local_file = "NZ.gml"

# MasterMap raw GeoPackage files
mm_highways_file = "mastermap_highways.gpkg"
mm_paths_file = "mastermap_paths.gpkg"

# MasterMap processed (cropped and pickled) files
mm_highways_cropped_file = "mm_highways_cropped.pkl"
mm_paths_cropped_file = "mm_paths_cropped.pkl"


def check_files_exist(dir_path, file_name):
    """Report whether ``dir_path + file_name`` exists on disk.

    The two arguments are joined by plain string concatenation, so
    ``dir_path`` must already end with a path separator.

    Returns:
        bool: ``True`` if the concatenated path exists, ``False`` otherwise.
    """
    full_path = dir_path + file_name
    return os.path.exists(full_path)


def read_gpkg_files(dir_path, file_name, layer):
    """Load one layer of a GeoPackage file.

    Args:
        dir_path: Directory prefix; concatenated directly with ``file_name``.
        file_name: Name of the GeoPackage file inside ``dir_path``.
        layer: Layer to read, forwarded to ``gpd.read_file``.

    Returns:
        The result of ``gpd.read_file`` for the requested layer, or ``None``
        (after printing a message) when the file is missing.
    """
    if not check_files_exist(dir_path, file_name):
        print("GPKG file does not exist.")
        return None

    print("Reading GPKG file...")
    gpkg_path = dir_path + file_name
    return gpd.read_file(gpkg_path, layer=layer)


def read_gml_files(dir_path, file_name, layer):
    """Load one layer of a GML file.

    Args:
        dir_path: Directory prefix; concatenated directly with ``file_name``.
        file_name: Name of the GML file inside ``dir_path``.
        layer: Layer to read, forwarded to ``gpd.read_file``.

    Returns:
        The result of ``gpd.read_file`` for the requested layer, or ``None``
        (after printing a message) when the file is missing.
    """
    if not check_files_exist(dir_path, file_name):
        print("GML file does not exist.")
        return None

    print("Reading GML file...")
    gml_path = dir_path + file_name
    return gpd.read_file(gml_path, layer=layer)


def list_layers(dir_path, file_name):
    """Return the layers fiona reports for the file at ``dir_path + file_name``.

    Used in this notebook for both the GML and the GeoPackage inputs.
    """
    dataset_path = dir_path + file_name
    return fiona.listlayers(dataset_path)


def print_layers_in_raw_data():
    """Print the layers available in each raw Ordnance Survey input file.

    File names are taken from the module-level ``*_file`` constants rather
    than re-typed literals, so changing a constant also changes which file
    is inspected here.
    """
    labelled_files = [
        ("Layers in OS Open Roads", open_roads_file),
        ("Layers in OS OpenMap - Local", openmap_local_file),
        ("Layers in MasterMap Highways", mm_highways_file),
        ("Layers in MasterMap Paths", mm_paths_file),
    ]
    for label, file_name in labelled_files:
        print(label, list_layers(os_raw_data_dir, file_name))


def get_bbox_transformed():
    """Build the study-area bounding box and return it in EPSG:27700.

    The rectangle is declared in EPSG:4326 and reprojected to British
    National Grid (EPSG:27700), the CRS the road data is assumed to use.

    Returns:
        A one-row GeoDataFrame holding the reprojected bounding polygon.
    """
    west, east = -1.65327, -1.54993
    south, north = 54.93188, 55.02084

    # Same vertex order as before: SW -> SE -> NE -> NW
    corners = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
    ]
    bbox_wgs84 = gpd.GeoDataFrame(geometry=[Polygon(corners)], crs="EPSG:4326")

    # Transform the bbox to match the road data's CRS
    return bbox_wgs84.to_crs("EPSG:27700")


# Every pickle the cached branch below loads. The cache only counts as usable
# when all of them exist; a partial cache triggers a full rebuild instead of
# failing part-way through the load.
road_network_file = "road_network.pkl"
processed_road_files = [
    mm_highways_cropped_file,
    mm_paths_cropped_file,
    road_network_file,
]
roads_cache_complete = all(
    check_files_exist(os_processed_data_dir, file_name)
    for file_name in processed_road_files
)

if not roads_cache_complete:
    # Read the raw layers.
    # NOTE: the OS Open Roads / OpenMap frames are only created on this
    # cache-miss path and are never pickled, so they are not available after
    # a run that takes the cached branch.
    or_roads_link_gdf = read_gml_files(
        os_raw_data_dir, open_roads_file, layer="RoadLink"
    )
    or_roads_node_gdf = read_gml_files(
        os_raw_data_dir, open_roads_file, layer="RoadNode"
    )
    onml_road_gdf = read_gml_files(os_raw_data_dir, openmap_local_file, layer="Road")
    mm_highways = read_gpkg_files(os_raw_data_dir, mm_highways_file, layer="RoadLink")
    mm_paths = read_gpkg_files(os_raw_data_dir, mm_paths_file, layer="PathLink")

    bbox_transformed = get_bbox_transformed()

    # Crop each GDF using the transformed bounding box
    or_roads_link_cropped = gpd.clip(or_roads_link_gdf, bbox_transformed)
    or_roads_node_cropped = gpd.clip(or_roads_node_gdf, bbox_transformed)
    onml_road_cropped = gpd.clip(onml_road_gdf, bbox_transformed)
    mm_highways_cropped = gpd.clip(mm_highways, bbox_transformed)
    mm_paths_cropped = gpd.clip(mm_paths, bbox_transformed)

    # Merge the highways and paths into a single GDF
    road_network = pd.concat([mm_highways_cropped, mm_paths_cropped])

    # Save the cropped frames; make sure the target directory exists first
    os.makedirs(os_processed_data_dir, exist_ok=True)
    mm_highways_cropped.to_pickle(os_processed_data_dir + mm_highways_cropped_file)
    mm_paths_cropped.to_pickle(os_processed_data_dir + mm_paths_cropped_file)
    road_network.to_pickle(os_processed_data_dir + road_network_file)

else:
    mm_highways_cropped = data_utils.read_pickled_gdf(
        os_processed_data_dir, mm_highways_cropped_file
    )
    mm_paths_cropped = data_utils.read_pickled_gdf(
        os_processed_data_dir, mm_paths_cropped_file
    )
    road_network = data_utils.read_pickled_gdf(
        os_processed_data_dir, road_network_file
    )

In [None]:
# Plot the cropped MasterMap links: highways in orange, paths in grey
fig, ax = plt.subplots(figsize=(30, 30))
mm_highways_cropped.plot(ax=ax, color="orange", linewidth=0.5)
mm_paths_cropped.plot(ax=ax, color="grey", linewidth=0.5)
# The layers drawn are the MasterMap highways and paths, not OS Open Roads,
# so the title names them accordingly.
ax.set_title("OS MasterMap - Highways and Paths")
plt.show()

In [None]:
sensor_nodes_file = "sensor_nodes.pkl"
sensor_names_file = "sensor_names.json"
sensor_nodes_path = urban_observatory_data_dir + sensor_nodes_file
sensor_names_path = urban_observatory_data_dir + sensor_names_file

# Both cache files are needed; if either is missing, rebuild from the API.
if not (os.path.exists(sensor_nodes_path) and os.path.exists(sensor_names_path)):
    uo_client = api_client.APIClient()

    # Get sensor locations and display names, both keyed by the raw sensor id
    sensors = uo_client.get_sensors(theme="People")
    sensor_nodes = {
        sensor["Raw ID"]: sensor["Location (WKT)"] for sensor in sensors["sensors"]
    }
    sensor_names = {
        sensor["Raw ID"]: sensor["Sensor Name"] for sensor in sensors["sensors"]
    }

    sensor_nodes_df = pd.DataFrame(sensor_nodes.items(), columns=["id", "geometry"])
    # convert wkt strings to geometry objects
    sensor_nodes_df["geometry"] = sensor_nodes_df["geometry"].apply(wkt.loads)
    sensor_nodes_gdf = gpd.GeoDataFrame(
        sensor_nodes_df, geometry="geometry", crs="EPSG:4326"
    )
    # convert to British National Grid from WGS84
    sensor_nodes_gdf = sensor_nodes_gdf.to_crs("EPSG:27700")
    bbox_transformed = get_bbox_transformed()
    # crop the sensor nodes to the bounding box
    sensor_nodes_gdf = gpd.clip(sensor_nodes_gdf, bbox_transformed)

    # make sure the target directory exists before writing the cache files
    os.makedirs(urban_observatory_data_dir, exist_ok=True)
    # save the sensor nodes to pickle
    sensor_nodes_gdf.to_pickle(sensor_nodes_path)
    # save the sensor names to json; the context manager closes (and flushes)
    # the file handle, which the previous bare open() never did
    with open(sensor_names_path, "w") as names_file:
        json.dump(sensor_names, names_file)

else:
    sensor_nodes_gdf = data_utils.read_pickled_gdf(
        urban_observatory_data_dir, sensor_nodes_file
    )
    # Load the names too, so `sensor_names` is defined on both paths.
    # NOTE: JSON object keys are always strings, so ids that were not strings
    # when saved come back as strings here.
    with open(sensor_names_path) as names_file:
        sensor_names = json.load(names_file)

In [None]:
# First verify the CRS of both dataframes
print("Roads CRS:", road_network.crs)
print("Sensor nodes CRS:", sensor_nodes_gdf.crs)

# Create the plot with a larger figure size
fig, ax = plt.subplots(figsize=(12, 12))

# Plot the merged road network (highways and paths)
road_network.plot(ax=ax, color="grey", linewidth=0.5)

# Plot sensor points with different styling
sensor_nodes_gdf.plot(
    ax=ax,
    color="red",
    markersize=20,
    alpha=0.6,  # Add some transparency
    zorder=2,  # Ensure points are plotted on top
)

# Set the plot bounds based on the road network extent - there are some
# sensors that are a long way from the roads
bounds = road_network.total_bounds  # (minx, miny, maxx, maxy)
min_x, min_y, max_x, max_y = bounds
ax.set_xlim([min_x, max_x])
ax.set_ylim([min_y, max_y])

# Add gridlines
ax.grid(True)

# Add title: the data shown is the MasterMap road network plus the sensors,
# not OS Open Roads
ax.set_title("OS MasterMap road network with Urban Observatory sensors")

# Show the plot
plt.tight_layout()
plt.show()

# Print some diagnostic information (reuses the bounds computed above; the
# frame is highways + paths, so it is labelled as the road network)
print("\nRoad network bounds:", bounds)
print("Sensor nodes bounds:", sensor_nodes_gdf.total_bounds)