In [None]:
import logging
import networkx as nx
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import Delaunay, cKDTree
from pyproj import Transformer

# Load the interim sensor graph from its GML export; the path is relative to
# the directory the notebook kernel runs in.
graph = nx.read_gml("../data/interim/interim_graph_1774bbe3a0e79fba_sensor_graph.gml")

In [None]:
# Show every edge of the loaded graph together with its attribute dict.
graph.edges(data=True)

In [None]:
# Labelled overview of the input graph. No positions are passed, so the layout
# is chosen by networkx and does not reflect the nodes' lat/lon.
fig, ax = plt.subplots(figsize=(32, 32))
nx.draw(graph, with_labels=True, ax=ax)

In [None]:
import networkx as nx
import numpy as np
from scipy.spatial import Delaunay, cKDTree
from pyproj import Transformer
import logging
from collections import defaultdict

def create_gabriel_graph_robust(G_in, crs_from="EPSG:4326", crs_to="EPSG:32630"):
    """
    Build a Gabriel graph over the node positions of a spatial networkx graph.

    Nodes are grouped by their exact ``(lon, lat)`` attribute pair, so several
    nodes may share one site. Each distinct site is projected once from
    ``crs_from`` to ``crs_to`` and the Gabriel test is run on the projected
    points. Every Gabriel edge found between two sites is then expanded to all
    node pairs across those two sites.

    Args:
        G_in: networkx graph whose nodes carry 'lat' and 'lon' attributes.
        crs_from: CRS of the stored lon/lat values (handed to pyproj).
        crs_to: CRS in which distances are measured (handed to pyproj).

    Returns:
        An ``nx.Graph`` holding all nodes of ``G_in`` with their attributes and
        two kinds of edges:
          * input edges whose endpoints share a site, with their attributes
            copied and ``is_gabriel_edge = 0.0``;
          * Gabriel edges, carrying the attributes of the matching input edge
            when one exists, with ``is_gabriel_edge = 1.0``.
        An empty ``nx.Graph`` is returned when any node lacks 'lat'/'lon' or
        when the Delaunay triangulation raises.
    """
    projector = Transformer.from_crs(crs_from, crs_to, always_xy=True)

    # --- Stage 1: group nodes by site and project each distinct site once ---
    site_of_node = {}
    nodes_at_site = defaultdict(list)
    projected_site = {}

    for name, attrs in G_in.nodes(data=True):
        try:
            site = (attrs['lon'], attrs['lat'])
            site_of_node[name] = site
            nodes_at_site[site].append(name)
            if site not in projected_site:
                east, north = projector.transform(attrs['lon'], attrs['lat'])
                projected_site[site] = (east, north)
        except KeyError:
            logging.error(f"Node {name} is missing 'lat' or 'lon' data.")
            return nx.Graph()

    sites = list(nodes_at_site.keys())
    coords = np.array([projected_site[site] for site in sites])

    def _nodes_with_colocated_edges():
        """Copy G_in's nodes and only those input edges whose endpoints share a site."""
        base = nx.Graph()
        base.add_nodes_from(G_in.nodes(data=True))
        for a, b in G_in.edges():
            if site_of_node.get(a) == site_of_node.get(b):
                kept = G_in.edges[a, b].copy()
                # 0.0 marks an edge that did not come from the Gabriel test.
                kept['is_gabriel_edge'] = 0.0
                base.add_edge(a, b, **kept)
        return base

    site_count = len(coords)

    # Fewer than two sites: nothing to triangulate, only co-located edges remain.
    if site_count < 2:
        logging.warning("Not enough unique locations to build a graph.")
        return _nodes_with_colocated_edges()

    # --- Stage 2: Gabriel edges between sites (as index pairs into `sites`) ---
    if site_count == 2:
        # With exactly two sites there is no third point that could block the edge.
        site_pairs = [(0, 1)]
    else:
        try:
            # "QJ" is Qhull's joggled-input option.
            triangulation = Delaunay(coords, qhull_options="QJ")
        except Exception as e:
            logging.error(f"Delaunay triangulation failed: {e}")
            return nx.Graph()

        kdtree = cKDTree(coords)
        triangles = triangulation.simplices
        # The three sides of every triangle, de-duplicated irrespective of direction.
        sides = np.concatenate(
            [triangles[:, [0, 1]], triangles[:, [1, 2]], triangles[:, [2, 0]]],
            axis=0,
        )
        candidates = np.unique(np.sort(sides, axis=1), axis=0)

        tolerance = 1e-9
        site_pairs = []
        for a, b in candidates:
            end_a, end_b = coords[a], coords[b]
            centre = (end_a + end_b) / 2.0
            half_length = np.linalg.norm(end_a - end_b) / 2.0
            within = kdtree.query_ball_point(centre, half_length + tolerance)
            # Keep the edge only if the disk on it contains no site other than its endpoints.
            if all(k == a or k == b for k in within):
                site_pairs.append((a, b))

    # --- Stage 3: expand site-level edges to node-level edges ---
    GG = _nodes_with_colocated_edges()

    for a, b in site_pairs:
        members_a = nodes_at_site[sites[a]]
        members_b = nodes_at_site[sites[b]]
        for u in members_a:
            for v in members_b:
                if GG.has_edge(u, v):
                    continue
                # Carry over the attributes of the input edge, in either direction, if any.
                if G_in.has_edge(u, v):
                    carried = G_in.edges[u, v].copy()
                elif G_in.has_edge(v, u):
                    carried = G_in.edges[v, u].copy()
                else:
                    # No input edge between u and v: the new edge holds only the flag.
                    carried = {}
                carried['is_gabriel_edge'] = 1.0
                GG.add_edge(u, v, **carried)

    return GG

In [None]:
# Build the Gabriel graph from the loaded sensor graph, using the function's
# default CRS arguments.
gabriel_graph = create_gabriel_graph_robust(graph)

# Edge counts before and after, as a quick sanity check.
print(f"Original graph edges: {graph.number_of_edges()}")
print(f"Gabriel graph edges: {gabriel_graph.number_of_edges()}")

In [None]:
def plot_graph_spatially(G, with_labels=False, node_size=50, edge_alpha=0.5):
    """
    Plot a NetworkX graph at the geographic position of its nodes.

    Nodes are placed at ``(lon, lat)`` taken from their attributes. Nodes that
    lack either attribute are left out of the plot (a warning with their count
    is printed), as are edges touching such a node and, when labels are
    enabled, their labels.

    Args:
        G (networkx.Graph): Graph whose nodes carry 'lat' and 'lon' attributes.
        with_labels (bool): Draw node labels (can be cluttered for map data).
        node_size (int): Size of the plotted nodes.
        edge_alpha (float): Transparency of the edges.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Matplotlib plots (x, y), hence (lon, lat).
    pos = {
        node: (attrs['lon'], attrs['lat'])
        for node, attrs in G.nodes(data=True)
        if 'lat' in attrs and 'lon' in attrs
    }

    missing_count = G.number_of_nodes() - len(pos)
    if missing_count:
        print(f"Warning: {missing_count} nodes missing lat/lon data and will not be plotted.")

    fig, ax = plt.subplots(figsize=(10, 8))

    # Only nodes that have a position can be drawn.
    nx.draw_networkx_nodes(G, pos,
                           nodelist=list(pos),
                           node_size=node_size,
                           node_color='red',
                           alpha=0.8,
                           ax=ax)

    # Only edges whose two endpoints both have a position can be drawn.
    drawable_edges = [
        (u, v) for u, v in G.edges()
        if u in pos and v in pos
    ]
    nx.draw_networkx_edges(G, pos,
                           edgelist=drawable_edges,
                           width=1.0,
                           alpha=edge_alpha,
                           edge_color='gray',
                           ax=ax)

    if with_labels:
        # Without an explicit `labels` mapping networkx labels every node of G
        # and fails with KeyError on nodes that have no entry in `pos`.
        nx.draw_networkx_labels(G, pos,
                                labels={node: node for node in pos},
                                font_size=8,
                                font_color='black',
                                ax=ax)

    ax.set_title("Spatial Graph Plot")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")

    # Grid lines help with reading coordinates.
    ax.grid(True, linestyle='--', alpha=0.5)

    # Equal aspect ratio so the map is not stretched.
    ax.axis('equal')

    fig.tight_layout()
    plt.show()

# Spatial view of the Gabriel graph; small markers and faint edges, no labels.
plot_graph_spatially(gabriel_graph, with_labels=False, node_size=20, edge_alpha=0.3)

In [None]:
# Labelled overview of the Gabriel graph. No positions are passed, so the
# layout is chosen by networkx and does not reflect lat/lon.
# NOTE(review): figsize is in inches, so (100, 100) is a very large canvas.
fig, ax = plt.subplots(figsize=(100, 100))
nx.draw(gabriel_graph, with_labels=True, ax=ax)

In [None]:
import torch
import networkx as nx
from typing import List, Dict, Tuple

# Stand-ins for a progress-bar helper (`_pbar`) and its config object (`cfg`),
# defined here so the cell below runs without any project utilities.
# If real ones are available, import them instead, e.g.:
# from your_utils import _pbar, cfg


def _pbar_placeholder(iterable, *args, **kwargs):
    """Return ``iterable`` unchanged; every other argument is accepted and ignored."""
    return iterable


_pbar, cfg = _pbar_placeholder, None

def graph_to_pyg_tensors(
    g: nx.Graph,
    name_to_id: Dict[str, int],
    edge_attr_keys: List[str],
    node_attr_keys: List[str],
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Build node_features, undirected edge_index, and edge_attr tensors
    aligned to a pre-defined name_to_id mapping.

    Args:
        g (nx.Graph): The input graph (e.g., a Gabriel graph).
        name_to_id (dict): Mapping from node names to integer IDs. IDs are
            normalised with ``int()`` for both nodes and edges.
        edge_attr_keys (list): Edge attribute keys to extract, in column order,
                               e.g., ['weight', 'distance'].
        node_attr_keys (list): Node attribute keys to extract, in column order,
                               e.g., ['closeness_centrality', 'shops_in_200m'].

    Returns:
        (torch.Tensor, torch.Tensor, torch.Tensor):
        ``(node_features, edge_index, edge_attr)`` where
          * node_features is float, ``(len(name_to_id), len(node_attr_keys))``;
            row ``i`` belongs to the node whose ID is ``i``. Rows for IDs with
            no name, or whose name is not a node of ``g``, are all zeros;
          * edge_index is long, ``(2, 2 * E)``: every graph edge whose two
            endpoints are in ``name_to_id`` appears once per direction;
          * edge_attr is float, ``(2 * E, len(edge_attr_keys))``, the same
            feature row for both directions of an edge.
        Attributes missing on a node or edge are filled with 0.0.

    Raises:
        ValueError / TypeError: from ``int()`` if an ID cannot be converted.
    """
    # === 1. Node feature matrix ===

    num_nodes = len(name_to_id)
    num_node_features = len(node_attr_keys)

    # Reverse map for walking IDs in order. IDs get the same int() cast as in
    # the edge loop below, so a map with e.g. string IDs lines up with
    # edge_index instead of silently producing all-zero node rows.
    # NOTE(review): rows are only generated for IDs 0..len(name_to_id)-1, so
    # the IDs are assumed to be contiguous from 0 - confirm against the map.
    id_to_name = {int(idx): name for name, idx in name_to_id.items()}

    node_rows = []
    for i in range(num_nodes):
        # No name for this ID, or the name is not a node of g: zero row.
        if i not in id_to_name or id_to_name[i] not in g.nodes:
            node_rows.append([0.0] * num_node_features)
            continue

        attrs = g.nodes[id_to_name[i]]
        node_rows.append([attrs.get(key, 0.0) for key in node_attr_keys])

    if num_nodes == 0:
        # torch.tensor([]) would be 1-D; keep the (0, F) shape explicit.
        node_features = torch.zeros((0, num_node_features), dtype=torch.float)
    else:
        node_features = torch.tensor(node_rows, dtype=torch.float)

    # === 2. Edge tensors ===

    edges = []
    edge_features = []

    for u, v, data in _pbar(g.edges(data=True), "Building Edge Tensors", cfg, total=g.number_of_edges()):
        # Edges touching a node without an ID are skipped.
        if u not in name_to_id or v not in name_to_id:
            continue

        ui = int(name_to_id[u])
        vi = int(name_to_id[v])
        features = [data.get(key, 0.0) for key in edge_attr_keys]

        # Undirected edge: one entry per direction, same feature row for both.
        edges.append((ui, vi))
        edge_features.append(features)
        edges.append((vi, ui))
        edge_features.append(features)

    if not edges:
        edge_index = torch.zeros((2, 0), dtype=torch.long)
        edge_attr = torch.zeros((0, len(edge_attr_keys)), dtype=torch.float)
    else:
        # (2E, 2) list of pairs -> (2, 2E) index tensor.
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        edge_attr = torch.tensor(edge_features, dtype=torch.float)

    # === 3. Standard (x, edge_index, edge_attr) order ===
    return node_features, edge_index, edge_attr

In [None]:
import json
from torch_geometric.data import Data

# Attribute names to export; their order fixes the column order of the tensors.
edge_keys = ['connection_strength', 'distance', 'is_gabriel_edge']
# One key per line with a trailing comma: two adjacent string literals without
# a comma are silently joined by Python into a single (non-existent) key.
node_keys = [
    'closeness_centrality',
    'eigen_vector_centrality',
    'nearest_intersection_degree',
    'pct_primary_roads_in_200m',
    'pct_service_roads_in_200m',
    'shops_in_200m',
    'offices_in_200m',
    'restaurants_in_200m',
    'bus_stops_in_200m',
    'rail_stations_in_400m',
    'schools_in_200m',
    'leisure_sites_in_200m',
]

# graph_to_pyg_tensors fills attributes it cannot find with 0.0, so report any
# requested key that no node carries rather than exporting an all-zero column
# unnoticed.
node_attrs_present = {key for _, attrs in gabriel_graph.nodes(data=True) for key in attrs}
node_keys_absent = [key for key in node_keys if key not in node_attrs_present]
if node_keys_absent:
    print(f"Warning: node attributes missing from every node (zero-filled): {node_keys_absent}")

# Node name -> integer ID map; it fixes the row order of the node feature matrix.
with open("../data/processed/large/processed_2d1ea856121fac16_sensor_name_to_id_map.json", "r") as f:
    name_to_id = json.load(f)

new_node_features, new_edge_index, new_edge_attr = graph_to_pyg_tensors(
    g=gabriel_graph,
    name_to_id=name_to_id,
    edge_attr_keys=edge_keys,
    node_attr_keys=node_keys
)

data = Data(
    x=new_node_features,
    edge_index=new_edge_index,
    edge_attr=new_edge_attr
)

# Sanity check of the resulting tensor shapes.
print("Data object updated:")
print(f"edge_index shape: {data.edge_index.shape}")
print(f"edge_attr shape:  {data.edge_attr.shape}")
print(f"x shape:          {data.x.shape}")

In [None]:
# Write the PyG Data object built above to disk.
torch.save(data, "../data/processed/custom_1774bbe3a0e79fba_gabriel_graph_data.pt")

In [None]:
# One record per Gabriel-graph node: its attributes plus
#   "id"   - the integer ID that name_to_id assigns to the node,
#   "name" - the node's name in the graph,
#   "pos"  - (lat, lon), taken from the node's own attributes.
sensor_nodes_list = []
for node_name, attributes in gabriel_graph.nodes(data=True):
    node_data = {
        **attributes,
        "id": name_to_id[node_name],
        "name": node_name,
    }
    node_data["pos"] = (node_data['lat'], node_data['lon'])
    sensor_nodes_list.append(node_data)

# Write the records as JSON (the "pos" tuple is serialised as a JSON array).
with open("../data/processed/large/processed_2d1ea856121fac16_sensor_nodes_list.json", "w") as f:
    json.dump(sensor_nodes_list, f)

In [None]:
# Display the exported node records for a visual check.
sensor_nodes_list