## Baseline GCN testing
Notebook to create and evaluate a GCN against edge betweenness centrality (EBC) for predicting the number of passing bicyclists in Copenhagen.
- Preprocess EBC for graph DONE
- Assign Metrics from data
- Create Torch Graph
- Evaluate against SOTA

In [None]:
import torch
from torch_geometric.data import Data
import torch_geometric as tg
import osmnx as ox
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
from folium import plugins
from folium.plugins import HeatMap
from shapely.geometry import Point, LineString, Polygon
import shapely
import momepy as mp 
import esda
import seaborn as sns
from shapely.strtree import STRtree
sns.set_theme()


In [None]:

# Centre point (latitude, longitude) around which the street network and the
# OSM features are downloaded. Presumably central Copenhagen - TODO confirm.
lat, lon = 55.6867243, 12.5700724

def get_city_graph(lat, lon, dist, features=('amenity', 'shop', 'building')):
    """Download the bike network and the tagged OSM features around a point.

    Args:
        lat: Latitude of the centre point.
        lon: Longitude of the centre point.
        dist: Search distance forwarded unchanged to osmnx for both the graph
            and the feature query.
        features: Iterable of OSM tag keys to request. Every element carrying
            any of these keys is fetched.

    Returns:
        A tuple ``(g, gdf, amenities)``:
        * ``g`` - the simplified bike network returned by osmnx.
        * ``gdf`` - the edges of ``g`` as a GeoDataFrame reprojected to
          EPSG:3857, without null geometries and with a fresh integer index.
        * ``amenities`` - the downloaded features, restricted to rows that
          have a geometry and a non-null ``'amenity'`` label. The label is the
          first non-null value among the requested tag columns, taken in the
          order given by ``features``. The frame keeps the CRS it was
          downloaded in.
    """
    g = ox.graph_from_point((lat, lon), dist=dist, network_type='bike', simplify=True, retain_all=False)
    feat_dict = {feat: True for feat in features}
    amenities = ox.features.features_from_point((lat, lon), tags=feat_dict, dist=dist)
    # Work on an independent copy so the column assignments below are
    # guaranteed to land in this frame and not in a temporary slice.
    amenities = amenities[amenities.geometry.notnull()].copy()
    if 'amenity' not in amenities.columns:
        # No element carried an 'amenity' tag (or it was not requested):
        # start from an empty label column instead of raising KeyError.
        amenities['amenity'] = None
    for feat in features:
        # A requested tag that matched nothing has no column; skip it.
        if feat in amenities.columns:
            # Assign the result back instead of using inplace=True on a column
            # selection, which does not reliably update the parent frame.
            amenities['amenity'] = amenities['amenity'].fillna(amenities[feat])
        # Progress output: number of features still without a label.
        print(amenities['amenity'].isnull().sum())
    amenities = amenities[amenities['amenity'].notnull()]
    # amenities = amenities.to_crs(epsg=3857)
    gdf = mp.nx_to_gdf(g, points=False, lines=True, spatial_weights=True).to_crs(epsg=3857)
    ### remove rows with geometry == None
    gdf = gdf[gdf.geometry.notnull()]
    gdf = gdf.reset_index(drop=True)
    return g, gdf, amenities

# OSM tag keys requested from get_city_graph; an element is fetched when it
# carries any of them.
features = ['amenity', 'shop', 'building',
            'aerialway', 'aeroway', 'barrier', 'boundary', 'craft', 'emergency', 'geological', 'highway', 'historic',
            'landuse', 'leisure', 'healthcare', 'military', 'natural', 'office', 'power', 'public_transport', 'railway',
            'place', 'service', 'tourism', 'waterway', 'route', 'water']

# 10000 is the search distance handed to osmnx.
g, gdf, amenities = get_city_graph(lat, lon, 10000, features=features)

### plot the graph
fig, ax = ox.plot_graph(g, node_size=0, edge_linewidth=0.5, show=False, close=False)
# `gdf` comes back in EPSG:3857 while the axes above use the graph's own CRS,
# so reproject a temporary copy for the overlay; `gdf` itself is left as is.
gdf.to_crs(g.graph['crs']).plot(ax=ax, linewidth=1, edgecolor='black')

import nx_parallel as nxp
import networkx as nx

# Switch on the nx-parallel backend in networkx's config and use 4 workers
# (the backend's default is `n_jobs=None`).
nx.config.backends.parallel.active = True
nx.config.backends.parallel.n_jobs = 4

### Line graph: every edge (u, v, key) of g becomes a node of H.
H = nx.line_graph(g)

# Copy the attributes of each original edge onto the matching line-graph node.
edge_attributes = ((edge_id, g.edges[edge_id]) for edge_id in H)
H.add_nodes_from(edge_attributes)

# Weight of a line-graph edge = summed length of the two street segments it joins.
for first, second, key, data in H.edges(keys=True, data=True):
    data['weight'] = g.edges[first]['length'] + g.edges[second]['length']


### EBC Calculation

In [None]:
import tqdm
def calc_bc(shortest_paths, graph):
    """Count how often each node lies on the supplied paths, normalised.

    Args:
        shortest_paths: Mapping ``source -> {target -> path}`` where ``path``
            is a sequence of nodes of ``graph``.
        graph: Graph whose nodes are scored. Every node must be a key of
            ``shortest_paths``.

    Returns:
        Dict ``node -> score``. A node gets one count for every path that
        contains it (endpoints included, at most once per path), and the
        count is divided by the squared number of nodes of ``graph``.
    """
    visits = dict.fromkeys(graph.nodes, 0)
    for source in tqdm.tqdm(graph.nodes):
        for path in shortest_paths[source].values():
            # set() makes a node count once per path even if it repeats.
            for visited in set(path):
                visits[visited] += 1
    normaliser = graph.number_of_nodes() ** 2
    for key in visits:
        visits[key] /= normaliser
    return visits

# Note: `ebc` holds the shortest paths themselves (source -> target -> path),
# limited by cutoff=1000 on the summed 'weight'; the scores end up in `bc`.
ebc = dict(nxp.all_pairs_dijkstra_path(H, cutoff=1000, weight='weight'))
bc = calc_bc(shortest_paths=ebc, graph=H)
# bc = {k: v for k, v in sorted(bc.items(), key=lambda item: item[1], reverse=True)}


In [None]:
### color edges in g by bc
# Collapse the (from, to, key) scores to (from, to) pairs; when several
# parallel edges share a pair, the one iterated last wins.
bc2 = {(x, y): score for (x, y, z), score in bc.items()}


In [None]:
# Convert the street graph to node/edge GeoDataFrames (the third return value,
# the spatial weights, is not needed) and write each one to a GeoPackage.
nodes, edges, _ = mp.nx_to_gdf(g, points=True, lines=True, spatial_weights=True)

for layer, target in ((nodes, '../data/g_nodes.gpkg'), (edges, '../data/g_edges.gpkg')):
    layer.to_file(target, driver='GPKG')


In [None]:
# slope_df = gpd.read_file('../data/graphs/edge_slopes.gpkg', layer='Edges average slope')

# g_linestrings = [t['geometry'] for s, v, t in H.nodes(data=True)]

# for max_slope, linestring in zip(slope_df.max_slope ,slope_df.geometry):
#     if linestring in g_linestrings:
#         slope = slope_df[slope_df.geometry == linestring].slope.values[0]
        

In [None]:
# Write the betweenness score of every (from, to) pair onto all parallel edges
# between those two nodes. Iterating the edge-data dicts directly avoids
# assuming that the parallel-edge keys are exactly 0..n-1.
for (s, t), value in bc2.items():
    for edge_data in g[s][t].values():
        edge_data['bc'] = value


### Counter data assignment

# TODO

In [None]:
nodes, edges = mp.nx_to_gdf(g)

import geopandas as gpd 
import pandas as pd 

# NOTE(review): absolute, machine-specific path - every other data path in
# this notebook is relative to '../data'.
filepath = '/Users/christianrasmussen/Documents/thesis/bike-flow-estimate/data/raw/trafiktaelling.json'

# Traffic-count records; coordinates are declared to be EPSG:4326.
gdf = gpd.GeoDataFrame.from_file(filepath)
gdf.set_crs(epsg=4326, inplace=True)

### keep only the relevant columns, and only rows with a value in aadt_cykler
gdf_new = gdf.loc[gdf['aadt_cykler'].notnull(), ['id', 'vejnavn', 'geometry', 'aadt_cykler']]


In [None]:
from shapely.geometry import Point

# Three parallel lists describing every edge of g: its geometry (None when the
# edge has no 'geometry' attribute) and its two end nodes.
linestrings, from_node, to_node = [], [], []
for u, v, data in g.edges(data=True):
    linestrings.append(data.get('geometry'))
    from_node.append(u)
    to_node.append(v)

def find_nearest_edge(linestrings, point, from_node, to_node):
    """Return the edge geometry closest to ``point`` by linear scan.

    Args:
        linestrings: Edge geometries; ``None`` entries are skipped. Each
            geometry must offer ``distance(point)``.
        point: Geometry the distances are measured to.
        from_node: Start node of each edge, aligned with ``linestrings``.
        to_node: End node of each edge, aligned with ``linestrings``.

    Returns:
        ``(closest_edge, shortest_distance, node_pair)`` where ``node_pair``
        is ``(from_node, to_node)`` of the winning edge. On ties the first
        edge wins. When no geometry is available the result is
        ``(None, inf, None)``.
    """
    best = (None, float('inf'), None)
    for geometry, start, end in zip(linestrings, from_node, to_node):
        if geometry is None:
            continue
        gap = geometry.distance(point)
        # Strict comparison keeps the earliest edge when distances are equal.
        if gap < best[1]:
            best = (geometry, gap, (start, end))
    return best


In [None]:
g2 = g.copy()
import tqdm

# Attach every traffic count to its nearest edge. When several counts map to
# the same edge, the one measured closest to the edge is kept.
# NOTE(review): find_nearest_edge returns only (from, to), so the value is
# always stored on parallel edge key 0 even if the nearest geometry belongs to
# another parallel edge between the same two nodes.
for i, row in tqdm.tqdm(gdf_new.iterrows(), total=len(gdf_new)):
    point = row['geometry']
    closest_edge, shortest_distance, node_pair = find_nearest_edge(linestrings, point, from_node, to_node)
    try:
        edge_data = g2[node_pair[0]][node_pair[1]][0]
    except (KeyError, TypeError):
        # KeyError: the pair has no edge with key 0; TypeError: no edge was
        # found at all (node_pair is None). Report the record and move on.
        print(node_pair)
        print(row['aadt_cykler'])
        print(i)
        continue
    if 'aadt' not in edge_data:
        edge_data['aadt'] = row['aadt_cykler']
        edge_data['aadt_dist'] = shortest_distance
    elif edge_data['aadt_dist'] > shortest_distance:
        # A closer count replaces the one stored earlier.
        print('IT HAPPENED!!!')
        edge_data['aadt'] = row['aadt_cykler']
        edge_data['aadt_dist'] = shortest_distance


In [None]:
# Edges that received no traffic count get an 'aadt' of 0.
for _, _, attrs in g2.edges(data=True):
    attrs.setdefault('aadt', 0)


In [None]:
import pickle

# Persist the annotated street graph (g2) for the later sections.
with open('../data/graphs/graph_nx.pkl', 'wb') as out:
    out.write(pickle.dumps(g2))


### Creating Torch Graph from **edgelist**

Used to convert graphs with calculated betweenness centralities into torch-geometric graphs that our GCN can run over. <br>
**TODO: Functionize**

In [None]:
import pickle

# Reload the pickled street graph. Only unpickle files you produced yourself;
# pickle can execute arbitrary code on load.
with open('../data/graphs/graph_nx.pkl', 'rb') as src:
    g2 = pickle.loads(src.read())


In [None]:
# One entry per edge of g2: ((from, to), (bc, aadt as int)).
edge_list = [((s, t), (attrs['bc'], int(attrs['aadt']))) for s, t, attrs in g2.edges(data=True)]

# Step 1: map every node id to a consecutive integer, in order of first
# appearance (source before target).
node_to_idx = {}
for (src, tgt), _ in edge_list:
    # len() is evaluated before the insert, so a new node gets the next index.
    node_to_idx.setdefault(src, len(node_to_idx))
    node_to_idx.setdefault(tgt, len(node_to_idx))

# Step 2: edge endpoints as integer pairs, and the per-edge (bc, aadt) values.
edge_index = [[node_to_idx[src], node_to_idx[tgt]] for (src, tgt), _ in edge_list]
edge_attr = [feature for _, feature in edge_list]

# Convert to torch tensors
edge_index = torch.tensor(edge_index, dtype=torch.long).t()  # transposed to [2, num_edges]
edge_attr = torch.tensor(edge_attr, dtype=torch.float)  # [num_edges, 2]: column 0 = bc, column 1 = aadt

# Assemble the graph object
graph = Data()
graph['node'] = torch.arange(len(node_to_idx))  # node indices as placeholder node data
graph.edge_index = edge_index
graph.edge_attr = edge_attr[:, 0].unsqueeze(1)  # bc is the edge feature
graph.edge_label = edge_attr[:, 1].unsqueeze(1)  # aadt is the edge label

# Show a summary of the result
print(graph)


In [None]:
# Persist the torch-geometric graph built from the edge list.
with open('../data/graphs/graph_tg.pkl', 'wb') as out:
    out.write(pickle.dumps(graph))


In [None]:
import pickle 

# Save the annotated graph (g2). Dumping `g` here would overwrite
# graph_nx.pkl with a graph that has no 'aadt' attribute, which the torch
# graph construction reads from the reloaded file.
with open('../data/graphs/graph_nx.pkl', 'wb') as f:
    pickle.dump(g2, f)


### Creating torch-geometric GCN

In [None]:
### Line graph of g2: every edge (u, v, key) of g2 becomes a node of H.
H = nx.line_graph(g2)

# Copy the attributes of each original edge onto the matching line-graph node.
edge_attributes = ((edge_id, g2.edges[edge_id]) for edge_id in H)
H.add_nodes_from(edge_attributes)

# Weight of a line-graph edge = summed length of the two street segments it joins.
for first, second, key, data in H.edges(keys=True, data=True):
    data['weight'] = g2.edges[first]['length'] + g2.edges[second]['length']


In [None]:
# amenities = amenities.reset_index()
from collections import Counter

# Line-graph nodes that carry a geometry, split into two aligned lists.
with_geometry = [(node, geom) for node, geom in H.nodes(data='geometry') if geom is not None]
nodes = [node for node, _ in with_geometry]
linestrings = [geom for _, geom in with_geometry]
assert len(nodes) == len(linestrings)

# Polygons are reduced to their centroid; every other geometry type is kept.
amenities['geometry'] = amenities['geometry'].apply(
    lambda shape: shape.centroid if shape.geom_type == 'Polygon' else shape
)

# Snap every feature to its nearest street segment and collect its label
# there. tree.nearest() is used as an index into `nodes`.
tree = STRtree(linestrings)
for geom, amenity in zip(amenities['geometry'], amenities['amenity']):
    nearest = nodes[tree.nearest(geom)]
    H.nodes[nearest].setdefault('amenity', []).append(amenity)

# Replace each collected label list by one count attribute per label.
for _, attrs in H.nodes(data=True):
    if 'amenity' not in attrs:
        continue
    for label, count in Counter(attrs['amenity']).items():
        attrs[label] = count
    ## drop the amenity key
    attrs.pop('amenity', None)


In [None]:
# Persist the line graph. pickle.dump returns None, so its result must not be
# assigned to `g` - that would replace the street graph with None.
with open('../data/graphs/linegraph_nx.pkl', 'wb') as f:
    pickle.dump(H, f)


In [None]:
# Reduce every node's attributes to plain numbers: drop the descriptive
# attributes listed below, convert what float() accepts and drop the rest.
for node in H.nodes(data=True):
    attrs = node[1]
    for unwanted in ('geometry', 'osmid', 'name', 'highway', 'ref', 'aadt_dist'):
        attrs.pop(unwanted, None)
    # list() takes a snapshot of the keys because entries are removed below.
    for key in list(attrs.keys()):
        # Exact type check on purpose: bools and numpy scalars are also
        # converted to a built-in float.
        if type(attrs[key]) not in (int, float):
            try:
                attrs[key] = float(attrs[key])
            except (TypeError, ValueError):
                # Not numeric (e.g. a list or free text): drop the attribute.
                attrs.pop(key, None)


In [None]:
# Every attribute name that occurs on any node, in order of first appearance.
all_feats = list(dict.fromkeys(key for _, attrs in H.nodes(data=True) for key in attrs))


In [None]:
# Give every node every feature; features a node does not have become 0.
for _, attrs in H.nodes(data=True):
    for feat in all_feats:
        attrs.setdefault(feat, 0)


In [None]:
# Feature columns are all collected attributes except the target 'aadt'.
feature_names = [feat for feat in all_feats if feat != 'aadt']

node_list = list(H.nodes)
x = [[H.nodes[node][feat] for feat in feature_names] for node in node_list]
y = [H.nodes[node]['aadt'] for node in node_list]

# Position of each node in node_list, used to express edges as integer pairs.
node_idx = {node: position for position, node in enumerate(node_list)}
edge_index = [[node_idx[s], node_idx[t]] for s, t, _ in H.edges]

edge_index = torch.tensor(edge_index, dtype=torch.long).t()  # transposed to [2, num_edges]
x = torch.tensor(x, dtype=torch.float)
y = torch.tensor(y, dtype=torch.float)

linegraph = Data()
linegraph.num_nodes = len(node_list)
# linegraph['node'] = torch.arange(len(node_list))
linegraph.x = x
linegraph.y = y
linegraph.edge_index = edge_index

# with open('../data/graphs/linegraph_tg.pkl', 'wb') as f:
    # pickle.dump(linegraph, f)


In [None]:
# Show the shape of the node feature matrix.
linegraph.x.shape


In [None]:
# Print the first line-graph node together with its attribute dict.
print(list(H.nodes(data=True))[0])


In [None]:
# Print the feature row of the first node; squeeze(0) only changes the result
# if that row has exactly one element.
print(linegraph.x[0].squeeze(0))
