In [47]:
# keep count of what attributes exist & how many times each is represented
from collections import defaultdict
attribute_counts = defaultdict(int)

unique_attributes = set()
for node in G.nodes(): # for nodes
    for attribute in G.nodes[node].keys():
        unique_attributes.add(attribute)
        attribute_counts[attribute] += 1
        
# for _, _, data in G.edges.data(): # for edges
#     for attribute in data.keys():
#         unique_attributes.add(attribute)
#         attribute_counts[attribute] += 1

print(unique_attributes)
for attribute, count in attribute_counts.items():
    print(attribute, count)

{'x', 'ref', 'y', 'street_count', 'highway'}
y 262086
x 262086
street_count 262086
highway 262086
ref 262086


In [50]:
import osmnx as ox
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch_geometric.nn as gnn
import torch_geometric.data as gdata
from torch_geometric.utils.convert import to_networkx, from_networkx 
    # https://stackoverflow.com/questions/70452465/how-to-load-in-graph-from-networkx-into-pytorch-geometric-and-set-node-features
from shapely import wkt

# read in US as graph
cf = '["highway"~"motorway|motorway_link|trunk|trunk_link|primary|primary_link|secondary|secondary_link'\
        '|tertiary|tertiary_link"]'
G_sub = ox.graph_from_place('Massachusetts, USA', simplify=True, custom_filter=cf)
# G = ox.io.load_graphml('data\MA_drive.osm')

# make sure all nodes contain the same attributes by setting missing one to None
numeric_attribute_names = ['x', 'y', 'street_count']
string_attribute_names = ['highway', 'ref']
for node in G.nodes():
    for attribute in numeric_attribute_names: 
        if not G.nodes[node].get(attribute):
            G.nodes[node][attribute] = np.nan # TODO: try with missing's set equal to 0, ''
    for attribute in string_attribute_names:
        if not G.nodes[node].get(attribute):
            G.nodes[node][attribute] = np.nan
            
# make sure all edges contain the same attributes by setting missing to None
numeric_attribute_names = ['lanes', 'width', 'maxspeed']
string_attribute_names = ['name', 'geometry', 'ref', 'access', 'bridge', 'junction', 'est_width', 'tunnel', 'service']
for u, v, data in G.edges.data():
    if 'geometry' in data:
        data['geometry'] = wkt.dumps(data['geometry']) # convert from LineString object to string representation
    for attribute in numeric_attribute_names:
        if attribute not in data:
            data[attribute] = np.nan # TODO: try with missing's set equal to 0, ''
    for attribute in string_attribute_names:
        if attribute not in data:
            data[attribute] = np.nan

# convert graph to PyTorch geometric data object
data = from_networkx(G)

In [51]:
# define GNN model
class LandfillGNN(gnn.MessagePassing):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__(aggr='add')
        self.conv1 = gnn.GCNConv(in_channels, hidden_channels)
        self.conv2 = gnn.GCNConv(hidden_channels, out_channels)
    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [52]:
# create labeled dataset of existing landfill sites and non-landfill sites
place_name = "Massachusetts, USA"
landfills = ox.geometries.geometries_from_place(place_name, tags={"landuse": "landfill"})
non_landfills = ox.geometries.geometries_from_place(place_name, tags={"landuse": "residential"})
landfills['label'] = 1
non_landfills['label'] = 0
dataset = gdata.Data(
    x=torch.cat([torch.ones(len(landfills), 1), torch.zeros(len(non_landfills), 1)]),
    y=torch.cat([torch.ones(len(landfills)), torch.zeros(len(non_landfills))]),
    edge_index=data.edge_index,
    pos=data.pos,
)

In [53]:
dataset

Data(x=[1965, 1], edge_index=[2, 658827], y=[1965])

In [54]:
# create and train GNN model
model = LandfillGNN(in_channels=dataset.x.size(1), hidden_channels=16, out_channels=1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    out = model(dataset.x, dataset.edge_index)
    loss = criterion(out, dataset.y.view(-1, 1))
    loss.backward()
    optimizer.step()

RuntimeError: index 14180 is out of bounds for dimension 0 with size 1965

In [None]:
# use trained GNN model to predict the likelihood of a location being a good place for a landfill site
scores = model(dataset.x, dataset.edge_index).squeeze()
predictions = torch.sigmoid(scores)
landfill_probabilities = predictions[:len(landfills)]
non_landfill_probabilities = predictions[len(landfills):]
plt.hist(landfill_probabilities.detach().numpy(), bins=20, alpha=0.5, label='Landfill')
plt.hist(non_landfill_probabilities.detach().numpy(), bins=20, alpha=0.5, label='Non-landfill')
plt.xlabel('Probability')
plt.ylabel('Count')
plt.legend()
plt.show()