In [1]:
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.utils import remove_self_loops, add_remaining_self_loops, degree
from torch_geometric.nn import GCNConv

In [2]:
# Graph implementation without neighbourhood sampling and sliding window
def create_graph_pyg(master_df: pd.DataFrame) -> "Data":
    """Build a fully connected PyG graph with one node per row of ``master_df``.

    Args:
        master_df: Frame containing at least the columns ``priogrid_gid``,
            ``ged_sb``, ``month_id`` and ``date``. Those four columns are
            dropped and every remaining column is used as a node feature, so
            the remaining columns must be convertible to ``float``.

    Returns:
        A ``Data`` object with
        ``x``          -- float tensor of shape ``(num_rows, num_feature_columns)``,
        ``edge_index`` -- long tensor of shape ``(2, num_rows * (num_rows - 1))``
                          holding both directions of every node pair and no
                          self-loops,
        ``y``          -- float tensor with the ``ged_sb`` value of each row.

    Raises:
        KeyError: If any of the four expected columns is missing.

    Note:
        The number of edges grows quadratically with the number of rows.
    """
    feature_frame = master_df.drop(columns=['priogrid_gid', 'ged_sb', 'month_id', 'date'])
    node_features_tensor = torch.tensor(feature_frame.to_numpy(dtype=float), dtype=torch.float)

    labels_tensor = torch.tensor(master_df['ged_sb'].to_numpy(dtype=float), dtype=torch.float)

    num_nodes = len(master_df)

    # All (i, j) pairs with i < j in row-major order; shape (2, num_pairs).
    # For 0 or 1 nodes this is an empty (2, 0) tensor, which keeps edge_index
    # two-dimensional instead of collapsing to shape (0,).
    upper = torch.triu_indices(num_nodes, num_nodes, offset=1)
    num_pairs = upper.shape[1]

    # Interleave each pair with its reverse so the columns read
    # (i, j), (j, i), (i, j'), (j', i), ...
    # The target width is spelled out because reshape cannot infer -1 for an
    # empty tensor.
    edge_index = (
        torch.stack([upper, upper.flip(0)], dim=2)
        .reshape(2, 2 * num_pairs)
        .contiguous()
    )

    return Data(x=node_features_tensor, edge_index=edge_index, y=labels_tensor)

In [3]:
# Graph implementation with neighbourhood sampling
def create_graph_pyg_ns(master_df: pd.DataFrame, num_neighbors: int = 4, hidden_channels: int = 64):
    """Build a PyG graph that links each row to its nearest rows in frame order.

    Every row of ``master_df`` becomes one node. Node ``i`` gets a directed
    edge to each of the up to ``num_neighbors`` rows before it and the up to
    ``num_neighbors`` rows after it. Neighbourhood is defined purely by row
    position, so the order of ``master_df`` determines the graph.

    Args:
        master_df: Frame containing at least ``priogrid_gid``, ``ged_sb`` and
            ``date``. Those three columns are dropped; every remaining column
            (including ``month_id`` if present) is used as a node feature and
            must be convertible to ``float``.
        num_neighbors: Number of preceding and of following rows each node is
            connected to.
        hidden_channels: Output width of the returned ``GCNConv`` layer.

    Returns:
        A tuple ``(data, adj_matrix, conv_layer)``:

        * ``data`` -- ``Data`` with ``x``, ``edge_index`` (window edges, no
          self-loops added) and ``y`` (the ``ged_sb`` column). It also carries
          ``data.norm``, one weight ``deg[row]**-0.5 * deg[col]**-0.5`` per
          edge of the *self-looped* edge set, so ``data.norm`` is longer than
          ``data.edge_index`` by ``num_nodes`` entries.
        * ``adj_matrix`` -- sparse COO tensor of shape
          ``(num_nodes, num_nodes)`` with a 1 for every window edge and every
          self-loop.
        * ``conv_layer`` -- a freshly constructed
          ``GCNConv(num_feature_columns, hidden_channels)``.

    Raises:
        KeyError: If ``priogrid_gid``, ``ged_sb`` or ``date`` is missing.
    """
    feature_frame = master_df.drop(columns=['priogrid_gid', 'ged_sb', 'date'])
    node_features = feature_frame.to_numpy(dtype=float)
    node_features_tensor = torch.tensor(node_features, dtype=torch.float)

    labels_tensor = torch.tensor(master_df['ged_sb'].to_numpy(dtype=float), dtype=torch.float)

    num_nodes = len(master_df)

    # Edges are expressed in row positions directly. Looking positions up via
    # a {priogrid_gid: index} dict would collapse repeated ids onto their last
    # occurrence and connect the wrong rows.
    edge_list = []
    for i in range(num_nodes):
        first = max(0, i - num_neighbors)
        last = min(num_nodes, i + num_neighbors + 1)
        edge_list.extend([i, j] for j in range(first, last) if j != i)

    # reshape(-1, 2) keeps the result two-dimensional, i.e. (2, 0), when there
    # are no edges (a single row, an empty frame, or num_neighbors <= 0).
    edge_index = torch.tensor(edge_list, dtype=torch.long).reshape(-1, 2).t().contiguous()

    data = Data(x=node_features_tensor, edge_index=edge_index, y=labels_tensor)

    # From here on `edge_index` is the self-looped edge set; data.edge_index
    # is left untouched.
    edge_index, _ = remove_self_loops(data.edge_index)
    edge_index, _ = add_remaining_self_loops(edge_index, num_nodes=data.num_nodes)
    row, col = edge_index

    # Symmetric degree normalisation of the self-looped graph.
    deg = degree(row, data.num_nodes, dtype=data.x.dtype)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0  # guard against zero-degree nodes
    data.norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

    values = torch.ones(edge_index.shape[1])
    adj_matrix = torch.sparse_coo_tensor(edge_index, values, (data.num_nodes, data.num_nodes))

    conv_layer = GCNConv(node_features.shape[1], hidden_channels)

    return data, adj_matrix, conv_layer

In [4]:
# Graph implementation with sliding window
def create_graph_pyg_sw(master_df: pd.DataFrame, hidden_channels: int = 64, window_size: int = 4):
    """Build a PyG graph using a sliding window over the rows of ``master_df``.

    Every row becomes one node. Node ``i`` gets a directed edge to each row
    whose position lies within ``window_size`` of ``i`` (excluding ``i``
    itself). The window slides over row positions, so the order of
    ``master_df`` determines the graph.

    Args:
        master_df: Frame containing at least ``priogrid_gid``, ``ged_sb`` and
            ``date``. Those three columns are dropped; every remaining column
            (including ``month_id`` if present) is used as a node feature and
            must be convertible to ``float``.
        hidden_channels: Output width of the returned ``GCNConv`` layer.
        window_size: Half-width of the window, i.e. how many preceding and how
            many following rows each node is connected to. Defaults to 4.

    Returns:
        A tuple ``(data, adj_matrix, conv_layer)``:

        * ``data`` -- ``Data`` with ``x``, ``edge_index`` (window edges, no
          self-loops added) and ``y`` (the ``ged_sb`` column). It also carries
          ``data.norm``, one weight ``deg[row]**-0.5 * deg[col]**-0.5`` per
          edge of the *self-looped* edge set, so ``data.norm`` is longer than
          ``data.edge_index`` by ``num_nodes`` entries.
        * ``adj_matrix`` -- sparse COO tensor of shape
          ``(num_nodes, num_nodes)`` with a 1 for every window edge and every
          self-loop.
        * ``conv_layer`` -- a freshly constructed
          ``GCNConv(num_feature_columns, hidden_channels)``.

    Raises:
        KeyError: If ``priogrid_gid``, ``ged_sb`` or ``date`` is missing.
    """
    feature_frame = master_df.drop(columns=['priogrid_gid', 'ged_sb', 'date'])
    node_features = feature_frame.to_numpy(dtype=float)
    node_features_tensor = torch.tensor(node_features, dtype=torch.float)

    labels_tensor = torch.tensor(master_df['ged_sb'].to_numpy(dtype=float), dtype=torch.float)

    num_nodes = len(master_df)

    # Work in row positions directly: resolving neighbours through a
    # {priogrid_gid: index} dict would map repeated ids to their last
    # occurrence and wire up the wrong rows.
    edge_list = []
    for i in range(num_nodes):
        window_start = max(0, i - window_size)
        window_stop = min(num_nodes, i + window_size + 1)
        edge_list.extend([i, j] for j in range(window_start, window_stop) if j != i)

    # reshape(-1, 2) yields a well-formed (2, 0) tensor when no edges exist
    # (single row, empty frame, or window_size <= 0).
    edge_index = torch.tensor(edge_list, dtype=torch.long).reshape(-1, 2).t().contiguous()

    data = Data(x=node_features_tensor, edge_index=edge_index, y=labels_tensor)

    # `edge_index` below is the self-looped edge set; data.edge_index is not
    # modified.
    edge_index, _ = remove_self_loops(data.edge_index)
    edge_index, _ = add_remaining_self_loops(edge_index, num_nodes=data.num_nodes)
    row, col = edge_index

    # Symmetric degree normalisation of the self-looped graph.
    deg = degree(row, data.num_nodes, dtype=data.x.dtype)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0  # guard against zero-degree nodes
    data.norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

    values = torch.ones(edge_index.shape[1])
    adj_matrix = torch.sparse_coo_tensor(edge_index, values, (data.num_nodes, data.num_nodes))
    conv_layer = GCNConv(node_features.shape[1], hidden_channels)

    return data, adj_matrix, conv_layer