In [2]:
import pandas as pd
import torch
from torch_geometric.data import Data
import pandas as pd
import networkx as nx

In [None]:
%run preprocessing.ipynb

In [3]:
# Graph implementation assuming a no neighborhood sampling and no sliding window approach 
def create_graph(master_df):
    graph = nx.Graph()
    unique_priogrid_gids = master_df['priogrid_gid'].unique()

    for priogrid_gid in unique_priogrid_gids:
        node_features = master_df[master_df['priogrid_gid'] == priogrid_gid].drop(['priogrid_gid'], axis=1).to_dict('records')[0]
        graph.add_node(priogrid_gid, **node_features)

    # Add edges connecting each node to its 8 immediate neighbors
    for node in graph.nodes():
        row, col = node // 100, node % 100
        neighbors = []

        for dr in [-1, 0, 1]:
            for dc in [-1, 0, 1]:
                if dr == 0 and dc == 0:
                    continue
                neighbor_row = row + dr
                neighbor_col = col + dc
                neighbor_priogrid_gid = neighbor_row * 100 + neighbor_col
                if neighbor_priogrid_gid in graph.nodes():
                    neighbors.append(neighbor_priogrid_gid)

        for neighbor in neighbors:
            graph.add_edge(node, neighbor)

    return graph

In [6]:
# Creating a random sliding window of size 8, selecting 4 nodes prior and 4 nodes afte
def sliding_window(master_df):
    G = nx.Graph()

    for gid, group in master_df.groupby('priogrid_gid'):
        features = group[master_df.columns.drop('priogrid_gid')].to_dict('records')[0]
        G.add_node(gid, **features)

    nodes = list(G.nodes())
    for i in range(len(nodes)):
        node = nodes[i]
        neighbors = nodes[max(0, i-4):i] + nodes[i+1:i+5]
        for neighbor in neighbors:
            G.add_edge(node, neighbor)

In [11]:
import torch
import pandas as pd
from torch_geometric.data import Data

def create_graph_pyg(master_df):
    # Prepare node features
    node_features = master_df.drop(columns=['priogrid_gid']).to_numpy(dtype=float)
    node_features_tensor = torch.tensor(node_features, dtype=torch.float)
    
    # Prepare nodes and their indices
    nodes = master_df['priogrid_gid'].tolist()
    node_index = {node: idx for idx, node in enumerate(nodes)}
    
    # Prepare edge list
    edge_list = []
    for i, node in enumerate(nodes):
        # Determine neighbors based on the index; adjust for boundary conditions
        neighbors = nodes[max(0, i-4):i] + nodes[i+1:i+5]
        for neighbor in neighbors:
            if neighbor in node_index:  # Ensure neighbor exists
                edge_list.append([node_index[node], node_index[neighbor]])
    
    # Convert edge list to tensor
    edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
    
    # Create PyTorch Geometric Data object
    data = Data(x=node_features_tensor, edge_index=edge_index)
    
    return data
# Example usage
# Assume 'master_df' is your DataFrame and has been correctly loaded with 'priogrid_gid' and other features


In [None]:
# Implements a neighborhood sampling solution for the graph, considering a randomization factor of 0.50 
import random

def create_graph_neighbouthoodSampling(master_df, sampling_rate=0.5):
    graph = nx.Graph()
    unique_priogrid_gids = master_df['priogrid_gid'].unique()

    for priogrid_gid in unique_priogrid_gids:
        node_features = master_df[master_df['priogrid_gid'] == priogrid_gid].drop(['priogrid_gid'], axis=1).to_dict('records')[0]
        graph.add_node(priogrid_gid, **node_features)
        
        row, col = priogrid_gid // 100, priogrid_gid % 100
        neighbors = []
        for dr in [-1, 0, 1]:
            for dc in [-1, 0, 1]:
                if dr == 0 and dc == 0:
                    continue
                neighbor_row = row + dr
                neighbor_col = col + dc
                neighbor_priogrid_gid = neighbor_row * 100 + neighbor_col
                if neighbor_priogrid_gid in graph.nodes():
                    neighbors.append(neighbor_priogrid_gid)

        for neighbor in neighbors:
            if random.random() <= sampling_rate:
                graph.add_edge(priogrid_gid, neighbor)

    return graph

In [15]:
import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.utils import remove_self_loops, add_remaining_self_loops, degree

def create_graph_pyg_usable(master_df):
    # Prepare node features
    node_features = master_df.drop(columns=['priogrid_gid', 'ged_sb', 'month_id', 'date']).to_numpy(dtype=float)
    node_features_tensor = torch.tensor(node_features, dtype=torch.float)

    # Extract labels
    labels = master_df['ged_sb'].to_numpy(dtype=float)
    labels_tensor = torch.tensor(labels, dtype=torch.float)

    # Prepare nodes and their indices
    nodes = master_df['priogrid_gid'].tolist()
    node_index = {node: idx for idx, node in enumerate(nodes)}

    # Prepare edge list
    edge_list = []
    for i, node in enumerate(nodes):
        # Determine neighbors based on the index; adjust for boundary conditions
        neighbors = nodes[max(0, i-4):i] + nodes[i+1:i+5]
        for neighbor in neighbors:
            if neighbor in node_index:
                # Ensure neighbor exists
                edge_list.append([node_index[node], node_index[neighbor]])

    # Convert edge list to tensor
    edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

    # Create PyTorch Geometric Data object
    data = Data(x=node_features_tensor, edge_index=edge_index, y=labels_tensor)

    # Normalize the adjacency matrix
    edge_index, _ = remove_self_loops(data.edge_index)
    edge_index, _ = add_remaining_self_loops(edge_index, num_nodes=data.num_nodes)
    row, col = edge_index
    deg = degree(row, data.num_nodes, dtype=data.x.dtype)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
    data.norm = norm

    # Construct the adjacency matrix
    values = torch.ones(edge_index.shape[1])
    adj_matrix = torch.sparse_coo_tensor(edge_index, values, (data.num_nodes, data.num_nodes))

    return data, adj_matrix

data, adj_matrix = create_graph_pyg_usable(master_df)

In [19]:
print(data)

Data(x=[5139120, 106], edge_index=[2, 41112940], y=[5139120], norm=[46252060])


In [17]:
print(adj_matrix)

tensor(indices=tensor([[5126010, 5126010, 5126010,  ..., 5139117, 5139118,
                        5139119],
                       [5126011, 5126012, 5126013,  ..., 5139117, 5139118,
                        5139119]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(5139120, 5139120), nnz=46252060, layout=torch.sparse_coo)
