HOW THE HELL DO I MAKE A GNN

In [6]:
import torch
import torch.nn as nn
from torch_geometric.nn import MessagePassing
from torch.utils.data import Dataset, DataLoader

Graph neural network

$v_i' = \phi^v (v_i, \sum_j \phi^e(v_i, v_j))$

$\phi^e$ is the edge model which creates the edge message from two nodes

$\phi^v$ is the node model which predicts the instantaneous acceleration of the receiving node from the aggregated edge messages and the node's own information.

Both are MLPs.

The node information $v_i$ (the feature matrix `x` in the code) is the position $(x, y)$, velocity $(\Delta x, \Delta y)$, charge and mass, i.e. six features per node in 2D.



In [2]:
def get_edge_index(num_nodes: int) -> torch.Tensor:
    """Build the edge index of a fully connected directed graph without self-loops.

    Args:
        num_nodes (int): number of nodes in the graph.

    Returns:
        torch.Tensor: contiguous integer tensor of shape
            [2, num_nodes * (num_nodes - 1)]; each column holds one ordered
            pair of distinct node indices.
    """
    idx = torch.arange(num_nodes)
    # every ordered pair of node indices, one pair per row
    pairs = torch.cartesian_prod(idx, idx)
    # drop the self-loops (pairs whose two entries are equal)
    pairs = pairs[pairs[:, 0] != pairs[:, 1]]
    # .t() alone only returns a strided view; hand out a contiguous [2, no_edges] tensor
    return pairs.t().contiguous()

# Quick check: a fully connected graph of 10 nodes has 10 * 9 = 90 directed edges.
edges = get_edge_index(num_nodes=10)
edges.shape

torch.Size([2, 90])

In [16]:
# NOTE: get_edge_index is also defined in an earlier cell of this notebook; this
# later definition is the one in effect once this cell has run, so keep the two
# in sync (or drop one of them).
def get_edge_index(num_nodes: int) -> torch.Tensor:
    """Build the edge index of a fully connected directed graph without self-loops.

    Args:
        num_nodes (int): number of nodes in the graph.

    Returns:
        torch.Tensor: contiguous integer tensor of shape
            [2, num_nodes * (num_nodes - 1)]; each column holds one ordered
            pair of distinct node indices.
    """
    idx = torch.arange(num_nodes)
    # every ordered pair of node indices, one pair per row
    pairs = torch.cartesian_prod(idx, idx)
    # drop the self-loops (pairs whose two entries are equal)
    pairs = pairs[pairs[:, 0] != pairs[:, 1]]
    # .t() alone only returns a strided view; hand out a contiguous [2, no_edges] tensor
    return pairs.t().contiguous()

class NBodyGNN(MessagePassing):
    """Message-passing network that predicts per-node accelerations of an N-body system.

    An edge MLP turns the features of the two endpoint nodes of every edge into a
    message, the messages arriving at a node are summed, and a node MLP maps the
    node's own features concatenated with that sum to the predicted acceleration.
    """

    def __init__(self, node_dim = 6, acc_dim = 2, hidden_dim = 300, msg_dim = 100):
        """ 
        N-body graph NN class.

        Args:
            node_dim (int): dimensionality of the node (in 2d case it is 6).
                Only used to size the MLPs; it is not forwarded to
                MessagePassing, whose own `node_dim` option (the propagation
                axis) keeps its default.
            acc_dim (int): dimensionality of the output of the network, which are accelerations so in 2d = 2
            hidden_dim (int): hidden layer dimensions 
            msg_dim (int): dimensionality of the edge messages; 100 is the
                value used for the standard and L1 model

        """

        super().__init__(aggr='add') #incoming messages are summed per receiving node

        #edge model MLP
        self.edge_model = nn.Sequential(
            nn.Linear(2*node_dim, hidden_dim), #inputs = node information for two nodes
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, msg_dim) #output = message features
        )
        #node model MLP
        self.node_model = nn.Sequential(
            nn.Linear(node_dim + msg_dim, hidden_dim), #inputs = sum of outputs of edge model and node features
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, acc_dim) #output = predicted acc
        )

    def message(self, x_i, x_j):
        """Compute one message per edge from the features of its two endpoint nodes.

        Called by propagate(), which gathers the per-edge node features.

        Args:
            x_i (torch.Tensor): features of the receiving node of each edge, shape [..., no_edges, node_dim]
            x_j (torch.Tensor): features of the sending node of each edge, same shape as x_i

        Returns:
            torch.Tensor: edge messages, shape [..., no_edges, msg_dim]
        """
        #the last axis is the feature axis, so dim = -1 stacks the two feature
        #vectors of every edge side by side -> [..., no_edges, 2*node_dim]
        pair_features = torch.cat((x_i, x_j), dim = -1)
        return self.edge_model(pair_features)

    def forward(self, x, edge_index): 
        """forward pass of this network

        Args:
            x (torch.Tensor): shape is [no_nodes, no_node_features]. train() in
                this notebook passes whole batches of shape
                [batch, no_nodes, no_node_features] with one shared edge_index;
                that relies on MessagePassing indexing nodes along dim -2.
            edge_index (torch.Tensor): shape is [2, no_edges]

        Returns:
            torch.Tensor: predicted accelerations, shape [..., no_nodes, acc_dim]
        """

        edge_message =  self.propagate(edge_index, x = (x,x)) #use same feature matrix for both source and target nodes
        #node model sees each node's own features next to its summed incoming messages
        acc_pred = self.node_model(torch.cat([x, edge_message], dim = -1)) #predict accelerations

        return acc_pred



In [36]:
class NBodyDataset(Dataset):
    """
    Help organise and manage the dataset. Later used in DataLoader.

    Sample `idx` is the pair (data[idx], targets[idx]).
    """
    def __init__(self, data, targets):
        """
        Args:
            data (torch.Tensor): shape is [no_datapoints, no_nodes, no_node_features]
            targets (torch.Tensor): shape is [no_datapoints, no_nodes, 2d_acceleration]

        Raises:
            ValueError: if data and targets do not hold the same number of samples
        """
        #fail early: __len__ only looks at data, so a mismatch would otherwise
        #surface later as an IndexError or silently ignore the extra targets
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must hold the same number of samples, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Number of samples in the dataset (used by DataLoader for batching)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (inputs, target) pair of sample `idx`."""
        nodes = self.data[idx]  # shape: [no_nodes, no_node_features]
        acc = self.targets[idx]  # target accelerations of the same sample
        return nodes, acc

def train(data, num_epoch, batch_size=32, lr=1e-3, weight_decay=1e-8):
    """Train a fresh NBodyGNN on (inputs, accelerations) pairs and return it.

    Args:
        data (tuple): (input_data, targets); input_data has shape
            [no_datapoints, no_nodes, no_node_features], targets holds the
            accelerations to predict for every node of every datapoint
        num_epoch (int): number of passes over the dataset
        batch_size (int): number of datapoints per optimisation step
        lr (float): Adam learning rate (1e-3 is Adam's own default)
        weight_decay (float): L2 regulariser on the parameters

    Returns:
        NBodyGNN: the trained model

    Raises:
        ValueError: if the dataset is empty
    """
    input_data, targets = data
    dataset = NBodyDataset(input_data, targets)
    if len(dataset) == 0:
        #without this the epoch average below would divide by zero batches
        raise ValueError("cannot train on an empty dataset")
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model = NBodyGNN()
    optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.L1Loss() #MAE loss

    #every datapoint has the same fully connected graph, so build it once
    edge_index = get_edge_index(input_data.shape[1])

    model.train()
    for epoch in range(num_epoch):
        total_loss = 0.0 #loss tracking
        #acc_true (not acc/targets) so the batch does not shadow the full target tensor
        for nodes, acc_true in dataloader:
            optimiser.zero_grad()

            #nodes is a whole batch: [batch, no_nodes, no_node_features]
            acc_pred = model(nodes, edge_index) #automatically calls model.forward()

            loss = criterion(acc_pred, acc_true)

            loss.backward()
            optimiser.step()

            total_loss += loss.item()

        #mean of the per-batch means (a smaller final batch counts as much as a full one)
        avg_loss = total_loss/len(dataloader)
        print(f'Epoch [{epoch+1}/{num_epoch}], Loss: {avg_loss:.4f}')

    return model


#TODO: tidy up the DataLoader and train()
#Open question: what is the right way to feed batched inputs into the network?



In [27]:
def load_data(path):
    """Read a saved dataset and return its inputs and targets.

    Args:
        path (str): location of the file without the ".pt" extension

    Returns:
        tuple: the entries stored under the keys 'X' and 'y', in that order
    """
    payload = torch.load(f"{path}.pt")
    inputs, targets = payload['X'], payload['y']
    return inputs, targets

In [38]:
# The file name suggests n=3 bodies in 2 dimensions, nt=1000 time steps and
# dt=0.005 -- TODO confirm against the simulation script.
data = load_data(path='../simulations/datasets/r1_n=3_dim=2_nt=1000_dt=0.005')
X, y = data

In [40]:
# Sanity check: slicing off the first 100 samples gives 100 entries.
len(X[:100])

100

In [33]:
# Size of X along axis 1; train() hands this value to get_edge_index as the node count.
X.shape[1]

3

In [42]:
# Only the first `cutoff` samples are used for this training run.
cutoff = 10000

model = train(data=(X[:cutoff], y[:cutoff]), num_epoch=10)

Epoch [1/10], Loss: 0.9816
Epoch [2/10], Loss: 0.6859
Epoch [3/10], Loss: 0.5602
Epoch [4/10], Loss: 0.5102
Epoch [5/10], Loss: 0.4263
Epoch [6/10], Loss: 0.3683
Epoch [7/10], Loss: 0.3357
Epoch [8/10], Loss: 0.3111
Epoch [9/10], Loss: 0.3375
Epoch [10/10], Loss: 0.2823
