In [210]:
import torch
from torch_geometric.datasets import TUDataset, Planetoid, Reddit
import numpy as np
import matplotlib.pyplot as plt

In [211]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)

mps


In [230]:
#Neighborhood sample size as described in experimental setup
NEIGHBORHOOD_SAMPLE_SIZE = [25, 10]
#Depth of neighborhood sampling and aggregation
SEARCH_DEPTH = 2
EPOCHS = 100

In [213]:
#return dataset and data of input dataset name
def get_data(dataset_name):
    if(dataset_name == 'proteins'):
        dataset = TUDataset(root="data", name="PROTEINS", use_node_attr=True)
        data = dataset[0]
        #TODO: correct mask implementation
        masks = {'train' : torch.tensor([True for i in range(len(data))]),
                 'val' : torch.tensor([True for i in range(len(data))]),
                 'test' : torch.tensor([True for i in range(len(data))])}
    elif(dataset_name == 'pubmed'):
        dataset = Planetoid(root="data", name="Pubmed")
        data = dataset[0]
        masks = {'train' : data.train_mask,
                 'val' : data.val_mask,
                 'test' : data.test_mask}
    else:
        print("No")

    return dataset, data, masks

In [214]:
from torch_geometric.loader import NeighborLoader
#implement Mini Batching with neighborhood sampling
def neighborhood(data, input_nodes, batch_size):
    return NeighborLoader(data, NEIGHBORHOOD_SAMPLE_SIZE, input_nodes=input_nodes, batch_size=batch_size, shuffle=True)
#val_loader = NeighborLoader(data, NEIGHBORHOOD_SAMPLE_SIZE, input_nodes=pubmed_data.val_mask, shuffle=True)

In [215]:
from torch_geometric.nn import SAGEConv
import torch.nn.functional as F
from torch.nn import Dropout, Linear

class SAGE(torch.nn.Module):
    """ 
    GraphSAGE model
    Params:
        in_channels feature size of each input sample
        hidden_channels feature size of each hidden sample
        out_channel feature size of each output sample
        num_layers =K number of message passing layers
        aggregator_type (mean, pool, lstm)
    """
    def __init__(self, in_channels, hidden_channels, out_channels, aggregator_type="mean"):
        super().__init__()
        #SAGEConv layers
        self.sage_in = SAGEConv(in_channels, hidden_channels, aggregator_type)
        self.sage_out = SAGEConv(hidden_channels, out_channels, aggregator_type)
        #Use Adam optimaizer vgl Experimental Setup
        self.optimizer = torch.optim.Adam(self.parameters())
        #Cross entropy loss for supervised learning TODO: unsupervised variant
        self.loss_fn = F.cross_entropy
    
    """
    Forward Propagation
    Params:
        x input vector 
        edge_index adjacency matrix 
    """
    def forward(self, x, edge_index):
        h = self.sage_in(x, edge_index)
        h = h.relu_()
        h = self.sage_out(h, edge_index)
        return h

In [244]:
def train(model, neighborhood, masks):
    model.train()
    #Use optimizer and lossfunktion of model
    optimizer = model.optimizer
    criterion = model.loss_fn

    total_loss = total_correct = 0
    for batch in neighborhood:
        #Zero weights before calculating gradients
        optimizer.zero_grad()
        output = model(batch.x, batch.edge_index)
        label = batch.y
        #loss = criterion(output[batch.train_mask], label.squeeze(0)[batch.train_mask])
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_correct += torch.sum(output.argmax(dim=-1, keepdim=True) == label).item() / len(label)
    
    loss = total_loss / len(neighborhood)
    acc = total_correct / len(neighborhood)
    return loss, acc

@torch.no_grad()
def evaluate(model, data, mask):
    model.eval()
    criterion = model.loss_fn
    output = model(data.x, data.edge_index)
    label = data.y
    loss = criterion(output[mask], label[mask])
    total_loss = loss.item()
    total_acc = torch.sum(output.argmax(dim=-1, keepdim=True) == label).item() * 1.0 / len(label)
    return total_loss, total_acc


In [245]:
dataset, data, masks = get_data('pubmed')

model = SAGE(dataset.num_features, 250 ,dataset.num_classes)
print(model)
train_loader = neighborhood(data, masks['train'], 20)
loss = np.empty(EPOCHS)
new_loss = 0

print("Training...")
for epoch in range(EPOCHS):
    loss, acc = train(model, train_loader, masks)
    print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {acc:.4f}')


SAGE(
  (sage_in): SAGEConv(500, 250, aggr=mean)
  (sage_out): SAGEConv(250, 3, aggr=mean)
)
Training...
Epoch 00, Loss: 1.0977, Approx. Train: 174.4167
Epoch 01, Loss: 1.0888, Approx. Train: 190.5107
Epoch 02, Loss: 1.0795, Approx. Train: 203.0446
Epoch 03, Loss: 1.0696, Approx. Train: 207.9081
Epoch 04, Loss: 1.0579, Approx. Train: 192.9501
Epoch 05, Loss: 1.0428, Approx. Train: 209.5797
Epoch 06, Loss: 1.0283, Approx. Train: 194.5911
Epoch 07, Loss: 1.0074, Approx. Train: 193.3956
Epoch 08, Loss: 0.9843, Approx. Train: 208.4360
Epoch 09, Loss: 0.9603, Approx. Train: 205.9044
Epoch 10, Loss: 0.9384, Approx. Train: 194.8568
Epoch 11, Loss: 0.9100, Approx. Train: 217.1437
Epoch 12, Loss: 0.8833, Approx. Train: 215.6708
Epoch 13, Loss: 0.8640, Approx. Train: 207.6046
Epoch 14, Loss: 0.8306, Approx. Train: 213.5154
Epoch 15, Loss: 0.8034, Approx. Train: 195.0369
Epoch 16, Loss: 0.7813, Approx. Train: 213.5329
Epoch 17, Loss: 0.7481, Approx. Train: 205.5117
Epoch 18, Loss: 0.7210, Approx.