In [27]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch_geometric.datasets import Planetoid
import numpy as np
import matplotlib.pyplot as plt

In [48]:
# Neighbor sample sizes; the pair (S_1, S_2) is what the NeighborLoader
# further down receives as its per-hop neighbor counts.
S_1, S_2 = 25, 10
# NOTE(review): presumably the number of aggregation layers / hops - confirm,
# K is not used in the cells visible here.
K = 2

Download the dataset and create mini-batches of the training data

In [50]:
# Download the CiteSeer dataset (Planetoid collection) into ./data and take
# its first graph as `data`.
dataset = Planetoid(root="data", name="CiteSeer")
data = dataset[0]

# Slice node features (data.x) and labels (data.y) into the train/val/test
# splits using the boolean node masks stored on the graph.
feat_data = {
    split: data.x[getattr(data, f'{split}_mask')]
    for split in ('train', 'val', 'test')
}
label = {
    split: data.y[getattr(data, f'{split}_mask')]
    for split in ('train', 'val', 'test')
}

# Mini-batches of (features, label) pairs over the training nodes, 4 per batch.
train_batches = DataLoader(
    TensorDataset(feat_data['train'], label['train']),
    batch_size=4,
)


{'x': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 'edge_index': tensor([[   0,    1,    1,  ..., 3324, 3325, 3326],
         [ 628,  158,  486,  ..., 2820, 1643,   33]]),
 'y': tensor([3, 1, 5,  ..., 3, 1, 5]),
 'train_mask': tensor([ True,  True,  True,  ..., False, False, False]),
 'val_mask': tensor([False, False, False,  ..., False, False, False]),
 'test_mask': tensor([False, False, False,  ...,  True,  True,  True])}

In [29]:
# Summary of the dataset as a whole.
print(f'Dataset: {dataset}')
print('-------------------')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Size of each node split. Tensor.sum() counts the True entries of a boolean
# mask in a single vectorized call; the builtin sum() would instead iterate
# over the mask element by element in Python.
print('\nGraph:')
print('------')
print(f'Training nodes: {data.train_mask.sum().item()}')
print(f'Evaluation nodes: {data.val_mask.sum().item()}')
print(f'Testing nodes: {data.test_mask.sum().item()}')


Dataset: CiteSeer()
-------------------
Number of graphs: 1
Number of nodes: 3327
Number of features: 3703
Number of classes: 6

Graph:
------
Training nodes: 120
Evaluation nodes: 500
Testing nodes: 1000


Neighborhood Sampler

In [49]:
from torch_geometric.loader import NeighborLoader

# Neighbor sampler seeded at the training nodes; S_1 and S_2 are handed over
# as the neighbor counts for the two sampling hops.
neigborhood = NeighborLoader(
    data,
    num_neighbors=(S_1, S_2),
    input_nodes=data.train_mask,
)

Aggregator function

In [None]:
class MaxPoolingAggregator(torch.nn.Module):
    """
    AGGREGATOR: Max Pooling

    Every neighbor representation is fed through one shared fully connected
    layer followed by a ReLU; the element-wise maximum over all neighbors is
    the aggregated result.

    Params:
        in_channels feature size of each input sample
        out_channels feature size of each output sample
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # fully connected layer with learnable weights
        self.fc_layer = torch.nn.Linear(in_channels, out_channels, bias=True)
        # non-linearity -> ReLU
        self.non_lin = torch.nn.ReLU()

    def forward(self, neigborhood):
        """
        Forward Propagation

        Params:
            neigborhood neighborhood sample of the input node to be aggregated:
                        either a tensor whose first dimension indexes the
                        neighbors, or an iterable of per-neighbor tensors of
                        equal shape
        Returns:
            tensor holding, for each output feature, the maximum over all
            neighbors of ReLU(fc_layer(neighbor))
        Raises:
            ValueError if the neighborhood contains no neighbors
        """
        if not torch.is_tensor(neigborhood):
            neighbors = list(neigborhood)
            if not neighbors:
                raise ValueError("cannot aggregate an empty neighborhood")
            # one row per neighbor, so the whole sample goes through the
            # layer in a single batched call
            neigborhood = torch.stack(neighbors)
        elif neigborhood.numel() == 0:
            raise ValueError("cannot aggregate an empty neighborhood")

        # Stay in torch (no numpy) so gradients reach fc_layer's weights.
        transformed = self.non_lin(self.fc_layer(neigborhood))
        # Element-wise max across the neighbor dimension.
        return transformed.max(dim=0).values