In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch_geometric
import os
import torch
from tqdm import tqdm
import numpy as np

In [36]:
from customed_datasets.graph_datasets import HeteroGraphDataset

In [68]:
# Build the dataset from a path relative to the notebook's working directory.
# The recorded output of this cell shows a "Processing..." pass over 2000 items.
dataset = HeteroGraphDataset('datasets/GraphDatasets10perAggrExclusive')

Processing...
100%|█████████████████████████████████████| 2000/2000 [00:01<00:00, 1397.32it/s]
Done!


In [69]:
from torch.utils.data import DataLoader
from utils.graph_utils import my_hetero_collate, separate

In [70]:
# Sanity check of the dataset size (recorded output: 2000); the loader cell
# that follows slices the dataset at indices 1600 and 1800.
len(dataset)

2000

In [71]:
# Positional split of the dataset: [0, 1600) train, [1600, 1800) validation,
# [1800, end) test. Only the training loader reshuffles its samples.
train_loader = DataLoader(
    dataset[:1600],
    batch_size=16,
    shuffle=True,
    collate_fn=my_hetero_collate,
)
val_loader = DataLoader(
    dataset[1600:1800],
    batch_size=32,
    shuffle=False,
    collate_fn=my_hetero_collate,
)
test_loader = DataLoader(
    dataset[1800:],
    batch_size=32,
    shuffle=False,
    collate_fn=my_hetero_collate,
)

In [72]:
from models.hetero_gnn import HeteroConv, HeteroGNN, HeteroGNNHomofeatures

In [73]:
# Alternative configuration, currently disabled (note that it was moved to the
# GPU with .cuda(), unlike the active model below):
#
# model = HeteroGNN(
#     feature_in_channels=128,
#     aggr_in_channels=1,
#     hidden_channels=128,
#     out_channels=1,
#     num_layers=3,
#     feature_encode='mean',
# ).cuda()

# Active model: a single output channel, i.e. one logit per prediction.
model = HeteroGNNHomofeatures(
    feature_in_channels=1024,
    aggr_in_channels=1,
    hidden_channels=128,
    out_channels=1,
    num_layers=3,
)

In [74]:
# Binary cross-entropy that takes raw logits (the sigmoid is applied inside the
# loss), which is why predictions in this notebook are thresholded at logit 0.
criterion = torch.nn.BCEWithLogitsLoss()

In [75]:
@torch.no_grad()
def validation(dataloader, model):
    """Compute the element-wise binary accuracy of ``model`` over ``dataloader``.

    A logit strictly greater than zero is treated as a positive prediction
    (equivalent to a sigmoid probability above 0.5).

    Args:
        dataloader: Iterable of batches exposing ``x_dict``,
            ``edge_index_dict`` and ``y`` attributes.
        model: Callable invoked as ``model(x_dict, edge_index_dict)``. Its
            output must have the same shape as ``batch.y`` so that the
            element-wise comparison does not broadcast.

    Returns:
        A zero-dimensional tensor holding the number of correctly predicted
        label elements divided by the total number of label elements.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches at all.

    Note:
        Gradients are disabled by the decorator, but the model's train/eval
        mode is left untouched; callers switch to eval mode themselves.
    """
    corrects = 0
    counts = 0
    for data in dataloader:
        outputs = model(data.x_dict, data.edge_index_dict)
        preds = (outputs > 0.).to(torch.float)
        corrects += (preds == data.y).sum()
        # The sum above runs over every label element, so the denominator must
        # count elements too; using only the first dimension would over-report
        # accuracy whenever ``y`` has more than one column.
        counts += data.y.numel()

    return corrects / counts

In [76]:
# Number of passes over the training loader within each run.
epochs = 15

In [77]:
# Collects the final test accuracy of each run; summarised as mean ± std in
# the last cell of the notebook.
test_accs = []

In [78]:
# Number of independent training runs; the model's reset_parameters() is
# called at the start of each one.
runs = 5

In [79]:
# Start from an empty result list so that re-running this cell does not mix
# accuracies from a previous execution into the final mean/std.
test_accs = []

for _ in range(runs):
    # Each run starts over: reset_parameters() is expected to re-initialise the
    # weights, and a new optimizer discards the previous Adam state.
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=1.e-3)
    pbar = tqdm(range(epochs))
    for epoch in pbar:
        # Per-epoch running totals, kept as plain Python numbers.
        losses = 0.
        counts = 0
        corrects = 0
        model.train()
        for data in train_loader:
            optimizer.zero_grad()

            outputs = model(data.x_dict, data.edge_index_dict)
            loss = criterion(outputs, data.y)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by the number of label elements so that the
            # epoch loss is a per-element average, and use the same count as
            # the accuracy denominator (the comparison below is element-wise).
            n_labels = data.y.numel()
            losses += loss.item() * n_labels
            counts += n_labels
            preds = (outputs > 0.).detach().to(torch.float)
            corrects += (preds == data.y).sum().item()

        losses /= counts
        train_acc = corrects / counts

        model.eval()
        # .item() turns the returned tensor into a float so the progress bar
        # shows a number instead of "tensor(...)".
        val_acc = validation(val_loader, model).item()

        pbar.set_postfix({'loss': losses, 'train_acc': train_acc, 'val_acc': val_acc})

    # Final evaluation of this run on the held-out test loader.
    model.eval()
    test_acc = validation(test_loader, model).item()
    print(f'test acc: {test_acc}')
    test_accs.append(test_acc)

100%|█| 15/15 [00:10<00:00,  1.37it/s, loss=1e-6, train_acc=tensor(1.), val_acc=


test acc: 0.9998999834060669


100%|█| 15/15 [00:11<00:00,  1.29it/s, loss=1.33e-6, train_acc=tensor(1.), val_a


test acc: 0.9998999834060669


100%|█| 15/15 [00:10<00:00,  1.38it/s, loss=9.67e-7, train_acc=tensor(1.), val_a


test acc: 0.9998999834060669


100%|█| 15/15 [00:09<00:00,  1.53it/s, loss=1.57e-6, train_acc=tensor(1.), val_a


test acc: 0.9998999834060669


100%|█| 15/15 [00:10<00:00,  1.36it/s, loss=1.28e-6, train_acc=tensor(1.), val_a

test acc: 0.9998999834060669





In [80]:
# Mean and spread of the per-run test accuracies. np.std defaults to ddof=0,
# i.e. the population (not sample) standard deviation.
print(f'{np.mean(test_accs)} ± {np.std(test_accs)}')

0.9998999834060669 ± 0.0
