In [1]:
## Standard libraries
import os
import json
import math
import numpy as np 
import time
## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgb
import matplotlib
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
sns.reset_orig()
sns.set()

## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
# Torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms
# PyTorch Lightning
import pytorch_lightning as pl

# torch geometric
import torch_geometric
import torch_geometric.nn as geom_nn
import torch_geometric.data as geom_data

# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)
DATASET_PATH = "../data"
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "../saved_models/tutorial7"

# Set the global seed through PyTorch Lightning so runs are repeatable
pl.seed_everything(40)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility.
# The flag must be spelled `deterministic`: assigning to a misspelled attribute
# silently creates a new, unused attribute and leaves cuDNN non-deterministic.
torch.backends.cudnn.deterministic = True
# Disable the cuDNN auto-tuner as well, as it may pick different kernels between runs
torch.backends.cudnn.benchmark = False

# Use the first CUDA device when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print('Devices set to:', device)


from models import GATModel, GraphGNNModel, GraphLevelGNN
from layers import GATLayer, GCNLayer

  set_matplotlib_formats('svg', 'pdf') # For export
Global seed set to 40


Devices set to: cpu


In [3]:
import dataset_generator
# Build the molecule graph dataset from BBBP.csv with 'p_np' as the target
dataset = dataset_generator.MoleculeDataset(
    root='dataset/',
    filename='BBBP.csv',
    target='p_np',
    expand=False,
)

# The first 1600 graphs are used for training, the remainder for validation
train_set, val_set = dataset[:1600], dataset[1600:]

# Wrap both splits in mini-batch loaders (only the training loader is shuffled).
# Note: `train_set` / `val_set` are rebound from dataset slices to loaders here.
train_set = geom_data.DataLoader(train_set, batch_size=128, shuffle=True)
val_set = geom_data.DataLoader(val_set, batch_size=128)
#graph_test_loader = geom_data.DataLoader(test_dataset, batch_size=1024)

Processing...
100%|██████████| 2029/2029 [00:06<00:00, 294.40it/s]
Done!


In [5]:
# Display the edge feature tensor of the graph at index 3 as a quick sanity check.
# NOTE(review): each row has two values; they look like a bond order and a 0/1
# flag, but confirm the exact meaning against dataset_generator.
dataset[3].edge_attr

tensor([[1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.5000, 1.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [1.0000, 0.0000],
        [2.0000, 0.0000],
        [2.0000, 0.0000],
        [1.0

In [6]:
def train_graph_classifier(model_name, **model_kwargs):
    """Train a ``GraphLevelGNN`` on the notebook's loaders and report its accuracy.

    The function relies on notebook-level globals: ``dataset`` (for the number
    of node features), ``train_set`` / ``val_set`` (the data loaders),
    ``device`` and ``CHECKPOINT_PATH``. The model is always trained from
    scratch; no pretrained checkpoint is loaded.

    Args:
        model_name: Suffix of the run directory ``GraphLevel<model_name>``
            that is created under ``CHECKPOINT_PATH``.
        **model_kwargs: Forwarded unchanged to the ``GraphLevelGNN`` constructor.

    Returns:
        A tuple ``(model, result)``. ``model`` is reloaded from the checkpoint
        the trainer reports as best. ``result`` is a dict with the ``test_acc``
        metric under the keys ``"train"`` (evaluated on ``train_set``) and
        ``"test"`` (evaluated on ``val_set``).
    """
    pl.seed_everything(42)

    # Create a PyTorch Lightning trainer that writes its logs/checkpoints to root_dir
    root_dir = os.path.join(CHECKPOINT_PATH, "GraphLevel" + model_name)
    os.makedirs(root_dir, exist_ok=True)
    # NOTE(review): no explicit ModelCheckpoint(monitor="val_acc") is configured,
    # so `best_model_path` below presumably refers to the trainer's default
    # checkpoint rather than the best validation accuracy -- confirm.
    # NOTE(review): `gpus` and `progress_bar_refresh_rate` are deprecated in newer
    # Lightning releases; revisit when upgrading.
    trainer = pl.Trainer(default_root_dir=root_dir,
                         gpus=1 if str(device).startswith("cuda") else 0,
                         max_epochs=500,
                         progress_bar_refresh_rate=0)
    trainer.logger._default_hp_metric = None # Optional logging argument that we don't need

    # Re-seed right before building the model so the weight initialisation is repeatable
    pl.seed_everything(40)
    model = GraphLevelGNN(c_in=dataset.num_node_features,
                          c_out=1, # single output logit
                          **model_kwargs)
    trainer.fit(model, train_set, val_set)
    model = GraphLevelGNN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)

    # Evaluate the reloaded model on the training and validation loaders.
    # The loaders are passed positionally because the keyword was renamed from
    # `test_dataloaders` to `dataloaders` across Lightning versions.
    train_result = trainer.test(model, train_set, verbose=False)
    test_result = trainer.test(model, val_set, verbose=False)
    result = {"test": test_result[0]['test_acc'], "train": train_result[0]['test_acc']}
    return model, result

In [7]:
# Fit a 3-layer "GraphConv" classifier. Every keyword argument other than
# `model_name` is forwarded to the model constructor by train_graph_classifier.
model, result = train_graph_classifier(
    model_name="GraphConv",
    c_hidden=256,
    layer_name="GraphConv",
    num_layers=3,
    dp_rate_linear=0.5,
    dp_rate=0.0,
)

Global seed set to 42
  rank_zero_deprecation(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Global seed set to 40

  | Name        | Type              | Params
--------------------------------------------------
0 | model       | GraphGNNModel     | 267 K 
1 | loss_module | BCEWithLogitsLoss | 0     
--------------------------------------------------
267 K     Trainable params
0         Non-trainable params
267 K     Total params
1.071     Total estimated model params size (MB)
  rank_zero_warn(


RuntimeError: Sizes of tensors must match except in dimension 1. Got 39 and 58 in dimension 0 (The offending index is 1)

In [None]:
# Report both accuracies as percentages, one per line.
# The "test" entry of `result` was measured on the validation loader.
print(
    f"Train performance: {100.0*result['train']:4.2f}%",
    f"Test performance:  {100.0*result['test']:4.2f}%",
    sep="\n",
)