# Installation

In [None]:
import os
import torch
# Export the installed torch version (e.g. "2.3.0+cu121") as an environment variable so the
# shell commands below can substitute it into the PyG wheel index URL.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# Extension packages, looked up in the wheel index that matches this torch build.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
# PyTorch Geometric itself, installed from the current GitHub source.
# NOTE(review): none of these installs is version-pinned, so a re-run may pull different versions.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install rdkit

2.3.0+cu121
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting rdkit
  Downloading rdkit-2023.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.6


# PyG Data Structure

In [None]:
import torch
from tqdm import tqdm
from torch_geometric.data import Data, DataLoader
from torch_geometric.datasets import QM9 # We will use the QM9 dataset for this example

class QM9Transform:
    """Per-sample transform that keeps only the first target column of ``data.y``."""

    def __call__(self, data):
        """Overwrite ``data.y`` with its column 0 and hand back the same object."""
        first_target = data.y[:, 0]
        data.y = first_target
        return data


# Directory passed to QM9 as the dataset root.
path = './QM9'

# Build the dataset with the per-sample target selection defined above, then shuffle it.
# NOTE(review): no random seed is set in this cell, so the shuffle (and the splits below)
# presumably differ between runs — confirm whether reproducible splits are needed.
dataset = QM9(path, transform=QM9Transform()).shuffle()

# Normalize targets to mean = 0 and std = 1.
# Statistics are taken along dim 0 of the stored target tensor; keepdim keeps that axis with size 1.
mean = dataset.data.y.mean(dim=0, keepdim=True)
std = dataset.data.y.std(dim=0, keepdim=True)
dataset.data.y = (dataset.data.y - mean) / std
# Keep only column 0's statistics, converted to Python scalars.
mean, std = mean[:, 0].item(), std[:, 0].item()

# Split datasets.
# First 10,000 shuffled samples -> test, next 10,000 -> validation, the remainder -> train.
test_dataset = dataset[:10000]
val_dataset = dataset[10000:20000]
train_dataset = dataset[20000:]
# Only the training loader reshuffles its samples.
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# Bare expression: shows the dataset repr as the cell output.
dataset



QM9(130831)

In [None]:
# see what is inside
# Take the first graph of the shuffled dataset; the cell output lists its fields and shapes.
sample_data = dataset[0]
sample_data

Data(x=[17, 11], edge_index=[2, 36], edge_attr=[36, 4], y=[1], pos=[17, 3], idx=[1], name='gdb_81404', z=[17])

# Build the first GCN

In [None]:
from torch.nn import Dropout1d
from torch_geometric.nn import GCNConv
from torch_scatter import scatter

# Let's say I want to build a 2-layer GCN with 0.3 dropout and relu activation

class Net(torch.nn.Module):
  """Scaffold for a two-layer GCN with a graph-level readout (exercise template).

  The convolution layers and the readout are left as commented-out placeholders.
  As written, the module holds only a dropout and a ReLU (no learnable layers),
  and ``forward`` applies ReLU then dropout to ``data.x``.
  """

  def __init__(self,
               input_dim:int=32,
               hidden_dim:int=32,
               output_dim:int=32,
               *args,
               **kwargs) -> None:
     """Create the dropout/activation modules and remember the expected input width.

     ``hidden_dim`` and ``output_dim`` are accepted but unused until the placeholder
     convolution layers are filled in. Extra positional and keyword arguments are
     forwarded to ``torch.nn.Module.__init__``.
     """
     super().__init__(*args, **kwargs)
     
     # fill in the convolutional layers, check the documentation for GCNConv: https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.GCNConv.html
     # self.conv1 = 
     # self.conv2 = 
     # NOTE(review): Dropout1d is documented for (N, C, L) or (C, L) inputs; on a 2-D node-feature
     # matrix it presumably drops whole rows rather than single features — confirm this is intended.
     self.dropout = Dropout1d(p=0.3)
     self.activation = torch.nn.ReLU()

     # Checked against the last dimension of data.x in forward().
     self.input_dim = input_dim

  def forward(self,data):
    """Apply the (partially implemented) network to ``data`` and return the result.

    Reads ``data.x`` and ``data.edge_index``. ``edge_index`` is not used until the
    placeholder convolution calls are filled in.

    Raises:
        AssertionError: If the last dimension of ``data.x`` differs from ``input_dim``.
    """
    x = data.x
    edge_index = data.edge_index

    assert x.size(-1) == self.input_dim

    # x = 
    x = self.activation(x)
    # x = 
    x = self.dropout(x)
    # graph level readout, check the documentation for scatter: https://pytorch-scatter.readthedocs.io/en/latest/functions/scatter.html
    # x = 

    return x


In [None]:
# test our sample network
# A single graph has no `batch` vector yet: assign every node (one entry per row of `pos`) to graph 0.
sample_data.batch = torch.zeros(sample_data.pos.size(0)).long()
# 11 input features, 64 hidden units, 1 output.
model = Net(11,64,1)
model(sample_data)

tensor([[20.4077]], grad_fn=<ScatterAddBackward0>)

# Training loop

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Net(11,64,1).to(device)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.7, patience=5,
                                                       min_lr=0.00001)

In [None]:
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

# define the optimizer and loss function
from torch.optim import Adam
from torch.nn import L1Loss

optimizer = Adam(net.parameters(), lr=1e-3)
criterion = L1Loss()

# define the training loop

def train():
    """Run one optimisation pass over ``train_loader`` and return the mean per-batch loss.

    Uses the notebook globals ``net``, ``train_loader``, ``device``, ``optimizer``,
    ``criterion`` and ``scaler``. The forward pass and loss run under ``autocast``;
    the backward pass and optimizer step go through the gradient scaler.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    net.train()

    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()

        with autocast():
            prediction = net(batch)
            batch_loss = criterion(prediction.view(-1), batch.y.view(-1))

        scaler.scale(batch_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += batch_loss.item()

    n_batches = len(train_loader)
    return running_loss / n_batches

@torch.no_grad()
def test(loader):
    net.eval()

    total_loss = 0
    for data in loader:
        data = data.to(device)
        with autocast():
            out = net(data)
            total_loss += criterion(out.view(-1), data.y.view(-1)).item()

    return total_loss / len(loader)

In [None]:
# Lowest validation error seen so far; None until the first epoch has been evaluated.
best_val_error = None
for epoch in range(1, 21):
    # Learning rate of the optimizer attached to the scheduler, read before this epoch's scheduler step.
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train()
    val_error = test(val_loader)
    # The scheduler is driven by the validation error.
    scheduler.step(val_error)

    # Re-evaluate on the test split only when validation matches or beats the best so far;
    # otherwise the previously computed test_error is printed again.
    if best_val_error is None or val_error <= best_val_error:
        test_error = test(test_loader)
        best_val_error = val_error

    print(f'Epoch: {epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

Epoch: 001, LR: 0.000500, Loss: 0.8198288, Val MAE: 0.6740510, Test MAE: 0.6729905
Epoch: 002, LR: 0.000500, Loss: 0.6693683, Val MAE: 0.6442012, Test MAE: 0.6453697
Epoch: 003, LR: 0.000500, Loss: 0.6556587, Val MAE: 0.6334624, Test MAE: 0.6371701
Epoch: 004, LR: 0.000500, Loss: 0.6490098, Val MAE: 0.6322935, Test MAE: 0.6353899
Epoch: 005, LR: 0.000500, Loss: 0.6485257, Val MAE: 0.6197366, Test MAE: 0.6237359
Epoch: 006, LR: 0.000500, Loss: 0.6407827, Val MAE: 0.6221578, Test MAE: 0.6237359
Epoch: 007, LR: 0.000500, Loss: 0.6374820, Val MAE: 0.6067372, Test MAE: 0.6110349
Epoch: 008, LR: 0.000500, Loss: 0.6319617, Val MAE: 0.6083696, Test MAE: 0.6110349
Epoch: 009, LR: 0.000500, Loss: 0.6333933, Val MAE: 0.6054017, Test MAE: 0.6089518
Epoch: 010, LR: 0.000500, Loss: 0.6292399, Val MAE: 0.6648393, Test MAE: 0.6089518
Epoch: 011, LR: 0.000500, Loss: 0.6257595, Val MAE: 0.5955086, Test MAE: 0.5982262
Epoch: 012, LR: 0.000500, Loss: 0.6271705, Val MAE: 0.6339588, Test MAE: 0.5982262
Epoc

# Using Positional Encoding from the Graph Laplacian

In [None]:
import torch
from tqdm import tqdm
import numpy as np
from torch_geometric.data import Data, DataLoader
from torch_geometric.datasets import QM9
from torch_geometric.utils import (
    get_laplacian,
    to_scipy_sparse_matrix,
)
from typing import Any, Optional


def add_node_attr(
    data: Data,
    value: Any,
    attr_name: Optional[str] = None,
) -> Data:
    """Attach ``value`` to ``data`` as a node attribute and return ``data``.

    Args:
        data: Object to modify in place.
        value: Attribute to attach.
        attr_name: Key under which ``value`` is stored. When ``None``, ``value`` is
            appended to ``data.x`` along the last dimension (a 1-D ``data.x`` is
            first viewed as a column), or becomes ``data.x`` if that is ``None``.

    Returns:
        The same ``data`` object.
    """
    # A named attribute is stored as-is.
    if attr_name is not None:
        data[attr_name] = value
        return data

    # No existing features: the value becomes the feature matrix.
    if data.x is None:
        data.x = value
        return data

    # Otherwise append to the features, matching their device and dtype.
    features = data.x if data.x.dim() != 1 else data.x.view(-1, 1)
    extra = value.to(features.device, features.dtype)
    data.x = torch.cat([features, extra], dim=-1)
    return data

class QM9Transform:
    """Transform that reduces ``data.y`` to its first column."""

    def __call__(self, data):
        """Store column 0 of ``data.y`` back on ``data`` and return ``data``."""
        selected = data.y[:, 0]
        data.y = selected
        return data

# Number of positional-encoding columns stored per node (shorter encodings are zero-padded below).
num_lap_vecs = 16

class LapTransform:
    """Scaffold for a transform that stores Laplacian positional encodings in ``data['pe']``.

    Exercise template: the eigendecomposition steps are commented-out placeholders,
    so ``pe`` is undefined and ``__call__`` raises ``NameError`` until they are filled in.
    """
    def __call__(self,data):
        """Build the symmetric-normalised Laplacian of ``data`` and attach ``pe`` to it.

        Returns the same ``data`` object with a ``pe`` attribute added via ``add_node_attr``.
        """

        num_nodes = data.num_nodes
        # Laplacian in edge-list form, using 'sym' normalisation.
        edge_index, edge_weight = get_laplacian(
        data.edge_index,
        data.edge_weight,
        normalization='sym',
        num_nodes=num_nodes,
    )
        # Convert to a SciPy sparse matrix for the eigendecomposition below.
        L = to_scipy_sparse_matrix(edge_index, edge_weight, num_nodes)
        # eigen decomposition

        # eig_vals, eig_vecs =

        # sort the eigenvectors by eigenvalues
        
        # eig_vecs = np.real()

        # pe = torch.from_numpy(eig_vecs[:, 1:num_lap_vecs + 1])

        # Zero-pad on the right of the last dimension so every graph ends up with num_lap_vecs columns.
        if pe.shape[1] < num_lap_vecs:
          pe = torch.nn.functional.pad(pe, (0, num_lap_vecs - pe.shape[1]), value=float(0))

        # Stored under its own key, so data.x is left unchanged.
        data = add_node_attr(data, pe, attr_name='pe')
        return data


# Separate root directory for the dataset variant built with the Laplacian pre-transform.
path = './QM9_lap'
# LapTransform is passed as pre_transform and QM9Transform as the per-access transform.
# NOTE(review): no random seed is set in this cell, so the shuffle presumably differs between runs — confirm.
dataset = QM9(path, transform=QM9Transform(),pre_transform=LapTransform()).shuffle()

# Normalize targets to mean = 0 and std = 1.
# Statistics are taken along dim 0 of the stored target tensor; keepdim keeps that axis with size 1.
mean = dataset.data.y.mean(dim=0, keepdim=True)
std = dataset.data.y.std(dim=0, keepdim=True)
dataset.data.y = (dataset.data.y - mean) / std
# Keep only column 0's statistics, converted to Python scalars.
mean, std = mean[:, 0].item(), std[:, 0].item()

# Split datasets.
# First 10,000 shuffled samples -> test, next 10,000 -> validation, the remainder -> train.
test_dataset = dataset[:10000]
val_dataset = dataset[10000:20000]
train_dataset = dataset[20000:]
# Only the training loader reshuffles its samples.
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# Bare expression: shows the dataset repr as the cell output.
dataset

Processing...

  0%|          | 0/10000 [00:00<?, ?it/s][A
Processing train dataset:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing train dataset:   1%|          | 102/10000 [00:00<00:09, 1012.75it/s][A
Processing train dataset:   2%|▏         | 206/10000 [00:00<00:09, 1026.53it/s][A
Processing train dataset:   3%|▎         | 309/10000 [00:00<00:10, 900.39it/s] [A
Processing train dataset:   4%|▍         | 401/10000 [00:00<00:11, 839.06it/s][A
Processing train dataset:   5%|▍         | 487/10000 [00:00<00:11, 816.45it/s][A
Processing train dataset:   6%|▌         | 573/10000 [00:00<00:11, 828.81it/s][A
Processing train dataset:   7%|▋         | 657/10000 [00:00<00:11, 827.34it/s][A
Processing train dataset:   7%|▋         | 748/10000 [00:00<00:10, 851.20it/s][A
Processing train dataset:   8%|▊         | 844/10000 [00:00<00:10, 882.93it/s][A
Processing train dataset:   9%|▉         | 936/10000 [00:01<00:10, 892.51it/s][A
Processing train dataset:  10%|█         | 102

QM9(130831)

In [30]:
# Inspect the first graph; per the cell output it carries a `pe` field next to the original attributes.
dataset[0]

Data(x=[17, 11], edge_index=[2, 34], edge_attr=[34, 4], y=[1], pos=[17, 3], z=[17], smiles='[H]ONC1=C([H])[C@@H](N([H])[H])[C@]([H])(O[H])C1([H])[H]', name='gdb_22348', idx=[1], pe=[17, 16])

Data(x=[17, 11], edge_index=[2, 34], edge_attr=[34, 4], y=[1], pos=[17, 3], z=[17], smiles='[H]ONC1=C([H])[C@@H](N([H])[H])[C@]([H])(O[H])C1([H])[H]', name='gdb_22348', idx=[1], pe=[17, 16])

In [33]:
from torch.nn import Dropout1d
from torch_geometric.nn import GCNConv
from torch_scatter import scatter


class Net(torch.nn.Module):
  """Scaffold for a GCN that also consumes Laplacian positional encodings (exercise template).

  The convolution layers and the use of ``data.pe`` are left as placeholders.
  As written, the module has no learnable layers and ``forward`` returns ``data.x`` unchanged.
  """

  def __init__(self,
               input_dim:int=32,
               hidden_dim:int=32,
               output_dim:int=32,
               *args,
               **kwargs) -> None:
     """Create the dropout and activation modules.

     ``input_dim``, ``hidden_dim`` and ``output_dim`` are accepted but unused until the
     placeholder layers are filled in. Extra arguments go to ``torch.nn.Module.__init__``.
     """
     super().__init__(*args, **kwargs)

    #  self.conv1 = 
    #  self.conv2 = 
     # NOTE(review): Dropout1d is documented for (N, C, L) or (C, L) inputs — confirm it is the
     # intended dropout for a 2-D node-feature matrix.
     self.dropout = Dropout1d(p=0.3)
     self.activation = torch.nn.ReLU()


  def forward(self,data):
    """Read ``x``, ``edge_index`` and ``pe`` from ``data``; currently returns ``x`` as-is."""
    x = data.x
    edge_index = data.edge_index
    pe = data.pe

    # how to use the laplacian eigenvectors?

    # after using the lap eigenvectors, let's use the same GCN as before

    return x

In [36]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Net(11+16,64,1).to(device)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.7, patience=5,
                                                       min_lr=0.00001)

In [37]:
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

from torch.optim import Adam
from torch.nn import L1Loss

optimizer = Adam(net.parameters(), lr=1e-3)
criterion = L1Loss()

def train():
    """Train ``net`` for one epoch on ``train_loader``; return the average batch loss.

    Depends on the notebook globals ``net``, ``train_loader``, ``device``, ``optimizer``,
    ``criterion`` and ``scaler``. Forward and loss are computed inside ``autocast``;
    gradients and the optimizer step are routed through the gradient scaler.

    Returns:
        Accumulated per-batch loss divided by the number of batches.
    """
    net.train()

    loss_sum = 0
    for graphs in train_loader:
        graphs = graphs.to(device)
        optimizer.zero_grad()

        with autocast():
            pred = net(graphs)
            step_loss = criterion(pred.view(-1), graphs.y.view(-1))

        scaler.scale(step_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        loss_sum += step_loss.item()

    batch_count = len(train_loader)
    return loss_sum / batch_count


@torch.no_grad()
def test(loader):
    net.eval()

    total_loss = 0
    for data in loader:
        data = data.to(device)
        with autocast():
            out = net(data)
            total_loss += criterion(out.view(-1), data.y.view(-1)).item()

    return total_loss / len(loader)

In [38]:
# Lowest validation error seen so far; None until the first epoch has been evaluated.
best_val_error = None
for epoch in range(1, 21):
    # Learning rate of the optimizer attached to the scheduler, read before this epoch's scheduler step.
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train()
    val_error = test(val_loader)
    # The scheduler is driven by the validation error.
    scheduler.step(val_error)

    # Re-evaluate on the test split only when validation matches or beats the best so far;
    # otherwise the previously computed test_error is printed again.
    if best_val_error is None or val_error <= best_val_error:
        test_error = test(test_loader)
        best_val_error = val_error

    print(f'Epoch: {epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

Processing train dataset:   0%|          | 2/10000 [15:16<1273:08:57, 458.43s/it]


Epoch: 001, LR: 0.000500, Loss: 0.7526540, Val MAE: 0.6402822, Test MAE: 0.6490042
Epoch: 002, LR: 0.000500, Loss: 0.6609365, Val MAE: 0.6473056, Test MAE: 0.6490042
Epoch: 003, LR: 0.000500, Loss: 0.6442807, Val MAE: 0.6072533, Test MAE: 0.6155941
Epoch: 004, LR: 0.000500, Loss: 0.6305100, Val MAE: 0.6085729, Test MAE: 0.6155941
Epoch: 005, LR: 0.000500, Loss: 0.6231813, Val MAE: 0.6037212, Test MAE: 0.6113447
Epoch: 006, LR: 0.000500, Loss: 0.6194320, Val MAE: 0.5908112, Test MAE: 0.5980551
Epoch: 007, LR: 0.000500, Loss: 0.6171259, Val MAE: 0.5849622, Test MAE: 0.5926509
Epoch: 008, LR: 0.000500, Loss: 0.6109742, Val MAE: 0.6182257, Test MAE: 0.5926509
Epoch: 009, LR: 0.000500, Loss: 0.6147881, Val MAE: 0.5829086, Test MAE: 0.5892039
Epoch: 010, LR: 0.000500, Loss: 0.6052363, Val MAE: 0.5831516, Test MAE: 0.5892039
Epoch: 011, LR: 0.000500, Loss: 0.6067222, Val MAE: 0.5813650, Test MAE: 0.5887707
Epoch: 012, LR: 0.000500, Loss: 0.6040835, Val MAE: 0.5906744, Test MAE: 0.5887707
Epoc

# Graph Transformer (GraphTrans-like)

In [99]:
from torch.nn import Dropout1d
from torch_geometric.nn import GCNConv
from torch_scatter import scatter

# same MP as before

class MP_block(torch.nn.Module):
  """Scaffold for the message-passing stage used by ``GraphTrans`` (exercise template).

  The convolution layers are commented-out placeholders. As written, the module has
  no learnable layers and ``forward`` returns ``data.x`` unchanged.
  """

  def __init__(self,
               input_dim:int=32,
               hidden_dim:int=32,
               output_dim:int=32,
               *args,
               **kwargs) -> None:
     """Create the dropout and activation modules.

     The three dimension arguments are accepted but unused until the placeholder
     layers are filled in. Extra arguments go to ``torch.nn.Module.__init__``.
     """
     super().__init__(*args, **kwargs)

    #  self.conv1 = 
    #  self.conv2 = 
     self.dropout = Dropout1d(p=0.3)
     self.activation = torch.nn.ReLU()


  def forward(self,data):
    """Read ``x`` and ``edge_index`` from ``data``; currently returns ``x`` as-is.

    NOTE(review): GraphTrans builds its encoder with d_model = hidden_dim + 16, so this
    presumably has to return hidden_dim features per node once implemented — confirm.
    """
    x = data.x
    edge_index = data.edge_index


    return x

from torch_geometric.utils import to_dense_batch

class GraphTrans(torch.nn.Module):
  """GraphTrans-style model: message passing followed by a Transformer encoder per graph.

  Node embeddings from ``MP_block`` are concatenated with the positional encodings in
  ``data.pe`` (16 columns are assumed, matching ``d_model = hidden_dim + 16``), packed
  into one dense tensor per batch with ``to_dense_batch``, and encoded with padded
  positions masked out. The graph representation is either the encoded CLS token or
  the sum over each graph's real nodes, optionally concatenated with a summed
  message-passing readout, and mapped to a single output value.
  """

  def __init__(self,
               input_dim:int=32,
               hidden_dim:int=32,
               output_dim:int=32,
               dual_readout = False,
               use_cls = True,
               *args,
               **kwargs) -> None:
    """Build the message-passing block, the Transformer encoder and the readout layer.

    Args:
        input_dim: Forwarded to ``MP_block``.
        hidden_dim: Forwarded to ``MP_block``; also sets the encoder width
            (``hidden_dim + 16``) and feed-forward size (``2 * hidden_dim``).
        output_dim: Forwarded to ``MP_block``.
        dual_readout: If true, concatenate the summed message-passing node embeddings
            to the Transformer output before the final linear layer.
        use_cls: If true, prepend a learnable CLS token and read the graph
            representation from it; otherwise sum the encoded node tokens.
        *args, **kwargs: Forwarded to ``torch.nn.Module.__init__``.
    """
    super().__init__(*args, **kwargs)

    # check: https://pytorch.org/docs/stable/generated/torch.nn.TransformerEncoderLayer.html#torch.nn.TransformerEncoderLayer
    from torch.nn import TransformerEncoderLayer, TransformerEncoder

    self.mp_block = MP_block(input_dim, hidden_dim, output_dim)

    # Token width: message-passing embedding plus the positional-encoding columns.
    d_model = hidden_dim + 16

    trans_layer = TransformerEncoderLayer(
        d_model=d_model,
        nhead=4,
        dim_feedforward=2 * hidden_dim,
        dropout=0.2,
        activation='relu',
        batch_first=True,
    )
    self.encoder = TransformerEncoder(trans_layer, num_layers=4)

    self.use_cls = use_cls
    if use_cls:
      # Learnable CLS token, shaped (1, 1, d_model) so forward() can expand it across the
      # batch. Without this assignment the `if` had no body and the class did not parse.
      self.cls_token = torch.nn.Parameter(torch.randn(1, 1, d_model))

    self.dual_readout = dual_readout
    # With dual readout the hidden_dim-wide graph sum is appended to the encoder output.
    readout_in = d_model + hidden_dim if dual_readout else d_model
    self.readout = torch.nn.Linear(readout_in, 1)

  def forward(self, data):
    """Return one prediction per graph in ``data``.

    Reads ``data.batch`` and ``data.pe`` directly; everything else is consumed by
    ``self.mp_block``.
    """
    x = self.mp_block(data)

    # Pack the flat node tensors into dense per-graph tensors; `mask` is True on real nodes.
    src, mask = to_dense_batch(x, data.batch)
    pe, _ = to_dense_batch(data.pe, data.batch)

    # Match dtypes so the concatenation does not promote the tokens to a wider type.
    src = torch.cat([src, pe.to(src.dtype)], dim=-1)

    if self.use_cls:
      num_graphs = src.shape[0]
      cls_token = self.cls_token.expand(num_graphs, -1, -1)
      src = torch.cat([cls_token, src], dim=1)
      # The CLS position is always a valid (unpadded) token.
      cls_mask = torch.ones(num_graphs, 1, dtype=torch.bool, device=mask.device)
      mask = torch.cat([cls_mask, mask], dim=1)
      output = self.encoder(src, src_key_padding_mask=~mask)
      output = output[:, 0, :]
    else:
      output = self.encoder(src, src_key_padding_mask=~mask)
      # Sum over real nodes only: the key-padding mask stops padding from being attended
      # to, but the encoder still emits values at padded positions.
      output = (output * mask.unsqueeze(-1)).sum(1)

    if self.dual_readout:
      # Sum-pool the message-passing embeddings per graph.
      graph_readout = scatter(x, data.batch, reduce='sum', dim=0)
      output = torch.cat([output, graph_readout], dim=-1)

    return self.readout(output)



In [103]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = GraphTrans(11,64,1,dual_readout=False,use_cls=True).to(device)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.7, patience=5,
                                                       min_lr=0.00001)

In [104]:
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

from torch.optim import Adam
from torch.nn import L1Loss

optimizer = Adam(net.parameters(), lr=1e-3)
criterion = L1Loss()


def train():
    """Perform one training epoch over ``train_loader`` and report the mean batch loss.

    Works on the notebook globals ``net``, ``train_loader``, ``device``, ``optimizer``,
    ``criterion`` and ``scaler``. The model call and loss are evaluated under
    ``autocast``; backward and the optimizer step use the gradient scaler.

    Returns:
        Total of the per-batch losses divided by the number of batches.
    """
    net.train()

    epoch_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()

        with autocast():
            output = net(batch)
            current_loss = criterion(output.view(-1), batch.y.view(-1))

        scaler.scale(current_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += current_loss.item()

    num_batches = len(train_loader)
    return epoch_loss / num_batches

@torch.no_grad()
def test(loader):
    net.eval()

    total_loss = 0
    for data in loader:
        data = data.to(device)
        with autocast():
            out = net(data)
            total_loss += criterion(out.view(-1), data.y.view(-1)).item()

    return total_loss / len(loader)

In [105]:
# Lowest validation error seen so far; None until the first epoch has been evaluated.
best_val_error = None
for epoch in range(1, 21):
    # Learning rate of the optimizer attached to the scheduler, read before this epoch's scheduler step.
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train()
    val_error = test(val_loader)
    # The scheduler is driven by the validation error.
    scheduler.step(val_error)

    # Re-evaluate on the test split only when validation matches or beats the best so far;
    # otherwise the previously computed test_error is printed again.
    if best_val_error is None or val_error <= best_val_error:
        test_error = test(test_loader)
        best_val_error = val_error

    print(f'Epoch: {epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

Epoch: 001, LR: 0.000500, Loss: 0.5999675, Val MAE: 0.5470448, Test MAE: 0.5565732
Epoch: 002, LR: 0.000500, Loss: 0.5403858, Val MAE: 0.5616606, Test MAE: 0.5565732
Epoch: 003, LR: 0.000500, Loss: 0.5231962, Val MAE: 0.5052637, Test MAE: 0.5127802
Epoch: 004, LR: 0.000500, Loss: 0.5142129, Val MAE: 0.5028469, Test MAE: 0.5076585
Epoch: 005, LR: 0.000500, Loss: 0.5082970, Val MAE: 0.5061650, Test MAE: 0.5076585
Epoch: 006, LR: 0.000500, Loss: 0.5041381, Val MAE: 0.4969436, Test MAE: 0.5035305
Epoch: 007, LR: 0.000500, Loss: 0.4991217, Val MAE: 0.4915266, Test MAE: 0.5007566


KeyboardInterrupt: 

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = GraphTrans(11,64,1,dual_readout=False,use_cls=False).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0005)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.7, patience=5,
                                                       min_lr=0.00001)

In [101]:
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

from torch.optim import Adam
from torch.nn import L1Loss

optimizer = Adam(net.parameters(), lr=1e-3)
criterion = L1Loss()


def train():
    """Run one optimisation pass over ``train_loader`` and return the mean per-batch loss.

    Uses the notebook globals ``net``, ``train_loader``, ``device``, ``optimizer``,
    ``criterion`` and ``scaler``. The forward pass and loss run under ``autocast``;
    the backward pass and optimizer step go through the gradient scaler.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    net.train()

    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()

        with autocast():
            prediction = net(batch)
            batch_loss = criterion(prediction.view(-1), batch.y.view(-1))

        scaler.scale(batch_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += batch_loss.item()

    n_batches = len(train_loader)
    return running_loss / n_batches


@torch.no_grad()
def test(loader):
    net.eval()

    total_loss = 0
    for data in loader:
        data = data.to(device)
        with autocast():
            out = net(data)
            total_loss += criterion(out.view(-1), data.y.view(-1)).item()

    return total_loss / len(loader)

In [102]:
# Lowest validation error seen so far; None until the first epoch has been evaluated.
best_val_error = None
for epoch in range(1, 21):
    # Learning rate of the optimizer attached to the scheduler, read before this epoch's scheduler step.
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train()
    val_error = test(val_loader)
    # The scheduler is driven by the validation error.
    scheduler.step(val_error)

    # Re-evaluate on the test split only when validation matches or beats the best so far;
    # otherwise the previously computed test_error is printed again.
    if best_val_error is None or val_error <= best_val_error:
        test_error = test(test_loader)
        best_val_error = val_error

    print(f'Epoch: {epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

Epoch: 001, LR: 0.000500, Loss: 1.1340361, Val MAE: 0.6864532, Test MAE: 0.6947218
Epoch: 002, LR: 0.000500, Loss: 0.6069056, Val MAE: 0.7233595, Test MAE: 0.6947218
Epoch: 003, LR: 0.000500, Loss: 0.5744214, Val MAE: 0.5611899, Test MAE: 0.5692674
Epoch: 004, LR: 0.000500, Loss: 0.5479245, Val MAE: 0.5437525, Test MAE: 0.5498771
Epoch: 005, LR: 0.000500, Loss: 0.5345107, Val MAE: 0.4998771, Test MAE: 0.5064580
Epoch: 006, LR: 0.000500, Loss: 0.5245923, Val MAE: 0.5345667, Test MAE: 0.5064580


KeyboardInterrupt: 

In [96]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = GraphTrans(11,64,1,dual_readout=True).to(device)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.7, patience=5,
                                                       min_lr=0.00001)

In [97]:
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

from torch.optim import Adam
from torch.nn import L1Loss

optimizer = Adam(net.parameters(), lr=1e-3)
criterion = L1Loss()


def train():
    """Train ``net`` for one epoch on ``train_loader``; return the average batch loss.

    Depends on the notebook globals ``net``, ``train_loader``, ``device``, ``optimizer``,
    ``criterion`` and ``scaler``. Forward and loss are computed inside ``autocast``;
    gradients and the optimizer step are routed through the gradient scaler.

    Returns:
        Accumulated per-batch loss divided by the number of batches.
    """
    net.train()

    loss_sum = 0
    for graphs in train_loader:
        graphs = graphs.to(device)
        optimizer.zero_grad()

        with autocast():
            pred = net(graphs)
            step_loss = criterion(pred.view(-1), graphs.y.view(-1))

        scaler.scale(step_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        loss_sum += step_loss.item()

    batch_count = len(train_loader)
    return loss_sum / batch_count

@torch.no_grad()
def test(loader):
    net.eval()

    total_loss = 0
    for data in loader:
        data = data.to(device)
        with autocast():
            out = net(data)
            total_loss += criterion(out.view(-1), data.y.view(-1)).item()

    return total_loss / len(loader)

In [98]:
# Lowest validation error seen so far; None until the first epoch has been evaluated.
best_val_error = None
for epoch in range(1, 21):
    # Learning rate of the optimizer attached to the scheduler, read before this epoch's scheduler step.
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train()
    val_error = test(val_loader)
    # The scheduler is driven by the validation error.
    scheduler.step(val_error)

    # Re-evaluate on the test split only when validation matches or beats the best so far;
    # otherwise the previously computed test_error is printed again.
    if best_val_error is None or val_error <= best_val_error:
        test_error = test(test_loader)
        best_val_error = val_error

    print(f'Epoch: {epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

Epoch: 001, LR: 0.000500, Loss: 0.6299823, Val MAE: 0.6426990, Test MAE: 0.6500050
Epoch: 002, LR: 0.000500, Loss: 0.5605375, Val MAE: 0.5628818, Test MAE: 0.5689878
Epoch: 003, LR: 0.000500, Loss: 0.5406009, Val MAE: 0.5448830, Test MAE: 0.5526573
Epoch: 004, LR: 0.000500, Loss: 0.5320968, Val MAE: 0.5522172, Test MAE: 0.5526573
Epoch: 005, LR: 0.000500, Loss: 0.5232036, Val MAE: 0.5250343, Test MAE: 0.5309696
Epoch: 006, LR: 0.000500, Loss: 0.5166567, Val MAE: 0.5296919, Test MAE: 0.5309696
Epoch: 007, LR: 0.000500, Loss: 0.5116349, Val MAE: 0.5145550, Test MAE: 0.5228748
Epoch: 008, LR: 0.000500, Loss: 0.5049468, Val MAE: 0.5504969, Test MAE: 0.5228748
Epoch: 009, LR: 0.000500, Loss: 0.5034734, Val MAE: 0.5397349, Test MAE: 0.5228748


KeyboardInterrupt: 

# SchNet (a simple geometrical MP-GNN)

In [None]:
from torch_geometric.nn import SchNet

# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# SchNet with 64 hidden channels, 64 filters and 3 interaction blocks; other settings keep their defaults.
net = SchNet(hidden_channels=64, num_filters=64, num_interactions=3).to(device)
print(net)

SchNet(hidden_channels=64, num_filters=64, num_interactions=3, num_gaussians=50, cutoff=10.0)


In [None]:
from torch.cuda.amp import autocast, GradScaler

# Gradient scaler used together with autocast in train() below.
scaler = GradScaler()

# define the optimizer and loss function
from torch.optim import Adam
from torch.nn import L1Loss

# Adam over the SchNet parameters with lr = 1e-3; L1 loss as the training and evaluation criterion.
optimizer = Adam(net.parameters(), lr=1e-3)
criterion = L1Loss()


def train():
    """Run one SchNet training epoch over ``train_loader`` and return the mean batch loss.

    Uses the notebook globals ``net``, ``train_loader``, ``device``, ``optimizer``,
    ``criterion`` and ``scaler``. The model is called with ``z``, ``pos`` and ``batch``
    from each batch, under ``autocast``; backward and the optimizer step go through
    the gradient scaler.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    net.train()

    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()

        with autocast():
            prediction = net(batch.z, batch.pos, batch.batch)
            batch_loss = criterion(prediction.view(-1), batch.y.view(-1))

        scaler.scale(batch_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += batch_loss.item()

    n_batches = len(train_loader)
    return running_loss / n_batches


@torch.no_grad()
def test(loader):
    net.eval()

    total_loss = 0
    for data in loader:
        data = data.to(device)
        with autocast():
            out = net(data.z, data.pos, data.batch)
            total_loss += criterion(out.view(-1), data.y.view(-1)).item()

    return total_loss / len(loader)

In [None]:
from tqdm import tqdm

# Lowest validation loss seen so far; None until the first validation pass.
best_val_loss = None
for epoch in tqdm(range(1, 101)):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

    # Validate every second epoch only.
    if epoch % 2 == 0:
        val_loss = test(val_loader)
        print(f'Val Loss: {val_loss:.4f}')

        # Evaluate on the test split only when validation matches or beats the best so far.
        if best_val_loss is None or val_loss <= best_val_loss:
            best_val_loss = val_loss
            test_loss = test(test_loader)
            print(f'Test Loss: {test_loss:.4f}')

  1%|          | 1/100 [00:23<38:17, 23.21s/it]

Epoch: 001, Loss: 0.7536
Epoch: 002, Loss: 0.3695
Val Loss: 0.3185


  2%|▏         | 2/100 [00:47<39:01, 23.89s/it]

Test Loss: 0.3121


  3%|▎         | 3/100 [01:07<35:59, 22.26s/it]

Epoch: 003, Loss: 0.3102
Epoch: 004, Loss: 0.2707
Val Loss: 0.2359


  4%|▍         | 4/100 [01:34<38:20, 23.96s/it]

Test Loss: 0.2318


  5%|▌         | 5/100 [01:57<37:10, 23.48s/it]

Epoch: 005, Loss: 0.2368
Epoch: 006, Loss: 0.2111
Val Loss: 0.1971


  6%|▌         | 6/100 [02:21<37:15, 23.78s/it]

Test Loss: 0.1928


  6%|▌         | 6/100 [02:25<37:56, 24.22s/it]


KeyboardInterrupt: 