In [20]:
import torch
import numpy as np

from torch.nn import Parameter, Linear, BatchNorm1d, ReLU, LeakyReLU, Linear
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torcheval.metrics import BinaryAccuracy, BinaryAUROC
from torchmetrics.regression import R2Score, MeanSquaredError, MeanAbsoluteError
from torchmetrics.classification import Accuracy, AUROC

from molsetrep.utils.torch_trainer import TorchTrainer
from molsetrep.utils.multiset_torch_trainer import MultisetTorchTrainer
from molsetrep.utils.datasets import molnet_loader
from molsetrep.utils.converters import molnet_to_pyg
from molsetrep.utils.root_mean_squared_error import RootMeanSquaredError
from molsetrep.utils.imbalanced_sampler import ImbalancedSampler
# from molsetrep.models import SetRepClassifier, SetRepRegressor, GNNDeepSetClassifier, DeepSet, DualSetRepClassifier, DualSetRepRegressor
from molsetrep.encoders import SECMQNFPEncoder, SECFPEncoder, ECFPEncoder, Mol2VecEncoder, Mol2SetEncoder

from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight

import matplotlib.pyplot as plt

import lightning.pytorch as pl


## Setup

### Lightning Module

In [82]:
class DualSetClassifier(pl.LightningModule):
    def __init__(self, n_hidden_sets, n_hidden_sets_2, n_elements, n_elements_2, d, d_2, n_classes, class_weights):
        super().__init__()
        self.n_hidden_sets = n_hidden_sets
        self.n_elements = n_elements

        self.n_hidden_sets_2 = n_hidden_sets_2
        self.n_elements_2 = n_elements_2

        self.class_weights = class_weights

        self.Wc = Parameter(torch.FloatTensor(d, n_hidden_sets * n_elements))
        self.Wc_2 = Parameter(torch.FloatTensor(d_2, n_hidden_sets_2 * n_elements_2))
        self.fc1 = Linear(n_hidden_sets, 32)
        self.fc1_2 = Linear(n_hidden_sets_2, 32)
        self.bn = BatchNorm1d(n_hidden_sets)
        self.bn_2 = BatchNorm1d(n_hidden_sets_2)
        self.fc2 = Linear(32 * 2, 32)
        self.fc3 = Linear(32, n_classes)

        
        # Init weights
        self.Wc.data.normal_()
        self.Wc_2.data.normal_()

        # Metrics
        self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
        self.train_auroc = AUROC(task="multiclass", num_classes=n_classes)
        self.valid_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
        self.valid_auroc = AUROC(task="multiclass", num_classes=n_classes)
        self.test_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
        self.test_auroc = AUROC(task="multiclass", num_classes=n_classes)

    def forward(self, X, X2):
        # First sets (e.g. atoms)
        t = torch.matmul(X, self.Wc)
        t = torch.relu(t)
        t = t.view(t.size()[0], t.size()[1], self.n_elements, self.n_hidden_sets)
        t, _ = torch.max(t, dim=2)
        t = torch.sum(t, dim=1)
        t = self.bn(t)
        t = self.fc1(t)
        t = torch.relu(t)

        # Second sets (e.g. bonds)
        t_2 = torch.matmul(X2, self.Wc_2)
        t_2 = torch.relu(t_2)
        t_2 = t_2.view(
            t_2.size()[0], t_2.size()[1], self.n_elements_2, self.n_hidden_sets_2
        )
        t_2, _ = torch.max(t_2, dim=2)
        t_2 = torch.sum(t_2, dim=1)
        t_2 = self.bn_2(t_2)
        t_2 = self.fc1_2(t_2)
        t_2 = torch.relu(t_2)

        # Concat and softmax
        out = self.fc2(torch.cat((t, t_2), 1))
        out = self.fc3(out)
        out = F.log_softmax(out, dim=1)

        return out

    def training_step(self, batch, batch_idx):
        x, x2, y = batch
        out = self(x, x2)
        loss = F.nll_loss(out, y, weight=torch.FloatTensor(self.class_weights).to(self.device))
        
        # Metrics
        self.train_accuracy(out, y)
        self.train_auroc(out, y)

        self.log("train_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        return loss
    
    def validation_step(self, val_batch, batch_idx):
        x, x2, y = val_batch
        out = self.forward(x, x2)
        loss = F.nll_loss(out, y)

        # Metrics
        self.valid_accuracy(out, y)
        self.valid_auroc(out, y)

        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

    def test_step(self, val_batch, batch_idx):
        x, x2, y = val_batch
        out = self.forward(x, x2)
        loss = F.nll_loss(out, y)

        # Metrics
        self.test_accuracy(out, y)
        self.test_auroc(out, y)

        self.log("test_loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log("test_accuracy", self.test_accuracy, prog_bar=True, on_step=False, on_epoch=True)
        self.log("test_auroc", self.test_auroc, prog_bar=True, on_step=False, on_epoch=True)

    def on_train_epoch_end(self):
        self.log("train_acc_epoch", self.train_accuracy)
        self.log("train_auroc_epoch", self.train_auroc)

        print("Train AUROC", self.train_auroc.compute())

    def on_validation_epoch_end(self):
        self.log("valid_acc_epoch", self.valid_accuracy)
        self.log("valid_auroc_epoch", self.valid_auroc)

        print("Valid AUROC", self.valid_auroc.compute())
        

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.0001)

## Train

### Load Data

In [80]:
train, valid, test = molnet_loader("bace_classification", splitter="scaffold")

enc = ECFPEncoder()

class_weights = compute_class_weight("balanced", classes=np.sort(np.unique(train.y.flatten())), y=train.y.flatten())
print(class_weights)

train_dataset = enc.encode(train.ids, [y[0] for y in train.y], label_dtype=torch.long)
valid_dataset = enc.encode(valid.ids, [y[0] for y in valid.y], label_dtype=torch.long)
test_dataset = enc.encode(test.ids, [y[0] for y in test.y], label_dtype=torch.long)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False, num_workers=8)#, sampler=ImbalancedSampler(train_dataset))
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=8)

d = len(train_dataset[0][0][0])
d2 = len(train_dataset[0][1][0])

[0.8705036  1.17475728]


### Fit

In [83]:
trainer = pl.Trainer(max_epochs=50, log_every_n_steps=1)
model = DualSetClassifier(16, 16, 8, 8, d, d2, 2, class_weights=class_weights)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=valid_loader)
trainer.test(ckpt_path="best", dataloaders=test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name           | Type               | Params
-------------------------------------------------------
0  | fc1            | Linear             | 544   
1  | fc1_2          | Linear             | 544   
2  | bn             | BatchNorm1d        | 32    
3  | bn_2           | BatchNorm1d        | 32    
4  | fc2            | Linear             | 2.1 K 
5  | fc3            | Linear             | 66    
6  | train_accuracy | MulticlassAccuracy | 0     
7  | train_auroc    | MulticlassAUROC    | 0     
8  | valid_accuracy | MulticlassAccuracy | 0     
9  | valid_auroc    | MulticlassAUROC    | 0     
10 | test_accuracy  | MulticlassAccuracy | 0     
11 | test_auroc     | MulticlassAUROC    | 0     
-------------------------------------------------------
50.9 K    Trainable params
0         Non-tr

Sanity Checking: 0it [00:00, ?it/s]

Valid AUROC tensor(0.4580, device='cuda:0')


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5549, device='cuda:0')
Train AUROC tensor(0.3761, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5840, device='cuda:0')
Train AUROC tensor(0.5876, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5832, device='cuda:0')
Train AUROC tensor(0.6658, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5844, device='cuda:0')
Train AUROC tensor(0.6755, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5851, device='cuda:0')
Train AUROC tensor(0.6757, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5865, device='cuda:0')
Train AUROC tensor(0.6756, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5885, device='cuda:0')
Train AUROC tensor(0.6756, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5891, device='cuda:0')
Train AUROC tensor(0.6754, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5896, device='cuda:0')
Train AUROC tensor(0.6755, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5906, device='cuda:0')
Train AUROC tensor(0.6757, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5906, device='cuda:0')
Train AUROC tensor(0.6758, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5904, device='cuda:0')
Train AUROC tensor(0.6759, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5906, device='cuda:0')
Train AUROC tensor(0.6761, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5912, device='cuda:0')
Train AUROC tensor(0.6763, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5908, device='cuda:0')
Train AUROC tensor(0.6766, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5910, device='cuda:0')
Train AUROC tensor(0.6770, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5908, device='cuda:0')
Train AUROC tensor(0.6773, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6776, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6780, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6783, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5904, device='cuda:0')
Train AUROC tensor(0.6786, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5908, device='cuda:0')
Train AUROC tensor(0.6790, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5913, device='cuda:0')
Train AUROC tensor(0.6794, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5915, device='cuda:0')
Train AUROC tensor(0.6798, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5924, device='cuda:0')
Train AUROC tensor(0.6803, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5929, device='cuda:0')
Train AUROC tensor(0.6808, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5928, device='cuda:0')
Train AUROC tensor(0.6812, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5929, device='cuda:0')
Train AUROC tensor(0.6816, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5928, device='cuda:0')
Train AUROC tensor(0.6820, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5919, device='cuda:0')
Train AUROC tensor(0.6824, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5915, device='cuda:0')
Train AUROC tensor(0.6828, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5908, device='cuda:0')
Train AUROC tensor(0.6834, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5912, device='cuda:0')
Train AUROC tensor(0.6839, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5910, device='cuda:0')
Train AUROC tensor(0.6843, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6848, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6853, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6858, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5904, device='cuda:0')
Train AUROC tensor(0.6864, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6868, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6872, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5903, device='cuda:0')
Train AUROC tensor(0.6877, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5897, device='cuda:0')
Train AUROC tensor(0.6882, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5899, device='cuda:0')
Train AUROC tensor(0.6886, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5897, device='cuda:0')
Train AUROC tensor(0.6892, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5897, device='cuda:0')
Train AUROC tensor(0.6898, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5894, device='cuda:0')
Train AUROC tensor(0.6903, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5897, device='cuda:0')
Train AUROC tensor(0.6907, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5894, device='cuda:0')
Train AUROC tensor(0.6912, device='cuda:0')


Validation: 0it [00:00, ?it/s]

Valid AUROC tensor(0.5896, device='cuda:0')
Train AUROC tensor(0.6917, device='cuda:0')


Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
Restoring states from the checkpoint path at /home/daenu/Code/molsetrep/notebooks/lightning_logs/version_38/checkpoints/epoch=49-step=950.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/daenu/Code/molsetrep/notebooks/lightning_logs/version_38/checkpoints/epoch=49-step=950.ckpt


Valid AUROC tensor(0.5899, device='cuda:0')
Train AUROC tensor(0.6922, device='cuda:0')


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.585979700088501,
  'test_accuracy': 0.7368420958518982,
  'test_auroc': 0.7846014499664307}]