In [1]:
import torch

from torchmetrics.classification import Accuracy, AUROC
from torchmetrics.regression import R2Score, MeanSquaredError

from torch_geometric.nn import GAT

from molsetrep.utils.trainer import Trainer
from molsetrep.utils.datasets import molnet_loader
from molsetrep.utils.converters import molnet_to_pyg
from molsetrep.utils.root_mean_squared_error import RootMeanSquaredError
from molsetrep.models import GNNSetRepClassifier, GNNSetRepRegressor, GNNSetRepClassifierSubstruct, GNNRegressor
from molsetrep.explain import RegressionExplainer

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'pytorch_lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


## Classification

In [13]:
# Reference results for comparison: https://github.com/wengong-jin/chemprop
# Fetch the three "hiv" splits through the project loader (reload disabled).
train, valid, test = molnet_loader("hiv", reload=False)

# Convert the splits into the loaders consumed by the training cell below.
# Labels are cast to torch.long for classification, and the imbalanced sampler
# is switched on.
# Optional overrides that are currently disabled:
#   atom_attrs=["atomic_num", "charge", "hydrogen_count"]
#   bond_attrs=["bond_type"]
train_loader, valid_loader, test_loader = molnet_to_pyg(
    train, valid, test,
    index_graphs=False,
    secfp=False,
    imbalanced_sampler=True,
    label_type=torch.long,
)

In [14]:
# Train and evaluate the set-representation GNN classifier on the loaders built
# in the previous cell (`train_loader`, `valid_loader`, `test_loader`).

# Seed torch's global RNG so that weight initialisation and torch-driven
# shuffling/sampling are repeatable when the notebook is re-run from scratch.
torch.manual_seed(42)

# Feature widths are read off the first training graph.
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features

# NOTE(review): the positional values 256, 2, 16 and 32 are passed through
# unchanged; presumably hidden width, number of GNN layers and the
# set-representation sizes -- confirm against GNNSetRepClassifier's signature.
model = GNNSetRepClassifier(num_node_features, 256, 2, num_edge_features, 16, 32)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# The scheduler is created but only used if `scheduler=scheduler` is
# re-enabled in the Trainer call below.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# NLLLoss expects log-probabilities as input.
# NOTE(review): confirm that the model's output is log-softmax.
criterion = torch.nn.NLLLoss()


def _binary_metrics():
    """Return a fresh [Accuracy, AUROC] pair of binary torchmetrics objects.

    Metric objects accumulate state, so every metric list handed to the
    Trainer gets its own instances.
    """
    return [Accuracy(task="binary"), AUROC(task="binary")]


# NOTE(review): the 4th positional argument (100) is presumably the number of
# epochs and the three metric lists presumably map to train/valid/test --
# confirm against Trainer's signature.
trainer = Trainer(
    model,
    optimizer,
    criterion,
    100,
    _binary_metrics(),
    _binary_metrics(),
    _binary_metrics(),
    # scheduler=scheduler,
    # Index 1 of each metric list is AUROC; higher values are better.
    monitor_metric=1,
    monitor_lower_is_better=False
)

trainer.train(train_loader, valid_loader)
trainer.test(test_loader)

*  Epoch 1: Train loss: 0.257 (BinaryAccuracy: 0.919, BinaryAUROC: 0.497)  Valid loss: 0.119 (BinaryAccuracy: 0.98, BinaryAUROC: 0.5)
|  Epoch 2: Train loss: 0.16 (BinaryAccuracy: 0.963, BinaryAUROC: 0.501)  Valid loss: 0.109 (BinaryAccuracy: 0.98, BinaryAUROC: 0.5)
*  Epoch 3: Train loss: 0.15 (BinaryAccuracy: 0.962, BinaryAUROC: 0.504)  Valid loss: 0.097 (BinaryAccuracy: 0.98, BinaryAUROC: 0.506)
*  Epoch 4: Train loss: 0.147 (BinaryAccuracy: 0.963, BinaryAUROC: 0.52)  Valid loss: 0.096 (BinaryAccuracy: 0.981, BinaryAUROC: 0.506)
*  Epoch 5: Train loss: 0.146 (BinaryAccuracy: 0.963, BinaryAUROC: 0.515)  Valid loss: 0.096 (BinaryAccuracy: 0.981, BinaryAUROC: 0.537)
|  Epoch 6: Train loss: 0.143 (BinaryAccuracy: 0.963, BinaryAUROC: 0.521)  Valid loss: 0.097 (BinaryAccuracy: 0.98, BinaryAUROC: 0.531)
|  Epoch 7: Train loss: 0.141 (BinaryAccuracy: 0.963, BinaryAUROC: 0.532)  Valid loss: 0.098 (BinaryAccuracy: 0.981, BinaryAUROC: 0.519)
|  Epoch 8: Train loss: 0.14 (BinaryAccuracy: 0.964,

[]

## Regression

In [None]:
# Fetch the three "lipo" splits through the project loader (default options).
train, valid, test = molnet_loader("lipo")

# Convert the splits into loaders with floating-point regression targets.
# An explicit `atom_attrs=[...]` selection is currently disabled; the names
# that were listed for it are:
#   atomic_num, charge, aromatic, is_in_ring, hydrogen_count,
#   hybridization_sp, hybridization_sp2, hybridization_sp3,
#   hybridization_sp3d, hybridization_sp3d2,
#   chiral_type_chi_tetrahedral_cw, chiral_type_chi_tetrahedral_ccw,
#   chiral_type_chi_other, chiral_type_chi_tetrahedral,
#   chiral_type_chi_allene, chiral_type_chi_squareplanar,
#   chiral_type_chi_trigonalbipyramidal, chiral_type_chi_octahedral,
#   degree, radical_count
train_loader, valid_loader, test_loader = molnet_to_pyg(
    train, valid, test, label_type=torch.float
)

# Train and evaluate the set-representation GNN regressor on the loaders built
# above (`train_loader`, `valid_loader`, `test_loader`).

# Seed torch's global RNG so that weight initialisation and torch-driven
# shuffling are repeatable when the notebook is re-run from scratch.
torch.manual_seed(42)

# Feature widths are read off the first training graph.
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features

# NOTE(review): the positional values 512, 2, 8 and 16 are passed through
# unchanged; presumably hidden width, number of GNN layers and the
# set-representation sizes -- confirm against GNNSetRepRegressor's signature.
model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 16)
# Alternative models kept for quick comparison:
# model = GNNRegressor(num_node_features, 512, 2, num_edge_features)
# model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 32, gnn=GAT(num_node_features, 512, 4, jk="cat", heads=8))

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss()
# The scheduler is created but only used if `scheduler=scheduler` is
# re-enabled in the Trainer call below.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

# The explainer is constructed here but is not handed to the Trainer unless
# `explainer=explainer` is re-enabled below.
explainer = RegressionExplainer(model, valid_loader)


def _regression_metrics():
    """Return a fresh [R2Score, RMSE] pair of torchmetrics objects.

    `MeanSquaredError(squared=False)` reports the root mean squared error.
    Metric objects accumulate state, so every metric list handed to the
    Trainer gets its own instances.
    """
    return [R2Score(), MeanSquaredError(squared=False)]


# NOTE(review): the 4th positional argument (200) is presumably the number of
# epochs and the three metric lists presumably map to train/valid/test --
# confirm against Trainer's signature.
trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,
    _regression_metrics(),
    _regression_metrics(),
    _regression_metrics(),
    # scheduler=scheduler,
    # Index 1 of each metric list is the RMSE. `monitor_lower_is_better` is
    # left at the Trainer default (presumably True -- confirm).
    monitor_metric=1,
    # monitor_lower_is_better=False
    # explainer=explainer
)

trainer.train(train_loader, valid_loader)
trainer.test(test_loader, average_n_epochs=0)