In [1]:
import torch

from torchmetrics.classification import Accuracy, AUROC
from torchmetrics.regression import R2Score, MeanSquaredError

from torch_geometric.nn import GAT

from molsetrep.utils.trainer import Trainer
from molsetrep.utils.datasets import molnet_loader
from molsetrep.utils.converters import molnet_to_pyg
from molsetrep.utils.root_mean_squared_error import RootMeanSquaredError
from molsetrep.models import GNNSetRepClassifier, GNNSetRepRegressor, GNNSetRepClassifierSubstruct
from molsetrep.explain import RegressionExplainer

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'pytorch_lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


## Classification

In [5]:
# Load the "bbbp" dataset with a random train/valid/test split.
# Reference results to compare against: https://github.com/wengong-jin/chemprop
train, valid, test = molnet_loader("bbbp", splitter="random")

# Keyword options for the conversion to PyG loaders, gathered in one place so
# they are easy to tweak between runs.
pyg_options = dict(
    label_type=torch.long,
    imbalanced_sampler=True,
    secfp=False,
    index_graphs=False,
    # Optional featurisation overrides (currently disabled):
    # atom_attrs=["atomic_num", "charge", "hydrogen_count"],
    # bond_attrs=["bond_type"],
)
train_loader, valid_loader, test_loader = molnet_to_pyg(train, valid, test, **pyg_options)

In [None]:
# Seed torch's global RNG so that weight initialisation and any torch-driven
# shuffling/sampling are repeatable under Restart & Run All.
# NOTE(review): the random split performed by molnet_loader is not visibly
# controlled by this seed -- confirm whether it needs seeding separately.
torch.manual_seed(42)

# Feature widths are read from the first graph of the training dataset.
num_node_features = train_loader.dataset[0].num_node_features
num_edge_features = train_loader.dataset[0].num_edge_features
# NOTE(review): the positional hyper-parameters (1024, 6, 100, 32) are passed
# unchanged; check GNNSetRepClassifier's signature for what each one means.
model = GNNSetRepClassifier(num_node_features, 1024, 6, num_edge_features, 100, 32)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# ExponentialLR multiplies the learning rate by gamma on every scheduler step.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# NLLLoss takes log-probabilities as input.
# NOTE(review): assumes the classifier emits log-probabilities -- confirm.
criterion = torch.nn.NLLLoss()

# Three separate metric lists are handed to the Trainer, each with its own
# metric instances (presumably train / valid / test -- TODO confirm).
trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,  # presumably the number of epochs -- TODO confirm against Trainer
    [Accuracy(task="binary"), AUROC(task="binary")],
    [Accuracy(task="binary"), AUROC(task="binary")],
    [Accuracy(task="binary"), AUROC(task="binary")],
    scheduler=scheduler,
    # presumably an index into the metric lists (1 -> AUROC) -- TODO confirm
    monitor_metric=1,
    monitor_lower_is_better=False,
)

trainer.train(train_loader, valid_loader)
trainer.test(test_loader)

## Regression

In [16]:
# Seed torch's global RNG so that weight initialisation and any torch-driven
# shuffling/sampling are repeatable under Restart & Run All.
# NOTE(review): the random split performed by molnet_loader is not visibly
# controlled by this seed -- confirm whether it needs seeding separately.
torch.manual_seed(42)

# Load the "delaney" dataset with a random train/valid/test split.
train, valid, test = molnet_loader("delaney", splitter="random", reload=False)
train_loader, valid_loader, test_loader = molnet_to_pyg(
    train,
    valid,
    test,
    label_type=torch.float,
    # Optional explicit atom featurisation (currently disabled):
    # atom_attrs=[
    #     "atomic_num",
    #     "charge",
    #     "aromatic",
    #     "is_in_ring",
    #     "hydrogen_count",
    #     "hybridization_sp",
    #     "hybridization_sp2",
    #     "hybridization_sp3",
    #     "hybridization_sp3d",
    #     "hybridization_sp3d2",
    #     "chiral_type_chi_tetrahedral_cw",
    #     "chiral_type_chi_tetrahedral_ccw",
    #     "chiral_type_chi_other",
    #     "chiral_type_chi_tetrahedral",
    #     "chiral_type_chi_allene",
    #     "chiral_type_chi_squareplanar",
    #     "chiral_type_chi_trigonalbipyramidal",
    #     "chiral_type_chi_octahedral",
    #     "degree",
    #     "radical_count"
    # ]
)

# Feature widths are read from the first graph of the training dataset.
num_node_features = train_loader.dataset[0].num_node_features
num_edge_features = train_loader.dataset[0].num_edge_features
# NOTE(review): the positional hyper-parameters (512, 2, 16, 32) are passed
# unchanged; check GNNSetRepRegressor's signature for what each one means.
model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 16, 32)
# Alternative configuration with a GAT backbone (currently disabled):
# model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 32, gnn=GAT(num_node_features, 512, 4, jk="cat", heads=8))


optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss()
# This scheduler is constructed but NOT handed to the Trainer below (the
# `scheduler=` argument is commented out).
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

# Likewise constructed but not handed to the Trainer (the `explainer=` argument
# is commented out).
explainer = RegressionExplainer(model, valid_loader)

# MeanSquaredError(squared=False) reports the square root of the MSE (i.e. the
# RMSE), even though the training log labels the value "MeanSquaredError".
trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,  # presumably the number of epochs -- TODO confirm against Trainer
    [R2Score(), MeanSquaredError(squared=False)],
    [R2Score(), MeanSquaredError(squared=False)],
    [R2Score(), MeanSquaredError(squared=False)],
    # scheduler=scheduler,
    # presumably an index into the metric lists (1 -> the RMSE metric) -- TODO confirm
    monitor_metric=1,
    # Monitor direction is left at the Trainer default:
    # monitor_lower_is_better=False
    # explainer=explainer
)

trainer.train(train_loader, valid_loader)
trainer.test(test_loader, average_n_epochs=0)

*  Epoch 1: Train loss: 0.732 (R2Score: 0.259, MeanSquaredError: 0.855)  Valid loss: 0.983 (R2Score: 0.21, MeanSquaredError: 0.991)
*  Epoch 2: Train loss: 0.619 (R2Score: 0.382, MeanSquaredError: 0.787)  Valid loss: 0.975 (R2Score: 0.188, MeanSquaredError: 0.987)
*  Epoch 3: Train loss: 0.578 (R2Score: 0.417, MeanSquaredError: 0.76)  Valid loss: 0.726 (R2Score: 0.301, MeanSquaredError: 0.852)
*  Epoch 4: Train loss: 0.543 (R2Score: 0.458, MeanSquaredError: 0.737)  Valid loss: 0.696 (R2Score: 0.354, MeanSquaredError: 0.834)
*  Epoch 5: Train loss: 0.527 (R2Score: 0.474, MeanSquaredError: 0.726)  Valid loss: 0.515 (R2Score: 0.425, MeanSquaredError: 0.718)
|  Epoch 6: Train loss: 0.509 (R2Score: 0.493, MeanSquaredError: 0.713)  Valid loss: 0.821 (R2Score: 0.347, MeanSquaredError: 0.906)
|  Epoch 7: Train loss: 0.476 (R2Score: 0.521, MeanSquaredError: 0.69)  Valid loss: 0.666 (R2Score: 0.459, MeanSquaredError: 0.816)
|  Epoch 8: Train loss: 0.499 (R2Score: 0.503, MeanSquaredError: 0.706) 