In [1]:
import torch

from torchmetrics.classification import Accuracy, AUROC
from torchmetrics.regression import R2Score, MeanSquaredError

from torch_geometric.nn import GAT

from molsetrep.utils.trainer import Trainer
from molsetrep.utils.datasets import molnet_loader
from molsetrep.utils.converters import molnet_to_pyg, smiles_to_nx, nx_to_pyg
from molsetrep.utils.root_mean_squared_error import RootMeanSquaredError
from molsetrep.models import GNNSetRepClassifier, GNNSetRepRegressor, GNNSetRepClassifierSubstruct, GNNRegressor
from molsetrep.explain import RegressionExplainer

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'pytorch_lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


In [2]:
# Smoke-test the SMILES -> networkx -> PyG conversion chain on a small
# dot-separated SMILES string; the resulting PyG object is the cell output.
demo_nx_graph = smiles_to_nx("C.CNC")
nx_to_pyg(demo_nx_graph)

C.CNC


Data(edge_index=[2, 4], atomic_num=[4], charge=[4], aromatic=[4], is_in_ring=[4], hydrogen_count=[4], hybridization_sp=[4], hybridization_sp2=[4], hybridization_sp3=[4], hybridization_sp3d=[4], hybridization_sp3d2=[4], chiral_type_chi_tetrahedral_cw=[4], chiral_type_chi_tetrahedral_ccw=[4], chiral_type_chi_other=[4], chiral_type_chi_tetrahedral=[4], chiral_type_chi_allene=[4], chiral_type_chi_squareplanar=[4], chiral_type_chi_trigonalbipyramidal=[4], chiral_type_chi_octahedral=[4], degree=[4], radical_count=[4], bond_type=[4], bond_type_aromatic=[4], bond_conjugated=[4], bond_stereo_z=[4], bond_stereo_e=[4], bond_stereo_cis=[4], bond_stereo_trans=[4], num_nodes=4)

## Classification

In [None]:
# Load the "bbbp" splits and wrap them in PyG data loaders for classification.
# Compare to https://github.com/chemprop/chemprop
bbbp_splits = molnet_loader("bbbp", reload=False)
train, valid, test = bbbp_splits

# Keyword options forwarded to the converter. The two disabled entries are
# kept for reference; with them commented out the converter's defaults apply.
pyg_options = {
    "label_type": torch.long,
    "imbalanced_sampler": True,
    "secfp": False,
    "index_graphs": False,
    # "atom_attrs": ["atomic_num", "charge", "hydrogen_count"],
    # "bond_attrs": ["bond_type"],
}

train_loader, valid_loader, test_loader = molnet_to_pyg(
    train, valid, test, **pyg_options
)

In [5]:
# Train and evaluate the set-representation GNN classifier on the
# train/valid/test loaders created in the data-loading cell.

# Seed torch's global RNG so that weight initialisation (and any sampling that
# draws from torch's default generator) repeats across
# "Restart Kernel -> Run All". The value itself is arbitrary.
torch.manual_seed(42)


def _binary_metrics():
    """Return a fresh ``[Accuracy, AUROC]`` list of binary-task metrics.

    A new pair of metric objects is created on every call so that each list
    handed to ``Trainer`` has its own instances, exactly as when the list
    literal is written out three times.
    """
    return [Accuracy(task="binary"), AUROC(task="binary")]


# Feature dimensions are read off the first graph in the training set.
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features

# NOTE(review): the literal positional arguments (16, 256, 8, 16) are defined
# by GNNSetRepClassifier's signature, which is not visible from this notebook
# -- confirm their meaning before tuning.
model = GNNSetRepClassifier(num_node_features, 16, 256, num_edge_features, 8, 16)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Constructed but currently not handed to the Trainer (see the commented
# `scheduler=scheduler` argument below).
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# NLLLoss expects log-probabilities as input; this assumes the model's output
# is log-softmax -- TODO confirm against the model's forward pass.
criterion = torch.nn.NLLLoss()

trainer = Trainer(
    model,
    optimizer,
    criterion,
    10,  # presumably the number of epochs -- confirm against Trainer
    # Three separate metric lists; presumably train / valid / test -- confirm.
    _binary_metrics(),
    _binary_metrics(),
    _binary_metrics(),
    # scheduler=scheduler,
    # monitor_metric=1,
    # monitor_lower_is_better=False
)

trainer.train(train_loader, valid_loader)
trainer.test(test_loader)

*  Epoch 1: Train loss: 0.555 (BinaryAccuracy: 0.727, BinaryAUROC: 0.727)  Valid loss: 0.774 (BinaryAccuracy: 0.674, BinaryAUROC: 0.66)
|  Epoch 2: Train loss: 0.508 (BinaryAccuracy: 0.74, BinaryAUROC: 0.742)  Valid loss: 0.88 (BinaryAccuracy: 0.6, BinaryAUROC: 0.627)
|  Epoch 3: Train loss: 0.497 (BinaryAccuracy: 0.761, BinaryAUROC: 0.761)  Valid loss: 0.941 (BinaryAccuracy: 0.698, BinaryAUROC: 0.628)
|  Epoch 4: Train loss: 0.507 (BinaryAccuracy: 0.74, BinaryAUROC: 0.742)  Valid loss: 0.896 (BinaryAccuracy: 0.549, BinaryAUROC: 0.584)
|  Epoch 5: Train loss: 0.509 (BinaryAccuracy: 0.742, BinaryAUROC: 0.74)  Valid loss: 0.902 (BinaryAccuracy: 0.671, BinaryAUROC: 0.653)
|  Epoch 6: Train loss: 0.487 (BinaryAccuracy: 0.757, BinaryAUROC: 0.756)  Valid loss: 0.977 (BinaryAccuracy: 0.589, BinaryAUROC: 0.6)


KeyboardInterrupt: 

## Regression

In [None]:
# Load the "lipo" splits and wrap them in PyG data loaders with float labels.
lipo_splits = molnet_loader("lipo")
train, valid, test = lipo_splits

# Keyword options forwarded to the converter. The `atom_attrs` list is kept
# for reference but disabled, so the converter's defaults apply.
pyg_options = {
    "label_type": torch.float,
    # "atom_attrs": [
    #     "atomic_num",
    #     "charge",
    #     "aromatic",
    #     "is_in_ring",
    #     "hydrogen_count",
    #     "hybridization_sp",
    #     "hybridization_sp2",
    #     "hybridization_sp3",
    #     "hybridization_sp3d",
    #     "hybridization_sp3d2",
    #     "chiral_type_chi_tetrahedral_cw",
    #     "chiral_type_chi_tetrahedral_ccw",
    #     "chiral_type_chi_other",
    #     "chiral_type_chi_tetrahedral",
    #     "chiral_type_chi_allene",
    #     "chiral_type_chi_squareplanar",
    #     "chiral_type_chi_trigonalbipyramidal",
    #     "chiral_type_chi_octahedral",
    #     "degree",
    #     "radical_count",
    # ],
}

train_loader, valid_loader, test_loader = molnet_to_pyg(
    train, valid, test, **pyg_options
)

# Train and evaluate the set-representation GNN regressor on the
# train/valid/test loaders created at the top of this cell.

# Seed torch's global RNG so that weight initialisation (and any sampling that
# draws from torch's default generator) repeats across
# "Restart Kernel -> Run All". The value itself is arbitrary.
torch.manual_seed(42)


def _regression_metrics():
    """Return a fresh ``[R2Score, RMSE]`` list of regression metrics.

    ``MeanSquaredError(squared=False)`` reports the root mean squared error.
    A new pair of metric objects is created on every call so that each list
    handed to ``Trainer`` has its own instances, exactly as when the list
    literal is written out three times.
    """
    return [R2Score(), MeanSquaredError(squared=False)]


# Feature dimensions are read off the first graph in the training set.
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features

# NOTE(review): the literal positional arguments (512, 2, 8, 16) are defined by
# GNNSetRepRegressor's signature, which is not visible from this notebook --
# confirm their meaning before tuning. Alternative models tried so far are
# kept below for reference.
model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 16)
# model = GNNRegressor(num_node_features, 512, 2, num_edge_features)
# model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 32, gnn=GAT(num_node_features, 512, 4, jk="cat", heads=8))


optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss()
# Constructed but currently not handed to the Trainer (see the commented
# `scheduler=scheduler` argument below).
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

# Constructed but currently not handed to the Trainer (see the commented
# `explainer=explainer` argument below).
explainer = RegressionExplainer(model, valid_loader)

trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,  # presumably the number of epochs -- confirm against Trainer
    # Three separate metric lists; presumably train / valid / test -- confirm.
    _regression_metrics(),
    _regression_metrics(),
    _regression_metrics(),
    # scheduler=scheduler,
    # NOTE(review): presumably an index into the metric lists above
    # (1 -> the RMSE entry) -- confirm against Trainer.
    monitor_metric=1,
    # monitor_lower_is_better=False
    # explainer=explainer
)

trainer.train(train_loader, valid_loader)
trainer.test(test_loader, average_n_epochs=0)