In [1]:
import torch

from torchmetrics.classification import Accuracy, AUROC
from torchmetrics.regression import R2Score, MeanSquaredError

from torch_geometric.nn import GAT

from molsetrep.utils.trainer import Trainer
from molsetrep.utils.datasets import molnet_loader
from molsetrep.utils.converters import molnet_to_pyg, smiles_to_nx, nx_to_pyg
from molsetrep.utils.root_mean_squared_error import RootMeanSquaredError
from molsetrep.models import GNNSetRepClassifier, GNNSetRepRegressor, GNNSetRepClassifierSubstruct, GNNRegressor, GNNClassifier
from molsetrep.explain import RegressionExplainer

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'pytorch_lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


## Classification

In [2]:
# Reference results for comparison: https://github.com/chemprop/chemprop
# Fetch the three BBBP splits first, then unpack them into the names the
# later cells use.
bbbp_splits = molnet_loader("bbbp", reload=False)
train, valid, test = bbbp_splits

# Keyword options forwarded to the PyG conversion, collected in one place.
# Optional overrides that are currently switched off:
#   atom_attrs=["atomic_num", "charge", "hydrogen_count"]
#   bond_attrs=["bond_type"]
bbbp_loader_options = dict(
    label_type=torch.long,
    imbalanced_sampler=True,
    secfp=False,
    index_graphs=False,
)

train_loader, valid_loader, test_loader = molnet_to_pyg(
    train, valid, test, **bbbp_loader_options
)

[23:49:51] Explicit valence for atom # 1 N, 4, is greater than permitted
Failed to featurize datapoint 59, None. Appending empty array
Exception message: Python argument types in
    rdkit.Chem.rdmolfiles.CanonicalRankAtoms(NoneType)
did not match C++ signature:
    CanonicalRankAtoms(RDKit::ROMol mol, bool breakTies=True, bool includeChirality=True, bool includeIsotopes=True)
[23:49:51] Explicit valence for atom # 6 N, 4, is greater than permitted
Failed to featurize datapoint 61, None. Appending empty array
Exception message: Python argument types in
    rdkit.Chem.rdmolfiles.CanonicalRankAtoms(NoneType)
did not match C++ signature:
    CanonicalRankAtoms(RDKit::ROMol mol, bool breakTies=True, bool includeChirality=True, bool includeIsotopes=True)
[23:49:51] Explicit valence for atom # 6 N, 4, is greater than permitted
Failed to featurize datapoint 391, None. Appending empty array
Exception message: Python argument types in
    rdkit.Chem.rdmolfiles.CanonicalRankAtoms(NoneType)
did n

In [12]:
# Train the set-representation GNN classifier on the loaders created in the
# data-loading cell and report its test-set metrics as the cell output.

# Seed torch's global RNG before the model is built so that parameter
# initialisation and any sampling drawn from that RNG repeat between runs.
# Without a seed the reported metrics differ on every execution and cannot be
# compared fairly with the other model trained in this notebook.
# NOTE(review): this does not by itself make CUDA kernels deterministic;
# see the PyTorch reproducibility notes if bit-exact GPU runs are required.
torch.manual_seed(42)

# Read the feature widths from the first training graph (fetched once).
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features
model = GNNSetRepClassifier(num_node_features, 256, 6, num_edge_features, 8, 8)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiplies the learning rate by 0.95 on every scheduler step.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# NLLLoss expects log-probabilities as input.
# NOTE(review): presumably the model emits log-softmax outputs -- confirm.
criterion = torch.nn.NLLLoss()

trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,  # presumably the number of training epochs -- TODO confirm against Trainer
    # Three separate metric lists so no metric object is shared between slots;
    # presumably train / validation / test in that order -- TODO confirm.
    [Accuracy(task="binary"), AUROC(task="binary")],
    [Accuracy(task="binary"), AUROC(task="binary")],
    [Accuracy(task="binary"), AUROC(task="binary")],
    scheduler=scheduler,
    silent=True,
    # monitor_metric=1,
    # monitor_lower_is_better=False
)

trainer.train(train_loader, valid_loader)
# Last expression of the cell: its return value is displayed as the output.
trainer.test(test_loader)

[{'best_epoch': 28,
  'loss': 1.0378702481587727,
  'BinaryAccuracy': 0.671875,
  'BinaryAUROC': 0.6643791198730469}]

In [13]:
# Train the plain GNN classifier on the same loaders and report its test-set
# metrics as the cell output.

# Seed torch's global RNG before the model is built so that parameter
# initialisation and any sampling drawn from that RNG repeat between runs.
# Using a fixed seed here keeps this run repeatable, which is needed for a
# fair comparison against other models trained in this notebook.
# NOTE(review): this does not by itself make CUDA kernels deterministic;
# see the PyTorch reproducibility notes if bit-exact GPU runs are required.
torch.manual_seed(42)

# Read the feature widths from the first training graph (fetched once).
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features
model = GNNClassifier(num_node_features, 256, 6, num_edge_features)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiplies the learning rate by 0.95 on every scheduler step.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# NLLLoss expects log-probabilities as input.
# NOTE(review): presumably the model emits log-softmax outputs -- confirm.
criterion = torch.nn.NLLLoss()

trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,  # presumably the number of training epochs -- TODO confirm against Trainer
    # Three separate metric lists so no metric object is shared between slots;
    # presumably train / validation / test in that order -- TODO confirm.
    [Accuracy(task="binary"), AUROC(task="binary")],
    [Accuracy(task="binary"), AUROC(task="binary")],
    [Accuracy(task="binary"), AUROC(task="binary")],
    scheduler=scheduler,
    silent=True,
    # monitor_metric=1,
    # monitor_lower_is_better=False
)

trainer.train(train_loader, valid_loader)
# Last expression of the cell: its return value is displayed as the output.
trainer.test(test_loader)

[{'best_epoch': 7,
  'loss': 1.20888090133667,
  'BinaryAccuracy': 0.6458333134651184,
  'BinaryAUROC': 0.6443477869033813}]

## Regression

In [None]:
# Load the "lipo" dataset, convert it to PyG loaders, then train and evaluate
# the set-representation GNN regressor. The test metrics are the cell output.

# Seed torch's global RNG first so that everything below that draws from it
# (parameter initialisation, torch-based sampling) repeats between runs.
# Without a seed the reported scores change on every execution.
# NOTE(review): this does not by itself make CUDA kernels deterministic;
# see the PyTorch reproducibility notes if bit-exact GPU runs are required.
torch.manual_seed(42)

train, valid, test = molnet_loader("lipo")
train_loader, valid_loader, test_loader = molnet_to_pyg(
    train,
    valid,
    test,
    label_type=torch.float,
    # Optional explicit atom feature selection (currently disabled):
    # atom_attrs=[
    #     "atomic_num",
    #     "charge",
    #     "aromatic",
    #     "is_in_ring",
    #     "hydrogen_count",
    #     "hybridization_sp",
    #     "hybridization_sp2",
    #     "hybridization_sp3",
    #     "hybridization_sp3d",
    #     "hybridization_sp3d2",
    #     "chiral_type_chi_tetrahedral_cw",
    #     "chiral_type_chi_tetrahedral_ccw",
    #     "chiral_type_chi_other",
    #     "chiral_type_chi_tetrahedral",
    #     "chiral_type_chi_allene",
    #     "chiral_type_chi_squareplanar",
    #     "chiral_type_chi_trigonalbipyramidal",
    #     "chiral_type_chi_octahedral",
    #     "degree",
    #     "radical_count"
    # ]
)

# Read the feature widths from the first training graph (fetched once).
first_graph = train_loader.dataset[0]
num_node_features = first_graph.num_node_features
num_edge_features = first_graph.num_edge_features
model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 16)
# Alternative models that were tried (disabled):
# model = GNNRegressor(num_node_features, 512, 2, num_edge_features)
# model = GNNSetRepRegressor(num_node_features, 512, 2, num_edge_features, 8, 32, gnn=GAT(num_node_features, 512, 4, jk="cat", heads=8))


optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss()
# NOTE(review): this scheduler is created but not handed to the Trainer below
# (the scheduler= argument is commented out), so it is not passed into training.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

# NOTE(review): the explainer is constructed but not handed to the Trainer
# below (the explainer= argument is commented out). It is kept because its
# constructor's effects and any later use are not visible from this cell.
explainer = RegressionExplainer(model, valid_loader)

trainer = Trainer(
    model,
    optimizer,
    criterion,
    200,  # presumably the number of training epochs -- TODO confirm against Trainer
    # MeanSquaredError(squared=False) reports the root mean squared error.
    # Three separate metric lists so no metric object is shared between slots;
    # presumably train / validation / test in that order -- TODO confirm.
    [R2Score(), MeanSquaredError(squared=False)],
    [R2Score(), MeanSquaredError(squared=False)],
    [R2Score(), MeanSquaredError(squared=False)],
    # scheduler=scheduler,
    # presumably an index into the metric list (1 -> RMSE) -- TODO confirm
    monitor_metric=1,
    # monitor_lower_is_better=False
    # explainer=explainer
)

trainer.train(train_loader, valid_loader)
# Last expression of the cell: its return value is displayed as the output.
trainer.test(test_loader, average_n_epochs=0)