# Import packages

In [1]:
import pandas as pd
import numpy as np
import torch
from lightning import pytorch as pl

from chemprop.v2 import data
from chemprop.v2 import featurizers
from chemprop.v2.models import models

# Change model input here

In [2]:
# Location of the model checkpoint that the next cell loads; edit this to
# point at your own checkpoint file.
checkpoint_path = "/path/to/checkpoint.pt"

## Load model

In [3]:
# Restore the MPNN from the checkpoint file configured in `checkpoint_path`.
mpnn = models.MPNN.load_from_checkpoint(checkpoint_path=checkpoint_path)
# Bare trailing expression: the notebook renders the model's repr as the cell output.
mpnn

MPNN(
  (message_passing): BondMessageBlock(
    (W_i): Linear(in_features=147, out_features=300, bias=False)
    (W_h): Linear(in_features=300, out_features=300, bias=False)
    (W_o): Linear(in_features=433, out_features=300, bias=True)
    (dropout): Dropout(p=0, inplace=False)
    (tau): ReLU()
  )
  (agg): MeanAggregation()
  (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (readout): RegressionFFN(
    (ffn): SimpleFFN(
      (ffn): Sequential(
        (0): Linear(in_features=300, out_features=300, bias=True)
        (1): ReLU()
        (2): Dropout(p=0, inplace=False)
        (3): Linear(in_features=300, out_features=1, bias=True)
      )
    )
  )
)

# Change prediction input here

In [4]:
# CSV file that holds the molecules to run predictions on.
test_path = "../tests/data/regression_test_smiles.csv"
# Header of the column in that CSV from which the SMILES strings are read.
smiles_column = "smiles"

## Load test SMILES

In [5]:
# Read the input table from the CSV configured in `test_path`.
df_test = pd.read_csv(filepath_or_buffer=test_path)
# Bare trailing expression: display the loaded table as the cell output.
df_test

Unnamed: 0,smiles
0,CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O
1,Brc1ccc(Br)cc1
2,Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O
3,Oc1ccc(cc1)c2ccccc2
4,CC1=C(CCCO1)C(=O)Nc2ccccc2
5,CCOC=C
6,CCC#C
7,COc1ncnc2nccnc12
8,CCCCC(C)(O)CC
9,Clc1ccc(Cl)cc1


## Get SMILES

In [6]:
# Extract the configured SMILES column from the table as a plain Python list.
smis = df_test.loc[:, smiles_column].to_list()
# Preview only the first five entries rather than the whole list.
smis[:5]

['CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O',
 'Brc1ccc(Br)cc1',
 'Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O',
 'Oc1ccc(cc1)c2ccccc2',
 'CC1=C(CCCO1)C(=O)Nc2ccccc2']

## Get molecule datapoints

In [7]:
# Build one MoleculeDatapoint per SMILES string via its `from_smi` constructor,
# preserving the order of `smis`.
test_data = list(map(data.MoleculeDatapoint.from_smi, smis))

## Get molecule dataset

In [8]:
# Featurizer instance, constructed with its default arguments, that is handed
# to the dataset below.
featurizer = featurizers.MoleculeMolGraphFeaturizer()
# Dataset over the datapoints built in the previous cell, using that featurizer.
test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)
# Shuffling is disabled: the predictions are later assigned to `df_test` by
# position, so the loader must yield molecules in their original order.
test_loader = data.MolGraphDataLoader(test_dset, shuffle=False)

# Set up trainer and run prediction

In [17]:
# Build a Lightning Trainer and use it to run the model over the test loader.
# `trainer.predict` returns the per-batch outputs, which are stored in
# `test_preds` for the next cell to combine.
with torch.inference_mode():
    trainer = pl.Trainer(
        # `False` is the documented way to switch experiment logging off.
        # `None` is the "unset" value on recent Lightning releases and falls
        # back to the default logger, which creates a `lightning_logs/`
        # directory even for a prediction-only run.
        logger=False,
        enable_progress_bar=True,
        accelerator="auto",
        # A single device keeps all predictions in one process, in loader order.
        devices=1,
    )
    test_preds = trainer.predict(mpnn, test_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Predicting DataLoader 0:   0%|          | 0/10 [00:00<?, ?it/s]

Predicting DataLoader 0: 100%|██████████| 10/10 [00:00<00:00, 70.85it/s]


In [22]:
# `trainer.predict` hands back a list with one entry per batch; join them along
# the first axis into a single array. The guard makes the cell safe to re-run:
# once `test_preds` has been converted it is no longer a list, and concatenating
# the array a second time would silently flatten it (and fail on a third run).
if isinstance(test_preds, list):
    test_preds = np.concatenate(test_preds, axis=0)
# Attach the predictions to the input table by position; this relies on the
# data loader having been created with shuffle=False.
df_test['pred'] = test_preds
# Bare trailing expression: display the table with the new `pred` column.
df_test

Unnamed: 0,smiles,pred
0,CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O,-0.131145
1,Brc1ccc(Br)cc1,-0.128768
2,Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O,-0.165492
3,Oc1ccc(cc1)c2ccccc2,-0.138822
4,CC1=C(CCCO1)C(=O)Nc2ccccc2,-0.116687
5,CCOC=C,-0.009083
6,CCC#C,-0.005834
7,COc1ncnc2nccnc12,-0.106233
8,CCCCC(C)(O)CC,-0.047039
9,Clc1ccc(Cl)cc1,-0.134129
