To run predictions, you first need to place the trained model file (.pt format) into the current directory

In [None]:
import pandas as pd
import sys
sys.path.append('../../../../..chemprop')
sys.path.append('../../../../..chemprop/chemprop')
from sklearn.metrics import r2_score
from tqdm import tqdm
from typing import Callable, List
import logging

import numpy as np
import torch
from tqdm import trange
from chemprop.args import TrainArgs
from chemprop.data import get_data, MoleculeDataLoader, StandardScaler
from chemprop.models import MoleculeModel
from chemprop.utils import get_loss_func, get_metric_func, load_checkpoint

In [None]:
df = pd.read_csv('data/separate_data/GLP_dataset.csv')
df = df[df['dataset_type'] == 'test']
df  = df.drop(columns=['dataset_type'])
df = df.rename(columns={'molecule':'smiles', 'logp':'logP'})
df.to_csv('GLP_dataset(test).csv', index=False)

In [6]:
def evaluate(model: MoleculeModel,
             data_loader: MoleculeDataLoader,
             num_tasks: int,
             metric_func: Callable,
             dataset_type: str,
             scaler: StandardScaler = None,
             logger: logging.Logger = None) -> List[float]:
    """
    Evaluates an ensemble of models on a dataset by making predictions and then evaluating the predictions.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param num_tasks: Number of tasks.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param dataset_type: Dataset type.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :param logger: A logger to record output.
    :return: A list with the score for each task based on :code:`metric_func`.
    """
    preds = predict(
        model=model,
        data_loader=data_loader,
        scaler=scaler
    )

    targets = data_loader.targets()

    results, results_r2 = evaluate_predictions(
        preds=preds,
        targets=targets,
        num_tasks=num_tasks,
        metric_func=metric_func,
        dataset_type=dataset_type,
        logger=logger
    )

    return results, results_r2

In [7]:
def predict(model: MoleculeModel,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None,
            args: TrainArgs = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :return: A list of lists of predictions. The outer list is molecules while the inner list is tasks.
    """
    model.eval()

    preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar):
        # Prepare batch
        batch: MoleculeDataset
        if args.additional_encoder:
            substructure_mol_batch = batch.batch_graph(model_type='substructures', args = args)
        mol_batch, features_batch = batch.batch_graph(args = args), batch.features()

        # Make predictions
        with torch.no_grad():
            if args.additional_encoder:        
                batch_preds = model(batch = mol_batch, substructures_batch = substructure_mol_batch, features_batch = features_batch)
            else:
                batch_preds = model(batch = mol_batch, features_batch = features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds

In [8]:
class Args(object):
    def __init__(self, adict):
        self.__dict__.update(adict)

In [9]:
Seed = 42

In [10]:
SMILES_COLUMN = 'smiles'
VALUE_COLUMN = 'logP'
PREDS_COLUMN = f'logP_pred_{Seed}'
DATASET_OUTPUT_PATH = '.'
MODEL_PATH = f'GLP_dataset_{Seed}.pt'

In [11]:
model = load_checkpoint(MODEL_PATH, device=torch.device('cpu'))

Loading pretrained parameter "encoder.encoder.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.W_i.weight".
Loading pretrained parameter "encoder.encoder.W_h.weight".
Loading pretrained parameter "encoder.encoder.W_o.weight".
Loading pretrained parameter "encoder.encoder.W_o.bias".
Loading pretrained parameter "substructures_encoder.encoder.cached_zero_vector".
Loading pretrained parameter "substructures_encoder.encoder.W_o.weight".
Loading pretrained parameter "substructures_encoder.encoder.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".


In [12]:
def evaluate_predictions(preds: List[List[float]],
                         targets: List[List[float]],
                         num_tasks: int,
                         metric_func: Callable,
                         dataset_type: str,
                         logger: logging.Logger = None) -> List[float]:
    """
    Evaluates predictions using a metric function after filtering out invalid targets.

    :param preds: A list of lists of shape :code:`(data_size, num_tasks)` with model predictions.
    :param targets: A list of lists of shape :code:`(data_size, num_tasks)` with targets.
    :param num_tasks: Number of tasks.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param dataset_type: Dataset type.
    :param logger: A logger to record output.
    :return: A list with the score for each task based on :code:`metric_func`.
    """
    info = logger.info if logger is not None else print

    if len(preds) == 0:
        return [float('nan')] * num_tasks

    # Filter out empty targets
    # valid_preds and valid_targets have shape (num_tasks, data_size)
    valid_preds = [[] for _ in range(num_tasks)]
    valid_targets = [[] for _ in range(num_tasks)]
    for i in range(num_tasks - 1):
        for j in range(len(preds)):
            if targets[j][i] is not None:  # Skip those without targets
                valid_preds[i].append(preds[j][i])
                valid_targets[i].append(targets[j][i])

    # Compute metric
    results = []
    results_r2 = []
    
    for i in range(num_tasks):
        # Skip if all targets or preds are identical, otherwise we'll crash during classification
        if dataset_type == 'classification':
            nan = False
            if all(target == 0 for target in valid_targets[i]) or all(target == 1 for target in valid_targets[i]):
                nan = True
                info('Warning: Found a task with targets all 0s or all 1s')
            if all(pred == 0 for pred in valid_preds[i]) or all(pred == 1 for pred in valid_preds[i]):
                nan = True
                info('Warning: Found a task with predictions all 0s or all 1s')

            if nan:
                results.append(float('nan'))
                continue

        if len(valid_targets[i]) == 0:
            print(valid_targets)
            continue

        if dataset_type == 'multiclass':
            results.append(metric_func(valid_targets[i], valid_preds[i], labels=list(range(len(valid_preds[i][0])))))
        else:
            results.append(metric_func(valid_targets[i], valid_preds[i]))
            results_r2.append(r2_score(valid_targets[i], valid_preds[i]))
    return results, results_r2

In [13]:
args = TrainArgs()
args.load('args.json',skip_unsettable = True)

TrainArgs(prog='ipykernel_launcher.py', usage=None, description=':class:`TrainArgs` includes :class:`CommonArgs` along with additional arguments used for training a Chemprop model.', formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)

In [14]:
args.target_columns = ['logP']
args.target_columns

['logP']

In [15]:
args.separate_test_path = 'GLP_dataset(test).csv'

In [17]:
if args.separate_test_path:
    test_data = get_data(path=args.separate_test_path, args=args, features_path=args.separate_test_features_path)

6301it [00:00, 113210.00it/s]
100%|██████████| 6301/6301 [02:29<00:00, 42.08it/s]
100%|██████████| 6301/6301 [00:00<00:00, 526827.66it/s]


In [18]:
test_smiles, test_targets = test_data.smiles(), test_data.targets()

In [19]:
cache = True
num_workers = 8

In [20]:
test_data_loader = MoleculeDataLoader(
    dataset=test_data,
    batch_size=args.batch_size,
    num_workers=num_workers,
    cache=cache
)

In [21]:
loss_func = get_loss_func(args)
metric_func = get_metric_func(metric=args.metric)

In [22]:
test_data1 = pd.read_csv(args.separate_test_path)

In [23]:
features_scaler = None

In [24]:
scaler = StandardScaler().fit(test_data1['logP'])

In [25]:
test_preds = predict(
            model=model,
            data_loader=test_data_loader,
            scaler=scaler,
            args=args
        )
test_scores, test_r2 = evaluate_predictions(
    preds=test_preds,
    targets=test_targets,
    num_tasks=args.num_tasks,
    metric_func=metric_func,
    dataset_type=args.dataset_type,
)


100%|██████████| 127/127 [00:28<00:00,  4.38it/s]

[[]]





In [26]:
ERROR_COLUMN = 'Absolute Error'
SMILES_COLUMN = 'smiles'
VALUE_COLUMN = 'logP'
PREDS_COLUMN = f'logP_pred{Seed}'

In [27]:
test_predictions = pd.DataFrame(columns=[SMILES_COLUMN,VALUE_COLUMN,PREDS_COLUMN])

In [28]:
test_predictions[SMILES_COLUMN] = test_smiles
test_predictions[VALUE_COLUMN] = np.array(test_targets, dtype=float).reshape(-1)

In [29]:
test_preds = [[i[0]] for i in test_preds]
test_preds

[[2.699678517023729],
 [4.427489283682412],
 [3.524410489703296],
 [5.32848237181174],
 [4.018561559683117],
 [8.36797741312621],
 [9.273790821680855],
 [7.214068611475312],
 [7.028449887695384],
 [5.810728190348991],
 [5.366288885795509],
 [3.2246715769045764],
 [1.7436930992225181],
 [4.758968942655631],
 [7.734951789747983],
 [2.9397134049738756],
 [5.596484707826138],
 [7.9018259460530675],
 [7.454307357336086],
 [5.860320685947234],
 [7.831134314636351],
 [2.9228201730828083],
 [2.1778756881472683],
 [6.78938867869674],
 [4.507802098795606],
 [3.4006194001687953],
 [4.22335530888738],
 [2.9172851181050152],
 [5.529786088747484],
 [1.9543112969864107],
 [2.089051126160169],
 [1.8635821449319112],
 [2.4463902274276617],
 [1.6433118404943956],
 [4.59818512829283],
 [0.32991723144991214],
 [1.321259425401841],
 [1.4806968426003415],
 [1.5758832622718622],
 [0.5354201560197482],
 [0.982591042864994],
 [2.0159556517886372],
 [7.22586166841857],
 [3.582867866888927],
 [3.266175013351258]

In [30]:
test_predictions[PREDS_COLUMN] = np.array(test_preds, dtype=float).reshape(-1)

In [31]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
r2 = r2_score(test_predictions['logP'], test_preds)
rmse = np.sqrt(mean_squared_error(test_predictions['logP'], test_preds))
mae = mean_absolute_error(test_predictions['logP'], test_preds)
round(rmse, 3), round(r2, 3), round(mae, 3)

(0.498, 0.945, 0.293)

In [32]:
test_predictions.to_csv('Seeds_SGNN_PREDICT_GLP_train_GLP_test.csv', index=False)