In [1]:
# General imports
import yaml
import numpy as np
import torch
import fsspec

# Current project imports
import graphium
from graphium.config._loader import load_datamodule, load_trainer
from graphium.trainer.predictor import PredictorModule


Using backend: pytorch


In [2]:
# Remote location of the pretrained model: the checkpoint and the YAML
# configuration live side by side under MODEL_PATH.
MODEL_PATH = "https://storage.valencelabs.com/graphium/pretrained-models/graphium-zinc-micro-dummy-test"
MODEL_FILE = f"{MODEL_PATH}/model.ckpt"
CONFIG_FILE = f"{MODEL_PATH}/configs.yaml"

# Gzip-compressed CSV holding the SMILES to run inference on, and the name of
# the column to read them from. Plain string literal: nothing is interpolated
# here, so no f-prefix is needed.
SMILES_DF_PATH = "https://storage.valencelabs.com/graphium/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz"
SMILES_COL = "SMILES"

# Number of layers to drop when inferring the fingerprints
NUM_LAYERS_TO_DROP = 1

In [3]:
# Read the YAML configuration stored next to the trained model
with fsspec.open(CONFIG_FILE, "rb") as config_stream:
    cfg = yaml.safe_load(config_stream)

# Redirect the datamodule to the SMILES file defined above. Of the original
# datamodule arguments, only the featurization settings are carried over;
# `label_cols` is left empty.
datamodule_cfg = cfg["datamodule"]
datamodule_cfg["module_type"] = "DGLFromSmilesDataModule"
args = datamodule_cfg["args"]
datamodule_cfg["args"] = dict(
    df_path=SMILES_DF_PATH,
    smiles_col=SMILES_COL,
    label_cols=[],
    featurization=args["featurization"],
)

# Build the datamodule from the modified configuration
datamodule = load_datamodule(cfg)

In [4]:
# Restore the trained predictor from the checkpoint file
predictor = PredictorModule.load_from_checkpoint(MODEL_FILE)

# Remove NUM_LAYERS_TO_DROP layers from the graph output network of the
# underlying model, so that predictions are fingerprints rather than the
# original outputs (presumably the final layers are the ones removed --
# TODO confirm against the graphium API).
network = predictor.model
network.drop_graph_output_nn_layers(num_layers_to_drop=NUM_LAYERS_TO_DROP)

# Build the trainer from the same configuration that produced the datamodule
trainer = load_trainer(cfg)

# NOTE(review): the recorded output of this cell is an AssertionError with an
# empty message; which of the calls above raised it cannot be told from the
# notebook alone -- re-run on a fresh kernel and capture the full traceback.
predictor

AssertionError: 

In [None]:
# Run inference over the datamodule; the results are handled as a sequence of
# per-batch outputs.
pred_batches = trainer.predict(model=predictor, datamodule=datamodule)

# When the batches are torch tensors, detach them and move them to the CPU as
# NumPy arrays so they can be concatenated.
if isinstance(pred_batches[0], torch.Tensor):
    pred_batches = [batch.detach().cpu().numpy() for batch in pred_batches]

# Stitch the batches together along the first axis
preds = np.concatenate(pred_batches, axis=0)

preds

In [None]:
# Display the shape of the concatenated prediction array
preds.shape