## Plug-and-Play Checkpoints: Reproduce Table 1 (NovoMolGen-32M)

> *Run this single cell in the notebook; it downloads the checkpoint, samples
> 30000 valid/canonical SMILES, evaluates the six metrics used in Table 1, and
> renders the result as a tidy dataframe.*

In [None]:
import os
from pathlib import Path

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HOME"] = "/network/scratch/k/kamran.chitsaz/hf_home"
os.chdir(Path(os.getcwd()).parent)

from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
import rootutils

rootutils.setup_root(os.getcwd(), indicator=".project-root", pythonpath=True)

from src.eval import MoleculeEvaluator
from src.models import generate_valid_smiles, prepare_hf_model
from src.data_loader.utils import load_valid_and_test_data


In [2]:
# 1.  Load pretrained 32 M checkpoint + tokenizer
tokenizer = AutoTokenizer.from_pretrained("chandar-lab/NovoMolGen_32M_SMILES_AtomWise", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("chandar-lab/NovoMolGen_32M_SMILES_AtomWise", trust_remote_code=True)

acc = Accelerator(mixed_precision='bf16')
model = acc.prepare(model)

# 2.  Load pre-computed dataset stats
df_test, df_valid = load_valid_and_test_data("MolGen/ZINC_1B-raw", "175k")
task = MoleculeEvaluator(
    task_names=['unique@1k', 'IntDiv', 'FCD', 'Frag', 'Scaf', 'SNN'],  
    valid_stats=df_valid, 
    test_stats=df_test,
    n_jobs=2
    )

# 3.  Sample 30000 molecules
smiles_list = []
for _ in range(10):
    outputs = model.sample(
        tokenizer=tokenizer, 
        batch_size=3000, 
        max_length=64, 
        temperature=1.0, 
        top_k=1, 
        top_p=0, 
        )
    smiles_list.extend(outputs['SMILES'])

# 4.  Evaluate & display
metrics = task(smiles_list, filter=True, return_valid_index=True)

import pandas as pd
from IPython.display import display

row = {
    "Validity": metrics["validity"],
    "Unique@1k": metrics["unique@1k"],
    "IntDiv":    metrics["IntDiv"],
    "FCD":       metrics["FCD"]["FCD"],
    "SNN":       metrics["SNN"]["SNN"],
    "Frag":      metrics["Frag"]["Frag"],
    "Scaf":      metrics["Scaf"]["Scaf"],
}

df = pd.DataFrame([row]).round(4)
display(df.style.hide(axis="index").set_caption("NovoMolGen-32 M ─ Table 1 metrics"))

[32m2025-08-07 15:16:13.542[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m169[0m - [1mFraction of valid molecules: 1.00[0m
[32m2025-08-07 15:16:23.181[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: unique@1k[0m
[32m2025-08-07 15:16:23.507[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: IntDiv[0m
[32m2025-08-07 15:16:42.291[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: FCD[0m
[32m2025-08-07 15:16:55.677[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: Frag[0m
[32m2025-08-07 15:17:13.951[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: Scaf[0m
[32m2025-08-07 15:17:22.162[0m | [1mINFO    [0m 

Validity,Unique@1k,IntDiv,FCD,SNN,Frag,Scaf
0.9993,1.0,0.8517,0.0406,0.4655,0.9999,0.5189


### HF Checkpoint

In [4]:
# 1.  Load HF checkpoint (no flash attention)
model = AutoModelForCausalLM.from_pretrained("chandar-lab/NovoMolGen_32M_SMILES_AtomWise", revision='hf-checkpoint', device_map='auto')
tokenizer = AutoTokenizer.from_pretrained("chandar-lab/NovoMolGen_32M_SMILES_AtomWise", revision='hf-checkpoint')
model = prepare_hf_model(model)

# 2.  Load pre-computed dataset stats
df_test, df_valid = load_valid_and_test_data("MolGen/ZINC_1B-raw", "175k")
task = MoleculeEvaluator(
    task_names=['unique@1k', 'IntDiv', 'FCD', 'Frag', 'Scaf', 'SNN'],  
    valid_stats=df_valid, 
    test_stats=df_test,
    n_jobs=2
    )

# 3.  Sample 30000 molecules
smiles_list = []
for _ in range(10):
    outputs = model.sample(
        tokenizer=tokenizer, 
        batch_size=1000, 
        max_length=64, 
        temperature=1.0, 
        top_k=0, 
        top_p=1.0,
        do_sample=True, 
        )
    smiles_list.extend(outputs['SMILES'])

# 4.  Evaluate & display
metrics = task(smiles_list, filter=True, return_valid_index=True)

import pandas as pd
from IPython.display import display

row = {
    "Validity": metrics["validity"],
    "Unique@1k": metrics["unique@1k"],
    "IntDiv":    metrics["IntDiv"],
    "FCD":       metrics["FCD"]["FCD"],
    "SNN":       metrics["SNN"]["SNN"],
    "Frag":      metrics["Frag"]["Frag"],
    "Scaf":      metrics["Scaf"]["Scaf"],
}

df = pd.DataFrame([row]).round(4)
display(df.style.hide(axis="index").set_caption("NovoMolGen-32 M ─ Table 1 metrics"))

[32m2025-09-02 13:48:23.553[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m169[0m - [1mFraction of valid molecules: 1.00[0m
[32m2025-09-02 13:48:27.322[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: unique@1k[0m
[32m2025-09-02 13:48:27.590[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: IntDiv[0m
[32m2025-09-02 13:48:32.663[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: FCD[0m
[32m2025-09-02 13:48:38.891[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: Frag[0m
[32m2025-09-02 13:48:45.196[0m | [1mINFO    [0m | [36msrc.eval.molecule_evaluation[0m:[36m__call__[0m:[36m185[0m - [1mEvaluating task: Scaf[0m
[32m2025-09-02 13:48:48.459[0m | [1mINFO    [0m 

Validity,Unique@1k,IntDiv,FCD,SNN,Frag,Scaf
0.9995,1.0,0.8511,0.0979,0.4658,0.9998,0.315
