In [1]:
import os
import json
import numpy as np
import torch
from model import create_toy_model

In [2]:
# 1) Load vocab and build one-hot tools
# The vocabulary file is a JSON object; the helper below looks characters up in
# it and uses the resulting values as column indices, so it is expected to map
# each token to an integer id.
vocab_path = "vocab_chars.json"
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's locale default.
with open(vocab_path, 'r', encoding='utf-8') as f:
    vocab = json.load(f)

# Number of vocabulary entries; used as the one-hot width and the model's vocab size.
vocab_size = len(vocab)
print("Vocab size:", vocab_size)

# helper: smiles -> one-hot [seq_len, vocab_size]
def smiles_to_onehot(smiles: str, vocab: dict, max_len: int = 50):
    """Encode a SMILES string as a character-level one-hot matrix.

    Each character of ``smiles`` is looked up in ``vocab``. The string is
    truncated to ``max_len`` characters, and shorter strings are padded so
    that every row of the result contains exactly one ``1.0``.

    Args:
        smiles: SMILES string to encode, one vocabulary lookup per character.
        vocab: Mapping from token to integer column index. ``'<unk>'`` is used
            for characters missing from the mapping (falling back to index 3
            when ``'<unk>'`` itself is absent) and ``'<pad>'`` for padding
            rows (falling back to index 0).
        max_len: Number of rows in the output.

    Returns:
        ``np.ndarray`` of shape ``(max_len, len(vocab))`` and dtype
        ``float32``.
    """
    unk_id = vocab.get('<unk>', 3)
    pad_id = vocab.get('<pad>', 0)

    # One id per output row: encoded characters first, then padding.
    token_ids = [vocab.get(ch, unk_id) for ch in smiles[:max_len]]
    token_ids.extend([pad_id] * (max_len - len(token_ids)))

    # Size the matrix from the vocab that was passed in rather than from a
    # notebook-level global, so the helper is self-contained.
    one_hot = np.zeros((max_len, len(vocab)), dtype=np.float32)
    rows = np.arange(max_len)
    one_hot[rows, np.asarray(token_ids, dtype=np.intp)] = 1.0
    return one_hot

# Status line so the cell output shows that the helper above was defined.
print("One-hot helper ready")

Vocab size: 101
One-hot helper ready


In [3]:
# 2) Create model and run a quick forward
# Example molecule and the fixed length it is padded/truncated to.
smiles = "CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br"
seq_len = 50

# Build the toy model and switch it to eval mode for the check.
model = create_toy_model(vocab_size=vocab_size)
model.eval()

# Encode the molecule, then add a leading batch axis -> [1, seq_len, vocab_size]
one_hot = smiles_to_onehot(smiles, vocab, max_len=seq_len)
input_tensor = torch.unsqueeze(torch.from_numpy(one_hot), dim=0)

# No gradients are needed for a single sanity-check forward pass.
with torch.no_grad():
    out = model(input_tensor)

print(f"Input: {input_tensor.shape}")
print(f"Output: {out.shape} {out.item()}")

Input: torch.Size([1, 50, 101])
Output: torch.Size([1, 1]) 0.7667984366416931


In [4]:
# 3) Export TorchScript (.pt)
traced_path = "smiles_transformer_regression.pt"
# Trace with an all-zero placeholder shaped like one encoded molecule.
dummy = torch.zeros((1, seq_len, vocab_size), dtype=torch.float32)
traced = torch.jit.trace(model, dummy, check_trace=False, strict=False)
traced.save(traced_path)
print(f"Saved TorchScript: {traced_path}")

# 4) Save example input (.npy)
example_path = "example_input.npy"
# Convert once and reuse the array for both the save and the shape report.
example_array = input_tensor.numpy()
np.save(example_path, example_array)
print(f"Saved example input: {example_path} {example_array.shape}")

Saved TorchScript: smiles_transformer_regression.pt
Saved example input: example_input.npy (1, 50, 101)
