In [1]:
import os
import json
import numpy as np
import torch
from model import create_dta_toy_model

In [2]:
# Load the protein (FASTA) vocabulary; the path is resolved relative to the
# notebook's working directory.
fasta_vocab_path = "fasta_vocab.json"
# Read the JSON as UTF-8 explicitly so the result does not depend on the
# platform's locale-default text encoding.
with open(fasta_vocab_path, 'r', encoding='utf-8') as f:
    fasta_vocab = json.load(f)

# Number of vocabulary entries; used later as the width of the FASTA one-hot
# matrix and as the model's FASTA vocabulary size.
fasta_vocab_size = len(fasta_vocab)
print("Vocab size:", fasta_vocab_size)

Vocab size: 25


In [3]:
# Load the SMILES character vocabulary; the path is resolved relative to the
# notebook's working directory.
smiles_vocab_path = "vocab_chars.json"
# Read the JSON as UTF-8 explicitly so the result does not depend on the
# platform's locale-default text encoding.
with open(smiles_vocab_path, 'r', encoding='utf-8') as f:
    smiles_vocab = json.load(f)

# Number of vocabulary entries; used later as the width of the SMILES one-hot
# matrix and as the model's SMILES vocabulary size.
smiles_vocab_size = len(smiles_vocab)
print("Vocab size:", smiles_vocab_size)

Vocab size: 101


In [4]:
def smiles_to_onehot(smiles: str, vocab: dict, max_len: int):
    """One-hot encode a SMILES string into a fixed-size float32 matrix.

    Args:
        smiles: SMILES string. Characters past ``max_len`` are dropped.
        vocab: Mapping from character to column index. The ``'<pad>'`` and
            ``'<unk>'`` entries are looked up here and fall back to columns
            0 and 3 respectively when absent.
        max_len: Number of rows in the returned matrix.

    Returns:
        ``np.ndarray`` of shape ``(max_len, len(vocab))``, dtype float32.
        Row ``i`` is hot at the column of ``smiles[i]`` (or the unknown
        column if the character is not in ``vocab``); rows past the end of
        the string are hot at the padding column.
    """
    pad_col = vocab.get('<pad>', 0)
    unk_col = vocab.get('<unk>', 3)
    encoded = np.zeros((max_len, len(vocab)), dtype=np.float32)

    # Resolve every kept character to its column, then set them in one shot.
    token_cols = [vocab.get(symbol, unk_col) for symbol in smiles[:max_len]]
    if token_cols:
        encoded[np.arange(len(token_cols)), token_cols] = 1.0

    # Only touch the padding column when there are rows left to pad.
    if len(smiles) < max_len:
        encoded[len(smiles):, pad_col] = 1.0
    return encoded

In [5]:
def fasta_to_onehot(fasta: str, vocab: dict, max_len: int):
    """One-hot encode a protein sequence into a fixed-size float32 matrix.

    Characters are upper-cased before lookup, so lower-case residues map to
    the same column as their upper-case form.

    Args:
        fasta: Residue string. Characters past ``max_len`` are dropped.
        vocab: Mapping from character to column index. Must contain
            ``'<unk>'`` if the sequence has characters outside the
            vocabulary, and ``'<pad>'`` if the sequence is shorter than
            ``max_len``.
        max_len: Number of rows in the returned matrix.

    Returns:
        ``np.ndarray`` of shape ``(max_len, len(vocab))``, dtype float32,
        with a single 1.0 per row.

    Raises:
        KeyError: If a required ``'<unk>'`` or ``'<pad>'`` entry is missing
            from ``vocab``.
    """
    vocab_size = len(vocab)
    one_hot = np.zeros((max_len, vocab_size), dtype=np.float32)
    pad_id = vocab.get('<pad>')
    unk_id = vocab.get('<unk>')

    residues = fasta[:max_len]
    for i, ch in enumerate(residues):
        idx = vocab.get(ch.upper(), unk_id)
        if idx is None:
            # Indexing with None is numpy's newaxis and would silently set
            # the entire row to 1.0, so fail loudly instead.
            raise KeyError(
                f"character {ch!r} is not in the vocabulary and the "
                "vocabulary has no '<unk>' entry"
            )
        one_hot[i, idx] = 1.0

    if len(residues) < max_len:
        if pad_id is None:
            # Same newaxis hazard as above, for the padding rows.
            raise KeyError("vocabulary has no '<pad>' entry but padding is required")
        one_hot[len(residues):, pad_id] = 1.0
    return one_hot

In [6]:
# Instantiate the toy DTA model sized to both vocabularies and switch it to
# evaluation mode before running inference.
model = create_dta_toy_model(
    fasta_vocab_size=fasta_vocab_size,
    smiles_vocab_size=smiles_vocab_size,
)
model.eval()

# Fixed sequence lengths: the number of rows in each one-hot matrix.
smiles_seq_len, fasta_seq_len = 256, 1000

# One example ligand (SMILES) and one example protein (FASTA) sequence.
smiles_example = "CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br"
fasta_example = "MEECWVTEIANGSKDGLDSNPMKDYMILSGPQKTAVAVLCTLLGLLSALENVAVLYLILSSHQLRRKPSYLFIGSLAGADFLASVVFACSFVNFHVFHGVDSKAVFLLKIGSVTMTFTASVGSLLLTAIDRYLCLRYPPSYKALLTRGRALVTLGIMWVLSALVSYLPLMGWTCCPRPCSELFPLIPNDYLLSWLLFIAFLFSGIIYTYGHVLWKAHQHVASLSGHQDRQVPGMARMRLDVRLAKTLGLVLAVLLICWFPVLALMAHSLATTLSDQVKKAFAFCSMLCLINSMVNPVIYALRSGEIRSSAHHCLAHWKKCVRGLGSEAKEEAPRSSVTETEADGKITPWPDSRDLDLSDC"

# Encode both sequences as (seq_len, vocab_size) float32 matrices.
smiles_onehot = smiles_to_onehot(smiles_example, smiles_vocab, max_len=smiles_seq_len)
fasta_onehot = fasta_to_onehot(fasta_example, fasta_vocab, max_len=fasta_seq_len)

# Wrap as tensors and prepend a batch dimension of size 1.
smiles_tensor = torch.unsqueeze(torch.from_numpy(smiles_onehot), dim=0)
fasta_tensor = torch.unsqueeze(torch.from_numpy(fasta_onehot), dim=0)

# Forward pass without gradient tracking.
with torch.no_grad():
    output = model(smiles_tensor, fasta_tensor)

print(f"SMILES Input Shape: {smiles_tensor.shape}")
print(f"FASTA Input Shape:  {fasta_tensor.shape}")
print(f"Output Shape:       {output.shape}")
print(f"Predicted Value:    {output.item():.4f}")

SMILES Input Shape: torch.Size([1, 256, 101])
FASTA Input Shape:  torch.Size([1, 1000, 25])
Output Shape:       torch.Size([1, 1])
Predicted Value:    0.3452


In [7]:
# All-zero example inputs with batch size 1, shaped like the real one-hot
# tensors; they are only used to drive the trace.
dummy_smiles = torch.zeros((1, smiles_seq_len, smiles_vocab_size), dtype=torch.float32)
dummy_fasta = torch.zeros((1, fasta_seq_len, fasta_vocab_size), dtype=torch.float32)

# Trace the model to TorchScript. Strict mode and the post-trace consistency
# check are both turned off here.
traced_model = torch.jit.trace(
    model,
    example_inputs=(dummy_smiles, dummy_fasta),
    check_trace=False,
    strict=False,
)

# Serialize the traced module to the working directory.
traced_path = "dta_model.pt"
torch.jit.save(traced_model, traced_path)

In [8]:
# Settings interpolated into the Triton model configuration below.
model_name = "dta_model"
platform = "pytorch_libtorch"
max_batch_size = 16

# Text of config.pbtxt. Input dims are taken from the sequence lengths and
# vocabulary sizes defined earlier in the notebook; doubled braces are
# literal braces inside the f-string.
config_content = f"""name: "{model_name}"
platform: "{platform}"
max_batch_size: {max_batch_size}

input [
  {{
    name: "input__0"
    data_type: TYPE_FP32
    dims: [{smiles_seq_len}, {smiles_vocab_size}]
  }},
  {{
    name: "input__1"
    data_type: TYPE_FP32
    dims: [{fasta_seq_len}, {fasta_vocab_size}]
  }}
]

output [
  {{
    name: "output__0"
    data_type: TYPE_FP32
    dims: [1]
  }}
]

instance_group [
  {{
    count: 1
    kind: KIND_CPU
  }}
]

dynamic_batching {{
  preferred_batch_size: [4, 8, 16]
  max_queue_delay_microseconds: 100
}}
"""

# Save to file
config_path = "config.pbtxt"
with open(config_path, "w", encoding="utf-8") as f:
    f.write(config_content)

print(f"Saved Triton config file: {config_path}")
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n--- config.pbtxt content ---")
print(config_content)

Saved Triton config file: config.pbtxt
\n--- config.pbtxt content ---
name: "dta_model"
platform: "pytorch_libtorch"
max_batch_size: 16

input [
  {
    name: "input__0"
    data_type: TYPE_FP32
    dims: [256, 101]
  },
  {
    name: "input__1"
    data_type: TYPE_FP32
    dims: [1000, 25]
  }
]

output [
  {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [1]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]

dynamic_batching {
  preferred_batch_size: [4, 8, 16]
  max_queue_delay_microseconds: 100
}



In [9]:
# Destination files for the raw example inputs.
smiles_bin_path = "smiles_input.bin"
fasta_bin_path = "fasta_input.bin"

# Float32 copies of the example one-hot matrices (no batch dimension).
smiles_input_np = np.array(smiles_onehot, dtype=np.float32)
fasta_input_np = np.array(fasta_onehot, dtype=np.float32)

# Write each array with ndarray.tofile, path paired with its array.
for bin_path, array in ((smiles_bin_path, smiles_input_np), (fasta_bin_path, fasta_input_np)):
    array.tofile(bin_path)
del bin_path, array  # keep loop temporaries out of the notebook namespace

print(f"Saved SMILES example input: {smiles_bin_path} (Shape: {smiles_input_np.shape})")
print(f"Saved FASTA example input:  {fasta_bin_path} (Shape: {fasta_input_np.shape})")

Saved SMILES example input: smiles_input.bin (Shape: (256, 101))
Saved FASTA example input:  fasta_input.bin (Shape: (1000, 25))
