In [44]:
from transformers.models.auto import AutoConfig, AutoModelForSeq2SeqLM
import torch

# Folder holding the exported checkpoint files. The trailing slash matters:
# file names are appended to this string directly.
model_dir = "../raw-files/"

# Read the stored configuration and override the generation-related settings.
config = AutoConfig.from_pretrained(
    f"{model_dir}config.json",
    # torchscript=True prepares the model for the torch.jit.trace export done
    # later in this notebook. Per the original author's note, it is needed
    # because the model has weights that depend on each other; with the flag
    # set those weights are separated, so the exported TorchScript model
    # cannot be trained any further.
    torchscript=True,
    max_length=512,
    num_beams=4,
    num_beam_groups=1,
    diversity_penalty=0.0,
)


In [45]:
# Load the weights from the checkpoint directory (the folder that contains
# pytorch_model.bin) instead of pointing from_pretrained() at the weight file
# itself: a model id or a checkpoint directory is the documented input.
# The config created earlier is passed explicitly so that its torchscript and
# generation overrides are the ones applied to the model.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_dir,
    config=config,
)
# Switch the model to eval mode (turns off dropout). eval() returns the module,
# so its repr is shown as this cell's output.
model.eval()

T5ForConditionalGeneration(
  (shared): Embedding(32102, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32102, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=4096, bias=False)
              (k): Linear(in_features=1024, out_features=4096, bias=False)
              (v): Linear(in_features=1024, out_features=4096, bias=False)
              (o): Linear(in_features=4096, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 32)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=1024, out_features=16384, bias=False)
              (wo): Linear(in_features=16384, out_features=1024, bias=False)
              

In [46]:
# Exploratory check: print the names of all attributes and methods that dir()
# reports for the loaded model object.
print(dir(model))



In [47]:
from transformers.models.auto import AutoTokenizer

# Tokenizer stored in the same folder as the checkpoint (fast variant requested).
tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)

# Resize the model's token embeddings to the tokenizer's vocabulary size.
model.resize_token_embeddings(len(tokenizer))

# Sample sentence that is turned into the dummy tensors below.
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# A list of ones with one entry per token id.
segments_ids = [1] * len(indexed_tokens)

# Dummy inputs: wrapping each list in an outer list gives the tensors a
# leading dimension of size 1.
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensor = torch.tensor([segments_ids])

In [48]:
# Run the model once on the dummy tensors under torch.jit.trace, which records
# the executed operations and returns a torch.jit.ScriptModule.
# The three tensors are handed to the model's forward() positionally; for this
# seq2seq model they presumably correspond to input_ids, attention_mask and
# decoder_input_ids -- TODO confirm against the model's forward() signature.
traced_script_module = torch.jit.trace(
    model,
    (tokens_tensor, segments_tensor, tokens_tensor),
)

# Serialize the traced TorchScript module to disk.
torch.jit.save(traced_script_module, "picard_model.pt")