# Load dataset

In [1]:
from datasets import load_dataset

# Fetch the 'iwslt2015-vi-en' configuration and keep only its test split.
raw_datasets = load_dataset("mt_eng_vietnamese", 'iwslt2015-vi-en')
dataset = raw_datasets["test"]

# Flatten every translation pair into two standalone inputs, English first,
# each carrying a "<lang>: " prefix in front of the sentence text.
sents = []
for pair in dataset['translation']:
    sents.extend(("en: " + pair['en'], "vi: " + pair['vi']))

# Two entries per pair, so this prints twice the number of test pairs.
print(len(sents))

  from .autonotebook import tqdm as notebook_tqdm


2538


In [2]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Thin ``Dataset`` wrapper that serves items from an in-memory sequence.

    Items are returned exactly as stored; no transformation is applied.
    """

    def __init__(self, data):
        """Keep a reference to ``data`` (no copy is made)."""
        self.data = data

    def __getitem__(self, i):
        """Return the item stored at index ``i``."""
        sample = self.data[i]
        return sample

    def __len__(self) -> int:
        """Return how many items the wrapped sequence holds."""
        size = len(self.data)
        return size

In [3]:
# Wrap the prefixed sentence list in CustomDataset so a DataLoader can batch it.
# NOTE: this rebinds `dataset`, which until now referred to the raw test split.
dataset = CustomDataset(sents)

# Load model

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "VietAI/envit5-translation"
# Load the seq2seq weights straight onto the GPU in half precision.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="cuda", torch_dtype=torch.float16)  # torch.float16 or torch.float32
# The tokenizer holds no tensors, so device placement and dtype options do not
# apply to it; only the model needs them.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Inference

In [None]:
import time
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset

# Throughput benchmark: translate the whole dataset once per batch size and
# report wall-clock time and samples per second.
for batch_size in [8]:
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    # perf_counter is monotonic and high-resolution, unlike time.time(), which
    # can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    for inputs in tqdm(dataloader):
        # Batches are padded to a common length, so the attention mask must be
        # forwarded for the model to ignore the padding positions.
        encoded = tokenizer(inputs, return_tensors="pt", padding=True).to(model.device)
        outputs = model.generate(
            input_ids=encoded.input_ids,
            attention_mask=encoded.attention_mask,
            max_length=512,
        )
        # Decoding is part of the measured work; only the last batch's text
        # remains bound to `decode_outputs` after the loop.
        decode_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # CUDA kernels run asynchronously; wait for any pending work before
    # stopping the clock.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    end = time.perf_counter()

    inference_time = end - start
    num_requests = len(dataset)
    print(f"Batch size: {batch_size}")
    print(f"Total inference time: {round(inference_time, 4)}s")
    print(f"Total sample: {num_requests}")
    print(f"Result: {round(num_requests / inference_time)} sample/s")
    print('---------------------------------------------------------')