In [None]:
import torch

from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
if torch.cuda.is_available():       
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

model = T5ForConditionalGeneration.from_pretrained("NlpHUST/t5-en-vi-base")
tokenizer = T5Tokenizer.from_pretrained("NlpHUST/t5-en-vi-base")
model.to(device)

src = "I'm feeling under the weather today"
tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
print(tokenized_text)
model.eval()
summary_ids = model.generate(
                    tokenized_text,
                    max_length=128, 
                    num_beams=5,
                    repetition_penalty=2.5, 
                    length_penalty=1.0, 
                    early_stopping=True
                )
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(output)

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_parameters(model))

## Reading data file, saving result

In [None]:
with open('tst2013.en','r') as f:
    eng_txt = f.read().splitlines()
print(eng_txt)

In [None]:
with open('tst2013.vi','r',encoding = 'utf8') as fv:
    vie_txt = fv.read().splitlines()
print(vie_txt)

In [None]:
# src = "Good weather today ."
src = eng_txt[3]
tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
model.eval()
summary_ids = model.generate(
                    tokenized_text,
                    max_length=128, 
                    num_beams=5,
                    repetition_penalty=2.5, 
                    length_penalty=1.0, 
                    early_stopping=True
                )
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(output)

In [None]:
## Legacy original prediction and save code
# from tqdm import tqdm
# with open('tst2013_original.pred','w',encoding = 'utf8') as fp:
#     for t in tqdm(eng_txt[:5]):
#         src = t
#         tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
#         model.eval()
#         summary_ids = model.generate(
#                             tokenized_text,
#                             max_length=128, 
#                             num_beams=5,
#                             repetition_penalty=2.5, 
#                             length_penalty=1.0, 
#                             early_stopping=True
#                         )
#         output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
#         fp.write(output + '\n')

In [None]:
from tqdm import tqdm
results = []
for t in tqdm(eng_txt[:]):
    src = t
    tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
    model.eval()
    summary_ids = model.generate(
                        tokenized_text,
                        max_length=128, 
                        num_beams=5,
                        repetition_penalty=2.5, 
                        length_penalty=1.0, 
                        early_stopping=True
                    )
    output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    results.append(output)


In [None]:
with open('tst2013.pred','w',encoding = 'utf8') as fp:
    for result in results:
        fp.write(result + '\n')

In [None]:
with open("tst2013.pred",'r',encoding = 'utf8') as fp:
    pred_txt = fp.read().splitlines()

## Saving model

In [None]:
save_file_path = ('models/model.pth')  #save with fp32
torch.save(model.state_dict(), save_file_path)

In [3]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer,AutoConfig

# Load the tokenizer
tokenizer = T5Tokenizer.from_pretrained("NlpHUST/t5-en-vi-base")

# Initialize the model with the architecture

config = AutoConfig.from_pretrained("NlpHUST/t5-en-vi-base")
new_model = T5ForConditionalGeneration(config=config)

# Load the weights from the .pth file
weights_path = "models/model.pth"
state_dict = torch.load(weights_path, map_location=torch.device('cpu'))  # Load the weights onto CPU

# Load the state_dict onto the model
new_model.load_state_dict(state_dict)

# Move the model to the desired device
device = "cpu"  # Change this to "cuda" if you want to use GPU
new_model.to(device)

# Iterate through the parameters and print their datatype
param_dtype_list = []
for name, param in new_model.named_parameters():
    # print(f"Parameter '{name}' has datatype: {param.dtype}")
    if param.dtype not in param_dtype_list:
        param_dtype_list.append(param.dtype)
print(param_dtype_list)



You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


[torch.float32]


In [None]:
# Test loaded model
# src = "I'm feeling under the weather today"
# tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
# print(tokenized_text)
# new_model.eval()
# summary_ids = new_model.generate(
#                     tokenized_text,
#                     max_length=128, 
#                     num_beams=5,
#                     repetition_penalty=2.5, 
#                     length_penalty=1.0, 
#                     early_stopping=True
#                 )
# output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# print(output)

In [4]:
import torch.quantization
	
quantized_model = torch.quantization.quantize_dynamic(new_model, {torch.nn.Linear}, dtype=torch.qint8)

In [7]:
#Test quantized model
quantized_model.to(device)
device = "cuda"
src = "I'm feeling under the weather today"
tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
print(tokenized_text)
quantized_model.eval()
summary_ids = quantized_model.generate(
                    tokenized_text,
                    max_length=128, 
                    num_beams=5,
                    repetition_penalty=2.5, 
                    length_penalty=1.0, 
                    early_stopping=True
                )
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(output)

tensor([[  336,   277,   282, 28478,  1711,   287, 40864,  7883,     1]],
       device='cuda:0')


NotImplementedError: Could not run 'quantized::linear_dynamic' with arguments from the 'CUDA' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::linear_dynamic' is only available for these backends: [CPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].

CPU: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp:656 [kernel]
BackendSelect: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/PythonFallbackKernel.cpp:140 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/functorch/DynamicLayer.cpp:488 [backend fallback]
Functionalize: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/FunctionalizeFallbackKernel.cpp:291 [backend fallback]
Named: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/ConjugateFallback.cpp:18 [backend fallback]
Negative: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:64 [backend fallback]
AutogradOther: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:35 [backend fallback]
AutogradCPU: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:39 [backend fallback]
AutogradCUDA: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:47 [backend fallback]
AutogradXLA: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:51 [backend fallback]
AutogradMPS: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:59 [backend fallback]
AutogradXPU: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:43 [backend fallback]
AutogradHPU: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:68 [backend fallback]
AutogradLazy: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/VariableFallbackKernel.cpp:55 [backend fallback]
Tracer: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/torch/csrc/autograd/TraceTypeManual.cpp:296 [backend fallback]
AutocastCPU: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/autocast_mode.cpp:482 [backend fallback]
AutocastCUDA: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/autocast_mode.cpp:324 [backend fallback]
FuncTorchBatched: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:743 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/BatchingRegistrations.cpp:1064 [backend fallback]
VmapMode: fallthrough registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/functorch/TensorWrapper.cpp:189 [backend fallback]
PythonTLSSnapshot: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/PythonFallbackKernel.cpp:148 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/functorch/DynamicLayer.cpp:484 [backend fallback]
PythonDispatcher: registered at /opt/conda/conda-bld/pytorch_1666643016022/work/aten/src/ATen/core/PythonFallbackKernel.cpp:144 [backend fallback]


In [None]:

# Inference from quantized model
device = "cuda"
quantized_model.to(device)
from tqdm import tqdm
results = []
for t in tqdm(eng_txt[:]):
    src = t
    tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
    quantized_model.eval()
    summary_ids = quantized_model.generate(
                        tokenized_text,
                        max_length=128, 
                        num_beams=5,
                        repetition_penalty=2.5, 
                        length_penalty=1.0, 
                        early_stopping=True
                    )
    output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    results.append(output)

In [None]:
save_file_path = ('quantized_models/model.pth')  #save with fp32
torch.save(quantized_model.state_dict(), save_file_path)

In [None]:
# # Saving model config (for architecture)
# from transformers import AutoConfig
# import json

# # Load the model's configuration
# config = AutoConfig.from_pretrained("NlpHUST/t5-en-vi-base")

# # Convert the configuration to a dictionary
# config_dict = config.to_dict()

# # Define the path to save the JSON file
# json_path = "model_config.json"

# # Write the configuration dictionary to a JSON file
# with open(json_path, 'w') as json_file:
#     json.dump(config_dict, json_file, indent=4)

## BLEU Score


In [None]:
def split_string(string):
    return string.split()
predictions = list(map(split_string, pred_txt))
references = list(map(split_string,vie_txt[:]))
print(predictions)
print(references)

In [None]:
import nltk
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

ref_txt = vie_txt[:]
ref_len=len(ref_txt)
pred_len=len(pred_txt)
chencherry = SmoothingFunction()
# assert ref_len==pred_len

bleu=0
for i in tqdm(range(ref_len)): 
    # Only consider Unigram BLEU score here (n = 1)
    sent_bleu = sentence_bleu([ref_txt[i].strip().split()],pred_txt[i].strip().split(),smoothing_function = chencherry.method3,weights = (1,0,0,0))
    bleu+=sent_bleu
    
print('\nAverage Unigram BLEU Score:',bleu/float(ref_len))