In [1]:
import os
import sys
# Limit CUDA-aware libraries to the GPU with index 2. This is set in the
# first cell so it is already in place when torch is imported further down.
os.environ.update({"CUDA_VISIBLE_DEVICES": "2"})

In [2]:
from transformers import AutoModelForSeq2SeqLM, T5TokenizerFast
import torch

class AMRToTextSan:
    """Turn a linearized AMR string into text with a saved seq2seq checkpoint.

    The checkpoint directory must contain two sub-directories:

    * ``tokenizer`` -- loaded with ``T5TokenizerFast.from_pretrained``
    * ``model``     -- loaded with ``AutoModelForSeq2SeqLM.from_pretrained``

    Attributes:
        device: Device string the model and the input tensors are moved to.
        tokenizer: Tokenizer loaded from ``<model_path>/tokenizer``.
        model: Seq2seq model loaded from ``<model_path>/model``.
        max_seq_len_sent: Token limit used both for truncating the input and
            as ``max_length`` for generation.
        num_beams: ``num_beams`` value passed to ``model.generate``.
    """

    def __init__(
        self,
        model_path,
        *,
        device: str = "cpu",
        max_seq_len_sent: int = 512,
        num_beams: int = 5,
    ):
        """Load the tokenizer and model and store the generation settings.

        Args:
            model_path: Directory holding the ``tokenizer`` and ``model``
                sub-directories.
            device: Device string handed to ``.to(...)`` for the model and
                the tokenized inputs. Defaults to ``"cpu"``, which is what
                this class previously hard-coded.
            max_seq_len_sent: Maximum number of tokens for the encoded input
                and for the generated output. Must be at least 1.
            num_beams: Number of beams requested from ``generate``. Must be
                at least 1.

        Raises:
            ValueError: If ``max_seq_len_sent`` or ``num_beams`` is below 1.
        """
        # Validate before loading anything: loading a checkpoint is slow, so
        # a bad setting should fail immediately.
        if max_seq_len_sent < 1:
            raise ValueError(
                f"max_seq_len_sent must be >= 1, got {max_seq_len_sent}"
            )
        if num_beams < 1:
            raise ValueError(f"num_beams must be >= 1, got {num_beams}")

        self.device = device

        # Initialize tokenizer and model
        self.tokenizer = T5TokenizerFast.from_pretrained(
            os.path.join(model_path, 'tokenizer')
        )
        self.model = AutoModelForSeq2SeqLM.from_pretrained(
            os.path.join(model_path, 'model')
        ).to(self.device)

        # Set generation parameters
        self.max_seq_len_sent = max_seq_len_sent
        self.num_beams = num_beams

    def __call__(self, amr_string: str) -> str:
        """Generate text for a single AMR string.

        Args:
            amr_string: The AMR graph serialized as one string. Inputs longer
                than ``max_seq_len_sent`` tokens are truncated by the
                tokenizer.

        Returns:
            The decoded first sequence returned by ``model.generate``, with
            special tokens removed.
        """
        # Prepare input: a single un-padded example, as PyTorch tensors on
        # the same device as the model.
        inputs = self.tokenizer(
            amr_string,
            max_length=self.max_seq_len_sent,
            padding=False,
            truncation=True,
            return_tensors="pt"
        ).to(self.device)

        # Generate
        outputs = self.model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            num_beams=self.num_beams,
            max_length=self.max_seq_len_sent,
            early_stopping=True,
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=self.tokenizer.eos_token_id
        )

        # Decode output: only one input was given, so take the first sequence.
        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text

# # Example usage
# if __name__ == "__main__":
#     text_to_amr = TextToAMRSan()
#     amr_to_text = AMRToTextSan("path/to/saved/indot5/model")
    
#     sent1 = "Presiden Jokowi akan memberikan pidato penting di Istana Negara besok pagi."
    
#     # Text -> AMR -> Text
#     amr_graph = text_to_amr(sent1)
#     amr_string = penman.encode(amr_graph)
#     generated_text = amr_to_text(amr_string)
    
#     print("Original:", sent1)
#     print("AMR:", amr_string)
#     print("Generated:", generated_text)

In [3]:
# Load the AMR-to-text tokenizer and model from the saved checkpoint directory.
amr_to_text = AMRToTextSan(model_path="model/amr2text/best_model")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2025-03-05 16:01:57.649648: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-05 16:01:57.652616: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-05 16:01:57.726230: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is o