In [2]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Verify GPU availability.
# torch.cuda.get_device_name() raises on a machine without CUDA, so the device
# name is only queried when a GPU is actually present; the rest of the script
# already falls back to CPU in that case.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
if cuda_available:
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU: none detected, running on CPU")

# Load the pretrained T5 tokenizer and model, then place the model on the GPU
# when one is available and on the CPU otherwise.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Read prompts from dataset.txt.
# Only lines containing a '|' separator are used; the prompt is the text before
# the first '|', stripped of surrounding whitespace.
file_path = "dataset.txt"
# Use a context manager so the file handle is closed deterministically.
with open(file_path, 'r', encoding='utf-8') as dataset_file:
    lines = dataset_file.read().splitlines()
prompts = [line.split('|')[0].strip() for line in lines if '|' in line]

# Process each prompt and generate the result.
# Resolve the target device once rather than on every iteration.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for input_text in prompts:
    # Each prompt is encoded on its own, so padding to a fixed length would
    # only add masked-out tokens; truncation still caps the input at 50 tokens.
    inputs = tokenizer(
        input_text,
        return_tensors='pt',
        max_length=50,
        truncation=True,
    ).to(device)

    # Inference only: skip gradient tracking while generating.
    with torch.no_grad():
        outputs = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],  # Pass the attention mask
            max_length=50,
            # T5 defines its own pad token; substituting EOS is a workaround
            # for models that lack one and is not needed here.
            pad_token_id=tokenizer.pad_token_id,
            num_beams=5,  # Beam search combined with sampling (beam-sample)
            early_stopping=True,
            do_sample=True,  # Enable sampling
            temperature=0.7,  # Control randomness
            top_k=50,  # Limit to top-k tokens
            top_p=0.9,  # Nucleus sampling
        )

    # The first returned sequence is decoded, with special tokens removed.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print(f"Input text: {input_text}")
    print(f"Generated response: {response}")
    print("-" * 50)  # Separator for readability


CUDA available: True
GPU: NVIDIA GeForce RTX 3060 Laptop GPU


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Input text: turn on my kitchen light
Generated response: turn on my light
--------------------------------------------------
Input text: turn off my kitchen light
Generated response: turn off my kitchen light
--------------------------------------------------
Input text: turn on the living room light
Generated response: turn on the light
--------------------------------------------------
Input text: turn off the living room light
Generated response: turn off the light
--------------------------------------------------
