In [None]:
import pandas as pd

# Directory containing the test CSVs; defined once so both paths stay in sync.
DATA_DIR = "/content/drive/MyDrive/summarizer/assets/data"

X_test = pd.read_csv(f"{DATA_DIR}/X_test.csv")
y_test = pd.read_csv(f"{DATA_DIR}/y_test.csv")

# The DataFrame constructor aligns the two Series on their index, so a
# row-count mismatch would silently produce NaN-padded rows rather than an
# error. Fail loudly instead.
if len(X_test) != len(y_test):
    raise ValueError(
        f"X_test has {len(X_test)} rows but y_test has {len(y_test)}; "
        "reviews and summaries must pair up one-to-one."
    )

# Pair each review (column 'X') with its summary (column 'y').
test_data = pd.DataFrame({'review': X_test['X'], 'summary': y_test['y']})

In [None]:
from transformers import T5ForConditionalGeneration, AutoTokenizer
from peft import PeftModel

# Load the model and tokenizer
base_model_id = "google-t5/t5-base"
peft_model = "jaltareyr/t5-base-summarization"

# Keep the hub id in its own variable: `base_model` is bound to the loaded
# model object, while the tokenizer has to be looked up by the id string.
base_model = T5ForConditionalGeneration.from_pretrained(base_model_id)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Attach the PEFT adapter identified by `peft_model` to the base model.
model = PeftModel.from_pretrained(base_model, peft_model)

# Summarization function
def summarize(text):
    """Generate a summary of ``text`` with the module-level ``model``.

    The text is tokenized (truncated to at most 512 tokens), handed to
    ``model.generate`` with 2-beam search, and the first returned sequence
    is decoded with special tokens stripped.

    NOTE: a later cell in this notebook defines ``summarize`` again; once
    that cell has run, it replaces this definition.

    Args:
        text: Input forwarded to ``tokenizer``.

    Returns:
        The value produced by ``tokenizer.decode`` for the first generated
        sequence.
    """
    encoded = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    # Decoding settings, gathered in one place before the call.
    generation_options = {
        "max_length": 128,
        "min_length": 16,
        "length_penalty": 2.0,
        "repetition_penalty": 2.0,
        "num_beams": 2,
        "early_stopping": True,
    }
    generated = model.generate(input_ids=encoded['input_ids'], **generation_options)

    # Only the first returned sequence is decoded.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Pick the compute device: CUDA when torch reports it as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model onto the chosen device.
model.to(device)

# Summarization function
def summarize(text, max_input_length=512, max_length=128, min_length=24, num_beams=2):
    """Generate a summary of ``text`` on the module-level ``device``.

    Args:
        text: Input forwarded to ``tokenizer``.
        max_input_length: Truncation limit passed to the tokenizer as
            ``max_length``.
        max_length: ``max_length`` passed to ``model.generate``.
        min_length: ``min_length`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The value produced by ``tokenizer.decode`` for the first generated
        sequence, with special tokens skipped.

    Raises:
        ValueError: If ``min_length`` is greater than ``max_length``.
    """
    # A minimum above the maximum cannot be satisfied; reject it up front
    # rather than handing contradictory limits to generate().
    if min_length > max_length:
        raise ValueError(
            f"min_length ({min_length}) must not exceed max_length ({max_length})"
        )

    # Tokenize input and move to the same device as the model
    inputs = tokenizer(
        text, return_tensors="pt", max_length=max_input_length, truncation=True
    ).to(device)
    summary_ids = model.generate(
        input_ids=inputs['input_ids'],
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        repetition_penalty=2.0,
        num_beams=num_beams,
        early_stopping=True,
    )
    # Only the first returned sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example usage: summarize the review at row position 20 of the test set.
# `test_data` is the DataFrame built when the CSVs were loaded. `.iloc` picks
# the row by position, whereas `frame[20]` would look for a column labelled 20.
text = test_data['review'].iloc[20]
summary = summarize(text)
print(summary)
