In [None]:
# Install TensorFlow and Hugging Face Transformers into the notebook environment.
# NOTE(review): versions are unpinned, so a later re-run may pick up different
# releases. The PyTorch Pegasus/BART classes and the T5/Pegasus tokenizers used
# in later cells presumably rely on `torch` and `sentencepiece` already being
# present in the image — confirm.
!pip install tensorflow transformers

In [None]:
import pandas as pd

# Load the Amazon reviews CSV from the Kaggle input directory into `data`.
# NOTE(review): "your_file.csv" looks like a placeholder file name — confirm the
# real file before running. A later cell reads the 'Text', 'cleaned_text' and
# 'Summary' columns from this frame.
data = pd.read_csv("/kaggle/input/cleaned-amazon-reviews/your_file.csv")

In [None]:
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
# Checkpoint shared by the T5 tokenizer and the T5 model.
model_name = 't5-large'

# Tokenizer first, then the TensorFlow T5 model, both from the same checkpoint.
t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
t5_model = TFT5ForConditionalGeneration.from_pretrained(model_name)

def summarize_t5(text, model, tokenizer, max_length=100, num_beams=5):
    """Summarize ``text`` with a T5 model using beam search.

    The tokenizer is asked for tensors in the model's own framework. The
    notebook loads ``TFT5ForConditionalGeneration`` (TensorFlow), so always
    requesting PyTorch tensors ("pt") handed ``generate`` tensors of the
    wrong framework.

    Args:
        text: Source text to summarize (a ``str``).
        model: T5 conditional-generation model exposing ``generate``. Its
            ``framework`` attribute ("tf" or "pt") selects the tensor format;
            "pt" is used when the attribute is absent.
        tokenizer: Tokenizer matching ``model``.
        max_length: Maximum length passed to ``generate`` for the summary.
        num_beams: Beam width passed to ``generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Transformers models report their backend through `.framework`.
    tensor_format = getattr(model, "framework", "pt")

    # T5 selects the task through a text prefix; the tokenizer truncates the
    # prefixed input to at most 512 tokens.
    inputs = tokenizer(
        "summarize: " + text,
        return_tensors=tensor_format,
        max_length=512,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs['input_ids'],
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# Checkpoint shared by the PEGASUS model and its tokenizer.
model_name = 'google/pegasus-xsum'

# Model first, then the matching tokenizer, both from the same checkpoint.
p_model = PegasusForConditionalGeneration.from_pretrained(model_name)
p_tokenizer = PegasusTokenizer.from_pretrained(model_name)

def summarize_pegasus(text, model, tokenizer, max_length=100, num_beams=5):
    """Summarize ``text`` with a PEGASUS model using beam search.

    Args:
        text: Source text to summarize; passed to the tokenizer unchanged.
        model: Conditional-generation model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        max_length: Maximum length passed to ``generate`` for the summary.
        num_beams: Beam width passed to ``generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # No task prefix is prepended; the tokenizer truncates the input to at
    # most 512 tokens and is asked for PyTorch tensors.
    encoded = tokenizer(text, truncation=True, max_length=512, return_tensors="pt")

    beam_output = model.generate(
        encoded['input_ids'],
        num_beams=num_beams,
        max_length=max_length,
        early_stopping=True,
    )

    # Only the first returned sequence is decoded.
    best_sequence = beam_output[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
# Checkpoint shared by the BART model and its tokenizer.
model_name = 'facebook/bart-large-cnn'

# Model first, then the matching tokenizer, both from the same checkpoint.
b_model = BartForConditionalGeneration.from_pretrained(model_name)
b_tokenizer = BartTokenizer.from_pretrained(model_name)

def summarize_bart(text, model, tokenizer, max_length=100, num_beams=5):
    """Summarize ``text`` with a BART model using beam search.

    Args:
        text: Source text to summarize; passed to the tokenizer unchanged.
        model: Conditional-generation model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        max_length: Maximum length passed to ``generate`` for the summary.
        num_beams: Beam width passed to ``generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Generation settings collected in one place.
    generation_options = {
        "max_length": max_length,
        "num_beams": num_beams,
        "early_stopping": True,
    }

    # No task prefix is prepended; the tokenizer truncates the input to at
    # most 1024 tokens and is asked for PyTorch tensors.
    encoded = tokenizer(text, truncation=True, max_length=1024, return_tensors="pt")
    generated = model.generate(encoded['input_ids'], **generation_options)

    # Only the first returned sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

In [None]:
from keras.models import load_model
import sys

# Make the Kaggle input directory that holds attention.py importable.
# This has to run before the `from attention import ...` line below.
sys.path.append('/kaggle/input/attention-layer')

# AttentionLayer comes from attention.py (presumably a custom Keras layer — confirm).
from attention import AttentionLayer

# Load the saved seq2seq model from its .h5 file; AttentionLayer is supplied
# through custom_objects under the name 'AttentionLayer'.
# NOTE(review): no cell visible in this notebook uses `model` afterwards —
# confirm whether the seq2seq model is still meant to be part of the comparison.
model_path = '/kaggle/input/seq2seq-model/summary.h5'
model = load_model(model_path, custom_objects={'AttentionLayer': AttentionLayer})

In [None]:
# Side-by-side comparison of the three pretrained summarizers on the first
# reviews of `data`. Each model is run on the raw review ('Text') and on the
# preprocessed review ('cleaned_text'); the dataset's own 'Summary' is printed
# as the reference.
# NOTE: `data` is rebound to a different CSV in a later cell, so this cell has
# to run before that one (re-running it afterwards fails on the missing columns).

# (label, summarize function, model, tokenizer) for every model being compared.
summarizers = [
    ("T5", summarize_t5, t5_model, t5_tokenizer),
    ("PEGASUS", summarize_pegasus, p_model, p_tokenizer),
    ("BART", summarize_bart, b_model, b_tokenizer),
]

# head(5) is positional and yields at most five rows, so a frame with fewer
# rows or a non-default index no longer raises KeyError the way the label
# lookup data[col][i] did.
preview = data[['Text', 'cleaned_text', 'Summary']].head(5)

for review, p_review, dataset_summ in preview.itertuples(index=False, name=None):
    # Original Review
    print("Review:")
    print(review)
    print()

    # Dataset's Summary
    print("Dataset's summary:")
    print(dataset_summ)
    print()

    # One section per model: summary of the raw text, then of the preprocessed text.
    for label, summarize_fn, summ_model, summ_tokenizer in summarizers:
        print(f"{label} summary:")
        print(summarize_fn(review, summ_model, summ_tokenizer))
        print(f"{label} summary (preprocessed):")
        print(summarize_fn(p_review, summ_model, summ_tokenizer))
        print()

In [None]:
# THIS BLOCK CONCATS ALL TEN REVIEWS AND THEN SUMMARIZES

import pandas as pd

# Load the Yelp reviews CSV. This rebinds `data`, replacing the frame loaded
# earlier in the notebook.
file_path = '/kaggle/input/yelp-reviews/5_business_10_review.csv'
data = pd.read_csv(file_path)

# For every business: join all of its reviews into one text and summarize
# that text with each of the three models.
for business_name in data['Business Name'].unique():
    is_this_business = data['Business Name'] == business_name
    # Missing cells are dropped and the rest coerced to str: str.join raises
    # TypeError on NaN or other non-string values.
    business_reviews = data.loc[is_this_business, 'Reviews'].dropna().astype(str)
    concatenated_reviews = " ".join(business_reviews)

    # Generate summaries.
    # NOTE: the summarize_* helpers truncate their input (512 tokens for T5 and
    # PEGASUS, 1024 for BART), so reviews beyond that limit do not reach the model.
    t5_summary = summarize_t5(concatenated_reviews, t5_model, t5_tokenizer)
    pegasus_summary = summarize_pegasus(concatenated_reviews, p_model, p_tokenizer)
    bart_summary = summarize_bart(concatenated_reviews, b_model, b_tokenizer)

    # Print the results (the joined reviews come first, then the business name).
    print("Reviews: ")
    print(concatenated_reviews)
    print(f"Business: {business_name}")
    print("T5 Summary:", t5_summary)
    print("PEGASUS Summary:", pegasus_summary)
    print("BART Summary:", bart_summary)
    print("\n---\n")

In [None]:
# THIS BLOCK SUMMARIZES EACH REVIEW, CONCATS THE 10 SUMMARIES, 
# AND THEN SUMMARIZES THE SUMMARIES INTO ONE

# (label, summarize function, model, tokenizer) for every model being compared.
summarizers = [
    ("T5", summarize_t5, t5_model, t5_tokenizer),
    ("PEGASUS", summarize_pegasus, p_model, p_tokenizer),
    ("BART", summarize_bart, b_model, b_tokenizer),
]

for business_name in data['Business Name'].unique():
    is_this_business = data['Business Name'] == business_name
    # Missing cells are dropped and the rest coerced to str so that every
    # review handed to a summarizer is a real string.
    business_reviews = data.loc[is_this_business, 'Reviews'].dropna().astype(str)

    # Print all reviews for the business
    print(f"Business: {business_name}\nAll Reviews:")
    for review in business_reviews:
        print(review)

    # Stage 1: summarize every review on its own, model by model.
    summaries_by_model = {}
    for label, summarize_fn, summ_model, summ_tokenizer in summarizers:
        print(f"\n--- Individual Summaries ({label}) ---")
        summaries_by_model[label] = [
            summarize_fn(review, summ_model, summ_tokenizer)
            for review in business_reviews
        ]
        for summary in summaries_by_model[label]:
            print(summary)

    # Stage 2: join each model's individual summaries and summarize them again
    # with that same model.
    print("\n--- Final Summaries ---")
    for label, summarize_fn, summ_model, summ_tokenizer in summarizers:
        joined_summaries = " ".join(summaries_by_model[label])
        print(f"{label} Final Summary:", summarize_fn(joined_summaries, summ_model, summ_tokenizer))
    print("\n=============================\n")