# In [None]:
import pandas as pd
import nltk
from transformers import pipeline

# Import project modules (from src folder)
from src.extractive_summarizer import extractive_summary
from src.abstractive_summarizer import abstractive_summary
from src.evaluation import evaluate_summary

# Download the Punkt sentence-tokenizer data (needed for sentence splitting).
# Recent NLTK releases (3.9 and later) load the 'punkt_tab' resource instead of
# the legacy pickled 'punkt' model, so fetch both to keep sentence splitting
# working on old and new NLTK versions alike.
nltk.download('punkt')
nltk.download('punkt_tab')

print("✅ All libraries imported successfully!")


# Read the sample texts from disk, then preview the first rows as a sanity check
sample_path = '../data/sample_texts.csv'
df = pd.read_csv(sample_path)
print("📂 Data loaded successfully!\n")
preview = df.head()
print(preview)



# Use the text of the row labelled 0 as the demo input
text = df.loc[0, 'text']
print("📝 Original Text:\n", text)

# ---------- Extractive Summarizer ----------
# Request a two-sentence summary via num_sentences
extractive_options = {'num_sentences': 2}
extractive_result = extractive_summary(text, **extractive_options)
print("\n✂️ Extractive Summary:\n", extractive_result)

# ---------- Abstractive Summarizer ----------
# Length limits are forwarded as min_length / max_length
abstractive_options = {'min_length': 20, 'max_length': 60}
abstractive_result = abstractive_summary(text, **abstractive_options)
print("\n🤖 Abstractive Summary:\n", abstractive_result)



# Score the abstractive summary of the sample text and print the result.
# NOTE(review): the original text is passed as the first argument — presumably
# it serves as the ROUGE reference because no gold summary is available here;
# confirm the argument order expected by evaluate_summary.
scores = evaluate_summary(text, abstractive_result)
print("\n📈 Evaluation Scores (ROUGE):\n", scores)



# Run both summarizers over every text in the dataset.
# Only the 'id' and 'text' columns are read, so iterate over those two columns
# directly rather than building a full row Series (and an unused index) per
# iteration with iterrows().
results = [
    {
        'id': text_id,
        'original_text': source_text,
        'extractive_summary': extractive_summary(source_text, num_sentences=2),
        'abstractive_summary': abstractive_summary(
            source_text, min_length=20, max_length=60
        ),
    }
    for text_id, source_text in zip(df['id'], df['text'])
]

# Name the columns explicitly so the frame (and any CSV written from it) keeps
# its header even when the dataset is empty.
summary_df = pd.DataFrame(
    results,
    columns=['id', 'original_text', 'extractive_summary', 'abstractive_summary'],
)
print("\n✅ Summaries generated for all texts!\n")
print(summary_df.head())


# Write the generated summaries to CSV, leaving out the DataFrame index column
output_path = '../data/generated_summaries.csv'
summary_df.to_csv(output_path, index=False)
print("\n💾 Summaries saved successfully to 'data/generated_summaries.csv'!")

# Final status message for the whole run
print("\n🎉 All tasks completed successfully!")
