# 📘 AI Text Summarizer using BART (Jupyter Notebook Version)

In [ ]:
# 📌 Install Required Libraries
!pip install transformers textstat PyPDF2 python-docx docx2txt

In [ ]:
# 📦 Imports
from transformers import BartTokenizer, BartForConditionalGeneration
import PyPDF2
import docx2txt
import textstat
import re

In [ ]:
# ✅ Load BART model and tokenizer
# The tokenizer and the model are loaded from the same pretrained checkpoint
# name, so the identifier is stated once and reused for both.
_checkpoint = "facebook/bart-large-cnn"
model = BartForConditionalGeneration.from_pretrained(_checkpoint)
tokenizer = BartTokenizer.from_pretrained(_checkpoint)

In [ ]:
# 📁 Load or Input Text
def read_file(filepath):
    """Return the text content of a .txt, .pdf, or .docx file.

    The extension is matched case-insensitively, so "REPORT.PDF" is handled
    the same way as "report.pdf".

    Args:
        filepath: Path to the file to read.

    Returns:
        The extracted text. For any other extension, "Unsupported file type."
        is printed and an empty string is returned.
    """
    lowered = filepath.lower()
    if lowered.endswith(".txt"):
        with open(filepath, "r", encoding="utf-8") as f:
            return f.read()
    if lowered.endswith(".pdf"):
        with open(filepath, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # `or ""` guards against a page yielding None instead of a string
            # (e.g. a page with no extractable text), which would otherwise
            # break the concatenation.
            return "".join(page.extract_text() or "" for page in reader.pages)
    if lowered.endswith(".docx"):
        return docx2txt.process(filepath)
    print("Unsupported file type.")
    return ""

# Normalise the answer so stray whitespace or a short "y" still selects file
# mode instead of silently falling through to the paste prompt.
choice = input("Do you want to load a file? (yes/no): ").strip().lower()
if choice in ("yes", "y"):
    # Pasted or dragged-in paths often carry surrounding spaces or quotes;
    # remove them so the extension check in read_file sees the real filename.
    path = input("Enter full path to the file (.txt, .pdf, .docx): ").strip().strip("\"'")
    user_input = read_file(path)
else:
    user_input = input("Paste or type your text:\n")

In [ ]:
# 🎨 Set Parameters
# Strip the tone and fall back to "Default" on an empty answer, so the later
# comparison against "default" is not defeated by whitespace or a blank reply.
tone = input("Choose tone/style (Default, Formal, Informal, Academic, Concise): ").strip() or "Default"
manual_keywords = input("Enter keywords to highlight (comma-separated): ")

# A non-numeric answer must not crash the cell: fall back to a mid-range
# length and tell the user what was used.
raw_length = input("Enter desired summary length (in words, 30–200): ").strip()
try:
    summary_length = int(raw_length)
except ValueError:
    summary_length = 100
    print(f"Invalid length {raw_length!r}; using {summary_length} words.")
# The prompt advertises 30–200 words, so clamp to that range.
summary_length = max(30, min(200, summary_length))

# Scale the word count by 1.33 to get the token budget used for generation.
token_length = int(summary_length * 1.33)
three_line_mode = input("Enable 3-line summary? (yes/no): ").strip().lower() == "yes"

In [ ]:
# 🧠 Generate Summary
# Flatten newlines so the text is passed to the tokenizer as one paragraph.
input_text = user_input.strip().replace("\n", " ")
tone_instruction = f"Summarize in a {tone.lower()} tone: " if tone.lower() != "default" else ""
full_input = tone_instruction + input_text

if not input_text:
    # Nothing to summarise (empty paste or an unreadable/unsupported file):
    # skip generation rather than asking the model to summarise nothing.
    print("No input text to summarize.")
    summary = ""
else:
    # Inputs longer than 1024 tokens are truncated by the tokenizer.
    inputs = tokenizer([full_input], max_length=1024, truncation=True, return_tensors="pt")

    # 3-line mode uses a fixed short budget; otherwise the budget follows the
    # requested length, with 40 tokens of headroom above the minimum.
    if three_line_mode:
        min_tokens, max_tokens = 30, 60
    else:
        min_tokens, max_tokens = token_length, token_length + 40

    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly instead of leaving generate() to infer it.
        attention_mask=inputs["attention_mask"],
        max_length=max_tokens,
        min_length=min_tokens,
        num_beams=4,
        length_penalty=2.0,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    if three_line_mode:
        # Keep at most the first three sentences, splitting after ., ! or ?.
        summary_sentences = re.split(r'(?<=[.!?]) +', summary)
        summary = ' '.join(summary_sentences[:3])

In [ ]:
# 🔍 Highlight Keywords
# Drop empty entries (trailing comma, ",,", whitespace-only input): an empty
# keyword would match the empty string and insert "****" between every
# character of the summary.
keywords = [kw.strip() for kw in manual_keywords.split(",") if kw.strip()]
if keywords:
    # One combined, case-insensitive pattern applied in a single pass, so text
    # that has already been wrapped is never matched and wrapped again.
    # Longest keywords come first so "machine learning" wins over "machine".
    keywords.sort(key=len, reverse=True)
    keyword_pattern = re.compile("|".join(re.escape(kw) for kw in keywords), re.IGNORECASE)
    summary = keyword_pattern.sub(lambda match: f"**{match.group(0)}**", summary)

In [ ]:
# 📊 Output Summary & Stats
# Show the summary on screen, then persist the same text to summary.txt.
print("\n✨ Summary:\n")
print(summary)

with open("summary.txt", "w", encoding="utf-8") as out_file:
    out_file.write(summary)
print("\n📥 Summary saved to summary.txt")

# Word counts are whitespace-delimited tokens of the original text and of the
# summary.
summary_len = len(summary.split())
original_len = len(user_input.split())

# Compression is the percentage of words removed; it is reported as 0 when
# the original text has no words, which also avoids dividing by zero.
if original_len > 0:
    compression = round(100 * (1 - summary_len / original_len), 1)
else:
    compression = 0
print(f"\n📊 Original Words: {original_len} | Summary Words: {summary_len} | Compression: {compression}%")

# Readability scores are computed on the original input text, not the summary.
print("\n🧠 Readability Metrics:")
reading_ease = textstat.flesch_reading_ease(user_input)
print(f"• Flesch Reading Ease: {reading_ease:.2f}")
kincaid_grade = textstat.flesch_kincaid_grade(user_input)
print(f"• Flesch-Kincaid Grade: {kincaid_grade:.2f}")
fog_index = textstat.gunning_fog(user_input)
print(f"• Gunning Fog Index: {fog_index:.2f}")