# 🧠📚 Multilingual AI Text Summarizer - BART Based

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
import PyPDF2
import docx2txt
import textstat
import re
from googletrans import Translator

# 🔄 Load Model and Tokenizer
# The tokenizer and the model are both loaded from the same pretrained
# checkpoint; the googletrans client is created once and reused afterwards.
_checkpoint = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(_checkpoint)
model = BartForConditionalGeneration.from_pretrained(_checkpoint)
translator = Translator()

## 🌐 Language Selection
Choose the language of your input text. The summary is returned in the same language.

In [None]:
# Display name -> language code passed to the translator.
lang_dict = {
    "English": "en", "Hindi": "hi", "French": "fr", "Spanish": "es",
    "German": "de", "Chinese (Simplified)": "zh-cn", "Tamil": "ta", "Arabic": "ar"
}

print("Supported Languages:")
for lang in lang_dict:
    print(f"- {lang}")

selected_lang = input("🔤 Enter the language of your input text (e.g., English, Hindi): ").strip()

# Match case-insensitively so "hindi" or "HINDI" resolve like "Hindi" instead
# of silently falling back to English.
_lang_lookup = {name.casefold(): code for name, code in lang_dict.items()}
selected_lang_code = _lang_lookup.get(selected_lang.casefold())
if selected_lang_code is None:
    # Keep the original fallback, but tell the user it happened.
    print(f"⚠️ Unrecognized language '{selected_lang}'; defaulting to English.")
    selected_lang_code = "en"

## 📄 Input Text or File Upload
You can either paste your text below or load it from a `.txt`, `.pdf`, or `.docx` file.

In [None]:
from pathlib import Path

def extract_text_from_file(filepath):
    """Return the text content of a ``.txt``, ``.pdf`` or ``.docx`` file.

    The format is chosen from the file suffix, compared case-insensitively.

    Args:
        filepath: Path of the file to read.

    Returns:
        The extracted text. For an unsupported suffix a message is printed
        and an empty string is returned.

    Raises:
        OSError: If a ``.txt`` or ``.pdf`` file cannot be opened.
    """
    ext = Path(filepath).suffix.lower()

    if ext == ".txt":
        with open(filepath, "r", encoding="utf-8") as f:
            return f.read()

    if ext == ".pdf":
        with open(filepath, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Join pages with a newline so the last word of one page is not
            # fused with the first word of the next. `or ""` guards against a
            # page that yields no text (falsy result).
            return "\n".join(page.extract_text() or "" for page in reader.pages)

    if ext == ".docx":
        return docx2txt.process(filepath)

    print("❌ Unsupported file type.")
    return ""

# Ask where the text comes from; anything other than "file" means pasted text.
input_choice = input("📌 Do you want to paste text or upload a file? (Enter 'text' or 'file'): ").strip().lower()

if input_choice == "file":
    file_path = input("📁 Enter the full path to your file: ").strip()
    try:
        user_input = extract_text_from_file(file_path)
    except OSError as err:
        # A mistyped or unreadable path should not abort the notebook with a
        # traceback and leave `user_input` undefined; report it and continue
        # with empty input instead.
        print(f"❌ Could not read file: {err}")
        user_input = ""
else:
    user_input = input("✍️ Paste your text here:\n")

## 🎨 Summary Style and Length
Choose how you want your summary to sound and how long it should be.

In [None]:
# Tone: normalise capitalisation, then fall back to "Default" for anything
# that is not one of the offered choices.
tone = input("🎭 Choose tone (Default, Formal, Informal, Academic, Concise): ").strip().capitalize()
tone = tone if tone in ("Default", "Formal", "Informal", "Academic", "Concise") else "Default"

# Optional comma-separated keywords, kept as the raw string for later parsing.
manual_keywords = input("🔍 Enter keywords/phrases to highlight (comma-separated, optional): ").strip()

# Length in words: clamp to the 30-200 range, or use 60 when the reply is not
# an integer.
try:
    requested_words = int(input("📏 Desired summary length in words (30 - 200): ").strip())
except ValueError:
    summary_length = 60
else:
    summary_length = min(max(requested_words, 30), 200)

# Convert the word budget to a token budget using a fixed 1.33 factor.
token_length = int(summary_length * 1.33)

# Only the exact answer "yes" (any case, surrounding spaces ignored) enables it.
three_line_answer = input("📌 Generate 3-line summary only? (yes/no): ").strip().lower()
three_line_mode = three_line_answer == "yes"

## 🚀 Generate Summary
This cell translates the input to English, summarizes it with BART, and translates the summary back into the selected language.

In [None]:
# Pipeline: translate input -> English, summarize, optionally trim/highlight,
# translate the summary back, then print the summary and some statistics.
if user_input.strip():
    print("🔄 Translating & Summarizing...")

    # The summarizer is fed an English translation of the input.
    input_translated = translator.translate(
        user_input.strip(), src=selected_lang_code, dest="en"
    ).text

    # NOTE(review): the tone prefix is simply prepended to the text being
    # summarized; confirm the model actually reacts to it as an instruction.
    tone_instruction = f"Summarize in a {tone.lower()} tone: " if tone != "Default" else ""
    full_input = tone_instruction + input_translated.replace("\n", " ")

    # Inputs longer than 1024 tokens are truncated.
    inputs = tokenizer([full_input], max_length=1024, truncation=True, return_tensors="pt")

    # Three-line mode uses a fixed short budget; otherwise the budget comes
    # from the requested length, with 40 tokens of headroom above the minimum.
    if three_line_mode:
        min_tokens, max_tokens = 30, 60
    else:
        min_tokens, max_tokens = token_length, token_length + 40

    summary_ids = model.generate(inputs["input_ids"], max_length=max_tokens, min_length=min_tokens,
                                 num_beams=4, length_penalty=2.0, early_stopping=True)

    summary_en = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    if three_line_mode:
        # Keep at most the first three sentences.
        summary_sentences = re.split(r'(?<=[.!?]) +', summary_en)
        summary_en = ' '.join(summary_sentences[:3])

    if manual_keywords:
        # Drop empty entries (e.g. from "a,,b" or a trailing comma): an empty
        # pattern matches everywhere and would scatter markers through the text.
        # Longest keywords first so "AI model" wins over "AI" at the same spot.
        keywords = sorted(
            {kw.strip() for kw in manual_keywords.split(',') if kw.strip()},
            key=len,
            reverse=True,
        )
        if keywords:
            keyword_pattern = re.compile("|".join(map(re.escape, keywords)), re.IGNORECASE)
            # A function replacement inserts the matched text itself; the
            # previous template r"**\\1**" produced a literal "\1" instead.
            summary_en = keyword_pattern.sub(lambda m: f"**{m.group(0)}**", summary_en)

    summary_translated = translator.translate(summary_en, src="en", dest=selected_lang_code).text

    print("\n✨ Summary:")
    print(summary_translated)

    # Word counts are whitespace-based; compression is the percentage of
    # words removed relative to the original input.
    original_len = len(user_input.split())
    summary_len = len(summary_translated.split())
    compression = round(100 * (1 - summary_len / original_len), 1) if original_len > 0 else 0
    print(f"\n📊 Original Words: {original_len} | Summary Words: {summary_len} | Compression: {compression}%")

    # Reuse the English translation made above instead of translating the
    # same input a second time.
    translated_for_score = input_translated
    print("\n🧠 Readability (English Version of Input):")
    print(f"• Flesch Reading Ease: {textstat.flesch_reading_ease(translated_for_score):.2f}")
    print(f"• Flesch-Kincaid Grade: {textstat.flesch_kincaid_grade(translated_for_score):.2f}")
    print(f"• Gunning Fog Index: {textstat.gunning_fog(translated_for_score):.2f}")
else:
    print("⚠️ No input provided.")