In [None]:

!pip install -q transformers gradio evaluate sentencepiece PyPDF2


from transformers import BartTokenizer, BartForConditionalGeneration
import gradio as gr
import torch
import PyPDF2


# Hugging Face checkpoint identifier shared by the tokenizer and the model.
model_name = "facebook/bart-large-cnn"
# Both objects are loaded once when this cell runs and are then used as
# module-level globals by summarize_text().
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

def read_file(file_path):
    """Return the text content of an uploaded ``.txt`` or ``.pdf`` file.

    Args:
        file_path: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload wrapper object exposing the path as its ``name`` attribute.
            Both are accepted because the value handed over by the file
            upload widget differs between UI library versions.

    Returns:
        The file's text. For PDFs, the text of every page that yields any
        text, joined with newlines. If the extension is not supported the
        string ``"Unsupported file format."`` is returned, and if the PDF
        cannot be parsed a string starting with ``"Failed to read PDF: "``
        is returned instead of raising.

    Raises:
        OSError, UnicodeDecodeError: If a ``.txt`` file cannot be opened or
            is not valid UTF-8 (only the PDF branch converts errors to text).
    """
    import os

    # Accept plain paths as well as wrapper objects carrying the path in .name.
    if isinstance(file_path, (str, os.PathLike)):
        path = os.fspath(file_path)
    else:
        path = file_path.name

    # Compare extensions case-insensitively so "REPORT.PDF" is accepted too.
    lowered = path.lower()

    if lowered.endswith(".txt"):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    if lowered.endswith(".pdf"):
        try:
            with open(path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # Extract each page once; pages without text give None/"".
                page_texts = [page.extract_text() for page in reader.pages]
            return "\n".join(text for text in page_texts if text)
        except Exception as e:
            return f"Failed to read PDF: {e}"

    return "Unsupported file format."


def chunk_text(text, max_tokens=1000):
    """Split ``text`` into consecutive pieces of at most ``max_tokens`` words.

    Despite the parameter name, the unit counted here is whitespace-separated
    words, not model tokens. Runs of whitespace are collapsed: every chunk is
    its words re-joined with single spaces.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per chunk. Values below 1 behave
            like 1 (one word per chunk).

    Returns:
        A list of chunk strings in original order; empty if ``text`` contains
        no words.
    """
    tokens = text.split()
    # A chunk is closed as soon as it holds at least one word and has reached
    # the limit, so limits below 1 degenerate to single-word chunks.
    step = max(1, max_tokens)
    return [
        " ".join(tokens[start:start + step])
        for start in range(0, len(tokens), step)
    ]

def summarize_text(paragraph, file_obj, summary_length):
    """Summarize pasted text or an uploaded file with the loaded BART model.

    An uploaded file takes precedence over the pasted paragraph. The text is
    split into word-based chunks, each chunk is summarized independently, and
    the per-chunk summaries are joined with blank lines.

    Args:
        paragraph: Text typed into the UI; may be empty or ``None``.
        file_obj: Uploaded file handle/path as accepted by ``read_file``, or
            ``None`` when nothing was uploaded.
        summary_length: ``"Short"``, ``"Medium"`` or ``"Long"``; any other
            value is treated as ``"Medium"``.

    Returns:
        The summary, or a human-readable message when there is no input, the
        file cannot be read, or summarization fails. This function does not
        raise for model errors; they are reported in the returned string.
    """
    if file_obj is not None:
        text = read_file(file_obj)
        # read_file reports problems as plain strings. Show them to the user
        # verbatim instead of feeding them to the model as if they were content.
        if text == "Unsupported file format." or text.startswith("Failed to read PDF:"):
            return text
        if not text or not text.strip():
            # e.g. a scanned PDF with no extractable text layer.
            return "No readable text was found in the uploaded file."
    elif paragraph and paragraph.strip():
        text = paragraph
    else:
        return "Please enter some text or upload a valid file."

    # (min_length, max_length) in generated tokens. min_length is passed
    # explicitly so it can never exceed max_length: relying on the
    # checkpoint's default minimum (56 for bart-large-cnn -- TODO confirm
    # against the model's generation config) would conflict with the
    # 50-token "Short" limit and cut summaries off mid-sentence.
    length_map = {"Short": (25, 50), "Medium": (56, 100), "Long": (56, 150)}
    min_len, max_len = length_map.get(summary_length, length_map["Medium"])

    # chunk_text counts words, and one word frequently becomes more than one
    # subword token. 700 words leaves headroom under the 1024-token encoder
    # limit so truncation (kept below as a safety net) rarely drops content.
    chunks = chunk_text(text, max_tokens=700)
    summaries = []

    try:
        for chunk in chunks:
            inputs = tokenizer([chunk], max_length=1024, return_tensors="pt", truncation=True)
            summary_ids = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                num_beams=4,
                min_length=min_len,
                max_length=max_len,
                early_stopping=True,
            )
            summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
    except Exception as e:
        # UI boundary: report the failure in the output box rather than crash.
        return f"Error during summarization: {e}"

    return "\n\n".join(summaries)

#  Gradio UI
# Input widgets, listed in the same order as summarize_text's parameters:
# pasted text, uploaded file, requested summary length.
input_components = [
    gr.Textbox(lines=10, label="Enter Text (optional)"),
    gr.File(label="Upload .txt or .pdf file", file_types=[".txt", ".pdf"]),
    gr.Radio(["Short", "Medium", "Long"], label="Summary Length", value="Medium"),
]

interface = gr.Interface(
    fn=summarize_text,
    inputs=input_components,
    outputs="text",
    title="Smart Text Summarizer",
    description="Paste or upload text, choose summary length, and get a concise summary.",
)

#  Launch Gradio app
# share=True requests a temporary public URL in addition to the local server.
interface.launch(debug=False, share=True)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c7fcf39a160768f67c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


