In [None]:
import torch, platform, sys
print("Python:", sys.version.split()[0])
print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


Python: 3.12.12
PyTorch: 2.8.0+cu126
CUDA available: True
GPU: Tesla T4


In [None]:
!pip -q install --upgrade transformers accelerate gradio sentencepiece huggingface_hub


In [1]:
import os, getpass
USE_HF_TOKEN = True  # set False if using a public model
if USE_HF_TOKEN:
    token = getpass.getpass("Paste your HF token (will be hidden): ")
    os.environ["HF_TOKEN"] = token
    # optional: persisting login (uncomment next 2 lines if you want)
    # from huggingface_hub import login
    # login(token=token)


Paste your HF token (will be hidden): ··········


In [2]:
MODEL_ID = "facebook/bart-large-cnn"   # change here if you want
MAX_NEW_TOKENS_DEFAULT = 128           # default summary length


In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch, os

auth = os.getenv("HF_TOKEN", None)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=auth)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    token=auth,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

summarizer = pipeline(
    "summarization",
    model=model,
    tokenizer=tokenizer,
)




config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Device set to use cuda:0


In [4]:
from math import ceil

def chunk_text(text, max_input_tokens=1024, overlap=50):
    ids = tokenizer.encode(text, add_special_tokens=False)
    if len(ids) <= max_input_tokens:
        return [text]

    chunks = []
    start = 0
    while start < len(ids):
        end = min(start + max_input_tokens, len(ids))
        chunk_ids = ids[start:end]
        chunks.append(tokenizer.decode(chunk_ids, skip_special_tokens=True))
        if end == len(ids):
            break
        start = end - overlap  # small overlap to keep continuity
    return chunks

def summarize_long(text, max_new_tokens=128, min_new_tokens=32, temperature=0.0, do_sample=False):
    pieces = chunk_text(text, max_input_tokens=min(tokenizer.model_max_length, 1024))
    outs = []
    for p in pieces:
        out = summarizer(
            p,
            max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens,
            do_sample=do_sample,
            temperature=temperature if do_sample else None,
        )[0]["summary_text"]
        outs.append(out)
    # final pass to compress concatenated chunk summaries
    joined = " ".join(outs)
    final = summarizer(
        joined,
        max_new_tokens=max_new_tokens,
        min_new_tokens=min_new_tokens,
        do_sample=do_sample,
        temperature=temperature if do_sample else None,
    )[0]["summary_text"]
    return final


In [5]:
import gradio as gr
from pathlib import Path
from datetime import datetime
import uuid, json

# choose your look here:
USE_DARK_THEME = False  # set True for dark

LIGHT = gr.themes.Soft()
DARK = gr.themes.Monochrome()  # simple dark-ish theme
THEME = DARK if USE_DARK_THEME else LIGHT

DOWNLOAD_DIR = Path("downloads")
DOWNLOAD_DIR.mkdir(exist_ok=True)

def _random_name(ext: str) -> Path:
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    rid = uuid.uuid4().hex[:8]
    return DOWNLOAD_DIR / f"summary-{ts}-{rid}.{ext}"

def run_summary(text, max_new_tokens, min_new_tokens, temperature, do_sample):
    if not text or not text.strip():
        return "", ""
    out = summarize_long(
        text.strip(),
        max_new_tokens=max_new_tokens,
        min_new_tokens=min_new_tokens,
        temperature=temperature,
        do_sample=do_sample,
    )
    return out, "Ready"

def prepare_download(summary_text, fmt):
    fmt = (fmt or "txt").lower()
    if fmt not in {"txt", "json"}:
        fmt = "txt"
    path = _random_name(fmt)
    if fmt == "txt":
        path.write_text(summary_text or "", encoding="utf-8")
    else:
        payload = {
            "model_id": MODEL_ID,
            "summary": summary_text or "",
            "exported_at": datetime.now().isoformat(timespec="seconds"),
            "id": uuid.uuid4().hex,
        }
        path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(path)

with gr.Blocks(theme=THEME, fill_height=True, title="Summarizer") as demo:
    gr.Markdown("## Text Summarizer (HF + Gradio) — export, random filename, light/dark theme")

    with gr.Row():
        inp = gr.Textbox(lines=12, label="Input text", placeholder="Paste a long article or notes…")
        out = gr.Textbox(lines=12, label="Summary")

    with gr.Row():
        max_new = gr.Slider(32, 512, value=MAX_NEW_TOKENS_DEFAULT, step=1, label="Max new tokens")
        min_new = gr.Slider(0, 256, value=32, step=1, label="Min new tokens")
        temp = gr.Slider(0.0, 1.5, value=0.0, step=0.1, label="Temperature")
        do_sample = gr.Checkbox(value=False, label="Use sampling")

    with gr.Row():
        run_btn = gr.Button("Summarize", variant="primary")
        status = gr.Label(value="Idle")

    with gr.Row():
        fmt = gr.Radio(["txt", "json"], value="txt", label="Export format")
        prep = gr.Button("Prepare Download")
        dl = gr.DownloadButton(label="Download file", value=None)

    run_btn.click(run_summary, inputs=[inp, max_new, min_new, temp, do_sample], outputs=[out, status])
    prep.click(prepare_download, inputs=[out, fmt], outputs=dl)

demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://47db332373f338e35d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [6]:
sample = """Large language models are increasingly used for summarization...
They require careful prompt design and evaluation..."""
print(summarize_long(sample, max_new_tokens=80, min_new_tokens=20))


Your max_length is set to 142, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Your max_length is set to 142, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


Large language models are increasingly used for summarization. They require careful prompt design and evaluation.
