# Text Summarization

In [1]:
# Step 1: Install required libraries 
!pip install transformers --quiet

In [2]:
pip install huggingface_hub[hf_xet] --quiet

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Step 2: Import libraries
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import re
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Step 3: Load summarization pipeline using pretrained BART model
def load_summarizer(model_name="facebook/bart-large-cnn"):
    """Build a summarization pipeline from a pretrained seq2seq checkpoint.

    Args:
        model_name: Model identifier handed to ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForSeq2SeqLM.from_pretrained``. Defaults to
            ``"facebook/bart-large-cnn"``, so calling ``load_summarizer()`` with no
            arguments behaves exactly as before.

    Returns:
        The object returned by ``pipeline("summarization", ...)`` built from the
        loaded model and tokenizer.

    Side effects:
        Prints a confirmation line naming the loaded model.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
    print(f"✅ Loaded summarization model: {model_name}")
    return summarizer

In [5]:
# Build the pipeline once at notebook scope; summarize_text() below reads this global
# instead of reloading the model on every call.
summarizer = load_summarizer()

# Step 4: Define text summarization function
def summarize_text(text, max_len=130, min_len=30):
    """Summarize ``text`` using the notebook-level ``summarizer`` pipeline.

    Args:
        text: Passage to summarize.
        max_len: Forwarded to the pipeline as ``max_length``.
        min_len: Forwarded to the pipeline as ``min_length``.

    Returns:
        The ``summary_text`` field of the first pipeline result, or an empty string
        when ``text`` is empty, ``None``, or whitespace only.
    """
    # Nothing to summarize: return early instead of sending blank input to the model.
    if not text or not text.strip():
        return ""

    # truncation=True asks the pipeline to cut inputs that exceed the model's maximum
    # input length rather than failing on them; shorter inputs are unaffected.
    summary = summarizer(
        text,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']

Device set to use cpu


✅ Loaded summarization model: facebook/bart-large-cnn


In [6]:
# Step 5: Define lightweight keyword extractor (no NLTK required)
# Common English function words ignored when ranking keywords.
# Built once here instead of being rebuilt on every call.
_BASIC_STOPWORDS = frozenset({
    "the", "and", "was", "for", "with", "that", "from", "after", "this", "which", "had",
    "were", "have", "been", "they", "their", "not", "are", "his", "her", "has", "but", "you",
})


def extract_keywords_basic(text, num_keywords=5):
    """Return the most frequent non-stopword words in ``text``.

    The text is lower-cased, split into words of at least three letters, filtered
    against a small English stopword list, and ranked by frequency. Words with equal
    counts keep the order in which they first appear.

    Args:
        text: Input string.
        num_keywords: Maximum number of keywords to return. ``None`` returns every
            ranked word; zero or a negative value returns an empty list.

    Returns:
        A list of lower-cased keywords, most frequent first.
    """
    from collections import Counter  # local import keeps this cell self-contained

    # [^\W\d_] matches any letter (word character that is neither digit nor underscore),
    # so accented / non-ASCII words are counted instead of being silently dropped.
    words = re.findall(r'\b[^\W\d_]{3,}\b', text.lower())
    counts = Counter(word for word in words if word not in _BASIC_STOPWORDS)
    return [word for word, _ in counts.most_common(num_keywords)]

In [7]:
# 📝 Step 6: Input your text here
# Sample passage used for the demo; replace the contents of this string to try other text.
input_text = """
The Apollo program was the third United States human spaceflight program carried out by NASA,
which succeeded in landing the first humans on the Moon from 1969 to 1972.
Apollo was first conceived during the Eisenhower administration and began in earnest after President John F. Kennedy's 1961 speech.
It was the third human spaceflight program after Mercury and Gemini.
"""

# 🚀 Step 7: Run summarization and keyword extraction
# Both helpers are called with their default arguments; the results are stored in
# `summary` and `keywords` for the display cell that follows.
summary = summarize_text(input_text)
keywords = extract_keywords_basic(input_text)

Your max_length is set to 130, but your input_length is only 76. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=38)


In [8]:
# Step 8: Display results
# Each print() passes the heading and the body as separate arguments, so print's default
# single-space separator appears between the heading's trailing newline and the body.
print("\n🧾 Original Text:\n", input_text.strip())
print("\n📌 Summary:\n", summary.strip())
# keywords is a list of strings; join it into one comma-separated line.
print("\n🔑 Top Keywords:", ", ".join(keywords))


🧾 Original Text:
 The Apollo program was the third United States human spaceflight program carried out by NASA,
which succeeded in landing the first humans on the Moon from 1969 to 1972.
Apollo was first conceived during the Eisenhower administration and began in earnest after President John F. Kennedy's 1961 speech.
It was the third human spaceflight program after Mercury and Gemini.

📌 Summary:
 The Apollo program was the third U.S. human spaceflight program carried out by NASA. It succeeded in landing the first humans on the Moon from 1969 to 1972.

🔑 Top Keywords: program, apollo, third, human, spaceflight
