In [1]:
!pip install summa

from summa import summarizer

text = """
The TextRank algorithm is an unsupervised, graph-based ranking model for text processing.
It works by building a graph where sentences are nodes (vertices) and the edges between
sentences are weighted by their semantic similarity. This similarity is often calculated
using techniques like cosine similarity on word vectors. The process then applies an
algorithm similar to PageRank to rank the importance of each sentence. The sentences
with the highest scores are extracted to form the final summary. This approach is highly
efficient as it avoids the need for massive labeled training data and complex deep learning
architectures, making it a reliable choice for legal or technical documents where factual
accuracy is paramount.
"""
textrank_summary = summarizer.summarize(text, ratio=0.2)

print("### TextRank (Extractive) Summary ###")
print(textrank_summary)
# Output will be sentences lifted directly from the text.

Collecting summa
  Downloading summa-1.2.0.tar.gz (54 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.9/54.9 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: summa
  Building wheel for summa (setup.py) ... [?25l[?25hdone
  Created wheel for summa: filename=summa-1.2.0-py3-none-any.whl size=54387 sha256=91416e03efc7381a7d593d9acd11cb86f68b4df439033572dcf8fe31c2ef2547
  Stored in directory: /root/.cache/pip/wheels/70/26/84/58df5a55ebde6fd802666b6ac0b86909ecd018a2702b89d13c
Successfully built summa
Installing collected packages: summa
Successfully installed summa-1.2.0
### TextRank (Extractive) Summary ###
sentences are weighted by their semantic similarity.
algorithm similar to PageRank to rank the importance of each sentence.


In [2]:
!pip install transformers sentencepiece torch

from transformers import pipeline

# Load a pre-trained Abstractive Model (e.g., BART fine-tuned on CNN/DailyMail)
# 'summarization' task pipeline automatically loads a default model if one isn't specified.
# A common model is 'facebook/bart-large-cnn' or 'google/pegasus-xsum'
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

text = """
The TextRank algorithm is an unsupervised, graph-based ranking model for text processing.
It works by building a graph where sentences are nodes (vertices) and the edges between
sentences are weighted by their semantic similarity. This similarity is often calculated
using techniques like cosine similarity on word vectors. The process then applies an
algorithm similar to PageRank to rank the importance of each sentence. The sentences
with the highest scores are extracted to form the final summary. This approach is highly
efficient as it avoids the need for massive labeled training data and complex deep learning
architectures, making it a reliable choice for legal or technical documents where factual
accuracy is paramount.
"""

# Generate the summary
abstractive_summary = summarizer(
    text,
    max_length=50,
    min_length=10,
    do_sample=False
)[0]['summary_text']

print("\n### Hugging Face (Abstractive) Summary ###")
print(abstractive_summary)
# Output will be newly generated, paraphrased sentences.



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu



### Hugging Face (Abstractive) Summary ###
The TextRank algorithm is an unsupervised, graph-based ranking model for text processing. It works by building a graph where sentences are nodes (vertices) and the edges between them are weighted by semantic similarity.
