In [3]:
from transformers import pipeline

# Load the pre-trained model from Hugging Face
model_name = "Advik-7/text_summarizer"
summarizer = pipeline("summarization", model=model_name)

# Example text for summarization
input_text = """
The quick brown fox jumps over the lazy dog. This is a simple sentence that demonstrates the use of all the letters in the English alphabet. It is often used in typing exercises and font displays to showcase the style and clarity of a font.
"""

# Generate the summary
summary = summarizer(input_text, max_length=50, min_length=25, do_sample=False)

# Print the generated summary
print("Original Text:", input_text)
print("\nGenerated Summary:", summary[0]['summary_text'])


config.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Original Text: 
The quick brown fox jumps over the lazy dog. This is a simple sentence that demonstrates the use of all the letters in the English alphabet. It is often used in typing exercises and font displays to showcase the style and clarity of a font.


Generated Summary: This is a simple sentence that demonstrates the use of all the letters in the English alphabet.
It is often used in typing exercises and font displays to showcase the style and clarity of a font.


In [4]:

# Example large input text for summarization
input_text = """
The quick brown fox jumps over the lazy dog. This is a simple sentence that demonstrates the use of all the letters in the English alphabet. It is often used in typing exercises and font displays to showcase the style and clarity of a font. 
Many other fonts are used around the world for different purposes. Some fonts are serif fonts, which have small lines or strokes regularly attached to the end of a larger stroke in a letter or symbol. Sans-serif fonts are those without these strokes, and are commonly used for online and digital content because they are considered easier to read.
There are various classifications of fonts, including script, decorative, and display fonts. Each type has its own characteristics and uses. Script fonts imitate cursive writing, while decorative fonts are designed to catch the eye, often used for advertising, branding, or artistic purposes.
The choice of font plays a significant role in how the message is conveyed, as different fonts evoke different emotions and perceptions. Some fonts are more formal and serious, while others are more playful and informal. The right font can help enhance the impact of the message, making it more readable and engaging.
"""

# Generate the summary with the desired length
summary = summarizer(input_text, 
                     max_length=60,  # Set a shorter max length for a more concise summary
                     min_length=30,  # Keep the summary at least 30 tokens long
                     do_sample=False,  # Disable sampling for deterministic output
                     truncation=True, 
                     pad_token_id=50256)  # Handle padding correctly

# Print the original text and generated summary
print("Original Text:", input_text)
print("\nGenerated Summary:", summary[0]['summary_text'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Original Text: 
The quick brown fox jumps over the lazy dog. This is a simple sentence that demonstrates the use of all the letters in the English alphabet. It is often used in typing exercises and font displays to showcase the style and clarity of a font. 
Many other fonts are used around the world for different purposes. Some fonts are serif fonts, which have small lines or strokes regularly attached to the end of a larger stroke in a letter or symbol. Sans-serif fonts are those without these strokes, and are commonly used for online and digital content because they are considered easier to read.
There are various classifications of fonts, including script, decorative, and display fonts. Each type has its own characteristics and uses. Script fonts imitate cursive writing, while decorative fonts are designed to catch the eye, often used for advertising, branding, or artistic purposes.
The choice of font plays a significant role in how the message is conveyed, as different fonts evoke 

In [2]:
!pip install rouge_score


Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=498f8dae20af3323bc07cecb999ab294864b500dcd0ebd250016ac11f83bb03f
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [19]:
from rouge_score import rouge_scorer

# Example reference summary (This should be your "gold standard" summary for comparison)
reference_summary = """
The quick brown fox is a simple sentence used to showcase all the letters of the English alphabet. It is frequently used in typing exercises and font displays. Various font types, like serif and sans-serif, are used in different scenarios. Serif fonts have small lines attached to the end of each letter, while sans-serif fonts do not. Fonts play an important role in conveying the tone of a message.
"""

# Example large input text for summarization
input_text = """
The quick brown fox jumps over the lazy dog. This is a simple sentence that demonstrates the use of all the letters in the English alphabet. It is often used in typing exercises and font displays to showcase the style and clarity of a font. 
Many other fonts are used around the world for different purposes. Some fonts are serif fonts, which have small lines or strokes regularly attached to the end of a larger stroke in a letter or symbol. Sans-serif fonts are those without these strokes, and are commonly used for online and digital content because they are considered easier to read.
There are various classifications of fonts, including script, decorative, and display fonts. Each type has its own characteristics and uses. Script fonts imitate cursive writing, while decorative fonts are designed to catch the eye, often used for advertising, branding, or artistic purposes.
The choice of font plays a significant role in how the message is conveyed, as different fonts evoke different emotions and perceptions. Some fonts are more formal and serious, while others are more playful and informal. The right font can help enhance the impact of the message, making it more readable and engaging.
"""

# Assuming the summarizer is already defined as 'summarizer'
summaries = []
for _ in range(5):  # Generate multiple summaries
    summary = summarizer(input_text, 
                         max_length=80,  # Increase max length for more detailed summaries
                         min_length=30,  # Reduce the minimum length to avoid truncation
                         do_sample=True, 
                         temperature=1.2,  # Increase temperature to allow more creativity
                         top_k=50,  # Apply Top-k sampling to narrow down choices
                         truncation=True, 
                         pad_token_id=50256)
    summaries.append(summary[0]['summary_text'])

# Use RougeScorer to calculate ROUGE-1 score for each generated summary
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Store ROUGE-1 scores for each summary
rouge_scores = []
for generated_summary in summaries:
    score = scorer.score(reference_summary, generated_summary)
    rouge_scores.append(score['rouge1'])  # Collecting F-Measure (F1 score)

# Choose the summary with the highest ROUGE-1 F-Measure
final_summary = summaries[rouge_scores.index(max(rouge_scores))]

print(f"ROUGE-1 F-Measure: {max(rouge_scores)}")


ROUGE-1 F-Measure: Score(precision=0.6, recall=0.5070422535211268, fmeasure=0.549618320610687)


In [21]:
from rouge_score import rouge_scorer

# Example reference summary (This should be your "gold standard" summary for comparison)
reference_summary = """
The quick brown fox is a simple sentence used to showcase all the letters of the English alphabet. It is frequently used in typing exercises and font displays. Various font types, like serif and sans-serif, are used in different scenarios. Serif fonts have small lines attached to the end of each letter, while sans-serif fonts do not. Fonts play an important role in conveying the tone of a message.
"""

# Example large input text for summarization
input_text = """
The quick brown fox jumps over the lazy dog. This is a simple sentence that demonstrates the use of all the letters in the English alphabet. It is often used in typing exercises and font displays to showcase the style and clarity of a font. 
Many other fonts are used around the world for different purposes. Some fonts are serif fonts, which have small lines or strokes regularly attached to the end of a larger stroke in a letter or symbol. Sans-serif fonts are those without these strokes, and are commonly used for online and digital content because they are considered easier to read.
There are various classifications of fonts, including script, decorative, and display fonts. Each type has its own characteristics and uses. Script fonts imitate cursive writing, while decorative fonts are designed to catch the eye, often used for advertising, branding, or artistic purposes.
The choice of font plays a significant role in how the message is conveyed, as different fonts evoke different emotions and perceptions. Some fonts are more formal and serious, while others are more playful and informal. The right font can help enhance the impact of the message, making it more readable and engaging.
"""

# Assuming the summarizer is already defined as 'summarizer'
summaries = []
for _ in range(5):  # Generate multiple summaries
    summary = summarizer(input_text, 
                         max_length=80,  # Increase max length for more detailed summaries
                         min_length=30,  # Reduce the minimum length to avoid truncation
                         do_sample=True, 
                         temperature=1.5,  # Increase temperature to allow more creativity
                         top_k=50,  # Apply Top-k sampling to narrow down choices
                         truncation=True, 
                         pad_token_id=50256)
    summaries.append(summary[0]['summary_text'])

# Use RougeScorer to calculate ROUGE-1 score for each generated summary
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Store ROUGE-1 scores for each summary
rouge_scores = []
for generated_summary in summaries:
    score = scorer.score(reference_summary, generated_summary)
    rouge_scores.append(score['rouge1'])  # Collecting F-Measure (F1 score)

# Choose the summary with the highest ROUGE-1 F-Measure
final_summary = summaries[rouge_scores.index(max(rouge_scores))]

print(f"ROUGE-1 F-Measure: {max(rouge_scores)}")


ROUGE-1 F-Measure: Score(precision=0.6176470588235294, recall=0.29577464788732394, fmeasure=0.39999999999999997)
