Exercise1

In [2]:
# Install transformers if not already installed
!pip install -q transformers

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Import Hugging Face summarization pipeline
from transformers import pipeline, logging
logging.set_verbosity_error()


In [3]:
# Load BART summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [4]:
# Original input text
text = (
    "Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. "
    "We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. "
    "As always, feel free to bring up any ideas that could improve delivery or collaboration. Looking forward to a great discussion!"
)


In [5]:
# Define different parameter configurations
configs = [
    {"max_length": 30, "min_length": 10, "do_sample": False},
    {"max_length": 60, "min_length": 20, "do_sample": False},
    {"max_length": 45, "min_length": 20, "do_sample": True, "temperature": 0.7},
    {"max_length": 100, "min_length": 40, "do_sample": False},
    {"max_length": 60, "min_length": 25, "do_sample": True, "temperature": 0.7},
    {"max_length": 30, "min_length": 10, "do_sample": True, "temperature": 0.7}
]


In [6]:
# Run summarization with each config
for i, cfg in enumerate(configs, start=1):
    print(f"\n--- Summary {i} ---")
    print("Parameters:", cfg)
    summary = summarizer(text, **cfg)[0]["summary_text"]
    print("Summary:", summary)



--- Summary 1 ---
Parameters: {'max_length': 30, 'min_length': 10, 'do_sample': False}
Summary: Project sync-up call on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the

--- Summary 2 ---
Parameters: {'max_length': 60, 'min_length': 20, 'do_sample': False}
Summary: We'll have a project sync-up call on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint.

--- Summary 3 ---
Parameters: {'max_length': 45, 'min_length': 20, 'do_sample': True, 'temperature': 0.7}
Summary: Project sync-up call will take place on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint.

--- Summary 4 ---
Parameters: {'max_length': 100, 'min_length': 40, 'do_sample': False}
Summary: We'll have a project sync-up call on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the scop

Exercise2

In [7]:
# Define texts of different styles
texts = {
    "news": (
        "Scientists at MIT have developed a new battery technology that could revolutionize electric vehicles. "
        "The lithium-metal battery can store twice as much energy as current batteries and charge 50% faster. "
        "The research team, led by Dr. Sarah Johnson, spent three years developing this breakthrough. "
        "The technology could be commercially available within five years, potentially making electric cars more affordable and practical for everyday use."
    ),

    "recipe": (
        "To make chocolate chip cookies, you'll need flour, sugar, butter, eggs, vanilla, and chocolate chips. "
        "First, preheat your oven to 375°F. Mix the dry ingredients in one bowl and wet ingredients in another. "
        "Combine them slowly, then fold in chocolate chips. Drop spoonfuls of dough on a baking sheet and bake for 10-12 minutes until golden brown."
    ),

    "email": (
        "Hi everyone, I wanted to update you on our quarterly sales results. "
        "We exceeded our target by 15% this quarter, thanks to strong performance in the mobile app division. "
        "The marketing campaign we launched in July was particularly successful, generating 200 new leads. "
        "Our customer satisfaction scores also improved by 8%. Great work team, and let's keep the momentum going into Q4!"
    )
}


In [8]:
# Run summarizer on each type of text with fixed parameters
print("Summarization Results\n")

for label, txt in texts.items():
    summary = summarizer(txt, max_length=45, min_length=20, do_sample=False)[0]["summary_text"]
    print(f"--- {label.upper()} ---")
    print(summary, "\n")


Summarization Results

--- NEWS ---
Scientists at MIT have developed a new battery technology that could revolutionize electric vehicles. The lithium-metal battery can store twice as much energy as current batteries. The technology could be commercially available within five years. 

--- RECIPE ---
To make chocolate chip cookies, you'll need flour, sugar, butter, eggs, vanilla, and chocolate chips. Drop spoonfuls of dough on a baking sheet and bake for 10-12 minutes until golden 

--- EMAIL ---
We exceeded our target by 15% this quarter, thanks to strong performance in the mobile app division. The marketing campaign we launched in July was particularly successful, generating 200 new leads. Our customer satisfaction scores also improved 



Exercise3

In [9]:
from transformers import pipeline
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Input text used for comparison
text = (
    "Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. "
    "We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. "
    "As always, feel free to bring up any ideas that could improve delivery or collaboration. Looking forward to a great discussion!"
)

In [12]:
# Load multiple summarization models
bart_large = pipeline("summarization", model="facebook/bart-large-cnn")
t5_small = pipeline("summarization", model="t5-small")
bart_base = pipeline("summarization", model="facebook/bart-base")
distilbart = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

In [13]:
# Store models in a list of (name, pipeline)
models = [
    ("BART Large", bart_large),
    ("BART Base", bart_base),
    ("DistilBART", distilbart),
    ("T5 Small", t5_small)
]

# Run summarization with fixed parameters
for name, model in models:
    print(f"--- {name} ---")
    input_text = "summarize: " + text if "T5" in name else text
    summary = model(input_text, max_length=45, min_length=20, do_sample=False)[0]["summary_text"]
    print(summary, "\n")


--- BART Large ---
We'll have a project sync-up call on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. 

--- BART Base ---
Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. As always, feel free to bring up any ideas that could improve delivery or collaboration. Looking forward to a great discussion! 

--- DistilBART ---
 The project sync-up call will be held on Thursday at 2 PM . We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint . Feel free to bring up 

--- T5 Small ---
we'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint . 



Exercise4

In [14]:
def smart_summarize(text, chunk_size=200, overlap=50):
    # Split into sentences
    sentences = text.split('. ')
    chunks, current = [], ""

    for sent in sentences:
        # Check if adding sentence keeps chunk below size limit
        if len(current) + len(sent) < chunk_size:
            current += sent + ". "
        else:
            chunks.append(current.strip())
            # Add overlap to next chunk
            current = " ".join(current.split()[-overlap:]) + sent + ". "

    # Append final chunk
    if current:
        chunks.append(current.strip())

    # Summarize each chunk
    summaries = [
        summarizer(chunk, max_length=45, min_length=20, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]

    return " ".join(summaries)


In [16]:
# Sample long input text for testing
long_text = (
    "Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. "
    "We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. "
    "As always, feel free to bring up any ideas that could improve delivery or collaboration. "
    "This quarter, we saw significant improvements in deployment speed and testing coverage. "
    "However, we still face delays in design sign-offs and backend integrations. "
    "Please come prepared with suggestions on improving those areas. "
    "Looking forward to a productive conversation and clear action points for the next sprint. "
    "Also, don’t forget to update the Jira board before the meeting."
)

# Run smart summarizer
print("Smart Summary:\n")
print(smart_summarize(long_text))


Smart Summary:

We'll have our project sync-up call on Thursday at 2 PM. Hey team, hope you're all doing great. Just a heads-up that we'll be having our project Sync-up Call on We'll be having our project sync-up call on Thursday at 2 PM. We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. Feel free to bring up any ideas that could improve delivery or collaboration. This quarter, we saw significant improvements in deployment speed and testing coverage. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. This quarter, we saw significant improvements in deployment speed and testing coverage. However, we still face delays in design sign-offs and backend integrations. We’ll review the current sprint progress, discuss any This quarter, we saw significant improvements in deployment speed and testing 

Exercise5

In [17]:
def interactive_summarizer():
    print("Text Summarizer")
    print("Enter 'quit' to exit")

    while True:
        user_input = input("\nEnter text to summarize: ")

        if user_input.lower().strip() == 'quit':
            break

        if not user_input.strip():
            print("Empty input. Please enter some text.")
            continue

        if len(user_input.split()) < 10:
            print("Text too short to summarize effectively. Try entering more content.")
            continue

        try:
            summary = summarizer(user_input, max_length=45, min_length=20, do_sample=False)[0]["summary_text"]
            print("Summary:", summary)
        except Exception as e:
            print("Error summarizing text:", e)

    print("Thanks for using the summarizer!")


In [18]:
interactive_summarizer()

Text Summarizer
Enter 'quit' to exit

Enter text to summarize: Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM.
Summary: We'll have our project sync-up call on Thursday at 2 PM. Hey team, hope you're all doing great. Just a heads-up that we'll be having our project Sync-up Call on

Enter text to summarize: Looking forward to a productive conversation and clear action points for the next sprint.
Summary: "Looking forward to a productive conversation and clear action points for the next sprint. Looking forward to having a productive and clear conversation," he said.

Enter text to summarize: quit
Thanks for using the summarizer!


In [20]:
# Methods to test different decoding strategies
methods = {
    "greedy": {"do_sample": False},
    "beam":   {"do_sample": False, "num_beams": 4},
    "nucleus":{"do_sample": True,  "top_p": 0.9, "temperature": 0.8},
    "top_k":  {"do_sample": True,  "top_k": 50, "temperature": 0.7}
}

# Run summarization with each decoding method
print("Decoding Strategy Comparison:\n")
for name, params in methods.items():
    summary = summarizer(text, max_length=45, min_length=20, **params)[0]["summary_text"]
    print(f"{name.upper()} →\n{summary}\n")


Decoding Strategy Comparison:

GREEDY →
We'll have a project sync-up call on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint.

BEAM →
We'll have a project sync-up call on Thursday at 2 PM. We'll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint.

NUCLEUS →
Project sync-up call on Thursday at 2 PM. We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. As always, feel free to bring

TOP_K →
 project sync-up call on Thursday at 2 PM. We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. As always, feel free to bring



Exercise6


In [21]:
def evaluate_summary(original_text, summary):
    # Compression ratio (shorter is better, unless too short)
    compression_ratio = len(summary) / len(original_text)

    # Sentence counts (simple approximation using punctuation)
    original_sentences = original_text.count('.') + original_text.count('!') + original_text.count('?')
    summary_sentences = summary.count('.') + summary.count('!') + summary.count('?')

    return {
        'compression_ratio': round(compression_ratio, 3),
        'original_sentences': original_sentences,
        'summary_sentences': summary_sentences
    }


In [22]:
# Example long input (reusing from previous exercises)
original_text = (
    "Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. "
    "We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. "
    "As always, feel free to bring up any ideas that could improve delivery or collaboration. "
    "This quarter, we saw significant improvements in deployment speed and testing coverage. "
    "However, we still face delays in design sign-offs and backend integrations. "
    "Please come prepared with suggestions on improving those areas. "
    "Looking forward to a productive conversation and clear action points for the next sprint. "
    "Also, don’t forget to update the Jira board before the meeting."
)

# Generate a summary (you can switch to any decoding strategy)
summary_text = summarizer(original_text, max_length=45, min_length=20, do_sample=False)[0]["summary_text"]

# Evaluate it
metrics = evaluate_summary(original_text, summary_text)
print("Evaluation Metrics:")
for k, v in metrics.items():
    print(f"{k}: {v}")


Evaluation Metrics:
compression_ratio: 0.321
original_sentences: 9
summary_sentences: 2


Multi-Task Pipeline

In [23]:
from transformers import pipeline

In [24]:
# Load pre-trained models for summarization, sentiment analysis, and translation
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
classifier = pipeline("sentiment-analysis")
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

In [25]:
def multi_task_processor(text, tasks=['summarize', 'sentiment', 'translate']):
    results = {}

    if 'summarize' in tasks:
        summary = summarizer(text, max_length=45, min_length=20, do_sample=False)[0]["summary_text"]
        results['summary'] = summary

    if 'sentiment' in tasks:
        sentiment = classifier(text)[0]  # Returns label and score
        results['sentiment'] = sentiment

    if 'translate' in tasks:
        translation = translator(text)[0]["translation_text"]
        results['translation_fr'] = translation

    return results


In [26]:
# Example text
text = (
    "Hey team, hope you're all doing great. Just a heads-up that we'll be having our project sync-up call on Thursday at 2 PM. "
    "We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint. "
    "Looking forward to a great discussion!"
)

# Run all tasks
output = multi_task_processor(text, tasks=['summarize', 'sentiment', 'translate'])

# Display results
print("📌 Multi-Task Output:")
for task, result in output.items():
    print(f"\n--- {task.upper()} ---")
    print(result)


📌 Multi-Task Output:

--- SUMMARY ---
We'll be having our project sync-up call on Thursday at 2 PM. We’ll review the current sprint progress, discuss any blockers, and finalize the scope for the next sprint.

--- SENTIMENT ---
{'label': 'POSITIVE', 'score': 0.9996765851974487}

--- TRANSLATION_FR ---
Hey équipe, espérons que vous vous en sortez tous bien. Juste un coup de tête que nous aurons notre appel de synchronisation de projet jeudi à 14h00. Nous allons examiner l'état d'avancement du sprint actuel, discuter de tous les bloqueurs, et finaliser la portée pour le prochain sprint.
