In [1]:
# Exercise 1: Translate Multiple Sentences
# Install transformers if not already installed
!pip install transformers --quiet

# Import required libraries
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

from transformers import pipeline, logging

# Suppress warnings from transformers
logging.set_verbosity_error()


In [2]:
# Load the English to French translation pipeline.
# `translator` is a notebook-level global: the Exercise 1 loop, translate_long_text()
# and the Exercise 5 cell all call it, so this cell has to run before any of them.
translator = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

In [3]:
# Define a list of English sentences
sentences = [
    "I wake up at 7 a.m. every day.",
    "She enjoys playing the guitar in the evening.",
    "They are working on a new software project.",
    "We often go hiking on weekends.",
    "He watches technology reviews on YouTube."
]

In [4]:
# Go through the sentences (numbered from 1) and show each next to its French translation
for number, english in enumerate(sentences, start=1):
    result = translator(english, max_length=50)
    french = result[0]['translation_text']
    print(f"{number}. Original (EN): {english}")
    print(f"   Translated (FR): {french}\n")

1. Original (EN): I wake up at 7 a.m. every day.
   Translated (FR): Je me réveille à 7 heures du matin tous les jours.

2. Original (EN): She enjoys playing the guitar in the evening.
   Translated (FR): Elle aime jouer de la guitare le soir.

3. Original (EN): They are working on a new software project.
   Translated (FR): Ils travaillent sur un nouveau projet de logiciel.

4. Original (EN): We often go hiking on weekends.
   Translated (FR): Nous allons souvent à la randonnée le week-end.

5. Original (EN): He watches technology reviews on YouTube.
   Translated (FR): Il regarde des revues technologiques sur YouTube.



## Exercise 2: Experiment with Different Translation Models

In [5]:
# Experiment with Different Translation Models
sample_text = "He works at Apple and eats an apple every day."

In [6]:
# Load translation pipelines for English to German and Spanish.
# Both are kept as notebook-level globals; the next cell calls them on `sample_text`.
translator_de = pipeline("translation_en_to_de", model="Helsinki-NLP/opus-mt-en-de")
translator_es = pipeline("translation_en_to_es", model="Helsinki-NLP/opus-mt-en-es")

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/298M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

In [7]:
# Run the same sample sentence through the German and the Spanish pipeline
german_result = translator_de(sample_text, max_length=50)
spanish_result = translator_es(sample_text, max_length=50)
translated_de = german_result[0]['translation_text']
translated_es = spanish_result[0]['translation_text']

# Print the source sentence, then each translation preceded by a blank line
report_rows = (
    ("Original (English):", sample_text),
    ("\nTranslated (German):", translated_de),
    ("\nTranslated (Spanish):", translated_es),
)
for label, value in report_rows:
    print(label, value)


Original (English): He works at Apple and eats an apple every day.

Translated (German): Er arbeitet bei Apple und isst jeden Tag einen Apfel.

Translated (Spanish): Trabaja en Apple y se come una manzana todos los días.


In [8]:
# for reusability - wrapping into a function
# Pipelines already built by translate_text(), keyed by target language code,
# so that each model is loaded at most once per kernel session.
_TRANSLATOR_CACHE = {}


def translate_text(text, target_lang="de", max_length=50):
    """Translate English ``text`` into ``target_lang`` with a Helsinki-NLP opus-mt model.

    Args:
        text: English text to translate.
        target_lang: Target language code; one of ``"de"``, ``"es"`` or ``"fr"``.
        max_length: Value forwarded to the pipeline as ``max_length``
            (defaults to 50, the value used throughout this notebook).

    Returns:
        The ``translation_text`` of the first result returned by the pipeline.

    Raises:
        ValueError: If ``target_lang`` is not one of the supported codes.
    """
    model_map = {
        "de": "Helsinki-NLP/opus-mt-en-de",
        "es": "Helsinki-NLP/opus-mt-en-es",
        "fr": "Helsinki-NLP/opus-mt-en-fr"
    }
    model_name = model_map.get(target_lang)
    if not model_name:
        raise ValueError("Unsupported language code. Use 'de', 'es', or 'fr'.")

    # Building a pipeline loads the model, so build it once and reuse it on later calls.
    translator = _TRANSLATOR_CACHE.get(target_lang)
    if translator is None:
        translator = pipeline(f"translation_en_to_{target_lang}", model=model_name)
        _TRANSLATOR_CACHE[target_lang] = translator
    return translator(text, max_length=max_length)[0]['translation_text']

# Example usage: translate the sample sentence into each demo language
for label, lang_code in (("German:", "de"), ("Spanish:", "es")):
    print(label, translate_text(sample_text, lang_code))


German: Er arbeitet bei Apple und isst jeden Tag einen Apfel.
Spanish: Trabaja en Apple y se come una manzana todos los días.


## Exercise 3: Reverse Translation (French to English)

In [9]:
# Reverse Translation (French to English)
# Load the French to English translation pipeline.
# Kept as the notebook-level global `translator_fr_en`; the re-translation cell below calls it.
translator_fr_en = pipeline("translation_fr_to_en", model="Helsinki-NLP/opus-mt-fr-en")


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

In [10]:
french_text = "Je me réveille à 7 heures du matin tous les jours."

In [13]:
# English sentence to compare the round trip against
original_en = "I wake up at 7 a.m. every day."

# Send the French text back through the French-to-English pipeline
fr_en_result = translator_fr_en(french_text, max_length=50)
retranslated_en = fr_en_result[0]['translation_text']

# Print each heading followed by its sentence so the three versions can be compared
comparison = (
    ("Original English Sentence:", original_en),
    ("\nFrench Translation:", french_text),
    ("\nRe-translated English Sentence:", retranslated_en),
)
for heading, sentence_text in comparison:
    print(heading)
    print(sentence_text)


Original English Sentence:
I wake up at 7 a.m. every day.

French Translation:
Je me réveille à 7 heures du matin tous les jours.

Re-translated English Sentence:
I wake up at 7:00 a.m. every day.


In [14]:
def compare_translation_round_trip(original_en, fr_text, translator_rev=None):
    """Print an English sentence, its French translation and the back-translation.

    Args:
        original_en: Original English sentence; printed unchanged.
        fr_text: French text that is translated back into English.
        translator_rev: Optional French-to-English translation callable, for
            example an already loaded pipeline. It is called as
            ``translator_rev(fr_text, max_length=50)`` and must return a list
            whose first item has a ``'translation_text'`` key. When omitted, a
            new ``Helsinki-NLP/opus-mt-fr-en`` pipeline is created for this call.

    Returns:
        None. The three texts are written to standard output.
    """
    if translator_rev is None:
        # Default path: build the pipeline here. Passing a loaded pipeline
        # instead avoids constructing the model again on every call.
        translator_rev = pipeline("translation_fr_to_en", model="Helsinki-NLP/opus-mt-fr-en")
    back_translated = translator_rev(fr_text, max_length=50)[0]['translation_text']

    print("Original English:\n", original_en)
    print("\nFrench Translation:\n", fr_text)
    print("\nBack to English:\n", back_translated)

# Example use:
compare_translation_round_trip(original_en, french_text)


Original English:
 I wake up at 7 a.m. every day.

French Translation:
 Je me réveille à 7 heures du matin tous les jours.

Back to English:
 I wake up at 7:00 a.m. every day.


## Exercise 4: Handle Long Text Translation

In [15]:
# Handle Long Text Translation
# Function to translate long English text by chunking
def _split_into_chunks(text, max_chunk_length):
    """Cut ``text`` into stripped, non-empty chunks that end on '.', '?' or '!' where possible."""
    chunks = []
    start = 0
    while start < len(text):
        end = start + max_chunk_length
        if end < len(text):
            # Try not to cut mid-sentence: move forward to the next sentence-ending mark.
            while end < len(text) and text[end] not in ('.', '?', '!'):
                end += 1
            end += 1  # Include the punctuation
        chunk = text[start:end].strip()
        if chunk:
            # Whitespace-only leftovers (e.g. a trailing newline) are dropped
            # instead of being sent to the model as an empty string.
            chunks.append(chunk)
        start = end
    return chunks


def translate_long_text(text, max_chunk_length=100, translate_fn=None):
    """Translate a long text chunk by chunk and join the translated chunks.

    The text is cut after roughly ``max_chunk_length`` characters, with each cut
    pushed forward to the next '.', '?' or '!' so sentences are not split.
    Every chunk is translated separately with ``max_length=200`` and the
    results are joined with single spaces.

    Args:
        text: Text to translate.
        max_chunk_length: Minimum number of characters to take before looking
            for the next sentence end. Must not be negative.
        translate_fn: Optional translation callable, called as
            ``translate_fn(chunk, max_length=200)``; it must return a list whose
            first item has a ``'translation_text'`` key. When omitted, the
            notebook-level ``translator`` pipeline is used.

    Returns:
        The translated chunks joined by spaces; ``""`` when ``text`` contains
        nothing but whitespace.

    Raises:
        ValueError: If ``max_chunk_length`` is negative (a negative value could
            make the chunking loop stop advancing).
    """
    if max_chunk_length < 0:
        raise ValueError("max_chunk_length must be non-negative")

    chunks = _split_into_chunks(text, max_chunk_length)
    if not chunks:
        return ""

    if translate_fn is None:
        # Resolved only when there is something to translate.
        translate_fn = translator

    # Translate each chunk
    translated_chunks = [
        translate_fn(chunk, max_length=200)[0]['translation_text']
        for chunk in chunks
    ]

    # Join translated chunks
    return " ".join(translated_chunks)


In [16]:
# Sample long English paragraph (around 170 words)
long_paragraph = """
Artificial intelligence is rapidly transforming the way we live and work. From self-driving cars to advanced medical diagnostics, AI systems are being integrated into many areas of our lives. In education, AI can personalize learning by adapting content to the needs of individual students. In business, it can automate repetitive tasks and provide insights from large datasets. However, as AI becomes more powerful, it also raises important ethical and social questions. Who is responsible when an AI system makes a mistake? How do we ensure that AI is fair, transparent, and unbiased? These are challenges that researchers, policymakers, and industry leaders must address. Furthermore, the impact of AI on jobs is a growing concern. While some roles may be replaced by automation, new jobs will also emerge, requiring new skills and training. It is crucial that societies invest in education and re-skilling programs to prepare for this transition. AI has the potential to greatly benefit humanity, but it must be developed and used responsibly.
"""


In [18]:
# Run the chunked translation over the whole paragraph
translated_long_text = translate_long_text(long_paragraph)

# Heading first, then the translation; the heading's own "\n" plus the
# newline separator leave one blank line between the two.
print("Translated (French):\n", translated_long_text, sep="\n")


Translated (French):

L'intelligence artificielle transforme rapidement notre façon de vivre et de travailler. Des voitures autoconduites aux diagnostics médicaux avancés, les systèmes d'IA sont intégrés dans de nombreux domaines de notre vie. Dans le domaine de l'éducation, l'IA peut personnaliser l'apprentissage en adaptant le contenu aux besoins des étudiants. En entreprise, elle peut automatiser les tâches répétitives et fournir des informations à partir de grands ensembles de données. Cependant, à mesure que l'IA devient plus puissante, elle soulève également d'importantes questions éthiques et sociales.Qui est responsable lorsqu'un système d'IA fait une erreur? Comment nous assurer que l'IA est équitable, transparente et impartiale? Ce sont des défis que les chercheurs, les décideurs et les dirigeants de l'industrie doivent relever. De plus, l'impact de l'IA sur l'emploi est de plus en plus préoccupant. Bien que certains rôles puissent être remplacés par l'automatisation, de nouv

In [19]:
# Check chunk boundaries for observation.
# NOTE: this splits on "." only, so it approximates the segmentation but is not
# the chunk list translate_long_text() builds (that one takes max_chunk_length
# characters first and then extends to the next ".", "?" or "!").
print("Original Chunks (for observation):\n")
pieces = [piece.strip() for piece in long_paragraph.split(".")]
# Skip empty pieces (the text after the final "." is only a newline).
for i, chunk in enumerate((piece for piece in pieces if piece), 1):
    print(f"Chunk {i}: {chunk}")


Original Chunks (for observation):

Chunk 1: Artificial intelligence is rapidly transforming the way we live and work
Chunk 2: From self-driving cars to advanced medical diagnostics, AI systems are being integrated into many areas of our lives
Chunk 3: In education, AI can personalize learning by adapting content to the needs of individual students
Chunk 4: In business, it can automate repetitive tasks and provide insights from large datasets
Chunk 5: However, as AI becomes more powerful, it also raises important ethical and social questions
Chunk 6: Who is responsible when an AI system makes a mistake? How do we ensure that AI is fair, transparent, and unbiased? These are challenges that researchers, policymakers, and industry leaders must address
Chunk 7: Furthermore, the impact of AI on jobs is a growing concern
Chunk 8: While some roles may be replaced by automation, new jobs will also emerge, requiring new skills and training
Chunk 9: It is crucial that societies invest in educati

## Exercise 5: Translation with Controlled Output Length

In [20]:
# Translation with Controlled Output Length
# Sentence to translate
test_sentence = "The quick brown fox jumps over the lazy dog."

# Function to translate with varying max_length
def translate_with_max_length(text, max_lengths, translator):
    """Translate ``text`` once for every value in ``max_lengths``.

    Args:
        text: Text handed to ``translator`` on every call.
        max_lengths: Iterable of values passed one at a time as ``max_length``.
        translator: Callable invoked as ``translator(text, max_length=value)``;
            the ``'translation_text'`` of its first result is kept.

    Returns:
        A list of ``(max_length, translation_text)`` tuples in the order of
        ``max_lengths``.
    """
    return [
        (limit, translator(text, max_length=limit)[0]['translation_text'])
        for limit in max_lengths
    ]


In [22]:
# max_length caps to compare
lengths = [10, 20, 40]

# One translation per cap, using the `translator` pipeline loaded earlier
results = translate_with_max_length(
    text=test_sentence,
    max_lengths=lengths,
    translator=translator,
)

# Report each cap together with the text it produced
print("Translations with varying max_length:\n")
for cap, french in results:
    print(f"Max Length = {cap}:\n{french}\n")


Translations with varying max_length:

Max Length = 10:
Le renard brun rapide saute sur le

Max Length = 20:
Le renard brun rapide saute sur le chien paresseux.

Max Length = 40:
Le renard brun rapide saute sur le chien paresseux.



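### Observations
- **max_length=10**: The translation is truncated mid-phrase ("Le renard brun rapide saute sur le"), because generation stops once the length limit is reached.
- **max_length=20**: Already enough for this short sentence: the output is complete and identical to the `max_length=40` result. Longer inputs could still be cut off at this limit.
- **max_length=40**: Safely captures the full meaning and grammar of the sentence.
- `max_length` is an upper bound on the length of the generated output (counted in tokens, not words or characters), not a target: raising it beyond what the sentence needs does not change the translation, while setting it too low truncates the output.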
