##### Translation using Helsinki NLP OPUS MT model 

Using pipeline as a high-level helper.

Using as texts to translate the review titles from preprocessed dataset.

In [None]:
# imports
import pandas as pd
from transformers import pipeline
import time

In [2]:
# Load dataset and convert to dataframe
filename = './data/preprocessed/dataset_shape(108,5).csv'
df = pd.read_csv(filename, delimiter=',', index_col=None, header=0)

# Separe english and non-english texts
df_non_en = df[df['language'] != 'en']

In [3]:
# Boolean function definition to select romance languages
def languages_contain(language):
    languages = ['it', 'ca', 'rm', 'es', 'ro', 'gl', 'co', 'wa', 'pt', 'oc', 'an', 'id', 'fr', 'ht', 'roa', 'en']
    return language in languages

In [None]:
# Using pipeline

# Start calculating running time
start_time = time.time()

# Initialize lists to store translated texts
translated_texts = []

# Loop through all texts to be translated
for i in range(len(df_non_en)):
    text = df_non_en['review_title'].iloc[i]
    language = df_non_en['language'].iloc[i]
    if languages_contain(language):
        # Select appropriate model depending on source language
        model_checkpoint = 'Helsinki-NLP/opus-mt-roa-en'
    else:
        src = language
        model_checkpoint = f'Helsinki-NLP/opus-mt-{src}-en'
    
    translator = pipeline("translation", model=model_checkpoint)
    translation = translator(text)
    translated_texts.append(translation[0]['translation_text'])

# End timing
end_time = time.time()

# Calculate elapsed time
elapsed_time = end_time - start_time
print("Total time taken:", elapsed_time, "seconds")


In [5]:
(translated_texts[:len(translated_texts)])

['PERFECT!',
 'disappointment',
 'Very nice',
 'Very nice and comfortable',
 'Beautiful....sin for the number',
 'Well made shoes, excellent seller',
 'Very comfortable shoe',
 'Perfect.',
 'Large',
 'Stupendous',
 "It's cute, but it's a shame.",
 "It's light and very comfortable!",
 'Pain in the child...',
 'From the display it is not clear which size it is',
 'Very sensitive',
 'Be careful until you get used to it.',
 'The toes are fine.',
 'Great shoe, bad quality!!!',
 'Perfect smell',
 'Falling down narrower',
 'Favourite shoe!',
 'Top',
 "Great children's shoe",
 'Good design',
 "Great, I'm hitting it.",
 'Pressed from the top',
 'Original product',
 'Very good winter boots!',
 "Perfect even for slightly wider children's feet",
 'Super rubber boots',
 'Robust, warm winter rubber boots',
 'Beautiful Boy Boots',
 'Good winter rubber boots',
 'Great for Dinofans',
 'Resistant',
 'Beautiful boots but quite heavy',
 'Very satisfied',
 'Very comfortable',
 'Damaged',
 'Well-made sandal

#### Similar procedure but with T5 small model

In [8]:
# Using pipeline for T5 small model

# Start calculating running time
start_time = time.time()

# Initialize lists to store translated texts
translated_texts = []
model_checkpoint = 't5-small'
# Loop through all texts to be translated
for i in range(len(df_non_en)):
    text = df_non_en['review_title'].iloc[i]
    src_lang = df_non_en['language'].iloc[i]
    
    translator = pipeline(f"translation_{src_lang}_to_en", model=model_checkpoint)
    translation = translator(text, max_length=400)
    translated_texts.append(translation[0]['translation_text'])

# End timing
end_time = time.time()

# Calculate elapsed time
elapsed_time = end_time - start_time
print("Total time taken:", elapsed_time, "seconds")


Total time taken: 56.4572229385376 seconds


In [9]:
(translated_texts[:len(translated_texts)])

['PERFETTE!!',
 'a',
 'Molto belle',
 'Molto carine e comode',
 'Bellissime....peccato per il numero',
 'Scarpe ben realizzate, venditore ottimo',
 'Schuh, Sehr bequemer Schuh',
 'Perfetto',
 'Grandi',
 'Stupende',
 '',
 '!',
 'Schmerzen am Kind...',
 'Aus der Anzeige es wird nicht klar welche Größe es ist, wie groß es ist.',
 'Sehr empfindlich',
 '',
 '',
 'Toller Schuh, schlechte Qualität!!!',
 'Perfekt schuche',
 'Fällt schmäler aus',
 '!',
 'Top',
 'Toller Kinderschuh',
 'Buen diseo',
 'Sehr groß , mein Sohn liebt es!',
 'Apretado de la parte de arriba',
 'Producto Original',
 '!',
 'Perfekt auch für etwas breitere Kinderfüße Perfekt auch für etwas breitere Kinderfüße.',
 'Super Gummistiefel',
 'Robuste, warme Winter-Gummistiefel',
 'Schöner Jungen Stiefel',
 'Gut Gute Wintergummistiefel',
 'Toll für Dinofans',
 '',
 'Schön Schöne Stiefel aber ziemlich schwer',
 'sehr zufrieden sehr zufrieden sehr zufrieden sehr zufrieden sehr zufrieden! sehr zufrieden sehr zufrieden! sehr zufriede