In [None]:
from transformers import MarianMTModel, MarianTokenizer, M2M100ForConditionalGeneration

model_name = "Helsinki-NLP/opus-mt-en-fr"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

def translate(text):
    """
    Translate text with the module-level Marian tokenizer and model.

    Args:
        text: Input passed directly to the tokenizer (padding and
            truncation are enabled).

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated token
        ids, with special tokens stripped.
    """
    # Encode the input as PyTorch tensors.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Generate the output token ids, then turn them back into text.
    generated_ids = model.generate(**encoded)
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

def translate_and_merge(texts: list) -> str:
    """
    Translate a list of texts and merge them into a single string.

    Args:
        texts (list): List of strings to translate. A bare string is also
            accepted and is treated as a single text rather than being
            iterated character by character.

    Returns:
        str: The translations joined by single spaces; an empty string
            when ``texts`` is empty.
    """
    # A str is itself iterable: without this guard every character would be
    # sent to translate() as its own "text", which is extremely slow and
    # yields a meaningless result.
    if isinstance(texts, str):
        texts = [texts]

    # translate() returns a list of decoded strings; keep the first entry
    # of each call, as one text is passed in at a time.
    translated_texts = [translate(text)[0] for text in texts]

    # Merge the translated texts into a single string
    return " ".join(translated_texts)



KeyboardInterrupt: 

In [None]:
import nltk.data

# Load the English Punkt tokenizer resource shipped with NLTK data.
tokenizer_nltk = nltk.data.load("tokenizers/punkt/english.pickle")

# Sample input, split with the loaded tokenizer.
text = "Would you like to go out for dinner? I would love to go out for dinner!"
text_split = tokenizer_nltk.tokenize(text)

# Last expression of the cell: displays the resulting list.
text_split

['Would you like to go out for dinner?', 'I would love to go out for dinner!']

In [None]:
# Translate each split sentence separately; every call returns its own list.
[translate(sentence) for sentence in text_split]

[['Voulez-vous sortir dîner ?'], ["J'adorerais sortir dîner !"]]

In [None]:
import pandas as pd

df = pd.read_csv("./Datasets/email_text.csv")


def _translate_email(body):
    """Split one email body into sentences, translate them, and re-join."""
    # Missing cells are read by pandas as NaN (a float), not as a string;
    # treat them, and blank strings, as having nothing to translate.
    if not isinstance(body, str) or not body.strip():
        return ""
    # translate_and_merge expects a list of texts. Passing the raw string
    # would make it iterate (and translate) one character at a time, so
    # split the body into sentences first.
    return translate_and_merge(tokenizer_nltk.tokenize(body))


df["French Text"] = df["text"].apply(_translate_email)

KeyboardInterrupt: 