In [1]:
import tensorflow as tf
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
import pandas as pd
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load dataset
data = pd.read_csv('dataset.csv')
english_texts = data['English words/sentences'].tolist()
french_texts = data['French words/sentences'].tolist()

# Use only the first 5000 samples for training
english_texts = english_texts[:5000]
french_texts = french_texts[:5000]

# Load the pretrained model and tokenizer
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

# Prepare inputs for training
def preprocess_sentences(sentences, prefix="translate English to French: "):
    return [prefix + sentence for sentence in sentences]

english_texts_with_prefix = preprocess_sentences(english_texts)

# Tokenize input and output sequences
inputs = tokenizer(english_texts_with_prefix, return_tensors="tf", padding=True, truncation=True, max_length=128)
targets = tokenizer(french_texts, return_tensors="tf", padding=True, truncation=True, max_length=128)

# Create a simple Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"]),
    targets["input_ids"]
))

# Define the optimizer and loss function
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Training function
def train_step(input_batch, target_batch):
    with tf.GradientTape() as tape:
        outputs = model(input_ids=input_batch["input_ids"], attention_mask=input_batch["attention_mask"], labels=target_batch)
        loss = outputs.loss
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [3]:
# Train for a few steps
epochs = 3
for epoch in range(epochs):
    for input_batch, target_batch in train_dataset.batch(8):
        loss = train_step(input_batch, target_batch)
        print(f"Epoch {epoch + 1}, Loss: {loss.numpy()}")

# Translation (Inference)
def translate(sentence):
    input_text = "translate English to French: " + sentence
    inputs = tokenizer(input_text, return_tensors="tf", padding=True, truncation=True, max_length=128)
    outputs = model.generate(inputs["input_ids"], max_length=128)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example translation
print(translate("Hello, how are you?"))  # Replace with any sentence you want to test

Epoch 1, Loss: [12.417087]
Epoch 1, Loss: [11.747015]
Epoch 1, Loss: [11.7634115]
Epoch 1, Loss: [10.180937]
Epoch 1, Loss: [10.673882]
Epoch 1, Loss: [11.398996]
Epoch 1, Loss: [10.447904]
Epoch 1, Loss: [10.443795]
Epoch 1, Loss: [10.410525]
Epoch 1, Loss: [8.4669075]
Epoch 1, Loss: [7.9945793]
Epoch 1, Loss: [8.648028]
Epoch 1, Loss: [8.665652]
Epoch 1, Loss: [8.292768]
Epoch 1, Loss: [7.61665]
Epoch 1, Loss: [6.8143477]
Epoch 1, Loss: [7.6719728]
Epoch 1, Loss: [8.630557]
Epoch 1, Loss: [7.6223693]
Epoch 1, Loss: [8.042955]
Epoch 1, Loss: [8.276385]
Epoch 1, Loss: [7.7639747]
Epoch 1, Loss: [7.361193]
Epoch 1, Loss: [7.830888]
Epoch 1, Loss: [6.3838544]
Epoch 1, Loss: [5.480428]
Epoch 1, Loss: [4.495448]
Epoch 1, Loss: [4.6235747]
Epoch 1, Loss: [5.557089]
Epoch 1, Loss: [4.398739]
Epoch 1, Loss: [4.105227]
Epoch 1, Loss: [4.7733297]
Epoch 1, Loss: [4.156606]
Epoch 1, Loss: [4.1312547]
Epoch 1, Loss: [4.0731225]
Epoch 1, Loss: [4.088505]
Epoch 1, Loss: [3.3008726]
Epoch 1, Loss: [2

2024-09-06 19:05:39.032202: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 2, Loss: [0.4109174]
Epoch 2, Loss: [0.3713257]
Epoch 2, Loss: [0.22679846]
Epoch 2, Loss: [0.677808]
Epoch 2, Loss: [0.8618722]
Epoch 2, Loss: [0.65476173]
Epoch 2, Loss: [0.31064817]
Epoch 2, Loss: [0.6251273]
Epoch 2, Loss: [0.43638763]
Epoch 2, Loss: [0.45496047]
Epoch 2, Loss: [0.34290132]
Epoch 2, Loss: [0.40094152]
Epoch 2, Loss: [0.38310272]
Epoch 2, Loss: [0.5728142]
Epoch 2, Loss: [0.75683415]
Epoch 2, Loss: [0.90504557]
Epoch 2, Loss: [0.8644509]
Epoch 2, Loss: [0.3909976]
Epoch 2, Loss: [0.86229765]
Epoch 2, Loss: [0.57258976]
Epoch 2, Loss: [0.28269556]
Epoch 2, Loss: [0.13240977]
Epoch 2, Loss: [0.6030679]
Epoch 2, Loss: [0.71575373]
Epoch 2, Loss: [0.82261235]
Epoch 2, Loss: [0.26545143]
Epoch 2, Loss: [0.52370334]
Epoch 2, Loss: [1.1200804]
Epoch 2, Loss: [0.46305758]
Epoch 2, Loss: [0.56222636]
Epoch 2, Loss: [0.6369112]
Epoch 2, Loss: [0.6701623]
Epoch 2, Loss: [0.6226634]
Epoch 2, Loss: [1.0295986]
Epoch 2, Loss: [0.6721012]
Epoch 2, Loss: [0.48525122]
Epoch 2,

2024-09-06 19:22:56.347642: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 3, Loss: [0.38188592]
Epoch 3, Loss: [0.31968248]
Epoch 3, Loss: [0.20415926]
Epoch 3, Loss: [0.6210553]
Epoch 3, Loss: [0.815441]
Epoch 3, Loss: [0.62599313]
Epoch 3, Loss: [0.2741727]
Epoch 3, Loss: [0.59316653]
Epoch 3, Loss: [0.3871425]
Epoch 3, Loss: [0.3635746]
Epoch 3, Loss: [0.25058842]
Epoch 3, Loss: [0.29040086]
Epoch 3, Loss: [0.31169567]
Epoch 3, Loss: [0.4453898]
Epoch 3, Loss: [0.6589487]
Epoch 3, Loss: [0.8569605]
Epoch 3, Loss: [0.8081459]
Epoch 3, Loss: [0.32817987]
Epoch 3, Loss: [0.7893659]
Epoch 3, Loss: [0.5091187]
Epoch 3, Loss: [0.26169112]
Epoch 3, Loss: [0.10798517]
Epoch 3, Loss: [0.5629048]
Epoch 3, Loss: [0.64690953]
Epoch 3, Loss: [0.76076293]
Epoch 3, Loss: [0.16705465]
Epoch 3, Loss: [0.40091443]
Epoch 3, Loss: [0.9412113]
Epoch 3, Loss: [0.3902812]
Epoch 3, Loss: [0.4640524]
Epoch 3, Loss: [0.58260053]
Epoch 3, Loss: [0.5963268]
Epoch 3, Loss: [0.52573866]
Epoch 3, Loss: [0.9371168]
Epoch 3, Loss: [0.58955824]
Epoch 3, Loss: [0.40836585]
Epoch 3, L

I0000 00:00:1725640896.149235  790201 service.cc:146] XLA service 0x33bb57850 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1725640896.150115  790201 service.cc:154]   StreamExecutor device (0): Host, Default Version
2024-09-06 19:41:36.203919: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1725640896.379849  790201 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Hallo, comment êtes-vous?


In [4]:
# Save the model
save_directory = "./saved_model"

# Save the model
model.save_pretrained(save_directory)

# Save the tokenizer
tokenizer.save_pretrained(save_directory)

('./saved_model/tokenizer_config.json',
 './saved_model/special_tokens_map.json',
 './saved_model/tokenizer.json')

In [5]:
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer

# Load the saved model
model = TFAutoModelForSeq2SeqLM.from_pretrained(save_directory)

# Load the saved tokenizer
tokenizer = AutoTokenizer.from_pretrained(save_directory)

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at ./saved_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [6]:
texts = [
    "Hello, how are you?",
    "I am doing great, thank you!",
    "What are you working on?",
    "I am working on a project that involves machine translation.",
    "That's interesting. What languages are you working on?",
    "I am working on translating English to French.",
    "How is the project going?",
    "The project is going well. I have trained the model and it is working fine.",
    "That's great to hear. Keep up the good work!",
    "Thank you! I will keep you updated on the progress."
]

for text in texts:
    print(f"Input: {text}")
    print(f"Translation: {translate(text)}")
    print()

Input: Hello, how are you?
Translation: Hallo, comment êtes-vous?

Input: I am doing great, thank you!
Translation: Je fais une bonne chose, je vous remercie!

Input: What are you working on?
Translation:  quoi uvres-tu?

Input: I am working on a project that involves machine translation.
Translation: Je travaille à un projet qui implique la traduction de machines.

Input: That's interesting. What languages are you working on?
Translation: C'est intéressant, à quelles langues travaillez-vous?

Input: I am working on translating English to French.
Translation: Je travaille à traduire l'anglais en français.

Input: How is the project going?
Translation: Comment le projet va-t-il?

Input: The project is going well. I have trained the model and it is working fine.
Translation: Le projet va bien, j'ai formé le modèle et il fonctionne bien.

Input: That's great to hear. Keep up the good work!
Translation: C'est digne d'entendre, et c'est bien à entendre!

Input: Thank you! I will keep you up