In [None]:
# ___________________________ Requirements
### install libraries & load libraries
import torch

!pip install transformers
from transformers import pipeline, AutoTokenizer, AutoProcessor, SeamlessM4TModel
from IPython.display import Audio

!pip install sacrebleu
!pip install evaluate
import evaluate




In [None]:
# ___________________________
### Exemplifying MT English to French with an updated Google’s T5 (Text-To-Text Transfer Transformer). After execution, the MT output "Alimentation de précision" is correct. Google’s T5-Base is an LLM with 223 million parameters and a transformer encoder-decoder architecture.

###--- --- --- Requirements
# import torch
# !pip install transformers
# from transformers import pipeline

###--- --- --- 3 lines of code
#1 instantiate pipeline
# `device` is passed explicitly: without it the pipeline keeps the model on the CPU
# even when a GPU is available in the environment.
pipe = pipeline(
    task="translation_en_to_fr",
    model="google-t5/t5-base",
    device=0 if torch.cuda.is_available() else -1,  # first GPU if present, otherwise CPU
)

#2 input data
text_data = "Precision feeding"

#3 show pipeline output
# The first result's 'translation_text' entry holds the translated string.
print(pipe(text_data)[0]['translation_text'])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Alimentation de précision


In [None]:
# ___________________________
### Exemplifying MT English to Spanish with an OPUS-MT model. After execution, the MT output "Alimentación de precisión" is correct. Helsinki-NLP/opus-mt-tc-big-en-es has 233 million parameters and is a base transformer model for translating from English to Spanish.

###--- --- --- Requirements
# import torch
# !pip install transformers
# from transformers import pipeline

###--- --- --- 3 lines of code
#1 instantiate pipeline
# `device` is passed explicitly: without it the pipeline keeps the model on the CPU
# even when a GPU is available in the environment.
pipe = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-tc-big-en-es",
    device=0 if torch.cuda.is_available() else -1,  # first GPU if present, otherwise CPU
)

#2 input data
text_data = "Precision feeding"

#3 show pipeline output
# The first result's 'translation_text' entry holds the translated string.
print(pipe(text_data)[0]['translation_text'])


config.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/466M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/337 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/804k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/824k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Alimentación de precisión


In [None]:
# ___________________________
### Exemplifying MT English to Chinese with falcon-7b-instruct and zero-shot prompting. After execution, the MT output is "准确喂". The falcon-7b-instruct is a decoder-only model with 7 billion parameters.

###--- --- --- Requirements
# import torch
# !pip install transformers accelerate
# from transformers import pipeline, AutoTokenizer

###--- --- --- 3 lines of code
#1 instantiate pipeline
# The same checkpoint id is used for both the model and its tokenizer.
falcon_checkpoint = "tiiuae/falcon-7b-instruct"
falcon_tokenizer = AutoTokenizer.from_pretrained(falcon_checkpoint)
pipe = pipeline(
    task="text-generation",
    model=falcon_checkpoint,
    tokenizer=falcon_tokenizer,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",           # device placement is left to the library
)

#2 input data
text_prompt = "Translate the English text to Chinese. Text: Precision feeding. Translation:"

#3 show pipeline output
# 'generated_text' of the first result is printed; it contains the prompt followed by the continuation.
generations = pipe(text_prompt)
print(generations[0]['generated_text'])




config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Translate the English text to Chinese. Text: Precision feeding. Translation: 准确喂


In [None]:
# ___________________________
### Exemplifying zero-shot prompting for MT from English to Spanish, French, and Chinese

###--- --- --- No-Code AI (prompting)

# Shared wording of the zero-shot translation prompt; only the target language and the text vary.
PROMPT_TEMPLATE = "Translate the English text to {language}. Text: {text}. Translation:"

# One prompt per target language (EN to ES, EN to FR, EN to CN), kept side by side
# so that all three stay available instead of each one replacing the previous one.
text_prompts = {
    language: PROMPT_TEMPLATE.format(language=language, text="algae")
    for language in ("Spanish", "French", "Chinese")
}

# Single-prompt name used elsewhere in the notebook; bound to the Chinese prompt.
text_prompt = text_prompts["Chinese"]


In [None]:
# ___________________________
### Exemplifying MT English to French with Google’s Flan-T5 model and zero-shot prompting. Google’s Flan-T5-Base is an encoder-decoder model with 248 million parameters.

###--- --- --- Requirements
# import torch
# !pip install transformers
# from transformers import pipeline

###--- --- --- 3 lines of code
#1 instantiate pipeline
# `device` is passed explicitly: without it the pipeline keeps the model on the CPU
# even when a GPU is available in the environment.
pipe = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    device=0 if torch.cuda.is_available() else -1,  # first GPU if present, otherwise CPU
)

#2 input data
text_prompt = "Translate from English to French: Precision feeding"

#3 show pipeline output
# max_length=20 is forwarded to generation; the first result's 'generated_text' is printed.
print(pipe(text_prompt, max_length=20)[0]['generated_text'])


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


alimentation précision


In [None]:
# ___________________________
### Exemplifying MT English to Spanish (text-to-text) with Facebook's SeamlessM4T. After execution, the MT output "Alimentación de precisión" is correct. Facebook's SeamlessM4T-Large has 2.3 billion parameters and the versatile SeamlessM4T architecture, including encoders and decoders, for sequential generation of text and speech.

###--- --- --- Requirements
# !pip install transformers
# from transformers import AutoProcessor, SeamlessM4TModel

#0 Preliminaries: specify model & processor
# Model and processor are loaded from the same checkpoint.
seamless_checkpoint = "facebook/hf-seamless-m4t-large"
model = SeamlessM4TModel.from_pretrained(seamless_checkpoint)
processor = AutoProcessor.from_pretrained(seamless_checkpoint)

###--- --- --- 3 lines of code: text-to-text translation
#1 encode input
text_inputs = processor(
    text="Precision feeding",
    src_lang="eng",
    return_tensors="pt",
)

#2 model output
# generate_speech=False requests text output only.
output_tokens = model.generate(**text_inputs, tgt_lang="spa", generate_speech=False)

#3 show decode output
# Take the first sequence of the first output, as a plain list, and decode it.
first_sequence_ids = output_tokens[0].tolist()[0]
print(processor.decode(first_sequence_ids, skip_special_tokens=True))


config.json:   0%|          | 0.00/2.56k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/9.44G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/3.35k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/1.78k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/19.5k [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.17M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.12k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.70k [00:00<?, ?B/s]

Alimentación de precisión


In [None]:
# ___________________________
### Exemplifying text-to-speech translation from English to Spanish with Facebook's SeamlessM4T.

###--- --- --- Requirements
# !pip install transformers
# from transformers import AutoProcessor, SeamlessM4TModel
# from IPython.display import Audio

#0 Preliminaries: specify model & processor
# Model and processor are loaded from the same checkpoint.
seamless_checkpoint = "facebook/hf-seamless-m4t-large"
model = SeamlessM4TModel.from_pretrained(seamless_checkpoint)
processor = AutoProcessor.from_pretrained(seamless_checkpoint)

###--- --- --- 3 lines of code: text-to-speech translation
#1 encode input
text_inputs = processor(
    text="Precision feeding",
    src_lang="eng",
    return_tensors="pt",
)

#2 model output
# The first element of the generate() result is moved to the CPU, converted to a
# NumPy array, and squeezed before playback.
generated_speech = model.generate(**text_inputs, tgt_lang="spa")[0]
output_audio = generated_speech.cpu().numpy().squeeze()

#3 load and play speech (audio created)
# Must stay the last expression of the cell so that the audio player is displayed.
Audio(output_audio, rate=model.config.sampling_rate)

In [None]:
# ___________________________
### Exemplifying the CHRF score with the MT output (the prediction) from Google’s Flan-T5-Base and the reference (the ground truth) from the multilingual FAO glossary that was created.

###--- --- --- Requirements
# !pip install sacrebleu
# !pip install evaluate
# import evaluate

###--- --- --- 3 lines of code
#1 load metric
chrf = evaluate.load("chrf")

#2 input data
# One prediction paired with one reference, position by position.
predictionsMT = ["alimentation précision"]
referencesFAO = ["alimentation de précision"]

#3 show metric score
# word_order=0 is passed explicitly; the returned dict reports the score together
# with the char_order, word_order and beta settings that were used.
chrf_result = chrf.compute(
    predictions=predictionsMT,
    references=referencesFAO,
    word_order=0,
)
print(chrf_result)


Downloading builder script:   0%|          | 0.00/9.01k [00:00<?, ?B/s]

{'score': 78.72272784814895, 'char_order': 6, 'word_order': 0, 'beta': 2}
