In [3]:
# Quiet TensorFlow's C++ logging so the binary-optimization (AVX2 / FMA)
# notice is not printed. The variable is assigned before `transformers` is
# imported so that it is already in place if that import loads TensorFlow.
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"

import transformers

# Only let error-level messages from the transformers library through,
# which keeps its warnings out of the cell outputs below.
transformers.logging.set_verbosity_error()

## 03.02. Content Creation

In [4]:
from transformers import pipeline

# Build a text-generation pipeline backed by the GPT-2 checkpoint.
text_generator = pipeline(task="text-generation", model="gpt2")

# Fix the random seed so the generated continuations are repeatable.
transformers.set_seed(1)

# Prompt that every generated sample will continue from.
input_text = (
    "Natural Language Processing is a "
    "growing domain in machine learning"
)

# Request three alternative continuations, each at most 50 new tokens long.
synthetic_text = text_generator(
    input_text,
    max_new_tokens=50,
    num_return_sequences=3,
)

# Print each continuation followed by a separator line.
for sample in synthetic_text:
    print(sample.get("generated_text"), "\n-----------------")
    

Natural Language Processing is a growing domain in machine learning. At Caltech's Computer Science & Artificial Intelligence Lab, we design, benchmark, and train applications to detect, extract, and interpret binary numbers and other information by searching neural networks. Our algorithms are designed to identify natural languages by the complexity of their 
-----------------
Natural Language Processing is a growing domain in machine learning, as well as as in non-interactive programming using machine learning techniques in general.

Machine Learning In Machine Learning

In addition to machine learning, machine learning also incorporates the use of many other processes, including reinforcement learning for examples, 
-----------------
Natural Language Processing is a growing domain in machine learning, providing solutions for many human-related problems including visual memory and complex language processing. Its main challenge has been overcoming the computational complexities of dee

## 03.04. Chatbot Conversation Example

In [6]:
from transformers import Conversation

# Chatbot pipeline backed by the small (90M) BlenderBot checkpoint.
# `pipeline` is the factory imported in the text-generation cell above.
# NOTE(review): newer transformers releases appear to have removed the
# "conversational" task and the Conversation class - confirm the pinned
# transformers version before re-running this section.
conversational_pipeline = pipeline(
    task="conversational",
    model="facebook/blenderbot_small-90M",
)

# Show the configuration of the model the pipeline loaded.
print(conversational_pipeline.model.config)

BlenderbotSmallConfig {
  "_name_or_path": "facebook/blenderbot_small-90M",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BlenderbotSmallForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 512,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 2048,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 8,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": true,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 2048,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 8,
  "eos_token_id": 2,
  "extra_pos_embeddings": 0,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id

In [8]:
# Sample user turns, sent to the bot in this order.
first_input = "Do you have any hobbies?"
second_input = "I like to watch movies"
third_input = "action movies"

# Each turn paired with the ordinal shown in its printed heading.
exchanges = [
    ("First", first_input),
    ("Second", second_input),
    ("Third", third_input),
]

# Start from an empty context; every turn (including the first) is then
# added the same way, so one loop replaces three copy-pasted exchanges.
bot_conversation = Conversation()

for turn, (ordinal, user_input) in enumerate(exchanges):
    print(f"\n{ordinal} Exchange: \n--------------------")

    bot_conversation.add_user_input(user_input)
    # The pipeline records the bot's reply on `bot_conversation` itself,
    # so the return value is not needed.
    conversational_pipeline(bot_conversation)

    # After the call, index `turn` holds this exchange's input and reply.
    print(" User Input:", bot_conversation.past_user_inputs[turn])
    print(" Bot Output:", bot_conversation.generated_responses[turn])

# Printing the conversation object shows the whole dialogue history.
print("\nAccessing All Responses: ")
print(bot_conversation)



First Exchange: 
--------------------
 User Input: Do you have any hobbies?
 Bot Output: yes, i love going to the beach. what about you? do you have any hobbies?

Second Exchange: 
--------------------
 User Input: I like to watch movies
 Bot Output: i love going to the beach. i also like to watch movies. what kind of movies do you like?

Third Exchange: 
--------------------
 User Input: action movies
 Bot Output: i love going to the beach as well. i like action movies as well, but i don't get to see them often.

Accessing All Responses: 
Conversation id: 49cd11cf-ea12-46a3-b054-4ad5317014ff
user: Do you have any hobbies?
assistant: yes, i love going to the beach. what about you? do you have any hobbies?
user: I like to watch movies
assistant: i love going to the beach. i also like to watch movies. what kind of movies do you like?
user: action movies
assistant: i love going to the beach as well. i like action movies as well, but i don't get to see them often.



## 03.06. Translating with Hugging Face

In [9]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load the T5 checkpoint and its matching tokenizer once; both translations
# below reuse them.
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
tokenizer = AutoTokenizer.from_pretrained("t5-base")

source_english = "Acme is a technology company based in New York and Paris"


def translate_from_english(text, target_language, max_length=40):
    """Translate English ``text`` into ``target_language`` with the loaded model.

    The task is selected by prefixing the text with
    "translate English to <target_language>: ".

    Args:
        text: English source sentence.
        target_language: Language name used in the task prefix,
            e.g. "German" or "French".
        max_length: Upper bound on the generated sequence length.

    Returns:
        The decoded translation with special tokens stripped.
    """
    encoded = tokenizer(
        f"translate English to {target_language}: {text}",
        return_tensors="pt",
    )
    # Forward the whole encoding (not just input_ids) so the attention mask
    # produced by the tokenizer reaches generate() as well.
    generated = model.generate(**encoded, max_length=max_length)
    return tokenizer.decode(generated[0], skip_special_tokens=True)


print("German Translation: ",
      translate_from_english(source_english, "German"))

print("French Translation: ",
      translate_from_english(source_english, "French"))

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

German Translation:  Acme ist ein Technologieunternehmen mit Sitz in New York und Paris.
French Translation:  Acme est une société technologique basée à New York et à Paris.
