# Setup

This project requires Python 3.7 or above:

In [None]:
import sys
assert sys.version_info >= (3, 7)

And TensorFlow ≥ 2.8:

In [None]:
from packaging import version
import tensorflow as tf
assert version.parse(tf.__version__) >= version.parse("2.8.0")

As we did in earlier chapters, let's define the default font sizes to make the figures prettier:

In [None]:
import matplotlib.pyplot as plt

plt.rc('font', size=14)
plt.rc('axes', labelsize=14, titlesize=14)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)

In [None]:
import tensorflow_datasets as tfds
import numpy as np

And let's create the `images/nlp` folder (if it doesn't already exist), and define the `save_fig()` function, which is used throughout this notebook to save the figures in high-res for the book:

In [None]:
from pathlib import Path

IMAGES_PATH = Path() / "images" / "nlp"
IMAGES_PATH.mkdir(parents=True, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure into the IMAGES_PATH folder.

    Args:
        fig_id: Base name of the output file (without the extension).
        tight_layout: When true, `plt.tight_layout()` is called before saving.
        fig_extension: File extension; also passed to `savefig` as `format`.
        resolution: Passed to `savefig` as `dpi`.
    """
    if tight_layout:
        plt.tight_layout()
    file_name = f"{fig_id}.{fig_extension}"
    plt.savefig(IMAGES_PATH / file_name, format=fig_extension, dpi=resolution)

This chapter can be very slow without a GPU, so let's make sure there's one, or else issue a warning:

In [None]:
if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected. Neural nets can be very slow without a GPU.")
    if "google.colab" in sys.modules:
        print("Go to Runtime > Change runtime and select a GPU hardware "
              "accelerator.")
    if "kaggle_secrets" in sys.modules:
        print("Go to Settings > Accelerator and select GPU.")

# Generating Shakespearean Text Using a Character RNN

## Creating the Training Dataset

Let's download the Shakespeare data from Andrej Karpathy's [char-rnn project](https://github.com/karpathy/char-rnn/):

In [None]:
import tensorflow as tf

shakespeare_url = "https://homl.info/shakespeare"  # shortcut URL
# get_file() downloads the file and returns the path of the local copy
filepath = tf.keras.utils.get_file("shakespeare.txt", shakespeare_url)
# read with an explicit encoding so the result does not depend on the
# platform's default locale encoding
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

Downloading data from https://homl.info/shakespeare


In [None]:
# extra code – shows a short text sample
print(shakespeare_text[78000:78850])

 house;
Leave us to cure this cause.

MENENIUS:
For 'tis a sore upon us,
You cannot tent yourself: be gone, beseech you.

COMINIUS:
Come, sir, along with us.

CORIOLANUS:
I would they were barbarians--as they are,
Though in Rome litter'd--not Romans--as they are not,
Though calved i' the porch o' the Capitol--

MENENIUS:
Be gone;
Put not your worthy rage into your tongue;
One time will owe another.

CORIOLANUS:
On fair ground
I could beat forty of them.

COMINIUS:
I could myself
Take up a brace o' the best of them; yea, the
two tribunes:
But now 'tis odds beyond arithmetic;
And manhood is call'd foolery, when it stands
Against a falling fabric. Will you hence,
Before the tag return? whose rage doth rend
Like interrupted waters and o'erbear
What they are used to bear.

MENENIUS:
Pray you, be gone:
I'll try whether my old wit be in request



In [None]:
# extra code – shows all 39 distinct characters (after converting to lower case)
"".join(sorted(set(shakespeare_text.lower())))

"\n !$&',-.3:;?abcdefghijklmnopqrstuvwxyz"

In [None]:
# TextVectorization is a preprocessing layer which maps text features to integer sequences.
# trying to predict next character
text_vec_layer = tf.keras.layers.TextVectorization(split="character",
                                                   standardize="lower")
text_vec_layer.adapt([shakespeare_text])
encoded = text_vec_layer([shakespeare_text])[0]
encoded

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([21,  7, 10, ..., 22, 28, 12])>

In [None]:
# Drop tokens 0 (pad) and 1 (unknown), which we will not use. The IDs are
# re-derived from the layer (rather than shifted in place with `encoded -= 2`)
# so that re-running this cell does not subtract 2 a second time.
encoded = text_vec_layer([shakespeare_text])[0] - 2
n_tokens = text_vec_layer.vocabulary_size() - 2  # number of distinct chars = 39
dataset_size = len(encoded)  # total number of chars = 1,115,394

In [None]:
# number of distinct characters
n_tokens

39

In [None]:
# dataset size
dataset_size

1115394

In [None]:
# define a function to:
# take a sequence of text and convert to data from our mapping
def to_dataset(sequence, length, shuffle=False, seed=None, batch_size=32):
    """Turn a sequence of token IDs into batches of (input, target) windows.

    Every window holds `length + 1` consecutive elements of `sequence`
    (windows are shifted by one element, incomplete windows are dropped).
    The input is the first `length` elements of a window and the target is
    the last `length` elements, i.e. the input shifted one step ahead.

    Args:
        sequence: Token IDs to slice into windows.
        length: Number of time steps in each input/target sequence.
        shuffle: Whether to shuffle the windows (buffer of 100,000 windows).
        seed: Seed passed to the shuffle operation.
        batch_size: Number of windows per batch.

    Returns:
        A `tf.data.Dataset` yielding `(inputs, targets)` batches, with a
        prefetch of one batch.
    """
    window_length = length + 1
    windows = (
        tf.data.Dataset.from_tensor_slices(sequence)
        .window(window_length, shift=1, drop_remainder=True)
        # each window is itself a dataset: flatten it into a single tensor
        .flat_map(lambda nested: nested.batch(window_length))
    )
    if shuffle:
        windows = windows.shuffle(100_000, seed=seed)

    def split_inputs_targets(batch):
        # inputs: all steps but the last; targets: all steps but the first
        return batch[:, :-1], batch[:, 1:]

    return windows.batch(batch_size).map(split_inputs_targets).prefetch(1)

In [None]:
# extra code – a simple example using to_dataset()
# There's just one sample in this dataset: the input represents "to b" and the
# output represents "o be"
list(to_dataset(text_vec_layer(["To be"])[0], length=4))

[(<tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[ 4,  5,  2, 23]])>,
  <tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[ 5,  2, 23,  3]])>)]

In [None]:
# build the training, validation, and test sets from the encoded
# Shakespeare text: the first 1,000,000 characters for training, the next
# 60,000 for validation, and the remainder for testing
length = 100
tf.random.set_seed(42)
train_set = to_dataset(encoded[:1_000_000], length=length, shuffle=True,
                       seed=42)
valid_set = to_dataset(encoded[1_000_000:1_060_000], length=length)
test_set = to_dataset(encoded[1_060_000:], length=length)

## Building and Training the Char-RNN Model

**Warning**: the following code may take one or two hours to run, depending on your GPU. Without a GPU, it may take over 24 hours. If you don't want to wait, just skip the next two code cells and run the code below to download a pretrained model.

**Note**: the `GRU` class will only use cuDNN acceleration (assuming you have a GPU) when using the default values for the following arguments: `activation`, `recurrent_activation`, `recurrent_dropout`, `unroll`, `use_bias` and `reset_after`.

In [None]:
#tf.random.set_seed(42)  # extra code – ensures reproducibility on CPU
#model = tf.keras.Sequential([
#    tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16),
#    tf.keras.layers.GRU(128, return_sequences=True),
#    tf.keras.layers.Dense(n_tokens, activation="softmax")
#])
#model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam",
#              metrics=["accuracy"])
#model_ckpt = tf.keras.callbacks.ModelCheckpoint(
#    "my_shakespeare_model", monitor="val_accuracy", save_best_only=True)
#history = model.fit(train_set, validation_data=valid_set, epochs=10,
#                    callbacks=[model_ckpt])

In [None]:
#shakespeare_model = tf.keras.Sequential([
#    text_vec_layer,
#    tf.keras.layers.Lambda(lambda X: X - 2),  # no <PAD> or <UNK> tokens
#    model
#])

If you don't want to wait for training to complete, the authors of HOML3 have pretrained a model for you. The following code downloads it and loads it as `shakespeare_model`. Since the training cells above are commented out in this notebook, this pretrained model is the one used in the cells below.

In [None]:
# extra code – downloads a pretrained model
url = "https://github.com/ageron/data/raw/main/shakespeare_model.tgz"
path = tf.keras.utils.get_file("shakespeare_model.tgz", url, extract=True)
model_path = Path(path).with_name("shakespeare_model")
shakespeare_model = tf.keras.models.load_model(model_path)

Downloading data from https://github.com/ageron/data/raw/main/shakespeare_model.tgz


In [None]:
# predict the most probable character to complete
# "to be or not to b"
y_proba = shakespeare_model.predict(["To be or not to b"])[0, -1]
y_pred = tf.argmax(y_proba)  # choose the most probable character ID
text_vec_layer.get_vocabulary()[y_pred + 2]



'e'

In [None]:
y_proba = shakespeare_model.predict(["Tomorrow and tomorrow and tomorrows c"])[0, -1]
y_pred = tf.argmax(y_proba)  # choose the most probable character ID
text_vec_layer.get_vocabulary()[y_pred + 2]



'o'

## Generating Fake Shakespearean Text

In [None]:

log_probas = tf.math.log([[0.5, 0.4, 0.1]])  # probas = 50%, 40%, and 10%
tf.random.set_seed(42)
# each sample is a class index (0, 1 or 2) drawn from the distribution above
char = tf.random.categorical(log_probas, num_samples=8)  # draw 8 samples
char  # last expression of the cell, so the drawn samples are displayed



In [None]:
# function to predict next character
# temperature is a scaling factor to adjust probabilities
def next_char(text, temperature=1):
    """Sample the character that follows `text` according to the model.

    Args:
        text: Prompt string fed to `shakespeare_model`.
        temperature: Divisor applied to the log-probabilities before
            sampling. Values close to 0 favor the most likely characters,
            while large values flatten the distribution.

    Returns:
        The sampled character, looked up in the vocabulary of
        `text_vec_layer`.
    """
    # keep only the last time step, as a (1, n_classes) slice
    last_step_proba = shakespeare_model.predict([text])[0, -1:]
    scaled_log_proba = tf.math.log(last_step_proba) / temperature
    sampled = tf.random.categorical(scaled_log_proba, num_samples=1)
    vocabulary = text_vec_layer.get_vocabulary()
    # + 2: the model's class IDs skip the pad and unknown vocabulary entries
    return vocabulary[sampled[0, 0] + 2]

In [None]:
# function to predict next text of quote
# again n_chars is number of characters to predict
# temperature is scaling factor and higher values give
# us lower probability text
def extend_text(text, n_chars=50, temperature=1):
    """Append `n_chars` sampled characters to `text`, one at a time.

    Each new character is drawn with `next_char()` from the text generated
    so far, using the given sampling `temperature`.

    Returns:
        The prompt followed by the generated characters.
    """
    generated = text
    for _step in range(n_chars):
        generated = generated + next_char(generated, temperature)
    return generated

In [None]:
tf.random.set_seed(42)  # extra code – ensures reproducibility on CPU

In [None]:
print(extend_text("To be or not to be", temperature=0.01))

To be or not to be the duke
as it is a proper strange death,
and the


In [None]:

#print(extend_text("To be or not to be", temperature=1))

In [None]:
# higher temperatures essentially give us gibberish
print(extend_text("To be or not to be", temperature=100))

To be or not to beg3cc br
dm;'aet.nyr$bptx
qh,cr:
v3-oevb-si?xr&zl3y


In [None]:
# very low temperature (0.01), this time with a different prompt
print(extend_text("Tomorrow and tomorrow and tomorrow ",temperature = 0.01))

Tomorrow and tomorrow and tomorrow nothing
that i should prove a proper strange death


In [None]:
print(extend_text("Romeo, Romeo, wherefore art thou",temperature = 0.01))

Romeo, Romeo, wherefore art thou art to the death,
and then the strange daughter i


# Sentiment Analysis

In [None]:
import tensorflow_datasets as tfds

# load in imdb reviews
raw_train_set, raw_valid_set, raw_test_set = tfds.load(
    name="imdb_reviews",
    split=["train[:90%]", "train[90%:]", "test"],
    as_supervised=True
)



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteEUEEJP/imdb_reviews-train.tfrecord…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteEUEEJP/imdb_reviews-test.tfrecord*…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteEUEEJP/imdb_reviews-unsupervised.t…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [None]:
tf.random.set_seed(42)
train_set = raw_train_set.shuffle(5000, seed=42).batch(32).prefetch(1)
valid_set = raw_valid_set.batch(32).prefetch(1)
test_set = raw_test_set.batch(32).prefetch(1)

In [None]:
# so in the imdb dataset
# there are two labels 0 and 1 for negative and positive, respectively
for review, label in raw_train_set.take(2):
    print(review.numpy().decode("utf-8")[:250], "...")
    print("Label:", label.numpy())

This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline ...
Label: 0
I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion I fell asleep because the film wa ...
Label: 0


In [None]:
# specify how large our vocabulary will be
vocab_size = 1000
# vectorization layer limited to vocab_size tokens
# NOTE: this rebinds `text_vec_layer`, replacing the character-level layer that
# next_char() in the Shakespeare section reads; re-create the character-level
# layer before re-running that section
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens=vocab_size)
# learn the vocabulary from the review texts only (the labels are dropped)
text_vec_layer.adapt(train_set.map(lambda reviews, labels: reviews))

**Warning**: the following cell will take a few minutes to run and the model will probably not learn anything because we didn't mask the padding tokens (that's the point of the next section).

In [None]:
embed_size = 128
tf.random.set_seed(42)
model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Embedding(vocab_size, embed_size),
    tf.keras.layers.GRU(128),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="nadam",
              metrics=["accuracy"])
history = model.fit(train_set, validation_data=valid_set, epochs=2)

Epoch 1/2
Epoch 2/2


## Masking

In [None]:
# We are not going to run the following since we do similar things below
#
# Masking does not remove anything from the text: it tells the downstream
# layers to ignore the padding tokens (ID 0) that pad the shorter reviews

**Warning**: the following cell will take a while to run (possibly 30 minutes if you are not using a GPU).

In [None]:
#embed_size = 128
#tf.random.set_seed(42)
#model = tf.keras.Sequential([
#    text_vec_layer,
#    tf.keras.layers.Embedding(vocab_size, embed_size, mask_zero=True),
#    tf.keras.layers.GRU(128),
#    tf.keras.layers.Dense(1, activation="sigmoid")
#])
#model.compile(loss="binary_crossentropy", optimizer="nadam",
#              metrics=["accuracy"])
#history = model.fit(train_set, validation_data=valid_set, epochs=5)

Or using manual masking:

**Warning**: the following cell will take a while to run (possibly 30 minutes if you are not using a GPU).

In [None]:
# extra code – compiles and trains the model, as usual
#model.compile(loss="binary_crossentropy", optimizer="nadam",
#              metrics=["accuracy"])
#history = model.fit(train_set, validation_data=valid_set, epochs=5)

## Reusing Pretrained Embeddings and Language Models

**Warning**: the following cell will take a while to run (possibly an hour if you are not using a GPU).

In [None]:
import os
import tensorflow_hub as hub

os.environ["TFHUB_CACHE_DIR"] = "my_tfhub_cache"
tf.random.set_seed(42)  # extra code – ensures reproducibility on CPU
model = tf.keras.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4",
                   trainable=True, dtype=tf.string, input_shape=[]),
    tf.keras.layers.Dense(64, activation="relu"),
    # sigmoid gives a value between 0 and 1
    tf.keras.layers.Dense(1, activation="sigmoid")
])


KeyboardInterrupt: ignored

In [None]:
# this took too long, so I didn't let it finish:
# I gave up after 30 minutes
#
# training on IMDB database
#model.compile(loss="binary_crossentropy", optimizer="nadam",
#              metrics=["accuracy"])
#model.fit(train_set, validation_data=valid_set, epochs=10)

NameError: ignored

# HuggingFace

Install the Transformers and Datasets libraries if we're running on Colab:

In [None]:
if "google.colab" in sys.modules:
    %pip install -q -U transformers
    %pip install -q -U datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

## First sentiment classifier

In [None]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis")  # many other tasks are available
result = classifier("The actors were very convincing.")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading (…)lve/main/config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [None]:
result

[{'label': 'POSITIVE', 'score': 0.9998071789741516}]

Models can be very biased. For example, it may like or dislike some countries depending on the data it was trained on, and how it is used, so use it with care:

In [None]:
classifier(["I am from India.", "I am from Iraq."])

[{'label': 'POSITIVE', 'score': 0.9896161556243896},
 {'label': 'NEGATIVE', 'score': 0.9811071157455444}]

In [None]:
classifier(["I am anti-Muslim","I have bright green hair."])

[{'label': 'NEGATIVE', 'score': 0.9950698614120483},
 {'label': 'POSITIVE', 'score': 0.9997227787971497}]

In [None]:
classifier(["I own a Stars & Bars flag","I have a Confederate flag."])

[{'label': 'POSITIVE', 'score': 0.9961159229278564},
 {'label': 'NEGATIVE', 'score': 0.9426649808883667}]

In [None]:
classifier(["I am anti-Muslim","I have bright green hair."])

[{'label': 'NEGATIVE', 'score': 0.9950698614120483},
 {'label': 'POSITIVE', 'score': 0.9997227787971497}]

In [None]:
classifier(["I am from North Korea","I am from South Korea"])

[{'label': 'NEGATIVE', 'score': 0.8256769776344299},
 {'label': 'POSITIVE', 'score': 0.8110870122909546}]

In [None]:
classifier(["I am a libertarian","I am a communist"])

[{'label': 'NEGATIVE', 'score': 0.9208695888519287},
 {'label': 'NEGATIVE', 'score': 0.9864199757575989}]

In [None]:
classifier(["I am a socialist","I am a capitalist"])

[{'label': 'NEGATIVE', 'score': 0.9238114356994629},
 {'label': 'NEGATIVE', 'score': 0.9574117064476013}]

In [None]:
classifier(["Donald Trump","Kim Jong-Un"])

[{'label': 'POSITIVE', 'score': 0.99681156873703},
 {'label': 'POSITIVE', 'score': 0.9928004741668701}]

### Second Sentiment Classifier

In [None]:
# use a Hugging Face DistilBERT model fine-tuned on MNLI for text classification
# this classifier outputs one of three classes:
# 0 = contradiction, 1 = entailment, 2 = neutral
# the input is two sentences separated by [SEP]; entailment means that the
# second sentence follows from the first one
model_name = "huggingface/distilbert-base-uncased-finetuned-mnli"
classifier_mnli = pipeline("text-classification", model=model_name)
classifier_mnli("She loves me. [SEP] She loves me not.")

Downloading (…)lve/main/config.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

[{'label': 'contradiction', 'score': 0.9790191650390625}]

In [None]:
classifier_mnli("To be or not to be that is the question")

[{'label': 'entailment', 'score': 0.7332963943481445}]

In [None]:
classifier_mnli("whether it is nobler in the mind's eye to suffer the slings and arrows")

[{'label': 'neutral', 'score': 0.40238088369369507}]

In [None]:
classifier_mnli("How are you doing")

[{'label': 'neutral', 'score': 0.40079382061958313}]

In [None]:
# the pipeline ignores extra positional arguments ("Ignoring args"), so both
# sentences are passed as a single "[SEP]"-separated string
classifier_mnli("Donal Trump is the next presidential favorite [SEP] "
                "Who will win the next presidential election?")

Ignoring args : ('Who will win the next presidential election?',)


[{'label': 'neutral', 'score': 0.6463461518287659}]

In [None]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
# Tokenization is the act of breaking up a sequence of strings into pieces such as words, keywords,
# phrases, symbols and other elements called tokens.
# Tokens can be individual words, phrases or even whole sentences.

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)



Downloading tf_model.h5:   0%|          | 0.00/268M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at huggingface/distilbert-base-uncased-finetuned-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [None]:
token_ids = tokenizer(["I like soccer. [SEP] We all love soccer!",
                       "Joe lived for a very long time. [SEP] Joe is old."],
                      padding=True, return_tensors="tf")
token_ids

{'input_ids': <tf.Tensor: shape=(2, 15), dtype=int32, numpy=
array([[ 101, 1045, 2066, 4715, 1012,  102, 2057, 2035, 2293, 4715,  999,
         102,    0,    0,    0],
       [ 101, 3533, 2973, 2005, 1037, 2200, 2146, 2051, 1012,  102, 3533,
        2003, 2214, 1012,  102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(2, 15), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}

In [None]:
# run the model on the tokenized batch: the output holds the raw class
# logits (one row per input, one column per class), not embedding vectors
outputs = model(token_ids)
outputs

TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-2.1123812 ,  1.178679  ,  1.4101001 ],
       [-0.01478288,  1.0962466 , -0.9919953 ]], dtype=float32)>, hidden_states=None, attentions=None)

In [None]:
Y_probas = tf.keras.activations.softmax(outputs.logits)
Y_probas

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.01619703, 0.43523598, 0.54856706],
       [0.22655995, 0.6881722 , 0.08526783]], dtype=float32)>

In [None]:
Y_pred = tf.argmax(Y_probas, axis=1)
Y_pred  # 0 = contradiction, 1 = entailment, 2 = neutral

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([2, 1])>

Code below took a couple minutes to run

In [None]:
# two sentence pairs; each tuple is tokenized as one pair
sentences = [("Sky is blue", "Sky is red"), ("I love her", "She loves me")]
# .data is the dict holding the tokenizer's output tensors, passed to fit() below
X_train = tokenizer(sentences, padding=True, return_tensors="tf").data
y_train = tf.constant([0, 2])  # contradiction, neutral
# from_logits=True because the model outputs logits rather than probabilities
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer="nadam", metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=2)

Epoch 1/2
Epoch 2/2


In [None]:
# wrap the pair in a list: a bare tuple of two strings is tokenized as a batch
# of two separate sentences (two rows of logits) rather than as one pair
model.predict(tokenizer([sentences[0]], padding=True, return_tensors="tf").data)



TFSequenceClassifierOutput(loss=None, logits=array([[ 5.4515824, -2.918422 , -3.265358 ],
       [ 5.4962416, -2.7838924, -3.4216917]], dtype=float32), hidden_states=None, attentions=None)

In [None]:
# wrap the pair in a list: a bare tuple of two strings is tokenized as a batch
# of two separate sentences (two rows of logits) rather than as one pair
model.predict(tokenizer([sentences[1]], padding=True, return_tensors="tf").data)



TFSequenceClassifierOutput(loss=None, logits=array([[ 3.9638326, -2.6616502, -1.9187839],
       [ 3.6690125, -2.76269  , -1.5889317]], dtype=float32), hidden_states=None, attentions=None)

In [None]:
### Task
# Try fitting the above code with two new phrases
# be sure to also give the model both X_train and y_train