In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout

Definition of a plot function for training result visualization

In [None]:
def plot_results(history):
    """Plot accuracy and loss curves, one point per epoch.

    Parameters
    ----------
    history : object
        Object exposing a ``history`` attribute that maps metric names to
        per-epoch values (it is read with ``pd.DataFrame(history.history)``).
        The keys ``loss`` and ``accuracy`` are required; ``val_loss`` and
        ``val_accuracy`` are drawn when present. The legacy spellings ``acc``
        and ``val_acc`` are accepted as aliases.

    Raises
    ------
    ValueError
        If the training loss or training accuracy series is missing.
    """
    # Select metrics by name rather than overwriting the column labels by
    # position: a positional overwrite fails with a length mismatch as soon as
    # the history holds an extra key, and silently mislabels the curves when
    # the keys arrive in a different order.
    hist_df = pd.DataFrame(history.history).rename(
        columns={"acc": "accuracy", "val_acc": "val_accuracy"}
    )
    missing = [name for name in ("loss", "accuracy") if name not in hist_df.columns]
    if missing:
        raise ValueError(f"history is missing required metric(s): {missing}")

    # Epochs are numbered from 1 on the x axis.
    hist_df.index = np.arange(1, len(hist_df) + 1)

    fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(16, 10))
    panels = (
        (axs[0], "accuracy", "Accuracy"),
        (axs[1], "loss", "Loss"),
    )
    for ax, metric, title in panels:
        val_metric = f"val_{metric}"
        # Validation is drawn first so line colours match the previous layout.
        if val_metric in hist_df.columns:
            ax.plot(hist_df[val_metric], lw=3, label=f"Validation {title}")
        ax.plot(hist_df[metric], lw=3, label=f"Training {title}")
        ax.set_ylabel(title)
        ax.set_xlabel('Epoch')
        ax.grid()
        ax.legend(loc=0)

    plt.show()

## Preprocessing of the data

We load the IMDB reviews dataset directly through the tensorflow_datasets API and apply the usual preprocessing (shuffling, repeating, batching and prefetching) before feeding it to a neural network

In [None]:
import tensorflow_datasets as tfds

# Load every split of the IMDB reviews together with the dataset metadata.
datasets, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True)

batch_size = 32

# Split sizes are read from the metadata; they are used later to derive the
# number of steps per epoch.
train_size = info.splits["train"].num_examples
test_size = info.splits["test"].num_examples

# Training pipeline: shuffle, repeat, batch, prefetch.
train_set = (
    datasets["train"]
    .shuffle(10000)
    .repeat()
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation pipeline: same steps, without shuffling.
test_set = (
    datasets["test"]
    .repeat()
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

## Use of a pretrained embedding

We use a pretrained embedding loaded directly from tensorflow_hub

In [None]:
# Address of the pretrained text-embedding module on TensorFlow Hub.
EMBEDDING_URL = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1"
embed = hub.load(EMBEDDING_URL)

We test on two (famous) lines and check the shapes of the embedding results

In [None]:
# Two sample sentences used as a quick sanity check of the embedding module.
sample_lines = [
    "A thing of beauty is a joy forever",
    "If by dull rhymes our English must be chain'd",
]
embeddings = embed(sample_lines)

# Show the raw embedding values, then their shape.
print(embeddings)
print(embeddings.shape)

## Neural network model definition

Build a neural network using keras sequential layers

(you may have a look at https://keras.io/api/layers/)

In [None]:
# Question 1: Build a neural network using relevant layers, dimensions and activation functions (the input layer is already defined to help you)
model = tf.keras.models.Sequential([
    # Input layer: wraps the pretrained `embed` module. It is declared with a
    # string dtype, a scalar input shape and an output shape of [50].
    hub.KerasLayer(embed,
                   dtype=tf.string, input_shape=[], output_shape=[50]),
    # Add your own layers below (Dense and Dropout are already imported).
    #??????
    #??????
    #....
])

We check that everything is fine with the model as we defined it

In [None]:
# Display the summary of the model defined above.
model.summary()

We compile the model, choosing the relevant loss function, optimizer and metrics

(You may have a look at
https://keras.io/api/losses/
and
https://keras.io/api/optimizers/)

In [None]:
# Question 2: Choose a relevant loss function and optimizer for the training
# (the two assignments below are intentionally left blank: fill them in before running this cell)
loss_function = # ?????
optimizer = # ??????

# Accuracy is the only metric tracked in addition to the loss.
model.compile(loss=loss_function, optimizer=optimizer,
              metrics=["accuracy"])

We train the model on the dataset

In [None]:
# Question 3: Choose a relevant value for epochs
# (Start with a small value for epochs in order to save some computation time;
# the assignment below is intentionally left blank: fill it in before running this cell)
epochs = # ?????

# train_set and test_set are both built with .repeat(), so the number of
# batches per epoch is passed explicitly: size // batch_size for each split.
history = model.fit(train_set, steps_per_epoch=train_size // batch_size, epochs=epochs, validation_data=test_set, validation_steps=test_size // batch_size)

## Result visualization

In [None]:
# Draw the accuracy and loss curves from the history returned by model.fit.
plot_results(history)

In [None]:
# Question 4: What can you say about the results? Do they seem satisfactory to you? Do you see any sign of overfitting? If so, what kind of layers could you add to the Keras model in order to mitigate this phenomenon?