### Section: A text classification case study

In [1]:
from tensorflow.keras.datasets import imdb

In [2]:
size_vocab = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=size_vocab)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
len(X_train), len(y_train)

(25000, 25000)

In [4]:
print(X_train[46])

[1, 568, 65, 9, 4689, 31, 7, 4, 118, 495, 34, 4, 1300, 7, 206, 309, 1079, 8, 85, 206, 108, 568, 65, 166, 2, 5, 5811, 168, 40, 2, 4, 3410, 139, 26, 73, 2, 5, 4, 206, 139, 26, 897, 48, 162, 347, 438, 47, 101, 281, 36, 62, 766, 14, 11, 2258]


In [5]:
word_idx = imdb.get_word_index()
#https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb/get_word_index

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [6]:
word_idx.get('text')

3001

In [7]:
idx_word = {id:token for token, id in word_idx.items()}

In [8]:
idx_word.get(3001)

'text'

In [9]:
def showreview(inp_ids):
  return " ".join([idx_word.get(x-3,'#') for x in inp_ids])

In [10]:
showreview(X_train[46])

'# police story is arguably one of the best works by the master of action himself compared to other action films police story makes # and stallone look like # the stunt scenes are well # and the action scenes are superb if new line cinema has any sense they would release this in theaters'

###Section: Recurrent architectures

In [11]:
import tensorflow as tf
import numpy as np
tf.random.set_seed(42)
np.random.seed(42)

In [12]:
from tensorflow.keras.preprocessing import sequence

In [13]:
max_len = 200
X_train_proc = sequence.pad_sequences(X_train, max_len)
X_test_proc = sequence.pad_sequences(X_test, max_len)

In [14]:
X_train_proc[46]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    1,  568,   65,    9, 4689,   

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.layers import Embedding, Dropout, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping

In [16]:
model_rnn = Sequential()
model_rnn.add(Embedding(input_dim=size_vocab, output_dim=32))
model_rnn.add(SpatialDropout1D(0.3))
model_rnn.add(SimpleRNN(32))
model_rnn.add(Dropout(0.4))
model_rnn.add(Dense(1, activation="sigmoid"))

In [17]:
model_rnn.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
model_rnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          320000    
                                                                 
 spatial_dropout1d (Spatial  (None, None, 32)          0         
 Dropout1D)                                                      
                                                                 
 simple_rnn (SimpleRNN)      (None, 32)                2080      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 322113 (1.23 MB)
Trainable params: 322113 (1.23 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

In [18]:
stop_early = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

In [19]:
model_rnn.fit(X_train_proc, y_train, epochs=20, validation_split=0.2, callbacks=[stop_early], batch_size=256)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


<keras.src.callbacks.History at 0x78b3eb7edde0>

### Section: RNN Architectures - LSTM

In [None]:
from tensorflow.keras.layers import LSTM

In [None]:
model_lstm = Sequential()
model_lstm.add(Embedding(input_dim=size_vocab, output_dim=32))
model_lstm.add(SpatialDropout1D(0.4))
model_lstm.add(LSTM(32))
model_lstm.add(Dropout(0.4))
model_lstm.add(Dense(1, activation="sigmoid"))

In [None]:
model_lstm.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

In [None]:
model_lstm.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 32)          320000    
                                                                 
 spatial_dropout1d_2 (Spatia  (None, None, 32)         0         
 lDropout1D)                                                     
                                                                 
 lstm_1 (LSTM)               (None, 32)                8320      
                                                                 
 dropout_2 (Dropout)         (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 328,353
Trainable params: 328,353
Non-trainable params: 0
________________________________________________

In [None]:
stop_early = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

In [None]:
model_lstm.fit(X_train_proc, y_train,
               epochs=20, validation_split=0.2,
               callbacks=[stop_early], batch_size=256)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


<keras.callbacks.History at 0x7f1bb623a9e0>

In [None]:
model_lstm.evaluate(X_test_proc, y_test)[1]



0.8721200227737427

### Section: RNN Architectures - GRU

In [None]:
from tensorflow.keras.layers import GRU

In [None]:
model_gru = Sequential()
model_gru.add(Embedding(input_dim=size_vocab, output_dim=32))
model_gru.add(SpatialDropout1D(0.4))
model_gru.add(GRU(32, reset_after=False))
model_gru.add(Dropout(0.4))
model_gru.add(Dense(1, activation="sigmoid"))

In [None]:
model_gru.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

In [None]:
model_gru.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, None, 32)          320000    
                                                                 
 spatial_dropout1d_4 (Spatia  (None, None, 32)         0         
 lDropout1D)                                                     
                                                                 
 gru_1 (GRU)                 (None, 32)                6240      
                                                                 
 dropout_4 (Dropout)         (None, 32)                0         
                                                                 
 dense_4 (Dense)             (None, 1)                 33        
                                                                 
Total params: 326,273
Trainable params: 326,273
Non-trainable params: 0
________________________________________________

In [None]:
stop_early = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

In [None]:
model_gru.fit(X_train_proc, y_train, epochs=20, validation_split=0.2, callbacks=[stop_early], batch_size=256)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


<keras.callbacks.History at 0x7f1c456b6d40>

In [None]:
model_gru.evaluate(X_test_proc, y_test)[1]



0.8570799827575684

### Section: Transformer architecture

In [None]:
from tensorflow.keras.layers import MultiHeadAttention

In [None]:
from tensorflow.keras.layers im

In [None]:
x = MultiHeadAttention(
    key_dim=head_size, num_heads=num_heads) (x, x)

### Section: Tuning network hyper-parameters

In [None]:
!pip install keras-tuner -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/176.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m174.1/176.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import keras_tuner as kt

### Exercise- Hyper-parameter tuning of GRU model

In [None]:
def model_builder(hparams):

  model = Sequential()

  hp_embedsize = hparams.Int('embedsize', min_value=16,
                             max_value=64, step=16)
  hp_units = hparams.Int('units', min_value=16,
                         max_value=64, step=16)
  hp_activation = hparams.Choice('activation',
   ['elu', 'relu', 'tanh'])

  model.add(Embedding(input_dim=size_vocab, output_dim=hp_embedsize))
  model.add(SpatialDropout1D(0.4))
  model.add(GRU(units=hp_units, activation=hp_activation, reset_after=False))
  model.add(Dropout(0.4))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

  return model

In [None]:
tuner = kt.RandomSearch(model_builder,
                        objective='val_accuracy',
                        max_trials=10,
                        overwrite=True)

In [None]:
stop_early = EarlyStopping(monitor='val_accuracy',
                           patience=5, restore_best_weights=True)

In [None]:
tuner.search(X_train_proc, y_train,
             epochs=20, validation_split=0.2,
             callbacks=[stop_early])

Trial 10 Complete [00h 06m 12s]
val_accuracy: 0.8794000148773193

Best val_accuracy So Far: 0.8809999823570251
Total elapsed time: 01h 05m 52s


In [None]:
res = tuner.get_best_hyperparameters()[0]
res.values

{'embedsize': 48, 'units': 64, 'activation': 'tanh'}

In [None]:
tuner.results_summary(3)

Results summary
Results in ./untitled_project
Showing 3 best trials
Objective(name="val_accuracy", direction="max")

Trial 04 summary
Hyperparameters:
embedsize: 48
units: 64
activation: tanh
Score: 0.8809999823570251

Trial 05 summary
Hyperparameters:
embedsize: 48
units: 64
activation: elu
Score: 0.8795999884605408

Trial 09 summary
Hyperparameters:
embedsize: 16
units: 32
activation: relu
Score: 0.8794000148773193


In [None]:
best_model = tuner.get_best_models()[0]
best_model.evaluate(X_test_proc, y_test)[1]



0.8701599836349487

### Exercise- Using 1D convolutions + RNNs

In [None]:
from tensorflow.keras.layers import Conv1D

In [None]:
model_comb = Sequential()
model_comb.add(Embedding(input_dim=size_vocab, output_dim=32))
model_comb.add(SpatialDropout1D(0.4))
model_comb.add(Conv1D(32, 2, activation='relu'))
model_comb.add(GRU(32, reset_after=False))
model_comb.add(Dropout(0.4))
model_comb.add(Dense(1, activation="sigmoid"))

In [None]:
model_comb.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

In [None]:
stop_early = EarlyStopping(monitor='val_accuracy',
                           patience=10,
                           restore_best_weights=True)

In [None]:
model_comb.fit(X_train_proc, y_train,
               epochs=20, validation_split=0.2,
               callbacks=[stop_early],
               batch_size=256)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20


<keras.callbacks.History at 0x7f6f407ee0e0>

In [None]:
model_comb.evaluate(X_test_proc, y_test)[1]



0.8613200187683105

### Section: Using pre-trained embeddings

In [None]:
!pip install tensorflow-hub -q

In [None]:
!pip install tensorflow-datasets -q

In [None]:
import tensorflow as tf, tensorflow_hub as tfhub
import tensorflow_datasets as tfds

In [None]:
train_data, valid_data, test_data = tfds.load(name="imdb_reviews",
                                  split=('train[:80%]', 'train[80%:]','test'),
                                  as_supervised=True)

In [None]:
train_examples_batch, _ = next(iter(train_data.batch(10)))

In [None]:
train_examples_batch[3]

<tf.Tensor: shape=(), dtype=string, numpy=b'This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful performances from Cher and Nicolas Cage (as always) gently row the plot along. There are no rapids to cross, no dangerous waters, just a warm and witty paddle through New York life at its best. A family film in every sense and one that deserves the praise it received.'>

In [None]:
embedding = "https://tfhub.dev/google/universal-sentence-encoder/4"
hub_layer = tfhub.KerasLayer(embedding, input_shape=[],
                             dtype=tf.string,
                             trainable=False)

In [None]:
hub_layer(train_examples_batch[3:4]).shape

TensorShape([1, 512])

In [None]:
hub_layer(train_examples_batch[3:4])[0][:10]

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([-0.00274222, -0.04518872,  0.02080956,  0.03594078, -0.03814794,
        0.07527251,  0.01817022, -0.02279347,  0.07460126,  0.0103387 ],
      dtype=float32)>

### Exercise- Text classification with pre-trained embeddings

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
model_emb = Sequential()
model_emb.add(hub_layer)

model_emb.add(Dense(128, activation='relu'))
model_emb.add(Dense(128, activation='relu'))
model_emb.add(Dense(1))
model_emb.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_1 (KerasLayer)  (None, 512)               256797824 
                                                                 
 dense_8 (Dense)             (None, 128)               65664     
                                                                 
 dense_9 (Dense)             (None, 128)               16512     
                                                                 
 dense_10 (Dense)            (None, 1)                 129       
                                                                 
Total params: 256,880,129
Trainable params: 82,305
Non-trainable params: 256,797,824
_________________________________________________________________


In [None]:
model_emb.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

In [None]:
stop_early = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

In [None]:
model_emb.fit(train_data.batch(512),
              epochs=10,
              validation_data = valid_data.batch(512),
              callbacks=[stop_early])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f1b89e9bdc0>

In [None]:
model_emb.evaluate(test_data.batch(512))[1]



0.8531200289726257