In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import seaborn
seaborn.set()
%matplotlib inline
# TensorFlow 2.x is required; print the installed version to confirm it.
print(tf.__version__)



2.0.0-beta0


In [3]:
# Load the IMDB reviews dataset in its 'subwords8k' configuration.
# with_info=True makes tfds.load return the dataset metadata (`info`) as well.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)

# Pull the two splits out of the returned collection by name.
train_dataset = dataset['train']
test_dataset = dataset['test']



In [4]:
# Encoder attached to the dataset's 'text' feature metadata; the model cells
# read tokenizer.vocab_size to size their Embedding layers.
tokenizer = info.features['text'].encoder

In [5]:
# Input-pipeline and training configuration shared by every model cell.
BUFFER_SIZE = 10000  # shuffle buffer size
BATCH_SIZE = 64      # examples per padded batch
NUM_EPOCHS = 10      # epochs passed to each model.fit call

train_dataset = train_dataset.shuffle(BUFFER_SIZE)

# padded_batch pads the elements of each batch to a common shape.
# `Dataset.output_shapes` is deprecated and is not available on TF2-native
# datasets, so the shapes are read through the compat helper, which returns
# the same structure and works across TF 2.x releases.
train_dataset_final = train_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(train_dataset))
test_dataset_final = test_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(test_dataset))

In [6]:
def plot_graphs(history, string):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch sequences (in this notebook, the value returned by
            ``model.fit``).
        string: key of the training metric in ``history.history``; the
            validation series is read from ``'val_' + string``.

    Raises:
        KeyError: if either key is missing from ``history.history``.
    """
    series_names = [string, 'val_' + string]

    # Draw the training curve first, then the validation curve.
    for series in series_names:
        plt.plot(history.history[series])

    plt.xlabel('Epochs')
    plt.ylabel(string)
    plt.legend(series_names)
    plt.grid()
    plt.show()

## MODEL1 with LSTM

In [None]:
# Model 1: embedding -> bidirectional LSTM (64 units) -> dense classifier.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(tokenizer.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

# Note: training this model is very slow.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_dataset_final,
                    epochs=NUM_EPOCHS,
                    validation_data=test_dataset_final)

# The metric is registered as 'accuracy' in compile(), so tf.keras 2.x stores
# it under 'accuracy' / 'val_accuracy' in history.history; looking it up as
# 'acc' raises KeyError.
plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')

## MODEL2 Single LSTM layer with fewer units

In [None]:
# Model 2: same architecture as the 64-unit LSTM model, but with a 32-unit LSTM.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(tokenizer.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

# Note: training this model is very slow.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_dataset_final,
                    epochs=NUM_EPOCHS,
                    validation_data=test_dataset_final)

# The metric is registered as 'accuracy' in compile(), so tf.keras 2.x stores
# it under 'accuracy' / 'val_accuracy' in history.history; looking it up as
# 'acc' raises KeyError.
plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')

## MODEL3 Multiple layers of LSTM

In [None]:
# Model 3: two stacked bidirectional LSTM layers. The first LSTM uses
# return_sequences=True so that it hands a full sequence to the second one.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(tokenizer.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(train_dataset_final,
                    epochs=NUM_EPOCHS,
                    validation_data=test_dataset_final, verbose=2)

# The metric is registered as 'accuracy' in compile(), so tf.keras 2.x stores
# it under 'accuracy' / 'val_accuracy' in history.history; looking it up as
# 'acc' raises KeyError.
plot_graphs(history, 'accuracy')
# Loss curve added so this cell reports the same plots as the other model cells.
plot_graphs(history, 'loss')

## MODEL4 Conv1D

In [None]:
# Model 4: embedding -> 1-D convolution (128 filters, kernel size 5) ->
# global average pooling -> dense classifier.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(tokenizer.vocab_size, 64),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_dataset_final,
                    epochs=NUM_EPOCHS,
                    validation_data=test_dataset_final, verbose=2)

# The metric is registered as 'accuracy' in compile(), so tf.keras 2.x stores
# it under 'accuracy' / 'val_accuracy' in history.history; looking it up as
# 'acc' raises KeyError.
plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')

## MODEL5 GRU 

In [None]:
# Model 5: embedding -> bidirectional GRU (32 units) -> dense classifier.
#
# The embedding previously referenced vocab_size, embedding_dim and max_length,
# none of which are defined in the visible notebook (NameError on a fresh kernel).
# It now uses the same embedding configuration as the other model cells
# (tokenizer.vocab_size, 64 dimensions). input_length is omitted because the
# input comes from padded_batch, which pads per batch rather than to one
# fixed length.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(tokenizer.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
    tf.keras.layers.Dense(6, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(train_dataset_final,
                    epochs=NUM_EPOCHS,
                    validation_data=test_dataset_final, verbose=2)

# The metric is registered as 'accuracy' in compile(), so tf.keras 2.x stores
# it under 'accuracy' / 'val_accuracy' in history.history; looking it up as
# 'acc' raises KeyError.
plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')