# University of Stavanger DAT550 - Data Mining and Deep Learning
## Hands-on 10: Text classification using LSTMs, hyper-parameter tuning and TensorBoard

In [1]:
import itertools
import os

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from keras_preprocessing import sequence
from sklearn import metrics
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.datasets import imdb
os.environ["CUDA_VISIBLE_DEVICES"]="7"

In [2]:
tf.__version__

'2.1.0'

In [40]:
# Sequence length passed as `maxlen` when the reviews are padded/truncated,
# and as the input length of the model.
max_len = 100
# Vocabulary cap passed to imdb.load_data as `num_words`.
number_of_words = 10000

In [41]:
# Reserved token ids. Real words are shifted by `index_offset` when the data
# is loaded (presumably so they start above the reserved ids -- this relies on
# the dataset's word indices starting at 1).
pad_id = 0
start_id = 1
oov_id = 2
index_offset = 2

# Load the IMDB reviews as lists of token ids, capped at `number_of_words`.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=number_of_words,
    start_char=start_id,
    oov_char=oov_id,
    index_from=index_offset,
)

# word -> integer index mapping shipped with the dataset
word2idx = tf.keras.datasets.imdb.get_word_index()

# Reverse lookup (token id -> word), applying the same shift used by load_data,
# then register the three reserved tokens.
idx2word = {index + index_offset: token for token, index in word2idx.items()}
idx2word.update({pad_id: '<PAD>', start_id: '<START>', oov_id: '<OOV>'})

rnn_cell_size = max_len

# Both splits are padded / truncated at the end to exactly `max_len` tokens.
pad_options = dict(maxlen=max_len, truncating='post', padding='post', value=pad_id)
x_train = sequence.pad_sequences(x_train, **pad_options)
x_test = sequence.pad_sequences(x_test, **pad_options)


In [42]:
x_train[0]

array([   1,   13,   21,   15,   42,  529,  972, 1621, 1384,   64,  457,
       4467,   65, 3940,    3,  172,   35,  255,    4,   24,   99,   42,
        837,  111,   49,  669,    2,    8,   34,  479,  283,    4,  149,
          3,  171,  111,  166,    2,  335,  384,   38,    3,  171, 4535,
       1110,   16,  545,   37,   12,  446,    3,  191,   49,   15,    5,
        146, 2024,   18,   13,   21,    3, 1919, 4612,  468,    3,   21,
         70,   86,   11,   15,   42,  529,   37,   75,   14,   12, 1246,
          3,   21,   16,  514,   16,   11,   15,  625,   17,    2,    4,
         61,  385,   11,    7,  315,    7,  105,    4,    3, 2222, 5243,
         15], dtype=int32)

In [45]:
def convertIntToWord(sentence, index_to_word=None):
    """Decode a sequence of token ids into a space-separated string.

    Args:
        sentence: iterable of integer token ids.
        index_to_word: optional mapping from token id to word. When omitted,
            the notebook-level ``idx2word`` lookup table is used.

    Returns:
        The decoded text. Every word is preceded by a single space, so a
        non-empty result starts with a space; an empty input yields ''.

    Raises:
        KeyError: if a token id is missing from the mapping.
    """
    if index_to_word is None:
        index_to_word = idx2word
    # A single join avoids rebuilding the string once per token.
    return ''.join(' ' + index_to_word[token] for token in sentence)
convertIntToWord(x_train[0])

" <START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert <OOV> is an amazing actor and now the same being director <OOV> father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for <OOV> and would recommend it to everyone to watch and the fly fishing was"

In [44]:
len(word2idx)

88584

In [30]:
import os
import numpy as np
# Parse the pre-trained GloVe vectors into a {word: float32 vector} lookup.
embeddings_index = {}
embedding_size = 100
GLOVE_DIR='/home/prosjekt/deepnews/glove/'
# The context manager closes the file even if parsing raises; the explicit
# encoding keeps the read independent of the platform's default codec.
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'), encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # tolerate blank lines instead of failing on values[0]
            continue
        # first field is the word, the remaining fields are its coefficients
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

In [47]:
# Build the initial weights for the pre-trained embedding layer.
# Every row starts from random values, so words without a GloVe vector keep
# that random initialisation (they are not zeroed).
words_not_fount = 0
embedding_matrix = np.random.random((len(word2idx) + 1, embedding_size))
for word, i in word2idx.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        words_not_fount = words_not_fount + 1
        continue
    # Token ids in x_train / x_test (and the keys of idx2word) are the
    # word2idx index shifted by index_offset, so the vector must be stored at
    # the shifted row; writing it at row `i` would attach it to another token.
    row = i + index_offset
    # The matrix keeps its original (len(word2idx) + 1) rows so it still
    # matches the embedding layer's input dimension; shifted rows that fall
    # past the end are skipped.
    if row < embedding_matrix.shape[0]:
        embedding_matrix[row] = embedding_vector

In [48]:
print(words_not_fount)

28434


In [88]:
# Hyper-parameter search space: each HParam lists the discrete values it can take.
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([25, 50, 100]))
HP_BATCH_SIZE = hp.HParam('batch_size', hp.Discrete([64, 128, 256]))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.1, 0.2, 0.3]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.003, 0.01, 0.03, 0.1]))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'rmsprop']))
HP_EMBEDDING_LAYER = hp.HParam('embedding', hp.Discrete(['pretrained', 'from_scratch']))
HP_ATTENTION = hp.HParam('attention', hp.Discrete(['yes', 'no']))
# Full grid size (units * batch * dropout * lr * optimizer * embedding * attention):
# 3 * 3 * 3 * 5 * 2 * 2 * 2 = 1080 combinations

In [89]:
# Tags under which the per-trial results are written to TensorBoard.
METRIC_ACCURACY = 'accuracy'
METRIC_F1_MAC = 'f1_macro' #f1 per class then averaged
METRIC_F1_MIC = 'f1_micro' #global average over all individual instances
METRIC_PRECISION = 'precision'
METRIC_RECALL = 'recall'
METRIC_LOSS = 'loss'

# Declare the experiment for the HParams dashboard.
# - HP_ATTENTION is part of the search grid, so it is registered with the
#   other hyper-parameters.
# - The F1 metric is registered under METRIC_F1_MAC because that is the tag
#   run() writes for each trial; a different tag here would leave the
#   dashboard column empty.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_OPTIMIZER, HP_NUM_UNITS, HP_BATCH_SIZE, HP_DROPOUT,
                 HP_LEARNING_RATE, HP_EMBEDDING_LAYER, HP_ATTENTION],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy'),
                 hp.Metric(METRIC_F1_MAC, display_name='F1 Macro'),
                 hp.Metric(METRIC_LOSS, display_name='Loss'),
                 hp.Metric(METRIC_PRECISION, display_name='Precision'),
                 hp.Metric(METRIC_RECALL, display_name='Recall')],
      )

In [53]:
def get_opt_algo(algo, learning_rate):
    """Instantiate the Keras optimizer selected by name.

    Args:
        algo: optimizer name; 'rmsprop' and 'adam' are recognised.
        learning_rate: learning rate handed to the optimizer constructor.

    Returns:
        An RMSprop or Adam optimizer for the recognised names; any other
        value falls back to SGD.
    """
    if algo == 'rmsprop':
        return tf.keras.optimizers.RMSprop(learning_rate)
    if algo == 'adam':
        return tf.keras.optimizers.Adam(learning_rate)
    # Anything else: for now it defaults to SGD.
    return tf.keras.optimizers.SGD(learning_rate)

In [54]:
def getEmbeddingLayer(embedding):
    """Create the trainable Embedding layer for the requested strategy.

    Args:
        embedding: 'pretrained' to start from ``embedding_matrix``; any other
            value creates a layer over ``number_of_words`` entries without
            initial weights.

    Returns:
        A ``tf.keras.layers.Embedding`` producing ``embedding_size``-dimensional
        vectors for inputs of length ``max_len``.
    """
    if embedding == 'pretrained':
        # One row per entry of word2idx plus one, initialised from embedding_matrix.
        return tf.keras.layers.Embedding(
            len(word2idx) + 1,
            embedding_size,
            weights=[embedding_matrix],
            input_length=max_len,
            trainable=True,
        )
    return tf.keras.layers.Embedding(
        number_of_words,
        embedding_size,
        input_length=max_len,
        trainable=True,
    )

In [55]:


def get_f1(y_true, y_pred): #taken from old keras source code
    """F1 score computed from rounded predictions over the tensors passed in.

    Args:
        y_true: ground-truth labels (assumed to be 0/1 valued -- TODO confirm).
        y_pred: predicted scores; values are clipped to [0, 1] and rounded.

    Returns:
        A scalar tensor: 2 * precision * recall / (precision + recall), with
        K.epsilon() added to each denominator to avoid division by zero.
    """
    eps = K.epsilon()
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    flagged_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = hits / (flagged_pos + eps)
    recall = hits / (actual_pos + eps)
    return 2*(precision*recall)/(precision+recall+eps)

In [80]:
#Attention implementation borrowed from https://matthewmcateer.me/blog/getting-started-with-attention-for-classification/

class Attention(tf.keras.Model):
    """Additive attention that pools a sequence of features into one vector.

    The score for each time step is V(tanh(W1(features) + W2(hidden))); the
    scores are soft-maxed over axis 1 and used to weight the features.
    """

    def __init__(self, units):
        """Create the three Dense projections; `units` is the score-space width."""
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        """Return (context_vector, attention_weights).

        Args:
            features: per-time-step features, expected as
                (batch_size, max_length, hidden_size).
            hidden: summary state, expected as (batch_size, hidden_size).

        Returns:
            context_vector: (batch_size, hidden_size) weighted sum of features.
            attention_weights: (batch_size, max_length, 1) softmax weights.
        """
        # Add a time axis -> (batch_size, 1, hidden_size) so the projected
        # state broadcasts against every time step of the features.
        query = tf.expand_dims(hidden, 1)

        # (batch_size, max_length, units) before V collapses the last axis to 1.
        projected_features = self.W1(features)
        projected_query = self.W2(query)
        energies = self.V(tf.nn.tanh(projected_features + projected_query))

        # Normalise over the time axis.
        attention_weights = tf.nn.softmax(energies, axis=1)

        # Weighted sum over time.
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)
        return context_vector, attention_weights

In [91]:
def train_and_test_model(hparams, epochs=3, train_samples=100):
    """Build, train and evaluate one model for a hyper-parameter combination.

    The network is: embedding -> BiLSTM -> BiLSTM (with dropout) ->
    optional attention pooling -> sigmoid output unit.

    Args:
        hparams: dict keyed by the HP_* objects (num units, dropout, optimizer,
            learning rate, batch size, embedding type, attention 'yes'/'no').
        epochs: number of training epochs (default 3, the previous fixed value).
        train_samples: how many leading training examples to fit on. The
            default of 100 keeps the previous quick-run behaviour; pass None
            to train on the full training set.

    Returns:
        Tuple (loss, accuracy, precision, recall, f1) measured on the test set,
        in the order of the metrics passed to ``model.compile``.
    """
    # This function is called once per grid point; clearing Keras' global
    # state first keeps memory from growing with every model that is built.
    K.clear_session()

    use_attention = hparams[HP_ATTENTION] == 'yes'
    num_units = hparams[HP_NUM_UNITS]

    sequence_input = tf.keras.layers.Input(shape=(max_len,), dtype='int32')
    embedded = getEmbeddingLayer(hparams[HP_EMBEDDING_LAYER])(sequence_input)

    first_bilstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=num_units, return_sequences=True),
        name="bi_lstm_0")(embedded)

    # The second BiLSTM returns the full sequence only when attention needs
    # it; otherwise its first output is already the final representation.
    second_bilstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=num_units,
                             dropout=hparams[HP_DROPOUT],
                             activation='tanh',
                             return_sequences=use_attention,
                             return_state=True))
    lstm, forward_h, forward_c, backward_h, backward_c = second_bilstm(first_bilstm)

    if use_attention:
        # The concatenated final hidden states act as the attention query.
        state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])
        classifier_input, attention_weights = Attention(10)(lstm, state_h)
    else:
        classifier_input = lstm
    output = tf.keras.layers.Dense(units=1, activation='sigmoid')(classifier_input)

    opt_algo = get_opt_algo(hparams[HP_OPTIMIZER], hparams[HP_LEARNING_RATE])
    model = tf.keras.Model(inputs=sequence_input, outputs=output)

    model.compile(optimizer=opt_algo, loss='binary_crossentropy',
                  metrics=['accuracy',
                           tf.keras.metrics.Precision(),
                           tf.keras.metrics.Recall(),
                           get_f1])
    # Slicing with None keeps the whole array, so train_samples=None means "all".
    model.fit(x_train[:train_samples], y_train[:train_samples],
              epochs=epochs, batch_size=hparams[HP_BATCH_SIZE])
    loss, accuracy, prec, recall, f1 = model.evaluate(x_test, y_test)
    return loss, accuracy, prec, recall, f1

In [82]:
def run(run_dir, hparams):
    """Execute one trial and write its hyper-parameters and results to `run_dir`.

    Args:
        run_dir: directory for this trial's TensorBoard event file.
        hparams: dict keyed by the HP_* objects describing the trial.
    """
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        loss, accuracy, prec, recall, f1 = train_and_test_model(hparams)
        # Each result is logged as a single scalar at step 1.
        results = (
            (METRIC_ACCURACY, accuracy),
            (METRIC_LOSS, loss),
            (METRIC_F1_MAC, f1),
            (METRIC_PRECISION, prec),
            (METRIC_RECALL, recall),
        )
        for tag, value in results:
            tf.summary.scalar(tag, value, step=1)

In [92]:
# Exhaustive grid search: one trial per combination of hyper-parameter values.
session_num = 0

# itertools.product varies the LAST domain fastest, so listing the domains
# from outermost to innermost visits the grid in the same order as a set of
# nested for-loops would.
search_domains = (
    HP_OPTIMIZER.domain.values,
    HP_NUM_UNITS.domain.values,
    HP_BATCH_SIZE.domain.values,
    HP_DROPOUT.domain.values,
    HP_LEARNING_RATE.domain.values,
    HP_EMBEDDING_LAYER.domain.values,
    HP_ATTENTION.domain.values,
)

for (optimizer, num_units, batch_size, dropout_rate,
     learning_rate, embedding_layer, attention) in itertools.product(*search_domains):
    hparams = {
        HP_NUM_UNITS: num_units,
        HP_DROPOUT: dropout_rate,
        HP_OPTIMIZER: optimizer,
        HP_EMBEDDING_LAYER: embedding_layer,
        HP_LEARNING_RATE: learning_rate,
        HP_BATCH_SIZE: batch_size,
        HP_ATTENTION: attention
      }
    # Each trial logs into its own sub-directory so TensorBoard shows it as a separate run.
    run_name = "run-%d" % session_num
    print('--- Starting trial: %s' % run_name)
    print({h.name: hparams[h] for h in hparams})
    run('logs/hparam_tuning/' + run_name, hparams)
    session_num += 1

--- Starting trial: run-0
{'num_units': 25, 'dropout': 0.1, 'optimizer': 'adam', 'embedding': 'from_scratch', 'learning_rate': 0.001, 'batch_size': 64, 'attention': 'no'}
Train on 100 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-1
{'num_units': 25, 'dropout': 0.1, 'optimizer': 'adam', 'embedding': 'from_scratch', 'learning_rate': 0.001, 'batch_size': 64, 'attention': 'yes'}
Train on 100 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

KeyboardInterrupt: 

In [None]:
!tensorboard --logdir logs/hparam_tuning/