In [1]:
%tensorflow_version 2.x

import tensorflow as tf
import datetime
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

from google.colab import files
DATADIR = "/content/gdrive/My Drive/Colab Notebooks/DL/"

from google.colab import drive
drive.mount("/content/gdrive", force_remount=True)

TensorFlow 2.x selected.
Mounted at /content/gdrive


In [0]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=2000)
x_train, y_train = x_train[:5000], y_train[:5000]
x_test, y_test = x_test[:2000], y_test[:2000]

In [0]:
def test_params_lstm(batch_size, learning_coef, epochs, cost_func, optimizer,
                     x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test):
    model = LSTMModel()
    
    print("Started fitting")

    model.fit(x=x_train, y=y_train, batch_size=batch_size, learning_coef=learning_coef, 
             epochs=epochs, cost_func=cost_func, optimizer=optimizer)
    
    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    loss = model.epoch_loss[-1]
    return model, acc, f1, loss.numpy()


def test_params_rnn(batch_size, learning_coef, epochs, cost_func, optimizer,
                     x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test):
    model = RNNMModel()
    
    print("Started fitting")

    model.fit(x=x_train, y=y_train, batch_size=batch_size, learning_coef=learning_coef, 
             epochs=epochs, cost_func=cost_func, optimizer=optimizer)
    
    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    loss = model.epoch_loss[-1]
    return model, acc, f1, loss.numpy()

In [0]:
class LSTMModel(tf.keras.Model):
    def __init__(self, h_size=8):
        super(LSTMModel, self).__init__()
        
        self.num_words = 2000
        self.h_size = h_size
        
        self.w_f = self.add_weight(shape=(self.h_size, self.h_size + self.num_words), 
                                   initializer='truncated_normal', trainable=True)
        self.b_f = self.add_weight(shape=(self.h_size, 1), 
                                   initializer='truncated_normal', trainable=True)
        
        self.w_i = self.add_weight(shape=(self.h_size, self.h_size + self.num_words), 
                                   initializer='truncated_normal', trainable=True)
        self.b_i = self.add_weight(shape=(self.h_size, 1), 
                                   initializer='truncated_normal', trainable=True)
        
        self.w_c = self.add_weight(shape=(self.h_size, self.h_size + self.num_words), 
                                   initializer='truncated_normal', trainable=True)
        self.b_c = self.add_weight(shape=(self.h_size, 1), 
                                   initializer='truncated_normal', trainable=True)
        
        self.w_o = self.add_weight(shape=(self.h_size, self.h_size + self.num_words), 
                                   initializer='truncated_normal', trainable=True)
        self.b_o = self.add_weight(shape=(self.h_size, 1), 
                                   initializer='truncated_normal', trainable=True)

        self.dense = tf.keras.layers.Dense(units=2, activation='softmax')

    def __call__(self, x, training=False):
        h = tf.zeros(shape=(self.h_size, len(x)))
        c = tf.zeros(shape=(self.h_size, len(x)))
        
        for i in range(x.shape[1]):
            h, c = self.lstm_cell(x[:, i], h, c)
        
        x = self.dense(tf.transpose(h))

        return x
    
    def lstm_cell(self, x_t, h_prev, c_prev):
        x_t = tf.transpose(x_t)

        f = tf.sigmoid(tf.matmul(self.w_f, tf.concat([x_t, h_prev], 0)) + self.b_f)
        i = tf.sigmoid(tf.matmul(self.w_i, tf.concat([x_t, h_prev], 0)) + self.b_i)
        z = tf.tanh(tf.matmul(self.w_c, tf.concat([x_t, h_prev], 0)) + self.b_c)
        c = f * c_prev + i * z
        
        o = tf.sigmoid(tf.matmul(self.w_o, tf.concat([x_t, h_prev], 0)) + self.b_o)
        h = o * tf.tanh(c)
        
        return h, c

    def fit(self, **kwargs):
        self.epoch_loss = []
        self.epoch_accuracy = []
        self.epoch_fscore = []
        self.epoch_time = []

        x_train, y_train, epochs, batch_size, cost_func, learning_coef, optimizer = self._get_params(**kwargs)
        
        weight_decay = kwargs.get('weight_decay', None)
        if weight_decay is not None:
            optimizer = optimizer(weight_decay=weight_decay, learning_rate=learning_coef)
        else:
            optimizer = optimizer(learning_rate=learning_coef)
        train_dataset = self._extract_train_dataset(x_train, y_train, batch_size)

        for epoch in range(epochs):
            start = datetime.datetime.now()
            epoch_acc = tf.keras.metrics.SparseCategoricalAccuracy()
            
            print("epoch: ", epoch)
            i = 0
            for x, y in train_dataset:
                print(i)
                i += 1

                x = tf.keras.utils.to_categorical(x, num_classes=self.num_words)
                
                with tf.GradientTape() as tape:
                    logits = self.__call__(x, training=True)
                    loss_value = cost_func(y, logits)

                grads = tape.gradient(loss_value, self.trainable_variables)
                optimizer.apply_gradients(zip(grads, self.trainable_variables))

                epoch_acc.update_state(y, logits)
                self.epoch_loss.append(loss_value)

            end = datetime.datetime.now()
            diff = end - start
            self.epoch_time.append(diff.total_seconds())

            self.epoch_accuracy.append(epoch_acc.result().numpy())

            print("Epoch {:02d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch,
                                                                        self.epoch_loss[-1], 
                                                                        self.epoch_accuracy[-1]))

    def predict(self, x):
        """Predicts outputs based on inputs (x)."""
        x_dataset = self._extract_test_dataset(x, batch_size=100)
        prediction = []
        for x in x_dataset:
            x = tf.keras.utils.to_categorical(x, num_classes=self.num_words)
            logits = self.__call__(x)
            prediction.extend(tf.argmax(logits, axis=1, output_type=tf.int32))
        return prediction

    def _extract_train_dataset(self, x_train, y_train, batch_size):
        y_train = tf.dtypes.cast(y_train, tf.float32)
        x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                              padding='post')
        train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        train_dataset = train_dataset.shuffle(buffer_size=25000).batch(batch_size=batch_size)
        return train_dataset

    def _extract_test_dataset(self, x_train, batch_size):
        x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                              padding='post')

        train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
        train_dataset = train_dataset.batch(batch_size=batch_size)
        return train_dataset

    def _get_params(self, **kwargs):
        x_train = kwargs.get('x', None)
        y_train = kwargs.get('y', None)
        epochs = kwargs.get('epochs', 10)
        batch_size = kwargs.get('batch_size', 100)
        cost_func = kwargs.get('cost_func', tf.keras.losses.SparseCategoricalCrossentropy())
        learning_coef = kwargs.get('learning_coef', 0.001)
        optimizer = kwargs.get('optimizer', tf.keras.optimizers.Adam)
        return x_train, y_train, epochs, batch_size, cost_func, learning_coef, optimizer


In [0]:
class RNNMModel(LSTMModel):
    def __init__(self, h_size=8):
        super(LSTMModel, self).__init__()
        
        self.num_words = 2000
        self.h_size = h_size
        
        self.w_o = self.add_weight(shape=(self.h_size, self.h_size + self.num_words), 
                                   initializer='truncated_normal', trainable=True)
        self.b_o = self.add_weight(shape=(self.h_size, 1), 
                                   initializer='truncated_normal', trainable=True)

        self.dense = tf.keras.layers.Dense(units=2, activation='softmax')

    def __call__(self, x, training=False):
        h = tf.zeros(shape=(self.h_size, len(x)))
        
        for i in range(x.shape[1]):
            h = self.rnn_cell(x[:, i], h)
        
        x = self.dense(tf.transpose(h))

        return x
    
    def rnn_cell(self, x_t, h_prev):
        x_t = tf.transpose(x_t)
        c = tf.tanh(tf.matmul(self.w_o, tf.concat([x_t, h_prev], 0)) + self.b_o)
        return c

**Test hyperparameters**

In [0]:
%%time

batch_size = 100
learning_coef = 0.001
epochs = 3
cost_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam

experiments_epochs = [1, 3]
experiments_learning_coef = [0.001, 0.005]
experiments_optimizer = [tf.keras.optimizers.SGD, tf.keras.optimizers.Adam]

for epochs in experiments_epochs:
  for learning_coef in experiments_learning_coef:
    for optimizer in experiments_optimizer:
      results = test_params_lstm(batch_size=batch_size, learning_coef=learning_coef, 
                                epochs=epochs, cost_func=cost_func, 
                                optimizer=optimizer)
      model, acc, f1, loss = results
          
      res_df = pd.DataFrame(columns=['model', 'batch_size', 'learning_coef', 'epochs', 
                                    'cost_func', 'optimizer', 'acc', 'f1', 'loss', 'time'])
      res_df = res_df.append({'model': 'LSTM', 'batch_size': batch_size, 'learning_coef': learning_coef,
                              'epochs': epochs, 'cost_func': type(cost_func).__name__, 
                              'optimizer': optimizer.__name__, 
                              'acc': acc, 'f1': f1, 'loss': loss, 
                              'time': sum(model.epoch_time)}, 
                            ignore_index=True)

      print('Time: {}'.format(sum(model.epoch_time)))

      with open(DATADIR + 'results.csv', 'a') as f:
        res_df.to_csv(f, header=False)

      # results = test_params_rnn(batch_size=batch_size, learning_coef=learning_coef, 
      #                           epochs=epochs, cost_func=cost_func, 
      #                           optimizer=optimizer)
      # model, acc, f1, loss = results
          
      # res_df = pd.DataFrame(columns=['model', 'batch_size', 'learning_coef', 'epochs', 
      #                               'cost_func', 'optimizer', 'acc', 'f1', 'loss', 'time'])
      # res_df = res_df.append({'model': 'RNN', 'batch_size': batch_size, 'learning_coef': learning_coef,
      #                         'epochs': epochs, 'cost_func': type(cost_func).__name__, 
      #                         'optimizer': optimizer.__name__, 
      #                         'acc': acc, 'f1': f1, 'loss': loss, 
      #                         'time': sum(model.epoch_time)}, 
      #                       ignore_index=True)

      # print('Time: {}'.format(sum(model.epoch_time)))

      # with open(DATADIR + 'results.csv', 'a') as f:
      #   res_df.to_csv(f, header=False)

Started fitting
epoch:  0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14


Other

In [5]:
%%time

batch_size = 100
learning_coef = 0.001
epochs = 1
cost_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam


results = test_params_lstm(batch_size=batch_size, learning_coef=learning_coef, 
                           epochs=epochs, cost_func=cost_func, 
                           optimizer=optimizer)
model, acc, f1, loss = results
    
res_df = pd.DataFrame(columns=['model', 'batch_size', 'learning_coef', 'epochs', 
                               'cost_func', 'optimizer', 'acc', 'f1', 'loss', 'time'])
res_df = res_df.append({'model': 'LSTM', 'batch_size': batch_size, 'learning_coef': learning_coef,
                        'epochs': epochs, 'cost_func': type(cost_func).__name__, 
                        'optimizer': optimizer.__name__, 
                        'acc': acc, 'f1': f1, 'loss': loss, 
                        'time': sum(model.epoch_time)}, 
                       ignore_index=True)

print('Time: {}'.format(sum(model.epoch_time)))

Started fitting
epoch:  0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
Epoch 00: Loss: 0.695, Accuracy: 50.440%
Time: 540.131955
CPU times: user 9min 50s, sys: 9.13 s, total: 9min 59s
Wall time: 10min


  'precision', 'predicted', average, warn_for)


In [9]:
%%time

batch_size = 100
learning_coef = 0.001
epochs = 1
cost_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam


results = test_params_rnn(batch_size=batch_size, learning_coef=learning_coef, 
                           epochs=epochs, cost_func=cost_func, 
                           optimizer=optimizer)
model, acc, f1, loss = results
    
res_df = pd.DataFrame(columns=['model', 'batch_size', 'learning_coef', 'epochs', 
                               'cost_func', 'optimizer', 'acc', 'f1', 'loss', 'time'])
res_df = res_df.append({'model': 'RNN', 'batch_size': batch_size, 'learning_coef': learning_coef,
                        'epochs': epochs, 'cost_func': type(cost_func).__name__, 
                        'optimizer': optimizer.__name__, 
                        'acc': acc, 'f1': f1, 'loss': loss, 
                        'time': sum(model.epoch_time)}, 
                       ignore_index=True)

print('Time: {}'.format(sum(model.epoch_time)))

Started fitting
epoch:  0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
Epoch 00: Loss: 0.696, Accuracy: 50.920%
Time: 186.619398


  'precision', 'predicted', average, warn_for)
