In [1]:
import tensorflow as tf
import datetime
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=2000)


In [62]:
class LSTMModel(tf.keras.Model):
    def __init__(self, h_size=8):
        super(LSTMModel, self).__init__()
        
        self.num_words = 2000
        self.h_size = h_size
        
        self.w_f = self.add_weight()
        self.b_f = self.add_weight()
        
        self.w_i = self.add_weight()
        self.b_i = self.add_weight()
        
        self.w_c = self.add_weight()
        self.b_c = self.add_weight()
        
        self.w_o = self.add_weight()
        self.b_o = self.add_weight()

        self.dense = tf.keras.layers.Dense(units=2, activation='softmax')

    def call(self, x, training=False):
        print("x shape", x.shape)
        
        h = tf.zeros(shape=(self.h_size, len(x)))
        c = tf.zeros(shape=(self.h_size, len(x)))
        
        print(h.shape)
        print(c.shape)
        
        for i in range(1):
            print("i: ", i)
            print("x[i]: ", x[i])
            # print(x[:, i])
            print(x[:, :, i])
            h, c = self.lstm_cell(x[i], h, c)
        
        x = self.dense(x)

        return x
    
    def lstm_cell(self, x_t, h_prev, c_prev):
        x_t = tf.transpose(x_t)
        
        f = tf.sigmoid(tf.matmul(self.w_f, tf.concat(h_prev, x_t)) + self.b_f)
        i = tf.sigmoid(tf.matmul(self.w_i, tf.concat(h_prev, x_t)) + self.b_i)
        z = tf.tanh(tf.matmul(self.w_c, tf.concat(h_prev, x_t)) + self.b_c)
        c = f * c_prev + i * z
        
        o = tf.sigmoid(tf.matmul(self.w_o, tf.concat(h_prev, x_t)) + self.b_o)
        h = o * tf.tanh(c)
        
        return h, c

    def fit(self, **kwargs):
        self.epoch_loss = []
        self.epoch_accuracy = []
        self.epoch_fscore = []
        self.epoch_time = []

        x_train, y_train, epochs, batch_size, cost_func, learning_coef, optimizer = self._get_params(**kwargs)

        weight_decay = kwargs.get('weight_decay', None)
        if weight_decay is not None:
            optimizer = optimizer(weight_decay=weight_decay, learning_rate=learning_coef)
        else:
            optimizer = optimizer(learning_rate=learning_coef)
        train_dataset = self._extract_train_dataset(x_train, y_train, batch_size)

        for epoch in range(epochs):
            start = datetime.datetime.now()
            epoch_acc = tf.keras.metrics.SparseCategoricalAccuracy()
            
            print("epoch: ", epoch)
            
            for x, y in train_dataset:
                print("x shape: ", x.shape)
                print(x)
                print("y shape: ", y.shape)
                print(y)
                x_onehot = []
                for x_el in x:
                    x_onehot.append(tf.dtypes.cast(
                        tf.keras.utils.to_categorical(x_el, num_classes=self.num_words), tf.int32))
                
                x = np.array(x_onehot)
                
                # x = tf.keras.utils.to_categorical(x, num_classes=self.num_words)
                print("type(x): ", type(x))
                # x = tf.dtypes.cast(x, tf.int32)
                print("len(x): ", len(x))
                
                print("x shape: ", x.shape)
                print("x[0] shape: ", x[0].shape)
                print("x[0][0] shape: ", x[0][0].shape)
                
                y = tf.dtypes.cast(y, tf.float32)

                with tf.GradientTape() as tape:
                    logits = self.call(x, training=True)
                    loss_value = cost_func(y, logits)

                grads = tape.gradient(loss_value, self.trainable_variables)
                optimizer.apply_gradients(zip(grads, self.trainable_variables))

                epoch_acc.update_state(y, logits)
                self.epoch_loss.append(loss_value)

            end = datetime.datetime.now()
            diff = end - start
            self.epoch_time.append(diff.total_seconds())

            self.epoch_accuracy.append(epoch_acc.result().numpy())

            print("Epoch {:02d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch,
                                                                        self.epoch_loss[-1], 
                                                                        self.epoch_accuracy[-1]))

    def predict(self, x):
        """Predicts outputs based on inputs (x)."""
        x_dataset = self._extract_test_dataset(x, batch_size=1000)
        prediction = []
        for x in x_dataset:
            logits = self.call(x)
            prediction.extend(tf.argmax(logits, axis=1, output_type=tf.int32))
        return prediction

    def _extract_train_dataset(self, x_train, y_train, batch_size):
        x_train = self._preprocess_x(x_train)
        max_length = max(len(lst) for lst in x_train)
        
        #train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        
        # x_train = tf.data.Dataset.from_generator(lambda: x_train, tf.int32, output_shapes=[None])
        # y_train = tf.data.Dataset.from_tensors((y_train))
        # train_dataset = tf.data.Dataset.zip((x_train, y_train))
        # for x, y in train_dataset:
        #     print(x, y)
        x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                              padding='post')
        # x_train = tf.ragged.constant(x_train)
        train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        train_dataset = train_dataset.shuffle(buffer_size=50000).batch(batch_size=batch_size)
            # padded_batch(
            # batch_size=batch_size, padded_shapes=[None, None, None])
        print("train dataset: ", train_dataset)
        return train_dataset
        # return zip(x_train, y_train)

    def _extract_test_dataset(self, x_train, batch_size):
        x_train = self._preprocess_x(x_train)

        train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
        train_dataset = train_dataset.batch(batch_size=batch_size)
        return train_dataset
        # return x_train
        
    def _preprocess_x(self, x_set):
        # x_onehot.append(tf.keras.utils.to_categorical(x, num_classes=2000))
        return x_set

    def _get_params(self, **kwargs):
        x_train = kwargs.get('x', None)
        y_train = kwargs.get('y', None)
        epochs = kwargs.get('epochs', 10)
        batch_size = kwargs.get('batch_size', 100)
        cost_func = kwargs.get('cost_func', tf.keras.losses.SparseCategoricalCrossentropy())
        learning_coef = kwargs.get('learning_coef', 0.001)
        optimizer = kwargs.get('optimizer', tf.keras.optimizers.Adam)
        return x_train, y_train, epochs, batch_size, cost_func, learning_coef, optimizer

In [63]:
batch_size = 100
learning_coef = 0.01  # 0.01
epochs = 10
cost_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.SGD


def test_params_base(batch_size, learning_coef, epochs, cost_func, optimizer,
                     kernel_size, pool_size, pooling, weight_decay=None,
                     x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test):
    model = LSTMModel()
    
    print("Started fitting")

    model.fit(x=x_train, y=y_train, batch_size=batch_size, learning_coef=learning_coef, 
             epochs=epochs, cost_func=cost_func, optimizer=optimizer, weight_decay=weight_decay)
    
    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    loss = model.epoch_loss[-1]
    return model, acc, f1, loss.numpy()

In [64]:
%%time

batch_size = 100
learning_coef = 0.001
epochs = 1
cost_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam
kernel_size = (3, 3)
pool_size = (4, 4)
pooling = tf.keras.layers.MaxPool2D


results = test_params_base(batch_size=batch_size, learning_coef=learning_coef, 
                           epochs=epochs, cost_func=cost_func, 
                           optimizer=optimizer, kernel_size=kernel_size, 
                           pool_size=pool_size, pooling=pooling)
model, acc, f1, loss = results
    
res_df = pd.DataFrame(columns=['batch_size', 'learning_coef', 'epochs', 
                               'cost_func', 'optimizer', 'kernel_size', 
                               'pool_size', 'pooling', 'acc', 'f1', 'loss', 'time'])
res_df = res_df.append({'batch_size': batch_size, 'learning_coef': learning_coef,
                        'epochs': epochs, 'cost_func': type(cost_func).__name__, 
                        'optimizer': optimizer.__name__, 
                        'kernel_size': kernel_size, 'pool_size': pool_size, 
                        'pooling': pooling.__name__, 
                        'acc': acc, 'f1': f1, 'loss': loss, 
                        'time': sum(model.epoch_time)}, 
                       ignore_index=True)

print('Time: {}'.format(sum(model.epoch_time)))

with open('results.csv', 'a') as f:
    res_df.to_csv(f, header=False)
    
now = datetime.datetime.now()
now = str(now).replace(':', '_').replace(' ', '_').replace('.', '_').replace('-', '_')
tf.saved_model.save(model, "model_{}_{}".format(now, acc))

Started fitting
train dataset:  <BatchDataset shapes: ((None, 2494), (None,)), types: (tf.int32, tf.int64)>
epoch:  0
x shape:  (100, 2494)
tf.Tensor(
[[   1   13  296 ...    0    0    0]
 [   1   13   69 ...    0    0    0]
 [   1   51  126 ...    0    0    0]
 ...
 [   1 1301   73 ...    0    0    0]
 [   1   14   20 ...    0    0    0]
 [   1   99   78 ...    0    0    0]], shape=(100, 2494), dtype=int32)
y shape:  (100,)
tf.Tensor(
[1 0 0 1 0 1 0 0 0 1 1 0 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0 0 0 1 0 0 1 1 0 1
 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1
 0 1 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 0 0], shape=(100,), dtype=int64)


ResourceExhaustedError: OOM when allocating tensor with shape[2494,2000] and type int32 on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Cast] name: Cast/

In [7]:
# x_train_onehot = []
# for x in x_train[:200]:
#     x_train_onehot.append(tf.keras.utils.to_categorical(x, num_classes=2000))

In [6]:
# len(x_train_onehot)

In [105]:
tmp = tf.keras.preprocessing.sequence.pad_sequences(x_train, padding='post')
tmp = tf.keras.utils.to_categorical(tmp[:100], num_classes=2000)
tmp.shape

(100, 2494, 2000)

In [106]:
# tmp = np.array([[[1, 2, 3, 4],[5, 6, 7, 8]],
#                 [[9, 10, 11, 12],[13, 14, 15, 16]],
#                 [[17, 18, 19, 20],[21, 22, 23, 24]]])
tmp = np.array([[[1, 2, 3, 4]],
                [[9, 10, 11, 12]],
                [[17, 18, 19, 20]]])
tmp.shape

(3, 1, 4)

In [108]:
t1 = tmp[:, 0]
print(t1.shape)
t1

(3, 4)


array([[ 1,  2,  3,  4],
       [ 9, 10, 11, 12],
       [17, 18, 19, 20]])

In [109]:
# t2 = np.array([[[10, 20, 30, 40],[50, 60, 70, 80]],
#                 [[90, 100, 110, 120],[130, 140, 150, 160]],
#                 [[170, 180, 190, 200],[210, 220, 230, 240]]])
t2 = np.array([[[10, 20, 30, 40]],
                [[90, 100, 110, 120]],
                [[170, 180, 190, 200]]])

In [112]:
tmp2 = tf.transpose(tmp)
t3 = tf.transpose(t2)
tf.concat([tmp2, t3], 0)

<tf.Tensor: id=29189, shape=(8, 1, 3), dtype=int64, numpy=
array([[[  1,   9,  17]],

       [[  2,  10,  18]],

       [[  3,  11,  19]],

       [[  4,  12,  20]],

       [[ 10,  90, 170]],

       [[ 20, 100, 180]],

       [[ 30, 110, 190]],

       [[ 40, 120, 200]]])>

In [116]:
tmp = [[10, 20], [30, 40], [50, 60], [70, 80]]
tmp2 = [[1, 2, 3], [4, 5, 6]]
tf.matmul(tmp, tmp2)

<tf.Tensor: id=29197, shape=(4, 3), dtype=int32, numpy=
array([[ 90, 120, 150],
       [190, 260, 330],
       [290, 400, 510],
       [390, 540, 690]], dtype=int32)>