In [None]:
import ast
import os
import random

import h5py
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Bidirectional, Embedding, Dense, Input, LSTM, TextVectorization
from tensorflow.keras.models import load_model

In [None]:
# Seed the Python, NumPy and TensorFlow random number generators with one shared value
seed = 0

for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed)

In [None]:
# --- Text vectorisation / embedding ---
# vocab_sz is passed to both TextVectorization and Embedding;
# seq_len is the fixed length of every vectorised review and the model input length
vocab_sz = 1000
seq_len = 100
embed_out_dim = 64

# --- LSTM stack ---
# bidirectional: wrap every LSTM layer in Bidirectional when True
# lstm_layer: number of stacked LSTM layers
bidirectional = False
lstm_layer = 1

# For lstm_out_dim, lstm_bias, lstm_activation and lstm_rec_activation:
# One value: Applies to all LSTM layers
# List of values: Specific to each LSTM layer (indexed by layer position)
lstm_out_dim = 64
lstm_bias = True
# Supported activations: tanh, sigmoid, relu, None
lstm_activation = 'tanh'
lstm_rec_activation = 'sigmoid'

# --- Fully connected head ---
# fc_layer: number of Dense layers appended after the LSTM stack
# fc_out_dim, fc_activation and fc_bias follow the same one-value-or-list rule as the LSTM settings
fc_layer = 2
fc_out_dim = [64, 2]
fc_activation = ['relu', 'softmax']
fc_bias = True

# --- Compilation (forwarded to model.compile) ---
optimiser = 'adam'
loss = 'sparse_categorical_crossentropy'
metrics = ['acc']

# --- Training (forwarded to model.fit) ---
batch_sz = 64
num_epoch = 10

In [None]:
def _layer_setting(setting, index):
    """Return the value of `setting` that applies to layer number `index`.

    A list or tuple holds one value per layer and is indexed by `index`;
    any other value is shared by every layer and returned unchanged.
    """
    if isinstance(setting, (list, tuple)):
        return setting[index]

    return setting


def lstm_model():
    """Build and compile the LSTM classifier described by the configuration cell.

    The network is Input -> Embedding -> `lstm_layer` LSTM layers (each wrapped
    in Bidirectional when `bidirectional` is set) -> `fc_layer` Dense layers.
    All hyper-parameters are read from the module-level configuration names.

    Returns:
        The compiled `Sequential` model.

    Raises:
        IndexError: If a per-layer list setting has fewer entries than layers.
    """
    # The shape is given as an explicit tuple: a bare int is not accepted by every Keras version
    model = Sequential([Input(shape = (seq_len,)),
                        Embedding(input_dim = vocab_sz, output_dim = embed_out_dim)])

    for index in range(lstm_layer):
        lstm = LSTM(units = _layer_setting(lstm_out_dim, index),
                    activation = _layer_setting(lstm_activation, index),
                    recurrent_activation = _layer_setting(lstm_rec_activation, index),
                    use_bias = _layer_setting(lstm_bias, index),
                    # Every LSTM layer except the last one must emit the full sequence for the next LSTM layer
                    return_sequences = index != lstm_layer - 1)

        model.add(Bidirectional(lstm) if bidirectional else lstm)

    for index in range(fc_layer):
        model.add(Dense(units = _layer_setting(fc_out_dim, index),
                        activation = _layer_setting(fc_activation, index),
                        use_bias = _layer_setting(fc_bias, index)))

    # Keywords are required here: the positional order of compile() differs between Keras versions
    # (the third positional parameter is `metrics` in Keras 2 but `loss_weights` in Keras 3)
    model.compile(optimizer = optimiser, loss = loss, metrics = metrics)
    return model

In [None]:
def _write_split(split, encoder, data_path, label_path):
    """Vectorise every (text, label) pair of `split` and write it out as text.

    Each line of `data_path` is the string form of one token-id list and the
    same line of `label_path` is the matching label.
    """
    # `with` closes both files even when vectorisation or writing fails part-way
    with open(data_path, 'w') as data_file, open(label_path, 'w') as label_file:
        for text, label in split:
            data_file.write(str(encoder(text).numpy().tolist()) + '\n')
            label_file.write(str(label.numpy()) + '\n')


# Number of examples held in the shuffle buffer
# (10000 is the value used by the TensorFlow RNN text classification tutorial)
buffer_sz = 10000

cache_paths = ['train_data.txt', 'train_label.txt', 'test_data.txt', 'test_label.txt']

# Only download and vectorise the dataset when at least one cached text file is missing
if not all(os.path.exists(path) for path in cache_paths):
    dataset = tfds.load('imdb_reviews', as_supervised = True)
    train_data = dataset['train'].shuffle(buffer_sz)
    test_data = dataset['test'].shuffle(buffer_sz)

    # The vocabulary is learnt from the training reviews only
    encoder = TextVectorization(max_tokens = vocab_sz, output_sequence_length = seq_len)
    encoder.adapt(train_data.map(lambda text, label: text))

    _write_split(train_data, encoder, 'train_data.txt', 'train_label.txt')
    _write_split(test_data, encoder, 'test_data.txt', 'test_label.txt')

In [None]:
def _load_split(data_path, label_path):
    """Read one cached dataset split back into NumPy arrays.

    Args:
        data_path: Text file with one token-id list literal per line.
        label_path: Text file with one integer label per line.

    Returns:
        A `(data, label)` tuple: `data` is an int64 array of shape
        (number of lines, seq_len) and `label` is an int64 vector.

    Raises:
        ValueError: If the two files do not contain the same number of lines,
            or a line cannot be parsed / does not fit `seq_len`.
    """
    # `with` guarantees both handles are closed even when reading fails
    with open(data_path, 'r') as data_file, open(label_path, 'r') as label_file:
        vectors = data_file.readlines()
        labels = label_file.readlines()

    if len(vectors) != len(labels):
        raise ValueError(f'{data_path} has {len(vectors)} lines but {label_path} has {len(labels)}')

    data = np.zeros((len(vectors), seq_len), np.int64)
    label = np.zeros(len(labels), np.int64)

    for index, (vector, target) in enumerate(zip(vectors, labels)):
        # literal_eval only accepts Python literals, so file contents can never execute code (unlike eval)
        data[index] = ast.literal_eval(vector)
        label[index] = int(target)

    return data, label


train_data, train_label = _load_split('train_data.txt', 'train_label.txt')
test_data, test_label = _load_split('test_data.txt', 'test_label.txt')

In [None]:
# Reuse the saved model when one exists on disk; otherwise build a fresh, untrained one
if os.path.exists('lstm.h5'):
    model = load_model('lstm.h5')
else:
    model = lstm_model()

model.summary()

In [None]:
# Train and save only when no saved model exists yet
if not os.path.exists('lstm.h5'):
    history = model.fit(x = train_data,
                        y = train_label,
                        batch_size = batch_sz,
                        epochs = num_epoch,
                        validation_split = 0.3)

    model.save('lstm.h5')

In [None]:
# Loss and accuracy of the model on the training split
train_loss, train_acc = model.evaluate(x = train_data, y = train_label)

In [None]:
# Loss and accuracy of the model on the test split
test_loss, test_acc = model.evaluate(x = test_data, y = test_label)