In [None]:
#Install bert package for tensorflow v1
!pip install bert-tensorflow==1.0.1
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

from datetime import datetime
import keras
from keras import layers
from keras.callbacks import ReduceLROnPlateau
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tqdm.notebook import tqdm #adds progress bars to show loop status
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

class LSTM(object):
    """LSTM classifier over sequences of 768-dimensional feature vectors.

    Bundles the Keras model definition with helpers for training, evaluating
    and getting predictions from it. ``train`` builds the model (through
    ``buildModel``) and stores it on ``self.model``; ``evaluate`` and
    ``predict`` use that stored model, so the usual order of calls is
    train -> evaluate -> predict.
    """

    def __init__(self, config):
        """Store the configuration object.

        Args:
            config: Object exposing the ``model``, ``training`` and
                ``labelList`` attributes read by the other methods.
        """
        self.config = config

    def _trained_model(self):
        """Return ``self.model``, failing with a clear message if it is unset."""
        model = getattr(self, 'model', None)
        if model is None:
            # AttributeError is what callers already got when `model` was
            # missing; only the message is new.
            raise AttributeError(
                "LSTM has no trained model; call train() first "
                "(or assign a model to the `model` attribute)."
            )
        return model

    def buildModel(self):
        """Build and compile the Keras model described by ``self.config``.

        Returns:
            A compiled ``keras.Model`` that maps a variable-length sequence
            of 768-dimensional float vectors to one output unit per entry in
            ``self.config.labelList``.
        """
        model_cfg = self.config.model

        # Variable-length sequences of 768 features per timestep
        # (presumably BERT embeddings -- confirm against the data pipeline).
        text_input = layers.Input(shape=(None, 768), dtype='float32', name='text')
        # Timesteps filled with -99 are treated as padding and skipped.
        l_mask = layers.Masking(mask_value=-99.)(text_input)
        encoded_text = layers.LSTM(model_cfg.lstm_units)(l_mask)
        dropout = layers.Dropout(model_cfg.lstm_dropout)(encoded_text)
        out_dense = layers.Dense(
            model_cfg.dense_units,
            activation=model_cfg.dense_activation,
        )(dropout)
        # Classification head: one unit per label; the activation comes from
        # the configuration rather than being fixed here.
        out = layers.Dense(
            len(self.config.labelList),
            activation=model_cfg.output_activation,
        )(out_dense)

        # At model instantiation, we specify the input and the output.
        model = keras.Model(inputs=text_input, outputs=out)
        model.compile(
            optimizer=model_cfg.optimizer,
            loss=model_cfg.loss,
            metrics=model_cfg.metrics,
        )
        return model

    def train(self, train_generator, val_generator):
        """Build a fresh model and fit it on the given generators.

        The fitted model is stored on ``self.model``; nothing is returned.

        Args:
            train_generator: Generator yielding training batches.
            val_generator: Generator yielding validation batches.
        """
        # NOTE(review): newer Keras releases report this metric as
        # 'val_accuracy' -- confirm the name matches config.model.metrics.
        call_reduce = ReduceLROnPlateau(
            monitor='val_acc', factor=0.98, patience=2, verbose=2,
            mode='auto', min_delta=0.01, cooldown=0, min_lr=0,
        )
        self.model = self.buildModel()
        self.model.fit_generator(
            train_generator,
            steps_per_epoch=self.config.training.batches_per_epoch_train,
            epochs=self.config.model.num_epochs,
            validation_data=val_generator,
            validation_steps=self.config.training.batches_per_epoch_val,
            callbacks=[call_reduce],
        )

    def evaluate(self, test_generator, df_test):
        """Evaluate the trained model on the test generator.

        Args:
            test_generator: Generator yielding test batches.
            df_test: Table whose ``'emb'`` column has one row per test
                sequence; only its length is used, as a sanity check.

        Returns:
            Whatever ``model.evaluate_generator`` returns (the loss followed
            by the configured metrics).

        Raises:
            AssertionError: If ``batch_size_test * batches_per_epoch_test``
                does not equal the number of rows in ``df_test['emb']``.
            AttributeError: If no model has been trained or assigned yet.
        """
        num_sequences = len(df_test['emb'])
        batch_size = self.config.training.batch_size_test
        batches_per_epoch = self.config.training.batches_per_epoch_test

        # Explicit raise instead of `assert` so the check still runs under
        # `python -O`; AssertionError is kept so existing callers see the
        # same exception type.
        if batch_size * batches_per_epoch != num_sequences:
            raise AssertionError(
                "batch_size_test * batches_per_epoch_test = %d does not match "
                "the %d test sequences" % (batch_size * batches_per_epoch, num_sequences)
            )

        return self._trained_model().evaluate_generator(
            test_generator, steps=batches_per_epoch
        )

    def predict(self, generator):
        """Return the model's predictions for the batches from ``generator``.

        Args:
            generator: Generator yielding input batches;
                ``config.training.batches_per_epoch_test`` batches are drawn.

        Returns:
            Whatever ``model.predict`` returns (the predicted probabilities).

        Raises:
            AttributeError: If no model has been trained or assigned yet.
        """
        batches_per_epoch = self.config.training.batches_per_epoch_test
        return self._trained_model().predict(generator, steps=batches_per_epoch)