### Import packages

In [None]:
import os
import random

from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda, Embedding, Flatten
from keras.optimizers import RMSprop,Adagrad,Adadelta,Adam,Adamax,Nadam
from keras.utils import to_categorical
from keras.models import load_model, Model, Sequential
from keras.callbacks import LearningRateScheduler
import keras.backend as K
from keras.preprocessing import sequence
from keras import layers
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.wrappers.scikit_learn import KerasClassifier
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer,f1_score,recall_score,fbeta_score,precision_recall_fscore_support
import tensorflow as tf

from lr_finder import LRFinder

#### Auxiliary functions

In [None]:
def step_decay_schedule(initial_lr=1e-4, decay_factor=0.75, step_size=10):
    """Create a ``LearningRateScheduler`` callback with a step-decay schedule.

    The learning rate for a given epoch is
    ``initial_lr * decay_factor ** floor(epoch / step_size)``, i.e. it is
    multiplied by ``decay_factor`` once every ``step_size`` epochs.

    Args:
        initial_lr: Learning rate used for the first ``step_size`` epochs.
        decay_factor: Multiplicative factor applied at every step.
        step_size: Number of epochs between two consecutive decays; must be
            strictly positive.

    Returns:
        A ``LearningRateScheduler`` wrapping the schedule function.

    Raises:
        ValueError: If ``step_size`` is not strictly positive.
    """
    # Fail at construction time instead of with a ZeroDivisionError (or a
    # growing learning rate) in the middle of training.
    if step_size <= 0:
        raise ValueError("step_size must be a positive number of epochs")

    def schedule(epoch):
        # float() turns the NumPy scalar produced by np.floor into a plain
        # Python float before handing it back to the callback.
        return float(initial_lr * (decay_factor ** np.floor(epoch / step_size)))

    return LearningRateScheduler(schedule)

def focal_loss(gamma=2., alpha=.25):  # alternative loss function to try
    """Build a binary focal-loss function usable as a Keras ``loss``.

    Positive samples (``y_true == 1``) contribute
    ``-alpha * (1 - p) ** gamma * log(p)`` and negative samples
    (``y_true == 0``) contribute ``-(1 - alpha) * p ** gamma * log(1 - p)``,
    where ``p`` is the predicted probability. Contributions are summed over
    all elements of the batch.

    Args:
        gamma: Exponent of the modulating factor.
        alpha: Weight of the positive term; the negative term is weighted
            by ``1 - alpha``.

    Returns:
        A function ``(y_true, y_pred) -> scalar loss tensor``.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep predictions strictly inside (0, 1): a saturated output would
        # otherwise feed an exact 0 to K.log and turn the loss into inf/NaN.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)

        # Where the label does not match, substitute the neutral value
        # (1 for the positive term, 0 for the negative term) so that the
        # corresponding contribution is exactly zero.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        positive_term = K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
        negative_term = K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
        return -positive_term - negative_term
    return focal_loss_fixed

### Fetching the data

In [None]:
print("LSTM/GRU search")
# Read every raw text file and derive its binary label from the name of the
# sub-directory it lives in: 'neg' -> 0, 'pos' -> 1.
imdb_dir = ''
train_dir = os.path.join(imdb_dir, 'X_REP_RAW')
labels = []
texts = []
for label_type in ['neg', 'pos']:
    dir_name = os.path.join(train_dir, label_type)
    # The label is the same for every file of this directory.
    label = 0 if label_type == 'neg' else 1
    for fname in os.listdir(dir_name):
        if not fname.endswith('.txt'):
            continue
        # 'with' guarantees the handle is released even if read() fails;
        # undecodable bytes are dropped (errors='ignore').
        with open(os.path.join(dir_name, fname), encoding='utf-8', errors='ignore') as f:
            texts.append(f.read())
        labels.append(label)

### Parameter settings and tokenizer fitting

In [None]:
# --- Sequence / vocabulary limits -------------------------------------------
# maxlen: cutoff length applied to every review (original note: "put 2000").
maxlen = 1000
# max_words: vocabulary size kept by the tokenizer. Original note: 29107 is
# the total number of words, 27610 is the w2v vocabulary dimension; use at
# least 10000.
max_words = 10000

# --- 80 % / 20 % train/test sizes -------------------------------------------
total_samples = len(labels)
training_samples = int(total_samples * 0.8)
test_samples = total_samples - training_samples

# --- Fit the tokenizer and turn each text into a sequence of word indices ---
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

### Padding, shuffling and train/test split

In [None]:
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

# Bring every sequence to the same length (maxlen) and turn the labels into
# a NumPy array so both can be indexed with the same permutation.
data = pad_sequences(sequences, maxlen=maxlen)
labels = np.asarray(labels)
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

# Shuffle samples and labels together using one random permutation.
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data, labels = data[indices], labels[indices]

# The first `training_samples` rows are the training set, the following
# `test_samples` rows are the held-out test set.
split_end = training_samples + test_samples
X, Y = data[:training_samples], labels[:training_samples]
x_test, y_test = data[training_samples:split_end], labels[training_samples:split_end]

### Word embeddings

In [None]:
# Parse the pre-trained word-vector file ('w2v_reports_256.vec'): after a
# header line, each line holds a word followed by its vector components.
w2v_dir = ''
embeddings_index = {}
# 'with' closes the file even if a malformed line makes the parsing fail.
with open(os.path.join(w2v_dir, 'w2v_reports_256.vec'), encoding='utf-8', errors='ignore') as f:  # even 300
    dummy = f.readline()  # skip the first line that tells: n. of words, n. of features
    for line in f:
        values = line.split()
        # Ignore blank lines (e.g. a trailing newline at the end of the
        # file) and lines that carry a word but no coefficients.
        if len(values) < 2:
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))

# NOTE: this part can be removed if pre-trained word embeddings are not wanted.
# Build the embedding matrix: row i holds the pre-trained vector of the word
# whose tokenizer index is i; words without a pre-trained vector keep an
# all-zero row.
embedding_dim = 256  # number of features, increasable to 300
embedding_matrix = np.zeros((max_words, embedding_dim))
for word, i in word_index.items():
    # Indices >= max_words fall outside the matrix and are skipped.
    if i < max_words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
            


### Model definition

In [None]:
def create_model(optimizer,
                 kernel_initializer,
                 bias_initializer,
                 kernel_regularizer,
                 recurrent_regularizer,
                 bias_regularizer,
                 activity_regularizer,
                 kernel_constraint,
                 recurrent_constraint,
                 bias_constraint,
                 dropout,
                 recurrent_dropout, loss):
    """Build and compile a stacked bidirectional LSTM/GRU binary classifier.

    The architecture is: a frozen ``Embedding`` layer initialised from
    ``embedding_matrix``, one bidirectional recurrent layer per entry of
    ``layer_list`` (all but the last return full sequences), and a single
    sigmoid output unit.

    Besides its arguments the function reads these module-level names:
    ``layer_list`` (units per recurrent layer), ``rnntype`` (``'LSTM'``
    selects LSTM, anything else GRU), ``max_words``, ``embedding_dim``,
    ``maxlen`` and ``embedding_matrix``.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        kernel_initializer, bias_initializer: Initializers forwarded to
            every recurrent layer.
        kernel_regularizer, recurrent_regularizer, bias_regularizer,
            activity_regularizer: Regularizers forwarded to every recurrent
            layer.
        kernel_constraint, recurrent_constraint, bias_constraint:
            Constraints forwarded to every recurrent layer.
        dropout, recurrent_dropout: Dropout rates forwarded to every
            recurrent layer.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Both cell types take the same keyword arguments, so select the class
    # once and share a single set of kwargs instead of repeating the call.
    rnn_cls = layers.LSTM if rnntype == 'LSTM' else layers.GRU
    rnn_kwargs = dict(
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
    )

    model = Sequential()
    model.add(Embedding(max_words, embedding_dim, input_length=maxlen))

    # Intermediate layers must emit the whole sequence for the next
    # recurrent layer. Only layer_list[:-1] is used here: iterating over the
    # full list would add the last size twice (once here, once below) and
    # build one more layer than layer_list describes.
    for units in layer_list[:-1]:
        model.add(layers.Bidirectional(
            rnn_cls(units, return_sequences=True, **rnn_kwargs)))

    # Final recurrent layer: returns only its last output for the classifier.
    model.add(layers.Bidirectional(rnn_cls(layer_list[-1], **rnn_kwargs)))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.summary()

    # NOTE: this part can be removed if pre-trained word embeddings are not wanted.
    # Load the pre-trained vectors into the Embedding layer and freeze it
    # (before compile, so the setting is taken into account).
    model.layers[0].set_weights([embedding_matrix])
    model.layers[0].trainable = False

    model.compile(optimizer=optimizer, loss=loss)  # ,metrics=['acc',mean_pred])

    return model

### Single cross validation (i=1)

In [None]:
# Log file for the search results. It is deliberately left open here: the
# cell that evaluates on the test set appends the final accuracy and closes it.
f=open('GRU-LSTM-gridoutput.txt','w')
for i in range(1):

    # --- Randomly drawn architecture / training settings for this round ---
    # create_model reads layer_list and rnntype as module-level names.
    num_layers = np.random.randint(1,5)
    layers_dim = [16,32,64,128] #units
    layer_list = sorted(random.sample(layers_dim,num_layers), reverse=True)
    rnntype = random.choice(['LSTM','GRU'])
    # Must be a plain int: it is passed to the classifier and written with
    # '%d' below. Alternative: random.choice(list(range(6,16,2)))
    epoch_size = 50
    batch_size = random.choice([16,32,64,128])

    # --- Search space of the recurrent-layer parameters ---
    # activation, recurrent_activation and recurrent_initializer are not part
    # of the search space.
    kernel_initializer=['glorot_normal','glorot_uniform','TruncatedNormal','VarianceScaling'] #cause it's a tanh
    bias_initializer=['zeros','ones','glorot_normal','he_normal']
    kernel_regularizer=[None, 'l1','l2','l1_l2']
    recurrent_regularizer=[None, 'l1','l2','l1_l2']
    bias_regularizer=[None, 'l1','l2','l1_l2']
    activity_regularizer=[None, 'l1','l2','l1_l2']
    # Other constraints that could be tried: 'MinMaxNorm','NonNeg','UnitNorm'
    kernel_constraint=[None, 'MaxNorm']
    recurrent_constraint=[None, 'MaxNorm']
    bias_constraint=[None, 'MaxNorm']
    dropout=[0.0, 0.2,0.3,0.4,0.5]
    recurrent_dropout=[0.0, 0.2,0.3,0.4,0.5]

    optimizer = ['Adadelta','Adam','Adamax','Nadam']
    loss = ['binary_crossentropy']#,[focal_loss()]]

    param_grid = dict(optimizer = optimizer,
                      kernel_initializer = kernel_initializer,
                      bias_initializer = bias_initializer,
                      kernel_regularizer = kernel_regularizer,
                      recurrent_regularizer = recurrent_regularizer,
                      bias_regularizer = bias_regularizer,
                      activity_regularizer = activity_regularizer,
                      kernel_constraint = kernel_constraint,
                      recurrent_constraint = recurrent_constraint,
                      bias_constraint = bias_constraint,
                      dropout = dropout,
                      recurrent_dropout = recurrent_dropout,
                      loss = loss
                      )

    scoring = {'acc':make_scorer(accuracy_score),'f1': make_scorer(f1_score),'f2': make_scorer(fbeta_score, beta=2),
               'rec': make_scorer(recall_score)}

    # Pass the drawn epochs / batch size to the wrapper so that training
    # really uses the values written to the log file below.
    model = KerasClassifier(build_fn=create_model,
                            epochs=epoch_size,
                            batch_size=batch_size,
                            verbose=0)

    # NOTE(review): n_jobs=-1 asks for one worker per core; confirm this is
    # appropriate for Keras models on the target machine.
    grid = RandomizedSearchCV(cv=2, #4
                              n_iter=2, #10
                              estimator=model,
                              param_distributions=param_grid,
                              n_jobs=-1,
                              scoring=scoring,
                              refit='acc', #or f1, f2
                              #random_state = 42
                             )

    # TODO (marked "#fix" in the original): learning-rate finder, disabled.
    # lr_finder = LRFinder(min_lr=1e-5,
    #                      max_lr=1e-2,
    #                      steps_per_epoch=np.ceil(epoch_size/batch_size),
    #                      epochs=3)
    grid_result = grid.fit(X, Y)#, callbacks=[lr_finder])
    # lr_finder.plot_loss('lr_loss.png')
    # lr_finder.plot_lr('lr.png')

    # summarize results (best_score_ is the mean 'acc' score because refit='acc')
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    #mean_acc = grid_result.cv_results_['mean_test_acc']
    mean_f1 = grid_result.cv_results_['mean_test_f1']
    mean_f2 = grid_result.cv_results_['mean_test_f2']
    mean_rec = grid_result.cv_results_['mean_test_rec']
    params = grid_result.cv_results_['params']
    # cv_results_ also holds rank_test_rec / rank_test_f2, a ranking of the
    # candidates for those metrics.

    f.write('rnn type: %s\n'%rnntype)
    for item in layer_list:
        f.write("%s\n" % item)
    f.write('epochs: %d\n'%epoch_size)
    f.write('batch_dim: %d\n'%batch_size)
    for mean1, mean2, mean3, param in zip(mean_f1, mean_f2, mean_rec, params):
        # One candidate per line (the trailing newline keeps entries apart).
        f.write("f1 %f f2 %f rec %f with: %r\n" % (mean1, mean2, mean3, param))
    f.write('---------------------------------------------\n')
    # Persist this round's results even if a later step fails.
    f.flush()

### Display results

In [None]:
# Dump the complete cross-validation table of the search to CSV.
res = pd.DataFrame(grid.cv_results_)
res.to_csv('GRU_params.csv')

# Evaluate the refitted best estimator on the held-out test split and append
# the accuracy to the log file opened by the search cell.
try:
    y_pred = grid.best_estimator_.predict(x_test)
    f.write("The final accuracy is: ")
    # Flatten both arrays so the comparison is element-wise whether predict
    # returns shape (n,) or (n, 1).
    somme = int(np.sum(np.ravel(y_pred) == np.ravel(y_test)))
    print(somme,len(y_test))
    avg = somme/len(y_test)
    f.write("%f"%avg)
finally:
    # Always release the log file, even if prediction fails.
    f.close()

print('y_pred:',y_pred)
print('y_test',y_test)