# Calculate accuracy, precision and recall

## Load data from pickle files

In [237]:
import keras
import data_functions
import numpy

In [238]:
# Read the previously pickled dataset back into the notebook namespace.
# NOTE(review): presumably this unpickles "data.pickle"; unpickling can run
# arbitrary code, so only load files you created yourself.
(
    words,
    X,
    dataX,
    y,
    n_words,
    n_vocab,
    index2word,
    word2index,
) = data_functions.read_data_from_pickle("data.pickle")

In [239]:
# Fetch the stored train/test split, then report the shape of each part.
(X_train, X_test,
 y_train, y_test) = data_functions.read_train_set_from_pickle(
    "traintest_data.pickle"
)

print ('Training set:', X_train.shape, y_train.shape)
print ('Test set:', X_test.shape, y_test.shape)

KeyboardInterrupt: 

## Create models

In [None]:
# Registry of every architecture under evaluation. Each entry names the Keras
# recurrent layer class to build with, the weights file to load and a log path.
models = {
    'rnn': dict(
        model=keras.layers.SimpleRNN,
        weights_file='./hdf5/train_weights_rnn.hdf5',
        log_file='./csv/results_rnn.csv',
    ),
    'lstm': dict(
        model=keras.layers.LSTM,
        weights_file='./hdf5/train_weights_lstm.hdf5',
        log_file='./csv/results_lstm.csv',
    ),
    'gru': dict(
        model=keras.layers.GRU,
        weights_file='./hdf5/train_weights_gru.hdf5',
        log_file='./csv/results_gru.csv',
    ),
}

In [None]:
# #Training only
# models ={'rnn' : {'model': keras.layers.SimpleRNN, 
#                   'weights_file':'./hdf5/weights_rnn.hdf5', 
#                   'log_file': './csv/results_rnn.csv'},
#          'lstm': {'model': keras.layers.LSTM, 
#                   'weights_file': './hdf5/weights_lstm.hdf5', 
#                   'log_file': './csv/results_lstm.csv'},
#          'gru' : {'model': keras.layers.GRU, 
#                   'weights_file':'./hdf5/weights_gru.hdf5', 
#                   'log_file': './csv/results_gru.csv'}
#         }

In [None]:
from model_design import modelRNN
from importlib import reload 
# reload(model_design)

# Build the three networks in turn (rnn, lstm, gru) and load the stored
# weights for each. The simple RNN gets 2000 hidden units, the others 1000.
sequence_shape = (X.shape[1], X.shape[2])
loaded_models = {}
for mdl, hidden_units in (('rnn', 2000), ('lstm', 1000), ('gru', 1000)):
    model = modelRNN(model_type=models[mdl]['model'])
    loaded_models[mdl] = model.create_model(
        hidden_layer=hidden_units,
        input_shape=sequence_shape,
        output_shape=y.shape[1],
    )
    model.load_weights(weights_file=models[mdl]['weights_file'])

model_rnn = loaded_models['rnn']
model_lstm = loaded_models['lstm']
model_gru = loaded_models['gru']

## Functions to predict from models

In [None]:
def generate_from_model(model, pattern, nb=50, show_input=False):
    """Predict the next ``nb`` word indexes by feeding ``model`` its own output.

    Starting from the seed ``pattern``, the model is asked for a prediction,
    the argmax of that prediction is taken as the next index, the index is
    appended to the window and the oldest entry is dropped. This repeats
    ``nb`` times.

    Args:
        model: Object exposing ``predict(x, verbose=0)``; the argmax of the
            returned scores is used as the predicted word index.
        pattern: Sequence of word indexes used as the seed. It is copied, so
            the caller's sequence is never modified and any sequence type
            (list, tuple, array) is accepted.
        nb: Number of indexes to generate.
        show_input: When true, print the seed as text (looked up through the
            module-level ``index2word``) before predicting.

    Returns:
        List holding the ``nb`` predicted indexes in generation order.
    """
    # Work on a private copy. Appending to the caller's list directly would
    # corrupt the seed on the first iteration.
    window = list(pattern)

    if show_input:
        print ("Seed:")
        print ("\"", ' '.join([index2word[i] for i in window]), "\"")
        print('--------------------')

    predictions = []
    for _ in range(nb):
        # One batch entry, one feature per time step, divided by the
        # module-level vocabulary size before being handed to the model.
        x = numpy.reshape(window, (1, len(window), 1))
        x = x / float(n_vocab)
        prediction = model.predict(x, verbose=0)
        index = numpy.argmax(prediction)
        predictions.append(index)
        # Slide the window: newest index in, oldest index out.
        window.append(index)
        window = window[1:]
    return predictions

In [None]:
#Functions to convert indexes to text
import numpy as np

def get_global_index(item):
    """Return the position of the first row of the global ``X`` equal to ``item``.

    The search is a linear scan over ``X``, comparing every row element-wise.

    Args:
        item: Array to look for; it is compared with ``==`` against each row
            and must match in every element.

    Returns:
        Index of the first matching row, or ``None`` when no row matches.
    """
    for position, row in enumerate(X):
        if (item == row).all():
            return position
    return None

def true_text_indexes(start, X_set, y_set, nb):
    """Return the indexes of the ``nb`` true words that follow a sample.

    For a single word the answer is the argmax of ``y_set[start]``. For longer
    continuations the sample is first located in the global ``X`` (via
    ``get_global_index``) and ``nb`` consecutive targets are then read from
    the global ``y`` starting at that position.

    Args:
        start: Position of the sample inside ``X_set`` / ``y_set``.
        X_set: Input samples of the split being evaluated.
        y_set: Targets aligned with ``X_set``.
        nb: Number of target indexes to return.

    Returns:
        List of ``nb`` target indexes.
    """
    if nb == 1:
        return [np.argmax(y_set[start])]

    origin = get_global_index(X_set[start])
    return [np.argmax(y[origin + offset]) for offset in range(nb)]

def convert_index2words(indexes):
    """Translate word indexes to words through the global ``index2word``.

    Args:
        indexes: Sequence of word indexes.

    Returns:
        List with the word for each index, in the same order.
    """
    return [index2word[word_id] for word_id in indexes]

def true_text(start, X_set, y_set, nb):
    """Return the ``nb`` true words that follow the sample at ``start``.

    Combines ``true_text_indexes`` (target indexes) with
    ``convert_index2words`` (index-to-word translation).
    """
    target_indexes = true_text_indexes(start, X_set, y_set, nb)
    return convert_index2words(target_indexes)

def p_accuracy(list1, list2):
    """Return the fraction of positions at which two sequences agree.

    Args:
        list1: Reference sequence; its length is the denominator.
        list2: Sequence compared element-wise with ``list1``.

    Returns:
        Number of matching positions divided by ``len(list1)``, or ``0.0``
        when both sequences are empty.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Without this check a length-1 argument would be broadcast by NumPy and
    # produce a plausible-looking but meaningless score.
    if len(list1) != len(list2):
        raise ValueError(
            "p_accuracy needs sequences of equal length, got "
            f"{len(list1)} and {len(list2)}"
        )
    # Nothing to compare: avoid dividing by zero.
    if len(list1) == 0:
        return 0.0

    matches = np.asarray(list1) == np.asarray(list2)
    return matches.sum() / len(list1)

# s = 45166
# print(true_text_indexes(s,5))
# print(convert_index2words(true_text_indexes(s,5)))
# print(true_text(s, 6))

In [None]:
import sys
def _unwrap_single(items, n):
    """Return the lone element when ``n`` is 1, otherwise ``items`` unchanged."""
    return items[0] if n == 1 else items


def predict_sequence(X_set, y_set, nb_samples, n):
    """Predict ``n`` words for ``nb_samples`` samples with each of the models.

    The positions of the samples are read from the module-level ``i_samples``
    list; the models used are the module-level ``model_lstm``, ``model_gru``
    and ``model_rnn``. A ``*`` is written to stdout per processed sample.

    Args:
        X_set: Input samples; each sample is a sequence of one-element steps
            holding a word index divided by ``n_vocab``.
        y_set: Targets aligned with ``X_set``.
        nb_samples: Number of entries of ``i_samples`` to process.
        n: Number of words to predict per sample.

    Returns:
        Tuple ``(t_text, p_rnn, p_lstm, p_gru)``: the true words followed by
        the predictions of each model. Every entry is a single word when
        ``n == 1`` and a list of ``n`` words otherwise.
    """
    t_text, p_lstm, p_gru, p_rnn = [], [], [], []

    for sample in range(nb_samples):
        sys.stdout.write('*')
        start = i_samples[sample]

        # Undo the division by n_vocab to recover the word indexes. Round
        # rather than truncate: the float round-trip can land just below the
        # integer (0.29 * 100 == 28.999...), which truncation turns into the
        # wrong word. The builtin int also replaces np.int, which no longer
        # exists in current NumPy releases.
        pattern = [int(round(step[0] * float(n_vocab))) for step in X_set[start]]

        # Ground truth for this sample.
        t_text.append(_unwrap_single(true_text(start, X_set, y_set, n), n))

        # Same seed for every model; each call gets its own copy so that no
        # model can see another model's predictions.
        for network, collected in ((model_lstm, p_lstm),
                                   (model_gru, p_gru),
                                   (model_rnn, p_rnn)):
            generated = generate_from_model(network, list(pattern), n,
                                            show_input=False)
            collected.append(_unwrap_single(convert_index2words(generated), n))

    return t_text, p_rnn, p_lstm, p_gru


## Assessment

In [None]:
# Next-word accuracy on the training split.
X_set, y_set = X_train, y_train

n = 1
nb_samples = 1000
# Random sample positions (drawn with replacement). predict_sequence reads
# them from this global list.
i_samples = [np.random.randint(2, len(X_set) - 1) for _ in range(nb_samples)]

t_text, p_rnn, p_lstm, p_gru = predict_sequence(X_set, y_set, nb_samples, n)

print('\n Accuracy with training data')
# print("Correct", t_text)
for label, predicted in (("rnn: accuracy = ", p_rnn),
                         ("lstm: accuracy = ", p_lstm),
                         ("gru: accuracy = ", p_gru)):
    print(label, p_accuracy(t_text, predicted))

In [None]:
# Next-word accuracy on the test split.
X_set, y_set = X_test, y_test

n = 1
nb_samples = len(y_test)
# Random sample positions (drawn with replacement, so some test samples may
# repeat and others be skipped). predict_sequence reads them from this global.
i_samples = [np.random.randint(2, len(X_set) - 1) for _ in range(nb_samples)]

t_text, p_rnn, p_lstm, p_gru = predict_sequence(X_set, y_set, nb_samples, n)

print('\n Accuracy with test data')
# print("Correct", t_text)
for label, predicted in (("rnn: accuracy = ", p_rnn),
                         ("lstm: accuracy = ", p_lstm),
                         ("gru: accuracy = ", p_gru)):
    print(label, p_accuracy(t_text, predicted))

## BLEU scores calculation

In [None]:
from nltk.translate.bleu_score import corpus_bleu
def calc_bleu(ref, hyp):
    """Return corpus-level BLEU-1, BLEU-2 and BLEU-3 of ``hyp`` against ``ref``.

    Each score is computed with ``nltk``'s ``corpus_bleu`` using uniform
    weights over the n-gram orders it covers (1, 1/2 and 1/3 respectively), so
    that the weights of every score sum to one.

    Args:
        ref: References, passed to ``corpus_bleu`` as ``list_of_references``.
        hyp: Hypotheses, passed to ``corpus_bleu`` as ``hypotheses``.

    Returns:
        Tuple ``(BLEU1, BLEU2, BLEU3)``.
    """
    # 1/3 rather than 0.3: weights summing to 0.9 raise the geometric mean of
    # the n-gram precisions to a power below one and overstate BLEU-3.
    third = 1.0 / 3.0
    weight_sets = (
        (1.0, 0, 0, 0),
        (0.5, 0.5, 0, 0),
        (third, third, third, 0),
    )
    BLEU1, BLEU2, BLEU3 = (
        corpus_bleu(ref, hyp, weights=weights) for weights in weight_sets
    )
    return BLEU1, BLEU2, BLEU3

In [None]:
# Display the number of entries in the test targets; the test-set cells use
# this value as nb_samples.
len(y_test)

In [None]:
# BLEU scores for 10-word continuations on the training split.
X_set, y_set = X_train, y_train

n = 10
nb_samples = 1000
# Random sample positions (drawn with replacement). predict_sequence reads
# them from this global list.
i_samples = [np.random.randint(2, len(X_set) - 1) for _ in range(nb_samples)]

t_text, p_rnn, p_lstm, p_gru = predict_sequence(X_set, y_set, nb_samples, n)

print('\n BLEU score with training data')
# print("Correct\n", t_text)
# print("rnn\n ",p_rnn)
# print("lstm\n", p_lstm)
# print("gru\n", p_gru)

# One reference per sample: reshape to (samples, 1, words).
ref = np.array(t_text).reshape(nb_samples, 1, -1)
for title, hypotheses in (('lstm', p_lstm), ('gru', p_gru), ('rnn', p_rnn)):
    print(title)
    print(calc_bleu(ref, hypotheses))

In [None]:
# BLEU scores for 10-word continuations on the test split.
X_set, y_set = X_test, y_test

n = 10
nb_samples = len(y_test)
# Random sample positions (drawn with replacement, so some test samples may
# repeat and others be skipped). predict_sequence reads them from this global.
i_samples = [np.random.randint(2, len(X_set) - 1) for _ in range(nb_samples)]

t_text, p_rnn, p_lstm, p_gru = predict_sequence(X_set, y_set, nb_samples, n)

print('\n BLEU score with test data')
print("Correct\n", t_text)
# print("rnn\n ",p_rnn)
print("lstm\n", p_lstm)
# print("gru\n", p_gru)

# One reference per sample: reshape to (samples, 1, words).
ref = np.array(t_text).reshape(nb_samples, 1, -1)
for title, hypotheses in (('lstm', p_lstm), ('gru', p_gru), ('rnn', p_rnn)):
    print(title)
    print(calc_bleu(ref, hypotheses))

In [None]:
# Print each reference continuation followed by the LSTM prediction for the
# same sample, with a blank line between pairs.
for i, hypothesis in enumerate(p_lstm):
    print(ref[i][0])
    print(hypothesis)
    print()