In [1]:
%tensorflow_version 1.x

import pickle as pkl
import numpy as np
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from nltk.translate.gleu_score import corpus_gleu

import tensorflow.python.keras.backend as K

TensorFlow 1.x selected.


In [2]:
# Imports for colab
from google.colab import drive
drive.mount('/content/gdrive')

dirname = '/content/gdrive/My Drive/Colab Notebooks/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Evaluation

## Load clean datasets and tokenizers

In [3]:
def load_pickle(filename):
    return pkl.load(open(filename, 'rb'))

## Compute max length of sentence

In [4]:
def max_length(lines):
    return max(len(line.split()) for line in lines)

## Encode and pad sequences

In [5]:
def encode_sequences(tokenizer, length, lines):
    # Encode sequences
    X = tokenizer.texts_to_sequences(lines)

    # Pad sequences with zeros
    X = pad_sequences(X, maxlen=length, padding='post')

    return X

## Generate a translation from a sequence
Algorithm predicts a one-hot encodings of words in a batch of sentences, which can be converted to sequences of numbers of words in a dictionary, which eventually will be converted to sequences of words

In [6]:
def batch_generator(sources, batch_size):
    for i in range(0, sources.shape[0], batch_size):
        yield sources[i:i + batch_size, :]

def predict_texts(model, tokenizer, sources, batch_size=4096):
    integers = np.empty((sources.shape[0], model.layers[-1].output_shape[1]))
    if batch_size==None:
        predictions = model.predict(sources, verbose=0, use_multiprocessing=True)
        integers = np.argmax(predictions, axis=2)
    else:
        for idx, batch in enumerate(batch_generator(sources, batch_size)):
            actual_batch_len = batch.shape[0]
            predictions = model.predict(batch, verbose=0, use_multiprocessing=True)
            integers[idx*batch_size:idx*batch_size + actual_batch_len] = np.argmax(predictions, axis=2)
    target = tokenizer.sequences_to_texts(integers)
    return target

## Evaluate the model

In [7]:
def evaluate_model(model, tokenizer, sources, raw_dataset, batch_size=4096):
    actual = [raw_target.split() for raw_target in raw_dataset[:, 1]]
    predicted = predict_texts(model, tokenizer, sources, batch_size)
    print('Predictions are done. Calculating the scores now')
    predicted = [text.split() for text in predicted]
    for i in range(10):
        print('src=[%s], target=[%s], predicted=[%s]' % (raw_dataset[i, 0], raw_dataset[i, 1], ' '.join(predicted[i])))
    # Calculate BLEU score. n-gram (n is up to 4) count weights are 0.25
    smoothing = SmoothingFunction()
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, smoothing_function=smoothing.method4))
    # Calculate GLEU score. Maximum n-gram length count - 4
    print('GLEU-4: %f' % corpus_gleu(actual, predicted))

## Evaluation

In [8]:
# Load datasets
dataset = load_pickle(dirname + 'english-russian-100k-both.pkl')
train = load_pickle(dirname + 'english-russian-100k-train.pkl')
test = load_pickle(dirname + 'english-russian-100k-test.pkl')

In [9]:
# Load english tokenizer
eng_tokenizer = load_pickle(dirname + 'en_tokenizer_100k')
eng_vocab_size = len(eng_tokenizer.word_index) + 1 #+1 because of <EOS> token
eng_length = max_length(dataset[:, 0])
print('English vocabulary size: %d' % eng_vocab_size)
print('English max sentence length: %d' % eng_length)

English vocabulary size: 7313
English max sentence length: 7


In [10]:
# Load russian tokenizer
rus_tokenizer = load_pickle(dirname + 'ru_tokenizer_100k')
rus_vocab_size = len(rus_tokenizer.word_index) + 1
rus_length = max_length(dataset[:, 1])
print('Russian vocabulary size: %d' % rus_vocab_size)
print('Russian max sentence length: %d' % rus_length)

Russian vocabulary size: 20884
Russian max sentence length: 11


In [11]:
#Encode sequences
X_train = encode_sequences(eng_tokenizer, eng_length, train[:, 0])
X_test = encode_sequences(eng_tokenizer, eng_length, test[:, 0])

In [12]:
# Load model
K.clear_session()
model = load_model(dirname + 'model_basic_100k_100e.h5')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [13]:
# Evaluation on training sequences
print('Train:')
evaluate_model(model, rus_tokenizer, X_train, train)

Train:
Predictions are done. Calculating the scores now
src=[i buried it], target=[я её закопал], predicted=[я её закопал]
src=[go and wake up mary], target=[пойди разбуди мэри], predicted=[пойди разбудите мэри]
src=[she did a good job], target=[она проделала хорошую работу], predicted=[она проделала хорошую работу]
src=[i work at a zoo], target=[я работаю в зоопарке], predicted=[я работаю в зоопарке]
src=[i want them], target=[я хочу их], predicted=[я хочу хочу]
src=[ive been thinking], target=[я размышляю], predicted=[я думал]
src=[we talked about boys], target=[мы говорили о мальчиках], predicted=[мы говорили о мальчиках]
src=[is that blood], target=[это кровь], predicted=[это кровь]
src=[dont be so selfish], target=[не будь такой эгоисткой], predicted=[не будь таким эгоистками]
src=[people are stupid], target=[люди глупы], predicted=[люди глупы]
BLEU-4: 0.010189
GLEU-4: 0.027326


In [14]:
# Evaluation on test sequences
print('Test:')
evaluate_model(model, rus_tokenizer, X_test, test)

Test:
Predictions are done. Calculating the scores now
src=[tom is very good], target=[том очень хороший], predicted=[том очень хороший]
src=[i was born in 1960], target=[я родился в 1960], predicted=[я родилась в 1982]
src=[do you work in boston], target=[вы работаете в бостоне], predicted=[ты работаешь в бостоне]
src=[look at this picture], target=[посмотрите на эту картинку], predicted=[посмотрите на эту фотографию]
src=[theres a problem], target=[есть проблема], predicted=[есть есть]
src=[tom is my neighbor], target=[том  мой сосед], predicted=[том мой сосед]
src=[wasnt he your friend], target=[он разве не был тебе другом], predicted=[он он не свой другом]
src=[i was playing here], target=[я здесь играл], predicted=[я играла здесь]
src=[its good to dream], target=[мечтать хорошо], predicted=[мечта]
src=[come by tomorrow], target=[заходи завтра], predicted=[завтра завтра завтра]
BLEU-4: 0.147634
GLEU-4: 0.025955
