In [1]:
import numpy as np
import pandas as pd
import copy
import os
from asl_data import AslDb
from asl_utils import show_errors
import datetime

In [2]:
def pred_accuracy(md, X, Y):
    """
    Using the model to make prediction with accuracy as return
    """
    
    pred = np.argmax(md.predict(X), axis=1)
    truth = np.argmax(Y, axis=1)
    accuracy = np.mean(pred == truth)
    
    return accuracy

In [3]:
def retrieveDataFeatures():
    
    asl = AslDb()
    
    # calculate the coordinates of hands w.r.t the coordinates of nose 
    asl.df['grnd-ry'] = asl.df['right-y'] - asl.df['nose-y']
    asl.df['grnd-rx'] = asl.df['right-x'] - asl.df['nose-x']
    asl.df['grnd-ly'] = asl.df['left-y'] - asl.df['nose-y']
    asl.df['grnd-lx'] = asl.df['left-x'] - asl.df['nose-x']

    # save relative coordinates as features
    features_ground = ['grnd-rx','grnd-ry','grnd-lx','grnd-ly']

    # calculate the polar coordinates from Cartesian coordinates
    asl.df['polar-lr'] = np.sqrt(np.square(asl.df['grnd-ly']) + np.square(asl.df['grnd-lx']))
    asl.df['polar-ltheta'] = np.arctan2(asl.df['grnd-lx'], asl.df['grnd-ly'])
    asl.df['polar-rr'] = np.sqrt(np.square(asl.df['grnd-ry']) + np.square(asl.df['grnd-rx']))
    asl.df['polar-rtheta'] = np.arctan2(asl.df['grnd-rx'], asl.df['grnd-ry'])

    features_polar = ['polar-rr', 'polar-rtheta','polar-lr', 'polar-ltheta']

    train_rawdata = asl.build_data(features_polar, csvfile=os.path.join('data', 'train_words.csv'))  
    test_rawdata = asl.build_data(features_polar, csvfile=os.path.join('data', 'test_words.csv'))  
    
    return train_rawdata, test_rawdata, asl

In [4]:
#Part I Extract Features from data
train_dataset, test_dataset, asl = retrieveDataFeatures() #asl.df is a pandas dataframe object

In [5]:
def createVocabDict(completeWordsList):
    """
        completeWordsList: LIST format of unique wrords including special characters needed to create
        special_words = ['<SOS>','<EOS>','<UKN>'] for training by sentence (LM)
        
        return:
        vocabList: a list of unique vocab including special characters
        index2vocab: dict: index: vocab look up
        vocab2index: dict: vocab: index look up
    """
    # making sure '<PAD>' if of position index 0
    completeWordsList = list(reversed(completeWordsList))
    
    index2vocab = {indx:vals for indx, vals in enumerate(completeWordsList)}
    vocab2index = {vals: indx for indx, vals in index2vocab.items()}
        
    return list(completeWordsList), index2vocab, vocab2index

In [6]:
# create Master Words Dictionary and related index/vocab look up dict
print(len(train_dataset.wordlist)) #note that this is NOT a unique dictionary
print(len(test_dataset.wordlist)) #note that this is NOT a unique dictionary
print(len(set(test_dataset.wordlist))) #note that this is NOT a unique dictionary

completeWordsList = set(test_dataset.wordlist).union(set(train_dataset.wordlist))
masterWordList, index2vocab, vocab2index = createVocabDict(list(completeWordsList)+['<SOS>','<EOS>','<PAD>'])

print("Total number of words in master list: {}".format(len(masterWordList)))

710
178
66
Total number of words in master list: 116


In [7]:
vocab2index['<PAD>'], index2vocab[0], 

(0, '<PAD>')

In [8]:
def prepareData(dataset, vocabList, training_mode="byWord"):
    
    """
    parameters: 
        dataset: SingleData object
        training_mode: "byWord" -> a sequence of frame to softmax words classification
                       (number of words, max no of frames per word, no features per frame)
                       "bySentence" -> a sequence of frame to softmax sequence of words (Language Model)
                       (number of sentence, max no of word per sent * max no of frames per word, no features per frame)
    """
    
    #Prepare Word Dictionary for one hot
    maxFramesPerWord = 100 #max no of frames (4 features per frame) allowed for each word
    maxWordPerSentence = 10 #max no of words per sentence
    featuresPerFrame = 4 #no of features per frame    
    noOfClasses = len(vocabList) #no of classes for words classification
    
    if training_mode=="byWord":
        
        noOfClasses = len(vocabList) #no of classes for words classification
        
        # initialize zeros arrays for X_train and Y_train
        m = dataset.num_items # Use word level training (ie not using language model) ie 710

        X_array = np.zeros([m,maxFramesPerWord,featuresPerFrame])
        Y_array = np.zeros([m,noOfClasses], dtype=np.int32)

        # fill in the sequence values
        all_sequences = dataset.get_all_sequences()

        for keys, values in all_sequences.items():
            X_array[keys,:len(all_sequences[keys][0]),:] = np.array(all_sequences[keys])
            Y_array[keys,:] = (dataset.wordlist[keys] == np.array(vocabList))*1
    else:
        
        # initialize zeros arrays for X_train and Y_train
        m = dataset.num_sentences # Use SENTENCE level training ie 161

        Tx = maxWordPerSentence*maxFramesPerWord
        Ty = maxWordPerSentence

        X_array = np.zeros([m,Tx,featuresPerFrame])
        Y_array = np.zeros([m,Ty,noOfClasses], dtype=np.int32)
        Y_array = Y_array + vocab2index['<PAD>'] # change the padding values

        # fill in the sequence values
        sentence_count = 0
        for keys, values in dataset.sentences_index.items():
            word_index_seq = values

            next_ind = 0
            word_count = 0
            for wi in word_index_seq:
                this_seq = dataset.get_item_sequences(wi)[0]

                X_array[sentence_count,next_ind:next_ind+len(this_seq),:] = this_seq
                #print(next_ind, len(this_seq))
                next_ind =  next_ind + len(this_seq)

                Y_array[sentence_count,word_count+1,:] = (dataset.wordlist[wi] == np.array(vocabList))*1
                word_count+=1
            Y_array[sentence_count,0,:] = ('<SOS>' == np.array(vocabList))*1
            Y_array[sentence_count,word_count+1,:] = ('<EOS>' == np.array(vocabList))*1

            sentence_count += 1
    
    print("Shape of X_array: {}".format(X_array.shape))
    print("Shape of Y_array: {}".format(Y_array.shape))
    print("Total number of UNIQUE words from data set: {}".format(len(vocabList)))
    
    return X_array, Y_array

In [9]:
def prepareDecoderTarget(decoder_input):
    """Create the decoder target by rolling the decoder_input"""
    
    decoder_target = np.roll(decoder_input, -1, axis=1)
    decoder_target[:,-1,:] = 0
    assert(np.sum(decoder_target[:,0,:] - decoder_input[:,1,:]) == 0)
    assert(decoder_target.shape == decoder_input.shape)
    
    return decoder_target

In [153]:
def showWER(ds, m, m_input):
    
    sentence_count = 0
    guessList = []
    
    #do an index mapping between the sorted dict used in original code and the unsorted one used in my code
    sorted_index = [s[0] for s in sorted(ds.sentences_index.items())]
    original_index = [s[0] for s in ds.sentences_index.items()]

    pred_index = np.argmax(m.predict(m_input), axis=-1)
    
    for keys, values in sorted(ds.sentences_index.items()):

        truth = [ds.wordlist[w] for w in values]

        convert_index = original_index.index(sorted_index[sentence_count])
        pred_out = [index2vocab[t] for t in pred_index[convert_index,:]][:len(values)]

        guessList.extend(pred_out)
        sentence_count += 1 
    
    show_errors(guessList, ds, show_answer = True)
    
    return

### Training by Sentence ###
This time around we will be using a language model. Instead of recognizing chose frame features word by word, we will be recognizing the whole sentence by taking into considerations of probability

In [11]:
#Part II Prepare Training data
train_encoder_input, train_decoder_input = prepareData(train_dataset, masterWordList, training_mode="bySentence")
train_decoder_target = prepareDecoderTarget(train_decoder_input)

Shape of X_array: (161, 1000, 4)
Shape of Y_array: (161, 10, 116)
Total number of UNIQUE words from data set: 116


In [12]:
#Part II Prepare Test data
test_encoder_input, test_decoder_input = prepareData(test_dataset, masterWordList, training_mode="bySentence")
test_decoder_target = prepareDecoderTarget(test_decoder_input)

Shape of X_array: (40, 1000, 4)
Shape of Y_array: (40, 10, 116)
Total number of UNIQUE words from data set: 116


In [13]:
def seqIndex2Sentence(seq, i):
    sentence = ''
    for t in range(train_decoder_input.shape[1]):
        sentence = sentence + index2vocab[np.argmax(seq[i,t,:])] + ' '
    return sentence

In [14]:
seqIndex2Sentence(train_decoder_target, 10)

'JOHN CAN GO1 CAN <EOS> <PAD> <PAD> <PAD> <PAD> <PAD> '

In [17]:
np.random.seed(0)

# Part III prepare the model
import keras
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape, Concatenate, Embedding
from keras.optimizers import Adam
from keras import backend as K

In [37]:
m, Tx, noOfFeatures = train_encoder_input.shape
_, Ty, noOfClasses = train_decoder_input.shape
latent_dim = 64
model_filepath = 'models/model_seq2seq_lm9Oct2018.h5'
dropout = 0.2

In [38]:
### ENCODER ###
encoder_inputs = Input(shape=(None, noOfFeatures), name='encoder_inputs')
encoder = LSTM(latent_dim, return_state=True, name='encoder_lstm')
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# Output is disregarded; these states will be used as the initial stated for the decoder
encoder_states = [state_h, state_c]

# define the encoder model separately
encoder_model = Model(encoder_inputs, encoder_states)

### DECODER ###
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, noOfClasses), name='decoder_inputs')
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the 
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, 
                    return_state=True, name='decoder_lstm')

decoder_outputs, _, _ = decoder_lstm(decoder_inputs, 
                                     initial_state=encoder_states)

decoder_dense = Dense(noOfClasses, activation='softmax', name='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [39]:
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     (None, None, 4)      0                                            
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     (None, None, 116)    0                                            
__________________________________________________________________________________________________
encoder_lstm (LSTM)             [(None, 64), (None,  17664       encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_lstm (LSTM)             [(None, None, 64), ( 46336       decoder_inputs[0][0]             
                                                                 encoder_lstm[0][1]               
          

In [140]:
#optimizer=keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, decay=1e-7)
optimizer=keras.optimizers.RMSprop(lr=0.00005, rho=0.9, decay=0.0)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [141]:
# define the checkpoint
checkpoint = ModelCheckpoint(model_filepath, monitor='loss', verbose=1, 
                             save_best_only=True, 
                             save_weights_only=False, 
                             mode='min')
callbacks_list = [checkpoint]

In [207]:
#model = load_model(model_filepath)
model.load_weights('models/model_seq2seq_lm_weights9Oct2018.h5')

In [93]:
batch_size = 32
epochs = 100

In [260]:
hist = model.fit([train_encoder_input, train_decoder_input], train_decoder_target,
                 batch_size=batch_size,
                 epochs=epochs,
                 callbacks=callbacks_list,
                 validation_data=[[test_encoder_input, test_decoder_input], test_decoder_target],
                 #validation_split=0.15,
                 verbose=2
                 )

Train on 161 samples, validate on 40 samples
Epoch 1/100
Epoch 00001: loss improved from 0.30846 to 0.30806, saving model to models/model_seq2seq_lm9Oct2018.h5
 - 6s - loss: 0.3081 - acc: 0.4540 - val_loss: 0.8146 - val_acc: 0.3850
Epoch 2/100


  str(node.arguments) + '. They will not be included '


Epoch 00002: loss did not improve
 - 6s - loss: 0.3107 - acc: 0.4509 - val_loss: 0.8248 - val_acc: 0.3875
Epoch 3/100
Epoch 00003: loss did not improve
 - 6s - loss: 0.3085 - acc: 0.4516 - val_loss: 0.8294 - val_acc: 0.3875
Epoch 4/100
Epoch 00004: loss did not improve
 - 6s - loss: 0.3094 - acc: 0.4522 - val_loss: 0.8212 - val_acc: 0.3875
Epoch 5/100
Epoch 00005: loss improved from 0.30806 to 0.30785, saving model to models/model_seq2seq_lm9Oct2018.h5
 - 6s - loss: 0.3079 - acc: 0.4528 - val_loss: 0.8248 - val_acc: 0.3850
Epoch 6/100
Epoch 00006: loss did not improve
 - 6s - loss: 0.3125 - acc: 0.4522 - val_loss: 0.8209 - val_acc: 0.3875
Epoch 7/100
Epoch 00007: loss did not improve
 - 6s - loss: 0.3110 - acc: 0.4522 - val_loss: 0.8156 - val_acc: 0.3875
Epoch 8/100
Epoch 00008: loss did not improve
 - 6s - loss: 0.3084 - acc: 0.4528 - val_loss: 0.8170 - val_acc: 0.3875
Epoch 9/100
Epoch 00009: loss did not improve
 - 6s - loss: 0.3093 - acc: 0.4522 - val_loss: 0.8207 - val_acc: 0.3850

Epoch 66/100
Epoch 00066: loss did not improve
 - 6s - loss: 0.3056 - acc: 0.4540 - val_loss: 0.8231 - val_acc: 0.3850
Epoch 67/100
Epoch 00067: loss did not improve
 - 7s - loss: 0.3056 - acc: 0.4540 - val_loss: 0.8224 - val_acc: 0.3875
Epoch 68/100
Epoch 00068: loss did not improve
 - 7s - loss: 0.3061 - acc: 0.4528 - val_loss: 0.8212 - val_acc: 0.3875
Epoch 69/100
Epoch 00069: loss did not improve
 - 7s - loss: 0.3070 - acc: 0.4540 - val_loss: 0.8221 - val_acc: 0.3850
Epoch 70/100
Epoch 00070: loss did not improve
 - 7s - loss: 0.3060 - acc: 0.4534 - val_loss: 0.8176 - val_acc: 0.3900
Epoch 71/100
Epoch 00071: loss did not improve
 - 6s - loss: 0.3064 - acc: 0.4547 - val_loss: 0.8180 - val_acc: 0.3900
Epoch 72/100
Epoch 00072: loss did not improve
 - 6s - loss: 0.3112 - acc: 0.4503 - val_loss: 0.8160 - val_acc: 0.3875
Epoch 73/100
Epoch 00073: loss did not improve
 - 6s - loss: 0.3075 - acc: 0.4516 - val_loss: 0.8094 - val_acc: 0.3925
Epoch 74/100
Epoch 00074: loss did not improve
 

In [143]:
model.save_weights('models/model_seq2seq_lm_weights9Oct2018.h5', overwrite=True)

In [144]:
print("Training accuracy: {}%".format(pred_accuracy(model, [train_encoder_input, train_decoder_input], 
                                                    train_decoder_target)*100.))
print("Test accuracy: {}%".format(pred_accuracy(model, [test_encoder_input, test_decoder_input],  
                                                test_decoder_target)*100.))

Training accuracy: 50.97451274362819%
Test accuracy: 47.11206896551724%


In [145]:
test_pred = np.argmax(model.predict([test_encoder_input, test_decoder_input]), axis=-1) 
test_truth = np.argmax(test_decoder_target, axis=-1)

print(([index2vocab[t] for t in test_pred[2,:]]))
print([index2vocab[t] for t in test_truth[2,:]])

['JOHN', 'SHOULD', 'NOT', 'BUY', 'HOUSE', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>']
['JOHN', 'FUTURE', 'NOT', 'BUY', 'HOUSE', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']


In [154]:
showWER(test_dataset, model, [test_encoder_input, test_decoder_input])


**** WER = 0.34269662921348315

Total correct: 117 out of 178
Video  Recognized                                                    Correct
  100: *JOHN NEW CAR BREAK-DOWN                                      POSS NEW CAR BREAK-DOWN
    2: JOHN *SHOULD HOMEWORK                                         JOHN WRITE HOMEWORK
   67: JOHN *SHOULD NOT BUY HOUSE                                    JOHN FUTURE NOT BUY HOUSE
    7: JOHN CAN *BUY CAN                                             JOHN CAN GO CAN
  201: JOHN *SHOULD *VISIT *<EOS> BUY HOUSE                          JOHN TELL MARY IX-1P BUY HOUSE
   74: JOHN *IX VISIT MARY                                           JOHN NOT VISIT MARY
  119: *JOHN *BUY1 *CAR CAR BLUE                                     SUE BUY IX CAR BLUE
   12: JOHN CAN *GO1 CAN                                             JOHN CAN GO CAN
   77: *JOHN BLAME MARY                                              ANN BLAME MARY
  142: JOHN BUY *WHAT WHAT BOOK                    

In [None]:
########## Inference Model ############

In [99]:
""" 
    Extract the encoder part as a separate model so that the interim states
    encoder_states can be extracted and manupulated before feeding into the decoder network
    input: encoder_inputs as defined earlier
    output: encoder_states
"""
encoder_model = Model(encoder_inputs, encoder_states)

### prepare the initial state inputs for the decoder ###
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

### Note that we need also to input the decoder_inputs
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)

decoder_states = [state_h, state_c] #again the states are extracted for futher loops

decoder_outputs = decoder_dense(decoder_outputs)

### inputs: states inputs(from encoder or previous steps of decoder)
### outputs: softmax prediction as well as states for the next time step
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

In [100]:
def decode_sequence(input_seq):
    beam_width = 3
    
    maxWordPerSentence = Ty
    
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, noOfClasses))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, vocab2index['<SOS>']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_index_list = []
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Sample a token        
        sampled_word_index = np.argmax(output_tokens[0, 0, :])
        decoded_index_list.append(sampled_word_index)
        
        sampled_word = index2vocab[sampled_word_index]
        #decoded_sentence = decoded_sentence + " " + sampled_word

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_word == '<EOS>' or
           len(decoded_index_list) >= maxWordPerSentence):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, noOfClasses))
        target_seq[0, 0, sampled_word_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_index_list

In [101]:
class CandidateObj():
    # Object to hold the Seq Candidates Data
    def __init__(self, seq, state, logProb, normalizedProb, parentNode=None):
        self.seq = seq
        self.state = state
        self.logProb = logProb
        self.normalizedProb = normalizedProb
        self.parentNode = parentNode

In [105]:
def createDecoderOHTargetSeq(ind):
    #create one hot target sequence (Y) based on index (int) provided
    target_seq = np.zeros((1, 1, noOfClasses), dtype=np.int32)
    target_seq[0, 0, ind] = 1 
    return target_seq

In [208]:
def beam_search_sequence(input_seq, beam_width=3, printSentence=False):
    
    # Encode the input as state vectors. -> decoder
    encoded_state_conditions = encoder_model.predict(input_seq) #states: list of state_h and state_c
    
    de_target_seq = createDecoderOHTargetSeq(vocab2index['<SOS>']) #use <SOS> to generate next stete
    
    nextWordProb, next_h, next_c = decoder_model.predict([de_target_seq] + encoded_state_conditions)
    
    nextWordProb = np.squeeze(nextWordProb)
    
    topProbIndex = np.argsort(nextWordProb)[::-1]

    finalCandidateList = [] # for seq candidate which either reached <EOS> or Ty = 10
    
    beamList = [] # to hold current most promising top beam_width seq for further exploration

    # create the initial beam list 
    for b in range(beam_width):
        seq = copy.deepcopy([vocab2index['<SOS>']])
        seq.append(topProbIndex[b])
        logProb = np.log(nextWordProb[topProbIndex[b]])
        noramlizedProb = logProb/1.0
        c = CandidateObj(seq, [next_h, next_c], logProb, noramlizedProb)
        beamList.append(c)
    
    assert(len(beamList) == beam_width)
    
    for t in range(1,Ty):
        
        ###  need an updated beamList (remove all unwanted candidates) before entering this loop ###
        
        #update cycle ends at the end of each time step
        ram_candidateList = [] #temp list for storing all candidates sequence for later sorting
        
        for s in range(beam_width): 
            
            nextBeam = copy.deepcopy(beamList[s])
            nextInd = nextBeam.seq[-1]
            nextState = nextBeam.state
            de_target_seq = createDecoderOHTargetSeq(nextInd)
            
            nextWordProb, next_h, next_c = decoder_model.predict([de_target_seq] + nextState)
            
            nextWordProb = np.squeeze(nextWordProb)
            
            for ci in range(len(nextWordProb)):
                newSeq = copy.deepcopy(nextBeam.seq)
                newSeq.append(ci)
                newState = [next_h, next_c]
                newLogProb = nextBeam.logProb + np.log(nextWordProb[ci])
                newParent = nextBeam #reference to previous object just in case
                newNormalizedProb = newLogProb/(len(newSeq))
                
                new_c = CandidateObj(newSeq, newState, newLogProb, newNormalizedProb)
                ram_candidateList.append(new_c)
        
        # time the sort the ram_candidateList
        ram_candidateList = sorted(ram_candidateList, key=lambda x: x.normalizedProb, reverse=True)
            
        # time to update beamList and removed unwanted list
        beamList = [] # reinitialize beamList
        
        for i in range(len(ram_candidateList)):
            if len(beamList) < beam_width:
                nextTopSeq = copy.deepcopy(ram_candidateList[i])
                if nextTopSeq.seq[-1] == vocab2index['<EOS>']:
                    finalCandidateList.append(copy.deepcopy(nextTopSeq))
                else:
                    beamList.append(nextTopSeq)
    
    # final sort!
    finalCandidateList.extend(beamList)
    finalCandidateList.sort(key=lambda x: x.normalizedProb, reverse=True)
    
    return finalCandidateList

In [266]:
for s in [np.random.randint(len(test_encoder_input)) for t in range(5)]:
    print("Sample Index: {}".format(s))
    decoded_index_list = decode_sequence(test_encoder_input[s:s+1,:,:])
    finalCandidateList = beam_search_sequence(test_encoder_input[s:s+1,:,:], beam_width=3)

    test_truth = np.argmax(test_decoder_target[s,:,:], axis=-1)

    print(([index2vocab[t] for t in decoded_index_list]))
    print(([index2vocab[t] for t in finalCandidateList[0].seq if t != vocab2index['<SOS>']]))
    print([index2vocab[t] for t in test_truth], end='\n\n')

Sample Index: 3
['JOHN', 'CAN', 'BUY', 'HOUSE', '<EOS>']
['JOHN', 'CAN', 'BUY', 'HOUSE', '<EOS>']
['JOHN', 'CAN', 'GO', 'CAN', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Sample Index: 18
['JOHN', 'LIKE', 'IX', 'IX', 'IX', '<EOS>']
['JOHN', 'LIKE', 'IX', 'IX', 'IX', '<EOS>']
['JOHN', 'LIKE', 'IX', 'IX', 'IX', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Sample Index: 15
['JOHN', 'LEAVE', 'IX', '<EOS>']
['JOHN', 'WILL', 'VISIT', 'MARY', '<EOS>']
['JOHN', 'WILL', 'VISIT', 'MARY', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Sample Index: 21
['JOHN', 'BUY', 'CAR', 'FUTURE', 'NOT', '<EOS>']
['JOHN', 'LIKE', 'IX', 'IX', 'IX', '<EOS>']
['JOHN', 'BUY', 'WHAT', 'YESTERDAY', 'BOOK', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']

Sample Index: 5
['JOHN', 'LEAVE', 'IX', '<EOS>']
['JOHN', 'LEAVE', 'IX', '<EOS>']
['JOHN', 'NOT', 'VISIT', 'MARY', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']



In [169]:
#sorted_index = [s[0] for s in sorted(test_dataset.sentences_index.items())]
#original_index = [s[0] for s in test_dataset.sentences_index.items()]

[38, 9, 28, 15, 20]