In [3]:
from collections import Counter
from gensim.models import Word2Vec
from random import random
from nltk import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
from torch import nn
from torch.autograd import Variable

import json
import numpy as np
import torch
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Acquisition

For this assignment, you must download the data and extract it into `data/`. The dataset contains two files, both containing a single caption on each line. We should have 20,000 sentences (one sentence per image in Assignment 2) in the training captions and 500 sentences in the validation captions (five sentences per image in Assignment 2).

The data file should be the same as from the previous assignment.

In [4]:
# Load the data into memory.
mscoco_train = json.load(open('data/annotations/train_captions.json'))
mscoco_val  = json.load(open('data/annotations/val_captions.json'))

train_sentences = [entry['caption'] for entry in mscoco_train['annotations']]
val_sentences = [entry['caption'] for entry in mscoco_val['annotations']]

print(len(train_sentences))
print(len(val_sentences))
print(train_sentences[0])
print(val_sentences[0])

20000
500
A very clean and well decorated empty bathroom
Set of bananas hanging off of a banana tree.


# Preprocessing

The code provided below creates word embeddings for you to use. After creating the vocabulary, we construct both one-hot embeddings and word2vec embeddings. 

All of the packages utilized should be installed on your Azure servers, however you will have to download an NLTK corpus. To do this, follow the instructions below:

1. SSH to your Azure server
2. Open up Python interpreter
3. `import nltk`
4. `nltk.download()`

    You should now see something that looks like:

    ```
    >>> nltk.download()
    NLTK Downloader
    ---------------------------------------------------------------------------
        d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
    ---------------------------------------------------------------------------
    Downloader> 

    ```

5. `d punkt`
6. Provided the download finished successfully, you may now exit out of the Python interpreter and close the SSH connection.

Please look through the functions provided below **carefully**, as you will need to use all of them at some point in your assignment.

In [5]:
sentences = train_sentences

# Lower-case the sentence, tokenize them and add <SOS> and <EOS> tokens
sentences = [["<SOS>"] + word_tokenize(sentence.lower()) + ["<EOS>"] for sentence in sentences]

# Create the vocabulary. Note that we add an <UNK> token to represent words not in our vocabulary.
vocabularySize = 1000
word_counts = Counter([word for sentence in sentences for word in sentence])

vocabulary = ["<UNK>"] + [e[0] for e in word_counts.most_common(vocabularySize-1)]
word2index = {word:index for index,word in enumerate(vocabulary)}
one_hot_embeddings = np.eye(vocabularySize)

# Build the word2vec embeddings
wordEncodingSize = 300
filtered_sentences = [[word for word in sentence if word in word2index] for sentence in sentences]
w2v = Word2Vec(filtered_sentences, min_count=0, size=wordEncodingSize)
w2v_embeddings = np.concatenate((np.zeros((1, wordEncodingSize)), w2v.wv.syn0))

# Define the max sequence length to be the longest sentence in the training data. 
maxSequenceLength = max([len(sentence) for sentence in sentences])

def preprocess_numberize(sentence):
    """
    Given a sentence, in the form of a string, this function will preprocess it
    into list of numbers (denoting the index into the vocabulary).
    """
    tokenized = word_tokenize(sentence.lower())
        
    # Add the <SOS>/<EOS> tokens and numberize (all unknown words are represented as <UNK>).
    tokenized = ["<SOS>"] + tokenized + ["<EOS>"]
    numberized = [word2index.get(word, 0) for word in tokenized]
    
    return numberized

def preprocess_one_hot(sentence):
    """
    Given a sentence, in the form of a string, this function will preprocess it
    into a numpy array of one-hot vectors.
    """
    numberized = preprocess_numberize(sentence)
    
    # Represent each word as it's one-hot embedding
    one_hot_embedded = one_hot_embeddings[numberized]
    
    return one_hot_embedded

def preprocess_word2vec(sentence):
    """
    Given a sentence, in the form of a string, this function will preprocess it
    into a numpy array of word2vec embeddings.
    """
    numberized = preprocess_numberize(sentence)
    
    # Represent each word as it's one-hot embedding
    w2v_embedded = w2v_embeddings[numberized]
    
    return w2v_embedded

def compute_bleu(reference_sentence, predicted_sentence):
    """
    Given a reference sentence, and a predicted sentence, compute the BLEU similary between them.
    """
    reference_tokenized = word_tokenize(reference_sentence.lower())
    predicted_tokenized = word_tokenize(predicted_sentence.lower())
    return sentence_bleu([reference_tokenized], predicted_tokenized)

score1 = compute_bleu("<SOS>" + train_sentences[0], "<SOS>" + train_sentences[0])
score2 = compute_bleu("<SOS>" + train_sentences[0], "<SOS>" + train_sentences[5])

print('BLEU score distnace between \n  "' + train_sentences[0] + '" \nand\n  "'+ train_sentences[0] + '" \nis: ' + str(score1) +'\n\n')
print('BLEU score distnace between \n  "' + train_sentences[0] + '" \nand\n  "'+ train_sentences[5] + '" \nis: ' + str(score2) +'\n\n')

BLEU score distnace between 
  "A very clean and well decorated empty bathroom" 
and
  "A very clean and well decorated empty bathroom" 
is: 1.0


BLEU score distnace between 
  "A very clean and well decorated empty bathroom" 
and
  "A few people sit on a dim transportation system. " 
is: 0.1933853138176172






# 1. Building a Language Decoder

We now implement a language decoder. For now, we will have the decoder take a single training sample at a time (as opposed to batching). For our purposes, we will also avoid defining the embeddings as part of the model and instead pass in embedded inputs. While this is sometimes useful, as it learns/tunes the embeddings, we avoid doing it for the sake of simplicity and speed.

Remember to use LSTM hidden units!

In [6]:
class DecoderLSTM(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size

        #self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(output_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        
        output, hidden = self.lstm(input, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),torch.zeros(1, 1, self.hidden_size, device=device))



In [7]:
decoder = DecoderLSTM(wordEncodingSize,vocabularySize).to(device)

# 2. Training a Language Decoder

We must now train the language decoder we implemented above. An important thing to pay attention to is the [inputs for an LSTM](http://pytorch.org/docs/master/nn.html#torch.nn.LSTM).

In [14]:
import random
teacher_forcing_ratio = 1

def train(target_variable, 
          decoder, 
          decoder_optimizer, 
          criterion, 
          embeddings=one_hot_embeddings): 
    """
    Given a single training sample, go through a single step of training.
    """
    
    #set the gradient to zero
    decoder_optimizer.zero_grad()
    
    
    numerized_variable = torch.tensor(target_variable, dtype=torch.long, device=device).view(-1,1)
    sentence_one_hot_embeddings = embeddings[target_variable]
    input_length = sentence_one_hot_embeddings.shape[0]
    decoder_hidden = decoder.initHidden()
    loss = 0
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        for ei in range(input_length-1):
            decoder_input = torch.tensor(sentence_one_hot_embeddings[ei,:], 
                                         dtype=torch.float, device=device).view(1,1,-1)
            decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden)
            loss += criterion(decoder_output, numerized_variable[ei+1,:])

    else:
        #print("non-teacher assistant mode")
        decoder_input =  torch.tensor(sentence_one_hot_embeddings[0,:], dtype=torch.float, device=device).view(1,1,-1)
        for ei in range(input_length-1):
            decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            num = topi.squeeze().detach().item()
            if word2index['<EOS>'] == num:
                break
            decoder_input = torch.tensor(embeddings[num,:], dtype=torch.float, device=device).view(1,1,-1)
            
            loss += criterion(decoder_output, numerized_variable[ei+1,:])
            
    loss.backward()
    decoder_optimizer.step()
    
    return loss.item() / input_length




In [8]:
# Train the model and monitor the loss. Remember to use Adam optimizer and CrossEntropyLoss

def train_the_model(decoder, epochs=5, criterion=nn.NLLLoss()):
    
    lr = 0.001
    optimizor = torch.optim.Adam(decoder.parameters(), lr=lr)
    
    for epoch in range(epochs):
        print('this is epoch number ', epoch)
        counter = 0
        for sentence in train_sentences:
            counter = counter + 1
            numberized = preprocess_numberize(sentence)[1:]
            loss = train(numberized, decoder, optimizor, criterion)
            if counter%100 == 99:
                print('this is loss =', loss)
                
    print("Training is over")
            
            



In [9]:
train_the_model(decoder)

this is epoch number  0
this is loss = 4.578378041585286
this is loss = 4.670798492431641
this is loss = 4.8327092257413
this is loss = 3.029078801472982
this is loss = 4.420703464084202
this is loss = 2.886064910888672
this is loss = 3.720386505126953
this is loss = 3.574116446755149
this is loss = 3.645665095402644
this is loss = 3.695066892183744
this is loss = 3.689280289870042
this is loss = 4.435306294759115
this is loss = 2.5890781402587892
this is loss = 3.8472930908203127
this is loss = 2.4607778029008345
this is loss = 3.341639836629232
this is loss = 3.4121570587158203
this is loss = 2.867417335510254
this is loss = 2.098670482635498
this is loss = 2.6798374176025392
this is loss = 2.8742431004842124
this is loss = 3.559571901957194
this is loss = 4.198141271417791
this is loss = 3.6275696527390253
this is loss = 2.6700313568115233
this is loss = 2.8319977847012607
this is loss = 2.816996834494851
this is loss = 2.741258968006481
this is loss = 3.841526848929269
this is loss

this is loss = 2.406650103055514
this is loss = 2.7949488321940104
this is loss = 3.0781545639038086
this is loss = 2.848524284362793
this is loss = 2.6376319298377404
this is loss = 2.933003044128418
this is loss = 3.253568796011118
this is loss = 2.3262296404157365
this is loss = 2.7282874367453833
this is loss = 1.7992172241210938
this is loss = 2.4672667185465493
this is loss = 2.3828368414015997
this is loss = 1.473268917628697
this is loss = 3.791474855863131
this is loss = 2.86304931640625
this is loss = 3.1742415957980685
this is loss = 1.7229050856370192
this is loss = 2.7576399909125433
this is loss = 2.6936073303222656
this is loss = 3.5744571685791016
this is loss = 3.2956873575846353
this is loss = 1.8025202433268228
this is loss = 3.1044903564453126
this is loss = 1.6698957170758928
this is loss = 2.0842742919921875
this is loss = 1.9960139881480823
this is loss = 2.065134366353353
this is loss = 3.363267626081194
this is loss = 3.9155838012695314
this is loss = 3.3814968

this is loss = 1.7819305419921876
this is loss = 2.863641103108724
this is loss = 2.018390655517578
this is loss = 3.749849001566569
this is loss = 2.473586320877075
this is loss = 1.7923259735107422
this is loss = 1.2533283233642578
this is loss = 2.5712884267171225
this is loss = 2.6670920054117837
this is loss = 2.8795575228604404
this is loss = 2.4094823837280273
this is loss = 2.0385507583618163
this is loss = 2.410199425437234
this is loss = 2.056544390591708
this is loss = 2.8378798166910806
this is loss = 1.7646061823918269
this is loss = 0.84749968846639
this is loss = 4.666116926405165
this is loss = 1.5866076729514382
this is loss = 3.1645447867257253
this is loss = 2.040490237149325
this is loss = 2.7966187795003257
this is loss = 2.8919376373291015
this is loss = 3.483905792236328
this is loss = 2.1191627979278564
this is loss = 2.2869091033935547
this is loss = 2.143768734402127
this is loss = 1.2686868147416548
this is loss = 2.79677321694114
this is loss = 1.32577695846

this is loss = 3.3777253287179128
this is loss = 1.5767812728881836
this is loss = 1.4086175646100725
this is loss = 1.86690886815389
this is loss = 1.2739119096235796
this is loss = 0.5138529936472574
this is loss = 2.6298139572143553
this is loss = 2.809728969227184
this is loss = 2.179393594915217
this is loss = 1.5451455116271973
this is loss = 2.069876744196965
this is loss = 2.345456049992488
this is loss = 1.540068112886869
this is loss = 1.941037586757115
this is loss = 1.279766845703125
this is loss = 1.6326138632638114
this is loss = 2.363795439402262
this is loss = 1.9802656173706055
this is loss = 3.072220230102539
this is loss = 3.435680389404297
this is loss = 0.7595803397042411
this is loss = 2.7402543288010817
this is loss = 1.729356288909912
this is loss = 1.944121519724528
this is loss = 2.556701151529948
this is loss = 2.3726158142089844
this is loss = 0.9250420729319254
this is loss = 2.370679473876953
this is loss = 1.1901666201077974
this is loss = 2.1546859741210

this is loss = 2.573407309395926
this is loss = 1.6419120201697717
this is loss = 2.0880018870035806
this is loss = 1.381097206702599
this is loss = 2.133514404296875
this is loss = 1.878369649251302
this is loss = 2.2929229736328125
this is loss = 2.162528111384465
this is loss = 3.6142361958821616
this is loss = 1.4412654240926106
this is loss = 2.4499093691507974
this is loss = 2.3143962224324546
this is loss = 2.431304359436035
this is loss = 3.031309445699056
this is loss = 2.4714126586914062
this is loss = 2.3565220832824707
this is loss = 1.947596830480239
this is loss = 1.528084975022536
this is loss = 1.465328736738725
this is loss = 1.1788338661193847
this is loss = 2.119593556722005
this is loss = 1.248739936135032
this is loss = 3.1105152476917612
this is loss = 1.2916602221402256
this is loss = 1.6104314592149522
Training is over


# 3. Building Language Decoder MAP Inference

We now define a method to perform inference with our decoder and test it with a few different starting words. This code will be fairly similar to your training function from part 2.

In [10]:
def inference(decoder, init_word, embeddings=one_hot_embeddings, max_length=maxSequenceLength):
    decoder_hidden = decoder.initHidden()
    decoded_words = []
    decoded_words.append(init_word)
    word_embedding = embeddings[word2index[init_word]]
    decoder_input = torch.tensor(word_embedding, dtype=torch.float, device=device).view(1,1,-1)
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder(decoder_input , decoder_hidden)
        topv, topi = decoder_output.data.topk(1)
        
        if topi.item() == word2index['<EOS>']:
            decoded_words.append('<EOS>')
            break
        else:
            decoded_words.append(vocabulary[topi.item()])
        decoder_input = torch.tensor(embeddings[topi.item(),:], 
                                     dtype=torch.float, device=device).view(1,1,-1)
        #decoder_input = torch.tensor(decoder_output, dtype=torch.float, device=device).view(1,1,-1)

    return decoded_words
print(inference(decoder, init_word="a"))
print(inference(decoder, init_word="the"))
print(inference(decoder, init_word="man"))
print(inference(decoder, init_word="woman"))
print(inference(decoder, init_word="dog"))

['a', 'group', 'of', 'people', 'sitting', 'around', 'a', 'dinner', 'table', '.', '<EOS>']
['the', '<UNK>', 'of', 'a', '<UNK>', 'clock', 'tower', 'in', 'the', 'distance', '.', '<EOS>']
['man', 'in', 'a', '<UNK>', '<UNK>', '<UNK>', '<UNK>', '<UNK>', '.', '<EOS>']
['woman', 'sitting', 'at', 'a', 'table', 'with', 'a', 'plate', 'of', 'food', '.', '<EOS>']
['dog', 'sitting', 'at', 'a', 'table', 'with', 'a', 'plate', 'of', 'food', '.', '<EOS>']


# 4. Building Language Decoder Sampling Inference

We must now modify the method defined in part 3, to sample from the distribution outputted by the LSTM rather than taking the most probable word.

It might be useful to take a look at the output of your model and (depending on your implementation) modify it so that the outputs sum to 1. 

In [11]:
def find_the_next_word(word_probabilities):
    prob = random.random()
    probSum = 0
    word_probabilities = torch.exp(word_probabilities)
    for i in range(word_probabilities.size(1)):
        probSum = probSum + word_probabilities[0][i].item()
        if probSum > prob:
            return (i)
        
def sampling_inference(decoder, init_word, embeddings=one_hot_embeddings, max_length=maxSequenceLength):
    # Your code goes here
    decoder_hidden = decoder.initHidden()
    decoded_words = []
    decoded_words.append(init_word)
    word_embedding = embeddings[word2index[init_word]]
    decoder_input = torch.tensor(word_embedding, dtype=torch.float, device=device).view(1,1,-1)
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder(
           decoder_input , decoder_hidden)
        
        index = find_the_next_word(decoder_output.data)
        
        if index == word2index['<EOS>']:
            decoded_words.append('<EOS>')
            break
        else:
            decoded_words.append(vocabulary[index])
        #decoder_input = torch.tensor(decoder_output, dtype=torch.float, device=device).view(1,1,-1)
        decoder_input = torch.tensor(embeddings[index,:], 
                                     dtype=torch.float, device=device).view(1,1,-1)
    return decoded_words

# Print the results with sampling_inference by drawing 5 samples per initial word, requiring to run 
# the code below 5 times
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))

['the', 'dishes', 'of', 'a', '<UNK>', 'drink', 'and', 'an', 'assortment', 'of', 'wine', 'glasses', 'on', 'it', '<EOS>']
['the', 'bunch', 'of', 'people', 'on', 'the', 'table', 'in', 'the', 'room', '.', '<EOS>']
['the', 'large', 'clock', 'is', 'standing', 'in', 'the', 'near', 'a', 'pan', '.', '<EOS>']
['the', 'man', 'is', 'holding', 'camera', 'his', 'hair', 'to', 'his', 'ear', '.', '<EOS>']
['the', 'room', 'is', 'clock', ',', 'with', '<UNK>', ',', 'and', '<UNK>', '.', '<EOS>']
--------------------------------------------------------------------
['man', 'and', 'others', 'that', 'are', 'standing', 'on', 'and', 'scene', 'near', 'a', 'window', '.', '<EOS>']
['man', 'on', 'his', 'cell', 'phone', ',', 'one', 'edge', ',', 'with', 'the', 'plate', 'girl', 'in', 'front', 'of', 'pink', ',', 'with', 'one', 'skirt', 'and', 'laying', 'soup', '.', '<EOS>']
['man', 'dressed', 'in', 'her', '<UNK>', '<UNK>', 'to', 'another', 'face', '<EOS>']
['man', '<UNK>', 'a', 'pot', 'of', 'a', 'kitchen', 'serving', 'h

# 5. Experiment with Teacher Forcing

Redo steps 2 to 4 with teacher_forcing_ratio = 0.9 and 0.8. Comment on the results, speed of convergence and the quality of results. Note that in most real scenarious the teacher forcing is actually annealed; starting with teacher forcing 

In [20]:
# Your code goes here
decoder = DecoderLSTM(wordEncodingSize,vocabularySize).to(device)
teacher_forcing_ratio = 0.9
train_the_model(decoder)


print(inference(decoder, init_word="a"))
print(inference(decoder, init_word="the"))
print(inference(decoder, init_word="man"))
print(inference(decoder, init_word="woman"))
print(inference(decoder, init_word="dog"))

print('--------------------------------------------------------------------')

print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="dog"))

this is epoch number  0
this is loss = 4.4965511957804365
this is loss = 4.7187847137451175
this is loss = 4.901337363503196
this is loss = 3.058868090311686
this is loss = 4.6822865804036455
this is loss = 3.06148681640625
this is loss = 3.7749401728312173
this is loss = 3.6977934403852983
this is loss = 3.680387643667368
this is loss = 3.790333087627704
this is loss = 3.810121389535757
this is loss = 4.489067077636719
this is loss = 2.536533737182617
this is loss = 3.8721460978190105
this is loss = 2.5698297674005683
this is loss = 3.4355220794677734
this is loss = 3.547561264038086
this is loss = 3.0419416427612305
this is loss = 2.0978711446126304
this is loss = 2.7634952545166014
this is loss = 3.015366872151693
this is loss = 3.485915184020996
this is loss = 4.230697285045277
this is loss = 3.7090566725957963
this is loss = 2.773482894897461
this is loss = 2.905663923783736
this is loss = 2.858780254017223
this is loss = 2.842437050559304
this is loss = 3.9418231419154575
this is

this is loss = 2.6603305523212137
this is loss = 2.717005157470703
this is loss = 3.2063468297322593
this is loss = 2.9735363006591795
this is loss = 2.4005586183988132
this is loss = 3.1253849029541017
this is loss = 3.25651608980619
this is loss = 2.441865921020508
this is loss = 2.793635281649503
this is loss = 1.7970544029684627
this is loss = 2.6148557662963867
this is loss = 2.385903858003162
this is loss = 1.698340824672154
this is loss = 3.6072813180776744
this is loss = 2.9830540974934894
this is loss = 3.21038818359375
this is loss = 1.6174810849703276
this is loss = 2.855556699964735
this is loss = 2.5688776536421343
this is loss = 3.6386617933000838
this is loss = 3.115465037027995
this is loss = 6.506488545735677
this is loss = 3.257418212890625
this is loss = 1.7513039452689034
this is loss = 2.376728820800781
this is loss = 2.2171238985928623
this is loss = 2.1193933486938477
this is loss = 3.363722392490932
this is loss = 3.9172337849934897
this is loss = 3.715689468383

this is loss = 2.047262954711914
this is loss = 2.918787956237793
this is loss = 2.2055329409512607
this is loss = 4.312617301940918
this is loss = 2.354374408721924
this is loss = 1.835054079691569
this is loss = 1.4887433052062988
this is loss = 3.869791030883789
this is loss = 2.721144358317057
this is loss = 2.742902929132635
this is loss = 2.344785690307617
this is loss = 2.2741891860961916
this is loss = 2.488655783913352
this is loss = 3.657357996160334
this is loss = 2.9652097490098743
this is loss = 1.907667453472431
this is loss = 0.8751480579376221
this is loss = 4.2637218899197045
this is loss = 1.6783540899103337
this is loss = 3.3220299312046597
this is loss = 2.2204045382413
this is loss = 3.0288111368815103
this is loss = 2.624881362915039
this is loss = 3.9658695220947267
this is loss = 2.096750259399414
this is loss = 2.322281837463379
this is loss = 2.3680106268988714
this is loss = 1.401677131652832
this is loss = 2.737672285600142
this is loss = 1.4560457229614259


this is loss = 4.2026487077985495
this is loss = 2.0108769734700522
this is loss = 1.7585551398141044
this is loss = 2.1642858187357583
this is loss = 1.222844037142667
this is loss = 0.9760850270589193
this is loss = 2.886028861999512
this is loss = 2.8521385192871094
this is loss = 2.005036093971946
this is loss = 1.4649667739868164
this is loss = 2.0352899111234226
this is loss = 2.7064343965970554
this is loss = 1.5641106825608473
this is loss = 3.850104740687779
this is loss = 4.154419199625651
this is loss = 1.5371043341500419
this is loss = 2.594963232676188
this is loss = 1.9895191192626953
this is loss = 3.1324741363525392
this is loss = 4.060699190412249
this is loss = 2.3580521174839566
this is loss = 3.1979299692007213
this is loss = 1.730312665303548
this is loss = 2.1115411122639975
this is loss = 2.639184315999349
this is loss = 2.6978229522705077
this is loss = 1.428339958190918
this is loss = 2.0326332092285155
this is loss = 1.4698169414813702
this is loss = 2.3248464

this is loss = 1.6759427877572866
this is loss = 1.5620808601379395
this is loss = 1.2707410959097056
this is loss = 2.266341890607561
this is loss = 2.0705668131510415
this is loss = 2.7617870130037008
this is loss = 2.2273318950946512
this is loss = 3.4973742167154946
this is loss = 3.639931042989095
this is loss = 3.644110361735026
this is loss = 2.808431307474772
this is loss = 2.1908702850341797
this is loss = 3.120429039001465
this is loss = 3.829838752746582
this is loss = 2.94176451365153
this is loss = 2.506361344281365
this is loss = 1.6731288616473858
this is loss = 3.4748382568359375
this is loss = 1.2436413764953613
this is loss = 2.203474680582682
this is loss = 1.2639500011097302
this is loss = 3.1320804249156606
this is loss = 1.2720325643366033
this is loss = 1.9781850179036458
Training is over
['a', 'group', 'of', 'people', 'sitting', 'around', 'a', 'a', 'table', '.', '<EOS>']
['the', '<UNK>', 'is', '<UNK>', '<UNK>', '<UNK>', '<UNK>', '<UNK>', '.', '<EOS>']
['man', 'i

In [21]:
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))

['the', 'office', 'is', 'on', 'the', 'wall', 'is', 'lying', 'on', 'the', 'phones', '.', '<EOS>']
['the', 'little', 'girl', 'is', 'holding', 'a', 'tennis', 'red', '.', '<EOS>']
['the', 'clock', 'is', 'ready', 'to', '<UNK>', 'the', 'tower', '.', '<EOS>']
['the', 'teddy', 'bear', 'with', 'purple', 'putting', 'dress', 'and', 'the', 'woman', '<UNK>', 'in', 'her', '.', '<EOS>']
['the', 'huge', 'clock', 'is', 'view', 'from', 'the', 'apartment', '.', '<EOS>']
--------------------------------------------------------------------
['man', 'in', 'brown', 'cup', 'sitting', 'in', 'front', 'of', 'a', 'paper', '<UNK>', '.', '<EOS>']
['man', 'and', 'dog', 'are', 'smiling', 'and', '<UNK>', 'dress', 'who', 'are', '<UNK>', '.', '<EOS>']
['man', 'standing', 'on', '<UNK>', 'field', 'in', '<UNK>', 'sitting', 'next', 'to', 'her', 'computer', '.', '<EOS>']
['man', 'in', 'a', 'looking', 'shirt', 'holding', 'stuffed', '<UNK>', 'on', 'a', '<UNK>', '<EOS>']
['man', 'dressed', 'up', 'in', 'a', 'vase', 'getting', '<U

In [17]:
# Your code goes here
decoder = DecoderLSTM(wordEncodingSize,vocabularySize).to(device)
teacher_forcing_ratio = 0.8
train_the_model(decoder)


print(inference(decoder, init_word="a"))
print(inference(decoder, init_word="the"))
print(inference(decoder, init_word="man"))
print(inference(decoder, init_word="woman"))
print(inference(decoder, init_word="dog"))

print('--------------------------------------------------------------------')

print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="dog"))

this is epoch number  0
this is loss = 4.567222595214844
this is loss = 4.701365280151367
this is loss = 4.4234629544344815
this is loss = 2.8873895009358725
this is loss = 4.449752383761936
this is loss = 3.115988922119141
this is loss = 3.645444869995117
this is loss = 3.7374936884099785
this is loss = 3.677225553072416
this is loss = 3.8285322922926683
this is loss = 2.7300450251652646
this is loss = 4.560926310221354
this is loss = 2.6875177383422852
this is loss = 3.923792521158854
this is loss = 2.7038182345303623
this is loss = 3.292522430419922
this is loss = 3.6103946685791017
this is loss = 3.1307101249694824
this is loss = 2.255422751108805
this is loss = 2.886117172241211
this is loss = 2.8605759938557944
this is loss = 3.756934483846029
this is loss = 4.316438848322088
this is loss = 3.707395281110491
this is loss = 4.372337341308594
this is loss = 3.050392497669567
this is loss = 2.9563220630992544
this is loss = 2.8454839533025567
this is loss = 4.138456617082868
this is

this is loss = 2.5094997699444113
this is loss = 2.7467020670572917
this is loss = 2.7376171747843423
this is loss = 2.932463264465332
this is loss = 2.651562030498798
this is loss = 1.9728715896606446
this is loss = 3.4832681509164662
this is loss = 2.3249459947858537
this is loss = 2.79313139481978
this is loss = 1.9502603867474724
this is loss = 2.2734498977661133
this is loss = 2.5196742103213357
this is loss = 1.8436549050467355
this is loss = 2.5525216322678785
this is loss = 3.0107027689615884
this is loss = 3.45809449089898
this is loss = 1.5990301278921275
this is loss = 3.0403800540500217
this is loss = 2.6379127502441406
this is loss = 3.876897539411272
this is loss = 3.057988484700521
this is loss = 2.062305450439453
this is loss = 3.260756530761719
this is loss = 1.8944157191685267
this is loss = 2.221541976928711
this is loss = 2.0173932855779473
this is loss = 3.3772153854370117
this is loss = 3.5763498033796037
this is loss = 4.068146006266276
this is loss = 5.267803192

this is loss = 3.3041388193766275
this is loss = 2.1340559179132637
this is loss = 3.964419682820638
this is loss = 2.7336537837982178
this is loss = 1.8508481979370117
this is loss = 1.3520333766937256
this is loss = 3.0196078618367515
this is loss = 2.8653624852498374
this is loss = 2.9284237948330967
this is loss = 3.3259952545166014
this is loss = 2.298540496826172
this is loss = 3.0711146267977627
this is loss = 1.9593571749600498
this is loss = 1.6772178014119465
this is loss = 2.0666304368239183
this is loss = 0.8691080411275228
this is loss = 4.913224538167317
this is loss = 1.9210725264115767
this is loss = 3.0751781463623047
this is loss = 2.1936059431596235
this is loss = 3.258982022603353
this is loss = 2.7752574920654296
this is loss = 3.750082015991211
this is loss = 2.8060574531555176
this is loss = 2.3379405975341796
this is loss = 2.310445785522461
this is loss = 1.3629589947787197
this is loss = 2.8531100533225318
this is loss = 2.4272823333740234
this is loss = 3.703

this is loss = 1.7686201731363933
this is loss = 1.593111446925572
this is loss = 3.5731655756632485
this is loss = 1.195848985151811
this is loss = 1.5916377703348796
this is loss = 3.3037010192871095
this is loss = 2.515106894753196
this is loss = 1.8149531971324573
this is loss = 1.4396813710530598
this is loss = 3.7755567110501804
this is loss = 2.5966063279371996
this is loss = 1.683163569523738
this is loss = 2.0430613926478793
this is loss = 2.329443868001302
this is loss = 1.7097514016287667
this is loss = 2.920398712158203
this is loss = 3.7882207234700522
this is loss = 3.1200742721557617
this is loss = 3.4435533796037947
this is loss = 0.967947142464774
this is loss = 3.1459080622746396
this is loss = 1.6817437807718914
this is loss = 2.2715326944986978
this is loss = 2.530046081542969
this is loss = 3.6021358489990236
this is loss = 1.0790138244628906
this is loss = 2.335088539123535
this is loss = 1.532622557419997
this is loss = 2.419598799485427
this is loss = 1.33021283

this is loss = 3.9697996775309243
this is loss = 1.493765610914964
this is loss = 2.1844496045793806
this is loss = 1.7738539377848308
this is loss = 2.8475004497327303
this is loss = 3.7827694232647238
this is loss = 3.577829678853353
this is loss = 3.791902542114258
this is loss = 2.906841595967611
this is loss = 2.81773312886556
this is loss = 2.2963428497314453
this is loss = 2.439953009287516
this is loss = 2.4462339083353677
this is loss = 2.3133702278137207
this is loss = 1.975851395550896
this is loss = 1.5364943284254808
this is loss = 1.5767830935391514
this is loss = 0.9990326881408691
this is loss = 1.9997464497884114
this is loss = 1.34498422796076
this is loss = 3.3233861056241123
this is loss = 3.9399230263449927
this is loss = 1.8991360134548612
Training is over
['a', 'group', 'of', 'people', 'sitting', 'a', '<UNK>', 'of', 'a', '<EOS>']
['the', '<UNK>', '<UNK>', 'is', '<UNK>', 'the', '<UNK>', '<UNK>', '.', '<EOS>']
['man', 'in', 'a', '<UNK>', '<UNK>', 'a', 'a', 'a', 'a'

In [18]:
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print(sampling_inference(decoder, init_word="the"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print(sampling_inference(decoder, init_word="man"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print(sampling_inference(decoder, init_word="woman"))
print('--------------------------------------------------------------------')
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))
print(sampling_inference(decoder, init_word="dog"))

['the', '<UNK>', '<UNK>', 'is', 'on', 'the', 'refrigerator', 'on', 'the', 'table', '.', '<EOS>']
['the', 'that', 'has', 'hand', 'and', 'fruit', 'sitting', 'on', 'it', "'s", 'head', '.', '<EOS>']
['the', 'small', 'white', 'stone', 'clock', 'tower', '.', '<EOS>']
['the', 'computer', 'holding', 'out', 'and', '<UNK>', 'in', '<UNK>', '<EOS>']
['the', 'four', 'men', 'are', 'wearing', 'red', '<UNK>', '<UNK>', '.', '<EOS>']
--------------------------------------------------------------------
['man', 'with', 'no', '<UNK>', 'is', 'at', 'a', 'bar', 'with', 'a', 'plate', 'of', '.', '<EOS>']
['man', 'and', 'woman', 'men', 'holding', 'a', 'large', 'teddy', 'bear', '<EOS>']
['man', 'in', 'man', 'christmas', 'tree', 'in', 'a', 'lit', 'next', 'to', 'a', 'man', 'teddy', 'bear', '<EOS>']
['man', 'wearing', '<UNK>', 'is', 'sitting', 'outside', 'at', 'a', 'potted', 'animals', '.', '<EOS>']
['man', 'orange', 'cat', '<UNK>', 'at', 'a', 'wine', 'with', 'a', 'plate', 'on', 'his', 'head', '.', '<EOS>']
--------

# 6.  Building Language Encoder

We now build a language encoder, which will encode an input word by word, and ultimately output a hidden state that we can then be used by our decoder.

In [8]:
class EncoderLSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)

    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        return output, hidden

    def initHidden(self):
        #LSTM gets three inputs
        return (torch.zeros(1, 1, self.hidden_size, device=device), torch.zeros(1, 1, self.hidden_size, device=device))

    # Initialize the encoder with a hidden size of 300. 

In [9]:
encoder = EncoderLSTM(vocabularySize, 300).to(device)

# 7. Connecting Encoder to Decoder and Training End-to-End

We now connect our newly created encoder with our decoder, to train an end-to-end seq2seq architecture. 

It's likely that you'll be able to re-use most of your code from part 2. For our purposes, the only interaction between the encoder and the decoder is that the *last hidden state of the encoder is used as the initial hidden state of the decoder*. 

In [10]:
# Your code goes here

import random
def train(target_variable, 
          encoder, decoder, 
          encoder_optimizer,decoder_optimizer,
          criterion, 
          embeddings=one_hot_embeddings):
    
    encoder_hidden = encoder.initHidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    numerized_variable = torch.tensor(target_variable, dtype=torch.long, device=device).view(-1,1)
    sentence_one_hot_embeddings = embeddings[target_variable]
    input_length = sentence_one_hot_embeddings.shape[0]
    
    loss = 0
    
    encoder_outputs = torch.zeros(maxSequenceLength, encoder.hidden_size, device=device)
    
    for ei in range(input_length):
        encoder_input =  torch.tensor(sentence_one_hot_embeddings[ei,:], 
                                     dtype=torch.float, device=device).view(1,1,-1)
        encoder_output, encoder_hidden = encoder(
            encoder_input, encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    
    #number2 is <SOS>
    decoder_input = torch.tensor(sentence_one_hot_embeddings[0,:], dtype=torch.float, 
                                 device=device).view(1,1,-1)   
    decoder_hidden = encoder_hidden
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    if use_teacher_forcing:
        
        for ei in range(input_length-1):
            decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden)
            loss += criterion(decoder_output, numerized_variable[ei+1,:])
            decoder_input = torch.tensor(sentence_one_hot_embeddings[ei+1,:],
                                         dtype=torch.float, device=device).view(1,1,-1)

    else:
        
        print("non-teacher assistant mode")
        #decoder_input =  torch.tensor(sentence_one_hot_embeddings[0,:], 
        #                             dtype=torch.float, device=device).view(1,1,-1)
        for ei in range(input_length-1):
            decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            num = topi.squeeze().detach().item()
            decoder_input = torch.tensor(embeddings[num,:], 
                                         dtype=torch.float, device=device).view(1,1,-1)
            
            loss += criterion(decoder_output, numerized_variable[ei+1,:])
            if num == word2index["<EOS>"]:
                break
            
            
    loss.backward()
    decoder_optimizer.step()
    encoder_optimizer.step()
    
    return loss.item() / input_length



    

In [11]:
def train_the_model(encoder, decoder, epochs=6, criterion=nn.NLLLoss()):
    
    lr = 0.001
    dec_optimizor = torch.optim.Adam(decoder.parameters(), lr=lr)
    enc_optimizor = torch.optim.Adam(encoder.parameters(), lr=lr)
    
    for epoch in range(epochs):
        counter = 0
        for sentence in train_sentences:
            counter = counter + 1
            numberized = preprocess_numberize(sentence)
            loss = train(numberized, encoder, decoder, enc_optimizor, dec_optimizor, criterion)
            if counter%100 == 99:
                print('this is loss =', loss)
                
    print("Training is over")
            
            

In [12]:
teacher_forcing_ratio = 1
decoder = decoder = DecoderLSTM(wordEncodingSize,vocabularySize).to(device)
train_the_model(encoder, decoder)

this is loss = 3.819256122295673
this is loss = 4.241327194940476
this is loss = 4.264661471048991
this is loss = 2.3708135164701023
this is loss = 3.6030250549316407
this is loss = 2.372650840065696
this is loss = 3.1895206157977762
this is loss = 3.1000083287556968
this is loss = 3.1868089948381697
this is loss = 3.2323319571358815
this is loss = 3.231940133231027
this is loss = 4.368178844451904
this is loss = 2.14630126953125
this is loss = 3.457719326019287
this is loss = 2.158472220102946
this is loss = 3.2570865337665262
this is loss = 2.9906272888183594
this is loss = 2.5768800623276653
this is loss = 2.022722390981821
this is loss = 2.3935585021972656
this is loss = 2.5669273963341346
this is loss = 3.340640141413762
this is loss = 4.570744514465332
this is loss = 3.144301327792081
this is loss = 2.394511482932351
this is loss = 2.5589621861775718
this is loss = 2.72780704498291
this is loss = 2.5187859535217285
this is loss = 3.7972437540690103
this is loss = 3.48818102749911

this is loss = 1.3223220280238561
this is loss = 1.9565207958221436
this is loss = 2.37333004291241
this is loss = 1.825863404707475
this is loss = 1.7391260692051478
this is loss = 2.316614423479353
this is loss = 2.449387686593192
this is loss = 1.3421756744384765
this is loss = 1.4270841280619304
this is loss = 1.4160776138305664
this is loss = 1.4282448108379657
this is loss = 1.6805693886496804
this is loss = 1.0239440282185872
this is loss = 2.4541947501046315
this is loss = 2.281789541244507
this is loss = 1.5001708984375
this is loss = 0.8827738761901855
this is loss = 1.2522294998168946
this is loss = 1.3538438479105632
this is loss = 2.757463582356771
this is loss = 1.9395698308944702
this is loss = 0.8878494501113892
this is loss = 2.021283663236178
this is loss = 0.8208975474039714
this is loss = 0.9298302910544656
this is loss = 1.1003958384195964
this is loss = 1.072449464064378
this is loss = 2.759264119466146
this is loss = 2.385746479034424
this is loss = 2.21974060752

this is loss = 0.6313456807817731
this is loss = 0.07679396409254807
this is loss = 0.23296564275568182
this is loss = 1.184244889479417
this is loss = 0.43830402692159015
this is loss = 0.7356071472167969
this is loss = 1.1816152684828813
this is loss = 0.4545760521521935
this is loss = 0.12006246342378504
this is loss = 1.514825967641977
this is loss = 0.9015141267042893
this is loss = 0.60545547803243
this is loss = 0.4356723698702725
this is loss = 0.23762967369773172
this is loss = 0.1917348305384318
this is loss = 0.6267065604527792
this is loss = 1.623173763877467
this is loss = 0.13893638338361466
this is loss = 0.017851902888371393
this is loss = 1.8469722747802735
this is loss = 0.3641313711802165
this is loss = 1.120193099975586
this is loss = 0.3202051321665446
this is loss = 1.2992703364445612
this is loss = 1.92055944962935
this is loss = 1.4314258748834783
this is loss = 0.4861440658569336
this is loss = 0.6355138258500532
this is loss = 0.5675564289093018
this is loss =

this is loss = 1.309512002127511
this is loss = 0.4253061467950994
this is loss = 0.18978655338287354
this is loss = 0.5324641466140747
this is loss = 1.1521149589901878
this is loss = 0.08362145857377486
this is loss = 1.4516222635904947
this is loss = 0.21935154841496393
this is loss = 0.05447549819946289
this is loss = 0.30516606110792893
this is loss = 0.03386696179707845
this is loss = 0.0032812998845027043
this is loss = 0.47373693639581854
this is loss = 0.7882612546284994
this is loss = 0.03326181570688883
this is loss = 0.04361658829909105
this is loss = 0.29713358197893414
this is loss = 0.4032145227704729
this is loss = 0.13207783017839705
this is loss = 0.47912397384643557
this is loss = 0.06146860122680664
this is loss = 0.1531653086344401
this is loss = 0.31258649092454177
this is loss = 0.5020794134873611
this is loss = 0.8433571728793058
this is loss = 1.3218086242675782
this is loss = 0.003666114807128906
this is loss = 1.3618548257010323
this is loss = 0.1729945769676

this is loss = 0.7699435779026577
this is loss = 0.193180114030838
this is loss = 0.2391376495361328
this is loss = 0.4352109432220459
this is loss = 0.21735024452209473
this is loss = 0.09273925194373497
this is loss = 0.020598411560058594
this is loss = 0.3318157196044922
this is loss = 0.11257198878696986
this is loss = 0.3000583431937478
this is loss = 0.9543004830678304
this is loss = 0.46493342717488606
this is loss = 0.03473499843052456
this is loss = 0.368865233201247
this is loss = 0.1357748167855399
this is loss = 0.7331116358439128
this is loss = 0.5279165781461276
this is loss = 0.768843698501587
this is loss = 0.4057863780430385
this is loss = 1.0649832212007964
this is loss = 0.18680348763099083
this is loss = 0.6449574690598708
this is loss = 0.11251669663649339
this is loss = 0.02188907970081676
this is loss = 0.9865449025080755
this is loss = 0.19443368911743164
this is loss = 0.7701903123121995
this is loss = 0.1512501769595676
this is loss = 0.015289340700422014
this

# 8. Testing 

We must now define a method that allows us to do inference using the seq2seq architecture. We then run the 500 validation captions through this method, and ultimately compare the **reference** and **generated** sentences using our **BLEU** similarity score method defined above, to identify the average BLEU score.

In [13]:
def seq2seq_inference(sentence, embeddings=one_hot_embeddings, max_length=maxSequenceLength):
    # Your code goes here
    decoded_words = []
    encoder_hidden = encoder.initHidden()
    numerized = preprocess_numberize(sentence)
    sentence_one_hot_embeddings = embeddings[numerized]
    input_length = sentence_one_hot_embeddings.shape[0]
    for ei in range(input_length):
        encoder_input = torch.tensor(sentence_one_hot_embeddings[ei,:], dtype=torch.float, 
                                 device=device).view(1,1,-1)
        encoder_output, encoder_hidden = encoder(encoder_input , encoder_hidden)
    
    decoder_hidden = encoder_hidden
    decoder_input = torch.tensor(embeddings[word2index['<SOS>'],:], dtype=torch.float, 
                                 device=device).view(1,1,-1)
    
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder(decoder_input , decoder_hidden)
        topv, topi = decoder_output.data.topk(1)
        if topi.item() == word2index['<EOS>']:
            decoded_words.append(vocabulary[topi.item()])
            break
        else:
            decoded_words.append(vocabulary[topi.item()])
            
        #decoder_input = torch.tensor(decoder_output, dtype=torch.float, device=device).view(1,1,-1)
        decoder_input = torch.tensor(embeddings[topi.item(),:], 
                                     dtype=torch.float, device=device).view(1,1,-1)
        
    return decoded_words



In [14]:
seq2seq_inference('A very clean and well decorated empty bathroom')

['a', 'very', 'clean', 'and', 'black', 'empty', 'showing', 'by', '<EOS>']

In [15]:
# Perform inference for all validation sequences and report the average BLEU score
    # Your code goes here
BleuSum = 0    
i = 0
for sentence in val_sentences:
    newSentence = ' '.join(seq2seq_inference(sentence))
    newSentence = '<SOS> ' + newSentence
    sentence = '<SOS> ' + sentence + ' <EOS>'
    if i < 100:
        i = i + 1
        print(sentence)
        print(newSentence)
        print('-----------------------------------------')
    BleuSum = BleuSum + compute_bleu(sentence.lower(), newSentence.lower())   
print(BleuSum/len(val_sentences))



<SOS> Set of bananas hanging off of a banana tree. <EOS>
<SOS> behind of bananas hanging off of a banana <UNK> . <EOS>
-----------------------------------------
<SOS> Two bunches of green bananas on banana trees. <EOS>
<SOS> two slices of green bananas on palm trees . <EOS>
-----------------------------------------
<SOS> Many calendars and bunches of bananas hanging on a wall. <EOS>
<SOS> many <UNK> and bananas and hanging out on a wall . <EOS>
-----------------------------------------
<SOS> Clusters of bananas and pictures hanging on a wall. <EOS>
<SOS> <UNK> of bananas and hanging hanging on a wall . <EOS>
-----------------------------------------
<SOS> two dogs that look to be fighting one another <EOS>
<SOS> two dogs that look to one another <UNK> one <EOS>
-----------------------------------------
<SOS> Two dogs fighting with one on his back on the ground <EOS>
<SOS> two dogs <UNK> with one on his head on the ground <EOS>
-----------------------------------------
<SOS> Bunches of 

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


<SOS> A lady dressed in a blue and purple outfit wearing a hat made of fruit. <EOS>
<SOS> a lady dressed in a blue blanket and a yellow <UNK> made up and rides . <EOS>
-----------------------------------------
<SOS> A pot of vegetables cooking on the stove. <EOS>
<SOS> a pot of vegetables vegetables on the stove . <EOS>
-----------------------------------------
<SOS> Broccoli stir fry in a frying pan on the stove  <EOS>
<SOS> wine <UNK> <UNK> in a <UNK> on the stove top <EOS>
-----------------------------------------
<SOS> Three horses with white markings on their faces standing in snow. <EOS>
<SOS> three horses with white <UNK> on their <UNK> while standing over . <EOS>
-----------------------------------------
<SOS> A wooden spoon accompanied by a cooking pan filled with stir-fried broccoli and onions. <EOS>
<SOS> a wooden metal <UNK> by a variety of pizza arranged in front of sauce . <EOS>
-----------------------------------------
<SOS> A wooden spoon stirs vegetables cooking in a p

In [31]:
compute_bleu('salam ali chetori gayidi maro rya.', 'salam ali chetori gayidi maro rya .')

1.0

# 9. Encoding as Generic Feature Representation

We now use the final hidden state of our encoder, to identify the nearest neighbor amongst the training sentences for each sentence in our validation data.

It would be effective to first define a method that would generate all of the hidden states and store these hidden states **on the CPU**, and then loop over the generated hidden states to identify/output the nearest neighbors.

In [36]:
def final_encoder_hidden(sentence, embeddings=one_hot_embeddings):
    # Your code goes here
    encoder_hidden = encoder.initHidden()
    numerized = preprocess_numberize(sentence)
    sentence_one_hot_embeddings = embeddings[numerized]
    input_length = sentence_one_hot_embeddings.shape[0]
    for ei in range(input_length):
        encoder_input = torch.tensor(sentence_one_hot_embeddings[ei,:], dtype=torch.float, 
                                 device=device).view(1,1,-1)
        encoder_output, encoder_hidden = encoder(encoder_input , encoder_hidden)
        
    return encoder_output.view(-1)
    

# Now run all training data and validation data to store hidden states
# Your code goes here
val_contexts = []
train_contexts = []
for sentence in val_sentences:
    val_contexts.append(final_encoder_hidden(sentence).cpu().data.numpy())
np.save(open('validation_vectors', 'wb+'), val_contexts)
    
for sentence in train_sentences:
    train_contexts.append(final_encoder_hidden(sentence).cpu().data.numpy())
np.save(open('training_vectors', 'wb+'), train_contexts)

In [37]:
val_contexts = np.load('validation_vectors')
train_contexts = np.load('training_vectors')

In [38]:
# Now get nearest neighbors and print
from sklearn.metrics.pairwise import euclidean_distances
distances = euclidean_distances(val_contexts, train_contexts)
min_distances = np.argmin(distances, axis=1)
print(min_distances.shape)
print(min_distances.shape)
i = 0
for index in min_distances[:10]:
    print(val_sentences[i])
    i = i+1
    print(train_sentences[index])
    print('-----------------------------')

(500,)
(500,)
Set of bananas hanging off of a banana tree.
There is an airplane that is not moving.
-----------------------------
Two bunches of green bananas on banana trees.
Two giraffes and zebras in a grassy field next to trees.
-----------------------------
Many calendars and bunches of bananas hanging on a wall.
Many electronics, wires, parts, and phones are arranged on a bed.
-----------------------------
Clusters of bananas and pictures hanging on a wall.
Persimmons, lime, bananas and tomatoes on a table.
-----------------------------
two dogs that look to be fighting one another
two dogs running together on the beach 
-----------------------------
Two dogs fighting with one on his back on the ground
Two horses running through a field of grass
-----------------------------
Bunches of green bananas hanging down from trees.
Bunches of green bananas still on the plant.
-----------------------------
Two dogs have a playful fight with one another.
Two creatures grazing in a lush fie

# 10. Effectiveness of word2vec

We now repeat everything done above using word2vec embeddings in place of one-hot embeddings. This will require re-running steps 1-9.

You can find the results in CPSCAssignment3-2.ipynb 