In [150]:
import torch
import pandas as pd
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader

import pprint
from collections import Counter,defaultdict
from itertools import chain

SAMPLE_EASY = ['Data', 'sample_easy.json']
TRAIN_EASY = ['Data', 'Easy', 'IR_train_easy.json']

class DialogDataset(Dataset):
    def __init__(self, json_data, transform=None):
        self.json_data = pd.read_json(json_data, orient='index')

    def __len__(self):
        return len(self.json_data)

    def __getitem__(self, idx):
        item = self.json_data.iloc[idx]
        print(item.dialog[0])

        # Flatten dialog and add caption into 1d array
        dialog = [word for line in item.dialog for word in line[0].split()]
        dialog.extend(item.caption.split(' '))
        #words = np.array(dialog)

        img_ids = np.array(item.img_list)
        target = np.array([item.target, item.target_img_id])

        return {'dialog':dialog, 'img_ids':item.img_list, 'target':item.target_img_id}

def show_batch(sample_batched):
    print(sample_batched)


def createEmbeddings (words, threshold):
    w2i = defaultdict(lambda: len(w2i))
    i2w = dict()
    wordCounts = Counter()

    # count all the words in lower case
    for word in words:
        wordCounts[word.lower()] += 1

    # index all words that occured at least n times
    for word, count in wordCounts.most_common():
        if count >= threshold:
            i2w[w2i[word]] = word
        else:
            break

    return w2i, i2w

# done: collect all the words from dialogs and 
# captions and use them to create embedding map
def getWords(dataset):
    words = [dataset[i]['dialog'] for i in range(len(dataset))]
    return list(chain.from_iterable(words))

    
dd = DialogDataset(os.path.join(*SAMPLE_EASY))

words = getWords(dd)
w2i, i2w = createEmbeddings(words, 3)

loader = DataLoader(dd, batch_size=4, shuffle=True, num_workers=4)

for batch_num, sample in enumerate(loader):
    show_batch(sample)
    if batch_num == 3:
        break

['is this a child or adult ? adult']
["what color is horse ? brown, but it's black and white photo"]
['how many bikes there ? 3']
['what color is the sink ? white']
['is this a zoo ? yes']
['is this a zoo ? yes']
['is this a child or adult ? adult']
['what color is the sink ? white']
["what color is horse ? brown, but it's black and white photo"]
['how many bikes there ? 3']
{'dialog': [('is', 'what', 'what', 'how'), ('this', 'color', 'color', 'many'), ('a', 'is', 'is', 'bikes'), ('zoo', 'the', 'horse', 'there'), ('?', 'sink', '?', '?'), ('yes', '?', 'brown,', '3'), ('how', 'white', 'but', 'what'), ('many', 'is', "it's", 'color'), ('giraffes', 'the', 'black', 'are'), ('are', 'light', 'and', 'bikes'), ('there', 'on', 'white', '?'), ('?', '?', 'photo', 'i'), ('1', 'yes', 'is', 'see'), ('how', 'any', 'this', 'green'), ('many', 'people', 'outdoors', 'red'), ('zebras', '?', '?', 'and'), ('?', 'no', 'yes', 'white'), ('1', 'how', 'do', 'are'), ('are', 'many', 'you', 'they'), ('people', 'dishe

In [149]:
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

#### Testing 
print("Testing Embedding neural net class:")
embeds = nn.Embedding(len(w2i), 5)
lookup_tensor = torch.LongTensor([w2i['bikes']])
result = embeds(autograd.Variable(lookup_tensor))
print(result)


context_size = 2
data = []
#Find two words before, and two words after given word.
for i in range(2, len(words) - 2):
    context = [words[i - 2], words[i - 1],
               words[i + 1], words[i + 2]]
    target = words[i]
    data.append((context, target))


class CBOW(nn.Module):

    def __init__(self, context_size=2, embedding_size=100, vocab_size=None):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        self.linear1 = nn.Linear(embedding_size, vocab_size)

    def forward(self, inputs):
        lookup_embeds = self.embeddings(inputs)
        embeds = lookup_embeds.sum(dim=0)
        out = self.linear1(embeds)
        out = F.log_softmax(out)
        return out

def make_context_vector(context, w2i):
    idxs = [w2i[w] for w in context]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)


def run():
    loss_func = nn.CrossEntropyLoss()
    net = CBOW(context_size, embedding_size=5, vocab_size=len(w2i))
    optimizer = optim.SGD(net.parameters(), lr=0.01)

    for epoch in range(100):
        total_loss = 0
        for context, target in data:
            context_var = make_context_vector(context, w2i)
            net.zero_grad()
            log_probs = net(context_var)

            loss = loss_func(log_probs, autograd.Variable(
                torch.LongTensor([w2i[target]])
            ))

            loss.backward()
            optimizer.step()

            total_loss += loss.data
        print(total_loss)

run()

Testing Embedding neural net class:
Variable containing:
-1.0952 -1.0703  0.6404  1.6199  0.5258
[torch.FloatTensor of size 1x5]



ValueError: Expected 2 or 4 dimensions (got 1)