In [204]:
from utils.colors import ColorsCorpusReader
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from utils.torch_color_describer import (
    ContextualColorDescriber, create_example_dataset
)
import utils.utils as utils
from utils.utils import UNK_SYMBOL
import matplotlib.pyplot as plt
import matplotlib.patches as mpatch
import numpy as np
from baseline.model import (
    BaselineTokenizer, BaselineColorEncoder,
    BaselineEmbedding, BaselineDescriber, GloVeEmbedding
)
from experiment.word_embeddings.helper import Embedding, EmbeddingType
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [205]:
COLORS_SRC_FILENAME = os.path.join(
    "data", "colors", "filteredCorpus.csv"
)
corpus = ColorsCorpusReader(
    COLORS_SRC_FILENAME,
    word_count=10,
    normalize_colors=True
)
examples = list(corpus.read())
examples = examples[:10]
len(examples)

10

## Bahdanau Attention

In [206]:
from utils.torch_color_describer import Encoder, Decoder, EncoderDecoder 
import torch
import torch.nn as nn

tokenizer = BaselineTokenizer()
color_encoder = BaselineColorEncoder()
embedding = BaselineEmbedding()

### 1. Producing the Encoder Hidden States

In [207]:
class Encoder_withAttention(Encoder):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        
    def forward(self, color_seqs):
        """
        Parameters
        ----------
        color_seqs : torch.FloatTensor
            The shape is `(m, n, p)` where `m` is the batch_size,
             `n` is the number of colors in each context, and `p` is
             the color dimensionality.
        Returns
        -------
        hidden : torch.FloatTensor
            These are the final hidden state of the RNN for this batch,
            shape `(m, p) where `m` is the batch_size and `p` is
             the color dimensionality.
        output : tensor containing the output features h_t from the last layer of the GRU 
        """
        
        output, hidden = self.rnn(color_seqs)
        print("color_seqs", color_seqs.shape, "hidden", hidden.shape
              , "output", output.shape)  # hidden.shape is (1, m, p)
        return output, hidden   # what is the output vs hidden?

## 2. Step 2 to 4

In [254]:
class Decoder_withAttention(Decoder):
    def __init__(self, color_dim, *args, **kwargs):
        self.color_dim = color_dim
        super().__init__(*args, **kwargs)
        
        self.fc_hidden = nn.Linear(self.hidden_dim, self.hidden_dim, bias=False)
        self.fc_encoder = nn.Linear(self.hidden_dim, self.hidden_dim, bias=False)
        self.weight = nn.Parameter(torch.FloatTensor(1, self.hidden_dim))


    def get_embeddings(self, word_seqs, target_colors=None):
        """
        You can assume that `target_colors` is a tensor of shape
        (m, n), where m is the length of the batch (same as
        `word_seqs.shape[0]`) and n is the dimensionality of the
        color representations the model is using. The goal is
        to attached each color vector i to each of the tokens in
        the ith sequence of (the embedded version of) `word_seqs`.
        """
        
        target_colors = torch.unsqueeze(target_colors, 1)
        target_colors_token = torch.repeat_interleave(target_colors, word_seqs.shape[1], dim=1)
        ret = torch.cat((self.embedding(word_seqs), target_colors_token), 2)
        return ret
    
    
    def forward(self, word_seqs, seq_lengths=None, hidden=None, target_colors=None, encoder_outputs=None):
        encoder_outputs = encoder_outputs.squeeze()   # (6, 3, 50)   
        embs = self.get_embeddings(word_seqs, target_colors=target_colors)
        print("Decoder encoder_output", encoder_outputs.shape, "word_seqs", word_seqs.shape)
        
        # Calculating Alignment Scores (final score (6, 3, 1)
        # hidden: 1x batch_size x color_dim   (1x6x50)
        # encoder_output: batch_size x num_colors x color_dim (6x3x50)
        print("hidden shape", hidden.shape, hidden[0].shape, encoder_outputs[0].shape)  
        print("linear layer1", self.fc_hidden(hidden[0]).shape)
        x = torch.tanh(self.fc_hidden(hidden[0])+self.fc_encoder(encoder_outputs[0]))

        
        
        if self.training:
            # Packed sequence for performance:
            embs = torch.nn.utils.rnn.pack_padded_sequence(
                embs,
                batch_first=True,
                lengths=seq_lengths,
                enforce_sorted=False)
            # RNN forward:
            print("before rnn", len(embs),len(embs[0]), len(embs[0][0]), hidden.shape)
            output, hidden = self.rnn(embs, hidden)
            print("rnn ok?")
            # Unpack:
            output, seq_lengths = torch.nn.utils.rnn.pad_packed_sequence(
                output, batch_first=True)
            # Output dense layer to get logits:
            output = self.output_layer(output)
            # Drop the final element:
            output = output[:, : -1, :]
            # Reshape for the sake of the loss function:
            output = output.transpose(1, 2)
            return output, hidden
        else:
            output, hidden = self.rnn(embs, hidden)
            output = self.output_layer(output)
            return output, hidden   

In [251]:
class ColorizedEncoderDecoder(EncoderDecoder):

    def forward(self,
            color_seqs,
            word_seqs,
            seq_lengths=None,
            hidden=None,
            targets=None):
        if hidden is None:
            encoder_output, encoder_hidden = self.encoder(color_seqs)

#         print("ED forward color seq", color_seqs.shape) 
#         print ("ED", encoder_output.shape, encoder_hidden.shape)
        
        output, hidden = self.decoder.forward(
            word_seqs, seq_lengths=seq_lengths, hidden=encoder_hidden, 
            target_colors=color_seqs[:,-1], encoder_outputs= encoder_output)

        if self.training:
            return output
        else:
            return output, hidden

In [252]:
class ColorizedInputDescriber(ContextualColorDescriber):

    def build_graph(self):
        encoder = Encoder_withAttention(
            color_dim=self.color_dim,
            hidden_dim=self.hidden_dim)

        decoder = Decoder_withAttention(
         vocab_size=self.vocab_size,
            embed_dim=self.embed_dim,
            embedding=self.embedding,
            hidden_dim=50,
            color_dim=self.color_dim
        )      

        encoder_decoder = ColorizedEncoderDecoder(
            encoder = encoder,
            decoder = decoder)
        
        return encoder_decoder

In [253]:
def create_data():    
    rawcols, texts = zip(*[[ex.colors, ex.contents] for ex in examples])

    raw_colors_train, raw_colors_test, texts_train, texts_test = \
        train_test_split(rawcols, texts)

    tokens_train = [tokenizer.encode(text) for text in texts_train]
    colors_train = [
        color_encoder.encode_color_context(colors) for colors in raw_colors_train
    ]

    vocab = sorted({word for tokens in tokens_train for word in tokens})
    vocab += [UNK_SYMBOL]

    return vocab, colors_train, tokens_train, raw_colors_test, texts_test

vocab, colors_train, tokens_train, raw_colors_test, texts_test = create_data()

glove_embedding, glove_vocab = embedding.create_embeddings(vocab)

baseline_model = ColorizedInputDescriber(
    vocab=glove_vocab,
    embedding=glove_embedding,
    early_stopping=True
)

Describler build dataset 6
ColorDataset _init_ 6
color_seqs torch.Size([6, 3, 54]) hidden torch.Size([1, 6, 50]) output torch.Size([6, 3, 50])
embs torch.Size([6, 12, 104])
Decoder encoder_output torch.Size([6, 3, 50]) word_seqs torch.Size([6, 12])
hidden shape torch.Size([1, 6, 50]) torch.Size([6, 50]) torch.Size([3, 50])
linear layer1 torch.Size([6, 50])


RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 0