In [None]:
# Train literal listener and speaker to use DNC representations:

In [1]:
import sys
sys.path.append("../color-evaluation/")

In [26]:
import torch
import torch.nn as nn
import dnc
from monroe_data import MonroeData, MonroeDataEntry, Color # last two for reading pkl file
import caption_featurizers
from color_featurizers import ColorFeaturizer, color_phi_fourier
from models import LiteralListener, LiteralSpeaker, CaptionEncoder, CaptionGenerator, PytorchModel, ColorEncoder, BeamNode
from experiment import FeatureHandler
import scipy.stats as stats
from evaluation import score_model, Score

import numpy as np
from queue import PriorityQueue

In [52]:
# load in DNC
class DNCEncoder(nn.Module):
    def __init__(self, vocab_size, embed_dim, color_dim):
        super(DNCEncoder, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = dnc.DNC(
          input_size=embed_dim + color_dim,
          hidden_size=128,
          rnn_type='lstm',
          num_layers=4,
          nr_cells=100,
          cell_size=32,
          read_heads=4,
          batch_first=True,
          gpu_id=-1,
          debug=True
        )
        self.decoder = nn.Linear(embed_dim + color_dim, vocab_size) # don't predict over padding tag
        self.logsoftmax = nn.LogSoftmax(dim=1)
        
        self.vocab_size = vocab_size
        
    def forward(self, caption, color):
        embeds = self.embed(caption)
        color_reps = color.repeat(1, caption.shape[1], 1) # repeat color for number of tokens in captions
        # concatenate colors to caption
        inputs = torch.cat((embeds, color_reps), dim=2) # cat along the innermost dimension
        # dnc magic
        (controller_hidden, memory, read_vectors) = (None, None, None)
        output, (controller_hidden, memory, read_vectors), debug_memory = \
          self.rnn(inputs, (controller_hidden, memory, read_vectors), reset_experience=True)

        result = self.decoder(output)
        return self.logsoftmax(result)

In [114]:
class DNCSpeaker(PytorchModel):
    
    def predict(self, X, sample=1, beam_width=5):
        all_tokens = []
        self.model.eval()
        max_gen_len = 20
    
        self.model.eval()
        if not torch.cuda.is_available():
            torch.manual_seed(10) # for determinism
        else:
            torch.cuda.manual_seed_all(10)
            
        with torch.no_grad():
            for i, feature in enumerate(X):
                caption, colors = feature
                caption = torch.tensor(caption, dtype=torch.long)
                colors = torch.tensor(colors, dtype=torch.float)

    
                beam_nodes = PriorityQueue()
                ended_list = []
    
                tokens = caption[:, 0].view(-1, 1) # begin at start token
                print(tokens)
                start = BeamNode(0, tokens, False)
                beam_nodes.put(start)
    
                for i in range(max_gen_len + 1):
                    node = beam_nodes.get()
                    if node.ended:
                        ended_list.append(np.array(node.tokens[0].numpy()))
                        if len(ended_list) == sample:
                            break
                    else:
                        tokens = node.tokens
                        vocab_preds = self.model(tokens, colors)[:,-1:,:] # just distribution over last token
                        log_probs, prediction_indices = vocab_preds.topk(beam_width, dim=2)  # taking the topk predictions
                        for j in range(beam_width):
                            prediction_index = prediction_indices[:,-1,j:j+1] # a single prediction
                            log_prob = log_probs[0][0][j].item()
                            updated_tokens = tokens.clone()
                            updated_tokens = torch.cat((updated_tokens, prediction_index), dim=1)
                            updated_log_prob = node.log_prob + log_prob
                            ended = ((i == max_gen_len - 1) or (prediction_index.item() == caption[:, -1].item()))#.view(-1, 1)))
                            new_node = BeamNode(updated_log_prob, updated_tokens, ended)
                            beam_nodes.put(new_node)
                if sample == 1: # for backwards compatability
                    all_tokens.append(np.array(ended_list))
                else:
                    all_tokens.append(ended_list)
        return all_tokens
    
    def train_iter(self, caption_tensor, color_tensor, target_tensor, criterion):
        model_output = self.model(caption_tensor, color_tensor)
        
        #model_output = model_output[:, :-1, :].squeeze(0)
        model_output = model_output.view(-1, self.model.vocab_size)
        target_tensor = target_tensor.view(-1)
        loss = criterion(model_output, target_tensor)
        return loss

In [115]:
# get DNC
dnc_speaker = DNCSpeaker(DNCEncoder, num_epochs=5)
dnc_speaker.init_model(vocab_size = 368, embed_dim=100, color_dim=3) # vocab size just copied from dnc training notebook
# load model manually because we need to set "map_location" to cpu
dnc_speaker.model.load_state_dict(torch.load("model_checkpoint_4.params", map_location='cpu'))
#dnc_speaker.load_model("./model_checkpoint_4.params")

In [3]:
# get data
monroe_train_data = MonroeData("../color-evaluation/data/csv/train_corpus_monroe.csv", "../color-evaluation/data/entries/train_entries_monroe.pkl")
monroe_dev_data = MonroeData("../color-evaluation/data/csv/dev_corpus_monroe.csv", "../color-evaluation/data/entries/dev_entries_monroe.pkl")

In [29]:
# build dnc color featurizer:
def dnc_phi(color_list, space):
    if space != "hsv":
        print("Space must be hsv to use dnc")
        return None
    # we're going to do a greedy search for the tokens and then max-pool their embeddings
    features = [
        [np.array([[0, 1]]), np.array([color_list])]
    ]
    predicted_tokens = dnc_speaker.predict(features) # looks like [array([[  0, 123,   1]])]
    
    predicted_tokens = predicted_tokens[0].flatten() # looks like array([  0, 123,   1])
    
    predicted_tokens = predicted_tokens[1:-1] # get rid of nasty start and end tokens
    
    # get embeddings for predicted tokens
    print(predicted_tokens)
    embeds = dnc.model.embed(torch.tensor.fromNumpy(predicted_tokens))
    return embeds

In [40]:
test_features = [
        [np.array([[0, 1]]), np.array([[100.0, 30.0, 30.0]])]
    ]

In [41]:
test_features

[[array([[0, 1]]), array([[100.,  30.,  30.]])]]

In [116]:
test_predicted_tokens = dnc_speaker.predict([[np.array([[0, 1]]), np.array([[118.11320755,  66.52719665,  93.7254902 ]])]], beam_width=1)

tensor([[0]])


In [117]:
test_predicted_tokens

[array([[  0, 367,  32,  32,  32,  32,   4, 367,   4,  32,  32,  32, 118,
          32,  20,  32, 367,  32, 212,  32, 212]])]

In [60]:
np.array([[0, 1]])[:, -1]

array([1])

In [55]:
test_predicted_tokens

[array([], dtype=float64)]

In [39]:
np.array([[0, 1]]).shape

(1, 2)

In [30]:
embeds = dnc_phi([100.0, 30.0, 30.0], space="hsv")

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 2. Got 2 and 1 in dimension 0 at /Users/administrator/nightlies/pytorch-1.0.0/wheel_build_dirs/wheel_3.6/pytorch/aten/src/TH/generic/THTensorMoreMath.cpp:1333

In [110]:
# OK because DNC isn't working, let's do this with the regular literal speaker/listener

print("Initializing featurizers")
caption_phi = caption_featurizers.CaptionFeaturizer(tokenizer=caption_featurizers.EndingTokenizer) # Use with parameter files that end in `endings_tkn` - using endings tokenizer to separate endings like "ish" and "er"
color_phi = ColorFeaturizer(color_phi_fourier, "rgb", normalized=True)

# Now we have a different target function, because we want to predict the target color directly
def target_color_target(data_entry):
    return np.array(data_entry.colors[0].rgb_norm)

feature_handler = FeatureHandler(monroe_train_data, monroe_dev_data, caption_phi, color_phi, target_fn=target_color_target,
                                randomized_colors=False)

print("Obtaining training features") # get features even if you're runnning the pretrained model for example
train_features = feature_handler.train_features()
train_targets = feature_handler.train_targets()



Initializing featurizers
Obtaining training features


In [111]:
assess_features = feature_handler.test_features()
assess_targets = feature_handler.test_targets()

In [112]:
assess_features[0]

[array([ 0, 96,  6]),
 array([[ 1.0000000e+00,  2.0711137e-01, -9.1420978e-01, -1.2241068e-01,
         -9.9631262e-01, -2.9028466e-01, -9.7003126e-01,  3.6807224e-02,
          9.8527765e-01, -2.0711137e-01, -1.0000000e+00, -2.0711137e-01,
         -9.4560730e-01,  1.2241068e-01,  9.9631262e-01,  4.3861625e-01,
          9.7003126e-01, -3.6807224e-02, -9.1420978e-01,  2.0711137e-01,
          1.0000000e+00,  5.1410276e-01,  9.4560730e-01, -1.2241068e-01,
          7.8834641e-01, -4.3861625e-01, -9.7003126e-01,  0.0000000e+00,
         -9.7831738e-01, -4.0524131e-01, -9.9247956e-01, -8.5797310e-02,
          9.5694035e-01,  2.4298018e-01,  9.9932235e-01,  1.7096189e-01,
         -9.7831738e-01, -1.2246469e-16,  9.7831738e-01,  3.2531029e-01,
          9.9247956e-01,  8.5797310e-02,  8.9867449e-01, -2.4298018e-01,
         -9.9932235e-01,  4.0524131e-01,  9.7831738e-01,  0.0000000e+00,
          8.5772860e-01, -3.2531029e-01, -9.9247956e-01, -6.1523157e-01,
         -8.9867449e-01,  2.4

In [None]:
for feature in assess_features:
    