In [27]:
import sys
sys.path.append("..")

In [77]:
import torch
import torch.nn as nn
import numpy as np

from models import PytorchModel
from monroe_data import MonroeData, MonroeDataEntry, Color # last two for reading pkl file
import caption_featurizers
from color_featurizers import ColorFeaturizer, color_phi_fourier
from experiment import FeatureHandler, evaluate_model
import scipy.stats as stats
from evaluation import score_model, Score

In [62]:
class ColorSelector(nn.Module):
    """Small feed-forward network that scores the candidate colours of one context.

    All candidate colour feature vectors are flattened into a single vector,
    passed through one hidden ReLU layer of width ``color_dim`` and mapped to
    one log-probability per candidate.

    Args:
        color_dim: length of the feature vector of a single colour.
        num_colors: number of candidate colours per context. Defaults to 3,
            which reproduces the original fixed-size network.
    """

    def __init__(self, color_dim, num_colors=3):
        super(ColorSelector, self).__init__()
        self.linear1 = nn.Linear(num_colors * color_dim, color_dim)
        self.linear2 = nn.Linear(color_dim, num_colors)
        # NOTE: despite its name this is a log-softmax layer, not a loss.
        # The attribute name is kept so existing references keep working.
        self.nll = nn.LogSoftmax(dim=2)

    def forward(self, colors):
        """Return log-probabilities of shape (1, 1, num_colors) for one context.

        Args:
            colors: tensor holding exactly ``num_colors * color_dim`` values in
                any shape; it is flattened, so only a single context can be
                processed per call.
        """
        # Collapse everything into one (1, 1, num_colors * color_dim) row.
        colors = colors.reshape(1, 1, -1)
        hidden = nn.functional.relu(self.linear1(colors))
        return self.nll(self.linear2(hidden))

In [71]:
class ColorOnlyBaseline(PytorchModel):
    """Baseline that scores the candidate colours without looking at the caption.

    Captions are accepted by both methods but never passed to the model; the
    method signatures are presumably dictated by PytorchModel - TODO confirm.
    """

    def train_iter(self, caption_tensor, color_tensor, target_tensor, criterion):
        """Compute the loss for a single training example.

        Args:
            caption_tensor: unused; only the colours are fed to the model.
            color_tensor: colour features passed to ``self.model``.
            target_tensor: target passed to ``criterion`` as its second argument.
            criterion: callable ``criterion(scores, target)`` returning the loss.

        Returns:
            Whatever ``criterion`` returns for this example.
        """
        model_output = self.model(color_tensor)
        # Flatten the model output to a single row of scores for the criterion.
        loss = criterion(model_output.view(1, -1), target_tensor)
        return loss

    def predict(self, X):
        """Run the model on every example and collect its raw outputs.

        Args:
            X: sized sequence of ``(caption, colors)`` pairs; the caption is ignored.

        Returns:
            Float array of shape ``(len(X), n_scores)``, where ``n_scores`` is
            the flattened length of one model output. For empty ``X`` the
            historical shape ``(0, 3)`` is returned.
        """
        model_outputs = None
        self.model.eval()
        with torch.no_grad():
            for i, (_caption, colors) in enumerate(X):
                color_tensor = torch.tensor([colors], dtype=torch.float)
                scores = self.model(color_tensor).view(-1).numpy()
                if model_outputs is None:
                    # Size the result from the first output instead of
                    # hard-coding three candidates.
                    model_outputs = np.empty([len(X), scores.shape[0]])
                model_outputs[i] = scores
        if model_outputs is None:
            # Nothing to predict: keep the shape the original code produced.
            model_outputs = np.empty([0, 3])
        return model_outputs
                

In [46]:
# NOTE(review): `baseline` is not referenced by any later cell shown in this
# notebook; the model that is trained and evaluated below is `baseline_model`.
# Consider deleting this cell.
baseline = ColorOnlyBaseline(ColorSelector)

In [34]:
# Load the corpora used below: the Monroe training corpus and the synthetic
# 10-fold dev corpus. Each is given as a CSV file plus a pickled entries file.
train_data = MonroeData("../data/csv/train_corpus_monroe.csv", "../data/entries/train_entries_monroe.pkl")
dev_data_synth = MonroeData("../data/csv/dev_corpus_synth_10fold.csv", "../data/entries/dev_corpus_synth_10fold.pkl")

In [36]:
# Caption featurizer using the endings tokenizer, which separates endings such
# as "ish" and "er". Use it with parameter files that end in `endings_tkn`.
caption_phi = caption_featurizers.CaptionFeaturizer(tokenizer=caption_featurizers.EndingTokenizer)
# Colour featurizer: Fourier features computed from normalized RGB values.
color_phi = ColorFeaturizer(color_phi_fourier, "rgb", normalized=True)
# The handler pairs the training and dev corpora with both featurizers and
# provides the features and targets for training.
feature_handler = FeatureHandler(train_data, dev_data_synth, caption_phi, color_phi)
train_features = feature_handler.train_features()
train_targets = feature_handler.train_targets()



NameError: name 'baseline_model' is not defined

In [44]:
# Peek at the first ten training targets. The values shown are 0, 1 or 2 -
# presumably the index of the target colour among the three candidates
# (TODO confirm).
train_targets[:10]

array([0, 1, 0, 1, 1, 0, 2, 1, 2, 0])

In [74]:
# Model and training parameters: Adam optimizer, learning rate 0.001, 5 epochs.
# NOTE(review): this cell's execution count (74) is later than that of the
# fit (67) and save (73) cells below, so the stored outputs do not come from a
# single top-to-bottom run.
baseline_model = ColorOnlyBaseline(ColorSelector, optimizer=torch.optim.Adam, lr=0.001, num_epochs=5)
# color_dim is forwarded to ColorSelector; 54 is presumably the length of the
# Fourier feature vector of one colour - TODO confirm and name this constant.
baseline_model.init_model(color_dim=54)

In [67]:
# Train on the featurized training data. The output below logs a running loss
# and the average validation loss after each epoch.
baseline_model.fit(train_features, train_targets)

---EPOCH 0---
0m 0s (0:0 0.00%) 0.0010
0m 0s (0:1000 7.90%) 1.1131
0m 1s (0:2000 15.79%) 1.1006
0m 1s (0:3000 23.69%) 1.1002
0m 2s (0:4000 31.58%) 1.0988
0m 2s (0:5000 39.48%) 1.1051
0m 3s (0:6000 47.37%) 1.0953
0m 4s (0:7000 55.27%) 1.1004
0m 4s (0:8000 63.17%) 1.0900
0m 5s (0:9000 71.06%) 1.0962
0m 6s (0:10000 78.96%) 1.0962
0m 6s (0:11000 86.85%) 1.0990
0m 7s (0:12000 94.75%) 1.0970
AFTER EPOCH 2999 - AVERAGE VALIDATION LOSS: 1.0915622524023056
---EPOCH 1---
0m 8s (1:0 0.00%) 0.0009
0m 8s (1:1000 7.90%) 1.0915
0m 9s (1:2000 15.79%) 1.0838
0m 10s (1:3000 23.69%) 1.0874
0m 10s (1:4000 31.58%) 1.0793
0m 11s (1:5000 39.48%) 1.0865
0m 12s (1:6000 47.37%) 1.0810
0m 12s (1:7000 55.27%) 1.0807
0m 13s (1:8000 63.17%) 1.0773
0m 14s (1:9000 71.06%) 1.0804
0m 15s (1:10000 78.96%) 1.0806
0m 15s (1:11000 86.85%) 1.0774
0m 16s (1:12000 94.75%) 1.0771
AFTER EPOCH 2999 - AVERAGE VALIDATION LOSS: 1.0878375240663687
---EPOCH 2---
0m 17s (2:0 0.00%) 0.0006
0m 18s (2:1000 7.90%) 1.0811
0m 18s (2:2000 15

In [68]:
# Features and targets of the evaluation split. The FeatureHandler above was
# built with `dev_data_synth` as its second corpus, so "test" here presumably
# means that synthetic dev set - TODO confirm.
assess_features = feature_handler.test_features()
assess_targets = feature_handler.test_targets()



In [73]:
# Save the model parameters so the model can be restored without retraining.
baseline_model.save_model("../model/baseline_model.params")

In [75]:
# Restore the previously saved parameters into `baseline_model`.
baseline_model.load_model("../model/baseline_model.params")

In [78]:
# Raw model outputs for the evaluation features.
# NOTE(review): `predictions` is not used again in the cells shown here.
predictions = baseline_model.predict(assess_features)
def output_to_score(model_outputs, targets):
    """Score each example by the probability the model assigns to its target.

    Args:
        model_outputs: array-like of shape (n_examples, n_choices) holding
            log-probabilities, one row per example.
        targets: integer sequence of length n_examples giving the target
            index of each example.

    Returns:
        1-D array whose i-th entry is ``exp(model_outputs[i, targets[i]])``.
    """
    model_outputs = np.asarray(model_outputs)
    row_indices = np.arange(len(model_outputs))
    # Pick one log-probability per row, then exponentiate it back into a
    # probability.
    return np.exp(model_outputs[row_indices, targets])
# Evaluate on the synthetic dev set, scoring each example with output_to_score.
# The output below shows a pair of numbers (presumably a correlation and its
# p-value - TODO confirm) and the accuracy.
evaluate_model(dev_data_synth, feature_handler, baseline_model, output_to_score, score_model)


(-0.04931313036969144, 0.26304163420872834)
Accuracy: 0.38042553191489364


(-0.04931313036969144, 0.26304163420872834)

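The model performs poorly, which is the hoped-for result for a baseline that never sees the caption: its accuracy (about 0.38) is only slightly above random guessing among three colors (about 0.33).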

In [79]:
# NOTE(review): unexplained back-of-the-envelope figure. It is presumably an
# estimate of the accuracy reachable from the colours alone (a 50% hit rate on
# about a third of the examples, 33% on the rest) - TODO confirm what the
# constants stand for and say so here.
0.33 * 0.5 + 0.66 * 0.33

0.38280000000000003