In [None]:
# so we can use packages from parent directory
import sys
sys.path.append("..")

In [40]:
#import skorch
import torch
import torch.nn as nn

import numpy as np
from monroe_data import MonroeData, MonroeDataEntry, Color # last two for reading pkl file
import caption_featurizers
from color_featurizers import ColorFeaturizer, color_phi_fourier
from experiment import FeatureHandler#, evaluate_model
import evaluation
from models import CaptionEncoder, LiteralListener, PytorchModel

In [163]:
import importlib
importlib.reload(evaluation)
import evaluation

In [66]:
import models

In [9]:
net = skorch.NeuralNet(module=CaptionEncoder, 
                       module__
                       criterion=nn.NLLLoss, 
                       optimizer=torch.optim.Adam, 
                       lr = 0.004)

In [13]:
from sklearn.model_selection import GridSearchCV, ParameterGrid

In [12]:
ll = LiteralListener(CaptionEncoder)

In [19]:
gs = GridSearchCV(ll, param_grid, refit=False, cv=3)

In [16]:
param_grid = {"lr":[0.001, 0.004, 0.01], 'color_hidden_size':[50, 100, 150]}

In [18]:
list(ParameterGrid(param_grid))

[{'color_hidden_size': 50, 'lr': 0.001},
 {'color_hidden_size': 50, 'lr': 0.004},
 {'color_hidden_size': 50, 'lr': 0.01},
 {'color_hidden_size': 100, 'lr': 0.001},
 {'color_hidden_size': 100, 'lr': 0.004},
 {'color_hidden_size': 100, 'lr': 0.01},
 {'color_hidden_size': 150, 'lr': 0.001},
 {'color_hidden_size': 150, 'lr': 0.004},
 {'color_hidden_size': 150, 'lr': 0.01}]

In [21]:
import sklearn.linear_model.logistic as lr



In [22]:
e = lr.LogisticRegression()

In [23]:
e.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'max_iter': 100,
 'multi_class': 'warn',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'warn',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [102]:
default_args_map = {}

In [104]:
# parameter maps:

def init_default_arg_map(feature_handler):
    global default_args_map
    default_args_map = {
        # wrappers
        'PytorchModel': {'num_epochs' : 5,
                       'optimizer' : torch.optim.Adam,
                       'lr' : 0.004,
                       'criterion': torch.nn.CrossEntropyLoss
                      },
        # modules
        'CaptionEncoder': { 'embed_dim' : 100,
                          'hidden_dim' : 100,
                          'vocab_size' : feature_handler.caption_featurizer.caption_indexer.size,
                          'color_dim' : 54
                        }
        
    }

def fill_default_args(model, params):
    model_default_params = {}
    # add args that should be there and filter out args that shouldn't
    for superclass in model.mro(): # mro = "method resolution order - it lists all the superclasses in order
        default_params = default_args_map.get(superclass.__name__, {})
        model_default_params = dict(default_params, **model_default_params)
        
    # filter out params with keys not in default
    params = {k: params[k] for k in params.keys() & model_default_params.keys()}
    params = dict(model_default_params, **params)
    
    return params
#     for klass in default_args_map.keys():
#         if isinstance(model, klass):
#             default_params = default_arg_map[klass]
#             params = dict(default_params, **params) # extend dict
#     return params
            

In [106]:
def retrain_model(wrapper, model, train_X, train_y, parameters):
    wrapper_args = fill_default_args(wrapper, parameters)
    model_args = fill_default_args(model, parameters)
    print(wrapper_args, model_args)
    # create new model
    new_model = wrapper(model, **wrapper_args)
    new_model.init_model(**model_args)
    
    # train model
    new_model.fit(train_X, train_y)
    
    return new_model

In [145]:
def hyperparameter_search(search_parameters, wrapper, model, feature_handler, score_model_f):
    # ~initialization code~
    train_X, train_y, assess_X, assess_y = feature_handler # for now, for debugging
    
    best_model_score = 0
    best_model = None
    best_model_params = None
    tested_params = []

    print("Searching parameter space")
    for parameters in search_parameters:
        # retrain candidate model
        candidate_model = retrain_model(wrapper, model, train_X[:100], train_y[:100], parameters)
        # evaluate candidate model
        print("Evaluating Candidate Model:")
        candidate_model_score = score_model_f(candidate_model, assess_X, assess_y)
        print("Parameters: {}\tScore: {}".format(parameters, candidate_model_score))
        # store best model, score, parameters
        if candidate_model_score > best_model_score:
            best_model = candidate_model
            best_model_params = parameters
            best_model_score = candidate_model_score
            
        tested_params.append((parameters, candidate_model_score))
    
    return best_model, best_model_params, tested_params
        
    
        

In [160]:
def assess_model(model, assess_features, assess_targets, output_to_score):
    assess_model_outputs = model.predict(assess_features)
    assess_model_scores = [output_to_score(assess_model_outputs[i], assess_targets[i]) for i in range(len(assess_model_outputs))]
#     dev_data_synth_sm = copy.deepcopy(dev_data_synth)
   # dev_data_synth_sm.data = dev_data_synth_sm.data.head(len(assess_model_scores))
    #print(dev_data_synth_sm.data.shape)
    reg_results = evaluation.score_model(dev_data_synth, assess_model_scores, score=evaluation.Score.COMPOSITE)
    return reg_results[0] # just return the rho value


In [133]:
import copy

In [139]:
dev_data_synth.data.shape

(25850, 36)

In [134]:
# test on literal listener:
train_data = MonroeData("../data/csv/train_corpus_monroe.csv", "../data/entries/train_entries_monroe.pkl")
dev_data_synth = MonroeData("../data/csv/dev_corpus_synth_10fold.csv", "../data/entries/dev_corpus_synth_10fold.pkl")


In [34]:
caption_phi = caption_featurizers.CaptionFeaturizer(tokenizer=caption_featurizers.EndingTokenizer) # Use with parameter files that end in `endings_tkn` - using endings tokenizer to separate endings like "ish" and "er"
color_phi = ColorFeaturizer(color_phi_fourier, "rgb", normalized=True)
feature_handler = FeatureHandler(train_data, dev_data_synth, caption_phi, color_phi)

output_to_score_target = lambda model_output, target: np.exp(model_output[target]) # get the model's predicted probablity at each target index and use that as the score



In [103]:
init_default_arg_map(feature_handler)

In [88]:
test_params = {'color_dim':54}
fill_default_args(LiteralListener, test_params)

{'criterion': torch.nn.modules.loss.CrossEntropyLoss,
 'foobar': 4,
 'lr': 0.004,
 'num_epochs': 5,
 'optimizer': torch.optim.adam.Adam}

In [161]:
from functools import partial
literal_listener_assess_model = partial(assess_model, output_to_score=output_to_score_target)

In [96]:
literal_listener_assess_model

functools.partial(<function assess_model at 0x132c95c80>, output_to_score=<function <lambda> at 0x14a6a7620>)

In [None]:
fill_default_args(LiteralListener, test_params)

In [68]:
isinstance(models.LiteralListener, models.PytorchModel)

False

In [71]:
default_args_map[LiteralListener.mro()[0]]

KeyError: <class 'models.LiteralListener'>

In [91]:
param_grid = {"lr":[0.001, 0.004], 'color_hidden_size':[50, 100], 'num_epochs':[1]}
pg = list(ParameterGrid(param_grid))

In [92]:
pg

[{'color_hidden_size': 50, 'lr': 0.001, 'num_epochs': 1},
 {'color_hidden_size': 50, 'lr': 0.004, 'num_epochs': 1},
 {'color_hidden_size': 100, 'lr': 0.001, 'num_epochs': 1},
 {'color_hidden_size': 100, 'lr': 0.004, 'num_epochs': 1}]

In [107]:
print("Initializing data")
train_X = feature_handler.train_features()
train_y = feature_handler.train_targets()
assess_X = feature_handler.test_features()
assess_y = feature_handler.test_targets()

Initializing data


In [164]:
x = hyperparameter_search(pg, LiteralListener, CaptionEncoder, [train_X, train_y, assess_X, assess_y], literal_listener_assess_model)

Searching parameter space
{'num_epochs': 1, 'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.001, 'criterion': <class 'torch.nn.modules.loss.CrossEntropyLoss'>} {'embed_dim': 100, 'hidden_dim': 100, 'vocab_size': 974, 'color_dim': 54}
---EPOCH 0---
AFTER EPOCH 99 - AVERAGE VALIDATION LOSS: 1.679647059449926
Evaluating Candidate Model:
Parameters: {'color_hidden_size': 50, 'lr': 0.001, 'num_epochs': 1}	Score: -0.23228980693006493
{'num_epochs': 1, 'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.004, 'criterion': <class 'torch.nn.modules.loss.CrossEntropyLoss'>} {'embed_dim': 100, 'hidden_dim': 100, 'vocab_size': 974, 'color_dim': 54}
---EPOCH 0---
AFTER EPOCH 99 - AVERAGE VALIDATION LOSS: 1.8203433629265056
Evaluating Candidate Model:
Parameters: {'color_hidden_size': 50, 'lr': 0.004, 'num_epochs': 1}	Score: -0.11769071730060483
{'num_epochs': 1, 'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.001, 'criterion': <class 'torch.nn.modules.loss.CrossEntropyLoss'>} {'embed_d

In [165]:
x

(<models.LiteralListener at 0x14ef410b8>,
 {'color_hidden_size': 100, 'lr': 0.004, 'num_epochs': 1},
 [({'color_hidden_size': 50, 'lr': 0.001, 'num_epochs': 1},
   -0.23228980693006493),
  ({'color_hidden_size': 50, 'lr': 0.004, 'num_epochs': 1},
   -0.11769071730060483),
  ({'color_hidden_size': 100, 'lr': 0.001, 'num_epochs': 1},
   -0.03434776707752851),
  ({'color_hidden_size': 100, 'lr': 0.004, 'num_epochs': 1},
   0.15623398834939042)])

In [130]:
len([0.006511065487832984, 0.023409264572201046, 0.6525344613148397, 0.09153942832213081, 0.0019055160863379183, 0.002195713462865435, 0.1448486371158992, 0.007449071892645734, 0.06845598253083168, 0.49858460771894203, 0.05267798043761057, 0.29324368461662176, 0.9144715929980316, 0.1134261497010641, 0.17154708680082278, 0.005607378544412828, 7.812167852447264e-05, 0.06333848954337418, 0.9633538953296845, 0.011605059793079166, 0.8556798382579612, 0.18931032564737954, 0.1166264816611529, 0.5714283994675888, 0.25649961174125974, 0.41297865930497724, 0.7978049673014234, 0.3047420364644446, 0.8063982627374914, 0.32835220802086823, 0.2001534036175303, 0.05276737872809043, 0.08269850860288774, 0.002314080755694551, 0.4503065225107272, 0.1975493121434279, 0.25547086385817375, 0.9186077341337798, 0.6768080964413862, 0.0030063694388321686, 0.39580027519055905, 0.0023964067133358776, 0.1809097752651766, 0.044980412231280575, 0.11199719377032401, 0.5037079764207698, 0.0015147020215676664, 0.1665735468755542, 0.5762354043482538, 0.14418571292829307, 0.006411319066708746, 0.041272174078962186, 0.004344640528712232, 0.0201392888501235, 0.9953587805852087, 0.8299879082023517, 0.2468926126260226, 0.6466677614174884, 0.8242486119440646, 0.34690894766460656, 0.09823838280766233, 0.021839486407299106, 0.4769084038955761, 0.025082927191961132, 0.8002800267680534, 0.6547736151857817, 0.001833037777494593, 0.4148568419816999, 0.503092514085159, 0.33141704348114026, 0.4054090449127508, 0.005552567771134523, 0.5581252695012987, 0.20252008772154886, 0.007947802147097458, 0.954537964338813, 0.027183101706815125, 0.13083463179136667, 0.0009264151665071757, 0.03186110885427075, 0.23897450850269675, 0.972998867979999, 0.07901870529066095, 0.15202201475749666, 0.5528595062303509, 0.4674201920099895, 0.6071302121346576, 0.3124493471738203, 0.07134538991298571, 0.26224347756421307, 0.1495575507875468, 0.06697103773667995, 0.07494151370056636, 0.7321363155275477, 0.5891875218710468, 0.01618206455778802, 0.018867579293167583, 0.1903581828114284, 0.87716879901514, 0.7642459746172323])

100