## Import Libraries

In [66]:
%load_ext autoreload
%autoreload 2
from utils.colors import ColorsCorpusReader
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from utils.torch_color_describer import ContextualColorDescriber, create_example_dataset
import utils.utils
from utils.utils import UNK_SYMBOL, START_SYMBOL, END_SYMBOL
import matplotlib.pyplot as plt
import matplotlib.patches as mpatch
import numpy as np
from baseline.model import (
    BaselineTokenizer, BaselineColorEncoder,
    BaselineEmbedding, BaselineDescriber, GloVeEmbedding
)

from experiment.vision import ConvolutionalColorEncoder

from transformers import (
    BertTokenizer, BertModel,
    XLNetTokenizer, XLNetModel,
    RobertaTokenizer, RobertaModel,
    ElectraTokenizer, ElectraModel,    
)

import utils.model_utils as mu

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [67]:
# torch.cuda.current_device()
# torch.cuda.device(0)
# torch.cuda.device_count()
# torch.cuda.get_device_name()
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Dataset

In [68]:
# Call the project's seed-fixing helper before any data handling.
utils.utils.fix_random_seeds()

# Location of the filtered colors corpus, relative to the working directory.
COLORS_SRC_FILENAME = os.path.join("data", "colors", "filteredCorpus.csv")

# Read every example from the corpus into an in-memory list.
corpus = ColorsCorpusReader(
    COLORS_SRC_FILENAME, word_count=None, normalize_colors=True)
examples = [example for example in corpus.read()]

In [69]:
# Number of examples read from the filtered corpus.
len(examples)

46994

In [70]:
# Location of the bake-off data file, relative to the working directory.
BAKE_OFF_COLORS_SRC_FILENAME = os.path.join(
    "data", "colors", "cs224u-colors-bakeoff-data.csv")

# Same reader settings as the main corpus; materialise all examples in a list.
bake_off_corpus = ColorsCorpusReader(
    BAKE_OFF_COLORS_SRC_FILENAME, word_count=None, normalize_colors=True)
bake_off_examples = [example for example in bake_off_corpus.read()]

### Full data

In [71]:
# Number of leading corpus examples to use.
# NOTE(review): this section is headed "Full data", yet only the first 10
# examples are used. Set N_EXAMPLES to None to run on the whole corpus.
N_EXAMPLES = 10

# Pair each example's color context with its description, then transpose
# into two parallel tuples.
rawcols, texts = zip(*[[ex.colors, ex.contents] for ex in examples[:N_EXAMPLES]])

# Default split proportions. No random_state is passed, so the split is drawn
# from the global NumPy RNG state and depends on what ran before this cell.
raw_colors_train, raw_colors_test, texts_train, texts_test = train_test_split(rawcols, texts)

def create_data(tokenizer, include_position=False, include_conv_embeddings=False):
    """Tokenize every training description with `tokenizer`.

    Reads the notebook-level `texts_train` and passes each text, together
    with `tokenizer`, to `mu.tokenize_colour_description`.

    `include_position` and `include_conv_embeddings` are accepted but are
    not used by this function.

    Returns:
        A list with one tokenization per entry of `texts_train`, in order.
    """
    tokens_train = []
    for text in texts_train:
        tokens_train.append(mu.tokenize_colour_description(text, tokenizer))
    return tokens_train

In [72]:
# Build the color encoder used for both training and evaluation.
# NOTE(review): the meaning of the positional `True` is not visible here;
# consider passing it by keyword.
color_encoder = ConvolutionalColorEncoder(True)

# Encode each training color context.
colors_train = list(map(color_encoder.encode_color_context, raw_colors_train))

Using cache found in /Users/yanjiang/.cache/torch/hub/pytorch_vision_v0.6.0


In [73]:
# Sanity check: number of entries in the first encoded context, and the
# shape of its first entry.
len(colors_train[0]), colors_train[0][0].shape

(3, torch.Size([1, 566]))

In [74]:
def create_bakeoff_data():
    """Transpose the bake-off examples into parallel sequences.

    Reads the notebook-level `bake_off_examples` and collects each
    example's `colors` and `contents`.

    Returns:
        A `zip` object that, when unpacked, yields two tuples: all
        `colors` values and all `contents` values, in example order.
    """
    pairs = [[ex.colors, ex.contents] for ex in bake_off_examples]
    return zip(*pairs)

In [75]:
def evaluate(trained_model, tokenizer, color_seqs_test, texts_test):
    """Prepare held-out data and delegate scoring to `trained_model`.

    Args:
        trained_model: object exposing `evaluate(col_seqs, tok_seqs)`.
        tokenizer: passed to `mu.tokenize_colour_description` for each text.
        color_seqs_test: iterable of raw color contexts; each is encoded
            with the notebook-level `color_encoder`.
        texts_test: iterable of descriptions to tokenize.

    Returns:
        Whatever `trained_model.evaluate` returns for the encoded colors
        and tokenized texts.
    """
    # Tokenize first, then encode colors (same order as before).
    tok_seqs = []
    for text in texts_test:
        tok_seqs.append(mu.tokenize_colour_description(text, tokenizer))

    col_seqs = []
    for colors in color_seqs_test:
        col_seqs.append(color_encoder.encode_color_context(colors))

    return trained_model.evaluate(col_seqs, tok_seqs)

## BERT

In [76]:
# Load the tokenizer and the model from the same pretrained checkpoint name.
BERT_CHECKPOINT = 'bert-base-cased'
bert_tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)

In [77]:
# Tokenize the training descriptions with the BERT tokenizer (timed).
# NOTE(review): `tokens_train` is reassigned in the later model sections, so
# this cell must be re-run before re-fitting the BERT describer.
%time  tokens_train = create_data(bert_tokenizer)
# Bake-off colors and texts; in the cells shown here they are only referenced
# by the commented-out bake-off evaluation.
b_raw_colors_test, b_texts_test = create_bakeoff_data()

CPU times: user 2.12 ms, sys: 845 µs, total: 2.97 ms
Wall time: 2.17 ms


In [78]:
%time bert_embeddings, bert_vocab = mu.extract_input_embeddings(texts_test, bert_model, bert_tokenizer)

CPU times: user 2.64 ms, sys: 1.03 ms, total: 3.67 ms
Wall time: 2.74 ms


In [79]:
%load_ext autoreload
%autoreload 2
from baseline.model import (
    BaselineTokenizer, BaselineColorEncoder,
    BaselineEmbedding, BaselineDescriber, GloVeEmbedding
)
from experiment.vision import ConvolutionalColorEncoder

# Build the baseline describer from the extracted BERT vocabulary and embeddings.
# NOTE(review): this rebinds `bert_model`, which until now held the Hugging
# Face `BertModel`. Re-running the embedding-extraction cell after this one
# would pass the describer instead. Consider a separate name such as
# `bert_describer`.
bert_model = BaselineDescriber(
    vocab=bert_vocab,
    embedding=bert_embeddings,
    early_stopping=True
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [80]:
# Fit the describer on the encoded training colors and the current
# `tokens_train` (timed); the return value is discarded.
%time _ = bert_model.fit(colors_train, tokens_train)

Stopping after epoch 18. Validation score did not improve by tol=1e-05 for more than 10 epochs. Final error is 1.3114489316940308

CPU times: user 309 ms, sys: 32.8 ms, total: 342 ms
Wall time: 419 ms


  Evaluate on test 

In [81]:
# Score the fitted describer on the held-out part of the train/test split.
evaluate(bert_model, bert_tokenizer, raw_colors_test, texts_test)

{'listener_accuracy': 0.3333333333333333, 'corpus_bleu': 0.05000000000000001}

  Evaluate on bakeoff

In [83]:
# evaluate(bert_model, bert_tokenizer, b_raw_colors_test, b_texts_test)

## XLNet

In [84]:
# Load the tokenizer and the model from the same pretrained checkpoint name.
XLNET_CHECKPOINT = 'xlnet-base-cased'
xlnet_tokenizer = XLNetTokenizer.from_pretrained(XLNET_CHECKPOINT)
xlnet_model = XLNetModel.from_pretrained(XLNET_CHECKPOINT)

In [85]:
# Tokenize the training descriptions with the XLNet tokenizer.
# NOTE(review): this overwrites the `tokens_train` produced for the previous
# model, so earlier fit cells cannot be re-run without redoing their
# tokenization first.
tokens_train = create_data(xlnet_tokenizer)
# Bake-off colors and texts; in the cells shown here they are only referenced
# by the commented-out bake-off evaluation.
b_raw_colors_test, b_texts_test = create_bakeoff_data()

In [86]:
%time xlnet_embeddings, xlnet_vocab = mu.extract_input_embeddings(texts_test, xlnet_model, xlnet_tokenizer)

CPU times: user 3.18 ms, sys: 1.23 ms, total: 4.4 ms
Wall time: 3.28 ms


In [96]:
# Build the baseline describer from the extracted XLNet vocabulary and embeddings.
# NOTE(review): this rebinds `xlnet_model`, which until now held the Hugging
# Face `XLNetModel`. Re-running the embedding-extraction cell after this one
# would pass the describer instead. Consider a separate name such as
# `xlnet_describer`.
xlnet_model = BaselineDescriber(
    vocab=xlnet_vocab,
    embedding=xlnet_embeddings,
    early_stopping=True
)

In [97]:
# Fit the describer on the encoded training colors and the current
# `tokens_train` (timed); the return value is discarded.
%time _ = xlnet_model.fit(colors_train, tokens_train)

Stopping after epoch 12. Validation score did not improve by tol=1e-05 for more than 10 epochs. Final error is 1.3800020217895508

CPU times: user 228 ms, sys: 20 ms, total: 248 ms
Wall time: 273 ms


In [98]:
# Score the fitted describer on the held-out part of the train/test split.
evaluate(xlnet_model, xlnet_tokenizer, raw_colors_test, texts_test)

{'listener_accuracy': 0.3333333333333333, 'corpus_bleu': 0.36787944117144233}

  Evaluate on bakeoff

In [None]:
# evaluate(xlnet_model, xlnet_tokenizer, b_raw_colors_test, b_texts_test)

## RoBERTa

In [99]:
# Load the tokenizer and the model from the same pretrained checkpoint name.
ROBERTA_CHECKPOINT = 'roberta-base'
roberta_tokenizer = RobertaTokenizer.from_pretrained(ROBERTA_CHECKPOINT)
roberta_model = RobertaModel.from_pretrained(ROBERTA_CHECKPOINT)

In [100]:
# Tokenize the training descriptions with the RoBERTa tokenizer.
# NOTE(review): this overwrites the `tokens_train` produced for the previous
# model, so earlier fit cells cannot be re-run without redoing their
# tokenization first.
tokens_train = create_data(roberta_tokenizer)
# Bake-off colors and texts; in the cells shown here they are only referenced
# by the commented-out bake-off evaluation.
b_raw_colors_test, b_texts_test = create_bakeoff_data()

In [101]:
%time roberta_embeddings, roberta_vocab = mu.extract_input_embeddings(texts_test, roberta_model, roberta_tokenizer)

CPU times: user 2.81 ms, sys: 797 µs, total: 3.61 ms
Wall time: 3.11 ms


In [103]:
# Build the baseline describer from the extracted RoBERTa vocabulary and embeddings.
# NOTE(review): this rebinds `roberta_model`, which until now held the Hugging
# Face `RobertaModel`. Re-running the embedding-extraction cell after this
# one would pass the describer instead. Consider a separate name such as
# `roberta_describer`.
roberta_model = BaselineDescriber(
    vocab=roberta_vocab,
    embedding=roberta_embeddings,
    early_stopping=True
)

In [104]:
# Fit the describer on the encoded training colors and the current
# `tokens_train` (timed); the return value is discarded.
%time _ = roberta_model.fit(colors_train, tokens_train)

Stopping after epoch 18. Validation score did not improve by tol=1e-05 for more than 10 epochs. Final error is 1.5666316747665405

CPU times: user 345 ms, sys: 32.8 ms, total: 378 ms
Wall time: 408 ms


In [105]:
# Score the fitted describer on the held-out part of the train/test split.
evaluate(roberta_model, roberta_tokenizer, raw_colors_test, texts_test)

{'listener_accuracy': 0.0, 'corpus_bleu': 0.05000000000000001}

In [106]:
# evaluate(roberta_model, roberta_tokenizer, b_raw_colors_test, b_texts_test)