In [1]:
import torch
from torch import nn
from torch.utils import data
from torch.cuda.amp import autocast, GradScaler

import numpy as np
from tqdm.notebook import tqdm

import sys
import datetime

sys.path.append('../code')
from dataset import get_data, MaskedDataset, make_vocab

from transformers import (
    AdamW, get_linear_schedule_with_warmup
)

from models import MaskedRDModel

In [2]:
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased')

Using cache found in /home/ubuntu/.cache/torch/hub/huggingface_pytorch-transformers_master


In [11]:
mask_size = 5
# model = torch.load('../trained_models/MaskedRDModel_Epoch_1_at_2021-05-05_06:45:01.309818')
# model = MaskedRDModel.from_pretrained('bert-base-uncased')
# model.initialize(mask_size)

In [10]:
d = get_data('../wantwords-english-baseline/data', word2vec=False)

Loading data...
Training data: 675715 word-def pairs
Dev data: 75873 word-def pairs
Test data: 1200 word-def pairs


In [12]:
train_data, train_data_def, dev_data, test_data_seen, \
    test_data_unseen, test_data_desc = d

In [13]:
target_matrix, target2idx, idx2target = make_vocab(d, tokenizer, mask_size=mask_size)

In [53]:
# target2idx maps target words to indices
# target_matrix maps target indices to bpe sequences, padded/truncated to mask_size
target2idx['book'], target_matrix[target2idx['book']], idx2target[target2idx['book']]

(16187, tensor([2338,  103,  103,  103,  103]), 'book')

In [14]:
train_dataset = MaskedDataset(train_data + train_data_def, tokenizer, target2idx, mask_size=mask_size)
dev_dataset = MaskedDataset(dev_data, tokenizer, target2idx, mask_size=mask_size)
test_dataset_seen = MaskedDataset(test_data_seen, tokenizer, target2idx, mask_size=mask_size)
test_dataset_unseen = MaskedDataset(test_data_unseen, tokenizer, target2idx, mask_size=mask_size)
test_dataset_desc = MaskedDataset(test_data_desc, tokenizer, target2idx, mask_size=mask_size)

In [17]:
batch_size = 55
num_workers = 4

loader_params = {
    'pin_memory': False,
    'batch_size': batch_size,
    'num_workers': num_workers,
    'collate_fn': dev_dataset.collate_fn
}

train_loader = data.DataLoader(train_dataset, **{'shuffle': True, **loader_params})
dev_loader = data.DataLoader(dev_dataset, **{'shuffle': True, **loader_params})
test_loader_seen = data.DataLoader(test_dataset_seen, **{'shuffle': False, **loader_params})
test_loader_unseen = data.DataLoader(test_dataset_unseen, **{'shuffle': False, **loader_params})
test_loader_desc = data.DataLoader(test_dataset_desc, **{'shuffle': False, **loader_params})

In [10]:
train_dataset[0], tokenizer.convert_ids_to_tokens(train_dataset[0][0])

((tensor([  101,   103,   103,   103,   103,   103,   102,  2000,  2713,  1037,
           2711,  2013,  7750, 11819,  2013,  6531,  1037,  7979,  4735,  2040,
           2001, 14933,  2098,  2011,  1996,  3099,   102]),
  0),
 ['[CLS]',
  '[MASK]',
  '[MASK]',
  '[MASK]',
  '[MASK]',
  '[MASK]',
  '[SEP]',
  'to',
  'release',
  'a',
  'person',
  'from',
  'punishment',
  'exempt',
  'from',
  'penalty',
  'a',
  'convicted',
  'criminal',
  'who',
  'was',
  'pardon',
  '##ed',
  'by',
  'the',
  'governor',
  '[SEP]'])

In [15]:
epochs = 10

lr = 2e-5
optim = AdamW(model.parameters(), lr=lr)

warmup_duration = 0.01 # portion of the first epoch spent on lr warmup
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=len(train_loader) * warmup_duration, 
                                            num_training_steps=len(train_loader) * epochs)

epoch = 0

# scaler = GradScaler()

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
import wandb

wandb.init(project='reverse-dictionary', entity='reverse-dict')

config = wandb.config
config.learning_rate = lr
config.epochs = epochs
config.batch_size = batch_size
config.optimizer = type(optim).__name__
config.scheduler = type(scheduler).__name__
config.warmup_duration = warmup_duration

wandb.watch(model)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mreverse-dict[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.29 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[<wandb.wandb_torch.TorchGraph at 0x7f0ea4382410>]

In [23]:
target_matrix = target_matrix.to(device)

In [18]:
model = model.to(device)

In [25]:
def evaluate(pred, gt, test=False):
    acc1 = acc10 = acc100 = 0
    n = len(pred)
    pred_rank = []
    for p, word in zip(pred, gt):
        if test:
            loc = (p == word).nonzero(as_tuple=True)
            if len(loc) != 0:
                pred_rank.append(min(loc[-1], 1000))
            else:
                pred_rank.append(1000)
        if word in p[:100]:
            acc100 += 1
            if word in p[:10]:
                acc10 += 1
                if word == p[0]:
                    acc1 += 1
    if test:
        pred_rank = torch.tensor(pred_rank, dtype=torch.float32)
        return (acc1, acc10, acc100, pred_rank)
    else:
        return acc1/n, acc10/n, acc100/n

In [16]:
inc = 10
losses = []

for epoch in range(epoch, epochs):
    # Training
    model.train()
    train_loss = 0.0
    # Train on subset of training data to save time
    with tqdm(total=len(train_loader)) as pbar:
        for i, (x, y) in enumerate(train_loader):
            if i % inc == 0 and i != 0:
                display_loss = train_loss / i
                pbar.set_description(f'Epoch {epoch+1}, Train Loss: {train_loss / i}')

            optim.zero_grad()

            x = x.to(device)
            attention_mask = (x != train_dataset.pad_id)
            y = y.to(device)
            
            loss, out = model(input_ids=x, attention_mask=attention_mask, 
                              target_matrix=target_matrix, ground_truth=y)

#             scaler.scale(loss).backward()
            loss.backward()
            
#             scaler.unscale_(optim)
            nn.utils.clip_grad_value_(model.parameters(), 5)
            
#             scaler.step(optim)
            optim.step()
#             scaler.update()
            
            train_loss += loss.detach()
            
            scheduler.step()
            
            pbar.update(1)
            
            del x, y, out, loss, attention_mask
            
    model_name = type(model).__name__
    filename = f'../trained_models/{model_name} Epoch {epoch+1} at {datetime.datetime.now()}'.replace(' ', '_')
    with open(filename, 'wb+') as f:
        torch.save(model, f)
    
    # Validation
    model.eval()
    val_loss = 0.0
    val_acc1, val_acc10, val_acc100 = 0.0, 0.0, 0.0
    with torch.no_grad():
        with tqdm(total=len(dev_loader)) as pbar:
            for i, (x, y) in enumerate(dev_loader):
                if i % inc == 0 and i != 0:
                    display_loss = val_loss / i
                    pbar.set_description(f'Epoch {epoch+1}, Val Loss: {val_loss / i}')

                x = x.to(device)
                attention_mask = (x != train_dataset.pad_id)
                y = y.to(device)

#                 with autocast():
                loss, out = model(input_ids=x, attention_mask=attention_mask, target_matrix=target_matrix,
                              criterion=criterion, ground_truth=y)

                val_loss += loss.detach()

                pbar.update(1)                
                
                result, indices = torch.topk(out, k=100, dim=-1, largest=True, sorted=True)
                
                acc1, acc10, acc100 = evaluate(indices, y)
                val_acc1 += acc1
                val_acc10 += acc10
                val_acc100 += acc100

                del x, y, out, loss
    
    wandb.log({
        'train_loss': train_loss / len(train_loader),
        'val_loss': val_loss / len(dev_loader),
        'val_acc1': val_acc1 / len(dev_loader),
        'val_acc10': val_acc10 / len(dev_loader),
        'val_acc100': val_acc100 / len(dev_loader)
    })
    

  0%|          | 0/12286 [00:00<?, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 142.00 MiB (GPU 0; 14.76 GiB total capacity; 12.69 GiB already allocated; 45.75 MiB free; 13.45 GiB reserved in total by PyTorch)

In [33]:
'''
Epoch 1, Train Loss: 5.3207106590271: 100%|██████████| 14078/14078 [1:39:36<00:00,  2.36it/s]   
Epoch 1, Val Loss: 7.255414962768555: 100%|██████████| 1581/1581 [04:09<00:00,  6.34it/s] 
'''

In [22]:
tokenizer.convert_ids_to_tokens(train_dataset[0][0].numpy())

['[CLS]',
 '[MASK]',
 '[MASK]',
 '[MASK]',
 '[MASK]',
 '[MASK]',
 '[SEP]',
 'to',
 'release',
 'a',
 'person',
 'from',
 'punishment',
 'exempt',
 'from',
 'penalty',
 'a',
 'convicted',
 'criminal',
 'who',
 'was',
 'pardon',
 '##ed',
 'by',
 'the',
 'governor',
 '[SEP]']

In [38]:
def getPredFromDesc(model, desc : str, mask_size=5, top_n=10):
    desc = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(desc))
    cls_id, mask_id, sep_id, pad_id = dev_dataset.cls_id, dev_dataset.mask_id, dev_dataset.sep_id, dev_dataset.pad_id
    desc_ids = [cls_id] + [mask_id] * mask_size + [sep_id] + desc + [sep_id]
    x = torch.tensor(desc_ids).unsqueeze(0).to(device)
    attention_mask = (x != pad_id)
    out = model(input_ids=x, attention_mask=attention_mask, target_matrix=target_matrix)
    result, indices = torch.topk(out, k=top_n, dim=-1, largest=True, sorted=True)
    
    indices = indices[0]
    return [idx2target[i] for i in indices], indices
    

In [112]:
getPredFromDesc(model, 'an inhabitant of a cold country', 5, 100)

(['rustic',
  'countryman',
  'arctic',
  'northerner',
  'winters',
  'outflank',
  'deserter',
  'landsman',
  'country',
  'landscapist',
  'arcadian',
  'sylvan',
  'gypsy',
  'highlander',
  'quagmire',
  'borer',
  'northern',
  'greengrocer',
  'outlander',
  'vegan',
  'outlandish',
  'alpine',
  'spectrin',
  'trappist',
  'landside',
  'chiller',
  'southerner',
  'quarantined',
  'bushranger',
  'barbaric',
  'nordic',
  'lurcher',
  'barbarian',
  'countrywoman',
  'dendrite',
  'desert',
  'orientalist',
  'inglenook',
  'frozen',
  'continental',
  'inland',
  'mountainous',
  'borzoi',
  'churlish',
  'rustication',
  'merganser',
  'icepick',
  'denizen',
  'norther',
  'cold',
  'pastoralist',
  'midland',
  'pelagic',
  'lutenist',
  'bulgar',
  'bouffant',
  'winter',
  'philistine',
  'barranca',
  'barman',
  'outdoorsman',
  'denier',
  'bannister',
  'snowman',
  'polemicist',
  'campervan',
  'dendritic',
  'lubricant',
  'midwestern',
  'icer',
  'narcissist',


In [51]:
train_dataset[1000]

(tensor([  101,   103,   103,   103,   103,   103,   102,  2583,  1998,  5627,
          2000,  4553,  6570,  3085,  2402, 15608]),
 75)

In [52]:
train_data[1000]

{'word': 'teachable',
 'lexnames': ['adj.all'],
 'root_affix': ['able'],
 'sememes': ['willing', 'undergo', 'teach'],
 'definitions': 'able and willing to learn teachable youngsters'}

In [45]:
tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(dev_dataset[120][0]))

'[CLS] [MASK] [MASK] [MASK] [MASK] [MASK] [SEP] a whipping or flogging the discipline of the scourge'

In [19]:
def test(loader, name):
    inc = 3
    model.eval()
    test_loss = 0.0
    test_acc1 = test_acc10 = test_acc100 = test_rank_median = test_rank_variance = 0.0
    total_seen = 0
    all_pred = []
    with torch.no_grad():
        with tqdm(total=len(loader)) as pbar:
            for i, (x,y) in enumerate(loader):
                if i % inc == 0 and i != 0:
                    display_loss = test_loss / i
                    pbar.set_description(f'Test Loss: {display_loss}')

                x = x.to(device)
                attention_mask = (x != dev_dataset.pad_id)
                y = y.to(device)

#                 with autocast():
                loss, out = model(input_ids=x, attention_mask=attention_mask, 
                                  target_matrix=target_matrix, ground_truth=y)

                test_loss += loss.detach()

                pbar.update(1)

                result, indices = torch.sort(out, descending=True)
                
                b = len(x)
                acc1, acc10, acc100, pred_rank = evaluate(indices, y, test=True)
                test_acc1 += acc1
                test_acc10 += acc10
                test_acc100 += acc100
                total_seen += b
                all_pred.extend(pred_rank)
                
                del x, y, out, loss
                if i % 20 == 0:
                    torch.cuda.empty_cache()
    
    test_loss /= len(loader)
    test_acc1 /= total_seen
    test_acc10 /= total_seen
    test_acc100 /= total_seen
    all_pred = torch.tensor(all_pred)
    median = torch.median(all_pred)
    var = torch.var(all_pred)**0.5
    
    print(f'{name}_test_loss:', test_loss)
    print(f'{name}_test_acc1:', test_acc1)
    print(f'{name}_test_acc10:', test_acc10)
    print(f'{name}_test_acc100:', test_acc100)
    print(f'{name}_test_rank_median:', median)
    print(f'{name}_test_rank_variance', var)
    
    return ({
        f'{name}_test_loss': test_loss,
        f'{name}_test_acc1': test_acc1,
        f'{name}_test_acc10': test_acc10,
        f'{name}_test_acc100': test_acc100,
        f'{name}_test_rank_median': test_rank_median,
        f'{name}_test_rank_variance': test_rank_variance
    })
    

In [21]:
test(test_loader_seen, 'seen') # epoch 1

  0%|          | 0/10 [00:00<?, ?it/s]

seen_test_loss: tensor(5.4286, device='cuda:0')
seen_test_acc1: 0.238
seen_test_acc10: 0.456
seen_test_acc100: 0.674
seen_test_rank_median: tensor(13.)
seen_test_rank_variance tensor(375.9349)


{'seen_test_loss': tensor(5.4286, device='cuda:0'),
 'seen_test_acc1': 0.238,
 'seen_test_acc10': 0.456,
 'seen_test_acc100': 0.674,
 'seen_test_rank_median': 0.0,
 'seen_test_rank_variance': 0.0}

In [22]:
test(test_loader_unseen, 'unseen') # epoch 1

  0%|          | 0/10 [00:00<?, ?it/s]

unseen_test_loss: tensor(7.1437, device='cuda:0')
unseen_test_acc1: 0.114
unseen_test_acc10: 0.302
unseen_test_acc100: 0.5
unseen_test_rank_median: tensor(99.)
unseen_test_rank_variance tensor(435.7265)


{'unseen_test_loss': tensor(7.1437, device='cuda:0'),
 'unseen_test_acc1': 0.114,
 'unseen_test_acc10': 0.302,
 'unseen_test_acc100': 0.5,
 'unseen_test_rank_median': 0.0,
 'unseen_test_rank_variance': 0.0}

In [23]:
test(test_loader_desc, 'desc') # epoch 1

  0%|          | 0/4 [00:00<?, ?it/s]

desc_test_loss: tensor(2.8519, device='cuda:0')
desc_test_acc1: 0.46
desc_test_acc10: 0.82
desc_test_acc100: 0.95
desc_test_rank_median: tensor(1.)
desc_test_rank_variance tensor(137.4574)


{'desc_test_loss': tensor(2.8519, device='cuda:0'),
 'desc_test_acc1': 0.46,
 'desc_test_acc10': 0.82,
 'desc_test_acc100': 0.95,
 'desc_test_rank_median': 0.0,
 'desc_test_rank_variance': 0.0}

In [98]:
test(test_loader_seen, 'seen') # epoch 8

  0%|          | 0/10 [00:00<?, ?it/s]

seen_test_loss: tensor(2.1239, device='cuda:0')
seen_test_acc1: 0.61
seen_test_acc10: 0.884
seen_test_acc100: 0.926
seen_test_rank_median: tensor(0.)
seen_test_rank_variance tensor(227.7307)


{'seen_test_loss': tensor(2.1239, device='cuda:0'),
 'seen_test_acc1': 0.61,
 'seen_test_acc10': 0.884,
 'seen_test_acc100': 0.926,
 'seen_test_rank_median': 0.0,
 'seen_test_rank_variance': 0.0}

In [99]:
test(test_loader_unseen, 'unseen') # epoch 8

  0%|          | 0/10 [00:00<?, ?it/s]

unseen_test_loss: tensor(9.5594, device='cuda:0')
unseen_test_acc1: 0.078
unseen_test_acc10: 0.308
unseen_test_acc100: 0.528
unseen_test_rank_median: tensor(68.)
unseen_test_rank_variance tensor(434.0811)


{'unseen_test_loss': tensor(9.5594, device='cuda:0'),
 'unseen_test_acc1': 0.078,
 'unseen_test_acc10': 0.308,
 'unseen_test_acc100': 0.528,
 'unseen_test_rank_median': 0.0,
 'unseen_test_rank_variance': 0.0}

In [100]:
test(test_loader_desc, 'desc') # epoch 8

  0%|          | 0/4 [00:00<?, ?it/s]

desc_test_loss: tensor(2.8336, device='cuda:0')
desc_test_acc1: 0.42
desc_test_acc10: 0.75
desc_test_acc100: 0.935
desc_test_rank_median: tensor(1.)
desc_test_rank_variance tensor(97.9599)


{'desc_test_loss': tensor(2.8336, device='cuda:0'),
 'desc_test_acc1': 0.42,
 'desc_test_acc10': 0.75,
 'desc_test_acc100': 0.935,
 'desc_test_rank_median': 0.0,
 'desc_test_rank_variance': 0.0}

In [37]:
input_ids, labels = next(iter(train_loader))
input_ids

tensor([[101, 103, 103,  ...,   0,   0,   0],
        [101, 103, 103,  ...,   0,   0,   0],
        [101, 103, 103,  ...,   0,   0,   0],
        ...,
        [101, 103, 103,  ...,   0,   0,   0],
        [101, 103, 103,  ...,   0,   0,   0],
        [101, 103, 103,  ...,   0,   0,   0]])

In [56]:
sep_locations = torch.roll(input_ids == torch.tensor(102).expand_as(input_ids), shifts=1, dims=-1)
sep_locations[:,0] = 0 # last [SEP] will wrap to 0th position
token_type_ids = (torch.cumsum(sep_locations, dim=-1) > 0).long()

In [57]:
token_type_ids.dtype

torch.int64

In [58]:
token_type_ids[0]

tensor([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [36]:
input_ids[0]

tensor([  101,   103,   103,   103,   103,   103,   102,  6331, 20976,  2819,
          102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0])

In [26]:
model_name = type(model).__name__
filename = f'../trained_models/{model_name} Epoch {epoch+1} at {datetime.datetime.now()}'.replace(' ', '_')
with open(filename, 'wb+') as f:
    torch.save(model, f)

In [29]:
filename

'../trained_models/MaskedRDModel_Epoch_1_at_2021-05-05_06:45:01.309818'

In [33]:
# Some code for printing with color, may use later
class Color:
    color = dict(
        purple = '\033[95m',
        cyan = '\033[96m',
        darkCyan = '\033[36m',
        blue = '\033[94m',
        green = '\033[92m',
        yellow = '\033[93m',
        red = '\033[91m'
    )
    bold = '\033[1m'
    underline = '\033[4m'
    end = '\033[0m'
    
    
def cprint(*args, **kwargs):
    color = kwargs.pop('color', None)
    color = Color.color.get(color, None)
    bold = kwargs.pop('bold', False)
    underline = kwargs.pop('underline', False)
    end = kwargs.pop('end', '\n')
    if bold:
        print(Color.bold, end='')
    if underline:
        print(Color.underline, end='')
    if color is not None:
        print(color, end='')    
    print(*args, end='', **kwargs)
    if color is not None:
        print(Color.end, end='')
    if bold:
        print(Color.end, end='')
    if underline:
        print(Color.end, end='')
    print(end=end)
    
def cstr(*args, color=None, bold=False, underline=False):
    base = ' '.join(args)
    display = []
    color = Color.color.get(color, False)
    if color:
        display.append(color)
    if underline:
        display.append(Color.underline)
    if bold:
        display.append(Color.bold)
    display.append(base)
    display.extend([Color.end] * (underline + bold + bool(color)))
    return ''.join(display)

In [42]:
model.mask_start = 1
model.mask_size = mask_size

In [41]:
model = torch.load('../trained_models/MaskedRDModel_Epoch_1_at_2021-04-26_03:46:31.644159')

In [6]:
model = model.to(device)

In [129]:
model.device

device(type='cuda', index=0)

In [43]:
model.xent_criterion = nn.CrossEntropyLoss()

In [34]:
test(test_loader_seen, 'seen') # MaskedRDModel_Epoch_7_at_2021-04-26_14:13:53.041391
test(test_loader_unseen, 'unseen')
test(test_loader_desc, 'desc')

  0%|          | 0/10 [00:00<?, ?it/s]

seen_test_loss: tensor(2.1239, device='cuda:0')
seen_test_acc1: 0.61
seen_test_acc10: 0.884
seen_test_acc100: 0.926
seen_test_rank_median: tensor(0.)
seen_test_rank_variance tensor(227.7307)


  0%|          | 0/10 [00:00<?, ?it/s]

unseen_test_loss: tensor(9.5594, device='cuda:0')
unseen_test_acc1: 0.078
unseen_test_acc10: 0.308
unseen_test_acc100: 0.528
unseen_test_rank_median: tensor(68.)
unseen_test_rank_variance tensor(434.0811)


  0%|          | 0/4 [00:00<?, ?it/s]

desc_test_loss: tensor(2.8336, device='cuda:0')
desc_test_acc1: 0.42
desc_test_acc10: 0.75
desc_test_acc100: 0.935
desc_test_rank_median: tensor(1.)
desc_test_rank_variance tensor(97.9599)


{'desc_test_loss': tensor(2.8336, device='cuda:0'),
 'desc_test_acc1': 0.42,
 'desc_test_acc10': 0.75,
 'desc_test_acc100': 0.935,
 'desc_test_rank_median': 0.0,
 'desc_test_rank_variance': 0.0}

In [44]:
test(test_loader_seen, 'seen') # 
test(test_loader_unseen, 'unseen')
test(test_loader_desc, 'desc')

  0%|          | 0/10 [00:00<?, ?it/s]

seen_test_loss: tensor(4.4673, device='cuda:0')
seen_test_acc1: 0.306
seen_test_acc10: 0.568
seen_test_acc100: 0.74
seen_test_rank_median: tensor(5.)
seen_test_rank_variance tensor(328.7286)


  0%|          | 0/10 [00:00<?, ?it/s]

unseen_test_loss: tensor(7.3299, device='cuda:0')
unseen_test_acc1: 0.092
unseen_test_acc10: 0.266
unseen_test_acc100: 0.494
unseen_test_rank_median: tensor(108.)
unseen_test_rank_variance tensor(432.9438)


  0%|          | 0/4 [00:00<?, ?it/s]

desc_test_loss: tensor(2.9275, device='cuda:0')
desc_test_acc1: 0.43
desc_test_acc10: 0.81
desc_test_acc100: 0.955
desc_test_rank_median: tensor(1.)
desc_test_rank_variance tensor(63.1195)


{'desc_test_loss': tensor(2.9275, device='cuda:0'),
 'desc_test_acc1': 0.43,
 'desc_test_acc10': 0.81,
 'desc_test_acc100': 0.955,
 'desc_test_rank_median': 0.0,
 'desc_test_rank_variance': 0.0}

In [6]:
train_words = {e['word'] for e in (train_data + train_data_def)}
dev_words = {e['word'] for e in dev_data}
test_unseen_words = {e['word'] for e in test_data_unseen}
test_seen_words = {e['word'] for e in test_data_seen}
test_desc_words = {e['word'] for e in test_data_desc}


In [23]:
train_words.intersection(dev_words)

{'aunt',
 'city',
 'elephant',
 'fight',
 'forget',
 'government',
 'green',
 'juice',
 'prepare',
 'prevent',
 'strawberry',
 'thanks',
 'wood'}

In [30]:
len(test_seen_words.intersection(dev_words))

57

In [17]:
len(train_data) + len(train_data_def)

675715

In [16]:
len(dev_data)

75873

In [41]:
total = set()
for w in [train_words, dev_words, test_unseen_words, test_seen_words, test_desc_words]:
    total = {*total, *w}

In [43]:
len(total)

50477

In [26]:
test(test_loader_seen, 'seen')
test(test_loader_unseen, 'unseen')
test(test_loader_desc, 'desc')

  0%|          | 0/10 [00:00<?, ?it/s]

seen_test_loss: tensor(3.8748, device='cuda:0')
seen_test_acc1: 0.242
seen_test_acc10: 0.69
seen_test_acc100: 0.882
seen_test_rank_median: tensor(3.)
seen_test_rank_variance tensor(246.8164)


  0%|          | 0/10 [00:00<?, ?it/s]

unseen_test_loss: tensor(6.0390, device='cuda:0')
unseen_test_acc1: 0.114
unseen_test_acc10: 0.366
unseen_test_acc100: 0.656
unseen_test_rank_median: tensor(23.)
unseen_test_rank_variance tensor(382.3108)


  0%|          | 0/4 [00:00<?, ?it/s]

desc_test_loss: tensor(3.8807, device='cuda:0')
desc_test_acc1: 0.275
desc_test_acc10: 0.685
desc_test_acc100: 0.89
desc_test_rank_median: tensor(2.)
desc_test_rank_variance tensor(152.8812)


{'desc_test_loss': tensor(3.8807, device='cuda:0'),
 'desc_test_acc1': 0.275,
 'desc_test_acc10': 0.685,
 'desc_test_acc100': 0.89,
 'desc_test_rank_median': 0.0,
 'desc_test_rank_variance': 0.0}

In [36]:
queries = [
    'a type of tree',
    'the opposite of being happy',
    'employee at a circus',
    'a road on which cars can go quickly without stopping',
    'a very intelligent person',
    'a very smart person',
    'something you use to measure your temperature',
    'a dark time of day',
    'medieval social hierarchy where peasants and vassals served lords',
    'to help someone else learn',
    'when someone you trust does something that breaks your trust',
    'deep learning'
]

In [45]:
from pprint import pprint

for q in queries:
    print(f'Results for {q}')
    pprint(getPredFromDesc(model, q, top_n=100))
    print()

Results for a type of tree
(['chestnut',
  'spruce',
  'pinewood',
  'teakwood',
  'linden',
  'redwood',
  'oak',
  'maple',
  'logwood',
  'fir',
  'hornbeam',
  'mangrove',
  'lime',
  'cedarwood',
  'hardwood',
  'evergreen',
  'mahogany',
  'oakum',
  'tree',
  'applewood',
  'pollard',
  'birchbark',
  'nopal',
  'cypress',
  'boxwood',
  'plane',
  'almond',
  'limn',
  'plum',
  'loquat',
  'pear',
  'ashlar',
  'aspen',
  'fig',
  'elderberry',
  'dogwood',
  'olive',
  'poplar',
  'hawthorn',
  'barking',
  'stocked',
  'sandalwood',
  'eucalyptus',
  'firkin',
  'rowan',
  'pine',
  'rosewood',
  'treed',
  'tea',
  'limes',
  'medlar',
  'mango',
  'chinquapin',
  'oaken',
  'plumb',
  'nome',
  'pineal',
  'stocks',
  'logarithm',
  'hickory',
  'satinwood',
  'plumber',
  'aliquot',
  'balsa',
  'ebony',
  'oakleaf',
  'cordwood',
  'teat',
  'logjam',
  'birch',
  'basil',
  'cork',
  'ssh',
  'pinecone',
  'hazelnut',
  'nock',
  'ssp',
  'nutmeg',
  'woodcut',
  'log',

  'refractor',
  'toaster',
  'measurer',
  'bronchoscope',
  'conditioner',
  'measure',
  'reflectometer',
  'stovepiping',
  'kisser',
  'indicator',
  'sundowner',
  'certiorari',
  'baths',
  'prescriber',
  'hydrometer',
  'sun'],
 tensor([21694,  8653, 43736, 48277, 25497, 43191, 21219, 33404, 11401, 23558,
        36418, 35931, 12915, 39099,  8840,  8385, 39573, 11440, 35187, 27127,
        12871, 23728, 28154, 15343,  4497, 27122,  9253, 22133, 23868, 33803,
        24097, 41980,  6213,  9326, 35758, 44024, 32228, 16616, 16406, 45168,
        12573, 15141, 15495,  2795, 24506, 17738, 33029, 16321, 24800, 37815,
        11812, 32769, 26743, 22997,  7510, 13156, 31285,  1979, 24940, 44376,
        40195, 10077,  2775, 43405, 32063,  3168, 28786, 39450, 12720, 26158,
        46524, 41527, 16928, 39157, 14266, 15161, 40759, 48130, 16710, 32286,
         3187, 47240, 10929, 32534, 24713,   950, 39895, 28819, 40261,  4759,
        49732, 11039,   301, 34296, 32030, 29867,   784, 101