In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import NegSampleModel
from neg_sample_utils import train_model

# Seed PyTorch's global RNG from a named constant instead of a magic number.
# The trailing semicolon stops the notebook from echoing the returned
# Generator object as the cell's output.
# NOTE(review): only torch is seeded here; if the data pipeline draws negatives
# with `random` or NumPy, those generators need seeding too — confirm.
SEED = 41
torch.manual_seed(SEED);

<torch._C.Generator at 0x7f55c8653168>

## Hyperparameters

In [2]:
from torch.optim import Adam

# --- model configuration (consumed by NegSampleModel inside train()) ---
EMBEDDING_DIM = 200
EMBED_DROPOUT = RNN_DROPOUT = 0.5

# --- optimisation settings (forwarded to train_model inside train()) ---
OPTIM = Adam
LR = 1e-3
EPOCHS = 20

# --- data loading (batch_size argument of every get_dataloader call) ---
BATCH_SIZE = 64

## Load vocabulary

In [3]:
# Shared vocabulary object: it is passed to every get_dataloader call, and its
# length and pad_idx are used to configure the model and the training routine.
vocab = Vocabulary()
# Filename template for the dataset splits; '{}' is filled with 'train' or 'dev'.
FILENAME = 'bobsue.prevsent.{}.tsv'

## Function to train a model with given dataloaders

In [4]:
def train(train_loader, val_loader, filename):
    """Create a fresh ``NegSampleModel`` and hand it to ``train_model``.

    The model is configured from the notebook-level constants
    ``EMBEDDING_DIM``, ``EMBED_DROPOUT`` and ``RNN_DROPOUT`` together with the
    global ``vocab`` (its length and ``pad_idx``). The optimisation settings
    ``OPTIM``, ``LR`` and ``EPOCHS`` are forwarded to ``train_model`` as-is.

    Args:
        train_loader: forwarded to ``train_model`` as ``train_loader``.
        val_loader: forwarded to ``train_model`` as ``val_loader``.
        filename: forwarded to ``train_model`` as ``filename``; presumably the
            path the parameters are saved to (the run logs print
            "Model parameters saved to <filename>") — confirm in
            ``neg_sample_utils``.

    Returns:
        None. The model object is not handed back to the caller.
    """
    # Read the vocabulary-derived values once; both calls below need them.
    n_tokens = len(vocab)
    pad = vocab.pad_idx

    model_config = dict(
        vocab_size=n_tokens,
        embedding_dim=EMBEDDING_DIM,
        padding_idx=pad,
        embed_dropout=EMBED_DROPOUT,
        rnn_dropout=RNN_DROPOUT,
    )
    net = NegSampleModel(**model_config)

    fit_config = dict(
        model=net,
        filename=filename,
        train_loader=train_loader,
        val_loader=val_loader,
        vocab_size=n_tokens,
        pad_idx=pad,
        optim=OPTIM,
        lr=LR,
        epochs=EPOCHS,
    )
    train_model(**fit_config)

# UNIF sampling (`sample_pow = 0`, `neg_count = r`)

In [5]:
# Settings for the UNIF runs: sample_pow is held at 0 while the number of
# negatives varies.
SAMPLE_POW = 0.
neg_counts = [20, 100, 500]
unif_file = 'unif-r{}.pt'
unif_loaders = {}

# For each negative count, build a [train, dev] pair of loaders (in that
# order) and store it under the count.
for neg_count in neg_counts:
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=neg_count,
            sample_pow=SAMPLE_POW,
        )
        for split in ('train', 'dev')
    ]
    unif_loaders[neg_count] = loaders

### r = 20

In [6]:
# Train on the loader pair built with neg_count = 20; train() receives the
# filename 'unif-r20.pt'.
r = 20
train_loader, val_loader = unif_loaders[r]
train(train_loader, val_loader, unif_file.format(r))

Epoch:  1
	 Wall Time:  4.642 s
	Train Loss:  1.114 | Train Acc:  4.50%
	 Wall Time:  0.461 s
	 Val. Loss:  0.999 |  Val. Acc:  2.74%
	Model parameters saved to unif-r20.pt
Epoch:  2
	 Wall Time:  4.400 s
	Train Loss:  0.989 | Train Acc:  6.53%
	 Wall Time:  0.507 s
	 Val. Loss:  0.971 |  Val. Acc:  9.05%
	Model parameters saved to unif-r20.pt
Epoch:  3
	 Wall Time:  4.427 s
	Train Loss:  0.964 | Train Acc:  8.22%
	 Wall Time:  0.464 s
	 Val. Loss:  0.951 |  Val. Acc:  11.11%
	Model parameters saved to unif-r20.pt
Epoch:  4
	 Wall Time:  4.339 s
	Train Loss:  0.936 | Train Acc:  9.66%
	 Wall Time:  0.468 s
	 Val. Loss:  0.932 |  Val. Acc:  12.03%
	Model parameters saved to unif-r20.pt
Epoch:  5
	 Wall Time:  4.460 s
	Train Loss:  0.914 | Train Acc:  10.73%
	 Wall Time:  0.511 s
	 Val. Loss:  0.897 |  Val. Acc:  12.35%
	Model parameters saved to unif-r20.pt
Epoch:  6
	 Wall Time:  4.344 s
	Train Loss:  0.895 | Train Acc:  11.72%
	 Wall Time:  0.482 s
	 Val. Loss:  0.889 |  Val. Acc:  13

### r = 100

In [7]:
# Train on the loader pair built with neg_count = 100; train() receives the
# filename 'unif-r100.pt'.
r = 100
train_loader, val_loader = unif_loaders[r]
train(train_loader, val_loader, unif_file.format(r))

Epoch:  1
	 Wall Time:  5.385 s
	Train Loss:  1.118 | Train Acc:  4.33%
	 Wall Time:  0.557 s
	 Val. Loss:  0.996 |  Val. Acc:  2.74%
	Model parameters saved to unif-r100.pt
Epoch:  2
	 Wall Time:  5.413 s
	Train Loss:  0.987 | Train Acc:  5.17%
	 Wall Time:  0.545 s
	 Val. Loss:  0.975 |  Val. Acc:  9.05%
	Model parameters saved to unif-r100.pt
Epoch:  3
	 Wall Time:  5.148 s
	Train Loss:  0.963 | Train Acc:  8.06%
	 Wall Time:  0.550 s
	 Val. Loss:  0.947 |  Val. Acc:  10.92%
	Model parameters saved to unif-r100.pt
Epoch:  4
	 Wall Time:  5.297 s
	Train Loss:  0.936 | Train Acc:  9.53%
	 Wall Time:  0.545 s
	 Val. Loss:  0.922 |  Val. Acc:  12.22%
	Model parameters saved to unif-r100.pt
Epoch:  5
	 Wall Time:  5.405 s
	Train Loss:  0.914 | Train Acc:  11.07%
	 Wall Time:  0.560 s
	 Val. Loss:  0.892 |  Val. Acc:  12.49%
	Model parameters saved to unif-r100.pt
Epoch:  6
	 Wall Time:  5.217 s
	Train Loss:  0.892 | Train Acc:  12.52%
	 Wall Time:  0.554 s
	 Val. Loss:  0.884 |  Val. Acc

### r = 500

In [8]:
# Train on the loader pair built with neg_count = 500; train() receives the
# filename 'unif-r500.pt'.
r = 500
train_loader, val_loader = unif_loaders[r]
train(train_loader, val_loader, unif_file.format(r))

Epoch:  1
	 Wall Time:  9.569 s
	Train Loss:  1.118 | Train Acc:  4.34%
	 Wall Time:  1.025 s
	 Val. Loss:  0.994 |  Val. Acc:  2.74%
	Model parameters saved to unif-r500.pt
Epoch:  2
	 Wall Time:  9.659 s
	Train Loss:  0.988 | Train Acc:  5.50%
	 Wall Time:  1.053 s
	 Val. Loss:  0.970 |  Val. Acc:  11.11%
	Model parameters saved to unif-r500.pt
Epoch:  3
	 Wall Time:  9.683 s
	Train Loss:  0.960 | Train Acc:  7.85%
	 Wall Time:  0.947 s
	 Val. Loss:  0.943 |  Val. Acc:  10.81%
	Model parameters saved to unif-r500.pt
Epoch:  4
	 Wall Time:  9.644 s
	Train Loss:  0.934 | Train Acc:  9.65%
	 Wall Time:  0.926 s
	 Val. Loss:  0.916 |  Val. Acc:  12.15%
	Model parameters saved to unif-r500.pt
Epoch:  5
	 Wall Time:  9.652 s
	Train Loss:  0.910 | Train Acc:  11.15%
	 Wall Time:  0.960 s
	 Val. Loss:  0.897 |  Val. Acc:  12.69%
	Model parameters saved to unif-r500.pt
Epoch:  6
	 Wall Time:  9.670 s
	Train Loss:  0.888 | Train Acc:  12.61%
	 Wall Time:  0.963 s
	 Val. Loss:  0.875 |  Val. Ac

# UNIG-f sampling (`sample_pow = f`, `neg_count = 20`)

In [9]:
# Settings for the UNIG-f runs: the negative count is held at 20 while
# sample_pow varies. sample_pows holds the numerators of f = k / 4, so the
# loaders are built with sample_pow 0.25, 0.5, 0.75 and 1.0.
NEG_COUNT = 20
sample_pows = [1, 2, 3, 4]
unig_file = 'unig-f{}.pt'
unig_loaders = {}

# For each numerator, build a [train, dev] pair of loaders (in that order)
# and store it under the integer numerator, not under the fraction.
for sample_pow in sample_pows:
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=NEG_COUNT,
            sample_pow=sample_pow / 4,
        )
        for split in ('train', 'dev')
    ]
    unig_loaders[sample_pow] = loaders

### f = 0.25

In [10]:
# f_4 is the numerator of f = f_4 / 4, so this run uses the loaders built with
# sample_pow = 0.25. The filename is formatted with the numerator: 'unig-f1.pt'.
f_4 = 1
train_loader, val_loader = unig_loaders[f_4]
train(train_loader, val_loader, unig_file.format(f_4))

Epoch:  1
	 Wall Time:  4.450 s
	Train Loss:  1.185 | Train Acc:  7.05%
	 Wall Time:  0.475 s
	 Val. Loss:  1.085 |  Val. Acc:  9.05%
	Model parameters saved to unig-f1.pt
Epoch:  2
	 Wall Time:  4.387 s
	Train Loss:  1.070 | Train Acc:  9.17%
	 Wall Time:  0.462 s
	 Val. Loss:  1.045 |  Val. Acc:  11.03%
	Model parameters saved to unig-f1.pt
Epoch:  3
	 Wall Time:  4.345 s
	Train Loss:  1.026 | Train Acc:  10.53%
	 Wall Time:  0.470 s
	 Val. Loss:  1.003 |  Val. Acc:  12.40%
	Model parameters saved to unig-f1.pt
Epoch:  4
	 Wall Time:  4.490 s
	Train Loss:  0.991 | Train Acc:  13.97%
	 Wall Time:  0.456 s
	 Val. Loss:  0.969 |  Val. Acc:  19.48%
	Model parameters saved to unig-f1.pt
Epoch:  5
	 Wall Time:  4.199 s
	Train Loss:  0.963 | Train Acc:  17.40%
	 Wall Time:  0.565 s
	 Val. Loss:  0.939 |  Val. Acc:  22.13%
	Model parameters saved to unig-f1.pt
Epoch:  6
	 Wall Time:  4.458 s
	Train Loss:  0.939 | Train Acc:  19.89%
	 Wall Time:  0.473 s
	 Val. Loss:  0.918 |  Val. Acc:  23.9

### f = 0.5

In [11]:
# f_4 is the numerator of f = f_4 / 4, so this run uses the loaders built with
# sample_pow = 0.5. The filename is formatted with the numerator: 'unig-f2.pt'.
f_4 = 2
train_loader, val_loader = unig_loaders[f_4]
train(train_loader, val_loader, unig_file.format(f_4))

Epoch:  1
	 Wall Time:  4.224 s
	Train Loss:  1.261 | Train Acc:  8.36%
	 Wall Time:  0.565 s
	 Val. Loss:  1.194 |  Val. Acc:  9.05%
	Model parameters saved to unig-f2.pt
Epoch:  2
	 Wall Time:  4.612 s
	Train Loss:  1.150 | Train Acc:  11.46%
	 Wall Time:  0.492 s
	 Val. Loss:  1.104 |  Val. Acc:  15.67%
	Model parameters saved to unig-f2.pt
Epoch:  3
	 Wall Time:  4.495 s
	Train Loss:  1.079 | Train Acc:  17.10%
	 Wall Time:  0.523 s
	 Val. Loss:  1.045 |  Val. Acc:  22.43%
	Model parameters saved to unig-f2.pt
Epoch:  4
	 Wall Time:  4.443 s
	Train Loss:  1.035 | Train Acc:  19.62%
	 Wall Time:  0.462 s
	 Val. Loss:  1.000 |  Val. Acc:  23.41%
	Model parameters saved to unig-f2.pt
Epoch:  5
	 Wall Time:  4.643 s
	Train Loss:  1.000 | Train Acc:  21.09%
	 Wall Time:  0.470 s
	 Val. Loss:  0.975 |  Val. Acc:  25.64%
	Model parameters saved to unig-f2.pt
Epoch:  6
	 Wall Time:  4.444 s
	Train Loss:  0.979 | Train Acc:  21.74%
	 Wall Time:  0.565 s
	 Val. Loss:  0.956 |  Val. Acc:  26.

### f = 0.75

In [12]:
# f_4 is the numerator of f = f_4 / 4, so this run uses the loaders built with
# sample_pow = 0.75. The filename is formatted with the numerator: 'unig-f3.pt'.
f_4 = 3
train_loader, val_loader = unig_loaders[f_4]
train(train_loader, val_loader, unig_file.format(f_4))

Epoch:  1
	 Wall Time:  4.291 s
	Train Loss:  1.313 | Train Acc:  9.53%
	 Wall Time:  0.464 s
	 Val. Loss:  1.260 |  Val. Acc:  10.96%
	Model parameters saved to unig-f3.pt
Epoch:  2
	 Wall Time:  4.537 s
	Train Loss:  1.212 | Train Acc:  18.68%
	 Wall Time:  0.485 s
	 Val. Loss:  1.149 |  Val. Acc:  22.91%
	Model parameters saved to unig-f3.pt
Epoch:  3
	 Wall Time:  4.431 s
	Train Loss:  1.124 | Train Acc:  21.25%
	 Wall Time:  0.505 s
	 Val. Loss:  1.068 |  Val. Acc:  24.23%
	Model parameters saved to unig-f3.pt
Epoch:  4
	 Wall Time:  4.221 s
	Train Loss:  1.059 | Train Acc:  21.44%
	 Wall Time:  0.463 s
	 Val. Loss:  1.010 |  Val. Acc:  25.75%
	Model parameters saved to unig-f3.pt
Epoch:  5
	 Wall Time:  4.498 s
	Train Loss:  1.017 | Train Acc:  21.32%
	 Wall Time:  0.462 s
	 Val. Loss:  0.981 |  Val. Acc:  27.28%
	Model parameters saved to unig-f3.pt
Epoch:  6
	 Wall Time:  4.305 s
	Train Loss:  0.993 | Train Acc:  21.68%
	 Wall Time:  0.468 s
	 Val. Loss:  0.955 |  Val. Acc:  26

### f = 1.0

In [13]:
# f_4 is the numerator of f = f_4 / 4, so this run uses the loaders built with
# sample_pow = 1.0. The filename is formatted with the numerator: 'unig-f4.pt'.
f_4 = 4
train_loader, val_loader = unig_loaders[f_4]
train(train_loader, val_loader, unig_file.format(f_4))

Epoch:  1
	 Wall Time:  4.416 s
	Train Loss:  1.323 | Train Acc:  11.45%
	 Wall Time:  0.532 s
	 Val. Loss:  1.234 |  Val. Acc:  22.12%
	Model parameters saved to unig-f4.pt
Epoch:  2
	 Wall Time:  4.381 s
	Train Loss:  1.183 | Train Acc:  17.83%
	 Wall Time:  0.467 s
	 Val. Loss:  1.124 |  Val. Acc:  21.10%
	Model parameters saved to unig-f4.pt
Epoch:  3
	 Wall Time:  4.320 s
	Train Loss:  1.111 | Train Acc:  17.26%
	 Wall Time:  0.465 s
	 Val. Loss:  1.072 |  Val. Acc:  22.03%
	Model parameters saved to unig-f4.pt
Epoch:  4
	 Wall Time:  4.370 s
	Train Loss:  1.066 | Train Acc:  17.07%
	 Wall Time:  0.460 s
	 Val. Loss:  1.025 |  Val. Acc:  23.69%
	Model parameters saved to unig-f4.pt
Epoch:  5
	 Wall Time:  4.340 s
	Train Loss:  1.021 | Train Acc:  17.24%
	 Wall Time:  0.493 s
	 Val. Loss:  0.983 |  Val. Acc:  23.53%
	Model parameters saved to unig-f4.pt
Epoch:  6
	 Wall Time:  4.359 s
	Train Loss:  0.990 | Train Acc:  16.93%
	 Wall Time:  0.472 s
	 Val. Loss:  0.955 |  Val. Acc:  2