In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import NegSampleModel
from neg_sample_utils import train_model

# Seed PyTorch's default random number generator.
# NOTE(review): only the torch RNG is seeded here; if `data` / `neg_sample_utils`
# draw from `random` or NumPy, those streams are not covered — confirm.
torch.manual_seed(41)

<torch._C.Generator at 0x7fce048cd168>

## Hyperparameters

In [2]:
from torch.optim import Adam

# Passed as `batch_size` to every `get_dataloader` call in this notebook.
BATCH_SIZE = 64
# Passed to `NegSampleModel` as `embedding_dim`.
EMBEDDING_DIM = 200
# Passed to `NegSampleModel` as `embed_dropout`.
EMBED_DROPOUT = 0.5
# Passed to `NegSampleModel` as `rnn_dropout`.
RNN_DROPOUT = 0.5
# Passed to `train_model` as `lr`.
LR = 1e-3
# Passed to `train_model` as `epochs`.
EPOCHS = 20
# Optimizer class (not an instance); passed to `train_model` as `optim`.
OPTIM = Adam

## Load vocabulary

In [3]:
# Vocabulary shared by the model (its length and `pad_idx`) and by every dataloader.
vocab = Vocabulary()
# Dataset path template; the `{}` placeholder is filled with the split name
# ('train' or 'dev') when the dataloaders are built.
FILENAME = 'bobsue.prevsent.{}.tsv'

## Function to train a model with given dataloaders

In [4]:
def train(train_loader, val_loader, filename):
    """Create a fresh ``NegSampleModel`` and hand it to ``train_model``.

    Model and optimisation settings are read from the notebook-level
    hyperparameters (``EMBEDDING_DIM``, ``EMBED_DROPOUT``, ``RNN_DROPOUT``,
    ``OPTIM``, ``LR``, ``EPOCHS``) and from the shared ``vocab``.

    Args:
        train_loader: Forwarded to ``train_model`` as ``train_loader``.
        val_loader: Forwarded to ``train_model`` as ``val_loader``.
        filename: Forwarded to ``train_model`` as ``filename``
            (presumably where the model parameters are saved — confirm in
            ``neg_sample_utils.train_model``).

    Returns:
        None. The model is not returned and the result of ``train_model``
        is discarded.
    """
    vocab_size = len(vocab)

    # Architecture settings for the network.
    model_kwargs = dict(
        vocab_size=vocab_size,
        embedding_dim=EMBEDDING_DIM,
        padding_idx=vocab.pad_idx,
        embed_dropout=EMBED_DROPOUT,
        rnn_dropout=RNN_DROPOUT,
    )
    network = NegSampleModel(**model_kwargs)

    # Everything the training routine needs, including the new network.
    fit_kwargs = dict(
        model=network,
        filename=filename,
        train_loader=train_loader,
        val_loader=val_loader,
        vocab_size=vocab_size,
        optim=OPTIM,
        lr=LR,
        epochs=EPOCHS,
    )
    train_model(**fit_kwargs)

# UNIF sampling

In [5]:
# UNIF setup: the sampling power is fixed at 0 and the number of negative
# samples per example (`neg_count`) is varied.
SAMPLE_POW = 0.
neg_counts = [20, 100, 500]
unif_file = 'unif-r{}.pt'
unif_loaders = {}

for neg_count in neg_counts:
    # One [train, dev] loader pair per negative-sample count, built in that order.
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=neg_count,
            sample_pow=SAMPLE_POW,
        )
        for split in ('train', 'dev')
    ]
    unif_loaders[neg_count] = loaders

### r = 20

In [6]:
# `r` is the negative-sample count; it selects the loader pair and names the output file.
r = 20
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	 Wall Time:  5.474 s
	Train Loss:  1.114 | Train Acc:  2.89%
	 Wall Time:  0.580 s
	 Val. Loss:  0.999 |  Val. Acc:  1.70%
	Model parameters saved to unif-r20.pt
Epoch:  2
	 Wall Time:  5.467 s
	Train Loss:  0.990 | Train Acc:  3.79%
	 Wall Time:  0.550 s
	 Val. Loss:  0.971 |  Val. Acc:  5.70%
	Model parameters saved to unif-r20.pt
Epoch:  3
	 Wall Time:  5.445 s
	Train Loss:  0.965 | Train Acc:  4.98%
	 Wall Time:  0.584 s
	 Val. Loss:  0.953 |  Val. Acc:  6.97%
	Model parameters saved to unif-r20.pt
Epoch:  4
	 Wall Time:  5.421 s
	Train Loss:  0.938 | Train Acc:  5.89%
	 Wall Time:  0.566 s
	 Val. Loss:  0.933 |  Val. Acc:  7.27%
	Model parameters saved to unif-r20.pt
Epoch:  5
	 Wall Time:  5.673 s
	Train Loss:  0.914 | Train Acc:  6.58%
	 Wall Time:  0.603 s
	 Val. Loss:  0.897 |  Val. Acc:  7.84%
	Model parameters saved to unif-r20.pt
Epoch:  6
	 Wall Time:  5.539 s
	Train Loss:  0.894 | Train Acc:  7.43%
	 Wall Time:  0.579 s
	 Val. Loss:  0.889 |  Val. Acc:  8.14%
	

### r = 100

In [7]:
# `r` is the negative-sample count; it selects the loader pair and names the output file.
r = 100
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	 Wall Time:  6.855 s
	Train Loss:  1.118 | Train Acc:  2.68%
	 Wall Time:  0.699 s
	 Val. Loss:  0.996 |  Val. Acc:  1.72%
	Model parameters saved to unif-r100.pt
Epoch:  2
	 Wall Time:  6.831 s
	Train Loss:  0.987 | Train Acc:  3.65%
	 Wall Time:  0.700 s
	 Val. Loss:  0.976 |  Val. Acc:  5.67%
	Model parameters saved to unif-r100.pt
Epoch:  3
	 Wall Time:  6.904 s
	Train Loss:  0.964 | Train Acc:  4.74%
	 Wall Time:  0.702 s
	 Val. Loss:  0.949 |  Val. Acc:  6.90%
	Model parameters saved to unif-r100.pt
Epoch:  4
	 Wall Time:  6.957 s
	Train Loss:  0.937 | Train Acc:  5.96%
	 Wall Time:  0.679 s
	 Val. Loss:  0.923 |  Val. Acc:  7.61%
	Model parameters saved to unif-r100.pt
Epoch:  5
	 Wall Time:  7.013 s
	Train Loss:  0.914 | Train Acc:  6.94%
	 Wall Time:  0.674 s
	 Val. Loss:  0.891 |  Val. Acc:  7.98%
	Model parameters saved to unif-r100.pt
Epoch:  6
	 Wall Time:  6.929 s
	Train Loss:  0.892 | Train Acc:  7.83%
	 Wall Time:  0.672 s
	 Val. Loss:  0.885 |  Val. Acc:  8.

### r = 500

In [8]:
# `r` is the negative-sample count; it selects the loader pair and names the output file.
r = 500
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	 Wall Time:  13.484 s
	Train Loss:  1.117 | Train Acc:  2.67%
	 Wall Time:  1.252 s
	 Val. Loss:  0.994 |  Val. Acc:  1.72%
	Model parameters saved to unif-r500.pt
Epoch:  2
	 Wall Time:  13.084 s
	Train Loss:  0.988 | Train Acc:  3.58%
	 Wall Time:  1.157 s
	 Val. Loss:  0.970 |  Val. Acc:  6.99%
	Model parameters saved to unif-r500.pt
Epoch:  3
	 Wall Time:  13.424 s
	Train Loss:  0.961 | Train Acc:  4.83%
	 Wall Time:  1.222 s
	 Val. Loss:  0.944 |  Val. Acc:  6.76%
	Model parameters saved to unif-r500.pt
Epoch:  4
	 Wall Time:  13.160 s
	Train Loss:  0.934 | Train Acc:  6.02%
	 Wall Time:  1.160 s
	 Val. Loss:  0.916 |  Val. Acc:  7.63%
	Model parameters saved to unif-r500.pt
Epoch:  5
	 Wall Time:  13.206 s
	Train Loss:  0.910 | Train Acc:  7.00%
	 Wall Time:  1.148 s
	 Val. Loss:  0.898 |  Val. Acc:  7.91%
	Model parameters saved to unif-r500.pt
Epoch:  6
	 Wall Time:  13.382 s
	Train Loss:  0.890 | Train Acc:  8.20%
	 Wall Time:  1.196 s
	 Val. Loss:  0.878 |  Val. Ac

# UNIG-f sampling

In [9]:
# UNIG-f setup: the negative-sample count is fixed and the sampling power f is varied.
# `sample_pows` holds 4*f as integers (1..4 -> f = 0.25..1.0); those integers are
# also the keys of `unig_loaders` and the values substituted into `unig_file`.
NEG_COUNT = 20
sample_pows = [1, 2, 3, 4]
unig_file = 'unig-f{}.pt'
unig_loaders = {}

for sample_pow in sample_pows:
    # One [train, dev] loader pair per power, built in that order.
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=NEG_COUNT,
            sample_pow=sample_pow / 4,
        )
        for split in ('train', 'dev')
    ]
    unig_loaders[sample_pow] = loaders

### f = 0.25

In [10]:
# `f_4` is 4*f, the integer key into `unig_loaders` (here f = 0.25); it also names the output file.
f_4 = 1
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	 Wall Time:  5.705 s
	Train Loss:  1.185 | Train Acc:  4.14%
	 Wall Time:  0.607 s
	 Val. Loss:  1.085 |  Val. Acc:  5.70%
	Model parameters saved to unig-f1.pt
Epoch:  2
	 Wall Time:  5.462 s
	Train Loss:  1.070 | Train Acc:  5.78%
	 Wall Time:  0.587 s
	 Val. Loss:  1.045 |  Val. Acc:  6.88%
	Model parameters saved to unig-f1.pt
Epoch:  3
	 Wall Time:  5.428 s
	Train Loss:  1.026 | Train Acc:  6.68%
	 Wall Time:  0.598 s
	 Val. Loss:  1.003 |  Val. Acc:  7.87%
	Model parameters saved to unig-f1.pt
Epoch:  4
	 Wall Time:  5.637 s
	Train Loss:  0.991 | Train Acc:  8.48%
	 Wall Time:  0.584 s
	 Val. Loss:  0.969 |  Val. Acc:  12.17%
	Model parameters saved to unig-f1.pt
Epoch:  5
	 Wall Time:  5.585 s
	Train Loss:  0.963 | Train Acc:  10.67%
	 Wall Time:  0.570 s
	 Val. Loss:  0.940 |  Val. Acc:  13.72%
	Model parameters saved to unig-f1.pt
Epoch:  6
	 Wall Time:  5.692 s
	Train Loss:  0.942 | Train Acc:  12.23%
	 Wall Time:  0.561 s
	 Val. Loss:  0.919 |  Val. Acc:  14.89%
	

### f = 0.5

In [11]:
# `f_4` is 4*f, the integer key into `unig_loaders` (here f = 0.5); it also names the output file.
f_4 = 2
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	 Wall Time:  5.524 s
	Train Loss:  1.261 | Train Acc:  5.25%
	 Wall Time:  0.568 s
	 Val. Loss:  1.194 |  Val. Acc:  5.68%
	Model parameters saved to unig-f2.pt
Epoch:  2
	 Wall Time:  5.434 s
	Train Loss:  1.149 | Train Acc:  7.46%
	 Wall Time:  0.569 s
	 Val. Loss:  1.101 |  Val. Acc:  11.60%
	Model parameters saved to unig-f2.pt
Epoch:  3
	 Wall Time:  5.423 s
	Train Loss:  1.076 | Train Acc:  10.93%
	 Wall Time:  0.626 s
	 Val. Loss:  1.040 |  Val. Acc:  14.21%
	Model parameters saved to unig-f2.pt
Epoch:  4
	 Wall Time:  5.408 s
	Train Loss:  1.031 | Train Acc:  12.33%
	 Wall Time:  0.565 s
	 Val. Loss:  0.996 |  Val. Acc:  15.23%
	Model parameters saved to unig-f2.pt
Epoch:  5
	 Wall Time:  5.424 s
	Train Loss:  0.997 | Train Acc:  13.55%
	 Wall Time:  0.588 s
	 Val. Loss:  0.973 |  Val. Acc:  16.25%
	Model parameters saved to unig-f2.pt
Epoch:  6
	 Wall Time:  5.654 s
	Train Loss:  0.976 | Train Acc:  13.95%
	 Wall Time:  0.582 s
	 Val. Loss:  0.955 |  Val. Acc:  16.2

### f = 0.75

In [12]:
# `f_4` is 4*f, the integer key into `unig_loaders` (here f = 0.75); it also names the output file.
f_4 = 3
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	 Wall Time:  5.358 s
	Train Loss:  1.312 | Train Acc:  6.06%
	 Wall Time:  0.584 s
	 Val. Loss:  1.257 |  Val. Acc:  7.09%
	Model parameters saved to unig-f3.pt
Epoch:  2
	 Wall Time:  5.432 s
	Train Loss:  1.210 | Train Acc:  11.54%
	 Wall Time:  0.573 s
	 Val. Loss:  1.148 |  Val. Acc:  14.49%
	Model parameters saved to unig-f3.pt
Epoch:  3
	 Wall Time:  5.384 s
	Train Loss:  1.123 | Train Acc:  13.47%
	 Wall Time:  0.560 s
	 Val. Loss:  1.067 |  Val. Acc:  15.36%
	Model parameters saved to unig-f3.pt
Epoch:  4
	 Wall Time:  5.409 s
	Train Loss:  1.057 | Train Acc:  13.47%
	 Wall Time:  0.562 s
	 Val. Loss:  1.009 |  Val. Acc:  16.60%
	Model parameters saved to unig-f3.pt
Epoch:  5
	 Wall Time:  5.410 s
	Train Loss:  1.015 | Train Acc:  13.67%
	 Wall Time:  0.574 s
	 Val. Loss:  0.980 |  Val. Acc:  16.93%
	Model parameters saved to unig-f3.pt
Epoch:  6
	 Wall Time:  5.410 s
	Train Loss:  0.991 | Train Acc:  13.57%
	 Wall Time:  0.563 s
	 Val. Loss:  0.954 |  Val. Acc:  16.

### f = 1.0

In [13]:
# `f_4` is 4*f, the integer key into `unig_loaders` (here f = 1.0); it also names the output file.
f_4 = 4
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	 Wall Time:  5.308 s
	Train Loss:  1.321 | Train Acc:  7.38%
	 Wall Time:  0.578 s
	 Val. Loss:  1.225 |  Val. Acc:  14.08%
	Model parameters saved to unig-f4.pt
Epoch:  2
	 Wall Time:  5.378 s
	Train Loss:  1.180 | Train Acc:  10.84%
	 Wall Time:  0.586 s
	 Val. Loss:  1.122 |  Val. Acc:  13.73%
	Model parameters saved to unig-f4.pt
Epoch:  3
	 Wall Time:  5.367 s
	Train Loss:  1.110 | Train Acc:  10.71%
	 Wall Time:  0.573 s
	 Val. Loss:  1.069 |  Val. Acc:  13.62%
	Model parameters saved to unig-f4.pt
Epoch:  4
	 Wall Time:  5.656 s
	Train Loss:  1.063 | Train Acc:  10.59%
	 Wall Time:  0.569 s
	 Val. Loss:  1.021 |  Val. Acc:  14.44%
	Model parameters saved to unig-f4.pt
Epoch:  5
	 Wall Time:  5.582 s
	Train Loss:  1.017 | Train Acc:  11.20%
	 Wall Time:  0.624 s
	 Val. Loss:  0.979 |  Val. Acc:  14.85%
	Model parameters saved to unig-f4.pt
Epoch:  6
	 Wall Time:  5.560 s
	Train Loss:  0.987 | Train Acc:  10.83%
	 Wall Time:  0.562 s
	 Val. Loss:  0.952 |  Val. Acc:  15