In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import NegSampleModel
from utils import train_model

# Pin PyTorch's global RNG seed so repeated runs start from the same random state.
# Left as the cell's last expression, so the returned Generator is still echoed.
torch.manual_seed(seed=41)

<torch._C.Generator at 0x7f2b1027e168>

## Hyperparameters

In [2]:
from torch.optim import Adam

# Batch size passed as `batch_size` to the get_dataloader calls in this notebook.
BATCH_SIZE = 64
# Passed to NegSampleModel as `embedding_dim`.
EMBEDDING_DIM = 200
# Passed to NegSampleModel as `embed_dropout`.
EMBED_DROPOUT = 0.5
# Passed to NegSampleModel as `rnn_dropout`.
RNN_DROPOUT = 0.5
# Learning rate, forwarded to train_model as `lr`.
LR = 1e-3
# Forwarded to train_model as `epochs`.
EPOCHS = 30
# Optimizer class (not an instance), forwarded to train_model as `optim`;
# presumably instantiated there with LR — confirm in utils.train_model.
OPTIM = Adam

## Load vocabulary

In [3]:
# Shared vocabulary object; its length and pad_idx are used when building the model,
# and it is handed to every get_dataloader call.
# NOTE(review): Vocabulary() takes no arguments here, so its data source is decided
# inside data.py — not visible from this notebook.
vocab = Vocabulary()
# Template for the dataset files; `{}` is filled with the split name ('train' or 'dev').
FILENAME = 'bobsue.prevsent.{}.tsv'

## Function to train a model with given dataloaders

In [4]:
def train(train_loader, valid_loader, filename):
    """Create a fresh NegSampleModel and fit it via `train_model`.

    The model is configured from the notebook-level settings (EMBEDDING_DIM,
    EMBED_DROPOUT, RNN_DROPOUT) and the global `vocab`. Training is delegated
    to `train_model` with OPTIM, LR and EPOCHS. The value `train_model`
    returns is discarded, so this function always returns None.

    Args:
        train_loader: forwarded to `train_model` as `train_loader`.
        valid_loader: forwarded to `train_model` as `valid_loader`.
        filename: forwarded to `train_model` as `filename`; presumably the
            checkpoint path (run logs print "Model parameters saved to ...").
    """
    # Look these up once; both the model and the training routine need them.
    n_tokens = len(vocab)
    pad = vocab.pad_idx

    net = NegSampleModel(
        vocab_size=n_tokens,
        embedding_dim=EMBEDDING_DIM,
        padding_idx=pad,
        embed_dropout=EMBED_DROPOUT,
        rnn_dropout=RNN_DROPOUT,
    )

    # Return value intentionally ignored (keeps the calling cell free of an Out[] echo).
    train_model(
        model=net,
        filename=filename,
        train_loader=train_loader,
        valid_loader=valid_loader,
        optim=OPTIM,
        lr=LR,
        epochs=EPOCHS,
        vocab_size=n_tokens,
        pad_idx=pad,
    )

# UNIF sampling (negatives drawn with `sample_pow = 0`)

In [5]:
# Pre-build a [train, dev] loader pair for each negative-sample count,
# all with sample_pow fixed at 0.
SAMPLE_POW = 0.0
neg_counts = [20, 100, 500]
unif_loaders = {}
unif_file = 'unif-r{}.pt'

for neg_count in neg_counts:
    # Same settings for both splits; only the file name differs.
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=neg_count,
            sample_pow=SAMPLE_POW,
        )
        for split in ('train', 'dev')
    ]
    unif_loaders[neg_count] = loaders

### r = 20

In [6]:
# Train on the loaders built with neg_count = 20; 'unif-r20.pt' is passed as the filename.
r = 20
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch: 01
	 Wall Time: 6.319 s
	Train Loss: 1.114 | Train Acc: 4.48%
	 Wall Time: 0.719 s
	Valid Loss: 0.999 | Valid Acc: 2.74%
	Model parameters saved to unif-r20.pt
Epoch: 02
	 Wall Time: 6.614 s
	Train Loss: 0.990 | Train Acc: 6.49%
	 Wall Time: 0.560 s
	Valid Loss: 0.971 | Valid Acc: 9.05%
	Model parameters saved to unif-r20.pt
Epoch: 03
	 Wall Time: 6.223 s
	Train Loss: 0.964 | Train Acc: 8.29%
	 Wall Time: 0.724 s
	Valid Loss: 0.950 | Valid Acc: 11.11%
	Model parameters saved to unif-r20.pt
Epoch: 04
	 Wall Time: 6.060 s
	Train Loss: 0.936 | Train Acc: 9.69%
	 Wall Time: 0.548 s
	Valid Loss: 0.931 | Valid Acc: 12.00%
	Model parameters saved to unif-r20.pt
Epoch: 05
	 Wall Time: 6.107 s
	Train Loss: 0.913 | Train Acc: 10.71%
	 Wall Time: 0.817 s
	Valid Loss: 0.897 | Valid Acc: 12.33%
	Model parameters saved to unif-r20.pt
Epoch: 06
	 Wall Time: 6.579 s
	Train Loss: 0.894 | Train Acc: 11.80%
	 Wall Time: 0.521 s
	Valid Loss: 0.889 | Valid Acc: 13.03%
	Model parameters saved to unif

### r = 100

In [7]:
# Train on the loaders built with neg_count = 100; 'unif-r100.pt' is passed as the filename.
r = 100
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch: 01
	 Wall Time: 8.064 s
	Train Loss: 1.116 | Train Acc: 4.48%
	 Wall Time: 0.897 s
	Valid Loss: 0.992 | Valid Acc: 2.74%
	Model parameters saved to unif-r100.pt
Epoch: 02
	 Wall Time: 8.178 s
	Train Loss: 0.987 | Train Acc: 5.62%
	 Wall Time: 0.714 s
	Valid Loss: 0.978 | Valid Acc: 9.05%
	Model parameters saved to unif-r100.pt
Epoch: 03
	 Wall Time: 8.118 s
	Train Loss: 0.962 | Train Acc: 7.51%
	 Wall Time: 0.882 s
	Valid Loss: 0.954 | Valid Acc: 11.01%
	Model parameters saved to unif-r100.pt
Epoch: 04
	 Wall Time: 7.899 s
	Train Loss: 0.935 | Train Acc: 9.63%
	 Wall Time: 0.810 s
	Valid Loss: 0.917 | Valid Acc: 12.15%
	Model parameters saved to unif-r100.pt
Epoch: 05
	 Wall Time: 8.142 s
	Train Loss: 0.910 | Train Acc: 11.20%
	 Wall Time: 0.959 s
	Valid Loss: 0.890 | Valid Acc: 12.76%
	Model parameters saved to unif-r100.pt
Epoch: 06
	 Wall Time: 7.863 s
	Train Loss: 0.891 | Train Acc: 13.17%
	 Wall Time: 0.819 s
	Valid Loss: 0.883 | Valid Acc: 17.38%
	Model parameters saved to

### r = 500

In [8]:
# Train on the loaders built with neg_count = 500; 'unif-r500.pt' is passed as the filename.
r = 500
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch: 01
	 Wall Time: 15.029 s
	Train Loss: 1.120 | Train Acc: 4.98%
	 Wall Time: 1.509 s
	Valid Loss: 0.995 | Valid Acc: 2.74%
	Model parameters saved to unif-r500.pt
Epoch: 02
	 Wall Time: 15.195 s
	Train Loss: 0.986 | Train Acc: 5.91%
	 Wall Time: 1.655 s
	Valid Loss: 0.973 | Valid Acc: 11.11%
	Model parameters saved to unif-r500.pt
Epoch: 03
	 Wall Time: 14.941 s
	Train Loss: 0.961 | Train Acc: 7.85%
	 Wall Time: 1.578 s
	Valid Loss: 0.938 | Valid Acc: 11.03%
	Model parameters saved to unif-r500.pt
Epoch: 04
	 Wall Time: 15.370 s
	Train Loss: 0.934 | Train Acc: 9.83%
	 Wall Time: 1.661 s
	Valid Loss: 0.913 | Valid Acc: 12.17%
	Model parameters saved to unif-r500.pt
Epoch: 05
	 Wall Time: 14.886 s
	Train Loss: 0.904 | Train Acc: 11.50%
	 Wall Time: 1.551 s
	Valid Loss: 0.891 | Valid Acc: 12.47%
	Model parameters saved to unif-r500.pt
Epoch: 06
	 Wall Time: 14.501 s
	Train Loss: 0.891 | Train Acc: 12.95%
	 Wall Time: 1.360 s
	Valid Loss: 0.881 | Valid Acc: 13.91%
	Model parameters s

# UNIG-f sampling (negatives drawn with `sample_pow = f`)

In [9]:
# Pre-build a [train, dev] loader pair for each sampling power, with the
# negative count fixed at 20. Keys are the integers 1..4; the power actually
# passed to get_dataloader is key / 4 (0.25, 0.5, 0.75, 1.0).
NEG_COUNT = 20
sample_pows = [1, 2, 3, 4]
unig_loaders = {}
unig_file = 'unig-f{}.pt'

for sample_pow in sample_pows:
    # Same settings for both splits; only the file name differs.
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=NEG_COUNT,
            sample_pow=sample_pow / 4,
        )
        for split in ('train', 'dev')
    ]
    unig_loaders[sample_pow] = loaders

### f = 0.25

In [10]:
# f_4 is four times f: key 1 selects the loaders built with sample_pow = 1 / 4.
# 'unig-f1.pt' is passed as the filename.
f_4 = 1
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch: 01
	 Wall Time: 6.313 s
	Train Loss: 1.188 | Train Acc: 7.26%
	 Wall Time: 0.686 s
	Valid Loss: 1.088 | Valid Acc: 11.11%
	Model parameters saved to unig-f1.pt
Epoch: 02
	 Wall Time: 6.438 s
	Train Loss: 1.067 | Train Acc: 9.72%
	 Wall Time: 0.600 s
	Valid Loss: 1.040 | Valid Acc: 11.03%
	Model parameters saved to unig-f1.pt
Epoch: 03
	 Wall Time: 7.194 s
	Train Loss: 1.029 | Train Acc: 10.54%
	 Wall Time: 0.774 s
	Valid Loss: 1.002 | Valid Acc: 12.35%
	Model parameters saved to unig-f1.pt
Epoch: 04
	 Wall Time: 6.336 s
	Train Loss: 0.991 | Train Acc: 13.41%
	 Wall Time: 0.579 s
	Valid Loss: 0.971 | Valid Acc: 17.33%
	Model parameters saved to unig-f1.pt
Epoch: 05
	 Wall Time: 6.176 s
	Train Loss: 0.964 | Train Acc: 16.92%
	 Wall Time: 0.736 s
	Valid Loss: 0.946 | Valid Acc: 21.75%
	Model parameters saved to unig-f1.pt
Epoch: 06
	 Wall Time: 6.840 s
	Train Loss: 0.939 | Train Acc: 19.51%
	 Wall Time: 0.974 s
	Valid Loss: 0.927 | Valid Acc: 23.65%
	Model parameters saved to unig-

### f = 0.5

In [11]:
# f_4 is four times f: key 2 selects the loaders built with sample_pow = 2 / 4.
# 'unig-f2.pt' is passed as the filename.
f_4 = 2
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch: 01
	 Wall Time: 6.245 s
	Train Loss: 1.260 | Train Acc: 8.25%
	 Wall Time: 0.766 s
	Valid Loss: 1.192 | Valid Acc: 9.05%
	Model parameters saved to unig-f2.pt
Epoch: 02
	 Wall Time: 7.117 s
	Train Loss: 1.143 | Train Acc: 12.31%
	 Wall Time: 0.763 s
	Valid Loss: 1.091 | Valid Acc: 19.92%
	Model parameters saved to unig-f2.pt
Epoch: 03
	 Wall Time: 6.555 s
	Train Loss: 1.075 | Train Acc: 18.22%
	 Wall Time: 0.771 s
	Valid Loss: 1.042 | Valid Acc: 23.41%
	Model parameters saved to unig-f2.pt
Epoch: 04
	 Wall Time: 6.794 s
	Train Loss: 1.034 | Train Acc: 20.44%
	 Wall Time: 0.567 s
	Valid Loss: 1.009 | Valid Acc: 24.78%
	Model parameters saved to unig-f2.pt
Epoch: 05
	 Wall Time: 6.134 s
	Train Loss: 1.002 | Train Acc: 21.64%
	 Wall Time: 0.808 s
	Valid Loss: 0.980 | Valid Acc: 25.50%
	Model parameters saved to unig-f2.pt
Epoch: 06
	 Wall Time: 6.881 s
	Train Loss: 0.978 | Train Acc: 22.15%
	 Wall Time: 0.757 s
	Valid Loss: 0.956 | Valid Acc: 26.05%
	Model parameters saved to unig-

### f = 0.75

In [12]:
# f_4 is four times f: key 3 selects the loaders built with sample_pow = 3 / 4.
# 'unig-f3.pt' is passed as the filename.
f_4 = 3
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch: 01
	 Wall Time: 6.735 s
	Train Loss: 1.311 | Train Acc: 9.57%
	 Wall Time: 0.779 s
	Valid Loss: 1.260 | Valid Acc: 10.96%
	Model parameters saved to unig-f3.pt
Epoch: 02
	 Wall Time: 6.113 s
	Train Loss: 1.206 | Train Acc: 19.13%
	 Wall Time: 0.715 s
	Valid Loss: 1.150 | Valid Acc: 23.06%
	Model parameters saved to unig-f3.pt
Epoch: 03
	 Wall Time: 6.215 s
	Train Loss: 1.122 | Train Acc: 21.04%
	 Wall Time: 0.601 s
	Valid Loss: 1.070 | Valid Acc: 25.14%
	Model parameters saved to unig-f3.pt
Epoch: 04
	 Wall Time: 5.899 s
	Train Loss: 1.060 | Train Acc: 21.69%
	 Wall Time: 0.622 s
	Valid Loss: 1.015 | Valid Acc: 26.38%
	Model parameters saved to unig-f3.pt
Epoch: 05
	 Wall Time: 6.193 s
	Train Loss: 1.018 | Train Acc: 21.19%
	 Wall Time: 0.756 s
	Valid Loss: 0.982 | Valid Acc: 27.21%
	Model parameters saved to unig-f3.pt
Epoch: 06
	 Wall Time: 6.391 s
	Train Loss: 0.991 | Train Acc: 21.64%
	 Wall Time: 0.663 s
	Valid Loss: 0.956 | Valid Acc: 27.31%
	Model parameters saved to unig

### f = 1.0

In [13]:
# f_4 is four times f: key 4 selects the loaders built with sample_pow = 4 / 4.
# 'unig-f4.pt' is passed as the filename.
f_4 = 4
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    valid_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch: 01
	 Wall Time: 6.426 s
	Train Loss: 1.325 | Train Acc: 11.35%
	 Wall Time: 0.519 s
	Valid Loss: 1.242 | Valid Acc: 21.85%
	Model parameters saved to unig-f4.pt
Epoch: 02
	 Wall Time: 5.042 s
	Train Loss: 1.185 | Train Acc: 17.48%
	 Wall Time: 0.555 s
	Valid Loss: 1.131 | Valid Acc: 20.49%
	Model parameters saved to unig-f4.pt
Epoch: 03
	 Wall Time: 5.306 s
	Train Loss: 1.111 | Train Acc: 16.87%
	 Wall Time: 0.702 s
	Valid Loss: 1.069 | Valid Acc: 22.89%
	Model parameters saved to unig-f4.pt
Epoch: 04
	 Wall Time: 5.657 s
	Train Loss: 1.061 | Train Acc: 16.87%
	 Wall Time: 0.676 s
	Valid Loss: 1.013 | Valid Acc: 21.64%
	Model parameters saved to unig-f4.pt
Epoch: 05
	 Wall Time: 6.292 s
	Train Loss: 1.021 | Train Acc: 17.51%
	 Wall Time: 0.666 s
	Valid Loss: 0.978 | Valid Acc: 24.61%
	Model parameters saved to unig-f4.pt
Epoch: 06
	 Wall Time: 5.866 s
	Train Loss: 0.992 | Train Acc: 17.70%
	 Wall Time: 0.760 s
	Valid Loss: 0.957 | Valid Acc: 24.75%
	Model parameters saved to uni