In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import NegSampleModel
from neg_sample_utils import train_model

# Seed PyTorch's default random number generator so repeated runs start from the same state.
# NOTE(review): only torch is seeded here; if the project helpers draw from
# Python's `random` or NumPy, those would need their own seeds — confirm.
torch.manual_seed(41)

<torch._C.Generator at 0x7ff9686f9180>

## Hyperparameters

In [2]:
from torch.optim import Adam

# batch_size given to every get_dataloader call below
BATCH_SIZE = 64
# embedding_dim of NegSampleModel
EMBEDDING_DIM = 200
# embed_dropout of NegSampleModel
EMBED_DROPOUT = 0.5
# rnn_dropout of NegSampleModel
RNN_DROPOUT = 0.5
# lr handed to train_model
LR = 1e-3
# epochs handed to train_model
EPOCHS = 20
# optimizer class (not an instance), handed to train_model as `optim`
OPTIM = Adam

## Load vocabulary

In [3]:
# Shared vocabulary used to size the model and to build every dataloader.
# NOTE(review): constructed with no arguments — where it reads its word list
# from is decided inside the `data` module, not visible here.
vocab = Vocabulary()
# Dataset filename template; '{}' is filled with the split name ('train' or 'dev').
FILENAME = 'bobsue.prevsent.{}.tsv'

## Function to train a model with given dataloaders

In [4]:
def train(train_loader, val_loader, filename):
    """Build a fresh NegSampleModel and hand it to ``train_model``.

    The model and the training run are configured from the notebook-level
    ``vocab`` and hyperparameter constants (EMBEDDING_DIM, EMBED_DROPOUT,
    RNN_DROPOUT, OPTIM, LR, EPOCHS). Nothing is returned.

    Args:
        train_loader: forwarded to ``train_model`` as ``train_loader``.
        val_loader: forwarded to ``train_model`` as ``val_loader``.
        filename: forwarded to ``train_model`` as ``filename``; the training
            log reports model parameters being saved under this name.
    """
    vocab_size = len(vocab)

    network = NegSampleModel(
        vocab_size=vocab_size,
        embedding_dim=EMBEDDING_DIM,
        padding_idx=vocab.pad_idx,
        embed_dropout=EMBED_DROPOUT,
        rnn_dropout=RNN_DROPOUT,
    )

    # Everything train_model needs besides the model itself.
    run_options = dict(
        filename=filename,
        train_loader=train_loader,
        val_loader=val_loader,
        vocab_size=vocab_size,
        optim=OPTIM,
        lr=LR,
        epochs=EPOCHS,
    )
    train_model(model=network, **run_options)

# UNIF sampling

In [5]:
# UNIF setting: the sampling power is fixed at 0 and the number of negatives varies.
# NOTE(review): presumably a power of 0 makes the negative-sampling distribution
# uniform — confirm against get_dataloader.
SAMPLE_POW = 0.
neg_counts = [20, 100, 500]
unif_file = 'unif-r{}.pt'
unif_loaders = {}

for neg_count in neg_counts:
    # Two loaders per setting, in the order [train, dev].
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=neg_count,
            sample_pow=SAMPLE_POW,
        )
        for split in ('train', 'dev')
    ]
    unif_loaders[neg_count] = loaders

### r = 20

In [6]:
# Train on the loaders that were built with neg_count = 20; checkpoint name is 'unif-r20.pt'.
r = 20
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1




	Elapsed time: 0m  4.398s
	Train Loss:  1.114 | Train Acc:  2.84%
	Elapsed time: 0m  0.554s
	 Val. Loss:  0.999 |  Val. Acc:  1.70%
	Model parameters saved to unif-r20.pt
Epoch:  2
	Elapsed time: 0m  4.292s
	Train Loss:  0.989 | Train Acc:  4.09%
	Elapsed time: 0m  0.497s
	 Val. Loss:  0.971 |  Val. Acc:  5.70%
	Model parameters saved to unif-r20.pt
Epoch:  3
	Elapsed time: 0m  4.279s
	Train Loss:  0.964 | Train Acc:  5.15%
	Elapsed time: 0m  0.569s
	 Val. Loss:  0.951 |  Val. Acc:  6.97%
	Model parameters saved to unif-r20.pt
Epoch:  4
	Elapsed time: 0m  4.224s
	Train Loss:  0.936 | Train Acc:  6.09%
	Elapsed time: 0m  0.469s
	 Val. Loss:  0.932 |  Val. Acc:  7.39%
	Model parameters saved to unif-r20.pt
Epoch:  5
	Elapsed time: 0m  4.292s
	Train Loss:  0.914 | Train Acc:  6.74%
	Elapsed time: 0m  0.498s
	 Val. Loss:  0.897 |  Val. Acc:  7.85%
	Model parameters saved to unif-r20.pt
Epoch:  6
	Elapsed time: 0m  4.264s
	Train Loss:  0.895 | Train Acc:  7.38%
	Elapsed time: 0m  0.457s
	 V

### r = 100

In [7]:
# Train on the loaders that were built with neg_count = 100; checkpoint name is 'unif-r100.pt'.
r = 100
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	Elapsed time: 0m  5.785s
	Train Loss:  1.118 | Train Acc:  2.70%
	Elapsed time: 0m  0.604s
	 Val. Loss:  0.996 |  Val. Acc:  1.72%
	Model parameters saved to unif-r100.pt
Epoch:  2
	Elapsed time: 0m  5.822s
	Train Loss:  0.987 | Train Acc:  3.26%
	Elapsed time: 0m  0.607s
	 Val. Loss:  0.975 |  Val. Acc:  5.67%
	Model parameters saved to unif-r100.pt
Epoch:  3
	Elapsed time: 0m  5.820s
	Train Loss:  0.963 | Train Acc:  5.06%
	Elapsed time: 0m  0.632s
	 Val. Loss:  0.947 |  Val. Acc:  6.84%
	Model parameters saved to unif-r100.pt
Epoch:  4
	Elapsed time: 0m  6.287s
	Train Loss:  0.936 | Train Acc:  5.95%
	Elapsed time: 0m  0.599s
	 Val. Loss:  0.922 |  Val. Acc:  7.61%
	Model parameters saved to unif-r100.pt
Epoch:  5
	Elapsed time: 0m  6.865s
	Train Loss:  0.914 | Train Acc:  6.90%
	Elapsed time: 0m  0.745s
	 Val. Loss:  0.892 |  Val. Acc:  7.97%
	Model parameters saved to unif-r100.pt
Epoch:  6
	Elapsed time: 0m  5.778s
	Train Loss:  0.892 | Train Acc:  7.88%
	Elapsed time:

### r = 500

In [8]:
# Train on the loaders that were built with neg_count = 500; checkpoint name is 'unif-r500.pt'.
r = 500
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	Elapsed time: 0m  10.738s
	Train Loss:  1.118 | Train Acc:  2.72%
	Elapsed time: 0m  1.070s
	 Val. Loss:  0.994 |  Val. Acc:  1.72%
	Model parameters saved to unif-r500.pt
Epoch:  2
	Elapsed time: 0m  11.208s
	Train Loss:  0.988 | Train Acc:  3.45%
	Elapsed time: 0m  1.179s
	 Val. Loss:  0.970 |  Val. Acc:  6.99%
	Model parameters saved to unif-r500.pt
Epoch:  3
	Elapsed time: 0m  9.887s
	Train Loss:  0.960 | Train Acc:  4.93%
	Elapsed time: 0m  0.982s
	 Val. Loss:  0.943 |  Val. Acc:  6.80%
	Model parameters saved to unif-r500.pt
Epoch:  4
	Elapsed time: 0m  9.708s
	Train Loss:  0.934 | Train Acc:  6.02%
	Elapsed time: 0m  0.987s
	 Val. Loss:  0.916 |  Val. Acc:  7.62%
	Model parameters saved to unif-r500.pt
Epoch:  5
	Elapsed time: 0m  9.457s
	Train Loss:  0.910 | Train Acc:  6.98%
	Elapsed time: 0m  0.947s
	 Val. Loss:  0.897 |  Val. Acc:  7.96%
	Model parameters saved to unif-r500.pt
Epoch:  6
	Elapsed time: 0m  9.378s
	Train Loss:  0.888 | Train Acc:  7.95%
	Elapsed tim

# UNIG-f sampling

In [9]:
# UNIG-f setting: the number of negatives is fixed and the sampling power f varies.
# The loop runs over the integer numerators 1..4; the power actually passed to
# get_dataloader is numerator / 4 (0.25, 0.5, 0.75, 1.0). The dict is keyed by
# the numerator, not by f.
NEG_COUNT = 20
sample_pows = [1, 2, 3, 4]
unig_file = 'unig-f{}.pt'
unig_loaders = {}

for sample_pow in sample_pows:
    # Two loaders per setting, in the order [train, dev].
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=NEG_COUNT,
            sample_pow=sample_pow / 4,
        )
        for split in ('train', 'dev')
    ]
    unig_loaders[sample_pow] = loaders

### f = 0.25

In [10]:
# f_4 is the numerator of f = f_4 / 4, so this run uses f = 0.25.
# The checkpoint name carries f_4, not f: 'unig-f1.pt'.
f_4 = 1
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Elapsed time: 0m  4.581s
	Train Loss:  1.185 | Train Acc:  4.39%
	Elapsed time: 0m  0.487s
	 Val. Loss:  1.085 |  Val. Acc:  5.70%
	Model parameters saved to unig-f1.pt
Epoch:  2
	Elapsed time: 0m  4.420s
	Train Loss:  1.070 | Train Acc:  5.73%
	Elapsed time: 0m  0.556s
	 Val. Loss:  1.045 |  Val. Acc:  6.88%
	Model parameters saved to unig-f1.pt
Epoch:  3
	Elapsed time: 0m  4.505s
	Train Loss:  1.026 | Train Acc:  6.64%
	Elapsed time: 0m  0.494s
	 Val. Loss:  1.003 |  Val. Acc:  7.77%
	Model parameters saved to unig-f1.pt
Epoch:  4
	Elapsed time: 0m  4.645s
	Train Loss:  0.991 | Train Acc:  8.74%
	Elapsed time: 0m  0.489s
	 Val. Loss:  0.969 |  Val. Acc:  12.25%
	Model parameters saved to unig-f1.pt
Epoch:  5
	Elapsed time: 0m  4.381s
	Train Loss:  0.963 | Train Acc:  10.92%
	Elapsed time: 0m  0.489s
	 Val. Loss:  0.939 |  Val. Acc:  13.93%
	Model parameters saved to unig-f1.pt
Epoch:  6
	Elapsed time: 0m  4.476s
	Train Loss:  0.939 | Train Acc:  12.46%
	Elapsed time: 0m  0

### f = 0.5

In [11]:
# f_4 is the numerator of f = f_4 / 4, so this run uses f = 0.5.
# The checkpoint name carries f_4, not f: 'unig-f2.pt'.
f_4 = 2
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Elapsed time: 0m  4.265s
	Train Loss:  1.261 | Train Acc:  5.26%
	Elapsed time: 0m  0.498s
	 Val. Loss:  1.194 |  Val. Acc:  5.68%
	Model parameters saved to unig-f2.pt
Epoch:  2
	Elapsed time: 0m  4.326s
	Train Loss:  1.150 | Train Acc:  7.18%
	Elapsed time: 0m  0.471s
	 Val. Loss:  1.104 |  Val. Acc:  9.85%
	Model parameters saved to unig-f2.pt
Epoch:  3
	Elapsed time: 0m  4.568s
	Train Loss:  1.079 | Train Acc:  10.71%
	Elapsed time: 0m  0.546s
	 Val. Loss:  1.045 |  Val. Acc:  14.05%
	Model parameters saved to unig-f2.pt
Epoch:  4
	Elapsed time: 0m  4.493s
	Train Loss:  1.035 | Train Acc:  12.30%
	Elapsed time: 0m  0.497s
	 Val. Loss:  1.000 |  Val. Acc:  14.84%
	Model parameters saved to unig-f2.pt
Epoch:  5
	Elapsed time: 0m  4.518s
	Train Loss:  1.000 | Train Acc:  13.29%
	Elapsed time: 0m  0.608s
	 Val. Loss:  0.975 |  Val. Acc:  16.05%
	Model parameters saved to unig-f2.pt
Epoch:  6
	Elapsed time: 0m  4.707s
	Train Loss:  0.979 | Train Acc:  13.60%
	Elapsed time: 0m

### f = 0.75

In [12]:
# f_4 is the numerator of f = f_4 / 4, so this run uses f = 0.75.
# The checkpoint name carries f_4, not f: 'unig-f3.pt'.
f_4 = 3
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Elapsed time: 0m  4.724s
	Train Loss:  1.313 | Train Acc:  5.96%
	Elapsed time: 0m  0.483s
	 Val. Loss:  1.260 |  Val. Acc:  6.96%
	Model parameters saved to unig-f3.pt
Epoch:  2
	Elapsed time: 0m  4.314s
	Train Loss:  1.212 | Train Acc:  11.68%
	Elapsed time: 0m  0.516s
	 Val. Loss:  1.149 |  Val. Acc:  14.48%
	Model parameters saved to unig-f3.pt
Epoch:  3
	Elapsed time: 0m  4.329s
	Train Loss:  1.124 | Train Acc:  13.36%
	Elapsed time: 0m  0.483s
	 Val. Loss:  1.068 |  Val. Acc:  15.20%
	Model parameters saved to unig-f3.pt
Epoch:  4
	Elapsed time: 0m  4.770s
	Train Loss:  1.059 | Train Acc:  13.41%
	Elapsed time: 0m  0.500s
	 Val. Loss:  1.010 |  Val. Acc:  16.21%
	Model parameters saved to unig-f3.pt
Epoch:  5
	Elapsed time: 0m  4.277s
	Train Loss:  1.017 | Train Acc:  13.39%
	Elapsed time: 0m  0.469s
	 Val. Loss:  0.981 |  Val. Acc:  16.94%
	Model parameters saved to unig-f3.pt
Epoch:  6
	Elapsed time: 0m  4.235s
	Train Loss:  0.993 | Train Acc:  13.52%
	Elapsed time: 

### f = 1.0

In [13]:
# f_4 is the numerator of f = f_4 / 4, so this run uses f = 1.0.
# The checkpoint name carries f_4, not f: 'unig-f4.pt'.
f_4 = 4
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Elapsed time: 0m  5.323s
	Train Loss:  1.323 | Train Acc:  7.19%
	Elapsed time: 0m  0.591s
	 Val. Loss:  1.234 |  Val. Acc:  14.03%
	Model parameters saved to unig-f4.pt
Epoch:  2
	Elapsed time: 0m  5.617s
	Train Loss:  1.183 | Train Acc:  11.17%
	Elapsed time: 0m  0.559s
	 Val. Loss:  1.124 |  Val. Acc:  13.33%
	Model parameters saved to unig-f4.pt
Epoch:  3
	Elapsed time: 0m  5.289s
	Train Loss:  1.111 | Train Acc:  10.81%
	Elapsed time: 0m  0.619s
	 Val. Loss:  1.072 |  Val. Acc:  13.73%
	Model parameters saved to unig-f4.pt
Epoch:  4
	Elapsed time: 0m  5.445s
	Train Loss:  1.066 | Train Acc:  10.65%
	Elapsed time: 0m  0.623s
	 Val. Loss:  1.025 |  Val. Acc:  14.74%
	Model parameters saved to unig-f4.pt
Epoch:  5
	Elapsed time: 0m  5.657s
	Train Loss:  1.021 | Train Acc:  10.90%
	Elapsed time: 0m  0.656s
	 Val. Loss:  0.983 |  Val. Acc:  14.65%
	Model parameters saved to unig-f4.pt
Epoch:  6
	Elapsed time: 0m  5.305s
	Train Loss:  0.990 | Train Acc:  10.61%
	Elapsed time: