In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import NegSampleModel
from utils import count_params
from neg_sample_utils import train_model, evaluate

# Seed PyTorch's global RNG once, up front. Nothing further down re-seeds it,
# NOTE(review): so torch-driven randomness in later cells presumably depends on
# which cells ran before them — run top to bottom from a fresh kernel.
torch.manual_seed(41)

<torch._C.Generator at 0x7fa248710168>

## Hyperparameters

In [2]:
from torch.optim import Adam

# Batch size handed to get_dataloader for every train/dev loader.
BATCH_SIZE = 64
# Constructor arguments for NegSampleModel (embedding_dim, embed_dropout,
# rnn_dropout).
EMBEDDING_DIM = 200
EMBED_DROPOUT = 0.5
RNN_DROPOUT = 0.5
# Optimisation settings forwarded to train_model as lr, epochs and optim.
# OPTIM is the optimizer class itself (not an instance).
LR = 1e-3
EPOCHS = 20
OPTIM = Adam

## Load vocabulary

In [3]:
# Shared vocabulary: its length and pad_idx configure the model, and the object
# itself is passed to every get_dataloader call.
# NOTE(review): constructed with no arguments, so Vocabulary() presumably reads
# its data from a default location — confirm in data.py.
vocab = Vocabulary()
# Dataset file name template; '{}' is filled with the split name
# ('train' or 'dev') when the loaders are built.
FILENAME = 'bobsue.prevsent.{}.tsv'

## Function to train a model with given dataloaders

In [4]:
def train(train_loader, val_loader, filename):
    """Build a fresh NegSampleModel and fit it via ``train_model``.

    The model is configured from the notebook-level hyperparameters
    (EMBEDDING_DIM, EMBED_DROPOUT, RNN_DROPOUT) and the global ``vocab``;
    the optimisation settings (OPTIM, LR, EPOCHS) are forwarded unchanged
    to ``train_model``.

    Args:
        train_loader: forwarded to ``train_model`` as ``train_loader``.
        val_loader: forwarded to ``train_model`` as ``val_loader``.
        filename: forwarded to ``train_model`` as ``filename``; the training
            log reports the model parameters being saved under this name.

    Returns:
        None. The trained model is not handed back to the caller.
    """
    vocab_size = len(vocab)

    # Everything the model constructor needs, gathered in one place.
    model_settings = {
        'vocab_size': vocab_size,
        'embedding_dim': EMBEDDING_DIM,
        'padding_idx': vocab.pad_idx,
        'embed_dropout': EMBED_DROPOUT,
        'rnn_dropout': RNN_DROPOUT,
    }
    net = NegSampleModel(**model_settings)

    train_model(
        model=net,
        filename=filename,
        train_loader=train_loader,
        val_loader=val_loader,
        vocab_size=vocab_size,
        optim=OPTIM,
        lr=LR,
        epochs=EPOCHS,
    )

# UNIF sampling

In [5]:
# UNIF setting: sample_pow is held at 0 and only neg_count varies.
# NOTE(review): a power of 0 presumably flattens the negative-sampling
# distribution to uniform — confirm in get_dataloader.
SAMPLE_POW = 0.
neg_counts = [20, 100, 500]
unif_file = 'unif-r{}.pt'  # checkpoint name template, filled with neg_count
unif_loaders = {}

for neg_count in neg_counts:
    # [train loader, dev loader], in that order, for this neg_count.
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=neg_count,
            sample_pow=SAMPLE_POW,
        )
        for split in ('train', 'dev')
    ]
    unif_loaders[neg_count] = loaders

### r = 20

In [6]:
# UNIF run with the loaders built for neg_count = 20; checkpoint: 'unif-r20.pt'.
r = 20
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1




	Train time: 0m 4.318s
	Train Loss:  1.114 | Train Acc:  2.84%
	 Val. time: 0m 0.489s
	 Val. Loss:  0.999 |  Val. Acc:  1.70%
	Model parameters saved to unif-r20.pt
Epoch:  2
	Train time: 0m 4.243s
	Train Loss:  0.989 | Train Acc:  4.09%
	 Val. time: 0m 0.49s
	 Val. Loss:  0.971 |  Val. Acc:  5.70%
	Model parameters saved to unif-r20.pt
Epoch:  3
	Train time: 0m 4.302s
	Train Loss:  0.964 | Train Acc:  5.15%
	 Val. time: 0m 0.464s
	 Val. Loss:  0.951 |  Val. Acc:  6.97%
	Model parameters saved to unif-r20.pt
Epoch:  4
	Train time: 0m 4.241s
	Train Loss:  0.936 | Train Acc:  6.09%
	 Val. time: 0m 0.472s
	 Val. Loss:  0.932 |  Val. Acc:  7.39%
	Model parameters saved to unif-r20.pt
Epoch:  5
	Train time: 0m 4.31s
	Train Loss:  0.914 | Train Acc:  6.74%
	 Val. time: 0m 0.474s
	 Val. Loss:  0.897 |  Val. Acc:  7.85%
	Model parameters saved to unif-r20.pt
Epoch:  6
	Train time: 0m 4.484s
	Train Loss:  0.895 | Train Acc:  7.38%
	 Val. time: 0m 0.459s
	 Val. Loss:  0.889 |  Val. Acc:  8.20%
	

### r = 100

In [7]:
# UNIF run with the loaders built for neg_count = 100; checkpoint: 'unif-r100.pt'.
r = 100
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	Train time: 0m 5.269s
	Train Loss:  1.118 | Train Acc:  2.70%
	 Val. time: 0m 0.538s
	 Val. Loss:  0.996 |  Val. Acc:  1.72%
	Model parameters saved to unif-r100.pt
Epoch:  2
	Train time: 0m 5.141s
	Train Loss:  0.987 | Train Acc:  3.26%
	 Val. time: 0m 0.552s
	 Val. Loss:  0.975 |  Val. Acc:  5.67%
	Model parameters saved to unif-r100.pt
Epoch:  3
	Train time: 0m 5.258s
	Train Loss:  0.963 | Train Acc:  5.06%
	 Val. time: 0m 0.554s
	 Val. Loss:  0.947 |  Val. Acc:  6.84%
	Model parameters saved to unif-r100.pt
Epoch:  4
	Train time: 0m 5.307s
	Train Loss:  0.936 | Train Acc:  5.95%
	 Val. time: 0m 0.549s
	 Val. Loss:  0.922 |  Val. Acc:  7.61%
	Model parameters saved to unif-r100.pt
Epoch:  5
	Train time: 0m 5.107s
	Train Loss:  0.914 | Train Acc:  6.90%
	 Val. time: 0m 0.567s
	 Val. Loss:  0.892 |  Val. Acc:  7.97%
	Model parameters saved to unif-r100.pt
Epoch:  6
	Train time: 0m 5.367s
	Train Loss:  0.892 | Train Acc:  7.88%
	 Val. time: 0m 0.556s
	 Val. Loss:  0.884 |  V

### r = 500

In [8]:
# UNIF run with the loaders built for neg_count = 500; checkpoint: 'unif-r500.pt'.
r = 500
train_loader, val_loader = unif_loaders[r]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unif_file.format(r),
)

Epoch:  1
	Train time: 0m 9.585s
	Train Loss:  1.118 | Train Acc:  2.72%
	 Val. time: 0m 0.916s
	 Val. Loss:  0.994 |  Val. Acc:  1.72%
	Model parameters saved to unif-r500.pt
Epoch:  2
	Train time: 0m 9.56s
	Train Loss:  0.988 | Train Acc:  3.45%
	 Val. time: 0m 0.973s
	 Val. Loss:  0.970 |  Val. Acc:  6.99%
	Model parameters saved to unif-r500.pt
Epoch:  3
	Train time: 0m 9.452s
	Train Loss:  0.960 | Train Acc:  4.93%
	 Val. time: 0m 0.946s
	 Val. Loss:  0.943 |  Val. Acc:  6.80%
	Model parameters saved to unif-r500.pt
Epoch:  4
	Train time: 0m 9.437s
	Train Loss:  0.934 | Train Acc:  6.02%
	 Val. time: 0m 0.899s
	 Val. Loss:  0.916 |  Val. Acc:  7.62%
	Model parameters saved to unif-r500.pt
Epoch:  5
	Train time: 0m 9.637s
	Train Loss:  0.910 | Train Acc:  6.98%
	 Val. time: 0m 0.941s
	 Val. Loss:  0.897 |  Val. Acc:  7.96%
	Model parameters saved to unif-r500.pt
Epoch:  6
	Train time: 0m 9.571s
	Train Loss:  0.888 | Train Acc:  7.95%
	 Val. time: 0m 1.004s
	 Val. Loss:  0.875 |  Va

# UNIG-f sampling

In [9]:
# UNIG-f setting: neg_count is held at 20 and the sampling power f varies.
# sample_pows stores 4*f as integers, so the keys 1..4 correspond to
# f = 0.25, 0.5, 0.75, 1.0 once divided by 4 below.
NEG_COUNT = 20
sample_pows = [1, 2, 3, 4]
unig_file = 'unig-f{}.pt'  # checkpoint name template, filled with 4*f
unig_loaders = {}

for sample_pow in sample_pows:
    # [train loader, dev loader], in that order, for this sampling power.
    loaders = [
        get_dataloader(
            filename=FILENAME.format(split),
            vocab=vocab,
            batch_size=BATCH_SIZE,
            neg_count=NEG_COUNT,
            sample_pow=sample_pow / 4,
        )
        for split in ('train', 'dev')
    ]
    unig_loaders[sample_pow] = loaders

### f = 0.25

In [10]:
# f_4 is 4*f: key 1 selects the loaders built with sample_pow = 1/4 = 0.25.
# Checkpoint: 'unig-f1.pt'.
f_4 = 1
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Train time: 0m 4.624s
	Train Loss:  1.185 | Train Acc:  4.39%
	 Val. time: 0m 0.51s
	 Val. Loss:  1.085 |  Val. Acc:  5.70%
	Model parameters saved to unig-f1.pt
Epoch:  2
	Train time: 0m 4.37s
	Train Loss:  1.070 | Train Acc:  5.73%
	 Val. time: 0m 0.473s
	 Val. Loss:  1.045 |  Val. Acc:  6.88%
	Model parameters saved to unig-f1.pt
Epoch:  3
	Train time: 0m 4.477s
	Train Loss:  1.026 | Train Acc:  6.64%
	 Val. time: 0m 0.465s
	 Val. Loss:  1.003 |  Val. Acc:  7.77%
	Model parameters saved to unig-f1.pt
Epoch:  4
	Train time: 0m 4.299s
	Train Loss:  0.991 | Train Acc:  8.74%
	 Val. time: 0m 0.496s
	 Val. Loss:  0.969 |  Val. Acc:  12.25%
	Model parameters saved to unig-f1.pt
Epoch:  5
	Train time: 0m 4.302s
	Train Loss:  0.963 | Train Acc:  10.92%
	 Val. time: 0m 0.47s
	 Val. Loss:  0.939 |  Val. Acc:  13.93%
	Model parameters saved to unig-f1.pt
Epoch:  6
	Train time: 0m 4.294s
	Train Loss:  0.939 | Train Acc:  12.46%
	 Val. time: 0m 0.463s
	 Val. Loss:  0.918 |  Val. Acc: 

### f = 0.5

In [11]:
# f_4 is 4*f: key 2 selects the loaders built with sample_pow = 2/4 = 0.5.
# Checkpoint: 'unig-f2.pt'.
f_4 = 2
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Train time: 0m 4.253s
	Train Loss:  1.261 | Train Acc:  5.26%
	 Val. time: 0m 0.467s
	 Val. Loss:  1.194 |  Val. Acc:  5.68%
	Model parameters saved to unig-f2.pt
Epoch:  2
	Train time: 0m 4.278s
	Train Loss:  1.150 | Train Acc:  7.18%
	 Val. time: 0m 0.523s
	 Val. Loss:  1.104 |  Val. Acc:  9.85%
	Model parameters saved to unig-f2.pt
Epoch:  3
	Train time: 0m 4.256s
	Train Loss:  1.079 | Train Acc:  10.71%
	 Val. time: 0m 0.471s
	 Val. Loss:  1.045 |  Val. Acc:  14.05%
	Model parameters saved to unig-f2.pt
Epoch:  4
	Train time: 0m 4.277s
	Train Loss:  1.035 | Train Acc:  12.30%
	 Val. time: 0m 0.471s
	 Val. Loss:  1.000 |  Val. Acc:  14.84%
	Model parameters saved to unig-f2.pt
Epoch:  5
	Train time: 0m 4.41s
	Train Loss:  1.000 | Train Acc:  13.29%
	 Val. time: 0m 0.454s
	 Val. Loss:  0.975 |  Val. Acc:  16.05%
	Model parameters saved to unig-f2.pt
Epoch:  6
	Train time: 0m 4.261s
	Train Loss:  0.979 | Train Acc:  13.60%
	 Val. time: 0m 0.476s
	 Val. Loss:  0.956 |  Val. 

### f = 0.75

In [12]:
# f_4 is 4*f: key 3 selects the loaders built with sample_pow = 3/4 = 0.75.
# Checkpoint: 'unig-f3.pt'.
f_4 = 3
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Train time: 0m 4.349s
	Train Loss:  1.313 | Train Acc:  5.96%
	 Val. time: 0m 0.481s
	 Val. Loss:  1.260 |  Val. Acc:  6.96%
	Model parameters saved to unig-f3.pt
Epoch:  2
	Train time: 0m 4.277s
	Train Loss:  1.212 | Train Acc:  11.68%
	 Val. time: 0m 0.456s
	 Val. Loss:  1.149 |  Val. Acc:  14.48%
	Model parameters saved to unig-f3.pt
Epoch:  3
	Train time: 0m 4.218s
	Train Loss:  1.124 | Train Acc:  13.36%
	 Val. time: 0m 0.496s
	 Val. Loss:  1.068 |  Val. Acc:  15.20%
	Model parameters saved to unig-f3.pt
Epoch:  4
	Train time: 0m 4.284s
	Train Loss:  1.059 | Train Acc:  13.41%
	 Val. time: 0m 0.473s
	 Val. Loss:  1.010 |  Val. Acc:  16.21%
	Model parameters saved to unig-f3.pt
Epoch:  5
	Train time: 0m 4.485s
	Train Loss:  1.017 | Train Acc:  13.39%
	 Val. time: 0m 0.465s
	 Val. Loss:  0.981 |  Val. Acc:  16.94%
	Model parameters saved to unig-f3.pt
Epoch:  6
	Train time: 0m 4.538s
	Train Loss:  0.993 | Train Acc:  13.52%
	 Val. time: 0m 0.467s
	 Val. Loss:  0.955 |  Va

### f = 1.0

In [13]:
# f_4 is 4*f: key 4 selects the loaders built with sample_pow = 4/4 = 1.0.
# Checkpoint: 'unig-f4.pt'.
f_4 = 4
train_loader, val_loader = unig_loaders[f_4]
train(
    train_loader=train_loader,
    val_loader=val_loader,
    filename=unig_file.format(f_4),
)

Epoch:  1
	Train time: 0m 4.314s
	Train Loss:  1.323 | Train Acc:  7.19%
	 Val. time: 0m 0.462s
	 Val. Loss:  1.234 |  Val. Acc:  14.03%
	Model parameters saved to unig-f4.pt
Epoch:  2
	Train time: 0m 4.278s
	Train Loss:  1.183 | Train Acc:  11.17%
	 Val. time: 0m 0.479s
	 Val. Loss:  1.124 |  Val. Acc:  13.33%
	Model parameters saved to unig-f4.pt
Epoch:  3
	Train time: 0m 4.335s
	Train Loss:  1.111 | Train Acc:  10.81%
	 Val. time: 0m 0.508s
	 Val. Loss:  1.072 |  Val. Acc:  13.73%
	Model parameters saved to unig-f4.pt
Epoch:  4
	Train time: 0m 4.54s
	Train Loss:  1.066 | Train Acc:  10.65%
	 Val. time: 0m 0.491s
	 Val. Loss:  1.025 |  Val. Acc:  14.74%
	Model parameters saved to unig-f4.pt
Epoch:  5
	Train time: 0m 4.445s
	Train Loss:  1.021 | Train Acc:  10.90%
	 Val. time: 0m 0.458s
	 Val. Loss:  0.983 |  Val. Acc:  14.65%
	Model parameters saved to unig-f4.pt
Epoch:  6
	Train time: 0m 4.705s
	Train Loss:  0.990 | Train Acc:  10.61%
	 Val. time: 0m 0.5s
	 Val. Loss:  0.955 |  Val.