In [4]:
from entropy import calculate_entropy_bigram, calculate_entropy_unigram
from tables import create_normal_bigram_table, create_normal_unigram_table, create_uniform_unigram_table
from generate_sequences import *
from model import get_ffnn
import torch

In [5]:
# Single configuration mapping for the notebook: the whole dict is forwarded to
# get_ffnn via **hparams, and individual entries are reused when sampling sequences.
hparams = dict(
    device='cuda' if torch.cuda.is_available() else 'cpu',
    # model size
    vocab_size=100,
    n_positions=64,
    n_embd=64,  # 64, 256
    n_layer=4,
    n_head=4,
    # dropout-related entries
    resid_pdrop=0.05,
    embd_pdrop=0.05,
    attn_pdrop=0.05,
    summary_first_dropout=0.05,
    # special token ids
    bos_token_id=0,
    eos_token_id=1,
    pad_token_id=2,
    # batching / optimisation
    batch_size=4,
    sequence_length=64,
    epochs=4,
    learning_rate=0.001,
    warmup_steps=100,
    weight_decay=0.01,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,
    # data
    dist='normal_unigram',
    num_train_samples=8000,
    num_test_samples=2000,
    log_interval=10,
    manual_option=0,
)

In [6]:
# Build the model, passing every hparams entry to get_ffnn as a keyword argument.
model = get_ffnn(**hparams)

In [8]:
# Sample one batch of token sequences. Positional arguments, in order:
# batch size, sequence length, the unigram table, then the BOS / EOS / PAD ids.
# NOTE(review): the table here is the *uniform* one although hparams['dist'] is
# 'normal_unigram' — confirm this is intentional.
seqs = generate_unigram_sequences_using_table(
    hparams['batch_size'],
    hparams['sequence_length'],
    create_uniform_unigram_table(hparams['vocab_size']),
    hparams['bos_token_id'],
    hparams['eos_token_id'],
    hparams['pad_token_id']
)

In [17]:
# Display the sampled batch.
seqs

tensor([[92, 28, 13, 27,  5, 94, 58, 83, 81, 59, 98, 71, 38, 20, 88, 13, 72, 73,
         31, 84, 23, 74,  2,  2, 13, 81,  4, 92, 30, 15, 90, 20, 96, 75, 92, 74,
         65, 51, 93, 20, 10, 60, 32, 15, 95, 81, 26, 92, 54, 62,  5, 33, 66, 26,
         27, 78, 88, 71, 11, 77, 67,  7, 61,  1],
        [19, 29, 42, 67, 65, 78, 41, 63, 39, 77, 79, 81, 64, 84, 75, 93, 31, 95,
         59, 12, 59, 76, 97, 91, 49, 59, 78, 65, 57, 11, 74, 23, 28, 98,  2, 88,
         89, 83, 19, 16, 12, 59, 35, 30, 48, 95, 87, 54,  8, 58, 71, 73, 89, 13,
         83, 72, 71, 80, 88, 70, 87, 94, 85,  1],
        [38, 12, 26, 58, 43, 54, 36, 13,  4, 92, 92, 87, 76,  2,  2, 45, 93, 26,
         97, 46, 21, 91, 63, 78, 10, 37, 66, 93, 29, 62, 41, 68, 19, 55, 52,  1,
         44, 52, 27,  1, 75, 83, 49, 56, 82, 56, 49, 63,  3,  1,  2,  2,  2,  2,
          2,  2,  2,  2,  2,  2,  2,  2,  2,  2],
        [73, 53,  9,  2, 41, 68, 76, 71, 80,  8,  3,  1,  2,  2,  2,  2,  2,  2,
          2,  2,  2,  2,  2,  2,  2,  2,

In [14]:
# Forward pass with the sampled batch used as both the input and the labels.
# NOTE(review): seqs contains pad_token_id (2) entries; whether the loss skips
# those positions depends on the model's label convention — confirm.
y = model(seqs, labels=seqs)

In [16]:
# Loss returned by the forward pass.
# NOTE(review): the recorded value is close to ln(100) ≈ 4.605, where 100 is
# hparams['vocab_size'] — presumably near-uniform predictions; confirm.
y.loss

tensor(4.6111, grad_fn=<NllLossBackward0>)

In [16]:
def e(x):
    """Print the value of ``calculate_entropy_unigram(x)``; returns ``None``."""
    entropy = calculate_entropy_unigram(x)
    print(entropy)

In [19]:
# Baseline: entropy of the uniform table built with size argument 10.
# The recorded output, 2.3026, equals ln(10), so the printed entropy is in nats.
x = create_uniform_unigram_table(10)
e(x)

2.3025848865509033


In [44]:
# Two-level distribution: probability 0.8 on one outcome, the remaining 0.2
# shared equally among n outcomes.
# NOTE(review): n looks hand-tuned so the entropy lands near ln(10) ≈ 2.3026 — confirm.
n = 8192
y = torch.cat([torch.tensor([0.8]), torch.full((n,), 0.2 / n)])
print(y.sum())  # total mass; ~1 when the entries form a probability distribution
e(y)

tensor(1.0000)
2.302584171295166


In [60]:
# Two-level distribution: probability 0.7 on one outcome, the remaining 0.3
# shared equally among n outcomes.
# NOTE(review): n looks hand-tuned so the entropy lands near ln(10) ≈ 2.3026 — confirm.
n = 282
y = torch.cat([torch.tensor([0.7]), torch.full((n,), 0.3 / n)])
print(y.sum())  # total mass; ~1 when the entries form a probability distribution
e(y)

tensor(1.0000)
2.303436279296875


In [68]:
# Two-level distribution: probability 0.6 on one outcome, the remaining 0.4
# shared equally among n outcomes.
# NOTE(review): n looks hand-tuned so the entropy lands near ln(10) ≈ 2.3026 — confirm.
n = 59
y = torch.cat([torch.tensor([0.6]), torch.full((n,), 0.4 / n)])
print(y.sum())  # total mass; ~1 when the entries form a probability distribution
e(y)

tensor(1.0000)
2.3040266036987305


In [74]:
# Two-level distribution: probability 0.5 on one outcome, the remaining 0.5
# shared equally among n outcomes.
# NOTE(review): n looks hand-tuned so the entropy lands near ln(10) ≈ 2.3026 — confirm.
n = 25
y = torch.cat([torch.tensor([0.5]), torch.full((n,), 0.5 / n)])
print(y.sum())  # total mass; ~1 when the entries form a probability distribution
e(y)

tensor(1.)
2.3025853633880615


In [86]:
# Two-level distribution: probability 0.4 on one outcome, the remaining 0.6
# shared equally among n outcomes.
# NOTE(review): n looks hand-tuned so the entropy lands near ln(10) ≈ 2.3026 — confirm.
n = 15
y = torch.cat([torch.tensor([0.4]), torch.full((n,), 0.6 / n)])
print(y.sum())  # total mass; ~1 when the entries form a probability distribution
e(y)

tensor(1.0000)
2.297842025756836


In [92]:
# Two-level distribution: probability 0.3 on one outcome, the remaining 0.7
# shared equally among n outcomes.
# NOTE(review): n looks hand-tuned so the entropy lands near ln(10) ≈ 2.3026 — confirm.
n = 11
y = torch.cat([torch.tensor([0.3]), torch.full((n,), 0.7 / n)])
print(y.sum())  # total mass; ~1 when the entries form a probability distribution
e(y)

tensor(1.)
2.289391040802002


In [3]:
from generate_sequences import *
from tables import create_normal_bigram_table, create_normal_unigram_table, create_uniform_unigram_table

In [4]:
# Unigram table from create_normal_unigram_table, built with size argument 10.
x = create_normal_unigram_table(10)

In [7]:
# Sample 4 sequences of length 8 from the table, leaving the token-id arguments
# at the function's defaults.
# NOTE(review): the recorded output shows -100 filling every position after the
# first 0 in a row, so the defaults are presumably eos=0 and pad=-100 — confirm
# in generate_sequences.
generate_unigram_sequences_using_table(4, 8, x)

tensor([[   8,    2,    2,    7,    6,    1,    9,    9],
        [   1,    0, -100, -100, -100, -100, -100, -100],
        [   5,    9,    7,    2,    0, -100, -100, -100],
        [   0, -100, -100, -100, -100, -100, -100, -100]])