## Hand-tuned probabilities

In [None]:
from entropy import calculate_entropy_bigram, calculate_entropy_unigram
from tables import create_normal_bigram_table, create_normal_unigram_table, create_uniform_unigram_table
from generate_sequences import *
from model import get_ffnn
import torch

In [None]:
# Single configuration mapping for this notebook: it is unpacked wholesale into
# get_ffnn(**hparams) and indexed by key when generating sequences.
hparams = dict(
    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    device='cuda' if torch.cuda.is_available() else 'cpu',
    vocab_size=100,
    n_positions=64,
    n_embd=64,  # 64, 256
    n_layer=4,
    n_head=4,
    # Dropout-style rates (all set to the same value).
    resid_pdrop=0.05,
    embd_pdrop=0.05,
    attn_pdrop=0.05,
    summary_first_dropout=0.05,
    # Ids reserved for the special tokens.
    bos_token_id=0,
    eos_token_id=1,
    pad_token_id=2,
    batch_size=4,
    sequence_length=64,
    # Optimisation settings (presumably consumed by a training loop that is
    # not shown in this part of the notebook).
    epochs=4,
    learning_rate=0.001,
    warmup_steps=100,
    weight_decay=0.01,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,
    # Data-generation settings.
    dist='normal_unigram',
    num_train_samples=8000,
    num_test_samples=2000,
    log_interval=10,
    manual_option=0,
)

In [None]:
# Build the model, forwarding every hparams entry to get_ffnn as a keyword
# argument (get_ffnn must therefore accept or ignore all of those keys).
model = get_ffnn(**hparams)

In [None]:
# Generate sequences from a uniform unigram table built for the configured
# vocabulary size. Arguments are passed positionally: batch size, sequence
# length, the table, then the bos / eos / pad token ids.
# NOTE(review): the parameter order is taken from the hparams keys used here;
# confirm against generate_unigram_sequences_using_table's signature.
seqs = generate_unigram_sequences_using_table(
    hparams['batch_size'],
    hparams['sequence_length'],
    create_uniform_unigram_table(hparams['vocab_size']),
    hparams['bos_token_id'],
    hparams['eos_token_id'],
    hparams['pad_token_id']
)

In [None]:
# Echo the generated sequences as the cell output for inspection.
seqs

In [None]:
# Run the model on the generated sequences, using the same sequences as the
# labels; the result exposes a .loss attribute (read in the next cell).
y = model(seqs, labels=seqs)

In [None]:
# Echo the loss reported by the model output as the cell output.
y.loss

In [None]:
def e(x):
    """Print the unigram entropy of ``x``.

    Thin notebook helper: forwards ``x`` unchanged to
    ``calculate_entropy_unigram`` and prints whatever that returns.
    Nothing is returned, so using it as the last line of a cell produces
    only the printed value.
    """
    entropy = calculate_entropy_unigram(x)
    print(entropy)

In [None]:
# Reference point: print the entropy of the uniform unigram table created
# with argument 10 (presumably a 10-symbol vocabulary — TODO confirm).
x = create_uniform_unigram_table(10)
e(x)

In [None]:
# Spike-plus-flat-tail distribution: one outcome holds 0.8 of the mass and the
# remaining 0.2 is split evenly across n further outcomes.
# NOTE(review): n looks hand-tuned so that the Shannon entropy lands close to
# that of a uniform 10-outcome distribution (log 10) — check the printed value.
n = 8192
y = torch.tensor([0.8, *([0.2 / n] * n)])
print(y.sum())  # sanity check: the masses should add up to ~1
e(y)

In [None]:
# Spike-plus-flat-tail distribution: one outcome holds 0.7 of the mass and the
# remaining 0.3 is split evenly across n further outcomes.
# NOTE(review): n looks hand-tuned so that the Shannon entropy lands close to
# that of a uniform 10-outcome distribution (log 10) — check the printed value.
n = 282
y = torch.tensor([0.7, *([0.3 / n] * n)])
print(y.sum())  # sanity check: the masses should add up to ~1
e(y)

In [None]:
# Spike-plus-flat-tail distribution: one outcome holds 0.6 of the mass and the
# remaining 0.4 is split evenly across n further outcomes.
# NOTE(review): n looks hand-tuned so that the Shannon entropy lands close to
# that of a uniform 10-outcome distribution (log 10) — check the printed value.
n = 59
y = torch.tensor([0.6, *([0.4 / n] * n)])
print(y.sum())  # sanity check: the masses should add up to ~1
e(y)

In [None]:
# Spike-plus-flat-tail distribution: one outcome holds 0.5 of the mass and the
# remaining 0.5 is split evenly across n further outcomes.
# NOTE(review): n looks hand-tuned so that the Shannon entropy lands close to
# that of a uniform 10-outcome distribution (log 10) — check the printed value.
n = 25
y = torch.tensor([0.5, *([0.5 / n] * n)])
print(y.sum())  # sanity check: the masses should add up to ~1
e(y)

In [None]:
# Spike-plus-flat-tail distribution: one outcome holds 0.4 of the mass and the
# remaining 0.6 is split evenly across n further outcomes.
# NOTE(review): n looks hand-tuned so that the Shannon entropy lands close to
# that of a uniform 10-outcome distribution (log 10) — check the printed value.
n = 15
y = torch.tensor([0.4, *([0.6 / n] * n)])
print(y.sum())  # sanity check: the masses should add up to ~1
e(y)

In [None]:
# Spike-plus-flat-tail distribution: one outcome holds 0.3 of the mass and the
# remaining 0.7 is split evenly across n further outcomes.
# NOTE(review): n looks hand-tuned so that the Shannon entropy lands close to
# that of a uniform 10-outcome distribution (log 10) — check the printed value.
n = 11
y = torch.tensor([0.3, *([0.7 / n] * n)])
print(y.sum())  # sanity check: the masses should add up to ~1
e(y)

In [None]:
from generate_sequences import *
from tables import create_normal_bigram_table, create_normal_unigram_table, create_uniform_unigram_table

In [None]:
# Build a normal unigram table with argument 10 (presumably the vocabulary
# size, mirroring how the uniform table is created above — TODO confirm).
x = create_normal_unigram_table(10)

In [None]:
# Sample from table x and echo the result as the cell output. Only three
# positional arguments are given here (4, 8, x); the bos/eos/pad ids passed
# in the earlier call are omitted, so they presumably have defaults — TODO
# confirm against the function signature.
generate_unigram_sequences_using_table(4, 8, x)

In [None]:
import json
import glob
import os

In [None]:
# Collect result files whose names start with 'u', 'm' or 'n', keeping the
# matches grouped in that pattern order.
paths = [
    match
    for pattern in ('results/u*.json', 'results/m*.json', 'results/n*.json')
    for match in glob.glob(pattern)
]

In [None]:
# Drop duplicate paths and put them in lexicographic order; sorted() accepts
# the set directly and always returns a new list.
paths = sorted(set(paths))

In [None]:
# Summarise every result file: print its base name (without extension), then
# the entropy gap (entropy - transient_entropy) next to the lowest test-set
# perplexity, followed by a blank line. Files that cannot be parsed or that
# lack the expected fields are reported as 'no data' and skipped.
for path in paths:
    with open(path, 'r', encoding='utf-8') as f:
        try:
            data = json.load(f)
            # Compute both numbers before printing anything, so a malformed
            # file produces only the 'no data' line and no partial record.
            entropy_gap = data['entropy'] - data['transient_entropy']
            best_perplexity = min(data['test_set_perplexities'])
        except (ValueError, KeyError, TypeError):
            # ValueError: invalid JSON (json.JSONDecodeError), undecodable
            # bytes, or min() of an empty list. KeyError / TypeError: missing
            # or wrongly-typed fields. Anything else (KeyboardInterrupt,
            # SystemExit, real bugs) is allowed to propagate.
            print('no data:', path)
            continue
    dist = os.path.splitext(os.path.basename(path))[0]
    print(dist)
    print(entropy_gap, best_perplexity)
    print()

## Entropy Optimization Algorithm

In [1]:
from entropy_opt import get_dist, MSEAgainstEntropyAndVarEntropy

In [None]:
from torch.cuda import is_available

# Pick the compute device once: the GPU when torch reports CUDA support,
# otherwise the CPU.
if is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Instantiate the criterion with its default arguments; it is handed to
# get_dist as the first argument in the sweep below.
ent_var_crit = MSEAgainstEntropyAndVarEntropy()

In [None]:
from itertools import product

# Targets to sweep. product() below walks every combination, relying on the
# dict's insertion order (vocab_size, desired_entropy, desired_varent) to
# line up with the loop variables.
param_grid = {
    'vocab_size': [10, 100, 1000, 10000],
    'desired_entropy': [1.5, 3.0, 4.5, 6.0, 7.5, 9.0],
    'desired_varent': [1.5, 3.0, 4.5, 6.0, 7.5, 9.0]
}

# For each combination, ask get_dist for a distribution and report how close
# its measured entropy / varentropy are to the requested targets.
# NOTE(review): assuming final_dist is a torch tensor, .log() is the natural
# log, so the entropy printed here is in nats and cannot exceed ln(vocab_size)
# (about 2.30 for vocab_size=10). Several grid targets are above that bound —
# confirm how get_dist behaves for unreachable targets.
for vocab_size, desired_entropy, desired_varent in product(*param_grid.values()):
    final_dist = get_dist(
        ent_var_crit,
        vocab_size,
        desired_entropy,
        desired_varent,
        False,  # positional flag; meaning is defined by get_dist — TODO confirm
        1e-6  # presumably a tolerance or threshold — TODO confirm
    )
    
    print('-----------------------------------------------------')
    print(f'vocab size: {vocab_size}')
    print(f'sum of probabilities (should be 1): {final_dist.sum()}')
    # X is the per-outcome surprisal -log p.
    # NOTE(review): an exact zero in final_dist would give inf here and nan in
    # the sums below — confirm get_dist never returns zeros.
    X = -final_dist.log()
    # First and second moments of the surprisal under final_dist.
    E_X = (final_dist * X).sum()
    E_X_sq = (final_dist * X * X).sum()
    # Entropy is E[X]; varentropy is Var[X] = E[X^2] - (E[X])^2.
    mean = E_X.item()
    var = E_X_sq.item() - (mean ** 2)
    print('-----------------------------------------------------')
    print(f'desired entropy:    {desired_entropy}')
    print(f'true entropy:       {mean}')
    print('-----------------------------------------------------')
    print(f'desired varentropy: {desired_varent}')
    print(f'true varentropy:    {var}')
    print('-----------------------------------------------------')

-----------------------------------------------------
vocab size: 10
sum of probabilities (should be 1): 0.9999999999999999
-----------------------------------------------------
desired entropy:    1.5
true entropy:       1.4998426312544317
-----------------------------------------------------
desired varentropy: 1.5
true varentropy:    1.4993950914847778
-----------------------------------------------------
