In [69]:
import os
from collections import defaultdict

import numpy as np

# Proposal sample files. The loaders in this notebook read them line by line as
# ' ||| '-separated records whose first field is parsed as an integer index and
# whose second field is parsed as a float score.
CRF_SAMPLES = '../data/proposals/crf-test.props'
RNNG_SAMPLES = '../data/proposals/rnng-test.props'
DYER_SAMPLES = '../data/proposals/dyer-test.props'
# Directory that receives the per-index entropy files produced by write_entropies().
ENTROPIES_OUT = '../out/entropies'

In [70]:
def nll_dict_from_samples(path):
    """Group the per-sample scores of a proposals file by their integer index.

    Every non-blank line is expected to have the form
    ``index ||| score ||| rest``. Only the first two separators are
    significant: anything after the second one (including further ' ||| '
    occurrences or an empty remainder) is ignored.

    Args:
        path: Path of the proposals file to read.

    Returns:
        A ``defaultdict(list)`` mapping each ``int`` index to the list of
        ``float`` scores found for it, in file order.

    Raises:
        ValueError: If a non-blank line has fewer than three fields, or its
            index/score field cannot be parsed as ``int``/``float``.
    """
    nlls = defaultdict(list)
    with open(path) as f:
        for line in f:
            if not line.strip():
                # Blank lines (e.g. a trailing newline at EOF) carry no sample.
                continue
            # Split the unstripped line: stripping first would eat the space that
            # belongs to the last separator when the third field is empty.
            # int()/float() ignore surrounding whitespace themselves.
            index, nll, _ = line.split(' ||| ', 2)
            nlls[int(index)].append(float(nll))
    return nlls

def read_nll_from_samples(path):
    """Read the score (second ' ||| '-separated field) of every sample in a file.

    Unlike ``nll_dict_from_samples`` the index field is ignored, so the result
    is a single flat list pooling all samples in file order.

    Args:
        path: Path of the proposals file to read.

    Returns:
        A list of ``float`` scores, one per non-blank line.

    Raises:
        IndexError: If a non-blank line contains no ' ||| ' separator.
        ValueError: If the score field cannot be parsed as ``float``.
    """
    with open(path) as f:
        # Skip blank lines so a stray empty line does not abort the whole read.
        # The line is split unstripped (float() tolerates surrounding whitespace)
        # so a record with an empty trailing field still parses.
        nlls = [
            float(line.split(' ||| ', 2)[1])
            for line in f
            if line.strip()
        ]
    return nlls

# Flat score lists (all samples of a file pooled together), read in the order
# RNNG, CRF, Dyer.
rnng_nll, crf_nll, dyer_nll = map(
    read_nll_from_samples,
    (RNNG_SAMPLES, CRF_SAMPLES, DYER_SAMPLES),
)

## Write per-index entropy estimates to file

In [57]:
# Scores grouped by sample index, one dict per proposal file
# (read in the order CRF, Dyer, RNNG).
crf_dict, dyer_dict, rnng_dict = map(
    nll_dict_from_samples,
    (CRF_SAMPLES, DYER_SAMPLES, RNNG_SAMPLES),
)

In [58]:
def write_entropies(nlls_dict, path):
    """Write one ``key value`` line per entry, where value is the negated mean score.

    The parent directory of ``path`` is created if it does not exist yet, so the
    function also works on a fresh checkout.

    NOTE(review): the value is labelled an entropy and computed as
    ``-mean(scores)``. That is a non-negative entropy estimate only if the
    stored scores are log-probabilities rather than negative log-likelihoods,
    despite the ``nll`` naming - confirm against the code that writes the
    proposal files.

    Args:
        nlls_dict: Mapping from a key (the sample index) to a non-empty sequence
            of float scores.
        path: Output file path; an existing file is overwritten.
    """
    out_dir = os.path.dirname(path)
    if out_dir:
        # open(..., 'w') does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
    with open(path, 'w') as f:
        for i, nlls in nlls_dict.items():
            entropy = -np.mean(nlls)
            print(i, entropy, file=f)

In [59]:
# One output file per proposal model, all under ENTROPIES_OUT.
entropy_outputs = (
    (crf_dict, 'crf_entropies_approx.txt'),
    (dyer_dict, 'dyer_entropies_approx.txt'),
    (rnng_dict, 'rnng_entropies_approx.txt'),
)
for scores_by_index, filename in entropy_outputs:
    write_entropies(scores_by_index, os.path.join(ENTROPIES_OUT, filename))

## Entropies

In [64]:
# Negated mean of all RNNG sample scores, pooled across sample indices.
- np.mean(rnng_nll)

3.3100958276791683

In [22]:
# Negated mean of all CRF sample scores, pooled across sample indices.
- np.mean(crf_nll)

5.276174141294909

In [23]:
# Negated mean of all Dyer sample scores, pooled across sample indices.
- np.mean(dyer_nll)

1.916661471623733

## Variance of the pooled sample scores (?)

In [26]:
# Population variance (np.var default, ddof=0) of the pooled Dyer sample scores.
np.var(dyer_nll)

10.319010052216576

In [27]:
# Population variance (np.var default, ddof=0) of the pooled RNNG sample scores.
np.var(rnng_nll)

19.156451063835185

In [28]:
# Population variance (np.var default, ddof=0) of the pooled CRF sample scores.
np.var(crf_nll)

29.28713166156486

## Lower temperature

In [109]:
# Samples from the sampling experiment; per the file name this is the
# discriminative RNNG proposal with temp=1.0 and seed=1.
RNNG_TEMP_SAMPLES = '../out/sample-experiment/proposal=disc-rnng_temp=1.0_seed=1.props'

# The file is read twice: once grouped by sample index, once as a flat pooled list.
rnng_temp1_dict = nll_dict_from_samples(RNNG_TEMP_SAMPLES)
rnng_temp1_nlls = read_nll_from_samples(RNNG_TEMP_SAMPLES)

In [110]:
# Build the path from ENTROPIES_OUT like the other entropy files, so changing the
# output directory in the config cell also moves this file.
write_entropies(rnng_temp1_dict, os.path.join(ENTROPIES_OUT, 'rnng_temp1_approx.txt'))

In [111]:
# Negated mean of all sample scores in RNNG_TEMP_SAMPLES, pooled across indices.
-np.mean(rnng_temp1_nlls)

2.1741025028481387

In [112]:
# For comparison: the same estimate on the original RNNG test proposals (RNNG_SAMPLES).
-np.mean(rnng_nll)

3.3100958276791683