In [1]:
import os
import math
import json
import numpy as np
import plotly.express as px

In [2]:
def combine_attrs(list1, list2):
    """Pair up two sequences element-wise as ``'<a>, <b>'`` strings.

    The result has one entry per element of ``list1``. ``list2`` is indexed
    by the same position, so it must be at least as long as ``list1``
    (a shorter ``list2`` raises ``IndexError``); surplus entries are ignored.
    """
    combined = []
    for position, first in enumerate(list1):
        second = list2[position]
        combined.append(', '.join((str(first), str(second))))
    return combined

def data(json_file):
    """Load one result file and annotate it with the settings in its file name.

    The base name (with ``.json`` removed) is split on ``_`` and its first
    four parts are read as ``<dist>_<uni_or_bi>_<vocab_size>_<softmax>``.

    Args:
        json_file: Path to a JSON file whose top-level object contains a
            ``train_losses`` entry (a sequence of loss sequences).

    Returns:
        The parsed dict with these keys added: ``dist``, ``uni_or_bi``,
        ``vocab_size`` (int), ``softmax`` (bool, True only when the fourth
        part is exactly ``'softmax'``), ``settings`` (all name parts) and
        ``losses`` (``train_losses`` flattened into one list). The original
        ``train_losses`` key is removed.

    Raises:
        IndexError: The file name has fewer than four ``_``-separated parts.
        ValueError: The third part is not an integer.
        KeyError: The file has no ``train_losses`` entry.
    """
    # Use a context manager so the handle is closed deterministically instead
    # of being left to the garbage collector.
    with open(json_file) as handle:
        j = json.load(handle)

    settings = os.path.basename(json_file).replace('.json', '').split('_')
    j['dist'] = settings[0]
    j['uni_or_bi'] = settings[1]
    j['vocab_size'] = int(settings[2])
    j['softmax'] = settings[3] == 'softmax'
    j['settings'] = settings

    # Flatten the nested loss sequences into a single flat list.
    j['losses'] = []
    for chunk in j['train_losses']:
        j['losses'].extend(chunk)
    del j['train_losses']

    # Report files that also carry validation losses.
    if 'val_losses' in j:
        print(json_file)
    return j

# Collect every result file. os.listdir returns names in arbitrary order, so
# sort them to keep the row order (and anything derived from it) reproducible
# across machines and runs.
json_files = [
    os.path.join('results', name)
    for name in sorted(os.listdir('results'))
    if name.endswith('.json')
]

jsons = [data(json_file) for json_file in json_files]

# One list per attribute; all lists are aligned by index with `jsons`.
test_set_perplexity = [run['test_set_perplexity'] for run in jsons]
entropy = [run['entropy'] for run in jsons]
dist = [run['dist'] for run in jsons]
uni_or_bi = [run['uni_or_bi'] for run in jsons]
vocab_size = [run['vocab_size'] for run in jsons]
softmax = [run['softmax'] for run in jsons]

# Natural log of the perplexity.
# NOTE(review): this equals the average cross-entropy in nats only if the
# perplexity was computed as exp(mean cross-entropy) -- confirm upstream.
test_set_avg_cross_entropy = [math.log(x) for x in test_set_perplexity]

# Column-name -> values mapping handed to plotly express as the data frame.
df = {
    'Test set perplexity': test_set_perplexity,
    'Test set average cross-entropy': test_set_avg_cross_entropy,
    'Entropy': entropy,
    'Distribution': dist,
    'Uni- or bigram': uni_or_bi,
    'Vocab size': vocab_size,
    'Softmax': softmax
}

In [3]:
PLOTS_DIR = 'plots'
# Legend anchored just inside the top-left corner of the plotting area.
LEGEND_POSITION = dict(x=0.015, y=0.97)


def save_entropy_scatter(y, color, color_label, hover_columns, filename,
                         category_orders=None):
    """Scatter a ``df`` column against 'Entropy' and write the figure as HTML.

    Args:
        y: Name of the ``df`` column plotted on the y axis; also used to
            build the title ``'<y> vs. entropy'``.
        color: Either the name of a ``df`` column or a list with one value
            per row, used to colour the points.
        color_label: Legend/hover label applied when ``color`` is a list
            (plotly then names the column ``'color'``).
        hover_columns: Names of ``df`` columns to add to the hover box.
        filename: Output file name, written inside ``PLOTS_DIR``.
        category_orders: Optional mapping passed through to
            ``px.scatter(category_orders=...)``.

    Returns:
        The created plotly figure.
    """
    figure = px.scatter(
        df,
        x='Entropy',
        y=y,
        title=y + ' vs. entropy',
        # x and y are real column names and need no relabelling; only the
        # anonymous 'color' column (list input) needs a readable label.
        labels={'color': color_label},
        color=color,
        category_orders=category_orders,
        hover_data={column: True for column in hover_columns},
    )
    figure.update_layout(legend=LEGEND_POSITION)
    figure.write_html(os.path.join(PLOTS_DIR, filename))
    return figure


# write_html does not create missing directories.
os.makedirs(PLOTS_DIR, exist_ok=True)

# Vocab sizes are coloured as discrete categories, hence the str conversion.
vocab_labels = [str(size) for size in vocab_size]
# Largest vocabulary first; a real, de-duplicated list rather than a one-shot
# `reversed` iterator with one entry per run.
vocab_order = [str(size) for size in sorted(set(vocab_size), reverse=True)]
uni_or_bi_and_dist = combine_attrs(uni_or_bi, dist)

# The same three colourings are produced for both y-axis metrics; the
# cross-entropy variants get a '_ce' file-name suffix.
for y_column, suffix in (
    ('Test set perplexity', ''),
    ('Test set average cross-entropy', '_ce'),
):
    fig = save_entropy_scatter(
        y_column,
        color='Uni- or bigram',
        color_label='Uni- or bigram',
        hover_columns=['Vocab size', 'Softmax', 'Distribution'],
        filename='uni_or_bi' + suffix + '.html',
    )
    fig = save_entropy_scatter(
        y_column,
        color=vocab_labels,
        color_label='Vocab size',
        hover_columns=['Uni- or bigram', 'Softmax', 'Distribution'],
        filename='vocab' + suffix + '.html',
        category_orders={'color': vocab_order},
    )
    fig = save_entropy_scatter(
        y_column,
        color=uni_or_bi_and_dist,
        color_label='Uni- or bigram and distribution',
        hover_columns=['Vocab size', 'Softmax'],
        filename='uni_or_bi_dist' + suffix + '.html',
    )

  sf: grouped.get_group(s if len(s) > 1 else s[0])










