In [None]:
"""
Goal: plot training curves for e2e. There was probably an earlier way to do this, but I forget
Doing this from scratch, 25 sept 2021
"""
%matplotlib inline
import importlib
import glob
import matplotlib.pyplot as plt
from ulfs import graphing, graphing_common
importlib.reload(graphing)
importlib.reload(graphing_common)

# Directory that plot_scenario globs for training log files.
log_dir = '../../logs'

# Run reference and sender/receiver architecture names; together they form the
# log filename pattern (the ':' is stripped from the architecture names first).
ref = 'ibe124'
send_arch = 'RNNAutoReg:LSTM'
recv_arch = 'RNN:LSTM'

def plot_scenario(ref, send_arch, recv_arch):
    """Plot the end-to-end training curves of one run, one line per grammar.

    Globs the module-level `log_dir` for log files whose name contains `ref` and
    both architecture names (with ':' removed), drops every path containing
    'recv' or 'send', and draws four side-by-side subplots (e2e_loss, e2e_acc,
    send_acc, recv_acc) through graphing.plot_logfile2.

    Parameters
    ----------
    ref: str
            Run reference embedded in the log filename, e.g. 'ibe124'
    send_arch: str
            Sender architecture name, e.g. 'RNNAutoReg:LSTM'
    recv_arch: str
            Receiver architecture name, e.g. 'RNN:LSTM'

    Raises
    ------
    AssertionError
            If no log file matches, or none is left after the recv/send filter
    """
    send_arch_filename = send_arch.replace(':', '')
    recv_arch_filename = recv_arch.replace(':', '')
    pattern = f'{log_dir}/log_*_{ref}_{send_arch_filename}_{recv_arch_filename}*.log'
    files = glob.glob(pattern)
    assert len(files) > 0, f'no log files match {pattern}'
    files = [file for file in files if 'recv' not in file and 'send' not in file]
    assert len(files) > 0, f'only recv/send log files match {pattern}'

    # The grammar name is the filename token right after the two architecture names.
    arch_marker = f'{send_arch_filename}_{recv_arch_filename}_'
    file_by_grammar = {}
    for file in files:
        grammar = file.split(arch_marker)[1].split('_')[0]
        file_by_grammar[grammar] = file
    # Iterate over the dict keys rather than a parallel list: when several files
    # share a grammar only one of them is kept, and it must be plotted only once.
    grammars = sorted(file_by_grammar)

    plt.figure(figsize=(15, 3))
    for i, value_key in enumerate(['e2e_loss', 'e2e_acc', 'send_acc', 'recv_acc']):
        plt.subplot(1, 4, i + 1)
        # Accuracies share a fixed [0, 1] axis; the loss is left to autoscale.
        y_lims = [0, 1] if value_key.endswith('_acc') else None
        for grammar in grammars:
            graphing.plot_logfile2(
                logfile=file_by_grammar[grammar], step_key='episode', value_key=value_key,
                title=value_key, y_lims=y_lims, label=grammar, units='thousands',
                skip_record_types=['sup_train_res'],
            )
        plt.legend()
    plt.show()

# Draw the curves for each sender architecture against the fixed receiver
# architecture. Note that the loop rebinds the module-level `send_arch`.
for send_arch in ['FC2L', 'RNNAutoReg:LSTM', 'TransDecSoft']:
    print(send_arch, recv_arch)
    plot_scenario(ref=ref, send_arch=send_arch, recv_arch=recv_arch)

# Sample log record, kept for reference; it shows the keys that can be used as
# value_key / step_key above:
# {"sps": 13, "elapsed_time": 1685.0098643302917, "e2e_loss": -0.7471874964237213,
# "e2e_acc": 0.7478437483310699, "r_mean": 0.7471874964237213, "r_std": 0.4346870410442352,
# "rho": 0.34620082867986396, "recv_acc": 0.3296875059604645,
# "send_acc": 0.3695312440395355, "episode": 21971}

In [None]:
"""
in this section, we'll try to plot ci95 over multiple runs
"""
%matplotlib inline
import importlib
import glob
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
from ulfs import graphing, graphing_common
importlib.reload(graphing)
importlib.reload(graphing_common)

# Directory globbed for log files (alternative location kept commented out).
# log_dir = '../../pull'
log_dir = '../../logs'

# Run references to aggregate over: 'ibe122' .. 'ibe131' (ten runs). The
# commented-out variants select smaller subsets.
# refs = ['ibe122']
# refs = ['ibe122', 'ibe123', 'ibe124', 'ibe125', 'ibe126']
refs = [f'ibe{i}' for i in range(122, 132)]
# refs = [refs[2]]
send_arch = 'RNNAutoReg:LSTM'
recv_arch = 'RNN:LSTM'

# Serif font for every figure drawn after this point.
plt.rc('font', family='serif')

def aggregate_series(files, value_key, grammar, num_bins):
    """Bin one metric from several log files onto a shared step grid.

    Each file is read with graphing.get_log_results; its values are then
    averaged per step bin with scipy.stats.binned_statistic, using the same bin
    edges for every file so the runs can be compared point by point.

    Parameters
    ----------
    files: list of str
            Log files, one per run
    value_key: str
            Metric to read from the logs, e.g. 'e2e_acc'
    grammar: str
            Label stored in the 'grammar' column of every returned row
    num_bins: int
            Number of bin EDGES passed to np.linspace, so each file contributes
            num_bins - 1 rows

    Returns
    -------
    df: pandas.DataFrame
            Tall frame with columns 'step' (right edge of the bin), 'value'
            (mean of the metric inside the bin), 'ref' (position of the file in
            `files`), 'value_key' and 'grammar'

    Raises
    ------
    ValueError
            If `files` is empty or a log file yields no records
    """
    if len(files) == 0:
        raise ValueError('aggregate_series needs at least one log file')

    series_by_file = {}
    step_min, step_max = None, None
    for file in files:
        # The two per-epoch dicts returned by get_log_results are not needed here.
        steps, values, _value_by_epoch, _time_by_epoch = graphing.get_log_results(
            logfile=file, step_key='episode', skip_record_types=['sup_train_res'], value_key=value_key,
            units='thousands',
        )
        if len(steps) == 0:
            raise ValueError(f'no {value_key} records found in {file}')
        series_by_file[file] = (steps, values)
        _min, _max = min(steps), max(steps)
        if step_min is None or _min < step_min:
            step_min = _min
        # NOTE(review): this keeps the SMALLEST per-file maximum, i.e. the grid
        # stops where the shortest run stops. That looks deliberate (every bin
        # then has data from every run) but confirm it was not meant to be `>`.
        if step_max is None or _max < step_max:
            step_max = _max
    min_count = min(len(series_by_file[file][0]) for file in files)
    print('min_count', min_count)

    # The first edge sits one bin-width above step_min, so the earliest samples
    # fall below the first edge.
    bins = np.linspace(step_min + (step_max - step_min) / num_bins, step_max, num_bins)

    steps_all = []
    values_all = []
    refs_all = []
    for i, file in enumerate(files):
        steps, values = series_by_file[file]
        means, edges, _ = stats.binned_statistic(steps, values, bins=bins)
        # One row per bin, positioned at the bin's right edge.
        steps_all.extend(edges[1:])
        values_all.extend(means)
        refs_all.extend([i] * len(means))
    return pd.DataFrame({
        'step': steps_all, 'value': values_all, 'ref': refs_all,
        'value_key': [value_key] * len(steps_all),
        'grammar': [grammar] * len(steps_all)})

def aggregate_data(refs, send_arch, recv_arch, num_bins):
    """Collect binned training curves for every grammar, metric and run.

    For each run reference the module-level `log_dir` is globbed for matching
    log files (paths containing 'recv' or 'send' are skipped) and the files are
    grouped by grammar. Every metric is then binned per grammar with
    aggregate_series. All rows are also written to
    '<send_arch>_<recv_arch>.csv' (':' removed) in the working directory.

    Parameters
    ----------
    refs: list of str
            Run references, e.g. ['ibe122', 'ibe123']
    send_arch: str
            Sender architecture name, e.g. 'RNNAutoReg:LSTM'
    recv_arch: str
            Receiver architecture name, e.g. 'RNN:LSTM'
    num_bins: int
            Forwarded unchanged to aggregate_series

    Returns
    -------
    df_by_value_key: dict
            Maps each metric name ('e2e_loss', 'e2e_acc', 'send_acc',
            'recv_acc') to the concatenated frame of all grammars

    Raises
    ------
    AssertionError
            If a run has no usable log file, or no grammar was found at all
    """
    print(send_arch, recv_arch)
    send_arch_filename = send_arch.replace(':', '')
    recv_arch_filename = recv_arch.replace(':', '')
    # The grammar name is the filename token right after the two architecture names.
    arch_marker = f'{send_arch_filename}_{recv_arch_filename}_'
    # Short legend names. A grammar missing from this table keeps its own name
    # instead of aborting the whole aggregation with a KeyError.
    grammar_display = {
        'Comp': 'comp',
        'Cumrot': 'rot',
        'Permute': 'perm',
        'RandomProj': 'proj',
        'ShuffleWordsDet': 'shufdet'
    }

    files_by_grammar = defaultdict(list)
    for ref in refs:
        pattern = f'{log_dir}/log_*_{ref}_{send_arch_filename}_{recv_arch_filename}*.log'
        files = glob.glob(pattern)
        assert len(files) > 0, f'no log files match {pattern}'
        files = [file for file in files if 'recv' not in file and 'send' not in file]
        assert len(files) > 0, f'only recv/send log files match {pattern}'
        for file in files:
            grammar = file.split(arch_marker)[1].split('_')[0]
            files_by_grammar[grammar].append(file)
    assert len(files_by_grammar) > 0, 'no grammars found'
    grammars = sorted(files_by_grammar)

    df_by_value_key = {}
    for i, value_key in enumerate(['e2e_loss', 'e2e_acc', 'send_acc', 'recv_acc']):
        print(i, value_key)
        df_by_value_key[value_key] = pd.concat([
            aggregate_series(
                files=files_by_grammar[grammar], value_key=value_key,
                grammar=grammar_display.get(grammar, grammar), num_bins=num_bins)
            for grammar in grammars
        ], ignore_index=True)

    # Dicts keep insertion order, so the CSV rows stay grouped metric by metric.
    df_all = pd.concat(list(df_by_value_key.values()), ignore_index=True)
    df_all.to_csv(f'{send_arch_filename}_{recv_arch_filename}.csv')
    return df_by_value_key

# Aggregate the logs once per sender architecture and keep the result keyed by
# architecture name, so later cells can plot without re-reading the logs.
# The commented-out line is the full architecture list; only one is active.
# for send_arch in ['FC2L', 'RNNAutoReg:LSTM', 'TransDecSoft']:
df_by_value_key_by_send_arch = {}
for send_arch in ['RNNAutoReg:LSTM']:
    print(send_arch, recv_arch)
    df_by_value_key = aggregate_data(refs=refs, send_arch=send_arch, recv_arch=recv_arch, num_bins=50)
    df_by_value_key_by_send_arch[send_arch] = df_by_value_key

In [None]:
def plot_scenario(df_by_value_key, send_arch=None, recv_arch=None):
    """Plot the aggregated curves of every metric and save the figure as a PDF.

    Draws four side-by-side seaborn line plots (e2e_loss, e2e_acc, send_acc,
    recv_acc), one line per grammar, and writes the figure to
    '<send_arch>_<recv_arch>.pdf' (':' removed) in the working directory.

    Note: this redefines the `plot_scenario` of the first cell with a different
    signature.

    Parameters
    ----------
    df_by_value_key: dict
            Maps a metric name to a frame with 'step', 'value' and 'grammar'
            columns
    send_arch: str, optional
            Sender architecture used in the output filename. Defaults to the
            notebook-level `send_arch`
    recv_arch: str, optional
            Receiver architecture used in the output filename. Defaults to the
            notebook-level `recv_arch`
    """
    # The filename used to be built from notebook globals only; keep that as
    # the fallback so existing single-argument calls behave as before.
    if send_arch is None:
        send_arch = globals()['send_arch']
    if recv_arch is None:
        recv_arch = globals()['recv_arch']
    send_arch_filename = send_arch.replace(':', '')
    recv_arch_filename = recv_arch.replace(':', '')

    # TeX labels for the known metrics; unknown keys are shown verbatim.
    value_key_tex = {
        'e2e_acc': r'$\mathrm{acc}_{e2e}$',
        'send_acc': r'$\mathrm{acc}_{send}$',
        'recv_acc': r'$\mathrm{acc}_{recv}$',
        'e2e_loss': r'$\mathrm{loss}_{e2e}$',
    }

    plt.figure(figsize=(15, 3))
    for i, value_key in enumerate(['e2e_loss', 'e2e_acc', 'send_acc', 'recv_acc']):
        plt.subplot(1, 4, i + 1)
        sns.lineplot(data=df_by_value_key[value_key], x='step', y='value', hue='grammar')
        # Accuracies share a fixed [0, 1] axis; the loss is left to autoscale.
        if value_key.endswith('_acc'):
            plt.ylim([0, 1])
        plt.xlabel('steps (thousands)')
        label = value_key_tex.get(value_key, value_key)
        plt.ylabel(label)
        plt.title(label)
    plt.savefig(f'{send_arch_filename}_{recv_arch_filename}.pdf', format='pdf', bbox_inches='tight')
    plt.show()

# Font sizes in points. The larger set is kept commented out as an alternative.
# SMALL_SIZE = 16
# MEDIUM_SIZE = 20
# BIGGER_SIZE = 24

SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12

# Apply the seaborn style first, then the explicit font sizes below.
sns.set_style(style='white')

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# Plot the frames aggregated in the previous cell. The loop variable doubles as
# the module-level `send_arch` that plot_scenario reads for the PDF filename.
# for send_arch in ['FC2L', 'RNNAutoReg:LSTM', 'TransDecSoft']:
for send_arch in ['RNNAutoReg:LSTM']:
    print(send_arch, recv_arch)
    plot_scenario(df_by_value_key_by_send_arch[send_arch])

In [None]:
import csv

def dump_tall_csv(send_arch, df_by_value_key):
    """Print the combined per-metric frames, then the rows of each run.

    Work in progress: despite the name, nothing is written to disk yet; the
    function only prints.

    Parameters
    ----------
    send_arch: str
            Sender architecture name (currently unused)
    df_by_value_key: dict
            Maps a metric name to a frame that has a 'ref' column
    """
    # Column order intended for the tall CSV; unused until the writer exists.
    fieldnames = ['value_key', 'grammar', 'value', 'step']

    df_all = pd.concat(list(df_by_value_key.values()))
    print('df_all', df_all)
    # Take the run ids from the data instead of assuming exactly ten runs, so
    # extra runs are not silently dropped and missing ones print no empty frame.
    for ref in sorted(df_all['ref'].unique()):
        _df = df_all[df_all.ref == ref]
        print('_df', _df)

#     for ref in range(10):
#         _df = df_by_value_key
#         for value_key, df in df_by_value_key.items():
            
#     with open(f'{send_arch}_tall.csv', 'w') as f:
#         dict_writer = csv.DictWriter(f, fieldnames)
#         dict_writer.writeheader()
#         for value_key, df in df_by_value_key.items():
#             for row in df.to_dict(orient="records"):
#                 print(row)
#                 asdsdf

# Dump the aggregated frames of each sender architecture. Relies on
# df_by_value_key_by_send_arch and recv_arch from the earlier cells.
for send_arch in ['RNNAutoReg:LSTM']:
    print(send_arch, recv_arch)
    dump_tall_csv(send_arch, df_by_value_key_by_send_arch[send_arch])

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Scratch cell: experiment with font family / variant options on a toy plot.
# plt.rc('text', usetex=False)
plt.rc('font', family='monospace')

# Toy data: y = x^2 for x = 0..4.
X = np.arange(5)
Y = X * X
plt.plot(X, Y)
# plt.xlabel(r'$\mathrm{foo} x^2$')
# Request small caps on the x label, overriding the font family for this label only.
plt.xlabel('HelloWorld', variant='small-caps', family='sans-serif')
# plt.title(f'$x^2$ HelloWorld', variant='small-caps')
# plt.title('HelloWorld', variant='small-caps')
# plt.title('HelloWorld', stretch='ultra-expanded')
plt.show()

# ax.text(0.3, 0.7, "Hello!", fontdict = {'stretch': 'ultra-expanded'})
# ax.text(0.7, 0.3, "Hello!", fontdict = {'stretch': 'condensed'})
# ax.text(0.7, 0.7, "Hello!", fontdict = {'variant': 'small-caps'})


In [None]:
%matplotlib inline
import os
import pandas as pd
from os import path
from os.path import join
try:
    import seaborn as sns
except ImportError:
    # Only a missing package should trigger an install. A bare `except` would
    # also swallow KeyboardInterrupt and real errors raised while importing.
    import subprocess
    import sys
    print('installing seaborn')
    # Install through the kernel's own interpreter so the package lands in the
    # environment this notebook runs in; a failed install raises right here.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'seaborn'])
    import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# from https://jwalton.info/Embed-Publication-Matplotlib-Latex/
def set_size(width, fraction=1, subplots=(1, 1)):
    """Set figure dimensions to avoid scaling in LaTeX.

    Parameters
    ----------
    width: float
            Document textwidth or columnwidth in pts
    fraction: float, optional
            Fraction of the width which you wish the figure to occupy
    subplots: tuple of int, optional
            (rows, columns) of the subplot grid. The height is scaled by
            rows / columns so every panel keeps the golden-ratio shape; the
            default (1, 1) gives the single-axes height

    Returns
    -------
    fig_dim: tuple
            Dimensions of figure in inches
    """
    # Width of figure (in pts)
    fig_width_pt = width * fraction

    # Convert from pt to inches (TeX points: 72.27 per inch)
    inches_per_pt = 1 / 72.27

    # Golden ratio to set aesthetic figure height
    # https://disq.us/p/2940ij3
    golden_ratio = (5**.5 - 1) / 2

    # Figure width in inches
    fig_width_in = fig_width_pt * inches_per_pt
    # Figure height in inches; rows / columns is exactly 1.0 for the default
    # grid, so single-axes callers get the same value as before
    fig_height_in = fig_width_in * golden_ratio * (subplots[0] / subplots[1])

    return (fig_width_in, fig_height_in)

# Width in pts passed to set_size for every figure in this cell.
width = 395
# NOTE(review): this figure is created before any style is applied, and `fig` is
# rebound further down when the 1x4 figure is created; `ax` is only referenced
# from commented-out code. Looks like a leftover - confirm before removing.
fig, ax = plt.subplots(1, 1, figsize=set_size(width))

# Make sure pdflatex is runnable (presumably for the text.usetex setting in the
# style sheet written below); if not, try to install TeX Live through apt-get.
if os.system('pdflatex --version') != 0:
    print('installing texlife')
    os.system('apt-get update')
    os.system('apt-get install -y texlive-full')
# Stop here if pdflatex is still unavailable after the install attempt.
assert os.system('pdflatex --version') == 0

# Write a LaTeX-oriented style sheet next to matplotlib's bundled styles.
# NOTE(review): this writes inside the installed matplotlib package and needs
# write permission there; a user-level style directory may be preferable.
style_sheet_dir = path.join(mpl.__path__[0], 'mpl-data', 'stylelib')
# print("Your style sheets are located at: {}".format(style_sheet_dir))
tex_style_path = join(style_sheet_dir, 'tex.mplstyle')
with open(tex_style_path, 'w') as f:
    f.write("""
text.usetex: True
font.family: serif
axes.labelsize: 10
font.size: 10     
legend.fontsize: 8
xtick.labelsize: 8
ytick.labelsize: 8
""")

# Using seaborn's style. matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8*' and later releases dropped the old names, so pick whichever
# name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# With LaTeX fonts. Load the sheet by path: the bundled style library is scanned
# when matplotlib is imported, so a file written just now is not yet known by
# the name 'tex' in this session.
plt.style.use(tex_style_path)

# Only used by the commented-out sine example below.
x = np.linspace(0, 2 * np.pi, 100)
# Initialise figure instance: one row of four axes
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=set_size(width))

# Plot
# ax.plot(x, np.sin(x))
# ax.set_xlim(0, 2 * np.pi)
# ax.set_xlabel(r'$\theta$')
# ax.set_ylabel(r'$\sin (\theta)$')

# Toy data with two y values per x, one small frame per axes.
df = pd.DataFrame({'x': [0,0,1,1,2,2], 'y': [1,0,2,1,3,2]})
sns.lineplot(ax=ax1, data=df, x='x', y='y')

# The remaining three panels all use the same toy data.
df = pd.DataFrame({'x': [0,0,1,1,2,2], 'y': [1,3,2,5,3,11]})
sns.lineplot(ax=ax2, data=df, x='x', y='y')

df = pd.DataFrame({'x': [0,0,1,1,2,2], 'y': [1,3,2,5,3,11]})
sns.lineplot(ax=ax3, data=df, x='x', y='y')

df = pd.DataFrame({'x': [0,0,1,1,2,2], 'y': [1,3,2,5,3,11]})
sns.lineplot(ax=ax4, data=df, x='x', y='y')
# Label mixing plain text with inline math.
ax4.set_xlabel('foo $2^3$')

# Save and remove excess whitespace
fig.savefig('example_1.pdf', format='pdf', bbox_inches='tight')
# Copy the PDF into ../../pull/ with a shell `cp`.
os.system('cp example_1.pdf ../../pull/')