# Catastrophic Forgetting Results Notebook<a name='top'/>

This notebook contains the data analysis tools and products for all the experiments associated with this project.

## Table of Contents

<a href=#setup>Setup</a>

<a href=#experiment_1_mnist>Experiment 1 MNIST</a>
> <a href=#experiment_1_mnist_validation>Validation Phase</a>

> <a href=#experiment_1_mnist_testing>Testing Phase</a><br><br>
>> <a href=#experiment_1_mnist_testing_hypothesis>Statistical Hypothesis Testing</a><br><br>
>> <a href=#experiment_1_mnist_testing_plotting>Plotting</a>

<a href=#experiment_2_mnist>Experiment 2 MNIST</a>
> <a href=#experiment_2_mnist_validation>Validation Phase</a>

> <a href=#experiment_2_mnist_testing>Testing Phase</a><br><br>
>> <a href=#experiment_2_mnist_testing_hypothesis>Statistical Hypothesis Testing</a><br><br>
>> <a href=#experiment_2_mnist_testing_plotting>Plotting</a>

<a href=#experiment_2_mountain_car>Experiment 2 Mountain Car</a>
> <a href=#experiment_2_mountain_car_validation>Validation Phase</a><br><br>
>> <a href=#experiment_2_mountain_car_validation_sensitivity_analysis_step-size>Step-size Sensitivity Analysis</a><br><br>
>> <a href=#experiment_2_mountain_car_validation_sensitivity_analysis_other>Other Hyperparameters Sensitivity Analysis</a>

> <a href=#experiment_2_mountain_car_testing>Testing Phase</a><br><br>
>> <a href=#experiment_2_mountain_car_testing_hypothesis>Statistical Hypothesis Testing</a><br><br>
>> <a href=#experiment_2_mountain_car_testing_plotting>Plotting</a>

<a href=#experiment_2_acrobot>Experiment 2 Acrobot</a>
> <a href=#experiment_2_acrobot_validation>Validation Phase</a>

## Setup <a name='setup'/>

In [None]:
import collections
import copy
import json
import matplotlib.pyplot as plt
import mnist.tools as mnist_tools
import mountain_car.tools as mc_tools
import numpy as np
import pandas as pd
import scipy.stats as st
import seaborn as sns

from matplotlib import rc
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Rectangle
from statsmodels.stats.multicomp import MultiComparison

# setup matplotlib
%matplotlib inline
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)
sns.set_style('ticks')
sns.set_context('notebook')
sns.set_style('ticks')
sns.set_context('notebook')

# display names for the optimizer identifiers stored in the result files;
# used wherever an optimizer is shown in a legend, title or table
optimizer_nice_names = {
    'constant': 'Constant',
    'adam': 'Adam',
    'momentum': 'Momentum',
    'rms': 'RMSProp',
    'sgd': 'SGD',
}

# helper that reads result files and turns them into one DataFrame
def load_data(files):
    """Load one or more JSON result files into a single DataFrame.

    Parameters
    ----------
    files : iterable of str
        Paths of JSON files; the content of each file is appended, in the
        order given, to one list of records.

    Returns
    -------
    pandas.DataFrame
        Built from the dict returned by
        ``mnist_tools.list_of_dicts_to_dict_of_lists`` after every value has
        been passed through ``mnist_tools.to_nested_tuples``.
    """
    records = []
    for path in files:
        with open(path, 'r') as handle:
            records.extend(json.load(handle))
    columns = mnist_tools.list_of_dicts_to_dict_of_lists(records)
    # drop the raw records early; the column form is all that is needed from here on
    del records
    # replace values in place (only existing keys are reassigned)
    for name in list(columns.keys()):
        columns[name] = mnist_tools.to_nested_tuples(columns[name])
    frame = pd.DataFrame(data=columns)
    del columns
    return frame

<a href=#top>Back to Top</a>

## Experiment 1 MNIST<a name='experiment_1_mnist'/>

### Validation Phase<a name='experiment_1_mnist_validation'/>

In [None]:
# load the validation runs and keep only those flagged as successful
rdf = load_data(['experiment_1_mnist_validation.json'])
rdf = rdf[rdf['success']]

# phase orderings compared in the validation phase
E12 = ((1, 2), (3, 4))
E34 = ((3, 4), (1, 2))
E1234 = ((1, 2, 3, 4), (1, 2), (3, 4))

# pick the best SGD setting per ordering; the metric objects come from
# mnist_tools.phase_time_metric (argument meaning is defined in mnist.tools)
E12_summary = mnist_tools.get_summary(rdf[rdf['phases'] == E12])
E12_best = mnist_tools.get_best(E12_summary, mnist_tools.phase_time_metric(2, 50, 2500))['sgd']
E34_summary = mnist_tools.get_summary(rdf[rdf['phases'] == E34])
E34_best = mnist_tools.get_best(E34_summary, mnist_tools.phase_time_metric(2, 50, 2500))['sgd']
E1234_summary = mnist_tools.get_summary(rdf[rdf['phases'] == E1234])
E1234_best = mnist_tools.get_best(E1234_summary, mnist_tools.phase_time_metric(1, 50, 2500))['sgd']

# report the selected step-size and the number of runs behind each choice
for label, best_setting in (('E12', E12_best), ('E34', E34_best), ('E1234', E1234_best)):
    print('{}: lr={}, count={}'.format(label, best_setting['lr'], best_setting['count'][0]))

<a href=#top>Back to Top</a>

### Testing Phase<a name='experiment_1_mnist_testing'/>

In [None]:
# load the test runs and keep only those flagged as successful
test_results = load_data(['experiment_1_mnist_test.json'])
rdf = test_results[test_results['success']]
del test_results

# phase orderings present in the test data; the alternating orderings are
# written as repetitions of their two-phase cycle
E12 = ((1, 2), (3, 4)) * 2
E34 = ((3, 4), (1, 2)) * 2
E1234 = ((1, 2, 3, 4), (1, 2), (3, 4))
ER = ((1, 2), (3, 4)) * 4

<a href=#top>Back to Top</a>

#### Statistical Hypothesis Testing<a name='experiment_1_mnist_testing_hypothesis'/>

In [None]:
# Shared pieces for H1-H11.
# Every p-value is compared against 0.05 / 22 (presumably a Bonferroni
# correction over the project's 22 hypotheses - confirm against the paper).
alpha = 0.05 / 22
E1234_runs = rdf[rdf['phases'] == E1234]
E12_runs = rdf[rdf['phases'] == E12]
E34_runs = rdf[rdf['phases'] == E34]


def _accuracy_at_end_of_phase(runs, n_phases, column):
    """Per run: accuracy entry `column` at the last step of the first `n_phases` phases."""
    return [run['accuracies'][sum(run['phase_length'][:n_phases]) - 1][column]
            for _, run in runs.iterrows()]


def _verdict(reject):
    """Text used in the printed report for a test decision."""
    return 'Reject' if reject else 'Fail to Reject'


# One-sample tests compare against an accuracy of 0.9. The two-sided p-value is
# halved and the sign of the t statistic is checked to make the test one-sided.

# H1
accuracies = _accuracy_at_end_of_phase(E1234_runs, 1, 1)
h1_t, h1_p = st.ttest_1samp(accuracies, 0.9)
h1_p /= 2
print('H1: {} with p = {}'.format(_verdict(h1_p < alpha and h1_t > 0), h1_p))

# H2
accuracies = _accuracy_at_end_of_phase(E1234_runs, 1, 2)
h2_t, h2_p = st.ttest_1samp(accuracies, 0.9)
h2_p /= 2
print('H2: {} with p = {}'.format(_verdict(h2_p < alpha and h2_t > 0), h2_p))

# H3
accuracies = _accuracy_at_end_of_phase(E12_runs, 1, 0)
h3_t, h3_p = st.ttest_1samp(accuracies, 0.9)
h3_p /= 2
print('H3: {} with p = {}'.format(_verdict(h3_p < alpha and h3_t > 0), h3_p))

# H4
accuracies = _accuracy_at_end_of_phase(E12_runs, 2, 1)
h4_t, h4_p = st.ttest_1samp(accuracies, 0.9)
h4_p /= 2
print('H4: {} with p = {}'.format(_verdict(h4_p < alpha and h4_t > 0), h4_p))

# H5 (rejected when the mean is *below* 0.9)
accuracies = _accuracy_at_end_of_phase(E12_runs, 2, 0)
h5_t, h5_p = st.ttest_1samp(accuracies, 0.9)
h5_p /= 2
print('H5: {} with p = {}'.format(_verdict(h5_p < alpha and h5_t < 0), h5_p))

# H6: length of phase 1 against length of phase 3 (one-sided, phase 1 longer)
P1_time = [run['phase_length'][0] for _, run in E12_runs.iterrows()]
P3_time = [run['phase_length'][2] for _, run in E12_runs.iterrows()]
h6_t, h6_p = st.ttest_ind(P1_time, P3_time)
h6_p /= 2
print('H6: {} with p = {}'.format(_verdict(h6_p < alpha and h6_t > 0), h6_p))

# H7: length of phase 2 against length of phase 4 (one-sided, phase 2 longer)
P2_time = [run['phase_length'][1] for _, run in E12_runs.iterrows()]
P4_time = [run['phase_length'][3] for _, run in E12_runs.iterrows()]
h7_t, h7_p = st.ttest_ind(P2_time, P4_time)
h7_p /= 2
print('H7: {} with p = {}'.format(_verdict(h7_p < alpha and h7_t > 0), h7_p))

# H8: first phase of E1234 against the first two phases of E12 (two-sided)
E1234_time = [run['phase_length'][0] for _, run in E1234_runs.iterrows()]
E12_time = [run['phase_length'][0] + run['phase_length'][1] for _, run in E12_runs.iterrows()]
h8_t, h8_p = st.ttest_ind(E1234_time, E12_time)
print('H8: {} with p = {}'.format(_verdict(h8_p < alpha), h8_p))

# H9
accuracies = _accuracy_at_end_of_phase(E34_runs, 1, 0)
h9_t, h9_p = st.ttest_1samp(accuracies, 0.9)
h9_p /= 2
print('H9: {} with p = {}'.format(_verdict(h9_p < alpha and h9_t > 0), h9_p))

# H10
accuracies = _accuracy_at_end_of_phase(E34_runs, 2, 1)
h10_t, h10_p = st.ttest_1samp(accuracies, 0.9)
h10_p /= 2
print('H10: {} with p = {}'.format(_verdict(h10_p < alpha and h10_t > 0), h10_p))

# H11 (rejected when the mean is *below* 0.9)
accuracies = _accuracy_at_end_of_phase(E34_runs, 2, 0)
h11_t, h11_p = st.ttest_1samp(accuracies, 0.9)
h11_p /= 2
print('H11: {} with p = {}'.format(_verdict(h11_p < alpha and h11_t < 0), h11_p))

<a href=#top>Back to Top</a>

#### Plotting<a name='experiment_1_mnist_testing_plotting'/>

In [None]:
def _accumulate(stats, prefix, phase, values):
    """Fold one run's per-step `values` into the running statistics of `phase`.

    NaN entries are skipped. For the remaining steps the count, running mean,
    running sum of squared deviations, minimum and maximum stored under
    ``stats[prefix + '_...'][phase]`` are updated in place.
    """
    seen = np.where(~np.isnan(values))[0]
    observed = values[seen]
    count = stats[prefix + '_count'][phase]
    mean = stats[prefix + '_avg'][phase]
    squares = stats[prefix + '_sec'][phase]
    lowest = stats[prefix + '_min'][phase]
    highest = stats[prefix + '_max'][phase]
    delta = observed - mean[seen]
    count[seen] += 1
    mean[seen] += delta / count[seen]
    # uses the deviation from the old mean times the deviation from the new mean
    squares[seen] += delta * (observed - mean[seen])
    lowest[seen] = np.minimum(lowest[seen], observed)
    highest[seen] = np.maximum(highest[seen], observed)


# longest observed length of every phase position over all runs
max_phase_lengths = list()
for lengths in rdf['phase_length']:
    for position, length in enumerate(lengths):
        if position == len(max_phase_lengths):
            max_phase_lengths.append(0)
        max_phase_lengths[position] = max(max_phase_lengths[position], length)

data = dict()
for k in rdf['phases'].unique():
    stats = data[k] = dict()
    for name in ('d1_count', 'd2_count'):
        stats[name] = [np.zeros(n, dtype=int) for n in max_phase_lengths]
    for name in ('d1_avg', 'd1_sec', 'd1_min', 'd1_max',
                 'd2_avg', 'd2_sec', 'd2_min', 'd2_max'):
        stats[name] = [np.zeros(n, dtype=float) for n in max_phase_lengths]
    stats['phase_lengths'] = [list() for _ in max_phase_lengths]
    # minima start at 1 so that the first observation always replaces them
    for name in ('d1_min', 'd2_min'):
        for per_phase in stats[name]:
            per_phase += 1

    # which column of each 'accuracies' entry is plotted as d1 / d2 for this ordering
    if k == E1234:
        d1_index, d2_index = 1, 2
    elif k == E34:
        d1_index, d2_index = 1, 0
    else:
        assert k in (E12, ER)
        d1_index, d2_index = 0, 1

    for _, row in rdf[rdf['phases'] == k].iterrows():
        per_step = np.array(row['accuracies'])
        start = 0
        for phase, length in enumerate(row['phase_length']):
            stats['phase_lengths'][phase].append(length)
            stop = start + length
            _accumulate(stats, 'd1', phase, per_step[start:stop, d1_index])
            _accumulate(stats, 'd2', phase, per_step[start:stop, d2_index])
            start = stop

    # standard error of the mean per step; steps without data give 0 via nan_to_num
    stats['d1_sem'] = list()
    stats['d2_sem'] = list()
    for phase in range(len(max_phase_lengths)):
        for prefix in ('d1', 'd2'):
            stats[prefix + '_sem'].append(
                np.nan_to_num(np.sqrt(stats[prefix + '_sec'][phase]) / stats[prefix + '_count'][phase]))
    del stats['d1_sec']
    del stats['d2_sec']

In [None]:
# a step is only drawn when at least `threshold` runs contributed to it
threshold = 125
fig = plt.figure(figsize=(9, 6), dpi=300)
gs = GridSpec(3, 2, figure=fig)
# phase 1 spans the top row; phases 2-4 use the remaining cells (gs[1, 1] stays free for the legend)
axarr = [fig.add_subplot(cell) for cell in (gs[0, :], gs[1, 0], gs[2, 0], gs[2, 1])]
colors = sns.color_palette('colorblind', len(data.keys()))
xmax = 0


def _draw_curve(ax, experiment, dataset, phase, color, linestyle, **line_kwargs):
    """Draw mean +- SEM of `dataset` ('d1' or 'd2') for one experiment and phase.

    Returns the largest step index that was drawn.
    """
    stats = data[experiment]
    x = np.where(stats[dataset + '_count'][phase] >= threshold)[0]
    last_step = max(x)
    y = stats[dataset + '_avg'][phase][x]
    yerr = stats[dataset + '_sem'][phase][x]
    ax.fill_between(x, y - yerr, y + yerr, color=color, alpha=0.1)
    ax.plot(x, y, color=color, linestyle=linestyle, linewidth=1, **line_kwargs)
    return last_step


# phase 1 plot: all three experiments, these curves carry the legend labels
phase_1_curves = (
    (E1234, 'd1', 2, '--', r'$D_{(1 + 2)}$ in $E_{(1, 2, 3, 4)}$'),
    (E1234, 'd2', 2, '-', r'$D_{(3 + 4)}$ in $E_{(1, 2, 3, 4)}$'),
    (E12, 'd1', 0, '--', r'$D_{(1 + 2)}$ in $E_{(1, 2)}$'),
    (E12, 'd2', 0, '-', r'$D_{(3 + 4)}$ in $E_{(1, 2)}$'),
    (E34, 'd1', 1, '--', r'$D_{(1 + 2)}$ in $E_{(3, 4)}$'),
    (E34, 'd2', 1, '-', r'$D_{(3 + 4)}$ in $E_{(3, 4)}$'),
)
for experiment, dataset, color_index, linestyle, label in phase_1_curves:
    xmax = max(xmax, _draw_curve(axarr[0], experiment, dataset, 0,
                                 colors[color_index], linestyle, label=label))

# phase 2, 3 and 4 plots: only the two alternating experiments, unlabeled
later_curves = (
    (E12, 'd1', 0, '--'),
    (E12, 'd2', 0, '-'),
    (E34, 'd1', 1, '--'),
    (E34, 'd2', 1, '-'),
)
for phase in (1, 2, 3):
    for experiment, dataset, color_index, linestyle in later_curves:
        xmax = max(xmax, _draw_curve(axarr[phase], experiment, dataset, phase,
                                     colors[color_index], linestyle))

# clean up plot
for ax in axarr:
    ax.set_ylim(-0.1, 1.1)
    ax.set_yticks([0.0, 0.5, 1.0])
    ax.set_xlabel(r'Steps', labelpad=10)
axarr[0].set_ylabel(r'Phase 1 Acc.', labelpad=10)
axarr[0].set_xticks([0, 200, 400, 600, 800, 1000])
axarr[0].set_xlim(0, max(xmax, 1050))
for ax, ylabel in zip(axarr[1:], (r'Phase 2 Acc.', r'Phase 3 Acc.', r'Phase 4 Acc.')):
    ax.set_ylabel(ylabel, labelpad=10)
    ax.set_xticks([0, 100, 200])
    ax.set_xlim(0, 200)
fig.legend(frameon=False, loc=(0.615, 0.415))
fig.tight_layout(pad=1)

# save plot
fig.savefig('experiment_1_accuracies.pdf', bbox_inches='tight')

In [None]:
# text table with mean +- standard error of the phase lengths in the ER runs
rule = '|-------|---------------|\n'
table_lines = [rule, '| Phase | Steps         |\n', rule]
for phase_number, lengths in enumerate(data[ER]['phase_lengths'], start=1):
    mean_length = np.mean(lengths)
    sem_length = np.std(lengths) / np.sqrt(len(lengths))
    table_lines.append('| {0:>5} | {1:>7.2f}+-{2:4.2f} |\n'.format(phase_number, mean_length, sem_length))
table_lines.append(rule)
text = ''.join(table_lines)

# the table is written to disk and echoed in the notebook
with open('experiment_1_extended_phase_lengths.txt', 'w') as outfile:
    outfile.write(text + '\n')
print(text)

<a href=#top>Back to Top</a>

## Experiment 2 MNIST<a name='experiment_2_mnist'/>

### Validation Phase<a name='experiment_2_mnist_validation'/>

In [None]:
# load the validation runs and keep only those flagged as successful
rdf = load_data(['experiment_2_mnist_validation.json'])
rdf = rdf[rdf['success']]

# SGD runs with a positive momentum are treated as their own optimizer
sgd_with_momentum = [
    label for label in rdf.index
    if (rdf.at[label, 'optimizer'] == 'sgd') and (float(rdf.at[label, 'momentum']) > 0)
]
for label in sgd_with_momentum:
    rdf.at[label, 'optimizer'] = 'momentum'

# best setting per optimizer, selected on the runs with init_seed below 50
validation_summary = mnist_tools.get_summary(rdf[rdf['init_seed'] < 50])
best = mnist_tools.get_best(validation_summary, mnist_tools.total_time_metric(50, 2500))

# report the selected hyperparameters and the number of runs behind each choice
best_sgd = best['sgd']
print('{}: lr={}, count={}'.format(
    optimizer_nice_names['sgd'], best_sgd['lr'], best_sgd['count'][0]))
best_momentum = best['momentum']
print('{}: lr={}, momentum={}, count={}'.format(
    optimizer_nice_names['momentum'], best_momentum['lr'], best_momentum['momentum'], best_momentum['count'][0]))
best_rms = best['rms']
print('{}: lr={}, rho={}, count={}'.format(
    optimizer_nice_names['rms'], best_rms['lr'], best_rms['rho'], best_rms['count'][0]))
best_adam = best['adam']
print('{}: lr={}, beta_1={}, beta_2={}, count={}'.format(
    optimizer_nice_names['adam'], best_adam['lr'], best_adam['beta_1'], best_adam['beta_2'], best_adam['count'][0]))

<a href=#top>Back to Top</a>

### Testing Phase<a name='experiment_2_mnist_testing'/>

In [None]:
# load the test runs and keep only those flagged as successful
rdf = load_data(['experiment_2_mnist_test.json'])
rdf = rdf[rdf['success']]

# relabel SGD runs that used a positive momentum as the 'momentum' optimizer
for row_label in rdf.index:
    is_sgd = rdf.at[row_label, 'optimizer'] == 'sgd'
    # momentum is only parsed for SGD rows
    if is_sgd and float(rdf.at[row_label, 'momentum']) > 0:
        rdf.at[row_label, 'optimizer'] = 'momentum'

<a href=#top>Back to Top</a>

#### Statistical Hypothesis Testing<a name='experiment_2_mnist_testing_hypothesis'/>

In [None]:
def _group_by_optimizer(measure):
    """Collect `measure(run)` for every non-'constant' run, keyed by optimizer."""
    groups = {name: list() for name in list(set(rdf['optimizer'].unique()) - {'constant'})}
    for _, run in rdf.iterrows():
        if run['optimizer'] != 'constant':
            groups[run['optimizer']].append(measure(run))
    return groups


def _long_frame(groups):
    """Flatten {optimizer: samples} into the two-column frame used for Tukey's HSD."""
    return pd.DataFrame({
        'optimizer': [name for name, samples in groups.items() for _ in samples],
        'accuracy': [sample for samples in groups.values() for sample in samples],
    })


# Each hypothesis runs a one-way ANOVA over the optimizers followed by Tukey's
# HSD; both use the level 0.05 / 22. The names `accuracies` and the column
# 'accuracy' are kept for every measure, including the non-accuracy ones.
alpha = 0.05 / 22

# H13: first accuracy entry at the end of phase 2
accuracies = _group_by_optimizer(
    lambda run: run['accuracies'][run['phase_length'][0] + run['phase_length'][1] - 1][0])
h13_t, h13_p = st.f_oneway(*accuracies.values())
print('H13: {} with p = {}'.format('Reject' if h13_p < alpha else 'Fail to Reject', h13_p))
df = _long_frame(accuracies)
h13_tukey = str(MultiComparison(df['accuracy'], df['optimizer']).tukeyhsd(alpha).summary())
print(h13_tukey)

# H14: length of phase 1 relative to length of phase 3
accuracies = _group_by_optimizer(
    lambda run: run['phase_length'][0] / run['phase_length'][2])
h14_t, h14_p = st.f_oneway(*accuracies.values())
print('H14: {} with p = {}'.format('Reject' if h14_p < alpha else 'Fail to Reject', h14_p))
df = _long_frame(accuracies)
h14_tukey = str(MultiComparison(df['accuracy'], df['optimizer']).tukeyhsd(alpha).summary())
print(h14_tukey)

# H15: activation overlap at the end of phase 1
accuracies = _group_by_optimizer(
    lambda run: run['activation_overlap'][run['phase_length'][0] - 1])
h15_t, h15_p = st.f_oneway(*accuracies.values())
print('H15: {} with p = {}'.format('Reject' if h15_p < alpha else 'Fail to Reject', h15_p))
df = _long_frame(accuracies)
h15_tukey = str(MultiComparison(df['accuracy'], df['optimizer']).tukeyhsd(alpha).summary())
print(h15_tukey)

# H16: activation overlap at the end of phase 2
accuracies = _group_by_optimizer(
    lambda run: run['activation_overlap'][run['phase_length'][0] + run['phase_length'][1] - 1])
h16_t, h16_p = st.f_oneway(*accuracies.values())
print('H16: {} with p = {}'.format('Reject' if h16_p < alpha else 'Fail to Reject', h16_p))
df = _long_frame(accuracies)
h16_tukey = str(MultiComparison(df['accuracy'], df['optimizer']).tukeyhsd(alpha).summary())
print(h16_tukey)

<a href=#top>Back to Top</a>

#### Plotting<a name='experiment_2_mnist_testing_plotting'/>

In [None]:
def _accumulate(stats, prefix, phase, values):
    """Fold one run's per-step `values` into the running statistics of `phase`.

    NaN entries are skipped. For the remaining steps the count, running mean,
    running sum of squared deviations, minimum and maximum stored under
    ``stats[prefix + '_...'][phase]`` are updated in place.
    """
    seen = np.where(~np.isnan(values))[0]
    observed = values[seen]
    count = stats[prefix + '_count'][phase]
    mean = stats[prefix + '_avg'][phase]
    squares = stats[prefix + '_sec'][phase]
    lowest = stats[prefix + '_min'][phase]
    highest = stats[prefix + '_max'][phase]
    delta = observed - mean[seen]
    count[seen] += 1
    mean[seen] += delta / count[seen]
    # uses the deviation from the old mean times the deviation from the new mean
    squares[seen] += delta * (observed - mean[seen])
    lowest[seen] = np.minimum(lowest[seen], observed)
    highest[seen] = np.maximum(highest[seen], observed)


# longest observed length of every phase position over all runs
max_phase_lengths = list()
for _, row in rdf.iterrows():
    for i, l in enumerate(row['phase_length']):
        if len(max_phase_lengths) == i:
            max_phase_lengths.append(0)
        max_phase_lengths[i] = max(max_phase_lengths[i], l)

# Aggregated quantities: accuracy columns 0 and 1 (d1, d2), running online
# accuracy, activation overlap (ao) and sparse activation overlap (sao).
metrics = ('d1', 'd2', 'online', 'ao', 'sao')
# Starting value of every running minimum. d1/d2 keep their original seed of 1.
# The other three used to start at 0, which pinned the tracked minimum to 0 for
# non-negative data; +inf makes the first observation always win.
# Steps that no run reaches therefore keep the seed value.
min_seed = {'d1': 1.0, 'd2': 1.0, 'online': np.inf, 'ao': np.inf, 'sao': np.inf}

data = dict()
# NOTE: the optimizers are taken from `best`, which is created by the
# validation cell - that cell has to run before this one.
for k, _best_setting in best.items():
    data[k] = dict()
    for prefix in metrics:
        data[k][prefix + '_count'] = [np.zeros(n, dtype=int) for n in max_phase_lengths]
    for prefix in metrics:
        data[k][prefix + '_avg'] = [np.zeros(n, dtype=float) for n in max_phase_lengths]
        data[k][prefix + '_sec'] = [np.zeros(n, dtype=float) for n in max_phase_lengths]
        data[k][prefix + '_min'] = [np.full(n, min_seed[prefix], dtype=float) for n in max_phase_lengths]
        data[k][prefix + '_max'] = [np.zeros(n, dtype=float) for n in max_phase_lengths]
    data[k]['phase_lengths'] = [list() for _ in max_phase_lengths]

    kdf = rdf[rdf['optimizer'] == k]
    for _, row in kdf.iterrows():
        per_step_accuracy = np.array(row['accuracies'])
        correct = np.array(row['correct'])
        overlap = np.array(row['activation_overlap'])
        sparse_overlap = np.array(row['sparse_activation_overlap'])
        j = 0
        for i, l in enumerate(row['phase_length']):
            data[k]['phase_lengths'][i].append(l)
            steps = slice(j, j + l)
            _accumulate(data[k], 'd1', i, per_step_accuracy[steps, 0])
            _accumulate(data[k], 'd2', i, per_step_accuracy[steps, 1])
            # running mean of 'correct' since the start of the phase
            _accumulate(data[k], 'online', i, np.cumsum(correct[steps]) / (np.arange(l) + 1))
            _accumulate(data[k], 'ao', i, overlap[steps])
            _accumulate(data[k], 'sao', i, sparse_overlap[steps])
            j += l

    # standard error of the mean per step; steps without data give 0 via nan_to_num
    for prefix in metrics:
        data[k][prefix + '_sem'] = list()
    for i in range(len(max_phase_lengths)):
        for prefix in metrics:
            data[k][prefix + '_sem'].append(
                np.nan_to_num(np.sqrt(data[k][prefix + '_sec'][i]) / data[k][prefix + '_count'][i]))
    for prefix in metrics:
        del data[k][prefix + '_sec']

In [None]:
# a step is only drawn when at least `threshold` runs contributed to it
threshold = 125
fig, axarr = plt.subplots(4, 1, figsize=(5, 8), dpi=300, sharex=True)
colors = sns.color_palette('colorblind', len(data.keys()))
xmax = 0


def _band_and_line(ax, stats, metric, phase, color, linestyle, **line_kwargs):
    """Draw mean +- SEM of `metric` for one phase; returns the largest step drawn."""
    x = np.where(stats[metric + '_count'][phase] >= threshold)[0]
    last_step = max(x)
    y = stats[metric + '_avg'][phase][x]
    yerr = stats[metric + '_sem'][phase][x]
    ax.fill_between(x, y - yerr, y + yerr, color=color, alpha=0.1)
    ax.plot(x, y, color=color, linestyle=linestyle, linewidth=1, **line_kwargs)
    return last_step


# one panel per phase, one color per optimizer (alphabetical order)
for phase, ax in enumerate(axarr):
    # dashed curve: d2 in phases 1 and 3, d1 in phases 2 and 4
    other_phase = (phase + 1) % 2 + 1
    for color, optimizer in zip(colors, sorted(data)):
        stats = data[optimizer]
        xmax = max(xmax, _band_and_line(ax, stats, 'd{}'.format(other_phase), phase, color, '--'))
        # solid curve: online accuracy, labeled for the legend
        xmax = max(xmax, _band_and_line(ax, stats, 'online', phase, color, '-',
                                        label=optimizer_nice_names[optimizer]))

# axis cosmetics (the x-limit needs the final xmax, hence the second pass)
for phase, ax in enumerate(axarr):
    ax.set_ylim(-0.05, 1.05)
    ax.set_yticks([0.0, 0.5, 1.0])
    ax.set_xlim((0, xmax * 1.05))
    ax.set_ylabel('Phase {}'.format(phase + 1), labelpad=10)
axarr[0].set_title('Accuracy')
axarr[-1].set_xticks([0, 50, 100, 150])
axarr[-1].set_xlabel('Steps', labelpad=10)
plt.legend(frameon=False, loc=(1, 3.6))
fig.subplots_adjust(hspace=0.15)
fig.savefig('experiment_2_mnist_accuracies.pdf', bbox_inches='tight')

In [None]:
# phase_lengths[phase][optimizer] -> list with the length of that phase in every run
phase_lengths = [{optimizer: list() for optimizer in rdf['optimizer'].unique()} for _ in range(4)]
for _, row in rdf.iterrows():
    for phase, per_optimizer in enumerate(phase_lengths):
        per_optimizer[row['optimizer']].append(row['phase_length'][phase])

# text table with mean +- standard error of every phase length per optimizer
rule = '|-----------|------------------|------------------|------------------|------------------|'
text = rule + '\n'
text += '| Optimizer | Steps in Phase 1 | Steps in Phase 2 | Steps in Phase 3 | Steps in Phase 4 |\n'
text += rule + '\n'
for optimizer in sorted(phase_lengths[0]):
    cells = list()
    for per_optimizer in phase_lengths:
        samples = per_optimizer[optimizer]
        cells.append(np.mean(samples))
        cells.append(np.std(samples) / np.sqrt(len(samples)))
    text += '| {0:>9} |   {1:>6.2f}+-{2:<4.2f}   |   {3:>6.2f}+-{4:<4.2f}   |   {5:>6.2f}+-{6:<4.2f}   |   {7:>6.2f}+-{8:<4.2f}   |\n'.format(
        optimizer_nice_names[optimizer], *cells)
text += rule

# the table is written to disk and echoed in the notebook
with open('experiment_2_mnist_phase_lengths.txt', 'w') as outfile:
    print(text, file=outfile)
print(text)

In [None]:
# Grid of phase-length histograms: one row per phase, one column per optimizer.
plt.close()

# phase_lengths[optimizer][phase] -> list with the length of that phase in every run
phase_lengths = {optimizer: [list() for _ in max_phase_lengths] for optimizer in rdf['optimizer'].unique()}
for _, row in rdf.iterrows():
    optimizer = row['optimizer']
    for i, phase_length in enumerate(row['phase_length']):
        phase_lengths[optimizer][i].append(phase_length)

optimizers = sorted(phase_lengths.keys())
n_phases = len(max_phase_lengths)
n_optimizers = len(optimizers)
colors = sns.color_palette('colorblind', n_optimizers)

# The axes are indexed as axmat[phase, optimizer], so the grid needs n_phases
# rows and n_optimizers columns. The previous (n_optimizers, n_phases) shape
# only worked when both counts happened to be equal and raised an IndexError
# otherwise. squeeze=False keeps axmat two-dimensional for a single row/column.
fig, axmat = plt.subplots(n_phases,
                          n_optimizers,
                          figsize=(2 * n_optimizers, 2 * n_phases),
                          dpi=300,
                          sharex=True,
                          sharey=True,
                          squeeze=False)
for j, optimizer in enumerate(optimizers):
    for i in range(n_phases):
        ax = axmat[i, j]
        ax.set_xlim(0, 400)
        ax.set_ylim(0, 150)
        ax.hist(phase_lengths[optimizer][i], bins=range(0, max(max_phase_lengths), 10), color=colors[j])
        if i == 0:
            # top row carries the optimizer names
            ax.set_title(optimizer_nice_names[optimizer], pad=10)
        if i == n_phases - 1:
            # bottom row (last phase) carries the x-axis annotation
            ax.set_xlabel('Phase Length', labelpad=17)
            ax.set_xticks([0, 100, 200, 300, 400])
        if j == 0:
            # left column carries the phase names
            ax.set_ylabel('Phase {}'.format(i + 1), labelpad=12)
            ax.set_yticks([0, 50, 100, 150])
plt.tight_layout()
fig.savefig('experiment_2_mnist_phase_length_distribution.pdf', bbox_inches='tight')

In [None]:
# Table of the accuracy on both tasks measured at the last step of every phase,
# for every optimizer except the 'constant' baseline.
# accuracies[optimizer][phase][task] -> list with one accuracy per run in rdf.
sorted_optimizers = sorted(set(rdf['optimizer'].unique()) - {'constant'})
accuracies = {name: [[list() for j in range(2)] for i in range(4)] for name in sorted_optimizers}
for _, run in rdf.iterrows():
    if run['optimizer'] == 'constant':
        continue
    for i in range(4):
        # index of the final step of phase i within the run's accuracy trace
        last_step = sum(run['phase_length'][:i + 1]) - 1
        for j in range(2):
            accuracies[run['optimizer']][i][j].append(run['accuracies'][last_step][j])

text = ''
text += '|-----------|-------|-------------------|-------------------|\n'
text += '| Optimizer | Phase | Accuracy on 1 + 2 | Accuracy on 3 + 4 |\n'
text += '|-----------|-------|-------------------|-------------------|\n'
for position, optimizer in enumerate(sorted_optimizers):
    for phase in range(4):
        # Separate optimizers with a rule; the first one directly follows the
        # header rule. Keyed on position rather than on the name 'adam' so the
        # table stays well-formed whichever optimizer happens to sort first.
        if (position != 0) and (phase == 0):
            text += '|-----------|-------|-------------------|-------------------|\n'
        task_1 = accuracies[optimizer][phase][0]
        task_2 = accuracies[optimizer][phase][1]
        text += '| {0:>9} |   {1}   |   {2:>4.2f}+-{3:<6.4f}    |   {4:>4.2f}+-{5:<6.4f}    |\n'.format(
            # the optimizer name is printed once, on the second row of its group
            optimizer_nice_names[optimizer] if phase == 1 else '',
            # phases are reported 1-based, matching the other tables and figures
            phase + 1,
            np.mean(task_1), np.std(task_1) / np.sqrt(len(task_1)),
            np.mean(task_2), np.std(task_2) / np.sqrt(len(task_2)))
text += '|-----------|-------|-------------------|-------------------|\n'
with open('experiment_2_mnist_after_phase_accuracies.txt', 'w') as outfile:
    print(text, file=outfile)
print(text)

In [None]:
# Split violin plot comparing, per optimizer, how long phase 1 and phase 3 took.
# phase_lengths[phase][optimizer] -> list with one phase length per run in rdf.
phase_lengths = [{name: list() for name in rdf['optimizer'].unique()} for _ in range(4)]
for _, run in rdf.iterrows():
    for phase, lengths_by_optimizer in enumerate(phase_lengths):
        lengths_by_optimizer[run['optimizer']].append(run['phase_length'][phase])

# Long-form records for seaborn: two entries per run (phase 1, then phase 3).
boxplot_data = {'optimizer': [], 'phase': [], 'phase_length': []}
for optimizer in sorted(phase_lengths[2]):
    nice_name = optimizer_nice_names[optimizer]
    paired_lengths = zip(phase_lengths[0][optimizer], phase_lengths[2][optimizer])
    for first_phase_length, third_phase_length in paired_lengths:
        boxplot_data['optimizer'].extend([nice_name, nice_name])
        boxplot_data['phase'].extend(['Phase 1', 'Phase 3'])
        boxplot_data['phase_length'].extend([first_phase_length, third_phase_length])

fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=300)
ax.set_ylabel('Phase Length', labelpad=20)
sns.violinplot(
    x='optimizer',
    y='phase_length',
    hue='phase',
    data=boxplot_data,
    cut=0,
    split=True,
    inner='quartile',
    ax=ax)
sns.despine(offset=10, trim=True)
ax.legend(frameon=False, loc=(0.6775, 0.9))
fig.savefig('experiment_2_mnist_relearning.pdf', bbox_inches='tight')

In [None]:
# Table of the relearning speedup per optimizer: the ratio of the length of
# phase 1 to the length of phase 3, averaged over runs, with its standard error.
ratios = {optimizer: list() for optimizer in rdf['optimizer'].unique()}
for _, row in rdf.iterrows():
    ratios[row['optimizer']].append(row['phase_length'][0] / row['phase_length'][2])

# A single rule string is reused for every horizontal line so the borders
# cannot drift apart (two of them used to read '|---------- |').
table_rule = '|-----------|------------|\n'
text = ''
text += table_rule
text += '| Optimizer |  Speedup   |\n'
text += table_rule
for optimizer in sorted(ratios.keys()):
    text += '| {0:>9} | {1:>4.2f}+-{2:<4.2f} |\n'.format(
        optimizer_nice_names[optimizer],
        np.mean(ratios[optimizer]),
        np.std(ratios[optimizer]) / np.sqrt(len(ratios[optimizer])))
text += table_rule
with open('experiment_2_mnist_speedup.txt', 'w') as outfile:
    print(text, file=outfile)
print(text)

In [None]:
# Activation overlap (top row) and sparse activation overlap (bottom row) per
# phase (columns), one curve per optimizer with a +-SEM band.
# Only steps whose '*_count' entry is at least `threshold` are drawn.
threshold = 125
fig, axmat = plt.subplots(2, 4, sharex=True, dpi=300, figsize=(12, 6))
colors = sns.color_palette('colorblind', len(data.keys()))
xmax = 0
# Iterate over the columns of the grid created above; the loop previously
# relied on an `axarr` variable that this cell never defines.
for j in range(axmat.shape[1]):
    for i, (k, v) in enumerate(sorted(data.items(), key=lambda x: x[0])):
        for row, prefix in enumerate(['ao', 'sao']):
            x = np.where(v['{}_count'.format(prefix)][j] >= threshold)[0]
            if x.size:
                # an empty selection simply contributes nothing to the x range
                xmax = max(xmax, x.max())
            y = v['{}_avg'.format(prefix)][j][x]
            yerr = v['{}_sem'.format(prefix)][j][x]
            # label each optimizer exactly once so the legend has no duplicates
            label = optimizer_nice_names[k] if (row == 0) and (j == 0) else None
            axmat[row, j].plot(
                x,
                y,
                label=label,
                linewidth=1,
                color=colors[i])
            axmat[row, j].fill_between(
                x,
                y - yerr,
                y + yerr,
                color=colors[i],
                alpha=0.3)

axmat[0, 0].set_ylabel('Activation Overlap', labelpad=10)
axmat[1, 0].set_ylabel('Sparse Activation Overlap', labelpad=10)
for i in range(4):
    axmat[0, i].set_title('Phase {}'.format(i + 1), pad=10)
    axmat[1, i].set_xticks([0, 50, 100, 150])
    axmat[1, i].set_xlabel('Steps', labelpad=10)
    for j in range(2):
        axmat[j, i].set_xlim(0, xmax * 1.05)
        axmat[j, i].set_ylim(0.43, 0.93)
        if i == 0:
            axmat[j, i].set_yticks([0.5, 0.7, 0.9])
        else:
            # only the left-most column shows y ticks and tick labels
            axmat[j, i].tick_params(axis='y', which='both', left=False)
            axmat[j, i].set_yticklabels([])
axmat[0, 0].legend(loc='upper right', bbox_to_anchor=(4.6, 0.75), frameon=False)
fig.subplots_adjust(wspace=0.0, hspace=0.1)
fig.savefig('experiment_2_mnist_activation_overlap.pdf', bbox_inches='tight')

<a href=#top>Back to Top</a>

## Experiment 2 Mountain Car<a name='experiment_2_mountain_car'/>

### Validation Phase<a name='experiment_2_mountain_car_validation'/>

In [None]:
# Load the Mountain Car validation results through the project's mc_tools helpers.
data = mc_tools.load_clean_data(['experiment_2_mountain_car_validation.json'])
# 'auc' is the criterion handed to get_best
# (presumably area under the learning curve; confirm in mountain_car.tools).
best = mc_tools.get_best(data, 'auc')
# Derived views of the selected results, reused by the plotting cells below.
best_by_optimizer = mc_tools.get_best_by_optimizer(data, best)
best_by_optimizer_summary = mc_tools.get_best_by_optimizer_summary(data, best, best_by_optimizer)
# Last expression of the cell: display the first ten rows of `best`.
best.head(n=10)

<a href=#top>Back to Top</a>

#### Step-size Sensitivity Analysis<a name='experiment_2_mountain_car_validation_sensitivity_analysis_step-size'/>

In [None]:
# Build, for every optimizer except 'constant', the summary rows ordered by
# step-size ('lr'); these feed the step-size sensitivity figure.
data = mc_tools.load_clean_data(['experiment_2_mountain_car_validation.json'])
summary = mc_tools.get_summary(data)
lr_comparison = {
    name: summary[summary['optimizer'] == name].sort_values('lr')
    for name in set(summary['optimizer'].unique()) - {'constant'}}

# Momentum and RMSProp have a second hyperparameter; pin it to one value so
# that only the step-size varies along each curve.
momentum_rows = lr_comparison['momentum']
lr_comparison['momentum'] = momentum_rows[momentum_rows['momentum'] == 0.81]
rms_rows = lr_comparison['rms']
lr_comparison['rms'] = rms_rows[rms_rows['rho'] == 0.99]

In [None]:
# Step-size sensitivity figure: four stacked panels sharing a log2 x axis, one
# curve per optimizer with a +-stderr band.
colors = sns.color_palette(n_colors=len(lr_comparison.keys()))
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True, dpi=300, figsize=(4, 10))
sort_key = lambda x: x[0]
# (axes, column prefix) pairs; '<prefix>_mean' and '<prefix>_stderr' are plotted.
metric_panels = [
    (ax1, 'mean_accuracy'),
    (ax2, 'mean_activation_overlap'),
    (ax3, 'mean_sparse_activation_overlap'),
    (ax4, 'mean_pairwise_interference')]
for i, (optimizer, value) in enumerate(sorted(lr_comparison.items(), key=sort_key)):
    # drop step-sizes for which no final accuracy is available
    value = value[value['final_accuracy_mean'].notnull()]
    x = value['lr']
    for panel, metric in metric_panels:
        try:
            y = value['{}_mean'.format(metric)]
            yerr = value['{}_stderr'.format(metric)]
        except KeyError:
            # The accuracy columns are mandatory; the remaining metrics are
            # optional and their panel is left without a curve when absent.
            if panel is ax1:
                raise
            continue
        panel.plot(
            x,
            y,
            '-o',
            # only the top panel is labelled so fig.legend lists each optimizer once
            label=optimizer_nice_names[optimizer] if panel is ax1 else None,
            markersize=2,
            linewidth=1,
            color=colors[i])
        panel.fill_between(
            x,
            y - yerr,
            y + yerr,
            alpha=0.3,
            color=colors[i])

# `basex` was renamed to `base` in matplotlib 3.3 and later removed; try the
# current spelling first and fall back for older installations.
try:
    ax1.set_xscale('log', base=2)
except (TypeError, ValueError):
    ax1.set_xscale('log', basex=2)
# Raw strings keep the LaTeX backslashes without invalid-escape warnings.
ax1.set_ylabel(r'$\overline{\mbox{RMSVE}}$', labelpad=17.5)
ax1.set_ylim(40, 100)
ax1.set_yticks([50, 70, 90])
ax2.set_ylabel(r'$\overline{\mbox{Activation Overlap}}$', labelpad=15)
ax2.set_ylim(- 0.25, 5.1)
ax2.set_yticks([0.0, 1.5, 3, 4.5])
ax3.set_ylabel(r'$\overline{\mbox{Sparse Activation Overlap}}$', labelpad=10)
ax3.set_ylim(- 0.05, 0.7)
ax3.set_yticks([0.0, 0.25, 0.5])
ax4.set_ylabel(r'$\overline{\mbox{Pairwise Interference}}$', labelpad=10)
ax4.set_ylim(0.94, 1)
ax4.set_yticks([0.95, 0.97, 0.99])
ax3.set_xlim(2 ** (-18), 2 ** (-3))
ax4.set_xticks([2 ** (-18), 2 ** (-13), 2 ** (-8), 2 ** (-3)])
ax4.set_xlabel('Step-size', labelpad=10)
fig.legend(loc='upper right', bbox_to_anchor=(1.33, 0.7775), frameon=False)
fig.subplots_adjust(hspace=0.0)
fig.savefig('experiment_2_mountain_car_lr.pdf', bbox_inches='tight')

<a href=#top>Back to Top</a>

#### Other Hyperparameters Sensitivity Analysis<a name='experiment_2_mountain_car_validation_sensitivity_analysis_other'/>

In [None]:
# Build two per-hyperparameter views of the validation data: one holding only
# the momentum runs and one holding only the RMSProp runs. In each view the
# 'optimizer' field is overwritten with the run's second hyperparameter
# ('momentum' or 'rho') before the summary helper is applied.
momentum_data = mc_tools.load_clean_data(['experiment_2_mountain_car_validation.json'])
rms_data = copy.deepcopy(momentum_data)

# keep the momentum runs and relabel them by their momentum value
momentum_data[:] = [entry for entry in momentum_data if entry['optimizer'] == 'momentum']
for entry in momentum_data:
    entry['optimizer'] = entry['momentum']

# keep the rms runs and relabel them by their rho value
rms_data[:] = [entry for entry in rms_data if entry['optimizer'] == 'rms']
for entry in rms_data:
    entry['optimizer'] = entry['rho']

# NOTE(review): other cells call get_best_by_optimizer_summary with three
# arguments; confirm the single-argument form is supported by mountain_car.tools.
momentum_summary = mc_tools.get_best_by_optimizer_summary(momentum_data)
rms_summary = mc_tools.get_best_by_optimizer_summary(rms_data)

In [None]:
# Sensitivity to the second hyperparameter: a 4 x 2 grid with one row per metric
# and one column per optimizer family (momentum on the left, RMSProp on the
# right). Each curve is one hyperparameter value with a +-stderr band.
colors = sns.color_palette(n_colors=max(len(momentum_summary.keys()), len(rms_summary.keys())))
fig, axmat = plt.subplots(4, 2, sharex=True, dpi=300, figsize=(7, 10))
sort_key = lambda x: '~' if x[0] == 'constant' else x[0]
metric_keys = ['accuracy', 'activation_overlap', 'sparse_activation_overlap', 'pairwise_interference']
for i, summary in enumerate([momentum_summary, rms_summary]):
    ordered_items = sorted(summary.items(), key=sort_key)
    for j, key in enumerate(metric_keys):
        ax = axmat[j, i]
        for k, (hyperparameter, value) in enumerate(ordered_items):
            try:
                y = value[key + '_mean']
                yerr = value[key + '_stderr']
                x = np.arange(len(y)) + 1
                # only the top-left panel contributes entries to the legend
                label = hyperparameter if (i == 0) and (j == 0) else None
                ax.plot(x, y, label=label, linewidth=1, color=colors[k])
                ax.fill_between(x, y - yerr, y + yerr, alpha=0.3, color=colors[k])
            except KeyError:
                # metric not available for this hyperparameter value: skip the curve
                pass

for column, title in enumerate(['Momentum', 'RMSProp']):
    axmat[0, column].set_title(title)

# per-row cosmetics: (y label, label padding, y limits, y ticks)
row_styles = [
    ('RMSVE', 17.5, (37, 90), [40, 60, 80]),
    ('Activation Overlap', 10, (-0.05, 0.9), [0, 0.25, 0.5, 0.75]),
    ('Sparse Activation Overlap', 10, (-0.05, 0.7), [0, 0.25, 0.5]),
    ('Pairwise Interference', 15, (0.75, 1.05), [0.8, 0.9, 1])]
for row, (ylabel, pad, _, _) in enumerate(row_styles):
    axmat[row, 0].set_ylabel(ylabel, labelpad=pad)
for column in range(2):
    for row, (_, _, ylim, yticks) in enumerate(row_styles):
        axmat[row, column].set_ylim(*ylim)
        axmat[row, column].set_yticks(yticks)
    axmat[3, column].set_xlim(0, 500)
    axmat[3, column].set_xlabel('Episode', labelpad=10)
# the right-hand column shares its scale with the left one, so hide its tick labels
for row in range(4):
    axmat[row, 1].set_yticklabels([])
fig.legend(loc='upper right', bbox_to_anchor=(1.025, 0.7675), frameon=False)
fig.subplots_adjust(hspace=0.0, wspace=0.1)
fig.savefig('experiment_2_mountain_car_momentum_and_rho.pdf', bbox_inches='tight')

<a href=#top>Back to Top</a>

### Testing Phase<a name='experiment_2_mountain_car_testing'/>

In [None]:
# Load the Mountain Car results, drop runs whose last accuracy value is NaN,
# then select the best configurations with 'auc' as the criterion.
# NOTE(review): this cell sits under "Testing Phase" but reads the validation
# JSON; confirm whether a separate test-results file should be loaded instead.
data = mc_tools.load_clean_data(['experiment_2_mountain_car_validation.json'])

# clean data
total_entries = len(data)
finite_entries = [entry for entry in data if not np.isnan(entry['accuracy'][-1])]
print('Deleting {} of {} entries'.format(total_entries - len(finite_entries), total_entries))
data[:] = finite_entries

# find best
best = mc_tools.get_best(data, 'auc')
best_by_optimizer = mc_tools.get_best_by_optimizer(data, best)
best_by_optimizer_summary = mc_tools.get_best_by_optimizer_summary(data, best, best_by_optimizer)
best.head(n=10)

<a href=#top>Back to Top</a>

#### Statistical Hypothesis Testing<a name='experiment_2_mountain_car_testing_hypothesis'/>

In [None]:
# Significance level shared by every test in this cell (presumably a Bonferroni
# correction over the 22 hypotheses H1-H22; confirm against the write-up).
# H19 previously compared against 0.05 / 19 while its own Tukey test and every
# other hypothesis used 0.05 / 22.
corrected_alpha = 0.05 / 22


def _metric_by_optimizer(runs_by_optimizer, key, reduce=None):
    """Collect one value per run of `key` for every optimizer except 'constant'.

    Args:
        runs_by_optimizer: mapping from optimizer name to a list of run dicts.
        key: entry to read from each run dict.
        reduce: optional callable applied to each run's entry (e.g. np.mean);
            when None the entry is stored unchanged.

    Returns:
        dict mapping optimizer name to the list of collected values.
    """
    collected = dict()
    for optimizer in list(set(runs_by_optimizer.keys()) - {'constant'}):
        collected[optimizer] = [
            run[key] if reduce is None else reduce(run[key])
            for run in runs_by_optimizer[optimizer]]
    return collected


def _anova_with_tukey(hypothesis, samples, column, alpha):
    """Run a one-way ANOVA over `samples`, then a Tukey HSD test, printing both.

    Args:
        hypothesis: label printed in front of the decision, e.g. 'H17'.
        samples: dict mapping optimizer name to a list of observations.
        column: name given to the observation column of the long-form frame.
        alpha: significance level for the ANOVA decision and the Tukey test.

    Returns:
        (statistic, p_value, frame, tukey_summary) where `frame` is the
        long-form DataFrame passed to MultiComparison and `tukey_summary` is
        the Tukey HSD summary table rendered as a string.
    """
    statistic, p_value = st.f_oneway(* samples.values())
    print('{}: {} with p = {}'.format(
        hypothesis, 'Reject' if (p_value < alpha) else 'Fail to Reject', p_value))
    frame = {'optimizer': list(), column: list()}
    for name, observations in samples.items():
        frame[column] += observations
        frame['optimizer'] += [name] * len(observations)
    frame = pd.DataFrame(frame)
    tukey = str(MultiComparison(frame[column], frame['optimizer']).tukeyhsd(alpha).summary())
    print(tukey)
    return statistic, p_value, frame, tukey


# H12: two-sample t-test on the final accuracy of 'constant' versus 'sgd'
constant = [item['final_accuracy'] for item in best_by_optimizer['constant']]
sgd = [item['final_accuracy'] for item in best_by_optimizer['sgd']]
h12_t, h12_p = st.ttest_ind(constant, sgd)
print('H12: {} with p = {}'.format(
    'Reject' if (h12_p < corrected_alpha) else 'Fail to Reject', h12_p))

# H17: per-run mean of the activation overlap
mean_activation_overlap = _metric_by_optimizer(best_by_optimizer, 'activation_overlap', np.mean)
h17_t, h17_p, df, h17_tukey = _anova_with_tukey(
    'H17', mean_activation_overlap, 'mean_activation_overlap', corrected_alpha)

# H18: per-run mean of the sparse activation overlap
mean_sparse_activation_overlap = _metric_by_optimizer(best_by_optimizer, 'sparse_activation_overlap', np.mean)
h18_t, h18_p, df, h18_tukey = _anova_with_tukey(
    'H18', mean_sparse_activation_overlap, 'mean_sparse_activation_overlap', corrected_alpha)

# H19: per-run mean of the pairwise interference
mean_pairwise_interference = _metric_by_optimizer(best_by_optimizer, 'pairwise_interference', np.mean)
h19_t, h19_p, df, h19_tukey = _anova_with_tukey(
    'H19', mean_pairwise_interference, 'mean_pairwise_interference', corrected_alpha)

# H20: final activation overlap
final_activation_overlap = _metric_by_optimizer(best_by_optimizer, 'final_activation_overlap')
h20_t, h20_p, df, h20_tukey = _anova_with_tukey(
    'H20', final_activation_overlap, 'final_activation_overlap', corrected_alpha)

# H21: final sparse activation overlap
final_sparse_activation_overlap = _metric_by_optimizer(best_by_optimizer, 'final_sparse_activation_overlap')
h21_t, h21_p, df, h21_tukey = _anova_with_tukey(
    'H21', final_sparse_activation_overlap, 'final_sparse_activation_overlap', corrected_alpha)

# H22: final pairwise interference
final_pairwise_interference = _metric_by_optimizer(best_by_optimizer, 'final_pairwise_interference')
h22_t, h22_p, df, h22_tukey = _anova_with_tukey(
    'H22', final_pairwise_interference, 'final_pairwise_interference', corrected_alpha)

<a href=#top>Back to Top</a>

#### Plotting<a name='experiment_2_mountain_car_testing_plotting'/>

In [None]:
# Testing-phase figure: four stacked panels (one per metric) over episodes, one
# curve per optimizer with a +-stderr band.
colors = sns.color_palette(n_colors=len(best_by_optimizer_summary.keys()))
fig, axarr = plt.subplots(4, 1, sharex=True, dpi=300, figsize=(4, 10))
# sort optimizers alphabetically but push 'constant' to the end
sort_key = lambda x: '~' if x[0] == 'constant' else x[0]
for i, key in enumerate(['accuracy', 'activation_overlap', 'sparse_activation_overlap', 'pairwise_interference']):
    ax = axarr[i]
    for j, (optimizer, value) in enumerate(sorted(best_by_optimizer_summary.items(), key=sort_key)):
        try:
            y = value['{}_mean'.format(key)]
            yerr = value['{}_stderr'.format(key)]
            x = np.arange(len(y)) + 1
            # only the top panel is labelled so fig.legend lists each optimizer once
            label = optimizer_nice_names[optimizer] if i == 0 else None
            ax.plot(
                x,
                y,
                label=label,
                linewidth=1,
                color=colors[j])
            ax.fill_between(
                x,
                y - yerr,
                y + yerr,
                alpha=0.3,
                color=colors[j])
        except KeyError:
            # metric not available for this optimizer: leave the curve out
            pass
axarr[0].set_ylabel('RMSVE', labelpad=17.5)
axarr[0].set_ylim(37, 90)
axarr[0].set_yticks([40, 60, 80])
axarr[1].set_ylabel('Activation Overlap', labelpad=10)
axarr[1].set_ylim(- 0.05, 0.9)
axarr[1].set_yticks([0, 0.25, 0.5, 0.75])
axarr[2].set_ylabel('Sparse Activation Overlap', labelpad=10)
axarr[2].set_ylim(- 0.05, 0.7)
axarr[2].set_yticks([0, 0.25, 0.5])
axarr[3].set_ylabel('Pairwise Interference', labelpad=15)
axarr[3].set_ylim(0.75, 1.05)
axarr[3].set_yticks([0.8, 0.9, 1])
axarr[3].set_xlim(0, 500)
axarr[3].set_xlabel('Episode', labelpad=10)
fig.legend(loc='upper right', bbox_to_anchor=(1.33, 0.7875), frameon=False)
fig.subplots_adjust(hspace=0.0, wspace=0.1)
# Saved under its own name: this cell used to write to
# 'experiment_2_mountain_car_momentum_and_rho.pdf', silently overwriting the
# hyperparameter sensitivity figure produced in the validation section.
# NOTE(review): adjust the filename if downstream documents expect another one.
fig.savefig('experiment_2_mountain_car_testing.pdf', bbox_inches='tight')

In [None]:
# Table of the mean and final value of every overlap/interference metric per
# optimizer, each reported as mean+-standard error over runs. Relies on the
# per-optimizer sample dicts built in the hypothesis-testing cell.
values = collections.OrderedDict()
values['activation_overlap'] = {'mean': mean_activation_overlap, 'final': final_activation_overlap}
values['sparse_activation_overlap'] = {'mean': mean_sparse_activation_overlap, 'final': final_sparse_activation_overlap}
values['pairwise_interference'] = {'mean': mean_pairwise_interference, 'final': final_pairwise_interference}

text = ''
text += '|---------------------------|-----------|----------------|----------------|\n'
text += '|          Metric           | Optimizer |   Mean Value   |  Final Value   |\n'
text += '|---------------------------|-----------|----------------|----------------|\n'
for k, v in values.items():
    for i, optimizer in enumerate(sorted(v['mean'].keys())):
        mean_samples = v['mean'][optimizer]
        final_samples = v['final'][optimizer]
        # The standard error is std / sqrt(n). The two used to be passed as
        # separate arguments, which shifted every value after the first mean
        # into the wrong placeholder.
        text += '| {0:>25} | {1:<9} | {2:>6.4f}+-{3:<6.4f} | {4:>6.4f}+-{5:<6.4f} |\n'.format(
            # the metric name is printed once, on the second row of its group
            k.replace('_', ' ').title() if i == 1 else '',
            optimizer_nice_names[optimizer],
            np.mean(mean_samples),
            np.std(mean_samples) / np.sqrt(len(mean_samples)),
            np.mean(final_samples),
            np.std(final_samples) / np.sqrt(len(final_samples)))
    text += '|---------------------------|-----------|----------------|----------------|\n'
with open('experiment_2_mountain_car_metrics.txt', 'w') as outfile:
    print(text, file=outfile)
print(text)

In [None]:
# Load the Acrobot validation results, drop runs whose last accuracy value is
# NaN, then select the best configurations with 'auc' as the criterion.
data = mc_tools.load_clean_data(['experiment_2_acrobot_validation.json'])

# clean data
total_entries = len(data)
finite_entries = [entry for entry in data if not np.isnan(entry['accuracy'][-1])]
print('Deleting {} of {} entries'.format(total_entries - len(finite_entries), total_entries))
data[:] = finite_entries

# find best
best = mc_tools.get_best(data, 'auc')
best_by_optimizer = mc_tools.get_best_by_optimizer(data, best)
best_by_optimizer_summary = mc_tools.get_best_by_optimizer_summary(data, best, best_by_optimizer)
best.head(n=10)

In [None]:
# Acrobot figure: four stacked panels (one per metric) over episodes, one curve
# per optimizer with a +-stderr band.
colors = sns.color_palette(n_colors=len(best_by_optimizer_summary.keys()))
fig, axarr = plt.subplots(4, 1, sharex=True, dpi=300, figsize=(4, 10))
# sort optimizers alphabetically but push 'constant' to the end
sort_key = lambda x: '~' if x[0] == 'constant' else x[0]
ordered_summaries = sorted(best_by_optimizer_summary.items(), key=sort_key)
metric_keys = ['accuracy', 'activation_overlap', 'sparse_activation_overlap', 'pairwise_interference']
for i, key in enumerate(metric_keys):
    ax = axarr[i]
    for j, (optimizer, value) in enumerate(ordered_summaries):
        try:
            y = value[key + '_mean']
            yerr = value[key + '_stderr']
            x = np.arange(len(y)) + 1
            # only the top panel is labelled so fig.legend lists each optimizer once
            label = optimizer_nice_names[optimizer] if i == 0 else None
            ax.plot(x, y, label=label, linewidth=1, color=colors[j])
            ax.fill_between(x, y - yerr, y + yerr, alpha=0.3, color=colors[j])
        except KeyError:
            # metric not available for this optimizer: leave the curve out
            pass

# per-panel cosmetics: (y label, label padding, y limits, y ticks)
panel_styles = [
    ('RMSVE', 17.5, (37, 90), [40, 60, 80]),
    ('Activation Overlap', 10, (-0.05, 0.9), [0, 0.25, 0.5, 0.75]),
    ('Sparse Activation Overlap', 10, (-0.05, 0.7), [0, 0.25, 0.5]),
    ('Pairwise Interference', 15, (0.75, 1.05), [0.8, 0.9, 1])]
for panel, (ylabel, pad, ylim, yticks) in zip(axarr, panel_styles):
    panel.set_ylabel(ylabel, labelpad=pad)
    panel.set_ylim(*ylim)
    panel.set_yticks(yticks)
axarr[3].set_xlim(0, 500)
axarr[3].set_xlabel('Episode', labelpad=10)
fig.legend(loc='upper right', bbox_to_anchor=(1.33, 0.7875), frameon=False)
fig.subplots_adjust(hspace=0.0, wspace=0.1)
fig.savefig('experiment_2_acrobot_momentum_and_rho.pdf', bbox_inches='tight')

<a href=#top>Back to Top</a>

## Experiment 2 Acrobot<a name='experiment_2_acrobot'/>

### Validation Phase<a name='experiment_2_acrobot_validation'/>

In [None]:
# Load the Acrobot validation results through the project's mc_tools helpers
# (the mountain_car tools module is reused for this environment).
data = mc_tools.load_clean_data(['experiment_2_acrobot_validation.json'])
# 'auc' is the criterion handed to get_best
# (presumably area under the learning curve; confirm in mountain_car.tools).
best = mc_tools.get_best(data, 'auc')
# Derived views of the selected results.
best_by_optimizer = mc_tools.get_best_by_optimizer(data, best)
best_by_optimizer_summary = mc_tools.get_best_by_optimizer_summary(data, best, best_by_optimizer)
# Last expression of the cell: display the first ten rows of `best`.
best.head(n=10)

<a href=#top>Back to Top</a>