In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

#### prep

In [None]:
# Build an index of the result files in ./results/.
#
# File names are split on '_' (after dropping '.csv') into
# <alg>_<step>_<prob>[_<run>]. The resulting mapping is
#   files[prob][step][alg]      -> file name            (3-part names)
#   files[prob][step][alg][run] -> file name            (4-part names)
# Entries containing 'prelim', and 4-part names whose last part contains
# 'a', 'b' or 'c', are left out.
results_dir = './results/'
fnames = os.listdir(results_dir)
files = {}
probs = ['queens', 'ff', 'nn', '4pks']
steps = ['tuning', 'final']
for fname in fnames:
    # os.listdir returns bare names; they must be joined with the directory
    # before testing, otherwise the check is made against the current
    # working directory and sub-directories of ./results/ are not skipped.
    if os.path.isdir(os.path.join(results_dir, fname)):
        continue
    elif 'prelim' in fname:
        continue
    fname_parts = fname.replace('.csv', '').split('_')
    if len(fname_parts) < 3:
        # Not an <alg>_<step>_<prob> name; unpacking below would raise.
        continue
    alg, step, prob = fname_parts[:3]
    files.setdefault(prob, {})
    files[prob].setdefault(step, {})
    if len(fname_parts) == 4:
        files[prob][step].setdefault(alg, {})
        i = fname_parts[3]
        if 'a' in i or 'b' in i or 'c' in i:
            continue
        files[prob][step][alg][i] = fname
    else:
        files[prob][step][alg] = fname

#### Queens Tuning

In [None]:
# HC Tuning: mean (sign-flipped) fitness and mean time against restart,
# one line per max_attempts value.
prob, prob_name = 'queens', 'Queens'
step = 'tuning'
alg, alg_name = 'hc', 'Random Hill Climbing'
features = ['max_attempts', 'restart']
f = files[prob][step][alg]
df = pd.read_csv(os.path.join('./results', f), index_col=0)
fig, ax = plt.subplots(1, ncols=2, figsize=(16, 5))
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
fit_ax, time_ax = ax
fit_ax.set_ylabel('Fitness')
time_ax.set_ylabel('Time')
for panel in ax:
    panel.set_xlabel('Restart')
# After unstack().T the rows are restart values and the columns max_attempts.
means = df.groupby(['max_attempts', 'restart'])[['fitness', 'time']].mean()
fitness = means['fitness'].unstack().T * -1
time = means['time'].unstack().T
for col in fitness.columns:
    fit_ax.plot(fitness[col])
    time_ax.plot(time[col], label=col)
time_ax.legend(title='Max Attempts');

In [None]:
# SA Tuning: one panel per tuning file. Each panel plots the mean
# (sign-flipped) fitness on the left axis and the mean time on a twin axis,
# against the hyperparameter that varies in that file.
prob = 'queens'
prob_name = 'Queens'
step = 'tuning'
alg = 'sa'
alg_name = 'Simulated Annealing'
features = ['max_attempts', 'init_temp', 'decay']
plot_files = files[prob][step][alg]
fig, ax = plt.subplots(1, ncols=len(plot_files), figsize=(16,5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
max_i = len(plot_files)
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    path = os.path.join('./results', f)
    df = pd.read_csv(path, index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    x = sorted(df[feature].unique())
    # Negate into a new array: the array returned by `.values` may be a
    # read-only view (pandas Copy-on-Write), where an in-place `*=` raises.
    fit_y = -df.groupby(feature)['fitness'].mean().values
    time_y = df.groupby(feature)['time'].mean().values
    if feature == 'init_temp':
        ax[i].set_xscale('log')
    ax[i].set_xlabel(feature.replace('_', ' ').title())
    p0 = ax[i].plot(x, fit_y, color='orange')
    ax_twins.append(ax[i].twinx())
    p1 = ax_twins[i].plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        ax[i].set_ylabel('Fitness')
        ax[i].legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i != max_i-1:
        ax_twins[i].set_yticklabels([])
    else:
        ax_twins[i].set_ylabel('Time')
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i-1].get_shared_y_axes().join(*ax_twins)

In [None]:
# GA Tuning: one panel per tuning file. Each panel plots the mean
# (sign-flipped) fitness on the left axis and the mean time on a twin axis,
# against the hyperparameter that varies in that file.
prob = 'queens'
prob_name = 'Queens'
step = 'tuning'
alg = 'ga'
alg_name = 'Genetic Algorithm'
features = ['max_attempts', 'pop_size', 'mutation_rate', 'crossover']
plot_files = files[prob][step][alg]
fig, ax = plt.subplots(1, ncols=len(plot_files), figsize=(16,5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
max_i = len(plot_files)
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    path = os.path.join('./results', f)
    df = pd.read_csv(path, index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    x = sorted(df[feature].unique())
    # Negate into a new array: the array returned by `.values` may be a
    # read-only view (pandas Copy-on-Write), where an in-place `*=` raises.
    fit_y = -df.groupby(feature)['fitness'].mean().values
    time_y = df.groupby(feature)['time'].mean().values
    if feature == 'init_temp':
        ax[i].set_xscale('log')
    ax[i].set_xlabel(feature.replace('_', ' ').title())
    p0 = ax[i].plot(x, fit_y, color='orange')
    ax_twins.append(ax[i].twinx())
    # Bind the time line here: the legend below needs this cell's own handle,
    # not whatever `p1` a previously executed cell left in the kernel.
    p1 = ax_twins[i].plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        ax[i].set_ylabel('Fitness')
        ax[i].legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i != max_i-1:
        ax_twins[i].set_yticklabels([])
    else:
        ax_twins[i].set_ylabel('Time')
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i-1].get_shared_y_axes().join(*ax_twins)

In [None]:
# MIMIC Tuning: one panel per tuning file. Each panel plots the mean
# (sign-flipped) fitness on the left axis and the mean time on a twin axis,
# against the hyperparameter that varies in that file.
prob = 'queens'
prob_name = 'Queens'
step = 'tuning'
alg = 'mimic'
alg_name = 'MIMIC'
features = ['max_attempts', 'pop_size', 'keep_pct']
plot_files = files[prob][step][alg]
fig, ax = plt.subplots(1, ncols=len(plot_files), figsize=(16,5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
max_i = len(plot_files)
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    path = os.path.join('./results', f)
    df = pd.read_csv(path, index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    x = sorted(df[feature].unique())
    # Negate into a new array: the array returned by `.values` may be a
    # read-only view (pandas Copy-on-Write), where an in-place `*=` raises.
    fit_y = -df.groupby(feature)['fitness'].mean().values
    time_y = df.groupby(feature)['time'].mean().values
    if feature == 'init_temp':
        ax[i].set_xscale('log')
    ax[i].set_xlabel(feature.replace('_', ' ').title())
    p0 = ax[i].plot(x, fit_y, color='orange')
    ax_twins.append(ax[i].twinx())
    p1 = ax_twins[i].plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        ax[i].set_ylabel('Fitness')
        ax[i].legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i != max_i-1:
        ax_twins[i].set_yticklabels([])
    else:
        ax_twins[i].set_ylabel('Time')
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i-1].get_shared_y_axes().join(*ax_twins)

#### 4 Peaks Tuning

In [None]:
# HC Tuning: mean fitness and mean time against restart,
# one line per max_attempts value.
prob, prob_name = '4pks', '4 Peaks'
step = 'tuning'
alg, alg_name = 'hc', 'Random Hill Climbing'
features = ['max_attempts', 'restart']
f = files[prob][step][alg]
df = pd.read_csv(os.path.join('./results', f), index_col=0)
fig, ax = plt.subplots(1, ncols=2, figsize=(16, 5))
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
fit_ax, time_ax = ax
fit_ax.set_ylabel('Fitness')
time_ax.set_ylabel('Time')
for panel in ax:
    panel.set_xlabel('Restart')
# After unstack().T the rows are restart values and the columns max_attempts.
means = df.groupby(['max_attempts', 'restart'])[['fitness', 'time']].mean()
fitness = means['fitness'].unstack().T
time = means['time'].unstack().T
for col in fitness.columns:
    fit_ax.plot(fitness[col])
    time_ax.plot(time[col], label=col)
time_ax.legend(title='Max Attempts');

In [None]:
# SA Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = '4pks', '4 Peaks'
step = 'tuning'
alg, alg_name = 'sa', 'Simulated Annealing'
features = ['max_attempts', 'init_temp', 'decay']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'), loc='lower right')
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

In [None]:
# GA Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = '4pks', '4 Peaks'
step = 'tuning'
alg, alg_name = 'ga', 'Genetic Algorithm'
features = ['max_attempts', 'pop_size', 'mutation_rate', 'crossover']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

In [None]:
# MIMIC Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = '4pks', '4 Peaks'
step = 'tuning'
alg, alg_name = 'mimic', 'MIMIC'
features = ['max_attempts', 'pop_size', 'keep_pct']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

#### Flip Flop Tuning

In [None]:
# HC Tuning: mean fitness and mean time against restart,
# one line per max_attempts value.
prob, prob_name = 'ff', 'Flip Flop'
step = 'tuning'
alg, alg_name = 'hc', 'Random Hill Climbing'
features = ['max_attempts', 'restart']
f = files[prob][step][alg]
df = pd.read_csv(os.path.join('./results', f), index_col=0)
fig, ax = plt.subplots(1, ncols=2, figsize=(16, 5))
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
fit_ax, time_ax = ax
fit_ax.set_ylabel('Fitness')
time_ax.set_ylabel('Time')
for panel in ax:
    panel.set_xlabel('Restart')
# After unstack().T the rows are restart values and the columns max_attempts.
means = df.groupby(['max_attempts', 'restart'])[['fitness', 'time']].mean()
fitness = means['fitness'].unstack().T
time = means['time'].unstack().T
for col in fitness.columns:
    fit_ax.plot(fitness[col])
    time_ax.plot(time[col], label=col)
time_ax.legend(title='Max Attempts');

In [None]:
# SA Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = 'ff', 'Flip Flop'
step = 'tuning'
alg, alg_name = 'sa', 'Simulated Annealing'
features = ['max_attempts', 'init_temp', 'decay']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

In [None]:
# GA Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = 'ff', 'Flip Flop'
step = 'tuning'
alg, alg_name = 'ga', 'Genetic Algorithm'
features = ['max_attempts', 'pop_size', 'mutation_rate', 'crossover']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

In [None]:
# MIMIC Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = 'ff', 'Flip Flop'
step = 'tuning'
alg, alg_name = 'mimic', 'MIMIC'
features = ['max_attempts', 'pop_size', 'keep_pct']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

#### NN Tuning

In [None]:
# HC Tuning: mean fitness and mean time against restart,
# one line per max_attempts value.
prob, prob_name = 'nn', 'Neural Network'
step = 'tuning'
alg, alg_name = 'hc', 'Random Hill Climbing'
features = ['max_attempts', 'restart']
f = files[prob][step][alg]
df = pd.read_csv(os.path.join('./results', f), index_col=0)
fig, ax = plt.subplots(1, ncols=2, figsize=(16, 5))
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
fit_ax, time_ax = ax
fit_ax.set_ylabel('Fitness')
time_ax.set_ylabel('Time')
for panel in ax:
    panel.set_xlabel('Restart')
# After unstack().T the rows are restart values and the columns max_attempts.
means = df.groupby(['max_attempts', 'restart'])[['fitness', 'time']].mean()
fitness = means['fitness'].unstack().T
time = means['time'].unstack().T
for col in fitness.columns:
    fit_ax.plot(fitness[col])
    time_ax.plot(time[col], label=col)
time_ax.legend(title='Max Attempts');

In [None]:
# SA Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = 'nn', 'Neural Network'
step = 'tuning'
alg, alg_name = 'sa', 'Simulated Annealing'
features = ['max_attempts', 'init_temp', 'decay', 'learning_rate']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'))
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

In [None]:
# GA Tuning: one panel per tuning file, mean fitness (left axis) and
# mean time (twin axis) against the hyperparameter that varies in that file.
prob, prob_name = 'nn', 'Neural Network'
step = 'tuning'
alg, alg_name = 'ga', 'Genetic Algorithm'
features = ['max_attempts', 'pop_size', 'mutation_rate', 'crossover']
plot_files = files[prob][step][alg]
max_i = len(plot_files)
fig, ax = plt.subplots(1, ncols=max_i, figsize=(16, 5), sharey=True)
fig.suptitle(f'{alg_name} {prob_name} Tuning Results', fontsize=16)
ax_twins = []
for i, f in sorted(plot_files.items()):
    i = int(i) - 1  # file-name suffix -> 0-based subplot column
    df = pd.read_csv(os.path.join('./results', f), index_col=0)
    # Of the candidate columns present, use the first with more than one value.
    cols = df.columns.intersection(features)
    feature = cols[df[cols].nunique() > 1][0]
    grouped = df.groupby(feature)
    x = sorted(df[feature].unique())
    fit_y = grouped['fitness'].mean().values
    time_y = grouped['time'].mean().values
    left = ax[i]
    if feature == 'init_temp':
        left.set_xscale('log')
    left.set_xlabel(feature.replace('_', ' ').title())
    p0 = left.plot(x, fit_y, color='orange')
    ax_twins.append(left.twinx())
    right = ax_twins[i]
    p1 = right.plot(x, time_y)
    if i == 0:
        # Only the first panel carries the fitness label and the legend.
        left.set_ylabel('Fitness')
        left.legend((p0[0], p1[0]), ('Fitness', 'Time'), loc='upper center')
    if i == max_i - 1:
        right.set_ylabel('Time')
    else:
        right.set_yticklabels([])
# Put all the twin (time) axes in one shared-y group.
ax_twins[max_i - 1].get_shared_y_axes().join(*ax_twins)

#### Final Results

In [None]:
# Queens: (sign-flipped) mean fitness per iteration for each problem size,
# then mean time and mean function evaluations against problem size.
prob, prob_name = 'queens', 'Queens'
step = 'final'
alg_name_map = {
    'hc': 'Random Hill Climbing',
    'sa': 'Simulated Annealing',
    'ga': 'Genetic Algorithm',
    'mimic': 'MIMIC'
}
dfs = {
    alg: pd.read_csv(os.path.join('./results/', files[prob][step][alg]), index_col=0)
    for alg in alg_name_map
}
fig, ax = plt.subplots(1, ncols=4, figsize=(16, 5), sharey=False)
fig.suptitle(f'{prob_name} Final Results', fontsize=16)
alg_0 = next(iter(alg_name_map))
prob_sizes = sorted(dfs[alg_0]['prob_size'].unique())
algs_sorted = sorted(alg_name_map.items())

# One fitness panel per problem size.
for i, size in enumerate(prob_sizes):
    panel = ax[i]
    for alg, alg_name in algs_sorted:
        df = dfs[alg].query('prob_size == @size')
        # Highest iteration among rows that remain after de-duplicating on time.
        max_iter = df.drop_duplicates(['time'])['iteration'].max()
        fitness = df.query('iteration <= @max_iter').groupby('iteration')['fitness'].mean() * -1
        x, y = fitness.index, fitness.values
        if alg == 'hc':
            # Scale the hill-climbing iteration axis by (restarts + 1).
            x = x * (df['restarts'].unique()[0] + 1)
        panel.plot(x, y, label=alg_name)
        panel.set_xscale('log')
        panel.set_xlabel('Iteration')
        panel.set_title(f'Problem Size {size}')
        if i == 0:
            panel.set_ylabel('Fitness')
            panel.legend()
        else:
            panel.set_yticklabels([])
# Share the y-axis across every panel except the last one.
ax[0].get_shared_y_axes().join(*ax[:-1])

# Last panel: time (solid, left axis) and function evaluations (dashed, twin axis).
time_ax = ax[-1]
ax_twin = time_ax.twinx()
for alg, alg_name in algs_sorted:
    df = dfs[alg]
    by_size = df.groupby('prob_size')
    max_iters = df.drop_duplicates(['prob_size', 'time']).groupby('prob_size')['iteration'].max()
    # Rows of a problem-size group that sit at that size's max iteration.
    at_last_iter = lambda g: g[g['iteration'] == max_iters[g['prob_size'].min()]]
    times = by_size.apply(lambda g: at_last_iter(g)['time'].mean())
    p0 = time_ax.plot(times.index, times.values, label='Time', linestyle='-')
    time_ax.set_xlabel('Problem Size')
    time_ax.set_ylabel('Time')
    fevals = by_size.apply(lambda g: at_last_iter(g)['f_evals'].mean())
    p1 = ax_twin.plot(fevals.index, fevals.values, linestyle='--', label='Evals')
    ax_twin.set_ylabel('Func Evals')
    if alg == alg_0:
        # The legend only explains the line styles, so draw its handles in black.
        leg = time_ax.legend([p0[0], p1[0]], ['Time', 'Evals'])
        for handle in leg.legendHandles:
            handle.set_color('black')

In [None]:
# 4 Peaks: mean fitness per iteration for each problem size,
# then mean time and mean function evaluations against problem size.
prob, prob_name = '4pks', '4 Peaks'
step = 'final'
alg_name_map = {
    'hc': 'Random Hill Climbing',
    'sa': 'Simulated Annealing',
    'ga': 'Genetic Algorithm',
    'mimic': 'MIMIC'
}
dfs = {
    alg: pd.read_csv(os.path.join('./results/', files[prob][step][alg]), index_col=0)
    for alg in alg_name_map
}
fig, ax = plt.subplots(1, ncols=4, figsize=(16, 5), sharey=False)
fig.suptitle(f'{prob_name} Final Results', fontsize=16)
alg_0 = next(iter(alg_name_map))
prob_sizes = sorted(dfs[alg_0]['prob_size'].unique())
algs_sorted = sorted(alg_name_map.items())

# One fitness panel per problem size.
for i, size in enumerate(prob_sizes):
    panel = ax[i]
    for alg, alg_name in algs_sorted:
        df = dfs[alg].query('prob_size == @size')
        # Highest iteration among rows that remain after de-duplicating on time.
        max_iter = df.drop_duplicates(['time'])['iteration'].max()
        fitness = df.query('iteration <= @max_iter').groupby('iteration')['fitness'].mean()
        x, y = fitness.index, fitness.values
        if alg == 'hc':
            # Scale the hill-climbing iteration axis by (restarts + 1).
            x = x * (df['restarts'].unique()[0] + 1)
        panel.plot(x, y, label=alg_name)
        panel.set_xscale('log')
        panel.set_xlabel('Iteration')
        panel.set_title(f'Problem Size {size}')
        if i == 0:
            panel.set_ylabel('Fitness')
            panel.legend()
        else:
            panel.set_yticklabels([])
# Share the y-axis across every panel except the last one.
ax[0].get_shared_y_axes().join(*ax[:-1])

# Last panel: time (solid, left axis) and function evaluations (dashed, twin axis).
time_ax = ax[-1]
ax_twin = time_ax.twinx()
for alg, alg_name in algs_sorted:
    df = dfs[alg]
    by_size = df.groupby('prob_size')
    max_iters = df.drop_duplicates(['prob_size', 'time']).groupby('prob_size')['iteration'].max()
    # Rows of a problem-size group that sit at that size's max iteration.
    at_last_iter = lambda g: g[g['iteration'] == max_iters[g['prob_size'].min()]]
    times = by_size.apply(lambda g: at_last_iter(g)['time'].mean())
    p0 = time_ax.plot(times.index, times.values, label='Time', linestyle='-')
    time_ax.set_xlabel('Problem Size')
    time_ax.set_ylabel('Time')
    fevals = by_size.apply(lambda g: at_last_iter(g)['f_evals'].mean())
    p1 = ax_twin.plot(fevals.index, fevals.values, linestyle='--', label='Evals')
    ax_twin.set_ylabel('Func Evals')
    if alg == alg_0:
        # The legend only explains the line styles, so draw its handles in black.
        leg = time_ax.legend([p0[0], p1[0]], ['Time', 'Evals'])
        for handle in leg.legendHandles:
            handle.set_color('black')

In [None]:
# Flip Flop: mean fitness per iteration for each problem size,
# then mean time and mean function evaluations against problem size.
prob, prob_name = 'ff', 'Flip Flop'
step = 'final'
alg_name_map = {
    'hc': 'Random Hill Climbing',
    'sa': 'Simulated Annealing',
    'ga': 'Genetic Algorithm',
    'mimic': 'MIMIC'
}
dfs = {
    alg: pd.read_csv(os.path.join('./results/', files[prob][step][alg]), index_col=0)
    for alg in alg_name_map
}
fig, ax = plt.subplots(1, ncols=4, figsize=(16, 5), sharey=False)
plt.subplots_adjust(wspace=0.25)
fig.suptitle(f'{prob_name} Final Results', fontsize=16)
alg_0 = next(iter(alg_name_map))
prob_sizes = sorted(dfs[alg_0]['prob_size'].unique())
algs_sorted = sorted(alg_name_map.items())

# One fitness panel per problem size.
for i, size in enumerate(prob_sizes):
    panel = ax[i]
    for alg, alg_name in algs_sorted:
        df = dfs[alg].query('prob_size == @size')
        # Highest iteration among rows that remain after de-duplicating on time.
        max_iter = df.drop_duplicates(['time'])['iteration'].max()
        fitness = df.query('iteration <= @max_iter').groupby('iteration')['fitness'].mean()
        x, y = fitness.index, fitness.values
        if alg == 'hc':
            # Scale the hill-climbing iteration axis by (restarts + 1).
            x = x * (df['restarts'].unique()[0] + 1)
        panel.plot(x, y, label=alg_name)
        panel.set_xscale('log')
        panel.set_xlabel('Iteration')
        panel.set_title(f'Problem Size {size}')
        if i == 0:
            panel.set_ylabel('Fitness')
            panel.legend()
        else:
            panel.set_yticklabels([])
# Share the y-axis across every panel except the last one.
ax[0].get_shared_y_axes().join(*ax[:-1])

# Last panel: time (solid, left axis) and function evaluations (dashed, twin axis).
time_ax = ax[-1]
ax_twin = time_ax.twinx()
for alg, alg_name in algs_sorted:
    df = dfs[alg]
    by_size = df.groupby('prob_size')
    max_iters = df.drop_duplicates(['prob_size', 'time']).groupby('prob_size')['iteration'].max()
    # Rows of a problem-size group that sit at that size's max iteration.
    at_last_iter = lambda g: g[g['iteration'] == max_iters[g['prob_size'].min()]]
    times = by_size.apply(lambda g: at_last_iter(g)['time'].mean())
    p0 = time_ax.plot(times.index, times.values, label='Time', linestyle='-')
    time_ax.set_xlabel('Problem Size')
    time_ax.set_ylabel('Time')
    fevals = by_size.apply(lambda g: at_last_iter(g)['f_evals'].mean())
    p1 = ax_twin.plot(fevals.index, fevals.values, linestyle='--', label='Evals')
    ax_twin.set_ylabel('Func Evals')
    if alg == alg_0:
        # The legend only explains the line styles, so draw its handles in black.
        leg = time_ax.legend([p0[0], p1[0]], ['Time', 'Evals'])
        for handle in leg.legendHandles:
            handle.set_color('black')

In [None]:
# Neural Net: four panels -- loss per iteration, train/validation accuracy,
# loss, and time / function evaluations, the last three against 'pct'.
prob, prob_name = 'nn', 'Neural Network'
step = 'final'
alg_name_map = {
    'hc': 'Random Hill Climbing',
    'sa': 'Simulated Annealing',
    'ga': 'Genetic Algorithm',
    'gd': 'Gradient Descent'
}
i = 0  # index of the panel currently being drawn
dfs = {
    alg: pd.read_csv(os.path.join('./results/', files[prob][step][alg]), index_col=0)
    for alg in alg_name_map
}
fig, ax = plt.subplots(1, ncols=4, figsize=(16, 5), sharey=False)
plt.subplots_adjust(wspace=0.3)
fig.suptitle(f'{prob_name} Final Results', fontsize=16)
alg_0 = next(iter(alg_name_map))
algs_sorted = sorted(alg_name_map.items())

# Panel 0: mean 'fitness' per iteration for every algorithm (labelled as loss).
panel = ax[i]
for alg, alg_name in algs_sorted:
    df = dfs[alg]
    # Highest iteration among rows that remain after de-duplicating on time.
    max_iter = df.drop_duplicates(['time'])['iteration'].max()
    fitness = df.query('iteration <= @max_iter').groupby('iteration')['fitness'].mean()
    x, y = fitness.index, fitness.values
    if alg == 'hc':
        # Scale the hill-climbing iteration axis by (restarts + 1).
        x = x * (df['restarts'].unique()[0] + 1)
    panel.plot(x, y, label=alg_name)
    panel.set_xscale('log')
    panel.set_xlabel('Iteration')
    panel.set_ylabel('Loss')
    panel.legend()

# The remaining panels read one combined file with a row per (alg, pct) run.
df = pd.read_csv(os.path.join('./results/', f'size_{step}_{prob}.csv'), index_col=0)

# Panel 1: train (solid) and validation (dashed) accuracy in matching colours.
i += 1
panel = ax[i]
for alg, alg_name in algs_sorted:
    by_pct = df.query('alg == @alg_name').groupby('pct')
    train_acc = by_pct['train_accuracy'].mean()
    val_acc = by_pct['val_accuracy'].mean()
    p0 = panel.plot(train_acc.index, train_acc.values, linestyle='-')
    p1 = panel.plot(val_acc.index, val_acc.values, linestyle='--', color=p0[0].get_color())
    panel.set_xlabel('Percent of Data')
    panel.set_ylabel('Balanced Accuracy')
    if alg == alg_0:
        # The legend only explains the line styles, so draw its handles in black.
        leg = panel.legend([p0[0], p1[0]], ['Train', 'Validation'])
        for handle in leg.legendHandles:
            handle.set_color('black')

# Panel 2: mean loss.
i += 1
panel = ax[i]
for alg, alg_name in algs_sorted:
    train_acc = df.query('alg == @alg_name').groupby('pct')['loss'].mean()
    p0 = panel.plot(train_acc.index, train_acc.values, linestyle='-')
    panel.set_xlabel('Percent of Data')
    panel.set_ylabel('Loss')

# Panel 3: time (solid, left axis) and function evaluations (dashed, twin axis).
i += 1
panel = ax[i]
ax_twin = panel.twinx()
for alg, alg_name in algs_sorted:
    by_pct = df.query('alg == @alg_name').groupby('pct')
    times = by_pct['time'].mean()
    p0 = panel.plot(times.index, times.values, label='Time', linestyle='-')
    panel.set_xlabel('Percent of Data')
    panel.set_ylabel('Time')
    fevals = by_pct['f_evals'].mean()
    p1 = ax_twin.plot(fevals.index, fevals.values, linestyle='--', label='Evals')
    ax_twin.set_ylabel('Func Evals')
    if alg == alg_0:
        leg = panel.legend([p0[0], p1[0]], ['Time', 'Evals'])
        for handle in leg.legendHandles:
            handle.set_color('black')