In [48]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import Orange

In [49]:
# Results CSV; the path is resolved against the current working directory.
DATA_FILE = 'bhh_vs_standalone.csv'
DATA_FILEPATH = os.path.abspath(DATA_FILE)
print(DATA_FILEPATH)

# Datasets iterated by the critical-difference and loss cells.
DATASETS = [
	'iris', 'fish_toxicity', 'abalone', 'air_quality', 'housing',
	'wine_quality', 'car', 'parkinsons', 'forest_fires', 'bank',
	'bike', 'student_performance', 'adult', 'mushroom', 'diabetic',
]

# Subset of DATASETS iterated by the accuracy cells.
CATEGORICAL_DATASETS = [
	'iris', 'abalone', 'wine_quality', 'car',
	'bank', 'adult', 'mushroom', 'diabetic',
]

# Heuristic names; the plotting cells pair them with palette colours in this order.
HEURISTICS = [
	'adadelta', 'adagrad', 'adam',
	'bhh_gd_only', 'bhh',
	'de', 'ga',
	'momentum', 'nag',
	'pso', 'rmsprop', 'sgd',
]

# pandas dtype for every CSV column. The key order is also the column order.
DTYPE = {
	'id': 'object', 'dataset': 'object',
	'heuristic_type': 'object', 'heuristic': 'object',
	'is_baseline': 'boolean',
	'population': 'Int32', 'burn_in': 'Int32', 'replay': 'Int32',
	'reselection': 'Int32', 'reanalysis': 'Int32',
	'normalisation': 'boolean',
	'credit': 'object',
	'discounted_rewards': 'boolean',
	'run': 'Int32', 'step': 'Int32',
	'train_loss': 'float32', 'train_accuracy': 'float32',
	'test_loss': 'float32', 'test_accuracy': 'float32',
	'rank': 'float32',
}

# Column names handed to read_csv, taken from DTYPE so the two cannot drift apart.
COLUMNS = list(DTYPE)

/Users/arneschreuder/Development/personal/masters.ai/analysis/bhh_vs_standalone/bhh_vs_standalone.csv


## Load data

In [50]:
# Load the results table. The first line of the file is skipped and the
# column names / dtypes come from COLUMNS and DTYPE instead.
data = pd.read_csv(
	DATA_FILEPATH,
	skiprows=1,
	names=COLUMNS,
	dtype=DTYPE,
)

## Plot Critical Difference

### Per dataset

In [51]:
# Draw one critical-difference (CD) diagram per dataset, built from the mean
# 'rank' of every heuristic at step 20, and save it as figures/cd/<dataset>.png.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures/cd', exist_ok=True)

# Second argument of compute_CD.
# NOTE(review): the previous inline comment read "tested on 30 datasets" while
# the value actually passed is 10. The value is kept as-is; confirm which one
# is intended.
n_samples = 10

for dataset in DATASETS:
	subset = data.query('(dataset == "{}") and (step == 20)'.format(dataset))

	# Single-row table (row label 'rank') with one column per heuristic.
	# The string 'mean' avoids the pandas FutureWarning raised for np.mean.
	pivot = pd.pivot_table(subset, values='rank',
		columns=['heuristic'], aggfunc='mean', fill_value=0)

	names = pivot.columns.tolist()
	avg_ranks = pivot.loc['rank'].tolist()

	cd = Orange.evaluation.compute_CD(avg_ranks, n_samples)
	Orange.evaluation.graph_ranks(avg_ranks, names, cd=cd, width=10, textspace=1.5)
	plt.title('BHH vs Standalone - Critical Difference between Heuristics - {}'.format(dataset))
	plt.savefig("figures/cd/{}.png".format(dataset), transparent=True)
	# Close the figure so figures do not pile up across iterations.
	plt.close()


### Overall

In [52]:
# Draw a single critical-difference (CD) diagram from the mean 'rank' of every
# heuristic at step 20 across all rows, and save it as figures/cd/overall.png.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures/cd', exist_ok=True)

# Second argument of compute_CD.
# NOTE(review): the previous inline comment read "tested on 30 datasets" while
# the value actually passed is 10. The value is kept as-is; confirm which one
# is intended.
n_samples = 10

subset = data.query('(step == 20)')

# Single-row table (row label 'rank') with one column per heuristic.
# The string 'mean' avoids the pandas FutureWarning raised for np.mean.
pivot = pd.pivot_table(subset, values='rank',
	columns=['heuristic'], aggfunc='mean', fill_value=0)

names = pivot.columns.tolist()
avg_ranks = pivot.loc['rank'].tolist()

cd = Orange.evaluation.compute_CD(avg_ranks, n_samples)
Orange.evaluation.graph_ranks(avg_ranks, names, cd=cd, width=10, textspace=1.5)
plt.title('BHH vs Standalone - Critical Difference between Heuristics - Overall')
plt.savefig("figures/cd/overall.png", transparent=True)
plt.close()

## Export Loss Figures

### Train

In [53]:
# Export one train-loss line plot per dataset to figures/train/loss/<dataset>.png.
sns.set_context('paper', font_scale=2.5, rc={'lines.linewidth': 2})

# One colour per heuristic, in HEURISTICS order.
palette = dict(zip(HEURISTICS, sns.color_palette('hls', len(HEURISTICS))))

# Special colors for BHH
palette['bhh'] = '#000'
palette['bhh_gd_only'] = '#777'

# Marker per heuristic.
markers = {
	'adadelta': 'X',
	'adagrad': 'X',
	'adam': 'X',
	'bhh_gd_only': '*',
	'bhh': '*',
	'de': 'd',
	'ga': 'd',
	'momentum': 'o',
	'nag': 'o',
	'pso': 'd',
	'rmsprop': 'X',
	'sgd': 'o',
}

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures/train/loss', exist_ok=True)

for dataset in DATASETS:
	subset = data.query('dataset == "{}"'.format(dataset))

	fig, ax = plt.subplots(figsize=(20,12))
	fig.suptitle('BHH vs Standalone - Train Loss - {}'.format(dataset))

	sns.lineplot(data=subset, x='step', y='train_loss', hue='heuristic', style='heuristic', markers=markers, dashes=False, ax=ax, markersize=10, palette=palette)
	ax.set_xlabel("Steps")
	ax.set_ylabel("Loss")
	ax.spines['left'].set_position('zero')

	# Order the legend entries alphabetically by label.
	handles, labels = ax.get_legend_handles_labels()
	labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
	ax.legend(handles, labels, loc='upper right', title="Heuristic")

	ax.set_xticks(np.arange(21))
	fig.tight_layout()

	fig.savefig("figures/train/loss/{}.png".format(dataset), transparent=True)
	# Close this figure explicitly so figures do not pile up across iterations.
	plt.close(fig)

### Test

In [54]:
# Export one test-loss line plot per dataset to figures/test/loss/<dataset>.png.
sns.set_context('paper', font_scale=2.5, rc={'lines.linewidth': 2})

# One colour per heuristic, in HEURISTICS order.
palette = dict(zip(HEURISTICS, sns.color_palette('hls', len(HEURISTICS))))

# Special colors for BHH. The train-loss figures use these overrides; without
# them here the same heuristic would change colour between figures.
palette['bhh'] = '#000'
palette['bhh_gd_only'] = '#777'

# Marker per heuristic.
markers = {
	'adadelta': 'X',
	'adagrad': 'X',
	'adam': 'X',
	'bhh_gd_only': '*',
	'bhh': '*',
	'de': 'd',
	'ga': 'd',
	'momentum': 'o',
	'nag': 'o',
	'pso': 'd',
	'rmsprop': 'X',
	'sgd': 'o',
}

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures/test/loss', exist_ok=True)

for dataset in DATASETS:
	subset = data.query('dataset == "{}"'.format(dataset))

	fig, ax = plt.subplots(figsize=(20,12))
	fig.suptitle('BHH vs Standalone - Test Loss - {}'.format(dataset))

	sns.lineplot(data=subset, x='step', y='test_loss', hue='heuristic', style='heuristic', markers=markers, dashes=False, ax=ax, markersize=10, palette=palette)
	ax.set_xlabel("Steps")
	ax.set_ylabel("Loss")
	ax.spines['left'].set_position('zero')

	# Order the legend entries alphabetically by label.
	handles, labels = ax.get_legend_handles_labels()
	labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
	ax.legend(handles, labels, loc='upper right', title="Heuristic")

	ax.set_xticks(np.arange(21))
	fig.tight_layout()

	fig.savefig("figures/test/loss/{}.png".format(dataset), transparent=True)
	# Close this figure explicitly so figures do not pile up across iterations.
	plt.close(fig)

## Export Accuracy Figures

### Train

In [55]:
# Export one train-accuracy line plot per categorical dataset to
# figures/train/accuracy/<dataset>.png.
sns.set_context('paper', font_scale=2.5, rc={'lines.linewidth': 2})

# One colour per heuristic, in HEURISTICS order.
palette = dict(zip(HEURISTICS, sns.color_palette('hls', len(HEURISTICS))))

# Special colors for BHH. The train-loss figures use these overrides; without
# them here the same heuristic would change colour between figures.
palette['bhh'] = '#000'
palette['bhh_gd_only'] = '#777'

# Marker per heuristic.
markers = {
	'adadelta': 'X',
	'adagrad': 'X',
	'adam': 'X',
	'bhh_gd_only': '*',
	'bhh': '*',
	'de': 'd',
	'ga': 'd',
	'momentum': 'o',
	'nag': 'o',
	'pso': 'd',
	'rmsprop': 'X',
	'sgd': 'o',
}

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures/train/accuracy', exist_ok=True)

for dataset in CATEGORICAL_DATASETS:
	subset = data.query('dataset == "{}"'.format(dataset))

	fig, ax = plt.subplots(figsize=(20,12))
	fig.suptitle('BHH vs Standalone - Train Accuracy - {}'.format(dataset))

	sns.lineplot(data=subset, x='step', y='train_accuracy', hue='heuristic', style='heuristic', markers=markers, dashes=False, ax=ax, markersize=10, palette=palette)
	ax.set_xlabel("Steps")
	ax.set_ylabel("Accuracy")
	ax.spines['left'].set_position('zero')

	# Order the legend entries alphabetically by label.
	handles, labels = ax.get_legend_handles_labels()
	labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))

	# The legend goes top-right for abalone and bottom-right for every other dataset.
	legend_loc = 'upper right' if dataset == 'abalone' else 'lower right'
	ax.legend(handles, labels, loc=legend_loc, title="Heuristic")

	ax.set_yticks(np.arange(0.0, 1.1, 0.1))
	ax.set_xticks(np.arange(21))
	fig.tight_layout()

	fig.savefig("figures/train/accuracy/{}.png".format(dataset), transparent=True)
	# Close this figure explicitly so figures do not pile up across iterations.
	plt.close(fig)

### Test

In [56]:
# Export one test-accuracy line plot per categorical dataset to
# figures/test/accuracy/<dataset>.png.
sns.set_context('paper', font_scale=2.5, rc={'lines.linewidth': 2})

# One colour per heuristic, in HEURISTICS order.
palette = dict(zip(HEURISTICS, sns.color_palette('hls', len(HEURISTICS))))

# Special colors for BHH. The train-loss figures use these overrides; without
# them here the same heuristic would change colour between figures.
palette['bhh'] = '#000'
palette['bhh_gd_only'] = '#777'

# Marker per heuristic.
markers = {
	'adadelta': 'X',
	'adagrad': 'X',
	'adam': 'X',
	'bhh_gd_only': '*',
	'bhh': '*',
	'de': 'd',
	'ga': 'd',
	'momentum': 'o',
	'nag': 'o',
	'pso': 'd',
	'rmsprop': 'X',
	'sgd': 'o',
}

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures/test/accuracy', exist_ok=True)

for dataset in CATEGORICAL_DATASETS:
	subset = data.query('dataset == "{}"'.format(dataset))

	fig, ax = plt.subplots(figsize=(20,12))
	fig.suptitle('BHH vs Standalone - Test Accuracy - {}'.format(dataset))

	sns.lineplot(data=subset, x='step', y='test_accuracy', hue='heuristic', style='heuristic', markers=markers, dashes=False, ax=ax, markersize=10, palette=palette)
	ax.set_xlabel("Steps")
	ax.set_ylabel("Accuracy")
	ax.spines['left'].set_position('zero')

	# Order the legend entries alphabetically by label.
	handles, labels = ax.get_legend_handles_labels()
	labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))

	# The legend goes top-right for abalone and bottom-right for every other dataset.
	legend_loc = 'upper right' if dataset == 'abalone' else 'lower right'
	ax.legend(handles, labels, loc=legend_loc, title="Heuristic")

	ax.set_yticks(np.arange(0.0, 1.1, 0.1))
	ax.set_xticks(np.arange(21))
	fig.tight_layout()

	fig.savefig("figures/test/accuracy/{}.png".format(dataset), transparent=True)
	# Close this figure explicitly so figures do not pile up across iterations.
	plt.close(fig)