In [None]:
import pandas as pd
import numpy as np
from benchmarks import *
import glob
import os, sys
import seaborn as sns
import matplotlib.pyplot as plt
import re
import time
from tqdm.notebook import tqdm
from itertools import product
import matplotlib.colors as mcolors

In [None]:
# The machine name is inferred from ROOT_DIR (provided by `benchmarks`):
# any path containing 'lassen' selects lassen, everything else is treated as ruby.
if 'lassen' in ROOT_DIR:
	MACHINE = 'lassen'
else:
	MACHINE = 'ruby'
print(MACHINE, ROOT_DIR)

# Program names come from the `progs` mapping of `benchmarks`; problem sizes are listed small to large.
prognames = [*progs.keys()]
probsizes = ['smlprob', 'medprob', 'lrgprob']

# Seed values; seeds[0] is used below when picking a representative run.
# NOTE(review): presumably the RNG seeds the GO runs were launched with -- confirm.
seeds = [1337, 3827, 9999, 4873]

# method -> (columns on the heatmap x axis, columns on the heatmap y axis).
# An axis with several columns is rendered as one tuple-valued column.
hypers = {
	'cma': (
		('popsize',),
		('seed', 'sigma'),
	),
	'pso': (
		('popsize', 'w'),
		('seed', 'c1', 'c2'),
	),
	'bo-ucb': (
		('seed',),
		('kappa',),
	),
	'bo-ei': (
		('seed',),
		('xi',),
	),
	'bo-poi': (
		('seed',),
		('xi',),
	),
}

# Method tokens as they appear in the per-method CSV file names; the 'bo' files
# are split into 'bo-<utilFnct>' labels after loading, so hypers' keys are not used here.
goMethods = ['pso', 'cma', 'bo']
print(goMethods)

In [None]:
# Full-exploration timing database for the selected machine.
dbFile = f'{MACHINE}-fullExploreDataset.csv'
xtimeDB = pd.read_csv(ROOT_DIR+'/databases/'+dbFile)

# Smallest xtime recorded in the database for every (progname, probsize) pair.
globalOptimals = xtimeDB.groupby(['progname', 'probsize'])['xtime'].min().reset_index()

print(globalOptimals)

# Thread count of the baseline configuration: 56 on ruby, 80 otherwise.
# Compared with `==`: the previous `MACHINE in 'ruby'` was a substring test,
# which is also true for values such as '' or 'ru'.
numthreads = 56 if MACHINE == 'ruby' else 80

# Baseline rows: that thread count with close binding, core places and a static schedule.
isBaselineConfig = (
	(xtimeDB['OMP_NUM_THREADS'] == numthreads)
	& (xtimeDB['OMP_PROC_BIND'] == 'close')
	& (xtimeDB['OMP_PLACES'] == 'cores')
	& (xtimeDB['OMP_SCHEDULE'] == 'static')
)
globalBaselines = xtimeDB.loc[isBaselineConfig, ['progname', 'probsize', 'xtime']]

# Display names for problem sizes and programs.
probsizeMap = {'smlprob': 'Small Problem', 'medprob': 'Medium Problem', 'lrgprob': 'Large Problem'}
prognameMap = {'bt_nas': 'BT', 'ft_nas': 'FT', 'hpcg': 'HPCG', 'lulesh':'Lulesh'}

In [None]:
# Programs for which no GO result files are read.
skippedProgs = ('cg_nas', 'cfd_rodinia')

# read the pre-processed dataframes: one raw-xtime CSV per (program, GO method token)
tojoin = []
for progname in prognames:
	if progname in skippedProgs:
		continue
	for method in goMethods:
		filename = ROOT_DIR+'/databases/'+f'{MACHINE}-{progname}-{method}-GO_Data-rawXtimes.csv'
		tojoin.append(pd.read_csv(filename))

# sort=True orders the union of columns alphabetically; columns a file lacks are filled with NaN
overallDF = pd.concat(tojoin, ignore_index=True, sort=True)
overallDF = overallDF.drop(['optimXtime', 'kappa_decay', 'kappa_decay_delay'], axis=1)

print(overallDF.columns)

# Relabel the generic 'bo' rows as 'bo-<utilFnct>' so every utility function is its own method.
# Done as one vectorised string concatenation instead of a row-wise apply; astype(str) keeps the
# expression valid even when no 'bo' rows were loaded (utilFnct would then be an all-NaN float column).
isBO = overallDF['method'] == 'bo'
overallDF.loc[isBO, 'method'] = 'bo-' + overallDF.loc[isBO, 'utilFnct'].astype(str)
overallDF = overallDF.drop(['utilFnct'], axis=1)

# Show the distinct values of every categorical/hyperparameter column (skip the measurement columns).
for col in overallDF:
	if col in ('xtime', 'globalSample', 'optimXtime'):
		continue
	print(col, overallDF[col].unique())

In [None]:
def makeGOHyperHeatmapPlots(df, method):
	"""Draw hyperparameter heatmaps for ONE GO method across all programs and problem sizes.

	NOTE(review): a second function with this same name (taking a program name
	instead of a method) is defined in a later cell of this notebook. Once that
	cell has run, this definition is no longer reachable by name.

	Parameters
	----------
	df : DataFrame
		Combined GO results; the columns read here are 'method', 'progname',
		'probsize', 'seed', 'globalSample', 'xtime' and the hyperparameter
		columns listed for `method` in the module-level `hypers`.
	method : str
		Key of `hypers`, e.g. 'pso', 'cma', 'bo-ucb'.

	Figures produced (each a FacetGrid with programs in rows and problem sizes
	in columns, shown with plt.show()):
	  1. the earliest globalSample whose running-best speedup over the baseline
	     exceeds 1.0 (cells that never do keep the value 301);
	  2. seven figures with the running-best speedup at globalSample
	     0, 50, 100, 150, 200, 250 and 299.

	Also reads the module-level `globalBaselines`, `globalOptimals`, `prognames`,
	`probsizes` and `seeds`. Returns None.
	"""

	# we're going to make one plot for each GO method
	# want to show programs in rows, probsize in columns
	# the heatmap for each will show the hyperparameters on the two axes
	
	# get the subset we're interested in
	df = df[df['method'] == method].reset_index(drop=True)
	
	#print(df.columns)
	
	xaxis, yaxis = hypers[method]
	#print(xaxis, yaxis)
	
	# 'seed' is moved to the front of the sort key, so take it out of the axis list first
	axisCols = list(xaxis+yaxis)
	axisCols.remove('seed')
	colsToKeep = ['progname', 'probsize', 'seed', 'globalSample']+axisCols
	colsToDrop = list(df.columns)
	
	# everything that is neither a kept column nor 'xtime' gets dropped
	[colsToDrop.remove(col) for col in colsToKeep]
	colsToDrop.remove('xtime')
	
	#print('dropping')
	#print(colsToDrop)
	
	# get rid of unused columns
	df = df.drop(colsToDrop, axis=1)
	
	# presort the DF
	df = df.sort_values(by=colsToKeep, ignore_index=True)
	
	# pick one combination to make column tuples with
	# NOTE(review): `subset`, `xtuples` and `ytuples` are computed but never read afterwards
	subset = df[(df['progname'] == prognames[0]) & (df['probsize'] == probsizes[0]) & (df['seed'] == seeds[0]) & (df['globalSample'] == 0)]
	#print('subset cols', subset.columns)
	#print('subset shape',subset.shape)
	
	
	
	# need to make tuples of the columns, and stringify them
	if len(xaxis) > 1:
		xtuples = list(product(*[list(sorted(subset[col].unique())) for col in xaxis]))
		#tuples = list(set([ str(a) for a in list(zip( *[list(subset[col]) for col in xaxis] ))]))
		#print(len(xtuples), xtuples)
	
		# tupleify the columns and drop them
		newXColName = f'({",".join(xaxis)})'
		df[newXColName] = list(zip(*[df[col] for col in xaxis]))
		df = df.drop(list(xaxis), axis=1)
	else:
		xtuples = list(sorted(subset[xaxis[0]].unique()))
		newXColName = xaxis[0]
		#print(xtuples)
	
	if len(yaxis) > 1:
		ytuples = list(product(*[list(sorted(subset[col].unique())) for col in yaxis]))
		#tuples = list(set([ str(a) for a in list(zip( *[list(subset[col]) for col in xaxis] ))]))
		#print(len(ytuples), ytuples)
	
		# tupleify the columns and drop them
		newYColName = f'({",".join(yaxis)})'
		df[newYColName] = list(zip(*[df[col] for col in yaxis]))
		df = df.drop(list(yaxis), axis=1)
	else:
		ytuples = list(sorted(subset[yaxis[0]].unique()))
		newYColName = yaxis[0]
		#print(ytuples)
	
	#print('new columns')
	#print(df.columns)
	
	#print('uniques')
	## for each column, print the number of unique values
	#for col in list(df.columns):
	#	if col != 'xtime':
	#		print(col, len(list(df[col].unique())))
	
	# find the min xtime found up to each globalSample
	# (rows were sorted above, so within a group they are in globalSample order)
	df['cummin'] = df.groupby([newXColName, newYColName, 'probsize', 'progname'])['xtime'].transform('cummin')
	
	# let's make a new dataframe column for each plot type we want to make
	# 1) earliest better-than-baseline config found for each GO hyperparam combination
	
	# rescale the xtime to be baseline-normalized
	#print('pre-reset index')
	#print(df.shape, df.head())
	df = df.set_index(['progname', 'probsize', newXColName, newYColName])
	baselines = globalBaselines.set_index(['progname', 'probsize'])
	#print('post- reset index')
	#print(df.shape, df.head())
	
	# despite its name, 'baselineXtime' holds baseline xtime / running-min xtime, i.e. a speedup
	# NOTE(review): `baselines` has a two-level index while df.index has four levels --
	# confirm this reindex aligns the rows as intended.
	df['baselineXtime'] = 1/df['cummin'].div(baselines.reindex(df.index)['xtime'], axis=0)
	
	#print('rescaled')
	#print(df.shape, df.head())
	
	# one row per distinct (progname, probsize, x, y) index entry
	earliestSamples = pd.DataFrame(index=df.index.copy())
	#earliestSamples = df.copy(deep=True)
	#print('super init earliest sampels')
	#print(earliestSamples.shape, earliestSamples)
	earliestSamples = earliestSamples[~earliestSamples.index.duplicated(keep='first')]
	#earliestSamples = earliestSamples.groupby(earliestSamples.index).first()
	
	# set it to the latest possible value
	earliestSamples['firstSample'] = 301
	
	#print('init early samples')
	#print(earliestSamples.shape, earliestSamples)
	
	#test = df.loc[df.baselineXtime >= 0.1, 'globalSample'].min()
	test = df.loc[df.baselineXtime > 1.0].groupby(level=[0,1,2,3])['globalSample'].min()
	#print('found earliest')
	#print(test.shape, test)
	
	# now find the earliest globalSample whose speedup is strictly > 1.0
	# update only a couple elements
	# NOTE(review): this is an in-place update through a column selection; under pandas
	# Copy-on-Write it may not write back into earliestSamples -- confirm on the pandas version in use.
	earliestSamples['firstSample'].update(test)
	#print('earliest samples')
	#print(earliestSamples.shape, earliestSamples)
	
	# now reset the index
	earliestSamples = earliestSamples.reset_index()
	
	#print('reset index')
	#print(earliestSamples.shape, earliestSamples)
	
	
	# per-facet callback: pivot the facet's rows into a (y axis) x (x axis) grid of firstSample values
	def drawHeatmap(*args, **kwargs):
		data = kwargs.pop('data').copy(deep=True)
		if 'probsize' in list(data.columns):
			data = data.drop(['probsize'], axis=1)
		if 'progname' in list(data.columns):
			data = data.drop(['progname'], axis=1)

		data = data.pivot(index=newYColName, columns=newXColName, values='firstSample')

		# pso gets a smaller annotation font
		if method == 'pso':
			sns.heatmap(data, annot_kws={'fontsize':6}, **kwargs)
		else:
			sns.heatmap(data, **kwargs)

		return
	
	# let's first make the tuples of columns
	g = sns.FacetGrid(earliestSamples, row='progname', col='probsize', col_order=probsizes, palette='flare', height=15, aspect=1.5)
	g.map_dataframe(drawHeatmap, annot=True, vmin=0, vmax=300, cbar=True, xticklabels=True, yticklabels=True, fmt='.3g')
	
	for ax in g.axes.flatten():
		ax.tick_params(axis='x', labelbottom=True, labelrotation=90)
	
	plt.tight_layout()
	
	g.fig.subplots_adjust(top=0.96)
	# NOTE(review): the title says ">=" while the selection above uses a strict "> 1.0"
	g.fig.suptitle(f'GO Hyperparam Exploration ({method.upper()}) -- Samples till >= baseline xtime')
	
	plt.show()

	# let's make the plots showing what happens at each step 0, 50, 100, 150, 200, 250, 299
	# want the best speedup witnessed up until each point

	samps = pd.DataFrame(index=df.index.copy())
	samps = samps[~samps.index.duplicated(keep='first')]

	# update the dataframe with the desired columns
	for i in range(0,7):
		steps = 50*i
		# 299 is used in place of 300
		steps = 299 if steps == 300 else steps
		colName = f'step{steps}'
		# entries without a row at this step keep the value 0
		samps[colName] = 0
		test = df.loc[df.globalSample == steps].groupby(level=[0,1,2,3])['baselineXtime'].max()
		# NOTE(review): same in-place column update pattern as for firstSample above
		samps[colName].update(test)

	samps = samps.reset_index()

	# per-facet callback for the speedup figures; replaces the drawHeatmap defined above
	def drawHeatmap(*args, **kwargs):
		data = kwargs.pop('data').copy(deep=True)
		targetCol = kwargs.pop('step')
		# probsize/progname are only bound when the columns exist; they are read unconditionally below
		if 'probsize' in list(data.columns):
			probsize = data.iloc[0]['probsize']
			data = data.drop(['probsize'], axis=1)
		if 'progname' in list(data.columns):
			progname = data.iloc[0]['progname']
			data = data.drop(['progname'], axis=1)

		optimalXtime = globalOptimals[(globalOptimals.progname == progname) & (globalOptimals.probsize == probsize)]['xtime'].iat[0]
		baselineXtime = globalBaselines[(globalBaselines.progname == progname) & (globalBaselines.probsize == probsize)]['xtime'].iat[0]

		# speedup of the database optimum over the baseline
		vmax = baselineXtime/optimalXtime

		#print(f'working on {probsize} {progname} vmax {vmax}')

		data = data.pivot(index=newYColName, columns=newXColName, values=targetCol)

		# NOTE(review): vmax is handed to sns.heatmap next to a norm that is built without
		# vmin/vmax -- confirm that the computed vmax actually bounds the colour scale.
		if method == 'pso':
			sns.heatmap(data, vmax=vmax, norm=mcolors.TwoSlopeNorm(vcenter=1.0), cmap='seismic', annot_kws={'fontsize':6}, **kwargs)
		else:
			sns.heatmap(data, vmax=vmax, norm=mcolors.TwoSlopeNorm(vcenter=1.0), cmap='seismic', **kwargs)

		return


	# one figure per step column prepared above
	for i in range(0,7):
		step = 50*i
		step = 299 if step == 300 else step

		g = sns.FacetGrid(samps, row='progname', col='probsize', col_order=probsizes, palette='flare', height=15, aspect=1.5)
		colName = f'step{step}'
		g.map_dataframe(drawHeatmap, step=colName, annot=True, vmin=0.0, cbar=True, xticklabels=True, yticklabels=True)

		for ax in g.axes.flatten():
			ax.tick_params(axis='x', labelbottom=True, labelrotation=90)

		plt.tight_layout()

		g.fig.subplots_adjust(top=0.96)
		g.fig.suptitle(f'GO Hyperparam Exploration ({method.upper()}) -- Best Speedup at {step} Samples')

		plt.show()
	return

In [None]:
#makeGOHyperHeatmapPlots(overallDF, 'pso')
#makeGOHyperHeatmapPlots(overallDF, 'cma')
#makeGOHyperHeatmapPlots(overallDF, 'bo-ucb')
#makeGOHyperHeatmapPlots(overallDF, 'bo-ei')
#makeGOHyperHeatmapPlots(overallDF, 'bo-poi')

In [None]:
def _foldAxisColumns(df, axisCols):
	"""Reduce the columns of one heatmap axis to a single column; returns (df, columnName).

	A single-column axis is used as is. Several columns are zipped into one
	tuple-valued column named '(a,b,...)' and the originals are dropped.
	"""
	if len(axisCols) == 1:
		return df, axisCols[0]

	foldedName = f'({",".join(axisCols)})'
	df[foldedName] = list(zip(*[df[col] for col in axisCols]))
	return df.drop(list(axisCols), axis=1), foldedName


# we assume that the DF passed in here has the same method, progname and probsize
def preprepareDF(df, method,  progname, probsize):
	"""Reshape the rows of one (method, progname, probsize) facet for heatmap plotting.

	Parameters
	----------
	df : DataFrame
		Rows of a single facet. Must contain 'seed', 'globalSample', 'xtime' and the
		hyperparameter columns listed for `method` in `hypers`; every other column
		is discarded. The frame passed in is not modified.
	method : str
		Key of the module-level `hypers` dict; selects the x/y axis columns.
	progname, probsize : str
		Used to look up the baseline execution time in `globalBaselines`.

	Returns
	-------
	(DataFrame, str, str)
		The frame indexed by (x column, y column) plus the two column names.
		It carries two added columns:
		  'cummin'        -- smallest xtime seen so far within each (x, y) group,
		                     in ascending globalSample order;
		  'baselineXtime' -- baseline xtime divided by 'cummin', i.e. the running
		                     best speedup over the baseline despite the column name.

	Raises
	------
	IndexError
		If `globalBaselines` has no row for (progname, probsize).
	"""
	xaxis, yaxis = hypers[method]

	# 'seed' leads the sort key, so it is taken out of the axis list first
	axisCols = list(xaxis+yaxis)
	axisCols.remove('seed')
	colsToKeep = ['seed', 'globalSample']+axisCols

	# keep only the sort columns and 'xtime', preserving the incoming column order
	wanted = set(colsToKeep) | {'xtime'}
	df = df[[col for col in df.columns if col in wanted]]

	# presort the DF; the cumulative minimum below relies on globalSample order within a group
	df = df.sort_values(by=colsToKeep, ignore_index=True)

	# an axis made of several hyperparameters becomes one tuple-valued column
	df, newXColName = _foldAxisColumns(df, xaxis)
	df, newYColName = _foldAxisColumns(df, yaxis)

	# find the min xtime found up to each globalSample
	df['cummin'] = df.groupby([newXColName, newYColName])['xtime'].transform('cummin')

	df = df.set_index([newXColName, newYColName])

	# rescale the xtime to be baseline-normalized (first matching baseline row is used)
	baselineRows = globalBaselines.loc[(globalBaselines.progname == progname) & (globalBaselines.probsize == probsize), 'xtime']
	if baselineRows.empty:
		raise IndexError(f'no baseline xtime found for progname={progname!r}, probsize={probsize!r}')
	baselineXtime = baselineRows.iat[0]

	df['baselineXtime'] = baselineXtime/df['cummin']

	return df, newXColName, newYColName

def drawEarlySamplesHeatmap(*args, **kwargs):
	"""FacetGrid.map_dataframe callback: heatmap of the first sample that beats the baseline.

	Expects kwargs['data'] to hold the rows of a single (method, probsize, progname)
	facet; the three values are read from the first row. Each cell of the heatmap is
	one hyperparameter combination and shows the smallest globalSample whose running
	best speedup is strictly greater than 1.0. Combinations that never beat the
	baseline show 301.

	All remaining keyword arguments are forwarded to sns.heatmap. For method 'pso'
	a smaller annotation font is used unless the caller supplies its own annot_kws.
	Returns None.
	"""
	# no defensive copy needed: preprepareDF builds its own frame and leaves its input untouched
	facet = kwargs.pop('data')
	method = facet['method'].iat[0]
	probsize = facet['probsize'].iat[0]
	progname = facet['progname'].iat[0]

	df, newXColName, newYColName = preprepareDF(facet, method, progname, probsize)

	# Keep globalSample only where the baseline is beaten, then take the per-combination
	# minimum; groups with no such row come out as NaN and get the 301 sentinel.
	# This replaces `frame['firstSample'].update(...)`, a chained in-place update that
	# does not write back into the frame when pandas Copy-on-Write is active.
	beatingSamples = df['globalSample'].where(df['baselineXtime'] > 1.0)
	firstSample = beatingSamples.groupby(level=[0,1]).min().fillna(301)

	earliestSamples = firstSample.rename('firstSample').reset_index()
	grid = earliestSamples.pivot(index=newYColName, columns=newXColName, values='firstSample')

	heatmapKws = dict(kwargs)
	if method == 'pso':
		# the pso grids are dense; shrink the annotations so they stay readable
		heatmapKws.setdefault('annot_kws', {'fontsize':6})
	sns.heatmap(grid, **heatmapKws)

	return

def drawSpeedupHeatmap(*args, **kwargs):
	"""FacetGrid.map_dataframe callback: heatmap of the best speedup reached at a given step.

	Expects kwargs['data'] to hold the rows of a single (method, probsize, progname)
	facet and kwargs['step'] to be the globalSample value to report. Each cell is one
	hyperparameter combination and shows its running-best speedup over the baseline
	at that step. Combinations with no row at `step` show 0.

	Colours use a two-slope norm centred on 1.0 (no speedup), running from 0.0 up to
	the speedup of the database optimum over the baseline. All remaining keyword
	arguments are forwarded to sns.heatmap. For method 'pso' a smaller annotation
	font is used unless the caller supplies its own annot_kws. Returns None.
	"""
	# no defensive copy needed: preprepareDF builds its own frame and leaves its input untouched
	facet = kwargs.pop('data')
	step = kwargs.pop('step')

	method = facet['method'].iat[0]
	probsize = facet['probsize'].iat[0]
	progname = facet['progname'].iat[0]

	df, newXColName, newYColName = preprepareDF(facet, method, progname, probsize)

	# Speedup at exactly `step` per hyperparameter combination; 0 where nothing was sampled.
	# Computed directly rather than via `frame[col].update(...)` on an int-initialised
	# column: that chained update does not write back under pandas Copy-on-Write and
	# pushes float speedups into an integer column.
	colName = f'step{step}'
	atStep = df['baselineXtime'].where(df['globalSample'] == step)
	speedups = atStep.groupby(level=[0,1]).max().fillna(0.0)
	samps = speedups.rename(colName).reset_index()

	optimalXtime = globalOptimals[(globalOptimals.progname == progname) & (globalOptimals.probsize == probsize)]['xtime'].iat[0]
	baselineXtime = globalBaselines[(globalBaselines.progname == progname) & (globalBaselines.probsize == probsize)]['xtime'].iat[0]

	# TwoSlopeNorm requires vcenter < vmax strictly; nudge vmax when the baseline is
	# itself the database optimum (ratio exactly 1.0) so the norm can still be built.
	vmax = max(baselineXtime/optimalXtime, 1.0 + 1e-6)

	samps = samps.pivot(index=newYColName, columns=newXColName, values=colName)

	heatmapKws = dict(kwargs)
	if method == 'pso':
		# the pso grids are dense; shrink the annotations so they stay readable
		heatmapKws.setdefault('annot_kws', {'fontsize':6})
	sns.heatmap(samps, norm=mcolors.TwoSlopeNorm(vcenter=1.0, vmin=0.0, vmax=vmax), cmap='seismic', **heatmapKws)

	return

def makeGOHyperHeatmapPlots(df, progname):
	"""Show all hyperparameter heatmap figures for one benchmark program.

	Rows of `df` whose 'progname' equals `progname` are laid out on a FacetGrid
	with one row per GO method and one column per problem size (in `probsizes`
	order). Four figures are shown in turn:

	  * samples needed until the baseline is beaten (drawEarlySamplesHeatmap);
	  * best speedup after 100, 200 and 299 samples (drawSpeedupHeatmap).

	Axes are not shared between facets. Returns None.
	"""
	# get the subset we're interested in
	progRows = df[df['progname'] == progname].reset_index(drop=True)

	def showFigure(drawFn, title, **drawKwargs):
		# build the grid, fill every facet with drawFn, then label and display it
		grid = sns.FacetGrid(progRows, row='method', col='probsize', col_order=probsizes, height=15, aspect=1.5, sharex=False, sharey=False)
		grid.map_dataframe(drawFn, **drawKwargs)

		for panel in grid.axes.flatten():
			panel.tick_params(axis='x', labelbottom=True, labelrotation=90)

		grid.set_titles(col_template="Problem Size: {col_name}", row_template="GO Method: {row_name}")

		# leave room above the facets for the figure title
		grid.fig.subplots_adjust(top=0.96)
		grid.fig.suptitle(title)

		plt.show()

	# figure 1: how many samples until something faster than the baseline shows up
	showFigure(
		drawEarlySamplesHeatmap,
		f'GO Hyperparam Exploration ({progname.upper()}) -- Samples till > baseline xtime',
		annot=True, vmin=0, vmax=300, cbar=True, xticklabels=True, yticklabels=True, fmt='.3g',
	)

	# figures 2-4: best speedup witnessed up to steps 100, 200 and 299 (the last sample index)
	for step in (100, 200, 299):
		showFigure(
			drawSpeedupHeatmap,
			f'GO Hyperparam Exploration ({progname.upper()}) -- Best Speedup after {step} samples',
			step=step, annot=True, vmin=0.0, fmt='.3f', cbar=True, xticklabels=True, yticklabels=True,
		)
	return

In [None]:
# Show the four heatmap figures for BT: samples needed to beat the baseline,
# then the best speedup witnessed after 100, 200 and 299 samples.

makeGOHyperHeatmapPlots(overallDF, 'bt_nas')

In [None]:

# Same set of heatmap figures for FT.
makeGOHyperHeatmapPlots(overallDF, 'ft_nas')

In [None]:

# Same set of heatmap figures for HPCG.
makeGOHyperHeatmapPlots(overallDF, 'hpcg')

In [None]:

# Same set of heatmap figures for Lulesh.
makeGOHyperHeatmapPlots(overallDF, 'lulesh')

In [None]:
# we're going to make one plot for each GO method
# want to show programs in rows, probsize in columns
# the heatmap for each will show the hyperparameters on the two axes

# we're going to generate 4 heatmaps showing the following values:
# 1) the earliest step a better-than-baseline configuration found
# 2) the speedup of the best configuration found after 100 steps
# 3) the speedup of the best configuration found after 200 steps
# 4) the speedup of the best configuration found after 300 steps

# get the subset we're interested in
#method = 'cma'
#df = overallDF[overallDF['method'] == method].reset_index(drop=True)
#
#print(df.columns)
#
#xaxis, yaxis = hypers[method]
#print(xaxis, yaxis)
#
#axisCols = list(xaxis+yaxis)
#axisCols.remove('seed')
#colsToKeep = ['progname', 'probsize', 'seed', 'globalSample']+axisCols
#colsToDrop = list(df.columns)
#
#[colsToDrop.remove(col) for col in colsToKeep]
#colsToDrop.remove('xtime')
#
#print('dropping')
#print(colsToDrop)
#
## get rid of unused columns
#df = df.drop(colsToDrop, axis=1)
#
## presort the DF
#df = df.sort_values(by=colsToKeep, ignore_index=True)
#
## pick one combination to make column tuples with
#subset = df[(df['progname'] == prognames[0]) & (df['probsize'] == probsizes[0]) & (df['seed'] == seeds[0]) & (df['globalSample'] == 0)]
#print('subset cols', subset.columns)
#print('subset shape',subset.shape)
#
## need to make tuples of the columns, and stringify them
#if len(xaxis) > 1:
#	xtuples = list(product(*[list(sorted(subset[col].unique())) for col in xaxis]))
#	#tuples = list(set([ str(a) for a in list(zip( *[list(subset[col]) for col in xaxis] ))]))
#	print(len(xtuples), xtuples)
#
#	# tupleify the columns and drop them
#	newXColName = f'({",".join(xaxis)})'
#	df[newXColName] = list(zip(*[df[col] for col in xaxis]))
#	df = df.drop(list(xaxis), axis=1)
#else:
#	xtuples = list(sorted(subset[xaxis[0]].unique()))
#	newXColName = xaxis[0]
#	print(xtuples)
#
#if len(yaxis) > 1:
#	ytuples = list(product(*[list(sorted(subset[col].unique())) for col in yaxis]))
#	#tuples = list(set([ str(a) for a in list(zip( *[list(subset[col]) for col in xaxis] ))]))
#	print(len(ytuples), ytuples)
#
#	# tupleify the columns and drop them
#	newYColName = f'({",".join(yaxis)})'
#	df[newYColName] = list(zip(*[df[col] for col in yaxis]))
#	df = df.drop(list(yaxis), axis=1)
#else:
#	ytuples = list(sorted(subset[yaxis[0]].unique()))
#	newYColName = yaxis[0]
#	print(ytuples)
#
#print('new columns')
#print(df.columns)
#
##print('uniques')
### for each column, print the number of unique values
##for col in list(df.columns):
##	if col != 'xtime':
##		print(col, len(list(df[col].unique())))
#
## find the min xtime found up to each globalSample
#df['cummin'] = df.groupby([newXColName, newYColName, 'probsize', 'progname'])['xtime'].transform('cummin')
#
## let's make a new dataframe column for each plot type we want to make
## 1) earliest better-than-baseline config found for each GO hyperparam combination
#
## rescale the xtime to be baseline-normalized
#print('pre-reset index')
#print(df.shape, df.head())
#df = df.set_index(['progname', 'probsize', newXColName, newYColName])
#baselines = globalBaselines.set_index(['progname', 'probsize'])
#print('post- reset index')
#print(df.shape, df.head())
#
#df['baselineXtime'] = 1/df['cummin'].div(baselines.reindex(df.index)['xtime'], axis=0)
#
#print('rescaled')
#print(df.shape, df.head())
#earliestSamples = pd.DataFrame(index=df.index.copy())
##earliestSamples = df.copy(deep=True)
#print('super init earliest sampels')
#print(earliestSamples.shape, earliestSamples)
#earliestSamples = earliestSamples[~earliestSamples.index.duplicated(keep='first')]
##earliestSamples = earliestSamples.groupby(earliestSamples.index).first()
#
## set it to the latest possible value
#earliestSamples['firstSample'] = 301
#
#print('init early samples')
#print(earliestSamples.shape, earliestSamples)
#
##test = df.loc[df.baselineXtime >= 0.1, 'globalSample'].min()
#test = df.loc[df.baselineXtime >= 1.0].groupby(level=[0,1,2,3])['globalSample'].min()
#print('found earliest')
#print(test.shape, test)
#
## now find the earliest globalSample that is >= 1.0
## update only a couple elements
#earliestSamples['firstSample'].update(test)
#print('earliest samples')
#print(earliestSamples.shape, earliestSamples)
#
## now reset the index
#earliestSamples = earliestSamples.reset_index()
#
#print('reset index')
#print(earliestSamples.shape, earliestSamples)

	
#df['xtime'] = df.groupby([newXColName, newYColName, 'probsize', 'progname']).apply(getEarliestSampleBeatingBenchmark)['xtime']


#df['reachedBaseline'] = df[xtime >= 1.0]

#return
#

#def drawHeatmap(*args, **kwargs):
#	data = kwargs.pop('data').copy(deep=True)
#	if 'probsize' in list(data.columns):
#		data = data.drop(['probsize'], axis=1)
#	if 'progname' in list(data.columns):
#		data = data.drop(['progname'], axis=1)
#	data = data.pivot(index=newXColName, columns=newYColName, values='firstSample')
#	# silly how we have to resort this manually...
#	#data.index = pd.CategoricalIndex(data.index, categories=uniqA)
#	#data.sort_index(level=0, inplace=True)
#
#	#data.columns = pd.CategoricalIndex(data.columns, categories=uniqB)
#	#data.sort_index(axis='columns', level='OMP_NUM_THREADS_PLACES', inplace=True)
#
#	#print(data)
#	# plot the good values
#	ax = sns.heatmap(data, **kwargs)
#	return
##
##
## let's first make the tuples of columns
##
#g = sns.FacetGrid(earliestSamples, row='progname', col='probsize', col_order=probsizes, palette='flare', height=10, aspect=1.5)
##g.map_dataframe(drawHeatmap, annot=True, vmin=0.0, vmax=1.0, cbar=True)
#g.map_dataframe(drawHeatmap, annot=False, vmin=0, vmax=300, cbar=True, xticklabels=True, yticklabels=True)
##
##
#for ax in g.axes.flatten():
#	ax.tick_params(axis='x', labelbottom=True, labelrotation=90)
##
#plt.tight_layout()
##
#g.fig.subplots_adjust(top=0.96)
#g.fig.suptitle(f'GO Hyperparam Exploration ({method.upper()}) -- Samples till >= baseline xtime')
##
#plt.show()
##
## for PSO we will need to make tuples

In [None]:
# let's make the plots showing what happens at each step 0, 50, 100, 150, 200, 250, 299
# want the best speedup witnessed up until each point

#samps = pd.DataFrame(index=df.index.copy())
#samps = samps[~samps.index.duplicated(keep='first')]
#
#for i in range(1,7):
#	steps = 50*i
#	steps = 299 if steps == 300 else steps
#	colName = f'step{steps}'
#	samps[colName] = 0
#	test = df.loc[df.globalSample == steps].groupby(level=[0,1,2,3])['baselineXtime'].max()
#	samps[colName].update(test)
#
#samps = samps.reset_index()
#
#def drawHeatmap(*args, **kwargs):
#	data = kwargs.pop('data').copy(deep=True)
#	targetCol = kwargs.pop('step')
#	if 'probsize' in list(data.columns):
#		probsize = data.iloc[0]['probsize']
#		data = data.drop(['probsize'], axis=1)
#	if 'progname' in list(data.columns):
#		progname = data.iloc[0]['progname']
#		data = data.drop(['progname'], axis=1)
#		
#	optimalXtime = globalOptimals[(globalOptimals.progname == progname) & (globalOptimals.probsize == probsize)]['xtime'].iat[0]
#	baselineXtime = globalBaselines[(globalBaselines.progname == progname) & (globalBaselines.probsize == probsize)]['xtime'].iat[0]
#
#	vmax = baselineXtime/optimalXtime
#
#	#print(f'working on {probsize} {progname} vmax {vmax}')
#
#	data = data.pivot(index=newXColName, columns=newYColName, values=targetCol)
#	ax = sns.heatmap(data, vmax=vmax, **kwargs)
#	return
#
#
## let's first make the tuples of columns
#for i in range(1,7):
#	step = 50*i
#	step = 299 if step == 300 else step
#
#	g = sns.FacetGrid(samps, row='progname', col='probsize', col_order=probsizes, palette='flare', height=10, aspect=1.5)
#	colName = f'step{step}'
#	g.map_dataframe(drawHeatmap, step=colName, annot=True, vmin=1.0, cbar=True, xticklabels=True, yticklabels=True)
#
#	for ax in g.axes.flatten():
#		ax.tick_params(axis='x', labelbottom=True, labelrotation=90)
#
#	plt.tight_layout()
#
#	g.fig.subplots_adjust(top=0.96)
#	g.fig.suptitle(f'GO Hyperparam Exploration ({method.upper()}) -- Best Speedup at {step} Samples')
#
#	plt.show()


In [None]:
#
#def makeOverallPlots(df, baselineXtimes=None):
#	# want each row to be a program, and each column to be a problem size
#	# each plot will show the average cumsum across each bo-util fnct, PSO, and CMA
#
#	otherCols = list(df.columns)
#	otherCols.remove('xtime')
#	otherCols.remove('globalSample')
#	print(otherCols)
#
#	# for each category, we need to do a cumulative max
#	df['cummax'] = df.groupby(otherCols, dropna=False)['xtime'].transform('cummax')
#
#	def addBaselines(*args, **kwargs):
#		data = kwargs['data']
#		probsize = data.iloc[0]['probsize']
#		progname = data.iloc[0]['progname']
#		method = data.iloc[0]['method']
#
#		ax = plt.gca()
#		handles, labels = ax.get_legend_handles_labels()
#
#		optimal = globalOptimals[(globalOptimals['progname'] == progname) & (globalOptimals['probsize'] == probsize)]['xtime'].iat[0]
#		baseline = globalBaselines[(globalBaselines['progname'] == progname) & (globalBaselines['probsize'] == probsize)]['xtime'].iat[0]
#
#		#avrgCummax = data.groupby(['globalSample'], dropna=False)['cummax'].mean().reset_index()
#		#if baselineXtimes is None:
#		#	#step = data[data['cummax'] >= 0.5]['globalSample'].min()
#		#	step = avrgCummax[avrgCummax['cummax'] >= 0.5]['globalSample'].min()
#		#	if step is None:
#		#		print(f'{progname} {probsize} {method} did not reach baseline')
#		#	else:
#		#		print(f'{progname} {probsize} {method} passed baseline on step {step}')
#		#else:
#		#	val = baselineXtimes.loc[(baselineXtimes['progname'] == progname) & (baselineXtimes['probsize'] == probsize), 'xtime'].iat[0]
#		#	poss = baselineXtimes.loc[(baselineXtimes['progname'] == progname) & (baselineXtimes['probsize'] == probsize), 'xtime']
#		#	print('poss')
#		#	print(poss)
#		#	step = avrgCummax[avrgCummax['cummax'] >= val]['globalSample'].min()
#		#	if step is None:
#		#		print(f'{progname} {probsize} {method} did not reach baseline')
#		#	else:
#		#		print(f'{progname} {probsize} {method} passed baseline on step {step}')
#		#	print('at step 280, avrgCummax is', avrgCummax[avrgCummax['globalSample'] == 280.0])
#
#		if not ('optimal' in labels):
#			ax.set_title(f'{prognameMap[progname]} -- {probsizeMap[probsize]}')
#			ax.set_xlabel('Sample Index')
#
#			if baselineXtimes is None:
#				#ax.set_ylabel('Norm. to Optimal/Baseline Execution Times from Database \n(higher is better)')
#				ax.axhline(baseline/optimal, c='blue', linestyle='--', zorder=0, label='optimal')
#				ax.axhline(1.0, c='red', linestyle='--', zorder=0, label='baseline')
#				# what step do we pass the baseline xtime?
#			else:
#				#ax.set_ylabel('Norm. to Optimal/Worst Execution Times from Database \n(higher is better)')
#				val = baselineXtimes.loc[(baselineXtimes['progname'] == progname) & (baselineXtimes['probsize'] == probsize), 'xtime'].iat[0]
#				ax.axhline(1.0, c='blue', linestyle='--', zorder=0, label='optimal')
#				ax.axhline(val, c='red', linestyle='--', zorder=0, label='baseline')
#
#		#ax.legend(loc='lower right')
#		return
#
#	g = sns.FacetGrid(df, row='progname', col='probsize', col_order=probsizes, hue='method', 
#										palette='flare', legend_out=False, height=5, aspect=1.5, sharex=False, sharey=False)
#	g.map_dataframe(sns.lineplot, x='globalSample', y='cummax', errorbar="pi")
#	g.map_dataframe(addBaselines)
#	#g.set(ylim=(-0.05, 1.05))
#
#	axes = g.axes
#	for r in range(axes.shape[0]):
#		if baselineXtimes is None:
#			axes[r,0].set_ylabel('Norm. to Baseline from Database (Speedup) \n( >1.0 is better )')
#		else:
#			axes[r,0].set_ylabel('Norm. to Optimal/Worst Execution Times from Database \n(higher is better)')
#		for c in range(axes.shape[1]):
#			axes[r,c].legend(loc='lower right')
#
#	plt.tight_layout()
#
#	g.fig.subplots_adjust(top=0.95)
#	g.fig.suptitle('Average highest normalized execution time found at each optimization step')
#
#	plt.show()
#	return
#

In [None]:

#useBaseline=True
#
#overallDF = pd.DataFrame()
#tojoin = []
#for progname in prognames:
#	if progname == 'cg_nas' or progname == 'cfd_rodinia':
#		continue
#	for method in goMethods:
#		# read the pre-processed dataframe
#		if useBaseline:
#			filename = ROOT_DIR+'/databases/'+f'{MACHINE}-{progname}-{method}-GO_Data-baselineNorm.csv'
#		else:
#			filename = ROOT_DIR+'/databases/'+f'{MACHINE}-{progname}-{method}-GO_Data.csv'
#		fullDF = pd.read_csv(filename)
#		tojoin += [fullDF]
#
#overallDF = pd.concat(tojoin, ignore_index=True, sort=True)
#overallDF = overallDF.drop(['optimXtime', 'kappa_decay', 'kappa_decay_delay'], axis=1)
#
#print(overallDF.columns)
## for the method column, make all the bo entries a union of the method and utilFnct
##overallDF[overallDF['method'] == 'bo'].apply(lambda x: x['method']+'-'+x['utilFnct'], axis=1)
##print('it works')
#overallDF.loc[overallDF['method'] == 'bo', 'method'] = overallDF[overallDF['method'] == 'bo'].apply(lambda x: x['method']+'-'+x['utilFnct'], axis=1)
##print(overallDF['method'].unique())
#
#overallDF = overallDF.drop(['utilFnct'], axis=1)
#
#for col in overallDF:
#	if col == 'xtime' or col == 'globalSample' or col == 'optimXtime':
#		continue
#	print(col, overallDF[col].unique())

In [None]:
#baselinesDF = xtimeDB[(xtimeDB['OMP_NUM_THREADS'] == numthreads) 
#										& (xtimeDB['OMP_PROC_BIND'] == 'close')
#										& (xtimeDB['OMP_PLACES'] == 'threads')
#										& (xtimeDB['OMP_SCHEDULE'] == 'static')]
#
#def normToMinMax(row):
#	progname = row['progname']
#	probsize = row['probsize']
#
#	minVal, maxVal = getMinMaxXtimeForProg(progname, probsize)
#
#	row['xtime'] = 1 - (row['xtime']-minVal)/(maxVal-minVal)
#	return row
#
#
## normalize the baselinesDF to the min/max
#baselinesDF.loc[:,'xtime'] = baselinesDF.apply(normToMinMax, axis=1)['xtime']
#
#if useBaseline:
#	makeOverallPlots(overallDF)
#else:
#	makeOverallPlots(overallDF,baselinesDF)