In [None]:
# setup

from calendar import c
import collections
import pandas as pd
import pprint
import sys
import csv
import os
import json


import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import statistics

def geomean(series):
    """Return the geometric mean of ``series``, ignoring NaN entries.

    The mean is computed in log space as ``exp(mean(log(x)))``.

    Parameters
    ----------
    series : iterable of numbers
        Values to average; entries for which ``np.isnan`` is true are dropped.

    Returns
    -------
    numpy.float64
        The geometric mean of the remaining values. The result is NaN when
        nothing remains after dropping NaN entries.
    """
    non_nan = np.array([value for value in series if not np.isnan(value)])
    log_values = np.log(non_nan)
    return np.exp(np.mean(log_values))

def hmean(series):
    """Return the harmonic mean of ``series``, ignoring NaN entries.

    NaN values are dropped before both the count and the reciprocal sum are
    taken, matching how ``geomean`` treats missing values. Without this, a
    pandas Series containing NaN yields a wrong result: ``np.sum`` on a Series
    skips NaN while ``shape[0]`` still counts those rows.

    Parameters
    ----------
    series : array-like of numbers
        One-dimensional values (list, NumPy array or pandas Series).

    Returns
    -------
    numpy.float64
        ``n / sum(1 / x)`` over the non-NaN values; NaN if none remain.
    """
    values = np.asarray(series, dtype=float)
    values = values[~np.isnan(values)]
    return values.shape[0] / np.sum(1 / values)

def amean(series):
    """Return the arithmetic mean of ``series``, ignoring NaN entries.

    NaN values are dropped before both the sum and the count are taken,
    matching how ``geomean`` treats missing values. Without this, a pandas
    Series containing NaN yields a wrong result: ``np.sum`` on a Series skips
    NaN while ``shape[0]`` still counts those rows.

    Parameters
    ----------
    series : array-like of numbers
        One-dimensional values (list, NumPy array or pandas Series).

    Returns
    -------
    numpy.float64
        ``sum(x) / n`` over the non-NaN values; NaN if none remain.
    """
    values = np.asarray(series, dtype=float)
    values = values[~np.isnan(values)]
    return np.sum(values) / values.shape[0]

In [None]:
import pandas as pd
def gen_df(filename, K, core=64):
    """Load one result CSV and return it sorted by matrix name.

    The path that is read is ``<filename>_<core>core_K<K>.csv``.

    Parameters
    ----------
    filename : str
        Path prefix of the CSV file, without the core/K suffix.
    K : int
        Value substituted after ``K`` in the file name.
    core : int, optional
        Value substituted before ``core_`` in the file name (default 64).

    Returns
    -------
    pandas.DataFrame
        The CSV contents ordered by the ``mtx`` column; the original row
        labels are kept.
    """
    suffix = "_{}core_K{}.csv".format(core, K)
    csv_path = filename + suffix
    frame = pd.read_csv(csv_path)
    return frame.sort_values(by=['mtx'])

def get_file_names(working_dir, K, core=64):
    """List the result-file prefixes under ``working_dir`` for one (core, K) run.

    Only files whose name ends with exactly ``_<core>core_K<K>.csv`` are
    considered, i.e. the same suffix that ``gen_df`` appends when it reads a
    file. Matching the whole suffix avoids picking up results from a different
    core count and avoids mangling names through substring replacement
    (e.g. ``core=4`` turning ``64core_`` into ``6``).

    Parameters
    ----------
    working_dir : str
        Directory that is walked recursively.
    K : int
        K value encoded in the file names.
    core : int, optional
        Core count encoded in the file names (default 64).

    Returns
    -------
    list of str
        Sorted prefixes relative to ``working_dir`` with the suffix removed.
        Files found in subdirectories keep their relative directory so the
        prefix can still be joined with ``working_dir`` to locate the file.
    """
    suffix = "_" + str(core) + "core_K" + str(K) + ".csv"
    filenames = []
    for root, dirs, files in os.walk(working_dir):
        for file in files:
            if not file.endswith(suffix):
                continue
            stem = file[:-len(suffix)]
            if not stem:
                # A file named exactly like the suffix carries no algorithm name.
                continue
            filenames.append(os.path.relpath(os.path.join(root, stem), working_dir))

    # os.walk order is filesystem dependent; sort for reproducible results.
    return sorted(filenames)

# Working directory where all the result CSV files reside.
working_dir = 'data/'

K = 256
filenames = get_file_names(working_dir, K)

# Substring looked for in a result-file name -> display name of the algorithm.
# If several substrings occur in one name, the last listed match wins.
algo_labels = [
    ("aspt", "ASpT"),
    ("csf-uo", "CSF-UO"),
    ("csf-us", "CSF-US"),
    ("csr-32", "Fixed CSR"),
    ("jstream", "J-Stream"),
    ("rassm", "RASSM"),
]

rassm_name = ""
baseline_name = ""
baseline_key = "aspt"
dfs = {}
algos = []
algoname = ""
for filename in filenames:
    matched = [label for key, label in algo_labels if key in filename]
    if not matched:
        # Do not fall through with the previous iteration's algoname: that
        # would overwrite another algorithm's table with this file's data.
        print("Skipping unrecognized result file: " + filename)
        continue
    algoname = matched[-1]

    # Keep each algorithm once so no duplicate columns are produced below.
    if algoname not in algos:
        algos.append(algoname)

    if baseline_key in filename:
        baseline_name = algoname
    if 'rassm' in filename:
        rassm_name = algoname

    dfs[algoname] = gen_df(working_dir + filename, K)

if not baseline_name or not rassm_name:
    raise ValueError(
        "Need both a baseline ('" + baseline_key + "') and a 'rassm' result file in "
        + working_dir + " for K=" + str(K) + "; found: " + str(filenames)
    )

# Rows follow the baseline's matrices (already sorted by name in gen_df).
mtx_order = dfs[baseline_name]['mtx'].to_numpy()

# Align every algorithm on the matrix name rather than on the leftover CSV row
# labels: two files listing the matrices in a different order would otherwise
# pair up numbers belonging to different matrices.
# Assumes each file has at most one row per matrix -- TODO confirm.
gflops_df = pd.DataFrame({'mtx': mtx_order})
for algoname in algos:
    gflops_by_mtx = dfs[algoname].set_index('mtx')['gflops']
    gflops_df[algoname] = gflops_by_mtx.reindex(mtx_order).to_numpy()

# Speedup of RASSM over each algorithm (> 1 means RASSM achieved more GFLOPS).
speedup_df = pd.DataFrame({'mtx': mtx_order})
for algoname in algos:
    speedup_df[algoname] = gflops_df[rassm_name] / gflops_df[algoname]



In [None]:
plot_df = speedup_df.copy()
display(plot_df)

# Algorithms RASSM is compared against; one figure is produced per entry.
comparisons = ['ASpT', 'J-Stream', 'Fixed CSR', 'CSF-US', 'CSF-UO']
colors = {'ASpT': 'tomato', 'Fixed CSR': 'darkviolet', 'J-Stream': 'mediumblue', 'CSF-US': 'forestgreen', 'CSF-UO': 'forestgreen'}
# Used when a comparison has no entry in `colors`, so that `linecolor` is never
# unbound or silently inherited from the previous figure.
default_color = 'gray'


plt.rcParams.update({'legend.fontsize': 16, 'axes.labelsize': 16, 'axes.titlesize': 16, 'ytick.labelsize': 16, 'xtick.labelsize': 16})

# Settings shared by every figure.
larrow = u'\u2190'
rarrow = u'\u2192'
textcolor = 'dimgray'
pltwidth = 8
pltheight = 2.875
xlower = -20
label_y = 0.6  # y position (data coordinates) of the percentage annotations

for column in comparisons:
    # Skip algorithms without usable results instead of failing with a KeyError.
    if column not in plot_df.columns or not plot_df[column].notna().any():
        print("Skipping " + column + ": no speedup data available")
        continue

    print_name = column
    # Matrices are drawn in order of increasing speedup.
    plot_df = plot_df.sort_values(by=[column])
    num_rows = plot_df.shape[0]
    geo = geomean(plot_df[column])
    linecolor = colors.get(column, default_color)

    # Use a tighter y-range when the average speedup is small.
    ylower = 0.5
    yupper = 2 if geo < 1.15 else 4
    xupper = num_rows + 20

    ax = plot_df.plot.scatter(x='mtx', y=column, s=(plt.rcParams['lines.markersize'] / 3) ** 2, c=linecolor, title='Speedup over ' + print_name, figsize=(pltwidth, pltheight))
    plt.axhline(y=1, color='k', linestyle='--', label='_nolegend_')
    geo_line = plt.axhline(y=geo, color=linecolor, linestyle='-')

    # Matrix whose speedup is closest to 1 (break-even), and its rank in the
    # sorted order; the share of matrices left of it is annotated.
    closest_to_1 = plot_df.iloc[(plot_df[column] - 1).abs().argsort()[:1]]
    closest_name = closest_to_1['mtx'].values[0]
    closest_row = plot_df[plot_df['mtx'] == closest_name].index[0]
    closest_order = plot_df.index.get_loc(closest_row)
    plt.axvline(x=closest_name, color='k', linestyle='--', label='_nolegend_')

    closest_order_percent = int(100 * closest_order / num_rows + 0.5)
    plt.text(closest_name, label_y, larrow + " " + str(closest_order_percent) + "% ", ha='right', va='bottom', fontsize=16, color=textcolor)

    # Matrix whose speedup is closest to the geometric mean; the share of
    # matrices from it to the right end is annotated.
    closest_to_geomean = plot_df.iloc[(plot_df[column] - geo).abs().argsort()[:1]]
    geomean_name = closest_to_geomean['mtx'].values[0]
    plt.axvline(x=geomean_name, color='k', linestyle='--', label='_nolegend_')
    closest_geomean_order = plot_df.index.get_loc(plot_df[plot_df['mtx'] == geomean_name].index[0])
    closest_geomean_remain = num_rows - closest_geomean_order
    closest_geomean_remain_percent = int(100 * closest_geomean_remain / num_rows + 0.5)
    plt.text(geomean_name, label_y, " " + str(closest_geomean_remain_percent) + "% " + rarrow, ha='left', va='bottom', fontsize=16, color=textcolor)

    # Share of matrices between the two vertical markers, centred between them.
    middle = int((closest_order + closest_geomean_order) / 2)
    center_percent = 100 - closest_order_percent - closest_geomean_remain_percent
    plt.text(plot_df['mtx'].iloc[middle], label_y, larrow + " " + str(center_percent) + "% " + rarrow, ha='center', va='bottom', fontsize=16, color=textcolor)

    # Pass the artists explicitly: with labels only, which artist receives
    # which label depends on matplotlib's internal artist ordering.
    plt.legend([ax.collections[0], geo_line], [print_name, 'geomean'], loc='upper left')
    plt.yscale('log')
    plt.ylim(ylower, yupper)
    plt.xlim(xlower, xupper)
    # Tick labels come from the formatter set on the next line, so only the
    # tick positions are given here.
    plt.yticks([ylower, 1, geo, yupper])
    plt.gca().yaxis.set_major_formatter(plt.matplotlib.ticker.StrMethodFormatter('{x:,.2f}'))
    plt.gca().yaxis.set_minor_formatter(plt.NullFormatter())
    plt.gca().margins(x=0.15)

    # Numeric annotations: maximum (top right), minimum (bottom left), geomean.
    plt.text(xupper, yupper*7/8, str(round(plot_df[column].max(), 2)) + " ", ha='right', va='top', fontsize=16)
    plt.text(xlower, max(ylower, plot_df[column].min() - 0.1), " " + str(round(plot_df[column].min(), 2)), ha='left', va='bottom', fontsize=16)
    plt.text(len(plot_df['mtx'])/2, geo + 0.1, str(round(geo, 2)), ha='left', va='bottom', fontsize=16)
    plt.xlabel('')
    plt.ylabel('Speedup of RASSM')
    # NOTE(review): tick positions are hard-coded; presumably chosen for a
    # data set of roughly 360+ matrices -- confirm if the matrix set changes.
    plt.xticks([1, 45, 90, 180, 270, 360], [1, 45, 90, 180, 270, 360])
    plt.savefig(working_dir + print_name + '_speedup.pdf', bbox_inches='tight')
    plt.show()
