In [104]:
import pandas as pd
import os
import seaborn as sns


scaling_directory = '/scaling'
all_impls_directory = '/all_impls'
perc_directory = '/percentage'

base_path_viewing = "plots/plot_gen/viewing"
base_path_report = "plots/plot_gen/report"



# kill old plots
for f in os.listdir(base_path_viewing + scaling_directory):
    os.remove(os.path.join(base_path_viewing + scaling_directory, f))
for f in os.listdir(base_path_viewing + all_impls_directory):
    os.remove(os.path.join(base_path_viewing + all_impls_directory, f))
for f in os.listdir(base_path_viewing + perc_directory):
    os.remove(os.path.join(base_path_viewing + perc_directory, f))

for f in os.listdir(base_path_report + scaling_directory):
    os.remove(os.path.join(base_path_report + scaling_directory, f))
for f in os.listdir(base_path_report + all_impls_directory):
    os.remove(os.path.join(base_path_report + all_impls_directory, f))
for f in os.listdir(base_path_report + perc_directory):
    os.remove(os.path.join(base_path_report + perc_directory, f))

### Parameters that need to be set each run

In [105]:
# Remember to copy your plots into your own folder after each run, since the directories get cleaned at the beginning of each run
# You can of course change scaling_directory and all_impls_directory in the previous cell to where you would like to have your plots saved
        # just make sure they have the right folder structure
        # and remember to revert scaling_directory and all_impls_directory back or someone might accidentally delete all your plots
# Please Set Here Which Implementation you'd like to be plotted
implementations_to_plot = [
    'coo_vectorized', #'coo_opt_vectorization_SDDMM_GPU',
    'coo_loop_unrolled', #'coo_opt_loop_unrolled_SDDMM_GPU',
    # 'dynamic_warp_csr',
    # 'tiled', # 'merged',
    # 'naive_csr',  # 'semi_naive_CSR_SDDMM_GPU'
    # 'naive_coo', # 'naive_coo_SDDMM_GPU',
    # 'sampled_cuBLAS', # 'naive_SDDMM_GPU',
    # 'cuSPARSE_baseline', # 'cusparse_baseline'
    # 'sml2_paper'
    ]

# Choose which Datasets you want to be used in your plot
base_paths = [
    "/scratch/eschreib/correct_SDDMM_results/results_different_k_1k_10k_20k/",
    "/scratch/eschreib/correct_SDDMM_results/results_matrixmarket_different_k_10k/",
    "/scratch/eschreib/correct_SDDMM_results/results_matrixmarket_different_k_24k/",
    "/scratch/eschreib/correct_SDDMM_results/results_matrixmarket_different_k_100k/"

    # "/scratch/eschreib/correct_SDDMM_results/results_generated_square_1k_10k_20k/",
    # "/scratch/eschreib/correct_SDDMM_results/results_matrixmarket_square_10k/",
    # "/scratch/eschreib/correct_SDDMM_results/results_matrixmarket_square_24k/"
    # "/scratch/eschreib/results_full_run/",
    # "/scratch/eschreib/correct_SDDMM_results"
    # "/scratch/eschreib/results_downloaded_size_generated/",
    # "/scratch/eschreib/results_downloaded_24k_generated/",
    # "/scratch/eschreib/results_MatrixMarket_1k_10k/",
    # "/scratch/eschreib/results_MatrixMarket_10k_not_fixed/",
    # "/scratch/eschreib/results_MatrixMarket_24k/",
    # "/scratch/eschreib/results_merged_vs_semi_naive/"
    ]

# Choose whether or not you would like your plots to be normalized
normalized = False

# Choose whether you'd like a log-scale
log_scale = False

# Would you like a plot where you have a single implementation and fix two of the set {density, k, size} and iterate over non-fixed variable?
make_scaling_plots = False

# Would you like a plot with all implementations where iterate over size, density or k and fix the other two respectively?
make_all_impls_plots = False

# Would you like a plot with all datapoints once for density and once for size
    # this only makes sense if you are normalizing
make_all_datapoints_plot = False

# Would you like a plot that gives you the percentage of a baseline?
    # this is NOT affected by normalization
    # note that the all data percentage plots can be missleading since it contains runs with different ks 
make_percentage_plots = True
make_all_data_percentage_plots = False
baseline_implementation = 'naive_coo'

# If you would like to ignore specific sizes, please specify them here:
ignore_these_sizes = [
    # "9506x9506",
    # "9769x9769",
    # "10605x10605"
    # "1000x1000"
    "gen: 100000x100000"
]

# If you would like to ignore specific densities, specify them here:
ignore_these_densities =[
    '0.002%'
    # '0.001%',
    # '0.01%',
    # '0.012%'
    # "1.%"
]

# If you would like one of the implementations to be called differently, add the corresponding mapping:
implementation_mappings = {
    'naive_SDDMM_GPU': 'sampled_cuBLAS',
    'semi_naive_CSR_SDDMM_GPU': 'naive_csr',
    'better_naive_CSR_SDDMM_GPU': 'dynamic_warp_csr',
    'merged': 'tiled',
    'naive_coo_SDDMM_GPU': 'naive_coo',
    'coo_opt_vectorization_SDDMM_GPU': 'coo_vectorized',
    'coo_opt_loop_unrolled_SDDMM_GPU': 'coo_loop_unrolled',
    'cusparse_baseline': 'cuSPARSE_baseline'
    
}


# In case you want to change the title of a plot, set the following string variable (otherwise it should be None)
custom_fig_title = False
fig_title = None

# If you feel there is some plot not being created, you can set this to False, but the amount of plots generated will increase by ~3x
remove_useless_plots = True

### Global params

In [106]:
num_warmups = 20  # with the full data this should probably be something like 20
consistency_check = False

if normalized:
    y_axis = "normalized time in ns"
    y_axis_name = y_axis
else:
    y_axis = "time in ns"
    y_axis_name = y_axis

matrix_name_mappings = {
    '961x961':'MM: 961x961',
    '1000x1000':'gen: 1000x1000',
    '1059x1059':'MM: 1059x1059',
    '1074x1074':'MM: 1074x1074',
    '1080x1080':'MM: 1080x1080',
    '4500x4500':'MM: 4500x4500',
    '8032x8032':'MM: 8032x8032',
    '9000x9000':'MM: 9000x9000',
    '9506x9506':'MM: 9506x9506',
    '9769x9769': 'MM: 9769x9769',
    '10000x10000':'gen: 10000x10000',
    '10001x10001':'MM: 10001x10001',
    '10605x10605':'MM: 10605x10605',
    '20000x20000':'gen: 20000x20000',
    '23670x23670':'MM: 23670x23670',
    '25626x25626':'MM: 25626x25626',
    '24696x24696':'MM: 24696x24696',
    '25710x25710':'MM: 25710x25710',
    '97569x97569':'MM: 97569x97569',
    '97578x97578':'MM: 97578x97578',
    '99340x99340':'MM: 99340x99340',
    "100000x100000":"gen: 100000x100000" ,
}

final_sizes = [
    'MM: 961x961',
    'gen: 1000x1000',
    'MM: 1059x1059',
    'MM: 1074x1074',
    'MM: 1080x1080',
    'MM: 4500x4500',
    'MM: 8032x8032',
    'MM: 9000x9000',
    'MM: 9506x9506',
    'MM: 9769x9769',
    'gen: 10000x10000',
    'MM: 10001x10001',
    'MM: 10605x10605',
    'gen: 20000x20000',
    'MM: 23670x23670',
    'MM: 25626x25626',
    'MM: 24696x24696',
    'MM: 25710x25710',
    'MM: 97569x97569',
    'MM: 97578x97578',
    'MM: 99340x99340',
    "gen: 100000x100000" ,
]

all_possible_sizes = [
    '961x961',
    '1000x1000',
    '1059x1059',
    '1074x1074',
    '1080x1080',
    '4500x4500',
    '8032x8032',
    '9000x9000',
    '9506x9506',
    '9769x9769',
    '10000x10000',
    '10001x10001',
    '10605x10605',
    '20000x20000',
    '23670x23670',
    '25626x25626',
    '24696x24696',
    '25710x25710',
    '97569x97569',
    '97578x97578',
    '99340x99340',
    "100000x100000",
    ]

all_possible_densities = [
    '1.5%',
    '1%',
    '0.54%',
    '0.56%',
    '0.5%',
    '0.4%',
    '0.46%',
    '0.1%',
    '0.096%',
    '0.09%',
    '0.058%',
    '0.05%',
    '0.046%',
    '0.04%',
    '0.016%',
    '0.012%',
    '0.01%',
    '0.0095%',
    '0.002%',
    '0.001%'
 ]

correct_k = [
"50",
"100",
"500",
"1000"
]


seperator_information = "__"

### Load data

In [107]:

# create the dataframe to which we can append our sub-dataframes
data = pd.DataFrame()

for base_path in base_paths:

    # collect all files that contain run data
    file_paths = []
    impl_name = os.listdir(base_path)
    for impl in impl_name:
        sizes = os.listdir(base_path + impl)
        # there is exactly one csv file per size_dir
        for size_dir in sizes:
            files = os.listdir(base_path + impl + "/" + size_dir)
            for i in range(len(files)):
                if files[i].endswith(".csv"):
                    file_paths.append(base_path + impl + "/" + size_dir + "/" + files[i])
    sub_data = []
    for file in file_paths:
        with open(file, 'r') as fin:
            size = file.split("/")[-2]
            lines = fin.readlines()
            lines.pop(0)  # remove header line
            lines = list(map(lambda x: x.replace(",\n", ""), lines))  # remove trailing newline since they would create an extra column at the end
            lines = list(map(lambda x: x.split(","), lines))  # split into columns
            lines = list(map(lambda x: [x[0]] + ["0." + (x[3].split("/")[-1].split(".")[0].split("_")[-1][1:])] + x[1:], lines))
            lines = list(map(lambda x: [x[0]] + [size] + x[1:], lines))  # add size info
            # grab k from filepath of A
            lines = list(map(lambda line: line[0:3] + [line[3].split("/")[6].split("_")[3]] + line[3:], lines))
            sub_data += lines
    # make a subdata dataframe and add an indicator if we want it to be in the special configurations plot
    special_config = base_path != "/scratch/eschreib/results_full_run/"
        
    sub_data = pd.DataFrame(sub_data)
    sub_data["special_config"] = special_config
    # add the subdata to our main big dataframe
    data = pd.concat([data, sub_data], axis=0)

data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,218,219,220,221,222,223,224,225,226,special_config
0,semi_naive_CSR_SDDMM_GPU,1000x1000,0.01,1000,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,95232.002437,81919.997931,81919.997931,...,80895.997584,81919.997931,81919.997931,81919.997931,81919.997931,80895.997584,80895.997584,80895.997584,81919.997931,True
1,semi_naive_CSR_SDDMM_GPU,1000x1000,0.005,1000,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,61439.998448,47104.001045,48128.001392,...,47104.001045,48128.001392,46080.000699,46080.000699,47104.001045,47104.001045,46080.000699,47104.001045,46080.000699,True
2,semi_naive_CSR_SDDMM_GPU,1000x1000,0.001,1000,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,34816.000611,19455.999136,19455.999136,...,18432.000652,18432.000652,17408.000305,17408.000305,17408.000305,18432.000652,17408.000305,17408.000305,17408.000305,True
3,semi_naive_CSR_SDDMM_GPU,1000x1000,0.0005,1000,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,28672.000393,13311.999850,13311.999850,...,13311.999850,13311.999850,13311.999850,13311.999850,13311.999850,12288.000435,12288.000435,13311.999850,13311.999850,True
4,semi_naive_CSR_SDDMM_GPU,1000x1000,0.0001,1000,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,23552.000523,11264.000088,10239.999741,...,10239.999741,11264.000088,10239.999741,11264.000088,10239.999741,10239.999741,10239.999741,10239.999741,10239.999741,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,sml2_paper,97569x97569,0.0005,100,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrix_market_matrices/Si34H...,322560.012341,275456.011295,274432.003498,...,429055.988789,273407.995701,278528.004885,274432.003498,291839.987040,272383.987904,271360.009909,270336.002111,1241088.032722,True
94,** Function did not pass the test ** sml2_paper,97569x97569,0.0005,100,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrix_market_matrices/Si34H...,,,,...,,,,,,,,,,True
95,** Function did not pass the test ** sml2_paper,97569x97569,0.0005,500,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrix_market_matrices/Si34H...,,,,...,,,,,,,,,,True
96,sml2_paper,97569x97569,0.0005,500,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrices/Dataset_generated_m...,/scratch/eschreib/matrix_market_matrices/Si34H...,1185791.969299,1135615.944862,1127424.001694,...,1227776.050568,1078271.985054,1078271.985054,1083392.024040,1087488.055229,1080320.000648,1079295.992851,1080320.000648,1999871.969223,True


### Lucas' preprocessing code

In [108]:
# A helper function to turn the densities int percentages
def to_percent (a):
    # remove 3 characters
    a = a[3:]
    # We assume that we never have more than 9._%
    before_comma = a[0]
    a = a[1:]
    if a == "":
        a = before_comma + "%"
    else:
        a = before_comma + "." + a + "%"

    return a
    
# remove all the broken runs (= the impls that start with "**")
data = data[~data[0].str.startswith("**")]

# prettify df
data = data.drop(data.columns[[4, 5, 6]], axis=1)  # rm the paths to the matrices
data = data.sort_values(by=[0])  # sort by implementation name

# remove warmup measurements
metadata = data.iloc[:, :4]
measurements = data.iloc[:, 4:]
measurements = measurements.iloc[:, num_warmups:]
data = pd.concat([metadata, measurements], axis=1)

data.columns = range(data.shape[1])  # reset the column names to consecutive numbers

print(data[0].unique())

data


['better_naive_CSR_SDDMM_GPU' 'coo_opt_loop_unrolled_SDDMM_GPU'
 'coo_opt_vectorization_SDDMM_GPU' 'cusparse_baseline' 'naive_SDDMM_GPU'
 'naive_coo_SDDMM_GPU' 'semi_naive_CSR_SDDMM_GPU' 'sml2_paper']


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,195,196,197,198,199,200,201,202,203,204
50,better_naive_CSR_SDDMM_GPU,20000x20000,0.0001,100,43007.999659,43007.999659,43007.999659,41983.999312,43007.999659,41983.999312,...,43007.999659,43007.999659,43007.999659,41983.999312,43007.999659,41983.999312,43007.999659,41983.999312,41983.999312,True
179,better_naive_CSR_SDDMM_GPU,10000x10000,0.00001,500,21503.999829,21503.999829,21503.999829,21503.999829,21503.999829,21471.999586,...,21503.999829,21503.999829,21503.999829,21503.999829,22528.000176,22528.000176,21503.999829,21503.999829,21503.999829,True
180,better_naive_CSR_SDDMM_GPU,10000x10000,0.01,1000,4877312.183380,4894720.077515,4898816.108704,4891647.815704,4887551.784515,4893695.831299,...,4898816.108704,4878335.952759,4899839.878082,4895743.846893,4889599.800110,4874239.921570,4884479.999542,4900864.124298,4888576.030731,True
181,better_naive_CSR_SDDMM_GPU,10000x10000,0.005,1000,2562047.958374,2568192.005157,2576384.067535,2565119.981766,2579456.090927,2575360.059738,...,2569216.012955,2557951.927185,2576384.067535,2577408.075333,2561023.950577,2566143.989563,2581504.106522,2582528.114319,2567167.997360,True
182,better_naive_CSR_SDDMM_GPU,10000x10000,0.001,1000,575487.971306,580608.010292,579584.002495,586751.997471,574464.023113,580608.010292,...,578559.994698,578559.994698,579584.002495,581632.018089,575487.971306,574464.023113,583679.974079,574464.023113,574464.023113,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,sml2_paper,1000x1000,0.001,1000,245759.993792,267264.008522,247807.994485,246784.001589,248832.002282,248832.002282,...,246784.001589,244736.000896,265215.992928,247807.994485,245759.993792,247807.994485,247807.994485,246784.001589,245759.993792,True
511,sml2_paper,1000x1000,0.005,1000,245759.993792,244736.000896,246784.001589,245759.993792,245759.993792,246784.001589,...,247807.994485,246784.001589,245759.993792,246784.001589,247807.994485,244736.000896,245759.993792,1215487.957001,248832.002282,True
510,sml2_paper,1000x1000,0.01,1000,248832.002282,248832.002282,247807.994485,402431.994677,250880.002975,247807.994485,...,249855.995178,248832.002282,250880.002975,246784.001589,1091583.967209,246784.001589,246784.001589,247807.994485,246784.001589,True
532,sml2_paper,10000x10000,0.0001,50,31743.999571,29696.000740,30719.999224,30719.999224,30719.999224,29696.000740,...,163839.995861,31743.999571,30719.999224,29696.000740,29696.000740,30719.999224,30719.999224,30719.999224,37856.001407,True


### Jielas preprocessing code

In [109]:
data['percent'] = data[2].apply(to_percent)

sizes = data[1].unique()
densities = data['percent'].unique()
densities.sort()  # if I don't seaborn messes up the order of the x axis in some plots

missing_densities = [sp for sp in densities if sp not in all_possible_densities]
missing_sizes = [si for si in sizes if si not in all_possible_sizes]


# print(sizes)
# print(densities)

assert len(missing_sizes) == 0, f"THE FOLLOWING SIZES ARE NOT IN THE LIST all_possible_sizes, PLEASE SORT THEM IN AND TRY AGAIN {missing_sizes}"
assert len(missing_densities) == 0, f"THE FOLLOWING densities ARE NOT IN THE LIST all_possible_densities, PLEASE SORT THEM IN AND TRY AGAIN {missing_densities}"
# print(impls)
# print(data)

# We now expand such that we can use seaborn
# specifically this means we only have one measurement per row
# d_exp stands for data expanded. Meaning each row only contains a single measurement
 
d_exp = pd.DataFrame(columns=['implementation', 'Type: Size of S MxN', 'k', 'density', 'time in ns', 'configuration', 'normalized time in ns'])

useless_k = []

for row_idx in range(len(data)):

    row = data.iloc[row_idx]
    row_len = len(row)

    cur_imp = row[0]
    cur_size = row[1]
    cur_k = row[3]
    cur_spars = row['percent']
    cur_conf = row[row_len-2]
    cur_spars_float = float(row[2])
    current_configuration = "No"
    size_number = cur_size


    # print(f"impl: {cur_imp}\n size: {cur_size}\n k: {cur_k}\n density:cur_spars")
    if cur_imp in implementation_mappings:
        cur_imp = implementation_mappings[cur_imp]
    
    if cur_size in matrix_name_mappings:
        cur_size = matrix_name_mappings[cur_size]
    
    if not (cur_imp in implementations_to_plot or cur_imp == baseline_implementation):
        continue

    if not cur_k in correct_k:
        if cur_k not in useless_k:
            useless_k.append(cur_k)
        continue

    dims = size_number.split('x')

    division_factor = float(dims[0]) * float(dims[1]) * cur_spars_float

    if cur_conf:
        current_configuration = "size: " + cur_size + " density: " + cur_spars

    for measurement in row[3: row_len-2]:
        if measurement is not None:
            nt = float(measurement) / division_factor
            new_row = {'implementation': cur_imp, 'Type: Size of S MxN': cur_size, 'k': cur_k, 'density': cur_spars, 'time in ns': float(measurement), 'configuration':current_configuration, 'normalized time in ns': nt}
            d_exp.loc[len(d_exp)] = new_row

############################################################################################################
# Grabbing some info about the data in the df
# configurations = d_exp["configuration"].unique()
# configurations.sort()
impls = d_exp["implementation"].unique()
impls.sort()
all_k = d_exp["k"].unique()
k_to_ignore = [i for i in all_k if i not in correct_k]
implementations_to_ignore = [i for i in impls if i not in implementations_to_plot]

############################################################################################################
# Remove sizes and densities that we want to ignore
for size_to_ig in ignore_these_sizes:
    d_exp = d_exp[d_exp["Type: Size of S MxN"] != size_to_ig]

for spars_to_ig in ignore_these_densities:
    d_exp = d_exp[d_exp["density"] != spars_to_ig]


for k_to_ig in k_to_ignore:
    d_exp = d_exp[d_exp["k"] != k_to_ig]

ks = [dis_k for dis_k in correct_k if dis_k in d_exp["k"].unique()]

print(f"THESE k WERE REMOVED FROM THE DATASET: {useless_k}")
print(f"HERE ARE THE Ks: {ks}")
############################################################################################################
if make_percentage_plots or make_all_data_percentage_plots:
    # Grabbing the relevant data for the minus plots
    baseline_data = d_exp[d_exp["implementation"] == baseline_implementation]
    median_time_by_size_impl = baseline_data.groupby(['Type: Size of S MxN', 'density', 'k'])['time in ns'].median()

    d_exp["percentage_of_base"] = None

    for index, row in d_exp.iterrows():
        r_size = row['Type: Size of S MxN']
        r_spars = row['density']
        r_ns = row["time in ns"]
        r_k = row["k"]

        if (r_size, r_spars, r_k) in median_time_by_size_impl.index:
            
            med = median_time_by_size_impl.loc[(r_size, r_spars, r_k)]

            # diff = med - r_ns
            # pob = r_ns / med
            pob = r_ns / med * 100
            # pob = round(pob, 2)
            
        else:
            pob = None
        
        d_exp.at[index, 'percentage_of_base'] = pob

############################################################################################################
for imp_to_ig in implementations_to_ignore:
    d_exp = d_exp[d_exp["implementation"] != imp_to_ig]

sizes = [s for s in final_sizes if s in d_exp["Type: Size of S MxN"]]


d_exp


THESE k WERE REMOVED FROM THE DATASET: []
HERE ARE THE Ks: ['50', '100', '500', '1000']


Unnamed: 0,implementation,Type: Size of S MxN,k,density,time in ns,configuration,normalized time in ns,percentage_of_base
0,coo_loop_unrolled,MM: 9506x9506,100,0.5%,100.000000,size: MM: 9506x9506 density: 0.5%,0.000221,0.043403
1,coo_loop_unrolled,MM: 9506x9506,100,0.5%,230399.996042,size: MM: 9506x9506 density: 0.5%,0.509937,100.0
2,coo_loop_unrolled,MM: 9506x9506,100,0.5%,230399.996042,size: MM: 9506x9506 density: 0.5%,0.509937,100.0
3,coo_loop_unrolled,MM: 9506x9506,100,0.5%,230399.996042,size: MM: 9506x9506 density: 0.5%,0.509937,100.0
4,coo_loop_unrolled,MM: 9506x9506,100,0.5%,231424.003839,size: MM: 9506x9506 density: 0.5%,0.512204,100.444448
...,...,...,...,...,...,...,...,...
48235,coo_vectorized,gen: 20000x20000,100,0.5%,718847.990036,size: gen: 20000x20000 density: 0.5%,0.359424,40.624999
48236,coo_vectorized,gen: 20000x20000,100,0.5%,718847.990036,size: gen: 20000x20000 density: 0.5%,0.359424,40.624999
48237,coo_vectorized,gen: 20000x20000,100,0.5%,718847.990036,size: gen: 20000x20000 density: 0.5%,0.359424,40.624999
48238,coo_vectorized,gen: 20000x20000,100,0.5%,718847.990036,size: gen: 20000x20000 density: 0.5%,0.359424,40.624999


### Scaling plots

In [110]:
def plotting_simple(data_to_plot, x_axis, fig_title, save_name):
    sns.set_theme(style="whitegrid")
    sns.set(rc={'figure.figsize':(10,6)})
    ax = sns.barplot(data= data_to_plot, x = x_axis, y = y_axis, estimator='median', errorbar=('ci',98))

    if log_scale:
        ax.set_yscale('log')

    ax.set_title(fig_title)
    ax.set_ylabel("time in ns")
    ax.set_xlabel(x_axis)

    # save plot
    fig = ax.get_figure()
    # use a non standard seperator to read the asocited parts from the file name easier

    fig.savefig(base_path_viewing + save_name + "png", format="png")
    fig.savefig(base_path_report + save_name + "eps", format="eps")
    fig.clf()  # reset seaborn to avoid stuff from accumulating


if make_scaling_plots:

    # 1: fix implementation, fix size, iterate over density DAS HIER DU HORNOCHSE
    for impl in implementations_to_plot:
        for k in ks:
            for size in sizes:
                    
                    d_to_plt = d_exp[d_exp['Type: Size of S MxN'] == size]
                    d_to_plt = d_to_plt[d_to_plt['implementation'] == impl]
                    d_to_plt = d_to_plt[d_to_plt['k'] == k]

                    if len(d_to_plt["density"].unique()) < 2 and remove_useless_plots:
                        continue

                    save_name = (scaling_directory +
                                 "/impl_name=" + impl + seperator_information +
                                 "Size=" + size + seperator_information +
                                 "k=" + str(k) + seperator_information +
                                 ".")
                    fig_title = impl + ", Type: Size of S MxN: " + str(size) + ", k: " + str(k)

                    plotting_simple(d_to_plt, "density", fig_title, save_name)
    if consistency_check: print()

    # 2: fix implementation, fix density, iterate over size
    # it = 1
    for impl in implementations_to_plot:
        for k in ks:
            for density in densities:

                d_to_plt = d_exp[d_exp['implementation'] == impl]
                d_to_plt = d_to_plt[d_to_plt['density'] == density]
                d_to_plt = d_to_plt[d_to_plt['k'] == k]

                if len(d_to_plt["Type: Size of S MxN"].unique()) < 2  and remove_useless_plots:
                        continue

                save_name = (scaling_directory +
                                "/impl_name=" + impl + seperator_information +
                                "density=" + str(density) + seperator_information + 
                                "k=" + str(k) + seperator_information +
                                ".")
                fig_title = impl + ", k: " + str(k) + ", density: " + str(density)

                plotting_simple(d_to_plt, "Type: Size of S MxN", fig_title, save_name)

    for impl in implementations_to_plot:
         for size in sizes:
              for density in densities:
                   
                d_to_plt = d_exp[d_exp['implementation'] == impl]
                d_to_plt = d_to_plt[d_to_plt['Type: Size of S MxN'] == size]
                d_to_plt = d_to_plt[d_to_plt['density'] == density]

                if len(d_to_plt["k"].unique()) < 2  and remove_useless_plots:
                        continue
                
                save_name = (scaling_directory +
                                "/impl_name=" + impl + seperator_information +
                                "Type: Size of S MxN=" + size + seperator_information +
                                "density=" + str(density) + seperator_information + 
                                ".")
                fig_title = impl +  ", Type: Size of S MxN: " + str(size) + ", density: " + str(density)

                plotting_simple(d_to_plt, "k", fig_title, save_name)

    if consistency_check: print()

### All in one plots

In [111]:
from matplotlib.container import ErrorbarContainer


def grouped_plotting(data_to_plot, x_axis, x_axis_order, hue, hue_order, fig_title, save_name):
    sns.set(style="whitegrid")
    sns.set(rc={'figure.figsize':(10,6)})

  
    ax = sns.barplot(x=x_axis, order=x_axis_order, y=y_axis, hue=hue, hue_order=hue_order, data=data_to_plot, estimator='median', errorbar=('ci',98))

    if log_scale:
        ax.set_yscale('log')
        
    # print("doing all in one plot")
    fig = ax.get_figure()
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis_name)
    # plt.yscale('log')
    ax.set_title(fig_title)

    if x_axis == "implementation":
        if hue == "Type: Size of S MxN":
            legend = ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.3), fontsize='small', ncol=5)
            legend.set_title("Type: Size of S MxN:")
        elif hue == "k":
            legend = ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.3), fontsize='small', ncol=6)
            legend.set_title("k:")
        elif hue == "density":
            legend = ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.3), fontsize='small', ncol=6)
            legend.set_title("density:")
    else:
        legend = ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.3), fontsize='small', ncol=4)
        legend.set_title("Implementation:")


    save_name = save_name.replace("%", "_perc_")
    
    fig.savefig(base_path_viewing + save_name + "png", bbox_inches='tight', format="png")
    fig.savefig(base_path_report + save_name + "eps", bbox_inches='tight',format="eps")
   

    fig.clf()  # reset seaborn to avoid stuff from accumulating
    
if make_all_impls_plots:
    for k in ks:
        # 1. fix density, group iterator = size, x-axis-ticks iterator = implementation
        for density in densities:

            d_to_plt = d_exp[d_exp['density'] == density]
            d_to_plt = d_to_plt[d_to_plt['k'] == k]

            if len(d_to_plt["Type: Size of S MxN"].unique()) < 2  and remove_useless_plots:
                continue

            # # This is only relevant if there is an implementation we don't want in our plot
            # for impl in implementations_to_ignore:
            #     d_to_plt = d_to_plt[d_to_plt['implementation'] != impl]

            cur_sizes = d_to_plt["Type: Size of S MxN"].unique()
            size_order = []
            for s in final_sizes:
                if s in cur_sizes:
                    size_order.append(s)

            save_name = (all_impls_directory +
                         "/k=" + str(k) + seperator_information +
                         "density=" + str(density) + seperator_information +
                         "grouped_by_implementation.")
            fig_title = "k=" + str(k)+ ", density: " + str(density)

            # grouped_plotting(d_to_plt, x_axis, x_axis_order hue, hue_order, fig_title, save_name)
            grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "Type: Size of S MxN", size_order, fig_title, save_name)

            save_name = (all_impls_directory +
                "/k=" + str(k) + seperator_information +
                "density=" + str(density) + seperator_information +
                "grouped_by_size.")
            grouped_plotting(d_to_plt, "Type: Size of S MxN", size_order, "implementation", implementations_to_plot, fig_title, save_name)

    if consistency_check: print()

    # 2. fix size, group iterator = density, x-axis-ticks iterator = implementation
    for size in sizes:
        for k in ks:
        
            d_to_plt = d_exp[d_exp['Type: Size of S MxN'] == size]
            d_to_plt = d_to_plt[d_to_plt['k'] == k]

            if len(d_to_plt["density"].unique()) < 2  and remove_useless_plots:
                continue

            density_order = []
            cur_densities = d_to_plt["density"].unique()

            for d in all_possible_densities:
                if d in cur_densities:
                    density_order.append(d)

            fig_title = "Type: Size of S MxN: " + str(size) + ", k: " + str(k)
            save_name = (all_impls_directory +
                         "/Size=" + str(size) + seperator_information +
                         "k=" + str(k) +  seperator_information +
                         "grouped_by_implementation.")
        

            # # This is only relevant if there is an implementation we don't want in our plot
            # for impl in implementations_to_ignore:
            #     d_to_plt = d_to_plt[d_to_plt['implementation'] != impl]

            # grouped_plotting(d_to_plt, x_axis, x_axis_order hue, hue_order, fig_title, save_name)
            grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "density", density_order, fig_title, save_name)

            save_name = (all_impls_directory +
                         "/Size=" + str(size) + seperator_information +
                         "k=" + str(k) +  seperator_information +
                         "grouped_by_implementation."
                         )
            
            grouped_plotting(d_to_plt, "density", density_order, "implementation", implementations_to_plot, fig_title, save_name)

    for size in sizes:
        for density in densities:

            d_to_plt = d_exp[d_exp["Type: Size of S MxN"] == size]
            d_to_plt = d_to_plt[d_to_plt["density"] == density]

            if len(d_to_plt["k"].unique()) < 2  and remove_useless_plots:
                continue

            k_order = [dis_k for dis_k in correct_k if dis_k in d_to_plt["k"].unique()]

            fig_title = "Type: Size of S MxN: " + str(size) + ", density: " + str(density)
            save_name = (all_impls_directory +
                         "/Type: Size of S MxN=" + str(size) + seperator_information +
                         "density=" + str(density) + seperator_information +
                         "grouped_by_implementation."
                         )

            grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "k", k_order, fig_title, save_name)

            save_name = (all_impls_directory +
                        "/Type: Size of S MxN=" + str(size) + seperator_information +
                        "density=" + str(density) + seperator_information +
                        "grouped_by_k."
                        )

            grouped_plotting(d_to_plt, "k", k_order, "implementation", implementations_to_plot, fig_title, save_name)

        
    if consistency_check: print()


### All Datapoint Plots

In [112]:

# Plot all datapoints. This mainly makes sense if we are using normalized data
if make_all_datapoints_plot:
    
    d_to_plt = d_exp

    k_order = [dis_k for dis_k in correct_k if dis_k in d_to_plt["k"].unique()]

    density_order = []
    cur_densities = d_to_plt["density"].unique()

    for d in all_possible_densities:
        if d in cur_densities:
            density_order.append(d)

    size_order = []
    cur_sizes = d_to_plt["Type: Size of S MxN"].unique()
    for s in final_sizes:
        if s in cur_sizes:
            size_order.append(s)

    for k in ks:

        d_to_plt = d_exp[d_exp["k"] == k]
        fig_title =  "Runtime Plot of a Selection of Implementations, k = " + str(k)

        save_name = (all_impls_directory +
                        "/all_data_plot"  + seperator_information +
                        "k=" + str(k) + seperator_information +
                        "iterating_over_Type: Size of S MxN" + seperator_information +
                        "grouped_by_implementation."
                    )
        
        grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "Type: Size of S MxN", size_order, fig_title, save_name)

        save_name = (all_impls_directory +
                    "/all_data_plot"  + seperator_information +
                    "k=" + str(k) + seperator_information +
                    "iterating_over_size" + seperator_information +
                    "grouped_by_size."
                    )

        grouped_plotting(d_to_plt, "Type: Size of S MxN", size_order, "implementation", implementations_to_plot, fig_title, save_name)


        save_name = (all_impls_directory +
                        "/all_data_plot"  + seperator_information +
                        "k=" + str(k) + seperator_information +
                        "iterating_over_density" + seperator_information +
                        "grouped_by_implementation."
                        )
        
        grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "density", density_order, fig_title, save_name)

        save_name = (all_impls_directory +
                    "/all_data_plot"  + seperator_information +
                    "k=" + str(k) + seperator_information +
                    "iterating_over_density" + seperator_information +
                    "grouped_by_density."
                    )

        grouped_plotting(d_to_plt, "density", density_order, "implementation", implementations_to_plot, fig_title, save_name)


    if not custom_fig_title:
        fig_title = "Runtime Plot of a Selection of Implementations"

    save_name = (all_impls_directory +
                    "/all_data_plot"  + seperator_information +
                    "iterating_over_k" + seperator_information +
                    "grouped_by_implementation."
                    )
    
    grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "k", k_order, fig_title, save_name)

    save_name = (all_impls_directory +
                "/all_data_plot"  + seperator_information +
                "iterating_over_k" + seperator_information +
                "grouped_by_k."
                )

    grouped_plotting(d_to_plt, "k", k_order, "implementation", implementations_to_plot, fig_title, save_name)


    # for impl in implementations_to_ignore:
    #     d_to_plt = d_to_plt[d_to_plt['implementation'] != impl]


### Percentage Plots

In [113]:
if make_percentage_plots:
    # percentage plots

    y_axis = "percentage_of_base"
    y_axis_name = "Percentage of " + baseline_implementation
    # log_scale = False

    for k in ks:
        # 1. fix density, group iterator = size, x-axis-ticks iterator = implementation
        for density in densities:

            d_to_plt = d_exp[d_exp['density'] == density]
            d_to_plt = d_to_plt[d_to_plt['k'] == k]

            if len(d_to_plt["Type: Size of S MxN"].unique()) < 2  and remove_useless_plots:
                continue

            # # This is only relevant if there is an implementation we don't want in our plot
            # for impl in implementations_to_ignore:
            #     d_to_plt = d_to_plt[d_to_plt['implementation'] != impl]

            cur_sizes = d_to_plt["Type: Size of S MxN"].unique()
            size_order = []
            for s in final_sizes:
                if s in cur_sizes:
                    size_order.append(s)

            save_name = (perc_directory +
                         "/k=" + str(k) + seperator_information +
                         "density=" + str(density) + seperator_information +
                         "grouped_by_implementation."
                         )
            fig_title = "k=" + str(k)+ ", density: " + str(density)

            # grouped_plotting(d_to_plt, x_axis, x_axis_order hue, hue_order, fig_title, save_name)
            grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "Type: Size of S MxN", size_order, fig_title, save_name)

            save_name = (perc_directory +
                "/k=" + str(k) + seperator_information +
                "density=" + str(density) + seperator_information +
                "grouped_by_Type: Size of S MxN."
                )
            grouped_plotting(d_to_plt, "Type: Size of S MxN", size_order, "implementation", implementations_to_plot, fig_title, save_name)

    if consistency_check: print()

    # 2. fix size, group iterator = density, x-axis-ticks iterator = implementation
    for size in sizes:
        for k in ks:
        
            d_to_plt = d_exp[d_exp['Type: Size of S MxN'] == size]
            d_to_plt = d_to_plt[d_to_plt['k'] == k]

            if len(d_to_plt["density"].unique()) < 2  and remove_useless_plots:
                continue

            density_order = []
            cur_densities = d_to_plt["density"].unique()

            for d in all_possible_densities:
                if d in cur_densities:
                    density_order.append(d)

            fig_title = "Type: Size of S MxN: " + str(size) + ", k: " + str(k)
            save_name = (perc_directory +
                         "/Type: Size of S MxN=" + str(size) + seperator_information +
                         "k=" + str(k) +  seperator_information +
                         "grouped_by_implementation."
                        )
        

            # # This is only relevant if there is an implementation we don't want in our plot
            # for impl in implementations_to_ignore:
            #     d_to_plt = d_to_plt[d_to_plt['implementation'] != impl]

            # grouped_plotting(d_to_plt, x_axis, x_axis_order hue, hue_order, fig_title, save_name)
            grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "density", density_order, fig_title, save_name)

            save_name = (perc_directory +
                         "/Type: Size of S MxN=" + str(size) + seperator_information +
                         "k=" + str(k) +  seperator_information +
                         "grouped_by_implementation."
                         )
            
            grouped_plotting(d_to_plt, "density", density_order, "implementation", implementations_to_plot, fig_title, save_name)

    for size in sizes:
        for density in densities:

            d_to_plt = d_exp[d_exp["Type: Size of S MxN"] == size]
            d_to_plt = d_to_plt[d_to_plt["density"] == density]

            if len(d_to_plt["k"].unique()) < 2  and remove_useless_plots:
                continue

            k_order = [dis_k for dis_k in correct_k if dis_k in d_to_plt["k"].unique()]

            fig_title = "Type: Size of S MxN: " + str(size) + ", density: " + str(density)
            save_name = (perc_directory +
                         "/Type: Size of S MxN=" + str(size) + seperator_information +
                         "density=" + str(density) + seperator_information +
                         "grouped_by_implementation."
                         )

            grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "k", k_order, fig_title, save_name)

            save_name = (perc_directory +
                        "/Type: Size of S MxN=" + str(size) + seperator_information +
                        "density=" + str(density) + seperator_information +
                        "grouped_by_k."
                        )

            grouped_plotting(d_to_plt, "k", k_order, "implementation", implementations_to_plot, fig_title, save_name)


The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript back

<Figure size 1000x600 with 0 Axes>

In [114]:
if make_all_data_percentage_plots:
    
    # percentage plots
    d_to_plt = d_exp
    # for impl in implementations_to_ignore:
    #     d_to_plt = d_to_plt[d_to_plt['implementation'] != impl]

    k_order = [dis_k for dis_k in correct_k if dis_k in d_to_plt["k"].unique()]

    density_order = []
    cur_densities = d_to_plt["density"].unique()

    for d in all_possible_densities:
        if d in cur_densities:
            density_order.append(d)

    size_order = []
    cur_sizes = d_to_plt["Type: Size of S MxN"].unique()
    for s in final_sizes:
        if s in cur_sizes:
            size_order.append(s)

    if not custom_fig_title:
        fig_title = "Percentage Plot of Different Implementations over " + str (baseline_implementation)

    y_axis = "percentage_of_base"
    log_scale = False

    save_name = (all_impls_directory +
                    "/percentage_plot"  + seperator_information +
                    "iterating_over_size" + seperator_information +
                    "grouped_by_implementation."
                    )
    
    grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "Type: Size of S MxN", size_order, fig_title, save_name)

    save_name = (all_impls_directory +
                    "/percentage_plot"  + seperator_information +
                    "iterating_over_size" + seperator_information +
                    "grouped_by_size."
                    )

    grouped_plotting(d_to_plt, "Type: Size of S MxN", size_order, "implementation", implementations_to_plot, fig_title, save_name)


    save_name = (all_impls_directory +
                    "/percentage_plot"  + seperator_information +
                    "iterating_over_density" + seperator_information +
                    "grouped_by_implementation."
                    )
    
    grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "density", density_order, fig_title, save_name)

    save_name = (all_impls_directory +
                "/percentage_plot"  + seperator_information +
                "iterating_over_density" + seperator_information +
                "grouped_by_density."
            )

    grouped_plotting(d_to_plt, "density", density_order, "implementation", implementations_to_plot, fig_title, save_name)

    save_name = (all_impls_directory +
                    "/percentage_plot"  + seperator_information +
                    "iterating_over_k" + seperator_information +
                    "grouped_by_implementation."
                )
    
    grouped_plotting(d_to_plt, "implementation", implementations_to_plot, "k", k_order, fig_title, save_name)

    save_name = (all_impls_directory +
                "/percentage_plot"  + seperator_information +
                "iterating_over_k" + seperator_information +
                "grouped_by_k."
                )

    grouped_plotting(d_to_plt, "k", k_order, "implementation", implementations_to_plot, fig_title, save_name)
