In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from pandas.api.types import CategoricalDtype

In [2]:
header_names = ['mtx_name','distribution','placement','seed',
                'm','n','nz','density','A_mem_footprint','mem_range',
                'avg_nz_row','std_nz_row',
                'avg_bandwidth','std_bandwidth','avg_bandwidth_scaled','std_bandwidth_scaled',
                'avg_scattering','std_scattering','avg_scattering_scaled','std_scattering_scaled',
                'skew_coeff','avg_num_neighbours','cross_row_similarity',
                'implementation','time','gflops','W_avg','J_estimated', 'System', 'Arch','friends']

# Read GPU Data (panastas)

In [3]:
#Unified read
inputdata_GPU_V100_synthetic = pd.read_csv('../benchmark_results/vulcan-V100/vulcan-V100_dtype-D_run_full_dataset.csv', names = header_names)
inputdata_GPU_V100_synthetic['friends'] = False
inputdata_GPU_V100 = pd.concat([inputdata_GPU_V100_synthetic])
inputdata_GPU_V100['System'] = 'Tesla-V100'

inputdata_GPU_P100_synthetic = pd.read_csv('../benchmark_results/vulcan-P100/vulcan-P100_dtype-D_run_full_dataset.csv', names = header_names)
inputdata_GPU_P100_synthetic['friends'] = False
inputdata_GPU_P100 = pd.concat([inputdata_GPU_P100_synthetic])
inputdata_GPU_P100['System'] = 'Tesla-P100'

inputdata_GPU_A100_synthetic = pd.read_csv('../benchmark_results/epyc5-A100/epyc5-A100_dtype-D_run_synthetic_matrices_small_dataset.csv', names = header_names)
inputdata_GPU_A100_synthetic['friends'] = False
inputdata_GPU_A100 = pd.concat([inputdata_GPU_A100_synthetic])
inputdata_GPU_A100['System'] = 'Tesla-A100'

inputdata_GPU = pd.concat([inputdata_GPU_V100, inputdata_GPU_P100, inputdata_GPU_A100])
inputdata_GPU['Arch'] = 'GPU'

print(inputdata_GPU.shape)
print(set(inputdata_GPU['implementation']))

for sys in set(inputdata_GPU['System']):
    inputdata_GPU_sys = inputdata_GPU[inputdata_GPU['System']==sys]
    print(sys, '\t', set(inputdata_GPU_sys['implementation']))

(411025, 31)
{'cuSPARSE_coo11', 'cuSPARSE_csr11', 'cuSPARSE_hyb9-2', 'Merge_11', 'SELL-32-1', 'CSR5_9'}
Tesla-P100 	 {'cuSPARSE_coo11', 'cuSPARSE_csr11', 'cuSPARSE_hyb9-2', 'CSR5_9'}
Tesla-A100 	 {'cuSPARSE_coo11', 'cuSPARSE_csr11', 'SELL-32-1', 'Merge_11'}
Tesla-V100 	 {'cuSPARSE_coo11', 'cuSPARSE_csr11', 'cuSPARSE_hyb9-2', 'CSR5_9'}


# Read CPU Data (dgal)

In [4]:
Hawk_threads = 64
Epyc_threads = 24
Xeon_threads = 14
Arm_threads = 80
Power9_threads = 32

inputdata_CPU_AMD_HAWK_synthetic = pd.read_csv('../benchmark_results/amd-hawk/amd-hawk_synthetic_t%d_d.csv' % Hawk_threads, names=header_names)
inputdata_CPU_AMD_HAWK_synthetic['friends'] = False
inputdata_CPU_AMD_HAWK = pd.concat([inputdata_CPU_AMD_HAWK_synthetic])
inputdata_CPU_AMD_HAWK['System'] = 'AMD-EPYC-64'

inputdata_CPU_AMD_EPYC1_synthetic = pd.read_csv('../benchmark_results/amd-epyc1/amd-epyc1_synthetic_t%d_d.csv' % Epyc_threads, names=header_names)
inputdata_CPU_AMD_EPYC1_synthetic['friends'] = False
inputdata_CPU_AMD_EPYC1 = pd.concat([inputdata_CPU_AMD_EPYC1_synthetic])
inputdata_CPU_AMD_EPYC1['System'] = 'AMD-EPYC-24'
inputdata_CPU_AMD_EPYC1.astype({'avg_bandwidth': 'float64'})

inputdata_CPU_ARM_synthetic = pd.read_csv('../benchmark_results/arm/arm_synthetic_t%d_d.csv' % Arm_threads, names=header_names)
inputdata_CPU_ARM_synthetic['friends'] = False
inputdata_CPU_ARM = pd.concat([inputdata_CPU_ARM_synthetic])
inputdata_CPU_ARM['System'] = 'ARM-NEON'

inputdata_CPU_INTEL_GOLD2_synthetic = pd.read_csv('../benchmark_results/intel-gold2/intel-gold2_synthetic_t%d_d.csv' % Xeon_threads, names=header_names)
inputdata_CPU_INTEL_GOLD2_synthetic['friends'] = False
inputdata_CPU_INTEL_GOLD2 = pd.concat([inputdata_CPU_INTEL_GOLD2_synthetic])
inputdata_CPU_INTEL_GOLD2['System'] = 'INTEL-XEON'

inputdata_CPU_POWER9_synthetic = pd.read_csv('../benchmark_results/power9-m100/power9-m100_synthetic_t%d_d.csv' % Power9_threads, names=header_names)
inputdata_CPU_POWER9_synthetic['friends'] = False
inputdata_CPU_POWER9 = pd.concat([inputdata_CPU_POWER9_synthetic])
inputdata_CPU_POWER9['System'] = 'IBM-POWER9'
inputdata_CPU_POWER9['W_avg'] = 200.1

inputdata_CPU = pd.concat([inputdata_CPU_AMD_HAWK, inputdata_CPU_AMD_EPYC1, inputdata_CPU_INTEL_GOLD2, inputdata_CPU_ARM, inputdata_CPU_POWER9])
inputdata_CPU['Arch'] = 'CPU'    

print(inputdata_CPU.shape)
print(set(inputdata_CPU['implementation']))

poulo_list=['Custom_CSR_PBV_x86', 'Custom_CSR_PBV']
for impl_poulo in poulo_list:
    inputdata_CPU = inputdata_CPU[inputdata_CPU.implementation != impl_poulo]

for sys in set(inputdata_CPU['System']):
    inputdata_CPU_sys = inputdata_CPU[inputdata_CPU['System']==sys]
    print(sys, '\t', set(inputdata_CPU_sys['implementation']))

(1639037, 31)
{'Custom_CSR_BV_x86', 'Custom_CSR_B', 'SparseX', 'Naive_CSR_CPU', 'AOCL_OPTMV', 'MERGE', 'Custom_CSR_PBV_x86', 'MKL_IE', 'Custom_CSR_BV', nan, 'Custom_CSR_PBV', 'SELL-32-1', 'CSR5', 'ARMPL'}
ARM-NEON 	 {'SparseX', 'Naive_CSR_CPU', 'MERGE', 'SELL-32-1', 'ARMPL'}
AMD-EPYC-24 	 {'Custom_CSR_BV_x86', 'SparseX', 'Naive_CSR_CPU', 'AOCL_OPTMV', 'MERGE', 'MKL_IE', 'SELL-32-1', 'CSR5'}
AMD-EPYC-64 	 {'Naive_CSR_CPU', 'CSR5', 'MKL_IE'}
IBM-POWER9 	 {'Custom_CSR_B', 'SparseX', 'Naive_CSR_CPU', 'MERGE', 'Custom_CSR_BV', nan}
INTEL-XEON 	 {'Custom_CSR_BV_x86', 'SparseX', 'Naive_CSR_CPU', 'MERGE', 'MKL_IE', 'SELL-32-1', 'CSR5'}


# Read FPGA Data (pmpakos)

In [5]:
inputdata_FPGA_synthetic = pd.read_csv('../benchmark_results/alveo-u280/alveo-u280_spmv_4-2048_dtype-D.csv', names = header_names)
inputdata_FPGA_synthetic['friends'] = False
inputdata_FPGA = pd.concat([inputdata_FPGA_synthetic])
inputdata_FPGA['System'] = 'Alveo-U280'
inputdata_FPGA['Arch'] = 'FPGA'
print(inputdata_FPGA.shape)
print(set(inputdata_FPGA['implementation']))

(51955, 31)
{'Xilinx_SpMV'}


# Concatenate all data, place in *inputdata* dataframe

In [6]:
%%time
# Merge the results
inputdata = pd.concat([inputdata_GPU,inputdata_CPU,inputdata_FPGA])

# keep "synthetic" dataset only (discard "friends")
inputdata = inputdata[inputdata['friends'] == False]
print(inputdata.shape)

# Group per reps, take mean
groupreps = inputdata.groupby(['mtx_name','distribution','placement','seed',
                               'm','n','nz','density','A_mem_footprint','mem_range',
                               'avg_nz_row','std_nz_row',
                               'avg_bandwidth','std_bandwidth','avg_bandwidth_scaled','std_bandwidth_scaled',
                               'avg_scattering','std_scattering','avg_scattering_scaled','std_scattering_scaled',
                               'skew_coeff','avg_num_neighbours','cross_row_similarity',
                               'implementation','System', 'Arch','friends']).mean().reset_index().reindex(columns=header_names)

# dataTypeSeries = groupreps.dtypes
# print('Data type of each column of Dataframe :')
# print(dataTypeSeries)
print(groupreps.shape)
groupreps.drop(columns=['avg_scattering', 'std_scattering', 'avg_scattering_scaled', 'std_scattering_scaled'])

(2069637, 31)
(581881, 31)
CPU times: user 2.99 s, sys: 3.68 s, total: 6.67 s
Wall time: 6.72 s


Unnamed: 0,mtx_name,distribution,placement,seed,m,n,nz,density,A_mem_footprint,mem_range,...,avg_num_neighbours,cross_row_similarity,implementation,time,gflops,W_avg,J_estimated,System,Arch,friends
0,synthetic,normal,random,14.0,698.0,698.0,340181.0,69.823100,3.895730,[2-4],...,1.995870,0.853019,CSR5,0.004220,20.6366,248.7380,1.049670,AMD-EPYC-64,CPU,False
1,synthetic,normal,random,14.0,698.0,698.0,340181.0,69.823100,3.895730,[2-4],...,1.995870,0.853019,CSR5_9,0.000027,24.9312,69.4968,0.001883,Tesla-P100,GPU,False
2,synthetic,normal,random,14.0,698.0,698.0,340181.0,69.823100,3.895730,[2-4],...,1.995870,0.853019,CSR5_9,0.000020,34.4234,85.7196,0.001509,Tesla-V100,GPU,False
3,synthetic,normal,random,14.0,698.0,698.0,340181.0,69.823100,3.895730,[2-4],...,1.995870,0.853019,MKL_IE,0.002452,35.5092,236.5380,0.580110,AMD-EPYC-64,CPU,False
4,synthetic,normal,random,14.0,698.0,698.0,340181.0,69.823100,3.895730,[2-4],...,1.995870,0.853019,Naive_CSR_CPU,0.002641,32.9784,270.7270,0.714912,AMD-EPYC-64,CPU,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581876,synthetic_980644_980644_99044944_avg100.9999_s...,normal,random,14.0,980644.0,980644.0,99044944.0,0.010299,1137.220295,[1024-2048],...,1.882192,0.051339,Xilinx_SpMV,32.000000,6.1800,33.0000,1056.000000,Alveo-U280,FPGA,False
581877,synthetic_980644_980644_99044944_avg100.9999_s...,normal,random,14.0,980644.0,980644.0,99044944.0,0.010299,1137.220295,[1024-2048],...,1.882102,0.946059,Xilinx_SpMV,26.700000,7.4200,33.0000,881.100000,Alveo-U280,FPGA,False
581878,synthetic_980644_980644_99044944_avg100.9999_s...,normal,random,14.0,980644.0,980644.0,99044944.0,0.010299,1137.220295,[1024-2048],...,1.882273,0.497778,Xilinx_SpMV,227.000000,0.8720,33.0000,7491.000000,Alveo-U280,FPGA,False
581879,synthetic_980644_980644_99044944_avg100.9999_s...,normal,random,14.0,980644.0,980644.0,99044944.0,0.010299,1137.220295,[1024-2048],...,1.882161,0.050237,Xilinx_SpMV,231.000000,0.8590,33.0000,7623.000000,Alveo-U280,FPGA,False


# FIX NAMES OF IMPLEMENTATIONS

In [7]:
# groupreps.loc[(groupreps['implementation'] == 'cuSPARSE_coo11'), 'implementation'] = 'cuSPARSE-COO'
# groupreps.loc[(groupreps['implementation'] == 'cuSPARSE_csr11'), 'implementation'] = 'cuSPARSE-CSR'
# groupreps.loc[(groupreps['implementation'] == 'cuSPARSE_hyb9-2'), 'implementation'] = 'cuSPARSE-HYB'
# groupreps.loc[(groupreps['implementation'] == 'CSR5_9'), 'implementation'] = 'CSR5'
# groupreps.loc[(groupreps['implementation'] == 'Merge_11'), 'implementation'] = 'Merge'
# groupreps.loc[(groupreps['implementation'] == 'Naive_CSR_CPU'), 'implementation'] = 'Naive-CSR'
# groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_BV_x86'), 'implementation'] = 'Vectorized-CSR'    
# # groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_PBV_x86'), 'implementation'] = 'Vect-Bal-CSR'    
# groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_BV'), 'implementation'] = 'Vectorized-CSR'
# # groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_PBV'), 'implementation'] = 'Vect-Bal-CSR'
# groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_B'), 'implementation'] = 'Balanced-CSR'    
# groupreps.loc[(groupreps['implementation'] == 'MKL_IE_no_hint'), 'implementation'] = 'MKL-IE-no-hint'
# groupreps.loc[(groupreps['implementation'] == 'MKL_IE'), 'implementation'] = 'MKL-IE'
# groupreps.loc[(groupreps['implementation'] == 'AOCL_OPTMV'), 'implementation'] = 'AOCL'
# groupreps.loc[(groupreps['implementation'] == 'SparseX'), 'implementation'] = 'SparseX'
# groupreps.loc[(groupreps['implementation'] == 'SELL-32-1'), 'implementation'] = 'SELL-C-s'
# groupreps.loc[(groupreps['implementation'] == 'MERGE'), 'implementation'] = 'Merge'
# groupreps.loc[(groupreps['implementation'] == 'ARMPL'), 'implementation'] = 'ARM-library'
# groupreps.loc[(groupreps['implementation'] == 'Xilinx_SpMV'), 'implementation'] = 'Xilinx-library'


groupreps.loc[(groupreps['implementation'] == 'cuSPARSE_coo11'), 'implementation'] = 'cu-COO'
groupreps.loc[(groupreps['implementation'] == 'cuSPARSE_csr11'), 'implementation'] = 'cu-CSR'
groupreps.loc[(groupreps['implementation'] == 'cuSPARSE_hyb9-2'), 'implementation'] = 'cu-HYB'
groupreps.loc[(groupreps['implementation'] == 'CSR5_9'), 'implementation'] = 'CSR5'
groupreps.loc[(groupreps['implementation'] == 'Merge_11'), 'implementation'] = 'Merge'
groupreps.loc[(groupreps['implementation'] == 'Naive_CSR_CPU'), 'implementation'] = 'Naive-CSR'
groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_BV_x86'), 'implementation'] = 'Vec-CSR'    
# groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_PBV_x86'), 'implementation'] = 'Vect-Bal-CSR'    
groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_BV'), 'implementation'] = 'Vec-CSR'
# groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_PBV'), 'implementation'] = 'Vect-Bal-CSR'
groupreps.loc[(groupreps['implementation'] == 'Custom_CSR_B'), 'implementation'] = 'Bal-CSR'    
groupreps.loc[(groupreps['implementation'] == 'MKL_IE_no_hint'), 'implementation'] = 'MKL-IE-no-hint'
groupreps.loc[(groupreps['implementation'] == 'MKL_IE'), 'implementation'] = 'MKL-IE'
groupreps.loc[(groupreps['implementation'] == 'AOCL_OPTMV'), 'implementation'] = 'AOCL'
groupreps.loc[(groupreps['implementation'] == 'SparseX'), 'implementation'] = 'SparseX'
groupreps.loc[(groupreps['implementation'] == 'SELL-32-1'), 'implementation'] = 'SELL-C-s'
groupreps.loc[(groupreps['implementation'] == 'MERGE'), 'implementation'] = 'Merge-CSR'
groupreps.loc[(groupreps['implementation'] == 'ARMPL'), 'implementation'] = 'ARM-lib'
groupreps.loc[(groupreps['implementation'] == 'Xilinx_SpMV'), 'implementation'] = 'Xilinx-lib'


In [8]:
def set_category(x, cat_list, cat_size, ranges_size_flag=False):
    for index in range(len(cat_list)):
        cat = cat_list[index]
        cat_min = float(cat.strip('[').strip(']').split('-')[0])
        cat_max = float(cat.strip('[').strip(']').split('-')[1])
        # print(cat_min, cat_max,'->\t->', x)
        if(x>cat_min and x<=cat_max):
            if(ranges_size_flag==True):
                return cat_size[index]
            else:
                return cat_list[index]
            
def set_category2(x, cat_list, cat_size, ranges_size_flag=False):
    for index in range(len(cat_list)):
        cat = cat_list[index]
        if(x==cat):
            return cat_size[index]

ranges_arch = ['GPU', 'GPU', 'GPU', 'CPU', 'CPU', 'CPU', 'CPU', 'CPU', 'FPGA']

ranges_dev = ['Tesla-P100', 'Tesla-V100', 'Tesla-A100', 'AMD-EPYC-24', 'AMD-EPYC-64', 'ARM-NEON', 'INTEL-XEON', 'IBM-POWER9', 'Alveo-U280']

ranges_impl_arch = ['( Tesla-P100 ) cu-COO', '( Tesla-P100 ) cu-CSR', '( Tesla-P100 ) cu-HYB', 
                    '( Tesla-P100 ) CSR5',

                    '( Tesla-V100 ) cu-COO', '( Tesla-V100 ) cu-CSR', '( Tesla-V100 ) cu-HYB', 
                    '( Tesla-V100 ) CSR5',

                    '( Tesla-A100 ) cu-COO', '( Tesla-A100 ) cu-CSR', 
                    '( Tesla-A100 ) Merge-CSR', '( Tesla-A100 ) SELL-C-s',

                    '( AMD-EPYC-24 ) Naive-CSR',  '( AMD-EPYC-24 ) Vec-CSR',  # '( AMD-EPYC-24 ) Vect-Bal-CSR',  
                    '( AMD-EPYC-24 ) AOCL', '( AMD-EPYC-24 ) MKL-IE', 
                    '( AMD-EPYC-24 ) CSR5', '( AMD-EPYC-24 ) SparseX', '( AMD-EPYC-24 ) Merge-CSR', '( AMD-EPYC-24 ) SELL-C-s', 
                    
                    '( AMD-EPYC-64 ) Naive-CSR',
                    '( AMD-EPYC-64 ) MKL-IE',  '( AMD-EPYC-64 ) CSR5', 

                    '( ARM-NEON ) Naive-CSR',
                    '( ARM-NEON ) ARM-lib', 
                    '( ARM-NEON ) SparseX', '( ARM-NEON ) Merge-CSR', '( ARM-NEON ) SELL-C-s', 

                    '( INTEL-XEON ) Naive-CSR',  '( INTEL-XEON ) Vec-CSR',  
                    '( INTEL-XEON ) MKL-IE',
                    '( INTEL-XEON ) CSR5', '( INTEL-XEON ) SparseX', '( INTEL-XEON ) Merge-CSR', '( INTEL-XEON ) SELL-C-s',                    

                    '( IBM-POWER9 ) Naive-CSR', '( IBM-POWER9 ) Bal-CSR', 
                    '( IBM-POWER9 ) Vec-CSR',
                    '( IBM-POWER9 ) SparseX', '( IBM-POWER9 ) Merge-CSR',
                    
                    '( Alveo-U280 ) Xilinx-lib']

ranges_impl_arch_backup = ranges_impl_arch

ranges_memr = ['[4-8]','[8-16]','[16-32]','[32-64]','[64-128]','[128-256]','[256-512]','[512-1024]','[1024-2048]'] # A_mem_footprint
ranges_anr = ['[0-20]', '[20-75]','[75-150]', '[150-510]'] # avg_nz_row
ranges_anr = ['[0-15]', '[15-40]', '[40-75]','[75-150]', '[150-510]'] # avg_nz_row

ranges_skew = ['[0-1.5]', '[1.5-50]', '[50-250]', '[250-3000]', '[3000-10000]'] # skew_coeff

ranges_ann = ['[0-0.6]', '[0.6-1.4]', '[1.4-2]'] # avg_num_neighbours
ranges_crs = ['[0-0.3]', '[0.3-0.7]', '[0.7-1]'] # cross_row_similarity

ranges_size = ['S', 'M', 'L']
ranges_regularity = ['SS', 'SM', 'SL', 'MS', 'MM', 'ML', 'LS', 'LM', 'LL']

cat_list = ['A_mem_footprint',  # 'm', 
            'avg_nz_row',
            'skew_coeff','avg_num_neighbours','cross_row_similarity']
ranges_list = [ranges_memr, # ranges_rows, 
               ranges_anr, 
               ranges_skew, 
               ranges_ann, ranges_crs]


---
# Group by "best-of" implementation for each device
# skip this step if you want to plot every measurement collected

In [9]:
%%time
# Group per system, take best (it was over "inputdata", but "groupreps" is better choice I think)
# fixed this after reordering groupreps columns according to header_names
# before this, columns and data were mixed and it was a complete shitstorm
group_system = groupreps.groupby(['mtx_name','distribution','placement','seed',
                                  'm','n','nz','density','A_mem_footprint','mem_range',
                                  'avg_nz_row','std_nz_row',
                                  'avg_bandwidth','std_bandwidth','avg_bandwidth_scaled','std_bandwidth_scaled',
                                  # 'avg_scattering','std_scattering','avg_scattering_scaled','std_scattering_scaled',
                                  'skew_coeff','avg_num_neighbours','cross_row_similarity',
                                  'System','Arch','friends'], as_index = False)
reslist = []
for desc, experiment in group_system:
    best_format = experiment['implementation'].iloc[experiment['gflops'].argmax()]
    outrow = experiment[experiment['implementation'] == best_format]
    reslist.append(outrow.values.tolist()[0])             

group_system_best = pd.DataFrame(reslist, columns = header_names)
print(group_system_best.shape)

(124688, 31)
CPU times: user 1min 55s, sys: 1.32 s, total: 1min 57s
Wall time: 1min 55s


In [10]:
%%time
groupreps = groupreps[groupreps['W_avg']>0]

groupreps1 = groupreps[groupreps['System'] != 'AMD-EPYC-64']
groupreps2 = groupreps[groupreps['System'] == 'AMD-EPYC-64'] 
groupreps2 = groupreps2[groupreps2['W_avg']>30]
groupreps = pd.concat([groupreps1, groupreps2])

groupreps['impl_arch'] = "( " + groupreps['System'] + " " ")" + groupreps['implementation']
groupreps['energy_efficiency'] = groupreps['gflops'] / groupreps['W_avg'] 
groupreps['GFLOPs^2-per-W'] = groupreps['gflops'] * groupreps['gflops'] / groupreps['W_avg']


groupreps['crs_categ'] = groupreps.apply (lambda row: set_category(row['cross_row_similarity'], ranges_crs, ranges_size, ranges_size_flag=True), axis=1)
groupreps['ann_categ'] = groupreps.apply (lambda row: set_category(row['avg_num_neighbours'], ranges_ann, ranges_size, ranges_size_flag=True), axis=1)
groupreps['regularity'] = groupreps['crs_categ'] + groupreps['ann_categ']

groupreps['anr_categ'] = groupreps.apply (lambda row: set_category(row['avg_nz_row'], ranges_anr, ranges_size, ranges_size_flag=False), axis=1)
groupreps['skew_categ'] = groupreps.apply (lambda row: set_category(row['skew_coeff'], ranges_skew, ranges_size, ranges_size_flag=False), axis=1)

CPU times: user 15.2 s, sys: 924 ms, total: 16.2 s
Wall time: 16.2 s


In [11]:
%%time
group_system_best = group_system_best[group_system_best['W_avg']>0]

group_system_best1 = group_system_best[group_system_best['System'] != 'AMD-EPYC-64']
group_system_best2 = group_system_best[group_system_best['System'] == 'AMD-EPYC-64'] 
group_system_best2 = group_system_best2[group_system_best2['W_avg']>30]
group_system_best = pd.concat([group_system_best1, group_system_best2])
# group_system_best = group_system_best[group_system_best['W_avg']>32.9]

group_system_best['impl_arch'] = "( " + group_system_best['System'] + " " ")" + group_system_best['implementation']
group_system_best['energy_efficiency'] = group_system_best['gflops'] / group_system_best['W_avg'] 
group_system_best['GFLOPs^2-per-W'] = group_system_best['gflops'] * group_system_best['gflops'] / group_system_best['W_avg']


group_system_best['crs_categ'] = group_system_best.apply (lambda row: set_category(row['cross_row_similarity'], ranges_crs, ranges_size, ranges_size_flag=True), axis=1)
group_system_best['ann_categ'] = group_system_best.apply (lambda row: set_category(row['avg_num_neighbours'], ranges_ann, ranges_size, ranges_size_flag=True), axis=1)
group_system_best['regularity'] = group_system_best['crs_categ'] + group_system_best['ann_categ']

group_system_best['anr_categ'] = group_system_best.apply (lambda row: set_category(row['avg_nz_row'], ranges_anr, ranges_size, ranges_size_flag=False), axis=1)
group_system_best['skew_categ'] = group_system_best.apply (lambda row: set_category(row['skew_coeff'], ranges_skew, ranges_size, ranges_size_flag=False), axis=1)


extra_header_names = ['impl_arch', 'energy_efficiency', 'GFLOPs^2-per-W', 
                      'crs_categ', 'ann_categ', 'regularity', 'anr_categ', 'skew_categ']


CPU times: user 3.35 s, sys: 160 ms, total: 3.51 s
Wall time: 3.51 s


---

In [12]:
for system in set(group_system_best['System']):
    group_system_best_s = group_system_best[group_system_best['System']==system]
    print(system,'\t', set(group_system_best_s['implementation']))

ARM-NEON 	 {'SparseX', 'Merge-CSR', 'ARM-lib', 'SELL-C-s', 'Naive-CSR'}
Tesla-P100 	 {'cu-HYB', 'cu-COO', 'CSR5', 'cu-CSR'}
AMD-EPYC-24 	 {'SparseX', 'Merge-CSR', 'AOCL', 'SELL-C-s', 'MKL-IE', 'Naive-CSR', 'Vec-CSR', 'CSR5'}
AMD-EPYC-64 	 {'MKL-IE', 'Naive-CSR', 'CSR5'}
Tesla-A100 	 {'cu-COO', 'Merge', 'cu-CSR'}
Tesla-V100 	 {'cu-HYB', 'cu-COO', 'CSR5', 'cu-CSR'}
IBM-POWER9 	 {'Naive-CSR', 'SparseX', 'Bal-CSR', 'Merge-CSR'}
INTEL-XEON 	 {'SparseX', 'Merge-CSR', 'SELL-C-s', 'MKL-IE', 'Naive-CSR', 'Vec-CSR', 'CSR5'}
Alveo-U280 	 {'Xilinx-lib'}


---

In [13]:
%%time
groupreps.to_csv("../all_format_runs_March_2023.csv",header=True, index=False)

In [14]:
%%time
group_system_best.to_csv("../best_format_runs_March_2023.csv",header=True, index=False)

---