In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

header_names = ['matrix_name','distribution','placement','seed',
                'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                'avg_nnz_per_row','std_nnz_per_row',
                'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
                'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
                'skew','avg_num_neighbours','cross_row_similarity',
                'format_name','time','gflops','W_avg','J_estimated', 'System', 'Arch']

def find_class(mem_footprint):
    low_mb_list =   [4,8, 16,32,64, 128,256,512, 1024,2048, 4096] # removed largest mem range (2048-4096)
    high_mb_list =  [8,16,32,64,128,256,512,1024,2048,4096, 8192]
    for i in range(len(low_mb_list)):
        if(mem_footprint>=low_mb_list[i] and mem_footprint<=high_mb_list[i]):
            pos = i
            mem_range = '['+str(low_mb_list[pos])+'-'+str(high_mb_list[pos])+']'
            return mem_range
    return str(-1)

# first of all, we will generate some 'modified_features' csv files, that take as input the existing validation matrix benchmark results

# GPU data

In [2]:
def modify_footprint(v):
    if(v['format_name']=='CSR5_9'):
        return (v['mem_footprint'] - 4*(v['nr_rows']+v['nr_cols']))/(1024*1024.0)
    else:
        return v['mem_footprint']

def create_complete_gpu_csv(gpu_dataframe, system, arch):
    gpu_dataframe['matrix_name'] = gpu_dataframe['matrix_name'].apply(lambda x: x.split('/')[-1].split('.')[0])
    vm_features = pd.read_csv('../benchmark_results/validation_matrices_features.csv', sep='\t')
    matrix_names = list(vm_features['matrix']) # same as : list(set(inputdata_GPU['matrix_name']))

    inputvaldata_GPU = pd.DataFrame(columns=header_names)

    for matrix_name in matrix_names:
        for index, curr in gpu_dataframe[gpu_dataframe['matrix_name'] == matrix_name].iterrows():
            matrix_name = matrix_name
            distribution = 'unused'
            placement = 'unused'
            seed = curr['seed']
            m = curr['nr_rows']
            n = curr['nr_cols']
            nz = curr['nr_nzeros']
            density = nz/(m*n)*100.0
            mem_footprint = curr['mem_footprint']
            mem_range = find_class((nz*(64+32)+32*(m+1))/(8*1024*1024))
            avg_nnz_per_row = list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-avg'])[0]
            std_nnz_per_row = list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-std'])[0]
            avg_bw = list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-avg'])[0]*n
            std_bw = list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-std'])[0]*n
            avg_bw_scaled = avg_bw/n
            std_bw_scaled = std_bw/n
            # avg_sc = list(vm_features[vm_features['matrix']==matrix_name]['sc-avg'])[0]
            # std_sc = list(vm_features[vm_features['matrix']==matrix_name]['sc-std'])[0]
            # avg_sc_scaled = avg_sc*n
            # std_sc_scaled = std_sc*n
            # skew = (list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-max'])[0] - avg_nnz_per_row)/avg_nnz_per_row
            skew = list(vm_features[vm_features['matrix']==matrix_name]['skew_coeff'])[0]
            avg_num_neighbours = list(vm_features[vm_features['matrix']==matrix_name]['num-neigh-avg'])[0]
            cross_row_similarity = list(vm_features[vm_features['matrix']==matrix_name]['cross_row_sim-avg'])[0]

            format_name = curr['format_name']
            time = curr['time']
            gflops = curr['gflops']
            W_avg = curr['W_avg']
            J_estimated = curr['J_estimated']

            new_row = {'matrix_name' : matrix_name, 'distribution' : distribution, 'placement' : placement, 'seed' : seed, 
                       'nr_rows' : m, 'nr_cols' : n, 'nr_nzeros' : nz, 'density' : density, 'mem_footprint' : mem_footprint, 'mem_range' : mem_range, 
                       'avg_nnz_per_row' : avg_nnz_per_row, 'std_nnz_per_row' : std_nnz_per_row, 
                       'avg_bw' : avg_bw, 'std_bw' : std_bw, 'avg_bw_scaled' : avg_bw_scaled, 'std_bw_scaled' : std_bw_scaled,
                       'skew' : skew, 'avg_num_neighbours' : avg_num_neighbours, 'cross_row_similarity' : cross_row_similarity,
                       'format_name' : format_name, 'time' : time, 'gflops' : gflops, 'W_avg' : W_avg, 'J_estimated' : J_estimated}
            inputvaldata_GPU = pd.concat([inputvaldata_GPU, pd.DataFrame([new_row])], ignore_index=True)
    inputvaldata_GPU['System'] = system
    inputvaldata_GPU['Arch'] = arch
    if(system=='Tesla-V100'):
        inputvaldata_GPU['mem_footprint'] = inputvaldata_GPU.apply(lambda x: modify_footprint(x), axis=1)
    return inputvaldata_GPU

# read P100 data

In [3]:
arch, system = 'GPU', 'NVIDIA-P100'

fname = 'vulcan-P100/vulcan-P100_dtype-D_run_validation_matrices.csv' 
# it has different data layout @@@ panastas @@@
gpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
                                                            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                                                            'avg_nnz_per_row','std_nnz_per_row',
                                                            'avg_bw','std_bw','avg_sc','std_sc',
                                                            '1','2','3','4','5','6','7',
                                                            'format_name','time','gflops','W_avg', 'J_estimated'])

gpu_data = create_complete_gpu_csv(gpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')
gpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_GPU = pd.concat([inputvaldata_GPU, pd.DataFrame([new_row])], ignore_index=True)


# read V100 data

In [4]:
arch, system = 'GPU', 'NVIDIA-V100'

# fname = 'silver1-TeslaV100_dtype-D_run_validation_matrices.csv'
fname = 'vulcan-V100/vulcan-V100_dtype-D_run_validation_matrices.csv'
gpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','diagonal_factor','seed',
                                                            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                                                            'avg_nnz_per_row','std_nnz_per_row',
                                                            'avg_bw','std_bw','avg_sc','std_sc',
                                                            'format_name','time','gflops','W_avg', 'J_estimated'])

gpu_data = create_complete_gpu_csv(gpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')
gpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_GPU = pd.concat([inputvaldata_GPU, pd.DataFrame([new_row])], ignore_index=True)


# read A100 data

In [5]:
arch, system = 'GPU', 'NVIDIA-A100'

fname = 'epyc5-A100/epyc5-A100_dtype-D_run_validation_matrices.csv'
# it has different data layout @@@ panastas @@@
gpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
                                                            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                                                            'avg_nnz_per_row','std_nnz_per_row',
                                                            'avg_bw','std_bw','avg_sc','std_sc',
                                                            '1','2','3','4','5','6','7',
                                                            'format_name','time','gflops','W_avg', 'J_estimated'])

gpu_data = create_complete_gpu_csv(gpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')
gpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_GPU = pd.concat([inputvaldata_GPU, pd.DataFrame([new_row])], ignore_index=True)


# read H100 data

In [6]:
arch, system = 'GPU', 'NVIDIA-H100'

fname = 'grace1-H100/grace1-H100_validation_matrices_d.csv'

gpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
                                'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                                'avg_nnz_per_row','std_nnz_per_row',
                                'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
                                'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
                                'skew', 'avg_num_neighbours', 'cross_row_similarity',
                                'format_name','time','gflops','W_avg','J_estimated', 'System'])
gpu_data = create_complete_gpu_csv(gpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')
gpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_GPU = pd.concat([inputvaldata_GPU, pd.DataFrame([new_row])], ignore_index=True)


# read MI250 data

In [7]:
arch, system = 'GPU', 'AMD-MI250'

fname = 'amd-mi250/amd-mi250_validation_matrices_d.csv'

gpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
                                'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                                'avg_nnz_per_row','std_nnz_per_row',
                                'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
                                'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
                                'skew', 'avg_num_neighbours', 'cross_row_similarity',
                                'format_name','time','gflops','W_avg','J_estimated', 'System'])

gpu_data = create_complete_gpu_csv(gpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')
gpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_GPU = pd.concat([inputvaldata_GPU, pd.DataFrame([new_row])], ignore_index=True)


# read CPU data

In [8]:
Hawk_threads     = 64
Epyc_threads     = 24
Xeon_threads     = 14
Icy_threads      = 16
Sapphire_threads = 56
Arm_threads      = 80
Grace_threads    = 72
Power9_threads   = 32

In [9]:
def create_complete_cpu_csv(cpu_dataframe, system, arch):
    vm_features = pd.read_csv('../benchmark_results/validation_matrices_features.csv', sep='\t')
    matrix_names = list(vm_features['matrix']) # same as : list(set(inputdata_GPU['matrix_name']))

    inputvaldata_CPU = pd.DataFrame(columns=header_names)

    for matrix_name in matrix_names:
        for index, curr in cpu_dataframe[cpu_dataframe['matrix_name'] == matrix_name].iterrows():
            matrix_name = matrix_name
            distribution = 'unused'
            placement = 'unused'
            diagonal_factor = 0
            seed = 0
            m = curr['nr_rows']
            n = curr['nr_cols']
            nz = curr['nr_nzeros']
            density = nz/(m*n)*100.0
            mem_footprint = curr['mem_footprint']
            mem_range = find_class((nz*(64+32)+32*(m+1))/(8*1024*1024))
            avg_nnz_per_row = list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-avg'])[0]
            std_nnz_per_row = list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-std'])[0]
            avg_bw = list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-avg'])[0]*n
            std_bw = list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-std'])[0]*n
            avg_bw_scaled = avg_bw/n
            std_bw_scaled = std_bw/n
            # avg_sc = list(vm_features[vm_features['matrix']==matrix_name]['sc-avg'])[0]
            # std_sc = list(vm_features[vm_features['matrix']==matrix_name]['sc-std'])[0]
            # avg_sc_scaled = avg_sc*n
            # std_sc_scaled = std_sc*n
            # skew = (list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-max'])[0] - avg_nnz_per_row)/avg_nnz_per_row
            skew = list(vm_features[vm_features['matrix']==matrix_name]['skew_coeff'])[0]
            avg_num_neighbours = list(vm_features[vm_features['matrix']==matrix_name]['num-neigh-avg'])[0]
            cross_row_similarity = list(vm_features[vm_features['matrix']==matrix_name]['cross_row_sim-avg'])[0]

            format_name = curr['format_name']
            time = curr['time']
            gflops = curr['gflops']
            W_avg = curr['W_avg']
            J_estimated = curr['J_estimated']
            
            new_row = {'matrix_name' : matrix_name, 'distribution' : distribution, 'placement' : placement, 'seed' : seed,  'nr_rows' : m, 'nr_cols' : n, 'nr_nzeros' : nz, 'density' : density, 'mem_footprint' : mem_footprint, 'mem_range' : mem_range, 
                       'avg_nnz_per_row' : avg_nnz_per_row, 'std_nnz_per_row' : std_nnz_per_row, 
                       'avg_bw' : avg_bw, 'std_bw' : std_bw, 'avg_bw_scaled' : avg_bw_scaled, 'std_bw_scaled' : std_bw_scaled,
                       #'avg_sc' : avg_sc, 'std_sc' : std_sc, 'avg_sc_scaled' : avg_sc_scaled, 'std_sc_scaled' : std_sc_scaled,
                       'skew' : skew, 'avg_num_neighbours' : avg_num_neighbours, 'cross_row_similarity' : cross_row_similarity,
                       'format_name' : format_name, 'time' : time, 'gflops' : gflops, 'W_avg' : W_avg, 'J_estimated' : J_estimated}
            inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)
    inputvaldata_CPU['System'] = system
    inputvaldata_CPU['Arch'] = arch
    return inputvaldata_CPU

# read HAWK data (DEPRECATED, skip)

In [10]:
# arch, system = 'CPU', 'AMD-EPYC-64'
# fname = 'amd-hawk/amd-hawk_validation_matrices_t%d_d.csv' % Hawk_threads

# cpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
#             'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
#             'avg_nnz_per_row','std_nnz_per_row',
#             'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
#             'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
#             'skew', 'avg_num_neighbours', 'cross_row_similarity',
#             'format_name','time','gflops','W_avg','J_estimated', 'System'])

# cpu_data = create_complete_cpu_csv(cpu_data, system, arch)
# fname2 = fname.replace('.csv', '_modified_features.csv')

# if(fname == 'amd-hawk/amd-hawk_validation_matrices_t128_d.csv'):
#     formats_to_discard = ['MKL_IE_no_optimize', 'Custom_CSR_BV_CPU', 'Custom_CSR_B_CPU']
    
# cpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


# read AMD-EPYC-24 data

In [11]:
arch, system = 'CPU', 'AMD-EPYC-24'
fname = 'amd-epyc1/amd-epyc1_validation_matrices_t%d_d.csv' % Epyc_threads

cpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data = create_complete_cpu_csv(cpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read AMD-EPYC-64 data

In [12]:
arch, system = 'CPU', 'AMD-EPYC-64'
fname = 'amd-epyc7763/amd-epyc7763_validation_matrices_t%d_d.csv' % Hawk_threads

cpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data = create_complete_cpu_csv(cpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read INTEL-XEON-14 data

In [13]:
arch, system = 'CPU', 'INTEL-XEON-14'
fname = 'intel-gold2/intel-gold2_validation_matrices_t%d_d.csv' % Xeon_threads

cpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data = create_complete_cpu_csv(cpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read INTEL-ICY-16 data

In [14]:
arch, system = 'CPU', 'INTEL-ICY-16'
fname = 'intel-icy3/intel-icy3_validation_matrices_t%d_d.csv' % Icy_threads

cpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data = create_complete_cpu_csv(cpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read INTEL-SAPPHIRE-56 data

In [15]:
arch, system = 'CPU', 'INTEL-SAPPHIRE-56'
fname = 'intel-sapphire/intel-sapphire_validation_matrices_t%d_d.csv' % Sapphire_threads

cpu_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data = create_complete_cpu_csv(cpu_data, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read ARM-NEON-80 data

In [16]:
arch, system = 'CPU', 'ARM-NEON-80'
fname = 'arm/arm_validation_matrices_t%d_d.csv' % Arm_threads

cpu_data_arm = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data_arm = create_complete_cpu_csv(cpu_data_arm, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data_arm.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read ARM-GRACE-72 data

In [17]:
arch, system = 'CPU', 'ARM-GRACE-72'
fname = 'grace1-arm/grace1-arm_validation_matrices_t%d_d.csv' % Grace_threads

cpu_data_arm = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data_arm = create_complete_cpu_csv(cpu_data_arm, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data_arm.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read IBM-POWER9-32 data

In [18]:
arch, system = 'CPU', 'IBM-POWER9-32'
fname = 'power9-m100/power9-m100_validation_matrices_t%d_d.csv' % Power9_threads

cpu_data_power9 = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix_name','distribution','placement','seed',
            'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
            'avg_nnz_per_row','std_nnz_per_row',
            'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
            'avg_sc','std_sc','avg_sc_scaled','std_sc_scaled',
            'skew', 'avg_num_neighbours', 'cross_row_similarity',
            'format_name','time','gflops','W_avg','J_estimated', 'System'])

cpu_data_power9 = create_complete_cpu_csv(cpu_data_power9, system, arch)
fname2 = fname.replace('.csv', '_modified_features.csv')

cpu_data_power9.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


  inputvaldata_CPU = pd.concat([inputvaldata_CPU, pd.DataFrame([new_row])], ignore_index=True)


# read FPGA Data (SKIP FOR NOW...)

In [19]:
def create_complete_fpga_csv(fpga_dataframe, system, arch):
    vm_features = pd.read_csv('../benchmark_results/validation_matrices_features.csv', sep='\t')
    matrix_names = list(vm_features['matrix']) # same as : list(set(inputdata_GPU['matrix_name']))

    inputvaldata_FPGA = pd.DataFrame(columns=header_names)

    for matrix_name in matrix_names:
        pin_df = fpga_dataframe[fpga_dataframe['matrix'] == matrix_name]
        if(pin_df.empty):
            m = list(vm_features[vm_features['matrix']==matrix_name]['nr_rows'])[0]
            n = list(vm_features[vm_features['matrix']==matrix_name]['nr_cols'])[0]
            nz = list(vm_features[vm_features['matrix']==matrix_name]['nr_nzeros'])[0]
            mem_footprint = (nz*(64+32)+32*(m+1))/(8*1024*1024)
            
            new_row = {'matrix_name' : matrix_name, 'distribution' : 'unused', 'placement' : 'unused', 'seed' : 0, 'nr_rows' : m, 'nr_cols' : n, 'nr_nzeros' : nz, 'density' : nz/(m*n)*100.0, 'mem_footprint' : mem_footprint, 'mem_range' : find_class((nz*(64+32)+32*(m+1))/(8*1024*1024)),'avg_nnz_per_row' : list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-avg'])[0],'std_nnz_per_row' : list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-std'])[0], 'avg_bw' : list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-avg'])[0]*n,'std_bw' : list(vm_features[vm_features['matrix']==matrix_name]['bw-std'])[0]*n,'avg_bw_scaled' : list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-avg'])[0],'std_bw_scaled' : list(vm_features[vm_features['matrix']==matrix_name]['bw-std'])[0],'skew' : list(vm_features[vm_features['matrix']==matrix_name]['skew'])[0],'avg_num_neighbours' : list(vm_features[vm_features['matrix']==matrix_name]['num-neigh-avg'])[0],'cross_row_similarity' : list(vm_features[vm_features['matrix']==matrix_name]['cross_row_sim-avg'])[0],'format_name' : 'unused','time' : -1,'gflops' : -1,'W_avg' : -1,'J_estimated' : -1}
            inputvaldata_FPGA = pd.concat([inputvaldata_FPGA, pd.DataFrame([new_row])], ignore_index=True)
            
        for index, curr in pin_df.iterrows():
            matrix_name = matrix_name
            distribution = 'unused'
            placement = 'unused'
            diagonal_factor = 0
            seed = 0
            m = curr['nr_rows']
            n = curr['nr_cols']
            nz = curr['nr_nnz']
            density = nz/(m*n)*100.0
            # mem_footprint = curr['mem_footprint']
            mem_footprint = (nz*(64+32)+32*(m+1))/(8*1024*1024)

            mem_range = find_class((nz*(64+32)+32*(m+1))/(8*1024*1024))
            avg_nnz_per_row = list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-avg'])[0]
            std_nnz_per_row = list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-std'])[0]
            avg_bw = list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-avg'])[0]*n
            std_bw = list(vm_features[vm_features['matrix']==matrix_name]['bw-scaled-std'])[0]*n
            avg_bw_scaled = avg_bw/n
            std_bw_scaled = std_bw/n
            # avg_sc = list(vm_features[vm_features['matrix']==matrix_name]['sc-avg'])[0]
            # std_sc = list(vm_features[vm_features['matrix']==matrix_name]['sc-std'])[0]
            # avg_sc_scaled = avg_sc*n
            # std_sc_scaled = std_sc*n
            # skew = (list(vm_features[vm_features['matrix']==matrix_name]['nnz-r-max'])[0] - avg_nnz_per_row)/avg_nnz_per_row
            skew = list(vm_features[vm_features['matrix']==matrix_name]['skew_coeff'])[0]
            avg_num_neighbours = list(vm_features[vm_features['matrix']==matrix_name]['num-neigh-avg'])[0]
            cross_row_similarity = list(vm_features[vm_features['matrix']==matrix_name]['cross_row_sim-avg'])[0]

            format_name = curr['format_name']
            time = curr['runtime_iter']
            gflops = curr['gflops']
            W_avg = curr['W_avg']
            J_estimated = curr['J_estimated']

            new_row = {'matrix_name' : matrix_name, 'distribution' : distribution, 'placement' : placement, 'seed' : seed, 
                       'nr_rows' : m, 'nr_cols' : n, 'nr_nzeros' : nz, 'density' : density, 'mem_footprint' : mem_footprint, 'mem_range' : mem_range, 
                       'avg_nnz_per_row' : avg_nnz_per_row, 'std_nnz_per_row' : std_nnz_per_row, 
                       'avg_bw' : avg_bw, 'std_bw' : std_bw, 'avg_bw_scaled' : avg_bw_scaled, 'std_bw_scaled' : std_bw_scaled,
                       #'avg_sc' : avg_sc, 'std_sc' : std_sc, 'avg_sc_scaled' : avg_sc_scaled, 'std_sc_scaled' : std_sc_scaled,
                       'skew' : skew, 'avg_num_neighbours' : avg_num_neighbours, 'cross_row_similarity' : cross_row_similarity,
                       'format_name' : format_name, 'time' : time, 'gflops' : gflops, 'W_avg' : W_avg, 'J_estimated' : J_estimated}
            inputvaldata_FPGA = pd.concat([inputvaldata_FPGA, pd.DataFrame([new_row])], ignore_index=True)

    inputvaldata_FPGA['System'] = system
    inputvaldata_FPGA['Arch'] = arch
    return inputvaldata_FPGA

In [20]:
# arch, system = 'FPGA', 'Alveo-U280'

# fname = 'alveo-u280/alveo-u280_spmv_validation_matrices_dtype-D.csv'
# fpga_data = pd.read_csv('../benchmark_results/%s' % fname, names = ['matrix','nr_rows','nr_cols','nr_nnz','density',
#                                                              'mem_footprint','format_name','runtime_iter',
#                                                              'gflops','W_avg','J_estimated'])

# fpga_data = create_complete_fpga_csv(fpga_data, system, arch)
# fname2 = fname.replace('.csv', '_modified_features.csv')
# fpga_data.to_csv('../benchmark_results/%s' % fname2, header=False, index=False)


---
# Now, read validation matrices complete dataset!

In [21]:
#Unified read
fname = 'vulcan-P100/vulcan-P100_dtype-D_run_validation_matrices_modified_features.csv'
inputvaldata_GPU_P100 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

# fname = 'silver1-TeslaV100_dtype-D_run_validation_matrices_modified_features.csv' % dtype
fname = 'vulcan-V100/vulcan-V100_dtype-D_run_validation_matrices_modified_features.csv'
inputvaldata_GPU_V100 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

# fname = 'epyc5TeslaA100_dtype-D_run_validation_matrices_modified_features.csv'
fname = 'epyc5-A100/epyc5-A100_dtype-D_run_validation_matrices_modified_features.csv'
inputvaldata_GPU_A100 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'grace1-H100/grace1-H100_validation_matrices_d_modified_features.csv'
inputvaldata_GPU_H100 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'amd-mi250/amd-mi250_validation_matrices_d_modified_features.csv'
inputvaldata_GPU_MI250 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

#Device P100 V100 A100 H100 MI250
#specification 720 980.99 1555.00
#copy 720 825.47 1329.58
#scale 720 826.52 1327.59
#add 720 873.63 1376.84
#triadd 720 872.37 1377.21

inputvaldata_GPU_P100['mem_bw_gbytes_s'] = 464
inputvaldata_GPU_P100['cache_bw_gbytes_s'] = 464

inputvaldata_GPU_V100['mem_bw_gbytes_s'] = 760
inputvaldata_GPU_V100['cache_bw_gbytes_s'] = 760

inputvaldata_GPU_A100['mem_bw_gbytes_s'] = 1350
inputvaldata_GPU_A100['cache_bw_gbytes_s'] = 1350

inputvaldata_GPU_H100['mem_bw_gbytes_s'] = 3300
inputvaldata_GPU_H100['cache_bw_gbytes_s'] = 3300

inputvaldata_GPU_MI250['mem_bw_gbytes_s'] = 1313
inputvaldata_GPU_MI250['cache_bw_gbytes_s'] = 1313


In [22]:
# fname = 'amd-hawk/amd-hawk_validation_matrices_t%d_d_modified_features.csv' % Hawk_threads
# inputvaldata_CPU_Hawk = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'amd-epyc1/amd-epyc1_validation_matrices_t%d_d_modified_features.csv' % Epyc_threads
inputvaldata_CPU_Epyc1 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'amd-epyc7763/amd-epyc7763_validation_matrices_t%d_d_modified_features.csv' % Hawk_threads
inputvaldata_CPU_Epyc64 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'intel-gold2/intel-gold2_validation_matrices_t%d_d_modified_features.csv' % Xeon_threads
inputvaldata_CPU_Gold2 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'intel-icy3/intel-icy3_validation_matrices_t%d_d_modified_features.csv' % Icy_threads
inputvaldata_CPU_Icy3 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'intel-sapphire/intel-sapphire_validation_matrices_t%d_d_modified_features.csv' % Sapphire_threads
inputvaldata_CPU_Sapphire = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'arm/arm_validation_matrices_t%d_d_modified_features.csv' % Arm_threads
inputvaldata_CPU_Arm = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'grace1-arm/grace1-arm_validation_matrices_t%d_d_modified_features.csv' % Grace_threads
inputvaldata_CPU_Arm_Grace = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

fname = 'power9-m100/power9-m100_validation_matrices_t%d_d_modified_features.csv' % Power9_threads
inputvaldata_CPU_Power9 = pd.read_csv('../benchmark_results/%s' % fname, names = header_names,index_col=False)

# inputvaldata_CPU_Hawk['mem_bw_gbytes_s'] = 105
# inputvaldata_CPU_Hawk['cache_bw_gbytes_s'] = 878

inputvaldata_CPU_Epyc1['mem_bw_gbytes_s'] = 50
inputvaldata_CPU_Epyc1['cache_bw_gbytes_s'] = 700

inputvaldata_CPU_Epyc64['mem_bw_gbytes_s'] = 120
inputvaldata_CPU_Epyc64['cache_bw_gbytes_s'] = 900 # needs to be verified

inputvaldata_CPU_Gold2['mem_bw_gbytes_s'] = 55
inputvaldata_CPU_Gold2['cache_bw_gbytes_s'] = 300

inputvaldata_CPU_Icy3['mem_bw_gbytes_s'] = 75
inputvaldata_CPU_Icy3['cache_bw_gbytes_s'] = 350

inputvaldata_CPU_Sapphire['mem_bw_gbytes_s'] = 250
inputvaldata_CPU_Sapphire['cache_bw_gbytes_s'] = 1200

# inputvaldata_CPU_Arm['mem_bw_gbytes_s'] = 102
inputvaldata_CPU_Arm['mem_bw_gbytes_s'] = 122
inputvaldata_CPU_Arm['cache_bw_gbytes_s'] = 820

inputvaldata_CPU_Arm_Grace['mem_bw_gbytes_s'] = 450
inputvaldata_CPU_Arm_Grace['cache_bw_gbytes_s'] = 1500

inputvaldata_CPU_Power9['mem_bw_gbytes_s'] = 100 # needs to be fixed!!!! 109
inputvaldata_CPU_Power9['cache_bw_gbytes_s'] = 600 # needs to be fixed!!!! 612

# fname = 'alveo-u280/PADDED-alveo-u280_spmv_validation_matrices_dtype-D_modified_features.csv'
# inputvaldata_FPGA = pd.read_csv('../benchmark_results/%s' % fname, names = header_names, index_col=False)
# inputvaldata_FPGA['mem_bw_gbytes_s'] = 287.5 #  20/32 * 460 = 287.5 GB/s (
# inputvaldata_FPGA['cache_bw_gbytes_s'] = 287.5

---
# Merge the results

In [23]:
%%time
inputvaldata = pd.concat([inputvaldata_GPU_P100, inputvaldata_GPU_V100, inputvaldata_GPU_A100,
                          inputvaldata_GPU_H100, inputvaldata_GPU_MI250,
                          inputvaldata_CPU_Epyc1, inputvaldata_CPU_Epyc64,
                          inputvaldata_CPU_Arm, inputvaldata_CPU_Arm_Grace,
                          inputvaldata_CPU_Gold2, inputvaldata_CPU_Icy3, inputvaldata_CPU_Sapphire,
                          inputvaldata_CPU_Power9,
                          #inputvaldata_FPGA
                         ])
print(inputvaldata.shape)

header_names.append('mem_bw_gbytes_s')
header_names.append('cache_bw_gbytes_s')

# Group per reps, take mean
# header_names without 'time','gflops','W_avg','J_estimated'
groupvalreps = inputvaldata.groupby( ['matrix_name','distribution','placement','seed',
                                      'nr_rows','nr_cols','nr_nzeros','density','mem_footprint','mem_range',
                                      'avg_nnz_per_row','std_nnz_per_row',
                                      'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
                                      'skew','avg_num_neighbours','cross_row_similarity',
                                      'format_name','System', 'Arch', 'mem_bw_gbytes_s','cache_bw_gbytes_s']).mean().reset_index().reindex(columns=header_names)

group_val_system_best = groupvalreps

(18824, 32)
CPU times: user 16.8 ms, sys: 7.99 ms, total: 24.8 ms
Wall time: 23.8 ms


---
# Group by 'best-of' format_name for each device
# skip this step if you want to plot per-format validation plots

In [24]:
%%time 
# Group per system, take best
# header_names without 'time','gflops','W_avg','J_estimated', 'format_name'
# Curiously, I have to also remove 'mem_footprint', because for CSR5 it reports different mem_footprint
# for the same matrix, leading to a different representation in groupby
groupval_system = groupvalreps.groupby(['matrix_name','distribution','placement','seed',
                                        'nr_rows','nr_cols','nr_nzeros','density','mem_range',
                                        'avg_nnz_per_row','std_nnz_per_row',
                                        'avg_bw','std_bw','avg_bw_scaled','std_bw_scaled',
                                        'skew','avg_num_neighbours','cross_row_similarity',
                                        'System','Arch', 'mem_bw_gbytes_s','cache_bw_gbytes_s'], as_index = False)
reslist = []

for desc, experiment in groupval_system:
    best_format = experiment['format_name'].iloc[experiment['gflops'].argmax()]
    outrow = experiment[experiment['format_name'] == best_format]
    outrow = outrow[header_names] # reorder column because they are mixed by group-by
    reslist.append(outrow.values.tolist()[0])

group_val_system_best = pd.DataFrame(reslist, columns = header_names)
print(group_val_system_best.shape)

(674, 32)
CPU times: user 590 ms, sys: 3 ms, total: 593 ms
Wall time: 592 ms


# Some fixes that have to be done by hand

In [25]:
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'scircuit') , ['mem_footprint']] =  11.626
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'mac_econ_fwd500') , ['mem_footprint']] =  15.361
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'raefsky3') , ['mem_footprint']] =  17.118
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'rgg_n_2_17_s0') , ['mem_footprint']] =  17.18
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'bbmat') , ['mem_footprint']] =  20.424
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'appu') , ['mem_footprint']] =  21.261
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'conf5_4-8x8-15') , ['mem_footprint']] =  22.125
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'mc2depi') , ['mem_footprint']] =  26.041
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'rma10') , ['mem_footprint']] =  27.347
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'cop20k_A') , ['mem_footprint']] =  30.495
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'thermomech_dK') , ['mem_footprint']] =  33.352
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'webbase-1M') , ['mem_footprint']] =  39.355
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'cant') , ['mem_footprint']] =  46.099
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'ASIC_680k') , ['mem_footprint']] =  46.914
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'roadNet-TX') , ['mem_footprint']] =  49.299
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'pdb1HYS') , ['mem_footprint']] =  49.861
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'TSOPF_RS_b300_c3') , ['mem_footprint']] =  50.669
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'Chebyshev4') , ['mem_footprint']] =  61.803
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'consph') , ['mem_footprint']] =  69.102
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'com-Youtube') , ['mem_footprint']] =  72.711
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'rajat30') , ['mem_footprint']] =  73.128
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'radiation') , ['mem_footprint']] =  88.257
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'Stanford_Berkeley') , ['mem_footprint']] =  89.392
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'shipsec1') , ['mem_footprint']] =  89.955
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'PR02R') , ['mem_footprint']] =  94.286
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'CurlCurl_2') , ['mem_footprint']] =  105.178
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'gupta3') , ['mem_footprint']] =  106.762
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'mip1') , ['mem_footprint']] =  118.732
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'rail4284') , ['mem_footprint']] =  129.152
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'pwtk') , ['mem_footprint']] =  133.977
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'crankseg_2') , ['mem_footprint']] =  162.164
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'Si41Ge41H72') , ['mem_footprint']] =  172.498
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'TSOPF_RS_b2383') , ['mem_footprint']] =  185.21
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'in-2004') , ['mem_footprint']] =  198.876
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'Ga41As41H72') , ['mem_footprint']] =  212.607
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'eu-2005') , ['mem_footprint']] =  223.42
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'wikipedia-20051105') , ['mem_footprint']] =  232.293
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'kron_g500-logn18') , ['mem_footprint']] =  243.225
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'human_gene1') , ['mem_footprint']] =  282.407
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'delaunay_n22') , ['mem_footprint']] =  303.999
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'GL7d20') , ['mem_footprint']] =  347.583
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'sx-stackoverflow') , ['mem_footprint']] =  424.585
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'dgreen') , ['mem_footprint']] =  442.43
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'mawi_201512012345') , ['mem_footprint']] =  506.18
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'ldoor') , ['mem_footprint']] =  536.04
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'dielFilterV2real') , ['mem_footprint']] =  559.9
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'circuit5M') , ['mem_footprint']] =  702.405
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'soc-LiveJournal1') , ['mem_footprint']] =  808.063
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'bone010') , ['mem_footprint']] =  823.92
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'audikw_1') , ['mem_footprint']] =  892.255
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'cage15') , ['mem_footprint']] =  1154.913
# group_val_system_best.loc[(group_val_system_best['System'] == 'Alveo-U280') &(group_val_system_best['matrix_name'] == 'kmer_V2a') , ['mem_footprint']] =  1551.419

group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'scircuit') , ['mem_footprint']] =  11.63
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'mac_econ_fwd500') , ['mem_footprint']] =  15.36
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'raefsky3') , ['mem_footprint']] =  17.12
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'rgg_n_2_17_s0') , ['mem_footprint']] =  17.18
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'bbmat') , ['mem_footprint']] =  20.42
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'appu') , ['mem_footprint']] =  21.26
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'conf5_4-8x8-15') , ['mem_footprint']] =  22.13
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'mc2depi') , ['mem_footprint']] =  26.04
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'rma10') , ['mem_footprint']] =  27.35
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'cop20k_A') , ['mem_footprint']] =  30.5
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'thermomech_dK') , ['mem_footprint']] =  33.35
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'webbase-1M') , ['mem_footprint']] =  39.35
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'cant') , ['mem_footprint']] =  46.1
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'ASIC_680k') , ['mem_footprint']] =  46.91
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'roadNet-TX') , ['mem_footprint']] =  49.3
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'pdb1HYS') , ['mem_footprint']] =  49.86
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'TSOPF_RS_b300_c3') , ['mem_footprint']] =  50.67
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'Chebyshev4') , ['mem_footprint']] =  61.8
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'consph') , ['mem_footprint']] =  69.1
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'com-Youtube') , ['mem_footprint']] =  72.71
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'rajat30') , ['mem_footprint']] =  73.13
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'radiation') , ['mem_footprint']] =  88.26
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'Stanford_Berkeley') , ['mem_footprint']] =  89.39
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'shipsec1') , ['mem_footprint']] =  89.95
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'PR02R') , ['mem_footprint']] =  94.29
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'CurlCurl_2') , ['mem_footprint']] =  105.18
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'gupta3') , ['mem_footprint']] =  106.76
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'mip1') , ['mem_footprint']] =  118.73
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'rail4284') , ['mem_footprint']] =  129.15
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'pwtk') , ['mem_footprint']] =  133.98
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'crankseg_2') , ['mem_footprint']] =  162.16
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'Si41Ge41H72') , ['mem_footprint']] =  172.5
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'TSOPF_RS_b2383') , ['mem_footprint']] =  185.21
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'in-2004') , ['mem_footprint']] =  198.88
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'Ga41As41H72') , ['mem_footprint']] =  212.61
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'eu-2005') , ['mem_footprint']] =  223.42
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'wikipedia-20051105') , ['mem_footprint']] =  232.29
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'kron_g500-logn18') , ['mem_footprint']] =  243.22
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'human_gene1') , ['mem_footprint']] =  282.41
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'delaunay_n22') , ['mem_footprint']] =  304
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'GL7d20') , ['mem_footprint']] =  347.58
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'sx-stackoverflow') , ['mem_footprint']] =  424.58
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'dgreen') , ['mem_footprint']] =  442.43
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'mawi_201512012345') , ['mem_footprint']] =  506.18
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'ldoor') , ['mem_footprint']] =  536.04
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'dielFilterV2real') , ['mem_footprint']] =  559.9
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'circuit5M') , ['mem_footprint']] =  702.4
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'soc-LiveJournal1') , ['mem_footprint']] =  808.06
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'bone010') , ['mem_footprint']] =  823.92
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'audikw_1') , ['mem_footprint']] =  892.25
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'cage15') , ['mem_footprint']] =  1154.91
group_val_system_best.loc[(group_val_system_best['System'] == 'NVIDIA-V100') &(group_val_system_best['matrix_name'] == 'kmer_V2a') , ['mem_footprint']] =  1551.42


In [26]:
groupvalreps.to_csv('validation_real_benchmarks_all-devices_all.csv', sep=',', header=True, index=False)
group_val_system_best.to_csv('validation_real_all-devices_best-of.csv', sep=',', header=True, index=False)