In [21]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from tabulate import tabulate
import re
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import os
import sys

module_path = '/home/serinatan/project/gpgpu-sim_simulations/util/job_launching/results'
if module_path not in sys.path:
    sys.path.append(module_path)
    
import helper.help_iso as hi


bench_dict = {'cut_sgemm-0':0, 'cut_sgemm-1':0, 'cut_wmma-0': 0, 'cut_wmma-1': 0, 
         'parb_stencil-0': 1, 'parb_sgemm-0': 0,
         'parb_lbm-0': 1, 'parb_spmv-0': 1, 'parb_cutcp-0': 0}

mpl.style.use('seaborn-paper')





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Read CSVs

In [125]:
df_seq = pd.read_csv('seq.csv')
print(df_seq[['pair_str', 'runtime']])
df_intra = pd.read_csv('intra.csv')
df_inter = pd.read_csv('inter.csv')
print(df_intra.columns)

         pair_str  runtime
0    parb_sgemm-0        0
1      cut_wmma-1  2257808
2  parb_stencil-0   350242
3     parb_spmv-0    73206
4     cut_sgemm-0    93149
5      cut_wmma-0    62957
6    parb_cutcp-0   525176
7     cut_sgemm-1  1719066
8      parb_lbm-0   875934
Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'grid_x', 'grid_y',
       'grid_z', 'block_x', 'block_y', 'block_z', 'ctas/SM', 'runtime', 'ipc',
       'instructions', 'avg_mem_lat', 'avg_core_to_l2', 'avg_l2_to_core',
       'avg_mrq_latency', 'stall_core_ldst', 'stall_icnt_to_l2', 'l2_BW',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'stall_l2_to_icnt',
       'l1D_miss_rate', 'l2_miss_rate', 'l2_total_accesses', 'packet_lat_out',
       'network_lat_out', 'inject_out', 'accepted_out', 'packet_lat_in',
       'network_lat_in', 'inject_in', 'accepted_in',
       'mem_subpartition_parallism', 'mem_subpartition_parallism_util',
       'L2_reservation_fail', 'empty_warp', 'stall_warp', 'idle_warp',
       '

## Process columns

In [119]:
# scale all IPC to baseline
baseline_dict = pd.Series(df_seq.ipc.values,index=df_seq.pair_str).to_dict() 

def norm_ipc_row(row):
    return row['ipc'] / baseline_dict[row['pair_str']]

df_intra['norm_ipc'] = df_intra.apply(lambda row: norm_ipc_row(row), axis=1)
df_inter['norm_ipc'] = df_inter.apply(lambda row: norm_ipc_row(row), axis=1)

# decompose gpgpu-sim configs
hi.process_config_column('intra', 'l2', df=df_intra)
hi.process_config_column('inter', 'l2', df=df_inter)

df_intra['avg_dram_bw'] = df_intra['dram_bw'].transform(hi.avg_array)
df_inter['avg_dram_bw'] = df_inter['dram_bw'].transform(hi.avg_array)

df_intra['dram_busy'] = 1 - np.divide(df_intra['mem_idle'].transform(hi.avg_array),
                                      df_intra['total_cmd'].transform(hi.avg_array))
df_inter['dram_busy'] = 1 - np.divide(df_inter['mem_idle'].transform(hi.avg_array),
                                      df_inter['total_cmd'].transform(hi.avg_array))

idle_sum = df_intra[['empty_warp', 'idle_warp']].sum(axis=1)
df_intra['comp_busy'] = 1 - idle_sum / (idle_sum + df_intra[['stall_warp', 'scoreboard_warp']].sum(axis=1))

idle_sum = df_inter[['empty_warp', 'idle_warp']].sum(axis=1)
df_inter['comp_busy'] = 1 - idle_sum / (idle_sum + df_inter[['stall_warp', 'scoreboard_warp']].sum(axis=1))

max_cta_volta = 32
max_thread_volta = 2048
max_smem = 96*1024
max_register = 64*1024

df_intra['cta_ratio'] = df_intra['intra'] / max_cta_volta
threads = df_intra['intra'] * df_intra['block_x'] * df_intra['block_y'] * df_intra['block_z']
df_intra['thread_ratio'] = threads / max_thread_volta
df_intra['smem_ratio'] = df_intra['intra'] * df_intra['smem'] / max_smem
df_intra['reg_ratio'] = threads * df_intra['regs'] / max_register
# df_intra['dominant_resc'] = df_intra[['cta_ratio', 'thread_ratio', 'smem_ratio', 'reg_ratio']].max(axis=1)

def pow_2(*resc_list):
    done_first = False
    for r in resc_list:
        if done_first:
            usage = usage + df_intra[r] ** 2
        else:
            done_first = True
            usage = df_intra[r] ** 2
    return usage
            
df_intra['usage'] = pow_2('cta_ratio', 'thread_ratio', 'smem_ratio', 'reg_ratio', 'l2', 'dram_busy', 'comp_busy')

print(df_intra.columns)


Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'grid_x', 'grid_y',
       'grid_z', 'block_x', 'block_y', 'block_z', 'ctas/SM', 'runtime', 'ipc',
       'instructions', 'avg_mem_lat', 'avg_core_to_l2', 'avg_l2_to_core',
       'avg_mrq_latency', 'stall_core_ldst', 'stall_icnt_to_l2', 'l2_BW',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'stall_l2_to_icnt',
       'l1D_miss_rate', 'l2_miss_rate', 'l2_total_accesses', 'packet_lat_out',
       'network_lat_out', 'inject_out', 'accepted_out', 'packet_lat_in',
       'network_lat_in', 'inject_in', 'accepted_in',
       'mem_subpartition_parallism', 'mem_subpartition_parallism_util',
       'L2_reservation_fail', 'empty_warp', 'stall_warp', 'idle_warp',
       'scoreboard_warp', 'regs', 'smem', 'dram_eff', 'dram_bw',
       'row_buffer_locality', 'mrqq', 'total_cmd', 'wasted_col', 'wasted_row',
       'mem_idle', 'CCDLc', 'WTRc', 'RTWc', 'RCDc', 'RCDWRc', 'norm_ipc',
       'intra', 'l2', 'avg_dram_bw', 'dram_busy', 'comp_busy

In [109]:
def print_intra(df, benchmark):
    filename = '{0}-{1}.pdf'.format(benchmark, 'intra')
    filename = os.path.join('plots', filename)
    with PdfPages(filename) as pdf:
        hi.plot_page_intra(df, 'norm_ipc', benchmark, pdf)
        hi.plot_page_intra(df, 'avg_dram_bw', benchmark, pdf)
        hi.plot_page_intra(df, 'dram_busy', benchmark, pdf)
        hi.plot_page_intra(df, 'l2_miss_rate', benchmark, pdf)
        hi.plot_page_intra(df, 'l2_BW', benchmark, pdf)
        hi.plot_page_intra(df, 'l2_total_accesses', benchmark, pdf)
        hi.plot_page_intra(df, 'l1D_miss_rate', benchmark, pdf)
        hi.plot_page_intra(df, 'avg_mem_lat', benchmark, pdf)

def print_intra_inter(df_intra, df_inter, benchmark):
    filename = '{0}-{1}.pdf'.format(benchmark, 'both')
    filename = os.path.join('plots', filename)
    with PdfPages(filename) as pdf:
        hi.plot_page_intra_inter(df_intra, df_inter, 'norm_ipc', benchmark, pdf)
 

In [78]:
print_intra_inter(df_intra, df_inter, 'parb_stencil-0')

In [79]:
print_intra_inter(df_intra, df_inter, 'parb_cutcp-0')

## Intra SM Only

In [42]:
bench_list = list(df_intra['pair_str'].unique())
bench_list.sort()

In [35]:
for bench in bench_list:
    print(bench)
    print_intra(df_intra, bench)

cut_sgemm-1
cut_wmma-1
parb_stencil-0
parb_spmv-0
cut_sgemm-0
parb_cutcp-0
parb_lbm-0
cut_wmma-0


In [51]:
fig_tot, axs = plt.subplots(2, 4, figsize=(40, 30))   
axs = axs.flat

for ax, bench in zip(axs, bench_list):
    _df = df_intra[df_intra['pair_str'] == bench]
    
    hi.plot_heatmap(_df, x_key='intra', y_key='l2', z_key='norm_ipc', title=bench, axis=ax, scale=1.2)


fig_tot.suptitle('Intra, Normalized IPC', fontsize=18)
fig_tot.savefig('plots/total.pdf')
plt.close()




In [124]:
df_intra['perfdollar'] = df_intra['norm_ipc'] / df_intra['usage']
cols = ['pair_str', 'perfdollar', 'intra', 'norm_ipc', 'usage', 'l2', 'cta_ratio', 
        'thread_ratio', 'smem_ratio', 'reg_ratio', 'comp_busy', 'dram_busy']
sort = df_intra[cols].sort_values(['pair_str', 'perfdollar'], ascending=[True, True])
sort = sort[sort['norm_ipc'] > 0.8]

best_df = []
for bench in bench_list:
    idx = df_intra[(df_intra['norm_ipc'] > 0.8) & (df_intra['pair_str'] == bench)]['perfdollar'].idxmax()
    best_df.append(df_intra.iloc[idx])
    
best_df = pd.concat(best_df, axis=1).T[cols]

# formatting
pd.options.display.float_format = '{:,}'.format
table_style = [{'selector': 'tr:nth-of-type(odd)',
  'props': [('background', '#eee')]}, 
 {'selector': 'tr:nth-of-type(even)',
  'props': [('background', 'white')]},
 {'selector': 'th',
  'props': [('background', '#606060'), 
            ('color', 'white'),
            ('font-family', 'verdana'),
            ("font-size", "90%")]},
 {'selector': 'td',
  'props': [('font-family', 'verdana'), 
            ("font-size", "90%"),
            ("font-weight", "bold")]},
 {'selector': 'tr:hover',
  'props': [('background-color', '#ffffcc')]}
]

    
sort.style.set_table_styles(table_style).hide_index()\
            .format({'norm_ipc': "{:.4f}", 'dominant_resc': '{:.2f}', 
                     
                    })\

# print(best_df)

pair_str,perfdollar,intra,norm_ipc,usage,l2,cta_ratio,thread_ratio,smem_ratio,reg_ratio,comp_busy,dram_busy
cut_sgemm-0,0.470594,2,1.0,2.12497,1.0,0.0625,0.25,0.333659,0.953125,0.148003,0.129946
cut_sgemm-0,0.568189,2,0.9639,1.6964,0.75,0.0625,0.25,0.333659,0.953125,0.142949,0.165182
cut_sgemm-0,0.69188,2,0.9614,1.3895,0.5,0.0625,0.25,0.333659,0.953125,0.142601,0.181625
cut_sgemm-0,0.711401,2,0.8553,1.20222,0.25,0.0625,0.25,0.333659,0.953125,0.127918,0.19282
cut_sgemm-1,0.459164,2,1.0,2.17787,1.0,0.0625,0.25,0.333659,0.953125,0.270635,0.135817
cut_sgemm-1,0.574472,2,0.9994,1.73974,0.75,0.0625,0.25,0.333659,0.953125,0.270602,0.133543
cut_sgemm-1,0.701486,2,0.9989,1.42393,0.5,0.0625,0.25,0.333659,0.953125,0.269742,0.122404
cut_sgemm-1,0.764651,1,1.1237,1.4696,1.0,0.03125,0.125,0.166829,0.476562,0.362515,0.258141
cut_sgemm-1,0.806542,2,0.9967,1.23577,0.25,0.0625,0.25,0.333659,0.953125,0.269611,0.119995
cut_sgemm-1,1.1029,1,1.1202,1.01573,0.75,0.03125,0.125,0.166829,0.476562,0.363126,0.223203


In [123]:
l = [2, 3]
':'.join(l)

TypeError: sequence item 0: expected str instance, int found