In [1]:
%load_ext autoreload
%autoreload 2

import math
import os
import re
import sys

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display, HTML
from IPython.display import clear_output
from ipywidgets import widgets
from tabulate import tabulate

# Customized modules
# Root of the project's `util` directory. The original checkout location is
# kept as the default; set GPU_VIRT_UTIL_HOME to run from another checkout.
HOME = os.environ.get(
    'GPU_VIRT_UTIL_HOME',
    '/home/serinatan/project/GPU-Virtualization-Benchmarks/util',
)
# Make the project packages importable, without adding a duplicate entry
# when the cell is re-run.
if HOME not in sys.path:
    sys.path.append(HOME)

import data.scripts.common.format as fmt
import data.scripts.common.constants as const
import data.scripts.gen_tables.gen_pair_configs as gen_pair
import data.scripts.gen_tables.search_best_inter as search_inter
import data.scripts.gen_graphs.gen_altair_timeline as gen_altair
import scheduler.scheduler as scheduler

# Script, pickle, CSV and graph directories, each joined onto HOME.
SCRIPT_PATH, PKL_PATH, CSV_PATH, GRAPH_PATH = (
    os.path.join(HOME, subdir)
    for subdir in ('data/scripts', 'data/pickles', 'data/csv', 'data/graphs')
)

# Ask the inline backend for 'retina' figure output.
%config InlineBackend.figure_format ='retina'
# Apply the 'seaborn-talk' matplotlib style sheet to subsequent figures.
# NOTE(review): newer matplotlib releases expose the seaborn sheets under
# 'seaborn-v0_8-*' names — confirm against the installed version.
plt.style.use('seaborn-talk')

In [2]:
def draw_table(df, cols, hide_index=True):
    """Return a Styler for the `cols` subset of `df` with `fmt.table_style`.

    When `hide_index` is truthy, the Styler's `hide_index()` is applied too.
    """
    styled = df[cols].style.set_table_styles(fmt.table_style)
    return styled.hide_index() if hide_index else styled

# Seq

In [34]:
# run python scripts to generate all the pickles needed
gen_seq = os.path.join(SCRIPT_PATH, 'gen_tables/gen_table_seq.py')   
seq_file = os.path.join(CSV_PATH, 'seq-multi.csv')
seq_pkl = os.path.join(PKL_PATH, 'seq-multi.pkl')
%run $gen_seq --multi --csv $seq_file --output $seq_pkl

df_seq = pd.read_pickle(os.path.join(PKL_PATH, 'seq-multi.pkl'))
print(df_seq.columns)

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd', 'ipc',
       'avg_dram_bw', 'avg_dram_eff', 'avg_row_locality', 'std_dram_bw',
       'ratio_dram_bw', 'MPKI', 'l2_access_density', '1_kidx', 'waves'],
      dtype='object')


In [10]:
# Columns shown in the sequential-run table.
col_seq = [
    'pair_str', '1_kidx', 'runtime',
    'ipc',
    'avg_dram_bw',
    'l1D_miss_rate',
    'l2_miss_rate',
]

# Per-column display formats, keyed by column name. The five '*_busy' keys
# name columns that are not part of col_seq.
draw_table(df_seq, col_seq).format(dict(
    runtime='{:,}',
    avg_dram_bw='{:.4f}',
    sp_busy='{:.2f}',
    dp_busy='{:.2f}',
    int_busy='{:.2f}',
    tensor_busy='{:.2f}',
    sfu_busy='{:.2f}',
))

pair_str,1_kidx,runtime,ipc,avg_dram_bw,l1D_miss_rate,l2_miss_rate
nvd_conv-0,1,190346,2131.9,0.7242,0.9991,0.931
nvd_conv-0,2,208164,1755.92,0.6698,0.9997,0.9323
parb_sad-0,1,51633,2686.45,0.0061,1.0,0.6109
parb_sad-0,2,21556,203.199,0.4558,0.9099,0.9849
parb_sad-0,3,12806,89.9086,0.1249,0.9109,0.9371
parb_sad-1,1,3000706,3754.08,0.1736,0.9999,0.5524
parb_sad-1,2,2324970,153.001,0.5645,0.9121,0.9998
parb_sad-1,3,541865,172.562,0.5842,0.912,0.9992


# Intra

In [11]:
gen_intra = os.path.join(SCRIPT_PATH, 'gen_tables/gen_table_intra.py')
intra_file = os.path.join(CSV_PATH, 'intra-multi.csv')
intra_pkl = os.path.join(PKL_PATH, 'intra-multi.pkl')

%run $gen_intra --out_intra $intra_pkl --seq $seq_pkl --csv $intra_file

df_intra = pd.read_pickle(intra_pkl)
df_intra.sort_values(['pair_str', '1_kidx'], inplace=True)
# col_intra = ['pair_str', '1_kidx', 'regs', 'thread_count', 'smem', 'intra']
# draw_table(df_intra, col_intra)

In [12]:
# Columns shown for the candidate pair configurations.
col_prod = [
    'pair_str_x', '1_kidx_x', 'pair_str_y', '1_kidx_y',
    'norm_ipc_x', 'norm_ipc_y', 'diff_mflat', 'sum_ipc',
    'intra_x', 'intra_y',
    'sum_comp', 'sum_dram',
]
find_pair = os.path.join(SCRIPT_PATH, 'gen_tables/gen_pair_configs.py')

# One '<pair_str>:<kernel index>' label per row of df_seq, on df_seq's index.
pairs = pd.Series(
    [
        ':'.join([bench, str(kidx)])
        for bench, kidx in zip(df_seq['pair_str'], df_seq['1_kidx'])
    ],
    index=df_seq.index,
)

In [13]:
# Application selectors, pre-set to the first two entries of `pairs`.
app1 = widgets.Dropdown(description='App 1:', options=pairs, value=pairs[0])
app2 = widgets.Dropdown(description='App 2:', options=pairs, value=pairs[1])

# QoS slider: 0.1 to 0.95 in steps of 0.05, starting at 0.75.
qos = widgets.FloatSlider(
    description='QoS:',
    min=0.1, max=0.95, step=0.05,
    value=0.75,
    readout_format='.2f',
)

button = widgets.Button(button_style='info', description='Calculate')

def show_widgets():
    """Display the two app selectors, the QoS slider and the Calculate button.

    Note: later cells of this notebook define functions with this same name.
    """
    for widget in (app1, app2, qos, button):
        display(widget)
    

def onclick(b):
    clear_output()
    show_widgets()
    %run $find_pair --app $app1.value $app2.value --qos $qos.value --intra_pkl $intra_pkl --top
    
    df_prod = pd.read_pickle(os.path.join(PKL_PATH, 'pair_candidates.pkl'))
   
    display(HTML(draw_table(df_prod, col_prod, False).render()))
    
    

In [14]:
# Hook the Calculate button up to its handler and draw the controls.
button.on_click(onclick)
show_widgets()

Dropdown(description='App 1:', index=6, options=('nvd_conv-0:1', 'nvd_conv-0:2', 'parb_sad-0:1', 'parb_sad-0:2…

Dropdown(description='App 2:', index=7, options=('nvd_conv-0:1', 'nvd_conv-0:2', 'parb_sad-0:1', 'parb_sad-0:2…

FloatSlider(value=0.75, description='QoS:', max=0.95, min=0.1, step=0.05)

Button(button_style='info', description='Calculate', style=ButtonStyle())

# CTX Ratio

In [15]:
# NOTE(review): this rebinds `gen_pair`, which the first cell imports as a
# module (data.scripts.gen_tables.gen_pair_configs), to a script path. The
# later `%run $gen_pair ...` cells rely on the path binding.
gen_pair = os.path.join(SCRIPT_PATH, 'gen_tables/gen_table_pair.py')
# Resolve the CSV through CSV_PATH, like the other CSV inputs in this notebook.
ctx_csv = os.path.join(CSV_PATH, 'ctx.csv')
output = os.path.join(PKL_PATH, 'pair_ctx.pkl')


In [16]:
# Run the pair-table script on ctx_csv with `--how ctx`, passing the sequential
# pickle and a QoS of 0.5; the result is written to `output`.
%run $gen_pair --csv $ctx_csv --output $output --seq_pkl $seq_pkl --qos 0.5 --multi --how ctx

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd'],
      dtype='object')


In [17]:
df_ctx = pd.read_pickle(output)

# One 'app1+app2' label per distinct (1_bench, 2_bench) combination.
uniq_pairs = [
    '+'.join(combo)
    for combo in df_ctx[['1_bench', '2_bench']].drop_duplicates().values
]

col_ctx = [
    '1_bench', '2_bench', '1_ctx', '2_ctx',
    'cta_quota',
    # 'runtime', 'norm_runtime'
]
draw_table(df_ctx, col_ctx)

1_bench,2_bench,1_ctx,2_ctx,cta_quota
parb_sad-0,nvd_conv-0,0.5,0.5,"[[], [8, 2, 2], [16, 8]]"
parb_sad-0,nvd_conv-0,0.25,0.75,"[[], [4, 2, 2], [24, 12]]"
parb_sad-0,nvd_conv-0,0.75,0.25,"[[], [12, 2, 2], [8, 4]]"
parb_sad-1,nvd_conv-0,0.5,0.5,"[[], [8, 8, 16], [16, 8]]"
parb_sad-1,nvd_conv-0,0.25,0.75,"[[], [4, 4, 8], [24, 12]]"
parb_sad-1,nvd_conv-0,0.75,0.25,"[[], [12, 12, 24], [8, 4]]"


In [18]:
# Pair selector (defaults to the first pair) and its Display button.
ctx_dd = widgets.Dropdown(description='Pair:', options=uniq_pairs, value=uniq_pairs[0])

ctx_btn = widgets.Button(button_style='info', description='Display')

def show_widgets():
    """Display the ctx pair selector followed by its Display button."""
    for widget in (ctx_dd, ctx_btn):
        display(widget)
    

def ctx_onclick(b):
    """Button callback: show every ctx split recorded for the selected pair.

    Filters `df_ctx` to the rows whose (1_bench, 2_bench) match the dropdown
    value, orders them by '1_ctx', and for each row draws an altair timeline
    and prints the per-app `cta_quota` entries, the slowdown and the weighted
    speedup.

    `b` is the clicked button passed in by `on_click`; it is not used.
    """
    clear_output()
    # Display this cell's own widgets directly. `show_widgets` is defined
    # several times in this notebook, so the global name resolves to
    # whichever definition ran last.
    for widget in (ctx_dd, ctx_btn):
        display(widget)

    # Dropdown values have the form '<app1>+<app2>'.
    apps = ctx_dd.value.split('+')
    df_apps = df_ctx[(df_ctx['1_bench'] == apps[0]) & (df_ctx['2_bench'] == apps[1])]
    df_apps = df_apps.sort_values('1_ctx')
    for _, row in df_apps.iterrows():
        gen_altair.draw_altair_timeline(row, col_title='1_ctx').display()
        # Print ctas/SM for each kernel in each app
        print(row['1_bench'], ':', row['cta_quota'][1], 'CTAs/SM')
        print(row['2_bench'], ':', row['cta_quota'][2], 'CTAs/SM')

        print('slowdown:', row['sld'])
        print('weighted speedup:', row['ws'])
        print('-' * 100)

# Hook the Display button up to its handler and draw the controls.
ctx_btn.on_click(ctx_onclick)
show_widgets()
 

Dropdown(description='Pair:', options=('parb_sad-0+nvd_conv-0', 'parb_sad-1+nvd_conv-0'), value='parb_sad-0+nv…

Button(button_style='info', description='Display', style=ButtonStyle())

# Pair-Dynamic

In [55]:
dynamic_csv = os.path.join(CSV_PATH, 'pair_dynamic_multi.csv')
dynamic_output = os.path.join(PKL_PATH, 'pair_dynamic_multi.pkl')
%run $gen_pair --csv $dynamic_csv --output $dynamic_output --seq_pkl $seq_pkl --qos 0.5 --how dynamic --multi --isolated_pkl $intra_pkl

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd'],
      dtype='object')


In [56]:
col_dynamic = ['1_bench', '1_kidx', '1_intra', '2_bench', '2_kidx', '2_intra', 'sld', 'ws']
df_dynamic = pd.read_pickle(dynamic_output)

# Rows where app 1 is kernel 1 of 'parb_sad-0'.
df_dynamic_show = df_dynamic.loc[
    (df_dynamic['1_bench'] == 'parb_sad-0') & (df_dynamic['1_kidx'] == 1)
]
# df_dynamic_show[col_dynamic]

In [57]:
# For each (app 1 kernel, app 2 kernel) combination keep the row with the
# highest 'ws': sort descending, then keep the first occurrence of each key.
df_best = (
    df_dynamic
    .sort_values('ws', ascending=False)
    .drop_duplicates(subset=['1_bench', '1_kidx', '2_bench', '2_kidx'])
)

draw_table(df_best, col_dynamic)


1_bench,1_kidx,1_intra,2_bench,2_kidx,2_intra,sld,ws
parb_sad-0,1,12,nvd_conv-0,2,4,"[0, 0.8894113999276523, 0.8435239627358891]",1.73294
parb_sad-0,1,12,nvd_conv-0,1,4,"[0, 0.9027221707432208, 0.7646014428716037]",1.66732
parb_sad-0,3,2,nvd_conv-0,2,4,"[0, 0.7614460696872398, 0.8881512422187995]",1.6496
parb_sad-0,3,2,nvd_conv-0,1,4,"[0, 0.7966903073286052, 0.852728250156796]",1.64942
parb_sad-1,1,8,nvd_conv-0,2,6,"[0, 0.7375398607950677, 0.9098832507944279]",1.64742
parb_sad-1,1,8,nvd_conv-0,1,4,"[0, 0.79483573683671, 0.7568429423459244]",1.55168
parb_sad-1,2,2,nvd_conv-0,1,4,"[0, 0.779599250636178, 0.4695854683248263]",1.24918
parb_sad-0,2,2,nvd_conv-0,1,4,"[0, 0.6817851156023658, 0.5533555242234399]",1.23514
parb_sad-1,3,4,nvd_conv-0,1,4,"[0, 0.6669337527939424, 0.5525940677990704]",1.21953
parb_sad-1,2,2,nvd_conv-0,2,6,"[0, 0.7195732643359414, 0.48259170132561796]",1.20216


In [58]:
# Index df_best by (app 1, kernel 1, app 2, kernel 2) for direct .loc lookup.
# The guard keeps this cell re-runnable: once the key columns have moved into
# the index, a second set_index on them would raise KeyError.
best_keys = ['1_bench', '1_kidx', '2_bench', '2_kidx']
if list(df_best.index.names) != best_keys:
    df_best = df_best.set_index(best_keys)

In [59]:
# Copy of df_seq indexed by (benchmark, kernel index) for direct .loc lookup.
df_seq_index = df_seq.set_index(keys=['pair_str', '1_kidx'])

In [60]:

# Pair selector (defaults to the first pair) and its Best button.
best_dd = widgets.Dropdown(description='Pair:', options=uniq_pairs, value=uniq_pairs[0])
best_btn = widgets.Button(button_style='info', description='Best')

def show_widgets():
    """Display the best-config pair selector followed by its Best button."""
    for widget in (best_dd, best_btn):
        display(widget)
    

def best_onclick(b):
    """Button callback: predict the co-run outcome for the selected app pair.

    For every (kernel of app 1, kernel of app 2) combination this looks up
    the '1_intra'/'2_intra' settings and the 'sld' entries stored in
    `df_best`, prints them as matrices (rows: app 2 kernels, columns: app 1
    kernels), passes the isolated runtimes from `df_seq_index` and the
    slowdown matrices to `scheduler.simulate`, and draws the predicted
    timeline.

    Requires `numpy` to be imported as `np` by the notebook's import cell.
    `b` is the clicked button passed in by `on_click`; it is not used.
    """
    clear_output()
    # Display this cell's own widgets directly. `show_widgets` is defined
    # several times in this notebook, so the global name resolves to
    # whichever definition ran last.
    for widget in (best_dd, best_btn):
        display(widget)

    # Dropdown values have the form '<app1>+<app2>'.
    apps = best_dd.value.split('+')

    def get_best(app1, app2):
        """Return ([configs_1, configs_2], [interference_1, interference_2]).

        Every matrix has one row per kernel of `app2` and one column per
        kernel of `app1`.
        """
        matrix_size = (len(const.kernel_yaml[app2]), len(const.kernel_yaml[app1]))
        configs_1 = np.zeros(matrix_size, dtype=int)
        configs_2 = np.zeros(matrix_size, dtype=int)
        interference_1 = np.zeros(matrix_size)
        interference_2 = np.zeros(matrix_size)

        for kernel_1 in const.kernel_yaml[app1]:
            for kernel_2 in const.kernel_yaml[app2]:
                best_idx = (app1, kernel_1, app2, kernel_2)
                # Kernel ids are used as 1-based positions in the matrices.
                matrix_idx = (kernel_2 - 1, kernel_1 - 1)

                configs_1[matrix_idx] = int(df_best.loc[best_idx, '1_intra'])
                configs_2[matrix_idx] = int(df_best.loc[best_idx, '2_intra'])

                # 'sld' holds a list; entries 1 and 2 are read for app 1 and
                # app 2 respectively.
                interference_1[matrix_idx] = df_best.loc[best_idx, 'sld'][1]
                interference_2[matrix_idx] = df_best.loc[best_idx, 'sld'][2]

        return [configs_1, configs_2], [interference_1, interference_2]

    def get_runtime(app):
        """Return the 'runtime' of each kernel of `app`, in kernel_yaml order."""
        return [
            df_seq_index.loc[(app, kidx)]['runtime']
            for kidx in const.kernel_yaml[app]
        ]

    configs, interference = get_best(apps[0], apps[1])
    runtimes = [get_runtime(apps[0]), get_runtime(apps[1])]

    # The app with the smaller total runtime gets an unbounded iteration
    # limit and the other a limit of 1.
    # NOTE(review): presumably the shorter app repeats while the longer one
    # runs once — confirm against scheduler.simulate.
    tot_runtime = [sum(r) for r in runtimes]
    if tot_runtime[0] < tot_runtime[1]:
        iter_lim = [math.inf, 1]
    else:
        iter_lim = [1, math.inf]

    # Printing results
    app1_kernels = ['{0}:{1}'.format(apps[0], kernel) for kernel in const.kernel_yaml[apps[0]]]
    app2_kernels = ['{0}:{1}'.format(apps[1], kernel) for kernel in const.kernel_yaml[apps[1]]]

    # Print both apps' config matrices first, then both slowdown matrices.
    for label, matrices in (('configs:', configs), ('slowdown:', interference)):
        for app, matrix in zip(apps, matrices):
            print(app, label)
            print(pd.DataFrame(matrix, app2_kernels, app1_kernels))
            print('-' * 50)
    print('\n')

    # run simulation to predict final slowdown
    sim_results = scheduler.simulate(runtimes, interference, iter_lim, finish_remaining=False)
    scaled_runtime = [[int(t) for t in app] for app in sim_results[0]]
    print('Predicted runtime:', scaled_runtime)

    norm_ipc = scheduler.calculate_norm_ipc(runtimes, scaled_runtime)

    sld = scheduler.calculate_qos(runtimes, scaled_runtime, short=True, revert=False)
    print('Predicted weighted speedup:', sum(sld))

    row = pd.Series({'1_bench': apps[0], '2_bench': apps[1],
                     'runtime': scaled_runtime, 'norm_ipc': norm_ipc})

    gen_altair.draw_altair_timeline(row, title='Prediction').display()

# Hook the Best button up to its handler and draw the controls.
best_btn.on_click(best_onclick)
show_widgets()
 

Dropdown(description='Pair:', index=1, options=('parb_sad-0+nvd_conv-0', 'parb_sad-1+nvd_conv-0'), value='parb…

Button(button_style='info', description='Best', style=ButtonStyle())

parb_sad-1 configs:
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1             8             2             4
nvd_conv-0:2             8             2             4
--------------------------------------------------
nvd_conv-0 configs:
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1             4             4             4
nvd_conv-0:2             6             6             4
--------------------------------------------------
parb_sad-1 slowdown:
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1      0.794836      0.779599      0.666934
nvd_conv-0:2      0.737540      0.719573      0.696158
--------------------------------------------------
nvd_conv-0 slowdown:
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1      0.756843      0.469585      0.552594
nvd_conv-0:2      0.909883      0.482592      0.500994
--------------------------------------------------


Predicted runtime: [[3907186, 3104515, 792825], [251

# CTA LUT

In [24]:
lut_csv = os.path.join(HOME, 'data/csv/pair_lut.csv')
lut_out = os.path.join(PKL_PATH, 'pair_lut.pkl')

%run $gen_pair --csv $lut_csv --output $lut_out --seq_pkl $seq_pkl --multi --how lut

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd'],
      dtype='object')


In [25]:
col_lut = ['pair_str', '1_bench', '2_bench', 'norm_ipc', 'runtime']
df_lut = pd.read_pickle(lut_out)
# draw_table(df_lut, col_lut)

In [26]:
# Pair selector over every 'pair_str' in df_lut (defaults to the first row's)
# and its Display button.
lut_dd = widgets.Dropdown(
    description='Pair:',
    options=df_lut['pair_str'],
    value=df_lut['pair_str'].iloc[0],
)

lut_btn = widgets.Button(button_style='info', description='Display')

def show_widgets():
    """Display the LUT pair selector followed by its Display button."""
    for widget in (lut_dd, lut_btn):
        display(widget)
    

def lut_onclick(b):
    """Button callback: show the CTA LUT results for the selected pair.

    Splits the dropdown value into the two benchmark names, filters `df_lut`
    to the matching (1_bench, 2_bench) rows, and for each row draws an altair
    timeline and prints the runtime, slowdown and weighted speedup.

    Requires `re` to be imported by the notebook's import cell.
    `b` is the clicked button passed in by `on_click`; it is not used.
    """
    clear_output()
    # Display this cell's own widgets directly rather than through the global
    # `show_widgets`, which is defined several times in this notebook.
    for widget in (lut_dd, lut_btn):
        display(widget)

    # Split on a '-' that is followed by a non-digit, so the '-<digit>' suffix
    # inside each benchmark name stays attached:
    # 'parb_sad-0-nvd_conv-0' -> ['parb_sad-0', 'nvd_conv-0'].
    apps = re.split(r'-(?=\D)', lut_dd.value)
    df_apps = df_lut[(df_lut['1_bench'] == apps[0]) & (df_lut['2_bench'] == apps[1])]
    for _, row in df_apps.iterrows():
        gen_altair.draw_altair_timeline(row, title='CTA LUT').display()

        print('Note:')

        print('runtime:', row['runtime'])
        print('slowdown:', row['sld'])
        print('weighted speedup:', row['ws'])
        print('-' * 100)

# Hook the Display button up to its handler and draw the controls.
lut_btn.on_click(lut_onclick)
show_widgets()
 

Dropdown(description='Pair:', options=('parb_sad-0-nvd_conv-0',), value='parb_sad-0-nvd_conv-0')

Button(button_style='info', description='Display', style=ButtonStyle())

Note:
runtime: [[], [54001, 48255, 27729, 44335, 43201, 27524, 39974, 41658, 21998, 42772, 38771, 22872, 41510, 38726, 22462, 38692, 24956, 9253], [268239, 295478]]
slowdown: [0, 0.7736313126587836, 0.7069327339782195]
weighted speedup: 1.4805640466370031
----------------------------------------------------------------------------------------------------


In [None]:
# Scratch check: writing to the last position of a one-element list replaces
# that element.
test = [0]
test[len(test) - 1] = 1
print(test)