In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import os
import sys
from ipywidgets import widgets
from IPython.display import display, HTML
from IPython.display import clear_output
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from tabulate import tabulate
import math

# Customized modules
HOME = '/home/serinatan/project/GPU-Virtualization-Benchmarks/util'
if HOME not in sys.path:
    sys.path.append(HOME)

import data.scripts.common.format as fmt
import data.scripts.common.constants as const
import data.scripts.gen_tables.gen_pair_configs as gen_pair
import data.scripts.gen_tables.search_best_inter as search_inter
import data.scripts.gen_graphs.gen_altair_timeline as gen_altair
import scheduler.scheduler as scheduler

SCRIPT_PATH = os.path.join(HOME, 'data/scripts') 
PKL_PATH = os.path.join(HOME, 'data/pickles') 
CSV_PATH = os.path.join(HOME, 'data/csv') 
GRAPH_PATH = os.path.join(HOME, 'data/graphs')

%config InlineBackend.figure_format ='retina'
plt.style.use('seaborn-talk')

In [2]:
def draw_table(df, cols, hide_index=True):
    if hide_index:
        return df[cols].style.set_table_styles(fmt.table_style).hide_index()
    else:
        return df[cols].style.set_table_styles(fmt.table_style)

# Seq

In [3]:
# run python scripts to generate all the pickles needed
gen_seq = os.path.join(SCRIPT_PATH, 'gen_tables/gen_table_seq.py')   
seq_file = os.path.join(CSV_PATH, 'seq-multi.csv')
seq_pkl = os.path.join(PKL_PATH, 'seq-multi.pkl')
%run $gen_seq --multi --csv $seq_file --output $seq_pkl

df_seq = pd.read_pickle(os.path.join(PKL_PATH, 'seq-multi.pkl'))
print(df_seq.columns)

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd', 'ipc',
       'avg_dram_bw', 'avg_dram_eff', 'avg_row_locality', 'std_dram_bw',
       'ratio_dram_bw', 'MPKI', 'l2_access_density', '1_kidx', 'waves'],
      dtype='object')


In [4]:
col_seq = ['pair_str', '1_kidx', 'runtime',
           'ipc',
           'avg_dram_bw', 
           'waves',
           'l1D_miss_rate',
           'l2_miss_rate',
          ]

draw_table(df_seq, col_seq).format({
    'runtime': '{:,}',
    'avg_dram_bw':'{:.4f}', 
    'sp_busy': '{:.2f}',
    'dp_busy': '{:.2f}',
    'int_busy': '{:.2f}',
    'tensor_busy': '{:.2f}',
    'sfu_busy': '{:.2f}',
})

pair_str,1_kidx,runtime,ipc,avg_dram_bw,waves,l1D_miss_rate,l2_miss_rate
nvd_conv-0,1,190346,2131.9,0.7242,7.2,0.9991,0.931
nvd_conv-0,2,208164,1755.92,0.6698,14.4,0.9997,0.9323
parb_mriq-0,1,1286,35.0358,0.0005,0.0125,1.0,0.3292
parb_mriq-0,2,3157989,6882.6,0.0019,1.6,0.7143,0.3666
parb_sad-0,1,51633,2686.45,0.0061,1.16471,1.0,0.6109
parb_sad-0,2,21556,203.199,0.4558,0.0773438,0.9099,0.9849
parb_sad-0,3,12806,89.9086,0.1249,0.0386719,0.9109,0.9371
parb_sad-1,1,3000706,3754.08,0.1736,94.5882,0.9999,0.5524
parb_sad-1,2,2324970,153.001,0.5645,6.28125,0.9121,0.9998
parb_sad-1,3,541865,172.562,0.5842,3.14062,0.912,0.9992


# Intra

In [5]:
gen_intra = os.path.join(SCRIPT_PATH, 'gen_tables/gen_table_intra.py')
intra_file = os.path.join(CSV_PATH, 'intra-multi.csv')
intra_pkl = os.path.join(PKL_PATH, 'intra-multi.pkl')

%run $gen_intra --out_intra $intra_pkl --seq $seq_pkl --csv $intra_file

df_intra = pd.read_pickle(intra_pkl)
df_intra.sort_values(['pair_str', '1_kidx'], inplace=True)
# col_intra = ['pair_str', '1_kidx', 'regs', 'thread_count', 'smem', 'intra']
# draw_table(df_intra, col_intra)

In [6]:
col_prod = ['pair_str_x', '1_kidx_x', 'pair_str_y', '1_kidx_y',
            'norm_ipc_x', 'norm_ipc_y', 'diff_mflat', 'sum_ipc', 
            'intra_x', 'intra_y', 
           'sum_comp', 'sum_dram',]
find_pair = os.path.join(SCRIPT_PATH, 'gen_tables/gen_pair_configs.py')
pairs = df_seq.apply(lambda row: ':'.join([row['pair_str'], str(row['1_kidx'])]), axis=1)

In [26]:
app1 = widgets.Dropdown(options=pairs, value=pairs[0], description='App 1:')
app2 = widgets.Dropdown(options=pairs, value=pairs[1], description='App 2:')

qos = widgets.FloatSlider(
    value=0.75,
    min=0.1,
    max=0.95,
    step=0.05,
    description='QoS:',
    readout_format='.2f',
)

button = widgets.Button(description='Calculate', button_style='info')

def show_widgets_intra():
    display(app1)
    display(app2)
    display(qos)
    display(button)
    

def onclick(b):
    clear_output()
    show_widgets()
    %run $find_pair --app $app1.value $app2.value --qos $qos.value --intra_pkl $intra_pkl --top
    
    df_prod = pd.read_pickle(os.path.join(PKL_PATH, 'pair_candidates.pkl'))
   
    display(HTML(draw_table(df_prod, col_prod, False).render()))
    
    

In [27]:
show_widgets_intra()
button.on_click(onclick)

Dropdown(description='App 1:', index=8, options=('nvd_conv-0:1', 'nvd_conv-0:2', 'parb_mriq-0:1', 'parb_mriq-0…

Dropdown(description='App 2:', index=9, options=('nvd_conv-0:1', 'nvd_conv-0:2', 'parb_mriq-0:1', 'parb_mriq-0…

FloatSlider(value=0.75, description='QoS:', max=0.95, min=0.1, step=0.05)

Button(button_style='info', description='Calculate', style=ButtonStyle())

# Pair-Dynamic

In [13]:
dynamic_csv = os.path.join(CSV_PATH, 'pair_dynamic_multi.csv')
dynamic_output = os.path.join(PKL_PATH, 'pair_dynamic_multi.pkl')
%run $gen_pair --csv $dynamic_csv --output $dynamic_output --seq_pkl $seq_pkl --qos 0.5 --how dynamic --multi --isolated_pkl $intra_pkl

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd'],
      dtype='object')


In [116]:
df_dynamic = pd.read_pickle(dynamic_output)
col_dynamic = ['1_bench', '1_kidx', '1_intra', '2_bench', '2_kidx', '2_intra', 'sld', 'ws', 
               'importance', 'weighted_increase', 'sum_increase']

df_dynamic_show = df_dynamic[(df_dynamic['1_bench'] == 'parb_sad-0') & (df_dynamic['1_kidx'] == 2)].sort_values('2_kidx')
# df_dynamic_show[col_dynamic]

In [126]:
# Want to maximize ws (weighted speedup)
df_best_local = df_dynamic.sort_values('ws', ascending=False
                                      ).drop_duplicates(['1_bench', '1_kidx', '2_bench', '2_kidx']
                                                       ).set_index(['1_bench', '1_kidx', '2_bench', '2_kidx'])
# Want to minimize weighted runtime increase (sum_increase)
df_best_weighted = df_dynamic.sort_values('sum_increase', ascending=True
                                         ).drop_duplicates(['1_bench', '1_kidx', '2_bench', '2_kidx']
                                                          ).set_index(['1_bench', '1_kidx', '2_bench', '2_kidx'])

draw_table(df_dynamic_show, col_dynamic)

1_bench,1_kidx,1_intra,2_bench,2_kidx,2_intra,sld,ws,importance,weighted_increase,sum_increase
parb_sad-0,2,2,nvd_conv-0,1,16,"[0, 0.4573327911458426, 0.589353941803365]",1.04669,"[0, 0.2506657363800221, 0.47764422473714585]","[0, 0.5481035719130957, 0.810453940929964]",1.35856
parb_sad-0,2,1,nvd_conv-0,1,4,"[0, 0.3974701749857098, 0.6998117619376756]",1.09728,"[0, 0.2506657363800221, 0.47764422473714585]","[0, 0.6306529449386592, 0.682532433314095]",1.31319
parb_sad-0,2,2,nvd_conv-0,1,20,"[0, 0.4518919674364977, 0.5857611053838224]",1.03765,"[0, 0.2506657363800221, 0.47764422473714585]","[0, 0.5547027927980309, 0.8154249579684323]",1.37013
parb_sad-0,2,2,nvd_conv-0,1,4,"[0, 0.5411916082160398, 0.5533555242234399]",1.09455,"[0, 0.2506657363800221, 0.47764422473714585]","[0, 0.46317373103087384, 0.8631778374444807]",1.32635
parb_sad-0,2,2,nvd_conv-0,1,8,"[0, 0.48736466757964914, 0.5978541500461709]",1.08522,"[0, 0.2506657363800221, 0.47764422473714585]","[0, 0.5143289061656409, 0.7989310180422072]",1.31326
parb_sad-0,2,2,nvd_conv-0,1,12,"[0, 0.4514692227675423, 0.6255825418214086]",1.07705,"[0, 0.2506657363800221, 0.47764422473714585]","[0, 0.5552222028412506, 0.7635191086798324]",1.31874
parb_sad-0,2,2,nvd_conv-0,2,8,"[0, 0.44747588276637196, 0.6125677560605258]",1.06004,"[0, 0.2506657363800221, 0.5223557752628541]","[0, 0.5601770867076991, 0.8527314245564729]",1.41291
parb_sad-0,2,2,nvd_conv-0,2,6,"[0, 0.4804192509010329, 0.5902498362496491]",1.07067,"[0, 0.2506657363800221, 0.5223557752628541]","[0, 0.5217645544176156, 0.8849740282552508]",1.40674
parb_sad-0,2,2,nvd_conv-0,2,4,"[0, 0.576611716757088, 0.4925734432862839]",1.06919,"[0, 0.2506657363800221, 0.5223557752628541]","[0, 0.434721891864749, 1.06046272364558]",1.49518
parb_sad-0,2,1,nvd_conv-0,2,4,"[0, 0.4280267930422811, 0.6639449359861702]",1.09197,"[0, 0.2506657363800221, 0.5223557752628541]","[0, 0.5856309475357095, 0.786745627462297]",1.37238


In [131]:
df_seq_index = df_seq.set_index(['pair_str', '1_kidx'])
df_intra_index = df_intra.set_index(['pair_str', '1_kidx', 'intra'])

In [158]:
def get_best(app1, app2, df_best):
    matrix_size = (len(const.kernel_yaml[app2]), len(const.kernel_yaml[app1]))
    configs_1 = np.zeros(matrix_size, dtype=int)
    configs_2 = np.zeros(matrix_size, dtype=int)
    interference_1 = np.zeros(matrix_size)
    interference_2 = np.zeros(matrix_size)

    for kernel_1 in const.kernel_yaml[app1]:
        for kernel_2 in const.kernel_yaml[app2]:
            best_idx = (app1, kernel_1, app2, kernel_2)

            cta_1 = df_best.loc[best_idx, '1_intra']
            sld_1 = df_best.loc[best_idx, 'sld'][1]

            cta_2 = df_best.loc[best_idx, '2_intra']
            sld_2 = df_best.loc[best_idx, 'sld'][2]

            matrix_idx = (kernel_2-1, kernel_1-1)

            configs_1[matrix_idx] = int(cta_1)
            configs_2[matrix_idx] = int(cta_2)

            interference_1[matrix_idx] = sld_1
            interference_2[matrix_idx] = sld_2

    return [configs_1, configs_2], [interference_1, interference_2]

def get_runtime(app):
    runtime = []
    for kidx in const.kernel_yaml[app]:
        runtime.append(df_seq_index.loc[(app, kidx)]['runtime'])

    return runtime

def predict_app(runtimes, interference, apps):
    tot_runtime = [sum(r) for r in runtimes]
    if tot_runtime[0] < tot_runtime[1]:
        iter_lim = [math.inf, 1]
    else:
        iter_lim = [1, math.inf] 

    sim_results = scheduler.simulate(runtimes, interference, iter_lim, finish_remaining=False)
    scaled_runtime = [[int(t) for t in app] for app in sim_results[0]]
#     print('Predicted runtime:', scaled_runtime)
    
    norm_ipc = scheduler.calculate_norm_ipc(runtimes, scaled_runtime)
#     print('Norm IPC:', norm_ipc)
    
    sld = scheduler.calculate_qos(runtimes, scaled_runtime, short=True, revert=False)
    print('Predicted weighted speedup:', sum(sld))
    
    row = pd.Series({'1_bench': apps[0], '2_bench': apps[1], 
                     'runtime': scaled_runtime, 'norm_ipc': norm_ipc})
    
    gen_altair.draw_altair_timeline(row, title='Prediction').display()  
    
    
def print_table(apps, data_type, data_list, headers):
    for app, data in zip(apps, data_list):
        print(app, data_type)
        print(pd.DataFrame(data, headers[0], headers[1]))
        print('-' * 50)

def print_rsrc_usage(configs, headers):
    print('Execution context utilization:')
    rsrc = ['cta_ratio', 'thread_ratio', 'smem_ratio', 'reg_ratio', 'avg_dram_bw']
   
    def get_usage(intra_list, kernel_list, app_idx):
        usage_app = []
        for intra, kernel in zip(intra_list, kernel_list):
            split_kernel = kernel.split(':')
            intra_idx = (split_kernel[0], int(split_kernel[1]), intra)
            
            usage = [df_intra_index.loc[intra_idx, r] for r in rsrc]
            usage_app.append(usage)
            
        usage_app = np.array(usage_app)
        print('>>> App', app_idx)
        max_usage = np.amax(usage_app, axis=0)
        [print(r, f'{u:.3}') for u, r in zip(max_usage, rsrc)]
            
    # App 1, take max of column in first table
    max_intra_1 = configs[0].max(axis=0)
    get_usage(max_intra_1, headers[1], 1)

    # App 2: take max of each row in second table
    max_intra_2 = configs[1].max(axis=1)
    get_usage(max_intra_2, headers[0], 2)

In [159]:
def show_widgets_best():
    display(best_dd)
    display(best_type_dd)
    display(best_btn)
    

def best_onclick(b):
    clear_output()
    show_widgets_best()
    
    apps = best_dd.value.split('+')
           
    # Output LUT resource configuration 
    print(best_type_dd.value)
    df_best = df_best_local if best_type_dd.value == 'local' else df_best_weighted
    configs, interference = get_best(apps[0], apps[1], df_best)
    
    app1_kernels = ['{0}:{1}'.format(apps[0], kernel) for kernel in const.kernel_yaml[apps[0]]]
    app2_kernels = ['{0}:{1}'.format(apps[1], kernel) for kernel in const.kernel_yaml[apps[1]]]
    
    df_header = (app2_kernels, app1_kernels)
    print_table(apps, 'configs', configs, df_header)
    print_table(apps, 'slowdown', interference, df_header)
    print('\n')
   
    print_rsrc_usage(configs, df_header)
    print('\n')
    
    # Run simulation to predict final slowdown
    runtimes = [get_runtime(apps[0]), get_runtime(apps[1])]
    predict_app(runtimes, interference, apps)


In [160]:

best_dd = widgets.Dropdown(options=uniq_pairs, value=uniq_pairs[0], description='Pair:')
best_type_dd = widgets.Dropdown(options=['Local', 'Global (weighted)'], value='Local', description='Type')
best_btn = widgets.Button(description='Best', button_style='info')


show_widgets_best()
best_btn.on_click(best_onclick)
 

Dropdown(description='Pair:', index=1, options=('parb_sad-0+nvd_conv-0', 'parb_sad-1+nvd_conv-0'), value='parb…

Dropdown(description='Type', options=('Local', 'Global (weighted)'), value='Local')

Button(button_style='info', description='Best', style=ButtonStyle())

Local
parb_sad-1 configs
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1             8             2             4
nvd_conv-0:2             8             2             4
--------------------------------------------------
nvd_conv-0 configs
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1             4             4             4
nvd_conv-0:2             6             8             8
--------------------------------------------------
parb_sad-1 slowdown
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1      0.794836      0.779599      0.666934
nvd_conv-0:2      0.737540      0.686226      0.566032
--------------------------------------------------
nvd_conv-0 slowdown
              parb_sad-1:1  parb_sad-1:2  parb_sad-1:3
nvd_conv-0:1      0.771356      0.483271      0.552858
nvd_conv-0:2      0.916291      0.509509      0.584127
--------------------------------------------------


Execution context utilization:
>>> App 1
cta_ratio

# CTX Ratio

In [9]:
gen_pair = os.path.join(SCRIPT_PATH, 'gen_tables/gen_table_pair.py')
ctx_csv = os.path.join(HOME, 'data/csv/ctx.csv')
output = os.path.join(PKL_PATH, 'pair_ctx.pkl')


In [10]:
%run $gen_pair --csv $ctx_csv --output $output --seq_pkl $seq_pkl --qos 0.5 --multi --how ctx

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd'],
      dtype='object')


In [11]:
df_ctx= pd.read_pickle(output)
uniq_pairs = df_ctx[['1_bench', '2_bench']].drop_duplicates().values
uniq_pairs = ['+'.join(x) for x in uniq_pairs]

col_ctx = ['1_bench', '2_bench', '1_ctx', '2_ctx', 
           'cta_quota',
#            'runtime', 'norm_runtime'
          ]
draw_table(df_ctx, col_ctx)

1_bench,2_bench,1_ctx,2_ctx,cta_quota
parb_sad-0,nvd_conv-0,0.5,0.5,"[[], [8, 2, 2], [16, 8]]"
parb_sad-0,nvd_conv-0,0.75,0.25,"[[], [12, 2, 2], [8, 4]]"
parb_sad-0,nvd_conv-0,0.25,0.75,"[[], [4, 2, 2], [24, 12]]"
parb_sad-1,nvd_conv-0,0.75,0.25,"[[], [12, 12, 24], [8, 4]]"
parb_sad-1,nvd_conv-0,0.25,0.75,"[[], [4, 4, 8], [24, 12]]"
parb_sad-1,nvd_conv-0,0.5,0.5,"[[], [8, 8, 16], [16, 8]]"


In [88]:
df_dynamic_index = df_dynamic.set_index(['1_bench', '1_intra', '2_bench', '2_intra'])

In [106]:
ctx_dd = widgets.Dropdown(options=uniq_pairs, value=uniq_pairs[0], description='Pair:')
ctx_btn = widgets.Button(description='Display', button_style='info')

def show_widgets_ctx():
    display(ctx_dd)
    display(ctx_btn)
    

def ctx_onclick(b):
    clear_output()
    show_widgets_ctx()
    
    apps = ctx_dd.value.split('+')
    df_apps = df_ctx[(df_ctx['1_bench'] == apps[0]) & (df_ctx['2_bench'] == apps[1])]
    df_apps = df_apps.sort_values('1_ctx')
    for rid, row in df_apps.iterrows():
        app1 = row['1_bench']
        app2 = row['2_bench']
        # Show actual timeline
        gen_altair.draw_altair_timeline(row, col_title='1_ctx').display()  
        # Print ctas/SM for each kernel in each app
        print(app1, ':', row['cta_quota'][1], 'CTAs/SM')
        print(app2, ':', row['cta_quota'][2], 'CTAs/SM')
        
        print('slowdown:', row['sld'])
        print('weighted speedup:', row['ws'])
        
        # Show predicted timeline based on df_dynamic info
        matrix_size = (len(const.kernel_yaml[app2]), len(const.kernel_yaml[app1]))
        interference_1 = np.zeros(matrix_size)
        interference_2 = np.zeros(matrix_size)

        no_predict = False
        # Build interference matrix
        for kernel_1 in const.kernel_yaml[app1]:
            if not no_predict:
                for kernel_2 in const.kernel_yaml[app2]:
                    cta_1 = row['cta_quota'][1][kernel_1-1]
                    cta_2 = row['cta_quota'][2][kernel_2-1]
                    dynamic_idx = (app1, int(cta_1), app2, int(cta_2))

                    if dynamic_idx not in df_dynamic_index.index:
                        no_predict = True
                        break

                    sld_1 = df_dynamic_index.loc[dynamic_idx, 'sld'].to_list()[0][1]
                    sld_2 = df_dynamic_index.loc[dynamic_idx, 'sld'].to_list()[0][2]

                    matrix_idx = (kernel_2-1, kernel_1-1)

                    interference_1[matrix_idx] = sld_1
                    interference_2[matrix_idx] = sld_2
        
        if not no_predict:
            # Get baseline runtime
            runtimes = [get_runtime(row['1_bench']), get_runtime(row['2_bench'])]
            interference = [interference_1, interference_2]
            
            app1_kernels = ['{0}:{1}'.format(app1, kernel) for kernel in const.kernel_yaml[app1]]
            app2_kernels = ['{0}:{1}'.format(app2, kernel) for kernel in const.kernel_yaml[app2]]

            df_header = (app2_kernels, app1_kernels)
            print_table(apps, 'slowdown', interference, df_header)


            # Predict app slowdown
            predict_app(runtimes, interference, [app1, app2])
        
        print('-' * 100)

show_widgets_ctx()
ctx_btn.on_click(ctx_onclick)
 

Dropdown(description='Pair:', index=1, options=('parb_sad-0+nvd_conv-0', 'parb_sad-1+nvd_conv-0'), value='parb…

Button(button_style='info', description='Display', style=ButtonStyle())

parb_sad-1 : [4, 4, 8] CTAs/SM
nvd_conv-0 : [24, 12] CTAs/SM
slowdown: [0, 0.4650344487662427, 0.7434961034254148]
weighted speedup: 1.2085305521916576
----------------------------------------------------------------------------------------------------


parb_sad-1 : [8, 8, 16] CTAs/SM
nvd_conv-0 : [16, 8] CTAs/SM
slowdown: [0, 0.6170419755973829, 0.6786942403764827]
weighted speedup: 1.2957362159738657
----------------------------------------------------------------------------------------------------


parb_sad-1 : [12, 12, 24] CTAs/SM
nvd_conv-0 : [8, 4] CTAs/SM
slowdown: [0, 0.7192665114181458, 0.5645163471255075]
weighted speedup: 1.2837828585436533
----------------------------------------------------------------------------------------------------


# CTA LUT

In [112]:
lut_csv = os.path.join(HOME, 'data/csv/pair_lut.csv')
lut_out = os.path.join(PKL_PATH, 'pair_lut.pkl')

%run $gen_pair --csv $lut_csv --output $lut_out --seq_pkl $seq_pkl --multi --how lut

Index(['pair_str', 'config', 'gpusim_version', 'jobId', 'stall_icnt_to_l2',
       'stall_l2_to_icnt', 'stall_core_ldst', 'l1D_miss_rate', 'l2_miss_rate',
       'l2_rshr_entry_fail', 'l2_rshr_merge_fail', 'l2_total_accesses',
       'mem_count', 'empty_warp', 'stall_warp', 'idle_warp', 'scoreboard_warp',
       'tot_warp_insn', 'runtime', 'instructions', 'l2_bw', 'avg_mem_lat',
       'avg_core_to_l2', 'avg_l2_to_core', 'avg_mrq_latency', 'dram_eff',
       'dram_bw', 'row_buffer_locality', 'mem_idle', 'total_cmd'],
      dtype='object')


In [113]:
df_lut = pd.read_pickle(lut_out)
col_lut = ['pair_str', '1_bench', '2_bench', 'norm_ipc', 'runtime'] 
# draw_table(df_lut, col_lut)

In [114]:
lut_dd = widgets.Dropdown(options=df_lut['pair_str'], value=df_lut.iloc[0]['pair_str'], description='Pair:')

lut_btn = widgets.Button(description='Display', button_style='info')

def show_widgets_lut():
    display(lut_dd)
    display(lut_btn)
    

def lut_onclick(b):
    clear_output()
    show_widgets_lut()
    
    apps = re.split(r'-(?=\D)', lut_dd.value)
    df_apps = df_lut[(df_lut['1_bench'] == apps[0]) & (df_lut['2_bench'] == apps[1])]
    for rid, row in df_apps.iterrows():
        gen_altair.draw_altair_timeline(row, title='CTA LUT').display()  
        
        print('Note:')
#         print(row['1_bench'], ':', row['cta_quota'][1], 'CTAs/SM')
#         print(row['2_bench'], ':', row['cta_quota'][2], 'CTAs/SM')
        
        print('runtime:', row['runtime'])
        print('slowdown:', row['sld'])
        print('weighted speedup:', row['ws'])
        print('-' * 100)

show_widgets_lut()
lut_btn.on_click(lut_onclick)
 

Dropdown(description='Pair:', index=1, options=('parb_sad-0-nvd_conv-0', 'parb_sad-1-nvd_conv-0'), value='parb…

Button(button_style='info', description='Display', style=ButtonStyle())

Note:
runtime: [[], [3903782, 3114421, 816561], [251500, 231423, 247800, 224266, 250280, 226474, 246663, 226125, 245418, 225838, 246431, 225143, 251086, 227849, 248230, 226570, 327024, 436394, 409356, 415062, 372055, 419565, 384013, 431129, 323182, 409296, 221001, 206416]]
slowdown: [0, 0.7489110074023927, 0.6703564568697488]
weighted speedup: 1.4192674642721415
----------------------------------------------------------------------------------------------------


In [24]:
test = [0, 1]
print(test[0:-1])

[0]
