In [1]:
import sys
sys.path.append("../")
import pandas as pd
from get_trace_durations import get_trace_durations

In [13]:

class FilterOnKPIs:
    """Filter algorithm variants (cases) on relative FLOP count and relative duration.

    On construction the per-case durations obtained from ``get_trace_durations``
    are merged with the case table on ``'case:concept:name'``, and two derived
    columns are added:

    * ``'case:rel-flops'``    -- ``(flops - min_flops) / min_flops``
    * ``'case:rel-duration'`` -- ``(duration - min_duration) / min_duration``

    A value of ``0`` in a relative column therefore marks the best case for
    that KPI. The enriched table is stored in ``self.case_table``.
    """

    # Upper bound applied to the automatically derived relative-duration limit
    # in ``filter_on_flops_and_rel_duration``.
    DEFAULT_REL_DURATION_CAP = 1.2

    def __init__(self, case_table, measurements):
        """Build the enriched case table.

        Args:
            case_table: DataFrame with at least the columns
                ``'case:concept:name'`` and ``'case:flops'``.
            measurements: Measurements table passed unchanged to
                ``get_trace_durations``; the result is presumably one row per
                case with a ``'case:duration'`` column (verify against that
                helper).
        """
        case_durations = get_trace_durations(measurements)
        merged = case_durations.merge(case_table, on="case:concept:name")
        self.case_table = self._with_relative_kpis(merged)

    @staticmethod
    def _with_relative_kpis(df):
        """Return a copy of ``df`` with the two relative KPI columns appended.

        Uses vectorized column arithmetic, so an empty frame yields empty
        columns. A minimum of zero yields ``inf``/``NaN`` entries under
        pandas' floating-point division rules.
        """
        min_flops = df['case:flops'].min()
        min_duration = df['case:duration'].min()
        return df.assign(**{
            'case:rel-flops': (df['case:flops'] - min_flops) / min_flops,
            'case:rel-duration': (df['case:duration'] - min_duration) / min_duration,
        })

    def filter_on_flops_and_rel_duration(self, rel_duration_limit=None):
        """Return cases with minimal FLOPs OR a relative duration below the limit.

        Args:
            rel_duration_limit: Exclusive upper bound on ``'case:rel-duration'``.
                When ``None``, the largest relative duration among the
                minimal-FLOP cases is used, capped at
                ``DEFAULT_REL_DURATION_CAP``. An explicit ``0`` is honored
                (only the minimal-FLOP cases are then returned).

        Returns:
            The matching rows of ``self.case_table``.
        """
        table = self.case_table
        has_min_flops = table['case:rel-flops'] == 0

        # Compare against None explicitly so that a caller-supplied 0 is kept.
        if rel_duration_limit is None:
            rel_duration_limit = table.loc[has_min_flops, 'case:rel-duration'].max()
            if rel_duration_limit > self.DEFAULT_REL_DURATION_CAP:
                rel_duration_limit = self.DEFAULT_REL_DURATION_CAP

        return table[has_min_flops | (table['case:rel-duration'] < rel_duration_limit)]

    def filter_on_best_flops(self):
        """Return the cases whose FLOP count equals the minimum (``rel-flops == 0``)."""
        return self.case_table[self.case_table['case:rel-flops'] == 0]

    def filter_on_rel_duration(self, rel_duration_limit):
        """Return the cases whose relative duration is strictly below ``rel_duration_limit``."""
        return self.case_table[self.case_table['case:rel-duration'] < rel_duration_limit]

    def get_alg_seq_sorted_on_duration(self, case_table=None):
        """Return the case names ordered by ascending ``'case:duration'``.

        Args:
            case_table: Optional DataFrame to sort instead of
                ``self.case_table`` (e.g. the result of one of the filters).

        Returns:
            A list of ``'case:concept:name'`` values.
        """
        # ``if case_table:`` would raise ValueError for a DataFrame, because the
        # truth value of a DataFrame is ambiguous; test against None instead.
        df = self.case_table if case_table is None else case_table
        return list(df.sort_values(by=['case:duration'])['case:concept:name'])

### Input: case table and measurements table

In [3]:
sys.path.append("../../data_integration/")
from data_collector import DataCollector

In [4]:
# Load the two input tables through the project's DataCollector, which is
# constructed with the path of the sample-data directory.
dc = DataCollector("sample_data/100_100_10_100_100/")
# Case table: displayed below with one row per case and its FLOP count.
case_table = dc.get_case_table()
# Measurements (runtimes) table: displayed below with start/end timestamp columns.
measurements_table = dc.get_runtimes_table()

In [5]:
case_table

Unnamed: 0,case:concept:name,case:flops,case:num_kernels
0,algorithm0,600000.0,3
1,algorithm2,2400000.0,3
2,algorithm4,4200000.0,3
3,algorithm1,600000.0,3
4,algorithm5,4200000.0,3
5,algorithm3,2400000.0,3


In [6]:
measurements_table

Unnamed: 0,case:concept:name,concept:name,concept:flops,concept:operation,concept:kernel,timestamp:start,timestamp:end
0,algorithm0,gemm_2e+05,200000.0,tmp3 = (C D),"gemm!('N', 'N', 1.0, ml2, ml3, 0.0, ml4)",1657009000.0,1657009000.0
1,algorithm0,gemm_2e+05,200000.0,tmp1 = (A B),"gemm!('N', 'N', 1.0, ml0, ml1, 0.0, ml5)",1657009000.0,1657009000.0
2,algorithm0,gemm_2e+05,200000.0,tmp6 = (tmp1 tmp3),"gemm!('N', 'N', 1.0, ml5, ml4, 0.0, ml6)",1657009000.0,1657009000.0
3,algorithm2,gemm_2e+05,200000.0,tmp1 = (A B),"gemm!('N', 'N', 1.0, ml0, ml1, 0.0, ml4)",1657009000.0,1657009000.0
4,algorithm2,gemm_2e+05,200000.0,tmp4 = (tmp1 C),"gemm!('N', 'N', 1.0, ml4, ml2, 0.0, ml5)",1657009000.0,1657009000.0
5,algorithm2,gemm_2e+06,2000000.0,tmp6 = (tmp4 D),"gemm!('N', 'N', 1.0, ml5, ml3, 0.0, ml6)",1657009000.0,1657009000.0
6,algorithm4,gemm_2e+05,200000.0,tmp2 = (B C),"gemm!('N', 'N', 1.0, ml1, ml2, 0.0, ml4)",1657009000.0,1657009000.0
7,algorithm4,gemm_2e+06,2000000.0,tmp5 = (tmp2 D),"gemm!('N', 'N', 1.0, ml4, ml3, 0.0, ml5)",1657009000.0,1657009000.0
8,algorithm4,gemm_2e+06,2000000.0,tmp6 = (A tmp5),"gemm!('N', 'N', 1.0, ml0, ml5, 0.0, ml6)",1657009000.0,1657009000.0
9,algorithm1,gemm_2e+05,200000.0,tmp1 = (A B),"gemm!('N', 'N', 1.0, ml0, ml1, 0.0, ml4)",1657009000.0,1657009000.0


### Usage

In [14]:
# Build the filter object: merges per-case durations (derived from the
# measurements) into the case table and adds the relative KPI columns.
competing_variants = FilterOnKPIs(case_table, measurements_table)

In [15]:
# Keep cases that have the minimum FLOP count OR a relative duration below 1.1.
competing_variants.filter_on_flops_and_rel_duration(1.1)

Unnamed: 0,case:concept:name,case:timestamp:start,case:timestamp:end,case:duration,case:flops,case:num_kernels,case:rel-flops,case:rel-duration
0,algorithm0,1657009000.0,1657009000.0,0.000228,600000.0,3,0.0,1.453846
1,algorithm2,1657009000.0,1657009000.0,0.000193,2400000.0,3,3.0,1.074359
3,algorithm1,1657009000.0,1657009000.0,9.3e-05,600000.0,3,0.0,0.0
5,algorithm3,1657009000.0,1657009000.0,0.000156,2400000.0,3,3.0,0.676923


In [16]:
# Keep only the cases whose FLOP count equals the minimum (rel-flops == 0).
competing_variants.filter_on_best_flops()

Unnamed: 0,case:concept:name,case:timestamp:start,case:timestamp:end,case:duration,case:flops,case:num_kernels,case:rel-flops,case:rel-duration
0,algorithm0,1657009000.0,1657009000.0,0.000228,600000.0,3,0.0,1.453846
3,algorithm1,1657009000.0,1657009000.0,9.3e-05,600000.0,3,0.0,0.0


In [17]:
# Keep only the cases whose relative duration is strictly below 1.1.
competing_variants.filter_on_rel_duration(1.1)

Unnamed: 0,case:concept:name,case:timestamp:start,case:timestamp:end,case:duration,case:flops,case:num_kernels,case:rel-flops,case:rel-duration
1,algorithm2,1657009000.0,1657009000.0,0.000193,2400000.0,3,3.0,1.074359
3,algorithm1,1657009000.0,1657009000.0,9.3e-05,600000.0,3,0.0,0.0
5,algorithm3,1657009000.0,1657009000.0,0.000156,2400000.0,3,3.0,0.676923


In [18]:
# List all case names ordered by ascending case duration.
competing_variants.get_alg_seq_sorted_on_duration()

['algorithm1',
 'algorithm3',
 'algorithm2',
 'algorithm4',
 'algorithm0',
 'algorithm5']