In [3]:
import datetime
import getpass
import os

import ipywidgets as widgets
import pandas as pd
from IPython.display import display
from ipywidgets import interact, interactive, fixed, interact_manual

from report.report import Db
from utils_base_env import DbConfig

In [4]:
!pip install jinja2 ipywidgets



In [60]:
# Connection settings are read from the environment so that credentials are
# not stored in the notebook. Non-secret fields fall back to the values this
# notebook used before; the password has no fallback and is prompted for when
# REPORT_DB_PASSWORD is not set.
db_config = DbConfig(
    driver=os.environ.get('REPORT_DB_DRIVER', 'mysql+mysqlconnector'),
    server=os.environ.get('REPORT_DB_SERVER', 'ansatlin07.an.intel.com'),
    port=int(os.environ.get('REPORT_DB_PORT', '3306')),
    user=os.environ.get('REPORT_DB_USER', 'gashiman'),
    password=os.environ.get('REPORT_DB_PASSWORD') or getpass.getpass('Database password: '),
    name=os.environ.get('REPORT_DB_NAME', 'omniscidb'),
)

engine = db_config.create_engine()
db = Db(engine=engine)

In [70]:
from collections import Counter


def get_available_parent_prefixes(queries):
    """Learn parent prefixes for benchmarks with hierarchical measurements.

    A measurement name is a dot-separated path (for example
    ``total.03-fe.03-candidates``); its parent prefix is the same path without
    the last component (``total.03-fe``).

    Args:
        queries: Iterable of measurement names.

    Returns:
        A ``Counter`` mapping every non-empty parent prefix to the number of
        names located directly under it, in sorted-name order of first
        occurrence. It is empty when no name contains a dot.
    """
    parent_prefixes = Counter('.'.join(n.split('.')[:-1]) for n in sorted(queries))
    # Top-level names have the empty string as parent; drop that bucket.
    # A default is required: when every name is nested (or there are no names
    # at all) the bucket does not exist and a bare pop('') raises KeyError.
    parent_prefixes.pop('', None)

    print('Prefix parents:', sorted(parent_prefixes))
    return parent_prefixes


def filter_by_prefix(m_names, prefix: str):
    """Keep only the measurement names located directly under ``prefix``.

    Names are dot-separated paths. A name is kept when its path without the
    last component is equal to ``prefix``. When ``prefix`` is ``None`` nothing
    is filtered and ``m_names`` itself is returned.
    """
    if prefix is None:
        return m_names

    wanted_parent = prefix.split('.')
    direct_children = []
    for candidate in m_names:
        # Split off the leaf; whatever is left is the parent path.
        *parent_parts, _leaf = candidate.split('.')
        if parent_parts == wanted_parent:
            direct_children.append(candidate)
    return direct_children


def build_simple_table(df, selected_metacols, selected_ids, selected_measurements):
    """Build a transposed table with one column per selected run.

    The rows ``selected_ids`` of ``df`` are restricted to the meta columns
    followed by the measurement columns, sorted by the meta columns and then
    transposed, so fields become rows and runs become columns. Column labels
    (the run ids) are converted to strings.
    """
    wanted_fields = list(selected_metacols) + list(selected_measurements)
    runs = df.loc[selected_ids, wanted_fields]
    ordered_runs = runs.sort_values(selected_metacols)

    table = ordered_runs.T.copy()
    table.columns = list(map(str, table.columns))
    return table


def build_table_with_baseline(df, baseline_id, selected_ids, chosen_prefix, selected_measurements):
    """Build a measurements-by-runs table that compares runs with a baseline.

    Rows are ``selected_measurements``. Columns, in order:

    * ``baseline`` - values of the run ``baseline_id``;
    * ``baseline_parent_share`` - baseline values divided by the baseline
      value of ``chosen_prefix`` (only when ``chosen_prefix`` is truthy);
    * for every id in ``selected_ids``: ``<mode>_<id>`` with the values of
      that run (``<mode>`` is its ``pandas_mode`` with ``Modin_on_`` removed),
      ``<id>_delta`` (run minus baseline) and, when ``chosen_prefix`` is
      truthy, ``<id>_delta_share`` (delta divided by the delta of the
      ``chosen_prefix`` row, which therefore has to be one of the rows).
    """
    has_parent = bool(chosen_prefix)
    table = pd.DataFrame()

    # Baseline columns
    table['baseline'] = df.loc[baseline_id, [*selected_measurements]]
    if has_parent:
        parent_baseline = df.loc[baseline_id, chosen_prefix]
        table['baseline_parent_share'] = df.loc[baseline_id, selected_measurements] / parent_baseline

    # One group of columns per selected run
    for run_id in selected_ids:
        mode = df.loc[run_id, "pandas_mode"].replace("Modin_on_", "")
        run_col = f'{mode}_{run_id}'
        table[run_col] = df.loc[run_id, selected_measurements]

        delta = table[run_col] - table['baseline']
        table[f'{run_id}_delta'] = delta
        if has_parent:
            parent_delta = table.loc[chosen_prefix, run_col] - table.loc[chosen_prefix, 'baseline']
            table[f'{run_id}_delta_share'] = delta / parent_delta

    return table

def delta2color(v, threshold=60):
    """Map a delta value to a CSS ``color`` declaration for the styler.

    Args:
        v: Difference between a run and the baseline.
        threshold: Deltas strictly greater than this value are painted red.

    Returns:
        ``'color:green;'`` for negative deltas, ``'color:red;'`` for deltas
        above ``threshold`` and the neutral ``'color:#c1c286;'`` otherwise
        (this includes NaN, for which both comparisons are false).
    """
    if v < 0:
        # No trailing ';' here: the f-string below already appends one, the
        # old value 'green;' produced the malformed 'color:green;;'.
        c = 'green'
    elif v > threshold:
        c = 'red'
    else:
        c = '#c1c286'
    return f'color:{c};'


def display_styled(table, selected_ids, measurements):
    """Render ``table`` in the notebook with colours and number formats.

    Styling is applied only to the rows listed in ``measurements`` and the
    column groups are recognised by their names:

    * columns ending with ``delta`` are coloured with ``delta2color``;
    * columns ending with ``_share`` get a ``YlOrRd`` gradient scaled to 0..1;
    * columns whose name ends with a digit are formatted as ``{:.1f}``;
    * columns containing ``speedup`` or ending with ``ratio`` / ``share`` are
      formatted as percentages.

    Args:
        table: DataFrame with measurements as rows and string column names.
        selected_ids: Not used; kept so existing calls stay valid.
        measurements: Row labels the styling is restricted to.
    """
    def ss(f):
        # Styler subset: (row labels, columns whose name satisfies ``f``)
        return (measurements, [c for c in table.columns if f(c)])

    styler = table.style

    # Styler.applymap / applymap_index were renamed to map / map_index in
    # pandas 2.1 and the old names are deprecated; use the new names when the
    # installed pandas has them and fall back otherwise.
    map_cells = getattr(styler, 'map', None) or styler.applymap
    map_index = getattr(styler, 'map_index', None) or styler.applymap_index

    map_cells(delta2color, subset=ss(lambda c: c.endswith('delta')))
    styler.background_gradient(axis=0, vmin=0, vmax=1, cmap="YlOrRd", subset=ss(lambda c: c.endswith('_share')))
    map_index(lambda v: 'text-align:left;')
    # c[-1:] instead of c[-1] so that an empty column name does not raise.
    styler.format('{:.1f}', subset=ss(lambda c: c[-1:].isnumeric()))
    # The former '{:.1f}' format for '*share' columns was dropped: it was
    # always overridden by the percentage format below.
    styler.format('{:.1%}', subset=ss(lambda c: 'speedup' in c or c.endswith('ratio') or c.endswith('share')))

    display(styler)

In [76]:
benchmarks = db.load_benchmarks()

# NOTE: RadioButtons has no `default` trait (the initial selection is set via
# `value`), so the former `default=1` argument was dropped: it had no effect
# other than being reported as an unrecognized constructor argument.
@interact(
    benchmark=widgets.RadioButtons(
        options=list(benchmarks),
        description='Benchmark',
        disabled=False,
    )
)
def interactive_table(benchmark):
    """Show the latest aggregated results of ``benchmark`` for each pandas mode."""
    df, measurements = db.load_benchmark_results_agg(benchmark=benchmark)
    # Rank the runs of every pandas_mode by date, newest first (rank 1 = latest).
    df['latest'] = df.groupby('pandas_mode')['date'].transform('rank', ascending=False)

    # Select the latest results for the benchmark; you can replace this with hardcoded ids
    n_top = 1
    selected_ids = list(df[df['latest'] <= n_top].index)

    # build table for visuals
    table = build_simple_table(
        df,
        selected_metacols=['pandas_mode'],
        selected_ids=selected_ids,
        selected_measurements=measurements
    )

    # Styling
    display_styled(table, selected_ids, measurements=measurements)

interactive(children=(RadioButtons(description='Benchmark', options=('taxi_ml', 'ny_taxi', 'census', 'h2o', 'p…

In [72]:
BENCHMARK = 'hm_fashion_recs'

df, queries = db.load_benchmark_results(BENCHMARK)
# Benchmarks without hierarchical measurements yield no prefixes; offer a
# single `None` option in that case so the widget still has a choice.
parent_prefixes = get_available_parent_prefixes(queries) or [None]

# NOTE: RadioButtons has no `default` trait (the initial selection is set via
# `value`), so the former `default=1` argument was dropped: it had no effect
# other than being reported as an unrecognized constructor argument.
@interact(
    chosen_prefix=widgets.RadioButtons(
        options=list(parent_prefixes),
        description='Prefix',
        disabled=False,
    )
)
def interactive_table(chosen_prefix):
    """Compare the latest runs of ``BENCHMARK`` with the Pandas baseline.

    ``chosen_prefix`` is the parent measurement to drill into (for example
    'total.02-create_user_ohe_agg'); ``None`` selects all measurements.
    """
    # Results are loaded again on every widget change.
    df, measurements = db.load_benchmark_results(benchmark=BENCHMARK)
    # Rank the runs of every pandas_mode by date, newest first (rank 1 = latest).
    df['latest'] = df.groupby('pandas_mode')['date'].transform('rank', ascending=False)

    # Select baseline: the last 'Pandas' row in index order
    baseline_id = df[df.pandas_mode == 'Pandas'].index[-1]

    # Select the latest results for the benchmark; you can replace this with hardcoded ids
    n_top = 3
    selected_ids = list(df[df['latest'] <= n_top].index)

    # Measurements that we are interested in: the parent itself, then its direct children
    selected_measurements = [chosen_prefix] if chosen_prefix else []
    selected_measurements.extend(filter_by_prefix(measurements, prefix=chosen_prefix))

    # build table for visuals
    table = build_table_with_baseline(df, baseline_id, selected_ids, chosen_prefix, selected_measurements)

    # Styling
    display_styled(table, selected_ids, measurements=selected_measurements)

Prefix parents: ['total', 'total.02-create_user_ohe_agg', 'total.03-fe', 'total.03-fe.03-candidates', 'total.03-fe.04-attach_features']


interactive(children=(RadioButtons(description='Prefix', options=('total', 'total.02-create_user_ohe_agg', 'to…

In [17]:
# Print the benchmarks that db.load_benchmarks returns when called with node='c5n5'.
print('Available benchmarks:', db.load_benchmarks(node='c5n5'))

Available benchmarks: ['ny_taxi', 'census', 'h2o', 'plasticc', 'taxi_ml']
