In [1]:
import datetime
import os

import pandas as pd 

from report.report import Db
from utils_base_env import DbConfig

from IPython.display import display
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [2]:
!pip install jinja2 ipywidgets



In [3]:
# Connection settings for the benchmark-results database.
# Every value can be overridden through a REPORT_DB_* environment variable so
# that real credentials do not have to live in the notebook; the literals are
# only fallbacks that keep the previous behaviour when nothing is set.
# NOTE(review): drop the user/password fallbacks once the environment
# variables are provisioned wherever this notebook is run.
db_config = DbConfig(
    driver=os.environ.get('REPORT_DB_DRIVER', 'mysql+mysqlconnector'),
    server=os.environ.get('REPORT_DB_SERVER', 'ansatlin07.an.intel.com'),
    port=int(os.environ.get('REPORT_DB_PORT', 3306)),
    user=os.environ.get('REPORT_DB_USER', 'gashiman'),
    password=os.environ.get('REPORT_DB_PASSWORD', 'omniscidb'),
    name=os.environ.get('REPORT_DB_NAME', 'omniscidb'),
)

# `db` is the handle used by the cells below to load benchmark results.
engine = db_config.create_engine(future=False)
db = Db(engine=engine)

In [4]:
from collections import Counter


def get_available_parent_prefixes(queries):
    """Learn parent prefixes for benchmarks with hierarchical measurements.

    Measurement names are dot-separated paths; the parent prefix of a name is
    everything before its last dot (``'total.03-fe.x'`` -> ``'total.03-fe'``).

    Args:
        queries: iterable of measurement names.

    Returns:
        ``Counter`` mapping each parent prefix to the number of names directly
        below it. Names without a dot have no parent and are not counted, so
        the result is empty when there is no hierarchy (or no names at all).
    """
    parent_prefixes = Counter(name.rpartition('.')[0] for name in sorted(queries))
    # Top-level names produce the empty prefix; drop it if it is there. The
    # default avoids a KeyError when every name is nested or `queries` is empty.
    parent_prefixes.pop('', None)

    print('Prefix parents:', sorted(parent_prefixes))
    return parent_prefixes


def filter_by_prefix(m_names, prefix: str):
    """Select the measurements that sit directly under ``prefix``.

    A name qualifies when its dot-separated path, minus the last component,
    equals the dot-separated path of ``prefix``. With ``prefix=None`` the
    input is handed back untouched.
    """
    if prefix is None:
        return m_names

    wanted_parent = prefix.split('.')
    direct_children = []
    for measurement in m_names:
        # Everything but the last path component is the measurement's parent.
        *parent_parts, _leaf = measurement.split('.')
        if parent_parts == wanted_parent:
            direct_children.append(measurement)
    return direct_children


def _safe_share(part, whole):
    """Return ``part / whole``, or NaN when ``whole`` is zero (share undefined)."""
    if whole == 0:
        return float('nan')
    return part / whole


def build_table(df, baseline_id, selected_ids, chosen_prefix, selected_queries):
    """Build a measurements-by-runs table that is convenient to visualise.

    Args:
        df: results frame indexed by run id; it must contain a ``pandas_mode``
            column and one column per name in ``selected_queries``.
        baseline_id: index label of the run every other run is compared to.
        selected_ids: index labels of the runs to add as result columns.
        chosen_prefix: name of the parent measurement, or a falsy value to
            skip the ``*_share`` columns. When given it must be one of
            ``selected_queries``.
        selected_queries: measurement (column) names that become table rows.

    Returns:
        DataFrame indexed by measurement name with a ``baseline`` column and,
        for every selected run ``i``, ``<mode>_<i>`` (the run's values, with
        the ``Modin_on_`` part of the mode removed) and ``<i>_delta`` (run
        minus baseline). With a prefix it also has ``baseline_parent_share``
        (baseline divided by the baseline parent value) and ``<i>_delta_share``
        (delta divided by the parent's delta). A share whose denominator is
        zero, e.g. the baseline run compared with itself, is NaN.
    """
    queries = list(selected_queries)
    baseline = df.loc[baseline_id, queries]

    table = pd.DataFrame()

    # baseline cols
    table['baseline'] = baseline
    if chosen_prefix:
        table['baseline_parent_share'] = _safe_share(baseline, df.loc[baseline_id, chosen_prefix])

    # result cols, one group per selected run
    for i in selected_ids:
        name = f'{df.loc[i, "pandas_mode"].replace("Modin_on_", "")}_{i}'
        table[name] = df.loc[i, queries]
        delta = table[name] - table['baseline']
        table[f'{i}_delta'] = delta
        if chosen_prefix:
            # The parent delta is zero whenever the run matches the baseline on
            # the parent measurement; dividing by it would give inf or raise.
            table[f'{i}_delta_share'] = _safe_share(delta, delta.loc[chosen_prefix])

    return table

def delta2color(v, threshold=60):
    """Map a delta value to a CSS ``color`` declaration for the styled table.

    Args:
        v: numeric delta (run value minus baseline value).
        threshold: deltas strictly greater than this are flagged in red.

    Returns:
        ``'color:green;'`` for negative deltas, ``'color:red;'`` for deltas
        above ``threshold`` and a muted yellow otherwise (which is also what
        NaN gets, since both comparisons are false for it).
    """
    if v < 0:
        c = 'green'  # no trailing ';' here: the f-string below adds it
    elif v > threshold:
        c = 'red'
    else:
        c = '#c1c286'
    return f'color:{c};'


def display_styled(table, selected_ids):
    """Render ``table`` in the notebook with colouring and number formatting.

    Columns are picked by name: ``*delta`` cells are coloured by
    ``delta2color``, ``*_share`` columns get a 0..1 background gradient and are
    printed as percentages, everything else is printed with one decimal.
    ``selected_ids`` is accepted but not used.
    """
    columns = list(table.columns)
    delta_columns = [c for c in columns if c.endswith('delta')]
    gradient_columns = [c for c in columns if c.endswith('_share')]
    plain_columns = [c for c in columns if not c.endswith('share')]
    percent_columns = [
        c for c in columns
        if 'speedup' in c or c.endswith('ratio') or c.endswith('share')
    ]

    table_styles = [
        # cell hover; for row hover use <tr> instead of <td>
        {'selector': 'td:hover', 'props': [('background-color', '#ffffb3')]},
        # index names
        {'selector': '.index_name', 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'},
        # headers
        {'selector': 'th:not(.index_name)', 'props': 'background-color: #000066; color: white;'},
    ]

    styler = table.style
    styler = styler.applymap(delta2color, subset=delta_columns)
    styler = styler.background_gradient(axis=0, vmin=0, vmax=1, cmap="YlOrRd", subset=gradient_columns)
    styler = styler.applymap_index(lambda v: 'text-align:left;')
    styler = styler.format('{:.1f}', subset=plain_columns)
    styler = styler.format('{:.1%}', subset=percent_columns)
    styler = styler.set_table_styles(table_styles)
    display(styler)

In [5]:
# Show the benchmark names reported by the database; BENCHMARK in the next cell is one of them.
print('Available benchmarks:', db.load_benchmarks())

Available benchmarks: ['taxi_ml', 'ny_taxi', 'census', 'h2o', 'plasticc', 'hm_fashion_recs']


In [7]:
# Benchmark whose results are inspected by the interactive table below.
BENCHMARK = 'hm_fashion_recs'

# Load once up front to learn which parent prefixes can be offered in the selector.
df, queries = db.load_benchmark_results(BENCHMARK)
# Fall back to a single `None` choice when no parent prefixes were found.
parent_prefixes = get_available_parent_prefixes(queries) or [None]

@interact(
    chosen_prefix=widgets.RadioButtons(
        options=list(parent_prefixes),
        description='Prefix',
        disabled=False,
    )
)
def interactive_table(chosen_prefix):
    """Display the comparison table for the measurements under ``chosen_prefix``.

    Runs on every change of the radio-button selection: reloads the results of
    ``BENCHMARK``, picks the baseline and the most recent runs, then builds and
    displays the styled table.

    Args:
        chosen_prefix: parent measurement name selected in the widget, or
            ``None`` to show all measurements without share columns.

    Raises:
        IndexError: if the results contain no run with ``pandas_mode == 'Pandas'``.
    """
    df, measurements = db.load_benchmark_results(benchmark=BENCHMARK)
    # Rank runs within each pandas_mode by date, 1 being the most recent.
    df['latest'] = df.groupby('pandas_mode')['date'].transform('rank', ascending=False)

    # Select baseline: the last 'Pandas' row of the results frame
    pandas_runs = df[df.pandas_mode == 'Pandas']
    if pandas_runs.empty:
        raise IndexError(f"No 'Pandas' baseline results found for benchmark {BENCHMARK!r}")
    baseline_id = pandas_runs.index[-1]

    # Select the latest results of every mode; can be replaced with hardcoded ids
    n_top = 3
    selected_ids = list(df[df['latest'] <= n_top].index)

    # Measurements that we are interested in: the parent itself, then its direct children
    selected_measurements = [chosen_prefix] if chosen_prefix else []
    selected_measurements.extend(filter_by_prefix(measurements, prefix=chosen_prefix))
    print(selected_measurements)

    # build table for visuals
    table = build_table(df, baseline_id, selected_ids, chosen_prefix, selected_measurements)

    # Styling
    display_styled(table, selected_ids)

Prefix parents: ['total', 'total.02-create_user_ohe_agg', 'total.03-fe', 'total.03-fe.03-candidates', 'total.03-fe.04-attach_features']


interactive(children=(RadioButtons(description='Prefix', options=('total', 'total.02-create_user_ohe_agg', 'to…