In [None]:
# Star import: presumably the source of every helper name used in this notebook (memory, sg, progress_kwargs,
# log_levels, xc_similar_html, ...) -- TODO confirm
from notebooks import *
# Start with cache logging at 'debug' (verbose() further down reassigns memory.log.level)
memory.log.level = 'debug'
# Initialize sg with app=None -- NOTE(review): what sg.init does with app=None isn't visible from here
sg.init(app=None)

In [None]:

def opt(*args, **kwargs):
    """
    Configure verbosity and parallelism/progress in one call

    Routing:
        - Positional args and the 'cache'/'audio' kwargs -> verbose()
        - Every other kwarg -> par()

    Usage:
        opt('warn', use='dask', scheduler='threads')  # = verbose('warn') + par(use='dask', scheduler='threads')
    """
    verbose_kwargs = {}
    par_kwargs = {}
    # Partition kwargs in one pass (insertion order is kept within each group)
    for key, value in kwargs.items():
        if key in ('cache', 'audio'):
            verbose_kwargs[key] = value
        else:
            par_kwargs[key] = value
    verbose(*args, **verbose_kwargs)
    par(**par_kwargs)

def par(**kwargs):
    """
    Pick the parallelism / progress-bar mode by forwarding all kwargs to progress_kwargs(override=...)

    Examples:
        par(use=None)                         # No par, no progress bars (easier to see audio read/write and cache hit/miss)
        par(use='sync')                       # No par, cooperative progress bars (no lines printing over each other)
        par(use='dask', scheduler='threads')  # Par, uncooperative progress bars (lines print over each other)
    """
    overrides = kwargs
    progress_kwargs(override=overrides)

def verbose(both=None, *, cache='debug', audio='debug'):
    """
    Set the cache and audio log levels

    Args:
        both: Shorthand, used for both levels when truthy, e.g. verbose('warn') = verbose(cache='warn', audio='warn')
        cache: Assigned to memory.log.level
            - 'debug': Show cache hit/miss lines # WARNING Frequent hangs during bigger xc_similar_html calls...
            - 'info':  Show cache hit/miss chars # WARNING (same)
            - 'warn':  Quiet
        audio: Uppercased and passed to log_levels under the 'load' key
            - 'debug': Show read/write
            - 'info':  Show write
            - 'warn':  Quiet
    """
    # A truthy `both` wins over the individual kwargs
    cache_level, audio_level = (both, both) if both else (cache, audio)
    memory.log.level = cache_level
    log_levels({'load': audio_level.upper()})

def n_recs_by_sp_quality(df):
    """
    Count recs per (species, quality) and pivot to one row per species, one column per quality, plus 'total' margins

    - Reads df.species and df.quality
    - Result is indexed by species, passed through df_reorder_cols(first=['total']), and sorted by 'total' descending
    - FIXME The 'total' margin row ends up with a NaN species label because .species is cast back to its cat dtype
    """
    # Count recs by (species, quality)
    counts = (
        df_remove_unused_categories(df)
        .assign(n=1)
        .groupby(['species', 'quality'])['n'].sum()
        .reset_index()
    )
    # Strip .species cat (restored below) to workaround category error in .pivot_table when adding .total
    counts_str = df_cat_to_str(counts)
    # Pivot, with row/col margins named 'total'
    #   - Alternative to margins, currently unused: .pipe(df_assign_first, total=lambda df: df.sum(axis=1))
    pivoted = counts_str.pivot_table(
        index='species',
        columns='quality',
        values='n',
        fill_value=0,
        aggfunc='sum',
        margins=True,
        margins_name='total',  # FIXME 'total' ends up as NaN because we cat .species
    )
    # Restore .species cat
    by_species = (
        pivoted
        .reset_index()
        .astype({'species': metadata.species.df.shorthand.dtype})
        .sort_values('species')
        .set_index('species')
    )
    # Drop columns.name from .pivot_table ('quality')
    unnamed = df_set_index_name(by_species.T, None).T
    # Reorder cols with .total first, then sort
    total_first = df_reorder_cols(unnamed, first=['total'])
    return total_first.sort_values('total', ascending=False)

In [None]:
%%memit -c
# Optimize perf
#   - Benchmark cell: one xc_similar_html call, run under the %%memit -c cell magic above
#   - Timings (t[...]) and rec counts (n_recs[...]) from past runs are recorded next to each commented-out arg line
#   - NOTE n_sp=None -> 27035/35233 recs because quality='ab'
opt('warn', use='dask', scheduler='threads')  # Quiet + par + progress bars
# opt('warn', use='sync')  # Quiet + no par
# opt('debug', use='sync')  # Verbose + no par [par() documents use='sync' as cooperative progress bars, use=None as no progress bars]
# ExitStack: presumably so more context managers can be entered here without re-indenting the body -- TODO confirm
with ExitStack() as stack:
    # cache_control is entered with no args; uncomment one of the lines below to change cache behavior for the run
    stack.enter_context(cache_control(
        # tags_fail_on_miss=['rec'],  # Require warmed cache for 'rec' funcs
        # tags_refresh=['recs'],  # Measure un-warmed cache for 'recs' funcs
        # tags_refresh=['sp14/model/species_proba'],  # XXX Dev
    ))
    display(
        xc_similar_html(
            sp_cols='species',

            # Perf: search_recs
            #   (countries_k='na', com_names_k='ca'): total[TODO] = meta[112s] feat[70s] f_p[91s]

            # Perf bottlenecks
            #   - meta: O(n) cache hits
            #   - feat: O(n) cache hits
            #   - p:    O(n) sg.search.predict_proba [@cache would achieve perf ~between meta and feat, which is only ~2x speedup]
            # Past runs: keep exactly one arg line uncommented
            #   - NOTE(review): the two numbers in n_recs[a,b] sum to the rec count in the NOTE at the top (27033+2); meaning of the split isn't recorded here
            # xc_id=381417, n_total=10,   n_sp=1,     # t[4.0s] n_recs[   32,0]
            # xc_id=381417, n_total=10,   n_sp=2,     # t[    ] n_recs[       ]
            # xc_id=381417, n_total=10,   n_sp=10,    # t[7.1s] n_recs[  717,0]
            # xc_id=381417, n_total=10,   n_sp=40,    # t[ 25s] n_recs[ 3879,1]
            # xc_id=381417, n_total=10,   n_sp=80,    # t[ 51s] n_recs[ 8203,1]
            # xc_id=381417, n_total=10,   n_sp=160,   # t[111s] n_recs[15284,1]
            xc_id=381417, n_total=10,   n_sp=None,  # t[192s] n_recs[27033,2]

            # xc_id=414126, n_total=10,   n_sp=20,     # XXX Dev

            view=False,  # For .pipe(n_recs_by_sp_quality): disable html view else junk .species values
        )
        # Uncomment to display the per-species/quality rec counts instead (needs view=False, above)
        # .pipe(n_recs_by_sp_quality)
    )

In [None]:
# Use /species
#   - Last expression in the cell, so its return value is the cell output
xc_species_html(
    species='WTSP',
    n_recs=3,
)

In [None]:
# Use /similar
#   - Quiet + par + progress bars
opt('warn', use='dask', scheduler='threads')
xc_similar_html(

    # Query rec: active choice first, then alternatives to swap in (bad/good)
    xc_id=329879,    # WTSP (good)
    # xc_id=381417,  # YHBL (bad)
    # xc_id=173272,  # LOSH (bad -- curious that d_f* does better than d_p*...)
    # xc_id=15222,   # JUTI (bad)
    # xc_id=406162,  # AMRO (wrong sp but good results)
    # xc_id=408021,  # WREN (good)
    # xc_id=348142,  # PSFL (good)
    # xc_id=378376,  # WCSP (good)
    # xc_id=381582,  # COYE (good)
    # xc_id=416558,  # COYE (good)

    # n_sp: active choice first, then alternatives to swap in
    n_sp=3,
    # n_sp=10,
    # n_sp=20,
    # n_sp=40,
    # n_sp=80,
    # n_sp=160,
    # n_sp=None,

    # Other kwargs, currently not passed
    # view=False,
    # n_total=20,
    # n_sp_recs=1,

)