In [None]:
import pyperclip
import pandas as pd
import numpy as np

from misc_util.logutils import setup_logging
from misc_util.pretty_print import Markdown, display

from derive_conceptualspace.pipeline import SnakeContext, load_envfiles
from derive_conceptualspace.util.result_analysis_tools import getfiles_allconfigs, highlight_nonzero_max, get_best_conf, df_to_latex
from derive_conceptualspace.cli.args_from_filename import get_filename, print_envvars
from derive_conceptualspace.evaluate.shallow_trees import classify_shallowtree_multi
from derive_conceptualspace.util.desc_object import DescriptionList
from derive_conceptualspace.pipeline import cluster_loader

In [None]:
# Configure logging before any other notebook code runs.
setup_logging()

# Name handed to load_envfiles (presumably the dataset whose env-files
# should be read -- confirm against load_envfiles).
DATASET_NAME = "placetypes"
load_envfiles(DATASET_NAME)

# Find the best-performing parameter combination for each target + depth combination

In [None]:
# For every (target, decision-tree depth) pair: ask get_best_conf for the
# best configuration under `metric`, print its env-vars and show the score.
metric = "f1"
targets = ["Geonames", "Foursquare"]
tree_depths = [1, 3, None]
target_depth_pairs = [(t, d) for t in targets for d in tree_depths]

for run_idx, (target, dt_depth) in enumerate(target_depth_pairs):
    display(Markdown(f"### Best for: {target}, depth {dt_depth}"))
    conf, perf = get_best_conf(
        target,
        verbose=True,
        forward_verbose=(run_idx == 0),  # True only for the very first call
        balance_classes=True,
        one_vs_rest=True,
        dt_depth=dt_depth,
        test_percentage_crossval=0.3,
        metric=metric,
    )

    conf_filename = get_filename(conf, get_dependencies=False, doprint=False)
    print_envvars(conf_filename)
    display(Markdown(f"Best {metric}: **{perf:.4f}** <br><br> "))

<br><br><br><br><br><br>

# Find best overall config & run all for that one

In [None]:
# Select the best configuration for Geonames at tree depth 1 (scored by f1),
# then load descriptions, clusters and embedding for that configuration.
conf, perf = get_best_conf(
    "Geonames",
    verbose=True,
    balance_classes=True,
    one_vs_rest=True,
    dt_depth=1,
    test_percentage_crossval=0.3,
    metric="f1",
)

ctx = SnakeContext.loader_context(config=conf, silent=False)


def _embedding_loader(**args):
    """Return the ``embedding_`` attribute of the object passed as ``embedding``."""
    return args["embedding"].embedding_


# One loader per requested artefact, in the same order as they are requested.
artifact_loaders = {
    "pp_descriptions": DescriptionList.from_json,
    "clusters": cluster_loader,
    "embedding": _embedding_loader,
}
descriptions, clusters, embedding = ctx.load(
    "pp_descriptions", "clusters", "embedding",
    loaders=artifact_loaders,
)

print_envvars(get_filename(conf, get_dependencies=False, doprint=False))

In [None]:
# Evaluate the loaded clusters/embedding/descriptions with the shallow-tree
# classifier; the result table `res` is what the cells below slice and display.
res = classify_shallowtree_multi(
    clusters,
    embedding,
    descriptions,
    ctx.obj["dataset_class"],
    verbose=False,
)

In [None]:
# old, reported results (all of them weighted) (delete me if executing anew)
# NOTE(review): every entry of `res` is formatted with '{:.3f}' below, so this
# cell needs scalar entries in `res`.

SHOW_CROSSVAL_NUM = 5
SHOW_METRIC = "f1"

styles = [{'selector': 'th', 'props': [('vertical-align','top'),('text-align','left')]}] #('border-style', 'solid')  #see https://stackoverflow.com/a/55904239/5122790


def styler(df):
    """Style a result table: highlight per-column maxima, 3 decimals, '-' for NaN."""
    return df.style.apply(highlight_nonzero_max, axis=0).format('{:.3f}'.format, na_rep="-").set_table_styles(styles)


# Index.unique() keeps the order of first appearance. Iterating over a set of
# strings would change the section order between kernel restarts.
latex_tables = []
for taxonomy in res.columns.get_level_values(0).unique():
    display(Markdown(f"## {taxonomy}"))
    # Slice out this taxonomy, the chosen cross-validation number and metric,
    # then pivot index level 1 into the columns.
    df = res.T.xs(taxonomy).xs(SHOW_CROSSVAL_NUM)[SHOW_METRIC].unstack(level=[1])
    # Integer column labels as strings; a missing label (NaN -> 0) is shown as "-".
    df.columns = pd.Series(df.columns.fillna(0).astype(int).astype(str)).replace("0", "-")
    display(styler(df))
    latex_tables.append(df_to_latex(df, styler, rotate=False, caption=f"This algorithm on {taxonomy}"))

# Copy once, after the loop: copying inside the loop overwrote the clipboard on
# every iteration, so only the last table survived.
# (assumes df_to_latex returns a str -- the original passed it straight to pyperclip.copy)
if latex_tables:
    pyperclip.copy("\n\n".join(latex_tables))

In [None]:
# new results (weighted & unweighted)
# Each entry of `res` is indexed as entry[0] for the weighted and entry[1] for
# the unweighted score; one set of tables is rendered per variant.

SHOW_CROSSVAL_NUM = 5
SHOW_METRIC = "f1"

styles = [{'selector': 'th', 'props': [('vertical-align','top'),('text-align','left')]}] #('border-style', 'solid')  #see https://stackoverflow.com/a/55904239/5122790


def styler(df):
    """Style a result table: highlight per-column maxima, 3 decimals, '-' for NaN."""
    return df.style.apply(highlight_nonzero_max, axis=0).format('{:.3f}'.format, na_rep="-").set_table_styles(styles)


for weighted in [True, False]:
    # Position of the wanted score inside every entry (hoisted out of the comprehension).
    entry_pos = 0 if weighted else 1
    wres = pd.DataFrame(
        np.array([[entry[entry_pos] for entry in row] for row in res.values]),
        index=res.index,
        columns=res.columns,
    )
    # Index.unique() keeps the order of first appearance. Iterating over a set
    # of strings would change the section order between kernel restarts.
    for taxonomy in wres.columns.get_level_values(0).unique():
        display(Markdown(f"## {taxonomy} ({'weighted' if weighted else 'unweighted'})"))
        # Slice out this taxonomy, the chosen cross-validation number and metric,
        # then pivot index level 1 into the columns.
        df = wres.T.xs(taxonomy).xs(SHOW_CROSSVAL_NUM)[SHOW_METRIC].unstack(level=[1])
        # Integer column labels as strings; a missing label (NaN -> 0) is shown as "-".
        df.columns = pd.Series(df.columns.fillna(0).astype(int).astype(str)).replace("0", "-")
        display(styler(df))