In [7]:
%load_ext lab_black
import argparse
import contextlib
import datetime
import io
import logging
import multiprocessing
import os
import random
import sys
from itertools import chain, combinations
from timeit import default_timer as timer

import altair as alt
import altair_viewer
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import peewee
from evolutionary_search import EvolutionaryAlgorithmSearchCV
from json_tricks import dumps, loads
from playhouse.shortcuts import model_to_dict
from scipy.stats import randint, uniform
from sklearn.datasets import load_iris
from tabulate import tabulate
from IPython.core.display import display, HTML

from active_learning.cluster_strategies import (
    DummyClusterStrategy,
    MostUncertainClusterStrategy,
    RandomClusterStrategy,
    RoundRobinClusterStrategy,
)
from active_learning.dataStorage import DataStorage
from active_learning.experiment_setup_lib import (
    ExperimentResult,
    classification_report_and_confusion_matrix,
    get_db,
    get_single_al_run_stats_row,
    get_single_al_run_stats_table_header,
    load_and_prepare_X_and_Y,
)
from active_learning.sampling_strategies import (
    BoundaryPairSampler,
    CommitteeSampler,
    RandomSampler,
    UncertaintySampler,
)

# Render Altair charts through the "altair_viewer" backend.
# (Alternative renderer kept for reference: alt.renderers.enable('vegascope'))
alt.renderers.enable("altair_viewer")

# Notebook-wide settings; this cell itself only reads the "db" entry.
config = dict(
    datasets_path="../datasets",
    db="tunnel",
    param_list_id="best_global_score",
)

# Database handle selected by config["db"].
db = get_db(db_name_or_type=config["db"])

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [8]:
# Number of stored experiment runs per dataset, roughly:
# select count(id_field), dataset_name from experimentresult group by dataset_name;
run_count = peewee.fn.COUNT(ExperimentResult.id_field).alias("dataset_name_count")

results = ExperimentResult.select(ExperimentResult.dataset_name, run_count).group_by(
    ExperimentResult.dataset_name
)

# Right-aligned count with thousands separator, followed by the dataset name.
row_template = "{:>4,d} {}"
for result in results:
    print(row_template.format(result.dataset_name_count, result.dataset_name))

4,495 orange
4,472 sylva
4,502 hiva
4,515 ibn_sina
4,516 dwtc
4,459 zebra


In [19]:
#  Rank parameter sets (param_list_id) by number of runs, then by mean global score.
#  Roughly equivalent SQL:
#  SELECT param_list_id, avg(fit_score), stddev(fit_score), avg(global_score_no_weak_acc),
#         stddev(global_score_no_weak_acc), avg(amount_of_user_asked_queries),
#         stddev(amount_of_user_asked_queries)
#  FROM experimentresult
#  WHERE amount_of_user_asked_queries < 2000 AND experiment_run_date > '2020-03-24 14:00'
#  GROUP BY param_list_id
#  ORDER BY count(id_field) DESC, avg(global_score_no_weak_acc) DESC LIMIT 20;
from datetime import datetime, timedelta

# Cut-off dates that have been used for the experiment_run_date filter:
#   datetime(2020, 3, 24, 14, 0)   # no stopping criterias
#   datetime(2020, 3, 30, 12, 23)  # optics


results = (
    ExperimentResult.select(
        ExperimentResult.param_list_id,
        peewee.fn.AVG(ExperimentResult.fit_score).alias("avg_fit_score"),
        peewee.fn.STDDEV(ExperimentResult.fit_score).alias("stddev_fit_score"),
        peewee.fn.AVG(ExperimentResult.global_score_no_weak_acc).alias(
            "avg_global_score"
        ),
        peewee.fn.STDDEV(ExperimentResult.global_score_no_weak_acc).alias(
            "stddev_global_score"
        ),
        peewee.fn.AVG(ExperimentResult.amount_of_user_asked_queries).alias(
            "avg_amount_oracle"
        ),
        peewee.fn.STDDEV(ExperimentResult.amount_of_user_asked_queries).alias(
            "std_amount_oracle"
        ),
    )
    .where(
        (ExperimentResult.amount_of_user_asked_queries < 2000)
        & (
            ExperimentResult.experiment_run_date > (datetime(2020, 3, 24, 14, 0))
        )  # no stopping criterias
    )
    .group_by(ExperimentResult.param_list_id)
    .order_by(
        peewee.fn.COUNT(ExperimentResult.id_field).desc(),
        peewee.fn.AVG(ExperimentResult.global_score_no_weak_acc).desc(),
    )
    .limit(20)
)

# Aliases of the aggregates selected above, in the order they should appear
# as table columns. Reading them by name avoids depending on which internal
# bookkeeping attributes peewee happens to store on a model instance.
aggregate_columns = (
    "avg_fit_score",
    "stddev_fit_score",
    "avg_global_score",
    "stddev_global_score",
    "avg_amount_oracle",
    "std_amount_oracle",
)

# Parameter columns shown next to the aggregates; they are read from a single
# run of each param_list_id.
param_fields = (
    ExperimentResult.classifier,
    ExperimentResult.test_fraction,
    ExperimentResult.sampling,
    ExperimentResult.cluster,
    ExperimentResult.nr_queries_per_iteration,
    ExperimentResult.with_uncertainty_recommendation,
    ExperimentResult.with_cluster_recommendation,
    ExperimentResult.uncertainty_recommendation_certainty_threshold,
    ExperimentResult.uncertainty_recommendation_ratio,
    ExperimentResult.cluster_recommendation_minimum_cluster_unity_size,
    ExperimentResult.cluster_recommendation_ratio_labeled_unlabeled,
    ExperimentResult.allow_recommendations_after_stop,
    ExperimentResult.stopping_criteria_uncertainty,
    ExperimentResult.stopping_criteria_acc,
    ExperimentResult.stopping_criteria_std,
    ExperimentResult.experiment_run_date,
)

# `table` is read by later cells (table[n]["param_list_id"]); one dict per rank.
table = []
for rank, result in enumerate(results):
    # "id" is only the table column name; the builtin id() is left untouched.
    row = {"id": rank}
    for column in aggregate_columns:
        row[column] = getattr(result, column)
    row["param_list_id"] = result.param_list_id

    # One extra query per rank: fetch a single run with this param_list_id
    # to display its parameter values.
    one_param_list_id_result = (
        ExperimentResult.select(*param_fields)
        .where(ExperimentResult.param_list_id == row["param_list_id"])
        .limit(1)
    )[0]

    # __data__ holds the values of the fields selected for that run.
    row.update(one_param_list_id_result.__data__)

    table.append(row)

display(HTML(tabulate(table, headers="keys", tablefmt="html")))

id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,param_list_id,classifier,test_fraction,sampling,cluster,nr_queries_per_iteration,with_uncertainty_recommendation,with_cluster_recommendation,uncertainty_recommendation_certainty_threshold,uncertainty_recommendation_ratio,cluster_recommendation_minimum_cluster_unity_size,cluster_recommendation_ratio_labeled_unlabeled,allow_recommendations_after_stop,stopping_criteria_uncertainty,stopping_criteria_acc,stopping_criteria_std,experiment_run_date
0,0.867049,0.146319,0.695423,0.291023,611,436,8f4f8b8a205184526c964a846413f0f5,RF,0.5,uncertainty_max_margin,MostUncertain_entropy,10,True,True,0.626745,0.0001,0.731586,0.55533,False,1,1,1,2020-03-30 02:44:14.138352
1,0.886476,0.178064,0.694585,0.0960108,389,681,5505db02f703b97fe357a43c3935854e,RF,0.5,random,random,10,True,True,0.983617,0.1,0.630251,0.503325,False,1,1,1,2020-03-29 04:18:52.626976
2,0.892026,0.170315,0.690901,0.152661,350,660,530aad7e4dbd7b481643e95c73b36e08,RF,0.5,uncertainty_entropy,dummy,10,True,True,0.989044,0.0001,0.983319,0.505681,False,1,1,1,2020-03-25 20:37:58.760206
3,0.897692,0.175238,0.675495,0.182239,470,700,e09458ae65c31c556a468cc306c79786,RF,0.5,uncertainty_max_margin,MostUncertain_lc,10,True,True,0.910481,0.0001,0.950655,0.840829,True,1,1,1,2020-03-26 03:30:16.658766
4,0.923243,0.119126,0.675035,0.0850815,306,477,991d7707b803c468550a70853ac591df,RF,0.5,uncertainty_max_margin,random,10,True,True,0.762838,0.1,0.807083,0.602319,True,1,1,1,2020-03-28 10:03:37.924608
5,0.887033,0.190948,0.666143,0.0778562,367,720,9f27436558c8ec3b1bfd5e384ca262a3,RF,0.5,uncertainty_entropy,random,10,True,True,0.962118,0.01,0.874329,0.518642,True,1,1,1,2020-03-25 17:03:54.947228
6,0.899062,0.163138,0.660799,0.241222,465,686,c44d7b137f166a0ee81e51ee9a8b1ffb,RF,0.5,uncertainty_entropy,dummy,10,True,True,0.946692,0.0001,0.581491,0.705227,True,1,1,1,2020-03-29 07:19:55.408426
7,0.879559,0.201374,0.656812,0.205274,537,798,f6d9653a6e56c8b8713754d8dcddd1f8,RF,0.5,uncertainty_max_margin,MostUncertain_max_margin,10,True,True,0.933502,0.1,0.81163,0.679446,True,1,1,1,2020-03-30 10:08:12.890548
8,0.889297,0.182915,0.653504,0.367543,773,857,75c8288a842fa7f0bea9be5d750df3ea,RF,0.5,uncertainty_max_margin,dummy,10,True,True,0.930328,0.01,0.965304,0.921202,False,1,1,1,2020-03-29 23:31:46.575135
9,0.907392,0.14771,0.64895,0.234878,480,691,437dd4df656be2498bbfb99659d699bc,RF,0.5,uncertainty_max_margin,MostUncertain_lc,10,True,True,0.757754,0.1,0.936987,0.52803,True,1,1,1,2020-03-26 14:00:16.590191


In [20]:
#  Rank parameter sets (param_list_id) by number of runs, then by mean global score.
#  Roughly equivalent SQL:
#  SELECT param_list_id, avg(fit_score), stddev(fit_score), avg(global_score_no_weak_acc),
#         stddev(global_score_no_weak_acc), avg(amount_of_user_asked_queries),
#         stddev(amount_of_user_asked_queries)
#  FROM experimentresult
#  WHERE amount_of_user_asked_queries < 2000
#  GROUP BY param_list_id
#  ORDER BY count(id_field) DESC, avg(global_score_no_weak_acc) DESC LIMIT 20;
from datetime import datetime, timedelta

# Cut-off dates that can be added as an experiment_run_date filter:
#   & (ExperimentResult.experiment_run_date > (datetime(2020, 3, 24, 14, 0)))  # no stopping criterias
#   & (ExperimentResult.experiment_run_date > (datetime(2020, 3, 30, 12, 23)))  # optics
# No date filter is applied in this cell, so runs of every date are included.


results = (
    ExperimentResult.select(
        ExperimentResult.param_list_id,
        peewee.fn.AVG(ExperimentResult.fit_score).alias("avg_fit_score"),
        peewee.fn.STDDEV(ExperimentResult.fit_score).alias("stddev_fit_score"),
        peewee.fn.AVG(ExperimentResult.global_score_no_weak_acc).alias(
            "avg_global_score"
        ),
        peewee.fn.STDDEV(ExperimentResult.global_score_no_weak_acc).alias(
            "stddev_global_score"
        ),
        peewee.fn.AVG(ExperimentResult.amount_of_user_asked_queries).alias(
            "avg_amount_oracle"
        ),
        peewee.fn.STDDEV(ExperimentResult.amount_of_user_asked_queries).alias(
            "std_amount_oracle"
        ),
    )
    .where((ExperimentResult.amount_of_user_asked_queries < 2000))
    .group_by(ExperimentResult.param_list_id)
    .order_by(
        peewee.fn.COUNT(ExperimentResult.id_field).desc(),
        peewee.fn.AVG(ExperimentResult.global_score_no_weak_acc).desc(),
    )
    .limit(20)
)

# Aliases of the aggregates selected above, in the order they should appear
# as table columns. Reading them by name avoids depending on which internal
# bookkeeping attributes peewee happens to store on a model instance.
aggregate_columns = (
    "avg_fit_score",
    "stddev_fit_score",
    "avg_global_score",
    "stddev_global_score",
    "avg_amount_oracle",
    "std_amount_oracle",
)

# Parameter columns shown next to the aggregates; they are read from a single
# run of each param_list_id.
param_fields = (
    ExperimentResult.classifier,
    ExperimentResult.test_fraction,
    ExperimentResult.sampling,
    ExperimentResult.cluster,
    ExperimentResult.nr_queries_per_iteration,
    ExperimentResult.with_uncertainty_recommendation,
    ExperimentResult.with_cluster_recommendation,
    ExperimentResult.uncertainty_recommendation_certainty_threshold,
    ExperimentResult.uncertainty_recommendation_ratio,
    ExperimentResult.cluster_recommendation_minimum_cluster_unity_size,
    ExperimentResult.cluster_recommendation_ratio_labeled_unlabeled,
    ExperimentResult.allow_recommendations_after_stop,
    ExperimentResult.stopping_criteria_uncertainty,
    ExperimentResult.stopping_criteria_acc,
    ExperimentResult.stopping_criteria_std,
    ExperimentResult.experiment_run_date,
)

# `table` is read by later cells (table[n]["param_list_id"]); one dict per rank.
table = []
for rank, result in enumerate(results):
    # "id" is only the table column name; the builtin id() is left untouched.
    row = {"id": rank}
    for column in aggregate_columns:
        row[column] = getattr(result, column)
    row["param_list_id"] = result.param_list_id

    # One extra query per rank: fetch a single run with this param_list_id
    # to display its parameter values.
    one_param_list_id_result = (
        ExperimentResult.select(*param_fields)
        .where(ExperimentResult.param_list_id == row["param_list_id"])
        .limit(1)
    )[0]

    # __data__ holds the values of the fields selected for that run.
    row.update(one_param_list_id_result.__data__)

    table.append(row)

display(HTML(tabulate(table, headers="keys", tablefmt="html")))

id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,param_list_id,classifier,test_fraction,sampling,cluster,nr_queries_per_iteration,with_uncertainty_recommendation,with_cluster_recommendation,uncertainty_recommendation_certainty_threshold,uncertainty_recommendation_ratio,cluster_recommendation_minimum_cluster_unity_size,cluster_recommendation_ratio_labeled_unlabeled,allow_recommendations_after_stop,stopping_criteria_uncertainty,stopping_criteria_acc,stopping_criteria_std,experiment_run_date
0,0.888658,0.184813,0.794182,0.184677,631,697,8fb85cb6e86c05cb32c063ca11cbac8b,RF,0.5,uncertainty_max_margin,dummy,10,True,True,0.934121,0.01,0.953661,0.824976,False,0.366747,0.425233,0.155776,2020-03-17 15:36:56.591785
1,0.890569,0.169373,0.787625,0.0883665,337,646,af335110ccb9c4cceddab81b3f8d67a0,RF,0.5,uncertainty_max_margin,random,10,True,True,0.976293,0.01,0.52122,0.565546,False,0.548729,0.287548,0.284917,2020-03-19 00:49:15.379897
2,0.917048,0.127483,0.780857,0.159474,378,516,30f54d5d3193c0a07c4f8b1fcce16b24,RF,0.5,random,MostUncertain_max_margin,10,True,True,0.831933,0.001,0.695587,0.672841,False,0.021155,0.463451,0.569557,2020-03-18 23:15:07.940069
3,0.895887,0.188206,0.755298,0.177167,460,739,389ece342a6b528a260dfd2ab8dcf4f6,RF,0.5,uncertainty_max_margin,dummy,10,False,True,0.797496,0.1,0.541837,0.676506,False,0.342578,0.761308,0.68608,2020-03-17 13:08:18.425756
4,0.92432,0.113858,0.74608,0.0848814,293,444,654478bcfe01e9d9febe75501bd17c61,RF,0.5,uncertainty_max_margin,random,10,True,True,0.787306,0.1,0.520178,0.989303,False,0.0500082,0.0169665,0.289742,2020-03-14 19:58:02.003212
5,0.882311,0.199251,0.745853,0.0904448,351,764,16fa96ecc0b8497e8d6b5bc51f564e96,RF,0.5,uncertainty_lc,MostUncertain_lc,10,True,True,0.960457,0.001,0.864465,0.688866,True,0.134078,0.0978165,0.348725,2020-03-21 18:28:58.020991
6,0.864614,0.140761,0.744129,0.211865,807,551,435f082ff3328d990b72c89b781e4b5e,RF,0.5,uncertainty_max_margin,MostUncertain_max_margin,10,True,True,0.550141,0.01,0.904487,0.809504,True,0.692334,0.0107102,0.249354,2020-03-21 22:12:24.263075
7,0.85598,0.215929,0.729321,0.0970913,353,805,da5df4de6fd3c7b754c8cbbfe8f714cc,RF,0.5,uncertainty_entropy,random,10,True,True,0.963473,0.01,0.940184,0.662299,True,0.656151,0.308494,0.482047,2020-03-15 15:47:11.920027
8,0.917097,0.122709,0.727688,0.105446,346,516,9efea32a12854eb6aac801a44c3aa9b1,RF,0.5,uncertainty_max_margin,random,10,True,True,0.796547,0.01,0.678392,0.854594,True,0.918132,0.529353,0.18782,2020-03-16 08:50:10.282344
9,0.918191,0.129225,0.727088,0.149941,365,548,af121819cf904c847ed7978b44b7d770,RF,0.5,uncertainty_lc,MostUncertain_lc,10,True,True,0.775192,0.01,0.998216,0.57935,True,0.0511036,0.566659,0.427689,2020-03-17 14:03:27.926084


In [10]:
# SELECT id_field, param_list_id, dataset_path, start_set_size as sss, sampling, cluster, allow_recommendations_after_stop as SA, stopping_criteria_uncertainty as SCU, stopping_criteria_std as SCS, stopping_criteria_acc as SCA, amount_of_user_asked_queries as "#q", acc_test, fit_score, global_score_norm, thread_id, end_time from experimentresult where param_list_id='31858014d685a3f1ba3e4e32690ddfc3' order by end_time, fit_score desc, param_list_id;
# Cache of fetched runs: maps an index into `table` to the list of
# ExperimentResult rows sharing that entry's param_list_id.
loaded_data = {}


def pre_fetch_data(top_n=0):
    """Load every run of the ``top_n``-th entry of ``table`` into ``loaded_data``.

    Looks up ``table[top_n]["param_list_id"]``, selects all ExperimentResult
    rows with that id, stores them as a list under ``loaded_data[top_n]``
    (replacing any previous entry) and prints a confirmation line.
    """
    wanted_param_list_id = table[top_n]["param_list_id"]
    matching_runs = ExperimentResult.select().where(
        ExperimentResult.param_list_id == wanted_param_list_id
    )

    # Reset the slot first, then fill it from the query.
    loaded_data[top_n] = []
    loaded_data[top_n].extend(matching_runs)
    print("Loaded Top " + str(top_n) + " data")


pre_fetch_data(0)

Loaded Top 0 data


In [11]:
def visualise_top_n(top_n=0):
    """Chart the per-cycle metrics of every run cached under ``loaded_data[top_n]``.

    For each run the JSON in ``metrics_per_al_cycle`` is decoded into a
    DataFrame with one row per iteration. Each iteration is drawn as a
    rectangle spanning the cumulative number of asked queries before and after
    it, coloured by recommendation type. Two panels are placed side by side
    per run (roc_auc and test_acc, both on a fixed [0, 1] y-scale), and all
    runs are stacked vertically.

    Side effect: switches the active Altair renderer to "html".

    Returns the vertically concatenated chart.
    """
    alt.renderers.enable("html")

    # Single-letter recommendation codes -> legend labels.
    recommendation_labels = {
        "A": "Oracle",
        "C": "Weak Cluster",
        "U": "Weak Certainty",
        "G": "Ground Truth",
    }
    tooltip_fields = [
        "iteration",
        "f1",
        "test_acc",
        "all_unlabeled_roc_auc_scores",
        "query_strong_accuracy_list",
        "query_length",
        "recommendation",
    ]

    rows = []
    for run in list(loaded_data[top_n]):
        metrics = loads(run.metrics_per_al_cycle)

        # Each entry's first element is a report dict with "weighted avg"
        # and "accuracy" keys.
        per_cycle_reports = metrics["test_data_metrics"][0]
        weighted_avgs = [cycle[0]["weighted avg"] for cycle in per_cycle_reports]
        accuracies = [cycle[0]["accuracy"] for cycle in per_cycle_reports]
        roc_auc_scores = metrics["all_unlabeled_roc_auc_scores"]

        frame = pd.DataFrame(
            {
                "iteration": range(0, len(roc_auc_scores)),
                "all_unlabeled_roc_auc_scores": roc_auc_scores,
                "query_length": metrics["query_length"],
                "recommendation": metrics["recommendation"],
                "query_strong_accuracy_list": metrics["query_strong_accuracy_list"],
                "f1": [avg["f1-score"] for avg in weighted_avgs],
                "test_acc": accuracies,
            }
        ).assign(
            # Rectangle extent: "asked_queries" is the running total after the
            # iteration; "asked_queries_end" is that total shifted down one
            # row (0 for the first iteration).
            asked_queries=lambda df: df["query_length"].cumsum(),
            asked_queries_end=lambda df: df["asked_queries"].shift(fill_value=0),
            recommendation=lambda df: df["recommendation"].replace(
                recommendation_labels
            ),
        )

        base = (
            alt.Chart(frame)
            .mark_rect()
            .encode(
                x=alt.X("asked_queries_end", title="#asked queries (weak and oracle)"),
                x2="asked_queries",
                color=alt.Color("recommendation", scale=alt.Scale(scheme="tableau10")),
                tooltip=tooltip_fields,
            )
            .properties(title=run.dataset_name)
            .interactive()
        )

        roc_auc_panel = base.encode(
            alt.Y("all_unlabeled_roc_auc_scores", scale=alt.Scale(domain=[0, 1]))
        ).properties(title=run.dataset_name + ": roc_auc")
        test_acc_panel = base.encode(
            alt.Y("test_acc", scale=alt.Scale(domain=[0, 1]))
        ).properties(title=run.dataset_name + ": test_acc")

        rows.append(alt.hconcat(roc_auc_panel, test_acc_panel))

    return alt.vconcat(*rows).configure()


visualise_top_n(0)

In [12]:
# Fetch the runs for the parameter set at index 1 of `table`, then chart them.
pre_fetch_data(1)
visualise_top_n(1)

Loaded Top 1 data


In [13]:
# Fetch the runs for the parameter set at index 2 of `table`, then chart them.
pre_fetch_data(2)
visualise_top_n(2)

Loaded Top 2 data


In [14]:
# Fetch the runs for the parameter set at index 3 of `table`, then chart them.
pre_fetch_data(3)
visualise_top_n(3)

Loaded Top 3 data


In [15]:
# Fetch the runs for the parameter set at index 4 of `table`, then chart them.
pre_fetch_data(4)
visualise_top_n(4)

Loaded Top 4 data


In [16]:
# Fetch the runs for the parameter set at index 5 of `table`, then chart them.
pre_fetch_data(5)
visualise_top_n(5)

Loaded Top 5 data
