In [4]:
%load_ext lab_black
import argparse
import contextlib
import datetime
import io
import logging
import multiprocessing
import os
import random
import sys
from itertools import chain, combinations
from timeit import default_timer as timer

import altair as alt
import altair_viewer
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import peewee
from evolutionary_search import EvolutionaryAlgorithmSearchCV
from json_tricks import dumps, loads
from playhouse.shortcuts import model_to_dict
from scipy.stats import randint, uniform
from sklearn.datasets import load_iris
from tabulate import tabulate
from IPython.core.display import display, HTML

from active_learning.cluster_strategies import (
    DummyClusterStrategy,
    MostUncertainClusterStrategy,
    RandomClusterStrategy,
    RoundRobinClusterStrategy,
)
from active_learning.dataStorage import DataStorage
from active_learning.experiment_setup_lib import (
    ExperimentResult,
    classification_report_and_confusion_matrix,
    get_db,
    get_single_al_run_stats_row,
    get_single_al_run_stats_table_header,
)
from active_learning.sampling_strategies import (
    BoundaryPairSampler,
    CommitteeSampler,
    RandomSampler,
    UncertaintySampler,
)

# Route Altair chart output through the "altair_viewer" renderer.
# Alternative renderer, currently disabled:
#  alt.renderers.enable('vegascope')
alt.renderers.enable("altair_viewer")

# Notebook-wide settings; only the "db" entry is consumed in this cell.
config = dict(
    datasets_path="../datasets",
    db="tunnel",
    param_list_id="best_global_score",
)

# Hand the configured database name/type to the project helper.
db = get_db(db_name_or_type=config["db"])

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [5]:
# Count the stored experiment results per dataset. Equivalent SQL:
#   select count(*), dataset_name from experimentresult group by dataset_name;
results = ExperimentResult.select(
    ExperimentResult.dataset_name,
    peewee.fn.COUNT(ExperimentResult.id_field).alias("dataset_name_count"),
).group_by(ExperimentResult.dataset_name)

# One line per dataset: right-aligned count with thousands separators, then name.
for result in results:
    print(f"{result.dataset_name_count:>4,d} {result.dataset_name}")

184,746 dwtc
4,942 hiva
4,956 ibn_sina
4,933 orange
4,901 sylva
4,871 zebra


In [15]:
#  Reference SQL from a per-param_list_id aggregation. NOTE: it does not describe
#  the query below, which lists individual runs ordered by test accuracy.
#  SELECT param_list_id, avg(fit_score), stddev(fit_score), avg(global_score), stddev(global_score), avg(start_set_size) as sss, count(*) FROM experimentresult WHERE start_set_size = 1 GROUP BY param_list_id ORDER BY 7 DESC, 4 DESC LIMIT 30;
# NOTE(review): this rebinds the name `datetime` from the module (imported in the
# first cell) to the class for the rest of the kernel session. It is left in place
# because other cells may depend on that binding; confirm and consolidate the
# imports in the first cell.
from datetime import datetime, timedelta

# Query parameters, named so they can be tuned in one place.
DATASET_NAME = "dwtc"
MAX_USER_QUERIES = 1000  # exclusive upper bound on amount_of_user_asked_queries
EARLIEST_RUN_DATE = datetime(2020, 5, 25, 14, 0)  # author's note: no stopping criteria
MAX_ROWS = 200

# Bulky per-run columns that are left out of the overview table.
HIDDEN_COLUMNS = frozenset(
    {
        "metrics_per_al_cycle",
        "confusion_matrix_test",
        "confusion_matrix_train",
        "classification_report_train",
        "classification_report_test",
    }
)

# The MAX_ROWS runs with the highest test accuracy that satisfy the filters above.
results = (
    ExperimentResult.select(ExperimentResult)
    .where(
        (ExperimentResult.amount_of_user_asked_queries < MAX_USER_QUERIES)
        & (ExperimentResult.dataset_name == DATASET_NAME)
        # & (ExperimentResult.param_list_id == "d983fac44579291e5b529aa81cbe7aec")
        & (ExperimentResult.experiment_run_date > EARLIEST_RUN_DATE)
    )
    .order_by(
        # ExperimentResult.id_field.desc(),
        ExperimentResult.acc_test.desc(),
    )
    .limit(MAX_ROWS)
)

# INTERESTING: even when the computation is not restricted to weak/no_weak,
# clusters are still used!

# One dict per run: a running row number under "id" followed by the model's stored
# column values. `enumerate` supplies the counter so the builtin `id` is not rebound.
table = []
for row_number, result in enumerate(results):
    columns = {
        name: value
        for name, value in result.__data__.items()
        if name not in HIDDEN_COLUMNS
    }
    table.append({"id": row_number, **columns})

display(HTML(tabulate(table, headers="keys", tablefmt="html")))

id,id_field,datasets_path,dataset_name,db_name_or_type,classifier,cores,test_fraction,sampling,random_seed,cluster,nr_learning_iterations,nr_queries_per_iteration,start_set_size,with_uncertainty_recommendation,with_cluster_recommendation,with_snuba_lite,uncertainty_recommendation_certainty_threshold,uncertainty_recommendation_ratio,snuba_lite_minimum_heuristic_accuracy,cluster_recommendation_minimum_cluster_unity_size,cluster_recommendation_ratio_labeled_unlabeled,amount_of_user_asked_queries,allow_recommendations_after_stop,stopping_criteria_uncertainty,stopping_criteria_acc,stopping_criteria_std,experiment_run_date,fit_time,acc_train,acc_test,fit_score,roc_auc,global_score_with_weak_roc_auc_old,global_score_with_weak_roc_auc_norm_old,global_score_no_weak_roc_auc,global_score_no_weak_acc,global_score_with_weak_roc_auc,global_score_with_weak_acc,global_score_no_weak_roc_auc_norm,global_score_no_weak_acc_norm,global_score_with_weak_roc_auc_norm,global_score_with_weak_acc_norm,param_list_id,cv_fit_score_mean,cv_fit_score_std,thread_id,end_time
0,160081,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.92,0.0001,0,1.0,0.67,210,True,1,1,1,2020-05-26 05:58:23.511763,44.0331,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,af765c0d351d29a89946a7974d44fe87,,,139784867505984,2020-05-26 05:58:23.511732
1,164328,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.91,0.0001,0,1.0,0.6,210,True,1,1,1,2020-05-26 08:47:00.009840,43.479,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,e18a40f75d5b437faeaebf6f06c4513d,,,140403013629760,2020-05-26 08:47:00.009804
2,174520,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.91,0.001,0,1.0,0.95,210,True,1,1,1,2020-05-26 16:00:30.534297,43.6269,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,a40dc4eb067e1d92d64eaed08ac79790,,,140619543336768,2020-05-26 16:00:30.534260
3,166515,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.92,0.001,0,1.0,0.72,210,True,1,1,1,2020-05-26 10:16:10.962942,44.1977,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,3b2fe2f7cbf9ccb42511edb414bc08cc,,,139881576654656,2020-05-26 10:16:10.962910
4,154304,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.91,0.001,0,1.0,0.9,210,True,1,1,1,2020-05-26 01:49:43.744036,42.8928,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,2db900d6d28b12c8965ce0f64e619fba,,,140313815451456,2020-05-26 01:49:43.744004
5,195827,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.91,0.001,0,1.0,0.86,210,True,1,1,1,2020-05-27 07:10:48.345567,44.2816,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,eff7c1407a14ce1c6b1876c9c1cab936,,,140701709408064,2020-05-27 07:10:48.345533
6,169340,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.91,0.001,0,1.0,0.9,210,True,1,1,1,2020-05-26 12:16:46.160475,43.2145,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,2db900d6d28b12c8965ce0f64e619fba,,,140192852444992,2020-05-26 12:16:46.160441
7,186307,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.92,0.001,0,1.0,0.62,210,True,1,1,1,2020-05-27 00:35:44.712425,46.6161,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,ab645612dcc7642b0a71a5c7f1686268,,,139634253002560,2020-05-27 00:35:44.712391
8,184434,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.92,0.0001,0,1.0,0.67,210,True,1,1,1,2020-05-26 23:18:21.046710,46.4747,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,af765c0d351d29a89946a7974d44fe87,,,139940071814976,2020-05-26 23:18:21.046675
9,183836,../datasets,dwtc,jg,RF,20,0.5,uncertainty_max_margin,1,dummy,1000000,10,1,True,True,False,0.91,0.001,0,1.0,0.95,210,True,1,1,1,2020-05-26 22:52:46.713590,44.6192,1,0.81793,0.869182,0.935721,0,0,0.849662,0.616148,0.878888,0.666784,0.838597,0.607267,0.862866,0.645706,a40dc4eb067e1d92d64eaed08ac79790,,,140676089448256,2020-05-26 22:52:46.713553
