In [2]:
%load_ext lab_black
import argparse
import contextlib
import datetime
import io
import logging
import multiprocessing
import os
import random
import sys
from itertools import chain, combinations
from timeit import default_timer as timer

import altair as alt
import altair_viewer
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import peewee
from evolutionary_search import EvolutionaryAlgorithmSearchCV
from json_tricks import dumps, loads
from playhouse.shortcuts import model_to_dict
from scipy.stats import randint, uniform
from sklearn.datasets import load_iris
from tabulate import tabulate
from IPython.core.display import display, HTML

from active_learning.cluster_strategies import (
    DummyClusterStrategy,
    MostUncertainClusterStrategy,
    RandomClusterStrategy,
    RoundRobinClusterStrategy,
)
from active_learning.dataStorage import DataStorage
from active_learning.experiment_setup_lib import (
    ExperimentResult,
    classification_report_and_confusion_matrix,
    get_db,
    get_single_al_run_stats_row,
    get_single_al_run_stats_table_header,
    load_and_prepare_X_and_Y,
)
from active_learning.sampling_strategies import (
    BoundaryPairSampler,
    CommitteeSampler,
    RandomSampler,
    UncertaintySampler,
)

alt.renderers.enable("altair_viewer")
#  alt.renderers.enable('vegascope')

config = {
    "datasets_path": "../datasets",
    "db": "tunnel",
    "param_list_id": "best_global_score",
}

db = get_db(db_name_or_type=config["db"])



In [3]:
# select count(*), dataset_name from experimentresult group by dataset_name;
results = ExperimentResult.select(
    ExperimentResult.dataset_name,
    peewee.fn.COUNT(ExperimentResult.id_field).alias("dataset_name_count"),
).group_by(ExperimentResult.dataset_name)

for result in results:
    print("{:>4,d} {}".format(result.dataset_name_count, result.dataset_name))

4,933 orange
4,901 sylva
4,942 hiva
4,956 ibn_sina
4,871 zebra
60,721 dwtc


In [4]:
#  SELECT param_list_id, avg(fit_score), stddev(fit_score), avg(global_score), stddev(global_score), avg(start_set_size) as sss, count(*) FROM experimentresult WHERE start_set_size = 1 GROUP BY param_list_id ORDER BY 7 DESC, 4 DESC LIMIT 30;
from datetime import datetime, timedelta

results = (
    ExperimentResult.select(
        ExperimentResult.param_list_id,
        ExperimentResult.acc_test,
        ExperimentResult.fit_score,
        ExperimentResult.global_score_no_weak_acc,
        ExperimentResult.amount_of_user_asked_queries,
        ExperimentResult.classifier,
        ExperimentResult.global_score_no_weak_acc,
        ExperimentResult.amount_of_user_asked_queries,
        ExperimentResult.dataset_name,
        ExperimentResult.test_fraction,
        ExperimentResult.sampling,
        ExperimentResult.cluster,
        ExperimentResult.nr_queries_per_iteration,
        ExperimentResult.with_uncertainty_recommendation,
        ExperimentResult.with_cluster_recommendation,
        ExperimentResult.uncertainty_recommendation_certainty_threshold,
        ExperimentResult.uncertainty_recommendation_ratio,
        ExperimentResult.cluster_recommendation_minimum_cluster_unity_size,
        ExperimentResult.cluster_recommendation_ratio_labeled_unlabeled,
        ExperimentResult.allow_recommendations_after_stop,
        ExperimentResult.stopping_criteria_uncertainty,
        ExperimentResult.stopping_criteria_acc,
        ExperimentResult.stopping_criteria_std,
        ExperimentResult.experiment_run_date,
    )
    .where(
        (ExperimentResult.amount_of_user_asked_queries < 1000)
        & (ExperimentResult.dataset_name == "zebra")
        & (
            ExperimentResult.experiment_run_date > (datetime(2020, 3, 24, 14, 0))
        )  # no stopping criterias
    )
    .order_by(
        # ExperimentResult.id_field.desc(),
        ExperimentResult.acc_test.desc(),
    )
    .limit(20)
)

# INTERESSANT: selbst wenn es keine Einschränkung bei der Berechnung auf weak/no_weak gibt werden Cluster verwendet!


table = []
id = 0
for result in results:
    data = {**{"id": id}, **vars(result)["__data__"]}
    # data["param_list_id"] = data["__data__"]["param_list_id"]
    # del data["__data__"]
    # del data["_dirty"]
    # del data["__rel__"]

    # data = {**data, **vars(one_param_list_id_result)["__data__"]}

    table.append(data)
    id += 1

display(HTML(tabulate(table, headers="keys", tablefmt="html")))

id,param_list_id,acc_test,fit_score,global_score_no_weak_acc,amount_of_user_asked_queries,classifier,dataset_name,test_fraction,sampling,cluster,nr_queries_per_iteration,with_uncertainty_recommendation,with_cluster_recommendation,uncertainty_recommendation_certainty_threshold,uncertainty_recommendation_ratio,cluster_recommendation_minimum_cluster_unity_size,cluster_recommendation_ratio_labeled_unlabeled,allow_recommendations_after_stop,stopping_criteria_uncertainty,stopping_criteria_acc,stopping_criteria_std,experiment_run_date
0,400ffd284ad157e1871abf681de85ec9,0.958008,0.963039,0.913842,980,RF,zebra,0.5,uncertainty_max_margin,MostUncertain_lc,10,True,True,0.951518,0.001,0.945348,0.578172,False,1,1,1,2020-03-26 09:10:43.852526
1,14021e1c72e255c922df4579322009a8,0.957943,0.96381,0.913958,930,RF,zebra,0.5,uncertainty_lc,MostUncertain_max_margin,10,True,True,0.927707,0.01,0.80951,0.790369,True,1,1,1,2020-03-29 23:32:41.663255
2,a66cb8f5f97e1a0c0fc6740dfbc83327,0.957488,0.971131,0.909296,456,RF,zebra,0.5,uncertainty_entropy,MostUncertain_entropy,10,True,True,0.733224,0.1,0.745651,0.817867,False,1,1,1,2020-03-25 15:29:23.000456
3,bfe5b72562d21e138376b9c88f4ff3ca,0.956447,0.969615,0.895696,518,RF,zebra,0.5,uncertainty_entropy,MostUncertain_lc,10,True,True,0.980038,0.0001,0.602102,0.928569,False,1,1,1,2020-03-30 10:54:02.237702
4,b87e70cebd5290dce22e08fd2d828240,0.956122,0.974419,0.893464,202,RF,zebra,0.5,uncertainty_lc,MostUncertain_entropy,10,True,True,0.695022,0.1,0.908576,0.863314,True,1,1,1,2020-03-28 07:24:35.035273
5,fb92945772d12d33e3b24084f251c0c6,0.955926,0.967256,0.894563,650,RF,zebra,0.5,uncertainty_lc,MostUncertain_lc,10,True,True,0.902119,0.0001,0.754432,0.712006,True,1,1,1,2020-03-26 09:26:57.642647
6,8f166777059c183085028e93faaf10ba,0.955178,0.974897,0.882877,140,RF,zebra,0.5,uncertainty_lc,MostUncertain_lc,10,True,True,0.778951,0.1,0.789256,0.614343,True,1,1,1,2020-03-26 11:34:35.539123
7,9e84b06042e8cb0c03b8d7ec91475098,0.955113,0.976264,0.770321,50,RF,zebra,0.5,uncertainty_lc,MostUncertain_max_margin,10,True,True,0.711062,0.1,0.619883,0.704723,False,1,1,1,2020-03-29 01:10:20.167619
8,62f3825485db72ee7fee6f39b095db36,0.954951,0.965326,0.878154,740,RF,zebra,0.5,uncertainty_lc,MostUncertain_entropy,10,True,True,0.596726,0.001,0.807041,0.848841,True,1,1,1,2020-03-30 09:41:27.146305
9,1dc8d89e2277fb576700392ffcf514a3,0.95482,0.975178,0.828727,110,RF,zebra,0.5,uncertainty_lc,MostUncertain_entropy,10,True,True,0.798407,0.1,0.677045,0.614915,False,1,1,1,2020-03-27 19:54:59.570851


In [5]:
# SELECT id_field, param_list_id, dataset_path, start_set_size as sss, sampling, cluster, allow_recommendations_after_stop as SA, stopping_criteria_uncertainty as SCU, stopping_criteria_std as SCS, stopping_criteria_acc as SCA, amount_of_user_asked_queries as "#q", acc_test, fit_score, global_score_norm, thread_id, end_time from experimentresult where param_list_id='31858014d685a3f1ba3e4e32690ddfc3' order by end_time, fit_score desc, param_list_id;
loaded_data = {}


def pre_fetch_data(top_n=0):
    best_param_list_id = table[top_n]["param_list_id"]

    results = (
        ExperimentResult.select()
        .where(ExperimentResult.param_list_id == best_param_list_id)
        .order_by(ExperimentResult.dataset_name)
    )

    loaded_data[top_n] = []
    for result in results:
        loaded_data[top_n].append(result)
    print("Loaded Top " + str(top_n) + " data")


pre_fetch_data(0)

Loaded Top 0 data


In [6]:
def visualise_top_n(top_n=0):
    charts = []

    alt.renderers.enable("html")

    for result in loaded_data[top_n][:]:
        metrics = loads(result.metrics_per_al_cycle)
        test_data_metrics = [
            metrics["test_data_metrics"][0][f][0]["weighted avg"]
            for f in range(0, len(metrics["test_data_metrics"][0]))
        ]
        test_acc = [
            metrics["test_data_metrics"][0][f][0]["accuracy"]
            for f in range(0, len(metrics["test_data_metrics"][0]))
        ]

        data = pd.DataFrame(
            {
                "iteration": range(0, len(metrics["all_unlabeled_roc_auc_scores"])),
                "all_unlabeled_roc_auc_scores": metrics["all_unlabeled_roc_auc_scores"],
                "query_length": metrics["query_length"],
                "recommendation": metrics["recommendation"],
                "query_strong_accuracy_list": metrics["query_strong_accuracy_list"],
                "f1": [i["f1-score"] for i in test_data_metrics],
                "test_acc": test_acc,
                #'asked_queries': [sum(metrics['query_length'][:i]) for i in range(0, len(metrics['query_length']))],
            }
        )

        # bar width
        data["asked_queries"] = data["query_length"].cumsum()
        data["asked_queries_end"] = data["asked_queries"].shift(fill_value=0)

        # print(data[['asked_queries', 'query_length']])

        data["recommendation"] = data["recommendation"].replace(
            {
                "A": "Oracle",
                "C": "Weak Cluster",
                "U": "Weak Certainty",
                "G": "Ground Truth",
            }
        )

        # data = data[:100]

        # calculate global score OHNE

        chart = (
            alt.Chart(data)
            .mark_rect(
                # point=True,
                # line=True,
                # interpolate='step-after',
            )
            .encode(
                x=alt.X("asked_queries_end", title="#asked queries (weak and oracle)"),
                x2="asked_queries",
                color=alt.Color("recommendation", scale=alt.Scale(scheme="tableau10")),
                tooltip=[
                    "iteration",
                    "f1",
                    "test_acc",
                    "all_unlabeled_roc_auc_scores",
                    "query_strong_accuracy_list",
                    "query_length",
                    "recommendation",
                ],
                # scale=alt.Scale(domain=[0,1])
            )
            .properties(title=result.dataset_name)
            .interactive()
        )
        charts.append(
            alt.hconcat(
                chart.encode(
                    alt.Y(
                        "all_unlabeled_roc_auc_scores", scale=alt.Scale(domain=[0, 1])
                    )
                ).properties(title=result.dataset_name + ": roc_auc"),
                # chart.encode(alt.Y('f1', scale=alt.Scale(domain=[0,1]))).properties(title=result.dataset_name + ': f1'),
                chart.encode(
                    alt.Y("test_acc", scale=alt.Scale(domain=[0, 1]))
                ).properties(title=result.dataset_name + ": test_acc"),
            )
        )

    return alt.vconcat(*charts).configure()


# visualise_top_n(0)

In [7]:
def compare_data(datasets):
    charts = []

    alt.renderers.enable("html")
    all_data = pd.DataFrame()

    for i, dataset in enumerate(datasets):
        for result in dataset:
            if result.dataset_name != "dwtc":
                continue
            metrics = loads(result.metrics_per_al_cycle)
            test_data_metrics = [
                metrics["test_data_metrics"][0][f][0]["weighted avg"]
                for f in range(0, len(metrics["test_data_metrics"][0]))
            ]
            test_acc = [
                metrics["test_data_metrics"][0][f][0]["accuracy"]
                for f in range(0, len(metrics["test_data_metrics"][0]))
            ]

            data = pd.DataFrame(
                {
                    "iteration": range(0, len(metrics["all_unlabeled_roc_auc_scores"])),
                    "all_unlabeled_roc_auc_scores": metrics[
                        "all_unlabeled_roc_auc_scores"
                    ],
                    "query_length": metrics["query_length"],
                    "recommendation": metrics["recommendation"],
                    "query_strong_accuracy_list": metrics["query_strong_accuracy_list"],
                    "f1": [i["f1-score"] for i in test_data_metrics],
                    "test_acc": test_acc,
                    "top_n": str(i),
                    #'asked_queries': [sum(metrics['query_length'][:i]) for i in range(0, len(metrics['query_length']))],
                }
            )

            # bar width
            data["asked_queries"] = data["query_length"].cumsum()
            data["asked_queries_end"] = data["asked_queries"].shift(fill_value=0)

            # print(data[['asked_queries', 'query_length']])

            data["recommendation"] = data["recommendation"].replace(
                {
                    "A": "Oracle",
                    "C": "Weak Cluster",
                    "U": "Weak Certainty",
                    "G": "Ground Truth",
                }
            )

            all_data = pd.concat([all_data, data])

        # data = data[:100]

        # calculate global score OHNE

    points = (
        alt.Chart(all_data,)
        .mark_point()
        .encode(
            x="asked_queries:Q",
            y="test_acc:Q",
            shape="recommendation:N",
            color="recommendation:N",
        )
    )

    lines = (
        alt.Chart(all_data,)
        .mark_line(interpolate="step-before")
        .encode(
            x=alt.X(
                "asked_queries:Q", scale=alt.Scale(domain=[-1, 2900], type="linear")
            ),
            y=alt.Y("test_acc:Q", scale=alt.Scale(domain=[0.3, 0.82], type="linear")),
            color="top_n:N",
            # shape="top_n",
            # strokeDash="top_n",
            # shape="recommendation",
            # color="recommendation:N",
        )
    )

    return (
        alt.layer(points, lines)
        .resolve_scale(color="independent", shape="independent")
        .configure_legend(
            orient="bottom-right",
            padding=10,
            fillColor="#f1f1f1",
            labelOpacity=0.9,
            labelOverlap=True,
            # opacity=0.6,
        )
        .properties(title="Comparison of ")
        .interactive()
    )


compare_data(
    [
        loaded_data[0],
        loaded_data[1],
        # loaded_data[2],
    ]  # , loaded_data[3], loaded_data[4]]
)

KeyError: 1

In [8]:
results = (
    ExperimentResult.select(
        ExperimentResult.amount_of_user_asked_queries, ExperimentResult.acc_test,
    ).where(
        # (ExperimentResult.stopping_criteria_acc == 1)
        # & (ExperimentResult.stopping_criteria_std == 1)
        # & (ExperimentResult.stopping_criteria_uncertainty == 1)
        # & (ExperimentResult.sampling == "random")
        #  & (ExperimentResult.cluster == "random")
        # & (ExperimentResult.amount_of_user_asked_queries == 10)
        # & (ExperimentResult.with_cluster_recommendation == True)
        # & (ExperimentResult.with_uncertainty_recommendation == True)
        (ExperimentResult.amount_of_user_asked_queries < 10)
        & (ExperimentResult.dataset_name == "dwtc")
        & (
            ExperimentResult.experiment_run_date > (datetime(2020, 3, 24, 14, 0))
        )  # no stopping criterias
    )
    #  .group_by(ExperimentResult.amount_of_user_asked_queries)
    .order_by(
        ExperimentResult.amount_of_user_asked_queries.desc(),
        ExperimentResult.acc_test.desc()
        #  peewee.fn.MAX(ExperimentResult.acc_test)
        #  peewee.fn.COUNT(ExperimentResult.).desc(),
        #  peewee.fn.AVG(ORDER_BY).desc(),
    )
    #  .limit(10)
)
data = []
for result in results:
    data.append((result.amount_of_user_asked_queries, result.acc_test))
    #  print("{}\t{}".format(result.amount_of_user_asked_queries, result.max))
pd.DataFrame(data)

Unnamed: 0,0,1
0,9,0.515057
1,9,0.506750
2,9,0.481828
3,9,0.472828
4,9,0.455175
...,...,...
7208,0,0.111803
7209,0,0.110765
7210,0,0.109034
7211,0,0.099342


In [10]:
def better_results_top(top_n, budget, weak_clust, weak_cert):
    # select best result for budget of 1500 without WS
    results = (
        ExperimentResult.select(
            ExperimentResult.param_list_id,
            peewee.fn.AVG(ExperimentResult.fit_score).alias("avg_fit_score"),
            peewee.fn.STDDEV(ExperimentResult.fit_score).alias("stddev_fit_score"),
            peewee.fn.AVG(ExperimentResult.global_score_no_weak_acc).alias(
                "avg_global_score"
            ),
            peewee.fn.STDDEV(ExperimentResult.global_score_no_weak_acc).alias(
                "stddev_global_score"
            ),
            peewee.fn.AVG(ExperimentResult.amount_of_user_asked_queries).alias(
                "avg_amount_oracle"
            ),
            peewee.fn.STDDEV(ExperimentResult.amount_of_user_asked_queries).alias(
                "std_amount_oracle"
            ),
            peewee.fn.COUNT(ExperimentResult.param_list_id).alias("count"),
        )
        .where(
            (ExperimentResult.amount_of_user_asked_queries < budget)
            & (ExperimentResult.dataset_name == "dwtc")
            # & (ExperimentResult.experiment_run_date > (datetime(2020, 3, 24, 14, 0)))
            # & (ExperimentResult.experiment_run_date > (datetime(2020, 5, 8, 9, 20)))
            & (ExperimentResult.with_cluster_recommendation == weak_clust)
            & (ExperimentResult.with_uncertainty_recommendation == weak_cert)
            # & (peewee.fn.COUNT(ExperimentResult.id_field) == 3)
            # no stopping criterias
        )
        .group_by(ExperimentResult.param_list_id)
        .order_by(
            peewee.fn.COUNT(ExperimentResult.id_field).desc(),
            peewee.fn.AVG(ExperimentResult.acc_test).desc(),
        )
        .limit(1)
        .offset(top_n)
    )

    table = []
    id = 0
    for result in results:
        data = {**{"id": id}, **vars(result)}
        data["param_list_id"] = data["__data__"]["param_list_id"]
        del data["__data__"]
        del data["_dirty"]
        del data["__rel__"]
        table.append(data)
        id += 1

    display(HTML(tabulate(table, headers="keys", tablefmt="html")))

    best_param_list_id = table[0]["param_list_id"]

    results = (
        ExperimentResult.select()
        .where(ExperimentResult.param_list_id == best_param_list_id)
        .order_by(ExperimentResult.dataset_name)
    )

    loaded_data[0] = []
    for result in results:
        loaded_data[0].append(result)

    for result in loaded_data[0][:]:
        metrics = loads(result.metrics_per_al_cycle)
        test_data_metrics = [
            metrics["test_data_metrics"][0][f][0]["weighted avg"]
            for f in range(0, len(metrics["test_data_metrics"][0]))
        ]
        test_acc = [
            metrics["test_data_metrics"][0][f][0]["accuracy"]
            for f in range(0, len(metrics["test_data_metrics"][0]))
        ]

        data = pd.DataFrame(
            {
                "iteration": range(0, len(metrics["all_unlabeled_roc_auc_scores"])),
                "all_unlabeled_roc_auc_scores": metrics["all_unlabeled_roc_auc_scores"],
                "query_length": metrics["query_length"],
                "recommendation": metrics["recommendation"],
                "query_strong_accuracy_list": metrics["query_strong_accuracy_list"],
                "f1": [i["f1-score"] for i in test_data_metrics],
                "test_acc": test_acc,
                "fit_score": result.fit_score,
                #'asked_queries': [sum(metrics['query_length'][:i]) for i in range(0, len(metrics['query_length']))],
            }
        )
        data["acc_diff"] = data["test_acc"] - data["test_acc"].shift(1)
    display(
        HTML(
            tabulate(
                data.groupby(["recommendation"]).sum(), headers="keys", tablefmt="html"
            )
        )
    )


better_results_top(0, 211, False, False)
better_results_top(0, 210, True, False)
better_results_top(0, 210, False, True)
for i in range(1, 10):
    better_results_top(i, 210, True, True)
# data

id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.862689,0.00102836,0.610372,0.00662775,210,0,3,1fc28c6337f490f660e217efcf890664


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,231,18.6817,210,0,15.1309,15.4105,18.104,0.456559
G,0,0.461268,4,0,0.347772,0.34891,0.862095,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.828293,0.00284917,0.470846,0.0560511,205,2,3,89ae8d838bc43213045ac997791f12f6


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1379,41.0175,208,0,31.4377,31.9474,39.6681,0.396331
C,217,6.70535,124,8,4.88276,5.01315,6.61134,-0.086189
G,0,0.692431,4,0,0.454438,0.434753,0.826418,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.824131,0.0057425,0.416114,0.0490137,202,0,3,20886121829004cdf8033c640cc44430


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1326,42.8629,203,0,29.5218,30.7688,42.345,0.365524
G,0,0.692151,4,0,0.348198,0.384562,0.830294,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.824352,0.00871912,0.478214,0.0356563,204,1,3,3083a697a26eaecc588773e56ee2d1d5


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1177,35.2161,203,0,24.7714,25.1094,34.9257,0.384562
C,201,8.19993,138,10,5.32409,5.44894,8.31565,0.0311526
G,0,0.687968,4,0,0.297591,0.336449,0.831565,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.817436,0.0210207,0.471838,0.0226446,202,0,3,e1ac5d25323cc08daf685f5f140c2af0


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1289,38.8414,203,0,27.9335,28.8221,38.5305,0.468328
C,196,5.75765,42,6,4.1855,4.28487,5.73858,-0.146071
G,0,0.694023,4,0,0.399575,0.410869,0.819797,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.817193,0.0138386,0.413304,0.0788551,201,0,3,72ddd64e908c10010735f6958253fd75


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1727,44.6702,201,0,28.3793,29.3925,45.249,0.473866
C,164,4.09633,29,5,2.67165,2.76982,4.04009,0.00623053
G,0,0.701035,4,0,0.197231,0.233991,0.808018,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.816021,0.0106201,0.41657,0.0557495,203,2,3,c6810e1ad1663252927a726924a35594


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1884,38.4173,204,0,23.9001,24.8328,37.0078,0.172032
C,539,10.8242,142,13,6.57438,6.88924,10.4587,-0.0574593
G,0,0.764367,4,0,0.504239,0.517134,0.804518,0.0
U,1493,24.9235,139,24,17.3358,17.6542,23.331,0.0775355


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.815099,0.0114759,0.426579,0.0495865,203,2,3,07016987c128a066cc958907a1d80759


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1640,44.7636,206,0,29.1355,30.2994,44.4102,0.373832
C,130,4.10526,35,5,2.8236,2.86327,4.11205,0.0114226
G,0,0.70671,4,0,0.365961,0.352717,0.822411,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.81413,0.0260118,0.424114,0.0471817,203,1,3,547b698376c3c7ef62fe61f43135384f


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1295,36.7742,202,0,24.3716,25.1513,35.9629,0.447906
C,301,8.68549,121,11,5.35304,5.62548,8.79094,-0.0107304
G,0,0.670383,4,0,0.192618,0.263413,0.799176,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.812877,0.00851688,0.460099,0.0374527,204,4,3,416a7d125ec317a7309fd34373416277


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1265,41.3965,201,0,29.1497,29.7833,39.8844,0.395639
C,61,1.70774,8,2,1.27058,1.2558,1.62793,-0.00657667
G,0,0.73557,4,0,0.310047,0.334372,0.813967,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.81161,0.0192687,0.451204,0.0138032,204,0,3,b514229528d822602108c6aa066f14db


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1431,44.4902,205,0,31.3476,31.6726,42.1086,0.368986
G,0,0.720912,4,0,0.322002,0.325026,0.794501,0.0


id,avg_fit_score,stddev_fit_score,avg_global_score,stddev_global_score,avg_amount_oracle,std_amount_oracle,count,param_list_id
0,0.81097,0.0134758,0.455474,0.06427,204,1,3,d296121de2480ffa42b513cf2e6ceaf7


recommendation,iteration,all_unlabeled_roc_auc_scores,query_length,query_strong_accuracy_list,f1,test_acc,fit_score,acc_diff
A,1002,35.7888,203,0,27.1992,27.4123,34.7114,0.425753
C,79,3.36345,42,4,2.47192,2.49498,3.30585,-0.0259605
G,0,0.72636,4,0,0.295837,0.344064,0.826463,0.0
