In [73]:
import json
import os
import sys
from copy import deepcopy
from pathlib import Path
import pandas as pd
from utils import load_json
from collections import OrderedDict

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# lib.env.OUTPUT_DIR = lib.env.PROJECT_DIR / 'output'
# lib.env.DATA_DIR = lib.env.PROJECT_DIR / 'data'

In [74]:
ALL_DATASETS = ['1_ALOI', '12_fault', '17_InternetAds', '19_landsat', '20_letter', \
            '22_magic.gamma', '23_mammography', '29_Pima', '30_satellite', '36_speech', \
                '41_Waveform', '47_yeast']
NORMAL_STRATEGIES = ['RandomSampling', 'LeastConfidence', 'MarginSampling', 'EntropySampling', \
        'LeastConfidenceDropout', 'MarginSamplingDropout', 'EntropySamplingDropout', \
        'BALDDropout', 'VarRatio', 'MeanSTD', 'MetaSampling']
EMBEDDING_STRATEGIES = ['KMeansSampling', 'KMeansSamplingGPU', 'KCenterGreedy', 'KCenterGreedyPCA', \
            'BadgeSampling', 'AdversarialBIM', 'AdversarialDeepFool']
WAAL_STRATEGY = 'WAAL'
LPL_STRATEGY = 'LossPredictionLoss'

DETAILS = ['init_labels', 'budget', 'batch_size']
BUDGET_RATIO_RANGE = [0.05, 0.1, 0.25, 0.5, 0.75]

In [75]:
def sort(df, by):
    if isinstance(by, str):
        by = [by]
    return df.sort_values(['n_objects'] + by, ascending=[True] + ['score' not in x for x in by]).reset_index(drop=True)

def load_record(output):
    """
    Load record from stats.json with a single random seed.
    Output: a dictionary with records
    """
    output = Path(output)
    if not output.exists():
        return None
    path = output / 'stats.json'
    if not path.exists():
        print(f'WARNING! This path does not exist: {path}')
        return None
    stats = load_json(path)
    metrics = stats.get('metrics')
    if metrics is None:
        return None

    dataset = stats['dataset']
    budget = stats['budget_ratio']
    batch_size = stats['batch_size']
    init_labels = stats['init_labeled']
    strategy = stats['strategy']
    base_model = stats['model']
    process = OrderedDict(stats['process'])

    r = {
        'dataset': dataset,
        'strategy': strategy,
        'base_model': base_model,
        'init_labels': init_labels,
        'budget': budget,
        'batch_size': batch_size,
        'all_outliers': process.popitem(last=True)[1][1], #TODO: make it more robust
        'F_aucroc': metrics['F-aucroc'],
        'F_aucpr': metrics['F_aucpr'],
        'F_f1': metrics['F_f1'],
    }
    
    return r

def aggregate(df):
    aggrs = dict(
        #TODO: make table that supports ensemble statistics
        # base_model=('base_model', 'first'),
        # budget=('budget', 'first'),
        init_labels=('init_labels', 'first'),
        batch_size=('batch_size', 'first'),
        all_outliers=('all_outliers', 'first'),
        F_aucroc=('F_aucroc', 'mean'),
        F_aucpr=('F_aucpr', 'mean'),
        F_f1=('F_f1', 'first'),
        count=('F_aucroc', 'count'),
    )
    if 'train_score' in df.columns:
        aggrs.update(dict(
            train_score=('train_score', 'mean'),
            train_std=('train_score', 'std'),
        ))
    df = df.groupby(['dataset', 'strategy', 'base_model', 'budget']).agg(**aggrs)
    df['count'] = df['count'].astype(int)
    return df.reset_index().fillna(0.0)

def format_scores(df, precision):
    def f(record):
        for k, v in list(record.items()):
            if isinstance(v, float):
                record[k] = round(v, precision)
        return record
    return df.apply(f, axis=1)

def make_df(outputs_and_names):
    """
    Make dataframe from a list of outputs and names.
    Output: unaggregated dataframe
    """
    df = []
    for output, strategy_name in outputs_and_names:
        record = load_record(output)
        if not record:
            continue
        df.append(record)
    # df = sort(pd.DataFrame(df).fillna(0.0), 'val_score').reset_index(drop=True)
    df = pd.DataFrame(df).fillna(0.0).reset_index(drop=True)
    df.sort_values(['F_aucroc', 'F_aucpr'], ascending=[True, True], inplace=True)
    # df.sort_values('F_aucroc', ascending=True, inplace=True)
    return df

def build_report_all(outputs_and_names):
    df = make_df(outputs_and_names)
    df = aggregate(df)
    df = format_scores(df, 4)
    df = df.set_index(['dataset'] + ['strategy'] + ['base_model'] + DETAILS)
    return df[['all_outliers', 'F_aucroc', 'F_aucpr', 'F_f1', 'count']]

def build_report_single(dataset, base_model, strategy, budget_ratio):
    output = f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}'
    output = Path(output)
    if not output.exists():
        return None
    path = output / 'stats.json'
    if not path.exists():
        print(f'WARNING! This path does not exist: {path}')
        return None
    stats = load_json(path)
    process = stats.get('process')
    if process is None:
        return None
    df = pd.DataFrame.from_dict(process, orient='index')
    df.columns = ['all_X', 'all_outliers', 'new_X','new_outliers' ,'aucroc','aucpr','f1']
    # df = aggregate(df)
    df = format_scores(df, 4)
    return df

#### Comparison of different strategies with the same budget ratio (Base model: Devnet)

In [76]:
budget_ratio = 0.5  # 0.05 0.1 0.25 0.5 0.75, specify budget ratio here
BASE_MODEL = 'DevNet'
outputs_and_names = []
for strategy, base_model, datasets in [
    (s, BASE_MODEL, ALL_DATASETS) for s in NORMAL_STRATEGIES + EMBEDDING_STRATEGIES
]:
    for dataset in datasets:
        outputs_and_names.append((f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}', strategy))

for strategy, base_model, datasets in [
    (WAAL_STRATEGY, 'WAAL', ALL_DATASETS),
    (LPL_STRATEGY, 'LPL', ALL_DATASETS),
]:
    for dataset in datasets:
        outputs_and_names.append((f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}', strategy))

build_report_all(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,all_outliers,F_aucroc,F_aucpr,F_f1,count
dataset,strategy,base_model,init_labels,budget,batch_size,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
12_fault,AdversarialBIM,DevNet,136,0.5,68,331,0.6467,0.4759,"[0.7966,0.181 ]",1
12_fault,AdversarialDeepFool,DevNet,136,0.5,68,355,0.6645,0.4873,"[0.788 ,0.1466]",1
12_fault,BALDDropout,DevNet,136,0.5,68,373,0.7294,0.5906,"[0.8009,0.1983]",1
12_fault,BadgeSampling,DevNet,136,0.5,68,262,0.7149,0.5791,"[0.803 ,0.2069]",1
12_fault,EntropySampling,DevNet,136,0.5,68,356,0.6692,0.5144,"[0.7944,0.1724]",1
12_fault,EntropySamplingDropout,DevNet,136,0.5,68,356,0.6692,0.5144,"[0.7944,0.1724]",1
12_fault,KCenterGreedy,DevNet,136,0.5,68,299,0.7434,0.5712,"[0.788 ,0.1466]",1
12_fault,KCenterGreedyPCA,DevNet,136,0.5,68,280,0.7542,0.5893,"[0.7923,0.1638]",1
12_fault,KMeansSampling,DevNet,136,0.5,68,269,0.6678,0.4973,"[0.7923,0.1638]",1
12_fault,KMeansSamplingGPU,DevNet,136,0.5,68,256,0.6661,0.5155,"[0.8009,0.1983]",1


#### Comparison of different strategies with the same budget ratio (Base model: XGBOD)

In [77]:
budget_ratio = 0.5  # 0.05 0.1 0.25 0.5 0.75, specify budget ratio here
BASE_MODEL = 'XGBOD'
outputs_and_names = []
for strategy, base_model, datasets in [
    (s, BASE_MODEL, ALL_DATASETS) for s in NORMAL_STRATEGIES
]:
    for dataset in datasets:
        outputs_and_names.append((f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}', strategy))

for strategy, base_model, datasets in [
    ('WAAL', 'WAAL', ALL_DATASETS),
    ('LPL', 'LossPredictionLoss', ALL_DATASETS),
]:
    for dataset in datasets:
        outputs_and_names.append((f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}', strategy))
build_report_all(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,all_outliers,F_aucroc,F_aucpr,F_f1,count
dataset,strategy,base_model,init_labels,budget,batch_size,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
12_fault,BALDDropout,XGBOD,136,0.5,68,271,0.834,0.7197,"[0.8073,0.2241]",1
12_fault,EntropySampling,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,EntropySamplingDropout,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,LeastConfidence,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,LeastConfidenceDropout,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,MarginSampling,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,MarginSamplingDropout,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,MeanSTD,XGBOD,136,0.5,68,365,0.8274,0.724,"[0.8073,0.2241]",1
12_fault,MetaSampling,XGBOD,136,0.5,68,336,0.799,0.6793,"[0.8051,0.2155]",1
12_fault,RandomSampling,XGBOD,136,0.5,68,268,0.809,0.7143,"[0.8116,0.2414]",1


#### Comparison of different base models (DevNet vs. XGBOD)

In [78]:
budget_ratio = 0.5  # 0.05 0.1 0.25 0.5 0.75, specify budget ratio here
BASE_MODEL_1 = 'DevNet'
BASE_MODEL_2 = 'XGBOD'
outputs_and_names = []
for strategy, base_model, datasets in [
    (s, BASE_MODEL_1, ALL_DATASETS) for s in NORMAL_STRATEGIES
]:
    for dataset in datasets:
        outputs_and_names.append((f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}', strategy))

for strategy, base_model, datasets in [
    (s, BASE_MODEL_2, ALL_DATASETS) for s in NORMAL_STRATEGIES
]:
    for dataset in datasets:
        outputs_and_names.append((f'output/{dataset}/{strategy}/{base_model}/{budget_ratio}', strategy))

build_report_all(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,all_outliers,F_aucroc,F_aucpr,F_f1,count
dataset,strategy,base_model,init_labels,budget,batch_size,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
12_fault,BALDDropout,DevNet,136,0.5,68,373,0.7294,0.5906,"[0.8009,0.1983]",1
12_fault,BALDDropout,XGBOD,136,0.5,68,271,0.834,0.7197,"[0.8073,0.2241]",1
12_fault,EntropySampling,DevNet,136,0.5,68,356,0.6692,0.5144,"[0.7944,0.1724]",1
12_fault,EntropySampling,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,EntropySamplingDropout,DevNet,136,0.5,68,356,0.6692,0.5144,"[0.7944,0.1724]",1
12_fault,EntropySamplingDropout,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,LeastConfidence,DevNet,136,0.5,68,356,0.6692,0.5144,"[0.7944,0.1724]",1
12_fault,LeastConfidence,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1
12_fault,LeastConfidenceDropout,DevNet,136,0.5,68,356,0.6692,0.5144,"[0.7944,0.1724]",1
12_fault,LeastConfidenceDropout,XGBOD,136,0.5,68,377,0.8103,0.7371,"[0.8158,0.2586]",1


#### Comparison of different budgets

In [79]:
# specify your input here
DATASET = '12_fault'
BASE_MODEL = 'DevNet'
STRATEGY = 'BALDDropout'
outputs_and_names = []

for ratio in BUDGET_RATIO_RANGE:
    outputs_and_names.append((f'output/{DATASET}/{STRATEGY}/{BASE_MODEL}/{ratio}', strategy))

build_report_all(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,all_outliers,F_aucroc,F_aucpr,F_f1,count
dataset,strategy,base_model,init_labels,budget,batch_size,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
12_fault,BALDDropout,DevNet,136,0.05,32,71,0.6785,0.501,"[0.788 ,0.1466]",1
12_fault,BALDDropout,DevNet,136,0.1,32,104,0.6765,0.5187,"[0.7923,0.1638]",1
12_fault,BALDDropout,DevNet,136,0.25,34,208,0.6838,0.5186,"[0.7901,0.1552]",1
12_fault,BALDDropout,DevNet,136,0.5,68,373,0.7294,0.5906,"[0.8009,0.1983]",1
12_fault,BALDDropout,DevNet,136,0.75,102,434,0.7056,0.5706,"[0.7987,0.1897]",1


#### Single active learning process

In [80]:
budget_ratio = 0.5 # 0.05 0.1 0.25 0.5 0.75, specify budget ratio here
DATASET = '12_fault'
BASE_MODEL = 'DevNet'
STRATEGY = 'BALDDropout'

build_report_single(DATASET, BASE_MODEL, STRATEGY, budget_ratio)

Unnamed: 0,all_X,all_outliers,new_X,new_outliers,aucroc,aucpr,f1
round_0,1358,38,68,38,0.6538,0.5086,"[0.7923,0.1638]"
round_1,1358,68,68,30,0.6711,0.5125,"[0.7923,0.1638]"
round_2,1358,95,68,27,0.697,0.524,"[0.7923,0.1638]"
round_3,1358,130,68,35,0.7078,0.56,"[0.7944,0.1724]"
round_4,1358,164,68,34,0.7162,0.559,"[0.7966,0.181 ]"
round_5,1358,202,68,38,0.7177,0.5661,"[0.7966,0.181 ]"
round_6,1358,248,68,46,0.7218,0.5677,"[0.7966,0.181 ]"
round_7,1358,288,68,40,0.7305,0.5831,"[0.7944,0.1724]"
round_8,1358,306,68,18,0.7266,0.5809,"[0.7966,0.181 ]"
round_9,1358,343,68,37,0.7216,0.5824,"[0.803 ,0.2069]"
