In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to local source files are picked up.
%load_ext autoreload
%autoreload 2

# Select the best configuration of each method on the validation set

In [2]:
import pandas as pd
import pathlib

import pickle
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

In [3]:
# Noise level of the synthetic experiments summarised by this notebook.
noise = 0.1

# Folder that will receive the summary produced by this notebook.
directory_name = 'synth_data_summary_results_val_test_start_drift_5'

# Folder holding the per-dataset drift result pickles for the chosen noise level.
input_dir = f'results-5-5/results-overall-drift-datasets-noise-{noise}_start_5'

## Load the results and define the validation and test splits

In [4]:
import numpy as np

# An experiment is predicted as "drift" when more than `threshold` drift points are detected.
threshold = 0
# Number of drift experiments AND of no-drift experiments reserved for validation;
# all remaining experiments form the test split.
n_val = 15


def _get_metrics(y_true, y_pred):
    """Score binary drift predictions against the ground truth.

    Parameters
    ----------
    y_true, y_pred : array-like of 0/1 labels (1 = drift).

    Returns
    -------
    tuple
        (accuracy, macro-averaged F1, false-positive rate, false-negative rate).
        A rate is NaN when its denominator is zero, i.e. when the split holds
        no negative (for FPR) or no positive (for FNR) ground-truth samples.
    """
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro')

    # Pin the label set so the matrix is always 2x2; without it the unpacking
    # below fails whenever only one class appears in both y_true and y_pred.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    fpr = fp / (fp + tn) if (fp + tn) > 0 else float('nan')
    fnr = fn / (fn + tp) if (fn + tp) > 0 else float('nan')

    return acc, f1, fpr, fnr


# {dataset: {method configuration: {'acc', 'f1', 'fpr', 'fnr'}}} for each split.
res_dataset_method_val = {}
res_dataset_method_test = {}

for exp_type in ["agrawal", "sea", "led", "hyper", "stagger"]:
    filename = f"{input_dir}/{exp_type}_drift_results_overall.pkl"
    if not pathlib.Path(filename).exists():
        # Datasets without a result file are skipped and get no entry in the result dicts.
        print(f"File {filename} not found")
        continue

    # NOTE: unpickling can execute arbitrary code; only load result files you produced yourself.
    with open(filename, "rb") as f:
        overall_drift_results = pickle.load(f)

    res_dataset_method_val[exp_type] = {}
    res_dataset_method_test[exp_type] = {}
    for method in overall_drift_results:
        overall_drift_result = overall_drift_results[method]
        n_experiments = len(overall_drift_result)

        # overall_drift_result[i]['drift'] is 1 if experiment i has drift, 0 otherwise.
        # This is the ground truth. Half of the experiments have drift, half do not.
        gt_np = np.array([overall_drift_result[i]['drift'] for i in range(n_experiments)])

        # Drift is predicted when the number of detected drift points exceeds the threshold.
        predictions_np = np.array(
            [int(overall_drift_result[i]['num_pts_detected'] > threshold) for i in range(n_experiments)]
        )

        # Class-balanced split: the first n_val experiments of each class go to
        # validation, the remaining ones of each class go to test.
        idxs_pos, idxs_neg = np.where(gt_np == 1)[0], np.where(gt_np == 0)[0]
        idxs_val = np.concatenate([idxs_pos[:n_val], idxs_neg[:n_val]])
        idxs_test = np.concatenate([idxs_pos[n_val:], idxs_neg[n_val:]])

        for results, idxs in ((res_dataset_method_val, idxs_val), (res_dataset_method_test, idxs_test)):
            acc, f1, fpr, fnr = _get_metrics(gt_np[idxs], predictions_np[idxs])
            results[exp_type][method] = {'acc': acc, 'f1': f1, 'fpr': fpr, 'fnr': fnr}

## Select the best configuration on the validation set

In [5]:
# Validation metric used to rank the configurations of each base method.
metric = 'f1'

# {dataset: [best configuration name of each base method]}
best_methods = {}

# Iterate over the datasets that were actually loaded, so a dataset skipped
# because its result file was missing does not raise a KeyError here.
for exp_type, result_dataset_i in res_dataset_method_val.items():
    # Group configuration names by base method (the text before the first '_').
    # Plain dicts/lists keep insertion order; the previous set-based grouping made
    # both the tie-breaking in max() and the order of the selected methods depend
    # on string hash randomisation, i.e. they could change between kernel restarts.
    configs_by_base = {}
    for method in result_dataset_i:
        configs_by_base.setdefault(method.split('_')[0], []).append(method)

    # For every base method keep the configuration with the highest validation
    # score; on ties max() returns the first one in the stored order.
    best_methods[exp_type] = [
        max(configs, key=lambda config: result_dataset_i[config][metric])
        for configs in configs_by_base.values()
    ]

## Collect the test results of the best configurations

In [6]:
# {metric: {dataset: {base method: test-set value}}} for the configurations
# selected on the validation split.
single_noise_results_test = {}

# Iterate over the datasets for which a selection exists instead of a hard-coded
# list, so datasets without results are left out rather than raising a KeyError.
for exp_type, selected_methods in best_methods.items():
    rest_method_i_test = res_dataset_method_test[exp_type]
    for best_method in selected_methods:
        # Results are reported under the base method name (text before the first '_').
        base_method = best_method.split('_')[0]
        for metric_i, value in rest_method_i_test[best_method].items():
            per_dataset = single_noise_results_test.setdefault(metric_i, {})
            per_dataset.setdefault(exp_type, {})[base_method] = value

In [7]:
# One table per metric: rows are base methods, columns are datasets.
for metric_i, per_dataset in single_noise_results_test.items():
    print(metric_i)
    metric_table = pd.DataFrame(per_dataset).round(4)
    display(metric_table)

acc


Unnamed: 0,agrawal,sea,led,hyper,stagger
adwin,1.0,0.8429,0.5,0.9143,1.0
hddma,0.9857,0.5429,0.4857,0.9143,1.0
fet,0.9857,0.5429,0.4571,0.7714,1.0
chi2,0.9857,0.7,0.4857,0.7571,1.0
ddm,0.9571,0.5429,0.4571,0.8286,1.0
eddm,0.5857,0.5571,0.4429,0.5714,1.0
pagehinkley,1.0,0.5714,0.4714,0.9286,1.0
kswin,0.8857,0.4429,0.5143,0.6143,0.9429


f1


Unnamed: 0,agrawal,sea,led,hyper,stagger
adwin,1.0,0.8402,0.3571,0.9137,1.0
hddma,0.9857,0.5105,0.4701,0.9142,1.0
fet,0.9857,0.44,0.3848,0.7707,1.0
chi2,0.9857,0.6893,0.3269,0.7547,1.0
ddm,0.9571,0.4558,0.3137,0.8263,1.0
eddm,0.4999,0.5035,0.3985,0.4898,1.0
pagehinkley,1.0,0.4898,0.3942,0.9282,1.0
kswin,0.8849,0.3274,0.5079,0.5901,0.9427


fpr


Unnamed: 0,agrawal,sea,led,hyper,stagger
adwin,0.0,0.0286,0.0286,0.0,0.0
hddma,0.0286,0.7143,0.6857,0.1143,0.0
fet,0.0286,0.0286,0.8857,0.1714,0.0
chi2,0.0286,0.1143,0.0286,0.1429,0.0
ddm,0.0857,0.0571,0.0857,0.0571,0.0
eddm,0.8286,0.7714,0.8286,0.8286,0.0
pagehinkley,0.0,0.0286,0.1714,0.1429,0.0
kswin,0.0286,0.1429,0.3714,0.6286,0.0


fnr


Unnamed: 0,agrawal,sea,led,hyper,stagger
adwin,0.0,0.2857,0.9714,0.1714,0.0
hddma,0.0,0.2,0.3429,0.0571,0.0
fet,0.0,0.8857,0.2,0.2857,0.0
chi2,0.0,0.4857,1.0,0.3429,0.0
ddm,0.0,0.8571,1.0,0.2857,0.0
eddm,0.0,0.1143,0.2857,0.0286,0.0
pagehinkley,0.0,0.8286,0.8857,0.0,0.0
kswin,0.2,0.9714,0.6,0.1429,0.1143


# Store the results

In [8]:
# Create the output folder if needed, then pickle the per-metric test results.
output_dir = pathlib.Path(directory_name)
output_dir.mkdir(parents=True, exist_ok=True)

results_path = f'{directory_name}/results-overall-best-methods-noise-{noise}.pkl'
with open(results_path, 'wb') as out_file:
    pickle.dump(single_noise_results_test, out_file)

In [9]:
# Show the path of the pickle file written by the previous cell.
print(f'{directory_name}/results-overall-best-methods-noise-{noise}.pkl')

synth_data_summary_results_val_test_start_drift_5/results-overall-best-methods-noise-0.1.pkl


In [None]:
# Show the folder that holds the stored summary results.
print(directory_name)

In [None]:
# !cp -r src/synth/synth_data_summary_results_val_test_5-5-best/ /home/fgiobergia/div-mitigation/src/

# Reload the stored results to check and visualize them

In [None]:

# Read back the file written above and render one table per metric as a sanity check.
stored_path = f'{directory_name}/results-overall-best-methods-noise-{noise}.pkl'
with open(stored_path, 'rb') as in_file:
    single_noise_results_base = pickle.load(in_file)

for metric_i, per_dataset in single_noise_results_base.items():
    print(metric_i)
    display(pd.DataFrame(per_dataset).round(4))

# Inspect the results

## All

In [None]:
# Test-set metrics of every method configuration, one table per dataset
# (rows: configurations, columns: metrics).
for exp_type, per_method in res_dataset_method_test.items():
    print(exp_type)
    df = pd.DataFrame(per_method).T
    display(df)
    print("\n\n")

# Best configurations on validation (test-set metrics)

In [None]:
# Test-set metrics restricted to the configurations selected on validation.
# NOTE(review): only the first dataset is displayed, matching the original loop
# that stopped after one iteration; confirm whether all datasets were intended.
for exp_type in list(res_dataset_method_val)[:1]:
    print(exp_type)
    all_configs = pd.DataFrame(res_dataset_method_test[exp_type]).T
    df = all_configs.loc[best_methods[exp_type]]
    display(df)
    print("\n\n")