In [11]:
import comet_ml as comet 
import os

import IPython
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from typing import Dict, List, Any

# Credentials: the Comet client reads its key from the COMET_API_KEY environment
# variable. `setdefault` keeps a key that is already exported in the environment
# instead of overwriting it with the empty placeholder; never commit a real key
# into this cell.
os.environ.setdefault("COMET_API_KEY", "")
# Shared Comet API client used by the loading helpers below.
API = comet.API()

### General Setup

**NOTE:** The notebooks are expected to be run **inside VS Code**, because the path resolution for `task_configs` relies on the `__vsc_ipynb_file__` variable that is only available there. If you run a notebook outside of VS Code, please adjust the following lines:

```py
notebook_name = "/".join(
    IPython.extract_module_locals()[1]["__vsc_ipynb_file__"].split("/")[-5:]
)
```

The cell below defines the following configuration attributes, which may need to be adjusted when the experimental design changes:

- `metrics`: The keys of this dictionary are the metric names exactly as they are displayed on `comet`; the values are the types to which the fetched metric values are cast.
  
- `parameters`: General information about the experimental setup that was passed as arguments when an experiment was executed. The keys are the parameter names; the values are the types to which the fetched values are cast.
  
- `task_names`: The names of the data sets on which the Outlier Detection Strategies were trained. Prefixed with `version`, they are also the names of the `comet` projects that are loaded.
  
- `task_configs`: Maps each task name (without the `version` prefix) to the path of its configuration file.

In [12]:
# Names of the metrics fetched for every experiment, exactly as they are logged
# on comet. Every metric is cast to `float`. The order of the names is kept as
# the insertion order of the dictionary.
metrics = dict.fromkeys(
    [
        "AutoFilter_Chen_Like_HTL Count",
        "AutoFilter_Chen_Like_avg_duration",
        "AutoFilter_Chen_Like_medF1 (No HTL)",
        "AutoFilter_Chen_Like_medF1 (With HTL)",
        "AutoFilter_Chen_Like_avgF1 (random replacement)",
        "AutoFilter_Chen_Like_avgF1 (No HTL)",
        "AutoFilter_Chen_Like_avgF1 (With HTL)",
        "AutoFilter_Chen_Like_medF1 (random replacement)",
        # HDBScan
        "HDBScanFilter_HTL Count",
        "HDBScanFilter_avg_duration",
        "HDBScanFilter_medF1 (No HTL)",
        "HDBScanFilter_medF1 (With HTL)",
        "HDBScanFilter_medF1 (random replacement)",
        "HDBScanFilter_avgF1 (No HTL)",
        "HDBScanFilter_avgF1 (With HTL)",
        "HDBScanFilter_avgF1 (random replacement)",
        # Isolation Forest
        "IsolationForestFilter_HTL Count",
        "IsolationForestFilter_avg_duration",
        "IsolationForestFilter_avgF1 (No HTL)",
        "IsolationForestFilter_avgF1 (With HTL)",
        "IsolationForestFilter_avgF1 (random replacement)",
        "IsolationForestFilter_medF1 (No HTL)",
        "IsolationForestFilter_medF1 (With HTL)",
        "IsolationForestFilter_medF1 (random replacement)",
        # Local Outlier Factor
        "LocalOutlierFactorFilter_HTL Count",
        "LocalOutlierFactorFilter_avg_duration",
        "LocalOutlierFactorFilter_avgF1 (No HTL)",
        "LocalOutlierFactorFilter_avgF1 (With HTL)",
        "LocalOutlierFactorFilter_avgF1 (random replacement)",
        "LocalOutlierFactorFilter_medF1 (No HTL)",
        "LocalOutlierFactorFilter_medF1 (With HTL)",
        "LocalOutlierFactorFilter_medF1 (random replacement)",
        # LoserFilter (plain)
        "LoserFilter_Plain_HTL Count",
        "LoserFilter_Plain_avg_duration",
        "LoserFilter_Plain_avgF1 (No HTL)",
        "LoserFilter_Plain_avgF1 (With HTL)",
        "LoserFilter_Plain_avgF1 (random replacement)",
        "LoserFilter_Plain_medF1 (No HTL)",
        "LoserFilter_Plain_medF1 (With HTL)",
        "LoserFilter_Plain_medF1 (random replacement)",
        # Single-step entropy (simple pseudo labels)
        "SingleStepEntropy_SimplePseudo_HTL Count",
        "SingleStepEntropy_SimplePseudo_avg_duration",
        "SingleStepEntropy_SimplePseudo_avgF1 (No HTL)",
        "SingleStepEntropy_SimplePseudo_avgF1 (With HTL)",
        "SingleStepEntropy_SimplePseudo_avgF1 (random replacement)",
        "SingleStepEntropy_SimplePseudo_medF1 (No HTL)",
        "SingleStepEntropy_SimplePseudo_medF1 (With HTL)",
        "SingleStepEntropy_SimplePseudo_medF1 (random replacement)",
    ],
    float,
)
# Experiment parameters fetched for every run, mapped to the type each value is
# cast to.
parameters = dict(
    strategy_name=str,
    filter_strategy_name=str,
    seed=int,
    task_config=str,
)

# Prefix put in front of every task name.
version = "x"

# Version-prefixed task names ("trec-coarse" is currently disabled).
task_names = [
    f"{version}{task}"
    for task in (
        "ag-news",
        "dbpedia",
        "fnc1",
        "imdb",
        "qnli",
        "rotten-tomatoes",
        "sst2",
        "trec",
        "wiki-talk",
    )
]

# Relative location of this notebook: the last five components of the path
# stored in `__vsc_ipynb_file__`. That variable is looked up in the calling
# frame's locals and is only present when the notebook is executed by VS Code.
notebook_name = "/".join(IPython.extract_module_locals()[1]["__vsc_ipynb_file__"].split("/")[-5:])

# Folder of the notebook, and the task-config folder next to it.
BASE_PATH = Path(notebook_name).parent
CONFIGS_PATH = BASE_PATH.parent.joinpath("Configs", "Tasks")

# Task name (without version prefix) -> path of its configuration file.
task_configs = {
    task: CONFIGS_PATH / file_name
    for task, file_name in (
        ("ag-news", "ag_news.json"),
        ("dbpedia", "dbpedia.json"),
        ("fnc1", "fnc_one.json"),
        ("imdb", "imdb.json"),
        ("qnli", "qnli.json"),
        ("rotten-tomatoes", "rotten_tomatoes.json"),
        ("sst2", "sst2.json"),
        ("trec-coarse", "trec_coarse.json"),
        ("trec", "trec.json"),
        ("wiki-talk", "wiki_talk.json"),
    )
}

# Number of different seeds expected per configuration.
seed_count = 10

# Asset files downloaded for every experiment ("gold_probs.npy" is disabled).
asset_names = [
    f"{stem}.npy"
    for stem in ("variability", "f1s", "correctness", "confidence", "durations")
]

# NOTE(review): this is ONE space-separated entry, not six separate names.
# Presumably it mirrors the `filter_strategy_name` value logged by a run that
# evaluates all of these filters at once - confirm before splitting it.
filter_names = ["HDBScanFilter LocalOutlierFactorFilter IsolationForestFilter SimpleDSM SemanticAE SimpleSS"]

In [13]:
# TODO: Remove runs with the same seed; if both runs are complete, always drop the older one.
def extract_metric_value(metric: List[Dict[str, Any]]) -> Any:
    """Return the value of the first record of a metric series.

    Args:
        metric: Non-empty list of metric records (as obtained from
            ``experiment.get_metrics(name)``); each record is a dict with a
            ``"metricValue"`` key.

    Returns:
        The ``"metricValue"`` of the first record, unchanged. No casting is
        done here, so the return type is whatever the record holds (the
        previous ``Dict[str, float]`` annotation was wrong).

    Raises:
        IndexError: If ``metric`` is empty.
        KeyError: If the first record has no ``"metricValue"`` key.
    """
    return metric[0]["metricValue"]

def extract_paremeter_value(parameter: Dict[str, Any]) -> Any:
    """Pull the current value out of a parameter record.

    Args:
        parameter: Dict with a ``"valueCurrent"`` key.

    Returns:
        The object stored under ``"valueCurrent"``, unchanged.

    Raises:
        KeyError: If the record has no ``"valueCurrent"`` key.
    """
    current_value = parameter["valueCurrent"]
    return current_value


def load_experiment_data(experiment: comet.APIExperiment):
    """Collect metrics, parameters and assets of one experiment into a dict.

    Args:
        experiment: Experiment to read from; queried through ``get_metrics``,
            ``get_parameters_summary`` and (via ``download_assets``) its
            asset API.

    Returns:
        A dict with one entry per name in ``metrics`` and ``parameters`` plus
        one entry per downloaded asset, or ``None`` as soon as any expected
        metric or parameter comes back empty.
    """
    record = {}

    # Metrics first: an experiment lacking any expected metric is skipped.
    for name in metrics:
        series = experiment.get_metrics(name)
        if not series:
            return None
        record[name] = extract_metric_value(metric=series)

    # Same rule for the run parameters.
    for name in parameters:
        summary = experiment.get_parameters_summary(name)
        if not summary:
            return None
        record[name] = extract_paremeter_value(parameter=summary)

    # Assets are only downloaded once metrics and parameters are complete.
    record.update(download_assets(experiment, asset_names))
    return record

def load_workspace_data(project_name):
    """Load every usable experiment of a project into one DataFrame.

    Args:
        project_name: Name of the project inside the ``"full-run"`` workspace.

    Returns:
        DataFrame with one row per experiment for which
        ``load_experiment_data`` returned data. Columns named in ``metrics``
        or ``parameters`` are cast to the type configured there; all other
        columns are left as they are.
    """
    experiments = API.get(workspace="full-run", project_name=project_name)
    loaded = (load_experiment_data(exp) for exp in experiments)
    experiment_data = [record for record in loaded if record is not None]

    df = pd.DataFrame(experiment_data)
    for col in df.columns:
        # `metrics` takes precedence over `parameters` for the target type.
        target_type = metrics[col] if col in metrics else parameters.get(col)
        if target_type is not None:
            df[col] = df[col].astype(target_type)
    return df


def download_assets(experiment, asset_names):
    """Download the requested ``.npy`` assets of one experiment and load them.

    Each matching asset is written to ``./cache/assets/<fileName>`` and read
    back with ``np.load``. The cache path depends only on the file name, so a
    file of the same name from an earlier call is overwritten.

    Args:
        experiment: Object providing ``get_asset_list()`` (records with
            ``"fileName"`` and ``"assetId"``) and ``get_asset(asset_id)``
            (raw file content as bytes).
        asset_names: File names of the assets to fetch; other assets are
            ignored.

    Returns:
        Dict mapping each file name without its last four characters (the
        ``.npy`` suffix) to the loaded array.
    """
    assets_downloaded = {}
    for asset in experiment.get_asset_list():
        file_name = asset["fileName"]
        if file_name not in asset_names:
            continue

        asset_path = Path(f"./cache/assets/{file_name}")
        # The cache folder does not exist on a fresh checkout; without this
        # the write below fails with FileNotFoundError.
        asset_path.parent.mkdir(parents=True, exist_ok=True)
        asset_path.write_bytes(experiment.get_asset(asset["assetId"]))

        assets_downloaded[file_name[:-4]] = np.load(asset_path)
    return assets_downloaded


# Load the "xag-news" project once to check that the helpers above work.
df = load_workspace_data(project_name="xag-news")

{'AutoFilter_Chen_Like_HTL Count': '20'}
{'AutoFilter_Chen_Like_HTL Count': '20', 'AutoFilter_Chen_Like_avg_duration': '53.28953028917313'}
{'AutoFilter_Chen_Like_HTL Count': '20', 'AutoFilter_Chen_Like_avg_duration': '53.28953028917313', 'AutoFilter_Chen_Like_medF1 (No HTL)': '0.8957894736842106'}
{'AutoFilter_Chen_Like_HTL Count': '20', 'AutoFilter_Chen_Like_avg_duration': '53.28953028917313', 'AutoFilter_Chen_Like_medF1 (No HTL)': '0.8957894736842106', 'AutoFilter_Chen_Like_medF1 (With HTL)': '0.8968421052631579'}
{'AutoFilter_Chen_Like_HTL Count': '20', 'AutoFilter_Chen_Like_avg_duration': '53.28953028917313', 'AutoFilter_Chen_Like_medF1 (No HTL)': '0.8957894736842106', 'AutoFilter_Chen_Like_medF1 (With HTL)': '0.8968421052631579', 'AutoFilter_Chen_Like_avgF1 (random replacement)': '0.8947368421052629'}
{'AutoFilter_Chen_Like_HTL Count': '20', 'AutoFilter_Chen_Like_avg_duration': '53.28953028917313', 'AutoFilter_Chen_Like_medF1 (No HTL)': '0.8957894736842106', 'AutoFilter_Chen_Like

In [4]:
def concatenate_arrays(group):
    """Join all arrays held by ``group`` into a single array.

    Args:
        group: Object with a ``tolist()`` method that yields a sequence of
            arrays.

    Returns:
        The result of ``np.concatenate`` over those arrays.
    """
    arrays = group.tolist()
    return np.concatenate(arrays)

# Module-level accumulators filled by `load_data`:
#   dataframes - one frame of missing seeds per processed group
#   counts     - number of missing seeds per processed group
dataframes, counts = [], []

def load_data(workspace_name):
    """Load one project, de-duplicate its runs and aggregate them per filter.

    Side effects: for every filter group a frame listing its missing seeds is
    appended to the module-level ``dataframes`` list, the number of missing
    seeds is appended to ``counts`` and printed, and a warning is printed for
    groups with fewer than ``seed_count`` rows.

    Args:
        workspace_name: Project name handed to ``load_workspace_data``.

    Returns:
        DataFrame indexed by ``filter_strategy_name`` holding the mean of all
        numeric columns plus, for every entry of ``asset_names``, one column
        with the concatenated arrays of that group.
    """
    df = load_workspace_data(workspace_name)

    # Runs without a filter are told apart by their strategy ("NoneE" /
    # "NoneR"). `.loc` assigns on the frame itself; the former chained
    # indexing (`df[col][mask] = ...`) may write to a temporary copy and is a
    # no-op under pandas Copy-on-Write.
    no_filter = df["filter_strategy_name"] == "None"
    df.loc[no_filter & (df["strategy_name"] == "PredictionEntropy"), "filter_strategy_name"] = "NoneE"
    df.loc[no_filter & (df["strategy_name"] == "RandomSampling"), "filter_strategy_name"] = "NoneR"

    # Drop Same Seed Same Strategy (Duplicates)
    df_ = df.drop_duplicates(subset=['seed', 'filter_strategy_name'], keep='first')

    def process_group(group):
        """Record the missing seeds of one filter group and trim it to ``seed_count`` rows."""
        # Expected seeds are 42 .. 42 + seed_count - 1.
        missing_seeds = np.setdiff1d(np.arange(42, 42 + seed_count), np.array(group["seed"]))
        filter_name = list(group["filter_strategy_name"])[0]
        task = list(group["task_config"])[0]
        missing = pd.DataFrame(
            [{"seed": seed, "filter": filter_name, "task": task} for seed in missing_seeds]
        )
        dataframes.append(missing)
        print(len(missing_seeds))
        counts.append(len(missing_seeds))
        if len(group) < seed_count:
            print(f"Warning: Group '{filter_name}' in workspace {workspace_name} has only {len(group)} rows.")
        return group.nsmallest(seed_count, 'seed')

    df_ = df_.groupby('filter_strategy_name').apply(process_group).reset_index(drop=True)

    # Asset columns hold arrays, so they are concatenated per group instead of
    # averaged.
    grouped = df_.groupby('filter_strategy_name')
    tmp_dict = {}
    for asset in asset_names:
        asset_name = asset[:-4]  # strip ".npy"
        tmp_dict[asset_name] = grouped[asset_name].agg(concatenate_arrays)

    aggregated = grouped.mean(numeric_only=True)
    for key, value in tmp_dict.items():
        aggregated[key] = value
    return aggregated


#load_data("xag-news")

In [5]:
def _all_seeds_missing(filter_name, task):
    """Build the frame that marks every expected seed of one filter/task as missing."""
    # Task configs are keyed without the version prefix.
    task_config = task_configs[task[len(version):]]
    return pd.DataFrame(
        [
            {"seed": seed, "filter": filter_name, "task": task_config}
            for seed in np.arange(42, 42 + seed_count)
        ]
    )


# Start from empty accumulators so that re-running this cell does not duplicate
# rows. They are cleared in place because `load_data` appends to these very
# list objects.
dataframes.clear()
counts.clear()

results = {}
for task in tqdm(task_names):
    try:
        results[task] = load_data(task)
        # Filters without any run in this project are missing for every seed.
        absent_filters = [name for name in filter_names if name not in results[task].index]
    except Exception as err:
        # The project could not be loaded at all: report the reason instead of
        # silently swallowing it, and count every filter as missing.
        absent_filters = filter_names
        print(f"{task}: Missing ({type(err).__name__}: {err})")
    dataframes.extend(_all_seeds_missing(name, task) for name in absent_filters)

# `reindex` guarantees the three columns even if every collected frame is empty
# (i.e. nothing is missing); otherwise it leaves the frame unchanged.
missing_experiments = pd.concat(dataframes).reindex(columns=["seed", "filter", "task"])
missing_experiments = missing_experiments[missing_experiments["filter"].isin(filter_names)]
missing_experiments.to_csv("missing_std_experiments.csv")
print(sum(counts))
# Numbers noted by the author on earlier runs:
# 941, 856, 848, 762, 747, (856, 715, 650, 607, 516, 503, 384, 370, 322, 290, 66)
missing_experiments

 11%|█         | 1/9 [00:00<00:01,  7.00it/s]

xag-news: Missing


 22%|██▏       | 2/9 [00:01<00:07,  1.04s/it]

xdbpedia: Missing


 33%|███▎      | 3/9 [00:03<00:07,  1.26s/it]

xfnc1: Missing


 44%|████▍     | 4/9 [00:04<00:06,  1.35s/it]

ximdb: Missing


 56%|█████▌    | 5/9 [00:06<00:05,  1.43s/it]

xqnli: Missing


 67%|██████▋   | 6/9 [00:28<00:25,  8.40s/it]

xrotten-tomatoes: Missing


 78%|███████▊  | 7/9 [00:36<00:16,  8.38s/it]

xsst2: Missing


 89%|████████▉ | 8/9 [00:38<00:06,  6.20s/it]

xtrec: Missing


100%|██████████| 9/9 [00:39<00:00,  4.42s/it]

xwiki-talk: Missing
0





Unnamed: 0,seed,filter,task
0,42,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/ag_news....
1,43,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/ag_news....
2,44,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/ag_news....
3,45,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/ag_news....
4,46,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/ag_news....
...,...,...,...
5,47,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/wiki_tal...
6,48,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/wiki_tal...
7,49,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/wiki_tal...
8,50,HDBScanFilter LocalOutlierFactorFilter Isolati...,user/GitHub/TenseOracle/Configs/Tasks/wiki_tal...


In [6]:
import seaborn as sns

def add_boxplots(results, filter, l:list):
    """Append the pooled F1 samples of one comparison group to ``l``.

    For each of the three series ``"NoneR"``, ``filter`` and ``"NoneE"`` (in
    that order), the ``"f1s"`` values of all tasks in the module-level
    ``task_names`` are pooled into one flat list, which is appended to ``l``.
    Nothing is drawn here; the lists are plotted by the caller.

    Args:
        results: Mapping task name -> per-task results, indexable as
            ``results[task]["f1s"][series_name]``.
        filter: Name of the filter series to put between the two baselines.
        l: Output list; extended in place by three lists.
    """
    for series_name in ("NoneR", filter, "NoneE"):
        pooled = [
            value
            for task in task_names
            for value in results[task]["f1s"][series_name]
        ]
        l.append(pooled)
        


# Filters shown in the summary figure, in plotting order.
filter_names_ = [
    "AutoFilter_Chen_Like",
    "LoserFilter_Plain",
    "SingleStepEntropy_SimplePseudo",
    "HDBScanFilter",
    "IsolationForestFilter",
    "LocalOutlierFactorFilter",
]

# Display label for each filter on the x axis.
filter_names_clean = dict(
    LoserFilter_Plain="SIMPLE DSM",
    AutoFilter_Chen_Like="SEMANTIC AE",
    SingleStepEntropy_SimplePseudo="SIMPLE SS",
    TeachingFilter_WOW="HIGH ENTROPY LE",
    HDBScanFilter="HDBScan",
    IsolationForestFilter="IsolationForest",
    LocalOutlierFactorFilter="LocalOutlierFactor",
)

# Three series per filter ("NoneR", the filter itself, "NoneE"), in filter order.
l = []
for filter in filter_names_:
    add_boxplots(results, filter, l)

fig, ax = plt.subplots()

# Boxes are spaced evenly; after every third box an extra gap separates the groups.
intra_group_dist = 0.75
inter_group_dist = 1.5
positions = [
    idx * intra_group_dist + (idx // 3) * inter_group_dist
    for idx in range(len(l))
]
bp = ax.boxplot(l, showfliers=False, positions=positions, patch_artist=True)

# One colour per position inside a group, repeated for every group.
colors = ['#FF7F50', '#7cda9e', '#8fdeff']
for patch, color in zip(bp['boxes'], colors * len(filter_names_)):
    patch.set_facecolor(color)

# Replace every median line by a diamond marker. The marker is placed `offset`
# to the left of the second vertex of the median line.
offset = 0.25
for median in bp['medians']:
    median_x, median_y = median.get_xydata()[1]
    median.set_visible(False)
    ax.plot(median_x - offset, median_y, 'd', color='#082239', markersize=3)

# One tick per group of three boxes, shifted left in proportion to the label length.
tick_positions = [
    np.mean(positions[3 * group:3 * group + 3]) - 0.18 * len(filter_names_clean[name])
    for group, name in enumerate(filter_names_)
]

plt.xticks(tick_positions, [filter_names_clean[name] for name in filter_names_], rotation=45)
plt.tight_layout()

plt.savefig("endresults.pdf")
plt.show()

KeyError: 'xag-news'

In [None]:
# Per-task variant of the summary figure: one PDF per task.
# Everything that does not depend on the task is defined once, outside the loop.
filter_names_ = [
    "AutoFilter_LSTM_SIMPLE",
    "AutoFilter_LSTM",
    "AutoFilter_Chen_Like",
    "LoserFilter_Plain",
    "LoserFilter_Optimized_Pseudo_Labels",
    "LoserFilter_SSL_Variety",
    "TeachingFilter",
    "TeachingFilter_WOW",
    "TeachingFilter_Smooth",
    "SingleStepEntropy_SimplePseudo",
    "SingleStepEntropy",
]

# Display label for each filter on the x axis.
filter_names_clean = {
    "LoserFilter_SSL_Variety": "EXPANDED DSM",
    "LoserFilter_Plain": "SIMPLE DSM",
    "LoserFilter_Optimized_Pseudo_Labels": "MC DSM",
    "AutoFilter_Chen_Like": "SEMANTIC AE",
    "AutoFilter_LSTM": "LSTM ENSEMBLE AE",
    "AutoFilter_LSTM_SIMPLE": "SIMPLE LSTM AE",
    "SingleStepEntropy": "MC SS",
    "SingleStepEntropy_SimplePseudo": "SIMPLE SS",
    "TeachingFilter": "SIMPLE LE",
    "TeachingFilter_Smooth": "SMOOTH LE",
    "TeachingFilter_WOW": "HIGH ENTROPY LE",
}

intra_group_dist = 0.75
inter_group_dist = 1.5
colors = ['#FF7F50', '#7cda9e', '#8fdeff']
offset = 0.25

for task in task_names:
    # Three series per filter ("NoneR", the filter, "NoneE") for this task only.
    # Collected inline: redefining `add_boxplots` inside the loop would shadow
    # the module-level helper of the same name with different behaviour.
    l = []
    for filter in filter_names_:
        for f in ["NoneR", filter, "NoneE"]:
            l.append(list(results[task]["f1s"][f]))

    fig, ax = plt.subplots()
    # Boxes are spaced evenly; every third box starts a new group after a gap.
    positions = [i*intra_group_dist + (i//3)*inter_group_dist for i in range(len(l))]
    bp = ax.boxplot(l, showfliers=False, positions=positions, patch_artist=True)
    for i, patch in enumerate(bp['boxes']):
        patch.set_facecolor(colors[i%3])

    # Replace every median line by a diamond marker placed `offset` to the left
    # of the second vertex of the median line.
    for median in bp['medians']:
        median_x, median_y = median.get_xydata()[1]
        median.set_visible(False)
        ax.plot(median_x-offset, median_y, 'd', color='#082239', markersize=3)

    # One tick per group of three boxes, shifted left in proportion to the label length.
    tick_positions = [np.mean(positions[(i*3):(i*3)+3])-len(filter_names_clean[f])*0.18 for i, f in enumerate(filter_names_)]

    plt.xticks(tick_positions, [filter_names_clean[f] for f in filter_names_], rotation=45)
    plt.tight_layout()

    plt.savefig(f"endresults-{task}.pdf")
    plt.show()
    # Release the figure; otherwise one open figure per task piles up in memory.
    plt.close(fig)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Demo: collect several datasets first, then draw them side by side as
# individual boxplots.
fig, ax = plt.subplots()

# Seeded generator so that the example figure is reproducible.
rng = np.random.default_rng(42)

# One synthetic sample of 100 values per boxplot; the mean grows with the index.
datasets = [rng.normal(loc=i, scale=0.5, size=100) for i in range(1, 6)]

# Plot all datasets as individual boxplots. The axes are drawn exactly once, so
# no clear/redraw/pause step is needed.
ax.boxplot(datasets)
ax.set_title('Incremental Boxplots')

plt.show()

In [None]:
import deepsig
import pandas as pd
# ASO significance test of every filter against the "NoneR" baseline, per task.
aso_test = {}
for filter_name in filter_names:
    task_aso = {}
    for task in task_names:
        f1s = results[task]["f1s"]
        baseline_scores = f1s["NoneR"]
        filter_scores = f1s[filter_name]
        # NOTE(review): per the deepsig documentation `aso` returns the score
        # eps_min rather than a boolean (smaller = stronger evidence that the
        # first argument is better) - confirm how the "_is_better" values are
        # meant to be read.
        eps_min = deepsig.aso(filter_scores, baseline_scores, seed=42)
        task_aso[task+"_no_htl_is_better"] = eps_min
    aso_test[filter_name] = task_aso

# Rows: tasks, columns: filters.
pd.DataFrame(aso_test)

In [None]:
import seaborn as sns

# Paired boxplots: the variables A..F are shown as the pairs AB, CD and EF.
# NOTE(review): `data` must map the keys 'A'..'F' to 1-D value sequences; it is
# not defined in any of the cells visible here - confirm where it comes from.
variable_names = ["A", "B", "C", "D", "E", "F"]
pair_members = [("A", "B"), ("C", "D"), ("E", "F")]

# Long-format frame with one row per value. The label columns are sized from
# the data itself instead of assuming exactly 100 values per variable.
paired_data = pd.DataFrame({
    'Value': np.concatenate([data[name] for name in variable_names]),
    'Variable': np.concatenate([np.repeat(name, len(data[name])) for name in variable_names]),
    'Pair': np.concatenate([
        np.repeat(f"Pair {first}{second}", len(data[first]) + len(data[second]))
        for first, second in pair_members
    ]),
})

# Plotting the paired boxplots
fig, ax = plt.subplots(figsize=(10, 6))

sns.boxplot(x="Pair", y="Value", hue="Variable", data=paired_data, palette="Set3", ax=ax)

ax.set_title("Paired Boxplots")
ax.set_xlabel("Pairs")
ax.set_ylabel("Values")

plt.show()