In [None]:
import warnings
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
warnings.filterwarnings("ignore")
# plt.style.use('dark_background')

In [None]:
from utils import datasets
from utils.active_forests import ALIF, BALEIF, BALIF, RandomForest
from utils.utils import *

In [None]:
# Shape of the subplot grid shared by every call to plot_short_4x4.
rows, cols = 4, 4


def plot_short_4x4(model_name, ensemblepred, querystrats, styles=["-"], colors=["tab:grey","tab:blue","tab:orange","tab:green","tab:red"]):
    """Draw one model's performance curves on a rows x cols grid, one panel per dataset.

    The curves are drawn on the *current* matplotlib figure through
    ``plt.subplot``, so calling this function several times in a row overlays
    the results of several models on the same panels.

    Args:
        model_name: Model identifier used in the log file name. For "IF-AAD"
            the file name carries no query-strategy / prediction suffix.
        ensemblepred: Iterable of ensemble-prediction names to plot.
        querystrats: Iterable of query-strategy names to plot.
        styles: Line styles, cycled over the (strategy, prediction) pairs.
        colors: Colors, cycled over the (strategy, prediction) pairs.

    Only the first ``rows * cols`` entries of ``datasets.datasets_names_short``
    are drawn; any further entries are ignored.
    """
    # Truncate up front so the progress bar only counts panels that are drawn.
    capacity = rows * cols
    shown = list(datasets.datasets_names_short)[:capacity]

    for panel, data_name in enumerate(tqdm(shown, leave=False), start=1):
        plt.subplot(rows, cols, panel)
        plt.title(data_name)
        idx = 0
        for strat in querystrats:
            for pred in ensemblepred:
                if model_name != "IF-AAD":
                    path = f"performance/{data_name}_{model_name}_query-{strat}_pred-{pred}.npy"
                else:
                    path = f"performance/{data_name}_{model_name}.npy"
                # NOTE: allow_pickle=True unpickles arbitrary objects; only
                # load performance logs produced by trusted runs.
                sim_log = np.load(path, allow_pickle=True).item()
                plt_perf_evol(
                    sim_log,
                    label=f"{model_name} ({strat}, {pred})",
                    style=styles[idx % len(styles)],
                    color=colors[idx % len(colors)],
                )
                idx += 1
        # Pass a real boolean: a string such as "on" only works because it is truthy.
        plt.grid(True)
            
def plot_single(model_descriptions, styles=["-"], colors=["tab:grey","tab:blue","tab:orange","tab:green","tab:red"], saveas="", legend=False):
    """Save one comparison figure per dataset, plus an optional stand-alone legend.

    Args:
        model_descriptions: Iterable of ``(model_name, pred, strat)`` triples.
            For "IF-AAD" the ``pred`` / ``strat`` fields are not used to build
            the log file name.
        styles: Line styles, cycled over ``model_descriptions``.
        colors: Colors, cycled over ``model_descriptions``.
        saveas: Sub-directory of ``images/`` that receives the PDF files.
        legend: If True, every curve is labelled inside each dataset figure.
            If False, the curves are unlabelled and a separate
            ``legend.pdf`` holding only the legend is written instead.

    One figure is produced for every entry of ``datasets.datasets_names``
    except the first, and saved as ``confront_on_<dataset>.pdf``.
    """
    import os

    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(f"images/{saveas}", exist_ok=True)

    if not legend:
        # Legend-only figure: plot a dummy single-point log per model so that
        # only the legend entries are visible, then hide the axes.
        plt.figure(figsize=(3.1,3.1), dpi=120)
        plt.title("")
        for idx, (model_name, pred, strat) in enumerate(model_descriptions):
            if model_name == "ALIF" or model_name == "BALIF":
                label = f"{model_name} ({strat}, {pred})"
            else:
                label = f"{model_name}"
            # Use a fixed key for the dummy log instead of IPython's `_`
            # (last cell output), which is hidden state and may be unhashable.
            plt_perf_evol(
                {0: [(0, 0)]},
                label=label,
                style=styles[idx % len(styles)],
                color=colors[idx % len(colors)],
            )
        plt.xlim(0,0.1)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel("")
        plt.ylabel("")
        plt.axis('off')
        plt.savefig(f"images/{saveas}/legend.pdf", format='pdf')

    for data_name in tqdm(datasets.datasets_names[1:], leave=False):
        plt.figure(figsize=(4,4), dpi=120)
        plt.title(data_name)
        for idx, (model_name, pred, strat) in enumerate(model_descriptions):
            if model_name != "IF-AAD":
                path = f"performance/{data_name}_{model_name}_query-{strat}_pred-{pred}.npy"
                curve_label = f"{model_name} ({strat}, {pred})"
            else:
                path = f"performance/{data_name}_{model_name}.npy"
                # IF-AAD has no strategy/prediction suffix; avoid "IF-AAD (, )".
                curve_label = f"{model_name}"
            # NOTE: allow_pickle=True unpickles arbitrary objects; only load
            # performance logs produced by trusted runs.
            sim_log = np.load(path, allow_pickle=True).item()
            plt_perf_evol(
                sim_log,
                label=curve_label if legend else None,
                style=styles[idx % len(styles)],
                color=colors[idx % len(colors)],
            )
        plt.xlim(0,25)
        # Pass a real boolean: a string such as "on" only works because it is truthy.
        plt.grid(True)
        plt.savefig(f"images/{saveas}/confront_on_{data_name}.pdf", format='pdf')

In [None]:
def plt_perf_evol(performance_logs, label=None, style="o-", color=None, plotroc=False, plotap=True):
    """Plot the mean performance curve with a confidence band on the current axes.

    Args:
        performance_logs: Mapping whose values are sequences of ``(auc, ap)``
            pairs, one sequence per run; the keys are ignored. All sequences
            must have the same length, since they are stacked into a 2-D array.
        label: Legend label for the AP curve. The ROC curve is labelled
            ``"auc" + label``. When None, curves are unlabelled and no legend
            is drawn.
        style: Matplotlib format string for the AP curve.
        color: Color of the AP curve and its band; None uses matplotlib's
            default color cycle.
        plotroc: If True, also draw the AUC curve (fixed "o-" style, default
            colors).
        plotap: If True, draw the AP curve.

    The band is ``mean +/- delta`` as returned by ``get95percconf``, which is
    defined outside this cell (presumably a 95% confidence interval — confirm).
    """
    runs = list(performance_logs.values())

    if plotroc:
        auc = np.array([[run_auc for run_auc, _ in perf_log] for perf_log in runs])
        auc_mean, auc_delta = get95percconf(auc)
        # Guard the concatenation: label defaults to None, and "auc" + None
        # would raise TypeError.
        roc_label = None if label is None else "auc" + label
        plt.plot(auc_mean, "o-", label=roc_label)
        plt.fill_between(
            np.arange(auc.shape[1]),
            auc_mean - auc_delta,
            auc_mean + auc_delta,
            alpha=0.2,
        )

    if plotap:
        ap = np.array([[run_ap for _, run_ap in perf_log] for perf_log in runs])
        ap_mean, ap_delta = get95percconf(ap)
        # Only forward the color when one was requested so that matplotlib's
        # own color cycle is used otherwise.
        color_kw = {} if color is None else {"color": color}
        plt.plot(ap_mean, style, label=label, **color_kw)
        plt.fill_between(
            np.arange(ap.shape[1]),
            ap_mean - ap_delta,
            ap_mean + ap_delta,
            alpha=0.1,
            **color_kw,
        )

    plt.ylim(0, 1)
    plt.ylabel("ap")
    plt.xlabel("labelled points")
    if label is not None:
        plt.legend()

### Comparison of active-learning (AL) algorithms

In [None]:
# One large figure; each plot_short_4x4 call below draws onto the same
# rows x cols grid, so the three models end up overlaid per dataset.
# (The RF baseline with naive prediction / random queries is currently disabled.)
plt.figure(figsize=(6 * cols, 6 * rows), dpi=80)
plot_short_4x4(
    model_name="ALIF",
    ensemblepred=["piecewise"],
    querystrats=["anomalous"],
    styles=["-", ":"],
    colors=["tab:blue"],
)
plot_short_4x4(
    model_name="BALIF",
    ensemblepred=["naive"],
    querystrats=["margin"],
    styles=["-", ":", "--"],
    colors=["tab:orange"],
)
plot_short_4x4(
    model_name="BALEIF",
    ensemblepred=["naive"],
    querystrats=["margin"],
    styles=["-", ":", "--"],
    colors=["tab:red"],
)

In [None]:
# Curves to compare, as (model, ensemble prediction, query strategy) triples.
# plot_single does not use the last two fields for IF-AAD, hence the empty strings.
# Disabled alternatives: ("BALIF", "naive", "random") and
# ("BALIF", "naive", "anomalous"), with colors "tab:grey" / "tab:orange".
model_descriptions = [
    ("ALIF", "piecewise", "anomalous"),
    ("BALIF", "naive", "margin"),
    ("IF-AAD", "", ""),
]
# One color per entry above, in the same order.
plot_single(
    model_descriptions,
    colors=["tab:blue", "tab:green", "tab:red"],
    saveas="AL_algo",
    legend=False,
)

## BALEIF 

### (naive vs mode vs likelihood)

In [None]:
# Compare the three ensemble predictions of BALEIF for each query strategy
# (grey = random, orange = anomalous, green = margin).
# plot_short_4x4 takes no `overlay` argument: it always draws onto the current
# figure, so consecutive calls overlay on the same panels. Create the figure
# explicitly so the 4x4 grid gets a readable size.
plt.figure(figsize=(6 * cols, 6 * rows), dpi=80)
plot_short_4x4("BALEIF", ["naive", "mode", "likelihood"], ["random"], styles=["-",":","-."], colors=["grey"])
plot_short_4x4("BALEIF", ["naive", "mode", "likelihood"], ["anomalous"], styles=["-",":","-."], colors=["darkorange"])
plot_short_4x4("BALEIF", ["naive", "mode", "likelihood"], ["margin"], styles=["-",":","-."], colors=["green"])

### boostedmargin (naive vs mode vs likelihood)

In [None]:
# Same comparison with the boostedmargin query strategy as the grey reference
# (orange = anomalous, green = margin).
# plot_short_4x4 takes no `overlay` argument: it always draws onto the current
# figure, so consecutive calls overlay on the same panels. Create the figure
# explicitly so the 4x4 grid gets a readable size.
plt.figure(figsize=(6 * cols, 6 * rows), dpi=80)
plot_short_4x4("BALEIF", ["naive", "mode", "likelihood"], ["boostedmargin"], styles=["-",":","-."], colors=["grey"])
plot_short_4x4("BALEIF", ["naive", "mode", "likelihood"], ["anomalous"], styles=["-",":","-."], colors=["darkorange"])
plot_short_4x4("BALEIF", ["naive", "mode", "likelihood"], ["margin"], styles=["-",":","-."], colors=["green"])

# old plots

In [None]:
# BALEIF (every prediction / query-strategy combination) against the RF
# baselines on the synthetic "squarethoroid" dataset, all on one axes.
plt.figure(figsize=(8, 8), dpi=80)
# A single tqdm wrapper is enough; nesting it only adds a second, total-less bar.
for name in tqdm(["squarethoroid"]):
    plt.title(name)
    for ensamblepred in ["naive", "maxlikelihood"]:
        for querystrat in ["anomalous", "margin"]:
            prova = np.load(
                f"performance/{name}_BALEIF_query-{querystrat}_prediction-{ensamblepred}.npy",
                allow_pickle=True,
            ).item()
            plt_perf_evol(prova, label=f" BALEIF ({querystrat}, {ensamblepred})")

    for querystrat in ["anomalous", "random"]:
        prova = np.load(
            f"performance/{name}_RF_query-{querystrat}.npy", allow_pickle=True
        ).item()
        # RF logs have no ensemble-prediction variant; do not reuse the stale
        # `ensamblepred` left over from the loop above in the label.
        plt_perf_evol(prova, label=f" RF ({querystrat})")

In [None]:
# RF baselines against the naive / anomalous BALEIF log, one panel per dataset.
c = 3
# NOTE(review): the grid is hard-coded to 18 panels (18 // c rows of c columns);
# confirm this matches len(datasets.datasets_names).
plt.figure(figsize=(2 * 4 * c, 2 * 4 * 18 // c), dpi=80)
# A single tqdm wrapper is enough; nesting it only adds a second, total-less bar.
for i, name in enumerate(tqdm(datasets.datasets_names)):
    plt.subplot(18 // c, c, i + 1)
    plt.title(name)
    # The "cover" dataset is skipped: its panel is left empty.
    if name != "cover":
        for querystrat in ["anomalous", "random"]:
            prova = np.load(
                f"performance/{name}_RF_query-{querystrat}.npy", allow_pickle=True
            ).item()
            # RF logs have no ensemble-prediction variant, and `ensamblepred`
            # is not defined yet at this point of the cell (it only existed
            # as a leftover from a previously executed cell).
            plt_perf_evol(prova, label=f" RF ({querystrat})")

        for ensamblepred in ["naive"]:
            for querystrat in ["anomalous"]:
                prova = np.load(
                    f"performance/{name}_BALEIF_query-{querystrat}_prediction-{ensamblepred}.npy",
                    allow_pickle=True,
                ).item()
                # NOTE(review): the label says ALIF but the file loaded is the
                # BALEIF log — confirm which one is intended.
                plt_perf_evol(prova, label=f" ALIF ({querystrat}, {ensamblepred})")

In [None]:
# RF baselines against BALIF with random queries, one panel per dataset.
c = 3
# NOTE(review): the grid is hard-coded to 18 panels (18 // c rows of c columns);
# confirm this matches len(datasets.datasets_names).
plt.figure(figsize=(2 * 4 * c, 2 * 4 * 18 // c), dpi=80)
# A single tqdm wrapper is enough; nesting it only adds a second, total-less bar.
for i, name in enumerate(tqdm(datasets.datasets_names)):
    plt.subplot(18 // c, c, i + 1)
    plt.title(name)
    # The "cover" dataset is skipped: its panel is left empty.
    if name != "cover":
        for querystrat in ["anomalous", "random"]:
            prova = np.load(
                f"performance/{name}_RF_query-{querystrat}.npy", allow_pickle=True
            ).item()
            # RF logs have no ensemble-prediction variant, and `ensamblepred`
            # is not defined yet at this point of the cell (it only existed
            # as a leftover from a previously executed cell).
            plt_perf_evol(prova, label=f" RF ({querystrat})")

        for ensamblepred in ["naive", "maxlikelihood"]:
            for querystrat in ["random"]:
                prova = np.load(
                    f"performance/{name}_BALIF_query-{querystrat}_prediction-{ensamblepred}.npy",
                    allow_pickle=True,
                ).item()
                plt_perf_evol(prova, label=f" BALIF ({querystrat}, {ensamblepred})")

In [None]:
# BALIF: every (prediction, query strategy) combination, one panel per dataset.
c = 3
# NOTE(review): the grid is hard-coded to 18 panels (18 // c rows of c columns);
# confirm this matches len(datasets.datasets_names).
plt.figure(figsize=(2 * 4 * c, 2 * 4 * 18 // c), dpi=80)
# A single tqdm wrapper is enough; nesting it only adds a second, total-less bar.
for i, name in enumerate(tqdm(datasets.datasets_names)):
    plt.subplot(18 // c, c, i + 1)
    plt.title(name)
    # The "cover" dataset is skipped: its panel is left empty.
    if name != "cover":
        for ensamblepred in ["naive", "maxlikelihood"]:
            for querystrat in ["anomalous", "margin"]:
                prova = np.load(
                    f"performance/{name}_BALIF_query-{querystrat}_prediction-{ensamblepred}.npy",
                    allow_pickle=True,
                ).item()
                plt_perf_evol(prova, label=f" BALIF ({querystrat}, {ensamblepred})")

In [None]:
# BALEIF: every (prediction, query strategy) combination, one panel per dataset.
c = 3
# NOTE(review): the grid is hard-coded to 18 panels (18 // c rows of c columns);
# confirm this matches len(datasets.datasets_names).
plt.figure(figsize=(2 * 4 * c, 2 * 4 * 18 // c), dpi=80)
# A single tqdm wrapper is enough; nesting it only adds a second, total-less bar.
for i, name in enumerate(tqdm(datasets.datasets_names)):
    plt.subplot(18 // c, c, i + 1)
    plt.title(name)
    # The "cover" dataset is skipped: its panel is left empty.
    if name != "cover":
        for ensamblepred in ["naive", "maxlikelihood"]:
            for querystrat in ["anomalous", "margin"]:
                prova = np.load(
                    f"performance/{name}_BALEIF_query-{querystrat}_prediction-{ensamblepred}.npy",
                    allow_pickle=True,
                ).item()
                plt_perf_evol(prova, label=f" BALEIF ({querystrat}, {ensamblepred})")