Run with the following conda environment: `../../conda_envs/training_env`

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pylab as pylab
from pprint import pprint
import pickle
import matplotlib
import matplotlib.transforms as mtrans
import matplotlib.ticker as mtick
from copy import deepcopy
import json
import os
import sys
module_path = os.path.abspath(os.path.join('../../models/utils/'))
if module_path not in sys.path:
    sys.path.append(module_path)
from plots import full_extent
from matplotlib.transforms import Bbox

In [None]:
# Timeline constants, in minutes on the shared x-axis of the figures below.
# NOTE(review): the meanings are inferred from how the plotting cells use these
# values (curve start offsets and shaded dataset spans) - confirm against the
# experiment setup.
train_minutes, caida_minutes, mawi_minutes = 5, 50, 55

# Step between the update markers drawn on the active-model curves.
update_freq = 10

# Not referenced by the visible cells; the result paths hard-code "5_pk".
n_pk = 5

# Root of the result tree. It already ends with "/", so the f-strings below
# produce a harmless double slash.
folder = "../../results/"

# Runs evaluated on all protocols: [continual-learning run, static MAWI run].
paths_all = [
    f"{folder}/5_pk/all_proto/initial5min_continual250KMbuffer10min_addinit_active+random_pruning+feat_selection+quantization_updatethrsimu_0dryrun_driftdetect_caidamawi/",
    f"{folder}/5_pk/all_proto/initial5min_pruning+feat_selection+quantization_0dryrun_mawi/",
]

# Same two runs, restricted to TCP + UDP traffic.
paths = [
    f"{folder}/5_pk/tcp_udp/initial5min_continual250KMbuffer10min_addinit_active+random_pruning+feat_selection+quantization_updatethrsimu_0dryrun_driftdetect_caidamawi/",
    f"{folder}/5_pk/tcp_udp/initial5min_pruning+feat_selection+quantization_0dryrun_mawi/",
]

In [None]:
# Global matplotlib styling shared by every figure in this notebook.
plt.rc('pdf', fonttype=42)  # embed fonts as Type 42 (TrueType) in PDF output
plt.rc('font', size=16)

# Per-element font sizes, largest first.
params = {
    'figure.titlesize': 25,
    'axes.titlesize': 22,
    'axes.labelsize': 20,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'legend.title_fontsize': 16,
    'legend.fontsize': 14,
}
pylab.rcParams.update(params)

# Colours of the current property cycle; the plotting cells index into this
# list to pick one colour per run.
palette = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Load metrics

In [None]:
def _load_run_metrics(run_dirs):
    """Load the pickled per-minute metrics of every run directory.

    Args:
        run_dirs: iterable of result-directory paths, each expected to contain
            a ``minute_APscore_initial_vs_CL.pkl`` file.

    Returns:
        dict mapping each directory path (unchanged) to the unpickled object.

    Raises:
        FileNotFoundError: if a run directory has no metrics pickle.
    """
    loaded = {}
    for run_dir in run_dirs:
        # os.path.join works whether or not the directory ends with a slash.
        pkl_path = os.path.join(run_dir, "minute_APscore_initial_vs_CL.pkl")
        with open(pkl_path, "rb") as f:
            # NOTE: unpickling executes arbitrary code - only load files
            # produced by trusted training runs.
            loaded[run_dir] = pickle.load(f)
    return loaded


# Metrics keyed by run directory: TCP+UDP runs, then all-protocol runs.
metrics = _load_run_metrics(paths)
metrics_all = _load_run_metrics(paths_all)

# All protocols

In [None]:
# Two stacked panels: AP score on top (the only panel exported to PDF), F1 below.
fig, (ax_ap, ax_f1) = plt.subplots(nrows=2, ncols=1, figsize=(6, 9))

# x positions (minutes) where the CAIDA span and the MAWI span end.
caida_end = train_minutes + caida_minutes
timeline_end = caida_end + mawi_minutes

# Initial model: its scores are stored alongside the continual-learning run.
initial_model_path = paths_all[0]
initial_scores_ap = metrics_all[initial_model_path]["initial_model_AP"]
initial_scores_f1 = metrics_all[initial_model_path]["initial_model_F1"]
ax_ap.plot(
    range(train_minutes, train_minutes + len(initial_scores_ap)),
    initial_scores_ap,
    label="model CAIDA",
    c="darkgreen",
    lw=2,
    ls="dashed",
)
ax_f1.plot(
    range(train_minutes, train_minutes + len(initial_scores_f1)),
    initial_scores_f1,
    label="static model CAIDA",
    c="black",
    lw=2,
    ls="dashed",
)

for i, path in enumerate(paths_all):
    print(path)
    run = metrics_all[path]
    # Runs whose directory name contains "_mawi" are drawn as the static MAWI
    # model; everything else is drawn as the actively updated model.
    is_mawi = "_mawi" in path
    name = "model MAWI" if is_mawi else "active model"

    scores_ap = run["cl_model_AP"]
    scores_f1 = run["cl_model_F1"]
    # The MAWI model's curve starts where the MAWI span begins.
    start = caida_end if is_mawi else train_minutes
    # An empty range means no markers; otherwise one marker every update_freq points.
    markevery = range(0) if is_mawi else range(update_freq + 1, len(scores_ap), update_freq)
    lw = 2
    marker = None if is_mawi else "^"
    ls = "dashed" if is_mawi else "-"
    color = "red" if is_mawi else palette[i]

    ax_ap.plot(
        range(start, start + len(scores_ap)),
        scores_ap,
        label=name,
        marker=marker, markersize=10, markevery=markevery,
        c=color,
        lw=lw,
        ls=ls,
    )
    ax_f1.plot(
        range(start, start + len(scores_f1)),
        scores_f1,
        label=name,
        marker=marker, markersize=10, markevery=markevery,
        c=color,
        lw=lw,
        ls=ls,
    )

    if not is_mawi:
        # False-negative rate per confusion matrix: FN / (TP + FN).
        # The notebook reads conf_mat[1][0] as FN and conf_mat[1][1] as TP -
        # presumably scikit-learn's confusion_matrix layout; TODO confirm.
        eleph_mispred = []
        for conf_mat in run["cl_model_conf_mats"]:
            fn, tp = conf_mat[1][0], conf_mat[1][1]
            positives = tp + fn
            # No positive samples: emit NaN (a gap in the line) instead of
            # raising ZeroDivisionError.
            eleph_mispred.append(fn / positives if positives else float("nan"))

        # Size the x-range from the FNR series itself so x and y always match.
        fnr_minutes = range(start, start + len(eleph_mispred))
        ax_ap.plot(
            fnr_minutes,
            eleph_mispred,
            label="FNR", c="grey", lw=1, ls="dotted",
        )
        ax_f1.plot(
            fnr_minutes,
            eleph_mispred,
            label="mispredicted elephants", c="grey", lw=1, ls="dotted",
        )

    # CL model sampling rate
    # NOTE(review): plotted against its own index (x starts at 0), unlike the
    # curves above which are shifted by `start` - confirm this is intended.
    ax_f1.plot(
        run["cl_model_sampling"],
        label="Sampling rate",
        marker=None,
        c=palette[i],
        ls="-."
    )

# --- AP panel ---
# Shaded background: green up to the end of the CAIDA span, red for the MAWI span.
ax_ap.axvspan(0, caida_end, alpha=0.1, color="green")
ax_ap.axvspan(caida_end, timeline_end, alpha=0.1, color="red")
my_xticks = ['13:15', '13:35', 'Break', '19:00', '19:20']
ax_ap.set_xticks([15, 35, 55, 75, 95], my_xticks, rotation=0)
ax_ap.set_ylabel('AP score')
ax_ap.set_ylim(bottom=0., top=1.)
ax_ap.set_xlim(left=0, right=timeline_end)
ax_ap.set_xlabel('CAIDA               MAWI  ')

# Two separate legends on the AP panel. Handles come back in plotting order:
# 0 "model CAIDA", 1 "active model", 2 "FNR", 3 "model MAWI" (this relies on
# paths_all listing the continual-learning run first).
handles, labels = ax_ap.get_legend_handles_labels()
order = [0, 3]
legend1 = ax_ap.legend([handles[idx] for idx in order], [labels[idx] for idx in order], loc="upper left", labelspacing=0.25, ncol=1, framealpha=0.)
# A second ax.legend() call replaces the first legend, so keep it as a plain artist.
ax_ap.add_artist(legend1)
order = [1, 2]
# This one stays attached as the Axes' own legend; re-adding it would draw it twice.
legend2 = ax_ap.legend([handles[idx] for idx in order], [labels[idx] for idx in order], loc="lower left", labelspacing=0.25, ncol=1, framealpha=0.)

# --- F1 panel (shown in the notebook, cropped out of the saved PDF) ---
ax_f1.axvspan(0, caida_end, alpha=0.1, color="green")
ax_f1.axvspan(caida_end, timeline_end, alpha=0.1, color="red")
ax_f1.set_xlim(left=train_minutes, right=timeline_end)
my_xticks = ['13h05','13h15', '13h25', '13h35',  '13h45', 'Break', '18h50', '19h00', '19h10', '19h20', '19h30']
ax_f1.set_xticks([5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 105], my_xticks, rotation=45)
ax_f1.set_ylabel('F1 score')
ax_f1.set_ylim(bottom=0., top=1.)
ax_f1.set_xlabel('CAIDA                                               MAWI  ')
# NOTE(review): these are collected but never passed to ax_f1.legend(), so the
# F1 panel has no legend - either add the call or drop these two lines.
handles, labels = ax_f1.get_legend_handles_labels()
order = [0, 3, 2, 1]

plt.tight_layout()
# Save only the AP panel by cropping the figure to that Axes' extent.
# From https://stackoverflow.com/questions/70660092/saving-a-plot-from-multiple-subplots
extent = full_extent(ax_ap).transformed(fig.dpi_scale_trans.inverted())
# Make sure the output directory exists so savefig cannot fail on a fresh checkout.
os.makedirs('./out', exist_ok=True)
fig.savefig('./out/10Dapp_model_update_ap.pdf', bbox_inches=extent, format='pdf', dpi=1200)

plt.show()

print("TCP+UDP+ICMP")

# TCP + UDP

In [None]:
# Two stacked panels: AP score on top (the only panel exported to PDF), F1 below.
fig, (ax_ap, ax_f1) = plt.subplots(nrows=2, ncols=1, figsize=(6, 9))

# x positions (minutes) where the CAIDA span and the MAWI span end.
caida_end = train_minutes + caida_minutes
timeline_end = caida_end + mawi_minutes

# Initial model: its scores are stored alongside the continual-learning run.
initial_model_path = paths[0]
initial_scores_ap = metrics[initial_model_path]["initial_model_AP"]
initial_scores_f1 = metrics[initial_model_path]["initial_model_F1"]
ax_ap.plot(
    range(train_minutes, train_minutes + len(initial_scores_ap)),
    initial_scores_ap,
    label="model CAIDA",
    c="darkgreen",
    lw=2,
    ls="dashed",
)
ax_f1.plot(
    range(train_minutes, train_minutes + len(initial_scores_f1)),
    initial_scores_f1,
    label="static model CAIDA",
    c="black",
    lw=2,
    ls="dashed",
)

for i, path in enumerate(paths):
    print(path)
    run = metrics[path]
    # Runs whose directory name contains "_mawi" are drawn as the static MAWI
    # model; everything else is drawn as the actively updated model.
    is_mawi = "_mawi" in path
    name = "model MAWI" if is_mawi else "active model"

    scores_ap = run["cl_model_AP"]
    scores_f1 = run["cl_model_F1"]
    # The MAWI model's curve starts where the MAWI span begins.
    start = caida_end if is_mawi else train_minutes
    # An empty range means no markers; otherwise one marker every update_freq points.
    markevery = range(0) if is_mawi else range(update_freq + 1, len(scores_ap), update_freq)
    lw = 2
    marker = None if is_mawi else "^"
    ls = "dashed" if is_mawi else "-"
    color = "red" if is_mawi else palette[i]

    ax_ap.plot(
        range(start, start + len(scores_ap)),
        scores_ap,
        label=name,
        marker=marker, markersize=10, markevery=markevery,
        c=color,
        lw=lw,
        ls=ls,
    )
    ax_f1.plot(
        range(start, start + len(scores_f1)),
        scores_f1,
        label=name,
        marker=marker, markersize=10, markevery=markevery,
        c=color,
        lw=lw,
        ls=ls,
    )

    if not is_mawi:
        # False-negative rate per confusion matrix: FN / (TP + FN).
        # The notebook reads conf_mat[1][0] as FN and conf_mat[1][1] as TP -
        # presumably scikit-learn's confusion_matrix layout; TODO confirm.
        eleph_mispred = []
        for conf_mat in run["cl_model_conf_mats"]:
            fn, tp = conf_mat[1][0], conf_mat[1][1]
            positives = tp + fn
            # No positive samples: emit NaN (a gap in the line) instead of
            # raising ZeroDivisionError.
            eleph_mispred.append(fn / positives if positives else float("nan"))

        # Size the x-range from the FNR series itself so x and y always match.
        fnr_minutes = range(start, start + len(eleph_mispred))
        ax_ap.plot(
            fnr_minutes,
            eleph_mispred,
            label="FNR", c="grey", lw=1, ls="dotted",
        )
        ax_f1.plot(
            fnr_minutes,
            eleph_mispred,
            label="mispredicted elephants", c="grey", lw=1, ls="dotted",
        )

    # CL model sampling rate
    # NOTE(review): plotted against its own index (x starts at 0), unlike the
    # curves above which are shifted by `start` - confirm this is intended.
    ax_f1.plot(
        run["cl_model_sampling"],
        label="Sampling rate",
        marker=None,
        c=palette[i],
        ls="-."
    )

# --- AP panel ---
# Shaded background: green up to the end of the CAIDA span, red for the MAWI span.
ax_ap.axvspan(0, caida_end, alpha=0.1, color="green")
ax_ap.axvspan(caida_end, timeline_end, alpha=0.1, color="red")
my_xticks = ['13:15', '13:35', 'Break', '19:00', '19:20']
ax_ap.set_xticks([15, 35, 55, 75, 95], my_xticks, rotation=0)
ax_ap.set_ylabel('AP score')
ax_ap.set_ylim(bottom=0., top=1.)
ax_ap.set_xlim(left=0, right=timeline_end)
ax_ap.set_xlabel('CAIDA               MAWI  ')

# Two separate legends on the AP panel. Handles come back in plotting order:
# 0 "model CAIDA", 1 "active model", 2 "FNR", 3 "model MAWI" (this relies on
# paths listing the continual-learning run first).
handles, labels = ax_ap.get_legend_handles_labels()
order = [0, 3]
legend1 = ax_ap.legend([handles[idx] for idx in order], [labels[idx] for idx in order], loc="upper left", labelspacing=0.25, ncol=1, framealpha=0.)
# A second ax.legend() call replaces the first legend, so keep it as a plain artist.
ax_ap.add_artist(legend1)
order = [1, 2]
# This one stays attached as the Axes' own legend; re-adding it would draw it twice.
legend2 = ax_ap.legend([handles[idx] for idx in order], [labels[idx] for idx in order], loc="lower left", labelspacing=0.25, ncol=1, framealpha=0.)

# --- F1 panel (shown in the notebook, cropped out of the saved PDF) ---
ax_f1.axvspan(0, caida_end, alpha=0.1, color="green")
ax_f1.axvspan(caida_end, timeline_end, alpha=0.1, color="red")
ax_f1.set_xlim(left=train_minutes, right=timeline_end)
my_xticks = ['13h05','13h15', '13h25', '13h35',  '13h45', 'Break', '18h50', '19h00', '19h10', '19h20', '19h30']
ax_f1.set_xticks([5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 105], my_xticks, rotation=45)
ax_f1.set_ylabel('F1 score')
ax_f1.set_ylim(bottom=0., top=1.)
ax_f1.set_xlabel('CAIDA                                               MAWI  ')
# NOTE(review): these are collected but never passed to ax_f1.legend(), so the
# F1 panel has no legend - either add the call or drop these two lines.
handles, labels = ax_f1.get_legend_handles_labels()
order = [0, 3, 2, 1]

plt.tight_layout()
# Save only the AP panel by cropping the figure to that Axes' extent.
# From https://stackoverflow.com/questions/70660092/saving-a-plot-from-multiple-subplots
extent = full_extent(ax_ap).transformed(fig.dpi_scale_trans.inverted())
# Make sure the output directory exists so savefig cannot fail on a fresh checkout.
os.makedirs('./out', exist_ok=True)
fig.savefig('./out/04c_model_update_ap.pdf', bbox_inches=extent, format='pdf', dpi=1200)

plt.show()