Run with the following conda environment: `../../conda_envs/training_env`

In [None]:
# Results folder of the run to inspect (relative to this notebook).
# Later cells look for substrings of this path ("pheavy", "_caida", "_mawi",
# "_uni", "_none") to decide which artifacts to load.
model_folder = (
    "../../results/5_pk/tcp_udp/"
    "initial5min_pruning+feat_selection+quantization_0dryrun_caida/"
)

In [None]:
# Run settings used by the cells below.
train_minutes = 5  # Training on minutes 0-4, testing on minutes 5-59.
pheavy_npk = 5  # packet-count index read from the pHeavy per-minute scores
update_id = 0  # not referenced by the cells visible in this notebook section
# pHeavy runs are recognised by the "pheavy" substring in the folder path.
is_pheavy = "pheavy" in model_folder

In [None]:
import os
import sys
# Put the parent directory on the import path (once) so that modules living
# one level above this notebook can be imported.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import pickle
from pprint import pprint
import matplotlib.pyplot as plt
import json
from statistics import fmean
from sklearn.tree import plot_tree

In [None]:
# List the files stored in the selected model folder (IPython shell escape;
# $model_folder is expanded from the Python variable of the same name).
!ls -l $model_folder

In [None]:
# Load the run arguments saved next to the model.
# os.path.join keeps the path valid whether or not model_folder ends with "/".
args_path = os.path.join(model_folder, "args_train_val_continual_voting_pipeline.json")
with open(args_path) as f:
    args = json.load(f)

# Explore run args
pprint(list(args.keys()))

In [None]:
# Model

# Suffix of the updated-pipeline pickle, keyed by the dataset tag found in the
# folder name. The dict order matters: when several tags match, the last one
# wins, exactly like the original chain of independent `if` statements.
minute_by_dataset_tag = {"_caida": "134500", "_mawi": "1915", "_uni": "145"}

# Always reset `minute` so a value left over from a previous run of this cell
# (with another model_folder) can never be picked up silently.
minute = None
for dataset_tag, dataset_minute in minute_by_dataset_tag.items():
    if dataset_tag in model_folder:
        minute = dataset_minute

# NOTE(security): pickle.load can execute arbitrary code contained in the file.
# Only point model_folder at results you produced or otherwise trust.
if is_pheavy:
    with open(os.path.join(model_folder, "model_pheavy.pkl"), "rb") as f:
        model = pickle.load(f)
else:
    # The pipeline file names embed `minute`, so fail early and clearly when the
    # dataset could not be identified from the folder name.
    if minute is None:
        raise ValueError(
            f"Cannot infer the dataset from {model_folder!r}: expected one of "
            f"{list(minute_by_dataset_tag)} in the folder name."
        )
    with open(os.path.join(model_folder, f"cl_pipeline_{minute}.pkl"), "rb") as f:  # Updated model
        model = pickle.load(f)
    # Sizes (for pHeavy, the sizes are saved in the model pickle file)
    with open(os.path.join(model_folder, f"cl_pipeline_{minute}_sizes.pkl"), "rb") as f:
        sizes_and_thr = pickle.load(f)

# Metrics
with open(os.path.join(model_folder, "minute_APscore_initial_vs_CL.pkl"), "rb") as f:
    scores = pickle.load(f)

In [None]:
# Names of the score series that were recorded during testing
score_names = list(scores.keys())
pprint(score_names)

In [None]:
if not is_pheavy:
    # Feature names are only read for runs whose folder name contains "_none";
    # for every other run the tree is drawn without feature names.
    feature_names = None
    if "_none" in model_folder:
        with open("../training/params/feature_names_5pk.txt") as names_file:
            feature_names = list(map(str.rstrip, names_file))

    # Draw the top levels (max_depth=3) of the first element of the "model" step.
    fig, ax = plt.subplots(figsize=(10, 5), ncols=1, nrows=1)
    plot_tree(
        model["model"][0],
        ax=ax,
        max_depth=3,
        feature_names=feature_names,
    )
    plt.show()

In [None]:
if is_pheavy:
    # Every entry of the pHeavy model dict carries its own "size" field.
    for npk, entry in model.items():
        print(f"pHeavy at {npk} packets: {entry['size']} KB")
else:
    # Summarise the pipeline and its "model" step, then dump the loaded sizes.
    forest = model['model']
    step_names = list(model.named_steps.keys())
    print(f"ML pipeline steps: {step_names}")
    print(f"Random forest max depth: {forest.max_depth}")
    print(f"Random forest ntrees (after pruning): {len(forest.estimators_)}")
    pprint(sizes_and_thr)

In [None]:
def _plot_score_series(ax, values, label, first_minute):
    """Plot one per-minute score series on `ax`.

    Args:
        ax: Matplotlib axes to draw on.
        values: Sequence with one score per tested minute.
        label: Legend label of the curve.
        first_minute: x coordinate of the first value (the first tested minute).
    """
    values = list(values)
    ax.plot(range(first_minute, first_minute + len(values)), values, label=label)


fig, ax = plt.subplots()

if is_pheavy:
    # Always show the 5-packet scores, plus the `pheavy_npk` ones when that is a
    # different packet count. dict.fromkeys de-duplicates while keeping order,
    # so pheavy_npk == 5 no longer draws every curve (and legend entry) twice.
    packet_counts = dict.fromkeys((5, pheavy_npk))
    for npk in packet_counts:
        for metric in ("AP", "F1"):
            # Each per-minute entry is indexed by packet count.
            per_minute = [entry[npk] for entry in scores[f"initial_model_{metric}"]]
            _plot_score_series(ax, per_minute, f"pHeavy {metric} ({npk} pk)", train_minutes)
else:
    # The score key doubles as the legend label.
    for score_key in ("initial_model_AP", "cl_model_AP", "initial_model_F1", "cl_model_F1"):
        _plot_score_series(ax, scores[score_key], score_key, train_minutes)

ax.set_xlabel("Minute")
ax.set_ylabel("Score")
ax.set_ylim(0, 1)
ax.legend()
plt.show()