In [None]:
import sys

import pandas as pd
import numpy as np
import time
import json
import sys
from pathlib import Path
import os
sys.path.append('..')
from c_trainer import bayesFS, my_ard_train, my_enet_train, my_lasso_train, fsMTS_train
import warnings
# Silence all warnings, but only when no explicit warning options were
# supplied to the interpreter (sys.warnoptions is empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore" # Also affect subprocesses


# Load the fault dataset; every distinct value of the "container" column is
# analysed on its own further down.
data = pd.read_csv("container_fault_dataset.csv")
# Group by the scalar column name rather than a one-element list: pandas has
# deprecated scalar keys for list groupers (they become 1-tuples), which would
# break both the `data["container"] == container` filter and the
# `label_dict[container]` lookup below.
containers = list(data.groupby("container").groups)


### standardize data
def standardize_np(x):
    """Standardize ``x`` along axis 0 to zero mean and unit standard deviation.

    Columns whose standard deviation is (numerically) zero are only centred:
    they are divided by 1 instead of by 0, so a constant column becomes all
    zeros rather than NaN/inf.

    Args:
        x: array-like whose first axis indexes the samples.

    Returns:
        The standardized data, with the same shape as ``x``.
    """
    EPS = 1e-16
    mean = np.mean(x, 0)
    std = np.std(x, 0)
    # Guard against division by zero for constant columns.
    safe_std = np.where(std > EPS, std, 1.0)
    return (x - mean) / safe_std


# Per-container training durations, one list per attribution method.
ard_times, lasso_times, enet_times = [], [], []
bmfs_times, fsMTS_times = [], []

# Ground-truth labels, indexed as label_dict[container][metric_position].
with open("label.json") as fp:
    label_dict = json.load(fp)

# Per-method confusion counts: outcome ("TP"/"FN"/"FP") -> metric name -> count.
ard_performance = dict(TP={}, FN={}, FP={})
lasso_performance = dict(TP={}, FN={}, FP={})
enet_performance = dict(TP={}, FN={}, FP={})
bmfs_performance = dict(TP={}, FN={}, FP={})
fsmts_performance = dict(TP={}, FN={}, FP={})

# run attribution for each container fault
def _timed_fit(train_fn, *args):
    """Call ``train_fn(*args)`` and return ``(model, elapsed_seconds)``.

    Uses ``time.perf_counter`` so the measurement is monotonic and unaffected
    by system clock adjustments.
    """
    start = time.perf_counter()
    model = train_fn(*args)
    return model, time.perf_counter() - start


def _tally_attribution(performance, labels, coefs, metrics):
    """Print each metric's coefficient and update the confusion counts.

    A metric counts as "selected" when its coefficient is non-zero. Compared
    with the ground-truth label (1 = relevant, 0 = not relevant) this gives:
    label 1 and selected -> TP, label 0 and selected -> FP,
    label 1 and not selected -> FN. True negatives (label 0, coefficient 0)
    are skipped entirely and do not even create an entry for the metric.

    Args:
        performance: dict with keys "TP", "FN", "FP", each mapping
            metric name -> count; updated in place.
        labels: ground-truth labels, indexed by metric position.
        coefs: fitted coefficients, indexed by metric position.
        metrics: metric names, in the same order as ``labels``/``coefs``.
    """
    for j, metric in enumerate(metrics):
        coef = coefs[j]
        print(metric, coef)
        label = labels[j]
        if label == 0 and coef == 0:
            continue
        # Make sure the metric has an entry for every outcome before counting.
        for outcome in ("TP", "FN", "FP"):
            performance[outcome].setdefault(metric, 0)
        selected = abs(coef) > 0
        if label == 1 and selected:
            performance["TP"][metric] += 1
        elif label == 0 and selected:
            performance["FP"][metric] += 1
        elif label == 1 and abs(coef) == 0:
            performance["FN"][metric] += 1


# Feature columns used as regressors; the order must match the label vectors.
select_cols = ["cpu_util", "cpu_sys", "cpu_user", "mem_util", "mem_used",
               "tcp_currestab", "traffic_pkg_in", "traffic_pkg_out",
               "traffic_byte_in", "traffic_byte_out"]

for container in containers:
    single_container_data = data[data["container"] == container]
    x = single_container_data[select_cols]
    y = single_container_data["y"]
    # Look the labels up before training so a missing container fails fast.
    labels = label_dict[container]

    # run ARD regression and get the attribution score
    ard, elapsed = _timed_fit(my_ard_train, x, y, False, "standard")
    ard_times.append(elapsed)
    print(container, "ARD coef:")
    _tally_attribution(ard_performance, labels, ard.coef_, select_cols)
    print()

    # run LASSO regression and get the attribution score
    lasso, elapsed = _timed_fit(my_lasso_train, x, y, False, "standard")
    lasso_times.append(elapsed)
    print(container, "LASSO coef:")
    _tally_attribution(lasso_performance, labels, lasso.coef_, select_cols)
    print()

    # run Elastic Net regression and get the attribution score
    enet, elapsed = _timed_fit(my_enet_train, x, y, False, "standard")
    enet_times.append(elapsed)
    print(container, "Elastic Net coef:")
    _tally_attribution(enet_performance, labels, enet.coef_, select_cols)
    print()

    # run BMFS and get the attribution score
    BMFS, elapsed = _timed_fit(bayesFS, x, y, False, "standard")
    bmfs_times.append(elapsed)
    print(container, "BMFS coef:")
    _tally_attribution(bmfs_performance, labels, BMFS.coef_, select_cols)
    print()

    # run fsMTS and get the attribution score
    # (fsMTS_train is called without the boolean flag the other trainers take)
    fsMTS, elapsed = _timed_fit(fsMTS_train, x, y, "standard")
    fsMTS_times.append(elapsed)
    print(container, "FSMTS coef:")
    _tally_attribution(fsmts_performance, labels, fsMTS.coef_, select_cols)
    print()

    print()



In [2]:
# show the average wall-clock training time per container for each method
print("ARD Average Time", np.mean(ard_times))
print("LASSO Average Time", np.mean(lasso_times))
print("Elastic Net Average Time", np.mean(enet_times))
print("BMFS Average Time", np.mean(bmfs_times))
print("FSMTS Average Time", np.mean(fsMTS_times))
print()


def _per_metric_scores(performance):
    """Compute per-metric precision, recall and F1 from confusion counts.

    Args:
        performance: dict with keys "TP", "FN", "FP", each mapping
            metric name -> count (all three share the same metric keys).

    Returns:
        Three lists ``(precisions, recalls, f1_scores)`` with one entry per
        metric, in the iteration order of ``performance["TP"]``.

    A metric with no true positives scores 0 for all three values. This also
    avoids a ZeroDivisionError when TP + FP or TP + FN is zero.
    """
    precisions, recalls, f1_scores = [], [], []
    for metric, tp in performance["TP"].items():
        if tp == 0:
            precisions.append(0)
            recalls.append(0)
            f1_scores.append(0)
            continue
        precision = tp / (tp + performance["FP"][metric])
        recall = tp / (tp + performance["FN"][metric])
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(2 * precision * recall / (precision + recall))
    return precisions, recalls, f1_scores


# compute recall, precision, and f1 score for each method over the container fault dataset
# (scores are macro-averaged: computed per metric, then averaged over metrics)
ard_precisions, ard_recalls, ard_f1_scores = _per_metric_scores(ard_performance)
print("ARD precision, recall, and f1 score:", np.mean(ard_precisions), np.mean(ard_recalls), np.mean(ard_f1_scores))

lasso_precisions, lasso_recalls, lasso_f1_scores = _per_metric_scores(lasso_performance)
print("LASSO precision, recall, and f1 score:", np.mean(lasso_precisions), np.mean(lasso_recalls),
      np.mean(lasso_f1_scores))

enet_precisions, enet_recalls, enet_f1_scores = _per_metric_scores(enet_performance)
print("Enet precision, recall, and f1 score:", np.mean(enet_precisions), np.mean(enet_recalls), np.mean(enet_f1_scores))

bmfs_precisions, bmfs_recalls, bmfs_f1_scores = _per_metric_scores(bmfs_performance)
print("BMFS precision, recall, and f1 score:", np.mean(bmfs_precisions), np.mean(bmfs_recalls), np.mean(bmfs_f1_scores))

fsmts_precisions, fsmts_recalls, fsmts_f1_scores = _per_metric_scores(fsmts_performance)
print("fsMTS precision, recall, and f1 score:", np.mean(fsmts_precisions), np.mean(fsmts_recalls),
      np.mean(fsmts_f1_scores))

ARD Average Time 0.40609402656555177
LASSO Average Time 0.5600771474838256
Elastic Net Average Time 0.5998151302337646
BMFS Average Time 0.20007145166397095
FSMTS Average Time 0.89454594373703

ARD precision, recall, and f1 score: 0.9803665158371041 0.4783908045977011 0.6266195620660145
LASSO precision, recall, and f1 score: 0.5213130277622099 0.5090700104493209 0.47434082434082436
Enet precision, recall, and f1 score: 0.5528071087409323 0.7352246603970742 0.5933715607127658
BMFS precision, recall, and f1 score: 0.9134323305212545 0.8145977011494253 0.854825938969767
fsMTS precision, recall, and f1 score: 0.5201816405998592 0.4808986415882968 0.4585272851890423
