In [64]:
import joblib
import pandas as pd
from src.utils import load_data_label_vector

In [65]:
from sklearn.metrics import confusion_matrix


def check_performance(
    workspace,
    adv,
    testing,
    data_root="/app/wafcraft/data/prepared",
    other_workspace="2024-04-07_18-15-53_brown-lot",
):
    """Evaluate one workspace's saved model and return its confusion matrix.

    The model and its decision threshold are read from
    ``{data_root}/{workspace}/model`` (or ``model_adv`` when ``adv`` is truthy).
    Predictions are obtained by comparing the second column of
    ``predict_proba`` against that threshold.

    Parameters
    ----------
    workspace : str
        Name of the workspace directory below ``data_root``.
    adv : bool
        If truthy, load ``model_adv/model_adv.joblib`` and
        ``model_adv/threshold_adv.txt``; otherwise ``model/model.joblib`` and
        ``model/threshold.txt``.
    testing : str
        Which samples to evaluate on:

        * ``"my_all"``      - this workspace's ``test.csv`` plus ``test_adv.csv``
        * ``"my_test"``     - this workspace's ``test.csv`` only
        * ``"my_test-adv"`` - ``test_adv.csv`` plus the label-0 rows of ``test.csv``
        * ``"other_test"``  - ``test.csv`` of ``other_workspace``
    data_root : str, optional
        Directory containing the prepared workspaces.
    other_workspace : str, optional
        Workspace whose ``test.csv`` is used for ``"other_test"``.

    Returns
    -------
    numpy.ndarray
        2x2 confusion matrix with rows = true label and columns = predicted
        label, both ordered ``[0, 1]``.

    Raises
    ------
    ValueError
        If ``testing`` is not one of the options listed above.
    """
    # Fail fast: reject a bad option before loading the model or any CSV.
    valid_options = ("my_all", "my_test", "my_test-adv", "other_test")
    if testing not in valid_options:
        raise ValueError("Invalid testing option")

    workspace_fullpath = f"{data_root}/{workspace}"
    model_dir = "model_adv" if adv else "model"
    threshold_suffix = "_adv" if adv else ""

    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code - only point this at model files you trust.
    model = joblib.load(f"{workspace_fullpath}/{model_dir}/{model_dir}.joblib")

    # Context manager so the file handle is closed deterministically.
    threshold_path = f"{workspace_fullpath}/{model_dir}/threshold{threshold_suffix}.txt"
    with open(threshold_path, "r") as threshold_file:
        threshold = float(threshold_file.read())

    def _load_split(csv_path):
        # Keep only the two columns the evaluation needs.
        return load_data_label_vector(csv_path)[["vector", "label"]]

    my_test_path = f"{workspace_fullpath}/test.csv"
    my_test_adv_path = f"{workspace_fullpath}/test_adv.csv"

    # Load only the files the chosen mode needs, so e.g. "other_test" does not
    # depend on this workspace having a test_adv.csv.
    if testing == "my_all":
        test = pd.concat([_load_split(my_test_path), _load_split(my_test_adv_path)])
    elif testing == "my_test":
        test = _load_split(my_test_path)
    elif testing == "my_test-adv":
        my_test = _load_split(my_test_path)
        my_test_benign = my_test[my_test["label"] == 0]
        test = pd.concat([_load_split(my_test_adv_path), my_test_benign])
    else:  # "other_test" - the only option left after the validation above
        test = _load_split(f"{data_root}/{other_workspace}/test.csv")

    print(f"Testing: {testing}")
    print(f"Test shape: {test.shape}")

    X_test, y_test = list(test["vector"]), test["label"]
    probabilities = model.predict_proba(X_test)[:, 1]
    adjusted_predictions = (probabilities >= threshold).astype(int)

    # Pin the label order so the result is always 2x2, even when the evaluated
    # set happens to contain a single class; callers index cm[0][...] / cm[1][...].
    cm = confusion_matrix(y_test, adjusted_predictions, labels=[0, 1])
    return cm

In [66]:
# Workspaces whose `model_adv` variant is scored in the loop below.
workspaces = [
    "2024-04-18_14-12-51_lightblue-around",
    "2024-05-10_15-03-09_darkred-number",
    "2024-04-22_11-20-36_yellow-majority",
    "2024-05-10_23-07-35_beige-western",
    "2024-04-23_05-02-11_cadetblue-right",
    "2024-04-08_21-57-36_greenyellow-fear",
    "2024-05-11_07-05-20_honeydew-check",
    "2024-04-23_02-58-14_blanchedalmond-table",
    "2024-04-22_18-57-13_darkslateblue-air",
    "2024-05-11_14-57-56_darkgray-general",
]

# Test-set selector handed to check_performance for every workspace.
testing = "other_test"

# Per-workspace rates, in the same order as `workspaces`.
all_fprs, all_tprs = [], []

for workspace in workspaces:
    cm = check_performance(workspace, adv=True, testing=testing)
    print(f"CM: {cm}")

    # Row 0 of the matrix is true label 0, row 1 is true label 1;
    # column 1 in either row counts samples predicted as 1.
    label0_row, label1_row = cm[0], cm[1]
    fpr = label0_row[1] / (label0_row[1] + label0_row[0])
    tpr = label1_row[1] / (label1_row[1] + label1_row[0])

    all_fprs.append(fpr)
    all_tprs.append(tpr)

# Unweighted mean over workspaces.
print(f"Average FPR: {sum(all_fprs) / len(all_fprs)}")
print(f"Average TPR: {sum(all_tprs) / len(all_tprs)}")

Testing: other_test
Test shape: (4000, 2)
CM: [[1981   19]
 [  79 1921]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1980   20]
 [  69 1931]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1981   19]
 [  77 1923]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1971   29]
 [  45 1955]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1984   16]
 [  82 1918]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1985   15]
 [  92 1908]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1984   16]
 [  88 1912]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1977   23]
 [  74 1926]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1980   20]
 [  91 1909]]
Testing: other_test
Test shape: (4000, 2)
CM: [[1972   28]
 [  45 1955]]
Average FPR: 0.010249999999999999
Average TPR: 0.9629000000000001
