In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    precision_recall_fscore_support,
    precision_recall_curve,
    roc_curve,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split

In [None]:
# Set up LaTeX rendering for all figure text (titles below use $...$ math).
# NOTE: matplotlib's usetex mode needs a working LaTeX installation.
plt.rcParams["text.usetex"] = True

# Load data
# Per-protocol size value that appears as the suffix of the parquet file name
# (presumably the maximum frame size in bytes times 8 bits -- TODO confirm).
packet_sizes = {
    "zigbee": 127 * 8,
    "bluetooth": 258 * 8,
    "wifi": 2304 * 8,
}
PROTOCOL = "zigbee"

# Build the path with pathlib so the separator is correct on every OS; the
# previous hard-coded "data\\..." string only pointed into data/ on Windows.
DATA_DIR = Path("data")
df_packets = pd.read_parquet(
    DATA_DIR / f"packets_{PROTOCOL}_{packet_sizes[PROTOCOL]}.parquet"
)

# Integer-divide the stored labels by 2, so each consecutive pair of raw
# labels (0/1, 2/3, ...) collapses into a single class id.
df_packets["label"] = (df_packets["label"] // 2).astype(int)

# Define feature sets
# Set A: only the Z_I / Z_Q columns.
X_a = df_packets[["Z_I", "Z_Q"]]
# Set B: set A plus the IQ-offset, magnitude-error and phase-error columns.
X_b = df_packets[["Z_I", "Z_Q", "IQ_offset", "magnitude_error", "phase_error"]]
y = df_packets["label"]


# Prepare storage
def initialize_metrics():
    """Build an empty results store with one fresh list per metric/curve key.

    Returns
    -------
    dict
        Maps each of the scalar-metric keys, the curve keys and ``"auc_roc"``
        to its own new empty list.
    """
    scalar_keys = ("accuracy", "precision", "recall", "f1")
    curve_keys = ("prec_curves", "rec_curves", "fpr_curves", "tpr_curves")

    store = {}
    # Insertion order matches the order the keys are listed above.
    for key in (*scalar_keys, *curve_keys, "auc_roc"):
        store[key] = []
    return store


# One independent, empty store per feature set (filled in by evaluate_models).
results_a, results_b = initialize_metrics(), initialize_metrics()


# Function to evaluate and collect results
def evaluate_models(
    X,
    results_store,
    label="Set",
    n_classes=6,
    test_size=0.2,
    random_state=42,
):
    """Fit one one-vs-rest linear SVM per class on ``X`` and record test metrics.

    For every class index ``i`` in ``range(n_classes)`` the module-level target
    ``y`` is binarised (``y == i`` -> 1, otherwise 0), the rows are split into
    train/test parts, a ``LinearSVC`` with balanced class weights is fitted on
    the training part, and its hold-out metrics and curves are appended to
    ``results_store``.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature table; must have one row per entry of the module-level ``y``.
    results_store : dict
        Store with the list-valued keys ``"accuracy"``, ``"precision"``,
        ``"recall"``, ``"f1"``, ``"fpr_curves"``, ``"tpr_curves"``,
        ``"prec_curves"``, ``"rec_curves"`` and ``"auc_roc"`` (as produced by
        ``initialize_metrics()``). It is mutated in place: one entry per class
        is appended to each list, and the keys ``"recall_range"``,
        ``"fpr_range"``, ``"avg_precision"`` and ``"avg_tpr"`` are (over)written.
    label : str, optional
        Currently unused; kept so existing callers that pass it keep working.
    n_classes : int, optional
        Number of class indices ``0 .. n_classes - 1`` to evaluate.
    test_size : float or int, optional
        Forwarded to ``train_test_split``.
    random_state : int or None, optional
        Seed for the split and for ``LinearSVC``. Because the same seed is used
        for every class and every call, inputs with the same number of rows are
        split into the same train/test rows, so results are reproducible and
        different feature sets are scored on the same held-out samples. Pass
        ``None`` for an unseeded (non-reproducible) run.

    Raises
    ------
    ValueError
        If ``n_classes`` is smaller than 1 (the averages would divide by zero).
    """
    if n_classes < 1:
        raise ValueError("n_classes must be at least 1")

    # Common x-grids onto which every per-class curve is interpolated so the
    # curves can be averaged point-wise.
    grid_points = 100
    recall_range = np.linspace(0, 1, grid_points)
    fpr_range = np.linspace(0, 1, grid_points)
    precision_interp_total = np.zeros_like(recall_range)
    tpr_interp_total = np.zeros_like(fpr_range)

    for i in range(n_classes):
        # One-vs-rest target: 1 for class i, 0 for everything else.
        y_current = np.where(y == i, 1, 0)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y_current, test_size=test_size, random_state=random_state
        )

        model = svm.LinearSVC(
            class_weight="balanced", max_iter=10000, random_state=random_state
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Signed distances to the hyperplane; used as ranking scores for the curves.
        y_scores = model.decision_function(X_test)

        # Metrics (precision, recall and F1 come from a single pass)
        acc = accuracy_score(y_test, y_pred)
        prec, rec, f1, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary"
        )
        fpr, tpr, _ = roc_curve(y_test, y_scores)
        roc_auc = roc_auc_score(y_test, y_scores)
        precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_scores)

        # Save
        results_store["accuracy"].append(acc)
        results_store["precision"].append(prec)
        results_store["recall"].append(rec)
        results_store["f1"].append(f1)
        results_store["fpr_curves"].append(fpr)
        results_store["tpr_curves"].append(tpr)
        results_store["prec_curves"].append(precision_curve)
        results_store["rec_curves"].append(recall_curve)
        results_store["auc_roc"].append(roc_auc)

        # Interpolated curves for averaging. precision_recall_curve returns
        # recall in decreasing order, so both arrays are reversed to give
        # np.interp the increasing x-coordinates it requires.
        precision_interp_total += np.interp(
            recall_range, recall_curve[::-1], precision_curve[::-1]
        )
        tpr_interp_total += np.interp(fpr_range, fpr, tpr)

    # Compute the point-wise mean curves over all evaluated classes
    results_store["recall_range"] = recall_range
    results_store["fpr_range"] = fpr_range
    results_store["avg_precision"] = precision_interp_total / n_classes
    results_store["avg_tpr"] = tpr_interp_total / n_classes


# Run evaluations
# NOTE: evaluate_models appends to the stores, so results_a / results_b must be
# freshly initialised before these two calls are (re-)run.
evaluate_models(X_a, results_a, label="A")
evaluate_models(X_b, results_b, label="B")

# Output locations. pathlib keeps the separators portable (the previous
# "figures\\..." / "tables\\..." strings only pointed into sub-directories on
# Windows), and the directories are created up front so saving cannot fail
# just because they are missing.
FIGURES_DIR = Path("figures")
TABLES_DIR = Path("tables")
for out_dir in (FIGURES_DIR, TABLES_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Number of per-class entries stored in each results list.
N_CLASSES = 6
# Scalar per-class metrics that go into the LaTeX tables.
METRIC_COLUMNS = ["accuracy", "precision", "recall", "f1", "auc_roc"]

# ----------------- Figure 1: Precision-Recall -----------------
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

pr_titles = (
    r"Precision-Recall Curves (Model Set A - $Z_I$, $Z_Q$)",
    r"Precision-Recall Curves (Model Set B - Full Features)",
)
for ax, res, title in zip((ax1, ax2), (results_a, results_b), pr_titles):
    # One curve per class ...
    for i in range(N_CLASSES):
        ax.plot(res["rec_curves"][i], res["prec_curves"][i], label=f"Label {i}")
    # ... plus the point-wise mean over all classes.
    ax.plot(
        res["recall_range"],
        res["avg_precision"],
        label="Avg",
        color="black",
        linestyle="--",
        linewidth=2,
    )
    ax.set_title(title)
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.grid(True)
    ax.legend()

fig1.tight_layout()
fig1.savefig(FIGURES_DIR / "PR_curves_2.png")
plt.show()

# ----------------- Figure 2: ROC Curves -----------------
fig2, (ax3, ax4) = plt.subplots(1, 2, figsize=(14, 6))

roc_titles = (
    r"ROC Curves (Model Set A - $Z_I$, $Z_Q$)",
    r"ROC Curves (Model Set B - Full Features)",
)
for ax, res, title in zip((ax3, ax4), (results_a, results_b), roc_titles):
    for i in range(N_CLASSES):
        ax.plot(
            res["fpr_curves"][i],
            res["tpr_curves"][i],
            label=f"Label {i} (AUC={res['auc_roc'][i]:.2f})",
        )
    ax.plot(
        res["fpr_range"],
        res["avg_tpr"],
        label="Avg",
        color="black",
        linestyle="--",
        linewidth=2,
    )
    ax.set_title(title)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.grid(True)
    ax.legend()

# Finish and save Figure 2 before doing anything unrelated to it.
fig2.tight_layout()
fig2.savefig(FIGURES_DIR / "ROC_curves_2.png")
plt.show()

# ----------------- Tables: per-class scalar metrics -----------------
# Build the frames from the scalar metric lists only; the stores also hold
# ragged per-class curve arrays and 100-point grids that do not belong in a
# table with one row per class.
df_results_a = pd.DataFrame(
    {name: results_a[name] for name in METRIC_COLUMNS}, index=range(N_CLASSES)
)
df_results_b = pd.DataFrame(
    {name: results_b[name] for name in METRIC_COLUMNS}, index=range(N_CLASSES)
)

df_results_a = df_results_a.round(3)
df_results_b = df_results_b.round(3)

df_results_a.to_latex(TABLES_DIR / "results_2a.tex")
df_results_b.to_latex(TABLES_DIR / "results_2b.tex")