In [1]:
import numpy as np
import pandas as pd
import copy
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.exceptions import ConvergenceWarning

# --- CONFIGURATION ---
# Reproducibility: one seed shared by NumPy's global RNG and the sklearn calls below.
RANDOM_STATE = 42

# Input data (path is relative to the notebook's working directory).
DATA_PATH = "../data/processed/cicids_10pct_pruned.csv"

# Federated-learning setup.
NUM_CLIENTS = 10
ROUNDS = 20
ALPHA = 0.1             # Strong Non-IID
MALICIOUS_FRACTION = 0.2
SERVER_LR = 0.8         # For FedAvgM

np.random.seed(RANDOM_STATE)

# Local models are fit with a small max_iter (see init_model), so lbfgs is
# presumably expected to stop before converging; keep that warning quiet.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# %% [1] DATA LOADING & PREPROCESSING
df = pd.read_csv(DATA_PATH)
# Binary target: 1 for every non-BENIGN label, 0 for BENIGN.
df["target"] = (df["Label"] != "BENIGN").astype(int)

# Split by capture file: Monday-Thursday captures train, Friday captures test.
train_files = [
    "Monday-WorkingHours.pcap_ISCX.csv",
    "Tuesday-WorkingHours.pcap_ISCX.csv",
    "Wednesday-workingHours.pcap_ISCX.csv",
    "Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv",
    "Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv",
]
test_files = [
    "Friday-WorkingHours-Morning.pcap_ISCX.csv",
    "Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv",
    "Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv",
]

df_train_raw = df.loc[df["source_file"].isin(train_files)].copy()
df_test_raw = df.loc[df["source_file"].isin(test_files)].copy()

# Label / bookkeeping columns that must not be used as model features.
META_COLS = ["Label", "label_bin", "label_binary", "source_file", "day", "attack_group", "target"]


def _numeric_features(frame):
    """Return the numeric feature columns of ``frame`` with +/-inf mapped to NaN.

    Drops every column listed in ``META_COLS`` first, so ``frame`` must contain
    all of them.
    """
    features = frame.drop(columns=META_COLS)
    features = features.select_dtypes(include=[np.number])
    return features.replace([np.inf, -np.inf], np.nan)


X_train_all = _numeric_features(df_train_raw)
y_train_all = df_train_raw["target"]
X_test_all = _numeric_features(df_test_raw)
y_test = df_test_raw["target"]

# --- CRITICAL: VALIDATION SPLIT FOR CALIBRATION ---
# 15% of the Mon-Thu rows (stratified on the target) are held out for
# threshold tuning. They are split off before the client partition is built,
# so no client ever trains on them.
X_train_pool, X_val_raw, y_train_pool, y_val = train_test_split(
    X_train_all,
    y_train_all,
    test_size=0.15,
    random_state=RANDOM_STATE,
    stratify=y_train_all,
)

# Median imputation followed by standardisation; statistics are learned from
# the training pool only and then applied unchanged to validation and test.
pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

X_train_scaled = pipeline.fit_transform(X_train_pool)
X_val = pipeline.transform(X_val_raw)
X_test = pipeline.transform(X_test_all)

# %% [2] FL COMPONENTS (Label-Flipping & Aggregators)
def split_clients_dirichlet(X, y, num_clients, alpha=0.1):
    """Partition a labelled dataset across clients with Dirichlet label skew.

    For every distinct label found in ``y``, the positions of that class are
    shuffled and cut into ``num_clients`` contiguous chunks whose relative
    sizes are drawn from ``Dirichlet(alpha, ..., alpha)``. Chunk ``i`` of each
    class is assigned to client ``i``.

    Args:
        X: Feature matrix that supports row selection with a list of integer
            positions (e.g. a NumPy array), row-aligned with ``y``.
        y: Labels as a pandas Series, NumPy array, or plain sequence. Label
            values may be arbitrary; they do not need to be ``0..K-1``.
        num_clients: Number of partitions to draw.
        alpha: Dirichlet concentration parameter.

    Returns:
        A list of ``(X_client, y_client)`` tuples where ``y_client`` is a NumPy
        array. Clients that received no samples are dropped, so the list may
        be shorter than ``num_clients``.

    Note:
        Randomness comes from the global ``np.random`` state; seed it before
        calling for reproducible partitions.
    """
    labels = np.asarray(y)
    client_indices = [[] for _ in range(num_clients)]

    # Iterate over the labels that actually occur (np.unique returns them
    # sorted) instead of assuming they are exactly 0..K-1; otherwise samples
    # with any other label value would silently be left out of every client.
    for label in np.unique(labels):
        members = np.flatnonzero(labels == label)
        np.random.shuffle(members)
        shares = np.random.dirichlet(alpha * np.ones(num_clients))
        # Cumulative shares scaled to the class size give the split points;
        # the last one (the class size itself) is dropped for np.split.
        cut_points = (np.cumsum(shares) * len(members)).astype(int)[:-1]
        for bucket, chunk in zip(client_indices, np.split(members, cut_points)):
            bucket.extend(chunk)

    return [(X[idxs], labels[idxs]) for idxs in client_indices if len(idxs) > 0]

def apply_label_flipping(clients, malicious_indices):
    """Invert the binary labels of the clients selected as malicious.

    Args:
        clients: Sequence of ``(X, y)`` tuples; ``y`` must support ``1 - y``
            (e.g. a NumPy array of 0/1 labels).
        malicious_indices: Container of client positions whose labels are
            replaced by ``1 - y``.

    Returns:
        A new list of ``(X, y)`` tuples in the same order. Feature matrices
        and the labels of honest clients are passed through as the same
        objects (no copy); malicious clients get a new label array.
    """
    return [
        (features, (1 - labels) if position in malicious_indices else labels)
        for position, (features, labels) in enumerate(clients)
    ]

def aggregate_median(client_models):
    """Aggregate client models by the coordinate-wise median of their parameters.

    Args:
        client_models: Non-empty sequence of fitted models exposing ``coef_``
            and ``intercept_`` arrays of matching shapes.

    Returns:
        ``(coef, intercept)``: the element-wise medians taken across clients.
    """
    coef_median, intercept_median = (
        np.median(np.stack([getattr(model, attr) for model in client_models]), axis=0)
        for attr in ("coef_", "intercept_")
    )
    return coef_median, intercept_median

def init_model():
    """Build the unfitted logistic-regression model used as the global model.

    The estimator uses the lbfgs solver capped at 10 iterations per ``fit``
    call, balanced class weights, and ``warm_start=True`` (with which
    scikit-learn reuses the previous solution as the starting point of the
    next ``fit``).
    """
    hyperparams = {
        "max_iter": 10,
        "solver": "lbfgs",
        "warm_start": True,
        "class_weight": "balanced",
        "random_state": RANDOM_STATE,
    }
    return LogisticRegression(**hyperparams)

def local_train(global_model, X_local, y_local):
    """Fit a copy of the global model on one client's data.

    Args:
        global_model: Model to start from; it is deep-copied and never
            modified by this function.
        X_local: The client's feature matrix.
        y_local: The client's labels.

    Returns:
        The fitted copy, or ``None`` when ``y_local`` contains fewer than two
        distinct labels (such a client cannot train a classifier).
    """
    distinct_labels = np.unique(y_local)
    if distinct_labels.size >= 2:
        client_model = copy.deepcopy(global_model)
        client_model.fit(X_local, y_local)
        return client_model
    return None

# %% [3] CALIBRATION LOGIC
def find_optimal_threshold(model, X_v, y_v):
    """Pick the decision threshold that maximises F1 on a validation set.

    Candidate thresholds run from 0.05 in steps of 0.01 (stopping before
    0.96). The positive-class probability is taken from column 1 of
    ``model.predict_proba``.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        X_v: Validation features.
        y_v: Validation labels (binary).

    Returns:
        The first candidate threshold reaching the highest F1 (ties keep the
        lowest threshold), or 0.5 if no candidate achieves an F1 above zero.
    """
    positive_scores = model.predict_proba(X_v)[:, 1]
    chosen_tau = 0.5
    top_f1 = 0
    for candidate in np.arange(0.05, 0.96, 0.01):
        hard_labels = (positive_scores >= candidate).astype(int)
        score = f1_score(y_v, hard_labels)
        if score > top_f1:
            top_f1 = score
            chosen_tau = candidate
    return chosen_tau

# %% [4] EXPERIMENT EXECUTION
# Partition the scaled training pool across clients, then flip the labels of a
# randomly chosen subset of clients. The same poisoned partition is reused for
# every aggregation method so the methods are compared on identical data.
clients_list = split_clients_dirichlet(X_train_scaled, y_train_pool, NUM_CLIENTS, alpha=ALPHA)
mal_idx = np.random.choice(len(clients_list), int(MALICIOUS_FRACTION * len(clients_list)), replace=False)
poisoned_clients = apply_label_flipping(clients_list, mal_idx)

results = []
for tech in ['fedavg', 'fedavg_m', 'median']:
    print(f"Training {tech}...")
    global_model = init_model()
    # Initial warm start on 1000 samples from the pool
    global_model.fit(X_train_scaled[:1000], y_train_pool.iloc[:1000])

    for rnd in range(ROUNDS):
        c_models = [local_train(global_model, xc, yc) for xc, yc in poisoned_clients]
        # local_train returns None for clients whose data holds a single class.
        c_models = [m for m in c_models if m is not None]
        if not c_models:
            # Without this guard the aggregation below fails inside np.stack
            # with an unhelpful "need at least one array" message.
            raise ValueError(
                f"Round {rnd}: no client has samples from both classes, "
                "so there is nothing to aggregate."
            )

        if tech == 'median':
            new_coef, new_int = aggregate_median(c_models)
        else:
            # NOTE(review): this is a plain (unweighted) mean over clients;
            # canonical FedAvg weights each client by its sample count -
            # confirm the unweighted form is intended.
            new_coef = np.mean(np.stack([m.coef_ for m in c_models]), axis=0)
            new_int = np.mean(np.stack([m.intercept_ for m in c_models]), axis=0)
            if tech == 'fedavg_m':
                # NOTE(review): this blends the previous global parameters
                # with the client mean using SERVER_LR; no momentum buffer is
                # carried across rounds - confirm this is the intended
                # "FedAvgM" variant.
                new_coef = (1-SERVER_LR)*global_model.coef_ + SERVER_LR*new_coef
                new_int = (1-SERVER_LR)*global_model.intercept_ + SERVER_LR*new_int
        global_model.coef_, global_model.intercept_ = new_coef, new_int

    # Threshold Calibration (on the held-out validation split)
    tau_star = find_optimal_threshold(global_model, X_val, y_val)

    # Final Test Set Evaluation
    y_probs_test = global_model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_probs_test)
    f1_05 = f1_score(y_test, (y_probs_test >= 0.5).astype(int))
    f1_cal = f1_score(y_test, (y_probs_test >= tau_star).astype(int))

    results.append({
        "Method": tech,
        "AUC": auc,
        "F1 (Default 0.5)": f1_05,
        "F1 (Calibrated)": f1_cal,
        "Tau*": tau_star
    })

# %% [5] SUMMARY
res_df = pd.DataFrame(results)
# The percentage in the title follows the configured MALICIOUS_FRACTION
# instead of being hard-coded, so it stays correct when the config changes.
print(f"\n--- Calibration Results ({MALICIOUS_FRACTION:.0%} Byzantine Attack) ---")
print(res_df.to_string(index=False))

Training fedavg...
Training fedavg_m...
Training median...

--- Calibration Results (20% Byzantine Attack) ---
  Method      AUC  F1 (Default 0.5)  F1 (Calibrated)  Tau*
  fedavg 0.745410          0.488780         0.563538  0.90
fedavg_m 0.765809          0.500204         0.566576  0.90
  median 0.709772          0.603461         0.519902  0.95
