In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

### 1. Import libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

### 2. Import dataset

In [None]:
import os
import sys

# Resolve the project root so the `src` package is importable and relative
# paths are resolved from the root.
#
# The notebook is presumably started one directory below the root, hence the
# "..".  After this cell has run once, the working directory already IS the
# root, so blindly taking ".." again on a re-run would climb one level too far
# and break the import below.  To keep the cell idempotent, only step up when
# the `src` directory is not already visible from the current directory.
_cwd = os.getcwd()
if os.path.isdir(os.path.join(_cwd, "src")):
    project_root = _cwd
else:
    project_root = os.path.abspath(os.path.join(_cwd, ".."))

if project_root not in sys.path:
    sys.path.insert(0, project_root)
os.chdir(project_root)

from src.dataloader.unified_dataloader import load_dataset

# `load_dataset` returns two objects; only `processed` is used by the cells
# visible in this notebook.
original, processed = load_dataset('compas4multirace')

### 3. Separate features (X) from the label (y)

In [None]:
'''COMPAS for multi race research'''
# Work on a copy so the loaded `processed` object itself is not modified.
df = processed.copy()

# `two_year_recid` is the label column; every remaining column is a feature.
y = df['two_year_recid']
X = df.drop(columns=['two_year_recid'])

In [None]:
# NOTE(review): `model` is rebound to a fresh XGBClassifier() in the
# "Train model" cell before it is used there, so this cell looks redundant.
model = XGBClassifier()

### 4. Train model

In [None]:
from experiments.five_fold_cross_validation import cross_validate

# Start from a fresh, default-configured classifier for this run.
model = XGBClassifier()

# Run the project's cross-validation routine with 5 folds.  According to the
# captured output, it writes one result CSV per fold under `saved_results/`.
cross_validate(
    model=model,
    dataset_name='compas4multirace',
    X=X,
    y=y,
    num_folds=5,
    matching_method='NN',
    threshold=0.05,
)

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


-------------------------------------
-------------1th fold----------------
-------------------------------------
1. Split the compas4multirace dataset into privileged group and unprivileged groups according to sensitive attribute values
2. Initialize FairnessExplainer
3. Calculate shapley values of the privileged group and unprivileged groups
Shapley values computed in 18.45 seconds
4. Calculate accuracy, DR, DP, EO, PQP of the original model on X_test
5. Start organizing modifications; a total of 2852 data points modified; train new models
6. Prepare and optionally save results
CSV file saved: saved_results\compas4multirace\fairSHAP-0.05_NN_1-fold_results.csv
-------------------------------------
-------------2th fold----------------
-------------------------------------
1. Split the compas4multirace dataset into privileged group and unprivileged groups according to sensitive attribute values
2. Initialize FairnessExplainer
3. Calculate shapley values of the privileged group and unpr

### 5. Plot results

In [None]:
import os
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

# Parameter settings
base_path = "saved_results/compas4multirace"
file_pattern = "fairSHAP-0.05_NN_{}-fold_results.csv"
folds = range(1, 6)
num_last_folds = 3  # Minimum number of folds an action_number must appear in to be plotted
max_xticks = 15      # Maximum number of ticks to display on x-axis
metrics = [
    "new_accuracy", "new_DR", "new_dp_gap", "new_eo_gap", "new_pq_gap",
    "new_dp_max", "new_dp_avg", "new_eo_max", "new_eo_avg", "new_pqp_max", "new_pqp_avg"
]

# NOTE: the working variables below deliberately avoid the names `df`, `X`
# and `y`, which earlier cells use for the dataset; rebinding them here would
# silently break a later re-run of the training cell.


def _pick_xticks(positions, limit):
    """Return at most ``limit`` tick positions, always keeping the first and last.

    Parameters
    ----------
    positions : sequence
        Candidate tick positions, in plotting order.
    limit : int
        Maximum number of ticks to return.

    Returns
    -------
    list
        ``positions`` unchanged when it already fits within ``limit``;
        otherwise ``limit`` (or fewer) entries sampled at evenly spaced
        indices, including the first and the last entry.
    """
    positions = list(positions)
    if len(positions) <= limit:
        return positions
    if limit < 2:
        # Not enough budget for both ends; keep only the first position.
        return positions[:1]
    picks = np.linspace(0, len(positions) - 1, num=limit).round().astype(int)
    return [positions[p] for p in sorted(set(picks.tolist()))]


# Store the "difference relative to original" data table for each fold (action part only)
delta_per_fold = {}
# Collect original values from all available folds (for sanity check; Δoriginal=0 when plotting)
original_values = {m: [] for m in metrics}

required_columns = ["action_number", *metrics]

for fold_idx in folds:
    fp = os.path.join(base_path, file_pattern.format(fold_idx))
    if not os.path.exists(fp):
        print(f"[WARN] File does not exist, skipping: {fp}")
        continue

    fold_df = pd.read_csv(fp)
    if fold_df.empty:
        print(f"[WARN] Empty file, skipping: {fp}")
        continue

    # Fail soft (warn + skip) on a malformed file instead of raising a
    # KeyError halfway through the fold.
    missing_columns = [c for c in required_columns if c not in fold_df.columns]
    if missing_columns:
        print(f"[WARN] Missing columns {missing_columns}, skipping: {fp}")
        continue

    # 1) original row (first row as per requirement)
    orig_row = fold_df.iloc[0]
    # 2) subsequent action rows (starting from second row)
    actions_df = fold_df.iloc[1:].copy()
    if actions_df.empty:
        print(f"[WARN] No action data in file, skipping: {fp}")
        continue

    # Convert action_number to int (starting from row 2 should be 1,11,21,...)
    try:
        actions_df["action_number"] = actions_df["action_number"].astype(int)
    except Exception as e:
        print(f"[WARN] Failed to convert action_number to int, skipping fold {fold_idx}. Error: {e}")
        continue

    # 3) Calculate difference for each metric: value at each action - original value for that metric
    try:
        baseline = {m: float(orig_row[m]) for m in metrics}
        for m in metrics:
            actions_df[m] = actions_df[m].astype(float) - baseline[m]
    except (TypeError, ValueError) as e:
        print(f"[WARN] Non-numeric metric value, skipping fold {fold_idx}. Error: {e}")
        continue

    # Record original values (for reference only; Δ(original)=0, not directly used in plotting)
    for m in metrics:
        original_values[m].append(baseline[m])

    # Store the processed results for this fold
    delta_per_fold[fold_idx] = actions_df

# Count, for every action_number, the number of FOLDS it occurs in.  Using the
# unique values per fold prevents duplicated rows inside a single fold from
# being counted as several folds.
action_counts = Counter()
for fold_actions in delta_per_fold.values():
    action_counts.update(fold_actions["action_number"].unique().tolist())

# Keep only action_numbers that appear in at least num_last_folds folds
valid_actions = sorted(a for a, c in action_counts.items() if c >= num_last_folds)

if not valid_actions:
    print("[INFO] No action_number meets num_last_folds requirement, only Δ(original)=0 can be plotted.")

# ===== Calculate mean and std for each action (across folds) for each metric =====
# Δ(original)=0 is placed at x=0 as the first point of every series.
stats = {m: {"x": [0], "mean": [0.0], "std": [0.0]} for m in metrics}

for act in valid_actions:
    # First matching row of each fold that contains this action.
    rows = []
    for fold_actions in delta_per_fold.values():
        match = fold_actions[fold_actions["action_number"] == act]
        if not match.empty:
            rows.append(match.iloc[0])
    if not rows:
        continue
    for m in metrics:
        vals = [float(r[m]) for r in rows]
        stats[m]["x"].append(int(act))
        stats[m]["mean"].append(float(np.mean(vals)))
        # Sample std needs at least two folds; fall back to 0 for a single value.
        stats[m]["std"].append(float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0)

# ===== Plotting (one separate plot per metric; no subplots) =====
for m in metrics:
    xs = stats[m]["x"]
    means = np.array(stats[m]["mean"])
    stds = np.array(stats[m]["std"])

    fig, ax = plt.subplots()
    ax.plot(xs, means, marker='o', label='Δ mean')

    # Only draw std region when there are at least 2 points
    if len(xs) >= 2:
        ax.fill_between(xs, means - stds, means + stds, alpha=0.3, label='± std')

    # Draw y=0 reference line
    ax.axhline(0.0, linewidth=1)

    # Limit the number of x-axis ticks to max_xticks (first and last always shown)
    xticks = _pick_xticks(xs, max_xticks)
    ax.set_xticks(xticks)
    ax.set_xticklabels([str(t) for t in xticks], rotation=45)

    ax.set_xlabel("action_number")
    ax.set_ylabel(f"Δ {m} (value - original)")
    ax.set_title(f"Δ {m} relative to original")
    ax.legend()
    plt.tight_layout()
    plt.show()