In [1]:
import os
import math
import random
import argparse
from collections import Counter, defaultdict
from itertools import chain, combinations
import ast
import copy
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.linear_model import LogisticRegression
from utils.Spambase.split_data import split_data_equal
from utils.aggregate_functions import aggregate_lr_models
from utils.evaluate_coalitions_new import evaluate_coalitions2
from utils.aggregate_functions import FederatedForest
from utils.DecisionTree import DecisionTree

from utils.nash1 import find_nash_equilibria_v2


import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message="X does not have valid feature names")

import os
import math
import random
import argparse
import ast
import copy

import numpy as np
import pandas as pd
from pathlib import Path

from collections import Counter, defaultdict
from itertools import chain, combinations
from tqdm import tqdm

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.compose import ColumnTransformer

from typing import Tuple, Sequence, Union

### Local Imports

In [2]:
from utils.Spambase.split_data import split_data_equal
from utils.aggregate_functions import aggregate_lr_models
from utils.evaluate_coalitions import evaluate_coalitions
from utils.nash1 import find_nash_equilibria_v2
from utils.evaluate_coalitions_new import evaluate_coalitions2

In [3]:
# disable ConvergenceWarnings
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# HuGaDB Dataset

In [None]:
def payoff_(mu: float, sigma: float) -> float:
    """Evaluate the fixed linear model ``0.1106 + 0.8822*mu + 0.7949*sigma``.

    The commented-out calls in the experiment loops pass the mean and sample
    standard deviation of the clients' accuracies as ``mu`` and ``sigma``.
    """
    intercept, mu_weight, sigma_weight = 0.1106, 0.8822, 0.7949
    return intercept + mu_weight * mu + sigma_weight * sigma

def payoff_100(mu: float, sigma: float) -> float:
    """Evaluate the fixed linear model ``-0.0102 + 1.0908*mu + 0.7913*sigma``.

    The commented-out call in the HuGaDB loop passes the mean and sample
    standard deviation of the clients' accuracies as ``mu`` and ``sigma``.
    """
    intercept, mu_weight, sigma_weight = -0.0102, 1.0908, 0.7913
    return intercept + mu_weight * mu + sigma_weight * sigma

def payoff_lr(mu: float, sigma: float) -> float:
    """Evaluate the fixed linear model ``0.0681 + 0.9307*mu + 1.4150*sigma``."""
    intercept, mu_weight, sigma_weight = +0.0681, 0.9307, 1.4150
    return intercept + mu_weight * mu + sigma_weight * sigma

In [5]:
# Templates for the numbered train/test CSVs (train_01.csv ... train_10.csv, same for test).
train_files_pattern = "/Users/abbaszal/Documents/Thesis_Project_Spambase/data/metadata/train_{i:02d}.csv"
test_files_pattern = "/Users/abbaszal/Documents/Thesis_Project_Spambase/data/metadata/test_{i:02d}.csv"

# Stack files 1..10 of each kind, then discard every row that has a missing value.
df_train_global = pd.concat(
    [pd.read_csv(train_files_pattern.format(i=file_no)) for file_no in range(1, 11)]
).dropna()
df_test_global = pd.concat(
    [pd.read_csv(test_files_pattern.format(i=file_no)) for file_no in range(1, 11)]
).dropna()

# The 'act' column is the label; everything else is a feature.
X_train_global, y_train_global = df_train_global.drop(columns='act'), df_train_global['act']
X_test_global, y_test_global = df_test_global.drop(columns='act'), df_test_global['act']

# Integer-encode the labels; the encoder is fitted on the training labels only.
label_encoder = LabelEncoder()
y_train_global = label_encoder.fit_transform(y_train_global)
y_test_global = label_encoder.transform(y_test_global)

# Standardize the features with statistics learned from the training set only.
scaler_global = StandardScaler()
X_train_global_scaled = scaler_global.fit_transform(X_train_global)
X_test_global_scaled = scaler_global.transform(X_test_global)

In [4]:
import numpy as np
import joblib

# 1) load the pipeline
# The loaded object is used by the experiment loop below as
# pipeline.predict([[mu, sigma]]), where mu/sigma are the mean and sample
# standard deviation of the clients' accuracies.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load artifacts from a trusted source.
# NOTE(review): the same "fedlr" artifact is loaded in every dataset section
# of this notebook; confirm that one payoff model is meant to serve all of them.
pipeline = joblib.load('/Users/abbaszal/Documents/global_acc_pipeline_fedlr.joblib')


In [None]:
# Parameters for the HuGaDB FedLR experiment.
# For every (n_clients, max_iter) pair the loop below runs n_trials trials and
# writes one CSV with two counters: static-game equilibria and "lottery" trials.
eps               = 1e-8   # margin added to the predicted payoff in the lottery check
n_clients_list    = [10,20,30,40,50,60,70,80,90,100]
n_trials          = 100
base_random_seed  = 42
max_iters         = [ 100]
approach          = 'fedlr'

# Saving directory
save_dir = "/Users/abbaszal/Documents/thimasfit11/hugadb_fedlr_all_joblib"
os.makedirs(save_dir, exist_ok=True)

# Full training table; rows with a missing 'act' label are dropped.
train_csv_path = "/Users/abbaszal/Documents/Thesis_Project_Spambase/data/metadata/new_runs/train.csv"
df_full_train = pd.read_csv(train_csv_path).dropna(subset=['act']).reset_index(drop=True)

for n_clients in n_clients_list:
    print(f"\n> n_clients = {n_clients}")

    # Disabled: per-n_clients lookup of a closed-form payoff function
    # (the joblib pipeline is used instead, see the lottery check below).
    # Dynamically get the corresponding payoff function
    #payoff_func_name = f"payoff_{n_clients}"
    #if payoff_func_name not in globals():
        #raise ValueError(f"Missing payoff function for {n_clients} clients")
    #payoff_func = globals()[payoff_func_name]
    #print(payoff_func)
    
    for max_iter in max_iters:
        print(f"  max_iter = {max_iter}")

        # reset counters for this config
        counts_static = Counter()
        lottery_count = 0

        for trial in range(1, n_trials + 1):
            
            # The seed mixes the base seed, the trial index, max_iter and a
            # random.randint draw. No random.seed call is visible in this cell,
            # so the draw (and hence trial_seed) differs between runs.
            rand_component = random.randint(0, 500)
            trial_seed = base_random_seed + trial + int(1000 * max_iter) + 2 * rand_component


            X_test_full_scaled = X_test_global_scaled.copy()
            y_test_full        = y_test_global.copy()

            # Create a stratified subsample of the test set to speed up the runtime.
            subsample_size = 950  
            X_test_glob, _, y_test_glob, _ = train_test_split(
                X_test_full_scaled, y_test_full,
                train_size=subsample_size,
                random_state=trial_seed,
                stratify=y_test_full
            )


            # Stratified 10000-row sample of the training table for this trial.
            df_trial, _ = train_test_split(
                df_full_train,
                train_size=10000,
                random_state=trial_seed,
                stratify=df_full_train['act']
            )
   
            df_remaining = df_trial.copy()


            client_partitions = []

            # NOTE(review): clients share 5000 // n_clients rows each, i.e. about
            # half of the 10000 sampled rows are never handed out; confirm intended.
            sample_size = 5000 // n_clients
            for client_idx in range(n_clients):
                if len(df_remaining) < sample_size:
                    raise ValueError("err")

                if len(df_remaining) == sample_size:
                    # Exactly one share left: take it whole and leave an empty frame.
                    df_client = df_remaining.copy()
                    df_remaining = df_remaining.iloc[0:0] 

                else:
                    # Peel a stratified share off the remaining rows; the rest
                    # stays available for the following clients.
                    df_client, df_remaining = train_test_split(
                        df_remaining,
                        train_size=sample_size,
                        random_state=trial_seed,
                        stratify=df_remaining['act']
                    )
                df_client = df_client.reset_index(drop=True)
                client_partitions.append(df_client)

            # train each client
            client_models = []
            client_accs   = {}   # client index -> accuracy on the test subsample

            for client_idx, df_client in enumerate(client_partitions):

                X_client = df_client.drop(columns=['act'])
                y_client = df_client['act']

                # Features are standardized per client; labels reuse the
                # label_encoder fitted in the data-loading cell.
                client_scaler = StandardScaler()
                X_client_scaled = client_scaler.fit_transform(X_client)
                y_client_encoded = label_encoder.transform(y_client)

                mdl = LogisticRegression(random_state=trial_seed,
                                         max_iter=max_iter)
                mdl.fit(X_client_scaled, y_client_encoded)
                client_models.append(mdl)

                # NOTE(review): X_test_glob was scaled with scaler_global, not
                # with client_scaler; confirm this train/test scaling mismatch
                # is intended.
                pred = mdl.predict(X_test_glob)
                client_accs[client_idx] = accuracy_score(y_test_glob, pred)

            # coalition evaluation
            df_res = evaluate_coalitions2(
                client_models=client_models,
                client_global_accuracies=client_accs,
                n_clients=n_clients,
                aggregator_func=aggregate_lr_models,
                X_test=X_test_glob,
                y_test=y_test_glob,
                corrupt_client_indices=[],
                approach=approach
            )

            # static‐game Nash counts: one increment per row returned by
            # find_nash_equilibria_v2, keyed by that row's index value.
            df_ne = find_nash_equilibria_v2(df_res)
            if not df_ne.empty:
                for coalition in df_ne.index:
                    counts_static[coalition] += 1


            # lottery‐game incentive check
            vals      = np.array(list(client_accs.values()))
            mu_full   = vals.mean()
            sig_full  = vals.std(ddof=1)   # sample standard deviation

            # 3) package into a 2-column array and predict
            X_new    = np.array([[mu_full, sig_full]])   # shape (1,2)
            payoff_f    = pipeline.predict(X_new)[0]
            print(payoff_f)
            #payoff_f  = payoff_100(mu_full, sig_full)
           # print(payoff_f)

            # The trial counts as a "lottery" occurrence when no client's own
            # accuracy exceeds the predicted payoff by more than eps.
            has_incentive = any(
                acc > payoff_f + eps
                for acc in client_accs.values()
            )
            if not has_incentive:
                lottery_count += 1


        # Sum over all index values: a trial that returned several rows from
        # find_nash_equilibria_v2 contributes more than 1 to this total.
        static_count = sum(counts_static.values())

        # One single-row CSV per (n_clients, max_iter) configuration.
        counts_df = pd.DataFrame([{
            'n_clients':           n_clients,
            'max_iter':            max_iter,
            'Static_Occurrences':  static_count,
            'Lottery_Occurrences': lottery_count
        }])
        fname = (f"Nash_Counts_{approach}"
                 f"_nclients_{n_clients}"
                 f"_maxiter_{max_iter}.csv")
        out_path = os.path.join(save_dir, fname)
        counts_df.to_csv(out_path, index=False)
        print(f"saved {fname}")


# Spambase Dataset

In [4]:
# Location of the raw Spambase table (adjust the path as needed).
file_path = '/Users/abbaszal/Documents/Thesis_Project_Spambase/data/spambase.data'
# header=None: the first line of the file is read as data, not as column names.
df = pd.read_csv(filepath_or_buffer=file_path, header=None)

In [6]:
def payoff_(mu: float, sigma: float) -> float:
    """Evaluate the fixed linear model ``0.5547 + 0.4137*mu - 0.0490*sigma``.

    Redefines the ``payoff_`` declared in the HuGaDB section of this notebook;
    whichever cell ran last wins.
    """
    intercept, mu_weight, sigma_penalty = 0.5547, 0.4137, 0.0490
    return intercept + mu_weight * mu - sigma_penalty * sigma

def payoff_100(mu: float, sigma: float) -> float:
    """Evaluate the fixed linear model ``0.5879 + 0.3562*mu + 0.1891*sigma``.

    Redefines the ``payoff_100`` declared in the HuGaDB section of this
    notebook; whichever cell ran last wins.
    """
    intercept, mu_weight, sigma_weight = 0.5879, 0.3562, 0.1891
    return intercept + mu_weight * mu + sigma_weight * sigma

In [5]:
def train_models_fedlr(partitions, random_seed, X_test, y_test, max_iter):
    """Fit one logistic-regression model per client and score it on a shared test set.

    For every ``(X_i, y_i)`` pair in ``partitions`` the rows of ``X_i`` that
    contain a NaN are dropped, the remaining features are standardized with a
    scaler fitted on that client's data only, and a ``LogisticRegression`` is
    trained on the result. The fitted model is scored on ``X_test`` /
    ``y_test`` exactly as passed in (the client's scaler is not applied to
    ``X_test``).

    Parameters
    ----------
    partitions : iterable of (X_i, y_i)
        Per-client feature matrix and label vector. Both are indexed with a
        boolean row mask, so NumPy arrays are expected.
    random_seed
        Forwarded as ``random_state`` to every ``LogisticRegression``.
    X_test, y_test
        Evaluation data handed to ``model.score``.
    max_iter
        Forwarded as ``max_iter`` to every ``LogisticRegression``.

    Returns
    -------
    (client_models, client_global_accuracies) : tuple of two lists
        Both lists hold exactly one entry per partition, in partition order.
        A client with no NaN-free rows, or whose fit/score raised, is recorded
        as ``None`` in both lists (a ``RuntimeWarning`` is emitted in the
        latter case).
    """
    import warnings  # local import keeps this cell runnable on its own

    client_models = []
    client_global_accuracies = []

    for client_idx, (X_i, y_i) in enumerate(partitions):
        # Keep only the rows without any NaN feature.
        complete_rows = ~np.isnan(X_i).any(axis=1)
        X_clean = X_i[complete_rows]
        y_clean = y_i[complete_rows]
        if len(y_clean) == 0:
            client_models.append(None)
            client_global_accuracies.append(None)
            continue

        model = LogisticRegression(random_state=random_seed, max_iter=max_iter)
        try:
            local_scaler = StandardScaler()
            model.fit(local_scaler.fit_transform(X_clean), y_clean)
            # Score before touching the result lists: appending the model first
            # and failing here would leave two entries for this client in
            # client_models and desynchronize the two lists.
            accuracy = model.score(X_test, y_test)
        except Exception as exc:
            # Best-effort: record the client as failed, but say why.
            warnings.warn(
                f"client {client_idx}: training/evaluation failed ({exc!r}); "
                "recording None for this client",
                RuntimeWarning,
            )
            client_models.append(None)
            client_global_accuracies.append(None)
        else:
            client_models.append(model)
            client_global_accuracies.append(accuracy)

    return client_models, client_global_accuracies

In [6]:
import numpy as np
import joblib

# 1) load the pipeline
# The loaded object is used by the Spambase experiment loop below as
# pipeline.predict([[mu, sigma]]), where mu/sigma are the mean and sample
# standard deviation of the clients' accuracies.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load artifacts from a trusted source.
# NOTE(review): this re-loads the same file as the HuGaDB section; confirm
# that one payoff model is meant to serve every dataset.
pipeline = joblib.load('/Users/abbaszal/Documents/global_acc_pipeline_fedlr.joblib')


In [7]:

# Every column but the last becomes the feature matrix; the last column becomes y.
X, y = df.iloc[:, :-1].to_numpy(), df.iloc[:, -1].to_numpy()

# Fixed 80/20 hold-out split (the per-trial re-split in the loop is commented out).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize both splits with statistics learned from the training split only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [8]:
# Parameters for the Spambase FedLR experiment.
# For every (n_clients, max_iter) pair the loop below runs n_trials trials and
# writes one CSV with two counters: static-game equilibria and "lottery" trials.
eps     = 1e-8   # margin added to the predicted payoff in the lottery check
n_trials    = 100
n_clients_list    = [10,20,30,40,50,60,70,80,90,100]
base_seed   = 42
max_iters   = [100]
approach    = 'fedlr'


save_dir = (
"/Users/abbaszal/Documents/thimasfit11/spambase_fedlr_all_joblib_test"
)
os.makedirs(save_dir, exist_ok=True)

all_results = []


for n_clients in n_clients_list:
    print(f"\n> n_clients = {n_clients}")

    # Disabled: per-n_clients lookup of a closed-form payoff function
    # (the joblib pipeline is used instead, see the lottery check below).
    #payoff_func_name = f"payoff_{n_clients}"
    #if payoff_func_name not in globals():
        #raise ValueError(f"Missing payoff function for {n_clients} clients")
    #payoff_func = globals()[payoff_func_name]

    for max_iter in max_iters:
        print(f"\n max_iter = {max_iter} ")
        counts_static = Counter()
        lottery_count = 0

        for trial in range(1, n_trials+1):
            # The seed mixes the base seed, the trial index, max_iter and a
            # random.randint draw; the global RNGs are then re-seeded with it.
            rc = random.randint(0, 500)
            trial_seed = base_seed + (trial-1) + 1000*max_iter + 2*rc
            random.seed(trial_seed)
            np.random.seed(trial_seed)

            # Disabled: re-splitting and re-scaling the data on every trial.
            #X = df.iloc[:, :-1].to_numpy()
            #y = df.iloc[:, -1].to_numpy()

            #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=trial_seed)

            #scaler = StandardScaler()

            #X_train = scaler.fit_transform(X_train)
            #X_test = scaler.transform(X_test)

            partitions = split_data_equal(
                X_train, y_train,
                n_clients=n_clients,
                shuffle=True,
                random_seed=trial_seed
            )

            client_models, client_accs = train_models_fedlr(
                partitions=partitions,
                random_seed=trial_seed,
                X_test=X_test,
                y_test=y_test,
                max_iter=max_iter
            )


            # coalition evaluation
            df_res = evaluate_coalitions2(
                client_models=client_models,
                client_global_accuracies=client_accs,
                n_clients=n_clients,
                aggregator_func=aggregate_lr_models,
                X_test=X_test,
                y_test=y_test,
                corrupt_client_indices=[],
                approach=approach
            )

            # static‐game Nash counts: one increment per row returned by
            # find_nash_equilibria_v2, keyed by that row's index value.
            df_ne = find_nash_equilibria_v2(df_res)
            if not df_ne.empty:
                for coalition in df_ne.index:
                    counts_static[coalition] += 1


            # lottery‐game incentive check
            # 2) mean and sample standard deviation of the clients' accuracies
            # NOTE(review): train_models_fedlr can return None for a failed
            # client; a None entry would make the statistics below raise.
            vals = np.array(client_accs)
            mu_full  = vals.mean()
            sig_full = vals.std(ddof=1)

            # 3) package into a 2-column array and predict
            X_new    = np.array([[mu_full, sig_full]])   # shape (1,2)
            payoff_f    = pipeline.predict(X_new)[0]
            print(payoff_f)

            # The trial counts as a "lottery" occurrence when no client's own
            # accuracy exceeds the predicted payoff by more than eps. The eps
            # margin matches the HuGaDB, Mushroom and Adult loops, so float
            # noise around equality is not read as an incentive to deviate.
            has_incentive = any(
                acc > payoff_f + eps
                for acc in client_accs
            )
            if not has_incentive:
                lottery_count += 1

        # Sum over all index values: a trial that returned several rows from
        # find_nash_equilibria_v2 contributes more than 1 to this total.
        static_count = sum(counts_static.values())


        # One single-row CSV per (n_clients, max_iter) configuration.
        counts_df = pd.DataFrame([{
            'n_clients':           n_clients,
            'max_iter':            max_iter,
            'Static_Occurrences':  static_count,
            'Lottery_Occurrences': lottery_count
        }])
        fname = (f"Nash_Counts_{approach}"
                    f"_nclients_{n_clients}"
                    f"_maxiter_{max_iter}.csv")
        out_path = os.path.join(save_dir, fname)
        counts_df.to_csv(out_path, index=False)
        print(f" saved {fname}")


> n_clients = 10

 max_iter = 100 
0.9120109202150769
0.9106870929782339
0.9103130630091638
0.9122742156199952
0.9112036137920547
0.9171212544171699
0.9147265835273344
0.9198114326881378
0.9201516484322076
0.9198114326881378
0.9206504817689252
0.9233292956886855
0.9137800141309933
0.9137575522226695
0.9248307844635327
0.9120144849880084
0.909110234484912
0.9231134274006951
0.9377590080388827
0.9099540775658234
0.9095001887341149
0.9242951104100215
0.9196707994561732
0.9245971425653778
0.9191459869696718
0.9144690828241279
0.921801633510642
0.9276121844168626
0.9144791550501157
0.9180464362991545
0.9215750954502617
0.922701732526652
0.9230378085548252
0.9164268035588875
0.9212975752547401
0.9155427135924631
0.9126371653218895
0.9131042611839649
0.91345814104912
0.9130132353623588
0.9217412763138445
0.9265908361858968
0.9227307668508148
0.9092106961681712
0.9229931782023753
0.9123304685962652
0.9196853982127643
0.9192377245148867
0.9132715229113282
0.9106197968442847
0.9173215447388916


# Mushroom Dataset

In [5]:
from data_loading import load_mushroom

In [6]:
# Fetch the Mushroom splits and report the shape of each feature block
# (every split is unpacked as a two-element numeric/categorical pair further down).
(X_train, y_train, X_test, y_test), num_classes, n_num_feats, n_cat_feats, n_cat_vals = load_mushroom()
print("Train shape:", *(block.shape for block in X_train))
print("Test shape:", *(block.shape for block in X_test))
print("Number of classes:", num_classes)

Train shape: (7311, 0) (7311, 22)
Test shape: (813, 0) (813, 22)
Number of classes: 2


In [7]:
# Reload the Mushroom splits together with the feature-count metadata.
(X_train, y_train, X_test, y_test), num_classes, n_num_feats, n_cat_feats, n_cat_vals = load_mushroom()

# Each split is a (numeric block, categorical block) pair.
X_train_num, X_train_cat = X_train
X_test_num, X_test_cat = X_test

# Numeric columns first, categorical columns after them, so the column indices
# given to the ColumnTransformer line up.
X_train_combined = np.hstack((X_train_num, X_train_cat))
X_test_combined = np.hstack((X_test_num, X_test_cat))

# Standardize columns [0, n_num_feats) and one-hot encode the following
# n_cat_feats columns; categories unseen during fit are ignored at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), [*range(n_num_feats)]),
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore', sparse_output=False),
            [*range(n_num_feats, n_num_feats + n_cat_feats)],
        ),
    ]
)
X_train_global_scaled = preprocessor.fit_transform(X_train_combined)
X_test_global_scaled = preprocessor.transform(X_test_combined)

# Labels are used as returned by the loader.
y_train_global, y_test_global = y_train, y_test

In [8]:
import numpy as np
import joblib

# 1) load the pipeline
# The loaded object is used by the Mushroom experiment loop below as
# pipeline.predict([[mu, sigma]]), where mu/sigma are the mean and sample
# standard deviation of the clients' accuracies.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load artifacts from a trusted source.
# NOTE(review): this re-loads the same file as the earlier sections; confirm
# that one payoff model is meant to serve every dataset.
pipeline = joblib.load('/Users/abbaszal/Documents/global_acc_pipeline_fedlr.joblib')


In [None]:
# Prepare training DataFrame
# Columns after the first n_num_feats come from the one-hot encoder, so
# t_feat is the number of encoded categorical columns.
t_feat = X_train_global_scaled.shape[1] - n_num_feats
feat_names = [f"num{i+1}" for i in range(n_num_feats)] + \
                [f"cat{i+1}" for i in range(t_feat)]
df_train = pd.DataFrame(X_train_global_scaled, columns=feat_names)
df_train["target"] = y_train_global

# Experiment configuration for the Mushroom FedLR runs.
eps               = 1e-8   # margin added to the predicted payoff in the lottery check
n_clients_list    = [10,20,30,40,50,60,70,80,90,100]
n_trials          = 100
base_random_seed  = 42
max_iters         = [100]
approach          = 'fedlr'
model_type = 'logistic'   # any other value selects DecisionTree below

# Saving directory
save_dir = "/Users/abbaszal/Documents/thimasfit11/mushroom_fedlr_all_joblib"
os.makedirs(save_dir, exist_ok=True)

for n_clients in n_clients_list:
    print(f"\n> n_clients = {n_clients}")

    # Disabled: per-n_clients lookup of a closed-form payoff function
    # (the joblib pipeline is used instead, see the lottery check below).
    # Dynamically get the corresponding payoff function
    #payoff_func_name = f"payoff_{n_clients}"
    #if payoff_func_name not in globals():
        #raise ValueError(f"Missing payoff function for {n_clients} clients")
    #payoff_func = globals()[payoff_func_name]


    for max_iter in max_iters:
        print(f"  max_iter = {max_iter}")

        # reset counters for this config
        counts_static = Counter()
        lottery_count = 0
        overestimation_count = 0    # rows of df_res whose accuracy is below the predicted payoff, summed over trials

        for trial in range(1, n_trials + 1):
            print(trial)

            # The seed mixes the base seed, the trial index, max_iter and a
            # random.randint draw. No random.seed call is visible in this cell,
            # so the draw (and hence trial_seed) differs between runs.
            rand_component = random.randint(0, 500)
            trial_seed = base_random_seed + trial + int(1000 * max_iter) + 2 * rand_component

            X_test_full_scaled = X_test_global_scaled.copy()
            y_test_full        = y_test_global.copy()

            # Evaluate on a stratified 80% subsample of the test set.
            subsample_fraction = 0.80

            X_test_glob, _, y_test_glob, _ = train_test_split(
                X_test_full_scaled, y_test_full,
                train_size=subsample_fraction,
                random_state=trial_seed,
                stratify=y_test_full
            )



            # Disabled alternative: partition the clients with StratifiedKFold.
            #from sklearn.model_selection import StratifiedKFold
            #skf = StratifiedKFold(n_splits=n_clients, shuffle=True, random_state=trial_seed)
            #parts = []
            #for _, client_idx in skf.split(df_train.drop(columns="target"), df_train["target"]):
                #df_c = df_train.iloc[client_idx].reset_index(drop=True)
                #parts.append(df_c)


            # Shuffle the training table, then hand out one stratified share of
            # sample_size rows per client; the last client takes whatever is left.
            df_rem      = df_train.sample(frac=1, random_state=trial_seed).reset_index(drop=True)
            sample_size = len(df_train) // n_clients
            parts = []
            for idx in range(n_clients):
                if idx == n_clients - 1:
                    df_c = df_rem.copy()
                else:
                    sss = StratifiedShuffleSplit(n_splits=1, train_size=sample_size,
                                                    random_state=trial_seed)
                    tr_idx, _ = next(sss.split(df_rem, df_rem["target"]))
                    df_c      = df_rem.iloc[tr_idx]
                    # df_rem carries a fresh 0..n-1 index, so dropping by label
                    # removes exactly the rows selected by position above.
                    df_rem    = df_rem.drop(df_c.index).reset_index(drop=True)
                parts.append(df_c.reset_index(drop=True))

            client_models     = []
            client_accs = {}   # client index -> accuracy on the test subsample
            for idx, df_c in enumerate(parts):
                X_c = df_c.drop(columns="target").to_numpy()
                y_c = df_c["target"].to_numpy()
                # NOTE(review): the client data (already preprocessed above) is
                # standardized again per client, while X_test_glob is not;
                # confirm this train/test scaling mismatch is intended.
                X_std = StandardScaler().fit_transform(X_c)

                if model_type.lower() == "logistic":
                    model = LogisticRegression(random_state=trial_seed,
                                                max_iter=max_iter)
                else:
                    # max_iter doubles as the tree depth in this branch.
                    model = DecisionTree(max_depth=max_iter,
                                                    random_state=trial_seed)

                model.fit(X_std, y_c)
                client_models.append(model)
                
    
                acc_global = accuracy_score(y_test_glob,
                                            model.predict(X_test_glob))
                client_accs[idx] = acc_global
            

            # coalition evaluation
            df_res = evaluate_coalitions2(
                client_models=client_models,
                client_global_accuracies=client_accs,
                n_clients=n_clients,
                aggregator_func=aggregate_lr_models,
                X_test=X_test_glob,
                y_test=y_test_glob,
                corrupt_client_indices=[],
                approach=approach
            )

            # static‐game Nash counts: one increment per row returned by
            # find_nash_equilibria_v2, keyed by that row's index value.
            df_ne = find_nash_equilibria_v2(df_res)
            if not df_ne.empty:
                for coalition in df_ne.index:
                    counts_static[coalition] += 1


            # lottery‐game incentive check
            vals      = np.array(list(client_accs.values()))
            mu_full   = vals.mean()
            sig_full  = vals.std(ddof=1)   # sample standard deviation
            
            # 3) package into a 2-column array and predict
            X_new    = np.array([[mu_full, sig_full]])   # shape (1,2)
            payoff_f    = pipeline.predict(X_new)[0]
            print(payoff_f)

            # Count the rows of df_res whose "Global Accuracy" is below the
            # predicted payoff.
            overestimation_count += (df_res["Global Accuracy"] < payoff_f).sum()

            # The trial counts as a "lottery" occurrence when no client's own
            # accuracy exceeds the predicted payoff by more than eps.
            has_incentive = any(
                acc > payoff_f + eps
                for acc in client_accs.values()
            )
            if not has_incentive:
                lottery_count += 1


        # Sum over all index values: a trial that returned several rows from
        # find_nash_equilibria_v2 contributes more than 1 to this total.
        static_count = sum(counts_static.values())

        # One single-row CSV per (n_clients, max_iter) configuration.
        counts_df = pd.DataFrame([{
            'n_clients':           n_clients,
            'max_iter':            max_iter,
            'Static_Occurrences':  static_count,
            'Lottery_Occurrences': lottery_count,
             "Overestimation_Occurrences": overestimation_count
        }])
        fname = (f"Nash_Counts_{approach}"
                 f"_nclients_{n_clients}"
                 f"_maxiter_{max_iter}.csv")
        out_path = os.path.join(save_dir, fname)
        counts_df.to_csv(out_path, index=False)
        print(f"saved {fname}")


# Adult Dataset

In [18]:
from data_loading import load_adult

In [19]:
# Fetch the Adult splits and report the shape of each feature block
# (every split is unpacked as a two-element numeric/categorical pair further down).
(X_train, y_train, X_test, y_test), num_classes, n_num_feats, n_cat_feats, n_cat_vals = load_adult()
print("Train shape:", *(block.shape for block in X_train))
print("Test shape:", *(block.shape for block in X_test))
print("Number of classes:", num_classes)

Train shape: (32561, 6) (32561, 8)
Test shape: (16281, 6) (16281, 8)
Number of classes: 2


In [20]:
# Reload the Adult splits together with the feature-count metadata.
(X_train, y_train, X_test, y_test), num_classes, n_num_feats, n_cat_feats, n_cat_vals = load_adult()

# Each split is a (numeric block, categorical block) pair.
X_train_num, X_train_cat = X_train
X_test_num, X_test_cat = X_test

# Numeric columns first, categorical columns after them, so the column indices
# given to the ColumnTransformer line up.
X_train_combined = np.hstack((X_train_num, X_train_cat))
X_test_combined = np.hstack((X_test_num, X_test_cat))

# Standardize columns [0, n_num_feats) and one-hot encode the following
# n_cat_feats columns; categories unseen during fit are ignored at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), [*range(n_num_feats)]),
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore', sparse_output=False),
            [*range(n_num_feats, n_num_feats + n_cat_feats)],
        ),
    ]
)
X_train_global_scaled = preprocessor.fit_transform(X_train_combined)
X_test_global_scaled = preprocessor.transform(X_test_combined)

# Labels are used as returned by the loader.
y_train_global, y_test_global = y_train, y_test

In [21]:
import numpy as np
import joblib

# 1) load the pipeline
# The loaded object is used by the Adult experiment loop below as
# pipeline.predict([[mu, sigma]]), where mu/sigma are the mean and sample
# standard deviation of the clients' accuracies.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load artifacts from a trusted source.
# NOTE(review): this re-loads the same file as the earlier sections; confirm
# that one payoff model is meant to serve every dataset.
pipeline = joblib.load('/Users/abbaszal/Documents/global_acc_pipeline_fedlr.joblib')


In [None]:
# Prepare training DataFrame
# Columns after the first n_num_feats come from the one-hot encoder, so
# t_feat is the number of encoded categorical columns.
t_feat = X_train_global_scaled.shape[1] - n_num_feats
feat_names = [f"num{i+1}" for i in range(n_num_feats)] + \
                [f"cat{i+1}" for i in range(t_feat)]
df_train = pd.DataFrame(X_train_global_scaled, columns=feat_names)
df_train["target"] = y_train_global

# Experiment configuration for the Adult FedLR runs.
eps               = 1e-8   # margin added to the predicted payoff in the lottery check
n_clients_list    = [10,20,30,40,50,60,70,80,90,100]
n_trials          = 100
base_random_seed  = 42
max_iters         = [100]
approach          = 'fedlr'
model_type = 'logistic'   # any other value selects DecisionTree below

# Saving directory
save_dir = "/Users/abbaszal/Documents/thimasfit11/adult_fedlr_all_joblib"
os.makedirs(save_dir, exist_ok=True)

for n_clients in n_clients_list:
    print(f"\n> n_clients = {n_clients}")

    # Disabled: per-n_clients lookup of a closed-form payoff function
    # (the joblib pipeline is used instead, see the lottery check below).
    # Dynamically get the corresponding payoff function
    #payoff_func_name = f"payoff_{n_clients}"
    #if payoff_func_name not in globals():
        #raise ValueError(f"Missing payoff function for {n_clients} clients")
    #payoff_func = globals()[payoff_func_name]


    for max_iter in max_iters:
        print(f"  max_iter = {max_iter}")

        # reset counters for this config
        counts_static = Counter()
        lottery_count = 0


        for trial in range(1, n_trials + 1):
            print(trial)

            # The seed mixes the base seed, the trial index, max_iter and a
            # random.randint draw. No random.seed call is visible in this cell,
            # so the draw (and hence trial_seed) differs between runs.
            rand_component = random.randint(0, 500)
            trial_seed = base_random_seed + trial + int(1000 * max_iter) + 2 * rand_component

            X_test_full_scaled = X_test_global_scaled.copy()
            y_test_full        = y_test_global.copy()

            # Evaluate on a stratified 80% subsample of the test set.
            subsample_fraction = 0.80

            X_test_glob, _, y_test_glob, _ = train_test_split(
                X_test_full_scaled, y_test_full,
                train_size=subsample_fraction,
                random_state=trial_seed,
                stratify=y_test_full
            )



            # Disabled alternative: partition the clients with StratifiedKFold.
            #from sklearn.model_selection import StratifiedKFold
            #skf = StratifiedKFold(n_splits=n_clients, shuffle=True, random_state=trial_seed)
            #parts = []
            #for _, client_idx in skf.split(df_train.drop(columns="target"), df_train["target"]):
                #df_c = df_train.iloc[client_idx].reset_index(drop=True)
                #parts.append(df_c)


            # Shuffle the training table, then hand out one stratified share of
            # sample_size rows per client; the last client takes whatever is left.
            df_rem      = df_train.sample(frac=1, random_state=trial_seed).reset_index(drop=True)
            sample_size = len(df_train) // n_clients
            parts = []
            for idx in range(n_clients):
                if idx == n_clients - 1:
                    df_c = df_rem.copy()
                else:
                    sss = StratifiedShuffleSplit(n_splits=1, train_size=sample_size,
                                                    random_state=trial_seed)
                    tr_idx, _ = next(sss.split(df_rem, df_rem["target"]))
                    df_c      = df_rem.iloc[tr_idx]
                    # df_rem carries a fresh 0..n-1 index, so dropping by label
                    # removes exactly the rows selected by position above.
                    df_rem    = df_rem.drop(df_c.index).reset_index(drop=True)
                parts.append(df_c.reset_index(drop=True))

            client_models     = []
            client_accs = {}   # client index -> accuracy on the test subsample
            for idx, df_c in enumerate(parts):
                X_c = df_c.drop(columns="target").to_numpy()
                y_c = df_c["target"].to_numpy()
                # NOTE(review): the client data (already preprocessed above) is
                # standardized again per client, while X_test_glob is not;
                # confirm this train/test scaling mismatch is intended.
                X_std = StandardScaler().fit_transform(X_c)

                if model_type.lower() == "logistic":
                    model = LogisticRegression(random_state=trial_seed,
                                                max_iter=max_iter)
                else:
                    # max_iter doubles as the tree depth in this branch.
                    model = DecisionTree(max_depth=max_iter,
                                                    random_state=trial_seed)

                model.fit(X_std, y_c)
                client_models.append(model)
                
    
                acc_global = accuracy_score(y_test_glob,
                                            model.predict(X_test_glob))
                client_accs[idx] = acc_global
            

            # coalition evaluation
            df_res = evaluate_coalitions2(
                client_models=client_models,
                client_global_accuracies=client_accs,
                n_clients=n_clients,
                aggregator_func=aggregate_lr_models,
                X_test=X_test_glob,
                y_test=y_test_glob,
                corrupt_client_indices=[],
                approach=approach
            )

            # static‐game Nash counts: one increment per row returned by
            # find_nash_equilibria_v2, keyed by that row's index value.
            df_ne = find_nash_equilibria_v2(df_res)
            if not df_ne.empty:
                for coalition in df_ne.index:
                    counts_static[coalition] += 1


            # lottery‐game incentive check
            vals      = np.array(list(client_accs.values()))
            mu_full   = vals.mean()
            sig_full  = vals.std(ddof=1)   # sample standard deviation
            
            # 3) package into a 2-column array and predict
            X_new    = np.array([[mu_full, sig_full]])   # shape (1,2)
            payoff_f    = pipeline.predict(X_new)[0]
            print(payoff_f)


            # The trial counts as a "lottery" occurrence when no client's own
            # accuracy exceeds the predicted payoff by more than eps.
            has_incentive = any(
                acc > payoff_f + eps
                for acc in client_accs.values()
            )
            if not has_incentive:
                lottery_count += 1


        # Sum over all index values: a trial that returned several rows from
        # find_nash_equilibria_v2 contributes more than 1 to this total.
        static_count = sum(counts_static.values())

        # One single-row CSV per (n_clients, max_iter) configuration.
        counts_df = pd.DataFrame([{
            'n_clients':           n_clients,
            'max_iter':            max_iter,
            'Static_Occurrences':  static_count,
            'Lottery_Occurrences': lottery_count
        }])
        fname = (f"Nash_Counts_{approach}"
                 f"_nclients_{n_clients}"
                 f"_maxiter_{max_iter}.csv")
        out_path = os.path.join(save_dir, fname)
        counts_df.to_csv(out_path, index=False)
        print(f"saved {fname}")
