In [1]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier

from src.evalution import evaluate_result
from src.logger import Logger

In [2]:
# Experiment settings shared by the cells below.
# NOTE(review): `k` and `degree` are not referenced in the visible cells --
# presumably they belong to another experiment; confirm before removing them.
k: int = 9
degree: int = 3
# Number of folds handed to StratifiedKFold when evaluating each client.
n_splits: int = 5

In [3]:
def get_client_data(client):
    """Load one client's IID partition and split it into features and labels.

    Args:
        client: Client identifier; it is interpolated into the file name
            ``client_<client>.csv``.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the loaded DataFrame without the
        ``"label"`` column and ``y`` is the ``"label"`` column as a Series.

    Raises:
        FileNotFoundError: If the client's CSV file does not exist.
        KeyError: If the CSV has no ``"label"`` column.
    """
    # Build the path from components so the separator matches the host OS.
    # The previous hard-coded backslash literal only resolved on Windows.
    # The path is relative to the current working directory.
    data_file = Path("..", "..", "datasets", "iid", f"client_{client}.csv")
    dataset = pd.read_csv(data_file)

    X = dataset.drop(columns=["label"])
    y = dataset["label"]
    return X, y

In [4]:
from sklearn.linear_model import PassiveAggressiveClassifier, LogisticRegression
from sklearn.svm import LinearSVC

# Candidate estimators, keyed by the name used to select one for evaluation.
classifiers = {
    # Linear SVM with L2-regularised squared hinge loss and balanced class weights.
    "SVM": LinearSVC(
        C=0.5,
        penalty="l2",
        loss="squared_hinge",
        dual=True,
        class_weight="balanced",
        tol=1e-4,
        max_iter=5000,
        random_state=42,
    ),
    # Gradient-boosted trees with a multi-class soft-probability objective.
    # NOTE(review): num_class=4 is hard-coded -- presumably the "label" column
    # has exactly four classes; confirm against the datasets.
    "XGBoost": XGBClassifier(
        objective="multi:softprob",
        num_class=4,
        eval_metric="mlogloss",
        n_estimators=200,
        num_parallel_tree=None,
        tree_method="hist",
        random_state=42,
    ),
    # NOTE(review): this estimator is seeded with 50 while the others use 42 --
    # confirm whether the difference is intentional.
    "PassiveAggressive": PassiveAggressiveClassifier(
        C=1.0,
        loss="hinge",
        tol=1e-3,
        max_iter=1000,
        random_state=50,
    ),
    # NOTE(review): `multi_class` is deprecated in recent scikit-learn
    # releases -- confirm against the pinned version before upgrading.
    "LogisticRegression": LogisticRegression(
        C=0.1,
        penalty="l2",
        solver="saga",
        multi_class="multinomial",
        max_iter=1000,
        random_state=42,
    ),
}

In [5]:
from experiments.federated.utils import load_test_dataset

# Name of the estimator under evaluation. It both selects the entry from
# `classifiers` and prefixes the per-client result workbook, so the exported
# file name can never disagree with the model that produced the scores.
MODEL_NAME = "XGBoost"
model = classifiers[MODEL_NAME]


def save_client_result(client):
    """Cross-validate ``model`` on one client's data and export the scores.

    For every stratified fold of the client's data, ``model`` is fitted on
    the fold's training indices and scored on the dataset returned by
    ``load_test_dataset()``. The held-out part of each fold is not used.
    The per-fold results are written to ``<MODEL_NAME>_client_<client>.xlsx``
    in the current working directory.

    Args:
        client: Client identifier forwarded to ``get_client_data`` and
            embedded in the output file name.

    Returns:
        None. The only outputs are the progress messages and the workbook.
    """
    x_train, y_train = get_client_data(client)
    x_test, y_test = load_test_dataset()
    results = []
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    for fold, (train_idx, _) in enumerate(skf.split(x_train, y_train), 1):
        print(f"Fold {fold}/{n_splits} işleniyor...")

        # Only the training indices of each split are used for fitting.
        model.fit(x_train.iloc[train_idx], y_train.iloc[train_idx])

        y_pred = model.predict(x_test)
        try:
            y_prob = model.predict_proba(x_test)
        except (AttributeError, NotImplementedError):
            # Some estimators in `classifiers` (e.g. LinearSVC) expose no
            # probability output; evaluate them on hard predictions only.
            # Any other failure is a real error and is allowed to propagate.
            y_prob = None

        # Presumably `evaluate_result` returns a dict-like of metrics; the
        # fold number is stored next to them so rows stay identifiable.
        fold_scores = evaluate_result(y_test, y_pred, y_prob)
        fold_scores["Fold"] = fold
        results.append(fold_scores)

    results_df = pd.DataFrame(results)
    results_df.to_excel(f"{MODEL_NAME}_client_{client}.xlsx", index=False)

In [6]:
# Evaluate clients 0, 1 and 2 in turn; each call writes its own workbook.
for client_id in range(3):
    save_client_result(client_id)

Fold 1/5 işleniyor...
Fold 2/5 işleniyor...
Fold 3/5 işleniyor...
Fold 4/5 işleniyor...
Fold 5/5 işleniyor...
Fold 1/5 işleniyor...
Fold 2/5 işleniyor...
Fold 3/5 işleniyor...
Fold 4/5 işleniyor...
Fold 5/5 işleniyor...
Fold 1/5 işleniyor...
Fold 2/5 işleniyor...
Fold 3/5 işleniyor...
Fold 4/5 işleniyor...
Fold 5/5 işleniyor...
