# Imports

In [1]:
# from utils.seed import seed_everything
from utils.load_config import load_config
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import argparse 

# get metric and train, test support
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, roc_curve, auc

# get classifier models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier
import xgboost as xgb
# import lightgbm as lgb
from catboost import CatBoostClassifier
import os 
from os.path import join
from tensorflow.keras.models import load_model
from keras_utils import CustomDropout
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load the two configuration sections this notebook uses from one YAML file.
# The first argument to load_config is the section name requested
# (presumably a top-level key of the YAML -- confirm against utils.load_config).
config_path = "keras_config.yaml"
# CFG: this notebook reads CFG.seed (random_state of the train/test split).
CFG = load_config("CFG", config_path)
# CFG_ATTACK: this notebook reads shadow_models_path, test_size and attack_model_path.
CFG_ATTACK = load_config("CFG_ATTACK", config_path)



# Functions

In [2]:


def update_args_with_dict(args, dict) :
    """
    Overlay the entries of a mapping onto a namespace's attributes.

    Parameters:
        - args (argparse.Namespace): Namespace whose attributes are the base values.
        - dict (dict): Attribute names mapped to the values to set; entries
          override attributes of the same name. (This parameter name shadows
          the builtin ``dict`` inside the function.)

    Returns:
        - argparse.Namespace: A new namespace holding the merged attributes.

    Note:
        ``vars(args)`` is the namespace's live attribute dictionary, so the
        namespace passed in is updated in place as well.
    """
    vars(args).update(dict)
    return argparse.Namespace(**vars(args))

def cifar10_data() :
    """
    Load CIFAR-10 through Keras together with a small metadata dictionary.

    Returns:
        - tuple: ``(train, test, metadata)``. ``train`` and ``test`` are the two
          splits exactly as returned by ``tf.keras.datasets.cifar10.load_data()``.
          ``metadata`` is a dict with keys ``'input_shape'`` (shape of
          ``train[0]`` without its first axis), ``'num_classes'`` and
          ``'class_labels'``.
    """
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    train_split, test_split = tf.keras.datasets.cifar10.load_data()
    metadata = {
        'input_shape': train_split[0].shape[1:],
        'num_classes': len(class_names),
        'class_labels': class_names,
    }
    return train_split, test_split, metadata


def get_data(dataset) :
    """
    Dispatch to the loader for the named dataset.

    Parameters:
        - dataset (str): Dataset identifier; only ``'cifar10'`` is handled.

    Returns:
        - tuple: Whatever ``cifar10_data()`` returns (train split, test split, metadata).

    Raises:
        - ValueError: If ``dataset`` is anything other than ``'cifar10'``.
    """
    # Guard clause: reject unknown names before touching any loader.
    if dataset != 'cifar10' :
        raise ValueError("Dataset not supported")
    return cifar10_data()


def get_attack_data(dataset, alg = 'central', proxy_data_size = 1000) :
    """
    Load a dataset and prepare it for the attack pipeline.

    Images are divided by 255 and stored as float32; labels are one-hot encoded.

    Parameters:
        - dataset (str): Dataset identifier forwarded to ``get_data``.
        - alg (str): Training algorithm name. Only ``'fedakd'`` changes the
          result: the leading ``proxy_data_size`` training images are split off
          as proxy data and removed from the training set.
        - proxy_data_size (int): Number of leading training samples reserved as
          proxy data when ``alg == 'fedakd'``. Defaults to 1000, the value that
          was previously hard-coded.

    Returns:
        - tuple: ``(train_data, test_data)`` where each element is an
          ``(images, one_hot_labels)`` pair; for ``alg == 'fedakd'`` a third
          element ``proxy_data`` (images only, no labels) is appended.

    Raises:
        - ValueError: If the dataset is unsupported (raised by ``get_data``) or
          ``proxy_data_size`` is negative.
    """
    train_data, test_data, metadata = get_data(dataset)

    # Pin the one-hot width to the dataset's class count so train and test
    # label matrices always agree, instead of inferring it from the largest
    # label present in each split.
    num_classes = metadata['num_classes']

    train_x = np.array(train_data[0] / 255, dtype=np.float32)
    train_y = tf.keras.utils.to_categorical(train_data[1], num_classes=num_classes)
    test_x = np.array(test_data[0] / 255, dtype=np.float32)
    test_y = tf.keras.utils.to_categorical(test_data[1], num_classes=num_classes)

    if alg == 'fedakd' :
        if proxy_data_size < 0 :
            raise ValueError("proxy_data_size must be non-negative")
        # The first proxy_data_size samples become proxy data; the remainder
        # stays as the training set.
        proxy_data = train_x[:proxy_data_size]
        train_split = (train_x[proxy_data_size:], train_y[proxy_data_size:])
        return train_split, (test_x, test_y), proxy_data

    return (train_x, train_y), (test_x, test_y)



def get_nn_attack_model(in_features, compile_model=True):
    """
    Build the fully connected binary classifier used for the Membership
    Inference Attack (MIA).

    Parameters:
        - in_features (int): Length of each input vector. The attack input is
          typically the target model's softmax/logit output, in which case this
          equals the target model's number of classes.
        - compile_model (bool): Whether to compile the model before returning it.

    Returns:
        - keras.models.Model: The constructed attack model. Its single sigmoid
          output scores "in training set" versus "not in training set".
    """
    layer_stack = [
        Dense(800, activation='relu', input_shape=(in_features,)),
        Dropout(0.1),
        Dense(200, activation='tanh'),
        Dense(30, activation='relu'),
        Dropout(0.1),
        Dense(1, activation='sigmoid'),  # binary decision: member or non-member
    ]
    attack_net = Sequential(layer_stack)

    if not compile_model:
        return attack_net

    attack_net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return attack_net


# Train attack

In [3]:


# Number of shadow models whose attack datasets are pooled into one training set.
N_SHADOW_MODELS = 50

# Collect the per-shadow frames and concatenate once after the loop; calling
# pd.concat inside the loop re-copies everything accumulated so far on every
# iteration.
feature_frames, label_frames = [], []

for i in range(N_SHADOW_MODELS) :

    this_shadow_model_path = join(CFG_ATTACK.shadow_models_path, f"shadow_model_{i}")
    attack_dset_path = join(this_shadow_model_path, f'attack_dset_shadow_{i}.csv')

    # Only the CSV is read here. The shadow model's weights (best_model.h5)
    # play no part in assembling the attack dataset, so the Keras model is
    # not loaded.
    df_shadow = pd.read_csv(attack_dset_path)

    label_frames.append(df_shadow["is_member"])
    feature_frames.append(df_shadow.drop(["is_member"], axis=1))

# to numpy
X = pd.concat(feature_frames).to_numpy()
# Keep the labels as an (n, 1) column vector, the shape downstream cells were run with.
y = pd.concat(label_frames).to_numpy().reshape(-1, 1)


print("X shape:", X.shape)
print("y shape:", y.shape)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=CFG_ATTACK.test_size, random_state=CFG.seed
)

print("x_train shape:", X_train.shape)
print("x_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)



2023-08-12 21:08:23.680184: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-08-12 21:08:23.680451: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

X shape: (4800000, 10)
y shape: (4800000, 1)
x_train shape: (3840000, 10)
x_test shape: (960000, 10)
y_train shape: (3840000, 1)
y_test shape: (960000, 1)


In [4]:

# Attack classifier: the Keras MLP from get_nn_attack_model, with one input
# per attack-feature column.
# Commented-out alternatives, kept for reference:
#   xgb.XGBClassifier(n_estimators=CFG_ATTACK.n_estimators, n_jobs=-1, random_state=CFG.seed)
#   lgb.LGBMClassifier(n_estimators=CFG_ATTACK.n_estimators, n_jobs=-1, random_state=CFG.seed)
#   CatBoostClassifier(iterations=100, depth=4, learning_rate=CFG_ATTACK.learning_rate,
#                      loss_function="Logloss", verbose=True)
#   (https://catboost.ai/en/docs/concepts/loss-functions-classification)
model = get_nn_attack_model(X_train.shape[1], compile_model=True)

# NOTE(review): the held-out split is also passed as validation data, so the
# per-epoch validation metrics are computed on the same rows as the final test metrics.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=256,
    epochs=20,
    verbose=1,
)



Epoch 1/20


2023-08-12 21:08:34.853416: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-08-12 21:08:35.250190: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-08-12 21:10:09.019942: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x17bc14190>

# Test attack

In [None]:
# `model` is the Keras network trained above. Its predict() returns sigmoid
# membership probabilities with one column, and it does not provide the
# scikit-learn estimator methods score() / predict_proba(). Hard labels are
# therefore obtained by thresholding the probabilities, and the raw
# probabilities are used as ROC scores.
y_true = np.asarray(y_test).ravel().astype(int)
y_score = np.asarray(model.predict(X_test, verbose=0)).ravel()
# 0.5 is the same cut-off Keras applies for its binary 'accuracy' metric.
y_pred = (y_score > 0.5).astype(int)

accuracy = np.mean(y_pred == y_true)
precision, recall, f1_score, _ = precision_recall_fscore_support(
    y_true, y_pred, average="binary"
)
print("accuracy:", accuracy)
print("precision:", precision)
print("recall:", recall)
print("f1_score:", f1_score)

fpr, tpr, thresholds = roc_curve(y_true, y_score)
print("mean fpr:", np.mean(fpr))
print("mean tpr:", np.mean(tpr))
# Area under the ROC curve summarises the attack across all thresholds.
print("auc:", auc(fpr, tpr))

In [None]:
# For any alg other than 'fedakd', get_attack_data returns exactly two
# (images, one_hot_labels) pairs.
train_data, test_data = get_attack_data(dataset='cifar10', alg='fedavg')
# Display the shapes in the order: train images, test images, train labels, test labels.
tuple(arr.shape for arr in (train_data[0], test_data[0], train_data[1], test_data[1]))

In [None]:
# NOTE(review): machine-specific absolute path -- move into the config file so
# the notebook can run on another machine.
model_trained_path = '/Users/gadmohamed/Desktop/live repos/FedFort/src/results/cifar10_fedavg_True_07_08_2023_00_32_28'
epoch_dir = join(model_trained_path, 'epoch_60')

# Keep file paths and loaded arrays under separate names.
train_preds_path = join(epoch_dir, 'train_preds.npy')
test_preds_path = join(epoch_dir, 'test_preds.npy')

train_preds = np.load(train_preds_path)
test_preds = np.load(test_preds_path)
perm = np.load(join(model_trained_path, 'perm.npy'))
print(perm.shape, train_preds.shape, test_preds.shape)

n_clients = 10
local_size = 500
train_limit = n_clients * local_size
# Slice the index array before indexing the data: same rows as
# data[perm][:train_limit], but without first materialising a permuted copy of
# the whole training set. Assumes perm is a 1-D integer index array -- TODO confirm.
member_idx = perm[:train_limit]
small_train_data = (train_data[0][member_idx], train_data[1][member_idx])

# Label target-model predictions on training samples as members (1) and
# predictions on test samples as non-members (0).
df_member = pd.DataFrame(train_preds)
df_member["is_member"] = 1
df_nonmember = pd.DataFrame(test_preds)
df_nonmember["is_member"] = 0
df_attack_dset = pd.concat([df_member, df_nonmember])


df_attack_dset.shape

In [None]:
# Load a saved CatBoost attack classifier from the path configured in CFG_ATTACK.
# NOTE(review): the attack model trained earlier in this notebook is a Keras
# network bound to `model`; this is a separate CatBoost model -- confirm which
# one the evaluation is meant to use.
attack_model = CatBoostClassifier()
attack_model.load_model(CFG_ATTACK.attack_model_path)