In [None]:
import secretflow as sf
from secretflow.security.aggregation import SPUAggregator, SecureAggregator
from secretflow.ml.nn import FLModel
from secretflow.utils.simulation.datasets import dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from secretflow.utils.simulation.data.ndarray import create_ndarray


# Report which SecretFlow release this notebook is running against.
print("The version of SecretFlow: {}".format(sf.__version__))

# Tear down any runtime left over from an earlier run before starting a fresh one.
sf.shutdown()

# Party names are declared once and reused for the cluster, the PYU devices and the SPU.
PARTY_NAMES = ("alice", "bob", "charlie")

sf.init(list(PARTY_NAMES), address="local")
alice, bob, charlie = (sf.PYU(party_name) for party_name in PARTY_NAMES)

# The SPU device spans only the first two parties (alice and bob).
spu = sf.SPU(sf.utils.testing.cluster_def(list(PARTY_NAMES[:2])))

In [None]:
# Resolve the location of the "creditcard" dataset and read it as comma-separated values.
creditcard_csv = dataset("creditcard")
df = pd.read_csv(creditcard_csv, sep=",")

# Work on an independent copy so the frame loaded above stays untouched.
sf_df = df.copy()

In [None]:
import tempfile

# Horizontal split: the first half of the rows goes to alice, the rest to bob.
half = len(sf_df) // 2
h_alice = sf_df.iloc[:half]
h_bob = sf_df.iloc[half:]

# Save to temporary files.
# tempfile.mkstemp() returns an *open* OS-level file descriptor together with the
# path; discarding that descriptor leaks it. NamedTemporaryFile(delete=False)
# reserves a path the same way, and the `with` block closes the handle while
# leaving the file on disk for the CSV writes below.
with tempfile.NamedTemporaryFile(delete=False) as alice_file, \
        tempfile.NamedTemporaryFile(delete=False) as bob_file:
    h_alice_path = alice_file.name
    h_bob_path = bob_file.name

h_alice.to_csv(h_alice_path, index=False)
h_bob.to_csv(h_bob_path, index=False)

In [None]:
def create_dataset_builder(
    batch_size = 256,
):
    """Create a dataset builder that turns one party's CSV file into a tf.data.Dataset.

    Args:
        batch_size: Number of samples per batch. It is used both to batch the
            returned dataset and to compute the number of steps per epoch.
            Must be positive.

    Returns:
        A function ``dataset_builder(folder_path, stage="train")`` that returns a
        ``(dataset, steps_per_epoch)`` tuple for the requested stage.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")

    def dataset_builder(folder_path, stage="train"):
        """Load the CSV at ``folder_path`` and build the dataset for ``stage``.

        The last CSV column is used as the label and all other columns as features.
        The data is split 80/20 with a fixed seed, so the "train" and "eval"
        stages always see the same, disjoint partitions of the file.

        Args:
            folder_path: Path of the CSV file to read.
            stage: Either "train" or "eval".

        Returns:
            Tuple ``(dataset, steps_per_epoch)`` where ``dataset`` yields batches
            of ``(features, label)`` with features shaped ``(batch, n_features, 1)``.

        Raises:
            ValueError: If ``stage`` is neither "train" nor "eval".
        """
        # Fail fast on an unsupported stage instead of silently returning None.
        if stage not in ("train", "eval"):
            raise ValueError(
                f"stage must be 'train' or 'eval', got {stage!r}"
            )

        # Imports are kept local to the builder, as in the original cell.
        # NOTE(review): presumably so the function is self-contained when it is
        # executed on each party's device - confirm.
        import math

        import numpy as np
        import pandas as pd
        import tensorflow as tf
        from imblearn.over_sampling import SMOTE
        from sklearn.model_selection import train_test_split

        pd_data = pd.read_csv(folder_path)
        data = np.array(pd_data, dtype=np.float32)

        # Preprocess the data: last column is the label, the rest are features.
        X = data[:, :-1]
        y = data[:, -1]
        # Squash the first and the last feature column with a sigmoid.
        X[:, 0] = 1 / (1 + np.exp(-X[:, 0]))
        X[:, -1] = 1 / (1 + np.exp(-X[:, -1]))

        # Split into training and test sets BEFORE oversampling. Oversampling
        # first would let synthetic points derived from test rows leak into the
        # training set, and (with an unseeded SMOTE) would make the "train" and
        # "eval" calls operate on different splits.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=1234
        )

        if stage == "train":
            # Oversample the training partition only, using SMOTE with a fixed seed.
            features, labels = SMOTE(random_state=1234).fit_resample(X_train, y_train)
            # NOTE(review): SMOTE is assumed to append its synthetic rows after
            # the original ones - confirm. Shuffle so the classes are mixed
            # within each batch.
            order = np.random.default_rng(1234).permutation(len(features))
            features, labels = features[order], labels[order]
        else:
            features, labels = X_test, y_test

        # Add a trailing channel axis: (n_samples, n_features) -> (n_samples, n_features, 1).
        features = features.reshape(features.shape[0], features.shape[1], 1)

        # Batch the dataset so that one step consumes `batch_size` samples, which
        # is what the steps-per-epoch value below assumes.
        stage_dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(
            batch_size
        )
        # The dataset size is the number of samples (len(features)), not the
        # number of feature columns (len(features[0])).
        steps_per_epoch = math.ceil(len(features) / batch_size)
        return stage_dataset, steps_per_epoch

    return dataset_builder


In [None]:
# Every participating party gets its own dataset builder with the same batch size.
data_builder_dict = {
    party: create_dataset_builder(batch_size=256)
    for party in (alice, bob)
}


In [None]:

# Map each party's device to the path of the CSV file holding its share of the data.
data = {alice: h_alice_path, bob: h_bob_path}

In [None]:

device_list = [alice, bob]
secure_aggregator = SecureAggregator(charlie, [alice, bob])
spu_aggregator = SPUAggregator(spu)

# The label is the last column of the frame; derive the class count from the data
# instead of hard-coding 10, which does not match this dataset's label column.
num_classes = int(sf_df.iloc[:, -1].nunique())

# `h_alice` is a DataFrame, so `h_alice[0]` is a column lookup and raises KeyError.
# The dataset builder feeds samples shaped (n_features, 1), where n_features is
# every column except the trailing label column.
input_shape = (h_alice.shape[1] - 1, 1)

# NOTE(review): `create_model` is not defined in the cells shown here; it must be
# defined in an earlier cell for this notebook to survive Restart & Run All.
model = create_model(input_shape, num_classes)
fed_model = FLModel(
    server=charlie,
    device_list=device_list,
    model=model,
    aggregator=secure_aggregator,
    strategy="fed_avg_w",
    backend="tensorflow",
)

In [None]:
EPOCHS = 5
BATCH_SIZE = 256

# Collect the training options in one place, then launch federated training.
# The same per-party file mapping is passed as both training and validation input.
fit_options = dict(
    validation_data=data,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    aggregate_freq=1,
    verbose=1,
    dataset_builder=data_builder_dict,
)
history = fed_model.fit(data, None, **fit_options)