In [None]:
import secretflow as sf
from secretflow.security.aggregation import SPUAggregator, SecureAggregator
from secretflow.ml.nn import FLModel
from secretflow.utils.simulation.datasets import dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from secretflow.utils.simulation.data.ndarray import create_ndarray


# Report which SecretFlow release this notebook is running against.
print(f"The version of SecretFlow: {sf.__version__}")

# Tear down any SecretFlow runtime that may still be alive from an earlier run.
sf.shutdown()

# Initialise SecretFlow with three parties at the "local" address and bind
# one PYU device to each of them.
sf.init(["alice", "bob", "charlie"], address="local")
alice, bob, charlie = (sf.PYU(party) for party in ("alice", "bob", "charlie"))

# The SPU device is defined over alice and bob only.
spu = sf.SPU(sf.utils.testing.cluster_def(["alice", "bob"]))

In [None]:
# Read the "creditcard" dataset into a DataFrame ("," is read_csv's default separator).
creditcard_source = dataset("creditcard")
df = pd.read_csv(creditcard_source)

In [None]:
# Seed passed as random_state to the train/validation/test splits.
seed = 1234

# Global matplotlib defaults: automatic layout plus bold, larger axis labels/titles.
axes_style = {
    "labelweight": "bold",
    "labelsize": "large",
    "titleweight": "bold",
    "titlesize": 14,
    "titlepad": 10,
}
plt.rc("figure", autolayout=True)
plt.rc("axes", **axes_style)

# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Remove exact duplicate rows (first occurrence kept), then discard the Time column.
df = df.drop_duplicates().drop(columns="Time")

# Rows that are duplicates once Time is gone: show how they distribute over Class.
duplicated_without_time = df.duplicated()
df.loc[duplicated_without_time, "Class"].value_counts()

In [None]:
# Drop duplicate rows again (first occurrence kept) and show the resulting shape.
df = df.drop_duplicates()
df.shape

In [None]:
# boxcox_normmax picks the Box-Cox lambda for the shifted amounts (Amount + 1);
# boxcox1p then applies the Box-Cox transform to 1 + Amount with that lambda.
amount_lambda = boxcox_normmax(df["Amount"] + 1)

# Replace the raw Amount column with its Box-Cox version. The former
# "Amount_log" column was created and dropped again in the same cell, so it is
# no longer computed; the resulting columns are unchanged.
df = df.assign(Amount_bc=boxcox1p(df["Amount"], amount_lambda)).drop(columns="Amount")

In [None]:
# Class balance: number of rows labelled 0, then number of rows labelled 1.
# (The class-0 count used to be stored under the name "count_of_ones".)
count_of_zeros = (df["Class"] == 0).sum()
print(count_of_zeros)

count_of_ones = (df["Class"] == 1).sum()
print(count_of_ones)

In [None]:
# Store the label as float and separate it from the feature columns.
df["Class"] = df["Class"].astype(float)
y = df["Class"]
X = df.drop(columns="Class")

# Standardise the features, then fit a PCA with default settings on them.
# Only the fitted estimator is needed afterwards, not the projected data.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
pca = PCA()
pca.fit(X_scaled);

In [None]:
# Running total of the explained-variance ratios, expressed in percent.
cum_sum = 100 * np.cumsum(pca.explained_variance_ratio_)
# Zero-based component indices used as x-axis values.
comp = list(range(len(cum_sum)))

In [None]:
# Cumulative explained variance against the component index.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(comp, cum_sum, marker=".")
ax.set_xlabel("PCA Components")
ax.set_ylabel("Cumulative Explained Variance (%)")
ax.set_title("PCA")
plt.show()

In [None]:
# Hold out 20% of the rows as the test set (stratified on the label) ...
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=seed
)
# ... then carve 20% of the remainder out as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=seed
)

# Fit the scaler on the training rows only, after both splits, so that neither
# validation nor test rows contribute to the mean/variance used for scaling.
# (Previously the scaler was fitted before the validation rows were split off.)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

print(f"Training set: {X_train.shape[0]}")
print(f"Validation set: {X_val.shape[0]}")
print(f"Test set: {X_test.shape[0]}")

In [None]:
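# Disabled cell: Optuna objective for a Keras hyperparameter search (commented out, never executed).
# from secretflow.security.aggregation import SecureAggregator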
# from secretflow.ml.nn import FLModel
# import tensorflow as tf
# from tensorflow import keras
# EPOCHS = 100
# BATCH_SIZE = 256
# def objective(input_shape,num_classes,name='model'):
#     def objective(trial):

#         model = keras.Sequential()

#         in_feat = X_train.shape[0]

#         for i in range(trial.suggest_int("n_layers", 1, 2)):
#             out_feat = trial.suggest_int("n_units_{}".format(i+1), 1, 40)
#             model.add(keras.layers.Dense(units=out_feat, activation='relu'))
#             model.add(keras.layers.Dropout(trial.suggest_uniform("dropout_{}".format(i+1), 0.2, 0.5)))
#             in_feat=out_feat
 
#         model.add(keras.layers.Dense(1, activation='sigmoid'))
    
#         model.compile(loss='binary_crossentropy',
#                     optimizer=keras.optimizers.Adam(trial.suggest_float("lr", 5e-5, 1e-2, log=True)),
#                     metrics=['accuracy'])
    
#         early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True)
    
#         model.fit(X_train, y_train,
#                 validation_data = (X_val, y_val),
#                 shuffle = True,
#                 batch_size = BATCH_SIZE,
#                 epochs = EPOCHS,
#                 callbacks = [early_stop], 
#                 verbose = False )
    
#         score = model.evaluate(X_val, y_val, verbose=0)
                  
#         return score[1]
#     return objective

In [None]:
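# Disabled cell: driver for the Optuna study using the objective from the previous cell (commented out, never executed).
# import optuna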
# num_classes=10
# input_shape=(28,28,1)
# study = optuna.create_study(direction="maximize")
# study.optimize(objective(input_shape,num_classes), n_trials=10)

# print("Number of finished trials: {}".format(len(study.trials)))

# print("Best trial:")
# trial = study.best_trial

# print("  Value: {}".format(trial.value))

# params_1 = []

# for key, value in trial.params.items():
#     params_1.append(value)
#     print("    {}: {}".format(key, value))

In [None]:
# Give every split a trailing axis of length 1: (rows, features) -> (rows, features, 1).
X_train, X_test, X_val = (
    split.reshape(split.shape[0], split.shape[1], 1)
    for split in (X_train, X_test, X_val)
)
X_train.shape, X_test.shape, X_val.shape

In [None]:
# alice and bob each receive a 0.5 share of every array.
parts = {alice: 0.5, bob: 0.5}

# Labels that are not NumPy arrays yet are converted with .to_numpy().
y_train, y_test, y_val = (
    labels if isinstance(labels, np.ndarray) else labels.to_numpy()
    for labels in (y_train, y_test, y_val)
)

# Build the federated arrays, partitioned along axis 0 according to `parts`.
fed_train_x = create_ndarray(X_train, parts=parts, axis=0)
fed_train_y = create_ndarray(y_train, parts=parts, axis=0)
fed_val_x = create_ndarray(X_val, parts=parts, axis=0)
fed_val_y = create_ndarray(y_val, parts=parts, axis=0)
fed_test_x = create_ndarray(X_test, parts=parts, axis=0)
fed_test_y = create_ndarray(y_test, parts=parts, axis=0)

# Print the per-partition shape of every federated array.
for fed_array in (
    fed_train_x,
    fed_train_y,
    fed_val_x,
    fed_val_y,
    fed_test_x,
    fed_test_y,
):
    print(fed_array.partition_shape())

fed_train_x

In [None]:
def create_model(input_shape, num_classes, name="model"):
    """Return a zero-argument builder for a small 1-D CNN binary classifier.

    A builder is returned instead of a model instance, and TensorFlow is
    imported only inside the builder, so nothing TensorFlow-related runs
    until the builder is called.

    Args:
        input_shape: Shape of a single sample, forwarded to the first
            ``Conv1D`` layer.
        num_classes: Kept for interface compatibility; not used, because the
            network always ends in a single sigmoid unit.
        name: Kept for interface compatibility; not used.

    Returns:
        A callable without arguments that builds, compiles, prints the summary
        of, and returns a Keras ``Sequential`` model.
    """

    def _build_model():
        from tensorflow.keras import Sequential
        from tensorflow.keras.layers import (
            BatchNormalization,
            Conv1D,
            Dense,
            Dropout,
            Flatten,
        )
        from tensorflow.keras.optimizers import Adam

        model = Sequential(
            [
                # Two convolutional stages, each followed by normalisation and dropout.
                Conv1D(32, 2, activation="relu", input_shape=input_shape),
                BatchNormalization(),
                Dropout(0.2),
                Conv1D(64, 2, activation="relu"),
                BatchNormalization(),
                Dropout(0.5),
                # Dense head ending in one sigmoid unit for the binary label.
                Flatten(),
                Dense(64, activation="relu"),
                Dropout(0.5),
                Dense(1, activation="sigmoid"),
            ]
        )

        # `learning_rate` replaces the deprecated `lr` keyword, which newer
        # Keras releases no longer accept.
        model.compile(
            optimizer=Adam(learning_rate=0.0001),
            loss="binary_crossentropy",
            metrics=["accuracy"],
        )
        model.summary()

        return model

    return _build_model

In [None]:

# Devices that take part in training; charlie is passed to FLModel as the server.
device_list = [alice, bob]

# Two aggregators are constructed; only secure_aggregator is handed to FLModel below.
secure_aggregator = SecureAggregator(charlie, [alice, bob])
spu_aggregator = SPUAggregator(spu)

# NOTE(review): create_model never reads num_classes, and 10 looks like a
# leftover value for this binary label — confirm before relying on it.
num_classes = 10
input_shape = X_train[0].shape
model = create_model(input_shape, num_classes)

fl_options = dict(
    server=charlie,
    device_list=device_list,
    model=model,
    aggregator=secure_aggregator,
    strategy="fed_avg_w",
    backend="tensorflow",
)
fed_model = FLModel(**fl_options)

In [None]:
# Display the dtype of the training feature array.
X_train.dtype

In [None]:
# Display the dtype of the training label array.
y_train.dtype

In [None]:
# Display the dtype of the validation feature array.
X_val.dtype

In [None]:
# Display the dtype of the validation label array.
y_val.dtype

In [None]:
# Training schedule for the federated run.
EPOCHS, BATCH_SIZE = 5, 256

fit_options = dict(
    validation_data=(fed_val_x, fed_val_y),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    aggregate_freq=1,
    verbose=1,
)
# Train on the federated training arrays; the returned history is plotted later.
history = fed_model.fit(fed_train_x, fed_train_y, **fit_options)

In [None]:
# Evaluate on the federated test arrays and print every global metric by name.
test_results = fed_model.evaluate(fed_test_x, fed_test_y)
global_eval, local_eval = test_results
for metric in global_eval:
    print(metric.name, metric.result())

In [None]:
# One figure per metric, each showing the training and validation curves
# taken from the global training history.
history_plots = (
    ("accuracy", "val_accuracy", "FLModel accuracy", "Accuracy"),
    ("loss", "val_loss", "FLModel loss", "Loss"),
)
for train_key, valid_key, plot_title, y_label in history_plots:
    plt.plot(history["global_history"][train_key])
    plt.plot(history["global_history"][valid_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel("Epoch")
    plt.legend(["Train", "Valid"], loc="upper left")
    plt.show()