In [1]:
%load_ext autoreload
%autoreload 2

import secretflow as sf
import matplotlib.pyplot as plt

sf.init(['alice', 'bob', 'carol'], address='local')
alice, bob, carol = sf.PYU('alice'), sf.PYU('bob'), sf.PYU('carol')


  from .autonotebook import tqdm as notebook_tqdm
2024-12-16 09:50:33,584	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
INFO:root:Try init sf in SIMULATION mode
  self.pid = _posixsubprocess.fork_exec(
2024-12-16 09:50:37,894	INFO worker.py:1724 -- Started a local Ray instance.


In [2]:
import numpy as np
from secretflow.utils.simulation.datasets import dataset

mnist_dataset = dataset('mnist')
mnist = np.load(mnist_dataset, allow_pickle=True)
image = mnist['x_train']
label = mnist['y_train']

alice_data = image

In [3]:
alice_images = image[:15000]
bob_images = image[15000:35000]
carol_images = image[35000:]

alice_labels = label[:15000]
bob_labels = label[15000:35000]
carol_labels = label[35000:]

alice_partition_images = alice(lambda x: x)(alice_images)
bob_partition_images = bob(lambda x: x)(bob_images)
carol_partition_images = carol(lambda x: x)(carol_images)

alice_partition_labels = alice(lambda x: x)(alice_labels)
bob_partition_labels = bob(lambda x: x)(bob_labels)
carol_partition_labels = carol(lambda x: x)(carol_labels)

In [4]:
# 创建联邦数据集 FedNdarray
from secretflow.data.ndarray import FedNdarray, PartitionWay

# 图像数据
federated_images = FedNdarray(
    partitions={alice: alice_partition_images, bob: bob_partition_images, carol: carol_partition_images},
    partition_way=PartitionWay.HORIZONTAL,  # 水平分片
)

# 标签数据
federated_labels = FedNdarray(
    partitions={alice: alice_partition_labels, bob: bob_partition_labels, carol: carol_partition_labels},
    partition_way=PartitionWay.HORIZONTAL,  # 水平分片
)

# 检查分区信息
print("Image partitions shape:", federated_images.partition_shape())
print("Label partitions shape:", federated_labels.partition_shape())

Image partitions shape: {PYURuntime(alice): (15000, 28, 28), PYURuntime(bob): (20000, 28, 28), PYURuntime(carol): (25000, 28, 28)}
Label partitions shape: {PYURuntime(alice): (15000,), PYURuntime(bob): (20000,), PYURuntime(carol): (25000,)}


In [5]:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from sklearn.model_selection import train_test_split


def create_dense_model():
    model = keras.Sequential(
        [
            keras.Input(shape=(28, 28, 1)),  # 输入形状为 28x28 的灰度图像，包含 1 个通道
            layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),  # 卷积层 1
            layers.MaxPooling2D(pool_size=(2, 2)),  # 最大池化层
            layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),  # 卷积层 2
            layers.MaxPooling2D(pool_size=(2, 2)),  # 最大池化层
            layers.Flatten(),  # 拉平为一维向量
            layers.Dense(128, activation="relu"),  # 全连接层
            layers.Dense(10, activation="softmax"),  # 输出层，10 个类别
        ]
    )
    model.compile(
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# 在外部创建共享模型
shared_model = create_dense_model()

# 初始化优化器
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
optimizer.build(shared_model.trainable_variables)

In [6]:
import tensorflow as tf
from secretflow import reveal
from tqdm import tqdm

# 使用 Cosine Similarity 计算损失
def cosine_similarity(a, b):
    # 计算余弦相似度
    dot_product = tf.reduce_sum(a * b, axis=-1)  # 点积
    norm_a = tf.norm(a, axis=-1)  # a 的范数
    norm_b = tf.norm(b, axis=-1)  # b 的范数
    cosine_sim = dot_product / (norm_a * norm_b + 1e-8)  # 计算余弦相似度，防止除以零
    return cosine_sim

# 定义单轮训练函数（手动计算损失和梯度）
def train_one_epoch(partition_data, partition_labels, shared_model, previous_weights, optimizer=None, batch_size=128, mu = 2, temperature = 0.5):
    # 从分区中提取数据和标签
    data = reveal(partition_data)
    labels = reveal(partition_labels)
    
    # 调整数据形状为模型的输入格式
    data = data.reshape(-1, 28, 28, 1)  # 确保形状为 (样本数, 28, 28, 1)
    
    # 创建模型
    model = create_dense_model()
    previous_models = [create_dense_model() for _ in previous_weights]


    
    # 初始化权重（如果提供了初始权重）
    if shared_model.get_weights() is not None:
        model.set_weights(shared_model.get_weights())
    for previous_model, previous_weight in zip(previous_models, previous_weights):
        if previous_weight is not None:  # 检查每个权重是否为 None
            previous_model.set_weights(previous_weight)

    # 使用优化器（默认是 Adam，如果没有传入）
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam()  # 默认使用 Adam 优化器
    
    # 手动计算损失
    dataset = tf.data.Dataset.from_tensor_slices((data, labels)).batch(batch_size)  # 使用数据集和批次大小
    
    epoch_loss = 0
    accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()  # 创建准确率计算指标

    accumulated_gradients = [tf.zeros_like(var) for var in model.trainable_variables]
    
    for batch_data, batch_labels in tqdm(dataset, desc="Training Progress"):
        with tf.GradientTape() as tape:
            # 前向传播
            predictions = model(batch_data, training=True)
            predictions_shared = shared_model(batch_data, training=True)
            predictions_previous_models = [model(batch_data, training=True) for model in previous_models]

            # 计算余弦相似度并生成新的预测结果
            new_predictions = cosine_similarity(predictions, predictions_shared)
            new_predictions_previous = [cosine_similarity(predictions, predictions_previous_model) for predictions_previous_model in predictions_previous_models]

            # 变形为 (batch_size, 1)
            logits = tf.reshape(new_predictions, (-1, 1))
            logits_previous = [tf.reshape(new_prediction_previous, (-1, 1)) for new_prediction_previous in new_predictions_previous]
            logits_previous_cat = tf.concat(logits_previous, axis=1)
            logits = tf.concat([logits, logits_previous_cat], axis=1)
            logits /= temperature

            # 计算损失
            loss1 = tf.keras.losses.sparse_categorical_crossentropy(batch_labels, predictions, from_logits=False)
            loss1 = tf.reduce_mean(loss1)  # 取平均值

            # 计算对比损失 (第二部分)
            # 假设使用与目标相同的标签，可以根据需要修改
            labels = tf.zeros_like(batch_labels, dtype=tf.int64)
            loss2 = mu * tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=False))

            # 总损失 = 交叉熵损失 + 对比损失
            loss = loss1 + loss2
        
        # 计算梯度
        gradients = tape.gradient(loss, model.trainable_variables)
        
        # 应用梯度更新权重
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        
        epoch_loss += loss.numpy()  # 累积每批次的损失
        
        # 更新准确率
        accuracy_metric.update_state(batch_labels, predictions)
    
    # 计算整个 epoch 的平均损失和准确率
    avg_loss = epoch_loss / len(dataset)
    avg_accuracy = accuracy_metric.result().numpy()  # 获取当前的准确率
    accuracy_metric.reset_states()  # 重置准确率计算器
    avg_gradients = [grad / len(dataset) for grad in accumulated_gradients]  # 平均每个梯度
    
    return model.get_weights(), avg_loss, avg_accuracy, avg_gradients  # 返回当前损失、准确率和更新后的权重

# 外部控制 epochs 的循环
num_epochs = 10
weights_share = None
weights_alice = None  # 初始权重为空
weights_bob = None
weights_carol = None
previous_weights_alice = [None, None]
previous_weights_bob = [None, None]
previous_weights_carol = [None, None]

# 假设 Alice 和 Bob 的数据量
alice_data_size = len(alice_images)
bob_data_size = len(bob_images)
carol_data_size = len(carol_images)

# 计算总数据量
total_data_size = alice_data_size + bob_data_size + carol_data_size

for epoch in range(num_epochs):
    # 更新共享模型权重的加权平均
    if weights_alice is not None and weights_bob is not None and weights_carol is not None:
        # 加权平均
        weights_share = [
            (wa * alice_data_size + wb * bob_data_size + wc * carol_data_size) / total_data_size
            for wa, wb, wc in zip(weights_alice, weights_bob, weights_carol)
        ]
        # 设置共享模型的权重
        shared_model.set_weights(weights_share)
    
    print(f"Epoch {epoch + 1}/{num_epochs} on Alice's partition...")
    weights_alice, loss_alice, acc_alice, avg_gradients_alice = train_one_epoch(alice_partition_images, alice_partition_labels, shared_model, previous_weights_alice)
    print(f"Loss on Alice's partition: {loss_alice}, Accuracy on Alice's partition: {acc_alice}")
    
    print(f"Epoch {epoch + 1}/{num_epochs} on Bob's partition...")
    weights_bob, loss_bob, acc_bob, avg_gradients_bob = train_one_epoch(bob_partition_images, bob_partition_labels, shared_model, previous_weights_bob)
    print(f"Loss on Bob's partition: {loss_bob}, Accuracy on Bob's partition: {acc_bob}")

    print(f"Epoch {epoch + 1}/{num_epochs} on Carol's partition...")
    weights_carol, loss_carol, acc_carol, avg_gradients_carol = train_one_epoch(carol_partition_images, carol_partition_labels, shared_model, previous_weights_carol)
    print(f"Loss on Carol's partition: {loss_carol}, Accuracy on Carol's partition: {acc_carol}")

    # 保存当前权重作为下一轮的 "previous_weights"
    previous_weights_alice = [weights_bob, weights_carol]
    previous_weights_bob = [weights_alice, weights_carol]
    previous_weights_carol= [weights_alice, weights_bob]


print("Training completed.")


Epoch 1/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:18<00:00,  6.45it/s]


Loss on Alice's partition: 4.388160389358714, Accuracy on Alice's partition: 0.6768666505813599
Epoch 1/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:22<00:00,  6.95it/s]


Loss on Bob's partition: 2.8998531346108503, Accuracy on Bob's partition: 0.6974499821662903
Epoch 1/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:30<00:00,  6.35it/s]


Loss on Carol's partition: 2.940857909771861, Accuracy on Carol's partition: 0.7053200006484985
Epoch 2/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:20<00:00,  5.88it/s]


Loss on Alice's partition: 2.3722859661457902, Accuracy on Alice's partition: 0.9322666525840759
Epoch 2/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:25<00:00,  6.27it/s]


Loss on Bob's partition: 2.3854831677333563, Accuracy on Bob's partition: 0.9406499862670898
Epoch 2/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:33<00:00,  5.81it/s]


Loss on Carol's partition: 2.375627723275399, Accuracy on Carol's partition: 0.9408000111579895
Epoch 3/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:20<00:00,  5.80it/s]


Loss on Alice's partition: 2.300736174745075, Accuracy on Alice's partition: 0.9667333364486694
Epoch 3/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:26<00:00,  5.82it/s]


Loss on Bob's partition: 2.2845349084040163, Accuracy on Bob's partition: 0.9713500142097473
Epoch 3/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:31<00:00,  6.13it/s]


Loss on Carol's partition: 2.2817527882906856, Accuracy on Carol's partition: 0.9719600081443787
Epoch 4/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:20<00:00,  5.65it/s]


Loss on Alice's partition: 2.2576915146940846, Accuracy on Alice's partition: 0.9778000116348267
Epoch 4/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:24<00:00,  6.37it/s]


Loss on Bob's partition: 2.251405031058439, Accuracy on Bob's partition: 0.978600025177002
Epoch 4/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:34<00:00,  5.69it/s]


Loss on Carol's partition: 2.251981274205811, Accuracy on Carol's partition: 0.9811199903488159
Epoch 5/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:19<00:00,  5.97it/s]


Loss on Alice's partition: 2.2459058943441357, Accuracy on Alice's partition: 0.9815333485603333
Epoch 5/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:25<00:00,  6.13it/s]


Loss on Bob's partition: 2.2425613676666454, Accuracy on Bob's partition: 0.9834499955177307
Epoch 5/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:31<00:00,  6.19it/s]


Loss on Carol's partition: 2.244092212647808, Accuracy on Carol's partition: 0.9838799834251404
Epoch 6/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:21<00:00,  5.37it/s]


Loss on Alice's partition: 2.236182283546965, Accuracy on Alice's partition: 0.9845333099365234
Epoch 6/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:25<00:00,  6.12it/s]


Loss on Bob's partition: 2.234408425677354, Accuracy on Bob's partition: 0.986299991607666
Epoch 6/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:35<00:00,  5.58it/s]


Loss on Carol's partition: 2.2291393073237673, Accuracy on Carol's partition: 0.9863600134849548
Epoch 7/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:20<00:00,  5.73it/s]


Loss on Alice's partition: 2.2311988179966553, Accuracy on Alice's partition: 0.9864000082015991
Epoch 7/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:27<00:00,  5.64it/s]


Loss on Bob's partition: 2.231617789359609, Accuracy on Bob's partition: 0.9881500005722046
Epoch 7/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:32<00:00,  5.97it/s]


Loss on Carol's partition: 2.220428742924515, Accuracy on Carol's partition: 0.989359974861145
Epoch 8/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:26<00:00,  4.47it/s]


Loss on Alice's partition: 2.226062287718563, Accuracy on Alice's partition: 0.9872000217437744
Epoch 8/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:29<00:00,  5.32it/s]


Loss on Bob's partition: 2.2178759574890137, Accuracy on Bob's partition: 0.9909999966621399
Epoch 8/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:32<00:00,  5.95it/s]


Loss on Carol's partition: 2.2167779547827586, Accuracy on Carol's partition: 0.9904000163078308
Epoch 9/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:18<00:00,  6.37it/s]


Loss on Alice's partition: 2.2341840408616145, Accuracy on Alice's partition: 0.9886666536331177
Epoch 9/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:25<00:00,  6.23it/s]


Loss on Bob's partition: 2.228211916176377, Accuracy on Bob's partition: 0.9891499876976013
Epoch 9/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:31<00:00,  6.20it/s]


Loss on Carol's partition: 2.2258759457237867, Accuracy on Carol's partition: 0.990559995174408
Epoch 10/10 on Alice's partition...


Training Progress: 100%|██████████| 118/118 [00:19<00:00,  6.13it/s]


Loss on Alice's partition: 2.228492516582295, Accuracy on Alice's partition: 0.9901999831199646
Epoch 10/10 on Bob's partition...


Training Progress: 100%|██████████| 157/157 [00:25<00:00,  6.11it/s]


Loss on Bob's partition: 2.2192423966280215, Accuracy on Bob's partition: 0.9916999936103821
Epoch 10/10 on Carol's partition...


Training Progress: 100%|██████████| 196/196 [00:31<00:00,  6.31it/s]

Loss on Carol's partition: 2.2168004318159453, Accuracy on Carol's partition: 0.9929199814796448
Training completed.



