In [1]:
# process_utils.py
import psutil
import os
import platform


def find_process_by_port(port):
    for proc in psutil.process_iter(["pid", "name"]):
        try:
            connections = proc.connections(kind="inet")
            for conn in connections:
                if conn.laddr.port == port:
                    return proc.pid
        except (psutil.AccessDenied, psutil.NoSuchProcess):
            continue
    return None


def kill_process(pid):
    try:
        if platform.system() == "Windows":
            os.system(f"taskkill /PID {pid} /F")
        else:
            os.system(f"kill -9 {pid}")
        print(f"Process with PID {pid} has been terminated.")
    except Exception as e:
        print(f"Error terminating process with PID {pid}: {e}")


def kill_port_process(port: int):
    pid = find_process_by_port(port)

    if pid:
        print(f"Process found using port {port}: PID {pid}")
        kill_process(pid)
    else:
        print(f"No process found using port {port}")


pyu_port = 16307
spu_port = 11666

kill_port_process(port=pyu_port)
kill_port_process(port=spu_port)

import secretflow as sf

# Check the version of your SecretFlow
print("The version of SecretFlow: {}".format(sf.__version__))

# In case you have a running secretflow runtime already.
sf.shutdown()

pyu_port = 16307
spu_port = 11666


cluster_config = {
    "parties": {
        "alice": {
            # replace with alice's real address.
            "address": "ecm-01:" + str(pyu_port),
            "listen_addr": "0.0.0.0:" + str(pyu_port),
        },
        "bob": {
            # replace with bob's real address.
            "address": "ecm-02:" + str(pyu_port),
            "listen_addr": "0.0.0.0:" + str(pyu_port),
        },
    },
    "self_party": "bob",
}

tls_config = {
    "ca_cert": "/home/beng003/certificate/alice_ca.crt",
    "cert": "/home/beng003/certificate/bob_server_cert.crt",
    "key": "/home/beng003/certificate/bob_server_key.key",
}


sf.init(address="ecm-02:6379", cluster_config=cluster_config, tls_config=tls_config)
alice = sf.PYU("alice")
bob = sf.PYU("bob")
print("Alice and Bob are ready to go!")

  connections = proc.connections(kind="inet")


No process found using port 16307
No process found using port 11666


2024-08-13 19:23:25,339	INFO worker.py:1540 -- Connecting to existing Ray cluster at address: ecm-02:6379...
2024-08-13 19:23:25,347	INFO worker.py:1724 -- Connected to Ray cluster.
2024-08-13 19:23:25.374 INFO api.py:233 [bob] -- [Anonymous_job] Started rayfed with {'CLUSTER_ADDRESSES': {'alice': 'ecm-01:16307', 'bob': '0.0.0.0:16307'}, 'CURRENT_PARTY_NAME': 'bob', 'TLS_CONFIG': {'ca_cert': '/home/beng003/certificate/alice_ca.crt', 'cert': '/home/beng003/certificate/bob_server_cert.crt', 'key': '/home/beng003/certificate/bob_server_key.key'}}


The version of SecretFlow: 1.8.0b0


[36m(ReceiverProxyActor pid=271086)[0m 2024-08-13 19:23:26.336 INFO grpc_proxy.py:359 [bob] -- [Anonymous_job] ReceiverProxy binding port 16307, options: (('grpc.enable_retries', 1), ('grpc.so_reuseport', 0), ('grpc.max_send_message_length', 524288000), ('grpc.max_receive_message_length', 524288000), ('grpc.service_config', '{"methodConfig": [{"name": [{"service": "GrpcService"}], "retryPolicy": {"maxAttempts": 5, "initialBackoff": "5s", "maxBackoff": "30s", "backoffMultiplier": 2, "retryableStatusCodes": ["UNAVAILABLE"]}}]}'))...
2024-08-13 19:23:26.342 INFO barriers.py:284 [bob] -- [Anonymous_job] Succeeded to create receiver proxy actor.
[36m(ReceiverProxyActor pid=271086)[0m 2024-08-13 19:23:26.340 INFO grpc_proxy.py:379 [bob] -- [Anonymous_job] Successfully start Grpc service with credentials.
2024-08-13 19:23:27.231 INFO barriers.py:333 [bob] -- [Anonymous_job] SenderProxyActor has successfully created.
2024-08-13 19:23:27.232 INFO barriers.py:520 [bob] -- [Anonymous_job] Try

Alice and Bob are ready to go!


[33m(raylet)[0m [2024-08-13 19:23:32,496 E 265544 265570] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2024-08-13_19-19-00_433060_265452 is over 95% full, available space: 4221571072; capacity: 105089261568. Object creation will fail if spilling is required.
[33m(raylet)[0m [2024-08-13 19:23:42,510 E 265544 265570] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2024-08-13_19-19-00_433060_265452 is over 95% full, available space: 4217712640; capacity: 105089261568. Object creation will fail if spilling is required.
[33m(raylet)[0m [2024-08-13 19:23:52,521 E 265544 265570] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2024-08-13_19-19-00_433060_265452 is over 95% full, available space: 4221509632; capacity: 105089261568. Object creation will fail if spilling is required.
[33m(raylet)[0m [2024-08-13 19:24:02,529 E 265544 265570] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2024-08-13_19-19-00_433060_265452 is over 95% full, available space: 4217651

[33m(raylet)[0m [2024-08-13 19:24:22,545 E 265544 265570] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2024-08-13_19-19-00_433060_265452 is over 95% full, available space: 4221296640; capacity: 105089261568. Object creation will fail if spilling is required.


## Data Preparation
We use [brease canser](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(diagnostic)) as our dataset.

Let us build a mix partitioned data with this dataset. The partitions are as follows:

|label|feature1\~feature2|feature3\~feature4|
|---|---|---|
|alice_y0|alice_x0|bob_x0|


In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

features, label = load_breast_cancer(return_X_y=True, as_frame=True)
features.iloc[:, :] = StandardScaler().fit_transform(features)
label = label.to_frame()
label=label.iloc[0:5]
label["target"][0]=1

feat_list = [
    features.iloc[:, :2],
    features.iloc[:, 2:4],
]

alice_y0 = label.iloc[0:5]
alice_x0 = feat_list[0].iloc[0:5, :]
bob_x0 = feat_list[1].iloc[0:5, :]

In [4]:
import tempfile

tmp_dir = "/home/beng003/python_project/sf-test/data"


def filepath(filename):
    return f"{tmp_dir}/{filename}"+".csv"


alice_y0_file = filepath("alice_y0")
alice_x0_file = filepath("alice_x0")
bob_x0_file = filepath("bob_x0")

alice_x0.to_csv(alice_x0_file, index=False)
bob_x0.to_csv(bob_x0_file, index=False)
alice_y0.to_csv(alice_y0_file, index=False)

In [5]:
vdf_x0 = sf.data.vertical.read_csv(
    {alice: alice_x0_file, bob: bob_x0_file}
)
vdf_y0 = sf.data.vertical.read_csv({alice: alice_y0_file})


2024-08-13 19:23:46.478 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.ActorPartitionAgent'> with party alice.
2024-08-13 19:23:46.480 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.ActorPartitionAgent'> with party bob.
2024-08-13 19:23:51.280 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.ActorPartitionAgent'> with party alice.


In [6]:
from typing import List

from secretflow.security.aggregation import SecureAggregator
import spu


def heu_config(sk_keeper: str, evaluators: List[str]):
    return {
        'sk_keeper': {'party': sk_keeper},
        'evaluators': [{'party': evaluator} for evaluator in evaluators],
        'mode': 'PHEU',
        'he_parameters': {
            'schema': 'paillier',
            'key_pair': {'generate': {'bit_size': 2048}},
        },
    }


heu0 = sf.HEU(heu_config('alice', ['bob', 'alice']), spu.spu_pb2.FM128)
aggregator0 = SecureAggregator(alice, [alice, bob])


2024-08-13 19:24:05.857 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.Actor_Masker'> with party alice.
2024-08-13 19:24:05.858 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.Actor_Masker'> with party bob.


In [7]:
import logging

logging.root.setLevel(level=logging.INFO)

from secretflow.ml.linear import FlLogisticRegressionVertical

model = FlLogisticRegressionVertical(
    devices=[alice, bob],
    aggregator=aggregator0,
    heu=heu0,
)

model.fit(
    vdf_x0,
    vdf_y0,
    batch_size=5,
    epochs=3,
    learning_rate=0.1,
)

2024-08-13 19:24:09.656 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.ActorPYUFlLrVWorker'> with party alice.
2024-08-13 19:24:09.657 INFO proxy.py:187 [bob] -- [Anonymous_job] Create proxy actor <class 'secretflow.device.proxy.ActorPYUFlLrVWorker'> with party bob.
2024-08-13 19:24:12.888 INFO fl_lr_v.py:437 [bob] -- [Anonymous_job] Epoch 0: loss = [[0.83582791]]
2024-08-13 19:24:12.991 INFO fl_lr_v.py:437 [bob] -- [Anonymous_job] Epoch 1: loss = [[0.76578909]]
2024-08-13 19:24:13.070 INFO fl_lr_v.py:437 [bob] -- [Anonymous_job] Epoch 2: loss = [[0.71598927]]
2024-08-13 19:24:13.154 INFO fl_lr_v.py:443 [bob] -- [Anonymous_job] Epoch 3: loss = [[0.67995107]]


[36m(HEUEvaluator(heu_id=140514492444928, party=bob) pid=272032)[0m [2024-08-13 19:24:12.897] [info] [thread_pool.cc:30] Create a fixed thread pool with size 7


In [8]:
import numpy as np
from sklearn.metrics import roc_auc_score

y_pred = np.concatenate(sf.reveal(model.predict(vdf_x0)))

auc = roc_auc_score(label.values, y_pred)
acc = np.mean((y_pred > 0.5) == label.values)
print('auc:', auc, ', acc:', acc)

auc: 0.75 , acc: 0.68
