In [10]:
import numpy as np
import pandas as pd
import dask.dataframe as dk
import tensorflow as tf
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

file_path = "Processed_Data/Mapped_Dataset.csv"

df = dk.read_csv(file_path)

In [11]:
#Global var 
batch_size = 512
ratio_test_all = 0.15

from dask_ml.model_selection import train_test_split 
# chia train test ratio 0.8:0.2 & random 
train_df, test_df = train_test_split(df, test_size=ratio_test_all, random_state=42)

# # load từng batch
def dask_to_tf_dataset(dask_df, batch_size=128, num_classes=10): 
    def generator():
        for batch in dask_df.to_delayed():
            batch=batch.compute()  
            if batch.empty:
                continue

            X = batch.drop(columns='label').values.astype(np.float32)
            y = batch['label'].values
            y_onehot = to_categorical(y, num_classes=num_classes)  

            num_splits = max(1, len(X) // batch_size)  # Đảm bảo không chia nhỏ quá mức
            X_batches = np.array_split(X, num_splits)
            y_batches = np.array_split(y_onehot, num_splits)

            for X_batch, y_batch in zip(X_batches, y_batches):
                yield X_batch, y_batch
                
    output_signature = ( 
        tf.TensorSpec(shape=(None, 46), dtype=tf.float32), 
        tf.TensorSpec(shape=(None, 10), dtype=tf.int32),
    )
    
    return tf.data.Dataset.from_generator(generator, output_signature=output_signature).prefetch(tf.data.AUTOTUNE)

# from functools import partial
# def dask_generator(dask_df, batch_size=128, num_classes=10): 
#     for batch in dask_df.to_delayed():
#         batch = batch.compute()  
#         if batch.empty:
#             continue

#         X = batch.drop(columns='label').values.astype(np.float32)
#         y = batch['label'].values
#         y_onehot = to_categorical(y, num_classes=num_classes)  

#         num_splits = max(1, len(X) // batch_size)
#         X_batches = np.array_split(X, num_splits)
#         y_batches = np.array_split(y_onehot, num_splits)

#         for X_batch, y_batch in zip(X_batches, y_batches):
#             yield X_batch, y_batch 

# def dask_to_tf_dataset(dask_df, batch_size=128, num_classes=10): 
#     output_signature = ( 
#         tf.TensorSpec(shape=(None, dask_df.shape[1] - 1), dtype=tf.float32),  
#         tf.TensorSpec(shape=(None, num_classes), dtype=tf.float32),
#     )

#     return tf.data.Dataset.from_generator(
#         partial(dask_generator, dask_df, batch_size, num_classes), 
#         output_signature=output_signature
#     ).prefetch(tf.data.AUTOTUNE)



In [12]:
train_gen = dask_to_tf_dataset(train_df, 512, 10).repeat()
test_gen = dask_to_tf_dataset(test_df, 512, 10).repeat()

In [None]:

# shape
features, labels = next(iter(train_gen))
input_shape = (features.shape[1], 1)
output_shape = labels.shape[1]

print(f"Input Shape: {input_shape}")

from tensorflow import keras
# Định nghĩa mô hình CNN
# VGG, ...
# Conv2D, tabular, ...
# HE, tính tương thích của HE với CNN
# Tính chất data in, out; Học tăng cường

model = keras.Sequential([
    layers.Input(shape=input_shape),
    layers.Conv1D(filters=32, kernel_size=3, padding="same", activation="relu"),
    layers.MaxPooling1D(pool_size=4),
    layers.Conv1D(filters=64, kernel_size=3,  padding="same",activation="relu"),
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.BatchNormalization(),
    layers.Dense(output_shape, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], use_multiprocessing=True)

# for batch in dataloader:
#     X_batch = batch[:, :-1]
#     y_batch = batch[:, -1]
#     y_onehot = to_categorical(y_batch, num_classes=10)
    
#     model.train_on_batch(X_batch, y_onehot, verbose=1)
model.fit(train_gen, epochs=10, steps_per_epoch=75000, verbose = 1)

# Lưu mô hình
model.save("cnn_model_2-0_batch512_test015.h5")

Input Shape: (46, 1)
Epoch 1/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1188s[0m 16ms/step - accuracy: 0.7567 - loss: 0.7000
Epoch 2/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1566s[0m 21ms/step - accuracy: 0.8222 - loss: 0.4666
Epoch 3/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1128s[0m 15ms/step - accuracy: 0.8595 - loss: 0.3516
Epoch 4/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1210s[0m 16ms/step - accuracy: 0.8017 - loss: 0.5447
Epoch 5/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1320s[0m 18ms/step - accuracy: 0.8407 - loss: 0.4234
Epoch 6/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14705s[0m 196ms/step - accuracy: 0.8522 - loss: 0.3854
Epoch 7/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2948s[0m 39ms/step - accuracy: 0.8842 - loss: 0.2759
Epoch 8/10
[1m75000/75000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3185s[

# Load Model Để Test


In [None]:
from tensorflow.keras.models import load_model

# Load model từ file .h5
model = load_model("cnn_model_2-0_batch512_test015.h5")

# Test với dữ liệu đầu vào
import numpy as np
output = model.evaluate(test_gen, steps= 190000)
print(f'Loss: {output[0]} Acc: {output[1]}')



[1m190000/190000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5428s[0m 29ms/step - accuracy: 0.8963 - loss: 0.2570
Loss: 0.8964436054229736 Acc: 0.8964436054229736


# Lấy weight và biaseses thử add noise và encrypt bằng he

In [2]:
from keras.models import load_model

model = load_model("cnn_model_2-0_batch512_test015.h5")

import numpy as np

weights_list = []
biases_list = []

for layer in model.layers:
    if len(layer.get_weights()) == 2:  # Chỉ lấy các layer có weights & biaseses
        weights, biases = layer.get_weights()
        weights_list.append(weights)
        biases_list.append(biases)

# Kiểm tra định dạng
for i, (w, b) in enumerate(zip(weights_list, biases_list)):
    print(f"Layer {i}:")
    print(f"  Weights shape: {w}, type: {type(w)}")  # Phải là numpy.ndarray
    print(f"  biases shape: {b}, type: {type(b)}")



Layer 0:
  Weights shape: [[[-6.3445854e+00 -1.6360342e+01 -4.8438439e+00 -6.1049932e-01
    3.7552294e-01 -7.7064103e-03 -4.5056671e-02 -3.9904711e-01
   -4.5144553e-03 -4.0138867e-01 -4.5516454e-03  4.9090213e-01
   -2.3368595e-02 -2.4413313e+01 -3.1452492e-02 -1.7089937e+01
    4.7166410e-01 -1.7932486e+01 -3.1153011e-01 -1.2379642e+01
    1.9082721e-01 -2.5870216e+00 -6.2898917e+00 -2.2618127e-01
    3.3345097e-01 -2.4880464e-01 -3.9864616e+01 -1.7747286e-01
    3.2869864e-01 -3.6528447e-01 -1.2785876e-01 -4.7230469e+01]]

 [[-8.4108459e-03 -4.0350180e-02 -3.7861194e-02 -1.3701773e-02
   -7.9480448e+00 -1.6367340e-02 -1.8731129e-01 -1.3276279e-02
   -2.8937125e-01 -1.2598191e+00 -3.5913303e-01 -1.6719801e+01
   -5.8594346e+00  6.0839313e-01 -2.2479774e-01  1.1086193e-01
    3.3584446e-01 -1.9526435e-02  2.5427690e-01 -2.8239214e+00
   -2.9504746e-02  1.8993522e-01 -7.0317044e+00 -1.9989571e-01
    1.3155195e+00 -5.4298778e+00 -3.9857201e+01 -9.7280130e-02
   -1.0122805e+01  1.88005

In [3]:
import copy
import sys
import numpy as np
import random
import threading

import config # config.py

from warnings import simplefilter
from datetime import datetime
from sklearn import metrics

from differential_privacy import laplace

num_clients  = 3  # Số client
active_clients = {'client_agent_' + str(i) for i in range(num_clients)}
print(active_clients)

# số vòng lặp
iteration = 10

#Shape
weights_shape = weights.shape
weights_dp_noise = np.zeros(weights_shape)

biases_shape = biases.shape
biases_dp_noise = np.zeros(biases_shape)

print(weights_shape)

# generate DP parameters
len_per_iteration = 50
LENS_PER_ITERATION = {client_name: len_per_iteration for client_name in active_clients}
active_clients_lens = [LENS_PER_ITERATION[client_name] for client_name in active_clients]
print(active_clients_lens)

smallest_dataset = min(active_clients_lens)
if config.USING_CUMULATIVE:
    smallest_dataset *= iteration

sensitivity = 2 / ( len(active_clients) * smallest_dataset * config.alpha)
# :3
epsilon = config.EPSILONS['client_agent_0']

random.seed(config.RANDOM_SEEDS['client_agent_0'][iteration])
# adding differentially private noise
for i in range(weights_shape[0]):  # weights_modified is 2-D
    for j in range(weights_shape[1]):
        dp_noise = laplace(mean=config.mean, sensitivity=sensitivity, epsilon=epsilon)
        weights_dp_noise[i][j] = dp_noise

for i in range(biases_shape[0]):
    dp_noise = laplace(mean=config.mean, sensitivity=sensitivity, epsilon=epsilon)
    biases_dp_noise[i] = dp_noise

weights_with_noise = copy.deepcopy(weights)  # make a copy to not mutate weights
biases_with_noise = copy.deepcopy(biases)

weights_with_noise += weights_dp_noise
biases_with_noise += biases_dp_noise
# return weights_with_noise, biases_with_noise

for l in weights_with_noise:
    print("Line1: ", weights_with_noise)

{'client_agent_0': [885440, 403958, 794772, 933488, 441001, 42450, 271493, 536110, 509532, 424604, 962838, 821872, 870163, 318046, 499748, 375441, 611720, 934973, 952225, 229053, 529202, 146039, 295528, 146534, 792518, 99437, 648406, 838234, 262674, 953938, 558433, 739426, 849574, 631140, 945989, 154100, 325213, 103560, 765284, 77324, 942500, 891786, 717209, 346236, 495077, 587007, 105592, 370977, 455262, 331556, 640561, 671532, 957361, 214410, 579363, 500181, 464197, 907343, 546678, 273145, 65304, 844132, 963080, 575352, 960489, 14723, 97802, 754665, 880899, 418196, 744754, 864912, 823182, 700609, 655638, 1198, 641620, 517553, 868287, 909747, 349317, 255759, 765752, 341001, 737822, 912755, 66043, 200348, 961564, 595078, 232473, 250206, 842368, 149416, 842194, 569366, 469730, 95646, 84353, 335601], 'client_agent_1': [917595, 532614, 978147, 513054, 114355, 316089, 578045, 305230, 740883, 130873, 574033, 348914, 854030, 967048, 566528, 213072, 838260, 632485, 573812, 616161, 301630, 466

In [17]:
import tenseal as ts
import numpy as np

# 1. Khởi tạo hệ thống mã hóa TenSEAL với CKKS để hỗ trợ số thực
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60]
)
context.global_scale = 2**40
context.generate_galois_keys()

Dx = []
Dy = []

def homophoric_encryption(L2):
    FN = True
    for L in L2:
        row_Dx = []
        row_Dy = []
        for E in L:
            if E == 0:
                FN = False
                break
            
        if FN:
            Eopt = np.random.choice(['add', 'sub', 'mult'])  # Tránh chia 0
        else:
            Eopt = np.random.choice(['add', 'sub', 'mult', 'div'])

        if Eopt != 'div':
            Bl = np.random.randint(1, 9)  # Giá trị cố định cho cả L
            for E in L:
                Eint = E
                if Eopt == 'add':
                    Eflt = Eint + Bl
                elif Eopt == 'sub':
                    Eflt = Eint - Bl
                elif Eopt == 'mult':
                    Eflt = Eint * Bl
                
                Eflt = -Eflt if Eint * Eflt < 0 else Eflt
                encrypted_E = ts.ckks_vector(context, [Eflt])
                row_Dx.append((encrypted_E, Bl, Eopt))
        else:
            for E in L:
                factors = [b for b in range(1, E + 1) if E % b == 0]
                Bl = np.random.choice(factors) if factors else 1
                Eint = E
                Eflt = (Eint / Bl)
                Eflt = -Eflt if Eint * Eflt < 0 else Eflt
                encrypted_E = ts.ckks_vector(context, [Eflt])  # Mã hóa dữ liệu tại đây
                row_Dy.append((encrypted_E, Bl, Eopt))
                
        Dx.append(row_Dx)
        Dy.append(row_Dy)
        
homophoric_encryption(weights_with_noise)
def decrypt_data(context, encrypted_data):
    """Giải mã dữ liệu mã hóa."""
    return encrypted_data.decrypt()

print(Dx)
print("------------\n")
print(Dy)
print("-----------------------\n")
for L in Dx:
    for E in L:
        print(decrypt_data(context, E[0]))
    print("---------------------------|||||\n")

[[(<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A714BF450>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A713A4A10>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A4CE4FF90>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A4CE6ECD0>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A72A1EC90>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A72A14E90>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A72A37710>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A7FD3CA50>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A7FD3E450>, 1, np.str_('add')), (<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A7FD75910>, 1, np.str_('add'))], [(<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025A7FD3F950>, 7, np.str_('mul