In [9]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_diabetes
import tensorflow
from tensorflow import keras

In [10]:
# Load the MNIST training and test splits through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

In [11]:
# Cast both image sets to float32 and divide by 255 to normalize the pixels.
X_train, X_test = (
    images.astype('float32') / 255
    for images in (X_train, X_test)
)

In [12]:
# Show which container type holds the training images.
print(type(X_train))

<class 'numpy.ndarray'>


In [13]:
# Display the dimensions of the training image array.
X_train.shape

(60000, 28, 28)

In [14]:
import numpy as np
import pandas as pd
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical


# Folder that receives the per-client CSV files; created if it is missing.
output_directory = "../client_data/"
os.makedirs(output_directory, exist_ok=True)

# Load MNIST, then normalize the images of both splits to the range [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (
    images.astype('float32') / 255
    for images in (x_train, x_test)
)

# Placeholder containers; they are reassigned once the split has been computed.
clients_X, clients_Y = {}, {}

# Number of clients the training data is divided between.
num_clients = 5
def split_dataset_round_robin(X, y, num_clients):
    """Distribute samples across clients in round-robin order.

    Sample ``i`` is assigned to client ``i % num_clients + 1``, so client 1
    receives samples 0, num_clients, 2 * num_clients, ... and so on.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of labels with the same length as ``X``.
        num_clients: Number of clients to split across; must be at least 1.

    Returns:
        A ``(clients_X, clients_Y)`` tuple of dicts keyed by client number
        (1 .. num_clients). Each value is an independent NumPy array holding
        that client's samples or labels.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1, or if ``X`` and
            ``y`` differ in length.
    """
    if num_clients < 1:
        raise ValueError("num_clients must be a positive integer")

    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    clients_X = {}
    clients_Y = {}
    for offset in range(num_clients):
        client_num = offset + 1
        # A strided slice selects every num_clients-th sample starting at
        # `offset`, which is the same assignment as i % num_clients + 1.
        # Slicing keeps dtype and trailing dimensions even for an empty
        # share; copy() makes the result independent of the input array.
        clients_X[client_num] = X[offset::num_clients].copy()
        clients_Y[client_num] = y[offset::num_clients].copy()

    return clients_X, clients_Y

# Split the training set across `num_clients` clients in a round-robin manner
clients_X, clients_Y = split_dataset_round_robin(x_train, y_train, num_clients)



def save_to_csv(clients_X, clients_Y, client_num, output_dir=None):
    """Write one client's samples and labels to a pair of CSV files.

    The samples go to ``client_<client_num>_data_X.csv`` and the labels to
    ``client_<client_num>_data_Y.csv`` inside the output folder.

    Args:
        clients_X: NumPy array of this client's samples. Everything after the
            first axis is flattened so that each sample becomes one CSV row.
        clients_Y: Labels for the same samples, wrapped in a DataFrame as-is.
        client_num: Identifier embedded in both output file names.
        output_dir: Folder to write into. When omitted, the notebook-level
            ``output_directory`` is used.
    """
    if output_dir is None:
        output_dir = output_directory

    # Flatten the images for saving in CSV: one row per sample.
    flat_X = clients_X.reshape(clients_X.shape[0], -1)

    # Separate path variables so the final message can name both files.
    x_file_path = os.path.join(output_dir, f"client_{client_num}_data_X.csv")
    y_file_path = os.path.join(output_dir, f"client_{client_num}_data_Y.csv")

    pd.DataFrame(flat_X).to_csv(x_file_path, index=False)
    pd.DataFrame(clients_Y).to_csv(y_file_path, index=False)

    print(f"Client {client_num} data saved to {x_file_path} and {y_file_path}")


# Write one X/Y CSV pair per client, walking the split's client numbers in order.
for client in clients_X:
    save_to_csv(clients_X[client], clients_Y[client], client)




Client 1 data saved to ../client_data/client_1_data_Y.csv
Client 2 data saved to ../client_data/client_2_data_Y.csv
Client 3 data saved to ../client_data/client_3_data_Y.csv
Client 4 data saved to ../client_data/client_4_data_Y.csv
Client 5 data saved to ../client_data/client_5_data_Y.csv


In [None]:
# Check the shapes of the resulting arrays for every client, not only the first two
for client in range(1, num_clients + 1):
    print(f"Client {client} X shape: {clients_X[client].shape}")
    print(f"Client {client} Y shape: {clients_Y[client].shape}")
    # Each client must hold exactly one label per sample.
    assert len(clients_X[client]) == len(clients_Y[client])