In [1]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

In [2]:
# Load the raw sensor recordings from CSV into a DataFrame.
# NOTE(review): "path of the data" looks like a placeholder -- point it at the real file.
activity = pd.read_csv("path of the data")  # comma is read_csv's default separator

In [3]:
# Discard the subject id, timestamp, heart-rate and activity-id columns;
# the 'motion' label and the remaining sensor columns are kept.
to_drop = ['subject', 'timestamp', 'heart_rate', 'activityID']
activity = activity.drop(columns=to_drop)

# Summarise the remaining columns and their dtypes, then preview the first rows.
activity.info()
activity.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942872 entries, 0 to 1942871
Data columns (total 40 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   motion                   object 
 1   temp_hand                float64
 2   acceleration_16_x_hand   float64
 3   acceleration_16_y_hand   float64
 4   acceleration_16_z_hand   float64
 5   acceleration_6_x_hand    float64
 6   acceleration_6_y_hand    float64
 7   acceleration_6_z_hand    float64
 8   gyroscope_x_hand         float64
 9   gyroscope_y_hand         float64
 10  gyroscope_z_hand         float64
 11  magnetometer_x_hand      float64
 12  magnetometer_y_hand      float64
 13  magnetometer_z_hand      float64
 14  temp_chest               float64
 15  acceleration_16_x_chest  float64
 16  acceleration_16_y_chest  float64
 17  acceleration_16_z_chest  float64
 18  acceleration_6_x_chest   float64
 19  acceleration_6_y_chest   float64
 20  acceleration_6_z_chest   float64
 21  gyroscop

Unnamed: 0,motion,temp_hand,acceleration_16_x_hand,acceleration_16_y_hand,acceleration_16_z_hand,acceleration_6_x_hand,acceleration_6_y_hand,acceleration_6_z_hand,gyroscope_x_hand,gyroscope_y_hand,...,acceleration_16_z_ankle,acceleration_6_x_ankle,acceleration_6_y_ankle,acceleration_6_z_ankle,gyroscope_x_ankle,gyroscope_y_ankle,gyroscope_z_ankle,magnetometer_x_ankle,magnetometer_y_ankle,magnetometer_z_ankle
0,n,30.375,2.2153,8.27915,5.58753,2.24689,8.55387,5.77143,-0.00475,0.037579,...,0.095156,9.63162,-1.76757,0.265761,0.002908,-0.027714,0.001752,-61.1081,-36.8636,-58.3696
1,n,30.375,2.29196,7.67288,5.74467,2.27373,8.14592,5.78739,-0.17171,0.025479,...,-0.020804,9.58649,-1.75247,0.250816,0.020882,0.000945,0.006007,-60.8916,-36.3197,-58.3656
2,n,30.375,2.2909,7.1424,5.82342,2.26966,7.66268,5.78846,-0.238241,0.011214,...,-0.059173,9.60196,-1.73721,0.356632,-0.035392,-0.052422,-0.004882,-60.3407,-35.7842,-58.6119
3,n,30.375,2.218,7.14365,5.8993,2.22177,7.25535,5.88,-0.192912,0.019053,...,0.094385,9.58674,-1.78264,0.311453,-0.032514,-0.018844,0.02695,-60.7646,-37.1028,-57.8799
4,n,30.375,2.30106,7.25857,6.09259,2.2072,7.24042,5.95555,-0.069961,-0.018328,...,0.095775,9.64677,-1.7524,0.295902,0.001351,-0.048878,-0.006328,-60.204,-37.1225,-57.8847


In [4]:
# One-hot encode the categorical 'motion' label into 'motion_<value>' indicator
# columns, appended after the sensor features.
# The dtype is pinned to uint8 because pandas >= 2.0 returns boolean dummies by
# default, and boolean columns cannot go through the arithmetic min-max scaling
# that is applied to the whole frame later in this notebook.
motion_dummies = pd.get_dummies(activity['motion'], prefix='motion', dtype='uint8')
activity = pd.concat([activity.drop(columns='motion'), motion_dummies], axis=1)

In [5]:
# Every column except the one-hot class indicators is treated as a feature.
class_label = ['motion_n', 'motion_y']
predictors = [a for a in activity.columns.values if a not in class_label]

display(predictors)

for p in predictors:
    column = activity[p]
    # Impute missing readings with the column mean. The filled Series is
    # assigned back explicitly: calling fillna(inplace=True) on `activity[p]`
    # mutates an intermediate object and is not guaranteed to update
    # `activity` (pandas 2.x warns about it; under Copy-on-Write it is a no-op).
    column = column.fillna(column.mean())
    # Min-max scale the feature to [0, 1].
    # NOTE: a constant column has zero range and therefore becomes NaN here.
    low, high = column.min(), column.max()
    activity[p] = (column - low) / (high - low)

# No dtype cast is applied: the features stay float64. A bare
# `activity[p].astype('float32')` returns a new Series and changes nothing
# unless its result is assigned.

# info() prints its report itself and returns None, so it is called directly
# rather than wrapped in display() (which would render a stray "None").
activity.info()
display(activity.head(10))

['temp_hand',
 'acceleration_16_x_hand',
 'acceleration_16_y_hand',
 'acceleration_16_z_hand',
 'acceleration_6_x_hand',
 'acceleration_6_y_hand',
 'acceleration_6_z_hand',
 'gyroscope_x_hand',
 'gyroscope_y_hand',
 'gyroscope_z_hand',
 'magnetometer_x_hand',
 'magnetometer_y_hand',
 'magnetometer_z_hand',
 'temp_chest',
 'acceleration_16_x_chest',
 'acceleration_16_y_chest',
 'acceleration_16_z_chest',
 'acceleration_6_x_chest',
 'acceleration_6_y_chest',
 'acceleration_6_z_chest',
 'gyroscope_x_chest',
 'gyroscope_y_chest',
 'gyroscope_z_chest',
 'magnetometer_x_chest',
 'magnetometer_y_chest',
 'magnetometer_z_chest',
 'temp_ankle',
 'acceleration_16_x_ankle',
 'acceleration_16_y_ankle',
 'acceleration_16_z_ankle',
 'acceleration_6_x_ankle',
 'acceleration_6_y_ankle',
 'acceleration_6_z_ankle',
 'gyroscope_x_ankle',
 'gyroscope_y_ankle',
 'gyroscope_z_ankle',
 'magnetometer_x_ankle',
 'magnetometer_y_ankle',
 'magnetometer_z_ankle']

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942872 entries, 0 to 1942871
Data columns (total 41 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   temp_hand                float64
 1   acceleration_16_x_hand   float64
 2   acceleration_16_y_hand   float64
 3   acceleration_16_z_hand   float64
 4   acceleration_6_x_hand    float64
 5   acceleration_6_y_hand    float64
 6   acceleration_6_z_hand    float64
 7   gyroscope_x_hand         float64
 8   gyroscope_y_hand         float64
 9   gyroscope_z_hand         float64
 10  magnetometer_x_hand      float64
 11  magnetometer_y_hand      float64
 12  magnetometer_z_hand      float64
 13  temp_chest               float64
 14  acceleration_16_x_chest  float64
 15  acceleration_16_y_chest  float64
 16  acceleration_16_z_chest  float64
 17  acceleration_6_x_chest   float64
 18  acceleration_6_y_chest   float64
 19  acceleration_6_z_chest   float64
 20  gyroscope_x_chest        float64
 21  gyroscop

None

Unnamed: 0,temp_hand,acceleration_16_x_hand,acceleration_16_y_hand,acceleration_16_z_hand,acceleration_6_x_hand,acceleration_6_y_hand,acceleration_6_z_hand,gyroscope_x_hand,gyroscope_y_hand,gyroscope_z_hand,...,acceleration_6_y_ankle,acceleration_6_z_ankle,gyroscope_x_ankle,gyroscope_y_ankle,gyroscope_z_ankle,magnetometer_x_ankle,magnetometer_y_ankle,magnetometer_z_ankle,motion_n,motion_y
0,0.517647,0.708758,0.433001,0.412942,0.556504,0.567242,0.546643,0.515674,0.437044,0.498322,...,0.485165,0.507751,0.579444,0.57068,0.458988,0.422655,0.435244,0.177658,1,0
1,0.517647,0.709126,0.430669,0.413548,0.55674,0.563955,0.546772,0.512614,0.436748,0.498378,...,0.485287,0.50763,0.579878,0.571584,0.459127,0.423473,0.437587,0.177675,1,0
2,0.517647,0.709121,0.428628,0.413852,0.556704,0.560061,0.54678,0.511394,0.4364,0.498741,...,0.48541,0.508488,0.578519,0.569901,0.458771,0.425557,0.439893,0.176688,1,0
3,0.517647,0.708771,0.428633,0.414145,0.556284,0.556779,0.547519,0.512225,0.436591,0.499179,...,0.485043,0.508122,0.578589,0.57096,0.459813,0.423954,0.434214,0.17962,1,0
4,0.517647,0.70917,0.429075,0.41489,0.556156,0.556658,0.548129,0.514479,0.435678,0.498872,...,0.485287,0.507996,0.579406,0.570013,0.458724,0.426074,0.434129,0.179601,1,0
5,0.517647,0.708068,0.429079,0.41458,0.556026,0.556416,0.548618,0.516933,0.436301,0.499575,...,0.485288,0.50812,0.579465,0.570706,0.459066,0.421832,0.434767,0.18014,1,0
6,0.517647,0.7097,0.43038,0.414299,0.556443,0.558482,0.548735,0.51926,0.436202,0.500067,...,0.485166,0.50824,0.580263,0.570537,0.458706,0.420976,0.434815,0.179156,1,0
7,0.517647,0.7093,0.43052,0.413402,0.557116,0.559938,0.548609,0.519434,0.4359,0.501109,...,0.485166,0.508243,0.579124,0.57103,0.459145,0.420893,0.438221,0.173736,1,0
8,0.517647,0.708929,0.431104,0.413099,0.556851,0.560426,0.547998,0.519503,0.434458,0.50046,...,0.485532,0.508242,0.580324,0.572197,0.458542,0.420136,0.433905,0.178659,1,0
9,0.517647,0.708402,0.430091,0.41369,0.557105,0.560545,0.546779,0.518901,0.434772,0.499466,...,0.485654,0.508241,0.580095,0.571215,0.459099,0.423941,0.432845,0.17564,1,0


In [6]:
# Keep a single binary target column: drop the 'motion_y' indicator and
# leave 'motion_n' as the class label.
activity = activity.drop(columns=['motion_y'])

In [8]:
# Min-max scale every column of the frame (label included) to [0, 1].
column_min = activity.min()
column_range = activity.max() - column_min
normalized_adult_data = (activity - column_min) / column_range

In [9]:
# Inspect the scaled frame: column dtypes first, then the first ten rows.
# info() prints its report and returns None, so it is called directly;
# wrapping it in display() would additionally render a stray "None".
normalized_adult_data.info()
display(normalized_adult_data.head(10))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942872 entries, 0 to 1942871
Data columns (total 40 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   temp_hand                float64
 1   acceleration_16_x_hand   float64
 2   acceleration_16_y_hand   float64
 3   acceleration_16_z_hand   float64
 4   acceleration_6_x_hand    float64
 5   acceleration_6_y_hand    float64
 6   acceleration_6_z_hand    float64
 7   gyroscope_x_hand         float64
 8   gyroscope_y_hand         float64
 9   gyroscope_z_hand         float64
 10  magnetometer_x_hand      float64
 11  magnetometer_y_hand      float64
 12  magnetometer_z_hand      float64
 13  temp_chest               float64
 14  acceleration_16_x_chest  float64
 15  acceleration_16_y_chest  float64
 16  acceleration_16_z_chest  float64
 17  acceleration_6_x_chest   float64
 18  acceleration_6_y_chest   float64
 19  acceleration_6_z_chest   float64
 20  gyroscope_x_chest        float64
 21  gyroscop

None

Unnamed: 0,temp_hand,acceleration_16_x_hand,acceleration_16_y_hand,acceleration_16_z_hand,acceleration_6_x_hand,acceleration_6_y_hand,acceleration_6_z_hand,gyroscope_x_hand,gyroscope_y_hand,gyroscope_z_hand,...,acceleration_6_x_ankle,acceleration_6_y_ankle,acceleration_6_z_ankle,gyroscope_x_ankle,gyroscope_y_ankle,gyroscope_z_ankle,magnetometer_x_ankle,magnetometer_y_ankle,magnetometer_z_ankle,motion_n
0,0.517647,0.708758,0.433001,0.412942,0.556504,0.567242,0.546643,0.515674,0.437044,0.498322,...,0.574875,0.485165,0.507751,0.579444,0.57068,0.458988,0.422655,0.435244,0.177658,1.0
1,0.517647,0.709126,0.430669,0.413548,0.55674,0.563955,0.546772,0.512614,0.436748,0.498378,...,0.574509,0.485287,0.50763,0.579878,0.571584,0.459127,0.423473,0.437587,0.177675,1.0
2,0.517647,0.709121,0.428628,0.413852,0.556704,0.560061,0.54678,0.511394,0.4364,0.498741,...,0.574634,0.48541,0.508488,0.578519,0.569901,0.458771,0.425557,0.439893,0.176688,1.0
3,0.517647,0.708771,0.428633,0.414145,0.556284,0.556779,0.547519,0.512225,0.436591,0.499179,...,0.574511,0.485043,0.508122,0.578589,0.57096,0.459813,0.423954,0.434214,0.17962,1.0
4,0.517647,0.70917,0.429075,0.41489,0.556156,0.556658,0.548129,0.514479,0.435678,0.498872,...,0.574998,0.485287,0.507996,0.579406,0.570013,0.458724,0.426074,0.434129,0.179601,1.0
5,0.517647,0.708068,0.429079,0.41458,0.556026,0.556416,0.548618,0.516933,0.436301,0.499575,...,0.574633,0.485288,0.50812,0.579465,0.570706,0.459066,0.421832,0.434767,0.18014,1.0
6,0.517647,0.7097,0.43038,0.414299,0.556443,0.558482,0.548735,0.51926,0.436202,0.500067,...,0.575243,0.485166,0.50824,0.580263,0.570537,0.458706,0.420976,0.434815,0.179156,1.0
7,0.517647,0.7093,0.43052,0.413402,0.557116,0.559938,0.548609,0.519434,0.4359,0.501109,...,0.574755,0.485166,0.508243,0.579124,0.57103,0.459145,0.420893,0.438221,0.173736,1.0
8,0.517647,0.708929,0.431104,0.413099,0.556851,0.560426,0.547998,0.519503,0.434458,0.50046,...,0.574755,0.485532,0.508242,0.580324,0.572197,0.458542,0.420136,0.433905,0.178659,1.0
9,0.517647,0.708402,0.430091,0.41369,0.557105,0.560545,0.546779,0.518901,0.434772,0.499466,...,0.574877,0.485654,0.508241,0.580095,0.571215,0.459099,0.423941,0.432845,0.17564,1.0


In [10]:
# Column index of the scaled frame (sensor features followed by 'motion_n').
columns = normalized_adult_data.columns

In [11]:
# Root location for the per-client CSV exports.
# The save cell builds file names by plain string concatenation
# (FL_path + 'node_0/...'), so the value should end with a path separator.
FL_path = "path to save the data"

In [13]:

# Empty per-class frames with the same columns as the scaled data.
# Both names are rebound when the records are separated by class below.
adults_0, adults_1 = (
    pd.DataFrame(columns=normalized_adult_data.columns) for _ in range(2)
)


In [59]:
# Separate the records of each class using the binary 'motion_n' label.
motion_label = normalized_adult_data['motion_n']
adults_0 = normalized_adult_data.loc[motion_label == 0]
adults_1 = normalized_adult_data.loc[motion_label == 1]

In [64]:
# Empty frames for the two class-skewed pools, sharing the scaled data's columns.
# Both names are rebound when the class slices are merged below.
adults_major_0, adults_major_1 = (
    pd.DataFrame(columns=normalized_adult_data.columns) for _ in range(2)
)


In [65]:
# Determine the 80% row count of each class (still a float at this point;
# it is truncated to an int in the next cell).
per_0, per_1 = (len(class_rows) * 80 / 100 for class_rows in (adults_0, adults_1))

In [67]:
# Truncate the 80% counts to integers so they can be used as iloc positions.
per_0, per_1 = int(per_0), int(per_1)

In [68]:
# Cut each class frame at its 80% position:
#   df0 / df1 -> first 80% / last 20% of class 0
#   df3 / df4 -> first 80% / last 20% of class 1
df0, df1 = adults_0.iloc[:per_0], adults_0.iloc[per_0:]
df3, df4 = adults_1.iloc[:per_1], adults_1.iloc[per_1:]

In [70]:
# Merge the slices into two class-skewed pools:
#   adults_major_0 = 80% of class 0 + 20% of class 1
#   adults_major_1 = 20% of class 0 + 80% of class 1
adults_major_0 = pd.concat((df0, df4))
adults_major_1 = pd.concat((df1, df3))

In [76]:
###### Split the data for 4 clients
# Each skewed pool is halved at random (fixed seed) into two clients:
# adults_major_0 -> client_0, client_1 and adults_major_1 -> client_2, client_3.
(client_0, client_1), (client_2, client_3) = (
    train_test_split(pool, test_size=0.5, random_state=92)
    for pool in (adults_major_0, adults_major_1)
)

In [78]:
######## split train / test / validation for every client
# 80% of each client goes to training; the held-out 20% is then divided
# 70/30 into a test part and a validation part.
train_0, test_temp_0 = train_test_split(client_0, test_size=0.2, random_state=92)
test_0, val_0 = train_test_split(test_temp_0, test_size=0.3, random_state=92)
train_1, test_temp_1 = train_test_split(client_1, test_size=0.2, random_state=92)
test_1, val_1 = train_test_split(test_temp_1, test_size=0.3, random_state=92)
train_2, test_temp_2 = train_test_split(client_2, test_size=0.2, random_state=92)
test_2, val_2 = train_test_split(test_temp_2, test_size=0.3, random_state=92)
train_3, test_temp_3 = train_test_split(client_3, test_size=0.2, random_state=92)
test_3, val_3 = train_test_split(test_temp_3, test_size=0.3, random_state=92)

########### save data for the model
# Write the disjoint val_N / test_N parts. Saving the undivided test_temp_N
# under both names would make the validation and test files identical and
# leave the val/test split above unused.
client_splits = [
    (train_0, val_0, test_0),
    (train_1, val_1, test_1),
    (train_2, val_2, test_2),
    (train_3, val_3, test_3),
]
for node_id, (train_part, val_part, test_part) in enumerate(client_splits):
    # Same location as before: FL_path + 'node_<id>/' (plain concatenation).
    node_dir = FL_path + 'node_{}/'.format(node_id)
    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(node_dir, exist_ok=True)
    train_part.to_csv(node_dir + 'adult_train_enc.csv', index=False)
    val_part.to_csv(node_dir + 'adult_val_enc.csv', index=False)
    test_part.to_csv(node_dir + 'adult_test_enc.csv', index=False)