In [31]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf

import os
# Print the full path of every file found under the Kaggle input directory.
# os.walk yields nothing when the directory is missing, so this is then a no-op.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [32]:
# Load the pre-split dataset arrays from .npy files in the working directory.
x_train, y_train, x_test, y_test = (
    np.load(path)
    for path in ('x_train.npy', 'y_train.npy', 'x_test.npy', 'y_test.npy')
)

In [33]:
# Experiment constants: number of label classes, n1 (not read anywhere in the
# visible notebook), and the number of simulated users.
num_classes, n1, no_users = 10, 100, 10

# One-hot encode the integer labels into vectors of length num_classes.
# NOTE: this overwrites y_train / y_test, so re-running the cell without
# reloading the data would encode the already-encoded labels again.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [34]:
# Flatten every sample to a single row vector and divide by 255
# (presumably 8-bit pixel intensities being scaled to [0, 1] -- confirm
# against the data source).
# NOTE: this overwrites x_train / x_test, so re-running the cell divides again.
x_train, x_test = (
    np.reshape(images, (images.shape[0], -1)) / 255
    for images in (x_train, x_test)
)

In [35]:
# Network parameters: neuron counts of the first and second hidden layers.
n_hidden_1, n_hidden_2 = 128, 64

In [36]:
# Module-level weight matrices of the 784 -> n_hidden_1 -> n_hidden_2 -> 10
# network, drawn from a truncated normal with stddev 0.1.
# User.neural_net reads these names as globals; the training loop rebinds them
# from central_modal at the start of every round.
W1, W2, W3 = (
    tf.Variable(tf.random.truncated_normal(shape, stddev=0.1))
    for shape in ([784, n_hidden_1], [n_hidden_1, n_hidden_2], [n_hidden_2, 10])
)

In [37]:
class User:
    """One simulated participant of the distributed training run.

    Attributes:
        GW1, GW2, GW3: per-user slots with the shapes of the three weight
            matrices. They start as random Variables; the training loop
            replaces them with this user's gradients each round.
        H: fixed at 1.0 (a random draw was tried and is commented out).
            Presumably a channel gain -- TODO confirm; it is not read
            anywhere in the visible notebook.
        abs_H_square: H squared, cached at construction time.
    """

    def __init__(self):
        self.GW1 = tf.Variable(tf.random.truncated_normal([784,n_hidden_1], stddev=0.1))
        self.GW2 = tf.Variable(tf.random.truncated_normal([n_hidden_1,n_hidden_2], stddev=0.1))
        self.GW3 = tf.Variable(tf.random.truncated_normal([n_hidden_2,10], stddev=0.1))
        self.H = 1.0#np.random.normal()
        self.abs_H_square = self.H**2

    def neural_net(self, x, weights=None):
        """Forward pass of the two-hidden-layer, bias-free MLP.

        Args:
            x: batch of flattened inputs, one row per sample.
            weights: optional (W1, W2, W3) triple to use for the forward
                pass. When omitted, the module-level W1, W2, W3 are looked up
                at call time, exactly as before; this means the result then
                depends on whatever those globals are currently bound to.

        Returns:
            Softmax class probabilities, one row per input sample.
        """
        if weights is None:
            # Backward-compatible default: read the shared module-level weights.
            weights = (W1, W2, W3)
        w1, w2, w3 = weights

        y1 = tf.nn.relu(tf.matmul(x, w1))
        y2 = tf.nn.relu(tf.matmul(y1, w2))
        ylogits = tf.matmul(y2, w3)
        return tf.nn.softmax(ylogits)

In [38]:
# Random mini-batch sampler.
def mini_batches(X, Y, mb_size = 100):
    """Draw one random mini-batch (without replacement) from X and Y.

    Args:
        X: 2-D array of inputs, one row per sample.
        Y: 2-D array of targets with the same number of rows as X.
        mb_size: number of rows to draw. If X has fewer rows, all rows are
            returned in random order.

    Returns:
        (X_r, Y_r): float32 tensors holding the same randomly chosen rows of
        X and Y.
    """
    m = X.shape[0]

    # Draw the full permutation (same RNG consumption as before) but only
    # gather the rows that are actually returned, instead of copying the
    # whole shuffled dataset and then discarding most of it.
    batch_idx = np.random.permutation(m)[:mb_size]

    X_r = tf.convert_to_tensor(X[batch_idx, :], dtype=tf.float32)
    Y_r = tf.convert_to_tensor(Y[batch_idx, :], dtype=tf.float32)
    return X_r, Y_r

In [39]:
# Cross-entropy loss, summed (not averaged) over every element of the batch.
def cross_entropy(y_pred, y_true):
    """Return the total cross-entropy between predictions and one-hot targets.

    Args:
        y_pred: predicted class probabilities.
        y_true: target distribution with the same shape as y_pred.

    Returns:
        Scalar tensor: minus the sum of y_true * log(y_pred) over all entries.
    """
    # Keep probabilities inside [1e-9, 1] so that log never sees zero.
    clipped = tf.clip_by_value(y_pred, 1e-9, 1.)
    log_likelihood = tf.reduce_sum(y_true * tf.math.log(clipped))
    return -log_likelihood

In [40]:
# Classification accuracy.
def accuracy(y_pred, y_true):
    """Return the fraction of rows whose arg-max prediction matches the target.

    Args:
        y_pred: per-class scores, one row per sample.
        y_true: one-hot (or score) targets with the same shape.

    Returns:
        float32 tensor: mean of the per-sample hit indicator.
    """
    predicted_class = tf.argmax(y_pred, 1)
    true_class = tf.argmax(y_true, 1)
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits, axis=-1)

In [41]:
# One User object per simulated participant.
users = [User() for _ in range(no_users)]

# Adam optimizer instance; the training loop in this notebook applies its own
# plain gradient step and does not call it.
optimizer = tf.optimizers.Adam(learning_rate=0.001)

# Central model: the three weight matrices stored as flattened column vectors,
# initialised from a truncated normal with stddev 0.1.
central_modal = [
    tf.Variable(tf.random.truncated_normal([n_weights, 1], stddev=0.1))
    for n_weights in (784*n_hidden_1, n_hidden_1*n_hidden_2, 10*n_hidden_2)
]

In [42]:
# Gradient computation for one user's mini-batch.
def run_optimization(x, y, user, W1, W2, W3, rho):
    """Compute the batch loss and its gradients with respect to W1, W2, W3.

    Despite the name, no weights are updated here; the caller applies the
    update itself.

    Args:
        x: batch of inputs.
        y: matching one-hot targets.
        user: object whose neural_net(x) performs the forward pass.
        W1, W2, W3: weight tensors to differentiate with respect to. They
            MUST be the very same tensor objects that user.neural_net uses
            (it reads the module-level W1/W2/W3, not these parameters),
            otherwise the tape cannot connect the loss to them.
        rho: unused; kept so existing call sites keep working.

    Returns:
        (gradients1, gradients2, gradients3, loss).

    Raises:
        ValueError: if the loss does not depend on one of the watched
            tensors, i.e. the forward pass used different weight objects.
    """
    # Variables to differentiate with respect to.
    trainable_variables = [W1, W2, W3]

    # Plain tensors (not tf.Variable) are not tracked automatically, so they
    # are watched explicitly.
    with tf.GradientTape() as g:
        g.watch(trainable_variables)
        pred = user.neural_net(x)
        loss = cross_entropy(pred, y)

    # Compute gradients.
    gradients1, gradients2, gradients3 = g.gradient(loss, trainable_variables)

    # g.gradient returns None for a source the loss is not connected to.
    # Fail loudly here instead of handing None to the caller's arithmetic.
    if gradients1 is None or gradients2 is None or gradients3 is None:
        raise ValueError(
            "run_optimization: the loss does not depend on the supplied "
            "W1/W2/W3; user.neural_net must use the same tensor objects "
            "that are passed in."
        )

    return gradients1, gradients2, gradients3, loss

In [43]:
# Split the training set into no_users equal, contiguous, non-overlapping
# shards. Any remainder rows at the end of the arrays are left unused.
data_per_worker = x_train.shape[0] // no_users
x_train_k, y_train_k = [], []
for i in range(no_users):
    first = i * data_per_worker
    last = first + data_per_worker
    x_train_k.append(x_train[first:last])
    y_train_k.append(y_train[first:last])

In [44]:
# Convert the held-out set to float32 tensors once, so it is not re-converted
# on every evaluation.
x_test, y_test = (
    tf.convert_to_tensor(array, dtype=np.float32)
    for array in (x_test, y_test)
)

In [45]:
import time

# Bookkeeping names that this cell defines but does not write to.
abs_weights_diff = []
abs_biases_diff = []
Train_Acc = []
Test_Acc = []
CrE_Train = []
CrE_Test = []

lr = 10**(-2)       # step size of the central gradient-descent update

mb_size = 100       # rows drawn per user per round
n_epochs = 500      # number of rounds (one mini-batch per user per round)

# Per-round metrics, one row per round.
acc_train = np.zeros([n_epochs,1])
acc_test = np.zeros([n_epochs,1])
total_loss = np.zeros([n_epochs,1])
H_matrix = np.zeros([int(n_epochs/10)])
eps = 10**(-6)

ii = 0

for k in range(n_epochs):
    print(k)

    # Draw one fresh random mini-batch from every user's own shard.
    batch_x = []
    batch_y = []
    for i in range(no_users):
        batch_xx, batch_yy = mini_batches(x_train_k[i], y_train_k[i], mb_size)
        batch_x.append(batch_xx)
        batch_y.append(batch_yy)

    # Unflatten the central model. These assignments deliberately rebind the
    # module-level W1/W2/W3, because User.neural_net reads those globals.
    W1 = tf.reshape(central_modal[0], [784, n_hidden_1])
    W2 = tf.reshape(central_modal[1], [n_hidden_1, n_hidden_2])
    W3 = tf.reshape(central_modal[2], [n_hidden_2, 10])

    # Each user computes its gradients on its own batch with the shared weights.
    for i in range(no_users):
        gradients1, gradients2, gradients3, loss = run_optimization(
            batch_x[i], batch_y[i], users[i], W1, W2, W3, lr)
        users[i].GW1 = gradients1
        users[i].GW2 = gradients2
        users[i].GW3 = gradients3
        total_loss[k] = total_loss[k] + loss

    # Average (over users) of the summed per-batch loss.
    total_loss[k] = total_loss[k]/no_users
    print(total_loss[k])

    # Aggregate the users' gradients.
    sum_gradient_1 = 0
    sum_gradient_2 = 0
    sum_gradient_3 = 0
    for i in range(no_users):
        sum_gradient_1 = sum_gradient_1 + (users[i].GW1)
        sum_gradient_2 = sum_gradient_2 + (users[i].GW2)
        sum_gradient_3 = sum_gradient_3 + (users[i].GW3)

    # Flatten to match the column-vector layout of central_modal.
    sum_gradient_1 = tf.reshape(sum_gradient_1, [784*n_hidden_1, 1])
    sum_gradient_2 = tf.reshape(sum_gradient_2, [n_hidden_1*n_hidden_2, 1])
    sum_gradient_3 = tf.reshape(sum_gradient_3, [10*n_hidden_2, 1])

    # Plain gradient-descent step on the central model with the mean gradient.
    central_modal[0] = central_modal[0] - lr*sum_gradient_1/no_users
    central_modal[1] = central_modal[1] - lr*sum_gradient_2/no_users
    central_modal[2] = central_modal[2] - lr*sum_gradient_3/no_users

    # NOTE(review): W1/W2/W3 are not refreshed after the step above, so the
    # accuracies below describe the weights *before* this round's update
    # (the same weights total_loss[k] was computed with).
    train_acc = []
    for j in range(no_users):
        train_pred = users[j].neural_net(batch_x[j])
        train_acc.append(accuracy(train_pred, batch_y[j]))

    # Every user evaluates the same shared weights on the same test set, so
    # one forward pass gives the value that was previously computed no_users
    # times and averaged (identical up to float32 rounding).
    test_pred = users[0].neural_net(x_test)
    test_acc = [accuracy(test_pred, y_test)]

    avgAcc_Train = np.mean(train_acc)
    avgAcc_Test = np.mean(test_acc)
    #print(avgAcc_Train)
    print(avgAcc_Test)
    acc_train[k] = avgAcc_Train
    acc_test[k] = avgAcc_Test

#np.save('loss_gd_40960_5.npy', total_loss)
#np.save('acc_train_gd_40960_5.npy', acc_train)
#np.save('acc_test_gd_40960_5.npy', acc_test)

0
[233.88618164]
0.0923
1
[214.98000488]
0.34170002
2
[198.16281738]
0.5168
3
[165.91937256]
0.61289996
4
[153.41501465]
0.47279996
5
[371.6394043]
0.1927
6
[255.50915527]
0.3132
7
[217.38422852]
0.1545
8
[202.81519775]
0.3934
9
[170.07593994]
0.4739
10
[152.20153809]
0.48929995
11
[144.68491211]
0.49359998
12
[220.55014648]
0.2874
13
[197.79482422]
0.40399998
14
[192.56936035]
0.3239
15
[168.24342041]
0.44579998
16
[139.62453613]
0.51420003
17
[126.59660645]
0.58169997
18
[114.67213135]
0.6271
19
[123.82705078]
0.5770999
20
[122.93376465]
0.5684
21
[129.88619385]
0.5779
22
[105.56350098]
0.6447
23
[89.31483154]
0.737
24
[92.26032104]
0.67889994
25
[113.00563965]
0.67920005
26
[96.08599854]
0.66620004
27
[82.13078613]
0.706
28
[94.98851318]
0.6715001
29
[77.96811523]
0.7464
30
[91.40199585]
0.7189001
31
[79.50991821]
0.7304
32
[69.25390625]
0.80599993
33
[57.74441528]
0.83280003
34
[47.51669922]
0.83199996
35
[52.61737061]
0.83140004
36
[67.17804565]
0.7879
37
[77.11916504]
0.74249995


[11.29508362]
0.95530003
307
[11.07876129]
0.95629996
308
[13.62836151]
0.95509994
309
[12.32988739]
0.95640004
310
[11.13529739]
0.95699996
311
[10.90436554]
0.95649993
312
[12.32327728]
0.9578999
313
[9.40004959]
0.95710003
314
[14.36432495]
0.95979995
315
[14.26242065]
0.9592
316
[9.09278412]
0.95629996
317
[10.88929214]
0.9595
318
[9.55150833]
0.95780003
319
[9.20744247]
0.9608
320
[13.98526154]
0.95780003
321
[9.10741882]
0.9561001
322
[10.83208618]
0.9561001
323
[12.32502213]
0.95699996
324
[14.1322464]
0.95649993
325
[12.57257385]
0.95699996
326
[10.2576828]
0.95710003
327
[10.78593674]
0.95979995
328
[13.12767181]
0.95979995
329
[10.77056503]
0.9588
330
[9.95399094]
0.9617999
331
[10.60417786]
0.96259993
332
[10.90625305]
0.9596001
333
[10.42366867]
0.9601
334
[11.7083374]
0.9601
335
[12.76949387]
0.9595
336
[13.96759796]
0.9582001
337
[16.35631104]
0.95100003
338
[15.04008331]
0.9481
339
[12.1263176]
0.9526001
340
[9.28737488]
0.9599999
341
[9.29443665]
0.9613999
342
[10.99956

In [46]:
# Persist the per-round test accuracy curve to the working directory.
np.save(file='acc_test_gd.npy', arr=acc_test)
