In [1]:
#importing python libraries
import os
import sys
import torch
import syft as sy
from syft import Variable as Var
from syft import nn
from syft import optim

In [2]:
module_path = os.path.abspath(os.path.join('./PySyft'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
# creating hook instance which will overload all operations required for federate learning activity
hook = sy.TorchHook()
host_machine = hook.local_worker

mobile_device_1 = sy.VirtualWorker(id="mobile_device_1",hook=hook)
mobile_device_2= sy.VirtualWorker(id="mobile_device_2",hook=hook)

host_machine.add_worker(mobile_device_1)
host_machine.add_worker(mobile_device_2)




In [4]:
# importing dataset

#information about dataset
#Predictor Variable: Gender(Male: 1 , Female: 0) ,Age ,Tenure(Time period of the account),
#                    Balance(Amount in the account) ,HasCrCard(has credit card(yes: 1 , no: 0))
#                    IsActiveMember(Active status(yes: 1 , no: 0)) ,Estimated salary
#Target Variable:    Exited(has exited(yes:1 , no: 0))
import pandas as pd
dataset_read=pd.read_csv("bank_dataset.csv", sep=',')
dataset_read.head()

Unnamed: 0,Gender,Age,Tenure,Balance,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,0,42,2,0.0,1,1,101348.88,1
1,0,41,1,83807.86,0,1,112542.58,0
2,0,42,8,159660.8,1,0,113931.57,1
3,0,39,1,0.0,0,0,93826.63,0
4,0,43,2,125510.82,1,1,79084.1,0


In [5]:
dataset_read=dataset_read[dataset_read.Balance!=0.00]

In [6]:
dataset_read.head()

Unnamed: 0,Gender,Age,Tenure,Balance,HasCrCard,IsActiveMember,EstimatedSalary,Exited
1,0,41,1,83807.86,0,1,112542.58,0
2,0,42,8,159660.8,1,0,113931.57,1
4,0,43,2,125510.82,1,1,79084.1,0
5,1,44,8,113755.78,1,0,149756.71,1
7,0,29,4,115046.74,1,0,119346.88,1


In [7]:
dataset_read.shape

(6383, 8)

In [8]:
#Rescaling the data 
Age_mean_val=dataset_read.Age.mean()
Age_mean_val

39.19771267429108

In [9]:
dataset_read.Age=dataset_read.Age/Age_mean_val

In [10]:
Balance_mean_val=dataset_read.Balance.mean()
Balance_mean_val

119827.49379288674

In [11]:
dataset_read.Balance=dataset_read.Balance/Balance_mean_val

In [12]:
EstimatedSalary_mean_val=dataset_read.EstimatedSalary.mean()
EstimatedSalary_mean_val

100717.35295629001

In [13]:
dataset_read.EstimatedSalary=dataset_read.EstimatedSalary/EstimatedSalary_mean_val

In [14]:
Tenure_mean_val=dataset_read.Tenure.mean()
Tenure_mean_val

4.979633401221996

In [15]:
dataset_read.Tenure=dataset_read.Tenure/Tenure_mean_val

In [16]:
# dataset after rescaling
dataset_read.head()

Unnamed: 0,Gender,Age,Tenure,Balance,HasCrCard,IsActiveMember,EstimatedSalary,Exited
1,0,1.045979,0.200818,0.699404,0,1,1.11741,0
2,0,1.071491,1.606544,1.332422,1,0,1.131201,1
4,0,1.097003,0.401636,1.047429,1,1,0.785208,0
5,1,1.122514,1.606544,0.94933,1,0,1.486901,1
7,0,0.739839,0.803272,0.960103,1,0,1.184968,1


In [17]:
# converting data in numpy array
import numpy as np
target=np.array(dataset_read.Exited)

pre_data=dataset_read.as_matrix(columns=dataset_read.columns[:-1])

  """


In [18]:
pre_data[0]

array([0.        , 1.0459794 , 0.200818  , 0.69940426, 0.        ,
       1.        , 1.11741003])

In [19]:
predictor = Var(sy.FloatTensor(pre_data))
target = Var(sy.LongTensor(target))

In [20]:
# sending data to two virtual worker
predictor_MD_1 = predictor[0:5000].send(mobile_device_1)
target_MD_1 = target[0:5000].send(mobile_device_1)

predictor_MD_2 = predictor[5000:].send(mobile_device_2)
target_MD_2 = target[5000:].send(mobile_device_2)

In [21]:
# Model definition

input_size=7
num_classes=2
learning_rate=0.01
epochs=300

class LogisticRegression(nn.Module): 
    def __init__(self, input_size, num_classes): 
        super(LogisticRegression, self).__init__() 
        self.linear = nn.Linear(input_size, num_classes) 
  
    def forward(self, x): 
        out = self.linear(x) 
        return out
    
model = LogisticRegression(input_size, num_classes) 
#model=nn.Linear(input_size,num_classes)
# Loss and Optimizer 
# Softmax is internally computed. 

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(params=model.parameters(), lr = learning_rate)

In [22]:
# Training over distributed dataset
datasets = [(predictor_MD_1,target_MD_1),(predictor_MD_2,target_MD_2)]

for iter in range(epochs):
    for data, target in datasets:
           
            model.send(data.location)
            
            # update the model
            
            optimizer.zero_grad()
            pred = model(data)
            #print(pred)
            #print(target)
            
            loss = criterion(pred, target)
            loss.backward()
            model.get()
            optimizer.step()
            print(loss.get().data[0])
           
            
       


0.7477648258209229
0.7365473508834839
0.7261958718299866
0.7158901691436768
0.7070816159248352
0.6975600719451904
0.6901471018791199
0.6813010573387146
0.6751525402069092
0.6668809056282043
0.6618748307228088
0.6540884971618652
0.6501137018203735
0.6427356600761414
0.6396939158439636
0.6326553225517273
0.6304564476013184
0.6236971616744995
0.622262716293335
0.6157307028770447
0.6149895191192627
0.6086391806602478
0.6085241436958313
0.6023189425468445
0.6027761697769165
0.5966811180114746
0.5976546406745911
0.5916453003883362
0.5930914878845215
0.5871419906616211
0.5890182256698608
0.5831102728843689
0.5853804349899292
0.5794960856437683
0.5821259021759033
0.5762515664100647
0.5792121291160583
0.5733364820480347
0.5765990018844604
0.570712685585022
0.5742517709732056
0.5683479309082031
0.5721433758735657
0.5662144422531128
0.5702434778213501
0.5642862915992737
0.5685329437255859
0.5625420808792114
0.5669878721237183
0.5609614849090576
0.5655932426452637
0.5595287084579468
0.564329862594

0.5359119176864624
0.5316547155380249
0.5358366370201111
0.5315976142883301
0.5357626080513
0.5315403342247009
0.5356888771057129
0.5314838290214539
0.5356162190437317
0.5314273238182068
0.5355412364006042
0.5313706398010254
0.535469114780426
0.5313137769699097
0.5353958606719971
0.5312578678131104
0.5353212952613831
0.5312020778656006
0.5352497696876526
0.5311463475227356
0.535176157951355
0.5310904383659363
0.5351033806800842
0.5310348868370056
0.5350297689437866
0.5309793949127197
0.5349586009979248
0.5309236645698547
0.5348849892616272
0.5308685302734375
0.5348140001296997
0.5308135151863098
0.5347431898117065
0.5307584404945374
0.5346692800521851
0.5307040214538574
0.5345984101295471
0.5306496024131775
0.5345265865325928
0.5305947065353394
0.5344552993774414
0.5305402874946594
0.5343841314315796
0.5304863452911377
0.5343131422996521
0.5304321646690369
0.534242570400238
0.5303779244422913
0.5341724753379822
0.5303248763084412
0.534101128578186
0.530271589756012
0.5340316891670227
0

In [23]:
#testing model
p=model(predictor_MD_1.get())

In [24]:
_, predicted = sy.max(p.data, 1)

In [25]:
predicted


 0
 0
 0
⋮ 
 0
 0
 0
[syft.core.frameworks.torch.tensor.LongTensor of size 5000]

In [26]:
actual_val=target_MD_1.get()

In [27]:
actual_val

Variable containing:
 0
 1
 0
⋮ 
 0
 0
 0
[syft.core.frameworks.torch.tensor.LongTensor of size 5000]

In [28]:
total=len(actual_val)
correct=0
for i in range(total):
     correct += (predicted[i] == actual_val[i]).sum()

In [29]:
#obtained accuracy
accuracy=100*(int(correct)/total)
accuracy

4.02