In [1]:
! pip install syft

Collecting syft
[?25l  Downloading https://files.pythonhosted.org/packages/38/2e/16bdefc78eb089e1efa9704c33b8f76f035a30dc935bedd7cbb22f6dabaa/syft-0.1.21a1-py3-none-any.whl (219kB)
[K     |████████████████████████████████| 225kB 5.0MB/s 
[?25hCollecting msgpack>=0.6.1 (from syft)
[?25l  Downloading https://files.pythonhosted.org/packages/92/7e/ae9e91c1bb8d846efafd1f353476e3fd7309778b582d2fb4cea4cc15b9a2/msgpack-0.6.1-cp36-cp36m-manylinux1_x86_64.whl (248kB)
[K     |████████████████████████████████| 256kB 45.7MB/s 
[?25hCollecting websocket-client>=0.56.0 (from syft)
[?25l  Downloading https://files.pythonhosted.org/packages/29/19/44753eab1fdb50770ac69605527e8859468f3c0fd7dc5a76dd9c4dbd7906/websocket_client-0.56.0-py2.py3-none-any.whl (200kB)
[K     |████████████████████████████████| 204kB 40.8MB/s 
[?25hCollecting websockets>=7.0 (from syft)
[?25l  Downloading https://files.pythonhosted.org/packages/61/5e/2fe6afbb796c6ac5c006460b5503cd674d33706660337f2dbff10d4aa12d/websockets

### DISTANCE CORRELATION

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np

In [3]:
# Set up PySyft: hook PyTorch and create the two virtual workers ("bob" and
# "alice") that hold the federated data and the client-side models below.
import syft as sy  # <-- NEW: import the PySyft library
hook = sy.TorchHook(torch)  # <-- NEW: hook PyTorch, i.e. add extra functionality to support Federated Learning
bob = sy.VirtualWorker(hook, id="bob")  # <-- NEW: define remote worker bob
alice = sy.VirtualWorker(hook, id="alice")  # <-- NEW: and alice

W0712 09:44:33.994026 139926403164032 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0712 09:44:34.012714 139926403164032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [0]:
def dist_matrix(X,remote=True):
  """Return the N x N matrix of pairwise *squared* Euclidean distances.

  X is viewed as (N, -1), so every sample is flattened to one row, and
  entry (i, j) of the result is ||X[i] - X[j]||^2.

  Args:
    X: tensor whose first dimension indexes the N samples. When `remote`
      is true it must be a PySyft pointer tensor (its `location.id` is read).
    remote: if true, the result buffer is created locally and sent to the
      worker that holds X; if false, everything stays local.

  Returns:
    An (N, N) symmetric tensor with a zero diagonal. In the local case it
    has the dtype and device of X.
  """
  N = X.shape[0]
  X = X.view(N,-1)
  if remote:
    # Allocate locally and ship the buffer to the worker holding X so that
    # the element assignments below happen on that worker.
    dist = torch.zeros(N,N).send(X.location.id)
  else:
    # Match the input: a default float32 buffer would silently truncate
    # float64 distances and could sit on a different device than X.
    dist = torch.zeros(N, N, dtype=X.dtype, device=X.device)
  for i in range(N):
    # The diagonal is already zero, so start from i+1.
    for j in range(i+1,N):
      diff = X[i]-X[j]
      # Dot product of a 1-D vector with itself = squared Euclidean norm.
      sq = torch.matmul(diff,diff)
      dist[i,j] = sq
      dist[j,i] = sq
  return dist
    

In [0]:
def a_dot_l(dist,remote):
  """Return the column means of a square distance matrix.

  Entry l of the result is (1/N) * sum_k dist[k, l].

  The previous implementation filled the result element by element and
  always sent the index to the global worker `bob`, so it failed for local
  tensors and for matrices living on any other worker. A single reduction
  along dim 0 gives the same values wherever `dist` lives.

  Args:
    dist: (N, N) tensor, local or a PySyft pointer.
    remote: kept for interface compatibility; the reduction runs wherever
      `dist` lives, so the flag is no longer consulted.
      NOTE(review): assumes pointer tensors forward `sum(dim=0)` like the
      other tensor methods used in this notebook - confirm on a remote tensor.

  Returns:
    Length-N tensor of column means (a pointer if `dist` is a pointer).
  """
  N = dist.shape[0]
  return 1./N*dist.sum(dim=0)

In [0]:
def a_k_dot(dist,remote):
  """Return the row means of a square distance matrix.

  Entry k of the result is (1/N) * sum_l dist[k, l].

  The previous implementation always sent the loop index to the global
  worker `bob`, so it failed for local tensors and for matrices living on
  any other worker. A single reduction along dim 1 gives the same values
  wherever `dist` lives.

  Args:
    dist: (N, N) tensor, local or a PySyft pointer.
    remote: kept for interface compatibility; the reduction runs wherever
      `dist` lives, so the flag is no longer consulted.
      NOTE(review): assumes pointer tensors forward `sum(dim=1)` like the
      other tensor methods used in this notebook - confirm on a remote tensor.

  Returns:
    Length-N tensor of row means (a pointer if `dist` is a pointer).
  """
  N = dist.shape[0]
  return 1./N*dist.sum(dim=1)

In [0]:
def A_matrix(dist,remote):
  """Double-center a square distance matrix.

  Computes A[k, l] = dist[k, l] - row_mean[k] - col_mean[l] + grand_mean,
  so that every row and every column of the result sums to zero.

  Fixes over the previous version:
    * `A = dist` aliased the input, so the grand mean was taken from the
      already-centered matrix and entered the result with the wrong sign.
      All means are now taken from the untouched input.
    * The caller's matrix is no longer modified in place.
    * No helper tensor is sent to `dist.location`, so local tensors
      (remote=False) work as well.

  The row/column means are computed inline so that this function does not
  depend on which worker holds `dist`.

  Args:
    dist: (N, N) tensor, local or a PySyft pointer.
    remote: kept for interface compatibility; all operations run wherever
      `dist` lives, so the flag is no longer consulted.
      NOTE(review): assumes pointer tensors forward `sum(dim=...)` and
      broadcasting arithmetic to the remote worker - confirm on a remote tensor.

  Returns:
    New (N, N) double-centered tensor.
  """
  N = dist.shape[0]
  col_mean = 1./N*dist.sum(dim=0)
  row_mean = 1./N*dist.sum(dim=1)
  grand_mean = 1./N**2*dist.sum()
  # (1, N) broadcasts over rows, (N, 1) broadcasts over columns.
  A = dist - col_mean.view(1,N) - row_mean.view(N,1)
  return A + grand_mean

In [0]:
def distance_covariance(X,Y,remote=True):
  """Mean of the element-wise product of the centered distance matrices of X and Y.

  Both inputs go through dist_matrix and then A_matrix; the result is
  (1/N^2) * sum(A * B), with N the number of samples.
  """
  pairwise_x = dist_matrix(X,remote)
  pairwise_y = dist_matrix(Y,remote)
  n_samples = pairwise_x.shape[0]
  centered_x = A_matrix(pairwise_x,remote)
  centered_y = A_matrix(pairwise_y,remote)
  product = centered_x*centered_y
  total = product.sum()
  return 1/n_samples**2*total
  

In [0]:
def distance_variance(X,remote):
  """Mean of the squared entries of the centered distance matrix of X.

  X goes through dist_matrix and then A_matrix; the result is
  (1/N^2) * sum(A ** 2), with N the number of samples.
  """
  pairwise = dist_matrix(X,remote)
  n_samples = pairwise.shape[0]
  centered = A_matrix(pairwise,remote)
  squared_total = (centered**2).sum()
  return 1/n_samples**2*squared_total

In [0]:
def distance_correlation(X,Y,remote=True):
  """Return cov(X, Y) / sqrt(var(X) * var(Y)) built from centered distance matrices.

  The covariance and both variances use the same centered matrices, so each
  is built exactly once here. Going through distance_covariance and
  distance_variance would rebuild every pairwise-distance matrix (a Python
  double loop over samples) a second time. The returned value is unchanged.

  Args:
    X, Y: tensors with the same number of samples along dim 0.
    remote: forwarded to dist_matrix / A_matrix.

  Returns:
    Scalar tensor (a pointer when the inputs are pointers). No guard is
    applied when either variance is zero, so constant inputs give nan/inf.
  """
  A = A_matrix(dist_matrix(X,remote),remote)
  B = A_matrix(dist_matrix(Y,remote),remote)
  N = X.shape[0]
  cov = 1/N**2*(A*B).sum()
  V_x = 1/N**2*(A**2).sum()
  V_y = 1/N**2*(B**2).sum()
  return cov/torch.sqrt(V_x*V_y)

In [0]:
def log_dcor(X,Y,remote=True):
  """Return the natural log of distance_correlation(X, Y, remote).

  Uses torch.log rather than np.log so that the result stays a torch
  tensor inside the autograd graph; NumPy cannot convert a tensor that
  requires grad and knows nothing about PySyft pointer tensors.
  """
  return torch.log(distance_correlation(X,Y,remote))

In [0]:
def batch_dcor(X,Y,remote=True):
  """Distance correlation between two batches with arbitrary trailing dimensions.

  Both inputs are flattened to (batch, -1), using the batch size of X,
  before being passed to distance_correlation.

  Args:
    X, Y: tensors whose first dimension is the batch dimension.
    remote: forwarded to distance_correlation. Defaults to True to match
      distance_correlation/log_dcor; pass False for local tensors.

  Returns:
    Whatever distance_correlation returns for the flattened inputs.
  """
  a = X.shape[0]
  X_new = X.view(a,-1)
  Y_new = Y.view(a,-1)
  return distance_correlation(X_new,Y_new,remote)
    

In [0]:
def batch_log_dcor(X,Y,remote=True):
  """Return the natural log of batch_dcor(X, Y, remote).

  Uses torch.log rather than np.log so that the result stays a torch
  tensor inside the autograd graph; NumPy cannot convert a tensor that
  requires grad and knows nothing about PySyft pointer tensors.
  `remote` defaults to True to match log_dcor.
  """
  return torch.log(batch_dcor(X,Y,remote))

In [0]:
# Two fixed-seed random tensors of shape (5, 20, 20).
# NOTE(review): presumably sample inputs for trying the distance-correlation
# helpers by hand; no later cell in this notebook reads these globals.
torch.manual_seed(0)
X = torch.randn(5,20,20)

# Re-seed with a different value so Y is reproducible but differs from X.
torch.manual_seed(1)
Y = torch.randn(5,20,20)

### NoPeekNN

In [0]:
class Arguments:
    """Plain container for the training settings read by the cells below."""

    def __init__(self):
        settings = {
            # batch sizes for the federated train loader and the test loader
            "batch_size": 64,
            "test_batch_size": 1000,
            # optimisation
            "epochs": 10,
            "lr": 0.01,
            "momentum": 0.5,
            # runtime
            "no_cuda": False,
            "seed": 1,
            # number of batches between two progress lines
            "log_interval": 30,
            "save_model": False,
        }
        for name, value in settings.items():
            setattr(self, name, value)

# Instantiate the settings and derive the device configuration from them.
args = Arguments()

# Use CUDA only when it is both allowed by the settings and available.
use_cuda = not args.no_cuda and torch.cuda.is_available()

if use_cuda:
        # TODO Quick hack: make every new tensor a CUDA tensor. The real fix is
        # to solve the problem of moving the model to CUDA.
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
torch.manual_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")
#device = torch.device("cpu")

# Extra DataLoader keyword arguments; empty when running on CPU.
#kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

In [15]:
# Training data: MNIST, normalised with mean 0.1307 / std 0.3081, split across
# the workers bob and alice and served through PySyft's FederatedDataLoader.
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader 
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))
    .federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
    batch_size=args.batch_size, shuffle=True, **kwargs)

# Test data: the MNIST test split with the same normalisation, kept local and
# served through a regular PyTorch DataLoader.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 23995116.26it/s]                            


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz


32768it [00:00, 367336.51it/s]
0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 5423743.67it/s]                           
8192it [00:00, 140984.43it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [0]:
class Net1(nn.Module):
    """First half of the split network: one 5x5 convolution (1 -> 20 channels),
    ReLU, then 2x2 max-pooling. Its output feeds Net2."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)

    def forward(self, x):
        activated = F.relu(self.conv1(x))
        pooled = F.max_pool2d(activated, kernel_size=2, stride=2)
        return pooled

In [0]:
class Net2(nn.Module):
    """Second half of the split network: 5x5 convolution (20 -> 50 channels),
    ReLU, 2x2 max-pooling, two fully connected layers (800 -> 500 -> 10) and a
    log-softmax over the 10 outputs."""

    def __init__(self):
        super().__init__()
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4*4*50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        feature_maps = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2, stride=2)
        # Flatten to (batch, 800) for the fully connected layers.
        flat = feature_maps.view(-1, 4*4*50)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [0]:
# One independently initialised Net1 per worker: models[0] is sent to bob,
# models[1] to alice. train() picks the model by the location of each batch.
models = [Net1().to(device), Net1().to(device)]
#models = [Net1(), Net1()]
models[0] = models[0].send(bob)
models[1] = models[1].send(alice)



# One SGD optimizer per client model. The learning rate is hard-coded to 0.1
# here, whereas the optimizer for Net2 further down uses args.lr (0.01).
opt1 = optim.SGD(params=models[0].parameters(),lr=0.1)
opt2 = optim.SGD(params=models[1].parameters(),lr=0.1)

In [0]:
def train(args, model, device, federated_train_loader, optimizer, epoch, alpha1=0.1,alpha2=0.9):
    """First draft of one epoch of split training over the federated loader.

    NOTE: a later cell defines `train` again with the same signature, so when
    the notebook is run top to bottom this version is replaced by that one.

    Reads the module-level globals `models`, `opt1` and `opt2` to pick the
    client-side model/optimizer that matches the worker holding each batch.
    `model`/`optimizer` are the second-half network and its optimizer.
    `alpha1`/`alpha2` are only referenced in a commented-out line.
    """
    model.train()
    for batch_idx, (data, targs) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
        # Pick the client model/optimizer that lives where this batch lives.
        if data.location.id == 'bob':
          mod_c,opt_c = models[0], opt1
        else : 
          mod_c,opt_c = models[1], opt2
        
        #print("data",data.clone().get().size())
        # 1) apply whatever gradients are still stored from an earlier
        #    iteration, then erase them (note: step() runs before zero_grad()).
        optimizer.step()
        opt_c.step()
        opt_c.zero_grad()
        optimizer.zero_grad()
        
        # Fetch a copy of the labels; `targs` itself is left where it is.
        tg_copy = targs.copy()
        target = tg_copy.get()
        data, target = data.to(device), target.to(device)
        
        # 2) make a prediction until cut layer (client location)
        pred_c = mod_c(data)
        copy = pred_c.copy()
        

        
        
        # 3) get this to the server 
        inp = copy.get()

        # compute the distance correlation between the raw batch and the
        # cut-layer activations, and back-propagate it
        dist = batch_dcor(data,pred_c,remote=True)
        dist.backward(create_graph=True)
        
        
        # 4) make prediction with second part of the model (server location)
        pred = model(inp)

        # 5) calculate how much we missed 
        loss = F.nll_loss(pred, target)
        loss.backward()
        #print(dist.grad)
        print(inp.grad)
        #gradient = alpha1*dist.grad + alpha2*inp.grad
        # 6) send the gradient w.r.t. the cut-layer activations back to the
        #    worker holding the batch and continue the backward pass there
        gradient = inp.grad
        gradient = gradient.send(data.location)
        print(gradient.shape)
        #dist.backward()
        pred_c.backward(gradient)
        
        
        if batch_idx % args.log_interval == 0:
            #loss = loss.get() # <-- NEW: get the loss back
            # The format string ends with a "DCor:" label but has no
            # placeholder after it, so only the loss value is printed.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} \tDCor:'.format(
                epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
                100. * batch_idx / len(federated_train_loader), loss.item()))

In [0]:
def train(args, model, device, federated_train_loader, optimizer, epoch, alpha1=0.1,alpha2=0.9):
    """Run one epoch of split training with a distance-correlation term.

    For every federated batch: pick the client model that matches the worker
    holding the batch, compute the cut-layer activations, back-propagate the
    distance correlation between input and activations, then back-propagate
    the NLL loss of the second-half network on those activations.

    Reads the module-level globals `models`, `opt1` and `opt2`.

    Fixes over the previous version:
      * `batch_dcor` was called without its `remote` argument (TypeError).
        X is fetched locally, so `remote=False` is passed.
      * The log line passed `dcor.item()` to a format string with no
        placeholder for it, so the value was silently dropped.
      * The optimizer steps ran at the top of the loop, before the backward
        pass, so the last batch's gradients were never applied. They now run
        after the backward passes, preceded by zero_grad().
      * The per-batch debug print of `dcor` is removed; the value is part of
        the periodic log line.

    Args:
      args: settings object; `batch_size` and `log_interval` are read.
      model: second-half network (called on the cut-layer activations).
      device: torch device for the batch and labels.
      federated_train_loader: yields (data, target) pointer pairs.
      optimizer: optimizer of `model`.
      epoch: epoch number, used for logging only.
      alpha1, alpha2: currently unused; presumably intended weights for the
        two loss terms - TODO confirm.
    """
    model.train()
    for batch_idx, (data, targs) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
        # Pick the client model/optimizer that lives where this batch lives.
        if data.location.id == 'bob':
          mod_c,opt_c = models[0], opt1
        else :
          mod_c,opt_c = models[1], opt2

        # 1) erase previous gradients (if they exist)
        opt_c.zero_grad()
        optimizer.zero_grad()

        # Fetch a copy of the labels; `targs` itself is left where it is.
        target = targs.copy().get()
        data, target = data.to(device), target.to(device)

        # 2) bring a copy of the batch to the local side and run the client
        #    model copy on it.
        # NOTE(review): M is a copy of mod_c that is not fetched with .get();
        # calling it on the local X, and `remote=False` below, both assume the
        # resulting activations are local - confirm. Gradients computed on the
        # copy also do not reach mod_c's own parameters, so opt_c.step() below
        # presumably has nothing to apply - confirm.
        X = data.clone().get()
        M = mod_c.copy()
        inp = M(X)

        # 3) distance correlation between the raw batch and the activations
        dcor = batch_dcor(X,inp,remote=False)
        dcor.backward(retain_graph=True)

        # 4) make prediction with second part of the model (server location)
        pred = model(inp)

        # 5) calculate how much we missed
        loss = F.nll_loss(pred, target)
        loss.backward()

        # 6) apply the gradients of this batch
        optimizer.step()
        opt_c.step()

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} \tDCor : {:.6f}'.format(
                epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
                100. * batch_idx / len(federated_train_loader), loss.item(),dcor.item()))

In [0]:
def test(args, model, device, test_loader):
    """Evaluate the split network on the test set and print loss and accuracy.

    A local copy of the first client model (`models[0]`, a module-level
    global) produces the cut-layer activations that are fed to `model`.
    Only that client model is evaluated; the previous version also copied
    and fetched `models[1]` without ever using it, which has been removed.

    Args:
      args: unused; kept so the call signature matches train().
      model: second-half network.
      device: torch device for the test batches.
      test_loader: regular DataLoader over the test set.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # Fetch a copy so the model that stays on the worker is left untouched.
    client_model = models[0].copy().get()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(client_model(data))
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [24]:
%%time
# Second half of the split network and its optimizer; this model is kept
# local (it is never sent to a worker).
model = Net2().to(device)
optimizer = optim.SGD(model.parameters(), lr=args.lr) # TODO momentum is not supported at the moment

# One training pass followed by one evaluation pass per epoch.
for epoch in range(1, args.epochs + 1):
    train(args, model, device, federated_train_loader, optimizer, epoch)
    test(args, model, device, test_loader)

# Only the weights of `model` (Net2) are saved; the Net1 client models in
# `models` are not.
if (args.save_model):
    torch.save(model.state_dict(), "mnist_cnn.pt")

tensor([[[[ 4.9752e-06,  7.4517e-06, -8.4466e-06,  ..., -2.8684e-06,
           -5.0234e-08,  4.1778e-06],
          [-7.3676e-06,  2.2317e-06, -2.8635e-07,  ..., -1.4061e-05,
            2.5435e-06,  2.3895e-06],
          [-2.3287e-05,  6.9854e-06,  3.5475e-05,  ..., -1.6095e-05,
           -6.3854e-06,  3.9669e-06],
          ...,
          [-1.0427e-05,  1.6473e-06,  1.6383e-05,  ..., -1.2764e-05,
           -6.8108e-06,  9.6995e-06],
          [-1.6151e-06, -7.7561e-06,  9.7921e-06,  ...,  4.5062e-05,
            2.2536e-06, -3.5593e-05],
          [ 3.1657e-06, -2.7025e-07, -1.1114e-05,  ..., -1.3155e-06,
            1.7459e-05, -2.0189e-06]],

         [[-6.6879e-06, -1.9316e-05, -1.0677e-05,  ...,  1.6253e-05,
            7.0103e-07,  3.4035e-06],
          [ 1.6189e-05,  1.9798e-05, -1.0862e-05,  ..., -3.3709e-06,
           -1.9081e-05,  2.8781e-06],
          [ 3.1082e-05,  3.9036e-05,  9.1551e-06,  ...,  9.8036e-06,
            3.4379e-05,  2.1815e-06],
          ...,
     

RuntimeError: ignored

### Tests

In [0]:
# Scratch experiment: does item assignment work on a tensor that lives on bob?
Q = torch.zeros(3,3)

In [0]:
# 0-dim scalar that is later sent to bob and used as the assigned value.
c = torch.tensor(1.)

In [50]:
Q

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [0]:
# From here on Q is a pointer to a tensor held by bob.
Q = Q.send(bob)

In [0]:
# Assign a plain Python number through the pointer.
Q[1,1] = 2

In [55]:
# Fetch a clone so that Q itself stays on bob.
Q.clone().get()

tensor([[0., 0., 0.],
        [0., 2., 0.],
        [0., 0., 0.]])

In [0]:
c = c.send(bob)

In [0]:
# Assign a value that is itself a pointer on the same worker.
Q[1,2] = c

In [59]:
Q.clone().get()

tensor([[0., 0., 0.],
        [0., 2., 1.],
        [0., 0., 0.]])

In [0]:
Q[1,1]=c

In [0]:
c.shape

torch.Size([])

In [0]:
# NOTE(review): the output below is a plain tensor without the earlier
# Q[1,2] assignment, although Q was sent to bob above; it presumably comes
# from a different execution order - confirm with Restart & Run All.
Q

tensor([[0., 0., 0.],
        [0., 1., 0.],
        [0., 0., 0.]])

In [0]:
# Scratch experiment: indexing a tensor on bob with plain ints and with an
# index that is itself a pointer.
x = torch.tensor([0.,1.,2.])

In [6]:
x.shape

torch.Size([3])

In [29]:
# NOTE(review): the outputs of this cell and the next one (an empty size, a
# one-element view) do not match the 3-element x defined above; judging by
# the execution counts they presumably come from a different x - confirm.
x.size()

torch.Size([])

In [30]:
x.view(1)

tensor([0.])

In [0]:
# From here on x is a pointer to a tensor held by bob.
x = x.send(bob)

In [47]:
bob

<VirtualWorker id:bob #objects:24>

In [0]:
N = x.shape[0]

In [22]:
# Raises AttributeError (see output): N has no .send method; it is
# presumably a plain Python int - confirm.
N.send(bob)

AttributeError: ignored

In [0]:
# Assigning the local value N through the pointer works (see fetched value below).
x[0]=N

In [25]:
x

(Wrapper)>[PointerTensor | me:58102664934 -> bob:24894408443]

In [26]:
x.clone().get()

tensor([3., 1., 2.])

In [63]:
x

(Wrapper)>[PointerTensor | me:58102664934 -> bob:24894408443]

In [0]:
# Index tensor sent to the same worker as x.
l = torch.tensor(2)
l = l.send(bob)

In [0]:
# Item assignment with a pointer index; the final fetched value shows 10 at position 2.
x[l]=10

In [69]:
# NOTE(review): l is displayed as a local tensor here although it was sent to
# bob above; the output presumably predates the send - confirm.
l

tensor(2)

In [70]:
int(l)

2

In [75]:
x.clone().get()

tensor([ 3.,  1., 10.])

### Issue

In [0]:
# Local (no PySyft) reference for the two-stage backward pass that the
# split-learning cells try to reproduce across workers.
a = torch.tensor([1.,2.,3.,4.],requires_grad=True)

In [0]:
b = a**2

In [0]:
# Rebinds b to a fresh leaf tensor holding the same values as a**2; this b is
# no longer connected to a in the autograd graph.
b = torch.tensor([1.,4.,9.,16.],requires_grad=True)

In [0]:
c = b.sum()

In [33]:
c

tensor(30., grad_fn=<SumBackward0>)

In [0]:
c.backward()

In [0]:
# Backward through b with an explicit upstream gradient of ones.
b.backward(torch.tensor([1.,1.,1.,1.]))

In [35]:
b.grad

tensor([1., 1., 1., 1.])

In [44]:
# NOTE(review): the output [2, 4, 6, 8] equals 2*a, which needs b = a**2 to be
# the tensor that was back-propagated; with the leaf b defined above, a would
# receive no gradient. The cells were presumably run in a different order -
# confirm with Restart & Run All.
a.grad

tensor([2., 4., 6., 8.])

#### TEST1

In [0]:
# TEST1: split backward pass. a and b = a**2 live on bob; a clone of b is
# fetched, reduced locally, and the local gradient is sent back to bob.
a = torch.tensor([1.,2.,3.,4.],requires_grad=True)
a = a.send(bob)

In [0]:
b = a**2

In [0]:
# Local copy of b; b itself stays on bob.
b_ = b.clone().get()

In [0]:
c = b_.sum()

In [0]:
c.backward()

In [0]:
# Gradient of c with respect to the local copy.
grad = b_.grad

In [0]:
grad = grad.send(bob)

In [53]:
# Continue the backward pass on bob, using the gradient computed locally.
b.backward(grad)

(Wrapper)>[PointerTensor | me:49838401799 -> bob:67097995970]

In [54]:
# The fetched result [2, 4, 6, 8] equals 2*a, i.e. the gradient of sum(a**2).
a.grad.clone().get()

tensor([2., 4., 6., 8.])

#### TEST2

In [0]:
# TEST2: same as TEST1, with an extra remote tensor d = b + 2 created from b
# before the backward pass. d is not used afterwards.
a = torch.tensor([1.,2.,3.,4.],requires_grad=True)
a = a.send(bob)

In [0]:
b = a**2

In [0]:
# Local copy of b; b itself stays on bob.
b_ = b.clone().get()

In [0]:
d = b+2

In [0]:
c = b_.sum()

In [0]:
c.backward()

In [0]:
grad = b_.grad

In [0]:
grad = grad.send(bob)

In [75]:
# Continue the backward pass on bob, using the gradient computed locally.
b.backward(grad)

(Wrapper)>[PointerTensor | me:13838942116 -> bob:80963266849]

In [76]:
# Same result as TEST1: [2, 4, 6, 8] = 2*a.
a.grad.clone().get()

tensor([2., 4., 6., 8.])

#### TEST3

In [0]:
# TEST3: purely local chain a -> a**2 -> (a**2)**3 + 2 -> sum. This gives the
# reference gradient for the split version in the cell after bob.clear_objects().
a = torch.tensor([1.,2.,3.,4.],requires_grad=True)
#a = a.send(bob)

In [0]:
b = a**2

In [0]:
d = b**3 + 2

In [0]:
c = d.sum()

In [0]:
c.backward()

In [85]:
# d(sum(a**6))/da = 6*a**5, which matches the output below.
a.grad

tensor([6.0000e+00, 1.9200e+02, 1.4580e+03, 6.1440e+03])

In [103]:
# Drop every object stored on bob before the next experiment.
bob.clear_objects()

<VirtualWorker id:bob #objects:0>

In [4]:
# Three-stage split experiment: client (bob) -> server (local) -> client (bob).
# The gradient is passed back by hand at each cut. The last backward call
# fails with a RuntimeError (see the output below).
### Forward part
# First tensor, sent to client's location
a = torch.tensor([1.,2.,3.,4.],requires_grad=True)
a = a.send(bob)

# Second tensor, computed on the client; a clone is fetched to the server
b = a**2
b_ = b.clone().get()

# Third tensor, computed on the server; a clone is sent to the client
c = b_**3
c_ = c.clone().send(bob)

# Fourth tensor, computed on the client
d = c_.sum()

### Backward part
# Client: gradient of d with respect to c_, fetched to the server
d.backward()
g1 = c_.grad.clone().get()
# NOTE(review): d is fetched here and its value is discarded; whether this
# call affects the backward pass below is not visible from this cell - confirm.
d.get()
# Server: continue through c = b_**3, then send the gradient with respect to
# b_ to the client
c.backward(g1)
g2 = b_.grad.clone().send(bob)
print("b shape:",b.shape)
print("gradient shape:", g2.shape)
# Client: continue through b = a**2. This is the call that raises.
b.backward(g2)

b shape: torch.Size([4])
gradient shape: torch.Size([4])


RuntimeError: ignored

In [99]:
bob._objects

{344562030: tensor([ 1.,  4.,  9., 16.], grad_fn=<PowBackward0>),
 2181393947: tensor(4898., grad_fn=<SumBackward0>),
 20740364325: tensor([  3.,  48., 243., 768.]),
 29463151178: tensor([3.0000e+00, 6.6000e+01, 7.3100e+02, 4.0980e+03], requires_grad=True),
 34315854109: tensor([1., 1., 1., 1.]),
 86730410175: tensor([1., 2., 3., 4.], requires_grad=True)}

In [101]:
g2.location

<VirtualWorker id:bob #objects:6>