In [66]:
# Calibration cell: for each target epsilon, report the privacy guarantee of the
# chosen noise multiplier (sigma) for both dataset sizes.
# In every pair, the first call uses n=751000 / batch_size=1024 and the second
# n=300400 / batch_size=512; the noise multipliers are the same pairs stored in
# `eps2sigma` in the training cell.
# NOTE(review): `compute_dp_sgd_privacy` is imported by the training cell (In [1]),
# so this cell only runs after that cell has been executed.
# non-dp training converges at epoch 72

print('Sigma for eps=0.5')
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=751000, batch_size=1024, noise_multiplier=3.41, epochs=72, delta=1e-6)
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=300400, batch_size=512, noise_multiplier=3.795, epochs=72, delta=1e-6)

print('\nSigma for eps=1')
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=751000, batch_size=1024, noise_multiplier=1.82, epochs=72, delta=1e-6)
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=300400, batch_size=512, noise_multiplier=2.01, epochs=72, delta=1e-6)

print('\nSigma for eps=2')
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=751000, batch_size=1024, noise_multiplier=1.09, epochs=72, delta=1e-6)
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=300400, batch_size=512, noise_multiplier=1.175, epochs=72, delta=1e-6)

print('\nSigma for eps=4')
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=751000, batch_size=1024, noise_multiplier=0.778, epochs=72, delta=1e-6)
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=300400, batch_size=512, noise_multiplier=0.811, epochs=72, delta=1e-6)

print('\nSigma for eps=8')
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=751000, batch_size=1024, noise_multiplier=0.6173, epochs=72, delta=1e-6)
# The last call is left as the cell's final expression, so its return value is displayed.
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=300400, batch_size=512, noise_multiplier=0.637, epochs=72, delta=1e-6)

Sigma for eps=0.5
DP-SGD with sampling rate = 0.136% and noise_multiplier = 3.41 iterated over 52805 steps satisfies differential privacy with eps = 0.5 and delta = 1e-06.
The optimal RDP order is 57.0.
DP-SGD with sampling rate = 0.17% and noise_multiplier = 3.795 iterated over 42244 steps satisfies differential privacy with eps = 0.5 and delta = 1e-06.
The optimal RDP order is 57.0.

Sigma for eps=1
DP-SGD with sampling rate = 0.136% and noise_multiplier = 1.82 iterated over 52805 steps satisfies differential privacy with eps = 1 and delta = 1e-06.
The optimal RDP order is 29.0.
DP-SGD with sampling rate = 0.17% and noise_multiplier = 2.01 iterated over 42244 steps satisfies differential privacy with eps = 1 and delta = 1e-06.
The optimal RDP order is 29.0.

Sigma for eps=2
DP-SGD with sampling rate = 0.136% and noise_multiplier = 1.09 iterated over 52805 steps satisfies differential privacy with eps = 2 and delta = 1e-06.
The optimal RDP order is 14.0.
DP-SGD with sampling rate = 0.

(8.001949867989007, 4.0)

In [1]:
# HIT & CASIA PyTorch training with FederatedAveraging

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import h5py as h5
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import RandomSampler, DataLoader
from torch.autograd import Variable

import time
import copy
import math

import syft as sy

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy

# Noise multiplier (sigma) per target epsilon, stored as (sigma_casia, sigma_hit).
# These are the same values passed to compute_dp_sgd_privacy in the calibration
# cell (72 epochs, delta=1e-6); the key is looked up with `args.epsilon`.
eps2sigma = {
    0.5: (3.41, 3.795),
    1: (1.82, 2.01),
    2: (1.09, 1.175),
    4: (0.778, 0.811),
    8: (0.6173, 0.637)
}
        
class Arguments:
    """Plain container for the run's hyper-parameters and switches.

    All values are fixed in ``__init__``; edit them here to change a run.
    """

    def __init__(self):
        # --- optimisation ---
        self.lr = 0.0001
        self.epochs = 73
        self.batch_size = 512
        self.val_batch_size = 512

        # --- runtime ---
        self.seed = 0
        # Use the GPU whenever CUDA is available.
        self.gpu = torch.cuda.is_available()

        # --- bookkeeping intervals ---
        self.log_interval = 200
        self.checkpoint = 5

        # --- differential privacy ---
        self.delta = 1e-6
        self.clip = 1     # gradient clip L2 bound
        self.epsilon = 1  # epsilon for each party

# Single configuration object shared by the cells below.
args = Arguments()

# Seed PyTorch's global RNG from the configured seed.
torch.manual_seed(args.seed)
# Every model and batch is moved to this device.
device = torch.device("cuda" if args.gpu else "cpu")
print(device)
# Extra DataLoader options; only set when running on the GPU.
kwargs = {'num_workers': 1, 'pin_memory': True} if args.gpu else {}

# load data
class HIT(torch.utils.data.Dataset):
    """Dataset over one group of an HDF5 archive.

    Samples come from ``<group>/x`` and labels from ``<group>/y``. The archive
    is opened read-only in the constructor and stays open until ``close()``.

    Args:
        archive: path of the HDF5 file.
        group: name of the group to read (the call sites use 'trn' and 'vld').
        transform: optional callable applied to each sample.
    """

    def __init__(self, archive, group, transform=None):
        handle = h5.File(archive, 'r')
        self.archive = handle
        self.x = handle[group + '/x']
        self.y = handle[group + '/y']
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(sample, label)`` for ``index``; the label is cast to int64."""
        sample = self.x[index]
        sample = sample if self.transform is None else self.transform(sample)
        # Each label row holds the class id in its first element.
        target = self.y[index][0].astype('int64')
        return sample, target

    def __len__(self):
        """Number of labelled examples in the group."""
        return len(self.y)

    def close(self):
        """Close the underlying HDF5 file."""
        self.archive.close()

def to_tensor(img):
    """Turn a NumPy array into a float tensor with its values divided by 255."""
    return torch.from_numpy(img).float().div(255)
# Wrap to_tensor so it can be used inside a torchvision transform pipeline.
tfm = transforms.Lambda(to_tensor)

# Training splits ('trn' group) of the two parties' archives.
trainset_casia = HIT('mdata/HWDB1.1fullset.hdf5', 'trn', transform=transforms.Compose([tfm]))
trainset_hit = HIT('mdata/HIT_OR3Cfullset.hdf5', 'trn', transform=transforms.Compose([tfm]))
# sequential loader
# train_loader_casia = DataLoader(trainset_casia, batch_size=args.batch_size, shuffle=True, **kwargs)
# train_loader_hit = DataLoader(trainset_hit, batch_size=args.batch_size, shuffle=True, **kwargs)
# random loader
# Indices are drawn with replacement, so within one pass an example may appear
# more than once or not at all.
sampler_casia = RandomSampler(trainset_casia, replacement=True)
sampler_hit = RandomSampler(trainset_hit, replacement=True)
# shuffle must stay False because an explicit sampler is supplied.
train_loader_casia = DataLoader(trainset_casia, batch_size=args.batch_size, shuffle=False, sampler=sampler_casia, **kwargs)
train_loader_hit = DataLoader(trainset_hit, batch_size=args.batch_size, shuffle=False, sampler=sampler_hit, **kwargs)

# Validation splits ('vld' group), iterated in order.
valset_casia = HIT('mdata/HWDB1.1fullset.hdf5', 'vld', transform=transforms.Compose([tfm]))
valset_hit = HIT('mdata/HIT_OR3Cfullset.hdf5', 'vld', transform=transforms.Compose([tfm]))
val_loader_casia = DataLoader(valset_casia, batch_size=args.val_batch_size, shuffle=False, **kwargs)
val_loader_hit = DataLoader(valset_hit, batch_size=args.val_batch_size, shuffle=False, **kwargs)

# Report (number of examples, number of batches) for each split.
print('CASIA train:', len(trainset_casia), len(train_loader_casia))
print('HIT train:', len(trainset_hit), len(train_loader_hit))
print('CASIA val:', len(valset_casia), len(val_loader_casia))
print('HIT val:', len(valset_hit), len(val_loader_hit))

### DP config ###
# A "lot" is the number of consecutive batches after which one clipped, noisy
# optimizer step is taken. The target lot size is sqrt(N) examples, rounded to a
# whole number of batches and never less than one batch.
batch_per_lot_casia = max(round(len(trainset_casia)**.5 / args.batch_size), 1)  # set lotsize = sqrt(N)
batch_per_lot_hit = max(round(len(trainset_hit)**.5 / args.batch_size), 1)
print('batch per lot:', batch_per_lot_casia, batch_per_lot_hit)
# Take delta from the shared configuration. It was previously hard-coded to
# 10**(-5), which disagreed with args.delta (1e-6), the value the sigma table
# in eps2sigma was calibrated with.
delta = args.delta
# Noise multipliers for the configured epsilon, one per party.
sigma_casia, sigma_hit = eps2sigma[args.epsilon]
print('Epsilon:', args.epsilon, 'Sigma:', sigma_casia, sigma_hit)

# Model
class VGG(nn.Module):
    """Convolutional classifier with helpers for noisy, clipped gradient updates.

    Args:
        features: module mapping an input batch to a (batch, 2048) tensor.
        num_classes: number of output classes.
        batch_per_lot: number of batches whose gradients form one lot (DP only).
        sigma: Gaussian noise multiplier (DP only).

    The DP helpers read the module-level ``args`` object (``args.clip`` and
    ``args.batch_size``).
    """

    def __init__(self, features, num_classes, batch_per_lot=None, sigma=None):
        super(VGG, self).__init__()
        self.features = features
        self.batch_per_lot = batch_per_lot  # for DP
        self.sigma = sigma  # for DP
        self.classifier = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.LeakyReLU(),
            nn.BatchNorm1d(1024, momentum=0.66),
            nn.Linear(1024, 256),
            nn.LeakyReLU(),
            nn.BatchNorm1d(256, momentum=0.66),
            nn.Linear(256, num_classes),
            nn.LogSoftmax(dim=1)
        )
        # Initialize conv weights from N(0, sqrt(2 / (k_h * k_w * out_channels))).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # Convolutions built with bias=False have no bias tensor to reset.
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        """Return per-class log-probabilities for the batch ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

    def divide_clip_grads(self):
        """Average the accumulated gradients over the lot, then clip each tensor.

        Every parameter's gradient is divided by ``batch_per_lot`` and its L2
        norm is clipped to ``args.clip`` independently of the other parameters.
        Parameters without a gradient are skipped.
        """
        for param in self.parameters():
            if param.grad is None:
                continue
            param.grad /= self.batch_per_lot
            # In-place variant; the un-suffixed clip_grad_norm is deprecated.
            nn.utils.clip_grad_norm_([param], args.clip)

    def gaussian_noise(self, grads):
        """Sample N(0, (args.clip * sigma)^2) noise shaped like ``grads``.

        The noise is created with the device and dtype of ``grads``, so it can
        be added to the gradient directly.
        """
        noise = torch.zeros_like(grads)
        noise.normal_(0.0, std=args.clip*self.sigma)
        return noise

    def add_noise_to_grads(self):
        """Add Gaussian noise, scaled by 1 / lot size, to every gradient.

        The lot size is ``batch_per_lot * args.batch_size``. Parameters without
        a gradient are skipped.
        """
        lotsize = self.batch_per_lot * args.batch_size
        for param in self.parameters():
            if param.grad is None:
                continue
            noise = 1/lotsize * self.gaussian_noise(param.grad)
            param.grad += noise

def conv_unit(input, output, mp=False):
    """Build one conv unit as a list of layers.

    The unit is a 3x3 convolution (stride 1, padding 1), a LeakyReLU and a
    BatchNorm2d with momentum 0.66. When ``mp`` is true, a 3x3 max-pool with
    stride 2 and padding 1 is appended.
    """
    unit = [
        nn.Conv2d(input, output, kernel_size=3, stride=1, padding=1),
        nn.LeakyReLU(),
        nn.BatchNorm2d(output, momentum=0.66),
    ]
    if mp:
        unit.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    return unit

def make_layers():
    """Assemble the convolutional feature extractor as an ``nn.Sequential``.

    A stride-2 stem convolution on a single-channel input is followed by ten
    conv units and a final Flatten.
    NOTE(review): the classifier's 2048 inputs suggest 1x64x64 images - confirm.
    """
    stem = [nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1), nn.LeakyReLU(),
            nn.BatchNorm2d(64, momentum=0.66)]

    # (in_channels, out_channels, append a max-pool after the unit?)
    plan = (
        (64, 128, False), (128, 128, True),
        (128, 256, False), (256, 256, True),
        (256, 384, False), (384, 384, False), (384, 384, True),
        (384, 512, False), (512, 512, False), (512, 512, True),
    )

    modules = list(stem)
    for in_ch, out_ch, pool in plan:
        modules.extend(conv_unit(in_ch, out_ch, mp=pool))
    modules.append(nn.Flatten())

    return nn.Sequential(*modules)

# use PySyft for SPDZ
# Hooking torch gives tensors the PySyft methods used during parameter
# averaging in train() (fix_precision / share / get / float_precision).
hook = sy.TorchHook(torch)
# One virtual worker per party; parameter shares are split between these two.
casia = sy.VirtualWorker(hook, id="casia")
hit = sy.VirtualWorker(hook, id="hit")
# Passed as `crypto_provider` when the parameters are secret-shared.
crypto = sy.VirtualWorker(hook, id="crypto")

Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/home/liyuan/dlrm/venv/lib/python3.6/site-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.15.2.so'



cuda
CASIA train: 751000 1467
HIT train: 300400 587
CASIA val: 146758 287
HIT val: 67590 133
batch per lot: 2 1
Epsilon: 1 Sigma: 1.82 2.01


In [None]:
%%time

# To enable PySyft remote training, uncomment the lines marked SWITCH ON (here and in the training cell). Remote training is not working for now.

# SWITCH ON
# compute_nodes = [casia, hit]
# remote_loader_casia = []
# remote_loader_hit = []

# for batch_idx, (data,target) in enumerate(train_loader_casia):
#     data = data.send(casia)
#     target = target.send(casia)
#     remote_loader_casia.append((data, target))

# for batch_idx, (data,target) in enumerate(train_loader_hit):
#     data = data.send(hit)
#     target = target.send(hit)
#     remote_loader_hit.append((data, target))

In [2]:
# Build the CASIA model with its DP settings (batches per lot, noise multiplier).
# The non-DP variant is kept as a commented toggle rather than being constructed
# and immediately overwritten, which built a second full network for nothing.
# model_casia = VGG(make_layers(), 3755)  # normal model
model_casia = VGG(make_layers(), 3755, batch_per_lot_casia, sigma_casia)  # dp model

# Last finished epoch whose checkpoint is restored; set to -1 to train from scratch.
epo = 72  # load trained model
if epo >= 0:
    # map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
    # machine; the model is moved to `device` further down.
    state_dict = torch.load('models/smpc-hit-casia-{:d}.pt'.format(epo), map_location='cpu')
    model_casia.load_state_dict(state_dict)  # continue from previous training

# Both parties start from identical weights; train() asserts this every epoch.
model_hit = copy.deepcopy(model_casia)

model_hit.batch_per_lot = batch_per_lot_hit  # dp model
model_hit.sigma = sigma_hit

model_casia = model_casia.to(device)
model_hit = model_hit.to(device)

optim_casia = optim.Adam(model_casia.parameters(), lr=args.lr)
optim_hit = optim.Adam(model_hit.parameters(), lr=args.lr)

# Index 0 is CASIA and index 1 is HIT in all three lists.
models = [model_casia, model_hit]
params = [list(model_casia.parameters()), list(model_hit.parameters())]
optims = [optim_casia, optim_hit]

# models[0].send(compute_nodes[0])  # SWITCH ON
# models[1].send(compute_nodes[1])

def train(epoch):
    """Run one federated epoch: local training per party, then SPDZ FedAvg.

    Steps:
      1. Check that both parties start from identical parameters.
      2. Train models[0] on the CASIA loader and models[1] on the HIT loader,
         using the DP update (accumulate a lot, clip, add noise, step).
      3. Print the per-party training loss and accuracy.
      4. Secret-share both parameter sets, compute their 25:10 weighted average
         and write the result back into both models.

    Args:
        epoch: index of the current epoch (not used inside the function).

    NOTE(review): only the tensors returned by ``parameters()`` are averaged;
    module buffers such as BatchNorm running statistics are presumably left
    per-party - confirm this is intended.
    """
    assert len(params[0]) == len(params[1])
    for param_index in range(len(params[0])):
        assert torch.equal(params[0][param_index], params[1][param_index])

    models[0].train()
    models[1].train()

    # Running sums over the epoch; index 0 is CASIA, index 1 is HIT.
    losses = [0, 0]
    corrects = [0, 0]

    def update(data, target, model, optimizer, party, batch_i=None, batch_per_lot=None):
        """Process one batch; in DP mode the optimizer steps once per lot."""
        data, target = data.to(device), target.to(device)
        # In DP mode gradients must accumulate over all batches of a lot, so they
        # are cleared only on the first batch of each lot. Clearing them on every
        # batch (as before) discarded all but the last batch of the lot while
        # still dividing by batch_per_lot. Without DP they are cleared each batch.
        if not batch_per_lot or batch_i % batch_per_lot == 0:
            optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        pred = output.argmax(dim=1, keepdim=True)    # get the index of the max log-probability
        slot = 0 if party == 'casia' else 1
        corrects[slot] += pred.eq(target.view_as(pred)).sum().item()  # debug SWITCH OFF
        losses[slot] += F.nll_loss(output, target, reduction='sum').item()
#         corrects[slot] += pred.eq(target.view_as(pred)).sum().get().item()  # SWITCH ON
#         losses[slot] += F.nll_loss(output, target, reduction='sum').get().item()
        loss.backward()

        if batch_per_lot:  # dp update
            # Step on the last batch of the lot. A trailing incomplete lot at the
            # end of the epoch is never applied.
            if batch_i % batch_per_lot == batch_per_lot - 1:
                model.divide_clip_grads()
                model.add_noise_to_grads()
                optimizer.step()
        else:
            optimizer.step()

    for batch_i, (data, target) in enumerate(train_loader_casia):  # SWITCH OFF
#         update(data, target, models[0], optims[0], 'casia')
        update(data, target, models[0], optims[0], 'casia', batch_i, batch_per_lot_casia)  # dp update
    for batch_i, (data, target) in enumerate(train_loader_hit):  # SWITCH OFF
#         update(data, target, models[1], optims[1], 'hit')
        update(data, target, models[1], optims[1], 'hit', batch_i, batch_per_lot_hit)
#     for batch_i, (data, target) in enumerate(remote_loader_casia):  # SWITCH ON
#         update(data, target, models[0], optims[0], 'casia')
#     for batch_i, (data, target) in enumerate(remote_loader_hit):  # SWITCH ON
#         update(data, target, models[1], optims[1], 'hit')

    loss_casia, loss_hit = losses[0], losses[1]
    correct_casia, correct_hit = corrects[0], corrects[1]

    loss_casia /= len(trainset_casia)
    loss_hit /= len(trainset_hit)
    acc_casia = correct_casia / len(trainset_casia)
    acc_hit = correct_hit / len(trainset_hit)
    print('Trn loss_casia {:.4f}, loss_hit {:.4f}, acc_casia {:.4f}, acc_hit {:.4f}'.format(loss_casia, loss_hit, acc_casia, acc_hit))

    # FedAvg weights of the two parties (25:10).
    ratio_casia = 25
    ratio_hit = 10

    # FedAvg using SPDZ
    new_params = list()
    for param_i in range(len(params[0])):
        spdz_params = list()
        spdz_params.append(params[0][param_i].copy().cpu().fix_precision().share(casia, hit, crypto_provider=crypto))
        spdz_params.append(params[1][param_i].copy().cpu().fix_precision().share(casia, hit, crypto_provider=crypto))
#         if str(device) == 'cpu':  SWITCH ON
              # see https://github.com/OpenMined/PySyft/pull/2990
#             spdz_params.append(params[0][param_i].copy().get().fix_precision().share(casia, hit, crypto_provider=crypto))
#             spdz_params.append(params[1][param_i].copy().get().fix_precision().share(casia, hit, crypto_provider=crypto))
#         else:
#             spdz_params.append(params[0][param_i].copy().cpu().get().fix_precision().share(casia, hit, crypto_provider=crypto))
#             spdz_params.append(params[1][param_i].copy().cpu().get().fix_precision().share(casia, hit, crypto_provider=crypto))

        # The weighted sum is computed on the shares; only the sum is revealed.
        new_param = (spdz_params[0] * ratio_casia + spdz_params[1] * ratio_hit).get().float_precision() / (ratio_casia + ratio_hit)
        new_params.append(new_param)

    # cleanup
    with torch.no_grad():
        for model in params:
            for param in model:
                param *= 0

#         for model in models:  # SWITCH ON
#             model.get()

        for param_index in range(len(params[0])):
            averaged = new_params[param_index]
            # Give each model its own copy. On CPU the previous code set both
            # parameters to the same tensor, so the two models shared storage.
            params[0][param_index].set_(averaged.clone().to(device))
            params[1][param_index].set_(averaged.clone().to(device))

    # FedAvg without SPDZ
#     with torch.no_grad():
#         for p1, p2 in zip(models[0].parameters(), models[1].parameters()):
#             p1.set_((p1.data * ratio_casia + p2.data * ratio_hit) / (ratio_casia + ratio_hit))
#             p2.set_(p1.data)

def val():
    """Evaluate both models on their own validation split and print the metrics.

    The two parameter sets are first asserted to be identical. The models are
    not switched to eval mode (see the commented line below), so they run in
    whatever mode they are currently in.
    """
    assert len(params[0]) == len(params[1])
    for param_index in range(len(params[0])):
        assert torch.equal(params[0][param_index], params[1][param_index])

#     model_casia.eval()  # doesn't work right
    # Running sums over the split; index 0 is CASIA, index 1 is HIT.
    losses = [0, 0]
    corrects = [0, 0]

    def val_batch(data, target, model, party):
        """Accumulate the summed loss and correct-prediction count of one batch."""
        data, target = data.to(device), target.to(device)  # dev
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)    # get the index of the max log-probability
        slot = 0 if party == 'casia' else 1
        corrects[slot] += pred.eq(target.view_as(pred)).sum().item()
        losses[slot] += F.nll_loss(output, target, reduction='sum').item()

    # No gradients are needed for evaluation; disabling autograd avoids building
    # a graph for every validation batch and does not change the outputs.
    with torch.no_grad():
        for data, target in val_loader_casia:
            val_batch(data, target, models[0], 'casia')
        for data, target in val_loader_hit:
            val_batch(data, target, models[1], 'hit')

    loss_casia, loss_hit = losses[0], losses[1]
    correct_casia, correct_hit = corrects[0], corrects[1]

    loss_casia /= len(valset_casia)
    loss_hit /= len(valset_hit)
    acc_casia = correct_casia / len(valset_casia)
    acc_hit = correct_hit / len(valset_hit)
    print('Val loss_casia {:.4f}, loss_hit {:.4f}, acc_casia {:.4f}, acc_hit {:.4f}'.format(loss_casia, loss_hit, acc_casia, acc_hit))

# Train
# Resume right after the loaded checkpoint; with epo == args.epochs - 1 the loop
# body does not run at all.
start_epoch = epo + 1
for epoch in range(start_epoch, args.epochs):
    print('Epoch', epoch)
    t1 = int(time.time())
    train(epoch)
    t2 = int(time.time())
    val()
    t3 = int(time.time())
    print('Epoch trn time {:d}s, val time {:d}s'.format(t2-t1, t3-t2))
    # Save a checkpoint after every epoch (models[0] only; both models hold the
    # same parameters after averaging).
    torch.save(models[0].state_dict(), "models/smpc-hit-casia-{}.pt".format(epoch))

# Unnumbered copy of the final weights.
torch.save(models[0].state_dict(), "models/smpc-hit-casia.pt")

# Val
# NOTE(review): the label printed here is `epo`, the epoch of the loaded
# checkpoint, even when the loop above has trained further epochs.
print('Epoch', epo)
val()

Epoch 72
Val loss_casia 0.2845, loss_hit 0.1115, acc_casia 0.9363, acc_hit 0.9770
