In [1]:
# modified from ref: https://github.com/pytorch/examples/blob/main/mnist/main.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

from model import Net
import time

In [2]:
# Display the name of CUDA device 0; the same name is embedded in the saved
# model file name and in the timing CSV file name later in this notebook.
torch.cuda.get_device_name(device=0)

'NVIDIA A100-SXM4-80GB'

In [3]:
# Instantiate the network once so the notebook displays its layer structure.
Net()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [4]:
def train(model, device, train_loader, optimizer, epoch, log_interval=100, dry_run=False):
    """Run one training epoch over ``train_loader``.

    Args:
        model: Network to optimise; it is put into training mode first and
            its output is passed to ``F.nll_loss``.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches. It must support
            ``len()`` and expose ``.dataset`` for the progress message.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        epoch: Epoch number; only used in the progress message.
        log_interval: A progress line is printed every ``log_interval`` batches.
        dry_run: When true, stop right after the first progress line.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Everything below is logging only; skip it for non-logging steps.
        if step % log_interval != 0:
            continue

        seen = step * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, percent, batch_loss.item()))
        if dry_run:
            break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [5]:
use_cuda = torch.cuda.is_available()
# use_mps = torch.backends.mps.is_available()
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if use_cuda else "cpu")

In [6]:
# DataLoader keyword arguments for the training and evaluation loaders.
batch_size = 64
test_batch_size = 1000
# Shuffling is a property of the split, not of the hardware: training batches
# are reshuffled every epoch on any device, while evaluation keeps the dataset
# order. Previously `shuffle` lived in `cuda_kwargs`, so the test loader was
# shuffled on GPU and the training loader was never shuffled on CPU.
train_kwargs = {'batch_size': batch_size, 'shuffle': True}
test_kwargs = {'batch_size': test_batch_size, 'shuffle': False}
if use_cuda:
    # Options that only make sense when batches are copied to a GPU.
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

In [7]:
# Hyperparameters: `lr` is passed to the Adadelta optimizer below; `epochs` and
# `gamma` (StepLR decay factor) are consumed by the training-loop cell.
# NOTE(review): the upstream example this notebook is based on uses lr=1.0 for
# Adadelta, and the recorded runs plateau near 95% test accuracy -- confirm
# that 1e-2 is intentional.
lr=1e-2
epochs=20
gamma=0.7
# Convert images to tensors, then normalise with mean 0.1307 and std 0.3081
# (presumably the MNIST training-set statistics -- TODO confirm).
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
# dataset1 is the training split (downloaded into ../data if missing);
# dataset2 is the test split read from the same directory.
dataset1 = datasets.MNIST('../data', train=True, download=True,
                   transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
                   transform=transform)
# Loader options come from train_kwargs / test_kwargs built in the previous cell.
train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

# Fresh model on the selected device, optimised with Adadelta.
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=lr)

In [8]:
# Multiply the learning rate by `gamma` after every epoch (step_size=1).
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
for epoch in range(1, epochs + 1):
    # One pass over the training data, then an evaluation on the test set.
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()


Test set: Average loss: 0.2943, Accuracy: 9157/10000 (92%)


Test set: Average loss: 0.2302, Accuracy: 9342/10000 (93%)


Test set: Average loss: 0.2052, Accuracy: 9410/10000 (94%)


Test set: Average loss: 0.1918, Accuracy: 9448/10000 (94%)


Test set: Average loss: 0.1831, Accuracy: 9465/10000 (95%)


Test set: Average loss: 0.1768, Accuracy: 9484/10000 (95%)


Test set: Average loss: 0.1731, Accuracy: 9497/10000 (95%)


Test set: Average loss: 0.1705, Accuracy: 9501/10000 (95%)


Test set: Average loss: 0.1686, Accuracy: 9504/10000 (95%)


Test set: Average loss: 0.1676, Accuracy: 9507/10000 (95%)


Test set: Average loss: 0.1669, Accuracy: 9514/10000 (95%)


Test set: Average loss: 0.1659, Accuracy: 9510/10000 (95%)


Test set: Average loss: 0.1655, Accuracy: 9511/10000 (95%)


Test set: Average loss: 0.1654, Accuracy: 9511/10000 (95%)


Test set: Average loss: 0.1652, Accuracy: 9514/10000 (95%)


Test set: Average loss: 0.1650, Accuracy: 9513/10000 (95%)


Test set: Average loss:

In [9]:
# Persist the whole trained model object; the file name embeds the GPU name.
torch.save(model, f'./vanilla_pytorch_mnist_{torch.cuda.get_device_name(0)}.pth')

In [8]:
# model=torch.load('./vanilla_pytorch_mnist_'+f'{torch.cuda.get_device_name(0)}.pth')
# model

In [10]:
def get_time_elapsed(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and time the evaluation loop.

    The timer wraps the whole loop, so it covers batch loading, the transfer
    to ``device``, the forward pass and the metric bookkeeping.

    Args:
        model: Network to evaluate; it is put into eval mode and its output
            is passed to ``F.nll_loss``.
        device: Device every batch is moved to before the forward pass.
        test_loader: Iterable of ``(data, target)`` batches exposing
            ``.dataset`` (its length is the denominator for both metrics).

    Returns:
        Wall-clock duration of the loop as measured by ``time.perf_counter``.
    """
    model.eval()
    loss_sum, num_correct = 0, 0
    with torch.no_grad():
        started = time.perf_counter()
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Accumulate the summed (not averaged) loss of this batch.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest output value.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            num_correct += predicted.eq(labels.view_as(predicted)).sum().item()
        time_elapsed = time.perf_counter() - started

    mean_loss = loss_sum / len(test_loader.dataset)
    accuracy_pct = 100. * num_correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mean_loss, num_correct, len(test_loader.dataset), accuracy_pct))
    print('Time Elapsed:{}'.format(time_elapsed))
    return time_elapsed

In [11]:
nruns = 10
# Pre-seed every run index with 0, then overwrite it with the measured duration.
time_dict = dict.fromkeys(range(nruns), 0)
for i in range(nruns):
    time_dict[i] = get_time_elapsed(model, device, test_loader)


Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1684837199991307

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1432125230003294

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1396725149988924

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1414269739998417

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1450750989988592

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1489928430000873

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1478785049985163

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1400177989999065

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1422827429996687

Test set: Average loss: 0.1649, Accuracy: 9516/10000 (95%)

Time Elapsed:1.1445842850007466


In [12]:
# Show the collected timings (run index -> value returned by get_time_elapsed).
time_dict

{0: 1.1684837199991307,
 1: 1.1432125230003294,
 2: 1.1396725149988924,
 3: 1.1414269739998417,
 4: 1.1450750989988592,
 5: 1.1489928430000873,
 6: 1.1478785049985163,
 7: 1.1400177989999065,
 8: 1.1422827429996687,
 9: 1.1445842850007466}

In [13]:
import pandas as pd

# Tabulate the per-run timings and tag every row with the GPU name; the same
# name is embedded in the CSV file name.
device_name = torch.cuda.get_device_name(0)
df = pd.DataFrame({
    'run': list(range(nruns)),
    'time_elapsed': list(time_dict.values()),
})
df['device'] = device_name
df.to_csv(f'pytorch_time_elapsed_{device_name}.csv', index=False)
df

Unnamed: 0,run,time_elapsed,device
0,0,1.168484,NVIDIA A100-SXM4-80GB
1,1,1.143213,NVIDIA A100-SXM4-80GB
2,2,1.139673,NVIDIA A100-SXM4-80GB
3,3,1.141427,NVIDIA A100-SXM4-80GB
4,4,1.145075,NVIDIA A100-SXM4-80GB
5,5,1.148993,NVIDIA A100-SXM4-80GB
6,6,1.147879,NVIDIA A100-SXM4-80GB
7,7,1.140018,NVIDIA A100-SXM4-80GB
8,8,1.142283,NVIDIA A100-SXM4-80GB
9,9,1.144584,NVIDIA A100-SXM4-80GB


## CrypTen Test

In [7]:
import crypten
# import torch

# Initialise CrypTen for this kernel.
crypten.init()
# Restrict PyTorch to a single thread for CPU operations.
torch.set_num_threads(1)

In [8]:
# NOTE(review): `dummy_model` is not used anywhere in the visible notebook --
# confirm whether it can be removed.
dummy_model = Net()
# Load the model object saved earlier and move it to the selected device.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
plaintext_model = torch.load('./vanilla_pytorch_mnist_'+f'{torch.cuda.get_device_name(0)}.pth').to(device)

print(plaintext_model)

# Encrypt the trained model:

# 1. Create a dummy input with the same shape as the model input
dummy_input = torch.empty((1, 1, 28, 28)).to(device)

# 2. Construct a CrypTen network with the trained model and dummy_input
private_model = crypten.nn.from_pytorch(plaintext_model, dummy_input)

# 3. Encrypt the CrypTen network with party 0 as the source (src=0)
private_model.encrypt(src=0)

# Check that the model is encrypted:
print("Model successfully encrypted:", private_model.encrypted)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)




Model successfully encrypted: True


  param = torch.from_numpy(numpy_helper.to_array(node))


In [9]:
# Load the saved test data through CrypTen, keep the first 100 entries and
# insert a new dimension at index 1.
# NOTE(review): './test_data.pth' is not created anywhere in the visible
# notebook -- confirm where it comes from.
data_enc = crypten.load_from_party('./test_data.pth')[:100].unsqueeze(1)
# data_enc2 = data_enc[:counts]
# data_flatten = data_enc.flatten(start_dim=1)

In [10]:
# Put the encrypted model into evaluation mode and run it on the loaded batch.
private_model.eval()
output_enc = private_model(data_enc)

In [12]:
# Decrypt the model output into a plaintext tensor.
output = output_enc.get_plain_text()

  dividend = tensor // self._scale - correction


In [33]:
# Sanity-check the shape of the decrypted output.
output.shape

torch.Size([100, 10])

In [47]:
from tqdm.notebook import tqdm
import crypten.mpc as mpc
import crypten.communicator as comm

@mpc.run_multiprocess(world_size=2)
def get_time_elapsed_crypten(device, test_loader):
    """Time encrypted CPU inference over ``test_loader`` in a 2-process CrypTen run.

    The saved model is loaded, moved to the CPU, converted to a CrypTen model
    and encrypted with party 0 as the source. Every batch is then encrypted,
    pushed through the model and decrypted again to compute loss and accuracy.

    NOTE(review): the recorded output of this cell shows both worker processes
    failing inside ``crypten.init()`` -> ``_setup_prng()``; the traceback is cut
    off, so the root cause still needs to be confirmed.

    Args:
        device: Not used by this CPU variant.
        test_loader: Iterable of ``(data, target)`` batches exposing ``.dataset``.

    Returns:
        Wall-clock duration of the evaluation loop from ``time.perf_counter``.
    """
    checkpoint_path = './vanilla_pytorch_mnist_'+f'{torch.cuda.get_device_name(0)}.pth'
    cpu_model = torch.load(checkpoint_path).to('cpu')
    example_input = torch.empty((1, 1, 28, 28))

    encrypted_model = crypten.nn.from_pytorch(cpu_model, example_input)
    encrypted_model.encrypt(src=0)
    encrypted_model.eval()

    loss_sum, num_correct = 0, 0
    with torch.no_grad():
        started = time.perf_counter()
        for images, labels in tqdm(test_loader):
            encrypted_images = crypten.cryptensor(images)
            log_probs = encrypted_model(encrypted_images).get_plain_text()
            # Accumulate the summed (not averaged) loss of this batch.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest output value.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            num_correct += predicted.eq(labels.view_as(predicted)).sum().item()
        time_elapsed = time.perf_counter() - started

    mean_loss = loss_sum / len(test_loader.dataset)
    accuracy_pct = 100. * num_correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mean_loss, num_correct, len(test_loader.dataset), accuracy_pct))
    print('Time Elapsed:{}'.format(time_elapsed))
    return time_elapsed

get_time_elapsed_crypten(device, test_loader)

Process Process-15:
Process Process-14:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ss4yd/.local/lib/python3.8/site-packages/crypten/mpc/context.py", line 29, in _launch
    crypten.init()
  File "/home/ss4yd/.local/lib/python3.8/site-packages/crypten/mpc/context.py", line 29, in _launch
    crypten.init()
  File "/home/ss4yd/.local/lib/python3.8/site-packages/crypten/__init__.py", line 77, in init
    _setup_prng()
  File "/home/ss4yd/.local/lib/python3.8/site-packages/crypten/__init__.py", line 77, in

In [44]:
def get_time_elapsed_crypten_gpu(device, test_loader):
    """Time encrypted inference over ``test_loader`` with model and data on ``device``.

    The saved model is loaded onto ``device``, converted to a CrypTen model,
    encrypted with party 0 as the source and moved to ``device``. Every batch
    is encrypted, moved to ``device``, pushed through the model and decrypted
    again to compute loss and accuracy.

    Args:
        device: Device that holds the model, the encrypted batches and the targets.
        test_loader: Iterable of ``(data, target)`` batches exposing ``.dataset``.

    Returns:
        Wall-clock duration of the evaluation loop from ``time.perf_counter``.
    """
    checkpoint_path = './vanilla_pytorch_mnist_'+f'{torch.cuda.get_device_name(0)}.pth'
    device_model = torch.load(checkpoint_path).to(device)
    example_input = torch.empty((1, 1, 28, 28)).to(device)

    encrypted_model = crypten.nn.from_pytorch(device_model, example_input)
    encrypted_model.encrypt(src=0)
    encrypted_model = encrypted_model.to(device)
    encrypted_model.eval()

    loss_sum, num_correct = 0, 0
    with torch.no_grad():
        started = time.perf_counter()
        for images, labels in tqdm(test_loader):
            labels = labels.to(device)
            # Encrypt first, then move the encrypted tensor to the device.
            encrypted_images = crypten.cryptensor(images).to(device)
            log_probs = encrypted_model(encrypted_images).get_plain_text()
            # Accumulate the summed (not averaged) loss of this batch.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest output value.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            num_correct += predicted.eq(labels.view_as(predicted)).sum().item()
        time_elapsed = time.perf_counter() - started

    mean_loss = loss_sum / len(test_loader.dataset)
    accuracy_pct = 100. * num_correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mean_loss, num_correct, len(test_loader.dataset), accuracy_pct))
    print('Time Elapsed:{}'.format(time_elapsed))
    return time_elapsed

get_time_elapsed_crypten_gpu(device, test_loader)