In [19]:
import os
import copy
import time
import pickle
import numpy as np
import pandas as pd
from datetime import datetime
import time
import argparse
import random
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, Subset, random_split
from torchvision import datasets, transforms
import torchvision
from torch.autograd import Variable
from torchvision.datasets import MNIST, EMNIST
import torch.nn.functional as F
from matplotlib.pyplot import subplots
from torchvision.utils import save_image
import torch.optim as optim
from tensorboardX import SummaryWriter
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
bs = 100
n_epoch = 50
dim=100
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
writer = SummaryWriter(os.path.join('../lra', 'beta'))
gpu = 1
device = 1

In [20]:
def test_inference(net, testloader):
    """ Returns the test accuracy and loss.
    """
    net.eval()
    loss, total, correct = 0.0, 0.0, 0.0
    criterion = nn.CrossEntropyLoss()
    criterion.cuda(gpu)
    mnist_dim = train_dataset.train_data.size(1) * train_dataset.train_data.size(2)
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(testloader):
            images, labels = images.cuda(gpu), labels.cuda(gpu)
            
            # Inference
            outputs = net(images)
            batch_loss = criterion(outputs, labels)
            loss += copy.deepcopy(batch_loss.item())

            # Prediction
            _, pred_labels = torch.max(outputs, 1)
            pred_labels = pred_labels.view(-1)
            correct += torch.sum(torch.eq(pred_labels, labels)).item()
            total += len(labels)
    accuracy = correct/total
    return accuracy, loss

def test_error(model, steal_model, testloader):
    model.eval()
    steal_model.eval()
    loss, total, correct = 0.0, 0.0, 0.0
    
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(testloader):
            images, labels = images.cuda(gpu), labels.cuda(gpu)

            # Inference
            outputs0 = model(images)
            outputs1 = steal_model(images)
            
            # Prediction
            _, pred_labels0 = torch.max(outputs0, 1)
            pred_labels0 = pred_labels0.view(-1)
            
            _, pred_labels1 = torch.max(outputs1, 1)
            pred_labels1 = pred_labels1.view(-1)
            
            correct += torch.sum(torch.eq(pred_labels0, pred_labels1)).item()
            total += len(labels)
    accuracy = correct/total
    return accuracy


def stat_num(model, steal_model, testloader):
    model.eval()
    steal_model.eval()
    loss, total, correct = 0.0, 0.0, 0.0
    l2n = {i: 0 for i in range(11)}
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(testloader):
            images, labels = images.cuda(gpu), labels.cuda(gpu)

            # Inference
            outputs0 = model(images)
            outputs1 = steal_model(images)
            
            # Prediction
            _, pred_labels0 = torch.max(outputs0, 1)
            pred_labels0 = pred_labels0.view(-1)
            
            _, pred_labels1 = torch.max(outputs1, 1)
            pred_labels1 = pred_labels1.view(-1)
            
            list0 = pred_labels0.cpu().numpy().tolist()
            list1 = pred_labels1.cpu().numpy().tolist()
            for i in range(len(list0)):
                if list0[i] == list1[i]:
                    l2n[list0[i]] += 1
    for k, v in l2n.items():
        print(f"Label {k}: {v}")

class Discriminator(nn.Module):
    """
        Convolutional Discriminator for MNIST
    """
    def __init__(self, in_channel=1, num_classes=11):
        super(Discriminator, self).__init__()
        self.conv = nn.Sequential(
            # 28 -> 14
            nn.Conv2d(in_channel, 512, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),
            # 14 -> 7
            nn.Conv2d(512, 256, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),
            # 7 -> 4
            nn.Conv2d(256, 128, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.AvgPool2d(4),
        )
        self.fc = nn.Sequential(
            # reshape input, 128 -> 1
            nn.Linear(128, num_classes)
        )
    
    def forward(self, x, y=None):
        y_ = self.conv(x)
        y_ = y_.view(y_.size(0), -1)
        y_ = self.fc(y_)
        return y_


class Generator(nn.Module):
    """
        Convolutional Generator for MNIST
    """
    def __init__(self, input_size=100, num_classes=784):
        super(Generator, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 4*4*512),
            nn.ReLU(),
        )
        self.conv = nn.Sequential(
            # input: 4 by 4, output: 7 by 7
            nn.ConvTranspose2d(512, 256, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # input: 7 by 7, output: 14 by 14
            nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # input: 14 by 14, output: 28 by 28
            nn.ConvTranspose2d(128, 1, 4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        )
        
    def forward(self, x, y=None):
        x = x.view(x.size(0), -1)
        y_ = self.fc(x)
        y_ = y_.view(y_.size(0), 512, 4, 4)
        y_ = self.conv(y_)
        return y_


In [21]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))])

train_dataset = datasets.MNIST(root='../data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='../data', train=False, transform=transform, download=False)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=False)

In [22]:
labels = train_dataset.train_labels
indices0,indices1 = [], []
for i in range(0, 10):
    indices0 += (labels == i).nonzero().view(-1).tolist()
for i in range(5, 10):
    indices1 += (labels == i).nonzero().view(-1).tolist()
localset0, localset1 = Subset(train_dataset, indices0), Subset(train_dataset, indices1)
dataloader0, dataloader1 =DataLoader(localset0, batch_size=bs,shuffle=True), DataLoader(localset1, batch_size=bs,shuffle=True)



In [23]:
z_dim = 100
mnist_dim = train_dataset.train_data.size(1) * train_dataset.train_data.size(2)
G = Generator().to(gpu)
D = Discriminator().to(gpu)
criterion = nn.CrossEntropyLoss().to(gpu)
kl_criterion = nn.KLDivLoss(reduction='batchmean').to(gpu)
# optimizer
lr = 0.0002 
G_optimizer = optim.Adam(G.parameters(), lr = lr)
D_optimizer = optim.Adam(D.parameters(), lr = lr)



In [24]:
def zoo(D, x_fake, y_fake, D_optimizer):
    grad_est = torch.zeros_like(x_fake).to(gpu)
    #reduction='none'
    criterion = nn.CrossEntropyLoss().cuda(gpu)
    kl_criterion = nn.KLDivLoss(reduction='batchmean').to(gpu)
    m = 20
    epsilon = 0.1
    N = x_fake.size(0)
    C = x_fake.size(1)
    S = x_fake.size(2)
    d = S**2 * C
    
    lossG_target = criterion(D(torch.tanh(x_fake)),y_fake)
    
    for i in range(m):
        u = torch.randn(x_fake.size()).cuda(gpu)
        u_flat = u.view([N, -1])
        u_norm = u / torch.norm(u_flat, dim=1).view([-1, 1, 1, 1])
        x_mod_pre = x_fake + (epsilon * u_norm)
        Tout = D(torch.tanh(x_mod_pre))
        
        lossG_target_mod = criterion(Tout, y_fake)
        grad_est += (
                (d / m) * (lossG_target_mod - lossG_target) / epsilon
            ).view([-1, 1, 1, 1]) * u_norm
        
    grad_est /= N
    #return grad_est
    
    D.zero_grad()
    x_det_pre = x_fake.detach()
    Tout = D(torch.tanh(x_det_pre))
    
    fake_label = torch.topk(Tout.detach(), 1)[1].squeeze(1)
    lossG_det = criterion(Tout, fake_label)
    lossG_det.backward()
    D_optimizer.step()
    return lossG.mean(), 0, 0, grad_est
    
    
#     lossG_det = kl_criterion(F.log_softmax(Tout, dim=1), F.log_softmax(Variable(Tout), dim=1))
#     print(f"lossG_det: {lossG_det.item()}")
   
#     lossG_det.backward()
#     for name, param in D.named_parameters():
#         param_gard = param.grad.data
#         print(param_gard)
#     return lossG.mean(), 0, 0, grad_est





#     lossG_det = criterion(Tout, y_fake)
#     lossG_det.backward()
#     grad_true_flat = x_det_pre.grad.view([N, -1])

#     cos = nn.CosineSimilarity(dim=1)
#     cs = cos(grad_true_flat, grad_est_flat)
#     mag_ratio = grad_est_flat.norm(2, dim=1) / grad_true_flat.norm(2, dim=1)
#     lossG = lossG_det.detach()
#     return lossG.mean(), cs.mean(), mag_ratio.mean(), grad_est

In [25]:
class FakeData(Dataset):
    def __init__(self):
        super(FakeData, self).__init__()
        count = 25000
        self.x = []
        self.y = []
        for i in range(count):
            self.x.append(Variable(torch.randn(z_dim)))
            self.y.append(Variable(torch.zeros(1).fill_(10).long()))
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


epochs = 400
local_ep = 1
D.train()
G.train()
for epoch in range(epochs):
    for e in range(local_ep):
        for pri_x, pri_y in dataloader0:
            pri_x, pri_y = pri_x.to(gpu), pri_y.to(gpu)
            D.zero_grad()
            pri_logit = D(pri_x)
            loss = criterion(pri_logit, pri_y)
            loss.backward()
            D_optimizer.step()
    acc, _ = test_inference(D, test_loader)
    print(f"Epoch: {epoch} | Local Epoch: {e}: Loss: {loss.item()}, Acc: {acc}")
    
    # distributs public dataset with some fake data
    fakedataset = FakeData()
    fakedataloader = DataLoader(fakedataset, batch_size=bs,shuffle=False)
    
    for fake_x, fake_y in fakedataloader:
        fake_x, fake_y = fake_x.to(gpu), fake_y.view(-1).to(gpu)
        G.zero_grad()
        G_output = G(fake_x)
        temp_fake = copy.deepcopy(fake_y).fill_(0)
        loss_g, cs, mag_ratio, estimate_grad = zoo(D, G_output, temp_fake, D_optimizer)
        print(f"loss_g: {loss_g.item()} | cs: {cs.item()} | mag_ratio: {mag_ratio.item()}")
        G_output.backward(estimate_grad)
        G_optimizer.step()
    for fake_x, fake_y in fakedataloader:
        fake_x, fake_y = fake_x.to(gpu), fake_y.view(-1).to(gpu)
        D.zero_grad()
        G_output = G(fake_x)
        pub_logit = D(G_output)
        loss_d = criterion(pub_logit, fake_y)
        loss_d.backward()
        D_optimizer.step()
    with torch.no_grad():
        test_z = Variable(torch.randn(50, z_dim).to(device))
        generated = G(test_z)
        #out0grid = torchvision.utils.make_grid(generated.view(generated.size(0), 1, 28, 28), nrow=50)
        out0grid = torchvision.utils.make_grid(generated, nrow=50)
        writer.add_image('images', out0grid, epoch)
        
writer.close()

Epoch: 0 | Local Epoch: 0: Loss: 0.32365119457244873, Acc: 0.942


NameError: name 'lossG' is not defined