In [1]:
pip install torchcsprng==0.1.3+cu101 -f https://download.pytorch.org/whl/torch_stable.html

In [2]:
pip install opacus

In [3]:
pip install tzlocal==2.0

In [4]:
pip install codecarbon

In [5]:
pip install wandb

In [1]:
from codecarbon import EmissionsTracker



In [2]:
# --- Privacy settings (passed to the Opacus PrivacyEngine further down) ---
EPSILON = 10          # target_epsilon; replaced by "inf" in a later cell for non-private runs
DELTA = 1e-5          # target_delta, also used when querying the privacy spent
MAX_GRAD_NORM = 1.2   # max_grad_norm handed to the privacy engine

# --- Optimisation / data-loading settings ---
EPOCHS = 30           # upper bound on the number of training epochs
LR = 1e-3             # RMSprop learning rate (lowered later when EPSILON == "inf")
NUM_WORKERS = 2       # DataLoader worker processes

In [3]:
# Size of each batch that is actually pushed through the model.
BATCH_SIZE = 64
# Logical batch size: gradients of several physical batches are accumulated
# before a real optimizer step is taken.
VIRTUAL_BATCH_SIZE = 128
assert VIRTUAL_BATCH_SIZE % BATCH_SIZE == 0, (
    "VIRTUAL_BATCH_SIZE should be divisible by BATCH_SIZE"
)
# Number of physical batches per real optimizer step (exact integer division).
N_ACCUMULATION_STEPS = VIRTUAL_BATCH_SIZE // BATCH_SIZE


In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

# Per-channel statistics of CIFAR10. They are treated as public knowledge here;
# if that is not acceptable they can be estimated with a modest privacy budget.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD_DEV = (0.2023, 0.1994, 0.2010)

# Preprocessing shared by the train and test datasets:
# convert to tensor, then normalise each channel with the statistics above.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD_DEV),
    ]
)

In [5]:
from torchvision.datasets import CIFAR10
from opacus.utils.uniform_sampler import UniformWithReplacementSampler

# Directory (relative to the notebook) where CIFAR10 is stored / downloaded.
DATA_ROOT = "../cifar10"

# Training split, preprocessed with `transform`.
train_dataset = CIFAR10(
    root=DATA_ROOT,
    train=True,
    download=True,
    transform=transform,
)

# Fraction of the training set represented by one physical batch; this is the
# rate given to the sampler and (scaled by N_ACCUMULATION_STEPS) to the privacy engine.
SAMPLE_RATE = BATCH_SIZE / len(train_dataset)

# Training batches are produced by Opacus' UniformWithReplacementSampler
# rather than by a fixed batch_size.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_sampler=UniformWithReplacementSampler(
        num_samples=len(train_dataset),
        sample_rate=SAMPLE_RATE,
    ),
    num_workers=NUM_WORKERS,
)

# Held-out split, same preprocessing.
test_dataset = CIFAR10(
    root=DATA_ROOT,
    train=False,
    download=True,
    transform=transform,
)

# Evaluation iterates the test set in order with fixed-size batches.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
from torchvision import models
import torch.nn as nn
# Start from a pretrained ResNet-18 and swap its final fully-connected layer
# for a fresh 10-output classifier (one output per CIFAR10 class).
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=512, out_features=10)

# Set EPSILON to "inf" (next cell) for non-private training; leave it numeric to train with differential privacy.

In [7]:
# Overrides the numeric EPSILON from the configuration cell. The string "inf"
# is the sentinel the later cells compare against to skip every Opacus step
# (model conversion, privacy engine) and to select the non-DP training loop.
EPSILON = "inf"

In [8]:
# Only needed for private training: make the model compatible with Opacus.
if EPSILON != "inf":
    from opacus.dp_model_inspector import DPModelInspector
    from opacus.utils import module_modification

    # Replace the BatchNorm layers via Opacus' helper, then ask the inspector
    # whether the resulting model is accepted.
    model = module_modification.convert_batchnorm_modules(model)
    inspector = DPModelInspector()
    print(f"Is the model valid? {inspector.validate(model)}")

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move the model's parameters to the selected device.
model = model.to(device)

In [10]:
import torch.optim as optim

# Module-level loss object (the train/test functions build their own as well).
criterion = nn.CrossEntropyLoss()

# Non-private runs use a much smaller learning rate than the configured default.
LR = 1e-6 if EPSILON == "inf" else LR

optimizer = optim.RMSprop(params=model.parameters(), lr=LR)

In [11]:
def accuracy(preds, labels):
    """Return the fraction of positions where `preds` equals `labels`.

    Both arguments are compared element-wise and the mean of the resulting
    boolean array is returned (callers in this notebook pass NumPy arrays).
    """
    matches = preds == labels
    return matches.mean()

In [12]:
# Private training only: wrap the optimizer with an Opacus privacy engine.
if EPSILON != "inf":
    from opacus import PrivacyEngine

    privacy_engine = PrivacyEngine(
        model,
        # One real optimizer step covers N_ACCUMULATION_STEPS physical batches,
        # so the sampling rate seen by the engine is scaled accordingly.
        sample_rate=SAMPLE_RATE * N_ACCUMULATION_STEPS,
        epochs=EPOCHS,
        target_epsilon=EPSILON,
        target_delta=DELTA,
        max_grad_norm=MAX_GRAD_NORM,
    )
    privacy_engine.attach(optimizer)

    print(f"Using sigma={privacy_engine.noise_multiplier} and C={MAX_GRAD_NORM}")

In [13]:
import numpy as np

def train(model, train_loader, optimizer, epoch, device):
    """Run one epoch of private training with virtual-batch accumulation.

    Args:
        model: network to train; put into training mode here.
        train_loader: iterable of (images, target) batches that also supports len().
        optimizer: optimizer exposing `virtual_step()` and `privacy_engine`,
            as used below (the notebook attaches a PrivacyEngine to it).
        epoch: epoch number, used only in the progress message.
        device: device the batches are moved to.

    Returns:
        (mean of the per-batch losses, mean of the per-batch accuracies * 100).
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss()

    batch_losses = []
    batch_accuracies = []

    for step, (images, target) in enumerate(train_loader):
        images, target = images.to(device), target.to(device)

        # Forward pass.
        logits = model(images)
        loss = loss_fn(logits, target)

        # Top-1 accuracy of this batch, computed on CPU copies.
        predicted = logits.detach().cpu().numpy().argmax(axis=1)
        expected = target.detach().cpu().numpy()
        batch_acc = accuracy(predicted, expected)

        batch_losses.append(loss.item())
        batch_accuracies.append(batch_acc)

        loss.backward()

        # A real optimizer step is taken on every N_ACCUMULATION_STEPS-th batch
        # and on the last batch of the loader; every other batch only takes a
        # virtual step.
        batches_done = step + 1
        if batches_done % N_ACCUMULATION_STEPS != 0 and batches_done != len(train_loader):
            optimizer.virtual_step()
        else:
            optimizer.step()

        # Periodic progress report including the privacy budget spent so far.
        if step % 200 == 0:
            epsilon, _best_alpha = optimizer.privacy_engine.get_privacy_spent(DELTA)
            print(
                f"\tTrain Epoch: {epoch} \t"
                f"Loss: {np.mean(batch_losses):.6f} "
                f"Acc@1: {np.mean(batch_accuracies) * 100:.6f} "
                f"(ε = {epsilon:.2f}, δ = {DELTA})"
            )

    return np.mean(batch_losses), (np.mean(batch_accuracies)) * 100


In [14]:
import numpy as np

def train_non_dp(model, train_loader, optimizer, epoch, device):
    """Run one epoch of ordinary (non-private) training.

    Gradients are cleared at the start of every iteration. Without that,
    `loss.backward()` keeps adding to the gradients of all previous batches,
    and each `optimizer.step()` would use that ever-growing sum instead of
    the current batch's gradient.

    Args:
        model: network to train; put into training mode here.
        train_loader: iterable of (images, target) batches.
        optimizer: a regular torch optimizer over the model's parameters.
        epoch: epoch number, used only in the progress message.
        device: device the batches are moved to.

    Returns:
        (mean of the per-batch losses, mean of the per-batch accuracies * 100).
    """
    model.train()
    criterion = nn.CrossEntropyLoss()

    losses = []
    top1_acc = []

    for i, (images, target) in enumerate(train_loader):
        images = images.to(device)
        target = target.to(device)

        # Drop gradients left over from the previous batch.
        optimizer.zero_grad()

        # compute output
        output = model(images)
        loss = criterion(output, target)

        preds = np.argmax(output.detach().cpu().numpy(), axis=1)
        labels = target.detach().cpu().numpy()

        # measure accuracy and record loss
        acc = accuracy(preds, labels)

        losses.append(loss.item())
        top1_acc.append(acc)

        loss.backward()
        optimizer.step()

        # Periodic progress report (running means over the epoch so far).
        if i % 200 == 0:
            print(
                f"\tTrain Epoch: {epoch} \t"
                f"Loss: {np.mean(losses):.6f} "
                f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
            )
    return np.mean(losses), (np.mean(top1_acc))*100

In [15]:
def test(model, test_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

    top1_avg = np.mean(top1_acc)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return np.mean(losses),np.mean(top1_acc)*100

In [21]:
from tqdm import tqdm
def train_main(epsilon, target_test_acc=55):
    """Train for up to EPOCHS epochs while tracking emissions with CodeCarbon.

    Uses the notebook-level `model`, `train_loader`, `test_loader`,
    `optimizer` and `device`. Training stops early once the test accuracy
    reaches `target_test_acc`.

    Args:
        epsilon: the string "inf" selects the non-private loop
            (`train_non_dp`); any other value selects the private loop (`train`).
        target_test_acc: test accuracy (in percent) at which training stops.

    Returns:
        (train_losses, train_accuracies, test_losses, test_accuracies),
        one entry per completed epoch.
    """
    train_losses = []
    train_accuracies = []
    test_losses = []
    test_accuracies = []

    non_private = epsilon == "inf"
    run_label = "Infinity" if non_private else str(epsilon)
    run_epoch = train_non_dp if non_private else train

    tracker = EmissionsTracker(project_name='epsilon_CNN=' + run_label)
    tracker.start()
    try:
        for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
            train_loss, train_acc = run_epoch(model, train_loader, optimizer, epoch + 1, device)
            test_loss, test_acc = test(model, test_loader, device)

            train_losses.append(train_loss)
            train_accuracies.append(train_acc)
            test_losses.append(test_loss)
            test_accuracies.append(test_acc)

            if test_acc >= target_test_acc:
                print("Desired Accuracy Reached")
                break
    finally:
        # Always stop the tracker, even if an epoch raised, so it is not left
        # running and whatever was measured still gets reported.
        emissions = tracker.stop()
        print(emissions)
    return train_losses, train_accuracies, test_losses, test_accuracies

In [20]:
# Run the full training loop with the current EPSILON setting and keep the
# per-epoch loss/accuracy histories for later inspection.
train_losses,train_accuracies,test_losses,test_accuracies = train_main(EPSILON)