<a href="https://colab.research.google.com/github/XinGuu/dl-hessian-pytorch/blob/master/hessian.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!nvidia-smi

Wed Jun  1 22:15:59 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!git clone https://github.com/XinGuu/dl-hessian-pytorch.git

Cloning into 'dl-hessian-pytorch'...
remote: Enumerating objects: 188, done.[K
remote: Counting objects: 100% (188/188), done.[K
remote: Compressing objects: 100% (77/77), done.[K
remote: Total 188 (delta 105), reused 186 (delta 103), pack-reused 0[K
Receiving objects: 100% (188/188), 52.77 KiB | 1.39 MiB/s, done.
Resolving deltas: 100% (105/105), done.


In [3]:
import os
os.chdir('dl-hessian-pytorch')
!ls

dl-hessian-pytorch  sample_data
example  hessian_eigenthings  LICENSE  README.md  setup.py  tests


In [8]:
import logging
import math

from torch.optim.lr_scheduler import LambdaLR

import numpy as np

import torch
import torch.nn as nn
from torch.nn.functional import normalize
from torchvision import models
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, RandomSampler, DistributedSampler, SequentialSampler

from tqdm import tqdm
# from hessian_eigenthings import compute_hessian_eigenthings
from hessian_eigenthings import compute_hessian_eigenthings

# from hessian_eigenthings import compute_hessian_eigenthings

logger = logging.getLogger(__name__)

In [22]:
class ConstantLRSchedule(LambdaLR):
    """ Constant learning rate schedule.
    """
    def __init__(self, optimizer, last_epoch=-1):
        super(ConstantLRSchedule, self).__init__(optimizer, lambda _: 1.0, last_epoch=last_epoch)


class WarmupConstantSchedule(LambdaLR):
    """ Linear warmup and then constant.
        Linearly increases learning rate schedule from 0 to 1 over `warmup_steps` training steps.
        Keeps learning rate schedule equal to 1. after warmup_steps.
    """
    def __init__(self, optimizer, warmup_steps, last_epoch=-1):
        self.warmup_steps = warmup_steps
        super(WarmupConstantSchedule, self).__init__(optimizer, self.lr_lambda, last_epoch=last_epoch)

    def lr_lambda(self, step):
        if step < self.warmup_steps:
            return float(step) / float(max(1.0, self.warmup_steps))
        return 1.


class WarmupLinearSchedule(LambdaLR):
    """ Linear warmup and then linear decay.
        Linearly increases learning rate from 0 to 1 over `warmup_steps` training steps.
        Linearly decreases learning rate from 1. to 0. over remaining `t_total - warmup_steps` steps.
    """
    def __init__(self, optimizer, warmup_steps, t_total, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.t_total = t_total
        super(WarmupLinearSchedule, self).__init__(optimizer, self.lr_lambda, last_epoch=last_epoch)

    def lr_lambda(self, step):
        if step < self.warmup_steps:
            return float(step) / float(max(1, self.warmup_steps))
        return max(0.0, float(self.t_total - step) / float(max(1.0, self.t_total - self.warmup_steps)))


class WarmupCosineSchedule(LambdaLR):
    """ Linear warmup and then cosine decay.
        Linearly increases learning rate from 0 to 1 over `warmup_steps` training steps.
        Decreases learning rate from 1. to 0. over remaining `t_total - warmup_steps` steps following a cosine curve.
        If `cycles` (default=0.5) is different from default, learning rate follows cosine function after warmup.
    """
    def __init__(self, optimizer, warmup_steps, t_total, cycles=.5, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.t_total = t_total
        self.cycles = cycles
        super(WarmupCosineSchedule, self).__init__(optimizer, self.lr_lambda, last_epoch=last_epoch)

    def lr_lambda(self, step):
        if step < self.warmup_steps:
            return float(step) / float(max(1.0, self.warmup_steps))
        # progress after warmup
        progress = float(step - self.warmup_steps) / float(max(1, self.t_total - self.warmup_steps))
        return max(0.0, 0.5 * (1. + math.cos(math.pi * float(self.cycles) * 2.0 * progress)))


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def keep_layer(model):
    keep_list = ['layer3.0.conv', 'layer3.1.conv', 'fc.weight', 'bn', 'downsample.1']
    # keep_list = ['fc.weight', 'bn', 'downsample.1']
    for name, param in model.named_parameters():
        param.requires_grad = False
        for keep in keep_list:
            if keep in name:
                param.requires_grad = True
    return model



def lnorm_subspace_closeness(eigenvecs_priv, eigenvecs_pub):
    """
        eigenvecs_priv: p x k numpy matrix
    """
    l1_norm = 0
    for i in range(eigenvecs_priv.shape[0]):
        column_sum = 0
        for j in range(eigenvecs_priv.shape[0]):
            subspace_priv = 0
            subspace_pub = 0
            for k in range(eigenvecs_priv.shape[1]):
                subspace_priv += eigenvecs_priv[i][k] * eigenvecs_priv[j][k]
                subspace_pub += eigenvecs_pub[i][k] * eigenvecs_pub[j][k]
            column_sum += torch.abs(subspace_priv - subspace_pub)
        if column_sum > l1_norm:
            l1_norm = column_sum
    return l1_norm


def gr_subspace_distance(eigenvecs_priv, eigenvecs_pub):
    """
        eigenvecs_priv: k x p torch matrix
        eigenvecs_pub: k x p torch matrix
    """
    eigenvecs_priv = normalize(eigenvecs_priv)
    eigenvecs_pub = normalize(eigenvecs_pub)
    cos_angle = torch.einsum("ij, ij->i", eigenvecs_priv, eigenvecs_pub)
    return torch.sqrt(torch.sum(torch.arccos(cos_angle) ** 2))

In [24]:
num_classes = 10
batch_size = 256
learning_rate = 0.15
weight_decay = 1e-4
device = "cuda"
num_epoch = 100
warmup_steps = 500
num_eigenthings = 500  # compute top 20 eigenvalues/eigenvectors

transform_private = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_public = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_private)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=8)

testset = datasets.SVHN('./data',split='test', download=True, transform=transform_public)
public_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True, num_workers=8)


model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)
model = keep_layer(model)
print("number of params: ", int(sum(p.numel() for p in filter(lambda p: p.requires_grad, model.parameters()))))
model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=learning_rate,
                                momentum=0.9,
                                weight_decay=weight_decay)


# eigenvals, eigenvecs = compute_hessian_eigenthings(model, public_loader,
#                                                     criterion, num_eigenthings, full_dataset=False, mode="power_iter")

# print(eigenvecs)

Files already downloaded and verified


  cpuset_checked))


Using downloaded and verified file: ./data/test_32x32.mat
number of params:  2079104


In [25]:
if __name__ == '__main__':
    n_epoch = num_epoch
    t_total = n_epoch * (len(train_loader.dataset) // batch_size)

    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps, t_total=t_total)
 

    losses = AverageMeter()
    global_step, best_acc = 0, 0

    while True:
        model.train()
        epoch_iterator = tqdm(train_loader, desc="Training (X / X Steps) (loss=X.X)", bar_format="{l_bar}{r_bar}", dynamic_ncols=True)

        for step, batch in enumerate(epoch_iterator):
            batch = tuple(t.to(device) for t in batch)
            x, y = batch
            pred = model(x)
            loss = criterion(pred, y)

            # compute hessian of private data
            eigenvals_priv, eigenvecs_priv = compute_hessian_eigenthings(model, train_loader,
                                                criterion, num_eigenthings, full_dataset=False, fp16=True, mode="power_iter")
            print(eigenvecs_priv.dtype)
            eigenvecs_priv = torch.from_numpy(eigenvecs_priv.copy())
            eigenvecs_priv = eigenvecs_priv.to(device)
            # compute hessian of public data
            eigenvals_pub, eigenvecs_pub = compute_hessian_eigenthings(model, public_loader,
                                                criterion, num_eigenthings, full_dataset=False, fp16=True, mode="power_iter")
            eigenvecs_pub = torch.from_numpy(eigenvecs_pub.copy())
            eigenvecs_pub = eigenvecs_pub.to(device)
            # print(f"subspace closeness:{(eigenvecs_priv.T @ eigenvecs_priv - eigenvecs_pub.T @ eigenvecs_pub).norm(2)}")
            print(f"subspace closeness:{gr_subspace_distance(eigenvecs_priv, eigenvecs_pub)}")

            loss.backward()

            
            losses.update(loss.item())
                
            
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            global_step += 1

            epoch_iterator.set_description(
                "Training (%d / %d Steps) (loss=%2.5f)" % (global_step, t_total, losses.val)
            )

            if global_step % t_total == 0:
                break

        model.train()
        losses.reset()
        if global_step % t_total == 0:
            break

  cpuset_checked))
Training (X / X Steps) (loss=X.X):   0%|| 0/196 [16:58<?, ?it/s]


KeyboardInterrupt: ignored