In [1]:
from bounds import *
import torch
import numpy as np

# Fix torch's RNG seed (CPU generator and all CUDA generators) so the random
# tensors drawn with torch.randn further down are reproducible across runs.
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Spectral norm estimation on dense matrix

This code covers spectral norm estimation for dense matrices; see Section 5.5 of the paper.

In [2]:
# Draw a random dense n x m matrix in double precision and compute its exact
# spectral norm (largest singular value) as the reference for the estimators.
n, m = 500, 400

# Use the GPU only when one is actually available. Previously the flag was
# hard-coded to False yet `.cuda()` was called unconditionally, which both
# contradicted the flag and crashed on CPU-only machines.
with_cuda = torch.cuda.is_available()

# Sample on the CPU first (as before) so the seeded values do not depend on
# the device, then move the matrix if a GPU is present.
G = torch.randn(n, m).double()
if with_cuda:
    G = G.cuda()

sigma_1_reference = torch.linalg.matrix_norm(G, ord=2).item()
print("dense sigma_1_reference", sigma_1_reference)

dense sigma_1_reference 41.99017911516535


# Test Power iteration on dense matrix

In [3]:
# Benchmark estimate_dense with name_func="pi" on G: call it burn + nb_reps
# times, drop the first `burn` results, and report the mean deviation from
# sigma_1_reference together with the mean reported time.
n_iter_pi = 1000
nb_reps, burn = 10, 10

sigmas_1_pi = []
times = []
pi_kwargs = dict(n_iter=n_iter_pi, name_func="pi", return_time=True)
for _ in range(burn + nb_reps):
    sigma_1_pi, time = estimate_dense(G, **pi_kwargs)
    # .item() stays inside the loop so each run is materialised before the next.
    sigmas_1_pi.append(sigma_1_pi.item())
    times.append(time)

mean_sigma_1_pi = np.mean(sigmas_1_pi[burn:])
mean_time_pi = np.mean(times[burn:])
print("Diff Power iteration", mean_sigma_1_pi - sigma_1_reference,
      "Mean time", mean_time_pi)

Diff Power iteration -1.247428826900432e-10 Mean time 0.05896735191345215


# Test Gram iteration on dense matrix

In [4]:
# Benchmark estimate_dense with name_func="gi" on G: call it burn + nb_reps
# times, drop the first `burn` results, and report the mean deviation from
# sigma_1_reference together with the mean reported time.
n_iter_gram = 15
nb_reps, burn = 10, 10

sigmas_1_gram = []
times = []
gi_kwargs = dict(n_iter=n_iter_gram, name_func="gi", return_time=True)
for _ in range(burn + nb_reps):
    sigma_1_gram, time = estimate_dense(G, **gi_kwargs)
    # .item() stays inside the loop so each run is materialised before the next.
    sigmas_1_gram.append(sigma_1_gram.item())
    times.append(time)

mean_sigma_1_gram = np.mean(sigmas_1_gram[burn:])
mean_time_gram = np.mean(times[burn:])
print("Diff Gram iteration", mean_sigma_1_gram - sigma_1_reference,
      "Mean time", mean_time_gram)

Diff Gram iteration -3.574029960873304e-12 Mean time 0.0009333372116088867


# Spectral norm estimation on convolutional layer

This code covers spectral norm estimation for convolutional layers; see Section 5.2 of the paper.

# Define convolution kernel

In [5]:
# Random convolution kernel of shape (cout, cin, kernel_size, kernel_size) and
# two reference spectral-norm values for it, computed with the "sedghi2019"
# and "ryu2019" methods of `estimate` for an input of spatial size input_size_n.
cout = 64
cin = 64
input_size_n = 12
kernel_size = 5

# Fall back to the CPU when no GPU is present instead of crashing in .cuda().
# NOTE(review): assumes `estimate` also accepts CPU tensors -- confirm in bounds.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sample on the CPU first (as before) so the seeded values are device-independent.
kernel = torch.randn(cout, cin, kernel_size, kernel_size).to(device)

sigma_1_reference_sedghi2019 = estimate(kernel,
                                        n=input_size_n,
                                        name_func="sedghi2019").item()
print("kernel sigma_1_reference_sedghi2019", sigma_1_reference_sedghi2019)

n_iter_ryu2019_ref = 100
sigma_1_reference_ryu2019 = estimate(kernel,
                                     n=input_size_n,
                                     n_iter=n_iter_ryu2019_ref,
                                     name_func="ryu2019").item()
print("kernel sigma_1_reference_ryu2019", sigma_1_reference_ryu2019)

kernel sigma_1_reference_sedghi2019 83.09203338623047
kernel sigma_1_reference_ryu2019 77.56085968017578


# Test Araujo2021 on convolutional layer

In [6]:
# Benchmark estimate with name_func="araujo2021" on the kernel: call it
# burn + nb_reps times, drop the first `burn` results, and report the mean
# deviation from both reference values together with the mean reported time.
nb_samples_araujo2021 = 50
nb_reps, burn = 10, 10

sigmas_1_araujo2021 = []
times = []
araujo_kwargs = dict(n=input_size_n,
                     n_iter=nb_samples_araujo2021,
                     name_func="araujo2021",
                     return_time=True)
for _ in range(burn + nb_reps):
    sigma_1_araujo2021, time = estimate(kernel, **araujo_kwargs)
    # .item() stays inside the loop so each run is materialised before the next.
    sigmas_1_araujo2021.append(sigma_1_araujo2021.item())
    times.append(time)

mean_sigma_1_araujo2021 = np.mean(sigmas_1_araujo2021[burn:])
mean_time_araujo2021 = np.mean(times[burn:])
print("Diff Sedghi2019 with Araujo2021 conv ", mean_sigma_1_araujo2021 - sigma_1_reference_sedghi2019,
      "\nDiff Ryu2019 with Araujo2021 conv", mean_sigma_1_araujo2021 - sigma_1_reference_ryu2019,
      "\nMean time", mean_time_araujo2021)

Diff Sedghi2019 with Araujo2021 conv  273.63285064697266 
Diff Ryu2019 with Araujo2021 conv 279.16402435302734 
Mean time 0.0010010957717895507


# Test Singla2021 on convolutional layer

In [7]:
# Benchmark estimate with name_func="singla2021" on the kernel: call it
# burn + nb_reps times, drop the first `burn` results, and report the mean
# deviation from both reference values together with the mean reported time.
n_iter_singla2021 = 50
nb_reps = 10
burn = 10
sigmas_1_singla2021, times = [], []
for _ in range(nb_reps + burn):
    sigma_1_singla2021, time = estimate(kernel,
                                        n=input_size_n,
                                        n_iter=n_iter_singla2021,
                                        name_func="singla2021",
                                        return_time=True)
    sigmas_1_singla2021.append(sigma_1_singla2021.item())
    times.append(time)

# The mean over the kept runs is shared by both comparisons; compute it once.
mean_sigma_1_singla2021 = np.mean(sigmas_1_singla2021[burn:])
# Label fixed: "Singla2021conv" was missing the space used by every other label.
print("Diff Sedghi2019 with Singla2021 conv", mean_sigma_1_singla2021 - sigma_1_reference_sedghi2019,
      "\nDiff Ryu2019 with Singla2021 conv", mean_sigma_1_singla2021 - sigma_1_reference_ryu2019,
      "\nMean time", np.mean(times[burn:]))

Diff Sedghi2019 with Singla2021conv 93.96885681152344 
Diff Ryu2019 with Singla2021 conv 99.50003051757812 
Mean time 0.01792454719543457


# Test Gram iteration on convolution layer

In [8]:
# Benchmark estimate with name_func="delattre2023" on the kernel: call it
# burn + nb_reps times, drop the first `burn` results, and report the mean
# deviation from both reference values together with the mean reported time.
n_iter_gram = 5
nb_reps, burn = 10, 10

sigmas_1_gram = []
times = []
gram_conv_kwargs = dict(n=input_size_n,
                        n_iter=n_iter_gram,
                        name_func="delattre2023",
                        return_time=True)
for _ in range(burn + nb_reps):
    sigma_1_gram, time = estimate(kernel, **gram_conv_kwargs)
    # .item() stays inside the loop so each run is materialised before the next.
    sigmas_1_gram.append(sigma_1_gram.item())
    times.append(time)

mean_sigma_1_gram = np.mean(sigmas_1_gram[burn:])
mean_time_gram = np.mean(times[burn:])
print("Diff Sedghi2019 with Gram iteration conv", mean_sigma_1_gram - sigma_1_reference_sedghi2019,
      "\nDiff Ryu2019 with Gram iteration conv", mean_sigma_1_gram - sigma_1_reference_ryu2019,
      "\nMean time", mean_time_gram)

Diff Sedghi2019 with Gram iteration conv 0.025726318359375 
Diff Ryu2019 with Gram iteration conv 3.7672805786132812 
Mean time 0.000528717041015625


# Test norm2toep (Gram iteration on convolution layer with zero padding)

In [10]:
# Benchmark estimate with name_func="delattre2024" on the kernel: call it
# burn + nb_reps times, drop the first `burn` results, and report the mean
# deviation from both reference values together with the mean reported time.
n_iter_gram = 5
nb_reps, burn = 10, 10

sigmas_1_gram = []
times = []
norm2toep_kwargs = dict(n=input_size_n,
                        n_iter=n_iter_gram,
                        name_func="delattre2024",
                        return_time=True)
for _ in range(burn + nb_reps):
    sigma_1_gram, time = estimate(kernel, **norm2toep_kwargs)
    # .item() stays inside the loop so each run is materialised before the next.
    sigmas_1_gram.append(sigma_1_gram.item())
    times.append(time)

mean_sigma_1_gram = np.mean(sigmas_1_gram[burn:])
mean_time_gram = np.mean(times[burn:])
print("Diff Sedghi2019 with norm2toep", mean_sigma_1_gram - sigma_1_reference_sedghi2019,
      "\nDiff Ryu2019 with norm2toep", mean_sigma_1_gram - sigma_1_reference_ryu2019,
      "\nMean time", mean_time_gram)

Diff Sedghi2019 with norm2toep 3.3376846313476562 
Diff Ryu2019 with norm2toep 8.868858337402344 
Mean time 0.0007239341735839844


# Compute spectral norm of convolutional layers of ResNet18

This code is related to Section 5.3 of the paper.

In [None]:
from torchvision import models

# Walk through the modules of a pretrained ResNet18, estimate the spectral norm
# of every convolution with each method in `func_names`, and accumulate per
# method a running Lipschitz figure (`lip_tot`) and the estimation time
# (`times_tot`).

# Value passed as the `n_iter` argument of `estimate` for each method.
n_iter_name = {
    "delattre2023": 7,
    "singla2021": 50,
    "ryu2019": 100,
    "araujo2021": 20,
    "sedghi2019": None,
}
func_names = [
    "ryu2019",
    # "sedghi2019",  # commented because it takes a while
    "araujo2021",
    "singla2021",
    "delattre2023",
]

# Fall back to the CPU when no GPU is present instead of crashing in .cuda().
# NOTE(review): assumes `estimate` also accepts CPU tensors -- confirm in bounds.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): recent torchvision deprecates `pretrained=` in favour of
# `weights=`; kept as-is to stay compatible with the installed version.
model_resnet_18 = models.resnet18(pretrained=True).to(device).eval()

# Spatial size of the tensor entering the next layer; updated as strided
# layers are traversed.
inp_shape = (224, 224)
lip_tot = {name: 1 for name in func_names}
times_tot = {name: 0 for name in func_names}

with torch.no_grad():
    for name, module in model_resnet_18.named_modules():
        is_downsample = name.endswith("downsample")
        is_regular_conv = "conv" in name
        is_max_pool = "MaxPool2d" in module.__class__.__name__

        if is_max_pool:
            # MaxPool2d keeps the stride as given (int or pair); only the first
            # entry is needed since both spatial sizes are scaled identically.
            stride = module.stride
            if isinstance(stride, (tuple, list)):
                stride = stride[0]
            inp_shape = (inp_shape[0] // stride, inp_shape[1] // stride)

        if is_downsample:
            # Downsampling layer in the residual connection: (conv, batch norm).
            conv, bn = module[0], module[1]
            # Eval-mode batch norm scales channel i by
            # gamma_i / sqrt(running_var_i + eps), so its Lipschitz constant is
            # the largest absolute scale. The previous expression divided by the
            # variance itself and ignored the sign of gamma and eps.
            bn_scale = bn.weight.detach().abs() / torch.sqrt(bn.running_var + bn.eps)
            lip_bn = bn_scale.max().item()
            # named_modules() yields a block's `downsample` after its main-path
            # convs, so inp_shape has already been reduced by the block's
            # stride. Undo that with the shortcut conv's own stride: the value
            # left over from the previous conv is stale (it is 1 for conv2 in
            # torchvision's BasicBlock -- TODO confirm for other architectures),
            # which made this restore step a no-op.
            stride = conv.stride[0]
            inp_shape = (inp_shape[0] * stride, inp_shape[1] * stride)
        elif is_regular_conv:
            conv = module
            stride = conv.stride[0]
            lip_bn = 1.0

        if is_downsample or is_regular_conv:
            param = conv.weight.clone().detach()
            # A distinct loop variable avoids clobbering the module `name`
            # bound by the outer loop.
            for func_name in func_names:
                bound, curr_time = estimate(param,
                                            inp_shape[0],
                                            n_iter_name[func_name],
                                            func_name,
                                            return_time=True)
                bound = bound.item()
                print(func_name, "conv weight dim", param.shape, "n", inp_shape[0],
                      "bound", bound, "time", curr_time)
                if is_downsample:
                    lip_tot[func_name] += bound * lip_bn
                else:
                    # Lipschitz of batch norm cancels in ratio
                    lip_tot[func_name] *= bound
                times_tot[func_name] += curr_time
                print("Total Lipschitz bound", lip_tot[func_name],
                      "total time", times_tot[func_name], "\n")
            # Spatial size seen by the layer that follows this convolution.
            inp_shape = (inp_shape[0] // stride, inp_shape[1] // stride)


# Total Lipschitz ratio bound 

In [None]:
# Express every method's accumulated value in lip_tot relative to the
# "ryu2019" entry (lip_tot is overwritten in place), then print the ratios
# and the accumulated times per method.
lip_tot_ref = lip_tot["ryu2019"]
for name in func_names:
    lip_tot[name] = lip_tot[name] / lip_tot_ref

print("Total Lipschitz ratio", lip_tot)
print("Total times", times_tot)