# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
# Alternative backend for Apple Silicon:
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# Use the first CUDA GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
import numpy as np
import torch 
from torchvision import datasets, transforms
# from pytorchcv.model_provider import get_model as ptcv_get_model # model

import matplotlib.pyplot as plt

In [4]:
from loss_landscape.my_pyhessian import hessian, utils# Hessian computation
from loss_landscape.my_pyhessian.density_plot import get_esd_plot

In [5]:
from loss_landscape.plot_2D import plot_2d_contour

In [6]:
# import sys; sys.path.append("..")
from models.smooth_cross_entropy import mean_smooth_crossentropy
from models.wide_res_net import WideResNet
from models.attention_gru import AttentionGru
from models.gcn import GCN

from DatasetClass.cifar import Cifar
from DatasetClass.imdb import Imdb
from DatasetClass.TUD import GraphDataset

# WideResNet for Cifar

## Loss landscape plot

### With SGD

Generate surface file

In [None]:
# Run plot_surface.py for trained model
# !python plot_surface.py --model WideResNet --dataset cifar10 --x=-1:1:2 --y=-1:1:2 --model_file to_plot/model_cifar_SGD.pt --dir_type weights --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --percentage=0.3 --batch_size=128 --loss_name smooth_crossentropy

Generate plots

In [7]:
# Loss-surface file for the SGD-trained CIFAR checkpoint (5x5 grid on [-1, 1] per the file name).
surf_file = 'to_plot/model_cifar_SGD.pt_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,5]x[-1.0,1.0,5].h5'

# Contour plot of the 'train_loss' surface stored in that file.
# NOTE(review): the numeric arguments are presumably (vmin, vmax, vlevel, show) — confirm against loss_landscape.plot_2D.
plot_2d_contour(surf_file, 'train_loss', 0.1, 10, 0.5, False)

------------------------------------------------------------------
plot_2d_contour
------------------------------------------------------------------
len(xcoordinates): 5   len(ycoordinates): 5
[[19.47184278  4.94447712  6.3274922  10.84011077 38.55445198]
 [ 6.52683822  2.74590339  3.3255251   3.25660324 12.8048792 ]
 [ 3.97482543  1.80651281  0.46783798  1.99040413  5.71666521]
 [ 4.86280387  2.75262686  1.93810351  2.42843889  6.8996155 ]
 [24.73039846  9.45652653  5.2411354   3.49049795 11.69184616]]


### With SAM

Generate surface file

In [None]:
# Run plot_surface.py for trained model
# !python plot_surface.py --model WideResNet --dataset cifar10 --x=-1:1:2 --y=-1:1:2 --model_file to_plot/model_cifar_SAM_rho1.pt --dir_type weights --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --percentage=0.3 --batch_size=128 --loss_name smooth_crossentropy

Generate plots

In [8]:
# Loss-surface file plotted in the "With SAM" CIFAR section.
# NOTE(review): this file belongs to model_cifar_halfSAM_rho05.pt, whereas the commented
# plot_surface.py command in this section uses model_cifar_SAM_rho1.pt — confirm which checkpoint is intended.
surf_file = 'to_plot/model_cifar_halfSAM_rho05.pt_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,5]x[-1.0,1.0,5].h5'

# Contour plot of the 'train_loss' surface stored in that file.
# NOTE(review): the numeric arguments are presumably (vmin, vmax, vlevel, show) — confirm against loss_landscape.plot_2D.
plot_2d_contour(surf_file, 'train_loss', 0.1, 10, 0.5, False)

------------------------------------------------------------------
plot_2d_contour
------------------------------------------------------------------
len(xcoordinates): 5   len(ycoordinates): 5
[[2.40649363 2.45352721 2.37080363 2.33494368 3.27218929]
 [2.11266517 1.72538848 1.35355867 1.86808758 2.42179902]
 [1.96148256 1.27078478 0.87594321 1.30209623 2.05305649]
 [2.82968777 1.60050254 1.09351784 1.46998333 1.97798065]
 [7.75504659 2.75616385 1.72724978 1.85543291 2.30363232]]


## Eigenvalues of hessian

Load trained model and dataset

In [9]:
# Model identifier forwarded to hessian(..., model_name=model_name) in the cells below.
model_name = 'WideResNet'

In [11]:
# Build the CIFAR data object and keep handles to both of its loaders.
# NOTE(review): 0.3 and 128 match --percentage=0.3 --batch_size=128 in the plot_surface.py
# commands, so the arguments are presumably (percentage, batch_size, ...) — confirm.
dataset = Cifar(0.3, 128, 2)
trainloader = dataset.train
testloader = dataset.test

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


Extract batches of data for computation

In [None]:
# Number of training batches stacked into the batch handed to the Hessian module.
num_batches = 1

inputs = None
targets = None

for ind, (data, tar) in enumerate(trainloader):
    if inputs is None:
        # The first batch seeds the accumulators.
        inputs = data
        targets = tar
    elif ind < num_batches:
        # Later batches are appended along dim 0.
        inputs = torch.cat((inputs, data), 0)
        targets = torch.cat((targets, tar))
    else:
        break

# Print the shapes with a plain statement: the former list comprehension was used
# only for its side effect and left a stray `[None]` as the cell output.
if targets is not None:
    print(inputs.size(), targets.size())
else:
    print(inputs.size())

# we use cuda to make the computation fast
# model = model.cuda()
# inputs, targets = inputs.cuda(), targets.cuda()

### With SGD

In [56]:
# Deserialize the SGD checkpoint; the map_location callable keeps the storages where they are loaded.
stored = torch.load('to_plot/model_cifar_SGD.pt', map_location=lambda storage, loc: storage)

model = WideResNet(8, 2, 0.0, in_channels=3, labels=10)

# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
model.load_state_dict(stored.get('state_dict', stored))
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy

Create the hessian computation module and compute eigenvalue density

In [None]:
# Tag used to build the cached eigenvalue file names ('Eigenvalues/<tag>_*.npy') and the plot name.
eig_file_name = 'wideresnet_sgd'

In [65]:
# Cache files holding the eigenvalue density of a previous run.
eigen_path = 'Eigenvalues/' + eig_file_name + '_eigen.npy'
weight_path = 'Eigenvalues/' + eig_file_name + '_weight.npy'

try:
    # Reuse the cached density when both files exist.
    density_eigen = np.load(eigen_path)
    density_weight = np.load(weight_path)
except FileNotFoundError:
    # Only a missing cache triggers the expensive recomputation. The previous bare
    # `except:` also swallowed interrupts, typos and corrupt files.
    hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name=model_name)
    density_eigen, density_weight = hessian_comp.density(iter=100, n_v=1)
    np.save(eigen_path, density_eigen)
    np.save(weight_path, density_weight)

# Plot the density and write the figure under plots/.
get_esd_plot(density_eigen, density_weight, 'plots/eig_' + eig_file_name + '.png')

### With SAM

In [56]:
# Deserialize the SAM checkpoint; the map_location callable keeps the storages where they are loaded.
# NOTE(review): this path spells 'rho0.5' while the surface file plotted earlier in the notebook
# spells 'rho05' — confirm the checkpoint name.
stored = torch.load('to_plot/model_cifar_halfSAM_rho0.5.pt', map_location=lambda storage, loc: storage)

model = WideResNet(8, 2, 0.0, in_channels=3, labels=10)

# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
model.load_state_dict(stored.get('state_dict', stored))
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy

Create the hessian computation module and compute eigenvalue density

In [None]:
# Tag used to build the cached eigenvalue file names ('Eigenvalues/<tag>_*.npy') and the plot name.
eig_file_name = 'wideresnet_sam'

In [65]:
# Cache files holding the eigenvalue density of a previous run.
eigen_path = 'Eigenvalues/' + eig_file_name + '_eigen.npy'
weight_path = 'Eigenvalues/' + eig_file_name + '_weight.npy'

try:
    # Reuse the cached density when both files exist.
    density_eigen = np.load(eigen_path)
    density_weight = np.load(weight_path)
except FileNotFoundError:
    # Only a missing cache triggers the expensive recomputation. The previous bare
    # `except:` also swallowed interrupts, typos and corrupt files.
    hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name=model_name)
    density_eigen, density_weight = hessian_comp.density(iter=100, n_v=1)
    np.save(eigen_path, density_eigen)
    np.save(weight_path, density_weight)

# Plot the density and write the figure under plots/.
get_esd_plot(density_eigen, density_weight, 'plots/eig_' + eig_file_name + '.png')

# AttentionGru for imdb

## Loss landscape plot

### With SGD

Generate surface file

In [None]:
# Run plot_surface.py for trained model
# !python plot_surface.py --model AttentionGru --dataset imdb --x=-1:1:3 --y=-1:1:3 --model_file to_plot/model_imdb_SGD.pt --dir_type weights --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --percentage=0.05 --batch_size=16 --loss_name smooth_crossentropy

Generate plots

In [13]:
# Loss-surface file for the SGD-trained IMDB checkpoint (3x3 grid on [-1, 1] per the file name).
surf_file = 'to_plot/model_imdb_SGD.pt_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,3]x[-1.0,1.0,3].h5'

# Contour plot of the 'train_loss' surface stored in that file.
# NOTE(review): the numeric arguments are presumably (vmin, vmax, vlevel, show) — confirm against loss_landscape.plot_2D.
plot_2d_contour(surf_file, 'train_loss', 0.1, 10, 0.5, False)

------------------------------------------------------------------
plot_2d_contour
------------------------------------------------------------------
len(xcoordinates): 3   len(ycoordinates): 3
[[0.36772302 0.37724695 0.47128022]
 [0.36875239 0.36698774 0.39151391]
 [0.38017842 0.38429436 0.36803225]]


### With SAM

Generate surface file

In [None]:
# Run plot_surface.py for trained model
# TODO: replace rho???? with the rho of the trained IMDB SAM checkpoint (this command previously pointed at the CIFAR checkpoint model_cifar_SAM_rho1.pt)
# !python plot_surface.py --model AttentionGru --dataset imdb --x=-1:1:2 --y=-1:1:2 --model_file to_plot/model_imdb_SAM_rho????.pt --dir_type weights --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --percentage=0.3 --batch_size=16 --loss_name smooth_crossentropy

Generate plots

In [None]:
# TODO: point this at the surface file generated for the SAM-trained IMDB model.
# The assignment was left blank, which is a syntax error and made the cell unrunnable.
surf_file = None

# Skip plotting until a surface file has been configured.
if surf_file is not None:
    plot_2d_contour(surf_file, 'train_loss', 0.1, 10, 0.5, False)

## Eigenvalues of hessian

Load trained model and dataset

In [7]:
# Model identifier forwarded to hessian(..., model_name=model_name) in the cells below.
model_name = 'AttentionGru'

In [8]:
# Build the IMDB data object and keep a handle to its training iterator.
# NOTE(review): 0.05 and 16 match --percentage=0.05 --batch_size=16 in the plot_surface.py
# command for the IMDB SGD model, so the arguments are presumably (percentage, batch_size, ..., device) — confirm.
dataset = Imdb(0.05, 16, 2, device)
trainloader = dataset.train_iterator

downloading aclImdb_v1.tar.gz


.data\imdb\aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:17<00:00, 4.83MB/s]
.vector_cache\glove.6B.zip: 862MB [02:53, 4.98MB/s]                               
100%|█████████▉| 399999/400000 [01:13<00:00, 5411.46it/s]


In [20]:
num_batches = 1 # Only one batch

inputs = None
targets = None

for ind, data in enumerate(trainloader):
    # Pull the token tensor and the labels out of the batch object and move them to `device`.
    inp = data.text.to(device)
    tar = data.label.to(device).long()

    if inputs is None:
        # The first batch seeds the accumulators.
        inputs = inp
        targets = tar
    elif ind < num_batches:
        # Fixed: this used to be torch.cat((inp, data), 0), which mixed the new tensor
        # with the raw batch object and dropped everything accumulated so far.
        # NOTE(review): the printed shape is [571, 16] with 16 targets, so the batch axis
        # looks like dim 1; concatenating along dim 0 may be wrong for num_batches > 1 — confirm.
        inputs = torch.cat((inputs, inp), 0)
        targets = torch.cat((targets, tar))
    else:
        break

# Print the shapes with a plain statement: the former list comprehension was used
# only for its side effect and left a stray `[None]` as the cell output.
if targets is not None:
    print(inputs.size(), targets.size())
else:
    print(inputs.size())

# we use cuda to make the computation fast
# model = model.cuda()
# inputs, targets = inputs.cuda(), targets.cuda()

torch.Size([571, 16]) torch.Size([16])


[None]

### With SGD

In [21]:
# Deserialize the SGD checkpoint; the map_location callable keeps the storages where they are loaded.
stored = torch.load('to_plot/model_imdb_SGD.pt', map_location=lambda storage, loc: storage)

# The vocabulary size of the dataset's TEXT field is the model's first argument.
vocab_dim = len(dataset.TEXT.vocab)
model = AttentionGru(
    vocab_dim,
    embedding_dim=300,
    hidden_dim=32,
    output_dim=2,
    num_layers=2,
    d_rate=0.4,
)

# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
model.load_state_dict(stored.get('state_dict', stored))
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy

Create the hessian computation module and compute eigenvalue density

In [22]:
# Tag used to build the cached eigenvalue file names ('Eigenvalues/<tag>_*.npy') and the plot name.
eig_file_name = 'attentiongru_sgd'

In [25]:
# Cache files holding the eigenvalue density of a previous run.
eigen_path = 'Eigenvalues/' + eig_file_name + '_eigen.npy'
weight_path = 'Eigenvalues/' + eig_file_name + '_weight.npy'

try:
    # Reuse the cached density when both files exist.
    density_eigen = np.load(eigen_path)
    density_weight = np.load(weight_path)
except FileNotFoundError:
    # Only a missing cache triggers the expensive recomputation. The previous bare
    # `except:` also swallowed interrupts, typos and corrupt files.
    hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name=model_name)
    # This model uses 25 iterations, where the other sections of the notebook use 100.
    density_eigen, density_weight = hessian_comp.density(iter=25, n_v=1)
    np.save(eigen_path, density_eigen)
    np.save(weight_path, density_weight)

# Plot the density and write the figure under plots/.
get_esd_plot(density_eigen, density_weight, 'plots/eig_' + eig_file_name + '.png')

Iter 0
Iter 1
Iter 2
Iter 3
Iter 4
Iter 5
Iter 6
Iter 7
Iter 8
Iter 9
Iter 10
Iter 11
Iter 12
Iter 13
Iter 14
Iter 15
Iter 16
Iter 17
Iter 18
Iter 19
Iter 20
Iter 21
Iter 22
Iter 23
Iter 24


  density_output[i, j] = np.sum(tmp_result * weights[i, :])
  return np.asarray(x, float)
  vmin, vmax = map(float, [vmin, vmax])


### With SAM

In [56]:
# Deserialize the SAM checkpoint; the map_location callable keeps the storages where they are loaded.
# TODO: the path still contains the placeholder 'rho????' — fill in the real rho before running.
stored = torch.load('to_plot/model_imdb_SAM_rho????.pt', map_location=lambda storage, loc: storage)

# The vocabulary size of the dataset's TEXT field is the model's first argument.
vocab_dim = len(dataset.TEXT.vocab)
model = AttentionGru(
    vocab_dim,
    embedding_dim=300,
    hidden_dim=32,
    output_dim=2,
    num_layers=2,
    d_rate=0.4,
)

# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
model.load_state_dict(stored.get('state_dict', stored))
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy

Create the hessian computation module and compute eigenvalue density

In [None]:
# Tag used to build the cached eigenvalue file names ('Eigenvalues/<tag>_*.npy') and the plot name.
eig_file_name = 'attentiongru_sam'

Plot eigenvalue density and save

In [65]:
# Cache files holding the eigenvalue density of a previous run.
eigen_path = 'Eigenvalues/' + eig_file_name + '_eigen.npy'
weight_path = 'Eigenvalues/' + eig_file_name + '_weight.npy'

try:
    # Reuse the cached density when both files exist.
    density_eigen = np.load(eigen_path)
    density_weight = np.load(weight_path)
except FileNotFoundError:
    # Only a missing cache triggers the expensive recomputation. The previous bare
    # `except:` also swallowed interrupts, typos and corrupt files.
    hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name=model_name)
    density_eigen, density_weight = hessian_comp.density(iter=100, n_v=1)
    np.save(eigen_path, density_eigen)
    np.save(weight_path, density_weight)

# Plot the density and write the figure under plots/.
get_esd_plot(density_eigen, density_weight, 'plots/eig_' + eig_file_name + '.png')

# Graph Convolutional Network for Mutagenicity

## Loss landscape plot

### With ADAM

Generate surface file

In [None]:
# Run plot_surface.py for trained model
# !python plot_surface.py --model GCN --dataset Mutagenicity --x=-1:1:3 --y=-1:1:3 --model_file to_plot/model_gcn_ADAM.pt --dir_type weights --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --batch_size=64 --loss_name smooth_crossentropy

Generate plots

In [78]:
# Loss-surface file for the ADAM-trained GCN checkpoint (3x3 grid on [-1, 1] per the file name).
surf_file = 'to_plot/model_gcn_ADAM.pt_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,3]x[-1.0,1.0,3].h5'

# Contour plot of the 'train_loss' surface stored in that file.
# NOTE(review): the numeric arguments are presumably (vmin, vmax, vlevel, show) — confirm against loss_landscape.plot_2D.
plot_2d_contour(surf_file, 'train_loss', 0.1, 10, 0.5, False)

------------------------------------------------------------------
plot_2d_contour
------------------------------------------------------------------
len(xcoordinates): 3   len(ycoordinates): 3
[[3.08615565 1.17431903 1.04388905]
 [0.79135042 0.28873381 0.32784745]
 [0.44603446 0.33184609 0.39543769]]


  


### With SAM

Generate surface file

In [None]:
# Run plot_surface.py for trained model
# !python plot_surface.py --model GCN --dataset Mutagenicity --x=-1:1:2 --y=-1:1:2 --model_file to_plot/model_gcn_SAM_rho0.3.pt --dir_type weights --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --batch_size=64 --loss_name smooth_crossentropy

Generate plots

In [None]:
# Surface file for the SAM-trained GCN. The previous value pointed at the CIFAR halfSAM
# surface, so this GCN section silently re-plotted the CIFAR landscape.
# NOTE(review): the name is derived from the commented plot_surface.py command in this section
# (model_gcn_SAM_rho0.3.pt with --x=-1:1:2 --y=-1:1:2) — confirm it matches the generated file.
surf_file = 'to_plot/model_gcn_SAM_rho0.3.pt_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,2]x[-1.0,1.0,2].h5'

# Contour plot of the 'train_loss' surface stored in that file.
plot_2d_contour(surf_file, 'train_loss', 0.1, 10, 0.5, False)

## Eigenvalues of hessian

Load trained model and dataset

In [79]:
# Model identifier forwarded to hessian(..., model_name=model_name) in the cells below.
model_name = 'GCN'

In [80]:
# Build the Mutagenicity graph data object and keep a handle to its training loader.
# NOTE(review): 64 matches --batch_size=64 in the plot_surface.py commands, so the last argument
# is presumably the batch size; the meaning of 70 is not visible here — confirm.
dataset = GraphDataset('Mutagenicity', 70, 64)
trainloader = dataset.train_loader

In [83]:
# Number of training batches handed to the Hessian module.
num_batches = 1

inputs = None
targets = None  # never assigned in this cell; stays None

for ind, data in enumerate(trainloader):
    if inputs is None:
        # The first batch object is used as-is.
        inputs = data
    elif ind < num_batches:
        # NOTE(review): inputs.size() printed a plain tuple, so `data` is presumably not a
        # tensor and torch.cat would fail for num_batches > 1 — confirm before raising num_batches.
        inputs = torch.cat((inputs, data), 0)
    else:
        break

# Print the sizes with a plain statement: the former list comprehension was used
# only for its side effect and left a stray `[None]` as the cell output.
if targets is not None:
    print(inputs.size(), targets.size())
else:
    print(inputs.size())

# we use cuda to make the computation fast
# model = model.cuda()
# inputs, targets = inputs.cuda(), targets.cuda()

(1698, 1698)


[None]

### With ADAM

In [104]:
# Build the GCN from the dataset's feature and class counts and place it on `device`.
model = GCN(
    64,
    dataset.dataset.num_node_features,
    dataset.dataset.num_classes,
).to(device)

# Deserialize the ADAM checkpoint; the map_location callable keeps the storages where they are loaded.
stored = torch.load('to_plot/model_gcn_ADAM.pt', map_location=lambda storage, loc: storage)

# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
model.load_state_dict(stored.get('state_dict', stored))
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy

Create the hessian computation module and compute eigenvalue density

In [105]:
# Tag used to build the cached eigenvalue file names ('Eigenvalues/<tag>_*.npy') and the plot name.
eig_file_name = 'gcn_adam'

In [106]:
# Cache files holding the eigenvalue density of a previous run.
eigen_path = 'Eigenvalues/' + eig_file_name + '_eigen.npy'
weight_path = 'Eigenvalues/' + eig_file_name + '_weight.npy'

try:
    # Reuse the cached density when both files exist.
    density_eigen = np.load(eigen_path)
    density_weight = np.load(weight_path)
except FileNotFoundError:
    # Only a missing cache triggers the expensive recomputation. The previous bare
    # `except:` also swallowed interrupts, typos and corrupt files.
    hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name=model_name)
    density_eigen, density_weight = hessian_comp.density(iter=100, n_v=1)
    np.save(eigen_path, density_eigen)
    np.save(weight_path, density_weight)

# Plot the density and write the figure under plots/.
get_esd_plot(density_eigen, density_weight, 'plots/eig_' + eig_file_name + '.png')

### With SAM

In [99]:
# Deserialize the SAM checkpoint; the map_location callable keeps the storages where they are loaded.
stored = torch.load('to_plot/model_gcn_SAM_rho0.3.pt', map_location=lambda storage, loc: storage)

# Build the GCN from the dataset's feature and class counts and place it on `device`.
model = GCN(
    64,
    dataset.dataset.num_node_features,
    dataset.dataset.num_classes,
).to(device)

# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
model.load_state_dict(stored.get('state_dict', stored))
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy

Create the hessian computation module and compute eigenvalue density

In [100]:
# Tag used to build the cached eigenvalue file names ('Eigenvalues/<tag>_*.npy') and the plot name.
eig_file_name = 'gcn_sam'

In [102]:
# Cache files holding the eigenvalue density of a previous run.
eigen_path = 'Eigenvalues/' + eig_file_name + '_eigen.npy'
weight_path = 'Eigenvalues/' + eig_file_name + '_weight.npy'

try:
    # Reuse the cached density when both files exist.
    density_eigen = np.load(eigen_path)
    density_weight = np.load(weight_path)
except FileNotFoundError:
    # Only a missing cache triggers the expensive recomputation. The previous bare
    # `except:` also swallowed interrupts, typos and corrupt files.
    hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name=model_name)
    density_eigen, density_weight = hessian_comp.density(iter=100, n_v=1)
    np.save(eigen_path, density_eigen)
    np.save(weight_path, density_weight)

# Plot the density and write the figure under plots/.
get_esd_plot(density_eigen, density_weight, 'plots/eig_' + eig_file_name + '.png')

# Old Eigenvalues of hessian

In [36]:
import numpy as np
import torch 
from torchvision import datasets, transforms
# from pytorchcv.model_provider import get_model as ptcv_get_model # model

import matplotlib.pyplot as plt

In [37]:
from my_pyhessian import hessian, utils# Hessian computation
from my_pyhessian.density_plot import get_esd_plot

In [41]:
import sys; sys.path.append("..")
from models.smooth_cross_entropy import mean_smooth_crossentropy
from models.wide_res_net import WideResNet
from models.attention_gru import AttentionGru
from models.gcn import GCN

from DatasetClass.cifar import Cifar
from DatasetClass.imdb import Imdb
from DatasetClass.TUD import GraphDataset

Load trained model and dataset

In [42]:
# Select which model the remaining cells of this section operate on; the dataset and
# batch-extraction cells below branch on this value. Uncomment exactly one line.
# model_name = 'WideResNet'
# model_name = 'AttentionGru'
model_name = 'GCN'

In [43]:
# Build the dataset object matching `model_name` and expose its training loader.
if model_name == 'WideResNet':
    dataset = Cifar(0.3, 128, 2)
    trainloader = dataset.train
    testloader = dataset.test
elif model_name == 'AttentionGru':
    # NOTE(review): the IMDB cell earlier in the notebook uses 0.05 as the first argument, not 0.3 — confirm.
    dataset = Imdb(0.3, 16, 2, device)
    trainloader = dataset.train_iterator
elif model_name == 'GCN':
    dataset = GraphDataset('Mutagenicity', 70, 64)
    trainloader = dataset.train_loader

In [56]:
# WideResNet variant, kept for reference. The model instantiation used to run unconditionally
# and was immediately overwritten by the GCN below, so it is commented out with its checkpoints.
# stored = torch.load('../to_plot/model_cifar_SGD.pt', map_location=lambda storage, loc: storage)
# stored = torch.load('../to_plot/model_cifar_halfSAM_rho0.5.pt', map_location=lambda storage, loc: storage)
# model = WideResNet(8, 2, 0.0, in_channels=3, labels=10)

# GCN variant (active): build the model on `device` and deserialize the ADAM checkpoint.
model = GCN(64, dataset.dataset.num_node_features, dataset.dataset.num_classes).to(device)
stored = torch.load('../to_plot/model_gcn_ADAM.pt', map_location=lambda storage, loc: storage)


# The checkpoint either wraps the weights under 'state_dict' or is the weight mapping itself.
if 'state_dict' in stored:
    model.load_state_dict(stored['state_dict'])
else:
    model.load_state_dict(stored)
model.eval()

# Loss function handed to the Hessian module.
criterion = mean_smooth_crossentropy



Extract batches of data for computation

In [50]:
# Number of training batches stacked into the batch handed to the Hessian module.
num_batches = 1

inputs = None
targets = None

# FOR CIFAR
if model_name == 'WideResNet':
    for ind, (data, tar) in enumerate(trainloader):
        if inputs is None:
            inputs = data
            targets = tar
        elif ind < num_batches:
            inputs = torch.cat((inputs, data), 0)
            targets = torch.cat((targets, tar))
        else:
            break

# FOR IMDB
elif model_name == 'AttentionGru':
    for ind, data in enumerate(trainloader):
        # Pull the token tensor and the labels out of the batch object and move them to `device`.
        inp = data.text.to(device)
        tar = data.label.to(device).long()

        if inputs is None:
            # Fixed: store the extracted tensor, not the raw batch object.
            inputs = inp
            targets = tar
        elif ind < num_batches:
            # Fixed: concatenate the extracted tensor, not the raw batch object.
            inputs = torch.cat((inputs, inp), 0)
            targets = torch.cat((targets, tar))
        else:
            break

# FOR GCN
elif model_name == 'GCN':
    for ind, data in enumerate(trainloader):
        if inputs is None:
            # The batch object is used as-is; targets stays None in this branch.
            inputs = data
        elif ind < num_batches:
            # NOTE(review): inputs.size() printed a plain tuple, so `data` is presumably not a
            # tensor and torch.cat would fail for num_batches > 1 — confirm.
            inputs = torch.cat((inputs, data), 0)
        else:
            break

# Print the sizes with a plain statement: the former list comprehension was used
# only for its side effect and left a stray `[None]` as the cell output.
if targets is not None:
    print(inputs.size(), targets.size())
else:
    print(inputs.size())

# we use cuda to make the computation fast
# model = model.cuda()
# inputs, targets = inputs.cuda(), targets.cuda()

(1908, 1908)


[None]

Create the hessian computation module

In [58]:
# Hessian helper for the loaded model and the extracted batch, built with cuda=False.
# It is reused below for the eigenvalue density and the top eigenvalue.
hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=False, model_name = model_name)

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Compute eigenvalue density

In [None]:
# CHANGE !!! — set this tag to match the checkpoint loaded above before running the cells below.
# NOTE(review): the checkpoint loaded in this section is model_gcn_ADAM.pt while the tag says 'sgd' — confirm.
eig_file_name = 'gcn_sgd'

In [None]:
# Compute the eigenvalue density, then persist both arrays under Eigenvalues/.
density_eigen, density_weight = hessian_comp.density(iter=100, n_v=1)
for suffix, values in (('_eigen.npy', density_eigen), ('_weight.npy', density_weight)):
    np.save('Eigenvalues/' + eig_file_name + suffix, values)

Plot eigenvalue density and save

In [65]:
# Reload the stored density arrays and write the plot to plots/.
density_eigen = np.load(f'Eigenvalues/{eig_file_name}_eigen.npy')
density_weight = np.load(f'Eigenvalues/{eig_file_name}_weight.npy')

get_esd_plot(density_eigen, density_weight, f'plots/eig_{eig_file_name}.png')

In [66]:
# Ask the Hessian helper for the single largest eigenvalue (at most 10 iterations).
top_eigenvalues, top_eigenvector = hessian_comp.eigenvalues(maxIter=10, top_n=1)

# Report the last entry of the returned list with four decimals.
top_value = float(top_eigenvalues[-1])
print("The top Hessian eigenvalue of this model is {:.4f}".format(top_value))

The top Hessian eigenvalue of this model is 15.2096
