# ResNet50

In [None]:
import torchvision.models as models
import torch

# Pretrained ResNet50 switched to inference mode (eval() returns the module itself).
model = models.resnet50(weights=models.resnet.ResNet50_Weights.DEFAULT).eval()

# Dummy input: a single random 3x224x224 image on the CPU.
batch_size = 1
input_shape = (3, 224, 224)
x = torch.randn(batch_size, *input_shape).to('cpu')

# Smoke-test forward pass; the result is intentionally discarded.
model(x)

# Persist the reference weights that the permutation cells below start from.
sd = model.state_dict()
torch.save(sd, 'resnet50_orig.pth')

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /home/danielg/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 60.7MB/s]


## Manual Perm

In [117]:
import torch
# Start from the saved reference weights; every edit below is applied to this copy.
sd_perm = torch.load('resnet50_orig.pth')

# Disabled debug helper: list the shapes of all conv entries.
# for k, v in sd_perm.items():
#     if 'conv' in k:
#         print(k, v.shape)

# BatchNorm entries that are indexed per channel and are therefore reordered
# together with the conv that precedes the BN layer.
bn_varnames = ['weight', 'bias', 'running_mean', 'running_var']

# State-dict keys of the first bottleneck block of layer1.
# `conv2` is only referenced by the disabled second permutation further down.
conv0 = 'layer1.0.conv1.weight'
bn0 = 'layer1.0.bn1'
conv1 = 'layer1.0.conv2.weight'
conv2 = 'layer1.0.conv3.weight'

# Dim 0 of each 4-D conv weight is taken as its output-channel count.
# NOTE(review): unpacking into `_` rebinds IPython's "last output" variable.
n_c_1, _, _, _ = sd_perm[conv0].shape
n_c_2, _, _, _ = sd_perm[conv1].shape

# The "permutation" used here is a plain reversal of the channel order.
# `idxs_2` is computed but only used by the disabled code below.
idxs_1 = torch.flip(torch.arange(0, n_c_1, dtype=int), dims=[0])
idxs_2 = torch.flip(torch.arange(0, n_c_2, dtype=int), dims=[0])

# Step 1: reorder the output channels of conv0 (dim 0) ...
sd_perm[conv0] = sd_perm[conv0][idxs_1,...]
# ... and apply the same reordering to the BN statistics/affine terms that follow it.
for varname in bn_varnames:
    sd_perm[bn0+'.'+varname] = sd_perm[bn0+'.'+varname][idxs_1, ...]

# Step 2: reorder the input channels of the next conv (dim 1) with the same
# indices so that it consumes the reordered activations in matching order.
sd_perm[conv1] = sd_perm[conv1][:,idxs_1,...]

# Disabled: the analogous permutation between conv1 (outputs) and conv2 (inputs).
# NOTE(review): as written it does not reorder layer1.0.bn2, which sits between
# those two convs in the printed model — confirm before enabling.
# sd_perm[conv1] = sd_perm[conv1][idxs_2,...]
# sd_perm[conv2] = sd_perm[conv2][:,idxs_2,...]

# Persist the permuted weights for the comparison cells below.
torch.save(sd_perm, 'resnet50_perm.pth')

## Check Result

In [38]:
# Dump every entry of the state dict with its shape (no conv/bn name filter).
for k, v in sd.items():
    print(k, v.shape)

conv1.weight torch.Size([64, 3, 7, 7])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
bn1.running_mean torch.Size([64])
bn1.running_var torch.Size([64])
bn1.num_batches_tracked torch.Size([])
layer1.0.conv1.weight torch.Size([64, 64, 1, 1])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.bn1.running_mean torch.Size([64])
layer1.0.bn1.running_var torch.Size([64])
layer1.0.bn1.num_batches_tracked torch.Size([])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight torch.Size([64])
layer1.0.bn2.bias torch.Size([64])
layer1.0.bn2.running_mean torch.Size([64])
layer1.0.bn2.running_var torch.Size([64])
layer1.0.bn2.num_batches_tracked torch.Size([])
layer1.0.conv3.weight torch.Size([256, 64, 1, 1])
layer1.0.bn3.weight torch.Size([256])
layer1.0.bn3.bias torch.Size([256])
layer1.0.bn3.running_mean torch.Size([256])
layer1.0.bn3.running_var torch.Size([256])
layer1.0.bn3.num_batches_tracked torch.Size([])
layer1.0.downsample.0.weight torch

In [39]:
import torch
# Reload both checkpoints from disk rather than relying on kernel state.
sd = torch.load('resnet50_orig.pth')
sd_perm = torch.load('resnet50_perm.pth')

# For every conv/bn entry, report whether the permuted checkpoint still holds
# the same values at the same positions (False marks an entry that differs).
for k, v in sd.items():
    if not ('conv' in k or 'bn' in k):
        continue
    print(k, torch.allclose(v, sd_perm[k]))

conv1.weight True
bn1.weight True
bn1.bias True
bn1.running_mean True
bn1.running_var True
bn1.num_batches_tracked True
layer1.0.conv1.weight False
layer1.0.bn1.weight False
layer1.0.bn1.bias False
layer1.0.bn1.running_mean False
layer1.0.bn1.running_var False
layer1.0.bn1.num_batches_tracked True
layer1.0.conv2.weight False
layer1.0.bn2.weight True
layer1.0.bn2.bias True
layer1.0.bn2.running_mean True
layer1.0.bn2.running_var True
layer1.0.bn2.num_batches_tracked True
layer1.0.conv3.weight True
layer1.0.bn3.weight True
layer1.0.bn3.bias True
layer1.0.bn3.running_mean True
layer1.0.bn3.running_var True
layer1.0.bn3.num_batches_tracked True
layer1.1.conv1.weight True
layer1.1.bn1.weight True
layer1.1.bn1.bias True
layer1.1.bn1.running_mean True
layer1.1.bn1.running_var True
layer1.1.bn1.num_batches_tracked True
layer1.1.conv2.weight True
layer1.1.bn2.weight True
layer1.1.bn2.bias True
layer1.1.bn2.running_mean True
layer1.1.bn2.running_var True
layer1.1.bn2.num_batches_tracked True
laye

In [118]:
import torchvision.models as models
import torch
import torch.nn as nn
import torchvision.models as models
import networkx as nx
import matplotlib.pyplot as plt
from typing import Tuple, Dict, Any, Optional, List
import numpy as np

# Load the reference and the permuted checkpoints.
sd = torch.load('resnet50_orig.pth')
sd_perm = torch.load('resnet50_perm.pth')

# Architecture only; the weights come from the two state dicts above.
model = models.resnet50(weights=None).eval()

# One random probe input shared by both forward passes.
batch_size = 1
input_shape = (3, 224, 224)
x = torch.randn(batch_size, *input_shape).to('cpu')

# Same model object, same input, two different sets of weights.
model.load_state_dict(sd)
output_orig = model(x)

model.load_state_dict(sd_perm)
output_perm = model(x)

# Displayed as the cell result: True when both outputs agree within tolerance.
np.allclose(
    output_orig.detach().numpy(),
    output_perm.detach().numpy(),
    rtol=1e-6,
    atol=1e-6,
)

True

In [30]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# ResNet101

In [1]:
import torchvision.models as models
import torch
import torch.nn as nn
import torchvision.models as models
import networkx as nx
import matplotlib.pyplot as plt
from typing import Tuple, Dict, Any, Optional, List
import numpy as np

# Build ResNet101 without loading pretrained weights; only the architecture is inspected here.
model = models.resnet101(weights=None)

# Bare expression: the notebook renders the module tree as the cell output.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# VGG11

In [None]:
import torchvision.models as models
import torch
import torch.nn as nn
import torchvision.models as models
import networkx as nx
import matplotlib.pyplot as plt
from typing import Tuple, Dict, Any, Optional, List
import numpy as np

# VGG11 with a 10-class head and no pretrained weights, in inference mode.
# NOTE(review): the output stored with this cell shows a download of pretrained
# vgg11 weights, which `weights=None` would not trigger — the output looks stale.
model = models.vgg11(weights=None, num_classes=10).eval()

batch_size = 1
input_shape = (3, 224, 224)

# NOTE(review): draw_graph is imported here but not used anywhere in this cell.
from torchview import draw_graph

# Smoke-test forward pass on one random image; the result is discarded.
x = torch.randn(batch_size, *input_shape).to('cpu')
model(x)

# Save the weights under the name the VGG11 check cell reads back.
sd = model.state_dict()
torch.save(sd, 'vgg11_orig.pth')

Downloading: "https://download.pytorch.org/models/vgg11-8a719046.pth" to /home/danielg/.cache/torch/hub/checkpoints/vgg11-8a719046.pth
100%|██████████| 507M/507M [00:07<00:00, 68.3MB/s] 


In [5]:
# Checkpoint that the VGG11 permutation cells read and derive their output path from.
# NOTE(review): absolute, user-specific path — the notebook will not run on another
# machine without editing this line; consider a configurable checkpoint directory.
sd_orig_path = '/home/danielg/danigil/Maleficnet_Reproduction/maleficnet/checkpoints/vgg11_cifar10_cerber_model.pt'

In [6]:
import torch
# Load the external checkpoint into `sd` for inspection.
# NOTE(review): torch.load unpickles the file; for a checkpoint that did not
# originate in this notebook, consider passing weights_only=True (where the
# installed torch supports it) so only tensors/containers are deserialized.
sd = torch.load(sd_orig_path)

In [7]:
# List every checkpoint entry with its shape; the printed keys carry a
# leading 'model.' prefix, which the permutation cell below relies on.
for k, v in sd.items():
    print(k, v.shape)

model.features.0.weight torch.Size([64, 3, 3, 3])
model.features.0.bias torch.Size([64])
model.features.3.weight torch.Size([128, 64, 3, 3])
model.features.3.bias torch.Size([128])
model.features.6.weight torch.Size([256, 128, 3, 3])
model.features.6.bias torch.Size([256])
model.features.8.weight torch.Size([256, 256, 3, 3])
model.features.8.bias torch.Size([256])
model.features.11.weight torch.Size([512, 256, 3, 3])
model.features.11.bias torch.Size([512])
model.features.13.weight torch.Size([512, 512, 3, 3])
model.features.13.bias torch.Size([512])
model.features.16.weight torch.Size([512, 512, 3, 3])
model.features.16.bias torch.Size([512])
model.features.18.weight torch.Size([512, 512, 3, 3])
model.features.18.bias torch.Size([512])
model.classifier.0.weight torch.Size([4096, 25088])
model.classifier.0.bias torch.Size([4096])
model.classifier.3.weight torch.Size([4096, 4096])
model.classifier.3.bias torch.Size([4096])
model.classifier.6.weight torch.Size([10, 4096])
model.classifie

## Manual Perm

In [4]:
def permute_layers(sd, layer1, layer2):
    """Reverse the output units of ``layer1`` and compensate in ``layer2``, in place.

    The rows (dim 0) of ``{layer1}.weight`` and, when present, the entries of
    ``{layer1}.bias`` are put in reversed order; the columns (dim 1) of
    ``{layer2}.weight`` are reversed with the same indices so that ``layer2``
    reads the reordered outputs in matching order.

    Args:
        sd: Mutable mapping from parameter names to tensors (a state dict).
        layer1: Key prefix of the producing layer (without ``.weight``).
        layer2: Key prefix of the consuming layer (without ``.weight``).

    Returns:
        None. ``sd`` is modified in place.

    Raises:
        KeyError: If ``{layer1}.weight`` or ``{layer2}.weight`` is missing.
        ValueError: If dim 1 of ``{layer2}.weight`` does not equal dim 0 of
            ``{layer1}.weight``. Without this check a wider ``layer2`` (for
            example a linear layer fed by a flattened conv output) would be
            silently truncated by the index selection.
    """
    layer1_w = f'{layer1}.weight'
    layer1_b = f'{layer1}.bias'
    layer2_w = f'{layer2}.weight'

    # Validate everything before touching sd so that a failure leaves it intact.
    n_c_1 = sd[layer1_w].shape[0]
    w2 = sd[layer2_w]
    if w2.dim() < 2 or w2.shape[1] != n_c_1:
        raise ValueError(
            f"cannot pair {layer1_w} ({n_c_1} output channels) with "
            f"{layer2_w} of shape {tuple(w2.shape)}: dim 1 must equal {n_c_1}"
        )

    # The permutation is a plain reversal: n_c_1 - 1, ..., 1, 0.
    idxs_1 = torch.flip(torch.arange(0, n_c_1, dtype=torch.long), dims=[0])

    sd[layer1_w] = sd[layer1_w][idxs_1, ...]
    # Layers created with bias=False have no bias entry; skip it in that case.
    if layer1_b in sd:
        sd[layer1_b] = sd[layer1_b][idxs_1, ...]
    sd[layer2_w] = sd[layer2_w][:, idxs_1, ...]

    return

In [9]:
import torch
# Fresh copy of the checkpoint; permute_layers edits this dict in place.
sd_perm = torch.load(sd_orig_path)

# Indices of the conv entries under model.features. Each consecutive pair is
# permuted together: outputs of the first, inputs of the second. The outputs of
# the last conv (18) are never permuted by this loop.
# NOTE(review): presumably 18 is left out because classifier.0 consumes a
# flattened 25088-wide input rather than 512 channels — confirm.
features = [0, 3, 6, 8, 11, 13, 16, 18]
for ftr1, ftr2 in zip(features, features[1:]):
    layer1 = f'model.features.{ftr1}'
    layer2 = f'model.features.{ftr2}'
    permute_layers(sd_perm, layer1, layer2)

# Same pairing for the linear layers under model.classifier; the outputs of the
# final layer (6) are never permuted by this loop.
mlps = [0,3,6]
for mlp1, mlp2 in zip(mlps, mlps[1:]):
    layer1 = f'model.classifier.{mlp1}'
    layer2 = f'model.classifier.{mlp2}'
    permute_layers(sd_perm, layer1, layer2)

# Output path is derived from the input path. str.replace substitutes every
# occurrence of '.pt', not only the file extension.
sd_perm_path = sd_orig_path.replace('.pt', '_perm.pt')
torch.save(sd_perm, sd_perm_path)

In [112]:
import torchvision.models as models
import torch
import torch.nn as nn
import torchvision.models as models
import networkx as nx
import matplotlib.pyplot as plt
from typing import Tuple, Dict, Any, Optional, List
import numpy as np

# NOTE(review): among the cells visible in this notebook none writes
# 'vgg11_perm.pth' (the permutation cell saves to `sd_perm_path`), and
# 'vgg11_orig.pth' is saved from a num_classes=10 model while the model built
# here uses the default head — this cell appears to depend on files produced by
# an earlier version of the notebook. Verify before re-running.
sd = torch.load('vgg11_orig.pth')
sd_perm = torch.load('vgg11_perm.pth')

# Architecture only; the weights are loaded from the state dicts above.
model = models.vgg11(weights=None).eval()

# One random probe input shared by both forward passes.
batch_size = 1
input_shape = (3, 224, 224)
x = torch.randn(batch_size, *input_shape).to('cpu')

model.load_state_dict(sd)
output_orig = model(x)

model.load_state_dict(sd_perm)
output_perm = model(x)

# Displayed as the cell result: True when both outputs agree within tolerance.
np.allclose(
    output_orig.detach().numpy(),
    output_perm.detach().numpy(),
    rtol=1e-6,
    atol=1e-6,
)

True

In [113]:
# Element-wise absolute gap between the two outputs of the preceding check cell.
diff = np.abs(
    output_orig.detach().numpy() - output_perm.detach().numpy()
)
print("Max absolute difference:", diff.max())
print("Mean absolute difference:", diff.mean())

Max absolute difference: 1.1920929e-06
Mean absolute difference: 2.9355567e-07


In [71]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

# DenseNet121

In [1]:
# Names of the per-channel entries of a normalization layer.
# In this cell the list is referenced only by commented-out code inside
# permute_denselayer; permute_layers defines its own local copy.
norm_varnames = ['weight', 'bias', 'running_mean', 'running_var']

def get_perm_idxs(n_c):
    """Return the indices ``n_c - 1, ..., 1, 0`` as a 1-D integer tensor."""
    ascending = torch.arange(0, n_c, dtype=int)
    return ascending.flip(0)

def print_sd(sd):
    """Print one line per entry of ``sd``: the key followed by the value's ``.shape``."""
    for entry in sd.items():
        name, tensor = entry
        print(name, tensor.shape)
    return

def check(model, sd, sd_perm, batch_size=1, input_shape=(3, 224, 224), rtol=1e-6, atol=1e-6):
    """Compare a model's output under two state dicts on one random input.

    The model is put in eval mode and moved to the CPU, then evaluated on the
    same random tensor once with ``sd`` and once with ``sd_perm`` loaded. The
    result of ``np.allclose`` and the max/mean absolute difference are printed.

    Args:
        model: Module that accepts both state dicts via ``load_state_dict``.
        sd: Reference state dict.
        sd_perm: State dict to compare against the reference.
        batch_size: Leading dimension of the random probe input.
        input_shape: Per-sample shape of the probe input (tuple or list).
        rtol: Relative tolerance passed to ``np.allclose``.
        atol: Absolute tolerance passed to ``np.allclose``.

    Returns:
        None. The function only prints. ``model`` is left with ``sd_perm`` loaded.
    """
    model.eval()
    model = model.to('cpu')

    x = torch.randn((batch_size,) + tuple(input_shape)).to('cpu')

    # Inference only: skip autograd bookkeeping for both forward passes.
    with torch.no_grad():
        model.load_state_dict(sd)
        output_orig = model(x).detach().numpy()

        model.load_state_dict(sd_perm)
        output_perm = model(x).detach().numpy()

    # The defaults are strict; reordering float32 sums can shift results by more
    # than 1e-6, so callers can pass looser tolerances explicitly.
    isclose = np.allclose(output_orig, output_perm, rtol=rtol, atol=atol)
    print("Allclose:", isclose)

    diff = np.abs(output_orig - output_perm)
    print("Max absolute difference:", diff.max())
    print("Mean absolute difference:", diff.mean())

    return

def permute_layers(sd, layer1, layer2=None, bn_layer=None):
    """Reverse the output channels of ``layer1`` and propagate the change, in place.

    Dim 0 of ``{layer1}.weight`` (and ``{layer1}.bias`` when present) is put in
    reversed order. When ``bn_layer`` is given, its ``weight``, ``bias``,
    ``running_mean`` and ``running_var`` entries are reversed the same way.
    When ``layer2`` is given, dim 1 of ``{layer2}.weight`` is reversed with the
    same indices.

    All keys and shapes are validated before anything is written, so a failing
    call leaves ``sd`` unchanged instead of half-permuted.

    Args:
        sd: Mutable mapping from parameter names to tensors (a state dict).
        layer1: Key prefix of the producing layer (without ``.weight``).
        layer2: Optional key prefix of the consuming layer.
        bn_layer: Optional key prefix of a normalization layer that sits
            between ``layer1`` and ``layer2``.

    Returns:
        None. ``sd`` is modified in place.

    Raises:
        KeyError: If a required entry is missing from ``sd``.
        ValueError: If a normalization entry or dim 1 of ``{layer2}.weight``
            does not match the number of output channels of ``layer1``.
    """
    layer1_w = f'{layer1}.weight'
    layer1_b = f'{layer1}.bias'

    n_c_1 = sd[layer1_w].shape[0]

    # --- validation phase: look up every entry and check its size first ---
    bn_keys = []
    if bn_layer is not None:
        bn_varnames = ['weight', 'bias', 'running_mean', 'running_var']
        bn_keys = [f'{bn_layer}.{varname}' for varname in bn_varnames]
        for key in bn_keys:
            if tuple(sd[key].shape[:1]) != (n_c_1,):
                raise ValueError(
                    f"{key} has shape {tuple(sd[key].shape)}, expected "
                    f"{n_c_1} channels to match {layer1_w}"
                )

    layer2_w = None
    if layer2 is not None:
        layer2_w = f'{layer2}.weight'
        w2 = sd[layer2_w]
        # A wider layer2 would otherwise be silently truncated by the indexing.
        if w2.dim() < 2 or w2.shape[1] != n_c_1:
            raise ValueError(
                f"cannot pair {layer1_w} ({n_c_1} output channels) with "
                f"{layer2_w} of shape {tuple(w2.shape)}: dim 1 must equal {n_c_1}"
            )

    # --- mutation phase: the permutation is a plain reversal ---
    idxs_1 = torch.flip(torch.arange(0, n_c_1, dtype=torch.long), dims=[0])

    sd[layer1_w] = sd[layer1_w][idxs_1, ...]
    if layer1_b in sd:
        sd[layer1_b] = sd[layer1_b][idxs_1, ...]

    for key in bn_keys:
        sd[key] = sd[key][idxs_1, ...]

    if layer2_w is not None:
        sd[layer2_w] = sd[layer2_w][:, idxs_1, ...]

    return

def permute_denselayer(sd, layer):
    """Permute the channels between conv1 and conv2 of one dense layer, in place.

    Delegates to ``permute_layers`` with ``{layer}.conv1`` as the producing
    layer, ``{layer}.norm2`` as the normalization in between and
    ``{layer}.conv2`` as the consuming layer.

    Only the conv1 -> norm2 -> conv2 path is touched; ``{layer}.norm1`` and the
    input side of conv1 are left as they are.
    NOTE(review): presumably because norm1 consumes features produced outside
    this layer, so reordering it would need matching changes upstream — confirm.
    """
    prefixes = {part: f'{layer}.{part}' for part in ('conv1', 'norm2', 'conv2')}
    permute_layers(
        sd,
        prefixes['conv1'],
        prefixes['conv2'],
        bn_layer=prefixes['norm2'],
    )

In [20]:
import torchvision.models as models
import torch
import torch.nn as nn
import torchvision.models as models
import networkx as nx
import matplotlib.pyplot as plt
from typing import Tuple, Dict, Any, Optional, List
import numpy as np

# DenseNet121 without pretrained weights, with the classifier replaced by a
# 10-output linear head of matching input width.
model = models.densenet121(weights=None)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 10)

model.eval()

batch_size = 1
input_shape = (3, 224, 224)

# Smoke-test forward pass on one random image; the result is discarded.
x = torch.randn((batch_size,)+input_shape).to('cpu')
model(x)

# The state dict used from here on comes from an external checkpoint, not from
# `model` (the alternative is kept commented out below).
# NOTE(review): absolute, user-specific path; and torch.load unpickles the file —
# consider weights_only=True (where supported) for a checkpoint from elsewhere.
sd = torch.load('/home/ran/daniel/NeuPerm/mymalefic/checkpoints/densenet121_cifar10_stuxnet_model.pt')
# sd = model.state_dict()
# Re-save under a local name; the permutation cell derives its output path from it.
sd_orig_path = 'densenet121_orig_mal.pth'
torch.save(sd, sd_orig_path)

  sd = torch.load('/home/ran/daniel/NeuPerm/mymalefic/checkpoints/densenet121_cifar10_stuxnet_model.pt')


In [None]:
import torch
# Fresh copy of the checkpoint; permute_denselayer edits this dict in place.
sd_perm = torch.load(sd_orig_path)

# Collect the key prefix of every dense layer, i.e. everything up to and
# including the 'denselayerN' component. Locating that component (instead of
# taking a fixed number of leading components) keeps this correct when the keys
# carry a wrapper prefix such as 'model.', which the check cell strips later.
denselayer_names = set()
for k in sd_perm.keys():
    parts = k.split('.')
    for depth, part in enumerate(parts, start=1):
        if part.startswith('denselayer'):
            denselayer_names.add('.'.join(parts[:depth]))
            break

# Each call touches only entries under its own prefix; sorting just makes the
# processing order reproducible between runs.
for layer in sorted(denselayer_names):
    permute_denselayer(sd_perm, layer)

# Output path mirrors the input path with 'orig' replaced by 'perm'.
sd_perm_path = sd_orig_path.replace('orig', 'perm')
torch.save(sd_perm, sd_perm_path)

  sd_perm = torch.load(sd_orig_path)


In [24]:
import collections
import torchvision.models as models
import torch
import torch.nn as nn
import torchvision.models as models
import networkx as nx
import matplotlib.pyplot as plt
from typing import Tuple, Dict, Any, Optional, List
import numpy as np

# Reload both checkpoints from disk.
sd = torch.load(sd_orig_path)
sd_perm = torch.load(sd_perm_path)

# Drop the 'model.' substring from every key so the names line up with the
# torchvision module held in `model`.
sd_new = collections.OrderedDict(
    (k.replace('model.', ''), tensor) for k, tensor in sd.items()
)
sd_perm_new = collections.OrderedDict(
    (k.replace('model.', ''), tensor) for k, tensor in sd_perm.items()
)

sd = sd_new
sd_perm = sd_perm_new

# Reuses the `model` built in the setup cell (10-output classifier).
# NOTE(review): the recorded run prints "Allclose: False" with a max absolute
# difference of about 1.9e-05; that is likely float32 reordering noise exceeding
# check()'s 1e-6 tolerances rather than a wrong permutation — confirm.
check(model, sd, sd_perm)

  sd = torch.load(sd_orig_path)
  sd_perm = torch.load(sd_perm_path)


Allclose: False
Max absolute difference: 1.9073486e-05
Mean absolute difference: 5.2452087e-06


In [10]:
print_sd(sd_perm)

features.conv0.weight torch.Size([64, 3, 7, 7])
features.norm0.weight torch.Size([64])
features.norm0.bias torch.Size([64])
features.norm0.running_mean torch.Size([64])
features.norm0.running_var torch.Size([64])
features.norm0.num_batches_tracked torch.Size([])
features.denseblock1.denselayer1.norm1.weight torch.Size([64])
features.denseblock1.denselayer1.norm1.bias torch.Size([64])
features.denseblock1.denselayer1.norm1.running_mean torch.Size([64])
features.denseblock1.denselayer1.norm1.running_var torch.Size([64])
features.denseblock1.denselayer1.norm1.num_batches_tracked torch.Size([])
features.denseblock1.denselayer1.conv1.weight torch.Size([128, 64, 1, 1])
features.denseblock1.denselayer1.norm2.weight torch.Size([128])
features.denseblock1.denselayer1.norm2.bias torch.Size([128])
features.denseblock1.denselayer1.norm2.running_mean torch.Size([128])
features.denseblock1.denselayer1.norm2.running_var torch.Size([128])
features.denseblock1.denselayer1.norm2.num_batches_tracked torc

In [11]:
model

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu