## Calculate Instability values and UNR from saved instability masks

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as  plt
import pickle
import io
import os
# Switch to the parent of the directory the kernel started in, so that the
# relative paths used further down resolve from there.
# The start directory is remembered on first execution: a plain relative
# `os.chdir('../')` would climb one level higher every time this cell is re-run.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the pickled instability masks. The path is relative, so it is
# resolved against the working directory selected by the os.chdir call in the
# import cell.
path = 'VERIFY/CNN-B Example/Baseline/instability_masks.pkl'

In [3]:
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that maps torch storages embedded in a pickle onto the CPU.

    Presumably needed so that masks pickled from GPU tensors can be opened on
    a machine without that device -- TODO confirm against how the file is
    produced.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """

    def find_class(self, module, name):
        """Resolve ``module.name`` for the unpickler.

        The lookup of ``torch.storage._load_from_bytes`` is replaced by a
        loader that calls ``torch.load`` with ``map_location='cpu'``; every
        other lookup is delegated to ``pickle.Unpickler.find_class``.
        """
        wants_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not wants_storage_loader:
            return super().find_class(module, name)

        def load_on_cpu(raw_bytes):
            # Wrap the raw bytes in a file-like object, as torch.load expects.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return load_on_cpu

In [4]:
# Deserialize the saved masks with the CPU-mapping unpickler, then report how
# many entries were loaded and under which keys.
with open(path, mode='rb') as mask_file:
    unpickler = CPU_Unpickler(mask_file)
    masks = unpickler.load()
print(len(masks))
print(masks.keys())

456
dict_keys([0, 1, 2, 3, 4, 6, 8, 9, 10, 14, 15, 17, 18, 19, 29, 30, 34, 39, 40, 43, 44, 46, 48, 49, 50, 51, 54, 55, 62, 64, 66, 71, 72, 73, 75, 77, 78, 80, 83, 84, 88, 89, 90, 93, 94, 96, 97, 99, 101, 102, 103, 107, 108, 111, 112, 113, 114, 116, 117, 120, 122, 124, 127, 130, 132, 134, 135, 136, 138, 140, 142, 144, 146, 148, 149, 150, 152, 154, 155, 156, 159, 160, 161, 163, 166, 169, 170, 172, 173, 174, 179, 180, 181, 182, 185, 191, 193, 194, 197, 199, 200, 202, 204, 205, 206, 207, 208, 209, 216, 217, 219, 220, 224, 225, 230, 234, 236, 241, 243, 246, 248, 251, 259, 260, 261, 265, 267, 268, 272, 274, 276, 280, 281, 282, 291, 292, 296, 298, 299, 300, 301, 304, 305, 307, 310, 312, 315, 316, 317, 318, 322, 325, 326, 327, 328, 329, 333, 334, 335, 336, 337, 338, 341, 342, 344, 347, 349, 350, 351, 358, 359, 360, 361, 365, 369, 375, 379, 380, 381, 386, 389, 390, 392, 395, 396, 404, 407, 408, 409, 410, 411, 413, 414, 419, 420, 424, 428, 431, 438, 440, 443, 445, 448, 449, 450, 451, 452, 453, 4

In [5]:
# Remove entries whose mask list is empty.
# Every value of `masks` is unpacked as an (image id, mask list) pair.
# The image id is bound to `img_id` inside a comprehension rather than to a
# notebook-global `id`, which would shadow the builtin `id()` for the rest of
# the kernel session. Emptiness is checked with `len`, so any empty sequence
# (not only an empty list literal) is dropped.
new_masks = {
    k: (img_id, m)
    for k, (img_id, m) in masks.items()
    if len(m) > 0
}
print(len(new_masks))
print(new_masks.keys())
masks = new_masks

456
dict_keys([0, 1, 2, 3, 4, 6, 8, 9, 10, 14, 15, 17, 18, 19, 29, 30, 34, 39, 40, 43, 44, 46, 48, 49, 50, 51, 54, 55, 62, 64, 66, 71, 72, 73, 75, 77, 78, 80, 83, 84, 88, 89, 90, 93, 94, 96, 97, 99, 101, 102, 103, 107, 108, 111, 112, 113, 114, 116, 117, 120, 122, 124, 127, 130, 132, 134, 135, 136, 138, 140, 142, 144, 146, 148, 149, 150, 152, 154, 155, 156, 159, 160, 161, 163, 166, 169, 170, 172, 173, 174, 179, 180, 181, 182, 185, 191, 193, 194, 197, 199, 200, 202, 204, 205, 206, 207, 208, 209, 216, 217, 219, 220, 224, 225, 230, 234, 236, 241, 243, 246, 248, 251, 259, 260, 261, 265, 267, 268, 272, 274, 276, 280, 281, 282, 291, 292, 296, 298, 299, 300, 301, 304, 305, 307, 310, 312, 315, 316, 317, 318, 322, 325, 326, 327, 328, 329, 333, 334, 335, 336, 337, 338, 341, 342, 344, 347, 349, 350, 351, 358, 359, 360, 361, 365, 369, 375, 379, 380, 381, 386, 389, 390, 392, 395, 396, 404, 407, 408, 409, 410, 411, 413, 414, 419, 420, 424, 428, 431, 438, 440, 443, 445, 448, 449, 450, 451, 452, 453, 4

In [6]:
# Calculate UNR (presumably "unstable neuron ratio" -- TODO confirm the acronym).
# For every entry: sum of all mask values across its layers, divided by the
# total number of mask elements across its layers. The reported total is the
# unweighted mean of these per-entry ratios.
if len(masks) == 0:
    # Without this guard the final division fails with a bare ZeroDivisionError.
    raise ValueError('Cannot compute UNR: `masks` is empty.')

ratio = 0
counter = len(masks)
for k, (i, mask) in masks.items():
    unstable_count = sum(layer.sum() for layer in mask)
    element_count = sum(layer.numel() for layer in mask)
    temp_ratio = unstable_count / element_count
    print(f'idx:img {k}:{i} UNR ratio: {temp_ratio*100:.1f}%')
    ratio += temp_ratio
ratio = ratio / counter
print(f'Total UNR ratio: {ratio*100:.2f}%')

idx:img 0:0 UNR ratio: 14.8%
idx:img 1:1 UNR ratio: 12.2%
idx:img 2:2 UNR ratio: 14.5%
idx:img 3:3 UNR ratio: 14.4%
idx:img 4:4 UNR ratio: 16.0%
idx:img 6:6 UNR ratio: 12.1%
idx:img 8:8 UNR ratio: 16.1%
idx:img 9:9 UNR ratio: 12.9%
idx:img 10:10 UNR ratio: 17.4%
idx:img 14:14 UNR ratio: 13.3%
idx:img 15:15 UNR ratio: 12.7%
idx:img 17:17 UNR ratio: 14.5%
idx:img 18:18 UNR ratio: 12.7%
idx:img 19:19 UNR ratio: 14.9%
idx:img 29:29 UNR ratio: 15.9%
idx:img 30:30 UNR ratio: 18.4%
idx:img 34:34 UNR ratio: 16.2%
idx:img 39:39 UNR ratio: 14.2%
idx:img 40:40 UNR ratio: 11.9%
idx:img 43:43 UNR ratio: 21.3%
idx:img 44:44 UNR ratio: 13.2%
idx:img 46:46 UNR ratio: 16.5%
idx:img 48:48 UNR ratio: 18.0%
idx:img 49:49 UNR ratio: 20.2%
idx:img 50:50 UNR ratio: 13.5%
idx:img 51:51 UNR ratio: 18.5%
idx:img 54:54 UNR ratio: 13.1%
idx:img 55:55 UNR ratio: 21.9%
idx:img 62:62 UNR ratio: 11.8%
idx:img 64:64 UNR ratio: 18.8%
idx:img 66:66 UNR ratio: 15.7%
idx:img 71:71 UNR ratio: 17.4%
idx:img 72:72 UNR ratio:

In [7]:
# Sum, for every neuron, the number of inputs on which it was marked unstable.
# The accumulator is shaped after the first stored mask list instead of
# `masks[1]`: the keys of `masks` are sparse, so key 1 is not guaranteed to
# exist. Assumes every entry has the same layer shapes and dtype -- TODO confirm.
if len(masks) == 0:
    raise ValueError('Cannot accumulate scores: `masks` is empty.')

first_mask_list = next(iter(masks.values()))[1]
score = [torch.zeros_like(layer) for layer in first_mask_list]
# The image id is bound to a throwaway name other than `_`, which IPython
# uses for the last cell output.
for _img, mask_list in masks.values():
    score = [acc + mask for acc, mask in zip(score, mask_list)]
score

[tensor([[ 7., 30., 35.,  ..., 19., 25., 12.]]),
 tensor([[121., 107.,  97.,  ...,  86.,  84.,  44.]]),
 tensor([[115., 227., 236., 231.,  91., 246.,   0., 136., 163., 254., 230., 185.,
          184., 237., 122., 264., 214., 157., 224., 221., 236., 148.,  48.,  55.,
          233., 212., 207., 302., 199., 166., 201., 279., 165., 244., 157., 147.,
          210., 167., 217., 199., 136., 113., 170., 255.,  82.,   0., 164., 181.,
           43., 135.,  77., 161., 160., 135., 237., 130., 175., 227., 123., 214.,
          233., 159., 137., 182., 140., 238., 218., 217., 225., 240., 184., 228.,
          204., 188., 127., 175., 187., 229., 252., 183., 194., 231., 175., 234.,
          138., 162., 149., 198., 223., 229., 260., 288., 212., 187.,  91., 214.,
          276., 224., 231., 139., 221., 165.,  94., 237., 187., 232., 157., 235.,
          214., 245., 280., 185., 192., 170., 165., 164., 136., 201., 179., 167.,
          285., 190., 211., 211.,  59., 193., 270., 212., 207., 181., 137., 

In [8]:
# Depending on the architecture, each layer's score may carry a leading
# singleton dimension (shape [1, N]) that has to be removed.
# The dimension is dropped only when it is actually present, so re-running
# this cell is a no-op instead of stripping one more dimension each time.
score = [
    layer[0] if layer.dim() > 1 and layer.size(0) == 1 else layer
    for layer in score
]

In [9]:
# Display the current per-layer `score` list.
score

[tensor([ 7., 30., 35.,  ..., 19., 25., 12.]),
 tensor([121., 107.,  97.,  ...,  86.,  84.,  44.]),
 tensor([115., 227., 236., 231.,  91., 246.,   0., 136., 163., 254., 230., 185.,
         184., 237., 122., 264., 214., 157., 224., 221., 236., 148.,  48.,  55.,
         233., 212., 207., 302., 199., 166., 201., 279., 165., 244., 157., 147.,
         210., 167., 217., 199., 136., 113., 170., 255.,  82.,   0., 164., 181.,
          43., 135.,  77., 161., 160., 135., 237., 130., 175., 227., 123., 214.,
         233., 159., 137., 182., 140., 238., 218., 217., 225., 240., 184., 228.,
         204., 188., 127., 175., 187., 229., 252., 183., 194., 231., 175., 234.,
         138., 162., 149., 198., 223., 229., 260., 288., 212., 187.,  91., 214.,
         276., 224., 231., 139., 221., 165.,  94., 237., 187., 232., 157., 235.,
         214., 245., 280., 185., 192., 170., 165., 164., 136., 201., 179., 167.,
         285., 190., 211., 211.,  59., 193., 270., 212., 207., 181., 137., 208.,
         

In [12]:
# Inspect the shape of every per-layer score tensor.
[layer.shape for layer in score]

[torch.Size([32, 16, 16]), torch.Size([128, 8, 8]), torch.Size([250])]

In [11]:
# Reshape each layer's score to the layout of the corresponding network layer.
# The target shapes depend on the network architecture; adjust LAYER_SHAPES
# when a different model is analysed.
LAYER_SHAPES = [(32, 16, 16), (128, 8, 8), (250,)]
if len(score) != len(LAYER_SHAPES):
    # Fail loudly instead of silently ignoring extra layers.
    raise ValueError(
        f'Expected {len(LAYER_SHAPES)} score layers, got {len(score)}; '
        'update LAYER_SHAPES to match the network architecture.'
    )
score = [layer.reshape(shape) for layer, shape in zip(score, LAYER_SHAPES)]

In [14]:
# Persist the per-layer instability indicator values to disk.
score_path = "CNN-B_CIFAR10/Pretrained/Own Scores/TEST_score_instability.pth"
torch.save(score, score_path)

In [13]:
# Mean instability score of each layer.
[torch.mean(layer) for layer in score]

[tensor(79.2579), tensor(61.7485), tensor(188.1160)]