# Phase2 3 SHAP & Path Extraction

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import models

import shap

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Prefix prepended to every file name this notebook loads or saves.
folder_path = ''

# Length of the placeholder array allocated by the concatenate_* helpers
# (get_layer_shap returns ten arrays).
NUM_LAYER = 10

In [None]:
# Build a ResNet-18 whose classifier head has 10 outputs, then restore the
# weights stored in 'cifar10_resnet18.pt'.
model = models.resnet18(pretrained=True)
num_frts = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_frts, 10)
    )

model = model.to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from CUDA tensors still loads on a CPU-only machine.
model.load_state_dict(torch.load(folder_path+'cifar10_resnet18.pt', map_location=device))
# Switch to evaluation mode before computing attributions.
model = model.eval()

In [None]:
def get_layer_shap(background, image):
  """Compute DeepExplainer SHAP values at ten points of the global ``model``.

  The first result explains the model input itself. Every following result
  is obtained from an explainer built on ``(model, submodule)`` for, in
  order: conv1, bn1, maxpool, layer1, layer2, layer3, layer4, avgpool, fc.

  Args:
    background: samples handed to ``shap.DeepExplainer`` as background data.
    image: samples to explain.

  Returns:
    A 10-tuple of NumPy arrays in the order listed above. Each array is the
    first element returned by ``shap_values(image, 1)``, converted with
    ``np.asarray`` and squeezed.
  """
  # None stands for "explain the raw model input".
  explained_points = (
      None,
      model.conv1,
      model.bn1,
      model.maxpool,
      model.layer1,
      model.layer2,
      model.layer3,
      model.layer4,
      model.avgpool,
      model.fc,
  )

  attributions = []
  for submodule in explained_points:
    target = model if submodule is None else (model, submodule)
    explainer = shap.DeepExplainer(target, background)
    # The second positional argument of shap_values is ranked_outputs in the
    # shap API; element [0] of the result holds the SHAP values.
    values = explainer.shap_values(image, 1)[0]
    # NOTE(review): squeeze() without an axis removes every singleton axis,
    # which would include the batch axis if a single image is passed.
    attributions.append(np.asarray(values).squeeze())

  return tuple(attributions)

In [None]:
def get_layer_most(layer_shap):
  """Mark, per sample, the position(s) holding that sample's largest value.

  Args:
    layer_shap: SHAP values indexed by sample along the first axis.

  Returns:
    A copy of ``layer_shap`` in which every sample holds 1 where it equals
    its own maximum and 0 everywhere else. The input is left untouched.

  Side effects:
    Prints the number of marked positions of the first sample.
  """
  mask = layer_shap.copy()

  for idx, sample in enumerate(mask):
    peak = np.max(sample)
    # The right-hand side is fully evaluated before the sample is overwritten.
    mask[idx] = np.where(sample == peak, 1, 0)

    if idx == 0:
      print("Critical neuron: ", np.sum(mask[idx]))

  return mask

In [None]:
def get_layer_quantile(layer_shap, alpha):
  """Binarise each sample against its own ``alpha`` quantile.

  Args:
    layer_shap: SHAP values indexed by sample along the first axis.
    alpha: quantile level forwarded to ``np.quantile``.

  Returns:
    A copy of ``layer_shap`` in which every sample holds 0 where the value
    lies below that sample's quantile and 1 otherwise. The input is left
    untouched.

  Side effects:
    Prints the number of positions set to 1 in the first sample.
  """
  mask = layer_shap.copy()

  for idx, sample in enumerate(mask):
    threshold = np.quantile(sample, alpha)
    # The right-hand side is fully evaluated before the sample is overwritten.
    mask[idx] = np.where(sample < threshold, 0, 1)

    if idx == 0:
      print("Critical neuron: ", np.sum(mask[idx]))

  return mask

In [None]:
def concatenate_shap_layer(layers_shap):
  """Flatten every layer's SHAP values per sample and join them side by side.

  Args:
    layers_shap: non-empty sequence of arrays, one per layer, each indexed
      by sample along the first axis and having at least two axes.

  Returns:
    A 2-D NumPy array with one row per sample; the columns are the
    flattened values of each layer in the given order. Values pass through
    ``torch.Tensor`` and therefore come back in torch's default tensor
    dtype.

  Raises:
    ValueError: if ``layers_shap`` is empty. The earlier implementation
      returned an uninitialised placeholder array in that case.
  """
  if len(layers_shap) == 0:
    raise ValueError("layers_shap must contain at least one layer")

  # Keep everything from axis 1 onwards in one flat feature axis per sample.
  flat_layers = [
      np.asarray(torch.flatten(torch.Tensor(layer), start_dim=1))
      for layer in layers_shap
  ]

  # One concatenate call instead of re-copying the growing result per layer.
  return np.concatenate(flat_layers, axis=1)

In [None]:
def concatenate_most_layer(layers_shap):
  """Build one boolean row per sample from the per-layer maximum masks.

  Each layer is passed through ``get_layer_most``, flattened per sample and
  the results are joined side by side.

  Args:
    layers_shap: non-empty sequence of arrays, one per layer, each indexed
      by sample along the first axis and having at least two axes.

  Returns:
    A 2-D boolean NumPy array with one row per sample.

  Raises:
    ValueError: if ``layers_shap`` is empty. The earlier implementation
      returned an uninitialised placeholder array in that case.
  """
  if len(layers_shap) == 0:
    raise ValueError("layers_shap must contain at least one layer")

  # Keep everything from axis 1 onwards in one flat feature axis per sample.
  flat_masks = [
      np.asarray(torch.flatten(torch.Tensor(get_layer_most(layer)), start_dim=1))
      for layer in layers_shap
  ]

  # One concatenate call instead of re-copying the growing result per layer.
  return np.concatenate(flat_masks, axis=1).astype('bool')

In [None]:
def concatenate_path_layer(alpha, layers_shap):
  """Build one boolean row per sample from the per-layer quantile masks.

  Each layer is passed through ``get_layer_quantile`` with ``alpha``,
  flattened per sample and the results are joined side by side.

  Args:
    alpha: quantile level forwarded to ``get_layer_quantile``.
    layers_shap: non-empty sequence of arrays, one per layer, each indexed
      by sample along the first axis and having at least two axes.

  Returns:
    A 2-D boolean NumPy array with one row per sample.

  Raises:
    ValueError: if ``layers_shap`` is empty. The earlier implementation
      returned an uninitialised placeholder array in that case.

  Side effects:
    Prints ``alpha`` and the number of True entries divided by the length
    of the first layer.
  """
  if len(layers_shap) == 0:
    raise ValueError("layers_shap must contain at least one layer")

  # Keep everything from axis 1 onwards in one flat feature axis per sample.
  flat_masks = [
      np.asarray(torch.flatten(torch.Tensor(get_layer_quantile(layer, alpha)), start_dim=1))
      for layer in layers_shap
  ]

  # One concatenate call instead of re-copying the growing result per layer.
  all_layer = np.concatenate(flat_masks, axis=1).astype('bool')

  print(alpha, " All critical neuron: ", np.sum(all_layer)/len(layers_shap[0]))

  return all_layer

In [None]:
def _save_layer_and_pred(stem, kind, layer_values, preds):
  """Save ``<stem>_layer_<kind>.npy`` and ``<stem>_pred_<kind>.npy`` under folder_path."""
  np.save(folder_path + stem + '_layer_' + kind + '.npy', layer_values)
  np.save(folder_path + stem + '_pred_' + kind + '.npy', preds)


def generate_attack_layer(attack, num_background, alpha=(), approach=''):
  """Compute layer-wise SHAP values for one attack and save the chosen encodings.

  Loads ``images_<attack>.npy`` and ``labels_<attack>.npy`` from
  ``folder_path``. The first ``num_background`` images become the SHAP
  background and the remaining images are explained with ``get_layer_shap``.

  Args:
    attack: name used to build the input and output file names.
    num_background: number of leading samples used as background.
    alpha: iterable of quantile levels, used by the 'path' and 'multi'
      approaches only.
    approach: one of
      'shap'  - save the concatenated raw SHAP values,
      'most'  - save the concatenated per-sample maximum masks,
      'path'  - save one concatenated quantile mask per entry of ``alpha``,
      'multi' - all of the above, printing a label before each step.
      Any other value prints "Error approach" and nothing is saved.

  Returns:
    None. Results are written as ``.npy`` files under ``folder_path``, each
    next to a copy of ``labels[num_background:]``.
  """
  print(attack)
  images = np.load(folder_path + 'images_'+str(attack)+'.npy')
  labels = np.load(folder_path + 'labels_'+str(attack)+'.npy')
  images = torch.tensor(images)
  labels = torch.tensor(labels)

  test_background = images[:num_background].to(device)
  test_images = images[num_background:].to(device)
  print(approach)

  # Reject an unknown approach before the expensive SHAP pass instead of after.
  if approach not in ('shap', 'most', 'path', 'multi'):
    print("Error approach")
    return

  layer = get_layer_shap(test_background, test_images)
  preds = labels[num_background:]
  run_all = approach == 'multi'

  if run_all or approach == 'shap':
    if run_all:
      print("shap")
    _save_layer_and_pred(str(attack), 'shap', concatenate_shap_layer(layer), preds)

  if run_all or approach == 'most':
    if run_all:
      print("most")
    _save_layer_and_pred(str(attack), 'most', concatenate_most_layer(layer), preds)

  if run_all or approach == 'path':
    for test_alpha in alpha:
      if run_all:
        print(test_alpha)
      path_layer = concatenate_path_layer(test_alpha, layer)
      # Path files carry the quantile level as a prefix of the attack name.
      _save_layer_and_pred(str(test_alpha) + str(attack), 'path', path_layer, preds)

In [None]:
def generate_attack_single(attack, num_background, layer='input'):
  """Compute SHAP values at a single point of the global ``model`` and save them.

  Loads ``images_<attack>.npy`` and ``labels_<attack>.npy`` from
  ``folder_path``. The first ``num_background`` images become the SHAP
  background and the remaining images are explained.

  Args:
    attack: name used to build the input and output file names.
    num_background: number of leading samples used as background.
    layer: 'input' to explain the model input, or the attribute name of a
      submodule of ``model`` (for example 'layer4' or 'avgpool'), in which
      case the explainer is built on ``(model, submodule)``.

  Returns:
    None. Writes ``<attack>_<layer>_shap.npy`` and
    ``<attack>_<layer>_pred.npy`` under ``folder_path``.

  Raises:
    ValueError: if ``layer`` is neither 'input' nor the name of a submodule
      of ``model``. This used to surface as an UnboundLocalError only after
      the data had been loaded.
  """
  # Resolve the explanation target first so a bad name fails before any I/O.
  if layer == 'input':
    target = model
  else:
    submodule = getattr(model, layer, None) if isinstance(layer, str) else None
    if not isinstance(submodule, nn.Module):
      raise ValueError(
          "layer must be 'input' or the name of a submodule of the model, got "
          + repr(layer))
    target = (model, submodule)

  print(attack)
  images = np.load(folder_path + 'images_'+str(attack)+'.npy')
  labels = np.load(folder_path + 'labels_'+str(attack)+'.npy')
  images = torch.tensor(images)
  labels = torch.tensor(labels)

  test_background = images[:num_background].to(device)
  test_images = images[num_background:].to(device)
  print(layer)

  test_layer = shap.DeepExplainer(target, test_background)

  # The second positional argument of shap_values is ranked_outputs in the
  # shap API; element [0] of the result holds the SHAP values.
  shap_layer = np.asarray(test_layer.shap_values(test_images, 1)[0]).squeeze()

  np.save(folder_path + str(attack)+'_'+str(layer)+'_shap.npy', shap_layer)
  np.save(folder_path + str(attack)+'_'+str(layer)+'_pred.npy', labels[num_background:])

In [None]:
# Export raw SHAP values, maximum masks and quantile masks for every attack.
attack_name = ['pgd', 'deepfool', 'fgsm']

for attack in attack_name:
  generate_attack_layer(
      attack,
      100,  # leading samples of each file used as SHAP background
      [0.05, 0.25, 0.5, 0.75, 0.95],  # quantile levels for the path masks
      approach='multi',
  )

In [None]:
# Export SHAP values with respect to the model input for every attack.
attack_name = ['pgd', 'deepfool', 'fgsm']

for attack in attack_name:
  generate_attack_single(
      attack,
      100,  # leading samples of each file used as SHAP background
      layer='input',
  )

In [None]:
# Export SHAP values taken at the avgpool submodule for every attack.
attack_name = ['pgd', 'deepfool', 'fgsm']

for attack in attack_name:
  generate_attack_single(
      attack,
      100,  # leading samples of each file used as SHAP background
      layer='avgpool',
  )

In [None]:
# Export SHAP values taken at the layer4 submodule for every attack.
attack_name = ['pgd', 'deepfool', 'fgsm']

for attack in attack_name:
  generate_attack_single(
      attack,
      100,  # leading samples of each file used as SHAP background
      layer='layer4',
  )