# Phase4 Path Abstraction

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

import torch
import torch.nn as nn
import torchvision.utils
from torchvision import models

import torchattacks
import shap
import sklearn
from sklearn import utils

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# Prefix prepended (by plain string concatenation) to every file name that is
# read or written below, so a non-empty value must end with a path separator.
folder_path = ''

In [None]:
# Build a ResNet-18 whose classifier head is replaced by a 10-way linear layer.
# The head is wrapped in nn.Sequential, so its parameters are named fc.0.*;
# presumably this matches the key names stored in the checkpoint -- TODO confirm.
model = models.resnet18(pretrained=True)
num_frts = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_frts, 10)
    )

model = model.to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU can still be loaded when only the CPU is available.
model.load_state_dict(torch.load(folder_path+'cifar10_resnet18.pt', map_location=device))
# Inference mode: batch-norm layers use their running statistics.
model = model.eval()

In [None]:
def load_concatenate_data():
  """Load the DeepFool, FGSM and PGD arrays, merge them and shuffle them.

  For each attack an image file and a label file are read from
  ``folder_path``. The shapes of every pair are printed, the arrays are
  concatenated along the first axis, images and labels are shuffled with the
  same permutation, and both are converted to torch tensors.

  Returns:
    A pair ``(images, labels)`` of torch tensors; their shapes are printed
    just before returning.
  """
  sources = (
    ('images_deepfool.npy', 'labels_deepfool.npy'),
    ('images_fgsm.npy', 'labels_fgsm.npy'),
    ('images_pgd.npy', 'labels_pgd.npy'),
  )

  # Read everything first so a missing file fails before anything is printed.
  image_sets = []
  label_sets = []
  for image_file, label_file in sources:
    image_sets.append(np.load(folder_path + image_file))
    label_sets.append(np.load(folder_path + label_file))

  for attack_images, attack_labels in zip(image_sets, label_sets):
    print(attack_images.shape, attack_labels.shape)

  merged_images = np.concatenate(image_sets)
  merged_labels = np.concatenate(label_sets)
  # Shuffle both arrays together so each image keeps its own label.
  merged_images, merged_labels = utils.shuffle(merged_images, merged_labels)

  image_tensor = torch.tensor(merged_images)
  label_tensor = torch.tensor(merged_labels)
  print(image_tensor.shape, label_tensor.shape)

  return image_tensor, label_tensor

In [None]:
def get_layer_shap(background, image):
  """Compute SHAP values at the network input and at nine ResNet-18 stages.

  A ``shap.DeepExplainer`` is built for each target in turn, using the
  module-level ``model`` and ``background`` as reference data, and is asked
  for the SHAP values of ``image``.

  Args:
    background: reference samples handed to every ``DeepExplainer``.
    image: samples to explain.

  Returns:
    A 10-tuple of squeezed numpy arrays in the order input, conv1, bn1,
    maxpool, layer1, layer2, layer3, layer4, avgpool, fc.
  """
  # Per the SHAP API, passing the bare model explains the network input, while
  # a (model, layer) pair yields values for the input of that layer.
  targets = (
    model,
    (model, model.conv1),
    (model, model.bn1),
    (model, model.maxpool),
    (model, model.layer1),
    (model, model.layer2),
    (model, model.layer3),
    (model, model.layer4),
    (model, model.avgpool),
    (model, model.fc),
  )

  def _explain(target):
    explainer = shap.DeepExplainer(target, background)
    # The second positional argument (1) is ranked_outputs; with it set the
    # call returns a pair, and [0] keeps the SHAP values from that pair.
    return np.asarray(explainer.shap_values(image, 1)[0]).squeeze()

  return tuple(_explain(target) for target in targets)

In [None]:
def get_layer_quantile(layer_shap, alpha):
  """Binarize each sample's SHAP values against that sample's own quantile.

  For every sample (entry along the first axis) the ``alpha``-quantile of its
  values is computed; values below the quantile become 0 and all others 1.

  The input is left untouched. A new array is returned so the same SHAP
  values can be thresholded again (for another class or another ``alpha``)
  without operating on an already-binarized mask.

  Args:
    layer_shap: array-like whose first axis indexes samples.
    alpha: quantile in [0, 1], passed to ``np.quantile``.

  Returns:
    A new array with the shape and dtype of ``layer_shap`` holding only 0/1.
  """
  shap_values = np.asarray(layer_shap)
  # Separate output buffer: writing into the input would destroy the
  # caller's SHAP values.
  layer_quantile = np.empty_like(shap_values)

  for i, sample in enumerate(shap_values):
    quantile = np.quantile(sample, alpha)
    layer_quantile[i] = np.where(sample < quantile, 0, 1)

  return layer_quantile

In [None]:
# Per class (normal / adversarial), compute each neuron's weight:
# the fraction of that class's samples in which the neuron is flagged 1.
def get_layer_merge(layer_shap, label):
  """Average the per-sample neuron flags separately for each class.

  Args:
    layer_shap: numpy array whose first axis indexes samples (here the 0/1
      masks produced by the quantile step).
    label: per-sample class ids; 0 selects normal samples and 1 selects
      adversarial samples.

  Returns:
    A pair ``(normal_weight, adversarial_weight)``. Each is the sum over the
    samples of that class divided by the number of such samples.
  """
  def _class_weight(class_id):
    members = np.where(label == class_id)
    member_count = len(members[0])
    return np.sum(layer_shap[members], axis=0) / member_count

  normal_weight = _class_weight(0)
  adversarial_weight = _class_weight(1)
  return normal_weight, adversarial_weight

In [None]:
# Use beta as the threshold for selecting common neurons (weight > beta).
def get_layer_common(weight, beta=0.95):
  """Flag the neurons whose weight is strictly greater than ``beta``.

  Args:
    weight: numpy array of per-neuron weights.
    beta: threshold; a weight equal to ``beta`` is not selected.

  Returns:
    An integer array shaped like ``weight`` with 1 where the weight exceeds
    ``beta`` and 0 elsewhere.
  """
  above_threshold = weight > beta
  return np.where(above_threshold, 1, 0)

In [None]:
def abstraction_merge_layer(all_layer_shap, label, alpha=0.9, merge_class="normal"):
  """Compute per-layer neuron weights for one class.

  Each layer's SHAP values are binarized with ``get_layer_quantile`` and then
  averaged per class with ``get_layer_merge``; only the weights of the
  requested class are kept.

  Args:
    all_layer_shap: sequence with one SHAP array per layer, each with samples
      on the first axis (ten layers for the ResNet-18 pipeline in this file).
    label: per-sample class ids (0 = normal, 1 = adversarial).
    alpha: quantile forwarded to ``get_layer_quantile``.
    merge_class: ``"normal"`` or ``"adversarial"``.

  Returns:
    A tuple with one weight array per layer, in the order of
    ``all_layer_shap``.

  Raises:
    ValueError: if ``merge_class`` is not one of the two supported names.
  """
  # Position of the wanted class in get_layer_merge's (normal, adversarial) pair.
  if merge_class == "normal":
    class_slot = 0
  elif merge_class == "adversarial":
    class_slot = 1
  else:
    raise ValueError(
      "merge_class must be 'normal' or 'adversarial', got %r" % (merge_class,))

  merged = []
  for layer_shap in all_layer_shap:
    # Threshold a private copy so the caller's SHAP values are never modified
    # and stay reusable for the other class, whether or not the helper writes
    # into its argument.
    binarized = get_layer_quantile(np.array(layer_shap), alpha)
    merged.append(get_layer_merge(binarized, label)[class_slot])

  return tuple(merged)

In [None]:
def abstraction_path_layer(weight, beta=0.95):
  """Turn the ten per-layer weight arrays into 0/1 masks of common neurons.

  Args:
    weight: indexable holding the weight arrays for layers 0..9 (input,
      conv1, bn1, maxpool, layer1..layer4, avgpool, fc).
    beta: threshold forwarded to ``get_layer_common``.

  Returns:
    A 10-tuple of masks, one per layer, in the same order as ``weight``.
  """
  return tuple(
    get_layer_common(np.asarray(weight[layer_idx]), beta)
    for layer_idx in range(10)
  )

In [None]:
def abstraction_plot(path, comment='n'):
  """Scatter-plot the non-zero entries of every layer mask and save a PNG.

  The shape of each mask is printed. Masks of rank 3 go on a 3-D axes, masks
  of rank 2 on a 2-D axes, and masks of rank 1 are first expanded to a single
  column and then drawn on a 2-D axes. Masks of any other rank are printed
  but not plotted.

  Args:
    path: sequence of numpy masks, one per layer.
    comment: suffix placed after the layer index in the output file name
      ``folder_path + <index> + <comment> + '.png'``.
  """
  for layer_idx, layer_mask in enumerate(path):
    print(layer_mask.shape)

    rank = len(layer_mask.shape)
    if rank not in (1, 2, 3):
      continue

    if rank == 3:
      axes = plt.figure().add_subplot(projection='3d')
    else:
      axes = plt.figure(figsize=(4.7,4.7)).gca()
      if rank == 1:
        # Treat a vector as an (n, 1) grid so it can be drawn in 2-D.
        layer_mask = np.expand_dims(layer_mask, axis=1)

    # One coordinate array per dimension of the (possibly expanded) mask.
    coords = layer_mask.nonzero()
    axes.scatter(*coords, alpha=1)
    axes.set_title('layer_'+str(layer_idx))

    axes.set_xlim(0, len(layer_mask))
    if rank == 1:
      axes.set_yticks([0, len(layer_mask[0])])
    else:
      axes.set_ylim(0, len(layer_mask[0]))
    if rank == 3:
      axes.set_zlim(0, len(layer_mask[0][0]))

    tick_axes = [axes.xaxis, axes.yaxis]
    if rank == 3:
      tick_axes.append(axes.zaxis)
    for tick_axis in tick_axes:
      tick_axis.set_major_locator(MaxNLocator(integer=True))

    plt.savefig(folder_path+str(layer_idx)+str(comment)+'.png')

In [None]:
def _save_layer_paths(file_name, layer_paths):
  """Save the per-layer masks in one .npy file, whatever their shapes."""
  if len({np.shape(layer_path) for layer_path in layer_paths}) <= 1:
    # All shapes are equal: a regular stacked array is unambiguous.
    packed = np.asarray(layer_paths)
  else:
    # Masks of different shapes cannot form a regular array; NumPy >= 1.24
    # raises ValueError when asked to build one implicitly. Pack them into an
    # explicit 1-D object array, filled slot by slot.
    packed = np.empty(len(layer_paths), dtype=object)
    for slot, layer_path in enumerate(layer_paths):
      packed[slot] = layer_path
  np.save(file_name, packed)


def generate_class_abstraction(num_background, alpha=0, beta=(), plot = True):
  """Run the pipeline: load data, explain layers, derive and save class paths.

  The first ``num_background`` shuffled samples serve as the SHAP background
  and the remaining samples are explained. Per-layer weights are computed
  once for the normal class and once for the adversarial class. For every
  threshold in ``beta`` the weights are turned into 0/1 paths, saved to
  ``folder_path``, summarized on stdout and optionally plotted.

  Args:
    num_background: number of leading samples used as SHAP background.
    alpha: quantile used to binarize SHAP values per sample.
    beta: iterable of thresholds; one pair of path files is written per value.
    plot: when true, also save scatter plots of both paths.
  """
  images, labels = load_concatenate_data()
  test_background = images[:num_background].to(device)
  test_images = images[num_background:].to(device)
  test_labels = labels[num_background:]
  test_alpha = alpha

  layers_shap = get_layer_shap(test_background, test_images) #10,n,x,x,x

  normal_lw = abstraction_merge_layer(layers_shap, test_labels, alpha=test_alpha, merge_class="normal")
  adv_lw = abstraction_merge_layer(layers_shap, test_labels, alpha=test_alpha, merge_class="adversarial")

  for test_beta in beta:
    print(test_beta)
    normal_lpath = abstraction_path_layer(normal_lw, beta=test_beta)
    adv_lpath = abstraction_path_layer(adv_lw, beta=test_beta)

    _save_layer_paths(folder_path+'normal_'+str(test_alpha)+'_'+str(test_beta)+'.npy', normal_lpath)
    _save_layer_paths(folder_path+'adversarial_'+str(test_alpha)+'_'+str(test_beta)+'.npy', adv_lpath)

    print("Number of common critical neuron in normal")
    for layer_path in normal_lpath:
      print(np.sum(layer_path))

    print("Number of common critical neuron in adversarial")
    for layer_path in adv_lpath:
      print(np.sum(layer_path))

    if plot:
      abstraction_plot(normal_lpath, comment='nor_'+str(test_alpha)+'_'+str(test_beta))
      abstraction_plot(adv_lpath, comment='adv_'+str(test_alpha)+'_'+str(test_beta))

In [None]:
# Driver: 100 background samples, 0.95 SHAP quantile, one beta threshold (0.7).
generate_class_abstraction(
  num_background=100,
  alpha=0.95,
  beta=[0.7],
  plot=True,
)