### Lab 4 : backdoor attacks
### Asish Boggavarapu (ab10535)

In [None]:
# Mount Google Drive (Colab only) at /content/drive; later cells read from and write to paths under it.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Clone the CSAW-HackML-2020 repository; later cells use the models and eval.py under its lab3/ folder.
!git clone https://github.com/csaw-hackml/CSAW-HackML-2020

Cloning into 'CSAW-HackML-2020'...
remote: Enumerating objects: 220, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 220 (delta 37), reused 17 (delta 17), pack-reused 178[K
Receiving objects: 100% (220/220), 83.78 MiB | 23.27 MiB/s, done.
Resolving deltas: 100% (83/83), done.


## Inputs

In [None]:
# moving clean and bad validation and test data to "data" folder
!mv /content/drive/MyDrive/Lab3/bd /content/CSAW-HackML-2020/lab3/data/
!mv /content/drive/MyDrive/Lab3/cl /content/CSAW-HackML-2020/lab3/data/

In [None]:
# Paths to the BadNet model file and its weights file inside the cloned repo.
MODELS_DIR = "/content/CSAW-HackML-2020/lab3/models"

BNet_fp = MODELS_DIR + "/bd_net.h5"
BNet_weights_fp = MODELS_DIR + "/bd_weights.h5"

In [None]:
# Paths to the clean validation and test image files.
CLEAN_DATA_DIR = "/content/CSAW-HackML-2020/lab3/data/cl"

clean_val_data_fp = CLEAN_DATA_DIR + "/valid.h5"
clean_test_data_fp = CLEAN_DATA_DIR + "/test.h5"

In [None]:
# Paths to the bad (backdoored) validation and test image files.
BAD_DATA_DIR = "/content/CSAW-HackML-2020/lab3/data/bd"

bad_val_data_fp = BAD_DATA_DIR + "/bd_valid.h5"
bad_test_data_fp = BAD_DATA_DIR + "/bd_test.h5"

## Data loading

In [None]:
import h5py
import numpy as np

In [None]:
def data_loader(path):
    """Load images and labels from an HDF5 file.

    Reads the "data" and "label" datasets from the file at ``path``. The image
    array is transposed with axes ``(0, 2, 3, 1)`` — presumably to move a
    channel axis stored second into last position (TODO confirm against the
    dataset layout).

    The file is opened read-only inside a ``with`` block so the handle is
    released before returning; both datasets are copied into in-memory NumPy
    arrays first, so the returned values do not depend on the open file.

    Args:
        path: Filesystem path of the ``.h5`` file.

    Returns:
        Tuple ``(X, Y)``: the transposed "data" array and the "label" array.
    """
    with h5py.File(path, "r") as h5_file:
        X = np.array(h5_file["data"]).transpose((0, 2, 3, 1))
        Y = np.array(h5_file["label"])

    return X, Y

In [None]:
# Load the clean and bad validation sets; data_loader returns an (X, Y) pair for each file.
clean_val_X,clean_val_Y = data_loader(clean_val_data_fp)
bad_val_X,bad_val_Y = data_loader(bad_val_data_fp)

## Model loading

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Layer

In [None]:
# Load the BadNet model from the .h5 file at BNet_fp; this original is kept unmodified.
badnet_org = load_model(BNet_fp)

In [None]:
# Print the layer-by-layer architecture of the loaded model.
badnet_org.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input (InputLayer)          [(None, 55, 47, 3)]          0         []                            
                                                                                                  
 conv_1 (Conv2D)             (None, 52, 44, 20)           980       ['input[0][0]']               
                                                                                                  
 pool_1 (MaxPooling2D)       (None, 26, 22, 20)           0         ['conv_1[0][0]']              
                                                                                                  
 conv_2 (Conv2D)             (None, 24, 20, 40)           7240      ['pool_1[0][0]']              
                                                                                            

In [None]:
def calculate_accuracy(model, data, labels):
    """Return the percentage of rows whose arg-max prediction equals its label.

    ``model.predict`` is called on ``data`` with ``verbose=0``; the index of
    the largest score along axis 1 is taken as the predicted class and
    compared element-wise with ``labels``.
    """
    class_scores = model.predict(data, verbose=0)
    is_correct = np.argmax(class_scores, axis=1) == labels
    return np.mean(is_correct) * 100

In [None]:
# Baseline: accuracy (in percent) of the unpruned BadNet on the clean validation set.
# The pruning loop measures accuracy drops against this value.
original_accuracy = calculate_accuracy(badnet_org, clean_val_X, clean_val_Y)
original_accuracy

98.64899974019225

## Model pruning

In [None]:
from tensorflow.keras.models import Model,clone_model

In [None]:
# Make a copy of BadNet (cloned architecture, then the original's weights) to prune, leaving badnet_org as loaded.

badnet = clone_model(badnet_org)
badnet.set_weights(badnet_org.get_weights())

In [None]:
# Sub-model that maps the network input to the output of the "pool_3" layer
intermediate_model = Model(inputs=badnet.input, outputs=badnet.get_layer("pool_3").output)

# Forward pass over the clean validation set to collect the "pool_3" activations
activations = intermediate_model.predict(clean_val_X,verbose=0)

# Average over axes 0, 1 and 2, leaving one mean value per index of the last axis
# (assumes activations is (samples, height, width, channels) — TODO confirm)
average_activations = np.mean(activations, axis=(0, 1, 2))

# Channel indices ordered from lowest to highest mean activation (np.argsort sorts ascending);
# the pruning loop visits channels in this order
sorted_channels = np.argsort(average_activations)
print(sorted_channels)

[ 0 26 27 30 31 33 34 36 37 38 25 39 41 44 45 47 48 49 50 53 55 40 24 59
  9  2 12 13 17 14 15 23  6 51 32 22 21 20 19 43 58  3 42  1 29 16 56 46
  5  8 11 54 10 28 35 18  4  7 52 57]


In [None]:
accuracy_drop_thresholds = [2,4,10]  # drops in clean accuracy (percentage points) at which a pruned model is saved

# One flag per threshold above; set to True once the model for that threshold has been saved
flags = [False,False,False]

# Number of channels that will be pruned (length of the sorted channel list)
total_channels = sorted_channels.shape[0]

total_channels

60

In [None]:
# Prune the "conv_3" layer one output channel at a time, visiting channels in
# the order given by sorted_channels. After every prune, the clean validation
# accuracy and the attack success rate are logged, and a snapshot of the model
# is saved the first time the accuracy drop reaches each threshold.
#
# Fix over the earlier version: the completion message and 'last.h5' used to be
# triggered by `c == 60` *after* incrementing c, i.e. after only 59 channels had
# been pruned, with the channel count hard-coded. They now run once, after the
# loop has pruned every channel.

# Weights of "conv_3" (kernel and bias). The variable holds conv weights, not
# pooling weights: "pool_3" is only used for ranking the channels.
conv_3_weights = badnet.get_layer("conv_3").get_weights()
w = conv_3_weights[0]
b = conv_3_weights[1]

# Logs keyed by the pruned channel index, in pruning order
acc_after_each_prune = {}
attack_success_rates = {}

for c, channel_index in enumerate(sorted_channels, start=1):

    print("Channels pruned : "+str(c)+"/"+str(total_channels))

    # Zero the kernel slice (last axis) and the bias entry for this channel of
    # "conv_3". w and b accumulate the zeros, so earlier prunes stay in effect.
    w[:, :, :, channel_index] = 0
    b[channel_index] = 0
    badnet.get_layer("conv_3").set_weights([w,b])

    # Clean validation accuracy after this prune
    pruned_accuracy = calculate_accuracy(badnet, clean_val_X, clean_val_Y)
    acc_after_each_prune[channel_index] = pruned_accuracy
    accuracy_drop = original_accuracy - pruned_accuracy

    print("channel index : "+str(channel_index))
    print("clean accuracy : "+str(pruned_accuracy))
    print("change in accuracy : "+str(accuracy_drop))

    # Save a snapshot the first time the drop reaches each threshold
    # (file names: 2_percent.h5, 4_percent.h5, 10_percent.h5 for thresholds [2, 4, 10])
    for i, threshold in enumerate(accuracy_drop_thresholds):
        if accuracy_drop >= threshold and not flags[i]:
            badnet.save(str(threshold) + "_percent.h5")
            flags[i] = True

    # Attack success rate using bad valid data
    attack_success_rate = calculate_accuracy(badnet, bad_val_X, bad_val_Y)
    attack_success_rates[channel_index] = attack_success_rate
    print("attack success rate : "+str(attack_success_rate))

    print("\n")

# Every channel has been pruned: save the fully pruned model
print("====COMPLETED====")
badnet.save('last.h5')



Channels pruned : 1/60
channel index : 0
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 2/60
channel index : 26
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 3/60
channel index : 27
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 4/60
channel index : 30
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 5/60
channel index : 31
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 6/60
channel index : 33
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 7/60
channel index : 34
clean accuracy : 98.64899974019225
change in accuracy : 0.0
attack success rate : 100.0


Channels pruned : 8/60
channel index : 36
clean accuracy : 98.64899974019225


  saving_api.save_model(


channel index : 29
clean accuracy : 95.75647354291158
change in accuracy : 2.8925261972806737
attack success rate : 100.0


Channels pruned : 46/60
channel index : 16
clean accuracy : 95.20221702606739
change in accuracy : 3.446782714124865
attack success rate : 99.9913397419243


Channels pruned : 47/60
channel index : 56
clean accuracy : 94.7172425738287
change in accuracy : 3.9317571663635533
attack success rate : 99.9913397419243


Channels pruned : 48/60




channel index : 46
clean accuracy : 92.09318437689443
change in accuracy : 6.555815363297825
attack success rate : 99.9913397419243


Channels pruned : 49/60
channel index : 5
clean accuracy : 91.49562656967177
change in accuracy : 7.1533731705204815
attack success rate : 99.9913397419243


Channels pruned : 50/60
channel index : 8
clean accuracy : 91.01931237550879
change in accuracy : 7.629687364683463
attack success rate : 99.98267948384861


Channels pruned : 51/60
channel index : 11
clean accuracy : 89.17467740538669
change in accuracy : 9.474322334805564
attack success rate : 80.73958603966398


Channels pruned : 52/60




channel index : 54
clean accuracy : 84.43751623798389
change in accuracy : 14.211483502208367
attack success rate : 77.015675067117


Channels pruned : 53/60
channel index : 10
clean accuracy : 76.48739932449988
change in accuracy : 22.161600415692376
attack success rate : 35.71490430414826


Channels pruned : 54/60
channel index : 28
clean accuracy : 54.8627349095003
change in accuracy : 43.786264830691955
attack success rate : 6.954187234779596


Channels pruned : 55/60
channel index : 35
clean accuracy : 27.08928726076037
change in accuracy : 71.55971247943188
attack success rate : 0.4243526457088421


Channels pruned : 56/60
channel index : 18
clean accuracy : 13.87373343725643
change in accuracy : 84.77526630293582
attack success rate : 0.0


Channels pruned : 57/60
channel index : 4
clean accuracy : 7.101411622066338
change in accuracy : 91.54758811812592
attack success rate : 0.0


Channels pruned : 58/60
channel index : 7
clean accuracy : 1.5501861955486274
change in accuracy :



attack success rate : 0.0


====COMPLETED====
Channels pruned : 60/60
channel index : 57
clean accuracy : 0.0779423226812159
change in accuracy : 98.57105741751104
attack success rate : 0.0




## save logs

In [None]:
import pandas as pd

In [None]:
# Clean-accuracy log as a table: one row per pruned channel, in the dict's insertion order.
df_acc_after_each_prune = pd.DataFrame(
    {
        "channel_idx": list(acc_after_each_prune),
        "acc": list(acc_after_each_prune.values()),
    }
)

In [None]:
# Attack-success-rate log as a table: one row per pruned channel, in the dict's insertion order.
# The rate is stored under the column name "acc".
df_attack_success_rates = pd.DataFrame(
    {
        "channel_idx": list(attack_success_rates),
        "acc": list(attack_success_rates.values()),
    }
)

In [None]:
# Write both logs to CSV files under /content, without the DataFrame index column.
df_acc_after_each_prune.to_csv('/content/df_acc_after_each_prune.csv', index=False)
df_attack_success_rates.to_csv('/content/df_attack_success_rates.csv', index=False)

In [None]:
# Move the CSV logs from /content into the mycyb-lab3-models folder on the mounted Drive.
!mv /content/df_acc_after_each_prune.csv /content/drive/MyDrive/mycyb-lab3-models/
!mv /content/df_attack_success_rates.csv /content/drive/MyDrive/mycyb-lab3-models/

In [None]:
# Move the three saved pruned models into the same Drive folder; later cells load them from there.
!mv /content/2_percent.h5 /content/drive/MyDrive/mycyb-lab3-models/
!mv /content/4_percent.h5 /content/drive/MyDrive/mycyb-lab3-models/
!mv /content/10_percent.h5 /content/drive/MyDrive/mycyb-lab3-models/

## evaluate against test dataset

In [None]:
# Load the clean and bad test sets using the path variables declared in the
# "Inputs" section instead of repeating the literal paths, so each file location
# is defined in one place.
clean_test_X,clean_test_Y = data_loader(clean_test_data_fp)
bad_test_X,bad_test_Y = data_loader(bad_test_data_fp)

In [None]:
# Drive locations of the pruned models saved at the 2, 4 and 10 accuracy-drop thresholds.
PRUNED_MODELS_DIR = "/content/drive/MyDrive/mycyb-lab3-models"

percent_2_model_fp = PRUNED_MODELS_DIR + "/2_percent.h5"
percent_4_model_fp = PRUNED_MODELS_DIR + "/4_percent.h5"
percent_10_model_fp = PRUNED_MODELS_DIR + "/10_percent.h5"

In [None]:
# Reload the three pruned models from their saved .h5 files.
percent_2_model = load_model(percent_2_model_fp)
percent_4_model = load_model(percent_4_model_fp)
percent_10_model = load_model(percent_10_model_fp)



In [None]:
# Report clean test accuracy and attack success rate for each pruned model.
# A blank separator is printed between reports, but not after the last one.
pruned_model_reports = [
    ("2 percent model : ", percent_2_model),
    ("4 percent model : ", percent_4_model),
    ("10 percent model : ", percent_10_model),
]

for position, (heading, pruned_model) in enumerate(pruned_model_reports):
    if position > 0:
        print("\n")
    print(heading)
    print("clean accuracy : " + str(calculate_accuracy(pruned_model, clean_test_X, clean_test_Y)))
    print("attack success rate : " + str(calculate_accuracy(pruned_model, bad_test_X, bad_test_Y)))

2 percent model : 
clean accuracy : 95.90023382696803
attack success rate : 100.0


4 percent model : 
clean accuracy : 92.29150428682775
attack success rate : 99.98441153546376


10 percent model : 
clean accuracy : 84.54403741231489
attack success rate : 77.20966484801247


## evaluate using eval.py

In [None]:
# Cross-check the 2 percent model with the repo's eval.py (arguments: clean test data, bad test data, model file).
!python /content/CSAW-HackML-2020/lab3/eval.py /content/CSAW-HackML-2020/lab3/data/cl/test.h5 /content/CSAW-HackML-2020/lab3/data/bd/bd_test.h5 /content/drive/MyDrive/mycyb-lab3-models/2_percent.h5

2023-11-26 07:26:22.394250: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-26 07:26:22.394360: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-26 07:26:22.394511: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Clean Classification accuracy: 95.90023382696803
Attack Success Rate: 100.0


In [None]:
# Cross-check the 4 percent model with the repo's eval.py (arguments: clean test data, bad test data, model file).
!python /content/CSAW-HackML-2020/lab3/eval.py /content/CSAW-HackML-2020/lab3/data/cl/test.h5 /content/CSAW-HackML-2020/lab3/data/bd/bd_test.h5 /content/drive/MyDrive/mycyb-lab3-models/4_percent.h5

2023-11-26 07:27:01.874827: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-26 07:27:01.874887: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-26 07:27:01.874931: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Clean Classification accuracy: 92.29150428682775
Attack Success Rate: 99.98441153546376


In [None]:
# Cross-check the 10 percent model with the repo's eval.py (arguments: clean test data, bad test data, model file).
!python /content/CSAW-HackML-2020/lab3/eval.py /content/CSAW-HackML-2020/lab3/data/cl/test.h5 /content/CSAW-HackML-2020/lab3/data/bd/bd_test.h5 /content/drive/MyDrive/mycyb-lab3-models/10_percent.h5

2023-11-26 07:27:29.789562: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-26 07:27:29.789628: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-26 07:27:29.789669: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Clean Classification accuracy: 84.54403741231489
Attack Success Rate: 77.20966484801247


## Building goodnet

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

In [None]:
import tensorflow as tf
from tensorflow import keras

class GoodNet(keras.Model):
    """Wrap two classifiers and answer only where their predictions agree.

    Both ``B`` and ``B_dash`` are run on the input. Where their arg-max class
    indices match, that index is returned; where they differ, the extra label
    ``n_classes`` is returned instead.
    """

    def __init__(self, B, B_dash, n_classes=1283):
        """Store the two wrapped models and the label used on disagreement."""
        super().__init__()
        self.B = B
        self.B_dash = B_dash
        self.n_classes = n_classes

    def call(self, data):
        """Return one int32 label per input row (see the class docstring)."""
        label_b = tf.argmax(self.B(data), axis=1, output_type=tf.int32)
        label_b_dash = tf.argmax(self.B_dash(data), axis=1, output_type=tf.int32)

        # Tensor shaped like the predictions, filled with the disagreement label
        disagreement_label = tf.constant(self.n_classes, dtype=tf.int32)
        fallback = tf.fill(tf.shape(label_b), disagreement_label)

        models_agree = tf.equal(label_b, label_b_dash)
        return tf.where(models_agree, label_b, fallback)


In [None]:
# One GoodNet per pruned model, each pairing the original BadNet (B) with a pruned copy (B_dash).
percent_2_good_model = GoodNet(B=badnet_org, B_dash=percent_2_model, n_classes=1283)
percent_4_good_model = GoodNet(B=badnet_org, B_dash=percent_4_model, n_classes=1283)
percent_10_good_model = GoodNet(B=badnet_org, B_dash=percent_10_model, n_classes=1283)

In [None]:
# Clean test accuracy (percent) of the GoodNet built from the 2 percent model:
# its predicted labels are compared element-wise with the true labels.
G_cl_test_2_label_p = percent_2_good_model.predict(clean_test_X)
G_clean_test_2_accuracy = np.mean(np.equal(G_cl_test_2_label_p, clean_test_Y))*100
print('Combined 2% drops model, the clean test data Classification accuracy:', G_clean_test_2_accuracy)

Combined 2% drops model, the clean test data Classification accuracy: 95.74434918160561


In [None]:
def calculate_accuracy2(model, data, labels):
    """Return the percentage of predictions that equal their labels.

    Unlike ``calculate_accuracy``, the output of ``model.predict`` is compared
    with ``labels`` directly, with no arg-max step.
    """
    predicted_labels = model.predict(data)
    matches = predicted_labels == labels
    return np.mean(matches) * 100

In [None]:
# Report clean test accuracy and attack success rate for each GoodNet.
# A blank separator is printed between reports, but not after the last one.
good_model_reports = [
    ("2 percent good model : ", percent_2_good_model),
    ("4 percent good model : ", percent_4_good_model),
    ("10 percent good model : ", percent_10_good_model),
]

for position, (heading, good_model) in enumerate(good_model_reports):
    if position > 0:
        print("\n")
    print(heading)
    print("clean accuracy : " + str(calculate_accuracy2(good_model, clean_test_X, clean_test_Y)))
    print("attack success rate : " + str(calculate_accuracy2(good_model, bad_test_X, bad_test_Y)))

2 percent good model : 
clean accuracy : 95.74434918160561
attack success rate : 100.0


4 percent good model : 
clean accuracy : 92.1278254091972
attack success rate : 99.98441153546376


10 percent good model : 
clean accuracy : 84.3335931410756
attack success rate : 77.20966484801247
