In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import gc
import sys
import random

In [3]:
# Move the working directory two levels up; the relative paths used later in
# this notebook (e.g. 'data/utk_imgs.npy') are resolved from there.
# NOTE(review): this is not idempotent - re-running the cell moves up two more
# levels, so run it exactly once per kernel.
os.chdir('../../')

In [4]:
import tqdm
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.cluster import KMeans
from tensorflow.keras import backend as K
from sklearn.decomposition import FastICA
from sklearn.metrics import silhouette_score
from sklearn.metrics import accuracy_score, classification_report

In [5]:
from attack_nlp import init_cluster_attack

from subclass_avail import common
from subclass_avail.target_nlp import bert_utils

In [6]:
# from transfer.top_target_training
def model_fn(dataset, size):
    """Build and compile a fresh image classifier.

    The default TF1-compat graph is reset on every call, so each call starts
    from a clean graph.

    Args:
        dataset: ``'cifar'`` selects 32x32x3 inputs and 10 classes; any other
            value selects 100x100x3 inputs and 2 classes.
        size: ``'small'`` selects the lightweight variant. For ``'cifar'`` this
            is a small convolutional network built from scratch; for every
            other dataset it is a VGG16 backbone with all layers frozen and a
            trainable dense head (Adam, learning rate 1e-3). Any other value
            builds a VGG16 backbone that stays trainable (Adam, learning rate
            1e-4) - note that this also applies to ``'cifar'``.

    Returns:
        A compiled ``tf.keras`` model using categorical cross-entropy loss and
        the accuracy metric, with a softmax output over the classes.
    """
    tf.compat.v1.reset_default_graph()
    if dataset=='cifar':
        shape = (32, 32, 3)
        n_classes = 10
        if size=='small':
            model = tf.keras.models.Sequential()
            scales = 3
            # Regularisation strength is 0, i.e. effectively disabled.
            reg = tf.keras.regularizers.l2(l=0.00)
            model.add(tf.keras.layers.InputLayer(shape))
            model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same',
                kernel_regularizer=reg))
            model.add(tf.keras.layers.LeakyReLU(alpha=0.1))
            # Each scale doubles the filter counts (32/64, 64/128, 128/256)
            # and halves the spatial resolution with average pooling.
            for scale in range(scales):
                model.add(tf.keras.layers.Conv2D(32 << scale, (3, 3), padding='same',
                    kernel_regularizer=reg))
                model.add(tf.keras.layers.LeakyReLU(alpha=0.1))
                model.add(tf.keras.layers.Conv2D(64 << scale, (3, 3), padding='same',
                    kernel_regularizer=reg))
                model.add(tf.keras.layers.LeakyReLU(alpha=0.1))
                model.add(tf.keras.layers.AveragePooling2D((2, 2)))
            model.add(tf.keras.layers.Conv2D(n_classes, (3, 3), padding='same',
                    kernel_regularizer=reg))
            model.add(tf.keras.layers.Flatten())
            model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))

            # `learning_rate` is the supported keyword; `lr` is a deprecated
            # alias that newer Keras releases no longer accept.
            opt = tf.keras.optimizers.Adam(learning_rate=0.001)
            model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

            return model
    else:
        shape = (100, 100, 3)
        n_classes = 2

    # Every configuration except ('cifar', 'small') uses a VGG16 backbone with
    # global average pooling and a new softmax head.
    vgg = tf.keras.applications.VGG16(include_top=False, input_shape=shape, pooling='avg')
    if size=='small':
        opt = tf.keras.optimizers.Adam(learning_rate=0.001)
        # Freeze the backbone so only the new dense head is trained.
        for layer in vgg.layers:
            layer.trainable = False
    else:
        opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

    output = tf.keras.layers.Dense(n_classes, kernel_regularizer=tf.keras.regularizers.l2(l=0.01),
            activation='softmax')(vgg.output)
    model = tf.keras.models.Model(inputs=vgg.inputs[0], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

## Constants

In [7]:
# Root directory of the saved victim-model artifacts.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
results_dir = '/net/data/malware-backdoor/subpop/victim_models/utk_small'

# NOTE(review): n_clus is not referenced in the visible cells; presumably the
# number of clusters used to define the subpopulations - confirm.
n_clus = 100
# Seed applied to Python's and NumPy's global RNGs in the next cell.
seed = 42

# NOTE(review): presumably the poisoning rate of the loaded run (the artifact
# directory used below is named 'clind58_rate1') - confirm.
pois_rate = 1
# NOTE(review): model_fn is later called with the literal 'small' rather than
# with this variable.
size = 'small'

In [8]:
# Seed NumPy's and Python's global RNGs with the notebook-wide seed.
# NOTE(review): TensorFlow's RNG is not seeded here, so model training below
# is presumably not reproducible run-to-run - confirm whether that is intended.
np.random.seed(seed)
random.seed(seed)

## Attack data

In [9]:
# Index of the attacked subpopulation; cl_ind is kept as an alias because the
# artifact file names are suffixed with it.
victim_pop = 58
cl_ind = victim_pop

# Directory of one attack run. It is derived from the subpopulation index and
# the poisoning rate instead of being spelled out a second time, so changing
# either value can no longer silently load the artifacts of another run.
pth = os.path.join(results_dir, 'clind{}_rate{}'.format(cl_ind, pois_rate))


def _load_attack_array(prefix):
    """Load `<prefix>_<cl_ind>.npy` from the attack-run directory `pth`."""
    # NOTE(review): allow_pickle=True can execute arbitrary code when the file
    # is untrusted; drop it if these arrays are plain numeric arrays.
    return np.load(os.path.join(pth, '{}_{}.npy'.format(prefix, cl_ind)), allow_pickle=True)


# Poisoning points and their labels.
pois_x = _load_attack_array('pois_x')
pois_y = _load_attack_array('pois_y')

# Training set; a later cell asserts that its trailing labels equal pois_y.
trn_x = _load_attack_array('trn_x')
trn_y = _load_attack_array('trn_y')

# Evaluation data (presumably the general test set - confirm).
x_t = _load_attack_array('x_t')
y_t = _load_attack_array('y_t')

# Evaluation data (presumably restricted to the victim subpopulation - confirm).
xt_p = _load_attack_array('xt_p')
yt_p = _load_attack_array('yt_p')

In [10]:
# Sanity check: the poison labels must be the trailing rows of trn_y.
# An empty poison set is accepted explicitly, because `trn_y[-0:]` selects the
# whole array rather than an empty tail.
assert pois_y.shape[0] == 0 or np.array_equal(trn_y[-pois_y.shape[0]:], pois_y), \
    'poison labels are not the trailing rows of trn_y'

In [11]:
# Reduce the training labels to integer class ids (argmax over the last axis).
trn_y_int = trn_y.argmax(axis=-1)

In [12]:
# 0/1 indicator per training sample: 1 for the trailing poison points.
poison_idx = np.zeros_like(trn_y_int)
# Guard the empty case: with zero poison points `[-0:]` would address the
# whole array and mark every sample as poisoned.
if pois_y.shape[0]:
    poison_idx[-pois_y.shape[0]:] = 1

In [13]:
# Number of training samples flagged as poison.
poison_idx.sum()

40

In [14]:
# Split the poison indicator by class id, then print how many poison points
# carry label 0 and label 1 (one count per line).
pois_idx0, pois_idx1 = (poison_idx[trn_y_int == label] for label in (0, 1))
print(sum(pois_idx0), sum(pois_idx1), sep='\n')

40
0


## Load the attacked model

We can now load the attacked model for the selected subpopulation.

In [15]:
# The saved victim model lives next to the attack arrays and is suffixed with
# the subpopulation index.
victim_model_path = os.path.join(pth, 'victim_vgg_{}'.format(victim_pop))

print('Loading victim model for subpopulation {}'.format(victim_pop))
victim_model = tf.keras.models.load_model(victim_model_path)

Loading victim model for subpopulation 58


In [16]:
# Victim-model predictions on x_t; the next cell takes the argmax over the
# last axis to turn them into class ids.
pred = victim_model.predict(x_t)

In [17]:
# Per-class report of the victim model on x_t / y_t; labels and predictions
# are both reduced to class ids with argmax first.
print(classification_report(y_t.argmax(axis=-1), pred.argmax(axis=-1), digits=5))

              precision    recall  f1-score   support

           0    0.81636   0.90388   0.85789      3246
           1    0.87317   0.76496   0.81549      2808

    accuracy                        0.83944      6054
   macro avg    0.84477   0.83442   0.83669      6054
weighted avg    0.84271   0.83944   0.83823      6054



In [18]:
# Same report for the victim model on xt_p / yt_p.
print(classification_report(yt_p.argmax(axis=-1), victim_model.predict(xt_p).argmax(axis=-1), digits=5))

              precision    recall  f1-score   support

           0    0.14815   1.00000   0.25806         4
           1    1.00000   0.20690   0.34286        29

    accuracy                        0.30303        33
   macro avg    0.57407   0.60345   0.30046        33
weighted avg    0.89675   0.30303   0.33258        33



In [19]:
# Index of the second-to-last layer of the victim model.
# NOTE(review): not used in the visible cells apart from being displayed.
last_layer = len(victim_model.layers) - 2

In [20]:
# Display the layer index.
last_layer

19

## Clean data

In [21]:
# Load the clean UTK arrays, derive labels and subgroup ids, and draw the
# train / auxiliary / test index sets.
# NOTE(review): PCA_DIM and C are not referenced in the visible cells.
PCA_DIM = 2000
C = 0.0001
# Images, precomputed features and per-sample class attributes.
img_pp = np.load('data/utk_imgs.npy')
fea_pp = np.load('data/utk_preds.npy')
cl_pp = np.load('data/utk_classes.npy')
# Keep only samples whose column-0 attribute (age, per the comment below) is >= 15.
include_inds = np.where(cl_pp[:, 0] >= 15)[0]
fea = fea_pp[include_inds]
cl = cl_pp[include_inds]
img = img_pp[include_inds]
print(img.shape)
# One-hot classification target built from column 1.
target = np.eye(2)[cl[:, 1]]  # 0 == age, 1 == gender, 2 == race
# Age bucket id = number of thresholds reached (0..3).
age_buckets = [30, 45, 60]
ages = np.array([cl[:, 0] >= b for b in age_buckets]).sum(axis=0)
races = cl[:, 2]
# Combined subgroup id: 4 age buckets per race value.
target_name = races*4 + ages
print(np.unique(target_name, return_counts=True))
trn_size = 7000
aux_size = 7000
# NOTE(review): tst_size is computed but not used by the slicing below.
tst_size = fea.shape[0] - trn_size - aux_size
# Re-seeds NumPy's global RNG, replacing the notebook-wide seed set earlier.
np.random.seed(0)
# NOTE(review): np.random.choice samples WITH replacement by default, so this
# is not a permutation - indices can repeat and the three splits can overlap.
# Left unchanged because the saved attack artifacts presumably depend on this
# exact split - confirm before switching to np.random.permutation.
inds_shuffle = np.random.choice(fea.shape[0], fea.shape[0])
trn_inds = inds_shuffle[:trn_size]
aux_inds = inds_shuffle[trn_size:trn_size + aux_size]
tst_inds = inds_shuffle[trn_size + aux_size:]

# Features, one-hot targets and subgroup ids for each split.
fea_trn, targ_trn, tn_trn = fea[trn_inds], target[trn_inds], target_name[trn_inds]
fea_aux, targ_aux, tn_aux = fea[aux_inds], target[aux_inds], target_name[aux_inds]
fea_tst, targ_tst, tn_tst = fea[tst_inds], target[tst_inds], target_name[tst_inds]

# Images for each split.
img_trn, img_aux, img_tst = img[trn_inds], img[aux_inds], img[tst_inds]
print(img_trn.shape, fea_trn.shape, targ_trn.shape)

(20054, 100, 100, 3)
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19]), array([2539, 2325, 1907, 1811, 1997, 1493,  494,  345, 1382,  642,  157,
        281, 1594,  913,  692,  233,  774,  344,  113,   18]))
(7000, 100, 100, 3) (7000, 512) (7000, 2)


In [22]:
# Drop the victim model and reset the Keras session before the training loop
# below builds new models. gc.collect() is the last expression, so its return
# value is shown as the cell output.
del victim_model
tf.keras.backend.clear_session()
gc.collect()

25294

In [25]:
# Train five models on the clean training images and record each model's
# accuracy on xt_p / yt_p.
accs = []

# Integer class ids of the two evaluation label sets; computed once because
# they do not change between repetitions.
amyt = np.argmax(y_t, axis=-1)
amytp = np.argmax(yt_p, axis=-1)

for i in range(5):
    def_model = model_fn('utk', 'small')
    def_model.fit(img_trn, targ_trn, epochs=12, batch_size=32)

    ampt = np.argmax(def_model.predict(x_t), axis=-1)
    amptp = np.argmax(def_model.predict(xt_p), axis=-1)

    print(classification_report(amyt, ampt, digits=5))
    print(classification_report(amytp, amptp, digits=5))

    # The call was previously split across two lines ("classification" /
    # "_report(...)"), which raises NameError; it has to be one expression.
    pacc = classification_report(amytp, amptp, digits=5, output_dict=True)['accuracy']
    accs.append(pacc)

    # Release the model and reset the Keras session before the next repetition.
    del def_model, ampt, amptp
    tf.keras.backend.clear_session()
    gc.collect()

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
              precision    recall  f1-score   support

           0    0.82243   0.89464   0.85702      3246
           1    0.86445   0.77671   0.81823      2808

    accuracy                        0.83994      6054
   macro avg    0.84344   0.83567   0.83762      6054
weighted avg    0.84192   0.83994   0.83903      6054

              precision    recall  f1-score   support

           0    0.15000   0.75000   0.25000         4
           1    0.92308   0.41379   0.57143        29

    accuracy                        0.45455        33
   macro avg    0.53654   0.58190   0.41071        33
weighted avg    0.82937   0.45455   0.53247        33

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
              precision    recall  f1-score   support

           0    0.820

Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
              precision    recall  f1-score   support

           0    0.84803   0.86815   0.85797      3246
           1    0.84328   0.82016   0.83156      2808

    accuracy                        0.84589      6054
   macro avg    0.84565   0.84415   0.84476      6054
weighted avg    0.84583   0.84589   0.84572      6054

              precision    recall  f1-score   support

           0    0.25000   0.75000   0.37500         4
           1    0.95238   0.68966   0.80000        29

    accuracy                        0.69697        33
   macro avg    0.60119   0.71983   0.58750        33
weighted avg    0.86724   0.69697   0.74848        33



In [26]:
# Accuracy on xt_p / yt_p for each of the five trained models.
accs

[0.45454545454545453,
 0.48484848484848486,
 0.5151515151515151,
 0.696969696969697,
 0.696969696969697]