In [1]:
from tensorflow.keras.datasets import cifar10
from image import *
from hashlib import md5
import numpy as np
import pandas as pd
import os
import shutil
import warnings
warnings.filterwarnings('ignore')
import cv2 as cv


def _read_image_dir(directory, size=(32, 32)):
    """Read every decodable image in ``directory`` as a resized RGB array.

    Files are visited in sorted name order so the result is deterministic
    (``os.listdir`` order is arbitrary). Files for which ``cv.imread`` returns
    ``None`` (i.e. OpenCV cannot decode them) are skipped.

    Args:
        directory: Path of the directory to scan.
        size: Target ``(width, height)`` handed to ``cv.resize``.

    Returns:
        List of resized image arrays in sorted file-name order.
    """
    images = []
    for filename in sorted(os.listdir(directory)):
        img = cv.imread(os.path.join(directory, filename))
        if img is None:
            continue
        # cv.imread yields BGR, while RGB2YUV and the CIFAR10 branch work on RGB.
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        images.append(cv.resize(img, size))
    return images


def _load_gtsrb(root="gtsrb-german-traffic-sign", num_classes=43):
    """Load GTSRB from ``root`` and return shuffled train and ordered test sets.

    Training images are read from ``<root>/Train/<class_id>/``; test images from
    ``<root>/Test/`` with labels taken from the ``ClassId`` column of
    ``<root>/Test.csv``.

    Raises:
        ValueError: If the number of decoded test images differs from the
            number of rows in ``Test.csv``.
    """
    train_images, train_labels = [], []
    for class_id in range(num_classes):
        class_images = _read_image_dir(os.path.join(root, "Train", str(class_id)))
        train_images.extend(class_images)
        train_labels.extend([class_id] * len(class_images))

    test_data = pd.read_csv(os.path.join(root, "Test.csv"))
    test_images = _read_image_dir(os.path.join(root, "Test"))
    # NOTE(review): labels are paired with images by position; this assumes the
    # rows of Test.csv are listed in sorted file-name order -- confirm.
    if len(test_images) != len(test_data):
        raise ValueError(
            "GTSRB test set mismatch: {} images but {} rows in Test.csv".format(
                len(test_images), len(test_data)))

    # Scale to [0, 1] like the CIFAR10 branch; poison_frequency multiplies its
    # input by 255 and therefore expects this range.
    x_train = np.asarray(train_images, dtype=np.float64) / 255.
    y_train = np.asarray(train_labels).reshape(-1, 1)
    x_test = np.asarray(test_images, dtype=np.float64) / 255.
    y_test = test_data["ClassId"].to_numpy().reshape(-1, 1)

    # Shuffle the training set (it was read class by class).
    order = np.random.permutation(x_train.shape[0])
    return x_train[order], y_train[order], x_test, y_test


def get_data(param):
    """Load the dataset named by ``param["dataset"]``.

    Supported names are ``"CIFAR10"`` (loaded through ``cifar10.load_data``)
    and ``"GTSRB"`` (read from the local ``gtsrb-german-traffic-sign``
    directory and resized to 32x32).

    Args:
        param: Configuration dict; only ``param["dataset"]`` is read here.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. Images are float64 arrays
        scaled to ``[0, 1]``. For GTSRB the labels are integer class ids of
        shape ``(N, 1)``.

    Raises:
        ValueError: If the dataset name is not supported, or the GTSRB test
            images do not match ``Test.csv``.
    """
    dataset = param["dataset"]

    if dataset == "CIFAR10":
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        # np.float was removed from NumPy; float64 is the type it aliased.
        x_train = x_train.astype(np.float64) / 255.
        x_test = x_test.astype(np.float64) / 255.
        return x_train, y_train, x_test, y_test

    if dataset == "GTSRB":
        return _load_gtsrb()

    raise ValueError("Unsupported dataset: {!r}".format(dataset))


def poison(x_train, y_train, param):
    """Poison a fraction of the training samples that are not already the target class.

    The first ``int(poisoning_rate * y_train.shape[0])`` samples whose label
    differs from ``param["target_label"]`` are replaced in ``x_train`` by their
    triggered version and relabelled to the target in ``y_train``. Both arrays
    are modified in place.

    Args:
        x_train: Training images.
        y_train: Training labels.
        param: Attack configuration (``target_label``, ``poisoning_rate`` and
            the keys read by ``poison_frequency``).

    Returns:
        ``x_train`` (the same array that was passed in).
    """
    target = param["target_label"]
    budget = int(param["poisoning_rate"] * y_train.shape[0])

    # Row indices of non-target samples, truncated to the poisoning budget.
    victims = np.where(y_train != target)[0][:budget]

    x_train[victims] = poison_frequency(x_train[victims], y_train[victims], param)
    y_train[victims] = target
    return x_train


def poison_frequency(x_train, y_train, param):
    """Add the frequency-domain trigger to every image in ``x_train``.

    The images are scaled by 255, optionally converted with ``RGB2YUV``,
    transformed block-wise with ``DCT``, perturbed by adding
    ``param["magnitude"]`` at each offset in ``param["pos_list"]`` of every
    ``window_size`` block of every channel in ``param["channel_list"]``, and
    transformed back. The result is divided by 255 and clipped to ``[0, 1]``.

    Args:
        x_train: Image batch indexed as (idx, w, h, ch); since it is multiplied
            by 255 it is expected to be on a ``[0, 1]`` scale. Not modified.
        y_train: Unused; kept for interface compatibility.
        param: Attack configuration (``YUV``, ``window_size``,
            ``channel_list``, ``pos_list``, ``magnitude``).

    Returns:
        New array with the triggered images, or ``x_train`` itself when the
        batch is empty.

    Raises:
        ValueError: If an offset in ``pos_list`` lies outside the window.
    """
    if x_train.shape[0] == 0:
        return x_train

    window = param["window_size"]
    for pos in param["pos_list"]:
        if not (0 <= pos[0] < window and 0 <= pos[1] < window):
            raise ValueError(
                "trigger position {} is outside the {}x{} window".format(
                    tuple(pos), window, window))

    # Out-of-place scaling: the caller's array stays untouched and integer
    # inputs are promoted to float instead of failing on an in-place multiply.
    x_train = x_train * 255.
    if param["YUV"]:
        x_train = RGB2YUV(x_train)

    # transfer to frequency domain
    x_train = DCT(x_train, window)  # (idx, ch, w, h)

    # plug trigger frequency: the strided slices address offset `pos` inside
    # every window of every image at once.
    for ch in param["channel_list"]:
        for pos in param["pos_list"]:
            x_train[:, ch, pos[0]::window, pos[1]::window] += param["magnitude"]

    x_train = IDCT(x_train, window)  # (idx, w, h, ch)

    if param["YUV"]:
        x_train = YUV2RGB(x_train)

    x_train /= 255.
    x_train = np.clip(x_train, 0, 1)
    return x_train


def impose(x_train, y_train, param):
    """Apply the frequency trigger to every image in ``x_train``.

    Thin wrapper that forwards all arguments to ``poison_frequency`` and
    returns its result.
    """
    return poison_frequency(x_train, y_train, param)


def digest(param):
    """Return an MD5 hex digest that identifies an attack configuration.

    The digest is computed over the concatenation of ``param["dataset"]`` and
    the string forms of the remaining configuration values, in a fixed order.

    Args:
        param: Configuration dict containing all keys listed below.

    Returns:
        32-character hexadecimal string.
    """
    stringified_keys = (
        "target_label",
        "poisoning_rate",
        "label_dim",
        "channel_list",
        "window_size",
        "magnitude",
        "YUV",
        "pos_list",
    )
    # The dataset name is concatenated as-is (it must already be a string).
    fingerprint = "" + param["dataset"]
    fingerprint += "".join(str(param[key]) for key in stringified_keys)
    return md5(fingerprint.encode("utf-8")).hexdigest()

In [2]:
import math
from skimage import transform, data
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
import bm3d
import scipy.signal

def RGB2YUV(x_rgb):
    """Convert a batch of RGB images to YCrCb with OpenCV.

    Despite the name, the conversion code used is ``cv2.COLOR_RGB2YCrCb``.

    Args:
        x_rgb: Image batch indexed by image along axis 0, with pixel values on
            a 0..255 scale.

    Returns:
        float64 array of the same shape holding the converted images.
    """
    # np.float was removed from NumPy; float64 is the type it aliased.
    x_yuv = np.zeros(x_rgb.shape, dtype=np.float64)
    for i in range(x_rgb.shape[0]):
        # Round and clamp before the uint8 cast: a bare astype truncates
        # (e.g. 254.999... -> 254) and wraps values outside 0..255.
        pixels = np.clip(np.rint(x_rgb[i]), 0, 255).astype(np.uint8)
        x_yuv[i] = cv2.cvtColor(pixels, cv2.COLOR_RGB2YCrCb)
    return x_yuv

def YUV2RGB(x_yuv):
    """Convert a batch of YCrCb images back to RGB with OpenCV.

    Despite the name, the conversion code used is ``cv2.COLOR_YCrCb2RGB``.

    Args:
        x_yuv: Image batch indexed by image along axis 0, with channel values
            nominally on a 0..255 scale.

    Returns:
        float64 array of the same shape holding the RGB images.
    """
    # np.float was removed from NumPy; float64 is the type it aliased.
    x_rgb = np.zeros(x_yuv.shape, dtype=np.float64)
    for i in range(x_yuv.shape[0]):
        # Values may leave 0..255 after frequency-domain edits; round and clamp
        # so the uint8 cast cannot wrap around.
        pixels = np.clip(np.rint(x_yuv[i]), 0, 255).astype(np.uint8)
        x_rgb[i] = cv2.cvtColor(pixels, cv2.COLOR_YCrCb2RGB)
    return x_rgb


def DCT(x_train, window_size):
    """Apply ``cv2.dct`` to every ``window_size`` x ``window_size`` tile of each channel.

    Args:
        x_train: Image batch laid out as (idx, w, h, ch).
        window_size: Side length of the square tiles that are transformed
            independently.

    Returns:
        float64 array laid out as (idx, ch, w, h) with the tile-wise DCT
        coefficients.
    """
    # x_train: (idx, w, h, ch)
    n, width, height, channels = x_train.shape
    # np.float was removed from NumPy; float64 is the type it aliased.
    x_dct = np.zeros((n, channels, width, height), dtype=np.float64)
    x_train = np.transpose(x_train, (0, 3, 1, 2))

    for i in range(n):
        for ch in range(channels):
            for w in range(0, width, window_size):
                for h in range(0, height, window_size):
                    tile = x_train[i][ch][w:w+window_size, h:h+window_size]
                    x_dct[i][ch][w:w+window_size, h:h+window_size] = cv2.dct(tile.astype(np.float64))
    return x_dct            # x_dct: (idx, ch, w, h)


def IDCT(x_train, window_size):
    """Apply ``cv2.idct`` to every ``window_size`` x ``window_size`` tile of each channel.

    Args:
        x_train: Coefficient batch laid out as (idx, ch, w, h).
        window_size: Side length of the square tiles that are transformed
            independently.

    Returns:
        float64 array laid out as (idx, w, h, ch) with the reconstructed
        images.
    """
    # x_train: (idx, ch, w, h)
    # np.float was removed from NumPy; float64 is the type it aliased.
    x_idct = np.zeros(x_train.shape, dtype=np.float64)
    n, channels, width, height = x_train.shape

    for i in range(n):
        for ch in range(channels):
            for w in range(0, width, window_size):
                for h in range(0, height, window_size):
                    tile = x_train[i][ch][w:w+window_size, h:h+window_size]
                    x_idct[i][ch][w:w+window_size, h:h+window_size] = cv2.idct(tile.astype(np.float64))
    # Back to channels-last layout.
    x_idct = np.transpose(x_idct, (0, 2, 3, 1))
    return x_idct


def Gaussian(x_train):
    """Blur every image in the batch with a 5x5 Gaussian kernel.

    The batch is multiplied by 255 before filtering and divided by 255
    afterwards; the input array itself is not modified.

    Args:
        x_train: Image batch laid out as (idx, w, h, ch).

    Returns:
        New array with the blurred images.
    """
    scaled = x_train * 255
    for idx, frame in enumerate(scaled):
        scaled[idx] = cv2.GaussianBlur(frame, (5, 5), sigmaX=0, sigmaY=0)
    return scaled / 255.


def BM3D(x_train):
    """Denoise every image in the batch with ``bm3d.bm3d`` (``sigma_psd=1``).

    The batch is multiplied by 255 before filtering and divided by 255
    afterwards; the input array itself is not modified.

    Args:
        x_train: Image batch indexed by image along axis 0.

    Returns:
        New array with the denoised images.
    """
    scaled = x_train * 255
    for idx, frame in enumerate(scaled):
        scaled[idx] = bm3d.bm3d(frame, sigma_psd=1)
    return scaled / 255.


def Wiener(x_train):
    """Filter the first three channels of every image with a 5x5 Wiener filter.

    The batch is multiplied by 255 before filtering and divided by 255
    afterwards; the input array itself is not modified.

    Args:
        x_train: Image batch laid out as (idx, w, h, ch) with at least three
            channels.

    Returns:
        New array with the filtered images.
    """
    kernel = (5, 5)
    scaled = x_train * 255
    for sample in scaled:
        # `sample` is a view into `scaled`, so channel writes land in the batch.
        for ch in range(3):
            sample[:, :, ch] = scipy.signal.wiener(sample[:, :, ch], kernel)
    scaled /= 255.
    return scaled


def PSNR(img1, img2):
    """Return the peak signal-to-noise ratio between two images.

    Both inputs are converted to float64 and a peak value of 255 is used.

    Args:
        img1: First image.
        img2: Second image, same shape as ``img1``.

    Returns:
        PSNR in decibels, or ``100`` when the images are identical.
    """
    PIXEL_MAX = 255.0
    difference = np.float64(img1) - np.float64(img2)
    mse = np.mean(difference ** 2)
    if mse != 0:
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
    # Identical images: the ratio would be infinite, so a fixed cap is returned.
    return 100


def _cached_inception_v3():
    """Return a shared InceptionV3 classifier, building it on first use."""
    model = getattr(_cached_inception_v3, "_model", None)
    if model is None:
        model = InceptionV3(include_top=True, weights='imagenet', classes=1000)
        _cached_inception_v3._model = model
    return model


def IS_score(img1, img2):
    """Return the KL divergence between InceptionV3 predictions for two images.

    Each image is resized to 299x299, passed through
    ``inception_v3.preprocess_input`` and classified; the result is
    ``sum(y1 * log(y1 / y2))`` over the two prediction vectors.

    Args:
        img1: First image (three channels).
        img2: Second image (three channels).

    Returns:
        The KL divergence as a float.
    """
    # Building InceptionV3 is expensive and this function is called once per
    # image pair, so the model is created once and reused.
    model = _cached_inception_v3()

    img1 = transform.resize(img1, (299, 299))
    img1 = np.reshape(img1, (-1, 299, 299, 3))
    img2 = transform.resize(img2, (299, 299))
    img2 = np.reshape(img2, (-1, 299, 299, 3))

    x1 = tf.keras.applications.inception_v3.preprocess_input(img1)
    x2 = tf.keras.applications.inception_v3.preprocess_input(img2)
    y1 = model(x1).numpy().reshape((-1))
    y2 = model(x2).numpy().reshape((-1))

    # Accumulate in float64 for a stable sum over the class probabilities.
    return float(np.sum(y1 * np.log(y1 / y2), dtype=np.float64))

def SSIM(img1, img2):
    """Return the structural similarity index between two multichannel images.

    Args:
        img1: First image.
        img2: Second image, same shape as ``img1``.

    Returns:
        The value returned by ``skimage.metrics.structural_similarity`` with a
        9x9 window.
    """
    # Only `transform` and `data` are imported from skimage at file level, so
    # the bare name `skimage` was undefined here; import the submodule locally.
    import skimage.metrics

    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` -- confirm against the installed version.
    res = skimage.metrics.structural_similarity(img1, img2, win_size=9, multichannel=True)
    return res


def get_visual_values(imgs1, imgs2):
    """Average PSNR, SSIM and IS_score over paired images of two batches.

    Args:
        imgs1: First image batch; its first dimension gives the pair count.
        imgs2: Second image batch, indexed in step with ``imgs1``.

    Returns:
        Tuple ``(mean_psnr, mean_ssim, mean_is)``.
    """
    count = imgs1.shape[0]
    psnr_total = ssim_total = is_total = 0.0
    for idx in range(count):
        first, second = imgs1[idx], imgs2[idx]
        psnr_total += PSNR(first, second)
        ssim_total += SSIM(first, second)
        is_total += IS_score(first, second)

    return psnr_total / count, ssim_total / count, is_total / count

In [3]:
import tensorflow.keras.regularizers as regularizers
from tensorflow.python.keras.layers import Activation, Conv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.python.keras.layers import MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.python.keras.models import Sequential
from tensorflow.keras.applications import ResNet50V2


def get_model(param):
    """Build the network that matches ``param["dataset"]``.

    Args:
        param: Configuration dict; only ``param["dataset"]`` is read.

    Returns:
        A freshly built model for "CIFAR10", "GTSRB", "ImageNet16" or "PubFig",
        or ``None`` for any other dataset name.
    """
    dataset = param["dataset"]

    if dataset == "CIFAR10":
        builder = _get_model_cifar
    elif dataset == "GTSRB":
        builder = _get_model_GTSRB
    elif dataset == "ImageNet16":
        builder = _get_model_ImageNet16
    elif dataset == "PubFig":
        builder = _get_model_PubFig
    else:
        return None

    return builder()


def _build_small_cnn(num_classes, weight_decay=1e-6, input_shape=(32, 32, 3)):
    """Build the three-stage CNN shared by the CIFAR10 and GTSRB models.

    Each stage is two 3x3 ``Conv2D`` -> ELU -> ``BatchNormalization`` groups
    followed by 2x2 max pooling and dropout; the stages use 32/64/128 filters
    with dropout rates 0.2/0.3/0.4. A softmax ``Dense`` layer classifies the
    flattened features.

    Args:
        num_classes: Number of units in the final softmax layer.
        weight_decay: L2 regularisation factor for the convolution kernels.
        input_shape: Shape of one input image.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    stages = ((32, 0.2), (64, 0.3), (128, 0.4))
    model = Sequential()
    for stage_idx, (filters, drop_rate) in enumerate(stages):
        for conv_idx in range(2):
            # Only the very first layer declares the input shape.
            first_layer = stage_idx == 0 and conv_idx == 0
            extra = {"input_shape": input_shape} if first_layer else {}
            model.add(Conv2D(filters, (3, 3), padding='same',
                             kernel_regularizer=regularizers.l2(weight_decay), **extra))
            model.add(Activation('elu'))
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(drop_rate))

    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    return model


def _get_model_cifar():
    """Return the 10-class CNN used for CIFAR10 (32x32x3 inputs)."""
    return _build_small_cnn(10)


def _get_model_GTSRB():
    """Return the 43-class CNN used for GTSRB (32x32x3 inputs) and print its summary."""
    model = _build_small_cnn(43)

    # summary() prints by itself and returns None, so wrapping it in print()
    # emitted a stray "None" line.
    model.summary()
    return model



def _get_model_ImageNet16():
    """Return an untrained ResNet50V2 with 16 output classes for 224x224x3 inputs."""
    return ResNet50V2(input_shape=(224, 224, 3), weights=None, classes=16)

def _get_model_PubFig():
    """Return an untrained ResNet50V2 with 16 output classes for 224x224x3 inputs."""
    return ResNet50V2(input_shape=(224, 224, 3), weights=None, classes=16)


def _get_model_GTSRB_new():
    """Return an untrained ResNet50V2 with 13 output classes for 224x224x3 inputs."""
    return ResNet50V2(input_shape=(224, 224, 3), weights=None, classes=13)

In [4]:
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow import keras as keras
import matplotlib.pyplot as plt
from multiprocessing import Process
%matplotlib inline


def lr_schedule(epoch):
    """Return the learning rate to use for ``epoch``.

    Schedule: 0.0001 up to and including epoch 10, 0.0005 for epochs 11-20,
    and 0.0003 from epoch 21 onwards.

    Args:
        epoch: Epoch index.

    Returns:
        The learning rate as a float.
    """
    # The larger threshold is tested first: with `epoch > 10` in front, the
    # `epoch > 20` branch could never be reached.
    if epoch > 20:
        return 0.0003
    if epoch > 10:
        return 0.0005
    # NOTE(review): an initial value of 0.001 used to be assigned and then
    # always overwritten; 0.0001 is the rate early epochs actually received.
    return 0.0001


def train():
    """Train a backdoored classifier and report clean and trojan test results.

    Steps: load the dataset, poison part of the training set, build a fully
    triggered copy of the test set labelled with the target class, train for
    80 epochs while checkpointing the best ``val_accuracy`` weights, then
    reload those weights and print accuracy/loss on the clean and triggered
    test sets.
    """
    param = {
        "dataset": "GTSRB",           # GTSRB
        "target_label": 10,              # target label
        "poisoning_rate": 0.05,         # ratio of poisoned samples
        "label_dim": 43,
        "channel_list": [1, 2],         # [0,1,2] means YUV channels, [1,2] means UV channels
        "magnitude": 30,
        "YUV": True,
        "window_size": 32,
        "pos_list": [(15, 15), (31, 31)],
    }

    x_train, y_train, x_test, y_test = get_data(param)
    # poison() rewrites the selected samples and relabels y_train in place.
    x_train = poison(x_train, y_train, param)

    x_test_pos = impose(x_test.copy(), y_test.copy(), param)
    # np.long was removed from NumPy; use an explicit 64-bit integer dtype.
    y_test_pos = np.full((x_test_pos.shape[0], 1), param["target_label"], dtype=np.int64)

    param["input_shape"] = x_train.shape[1:]
    y_train = keras.utils.to_categorical(y_train, param["label_dim"])
    y_test = keras.utils.to_categorical(y_test, param["label_dim"])
    y_test_pos = keras.utils.to_categorical(y_test_pos, param["label_dim"])

    model = get_model(param)
    batch_size = 32

    filepath = "model/{}.hdf5".format(digest(param))
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True,
                                 mode='max')

    # Both dataset branches compiled with the 'adam' string and fitted with
    # identical arguments; the optimizer objects they built were never used,
    # so a single path remains.
    # NOTE(review): the test set doubles as validation data for checkpoint
    # selection -- confirm this is intended.
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam', metrics=['accuracy'])
    model.fit(x=x_train, y=y_train, batch_size=batch_size, steps_per_epoch=x_train.shape[0] // batch_size,
              epochs=80, verbose=1, validation_data=(x_test, y_test),
              callbacks=[keras.callbacks.LearningRateScheduler(lr_schedule), checkpoint])

    model.load_weights(filepath)
    scores_normal = model.evaluate(x_test, y_test, batch_size=128, verbose=1)
    scores_trojan = model.evaluate(x_test_pos, y_test_pos, batch_size=128, verbose=1)
    print('\nTest on normal: %.3f loss: %.3f' % (scores_normal[1] * 100, scores_normal[0]))
    print('\nTest on trojan: %.3f loss: %.3f' % (scores_trojan[1] * 100, scores_trojan[0]))




if __name__ == "__main__":
    # Turn on memory growth for every visible GPU so TensorFlow does not
    # reserve all GPU memory up front.
    for device in tf.config.experimental.list_physical_devices(device_type='GPU'):
        tf.config.experimental.set_memory_growth(device, True)

    train()



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        896       
_________________________________________________________________
activation (Activation)      (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0


Epoch 00019: val_accuracy improved from 0.97181 to 0.97783, saving model to model\e885e2ec2414bdb0495347e5e59fbfba.hdf5
Epoch 20/80

Epoch 00020: val_accuracy did not improve from 0.97783
Epoch 21/80

Epoch 00021: val_accuracy did not improve from 0.97783
Epoch 22/80

Epoch 00022: val_accuracy did not improve from 0.97783
Epoch 23/80

Epoch 00023: val_accuracy did not improve from 0.97783
Epoch 24/80

Epoch 00024: val_accuracy did not improve from 0.97783
Epoch 25/80

Epoch 00025: val_accuracy improved from 0.97783 to 0.97933, saving model to model\e885e2ec2414bdb0495347e5e59fbfba.hdf5
Epoch 26/80

Epoch 00026: val_accuracy did not improve from 0.97933
Epoch 27/80

Epoch 00027: val_accuracy did not improve from 0.97933
Epoch 28/80

Epoch 00028: val_accuracy did not improve from 0.97933
Epoch 29/80

Epoch 00029: val_accuracy did not improve from 0.97933
Epoch 30/80

Epoch 00030: val_accuracy did not improve from 0.97933
Epoch 31/80

Epoch 00031: val_accuracy did not improve from 0.9793


Epoch 00058: val_accuracy did not improve from 0.98282
Epoch 59/80

Epoch 00059: val_accuracy did not improve from 0.98282
Epoch 60/80

Epoch 00060: val_accuracy did not improve from 0.98282
Epoch 61/80

Epoch 00061: val_accuracy did not improve from 0.98282
Epoch 62/80

Epoch 00062: val_accuracy did not improve from 0.98282
Epoch 63/80

Epoch 00063: val_accuracy did not improve from 0.98282
Epoch 64/80

Epoch 00064: val_accuracy did not improve from 0.98282
Epoch 65/80

Epoch 00065: val_accuracy did not improve from 0.98282
Epoch 66/80

Epoch 00066: val_accuracy did not improve from 0.98282
Epoch 67/80

Epoch 00067: val_accuracy did not improve from 0.98282
Epoch 68/80

Epoch 00068: val_accuracy did not improve from 0.98282
Epoch 69/80

Epoch 00069: val_accuracy did not improve from 0.98282
Epoch 70/80

Epoch 00070: val_accuracy improved from 0.98282 to 0.98321, saving model to model\e885e2ec2414bdb0495347e5e59fbfba.hdf5
Epoch 71/80

Epoch 00071: val_accuracy did not improve from 0.9