In [1]:
import torch
import numpy as np
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader, TensorDataset
import tqdm
import random
from resnet1d import Resnet34
from collections import OrderedDict
import matplotlib.pyplot as plt

In [2]:
from functools import partial
print_flush = partial(print, flush=True)
torch.manual_seed(2)
random.seed(2)
np.random.seed(2)

In [3]:
class Dataset_ori():
    def __init__(self,data_path,label_path, selected_class=None):
        # self.root = root
        self.data_path = data_path
        self.label_path = label_path
        self.selected_class = selected_class
        self.dataset,self.labelset= self.build_dataset()
        self.length = self.dataset.shape[0]
        # self.minmax_normalize()

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        step = self.dataset[idx,:]
        step = torch.unsqueeze(step, 0)
        # target = self.label[idx]
        target = self.labelset[idx]
        # target = torch.unsqueeze(target, 0)# only one class
        return step, target

    def build_dataset(self):
        '''get dataset of signal'''

        dataset = np.load(self.data_path)
        labelset = np.load(self.label_path)
            
        if self.selected_class is not None:
            dataset = dataset[labelset == self.selected_class]
            labelset = labelset[labelset == self.selected_class]

        # dataset,labelset = shuffle(dataset,labelset)
        dataset = torch.from_numpy(dataset)
        labelset = torch.from_numpy(labelset)

        return dataset,labelset

In [4]:
class Dataset_backdoor():
    def __init__(self,data_path,label_path,backdoor_perc,target_class,ret_attack_only=False,bd_labelset=True,sample_ratio=None, trigger=None, mask=None):
        # self.root = root
        self.data_path = data_path
        self.label_path = label_path
        self.backdoor_perc = backdoor_perc
        self.target_class = target_class
        self.ret_attack_only = ret_attack_only
        self.bd_labelset = bd_labelset
        self.sample_ratio = sample_ratio
        self.trigger = trigger
        self.mask = mask
        self.dataset,self.labelset= self.build_dataset()
        self.length = self.dataset.shape[0]
        # self.minmax_normalize()

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        step = self.dataset[idx,:]
        step = torch.unsqueeze(step, 0)
        target = self.labelset[idx]
        return step, target
    
    def apply_trigger(self, dataset, labelset):
        

        print('Apply trigger', np.unique(labelset, return_counts=True), flush=True)
        trigger_class = 1 - self.target_class
        trigger_class_idx = np.where(labelset == trigger_class)[0]
        trigger_sample_idx = trigger_class_idx[np.random.choice(len(trigger_class_idx), int(self.backdoor_perc * len(trigger_class_idx)), replace=False)]
        dataset_bd = dataset.copy()
        labelset_bd = labelset.copy()
        for idx in tqdm.tqdm(trigger_sample_idx):
            if self.mask is not None and self.trigger is not None:
                dataset_bd[idx] = (1 - self.mask[None, :]) * dataset_bd[idx] + self.mask[None, :] * self.trigger 
            if self.bd_labelset:
                labelset_bd[idx] = self.target_class
        
        if self.ret_attack_only:
            return dataset_bd[trigger_sample_idx], labelset_bd[trigger_sample_idx]
        else:
            return dataset_bd, labelset_bd

    def build_dataset(self):
        '''get dataset of signal'''

        dataset = np.load(self.data_path)
        labelset = np.load(self.label_path)

        if self.sample_ratio is not None:
            indices = np.random.choice(len(dataset), int(self.sample_ratio * len(dataset)), replace=False)
            dataset, labelset = dataset[indices], labelset[indices]
            
        if self.backdoor_perc > 0:
            dataset, labelset = self.apply_trigger(dataset, labelset)

        dataset = torch.from_numpy(dataset)
        labelset = torch.from_numpy(labelset)

        return dataset,labelset

In [5]:
def train(model, target_label, testloader, param):
    print("Processing label: {}".format(target_label))

    signal_length = param["signal_length"]
    trigger = torch.rand((signal_length), requires_grad=True)
    trigger = trigger.to(device).detach().requires_grad_(True)
    mask = torch.rand((signal_length), requires_grad=True)
    mask = mask.to(device).detach().requires_grad_(True)

    Epochs = param["Epochs"]
    lamda = param["lamda"]

    min_norm = np.inf
    min_norm_count = 0

    criterion = CrossEntropyLoss()
    optimizer = torch.optim.Adam([{"params": trigger},{"params": mask}],lr=0.005)
    model.to(device)
    model.eval()

    for epoch in range(Epochs):
        norm = 0.0
        loss_list = []
        for signal, _ in tqdm.tqdm(testloader, desc='Epoch %3d' % (epoch + 1)):
            optimizer.zero_grad()
            signal = signal.to(device)
            
            trojan_signal = (1 - torch.unsqueeze(mask, dim=0)) * signal + torch.unsqueeze(mask, dim=0) * trigger
            trojan_signal = trojan_signal.float()
            _, y_pred = model(trojan_signal)
            y_target = torch.full((y_pred.size(0),), target_label, dtype=torch.long).to(device)
            
            loss = criterion(y_pred, y_target) + lamda * torch.sum(torch.abs(mask))
            loss_list.append(loss.detach().cpu().numpy())
            
            loss.backward()
            optimizer.step()

            # figure norm
            with torch.no_grad():
                # 防止trigger和norm越界
                torch.clip_(trigger, 0, 1)
                torch.clip_(mask, 0, 1)
                norm = torch.sum(torch.abs(mask))
                
        print("loss: ", np.mean(loss_list))
        
        print("norm: {}".format(norm))

        # to early stop
        if norm < min_norm:
            min_norm = norm
            min_norm_count = 0
        else:
            min_norm_count += 1

        if min_norm_count > 30:
            break

    return trigger.cpu(), mask.cpu()



In [6]:
def reverse_engineer(model_path):
    param = {
        "Epochs": 500,
        "batch_size": 64,
        "lamda": 0.01,
        "num_classes": 2,
        "signal_length": 2400,
        "trigger_size":100
    }
    
    MODEL_PATH = model_path
    state_dict = torch.load(MODEL_PATH) 
    
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] #remove 'module'
        new_state_dict[name] = v

    state_dict = new_state_dict
    
    model = Resnet34().cuda()
    model.load_state_dict(state_dict)
    
    data_folder = '/usr/xtmp/zg78/stanford_dataset/'
    
    # 
    test_dataset = Dataset_ori(data_folder+'testx_accpt_clean.npy', data_folder+'testy_af_accpt_clean.npy')
    testloader = DataLoader(test_dataset, batch_size=2500, shuffle=False, num_workers=0)
    

    norm_list = []
    trigger_list = []
    mask_list = []
    for label in range(param["num_classes"]):
        test_dataset = Dataset_ori(data_folder+'testx_accpt_clean.npy', data_folder+'testy_af_accpt_clean.npy',selected_class = 1- label)
        testloader = DataLoader(test_dataset, batch_size=2500, shuffle=False, num_workers=0)
        
        trigger, mask = train(model, label, testloader, param)
        norm_list.append(mask.sum().item())

        trigger = trigger.cpu().detach().numpy()
        trigger_list.append(trigger)
        
        mask = mask.cpu().detach().numpy()
        mask_list.append(mask)
        
        
        print("class:", label)
        print("trigger:", trigger)
        print("mask:", mask)

    print(norm_list)
    return norm_list, trigger_list, mask_list

In [7]:
def unlearning(model_path, target_class, trigger, mask, i):
    param = {
        "Epochs": 1,
        "batch_size": 1280,
        "signal_length": 2400,
        "sample_ratio": 0.1,
        "backdoor_percentage": 0.2,
        "learning_rate": 0.01
    }
    
    MODEL_PATH = model_path
    state_dict = torch.load(MODEL_PATH) 
    
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] #remove 'module'
        new_state_dict[name] = v

    state_dict = new_state_dict
    
    model = Resnet34().cuda()
    model.load_state_dict(state_dict)
    
    data_folder = '/usr/xtmp/zg78/stanford_dataset/'
    train_dataset = Dataset_backdoor(data_folder+'trainx_accpt_clean.npy', data_folder+'trainy_af_accpt_clean.npy', backdoor_perc=param["backdoor_percentage"], target_class=target_class, sample_ratio=param["sample_ratio"], bd_labelset=False)
    train_loader = DataLoader(train_dataset, batch_size=param["batch_size"], shuffle=True)
    model.train()
    
    criterion = CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = param["learning_rate"])
    loss_list = []
    for epoch in range(param["Epochs"]):
        for signal, y_target in tqdm.tqdm(train_loader, desc='Epoch %3d' % (epoch + 1)):
            optimizer.zero_grad()
            signal, y_target = signal.float().to(device), y_target.long().to(device)
            _, y_pred = model(signal)
            loss = criterion(y_pred, y_target)
            loss.backward()
            optimizer.step()
            loss_list.append(loss.detach().cpu().numpy())
        #print(np.mean(loss_list))
    torch.save(model, "defense_models/neural_cleanse_{}".format( i))

In [8]:
def finetune(model_path, i):
    param = {
        "Epochs": 1,
        "batch_size": 1280,
        "signal_length": 2400,
        "sample_ratio": 0.1,
        "backdoor_percentage": 0.0,
        "learning_rate": 0.01
    }
    
    
    MODEL_PATH = model_path
    state_dict = torch.load(MODEL_PATH) 
    
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] #remove 'module'
        new_state_dict[name] = v

    state_dict = new_state_dict
    
    model = Resnet34().cuda()
    model.load_state_dict(state_dict)
    
    data_folder = '/usr/xtmp/zg78/stanford_dataset/'
    train_dataset = Dataset_backdoor(data_folder+'trainx_accpt_clean.npy', data_folder+'trainy_af_accpt_clean.npy', backdoor_perc=param["backdoor_percentage"], target_class=0, sample_ratio=param["sample_ratio"], bd_labelset=False)
    train_loader = DataLoader(train_dataset, batch_size=param["batch_size"], shuffle=True)
    model.train()
    
    criterion = CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = param["learning_rate"])
    loss_list = []
    for epoch in range(param["Epochs"]):
        for signal, y_target in tqdm.tqdm(train_loader, desc='Epoch %3d' % (epoch + 1)):
            optimizer.zero_grad()
            signal, y_target = signal.float().to(device), y_target.long().to(device)
            _, y_pred = model(signal)
            loss = criterion(y_pred, y_target)
            loss.backward()
            optimizer.step()
            loss_list.append(loss.detach().cpu().numpy())
        #print(np.mean(loss_list))
    torch.save(model, "defense_models/finetune_{}.pt".format(i))

In [9]:
if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    modes = ["finetune", "neural_cleanse"]
    model_list = ['saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_0_0_diff0/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_1_0_diff0/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_0_0_diff0/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_1_0_diff0/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_0_0_diff0/PPG_best_1.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_1_0_diff0/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_0_1_diff1/PPG_best_21.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_1_1_diff1/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_0_1_diff1/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_1_1_diff1/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_0_1_diff1/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_1_1_diff1/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_0_2_diff2/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_1_2_diff2/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_0_2_diff2/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_1_2_diff2/PPG_best_10.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_0_2_diff2/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_1_2_diff2/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_0_3_diff3/PPG_best_4.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.01_1_3_diff3/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_0_3_diff3/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.05_1_3_diff3/PPG_best_0.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_0_3_diff3/PPG_best_3.pt', 'saved_models/res34_epoch_30_ppglr_0.0001_BDPERC_0.1_1_3_diff3/PPG_best_0.pt']
    for mode in modes:
        for i, model_path in enumerate(model_list):
            if mode == 'neural_cleanse':
                norm_list, trigger_list, mask_list = reverse_engineer(model_path)
                target_class = np.argmin(norm_list)
                unlearning(model_path, target_class, trigger_list[target_class], mask_list[target_class], i)
                with open('norms/{}_{}.npy'.format(mode, i), 'wb') as f1:
                    np.save(f1, norm_list)
                with open('triggers/{}_{}.npy'.format(mode, i), 'wb') as f2:
                    np.save(f2, trigger_list)
                with open('masks/{}_{}.npy'.format(mode, i), 'wb') as f3:
                    np.save(f3, mask_list)
            else:
                finetune(model_path, i)

Epoch   1: 100%|██████████| 65/65 [00:27<00:00,  2.36it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'defense_models/finetune_0'