In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import sys

import pandas as pd
import numpy as np

import time

import torch

from tqdm.auto import tqdm

In [2]:
from datasets import load_original_dataset, load_deleted_dataset
from models import CNN

In [3]:
# Folder handed to load_deleted_dataset for every run.
DATA_DIR = 'Datasets/Features/'

# Mini-batch size shared by the training loader and the evaluation loops.
BATCH_SIZE = 32

# Number of passes over the training data per run (one checkpoint per pass).
EPOCHS = 5

# Values passed as `percentage` to load_deleted_dataset, one run each:
# 1, then every multiple of ten from 10 to 90, then 99.
PERCENTAGES = [1, *range(10, 100, 10), 99]

In [4]:
# Put ./libraries/Unlearnable-Examples/ on the module search path. This has to
# happen before the import on the next line, which presumably resolves
# `toolbox` from that directory (verify against the checkout).
sys.path.append(os.path.abspath('./libraries/Unlearnable-Examples/'))
import toolbox

In [5]:
# Perturbation hyper-parameters; presumably taken from the reference script
# linked below (verify against that line).
# https://github.com/HanxunH/Unlearnable-Examples/blob/main/perturbation.py#L45

# Noise bound; divided by 255 when it is handed to PerturbationTool.
EPSILON = 8
# Number of perturbation steps handed to PerturbationTool.
NUM_STEPS = 1
# Step size; divided by 255 when it is handed to PerturbationTool.
STEP_SIZE = 0.8

In [6]:
# Shared PerturbationTool instance used by add_random_noise.
# EPSILON and STEP_SIZE are divided by 255 before being passed in;
# NUM_STEPS is passed through unchanged.
# References:
#   https://github.com/HanxunH/Unlearnable-Examples/blob/main/perturbation.py#L51
#   https://github.com/HanxunH/Unlearnable-Examples/blob/main/perturbation.py#L446
noise_generator = toolbox.PerturbationTool(epsilon=EPSILON / 255, num_steps=NUM_STEPS, step_size=STEP_SIZE / 255)

In [7]:
def add_random_noise(x):
    """Return a copy of ``x`` with random noise added, clamped to [0, 255].

    The noise is drawn from the module-level ``noise_generator`` with the same
    shape as ``x``, multiplied by 255 and clamped to [0, 255] before it is
    added. ``x`` itself is not modified.

    NOTE(review): clamping the scaled noise to [0, 255] turns every negative
    noise value into 0. The original comment cites dataset.py#L448 of the
    reference repository for this step -- confirm that this is intended.
    """
    # https://github.com/HanxunH/Unlearnable-Examples/blob/main/dataset.py#L441
    raw_noise = noise_generator.random_noise(noise_shape=x.shape)

    # https://github.com/HanxunH/Unlearnable-Examples/blob/main/dataset.py#L448
    scaled_noise = torch.clamp(raw_noise * 255, min=0, max=255)

    # https://github.com/HanxunH/Unlearnable-Examples/blob/main/perturbation.py#L451
    return (x + scaled_noise.to(x.device)).clamp(0, 255)

In [8]:
def _module_device(model):
    """Return the device of the model's first parameter (CUDA if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device('cuda')


def _wait_for_device(device):
    """Block until queued CUDA work on ``device`` has finished; no-op on CPU."""
    if device.type == 'cuda':
        torch.cuda.synchronize(device)


def _evaluate_accuracy(model, dataset, device, batch_size):
    """Return the fraction of samples in ``dataset`` that ``model`` classifies correctly.

    The result is weighted per sample, so a smaller final batch does not get
    the same weight as a full one. Returns NaN for an empty dataset.
    """
    inputs, labels = dataset.tensors
    total = inputs.shape[0]
    if total == 0:
        return float('nan')

    correct = 0
    for start in range(0, total, batch_size):
        logits = model(inputs[start:start + batch_size].to(device))
        predicted = torch.argmax(logits, dim=-1)
        batch_labels = labels[start:start + batch_size].to(device)
        correct += (predicted == batch_labels).sum().item()
    return correct / total


def fit(model, save_dir, train_set, test_set, forget_set):
    """Train ``model`` on the retained data plus a noised copy of the forget data.

    Args:
        model: torch module to train in place. Batches are moved to whatever
            device its parameters live on.
        save_dir: directory (created if missing) that receives one checkpoint
            per epoch, named ``001.pt``, ``002.pt``, ...
        train_set: dataset exposing ``.tensors`` as ``(inputs, labels)``.
        test_set: dataset exposing ``.tensors``; only used for evaluation.
        forget_set: dataset exposing ``.tensors``. Its inputs are passed through
            ``add_random_noise`` and appended to the training data with their
            original labels.

    Returns:
        ``(train_times, train_accs, test_accs, forget_accs)``: four lists with
        one entry per epoch. ``train_times`` holds seconds spent in the
        optimisation steps (accuracy bookkeeping is excluded). Forget accuracy
        is measured on the un-noised forget inputs.
    """
    os.makedirs(save_dir, exist_ok=True)
    device = _module_device(model)

    train_x, train_y = train_set.tensors
    forget_x, forget_y = forget_set.tensors

    # Only the copy used for training is perturbed; forget_set stays untouched
    # so it can be evaluated on clean inputs further down.
    noisy_forget_x = add_random_noise(forget_x)

    combined_set = torch.utils.data.TensorDataset(
        torch.cat([train_x, noisy_forget_x], dim=0),
        torch.cat([train_y, forget_y], dim=0),
    )

    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.CrossEntropyLoss()

    train_loader = torch.utils.data.DataLoader(
        combined_set, batch_size=BATCH_SIZE, shuffle=True, drop_last=True
    )

    train_times, train_accs, test_accs, forget_accs = [], [], [], []

    for epoch in range(EPOCHS):

        # ---- train ----
        model.train()
        epoch_time = 0.0
        correct, seen = 0, 0

        _wait_for_device(device)
        tick = time.perf_counter()

        for x, y in train_loader:
            optimizer.zero_grad()
            x, y = x.to(device), y.to(device)
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

            # CUDA kernels are queued asynchronously: wait for them before
            # reading the clock, otherwise the GPU work is not in the timing.
            _wait_for_device(device)
            epoch_time += time.perf_counter() - tick

            # Accuracy bookkeeping is deliberately outside the timed region.
            predicted = torch.argmax(output.detach(), dim=-1)
            correct += (predicted == y).sum().item()
            seen += y.shape[0]

            tick = time.perf_counter()

        train_times.append(epoch_time)
        train_accs.append(correct / seen if seen else float('nan'))

        # ---- evaluate ----
        model.eval()
        with torch.no_grad():
            test_accs.append(_evaluate_accuracy(model, test_set, device, BATCH_SIZE))
            forget_accs.append(_evaluate_accuracy(model, forget_set, device, BATCH_SIZE))

        # ---- checkpoint ----
        torch.save(model.state_dict(), os.path.join(save_dir, f'{(epoch+1):03d}.pt'))

    return train_times, train_accs, test_accs, forget_accs

In [9]:
# One DataFrame of per-epoch metrics is collected for each percentage.
results = []

for percentage in tqdm(PERCENTAGES):

    # Every run starts from the same saved initial weights.
    model = CNN().cuda()
    initial_state = torch.load('./weights/init.pt')
    model.load_state_dict(initial_state)

    train_set, test_set, forget_set = load_deleted_dataset(DATA_DIR, percentage)

    # fit returns (train_times, train_accs, test_accs, forget_accs).
    metrics = fit(model, f'weights/Unlearnable/{percentage}', train_set, test_set, forget_set)

    # zip(*metrics) yields one row per epoch; the columns follow fit's return order.
    run_df = pd.DataFrame(
        zip(*metrics),
        columns=['train_time', 'train_acc', 'test_acc', 'forget_acc'],
    ).assign(epoch=range(1, EPOCHS+1), percentage=percentage)

    results.append(run_df)

  0%|          | 0/11 [00:00<?, ?it/s]

In [None]:
# Collapse the per-percentage frames into one table indexed by
# (percentage, epoch). The guard makes this cell safe to re-run: once
# `results` is already a DataFrame, pd.concat(results) would raise.
if not isinstance(results, pd.DataFrame):
    results = pd.concat(results).set_index(['percentage', 'epoch'])

# Create the output folder first so a missing directory cannot cost us the
# metrics of the whole sweep.
os.makedirs('results', exist_ok=True)
results.to_csv('results/Unlearnable.csv')

results