In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import sys

import pandas as pd
import numpy as np

import time
import copy

import io
import contextlib

import torch

from tqdm.auto import tqdm

In [2]:
from datasets import load_original_dataset, load_deleted_dataset
from models import CNN

In [3]:
DATA_DIR = 'Datasets/Features/'
BATCH_SIZE = 32
EPOCHS = 1
PERCENTAGES = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99]

In [4]:
sys.path.append(os.path.abspath('./libraries/LCODEC-deep-unlearning/'))
sys.path.append(os.path.abspath('./libraries/LCODEC-deep-unlearning/scrub/'))

from scrub_tools import inp_perturb

In [5]:
# https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/scrub_scripts/mnist_logistic.sh

class args:
    lr = 0.001
    batch_size = 256
    scrub_batch_size = 256
    weight_decay = 0.01
    n_perturbations = 1000
    selectionType = 'Full'
    order = 'BP'

In [6]:
def fit(model, save_dir, train_set, test_set, forget_set):
    
    os.makedirs(save_dir, exist_ok=True)

    # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L52
    criterion = torch.nn.CrossEntropyLoss()
    # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L77
    forget_loader = torch.utils.data.DataLoader(forget_set, batch_size = args.batch_size, shuffle = True, drop_last=False)
    
    train_times = list()
    train_accs, test_accs, forget_accs = list(), list(), list()
    
    for epoch in range(EPOCHS):
        
        # train
        
        train_time = 0
        
        start_time = time.time()
        
        for x, y in forget_loader:
        
            model.train()

            # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L94
            scrub_dataset = torch.utils.data.TensorDataset(x, y)

            # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L120
            optim = torch.optim.SGD(model.parameters(), lr=args.lr)

            with io.StringIO() as text_output:
                with contextlib.redirect_stdout(text_output):
                    
                    # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L122
                    foci_val, updatedSD, samplossbefore, samplossafter, gradnormbefore, gradnormafter = inp_perturb(model, scrub_dataset, criterion, args, optim, device=0, outString=None)

            # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L124
            m = copy.deepcopy(model)
            m.load_state_dict(updatedSD)
            model = m
            
            train_time += time.time() - start_time
            
            start_time = time.time()
            
        train_times.append(train_time)
        
        # test
            
        model.eval()
        with torch.no_grad():
            
            x, y = train_set.tensors
            
            accs = list()
            
            for i in range(0, x.shape[0], BATCH_SIZE):
            
                output = model(x[i:i+BATCH_SIZE].cuda())

                predicted = torch.argmax(output.data, dim=-1)
                accs.append((predicted == y[i:i+BATCH_SIZE].cuda()).float().mean().detach().cpu().numpy())
            
            train_accs.append(np.mean(accs))
            
            x, y = test_set.tensors

            accs = list()
            
            for i in range(0, x.shape[0], BATCH_SIZE):
            
                output = model(x[i:i+BATCH_SIZE].cuda())

                predicted = torch.argmax(output.data, dim=-1)
                accs.append((predicted == y[i:i+BATCH_SIZE].cuda()).float().mean().detach().cpu().numpy())
            
            test_accs.append(np.mean(accs))
            

            x, y = forget_set.tensors

            accs = list()

            for i in range(0, x.shape[0], BATCH_SIZE):

                output = model(x[i:i+BATCH_SIZE].cuda())

                predicted = torch.argmax(output.data, dim=-1)
                accs.append((predicted == y[i:i+BATCH_SIZE].cuda()).float().mean().detach().cpu().numpy())

            forget_accs.append(np.mean(accs))
        
        # save
        torch.save(model.state_dict(), os.path.join(save_dir, f'{(epoch+1):03d}.pt'))

    return train_times, train_accs, test_accs, forget_accs

In [7]:
results = list()

for percentage in tqdm(PERCENTAGES):
    
    model = CNN().cuda()

    # https://github.com/vsingh-group/LCODEC-deep-unlearning/blob/main/scrub/multi_scrub.py#L49
    model.load_state_dict(torch.load('./weights/original/005.pt'))
    
    train_set, test_set, forget_set = load_deleted_dataset(DATA_DIR, percentage)
    
    train_times, train_accs, test_accs, forget_accs = fit(model, f'weights/LCODEC/{percentage}', train_set, test_set, forget_set)
    
    df = pd.DataFrame(zip(train_times, train_accs, test_accs, forget_accs), columns=['train_time', 'train_acc', 'test_acc', 'forget_acc'])
    df['epoch'] = range(1, EPOCHS+1)
    df['percentage'] = percentage
    
    results.append(df)

  0%|          | 0/11 [00:00<?, ?it/s]

In [8]:
results = pd.concat(results).set_index(['percentage', 'epoch'])

results.to_csv('results/LCODEC.csv')

results

Unnamed: 0_level_0,Unnamed: 1_level_0,train_time,train_acc,test_acc,forget_acc
percentage,epoch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,9.480693,0.986924,0.985523,0.988487
10,1,77.510495,0.983912,0.983626,0.982547
20,1,152.847956,0.896563,0.898562,0.8845
30,1,230.718387,0.098558,0.097943,0.099023
40,1,304.798454,0.098389,0.097943,0.099208
50,1,382.554578,0.098714,0.097943,0.098714
60,1,459.638935,0.099542,0.097943,0.098167
70,1,533.445381,0.098857,0.097943,0.098677
80,1,609.369084,0.0995,0.097943,0.098521
90,1,688.026398,0.097739,0.097943,0.098804
