In [3]:
!pip3 install -q -r requirements.txt # make sure we got all packages we need

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [125]:
# -*- coding: utf-8 -*
import argparse
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models
import os
import multiprocessing as mp
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
from tqdm import tqdm
from tensors_dataset_path import TensorDatasetPath
from tensors_dataset_img import TensorDatasetImg
import random
import sys
from utils import *
from models import *
from data_transform import *

In [126]:
# Set up a reproducible environment

def setup_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same ``seed`` is applied to Python's ``random`` module, to NumPy and
    to PyTorch (CPU generator plus the current and all CUDA devices).  cuDNN
    is asked for deterministic kernels, while the global "deterministic
    algorithms" switch of PyTorch is explicitly left disabled.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.use_deterministic_algorithms(False)
    torch.backends.cudnn.deterministic = True


setup_seed(20)

In [127]:
# !python knowledge_distill_dataset.py
# !python data_compression.py

In [156]:
params = read_config()

# CUDA_VISIBLE_DEVICES is only honoured if it is set before CUDA is first
# queried/initialised, so export it *before* calling torch.cuda.is_available().
# Setting it on a machine without GPUs has no effect on the CPU fallback below.
# NOTE(review): this must run before any other cell touches CUDA — confirm
# after Restart & Run All.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [168]:
model_name = params["model"]

# Map every supported name to a zero-argument factory so that only the
# requested architecture is instantiated (previously all four networks were
# built up front just to pick one of them).
# NOTE(review): building a single network presumably consumes less of the
# seeded RNG stream than building all four — compare runs with that in mind.
model_set = {
    "resnets": lambda: ResNetS(nclasses=10),
    "vgg_face": VGG_16,
    "gtsrb": gtsrb,
    "resnet50": models.resnet50,
}
print("model_name: ", model_name)
if model_name not in model_set:
    raise KeyError(
        f"unknown model {model_name!r}; expected one of {sorted(model_set)}"
    )
model = model_set[model_name]()

# Restore the weights named in the configuration from the checkpoints folder.
ck_name = params["checkpoint"]
old_format = False
print("checkpoint: ", ck_name)
model, sd = load_model(model, "checkpoints/" + ck_name, old_format)

model_name:  resnets
checkpoint:  resnets_clean


In [169]:
# List the direct children of the freshly loaded model with their trainable flag.
# NOTE(review): print_model_topology is defined in a later cell of this notebook;
# on a fresh kernel this call only works if one of the star-imports provides it — confirm.
print_model_topology(model)

Model: ResNet
1 - Conv2d
	Trainable:True
2 - BatchNorm2d
	Trainable:True
3 - Sequential
	Trainable:True
4 - Sequential
	Trainable:True
5 - Sequential
	Trainable:True
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True


In [170]:
# Place the network on the selected device; when more than one GPU is
# visible it is additionally wrapped in DataParallel.
use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.cuda()
if use_cuda and torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model.to(device)

# Start in evaluation mode.
model.eval()

print('model loaded')

model loaded


In [171]:
# Same listing again, now that the model has been moved to the target device.
print_model_topology(model)

Model: ResNet
1 - Conv2d
	Trainable:True
2 - BatchNorm2d
	Trainable:True
3 - Sequential
	Trainable:True
4 - Sequential
	Trainable:True
5 - Sequential
	Trainable:True
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True


In [172]:
# Load training dataset

distill_data_name = params["distill_data"]
compressed = params["compressed"]
com_ratio = params["com_ratio"]

# Compressed sets are stored as "compression_<name>_<ratio>", uncompressed ones
# as "distill_<name>"; the GTSRB variants carry an additional "_gtsrb" suffix.
if compressed:
    train_dataset_path = (
        "./dataset/compression_" + distill_data_name + "_" + str(com_ratio)
    )
else:
    train_dataset_path = "./dataset/distill_" + distill_data_name
if model_name == "gtsrb":
    train_dataset_path += "_gtsrb"

train_dataset = torch.load(train_dataset_path)
print("distill_data num:", len(train_dataset))

train_images = []
train_labels = []
for i in range(len(train_dataset)):
    # Fetch every sample once instead of indexing the dataset twice.
    sample = train_dataset[i]
    train_images.append(sample[0])
    train_labels.append(sample[1].cpu())  # labels are moved to the CPU
# NOTE(review): the recorded output shows these arrays holding PIL images and
# torch tensors as elements; whether np.array keeps them as objects may depend
# on the installed library versions — confirm.
train_images = np.array(train_images)
train_labels = np.array(train_labels)

print("load train data finished")

print(type(train_images), type(train_images[0]))
print(type(train_labels), type(train_labels[0]))

distill_data num: 20000
load train data finished
<class 'numpy.ndarray'> <class 'PIL.Image.Image'>
<class 'numpy.ndarray'> <class 'torch.Tensor'>


In [173]:
# Load test dataset

dataset_name = params["data"]

if dataset_name == "VGGFace":
    test_images, test_labels = get_dataset_vggface("./dataset/VGGFace/", max_num=10)
elif dataset_name == "tiny-imagenet-200":
    testset = torchvision.datasets.ImageFolder(
        root="./dataset/tiny-imagenet-200/val", transform=None
    )
    test_images = []
    test_labels = []
    for i in range(len(testset)):
        # Every lookup goes through the dataset's __getitem__, so fetch the
        # (image, label) pair once rather than indexing twice per sample.
        img, label = testset[i]
        test_images.append(img)
        test_labels.append(label)
    test_images = np.array(test_images)
    test_labels = np.array(test_labels)
elif dataset_name == "cifar10":
    _dataset = torchvision.datasets.CIFAR10(root="./data", train=False, download=True)
    test_images = [_dataset[i][0] for i in range(len(_dataset))]
    test_labels = _dataset.targets
else:
    # Any other dataset name is resolved through the project's generic loader.
    test_images, test_labels = get_dataset("./dataset/" + dataset_name + "/test/")


print("load data finished")
print("len of test data", len(test_labels))
# Loss used by the validation passes.
criterion_verify = nn.CrossEntropyLoss()

Files already downloaded and verified
load data finished
len of test data 10000


In [174]:
batch_size = 320


def _make_loader(dataset, shuffle, loader_batch_size, workers):
    """Wrap ``dataset`` in a DataLoader that uses pinned memory."""
    return DataLoader(
        dataset,
        shuffle=shuffle,
        batch_size=loader_batch_size,
        num_workers=workers,
        pin_memory=True,
    )


def _make_test_loader(dataset_cls, poisoned, workers, dataset_kwargs):
    """Build an unshuffled test loader with batch size 64.

    ``poisoned`` is passed through unchanged as ``test_poisoned`` (the strings
    "False" / "True"); ``dataset_kwargs`` holds the optional transform
    arguments of the dataset class.
    """
    dataset = dataset_cls(
        test_images,
        test_labels,
        mode="test",
        test_poisoned=poisoned,
        **dataset_kwargs,
    )
    return _make_loader(dataset, False, 64, workers)


# Per-model settings: (training transform, worker count, test dataset class,
# extra keyword arguments for the test dataset).  Unknown model names leave
# the specification empty, in which case no loader is created.
_loader_spec = None
if model_name == "resnets":
    _loader_spec = (
        cifar100_transforms,
        0,
        TensorDatasetImg,
        {
            "transform": cifar10_transforms_test,
            "transform_name": "cifar10_transforms_test",
        },
    )
elif model_name == "vgg_face":
    _loader_spec = (LFW_transforms, 4, TensorDatasetPath, {})
elif model_name == "gtsrb":
    _loader_spec = (
        cifar100_transforms,
        4,
        TensorDatasetPath,
        {"transform": gtsrb_transforms, "transform_name": "gtsrb_transforms"},
    )
elif model_name == "resnet50":
    _loader_spec = (
        imagenet_transforms,
        4,
        TensorDatasetImg,
        {
            "transform": imagenet_transforms,
            "transform_name": "imagenet_transforms_test",
        },
    )

if _loader_spec is not None:
    _train_transform, _workers, _test_cls, _test_kwargs = _loader_spec

    # Training data is shuffled and uses the large batch size.
    train_loader = _make_loader(
        TensorDatasetImg(train_images, train_labels, transform=_train_transform),
        True,
        batch_size,
        _workers,
    )
    # The clean and the poisoned test loader differ only in the test_poisoned flag.
    test_loader = _make_test_loader(_test_cls, "False", _workers, _test_kwargs)
    test_loader_poison = _make_test_loader(_test_cls, "True", _workers, _test_kwargs)

print("poison data finished")

poison data finished


In [220]:
# Learning rate from the configuration; train_loop reads this global when it builds its optimiser.
lr = params["lr"]
# NOTE(review): train_loop binds its own local `epochs`, so this configured value is not what bounds training there.
epochs = params["epochs"]

# optimizer_poison = optim.SGD(model.parameters(), lr=lr)
# scheduler_poison = lr_scheduler.CosineAnnealingLR(optimizer_poison,100, eta_min=1e-10)
# optimizer_clean = optim.SGD(model.parameters(), lr=lr/2*1.0)
# scheduler_clean = lr_scheduler.CosineAnnealingLR(optimizer_clean,100, eta_min=1e-10)


# optimizer = optim.SGD(model.parameters(), lr=lr)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, epochs, eta_min=1e-10)
# criterion = nn.MSELoss()

In [221]:
def train_loop(model, label="", debug=False, epochs=800):
    """Train ``model`` with gradient control and record timing information.

    Relies on notebook-level names: ``lr``, ``train_loader``, ``test_loader``,
    ``test_loader_poison``, ``criterion_verify`` and the project helpers
    ``validate`` and ``train_with_grad_control``.

    Args:
        model: Network to train.  Only parameters whose ``requires_grad`` is
            set are handed to the optimiser.
        label: Suffix inserted into the checkpoint file name.
        debug: When true, print the current ``lambda1`` and the duration of
            every epoch.
        epochs: Number of training epochs; also used as the period of the
            cosine learning-rate schedule.  The default keeps the value that
            used to be hard-coded in this function.

    Returns:
        A tuple ``(model, (end_time, start_time, epoch_times), lambdas)``:
        the trained model, the wall-clock end and start timestamps (in that
        order) with the list of per-epoch durations in seconds, and the value
        of ``lambda1`` used in each epoch.
    """
    import time
    import warnings

    # Frozen parameters are filtered out so the optimiser never touches them.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, epochs, eta_min=1e-10)
    criterion = nn.MSELoss()

    ###########------------First Accuracy----------------############
    # Baseline on the clean and on the poisoned test set before any training.
    print("first accuracy:")
    validate(model, -1, test_loader, criterion_verify, True)
    validate(model, -1, test_loader_poison, criterion_verify, False)

    lambda1 = 1
    alpha = 0.05  # step size applied to the adjustment returned by each epoch

    checkpoint_path = f"checkpoints/cifar10_optim_1{label}.t7"
    epoch_times = []
    lambdas = []

    # Warnings are silenced only while training runs; the previous warning
    # filters are restored when the block is left, so later cells are unaffected.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        training_start_time = time.time()

        for epoch in tqdm(range(epochs)):
            epoch_start_time = time.time()
            if debug:
                print("lambda1: ", lambda1)
            lambdas.append(lambda1)

            adjust = train_with_grad_control(
                model, epoch, train_loader, criterion, optimizer, lambda1
            )
            # Move lambda1 by the scaled adjustment and clamp it to [0, 1].
            lambda1 = max(0, min(lambda1 + alpha * adjust, 1))

            # Evaluate on both test sets every fifth epoch.
            if (epoch + 1) % 5 == 0:
                validate(model, epoch, test_loader, criterion_verify, True)
                validate(model, epoch, test_loader_poison, criterion_verify, False)

            # The checkpoint is overwritten after every epoch.
            state = {
                "net": model.state_dict(),
                "masks": [w for name, w in model.named_parameters() if "mask" in name],
                "epoch": epoch,
            }
            torch.save(state, checkpoint_path)
            scheduler.step()

            epoch_time = time.time() - epoch_start_time
            if debug:
                print(f"took: {epoch_time}")
            epoch_times.append(epoch_time)

        training_end_time = time.time()

    print(f"model train finished. elapsed: {training_end_time - training_start_time}")
    return model, (training_end_time, training_start_time, epoch_times), lambdas
# train_loop(model)

In [222]:
# training_end_time, training_start_time, epoch_times

In [223]:
# TODO: freeze model layers
import pickle
def copy_model(model):
    """Return an independent deep copy of ``model``.

    ``copy.deepcopy`` replaces the earlier pickle round-trip: it yields the
    same kind of detached duplicate but also works when the object holds
    attributes that cannot be pickled (for example lambdas).

    Args:
        model: The object (typically a network) to duplicate.

    Returns:
        A new object that shares no mutable state with ``model``.
    """
    import copy

    return copy.deepcopy(model)

In [224]:
def print_model_topology(model):
    """Print the model's class name followed by one entry per direct child.

    Every entry shows the child's 1-based position, its class name and
    whether all of its parameters have ``requires_grad`` set; a child without
    parameters is therefore reported as trainable.
    """
    print(f"Model: {model.__class__.__name__}")
    for position, layer in enumerate(model.children(), start=1):
        is_trainable = all(weight.requires_grad for weight in layer.parameters())
        print(f"{position} - {layer.__class__.__name__}\n\tTrainable:{is_trainable}")

# sanity checking
# print_model_topology(m2)

In [225]:
# Print the trainable flag of each direct child of `model`.
# NOTE(review): the recorded output lists frozen layers, so `model` had been modified or
# rebound by out-of-order execution before this cell ran — re-check after Restart & Run All.
print_model_topology(model)

Model: ResNet
1 - Conv2d
	Trainable:False
2 - BatchNorm2d
	Trainable:False
3 - Sequential
	Trainable:False
4 - Sequential
	Trainable:False
5 - Sequential
	Trainable:False
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True


In [226]:
def partially_freeze_model(model, seq_target = 3):
    """Freeze the leading layers of ``model`` in place.

    Walks over the direct children of the network and sets
    ``requires_grad = False`` on all their parameters until the
    ``(seq_target + 1)``-th ``Sequential`` child is reached; that child and
    everything after it is left untouched.  With ``seq_target = 0`` only the
    children in front of the first ``Sequential`` are frozen.

    Args:
        model: Network to modify.  A ``DataParallel`` wrapper is looked
            through so that the children of the wrapped network are visited.
        seq_target: Number of ``Sequential`` children to freeze.

    Returns:
        The same ``model`` object that was passed in.
    """
    # A DataParallel wrapper has the real network as its only child; iterating
    # over the wrapper itself would never meet a Sequential and would freeze
    # every parameter of the network.
    network = model.module if isinstance(model, torch.nn.DataParallel) else model

    seq = 0
    for ct, child in enumerate(network.children(), start=1):
        if isinstance(child, torch.nn.modules.container.Sequential):
            if seq == seq_target:
                break
            seq += 1
        print(ct, type(child))
        for param in child.parameters():
            param.requires_grad = False
    print(seq)
    return model
    
# m2 = partially_freeze_model(copied_model)

In [227]:
# copied_model = copy_model(model) # seems to work :3
# Take four independent copies of the model first, then freeze each copy with
# its list position as the number of Sequential stages to freeze.
frozen_models = [copy_model(model) for _ in range(4)]
frozen_models = [
    partially_freeze_model(candidate, depth)
    for depth, candidate in enumerate(frozen_models)
]

1 <class 'torch.nn.modules.conv.Conv2d'>
2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
0
1 <class 'torch.nn.modules.conv.Conv2d'>
2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
3 <class 'torch.nn.modules.container.Sequential'>
1
1 <class 'torch.nn.modules.conv.Conv2d'>
2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
3 <class 'torch.nn.modules.container.Sequential'>
4 <class 'torch.nn.modules.container.Sequential'>
2
1 <class 'torch.nn.modules.conv.Conv2d'>
2 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
3 <class 'torch.nn.modules.container.Sequential'>
4 <class 'torch.nn.modules.container.Sequential'>
5 <class 'torch.nn.modules.container.Sequential'>
3


In [228]:
# Sanity check: one model per freezing depth.
len(frozen_models)

4

In [None]:
# Train every partially frozen copy and keep what train_loop returns for each.
results = []
# tqdm wraps the list itself (not the enumerate object) so it knows the total
# and can show real progress instead of "0it".
for i, fm in enumerate(tqdm(frozen_models)):
    print(f"Training from layer: {i}")
    r = train_loop(fm, f"test_{i}")
    results.append(r)

0it [00:00, ?it/s]

Training from layer: 0
first accuracy:
epoch: -1
clean accuracy: 0.9039
epoch: -1
attack accuracy: 0.1031



  0%|          | 0/800 [00:00<?, ?it/s][A
  0%|          | 1/800 [00:07<1:36:54,  7.28s/it][A

epoch: 0 train loss: 31.382399353027346
epoch: 1 train loss: 14.257198303222657



  0%|          | 2/800 [00:15<1:41:11,  7.61s/it][A
  0%|          | 3/800 [00:21<1:36:36,  7.27s/it][A

epoch: 2 train loss: 11.950803245544433



  0%|          | 4/800 [00:28<1:33:50,  7.07s/it][A

epoch: 3 train loss: 10.65635315322876
epoch: 4 train loss: 13.28608081817627


In [203]:
# train_loop returns (model, timing tuple, per-epoch lambda1 values).  Dedicated
# names are used so the notebook-level `model` is not rebound, and the third
# element is named for what it holds (lambda values, not losses).
first_model, first_times, first_lambdas = results[0]
first_lambdas[-1]

1

In [206]:
import datetime

# Summarise every run: which layers were trainable, the last lambda1 value and
# the total training time.
for trained_model, times, lambdas in results:
    print_model_topology(trained_model)
    # times is (end, start, per-epoch durations).  The difference is a duration,
    # not a point in time, so it is formatted directly rather than through a
    # timestamp conversion (which applies the local timezone and wraps at 24 h).
    elapsed_seconds = int(times[0] - times[1])
    hours, remainder = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    running_time = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
    # The third element returned by train_loop is the lambda1 history.
    print(f"Final lambda1: {lambdas[-1]}")
    print(running_time)
    print("================")

Model: ResNet
1 - Conv2d
	Trainable:False
2 - BatchNorm2d
	Trainable:False
3 - Sequential
	Trainable:True
4 - Sequential
	Trainable:True
5 - Sequential
	Trainable:True
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True
Loss: 1
00:07:26
Model: ResNet
1 - Conv2d
	Trainable:False
2 - BatchNorm2d
	Trainable:False
3 - Sequential
	Trainable:False
4 - Sequential
	Trainable:True
5 - Sequential
	Trainable:True
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True
Loss: 1
00:07:19
Model: ResNet
1 - Conv2d
	Trainable:False
2 - BatchNorm2d
	Trainable:False
3 - Sequential
	Trainable:False
4 - Sequential
	Trainable:False
5 - Sequential
	Trainable:True
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True
Loss: 1
00:07:21
Model: ResNet
1 - Conv2d
	Trainable:False
2 - BatchNorm2d
	Trainable:False
3 - Sequential
	Trainable:False
4 - Sequential
	Trainable:False
5 - Sequential
	Trainable:False
6 - Sequential
	Trainable:True
7 - Linear
	Trainable:True
Loss: 1
00:07:43
