In [1]:
import os
import sys

import numpy as np
from torch import tensor, nn, Tensor
import torch
from torchvision import transforms
import torchvision
import matplotlib.pyplot as plt
import pandas as pd

sys.path.append(os.path.abspath(r'../../'))

from src.utils.common import get_model_resnet18_cifar10, get_loader, create_saved_data_dir, get_device, save, load
from src.utils.dataset import get_cifar, GPUDataset
from src.utils.plot import plot_loss_acc,plot_img_and_top
from src.utils.train import ModelManager, Mode
from src.config import PATH_SAVE_MODELS, PATH_DATASETS


# Experiment-wide settings shared by every cell below.
NUM_CLASSES = 100
BATCH_SIZE = 25
NUM_TRAIN = 50000
NUM_TEST = 10000
# PRUNE_EPOCHS = 2
EPOCHS = 100

DEVICE = get_device()

# Positional indices of the first NUM_TRAIN / NUM_TEST examples of each split.
train_idx, test_idx = (np.arange(count, dtype=int) for count in (NUM_TRAIN, NUM_TEST))

# get_cifar returns four dataset objects; only the first three are used in this cell.
dataset_train, dataset_test, dataset_train_ordered, dataset_train_raw = get_cifar(PATH_DATASETS)

# NOTE(review): the 4th positional argument of get_loader is presumably a shuffle flag
# (omitted for training, False for test / ordered train) — confirm in src.utils.common.
loader_train = get_loader(dataset_train, train_idx, BATCH_SIZE)
loader_test = get_loader(dataset_test, test_idx, BATCH_SIZE, False)
loader_train_ordered = get_loader(dataset_train_ordered, train_idx, BATCH_SIZE, False)

# Labels of the selected examples, cast to int64.
Y_train = Tensor(dataset_train.targets)[train_idx].long()
Y_test = Tensor(dataset_test.targets)[test_idx].long()

CUDA is available!  Training on  GPU...
Files already downloaded and verified
Files already downloaded and verified


# No pruning

In [None]:
# Baseline run: build a fresh (load=False) model manager named 'no_prune' and train it
# on `loader_train` for EPOCHS epochs; its artifacts are grouped under dir_='prune_25p'.
regular_model = ModelManager(NUM_CLASSES, 'no_prune', dir_='prune_25p', load=False)
# Prints the full network architecture (long text output).
print(regular_model.model)
# NOTE(review): `loader_test` is passed twice — presumably once as the validation loader
# and once as the test loader; confirm against ModelManager.train's signature.
regular_model.train(loader_train, loader_test, loader_test, EPOCHS)

ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=Fa

KeyboardInterrupt: 

In [None]:
# get prune scores
# Seed for the random-pruning baseline so the random ordering is reproducible across runs.
RANDOM_PRUNE_SEED = 0

# NOTE(review): torch.load unpickles the file — only load checkpoints produced by this project.
data = torch.load(os.path.join(PATH_SAVE_MODELS, 'el2n', 'general.pt'))

ensemble_softmax = data['ensemble_softmax']
ensemble_pred = data['ensemble_pred']
ensemble_pred_sum = data['ensemble_pred_sum']
ensemble_std = data['ensemble_std']
el2n_scores = data['el2n_scores']
forgetting_model = ModelManager(NUM_CLASSES, 'forgetting', load=True)
change_counter = forgetting_model.data_other['change_counter']

# Example indices ordered by ascending score (torch.sort returns (values, indices)).
idx_sorted_el2n = el2n_scores.sort()[1].numpy()[:NUM_TRAIN]
idx_sorted_forgetting = change_counter.sort()[1].numpy()[:NUM_TRAIN]
# `ensemble_std` carries several values per example (it is reduced with .sum(dim=1)
# wherever it is used as a per-example score), so reduce it to one scalar per example
# BEFORE sorting. Sorting the 2-D tensor directly would sort within each row and yield
# column indices rather than an ordering of the examples.
idx_sorted_std = ensemble_std.sum(dim=1).sort()[1].numpy()[:NUM_TRAIN]
idx_sorted_pred_sum = ensemble_pred_sum.sort()[1].numpy()[:NUM_TRAIN]
# Random baseline: a seeded permutation of all training indices.
idx_random_prune = np.random.default_rng(RANDOM_PRUNE_SEED).permutation(NUM_TRAIN)

# Criterion name -> ordering of training-example indices.
idx = {'el2n': idx_sorted_el2n, 'forget': idx_sorted_forgetting, 'std': idx_sorted_std, 'pred_sum': idx_sorted_pred_sum,
       'random': idx_random_prune}

In [None]:
# Train one model per pruning criterion. Each model sees only the last `num_train`
# entries of that criterion's index ordering, i.e. the first 25% of the ordering is pruned.
models = [regular_model]

loss, acc = [], []
num_train = int(NUM_TRAIN * (75. / 100.))  # number of examples kept after pruning 25%

# Fail fast, before any (long) training starts, if an ordering is not a flat array of
# example indices that is long enough to slice.
for name, idx_sorted in idx.items():
    if np.ndim(idx_sorted) != 1 or len(idx_sorted) < num_train:
        raise ValueError(
            f"index ordering for '{name}' must be 1-D with at least {num_train} entries, "
            f"got shape {np.shape(idx_sorted)}"
        )

print()
# The loop variable is deliberately NOT called `idx`: rebinding `idx` would destroy the
# criterion dict and make this cell impossible to re-run.
for name, idx_sorted in idx.items():
    print(f'====     train model with 25% prune according to {name} most hard     ======')
    # Use the ordering of the CURRENT criterion (previously every model was trained on the
    # EL2N subset regardless of `name`). A separate loader name keeps the full-data
    # `loader_train` intact for the baseline cell.
    # NOTE(review): orderings are ascending by score, so this keeps the highest-scoring 75%.
    # For 'pred_sum' a higher score presumably means an easier example — confirm that
    # keeping the top of that ordering is intended.
    loader_train_pruned = get_loader(dataset_train, idx_sorted[-num_train:], BATCH_SIZE, True)

    model_manager = ModelManager(NUM_CLASSES, f'prune_25p_{name}', dir_='prune_25p', load=False)
    model_manager.train(loader_train_pruned, loader_test, loader_test, EPOCHS)

    models.append(model_manager)
    # Collect the recorded test metrics of each pruned model for later comparison.
    acc.append(model_manager.data['test']['acc'])
    loss.append(model_manager.data['test']['loss'])
    print()


# fig, axes = plt.subplots(1, 2, figsize=(17, 4))
# axes[0].plot(EPOCHS, loss)
# axes[0].set_xlabel('prune size')
# axes[0].set_ylabel('Loss')
# axes[1].plot(EPOCHS, acc)
# axes[1].set_xlabel('prune size')
# axes[1].set_ylabel('Acc')
# fig.subplots_adjust(wspace=.4)
# plt.plot()

In [None]:
# Hand the baseline manager plus every pruned-run manager collected in `models` to
# ModelManager.save_models_log under the name 'prune_25p'.
# NOTE(review): presumably this writes a combined training log to disk — confirm in src.utils.train.
ModelManager.save_models_log(models, 'prune_25p')

In [6]:
# Reload the forgetting run to obtain its per-example change counter.
forgetting_model = ModelManager(NUM_CLASSES, 'forgetting', load=True)
change_counter = forgetting_model.data_other['change_counter']

# One column per difficulty score, one row per training example.
score_columns = {
    'EL2N': el2n_scores.numpy(),
    # ensemble_std is reduced over dim 1 to get a single value per example
    'Std': ensemble_std.sum(dim=1).numpy(),
    # number of ensemble models that classify each example correctly
    'True p sum': ensemble_pred_sum.numpy(),
    'Forget': change_counter.numpy(),
}
data = pd.DataFrame(score_columns)

# Pairwise correlation between the scores.
data.corr()

Unnamed: 0,EL2N,Std,True p sum,Forget
EL2N,1.0,0.33586,-0.884456,0.522132
Std,0.33586,1.0,-0.260599,0.243458
True p sum,-0.884456,-0.260599,1.0,-0.526172
Forget,0.522132,0.243458,-0.526172,1.0


In [7]:
# Show the per-example score table built in the previous cell.
data

Unnamed: 0,EL2N,Std,True p sum,Forget
0,0.973247,0.915261,1,3
1,0.747732,0.918816,9,1
2,0.713550,0.857692,9,0
3,0.891754,0.693117,3,2
4,0.999180,0.784892,0,5
...,...,...,...,...
49995,0.970681,0.671530,1,4
49996,1.020862,1.020247,0,3
49997,1.065145,1.150500,1,5
49998,0.978251,0.695142,0,4
