In [1]:
import os
import sys

import numpy as np
from torch import tensor, nn, Tensor
import torch
from torchvision import transforms
import torchvision
import matplotlib.pyplot as plt
import pandas as pd

sys.path.append(os.path.abspath(r'../../'))

from src.utils.common import get_model_resnet18_cifar10, get_loader, create_saved_data_dir, get_device, save, load
from src.utils.dataset import get_cifar, GPUDataset
from src.utils.plot import plot_loss_acc, plot_img_and_top
from src.utils.train import ModelManager, Mode
from src.config import PATH_SAVE_MODELS, PATH_DATASETS

# Experiment-wide configuration constants.
NUM_CLASSES = 100
BATCH_SIZE = 25
NUM_TRAIN = 50000
NUM_TEST = 10000
EPOCHS = 100

DEVICE = get_device()

# Index arrays that select every train / test example (nothing pruned yet).
train_idx = np.arange(NUM_TRAIN, dtype=int)
test_idx = np.arange(NUM_TEST, dtype=int)

# get_cifar returns four dataset objects; only their names hint at how they
# differ (augmented / ordered / raw) -- TODO confirm against src.utils.dataset.
(
    dataset_train,
    dataset_test,
    dataset_train_ordered,
    dataset_train_raw,
) = get_cifar(PATH_DATASETS)

# The optional 4th positional argument of get_loader is presumably the
# shuffle flag (left at its default for training, False elsewhere) -- TODO confirm.
loader_train = get_loader(dataset_train, train_idx, BATCH_SIZE)
loader_test = get_loader(dataset_test, test_idx, BATCH_SIZE, False)
loader_train_ordered = get_loader(dataset_train_ordered, train_idx, BATCH_SIZE, False)

# Ground-truth labels of the selected examples as int64 tensors.
Y_train = Tensor(dataset_train.targets)[train_idx].long()
Y_test = Tensor(dataset_test.targets)[test_idx].long()

CUDA is available!  Training on  GPU...
Files already downloaded and verified
Files already downloaded and verified


# No pruning (baseline model)

In [2]:
# Baseline model: trained on the full training loader, with no examples pruned.
regular_model = ModelManager(NUM_CLASSES, 'no_prune', load=True, dir_='prune_25p')
print(regular_model.model)
# loader_test is passed twice -- presumably once as the validation loader and
# once as the test loader; TODO confirm against ModelManager.train.
regular_model.train(loader_train, loader_test, loader_test, EPOCHS)

ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=Fa

In [16]:
# Load the per-example pruning scores saved by earlier experiments and build,
# for every scoring method, an ordering of the training-set indices.
#
# map_location='cpu': every tensor loaded here is later converted with
# .numpy(), which only works for CPU tensors, so force them onto the CPU no
# matter which device they were saved from.
data = torch.load(os.path.join(PATH_SAVE_MODELS, 'el2n', 'general.pt'), map_location='cpu')

ensemble_softmax = data['ensemble_softmax']
ensemble_pred = data['ensemble_pred']
ensemble_pred_sum = data['ensemble_pred_sum']
ensemble_std = data['ensemble_std']
el2n_scores = data['el2n_scores']
change_counter = torch.load(
    os.path.join(PATH_SAVE_MODELS, 'forgetting', 'other.pt'),
    map_location='cpu',
)['change_counter']

# Fixed seed so the random-pruning baseline selects the same examples on
# every Restart & Run All.
RANDOM_PRUNE_SEED = 0

# Tensor.sort() returns (values, indices); [1] takes the indices, i.e. the
# example ids ordered by ascending score.
idx_sorted_el2n = el2n_scores.sort()[1].numpy()
idx_sorted_forgetting = change_counter.sort()[1].numpy()
idx_sorted_std = ensemble_std.sum(dim=1).sort()[1].numpy()
# Reversed so that examples with the highest 'pred_sum' come first.
# .copy() turns the negative-stride view produced by [::-1] into a regular
# contiguous array, which is safe to hand to code that converts it to a tensor.
idx_sorted_pred_sum = ensemble_pred_sum.sort()[1].numpy()[::-1].copy()
# Random permutation of all training indices (random-pruning baseline).
idx_random_prune = np.random.default_rng(RANDOM_PRUNE_SEED).permutation(NUM_TRAIN)

idx_sorted = {
    'el2n': idx_sorted_el2n,
    'forget': idx_sorted_forgetting,
    'std': idx_sorted_std,
    'pred_sum': idx_sorted_pred_sum,
    'random': idx_random_prune,
}

In [17]:
# Collect the four per-example scores into one table (one column per metric)
# and show how strongly the metrics correlate with each other.
_score_columns = {
    'EL2N': el2n_scores.numpy(),
    'Std': ensemble_std.sum(dim=1).numpy(),
    # number of ensemble models that classify each example correctly
    'True p sum': ensemble_pred_sum.numpy(),
    'Forget': change_counter.numpy(),
}
data = pd.DataFrame(_score_columns)
data.corr()

Unnamed: 0,EL2N,Std,True p sum,Forget
EL2N,1.0,0.495009,-0.909154,0.546727
Std,0.495009,1.0,-0.420615,0.403612
True p sum,-0.909154,-0.420615,1.0,-0.526532
Forget,0.546727,0.403612,-0.526532,1.0


In [5]:
# Train one model per scoring method, each on a training set from which 25%
# of the examples have been pruned, and collect them next to the baseline.
models = [regular_model]

# Fraction of the training set that is kept after pruning.
KEEP_FRACTION = 75. / 100.
num_train = int(NUM_TRAIN * KEEP_FRACTION)

print()
for name, idx in idx_sorted.items():
    print(f'====     train model with 25% prune according to {name} most hard     ======')
    # Keep the last `num_train` entries of this method's ordering, i.e. drop
    # the first 25% of `idx`.
    # A dedicated variable is used so the full-data `loader_train` defined in
    # the setup cell is not overwritten; otherwise re-running the baseline
    # cell afterwards would silently train on a pruned subset.
    loader_train_pruned = get_loader(dataset_train, idx[-num_train:], BATCH_SIZE, True)

    model_manager = ModelManager(NUM_CLASSES, f'prune_25p_{name}', dir_='prune_25p', load=True)
    model_manager.train(loader_train_pruned, loader_test, loader_test, EPOCHS)

    models.append(model_manager)
    print()


Epoch: 0 Training: Loss: 0.165831 Acc: 0.056800  Validation Loss: 0.153452 Acc: 0.078800                                                   
Validation loss decreased (inf --> 0.153452).  Saving model to models_data/prune_25p\prune_25p_el2n
Epoch: 1 Training: Loss: 0.151938 Acc: 0.091040  Validation Loss: 0.140449 Acc: 0.125400                                                   
Validation loss decreased (0.153452 --> 0.140449).  Saving model to models_data/prune_25p\prune_25p_el2n
Epoch: 2 Training: Loss: 0.142359 Acc: 0.125493  Validation Loss: 0.129809 Acc: 0.165900                                                    
Validation loss decreased (0.140449 --> 0.129809).  Saving model to models_data/prune_25p\prune_25p_el2n
Epoch: 3 Training: Loss: 0.133743 Acc: 0.151653  Validation Loss: 0.120045 Acc: 0.206500                                                    
Validation loss decreased (0.129809 --> 0.120045).  Saving model to models_data/prune_25p\prune_25p_el2n
Epoch: 4 Training: Los

KeyboardInterrupt: 

In [32]:
# Resume the pruned-data training runs for the scoring methods that are not
# listed in ALREADY_TRAINED, appending each trained model to `models`.

# Fraction of the training set that is kept after pruning.
KEEP_FRACTION = 75. / 100.
num_train = int(NUM_TRAIN * KEEP_FRACTION)

# Methods skipped in this pass -- presumably because their models were
# already trained in an earlier run; TODO confirm.
ALREADY_TRAINED = {'el2n', 'std', 'forget'}

print()
for name, idx in idx_sorted.items():
    if name in ALREADY_TRAINED:
        continue
    print(f'====     train model with 25% prune according to {name} most hard     ======')
    # Keep the last `num_train` entries of this method's ordering.
    # A dedicated variable is used so the full-data `loader_train` defined in
    # the setup cell is not overwritten by a pruned loader.
    loader_train_pruned = get_loader(dataset_train, idx[-num_train:], BATCH_SIZE, True)

    model_manager = ModelManager(NUM_CLASSES, f'prune_25p_{name}', dir_='prune_25p', load=True)
    model_manager.train(loader_train_pruned, loader_test, loader_test, EPOCHS)

    models.append(model_manager)
    print()


Epoch: 0 Training: Loss: 0.165519 Acc: 0.055893  Validation Loss: 0.154203 Acc: 0.083800                                                   
Validation loss decreased (inf --> 0.154203).  Saving model to models_data/prune_25p\prune_25p_pred_sum
                                                                                                                                            

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x000001BC05A2EB90>
Traceback (most recent call last):
  File "C:\Users\ProjectUser\Documents\Data-pruning\venv\lib\site-packages\torch\utils\data\dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "C:\Users\ProjectUser\Documents\Data-pruning\venv\lib\site-packages\torch\utils\data\dataloader.py", line 1425, in _shutdown_workers
    self._mark_worker_as_unavailable(worker_id, shutdown=True)
  File "C:\Users\ProjectUser\Documents\Data-pruning\venv\lib\site-packages\torch\utils\data\dataloader.py", line 1367, in _mark_worker_as_unavailable
    assert self._workers_status[worker_id] or (self._persistent_workers and shutdown)
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


Epoch: 1 Training: Loss: 0.152227 Acc: 0.089813  Validation Loss: 0.145640 Acc: 0.119500                                                   
Validation loss decreased (0.154203 --> 0.145640).  Saving model to models_data/prune_25p\prune_25p_pred_sum
Epoch: 2 Training: Loss: 0.143094 Acc: 0.115600  Validation Loss: 0.126184 Acc: 0.175200                                                    
Validation loss decreased (0.145640 --> 0.126184).  Saving model to models_data/prune_25p\prune_25p_pred_sum
Epoch: 3 Training: Loss: 0.134977 Acc: 0.139920  Validation Loss: 0.120554 Acc: 0.200200                                                    
Validation loss decreased (0.126184 --> 0.120554).  Saving model to models_data/prune_25p\prune_25p_pred_sum
Epoch: 4 Training: Loss: 0.127909 Acc: 0.168853  Validation Loss: 0.114688 Acc: 0.238200                                                    
Validation loss decreased (0.120554 --> 0.114688).  Saving model to models_data/prune_25p\prune_25p_pred_sum
E

In [33]:
# Hand all collected models to ModelManager.save_models_log under the
# 'prune_25p' label (presumably writes their training logs -- TODO confirm).
ModelManager.save_models_log(models, 'prune_25p')

# Names of the models gathered so far, in insertion order.
[manager.name for manager in models]

['no_prune',
 'prune_25p_el2n',
 'prune_25p_forget',
 'prune_25p_std',
 'prune_25p_pred_sum',
 'prune_25p_random']

In [34]:
# Display the per-example score table: `data` is the DataFrame with the
# EL2N / Std / True p sum / Forget columns (the same name is also used
# earlier for the dict loaded from 'general.pt', so cell order matters).
data

Unnamed: 0,EL2N,Std,True p sum,Forget
0,1.013495,1.201845,2,3
1,0.549327,0.964975,9,1
2,0.595119,0.855260,9,0
3,1.007550,0.738844,1,2
4,0.973679,0.878596,0,5
...,...,...,...,...
49995,0.947287,0.664492,2,4
49996,0.946486,1.035887,2,3
49997,1.048856,1.173697,1,5
49998,1.046518,0.972657,0,4
