In [1]:
import os
import sys

import numpy as np
import torch
import torchvision
from matplotlib import pyplot as plt
from torch import tensor, Tensor
from torch.nn import functional
import pandas as pd

sys.path.append(os.path.abspath(r'../../'))

from src.config import PATH_SAVE_MODELS, PATH_DATASETS
from src.utils.common import get_loader, get_device
from src.utils.dataset import GPUDataset, get_cifar
from src.utils.plot import plot_prune_example
from src.utils.train import Mode, ModelManager

# ---------------------------------------------------------------------------
# Global configuration
# ---------------------------------------------------------------------------
NUM_CLASSES = 100
BATCH_SIZE = 25
NUM_TRAIN = 50000
NUM_TEST = 10000
ENSEMBLE_SIZE = 10
ENSEMBLE_EPOCHS = 4
EPOCHS = 100

# NOTE: the identifier is misspelled ("ENSAMBLE"); it is kept unchanged because
# other cells of this notebook read it under this exact name.
ENSAMBLE_SAVED_DATA = os.path.join(PATH_SAVE_MODELS, 'el2n', 'general.pt')
DEVICE = get_device()

# ---------------------------------------------------------------------------
# Datasets, loaders and labels
# ---------------------------------------------------------------------------
# Index arrays selecting the first NUM_TRAIN / NUM_TEST samples of each split.
train_idx = np.arange(NUM_TRAIN)
test_idx = np.arange(NUM_TEST)

dataset_train, dataset_test = get_cifar(PATH_DATASETS)

# The 4th positional argument of get_loader is left at its default for
# `loader_train` and set to False for the other two loaders.
# NOTE(review): presumably this flag controls shuffling -- confirm in get_loader.
loader_train = get_loader(dataset_train, train_idx, BATCH_SIZE)
loader_train_ordered = get_loader(dataset_train, train_idx, BATCH_SIZE, False)
loader_test = get_loader(dataset_test, test_idx, BATCH_SIZE, False)

# Build the label tensors directly as int64 instead of creating a float32
# `Tensor` first and casting afterwards (avoids the float round trip).
Y_train = torch.as_tensor(dataset_train.targets, dtype=torch.int64)[train_idx]
Y_test = torch.as_tensor(dataset_test.targets, dtype=torch.int64)[test_idx]

CUDA is available!  Training on  GPU...
Files already downloaded and verified
Files already downloaded and verified


In [2]:
# get prune scores
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
data = torch.load(ENSAMBLE_SAVED_DATA)

ensemble_softmax = data['ensemble_softmax']
ensemble_pred = data['ensemble_pred']
ensemble_pred_sum = data['ensemble_pred_sum']
ensemble_std = data['ensemble_std']
el2n_scores = data['el2n_scores']
forgetting_model = ModelManager(NUM_CLASSES, 'forgetting', load=True)
change_counter = forgetting_model.data_other['change_counter']


def _ascending_order(scores):
    """Return the indices that sort ``scores`` in ascending order.

    The index tensor produced by ``Tensor.sort`` is moved to the CPU before
    the NumPy conversion, because ``Tensor.numpy()`` raises for tensors that
    live on another device (``.cpu()`` is a no-op for CPU tensors).

    :param scores: tensor of per-sample scores.
    :return: NumPy array of sorting indices, truncated to at most NUM_TRAIN
        entries.
    """
    return scores.sort()[1].cpu().numpy()[:NUM_TRAIN]


idx_sorted_el2n = _ascending_order(el2n_scores)
idx_sorted_forgetting = _ascending_order(change_counter)
idx_sorted_std = _ascending_order(ensemble_std)
idx_sorted_pred_sum = _ascending_order(ensemble_pred_sum)

# train regular model

In [None]:
# Baseline model, created fresh (load=False) and trained with `loader_train`.
regular_model = ModelManager(
    NUM_CLASSES,
    'regular',
    load=False,
)
# NOTE(review): `loader_test` is passed in both the 2nd and 3rd positions --
# confirm whether a separate validation loader is intended here.
regular_model.train(
    loader_train,
    loader_test,
    loader_test,
    EPOCHS,
)

# Train with the hardest 1-10% of examples (highest EL2N scores) pruned

In [7]:
# Train one model per prune level (1%..10%), each time dropping the examples
# with the highest EL2N scores, then plot test loss / accuracy per level.
models = [regular_model]

loss, acc, prune_size = [], [], []
print()
for i in range(1, 11):
    # Integer arithmetic: avoids an off-by-one from truncating a float product.
    num_train = NUM_TRAIN * (100 - i) // 100
    # idx_sorted_el2n is ascending, so the slice keeps the lowest-scoring samples.
    idx_to_train = idx_sorted_el2n[:num_train]
    print(f'====     train model with {i}% most hard     ======')
    # Use a dedicated name: rebinding the notebook-level `loader_train` here
    # would make every cell that runs afterwards train on a pruned subset.
    loader_train_pruned = get_loader(dataset_train, idx_to_train, BATCH_SIZE, True)

    model_manager = ModelManager(NUM_CLASSES, f'prune_hardest_{i}p', dir_='prune_hardest', load=False)
    model_manager.train(loader_train_pruned, loader_test, loader_test, EPOCHS)

    models.append(model_manager)
    prune_size.append(i)
    acc.append(model_manager.data['test']['acc'])
    loss.append(model_manager.data['test']['loss'])
    print()

ModelManager.save_models_log(models, 'el2n_p_hard_')

# Left panel: test loss, right panel: test accuracy, both vs. prune percentage.
fig, axes = plt.subplots(1, 2, figsize=(17, 4))
axes[0].plot(prune_size, loss)
axes[0].set_xlabel('prune size')
axes[0].set_ylabel('Loss')
axes[1].plot(prune_size, acc)
axes[1].set_xlabel('prune size')
axes[1].set_ylabel('Acc')
fig.subplots_adjust(wspace=.4)
# Render the figure (the original called plt.plot() with no data here).
plt.show()


49500
49000
48500
48000
47500
47000
46500
46000
45500
45000


# Train with 25% of the training set pruned

In [None]:
# Model list for this experiment, seeded with the baseline model.
models = [regular_model]

# Number of samples that remain after pruning 25% of the training set.
train_num = int(NUM_TRAIN * (75.0 / 100.0))
# Random baseline: `train_num` distinct training indices drawn without replacement.
idx_random_prune = np.random.choice(
    np.arange(NUM_TRAIN),
    size=train_num,
    replace=False,
)

# Candidate index arrays, keyed by the name of the scoring method.
name_to_idx = dict(
    el2n=idx_sorted_el2n,
    std=idx_sorted_std,
    pred_sum=idx_sorted_pred_sum,
    forgetting=idx_sorted_forgetting,
    random=idx_random_prune,
)

loss = []
acc = []
prune_size = []
print()
for model_name in name_to_idx:
    idx = name_to_idx[model_name]
    model_manager = ModelManager(
        NUM_CLASSES,
        f'prun_easy_25p_{model_name}',
        dir_='prun_easy_25p',
        load=False,
    )
    # TODO: training is not wired up yet -- `idx` is not used and no
    # `model_manager.train(...)` call is made for these models.


In [12]:
# Scratch check: draw all 10 values of arange(10) without replacement,
# i.e. a random permutation of 0..9.
np.random.choice(
    np.arange(10),
    size=10,
    replace=False,
)

array([5, 4, 2, 9, 7, 1, 0, 3, 6, 8])