In [1]:
import os
import time
from dataclasses import dataclass
import numpy as np

import torch

import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch import nn

from dataloader import mnist
from models import FullyConnectedNet, TinyNet, ResNet18
from src import hessians, selection, utils

device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

In [2]:
def load_net(net, path):
    assert os.path.isfile(path), "Error: no checkpoint file found!"
    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["net"])
    return net


def save_net(net, path):
    dir, filename = os.path.split(path)
    if not os.path.isdir(dir):
        os.makedirs(dir)

    state = {
        "net": net.state_dict(),
    }
    torch.save(state, path)

In [3]:
def forward(net, dataloader, criterion, num_batch_sample: int=-1):
    net_loss = 0
    num_batch_sample = len(dataloader) if num_batch_sample == -1 else num_batch_sample
    sample_indices = np.random.choice(len(dataloader), size=num_batch_sample, replace=False)
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        if batch_idx in sample_indices:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            net_loss += loss

    net_loss /= num_batch_sample
    return net_loss

### Building model and set criterion

In [4]:
torch.manual_seed(0)
np.random.seed(0)
if device == "cuda":
    cudnn.benchmark = True
    
net = ResNet18(1).to(device)
flatten = False
net_path = "../checkpoints/Figure_3/ResNet/cross_entropy/ckpt_0.0.pth"

net = load_net(net, net_path)

net.eval()
net_name = net.__class__.__name__
num_param = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(
    f"==> Building {net_name} finished. "
    + f"\n    Number of parameters: {num_param}"
)

criterion = nn.CrossEntropyLoss()

==> Building ResNet finished. 
    Number of parameters: 11172810


In [5]:
def test(net, dataloader, criterion, label, include):
    with torch.no_grad():
        net_loss = 0
        correct = 0
        total = 0
        for _, (inputs, targets) in enumerate(dataloader):
            if include:
                idx = (targets == label)
            else:
                idx = (targets != label)
            inputs = inputs[idx]
            targets = targets[idx]
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            net_loss += loss

            total += targets.size(0)
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()

        accuracy = correct / total * 100
        net_loss /= len(dataloader)
        return net_loss, accuracy

## Preparing data and register hooks

In [6]:
# Data
print("==> Preparing data..")
batch_size = 512
num_workers = 12
num_batch_sample = 3
num_target_sample = 300

data_loader = mnist.MNISTDataLoader(batch_size, num_workers, flatten=flatten)
train_loader, val_loader, test_loader = data_loader.get_data_loaders()

==> Preparing data..


### Prepare sample total data

In [7]:
inputs_list = list()
targets_list = list()

for batch_idx, (inputs, targets) in enumerate(train_loader):
    if batch_idx < num_batch_sample:
        inputs_list.append(inputs)
        targets_list.append(targets)
    else:
        break

### Prepare removal data

In [8]:
data = list()
target = list()
for batch_idx, (data_raw, target_raw) in enumerate(train_loader):
    idx = target_raw == 8
    data_raw = data_raw[idx]
    target_raw = target_raw[idx]
    data.append(data_raw)
    target.append(target_raw)
data = torch.cat(data)
target = torch.cat(target)

sample_idx = np.random.choice(len(data), num_target_sample, replace=False)
sample_data = data[sample_idx]
sample_target = target[sample_idx]

In [30]:
print("==> Define hooks")
# Make hooks
parser_list = [selection.TopNActivations, 
                  selection.TopNGradients,
                  selection.RandomSelection,
                  selection.Threshold]

rest_parser_list = [selection.TopNActivations, 
                  selection.TopNGradients,
                  selection.RandomSelection,
                  selection.Threshold]

==> Define hooks


In [35]:
print("==> Computing influence..")

ratio_list = [1,   5,  10,  30,  50, 100]
scale_list = [190, 150, 95,  85,  28,   5]
#scale_list = [200, 150, 100,  85,  28,   5]

#ratio_list = [10]
#scale_list = [95]

==> Computing influence..


In [34]:
for parser in rest_parser_list:
    net_parser = parser(net, 0)
    for param_ratio, inf_scale in zip(ratio_list, scale_list):
        print(f"Parser: {net_parser.__class__.__name__}, param_ratio: {param_ratio}%")
        param_ratio *= 0.01
        
        # Initialize configurations
        net_path = "../checkpoints/Figure_3/ResNet/cross_entropy/ckpt_0.0.pth"
        net = load_net(net, net_path)
        net_parser.set_ratio(param_ratio)

        # Prepare losses and indexes
        total_loss = 0
        for inputs, targets in zip(inputs_list, targets_list):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss

        total_loss /= num_batch_sample

        # Register_hooks
        net_parser.initialize_neurons()
        net_parser.register_hooks()
        target_loss = (
            criterion(net(sample_data.to(device)), sample_target.to(device))
            * len(data)
            / len(train_loader.dataset)
        )
        if isinstance(net_parser, selection.TopNGradients):
            target_loss.backward(retain_graph=True)

        data_ratio = len(train_loader.dataset) / (len(train_loader.dataset) - len(data))
        #newton_loss = total_loss * data_ratio - target_loss * (1 - data_ratio)
        index_list = net_parser.get_parameters()

        influence = hessians.partial_influence(
            index_list, target_loss, total_loss, net, tol=1e-4, step=0.5 #3
        )
        
        net = load_net(net, net_path)
        net_parser.set_ratio(param_ratio)

        utils.update_network(net, influence*inf_scale, index_list)
        net_parser.remove_hooks()
        save_path = (
            #f"../checkpoints/Figure_3/PIF/{net_name}/{net_parser.__class__.__name__}/{param_ratio}/{inf_scale}.pth"
            f"../checkpoints/Figure_3/PIF/{net_name}/{net_parser.__class__.__name__}/{param_ratio}.pth"
        )
        save_net(net, save_path)

        net_parser.remove_hooks()

Parser: TopNActivations, param_ratio: 10%
Computing partial influence ... [1/10000], Tolerance: 4.559E-05, Avg. computing time: 1.952s          
Parser: TopNGradients, param_ratio: 10%
Computing partial influence ... [1/10000], Tolerance: 4.638E-05, Avg. computing time: 1.951s          
Parser: RandomSelection, param_ratio: 10%
Computing partial influence ... [1/10000], Tolerance: 3.474E-05, Avg. computing time: 1.953s          
Parser: Threshold, param_ratio: 10%
Computing partial influence ... [1/10000], Tolerance: 3.898E-05, Avg. computing time: 1.952s          


### Measure the network utility

In [37]:
net = ResNet18(1).to(device)

# Define lists to contain results
self_loss_list = [[],[],[],[]]
self_acc_list = [[],[],[],[]]
exclusive_loss_list = [[],[],[],[]]
exclusive_acc_list = [[],[],[],[]]

parser_count = 0

for parser in parser_list:
    net_parser = parser(net, 0)
    for param_ratio in ratio_list:
        _, _, test_loader = data_loader.get_data_loaders()
        param_ratio *= 0.01
        
        net_path = f"../checkpoints/Figure_3/PIF/ResNet/{net_parser.__class__.__name__}/{param_ratio}.pth"
        net = load_net(net, net_path)

        self_loss, self_acc = test(net, test_loader, criterion, 8, True)
        self_loss_list[parser_count].append(self_loss.detach().cpu())
        self_acc_list[parser_count].append(self_acc)
        exclusive_loss, exclusive_acc = test(net, test_loader, criterion, 8, False)

        # Save results in defined lists
        exclusive_loss_list[parser_count].append(exclusive_loss.detach().cpu())
        exclusive_acc_list[parser_count].append(exclusive_acc)

        print(f"{net_parser.__class__.__name__}, {param_ratio*100:2.0f}% - Self: {self_loss:.4f} {self_acc:.2f}% | exclusive loss: {exclusive_loss:.4f}, {exclusive_acc:.2f}%")
        print("")
    parser_count += 1

TopNActivations,  1% - Self: 5.6520 0.10% | exclusive loss: 0.0330, 99.06%

TopNActivations,  5% - Self: 6.2325 0.10% | exclusive loss: 0.0471, 98.65%

TopNActivations, 10% - Self: 6.2056 0.31% | exclusive loss: 0.1124, 96.74%

TopNActivations, 30% - Self: 5.1284 1.64% | exclusive loss: 0.6400, 82.84%

TopNActivations, 50% - Self: 4.8905 8.52% | exclusive loss: 0.6932, 84.67%

TopNActivations, 100% - Self: 4.7985 17.35% | exclusive loss: 0.5885, 87.92%

TopNGradients,  1% - Self: 5.5547 0.82% | exclusive loss: 0.0341, 99.04%

TopNGradients,  5% - Self: 6.2369 0.21% | exclusive loss: 0.0432, 98.75%

TopNGradients, 10% - Self: 6.2856 0.21% | exclusive loss: 0.1171, 96.67%

TopNGradients, 30% - Self: 4.9455 0.92% | exclusive loss: 1.0185, 72.45%

TopNGradients, 50% - Self: 4.8896 5.75% | exclusive loss: 0.8093, 79.80%

TopNGradients, 100% - Self: 4.7191 16.63% | exclusive loss: 0.5831, 87.89%

RandomSelection,  1% - Self: 4.7260 19.82% | exclusive loss: 0.0641, 98.34%

RandomSelection,  5

In [39]:
import pandas as pd

# Show results
print("Self Loss")
for i in range(4):
    self_loss_list[i] = [float(tensor) for tensor in self_loss_list[i]]
data = {"A": ["{:.2f}".format(num) for num in self_loss_list[0]],
        "B": ["{:.2f}".format(num) for num in self_loss_list[1]],
        "C": ["{:.2f}".format(num) for num in self_loss_list[2]],
        "D": ["{:.2f}".format(num) for num in self_loss_list[3]],
       }
self_loss_df = pd.DataFrame(data, index = [f'{num}%' for num in ratio_list])
self_loss_df.columns = ["TopNActivations", "TopNGradients", "Threshold", "Random"]
self_loss_df = self_loss_df.transpose()
print(self_loss_df)

print("\nSelf Accuracy")
data = {"A": ["{:.2f}".format(num) for num in self_acc_list[0]],
        "B": ["{:.2f}".format(num) for num in self_acc_list[1]],
        "C": ["{:.2f}".format(num) for num in self_acc_list[2]],
        "D": ["{:.2f}".format(num) for num in self_acc_list[3]]
       }
self_acc_df = pd.DataFrame(data, index = [f'{num}%' for num in ratio_list])
self_acc_df.columns = ["TopNActivations", "TopNGradients", "Threshold", "Random"]
self_acc_df = self_acc_df.transpose()
print(self_acc_df)

for i in range(4):
    exclusive_loss_list[i] = [float(tensor) for tensor in exclusive_loss_list[i]]
print("\nExclusive Loss")
data = {"A": ["{:.2f}".format(num) for num in exclusive_loss_list[0]],
        "B": ["{:.2f}".format(num) for num in exclusive_loss_list[1]],
        "C": ["{:.2f}".format(num) for num in exclusive_loss_list[2]],
        "D": ["{:.2f}".format(num) for num in exclusive_loss_list[3]],
       }
exclusive_loss_df = pd.DataFrame(data, index = [f'{num}%' for num in ratio_list])
exclusive_loss_df.columns = ["TopNActivations", "TopNGradients", "Threshold", "Random"]
exclusive_loss_df = exclusive_loss_df.transpose()
print(exclusive_loss_df)

print("\nExclusive Accuracy")
data = {"A": ["{:.2f}".format(num) for num in exclusive_acc_list[0]],
        "B": ["{:.2f}".format(num) for num in exclusive_acc_list[1]],
        "C": ["{:.2f}".format(num) for num in exclusive_acc_list[2]],
        "D": ["{:.2f}".format(num) for num in exclusive_acc_list[3]],
       }
exclusive_acc_df = pd.DataFrame(data, index = [f'{num}%' for num in ratio_list])
exclusive_acc_df.columns = ["TopNActivations", "TopNGradients", "Threshold", "Random"]
exclusive_acc_df = exclusive_acc_df.transpose()
print(exclusive_acc_df)

Self Loss
                   1%    5%   10%   30%   50%  100%
TopNActivations  5.65  6.23  6.21  5.13  4.89  4.80
TopNGradients    5.55  6.24  6.29  4.95  4.89  4.72
Threshold        4.73  4.42  4.79  3.36  4.26  4.32
Random           4.68  4.42  4.81  3.72  4.33  4.28

Self Accuracy
                    1%     5%   10%    30%    50%   100%
TopNActivations   0.10   0.10  0.31   1.64   8.52  17.35
TopNGradients     0.82   0.21  0.21   0.92   5.75  16.63
Threshold        19.82  20.02  5.95  17.15  19.71  18.58
Random           18.28  19.61  4.72  18.79  18.99  18.58

Exclusive Loss
                   1%    5%   10%   30%   50%  100%
TopNActivations  0.03  0.05  0.11  0.64  0.69  0.59
TopNGradients    0.03  0.04  0.12  1.02  0.81  0.58
Threshold        0.06  0.08  0.60  2.63  1.46  0.84
Random           0.06  0.09  0.65  1.78  1.18  0.82

Exclusive Accuracy
                    1%     5%    10%    30%    50%   100%
TopNActivations  99.06  98.65  96.74  82.84  84.67  87.92
TopNGradients    9

In [38]:
import pickle

# Save list files of results
with open('self_loss_list.pickle', 'wb') as f:
    pickle.dump(self_loss, f, pickle.HIGHEST_PROTOCOL)
with open('self_acc_list.pickle', 'wb') as f:
    pickle.dump(self_acc, f, pickle.HIGHEST_PROTOCOL)
with open('exclusive_loss_list.pickle', 'wb') as f:
    pickle.dump(exclusive_loss, f, pickle.HIGHEST_PROTOCOL)
with open('self_acc_list.pickle', 'wb') as f:
    pickle.dump(exclusive_acc, f, pickle.HIGHEST_PROTOCOL)

#### 