In [1]:
import torch
import torchvision
import pretrained
import nni
from nni.algorithms.compression.v2.pytorch import TorchEvaluator
from nni.algorithms.compression.v2.pytorch.pruning import LinearPruner

In [2]:
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset_train = torchvision.datasets.CIFAR10(".data", download=True, transform=transform)
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=16)
dataset_test = torchvision.datasets.CIFAR10(".data", download=True, train=False, transform=transform)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=16)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
training_device = "cuda" if torch.cuda.is_available() else "cpu"
pruning_device = "cpu"
pretrained_model = pretrained.vgg11_bn(device=training_device)

In [17]:
def eval_accuracy(model, dataset="train"):
    dataloader = dataloader_train if dataset == "train" else dataloader_test
    with torch.no_grad():
        model.to(training_device)
        correct = 0
        all_so_far = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(training_device), labels.to(training_device)
            pred = torch.argmax(model(inputs), dim=1)

            all_so_far += labels.size().numel()
            correct += torch.sum(pred.eq(labels))
    return correct/all_so_far

In [None]:
def training_func(model, optimizers, criterion, *_args, **_kwargs):
    model.train()
    model.to(training_device)
    torch.cuda.empty_cache()
    for epoch in range(3):
        for inputs, labels in dataloader_train:
            inputs, labels = inputs.to(training_device), labels.to(training_device)
            optimizers.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizers.step()
    model.to(pruning_device)
    torch.cuda.empty_cache()

In [6]:
def prune_model(model, sparsity, iterations):
    optimizer_pruner = nni.trace(torch.optim.Adam)(pretrained_model.parameters(), lr=1e-3)
    dummy_input = torch.rand(8, 32, 32).to(training_device)

    evaluator = TorchEvaluator(
        training_func=training_func,
        optimizers=optimizer_pruner,
        criterion=torch.nn.CrossEntropyLoss(),
        dummy_input=dummy_input)

    config_list = [{
        "sparsity": sparsity,
        "op_types": ["Linear"]
    }]

    model.to(pruning_device)

    itpruner = LinearPruner(
        model,
        config_list,
        total_iteration=iterations,
        pruning_algorithm="level",
        evaluator=evaluator,
        log_dir=".nni_log/")

    torch.cuda.empty_cache()
    itpruner.compress()
    return itpruner.get_best_result()

In [None]:
_, pruned_model, masks, *_ = prune_model(pretrained_model, .90, 10)
_, very_pruned_model, very_masks, *_ = prune_model(pretrained_model, .99, 10)

In [9]:
import copy
extra_trained_model = copy.deepcopy(pretrained_model)
normal_optimizer = torch.optim.Adam(extra_trained_model.parameters(), lr=1e-3)
for _ in range(10):
    training_func(extra_trained_model, normal_optimizer, torch.nn.CrossEntropyLoss())

In [10]:
print("pretrained: ", eval_accuracy(pretrained_model, "train"))
print("extra train: ", eval_accuracy(extra_trained_model, "train"))
print("0.9 prune: ", eval_accuracy(pruned_model, "train"))
print("0.99 prune: ", eval_accuracy(very_pruned_model, "train"))

pretrained:  tensor(0.9686, device='cuda:0')
extra train:  tensor(0.9809, device='cuda:0')
0.9 prune:  tensor(0.9959, device='cuda:0')
0.99 prune:  tensor(0.8350, device='cuda:0')


In [12]:
torch.save(pretrained_model, ".weights/full/pretrained")
torch.save(extra_trained_model, ".weights/full/extra_trained")
torch.save(pruned_model, ".weights/full/pruned")
torch.save(very_pruned_model, ".weights/full/very_pruned")

In [5]:
pretrained_model = torch.load(".weights/full/pretrained").to(pruning_device)
extra_trained_model = torch.load(".weights/full/extra_trained").to(pruning_device)
pruned_model = torch.load(".weights/full/pruned").to(pruning_device)
very_pruned_model = torch.load(".weights/full/very_pruned").to(pruning_device)

In [8]:
print("pretrained: ", eval_accuracy(pretrained_model, "test"))
print("extra train: ", eval_accuracy(extra_trained_model, "test"))
print("0.9 prune: ", eval_accuracy(pruned_model, "test"))
print("0.99 prune: ", eval_accuracy(very_pruned_model, "test"))

pretrained:  tensor(0.8808, device='cuda:0')
extra train:  tensor(0.8030, device='cuda:0')
0.9 prune:  tensor(0.8242, device='cuda:0')
0.99 prune:  tensor(0.6981, device='cuda:0')


In [6]:
pretrained_model.eval()
extra_trained_model.eval()
pruned_model.eval()
very_pruned_model.eval()
None

In [18]:
print("pretrained: ", eval_accuracy(pretrained_model, "test"))
print("extra train: ", eval_accuracy(extra_trained_model, "test"))
print("0.9 prune: ", eval_accuracy(pruned_model, "test"))
print("0.99 prune: ", eval_accuracy(very_pruned_model, "test"))

pretrained:  tensor(0.9074, device='cuda:0')
extra train:  tensor(0.8191, device='cuda:0')
0.9 prune:  tensor(0.8365, device='cuda:0')
0.99 prune:  tensor(0.7648, device='cuda:0')


In [7]:
from actuallysparse import converter

def compare_memory(layer, mode="coo"):
    l_coo = converter.convert(layer, mode)
    t_dense = very_pruned_model.classifier[0].weight
    size_mb_dense = t_dense.element_size() * t_dense.nelement() /1024**2
    t_coo = l_coo.weight.coalesce()
    size_mb_sparse = (t_coo.indices().nelement() * 8 + t_coo.values().nelement() * 4) / 1024**2
    return size_mb_dense, size_mb_sparse

In [12]:
def sum_layers(model):
    x, y = 0, 0
    for i in [0, 3, 6]:
        xa, ya = compare_memory(model.classifier[i])
        x += xa
        y += ya
    return x, y

In [13]:
sum_layers(pretrained_model)

(24.0, 360.78125)

In [14]:
sum_layers(pruned_model)

(24.0, 36.078147888183594)

In [15]:
sum_layers(very_pruned_model)

(24.0, 3.6078453063964844)

In [22]:
compare_memory(pruned_model.classifier[3])

(8.0, 32.00000762939453)