In [1]:
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision
import torch
print(torch.__version__)
print(torch.cuda.is_available())

tcmalloc: large alloc 1073750016 bytes == 0x5b662000 @  0x7f0b1d2472a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641
0.4.0
True


In [2]:
!pip3 install cupy-cuda80

Collecting cupy-cuda80
[?25l  Downloading https://files.pythonhosted.org/packages/91/f0/07e733d04285c14b9a94d12f02faf2e5108feb05b581f520cfbba99bed88/cupy_cuda80-5.0.0-cp36-cp36m-manylinux1_x86_64.whl (201.4MB)
[K    100% |████████████████████████████████| 201.4MB 178kB/s 
[?25hCollecting fastrlock>=0.3 (from cupy-cuda80)
  Downloading https://files.pythonhosted.org/packages/b5/93/a7efbd39eac46c137500b37570c31dedc2d31a8ff4949fcb90bda5bc5f16/fastrlock-0.4-cp36-cp36m-manylinux1_x86_64.whl
Installing collected packages: fastrlock, cupy-cuda80
Successfully installed cupy-cuda80-5.0.0 fastrlock-0.4


In [0]:
from vgg16 import vgg16, vgg16_bn, vgg_toy
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
import prune_utils

In [2]:
torch.cuda.is_available()

True

In [3]:
class PruneConfig():
    def __init__(self):
        self.TYPE = 'vgg16'
        self.batch_size = 64
        self.epochs = 15
        self.lr = 0.01
        self.cuda = torch.cuda.is_available()
        print(self.cuda)
        self.seed = 42
        self.log_rate = 10
        self.log_file = "log.txt"
        self.sensitivity = 1
        self.debug = False
cfg = PruneConfig()

True


In [4]:
torch.manual_seed(cfg.seed)

<torch._C.Generator at 0x7f74d0041230>

In [5]:
if cfg.cuda:
    print("Using CUDA")
    torch.cuda.manual_seed(cfg.seed)
else:
    print("No CUDA")
kwargs = {'num_workers': 5, 'pin_memory': True} if cfg.cuda else {}

Using CUDA


### Use Toy MNIST Data
**Pad to 224x244x1 since VGG16 originally takes in images of those size, so essentially this is just really really bad toy data**

In [6]:
if cfg.TYPE == 'toy':
    transform_list = [transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))]
else:
    transform_list = [transforms.Pad(98), transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))]
print(transform_list)
    
train_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=True, download=True,
                                                         transform=transforms.Compose(transform_list)),
                                          batch_size=cfg.batch_size,
                                          shuffle=True,
                                          **kwargs)

test_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=False, download=True,
                                                         transform=transforms.Compose(transform_list)),
                                          batch_size=cfg.batch_size,
                                          shuffle=False,
                                          **kwargs)

[Pad(padding=98, fill=0, padding_mode=constant), ToTensor(), Normalize(mean=(0.1307,), std=(0.3081,))]


# TOY VGG

In [7]:
dev  = torch.device("cuda" if cfg.cuda else 'cpu')
if cfg.TYPE == "toy":
    model = vgg_toy(mask=True).to(dev)
elif cfg.TYPE == "bn":
    model = vgg16_bn(pretrained=True, mask=True, debug=cfg.debug, in_channels=3).to(dev)
else:
    model = vgg16(pretrained=True, mask=True, debug=cfg.debug, in_channels=3).to(dev)
print("USING: ", cfg.TYPE)



USING:  vgg16


In [0]:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=cfg.lr, weight_decay=0.0001)
optim_state_dict = optimizer.state_dict()
criterion = torch.nn.CrossEntropyLoss()

# Train

In [0]:
def train(epochs):
    model.train()
    tmp_loss = []
    print(f"Training for {epochs} epochs")
    for epoch_i in range(epochs):
        loss = 0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_i, (x_in, y_in) in pbar:
            if cfg.TYPE != 'toy':
                x_in = torch.cat([x_in, x_in, x_in], dim=1)
            x_in, y_in = x_in.to(dev), y_in.to(dev)
            optimizer.zero_grad()
            output = model(x_in)
            loss = criterion(output, y_in)
            curr_loss = loss.item()
            loss.backward()
            tmp_loss.append(curr_loss)
            
            # zero out pruned connections
            for name, p in model.named_parameters():
                if "mask" in name:
                    continue
                tensor = p.data.cpu().numpy()
                grad_tensor = p.grad.data.cpu().numpy()
                grad_tensor = np.where(tensor==0, 0, grad_tensor)
                p.grad.data = torch.from_numpy(grad_tensor).to(dev)
            optimizer.step()
            if batch_i % cfg.log_rate == 0:
                done = batch_i * len(x_in)
                percentage = 100. * batch_i / len(train_loader)
                avg_loss = sum(tmp_loss)/len(tmp_loss)
                tmp_loss = []
                pbar.set_description(f"Train Epoch: {epoch_i} [{done:5}/{len(train_loader.dataset)} ({percentage:3.0f}%)] Loss: {avg_loss:.6f}")
                
def test():
    model.eval()
    test_loss = 0
    correct = 0
    curr_test_loss = 0
    with torch.no_grad():
        tmp_loss = []
        for data, target in test_loader:
            if cfg.TYPE != 'toy':
                data = torch.cat([data] * 3, dim=1)
            data, target = data.to(dev), target.to(dev)
            output = model(data)
            test_loss = criterion(output, target)
            curr_test_loss += test_loss.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum().item()
        curr_test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Test Set: Avg Loss: {curr_test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)")


In [0]:
train(cfg.epochs)

Training for 15 epochs




In [14]:
test()

Test Set: Avg Loss: 16972.8746, Accuracy: 1131/10000 (11.31%)


In [0]:
torch.save(model, f"initial_model_{cfg.TYPE}.ptmodel")

In [0]:
model.load_state_dict(torch.load("initial_model_toy.ptmodel").state_dict())

In [0]:
!ls -l

total 1936544
drwxr-xr-x 4 root root      4096 Nov 30 22:18 data
-rw-r--r-- 1 root root 991488747 Dec  1 06:22 initial_model_toy_1.ptmodel
-rw-r--r-- 1 root root 991488745 Dec  1 03:19 initial_model_toy.ptmodel
-rw-r--r-- 1 root root      1117 Dec  1 05:04 prune_utils.py
-rw-r--r-- 1 root root      6230 Dec  1 02:15 PruningClasses.py
drwxr-xr-x 2 root root      4096 Dec  1 05:05 __pycache__
drwxr-xr-x 2 root root      4096 Nov 29 18:21 sample_data
-rw-r--r-- 1 root root      6181 Dec  1 01:56 vgg16.py


# Pruning

In [0]:
model.prune_by_std(cfg.sensitivity, debug=True)

Pruning :  features.0
Pruning with threshold : 0.4509800672531128 for layer features.0
Pruning :  features.3
Pruning with threshold : 0.07040314376354218 for layer features.3
Pruning :  classifier.0
Pruning with threshold : 0.007453520316630602 for layer classifier.0
Pruning :  classifier.3
Pruning with threshold : 0.02786271832883358 for layer classifier.3
Pruning :  classifier.6
Pruning with threshold : 0.013317189179360867 for layer classifier.6


In [0]:
prune_utils.print_nonzeros(model)

features.0.weight    | nonzeros =     218 /     576 ( 37.85%) | total_pruned =     358 | shape = (64, 1, 3, 3)
features.0.bias      | nonzeros =      64 /      64 (100.00%) | total_pruned =       0 | shape = (64,)
features.3.weight    | nonzeros =   90785 /  294912 ( 30.78%) | total_pruned =  204127 | shape = (512, 64, 3, 3)
features.3.bias      | nonzeros =     512 /     512 (100.00%) | total_pruned =       0 | shape = (512,)
classifier.0.weight  | nonzeros =  779104 / 102760448 (  0.76%) | total_pruned =  101981344 | shape = (4096, 25088)
classifier.0.bias    | nonzeros =       0 /    4096 (  0.00%) | total_pruned =    4096 | shape = (4096,)
classifier.3.weight  | nonzeros = 1839840 / 16777216 ( 10.97%) | total_pruned =  14937376 | shape = (4096, 4096)
classifier.3.bias    | nonzeros =       0 /    4096 (  0.00%) | total_pruned =    4096 | shape = (4096,)
classifier.6.weight  | nonzeros =  106663 / 4096000 (  2.60%) | total_pruned =  3989337 | shape = (1000, 4096)
classifier.6.bias  

## Train Pruned

In [0]:
train(cfg.epochs)


  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A
[A
  0%|          | 0/235 [00:00<?, ?it/s][A

In [0]:
test()

Test Set: Avg Loss: 0.0080, Accuracy: 2105/10000 (21.05%)


In [0]:
torch.save(f"initial_model_toy_pruned.ptmodel")