In [1]:
# Bootstrap cell: derive the wheel tag for the running interpreter, choose the
# wheel directory, install PyTorch 0.4.0 + torchvision, then verify the install.
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel filename fragment, formatted as "<impl><version>-<abi>".
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Select the 'cu80' wheel directory when nvidia-smi exists at this path, otherwise 'cpu'.
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

# NOTE(review): the wheel is fetched over plain http and torchvision is unpinned — confirm this is still intended.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision
import torch
# Report the installed version and whether CUDA is usable from this kernel.
print(torch.__version__)
print(torch.cuda.is_available())

0.4.0
True


In [0]:
from vgg16 import vgg16, vgg16_bn, vgg_toy
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm

In [0]:
# Model/data variant switch read by later cells: 'toy' selects vgg_toy on unpadded
# images, 'bn' selects vgg16_bn, and any other value selects vgg16.
TYPE = 'toy'

In [23]:
# Sanity check: display whether a CUDA device is available to this kernel.
torch.cuda.is_available()

True

In [24]:
class PruneConfig():
    """Settings shared by the data-loading, training and pruning cells.

    Every setting is a keyword argument whose default is the value this
    notebook originally hard-coded, so ``PruneConfig()`` behaves as before
    while a different experiment can be configured without editing the class.

    Attributes:
        batch_size: batch size handed to both DataLoaders.
        epochs: number of epochs passed to ``train``.
        lr: learning rate handed to the Adam optimizer.
        cuda: whether to run on the GPU; auto-detected when ``cuda`` is None.
        seed: seed for ``torch.manual_seed`` / ``torch.cuda.manual_seed``.
        log_rate: the progress text is refreshed every ``log_rate`` batches.
        log_file: log file name (not referenced by the cells visible here).
        sensitivity: value passed to ``model.prune_by_std``.
        debug: debug flag forwarded to the vgg16 / vgg16_bn constructors.
    """

    def __init__(self, batch_size=128, epochs=1, lr=0.01, cuda=None, seed=42,
                 log_rate=10, log_file="log.txt", sensitivity=2, debug=False):
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        # None means "use the GPU if one is available"; an explicit value wins.
        self.cuda = torch.cuda.is_available() if cuda is None else bool(cuda)
        # Kept from the original cell: echo the resolved CUDA flag on construction.
        print(self.cuda)
        self.seed = seed
        self.log_rate = log_rate
        self.log_file = log_file
        self.sensitivity = sensitivity
        self.debug = debug
cfg = PruneConfig()

True


In [25]:
# Seed PyTorch's default random generator with the configured seed.
torch.manual_seed(cfg.seed)

<torch._C.Generator at 0x7f5c4480dfb0>

In [26]:
# Report the device in use, seed the CUDA generator when applicable, and choose
# the extra DataLoader arguments (worker processes + pinned memory on GPU only).
if cfg.cuda:
    print("Using CUDA")
    torch.cuda.manual_seed(cfg.seed)
    kwargs = {'num_workers': 5, 'pin_memory': True}
else:
    print("No CUDA")
    kwargs = {}

Using CUDA


### Use Toy MNIST Data
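**For the full VGG16 variants, pad each 28x28 digit to 224x224x1 (28 + 2·98 = 224), since VGG16 originally takes in images of that size; the toy model uses the unpadded images. Either way, this is essentially just really, really bad toy data.**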

In [0]:
# Build the MNIST preprocessing pipeline. Both branches must bind the same name
# (`transform_list`) because the loaders below read it; the non-toy branch
# previously assigned `transforms_list`, which raised NameError for any TYPE
# other than 'toy'.
if TYPE == 'toy':
    transform_list = [transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))]
else:
    # Pad(98) adds 98 px on every side: 28 + 2*98 = 224.
    transform_list = [transforms.Pad(98), transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))]

    
# Training split: shuffled every epoch.
train_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=True, download=True,
                                                         transform=transforms.Compose(transform_list)),
                                          batch_size=cfg.batch_size,
                                          shuffle=True,
                                          **kwargs)

# Test split: fixed order.
test_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=False, download=True,
                                                         transform=transforms.Compose(transform_list)),
                                          batch_size=cfg.batch_size,
                                          shuffle=False,
                                          **kwargs)

# TOY VGG

In [29]:
# Resolve the target device once, build the network variant requested by TYPE,
# then move whichever model was built onto that device.
dev = torch.device("cuda") if cfg.cuda else torch.device('cpu')
if TYPE == "toy":
    model = vgg_toy(mask=True)
elif TYPE == "bn":
    model = vgg16_bn(pretrained=True, mask=True, debug=cfg.debug, in_channels=3)
else:
    model = vgg16(pretrained=True, mask=True, debug=cfg.debug, in_channels=3)
model = model.to(dev)
print("USING: ", TYPE)



USING:  toy


In [0]:
# Adam over the trainable parameters only (anything with requires_grad=False is
# left out), a snapshot of the optimizer's initial state, and the training loss.
optimizer = optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=cfg.lr,
    weight_decay=0.0001,
)
optim_state_dict = optimizer.state_dict()
criterion = nn.CrossEntropyLoss()

# Train

In [0]:
def train(epochs):
    """Train the notebook-level ``model`` for ``epochs`` passes over ``train_loader``.

    Uses the globals ``model``, ``train_loader``, ``optimizer``, ``criterion``,
    ``cfg``, ``dev`` and ``TYPE``. After each backward pass, the gradient of
    every weight whose current value is exactly zero is cleared, so pruned
    connections receive no gradient signal.

    Args:
        epochs: number of full passes over ``train_loader``.
    """
    model.train()
    tmp_loss = []
    for epoch_i in range(epochs):
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_i, (x_in, y_in) in pbar:
            if TYPE != 'toy':
                # Replicate the single grey channel three times for the 3-channel models.
                x_in = torch.cat([x_in, x_in, x_in], dim=1)
            x_in, y_in = x_in.to(dev), y_in.to(dev)
            optimizer.zero_grad()
            output = model(x_in)
            loss = criterion(output, y_in)
            loss.backward()
            # The criterion built in this notebook already averages over the
            # batch, so the value is logged as-is (it used to be divided by
            # cfg.batch_size a second time).
            tmp_loss.append(loss.item())

            # zero out pruned connections
            for name, p in model.named_parameters():
                # Parameters without a gradient (e.g. frozen ones) have
                # p.grad == None and need no masking.
                if "mask" in name or p.grad is None:
                    continue
                # Masked in place on the parameter's own device; no NumPy/CPU round-trip.
                p.grad.data.masked_fill_(p.data == 0, 0)
            # NOTE(review): a zero gradient only freezes a weight if the optimizer
            # keeps no non-zero running state for it — confirm for Adam after pruning.
            optimizer.step()
            if batch_i % cfg.log_rate == 0:
                done = batch_i * len(x_in)
                percentage = 100. * batch_i / len(train_loader)
                avg_loss = sum(tmp_loss)/len(tmp_loss)
                tmp_loss = []
                pbar.set_description(f"Train Epoch: {epoch_i} [{done:5}/{len(train_loader.dataset)} ({percentage:3.0f}%)] Loss: {avg_loss:.6f}")
                
def test():
    model.eval()
    test_loss = 0
    correct = 0
    curr_test_loss = 0
    with torch.no_grad():
        tmp_loss = []
        for data, target in test_loader:
            if TYPE != 'toy':
                data = torch.cat([data] * 3, dim=1)
            data, target = data.to(dev), target.to(dev)
            output = model(data)
            test_loss = criterion(output, target)
            curr_test_loss += test_loss.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum().item()
        curr_test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Test Set: Avg Loss: {curr_test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)")


In [0]:
# Run training for the configured number of epochs.
train(cfg.epochs)



In [0]:
# Evaluate the trained (not yet pruned) model on the test split.
test()

Test Set: Avg Loss: 0.4183, Accuracy: 2747


In [0]:
# Serialize the whole trained module object (not just its state_dict) to disk
# before the pruning step below.
torch.save(model, "initial_model.ptmodel")

# Pruning

In [0]:
# Run the model's std-based pruning step with the configured sensitivity.
# NOTE(review): the recorded output of this cell is a TypeError — check this call
# against prune_by_std's actual signature in vgg16 (e.g. whether it accepts `debug`).
model.prune_by_std(cfg.sensitivity, debug=True)

TypeError: ignored