In [48]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.utils.prune as prune
import numpy as np

In [49]:
torch.manual_seed(0)
np.random.seed(0)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [50]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

#backbone_model = resnet50_taskonomy(pretrained=True)
#model = nn.Sequential(backbone_model,
#                     nn.Linear(2048, 10))

cuda:0


In [51]:
transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)

testsize = len(testset)

Files already downloaded and verified
Files already downloaded and verified


In [73]:
import torch.optim as optim

def pruning_scheme_filters(model):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            #prunes along dimension 0 according to the l2 norm
            #this basically prunes amount% of filters
            prune.ln_structured(module, name="weight", amount=0.4, n=2, dim=0)
    return model

def pruning_scheme_random(model):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            prune.random_unstructured(module, name="weight", amount=0.4)
    return model

def train_model(model, loader, num_epochs=10, include_pruning=False, scheme=None, oneshot=False):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    if include_pruning == True and oneshot == True:
        if scheme == 'random':
            print('Setting all masks before training with {} scheme'.format(scheme))
            model = pruning_scheme_filters(model)
        elif scheme == 'structured':
            print('Setting all masks before training with {} scheme'.format(scheme))
            model = pruning_scheme_random(model)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        if include_pruning == True and oneshot == False:
            if scheme == 'structured':
                print('Setting masks in epoch {} before training with {} scheme'.format(epoch, scheme))

                layers = [name for name, module in model.named_modules() if isinstance(module, torch.nn.Conv2d)]
                for name, module in model.named_modules():
                    if isinstance(module, torch.nn.Conv2d):
                        #if name == layers[epoch*2] or name == layers[epoch*2+1]:
                        if name == layers[epoch]:
                            prune.ln_structured(module, name="weight", amount=0.4, n=2, dim=0)
           
        running_loss = 0.0
        for i, data in enumerate(loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 200 == 199:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
                
    return model


def evaluate_model(model, loader):
    model = model.to(device)
    running_loss = 0
    running_corrects = 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = nn.CrossEntropyLoss()(outputs, labels)


            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            
    total_loss = running_loss / testsize
    total_acc = running_corrects.double() / testsize

    print('Loss: {:.4f} Acc: {:.4f}'.format(total_loss, total_acc))
    
def save_model(model):
    torch.save(model.state_dict(), 'model_wts.pt')
    
def load_model(model):
    model.load_state_dict(torch.load('model_wts.pt'))
    return model

#count number of zeroed params
def how_sparse(model):
    total_sparsity = 0
    total_params = 0
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            total_sparsity += float(torch.sum(module.weight == 0))
            total_params += float(module.weight.nelement())
    return total_sparsity/total_params
 

In [55]:
def get_model_fresh():
    model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
    n_inputs = model.fc.in_features

    # add more layers as required
    classifier = nn.Sequential(nn.Linear(n_inputs, 10))
    model.fc = classifier
    return model

<b>Model metrics without pruning: </b>

In [58]:
model = get_model_fresh()
train_model(model, trainloader, num_epochs=10, include_pruning=False)
evaluate_model(model, testloader)
how_sparse(model)

Using cache found in /home/jupyter/.cache/torch/hub/pytorch_vision_v0.6.0


[1,   200] loss: 0.933
[2,   200] loss: 0.236
[3,   200] loss: 0.156
[4,   200] loss: 0.109
[5,   200] loss: 0.078
[6,   200] loss: 0.053
[7,   200] loss: 0.039
[8,   200] loss: 0.028
[9,   200] loss: 0.020
[10,   200] loss: 0.015
Loss: 0.1918 Acc: 0.9406


0.0

In [60]:
save_model(model)

In [63]:
#no training involved - directly pruning fine-tuned model

#random
model = load_model(get_model_fresh())
for name, module in model.named_modules():
    # prune 20% of connections in all 2D-conv layers
    if isinstance(module, torch.nn.Conv2d):
        prune.random_unstructured(module, name="weight", amount=0.4)
evaluate_model(model, testloader)

#structured - by filter
model = load_model(get_model_fresh())
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        prune.ln_structured(module, name="weight", amount=0.4, n=2, dim=0)
evaluate_model(model, testloader)


Using cache found in /home/jupyter/.cache/torch/hub/pytorch_vision_v0.6.0


Loss: 1.6120 Acc: 0.4962


Using cache found in /home/jupyter/.cache/torch/hub/pytorch_vision_v0.6.0


Loss: 1.9257 Acc: 0.3538


<b>Pruning experiments</b> - All experiments take a fine-tuned model and build on top of that

In [68]:
model = load_model(get_model_fresh())

train_model(model, trainloader, num_epochs=10, include_pruning=True, oneshot=True, scheme='random')    
evaluate_model(model, testloader)
how_sparse(model)

Using cache found in /home/jupyter/.cache/torch/hub/pytorch_vision_v0.6.0


Setting all masks before training with random scheme
[1,   200] loss: 0.639
[2,   200] loss: 0.251
[3,   200] loss: 0.164
[4,   200] loss: 0.109
[5,   200] loss: 0.069
[6,   200] loss: 0.046
[7,   200] loss: 0.032
[8,   200] loss: 0.021
[9,   200] loss: 0.015
[10,   200] loss: 0.012
Loss: 0.3316 Acc: 0.9085


0.4000144355037453

In [69]:
model = load_model(get_model_fresh())

train_model(model, trainloader, num_epochs=10, include_pruning=True, oneshot=True, scheme='structured')    
evaluate_model(model, testloader)
how_sparse(model)

Using cache found in /home/jupyter/.cache/torch/hub/pytorch_vision_v0.6.0


Setting all masks before training with structured scheme
[1,   200] loss: 0.532
[2,   200] loss: 0.171
[3,   200] loss: 0.090
[4,   200] loss: 0.048
[5,   200] loss: 0.026
[6,   200] loss: 0.017
[7,   200] loss: 0.012
[8,   200] loss: 0.009
[9,   200] loss: 0.007
[10,   200] loss: 0.006
Loss: 0.2754 Acc: 0.9244


0.4000000179100543

In [74]:
#original model with 93.78% accuracy
model = load_model(get_model_fresh())

#layer by layer pruning - over epoch some set of filters are removed - from lowest to topmost layers
train_model(model, trainloader, num_epochs=20, include_pruning=True, oneshot=False, scheme='structured')    
evaluate_model(model, testloader)
how_sparse(model)

Using cache found in /home/jupyter/.cache/torch/hub/pytorch_vision_v0.6.0


Setting masks in epoch 0 before training with structured scheme
[1,   200] loss: 0.053
Setting masks in epoch 1 before training with structured scheme
[2,   200] loss: 0.044
Setting masks in epoch 2 before training with structured scheme
[3,   200] loss: 0.047
Setting masks in epoch 3 before training with structured scheme
[4,   200] loss: 0.034
Setting masks in epoch 4 before training with structured scheme
[5,   200] loss: 0.038
Setting masks in epoch 5 before training with structured scheme
[6,   200] loss: 0.081
Setting masks in epoch 6 before training with structured scheme
[7,   200] loss: 0.049
Setting masks in epoch 7 before training with structured scheme
[8,   200] loss: 0.023
Setting masks in epoch 8 before training with structured scheme
[9,   200] loss: 0.054
Setting masks in epoch 9 before training with structured scheme
[10,   200] loss: 0.033
Setting masks in epoch 10 before training with structured scheme
[11,   200] loss: 0.083
Setting masks in epoch 11 before trainin

0.4000144355037453