In [9]:
import argparse
import sys
sys.path.append('../../') # append root directory
import os
import argparse
from cifar10.utils import getLogger
from cifar10.models import ResNet18_wby16
from cifar10.config import Config
from admm.warmup_scheduler import GradualWarmupScheduler
from admm.cross_entropy import CrossEntropyLossMaybeSmooth
from admm.utils import mixup_data, mixup_criterion
import admm
import torch.optim as optim

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


In [2]:
# get configuration
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--config_file', type=str, default='./cifar10/config.yaml', help ="config file")
parser.add_argument('--stage', type=str, default='admm', help ="select the pruning stage")
args = parser.parse_args("")
config = Config(args)

In [4]:
config.stage, config.smooth_eps

('admm', 0.0)

under stand the mixup data augumentation and mixup loss function
https://github.com/facebookresearch/mixup-cifar10/issues/18

In [5]:
import torch
import torchvision
import torchvision.transforms as transforms
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

if config.logging:
    log_dir = config.log_dir
    logger = getLogger(log_dir)
    logger.info(json.dumps(config.__dict__, indent=4))
else:
    logger = None


# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=config.workers)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=config.workers)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

  return torch._C._cuda_getDeviceCount() > 0


==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Model
print('==> Building model..')
model = None
if config.arch == "vgg16":
    model = VGG('vgg16', w= config.width_multiplier)
elif config.arch =="resnet18_wby16":
    model = ResNet18_wby16(config.w)
config.model = model

if device == 'cuda':
    if config.gpu is not None:
        torch.cuda.set_device(config.gpu)
        config.model = torch.nn.DataParallel(model,device_ids = [config.gpu])
    else:
        config.model.cuda()
        config.model = torch.nn.DataParallel(model)
    cudnn.benchmark = True

if config.load_model:
    # unlike resume, load model does not care optimizer status or start_epoch
    config.load_model.replace('w', str(config.w))
    print('==> Loading from {}'.format(config.load_model))

    config.model.load_state_dict(torch.load(config.load_model)) # i call 'net' "model"
    


    
config.prepare_pruning() # take the model and prepare the pruning

ADMM = None

if config.admm:
    ADMM = admm.ADMM(config, device)



if config.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt.t7')
    config.model.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
    ADMM.ADMM_U = checkpoint['admm']['ADMM_U']
    ADMM.ADMM_Z = checkpoint['admm']['ADMM_Z']
    

==> Building model..
==> Loading from resnet18_wby16_pretrained.pt


FileNotFoundError: [Errno 2] No such file or directory: 'resnet18_wby16_pretrained.pt'

In [8]:
config.load_model = config.load_model.replace('w', str(config.w))
config.load_model

'resnet18_1by16_pretrained.pt'

In [35]:
criterion = CrossEntropyLossMaybeSmooth(smooth_eps=config.smooth_eps).cuda(config.gpu)
config.smooth = config.smooth_eps > 0.0
config.mixup = config.alpha > 0.0


config.warmup = (not config.admm) and config.warmup_epochs > 0
optimizer_init_lr = config.warmup_lr if config.warmup else config.lr

optimizer = None
if (config.optimizer == 'sgd'):
    optimizer = torch.optim.SGD(config.model.parameters(), optimizer_init_lr,
                            momentum=0.9,
                                weight_decay=1e-4)
elif (config.optimizer =='adam'):
    optimizer = torch.optim.Adam(config.model.parameters(), optimizer_init_lr)
    
scheduler = None
if config.lr_scheduler == 'cosine':
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs*len(trainloader), eta_min=4e-08)
elif config.lr_scheduler == 'default':
    # my learning rate scheduler for cifar, following https://github.com/kuangliu/pytorch-cifar
    epoch_milestones = [150, 250, 350]

    """Set the learning rate of each parameter group to the initial lr decayed
        by gamma once the number of epoch reaches one of the milestones
    """
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[i*len(trainloader) for i in epoch_milestones], gamma=0.1)
else:
    raise Exception("unknown lr scheduler")

if config.warmup:
    scheduler = GradualWarmupScheduler(optimizer, multiplier=config.lr/config.warmup_lr, total_iter=config.warmup_epochs*len(trainloader), after_scheduler=scheduler)



In [38]:
config.optimizer, config.masked_retrain, config.save_model

('adam', False, 'resnet18_wby16_pretrained.pt')

# Valiadtion

In [1]:
import argparse
import sys
import os
from cifar10.models import ResNet18_wby16
from cifar10.config import Config
import torch
import torchvision
import torchvision.transforms as transforms
from cifar10.models import ResNet18_wby16
from admm.cross_entropy import CrossEntropyLossMaybeSmooth
from admm.admm import test_sparsity
import time
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
def validate(val_loader, criterion, config):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    config.model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if config.gpu is not None:
                input = input.cuda(config.gpu, non_blocking=True)
            target = target.cuda(config.gpu, non_blocking=True)

            # compute output
            output = config.model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      .format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1))

        print(' * Acc@1 {top1.avg:.3f} '
              .format(top1=top1))


    return top1.avg
# get configuration
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--config_file', type=str, default='./cifar10/natural/config_w16.yaml', help ="config file")
parser.add_argument('--stage', type=str, default='pretrain', help ="select the pruning stage")
args = parser.parse_args("")
config = Config(args)
criterion = CrossEntropyLossMaybeSmooth(smooth_eps=config.smooth_eps).cuda(config.gpu)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=config.workers)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified


In [2]:
net = ResNet18_wby16(16)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
net.to(device)
net = torch.nn.DataParallel(net)
checkpoint = torch.load('./cifar10/trainedMod/resnet18_16by16_pretrained.pt', map_location=torch.device(device))
net.load_state_dict(checkpoint)

<All keys matched successfully>

In [3]:
config.model = net
validate(testloader, criterion, config)

Test: [0/100]	Time 3.162 (3.162)	Loss 0.3306 (0.3306)	Acc@1 95.000 (95.000)	
Test: [10/100]	Time 0.063 (0.348)	Loss 0.1819 (0.5375)	Acc@1 97.000 (93.455)	
Test: [20/100]	Time 0.061 (0.212)	Loss 0.7931 (0.5728)	Acc@1 93.000 (93.143)	
Test: [30/100]	Time 0.064 (0.164)	Loss 0.5674 (0.6170)	Acc@1 92.000 (93.097)	
Test: [40/100]	Time 0.063 (0.139)	Loss 1.0210 (0.6508)	Acc@1 90.000 (92.659)	
Test: [50/100]	Time 0.063 (0.124)	Loss 0.2226 (0.6480)	Acc@1 96.000 (92.843)	
Test: [60/100]	Time 0.063 (0.114)	Loss 0.4355 (0.6285)	Acc@1 95.000 (93.049)	
Test: [70/100]	Time 0.063 (0.107)	Loss 1.2535 (0.6035)	Acc@1 88.000 (93.239)	
Test: [80/100]	Time 0.063 (0.102)	Loss 0.4119 (0.6038)	Acc@1 95.000 (93.222)	
Test: [90/100]	Time 0.063 (0.097)	Loss 0.5485 (0.5946)	Acc@1 93.000 (93.253)	
 * Acc@1 93.240 


tensor(93.2400, device='cuda:0')

In [7]:
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--config_file', type=str, default='./cifar10/prune/config_w16.yaml', help ="config file")
parser.add_argument('--stage', type=str, default='admm', help ="select the pruning stage")
args = parser.parse_args("")
config = Config(args)

netadmm = ResNet18_wby16(16)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
netadmm.to(device)
netadmm = torch.nn.DataParallel(netadmm)
checkpoint = torch.load('./cifar10/trainedMod/resnet18_16by16_admm.pt', map_location=torch.device(device))
netadmm.load_state_dict(checkpoint)

<All keys matched successfully>

In [8]:
config.model = netadmm
validate(testloader, criterion, config)

Test: [0/100]	Time 0.675 (0.675)	Loss 0.2687 (0.2687)	Acc@1 95.000 (95.000)	
Test: [10/100]	Time 0.063 (0.122)	Loss 0.1380 (0.5242)	Acc@1 97.000 (92.909)	
Test: [20/100]	Time 0.063 (0.094)	Loss 0.9564 (0.5891)	Acc@1 91.000 (93.000)	
Test: [30/100]	Time 0.064 (0.084)	Loss 0.9202 (0.6507)	Acc@1 93.000 (92.806)	
Test: [40/100]	Time 0.063 (0.079)	Loss 0.9842 (0.6808)	Acc@1 90.000 (92.463)	
Test: [50/100]	Time 0.063 (0.075)	Loss 0.2072 (0.6856)	Acc@1 94.000 (92.569)	
Test: [60/100]	Time 0.062 (0.074)	Loss 0.5158 (0.6786)	Acc@1 94.000 (92.672)	
Test: [70/100]	Time 0.063 (0.072)	Loss 1.2127 (0.6576)	Acc@1 88.000 (92.803)	
Test: [80/100]	Time 0.062 (0.071)	Loss 0.4642 (0.6551)	Acc@1 95.000 (92.765)	
Test: [90/100]	Time 0.062 (0.070)	Loss 0.6394 (0.6443)	Acc@1 94.000 (92.791)	
 * Acc@1 92.830 


tensor(92.8300, device='cuda:0')

In [32]:
from numpy import linalg as LA
import admm
import numpy as np
config.prepare_pruning() 
ADMM = admm.ADMM(config, device)

['', 'module', 'module.conv1', 'module.bn1', 'module.layer1', 'module.layer1.0', 'module.layer1.0.conv1', 'module.layer1.0.bn1', 'module.layer1.0.conv2', 'module.layer1.0.bn2', 'module.layer1.0.shortcut', 'module.layer1.1', 'module.layer1.1.conv1', 'module.layer1.1.bn1', 'module.layer1.1.conv2', 'module.layer1.1.bn2', 'module.layer1.1.shortcut', 'module.layer2', 'module.layer2.0', 'module.layer2.0.conv1', 'module.layer2.0.bn1', 'module.layer2.0.conv2', 'module.layer2.0.bn2', 'module.layer2.0.shortcut', 'module.layer2.0.shortcut.0', 'module.layer2.0.shortcut.1', 'module.layer2.1', 'module.layer2.1.conv1', 'module.layer2.1.bn1', 'module.layer2.1.conv2', 'module.layer2.1.bn2', 'module.layer2.1.shortcut', 'module.layer3', 'module.layer3.0', 'module.layer3.0.conv1', 'module.layer3.0.bn1', 'module.layer3.0.conv2', 'module.layer3.0.bn2', 'module.layer3.0.shortcut', 'module.layer3.0.shortcut.0', 'module.layer3.0.shortcut.1', 'module.layer3.1', 'module.layer3.1.conv1', 'module.layer3.1.bn1', 'm

In [33]:
def test_sparsity(config):
    """
    test sparsity for every involved layer and the overall compression rate

    """
    total_zeros = 0
    total_nonzeros = 0

    print('<===sparsity type is {}'.format(config.sparsity_type))
    print('<===layers to be pruned are \n{}'.format(config._prune_ratios))
    if config.sparsity_type == "irregular":
        for name, W in config.model.named_parameters():
            if 'bias' in name:
                continue
            W = W.cpu().detach().numpy()
            zeros = np.sum(W == 0)
            total_zeros += zeros
            nonzeros = np.sum(W != 0)
            total_nonzeros += nonzeros
            print("sparsity at layer {} is {}".format(name, zeros / (zeros + nonzeros)))
        total_weight_number = total_zeros + total_nonzeros
        print('overal compression rate is {}'.format(total_weight_number / total_nonzeros))
    elif config.sparsity_type == "filter":
        print('inside if')
        print(config.prune_ratios)
        for name, W in config.model.named_parameters():
            if name not in config.prune_ratios:
                continue
            W = W.cpu().detach().numpy()
            shape = W.shape
            W2d = W.reshape(shape[0], -1)
            row_l2_norm = LA.norm(W2d, 2, axis=1)
            zero_row = np.sum(row_l2_norm == 0)
            nonzero_row = np.sum(row_l2_norm != 0)
            total_zeros += np.sum(W == 0)
            total_nonzeros += np.sum(W != 0)
            print("filter sparsity of layer {} is {}".format(name, zero_row / (zero_row + nonzero_row)))
#         print('only consider conv layers, compression rate is {}'.format((total_zeros + total_nonzeros) / total_nonzeros))




In [43]:
test_sparsity(config)

<===sparsity type is filter
<===layers to be pruned are 
{'conv1.weight': 0.1, 'conv2.weight': 0.2, 'conv3.weight': 0.3, 'conv4.weight': 0.4, 'conv5.weight': 0.5, 'conv6.weight': 0.9375, 'conv7.weight': 0.9375, 'conv8.weight': 0.9375, 'conv9.weight': 0.9375, 'conv10.weight': 0.9375, 'conv11.weight': 0.9375, 'conv12.weight': 0.9375, 'conv13.weight': 0.9375, 'conv14.weight': 0.9375, 'conv15.weight': 0.9375, 'conv16.weight': 0.9375, 'conv17.weight': 0.9375, 'conv18.weight': 0.9375, 'conv19.weight': 0.9375, 'conv20.weight': 0.9375}
inside if
{'module.conv1.weight': 0.1, 'module.layer1.0.conv1.weight': 0.2, 'module.layer1.0.conv2.weight': 0.3, 'module.layer1.1.conv1.weight': 0.4, 'module.layer1.1.conv2.weight': 0.5, 'module.layer2.0.conv1.weight': 0.9375, 'module.layer2.0.conv2.weight': 0.9375, 'module.layer2.0.shortcut.0.weight': 0.9375, 'module.layer2.1.conv1.weight': 0.9375, 'module.layer2.1.conv2.weight': 0.9375, 'module.layer3.0.conv1.weight': 0.9375, 'module.layer3.0.conv2.weight': 0.

In [50]:
for name, W in config.model.named_parameters():
        if name not in ADMM.prune_ratios:
            continue
        ADMM.ADMM_Z[name] = W + ADMM.ADMM_U[name]  # Z(k+1) = W(k+1)+U[k]
        _, _Z = admm.admm.weight_pruning(config, ADMM.ADMM_Z[name], ADMM.prune_ratios[name])  # equivalent to Euclidean Projection
        ADMM.ADMM_Z[name] = _Z
        ADMM.ADMM_U[name] = W - ADMM.ADMM_Z[name] + ADMM.ADMM_U[name]  # U(k+1) = W(k+1) - Z(k+1) +U(k)

In [53]:
total_zeros = 0
total_nonzeros = 0
for name, W in ADMM.ADMM_Z.items():
    if name not in config.prune_ratios:
        continue
    W = W.cpu().detach().numpy()
    shape = W.shape
    W2d = W.reshape(shape[0], -1)
    row_l2_norm = LA.norm(W2d, 2, axis=1)
    zero_row = np.sum(row_l2_norm == 0)
    nonzero_row = np.sum(row_l2_norm != 0)
    total_zeros += np.sum(W == 0)
    total_nonzeros += np.sum(W != 0)
    print("filter sparsity of layer {} is {}".format(name, zero_row / (zero_row + nonzero_row)))

filter sparsity of layer module.conv1.weight is 0.109375
filter sparsity of layer module.layer1.0.conv1.weight is 0.203125
filter sparsity of layer module.layer1.0.conv2.weight is 0.296875
filter sparsity of layer module.layer1.1.conv1.weight is 0.40625
filter sparsity of layer module.layer1.1.conv2.weight is 0.5
filter sparsity of layer module.layer2.0.conv1.weight is 0.9375
filter sparsity of layer module.layer2.0.conv2.weight is 0.9375
filter sparsity of layer module.layer2.0.shortcut.0.weight is 0.9375
filter sparsity of layer module.layer2.1.conv1.weight is 0.9375
filter sparsity of layer module.layer2.1.conv2.weight is 0.9375
filter sparsity of layer module.layer3.0.conv1.weight is 0.9375
filter sparsity of layer module.layer3.0.conv2.weight is 0.9375
filter sparsity of layer module.layer3.0.shortcut.0.weight is 0.9375
filter sparsity of layer module.layer3.1.conv1.weight is 0.9375
filter sparsity of layer module.layer3.1.conv2.weight is 0.9375
filter sparsity of layer module.lay