In [1]:
import distiller 
import numpy as np
import os
import bitstring 
import time 
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import torch.nn.functional as F

import models 
from matplotlib import pyplot as plt
%matplotlib inline

# Report whether PyTorch can see a CUDA device in this kernel.
print('using GPU:', torch.cuda.is_available())

  from ._conv import register_converters as _register_converters


using GPU: True


In [2]:
# Evaluation data: the CIFAR-10 test split (train=False), converted to
# tensors and normalised with fixed per-channel mean/std constants.
test_batch_size = 256

# Worker / pinned-memory options are only passed when CUDA is available.
if torch.cuda.is_available():
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

test_loader = torch.utils.data.DataLoader(
    dataset=datasets.CIFAR10(
        root='./data.cifar10',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=(0.4914, 0.4822, 0.4465),
                std=(0.2023, 0.1994, 0.2010),
            ),
        ]),
    ),
    batch_size=test_batch_size,
    shuffle=False,  # keep a fixed sample order across evaluations
    **kwargs
)

def load_checkpoint(model_path, net=None):
    """Restore weights from a checkpoint file and return its best precision.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load``. The
            loaded object must provide the keys ``'best_prec1'``,
            ``'state_dict'`` and ``'epoch'``.
        net: Module that receives the stored ``state_dict``. When omitted,
            the notebook-level ``model`` variable is used (the original
            behaviour), so that variable must already exist.

    Returns:
        The ``best_prec1`` value stored in the checkpoint.

    Raises:
        ValueError: If ``model_path`` is empty or does not name an existing file.
    """
    if not model_path:
        raise ValueError('args.model cannot be empty!')
    if not os.path.isfile(model_path):
        raise ValueError("=> no checkpoint found at '{}'".format(model_path))

    print("=> loading checkpoint '{}'".format(model_path))
    # Bug fix: the file path must be loaded here. The previous code passed
    # the global ``model`` object to torch.load, which cannot work.
    checkpoint = torch.load(model_path)
    best_prec1 = checkpoint['best_prec1']

    if net is None:
        # Backward-compatible fallback to the notebook-level global.
        net = model
    net.load_state_dict(checkpoint['state_dict'])

    print("=> loaded checkpoint '{}' (epoch {}) Prec1: {:f}"
          .format(model_path, checkpoint['epoch'], best_prec1))
    return best_prec1

def test(model):
    model.eval()
    model.cuda()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').data # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:f}, Accuracy: {}/{} ({:.2f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return correct / float(len(test_loader.dataset))

In [3]:
# Checkpoint to analyse, plus the constructor arguments needed to rebuild
# the matching network from the local `models` package.
model_path = (
    "/home/hguan2/workspace/fault-tolerance/rethinking-network-pruning/cifar/l1-norm-pruning"
    "/logs/vgg16/cifar10/model_best.pth.tar"
)
arch, depth, dataset = 'vgg', '16', 'cifar10'

# The checkpoint stores the layer configuration ('cfg') next to the weights,
# so the network is rebuilt from it before the state dict is restored.
checkpoint = torch.load(model_path)
model = vars(models)[arch](
    dataset=dataset,
    depth=depth,
    cfg=checkpoint['cfg'],
)
model.load_state_dict(checkpoint['state_dict'])
best_prec1 = checkpoint['best_prec1']
print(
    "=> loaded checkpoint '{}' (epoch {}) Prec1: {:f}".format(
        model_path, checkpoint['epoch'], best_prec1
    )
)
# print(model)

=> loaded checkpoint '/home/hguan2/workspace/fault-tolerance/rethinking-network-pruning/cifar/l1-norm-pruning/logs/vgg16/cifar10/model_best.pth.tar' (epoch 158) Prec1: 0.938800


In [4]:
# for param_name, param in model.named_parameters():
#     if len(param.size()) < 2:
#         continue
#     plt.hist(param.data.cpu().numpy().ravel(), bins=100)
#     plt.title(param_name + ': '+ str(param.size()))
#     plt.show()

In [5]:
# prec1 = test(model)

In [6]:
# for key, value in model.state_dict().items():
#     print(key, value)

In [7]:
def copy_bits(n_LSB=16):
    """Build a function that duplicates a float's leading bits into its tail.

    The returned callable packs a value as a 32-bit big-endian float,
    overwrites its last ``n_LSB`` bits with a copy of its first ``n_LSB``
    bits, and returns the float encoded by the modified bit pattern.

    Args:
        n_LSB: Number of trailing bits to overwrite; must lie in ``[0, 32]``.

    Returns:
        A one-argument function mapping a float to the modified float.

    Raises:
        ValueError: If ``n_LSB`` is outside ``[0, 32]``. Such values would
            make the slice assignment change the bit length, so the result
            could no longer be read back as a 32-bit float.
    """
    if not 0 <= n_LSB <= 32:
        raise ValueError(
            'n_LSB must be between 0 and 32, got {}'.format(n_LSB))

    def _copy_bits(v):
        bits = bitstring.pack('>f', v)
        # option 1 (active): copy the leading n_LSB bits directly over the
        # trailing n_LSB bits
        bits[32-n_LSB:32] = bits[:n_LSB]

        # option 2: same copy, but rotated so the sign bit lands in the very
        # last position
#         bits[32-n_LSB:32] = bits[1:n_LSB]+bits[:1]

        # option 3: copy the first n_LSB-1 bits (sign bit last) and use the
        # final bit as a parity bit; the final bit of a 32-bit value is
        # index 31 (index 32 would be out of range)
#         bits[32-n_LSB: 31] = bits[1:n_LSB-1] + bits[:1]
#         bits[31] = (sum(c=='1' for c in bits[:31].bin)%2 == 1)
        return bits.float
    return _copy_bits
    

# Number of trailing bits of every weight that get overwritten with a copy
# of that weight's leading bits.
n_LSB = 16
copy_fn = copy_bits(n_LSB)

# Rewrite every parameter of the model element by element and time the pass.
start = time.time()
for name, parameter in model.named_parameters():
    original = parameter.data.cpu().numpy()
    original_shape = original.shape
    # Apply the bit copy to each scalar of the flattened array.
    patched = np.array(list(map(copy_fn, original.ravel())), dtype='float32')
    print('Finish parameter:' + name, str(original_shape))
    # Restore the original layout and put the values back into the model.
    parameter.data = torch.from_numpy(patched.reshape(original_shape))
elapsed = time.time() - start
print('Finish bits manipulation in: %d(s)' % elapsed)
# prec1_1 = test(model)

Finish parameter:feature.0.weight (64, 3, 3, 3)
Finish parameter:feature.1.weight (64,)
Finish parameter:feature.1.bias (64,)
Finish parameter:feature.3.weight (64, 64, 3, 3)
Finish parameter:feature.4.weight (64,)
Finish parameter:feature.4.bias (64,)
Finish parameter:feature.7.weight (128, 64, 3, 3)
Finish parameter:feature.8.weight (128,)
Finish parameter:feature.8.bias (128,)
Finish parameter:feature.10.weight (128, 128, 3, 3)
Finish parameter:feature.11.weight (128,)
Finish parameter:feature.11.bias (128,)
Finish parameter:feature.14.weight (256, 128, 3, 3)
Finish parameter:feature.15.weight (256,)
Finish parameter:feature.15.bias (256,)
Finish parameter:feature.17.weight (256, 256, 3, 3)
Finish parameter:feature.18.weight (256,)
Finish parameter:feature.18.bias (256,)
Finish parameter:feature.20.weight (256, 256, 3, 3)
Finish parameter:feature.21.weight (256,)
Finish parameter:feature.21.bias (256,)
Finish parameter:feature.24.weight (512, 256, 3, 3)
Finish parameter:feature.25.w

In [8]:
# Evaluate the model in its current state on the test loader; test() prints
# a summary line and returns the accuracy as a fraction.
prec1_1 = test(model)

Test set: Average loss: 0.313613, Accuracy: 9388/10000 (93.88%)


In [9]:
# Spot check: print the first element of the first parameter that also
# exists in the checkpoint, once as stored and once as currently held by
# the model.
for name, parameter in model.named_parameters():
    if name not in checkpoint['state_dict']:
        continue
    stored = checkpoint['state_dict'][name]
    print(stored.view(-1)[:1].sum().item())
    print(parameter.data.view(-1)[:1].sum().item())
    break

0.17978152632713318
0.1801621913909912


In [10]:

# quantizer = distiller.quantization.PostTrainLinearQuantizer(model)
# quantizer.prepare_model()

In [11]:
# data, target = next(iter(test_loader))
# print(model.feature[0])

In [12]:
# for key, value in model.state_dict().items():
#     print(key, value)

In [13]:

# prec1 = test(model)

In [14]:
# save_path = os.path.join("/".join(model_path.split('/')[:-1]), 'quantize') 
# if not os.path.exists(save_path):
#     os.makedirs(save_path)
    
# # save accuracy
# with open(os.path.join(save_path, "quantize.txt"), "w") as fp:
#     fp.write("Test accuracy: \n"+str(prec1)+"\n")

# # save quantized model     
# torch.save({ 'cfg': model.cfg, 
#             'state_dict': model.state_dict(), 
#             'prec1': prec1
#            }, os.path.join(save_path, 'quantized.pth.tar'))

In [15]:
# for  name, value in model.named_parameters():
#     print(name, value.size())

In [16]:
# # check the weight distribution of other pre-trained models in torch vision
# import torchvision 

# pretrained_models = {'resnet18': torchvision.models.resnet18(pretrained=True),
#                      'alexnet': torchvision.models.alexnet(pretrained=True),
#                      'squeezenet': torchvision.models.squeezenet1_0(pretrained=True),
#                      'vgg16':  torchvision.models.vgg16(pretrained=True), 
#                      'densenet':  torchvision.models.densenet161(pretrained=True),
#                      'inception_v3':  torchvision.models.inception_v3(pretrained=True),
#                     }
# for model_name, pretrained_model in pretrained_models.items():
#     tensor = [param.data.cpu().numpy().ravel() for param in pretrained_model.parameters()]
#     tensor = np.concatenate(tensor)
#     minv, maxv = np.min(tensor), np.max(tensor)
#     num_values = tensor.shape[0]
#     plt.hist(tensor, bins=10000)
#     plt.title(model_name+ ':[%f, %f], #=%.1f(M)' %(minv, maxv, num_values/1e6))
#     plt.show()