In [1]:
import distiller 
import numpy as np
import os, collections
import bitstring 
import time 
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import torch.nn.functional as F
import torchvision 
import models 
from matplotlib import pyplot as plt
from eval_util import test_imagenet 
# import multiprocessing 
%matplotlib inline

from fault_injection import * 

import matplotlib 
# Store fonts in saved PDFs as Type 42 (TrueType) — presumably so figure text
# stays embeddable/editable; see the matplotlib rcParams reference to confirm.
matplotlib.rcParams['pdf.fonttype'] = 42

# Report whether CUDA is usable from this kernel.
print('using GPU:', torch.cuda.is_available())
# Seed PyTorch's default RNG (only torch is seeded in this cell). As the cell's
# last expression, the returned Generator object is echoed in the output.
torch.manual_seed(1)

  from ._conv import register_converters as _register_converters


using GPU: True


<torch._C.Generator at 0x7fa430037050>

In [2]:
# Pick one pre-trained torchvision model whose weight distribution is inspected
# in the rest of the notebook (e.g. resnet18, alexnet, vgg16).
#
# Only the constructors are stored here. Calling every one of them with
# pretrained=True up front would download and instantiate all of the networks
# even though a single model is used below.
model_builders = {
    'resnet18': torchvision.models.resnet18,
    'resnet34': torchvision.models.resnet34,
    'alexnet': torchvision.models.alexnet,
    'squeezenet': torchvision.models.squeezenet1_0,
    'vgg16': torchvision.models.vgg16,
    'vgg16_bn': torchvision.models.vgg16_bn,
    # 'densenet': torchvision.models.densenet161,
    'inception_v3': torchvision.models.inception_v3,
}

model_name = 'vgg16'
# model_name = 'resnet18'
# model_name = 'squeezenet'

# An unknown model_name raises KeyError, exactly as the eager dict lookup did.
model = model_builders[model_name](pretrained=True)

# Keep the historical name available: indexing it with the selected model_name
# still yields the instantiated model.
pretrained_models = {model_name: model}
# print(model)

  init.kaiming_uniform(m.weight.data)
  init.normal(m.weight.data, mean=0.0, std=0.01)


In [3]:
# check model weight size is a multiple of eight
# for name, param in model.named_parameters():
#     if len(param.size()) < 2:
#         continue 
#     length = param.nelement()
#     assert length%8 == 0, '#values not equal to 8X: %d' %(length)
#     print(name, param.size(), '8X?', length%8 == 0)
    

In [4]:
# Post-training quantization: hand `model` to distiller's linear quantizer and
# let prepare_model() rewrite it. The parameter names printed by the export cell
# below contain a `wrapped_module` component after this step.
# NOTE(review): no bit-widths are passed, so distiller's defaults apply —
# presumably 8-bit given the int8 export later on; confirm against distiller.
quantizer = distiller.quantization.PostTrainLinearQuantizer(model)
quantizer.prepare_model()
# print(model)


In [5]:
# save weight values to text files (one integer per line)
def get_named_weights(model):
    '''Return the (name, parameter) pairs of `model` that have two or more dimensions.

    Parameters with fewer than two dimensions are left out. The pairs keep the
    order in which model.named_parameters() yields them.
    '''
    return [
        (label, weight)
        for label, weight in model.named_parameters()
        if weight.dim() >= 2
    ]
def large_value_indexes(tensor, thr=64):
    '''Return the flat indexes of the "large" values in `tensor`.

    A value is large when it lies outside the closed range [-thr, thr - 1],
    i.e. value > thr - 1 or value < -thr.

    Args:
        tensor: torch tensor of any shape; it is flattened before searching.
        thr: large-value threshold (default 64).

    Returns:
        1-D tensor holding the positions, in the flattened tensor, of all
        large values, in ascending order. Empty when there are none.
    '''
    # reshape (unlike view) also accepts non-contiguous input such as a transpose.
    flat = tensor.reshape(-1)
    # Combine the two comparison masks with a logical OR.
    is_large = (flat > thr - 1) | (flat < -thr)
    return torch.nonzero(is_large).reshape(-1)

def check_directory(path):
    '''Make sure the directory `path` exists, creating missing parents as needed.

    Calling it on an existing directory is a no-op. If `path` exists but is not
    a directory, os.makedirs raises FileExistsError.
    '''
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(path, exist_ok=True)

# Export every weight tensor (>= 2 dimensions) of the quantized model to
# ./weights/<model_name>/<weight_id>.txt and write a meta.txt summary next to them.
named_params = get_named_weights(model)
datapath = os.path.join('./weights/', model_name)
check_directory(datapath)

meta_lines = []
for weight_id, (name, param) in enumerate(named_params):
    tensor = param.data
    shape = tuple(tensor.size())
    size = tensor.nelement()
    # NOTE(review): large_value_number is not defined in this notebook; it is
    # presumably provided by `from fault_injection import *` — confirm.
    largerThan32 = large_value_number(tensor, thr=32)

    # record data meta info
    info = '%d, %s, %s, %d, %d' % (weight_id, name, shape, size, largerThan32)
    meta_lines.append(info)
    print(info)

    # Save the flattened tensor as text, one integer per line.
    # .cpu() and reshape keep this working for CUDA-resident or non-contiguous weights.
    # NOTE(review): astype(np.int8) assumes the quantized values already fit
    # in int8; out-of-range values would wrap silently — confirm.
    tensor1d = tensor.cpu().reshape(-1).numpy().astype(np.int8)
    np.savetxt(os.path.join(datapath, '%d.txt' % (weight_id)), tensor1d, fmt='%d')

# One line per exported tensor, each terminated by a newline.
meta = ''.join(line + '\n' for line in meta_lines)

with open(os.path.join(datapath, 'meta.txt'), 'w') as f:
    f.write('weight_id, name, shape, size, largerThan32\n')
    f.write(meta)

0, features.0.wrapped_module.weight, (64, 3, 3, 3), 1728, 43, 282
1, features.2.wrapped_module.weight, (64, 64, 3, 3), 36864, 112, 1090
2, features.5.wrapped_module.weight, (128, 64, 3, 3), 73728, 47, 977
3, features.7.wrapped_module.weight, (128, 128, 3, 3), 147456, 178, 4312
4, features.10.wrapped_module.weight, (256, 128, 3, 3), 294912, 12, 427
5, features.12.wrapped_module.weight, (256, 256, 3, 3), 589824, 24, 1128
6, features.14.wrapped_module.weight, (256, 256, 3, 3), 589824, 5, 255
7, features.17.wrapped_module.weight, (512, 256, 3, 3), 1179648, 34, 890
8, features.19.wrapped_module.weight, (512, 512, 3, 3), 2359296, 75, 2954
9, features.21.wrapped_module.weight, (512, 512, 3, 3), 2359296, 216, 8162
10, features.24.wrapped_module.weight, (512, 512, 3, 3), 2359296, 17, 1299
11, features.26.wrapped_module.weight, (512, 512, 3, 3), 2359296, 11, 2977
12, features.28.wrapped_module.weight, (512, 512, 3, 3), 2359296, 1665, 100482
13, classifier.0.wrapped_module.weight, (4096, 25088), 

In [6]:

# def test_large_value_percentage():
#     tensor = torch.randint(-100, 100, size=(10, ))
#     print(tensor)
#     print(large_value_percentage(tensor))
# test_large_value_percentage()
    
# named_params = get_named_weights(model)
# n_larges = [] 
# weight_id = 0 
# for name, param in named_params:
#     tensor = param.data 
#     size = tensor.nelement()
#     num_large_values = large_value_number(tensor, 64)
#     n_larges.append((weight_id, name, num_large_values, round(num_large_values/size, 6)))
#     weight_id += 1
    
# # sort based on the number of large values 
# sorted_n_larges = sorted(n_larges, key = lambda x: x[2], reverse = True)
# for item in sorted_n_larges:
#     print(item)


In [7]:
# def large_value_max_distance(tensor):
#     '''tensor has to be 1-d tensor'''
#     size = tensor.nelement()
#     tensor = tensor.view(-1)
#     indexes = torch.nonzero((tensor > 63) + (tensor < -64)).view(-1)
#     diff = indexes[1:] - indexes[:-1]
#     d = torch.max(diff)
#     return d.item() 

# # print the maximum distance between large values 
# weight_id = 0 
# for name, param in named_params:
#     print(weight_id, name, large_value_max_distance(param.data))
#     weight_id += 1



In [8]:
# check the distribution of parameters 
# thr = 32
# layer_id = 0 
# for param_name, param in model.named_parameters():
#     if len(param.size()) < 2:
#         continue
#     counter = collections.Counter(np.abs(param.data.cpu().numpy().ravel())//thr)
#     tmp = sorted(counter.items(), key=lambda x: x[0])
#     values, counts = zip(*tmp)
#     percentages = [count/sum(list(counts)) for count in counts]
#     bar = plt.bar(values, percentages)
#     for rect in bar:
#         height = rect.get_height()
#         plt.text(rect.get_x() + rect.get_width()/2.0, height, '%.4f%%' %(height*100), ha='center', va='bottom')
# #     print(['%.2f' %(p) for p in percentages])
#     #plt.hist(param.data.cpu().numpy().ravel(), bins=10, density=True)
#     plt.xticks(values, [str(int(v)*thr+thr) for v in values])
#     plt.title('layer_id:'+str(layer_id) + ', '+ str(tuple(param.size())))
# #     plt.grid()
#     plt.ylim(0, 1.1)
#     plt.show()
#     layer_id += 1

In [9]:
# check the distribution of parameter values over all weights
# thr = 32
# total_values, num_weights = 0, 0 
# counter = collections.Counter()
# for param_name, param in model.named_parameters():
#     total_values += param.nelement()
#     if len(param.size()) < 2:
#         continue
#     num_weights += param.nelement()
#     counter.update(collections.Counter(np.abs(param.data.cpu().numpy().ravel())//thr + 1))
    
# tmp = sorted(counter.items(), key=lambda x: x[0])
# values, counts = zip(*tmp)
# total_weights = sum(list(counts))

# assert total_weights == num_weights
# print('#weights:', total_weights, ', #params:', total_values, 'percentage:', '%.6f' %(num_weights/total_values))

# percentages = [count/total_weights for count in counts]
# bar = plt.bar(values, percentages)
# for rect in bar:
#     height = rect.get_height()
#     plt.text(rect.get_x() + rect.get_width()/2.0, height, '%.4f%%' %(height*100), ha='center', va='bottom')
# #     print(['%.2f' %(p) for p in percentages])
# #plt.hist(param.data.cpu().numpy().ravel(), bins=10, density=True)
# # plt.xticks(values, [str(int(v)*thr) for v in values])
# plt.xticks(values, ['[0, 32]', '[32, 64]', '[64, 96]', '[96, 128]'])
# plt.title(model_name)
# #     plt.grid()
# plt.ylim(0, 1.1)
# plt.show()

# figdir = './figures/weight_distribution/'
# figname = model_name+'_int8_weight_distribution.pdf'
# plt.savefig(os.path.join(figdir, figname), bbox_inches='tight')


In [10]:
## load gradual_encoding_absolute result 
# lossy_encoding_results = {
#     'vgg16': './logs/vgg16/imagenet/int8/gradual_encoding_absolute',
#     'resnet18': './logs/resnet18/imagenet/int8/gradual_encoding_absolute',
#     'squeezenet': './logs/squeezenet/imagenet/int8/gradual_encoding_absolute',
# }


# def parse_gradual_encoding_absolute_log(log_path):
#     res = [] 
#     with open(os.path.join(log_path, 'logs.txt'), 'r') as f:
#         lines = f.readlines()
#         for line in lines:
#             line = line.strip()
#             if line:
#                 items = line.split(',')
#                 for item in items:
#                     if 'n_large' in item:
#                         n_large = int(item.split(':')[-1])
#                     if 'name' in item:
#                         name = item.split(':')[-1].strip()
#                     if 'accuracy' in item:
#                         accuracy = float(item.split(':')[-1])
#                 res.append((accuracy, name, n_large))
                
#     return res 

   

# log_path = lossy_encoding_results[model_name]
# res = parse_gradual_encoding_absolute_log(log_path)

# accuracies = [] 
# for a, b in zip(sorted_n_larges, res):
#     weight_id = a[0]
#     accuracy = b[0]
#     name = a[1]
#     n_large = a[2]
#     assert a[1] == b[1], 'param name not equal: %s, %s' %(a[1], b[1])
#     assert a[2] == b[2], 'param n_large not equal: %d, %d' %(a[2], b[2])
#     accuracies.append((weight_id, name, n_large, accuracy))
# for item in accuracies:
#     print(item)
            
            


In [11]:
# from right to the left, find the k for sorted_layers[:k].
# fault_free_accuracies = {
    
#     'vgg16': 79.36,
#     'resnet18': 76.46,
#     'squeezenet': 66.86,
# }
# max_acc = fault_free_accuracies[model_name]
# min_acc = 0
# accuracy_to_ids = [] 
# for i in range(len(accuracies)-1, -1, -1):
#     accuracy = accuracies[i][-1]
#     if accuracy > min_acc:
#         accuracy_to_ids.append((accuracy, [x[0] for x in accuracies[:i+1]]))
#         min_acc = accuracy 

# # add the option in which all weights are lossless
# accuracy_to_ids.append((0, []))
    
# for item in accuracy_to_ids:
#     print(item)

# # with open(os.path.join(log_path, 'steps.txt'), 'w') as f:
# #     for item in accuracy_to_ids:
# #         s = "%.2f" %(item[0]) + ': '+ ', '.join([str(x) for x in item[1]]) + '\n'
# #         f.write(s) 
    