# ResNet18

In [102]:
import torch
import torchvision
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms

In [103]:
#trained weights
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)

Using cache found in /Users/azatkariuly/.cache/torch/hub/pytorch_vision_v0.6.0


In [104]:
# Download an example image from the pytorch website
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [105]:
input_image = Image.open(filename)

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

model.eval()

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)


In [106]:
#target
print(output.argmax())

tensor(258)


# Uniform Affine Quantizer
Formula: <br>

<center><i>x_int = round(x/scale) + zero_point</i></center>
<center><i>x_Q = clamp(0, qmax-1, x_int)</i></center>

De-quantization, <center><i>x = (x_Q - zero_point) * scale<i></center>

In [107]:
from collections import namedtuple
QTensor = namedtuple('QTensor', ['tensor', 'scale', 'zero_point'])

In [108]:
def calcScaleZeroPoint(min_val, max_val,num_bits=8):
    # Calc Scale and zero point of next 
    qmin = 0.
    qmax = 2.**num_bits - 1.

    scale = (max_val - min_val) / (qmax - qmin)

    initial_zero_point = qmin - min_val / scale

    zero_point = 0
    if initial_zero_point < qmin:
        zero_point = qmin
    elif initial_zero_point > qmax:
        zero_point = qmax
    else:
        zero_point = initial_zero_point

    zero_point = int(zero_point)

    return scale, zero_point

def quantize_tensor(x, num_bits=8, min_val=None, max_val=None):
    
    if not min_val and not max_val: 
        min_val, max_val = x.min(), x.max()

    qmin = 0.
    qmax = 2.**num_bits - 1.

    scale, zero_point = calcScaleZeroPoint(min_val, max_val, num_bits)
    q_x = zero_point + x / scale
    q_x.clamp_(qmin, qmax).round_()
    
    return QTensor(tensor=q_x, scale=scale, zero_point=zero_point)

def dequantize_tensor(q_x):
    return q_x.scale * (q_x.tensor.float() - q_x.zero_point)

# Forward pass to support Quantization 

In [109]:
def quantizeLayer(x, layer, stat, scale_x, zp_x, num_bits=8):
    # cache old values
    W = layer.weight.data

    # quantise weights
    w = quantize_tensor(layer.weight.data) 

    layer.weight.data = w.tensor

    # This is Quantisation Artihmetic
    scale_w = w.scale
    zp_w = w.zero_point
  
    scale_next, zero_point_next = calcScaleZeroPoint(min_val=stat['min'], max_val=stat['max'])

    # Preparing input by shifting
    X = scale_x*(x.float() - zp_x)
    layer.weight.data = scale_w*(layer.weight.data.float() - zp_w)

    # All int computation
    x = layer(X)/scale_next + zero_point_next
    
    #make sure that x stays in desired range
    x.clamp_(0., 2.**num_bits - 1.).round_()

    # Reset weights for next forward pass
    layer.weight.data = W
  
    return x, scale_next, zero_point_next

In [110]:
def quantizeBlock(x, block, stats, stats_next, scale_next, zero_point_next, num_bits=8):
    
    #copy x
    identity = x
    scale_next1 = scale_next
    zero_point_next1 = zero_point_next
    
    #conv1
    x, scale_next, zero_point_next = quantizeLayer(x, block.conv1, stats['bn1'], scale_next, zero_point_next)
    
    #bn1
    x, scale_next, zero_point_next = quantizeLayer(x, block.bn1, stats['conv2'], scale_next, zero_point_next)
    
    #relu
    x = F.relu(x)
    
    #conv2
    x, scale_next, zero_point_next = quantizeLayer(x, block.conv2, stats['bn2'], scale_next, zero_point_next)    
    
    #bn2
    x, scale_next, zero_point_next = quantizeLayer(x, block.bn2, stats_next, scale_next, zero_point_next)

    #if has downsample, updates the copy of x
    if block.downsample is not None:
        #0
        identity, scale_next1, zero_point_next1 = quantizeLayer(identity, block.downsample[0], 
                                                              stats['downsample']['0'],
                                                                scale_next1, zero_point_next1)
        #1
        identity, scale_next1, zero_point_next1 = quantizeLayer(identity, block.downsample[1], 
                                                                stats['downsample']['1'], 
                                                                scale_next1, zero_point_next1)
        
    
    #sums new value and copy value of x
    scale_z, zero_point_z = calcScaleZeroPoint(min_val=stats_next['min'], max_val=stats_next['max'])
    x = ((x-zero_point_next)*scale_next + (identity-zero_point_next1)*scale_next1)/scale_z + zero_point_z
        
    #make sure that x stays in desired range
    x.clamp_(0., 2.**num_bits - 1.).round_()
    
    #relu
    x = F.relu(x)
    
    #return
    return x, scale_z, zero_point_z
    

In [77]:
def quantForward(model, x, stats):
    #quantize input before inputing to layer
    x = quantize_tensor(x, min_val=stats['conv1']['min'], max_val=stats['conv1']['max'])
    #print('input: ', x)
    
    #conv1
    x, scale_next, zero_point_next = quantizeLayer(x.tensor, model.conv1, stats['bn1'], x.scale, x.zero_point)
    #print('conv1: ', x)   
    
    #bn1
    x, scale_next, zero_point_next = quantizeLayer(x, model.bn1, stats['layer1_0']['conv1'], scale_next, zero_point_next)
    #print('bn1: ', x)
    
    #relu
    x = model.relu(x)
    #print('relu: ', x)
    
    #maxpool
    x = model.maxpool(x)
    #print('maxpool: ', x)
    
    #layer1
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer1[0], stats['layer1_0'], 
                                                   stats['layer1_1']['conv1'], scale_next, zero_point_next)
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer1[1], stats['layer1_1'], 
                                                   stats['layer2_0']['conv1'], scale_next, zero_point_next)
    #print('layer1: ', x)
    
    #layer2
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer2[0], stats['layer2_0'], 
                                                   stats['layer2_1']['conv1'], scale_next, zero_point_next)
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer2[1], stats['layer2_1'], 
                                                   stats['layer3_0']['conv1'], scale_next, zero_point_next)
    #print('layer2: ', x)
    
    #layer3
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer3[0], stats['layer3_0'], 
                                                   stats['layer3_1']['conv1'], scale_next, zero_point_next)
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer3[1], stats['layer3_1'], 
                                                   stats['layer4_0']['conv1'], scale_next, zero_point_next)
    #print('layer3: ', x)
    
    #layer4
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer4[0], stats['layer4_0'], 
                                                   stats['layer4_1']['conv1'], scale_next, zero_point_next)
    x, scale_next, zero_point_next = quantizeBlock(x, model.layer4[1], 
                                                   stats['layer4_1'], stats['fc'], scale_next, zero_point_next)
    #print('layer4: ', x)
    
    #dequantize before the last layer
    x = dequantize_tensor(QTensor(tensor=x, scale=scale_next, zero_point=zero_point_next))
    #print('dequant: ', x)
    
    #avgpool
    x = model.avgpool(x.float())
    #print('avgpool: ', x)
    
    #reshape
    x = x.reshape(x.shape[0], -1)
    #print('reshape: ', x)
    
    #fc (fully connected layer)
    x = model.fc(x)
    #print('fc: ', x)
    
    #return
    return x

# Get MIN and MAX for Quantizing

In [78]:
def statsLayer(data, layer):
    stats_temp = {}
    #conv1
    stats_temp['conv1'] = {'max': data.max().detach(), 'min': data.min().detach()}
    data1 = layer.conv1(data)
    
    #bn1
    stats_temp['bn1'] = {'max': data1.max().detach(), 'min': data1.min().detach()}
    data1 = layer.bn1(data1)
    
    data1 = layer.relu(data1)
    
    #conv2
    stats_temp['conv2'] = {'max': data1.max().detach(), 'min': data1.min().detach()}
    data1 = layer.conv2(data1)
    
    #bn2
    stats_temp['bn2'] = {'max': data1.max().detach(), 'min': data1.min().detach()}
    data1 = layer.bn2(data1)
    
    #downsample
    if layer.downsample is not None:
        stats_temp_1 = {}
        #0
        stats_temp_1['0'] = {'max': data.max().detach(), 'min': data.min().detach()}
        data = layer.downsample[0](data)
        #1
        stats_temp_1['1'] = {'max': data.max().detach(), 'min': data.min().detach()}
        data = layer.downsample[1](data)
        
        stats_temp['downsample'] = stats_temp_1
        
    
    data = data1 + data
    
    data = layer.relu(data)
    
    return data, stats_temp
        

In [79]:
def gatherStats(model, data):
    stats = {}
    
    #conv1
    stats['conv1'] = {'max': data.max(), 'min': data.min()}
    data = model.conv1(data)
    
    #bn1
    stats['bn1'] = {'max': data.max().detach(), 'min': data.min().detach()}
    data = model.bn1(data)
    
    data = model.relu(data)
    data = model.maxpool(data)
    
    #layer1
    data, stats['layer1_0'] = statsLayer(data, model.layer1[0])
    data, stats['layer1_1'] = statsLayer(data, model.layer1[1])
    #layer2
    data, stats['layer2_0'] = statsLayer(data, model.layer2[0])
    data, stats['layer2_1'] = statsLayer(data, model.layer2[1])
    #layer3
    data, stats['layer3_0'] = statsLayer(data, model.layer3[0])
    data, stats['layer3_1'] = statsLayer(data, model.layer3[1])
    #layer3
    data, stats['layer4_0'] = statsLayer(data, model.layer4[0])
    data, stats['layer4_1'] = statsLayer(data, model.layer4[1])
    
    #avgpool
    data = model.avgpool(data)
    
    data = data.reshape(data.shape[0], -1)
    
    #fc
    stats['fc'] = {'max': data.max().detach(), 'min': data.min().detach()}
    data = model.fc(data)
    
    return stats

# Start Quantizing

In [97]:
import copy
q_model = copy.deepcopy(model)

In [98]:
stats = gatherStats(q_model, input_batch)

In [99]:
q_model.eval()

with torch.no_grad():
    output = quantForward(q_model, input_batch, stats)

In [100]:
output.argmax()

tensor(911)