## Problem 3

In [2]:
import numpy as np
import torch
from torch import nn
from tqdm import tqdm


class GradCollect:
    inputs_grad_collect = []
    weight_grad_collect = []
    bias_grad_collect = []

    weight_range_collect = []
    bias_range_collect = []
    activation_range_collect = []
    activation_range_collect_temp = []

    NUM_COMPUTE_LAYERS = 0
    @classmethod
    def retain_model_grad(cls, model):
        def retain_module_grad(nn_module):
            if isinstance(nn_module, nn.Conv2d) or isinstance(nn_module, nn.Linear):
                print('registering weight parameters of {} layer'.format(nn_module._get_name()))
                nn_module.weight.requires_grad_(True)
                weight_stats = np.power(2.0, np.ceil(np.log2(np.amax(np.absolute(nn_module.weight.data.cpu().numpy())))))
                GradCollect.weight_grad_collect.append((nn_module._get_name(), nn_module.weight))
                GradCollect.weight_range_collect.append((nn_module._get_name(), weight_stats))

        model.apply(retain_module_grad)
        GradCollect.NUM_COMPUTE_LAYERS = len(GradCollect.weight_grad_collect)
        GradCollect.activation_range_collect = np.zeros(GradCollect.NUM_COMPUTE_LAYERS)

    @classmethod
    def retain_inputs_grad(cls, model):
        def retain_nn_module_inputs(m):
            def retain_inputs(m, x):
                x = x[0]
                x = x.requires_grad_(True)
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                    stats = np.power(2.0,np.ceil(np.log2(np.amax(np.absolute(x.detach().cpu().numpy())))))
                    GradCollect.inputs_grad_collect.append((m._get_name(), x))
                    GradCollect.activation_range_collect_temp.append((m._get_name(), stats))
                    x.retain_grad()

            m.register_forward_pre_hook(retain_inputs)

        model.apply(retain_nn_module_inputs)


def get_noise_gains(model, data_loader, device):
    weight_gains = [0] * GradCollect.NUM_COMPUTE_LAYERS
    activation_gains = [0] * GradCollect.NUM_COMPUTE_LAYERS
    data_size = len(data_loader)
    first_time = True
    for inputs, targets in tqdm(data_loader):
        inputs = inputs.to(device).requires_grad_(True)
        GradCollect.inputs_grad_collect = []
        GradCollect.activation_range_collect_temp = []
        inputs.retain_grad()
        outputs = model(inputs)
        outputs = outputs.sum(0)

        GradCollect.activation_range_collect = np.vstack([
            np.array([num for _, num in GradCollect.activation_range_collect_temp]),
            GradCollect.activation_range_collect]
        ).max(0)

        Z_fl, Y_fl = outputs.max(0)
        num_lbls = outputs.size(0)
        for i in range(num_lbls):
            if i != Y_fl:
                output_difference = Z_fl - outputs[i]
                output_difference.backward(retain_graph=True)
                with torch.no_grad():
                    denominator = 24 * (output_difference ** 2)
                    for idx in range(GradCollect.NUM_COMPUTE_LAYERS):
                        weight = GradCollect.weight_grad_collect[idx][1]
                        weight_grad = weight.grad
                        if first_time:
                            weight_gains[idx] = (weight_grad ** 2).sum() / denominator
                        else:
                            weight_gains[idx].add_((weight_grad ** 2).sum() / denominator)

                        weight.grad.zero_()

                    for idx, (module_name, activations) in enumerate(GradCollect.inputs_grad_collect):
                        grad = activations.grad
                        if first_time:
                            activation_gains[idx] = (grad ** 2).sum() / denominator
                        else:
                            activation_gains[idx].add_((grad ** 2).sum() / denominator)

                        activations.grad.zero_()

                    first_time = False

    for idx in range(GradCollect.NUM_COMPUTE_LAYERS):
        activation_gains[idx] = activation_gains[idx].cpu().numpy() / data_size
        weight_gains[idx] = weight_gains[idx].cpu().numpy() / data_size

    return weight_gains, activation_gains


def get_normalized_noise_gains(wg_coarse, ag_coarse):
    adjusted_wg_noise_gains = np.zeros(GradCollect.NUM_COMPUTE_LAYERS)
    adjusted_ag_noise_gains = np.zeros(GradCollect.NUM_COMPUTE_LAYERS)

    for l in range(GradCollect.NUM_COMPUTE_LAYERS):
        adjusted_wg_noise_gains[l] = wg_coarse[l] * np.square(GradCollect.weight_range_collect[l][1])
        adjusted_ag_noise_gains[l] = ag_coarse[l] * np.square(GradCollect.activation_range_collect[l])

    min_ag = adjusted_ag_noise_gains.min()
    min_wg = adjusted_wg_noise_gains.min()

    least_gain = min(min_ag,min_wg)
    return adjusted_wg_noise_gains, adjusted_ag_noise_gains, least_gain


def get_precision_offsets(wg, ag, least_gain):
    w_offsets = np.zeros(GradCollect.NUM_COMPUTE_LAYERS)
    a_offsets = np.zeros(GradCollect.NUM_COMPUTE_LAYERS)

    for l in range(GradCollect.NUM_COMPUTE_LAYERS):
        w_offsets[l] = np.round(0.5 * np.log2(wg[l] / least_gain))
        a_offsets[l] = np.round(0.5 * np.log2(ag[l] / least_gain))

    return w_offsets, a_offsets

In [145]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
activation_dr= np.load("activation_dynamic_range.npy") 
#each index of the array corresponds to each layer and that will have maximum value given by
weight_dr=np.load("weight_dynamic_range.npy")
#from nn_inference import precision_profiler
import numpy as np

Global_Bx = 100
def quantize(data, lvls): 
    f = lambda x: quantizationLevels[np.argmin(np.abs(x-quantizationLevels))]
    return np.vectorize(f)(data)
    
def quantizeWeight(W,r,BW): #r the range should be dividing W and multiplying Wq but ya.. not working so let's skip that
    Wq = np.minimum(np.round(W/r*np.power(2.0,BW-1.0))*np.power(2.0,1.0-BW),1.0-np.power(2.0,1.0-BW))*r
    return Wq

def UniformLevels(r,m, w_or_a):
    #this will place levels at either end of r and uniformly between
    if w_or_a=='w':
        return np.arange(-r,r+r/m,2*r/(m-1))
    else:
        return np.arange(0,r+r/m,r/(m-1))

class AverageMeter:
    """Computes and stores the average and current value"""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter:
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = ProgressMeter._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print2(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @classmethod
    def _get_batch_fmtstr(cls, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


class Net(nn.Module):
    def __init__(self, num_classes=10):
        super(Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=2), 
            #quant(),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 192, kernel_size=3, padding=2), 
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        
        self.fc_layers = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

        self.nclass = num_classes
        #self.count=0 #this counts how many time forward is called to help me.

#     def forward(self, x):
#         """About the forward function: this is called once, for a run through, 20 times for a batch"""
#        # print("in forward:",self.count, np.shape(x))
#        # self.count+=1
#         conv_features = self.features(x) #this line calls x (the input) with the featuers Sequential
        
#         flatten = conv_features.view(conv_features.size(0), -1)
#         fc = self.fc_layers(flatten) #this calls x(the input) with fc_layers () Sequential
#         return fc

    def forward(self,x):
        global Global_Bx
        in_x = x
        
        for layer_idx,f in enumerate(self.features):
            #print("At layer:",layer_idx)
            weight_layers = [0,3,6,8,10,14,17,19]
            if layer_idx in weight_layers:
                #just need to quantize before and after weight layers
                #r is the dynamic range of the values
                r_idx =  weight_layers.index(layer_idx)
                r_w = weight_dr[r_idx]
                r_a = activation_dr[r_idx]
                m = Global_Bx#32 # m is precision
                print("r_idx,r_w,r_a",r_idx,r_w,r_a)
                
                #quantize wieghts before weight layer
                #inp = in_x.detach().numpy()
                #lvls = UniformLevels(r_w,m, 'w')
                #inp_quant = #quantize(inp, lvls)
                #in_x = in_x/r_w
               # print("in_x pre quant w", in_x.size())
                inq = in_x[0,0,0]
                #print("in weights prequant:",in_x[0,0,0])
                in_x = quantizeWeight(in_x,r_w,m);#torch.from_numpy(inp_quant).float()
                #print("in weights postquant:",in_x[0,0,0])
                inqed = in_x[0,0,0]
                print(torch.eq(inq,inqed))
#                 if not torch.eq(inq,inqed):
#                     print("inq,inqued",inq,inqued)
             #   print("in_x post quant w", in_x.size())
                
                #run the layer
                out_x = f(in_x)
                
                #quantize activations after weight layer
                #out = out_x.detach().numpy()
                #lvls = UniformLevels(r_a,m, 'a')
                #out_quant = quantize(out, lvls)
         #       out_x = quantizeWeight(out_x,r_a,m) #torch.from_numpy(out_quant).float()
                #print("out_x post quant", out_x.size())

                in_x = out_x
            else:
                #no need to quantize if it's not a wieght layer
                out_x = f(in_x)
                in_x = out_x
                
        conv_features = out_x
        
        flatten = conv_features.view(conv_features.size(0), -1)
        
        #do the same thing as above here to quantize these layers
        fc = self.fc_layers(flatten) #this calls x(the input) with fc_layers () Sequential
        return fc

class quant(nn.Module):
    #when i tried doing this and then adding it to the sequential list
    # it gave an error saying the model it loaded didn't match the model it is testing on
    # or that's what I interpreted the error as
    def __init__(self):
        super(quant, self).__init__()
        print("In sequential!!")
    def forward(self, x):
        print("shape",np.shape(x))

def get_datasets(*args, **kwargs):
    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]
    )

    trainset = torchvision.datasets.CIFAR10(train=True, transform=transform, *args, **kwargs)
    testset = torchvision.datasets.CIFAR10(train=False, transform=transform, *args, **kwargs)
    return trainset, testset


def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

    return trainloader, testloader


def get_model(model_src_path, device='cpu'):
    model = Net(num_classes=10)
    state_dict = torch.load(model_src_path, map_location=device)
    model.load_state_dict(state_dict)
    return model


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        if type(output) is tuple:
            _, _, output = output
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res, pred[0, :]


def eval_single_batch_compute(x, y, model):
    output = model(x)
    accs, predictions = accuracy(output, y, topk=(1,))
    acc = accs[0]
    return acc, predictions


def eval_model(model, dataloader, print_acc=False, device='cpu', log_update_feq=20):
    model.eval()
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(
        len(dataloader),
        [top1],
        prefix='Evaluating Batch'
    )

    with torch.no_grad():
        for idx, data in enumerate(dataloader):
            x, y = data
            x.requires_grad = True
            x = x.to(device)
            y = y.to(device)
            n_data = y.size(0)

            acc, predictions = eval_single_batch_compute(x, y, model)

            top1.update(acc.item(), n_data)
            if idx % log_update_feq == log_update_feq - 1:
                progress.print2(idx + 1)

        if print_acc:
            print(' * Acc@1 {top1.avg:.3f}'.format(top1=top1))

    return top1.avg


def feedforward():
    device = 'cpu'
    print('using device:', device)
    trainset, testset = get_datasets(root='./data', download=True)
    _, testloader = get_dataloaders(trainset, testset, batch_size=100, num_worker=16)

    model_src_path = 'model.tar'  # todo you need to set the path to downloaded model !!
    model = get_model(model_src_path, device)
    model = model.to(device)
    Global_Bx = 64
    eval_model(model, testloader, print_acc=True, device=device)


def compute_precision_offsets():
    device = 'cuda'
    print('using device:', device)
    trainset, testset = get_datasets(root='./data', download=True)
    trainloader, testloader = get_dataloaders(trainset, testset, batch_size=500, num_worker=32)

    model_src_path = 'model.tar'
    model = get_model(model_src_path, device)
    model = model.to(device)
    precision_profiler.GradCollect.retain_model_grad(model)
    precision_profiler.GradCollect.retain_inputs_grad(model)

    wg, ag = precision_profiler.get_noise_gains(model, trainloader, device)
    wg, ag, least_gain = precision_profiler.get_normalized_noise_gains(wg, ag)
    print(precision_profiler.GradCollect.weight_range_collect)
    print(precision_profiler.GradCollect.activation_range_collect)
    w_offsets, a_offsets = precision_profiler.get_precision_offsets(wg, ag, least_gain)
    print(w_offsets, a_offsets)
    np.save(arr=w_offsets,file='weight_offsets.npy')
    np.save(arr=a_offsets,file='activation_offsets.npy')
    np.save(arr=precision_profiler.GradCollect.activation_range_collect, file='activation_dynamic_range.npy')
    np.save(arr=np.array([v for k, v in precision_profiler.GradCollect.weight_range_collect]), file='weight_dynamic_range.npy')


if __name__ == '__main__':
    feedforward()
    

using device: cpu
Files already downloaded and verified
Files already downloaded and verified
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1

r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False,  True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, 

tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, Tru

r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 1

tensor([False,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([False, False, False, False, False, False, False, False, False, False,
         True,  True,  True,  True, False, False, False])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25

r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 1

r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True,

r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 1 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True])
r_idx,r_w,r_a 2 0.5 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 3 0.25 16.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 4 0.25 4.0
tensor([True, True, True, True, True, True, True, True, True])
r_idx,r_w,r_a 0 1.0 1.0
tensor([True, True, True, True, True, True, True, True, True,

In [160]:
#4.3 --> Write down gradient and update equation!
#3.1 you need dynamic ranges, then do quantization
#3.2 you need all four files
activation_dr= np.load("activation_dynamic_range.npy") 
#each index of the array corresponds to each layer and that will have maximum value given by
weight_dr=np.load("weight_dynamic_range.npy")
print("activation range: 0 to ",activation_dr)
print("weight range: negative to",weight_dr)
weight_offsets = np.load("weight_offsets.npy")
activ_offsets = np.load("weight_offsets.npy")
print("weight_off",weight_offsets)
print("activation_off",activ_offsets)
# 4 6 7, for any of the weights, put all weights in plot and give the numbers
"""4) Hints:
Plot the convergence curve means plot MSE convergence curve, and plot weights!
convergence around 3.1, conversion around -.7 or -.8
you will see many different values
pay attention to initialization! That will change result a lot.

if you use a lot of samples, it should match your analytic evaluation
"""













# problems with 3: probs need a diff quant function for activations (0 to r_a) i think this funciton is doing -r to r
# can test on something small


activation range: 0 to  [ 1. 16. 16. 16.  4.  4.  4.  8.]
weight range: negative to [1.      0.5     0.5     0.25    0.25    0.03125 0.03125 0.5    ]
weight_off [4. 5. 6. 5. 4. 0. 0. 4.]
activation_off [4. 5. 6. 5. 4. 0. 0. 4.]


'4) Hints:\nPlot the convergence curve means plot MSE convergence curve, and plot weights!\nconvergence around 3.1, conversion around -.7 or -.8\nyou will see many different values\npay attention to initialization! That will change result a lot.\n\nif you use a lot of samples, it should match your analytic evaluation\n'

In [7]:
#p7 critical path delay, most amount of time you need to get to end. 

In [151]:
def quantize(data, lvls): 
    f = lambda x: quantizationLevels[np.argmin(np.abs(x-quantizationLevels))]
    sq = np.vectorize(f)(data)
    return sq
        
def qt(data,lvls):
    for i in data:
        for j in i:
            for k in j:
                print(k)
                k = quantizationLevels[np.argmin(np.abs(k-quantizationLevels))]
                
def UniformLevels(r,m, w_or_a):
    #this will place levels at either end of r and uniformly between
    if w_or_a=='w':
        return np.arange(-r,r+r/m,2*r/(m-1))
    else:
        return np.arange(0,r+r/m,r/(m-1))

    
print(UniformLevels(1,5,'a'))
def qf(data:float):
    return quantizationLevels[np.argmin(np.abs(data-quantizationLevels))]

data = torch.tensor([[[.6,1.44],[1,.02]],[[.9,1.44],[-.9,.02]]])#stuff to quantize
quantizationLevels = UniformLevels(1,5,'w')
f = lambda x: quantizationLevels[np.argmin(np.abs(x-quantizationLevels))]
#out = np.vectorize(f)(data) #data.flatten()
#print(qf(data))

data = torch.tensor([[[.0321,.2342],[-.4523,.124]],[[.142,-.0544],[.093,-.409]]])#stuff to quantize

def quantizeWeight(W,BW):
    Wq = np.minimum(np.round(W*np.power(2.0,BW-1.0))*np.power(2.0,1.0-BW),1.0-np.power(2.0,1.0-BW))
    return Wq

#divide by dynamic range and then multiply again
print(data)
print(quantizeWeight(data,7))


[0.   0.25 0.5  0.75 1.  ]
tensor([[[ 0.0321,  0.2342],
         [-0.4523,  0.1240]],

        [[ 0.1420, -0.0544],
         [ 0.0930, -0.4090]]])
tensor([[[ 0.0312,  0.2344],
         [-0.4531,  0.1250]],

        [[ 0.1406, -0.0469],
         [ 0.0938, -0.4062]]])


In [18]:
device = 'cpu'
print('using device:', device)
trainset, testset = get_datasets(root='./data', download=True)
_, testloader = get_dataloaders(trainset, testset, batch_size=100, num_worker=16)

model_src_path = 'model.tar'  # todo you need to set the path to downloaded model !!
model = get_model(model_src_path, device)

#this loop normalizes all the parameters
for i in [0,3,6,8,10]:
    for p in model.features[i].parameters(): 
        """
        # you can intercept the parameter weight here just quantize them before the go in with weight quantization
        # and after they come out with activation quantizations
        """
        #print(p.type())
        #quantize weights
        p.data = torch.from_numpy(np.zeros(p.size())).float()
        #quantize activations
print("weights",model.features[10].weight)


# model = model.to(device)
eval_model(model, testloader, print_acc=True, device=device)

using device: cpu
Files already downloaded and verified
Files already downloaded and verified
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
torch.FloatTensor
weights Parameter containing:
tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         ...,

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         ...,

10.0

In [None]:
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())


## 3.3

In [168]:
NN = [[3,64,3,2],
         [64,192,3,2],
         [192,384,3,1],
         [384,256,3,1],
         [256,256,3,1],
         [4096,4096],
         [4096,4096],
         [4096,10],
         ]
img_size = 32*32

Bl = 8

CC, RC = 0, 0
for l in NN:
    if len(l)==4: 
        #convolutional layer
        in_chan, out_chan, k_size, stride = l
        Nl = img_size * out_chan / stride
        Dl = k_size**2 * in_chan
        Rla = out_chan * img_size
        Rlw = k_size**2 * in_chan*out_chan
    else:
        #linear layer
        numrows, numcols = l
        Nl = numrows
        Dl = numcols
        Rla = numrows
        RLw = numrows*numcols
    CC += Nl * (Dl * Bl * Bl + (Dl - 1) * (Bl + Bl + np.log2(Dl) - 1))
    RC += Rla * Bl + Rlw * Bl

print("Constant 8 bit precision:")
print("Computational Cost:","{:,}".format(CC) ,"\nRepresentational Cost:","{:,}".format(RC))

Constant 8 bit precision:
Computational Cost: 205,764,450,295.90164 
Representational Cost: 41,694,720


In [169]:
weight_offsets = np.load("weight_offsets.npy")

CC, RC = 0, 0
for idx,l in enumerate(NN):
    if len(l)==4: 
        #convolutional layer
        in_chan, out_chan, k_size, stride = l
        Nl = img_size * out_chan / stride
        Dl = k_size**2 * in_chan
        Rla = out_chan * img_size
        Rlw = k_size**2 * in_chan*out_chan
    else:
        #linear layer
        numrows, numcols = l
        Nl = numrows
        Dl = numcols
        Rla = numrows
        RLw = numrows*numcols
    Bl = 6 + weight_offsets[idx]
    CC += Nl * (Dl * Bl * Bl + (Dl - 1) * (Bl + Bl + np.log2(Dl) - 1))
    RC += Rla * Bl + Rlw * Bl

print("6 bit precision + offsets for weights and activations:")
print("Weights Offset:", weight_offsets)
print("Activation Offset:",activ_offsets)
print("Computational Cost:","{:,}".format(CC) ,"\nRepresentational Cost:","{:,}".format(RC))

6 bit precision + offsets for weights and activations:
Weights Offset: [4. 5. 6. 5. 4. 0. 0. 4.]
Activation Offset: [4. 5. 6. 5. 4. 0. 0. 4.]
Computational Cost: 349,317,797,879.9017 
Representational Cost: 50,934,656.0
