In [1]:
import torchvision.transforms.functional as TF
import random

def my_segmentation_transforms(image, segmentation, new_img_h, new_img_w, crop_size=384):
    """Apply the same random flip and random square crop to an image and its label map.

    With probability 0.5 both arrays are flipped vertically *and* horizontally
    (equivalent to a 180 degree rotation). Afterwards one random
    ``crop_size`` x ``crop_size`` window is cut from both arrays, so every
    pixel keeps its label.

    Args:
        image: array-like indexed as (row, col, channel).
        segmentation: array-like indexed as (row, col), aligned with ``image``.
        new_img_h: number of rows of both inputs.
        new_img_w: number of columns of both inputs.
        crop_size: side length of the square patch (default 384, the value
            that used to be hard-coded).

    Returns:
        Tuple ``(image_patch, segmentation_patch)`` of C-contiguous numpy
        arrays with shapes ``(crop_size, crop_size, C)`` and
        ``(crop_size, crop_size)``.

    Raises:
        ValueError: if ``crop_size`` exceeds ``new_img_h`` or ``new_img_w``.
    """
    # Local import: this function is defined in a cell that runs before the
    # notebook-level ``import numpy as np``.
    import numpy as np

    if crop_size > new_img_h or crop_size > new_img_w:
        raise ValueError(
            "crop_size {} does not fit into a {}x{} image".format(crop_size, new_img_h, new_img_w)
        )

    image = np.asarray(image)
    segmentation = np.asarray(segmentation)

    # A single draw decides the flip for both arrays so they stay aligned.
    prob = random.random()
    if prob >= 0.5:
        # Vertical + horizontal flip done directly on the arrays; this avoids
        # a round-trip through PIL and works for any dtype.
        image = image[::-1, ::-1]
        segmentation = segmentation[::-1, ::-1]

    # ``high`` is exclusive in np.random.randint, hence the +1: the last valid
    # offset must be reachable and an image exactly crop_size wide must work.
    start_x = np.random.randint(low=0, high=(new_img_h - crop_size + 1))  # row offset
    start_y = np.random.randint(low=0, high=(new_img_w - crop_size + 1))  # column offset

    image = image[start_x:start_x + crop_size, start_y:start_y + crop_size, :]
    segmentation = segmentation[start_x:start_x + crop_size, start_y:start_y + crop_size]

    # Flipped views have negative strides, which torch.from_numpy rejects;
    # hand back plain contiguous arrays.
    return np.ascontiguousarray(image), np.ascontiguousarray(segmentation)

In [2]:
import torch
import torch.utils.data
from torchvision import transforms, datasets, models

import numpy as np
import cv2
import os

import matplotlib.pyplot as plt

# Cityscapes city sub-folders (each with a trailing slash) used for the
# training and validation splits; the order is the order folders are scanned.
train_dirs = [
    city + "/"
    for city in (
        "jena", "zurich", "weimar", "ulm", "tubingen", "stuttgart",
        "strasbourg", "monchengladbach", "krefeld", "hanover",
        "hamburg", "erfurt", "dusseldorf", "darmstadt", "cologne",
        "bremen", "bochum", "aachen",
    )
]
val_dirs = [city + "/" for city in ("frankfurt", "munster", "lindau")]
#test_dirs = ["berlin", "bielefeld", "bonn", "leverkusen", "mainz", "munich"]

#train_dirs = ["aachen/"]
#cityscapes_data_path ="/home/kaustavb/cityscapes/leftImg8bit_trainvaltest_small"
#cityscapes_meta_path = "/home/kaustavb/cityscapes/gtFine_trainvaltest_small"

class DatasetTrain(torch.utils.data.Dataset):
    """Cityscapes (image, label) pairs with a random flip and crop applied on access.

    Images are looked up under ``<cityscapes_data_path>/leftImg8bit/<set1>/<city>/``
    and labels under ``<cityscapes_meta_path>/label_imgs/<set1>/<city>/<img_id>.png``.
    The city folders come from the module-level ``train_dirs`` (when
    ``set1 == "train"``) or ``val_dirs`` (any other value).

    Args:
        set1: split name, used verbatim as a path component.
        cityscapes_data_path: root folder of the ``leftImg8bit`` images.
        cityscapes_meta_path: root folder of the ``label_imgs`` label maps.
        transform: optional callable applied to the float image scaled to [0, 1].
            When it is ``None`` the raw numpy arrays are returned.
    """

    # Only files with this suffix are images; anything else in a city folder is ignored.
    _IMG_SUFFIX = "_leftImg8bit.png"

    def __init__(self, set1, cityscapes_data_path, cityscapes_meta_path, transform=None):
        self.img_dir = cityscapes_data_path + "/leftImg8bit/" + set1 + "/"
        self.label_dir = cityscapes_meta_path + "/label_imgs/" + set1 + "/"

        # Native resolution of the files and the resolution they are resized to.
        self.img_h = 1024
        self.img_w = 2048

        self.new_img_h = 512
        self.new_img_w = 1024

        self.examples = []

        # Select the city folders of the requested split.
        if set1 == "train":
            dirs = train_dirs
        else:
            dirs = val_dirs
        # How many times each (image, label) pair is listed; 1 means no oversampling.
        n_append = 1

        for _dir in dirs:
            img_dir_path = self.img_dir + _dir

            # Sorted so that an index always refers to the same file,
            # whatever order the file system returns.
            for file_name in sorted(os.listdir(img_dir_path)):
                if not file_name.endswith(self._IMG_SUFFIX):
                    continue  # stray file (e.g. .DS_Store), not an image
                img_id = file_name.split(self._IMG_SUFFIX)[0]

                example = {
                    "img_path": img_dir_path + file_name,
                    "label_img_path": self.label_dir + _dir + img_id + ".png",
                    "img_id": img_id,
                }

                for _ in range(n_append):
                    self.examples.append(example)

        self.num_examples = len(self.examples)
        self.transform = transform

        # Lookup table from the 34 raw label ids to training ids (19 is the
        # catch-all id); kept as an attribute, not applied in __getitem__.
        self.trainID = [19,19,19,19,19,19,19,0,1,19,19,2,3,4,19,19,19,5,19,6,7,8,9,10,11,12,13,14,15,19,19,16,17,18]

    @staticmethod
    def _read(path):
        """Read ``path`` unchanged with OpenCV, failing loudly when it cannot be read."""
        data = cv2.imread(path, -1)
        if data is None:  # cv2.imread signals failure by returning None
            raise FileNotFoundError("could not read image file: " + path)
        return data

    def __getitem__(self, index):
        """Return the ``index``-th ``(img, label_img)`` pair after augmentation.

        With a transform: ``img`` is ``transform(float image in [0, 1])`` and
        ``label_img`` is an int64 tensor. Without one: both are numpy arrays.

        Raises:
            FileNotFoundError: if the image or the label file cannot be read.
        """
        example = self.examples[index]
        target_size = (self.new_img_w, self.new_img_h)  # cv2 expects (width, height)

        # Nearest-neighbour resizing for both, so the image stays aligned with
        # the label map and labels remain valid class ids (no blending).
        # NOTE(review): cv2.imread yields BGR channel order; confirm the
        # normalisation statistics passed via `transform` expect that order.
        img = cv2.resize(self._read(example["img_path"]), target_size,
                         interpolation=cv2.INTER_NEAREST)
        label_img = cv2.resize(self._read(example["label_img_path"]), target_size,
                               interpolation=cv2.INTER_NEAREST)

        # Joint random flip + crop, so image and label get the same geometry.
        img, label_img = my_segmentation_transforms(img, label_img, self.new_img_h, self.new_img_w)

        if self.transform is not None:
            img = ((img.astype('float'))/255.0)
            img = self.transform(img)
            # Labels stay integer class ids; they are not rescaled.
            label_img = torch.from_numpy(label_img.astype(np.int64))

        return (img, label_img)

    def __len__(self):
        """Number of listed examples."""
        return self.num_examples


In [3]:
# NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
cityscapes_data_path ="/home/kaustavb/cityscapes/leftImg8bit_trainval"
cityscapes_meta_path = "/home/kaustavb/cityscapes/meta" #gtFine_trainval

# Image preprocessing: numpy array -> tensor, then ImageNet mean/std normalisation.
trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Both splits share the same preprocessing (and the same random flip/crop).
train_set, val_set = [
    DatasetTrain(split, cityscapes_data_path, cityscapes_meta_path, transform=trans)
    for split in ("train", "val")
]
#train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           #batch_size=10, shuffle=True,
                                           #num_workers=1)


In [None]:
'''
#dict_id = {}
trainID = [19,19,19,19,19,19,19,0,1,19,19,2,3,4,19,19,19,5,19,6,7,8,9,10,11,12,13,14,15,19,19,16,17,18]
for idx in range(1):#len(train_set)):
    #dict_id[idx]={}
    train_label = 0 #torch.((256,256))
    for j in range(34):
         #dict_id[idx][j] = (train_set[idx][1]==j)
        train_label += trainID[j]*(train_set[idx][1]==j)
    plt.imshow(train_label)
    plt.show()
'''

In [4]:
# Datasets and their loaders, keyed by split name.
image_datasets = dict(train=train_set, val=val_set)

# Training: shuffled mini-batches of 8. Validation: one image at a time, fixed order.
dataloaders = dict(
    train=torch.utils.data.DataLoader(
        train_set, batch_size=8, shuffle=True, num_workers=0
    ),
    val=torch.utils.data.DataLoader(
        val_set, batch_size=1, shuffle=False, num_workers=0
    ),
)

In [5]:
def reverse_transform(inp):
    """Undo the ImageNet normalisation and return a displayable uint8 image.

    Args:
        inp: tensor laid out as (channel, row, col) with 3 channels, normalised
            with mean (0.485, 0.456, 0.406) and std (0.229, 0.224, 0.225).
            It may live on any device and may require grad.

    Returns:
        numpy uint8 array laid out as (row, col, channel) with values in [0, 255].
    """
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    # Round before casting: a bare astype truncates, so e.g. 127.99999 would
    # become 127. Clip so out-of-range values saturate instead of wrapping.
    inp = np.clip(np.rint(inp * 255), 0, 255).astype(np.uint8)

    return inp

In [None]:
# Fetch one sample as an (image, label) tuple.
x = train_set[100] # a single __getitem__() call, so image and label share the same random flip/crop and stay matched.

In [None]:
# Show the image of the sample `x` after undoing the normalisation.
plt.imshow(reverse_transform(x[0]))

In [None]:
# Show the label map that belongs to the sample `x`.
plt.imshow(x[1])

In [None]:
# Largest label id over the whole training set.
# Every train_set[idx] access loads and randomly crops the sample, so this is
# slow and only covers the cropped patches. `idx` stays defined afterwards.
max_vals = []
for idx in range(len(train_set)):
    max_vals.append(torch.max(train_set[idx][1]))
print(max(max_vals))

In [None]:
# Smallest label id over the whole training set (same caveats as for the
# maximum: one load + random crop per sample). `idx` stays defined afterwards.
min_vals = []
for idx in range(len(train_set)):
    min_vals.append(torch.min(train_set[idx][1]))
print(min(min_vals))

In [None]:
# Display the label tensor of sample `idx`; `idx` must already exist in the
# kernel (it is the leftover loop variable of an earlier scan cell).
train_set[idx][1]

In [None]:
# Largest label id in a fresh random crop of sample `idx` (leftover loop variable).
torch.max(train_set[idx][1])

In [None]:
# Boolean mask of the pixels labelled 26 in sample `idx`, shown as an image.
# NOTE(review): this binds the global name `val`, which the notebook later
# reuses for the validation function `val(...)`.
val = (train_set[idx][1]==26)
plt.imshow(val)

In [None]:
# Pull the first batch from the validation loader (batch_size=1, not shuffled).
inputs, masks = next(iter(dataloaders['val']))

In [None]:
# Number of mini-batches in one pass over the training loader.
len(dataloaders['train'])

In [None]:
# Show the first image of the fetched validation batch, de-normalised.
plt.imshow(reverse_transform(inputs[0]))

In [None]:
# Show the label map of the first image of the fetched validation batch.
plt.imshow(masks[0])

## Training the model

In [6]:
from model.deeplabv3 import DeepLabV3
# Instantiate the segmentation network with 7 output classes.
# NOTE(review): metric helpers in this notebook default to n=20 classes; make
# sure the class count passed to them matches this value.
net = DeepLabV3(num_classes=7)

In [7]:
import torch.nn as nn

# Print the qualified name of every nn.Conv2d sub-module of `net`.
for n,m in net.named_modules():
    if isinstance(m, nn.Conv2d):
        print(n)

encoder.conv1
encoder.layer1.0.conv1
encoder.layer1.0.conv2
encoder.layer1.1.conv1
encoder.layer1.1.conv2
encoder.layer1.2.conv1
encoder.layer1.2.conv2
encoder.layer2.0.conv1
encoder.layer2.0.conv2
encoder.layer2.0.downsample.0
encoder.layer2.1.conv1
encoder.layer2.1.conv2
encoder.layer2.2.conv1
encoder.layer2.2.conv2
encoder.layer2.3.conv1
encoder.layer2.3.conv2
encoder.layer3.0.conv1
encoder.layer3.0.conv2
encoder.layer3.0.downsample.0
encoder.layer3.1.conv1
encoder.layer3.1.conv2
encoder.layer3.2.conv1
encoder.layer3.2.conv2
encoder.layer3.3.conv1
encoder.layer3.3.conv2
encoder.layer3.4.conv1
encoder.layer3.4.conv2
encoder.layer3.5.conv1
encoder.layer3.5.conv2
encoder.layer4.0.conv1
encoder.layer4.0.conv2
encoder.layer4.0.downsample.0
encoder.layer4.1.conv1
encoder.layer4.1.conv2
encoder.layer4.2.conv1
encoder.layer4.2.conv2
layer5.0.conv1
layer5.0.conv2
layer5.0.conv3
layer5.1.conv1
layer5.1.conv2
layer5.1.conv3
layer5.2.conv1
layer5.2.conv2
layer5.2.conv3
aspp.conv_1x1_1
aspp.conv

In [None]:
from torchsummary import summary
# Layer-by-layer summary for a 3 x 384 x 384 input; note that net.cuda() also
# moves the model to the GPU as a side effect.
summary(net.cuda(), input_size = (3, 384, 384))

In [8]:
import sys
import os
import logging
import re
import functools
import fnmatch
import numpy as np
import torch.nn as nn

class AverageMeter(object):
    """Weighted running average that also remembers the most recent value.

    Attributes are ``None`` until the first ``update``; afterwards ``val`` is
    the last value, ``sum`` the weighted sum, ``count`` the total weight and
    ``avg`` equals ``sum / count``.
    """

    def __init__(self):
        self.val = self.avg = self.sum = self.count = None
        self.initialized = False

    def initialize(self, val, weight):
        """Start the statistics from a first observation."""
        self.val, self.avg = val, val
        self.sum, self.count = val * weight, weight
        self.initialized = True

    def update(self, val, weight=1):
        """Record ``val`` with the given ``weight`` (first call initialises)."""
        record = self.add if self.initialized else self.initialize
        record(val, weight)

    def add(self, val, weight):
        """Fold one more observation into already initialised statistics."""
        self.sum += val * weight
        self.count += weight
        self.val = val
        self.avg = self.sum / self.count

    def value(self):
        """Most recently recorded value."""
        return self.val

    def average(self):
        """Weighted mean of everything recorded so far."""
        return self.avg

In [None]:
'''
import torch.nn as nn

class SegmentationModuleBase(nn.Module):
    def __init__(self):
        super(SegmentationModuleBase, self).__init__()

    def pixel_acc(self, pred, label, n=20): #-->n:number of classes
        # pred : already is a probability value as a softmax layer is present as the last layer of the CNN.
        #----------------------mIOU accuracy in a new way----------------------- (hard IOU):not using prob. of prediction
        _, preds = torch.max(pred.data.cpu(), dim=1)

        # compute area intersection
        intersect = preds.clone()
        segs = label.data.cpu()
        intersect[torch.ne(preds, segs)] = -1

        area_intersect = torch.histc(intersect.float(),
                                     bins=n,
                                     min=0,
                                     max=n-1)

        # compute area union:
        preds[torch.lt(segs, 0)] = -1
        area_pred = torch.histc(preds.float(),
                                bins=n,
                                min=0,
                                max=n-1)
        area_lab = torch.histc(segs.float(),
                               bins=n,
                               min=0,
                               max=n-1)
        area_union = area_pred + area_lab - area_intersect
        mIOU = area_intersect/(area_union + 1e-10)
        #-----------------------------------------------------------------------
        _, preds = torch.max(pred.data.cpu(), dim=1)
        segs = label.data.cpu()
        
        valid = (segs >= 0).long()
        acc_sum = torch.sum(valid * (preds == segs).long())
        pixel_sum = torch.sum(valid)
        acc = acc_sum.float() / (pixel_sum.float() + 1e-10)
        
        #calculate individual accuracies for each image in the minibatch
        acc_sum1 = torch.sum(valid * (preds == segs).long(), (1,2))
        pixel_sum1 = torch.sum(valid, (1,2))
        acc1 = acc_sum1.float() / (pixel_sum1.float() + 1e-10)
        
        #return acc
        return acc.mean(), mIOU # return mean accuracy over the minibatch.


class SegmentationModule(SegmentationModuleBase):
    def __init__(self, net, crit, n_class=20):
        super(SegmentationModule, self).__init__()
        self.net = net
        self.crit = crit
        self.n = n_class

    def forward(self, inp, label):
        # training
        pred = self.net(inp)
        #print(pred.shape)
        loss = self.crit(pred, label)
        acc, mIOU = self.pixel_acc(pred, label,self.n)        
        return loss, acc, mIOU
'''

In [None]:
'''
def adjust_learning_rate(optimizers, cur_iter, epoch_iters):
    scale_running_lr = ((1. - float(cur_iter) / epoch_iters) ** 0.9)
    running_lr_encoder = 0.02 * scale_running_lr #both are same values for now.
    running_lr_decoder = 0.02 * scale_running_lr
    
    #(optimizer_encoder, optimizer_decoder) = optimizers
    #for param_group in optimizer_encoder.param_groups:
    #    param_group['lr'] = running_lr_encoder
    #for param_group in optimizer_decoder.param_groups:
    #    param_group['lr'] = running_lr_decoder
    
    for param_group in optimizers.param_groups:
        param_group['lr'] = running_lr_decoder # running_lr_decoder = running_lr_encoder
    return running_lr_encoder, running_lr_decoder
'''

In [None]:
import math

def adjust_learning_rate(optimizer_e, optimizer_d, epoch, n_epoch, lr_e=0.005, lr_d=0.01, lr_type='cos'):
    """Set the encoder and decoder learning rates for ``epoch`` and return them.

    Args:
        optimizer_e: encoder optimizer; every param group gets the encoder rate.
        optimizer_d: decoder optimizer; every param group gets the decoder rate.
        epoch: current epoch index.
        n_epoch: total number of epochs (period of the cosine schedule).
        lr_e: base encoder learning rate.
        lr_d: base decoder learning rate.
        lr_type: ``'cos'`` (cosine decay, no warm-up), ``'exp'`` (x0.96 per
            epoch) or ``'fixed'``.

    Returns:
        Tuple ``(lr_e, lr_d)`` with the rates that were applied.

    Raises:
        NotImplementedError: for any other ``lr_type``.
    """
    if lr_type == 'cos':
        cos_term = 1 + math.cos(math.pi * epoch / n_epoch)
        lr_e, lr_d = 0.5 * lr_e * cos_term, 0.5 * lr_d * cos_term
    elif lr_type == 'exp':
        step, decay = 1, 0.96
        factor = decay ** (epoch // step)
        lr_e, lr_d = lr_e * factor, lr_d * factor
    elif lr_type != 'fixed':
        raise NotImplementedError

    print('=> lr_encoder: {}'.format(lr_e))
    print('=> lr_decoder: {}'.format(lr_d))

    for optimizer, rate in ((optimizer_e, lr_e), (optimizer_d, lr_d)):
        for param_group in optimizer.param_groups:
            param_group['lr'] = rate

    return lr_e, lr_d

In [None]:
import torch.optim as optim

def create_optimizers(net):
    """Build separate Adam optimizers for the encoder and for the decoder parts.

    Args:
        net: model exposing ``encoder``, ``layer5``, ``aspp``, ``aggregate``
            and ``last_conv`` sub-modules.

    Returns:
        Tuple ``(encoder_optimizer, decoder_optimizer)``; the decoder optimizer
        has one param group per decoder sub-module. All rates start at 0.02.
    """
    base_lr = 0.02

    encoder_groups = [{'params': net.encoder.parameters(), 'lr': base_lr}]
    optimizers_enc = optim.Adam(encoder_groups, lr=base_lr)

    decoder_parts = (net.layer5, net.aspp, net.aggregate, net.last_conv)
    decoder_groups = [{'params': part.parameters(), 'lr': base_lr} for part in decoder_parts]
    optimizers_dec = optim.Adam(decoder_groups, lr=base_lr)

    return optimizers_enc, optimizers_dec

In [None]:
def pixel_acc(pred, label, n=20):
    """Pixel accuracy and per-class IoU of a batch of predictions.

    Args:
        pred: score tensor whose dimension 1 is the class axis; the predicted
            class is the argmax over that axis, so probabilities,
            log-probabilities or logits all work.
        label: integer label tensor with the shape of ``pred`` minus the class
            axis. Negative labels are treated as "ignore".
        n: number of classes; labels and predictions outside ``[0, n-1]`` do
            not contribute to the IoU counts.

    Returns:
        Tuple ``(acc, iou)``: ``acc`` is a scalar tensor with the fraction of
        non-ignored pixels predicted correctly over the whole batch; ``iou`` is
        a length-``n`` tensor of per-class intersection-over-union (a class
        that never occurs gets 0). Callers take ``iou.mean()`` for the mIoU.
    """
    segs = label.detach().cpu()
    _, preds = torch.max(pred.detach().cpu(), dim=1)

    # ---- pixel accuracy over the non-ignored pixels -----------------------
    valid = (segs >= 0).long()
    acc_sum = torch.sum(valid * (preds == segs).long())
    pixel_sum = torch.sum(valid)
    acc = acc_sum.float() / (pixel_sum.float() + 1e-10)  # epsilon guards an all-ignored batch

    # ---- per-class IoU from hard predictions ------------------------------
    def _class_hist(values):
        # Counts per class id; values outside [0, n-1] (the -1 markers) are dropped by histc.
        return torch.histc(values.float(), bins=n, min=0, max=n-1)

    # Intersection: keep the class id where prediction and label agree, -1 elsewhere.
    intersect = preds.clone()
    intersect[torch.ne(preds, segs)] = -1
    area_intersect = _class_hist(intersect)

    # Union: predictions on ignored pixels must not count towards any class.
    masked_preds = preds.clone()
    masked_preds[torch.lt(segs, 0)] = -1
    area_pred = _class_hist(masked_preds)
    area_lab = _class_hist(segs)
    area_union = area_pred + area_lab - area_intersect
    mIOU = area_intersect/(area_union + 1e-10)

    return acc.mean(), mIOU

In [None]:
def train(segmentation_module, iterator,  optimizer_e, optimizer_d, history, epoch, len_iterator, n_epochs, 
          running_lr_e, running_lr_d, crit, n_class=20):
    """Run one training epoch.

    Args:
        segmentation_module: the network; called as ``segmentation_module(inputs)``.
        iterator: iterator yielding ``(inputs, masks)`` batches.
        optimizer_e: encoder optimizer.
        optimizer_d: decoder optimizer.
        history: dict with ``history['train']`` holding the lists ``'epoch'``,
            ``'loss'`` and ``'acc'``; a ``'mIOU'`` list is filled too (created
            if missing). Entries are appended every 100 batches.
        epoch: 1-based epoch number (used for logging and the fractional epoch).
        len_iterator: number of batches to draw from ``iterator``.
        n_epochs: total number of epochs (unused here, kept for callers).
        running_lr_e: current encoder learning rate, only used for logging.
        running_lr_d: current decoder learning rate, only used for logging.
        crit: loss function called as ``crit(pred, masks.long())``.
        n_class: number of classes passed on to ``pixel_acc``.
    """
    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()
    ave_miou = AverageMeter()

    segmentation_module.train()
    segmentation_module.zero_grad()

    # Put the batches wherever the model lives instead of assuming a GPU;
    # a parameter-less module falls back to the previous behaviour (CUDA).
    first_param = next(segmentation_module.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    for i in range(len_iterator):
        # load a batch of data
        inputs, masks = next(iterator)
        inputs, masks = inputs.to(device), masks.to(device)
        optimizer_e.zero_grad()
        optimizer_d.zero_grad()

        # forward pass
        pred = segmentation_module(inputs.float())
        loss = crit(pred, masks.long())
        acc, mIOU = pixel_acc(pred, masks.long(), n_class)

        loss = loss.mean()
        acc = acc.mean()
        mIOU = mIOU.mean()  # per-class IoU vector -> mean IoU

        # backward pass and parameter update for both halves of the network
        loss.backward()
        optimizer_e.step()
        optimizer_d.step()

        # running averages (accuracy and mIoU are tracked in percent)
        ave_total_loss.update(loss.item())
        ave_acc.update(acc.item()*100)
        ave_miou.update(mIOU.item()*100)

        # log and record every 100 batches
        if i % 100 == 0:
            print('Epoch: [{}][{}/{}], '
                  'lr_encoder: {:.6f}, lr_decoder: {:.6f}, '
                  'Accuracy: {:4.2f}, Loss: {:.6f}, mIOU: {:.6f}'
                  .format(epoch, i, len_iterator,
                          running_lr_e, running_lr_d,
                          ave_acc.average(), ave_total_loss.average(), ave_miou.average()))

            fractional_epoch = epoch - 1 + 1. * i / len_iterator
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss'].append(loss.item())
            history['train']['acc'].append(acc.item())
            # Previously never recorded although callers allocate the list.
            history['train'].setdefault('mIOU', []).append(mIOU.item())



In [None]:
import copy

def val(segmentation_module, iterator, history, len_iterator, crit, n_class):
    """Evaluate on the validation set and checkpoint the model when it improves.

    The checkpoint criterion is the accuracy averaged over all validation
    batches of this call (as a fraction in [0, 1]), compared against the
    module-level ``best_acc``. Comparing single batches, as was done before,
    saved the weights whenever one individual batch happened to score high.

    Args:
        segmentation_module: the network; called as ``segmentation_module(inputs)``.
        iterator: iterator yielding ``(inputs, masks)`` batches.
        history: dict with ``history['val']`` holding the lists ``'loss'`` and
            ``'acc'``; a ``'mIOU'`` list is filled too (created if missing).
            Entries are appended every 400 batches.
        len_iterator: number of batches to draw from ``iterator``.
        crit: loss function called as ``crit(pred, masks.long())``.
        n_class: number of classes passed on to ``pixel_acc``.

    Side effects:
        Updates the global ``best_acc`` and writes the state dict to
        ``weight/deeplabv3_resnet34_weights_qia5_qw5_abc.pth`` on improvement.
    """
    global best_acc

    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()
    ave_miou = AverageMeter()

    segmentation_module.eval()

    # Put the batches wherever the model lives instead of assuming a GPU;
    # a parameter-less module falls back to the previous behaviour (CUDA).
    first_param = next(segmentation_module.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    # No gradients are needed for evaluation; this avoids building autograd graphs.
    with torch.no_grad():
        for i in range(len_iterator):
            # load a batch of data
            inputs, masks = next(iterator)
            inputs, masks = inputs.to(device), masks.to(device)

            # forward pass
            pred = segmentation_module(inputs.float())
            loss = crit(pred, masks.long())
            acc, mIOU = pixel_acc(pred, masks.long(), n_class)

            loss = loss.mean()
            acc = acc.mean()
            mIOU = mIOU.mean()  # per-class IoU vector -> mean IoU

            # running averages (accuracy and mIoU are tracked in percent)
            ave_total_loss.update(loss.item())
            ave_acc.update(acc.item()*100)
            ave_miou.update(mIOU.item()*100)

            # log and record every 400 batches
            if i%400 == 0:
                print('Accuracy: {:4.2f}, Loss: {:.6f}, mIOU: {:.6f}'
                      .format(ave_acc.average(), ave_total_loss.average(), ave_miou.average()))

                history['val']['loss'].append(loss.item())
                history['val']['acc'].append(acc.item())
                history['val'].setdefault('mIOU', []).append(mIOU.item())

    if len_iterator > 0:
        # Back to a fraction so `best_acc` keeps the scale it had before.
        epoch_acc = ave_acc.average() / 100.0
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(segmentation_module.state_dict())
            # The file name encodes the network variant; change it per experiment.
            torch.save(best_model_wts, "weight/deeplabv3_resnet34_weights_qia5_qw5_abc.pth")
            #this has to be dynamically changed based on what type of network we are running at present.



In [None]:
import pickle
from torch.autograd import Variable

# Per-class loss weights, stored as a pickled sequence, moved to the GPU as float32.
# NOTE(review): pickle.load executes arbitrary code; only load trusted files.
with open("/home/kaustavb/cityscapes/meta/class_weights.pkl", "rb") as file: # (needed for python3)
    class_weights = torch.from_numpy(np.array(pickle.load(file)))
class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()


In [None]:
# Weighted negative-log-likelihood loss; pixels labelled -1 are ignored.
# NOTE(review): nn.NLLLoss expects log-probabilities as input; confirm that
# DeepLabV3 ends in a log-softmax rather than a plain softmax.
crit = nn.NLLLoss(weight=class_weights, ignore_index=-1)
# Fresh 7-class network, initialised from previously saved weights.
net = DeepLabV3(num_classes=7)
net.load_state_dict(torch.load("/home/kaustavb/6867/weight/deeplabv3_resnet34_weights.pth"))

#segmentation_module = SegmentationModule(net, crit, n_class=7)
#segmentation_module.cuda()

# Move the model to the GPU (as the last expression this also prints the module).
net.cuda()

In [None]:

#iterator_train = iter(dataloaders['train'])
# Curves recorded by train()/val(); one list per metric and split.
history = {'train': {'epoch': [], 'loss': [], 'acc': [], 'mIOU': []}, 'val': {'loss': [], 'acc': [], 'mIOU': []}}

# Number of classes for the metrics. It was never defined in this notebook
# (NameError on a fresh kernel); 7 matches DeepLabV3(num_classes=7).
num_classes = 7

optimizers_enc, optimizers_dec = create_optimizers(net.cuda())
num_epoch=100

# Best validation accuracy so far; val() updates it through `global best_acc`.
best_acc = 0

for epoch in range(num_epoch):
    # A fresh iterator per epoch: train()/val() pull batches with next().
    iterator_train = iter(dataloaders['train'])
    # The learning rates are changed once per epoch (cosine schedule).
    running_lr_e, running_lr_d = adjust_learning_rate(optimizers_enc, optimizers_dec, epoch, num_epoch, 
                                                      lr_e=0.0125, lr_d=0.025, lr_type='cos') 
    train(net, iterator_train, optimizers_enc, optimizers_dec, history, epoch+1, len(dataloaders['train']), 
          num_epoch, running_lr_e, running_lr_d, crit, num_classes)
    
    iterator_val = iter(dataloaders['val'])
    val(net, iterator_val, history, len(dataloaders['val']), crit, num_classes)

In [None]:
# Show ground truth and prediction side by side for every validation image.
# Inference only: switch to eval mode (fixed batch-norm statistics) and skip
# autograd bookkeeping so no graphs are kept alive.
net.eval()
iterator_val = iter(dataloaders['val'])
with torch.no_grad():
    for i in range(len(dataloaders['val'])):
        # load a batch of data
        inputs, masks = next(iterator_val)
        inputs, masks = inputs.cuda(), masks.cuda()
        pred = net(inputs.float())
        _, preds = torch.max(pred, dim=1)  # class with the highest score per pixel
        plt.imshow(masks.cpu().numpy()[0])
        plt.show()
        plt.imshow(preds.cpu().numpy()[0])
        plt.show()
        print('###########################################################################################################')

## Quantizing the neural network

In [None]:
import json
import os
import time
import argparse
import shutil
import math

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.autograd import Function
import functools

In [None]:
def rgetattr(obj, attr, *args):
    """``getattr`` for dotted paths such as ``"encoder.layer1.0.conv1"``.

    An optional extra positional argument is used as the ``getattr`` default
    at every step of the path.
    """
    target = obj
    for part in attr.split('.'):
        target = getattr(target, part, *args)
    return target

def rsetattr(obj, attr, val):
    """``setattr`` for dotted paths: sets the last component on its parent object."""
    parent_path, _, leaf = attr.rpartition('.')
    owner = obj if not parent_path else rgetattr(obj, parent_path)
    return setattr(owner, leaf, val)

### Components needed to quantize a network

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Function

import functools

""" 
class fx8(Function):    
    @staticmethod
    def forward(ctx, tensor): 
        Nlevels = (2**1) #assuming 8 bit quantization
        if torch.min(tensor) >= 0 :
            delta = torch.max(tensor)/Nlevels
        else :
            delta = (torch.max(tensor)-torch.min(tensor))/Nlevels
        
        xint = torch.round(torch.div(tensor,delta))
        if torch.min(tensor) >= 0 :
            xq = torch.clamp(xint, 0, Nlevels-1)
        else :
            xq = torch.clamp(xint, -Nlevels/2, Nlevels/2-1)
        xfloat = torch.mul(xq,delta)         
        return xfloat
    
    @staticmethod
    def backward(ctx, grad_output):
        # We return as many input gradients as there were arguments.
        # Gradients of non-Tensor arguments to forward must be None.
        grad_input = grad_output.clone()
        return grad_input #, None
        
""" 
#-----------------------> (added on 31-07-2020)
class AQuantizer(Function):
    """Binarise an activation tensor against N thresholds, with a straight-through-style gradient.

    Forward: the input is stacked N times along a new leading dimension,
    threshold ``shift_v[k]`` is subtracted from copy ``k`` and the sign is
    returned, giving an ``N x (input shape)`` tensor with values in {-1, 0, +1}.

    Backward: with ``x = input - shift`` the surrogate derivative is
    ``2 - 2*|x|`` where ``|x| <= 1`` and 0 elsewhere. The input gradient is the
    mean over the N copies; the threshold gradient is minus the sum of the
    incoming gradient of each copy, divided by the batch size.
    """

    @staticmethod
    def forward(ctx, tensor, shift_v, N):
        """Return ``sign(tensor - shift_v[k])`` for ``k = 0..N-1``, stacked along dim 0.

        Args:
            tensor: 4-D input (the thresholds are broadcast over 4 trailing dims).
            shift_v: 1-D tensor with N thresholds.
            N: number of thresholds / binary copies.
        """
        # torch.cat of N unsqueezed copies: N x (input shape).
        tensor1 = torch.cat(N*[torch.unsqueeze(tensor,0)])
        shift_v = shift_v.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) # shape : N x 1 x 1 x 1 x 1
        x = tensor1-shift_v
        ctx.save_for_backward(x)
        return torch.sign(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``(tensor, shift_v, N)``; ``N`` is not a tensor, hence ``None``."""
        x = ctx.saved_tensors[0]  # saved_tensors is a tuple

        # Build the surrogate derivative WITHOUT writing into the saved tensor:
        # mutating it in place corrupts it for any further backward pass
        # (e.g. with retain_graph=True).
        abs_x = torch.abs(x)
        inside = (abs_x <= 1.0).to(x.dtype)
        surrogate = (-2*abs_x + 2) * inside

        # Gradient w.r.t. the input: average over the N stacked copies.
        grad_input = grad_output*surrogate
        grad_input = (1/grad_output.size()[0])*torch.sum(grad_input, dim=0)

        # Gradient w.r.t. the thresholds: one value per copy, averaged over the
        # batch dimension; the minus sign comes from d(tensor - shift)/d(shift).
        # NOTE(review): this uses the raw incoming gradient, not the
        # surrogate-masked one - confirm that is intended.
        grad_a = (1/x.size()[1])*torch.sum(grad_output, dim=(1, 2, 3, 4))*-1.0

        return grad_input , grad_a, None
        
'''
class ActIQuantizer(nn.Module) : #activation Identity quantizer
    def __init__(self, *args, **kwargs):
        super(ActIQuantizer, self).__init__()
        self.shift_init = kwargs['shift_init']
        #self.shift_v = nn.Parameter(torch.from_numpy(np.array(self.shift_init)).float()) #initial clip_v value
        self.register_buffer('shift_v', torch.from_numpy(np.array(self.shift_init)).float())
   
    def forward(self, input):

        return input
'''

#-----------------------> (added on 27=07-2020)
class ActQuantizer(nn.Module) :
    """Activation quantizer with N learnable thresholds.

    Keyword Args:
        shift_init: stored as ``self.shift_init``; it is NOT used to initialise
            the thresholds, which are drawn from a standard normal instead.
        N: number of thresholds (and of binary copies produced by ``forward``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.shift_init, self.N = kwargs['shift_init'], kwargs['N']
        # Learnable thresholds, randomly initialised.
        self.shift_v = nn.Parameter(torch.randn(self.N,).float())

    def forward(self, input):
        """Binarise ``input`` against every threshold via ``AQuantizer``."""
        return AQuantizer.apply(input, self.shift_v, self.N)
    
class WQuantizer(nn.Module):
    
    def __init__(self, *kargs, **kwargs):
        super(WQuantizer, self).__init__()
        self.M = kwargs['M']
        self.register_buffer('u', torch.tensor(np.zeros( (self.M,1,1,1,1) ) ) )
        for i in range(self.M):
            self.u[i,0,0,0,0] = -1+2*(i-1)/(self.M-1)        
        data = kwargs['data']
        
    def quantize(self, data):        
        data = torch.unsqueeze(data,0) 
        B_concat = torch.sign(data-torch.mean(data) + self.u*torch.std(data)).float() #-->new (all Bi's along 0 th dimension)       
        #calculate 'a'        
        # the .float() was added to ensure all operations in float() mode. Otherwise, it was giving error saying input is float and weight double()        
        W1 = torch.reshape(data,(-1,1))           #-->added .float()
        B1 = torch.reshape(B_concat,(self.M,-1)) #-->new
        B = torch.transpose(B1,0,1)              #-->new
        a = torch.matmul(torch.matmul(torch.pinverse(torch.matmul(torch.transpose(B,0,1),B)),torch.transpose(B,0,1)),W1).float()
        
        return a,B_concat

class QConv2d(nn.Conv2d):
    """Conv2d whose weights (and optionally inputs) are expanded on binary bases.

    Weights are approximated as ``sum_i a[i] * qB[i]`` with ``M`` binary bases
    produced by ``WQuantizer``. When ``N`` is 3 or 5 the input is additionally
    passed through an ``ActQuantizer`` that returns ``N`` tensors, and the
    per-input results are combined with the fixed coefficients ``b``.

    Args:
        quant_args: Unused; kept for interface compatibility.
        init_args: Dict with keys ``'weight_data'``, ``'bias_data'``, ``'M'``
            (number of weight bases) and ``'N'`` (number of activation bases:
            0 disables input quantization, 3 and 5 are supported).
        *kargs, **kwargs: Forwarded to ``nn.Conv2d``.

    Raises:
        ValueError: If ``init_args['N']`` is not 0, 3 or 5.

    Notes:
        * The bias is added exactly once to the output. Earlier revisions
          passed it to every ``F.conv2d`` call inside the basis loops, which
          scaled it by ``sum(a)`` and repeated it for each activation basis.
        * ``a`` and ``qB`` are overwritten from ``weight`` on every forward
          pass; ``update_grads`` then maps ``qB.grad`` back onto ``weight.grad``.
    """

    # Supported activation-basis counts -> ``shift_init`` list given to ActQuantizer.
    _ACT_SHIFT_INITS = {
        3: [-0.8, 0, 0.8],
        5: [-0.8, -0.3, 0, 0.3, 0.8],
    }

    def __init__(self, quant_args=None, init_args=None, *kargs, **kwargs):
        super(QConv2d, self).__init__(*kargs, **kwargs)

        # ....................................................weight quantization
        self.weight.data = init_args['weight_data']
        # Checking the module itself avoids a KeyError when ``bias`` is not
        # passed as a keyword argument.
        if self.bias is not None:
            self.bias.data = init_args['bias_data']
        self.M = init_args['M']
        self.quantizer = WQuantizer(data=self.weight.data, M=self.M)

        # Coefficients start as the first unit vector; replaced on the first forward.
        a_init = torch.zeros(self.M, 1, dtype=torch.float64)
        a_init[0, 0] = 1.0
        self.register_buffer('a', a_init)

        # qB starts as M stacked copies of the weights; replaced on the first forward.
        stacked = torch.cat([torch.unsqueeze(self.weight.detach().clone(), 0)] * self.M, 0)
        self.qB = nn.Parameter(stacked)

        # .....................................................input quantization
        self.N = init_args['N']
        if self.N in self._ACT_SHIFT_INITS:
            self.input_quantizer = ActQuantizer(
                shift_init=list(self._ACT_SHIFT_INITS[self.N]), N=self.N)
            # Fixed (non-trainable) combination coefficients, one per activation basis.
            self.b = [[1.0] for _ in range(self.N)]
        elif self.N != 0:
            raise ValueError(
                'QConv2d supports N in (0, 3, 5); got N={!r}'.format(self.N))

    #call it after loss.backward()
    def update_grads(self):
        """Copy the gradient of the binary bases onto the full-precision weights.

        Sets ``weight.grad = sum_i a[i] * qB.grad[i]``. Does nothing when
        ``qB.grad`` is None, i.e. when the layer took no part in the last
        backward pass.
        """
        if self.qB.grad is None:
            return
        w_grad = 0.0
        for i in range(self.M):
            w_grad = w_grad + self.a[i][0] * self.qB.grad[i]
        self.weight.grad = w_grad

    def _binary_conv(self, x):
        """Return ``sum_i a[i] * conv2d(x, qB[i])`` without any bias term."""
        out = 0.0
        for i in range(self.M):
            out = out + self.a[i][0] * F.conv2d(
                x, self.qB[i], None, self.stride, self.padding, self.dilation, self.groups)
        return out

    def forward(self, input):
        """Run the quantized convolution.

        Args:
            input: Input feature map, as accepted by ``F.conv2d``.

        Returns:
            The convolution output with the bias (if any) added once.
        """
        if self.N != 0:
            x = self.input_quantizer(input)  # indexable: one tensor per activation basis

        # Refresh the weight bases. Only the values are kept (assigned through
        # ``.data``), so no autograd graph is needed for this step.
        with torch.no_grad():
            coeffs, bases = self.quantizer.quantize(self.weight)
        self.a.data, self.qB.data = coeffs, bases

        if self.N == 0:
            out = self._binary_conv(input)
        else:
            out = 0.0
            for k in range(self.N):
                out = out + self.b[k][0] * self._binary_conv(x[k])

        if self.bias is not None:
            # (C, 1, 1) broadcasts over the channel axis of the output.
            out = out + self.bias.reshape(-1, 1, 1)
        return out



In [None]:
class PReLU(Function):
    """Autograd function for a shifted, per-channel parametric ReLU.

    With ``x = tensor - gamma`` the forward pass returns
    ``max(x, 0) + beta * min(x, 0) + eta``.

    The backward pass reduces the gradients of ``gamma``, ``eta`` and ``beta``
    over dimensions 0, 2 and 3 and divides them by the size of dimension 0,
    so a 4-D input is expected (presumably ``(batch, channel, H, W)`` - TODO confirm).
    """

    @staticmethod
    def forward(ctx, tensor, gamma, eta, beta):
        x = tensor - gamma
        ctx.save_for_backward(x, beta)

        # Work on clones so the saved ``x`` keeps its original values.
        positive = x.clone()
        negative = x.clone()
        positive[positive <= 0] = 0
        negative[negative > 0] = 0

        return positive + beta * negative + eta

    @staticmethod
    def backward(ctx, grad_output):
        # We return as many input gradients as there were arguments.
        x, beta = ctx.saved_tensors

        # 1 where the shifted input is non-positive, 0 elsewhere. Built from a
        # comparison so the saved tensor is never modified in place: mutating
        # it corrupted the values seen by a later backward pass through the
        # same graph (retain_graph=True).
        neg = (x <= 0).to(x.dtype)
        slope = neg * beta + (1.0 - neg)  # d(output)/d(x)

        batch = x.size()[0]

        def per_channel(grad):
            """Reduce over dims 0, 2, 3, scale by 1/batch, reshape to (1, C, 1, 1)."""
            reduced = (1 / batch) * torch.sum(torch.sum(torch.sum(grad, dim=0), dim=1), dim=1)
            return reduced.unsqueeze(0).unsqueeze(2).unsqueeze(2)

        grad_input = grad_output * slope
        grad_gamma = per_channel(grad_output * (-slope))   # x = tensor - gamma
        grad_eta = per_channel(grad_output)
        grad_beta = per_channel(grad_output * (x * neg))
        return grad_input, grad_gamma, grad_eta, grad_beta
                                                                            
#-----------------------> (added on 17-08-2020)
class PReLU_ActQuantizer(nn.ReLU):
    """Module wrapper around the ``PReLU`` autograd function.

    Keyword Args:
        gamma, eta, beta: Tensors that are expanded with ``unsqueeze`` at
            positions 0, 2 and 2 (a 1-D tensor of length C becomes
            ``(1, C, 1, 1)``), cast to float32 and registered as parameters
            under the same names.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        for key in ('gamma', 'eta', 'beta'):
            expanded = kwargs[key]
            for axis in (0, 2, 2):
                expanded = expanded.unsqueeze(axis)
            setattr(self, key, nn.Parameter(expanded.float()))

    def forward(self, input):
        """Return ``PReLU.apply(input, gamma, eta, beta)``."""
        return PReLU.apply(input, self.gamma, self.eta, self.beta)

In [None]:
def update_model_grads(net):
    """Call ``update_grads()`` on every ``QConv2d`` found in ``net.named_modules()``."""
    quantized_layers = (
        module for _, module in net.named_modules() if isinstance(module, QConv2d)
    )
    for layer in quantized_layers:
        layer.update_grads()

## Step 1: Full-precision weights, quantized input activations
###### The network must first be loaded with the most recent checkpoint; the following transformation is then applied.

In [None]:
# Loss: negative log-likelihood with per-class weights; target pixels equal to -1 are ignored.
# `class_weights` must already be defined by an earlier cell.
# NOTE(review): NLLLoss expects log-probabilities - presumably DeepLabV3 ends in log_softmax; confirm.
crit = nn.NLLLoss(weight=class_weights, ignore_index=-1)
# Build the DeepLabV3 model with 7 output classes; its convolutions are converted in the next cell.
net = DeepLabV3(num_classes=7)

#starting from full precision networks and doing quantization for the first time. 

# Disabled: loading the checkpoint before the conversion instead of after it.
#net.load_state_dict(torch.load("/home/kaustavb/6867/weight/deeplabv3_resnet34_weights.pth"))

In [None]:
# Replace every nn.Conv2d in `net` (except the layers listed below) by a QConv2d
# with M=5 weight bases and N=0, i.e. no input quantization.
n_channels = -1  # output channels of the most recently visited convolution
for n, m in net.named_modules():
    if not isinstance(m, nn.Conv2d):
        continue
    n_channels = m.weight.size()[0]

    # These convolutions are left untouched (not replaced by QConv2d).
    if n in ('encoder.conv1',
             'encoder.layer1.0.conv1',
             'encoder.layer1.0.conv2',
             'encoder.layer1.0.conv3',
             'encoder.layer1.0.downsample.0',
             'last_conv.8'):
        continue

    bias = m.bias is not None
    # The replacement starts from the current weights (and bias, when present).
    init_args = {
        'weight_data': m.weight.data,
        'bias_data': m.bias.data if bias else None,
        'M': 5,
        'N': 0,
    }
    # Same geometry as the layer being replaced.
    conv_args = dict(
        in_channels=m.in_channels,
        out_channels=m.out_channels,
        kernel_size=m.kernel_size,
        stride=m.stride,
        padding=m.padding,
        groups=m.groups,
        bias=bias,
        dilation=m.dilation,
    )

    conv = QConv2d(init_args=init_args, **conv_args)
    rsetattr(net, n, conv)
    print('CONV layer '+ n+ ' quantized using '+ 'ABC-Net method')

In [None]:
# Load the checkpoint into `net` after its convolutions were replaced by QConv2d (previous cell).
# NOTE(review): the path is absolute and machine-specific, and load_state_dict is strict by
# default, so the checkpoint keys must match the converted model - confirm the file was saved
# from a network with this structure.
net.load_state_dict(torch.load("/home/kaustavb/6867/weight/deeplabv3_resnet34_weights.pth"))

In [None]:
# Replace every nn.Conv2d in `net` (QConv2d layers included, since QConv2d subclasses
# nn.Conv2d) by a QConv2d with M=5 weight bases and N=5 activation bases. The layers
# listed below are left untouched.
n_channels = -1  # output channels of the most recently visited convolution
for n, m in net.named_modules():
    if not isinstance(m, nn.Conv2d):
        continue
    n_channels = m.weight.size()[0]

    # These convolutions are left untouched (not replaced by QConv2d).
    if n in ('encoder.conv1',
             'encoder.layer1.0.conv1',
             'encoder.layer1.0.conv2',
             'encoder.layer1.0.conv3',
             'encoder.layer1.0.downsample.0',
             'last_conv.8'):
        continue

    bias = m.bias is not None
    # The replacement starts from the current weights (and bias, when present).
    init_args = {
        'weight_data': m.weight.data,
        'bias_data': m.bias.data if bias else None,
        'M': 5,
        'N': 5,
    }
    # Same geometry as the layer being replaced.
    conv_args = dict(
        in_channels=m.in_channels,
        out_channels=m.out_channels,
        kernel_size=m.kernel_size,
        stride=m.stride,
        padding=m.padding,
        groups=m.groups,
        bias=bias,
        dilation=m.dilation,
    )

    conv = QConv2d(init_args=init_args, **conv_args)
    rsetattr(net, n, conv)
    print('CONV layer '+ n+ ' quantized using '+ 'ABC-Net method')

    # Disabled experiment, kept for reference: replace each nn.ReLU by a
    # PReLU_ActQuantizer sized from `n_channels`.
    # elif isinstance(m, nn.ReLU):#---->
    #         i_qargs = {'gamma' : torch.randn(n_channels,), 'eta' : torch.randn(n_channels,), 'beta' : 0.25*torch.ones(n_channels,)}
    #         relu = PReLU_ActQuantizer(**i_qargs)
    #         rsetattr(net,n, relu)
    #         print('RELU layer '+ n+ ' replaced using '+ 'ReAct-Net method')

In [None]:
#segmentation_module = SegmentationModule(net, crit, n_class=7)
#segmentation_module.cuda()

#segmentation_module = torch.nn.DataParallel(segmentation_module, list(range(2)))

In [None]:
def train_quantize(segmentation_module, iterator,  optimizer_e, optimizer_d, history, epoch, len_iterator, n_epochs, 
          running_lr_e, running_lr_d, crit, n_class=20):
    """Train the quantized network for one epoch.

    For each of ``len_iterator`` mini-batches: forward pass, loss, backward
    pass, ``update_model_grads`` (maps the gradients of the binary bases back
    onto the full-precision weights), then one step of both optimizers.

    Args:
        segmentation_module: Network to train; called as ``module(inputs.float())``.
        iterator: Iterator yielding ``(inputs, masks)`` batches; ``next`` is
            called exactly ``len_iterator`` times.
        optimizer_e, optimizer_d: The two optimizers stepped after every batch.
        history: Dict whose ``history['train']`` holds the lists ``'epoch'``,
            ``'loss'`` and ``'acc'`` (and ``'mIOU'`` when present). One entry
            is appended to each every 100 iterations.
        epoch: 1-based epoch number, used for logging and the fractional epoch.
        len_iterator: Number of mini-batches in one epoch.
        n_epochs: Total number of epochs (currently unused).
        running_lr_e, running_lr_d: Current learning rates, used for logging only.
        crit: Loss callable, invoked as ``crit(pred, masks.long())``.
        n_class: Number of classes passed to ``pixel_acc``.

    Returns:
        None. Progress is printed and recorded in ``history``.
    """
    #---> len_iterator : one epoch has how many mini-batches (to traverse through the dataset)
    #---> n_epochs : total number of epochs 

    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()
    ave_miou = AverageMeter()

    segmentation_module.train()
    segmentation_module.zero_grad()

    # main loop
    for i in range(len_iterator):
        # load a batch of data
        inputs, masks = next(iterator)
        inputs, masks = inputs.cuda(), masks.cuda()
        optimizer_e.zero_grad()
        optimizer_d.zero_grad()

        # forward pass
        targets = masks.long()
        pred = segmentation_module(inputs.float())
        loss = crit(pred, targets)
        acc, mIOU = pixel_acc(pred, targets, n_class)

        loss = loss.mean()
        acc = acc.mean()
        mIOU = mIOU.mean()

        # Backward
        loss.backward()
        update_model_grads(segmentation_module) #----> update the parameters of the quantized network
        optimizer_e.step()
        optimizer_d.step()

        # update average loss and acc (accuracy and mIOU are tracked in percent)
        loss_value = loss.item()
        acc_value = acc.item()
        miou_value = mIOU.item()
        ave_total_loss.update(loss_value)
        ave_acc.update(acc_value*100)
        ave_miou.update(miou_value*100)

        # calculate accuracy, and display
        if i % 100 == 0:
            print('Epoch: [{}][{}/{}], '
                  'lr_encoder: {:.6f}, lr_decoder: {:.6f}, '
                  'Accuracy: {:4.2f}, Loss: {:.6f}, mIOU: {:.6f}'
                  .format(epoch, i, len_iterator,
                          running_lr_e, running_lr_d,
                          ave_acc.average(), ave_total_loss.average(), ave_miou.average()))

            fractional_epoch = epoch - 1 + 1. * i / len_iterator
            train_history = history['train']
            train_history['epoch'].append(fractional_epoch)
            train_history['loss'].append(loss_value)
            train_history['acc'].append(acc_value)
            # Previously never recorded, which left the 'mIOU' list empty while
            # the other lists grew. Only filled when the caller provides it.
            if 'mIOU' in train_history:
                train_history['mIOU'].append(miou_value)



In [None]:
# Training driver: build the optimizers, move the network to the GPU, then
# alternate one training epoch (train_quantize) and one validation pass (val).
#iterator_train = iter(dataloaders['train'])
# Per-split metric history filled by train_quantize / val.
history = {'train': {'epoch': [], 'loss': [], 'acc': [], 'mIOU': []}, 'val': {'loss': [], 'acc': [], 'mIOU': []}}

# Optimizers are created from the bare network, before it is wrapped below.
optimizers_enc, optimizers_dec = create_optimizers(net)

net.cuda()
# list(range(1)) == [0]: DataParallel over a single device.
# NOTE(review): `net` is rebound here, so re-running this cell wraps the model again.
net = torch.nn.DataParallel(net, list(range(1)))

num_epoch=100

best_acc = 0  # not read anywhere in this cell
num_classes=7

for epoch in range(num_epoch):
    # Fresh iterator each epoch; train_quantize consumes len(dataloaders['train']) batches.
    iterator_train = iter(dataloaders['train'])
    running_lr_e, running_lr_d = adjust_learning_rate(optimizers_enc, optimizers_dec, epoch, num_epoch, 
                                                      lr_e=0.025, lr_d=0.050, lr_type='cos') 
    #lr changed once per epoch 
    # train_quantize expects a 1-based epoch number, hence epoch+1.
    train_quantize(net, iterator_train, optimizers_enc, optimizers_dec, history, epoch+1, len(dataloaders['train']), 
          num_epoch, running_lr_e, running_lr_d, crit, num_classes)
    
    iterator_val = iter(dataloaders['val'])
    val(net, iterator_val, history, len(dataloaders['val']), crit, num_classes)

In [None]:
# Scratch: 3-element float64 tensor expanded by four unsqueeze(1) calls to shape (3, 1, 1, 1, 1).
g_o = torch.from_numpy(np.array([-0.8,0,0.8])).unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1)

In [None]:
# Scratch: sum over dim 1 four times (each call removes one axis) and scale by 1/size(1).
# With the (3, 1, 1, 1, 1) `g_o` from the previous cell the result has shape (3,).
grad_a = (1/g_o.size()[1])*(torch.sum(torch.sum(torch.sum(torch.sum(g_o,dim=1),dim=1),dim=1),dim=1))

In [None]:
# Display the shape of the reduced tensor.
grad_a.shape

In [None]:
# Scratch: random (3, 6, 5, 5) tensor and the number of copies to stack in the next cell.
orig_tensor = torch.randn(3,6,5,5)
N=5

In [None]:
# Stack N copies of `orig_tensor` along a new leading axis -> shape (N, 3, 6, 5, 5).
new_tensor = torch.cat(N*[torch.unsqueeze(orig_tensor,0)])

In [None]:
# Display the shape of the stacked tensor.
new_tensor.shape

In [None]:
# In place: every element with |value| >= 1 becomes +1.0, including values <= -1.
# NOTE(review): if symmetric clipping to [-1, 1] was intended, this does not do it - confirm.
new_tensor[abs(new_tensor)>=1.0]=1.0

In [None]:
# Scratch: 1-D float32 tensor of 5 standard-normal samples; the last line displays its shape.
x = torch.randn(5,).float()
x.shape