Creating MINICIFAR


In [0]:

# Number of CIFAR10 classes kept in MiniCIFAR; generate_subset keeps class ids 0 .. n_classes_minicifar-1
n_classes_minicifar = 4
# Reduction factor passed as `reducefactor` when the training subsets are built below
R = 5


# Download the entire CIFAR10 dataset

from torchvision.datasets import CIFAR10
import numpy as np 
from torch.utils.data import Subset

import torchvision.transforms as transforms

## Normalization is different when training from scratch and when training using an ImageNet-pretrained backbone

# Per-channel (mean, std) used when training from scratch
normalize_scratch = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Per-channel mean/std used with an ImageNet-pretrained backbone
normalize_forimagenet = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])

# Data augmentation is needed in order to train from scratch:
# random 32x32 crop after 4-pixel padding, random horizontal flip, then tensor conversion + normalization
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize_scratch,
])

# Evaluation pipeline for the from-scratch setting: no augmentation, same normalization as training
transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize_scratch,
])

## No data augmentation when using Transfer Learning
transform_train_imagenet = transforms.Compose([
    transforms.ToTensor(),
    normalize_forimagenet,
])

# Identical pipeline to transform_train_imagenet (tensor conversion + ImageNet normalization)
transform_test_imagenet = transforms.Compose([
    transforms.ToTensor(),
    normalize_forimagenet,
])


### The data from CIFAR10 will be downloaded into the following directory
rootdir = './data/cifar10'

# Train / test splits with the from-scratch transforms
c10train = CIFAR10(rootdir,train=True,download=True,transform=transform_train)
c10test = CIFAR10(rootdir,train=False,download=True,transform=transform_test)

# Same splits (same rootdir), but with the ImageNet normalization transforms
c10train_imagenet = CIFAR10(rootdir,train=True,download=True,transform=transform_train_imagenet)
c10test_imagenet = CIFAR10(rootdir,train=False,download=True,transform=transform_test_imagenet)

# Generating Mini-CIFAR
# 
# CIFAR10 is large enough that training a model to state-of-the-art performance takes approximately 3 hours on the 1060 GPU available on your machine.
# As a result, we create a "MiniCifar" dataset, based on CIFAR10, with fewer classes and examples.

def generate_subset(dataset,n_classes,reducefactor,n_ex_class_init):
    """Build a class-balanced ``Subset`` of ``dataset`` restricted to the first ``n_classes`` classes.

    The same within-class positions are selected for every class. They are drawn
    once, without replacement, from ``range(n_ex_class_init)`` using a fixed seed
    (42), so repeated calls with the same arguments select the same examples.

    Args:
        dataset: object exposing ``targets`` (one integer label per example) and
            usable as the dataset of ``torch.utils.data.Subset``.
        n_classes: classes ``0 .. n_classes-1`` are kept.
        reducefactor: each class keeps ``floor(n_ex_class_init / reducefactor)`` examples.
        n_ex_class_init: number of examples each kept class is expected to have.

    Returns:
        ``Subset`` whose indices are grouped by class (class 0 first, then 1, ...).

    Raises:
        IndexError: if a kept class has too few examples for the drawn positions.
    """
    nb_examples_per_class = int(np.floor(n_ex_class_init / reducefactor))

    # Within-class positions. They are the same for each class; this could easily be
    # modified to use different ones, but be careful to keep the random seed!
    indices_split = np.random.RandomState(seed=42).choice(n_ex_class_init,nb_examples_per_class,replace=False)

    # Convert the labels once instead of once per class.
    targets = np.asarray(dataset.targets)

    all_indices = []
    for curclas in range(n_classes):
        indices_curclas = np.flatnonzero(targets == curclas)
        if nb_examples_per_class and indices_split.max() >= len(indices_curclas):
            raise IndexError(
                "class %d has only %d examples, cannot sample positions drawn from range(%d)"
                % (curclas, len(indices_curclas), n_ex_class_init)
            )
        all_indices.append(indices_curclas[indices_split])

    # np.hstack rejects an empty list, so handle n_classes == 0 explicitly.
    if all_indices:
        all_indices = np.hstack(all_indices)
    else:
        all_indices = np.array([], dtype=np.int64)

    return Subset(dataset,indices=all_indices)
    


### These datasets (Subset objects, not DataLoaders) are ready to be used to train from scratch
### NOTE(review): minicifar_val and minicifar_test are built by identical calls on c10test and
### generate_subset uses a fixed seed, so both contain exactly the same examples.
minicifar_train= generate_subset(dataset=c10train,n_classes=n_classes_minicifar,reducefactor=R,n_ex_class_init=5000)
minicifar_val= generate_subset(dataset=c10test,n_classes=n_classes_minicifar,reducefactor=1,n_ex_class_init=1000) 
minicifar_test= generate_subset(dataset=c10test,n_classes=n_classes_minicifar,reducefactor=1,n_ex_class_init=1000) 


### These datasets (Subset objects, not DataLoaders) are ready to be used to train using Transfer Learning
### from a backbone pretrained on ImageNet
### NOTE(review): as above, minicifar_val_im and minicifar_test_im select the same examples.
minicifar_train_im= generate_subset(dataset=c10train_imagenet,n_classes=n_classes_minicifar,reducefactor=R,n_ex_class_init=5000)
minicifar_val_im= generate_subset(dataset=c10test_imagenet,n_classes=n_classes_minicifar,reducefactor=1,n_ex_class_init=1000)
minicifar_test_im= generate_subset(dataset=c10test_imagenet,n_classes=n_classes_minicifar,reducefactor=1,n_ex_class_init=1000)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


ResNet

In [0]:
'''ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The block input is added back to the output of the second batch norm through
    ``self.shortcut``. The shortcut is an empty ``nn.Sequential`` (identity) unless
    the stride or the channel count changes, in which case it is a strided 1x1
    convolution followed by batch norm.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (and of the projection shortcut).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: conv(3x3, stride) -> BN -> conv(3x3) -> BN
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip path: a projection is only required when shapes would not match.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        summed = hidden + self.shortcut(x)
        return F.relu(summed)


class Bottleneck(nn.Module):
    """Residual block made of 1x1 -> 3x3 -> 1x1 convolutions, each followed by batch norm.

    The last convolution outputs ``expansion * planes`` channels. The block input is
    added to the output of the third batch norm through ``self.shortcut``, which is
    an empty ``nn.Sequential`` (identity) unless the stride or channel count changes,
    in which case it is a strided 1x1 convolution followed by batch norm.

    Args:
        in_planes: number of input channels.
        planes: number of channels of the two inner convolutions.
        stride: stride of the 3x3 convolution (and of the projection shortcut).
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: 1x1 -> 3x3 (strided) -> 1x1 (channel expansion)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Skip path: a projection is only required when shapes would not match.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = self.bn3(self.conv3(hidden))
        summed = hidden + self.shortcut(x)
        return F.relu(summed)


class ResNet(nn.Module):
    """ResNet with a 3x3 stem (stride 1) followed by four residual stages and a linear classifier.

    Stage widths are ``size_factor``, ``2*size_factor``, ``4*size_factor`` and
    ``8*size_factor``; stages 2-4 downsample by 2 in their first block.

    Args:
        block: residual block class; must accept ``(in_planes, planes, stride)`` and
            expose an ``expansion`` class attribute.
        num_blocks: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the classifier output.
        size_factor: width of the stem and of the first stage.
    """

    def __init__(self, block, num_blocks, num_classes=10, size_factor=64):
        super().__init__()
        # Running input width; updated by _make_layer as stages are stacked.
        self.in_planes = size_factor

        self.conv1 = nn.Conv2d(3, size_factor, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(size_factor)
        self.layer1 = self._make_layer(block, size_factor, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 2*size_factor, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 4*size_factor, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 8*size_factor, num_blocks[3], stride=2)
        self.linear = nn.Linear(8*size_factor*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``, the rest use 1."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        # Distinct loop name so the `stride` argument is not shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for a 3-channel image batch."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs the feature map is 4x4, so this equals
        # the previous fixed avg_pool2d(out, 4); other input sizes now also reduce to
        # 1x1 instead of producing a feature vector the classifier cannot accept.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        out = self.linear(out)
        return out

def ResNet18():
    """Build a ResNet of BasicBlocks with [2, 2, 2, 2] blocks per stage and default ResNet arguments."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths)

def ResNet34():
    """Build a ResNet of BasicBlocks with [3, 4, 6, 3] blocks per stage and default ResNet arguments."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths)

def ResNet50():
    """Build a ResNet of Bottleneck blocks with [3, 4, 6, 3] blocks per stage and default ResNet arguments."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths)

def ResNet101():
    """Build a ResNet of Bottleneck blocks with [3, 4, 23, 3] blocks per stage and default ResNet arguments."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths)

def ResNet152():
    """Build a ResNet of Bottleneck blocks with [3, 8, 36, 3] blocks per stage and default ResNet arguments."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths)

def ResNetCustom(size_factor, num_classes):
    """Build a BasicBlock ResNet with [2, 2, 2, 2] blocks per stage, a custom width and class count.

    Args:
        size_factor: forwarded to ``ResNet`` as ``size_factor``.
        num_classes: forwarded to ``ResNet`` as ``num_classes``.
    """
    stage_depths = [2, 2, 2, 2]
    return ResNet(
        BasicBlock,
        stage_depths,
        size_factor=size_factor,
        num_classes=num_classes,
    )


Training and testing

In [0]:
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim
import json

def main(batch_size, epoch, size_factor, step_size, gamma, initial_rate):
    """Train a half-precision ``ResNetCustom`` on MiniCIFAR and evaluate it after every epoch.

    Per-epoch train/test loss and accuracy are written to ``<run>.json`` and the
    model weights to ``<run>.pth``, where ``<run>`` encodes all hyper-parameters.
    Requires a CUDA device.

    Args:
        batch_size: batch size of both data loaders.
        epoch: number of training epochs.
        size_factor: width passed to ``ResNetCustom``.
        step_size: ``patience`` of the ``ReduceLROnPlateau`` scheduler.
        gamma: ``factor`` of the ``ReduceLROnPlateau`` scheduler.
        initial_rate: initial SGD learning rate.
    """
    # Base name shared by the metrics (.json) and the weights (.pth) files.
    run_name = ('bs' + str(batch_size) + 'ep' + str(epoch) + 'sf' + str(size_factor)
                + 'sz' + str(step_size) + 'ga' + str(gamma) + 'ir' + str(initial_rate))

    ### Datasets and loaders used to train from scratch
    minicifar_train = generate_subset(dataset=c10train,n_classes=n_classes_minicifar,reducefactor=4,n_ex_class_init=5000)
    trainloader = DataLoader(minicifar_train, batch_size=batch_size, shuffle=True, num_workers=2)
    minicifar_test = generate_subset(dataset=c10test,n_classes=n_classes_minicifar,reducefactor=4,n_ex_class_init=1000)
    testloader = DataLoader(minicifar_test,batch_size=batch_size, num_workers=2)

    ## Model: half precision on GPU, with the batch-norm layers kept in float32
    net = ResNetCustom(size_factor=size_factor, num_classes=n_classes_minicifar)
    net = net.half()
    net = net.cuda()
    for layer in net.modules():
        if isinstance(layer, nn.BatchNorm2d):
            layer.float()

    loss_history_train, loss_history_test = [], []
    accu_history_train, accu_history_test = [], []

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=initial_rate, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, patience=step_size, factor=gamma)

    for _ in range(epoch):
        correct = 0.0
        correct_test = 0.0
        running_loss = 0.0
        running_loss_test = 0.0
        total = 0.0
        total_test = 0.0

        ###### TRAIN ######
        net.train()
        for data, labels in trainloader:
            data = data.half().cuda()
            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize; the loss is computed on float32 logits,
            # exactly as in the evaluation loop below
            outputs = net(data)
            loss = criterion(outputs.float(), labels)
            loss.backward()
            optimizer.step()

            # statistics
            total += labels.size(0)
            running_loss += loss.item()
            predicted = outputs.max(1)[1]
            correct += predicted.eq(labels).sum().item()

        # The scheduler monitors the summed training loss of the epoch.
        scheduler.step(running_loss)
        accu_history_train.append(100*correct/total)
        loss_history_train.append(running_loss)
        print(running_loss)
        print("Acc training : " + str(100*correct/total))

        ###### TEST ######
        # Evaluate on the loader built in this function (the previous code iterated an
        # undefined `valloader` and accumulated into unassigned `*_val` variables).
        net.eval()
        with torch.no_grad():
            for data, labels in testloader:
                data = data.half().cuda()
                labels = labels.cuda()

                outputs = net(data).float()
                loss = criterion(outputs, labels)

                # statistics
                total_test += labels.size(0)
                running_loss_test += loss.item()
                predicted = outputs.max(1)[1]
                correct_test += predicted.eq(labels).sum().item()

        accu_history_test.append(100*correct_test/total_test)
        loss_history_test.append(running_loss_test)

        print("Acc test : " + str(100*correct_test/total_test))

    print('Finished Training/Testing')

    state_dict = {"size_factor": size_factor, "batch_size": batch_size, "epoch": epoch, "accu_test": accu_history_test, "accu_train": accu_history_train, "loss_test": loss_history_test, "loss_train": loss_history_train, "step_size":step_size, "gamma":gamma, "initial_rate":initial_rate}

    with open(run_name + '.json', 'w') as file:
        file.write(json.dumps(state_dict))

    # Call state_dict(): saving the bound method would pickle the method, not the weights.
    torch.save(net.state_dict(), run_name + '.pth')

In [0]:
#main(batch_size=32, size_factor=8, epoch=200, step_size=100, gamma=0.1, initial_rate = 0.01)

Plot

In [0]:
import matplotlib.pyplot as plt
import json

def plot(batch_size, epoch, size_factor, step_size, gamma, initial_rate):
    """Plot the loss and accuracy curves stored in the JSON file of a training run.

    Reads ``<run>.json`` (named from the hyper-parameters, as written by ``main``),
    then draws one figure for train/test loss and one for train/test accuracy.
    Each figure is annotated with the hyper-parameters and saved as a PDF
    (``loss_<label>.pdf`` and ``accu_<label>.pdf``) before being shown.

    Args:
        batch_size, epoch, size_factor, step_size, gamma, initial_rate:
            hyper-parameters of the run; only used to build file names and the annotation.
    """
    run_name = ('bs' + str(batch_size) + 'ep' + str(epoch) + 'sf' + str(size_factor)
                + 'sz' + str(step_size) + 'ga' + str(gamma) + 'ir' + str(initial_rate))
    # Human-readable variant, used for the on-figure annotation and the PDF names.
    label = (' bs ' + str(batch_size) + ' ep ' + str(epoch) + ' sf ' + str(size_factor)
             + ' sz ' + str(step_size) + ' ga ' + str(gamma) + ' ir ' + str(initial_rate))

    with open(run_name + '.json', 'r') as file:
        jf = json.load(file)
    accu_train = jf["accu_train"]
    accu_test = jf["accu_test"]
    loss_test = jf["loss_test"]
    loss_train = jf["loss_train"]

    # Loss figure
    fig, ax = plt.subplots()
    ax.plot(loss_train, '-b', label = "Loss train")
    ax.plot(loss_test, '-r', label = "Loss test")
    ax.set_title("Loss")
    ax.text(0.5, 0.7, label, horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=15)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
    fig.savefig('loss_' + label + '.pdf')

    # Accuracy figure
    fig_1, ax_1 = plt.subplots()
    ax_1.plot(accu_train, '-b', label = "Accuracy train")
    ax_1.plot(accu_test, '-r', label = "Accuracy test")
    ax_1.set_title("Accuracy")
    # Position the annotation relative to THIS figure's axes (ax_1), not the loss axes.
    ax_1.text(0.5, 0.4, label, horizontalalignment='center', verticalalignment='center', transform=ax_1.transAxes, fontsize=15)
    ax_1.set_xlabel("Epoch")
    ax_1.set_ylabel("Accuracy (%)")
    ax_1.legend()
    fig_1.savefig('accu_' + label + '.pdf')

    plt.show()



In [0]:
#plot(batch_size=32, size_factor=8, epoch=200, step_size=100, gamma=0.1, initial_rate = 0.01)

Training 


In [0]:
#Test
#batch_size = 32
#size_factor = 16
#epoch = 30
#initial_rate_list = [1 , 0.1, 0.01, 0.001]
#gamma = [0.1, 0.001]
#step_size_list = [5,10,15]

#for i in initial_rate_list:
#    for s in step_size_list:
#      for g in gamma:
#        main(batch_size=batch_size, size_factor=size_factor, epoch=epoch,step_size=s, initial_rate=i, gamma=g)
#        print(" initial_rate :" + str(i) + " step_size : " + str(s) + " gamma :" + str(g) + " | done !" )

#for i in initial_rate_list:
#    for s in step_size_list:
#     for g in gamma:
#        plot(batch_size=batch_size, size_factor=size_factor, epoch=epoch,step_size=s, initial_rate=i, gamma=g)


In [0]:
#from google.colab import drive
#drive.mount('/content/drive')

Quantization

In [0]:
import torch.nn as nn
import numpy
from torch.autograd import Variable


class BC():
    """Weight-binarization helper wrapping a model.

    Keeps a full-precision copy of the weight of every ``nn.Conv2d`` / ``nn.Linear``
    layer of ``model`` so the weights can be binarized in place for a forward pass
    and restored afterwards.

    Attributes (all built in ``__init__``):
        model: the wrapped model.
        target_modules: the weight ``Parameter`` of each targeted layer, in
            ``model.modules()`` order.
        saved_params: full-precision copies, aligned with ``target_modules``.
        bin_range: indices ``0 .. num_of_params-1`` of the targeted layers.
        num_of_params: number of targeted layers.
    """

    def __init__(self, model):
        self.model = model # the model that will be trained and quantized
        self.saved_params = [] # full-precision copies of the weights
        self.target_modules = [] # weight Parameters that get binarized

        ### Build the initial copy of all targeted weights
        for m in model.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                self.saved_params.append(m.weight.data.clone())
                self.target_modules.append(m.weight)

        self.num_of_params = len(self.target_modules)
        self.bin_range = list(range(self.num_of_params))

    def save_params(self):
        """Copy the current weights of the targeted layers into ``saved_params``."""
        for index in range(self.num_of_params):
            current = self.target_modules[index].data
            saved = self.saved_params[index]
            if saved.device != current.device or saved.dtype != current.dtype:
                # The model was moved/cast after construction (e.g. .cuda()):
                # re-allocate the copy next to the weights.
                self.saved_params[index] = current.clone()
            else:
                saved.copy_(current)

    def binarization(self):
        """Save the full-precision weights, then overwrite them in place with their sign."""
        ### (1) Save the current full precision parameters
        self.save_params()

        ### (2) Binarize in place so the model itself sees the binary weights.
        ### Rebinding the list entry (as before) left the model untouched.
        ### Note: sign maps an exact 0 to 0.
        for param in self.target_modules:
            param.data.sign_()

    def restore(self):
        """Copy ``saved_params`` back into the model weights (in place)."""
        for param, saved in zip(self.target_modules, self.saved_params):
            param.data.copy_(saved)

    def clip(self):
        """Clip the targeted weights to [-1, 1] in place (same effect as Hardtanh)."""
        for param in self.target_modules:
            param.data.clamp_(-1, 1)

    def forward(self,x):
        """Run the wrapped model on ``x``; lets the wrapper be used while training."""
        out = self.model(x)
        return out



Testing BC

In [0]:
# Hyper-parameters of the binarized-training experiment
batch_size = 32
size_factor = 8
initial_rate = 0.01
step_size = 10  # used as `patience` of ReduceLROnPlateau below
gamma = 0.1     # used as `factor` of ReduceLROnPlateau below

minicifar_train = generate_subset(dataset=c10train,n_classes=n_classes_minicifar,reducefactor=5,n_ex_class_init=5000)
trainloader = DataLoader(minicifar_train, batch_size=batch_size, shuffle=True, num_workers=2)

minicifar_test = generate_subset(dataset=c10test,n_classes=n_classes_minicifar,reducefactor=1,n_ex_class_init=1000)
testloader = DataLoader(minicifar_test,batch_size=batch_size, num_workers=2)

# NOTE(review): built by the same call as minicifar_test (generate_subset uses a fixed seed),
# so the validation and test loaders iterate over the same examples.
minicifar_val = generate_subset(dataset=c10test,n_classes=n_classes_minicifar,reducefactor=1,n_ex_class_init=1000) 
valloader = DataLoader(minicifar_val, batch_size=batch_size, num_workers=2)

# model_full is wrapped by BC; model_full_2 is a second, independent network of the same architecture
model_full = ResNetCustom(size_factor=size_factor, num_classes=4)
model_full_2 = ResNetCustom(size_factor=size_factor, num_classes=4)
model_full_2 = model_full_2.cuda()
bc = BC(model_full)
model_bc = bc.model.cuda()

criterion = nn.CrossEntropyLoss()

# The optimizer is built from model_bc's parameters only; model_full_2 is not registered with it
optimizer = optim.SGD(model_bc.parameters(), lr=initial_rate, momentum=0.9)

scheduler = ReduceLROnPlateau(optimizer, patience=step_size, factor=gamma)


In [0]:


def main_quantification( model_full,epoch, batch_size = batch_size, size_factor = size_factor, step_size = step_size, gamma = gamma, initial_rate = initial_rate, criterion = criterion, optimizer = optimizer, scheduler = scheduler):
    """Train the binarized model for ``epoch`` epochs, validating each epoch, then test it.

    The function operates on the notebook-level ``model_bc`` / ``bc`` pair and on the
    notebook-level ``trainloader``, ``valloader`` and ``testloader``. Forward passes
    (train, validation and test) use binarized weights; the optimizer updates the
    full-precision weights, which are then clipped to [-1, 1]. At the end the
    training-accuracy curve is plotted and the test accuracy printed.

    Args:
        model_full: kept for interface compatibility. The function always trains
            ``model_bc``; a warning is emitted when another model is passed.
        epoch: number of training epochs.
        batch_size, size_factor, step_size, gamma, initial_rate: accepted but not
            used inside the function (loaders, model and scheduler are built outside).
        criterion: loss function.
        optimizer: optimizer stepping the parameters of ``model_bc``.
        scheduler: stepped once per epoch with the summed training loss.
    """
    import warnings

    if model_full is not model_bc:
        warnings.warn("main_quantification always trains the notebook-level `model_bc`; "
                      "the `model_full` argument is ignored.")

    #Loss and accuracy
    loss_history_train, loss_history_test, loss_history_val = [], [], []
    accu_history_train, accu_history_test, accu_history_val = [], [], []

    # Test statistics live outside the epoch loop so they exist even when epoch == 0.
    correct_test, total_test, running_loss_test = 0.0, 0.0, 0.0

    for e in range(epoch):
        correct, correct_val = 0.0, 0.0
        running_loss, running_loss_val = 0.0,0.0
        total, total_val = 0.0,0.0

        #Train
        model_bc.train()

        for data, labels in trainloader:
            data = data.cuda()
            labels = labels.cuda()

            #Zero the parameter gradient
            optimizer.zero_grad()

            # Forward and backward with the binarized weights ...
            bc.binarization()
            outputs = model_bc(data)
            loss = criterion(outputs,labels)
            loss.backward()

            # ... then update the full-precision weights and keep them in [-1, 1].
            # Restoring before backward() would let autograd read modified weights,
            # and clipping before step() would leave the updated weights unclipped.
            bc.restore()
            optimizer.step()
            bc.clip()

            # compute statistics
            total += labels.size(0)
            running_loss += loss.item()
            predicted = outputs.max(1)[1]
            correct += predicted.eq(labels).sum().item()

        scheduler.step(running_loss)
        accu_history_train.append(100*correct/total)
        loss_history_train.append(running_loss)

        #Val
        model_bc.eval()
        # Binarize once for the whole pass and restore afterwards: binarizing again on
        # every batch without restoring would overwrite the saved full-precision
        # weights with binary ones.
        bc.binarization()
        with torch.no_grad():
            for data, labels in valloader:
                data = data.cuda()
                labels = labels.cuda()

                outputs = model_bc(data)
                loss = criterion(outputs,labels)

                # compute statistics
                total_val += labels.size(0)
                running_loss_val += loss.item()
                predicted = outputs.max(1)[1]
                correct_val += predicted.eq(labels).sum().item()
        bc.restore()

        accu_history_val.append(100*correct_val/total_val)
        loss_history_val.append(running_loss_val)

        print("Accucary val : " + str(100*correct_val/total_val) + "| " + "Accuracy train : " + str(100*correct/total) + '| ' + "Loss : " + str(running_loss))

    #Test (once, after the last epoch)
    model_bc.eval()
    bc.binarization()
    with torch.no_grad():
        for data, labels in testloader:
            data = data.cuda()
            labels = labels.cuda()

            outputs = model_bc(data)
            loss = criterion(outputs,labels)

            # compute statistics
            total_test += labels.size(0)
            running_loss_test += loss.item()
            predicted = outputs.max(1)[1]
            correct_test += predicted.eq(labels).sum().item()
    bc.restore()

    accu_history_test.append(100*correct_test/total_test)
    loss_history_test.append(running_loss_test)

    plt.plot(accu_history_train)
    plt.show()
    print("Accucary test : " + str(100*correct_test/total_test))








In [0]:
# Train the binarized network for 30 epochs.
main_quantification(model_bc,epoch=30, batch_size=batch_size, size_factor=size_factor, step_size=step_size, gamma=gamma, initial_rate=initial_rate, criterion = criterion, optimizer = optimizer, scheduler = scheduler)
# NOTE(review): the optimizer/scheduler passed here were built from model_bc's parameters,
# so they cannot update model_full_2; presumably a full-precision baseline run was
# intended, which would need its own optimizer and scheduler — confirm.
main_quantification(model_full_2,epoch=30, batch_size=batch_size, size_factor=size_factor, step_size=step_size, gamma=gamma, initial_rate=initial_rate, criterion = criterion, optimizer = optimizer, scheduler = scheduler)

KeyboardInterrupt: ignored