# Model evaluation with AdaPT on MNIST dataset

In this notebook you can evaluate different approximate multipliers on various models based on the MNIST dataset.

Steps:
* Select number of threads to use
* Load dataset
* Load AdaPT layers
* Choose approximate multiplier
* Define Model
* Run model calibration for quantization
* Evaluate


**Note**:
* This notebook should be run on an x86 machine

* Please make sure you have run the installation steps first

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data.dataloader as dataloader
import torch.optim as optim

from torch.utils.data import TensorDataset
from torch.autograd import Variable
from torchvision import transforms
from torchvision.datasets import MNIST
import tqdm

## Select number of threads to use

For optimal performance, set this to the number of CPU threads (logical CPUs) of your machine, not the number of physical CPU cores.

In [None]:
# Number of CPU threads PyTorch is allowed to use; adjust to the machine running the notebook.
threads = 40
torch.set_num_threads(threads)

# OpenMP thread-placement hints, set through the IPython %env magic (may give better performance).
# NOTE(review): torch is already imported when these are set — confirm the OpenMP runtime still honours them.
%env OMP_PLACES=cores
%env OMP_PROC_BIND=close
%env OMP_WAIT_POLICY=active

## Load Dataset


In [None]:
# Both splits share the same storage location and the same preprocessing pipeline.
mnist_root = './datasets/mnist_data/data'
to_tensor = transforms.Compose([
    transforms.ToTensor(),  # ToTensor does min-max normalization.
])

train = MNIST(mnist_root, train=True, download=True, transform=to_tensor)
test = MNIST(mnist_root, train=False, download=True, transform=to_tensor)

# Create the DataLoaders; the train and test loaders use identical settings.
dataloader_args = {
    'shuffle': True,
    'batch_size': 64,
    'num_workers': 1,
    'pin_memory': False,
}
train_loader = dataloader.DataLoader(train, **dataloader_args)
test_loader = dataloader.DataLoader(test, **dataloader_args)

## Load Adapt Layers

In [None]:
#Load ADAPT layers
from adapt.approx_layers import axx_layers as approxNN

## Choose approximate multiplier 

Two approximate multipliers are already provided

**mul8s_acc** - (header file: mul8s_acc.h)   <--  default

**mul8s_1L2H** - (header file: mul8s_1L2H.h)



To use a custom multiplier, use the provided tool (LUT_generator) to create the C++ header for your multiplier, then place the header inside the adapt/cpu-kernels/axx_mults folder. The name assigned to axx_mult here must match the name of the header file. The same axx_mult is used in all layers.

Tip: If you want to explicitly set a different axx_mult for each layer, you must do it in the model definition, using the respective AdaPT class of each layer (e.g. AdaPT_Conv2d).

In [None]:
# Name of the approximate multiplier; it must match the name of its header file.
# NOTE(review): the text above marks mul8s_acc as the default, but mul8s_1L2H is selected here — confirm which is intended.
axx_mult = 'mul8s_1L2H'

## Define Model

The JIT compilation method loads the C++ extensions of the approximate multipliers on the fly. Then the PyTorch model is loaded.

In [None]:
# Default used by Model(): True -> AdaPT approximate layers, False -> vanilla PyTorch layers.
use_adapt = True


class Model(nn.Module):
    """Fully connected MNIST classifier: 784 -> 548 -> 252 -> 10.

    Each of the first two linear layers is followed by BatchNorm1d, ReLU and
    dropout; the last linear layer is followed by a log-softmax.

    Args:
        use_adapt_layers: True builds the linear layers with
            ``approxNN.AdaPT_Linear``, False with ``nn.Linear``. ``None``
            (default) falls back to the module-level ``use_adapt`` flag, so
            ``Model()`` behaves as before.
        mult: name of the approximate multiplier passed to every AdaPT layer.
            ``None`` (default) falls back to the module-level ``axx_mult``.
            Ignored when vanilla layers are used.
    """

    def __init__(self, use_adapt_layers=None, mult=None):
        super().__init__()

        # Globals are read at construction time (not at class definition time),
        # so re-running the configuration cells still affects new instances.
        if use_adapt_layers is None:
            use_adapt_layers = use_adapt

        if use_adapt_layers:
            if mult is None:
                mult = axx_mult

            def linear(in_features, out_features):
                return approxNN.AdaPT_Linear(in_features, out_features, axx_mult=mult)
        else:
            linear = nn.Linear

        # Layers are created in the order fc1, bc1, fc2, bc2, fc3 so that the
        # registration order and the state-dict keys stay unchanged.
        self.fc1 = linear(784, 548)
        self.bc1 = nn.BatchNorm1d(548)
        self.fc2 = linear(548, 252)
        self.bc2 = nn.BatchNorm1d(252)
        self.fc3 = linear(252, 10)

    def forward(self, x):
        """Return the log-probabilities of the 10 classes for each input sample.

        The input is reshaped to rows of 784 values before the first layer.
        Dropout is only active while the module is in training mode.
        """
        x = x.view((-1, 784))
        h = self.fc1(x)
        h = self.bc1(h)
        h = F.relu(h)
        h = F.dropout(h, p=0.5, training=self.training)

        h = self.fc2(h)
        h = self.bc2(h)
        h = F.relu(h)
        h = F.dropout(h, p=0.2, training=self.training)

        h = self.fc3(h)
        out = F.log_softmax(h, -1)
        return out

# Build the model and keep it on the CPU.
# NOTE(review): the model is never switched to eval mode, so dropout stays active and
# BatchNorm uses batch statistics in the following cells — confirm this is intended.
model = Model()
model.cpu()

# Load the pretrained weights. map_location='cpu' remaps tensors that were saved from
# another device, so the checkpoint also loads on a machine without that device.
model.load_state_dict(torch.load('models/state_dicts/mnist.pt', map_location='cpu'))

#optimizer = optim.Adam(model.parameters(), lr=0.001)

## Run model calibration for quantization

This step calibrates the quantization parameters.

It must be re-run each time the model changes.

In [None]:
from pytorch_quantization import nn as quant_nn
from pytorch_quantization import calib

def collect_stats(model, data_loader, num_batches):
    """Run one forward pass with the calibrators enabled to collect statistics.

    Before the pass, every ``TensorQuantizer`` that has a calibrator is put in
    calibration mode with quantization turned off, and every quantizer without
    a calibrator is disabled. After the pass the previous switches are reversed.

    Args:
        model: module containing the quantizers; it is called once on the data.
        data_loader: loader whose ``dataset.data`` tensor is cast to float and
            passed to the model in a single call.
        num_batches: accepted for interface compatibility; this function does
            not use it.

    Returns:
        None.
    """

    def quantizers():
        # Re-enumerated on every call, like a fresh pass over named_modules().
        return (
            layer
            for _, layer in model.named_modules()
            if isinstance(layer, quant_nn.TensorQuantizer)
        )

    # Switch to calibration mode.
    for quantizer in quantizers():
        if quantizer._calibrator is None:
            quantizer.disable()
            continue
        quantizer.disable_quant()
        quantizer.enable_calib()

    samples = data_loader.dataset.data.type_as(torch.FloatTensor())
    evaluate_x = Variable(samples).cpu()
    model(evaluate_x)

    # Switch back to quantization mode.
    for quantizer in quantizers():
        if quantizer._calibrator is None:
            quantizer.enable()
            continue
        quantizer.enable_quant()
        quantizer.disable_calib()

def compute_amax(model, **kwargs):
    """Load the calibration result (amax) into every quantizer of the model.

    For each ``TensorQuantizer`` that has a calibrator, ``load_calib_amax`` is
    called: without arguments for a ``MaxCalibrator``, with ``kwargs`` for any
    other calibrator. Every ``TensorQuantizer`` is printed together with its
    name, and the model is moved to the CPU at the end.

    Args:
        model: module containing the quantizers.
        **kwargs: options forwarded to ``load_calib_amax`` of calibrators that
            are not a ``MaxCalibrator``.

    Returns:
        None.
    """
    for name, module in model.named_modules():
        if not isinstance(module, quant_nn.TensorQuantizer):
            continue

        calibrator = module._calibrator
        if calibrator is not None:
            # A max calibrator takes no options; the others get the caller's options.
            options = {} if isinstance(calibrator, calib.MaxCalibrator) else kwargs
            module.load_calib_amax(**options)

        print(f"{name:40}: {module}")

    model.cpu()

# Calibration runs with gradient tracking disabled.
# It is a bit slow since we collect histograms on CPU
with torch.no_grad():
    # NOTE: collect_stats and compute_amax have no return statement, so `stats` and `amax` are both None.
    # NOTE(review): collect_stats does not use num_batches; it feeds data_loader.dataset.data in one call.
    stats = collect_stats(model, test_loader, num_batches=2)
    amax = compute_amax(model, method="percentile", percentile=99.99)

    # optional - test different calibration methods
    #amax = compute_amax(model, method="mse")
    #amax = compute_amax(model, method="entropy")
    

## Evaluate

In [None]:
# Evaluate on the whole test split in a single forward pass.
# NOTE(review): dataset.data is used directly instead of going through test_loader, so the
# dataset's ToTensor transform is not applied — confirm this matches how the weights were trained.
evaluate_x = test_loader.dataset.data.float().cpu()
evaluate_y = test_loader.dataset.targets.cpu()

# Inference only: no autograd bookkeeping is needed for this full-test-set batch.
with torch.no_grad():
    output = model(evaluate_x)

# Index of the largest log-probability is the predicted class.
pred = output.max(1)[1]
d = pred.eq(evaluate_y).cpu()

# Mean of the 0/1 hit mask, computed in float so it can never fall back to integer division.
accuracy = d.float().mean().item()

print('Accuracy:', accuracy)