# Benchmarks for trained networks

In [2]:
from utils.model_utils import benchmark
from src.data.dataloaders import *
from src.nn.models import *
import torch

## Unpruned networks:

### VGG with transfer learning
Model: VGG_11, with batch normalization  
Training data: etl2 

Training Parameters:  
Optimizer:  Stochastic Gradient Descent (SGD), learning rate=0.001, momentum=0.9, Step-LR with step_size=7 & gamma=0.1  
Loss function: Cross Entropy Loss

#### Summary of results:   
Size: 1101.2MB  
F1(micro): 0.000  
Inference time per image:   
8.25s (PC)

In [3]:
# Benchmark the transfer-learned VGG-11 (batch-norm) model on the etl2 'test' loader.
etl2, etl2_classes = get_etl2_dataloaders('vgg11_bn')
model, _ = vgg_model(etl2_classes)
# Remap every stored tensor to CPU while deserializing: the checkpoint may hold
# CUDA tensors (training used a GPU), and without a map_location torch.load
# fails on a machine that has no CUDA device. The callable form is used because
# it is accepted by old and new PyTorch releases alike.
model.load_state_dict(
    torch.load(
        'trained_models/vgg11_bn_etl2.weights',
        map_location=lambda storage, loc: storage,
    )
)
print(model)
benchmark(model, etl2['test'], 'vgg11_bn')

restored pickled etl2 data
VGG(
  (features): Sequential(
    (0): Conv2d (3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (4): Conv2d (64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (6): ReLU(inplace)
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (8): Conv2d (128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (10): ReLU(inplace)
    (11): Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (13): ReLU(inplace)
    (14): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (15): Conv2d (256, 512, kernel_size=(3, 3), stride=(1, 1), paddin

100%|██████████| 330/330 [00:53<00:00,  6.21it/s]
  'precision', 'predicted', average, warn_for)


### Chinese OCR inspired network
Model: ChineseNet, from (https://arxiv.org/abs/1702.07975)  

#### Type A:  
Training data: etl2

Training Parameters:  
Optimizer:  Stochastic Gradient Descent (SGD), learning rate=0.001, momentum=0.9, Step-LR with step_size=7 & gamma=0.1  
Loss function: Cross Entropy Loss

#### Summary of results:  
Size: 58.15 MB  
F1(micro): 0.9913  
Inference time per image: 1.73s  

In [3]:
# Benchmark ChineseNet (type A, trained on etl2 only) on the etl2 'test' loader.
etl2, etl2_classes = get_etl2_dataloaders('chinese_net')
model, _ = chinese_model(etl2_classes)
# Remap every stored tensor to CPU while deserializing: the checkpoint may hold
# CUDA tensors (training used a GPU), and without a map_location torch.load
# fails on a machine that has no CUDA device. The callable form is used because
# it is accepted by old and new PyTorch releases alike.
model.load_state_dict(
    torch.load(
        'trained_models/chinese_net_etl2.weights',
        map_location=lambda storage, loc: storage,
    )
)
print(model)

benchmark(model, etl2['test'], 'chinese_net_etl2')

restored pickled etl2 data


  0%|          | 0/330 [00:00<?, ?it/s]

ChineseNet(
  (features): Sequential(
    (0): PConv2d (1, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): PBatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
    (2): PReLU(num_parameters=1)
    (3): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (4): PConv2d (96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): PBatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (6): PReLU(num_parameters=1)
    (7): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (8): PConv2d (128, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): PBatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True)
    (10): PReLU(num_parameters=1)
    (11): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (12): PConv2d (160, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): PBatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (14): PReLU(num_parameters=1)
    (15): PConv2d (256, 256, kernel_size=

100%|██████████| 330/330 [00:10<00:00, 32.46it/s]

Loss: 0.0018
Micro Precision: 0.9913 Recall: 0.9913 F1: 0.9913
Macro Precision: 0.9904 Recall: 0.9882 F1: 0.9879



  'precision', 'predicted', average, warn_for)


Time taken for inference: 1.726826s


#### Type B:  
Training data: etl2 + etl9g  
All other details are identical to Type A.  
#### Summary of results:  
Size: 66.26 MB  
F1(micro): 0.9963  
Inference time per image (PC):  1.55s   
Inference time per image (Mobile):  At least 11.24s 

In [4]:
# Benchmark ChineseNet (type B, trained on etl2 + etl9g) on the combined 'test' loader.
# `etl2_9g` and `etl2_9g_num_classes` are reused by the pruned-network cells below,
# so this cell must run before them.
etl2_9g, etl2_9g_num_classes = get_etl2_9g_dataloaders('chinese_net')
model, _ = chinese_model(etl2_9g_num_classes)
# Remap every stored tensor to CPU while deserializing: the checkpoint may hold
# CUDA tensors (training used a GPU), and without a map_location torch.load
# fails on a machine that has no CUDA device. The callable form is used because
# it is accepted by old and new PyTorch releases alike.
model.load_state_dict(
    torch.load(
        'trained_models/chinese_net_etl2_9g.weights',
        map_location=lambda storage, loc: storage,
    )
)
benchmark(model, etl2_9g['test'], 'chinese_net_etl2_9g')

restored pickled etl2 data
processing raw etl9g data
Benchmark, chinese_net_etl2_9g: 
Size of model: 66.255328 MB
Loss: 0.0006
Micro Precision: 0.9963 Recall: 0.9963 F1: 0.9963
Macro Precision: 0.9962 Recall: 0.9958 F1: 0.9958
Time taken for inference: 1.550738s


100%|██████████| 4125/4125 [01:57<00:00, 35.06it/s]


## Pruned networks:

Model: ChineseNet  
~80% convolutional feature maps pruned  
Training data: etl2 + etl9g  

#### Summary of results:  
Size: 31.22 MB  
F1(micro): 0.989  
Inference time per image (PC):  0.364s   
Inference time per image (Mobile):  At least 2.64s 

In [6]:
# Benchmark the ~80%-pruned ChineseNet on the etl2 + etl9g 'test' loader.
# Relies on `etl2_9g` and `etl2_9g_num_classes` created by the type B cell above.
model, _ = chinese_pruned_80(etl2_9g_num_classes)
print(model)
# Remap every stored tensor to CPU while deserializing: the checkpoint may hold
# CUDA tensors (training used a GPU), and without a map_location torch.load
# fails on a machine that has no CUDA device. The callable form is used because
# it is accepted by old and new PyTorch releases alike.
model.load_state_dict(
    torch.load(
        'trained_models/pruned_chinese_net_etl2_9g_80p_ft250.weights',
        map_location=lambda storage, loc: storage,
    )
)
# NOTE(review): the label is spelled 'prunned'; left unchanged because benchmark()
# may use it for more than display -- confirm before renaming.
benchmark(model, etl2_9g['test'], 'prunned_chinese_net_80')

100%|██████████| 4125/4125 [01:31<00:00, 45.23it/s]
  'precision', 'predicted', average, warn_for)


ChineseNet(
  (features): Sequential(
    (0): PConv2d (1, 26, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): PBatchNorm2d(26, eps=1e-05, momentum=0.1, affine=True)
    (2): PReLU(num_parameters=1)
    (3): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (4): PConv2d (26, 39, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): PBatchNorm2d(39, eps=1e-05, momentum=0.1, affine=True)
    (6): PReLU(num_parameters=1)
    (7): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (8): PConv2d (39, 52, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): PBatchNorm2d(52, eps=1e-05, momentum=0.1, affine=True)
    (10): PReLU(num_parameters=1)
    (11): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (12): PConv2d (52, 75, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): PBatchNorm2d(75, eps=1e-05, momentum=0.1, affine=True)
    (14): PReLU(num_parameters=1)
    (15): PConv2d (75, 93, kernel_size=(3, 3), st

Model: ChineseNet  
~90% convolutional feature maps pruned  
Training data: etl2 + etl9g  


#### Summary of results:  
Size: 27.15 MB  
F1(micro): 0.9573
Inference time per image (PC): 0.204s   
Inference time per image (Mobile):  At least 1.48s

In [7]:
# Benchmark the ~90%-pruned ChineseNet on the etl2 + etl9g 'test' loader.
# Relies on `etl2_9g` and `etl2_9g_num_classes` created by the type B cell above.
model, _ = chinese_pruned_90(etl2_9g_num_classes)
print(model)
# Remap every stored tensor to CPU while deserializing: the checkpoint may hold
# CUDA tensors (training used a GPU), and without a map_location torch.load
# fails on a machine that has no CUDA device. The callable form is used because
# it is accepted by old and new PyTorch releases alike.
model.load_state_dict(
    torch.load(
        'trained_models/pruned_chinese_net_etl2_9g_90p_ft250.weights',
        map_location=lambda storage, loc: storage,
    )
)
# NOTE(review): the label is spelled 'prunned'; left unchanged because benchmark()
# may use it for more than display -- confirm before renaming.
benchmark(model, etl2_9g['test'], 'prunned_chinese_net_90')

100%|██████████| 4125/4125 [01:30<00:00, 45.80it/s]
  'precision', 'predicted', average, warn_for)


ChineseNet(
  (features): Sequential(
    (0): PConv2d (1, 15, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): PBatchNorm2d(15, eps=1e-05, momentum=0.1, affine=True)
    (2): PReLU(num_parameters=1)
    (3): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (4): PConv2d (15, 14, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): PBatchNorm2d(14, eps=1e-05, momentum=0.1, affine=True)
    (6): PReLU(num_parameters=1)
    (7): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (8): PConv2d (14, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): PBatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True)
    (10): PReLU(num_parameters=1)
    (11): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (12): PConv2d (20, 27, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): PBatchNorm2d(27, eps=1e-05, momentum=0.1, affine=True)
    (14): PReLU(num_parameters=1)
    (15): PConv2d (27, 31, kernel_size=(3, 3), st

## Appendix  

Micro vs Macro for precision, recall, F1:  
The terms are exactly as defined in sklearn.  
Micro calculates precision, recall and F1 scores over all results.  
Macro calculates precision, recall and F1 scores over each class, and averages the results.  

Test hardware:  
CPU for inference: Intel i3-3140  
GPU for training: GTX 1060 6GB  
Mobile CPU: Octa-core (4x Cortex-A53 & 4x Cortex-A53), a low end CPU from ~2015.  

Estimating model size:  
Model size was estimated by summing the size of parameters in each model.  
Each float32 takes 4 bytes.  
For reference, squeezenet takes about 12.76MB.  

Estimating mobile performance:  
Squeezenet takes about 0.566s on the Cortex-A53,  
and 0.0781s on the Intel i3.  
The Intel i3 is therefore about 7.25x faster (0.566s / 0.0781s), and this ratio is used to estimate mobile inference time from the PC measurements. Caffe2 is unfortunately not mature enough for benchmarking different networks.  

Data used:   
Etl2 - machine printed, contains fewer classes & samples than Etl9g  
Etl9g - human written, font size varies slightly among samples, font is smaller than Etl2.  
Train, validation & test split:  
Data was split into 3 groups, stratified by class. Test(20%), Validation(16%), Train(64%) 

Training approach:  
Models were trained on the train set.  
Early stopping was used, picking the model with least loss against the validation set.  
Performance of the models is evaluated on the test set for this benchmark.  

Pruning approach:  
All conv2d feature maps are valid candidates for pruning.  
Feature maps are ranked by their Taylor importance, and the least important feature map is discarded.  
Each pruning step is followed with 250 iterations of finetuning to restore model accuracy.    