In [1]:
import sys
sys.path.append('./data')

import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.optim as optim
from matplotlib import pyplot as plt
from dataset import flowerDataset
import torchvision
from torch.autograd import Variable
print("Pytorch Version: ", torch.__version__)

Pytorch Version:  1.7.1


### define MobileNetV3 module

In [2]:
def Hswish(x,inplace=True):
    return x * F.relu6(x + 3., inplace=inplace) / 6.

def Hsigmoid(x,inplace=True):
    return F.relu6(x + 3., inplace=inplace) / 6.

# Squeeze-And-Excite模块
class SEModule(nn.Module):
    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.se = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y=self.avg_pool(x).view(b, c)
        y=self.se(y)
        y = Hsigmoid(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

class Bottleneck(nn.Module):
    def __init__(self,in_channels,out_channels,kernel_size,exp_channels,stride,se='True',nl='HS'):
        super(Bottleneck, self).__init__()
        padding = (kernel_size - 1) // 2
        if nl == 'RE':
            self.nlin_layer = F.relu6
        elif nl == 'HS':
            self.nlin_layer = Hswish
        self.stride=stride
        if se:
            self.se=SEModule(exp_channels)
        else:
            self.se=None
        self.conv1=nn.Conv2d(in_channels,exp_channels,kernel_size=1,stride=1,padding=0,bias=False)
        self.bn1 = nn.BatchNorm2d(exp_channels)
        self.conv2=nn.Conv2d(exp_channels,exp_channels,kernel_size=kernel_size,stride=stride,
                             padding=padding,groups=exp_channels,bias=False)
        self.bn2=nn.BatchNorm2d(exp_channels)
        self.conv3=nn.Conv2d(exp_channels,out_channels,kernel_size=1,stride=1,padding=0,bias=False)
        self.bn3=nn.BatchNorm2d(out_channels)
        # 先初始化一个空序列，之后改造其成为残差链接
        self.shortcut = nn.Sequential()
        # 只有步长为1且输入输出通道不相同时才采用跳跃连接(想一下跳跃链接的过程，输入输出通道相同这个跳跃连接就没意义了)
        if stride == 1 and in_channels != out_channels:
            self.shortcut = nn.Sequential(
                # 下面的操作卷积不改变尺寸和通道数
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self,x):
        out=self.nlin_layer(self.bn1(self.conv1(x)))
        if self.se is not None:
            out=self.bn2(self.conv2(out))
            out=self.nlin_layer(self.se(out))
        else:
            out = self.nlin_layer(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride == 1 else out
        return out


class MobileNetV3_large(nn.Module):
    # (out_channels,kernel_size,exp_channels,stride,se,nl)
    cfg=[
        (16,3,16,1,False,'RE'),
        (24,3,64,2,False,'RE'),
        (24,3,72,1,False,'RE'),
        (40,5,72,2,True,'RE'),
        (40,5,120,1,True,'RE'),
        (40,5,120,1,True,'RE'),
        (80,3,240,2,False,'HS'),
        (80,3,200,1,False,'HS'),
        (80,3,184,1,False,'HS'),
        (80,3,184,1,False,'HS'),
        (112,3,480,1,True,'HS'),
        (112,3,672,1,True,'HS'),
        (160,5,672,2,True,'HS'),
        (160,5,960,1,True,'HS'),
        (160,5,960,1,True,'HS')
    ]
    def __init__(self,num_classes=17):
        super(MobileNetV3_large,self).__init__()
        self.conv1=nn.Conv2d(3,16,3,2,padding=1,bias=False)
        self.bn1=nn.BatchNorm2d(16)
        # 根据cfg数组自动生成所有的Bottleneck层
        self.layers = self._make_layers(in_channels=16)
        self.conv2=nn.Conv2d(160,960,1,stride=1,bias=False)
        self.bn2=nn.BatchNorm2d(960)
        # 卷积后不跟BN，就应该把bias设置为True
        self.conv3=nn.Conv2d(960,1280,1,1,padding=0,bias=True)
        self.conv4=nn.Conv2d(1280,num_classes,1,stride=1,padding=0,bias=True)

    def _make_layers(self,in_channels):
        layers=[]
        for out_channels,kernel_size,exp_channels,stride,se,nl in self.cfg:
            layers.append(
                Bottleneck(in_channels,out_channels,kernel_size,exp_channels,stride,se,nl)
            )
            in_channels=out_channels
        return nn.Sequential(*layers)

    def forward(self,x):
        out=Hswish(self.bn1(self.conv1(x)))
        out=self.layers(out)
        out=Hswish(self.bn2(self.conv2(out)))
        out=F.avg_pool2d(out,7)
        out=Hswish(self.conv3(out))
        out=self.conv4(out)
        # 因为原论文中最后一层是卷积层来实现全连接的效果，维度是四维的，后两维是1，在计算损失函数的时候要求二维，因此在这里需要做一个resize
        a,b=out.size(0),out.size(1)
        out=out.view(a,b)
        return out

class MobileNetV3_small(nn.Module):
    # (out_channels,kernel_size,exp_channels,stride,se,nl)
    cfg = [
        (16,3,16,2,True,'RE'),
        (24,3,72,2,False,'RE'),
        (24,3,88,1,False,'RE'),
        (40,5,96,2,True,'HS'),
        (40,5,240,1,True,'HS'),
        (40,5,240,1,True,'HS'),
        (48,5,120,1,True,'HS'),
        (48,5,144,1,True,'HS'),
        (96,5,288,2,True,'HS'),
        (96,5,576,1,True,'HS'),
        (96,5,576,1,True,'HS')
    ]
    def __init__(self,num_classes=17):
        super(MobileNetV3_small,self).__init__()
        self.conv1=nn.Conv2d(3,16,3,2,padding=1,bias=False)
        self.bn1=nn.BatchNorm2d(16)
        # 根据cfg数组自动生成所有的Bottleneck层
        self.layers = self._make_layers(in_channels=16)
        self.conv2=nn.Conv2d(96,576,1,stride=1,bias=False)
        self.bn2=nn.BatchNorm2d(576)
        # 卷积后不跟BN，就应该把bias设置为True
        self.conv3=nn.Conv2d(576,1280,1,1,padding=0,bias=True)
        self.conv4=nn.Conv2d(1280,num_classes,1,stride=1,padding=0,bias=True)

    def _make_layers(self,in_channels):
        layers=[]
        for out_channels,kernel_size,exp_channels,stride,se,nl in self.cfg:
            layers.append(
                Bottleneck(in_channels,out_channels,kernel_size,exp_channels,stride,se,nl)
            )
            in_channels=out_channels
        return nn.Sequential(*layers)

    def forward(self,x):
        out=Hswish(self.bn1(self.conv1(x)))
        out=self.layers(out)
        out=self.bn2(self.conv2(out))
        se=SEModule(out.size(1))
        out=Hswish(se(out))
        out = F.avg_pool2d(out, 7)
        out = Hswish(self.conv3(out))
        out = self.conv4(out)
        # 因为原论文中最后一层是卷积层来实现全连接的效果，维度是四维的，后两维是1，在计算损失函数的时候要求二维，因此在这里需要做一个resize
        a, b = out.size(0), out.size(1)
        out = out.view(a, b)
        return out


def module_test():
    net=MobileNetV3_small()
    x=torch.randn(2,3,224,224)
    y=net(x)
    print(y.size())
    print(y)
#
# if __name__=="__main__":
#     module_test()

### train

In [3]:
#宏定义一些数据，如epoch数，batchsize等
MAX_EPOCH=50
BATCH_SIZE=12
LR=0.0001
log_interval=3
val_interval=1

In [4]:
# ============================ step 1/5 数据 ============================
split_dir=os.path.join(".","data","splitData")
train_dir=os.path.join(split_dir,"train")
valid_dir=os.path.join(split_dir,"valid")

#对训练集所需要做的预处理
train_transform=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

#对验证集所需要做的预处理
valid_transform=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

# 构建MyDataset实例
train_data=flowerDataset(data_dir=train_dir,transform=train_transform)
valid_data=flowerDataset(data_dir=valid_dir,transform=valid_transform)

# 构建DataLoader
# 训练集数据最好打乱
# DataLoader的实质就是把数据集加上一个索引号，再返回
train_loader=DataLoader(dataset=train_data,batch_size=BATCH_SIZE,shuffle=True)
valid_loader=DataLoader(dataset=valid_data,batch_size=BATCH_SIZE)

In [5]:
# ============================ step 2/5 模型 ============================
net=MobileNetV3_small(num_classes=17)
if torch.cuda.is_available():
    net.cuda()

In [6]:
# ============================ step 3/5 损失函数 ============================
criterion=nn.CrossEntropyLoss()

In [7]:
# ============================ step 4/5 优化器 ============================
optimizer=optim.Adam(net.parameters(),lr=LR, betas=(0.9, 0.99))# 选择优化器

In [8]:
# ============================ step 5/5 训练 ============================
# 记录每一次的数据，方便绘图
train_curve=list()
valid_curve=list()
net.train()
accurancy_global=0.0

for epoch in range(MAX_EPOCH):
    loss_mean=0.
    correct=0.
    total=0.
    running_loss = 0.0

    for i,data in enumerate(train_loader):
        img,label=data
        img = Variable(img)
        label = Variable(label)
        if torch.cuda.is_available():
            img=img.cuda()
            label=label.cuda()
            
        # 前向传播
        out=net(img)
        optimizer.zero_grad()  # 归0梯度
        loss=criterion(out,label)#得到损失函数

        print_loss=loss.data.item()

        loss.backward()#反向传播
        optimizer.step()#优化
        if (i+1)%log_interval==0:
            print('epoch:{},loss:{:.4f}'.format(epoch+1,loss.data.item()))
        _, predicted = torch.max(out.data, 1)
        total += label.size(0)

        correct += (predicted == label).sum()
    print("============================================")
    accurancy=correct / total
    if accurancy>accurancy_global:
        torch.save(net, './weights/mobilenetv3_small_best.pkl')
        print("准确率由：", accurancy_global, "上升至：", accurancy, "已更新并保存权值为weights/mobilenetv3_small_best.pkl")
        accurancy_global=accurancy
    print('第%d个epoch的识别准确率为：%d%%' % (epoch + 1, 100*accurancy))

torch.save(net, './weights/mobilenetv3_small_last.pkl')
print("训练完毕，权重已保存为：weights/mobilenetv3_small_last.pkl")

epoch:1,loss:2.8295
epoch:1,loss:2.8362
epoch:1,loss:2.8213
epoch:1,loss:2.8160
epoch:1,loss:2.8091
epoch:1,loss:2.7895
epoch:1,loss:2.7912
epoch:1,loss:2.8422
epoch:1,loss:2.7545
epoch:1,loss:2.8413
epoch:1,loss:2.7419
epoch:1,loss:2.8340
epoch:1,loss:2.7724
epoch:1,loss:2.7339
epoch:1,loss:2.6758
epoch:1,loss:2.7644
epoch:1,loss:2.8381
epoch:1,loss:2.6368
epoch:1,loss:2.7216
epoch:1,loss:2.7274
epoch:1,loss:2.7478
epoch:1,loss:2.7881
epoch:1,loss:2.3802
epoch:1,loss:2.7555
epoch:1,loss:2.5850
epoch:1,loss:2.6754
epoch:1,loss:2.3837
epoch:1,loss:2.7608
epoch:1,loss:2.5011
epoch:1,loss:2.8282
准确率由： 0.0 上升至： tensor(0.1241) 已更新并保存权值为weights/mobilenetv3_small_best.pkl
第1个epoch的识别准确率为：12%
epoch:2,loss:2.6401
epoch:2,loss:2.2038
epoch:2,loss:2.4756
epoch:2,loss:2.6820
epoch:2,loss:2.5558
epoch:2,loss:2.3857
epoch:2,loss:2.4500
epoch:2,loss:2.5564
epoch:2,loss:2.6627
epoch:2,loss:2.2561
epoch:2,loss:2.4832
epoch:2,loss:2.4108
epoch:2,loss:2.4368
epoch:2,loss:2.5672
epoch:2,loss:2.0911
epoch:

epoch:12,loss:1.2528
epoch:12,loss:0.6257
epoch:12,loss:1.0929
epoch:12,loss:1.3599
epoch:12,loss:1.1220
epoch:12,loss:0.9403
epoch:12,loss:1.1208
epoch:12,loss:0.9956
epoch:12,loss:0.9270
epoch:12,loss:1.0419
epoch:12,loss:0.9568
epoch:12,loss:1.4081
epoch:12,loss:1.1171
epoch:12,loss:0.7972
epoch:12,loss:1.4273
epoch:12,loss:0.9166
epoch:12,loss:1.1608
epoch:12,loss:0.9616
epoch:12,loss:1.0161
epoch:12,loss:0.9740
epoch:12,loss:1.5452
epoch:12,loss:1.4900
epoch:12,loss:0.7721
epoch:12,loss:1.5859
epoch:12,loss:0.9792
epoch:12,loss:1.2377
epoch:12,loss:0.8858
epoch:12,loss:1.2491
epoch:12,loss:0.9087
epoch:12,loss:1.1556
准确率由： tensor(0.5790) 上升至： tensor(0.6176) 已更新并保存权值为weights/mobilenetv3_small_best.pkl
第12个epoch的识别准确率为：61%
epoch:13,loss:1.1013
epoch:13,loss:0.7452
epoch:13,loss:1.1491
epoch:13,loss:0.8293
epoch:13,loss:1.1031
epoch:13,loss:1.6964
epoch:13,loss:0.7906
epoch:13,loss:1.0826
epoch:13,loss:0.8106
epoch:13,loss:0.6242
epoch:13,loss:1.3030
epoch:13,loss:0.7219
epoch:13,los

epoch:22,loss:0.3369
epoch:22,loss:0.1686
epoch:22,loss:0.1982
epoch:22,loss:0.1867
epoch:22,loss:0.1978
epoch:22,loss:0.4188
epoch:22,loss:0.1641
准确率由： tensor(0.8309) 上升至： tensor(0.8474) 已更新并保存权值为weights/mobilenetv3_small_best.pkl
第22个epoch的识别准确率为：84%
epoch:23,loss:0.2764
epoch:23,loss:0.2277
epoch:23,loss:0.9648
epoch:23,loss:0.8772
epoch:23,loss:0.2863
epoch:23,loss:0.8109
epoch:23,loss:0.1196
epoch:23,loss:0.3588
epoch:23,loss:0.4312
epoch:23,loss:0.2688
epoch:23,loss:0.3644
epoch:23,loss:0.4059
epoch:23,loss:0.4033
epoch:23,loss:0.2149
epoch:23,loss:0.2790
epoch:23,loss:0.2539
epoch:23,loss:0.4347
epoch:23,loss:0.4531
epoch:23,loss:0.5668
epoch:23,loss:0.2721
epoch:23,loss:0.1672
epoch:23,loss:0.4395
epoch:23,loss:0.7783
epoch:23,loss:0.1915
epoch:23,loss:0.5081
epoch:23,loss:0.4532
epoch:23,loss:0.9763
epoch:23,loss:0.1903
epoch:23,loss:0.2864
epoch:23,loss:0.2742
第23个epoch的识别准确率为：83%
epoch:24,loss:0.4355
epoch:24,loss:0.2144
epoch:24,loss:0.2419
epoch:24,loss:0.6507
epoch:24,los

epoch:33,loss:0.0718
epoch:33,loss:0.0579
epoch:33,loss:0.1581
epoch:33,loss:0.1894
epoch:33,loss:0.1957
epoch:33,loss:0.0481
epoch:33,loss:0.0270
epoch:33,loss:0.0652
epoch:33,loss:0.1018
epoch:33,loss:0.0755
epoch:33,loss:0.0755
epoch:33,loss:1.0289
epoch:33,loss:0.0929
第33个epoch的识别准确率为：92%
epoch:34,loss:0.1089
epoch:34,loss:0.0388
epoch:34,loss:0.1715
epoch:34,loss:0.3693
epoch:34,loss:0.1235
epoch:34,loss:0.2185
epoch:34,loss:0.4265
epoch:34,loss:0.1267
epoch:34,loss:0.1242
epoch:34,loss:0.3040
epoch:34,loss:0.2701
epoch:34,loss:0.2222
epoch:34,loss:0.0473
epoch:34,loss:0.0412
epoch:34,loss:0.3155
epoch:34,loss:0.0193
epoch:34,loss:0.0989
epoch:34,loss:0.3018
epoch:34,loss:0.0528
epoch:34,loss:0.4919
epoch:34,loss:0.1768
epoch:34,loss:0.7827
epoch:34,loss:0.2072
epoch:34,loss:0.1493
epoch:34,loss:0.6354
epoch:34,loss:0.4203
epoch:34,loss:0.0291
epoch:34,loss:0.1900
epoch:34,loss:0.0563
epoch:34,loss:0.4967
第34个epoch的识别准确率为：93%
epoch:35,loss:0.0781
epoch:35,loss:0.0415
epoch:35,loss

epoch:45,loss:0.1439
epoch:45,loss:0.0310
epoch:45,loss:0.1171
epoch:45,loss:0.0291
epoch:45,loss:1.0967
epoch:45,loss:0.1526
epoch:45,loss:0.1908
epoch:45,loss:0.0215
epoch:45,loss:0.0268
epoch:45,loss:0.2768
epoch:45,loss:0.0891
epoch:45,loss:0.0883
epoch:45,loss:0.2450
epoch:45,loss:0.0124
epoch:45,loss:0.0528
epoch:45,loss:0.0850
epoch:45,loss:0.1385
epoch:45,loss:0.0337
epoch:45,loss:0.0954
epoch:45,loss:0.0067
epoch:45,loss:0.0240
epoch:45,loss:0.0195
epoch:45,loss:0.0880
epoch:45,loss:0.1573
epoch:45,loss:0.3403
epoch:45,loss:0.0475
epoch:45,loss:0.3375
epoch:45,loss:0.0255
epoch:45,loss:0.7810
epoch:45,loss:0.0268
第45个epoch的识别准确率为：95%
epoch:46,loss:0.0926
epoch:46,loss:0.1088
epoch:46,loss:0.1027
epoch:46,loss:0.0206
epoch:46,loss:0.0531
epoch:46,loss:0.0421
epoch:46,loss:0.0836
epoch:46,loss:0.1477
epoch:46,loss:0.0084
epoch:46,loss:0.0136
epoch:46,loss:0.0930
epoch:46,loss:0.1895
epoch:46,loss:0.0181
epoch:46,loss:0.2181
epoch:46,loss:0.0753
epoch:46,loss:0.0078
epoch:46,loss

In [4]:
from torchviz import make_dot

small_model = torch.load("./weights/mobilenetv3_small_best.pkl")

params = list(small_model.parameters())
k = 0
for i in params:
        l = 1
        print("该层的结构：" + str(list(i.size())))
        for j in i.size():
                l *= j
        print("该层参数和：" + str(l))
        k = k + l
print("总参数数量和：" + str(k))

该层的结构：[16, 3, 3, 3]
该层参数和：432
该层的结构：[16]
该层参数和：16
该层的结构：[16]
该层参数和：16
该层的结构：[4, 16]
该层参数和：64
该层的结构：[16, 4]
该层参数和：64
该层的结构：[16, 16, 1, 1]
该层参数和：256
该层的结构：[16]
该层参数和：16
该层的结构：[16]
该层参数和：16
该层的结构：[16, 1, 3, 3]
该层参数和：144
该层的结构：[16]
该层参数和：16
该层的结构：[16]
该层参数和：16
该层的结构：[16, 16, 1, 1]
该层参数和：256
该层的结构：[16]
该层参数和：16
该层的结构：[16]
该层参数和：16
该层的结构：[72, 16, 1, 1]
该层参数和：1152
该层的结构：[72]
该层参数和：72
该层的结构：[72]
该层参数和：72
该层的结构：[72, 1, 3, 3]
该层参数和：648
该层的结构：[72]
该层参数和：72
该层的结构：[72]
该层参数和：72
该层的结构：[24, 72, 1, 1]
该层参数和：1728
该层的结构：[24]
该层参数和：24
该层的结构：[24]
该层参数和：24
该层的结构：[88, 24, 1, 1]
该层参数和：2112
该层的结构：[88]
该层参数和：88
该层的结构：[88]
该层参数和：88
该层的结构：[88, 1, 3, 3]
该层参数和：792
该层的结构：[88]
该层参数和：88
该层的结构：[88]
该层参数和：88
该层的结构：[24, 88, 1, 1]
该层参数和：2112
该层的结构：[24]
该层参数和：24
该层的结构：[24]
该层参数和：24
该层的结构：[24, 96]
该层参数和：2304
该层的结构：[96, 24]
该层参数和：2304
该层的结构：[96, 24, 1, 1]
该层参数和：2304
该层的结构：[96]
该层参数和：96
该层的结构：[96]
该层参数和：96
该层的结构：[96, 1, 5, 5]
该层参数和：2400
该层的结构：[96]
该层参数和：96
该层的结构：[96]
该层参数和：96
该层的结构：[40, 96, 1, 1]
该层参数和：3840
该层的结构：[40]
该层参数和：

### inference

In [28]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

split_dir=os.path.join(".","data","splitData")
test_dir=os.path.join(split_dir,"test")
# test_dir = os.path.join(".", "data", "rawData")

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

test_data = flowerDataset(data_dir=test_dir, transform=test_transform)
test_dataLoader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [29]:
small_model = torch.load("./weights/mobilenetv3_small_best.pkl")
small_model.eval()
correct = 0.
total=0.
with torch.no_grad():
    for i, data in enumerate(test_dataLoader):
        img, label = data
        img = Variable(img)
        label = Variable(label)
        img, label = img.to(device), label.to(device)
        print("label:", label)
        output = small_model(img)
        _, predicted = torch.max(output.data, 1)
        print("predict:", predicted)
        total += label.size(0)
        correct += (predicted == label).sum()

acc = correct/total * 100.
print("Total:", total)
print("Accuracy:{}%".format(acc))

label: tensor([15, 15, 15, 15, 15, 15, 15, 15,  9,  9,  9,  9])
predict: tensor([10, 15, 15, 15,  1, 15, 10, 10,  9,  9,  9,  9])
label: tensor([9, 9, 9, 9, 2, 2, 2, 2, 2, 2, 2, 2])
predict: tensor([12,  5,  9, 14,  1, 13,  2,  8,  2,  2,  2, 13])
label: tensor([4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0])
predict: tensor([ 4, 10,  4, 10,  8,  4,  8,  5,  0, 14,  0,  0])
label: tensor([ 0,  0,  0,  0, 11, 11, 11, 11, 11, 11, 11, 11])
predict: tensor([ 7,  0,  0, 15, 11, 11, 11, 11,  4, 11, 12, 11])
label: tensor([8, 8, 8, 8, 8, 8, 8, 8, 3, 3, 3, 3])
predict: tensor([ 8,  8,  8,  8,  8, 15,  7,  8,  1, 16,  2,  3])
label: tensor([ 3,  3,  3,  3, 13, 13, 13, 13, 13, 13, 13, 13])
predict: tensor([ 3,  2,  1,  5, 13,  2,  7, 13,  7,  7, 13,  2])
label: tensor([ 5,  5,  5,  5,  5,  5,  5,  5, 16, 16, 16, 16])
predict: tensor([14,  5,  5,  5, 15,  5,  1,  5, 10, 16,  1,  4])
label: tensor([16, 16, 16, 16,  1,  1,  1,  1,  1,  1,  1,  1])
predict: tensor([16,  0, 16, 16,  1, 15, 13,  1,  1,  8, 13, 1

### quantization

In [32]:
small_model.state_dict()

OrderedDict([('conv1.weight',
              tensor([[[[-1.8063e-02, -1.5637e-01,  1.5067e-02],
                        [ 5.6862e-02,  1.9757e-02,  9.9112e-02],
                        [-9.4381e-02, -3.3128e-02,  9.1726e-02]],
              
                       [[-5.6205e-02,  1.2083e-01,  8.2373e-05],
                        [ 1.9790e-01, -1.0717e-02,  2.0258e-01],
                        [ 2.8617e-02,  5.2607e-02,  4.5153e-02]],
              
                       [[ 1.7814e-01,  5.4553e-03,  1.6671e-01],
                        [-6.8266e-03,  7.3133e-02, -1.8044e-01],
                        [ 5.0680e-03,  4.7459e-02,  4.2604e-02]]],
              
              
                      [[[-2.2063e-02,  4.6733e-02, -2.1609e-03],
                        [-1.3240e-01, -8.4406e-02, -1.9355e-01],
                        [-8.3341e-02, -9.1849e-03, -1.6837e-01]],
              
                       [[-1.1825e-02, -9.1605e-02, -1.5841e-01],
                        [-1.4194e-01, -1.2681

In [33]:
def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

In [37]:
_make_divisible(32*0.4, 4)

12