### This notebook is optionally accelerated with a GPU runtime.
### If you would like to use this acceleration, please select the menu option "Runtime" -> "Change runtime type", select "Hardware Accelerator" -> "GPU" and click "SAVE"

----------------------------------------------------------------------

# MobileNet v2

*Author: Pytorch Team*

**Efficient networks optimized for speed and memory, with residual blocks**

_ | _
- | -
![alt](https://pytorch.org/assets/images/mobilenet_v2_1.png) | ![alt](https://pytorch.org/assets/images/mobilenet_v2_2.png)

In [1]:
import torch
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.eval()

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

In [2]:
import torch.nn as nn

conv = nn.Conv2d(5, 3, 3)

conv

Conv2d(5, 3, kernel_size=(3, 3), stride=(1, 1))

In [3]:
# Python 3 program to perform 2D convolution operation
import torch
import torch.nn as nn

'''torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
'''
in_channels = 3
out_channels = 3
kernel_size = 3
conv = nn.Conv2d(in_channels, out_channels, kernel_size, 1, 1)
conv_first_1 = nn.Conv2d(3, 64, 3, 1, 1, bias=True)

# conv = nn.Conv2d(2, 3, 2)

'''input of size [N,C,H, W]
N==>batch size,
C==> number of channels,
H==> height of input planes in pixels,
W==> width in pixels.
'''

# define the input with below info
N=5
C=3
H=950
W=512
# input = torch.empty(N,C,H,W).random_(256)
# print("Input Size:",input.size())

# # Perform convolution operation
# output = conv_first_1(input)
# print("Output Size:",output.size())

# # With square kernels (2,2) and equal stride
# conv = nn.Conv2d(2, 3, 2, stride=2)
# output = conv(input)
# print("Output Size:",output.size())





# # non-square kernels and unequal stride and with padding
# conv = nn.Conv2d(2, 3, (2, 3), stride=(2, 1), padding=(2, 1))
# output = conv(input)
# print("Output Size:",output.size())

# # non-square kernels and unequal stride and with padding and dilation
# conv = nn.Conv2d(2, 3, (2, 3), stride=(2, 1), padding=(2, 1),
# dilation=(2, 1))
# output = conv(input)
# print("Output Size:",output.size())

nf = 64
print("-----------------")
lrelu = nn.LeakyReLU(negative_slope=0.1, inplace=True)
depth_wise = nn.Conv2d(3,nf,3,1,1,groups=3)
point_wise = nn.Conv2d(nf,nf,1,1,1,groups=1)

conv_first_1 = nn.Conv2d(3, nf, 3, 1, 1, bias=True)
conv_first_2 = nn.Conv2d(nf, nf, 3, 2, 1, bias=True)
conv_first_3 = nn.Conv2d(nf, nf, 3, 2, 1, bias=True)

input = torch.empty(N,C,H,W).random_(256)
print("Input Size:",input.size())

# Perform convolution operation
L1_fea = lrelu(depth_wise(input.view(-1, C, H, W)))
print("Output Size:",L1_fea.size())

L1_fea = lrelu(point_wise(L1_fea))
print("Output Size:",L1_fea.size())

# Input Size: torch.Size([5, 3, 950, 512])
# Output Size: torch.Size([5, 64, 950, 512])
# Output Size: torch.Size([5, 64, 952, 514])

# L1_fea = lrelu(conv_first_1(input.view(-1, C, H, W)))
# print("Output Size:",L1_fea.size())

# L1_fea = lrelu(conv_first_2(L1_fea))
# print("Output Size:",L1_fea.size())

# L1_fea = lrelu(conv_first_3(L1_fea))
# print("Output Size:",L1_fea.size())


-----------------


ValueError: ignored

In [5]:
from torch.nn import Conv2d

N=5
C=3
H=950
W=512

lrelu = nn.LeakyReLU(negative_slope=0.1, inplace=True)
conv = nn.Sequential(Conv2d(in_channels=C, out_channels=64, kernel_size=3, bias=True),lrelu)
params = sum(p.numel() for p in conv.parameters() if p.requires_grad)

x = torch.rand(N, C, H, W)
print("Input Size:",x.size())
out = conv(x)

depth_conv = nn.Sequential(Conv2d(in_channels=C, out_channels=C, kernel_size=3, groups=C,  bias=True),lrelu)
point_conv = nn.Sequential(Conv2d(in_channels=C, out_channels=64, kernel_size=1,  bias=True),lrelu)

depthwise_separable_conv = torch.nn.Sequential(depth_conv, point_conv)
params_depthwise = sum(p.numel() for p in depthwise_separable_conv.parameters() if p.requires_grad)

out_depthwise = depthwise_separable_conv(x)

print(f"The standard convolution uses {params} parameters.")
print(f"The depthwise separable convolution uses {params_depthwise} parameters.")

assert out.shape == out_depthwise.shape, "Size mismatch"


################################## Inverted Residual
regular_residual_sdsd = nn.Sequential(nn.Conv2d(64, 64, C, 1, 1, bias=True), lrelu, nn.Conv2d(64, 64, C, 1, 1, bias=True))
params_regular_residual = sum(p.numel() for p in regular_residual_sdsd.parameters() if p.requires_grad)

expand_ratio = 6
hidden_channel_dim = int(round(64 * expand_ratio))
inverted_residual = nn.Sequential(Conv2d(in_channels=64, out_channels=hidden_channel_dim, kernel_size=1,  bias=True), # Pointwise + ReLu + Expansion by 6
                                  lrelu,
                                  Conv2d(in_channels=hidden_channel_dim, out_channels=hidden_channel_dim, kernel_size=3, groups=hidden_channel_dim,  bias=True), # Depthwise + ReLu
                                  lrelu,
                                  Conv2d(in_channels=hidden_channel_dim, out_channels=64, kernel_size=1,  bias=True, padding=1)) # Pointwise Linear

### DONT FORGET TO ADD HERE THE ACTUAL RESIDUAL PART OF IT!!! RETURNING X+INVERTED_RESIDUAL(X) ########################### SO IMPORTANT ####################

params_inverted_residual = sum(p.numel() for p in inverted_residual.parameters() if p.requires_grad)

print(f"The standard residual uses {params_regular_residual} parameters.")
print(f"The standard residual uses {params_inverted_residual} parameters.")

################################# Testing dimensions

print("Residual Size:",(regular_residual_sdsd(depthwise_separable_conv(x))).size())
print("Inverted Residual Size:",(inverted_residual(depthwise_separable_conv(x))).size())


Input Size: torch.Size([5, 3, 950, 512])
The standard convolution uses 1792 parameters.
The depthwise separable convolution uses 286 parameters.
The standard residual uses 73856 parameters.
The standard residual uses 53440 parameters.
Residual Size: torch.Size([5, 64, 948, 510])
Inverted Residual Size: torch.Size([5, 64, 948, 510])


In [None]:
import torch.nn as nn

class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio, norm_layer=None):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            norm_layer(oup),
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)



All pre-trained models expect input images normalized in the same way,
i.e. mini-batches of 3-channel RGB images of shape `(3 x H x W)`, where `H` and `W` are expected to be at least `224`.
The images have to be loaded in to a range of `[0, 1]` and then normalized using `mean = [0.485, 0.456, 0.406]`
and `std = [0.229, 0.224, 0.225]`.

Here's a sample execution.

In [2]:
model

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

In [None]:
# Download an example image from the pytorch website
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [None]:
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

In [None]:
# Download ImageNet labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

In [None]:
# Read the categories
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]
# Show top categories per image
top5_prob, top5_catid = torch.topk(probabilities, 5)
for i in range(top5_prob.size(0)):
    print(categories[top5_catid[i]], top5_prob[i].item())

In [34]:
from torchvision.models import mobilenet_v2

# Old weights with accuracy 76.130%
model = mobilenet_v2(pretrained=True)

In [35]:
from torchvision.models.quantization import mobilenet_v2

# Old weights with accuracy 76.130%
model = mobilenet_v2(pretrained=True, quantize=True)

Downloading: "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth


  0%|          | 0.00/3.42M [00:00<?, ?B/s]

### Model Description

The MobileNet v2 architecture is based on an inverted residual structure where the input and output of the residual block are thin bottleneck layers opposite to traditional residual models which use expanded representations in the input. MobileNet v2 uses lightweight depthwise convolutions to filter features in the intermediate expansion layer. Additionally, non-linearities in the narrow layers were removed in order to maintain representational power.

| Model structure | Top-1 error | Top-5 error |
| --------------- | ----------- | ----------- |
|  mobilenet_v2       | 28.12       | 9.71       |


### References

 - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)