## Network in Network (NiN)
- issue with alexnet is lots of params!!! - can get too 400MB of ram
- use 1x1 convolutions to add non-linearity across activation channels & use global average pooling to integrate across all locations in last representation layer
- avoids fully connected layers all together!! reduces required model params

In [4]:
import torch
from torch import nn
from d2l import torch as d2l

In [5]:
# conv -> conv 1x1 -> conv 1x1
def nin_block(out_channels, kernel_size, strides, padding):
    return nn.Sequential(
        nn.LazyConv2d(out_channels, kernel_size, strides, padding), nn.ReLU(),
        nn.LazyConv2d(out_channels, kernel_size=1), nn.ReLU(),
        nn.LazyConv2d(out_channels, kernel_size=1), nn.ReLU())

In [6]:
class NiN(d2l.Classifier):
    def __init__(self, lr=0.1, num_classes=10):
        super().__init__()
        self.save_hyperparameters()
        self.net = nn.Sequential(
            nin_block(96, kernel_size=11, strides=4, padding=0),
            nn.MaxPool2d(3, stride=2),
            nin_block(256, kernel_size=5, strides=1, padding=2),
            nn.MaxPool2d(3, stride=2),
            nin_block(384, kernel_size=3, strides=1, padding=1),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout(0.5),
            nin_block(num_classes, kernel_size=3, strides=1, padding=1),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten())
        self.net.apply(d2l.init_cnn)

In [10]:
model = NiN()
model.layer_summary((1, 224, 224))

Sequential output shape:	 torch.Size([96, 54, 54])
MaxPool2d output shape:	 torch.Size([96, 26, 26])
Sequential output shape:	 torch.Size([256, 26, 26])
MaxPool2d output shape:	 torch.Size([256, 12, 12])
Sequential output shape:	 torch.Size([384, 12, 12])
MaxPool2d output shape:	 torch.Size([384, 5, 5])
Dropout output shape:	 torch.Size([384, 5, 5])
Sequential output shape:	 torch.Size([10, 5, 5])
AdaptiveAvgPool2d output shape:	 torch.Size([10, 1, 1])
Flatten output shape:	 torch.Size([10, 1])




In [11]:
from torchsummary import summary

summary(model, (1, 224, 224)) # much much smaller than vgg and alexnet

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 54, 54]          11,712
              ReLU-2           [-1, 96, 54, 54]               0
            Conv2d-3           [-1, 96, 54, 54]           9,312
              ReLU-4           [-1, 96, 54, 54]               0
            Conv2d-5           [-1, 96, 54, 54]           9,312
              ReLU-6           [-1, 96, 54, 54]               0
         MaxPool2d-7           [-1, 96, 26, 26]               0
            Conv2d-8          [-1, 256, 26, 26]         614,656
              ReLU-9          [-1, 256, 26, 26]               0
           Conv2d-10          [-1, 256, 26, 26]          65,792
             ReLU-11          [-1, 256, 26, 26]               0
           Conv2d-12          [-1, 256, 26, 26]          65,792
             ReLU-13          [-1, 256, 26, 26]               0
        MaxPool2d-14          [-1, 256,

In [12]:
model = NiN(lr=0.05)
trainer = d2l.Trainer(max_epochs=10, num_gpus=1)
data = d2l.FashionMNIST(batch_size=128, resize=(224, 224))
model.apply_init([next(iter(data.get_dataloader(True)))[0]], d2l.init_cnn)
trainer.fit(model, data)

KeyboardInterrupt: 