# Model

### Model #1 
* Target:
    - Set Basic Working Code
    - Set Transforms
    - Add Batch-norm to increase model efficiency
* Results:
    - Parameters: 11,212
    - Best Training Accuracy: 99.81%
    - Best Test Accuracy: 99.34%
* Analysis:
    - Heavy Model for such a problem

In [None]:
class Network(nn.Module):
    """Baseline CNN for 1-channel 28x28 images that emits 10 log-probabilities.

    Every convolution is bias-free; all but the last two are followed by
    BatchNorm and ReLU. The network has 11,212 trainable parameters.

    Shape comments below are written HxWxC. Receptive field (RF) values use
    ``rf += (kernel - 1) * jump``, where ``jump`` doubles after each
    2x2 / stride-2 pooling layer.
    """

    def __init__(self):
        # Zero-argument super() binds to this class object itself. The
        # two-argument form looks up the global name ``Network`` at call time,
        # which breaks for this class once a later definition in the file
        # rebinds that name.
        super().__init__()

        # Convolution block 1
        self.convBlock1 = nn.Sequential(
            # Conv 1: 28x28x1 -> 28x28x8, RF 3
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            # Conv 2: 28x28x8 -> 26x26x16, RF 5
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
        )

        # Transition block 1: downsample, then squeeze channels with a 1x1 conv
        self.transBlock1 = nn.Sequential(
            # MaxPool: 26x26x16 -> 13x13x16, RF 6
            nn.MaxPool2d(2, 2),
            # 1x1 conv: 13x13x16 -> 13x13x8, RF 6
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=(1, 1), padding=0, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(),
        )

        # Convolution block 2
        self.convBlock2 = nn.Sequential(
            # Conv 3: 13x13x8 -> 13x13x16, RF 10
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # Conv 4: 13x13x16 -> 11x11x32, RF 14
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        # Transition block 2: downsample, then squeeze channels with a 1x1 conv
        self.transBlock2 = nn.Sequential(
            # MaxPool: 11x11x32 -> 5x5x32, RF 16
            nn.MaxPool2d(2, 2),
            # 1x1 conv: 5x5x32 -> 5x5x16, RF 16
            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=(1, 1), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
        )

        # Convolution block 3
        self.convBlock3 = nn.Sequential(
            # Conv 5: 5x5x16 -> 3x3x16, RF 24
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # 1x1 conv: 3x3x16 -> 3x3x10, RF 24 (no BatchNorm/ReLU here)
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        )

        # Output block
        self.convBlock4 = nn.Sequential(
            # Conv 6: 3x3x10 -> 1x1x10, RF 32 (raw class scores)
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False)
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Input tensor of shape (N, 1, 28, 28). Other spatial sizes do
                not reduce to 1x1 before the final reshape, so the leading
                dimension of the result would no longer equal N.

        Returns:
            Tensor of shape (N, 10) holding log-softmax scores.
        """
        x = self.convBlock1(x)
        x = self.transBlock1(x)
        x = self.convBlock2(x)
        x = self.transBlock2(x)
        x = self.convBlock3(x)
        x = self.convBlock4(x)
        x = x.view(-1, 10)  # (N, 10, 1, 1) -> (N, 10)
        return F.log_softmax(x, dim=-1)

### Model #2
* Target:
    - Add Regularization, Dropout
    - Increase model capacity. Add more layers at the end. 
* Results:
    - Parameters: 7,400
    - Best Training Accuracy: 98.71%
    - Best Test Accuracy: 99.15%
* Analysis:
    - Reduced the model size to fewer than 8,000 parameters

In [None]:
# Default dropout probability, used when Network() is built without an
# explicit ``dropout_rate``.
dropout_value = 0.1


class Network(nn.Module):
    """Compact CNN with dropout for 1-channel 28x28 images, 10 classes.

    All convolutions are bias-free. The 3x3 convolutions (except the last)
    are followed by BatchNorm, ReLU and Dropout; the 1x1 convolutions are
    plain. With the layer sizes below the network has 7,400 trainable
    parameters.

    Shape comments are written HxWxC. Receptive field (RF) values use
    ``rf += (kernel - 1) * jump``, where ``jump`` doubles after each
    2x2 / stride-2 pooling layer.

    Args:
        dropout_rate: Probability passed to every ``nn.Dropout`` layer. When
            ``None`` (the default) the module-level ``dropout_value`` is read
            at construction time, which is what ``Network()`` has always done.
    """

    def __init__(self, dropout_rate=None):
        # Zero-argument super() binds to this class object itself. The
        # two-argument form looks up the global name ``Network`` at call time,
        # which breaks for this class once another definition in the file
        # rebinds that name.
        super().__init__()

        if dropout_rate is None:
            dropout_rate = dropout_value

        # Convolution block 1
        self.convBlock1 = nn.Sequential(
            # Conv 1: 28x28x1 -> 28x28x8, RF 3
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            # Conv 2: 28x28x8 -> 26x26x16, RF 5
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )

        # Transition block 1: squeeze channels with a 1x1 conv, then downsample
        self.transBlock1 = nn.Sequential(
            # 1x1 conv: 26x26x16 -> 26x26x8, RF 5
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=(1, 1), padding=0, bias=False),
            # MaxPool: 26x26x8 -> 13x13x8, RF 6
            nn.MaxPool2d(2, 2),
        )

        # Convolution block 2
        self.convBlock2 = nn.Sequential(
            # Conv 3: 13x13x8 -> 13x13x16, RF 10
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            # Conv 4: 13x13x16 -> 11x11x16, RF 14
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )

        # Transition block 2, part 1: channel squeeze
        self.convBlock3 = nn.Sequential(
            # 1x1 conv: 11x11x16 -> 11x11x10, RF 14
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        )
        # Transition block 2, part 2: 2x2 average pooling. Despite the
        # attribute name this is not a global pool; the name is kept so
        # existing code that accesses ``.gap`` keeps working.
        self.gap = nn.Sequential(
            # AvgPool: 11x11x10 -> 5x5x10, RF 16
            nn.AvgPool2d(kernel_size=2)
        )

        # Convolution block 3
        self.convBlock4 = nn.Sequential(
            # Conv 5: 5x5x10 -> 3x3x16, RF 24
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            # 1x1 conv: 3x3x16 -> 3x3x8, RF 24
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=(1, 1), padding=0, bias=False),
        )

        # Output block
        self.convBlock5 = nn.Sequential(
            # Conv 6: 3x3x8 -> 1x1x10, RF 32 (raw class scores)
            nn.Conv2d(in_channels=8, out_channels=10, kernel_size=(3, 3), padding=0, bias=False)
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Input tensor of shape (N, 1, 28, 28). Other spatial sizes do
                not reduce to 1x1 before the final reshape, so the leading
                dimension of the result would no longer equal N.

        Returns:
            Tensor of shape (N, 10) holding log-softmax scores.
        """
        x = self.convBlock1(x)
        x = self.transBlock1(x)
        x = self.convBlock2(x)
        x = self.convBlock3(x)
        x = self.gap(x)
        x = self.convBlock4(x)
        x = self.convBlock5(x)
        x = x.view(-1, 10)  # (N, 10, 1, 1) -> (N, 10)
        return F.log_softmax(x, dim=-1)

### Model #3
* Target:
    - Add rotation augmentation; we guess that 5-7 degrees should be sufficient.
    - Add LR Scheduler
* Results:
    - Parameters: 7,400
    - Best Training Accuracy: 98.73%
    - Best Test Accuracy: 99.24%
* Analysis:
    - Need to achieve 99.4% accuracy

In [None]:
# Default dropout probability, used when Network() is built without an
# explicit ``dropout_rate``.
dropout_value = 0.1


class Network(nn.Module):
    """Compact CNN with dropout for 1-channel 28x28 images, 10 classes.

    All convolutions are bias-free. The 3x3 convolutions (except the last)
    are followed by BatchNorm, ReLU and Dropout; the 1x1 convolutions are
    plain. With the layer sizes below the network has 7,400 trainable
    parameters.

    Shape comments are written HxWxC. Receptive field (RF) values use
    ``rf += (kernel - 1) * jump``, where ``jump`` doubles after each
    2x2 / stride-2 pooling layer.

    Args:
        dropout_rate: Probability passed to every ``nn.Dropout`` layer. When
            ``None`` (the default) the module-level ``dropout_value`` is read
            at construction time, which is what ``Network()`` has always done.
    """

    def __init__(self, dropout_rate=None):
        # Zero-argument super() binds to this class object itself. The
        # two-argument form looks up the global name ``Network`` at call time,
        # which breaks for this class if another definition in the file
        # rebinds that name.
        super().__init__()

        if dropout_rate is None:
            dropout_rate = dropout_value

        # Convolution block 1
        self.convBlock1 = nn.Sequential(
            # Conv 1: 28x28x1 -> 28x28x8, RF 3
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            # Conv 2: 28x28x8 -> 26x26x16, RF 5
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )

        # Transition block 1: squeeze channels with a 1x1 conv, then downsample
        self.transBlock1 = nn.Sequential(
            # 1x1 conv: 26x26x16 -> 26x26x8, RF 5
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=(1, 1), padding=0, bias=False),
            # MaxPool: 26x26x8 -> 13x13x8, RF 6
            nn.MaxPool2d(2, 2),
        )

        # Convolution block 2
        self.convBlock2 = nn.Sequential(
            # Conv 3: 13x13x8 -> 13x13x16, RF 10
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            # Conv 4: 13x13x16 -> 11x11x16, RF 14
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )

        # Transition block 2, part 1: channel squeeze
        self.convBlock3 = nn.Sequential(
            # 1x1 conv: 11x11x16 -> 11x11x10, RF 14
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        )
        # Transition block 2, part 2: 2x2 average pooling. Despite the
        # attribute name this is not a global pool; the name is kept so
        # existing code that accesses ``.gap`` keeps working.
        self.gap = nn.Sequential(
            # AvgPool: 11x11x10 -> 5x5x10, RF 16
            nn.AvgPool2d(kernel_size=2)
        )

        # Convolution block 3
        self.convBlock4 = nn.Sequential(
            # Conv 5: 5x5x10 -> 3x3x16, RF 24
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            # 1x1 conv: 3x3x16 -> 3x3x8, RF 24
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=(1, 1), padding=0, bias=False),
        )

        # Output block
        self.convBlock5 = nn.Sequential(
            # Conv 6: 3x3x8 -> 1x1x10, RF 32 (raw class scores)
            nn.Conv2d(in_channels=8, out_channels=10, kernel_size=(3, 3), padding=0, bias=False)
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Input tensor of shape (N, 1, 28, 28). Other spatial sizes do
                not reduce to 1x1 before the final reshape, so the leading
                dimension of the result would no longer equal N.

        Returns:
            Tensor of shape (N, 10) holding log-softmax scores.
        """
        x = self.convBlock1(x)
        x = self.transBlock1(x)
        x = self.convBlock2(x)
        x = self.convBlock3(x)
        x = self.gap(x)
        x = self.convBlock4(x)
        x = self.convBlock5(x)
        x = x.view(-1, 10)  # (N, 10, 1, 1) -> (N, 10)
        return F.log_softmax(x, dim=-1)