In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import numpy as np

In [2]:
class ConvNet(nn.Module):
    """Two-convolution classifier for single-channel 28x28 images.

    Pipeline: conv1 -> ReLU -> conv2 -> 2x2 max-pool -> dropout -> flatten
    -> fc1 -> ReLU -> dropout -> fc2 -> log-softmax over 10 classes.

    ``forward`` and ``forward_verbose`` run the exact same layers; the
    verbose variant additionally prints the tensor shape after each stage.
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=32,
            kernel_size=3,
            stride=1)
        self.conv2 = nn.Conv2d(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1
        )

        # Channel-wise dropout: applied to the 4-D feature maps after pooling.
        self.dropout_25 = nn.Dropout2d(0.25)
        # Element-wise dropout: applied to the flattened (N, 128) activations.
        # Dropout2d is only meant for 3-D/4-D inputs, and the rate now matches
        # the attribute name (it used to be 0.25).
        self.dropout_50 = nn.Dropout(0.5)

        # Spatial size: 28 -> 26 (conv1, k=3) -> 24 (conv2, k=3) -> 12 (pool 2),
        # with 64 channels, so the flattened feature vector has 64*12*12 = 9216
        # entries.
        self.fc1 = nn.Linear(
            in_features=64 * 12 * 12,
            out_features=128)

        self.fc2 = nn.Linear(
            in_features=128,
            out_features=10)

    def _run_layers(self, x, verbose=False):
        """Shared implementation behind ``forward`` and ``forward_verbose``.

        Args:
            x: input batch; the fc1 layer size requires (N, 1, 28, 28).
            verbose: when True, print ``[stage]shape`` after every stage.

        Returns:
            Tensor of shape (N, 10) holding per-class log-probabilities.
        """
        def trace(stage, tensor):
            if verbose:
                print(f'[{stage}]{tensor.shape}')

        trace('Initial', x)

        x = F.relu(self.conv1(x))
        trace('1st Conv', x)

        # NOTE(review): no ReLU follows conv2 (the max-pool is the only
        # non-linearity before fc1) - confirm this omission is intentional.
        x = self.conv2(x)
        trace('2nd Conv', x)

        x = F.max_pool2d(x, 2)
        trace('MaxPool', x)

        x = self.dropout_25(x)
        x = torch.flatten(x, 1)
        trace('Flatten', x)

        x = F.relu(self.fc1(x))
        trace('FC Layer 1', x)

        x = self.dropout_50(x)
        x = self.fc2(x)
        trace('FC Layer 2', x)

        output = F.log_softmax(x, dim=1)
        trace('Softmax', output)

        return output

    def forward_verbose(self, x):
        """Run the forward pass while printing the shape after each stage."""
        return self._run_layers(x, verbose=True)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for the input batch."""
        return self._run_layers(x)
        


In [3]:
# Dummy batch holding one single-channel 28x28 image filled with ones,
# used to probe the network's layer shapes.
sample = torch.ones(1, 1, 28, 28)
net = ConvNet()
sample

tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 

In [4]:
# Push the dummy sample through the network, printing the tensor shape after
# each stage; the cell's displayed value is the returned log-softmax output.
net.forward_verbose(sample)

[Initial]torch.Size([1, 1, 28, 28])
[1st Conv]torch.Size([1, 32, 26, 26])
[2nd Conv]torch.Size([1, 64, 24, 24])
[MaxPool]torch.Size([1, 64, 12, 12])
[Flatten]torch.Size([1, 9216])
[FC Layer 1]torch.Size([1, 128])
[FC Layer 1]torch.Size([1, 10])
[Softmax]torch.Size([1, 10])


tensor([[-2.3919, -2.2580, -2.2217, -2.5686, -2.1988, -2.3308, -2.2612, -2.1705,
         -2.3731, -2.3104]], grad_fn=<LogSoftmaxBackward>)

In [26]:
def train(model, train_data, optimizer, epoch, log_interval=10):
    """Run one training pass over ``train_data``, updating ``model`` in place.

    Args:
        model: ``nn.Module`` whose output is fed to ``F.nll_loss`` together
            with the batch targets.
        train_data: iterable yielding ``(data, target)`` batches; it must
            expose a ``dataset`` attribute supporting ``len()`` (used only for
            the progress message).
        optimizer: optimizer whose ``zero_grad``/``step`` are called once per
            batch.
        epoch: epoch number, used only in the progress message.
        log_interval: print the current loss every ``log_interval`` batches
            (batch 0 is always printed). Defaults to 10.

    Raises:
        ValueError: if ``log_interval`` is not a positive number.
    """
    if log_interval <= 0:
        raise ValueError(f'log_interval must be positive, got {log_interval}')

    # Switch to training mode (Module.train() toggles modules such as dropout).
    model.train()

    for batch_idx, (data, target) in enumerate(train_data):

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Call the module itself rather than .forward() so that any
        # registered forward hooks run as well.
        output = model(data)

        # Negative log-likelihood between the model output and true labels.
        loss = F.nll_loss(output, target)

        # Backpropagate: fills in .grad on every parameter that took part.
        loss.backward()

        # Update the parameters from their gradients.
        optimizer.step()

        # Report progress every `log_interval` batches.
        if batch_idx % log_interval == 0:
            print(f'Epoch: {epoch} [{batch_idx * len(data)}/{len(train_data.dataset)}] Loss: {loss.item()}')

In [27]:
from utils import load_data_torch
def run(epochs=15):
    """Build a fresh ConvNet, train it, and return the trained model.

    Uses Adadelta (lr=1.0) with a StepLR schedule that multiplies the
    learning rate by 0.7 after every epoch.

    Args:
        epochs: number of passes over the training data. Defaults to 15.

    Returns:
        The ConvNet instance after training, so that callers can evaluate or
        save it instead of losing it when the function returns.
    """
    model = ConvNet()
    # NOTE(review): test_data is loaded but never used in this function -
    # no evaluation step is run on it.
    train_data, test_data = load_data_torch()
    optimizer = optim.Adadelta(model.parameters(), lr=1.0)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

    for epoch in range(1, epochs + 1):
        train(model, train_data, optimizer, epoch)
        # Decay the learning rate once per completed epoch.
        scheduler.step()

    return model

    
    

In [28]:
# Start the full training run; the loss is printed periodically as it goes.
run()

Epoch: 1 [0/60000] Loss: 2.294050693511963
Epoch: 1 [640/60000] Loss: 3.1388297080993652
Epoch: 1 [1280/60000] Loss: 0.4638303220272064
Epoch: 1 [1920/60000] Loss: 0.47239527106285095
Epoch: 1 [2560/60000] Loss: 0.43595775961875916
Epoch: 1 [3200/60000] Loss: 0.6079074144363403
Epoch: 1 [3840/60000] Loss: 0.26333513855934143
Epoch: 1 [4480/60000] Loss: 0.24814258515834808
Epoch: 1 [5120/60000] Loss: 0.38512298464775085
Epoch: 1 [5760/60000] Loss: 0.21260781586170197
Epoch: 1 [6400/60000] Loss: 0.15531380474567413
Epoch: 1 [7040/60000] Loss: 0.07880792766809464
Epoch: 1 [7680/60000] Loss: 0.18957430124282837
Epoch: 1 [8320/60000] Loss: 0.1408403217792511
Epoch: 1 [8960/60000] Loss: 0.26074495911598206
Epoch: 1 [9600/60000] Loss: 0.23507148027420044
Epoch: 1 [10240/60000] Loss: 0.21115614473819733
Epoch: 1 [10880/60000] Loss: 0.11111243814229965
Epoch: 1 [11520/60000] Loss: 0.11354387551546097
Epoch: 1 [12160/60000] Loss: 0.2026374489068985
Epoch: 1 [12800/60000] Loss: 0.1542964279651641

KeyboardInterrupt: 