## Convolutional Neural Network
- MNIST data
- 3 convolutional layers
- 2 fully connected layers

## 1. Settings

### 1) Import required libraries

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable, Function

In [2]:
class Swish(Function):
    """Swish activation ``f(x) = x * sigmoid(x)`` with a hand-written gradient."""

    @staticmethod
    def forward(ctx, i):
        """Compute ``i * sigmoid(i)`` and save what the backward pass needs.

        Args:
            ctx: autograd context used to stash tensors for ``backward``.
            i: input tensor.

        Returns:
            Tensor with the same shape as ``i``.
        """
        result = i * i.sigmoid()
        ctx.save_for_backward(result, i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient of the loss with respect to the input.

        Args:
            ctx: autograd context holding the tensors saved in ``forward``.
            grad_output: gradient of the loss with respect to the output.

        Returns:
            Tensor with the same shape as the forward input.
        """
        # ``saved_tensors`` is the supported accessor; ``saved_variables``
        # is deprecated.
        result, i = ctx.saved_tensors
        sigmoid_x = i.sigmoid()
        # d/dx [x * s(x)] = s(x) + x*s(x)*(1 - s(x)) = f(x) + s(x)*(1 - f(x))
        return grad_output * (result + sigmoid_x * (1 - result))


# Functional entry point: ``swish(tensor)`` applies the custom Function.
swish = Swish.apply

class Swish_module(nn.Module):
    """``nn.Module`` wrapper around ``swish`` so it can be used as a layer."""

    def forward(self, x):
        """Return ``swish(x)`` for the input tensor ``x``."""
        activated = swish(x)
        return activated


# Module instance of the activation; the class defines no parameters.
swish_layer = Swish_module()

### 2) Set Hyperparameters

In [23]:
# Training hyperparameters.
num_epoch = 10         # number of epochs
batch_size = 256       # samples per mini-batch
learning_rate = 2e-4   # learning rate

## 2. Data

### 1) Download Data

In [4]:
# MNIST training split, stored under the current directory (download=True).
# ToTensor() converts each image to a torch tensor; labels are left untouched.
mnist_train = dset.MNIST(
    root="./",
    train=True,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)
# MNIST test split with the same preprocessing.
mnist_test = dset.MNIST(
    root="./",
    train=False,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)

### 2) Check Dataset

In [5]:
# Shape of the first training image and the number of training samples.
# Indexing and len() are used instead of calling the dunder methods directly.
print(mnist_train[0][0].size(), len(mnist_train))
# Same for the test split; as the cell's last expression it is echoed by the
# notebook.
mnist_test[0][0].size(), len(mnist_test)

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

### 3) Set DataLoader

In [6]:
# Mini-batch iterators over the two splits. Only the training data is
# shuffled. drop_last=True discards the final incomplete batch in both loaders.
train_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    drop_last=True,
)
test_loader = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    drop_last=True,
)

## 3. Model & Optimizer

### 1) CNN Model

In [24]:
class CNN(nn.Module):
    """Three-conv / two-linear classifier for 1x28x28 images with 10 classes.

    Spatial size through the feature extractor for a 28x28 input:
    28 -> 24 (conv 5x5) -> 20 (conv 5x5) -> 10 (pool) -> 6 (conv 5x5) -> 3 (pool),
    which yields the 64*3*3 features expected by the first linear layer.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; Swish is the activation (nn.ReLU()
        # is a drop-in alternative).
        self.layer = nn.Sequential(
            nn.Conv2d(1, 16, 5),
            swish_layer,
            nn.Conv2d(16, 32, 5),
            swish_layer,
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5),
            swish_layer,
            nn.MaxPool2d(2, 2),
        )
        # Classifier head producing one logit per class.
        self.fc_layer = nn.Sequential(
            nn.Linear(64 * 3 * 3, 100),
            swish_layer,
            nn.Linear(100, 10),
        )

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: tensor whose first dimension is the batch dimension.

        Returns:
            Tensor of shape ``(x.size(0), 10)``.
        """
        out = self.layer(x)
        # Flatten per sample using the actual batch dimension instead of the
        # global batch_size, so any batch size (including 1) works.
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)

        return out

# Instantiate the network that the optimizer and training loop operate on.
model = CNN()

### 2) Loss function & Optimizer

In [25]:
# Cross-entropy loss between the model outputs and the integer labels.
loss_func = nn.CrossEntropyLoss()
# Adam updates every model parameter with the configured learning rate.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

## 4. Train

In [None]:
# Train for num_epoch passes over train_loader.
for i in range(num_epoch):
    for j, (image, label) in enumerate(train_loader):
        # Tensors carry autograd state themselves, so the deprecated
        # Variable wrapper is unnecessary.
        x = image
        y_ = label

        optimizer.zero_grad()
        # Call the module rather than .forward() so registered hooks run.
        output = model(x)
        loss = loss_func(output, y_)
        loss.backward()
        optimizer.step()

        # Print the current batch loss whenever the batch index is a
        # multiple of 1000.
        if j % 1000 == 0:
            print(loss)

  # Remove the CWD from sys.path while we load stuff.


tensor(2.3018, grad_fn=<NllLossBackward>)
tensor(0.2710, grad_fn=<NllLossBackward>)
tensor(0.1657, grad_fn=<NllLossBackward>)
tensor(0.0625, grad_fn=<NllLossBackward>)
tensor(0.0749, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0747, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)


In [None]:
correct = 0
total = 0