## Convolutional Neural Network
- MNIST data
- 3 convolutional layers
- 2 fully connected layers

## 1. Settings

### 1) Import required libraries

In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable, Function

In [13]:
class Swish(nn.Module):
    """Swish activation: ``x * sigmoid(x)``.

    Args:
        inplace: When True, the input tensor is overwritten with the
            activation and that same tensor is returned. When False
            (the default), the input is left untouched and a new tensor
            is returned.
    """

    def __init__(self, inplace=False):
        super().__init__()
        # Store the caller's choice so the default really is out-of-place.
        self.inplace = inplace

    def forward(self, x):
        """Apply Swish to ``x`` and return the activated tensor."""
        if self.inplace:
            # sigmoid(x) is evaluated before x is overwritten.
            x.mul_(F.sigmoid(x))
            return x
        return x * F.sigmoid(x)


swish_layer = Swish()

In [2]:
class Swish(Function):
    """Swish activation ``x * sigmoid(x)`` with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, i):
        """Return ``i * sigmoid(i)`` and stash what backward needs."""
        result = i * i.sigmoid()
        # Save both the output and the input: backward reuses the output
        # and recomputes sigmoid from the input.
        ctx.save_for_backward(result, i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient of the loss with respect to the input."""
        result, i = ctx.saved_tensors
        sigmoid_x = i.sigmoid()
        # d/dx [x * s(x)] = s + x*s*(1 - s) = result + s*(1 - result)
        return grad_output * (result + sigmoid_x * (1 - result))


swish = Swish.apply

class Swish_module(nn.Module):
    """Module wrapper so the functional ``swish`` can sit inside ``nn.Sequential``."""

    def forward(self, x):
        """Return ``swish(x)``."""
        activated = swish(x)
        return activated


swish_layer = Swish_module()

### 2) Set Hyperparameters

In [14]:
# Number of samples per mini-batch.
batch_size = 256
# Step size handed to the optimizer.
learning_rate = 2e-4
# Number of full passes over the training set.
num_epoch = 10

## 2. Data

### 1) Download Data

In [15]:
# Download MNIST into the current directory if needed; images are converted
# to tensors and labels are left unchanged.
mnist_train = dset.MNIST(
    root="./",
    train=True,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)
mnist_test = dset.MNIST(
    root="./",
    train=False,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)

### 2) Check Dataset

In [16]:
# Shape of the first training image and the number of training samples.
print(mnist_train[0][0].size(), len(mnist_train))
# Same for the test split; kept as a bare expression so the notebook shows it.
mnist_test[0][0].size(), len(mnist_test)

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

### 3) Set DataLoader

In [17]:
# Both loaders use drop_last=True, so every batch holds exactly batch_size
# samples and a trailing partial batch is discarded.
train_loader = DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    drop_last=True,
)
test_loader = DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    drop_last=True,
)

## 3. Model & Optimizer

### 1) CNN Model

In [18]:
class CNN(nn.Module):
    """Three 5x5 conv layers followed by two fully connected layers.

    Swish is used as the activation after every layer except the last.
    For 1x28x28 inputs the feature map shrinks 28 -> 24 -> 20 -> (pool) 10
    -> 6 -> (pool) 3, which is where the ``64*3*3`` input size of the
    first linear layer comes from.
    """

    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1, 16, 5),
            swish_layer,
            nn.Conv2d(16, 32, 5),
            swish_layer,
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5),
            swish_layer,
            nn.MaxPool2d(2, 2),
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64 * 3 * 3, 100),
            swish_layer,
            nn.Linear(100, 10),
        )

    def forward(self, x):
        """Return the 10 class scores for each image in the batch ``x``."""
        out = self.layer(x)
        # Flatten per sample using the actual batch dimension, so the model
        # works for any batch size (including a final partial batch).
        out = out.view(out.size(0), -1)
        return self.fc_layer(out)


model = CNN()

### 2) Loss function & Optimizer

In [19]:
# Cross-entropy loss on the raw class scores produced by the model.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

## 4. Train

In [20]:
# Train for num_epoch passes over train_loader.
for epoch in range(num_epoch):
    for i, (images, labels) in enumerate(train_loader):
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()
        # Call the module itself (not .forward) so registered hooks run.
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # NOTE(review): with fewer than 1000 batches per epoch this only
        # fires on the first batch of each epoch.
        if i % 1000 == 0:
            print(loss)



tensor(2.3040, grad_fn=<NllLossBackward>)
tensor(0.1943, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.0661, grad_fn=<NllLossBackward>)
tensor(0.0841, grad_fn=<NllLossBackward>)
tensor(0.0769, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.0442, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.0403, grad_fn=<NllLossBackward>)


## 5. Eval

In [21]:
# Observation: using swish instead of relu improved performance a lot!
correct = 0
total = 0

# Switch to inference mode and disable autograd: no gradients are needed
# while measuring accuracy.
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score along the class dimension.
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        # .item() keeps the running count a plain Python int.
        correct += (predicted == labels).sum().item()

print("Accuracy of Test Data: {}".format(100 * correct / total))

  """


Accuracy of Test Data: 98.67788696289062
