# mnist cnn
## 학습 단계(code 기준)
1. 라이브러리 가져오고 (torch, torchvision, matplotlib 같은것들)
2. GPU 사용 설정 하고 random value를 위한 seed 설정!
3. 학습에 사용되는 parameter 설정!(learning_rate, training_epochs, batch_size, etc)
4. 데이터셋을 가져오고 (학습에 쓰기 편하게)loader 만들기
5. 학습 모델 만들기 (class CNN(torch.nn.Module))
6. Loss function (Criterion)을 선택하고 최적화 도구 선택(optimizer)
7. 모델 학습 및 loss check(Criterion의 output)
8. 학습된 모델의 성능을 확인한다.

## 만들 CNN 구조 확인!
### 1×28×28
##### (Layer 1) Convolution layer = (in_c=1, out_c=32, kernel_size = 3, stride=1, padding=1)
##### (Layer 1) MaxPool layer = (kernel_size=2, stride =2)

##### (Layer 2) Convolution layer = (in_c=32, out_c=64, kernel_size = 3, stride = 1, padding =1)
##### (Layer 2) MaxPool layer = (kernel_size=2, stride = 2)

##### view → (batch_size × [7,7,64] → batch_size × [3136])
##### Fully_Connect layer → (input = 3166, output = 10)

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

import torch.nn as nn


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)
print(device)

cuda


In [3]:
#paramiters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [4]:
# MNIST dataset

mnist_train = dsets.MNIST(root = 'MNIST_data/',
                          train = True,
                          transform = transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train = 'False',
                         transform = transforms.ToTensor(),
                         download = True)

In [5]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True)

In [6]:
class CNN(nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        # L1 Imagin shape = (?,28,28,1)
        # Conv            → (?, 28, 28, 32)
        # Pool            → (?, 14,14,32)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size = 3, stride = 1, padding = 1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride =2))
        
        # L2 Imgin shape=(?,14,14,32)
        #    Conv       →(?, 14, 14, 64)
        #    Pool       →(?, 7, 7, 64)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32,64, kernel_size=3, stride = 1, padding =1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        #Final FC 7×7×64 inputs → 10 outputs
        self.fc = torch.nn.Linear(7 * 7 * 64, 10, bias = True)
        torch.nn.init.xavier_uniform_(self.fc.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1) # Flatten them for FC
        out = self.fc(out)
        return out
        

In [7]:
model = CNN().to(device)


CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3136, out_features=10, bias=True)
)

In [8]:
criterion = torch.nn.CrossEntropyLoss().to(device) #Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
#training
total_batch = len(data_loader)

for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('[Epoch:{}] cost = {}'.format(epoch+1, avg_cost))
print('Learning Finished!')

[Epoch:1] cost = 0.22394460439682007
[Epoch:2] cost = 0.06215522438287735
[Epoch:3] cost = 0.04490873962640762
[Epoch:4] cost = 0.03551791235804558
[Epoch:5] cost = 0.029042446985840797
[Epoch:6] cost = 0.024945100769400597
[Epoch:7] cost = 0.020749224349856377
[Epoch:8] cost = 0.017951885238289833
[Epoch:9] cost = 0.015224963426589966
[Epoch:10] cost = 0.012567938305437565
[Epoch:11] cost = 0.01062555518001318
[Epoch:12] cost = 0.0100028607994318
[Epoch:13] cost = 0.00863215234130621
[Epoch:14] cost = 0.007750952150672674
[Epoch:15] cost = 0.006488523446023464
Learning Finished!


In [11]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    predition = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same