# 6. 합성곱 신경망(Convolutional Neural Network)

---

### 01. CNN모델 이해하기

#####  모델 구현하기

In [1]:
import torch
import torch.nn as nn

In [10]:
# Dummy input with the four sizes 1, 1, 28, 28. torch.Tensor(*sizes) allocates
# without initialising, so the values are arbitrary; the cells below only
# inspect shapes.
# NOTE(review): presumably laid out as (batch, channel, height, width) - confirm.
inputs = torch.Tensor(1, 1, 28, 28)

##### 합성곱층과 풀링 선언

In [4]:
# First convolution: 1 input channel -> 32 output channels, 3x3 kernel.
# padding=1 with a 3x3 kernel and the default stride of 1 keeps height/width unchanged.
conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
print(conv1)

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [5]:
# Second convolution: 32 input channels -> 64 output channels, 3x3 kernel, padding 1.
conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
print(conv2)

Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [7]:
# 2x2 max pooling; the stride defaults to the kernel size, so each spatial
# dimension is halved.
pool = nn.MaxPool2d(kernel_size=2)
print(pool)

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


##### 구현체를 연결하여 모델 만들기

In [None]:
# Run the dummy input through the first convolution and show the result's shape.
out = conv1(inputs)
print(out.size())

In [11]:
# First pooling step: height and width are halved.
out = pool(out)
print(out.size())

torch.Size([1, 32, 14, 14])


In [12]:
# Second convolution: the channel count grows to 64, spatial size is unchanged.
out = conv2(out)
print(out.size())

torch.Size([1, 64, 14, 14])


In [13]:
# Second pooling step: height and width are halved again.
out = pool(out)
print(out.size())

torch.Size([1, 64, 7, 7])


In [14]:
# Size of dimension 0.
out.shape[0]

1

In [15]:
# Size of dimension 1.
out.shape[1]

64

In [16]:
# Size of dimension 2.
out.shape[2]

7

In [17]:
# Size of dimension 3.
out.shape[3]

7

In [18]:
# Keep the first (batch) dimension as it is and flatten everything else into one.
out = out.view(out.shape[0], -1)
print(out.size())

torch.Size([1, 3136])


배치 차원을 제외하고 모두 하나의 차원으로 통합된 것을 볼 수 있습니다. 이제 이에 대해서 전결합층(Fully-Connected layer)을 통과시켜보겠습니다. 출력층으로 10개의 뉴런을 배치하여 10개 차원의 텐서로 변환합니다.

In [19]:
# Fully connected output layer: 3,136 flattened features -> 10 outputs.
fc = nn.Linear(in_features=3136, out_features=10)
out = fc(out)
print(out.size())

torch.Size([1, 10])


In [20]:
# NOTE(review): this cell only evaluates a shape literal equal to the previous
# cell's printed output; it looks like pasted output rather than intended code - confirm.
torch.Size([1, 10])

torch.Size([1, 10])

---

### 02. CNN으로 MNIST 분류하기

In [26]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn

In [22]:
# Pick the compute device: CUDA when available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix the random seed so runs are repeatable.
torch.manual_seed(777)

# When running on the GPU, seed every CUDA device as well.
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [23]:
# Hyperparameters for the MNIST experiment.
# NOTE(review): learning_rate and training_epochs are not referenced by the
# training cell visible in this file; batch_size is passed to the DataLoader.
learning_rate = 1e-3
training_epochs = 15
batch_size = 100

In [24]:
# Training split of MNIST, downloaded into MNIST_data/ when not already present.
mnist_train = dsets.MNIST(
    root='MNIST_data/',               # download / cache directory
    train=True,                       # True selects the training split
    transform=transforms.ToTensor(),  # convert each image to a tensor
    download=True,
)

# Test split of MNIST, stored in the same directory.
mnist_test = dsets.MNIST(
    root='MNIST_data/',               # download / cache directory
    train=False,                      # False selects the test split
    transform=transforms.ToTensor(),  # convert each image to a tensor
    download=True,
)

In [27]:
# Mini-batch iterator over the training split. shuffle=True reshuffles on every
# pass; drop_last=True discards a final batch smaller than batch_size.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [30]:
class CNN(nn.Module):
    """Two conv -> ReLU -> max-pool stages followed by one linear classifier.

    The linear layer expects 7 * 7 * 64 flattened features, which is what the
    two stages produce for a (N, 1, 28, 28) input, and returns (N, 10) scores.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels; the 2x2 pool halves height and width.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: 32 -> 64 channels; the 2x2 pool halves height and width again.
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier over the flattened feature map; weights are re-initialised
        # with Xavier uniform, the bias keeps its default initialisation.
        self.fc = nn.Linear(in_features=7 * 7 * 64, out_features=10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, X):
        """Return the 10 class scores for each sample in batch ``X``."""
        features = self.layer2(self.layer1(X))
        # Keep the batch dimension and flatten the remaining ones.
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)

In [34]:
# Train the CNN on MNIST mini-batches with Adam and cross-entropy loss.
# The model must live on the same device as the batches, otherwise the forward
# pass fails as soon as `device` is 'cuda'.
model = CNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
nb_epochs = 100
total_batch = len(data_loader)

# NOTE: the loop runs epochs 0..nb_epochs inclusive, so the last epoch is also
# reported by the `epoch % 10 == 0` check below.
for epoch in range(nb_epochs + 1):
    avg_cost = 0.0

    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        # Call the module itself rather than .forward() so registered hooks run.
        h = model(X)
        cost = torch.nn.functional.cross_entropy(h, Y)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # .item() yields a plain float, so the running mean does not keep a
        # chain of graph-attached tensors alive for the whole epoch.
        avg_cost += cost.item() / total_batch

    if epoch % 10 == 0:
        print("{:3d}/{}\tcost:{:6f}".format(epoch, nb_epochs, avg_cost))

  0/100	cost:0.238620


KeyboardInterrupt: 

In [None]:
# Evaluate on the whole MNIST test split; no training happens here, so
# gradient tracking is disabled with torch.no_grad().
with torch.no_grad():
    # The raw test images are integers in [0, 255]. Scale them to [0, 1] so
    # they match the transforms.ToTensor() preprocessing applied to the
    # training batches.
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float() / 255
    X_test = X_test.to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    # The predicted class is the index of the largest score along dim 1.
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

---

### 03. 깊은 CNN으로 MNIST 분류하기

In [None]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix the random seed.
torch.manual_seed(777)

# Also fix the seed on all CUDA devices when the GPU is in use.
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [None]:
# Hyperparameters for the deep-CNN experiment.
# NOTE(review): only batch_size is consumed by the cells visible in this file
# (by the DataLoader); learning_rate and training_epochs are not referenced.
learning_rate = 1e-3
training_epochs = 15
batch_size = 100

In [None]:
# MNIST training split, downloaded into MNIST_data/ when not already present.
mnist_train = dsets.MNIST(
    root='MNIST_data/',               # download / cache directory
    train=True,                       # True selects the training split
    transform=transforms.ToTensor(),  # convert each image to a tensor
    download=True,
)

# MNIST test split, stored in the same directory.
mnist_test = dsets.MNIST(
    root='MNIST_data/',               # download / cache directory
    train=False,                      # False selects the test split
    transform=transforms.ToTensor(),  # convert each image to a tensor
    download=True,
)

In [None]:
# Shuffled mini-batches of the training split; drop_last=True skips a trailing
# batch that would be smaller than batch_size.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [None]:
class deepCNN(nn.Module):
    """Three conv -> ReLU -> max-pool stages followed by two linear layers.

    Shape trace for a (N, 1, 28, 28) input:
        layer1: conv -> (N, 32, 28, 28),  pool -> (N, 32, 14, 14)
        layer2: conv -> (N, 64, 14, 14),  pool -> (N, 64, 7, 7)
        layer3: conv -> (N, 128, 7, 7),   pool (padding=1) -> (N, 128, 4, 4)
        fc1:    4 * 4 * 128 -> 625, then ReLU
        fc2:    625 -> 10
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            # padding=1 turns the 7x7 map into 4x4 (floor((7 + 2 - 2) / 2) + 1);
            # without it the result is 3x3 and would not match fc1's input size.
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
        )
        self.fc1 = nn.Linear(4 * 4 * 128, 625, bias=True)
        # Without a non-linearity, fc1 followed by fc2 collapses into a single
        # linear map, so an activation is applied between them.
        self.fc_act = nn.ReLU()
        self.fc2 = nn.Linear(625, 10, bias=True)
        # Xavier-uniform initialisation for both fully connected weight matrices.
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, X):
        """Return the 10 class scores for each sample in batch ``X``."""
        out1 = self.layer1(X)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        # Keep the batch dimension and flatten the remaining ones.
        trans_out3 = out3.view(out3.size(0), -1)
        out4 = self.fc_act(self.fc1(trans_out3))
        out5 = self.fc2(out4)
        return out5

In [None]:
# Train the deeper three-stage network of this section on MNIST mini-batches
# with Adam and cross-entropy loss. The model must live on the same device as
# the batches, otherwise the forward pass fails as soon as `device` is 'cuda'.
model = deepCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
nb_epochs = 100
total_batch = len(data_loader)

# NOTE: the loop runs epochs 0..nb_epochs inclusive, so the last epoch is also
# reported by the `epoch % 10 == 0` check below.
for epoch in range(nb_epochs + 1):
    avg_cost = 0.0

    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        # Call the module itself rather than .forward() so registered hooks run.
        h = model(X)
        cost = torch.nn.functional.cross_entropy(h, Y)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # .item() yields a plain float, so the running mean does not keep a
        # chain of graph-attached tensors alive for the whole epoch.
        avg_cost += cost.item() / total_batch

    if epoch % 10 == 0:
        print("{:3d}/{}\tcost:{:6f}".format(epoch, nb_epochs, avg_cost))

In [None]:
# Evaluate on the whole MNIST test split; no training happens here, so
# gradient tracking is disabled with torch.no_grad().
with torch.no_grad():
    # The raw test images are integers in [0, 255]. Scale them to [0, 1] so
    # they match the transforms.ToTensor() preprocessing applied to the
    # training batches.
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float() / 255
    X_test = X_test.to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    # The predicted class is the index of the largest score along dim 1.
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())