## Pytorch Model
## 깊은 신경망을 이용한 classification 문제 해결

## 첫번째 레이어의 형태
- 합성곱 (in_channel=1, out_channel=32, kernel_size=3, stride=1, padding=1) + 활성화 함수 ReLU
- 맥스풀링 (kernel_size=2, stride=2)

## 두번째 레이어의 형태
- 합성곱 (in_channel=32, out_channel=64, kernel_size=3, stride=1, padding=1), 활성화 함수 ReLU
- 맥스풀링 (kernel_size=2, stride=2)

## 세번째 레이어의 형태
- 합성곱 (in_channel=64, out_channel=128, kernel_size=3, stride=1, padding=1), 활성화 함수 ReLU
- 맥스풀링 (kernel_size=2, stride=2, padding=1)

## 네번째 레이어의 형태
- 특성맴을 펼치는 역할
- FC layer + ReLU

## 다섯번째 레이어, 전결합층(FC layer)
- FC layer + Softmax
- 마지막노드는 10개, 0~9까지의 정답을 가져야 하기 때문에 노드를 10개 설정

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 랜덤 시드를 고정
torch.manual_seed(777)

if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [71]:
# 파라미터 설정
learning_rate = 0.001
training_epochs = 300
batch_size = 100

In [72]:
# 데이터로더를 사용 데이터셋을 정의
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [73]:
# 데이터로더로 배치크기 지정.
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)
# 60,000개 데이터가 batch_size(100)의 크기로 진행, total batch는 600.

In [74]:
# 클래스로 모델을 설계
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        
        # 첫번째 레이어
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # 두번째 레이어
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # 세번째 레이어
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        )

        # 레이어
        self.layer4 = torch.nn.Sequential(
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        )
        # 레이어
        self.layer5 = torch.nn.Sequential(
            torch.nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        )
        # 레이어
        self.layer6 = torch.nn.Sequential(
            torch.nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        )

        # 네번째 레이어        
        self.fc1 = torch.nn.Linear(2 * 2 * 1024, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer7 = torch.nn.Sequential(
            self.fc1,
            torch.nn.ReLU(),
            torch.nn.Dropout(p =1 - self.keep_prob))
        
        # 다섯번째 레이어
        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        #print("1 ", out.shape)
        out = self.layer4(out)
        #print("1 ", out.shape)
        out = self.layer5(out)
        #print("1 ", out.shape)
        out = self.layer6(out)
        #print("1 ", out.shape)
        out = out.view(out.size(0), -1)
        out = self.layer7(out)
        out = self.fc2(out)
        return out

In [75]:
model = CNN().to(device)

In [76]:
# 비용함수와 옵티마이저
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [77]:
# 총 배치의 수
total_batch = len(data_loader)
print(f"총 배치의 수 : {total_batch}")

총 배치의 수 : 600


In [78]:
# 학습
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch

    print(f"[Epoch: {epoch+1:>4} cost = {avg_cost:>.9}]")

[Epoch:    1 cost = 0.311792344]
[Epoch:    2 cost = 0.0624712184]
[Epoch:    3 cost = 0.0467354208]
[Epoch:    4 cost = 0.0385017432]
[Epoch:    5 cost = 0.0339843705]
[Epoch:    6 cost = 0.027999457]
[Epoch:    7 cost = 0.0227896422]
[Epoch:    8 cost = 0.0245058723]
[Epoch:    9 cost = 0.0207136758]
[Epoch:   10 cost = 0.0177859198]
[Epoch:   11 cost = 0.0184923578]
[Epoch:   12 cost = 0.0157412142]
[Epoch:   13 cost = 0.0174020417]
[Epoch:   14 cost = 0.0143661248]
[Epoch:   15 cost = 0.0125211375]
[Epoch:   16 cost = 0.0154184159]
[Epoch:   17 cost = 0.011474085]
[Epoch:   18 cost = 0.0139781758]
[Epoch:   19 cost = 0.00933977123]
[Epoch:   20 cost = 0.010161506]
[Epoch:   21 cost = 0.0112072602]
[Epoch:   22 cost = 0.012212188]
[Epoch:   23 cost = 0.00864262134]
[Epoch:   24 cost = 0.0140828146]
[Epoch:   25 cost = 0.0133043518]
[Epoch:   26 cost = 0.00792987272]
[Epoch:   27 cost = 0.00621376699]
[Epoch:   28 cost = 0.00858408678]
[Epoch:   29 cost = 0.00877747592]
[Epoch:   30 

[Epoch:  238 cost = 0.00022518661]
[Epoch:  239 cost = 0.00236316468]
[Epoch:  240 cost = 0.0163228624]
[Epoch:  241 cost = 0.010365027]
[Epoch:  242 cost = 0.00408769213]
[Epoch:  243 cost = 0.00613998948]
[Epoch:  244 cost = 0.00138716085]
[Epoch:  245 cost = 0.000256380852]
[Epoch:  246 cost = 0.000403667247]
[Epoch:  247 cost = 0.00453624083]
[Epoch:  248 cost = 0.00564843277]
[Epoch:  249 cost = 0.0117666014]
[Epoch:  250 cost = 0.00918870885]
[Epoch:  251 cost = 0.00407671323]
[Epoch:  252 cost = 0.000109777473]
[Epoch:  253 cost = 3.18888924e-05]
[Epoch:  254 cost = 2.25009135e-05]
[Epoch:  255 cost = 0.0151305338]
[Epoch:  256 cost = 0.00891306903]
[Epoch:  257 cost = 0.0020707841]
[Epoch:  258 cost = 0.0418311246]
[Epoch:  259 cost = 0.00175866263]
[Epoch:  260 cost = 0.000758318463]
[Epoch:  261 cost = 0.0010927096]
[Epoch:  262 cost = 0.00857369229]
[Epoch:  263 cost = 0.000962837716]
[Epoch:  264 cost = 0.00556645636]
[Epoch:  265 cost = 0.0133139975]
[Epoch:  266 cost = 0.

In [79]:
# 모델 테스트
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.75409996509552
