**Training Step**



1.   라이브러리 가져오기(torch,torchvision,matplotlib)
2.   GPU 사용설정 & random value seed설정
3.   학습에 사용되는 parameter 설정 (lr,epoch,batchsize)
4.   dataset,dataLoader
5.   class CNN(torch.nn.Module)
6.   Loss function(Criterion) & optimizer
7.   모델 학습 및 loss check(criterion의 output)
8.   학습된 모델의 성능을 확인



In [17]:
import torch
import torch.nn as nn
inputs = torch.Tensor(1,1,28,28) #배치,채널,height,width
inputs.shape

torch.Size([1, 1, 28, 28])

In [18]:
conv1 = nn.Conv2d(1,32,3,padding=1) #1채널받음,32채널내보냄,커널사이즈3
pool = nn.MaxPool2d(2)
pool

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [19]:
conv2 = nn.Conv2d(32,64,3,padding=1)
pool = nn.MaxPool2d(2)
pool

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [20]:
out = conv1(inputs)
out.shape

torch.Size([1, 32, 28, 28])

In [21]:
out = pool(out)
out.shape

torch.Size([1, 32, 14, 14])

In [22]:
out = conv2(out)
out.shape

torch.Size([1, 64, 14, 14])

In [23]:
out = pool(out)
out.shape

torch.Size([1, 64, 7, 7])

In [24]:
out.size(0)

1

In [25]:
out.size(1)

64

In [26]:
out = out.view(out.size(0), -1)
out.shape

torch.Size([1, 3136])

In [27]:
fc = nn.Linear(3136, 10)
out = fc(out)
out

tensor([[-5.9044e+35,  4.6768e+34,  3.4840e+35,  1.8403e+35,  4.0544e+35,
          9.0400e+35, -3.1393e+34,  9.3725e+34, -2.4271e+35,  1.0099e+35]],
       grad_fn=<AddmmBackward0>)

In [28]:
out.shape #배치사이즈에 맞게 10개의 출력이 된다.

torch.Size([1, 10])

**Implementation**

In [29]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

In [30]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(777) #random value 고정해주는 코드
if device == 'cuda':
  torch.cuda.manual_seed_all(777)

In [31]:
print(device)

cpu


In [32]:
torch.cuda.is_available()

False

In [33]:
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [34]:
mnist_train = dsets.MNIST(root='MNIST_data',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [35]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True)

CNN model 만들기

In [36]:
class CNN(nn.Module):

  def __init__(self):
    super(CNN, self).__init__()
    self.layer1 = nn.Sequential(
        nn.Conv2d(1,32,kernel_size=3, stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.layer2 = nn.Sequential(
        nn.Conv2d(32,64,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

    self.fc = nn.Linear(7*7*64, 10, bias = True)
    torch.nn.init.xavier_uniform_(self.fc.weight) #먼저 초기화 시켜줌

  def forward(self,x):
    out = self.layer1(x)
    out = self.layer2(out)

    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out


In [37]:
model = CNN().to(device)
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3136, out_features=10, bias=True)
)

In [38]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)

Training

In [39]:
total_batch = len(data_loader)

for epoch in range(training_epochs):
  avg_cost = 0 #loss 담는 곳

  for X, Y in data_loader: #input,label
    X = X.to(device)
    Y = Y.to(device)

    optimizer.zero_grad() #중요!
    hypothesis = model(X) #모델에 input값을 넣어서 출력이 가설이 됨

    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step() #backward된 cost를 토대로 optimizer가 모델을 학습

    avg_cost += cost / total_batch #cost쌓기

  print('[Epoch:{}] cost = {}'.format((epoch+1), avg_cost))

print('Learning Finished')

[Epoch:1] cost = 0.2255602478981018
[Epoch:2] cost = 0.06305518746376038
[Epoch:3] cost = 0.04626815766096115
[Epoch:4] cost = 0.037433922290802
[Epoch:5] cost = 0.031403716653585434
[Epoch:6] cost = 0.026141438633203506
[Epoch:7] cost = 0.021627798676490784
[Epoch:8] cost = 0.017972556874155998
[Epoch:9] cost = 0.015809405595064163
[Epoch:10] cost = 0.013176409527659416
[Epoch:11] cost = 0.009952100925147533
[Epoch:12] cost = 0.009574058465659618
[Epoch:13] cost = 0.008271911181509495
[Epoch:14] cost = 0.0065953838638961315
[Epoch:15] cost = 0.005892841145396233
Learning Finished


In [41]:
with torch.no_grad(): # 학습을 안해서 no gradient
  X_test = mnist_test.test_data.view(len(mnist_test),1,28,28).float().to(device)
  Y_test = mnist_test.test_labels.to(device)

  prediction = model(X_test)
  correct_prediction = torch.argmax(prediction, 1) == Y_test
  accuracy = correct_prediction.float().mean()
  print('Accuracy:',accuracy.item())



Accuracy: 0.9883999824523926


레이어 추가해보기

In [50]:
class CNN(nn.Module):

  def __init__(self):
    super(CNN, self).__init__()
    self.layer1 = nn.Sequential(
        nn.Conv2d(1,32,kernel_size=3, stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.layer2 = nn.Sequential(
        nn.Conv2d(32,64,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.layer3 = nn.Sequential(
        nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

    self.fc1 = nn.Linear(3*3*128,625)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(625, 10, bias = True)
    torch.nn.init.xavier_uniform_(self.fc1.weight) #먼저 초기화 시켜줌
    torch.nn.init.xavier_uniform_(self.fc2.weight)

  def forward(self,x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = self.layer3(out)

    out = out.view(out.size(0), -1)
    out = self.fc1(out)
    out = self.relu(out)
    out = self.fc2(out)
    return out

In [51]:
model = CNN().to(device)
print(model)

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=1152, out_features=625, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=625, out_features=10, bias=True)
)


model test

In [52]:
value = torch.Tensor(1,1,28,28).to(device)
print((model(value)).shape)

torch.Size([1, 10])


In [53]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [54]:
total_batch = len(data_loader)

for epoch in range(training_epochs):
  avg_cost = 0 #loss 담는 곳

  for X, Y in data_loader: #input,label
    X = X.to(device)
    Y = Y.to(device)

    optimizer.zero_grad() #중요!
    hypothesis = model(X) #모델에 input값을 넣어서 출력이 가설이 됨

    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step() #backward된 cost를 토대로 optimizer가 모델을 학습

    avg_cost += cost / total_batch #cost쌓기

  print('[Epoch:{}] cost = {}'.format((epoch+1), avg_cost))

print('Learning Finished')

[Epoch:1] cost = 0.15909835696220398
[Epoch:2] cost = 0.04328387975692749
[Epoch:3] cost = 0.028711965307593346
[Epoch:4] cost = 0.02256103977560997
[Epoch:5] cost = 0.017939278855919838
[Epoch:6] cost = 0.014591354876756668
[Epoch:7] cost = 0.01111175399273634
[Epoch:8] cost = 0.009980181232094765
[Epoch:9] cost = 0.010132871568202972
[Epoch:10] cost = 0.008767729625105858
[Epoch:11] cost = 0.007938203401863575
[Epoch:12] cost = 0.00609720591455698
[Epoch:13] cost = 0.005922747775912285
[Epoch:14] cost = 0.003890826366841793
[Epoch:15] cost = 0.0066936323419213295
Learning Finished


In [55]:
with torch.no_grad(): # 학습을 안해서 no gradient
  X_test = mnist_test.test_data.view(len(mnist_test),1,28,28).float().to(device)
  Y_test = mnist_test.test_labels.to(device)

  prediction = model(X_test)
  correct_prediction = torch.argmax(prediction, 1) == Y_test
  accuracy = correct_prediction.float().mean()
  print('Accuracy:',accuracy.item())



Accuracy: 0.9825000166893005


오히려 Accuracy 떨어짐