---
# Import
---



In [27]:
import torch # 전체 라이브러리 
import torch.nn as nn #각 레이어 / 손실함수 등
import torch.optim as optim # 옵티마이저
import torch.nn.functional as F # 활성화 함수 / nn과 비슷
from torch.utils.data import DataLoader # 데이터세트 관리용 
import torchvision.datasets as datasets # Mnist, cifar10과 같은 데이터 불러오기
import torchvision.transforms as transforms # 비전 변환
from tqdm import tqdm

---
# Create Fully Connected Network
---

In [14]:
# Mnist (28 x 28) 

class NN(nn.Module):
  """Fully connected classifier with a single hidden layer.

  Args:
    input_size: Number of features in each (already flattened) input sample.
    num_classes: Number of scores produced per sample.
    hidden_size: Width of the hidden layer. Defaults to 50, the value that
      was previously hard-coded.
  """

  def __init__(self, input_size, num_classes, hidden_size=50):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.fc2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the raw class scores for a batch of flattened inputs.

    Args:
      x: Tensor whose last dimension equals ``input_size``.

    Returns:
      Tensor whose last dimension equals ``num_classes``. No softmax is
      applied here.
    """
    hidden = F.relu(self.fc1(x))
    return self.fc2(hidden)

In [11]:
# model = NN(784, 10)
# x = torch.randn(64, 784)

# # (64, 10)의 shape을 가진 data가 반환 
# print(model(x).shape)

'\nmodel = NN(784, 10)\nx = torch.randn(64, 784)\n\n# (64, 10)의 shape을 가진 data가 반환 \nprint(model(x).shape)\n'

---
# set device
---

In [15]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

---
# Hyperparameters
----

In [16]:
# Model dimensions
input_size = 28 * 28   # one flattened 28 x 28 image
num_classes = 10       # number of output scores per sample

# Optimization settings
learning_rate = 1e-3
batch_size = 64
num_epochs = 1

---
# Load Data
---

In [19]:
# MNIST training split. ToTensor converts every loaded image to a tensor.
train_dataset = datasets.MNIST(
    root='/content/drive/MyDrive/Pytorch/data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# The DataLoader decides how samples are served: here in mini-batches of
# `batch_size` samples, drawn in shuffled order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [20]:
# MNIST test split. ToTensor converts every loaded image to a tensor.
test_dataset = datasets.MNIST(
    root='/content/drive/MyDrive/Pytorch/data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Evaluation does not benefit from shuffling, so keep the batches in a fixed,
# reproducible order.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

---
# Initialize network
---

In [21]:
# Build the network, then move its parameters to the selected device.
model = NN(input_size, num_classes)
model = model.to(device)

---
# Loss and optimizer
----

In [23]:
# Loss function: cross-entropy between the model's scores and the labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

---
# Train Network
----

In [28]:
# Put the model in training mode explicitly so this cell does not depend on
# whatever mode an earlier cell left it in.
model.train()

for epoch in tqdm(range(num_epochs)):
  for data, targets in train_loader:
    # Move the batch to the same device as the model (GPU or CPU).
    data = data.to(device=device)
    targets = targets.to(device=device)

    # Flatten every sample into one long vector: the batch dimension is kept
    # and -1 merges the remaining dimensions, e.g. (64, 1, 28, 28) -> (64, 784).
    data = data.reshape(data.shape[0], -1)

    # Forward pass
    scores = model(data)
    loss = criterion(scores, targets)

    # Backward pass: reset the gradients stored on all weights, then
    # backpropagate the current loss.
    optimizer.zero_grad()
    loss.backward()

    # Optimizer (Adam) step: apply the computed gradients to the weights.
    optimizer.step()

100%|██████████| 1/1 [00:05<00:00,  5.68s/it]


---
# Check accuracy on training & test data to see how good our model is
---

In [25]:
def check_accuracy(loader, model):
  """Print how many samples served by ``loader`` the model classifies correctly.

  Args:
    loader: Iterable of ``(inputs, labels)`` batches. When ``loader.dataset``
      has a ``train`` attribute, it selects which header line is printed.
    model: Module that maps flattened inputs to per-class scores.

  The model is switched to eval mode for the measurement and is put back into
  the mode it was in before the call, even if evaluation raises. Batches are
  moved to the device of the model's first parameter; a model without
  parameters receives the batches unmoved. An empty loader reports an
  accuracy of 0.00 instead of dividing by zero.
  """
  is_train_split = getattr(getattr(loader, 'dataset', None), 'train', None)
  if is_train_split is None:
    print('Checking accuracy')
  elif is_train_split:
    print('Checking accuracy on training data')
  else:
    print('Checking accuracy on test data')

  num_correct = 0
  num_samples = 0

  # Evaluate on the device the model lives on rather than on a notebook global.
  first_param = next(model.parameters(), None)
  model_device = first_param.device if first_param is not None else None

  # Remember the current mode so it can be restored afterwards.
  was_training = model.training
  model.eval()
  try:
    # Gradients are not needed for evaluation.
    with torch.no_grad():
      for x, y in loader:
        if model_device is not None:
          x = x.to(device=model_device)
          y = y.to(device=model_device)

        # Flatten each sample; the batch dimension is kept.
        x = x.reshape(x.shape[0], -1)

        scores = model(x)
        # max over dim 1 returns (values, indices); the index of the highest
        # score is the predicted class.
        _, predictions = scores.max(1)
        num_correct += int((predictions == y).sum())
        num_samples += predictions.size(0)
  finally:
    model.train(was_training)

  accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
  print(f'got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')

In [30]:
# Report accuracy on the training split first, then on the test split.
for loader in (train_loader, test_loader):
  check_accuracy(loader, model)

Checking accuracy on training data
got 56692 / 60000 with accuracy 94.49
Checking accracy on test data
got 9426 / 10000 with accuracy 94.26
