In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Pick the compute device: CUDA when torch reports a usable GPU, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


Normalize -> 이미지 정규화. (x - 평균)을 표준편차로 나눔. 수렴 속도를 높이고, 입력 데이터가 일정한 분포(평균 0, 표준편차 1 근처)를 따르게 해 모델이 잘 학습되도록 함.

In [None]:
# Preprocessing applied to every image: convert it to a PyTorch tensor, then
# standardize it as (x - mean) / std using mean 0.1307 and std 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Fetch both MNIST splits into ./data (download=True is passed for each split)
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Training batches are small and reshuffled; test batches are large and in fixed order
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")

100%|██████████| 9.91M/9.91M [00:00<00:00, 52.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.76MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 15.0MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.93MB/s]


Number of training samples: 60000
Number of test samples: 10000


은닉층(hidden layer)이 2개이고 활성화 함수가 ReLU인 MLP임. __init__ -> 객체가 생성될 때 자동으로 호출됨. super().__init__()으로 PyTorch의 nn.Module 생성자를 먼저 호출함. 여기서 nn은 torch.nn의 약어(import torch.nn as nn)이고, nn.Linear, nn.ReLU 등은 torch.nn 모듈 안에 있는 클래스들임.
평탄화(flatten)? 한 번에 처리되는 이미지 개수 -> batch size. MNIST는 채널이 1개이고 이미지 크기가 28x28이므로 입력 형태는 (batch_size, 1, 28, 28)임. 이미지를 1차원 벡터로 펼치면 28*28 = 784개의 픽셀 값이 됨.
x.view -> 텐서의 데이터는 그대로 두고 형태(shape)만 변경함(reshape도 같은 역할을 함). 인자 -1은 나머지 차원으로부터 자동 추론되는 값으로, 여기서는 batch size가 됨. 형태를 바꾼 x를 self.fc1, self.relu1, ... 에 차례로 통과시켜 연산하고, 그 결과가 다시 x에 저장됨. in_features: 레이어로 들어오는 특징의 수로, 이전 레이어의 out_features와 같아야 함. 마지막 레이어의 out_features가 10인 이유는 0부터 9까지 숫자 10개를 분류하기 때문. bias=True -> 편향(bias) 항을 사용할지 여부임.

In [None]:
class MLP(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    With the default arguments the network maps flattened 28x28 images
    (784 features) through hidden layers of width 512 and 256 to 10 output
    scores, one per digit class.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_dim=28 * 28, hidden1=512, hidden2=256, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden1)     # Input layer to first hidden layer
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden1, hidden2)       # First hidden layer to second hidden layer
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden2, num_classes)   # Second hidden layer to output layer

    def forward(self, x):
        """Return the raw output scores for a batch of inputs.

        Args:
            x: Tensor whose elements can be regrouped into rows of
                ``fc1.in_features`` values, e.g. (batch_size, 1, 28, 28)
                with the default sizes.

        Returns:
            Tensor of shape (rows, num_classes); no softmax is applied.
        """
        # Flatten to (rows, in_features). reshape() is used instead of view()
        # so that non-contiguous inputs (e.g. transposed batches) are accepted
        # rather than raising a RuntimeError.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Build the network first, then place its parameters on the selected device
model = MLP()
model = model.to(device)
print(model)

MLP(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=256, out_features=10, bias=True)
)


criterion -> 손실 함수(loss function) 정의. nn.CrossEntropyLoss는 다중 클래스 분류 문제에서 일반적으로 쓰는 손실 함수임. optimizer -> 최적화 도구 정의. 최적화 도구는 가중치를 어떻게 업데이트할지 결정하는 알고리즘임. Adam은 Adaptive Moment Estimation의 약자. lr은 learning rate로, 한 번 업데이트할 때마다 모델 파라미터를 얼마나 크게 변경할지 결정하는 값. epoch -> 전체 학습 데이터셋을 통과한 횟수. running_loss -> 배치 손실의 누적 합. running_loss += loss.item()으로 배치마다 계속 더해짐. 배치 100개마다 running_loss / 100(최근 100개 배치의 평균 손실)을 출력하고, 다음 구간의 평균을 새로 계산하기 위해 0으로 초기화함.

In [None]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of epochs
num_epochs = 5

# Number of batches averaged into each progress line
log_interval = 100

print("Starting training...")
for epoch in range(num_epochs):
    model.train() # Set the model to training mode
    running_loss = 0.0  # Loss summed over the current logging window
    epoch_loss = 0.0    # Loss summed over every batch of this epoch
    num_batches = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # 1. Zero the gradients
        optimizer.zero_grad()

        # 2. Forward pass
        outputs = model(data)

        # 3. Calculate loss
        loss = criterion(outputs, target)

        # 4. Backward pass (compute gradients)
        loss.backward()

        # 5. Update model parameters
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1

        if (batch_idx + 1) % log_interval == 0: # Print every log_interval batches
            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Loss: {running_loss / log_interval:.4f}")
            running_loss = 0.0
    # Report the mean over ALL batches of the epoch. The window accumulator
    # cannot be used here: it only covers the batches since the last progress
    # line and is 0.0 whenever the batch count is a multiple of log_interval.
    print(f"Epoch [{epoch+1}/{num_epochs}] finished. Average Loss: {epoch_loss / max(num_batches, 1):.4f}")
print("Training finished!")

Starting training...
Epoch [1/5], Batch [100/938], Loss: 0.5549
Epoch [1/5], Batch [200/938], Loss: 0.2369
Epoch [1/5], Batch [300/938], Loss: 0.2219
Epoch [1/5], Batch [400/938], Loss: 0.1791
Epoch [1/5], Batch [500/938], Loss: 0.1601
Epoch [1/5], Batch [600/938], Loss: 0.1531
Epoch [1/5], Batch [700/938], Loss: 0.1334
Epoch [1/5], Batch [800/938], Loss: 0.1367
Epoch [1/5], Batch [900/938], Loss: 0.1084
Epoch [1/5] finished. Average Loss: 0.1174
Epoch [2/5], Batch [100/938], Loss: 0.0910
Epoch [2/5], Batch [200/938], Loss: 0.0822
Epoch [2/5], Batch [300/938], Loss: 0.0933
Epoch [2/5], Batch [400/938], Loss: 0.0917
Epoch [2/5], Batch [500/938], Loss: 0.0881
Epoch [2/5], Batch [600/938], Loss: 0.0840
Epoch [2/5], Batch [700/938], Loss: 0.0798
Epoch [2/5], Batch [800/938], Loss: 0.0868
Epoch [2/5], Batch [900/938], Loss: 0.0841
Epoch [2/5] finished. Average Loss: 0.0726
Epoch [3/5], Batch [100/938], Loss: 0.0531
Epoch [3/5], Batch [200/938], Loss: 0.0666
Epoch [3/5], Batch [300/938], Los

### 5. Evaluate the Model

Finally, let's evaluate the trained model on the test dataset to see its performance.

In [None]:
# Measure accuracy: the share of test samples whose highest-scoring class
# equals the label.
model.eval() # Set the model to evaluation mode
correct = 0
total = 0
with torch.no_grad(): # Disable gradient calculation for inference
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        outputs = model(data)
        # Predicted class = index of the largest output score for each sample.
        # argmax replaces torch.max(outputs.data, 1): the .data accessor is
        # unnecessary inside no_grad and only the indices are needed.
        predicted = outputs.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

# Guard against an empty loader so the report cannot divide by zero
accuracy = 100 * correct / total if total else 0.0
# Report the number of samples actually evaluated instead of a fixed count
print(f'Accuracy of the model on the {total} test images: {accuracy:.2f}%')

Accuracy of the model on the 10000 test images: 97.68%


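model.eval()은 PyTorch 모델을 평가 모드로 전환함. model.train()은 모델을 학습 모드로 전환함. eval 모드에서는 dropout이 꺼져서 모든 뉴런을 활용함. 그리고 batch normalization도 배치 통계 갱신을 멈추고 학습 중에 누적한 통계를 사용함(일관된 정규화를 적용해야 해서). 참고로 이 MLP에는 dropout이나 batch normalization 레이어가 없어서 두 모드의 출력은 같음.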