## 모델 최적화

### 내용
1. 모델 최적화 개념
    - 모델 성능향상 및 학습시간 단축
    - 최적화 기법
        - 학습률 조정(Learning Rate Scheduling)
        - 조기 종료(Early Stopping)
        - 배치 정규화(Batch Normalization)
        - 드롭아웃(Dropout)

2. 학습률 조정
    - 학습률 조정의 필요성
        - 초기 학습률 설정의 어려움
        - 학습 진행에 따라 학습률을 조정하여 효율적인 학습
    - PyTorch에서의 학습률 스케줄러 사용 - torch.optim.lr_scheduler 모듈

In [7]:
import torch
import torchvision
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

# 학습률 조정스케줄러
from torch.optim.lr_scheduler import StepLR

In [8]:
# pl05_cnn에서...
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [10]:
# 모델, 손실 함수, 옵티마이저 설정
model = SimpleCNN()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 데이터 전처리 변환
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# MNIST 데이터셋 로드
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

num_epochs = 5
# 손실 함수와 옵티마이저 설정
criterion = nn.CrossEntropyLoss()

# 옵티마이저 및 학습률 스케줄러 설정
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in trainloader:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    scheduler.step()  # 매 epoch 끝에서 학습률 갱신
    print(f'Epoch {epoch+1}, Loss: {loss.item()}, Learning Rate: {scheduler.get_last_lr()}')

Epoch 1, Loss: 0.011658800765872002, Learning Rate: [0.001]
Epoch 2, Loss: 0.28900524973869324, Learning Rate: [0.001]
Epoch 3, Loss: 0.008533787913620472, Learning Rate: [0.001]
Epoch 4, Loss: 0.05302723869681358, Learning Rate: [0.001]
Epoch 5, Loss: 0.009401497431099415, Learning Rate: [0.001]


3. 조기 종료(Early Stopping)
    - 조기 종료의 필요성 - 과적합 방지 및 학습시간 단축
    - 조기 종료 구현
        - 검증 데이터셋의 성능 모니터링
        - 일정 에폭 동안 성능 향상이 없으면 학습 종료

In [11]:
import numpy as np

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0

early_stopping = EarlyStopping(patience=3, min_delta=0.01)

In [None]:
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    val_loss = evaluate_model(model, val_loader)  # 검증 손실 계산
    early_stopping(val_loss)

    if early_stopping.early_stop:
        print(f'Early stopping at epoch {epoch+1}')
        break

4. 배치 정규화(Batch Normalization) 및 드롭아웃(Dropout)
    - 배치 정규화
        - 훈련 속도 향상 및 초기화 민감도 감소
        - nn.BatchNorm2d 사용

In [12]:
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 64 * 7 * 7)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

- 드롭아웃
    - 과적합 방지
    - nn.Dropout 사용

In [13]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

5. 프로젝트 설명 및 진행
    - 프로젝트 개요
        - 고급 주제를 활용한 실전 프로젝트 수행
        - 모델 최적화 및 고급 기능을 활용한 성능 향상 시도
    - 데이터셋 선택
        - Kaggle 또는 기타 오픈 데이터셋에서 프로젝트 데이터셋 선택
    - 프로젝트 단계
        1. 데이터 전처리 및 준비
        2. 모델 설계 및 구현
        3. 모델 훈련 및 검증
        4. 성능 평가 및 결과 분석

6. 프로젝트 예시
    - 프로젝트: CIFAR-10 이미지 분류
    - 데이터셋 로드 및 전처리
    - 최적화 기법 및 고급 기능 적용

In [14]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader

In [15]:
# 데이터 전처리 변환
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [16]:
# CIFAR-10 데이터셋 로드
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:10<00:00, 16.2MB/s] 


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [17]:
# Pretrained ResNet 모델 로드 및 수정
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 10)

model = model.to(device)



In [18]:
# 배치 정규화 및 드롭아웃 추가
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True

# 손실 함수와 옵티마이저 설정
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

In [21]:
def evaluate_model(model, val_loader, device):
    model.eval()  # 평가 모드로 설정
    val_loss = 0.0
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():  # 파라미터 업데이트를 하지 않음
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device, dtype=torch.float), labels.to(device)

            # 예측값과 손실 계산
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    return avg_val_loss

In [23]:
# 모델 학습
num_epochs = 20
early_stopping = EarlyStopping(patience=5, min_delta=0.01)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()  # 매 epoch 끝에서 학습률 갱신
    val_loss = evaluate_model(model, val_loader, device)  # 검증 손실 계산

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}, Val Loss: {val_loss}, Learning Rate: {scheduler.get_last_lr()}')

    early_stopping(val_loss)
    if early_stopping.early_stop:
        print(f'Early stopping at epoch {epoch+1}')
        break

Epoch 1, Loss: 0.606806999776734, Val Loss: 0.5954276426626851, Learning Rate: [0.001]
Epoch 2, Loss: 0.5993689554037379, Val Loss: 0.5760650906128625, Learning Rate: [0.001]
Epoch 3, Loss: 0.5955427280047424, Val Loss: 0.5776054812267947, Learning Rate: [0.001]
Epoch 4, Loss: 0.5922447470465457, Val Loss: 0.6032200552784978, Learning Rate: [0.001]
Epoch 5, Loss: 0.5963455225488198, Val Loss: 0.5874542452561589, Learning Rate: [0.001]
Epoch 6, Loss: 0.5872259966109094, Val Loss: 0.5769389880398592, Learning Rate: [0.001]
Epoch 7, Loss: 0.5892212147599829, Val Loss: 0.5963267035806141, Learning Rate: [0.001]
Early stopping at epoch 7


- epoch 20까지 반복하지 않고, 7번째에서 중지

In [24]:
# 모델 평가
model.eval()
correct = 0
total = 0