# 실습 2: 점진적 학습 - 적응적 런타임 업데이트 (CIFAR-10 → SVHN 클래스 추가)

 CIFAR-10으로 초기 학습을 수행하고, 점진적 학습 단계에서 SVHN의 특정 숫자(7, 8)를 새로운 클래스로 추가

In [58]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import random
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

## 1. ReplayBuffer 구현

In [59]:
class ReplayBuffer:
    def __init__(self, max_size):
        self.buffer = []
        self.max_size = max_size
    def add(self, sample):
        if len(self.buffer) >= self.max_size:
            self.buffer.pop(0)
        self.buffer.append(sample)
    def get_batch(self, batch_size):
        return random.sample(self.buffer, min(len(self.buffer), batch_size))
    def __len__(self):
        return len(self.buffer)

## 2. CIFAR-10 CNN 모델 (BatchNorm, Dropout, 깊은 구조로 개선)

In [60]:
class ImprovedCIFARCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(ImprovedCIFARCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    def freeze_features(self):
        for param in self.features.parameters():
            param.requires_grad = False
    def unfreeze_features(self):
        for param in self.features.parameters():
            param.requires_grad = True

## 3. CIFAR-10 학습

In [61]:
transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
cifar_train_dataset = datasets.CIFAR10('./data', train=True, download=True, transform=transform)
cifar_test_dataset = datasets.CIFAR10('./data', train=False, transform=transform)
cifar_train_loader = torch.utils.data.DataLoader(cifar_train_dataset, batch_size=128, shuffle=True)
cifar_test_loader = torch.utils.data.DataLoader(cifar_test_dataset, batch_size=1000, shuffle=False)
cifar_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

Files already downloaded and verified


In [62]:
def train_epoch(model, train_loader, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        total += target.size(0)
    return total_loss / len(train_loader), 100. * correct / total
def test(model, test_loader, device):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [63]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ImprovedCIFARCNN(num_classes=10).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.5)
print('Initial training on CIFAR-10...')
for epoch in range(10):
    train_loss, train_acc = train_epoch(model, cifar_train_loader, optimizer, device)
    test_loss, test_acc = test(model, cifar_test_loader, device)
    scheduler.step()
    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%')

Initial training on CIFAR-10...
Epoch 1: Train Loss: 1.7969, Train Acc: 32.60%, Test Acc: 46.12%
Epoch 2: Train Loss: 1.3603, Train Acc: 50.38%, Test Acc: 57.61%
Epoch 3: Train Loss: 1.1473, Train Acc: 59.50%, Test Acc: 62.46%
Epoch 4: Train Loss: 1.0208, Train Acc: 64.38%, Test Acc: 65.31%
Epoch 5: Train Loss: 0.9276, Train Acc: 67.78%, Test Acc: 70.71%
Epoch 6: Train Loss: 0.8485, Train Acc: 70.74%, Test Acc: 74.89%
Epoch 7: Train Loss: 0.7979, Train Acc: 72.84%, Test Acc: 75.86%
Epoch 8: Train Loss: 0.6912, Train Acc: 76.71%, Test Acc: 80.31%
Epoch 9: Train Loss: 0.6481, Train Acc: 78.26%, Test Acc: 80.30%
Epoch 10: Train Loss: 0.6263, Train Acc: 79.20%, Test Acc: 81.81%


# 4.classifier output 확장 (10→12) 및 가중치 이전

In [64]:
def expand_classifier(model, num_new_classes=2):
    old_linear = model.classifier[-1]
    in_features = old_linear.in_features
    out_features = old_linear.out_features
    new_linear = nn.Linear(in_features, out_features + num_new_classes)
    with torch.no_grad():
        new_linear.weight[:out_features] = old_linear.weight
        new_linear.bias[:out_features] = old_linear.bias
    model.classifier[-1] = new_linear
    return model

model = expand_classifier(model, num_new_classes=2)

# 5. SVHN 7, 8 데이터 로딩 및 점진적 학습

In [65]:
svhn_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
svhn_train_dataset = datasets.SVHN('./data', split='train', download=True, transform=svhn_transform)
svhn_test_dataset = datasets.SVHN('./data', split='test', transform=svhn_transform)
target_digits = [7, 8]
svhn_train_indices = [i for i, label in enumerate(svhn_train_dataset.labels) if label in target_digits]
svhn_test_indices = [i for i, label in enumerate(svhn_test_dataset.labels) if label in target_digits]
svhn_train_subset = torch.utils.data.Subset(svhn_train_dataset, svhn_train_indices)
svhn_test_subset = torch.utils.data.Subset(svhn_test_dataset, svhn_test_indices)
svhn_train_loader = torch.utils.data.DataLoader(svhn_train_subset, batch_size=128, shuffle=True)
svhn_test_loader = torch.utils.data.DataLoader(svhn_test_subset, batch_size=1000, shuffle=False)
print(f'Selected {len(svhn_train_subset)} training samples and {len(svhn_test_subset)} test samples for digits {target_digits}')

def prepare_svhn_batch(batch):
    data, labels = batch
    # 7→10, 8→11로 레이블 변환
    target = torch.where(labels == 7, torch.tensor(10), labels)
    target = torch.where(target == 8, torch.tensor(11), target)
    return data, target

def incremental_update(model, svhn_loader, optimizer, device, epochs=5):
    model.train()
    model.freeze_features()
    for epoch in range(epochs):
        total_loss = 0
        correct = 0
        total = 0
        for batch in svhn_loader:
            data, target = prepare_svhn_batch(batch)
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)
        print(f'Epoch {epoch+1}: Loss: {total_loss/len(svhn_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')
    model.unfreeze_features()

svhn_optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
print('Performing incremental learning for SVHN digits 7, 8...')
incremental_update(model, svhn_train_loader, svhn_optimizer, device, epochs=5)

Using downloaded and verified file: ./data\train_32x32.mat
Selected 10640 training samples and 3679 test samples for digits [7, 8]
Performing incremental learning for SVHN digits 7, 8...
Epoch 1: Loss: 0.6651, Accuracy: 72.81%
Epoch 2: Loss: 0.2789, Accuracy: 88.67%
Epoch 3: Loss: 0.2244, Accuracy: 91.17%
Epoch 4: Loss: 0.1914, Accuracy: 92.44%
Epoch 5: Loss: 0.1799, Accuracy: 93.07%


# 6. CIFAR-10, SVHN(7,8) 평가

In [66]:
def test_svhn(model, test_loader, device):
    model.eval()
    test_loss = 0
    correct_7 = 0
    correct_8 = 0
    total_7 = 0
    total_8 = 0
    with torch.no_grad():
        for data, labels in test_loader:
            data = data.to(device)
            # 7→10, 8→11
            target = torch.where(labels == 7, torch.tensor(10), labels)
            target = torch.where(target == 8, torch.tensor(11), target)
            target = target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            correct_7 += ((labels == 7) & (pred == 10)).sum().item()
            correct_8 += ((labels == 8) & (pred == 11)).sum().item()
            total_7 += (labels == 7).sum().item()
            total_8 += (labels == 8).sum().item()
    acc_7 = 100. * correct_7 / total_7 if total_7 > 0 else 0
    acc_8 = 100. * correct_8 / total_8 if total_8 > 0 else 0
    test_loss /= (total_7 + total_8)
    return test_loss, acc_7, acc_8

print('Evaluating on CIFAR-10...')
cifar_loss, cifar_acc = test(model, cifar_test_loader, device)
print(f'CIFAR-10 Test Accuracy: {cifar_acc:.2f}%')
print('Evaluating on SVHN digits 7, 8...')
svhn_loss, acc_7, acc_8 = test_svhn(model, svhn_test_loader, device)
print(f'SVHN Digit 7 Test Accuracy: {acc_7:.2f}%')
print(f'SVHN Digit 8 Test Accuracy: {acc_8:.2f}%')

Evaluating on CIFAR-10...
CIFAR-10 Test Accuracy: 0.00%
Evaluating on SVHN digits 7, 8...
SVHN Digit 7 Test Accuracy: 95.64%
SVHN Digit 8 Test Accuracy: 96.87%


# 7. catastrophic forgetting 방지

In [72]:
# 1. CIFAR-10에서 각 클래스별로 일부 샘플을 뽑아 replay buffer 생성
def build_replay_buffer(dataset, n_per_class=200):
    # 각 클래스별로 n_per_class개씩 샘플링
    class_indices = {i: [] for i in range(10)}
    for idx, (_, label) in enumerate(dataset):
        if isinstance(label, torch.Tensor):
            label = label.item()
        if len(class_indices[label]) < n_per_class:
            class_indices[label].append(idx)
        if all(len(lst) == n_per_class for lst in class_indices.values()):
            break
    indices = [idx for lst in class_indices.values() for idx in lst]
    return torch.utils.data.Subset(dataset, indices)

cifar_replay_subset = build_replay_buffer(cifar_train_dataset, n_per_class=100)
cifar_replay_loader = torch.utils.data.DataLoader(cifar_replay_subset, batch_size=128, shuffle=True)

In [73]:
def incremental_update_with_replay(model, svhn_loader, cifar_replay_loader, optimizer, device, epochs=10):
    model.train()
    model.freeze_features()
    cifar_iter = iter(cifar_replay_loader)
    for epoch in range(epochs):
        total_loss = 0
        correct = 0
        total = 0
        for svhn_batch in svhn_loader:
            # SVHN batch
            data, target = prepare_svhn_batch(svhn_batch)
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)
            # CIFAR replay batch (1:1 비율)
            try:
                cifar_batch = next(cifar_iter)
            except StopIteration:
                cifar_iter = iter(cifar_replay_loader)
                cifar_batch = next(cifar_iter)
            data, target = cifar_batch
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)
        print(f'Epoch {epoch+1}: Loss: {total_loss/(2*len(svhn_loader)):.4f}, Accuracy: {100.*correct/total:.2f}%')
    model.unfreeze_features()

In [74]:
svhn_optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
print('Performing incremental learning for SVHN digits 7, 8...')
incremental_update_with_replay(model, svhn_train_loader, cifar_replay_loader, svhn_optimizer, device, epochs=5)

Performing incremental learning for SVHN digits 7, 8...
Epoch 1: Loss: 0.5971, Accuracy: 80.47%
Epoch 2: Loss: 0.4817, Accuracy: 83.78%
Epoch 3: Loss: 0.4281, Accuracy: 85.26%
Epoch 4: Loss: 0.3953, Accuracy: 86.22%
Epoch 5: Loss: 0.3749, Accuracy: 86.71%


In [75]:
def test_svhn(model, test_loader, device):
    model.eval()
    test_loss = 0
    correct_7 = 0
    correct_8 = 0
    total_7 = 0
    total_8 = 0
    with torch.no_grad():
        for data, labels in test_loader:
            data = data.to(device)
            # 7→10, 8→11
            target = torch.where(labels == 7, torch.tensor(10), labels)
            target = torch.where(target == 8, torch.tensor(11), target)
            target = target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            correct_7 += ((labels == 7) & (pred == 10)).sum().item()
            correct_8 += ((labels == 8) & (pred == 11)).sum().item()
            total_7 += (labels == 7).sum().item()
            total_8 += (labels == 8).sum().item()
    acc_7 = 100. * correct_7 / total_7 if total_7 > 0 else 0
    acc_8 = 100. * correct_8 / total_8 if total_8 > 0 else 0
    test_loss /= (total_7 + total_8)
    return test_loss, acc_7, acc_8

print('Evaluating on CIFAR-10...')
cifar_loss, cifar_acc = test(model, cifar_test_loader, device)
print(f'CIFAR-10 Test Accuracy: {cifar_acc:.2f}%')
print('Evaluating on SVHN digits 7, 8...')
svhn_loss, acc_7, acc_8 = test_svhn(model, svhn_test_loader, device)
print(f'SVHN Digit 7 Test Accuracy: {acc_7:.2f}%')
print(f'SVHN Digit 8 Test Accuracy: {acc_8:.2f}%')

Evaluating on CIFAR-10...
CIFAR-10 Test Accuracy: 74.61%
Evaluating on SVHN digits 7, 8...
SVHN Digit 7 Test Accuracy: 88.56%
SVHN Digit 8 Test Accuracy: 75.06%
