In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from tqdm import tqdm


# Device configuration: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Reproducibility: the experiment relies on randomly initialised networks and
# a shuffled training loader, so fix PyTorch's RNG seed once, up front, to make
# "Restart Kernel -> Run All" produce comparable numbers.
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters
BATCH_SIZE = 128        # mini-batch size used by both data loaders
EPOCHS = 10             # default number of passes over the training set
LEARNING_RATE = 0.001   # SGD step size
MOMENTUM = 0.9          # SGD momentum

# Last expression of the cell: display the selected device.
DEVICE

device(type='cuda', index=0)

In [14]:
# Load the CIFAR-100 dataset.
# Preprocessing: convert each image to a tensor, then normalise every RGB
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# Disabled alternative pipeline (random-resized-crop + flip augmentation with
# a different set of normalisation statistics), kept for reference:
# transform = transforms.Compose([
#     transforms.RandomResizedCrop(224),
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                          std=[0.229, 0.224, 0.225]),
# ])

# Arguments shared by the train and test splits / loaders.
dataset_kwargs = dict(root='./data', download=True, transform=transform)
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=4)

# Training split: shuffled on every pass.
train_dataset = torchvision.datasets.CIFAR100(train=True, **dataset_kwargs)
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, **loader_kwargs)

# Test split: iterated in a fixed order.
test_dataset = torchvision.datasets.CIFAR100(train=False, **dataset_kwargs)
test_loader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, **loader_kwargs)

Files already downloaded and verified
Files already downloaded and verified


In [15]:
# Step 1: define the randomly initialised network A and freeze its weights.
class RandomInitResNet(nn.Module):
    """Wrap a backbone as a frozen feature extractor.

    The backbone's ``fc`` attribute is replaced by ``nn.Identity`` so that the
    wrapper returns whatever the backbone produces right before that layer,
    and ``requires_grad`` is switched off for every backbone parameter.

    Note that ``base_model`` itself is modified in place; the wrapper keeps a
    reference to it rather than a copy.

    Args:
        base_model: the module to wrap; it is expected to expose an ``fc``
            attribute that can be overwritten.
    """

    def __init__(self, base_model):
        super().__init__()
        # Drop the final layer by turning it into a pass-through.
        base_model.fc = nn.Identity()
        # Freeze the weights; requires_grad_ returns the module itself.
        self.base_model = base_model.requires_grad_(False)

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.base_model(x)


# Build a ResNet-18 without pretrained weights (i.e. randomly initialised) and
# wrap it as the frozen network A.
# resnet18() is called with no arguments on purpose: the `pretrained` keyword
# is deprecated in newer torchvision releases, while the argument-free call
# yields a randomly initialised model on both old and new versions.
base_model = models.resnet18()
random_init_model_A = RandomInitResNet(base_model)

In [16]:
# Step 2: split the model into a feature extractor and a classifier, and train
# only the linear classifier.
class LinearEvalModel(nn.Module):
    """Linear classifier on top of a feature extractor that receives no gradients.

    Args:
        feature_extractor: callable/module mapping a batch of inputs to a
            batch of features; its output is flattened to ``(batch, -1)``.
        num_classes: number of output logits of the linear head.
        feature_dim: number of features per sample after flattening. Defaults
            to 512, the value previously hard-coded here.

    NOTE(review): ``torch.no_grad()`` only stops gradient flow through the
    extractor. If the extractor contains mode-dependent layers (e.g. batch
    normalisation), their behaviour still follows ``train()`` / ``eval()``
    calls made on this model -- confirm that is what the experiment intends.
    """

    def __init__(self, feature_extractor, num_classes=100, feature_dim=512):
        super().__init__()
        self.feature_extractor = feature_extractor
        # The only layer whose output is computed with autograd enabled.
        self.fc = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # Features are computed without building an autograd graph.
        with torch.no_grad():
            x = self.feature_extractor(x)
        # reshape (unlike view) also accepts non-contiguous feature tensors.
        x = x.reshape(x.size(0), -1)
        return self.fc(x)


# Build the linear-evaluation model on top of random_init_model_A.
linear_eval_model_A = LinearEvalModel(feature_extractor=random_init_model_A)

In [17]:
# Train the linear classifier.
def train_linear_layer(model, train_loader, criterion, optimizer, epochs=EPOCHS):
    """Train ``model`` with mini-batch updates and print the loss per epoch.

    Args:
        model: module to train; it is switched to training mode first.
        train_loader: iterable of ``(inputs, labels)`` batches that also
            exposes a ``dataset`` attribute supporting ``len``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        epochs: number of passes over ``train_loader``.

    Returns:
        None. The per-epoch loss (batch losses weighted by batch size, divided
        by ``len(train_loader.dataset)``) is printed.
    """
    model.train()
    for epoch in tqdm(range(epochs)):
        loss_sum = 0.0
        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            # Standard update: clear old gradients, forward, backward, step.
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size.
            loss_sum += batch_inputs.size(0) * batch_loss.item()

        epoch_loss = loss_sum / len(train_loader.dataset)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')


# Evaluate the model.
def evaluate(model, test_loader):
    """Return the top-1 accuracy of ``model`` on ``test_loader`` in percent.

    The model is switched to evaluation mode (and left in that mode), and all
    forward passes run with gradient tracking disabled.

    Args:
        model: module producing per-class scores along dimension 1.
        test_loader: iterable of ``(inputs, labels)`` batches.

    Returns:
        ``100 * correct / total`` as a float.
    """
    model.eval()
    num_correct, num_seen = 0, 0
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            # Index of the highest score per sample is the predicted class.
            predicted = torch.max(model(batch_inputs), 1)[1]
            num_seen += batch_labels.size(0)
            num_correct += (predicted == batch_labels).sum().item()
    return 100 * num_correct / num_seen


# Loss function and optimizer. Only the parameters of the linear head (fc)
# are handed to SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    linear_eval_model_A.fc.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)


# Step 1: train the linear classifier on the randomly initialised model A,
# then evaluate it on the test set.
linear_eval_model_A = linear_eval_model_A.to(DEVICE)
train_linear_layer(
    model=linear_eval_model_A,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
)
accuracy_A = evaluate(linear_eval_model_A, test_loader)
print(f'Step 1: Accuracy of linear classifier on random '
      f'initialized A: {accuracy_A:.2f}%')

 10%|█         | 1/10 [00:14<02:07, 14.19s/it]

Epoch [1/10], Loss: 4.5882


 20%|██        | 2/10 [00:27<01:51, 13.89s/it]

Epoch [2/10], Loss: 4.3387


 30%|███       | 3/10 [00:41<01:36, 13.82s/it]

Epoch [3/10], Loss: 4.1977


 40%|████      | 4/10 [00:55<01:22, 13.80s/it]

Epoch [4/10], Loss: 4.0996


 50%|█████     | 5/10 [01:09<01:08, 13.75s/it]

Epoch [5/10], Loss: 4.0272


 60%|██████    | 6/10 [01:22<00:54, 13.75s/it]

Epoch [6/10], Loss: 3.9690


 70%|███████   | 7/10 [01:36<00:41, 13.67s/it]

Epoch [7/10], Loss: 3.9244


 80%|████████  | 8/10 [01:50<00:27, 13.80s/it]

Epoch [8/10], Loss: 3.8839


 90%|█████████ | 9/10 [02:04<00:13, 13.87s/it]

Epoch [9/10], Loss: 3.8496


100%|██████████| 10/10 [02:18<00:00, 13.82s/it]

Epoch [10/10], Loss: 3.8201





Step 1: Accuracy of linear classifier on random initialized A: 11.75%


In [18]:
# Step 3: define network B and train it using A's predictions.
class NetworkB(nn.Module):
    """Backbone followed by a linear classification head.

    The backbone's ``fc`` attribute is replaced by ``nn.Identity`` (modifying
    ``base_model`` in place), and a new linear head is attached. Unlike a
    ``torch.no_grad()`` wrapper, the forward pass here leaves autograd enabled
    for the backbone; whether its weights actually receive gradients depends
    on their ``requires_grad`` flags.

    Args:
        base_model: module/callable producing per-sample features; it must
            allow its ``fc`` attribute to be overwritten.
        num_classes: number of output logits of the linear head.
        feature_dim: number of features per sample after flattening. Defaults
            to 512, the value previously hard-coded here.
    """

    def __init__(self, base_model, num_classes=100, feature_dim=512):
        super().__init__()
        self.base_model = base_model
        # Turn the backbone's own final layer into a pass-through.
        self.base_model.fc = nn.Identity()
        self.fc = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.base_model(x)
        # reshape (unlike view) also accepts non-contiguous feature tensors.
        x = x.reshape(x.size(0), -1)
        return self.fc(x)


# Define network B: a second, independently initialised ResNet-18.
# Passing it through RandomInitResNet replaces its fc layer with Identity and
# freezes its parameters; NetworkB then attaches a fresh linear head to that
# same backbone.
# resnet18() is called with no arguments on purpose: the `pretrained` keyword
# is deprecated in newer torchvision releases, while the argument-free call
# yields a randomly initialised model on both old and new versions.
random_init_model_B = RandomInitResNet(models.resnet18()).to(DEVICE)
linear_eval_model_B = NetworkB(random_init_model_B.base_model).to(DEVICE)

# Optimizer for network B: only the linear head's parameters are passed to SGD.
optimizer_B = optim.SGD(
    linear_eval_model_B.fc.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)


# Step 3: train network B using A's predictions as targets.
def train_network_B(model_B, model_A, train_loader, criterion, optimizer, epochs=EPOCHS):
    """Train ``model_B`` to match the softmax output of ``model_A``.

    The labels yielded by ``train_loader`` are ignored; for every batch the
    target is ``softmax(model_A(inputs))`` computed without gradient tracking.

    Args:
        model_B: module being trained; switched to training mode.
        model_A: module providing the targets; switched to evaluation mode.
        train_loader: iterable of ``(inputs, labels)`` batches that also
            exposes a ``dataset`` attribute supporting ``len``.
        criterion: loss callable invoked as ``criterion(outputs, targets)``
            where ``targets`` are per-class probabilities.
        optimizer: optimizer stepped once per batch.
        epochs: number of passes over ``train_loader``.

    Returns:
        None. The per-epoch loss (batch losses weighted by batch size, divided
        by ``len(train_loader.dataset)``) is printed.
    """
    model_B.train()
    model_A.eval()
    to_probabilities = nn.Softmax(dim=1)
    for epoch in tqdm(range(epochs)):
        loss_sum = 0.0
        for batch_inputs, _ in train_loader:
            batch_inputs = batch_inputs.to(DEVICE)

            # Soft targets: A's class probabilities for this batch.
            with torch.no_grad():
                soft_targets = to_probabilities(model_A(batch_inputs))

            optimizer.zero_grad()
            batch_loss = criterion(model_B(batch_inputs), soft_targets)
            batch_loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size.
            loss_sum += batch_inputs.size(0) * batch_loss.item()

        epoch_loss = loss_sum / len(train_loader.dataset)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')


# Train network B against A's soft targets, then measure its accuracy on the
# test set (evaluate compares predictions with the loader's labels).
train_network_B(
    model_B=linear_eval_model_B,
    model_A=linear_eval_model_A,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer_B,
    epochs=EPOCHS,
)
accuracy_B = evaluate(linear_eval_model_B, test_loader)
print(f'Step 3: Accuracy of linear classifier on B network: {accuracy_B:.2f}%')

 10%|█         | 1/10 [00:15<02:15, 15.07s/it]

Epoch [1/10], Loss: 4.6733


 20%|██        | 2/10 [00:31<02:06, 15.76s/it]

Epoch [2/10], Loss: 4.5592


 30%|███       | 3/10 [00:47<01:50, 15.78s/it]

Epoch [3/10], Loss: 4.5080


 40%|████      | 4/10 [01:02<01:34, 15.68s/it]

Epoch [4/10], Loss: 4.4767


 50%|█████     | 5/10 [01:17<01:17, 15.52s/it]

Epoch [5/10], Loss: 4.4543


 60%|██████    | 6/10 [01:33<01:01, 15.43s/it]

Epoch [6/10], Loss: 4.4378


 70%|███████   | 7/10 [01:47<00:45, 15.21s/it]

Epoch [7/10], Loss: 4.4245


 80%|████████  | 8/10 [02:02<00:30, 15.16s/it]

Epoch [8/10], Loss: 4.4148


 90%|█████████ | 9/10 [02:17<00:15, 15.10s/it]

Epoch [9/10], Loss: 4.4061


100%|██████████| 10/10 [02:33<00:00, 15.36s/it]

Epoch [10/10], Loss: 4.3987





Step 3: Accuracy of linear classifier on B network: 8.02%
