In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
batch_size=32

# 데이터 로드
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 모델 초기화 및 학습
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CBAMNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
# CBAM 모듈 구현
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction_ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.mlp = nn.Sequential(
            nn.Linear(in_channels, in_channels // reduction_ratio),
            nn.ReLU(inplace=True),
            nn.Linear(in_channels // reduction_ratio, in_channels)
        )

    def forward(self, x):
        avg_out = self.mlp(self.avg_pool(x).view(x.size(0), -1))
        max_out = self.mlp(self.max_pool(x).view(x.size(0), -1))
        out = avg_out.unsqueeze(2).unsqueeze(3) + max_out.unsqueeze(2).unsqueeze(3)
        return out

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(out)
        return self.sigmoid(out)

class CBAM(nn.Module):
    def __init__(self, in_channels):
        super(CBAM, self).__init__()
        self.channel_attention = ChannelAttention(in_channels)
        self.spatial_attention = SpatialAttention()

    def forward(self, x):
        out = self.channel_attention(x) * x
        out = self.spatial_attention(out) * out
        return out

# CNN 모델 구현
class CBAMNet(nn.Module):
    def __init__(self):
        super(CBAMNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.cbam1 = CBAM(16)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.cbam2 = CBAM(32)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool1(self.cbam1(self.conv1(x)))
        x = self.pool2(self.cbam2(self.conv2(x)))
        x = x.view(-1, 32 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [11]:
# 학습 진행
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

[1,   100] loss: 1.128
[1,   200] loss: 0.651
[1,   300] loss: 0.539
[1,   400] loss: 0.508
[1,   500] loss: 0.446
[1,   600] loss: 0.452
[1,   700] loss: 0.414
[1,   800] loss: 0.385
[1,   900] loss: 0.399
[1,  1000] loss: 0.391
[1,  1100] loss: 0.366
[1,  1200] loss: 0.363
[1,  1300] loss: 0.346
[1,  1400] loss: 0.333
[1,  1500] loss: 0.349
[1,  1600] loss: 0.348
[1,  1700] loss: 0.341
[1,  1800] loss: 0.333
[2,   100] loss: 0.311
[2,   200] loss: 0.327
[2,   300] loss: 0.294
[2,   400] loss: 0.319
[2,   500] loss: 0.284
[2,   600] loss: 0.298
[2,   700] loss: 0.289
[2,   800] loss: 0.280
[2,   900] loss: 0.279
[2,  1000] loss: 0.288
[2,  1100] loss: 0.264
[2,  1200] loss: 0.265
[2,  1300] loss: 0.291
[2,  1400] loss: 0.281
[2,  1500] loss: 0.273
[2,  1600] loss: 0.276
[2,  1700] loss: 0.282
[2,  1800] loss: 0.267
[3,   100] loss: 0.227
[3,   200] loss: 0.227
[3,   300] loss: 0.241
[3,   400] loss: 0.233
[3,   500] loss: 0.267
[3,   600] loss: 0.245
[3,   700] loss: 0.239
[3,   800] 

In [12]:
# 모델 평가
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

# 모델 구조 출력
print(model)

Test Accuracy: 90.90%
CBAMNet(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cbam1): CBAM(
    (channel_attention): ChannelAttention(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (max_pool): AdaptiveMaxPool2d(output_size=1)
      (mlp): Sequential(
        (0): Linear(in_features=16, out_features=1, bias=True)
        (1): ReLU(inplace=True)
        (2): Linear(in_features=1, out_features=16, bias=True)
      )
    )
    (spatial_attention): SpatialAttention(
      (conv): Conv2d(2, 1, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
      (sigmoid): Sigmoid()
    )
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cbam2): CBAM(
    (channel_attention): ChannelAttention(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (max_pool): AdaptiveMaxPool2d(output_size=1)
      (mlp): Sequential(
        (0): L