# 패션 데이터셋을 이용한 multi-class 분류 모델 개선
    * 이미지의 edge 정보를 사용하여 개선

## 1. ResNet 구조 정의

In [1]:
# ResNet architecture
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torchvision import transforms
import cv2
import numpy as np

# Residual Block 구조 정의
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a skip connection.

    Args:
        in_planes: Number of channels of the input feature map.
        out_planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution; values other than 1
            shrink the spatial size of the output.
    """

    # Ratio between the block's output channels and ``out_planes``.
    mul = 1

    def __init__(self, in_planes, out_planes, stride=1):
        super(BasicBlock, self).__init__()

        # Spatial down-sampling (if any) is done by the stride of conv1.
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_planes)

        # Second convolution keeps the spatial size (stride=1, padding=1).
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

        # Identity shortcut: the input is added to the output unchanged.
        self.shortcut = nn.Sequential()

        # The residual sum needs matching shapes. A projection is required
        # when the spatial size changes (stride != 1) and also when only the
        # channel count changes; the latter case used to be missed and made
        # the addition in forward() fail with a shape mismatch.
        if stride != 1 or in_planes != out_planes * self.mul:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes * self.mul, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes * self.mul)
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)  # skip connection
        out = F.relu(out)
        return out
    
class BottleNeck(nn.Module):
    """Three-convolution residual block: 1x1 -> 3x3 -> 1x1.

    The last 1x1 convolution widens the feature map to
    ``out_planes * mul`` channels.

    Args:
        in_planes: Number of channels of the input feature map.
        out_planes: Channel width of the first two convolutions.
        stride: Stride of the first 1x1 convolution.
    """

    # Ratio between the block's output channels and ``out_planes``.
    mul = 4

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()
        widened = out_planes * self.mul

        # The first convolution carries the stride, so it is the one that
        # reduces width and height.
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(out_planes)

        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

        # Expand the channel count by a factor of ``mul``.
        self.conv3 = nn.Conv2d(out_planes, widened, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        # Use a 1x1 projection on the skip path whenever the main path
        # changes the spatial size or the channel count; otherwise identity.
        shape_changes = stride != 1 or in_planes != widened
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, widened, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(widened),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut and apply ReLU."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = F.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))
        residual += self.shortcut(x)
        return F.relu(residual)

    
    
    
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head.

    Args:
        block: Residual block class. It is called as
            ``block(in_planes, out_planes, stride)`` and must expose a
            ``mul`` attribute giving its channel expansion factor.
        num_blocks: Number of blocks in each of the four stages,
            e.g. ``[3, 4, 6, 3]``.
        num_classes: Size of the output logit vector.
        in_channels: Number of channels of the input tensor. Defaults to 4,
            the value that was previously hard-coded in the stem.
    """

    def __init__(self, block, num_blocks, num_classes=5, in_channels=4):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; updated by make_layer().
        self.in_planes = 64

        # Stem: 7x7 stride-2 convolution, batch norm, 3x3 stride-2 max pool.
        # The convolution keeps its default bias so that state-dict keys
        # stay the same as for models built before in_channels was added.
        self.conv1 = nn.Conv2d(in_channels, self.in_planes, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(self.in_planes)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self.make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self.make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self.make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self.make_layer(block, 512, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear = nn.Linear(512 * block.mul, num_classes)

    def make_layer(self, block, out_planes, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks as an ``nn.Sequential``.

        Only the first block of the stage receives ``stride``; the remaining
        blocks use stride 1. ``self.in_planes`` is advanced to the stage's
        output channel count as a side effect.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, out_planes, block_stride))
            self.in_planes = block.mul * out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.maxpool1(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.linear(out)  # fully connected classification layer
        return out
    

def ResNet50(num_classes=5):
    """Build a ResNet from BottleNeck blocks with stage depths [3, 4, 6, 3].

    Args:
        num_classes: Number of output classes. Defaults to 5, the value the
            factory always used before this parameter existed.
    """
    return ResNet(BottleNeck, [3, 4, 6, 3], num_classes=num_classes)

## 2. DataLoader 및 모델 개선 방안 정의

In [2]:
import os
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torchvision import transforms


class CustomImageDataset(Dataset):
    """Image-folder dataset whose samples are the image plus its Canny edges.

    The root directory is expected to contain one sub-directory per class.
    Each sample is a dict with keys ``'image'`` (the transformed image with
    the transformed edge map concatenated along dimension 0) and ``'label'``
    (the integer class index).

    Args:
        data_set_path: Root directory holding one folder per class.
        transforms: Optional callable applied to both the image and its edge
            map. It is expected to return tensors, since the two results are
            concatenated with ``torch.cat``. When ``None`` both are converted
            to float tensors scaled to [0, 1].
    """

    def read_data_set(self):
        """Scan the root directory and collect image paths and labels.

        Class folders are processed in sorted order so that a given class
        name receives the same label index regardless of the order in which
        the filesystem lists directories. The sorted names are also stored
        on ``self.class_names``.

        Returns:
            Tuple ``(image_paths, labels, number_of_images, number_of_classes)``.

        Raises:
            FileNotFoundError: If ``data_set_path`` is not a directory.
        """
        if not os.path.isdir(self.data_set_path):
            raise FileNotFoundError(
                "data set directory not found: {}".format(self.data_set_path))

        all_img_files = []
        all_labels = []

        class_names = sorted(next(os.walk(self.data_set_path))[1])
        self.class_names = class_names

        for label, class_name in enumerate(class_names):
            img_dir = os.path.join(self.data_set_path, class_name)
            img_files = sorted(next(os.walk(img_dir))[2])

            for img_file in img_files:
                img_path = os.path.join(img_dir, img_file)
                # Image.open raises rather than returning None, so the old
                # "is not None" check never filtered anything and left the
                # file handle open. Open inside a with-block and skip files
                # that cannot be opened as an image.
                try:
                    with Image.open(img_path):
                        pass
                except OSError:
                    continue
                all_img_files.append(img_path)
                all_labels.append(label)

        return all_img_files, all_labels, len(all_img_files), len(class_names)

    def __init__(self, data_set_path, transforms=None):
        self.data_set_path = data_set_path
        self.image_files_path, self.labels, self.length, self.num_classes = self.read_data_set()
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``{'image': tensor, 'label': int}`` for sample ``index``."""
        import random  # local import: only needed to replay the RNG state

        # Close the file handle as soon as the pixel data has been copied.
        with Image.open(self.image_files_path[index]) as img:
            image = img.convert("RGB")

        # Canny edge map of the RGB image (thresholds 50 / 200).
        image_edge = Image.fromarray(cv2.Canny(np.array(image), 50, 200))

        if self.transforms is not None:
            # Random transforms draw fresh parameters on every call. Replay
            # the RNG state for the second call so the edge map receives the
            # same random parameters as the image and stays aligned with it.
            # Both generators are saved because which one the transforms use
            # is not visible from here.
            py_state = random.getstate()
            torch_state = torch.get_rng_state()
            image = self.transforms(image)
            random.setstate(py_state)
            torch.set_rng_state(torch_state)
            image_edge = self.transforms(image_edge)
        else:
            # Without a transform the PIL images cannot be concatenated, so
            # convert them to channel-first float tensors in [0, 1].
            image = torch.from_numpy(np.array(image)).permute(2, 0, 1).float().div(255)
            image_edge = torch.from_numpy(np.array(image_edge)).unsqueeze(0).float().div(255)

        # Depth-concatenate image and edge map (3 + 1 channels).
        result_image = torch.cat((image, image_edge), 0)

        return {'image': result_image, 'label': self.labels[index]}

    def __len__(self):
        """Return the number of images found by read_data_set()."""
        return self.length

## 3. Train  

In [3]:
# Training configuration
epoch = 20  # number of passes over the training set
batch_size = 64
learning_rate = 0.001
log_interval = 64  # print the loss every `log_interval` batches (was tied to batch_size)

# Image.AFFINE is a transform-method constant, not a resampling filter.
# NOTE(review): assumed to share its integer value with Image.NEAREST, so
# nearest-neighbour is named explicitly here and the resize result should
# not change - confirm against the Pillow constants.
transforms_train = transforms.Compose([transforms.Resize((128, 128), interpolation=Image.NEAREST),
                                       transforms.RandomRotation(10.),
                                       transforms.ToTensor()])

transforms_test = transforms.Compose([transforms.Resize((128, 128), interpolation=Image.NEAREST),
                                      transforms.ToTensor()])

train_data_set = CustomImageDataset(data_set_path="./pre_data/train", transforms=transforms_train)
train_loader = DataLoader(train_data_set, batch_size=batch_size, shuffle=True)

test_data_set = CustomImageDataset(data_set_path="./pre_data/test", transforms=transforms_test)
# Evaluation does not depend on sample order, so the test set is not shuffled.
test_loader = DataLoader(test_data_set, batch_size=batch_size, shuffle=False)

if train_data_set.num_classes != test_data_set.num_classes:
    # Raise instead of calling exit(), which would shut down the interpreter
    # (and the kernel when run inside a notebook).
    raise ValueError("error: Numbers of class in training set and test set are not equal")

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Same architecture as ResNet50(), but the output size follows the number of
# class folders found in the training set instead of a fixed 5.
custom_model = ResNet(BottleNeck, [3, 4, 6, 3], num_classes=train_data_set.num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(custom_model.parameters(), lr=learning_rate)

# Make sure batch norm is in training mode even if this cell is re-run after
# the model was switched to eval().
custom_model.train()

for e in range(epoch):
    for i_batch, item in enumerate(train_loader):
        images = item['image'].to(device)
        labels = item['label'].to(device)

        # Forward pass
        outputs = custom_model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i_batch + 1) % log_interval == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'
                  .format(e + 1, epoch, loss.item()))

Epoch [1/20], Loss: 0.4916
Epoch [1/20], Loss: 0.2240
Epoch [1/20], Loss: 0.1142
Epoch [1/20], Loss: 0.1027
Epoch [1/20], Loss: 0.1749
Epoch [1/20], Loss: 0.1976
Epoch [1/20], Loss: 0.2033
Epoch [1/20], Loss: 0.3326
Epoch [1/20], Loss: 0.0455
Epoch [1/20], Loss: 0.1519
Epoch [2/20], Loss: 0.0817
Epoch [2/20], Loss: 0.0934
Epoch [2/20], Loss: 0.1378
Epoch [2/20], Loss: 0.0713
Epoch [2/20], Loss: 0.1159
Epoch [2/20], Loss: 0.0182
Epoch [2/20], Loss: 0.0786
Epoch [2/20], Loss: 0.0275
Epoch [2/20], Loss: 0.1452
Epoch [2/20], Loss: 0.0778
Epoch [3/20], Loss: 0.0429
Epoch [3/20], Loss: 0.0144
Epoch [3/20], Loss: 0.1376
Epoch [3/20], Loss: 0.1495
Epoch [3/20], Loss: 0.0268
Epoch [3/20], Loss: 0.0506
Epoch [3/20], Loss: 0.0180
Epoch [3/20], Loss: 0.0418
Epoch [3/20], Loss: 0.0827
Epoch [3/20], Loss: 0.0357
Epoch [4/20], Loss: 0.0175
Epoch [4/20], Loss: 0.0050
Epoch [4/20], Loss: 0.0846
Epoch [4/20], Loss: 0.0248
Epoch [4/20], Loss: 0.1490
Epoch [4/20], Loss: 0.0123
Epoch [4/20], Loss: 0.0097
E

## 4. Test를 통한 정확도 측정(96.54%)

In [4]:
# Evaluate the trained model on the test set and report top-1 accuracy.
custom_model.eval()  # switch the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for item in test_loader:
        images = item['image'].to(device)
        labels = item['label'].to(device)
        outputs = custom_model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += int((predicted == labels).sum())

print(f'Test Accuracy of the model on the {total} test images: {100 * correct / total} %')

Test Accuracy of the model on the 405 test images: 96.54320987654322 %


## 5. 학습 완료된 모델 저장

In [6]:
# Save only the learned parameters (state dict). Create the target directory
# first so that saving does not fail when ./model does not exist yet.
save_path = './model/proposed_model_e20_data_augmentation.pth'
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(custom_model.state_dict(), save_path)