'Data_tensor_transform.ipynb' 에서 작성했던 코드를 그대로 가져왔다.

In [2]:
import torch
import pandas as pd
import numpy as np
import os
import cv2

In [3]:
# Two-way lookup between label names and the integer class ids used in targets.
# NOTE(review): torchvision detection models appear to reserve class index 0 for
# background, so 'Bus' (id 0) may be treated as background -- TODO confirm; if so,
# ids should start at 1 and the model's num_classes should grow by one.
CLASS_ID_TO_NAME = dict(enumerate(('Bus', 'Truck')))
CLASS_NAME_TO_ID = {name: class_id for class_id, name in CLASS_ID_TO_NAME.items()}

In [4]:
class Detection_dataset():
    """Map-style detection dataset yielding ``(image, target, filename)`` samples.

    Images are read from ``<data_dir>/<phase>/`` (files ending in ``jpg``) and
    annotations from ``<data_dir>/df.csv``, which must provide the columns
    ``ImageID``, ``LabelName``, ``XMin``, ``XMax``, ``YMin`` and ``YMax``.
    Box coordinates in the CSV are treated as fractions of the image size and
    are scaled to pixels of the (possibly transformed) image.
    """

    def __init__(self, data_dir, phase, transformer=None):
        """Index the images of one phase and load the annotation table.

        Args:
            data_dir: Root directory containing ``df.csv`` and one
                sub-directory per phase.
            phase: Name of the sub-directory to read images from.
            transformer: Optional callable applied to each RGB image; its
                output must have shape ``(C, H, W)``.
        """
        self.data_dir = data_dir
        self.phase = phase
        self.data_df = pd.read_csv(os.path.join(data_dir, 'df.csv'))
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible between runs and machines.
        self.image_files = sorted(
            fn for fn in os.listdir(os.path.join(self.data_dir, phase)) if fn.endswith("jpg")
        )
        self.transformer = transformer

    def __len__(self):
        """Return the number of images found for this phase."""
        return len(self.image_files)

    def __getitem__(self, index):
        """Return ``(image, target, filename)`` for the image at ``index``.

        ``target`` is a dict with ``"boxes"`` (float32, one
        ``[xmin, ymin, xmax, ymax]`` row per object, in pixels) and
        ``"labels"`` (int64 class ids).
        """
        # input : image
        # target: label (boxes, class ids)
        # get_image -> filename (image id) -> get_label
        filename, image = self.get_image(index)
        bboxes, class_ids = self.get_label(filename)
        img_H, img_W, _ = image.shape
        if self.transformer is not None:
            # image shape : (H, W, C)
            # tensor shape: (C, H, W)
            image = self.transformer(image)
            _, img_H, img_W = image.shape

        # Columns [0, 2] are xmin/xmax and [1, 3] are ymin/ymax; scale the
        # fractional coordinates to the size of the image that is returned.
        bboxes[:, [0, 2]] *= img_W
        bboxes[:, [1, 3]] *= img_H

        target = {
            "boxes": torch.as_tensor(bboxes, dtype=torch.float32),
            "labels": torch.as_tensor(class_ids, dtype=torch.int64),
        }
        return image, target, filename

    def get_image(self, index):
        """Read the image at ``index`` and return ``(filename, rgb_image)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        filename = self.image_files[index]
        image_path = os.path.join(self.data_dir, self.phase, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return filename, image

    def get_label(self, filename):
        """Return ``(bboxes, class_ids)`` for the image stored as ``filename``.

        ``bboxes`` is a writable float64 array of shape ``(N, 4)`` ordered
        ``[XMin, YMin, XMax, YMax]`` exactly as stored in the CSV;
        ``class_ids`` is an int64 array of length ``N``. Both are empty when
        the image has no annotation rows.
        """
        # splitext strips only the extension, so ids containing dots survive.
        image_id = os.path.splitext(filename)[0]
        meta_data = self.data_df[self.data_df['ImageID'] == image_id]
        cate_names = meta_data["LabelName"].values
        class_ids = np.array(
            [CLASS_NAME_TO_ID[cate_name] for cate_name in cate_names], dtype=np.int64
        )
        # Select the columns directly in box order and take an explicit float
        # copy: the caller scales the array in place, which must neither touch
        # the DataFrame nor fail on a read-only or integer-typed buffer.
        bboxes = meta_data[["XMin", "YMin", "XMax", "YMax"]].to_numpy(dtype=np.float64, copy=True)
        return bboxes, class_ids

In [5]:
from torchvision import transforms
from torch.utils.data import DataLoader

In [6]:
def collate_fn(batch):
    """Regroup a batch of ``(image, target, filename)`` samples by field.

    Detection samples carry a variable number of boxes, so they are kept as
    plain Python lists instead of being stacked.

    Returns:
        A tuple ``(images, targets, filenames)`` of three lists, each in the
        order of ``batch``.
    """
    samples = list(batch)
    images = [image for image, _, _ in samples]
    targets = [target for _, target, _ in samples]
    filenames = [filename for _, _, filename in samples]
    return images, targets, filenames

In [7]:
def build_dataloader(data_dir, batch_size=4, image_size=448, normalize=True):
    """Build the ``"train"`` and ``"val"`` data loaders for the detection dataset.

    Every image is converted to a tensor and resized to
    ``(image_size, image_size)``; both phases share the same transform.

    Args:
        data_dir: Dataset root passed through to ``Detection_dataset``.
        batch_size: Batch size of the training loader. The validation loader
            always uses a batch size of 1.
        image_size: Side length of the square the images are resized to.
        normalize: When true (default, the historical behaviour) images are
            also normalised with mean ``[0.485, 0.456, 0.406]`` and std
            ``[0.229, 0.224, 0.225]``. torchvision's Faster R-CNN applies the
            same normalisation internally (its ``GeneralizedRCNNTransform``),
            so pass ``False`` when feeding that model to avoid normalising
            twice.

    Returns:
        Dict mapping ``"train"`` and ``"val"`` to their ``DataLoader``.
    """
    steps = [
        transforms.ToTensor(),
        transforms.Resize(size=(image_size, image_size)),
    ]
    if normalize:
        steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    transformer = transforms.Compose(steps)

    # (batch size, shuffle) per phase: training is shuffled, validation is
    # read one image at a time in a fixed order.
    phase_options = {"train": (batch_size, True), "val": (1, False)}

    dataloaders = {}
    for phase, (phase_batch_size, shuffle) in phase_options.items():
        dataset = Detection_dataset(data_dir=data_dir, phase=phase, transformer=transformer)
        dataloaders[phase] = DataLoader(
            dataset,
            batch_size=phase_batch_size,
            shuffle=shuffle,
            collate_fn=collate_fn,
        )
    return dataloaders

data_dir = "./Detection/"
dloaders = build_dataloader(data_dir, batch_size=4, image_size=448)

# Smoke test: show the targets and file names of the first batch of each phase.
for phase in ["train", "val"]:
    for index, batch in enumerate(dloaders[phase]):
        images = batch[0]
        targets = batch[1]
        filenames = batch[2]
        print(f"{phase} - {targets}")
        # Print the string itself; wrapping the f-string in braces built a
        # one-element set and printed its repr instead.
        print(f"{phase} - {filenames}")
        # Only the first batch is needed.
        if index == 0:
            break

train - [{'boxes': tensor([[231.2800, 337.8910, 283.6400, 435.8122]]), 'labels': tensor([0])}, {'boxes': tensor([[  0.0000,   0.9654, 447.6501, 447.5171]]), 'labels': tensor([1])}, {'boxes': tensor([[ 13.1681,  24.6897, 442.3964, 403.0011]]), 'labels': tensor([0])}, {'boxes': tensor([[  0.0000,  70.2612, 101.3600, 350.9722],
        [  0.0000,  74.6108, 225.1200, 362.0136],
        [ 53.7600,  18.7363, 425.6000, 417.8881],
        [408.5200,  29.7777, 447.7200, 387.4412]]), 'labels': tensor([0, 0, 0, 0])}]
{"train - ['20e2a5892e1ca6c5.jpg', '61b97c0e773bc1a1.jpg', 'be8edb2f8035edfa.jpg', '3e7742c7485b014b.jpg']"}
val - [{'boxes': tensor([[261.5200, 151.9468, 431.7600, 338.9868],
        [  0.0000, 157.1732,  57.1200, 304.6400],
        [ 54.8800,   0.0000, 245.8400, 337.8668]]), 'labels': tensor([1, 1, 1])}]
{"val - ['eca6cd40621dfcc5.jpg']"}




# Faster R-CNN 아키텍처 불러오기

In [8]:
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [9]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

# Instantiate torchvision's Faster R-CNN (ResNet-50 + FPN) with its default pretrained weights.
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Notebook display of the architecture.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [10]:
# Input width of the existing classification layer; a replacement head must accept the same size.
in_features = model.roi_heads.box_predictor.cls_score.in_features
in_features

1024

In [11]:
# Replace the pretrained box predictor with a fresh head sized for this dataset.
# NOTE(review): torchvision's num_classes appears to include the background class,
# so 2 may leave only one foreground class -- TODO confirm against the class ids in use.
num_classes = 2
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [12]:
# Display the model again after swapping the box predictor.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [13]:
def build_model(num_classes):
    """Create a pretrained Faster R-CNN whose box predictor has ``num_classes`` outputs.

    The detector is loaded with torchvision's default weights; only the final
    box predictor is replaced, reusing the input width of the original
    classification layer.

    Args:
        num_classes: Number of classes the new ``FastRCNNPredictor`` predicts.

    Returns:
        The modified Faster R-CNN model.
    """
    detector = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
    roi_heads = detector.roi_heads
    head_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_width, num_classes)
    return detector

In [14]:
# Build the detector through the helper and display it.
# NOTE(review): if torchvision counts background in num_classes, 2 leaves a single
# foreground class -- TODO confirm against CLASS_NAME_TO_ID.
NUM_CLASSES = 2
model = build_model(num_classes=NUM_CLASSES)
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

# Detector 학습

In [15]:
# Run a single forward pass on one training batch to inspect the loss dictionary.
phase = 'train'
model.train()

for index, batch in enumerate(dloaders[phase]):
    images = batch[0]
    targets = batch[1]
    filenames = batch[2]

    # In training mode, with targets supplied, the model returns a dict of losses.
    loss = model(images, targets)

    # Stop after the first batch.
    if index==0:
        break



In [16]:
# Display the loss dictionary produced by the forward pass above.
loss

{'loss_classifier': tensor(0.7037, grad_fn=<NllLossBackward0>),
 'loss_box_reg': tensor(0.0456, grad_fn=<DivBackward0>),
 'loss_objectness': tensor(0.3787, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_rpn_box_reg': tensor(0.0106, grad_fn=<DivBackward0>)}

In [17]:
from collections import defaultdict

M1 Mac을 사용하기 때문에 'mps'를 사용할 수 있지만, 아래 설정에서는 'mps' 줄을 주석 처리하고 DEVICE를 'cpu'로 고정했다.

In [20]:
# Training configuration shared by the cells below.
data_dir = "./Detection/"
# Only read by the commented-out 'mps' device selection below.
is_mps = True
# is_cuda = True

# NOTE(review): torchvision's num_classes appears to include background -- TODO confirm
# that 2 is correct for the two labels 'Bus' and 'Truck'.
NUM_CLASSES = 2
IMAGE_SIZE = 448
BATCH_SIZE = 6
# Print the running training losses every VERBOSE_FREQ batches.
VERBOSE_FREQ = 200
# DEVICE = torch.device('mps' if torch.backends.mps.is_available() and is_mps else 'cpu')
# The device is currently pinned to the CPU; the alternatives are kept for reference.
DEVICE = 'cpu'
# DEVICE = torch.device('cuda' if torch.cuda.is_available() and is_cuda else 'cpu')

## train_one_epoch
* train과 validation 모두 수행
* loss:
    * classifier loss
    * box regression loss
    * objectness loss
    * RPN box regression loss
* 최적화 및 모델 업데이트

In [22]:
def train_one_epoch(dataloaders, model, optimizer, device):
    """Run one training pass followed by one validation pass.

    Args:
        dataloaders: Mapping with ``"train"`` and ``"val"`` loaders. Each
            batch is ``(images, targets, filenames)`` where ``images`` is a
            list of tensors and ``targets`` a list of dicts of tensors.
        model: Detection model that returns a dict of loss tensors when
            called as ``model(images, targets)`` in training mode.
        optimizer: Optimizer stepped once per training batch.
        device: Device the images and targets are moved to.

    Returns:
        ``(train_loss, val_loss)``: two ``defaultdict(float)`` objects that
        map every loss name, plus ``"total_loss"``, to its mean over the
        batches of the respective phase, as plain Python floats.
    """
    train_loss = defaultdict(float)
    val_loss = defaultdict(float)
    running = {"train": train_loss, "val": val_loss}

    # torchvision detection models return the loss dict only in training
    # mode, so the model stays in train mode during validation as well;
    # gradients are switched off for that phase instead.
    model.train()

    for phase in ["train", "val"]:
        is_train = phase == "train"
        num_batches = len(dataloaders[phase])

        for index, batch in enumerate(dataloaders[phase]):
            images = [image.to(device) for image in batch[0]]
            targets = [{k: v.to(device) for k, v in t.items()} for t in batch[1]]

            with torch.set_grad_enabled(is_train):
                loss = model(images, targets)
                total_loss = sum(loss.values())

            if is_train:
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                if (index > 0) and (index % VERBOSE_FREQ) == 0:
                    details = ", ".join(f"{k}: {v.item():.4f}" for k, v in loss.items())
                    print(f"{index}/{num_batches} - {details}")

            # Accumulate Python floats so no autograd graph is kept alive
            # between batches.
            for k, v in loss.items():
                running[phase][k] += v.item()
            running[phase]['total_loss'] += total_loss.item()

    # Average only after BOTH phases have run, each over its own batch count.
    # A phase without batches has no keys, so no division by zero can occur.
    for phase, phase_loss in running.items():
        for k in phase_loss:
            phase_loss[k] /= len(dataloaders[phase])

    return train_loss, val_loss

In [23]:
# Try the device-transfer expression used in train_one_epoch on the current `targets` batch.
device = "cpu"
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets

[{'boxes': tensor([[ 17.3600,  82.5068, 446.0400, 385.2800],
          [334.3200, 141.8668, 447.7200, 203.4668]]),
  'labels': tensor([1, 1])},
 {'boxes': tensor([[263.2000, 315.0000, 432.6932, 398.7200]]),
  'labels': tensor([1])},
 {'boxes': tensor([[ 11.8124,  37.3332, 435.7499, 405.4167]]),
  'labels': tensor([0])},
 {'boxes': tensor([[  0.0000, 180.3200,  73.9200, 393.4932],
          [  8.9600,  54.1332, 436.8000, 445.7600]]),
  'labels': tensor([0, 0])}]

In [24]:
# Rebuild the loaders and the model from the configuration constants, then set up SGD.
dataloaders = build_dataloader(data_dir=data_dir, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE)
model = build_model(num_classes=NUM_CLASSES)
model = model.to(DEVICE)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [25]:
def save_model(model_state, model_name, save_dir="./trained_model"):
    """Serialize ``model_state`` to ``<save_dir>/<model_name>`` with ``torch.save``.

    Args:
        model_state: Object to store, e.g. a model ``state_dict``.
        model_name: File name of the checkpoint inside ``save_dir``.
        save_dir: Target directory; created if it does not exist yet.
    """
    checkpoint_path = os.path.join(save_dir, model_name)
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model_state, checkpoint_path)

In [26]:
num_epochs = 30

# Per-epoch loss dictionaries as returned by train_one_epoch.
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    train_loss, val_loss = train_one_epoch(dataloaders, model, optimizer, DEVICE)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    print(f"epoch:{epoch+1}/{num_epochs} - Train loss: {train_loss['total_loss']:.4f}, Val loss: {val_loss['total_loss']:.4f}")

    # Checkpoint the weights every 10 epochs.
    if (epoch+1) % 10 == 0:
        save_model(model.state_dict(), f'model_{epoch+1}.pth')



KeyboardInterrupt: 