In [1]:
import os
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from collections import defaultdict


import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms, models

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [2]:
Class_Name_To_Int = {'Bus':0, 'Truck':1}
IMAGE_SIZE = 448
NUM_CLASSES = 2
VERBOSE_FREQ = 200

In [3]:
data_dir = '/home/hts/A_project/hts_pytorch/data/DRIVING-DATASET/Detection/'
train_data_dir = os.path.join(data_dir, 'train/')
val_data_dir = os.path.join(data_dir, 'val/')

csv_data = pd.read_csv(data_dir + 'df.csv')
csv_data = csv_data.drop(columns=['Source', 'Confidence','IsOccluded','IsTruncated','IsGroupOf','IsDepiction','IsInside','XClick1X','XClick2X','XClick3X','XClick4X','XClick1Y','XClick2Y','XClick3Y','XClick4Y'])
csv_data = csv_data[['ImageID', 'LabelName', 'XMin', 'YMin', 'XMax', 'YMax']]
train_data_list = os.listdir(train_data_dir)
val_data_list = os.listdir(val_data_dir)


In [4]:
print(csv_data)

                ImageID LabelName      XMin      YMin      XMax      YMax
0      0000599864fd15b3       Bus  0.343750  0.156162  0.908750  0.650047
1      00006bdb1eb5cd74     Truck  0.276667  0.141604  0.697500  0.437343
2      00006bdb1eb5cd74     Truck  0.702500  0.204261  0.999167  0.409774
3      00010bf498b64bab       Bus  0.156250  0.269188  0.371250  0.705228
4      00013f14dd4e168f       Bus  0.287500  0.194184  0.999375  0.999062
...                 ...       ...       ...       ...       ...       ...
24057  fff2b15ad6007d0e     Truck  0.277344  0.226389  0.622656  0.859722
24058  fff376d20410e4c9       Bus  0.295625  0.306667  0.558750  0.397500
24059  fff376d20410e4c9       Bus  0.348125  0.423333  0.701250  0.744167
24060  fffde5953a818927       Bus  0.277500  0.565000  0.605625  0.795833
24061  fffde5953a818927       Bus  0.613125  0.623333  0.828750  0.795833

[24062 rows x 6 columns]


In [5]:
print(f'train data의 개수 : {len(train_data_list)}')
print(f'  val data의 개수 : {len(val_data_list)}')

train data의 개수 : 13703
  val data의 개수 : 1522


In [6]:
class car_data_set():
    def __init__(self, data_dir, phase, csv_data, transformer = None):
        self.csv_data = csv_data
        self.phase_data_dir = (data_dir + phase + '/')
        self.data_list = os.listdir(self.phase_data_dir)
        self.transformer = transformer

    def __len__(self):
        return len(self.data_list)
    
    def get_label_def(self, image_name, img_H, img_W):
        label = self.csv_data.loc[(self.csv_data['ImageID'] == image_name.split(".")[0])]
        target_name = [Class_Name_To_Int[i] for i in label['LabelName'].values]
        bounding_box = label.drop(columns = ['ImageID', 'LabelName']).values

        bounding_box[:, [0,2]] *= img_W
        bounding_box[:, [1,3]] *= img_H

        return target_name, bounding_box
    

    def __getitem__(self, index):
        image_name = self.data_list[index]
        
        image = cv2.imread(self.phase_data_dir + image_name)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        img_H, img_W,_ = image.shape

        if self.transformer:
            image = self.transformer(image)
            _, img_H, img_W = image.shape


        target_name, bounding_box = self.get_label_def(image_name,img_H, img_W)

        target = {}
        target['boxes'] = torch.Tensor(bounding_box).float()
        target['labels'] = torch.Tensor(target_name).long()
        
        
        return image, target

In [7]:
transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE) ,antialias=True),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [8]:
def collate_fn(batch):
    image_list = []
    target_list = []
    for a,b in batch:
        image_list.append(a)
        target_list.append(b)
    
    return image_list, target_list

In [9]:
def build_dataloader(data_dir, train_batch_size = 4, val_batch_size = 4, image_size = 448):
    dataloaders = {}

    train_dataset = car_data_set(csv_data=csv_data,data_dir=data_dir, phase='train', transformer=transformer)
    val_dataset = car_data_set(csv_data=csv_data,data_dir=data_dir, phase='val', transformer=transformer)
    
    dataloaders['train'] = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, collate_fn=collate_fn)
    dataloaders['val'] = DataLoader(val_dataset, batch_size=val_batch_size, shuffle=False, collate_fn=collate_fn)    

    return dataloaders

In [10]:

dloaders = build_dataloader(data_dir, train_batch_size=2,val_batch_size=2, image_size=448)

for phase in ["train", "val"]:
    for index, batch in enumerate(dloaders[phase]):
        images = batch[0]
        targets = batch[1]
        print(targets)
        if index == 0:
            break

[{'boxes': tensor([[147.5600,  97.0668, 361.2000, 296.4268]]), 'labels': tensor([1])}, {'boxes': tensor([[205.5200, 282.3780, 267.9600, 359.5935],
        [162.9600, 273.7983, 199.3600, 324.9026]]), 'labels': tensor([1, 1])}]
[{'boxes': tensor([[ 18.8124, 114.7816, 415.6248, 368.9410]]), 'labels': tensor([1])}, {'boxes': tensor([[124.0400, 200.2775, 353.6400, 362.7667]]), 'labels': tensor([1])}]


# 모델

In [11]:
def build_model(num_classes):
    #모델을 가져오고
    model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    #fasterrcnn모델은 roi해서 각 클래스 개수, 개수*4의 바운딩박스를 가지는
    #그중에 box_predictor를 수정해야한다 근데 cls_score의 in부분만 바꾸면 bounding박스가바뀐다.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
    

In [12]:
model = build_model(num_classes=NUM_CLASSES)



In [13]:
def train_one_epoch(dataloaders, model, optimizer, device):
    train_loss = defaultdict(float)
    val_loss = defaultdict(float)
    
    model.train() #faster-rcnn은 eval하니깐 오류생김
    
    for phase in ["train", "val"]:

        for index, batch in enumerate(dataloaders[phase]):
            images = batch[0]
            targets = batch[1]
    
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            
            with torch.set_grad_enabled(phase == "train"):
                loss = model(images, targets)
            total_loss = sum(each_loss for each_loss in loss.values())
            
            if phase == "train":
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                if (index > 0) and (index % VERBOSE_FREQ) == 0:
                    text = f"{index}/{len(dataloaders[phase])} - "
                    for k, v in loss.items():
                        text += f"{k}: {v.item():.4f}  "
                    print(text)

                for k, v in loss.items():
                    train_loss[k] += v.item()
                train_loss["total_loss"] += total_loss.item()
                
            else:
                for k, v in loss.items():
                    val_loss[k] += v.item()
                val_loss["total_loss"] += total_loss.item()
                
    for k in train_loss.keys():
        train_loss[k] /= len(dataloaders["train"])
        val_loss[k] /= len(dataloaders["val"])
    return train_loss, val_loss

In [14]:
data_dir = data_dir
is_cuda = True

NUM_CLASSES = 2
IMAGE_SIZE = 448
BATCH_SIZE = 30
VERBOSE_FREQ = 200
DEVICE = torch.device('cuda:0' if torch.cuda.is_available and is_cuda else 'cpu')

dataloaders = build_dataloader(data_dir=data_dir, train_batch_size=BATCH_SIZE, val_batch_size=BATCH_SIZE, image_size=IMAGE_SIZE)
model = build_model(num_classes=NUM_CLASSES)
model = model.to(DEVICE)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001, momentum=0.9)

In [15]:
num_epochs = 30

train_losses = []
val_losses = []
os.makedirs('./trained_model/', exist_ok=True)

for epoch in range(num_epochs):
    train_loss, val_loss = train_one_epoch(dataloaders=dataloaders, model=model, optimizer=optimizer, device=DEVICE)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    print(f"epoch : {epoch+1}/{num_epochs} - Train Loss : {train_loss['total_loss']:.4f}, Val Loss : {val_loss['total_loss']:.4f}")

    if(epoch +1) % 10 == 0:
        torch.save(model, './trained_model')

200/457 - loss_classifier: 0.0389  loss_box_reg: 0.0354  loss_objectness: 0.0242  loss_rpn_box_reg: 0.0056  


KeyboardInterrupt: 