<a href="https://colab.research.google.com/github/sooking87/CPP_Lecture/blob/master/Faster_R_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[강의 정리 링크](https://sooking87.github.io/gdsc%20ml/gdsc-ml-7/) <br>

+ 코드 리뷰까지 같이 정리하였습니다! (코랩 사용 -> 코랩에서 받아온 파일이라 쥬피터에서 다시 돌릴 일이 있다면 시간이 오래걸려서,,,)

In [1]:
import torch

if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [25]:
# 데이터 불러오기
!git clone https://github.com/Pseudo-Lab/Tutorial-Book-Utils
!python Tutorial-Book-Utils/PL_data_loader.py --data FaceMaskDetection
!unzip -q Face\ Mask\ Detection.zip

Cloning into 'Tutorial-Book-Utils'...
remote: Enumerating objects: 36, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 36 (delta 12), reused 17 (delta 5), pack-reused 0[K
Unpacking objects: 100% (36/36), done.
Downloading...
From: https://drive.google.com/uc?id=1pJtohTc9NGNRzHj5IsySR39JIRPfkgD3
To: /content/Tutorial-Book-Utils/Face Mask Detection.zip
100% 417M/417M [00:02<00:00, 208MB/s]
Face Mask Detection.zip download complete!


In [26]:
# 데이터셋 분리 -> 임의로 170장을 추출하고 test폴더에 옮긴다. 
import os
import random
import numpy as np
import shutil

print(len(os.listdir('annotations')))
print(len(os.listdir('images')))

!mkdir test_images
!mkdir test_annotations


random.seed(1234)
idx = random.sample(range(853), 170)

for img in np.array(sorted(os.listdir('images')))[idx]:
    shutil.move('images/'+img, 'test_images/'+img)

for annot in np.array(sorted(os.listdir('annotations')))[idx]:
    shutil.move('annotations/'+annot, 'test_annotations/'+annot)

print(len(os.listdir('annotations')))
print(len(os.listdir('images')))
print(len(os.listdir('test_annotations')))
print(len(os.listdir('test_images')))

853
853
683
683
170
170


In [4]:
# 모델링
import os
import numpy as np
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from PIL import Image
import torchvision
from torchvision import transforms, datasets, models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import time

In [20]:
# bounding box, category를 위한 함수
def generate_box(obj):
  xmin = float(obj.find('xmin').text)
  ymin = float(obj.find('ymin').text)
  xmax = float(obj.find('xmax').text)
  ymax = float(obj.find('ymax').text)

  return [xmin, ymin, xmax, ymax]

adjust_label = 1

def generate_label(obj):
  if obj.find('name').text == 'with_mask':
    return 1 + adjust_label
  elif obj.find('name').text == 'mask_weared_incorrect':
    return 2 + adjust_label
  return 0 + adjust_label

def generate_target(file):
  with open(file) as f:
    data = f.read()
    soup = BeautifulSoup(data, "html.parser")
    objects = soup.find_all("object")

    num_objs = len(objects)

    boxes = []
    labels = []
    for i in objects:
      boxes.append(generate_box(i))
      labels.append(generate_label(i))

    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.int64)

    target = {}
    target['boxes'] = boxes
    target['labels'] = labels

    return target

  def plot_image_from_output(img, annotation):
    
    img = img.cpu().permute(1,2,0)
    
    fig,ax = plt.subplots(1)
    ax.imshow(img)
    
    for idx in range(len(annotation["boxes"])):
        xmin, ymin, xmax, ymax = annotation["boxes"][idx]

        if annotation['labels'][idx] == 1 :
            rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='r',facecolor='none')
        
        elif annotation['labels'][idx] == 2 :
            
            rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='g',facecolor='none')
            
        else :
        
            rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='orange',facecolor='none')

        ax.add_patch(rect)

    plt.show()


In [6]:
# 데이터셋 클래스와 데이터 로더를 정의
class MaskDataset(object):
  def __init__(self, transforms, path):

    # transform module과 img path 경로를 정의
    self.transforms = transforms
    self.path = path
    self.imgs = list(sorted(os.listdir(self.path)))

  def __getitem__(self, idx):
    # load images ad masks
    file_image = self.imgs[idx]
    file_label = self.imgs[idx][:-3] + 'xml'
    img_path = os.path.join(self.path, file_image)

    if 'test' in self.path:
      label_path = os.path.join('test_annotations/', file_label)
    else:
      label_path = os.path.join('annotations/', file_label)

    img = Image.open(img_path).convert('RGB')

    # Generate Label
    target = generate_target(label_path)

    if self.transforms is not None:
      img = self.transforms(img)

    return img, target

  def __len__(self): 
    return len(self.imgs)
  
data_transform = transforms.Compose([  # transforms.Compose : list 내의 작업을 연달아 할 수 있게 호출하는 클래스
        transforms.ToTensor() # ToTensor : numpy 이미지에서 torch 이미지로 변경
    ])

def collate_fn(batch):
    return tuple(zip(*batch))

dataset = MaskDataset(data_transform, 'images/')
test_dataset = MaskDataset(data_transform, 'test_images/')

data_loader = torch.utils.data.DataLoader(dataset, batch_size=4, collate_fn=collate_fn)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=2, collate_fn=collate_fn)

`torchvision.models.detection` 에서는 Faster R-CNN API(`torchvision.models.detection.fasterrcnn_resnet50_fpn`)를 제공하고 있다

- pretrained = True/False
- num_classes: 원하는 클래스 개수 설정(background 클래스 포함한 개수를 설정)

In [7]:
# 모델 불러오기
def get_model_instance_segmentation(num_classes):
  
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model


In [8]:
# 전이 학습
# Face Mask Detection 데이터셋은 3개의 클래스로 이루어져 있다 + background 클래스까지 포함하여 num_classes를 4로 설정
model = get_model_instance_segmentation(4)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 
model.to(device)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [9]:
torch.cuda.is_available()

True

In [10]:
num_epochs = 10
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

In [11]:
print('----------------------train start--------------------------')
for epoch in range(num_epochs):
    start = time.time()
    model.train()
    i = 0    
    epoch_loss = 0
    for imgs, annotations in data_loader:
        i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        loss_dict = model(imgs, annotations) 
        losses = sum(loss for loss in loss_dict.values())        

        optimizer.zero_grad()
        losses.backward()
        optimizer.step() 
        epoch_loss += losses
    print(f'epoch : {epoch+1}, Loss : {epoch_loss}, time : {time.time() - start}')

----------------------train start--------------------------
epoch : 1, Loss : 74.38842010498047, time : 258.89981985092163
epoch : 2, Loss : 49.511959075927734, time : 256.5860526561737
epoch : 3, Loss : 42.1615104675293, time : 256.39848041534424
epoch : 4, Loss : 36.24540328979492, time : 256.0167520046234
epoch : 5, Loss : 32.7506217956543, time : 256.156777381897
epoch : 6, Loss : 31.926315307617188, time : 256.31392216682434
epoch : 7, Loss : 30.4024600982666, time : 256.148469209671
epoch : 8, Loss : 30.105791091918945, time : 256.33737349510193
epoch : 9, Loss : 25.10750961303711, time : 256.2675304412842
epoch : 10, Loss : 23.202547073364258, time : 256.12270879745483


In [12]:
# 학습시킨 가중치 저장
torch.save(model.state_dict(),f'model_{num_epochs}.pt')
model.load_state_dict(torch.load(f'model_{num_epochs}.pt'))

<All keys matched successfully>

In [13]:
# 예측 결과 확인
# 점수는 해당 클래스의 신뢰도 값이 저장되는 threshold로 0.5 이상인 것만 추출하도록 만드는 함수
def make_prediction(model, img, threshold):
    model.eval()
    preds = model(img)
    
    for id in range(len(preds)) :
        idx_list = []

        for idx, score in enumerate(preds[id]['scores']) :
            if score > threshold : 
                idx_list.append(idx)

        preds[id]['boxes'] = preds[id]['boxes'][idx_list]
        preds[id]['labels'] = preds[id]['labels'][idx_list]
        preds[id]['scores'] = preds[id]['scores'][idx_list]

    return preds


In [14]:
with torch.no_grad(): 
    # 테스트셋 배치사이즈= 2
    for imgs, annotations in test_data_loader:
        imgs = list(img.to(device) for img in imgs)
        pred = make_prediction(model, imgs, 0.5)
        print(pred)
        break

[{'boxes': tensor([[214.7165,  58.0858, 253.9632, 101.4567],
        [118.4214,   2.4066, 133.6220,  18.2444]], device='cuda:0'), 'labels': tensor([2, 2], device='cuda:0'), 'scores': tensor([0.9721, 0.9348], device='cuda:0')}, {'boxes': tensor([[219.3615,  98.3562, 259.3139, 139.1600],
        [131.8506, 108.2015, 178.9853, 152.7033],
        [ 42.3120, 108.9083,  67.5691, 140.1301],
        [165.6281,  90.3293, 179.4834, 107.0881],
        [ 84.5444,  84.6873,  93.9905,  94.9758],
        [303.7904, 130.2131, 332.3069, 158.7099],
        [ 29.9453,  89.5567,  45.0015, 103.9522],
        [  3.4042, 103.7917,  21.8303, 127.4263],
        [257.7014,  91.2583, 268.0744, 100.6336],
        [ 85.1323, 109.2810,  92.1005, 117.7712]], device='cuda:0'), 'labels': tensor([2, 2, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), 'scores': tensor([0.9965, 0.9932, 0.9898, 0.9868, 0.9804, 0.9759, 0.9586, 0.8320, 0.5674,
        0.5415], device='cuda:0')}]


In [22]:
# 예측된 결과에 바운딩 박스 그리기
# ,,,,?
_idx = 1
print("Target : ", annotations[_idx]['labels'])
plot_image_from_output(imgs[_idx], annotations[_idx])
print("Prediction : ", pred[_idx]['labels'])
plot_image_from_output(imgs[_idx], pred[_idx])

Target :  tensor([1, 1, 1, 2, 2, 1, 1, 1])


NameError: ignored

In [17]:
# 전체 시험 데이터에 대해서 예측 결과 평가 방법
# 먼저 모든 시험 데이터에 대한 예측 결과와 실제 label을 각각 preds_adj_all, annot_all에 담아줍니다.
from tqdm import tqdm

labels = []
preds_adj_all = []
annot_all = []

for im, annot in tqdm(test_data_loader, position = 0, leave = True):
    im = list(img.to(device) for img in im)
    #annot = [{k: v.to(device) for k, v in t.items()} for t in annot]

    for t in annot:
        labels += t['labels']

    with torch.no_grad():
        preds_adj = make_prediction(model, im, 0.5)
        preds_adj = [{k: v.to(torch.device('cpu')) for k, v in t.items()} for t in preds_adj]
        preds_adj_all.append(preds_adj)
        annot_all.append(annot)

100%|██████████| 85/85 [00:26<00:00,  3.23it/s]


In [18]:
# 그리고 Tutorial-Book-Utils 폴더 내에서 utils_ObjectDetection.py 파일을 통해서 mAP 값을 산출합니다.
# get_batch_statistics 함수를 통해 IoU 조건을 만족하는바운딩 박스간의 통곗값 계산
# 그 다음 ap_per_class 함수를 통해 각 클래스에 대한 AP값을 계산
%cd Tutorial-Book-Utils/
import utils_ObjectDetection as utils

sample_metrics = []
for batch_i in range(len(preds_adj_all)):
    sample_metrics += utils.get_batch_statistics(preds_adj_all[batch_i], annot_all[batch_i], iou_threshold=0.5) 

true_positives, pred_scores, pred_labels = [torch.cat(x, 0) for x in list(zip(*sample_metrics))]  # 배치가 전부 합쳐짐
precision, recall, AP, f1, ap_class = utils.ap_per_class(true_positives, pred_scores, pred_labels, torch.tensor(labels))
mAP = torch.mean(AP)
print(f'mAP : {mAP}')
print(f'AP : {AP}')

/content/Tutorial-Book-Utils
mAP : 0.7356448219190508
AP : tensor([0.8547, 0.9324, 0.4199], dtype=torch.float64)
