### Matterport 패키지를 이용하여 pretrained coco 모델을 로딩 후 단일 이미지와 영상 Segmentation 수행. 

In [None]:
import os
import sys
import random
import math
import numpy as np
import cv2

In [None]:
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize

#### Matterport가 제공하는 COCO pretrained weight 파일을 다운로드함(최초 실행 시에만)

In [None]:
from mrcnn import utils

ROOT_DIR = os.path.abspath('.')

# Location of the COCO pretrained weights, inside <working dir>/pretrained.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "pretrained", "mask_rcnn_coco.h5")

# Download the weights only when they are not on disk yet (i.e. on the first run).
if not os.path.exists(COCO_MODEL_PATH):
    # Create the target folder first: the downloader is expected to open
    # COCO_MODEL_PATH for writing directly, which fails if the folder is missing.
    os.makedirs(os.path.dirname(COCO_MODEL_PATH), exist_ok=True)
    utils.download_trained_weights(COCO_MODEL_PATH)

In [None]:
!ls ~/DLCV/Segmentation/mask_rcnn/pretrained

#### MASK RCNN 모델을 위한 Config 설정

In [None]:
from mrcnn.config import Config

# Instantiate the base Config directly, overwrite BATCH_SIZE on the instance
# and print the resulting settings.
# NOTE(review): Matterport's Config presumably derives BATCH_SIZE from
# IMAGES_PER_GPU * GPU_COUNT, so assigning it here looks like a demonstration
# only — confirm. This instance is replaced by an InferenceConfig further down.
infer_config = Config()
infer_config.BATCH_SIZE=4
infer_config.display()

In [None]:
# Config 클래스를 상속받아서 사용
from mrcnn.config import Config

# Configuration attributes are all written in upper case.
class InferenceConfig(Config):
    """Inference settings for the COCO pretrained Mask R-CNN model."""

    # NAME must always be provided.
    NAME = 'coco_infer'

    # For inference the batch size is kept at 1: one GPU, one image per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Background class plus the 80 COCO object classes.
    NUM_CLASSES = 1 + 80


infer_config = InferenceConfig()
infer_config.display()

#### COCO ID와 클래스명 매핑

In [None]:
# Matterport uses id 0 for the background ('BG') and ids 1 to 80 for the COCO
# dataset classes; this dict maps each class id to its class name.
# NOTE(review): several names (motorbike, aeroplane, sofa, pottedplant,
# diningtable, tvmonitor) look like Pascal-VOC spellings rather than the
# official COCO names — confirm whether that is intended.
labels_to_names = {0:'BG',1: 'person',2: 'bicycle',3: 'car',4: 'motorbike',5: 'aeroplane',6: 'bus',7: 'train',8: 'truck',9: 'boat',10: 'traffic light',
                   11: 'fire hydrant',12: 'stop sign',13: 'parking meter',14: 'bench',15: 'bird',16: 'cat',17: 'dog',18: 'horse',19: 'sheep',20: 'cow',
                   21: 'elephant',22: 'bear',23: 'zebra',24: 'giraffe',25: 'backpack',26: 'umbrella',27: 'handbag',28: 'tie',29: 'suitcase',30: 'frisbee',
                   31: 'skis',32: 'snowboard',33: 'sports ball',34: 'kite',35: 'baseball bat',36: 'baseball glove',37: 'skateboard',38: 'surfboard',39: 'tennis racket',40: 'bottle',
                   41: 'wine glass',42: 'cup',43: 'fork',44: 'knife',45: 'spoon',46: 'bowl',47: 'banana',48: 'apple',49: 'sandwich',50: 'orange',
                   51: 'broccoli',52: 'carrot',53: 'hot dog',54: 'pizza',55: 'donut',56: 'cake',57: 'chair',58: 'sofa',59: 'pottedplant',60: 'bed',
                   61: 'diningtable',62: 'toilet',63: 'tvmonitor',64: 'laptop',65: 'mouse',66: 'remote', 67: 'keyboard',68: 'cell phone',69: 'microwave',70: 'oven',
                   71: 'toaster',72: 'sink',73: 'refrigerator',74: 'book',75: 'clock',76: 'vase',77: 'scissors',78: 'teddy bear',79: 'hair drier',80: 'toothbrush' }

In [None]:
# MS-COCO 기반으로 Pretrained 된 모델을 로딩
import mrcnn.model as modellib

# Directory passed to MaskRCNN as its model_dir.
MODEL_DIR = os.path.join(ROOT_DIR, 'snapshots')
print(MODEL_DIR)

# Build the network in inference mode with the inference config ...
model = modellib.MaskRCNN(
    mode="inference",
    model_dir=MODEL_DIR,
    config=infer_config,
)

# ... and load the COCO pretrained weights, matching layers by name.
model.load_weights(COCO_MODEL_PATH, by_name=True)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline 

beatles_img_path = '../../data/image/beatles01.jpg'
beatles_img = cv2.imread(beatles_img_path)
# cv2.imread() does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an opaque cvtColor error.
if beatles_img is None:
    raise FileNotFoundError('Could not read image: {0}'.format(beatles_img_path))

# Matterport works on RGB images (it uses skimage internally), while OpenCV
# loads BGR, so convert before detection.
beatles_img_rgb = cv2.cvtColor(beatles_img, cv2.COLOR_BGR2RGB)

# detect() takes a list of images and returns one result entry per image.
results = model.detect([beatles_img_rgb], verbose=1)

In [None]:
# Show the number of result entries returned by detect() and the first entry.
len(results), results[0]

In [None]:
# results[0]['masks'] holds, for each detected object, a mask layer covering the whole image.
# The expression below shows the shapes of the rois, scores, class_ids and masks arrays.
results[0]['rois'].shape, results[0]['scores'].shape, results[0]['class_ids'].shape, results[0]['masks'].shape

In [None]:
from mrcnn import visualize

# Detection result of the first (and only) image.
r = results[0]

# Class names as a list indexed by class id; relies on labels_to_names
# listing its entries in id order.
class_names = list(labels_to_names.values())

# Let Matterport draw boxes, masks, class names and scores with matplotlib.
visualize.display_instances(
    beatles_img_rgb,
    r['rois'],
    r['masks'],
    r['class_ids'],
    class_names,
    r['scores'],
)

In [None]:
import time

wick_img_path = '../../data/image/john_wick01.jpg'
wick_img = cv2.imread(wick_img_path)
# cv2.imread() returns None instead of raising when the file cannot be read.
if wick_img is None:
    raise FileNotFoundError('Could not read image: {0}'.format(wick_img_path))
# OpenCV loads BGR; convert to RGB before handing the image to the model.
wick_img_rgb = cv2.cvtColor(wick_img, cv2.COLOR_BGR2RGB)

def get_segment_result(img_array_list, verbose):
    """Run Mask R-CNN detection on a list of images and optionally report the time taken.

    Args:
        img_array_list: list of image arrays passed unchanged to ``model.detect``.
        verbose: forwarded to ``model.detect``; when it equals 1 the elapsed
            inference time is also printed.

    Returns:
        Whatever ``model.detect`` returns for ``img_array_list``
        (the callers index it per input image).
    """
    start_time = time.time()
    # Forward the caller's verbosity instead of always forcing verbose=1,
    # so that verbose=0 really silences the detector's own logging.
    results = model.detect(img_array_list, verbose=verbose)

    if verbose == 1:
        print('## inference time:{0:}'.format(time.time() - start_time))

    return results

# Segment the single John Wick image (first entry of the result list) and
# display it with Matterport's display_instances().
r = get_segment_result([wick_img_rgb], verbose=1)[0]
visualize.display_instances(wick_img_rgb, r['rois'], r['masks'], r['class_ids'], 
                            class_names, r['scores'])

### Video에 Segmentation 적용 
* MaskRCNN 패키지의 visualize.display_instances() 함수는 내부에서 matplotlib를 이용하여 자체적으로 시각화를 수행하므로 Video frame에는 그대로 사용할 수 없음. 
* Video에 instance segmentation을 적용하기 위해서, OpenCV로 frame 위에 bounding box와 instance mask를 직접 그리는 별도의 함수를 생성. 

In [None]:
from mrcnn.visualize import *
import cv2

def get_segmented_image(img_array, boxes, masks, class_ids, class_names,
                      scores=None, show_mask=True, show_bbox=True, colors=None, captions=None):
    """Draw detection results (boxes, captions, masks, contours) on an image with OpenCV.

    The drawing is done in place on ``img_array``; the same array is returned.

    Args:
        img_array: image array to draw on (modified in place).
        boxes: array with one row per instance, unpacked as (y1, x1, y2, x2).
        masks: array indexed as ``masks[:, :, i]`` for instance ``i``.
        class_ids: per-instance class ids, used to index ``class_names``.
        class_names: sequence mapping a class id to its display name.
        scores: optional per-instance scores appended to the caption.
        show_mask: when true, blend the mask and draw its contour.
        show_bbox: when true, draw the bounding box.
        colors: optional per-instance colours with components in the 0-1
            range; random colours are generated when omitted or empty.
        captions: optional per-instance caption strings overriding the
            default "<class name> <score>" caption.

    Returns:
        ``img_array`` with the annotations drawn on it.
    """
    # Number of instances
    num_instances = boxes.shape[0]
    if not num_instances:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

    # Generate random colors when none were supplied. An explicit None/empty
    # check is used so that array-like colour inputs do not break on truthiness.
    if colors is None or len(colors) == 0:
        colors = random_colors(num_instances)

    for i in range(num_instances):
        # Skip this instance. Has no bbox. Likely lost in image cropping.
        if not np.any(boxes[i]):
            continue

        # OpenCV drawing functions want plain Python ints for coordinates.
        y1, x1, y2, x2 = (int(v) for v in boxes[i])

        # Colours are kept in two scales: 0-1 for apply_mask() (which scales
        # by 255 itself) and 0-255 for the OpenCV drawing calls.
        mask_color = colors[i]
        draw_color = (np.array(mask_color) * 255).tolist()

        # Bounding box
        if show_bbox:
            cv2.rectangle(img_array, (x1, y1), (x2, y2), draw_color, thickness=1)

        # Label
        if not captions:
            label = class_names[class_ids[i]]
            score = scores[i] if scores is not None else None
            # Compare against None so that a score of exactly 0 is still shown.
            caption = "{} {:.3f}".format(label, score) if score is not None else label
        else:
            caption = captions[i]

        cv2.putText(img_array, caption, (x1, y1 + 8), cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                    (255, 255, 255), thickness=1)

        # Mask of this instance
        mask = masks[:, :, i]
        if show_mask:
            # Blend the mask into the image with visualize.apply_mask().
            # It must receive the 0-1 colour; passing the 0-255 colour would be
            # scaled by 255 a second time and overflow the image dtype.
            img_array = apply_mask(img_array, mask, mask_color)

            # Pad the mask by one pixel on every side before extracting
            # contours from it.
            padded_mask = np.zeros(
                (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
            padded_mask[1:-1, 1:-1] = mask
            for verts in find_contours(padded_mask, 0.5):
                # polylines() needs 32-bit integer vertices.
                verts = verts.astype(np.int32)
                # Swap (row, col) to (x, y) and subtract the padding offset.
                verts = np.fliplr(verts) - 1
                cv2.polylines(img_array, [verts], True, draw_color, thickness=1)

    return img_array


####  단일 IMAGE에 적용

In [None]:
import time

wick_img_path = '../../data/image/john_wick01.jpg'
wick_img = cv2.imread(wick_img_path)
# cv2.imread() returns None instead of raising when the file cannot be read.
if wick_img is None:
    raise FileNotFoundError('Could not read image: {0}'.format(wick_img_path))
# OpenCV loads BGR; convert to RGB for the model and for matplotlib display.
wick_img_rgb = cv2.cvtColor(wick_img, cv2.COLOR_BGR2RGB)

# Detect instances, then draw them with the OpenCV-based helper.
# Note that get_segmented_image() draws directly on wick_img_rgb.
r = get_segment_result([wick_img_rgb], verbose=1)[0]
segmented_img = get_segmented_image(wick_img_rgb, r['rois'], r['masks'], r['class_ids'],
                                    class_names, r['scores'])

plt.figure(figsize=(16, 16))
plt.imshow(segmented_img)

#### Video Segmentation 적용

In [None]:
import time

video_input_path = '../../data/video/John_Wick_small.mp4'
# The output video must be written as .avi (presumably to match the XVID codec below).
video_output_path = '../../data/output/John_Wick_small_matterport01.avi'

cap = cv2.VideoCapture(video_input_path)
vid_writer = None
try:
    # VideoCapture does not raise on a bad path; check explicitly.
    if not cap.isOpened():
        raise IOError('Cannot open input video: {0}'.format(video_input_path))

    codec = cv2.VideoWriter_fourcc(*'XVID')
    fps = round(cap.get(cv2.CAP_PROP_FPS))
    frame_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    vid_writer = cv2.VideoWriter(video_output_path, codec, fps, frame_size)
    if not vid_writer.isOpened():
        raise IOError('Cannot open output video: {0}'.format(video_output_path))

    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("총 Frame 개수: {0:}".format(total))

    frame_index = 0
    while True:

        hasFrame, image_frame = cap.read()
        if not hasFrame:
            print('End of frame')
            break

        frame_index += 1
        print("frame index:{0:}".format(frame_index), end=" ")

        # Frames read by OpenCV are BGR, but the model is fed RGB everywhere
        # else in this notebook, so detect on an RGB copy of the frame.
        frame_rgb = cv2.cvtColor(image_frame, cv2.COLOR_BGR2RGB)
        r = get_segment_result([frame_rgb], verbose=1)[0]

        # Draw on the original BGR frame, which is what VideoWriter expects.
        segmented_img = get_segmented_image(image_frame, r['rois'], r['masks'], r['class_ids'],
                                            class_names, r['scores'])
        vid_writer.write(segmented_img)
finally:
    # Always release the writer and the capture, even when a frame fails,
    # so the output file is finalized and the handles are freed.
    if vid_writer is not None:
        vid_writer.release()
    cap.release()

In [None]:
!gsutil cp ../../data/output/John_Wick_small_matterport01.avi gs://my_bucket_dlcv/data/output/John_Wick_small_matterport01.avi

#### 다른 동영상에 적용. 

In [None]:
video_input_path = '../../data/video/London_Street.mp4'
# The output video must be written as .avi (presumably to match the XVID codec below).
video_output_path = '../../data/output/London_Street_matterport01.avi'

import time

cap = cv2.VideoCapture(video_input_path)
vid_writer = None
try:
    # VideoCapture does not raise on a bad path; check explicitly.
    if not cap.isOpened():
        raise IOError('Cannot open input video: {0}'.format(video_input_path))

    codec = cv2.VideoWriter_fourcc(*'XVID')
    fps = round(cap.get(cv2.CAP_PROP_FPS))
    frame_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    vid_writer = cv2.VideoWriter(video_output_path, codec, fps, frame_size)
    if not vid_writer.isOpened():
        raise IOError('Cannot open output video: {0}'.format(video_output_path))

    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("총 Frame 개수: {0:}".format(total))

    frame_index = 0
    while True:

        hasFrame, image_frame = cap.read()
        if not hasFrame:
            print('End of frame')
            break

        frame_index += 1
        print("frame index:{0:}".format(frame_index), end=" ")

        # Frames read by OpenCV are BGR, but the model is fed RGB everywhere
        # else in this notebook, so detect on an RGB copy of the frame.
        frame_rgb = cv2.cvtColor(image_frame, cv2.COLOR_BGR2RGB)
        r = get_segment_result([frame_rgb], verbose=1)[0]

        # Draw on the original BGR frame, which is what VideoWriter expects.
        segmented_img = get_segmented_image(image_frame, r['rois'], r['masks'], r['class_ids'],
                                            class_names, r['scores'])
        vid_writer.write(segmented_img)
finally:
    # Always release the writer and the capture, even when a frame fails,
    # so the output file is finalized and the handles are freed.
    if vid_writer is not None:
        vid_writer.release()
    cap.release()


In [None]:
!gsutil cp ../../data/output/London_Street_matterport01.avi gs://my_bucket_dlcv/data/output/London_Street_matterport01.avi