### Matterport 패키지를 이용하여 Balloon 데이터 세트를 학습하고 이를 기반으로 Segmentation 적용
* Matterport 패키지의 학습 프로세스를 상세히 설명.

In [None]:
import os
import sys
import itertools
import math
import logging
import json
import re
import random
from collections import OrderedDict

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon
import cv2

%matplotlib inline

In [None]:
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

#### 주요 수행 모듈인 balloon 모듈을 setup 하여 import하지 않고, 소스코드에서 바로 import 수행.
##### 이를 위해 PATH에 balloon.py 파일이 있는 디렉토리를 지정하고 import 를 적용

In [None]:
#Mask_RCNN 패키지의 samples/balloon 디렉토리의 balloon.py 를 import 한다. 
ROOT_DIR = os.path.abspath(".")
sys.path.append(os.path.join(ROOT_DIR, "Mask_RCNN/samples/balloon/"))

import balloon

#### balloon 모듈은 어떠한 API들로 구성되어 있는지 직접 소스코드에서 확인. 

In [None]:
!cat "./Mask_RCNN/samples/balloon/balloon.py"

#### balloon 데이터 세트가 제대로 되어 있는지 확인.  train과 val 서브 디렉토리가 홈 디렉토리 아래 DLCV/data/balloon (아래 BALLOON_DATA_DIR) 에 존재해야 함. 

In [None]:
import subprocess
from pathlib import Path

HOME_DIR = str(Path.home())
BALLOON_DATA_DIR = os.path.join(HOME_DIR, "DLCV/data/balloon")

#### balloon 모듈에 설정된 Config 셋업. GPU 개수, Batch당 image 개수가 사전 설정되어 있음. 

In [None]:
config = balloon.BalloonConfig()
config.display()

#### balloon 모듈에서 balloon 데이터 세트 로딩. 

In [None]:
# Build the "train" split using the dataset class shipped in the balloon module.
dataset = balloon.BalloonDataset()
dataset.load_balloon(BALLOON_DATA_DIR, "train")

# prepare() has to run before the dataset is used.
dataset.prepare()

# Summarise what was loaded: image count, class count and one line per class.
image_count = len(dataset.image_ids)
class_count = dataset.num_classes
print("Image Count: {}".format(image_count))
print("Class Count: {}".format(class_count))
for class_index, class_entry in enumerate(dataset.class_info):
    class_name = class_entry['name']
    print("{:3}. {:50}".format(class_index, class_name))

#### balloon 모듈에서 로딩한 balloon 데이터 세트의 세부 정보 확인. 

In [None]:
# dataset의 image_info는 리스트 객체이며 내부 원소로 이미지별 세부 정보를 딕셔너리로 가지고 있음. 
# dataset의 image_ids 는 이미지의 고유 id나 이름이 아니라 dataset에서 이미지의 상세 정보를 관리하기 위한 리스트 인덱스에 불과 

print('#### balloon 데이터 세트 이미지의 인덱스 ID들 ####')
print(dataset.image_ids)
print('\n ##### balloon 데이터 세트의 이미지 정보들 ####')
print(dataset.image_info)

#### polygon 정보 확인

In [None]:
# Inspect the first annotated polygon of the image stored at index 28.
image_28 = dataset.image_info[28]
polygons = image_28['polygons']
first_polygon = polygons[0]
polygon_x = first_polygon['all_points_x']
polygon_y = first_polygon['all_points_y']
print(len(polygon_x))
print('polygon_x:', polygon_x, 'polygon_y:',polygon_y)

# Pair the x and y coordinate lists into (x, y) vertex tuples.
polygon_xy = list(zip(polygon_x, polygon_y))
print('polygon_xy:', polygon_xy)

In [None]:
# Draw the polygon vertices of image 28 on top of the picture.
image_28_path = os.path.join(BALLOON_DATA_DIR, 'train/' + image_28['id'])
image_28_array = cv2.imread(image_28_path)
# cv2.imread() returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than later inside cv2.circle().
if image_28_array is None:
    raise FileNotFoundError("Cannot read image: {}".format(image_28_path))
# OpenCV loads pixels in BGR order while matplotlib interprets arrays as RGB,
# so convert before drawing and displaying to get correct colours.
image_28_array = cv2.cvtColor(image_28_array, cv2.COLOR_BGR2RGB)
for position in polygon_xy:
    # Filled (thickness -1) red dot of radius 3 at each vertex.
    cv2.circle(image_28_array, position, 3, (255, 0, 0), -1)

plt.figure(figsize=(8, 8))
plt.axis('off')
plt.imshow(image_28_array)

In [None]:
# Fix the seed so the same sample images are picked on every run.
np.random.seed(99)
# Load and display random samples.
# NOTE(review): np.random.choice samples with replacement by default, so the
# same image id may appear more than once — confirm this is acceptable.
image_ids = np.random.choice(dataset.image_ids, 4)
print('image_ids:', image_ids)
for image_id in image_ids:
    image = dataset.load_image(image_id)
    # Load the masks of the given image_id together with the class id of
    # each mask, for visualization.
    mask, class_ids = dataset.load_mask(image_id)
    # Show the original image next to its masks for the top classes, labelled
    # with names taken from dataset.class_names.
    visualize.display_top_masks(image, mask, class_ids, dataset.class_names)

In [None]:
image = dataset.load_image(28)
print(image.shape)
print(image_28['polygons'])

#### polygon 형태의 데이터를 boolean mask 형태로 변환

In [None]:
import skimage
# Import the submodule explicitly: a bare "import skimage" is not guaranteed
# to make skimage.draw available.
import skimage.draw

# Toy example: rasterize a triangle into a 10x10 image.
img = np.zeros((10, 10), dtype=np.uint8)
# Row and column coordinates of the three polygon vertices.
r = np.array([1, 2, 8])
c = np.array([1, 7, 4])
print('img:', img)
# polygon() returns the row/column indices of every pixel inside the polygon
# whose vertices are (r, c); only those pixels are set to 1.
rr, cc = skimage.draw.polygon(r, c)
img[rr, cc] = 1
print('row positions:',rr, 'column positions:',cc)
print('0, 1로 masking된 img:\n',img)
# Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
print('Boolean형태로 masking된 img:\n',img.astype(bool))

In [None]:
mask, class_ids = dataset.load_mask(28)
print("mask shape:", mask.shape, "class_ids:", class_ids)
print(mask)

In [None]:
image = dataset.load_image(28)
mask, class_ids = dataset.load_mask(28)
visualize.display_top_masks(image, mask, class_ids, dataset.class_names)

#### balloon 데이터 세트의 image정보, 클래스 정보, mask 정보의 추출과 변환을 위한 BalloonDataset 생성

In [None]:
class BalloonDataset(utils.Dataset):
    """Balloon dataset built from VIA polygon annotations.

    Registers a single "balloon" class, records every annotated image together
    with its polygons, and converts those polygons into per-instance boolean
    masks on demand. image_reference() is not overridden here, so the
    implementation inherited from utils.Dataset is used.
    """

    def load_balloon(self, dataset_dir, subset):
        """Load a subset of the Balloon dataset.

        dataset_dir: Root directory of the dataset.
        subset: Subset to load: train or val
        """
        # Imported locally so the class does not rely on another cell having
        # loaded the skimage.io submodule.
        import skimage.io

        # Register the class id and class name through Dataset.add_class().
        self.add_class("balloon", 1, "balloon")

        # Only the "train" and "val" subsets are supported.
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)

        # Load and parse the JSON annotation file; the context manager closes
        # the file handle once parsing is done.
        annotation_path = os.path.join(dataset_dir, "via_region_data.json")
        with open(annotation_path) as annotation_file:
            annotations = json.load(annotation_file)

        # The dict keys are not needed; entries with no regions are skipped.
        annotations = [a for a in annotations.values() if a['regions']]

        # Add images
        for a in annotations:
            # Get the x, y coordinates of the points of the polygons that make
            # up the outline of each object instance. These are stored in
            # shape_attributes.
            # 'regions' is handled both as a dict and as a list to support
            # VIA versions 1.x and 2.x.
            regions = a['regions']
            if isinstance(regions, dict):
                regions = regions.values()
            polygons = [r['shape_attributes'] for r in regions]

            # load_mask() needs the image size to convert polygons to masks.
            # It is not taken from the JSON, so the image itself is read.
            # This is only manageable since the dataset is tiny.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                "balloon",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons)

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # Imported locally so the class does not rely on another cell having
        # loaded the skimage.draw submodule.
        import skimage.draw

        # If not a balloon dataset image, delegate to parent class.
        # Zero-argument super() is used because super(self.__class__, self)
        # recurses forever as soon as this class is subclassed.
        info = self.image_info[image_id]
        if info["source"] != "balloon":
            return super().load_mask(image_id)

        # Convert polygons to a boolean mask of shape
        # [height, width, instance_count]
        height, width = info["height"], info["width"]
        mask = np.zeros([height, width, len(info["polygons"])], dtype=bool)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and mark them. Passing
            # shape clips the indexes to the image so an annotation point
            # lying outside the image cannot cause an IndexError.
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'],
                                          shape=(height, width))
            mask[rr, cc, i] = True

        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s
        return mask, np.ones([mask.shape[-1]], dtype=np.int32)

In [None]:
# Read the raw VIA annotation file of the train split. The context manager
# closes the file handle, which a bare json.load(open(...)) leaves open.
with open(os.path.join(BALLOON_DATA_DIR, "train/via_region_data.json")) as annotation_file:
    annotations = json.load(annotation_file)
annotations

In [None]:
annotations = list(annotations.values())
annotations

### balloon 데이터 세트의 학습 수행. 

#### 학습과 Validation용 Dataset 설정.

In [None]:
import skimage

# Training dataset.
dataset_train = BalloonDataset()
dataset_train.load_balloon(BALLOON_DATA_DIR, "train")
dataset_train.prepare()

# Validation dataset
dataset_val = BalloonDataset()
dataset_val.load_balloon(BALLOON_DATA_DIR, "val")
dataset_val.prepare()

#### Config 설정

In [None]:
from mrcnn.config import Config

TRAIN_IMAGE_CNT = len(dataset_train.image_info)
VALID_IMAGE_CNT = len(dataset_val.image_info)

class BalloonConfig(Config):
    """Configuration for training on the balloon dataset.

    Derives from the base Config class and overrides some values.
    """
    # Give the configuration a recognizable name
    NAME = "balloon"

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # Background + balloon

    # Skip detections with < 90% confidence
    DETECTION_MIN_CONFIDENCE = 0.9

    # Process one image per GPU per step. Raise this only if the GPU has
    # enough memory for more images.
    IMAGES_PER_GPU = 1

    # Number of GPUs to train on.
    GPU_COUNT = 1

    # Size the epochs so that one epoch visits every image about once.
    # NOTE(review): assumes the number of images consumed per step is
    # IMAGES_PER_GPU * GPU_COUNT, as in mrcnn.config.Config — confirm.
    # With the values above this equals the image counts themselves.
    STEPS_PER_EPOCH = TRAIN_IMAGE_CNT // (IMAGES_PER_GPU * GPU_COUNT)
    VALIDATION_STEPS = VALID_IMAGE_CNT // (IMAGES_PER_GPU * GPU_COUNT)

    #BACKBONE = 'resnet101'
    
# config 설정. 
train_config = BalloonConfig()
train_config.display()

#### 기반 Mask RCNN Training 모델 생성 및 초기 weight값 로딩

In [None]:
import mrcnn.model as modellib
from mrcnn.model import log

# Location of the COCO-pretrained weights used to initialise the model.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "./pretrained/mask_rcnn_coco.h5")

# Layers that are not initialised from the COCO weights.
# NOTE(review): presumably skipped because their shapes depend on the number
# of classes — confirm against the mrcnn documentation.
HEAD_LAYERS_TO_SKIP = [
    "mrcnn_class_logits",
    "mrcnn_bbox_fc",
    "mrcnn_bbox",
    "mrcnn_mask",
]

# Build the model in training mode; checkpoints go under ./snapshots.
balloon_model = modellib.MaskRCNN(
    mode="training",
    config=train_config,
    model_dir='./snapshots',
)
balloon_model.load_weights(
    COCO_MODEL_PATH,
    by_name=True,
    exclude=HEAD_LAYERS_TO_SKIP,
)

#### 학습 수행

In [None]:
# The bare string below is a note, not executed logic. In English: the
# dataset is small and has a single class, and the initial weights come from
# a COCO-pretrained model, so training only the RPN and classifier heads is
# expected to have little impact on model quality. The remaining lines list
# the values accepted by the `layers` argument.
'''
데이터 세트가 작고,단 하나의 클래스임. 
pretrained 된 Coco 데이터 세트로 초기 weight 설정되었기에 RPN과 classifier만 학습해도 모델 성능은 큰 영향이 없을 거라 예상
all: All the layers
3+: Train Resnet stage 3 and up
4+: Train Resnet stage 4 and up
5+: Train Resnet stage 5 and up
'''
print("Training network heads")
# Train with layers='heads' for 30 epochs at the configured learning rate.
balloon_model.train(dataset_train, dataset_val,
            learning_rate=train_config.LEARNING_RATE,
            epochs=30,
            layers='heads')

### 학습이 완료된 모델을 이용하여 inference 수행. 
#### config를 inference용으로 변경

In [None]:
class InferenceConfig(BalloonConfig):
    """Inference-time variant of BalloonConfig."""
    # Use the same NAME as the configuration the model was trained with.
    NAME = 'balloon'
    # Run on a single GPU with a single image per GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
        
infer_config = InferenceConfig()
infer_config.display()

#### 학습된 모델의 weight 파일을 MaskRCNN의 inference 모델로 로딩. 

In [None]:
# Build the model in inference mode, pointing at the directory that holds the
# training snapshots.
model = modellib.MaskRCNN(mode="inference", model_dir='./snapshots', config=infer_config)
# Weight files are written during training by a callback; the most recently
# written one is assumed here to be the one with the lowest loss.
# NOTE(review): that assumption is not verified anywhere in this notebook.
weights_path = model.find_last()
print('model path:', weights_path)
# Load the selected weight file into the model.
model.load_weights(weights_path, by_name=True)

#### Instance Segmentation을 수행할 파일들을 dataset로 로딩. val 디렉토리에 있는 파일들을 로딩. 

In [None]:
# Inference를 위해 val Dataset 재로딩. 
dataset_val = BalloonDataset()
dataset_val.load_balloon(BALLOON_DATA_DIR, "val")
dataset_val.prepare()

print("Images: {}\nClasses: {}".format(len(dataset_val.image_ids), dataset_val.class_names))

In [None]:
from mrcnn import model as modellib

# Pick one image from the validation dataset. A fixed index is used here;
# the commented-out line below shows random selection instead.
#image_id = np.random.choice(dataset.image_ids)
image_id = 5
# Load the image together with its ground-truth class ids, boxes and masks.
image, image_meta, gt_class_id, gt_bbox, gt_mask=modellib.load_image_gt(dataset_val, infer_config, image_id, use_mini_mask=False)
info = dataset_val.image_info[image_id]
print("image ID: {}.{} ({}) {}".format(info["source"], info["id"], image_id, 
                                       dataset_val.image_reference(image_id)))

# Run object detection
results = model.detect([image], verbose=1)

In [None]:
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], 
                            title="Predictions")

In [None]:
#Mask_RCNN 패키지의 samples/balloon 디렉토리의 balloon.py 를 import 한다. 
ROOT_DIR = os.path.abspath(".")
sys.path.append(os.path.join(ROOT_DIR, "Mask_RCNN/samples/balloon/"))

import balloon
from mrcnn.visualize import display_images

splash = balloon.color_splash(image, r['masks'])
display_images([splash], cols=1)

In [None]:
def color_splash(image, mask):
    """Keep colour inside the masked regions and render the rest in gray.

    image: RGB image [height, width, 3]
    mask: instance segmentation mask [height, width, instance count]
    Returns the resulting uint8 image.
    """
    # Grayscale version of the image, expanded back to three channels and
    # multiplied by 255.
    grayscale = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255

    # No instances: the whole picture is gray.
    if mask.shape[-1] <= 0:
        return grayscale.astype(np.uint8)

    # All instances are treated as one: a pixel counts as masked when at
    # least one instance covers it.
    covered = np.sum(mask, -1, keepdims=True) >= 1
    # Original colour where covered, gray everywhere else.
    return np.where(covered, image, grayscale).astype(np.uint8)

#### 각 변수 shape debug

In [None]:
print('image shape:',image.shape, 'r mask shape:',r['masks'].shape)
mask = (np.sum(r['masks'], -1, keepdims=True) >= 1)
print('sum mask shape:',mask.shape)

#### np.sum() 테스트

In [None]:
a = np.ones((10, 10, 3))
#print(a)
#print(np.sum(a))
print(np.sum(a, axis=-1).shape)
print(np.sum(a, -1, keepdims=True).shape)
print(np.sum(a, -1, keepdims=True) >=1 )

#### np.where() 테스트

In [None]:
# Start from an all-True mask of shape (10, 10, 1).
test_mask = (np.sum(a, -1, keepdims=True) >=1)
print(test_mask.shape)
# Clear the top-left 5x5 corner of the mask.
test_mask[:5, :5, 0] = False

# Where the mask is True take values from test_image (ones), otherwise from
# test_gray (zeros).
test_image = np.ones((10, 10, 3))
test_gray = np.zeros((10, 10, 3))
np.where(test_mask, test_image, test_gray)

#### Video에 color splash를 적용. 

In [None]:
from IPython.display import clear_output, Image, display, Video, HTML
Video('../../data/video/balloon_dog02.mp4')

#### Video color splash를 적용한 함수를 생성하고 이를 이용해 video color splash 수행. 

In [None]:
import cv2
import time

def detect_video_color_splash(model, video_input_path=None, video_output_path=None):
    """Apply the color splash effect to every frame of a video.

    Each frame is read from video_input_path, run through model.detect(),
    passed to color_splash() with the detected masks, and written to
    video_output_path using the XVID codec at the input's frame rate and
    frame size.

    model: object whose detect([image], verbose=0) returns a list in which the
        first element is a dict with a 'masks' entry.
    video_input_path: path of the video to read.
    video_output_path: path of the video to write.

    Raises:
        IOError: if the input video or the output writer cannot be opened.
    """
    cap = cv2.VideoCapture(video_input_path)
    # Without this check an unreadable input just yields zero frames and the
    # function reports success for an empty output file.
    if not cap.isOpened():
        cap.release()
        raise IOError("Cannot open input video: {}".format(video_input_path))

    vid_writer = None
    try:
        codec = cv2.VideoWriter_fourcc(*'XVID')
        fps = round(cap.get(cv2.CAP_PROP_FPS))
        frame_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        vid_writer = cv2.VideoWriter(video_output_path, codec, fps, frame_size)
        if not vid_writer.isOpened():
            raise IOError("Cannot open output video: {}".format(video_output_path))

        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print("총 Frame 개수: {0:}".format(total))

        frame_index = 0
        while True:
            hasFrame, image_frame = cap.read()
            if not hasFrame:
                print('End of frame')
                break
            frame_index += 1
            print("frame index:{0:}".format(frame_index), end=" ")

            # OpenCV returns images as BGR, convert to RGB. cvtColor returns a
            # new array rather than a reversed (negative-stride) view.
            image_frame = cv2.cvtColor(image_frame, cv2.COLOR_BGR2RGB)
            start = time.time()
            # Detect objects
            r = model.detect([image_frame], verbose=0)[0]
            print('detected time:', time.time()-start)
            # Color splash
            splash = color_splash(image_frame, r['masks'])
            # RGB -> BGR to save image to video
            splash = cv2.cvtColor(splash, cv2.COLOR_RGB2BGR)
            # Add image to video writer
            vid_writer.write(splash)
    finally:
        # Release both handles even when detection or writing fails midway.
        if vid_writer is not None:
            vid_writer.release()
        cap.release()

    print("Saved to ", video_output_path)
    
detect_video_color_splash(model, video_input_path='../../data/video/balloon_dog02.mp4', 
                          video_output_path='../../data/output/balloon_dog02_output.avi')


#### 생성된 Output 파일을 Object Storage에 저장한 뒤 확인

In [None]:
!gsutil cp ../../data/output/balloon_dog02_output.avi gs://my_bucket_dlcv/data/output/balloon_dog02_output.avi