# Method 2
## Advanced Foreground Extractor

In [1]:
import os

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import skimage.io as io
import skimage.transform as transform
import tensorflow as tf
import torch
import torchvision
import torchvision.transforms as T
from PIL import Image

%matplotlib inline
# NOTE(review): this call selects the 'Agg' backend right after the inline
# magic above. The visible cells only write image files with plt.imsave, so
# confirm whether inline figure display is actually wanted here.
matplotlib.use('Agg')

## Extract Foreground via Mask R-CNN and GrabCut

In [2]:
def read_image(image_path):
    """Load the image stored at ``image_path`` as an RGB NumPy array.

    The file is opened with PIL, converted to mode 'RGB', and the resulting
    image is handed to ``np.asarray``.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    pixels = np.asarray(rgb_image)
    return pixels

In [3]:
def get_simple_image_transform():
    """Build the preprocessing pipeline: a ``T.Compose`` whose only step is ``T.ToTensor()``."""
    return T.Compose([T.ToTensor()])

In [4]:
def create_grabcut_mask(image, grabcut_mask):
    """Refine an initial GrabCut label mask and return a binary foreground mask.

    ``cv2.grabCut`` is run for 5 iterations in ``cv2.GC_INIT_WITH_MASK`` mode
    (no rectangle is supplied), seeded with ``grabcut_mask``.

    Args:
        image: Image array passed straight to ``cv2.grabCut``.
        grabcut_mask: Initial per-pixel GrabCut labels; passed to
            ``cv2.grabCut`` as its mask argument.

    Returns:
        A ``np.uint8`` array that is 0 where the refined label is 0 or 2 and
        1 everywhere else.
    """
    iterations = 5
    # Scratch buffers for the background / foreground models used by grabCut.
    background_model = np.zeros((1, 65), np.float64)
    foreground_model = np.zeros((1, 65), np.float64)

    refined = cv2.grabCut(
        image,
        grabcut_mask,
        None,
        background_model,
        foreground_model,
        iterations,
        cv2.GC_INIT_WITH_MASK,
    )[0]

    # Labels 0 and 2 are the (definite / probable) background classes.
    is_background = (refined == 2) | (refined == 0)
    return np.where(is_background, 0, 1).astype(np.uint8)

In [5]:
class ForeGroundExtractor:
    """Foreground extractor that seeds GrabCut with Mask R-CNN detections.

    A pretrained torchvision Mask R-CNN (ResNet-50 FPN) detects instances of
    one object label; their boxes and masks are turned into an initial
    GrabCut label mask, which ``create_grabcut_mask`` then refines.
    """

    def __init__(self, mrcnn_pre_process, mrcnn_confidence=0.8, grabcut_foreground_confidence=0.8, detect_object_label=1):
        """Load the pretrained Mask R-CNN and store the thresholds.

        Args:
            mrcnn_pre_process: Callable applied to a PIL image to produce the
                model input (see ``detect_foreground``).
            mrcnn_confidence: Detections are kept only when their score is
                strictly greater than this value.
            grabcut_foreground_confidence: Mask pixels strictly greater than
                this value are marked as definite foreground.
            detect_object_label: Label id of the detections to keep.
        """
        self.mrcnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
        self.mrcnn.eval()
        self.mrcnn_confidence = mrcnn_confidence
        self.grabcut_foreground = grabcut_foreground_confidence
        self.trans = mrcnn_pre_process
        self.detect_object_label = detect_object_label

    # Object detection: bounding boxes and masks become the input of GrabCut
    def mrcnn_output2grabcut_input(self, output):
        """Convert Mask R-CNN output into an initial GrabCut label mask.

        Only the first element of ``output`` is read. Detections are kept
        when their score exceeds ``self.mrcnn_confidence`` and their label
        equals ``self.detect_object_label``.

        Returns:
            A ``np.uint8`` array with the spatial shape of the predicted
            masks: ``cv2.GC_PR_BGD`` inside every kept box, ``cv2.GC_FGD``
            where a kept mask exceeds ``self.grabcut_foreground``, and 0
            elsewhere.
        """
        detection = output[0]
        boxes = detection['boxes'].detach().numpy()
        masks = detection['masks'].detach().numpy()
        labels = detection['labels'].detach().numpy()
        scores = detection['scores'].detach().numpy()

        # Build the detection filter once and reuse it for boxes and masks.
        keep = (self.mrcnn_confidence < scores) & (labels == self.detect_object_label)
        boxes = boxes[keep].astype(np.uint64)
        masks = masks[keep]

        # masks is indexed as (detection, channel, row, col); shape[2:] is the
        # image plane and m[0] below is the single mask channel.
        grab_mask = np.zeros(masks.shape[2:], np.uint8)
        # Boxes first, masks second: mask pixels overwrite the box label.
        for x_min, y_min, x_max, y_max in boxes:
            grab_mask[y_min:y_max, x_min:x_max] = cv2.GC_PR_BGD
        for m in masks:
            grab_mask[self.grabcut_foreground < m[0]] = cv2.GC_FGD
        return grab_mask

    def detect_foreground(self, image):
        """Return a binary (0/1, ``np.uint8``) foreground mask for ``image``.

        Args:
            image: Image array; converted with ``Image.fromarray`` before
                preprocessing and also passed to GrabCut unchanged.

        Returns:
            An all-zero mask of shape ``image.shape[:2]`` when no pixel was
            marked as definite foreground; otherwise the result of
            ``create_grabcut_mask``.
        """
        # Inference only: no_grad avoids building an autograd graph.
        with torch.no_grad():
            output = self.mrcnn([self.trans(Image.fromarray(image))])
        grabcut_input = self.mrcnn_output2grabcut_input(output)
        if not (grabcut_input == cv2.GC_FGD).any():
            # Nothing confidently detected; skip GrabCut entirely.
            return np.zeros(image.shape[:2], np.uint8)
        return create_grabcut_mask(image, grabcut_input)

## Augment and Compose the Foreground (same as Method 1)

In [6]:
def foregroundAug(foreground):
    """Randomly warp and optionally mirror a foreground image.

    Random draws, in order: a rotation of a whole number of degrees from
    [-10, 10), a zoom factor in [0.8, 1.2), an x translation in
    [0, width/3) and a y translation in [0, height/3). The affine transform
    is applied with ``transform.warp``; afterwards the result is flipped
    horizontally with probability 0.5.

    Returns:
        The warped (and possibly flipped) image as returned by
        ``transform.warp``.
    """
    height, width = foreground.shape[0], foreground.shape[1]

    rotation_rad = np.random.randint(-10, 10) * (np.pi / 180.0)  # degrees -> radians
    zoom_factor = np.random.random() * 0.4 + 0.8
    shift_x = np.random.randint(0, int(width / 3))
    shift_y = np.random.randint(0, int(height / 3))

    affine = transform.AffineTransform(
        scale=(zoom_factor, zoom_factor),
        rotation=rotation_rad,
        translation=(shift_x, shift_y),
    )
    warped = transform.warp(foreground, affine.inverse)

    # Mirror left-right for half of the samples.
    mirror = np.random.randint(0, 100) >= 50
    return warped[:, ::-1] if mirror else warped

In [7]:
def getForegroundMask(foreground):
    """Return a 0/1 mask marking the non-black pixels of ``foreground``.

    A pixel counts as foreground when any of its channels is greater than 0.
    Looking at all channels matters: testing only channel 0 would classify
    pixels with zero red but non-zero green/blue as background.

    Args:
        foreground: Array indexed as (row, col, channel).

    Returns:
        A new (row, col) array with the same dtype as ``foreground``, holding
        1 for foreground pixels and 0 otherwise. The input is not modified.
    """
    has_signal = np.any(foreground > 0, axis=2)
    return has_signal.astype(foreground.dtype)

In [8]:
def compose(foreground, mask, background):
    """Paste ``foreground`` over ``background`` using ``mask``.

    The background is resized to the foreground's height and width, blanked
    wherever ``mask`` is 1, and the foreground is then added on top.

    Args:
        foreground: Array indexed as (row, col, channel).
        mask: Array reshapeable to (rows, cols, 1) of the foreground.
        background: Image passed to ``transform.resize``.

    Returns:
        The sum of the masked, resized background and the foreground.
    """
    rows, cols = foreground.shape[0], foreground.shape[1]
    resized_background = transform.resize(background, foreground.shape[:2])
    # Trailing singleton axis lets the mask broadcast across channels.
    background_weight = 1 - mask.reshape(rows, cols, 1)
    return resized_background * background_weight + foreground

In [9]:
# The label passed below is a COCO category id; 17 is "cat".
# Category list reference:
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/

fge = ForeGroundExtractor(
    mrcnn_pre_process=get_simple_image_transform(),
    detect_object_label=17,  # 17 is cat.
)

In [10]:
# Extract the foreground of every input image and save it first
DIR = "data2/input/image/"
FOREGROUND_OUT_DIR = "data2/input/foreground/"
os.makedirs(FOREGROUND_OUT_DIR, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so the image -> index
# mapping used in the output file names is reproducible. Hidden entries
# (e.g. .DS_Store) are skipped.
imagePaths = [os.path.join(DIR, f) for f in sorted(os.listdir(DIR))
              if not f.startswith('.')]

for i, image_path in enumerate(imagePaths):
    print(image_path)
    src = read_image(image_path)

    foreground_mask = fge.detect_foreground(src)

    # Keep the pixels labelled foreground; everything else stays black.
    is_foreground = foreground_mask == 1
    result = np.zeros(src.shape, np.uint8)
    result[is_foreground] = src[is_foreground]

    plt.imsave(os.path.join(FOREGROUND_OUT_DIR, "{}.png".format(i)), result)

data2/input/image/cat1.jpg


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /Users/distiller/project/conda/conda-bld/pytorch_1595629449223/work/torch/csrc/utils/python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


data2/input/image/cat0.jpg
data2/input/image/cat2.jpg
data2/input/image/cat3.jpg


In [11]:
fore_DIR = "data2/input/foreground/"
back_DIR = "data2/input/background/cloister/"
MASK_OUT_DIR = "data2/output/mask/"
IMAGE_OUT_DIR = "data2/output/image/"
os.makedirs(MASK_OUT_DIR, exist_ok=True)
os.makedirs(IMAGE_OUT_DIR, exist_ok=True)

# Sorted listings give a reproducible (i, j) -> file mapping; hidden entries
# (e.g. .DS_Store) are skipped.
forePaths = [os.path.join(fore_DIR, f) for f in sorted(os.listdir(fore_DIR))
             if not f.startswith('.')]
# The background listing does not depend on the foreground, so build it once.
backPaths = [os.path.join(back_DIR, f) for f in sorted(os.listdir(back_DIR))
             if not f.startswith('.')]

for i, fore_path in enumerate(forePaths):
    print(fore_path)
    foreground = np.array(Image.open(fore_path).convert('RGB'))

    for j, back_path in enumerate(backPaths):
        background = np.asarray(io.imread(back_path, plugin='matplotlib'))
        # matplotlib's reader returns PNGs as floats already in [0, 1] and
        # other formats as integers; only integer data needs rescaling.
        if np.issubdtype(background.dtype, np.integer):
            background = background / 255.0

        # Skip backgrounds without a channel axis (grayscale) or whose channel
        # count differs from the RGB foreground (e.g. RGBA).
        if background.ndim != 3 or background.shape[2] != foreground.shape[2]:
            continue

        foreground_new = foregroundAug(foreground)
        mask_new = getForegroundMask(foreground_new)
        # NOTE(review): imsave on a 2-D array applies matplotlib's default
        # colormap, so the saved mask is not a plain black/white image.
        # Confirm whether cmap='gray' is wanted.
        plt.imsave(os.path.join(MASK_OUT_DIR, "{}_{}.png".format(i, j)), mask_new)

        composed_image = compose(foreground_new, mask_new, background)
        # Clamp to the valid float image range before saving; np.clip gives
        # the same values without a round trip through a TensorFlow tensor.
        composed_image = np.clip(composed_image, 0.0, 1.0)

        plt.imsave(os.path.join(IMAGE_OUT_DIR, "{}_{}.png".format(i, j)),
                   composed_image)

data2/input/foreground/2.png
data2/input/foreground/3.png
data2/input/foreground/1.png
data2/input/foreground/0.png


#### Code Reference: 
https://github.com/stonzw/human_foreground_extractor     
https://github.com/virafpatrawala/Synthetic-Image-Datasets 
