# Import modules

In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import skimage.feature
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
%matplotlib inline

#from subprocess import check_output
#print(check_output(["ls", "../input"]).decode("utf8"))

# read files

In [None]:
# List the training images.  Names that start with a digit are ordered by the
# integer value of their stem (the text before the first '.'); every other
# name sorts after them, alphabetically.
file_names = os.listdir("../input/Train/")
file_names.sort(
    key=lambda name: (
        int(name.split('.', 1)[0]) if name[0].isdigit() else float('inf'),
        name,
    )
)

# select a subset of files to run on
file_names = file_names[:1]

# generate sub_image_template

In [None]:
# (width, height) in pixels of the sub-image template
Sub_Im_Size = (416,416)

# The template is an all-black 3-channel (BGR) uint8 canvas.  Build it directly
# instead of cropping a training image and diffing it against itself: the
# result is the same black image, but it no longer depends on the first
# TrainDotted file being readable or being at least as large as the template.
image_tmp = np.zeros((Sub_Im_Size[1], Sub_Im_Size[0], 3), dtype=np.uint8)

# preview the template and store it on disk for later cells to load
plt.imshow(cv2.cvtColor(image_tmp, cv2.COLOR_BGR2RGB))
cv2.imwrite('sub_im_template.png',image_tmp)

# parse image

In [None]:
def _non_black_mask(image):
    """Return a uint8 mask of a BGR image: 0 where gray level < 20, else 255."""
    mask = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mask[mask < 20] = 0
    mask[mask > 0] = 255
    return mask


def get_blobs(filename):
    """Detect the coloured dots that differ between a Train/TrainDotted pair.

    Parameters
    ----------
    filename : str
        Image file name, present in both ``../input/Train/`` and
        ``../input/TrainDotted/``.

    Returns
    -------
    The value returned by ``skimage.feature.blob_log``; per the scikit-image
    documentation this is an array with one ``(y, x, sigma)`` row per blob.

    Raises
    ------
    IOError
        If either image cannot be read.
    ValueError
        If the two images do not have the same shape.
    """
    dotted_path = "../input/TrainDotted/" + filename
    plain_path = "../input/Train/" + filename

    # read the Train and Train Dotted images
    image_1 = cv2.imread(dotted_path)
    image_2 = cv2.imread(plain_path)

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque cv2.error inside absdiff below.
    if image_1 is None:
        raise IOError("could not read image: " + dotted_path)
    if image_2 is None:
        raise IOError("could not read image: " + plain_path)
    if image_1.shape != image_2.shape:
        raise ValueError(
            "Train and TrainDotted images differ in shape for %s: %s vs %s"
            % (filename, image_2.shape, image_1.shape)
        )

    # absolute difference between Train and Train Dotted
    image_3 = cv2.absdiff(image_1, image_2)

    # mask out regions that are (near-)black in either image, so that
    # blackened areas of Train Dotted do not show up as differences
    image_3 = cv2.bitwise_or(image_3, image_3, mask=_non_black_mask(image_1))
    image_3 = cv2.bitwise_or(image_3, image_3, mask=_non_black_mask(image_2))

    # convert to grayscale to be accepted by skimage.feature.blob_log
    image_3 = cv2.cvtColor(image_3, cv2.COLOR_BGR2GRAY)

    # detect blobs
    blobs = skimage.feature.blob_log(image_3, min_sigma=3, max_sigma=4, num_sigma=1, threshold=0.02)

    return blobs

In [None]:
def get_xy_range_basic(size):
    """Return a symmetric box extent ``(x_left, x_right, y_up, y_down)``.

    All four offsets equal ``size``.
    """
    return (size,) * 4

In [None]:
def _paste_patch(dst, dst_center, src, src_center, xy_range):
    """Copy the patch around ``src_center`` in ``src`` to ``dst_center`` in ``dst``.

    Both centers are integer ``(x, y)`` pixel coordinates and ``xy_range`` is
    ``(x_left, x_right, y_up, y_down)``.  The patch is shrunk wherever it
    would cross a border of either image, so the source and destination
    slices always have the same shape.
    """
    src_x, src_y = src_center
    dst_x, dst_y = dst_center
    left = min(xy_range[0], src_x, dst_x)
    right = min(xy_range[1], src.shape[1] - src_x, dst.shape[1] - dst_x)
    up = min(xy_range[2], src_y, dst_y)
    down = min(xy_range[3], src.shape[0] - src_y, dst.shape[0] - dst_y)
    if left < 0 or up < 0 or left + right <= 0 or up + down <= 0:
        # nothing of the patch lies inside both images
        return
    dst[dst_y - up:dst_y + down, dst_x - left:dst_x + right, :] = \
        src[src_y - up:src_y + down, src_x - left:src_x + right, :]


def parse_image(filename):
    """Build and display a sub-image centred on a detected blob.

    The blobs of ``filename`` are located with ``get_blobs``.  For the first
    blob, the template ``sub_im_template.png`` is loaded, the blob's patch
    from the original Train image is pasted at the template centre, and the
    patches of all other blobs whose box lies fully inside the template
    window are pasted at their relative positions.  The result is shown with
    ``plt.imshow``.

    NOTE: only the first blob is rendered (the loop stops after one
    iteration) and nothing is written to disk.

    Parameters
    ----------
    filename : str
        Image file name inside ``../input/Train/``.

    Raises
    ------
    IOError
        If the original image or the template image cannot be read.
    """
    ### get original image
    ori_path = "../input/Train/" + filename
    ori_image = cv2.imread(ori_path)
    if ori_image is None:
        # cv2.imread returns None on failure instead of raising
        raise IOError("could not read image: " + ori_path)

    ### get coordinate of all sea lions
    Dict_range = {}
    for blob in get_blobs(filename):
        # blob rows are (y, x, sigma); the coordinates come back as floats,
        # which cannot be used as slice indices, so truncate them to int
        y, x = int(blob[0]), int(blob[1])
        Dict_range[(x, y)] = get_xy_range_basic(size=16)

    ### output sub_image for each blob
    # iterate over a snapshot so entries can be removed from Dict_range safely
    for key in list(Dict_range):
        main_x, main_y = key
        # drop the main blob so it is not pasted again as an "other" blob
        xy_range = Dict_range.pop(key)

        ### get basic sub_image
        sub_image = cv2.imread('sub_im_template.png')
        if sub_image is None:
            raise IOError("could not read image: sub_im_template.png")
        sub_height, sub_width = sub_image.shape[:2]
        # floor division keeps the centre an int (usable as an index)
        sub_x_center = sub_width // 2
        sub_y_center = sub_height // 2
        _paste_patch(sub_image, (sub_x_center, sub_y_center),
                     ori_image, (main_x, main_y), xy_range)

        ### include other sea lion
        # window of the original image covered by the sub-image
        x_min = main_x - sub_width // 2 + 1
        x_max = main_x + sub_width // 2 - 1
        y_min = main_y - sub_height // 2 + 1
        y_max = main_y + sub_height // 2 - 1

        for ex_key, ex_range in Dict_range.items():
            ex_x, ex_y = ex_key
            ### coordinate of ex_sea_lion in origin image
            ex_left = ex_x - ex_range[0]
            ex_right = ex_x + ex_range[1]
            ex_up = ex_y - ex_range[2]
            ex_down = ex_y + ex_range[3]
            # only blobs whose whole box fits inside the window are copied
            if ex_left > x_min and ex_right < x_max and ex_up > y_min and ex_down < y_max:
                ### sub_image's coordinate where ex_sea_lion put
                in_center = (sub_x_center - main_x + ex_x,
                             sub_y_center - main_y + ex_y)
                _paste_patch(sub_image, in_center, ori_image, ex_key, ex_range)

        plt.imshow(cv2.cvtColor(sub_image, cv2.COLOR_BGR2RGB))
        # preview only: stop after the first blob
        break
            

In [None]:
# Run the sub-image pipeline on the first selected training image.
parse_image(file_names[0])