# Classify/annotate data (i.e. draw bounding boxes)!

__After defining some common functions (e.g. sampling the dataset), there are two ways to annotate images: semi-automatic and fully automatic. Select the option you need when running the code :)__

__Define variables__

In [1]:
# Root folder of the raw dataset; datasetImage() samples a sub-folder and a file from it.
DATASET_PATH = "dataset-raw"
# Output root; save_annotations_data() writes "images" and "annotations" folders under it.
DATASET_SAMPLING = "dataset-sampling"
# Height in pixels that images are resized to for display (used by quickShow and click_event).
HEIGHT_CV2_IMAGE_RESIZE = 720

#### Some Important Functions

_Choice from user_

In [2]:
def get_choice(n):
    """Prompt on stdin until the user enters a valid option index.

    Args:
        n: Number of available options; valid answers are 0 .. n-1.

    Returns:
        The chosen index as an int satisfying ``0 <= index < n``.
    """
    # Loop rather than recurse so repeated bad input cannot exhaust the stack.
    while True:
        answer = input("Choose an option [0-%d]: "%(n-1, ))
        try:
            val = int(answer)
        except ValueError:
            val = None  # not an integer at all
        # The upper bound is exclusive: n itself is one past the last option.
        if val is not None and 0 <= val < n:
            return val
        print("Invalid option, Choose again ")
    
def choose(options):
    """Print a numbered menu of ``options`` and return the index the user picks."""
    for position, label in enumerate(options):
        print("[%d] %s"%(position, label))
    return get_choice(len(options))

_Get random image from our dataset_

In [3]:
import os, random
def datasetImage():
    """Pick a random file from a random sub-folder of DATASET_PATH.

    The chosen path is printed and then returned.
    """
    # Sampling exactly one entry always yields a one-element list.
    (picked_folder,) = random.sample(os.listdir(DATASET_PATH), 1)
    folder_path = os.path.join(DATASET_PATH, picked_folder)
    image_path = os.path.join(folder_path, random.choice(os.listdir(folder_path)))
    print(image_path)
    return image_path

_Skip execution of cell_

In [4]:
class SkipExecution(Exception):
    """Raised at the top of a notebook cell to abandon the rest of that cell.

    Defines ``_render_traceback_`` so that, instead of a traceback, only a
    short notice is printed.
    """

    def _render_traceback_(self):
        notice = "this cell has been skipped"
        print(notice)

_Image transformations and Common operations (opencv and numpy)_

In [5]:
import cv2
import numpy as np

def get_grayscale(image):
    """Convert ``image`` from BGR to grayscale with ``cv2.cvtColor``."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

def remove_noise(image):
    """Reduce noise by applying a median blur with aperture size 5."""
    aperture = 5
    return cv2.medianBlur(image, aperture)
 
def thresholding(image):
    """Binarize ``image`` using Otsu's automatically chosen threshold.

    Returns only the thresholded image; the threshold value is discarded.
    """
    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary

def dilate(image):
    """Dilate ``image`` once with a 5x5 all-ones structuring element."""
    structuring_element = np.ones((5, 5), np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
    
def erode(image):
    """Erode ``image`` once with a 5x5 all-ones structuring element."""
    structuring_element = np.ones((5, 5), np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

def opening(image):
    """Apply morphological opening (erosion followed by dilation), 5x5 kernel."""
    structuring_element = np.ones((5, 5), np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened

def canny(image):
    """Run Canny edge detection with thresholds 100 and 200."""
    low_threshold, high_threshold = 100, 200
    return cv2.Canny(image, low_threshold, high_threshold)

def deskew(image):
    """Rotate ``image`` about its centre to undo the skew of its non-zero pixels.

    The skew angle is taken from the minimum-area rectangle enclosing every
    pixel whose value is greater than zero.
    """
    nonzero_coords = np.column_stack(np.where(image > 0))
    rect_angle = cv2.minAreaRect(nonzero_coords)[-1]
    # NOTE(review): this normalisation assumes minAreaRect reports angles in
    # [-90, 0); confirm against the installed OpenCV version.
    rotation = -(90 + rect_angle) if rect_angle < -45 else -rect_angle
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    matrix = cv2.getRotationMatrix2D(pivot, rotation, 1.0)
    return cv2.warpAffine(image, matrix, (width, height), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

def match_template(image, template):
    """Return the ``TM_CCOEFF_NORMED`` match result of ``template`` over ``image``."""
    scores = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
    return scores

def cv_imread(file_path):
    """Load an image by reading the file's raw bytes and decoding them.

    Approach taken from https://stackoverflow.com/a/54491104/4555317
    """
    raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    # Flag 1 is passed straight through to cv2.imdecode as the read mode.
    return cv2.imdecode(raw_bytes, 1)

def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    ``width`` takes precedence when both are given. With neither given the
    image is returned untouched. ``inter`` is the interpolation flag passed
    to ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    if width is not None:
        ratio = width / float(src_w)
        target = (width, int(src_h * ratio))
    elif height is not None:
        ratio = height / float(src_h)
        target = (int(src_w * ratio), height)
    else:
        return image

    return cv2.resize(image, target, interpolation=inter)

def quickShow(wt, img):
    """Display ``img`` in window ``wt``, scaled to HEIGHT_CV2_IMAGE_RESIZE pixels high."""
    cv2.imshow(wt, ResizeWithAspectRatio(img, height=HEIGHT_CV2_IMAGE_RESIZE))

---

_Input method to classify images_

In [6]:
# Ask which workflow to run; the returned index (0 or 1) gates the cells below.
classify_method = choose(["Semi Automatic (Click on white areas of speech bubble)", "Automatic (Using tensorflow text detection and inpainting)"])

[0] Semi Automatic (Click on white areas of speech bubble)
[1] Automatic (Using tensorflow text detection and inpainting)


Choose an option [0-1]:  0


### Manual (Semi Automatic)

_Semi-automatic bubble detection: click inside each speech bubble in the manga image and the code tries to find a bounding rectangle around it. Left-click adds a box, right-click removes the most recent one, and pressing any key saves the annotation. Note that detection might not work every time!_

_Store output data here_

In [7]:
# Skip this cell unless the semi-automatic workflow (option 0) was chosen.
if(classify_method != 0): raise SkipExecution
# Rectangles collected for the current image, as (min_x, min_y, max_x, max_y) tuples.
SEGMENTS_RECTANGLE = []

_Some more functions! (to calculate the maximal bounding rectangle and apply the flood-fill step, `firedraw`)_

In [8]:
# Skip this cell unless the semi-automatic workflow (option 0) was chosen.
if(classify_method != 0): raise SkipExecution
def get_biggest_cont(img, out):
    """Draw the longest Canny-edge contour of ``img`` onto ``out``.

    Args:
        img: Image handed to ``cv2.Canny`` (thresholds 100 and 200).
        out: Image the contour is drawn onto.

    Returns:
        The result of ``cv2.drawContours`` for the contour with the most
        points (value 255, thickness 3), or ``out`` unchanged when edge
        detection yields no contour at all.
    """
    edges = cv2.Canny(img, 100, 200)
    contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    # max() raises ValueError on an empty sequence, so bail out early.
    if len(contours) == 0:
        return out
    longest = max(contours, key=len)
    return cv2.drawContours(out, [longest], -1, 255, 3)

def findMaxRect(mask, drawimg):
    """Find the rectangle enclosing the contours of ``mask``; draw it on ``drawimg``.

    Contours whose bounding box starts at x == 0 or y == 0 are ignored.

    Returns:
        ``(min_x, min_y, max_x, max_y)``. When no contour qualifies, the
        initial sentinel values ``(10000, 10000, -1, -1)`` are returned.
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    boxes = [cv2.boundingRect(c) for c in contours]
    # Drop boxes that start on the left or top border.
    kept = [box for box in boxes if box[0] != 0 and box[1] != 0]

    # Seed each extreme with its sentinel so an empty ``kept`` still works.
    left = min([10000] + [bx for (bx, by, bw, bh) in kept])
    top = min([10000] + [by for (bx, by, bw, bh) in kept])
    right = max([-1] + [bx + bw for (bx, by, bw, bh) in kept])
    bottom = max([-1] + [by + bh for (bx, by, bw, bh) in kept])

    cv2.rectangle(drawimg, (left, top), (right, bottom), 127, 5)
    return (left, top, right, bottom)

def firedraw(img, pixel):
    """Flood-fill from ``pixel`` and return the fill mask.

    Args:
        img: Image to flood-fill from.
        pixel: ``(x, y)`` seed point.

    Returns:
        The mask produced by ``cv2.floodFill``; it is allocated two pixels
        larger than ``img`` in each dimension.
    """
    seed_x, seed_y = pixel
    rows, cols = img.shape[:2]
    fill_mask = np.zeros((rows + 2, cols + 2), np.uint8)
    # Flags: connectivity 4, write into the mask only, mask fill value 255.
    flags = 4 | cv2.FLOODFILL_MASK_ONLY | (255 << 8)
    tolerance = (100,) * 3
    _, _, fill_mask, _ = cv2.floodFill(img, fill_mask, (seed_x, seed_y), 127, tolerance, tolerance, flags)
    return fill_mask

def reconstructFromRect():
    """Return a copy of ``pimg`` with every rectangle in SEGMENTS_RECTANGLE drawn on it."""
    canvas = pimg.copy()
    for box in SEGMENTS_RECTANGLE:
        top_left = (box[0], box[1])
        bottom_right = (box[2], box[3])
        canvas = cv2.rectangle(canvas, top_left, bottom_right, 127, 5)
    return canvas

_Define click events_

In [9]:
# Skip this cell unless the semi-automatic workflow (option 0) was chosen.
if(classify_method != 0): raise SkipExecution
def click_event(event, x, y, flags, params):
    """Mouse callback for the annotation window.

    Left click: flood-fills from the clicked point, computes the enclosing
    rectangle, draws it on the global ``img`` and appends it to
    SEGMENTS_RECTANGLE.
    Right click: removes the most recently added rectangle, if any, and
    redraws the image.

    Args:
        event: OpenCV mouse event code.
        x, y: Click position in the displayed (resized) window.
        flags, params: Unused; required by the callback signature.
    """
    global img
    if event == cv2.EVENT_LBUTTONDOWN:
        (h, w) = img.shape[:2]
        # The window shows the image scaled to HEIGHT_CV2_IMAGE_RESIZE pixels
        # high, so map the click back to full-resolution coordinates.
        scale = h / HEIGHT_CV2_IMAGE_RESIZE
        x = int(x * scale)
        y = int(y * scale)

        eroded_img = erode(img)
        mask = firedraw(eroded_img, (x, y))

        rect = findMaxRect(mask, img)
        if rect[0] > rect[2] or rect[1] > rect[3]:
            # min > max means findMaxRect found no usable contour and returned
            # its initial sentinel values; do not record that as a box, and
            # redraw to discard whatever it drew on the image.
            img = reconstructFromRect()
        else:
            SEGMENTS_RECTANGLE.append(rect)
        quickShow('image', img)
    elif event == cv2.EVENT_RBUTTONDOWN:
        # Nothing to undo when no rectangle has been added yet.
        if SEGMENTS_RECTANGLE:
            SEGMENTS_RECTANGLE.pop()
            img = reconstructFromRect()
            quickShow('image', img)

_Save as Pascal VOC XML format_

In [10]:
# Skip this cell unless the semi-automatic workflow (option 0) was chosen.
if(classify_method != 0): raise SkipExecution
from pascal_voc_writer import Writer
import os, shutil
import hashlib

def save_annotations_data(img_path):
    """Copy the image into the sampling dataset and write its Pascal VOC XML.

    The copy and the XML file are both named after the MD5 hex digest of
    ``img_path`` (the path string, not the file contents). Every rectangle
    in SEGMENTS_RECTANGLE is written as a 'bubble' object, after which the
    list is reset for the next image.

    Args:
        img_path: Path of the source image that was annotated.
    """
    global SEGMENTS_RECTANGLE
    annotation_dir = os.path.join(DATASET_SAMPLING, "annotations")
    image_dir = os.path.join(DATASET_SAMPLING, "images")
    for directory in (annotation_dir, image_dir):
        os.makedirs(directory, exist_ok=True)

    digest = hashlib.md5(img_path.encode('utf-8')).hexdigest()
    extension = os.path.splitext(img_path)[1]

    copied_image = os.path.join(image_dir, digest + extension)
    shutil.copyfile(img_path, copied_image)

    # Image dimensions come from the global working image.
    height, width = img.shape[:2]
    voc = Writer(copied_image, width, height)
    for box in SEGMENTS_RECTANGLE:
        voc.addObject('bubble', box[0], box[1], box[2], box[3])
    voc.save(os.path.join(annotation_dir, digest + ".xml"))

    SEGMENTS_RECTANGLE = []
    print("saved:", digest)
_Driver code_

In [11]:
# Skip this cell unless the semi-automatic workflow (option 0) was chosen.
if(classify_method != 0): raise SkipExecution
def execute_semi_automatic():
    """Annotate one random dataset image interactively, then save the result.

    Loads a random image, converts it to an Otsu-thresholded grayscale image,
    shows it, lets the mouse callback collect rectangles until a key is
    pressed, and finally writes the annotation.
    """
    global img, pimg
    image_path = datasetImage()
    binary = thresholding(get_grayscale(cv_imread(image_path)))
    img = binary
    # Untouched copy used to redraw the image after an undo.
    pimg = binary.copy()
    quickShow('image', img)

    cv2.setMouseCallback('image', click_event)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    save_annotations_data(image_path)

_Classify as many images as you want :o_

In [12]:
# Skip this cell unless the semi-automatic workflow (option 0) was chosen.
if(classify_method != 0): raise SkipExecution
# Number of images to annotate in this session; non-integer input raises ValueError.
NUM_SAMPLES = int(input("Enter number of samples you want to take: "))
for i in range(NUM_SAMPLES):
    # Each call shows one random image, collects clicks and saves the annotation.
    execute_semi_automatic()

dataset-raw\Tanuki\chapter-15.5_5.jpg
saved: 8b94d1b3a275efc58d30e10160f52324
dataset-raw\Sijin\chapter-205.5_19.jpg
saved: 33a210c2a90a015cc9760daea5906840
dataset-raw\Elskar\chapter-76_11.jpg
saved: 21f6806a06ee75e85dddf7cfb2ab8d28
dataset-raw\How To Take Off A Wedding Dress\chapter-13_23.jpg
saved: 276a1ffce934464a20dd56204f1672e4
dataset-raw\The Devil's Temptation\chapter-44_24.jpg
saved: c6004326ac10d3ba8cbbf96ed833f2c7
dataset-raw\Yashio To Mikumo\chapter-7_29.jpg
saved: 4a0efc4dddb76dc291d96aa1cae40cdd
dataset-raw\Pretty Peasant Girl\chapter-70_10.jpg
saved: e6e289d4eaadd51bef2b5c777fadece0
dataset-raw\Senpai Ga Oyobidesu!\chapter-26_6.jpg
saved: c94b9de0010719ec262f008b23d1826c
dataset-raw\A Way To Protect The Lovable You\chapter-52_1.jpg
saved: ee3fe4ccd9369741177d0ffe2fe2058f
dataset-raw\Rebirth Of Legendary Doctor\chapter-133_5.jpg
saved: 9572fe6019cb4dc29cb02447f9cdef7c
dataset-raw\The Strongest Ever\chapter-61_51.jpg
saved: c78eab6b8e2cddfed80bbc50c4bcafce
dataset-raw\Hers

### Automatic (Full Automatic)

_Using TensorFlow to detect text, inpaint it, and use the coordinates to guess all the chat boxes. Please note this might not be very accurate, so you have been warned :) (plus it's also slow)_

In [None]:
# Skip this cell unless the automatic workflow (option 1) was chosen.
if(classify_method != 1): raise SkipExecution
# Placeholder: the automatic pipeline is not implemented here.
print("Under development")

## What after this?
After annotating a good number of samples, you need to convert them to TFRecord files (for training etc.). The Pascal VOC XML and image files can be used to generate them; for further steps please check the next notebook :)