# Bitmoji Parser

### Flow
**For each image**:
1. Find all of the green rectangles (HSV between (40,150,210) and (120,250,250))
2. Save each green rectangle – with an area greater than 1000 pixels, and a height and width each greater than 40 pixels – to a new image. These represent XXX category of images and are saved with the naming convention "\[original name\]-XXX_category-\[object number\].png"
3. Repeat for other colour rectangles too

In [11]:
from matplotlib import pyplot as plt
from glob import glob
import random as rng
import pandas as pd
import numpy as np
import cv2

# Collect the path of every entry in the input directory; the pattern is
# resolved relative to the current working directory.
img_fps = glob("../data/input/*")
# Bare expression so the notebook displays the list of matched paths.
img_fps

['../data/input/user106_341.jpg',
 '../data/input/user106_346.jpg',
 '../data/input/test1.jpg',
 '../data/input/user99_310.jpg']

In [12]:
import os

def find_bounding_boxes(hsv_img,
                        colour_dict):
    """
    Find the bounding box of every region of `hsv_img` inside a colour range.

    Parameters
    ----------
    hsv_img : image array handed to ``cv2.inRange`` (callers pass an HSV image).
    colour_dict : mapping providing the inclusive range bounds under the keys
        ``'low_hsv'`` and ``'high_hsv'``.

    Returns
    -------
    list
        One ``cv2.boundingRect`` result, i.e. ``(x, y, w, h)``, per outermost
        contour found in the in-range mask. No size filtering is done here.
    """
    low, high = colour_dict['low_hsv'], colour_dict['high_hsv']
    # inRange already produces the single-channel binary image that
    # findContours expects, so the contours are taken from the mask directly.
    # (Re-masking the HSV image and converting it with a BGR->gray code would
    # mix the H/S/V channels as if they were colours and could round dark
    # in-range pixels down to zero, dropping them from the result.)
    mask = cv2.inRange(hsv_img, low, high)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 2.x/4.x; index -2 is the contour list on all.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    return [cv2.boundingRect(c) for c in contours]


def write_bounding_boxes(img,
                         bounding_boxes,
                         img_prefix = "../data/output/[image_name]-[category]",
                         thresh = lambda x,y,w,h : h*w > 1000 and w > 40 and h > 40):
    """
    Crop every bounding box that passes `thresh` out of `img` and save it.

    Parameters
    ----------
    img : image array indexed as ``img[row, col, channel]``; the channel axis
        is reversed before writing (callers pass RGB, see note below).
    bounding_boxes : iterable of ``(x, y, w, h)`` boxes.
    img_prefix : path prefix of the output files; crop number ``i`` is written
        to ``"{img_prefix}-{i}.png"``.
    thresh : predicate called as ``thresh(x, y, w, h)``; boxes for which it is
        falsy are discarded. The default keeps boxes with an area above 1000
        pixels whose width and height both exceed 40 pixels.

    Returns
    -------
    list
        The boxes that passed `thresh`, in their original order.

    Raises
    ------
    IOError
        If ``cv2.imwrite`` reports that a crop could not be written (for
        example because the output directory does not exist).
    """
    # filter out noise based on preset threshold
    bounding_boxes = [bb for bb in bounding_boxes if thresh(*bb)]
    for i, (x, y, w, h) in enumerate(bounding_boxes):
        section = img[y:y+h, x:x+w]
        out_fp = f"{img_prefix}-{i}.png"
        # imwrite expects BGR channel order, so flip the last axis of the
        # (presumably RGB) crop. imwrite signals failure by returning False
        # rather than raising, so check it instead of silently losing files.
        if not cv2.imwrite(out_fp, section[:, :, ::-1]):
            raise IOError(f"Could not write image to {out_fp}")
    return bounding_boxes


def output_path(img_fp,colour_dict):
    """
    Build the output path prefix for the crops of one image and one category.

    Parameters
    ----------
    img_fp : path of the input image.
    colour_dict : mapping whose ``'category'`` entry names the category.

    Returns
    -------
    str
        ``"../data/output/<image name without extension>-<category>"``.
    """
    direc = "../data/output"
    # Get rid of whatever is after the last period. splitext drops only the
    # final extension, so a name such as "photo.v2.jpg" keeps its "photo.v2"
    # stem instead of being cut at the first dot.
    img_pref = os.path.splitext(os.path.basename(img_fp))[0]
    category = colour_dict['category']
    return f"{direc}/{img_pref}-{category}"

In [13]:
from utils import green

# For every input file: locate the green regions, write the ones that pass the
# size threshold to disk, and report how many boxes were found vs. kept.
for path in img_fps:
    bitmoji = cv2.imread(path, cv2.IMREAD_COLOR)
    # imread returns None instead of raising when a file cannot be decoded,
    # and the input glob matches any file, so skip such entries explicitly
    # rather than letting cvtColor fail with an opaque error.
    if bitmoji is None:
        print(path, 'skipped (not a readable image)')
        continue
    output_prefix = output_path(path, green)
    # imread yields BGR; write_bounding_boxes is given the RGB version and the
    # detection step works on the HSV version.
    bitmoji = cv2.cvtColor(bitmoji, cv2.COLOR_BGR2RGB)
    hsv_bitmoji = cv2.cvtColor(bitmoji, cv2.COLOR_RGB2HSV)
    bounding_boxes = find_bounding_boxes(hsv_bitmoji, green)
    bef = len(bounding_boxes)
    bounding_boxes = write_bounding_boxes(bitmoji,
                                          bounding_boxes,
                                          output_prefix)
    print(path,bef,'->',len(bounding_boxes))

../data/input/user106_341.jpg 35 -> 3
../data/input/user106_346.jpg 9 -> 7
../data/input/test1.jpg 216 -> 4
../data/input/user99_310.jpg 0 -> 0
