In [38]:
import cv2
import numpy as np
import math
import copy
import datetime
import matplotlib.pyplot as plt

In [39]:

def centroid(max_contour):
    """Compute the centroid of a contour from its image moments.

    Parameters
    ----------
    max_contour: ndarray
        An array object of the coordinates of the contour

    Returns
    -------
    (cx, cy): tuple(int, int) or None
        The x- and y-coordinate of the centroid, or None when the
        zeroth moment (m00) is zero and the centroid is undefined
    """
    moments = cv2.moments(max_contour)
    mass = moments['m00']
    # A zero m00 would make the divisions below undefined.
    if mass == 0:
        return None
    return int(moments['m10'] / mass), int(moments['m01'] / mass)

In [40]:
def removeBG(frame):
    """Mask out the background of a frame using the global background model.

    Relies on the notebook-level names ``bgModel`` (the background
    subtractor) and ``learningRate``.

    Parameters
    ----------
    frame: ndarray
        An array object of the colour values of the frame

    Returns
    -------
    res: ndarray
        The frame with every pixel outside the foreground mask set to zero
    """
    foreground = bgModel.apply(frame, learningRate=learningRate)

    # One erosion pass with a 3x3 kernel shrinks the mask slightly.
    eroded = cv2.erode(foreground, np.ones((3, 3), np.uint8), iterations=1)
    return cv2.bitwise_and(frame, frame, mask=eroded)


def swipe_detection(centroid, points):
    """Detects whether there is a possible swipe motion

    A swipe is considered possible when at least three of the points lie
    on the same side of the centroid along the x-axis (points whose x is
    equal to the centroid's count as the right-hand side).

    Parameters
    ---------
    centroid: tuple(x,y)
        A tuple of the coordinate of the centroid
    points: list((x,y))
        A list of the coordinates of the points

    Returns
    -------
    Boolean:
        Whether there is a possible swipe motion
    """
    # Need a centroid and at least four points to make a decision.
    if centroid is None or len(points) < 4:
        return False

    left_of_centroid = sum(1 for pt in points if pt[0] < centroid[0])
    right_of_centroid = len(points) - left_of_centroid
    return left_of_centroid >= 3 or right_of_centroid >= 3

def calculateFingers(res,drawing):
    """Find finger-like convexity defects on a contour and mark them on `drawing`.

    Parameters
    ----------
    res: ndarray
        The contour, indexed as ``res[i][0] -> (x, y)``
    drawing: ndarray
        Image that the centroid and the accepted defect points are drawn
        onto (modified in place)

    Returns
    -------
    far_points: list((x,y))
        Far points of the defects whose opening angle is at most 90 degrees
    far_point: tuple(x,y) or None
        The point returned by farthest_point() when it lies above the
        centroid (smaller y), otherwise None
    cnt_centroid: tuple(x,y) or None
        The centroid of the contour
    possible_swipe: bool
        Result of swipe_detection() for the centroid and far_points
    """
    #  convexity defect
    hull = cv2.convexHull(res, returnPoints=False)
    defects = cv2.convexityDefects(res, hull)
    cnt_centroid = centroid(res)
    far_points = []
    far_point = None
    if cnt_centroid is not None:
        cv2.circle(drawing, cnt_centroid, 8, [100,0,100], -1)
        point_temp = farthest_point(defects, res, cnt_centroid)
        # Only keep the farthest point when it is above the centroid.
        if point_temp is not None and point_temp[1] < cnt_centroid[1]:
            far_point = point_temp

    # convexityDefects returns None when there are no defects.
    if len(hull) > 3 and defects is not None:
        for i in range(defects.shape[0]):  # calculate the angle
            s, e, f, _ = defects[i][0]
            start = tuple(res[s][0])
            end = tuple(res[e][0])
            far = tuple(res[f][0])
            # Side lengths of the triangle start-far-end (floats, so the
            # integer pixel coordinates cannot overflow when squared).
            a = math.hypot(float(end[0]) - float(start[0]), float(end[1]) - float(start[1]))
            b = math.hypot(float(far[0]) - float(start[0]), float(far[1]) - float(start[1]))
            c = math.hypot(float(end[0]) - float(far[0]), float(end[1]) - float(far[1]))
            if b == 0 or c == 0:
                # Degenerate defect: the far point coincides with an end
                # point, so the angle is undefined (and would divide by zero).
                continue
            # cosine theorem; clamp to [-1, 1] because floating-point rounding
            # can push the ratio just outside acos's domain.
            cos_angle = (b ** 2 + c ** 2 - a ** 2) / (2 * b * c)
            angle = math.acos(max(-1.0, min(1.0, cos_angle)))
            if angle <= math.pi / 2:  # angle less than 90 degree, treat as fingers
                far_points.append(far)
                cv2.circle(drawing, far, 8, [211, 84, 0], -1)
    possible_swipe = swipe_detection(cnt_centroid,far_points)
    return far_points, far_point, cnt_centroid, possible_swipe



def farthest_point(defects, contour, centroid):
    """Determines the defect start point that is farthest from the centroid

    Only the *start* index of every convexity defect (column 0 of each
    defect row) is considered as a candidate.

    Parameters
    ---------
    defects: ndarray or None
        Convexity defects, indexed as ``defects[i][0] -> (start, end, far, depth)``
    contour: ndarray
        An array object of the coordinates of the contour, indexed as
        ``contour[i][0] -> (x, y)``
    centroid: tuple(x,y) or None
        A tuple of the coordinate of the centroid

    Returns
    -------
    farthest_point: tuple(x,y) or None
        A tuple (plain Python ints) of the coordinate of the farthest
        point, or None when there are no defects or no centroid
    """
    if defects is None or centroid is None:
        return None

    start_indices = defects[:, 0, 0]
    if len(start_indices) == 0:
        # argmax on an empty array would raise.
        return None

    cx, cy = centroid
    # np.float was removed from NumPy; use an explicit 64-bit float.
    candidates = contour[start_indices][:, 0].astype(np.float64)
    dist = np.hypot(candidates[:, 0] - cx, candidates[:, 1] - cy)

    farthest_defect = start_indices[int(np.argmax(dist))]
    x, y = contour[farthest_defect][0]
    return (int(x), int(y))
    

In [41]:
def get_frame():
    """Grab, smooth and mirror the current frame of the global ``camera``.

    Returns
    -------
        frame: ndarray
            The current frame after a bilateral filter and a horizontal flip
    """
    # The success flag returned by read() is not checked here.
    _grabbed, raw = camera.read()
    smoothed = cv2.bilateralFilter(raw, 5, 50, 100)  # smoothing filter
    return cv2.flip(smoothed, 1)  # flip code 1: mirror horizontally

In [42]:
def render_strokes(frame, strokes, color=(0,0,0), displace=(0,0)):
    """Draw every stroke onto the frame as connected line segments.

    Parameters
    ----------
        frame: ndarray
            An array object of the frame
        strokes: list
            A list of strokes, each stroke being a list of (x, y) points
        color: tuple
            The colour passed to cv2.line
        displace: tuple
            The x and y offset added to every point (truncated to int)

    Returns
    -------
        frame: ndarray
            The frame with the strokes drawn on it
    """
    dx, dy = int(displace[0]), int(displace[1])
    for stroke in strokes:
        # A stroke needs at least two points to form a segment.
        if len(stroke) < 2:
            continue
        for begin, finish in zip(stroke[:-1], stroke[1:]):
            seg_start = (begin[0] + dx, begin[1] + dy)
            seg_end = (finish[0] + dx, finish[1] + dy)
            frame = cv2.line(frame, seg_start, seg_end, color, 40, 1)
    return frame

In [43]:
import torch.nn as nn
import torch.nn.functional as F
import torch
class Model(nn.Module):
    """A small convolutional neural network with 47 output classes.

    Layer stack (there is no activation after the convolutions):

    - conv w/ 5x5 kernel, stride-1  (1 -> 16 channels)
    - pool w/ 2x2 window, stride-2
    - conv w/ 5x5 kernel, stride-1  (16 -> 16 channels)
    - pool w/ 2x2 window, stride-2
    - relu(dense)                   (400 -> 256)
    - dense                         (256 -> 47)

    The 400 inputs of the first dense layer correspond to 16 channels of
    5x5 features, which is what a 1x32x32 input produces.
    """
    def __init__(self):
        super(Model, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16,
                               kernel_size=(5, 5), stride=(1, 1))
        self.max_pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=16,
                               kernel_size=(5, 5), stride=(1, 1))
        self.max_pool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.dense1 = nn.Linear(in_features=400, out_features=256)
        self.dense2 = nn.Linear(in_features=256, out_features=47)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images."""
        features = self.max_pool1(self.conv1(x))
        features = self.max_pool2(self.conv2(features))
        # Flatten everything except the batch dimension.
        flat = features.reshape(features.shape[0], -1)
        hidden = F.relu(self.dense1(flat))
        return self.dense2(hidden)

In [44]:
# Load the EMNIST classifier weights and the class-index -> character table.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only load "emnist_model.pkl" from a trusted source.
import pickle

model = Model()
with open("emnist_model.pkl", mode="rb") as f:
    state_dict = pickle.load(f)
model.load_state_dict(state_dict)

# The second whitespace-separated field of every mapping line is a character
# code; the position of the line is the class index.
with open('emnist-balanced-mapping.txt',mode='r') as f:
    mapping = [chr(int(row.split()[1])) for row in f]

In [45]:
from PIL import Image

def preprocess_img(im):
    """Crop the drawing to a centred square and resize it to 32x32

    The non-zero pixels are enclosed in a square bounding box (the shorter
    side is grown on both ends), the box is resized to 28x28 and then
    zero-padded by 2 pixels on every side.

    Parameters
    ----------
        im: ndarray
            A 2-D array object of a picture; zero pixels are background

    Returns
    -------
        image: ndarray
            A float32 array of shape (32,32) scaled by 1/255. All zeros
            when the picture has at most one non-zero pixel.
     """
    # Pad by the image height on every side so that growing the bounding
    # box below normally stays inside the array.
    im = np.pad(im,len(im),"constant", constant_values=0)
    rows, cols = np.nonzero(im)
    if len(rows) <= 1:
        return np.zeros((32,32),dtype="float32")
    row_min, row_max = int(np.min(rows)), int(np.max(rows))
    col_min, col_max = int(np.min(cols)), int(np.max(cols))

    # Grow the shorter side so the box becomes square; the odd pixel (if
    # any) goes to the low end.
    row_span = row_max - row_min
    col_span = col_max - col_min
    extra = abs(row_span - col_span)
    grow_low = extra - extra // 2
    grow_high = extra // 2
    if row_span > col_span:
        col_min -= grow_low
        col_max += grow_high
    else:
        row_min -= grow_low
        row_max += grow_high

    # A negative start would silently wrap around when slicing.
    row_min = max(row_min, 0)
    col_min = max(col_min, 0)

    # The max indices are inclusive, hence the +1 on the slice ends.
    im = im[row_min:row_max + 1, col_min:col_max + 1]
    im = Image.fromarray(im)
    im = im.resize((28,28))
    padded = np.pad(np.asarray(im),2,mode="constant",constant_values=0).astype(np.float32) / 255
    return padded

In [46]:
def match_image(img):
    """Classify an image with the global emnist ``model``

    Parameters
    ----------
        img: ndarray
            An image that can be reshaped to (1,1,32,32)

    Returns
    -------
        label: character
            The character from ``mapping`` for the most probable class
        confidence: float
            The softmax probability of that class
    """
    batch = torch.tensor(img.reshape(1,1,32,32))
    with torch.no_grad():
        probabilities = F.softmax(model(batch), dim=1).numpy()
    best_classes = np.argmax(probabilities, axis=1)
    best_probabilities = np.max(probabilities, axis=1)
    characters = [mapping[class_index] for class_index in best_classes]
    return (characters[0], best_probabilities[0])

In [47]:
### Camera
cv2.startWindowThread()
camera = cv2.VideoCapture(0)
camera.set(cv2.CAP_PROP_BRIGHTNESS, 200)  # property id 10

# Tunable parameters
threshold = 65  #  BINARY threshold
blurValue = 41  # GaussianBlur parameter
bgSubThreshold = 40
learningRate = 0

# Session state used by the capture loop
isBgCaptured = 0   # bool, whether the background captured
disable_frames = 0  # number of upcoming frames for which detection is skipped
next_stroke = False  # True -> the next drawn point starts a new stroke
strokes = [[]]  # list of strokes, each a list of (x, y) points
history = []  # recent centroids recorded while a swipe looks possible
labels = ""  # characters recognised so far
sources = []  # (preprocessed image, label) pairs of every classification
points = []

# Main capture loop: shows the camera feed, tracks the hand inside the capture
# region once a background has been captured ('b'), records strokes and
# classifies them on a swipe or on 'c'.
while camera.isOpened():
    frame = get_frame()
    # The capture region is anchored to the top-right corner; its height is
    # 90% of the frame height and its pixel width equals that height.
    height = 0.9 * frame.shape[0]
    width = height / frame.shape[1]
    cap_region_x_begin=1-width  # start point/total width
    cap_region_y_end=0.9  # end point/total height
    
    # draw the captured boundary
    cv2.rectangle(frame, (int(cap_region_x_begin * frame.shape[1]), 0),
                 (frame.shape[1], int(cap_region_y_end * frame.shape[0])), (255, 0, 0), 2)

    # white strip along the bottom 10% of the frame for the recognised text
    cv2.rectangle(frame,(0,int(0.9*frame.shape[0])),(frame.shape[1],frame.shape[0]),(255,255,255),-1)
    if labels != "":
        cv2.putText(frame,labels,(10,frame.shape[0]-10),cv2.FONT_HERSHEY_COMPLEX,2,(0,0,0),2)
    
    #  Main operation
    if isBgCaptured == 1:  # this part wont run until background captured
        # keep only the four most recent swipe centroids
        if len(history) > 4:
            history = history[-4:]
        img = removeBG(frame)
        img = img[0:int(cap_region_y_end * frame.shape[0]),
                    int(cap_region_x_begin * frame.shape[1]):frame.shape[1]]  # clip the ROI
        
        # convert the image into binary image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
        blur = cv2.GaussianBlur(gray, (blurValue, blurValue), 0)
        _, thresh = cv2.threshold(blur, threshold, 255, cv2.THRESH_BINARY)

        # get the contours
        thresh1 = copy.deepcopy(thresh)
        # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
        # (contours, hierarchy); the last two items are the same in both.
        contours, hierarchy = cv2.findContours(thresh1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        length = len(contours)
        maxArea = -1
        if disable_frames > 0:
            # detection is paused for a few frames after a swipe
            disable_frames -= 1
        else:
            if length > 0:
                for i in range(length):  # find the biggest contour (according to area)
                    temp = contours[i]
                    area = cv2.contourArea(temp)
                    if area > maxArea:
                        maxArea = area
                        ci = i

                res = contours[ci]
                hull = cv2.convexHull(res)
                drawing = np.zeros(img.shape, np.uint8)
                cv2.drawContours(drawing, [res], 0, (0, 255, 0), 2)
                #cv2.drawContours(drawing, [hull], 0, (0, 0, 255), 3)
                fingers, far_point, cnt_centroid, possible_swipe = calculateFingers(res,drawing)

                cv2.imshow("test",drawing)
                # With at most two finger defects the farthest point is
                # treated as the pen tip and appended to the current stroke.
                if far_point is not None:
                    if len(fingers) <= 2:
                        history = []
                        if next_stroke:
                            next_stroke = False
                            strokes.append([far_point])
                        else:
                            strokes[-1].append(far_point)

                if 5 >= len(fingers) >= 3:
                    # Three to five defects: drawing is paused; record the
                    # centroid while a swipe looks possible.
                    if possible_swipe: 
                        history.append(cnt_centroid)
                    next_stroke = True
                else: # first time when convexes aren't found
                    # minimum horizontal centroid travel that counts as a swipe
                    t = cap_region_x_begin * frame.shape[0] * 0.4
                    if cnt_centroid is not None and len(history) != 0:
                        if cnt_centroid[0] - history[0][0] > t:
                            # centroid moved towards larger x: discard the drawing
                            strokes = [[]]
                            history = []
                            next_stroke = False
                            disable_frames = 15
                        elif history[0][0] - cnt_centroid[0] > t and not (len(strokes[0]) == 0 and len(strokes) == 1):
                            # centroid moved towards smaller x and something
                            # was drawn: classify the drawing
                            img = np.zeros((int(cap_region_y_end * frame.shape[0]),
                                              int((1-cap_region_x_begin) * frame.shape[1])),
                                              dtype=np.uint8)
                            img = render_strokes(img,strokes,[255,255,255])
                            img = preprocess_img(img)
                            label, confidence = match_image(img)
                            labels = labels + label
                            sources.append((img,label))
                            print(label, confidence)
                            disable_frames = 15
                            history = []
                            strokes = [[]]
                    
    # Stroke points are relative to the capture region; shift them back into
    # full-frame coordinates for display.
    displace = (frame.shape[1]*cap_region_x_begin,0)
    render_strokes(frame,strokes,[0,255,0],displace=displace)
    cv2.imshow('original', frame)


    # Keyboard OP
    # Mask to the low byte: some platforms set higher bits in the key code.
    k = cv2.waitKey(10) & 0xFF
    if k == 27:  # press ESC to exit
        camera.release()
        cv2.destroyAllWindows()
        break
    elif k == ord('b'):  # press 'b' to capture the background
        bgModel = cv2.createBackgroundSubtractorMOG2(0, bgSubThreshold)
        isBgCaptured = 1
        print( '!!!Background Captured!!!')
    elif k == ord('r'):  # press 'r' to reset the background
        bgModel = None
        triggerSwitch = False
        isBgCaptured = 0
        strokes = [[]]
        history = []
        next_stroke = False
        labels = ""
        print ('!!!Reset BackGround!!!')
    elif k == ord(' '): #press spacebar to clear the strokes
        strokes = [[]]
        history = []
        next_stroke = False
    elif k == ord('z'): #press 'z' to erase the last character
        labels = labels[:-1]
    elif k == ord('c'): #press 'c' to pass the drawing into the emnist model and print the label and confidence
        img = np.zeros((int(cap_region_y_end * frame.shape[0]),
                          int((1-cap_region_x_begin) * frame.shape[1])),
                          dtype=np.uint8)
        img = render_strokes(img,strokes,[255,255,255])
        img = preprocess_img(img)
        
        label, confidence = match_image(img)
        labels = labels + label
        sources.append((img,label))
        strokes = [[]]
        print(label, confidence)
        
    elif k == ord('s'): #press 's' to save the drawing
        to_save = np.zeros((int(cap_region_y_end * frame.shape[0]),
                          int((1-cap_region_x_begin) * frame.shape[1])),
                          dtype=np.uint8)
        to_save = render_strokes(to_save,strokes,[255,255,255])
        to_save = preprocess_img(to_save)
        cv2.imshow("to_save",to_save)
        # preprocess_img returns floats in [0, 1]; scale back to 8-bit so the
        # PNG is not written as an almost black image.
        cv2.imwrite('{}.png'.format(str(datetime.datetime.now())),(to_save * 255).astype(np.uint8))
        print("!!!Drawing Saved!!!")
        

!!!Background Captured!!!
!!!Background Captured!!!


In [None]:
# Show every classified image side by side, titled with its predicted label.
n_sources = len(sources)
if n_sources == 1:
    # plt.subplots() returns a single Axes here, not an array.
    fig,ax = plt.subplots()
    image, label = sources[0]
    ax.imshow(image)
    ax.set_title(label)
elif n_sources > 1:
    fig,axs = plt.subplots(1,n_sources)
    for axis, (image, label) in zip(axs, sources):
        axis.imshow(image)
        axis.set_title(label)

In [None]:
# Show the first classified image; skip when nothing has been classified yet
# (indexing an empty list would raise IndexError).
if sources:
    plt.imshow(sources[0][0])
