## Demo Alphabet Recognition (Sign and Draw)

In [1]:
import cv2
import mediapipe as mp
import copy
import itertools
import numpy as np
from keras.models import load_model

In [2]:
# Class labels for the sign-recognition model: the index of the highest model
# output is looked up in this list. 'del' and 'space' are editing commands
# rather than letters.
signs = 'A B C D del E F G H I J K L M N O P Q R S space T U V W X Y Z'.split()

In [3]:
# Class labels for the handwriting model: the index of the highest model
# output is looked up in this list.
eng_hw = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

In [4]:
# Short aliases for the MediaPipe modules used below: `mp_drawing` to draw the
# detected hand skeleton and `mp_hands` for the hand-tracking solution itself.
mp_drawing, mp_hands = mp.solutions.drawing_utils, mp.solutions.hands

In [5]:
def get_landmark_list(image, landmarks, mode = 0):
    """Extract the (x, y) coordinates of every landmark of one hand.

    Args:
        image: Frame the landmarks were detected on; only ``image.shape`` is
            read (rows first, columns second).
        landmarks: Object exposing a ``landmark`` iterable whose items have
            ``x`` and ``y`` attributes.
        mode: ``0`` returns the coordinates exactly as stored on the
            landmarks; any other value scales them by the image width and
            height and truncates them to integers.

    Returns:
        A list of ``[x, y]`` pairs, one per landmark, in input order.
    """
    height, width = image.shape[0], image.shape[1]

    if mode == 0:
        # Keep the coordinates as reported by the detector.
        return [[point.x, point.y] for point in landmarks.landmark]

    # Denormalization: scale to pixel positions on the image.
    return [
        [int(point.x * width), int(point.y * height)]
        for point in landmarks.landmark
    ]

In [6]:
def pre_process_landmark(landmark_list):
    """Express landmarks relative to the first one and flatten the result.

    Args:
        landmark_list: List of mutable ``[x, y]`` pairs. It is not modified;
            the function works on a deep copy.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` in which the first landmark has
        been subtracted from every landmark (so the first pair becomes zero).
        An empty input yields an empty list.
    """
    points = copy.deepcopy(landmark_list)

    if points:
        # The first landmark is the origin of the relative coordinate system.
        origin_x, origin_y = points[0][0], points[0][1]
        for point in points:
            point[0] = point[0] - origin_x
            point[1] = point[1] - origin_y

    # Flatten the list of pairs into a one-dimensional list.
    return list(itertools.chain.from_iterable(points))

In [7]:
def check_less(lis, val):
    """Return True when ``val`` is less than or equal to every point's y value.

    Args:
        lis: Iterable of points; only element ``[1]`` of each point is read.
        val: Value to compare against each point's second coordinate.

    Returns:
        ``True`` if ``val <= point[1]`` holds for all points (also for an
        empty ``lis``), otherwise ``False``.
    """
    for point in lis:
        if not (val <= point[1]):
            return False
    return True

In [8]:
def paint(frame,points):
    """Draw every stroke in ``points`` onto ``frame`` as connected line segments.

    Args:
        frame: Image that ``cv2.line`` draws on (modified in place).
        points: List of strokes; each stroke is a list of points. Consecutive
            points of a stroke are joined; strokes with fewer than two points
            draw nothing.
    """
    for stroke in points:
        # Pair each point with its successor to get the segments of the stroke.
        for start, end in zip(stroke, stroke[1:]):
            cv2.line(frame, start, end, (100, 0, 255), 10)

In [10]:
# For webcam input:
# Interactive demo loop. Each camera frame is mirrored, run through MediaPipe
# hand tracking and then handled according to the current mode:
#   * mode 0 ("Sign"): classify the hand pose with `mlp_model`; a label held for
#     more than 50 consecutive frames is committed to the result text.
#   * mode 1 ("Draw"): the index fingertip draws strokes in the air; holding the
#     confirm gesture for 30 frames sends the drawing to `cnn_model`.
# SPACEBAR toggles the mode (and clears the text); ESC quits.
mode = 0 # starting mode (spacebar to change mode)
m = ['Sign','Draw']  # display names, indexed by `mode`
cap = cv2.VideoCapture(0)
try:
    mlp_model = load_model('model/model_hg_42.hdf5', compile=True )  # mode 0 for sign
    cnn_model = load_model('model/handwriting_bestmodel.hdf5') # mode 1 for draw
    with mp_hands.Hands(
        min_detection_confidence=0.6,
        max_num_hands=1,
        min_tracking_confidence=0.6) as hands:
        sen = ''        # text recognised so far
        cf = []         # run of identical consecutive sign predictions
        rpoints = [[]]  # drawn strokes; each stroke is a list of (x, y) points
        clear = 0       # consecutive frames showing the "clear" gesture
        cf_count = 0    # consecutive frames showing the "confirm" gesture
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            # Flip the image horizontally for a later selfie-view display, and convert
            # the BGR image to RGB.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            # To improve performance, optionally mark the image as not writeable to
            # pass by reference.
            image.flags.writeable = False
            results = hands.process(image)
            # Draw the hand annotations on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0]
                # Mode 0 yields detector coordinates, mode 1 pixel coordinates.
                landmark_list = get_landmark_list(image, hand_landmarks, mode)
                # Sign mode
                if mode == 0:
                    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    pre_processed_landmark_list = pre_process_landmark(landmark_list)
                    # Pass an explicit (1, n_features) array: a bare nested list can be
                    # interpreted by Keras as a list of separate model inputs.
                    prediction = mlp_model.predict(
                        np.array([pre_processed_landmark_list], dtype=np.float32))
                    result = signs[int(np.argmax(prediction))]
                    # `cf` only ever holds repeats of one label: a different
                    # prediction resets the run, anything else extends it.
                    if cf and result not in cf:
                        cf.clear()
                    else:
                        cf.append(result)
                    # More than 50 identical predictions in a row confirm the sign.
                    if len(cf) > 50:
                        if 'del' in cf:
                            if sen:
                                sen = sen[:-1]
                        elif 'space'  in cf:
                            sen += ' '
                        else:
                            sen += result
                        cf.clear()
                        image = cv2.putText(image, 'Confirmed',
                                        (280, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 255, 0),2,cv2.LINE_AA)
                    image = cv2.putText(image, result,
                                        (190, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 255),2,cv2.LINE_AA)
                #Draw mode
                elif mode == 1:
                    # Landmark indices follow MediaPipe's hand model (0 wrist,
                    # 4 thumb tip, 8 index tip, 12/16/20 middle/ring/pinky tips,
                    # 7/11/15/19 the joints just below those tips). Image y grows
                    # downwards, so a smaller y means higher up in the frame.
                    index_tip = (landmark_list[8][0], landmark_list[8][1])
                    if (landmark_list[11][1] <= landmark_list[12][1] and landmark_list[15][1] <= landmark_list[16][1] and
                        landmark_list[19][1] <= landmark_list[20][1] and landmark_list[7][1] > landmark_list[8][1] and
                       check_less(landmark_list,landmark_list[8][1])):
                        # Pen down: middle/ring/pinky tips are not above their joints,
                        # the index tip is above its joint and is the topmost landmark.
                        rpoints[-1].append(index_tip)
                        cv2.circle(image, index_tip, 3, (0, 0, 250), 12)
                        clear = 0
                        cf_count = 0
                    elif  check_less(landmark_list,landmark_list[4][1]) and rpoints[0] :
                        # Confirm gesture: thumb tip is the topmost landmark and
                        # something has been drawn.
                        cf_count += 1
                        if cf_count >= 30:
                            # White single-channel canvas, created only when needed.
                            draw_frame = np.full((image.shape[0], image.shape[1], 1), 255, dtype=np.uint8)
                            paint(draw_frame, rpoints)
                            # Convert to binary and change bg = black, fg = white
                            draw_frame[draw_frame < 255] = 0
                            draw_frame  = cv2.bitwise_not(draw_frame)

                            # Morphological closing with a 15x15 kernel to remove noise
                            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,15))
                            morphed = cv2.morphologyEx(draw_frame , cv2.MORPH_CLOSE, kernel)
                            # Find the max-area contour
                            cnts = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
                            if cnts:
                                cnt = max(cnts, key=cv2.contourArea)

                                # Get bounding box
                                x,y,w,h = cv2.boundingRect(cnt)

                                # Crop image using bounding box plus a 10 px margin.
                                # The start indices are clamped at 0: a negative slice
                                # start would wrap around and give an empty/wrong crop
                                # when the drawing is close to the top or left edge.
                                # End indices past the image are clipped by NumPy.
                                top = max(y - 10, 0)
                                left = max(x - 10, 0)
                                dst = morphed[top:y+h+10, left:x+w+10]

                                # Preprocess input: 28x28, float32 scaled to [0, 1]
                                image_output = cv2.resize(dst, (28, 28))
                                image_output = np.array(image_output)
                                image_output = image_output.astype('float32')/255
                                # Model prediction
                                prediction = cnn_model.predict(image_output.reshape(1,28,28,1))
                                result = eng_hw[int(np.argmax(prediction))]
                                sen += result
                                rpoints.clear()
                                rpoints.append([])
                                clear = 0
                                cf_count = 0
                                image = cv2.putText(image, result,
                                                (190, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                (0, 0, 255),2,cv2.LINE_AA)
                                image = cv2.putText(image, 'Confirmed',
                                                (280, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                (0, 255, 0),2,cv2.LINE_AA)
                    elif  landmark_list[4][1] > landmark_list[0][1]:
                        # Clear gesture: thumb tip below the wrist. The drawing is
                        # wiped immediately; holding it longer also deletes a letter
                        # (see the `clear > 30` check below).
                        clear += 1
                        rpoints.clear()
                        rpoints.append([])
                        image = cv2.putText(image, 'Cleared',
                                        (280, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 255, 0),2,cv2.LINE_AA)
                        cf_count = 0
                    else:
                        # Pen up: show the cursor and start a new stroke if the
                        # current one already has points.
                        clear = 0
                        cf_count = 0
                        cv2.circle(image, index_tip, 3, (0, 250, 250), 12)
                        if rpoints[-1] :
                            rpoints.append([])
                # Clear gesture held for more than 30 frames: delete the last letter.
                if clear > 30:
                    clear = 0
                    if sen:
                        sen = sen[:-1]

                paint(image,rpoints)
            image = cv2.putText(image, 'Prediction: ' ,
                                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                    (0, 0, 255),2,cv2.LINE_AA)
            image = cv2.putText(image, 'Result: '+ sen,
                        (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255),2,cv2.LINE_AA)
            image = cv2.putText(image, 'Mode: '+ m[mode],
                                        (image.shape[1] - 190, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 255),2,cv2.LINE_AA)


            cv2.imshow('Alphabet Recognition (SPACEBAR to change mode | ESC to quit)', image)

            # Keep only the low byte: some platforms set extra high bits on the
            # key code returned by waitKey.
            key = cv2.waitKey(1) & 0xFF
            if (key == 27):  # ESC
                break
            elif (key == 32): # Spacebar
                if mode == 0:
                    mode = 1
                    cf.clear()
                    sen =''
                elif mode == 1:
                    mode = 0
                    rpoints.clear()
                    rpoints.append([])
                    sen = ''
finally:
    # Always free the GUI windows and the camera, even if the loop was left
    # through an exception or a kernel interrupt.
    cv2.destroyAllWindows()     # close all OpenCV windows
    if cap.isOpened():
        cap.release()