# Alphabet recognition

## Load libraries

In [31]:
from collections import deque
import numpy as np
import cv2 
from keras.models import load_model
import pyttsx3
# Initialise the pyttsx3 text-to-speech engine.
# NOTE(review): `engine` is not referenced anywhere in the visible code below —
# confirm whether speech output is still planned or this init can be dropped.
engine = pyttsx3.init()

## Initialize variables

In [32]:
# Trained classifiers, loaded once from the HDF5 checkpoints on disk
model_mlp = load_model('best_model_mlp.hdf5')    # MLP model
model_cnn = load_model('best_model_final.hdf5')  # CNN model

# Class index -> character: 0..25 map to 'a'..'z'; 26 is the empty label that
# is displayed until a first prediction has been made.
letters = {idx: chr(ord('a') + idx) for idx in range(26)}
letters[26] = ''

# Lower / upper HSV bounds of the red marker that the camera tracks
redLower = np.array([170, 100, 60])
redUpper = np.array([180, 255, 255])

# 5x5 structuring element used by the morphological clean-up of the mask
kernel = np.ones((5, 5), dtype=np.uint8)

# Canvas the stroke is drawn on, plus a placeholder for the cropped letter
blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
alphabet = np.zeros((200, 200, 3), dtype=np.uint8)

# Most recent marker positions (newest first once the loop fills it)
points = deque(maxlen=512)

# Default camera device
cap = cv2.VideoCapture(0)

# Both predictions start at the empty label
prediction_mlp = prediction_cnn = 26

## Alphabet detection

In [33]:
# Main capture loop.
#
# Each iteration grabs a camera frame, locates the largest red object (the
# "pen") and records its position in `points`. While the pen is visible the
# trail is drawn on both the preview frame and the off-screen `blackboard`.
# When the pen disappears, the drawing on the blackboard is cropped, resized
# to 28x28 and classified by both models; the trail and blackboard are then
# cleared for the next letter.
#
# Reads/writes the module-level names: cap, redLower, redUpper, kernel,
# blackboard, alphabet, points, prediction_mlp, prediction_cnn, letters,
# model_mlp, model_cnn.
#
# The try/finally guarantees the camera and windows are released even if an
# exception escapes the loop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing/disconnected); `frame`
            # would be None and every later OpenCV call would fail.
            break

        # Mirror the image so on-screen motion follows the hand
        frame = cv2.flip(frame, 1)
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # Binary mask of the pixels that fall inside the red HSV range
        red = cv2.inRange(hsv, redLower, redUpper)

        # Clean up the mask: erosion, opening, then dilation
        red = cv2.erode(red, kernel)
        red = cv2.morphologyEx(red, cv2.MORPH_OPEN, kernel)
        red = cv2.dilate(red, kernel)

        # findContours returns (contours, hierarchy) or, in OpenCV 3.x,
        # (image, contours, hierarchy); index -2 is the contour list in both.
        cnts = cv2.findContours(red, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        if len(cnts) > 0:
            # Pen visible: track the largest red contour
            cnt = max(cnts, key=cv2.contourArea)
            (x, y), radius = cv2.minEnclosingCircle(cnt)
            # Channel values kept within the valid 8-bit range (max 255)
            cv2.circle(frame, (int(x), int(y)), int(radius), (125, 255, 255), 2)

            M = cv2.moments(cnt)
            if M['m00'] != 0:
                center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))
            else:
                # Zero-area contour: the centroid is undefined, so fall back
                # to the enclosing-circle centre instead of dividing by zero.
                center = (int(x), int(y))
            points.appendleft(center)

        elif len(points) != 0:
            # Pen removed while a stroke is recorded: classify what was drawn
            blackboard_gray = cv2.cvtColor(blackboard, cv2.COLOR_BGR2GRAY)
            blur = cv2.medianBlur(blackboard_gray, 15)
            blur = cv2.GaussianBlur(blur, (5, 5), 0)
            thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
            blackboard_cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]

            if len(blackboard_cnts) >= 1:
                cnt = max(blackboard_cnts, key=cv2.contourArea)

                # Ignore small contours so noise is not classified as a letter
                if cv2.contourArea(cnt) > 1000:
                    x, y, w, h = cv2.boundingRect(cnt)
                    # Clamp the 10-pixel margin at the top/left border: a
                    # negative start index would be interpreted by NumPy as
                    # counting from the end and yield an empty or wrong crop.
                    top = max(y - 10, 0)
                    left = max(x - 10, 0)
                    alphabet = blackboard_gray[top:y + h + 10, left:x + w + 10]

                    try:
                        img = cv2.resize(alphabet, (28, 28))
                    except cv2.error:
                        # Best effort: an unusable crop only skips this
                        # classification; the frame is still shown below so
                        # the exit key keeps being polled.
                        pass
                    else:
                        # The cropped letter that is fed to the classifiers
                        cv2.imshow("alphabet", alphabet)

                        img = img.astype('float32') / 255
                        batch = img.reshape(1, 28, 28, 1)
                        # Prediction from MLP
                        prediction_mlp = np.argmax(model_mlp.predict(batch)[0])
                        # Prediction from CNN
                        prediction_cnn = np.argmax(model_cnn.predict(batch)[0])

            # Start the next letter with an empty trail and a clean blackboard
            points.clear()
            blackboard.fill(0)

        # Connect consecutive tracked points. Copy the deque to a list first:
        # indexing into the middle of a deque is O(n).
        trail = list(points)
        for start, end in zip(trail, trail[1:]):
            cv2.line(frame, start, end, (0, 0, 0), 2)
            cv2.line(blackboard, start, end, (255, 255, 255), 8)

        # Unknown class indices are shown as an empty label rather than
        # raising KeyError.
        cv2.putText(frame, "MLP Prediction: " + letters.get(int(prediction_mlp), ''),
                    (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
        cv2.putText(frame, "CNN Prediction: " + letters.get(int(prediction_cnn), ''),
                    (20, 450), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow("Alphabet Detection System", frame)

        # Break on pressing Enter; the mask drops any high bits some
        # platforms add to the key code.
        if cv2.waitKey(5) & 0xFF == 13:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()