In [15]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import uuid
import os
import glob
import tensorflow as tf

features = 20 * 20
labels = ['i', 'p', 'm', 'f', '7']
categories = len(labels)

sess = tf.InteractiveSession()
m_x = None
m_y_sm = None

def prepare_model():
    
    global sess, m_x, m_y_sm
    
    def load_images():
        x = np.zeros((0, features))
        y = np.zeros((0), dtype=int)
        yoh = np.zeros((0, categories))
        i = 0
        for i in range(len(labels)):
            label = labels[i]
            print('Loading letter:', label)
            for filename in glob.glob(os.path.join('../data/' + label, '*.png')):
                img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
                x = np.append(x, img.reshape(1, features), axis=0)
                y = np.append(y, [i], axis=0)
                yoh = np.append(yoh, np.zeros((1, categories)), axis=0)
                yoh[-1, i] = 1

        return x, y, yoh
    
    x, y, yoh = load_images()
    state = np.random.get_state()
    np.random.shuffle(x)
    np.random.set_state(state)
    np.random.shuffle(y)
    np.random.set_state(state)
    np.random.shuffle(yoh)
    
    samples = len(x)
    x_train = x[:-samples // 5]
    x_test = x[-samples // 5:]
    y_train = y[:-samples // 5]
    yoh_train = yoh[:-samples // 5]
    y_test = y[-samples // 5:]
    yoh_test = yoh[-samples // 5:]
    
    m_x = tf.placeholder(tf.float32, [None, features])
    m_y_ = tf.placeholder(tf.int32, [None])

    m_W = tf.Variable(tf.zeros([features, categories]))
    m_b = tf.Variable(tf.zeros([categories]))
    m_y = tf.matmul(m_x, m_W) + m_b
    m_y_sm = tf.nn.softmax(m_y)

    m_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=m_y_, logits=m_y))
    m_train = tf.train.AdamOptimizer(0.1).minimize(m_loss)

    _, m_acc = tf.metrics.accuracy(m_y_, tf.argmax(m_y, 1))
    
    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()
    for epoch in range(1000):
        sess.run(m_train, feed_dict={m_x: x_train, m_y_: y_train})
        accuracy_train = sess.run(m_acc, feed_dict={m_x: x_train, m_y_: y_train})
        accuracy_test = sess.run(m_acc, feed_dict={m_x: x_test, m_y_: y_test})
        print('Epoch:', epoch, 'Training accuracy:', accuracy_train, 'Testing accuracy:', accuracy_test, end='                                                                   \r')


def webcam(showRes=False):
    
    global sess, m_x, m_y_sm
    
    try:
        fnum = 0
        letter = None
        cam = cv2.VideoCapture(0)
        while True:
            ret_val, imgCap = cam.read()
            imgCap = cv2.cvtColor(imgCap, cv2.COLOR_BGR2GRAY)
            imgCap = cv2.adaptiveThreshold(imgCap, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 13, 5)

            img = imgCap.copy()
            imgCap = 255 - imgCap

            res = capture_letter(img)
            if res is not None:
                letter = res
                letter2 = cv2.resize(letter, (100, 100), cv2.INTER_NEAREST)
                imgCap[380:, 540:] = letter2

            if letter is not None and showRes:
                yoh_pred = sess.run(m_y_sm, feed_dict={m_x: letter.reshape((1, features))})
                y_pred = np.argmax(yoh_pred, axis=1)
                cv2.putText(imgCap, labels[y_pred[0]].upper(), (50, 450), cv2.FONT_HERSHEY_TRIPLEX, 6, 0, 5)
                
            cv2.imshow('webcam', imgCap)
            key = cv2.waitKey(1)
            if key == 27: 
                break
            elif key == ord('m'):
                cv2.imwrite('../data/m/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('7'):
                cv2.imwrite('../data/7/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('p'):
                cv2.imwrite('../data/p/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('i'):
                cv2.imwrite('../data/i/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('f'):
                cv2.imwrite('../data/f/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('s'):
                cv2.imwrite('../data/s/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('t'):
                cv2.imwrite('../data/t/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('u'):
                cv2.imwrite('../data/u/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('v'):
                cv2.imwrite('../data/v/{0}.png'.format(uuid.uuid4().hex), letter)
            elif key == ord('y'):
                cv2.imwrite('../data/y/{0}.png'.format(uuid.uuid4().hex), letter)
    finally:
        cv2.destroyAllWindows()
    
def capture_letter(img):
    img[img > 0] = 1
    ic = img.shape[1] // 2
    jc = img.shape[1] // 2
    
    frame = [ic, jc, ic + 1, jc + 1]
    d = 0
    sz = img[ic, jc]
    while sz == 0:
        if d == 0:
            if frame[0] > 0:
                frame[0] -= 1
                sz += np.sum(img[frame[0], frame[1]:frame[3]])
        elif d == 1: 
            if frame[1] > 0:
                frame[1] -= 1
                sz += np.sum(img[frame[0]:frame[2], frame[1]])
        elif d == 2:
            if frame[2] < img.shape[0] - 1:
                frame[2] += 1
                sz += np.sum(img[frame[2] - 1, frame[1]:frame[1]])
        elif d == 3:
            if frame[3] < img.shape[1] - 1:
                frame[3] += 1
                sz += np.sum(img[frame[0]:frame[2], frame[3] - 1])
            d = -1
        d += 1
    
    d = 0
    s = [1, 1, 1, 1]
    sz = 1
    while sz > 0:
        if d == 0:
            if frame[0] == 0:
                s[0] = 0
            else:
                s[0] = np.sum(img[frame[0], frame[1]:frame[3]])
            if s[0] > 0:
                frame[0] -= 1
        elif d == 1:
            if frame[1] == 0:
                s[1] = 0
            else:
                s[1] = np.sum(img[frame[0]:frame[2], frame[1]])
            if s[1] > 0:
                frame[1] -= 1
        elif d == 2:
            if frame[2] == img.shape[0] - 1:
                s[2] = 0
            else:
                s[2] = np.sum(img[frame[2] - 1, frame[1]:frame[3]])
            if s[2] > 0:
                frame[2] += 1
        elif d == 3:
            if frame[3] == img.shape[1] - 1:
                s[3] = 0
            else:
                s[3] = np.sum(img[frame[0]:frame[2], frame[3] - 1])
            if s[3] > 0:
                frame[3] += 1
            d = -1
        sz = s[0] + s[1] + s[2] + s[3]
        d += 1

    res = img[frame[0]:frame[2], frame[1]:frame[3]]
    while (np.sum(res[0, :]) == 0):
        res = res[1:, :]
    while (np.sum(res[:, 0]) == 0):
        res = res[:, 1:]
    while (np.sum(res[-1, :]) == 0):
        res = res[:-1, :]
    while (np.sum(res[:, -1]) == 0):
        res = res[:, :-1]
    
    if not (res.shape[0] > 0 and res.shape[1] > 0):
        return None

    res = cv2.resize(res, (20, 20), cv2.INTER_LINEAR)
    res[res > 0] = 255
    return res

In [16]:
webcam()

In [17]:
prepare_model()

Loading letter: i
Loading letter: p
Loading letter: m
Loading letter: f
Loading letter: 7
Epoch: 999 Training accuracy: 0.90541947 Testing accuracy: 0.90540814                                                                   

In [18]:
webcam(showRes=True)