In [23]:
import os
import cv2 as cv
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [24]:
# from PIL import Image
# import glob
# import os

# # new folder path (may need to alter for Windows OS)
# # change path to your path
# path = '/Users/tareklein/Downloads/archive/train/plus cleaned' #the path where to save resized images
# # create new folder
# if not os.path.exists(path):
#     os.makedirs(path)

# # loop over existing images and resize
# # change path to your path
# for filename in glob.glob(path + '/*.jpg'): #path of raw images
#     img = Image.open(filename).resize((28,28))
#     # save resized images to new folder with existing filename
#     img.save('{}{}{}'.format(path,'/',os.path.split(filename)[1]))

In [25]:
def normalize_img(image, label):
    return (tf.cast(image, tf.float32) / 255.0, label)

In [26]:

mnist_corrupted = 'mnist_corrupted'
mnist = 'mnist'

dataset = mnist_corrupted

(train_dataset, test_dataset), ds_info = tfds.load(
    dataset,
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
    )

# (train_dataset2, test_dataset2), ds_info = tfds.load(
#     mnist_corrupted,
#     split=['train', 'test'],
#     shuffle_files=True,
#     as_supervised=True,
#     with_info=True
#     )

In [27]:
train_dataset = train_dataset.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
# Store dataset in cache
train_dataset = train_dataset.cache()
# Shuffle data
train_dataset = train_dataset.shuffle(len(train_dataset))
# Split up into a batch of 64
train_dataset = train_dataset.batch(64)
# Grab other stuff before it done dealing with current stuff
# Optimization 
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)


In [28]:
#test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
# Split up into a batch of 64
test_dataset = test_dataset.batch(64)
# Store dataset in cache
test_dataset = test_dataset.cache()
# Grab other stuff before it done dealing with current stuff
# Optimization 
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

In [29]:
# Visualise image tensor (batch has to be 1)
visualise = False
if visualise:
    from tensorflow.keras.datasets import mnist
    x = 0
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.shuffle(len(train_dataset))
    
    for (img, label) in train_dataset:
        if label.numpy() == x:
            print(img.numpy(), label.numpy())
            x+=1
        if x == 10:
            break
    else:
        pass

In [30]:
train = False
if train:
    model = tf.keras.models.Sequential([
        tf.keras.Input((28,28,1)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                metrics=['accuracy'])

    history = model.fit(train_dataset, epochs=30, validation_data=(train_dataset))


    model.save('handwritten.model')
else:
    pass
#model.summary()

In [31]:
model = tf.keras.models.load_model('handwritten.model')

loss, accuracy = model.evaluate(train_dataset)

print(loss)
print(accuracy)

0.005848041269928217
0.998199999332428


In [32]:
plot = False
if plot:
    from matplotlib.lines import lineStyles
    import matplotlib.pyplot as plt
    plt.figure(figsize=(18,8))
    plt.plot(0,0, history.history['accuracy'], label='Accuracy', lw=3)
    plt.plot(0,0, history.history['loss'], label='Loss', lw=3)
    plt.plot(0,0, history.history['val_accuracy'], label='Val accuracy', lw=3, linestyle='--')
    plt.legend


In [33]:
def rescaleFrame(frame, scale):
    width = int(frame.shape[1] * scale)
    height = int(frame.shape[1] * scale)
    print(width, height)
    dimensions = (width, height)
    
    return cv.resize(frame, dimensions, interpolation=cv.INTER_AREA)

In [34]:
def zoom_at(img, zoom=1, angle=0, coord=None):
    
    cy, cx = [ i/2 for i in img.shape[:-1] ] if coord is None else coord[::-1]
    
    rot_mat = cv.getRotationMatrix2D((cx,cy), angle, zoom)
    result = cv.warpAffine(img, rot_mat, img.shape[1::-1], flags=cv.INTER_LINEAR)
    
    return result

In [35]:
def calculate_zoom(window_name, height):
    
    _, _, _, thresh_window_h = cv.getWindowImageRect(window_name)
    ratio = (thresh_window_h-height)/125
    if ratio < 1:
        ratio = 1
    return ratio

In [36]:
def convert_28(name, copy_from, gray_range):
    _, window = cv.threshold(copy_from, gray_range[0], gray_range[1], cv.THRESH_BINARY)
    window = cv.resize(name, (28,28), interpolation=cv.INTER_AREA)
    window = window.astype("float32") / 255.0
    window = np.expand_dims(window, axis=-1)
    return window

In [37]:
def predict(window):
    prediction = model.predict(window.reshape(1,28,28))
    predargmax = np.argmax(prediction)
    return predargmax

In [38]:
def create_window(thresh, idx, cnt):
        nh, nw = cnt.shape[:2]
        x,y,w,h = cv.boundingRect(cnt)
        centercoords = [round(x+(w/2)), round(y+(h/2))]
        gray_range = (150, 255)
        fill_color = (0,0,0)
        window_name = 'Win_Number: ' + str(idx)
        _, _, window_w, window_h = cv.getWindowImageRect('Black & White (gray threshold)')
        
        if h >= 0.3 * nh:
            # Filter out bounding boxed with a low height
            if (y+h - y) > 100:
                # Zoom onto number
                zoom_ratio = calculate_zoom('Black & White (gray threshold)', h)
                
                # Create window
                _, temp_window = cv.threshold(thresh, gray_range[0], gray_range[1], cv.THRESH_BINARY)
                
                border = 5
                # Top
                cv.rectangle(temp_window, (0, y-border), (window_w, 0), fill_color, -1)
                # Bottom
                cv.rectangle(temp_window, (0, y+h), (window_w, window_h), fill_color, -1)
                # Left
                cv.rectangle(temp_window, (x-border, 0), (0, window_h), fill_color, -1)
                # Right
                cv.rectangle(temp_window, (x+w+border, 0), (window_w, window_h), fill_color, -1)
                temp_window = zoom_at(temp_window, zoom_ratio, 0, centercoords)
                #cv.circle(temp_window, centercoords, 10, (255,255,255), 2)

                #cv.putText(temp_window, 'Ratio: ' + str(zoom_ratio), (20,80), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
                
                # Convert window to 28x28 for prediction (data trained on 28x28)
                temp_28 = convert_28(temp_window, thresh, gray_range)
                # Predict
                predargmax = predict(temp_28)
                # Bounding Box
                cv.rectangle(thresh, (x,y), (x+w, y+h), (255, 255, 0), 4)
                
                cv.putText(temp_window, 'Prediction: ' + str(predargmax), (20,40), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
                cv.putText(temp_window, 'Prediction: ' + str(predargmax), (20,40), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 1)
                
                cv.imshow(window_name, temp_window)
                cv.moveWindow(window_name, cv.getWindowImageRect('Gray scale')[2]*(idx), cv.getWindowImageRect('Gray scale')[3])
                
                return predargmax

In [61]:
capture = cv.VideoCapture(0)
scale = 0.35
gray_range = (150, 255)
number = []
predictions = []
cleared = True
while True:
    isTrue, frame = capture.read()
    frame_resized = rescaleFrame(frame, scale)

    # Grayscaled
    gray = cv.cvtColor(frame_resized, cv.COLOR_BGR2GRAY)
    gray_28 = convert_28(gray, gray, gray_range)
    predargmax_gray_28 = predict(gray_28)

    # Black and white
    ret, thresh = cv.threshold(gray, gray_range[0], gray_range[1], cv.THRESH_BINARY)
    thresh_28 = convert_28(thresh, thresh, gray_range)
    predargmax_thresh_28 = predict(thresh_28)
      
      
    _, result = cv.threshold(gray, gray_range[0], gray_range[1], cv.THRESH_BINARY)
    cv.rectangle(result, (0, 0), (500,500), (255,255,255), -1)
    
      
    number_contours = cv.findContours(cv.morphologyEx(thresh, cv.MORPH_OPEN, np.ones((2,2))), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]
    if len(number_contours) > 0:
        for idx, cnt in enumerate(number_contours):
            test = create_window(thresh, idx, cnt)
            if test is not None:
                #cv.putText(result, str(test), (50*idx,200), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 5)
                predictions.append(test)
            cleared = False
        for x in range(len(predictions)):
            placement = 50
            space = 50
            cv.putText(result, str(predictions[x]), (placement+(space*x),200), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 5)
            if x < len(predictions)-1:
                cv.putText(result, '+', (round(placement+(space*x)+(space/2)),200), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 3)
            if x == len(predictions)-1:
                cv.putText(result, '=', (round(placement+(space*x)+(space/2)),200), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 3)
                cv.putText(result, str(sum(predictions)), (placement+(space*(x+1)),200), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 5)
        predictions = []
    else:
        if not cleared:
            for x in range(100):
                window_name = 'Win_Number: ' + str(x)
                cv.destroyWindow(window_name)
            cleared = True


    cv.putText(gray, 'Gray: ' + str(predargmax_gray_28), (20,40), cv.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
    cv.putText(gray, 'Gray: ' + str(predargmax_gray_28), (20,40), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
    cv.putText(gray, 'B&W: ' + str(predargmax_thresh_28), (20,80), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
    if len(number) == 1:
        cv.putText(gray, str(number[0]) + ' + ', (20,160), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 4)
    if len(number) >= 2:
        cv.putText(gray, str(number[0]) + ' + ' + str(number[1]) + ' = ' + str(number[0] + number[1]), (20,160), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 4)
      
     
           
    cv.imshow('Gray scale', gray)
    
    cv.imshow('Black & White (gray threshold)', thresh)
    cv.moveWindow('Black & White (gray threshold)', cv.getWindowImageRect('Gray scale')[2], 0)

    cv.imshow('Maffs', result)
    cv.moveWindow('Maffs', cv.getWindowImageRect('Gray scale')[2]*2, 0)
    

    if cv.waitKey(20) & 0xFF==ord('a'):
        number.append(predargmax_thresh_28)
    if cv.waitKey(20) & 0xFF==ord('q'):
        break
    
capture.release()
cv.waitKey(100)
cv.destroyAllWindows()
cv.waitKey(100)

448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448
448 448


KeyboardInterrupt: 