<a href="https://colab.research.google.com/github/huynqcharles/Hand-Drawn-Pictures-Recognition/blob/main/CNN_Hand_Drawn_Pictures_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DEPENDENCIES

In [None]:
import tensorflow as tf
import numpy as np
import cv2 as cv
import gc
import matplotlib.pyplot as plt

import keras
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

from sklearn.model_selection import train_test_split

# PRE-PROCESSING DATA

In [None]:
# Mount Google Drive at /content/drive; the dataset files loaded below and the
# saved model both live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the four QuickDraw bitmap exports from Drive in one pass; the order of
# the paths matches the order of the names on the left-hand side.
donut, hamburger, pizza, ice_cream = map(np.load, (
    '/content/drive/MyDrive/Hand Drawn Pictures/QuickDraw Dataset numpy files/full_numpy_bitmap_donut.npy',
    '/content/drive/MyDrive/Hand Drawn Pictures/QuickDraw Dataset numpy files/full_numpy_bitmap_hamburger.npy',
    '/content/drive/MyDrive/Hand Drawn Pictures/QuickDraw Dataset numpy files/full_numpy_bitmap_pizza.npy',
    '/content/drive/MyDrive/Hand Drawn Pictures/QuickDraw Dataset numpy files/full_numpy_bitmap_ice cream.npy',
))

In [None]:
# Append the numeric class id as a trailing column of each array
# (0 = donut, 1 = hamburger, 2 = pizza, 3 = ice cream).
# NOTE: re-running this cell appends a further column each time.
donut = np.column_stack((donut, np.full(len(donut), 0.0)))
hamburger = np.column_stack((hamburger, np.full(len(hamburger), 1.0)))
pizza = np.column_stack((pizza, np.full(len(pizza), 2.0)))
ice_cream = np.column_stack((ice_cream, np.full(len(ice_cream), 3.0)))

In [None]:
# Map each numeric class id to its human-readable name; the position in the
# list is the id that was written into the label column.
label_dict = dict(enumerate(['donut', 'hamburger', 'pizza', 'ice cream']))

In [None]:
# Show (n_samples, n_columns) for every class array, in class-id order.
for labelled_array in (donut, hamburger, pizza, ice_cream):
    print(labelled_array.shape)

(140751, 785)
(129672, 785)
(130371, 785)
(123133, 785)


In [None]:
# Stack the four classes: every column except the last holds pixel values,
# the last column holds the class id.
labelled_sets = (donut, hamburger, pizza, ice_cream)
X = np.concatenate([s[:, :-1] for s in labelled_sets], axis=0).astype('float32')
y = np.concatenate([s[:, -1] for s in labelled_sets], axis=0).astype('float32')

# Scale the pixels by 1/255 (assumes 8-bit bitmaps, i.e. values in 0-255).
# The prediction cells divide their input by 255.0 before calling predict(),
# so the model has to be trained on the same scale. Done in place to avoid a
# second full-size copy of X.
X /= 255.0

# 80/20 split into (train + validation) and test, then 80/20 again into
# train and validation. Fixed random_state keeps the splits reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=0)

In [None]:
# The combined train+validation arrays have been split again above; drop the
# references so their memory can be reclaimed.
del X_train_val, y_train_val

In [None]:
# The full concatenated arrays are not used after the splits; drop the references.
del X, y

In [None]:
# One-hot encode the labels.
# The class count comes from label_dict and is passed explicitly so that all
# three splits are guaranteed the same number of columns; without it,
# to_categorical infers the width separately from each split's largest label.
num_classes = len(label_dict)
y_train_cnn = to_categorical(y_train, num_classes=num_classes)
y_val_cnn = to_categorical(y_val, num_classes=num_classes)
y_test_cnn = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Side lengths (in pixels) used to reshape each flat row of 784 pixel values
# into a 2-D image (28 * 28 = 784) and to size the model input.
image_width = 28
image_height = 28

In [None]:
# Reshape each flat pixel row into a single-channel image tensor:
# (n_samples, image_width, image_height, 1).
# copy=False: the splits are already float32, so a plain astype('float32')
# would duplicate every array just to keep the same dtype. With copy=False the
# cast is a no-op here and still converts if the dtype ever differs.
X_train_cnn = X_train.reshape(X_train.shape[0], image_width, image_height, 1).astype('float32', copy=False)
X_val_cnn = X_val.reshape(X_val.shape[0], image_width, image_height, 1).astype('float32', copy=False)
X_test_cnn = X_test.reshape(X_test.shape[0], image_width, image_height, 1).astype('float32', copy=False)

In [None]:
# Only the *_cnn versions are used from here on; drop the flat splits and raw labels.
del X_train, X_val, X_test, y_train, y_val, y_test

In [None]:
# Sanity check: expect (n_train, image_width, image_height, 1).
X_train_cnn.shape

(335312, 28, 28, 1)

In [None]:
# Number of classes; this is the size of the model's softmax output layer.
num_classes

4

# MODEL IMPLEMENTATION

In [None]:
# LeNet-style CNN, built layer by layer:
# two 5x5 convolution + 2x2 max-pool stages, then dense layers of 120 and 84
# units, and a softmax layer with one output per class.
model = Sequential()
model.add(Conv2D(6, kernel_size=(5, 5), activation='relu',
                 input_shape=(image_width, image_height, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(16, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(120, activation='relu'))
model.add(Dense(84, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot encoded);
# accuracy is tracked as the reported metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the per-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 24, 24, 6)         156       
                                                                 
 max_pooling2d (MaxPooling2  (None, 12, 12, 6)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 4, 4, 16)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 120)               3

# TRAIN

In [None]:
# Re-check the training input shape right before fitting.
X_train_cnn.shape

(335312, 28, 28, 1)

In [None]:
# One-hot labels: expect (n_train, num_classes).
y_train_cnn.shape

(335312, 4)

In [None]:
# Train for 10 epochs with mini-batches of 128 samples; the validation split is
# passed so Keras reports validation metrics alongside the training ones.
model.fit(X_train_cnn, y_train_cnn, epochs=10, batch_size=128, validation_data=(X_val_cnn, y_val_cnn))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a3c38539510>

In [None]:
# Score the trained model on the held-out test split (not passed to fit()).
# verbose=0 suppresses the progress bar; the two metrics are printed below.
test_loss, test_accuracy = model.evaluate(X_test_cnn, y_test_cnn, verbose=0)

print('test set:')
print('loss:', test_loss)
print('accuracy:', test_accuracy)  # label previously misspelled as 'accuraccy:'

test set:
loss: 0.08655084669589996
accuraccy: 0.9725440144538879


In [None]:
import os
import shutil

model_save_path = '/content/drive/MyDrive/Hand Drawn Pictures/QuickDraw Dataset numpy files/model'

# Make sure the target folder exists. If it did not, shutil.move would rename
# the file to a plain file called "model" and the later load_model() call on
# model_save_path + '/my_model.h5' would fail.
os.makedirs(model_save_path, exist_ok=True)

# Save locally first, then move the file onto Drive.
model.save('my_model.h5')

# Move to an explicit file path rather than to the folder: moving into a folder
# that already contains my_model.h5 raises shutil.Error, which broke re-running
# this cell. With a full file path an existing file is replaced instead.
shutil.move('my_model.h5', os.path.join(model_save_path, 'my_model.h5'))

'/content/drive/MyDrive/Hand Drawn Pictures/QuickDraw Dataset numpy files/model/my_model.h5'

# TEST: UPLOADED IMAGES

In [None]:
# Classify an image uploaded to the Colab runtime.
uploaded_image_path = '/content/my_test_2.jpg'
image = cv.imread(uploaded_image_path)

# cv.imread returns None (it does not raise) when the file is missing or cannot
# be decoded; fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f'Could not read image: {uploaded_image_path}')

# NOTE(review): the drawing cell further down uses white strokes on a black
# background; confirm uploaded images use the same polarity as the training data.

# Same preprocessing steps as the training tensors: single channel,
# image_width x image_height pixels, batch axis in front, channel axis last.
gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

resized_image = cv.resize(gray_image, (image_width, image_height))

reshaped_image = resized_image.reshape(1, image_width, image_height, 1)

# Scale 8-bit pixel values to [0, 1].
normalized_image = reshaped_image / 255.0

y_pred = model.predict(normalized_image)

print(y_pred)

[[0.01873559 0.01044729 0.00729643 0.9635207 ]]


In [None]:
# Index of the largest score over the flattened prediction array.
predicted_class = y_pred.argmax()

# Translate the class id into its name (None if the id is not in label_dict).
pred = label_dict.get(predicted_class)

print('Prediction:', pred)

Prediction: ice cream


# TEST: DRAWING ONLINE

In [None]:
from google.colab import output
from base64 import b64decode
import os
import shutil
import uuid
from IPython.display import HTML
COLAB_HTML_ROOT = "/usr/local/share/jupyter/nbextensions/google.colab/"

def moveToExt(filename:str) -> str:
  """Copy ``filename`` into COLAB_HTML_ROOT under a unique name.

  The copy is named ``<uuid4><basename of filename>``. Despite the function
  name and the printed message, the original file is copied, not moved.

  Args:
    filename: Path of the file to copy.

  Returns:
    The path of the copy, or None (after printing a message) when
    ``filename`` does not exist.
  """
  if os.path.exists(filename):
    unique_name = str(uuid.uuid4()) + os.path.basename(filename)
    target = os.path.join(COLAB_HTML_ROOT, unique_name)
    shutil.copyfile(filename, target)
    print("moved to ext")
    return target

  print("Image file not found")
  return None


def draw(filename='drawing.png', color="black", bg_color="transparent",w=512, h=512, line_width=10,loop=False):
  """Show a drawing canvas in the cell output and save the sketch as a PNG.

  The rendered HTML contains a canvas, stroke/background colour pickers, a
  line-width slider and four buttons (Reload from disk, Reset, Save, Exit).
  The page defines a `data` promise that resolves with the canvas PNG data URL
  when Save is clicked and with no value when Exit is clicked; its value is
  fetched through `output.eval_js("data")`. A non-empty result is
  base64-decoded and written to `filename`.

  If `filename` already exists it is first copied with `moveToExt` so the
  "Reload from disk" button can load it back onto the canvas.

  Args:
    filename: Path the PNG is written to (also the file offered for reload).
    color: Initial stroke colour.
    bg_color: Initial canvas fill colour.
    w: Canvas width in pixels.
    h: Canvas height in pixels.
    line_width: Initial stroke width in pixels; also the slider's start value.
    loop: Unused; kept so existing calls keep working.

  Returns:
    None. The sketch is written to `filename` as a side effect.
  """
  real_filename = os.path.realpath(filename)
  html_filename = real_filename
  html_real_filename = html_filename
  if os.path.exists(real_filename):
    # Copy the existing drawing next to the Colab nbextensions and derive the
    # URL path by stripping the Jupyter share prefix from the copy's location.
    html_real_filename = moveToExt(real_filename)
    html_filename = html_real_filename.replace("/usr/local/share/jupyter","")

  # NOTE(review): <input type="color"> normally expects a "#rrggbb" value, so
  # named colours such as "black"/"transparent" may not show up in the pickers;
  # the canvas itself is initialised from `color`/`bg_color` directly. Confirm.
  #
  # The slider's initial value is `line_width` (it used to be hard-coded to 1,
  # disagreeing with both the label next to it and ctx.lineWidth).
  canvas_html = f"""
  <canvas width={w} height={h}></canvas>
  <div>
    <label for="strokeColor">Stroke</label>
    <input type="color" value="{color}" id="strokeColor">

    <label for="bgColor">Background</label>
    <input type="color" value="{bg_color}" id="bgColor">
  </div>
  <div class="slidecontainer">
  <label for="lineWidth" id="lineWidthLabel">{line_width}px</label>
    <input type="range" min="1" max="35" value="{line_width}" class="slider" id="lineWidth">
  </div>
  <div>
    <button id="loadImage">Reload from disk</button>
    <button id="reset">Reset</button>
    <button id="save">Save</button>
    <button id="exit">Exit</button>
  </div>
  <script>
   function loadImage(url) {{
  return new Promise(r => {{ let i = new Image(); i.onload = (() => r(i)); i.src = url; }});
}}


    var canvas = document.querySelector('canvas')
    var ctx = canvas.getContext('2d')
    ctx.lineWidth = {line_width}
    ctx.fillStyle = "{bg_color}";

    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.strokeStyle = "{color}";
    var strokeColor = document.querySelector('#strokeColor')
    var bgColor = document.querySelector('#bgColor')
    var slider = document.getElementById("lineWidth");
    slider.oninput = function() {{
      ctx.lineWidth = this.value;
      lineWidthLabel.innerHTML = `${{this.value}}px`
    }}
    function updateStroke(event){{
        ctx.strokeStyle = event.target.value
    }}
    function updateBG(event){{
        ctx.fillStyle = event.target.value
    }}

    bgColor.addEventListener("change", updateBG, false);
    strokeColor.addEventListener("change", updateStroke, false);

    var clear_button = document.querySelector('#reset')
    var reload_img_button = document.querySelector('#loadImage')
    var button = document.querySelector('#save')
    var exit_button = document.querySelector('#exit')
    var mouse = {{x: 0, y: 0}}
    canvas.addEventListener('mousemove', function(e) {{
      mouse.x = e.pageX - this.offsetLeft
      mouse.y = e.pageY - this.offsetTop
    }})
    canvas.onmousedown = ()=>{{
      ctx.beginPath()
      ctx.moveTo(mouse.x, mouse.y)
      canvas.addEventListener('mousemove', onPaint)
    }}
    canvas.onmouseup = ()=>{{
      canvas.removeEventListener('mousemove', onPaint)
    }}
    var onPaint = ()=>{{
      ctx.lineTo(mouse.x, mouse.y)
      ctx.stroke()
    }}
    reload_img_button.onclick = async ()=>{{
      console.log("Reloading Image {html_filename}")
      let img = await loadImage('{html_filename}');
      console.log("Loaded image")
      ctx.drawImage(img, 0, 0);
    }}

    clear_button.onclick = ()=>{{
        console.log('Clearing Screen')
        ctx.clearRect(0, 0, canvas.width, canvas.height);
        ctx.fillRect(0, 0, canvas.width, canvas.height);
      }}
      canvas.addEventListener('load', function() {{
      console.log('All assets are loaded')
    }})
    var data = new Promise(resolve=>{{
      button.onclick = ()=>{{
        resolve(canvas.toDataURL('image/png'))
      }}
      exit_button.onclick = ()=>{{
      resolve()
    }}

    }})

    // window.onload = async ()=>{{
    //   console.log("loaded")
    //   let img = await loadImage('{html_filename}');
    //   ctx.drawImage(img, 0, 0);
    // }}


  </script>
  """

  display(HTML(canvas_html))
  print("Evaluating JS")

  # Fetch the value of the page's `data` promise: a PNG data URL after Save,
  # nothing after Exit.
  data = output.eval_js("data")
  if data:
    print("Saving Sketch")
    # A data URL looks like "data:image/png;base64,<payload>"; keep the payload.
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
      f.write(binary)
    print('Saved!')

In [None]:
# Reload the trained network from the my_model.h5 file stored under model_save_path on Drive.
saved_model = load_model(model_save_path + '/my_model.h5')

In [None]:
# Open the canvas with white strokes on a black background; clicking Save
# writes the sketch to the default filename, drawing.png.
draw(color='white', bg_color="black")

moved to ext


Evaluating JS
Saving Sketch
Saved!


In [None]:
# Classify the sketch saved by draw().
drawing_path = 'drawing.png'
image = cv.imread(drawing_path)

# cv.imread returns None (it does not raise) when the file is missing or cannot
# be decoded, e.g. when Exit was clicked instead of Save. Fail with a clear
# message instead of an opaque error inside cvtColor.
if image is None:
    raise FileNotFoundError(
        f"Could not read '{drawing_path}'; draw something and click Save first"
    )

# Same preprocessing steps as the training tensors: single channel,
# image_width x image_height pixels, batch axis in front, channel axis last.
gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

resized_image = cv.resize(gray_image, (image_width, image_height))

reshaped_image = resized_image.reshape(1, image_width, image_height, 1)

# Scale 8-bit pixel values to [0, 1].
normalized_image = reshaped_image / 255.0

y_pred = saved_model.predict(normalized_image)

print(y_pred)

# Highest-scoring class id, translated to its name through label_dict.
predicted_class = np.argmax(y_pred)

pred = label_dict.get(predicted_class)

print('Prediction:', pred)

[[0.21478656 0.1997171  0.2906541  0.29484224]]
Prediction: ice cream
