OCR WITH KERAS

In [60]:
import cv2
import os
import numpy as np
import imutils
from tensorflow.keras.models import load_model
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from imutils import build_montages
from PIL import Image

from models import ResNet
import matplotlib
matplotlib.use("Agg")

EPOCHS = 5
INIT_LR = 1e-1
BS = 128

Datasets:

A-Z
http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/

Load A-Z dataset:

In [61]:
def load_a_z_font_dataset(rootdir):
    data = []
    labels = []
    
    for (dirpath, dirnames, filenames) in os.walk(rootdir):
        for filename in filenames:
            
            labels.append(int(filename[8:11])-1)
            #print(int(filename[8:11])-11)
            
            image = cv2.imread(os.path.join(dirpath, filename))
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            (thresh, bwImage) = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

            width, height = bwImage.shape
            totalpixels = width * height
            if cv2.countNonZero(bwImage) < totalpixels / 2:
                bwImage = np.invert(bwImage)

            (tH, tW) = bwImage.shape
            if tW > tH:
                bwImage = imutils.resize(bwImage, width=32)
            else:
                bwImage = imutils.resize(bwImage, height=32)

            (tH, tW) = bwImage.shape
            dX = int(max(0, 32 - tW) / 2.0)
            dY = int(max(0, 32 - tH) / 2.0)

            padded = cv2.copyMakeBorder(bwImage, top=dY, bottom=dY,
                left=dX, right=dX, borderType=cv2.BORDER_CONSTANT,
                value=(255, 255, 255))

            data.append(padded)                    

    return (data, labels)

Loading:

In [62]:
(data, labels) = load_a_z_font_dataset(r'C:\Projetos\Mestrado\Project II\SourceCode\TextIdentificationService\datasets\a_z_test')

data = [cv2.resize(image, (32, 32)) for image in data]
data = np.array(data, dtype="float32")

data = np.expand_dims(data, axis=-1)
data /= 255.0

le = LabelBinarizer()

labels = le.fit_transform(labels)
ounts = labels.sum(axis=0)

Training assesment balance:

In [63]:
classTotals = labels.sum(axis=0)
classWeight = {}

for i in range(0, len(classTotals)):
    classWeight[i] = classTotals.max() / classTotals[i]

(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels, test_size=0.20, stratify=labels, random_state=42)

Preparing to train:

In [64]:
aug = ImageDataGenerator(rotation_range=10, zoom_range=0.05, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.15, horizontal_flip=False, fill_mode="nearest")

print("[INFO] compiling model...")

opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model = ResNet.build(32, 32, 1, len(le.classes_), (3, 3, 3),
                     (64, 64, 128, 256), reg=0.0005)
                     
model.compile(loss="categorical_crossentropy",
              optimizer=opt, metrics=["accuracy"])

[INFO] compiling model...


  super(SGD, self).__init__(name, **kwargs)


Training:

In [65]:
print("[INFO] training network...")

H = model.fit(
    aug.flow(trainX, trainY, batch_size=BS), 
            validation_data=(testX, testY), 
            #steps_per_epoch=len(trainX),
            epochs=EPOCHS,
            class_weight=classWeight,
            verbose=1)

[INFO] training network...
Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Network evaluation:

In [66]:
#labelNames = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
labelNames = "ABC"
labelNames = [l for l in labelNames]

print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=BS)
print(classification_report(testY.argmax(axis=1),
                            predictions.argmax(axis=1), target_names=labelNames))


[INFO] evaluating network...
              precision    recall  f1-score   support

           A       0.97      0.87      0.92       315
           C       0.77      0.98      0.87       226
           B       0.97      0.85      0.91       247

    accuracy                           0.90       788
   macro avg       0.90      0.90      0.90       788
weighted avg       0.91      0.90      0.90       788



Saving the model and training history:

In [67]:
model_path = r"C:\Projetos\Mestrado\Project II\SourceCode\TextIdentificationService\model\trained_ocr_full_test_ABCD.model"
model.save(model_path, save_format="h5")

N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.title("Trainning Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig("plot.png")


Testing:

In [71]:
model_path = r"C:\Projetos\Mestrado\Project II\SourceCode\TextIdentificationService\model\trained_ocr_full_test_ABCD.model"
model = load_model(model_path)

#labelNames = "ACBDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
labelNames = "ABC"
labelNames = [l for l in labelNames]

images = [] 
for i in np.random.choice(np.arange(0, len(testY)), size=(49,)):
    probs = model.predict(testX[np.newaxis, i])
    prediction = probs.argmax(axis=1)
    label = labelNames[prediction[0]]
    #print(prediction)

    image = (testX[i] * 255).astype("uint8")
    color = (0, 255, 0)

    if prediction[0] != np.argmax(testY[i]):
        color = (0, 0, 255)

    image = cv2.merge([image] * 3)
    image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
    cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)

    images.append(image)

montage = build_montages(images, (96, 96), (7, 7))[0]

cv2.imshow("OCR Results", montage)
cv2.waitKey(0)

-1

Predict:

In [69]:
from tensorflow.keras.models import load_model
from imutils.contours import sort_contours
import numpy as np
import imutils
import cv2
import tensorflow as tf

tf.config.run_functions_eagerly(True)

model_path = r"C:\Projetos\Mestrado\Project II\SourceCode\TextIdentificationService\model\trained_ocr_full_test_ABCD.model"

print("[INFO] loading OCR model...")
model = load_model(model_path)
print(model_path)

image_path = r"images\afastese.PNG"

image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)

edged = cv2.Canny(blurred, 30, 150)

cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sort_contours(cnts, method="left-to-right")[0]

chars = []
            

for c in cnts:
	(x, y, w, h) = cv2.boundingRect(c)

	if (w >= 20 and w <= 150) and (h >= 15 and h <= 120):
		roi = gray[y:y + h, x:x + w]
		(t, bwImage) = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

		(tW, tH) = bwImage.shape
		totalpixels = tW * tH
		
		if cv2.countNonZero(bwImage) < totalpixels / 2:
			bwImage = np.invert(bwImage)

		if tW > tH:
			bwImage = imutils.resize(bwImage, width=32)
		else:
			bwImage = imutils.resize(bwImage, height=32)

		(tH, tW) = bwImage.shape
		dX = int(max(0, 32 - tW) / 2.0)
		dY = int(max(0, 32 - tH) / 2.0)

		padded = cv2.copyMakeBorder(t, top=dY, bottom=dY,
			left=dX, right=dX, borderType=cv2.BORDER_CONSTANT,
			value=(255, 255, 255))
		
		padded = cv2.resize(padded, (32, 32))
		padded = padded.astype("float32") / 255.0
		padded = np.expand_dims(padded, axis=-1)

		cv2.imshow("Image", padded)
		cv2.waitKey(0)

		chars.append((padded, (x, y, w, h)))

boxes = [b[1] for b in chars]
chars = np.array([c[0] for c in chars], dtype="float32")


preds = model.predict(chars)

#labelNames = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
labelNames = "ABC"
labelNames = [l for l in labelNames]

for (pred, (x, y, w, h)) in zip(preds, boxes):
	i = np.argmax(pred)
	prob = pred[i]
	if prob > 0.8:
		print("prob:",prob)
		label = labelNames[i]

		print("[INFO] {} - {:.2f}%".format(label, prob * 100))
		cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
		cv2.putText(image, label, (x - 10, y - 10),
			cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)

cv2.imshow("Image", image)
cv2.waitKey(0)

[INFO] loading OCR model...
C:\Projetos\Mestrado\Project II\SourceCode\TextIdentificationService\model\trained_ocr_full_test_ABCD.model
prob: 0.97940356
[INFO] A - 97.94%
prob: 0.9796922
[INFO] A - 97.97%
prob: 0.97940356
[INFO] A - 97.94%
prob: 0.98329204
[INFO] A - 98.33%
prob: 0.9798395
[INFO] A - 97.98%
prob: 0.98440015
[INFO] A - 98.44%
prob: 0.98329204
[INFO] A - 98.33%
prob: 0.9835882
[INFO] A - 98.36%


-1