In [15]:
# import the necessary packages
from Models import ResNet
from Datasets import load_mnist_dataset
from Datasets import load_az_dataset
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import build_montages
import matplotlib.pyplot as plt
import numpy as np
import cv2
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")


In [2]:
# pull both corpora into memory: the A-Z letters from the CSV on disk
# first, then the MNIST digits
print("[INFO] loading datasets...")
azData, azLabels = load_az_dataset("Datasets/a_z_handwritten_data.csv")
digitsData, digitsLabels = load_mnist_dataset()

[INFO] loading datasets...


In [3]:
# peek at the A-Z labels as loaded (the output below shows ids 0-25)
azLabels


array([ 0,  0,  0, ..., 25, 25, 25])

In [4]:

# the MNIST dataset occupies the labels 0-9, so the A-Z labels are moved
# up by 10 (to 10-35) to ensure letters are not mislabeled as digits.
# The offset is applied to a temporary array instead of the in-place
# `azLabels += 10`: re-running this cell therefore cannot shift the
# letter labels a second time. `azLabels` itself keeps its loaded values.
azLabelsOffset = azLabels + 10

# stack the A-Z data and labels with the MNIST digits data and labels
data = np.vstack([azData, digitsData])
labels = np.hstack([azLabelsOffset, digitsLabels])



In [5]:
# every image in the A-Z and MNIST digits datasets is 28x28 pixels, but
# the architecture used here is designed for 32x32 inputs, so each image
# is resized to 32x32 and the result is packed into one float32 array
data = np.array(
    [cv2.resize(img, (32, 32)) for img in data],
    dtype="float32",
)

# append a trailing channel axis, then rescale the pixel intensities
# from [0, 255] down to [0, 1] (in place, to avoid a second full copy)
data = data[..., np.newaxis]
data /= 255.0

In [6]:
# binarize the integer class ids into per-class indicator vectors
le = LabelBinarizer()
labels = le.fit_transform(labels)

# column sums of the indicator matrix give the number of samples per
# class; they drive the class weights that compensate for label skew.
# The weight table starts empty and is filled in by the following cell.
classTotals = np.sum(labels, axis=0)
classWeight = dict()

In [7]:
# weight each class by (largest class count / its own count): the most
# frequent class gets 1.0 and rarer classes get proportionally more
largestTotal = classTotals.max()
classWeight.update(
    (classIdx, largestTotal / total)
    for classIdx, total in enumerate(classTotals)
)

In [8]:
# display the computed weight table (class index -> weight)
classWeight

{0: 8.376792698826597,
 1: 7.340992763742541,
 2: 8.272532188841202,
 3: 8.097605377398123,
 4: 8.473769050410317,
 5: 9.15967052114684,
 6: 8.409685863874346,
 7: 7.928835870012341,
 8: 8.472527472527473,
 9: 8.310577752227651,
 10: 4.169069935111752,
 11: 6.671089063221043,
 12: 2.4702037677816224,
 13: 5.7060390763765545,
 14: 5.0546328671328675,
 15: 49.72055030094583,
 16: 10.035577924331829,
 17: 8.011221945137157,
 18: 51.629464285714285,
 19: 6.808548216178029,
 20: 10.320364090665715,
 21: 4.990937338166753,
 22: 4.6875,
 23: 3.0418200946870066,
 24: 1.0,
 25: 2.989762680316426,
 26: 9.94924294562973,
 27: 4.999567698426422,
 28: 1.1942625828703608,
 29: 2.5705712380529007,
 30: 1.993415609487038,
 31: 13.827116212338593,
 32: 5.362110534124629,
 33: 9.21954719387755,
 34: 5.3250759738465785,
 35: 9.51695194206715}

In [9]:
# hold out 20% of the samples for testing and train on the other 80%;
# the split is stratified on the labels and uses a fixed seed so it is
# repeatable between runs
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.20,
    stratify=labels,
    random_state=42,
)


In [10]:
# image generator used for data augmentation: small random rotations,
# zooms, shifts and shears; horizontal flips stay disabled
Img_aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    fill_mode="nearest",
)

In [11]:
# training hyper-parameters
Epochs = 50          # number of epochs to train for
Learning_rate = 0.1  # initial learning rate
Batch_Size = 128     # samples per batch



In [12]:
# initialize and compile the deep neural network
print("[INFO] compiling model...")

# SGD with a per-update learning-rate decay of Learning_rate / Epochs.
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
opt = SGD(learning_rate=Learning_rate, decay=Learning_rate / Epochs)

# NOTE(review): the positional arguments are presumably width, height,
# depth, number of classes, stages and filters -- confirm against
# Models.ResNet.build. The class count comes from the fitted binarizer.
model = ResNet.build(
    32, 32, 1, len(le.classes_),
    (3, 3, 3), (64, 64, 128, 256),
    reg=0.0005,
)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

[INFO] compiling model...


In [13]:
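# train the network
# NOTE: training is disabled in this notebook; a previously trained model
# is loaded from disk further down instead. The call below uses the names
# this notebook actually defines (Img_aug, Epochs) so it runs if re-enabled.
#print("[INFO] training network...")
#H = model.fit(Img_aug.flow(trainX, trainY, batch_size=Batch_Size),validation_data=(testX, testY),
#              steps_per_epoch=len(trainX) // Batch_Size,epochs=Epochs,
#              class_weight=classWeight,
#              verbose=1)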


In [14]:
# human-readable class names, one character per class: the ten digits
# followed by the twenty-six uppercase letters
labelNames = list("0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ")

#### Load Model

In [23]:
# restore the trained handwriting OCR network from disk; this rebinds
# `model`, replacing any model object created earlier in the notebook
print("[INFO] loading handwriting OCR model...")
model = load_model(filepath="Models/handwriting.model")

[INFO] loading handwriting OCR model...


In [25]:
# score the network on the held-out test split
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=Batch_Size)

# collapse the per-class vectors to class indices (argmax) for both the
# ground truth and the predictions before building the report
trueClasses = testY.argmax(axis=1)
predictedClasses = predictions.argmax(axis=1)
report = classification_report(
    trueClasses,
    predictedClasses,
    target_names=labelNames,
)
print(report)


[INFO] evaluating network...
              precision    recall  f1-score   support

           0       0.52      0.51      0.51      1381
           1       0.97      0.98      0.97      1575
           2       0.87      0.96      0.92      1398
           3       0.98      0.99      0.99      1428
           4       0.90      0.95      0.92      1365
           5       0.87      0.88      0.88      1263
           6       0.95      0.98      0.96      1375
           7       0.96      0.99      0.97      1459
           8       0.95      0.98      0.96      1365
           9       0.96      0.98      0.97      1392
           A       0.98      0.99      0.99      2774
           B       0.98      0.98      0.98      1734
           C       0.99      0.99      0.99      4682
           D       0.95      0.95      0.95      2027
           E       0.99      0.99      0.99      2288
           F       0.99      0.96      0.97       232
           G       0.97      0.93      0.95      115

In [26]:
# write the current model to disk in HDF5 format
# NOTE(review): the target is spelled "handwritting" and sits in the
# working directory, unlike the "Models/handwriting.model" path the model
# is loaded from -- confirm the intended file name before relying on it
print("[INFO] serializing network...")
model.save(filepath="handwritting", save_format="h5")

[INFO] serializing network...


In [31]:
# Plot the per-epoch training and validation loss and save the figure.
# `H` only exists when the (currently commented-out) training cell has
# been run; with a model loaded from disk there is no history, so the
# plot is skipped instead of failing with NameError.
try:
    history = H.history
except NameError:
    history = None

if history is None:
    print("[INFO] no training history available, skipping loss plot")
else:
    # use the number of epochs actually recorded rather than the
    # configured `Epochs`, so the x-axis always matches the curve length
    N = np.arange(0, len(history["loss"]))
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(N, history["loss"], label="train_loss")
    plt.plot(N, history["val_loss"], label="val_loss")
    # only loss curves are drawn, so the title and axis say exactly that
    plt.title("Training and Validation Loss")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    plt.legend(loc="lower left")
    plt.savefig("plot")

In [34]:
# Build a 7x7 montage of randomly chosen test characters, each annotated
# with the predicted label (green text = correct, red text = wrong).

# initialize our list of output images
images = []

# randomly select 49 test indices (np.random.choice samples with
# replacement by default, so an index can appear more than once)
sampleIdxs = np.random.choice(np.arange(0, len(testY)), size=(49,))

# classify every sampled character in a single batched call rather than
# invoking model.predict() once per image inside the loop
probs = model.predict(testX[sampleIdxs])
predictedIdxs = probs.argmax(axis=1)
trueIdxs = testY[sampleIdxs].argmax(axis=1)

for (i, predicted, actual) in zip(sampleIdxs, predictedIdxs, trueIdxs):
    label = labelNames[predicted]

    # recover an 8-bit image from the [0, 1]-scaled test sample
    image = (testX[i] * 255).astype("uint8")

    # OpenCV colors are BGR: green when the prediction matches the
    # ground truth, red otherwise
    color = (0, 255, 0) if predicted == actual else (0, 0, 255)

    # replicate the single channel three times, enlarge the image from
    # 32x32 to 96x96 so it is easier to see, then draw the predicted label
    image = cv2.merge([image] * 3)
    image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
    cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                color, 2)

    # add the image to our list of output images
    images.append(image)

# construct the montage for the images
montage = build_montages(images, (96, 96), (7, 7))[0]

# show the output montage in an OpenCV window and wait for a key press
cv2.imshow("OCR Results", montage)
cv2.waitKey(0)
cv2.destroyAllWindows()