In [1]:
# import the necessary packages
import numpy as np
import pandas as pd
import tensorflow
import keras
import imutils
import cv2
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,BatchNormalization,Dropout,Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import seaborn as sn
%matplotlib inline
import warnings
warnings.simplefilter('ignore')

In [2]:
#Training our OCR Model using Keras and TensorFlow

class ResNet:
    def build(width,height,depth,classes):
        inputshape=(height,width,depth)
        model=Sequential()
        model.add(Conv2D(32,(5,5),activation='relu',padding='same',input_shape=inputshape))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Conv2D(64,(3,3),activation='relu',padding='same'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Conv2D(128,(3,3),activation='relu',padding='same'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Flatten())
        model.add(Dense(128,activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(Dense(64,activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(Dense(classes,activation='softmax'))
        return model

In [3]:
# Functions for loading the A-Z and MNIST datasets, respectively
def load_az_dataset(datasetPath):
    data=[]
    label=[]
    for row in open(datasetPath):
        row=row.split(',')
        labels=int(row[0])
        image=np.array([int(x) for x in row[1:]],dtype='uint8')
        
        image=image.reshape((28,28))
        
        data.append(image)
        label.append(labels)
        
    data=np.array(data,dtype='float32')
    label=np.array(label,dtype='int')
    
    return (data,label)

In [4]:
from tensorflow.keras.datasets import mnist

def load_mnist_dataset():
    ((trainData,trainLabels),(testData,testLabels))=mnist.load_data()
    
    data=np.vstack([trainData,testData])
    labels=np.hstack([trainLabels,testLabels])
    
    return (data,labels)

In [5]:
args={'a-z_dataset':r"F:\Computer Vision 2\HandWriting Recognition\archive (4)\A_Z Handwritten Data\A_Z Handwritten Data.csv"}

In [6]:
Num_epochs=50
lr=1e-1
bs=128

In [7]:
print('[INFO] loading databases.....')
(azData,azLabels)=load_az_dataset(args['a-z_dataset'])
(digitsData,digitsLabels)=load_mnist_dataset()

[INFO] loading databases.....


In [8]:
# the MNIST dataset occupies the labels 0-9, so let's add 10 to every
# A-Z label to ensure the A-Z characters are not incorrectly labeled
# as digits
azLabels+=10

data=np.vstack([azData,digitsData])
labels=np.hstack([azLabels,digitsLabels])

data=[cv2.resize(image,(32,32)) for image in data]
data=np.array(data,dtype='float32')

data=np.expand_dims(data,axis=-1)
data/=255.0

from sklearn.preprocessing import LabelBinarizer
lb=LabelBinarizer()
labels=lb.fit_transform(labels)
counts=labels.sum(axis=0)


# account for skew in the labeled data
classTotals=labels.sum(axis=0)
classWeight={}

for i in range(0,len(classTotals)):
    classWeight[i]=classTotals.max()/classTotals[i]
    
from sklearn.model_selection import train_test_split
(trainX,testX,trainY,testY)=train_test_split(data,labels,test_size=0.20,stratify=labels,random_state=42)

In [9]:
# construct the image generator for data augmentation
aug=ImageDataGenerator(rotation_range=10,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    fill_mode="nearest")

In [10]:
opt=Adam(learning_rate=lr)
model=ResNet.build(width=32,height=32,depth=1,classes=len(lb.classes_))

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        832       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 8, 8, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 8, 8, 128)         73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 4, 4, 128)        0

In [12]:
model.compile(loss='categorical_crossentropy',optimizer=opt,metrics=['accuracy'])

In [13]:
H=model.fit(aug.flow(trainX,trainY,batch_size=bs),
            validation_data=(testX,testY),
            steps_per_epoch=len(trainX)//bs,
             epochs=2,
             class_weight=classWeight,
             verbose=1)

Epoch 1/2
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Epoch 2/2


In [14]:
labelNames='0123456789'
labelNames+='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
labelNames=[l for l in labelNames]

In [15]:
from sklearn.metrics import classification_report
from numpy import argmax
preds=model.predict(testX,batch_size=bs)
print(classification_report(testY.argmax(axis=1),preds.argmax(axis=1),target_names=labelNames))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
              precision    recall  f1-score   support

           0       0.10      0.57      0.16      1381
           1       0.99      0.91      0.95      1575
           2       0.67      0.97      0.79      1398
           3       0.84      0.96      0.89      1428
           4       0.81      0.92      0.86      1365
           5       0.22      0.92      0.36      1263
           6       0.97      0.87      0.91      1375
           7       0.76      0.95      0.84      1459
           8       0.57      0.98      0.72      1365
           9       0.84  

In [16]:
#code for our visualization procedure so we can see whether our model is working or not

from imutils import build_montages
images=[]

for i in np.random.choice(np.arange(0,len(testY)),size=(49,)):
    probs=model.predict(testX[np.newaxis,i])
    prediction=probs.argmax(axis=1)
    label=labelNames[prediction[0]]
    
    image=(testX[i]*255).astype('uint8')
    color=(0,255,0)
    
    if prediction[0]!=np.argmax(testY[i]):
        color=(0,0,255)
        
    # merge the channels into one image, resize the image from 32x32
    # to 96x96 so we can better see it and then draw the predicted
    # label on the image
    image=cv2.merge([image]*3)
    image=cv2.resize(image,(96,96),interpolation=cv2.INTER_LINEAR)
    cv2.putText(image,label,(5,20),cv2.FONT_HERSHEY_SIMPLEX,0.75,color,2)
    
    images.append(image)
    
montage=build_montages(images,(96,96),(7,7))[0]

cv2.imshow('Montages',montage)
cv2.waitKey(0)
    

-1

In [17]:
#Save the model
model.save('Handwritten_recog_26042023.h5')

In [18]:
from tensorflow.keras.models import load_model
from imutils.contours import sort_contours

In [19]:
#load the model
my_model=load_model('Handwritten_recog_26042023.h5')

In [28]:
args={'input_image':r"F:\Dog images\0Jl54.png"}

In [29]:
image=cv2.imread(args['input_image'])
gray=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
blurred=cv2.GaussianBlur(gray,(5,5),0)


# perform edge detection, find contours in the edge map, and sort the
# resulting contours from left-to-right
edged=cv2.Canny(blurred,30,150)
cnts=cv2.findContours(edged.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts=imutils.grab_contours(cnts)
cnts=sort_contours(cnts,method='left-to-right')[0]

chars=[]

for c in cnts:
    (x,y,w,h)=cv2.boundingRect(c)
    if (w>=5 and w<=150) and (h>=15 and h<=120):
        roi=gray[y:y+h,x:x+w]
        thresh=cv2.threshold(roi,0,255,cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)[1]
        (tW,tH)=thresh.shape
        
        if tW>tH:
            thresh=imutils.resize(thresh,width=32)
        else:
            thresh=imutils.resize(thresh,height=32)
            
        # re-grab the image dimensions (now that its been resized)
        # and then determine how much we need to pad the width and
        # height such that our image will be 32x32
        
        (tW,tH)=thresh.shape
        dx=int(max(0,32-tW)/2.0)
        dy=int(max(0,32-tH)/2.0)
        
        padded=cv2.copyMakeBorder(thresh,top=dy,bottom=dx,left=dx,right=dx,borderType=cv2.BORDER_CONSTANT,value=(0,0,0))
        
        
        # pad the image and force 32x32 dimensions
        padded=cv2.resize(padded,(32,32))
        padded=padded.astype('float32')/255.0
        padded=np.expand_dims(padded,axis=-1)
        
        chars.append((padded,(x,y,w,h)))
        
        
boxes=[b[1] for b in chars]
chars=np.array([c[0] for c in chars],dtype='float32')

#Applying our OCR model to handwriting recognition
preds=my_model.predict(chars)

labelNames='0123456789'
labelNames+='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
labelNames=[l for l in labelNames]     

for (pred,(x,y,w,h)) in zip(preds,boxes):
    i=np.argmax(pred)
    probs=pred[i]
    label=labelNames[i]
    
    print("[INFO]{}-{:.2f}%".format(label,probs*100))
    cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0),2)
    cv2.putText(image,label,(x-10,y-10),cv2.FONT_HERSHEY_SIMPLEX,0.75,(0,255,0),2)
    
    cv2.imshow('Image',image)
    cv2.waitKey(0)

[INFO]2-100.00%
[INFO]Q-73.84%
[INFO]Q-84.18%
[INFO]E-56.29%
[INFO]2-84.87%
[INFO]2-68.53%
[INFO]E-94.37%
[INFO]8-100.00%
[INFO]E-100.00%
[INFO]E-100.00%
[INFO]S-100.00%
[INFO]D-95.71%
[INFO]M-78.02%
[INFO]E-97.91%
[INFO]D-93.50%
[INFO]W-100.00%
[INFO]W-100.00%
[INFO]E-90.55%
[INFO]8-83.32%
[INFO]Q-81.43%
[INFO]E-99.78%
[INFO]2-85.20%
[INFO]2-99.78%
[INFO]Q-99.16%
[INFO]2-100.00%
[INFO]D-94.05%
[INFO]2-89.36%
[INFO]5-71.82%
[INFO]5-60.42%
[INFO]5-54.09%
[INFO]E-99.96%
[INFO]2-98.68%
[INFO]Q-99.93%
[INFO]E-99.92%
[INFO]E-97.44%
[INFO]E-87.45%
[INFO]S-100.00%
[INFO]2-57.61%
[INFO]Q-97.51%
[INFO]S-68.34%
[INFO]E-94.19%
[INFO]Q-99.70%
[INFO]8-97.42%
[INFO]2-66.03%
[INFO]D-97.27%
[INFO]Q-99.55%
[INFO]W-100.00%
[INFO]E-99.16%
[INFO]2-75.02%
[INFO]2-89.60%
[INFO]I-50.48%
[INFO]2-90.58%
[INFO]B-88.14%
[INFO]2-100.00%
[INFO]2-85.08%
[INFO]5-61.25%
[INFO]W-100.00%
[INFO]2-90.64%
[INFO]5-53.24%
[INFO]E-98.14%
[INFO]8-97.65%
[INFO]I-97.50%
[INFO]5-61.61%
[INFO]E-93.80%
[INFO]2-100.00%
[INFO]2-86.7