# Character Recognition/classification CNN model

In [None]:
# import the pre-trained CNN and its related preprocessing method
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, Conv2D, Flatten, GlobalAveragePooling2D, MaxPooling2D, SpatialDropout2D
from tensorflow.keras.models import Model
import os
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Pre-process dataset

The dataset is formatted into image array list and labels list, and are subsequntly split into train, test and validation datasets.

In [None]:
import os
import pathlib
from PIL import Image 
import numpy as np
import matplotlib.pyplot as plt
import cv2

def load_data(fpath):    
    img=Image.open(fpath).resize((71,71)) 
    img = np.asarray(img, dtype='float32')

    return img

# Load images as np arrays for easier manipulation
rootdir = 'C:/Users/patri/Desktop/characters/chars/'
print(rootdir)
images=[]
labels=[]
total=34000
j=1
i=0

for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        images.append(load_data(os.path.join(subdir, file)))
        labels.append(j)
        i += 1
        if i % 1000 == 0: j += 1



In [None]:
from sklearn.model_selection import train_test_split 
images = np.asarray(images) 
labels = np.asarray(labels).reshape(34000,1) 
images.shape

# Split data into training and test. Since this is a very small dataset, a 85/15 split was deemed best to avoid overfitting.
from sklearn.model_selection import train_test_split 

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=104)

X_train, X_val, y_train, y_val  = train_test_split(X_train, y_train, test_size=0.25, random_state=1)

print(X_train.shape)
print(y_train.shape)

In [None]:
# Dividing images into train and test folders by creating images from arrays
import cv2
import numpy as np
def create_images(data, labels, folder):
    dirname=folder
    
    if not os.path.exists(dirname):
      os.mkdir(dirname)
    n=0
    
    for i in data:
      label_n=labels[n]
      subfolder = folder + "/" + str(label_n) 
      if not os.path.exists(subfolder):
          os.mkdir(subfolder)  
      filepath =  subfolder + "/" + str(n)+ ".jpg"
      cv2.imwrite(filepath, data[n]) 
      n+=1

# Save images to corresponding subfolders
# create_images(X_train, y_train, 'C:/Users/patri/Desktop/characters/train/') 
# create_images(X_test, y_test, 'C:/Users/patri/Desktop/characters/test/')
create_images(X_val, y_val, 'C:/Users/patri/Desktop/characters/validation/')

# Initialise datagenerators for image augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_dir =os.path.realpath('C:/Users/patri/Desktop/characters/train/')
test_dir = os.path.realpath('C:/Users/patri/Desktop/characters/test/')

image_size = 71

# Configure data augmentation parameters
train_datagen = ImageDataGenerator(
      rescale=1./255,
      horizontal_flip=True,
      rotation_range=10,
      width_shift_range=0.2,
      height_shift_range=0.2,
      fill_mode='nearest'
      )
 
test_datagen = ImageDataGenerator(rescale=1./255)

validation_datagen = ImageDataGenerator(rescale=1./255)

train_batchsize = 32
val_batchsize = 10
 
# Fit the train and test images onto the data generators to augment them.
train_generator = train_datagen.flow_from_directory( 
        train_dir,
        target_size=(image_size, image_size),
        batch_size=train_batchsize,
       )
 
test_generator = validation_datagen.flow_from_directory(
        test_dir,
        target_size=(image_size, image_size),
        batch_size=val_batchsize,
        shuffle=False)

# Define CNN model architecture

In [None]:
def create_model():

    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(71, 71, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    
    # Add fully connected layers and final output layer
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(34, activation='softmax'))
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers,optimizers
import tensorflow.keras.applications.vgg16
from tensorflow.python.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.python.keras.layers import Input, Dense

checkpoint_path = 'C:/Users/patri/Desktop/characters/model_checkpoints/model.{epoch:02d}-acc{val_accuracy:.4f}.h5'

# LR schedule - reduce learning rate on loss plateau
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=4, min_lr=0.0001)

callbacks = [reduce_lr, tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5), tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=False,
                                                 verbose=1)]
model = create_model()

history = model.fit(
      train_generator,
      shuffle=True,
      steps_per_epoch=train_generator.samples/train_generator.batch_size ,
      epochs=50,
      validation_data=test_generator,
      validation_steps=test_generator.samples/test_generator.batch_size,
      callbacks=callbacks,
      verbose=1)


# Model evaluation metrics

In [4]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt
import tensorflow as tf

# print('training acc.:', history.history['accuracy'][-1], '\n','test acc.:', (history.history['val_accuracy'])[-1])

loaded_model = tf.keras.models.load_model('C:/Users/patri/Desktop/characters/model_checkpoints/model.10-acc1.0000.h5')

# plot accuracy hisory
def plot_history(history):
 plt.figure()
 plt.xlabel('Epoch')
 plt.ylabel('Accuracy %')
 plt.plot(history.epoch, np.array(history.history['accuracy']),
 label='Train Accuracy')
 plt.plot(history.epoch, np.array(history.history['val_accuracy']),
 label = 'Val Accuracy')
 plt.legend()
 plt.ylim([0.5, 1])

plot_history(history)

NameError: name 'history' is not defined

# Generate predictions

In [12]:
import os
import numpy as np
from matplotlib import pyplot as plt
import random
from PIL import Image
from matplotlib import cm
from sklearn import metrics
%matplotlib inline 

dictionary = {0:'0', 1:'1', 2 :'2', 3:'3', 4:'4', 5:'5', 6:'6', 7:'7', 8:'8', 9:'9', 10:'A',
11:'B', 12:'C', 13:'D', 14:'E', 15:'F', 16:'G', 17:'H', 18:'I', 19:'J', 20:'K',
21:'L', 22:'M', 23:'N', 24:'P', 25:'Q', 26:'R', 27:'S', 28:'T', 29:'U',
30:'V', 31:'W', 32:'X', 33:'Y', 34:'Z'}

def cnnCharRecognition(img):
    image = img / 255.0
    image = np.reshape(image, (1,71,71,3))
    new_predictions = loaded_model.predict(image)
    char = np.argmax(new_predictions)
    return dictionary[char]

path = "C:/Users/patri/Desktop/characters/val_plate/"
numberplate = []
counter = 0
for image_path in os.listdir(path):
    full_image_path = os.path.join(path, image_path)
    img=Image.open(full_image_path).resize((71,71)) 
    img = np.asarray(img, dtype='float32')
    pred = cnnCharRecognition(img)
    numberplate.append(pred)

print("Numberplate: " + str(numberplate))

Numberplate: ['9', 'R', 'A', 'L', 'K', 'X']


# Character localization and image pre-processing for real numberplates

In [10]:
import os
import numpy as np
import cv2
# Apply canny edge detection 
def auto_canny(image, sigma=0.33):
    v = np.median(image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    edged_image = cv2.Canny(image, lower, upper)
 
    return edged_image

# Crops characters out of numerplate 
def crop_ctrs(img):
    ret, mask = cv2.threshold(grayimage, 254, 255, cv2.THRESH_BINARY)

    cv2.imshow('mask', mask)
    cv2.waitKey(0)

    image, contours, hierarchy = cv2.findContours(mask, cv2.RETR_LIST, 
    cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:

        if cv2.contourArea(contour) < 200:
            continue

        rect = cv2.minAreaRect(contour)
        box = cv2.boxPoints(rect)

        ext_left = tuple(contour[contour[:, :, 0].argmin()][0])
        ext_right = tuple(contour[contour[:, :, 0].argmax()][0])
        ext_top = tuple(contour[contour[:, :, 1].argmin()][0])
        ext_bot = tuple(contour[contour[:, :, 1].argmax()][0])

        roi_corners = np.array([box], dtype=np.int32)

        cv2.polylines(bounding_box_image, roi_corners, 1, (255, 0, 0), 3)
        cv2.imshow('image', bounding_box_image)
        cv2.waitKey(0)

        cropped_image = grayimage[ext_top[1]:ext_bot[1], ext_left[0]:ext_right[0]]
        cv2.imwrite('crop.jpg', cropped_image)

path = "C:/Users/patri/Desktop/characters/test_plate/"
bounding_boxes = []
counter = 0
for image_path in os.listdir(path):
    if counter < 200:
        full_image_path = os.path.join(path, image_path)
        img = cv2.imread(full_image_path)

        # Loop through each image, apply pre-processing & localize onto each character
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh_inv = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV,39,1)
        edges = auto_canny(thresh_inv)
        ctrs, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

        img_area = img.shape[0]*img.shape[1]

        # Get bounding box co-ordinates for image cropping
        for i, ctr in enumerate(sorted_ctrs):
            x, y, w, h = cv2.boundingRect(ctr)
            roi_area = w*h
            roi_ratio = roi_area/img_area

            if((roi_ratio >= 0.04) and (roi_ratio < 0.16)):
                    if ((h>0.9*w) and (2.8*w>=h)):
                        cv2.rectangle(img,(x,y),( x + w, y + h ),(90,0,255), 1)
                        bounding_boxes.append((x,y,w,h))
                        counter += 1

print(bounding_boxes)
# Crop bounding boxes and save into new dir
count = 0
for box in bounding_boxes:
        x,y,w,h = box
        ROI = img[y:y+h, x:x+w]
        cv2.imwrite("C:/Users/patri/Desktop/characters/test_plate/char_{}.png".format(str(count)), ROI)
        count += 1

[(8, 3, 3, 6), (0, 5, 7, 15), (1, 21, 6, 12), (0, 0, 14, 30), (31, 3, 12, 24), (0, 13, 11, 11), (15, 33, 12, 13), (48, 11, 14, 32), (36, 10, 27, 33), (78, 9, 29, 33), (111, 9, 28, 33), (143, 9, 27, 33), (175, 9, 26, 34), (205, 7, 28, 37), (4, 4, 62, 57), (9, 13, 34, 40), (49, 14, 34, 44), (105, 15, 36, 42), (145, 18, 36, 40), (186, 20, 34, 40), (228, 15, 23, 46), (239, 14, 24, 50), (283, 18, 24, 46)]
