In [1]:
import os
import numpy as np
from skimage.filters import threshold_otsu
import cv2

# Class labels the classifier is trained on: the ten digits plus 24 upper-case
# letters ('I' and 'O' are not in this list), 34 classes in total.
letters = [
           '0','1','2','3','4','5','6','7','8','9','A','B','C','D',
           'E','F','G','H','J','K','L','M','N','P','Q','R','S','T',
           'U','V','W','X','Y','Z'
            ]
# Root folder of the training set; images are looked up as
# <training_directory>/<label>/<label>_<index>.jpg
training_directory = './train20X20'
# Number of sample images read per label (indices 0 .. samples_per_letter - 1).
samples_per_letter = 10

# image_data[i] is the flattened binary image whose label is target_data[i].
image_data = []
target_data = []

for each_letter in letters:
    for each in range(samples_per_letter):
        image_path = os.path.join(training_directory, each_letter, each_letter + '_' + str(each) + '.jpg')
        img_details = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path rather than inside cvtColor.
        if img_details is None:
            raise FileNotFoundError("Could not read training image: " + image_path)
        gray_image = cv2.cvtColor(img_details, cv2.COLOR_BGR2GRAY)

        # Binarise with Otsu's threshold: pixels above the threshold become
        # 255, all others 0, stored as 8-bit values.
        threshold_value = threshold_otsu(gray_image)
        binary_image = ((gray_image > threshold_value) * 255).astype(np.uint8)

        # Flatten the 2D image into a 1D feature vector.
        flat_bin_image = binary_image.reshape(-1)
        image_data.append(flat_bin_image)
        target_data.append(each_letter)

In [2]:
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB

# Number of folds used for k-fold cross validation.
num_of_fold = 4

# Unfitted Gaussian Naive Bayes classifier; it is passed to cross_val_score
# here as the estimator to evaluate.
gnb_model = GaussianNB()

# Cross validation estimates the accuracy of the model. With num_of_fold = 4
# this is a 4-fold cross validation: in each round 1/4 of the data is held out
# for testing and the remaining 3/4 is used for training, giving one score
# per fold.
accuracy_result = cross_val_score(
    gnb_model,
    image_data,
    y=target_data,
    cv=num_of_fold,
)

# Report the per-fold scores as percentages.
accuracy_percent = accuracy_result * 100
print("Cross Validation Result for ", num_of_fold, " -fold")
print(accuracy_percent)

Cross Validation Result for  4  -fold
[91.76470588 89.41176471 90.58823529 95.29411765]


In [3]:
import pickle

# Fit the GNB model on the full image_data array and its labels.
gnb_model.fit(image_data, target_data)

# Save the generated model to a .sav file with pickle (protocol 2).
print("model trained.saving model..")
filename = './gnb_finalized_model.sav'
# The with-block guarantees the file is flushed and closed even if
# pickling fails part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(gnb_model, model_file, protocol=2)
print("model saved")

model trained.saving model..
model saved


In [1]:
import cv2
from skimage.filters import threshold_otsu
from skimage.measure import regionprops
from skimage.measure import label
from skimage.morphology import closing, square
from skimage.segmentation import clear_border
import numpy as np
import pickle

# Read the input image.
img = cv2.imread('joyofdata.png')
# cv2.imread returns None when the file cannot be read; stop here with a
# clear message instead of failing later inside cvtColor.
if img is None:
    raise FileNotFoundError("Could not read input image: joyofdata.png")

# Convert to a single-channel grayscale image.
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarise with Otsu's threshold (255 where pixel > threshold, else 0) as
# uint8, then invert so that pixels at or below the threshold become 255.
threshold_value = threshold_otsu(gray_image)
binary_image = ((gray_image > threshold_value) * 255).astype(np.uint8)
binary_image = 255 - binary_image

# Morphological closing with a 3x3 square to fill small gaps.
binary_image = closing(binary_image, square(3))

# Remove regions that touch the image border.
binary_image = clear_border(binary_image)

# Label connected regions and measure them.
label_img = label(binary_image)
regionprops_img = regionprops(label_img)

# Regions come back in label order, which is not left-to-right reading order.
# Sort by the left edge of each bounding box so the characters (and therefore
# the predicted string) follow the text from left to right.
# NOTE(review): this assumes the image holds a single line of text -- confirm.
regionprops_img = sorted(regionprops_img, key=lambda region: region.bbox[1])

characters= []
column_list = []
# NOTE(review): last_y0 is not read by any cell shown here; kept unchanged in
# case other code relies on it.
last_y0 = 0

for region in regionprops_img:
    # bbox is (min_row, min_col, max_row, max_col).
    y0, x0, y1, x1 = region.bbox

    # Outline the detected region on the original image for display.
    cv2.rectangle(img, (x0, y0), (x1, y1), (0,0, 255),1)

    # Crop the region and resize it to the 20x20 size used for classification.
    roi = binary_image[y0:y1, x0:x1]
    resized_char = cv2.resize(roi, (20, 20))

    characters.append(resized_char)
    column_list.append(x0)

print("load data finish!")

load data finish!


In [2]:
# Load the saved GNB model.
# NOTE: pickle.load can execute arbitrary code from the file; only load model
# files that come from a trusted source.
filename = './gnb_finalized_model.sav'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

# One prediction (a length-1 array holding the label) per segmented character.
classification_result = []

for each_character in characters:

    # Flatten the character image into a single-row 2D array, the shape
    # predict() expects for one sample.
    each_character = each_character.reshape(1, -1)
    result = model.predict(each_character)
    classification_result.append(result)

print('Classification Result : ')
print(classification_result)

# Concatenate the predicted labels in order into one string.
plate_string = ''.join(eachPredict[0] for eachPredict in classification_result)

print("\n")
print('Predicted Character : ')
print(plate_string)

# Show the annotated original and the binary image in OpenCV windows.
cv2.imshow("Original Image", img)
cv2.imshow("Binary Image", binary_image)

# waitKey(0) blocks until a key is pressed in one of the windows.
cv2.waitKey(0)
cv2.destroyAllWindows()

print("Finish")

Classification Result : 
[array(['L'], dtype='<U1'), array(['N'], dtype='<U1'), array(['1'], dtype='<U1'), array(['B'], dtype='<U1'), array(['K'], dtype='<U1'), array(['1'], dtype='<U1'), array(['D'], dtype='<U1'), array(['1'], dtype='<U1'), array(['X'], dtype='<U1'), array(['X'], dtype='<U1')]


Predicted Character : 
LN1BK1D1XX
