#### **Optical Character Recognition using SVM**
Training an SVM model on characters of the Gandaki zone obtained after segmentation.

In [None]:
import glob
import os

import cv2
import numpy as np
from skimage.filters import threshold_otsu
from skimage.io import imread
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

try:
    # Standalone joblib is what current scikit-learn releases depend on.
    import joblib
except ImportError:
    # scikit-learn < 0.23 still ships a bundled copy.
    from sklearn.externals import joblib

try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    # Removed in scikit-learn 1.2 (ConfusionMatrixDisplay.from_estimator
    # replaces it); keep the name defined so the import cell still runs.
    plot_confusion_matrix = None


In [None]:
def preprocessing(image_path):
    """Load a character image and convert it to a flat binary feature vector.

    The image is read with OpenCV, resized to 40x40 pixels, converted to
    grayscale and binarized with Otsu's method. The 2D result is flattened
    because the classifier expects every sample to be a 1D array, so each
    pixel becomes one feature.

    Args:
        image_path: Path of the character image to load.

    Returns:
        1D array of 1600 values (40 * 40 pixels), each either 0 or 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from image_path.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread does not raise on a missing or unreadable file, it
        # returns None; fail here with the offending path instead of with an
        # opaque cv2.error inside resize().
        raise FileNotFoundError('Could not read image: {}'.format(image_path))

    image = cv2.resize(img, (40, 40), interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Converts the character image to a binary image. With THRESH_OTSU the
    # threshold is derived from the image histogram, so the 180 passed here
    # is ignored by OpenCV.
    binary_image = cv2.threshold(gray, 180, 255,
                                 cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # The 40*40 image becomes 1*1600: 1600 features, one per pixel.
    flat_bin_image = binary_image.reshape(-1)

    return flat_bin_image

Loading the segmented characters dataset and then training the SVM on that dataset.

In [None]:
# Class labels: the Devanagari digits ० to ९ followed by the letters ग and प.
# Each label is also the name of the sub-directory holding that class's images.
letters = [
    '०', '१', '२', '३', '४', '५', '६', '७', '८', '९', 'ग', 'प'
]


def _numeric_name_first(image_path):
    """Sort key: files named 0.jpg, 1.jpg, ... in numeric order, others by name."""
    stem = os.path.splitext(os.path.basename(image_path))[0]
    if stem.isdecimal():
        return (0, int(stem), stem)
    return (1, 0, stem)


def read_training_data(training_directory):
    """Load every training image and its label.

    For each label in ``letters`` all ``*.jpg`` files found in
    ``<training_directory>/<label>/`` are run through ``preprocessing``.
    The files returned by the glob are used directly, so images do not have
    to be named ``0.jpg`` to ``N-1.jpg``; numerically named files are still
    visited in numeric order.

    Args:
        training_directory: Directory containing one sub-directory per label.

    Returns:
        Tuple ``(image_data, target_data)`` of numpy arrays: the feature
        vectors returned by ``preprocessing`` and the matching labels.
    """
    image_data = []
    target_data = []
    for each_letter in letters:
        # Escape the directory part so characters such as '[' in the path
        # are not interpreted as glob wildcards.
        letter_dir = glob.escape(os.path.join(training_directory, each_letter))
        pattern = os.path.join(letter_dir, '*' + '.jpg')
        for image_path in sorted(glob.glob(pattern), key=_numeric_name_first):
            image_data.append(preprocessing(image_path))
            target_data.append(each_letter)

    return (np.array(image_data), np.array(target_data))



current_dir = '/content/drive/MyDrive/Colab Notebooks/ALPR/SISR/ALPR_Distorted/'

training_dataset_dir = os.path.join(current_dir, 'segmented_dataset_organized')

image_data, target_data = read_training_data(training_dataset_dir)

# Hold out 25% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(image_data, target_data, random_state = 0, test_size = 0.25)

# Build and fit the classifier here so the cell works on a fresh kernel.
# NOTE(review): the hyperparameters are an assumption. A linear kernel is a
# common choice for flattened binary pixels, and probability=True keeps
# predict_proba available to whoever loads the saved model; adjust both to
# match the configuration that was actually intended.
svc_model = SVC(kernel='linear', probability=True)
svc_model.fit(x_train, y_train)

classifier_predictions = svc_model.predict(x_test)

# Macro averaging weights every character class equally.
print('Accuracy  :', accuracy_score(y_test, classifier_predictions) * 100)
print('F1 score  :', f1_score(y_test, classifier_predictions, average='macro') * 100)
print('Recall    :', recall_score(y_test, classifier_predictions, average='macro') * 100)
print('Precision :', precision_score(y_test, classifier_predictions, average='macro') * 100)

# Rows and columns follow the order of `letters`.
matrix = confusion_matrix(y_test, classifier_predictions, labels = letters)
print('Confusion matrix : \n ', matrix)

# Persist the trained model with joblib so that the next time we need to
# predict, we don't have to train the model again.
save_directory = os.path.join(current_dir, 'models/svc/')
os.makedirs(save_directory, exist_ok=True)
joblib.dump(svc_model, os.path.join(save_directory, 'svc.pkl'))

In [None]:
def cross_validation(model, num_of_fold, train_data, train_label):
    """Run k-fold cross validation on ``model`` and print the scores.

    The data is split into ``num_of_fold`` parts; each part is used once for
    testing while the remaining parts are used for training (e.g. with
    ``num_of_fold=4``, 1/4 of the data tests and 3/4 trains in every round).

    Args:
        model: Estimator handed to ``cross_val_score``.
        num_of_fold: Number of folds, passed as ``cv``.
        train_data: Feature vectors of all samples.
        train_label: Labels matching ``train_data``.

    Returns:
        None. The per-fold scores, multiplied by 100, are printed.
    """
    fold_scores = cross_val_score(model, train_data, train_label, cv=num_of_fold)

    header_parts = ("Cross Validation Result for ", str(num_of_fold), " -fold")
    print(*header_parts)

    percent_scores = fold_scores * 100
    print(percent_scores)

# 4-fold cross validation of the SVM over the complete dataset.
cross_validation(
    model=svc_model,
    num_of_fold=4,
    train_data=image_data,
    train_label=target_data,
)