In [1]:
import numpy as np
from preprocessing import *
from math import sin,cos,pi
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import os
from itertools import chain, combinations

In [8]:
def get_Reference_Line(binarized):
    hist = np.sum(1-binarized/255,axis=1)
    referenceline = np.argmax(hist)
    return referenceline

def get_BlackWhiteRatio(binarized):
    hImg, wImg = binarized.shape
    blackCount = np.sum(binarized==0)
    whiteCount = max(1,np.sum(binarized==255))
    return blackCount/whiteCount

def get_Components(binarized):
    contours,_ = cv2.findContours(255-binarized, cv2.RETR_EXTERNAL,  cv2.CHAIN_APPROX_NONE)
    return contours,len(contours)

def get_CountContours(contours,referenceline):
    countAbove,countBelow=0,0
    for cnt in contours:
        x,y,w,h  = cv2.boundingRect(cnt)
        if y+h <= referenceline:countAbove+=1
        if y > referenceline:countBelow+=1
        
    return countAbove,countBelow

def dist(x, y, x1, y1):
    return ((x - x1)**2)**(0.5) + ((y - y1)**2) ** (0.5)

def get_Orientation(contours,binarized):
    hImg, wImg = binarized.shape
    test = np.zeros((hImg,wImg,3))
    test[:,:,0] = binarized
    test[:,:,1] = binarized
    test[:,:,2] = binarized
    anglesSum = 0
    for cnt in contours:
        if cnt.shape[0] > 5:
            x,y,_,_  = cv2.boundingRect(cnt)
            ellipse = cv2.fitEllipse(cnt)
            (xc,yc),(d1,d2),angle = ellipse
            angle = 90 - angle
            anglesSum += angle
            cv2.putText(test,f'{int(angle)}',(x,y),0,0.25,(0,0,255)) 
            cv2.ellipse(test, ellipse, (255,0, 255), 1, cv2.LINE_AA)
    anglesMean = anglesSum/len(contours)
    cv2.imwrite("withEllipses.png", test)
    return anglesMean
def getHVSL(edge_image, img, name=""):
    fld = cv2.ximgproc.createFastLineDetector()
    lines = fld.detect(edge_image.astype('uint8'))
    no_of_horizontal_lines = 0.0
    no_of_vertical_lines = 0.0
    for line in lines:
        x0 = int(round(line[0][0]))
        y0 = int(round(line[0][1]))
        x1 = int(round(line[0][2]))
        y1 = int(round(line[0][3]))
        d = dist(x0, y0, x1, y1)
        if d > 10: #You can adjust the distance
            if np.abs(x0 - x1) >= 0 and np.abs(x0 - x1) <= 3:
                no_of_vertical_lines += 1 
                #cv2.line(img, (x0, y0), (x1, y1), (255, 0, 0), 1, cv2.LINE_AA)
            if np.abs(y0 - y1) >= 0 and np.abs(y0 - y1) <= 3:
                no_of_horizontal_lines += 1
                #cv2.line(img, (x0, y0), (x1, y1), (0, 0, 255), 1, cv2.LINE_AA)
    if no_of_horizontal_lines == 0:
        no_of_horizontal_lines = 1
    return no_of_vertical_lines,no_of_horizontal_lines

def LaplacianEdge(img_binary,name=""):
    kernel_size = 3
    ddepth = cv2.CV_16S
    img_edge = cv2.Laplacian(img_binary, ddepth, ksize=kernel_size)
    img_edge = 255 - img_edge
    #cv2.imwrite(f"output/{name}_final_otsu.png",img_edge)
    return img_edge

def get_Features(binarized):
    hImg, wImg = binarized.shape

    referenceline = get_Reference_Line(binarized)
    blackwhiteRatioTotal = get_BlackWhiteRatio(binarized)

    imgAboveRef = binarized[:referenceline,:]
    imgBelowRef = binarized[referenceline:,:]

    blackwhiteRatioAbove = get_BlackWhiteRatio(imgAboveRef)
    blackwhiteRatioBelow = get_BlackWhiteRatio(imgBelowRef)

    contoursTotal, contoursTotalCount = get_Components(binarized)

    contoursAboveCount,contoursBelowCount = get_CountContours(contoursTotal,referenceline)
    DenistyAbove = contoursAboveCount/contoursTotalCount
    DenistyBelow = contoursBelowCount/contoursTotalCount
    orientation = get_Orientation(contoursTotal,binarized)
    
    #########aya
    edges = LaplacianEdge(binarized)
    
    verticalCount,horizontalCount = getHVSL(edges, img, name="")
    

    features = [referenceline/hImg,blackwhiteRatioTotal,blackwhiteRatioAbove,blackwhiteRatioBelow,orientation,DenistyAbove,DenistyBelow,verticalCount,horizontalCount]
    return features

In [9]:
base_dir='ACdata_base/'

fonts = os.listdir(base_dir)
X=[]
Y=[]
for font in fonts:
    data = os.listdir(base_dir+font)
    print("curFont",font)
    for img in data:
        img_dir = base_dir+font+'/'+img
        imgGray = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
        binarizedImg =  Binarize_Histogram(imgGray,img_dir)
        features = get_Features(binarizedImg)
        X.append(features)
        Y.append(int(font))


curFont 8
curFont 5
curFont 6
curFont 7
curFont 9
curFont 3
curFont 2
curFont 1
curFont 4


In [16]:
X = np.array(X)
Y = np.array(Y)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=4, stratify=Y)
clf_tree = DecisionTreeClassifier(max_depth=9, random_state=1)
clf_tree = clf_tree.fit(x_train, y_train)
print("Finished Fitting Tree")
y_pred_test = clf_tree.predict(x_test)
accuracy_test = np.mean(y_test==y_pred_test)
y_pred_train = clf_tree.predict(x_train)
accuracy_train = np.mean(y_train==y_pred_train)
print(f"Tree :  accuracy_train: {accuracy_train}, accuracy_test: {accuracy_test} ")

Finished Fitting Tree
Tree :  accuracy_train: 0.8907363420427553, accuracy_test: 0.6706161137440758 


In [11]:
def Train_NN(X,Y,hidden):
    print("Start Training ")
    N = len(X[0])
    X = np.array(X)
    Y = np.array(Y)
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=4, stratify=Y)
    clf = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(hidden),random_state=1,solver='lbfgs',max_iter=10000)
    clf = clf.fit(x_train, y_train)
    print("Finished Fitting")
    y_pred_test = clf.predict(x_test)
    accuracy_test = np.mean(y_test==y_pred_test) * 100
    
    y_pred_train = clf.predict(x_train)
    accuracy_train = np.mean(y_train==y_pred_train) * 100
    print(f"accuracy_train: {accuracy_train}, accuracy_test: {accuracy_test} with {N} features and {hidden} layers" )
    return accuracy_train,accuracy_test

In [18]:
def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
N = len(X[0])
features_x = list(range(N))
features_powerset = list(powerset(features_x))

In [19]:
def reduce_features(X,featureset):
    Xnew=[]
    featureset = list(featureset)
    for x in X:
        Xnew.append(x[featureset])
    return Xnew


accuracies=[]
for featureset in features_powerset:
    accuracy_set=0
    if len(featureset)>0:
        newX = reduce_features(X,featureset)
#         print("newX",newX)
        #N = len(newX[0])
        #for i in range(N,N*2):
        accuracy = Train_NN(newX,Y,9)
        accuracies.append(accuracy)

Start Training 
Finished Fitting
accuracy_train: 26.207442596991292, accuracy_test: 26.540284360189574 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 45.209817893903406, accuracy_test: 42.89099526066351 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 42.517814726840854, accuracy_test: 38.62559241706161 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 39.82581155977831, accuracy_test: 34.360189573459714 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 34.52098178939034, accuracy_test: 33.175355450236964 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 32.779097387173394, accuracy_test: 36.72985781990521 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 23.198733174980205, accuracy_test: 20.85308056872038 with 1 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 11.559778305621537, accuracy_test

Finished Fitting
accuracy_train: 57.00712589073634, accuracy_test: 54.02843601895735 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 49.16864608076009, accuracy_test: 47.1563981042654 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 50.3562945368171, accuracy_test: 46.208530805687204 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 51.86064924782264, accuracy_test: 45.7345971563981 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 55.2652414885194, accuracy_test: 51.8957345971564 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 50.1187648456057, accuracy_test: 43.3649289099526 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 51.22723673792557, accuracy_test: 45.023696682464454 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 38.71733966745843, accuracy_test: 37.677725118483416 with 3 fe

Finished Fitting
accuracy_train: 52.01900237529691, accuracy_test: 52.3696682464455 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 55.027711797308, accuracy_test: 51.8957345971564 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 44.10134600158353, accuracy_test: 41.23222748815166 with 3 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 59.69912905779889, accuracy_test: 56.39810426540285 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 65.32066508313538, accuracy_test: 63.74407582938388 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 69.91290577988916, accuracy_test: 62.796208530805686 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 63.024544734758514, accuracy_test: 58.767772511848335 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 44.576405384006335, accuracy_test: 41.943127962085306 with

Finished Fitting
accuracy_train: 59.69912905779889, accuracy_test: 59.47867298578199 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 72.3673792557403, accuracy_test: 70.37914691943128 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 64.8456057007126, accuracy_test: 64.21800947867298 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 68.80443388756929, accuracy_test: 67.06161137440758 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 64.60807600950119, accuracy_test: 57.81990521327014 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 69.51702296120348, accuracy_test: 67.77251184834124 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 44.4972288202692, accuracy_test: 43.838862559241704 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 65.63737133808392, accuracy_test: 59.71563981042654 with 4 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Finished Fitting
accuracy_train: 71.41726049089469, accuracy_test: 67.06161137440758 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 64.60807600950119, accuracy_test: 61.37440758293838 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 63.895486935866984, accuracy_test: 58.0568720379147 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 55.5027711797308, accuracy_test: 51.421800947867304 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 37.13380839271576, accuracy_test: 32.93838862559242 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 60.96595407759303, accuracy_test: 54.502369668246445 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 68.17102137767222, accuracy_test: 64.69194312796208 with 4 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 59.9366587490103, accuracy_test: 53.08056872037915 with 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Finished Fitting
accuracy_train: 68.72525732383214, accuracy_test: 62.55924170616114 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 71.49643705463184, accuracy_test: 65.63981042654028 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 57.08630245447348, accuracy_test: 54.502369668246445 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 60.25336500395883, accuracy_test: 58.29383886255924 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 65.16231195566112, accuracy_test: 63.03317535545023 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 75.21773555027713, accuracy_test: 73.22274881516587 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 63.81631037212985, accuracy_test: 61.84834123222749 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 61.282660332541575, accuracy_test: 57.81990521327014 wit

Finished Fitting
accuracy_train: 76.08867775138559, accuracy_test: 74.40758293838863 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 76.72209026128266, accuracy_test: 75.59241706161137 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 70.78384798099762, accuracy_test: 66.35071090047393 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 75.93032462391133, accuracy_test: 71.32701421800948 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 60.09501187648456, accuracy_test: 55.45023696682464 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 67.2209026128266, accuracy_test: 61.84834123222749 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 71.41726049089469, accuracy_test: 67.29857819905213 with 5 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 70.38796516231196, accuracy_test: 65.87677725118483 with 5

Finished Fitting
accuracy_train: 74.03008709422011, accuracy_test: 67.77251184834124 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 65.63737133808392, accuracy_test: 63.74407582938388 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 75.85114806017418, accuracy_test: 71.80094786729858 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 66.0332541567696, accuracy_test: 65.16587677725119 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 76.5637371338084, accuracy_test: 72.03791469194313 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 65.55819477434679, accuracy_test: 61.137440758293835 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 77.0387965162312, accuracy_test: 74.17061611374407 with 6 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 74.10926365795724, accuracy_test: 69.90521327014217 with 6 

Finished Fitting
accuracy_train: 79.88915281076801, accuracy_test: 77.48815165876776 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 73.71338083927158, accuracy_test: 69.90521327014217 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 51.22723673792557, accuracy_test: 46.208530805687204 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 79.73079968329374, accuracy_test: 75.59241706161137 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 77.35550277117973, accuracy_test: 72.98578199052133 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 71.10055423594616, accuracy_test: 69.66824644549763 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 70.38796516231196, accuracy_test: 66.82464454976304 with 7 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 78.30562153602534, accuracy_test: 74.64454976303317 with

In [14]:
accuracy = Train_NN(X,Y,9)
accuracy = Train_NN(X,Y,10)
accuracy = Train_NN(X,Y,11)
accuracy = Train_NN(X,Y,12)

Start Training 
Finished Fitting
accuracy_train: 82.42280285035629, accuracy_test: 80.33175355450237 with 9 features and 9 layers
Start Training 
Finished Fitting
accuracy_train: 70.62549485352335, accuracy_test: 67.29857819905213 with 9 features and 10 layers
Start Training 
Finished Fitting
accuracy_train: 83.92715756136184, accuracy_test: 78.90995260663507 with 9 features and 11 layers
Start Training 
Finished Fitting
accuracy_train: 84.16468725257323, accuracy_test: 81.04265402843602 with 9 features and 12 layers


In [15]:
accuracy = Train_NN(X,Y,16)
accuracy = Train_NN(X,Y,14)

Start Training 
Finished Fitting
accuracy_train: 85.74821852731591, accuracy_test: 80.09478672985783 with 9 features and 16 layers
Start Training 
Finished Fitting
accuracy_train: 84.79809976247031, accuracy_test: 80.33175355450237 with 9 features and 14 layers


In [None]:
X = np.array(X)
Y = np.array(Y)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=4, stratify=Y)
print("training size:",x_train.shape)

In [None]:
clf = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(9), random_state=1,solver='lbfgs',max_iter=10000)
clf = clf.fit(x_train, y_train)
print("Finished Fitting")

In [None]:
import matplotlib.pyplot as plt
test_idx=7
plt.show()
y_pred = clf.predict(x_test)
y_pred_prop = clf.predict_proba(x_test)
accuracy = np.mean(y_test==y_pred)
print(y_pred_prop[test_idx],y_pred[test_idx],y_test[test_idx])
y_axis = list(range(1,10))
plt.bar(y_axis, y_pred_prop[test_idx], color ='green')
print(y_axis)
plt.show()
print("accuracy: ",accuracy*100)