In [1]:
import cv2
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import os
import numpy as np
from preprocessing import *
from skimage.morphology import skeletonize

In [2]:
def dist(x, y, x1, y1):
    """Return the summed absolute x and y offsets between (x, y) and (x1, y1).

    Each offset is squared and then square-rooted, so the result is always a
    float. Note this is the L1 (city-block) distance, which coincides with the
    Euclidean distance whenever the two points share a row or a column.
    """
    offset_x = x - x1
    offset_y = y - y1
    # sqrt(d**2) == |d|, computed per axis and then summed
    return (offset_x ** 2) ** 0.5 + (offset_y ** 2) ** 0.5

def getThicknessHist(skeleton,binarizedImg):
    """Build a 10-bin histogram of stroke thickness sampled along a skeleton.

    For every third skeleton pixel, the external contour whose bounding box
    contains the pixel is looked up (the first matching box wins). The nearest
    contour points straight above/below and straight left/right of the pixel
    give a vertical and a horizontal extent; the smaller of the two is taken
    as the local thickness.

    Args:
        skeleton: 2-D array in which skeleton pixels have the value 0.
        binarizedImg: 2-D image; contours are extracted from
            ``255 - binarizedImg``.
            # assumes uint8 with ink == 0 and background == 255 — TODO confirm

    Returns:
        ``(counts, bin_edges)`` as two Python lists (10 counts, 11 edges), as
        produced by ``np.histogram(thicknesses, 10)``. When no pixel could be
        measured the counts are all zero.
    """
    skelRows, skelCols = np.where(skeleton == 0)

    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 2/4 return
    # (contours, hierarchy); index -2 is the contour list in every version.
    contours = cv2.findContours(255 - binarizedImg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    boundingBoxes = [cv2.boundingRect(contour) for contour in contours]

    thick_arr = []
    # Sample every third skeleton pixel to keep the cost down.
    for k in range(0, len(skelRows), 3):
        y, x = skelRows[k], skelCols[k]

        # Reset per pixel so a contour from a previous iteration is never
        # reused (and the name is never unbound) when nothing matches.
        insideContour = None
        for contour, (xB, yB, wB, hB) in zip(contours, boundingBoxes):
            # boundingRect's width/height are exclusive: valid columns are
            # xB .. xB + wB - 1, valid rows are yB .. yB + hB - 1.
            if xB <= x < xB + wB and yB <= y < yB + hB:
                insideContour = contour
                break
        if insideContour is None:
            continue

        # Contours come as (N, 1, 2) arrays of (x, y) points.
        points = insideContour.reshape(-1, 2)
        cols, rows = points[:, 0], points[:, 1]

        sameCol = cols == x
        sameRow = rows == y
        aboveRows = rows[sameCol & (rows < y)]
        belowRows = rows[sameCol & (rows > y)]
        leftCols = cols[sameRow & (cols < x)]
        rightCols = cols[sameRow & (cols > x)]

        # On a shared column/row the nearest point is simply the extreme
        # coordinate on that side; fall back to the pixel itself when the
        # contour has no point in that direction.
        nearAbove = aboveRows.max() if aboveRows.size else y
        nearBelow = belowRows.min() if belowRows.size else y
        nearLeft = leftCols.max() if leftCols.size else x
        nearRight = rightCols.min() if rightCols.size else x

        distVer = dist(x, nearBelow, x, nearAbove)
        distHor = dist(nearRight, y, nearLeft, y)
        thick_arr.append(min(distVer, distHor))

    thick_hist, bins = np.histogram(thick_arr, 10)
    return list(thick_hist), list(bins)
    

In [3]:
def Train_NN(X,Y):
    """Fit an MLP classifier on a stratified 75/25 split of the samples.

    Args:
        X: sequence of feature vectors, one per sample.
        Y: sequence of class labels aligned with ``X``; also used to stratify
            the split.

    Returns:
        ``(accuracy_train, accuracy_test, clf)``: the percentage of correct
        predictions on the training and held-out partitions, and the fitted
        ``MLPClassifier``.
    """
    print("Start Training ")
    X = np.array(X)
    Y = np.array(Y)
    # Fixed random_state values keep both the split and the fit repeatable.
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.25, random_state=4, stratify=Y
    )
    clf = MLPClassifier(
        alpha=1e-05,
        hidden_layer_sizes=(32,16),
        random_state=1,
        solver='lbfgs',
        max_iter=10000,
    )
    clf = clf.fit(x_train, y_train)
    print("Finished Fitting")

    # Accuracy = share of exact label matches, expressed in percent.
    y_pred_test = clf.predict(x_test)
    accuracy_test = np.mean(y_test==y_pred_test) * 100

    y_pred_train = clf.predict(x_train)
    accuracy_train = np.mean(y_train==y_pred_train) * 100
    return accuracy_train,accuracy_test,clf

In [None]:

base_dir='ACdata_base/'
# Every image is brought to this (rows, cols) shape before feature extraction.
TARGET_SHAPE = (110, 200)

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore the seeded train/test split) reproducible.
fonts = sorted(os.listdir(base_dir))
X,Y=[],[]
for font in fonts:
    font_dir = os.path.join(base_dir, font)
    # Only sub-directories hold samples; skip stray files in base_dir.
    if not os.path.isdir(font_dir):
        continue
    data = sorted(os.listdir(font_dir))
    print("curFont",font)
    for i,img in enumerate(data):
        img_dir = os.path.join(font_dir, img)
        imgGray = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
        if imgGray is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print("skipping unreadable image", img_dir)
            continue
        # skimage's resize yields floats, so scale back to the 0-255 range.
        imgGray = (resize(imgGray, TARGET_SHAPE)*255).astype(np.uint8)
        binarizedImg =  Binarize_Histogram(imgGray,'')
        # Skeleton pixels end up as 0 on a 255 background.
        skeleton = 255 - skeletonize(1-binarizedImg/255)*255
        hist,bins = getThicknessHist(skeleton,binarizedImg)
        # Feature vector = bin counts followed by bin edges.
        features = hist + bins
        print(i,len(data))
        X.append(features)
        # The directory name is the integer class label.
        Y.append(int(font))


curFont 8
0 190
1 190
2 190
3 190
4 190
5 190
6 190
7 190
8 190
9 190
10 190
11 190
12 190
13 190
14 190
15 190
16 190
17 190
18 190
19 190
20 190
21 190
22 190
23 190
24 190
25 190
26 190
27 190
28 190
29 190
30 190
31 190
32 190
33 190
34 190
35 190
36 190
37 190
38 190
39 190
40 190
41 190
42 190
43 190
44 190
45 190
46 190
47 190
48 190
49 190
50 190
51 190
52 190
53 190
54 190
55 190
56 190
57 190
58 190
59 190
60 190
61 190
62 190
63 190
64 190
65 190
66 190
67 190
68 190
69 190
70 190
71 190
72 190
73 190
74 190
75 190
76 190
77 190
78 190
79 190
80 190
81 190
82 190
83 190
84 190
85 190
86 190
87 190
88 190
89 190
90 190
91 190
92 190
93 190
94 190
95 190
96 190
97 190
98 190
99 190
100 190
101 190
102 190
103 190
104 190
105 190
106 190
107 190
108 190
109 190
110 190
111 190
112 190
113 190
114 190
115 190
116 190
117 190
118 190
119 190
120 190
121 190
122 190
123 190
124 190
125 190
126 190
127 190
128 190
129 190
130 190
131 190
132 190
133 190
134 190
135 190
136 190
137 

In [None]:
# Train on the collected features; Train_NN returns the train/test accuracy
# (in percent) and the fitted classifier.
accuracyTrain,accuracyTest,ModelNN = Train_NN(X,Y)

In [None]:
# Plot the stroke-thickness histogram of a single sample image.
sample_path = '1498.jpg'
imgGray = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)
if imgGray is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("could not read image: " + sample_path)
# NOTE(review): the dataset-loading cell resizes each image to (110, 200)
# before binarizing; this cell does not — confirm that is intentional.
binarizedImg =  Binarize_Histogram(imgGray,'')
skeleton = 255 - skeletonize(1-binarizedImg/255)*255
hist,bins = getThicknessHist(skeleton,binarizedImg)
# Re-draw the precomputed histogram: one sample per bin (its left edge),
# weighted by that bin's count.
plt.hist(bins[:-1], bins, weights=hist)
plt.xlabel("stroke thickness (pixels)")
plt.ylabel("sampled skeleton pixels")
print(hist)
print(bins)