In [66]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split


In [67]:
def getBorderColor(img):
    
    border = np.asarray(img[0,:])
    border = np.concatenate((border, np.asarray(img[-1, :])))
    border = np.concatenate((border, np.asarray(img[:, 0])))
    border = np.concatenate((border, np.asarray(img[:, -1])))
    return np.bincount(border).argmax()


def pre_processing(img):
    _, img_binarized = cv2.threshold(img, 120, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)    
    if(getBorderColor(img_binarized) != 0):
        img_binarized = cv2.bitwise_not(img_binarized) 
    return img_binarized



def showImage(img):
    plt.imshow(img, cmap='gray')
    plt.show()    



def getFeaturesHOG(img):
   img = cv2.resize(img, (256, 128))
   cell_size = (32, 32)  # h x w in pixels
   block_size = (2, 2)  # h x w in cells
   nbins = 9  # number of orientation bins
   # winSize is the size of the image cropped to an multiple of the cell size
   hog = cv2.HOGDescriptor(_winSize=(img.shape[1] // cell_size[1] * cell_size[1],
                                    img.shape[0] // cell_size[0] * cell_size[0]),
                            _blockSize=(block_size[1] * cell_size[1],
                                        block_size[0] * cell_size[0]),
                            _blockStride=(cell_size[1], cell_size[0]),
                            _cellSize=(cell_size[1], cell_size[0]),
                            _nbins=nbins)

   hog_feats = hog.compute(img)
   return hog_feats.flatten()     

# Read Data, Split Training, Validation, Test

In [70]:
data_set = []
Y = []
for i in range(1, 10):
    for filename in os.listdir("ACdata_base/" + str(i)):
        img = cv2.imread(os.path.join("ACdata_base/" + str(i),filename), cv2.IMREAD_GRAYSCALE)
        if img is not None:
            data_set.append(pre_processing(img))
            Y.append(i)
            
X_train, X_testValid, Y_train, Y_testValid = train_test_split(data_set, Y, test_size=0.4, random_state=42)
X_validation, X_test, Y_validation, Y_test = train_test_split(X_testValid, Y_testValid, test_size=0.5, random_state=42)            

In [38]:

# for i in range(len(test_set)):
#     for j in range(len(test_set[i])):
#         showImage(test_set[i][j])

In [80]:
def getTrainFeatures():
   x_features = []
   for i in range(len(X_train)):
        x_features.append(getFeaturesHOG(X_train[i]))
   return np.asarray(x_features)

In [81]:
x_features = getTrainFeatures()
print(x_features.shape)
print(Y_train)


(1011, 756)
[8, 8, 4, 3, 2, 2, 6, 8, 7, 7, 7, 9, 4, 2, 5, 2, 7, 7, 8, 1, 1, 1, 4, 2, 6, 7, 4, 2, 6, 1, 3, 5, 7, 6, 9, 3, 1, 8, 3, 2, 7, 3, 2, 5, 2, 6, 5, 5, 9, 4, 2, 3, 4, 3, 4, 8, 9, 1, 7, 6, 3, 2, 1, 2, 3, 5, 1, 3, 2, 1, 8, 8, 9, 6, 9, 9, 6, 9, 4, 9, 9, 4, 6, 9, 2, 9, 2, 3, 9, 2, 7, 8, 9, 9, 3, 3, 4, 9, 2, 5, 6, 8, 6, 2, 5, 9, 6, 9, 1, 3, 4, 1, 2, 7, 8, 3, 5, 8, 1, 9, 2, 6, 6, 5, 9, 4, 7, 9, 8, 3, 2, 5, 6, 2, 8, 4, 1, 1, 2, 8, 7, 4, 2, 9, 2, 5, 1, 8, 3, 9, 5, 4, 6, 4, 4, 8, 4, 1, 2, 5, 1, 2, 4, 1, 9, 1, 2, 1, 7, 2, 1, 6, 6, 5, 1, 5, 8, 6, 5, 1, 9, 3, 2, 6, 7, 7, 6, 3, 6, 7, 4, 3, 6, 5, 8, 6, 6, 1, 6, 1, 4, 6, 8, 2, 2, 4, 2, 3, 8, 6, 5, 1, 7, 7, 1, 6, 6, 1, 5, 1, 2, 9, 1, 7, 3, 1, 8, 3, 8, 2, 6, 1, 1, 5, 5, 1, 1, 8, 1, 8, 3, 5, 3, 1, 2, 2, 3, 6, 5, 2, 9, 5, 1, 6, 9, 4, 7, 7, 3, 1, 3, 1, 9, 5, 7, 2, 9, 5, 2, 8, 8, 9, 5, 5, 4, 3, 2, 8, 2, 2, 7, 6, 1, 8, 4, 2, 7, 7, 1, 1, 7, 1, 5, 7, 8, 3, 3, 5, 4, 8, 8, 1, 8, 7, 1, 1, 1, 1, 3, 6, 4, 9, 7, 3, 5, 5, 4, 5, 6, 6, 6, 8, 7, 3, 8, 6, 9, 1, 4, 