Reading all versions of a signature from a directory

In [134]:
import cv2
import numpy as np
import os
from skimage import color
from skimage import io
from sklearn import ensemble
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import operator
from sklearn.pipeline import Pipeline


# CONSTANTS
# path to signature's directory
path = './SVC/Standard/006'

# stores all versions of a signature
signatures = []   
# stores if it's original or fake (1 - original, 0 - fake)
target = []

# get circles for an image
def getCircles(file):
    image = cv2.imread(path + '/' + file,0)
    # removing noise from picture
    image = cv2.medianBlur(image,5)
    grayImage = cv2.cvtColor(image,cv2.COLOR_GRAY2BGR)
    
    # get circles center
    # HoughCircles parameters: param1 = how strong the edges of the circles need to be, param2 = how many edge points it needs to find to declare that it's found a circle
    circles = cv2.HoughCircles(image,cv2.HOUGH_GRADIENT,1,20,param1=300,param2=20,minRadius=0,maxRadius=30)
    circles = np.uint16(np.around(circles))
    

    for i in circles[0,:]:
        cv2.circle(grayImage,(i[0],i[1]),i[2],(0,255,0),2)
        # draw the center of the circle
        cv2.circle(grayImage,(i[0],i[1]),2,(0,0,255),3)
        # write the modified picture to file with circles on it
        filename = file.split('.')
        cv2.imwrite(filename[0] + '_'+'.png',grayImage)
    

# get circles for all the images and create new image from it with the drawn circles on it
def getCirclesForAll(path):
    circlesAll = []
    for file in os.listdir(path):
        if file.find('.png') != -1:
            getCircles(file)

# read all the images from a directory
for file in os.listdir(path):
    if file.find('.png') != -1:
        # gray scaling the image
        #signature = color.rgb2gray(io.imread(path + '/' + file))
        
        image = cv2.imread(path + '/' + file,0)
        image = cv2.medianBlur(image,5)
        grayImage = cv2.cvtColor(image,cv2.COLOR_GRAY2BGR)
        
        # reshape image matrice to list
        #signatures.append(signature.reshape(1,-1))
        signatures.append(image)
        
        
        # create label to image whether it's the original or fake one (1 - original, 0 - fake)
        if file.find('_e_') != -1:
            target.append(1)
        else:
            target.append(0)

            
# creates images with circles on them
getCirclesForAll(path)

creating input data (x) and expected output (y)

In [123]:
'''
#Define variables
n_samples = len(signatures)

y = np.asarray(target)
x = []

for img in range(0,len(signatures)):
    x.append(signatures[img][0])
    
# get longest image represantation and padding the rest of it
longest_image = max(x, key=lambda k: len(k))
longest_size = len(longest_image)


# white pixel
white = x[0][0]
# pad the difference
for sig in range(0,len(x)):
    if len(x[sig]) < longest_size:
        # how many white pixels have to be added
        difference = longest_size - len(x[sig])
        
        # adding extra white pixels as padding
        for pad in range(0,difference):
            x[sig] = np.append(x[sig], white)
'''

'\n#Define variables\nn_samples = len(signatures)\n\ny = np.asarray(target)\nx = []\n\nfor img in range(0,len(signatures)):\n    x.append(signatures[img][0])\n    \n# get longest image represantation and padding the rest of it\nlongest_image = max(x, key=lambda k: len(k))\nlongest_size = len(longest_image)\n\n\n# white pixel\nwhite = x[0][0]\n# pad the difference\nfor sig in range(0,len(x)):\n    if len(x[sig]) < longest_size:\n        # how many white pixels have to be added\n        difference = longest_size - len(x[sig])\n        \n        # adding extra white pixels as padding\n        for pad in range(0,difference):\n            x[sig] = np.append(x[sig], white)\n'

In [124]:
###############################
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

y = np.asarray(target)

# base class for feature extraction with pipelining
class Circles(BaseEstimator, TransformerMixin):
    
    def fit(self,signatures,y=y):
        return self
    
    def fit_transform(self,signatures,y=y):
        return self.transform(signatures,y)

    def transform(self,signatures,y=y):
        circlesAll = []
        
        for i in range(len(signatures)):
            circles = cv2.HoughCircles(signatures[i],cv2.HOUGH_GRADIENT,1,20,param1=300,param2=20,minRadius=0,maxRadius=30)
            circles = np.uint16(np.around(circles))
            
            circlesArray = []
            for j in circles[0,:]:
                circlesArray.append(j[0])
                circlesArray.append(j[1])
            circlesAll.append(circlesArray)
            
           
        longest_element = max(circlesAll, key=lambda k: len(k))
        #longest_size = len(longest_element)
        # ha feature szám nem egyezik hibát kapok, akkor ezt kell átállítani pl egy nagy számra, amit tuti nem ér el
        longest_size = 30
        
        for i in range(len(circlesAll)):
            if len(circlesAll[i]) < longest_size:
                # how many white pixels have to be added
                difference = longest_size - len(circlesAll[i])
        
                # adding extra white pixels as padding
                for pad in range(0,difference):
                    circlesAll[i].append(0)
                    
        return circlesAll
    

# define pipeline
pipeline = Pipeline([
    # define feature extraction
    ('features',FeatureUnion(
        transformer_list = [
            ('circles', Circles())
        ])
    ),
    # define model
    ('classifier',RandomForestClassifier(max_depth=110,bootstrap=False,min_samples_split=10,min_samples_leaf=1,n_estimators=944))
])

# datasets
x_train, x_test, y_train,y_test = train_test_split(signatures,y,test_size=0.2)

#cross_val_score(estimator=pipeline, X=x, y=y, scoring='accuracy')
#scores = cross_validation.cross_val_score(pipeline, signatures[:32], y[:32], cv=3)
#print(scores.mean())

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 100, stop = 1000, num = 10)]
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'classifier__n_estimators': n_estimators,
               'classifier__max_depth': max_depth,
               'classifier__min_samples_split': min_samples_split,
               'classifier__min_samples_leaf': min_samples_leaf,
               'classifier__bootstrap': bootstrap}

#rf_random = RandomizedSearchCV(estimator = pipeline, param_distributions = random_grid, n_iter = 1, cv = 3, verbose=2, random_state=42, n_jobs = -1)
pipeline.fit(X=x_train,y=y_train)


Pipeline(steps=[('features', FeatureUnion(n_jobs=1, transformer_list=[('circles', Circles())],
       transformer_weights=None)), ('classifier', RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',
            max_depth=110, max_features='auto', max_leaf_nodes=None,
            min_im...mators=944, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False))])

In [125]:
predictions = pipeline.predict(x_test)
accuracy = pipeline.score(x_test,y_test)

print('accuracy:',accuracy)
print('predictions:',predictions)
print('expected:',y_test)

###############################
# using the best paramcombination from RandomizedSearchCV
#model = rf_random.fit(signatures[:32], y[:32])
#rf_random.best_params_

# gets the pipeline tuning parameters
#pipeline.get_params().keys()

accuracy: 0.875
predictions: [1 0 0 1 1 1 0 1]
expected: [1 0 0 1 1 0 0 1]
