In [1]:
from skimage.feature import local_binary_pattern
import skimage.io as io
import matplotlib.pyplot as plt
import numpy as np
import cv2
from skimage.exposure import histogram
import os
import random
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler  
from sklearn.neural_network import MLPClassifier

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import BaggingClassifier
import time

In [2]:
trainingset=[]
testset=[]
labels=[]
histSet=[]
trainingsetHist=[]
testsetHist=[]
predictedLabels=[]

In [3]:
def describe(images,label,isTrain=0,numPoints=38,radius=10, eps=1e-7):
    global trainingsetHist
    global testsetHist
    for image in images:
        lbp = local_binary_pattern(image, numPoints, radius, method= "uniform" ) 
        (hist, _) = np.histogram(lbp.ravel(),bins=np.arange(0, numPoints + 3),range=(0, numPoints + 2))
        hist = hist.astype("float")
        hist /= (hist.sum() + eps)
        if isTrain==0:
            trainingsetHist.append([hist,label])
        else:
            testsetHist.append(hist)
    return 

In [4]:
def cutLines(gray):
    edges = cv2.Canny(gray,90,100,apertureSize = 3)
    minLineLength=55
    lines = cv2.HoughLinesP(image=edges,rho=1,theta=np.pi/180, threshold=100,lines=np.array([]), minLineLength=minLineLength,maxLineGap=0)
 
    linesNew=[ line[0][1] for line in lines if line[0][1]==line[0][3]]
    linesNew=sorted(linesNew)
    finalList=[linesNew[0]]
    for i in range(1,len(linesNew)):
        if linesNew[i]-linesNew[i-1]>=100:
            finalList.append(linesNew[i])
    return gray[finalList[1]:finalList[2],:]

In [5]:
def lines_segments(image):
    gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
    kernel = np.ones((10,200), np.uint8)
    img_dilation = cv2.dilate(thresh, kernel, iterations=1)
    _, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    images = []
    flag=True
    y1 = 10e5
    y2 = -10e5 
    height = 0 
    for i, ctr in enumerate(sorted_ctrs):
        x, y, w, h = cv2.boundingRect(ctr) 
        roi = gray[y:y+h, x:x+w]
        if(len(sum(roi))>200 and h>40): #20 is thershold you can change this, this is for small lines and points, remove them from return images 
            th, im_th_otsu = cv2.threshold(roi, 128, 192, cv2.THRESH_OTSU)
            images.append(im_th_otsu)
            y2=max(y2,y+h)
            y1=min(y1,y) 
            
    #--UnComment-- in case you want to return the whole  image not just lines 
    #if(y2>0):
    #    croped = gray[y1:y2, ::]
    #else:
    #   croped = gray
    return images

In [6]:
def reset():
    testset=[]
    trainingset=[]
    labels=[]
    histSet=[]
    trainingset=[]
    testsetHist=[]

In [10]:
def run(pathToData='new data'):
    global trainingset
    global testset
    global labels
    global histSet
    global trainingsetHist
    global testsetHist
    global predictedLabels

    predictedLabels = []
    
    data= sorted(os.listdir(pathToData))
    path=pathToData
    for datafolder in data:
        path+='/'+datafolder
        imageFolders=os.listdir(path)
        for imageFolder in sorted(imageFolders):
            if(imageFolder=='test.png'):
                testset.append(cv2.imread(path+'/'+imageFolder))
            else:
                trainingset.append([cv2.imread(path+'/'+imageFolder+'/1.png'),int(imageFolder)])
                trainingset.append([cv2.imread(path+'/'+imageFolder+'/2.png'),int(imageFolder)])
        path=pathToData
        start = time.time()
        for image in trainingset:
            newImage=cutLines(image[0])
            segments=lines_segments(newImage)
            describe(segments,image[1])
        random.shuffle(trainingsetHist)
        for datatrain in trainingsetHist:
            histSet.append(datatrain[0])
            labels.append(datatrain[1])

        #------------models-----------------------
        #model = SVC(C=5, gamma='auto', probability=True)
        #model= AdaBoostClassifier(n_estimators=250, random_state=0)
        #model=GaussianNB()
        #model=MultinomialNB()
        #model= KNeighborsClassifier(n_neighbors=7)
        model=RandomForestClassifier(max_depth=100,n_estimators=100, random_state=0)
        #model = BaggingClassifier(n_estimators=100,random_state=0)
        #model=MLPClassifier(solver='sgd',hidden_layer_sizes=(128, 16), random_state=0,max_iter=50000,learning_rate_init=0.0001)
        #------------------------------------------
        model.fit(histSet, labels)
        newImage=cutLines(testset[0])
        segments=lines_segments(newImage)
        describe(segments,-1,1)
        p = model.predict_proba(testsetHist)
        p = np.sum(p, axis=0)
        print(p)
        y_pred = model.classes_[np.argmax(p)]
        end = time.time()
        
        print(y_pred)
        predictedLabels.append(y_pred)

        result_file = open("results.txt", 'a')
        result_file.write(str(y_pred) +'\n')
        result_file.close()
        
        time_file = open("time.txt", 'a')
        t = (end - start)
        time_file.write(str(round(t,2)) + '\n')
        time_file.close()

        testset=[]
        trainingset=[]
        labels=[]
        histSet=[]
        trainingset=[]
        testsetHist=[]

In [11]:
def main():
    global predictedLabels
    path=str(input('please enter the path to the dataset file '))
    run(pathToData=path)
    true=[3, 3, 1, 2, 3]
    totalCorrect=0
    for i in range(len(predictedLabels)):
        if predictedLabels[i]==true[i]:
            totalCorrect+=1
        else:
            print(i)
    print(totalCorrect/len(predictedLabels)*100)

In [12]:
main()

[1.44 0.84 5.72]
3
[1.83 0.89 2.28]
3
[9.53 0.72 0.75]
1
[0.57 9.25 0.18]
2
[0.65 1.11 7.24]
3
100.0
