In [2]:
from __future__ import division

from scipy.signal import convolve2d
import matplotlib.pyplot as plt
import numpy as np
from preprocessing import *
from featuresextraction import *
import cv2
import os
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier,RandomForestClassifier
from scipy.stats import mode

In [3]:
def lpq(img,winSize=3,freqestim=1,mode='nh'):
    rho=0.90

    STFTalpha=1/winSize  # alpha in STFT approaches (for Gaussian derivative alpha=1)
    sigmaS=(winSize-1)/4 # Sigma for STFT Gaussian window (applied if freqestim==2)
    sigmaA=8/(winSize-1) # Sigma for Gaussian derivative quadrature filters (applied if freqestim==3)

    convmode='valid' # Compute descriptor responses only on part that have full neigborhood. Use 'same' if all pixels are included (extrapolates np.image with zeros).

    img=np.float64(img) # Convert np.image to double
    r=(winSize-1)/2 # Get radius from window size
    x=np.arange(-r,r+1)[np.newaxis] # Form spatial coordinates in window

    if freqestim==1:  #  STFT uniform window
        #  Basic STFT filters
        w0=np.ones_like(x)
        w1=np.exp(-2*np.pi*x*STFTalpha*1j)
        w2=np.conj(w1)

    ## Run filters to compute the frequency response in the four points. Store np.real and np.imaginary parts separately
    # Run first filter
    filterResp1=convolve2d(convolve2d(img,w0.T,convmode),w1,convmode)
    filterResp2=convolve2d(convolve2d(img,w1.T,convmode),w0,convmode)
    filterResp3=convolve2d(convolve2d(img,w1.T,convmode),w1,convmode)
    filterResp4=convolve2d(convolve2d(img,w1.T,convmode),w2,convmode)

    # Initilize frequency domain matrix for four frequency coordinates (np.real and np.imaginary parts for each frequency).
    freqResp=np.dstack([filterResp1.real, filterResp1.imag,
                        filterResp2.real, filterResp2.imag,
                        filterResp3.real, filterResp3.imag,
                        filterResp4.real, filterResp4.imag])

    ## Perform quantization and compute LPQ codewords
    inds = np.arange(freqResp.shape[2])[np.newaxis,np.newaxis,:]
    LPQdesc=((freqResp>0)*(2**inds)).sum(2)

    ## Switch format to uint8 if LPQ code np.image is required as output
    if mode=='im':
        LPQdesc=np.uint8(LPQdesc)

    ## Histogram if needed
    if mode=='nh' or mode=='h':
        LPQdesc=np.histogram(LPQdesc.flatten(),range(256))[0]

    ## Normalize histogram if needed
    if mode=='nh':
        LPQdesc=LPQdesc/LPQdesc.sum()

    return LPQdesc

In [4]:
HVSL_feature = []
LVL_features = []
HPP_features = []
TOS_features = []
Stats_features = []
TH_features = []
lpq_features = []
labels = []
feature_to_plot = []
for i in range(1, 10):
    input_dir = f'ACdata_base/{i}/'
    dirs = os.listdir(input_dir)
    for idx,img_dir in enumerate(dirs):
        #print("processing img "+str(img_dir))
        pre = img_dir.split('.')[0]
        img_dir = input_dir + img_dir
        img_rgb = cv2.imread(img_dir)
        img = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
        binarizedImg =  Binarize_Histogram(img,pre)

        ####preprocessing for feature extraction:
        ####our skeletonization
        skeletonized = Skeletonization(binarizedImg, pre)

        ###their skeletonization
        skeleton = 255 - skeletonize(1-binarizedImg/255)*255
        ####edge detection:
        edged = LaplacianEdge(binarizedImg, pre)
        
        LVL_descriptor = getLVL(skeletonized,img,pre)
        HVSL_decriptor = getHVSL(edged,img_rgb,pre) #contains 2 features (V/H,(#of black pixels/((H/total)+(V/total))))
        HPP_descriptor = getHPP(cropToText(binarizedImg),pre)
        TOS_descriptor = getTOS(img)
        Stats_descriptor = getStatsFeatures(binarizedImg)
        lpq_decriptor= lpq(binarizedImg)

        lpq_features.append(lpq_decriptor)
        HVSL_feature.append(HVSL_decriptor)
        LVL_features.append(LVL_descriptor)
        HPP_features.append(HPP_descriptor)
        TOS_features.append(TOS_descriptor)
        Stats_features.append(Stats_descriptor)
        #TH_features.append(TH_descriptor)
        labels.append([i])

#######classifier
labels = np.array(labels)    

  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


In [54]:
x_train, x_test, y_train, y_test = train_test_split(lpq_features, labels, test_size=0.25, random_state=10, stratify=labels)
clf = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(500,250),random_state=1,solver='lbfgs',max_iter=10000)
clf = clf.fit(x_train, y_train)
predicted_lpq = clf.predict_proba(x_test)

  return f(**kwargs)


In [57]:
X_train, X_test, y_train, y_test = train_test_split(HVSL_feature, labels, test_size=0.25,random_state=10, stratify=labels)
clf_HVSL = svm.SVC(probability=True)
clf_HVSL.fit(X_train, y_train)
predicted_HVSL = clf_HVSL.predict_proba(X_test)

  return f(**kwargs)


In [58]:
X_train, X_test, y_train, y_test = train_test_split(HPP_features, labels, test_size=0.25,random_state=10, stratify=labels)
clf_HPP = svm.SVC(probability=True)
clf_HPP.fit(X_train, y_train)
predicted_HPP = clf_HPP.predict_proba(X_test)

  return f(**kwargs)


In [59]:
X_train, X_test, y_train, y_test = train_test_split(Stats_features, labels, test_size=0.25,random_state=10, stratify=labels)
clf_Stats = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(9,18),random_state=1,solver='lbfgs',max_iter=10000,)
clf_Stats.fit(X_train,y_train)
predicted_Stats = clf_Stats.predict_proba(X_test)

  return f(**kwargs)


In [60]:
X_train, X_test, y_train, y_test = train_test_split(TOS_features, labels, test_size=0.25,random_state=10, stratify=labels)
clf_TOS = svm.SVC(probability=True) 
clf_TOS.fit(X_train,y_train)
predicted_TOS = clf_TOS.predict_proba(X_test)

  return f(**kwargs)


In [61]:
X_train, X_test, y_train, y_test = train_test_split(LVL_features, labels, test_size=0.25,random_state=10, stratify=labels)
clf_LVL = svm.SVC(probability=True)
clf_LVL.fit(X_train,y_train)
predicted_LVL = clf_LVL.predict_proba(X_test)

  return f(**kwargs)


In [62]:
summation = predicted_HPP + predicted_HVSL + predicted_TOS + predicted_Stats + predicted_LVL+ predicted_lpq
predicted = np.argmax(summation, axis=1) + 1

In [63]:
predicted_lpq3 = np.argmax(predicted_lpq, axis=1)+1
# # print(predicted.shape)
# accuracy_test = np.mean(y_test.flatten()==predicted_lpq) * 100
# print(accuracy_test)
predicted_lpq2 = clf.predict(x_test)
print(
    #f"Classification report for classifier {voting_clf}:\n"
    f"{classification_report(y_test, predicted)}\n"
)

              precision    recall  f1-score   support

           1       0.98      1.00      0.99        48
           2       0.98      1.00      0.99        48
           3       0.93      0.93      0.93        45
           4       0.98      0.93      0.96        46
           5       1.00      0.98      0.99        49
           6       1.00      0.98      0.99        45
           7       0.98      1.00      0.99        46
           8       0.98      1.00      0.99        47
           9       1.00      1.00      1.00        48

    accuracy                           0.98       422
   macro avg       0.98      0.98      0.98       422
weighted avg       0.98      0.98      0.98       422


