- Bibliotecas utilizadas:
    - opencv-python==4.8.1.78
    - natsort==8.4.0
    - numpy==1.26.0
    - pandas==2.1.1
    - scikit-image==0.20
    - scikit-learn==1.3.1

In [1]:
# imports
import cv2
import natsort
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC 
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
import warnings
# Suppress FutureWarning messages FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
warnings.simplefilter(action='ignore', category=FutureWarning)


- LBP e GLCM

In [2]:
def calculate_lbp(image, quadrants=(1,1), neighbours=8, radius=2):
  # Tamanho dos quadrantes
  width, height = image.shape[0], image.shape[1]
  part_width = width // quadrants[0]
  part_height = height // quadrants[1]
  
  # Calculate
  result = list()
  for i in range(quadrants[0]):
    for j in range(quadrants[1]):
      #coordenadas da parte da imagem
      left = i * part_width
      upper = j * part_height
      right = left + part_width
      lower = upper + part_height

      # Retorna o quadrante da imagem
      quadrant = image[left:right, upper:lower]

      # Calculate Features
      lbp_image = local_binary_pattern(quadrant, neighbours, radius, method="nri_uniform")
      #n_bins = int(lbp_image.max() + 1)
      n_bins = 59
      hist, _ = np.histogram(lbp_image, density=True, bins=n_bins, range=(0, n_bins))

      # Append to result
      result.extend(hist)
          
  return result
#end lbp

def calculate_glcm(image, quadrants=(1,1)):
  # Tamanho dos quadrantes
  width, height = image.shape[0], image.shape[1]
  part_width = width // quadrants[0]
  part_height = height // quadrants[1]
  
  # Calculate
  result = list()
  for i in range(quadrants[0]):
    for j in range(quadrants[1]):
      #coordenadas da parte da imagem
      left = i * part_width
      upper = j * part_height
      right = left + part_width
      lower = upper + part_height

      # Retorna o quadrante da imagem
      quadrant = image[left:right, upper:lower]

      # Calcula glcm matrix
      glcm = graycomatrix(quadrant, [1], [0], symmetric=True, normed=True)

      # Calculate Features
      props = [
        graycoprops(glcm, prop='contrast')[0][0],
        graycoprops(glcm, prop='dissimilarity')[0][0],
        graycoprops(glcm, prop='homogeneity')[0][0],
        graycoprops(glcm, prop='ASM')[0][0],
        graycoprops(glcm, prop='energy')[0][0],
        graycoprops(glcm, prop='correlation')[0][0],
      ]

      # Append to result
      result.extend(props)
          
  return result

- Funções para manipular o conjunto X,y

In [3]:
def get_files(caminho_p):
  file_dict = {}
  i = 0
  for pasta in Path(caminho_p).iterdir():
    if pasta.is_dir():
      file_dict[pasta.name] = []
            
      for file in Path(pasta).iterdir():
        if file.is_file() and (file.name.endswith(".png") or file.name.endswith(".jpg") or file.name.endswith(".bmp")):
          file_dict[pasta.name].append(file)
          i += 1
  print("Total de imagens: ", i)
  return file_dict
#end getfilos


def createXy(database, quadrants=(1,1), lbp_props={'neighbours': 8, 'radius': 2}):
  X_glcm, X_lbp, y = list(), list(), list()

  for classe in database:   
    for item in natsort.natsorted(database[classe]):
      image = cv2.imread(str(item), cv2.IMREAD_GRAYSCALE)

      X_glcm.append(calculate_glcm(image, quadrants))
      X_lbp.append(calculate_lbp(image, quadrants, lbp_props['neighbours'], lbp_props['radius']))
      y.append(str(classe))

  return np.array(X_glcm), np.array(X_lbp), np.array(y)
#end createxy


def split_normalizado(X, y, test_size=0.33, shuffle=False):
  Xs = StandardScaler().fit_transform(X)

  X_train, X_test, y_train, y_test = train_test_split(Xs, y, test_size=test_size, shuffle=shuffle)
  return X_train, X_test, y_train, y_test
#end treinotest


def split_manual(X, y, train_size=18):
  #normalize
  X = StandardScaler().fit_transform(X)

  # separar em classes
  classes = dict()
  for i, classe in enumerate(y):
    if not classes.get(classe): classes[classe] = list()
    classes[classe].append(X[i])

  #split
  X_train, X_test, y_train, y_test = list(), list(), list(), list()

  for classe in classes.keys():
    for i, instancia in enumerate(classes[classe]):
      if i <= train_size:
        X_train.append(instancia)
        y_train.append(classe)
      else:
        X_test.append(instancia)
        y_test.append(classe)

  return X_train, X_test, y_train, y_test
#end treinotest

- Classificadores

In [4]:
def knn(X_train, y_train, X_test, props={
  'n_neighbors': 5,
  'metric': 'euclidean',
}):
  knn = KNeighborsClassifier(n_neighbors=props['n_neighbors'], metric=props['metric']).fit(X_train, y_train)
  return knn.predict(X_test)
#end knn

def svm(X_train, y_train, X_test, props={
  'C': 1.0,
  'kernel': 'rbf',
  'gamma': 'scale',
  'coef0': 0
}):
  clf = SVC(C=props['C'], kernel=props['kernel'], gamma=props['gamma'], coef0=props['coef0'])
  clf.fit(X_train, y_train)
  return clf.predict(X_test)

- Experimentos com diversos parâmetros

In [5]:

database = get_files('../datasets/Base_BFL_256/')
quadrantes = [1,2,4]
knn_metrics= ['cityblock', 'euclidean']
knn_neighbours = [1, 3, 5]
svm_kernels = ['rbf', 'linear']

df_results = pd.DataFrame(columns=['acuracia', 'quadrantes', 'descritor', 'classificador', 'parametros'])

Total de imagens:  8505


In [6]:
for q in quadrantes:
  #build database
  
  X_glcm, X_lbp, y = createXy(database, quadrants=(q, q))

  #train test split
  X_glcm_train, X_glcm_test, y_glcm_train, y_glcm_test = split_manual(X_glcm, y, train_size=18)
  X_lbp_train, X_lbp_test, y_lbp_train, y_lbp_test = split_manual(X_lbp, y, train_size=18)
  
  print(f'-------------------- quadrantes {q}x{q}')

  #knn
  for metric in knn_metrics:
    for k in knn_neighbours:
  
      knnProps={
        'n_neighbors': k,
        'metric': metric,
      }

      print(f'============= KNN TEST: quadr={q}x{q}, distance={metric}, k={k}')

      y_glcm_pred = knn(X_glcm_train, y_glcm_train, X_glcm_test, props=knnProps)
      df_results.loc[len(df_results)]= {
        'acuracia': accuracy_score(y_glcm_test, y_glcm_pred),
        'quadrantes': q,
        'descritor': 'GLCM',
        'classificador': 'KNN',
        'parametros': f'quadr={q}x{q}, distance={metric}, k={k}, GLCM, KNN'
      }
      print('GLCM k-NN Accuracy: {:.4f}'.format(accuracy_score(y_glcm_test, y_glcm_pred)))
    
      y_lbp_pred = knn(X_lbp_train, y_lbp_train, X_lbp_test, props=knnProps)
      df_results.loc[len(df_results)]={
        'acuracia': accuracy_score(y_lbp_test, y_lbp_pred),
        'quadrantes': q,
        'descritor': 'LBP',
        'classificador': 'KNN',
        'parametros': f'quadr={q}x{q}, distance={metric}, k={k}, LBP, KNN'
      }
      print('LBP k-NN Accuracy: {:.4f}'.format(accuracy_score(y_lbp_test, y_lbp_pred)))
  #end knn


  #svm
  for kernel in svm_kernels:
    for c in (2. ** np.arange(-5,15,6)):
      for gamma in (2. ** np.arange(3,-15,-6)):
        
        svmProps={
          'C': c,
          'kernel': kernel,
          'gamma': gamma,
          'coef0': 0
        }
      
        print(f'============= SVM TEST: quadr={q}x{q}, kernel={kernel}, C={c}, gamma={gamma}')

        y_glcm_pred = svm(X_glcm_train, y_glcm_train, X_glcm_test, props=svmProps)
        df_results.loc[len(df_results)]={
          'acuracia': accuracy_score(y_glcm_test, y_glcm_pred),
          'quadrantes': q,
          'descritor': 'GLCM',
          'classificador': 'SVM',
          'parametros': f'quadr={q}x{q}, kernel={kernel}, C={c}, gamma={gamma}, GLCM, SVM'
        }
        print('GLCM SVM Accuracy: {:.4f}'.format(accuracy_score(y_glcm_test, y_glcm_pred)))

        y_lbp_pred = svm(X_lbp_train, y_lbp_train, X_lbp_test, props=svmProps)
        df_results.loc[len(df_results)]={
          'acuracia': accuracy_score(y_lbp_test, y_lbp_pred),
          'quadrantes': q,
          'descritor': 'LBP',
          'classificador': 'SVM',
          'parametros': f'quadr={q}x{q}, kernel={kernel}, C={c}, gamma={gamma}, LBP, SVM'
        }
        print('LBP SVM Accuracy: {:.4f}'.format(accuracy_score(y_lbp_test, y_lbp_pred)))
  #end svm

#end q

-------------------- quadrantes 1x1
GLCM k-NN Accuracy: 0.3627
LBP k-NN Accuracy: 0.8444
GLCM k-NN Accuracy: 0.3298
LBP k-NN Accuracy: 0.8274
GLCM k-NN Accuracy: 0.3302
LBP k-NN Accuracy: 0.8246
GLCM k-NN Accuracy: 0.3849
LBP k-NN Accuracy: 0.8524
GLCM k-NN Accuracy: 0.3417
LBP k-NN Accuracy: 0.8329
GLCM k-NN Accuracy: 0.3456
LBP k-NN Accuracy: 0.8302
GLCM SVM Accuracy: 0.3179
LBP SVM Accuracy: 0.3472
GLCM SVM Accuracy: 0.2690
LBP SVM Accuracy: 0.7552
GLCM SVM Accuracy: 0.2607
LBP SVM Accuracy: 0.7071
GLCM SVM Accuracy: 0.3976
LBP SVM Accuracy: 0.1075
GLCM SVM Accuracy: 0.3373
LBP SVM Accuracy: 0.8722
GLCM SVM Accuracy: 0.2611
LBP SVM Accuracy: 0.7718
GLCM SVM Accuracy: 0.4012
LBP SVM Accuracy: 0.1075
GLCM SVM Accuracy: 0.4849
LBP SVM Accuracy: 0.8746
GLCM SVM Accuracy: 0.3492
LBP SVM Accuracy: 0.9226
GLCM SVM Accuracy: 0.4012
LBP SVM Accuracy: 0.1075
GLCM SVM Accuracy: 0.5282
LBP SVM Accuracy: 0.8746
GLCM SVM Accuracy: 0.4972
LBP SVM Accuracy: 0.9230
GLCM SVM Accuracy: 0.2627
LBP SVM 

In [7]:
#print(df)
top = df_results.sort_values(by=['acuracia'], ascending=False).head(20)
print(top.to_string(index=False))

 acuracia  quadrantes descritor classificador                                                      parametros
 0.923016           1       LBP           SVM    quadr=1x1, kernel=rbf, C=8192.0, gamma=0.001953125, LBP, SVM
 0.922619           1       LBP           SVM     quadr=1x1, kernel=rbf, C=128.0, gamma=0.001953125, LBP, SVM
 0.922222           1       LBP           SVM       quadr=1x1, kernel=linear, C=8192.0, gamma=0.125, LBP, SVM
 0.922222           1       LBP           SVM            quadr=1x1, kernel=linear, C=2.0, gamma=8.0, LBP, SVM
 0.922222           1       LBP           SVM          quadr=1x1, kernel=linear, C=2.0, gamma=0.125, LBP, SVM
 0.922222           1       LBP           SVM    quadr=1x1, kernel=linear, C=2.0, gamma=0.001953125, LBP, SVM
 0.922222           1       LBP           SVM          quadr=1x1, kernel=linear, C=128.0, gamma=8.0, LBP, SVM
 0.922222           1       LBP           SVM        quadr=1x1, kernel=linear, C=128.0, gamma=0.125, LBP, SVM
 0.922222 

In [8]:
tail = df_results.sort_values(by=['acuracia'], ascending=True).head(20)
print(tail.to_string(index=False))

 acuracia  quadrantes descritor classificador                                               parametros
 0.003175           4       LBP           SVM        quadr=4x4, kernel=rbf, C=2.0, gamma=8.0, LBP, SVM
 0.003175           4       LBP           SVM    quadr=4x4, kernel=rbf, C=0.03125, gamma=8.0, LBP, SVM
 0.003175           4       LBP           SVM     quadr=4x4, kernel=rbf, C=8192.0, gamma=8.0, LBP, SVM
 0.003175           4       LBP           SVM      quadr=4x4, kernel=rbf, C=128.0, gamma=8.0, LBP, SVM
 0.050794           2      GLCM           SVM   quadr=2x2, kernel=rbf, C=0.03125, gamma=8.0, GLCM, SVM
 0.086111           2      GLCM           SVM       quadr=2x2, kernel=rbf, C=2.0, gamma=8.0, GLCM, SVM
 0.086111           2      GLCM           SVM     quadr=2x2, kernel=rbf, C=128.0, gamma=8.0, GLCM, SVM
 0.086111           2      GLCM           SVM    quadr=2x2, kernel=rbf, C=8192.0, gamma=8.0, GLCM, SVM
 0.107540           1       LBP           SVM      quadr=1x1, kernel=rbf,

In [9]:
df_results.to_csv('results.csv', index=False)

In [10]:
# database = get_files('../datasets/Base_BFL_256/')
# X_glcm, X_lbp, y = createXy(database, quadrants=(3, 3))

# print(X_glcm.shape, X_lbp.shape, y.shape)

# # # ========================= GLCM
# # X_glcm_train, X_glcm_test, y_glcm_train, y_glcm_test = split_normalizado(X_glcm, y, test_size=0.3)
# X_glcm_train, X_glcm_test, y_glcm_train, y_glcm_test = split_manual(X_glcm, y, train_size=18)

# y_glcm_pred = knn(X_glcm_train, y_glcm_train, X_glcm_test, props=knnProps)
# print('GLCM k-NN Accuracy: {:.4f}'.format(accuracy_score(y_glcm_test, y_glcm_pred)))

# y_glcm_pred = svm(X_glcm_train, y_glcm_train, X_glcm_test, props=svmProps)
# print('GLCM SVM Accuracy: {:.4f}'.format(accuracy_score(y_glcm_test, y_glcm_pred)))


# # ========================= LBP
# # X_lbp_train, X_lbp_test, y_lbp_train, y_lbp_test = split_normalizado(X_lbp, y, test_size=0.3)
# X_lbp_train, X_lbp_test, y_lbp_train, y_lbp_test = split_manual(X_lbp, y, train_size=18)

# y_lbp_pred = knn(X_lbp_train, y_lbp_train, X_lbp_test, props=knnProps)
# print('LBP k-NN Accuracy: {:.4f}'.format(accuracy_score(y_lbp_test, y_lbp_pred)))

# y_lbp_pred = svm(X_lbp_train, y_lbp_train, X_lbp_test, props=svmProps)
# print('LBP SVM Accuracy: {:.4f}'.format(accuracy_score(y_lbp_test, y_lbp_pred)))