In [1]:
from skimage.feature import graycomatrix, graycoprops
from sklearn.neighbors import KNeighborsClassifier
from skimage.feature import local_binary_pattern, SIFT
from sklearn import decomposition
from sklearn import metrics
from PIL import Image
import pandas as pd
import numpy as np
import os

newSize = 128 # Dimensão 128, 64, 32, 16, 8
dataDir = './data_augmented/'

In [2]:
def doLBP(ImageArray):
    lbp = local_binary_pattern(ImageArray, 3, 8 * 3)
    n_bins = int(lbp.max() + 1)
    hist, _ = np.histogram(lbp, density=True, bins=n_bins, range=(0, n_bins))

    return hist.tolist()

def doGLCM(ImageArray, angles_range):
    glcmMatrix = np.empty((0,len(angles_range)),float)
    glcm = graycomatrix(ImageArray, distances=range(0,100,2), angles=angles_range, levels=256, symmetric=True, normed=True)
    
    glcmContrast = graycoprops(glcm, 'contrast')
    glcmDissimilarity = graycoprops(glcm, 'dissimilarity')
    glcmHomogeneity = graycoprops(glcm, 'homogeneity')
    glcmASM = graycoprops(glcm, 'ASM')
    glcmEnergy = graycoprops(glcm, 'energy')
    glcmCorrelation = graycoprops(glcm, 'correlation')

    glcmMatrix = np.append(glcmMatrix, glcmContrast, axis=0)
    glcmMatrix = np.append(glcmMatrix, glcmDissimilarity, axis=0)
    glcmMatrix = np.append(glcmMatrix, glcmHomogeneity, axis=0)
    glcmMatrix = np.append(glcmMatrix, glcmASM, axis=0)
    glcmMatrix = np.append(glcmMatrix, glcmEnergy, axis=0)
    glcmMatrix = np.append(glcmMatrix, glcmCorrelation, axis=0)

    return glcmMatrix.flatten().tolist()

def doSIFT(ImageArray):
    # Encontra os 
    sift = SIFT(upsampling=1)
    sift.detect_and_extract(ImageArray)

def doPCA(dataFrame, newSize=128):
    y = dataFrame[0]
    X = dataFrame.drop(0, axis=1).values

    pca = decomposition.PCA(n_components=newSize)
    pca.fit(X)

    XPCA = pca.transform(X)

    dataFramePCA = pd.DataFrame(XPCA)
    dataFramePCA['Classe'] = y
    dataFramePCA = dataFramePCA[['Classe'] + dataFramePCA.columns[:-1].tolist()]
    
    return dataFramePCA

def doKnn(X_train, y_train, X_teste, y_teste, MAX_K=30):
    range_k = np.arange(1, MAX_K, 1)
    scores = {}

    for k in range_k:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_teste)

        scores[k] = metrics.accuracy_score(y_teste, y_pred)
    
    return scores, range_k

In [3]:
maxH = 2073
maxW = 2339
dataTeste = []
dataTreino = []

angle_step = np.pi/4
angle_end = np.pi
angles_range = np.arange(0, angle_end, angle_step)

def run(Image):
    
    # Redimensiona a imagem para 2339x2073
    ImageResize = Image.resize((maxW, maxH))

    # Converte a imagem para um array
    ImageArray = np.asarray(ImageResize.convert('L'))

    # Aplica o LBP
    lbp = doLBP(ImageArray)

    # Aplica o GLCM
    glcm = doGLCM(ImageArray, angles_range)

    return lbp, glcm

def runPartition(Image, nPartitions):
    
    lbp = []
    glcm = []

    # Redimensiona a imagem para 2339x2073
    ImageResize = Image.resize((maxW, maxH))

    # Converte a imagem para um array
    ImageArray = np.asarray(ImageResize.convert('L'))

    tamPartition = len(ImageArray)/nPartitions
    posI = 0
    posF = tamPartition

    while posF <= len(ImageArray):

        # Aplica o LBP
        lbp += doLBP(ImageArray[int(posI):int(posF)])

        # Aplica o GLCM
        glcm += doGLCM(ImageArray[int(posI):int(posF)], angles_range)

        posI += tamPartition
        posF += tamPartition
        
    return lbp, glcm

def returnData(Image, partition, nPartitions):
    return run(Image) if partition else runPartition(Image, nPartitions)

'''
    partition: True - Particiona a imagem em quadrantes
    nPartitions: Quantidade de quadrantes
    limit: Quantidade de classes a serem processadas
'''
def generateData(partition=False, nPartitions=2, limit=0):

    files = os.listdir(dataDir)
    limit = len(files) if limit == 0 else limit

    for dirTrainTest in files:
        # print(dirTrainTest)
        
        archives = os.listdir(dataDir + dirTrainTest)
        for t in archives:
            trainTestImage = os.listdir(dataDir + dirTrainTest + "/" + t)

            for i, dir in enumerate(trainTestImage):
                # print("Arquivos")
                # print(dir)
                
                # print(f'{dataDir}{dirTrainTest}/{t}/')

                # Lista todas as imagens do direttório
                images = os.listdir(f'{dataDir}{dirTrainTest}/{t}/')

                # Abrir as imagens de treino e teste
                ImageDefault = Image.open(f'{dataDir}{dirTrainTest}/{t}/{dir}')
                # ImageDefaultTeste = Image.open(f'{dataDir}/{dir}/{images[1]}')

                lbp, glcm = returnData(ImageDefault, partition, nPartitions)

                dataTreino.append([i] + lbp + glcm) if str(t) == "Train" else dataTeste.append([i] + lbp + glcm)


                # lbp, glcm = returnData(ImageDefaultTeste, partition, nPartitions)
                # dataTeste.append([i] + lbp + glcm)

    return dataTeste, dataTreino

In [4]:
dataTeste, dataTreino = generateData(True, 6, 100)

KeyboardInterrupt: 

In [None]:
dataFrameTreino = pd.DataFrame(dataTreino)
dataFrameTeste = pd.DataFrame(dataTeste)

dataFrameTreino.to_csv('data_treino.csv', index=False)
dataFrameTeste.to_csv('data_teste.csv', index=False)

In [None]:
PCATreino = doPCA(dataFrameTreino, newSize if len(dataFrameTreino) > newSize else len(dataFrameTreino))
PCATeste = doPCA(dataFrameTeste, newSize if len(dataFrameTeste) > newSize else len(dataFrameTeste))

In [None]:
PCATeste.head()

Unnamed: 0,Classe,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,0,13522.317156,-1935.202897,1928.956258,1808.654041,1436.432923,-204.180642,211.183419,137.512405,192.712511,...,0.034679,0.082084,-0.130529,0.195228,-0.006218,-0.142958,0.00577,0.042226,0.018446,3.249484e-13
1,1,-12294.189719,1532.631591,-391.374281,600.695057,-42.340566,174.240288,116.730846,338.298369,-225.36245,...,-0.431158,-0.156561,0.114356,-0.17972,0.474774,-0.293682,0.274744,0.257377,-0.07684,-2.227461e-12
2,2,-7525.358135,182.667817,-192.387377,-439.070121,-183.571403,-354.725699,68.654012,130.831382,-38.426028,...,-0.33941,0.506149,0.353244,-0.485165,-0.14868,0.235542,0.008997,0.017973,0.21351,9.697466e-12
3,3,5431.99692,255.144847,-226.187073,1342.96599,-931.118343,-119.375958,121.772207,4.157487,43.374793,...,0.013301,0.169537,0.133732,0.051306,0.177378,0.1402,-0.416344,-0.134947,-0.255211,1.265606e-12
4,4,-1804.755947,-488.649866,492.118782,257.382315,440.931038,-132.214839,19.259591,-93.86314,-51.314467,...,-0.658614,0.058238,0.159933,0.082055,0.266736,0.034507,-0.327168,-0.357777,0.228662,2.65515e-12


In [None]:
scores, range_k = doKnn(PCATreino.drop(['Classe'], axis=1).values, PCATreino['Classe'].values, PCATeste.drop(['Classe'], axis=1).values, PCATeste['Classe'].values, 30 if len(dataFrameTeste) > 30 else len(dataFrameTeste)-1)

In [None]:
scores

{1: 0.14,
 2: 0.11,
 3: 0.11,
 4: 0.1,
 5: 0.08,
 6: 0.07,
 7: 0.06,
 8: 0.06,
 9: 0.05,
 10: 0.05,
 11: 0.06,
 12: 0.05,
 13: 0.04,
 14: 0.04,
 15: 0.04,
 16: 0.05,
 17: 0.05,
 18: 0.06,
 19: 0.06,
 20: 0.04,
 21: 0.04,
 22: 0.04,
 23: 0.05,
 24: 0.05,
 25: 0.05,
 26: 0.05,
 27: 0.04,
 28: 0.04,
 29: 0.04}