In [1]:
import numpy as np 
import cv2 
import os
import re

In [2]:
def normalize_label(str_):
    """Turn a raw folder-style name into a compact label.

    Spaces and parentheses are dropped, the remainder is split on
    underscores, and the first two pieces are glued together.

    Args:
        str_: Raw name to normalize.

    Returns:
        The concatenation of the first two underscore-separated parts.
    """
    # Dropping " ", "(" and ")" in one pass is equivalent to removing them one by one.
    stripped = "".join(ch for ch in str_ if ch not in " ()")
    leading_parts = stripped.split("_")[:2]
    return "".join(leading_parts)

def normalize_word(names):
    """Join name parts into one cleaned-up description string.

    Args:
        names: Non-empty sequence of strings. Several parts are joined with
            "-"; a single part is used as is.

    Returns:
        The joined text with all digit runs and dots removed and surrounding
        whitespace stripped.

    Raises:
        IndexError: If ``names`` is empty.
    """
    joined = names[0] if len(names) <= 1 else '-'.join(names)
    without_digits = re.sub(r'\d+', '', joined)
    return without_digits.replace(".", "").strip()

def print_progress(val, val_len, folder, sub_folder, filename, bar_size=10):
    """Print a single-line text progress bar for the file being processed.

    Args:
        val: Zero-based index of the current item.
        val_len: Index of the last item (the caller passes item count minus one).
        folder: Top-level folder name shown in the message.
        sub_folder: Sub-folder name shown in the message.
        filename: Name of the file currently being processed.
        bar_size: Width of the bar in characters.

    Returns:
        None. The bar is written to stdout and ends with a carriage return so
        the next call overwrites it.
    """
    # The first item only emits a line break so each sub-folder gets its own
    # bar line. Checking this before any arithmetic also avoids dividing by
    # zero when a sub-folder holds a single file (val_len == 0).
    if val == 0:
        print("", end = "\n")
        return

    if val_len == 0:
        # No meaningful total to divide by: show a completed bar.
        filled, remaining = bar_size, 0
    else:
        filled = round(val * bar_size / val_len)
        remaining = round((val_len - val) * bar_size / val_len)

    progr = "#" * filled + " " * remaining
    print("[%s] folder : %s/%s/ ----> file : %s" % (progr, folder, sub_folder, filename), end="\r")

In [3]:
# Name of the folder that contains the dataset to be processed.
dataset_dir = "Dataset"

# Preprocessed image matrices (grayscale, centre-cropped, downscaled), one per file.
imgs = []

# Normalized label of each preprocessed image, derived from its sub-folder name.
labels = []

# Normalized top-level folder name of each preprocessed image.
header_labels = []

# Normalized "folder-sub_folder" description of each preprocessed image.
descs = []

# Walk Dataset/<folder>/<sub_folder>/<file>. Listings are sorted because
# os.listdir returns entries in arbitrary order, which would make the row
# order of imgs/labels differ between runs and machines.
for folder in sorted(os.listdir(dataset_dir)):
    folder_path = os.path.join(dataset_dir, folder)
    # Skip stray files that sit next to the class folders.
    if not os.path.isdir(folder_path):
        continue

    for sub_folder in sorted(os.listdir(folder_path)):
        sub_folder_path = os.path.join(folder_path, sub_folder)
        if not os.path.isdir(sub_folder_path):
            continue

        # These values are identical for every file in the sub-folder.
        label = normalize_label(os.path.splitext(sub_folder)[0])
        header_label = normalize_word([folder])
        desc = normalize_word([folder, sub_folder])

        sub_folder_files = sorted(os.listdir(sub_folder_path))
        # Index of the last file, used as the progress total. Clamped to 1 so
        # a sub-folder with a single file never yields a zero divisor.
        len_sub_folder = max(len(sub_folder_files) - 1, 1)

        for i, filename in enumerate(sub_folder_files):
            file_path = os.path.join(sub_folder_path, filename)
            img = cv2.imread(file_path)
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising, which would crash cvtColor below.
            if img is None:
                print("\nskipped unreadable file : %s" % file_path)
                continue

            # Convert to grayscale and keep only the central third of the
            # image in both directions.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            h, w = gray.shape
            ymin, ymax, xmin, xmax = h//3, h*2//3, w//3, w*2//3
            crop = gray[ymin:ymax, xmin:xmax]
            # Halve the size of the cropped region.
            resize = cv2.resize(crop, (0,0), fx=0.5, fy=0.5)

            # Keep the four parallel lists aligned: one entry per image.
            imgs.append(resize)
            labels.append(label)
            header_labels.append(header_label)
            descs.append(desc)

            # Show the progress bar for the current sub-folder.
            print_progress(i, len_sub_folder, folder, sub_folder, filename)


[##########] folder : DagingSapi/SAPI/ ----> file : IMG_20220107_081811.jpg


In [4]:
# Number of image matrices collected by the loading cell.
print(len(imgs))

230


In [5]:
# List each distinct (folder name, label) pair once. The pairs are sorted
# because iterating a bare set gives an order that can change between kernel
# sessions, which makes the printed listing non-reproducible.
for header, label in sorted(set(zip(header_labels, labels))):
    print(label, "\t:", header)

SAPI 	: DagingSapi
BABI 	: DagingBabi


In [6]:
# Inspect the array shape of one preprocessed image (index 8 appears to be an arbitrary sample).
imgs[8].shape

(230, 230)

In [7]:
# Number of labels collected; the loading loop appends one label per stored image.
len(labels)

230

In [8]:
from skimage.feature import greycomatrix, greycoprops
import pandas as pd 

In [9]:
# Build a grey-level co-occurrence matrix for a single image (index 100) as a
# trial run: one distance (5), one angle (0), 256 grey levels, symmetric and
# normalized.
glcm = greycomatrix(imgs[100], distances=[5], angles=[0], levels=256,
                    symmetric=True, normed=True)

In [11]:
# Print the six texture properties of the trial GLCM, four decimals each.
# [0, 0] selects the single distance/angle combination the matrix was built with.
print('dissimilarity \t: %.04f' % greycoprops(glcm, 'dissimilarity')[0, 0])
print('correlation \t: %.04f' % greycoprops(glcm, 'correlation')[0, 0])
print('homogeneity \t: %.04f' % greycoprops(glcm, 'homogeneity')[0, 0])
print('contrast \t: %.04f' % greycoprops(glcm, 'contrast')[0, 0])
print('ASM \t\t: %.04f' % greycoprops(glcm, 'ASM')[0, 0])
print('energy \t\t: %.04f' % greycoprops(glcm, 'energy')[0, 0])

dissimilarity 	: 14.7419
correlation 	: 0.5846
homogeneity 	: 0.1023
contrast 	: 521.7919
ASM 		: 0.0003
energy 		: 0.0170


In [12]:
def calc_glcm_feature(img, label, dists=[5], agls=[0], lvl=256, sym=True, norm=True):
    """Build one feature row: six GLCM texture properties plus the label.

    Args:
        img: Image array handed to ``greycomatrix``.
        label: Value appended as the last element of the row.
        dists: Passed to ``greycomatrix`` as ``distances``.
        agls: Passed to ``greycomatrix`` as ``angles``.
        lvl: Passed to ``greycomatrix`` as ``levels``.
        sym: Passed to ``greycomatrix`` as ``symmetric``.
        norm: Passed to ``greycomatrix`` as ``normed``.

    Returns:
        A list ``[dissimilarity, correlation, homogeneity, contrast, ASM,
        energy, label]``.
    """
    matrix = greycomatrix(
        img,
        distances=dists,
        angles=agls,
        levels=lvl,
        symmetric=sym,
        normed=norm,
    )

    # The order here defines the column order of the resulting row.
    prop_names = ('dissimilarity', 'correlation', 'homogeneity',
                  'contrast', 'ASM', 'energy')

    # Only element [0, 0] of each property array is kept, i.e. the value for
    # the first entry of dists and the first entry of agls.
    row = [greycoprops(matrix, prop)[0, 0] for prop in prop_names]
    row.append(label)
    return row

In [13]:
# One GLCM feature row per (image, label) pair, in dataset order.
glcm_features = [
    calc_glcm_feature(image, image_label)
    for image, image_label in zip(imgs, labels)
]

In [14]:
# Create the pandas DataFrame for GLCM features data
glcm_features_df = pd.DataFrame(glcm_features, 
                                columns = ['dissimilarity', 
                                           'correlation',
                                           'homogeneity',
                                           'contrast',
                                           'ASM',
                                           'energy',
                                           'label'])

In [16]:
# Show rows 80-109 by position to spot-check the extracted features.
glcm_features_df.iloc[80:110]

Unnamed: 0,dissimilarity,correlation,homogeneity,contrast,ASM,energy,label
80,12.064726,0.407804,0.103602,329.257697,0.000591,0.024301,SAPI
81,18.483436,0.587661,0.078411,714.75292,0.000211,0.014517,SAPI
82,18.545606,0.616139,0.07971,786.792592,0.00021,0.014503,SAPI
83,6.809153,0.656345,0.154018,84.838329,0.001056,0.032503,SAPI
84,14.000229,0.733373,0.088934,402.431375,0.000216,0.014714,SAPI
85,14.396089,0.62882,0.094796,473.93087,0.000313,0.017687,SAPI
86,12.653185,0.685293,0.100967,357.057372,0.00034,0.018437,SAPI
87,11.962369,0.632508,0.122811,379.960329,0.000652,0.025538,SAPI
88,18.90808,0.503046,0.080695,810.855494,0.000293,0.017109,SAPI
89,17.567609,0.609985,0.097766,753.249809,0.000283,0.016819,SAPI


In [17]:
# Save the feature table next to the notebook. With to_csv's defaults the row
# index is written as well, as an unnamed first column.
glcm_features_df.to_csv("Dataset_glcm.csv")