In [None]:
import numpy as np 
import cv2 
import os
import re
import random

# -------------------- Utility function ------------------------
def normalize_label(str_):
    str_ = str_.replace(" ", "")
    str_ = str_.translate(str_.maketrans("","", "()"))
    str_ = str_.split("_")
    return ''.join(str_[:1])

def normalize_desc(folder, sub_folder):
    text = folder + " - " + sub_folder 
    text = re.sub(r'\d+', '', text)
    text = text.replace(".", "")
    text = text.strip()
    return text

def print_progress(val, val_len, folder, sub_folder, filename, bar_size=10):
    progr = "#"*round((val)*bar_size/val_len) + " "*round((val_len - (val))*bar_size/val_len)
    if val == 0:
        print("", end = "\n")
    else:
        print("[%s] folder : %s/%s/ ----> file : %s" % (progr, folder, sub_folder, filename), end="\r")

# -------------------- Load Dataset ------------------------
 
dataset_dir = "/content/DataKelapa/DataKelapa"
'''
os.mkdir('Augmentasi')
os.mkdir('Augmentasi/Standar')
os.mkdir('Augmentasi/NonStandar')
'''
imgs = [] #list image matrix 
labels = []
descs = []
for folder in os.listdir(dataset_dir):
    for subfolder in os.listdir(dataset_dir + "/" + folder):
        for filename in os.listdir(dataset_dir + "/" + folder + "/" + subfolder):
            img = cv2.imread(dataset_dir + "/" + folder + "/" + subfolder + "/" + filename)
            
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
            h, w = gray.shape
            ymin, ymax, xmin, xmax = h//3, h*2//3, w//3, w*2//3
            crop = gray[ymin:ymax, xmin:xmax]
            crop = gray[1000:5500, 1500:6500]
        
            #resize = cv2.resize(gray, (0,0), fx=0.5, fy=0.5)
            down_width = 800
            down_height = 600
            down_points = (down_width, down_height)
            resized = cv2.resize(gray, down_points, interpolation= cv2.INTER_LINEAR)
            
            def rotation(img, angle):
                angle = int(random.uniform(-angle, angle))
                h, w = img.shape[:2]
                M = cv2.getRotationMatrix2D((int(w/2), int(h/2)), angle, 1)
                img = cv2.warpAffine(img, M, (w, h))
                return img
            
            aug = rotation(resized, 30)
            
            #cv2.imwrite('Augmentasi/' + subfolder + '/' + filename, aug)
            
            imgs.append(aug)
            labels.append(normalize_label(os.path.splitext(filename)[0]))
            descs.append(normalize_desc(folder, subfolder))
            
           # print_progress(folder, subfolder, filename)

In [5]:
from skimage.feature import greycomatrix, greycoprops 
# ----------------- calculate greycomatrix() & greycoprops() for angle 0, 45, 90, 135 ----------------------------------
def calc_glcm_all_agls(img, label, props, dists=[5], 
                       agls=[0, np.pi/4, np.pi/2, 3*np.pi/4], lvl=256, sym=True, norm=True):
    glcm = greycomatrix(img, 
                        distances=dists, 
                        angles=agls, 
                        levels=lvl,
                        symmetric=sym, 
                        normed=norm)
    feature = []
    glcm_props = [propery for name in props for propery in greycoprops(glcm, name)[0]]
    for item in glcm_props:
            feature.append(item)
    feature.append(label) 
    return feature
# ----------------- call calc_glcm_all_agls() for all properties ----------------------------------
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
glcm_all_agls = []
for img, label in zip(imgs, labels): 
    glcm_all_agls.append(
            calc_glcm_all_agls(img, 
                                label, 
                                props=properties)
                            )
columns = []
angles = ['0', '45', '90','135']
for name in properties :
    for ang in angles:
        columns.append(name + "_" + ang)      
columns.append("label")

In [17]:
print(glcm_all_agls)

[[6.279274633123691, 7.047820899801017, 7.505577731092436, 8.132936072982359, 0.9616564348914632, 0.9518957693206175, 0.9469550892079097, 0.9388990972119381, 0.5646871598446611, 0.5379561151975769, 0.5031002360728797, 0.4902911207572627, 440.4333375262049, 547.1185984452464, 607.5222457983189, 694.9376349027012, 0.016854927987201576, 0.015775722026262703, 0.01574643462899867, 0.015168584777830006, 0.1298265303672619, 0.12560144117908323, 0.1254847983980477, 0.12316080861146539, 'nonstandard1'], [9.487132075471704, 11.795272503456884, 11.310661764705875, 11.730375872651853, 0.9248871379657954, 0.8821878357067543, 0.89795952114444, 0.8843253395362912, 0.4684367397887958, 0.397687190751445, 0.3873068703989848, 0.4000529328962607, 592.9788176100631, 911.9847728575764, 799.3302331932774, 895.4383410340292, 0.0050357586879035245, 0.003811026247484705, 0.003910835746063134, 0.0038857035927345115, 0.07096307975210436, 0.061733509923579635, 0.06253667520793806, 0.06233541202827259, 'nonstandard

In [6]:
import pandas as pd 

# Create the pandas DataFrame for GLCM features data
glcm_df = pd.DataFrame(glcm_all_agls, 
                      columns = columns)

glcm_df.head(602)

Unnamed: 0,dissimilarity_0,dissimilarity_45,dissimilarity_90,dissimilarity_135,correlation_0,correlation_45,correlation_90,correlation_135,homogeneity_0,homogeneity_45,...,contrast_135,ASM_0,ASM_45,ASM_90,ASM_135,energy_0,energy_45,energy_90,energy_135,label
0,6.583786,7.110285,6.684340,7.396629,0.960104,0.946566,0.952369,0.951014,0.506515,0.465081,...,585.599139,0.017725,0.016396,0.017049,0.016859,0.133134,0.128047,0.130573,0.129841,nonstandard1
1,8.567235,9.347307,8.331845,9.121568,0.951815,0.941312,0.945927,0.938114,0.461934,0.445855,...,766.151959,0.014919,0.014093,0.014286,0.013668,0.122144,0.118715,0.119524,0.116912,nonstandard10
2,14.213117,15.528833,14.471422,15.438229,0.893419,0.862475,0.876177,0.867501,0.242832,0.227961,...,969.877047,0.001571,0.001231,0.001402,0.001223,0.039636,0.035088,0.037439,0.034970,nonstandard100
3,10.739633,10.910020,10.785651,11.319530,0.942019,0.941826,0.935752,0.928945,0.348045,0.344233,...,731.601422,0.020977,0.020335,0.020660,0.020040,0.144834,0.142602,0.143736,0.141563,nonstandard101
4,8.227029,9.179783,8.735466,9.327080,0.966265,0.956518,0.953025,0.947741,0.418694,0.415427,...,685.182886,0.012845,0.012122,0.012538,0.012015,0.113334,0.110098,0.111972,0.109615,nonstandard102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,10.951426,12.131168,11.142109,12.205895,0.928429,0.902705,0.915401,0.906670,0.347752,0.329210,...,892.051857,0.005323,0.004828,0.004979,0.004705,0.072961,0.069484,0.070565,0.068596,nonstandard97
598,13.107208,13.131758,12.471660,13.429775,0.889235,0.890470,0.900450,0.888215,0.275044,0.263355,...,825.987125,0.003205,0.003192,0.003159,0.003055,0.056616,0.056494,0.056202,0.055268,nonstandard98
599,9.762832,10.244235,9.780078,10.298841,0.955585,0.952508,0.949570,0.943791,0.382815,0.378997,...,792.314494,0.021303,0.020677,0.021050,0.020364,0.145955,0.143795,0.145087,0.142701,nonstandard99
600,9.172275,10.079578,9.716158,10.141701,0.959670,0.953096,0.952516,0.947911,0.425420,0.403733,...,708.611689,0.017786,0.016969,0.017125,0.016529,0.133366,0.130266,0.130863,0.128566,stanar1


In [7]:
glcm_df.iloc[0:600, glcm_df.columns.get_loc('label')] = 0
glcm_df.iloc[600:, glcm_df.columns.get_loc('label')] = 1

In [8]:
glcm_df['label']=glcm_df['label'].astype('int')

In [None]:
glcm_df.to_csv('DataCropNew3.csv', index=False)

In [1]:
import pandas as pd
glcm_df = pd.read_csv("PerbandinganData/DataCropNew3.csv")

In [2]:
glcm_df 

Unnamed: 0,dissimilarity_0,dissimilarity_45,dissimilarity_90,dissimilarity_135,correlation_0,correlation_45,correlation_90,correlation_135,homogeneity_0,homogeneity_45,...,contrast_135,ASM_0,ASM_45,ASM_90,ASM_135,energy_0,energy_45,energy_90,energy_135,label
0,5.217941,6.147132,6.229555,6.334072,0.946076,0.937721,0.937006,0.932230,0.546884,0.472907,...,279.538614,0.006691,0.005317,0.005192,0.005519,0.081797,0.072920,0.072054,0.074291,0
1,9.195143,10.322746,10.380481,10.582428,0.911308,0.899459,0.901654,0.895140,0.458826,0.395607,...,523.242157,0.003850,0.002933,0.002727,0.002857,0.062046,0.054153,0.052217,0.053454,0
2,21.519010,22.468540,22.135605,22.365104,0.721662,0.697800,0.700700,0.704939,0.169870,0.168190,...,1525.757455,0.000223,0.000225,0.000222,0.000207,0.014930,0.015000,0.014912,0.014392,0
3,16.155669,16.940171,17.063002,17.350882,0.845616,0.833862,0.828865,0.826257,0.197344,0.192092,...,1007.330225,0.000268,0.000254,0.000251,0.000245,0.016372,0.015932,0.015859,0.015641,0
4,8.857610,10.699848,11.029120,10.805917,0.958527,0.946607,0.943765,0.944558,0.399942,0.364912,...,520.089706,0.001575,0.001329,0.001301,0.001292,0.039681,0.036450,0.036073,0.035949,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,13.411069,14.003086,14.478500,14.978850,0.859522,0.854120,0.847016,0.836729,0.252350,0.219810,...,673.251269,0.000661,0.000503,0.000488,0.000516,0.025712,0.022428,0.022080,0.022715,1
1196,10.527338,11.927610,12.161519,11.809606,0.915936,0.902551,0.899872,0.903739,0.385211,0.318939,...,609.319698,0.003051,0.002036,0.001861,0.002073,0.055236,0.045118,0.043142,0.045526,1
1197,9.497277,10.278764,10.496300,10.377409,0.919123,0.915833,0.910947,0.910135,0.359492,0.299281,...,427.363029,0.002599,0.001906,0.001740,0.001904,0.050982,0.043663,0.041712,0.043637,1
1198,9.714413,10.858019,10.833298,10.703895,0.906690,0.890088,0.892162,0.891670,0.317903,0.271068,...,466.875744,0.001301,0.000978,0.000933,0.001022,0.036076,0.031274,0.030545,0.031974,1


In [3]:
from sklearn.model_selection import train_test_split
DATA = glcm_df.iloc[:,:-1].values
TARGET = glcm_df.iloc[:,-1].values

In [4]:
# Memisahkan data menjadi data pelatihan dan data sisa (validasi + testing)
X_trainval, X_test, y_trainval, y_test = train_test_split(DATA, TARGET, test_size=0.2, random_state=42)

# Memisahkan data sisa menjadi data validasi dan data testing
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.25, random_state=42)

In [5]:
# Cetak jumlah sampel pada setiap set data
print("Jumlah Data Awal:", len(glcm_df))
print("Jumlah data pelatihan:", len(X_train))
print("Jumlah data validasi:", len(X_val))
print("Jumlah data testing:", len(X_test))

Jumlah Data Awal: 1200
Jumlah data pelatihan: 720
Jumlah data validasi: 240
Jumlah data testing: 240


In [6]:
#Normalisasi agar tidak redudansi
from sklearn.preprocessing import MinMaxScaler
scaler_norm = MinMaxScaler()

X_train = scaler_norm.fit_transform(X_train)
X_val = scaler_norm.fit_transform(X_val)
X_test = scaler_norm.fit_transform(X_test)

In [7]:
traindf = pd.DataFrame(X_train)
traindf

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.615831,0.650690,0.710178,0.680486,0.560653,0.524792,0.495333,0.502643,0.381694,0.313056,...,0.549427,0.541867,0.115770,0.087466,0.074761,0.083500,0.266399,0.216572,0.192649,0.212119
1,0.406651,0.404366,0.386322,0.375819,0.610525,0.617179,0.661518,0.644271,0.656299,0.598892,...,0.315486,0.328766,0.237760,0.215812,0.211833,0.224183,0.426257,0.398152,0.392574,0.410406
2,0.122482,0.148389,0.189804,0.154703,0.738776,0.644575,0.600910,0.623946,0.603975,0.543363,...,0.095877,0.075887,0.147832,0.125073,0.117906,0.121583,0.313797,0.277703,0.265832,0.275034
3,0.396663,0.401964,0.396093,0.373374,0.559539,0.544796,0.569614,0.568364,0.375178,0.356716,...,0.305081,0.299771,0.079343,0.069301,0.073806,0.074368,0.204919,0.183132,0.190858,0.195331
4,0.350353,0.406410,0.428290,0.383853,0.903527,0.866820,0.862954,0.883038,0.632954,0.649160,...,0.314047,0.285222,0.248722,0.253515,0.251821,0.239679,0.438494,0.441109,0.438436,0.428101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,0.344844,0.380056,0.391881,0.353488,0.825170,0.790542,0.790439,0.807283,0.538738,0.460031,...,0.211556,0.192095,0.173088,0.129351,0.123918,0.137364,0.347959,0.284121,0.275053,0.298524
716,0.434232,0.433460,0.422660,0.402562,0.792894,0.785756,0.799813,0.802614,0.665631,0.708013,...,0.326051,0.327617,0.456233,0.479368,0.480325,0.463199,0.634896,0.652016,0.652244,0.640423
717,0.449484,0.464344,0.477698,0.475078,0.489099,0.446773,0.449013,0.421932,0.277857,0.280290,...,0.289461,0.296239,0.049781,0.051449,0.058558,0.054623,0.145739,0.146702,0.160879,0.155897
718,0.405279,0.436364,0.487124,0.448667,0.865064,0.836284,0.820139,0.839463,0.503054,0.472350,...,0.283544,0.259295,0.170966,0.153738,0.135714,0.129223,0.345180,0.319058,0.292598,0.286564


In [8]:
# SVM hypertuning using GridSeachCV
#Modelling
from sklearn.svm import SVC
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.903 total time=   0.0s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.944 total time=   0.0s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.944 total time=   0.0s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.917 total time=   0.0s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.972 total time=   0.0s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.972 total time=   0.0s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.931 total time=   0.0s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.917 total time=   0.0s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.944 total time=   0.0s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.972 total time=   0.0s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.875 total time=   0.0s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

In [9]:
#Evaluasi 
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix 
# Memprediksi label pada data testing
y_pred_test = grid_result.predict(X_test)

# Mengukur akurasi pada data testing
accuracy_test = accuracy_score(y_test, y_pred_test)
print("Akurasi Testing:", accuracy_test)
print(confusion_matrix(y_test,y_pred_test))
print(classification_report(y_test,y_pred_test)) 

Akurasi Testing: 0.9041666666666667
[[121   1]
 [ 22  96]]
              precision    recall  f1-score   support

           0       0.85      0.99      0.91       122
           1       0.99      0.81      0.89       118

    accuracy                           0.90       240
   macro avg       0.92      0.90      0.90       240
weighted avg       0.92      0.90      0.90       240



In [10]:
# Data prediksi
data_prediksi = y_pred_test
# Label sebenarnya
label_sebenarnya = y_test

# Menghitung jumlah data uji yang terdeteksi benar
jumlah_benar = sum([1 for prediksi, label in zip(data_prediksi, label_sebenarnya) if prediksi == label])

print("Jumlah data uji yang terdeteksi benar:", jumlah_benar)

Jumlah data uji yang terdeteksi benar: 217
