In [1]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import Image
import tensorflow as tf
import os
from collections import defaultdict

In [2]:
def augment_image(image):
    """Return the input image together with three geometric variants.

    Parameters
    ----------
    image
        Image handed to the ``tf.image`` flip / rotate ops.

    Returns
    -------
    list
        ``[image, left-right flip, rot90 result, up-down flip]``, in that order.
    """
    transforms = (
        tf.image.flip_left_right,
        tf.image.rot90,
        tf.image.flip_up_down,
    )
    # The untouched image always comes first, followed by one result per op.
    return [image] + [transform(image) for transform in transforms]

In [3]:
def preprocess_and_extract_features(images, labels, target_size=(224, 224)):
    """Extract SIFT descriptors from each image and label every descriptor.

    Each image is resized to ``target_size``, converted to ``uint8`` and passed
    to ``cv2.SIFT_create().detectAndCompute``. Images that yield no descriptors,
    or that raise while being processed, are reported with ``print`` and skipped.

    Parameters
    ----------
    images : iterable
        Images as NumPy arrays or ``tf.Tensor`` objects (tensors are converted
        with ``.numpy()``).
    labels : iterable
        One label per image, paired with ``images`` via ``zip``.
    target_size : tuple of int, optional
        Size forwarded to ``cv2.resize``. Defaults to ``(224, 224)``.

    Returns
    -------
    combined_descriptors : numpy.ndarray
        All descriptors stacked row-wise with ``np.vstack``.
    combined_labels : numpy.ndarray
        The image label repeated once per descriptor row, aligned with
        ``combined_descriptors``.

    Raises
    ------
    ValueError
        If no image produced any descriptor.
    """
    sift = cv2.SIFT_create()
    descriptors = []
    descriptor_labels = []

    for img, label in zip(images, labels):
        try:
            if isinstance(img, tf.Tensor):
                img = img.numpy()
            resized_img = _to_uint8(cv2.resize(img, target_size))
            _, desc = sift.detectAndCompute(resized_img, None)
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole run.
            print(f"Error processing image with label {label}: {e}")
            continue

        if desc is not None and len(desc) > 0:
            descriptors.append(desc)
            descriptor_labels.append(label)
        else:
            print(f"No descriptors found for label {label}. Skipping this image.")

    if not descriptors:
        raise ValueError("No valid descriptors found in the dataset.")

    combined_descriptors = np.vstack(descriptors)
    # One label entry per descriptor row, in the same order as the stacked rows.
    combined_labels = np.repeat(descriptor_labels, [len(desc) for desc in descriptors])

    return combined_descriptors, combined_labels


def _to_uint8(image):
    """Convert an image array to ``uint8``, rescaling floats in [0, 1] to [0, 255]."""
    # Only floating-point data is treated as normalised. An integer image whose
    # brightest pixel is 0 or 1 is left on its existing scale instead of being
    # multiplied by 255.
    if np.issubdtype(image.dtype, np.floating) and np.max(image) <= 1.0:
        return (image * 255).astype(np.uint8)
    return image.astype(np.uint8)

In [4]:
# Dataset configuration
dataset_path = "./batik_assets/"

# Class names; the loading cell looks for a sub-directory of `dataset_path`
# with each name and uses the position in this list as the integer label.
label_class = [
    'batik-bali',
    'batik-betawi',
    'batik-celup',
    'batik-cendrawasih',
    'batik-ceplok',
    'batik-ciamis',
    'batik-garutan',
    'batik-gentongan',
    'batik-kawung',
    'batik-keraton',
    'batik-lasem',
    'batik-megamendung',
    'batik-parang',
    'batik-pekalongan',
    'batik-priangan',
    'batik-sekar',
    'batik-sidoluhur',
    'batik-sidomukti',
    'batik-sogan',
    'batik-tambal',
]

# Containers filled later. `images` and `class_list` are not referenced in the
# cells visible here; `class_images` maps a class name to its list of
# (image, label index) pairs.
images = []
class_list = []
class_images = defaultdict(list)

In [5]:
# Load every image of every class, scale it to [0, 1] at 256x256, and store it
# together with its augmented variants under the class name.
for i, class_name in enumerate(label_class):
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_path):
        # Report the gap instead of silently producing a class with no samples.
        print(f"Class directory not found, skipping: {class_path}")
        continue

    # Drop anything left from a previous run of this cell so that re-running
    # it does not append duplicate samples.
    class_images.pop(class_name, None)

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    for file in sorted(os.listdir(class_path)):
        # Built outside the try so the error message below can always use it.
        file_path = os.path.join(class_path, file)
        try:
            image = load_img(file_path)
            image = img_to_array(image)
            image = tf.image.resize(image, (256, 256)) / 255.0

            # Every variant carries the class index `i` as its label.
            class_images[class_name].extend(
                (aug_image, i) for aug_image in augment_image(image)
            )

        except Exception as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"Error processing file {file_path}: {e}")

Error processing file ./batik_assets/batik-cendrawasih\18.jpg: cannot identify image file <_io.BytesIO object at 0x000002BCFF92E1B0>


In [6]:
# Flatten the per-class (image, label) pairs into two parallel NumPy arrays.
image_label_pairs = [
    pair for pairs in class_images.values() for pair in pairs
]
all_images = np.array([img.numpy() for img, _ in image_label_pairs])
all_labels = np.array([label for _, label in image_label_pairs])

# Stratified 80/20 split with a fixed seed.
# NOTE(review): the loading cell stores each source image together with its
# flipped/rotated variants, and this split works on individual samples, so
# variants of one source image can end up on both sides of the split.
# Confirm whether that leakage is acceptable for the reported metrics.
train_images, test_images, train_labels, test_labels = train_test_split(
    all_images,
    all_labels,
    test_size=0.2,
    stratify=all_labels,
    random_state=42,
)

# Stacked SIFT descriptors and their per-row labels for each split.
descriptors_train, labels_train = preprocess_and_extract_features(train_images, train_labels)
descriptors_test, labels_test = preprocess_and_extract_features(test_images, test_labels)

In [7]:
# Train a Random Forest on the individual SIFT descriptors and score it on the
# descriptors of the test split. RandomForestClassifier, classification_report
# and accuracy_score are already imported in the first cell, so they are not
# re-imported here.
# n_jobs=-1 parallelises tree building and prediction across all cores.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(descriptors_train, labels_train)
y_pred_rf = rf_model.predict(descriptors_test)

# NOTE: every row is a single descriptor, so the figures below are
# per-descriptor metrics, not per-image metrics.
print("Random Forest Classification Report:")
print(classification_report(labels_test, y_pred_rf))
print("Random Forest Accuracy:", accuracy_score(labels_test, y_pred_rf))


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.66      0.60      0.63     37893
           1       0.75      0.86      0.80     30787
           2       0.82      0.92      0.87     23504
           3       0.86      0.80      0.83     27943
           4       0.75      0.59      0.66     36255
           5       0.81      0.81      0.81     36182
           6       0.90      0.82      0.86     37087
           7       0.79      0.80      0.79     32880
           8       0.82      0.86      0.84     34864
           9       0.72      0.81      0.76     37251
          10       0.55      0.85      0.67     41558
          11       0.84      0.95      0.89     24469
          12       0.78      0.79      0.78     32515
          13       0.80      0.73      0.76     36503
          14       0.82      0.79      0.81     34459
          15       0.92      0.71      0.80     25652
          16       0.80      0.68      0.73 