In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from PIL import Image
import tensorflow as tf
import os
from collections import defaultdict

In [3]:
def augment_image(image, num_augmented=10, seed=None):
    """Create randomly augmented variants of a single image.

    Each variant is drawn from an ``ImageDataGenerator`` configured with
    random horizontal/vertical flips, width/height shifts of up to 20%,
    rotations of up to 30 degrees and zoom between 0.8x and 1.2x; pixels
    exposed by a transform are filled with the nearest edge value.

    Args:
        image: A single image (no batch axis). Anything ``np.asarray`` can
            convert is accepted.
        num_augmented: How many augmented variants to generate. Defaults to
            10, the count that was previously hard-coded.
        seed: Optional seed forwarded to ``ImageDataGenerator.flow``. Leave
            as ``None`` (the default) for unseeded, non-reproducible output.

    Returns:
        list: ``num_augmented`` augmented images, each without the batch axis.
    """
    datagen = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rotation_range=30,
        zoom_range=[0.8, 1.2],
        fill_mode='nearest'
    )
    # flow() consumes batches, so wrap the single image in a batch of one.
    batch = np.expand_dims(np.asarray(image), axis=0)
    aug_iter = datagen.flow(batch, batch_size=1, seed=seed)
    # Every next() yields a batch of one freshly transformed image; [0] drops the batch axis.
    return [next(aug_iter)[0] for _ in range(num_augmented)]

In [4]:
def preprocess_and_extract_features(images, labels, max_features=0):
    """Extract SIFT descriptors from every image and label each descriptor.

    Each image is resized to 224x224, converted to ``uint8`` and passed to
    SIFT. Every descriptor row inherits the label of the image it came from,
    so the result has one row per descriptor rather than one row per image.
    Images that raise during processing, or that yield no descriptors, are
    reported with ``print`` and skipped.

    Args:
        images: Iterable of images (NumPy arrays or ``tf.Tensor``). An image
            whose maximum value is <= 1.0 is treated as scaled to [0, 1] and
            multiplied by 255 before the ``uint8`` conversion.
        labels: Iterable of labels, aligned with ``images``.
        max_features: Value passed to ``cv2.SIFT_create(nfeatures=...)``.
            The default ``0`` keeps the original behaviour of not limiting
            the number of keypoints per image; a positive value bounds the
            size of the returned matrix.

    Returns:
        tuple: ``(combined_descriptors, combined_labels)`` where
        ``combined_descriptors`` is the vertical stack of all descriptor
        arrays and ``combined_labels`` holds one label per descriptor row.

    Raises:
        ValueError: If no image produced any descriptor.
    """
    sift = cv2.SIFT_create(nfeatures=max_features)
    descriptors = []
    descriptor_labels = []

    for img, label in zip(images, labels):
        try:
            if isinstance(img, tf.Tensor):
                img = img.numpy()
            resized_img = cv2.resize(img, (224, 224))

            if np.max(resized_img) <= 1.0:
                resized_img = resized_img * 255
            # Clip before casting: astype(np.uint8) would otherwise wrap
            # out-of-range values around instead of saturating them.
            resized_img = np.clip(resized_img, 0, 255).astype(np.uint8)
            _, desc = sift.detectAndCompute(resized_img, None)
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole run.
            print(f"Error processing image with label {label}: {e}")
            continue

        if desc is not None and len(desc) > 0:
            descriptors.append(desc)
            descriptor_labels.append(label)
        else:
            print(f"No descriptors found for label {label}. Skipping this image.")

    if len(descriptors) == 0:
        raise ValueError("No valid descriptors found in the dataset.")

    combined_descriptors = np.vstack(descriptors)
    # Repeat each image label once per descriptor row of that image, without
    # materialising intermediate Python lists.
    combined_labels = np.repeat(
        np.asarray(descriptor_labels),
        [len(desc) for desc in descriptors],
    )

    return combined_descriptors, combined_labels

In [5]:
# Root folder that holds one sub-directory per batik motif.
dataset_path = "./batik_assets/"

# Motif names. The position of a name in this list is used as its integer
# class id, so the order must stay stable.
label_class = [
    'batik-bali',
    'batik-betawi',
    'batik-celup',
    'batik-cendrawasih',
    'batik-ceplok',
    'batik-ciamis',
    'batik-garutan',
    'batik-gentongan',
    'batik-kawung',
    'batik-keraton',
    'batik-lasem',
    'batik-megamendung',
    'batik-parang',
    'batik-pekalongan',
    'batik-priangan',
    'batik-sekar',
    'batik-sidoluhur',
    'batik-sidomukti',
    'batik-sogan',
    'batik-tambal',
]

# Empty containers filled by later cells.
images = list()
class_list = list()
# Maps a motif name to the list of entries collected for that motif.
class_images = defaultdict(list)

In [6]:
# Load every file of every class folder, rescale it to 256x256 in [0, 1],
# and store its augmented variants as (image, class_id) pairs per class.

# Start from an empty mapping so re-running this cell does not append a
# second copy of the whole dataset to class_images.
class_images.clear()

for i, class_name in enumerate(label_class):
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_path):
        # Report instead of skipping silently: a missing folder means the
        # class will have no samples at all downstream.
        print(f"Class directory not found, skipping: {class_path}")
        continue

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file in sorted(os.listdir(class_path)):
        # Built outside the try block so the error message below can always use it.
        file_path = os.path.join(class_path, file)
        try:
            image = img_to_array(load_img(file_path))
            image = tf.image.resize(image, (256, 256)) / 255.0

            # augment_image returns the full list before anything is stored,
            # so a file contributes either all of its variants or none.
            for aug_image in augment_image(image):
                class_images[class_name].append((aug_image, i))

        except Exception as e:
            # Best effort: report unreadable or non-image files and move on.
            print(f"Error processing file {file_path}: {e}")

In [7]:
# Flatten the per-class store, split into train/test BY SOURCE IMAGE, then
# extract SIFT descriptors for each side.
#
# Why split by source image: the stored samples are augmented copies, and the
# loading cell appends all copies of one file consecutively. A plain
# per-sample split would put variants of the same photo on both sides and
# inflate the test scores.

# Number of consecutive entries that originate from one source file.
# Must match the number of images augment_image returns per call (10).
AUGMENTATIONS_PER_IMAGE = 10

all_images = []
all_labels = []
all_groups = []  # id of the source image each sample was derived from

next_group = 0
for class_name, images_and_labels in class_images.items():
    if len(images_and_labels) % AUGMENTATIONS_PER_IMAGE != 0:
        raise ValueError(
            f"Class {class_name} holds {len(images_and_labels)} samples, which is not a "
            f"multiple of {AUGMENTATIONS_PER_IMAGE}; cannot map samples to source images."
        )
    for position, (img, label) in enumerate(images_and_labels):
        all_images.append(img.numpy() if isinstance(img, tf.Tensor) else img)
        all_labels.append(label)
        all_groups.append(next_group + position // AUGMENTATIONS_PER_IMAGE)
    next_group += len(images_and_labels) // AUGMENTATIONS_PER_IMAGE

all_images = np.array(all_images)
all_labels = np.array(all_labels)
all_groups = np.array(all_groups)

# One entry per source image, labelled with the class of its first sample
# (all samples of a group share one label by construction).
group_ids, first_sample_idx = np.unique(all_groups, return_index=True)
group_labels = all_labels[first_sample_idx]

# Same 80/20 stratified split and seed as before, applied to source images.
train_groups, test_groups = train_test_split(
    group_ids, test_size=0.2, stratify=group_labels, random_state=42
)

test_mask = np.isin(all_groups, test_groups)
train_images, test_images = all_images[~test_mask], all_images[test_mask]
train_labels, test_labels = all_labels[~test_mask], all_labels[test_mask]

descriptors_train, labels_train = preprocess_and_extract_features(train_images, train_labels)
descriptors_test, labels_test = preprocess_and_extract_features(test_images, test_labels)

In [8]:
# Train a random forest on individual SIFT descriptors and evaluate it.
#
# The training matrix has one row per descriptor, not per image, so it is
# very large. The recorded run of this cell ended in
# "MemoryError: could not allocate 671088640 bytes" with fully grown trees
# (presumably while a tree was being grown -- confirm against the traceback).
# The two limits below bound the size of every tree; tune them to the
# available RAM.
MAX_SAMPLES_PER_TREE = 200_000  # rows bootstrapped for each tree
MIN_SAMPLES_PER_LEAF = 25       # stops splitting small nodes, capping node count

rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    # max_samples given as an int must not exceed the number of training rows.
    max_samples=min(MAX_SAMPLES_PER_TREE, len(descriptors_train)),
    min_samples_leaf=MIN_SAMPLES_PER_LEAF,
)
rf_model.fit(descriptors_train, labels_train)
y_pred_rf = rf_model.predict(descriptors_test)

# Class ids are positions in label_class; passing them explicitly keeps the
# report aligned with the class names even if a class is absent from the test set.
class_ids = np.arange(len(label_class))

# Note: these metrics are per descriptor, because labels_test has one entry
# per descriptor row.
print("Random Forest Classification Report:")
print(classification_report(
    labels_test, y_pred_rf, labels=class_ids, target_names=label_class, zero_division=0
))
print("Random Forest Accuracy:", accuracy_score(labels_test, y_pred_rf))


MemoryError: could not allocate 671088640 bytes

In [None]:
# Plot the confusion matrix of the random forest predictions.

# Fix the label set so the matrix always has one row/column per entry of
# label_class, even when a class is missing from labels_test and y_pred_rf.
class_ids = np.arange(len(label_class))
conf_matrix = confusion_matrix(labels_test, y_pred_rf, labels=class_ids)

# display_labels must be passed by keyword; current scikit-learn releases
# reject it as a positional argument.
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=label_class)

# A larger figure and vertical tick labels keep the class names readable.
fig, ax = plt.subplots(figsize=(12, 12))
disp.plot(ax=ax, xticks_rotation='vertical')
ax.set_title('Confusion Matrix Random Forest')
plt.show()