In [22]:
import numpy as np
import pandas as pd
import os
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import re
from shutil import copy2

from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
import pocketsphinx

### Bag 0 : Trial n Error

In [5]:
def load_images_from_folder(folder):
    images = []
    filenames = []
    
    def extract_id(filename):
        match = re.search(r'id_(\d+)_', filename)
        return int(match.group(1)) if match else -1
    
    file_list = [f for f in os.listdir(folder) if f.endswith(".jpg") or f.endswith(".png")]
    file_list.sort(key=extract_id)
    
    for filename in file_list:
        img_path = os.path.join(folder, filename)
        img = Image.open(img_path).convert('RGB') 
        img = img.resize((128, 128))
        img_array = np.array(img)
        images.append((img_array, filename))
    
    return images

folder_path = r"C:\Users\gagah\Desktop\KKN\data\Train Images 13440x32x32\train"
images_with_filenames = load_images_from_folder(folder_path)

images = np.array([img[0] for img in images_with_filenames])
filenames = [img[1] for img in images_with_filenames]

print(f'Total data gambar: {len(images)}')
print(f'Shape setiap gambar: {images[0].shape}')

Total data gambar: 13440
Shape setiap gambar: (128, 128, 3)


In [None]:
# Memisahkan gambar berdasarkan label
label_dict = {}
for img_array, filename in images_with_filenames:
    match = re.search(r'label_(\d+)', filename)
    if match:
        label = int(match.group(1))
        if label not in label_dict:
            label_dict[label] = []
        label_dict[label].append((img_array, filename))

# Menampilkan gambar per label
for label, images in label_dict.items():
    fig, axes = plt.subplots(1, 8, figsize=(10, 2))
    axes = axes.ravel()
    for i in range(min(8, len(images))):  # Hanya menampilkan maksimal 6 gambar per label
        axes[i].imshow(images[i][0])
        axes[i].axis('off')
        axes[i].set_title(images[i][1], fontsize=8)
    plt.suptitle(f'Label {label}')
    plt.tight_layout()
    plt.show()

In [7]:
def create_folders_by_label(source_folder, destination_folder):
    # Membuat folder tujuan jika belum ada
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    file_list = [f for f in os.listdir(source_folder) if f.endswith(".jpg") or f.endswith(".png")]
    
    for filename in file_list:
        match = re.search(r'id_(\d+)_label_(\d+)', filename)
        if match:
            img_id = match.group(1)
            label = match.group(2)
            
            # Membuat path folder label
            label_folder = os.path.join(destination_folder, f'lab_{label}')
            if not os.path.exists(label_folder):
                os.makedirs(label_folder)
            
            # Menyalin file ke folder tujuan dengan nama file baru
            src_path = os.path.join(source_folder, filename)
            dest_path = os.path.join(label_folder, f'id_{img_id}.png')
            copy2(src_path, dest_path)

source_folder_path = r"C:\Users\gagah\Desktop\KKN\data\Train Images 13440x32x32\train"
destination_folder_path = r"C:\Users\gagah\Desktop\KKN\data\Data_Train_Images"

create_folders_by_label(source_folder_path, destination_folder_path)


### Bag 1 : Classification Hijaiyah

In [15]:
TRAINING_DIR = destination_folder_path

model_dir = 'model'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
    
datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=2,
    shear_range=0.4, 
    validation_split=0.2
)

train_generator = datagen.flow_from_directory(
    directory=TRAINING_DIR,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    subset='training' 
)

validation_generator = datagen.flow_from_directory(
    directory=TRAINING_DIR,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(42, activation='relu'),
    tf.keras.layers.Dense(28, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(model_dir, 'model_{epoch:02d}_{val_accuracy:.2f}.h5'),  # Nama file dengan epoch dan akurasi validasi
    monitor='val_accuracy',    # Metrik yang dipantau
    save_best_only=True,       # Hanya menyimpan model terbaik
    mode='max',                # Mode 'max' karena kita ingin akurasi maksimum
    verbose=1                  # Menampilkan log setiap kali model disimpan
)

model.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=[checkpoint_callback]
)

Found 10752 images belonging to 28 classes.
Found 2688 images belonging to 28 classes.
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 126, 126, 16)      448       
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 63, 63, 16)        0         
 g2D)                                                            
                                                                 
 conv2d_9 (Conv2D)           (None, 61, 61, 32)        4640      
                                                                 
 max_pooling2d_9 (MaxPoolin  (None, 30, 30, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 28, 28, 64)        18496     
                                 

  saving_api.save_model(


Epoch 2: val_accuracy improved from 0.41592 to 0.57292, saving model to model\model_02_0.57.h5
Epoch 3/50
Epoch 3: val_accuracy improved from 0.57292 to 0.67485, saving model to model\model_03_0.67.h5
Epoch 4/50
Epoch 4: val_accuracy improved from 0.67485 to 0.70871, saving model to model\model_04_0.71.h5
Epoch 5/50
Epoch 5: val_accuracy improved from 0.70871 to 0.73400, saving model to model\model_05_0.73.h5
Epoch 6/50
Epoch 6: val_accuracy improved from 0.73400 to 0.75521, saving model to model\model_06_0.76.h5
Epoch 7/50
Epoch 7: val_accuracy improved from 0.75521 to 0.77232, saving model to model\model_07_0.77.h5
Epoch 8/50
Epoch 8: val_accuracy improved from 0.77232 to 0.77902, saving model to model\model_08_0.78.h5
Epoch 9/50
Epoch 9: val_accuracy did not improve from 0.77902
Epoch 10/50
Epoch 10: val_accuracy improved from 0.77902 to 0.79650, saving model to model\model_10_0.80.h5
Epoch 11/50
Epoch 11: val_accuracy improved from 0.79650 to 0.80543, saving model to model\model_11

<keras.src.callbacks.History at 0x14ba3f77eb0>

### Bag 2 : Recognize Doa Speech