In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

def dataframelabeling(training_dir, testing_dir):
    train_fnames = []
    train_labels = []

    for root, _, files in os.walk(training_dir):
        for f in files:
            ext = os.path.splitext(f)[1].lower()
            if ext in ".jpg":
                fname = os.path.join(root, f)
                label, _ = f.split("_", 1)
                train_fnames.append(fname)
                train_labels.append(label)

    trainingdf = pd.DataFrame({
        "Filename": train_fnames,
        "Label": train_labels,
    })

    test_fnames = []
    test_labels = []

    for root, _, files in os.walk(testing_dir):
        for f in files:
            ext = os.path.splitext(f)[1].lower()
            if ext in ".jpg":
                fname = os.path.join(root, f)
                label = "_".join(f.split("_")[:-1])
                test_fnames.append(fname)
                test_labels.append(label)

    testingdf = pd.DataFrame({
        "Filename": test_fnames,
        "Label": test_labels,
    })
    
    return trainingdf, testingdf

def imagemaker(training_dir, testing_dir):
    trainingdf, testingdf = dataframelabeling(training_dir, testing_dir)
    
    train_set = ImageDataGenerator(
        rescale = 1.0 / 255,
        validation_split = 0.2
    ).flow_from_dataframe(
        dataframe=trainingdf,
        directory=training_dir,
        x_col="Filename",
        y_col="Label",
        target_size=(64, 64),
        batch_size=64,
        class_mode="categorical",
        shuffle=True,
        subset="training",
        color_mode="grayscale"
    )

    validation_set = ImageDataGenerator(
        rescale = 1.0 / 255,
        validation_split = 0.8
    ).flow_from_dataframe(
        dataframe=trainingdf,
        directory=training_dir,
        x_col="Filename",
        y_col="Label",
        target_size=(64, 64),
        batch_size=64,
        class_mode="categorical",
        shuffle=True,
        subset="validation",
        color_mode="grayscale"
    )

    test_set = ImageDataGenerator(
        rescale = 1.0 / 255
    ).flow_from_dataframe(
        dataframe=testingdf,
        directory=testing_dir,
        x_col="Filename",
        y_col=None,
        target_size=(64, 64),
        batch_size=64,
        class_mode=None,
        color_mode="grayscale"
    )

    return train_set, validation_set, test_set

def buildmodel():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(64,(3,3),activation="relu",input_shape=(64,64,1)))
    model.add(tf.keras.layers.MaxPooling2D(2,2))
    model.add(tf.keras.layers.Conv2D(128,(3,3),activation="relu"))
    model.add(tf.keras.layers.MaxPooling2D(2,2))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(512,activation="relu"))
    model.add(tf.keras.layers.Dense(256,activation="relu"))
    model.add(tf.keras.layers.Dense(30,activation="softmax"))

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

def trainmodel(model, train_set, validation_set):
    HangulClassificator = model.fit(
        train_set,
        validation_data=validation_set,
        epochs=15
    )

def predictmodel(model, train_set, test_set):
    labels = train_set.class_indices
    labels = dict((a, l) for l, a in labels.items())

    predictions = model.predict(test_set, steps=len(test_set), verbose=1)

    submission_data = []
    for i, pred in enumerate(predictions):
        image_id = test_set.filenames[i].split(".")[0]
        answer = labels[np.argmax(pred)]
        submission_data.append([image_id, answer])

    submission_df = pd.DataFrame(submission_data, columns=["ID", "ANSWER"])
    
    submission_df['ID'] = submission_df['ID'].str.extract('(\d+)').astype(int)
    submission_df = submission_df.sort_values(by='ID')
    submission_df['ID'] = 'test' + submission_df['ID'].astype(str)

    submission_df.to_csv("HangulClassification.csv", index=False)

training_dir = "/kaggle/input/uts-praktikum-artificial-intelligence/hangul_dataset/hangul_characters_v1"
testing_dir = "/kaggle/input/uts-praktikum-artificial-intelligence/testing/testing"

train_set, validation_set, test_set = imagemaker(training_dir, testing_dir)

model = buildmodel()

trainmodel(model, train_set, validation_set)    
    
predictmodel(model, train_set, test_set)
    
model.save("model.h5")

2025-06-22 14:57:12.631468: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-22 14:57:12.631635: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-22 14:57:12.837236: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 1920 validated image filenames belonging to 30 classes.
Found 1920 validated image filenames belonging to 30 classes.
Found 30 validated image filenames.


  super().__init__(


Epoch 1/15


  self._warn_if_super_not_called()


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 850ms/step - accuracy: 0.1494 - loss: 3.1620 - val_accuracy: 0.7094 - val_loss: 1.0392
Epoch 2/15
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 824ms/step - accuracy: 0.7711 - loss: 0.8077 - val_accuracy: 0.8750 - val_loss: 0.4093
Epoch 3/15
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 772ms/step - accuracy: 0.9155 - loss: 0.2813 - val_accuracy: 0.9099 - val_loss: 0.3105
Epoch 4/15
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 802ms/step - accuracy: 0.9520 - loss: 0.1823 - val_accuracy: 0.9484 - val_loss: 0.1798
Epoch 5/15
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 766ms/step - accuracy: 0.9784 - loss: 0.0796 - val_accuracy: 0.9792 - val_loss: 0.0904
Epoch 6/15
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 774ms/step - accuracy: 0.9942 - loss: 0.0238 - val_accuracy: 0.9781 - val_loss: 0.0881
Epoch 7/15
[1m30/30[0m [32m━━━