In [19]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, TimeDistributed, LSTM, Dense, Dropout, Reshape, BatchNormalization
import matplotlib
matplotlib.use('Agg')
import gc


In [2]:
# Path folder input WAV dan folder output MFCC
input_root = "dataset-sound"
output_root = "matriks-mfcc"

In [3]:
# Dictionary untuk menghitung nomor file per subfolder
folder_counter = {}

In [4]:
# Maksimal file per kategori (misal: 500 file dari 'sunda', 500 dari 'jawa', dst)
max_files_per_folder = 7700

In [5]:
# Telusuri seluruh file di dalam folder input_root dan subfoldernya
for root, dirs, files in os.walk(input_root):
    # Ambil nama folder pertama setelah input_root (misal: 'sunda', 'jawa')
    relative_path = os.path.relpath(root, input_root)
    label = relative_path.split(os.sep)[0]

    # Filter hanya file .wav
    wav_files = [f for f in files if f.lower().endswith(".wav")]

    # Sort dan batasi jumlah file (maksimal 500)
    wav_files = sorted(wav_files)[:max_files_per_folder]

    for i, file in enumerate(wav_files, 1):
        input_path = os.path.join(root, file)

        # Buat folder output jika belum ada
        output_dir = os.path.join(output_root, label)
        os.makedirs(output_dir, exist_ok=True)

        # Nama file output: 1.csv, 2.csv, dst.
        output_filename = f"{i}.csv"
        output_path = os.path.join(output_dir, output_filename)

        # Load audio
        y, sr = librosa.load(input_path, sr=None)

        # Ekstrak MFCC (13 koefisien)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # Simpan MFCC ke CSV
        np.savetxt(output_path, mfcc, delimiter=",")

        print(f"[{label}] {i}: {input_path} -> {output_path}")


[Batak] 1: dataset-sound\Batak\Ind001_F_B_C_news_0000.wav -> matriks-mfcc\Batak\1.csv
[Batak] 2: dataset-sound\Batak\Ind001_F_B_C_news_0001.wav -> matriks-mfcc\Batak\2.csv
[Batak] 3: dataset-sound\Batak\Ind001_F_B_C_news_0002.wav -> matriks-mfcc\Batak\3.csv
[Batak] 4: dataset-sound\Batak\Ind001_F_B_C_news_0003.wav -> matriks-mfcc\Batak\4.csv
[Batak] 5: dataset-sound\Batak\Ind001_F_B_C_news_0004.wav -> matriks-mfcc\Batak\5.csv
[Batak] 6: dataset-sound\Batak\Ind001_F_B_C_news_0005.wav -> matriks-mfcc\Batak\6.csv
[Batak] 7: dataset-sound\Batak\Ind001_F_B_C_news_0006.wav -> matriks-mfcc\Batak\7.csv
[Batak] 8: dataset-sound\Batak\Ind001_F_B_C_news_0007.wav -> matriks-mfcc\Batak\8.csv
[Batak] 9: dataset-sound\Batak\Ind001_F_B_C_news_0008.wav -> matriks-mfcc\Batak\9.csv
[Batak] 10: dataset-sound\Batak\Ind001_F_B_C_news_0009.wav -> matriks-mfcc\Batak\10.csv
[Batak] 11: dataset-sound\Batak\Ind001_F_B_C_news_0010.wav -> matriks-mfcc\Batak\11.csv
[Batak] 12: dataset-sound\Batak\Ind001_F_B_C_news_

In [6]:
# Path folder input dan output
input_root = "matriks-mfcc"
output_root = "visualisasi"

# Persentase pembagian dataset
TRAIN_RATIO = 0.8  # 80% train, 20% test

# Telusuri folder per label (misalnya 'jawa', 'sunda')
for label in os.listdir(input_root):
    label_folder = os.path.join(input_root, label)
    if not os.path.isdir(label_folder):
        continue

    # Ambil semua file CSV dalam label folder
    all_csv_files = [
        os.path.join(label_folder, f)
        for f in os.listdir(label_folder)
        if f.endswith(".csv")
    ]

    # Acak urutan agar pembagian train/test acak
    random.shuffle(all_csv_files)

    # Bagi ke train dan test
    total_files = len(all_csv_files)
    train_count = int(total_files * TRAIN_RATIO)
    train_files = all_csv_files[:train_count]
    test_files = all_csv_files[train_count:]

    # Fungsi bantu untuk simpan gambar ke folder
    def save_images(files, subset):  # subset = 'train' atau 'test'
        for file_path in files:
            mfcc = np.loadtxt(file_path, delimiter=",")

            # Path output folder: visualisasi/train/jawa/ atau visualisasi/test/sunda/
            output_dir = os.path.join(output_root, subset, label)
            os.makedirs(output_dir, exist_ok=True)

            # Nama file output
            filename = os.path.basename(file_path).replace(".csv", ".png")
            output_img_path = os.path.join(output_dir, filename)

            # Simpan gambar MFCC
            plt.figure(figsize=(6, 4))
            plt.imshow(mfcc, aspect='auto', origin='lower', cmap='magma')
            plt.axis('off')
            plt.tight_layout(pad=0)
            plt.savefig(output_img_path, bbox_inches='tight', pad_inches=0)
            plt.clf()
            plt.close()
            gc.collect()


            print(f"Saved to {subset.upper()}: {output_img_path}")

    # Simpan ke folder train dan test
    save_images(train_files, "train")
    save_images(test_files, "test")

Saved to TRAIN: visualisasi\train\Batak\2851.png
Saved to TRAIN: visualisasi\train\Batak\2673.png
Saved to TRAIN: visualisasi\train\Batak\1871.png
Saved to TRAIN: visualisasi\train\Batak\7548.png
Saved to TRAIN: visualisasi\train\Batak\4563.png
Saved to TRAIN: visualisasi\train\Batak\4765.png
Saved to TRAIN: visualisasi\train\Batak\5944.png
Saved to TRAIN: visualisasi\train\Batak\2631.png
Saved to TRAIN: visualisasi\train\Batak\6163.png
Saved to TRAIN: visualisasi\train\Batak\4909.png
Saved to TRAIN: visualisasi\train\Batak\2809.png
Saved to TRAIN: visualisasi\train\Batak\5288.png
Saved to TRAIN: visualisasi\train\Batak\4734.png
Saved to TRAIN: visualisasi\train\Batak\2037.png
Saved to TRAIN: visualisasi\train\Batak\6263.png
Saved to TRAIN: visualisasi\train\Batak\4873.png
Saved to TRAIN: visualisasi\train\Batak\1367.png
Saved to TRAIN: visualisasi\train\Batak\3223.png
Saved to TRAIN: visualisasi\train\Batak\6446.png
Saved to TRAIN: visualisasi\train\Batak\6979.png
Saved to TRAIN: visu

In [23]:
# Define image dimensions
IMG_HEIGHT = 48
IMG_WIDTH = 48
BATCH_SIZE = 64
EPOCHS = 50
NUM_CLASSES = 4

In [4]:
dialect_labels = ['Batak', 'Javanese', 'Sundanese', 'Umum']

In [5]:
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen  = ImageDataGenerator(rescale=1./255)

In [6]:
train_generator = train_datagen.flow_from_directory(
    'visualisasi/train',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

Found 24640 images belonging to 4 classes.


In [7]:
test_generator = test_datagen.flow_from_directory(
    'visualisasi/test',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

Found 6160 images belonging to 4 classes.


### Model CNN

In [8]:
model = tf.keras.models.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(train_generator.num_classes, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [12]:
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator
)

Epoch 1/10
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 188ms/step - accuracy: 0.2447 - loss: 1.3865 - val_accuracy: 0.2500 - val_loss: 1.3863
Epoch 2/10
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 154ms/step - accuracy: 0.2505 - loss: 1.3865 - val_accuracy: 0.2500 - val_loss: 1.3863
Epoch 3/10
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step - accuracy: 0.2564 - loss: 1.3865

KeyboardInterrupt: 

### Model LSTM

In [13]:
lstm_model = tf.keras.models.Sequential([
    Reshape((IMG_HEIGHT, IMG_WIDTH * 3), input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])

lstm_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history_lstm = lstm_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator
)

  super().__init__(**kwargs)


Epoch 1/10
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 163ms/step - accuracy: 0.2506 - loss: 1.3951 - val_accuracy: 0.2500 - val_loss: 1.3863
Epoch 2/10
[1m273/385[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m16s[0m 148ms/step - accuracy: 0.2507 - loss: 1.3864

KeyboardInterrupt: 

In [24]:
cnn_lstm_model = Sequential([
    # Step 1: CNN untuk ekstraksi fitur
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Step 2: Flatten fitur spasial sebagian
    # Hasil maxpool terakhir: (12, 12, 64)  ->  kita reshape ke (12, 12*64) → sequence panjang 12
    Reshape((12, 12 * 64)),

    # Step 3: LSTM untuk urutan fitur
    LSTM(128, return_sequences=False),
    BatchNormalization(),

    # Step 4: Klasifikasi
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])

In [25]:
cnn_lstm_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history_cnn_lstm = cnn_lstm_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator
)


Epoch 1/50
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 289ms/step - accuracy: 0.2763 - loss: 1.4779 - val_accuracy: 0.2586 - val_loss: 1.4422
Epoch 2/50
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 222ms/step - accuracy: 0.3113 - loss: 1.3665 - val_accuracy: 0.2791 - val_loss: 1.5389
Epoch 3/50
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 221ms/step - accuracy: 0.2973 - loss: 1.3720 - val_accuracy: 0.2310 - val_loss: 5.5333
Epoch 4/50
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 231ms/step - accuracy: 0.3163 - loss: 1.3619 - val_accuracy: 0.2570 - val_loss: 1.5171
Epoch 5/50
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 236ms/step - accuracy: 0.3154 - loss: 1.3641 - val_accuracy: 0.2706 - val_loss: 1.4116
Epoch 6/50
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 246ms/step - accuracy: 0.3324 - loss: 1.3531 - val_accuracy: 0.2518 - val_loss: 2.1759
Epoch 7/5