In [1]:
import gc
import os
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from shared.utils import *
from shared.local_path import *

from collections import Counter
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Data Preprocessing
## Audio Data

In [2]:
label = []
filepath = []

for classes in os.listdir(AUDIO_PATH):
    for file in os.listdir(os.path.join(AUDIO_PATH, classes)):
        file_path = os.path.join(AUDIO_PATH, classes, file)

        filepath.append(file_path)
        label.append(classes)

audio = pd.DataFrame({
    'filepath': filepath,
    'label': label,
})

del filepath, label

audio.head()

Unnamed: 0,filepath,label
0,E:\\Skripsi\data\audio\angry\03-01-05-01-01-01...,angry
1,E:\\Skripsi\data\audio\angry\03-01-05-01-01-01...,angry
2,E:\\Skripsi\data\audio\angry\03-01-05-01-01-01...,angry
3,E:\\Skripsi\data\audio\angry\03-01-05-01-01-01...,angry
4,E:\\Skripsi\data\audio\angry\03-01-05-01-01-01...,angry


In [3]:
le = LabelEncoder()
le.fit(audio['label'])
audio['label_encoded'] = le.transform(audio['label'])
num_classes = len(le.classes_)

In [4]:
with tf.device('/GPU:0'):
    audio['data'] = audio.filepath.apply(preprocess_audio)
    audio = audio.sample(frac=1).reset_index(drop=True)



In [5]:
x_audio = np.stack(audio['data'].values)
y_audio = audio['label_encoded'].values
print(x_audio.shape)
del audio

(27406, 128, 110, 3)


In [6]:
x_audio_train, x_temp, y_audio_train, y_temp = train_test_split(
    x_audio, y_audio, test_size=0.3, random_state=100,
)

x_audio_val, x_audio_test, y_audio_val, y_audio_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=222,
)

del x_audio, y_audio
print(x_audio_train.shape)
print(x_audio_train.shape[0] == len(y_audio_train))

(19184, 128, 110, 3)
True


## Image Data

In [7]:
filepath = []
label = []

i = 0
for classes in os.listdir(TRAIN_IMAGE_PATH):
    for file in os.listdir(os.path.join(TRAIN_IMAGE_PATH, classes)):
        path = os.path.join(TRAIN_IMAGE_PATH, classes, file)
        filepath.append(path)
        label.append(classes)

train_df = pd.DataFrame({
    'filepath': filepath,
    'label': label
})

del filepath, label

print(train_df.shape)
print(train_df['label'].unique())
train_df.head()

(33228, 2)
['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']


Unnamed: 0,filepath,label
0,E:\\Skripsi\data\new_data\train\angry\angry_0_...,angry
1,E:\\Skripsi\data\new_data\train\angry\angry_0_...,angry
2,E:\\Skripsi\data\new_data\train\angry\angry_0_...,angry
3,E:\\Skripsi\data\new_data\train\angry\angry_0_...,angry
4,E:\\Skripsi\data\new_data\train\angry\angry_0_...,angry


In [8]:
filepath = []
label = []

i = 0
for classes in os.listdir(TEST_IMAGE_PATH):
    for file in os.listdir(os.path.join(TEST_IMAGE_PATH, classes)):
        path = os.path.join(TEST_IMAGE_PATH, classes, file)
        filepath.append(path)
        label.append(classes)

img_test_df = pd.DataFrame({
    'filepath': filepath,
    'label': label
})

del filepath, label

print(img_test_df.shape)
print(img_test_df['label'].unique())
img_test_df.head()

(7311, 2)
['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']


Unnamed: 0,filepath,label
0,E:\\Skripsi\data\new_data\test\angry\Anger.jpg,angry
1,E:\\Skripsi\data\new_data\test\angry\Anger_1.jpg,angry
2,E:\\Skripsi\data\new_data\test\angry\Anger_10.jpg,angry
3,E:\\Skripsi\data\new_data\test\angry\Anger_11.jpg,angry
4,E:\\Skripsi\data\new_data\test\angry\Anger_12.jpg,angry


In [9]:
le = LabelEncoder()
le.fit(train_df['label'])
train_df['label_encoded'] = le.transform(train_df['label'])
img_test_df['label_encoded'] = le.transform(img_test_df['label'])

In [10]:
with tf.device('/GPU:0'):
    train_df['data'] = train_df['filepath'].apply(lambda x: preprocess_image(x, (100, 100)))
    img_test_df['data'] = img_test_df['filepath'].apply(lambda x: preprocess_image(x, (100, 100)))

train_df.shape

(33228, 4)

In [11]:
x_img = np.stack(train_df['data'].values)
y_img = train_df['label_encoded'].values

x_img_test = np.stack(img_test_df['data'].values)
y_img_test = img_test_df['label_encoded'].values

del train_df, img_test_df

x_img_train, x_img_val, y_img_train, y_img_val = train_test_split(x_img, y_img, test_size=0.3, random_state=100,
                                                                  shuffle=True, stratify=y_img)
del x_img, y_img

In [12]:
import numpy as np
from collections import Counter

def independent_stratified_downsample(x_img, x_audio, y_img, y_audio):
    """
    Downsamples image and audio datasets independently to match total number of samples
    while keeping all classes in both datasets.

    Returns:
        Tuple of (x_img_ds, x_audio_ds, y_img_ds, y_audio_ds)
    """
    # Hitung distribusi kelas
    class_counts_img = Counter(y_img)
    class_counts_audio = Counter(y_audio)

    # Tentukan jumlah minimum per kelas untuk masing-masing dataset
    min_count_img = min(class_counts_img.values())
    min_count_audio = min(class_counts_audio.values())

    # Lakukan downsampling dengan stratifikasi untuk setiap dataset secara terpisah
    selected_indices_img = []
    for cls in class_counts_img:
        idx = np.where(y_img == cls)[0]
        np.random.shuffle(idx)
        selected_indices_img.extend(idx[:min_count_img])

    selected_indices_audio = []
    for cls in class_counts_audio:
        idx = np.where(y_audio == cls)[0]
        np.random.shuffle(idx)
        selected_indices_audio.extend(idx[:min_count_audio])

    # Sesuaikan jumlah total sampel agar seimbang antara image dan audio
    total_samples = min(len(selected_indices_img), len(selected_indices_audio))
    selected_indices_img = np.array(selected_indices_img)[:total_samples]
    selected_indices_audio = np.array(selected_indices_audio)[:total_samples]

    # Ambil data akhir
    x_img_ds = x_img[selected_indices_img]
    y_img_ds = y_img[selected_indices_img]
    x_audio_ds = x_audio[selected_indices_audio]
    y_audio_ds = y_audio[selected_indices_audio]

    return x_img_ds, x_audio_ds, y_img_ds, y_audio_ds

import numpy as np
from collections import Counter

def global_downsample_preserve_classes(x_img, x_audio, y_img, y_audio):
    """
    Randomly downsample the larger dataset to match the smaller one,
    ensuring that all original classes remain present.

    Returns:
        x_img_ds, x_audio_ds, y_img_ds, y_audio_ds
    """
    # Tentukan dataset yang lebih kecil
    n_img = len(y_img)
    n_audio = len(y_audio)
    target_samples = min(n_img, n_audio)

    def downsample(x, y, target_size):
        # Ulangi hingga mendapatkan subset yang masih mencakup semua kelas
        classes = set(y)
        while True:
            indices = np.random.choice(len(y), size=target_size, replace=False)
            y_subset = y[indices]
            if set(y_subset) == classes:
                return x[indices], y[indices]

    if n_img > n_audio:
        x_img_ds, y_img_ds = downsample(x_img, y_img, target_samples)
        x_audio_ds, y_audio_ds = x_audio, y_audio
    else:
        x_audio_ds, y_audio_ds = downsample(x_audio, y_audio, target_samples)
        x_img_ds, y_img_ds = x_img, y_img

    return x_img_ds, x_audio_ds, y_img_ds, y_audio_ds



In [13]:
x_img_train, x_audio_train, y_img_train, y_audio_train = global_downsample_preserve_classes(
    x_img_train, x_audio_train, y_img_train, y_audio_train
)

min_samples_val = min(len(y_img_val), len(y_audio_val))
x_img_val, x_audio_val, y_img_val, y_audio_val = global_downsample_preserve_classes(
    x_img_val, x_audio_val, y_img_val, y_audio_val
)

min_samples_test = min(len(y_img_test), len(y_audio_test))
x_img_test, x_audio_test, y_img_test, y_audio_test = global_downsample_preserve_classes(
    x_img_test, x_audio_test, y_img_test, y_audio_test
)

print("Downsampled training shape:", x_img_train.shape)
print("Training labels distribution:", Counter(y_img_train))

Downsampled training shape: (19184, 100, 100, 3)
Training labels distribution: Counter({4: 2881, 5: 2827, 3: 2760, 6: 2738, 0: 2702, 2: 2677, 1: 2599})


In [14]:
lb_img = Counter(y_img_test)
sorted_lb_img = dict(sorted(lb_img.items())) 
print(sorted_lb_img)

{0: 556, 1: 69, 2: 613, 3: 983, 4: 705, 5: 701, 6: 484}


In [15]:
lb_audio = Counter(y_audio_test)
sorted_lb_audio = dict(sorted(lb_audio.items()))
print(sorted_lb_audio)

{0: 619, 1: 89, 2: 712, 3: 620, 4: 638, 5: 540, 6: 653, 7: 240}


In [16]:
len(y_img_test) == len(y_audio_test)

True

In [17]:
len(y_img_train) == len(y_audio_train)

True

In [18]:
lb_img = Counter(y_img_train)
sorted_lb_img = dict(sorted(lb_img.items())) 
print(sorted_lb_img)

lb_audio = Counter(y_audio_train)
sorted_lb_audio = dict(sorted(lb_audio.items()))
print(sorted_lb_audio)

{0: 2702, 1: 2599, 2: 2677, 3: 2760, 4: 2881, 5: 2827, 6: 2738}
{0: 3041, 1: 402, 2: 3092, 3: 2996, 4: 2946, 5: 2619, 6: 3023, 7: 1065}


In [19]:
lb_img = Counter(y_img_val)
sorted_lb_img = dict(sorted(lb_img.items())) 
print(sorted_lb_img)

lb_audio = Counter(y_audio_val)
sorted_lb_audio = dict(sorted(lb_audio.items()))
print(sorted_lb_audio)

{0: 586, 1: 573, 2: 579, 3: 597, 4: 623, 5: 577, 6: 576}
{0: 668, 1: 84, 2: 687, 3: 647, 4: 594, 5: 539, 6: 656, 7: 236}
