# 📢 File explanation!!
Di dalam file ini, saya memuat empat dataset audio emosi (RAVDESS, SAVEE, TESS, dan URDU) langsung dari folder lokal menggunakan `os` dan `pandas`; pustaka `datasets` dari Hugging Face tidak jadi digunakan (impornya dikomentari). Path setiap file audio beserta label emosinya dikumpulkan ke dalam DataFrame, digabungkan, lalu disimpan dalam format .CSV. Langkah ini dilakukan untuk mempermudah analisis data eksploratori (Exploratory Data Analysis/EDA) pada tahapan selanjutnya.

# 🎯 **Step 0: Import library**
---

In [30]:
# from datasets import load_dataset

import os
import pandas as pd

# 🎯 **Step 1: Load dataset**
---

In [31]:
# Root folders of the local speech-emotion corpora. The paths are relative,
# so they resolve against the directory the notebook is run from.
RAVDESS = "./Dataset/Audio/Speech_RAVDESS"
SAVEE = "./Dataset/Audio/Speech_SAVEE"
TESS = "./Dataset/Audio/Speech_TESS"
URDU = "./Dataset/Audio/Speech_URDU"

## ✨ 1.1 RAVDESS Dataset

https://zenodo.org/records/1188976#.XsAXemgzaUk

In [32]:
# Build a (Path, Emotion) table for every RAVDESS recording found on disk.
#
# Layout walked here: RAVDESS/<actor folder>/<file>.wav. The emotion is the
# third dash-separated field of the file name, an integer code from 1 to 8.

# Emotion code -> label. Defined once, outside the loops, because it is the
# same for every file.
emotion_mapping = {
    1: 'neutral',
    2: 'calm',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fearful',
    7: 'disgust',
    8: 'surprised',
}

# Column-oriented accumulator: one list per DataFrame column.
data = {'Path': [], 'Emotion': []}

# sorted() pins the row order; os.listdir() returns entries in arbitrary order.
for actor_folder in sorted(os.listdir(RAVDESS)):
    actor_path = os.path.join(RAVDESS, actor_folder)

    # Skip anything that is not a folder (stray files next to the actors).
    if not os.path.isdir(actor_path):
        continue

    for audio_file in sorted(os.listdir(actor_path)):
        # Only .wav files are treated as recordings.
        if not audio_file.endswith('.wav'):
            continue

        # Third dash-separated field of the file name is the emotion code.
        emotion = int(audio_file.split('-')[2])

        data['Path'].append(os.path.join(actor_path, audio_file))
        # An unknown code raises KeyError instead of being dropped silently.
        data['Emotion'].append(emotion_mapping[emotion])

df_ravdess = pd.DataFrame(data)

df_ravdess

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
1,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
2,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
3,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
4,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,calm
...,...,...
1435,./Dataset/Audio/Speech_RAVDESS\Actor_24\03-01-...,surprised
1436,./Dataset/Audio/Speech_RAVDESS\Actor_24\03-01-...,surprised
1437,./Dataset/Audio/Speech_RAVDESS\Actor_24\03-01-...,surprised
1438,./Dataset/Audio/Speech_RAVDESS\Actor_24\03-01-...,surprised


In [33]:
# Sanity check: (rows, columns) of the RAVDESS table.
df_ravdess.shape

(1440, 2)

## ✨ 1.2 SAVEE Dataset

https://www.kaggle.com/datasets/ejlok1/surrey-audiovisual-expressed-emotion-savee/data

In [34]:
# Build a (Path, Emotion) table for every SAVEE recording found on disk.
#
# Layout walked here: SAVEE/<folder>/<file>.wav. File names look like
# "DC_a01.wav": the part after the underscore is a one- or two-letter emotion
# code followed by a two-digit index.

# Emotion code -> label. Defined once, outside the loops, because it is the
# same for every file.
emotion_mapping = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fearful',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
    'su': 'surprised',
}

# Column-oriented accumulator: one list per DataFrame column.
data = {'Path': [], 'Emotion': []}

# sorted() pins the row order; os.listdir() returns entries in arbitrary order.
for all_folder in sorted(os.listdir(SAVEE)):
    all_path = os.path.join(SAVEE, all_folder)

    # Skip anything that is not a folder.
    if not os.path.isdir(all_path):
        continue

    for audio_file in sorted(os.listdir(all_path)):
        # Only .wav files are treated as recordings.
        if not audio_file.endswith('.wav'):
            continue

        # Take the part after the underscore and cut its last six characters
        # (two-digit index + ".wav"), leaving only the emotion code.
        emotion = audio_file.split('_')[1][:-6]

        data['Path'].append(os.path.join(all_path, audio_file))
        # An unknown code raises KeyError instead of being dropped silently.
        data['Emotion'].append(emotion_mapping[emotion])

df_savee = pd.DataFrame(data)

df_savee

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_SAVEE\ALL\DC_a01.wav,angry
1,./Dataset/Audio/Speech_SAVEE\ALL\DC_a02.wav,angry
2,./Dataset/Audio/Speech_SAVEE\ALL\DC_a03.wav,angry
3,./Dataset/Audio/Speech_SAVEE\ALL\DC_a04.wav,angry
4,./Dataset/Audio/Speech_SAVEE\ALL\DC_a05.wav,angry
...,...,...
475,./Dataset/Audio/Speech_SAVEE\ALL\KL_su11.wav,surprised
476,./Dataset/Audio/Speech_SAVEE\ALL\KL_su12.wav,surprised
477,./Dataset/Audio/Speech_SAVEE\ALL\KL_su13.wav,surprised
478,./Dataset/Audio/Speech_SAVEE\ALL\KL_su14.wav,surprised


In [35]:
# Sanity check: (rows, columns) of the SAVEE table.
df_savee.shape

(480, 2)

## ✨ 1.3 TESS Dataset

https://tspace.library.utoronto.ca/handle/1807/24487

In [36]:
# Build a (Path, Emotion) table for every TESS recording found on disk.
#
# Layout walked here: all .wav files sit directly inside TESS (no
# sub-folders are visited). File names look like "OAF_back_angry.wav": the
# third underscore-separated field, minus the extension, is the emotion tag.

# Emotion tag used in the file name -> label. Defined once, outside the loop,
# because it is the same for every file.
emotion_mapping = {
    'angry': 'angry',
    'disgust': 'disgust',
    'fear': 'fearful',
    'happy': 'happy',
    'neutral': 'neutral',
    'sad': 'sad',
    'ps': 'surprised',
}

# Column-oriented accumulator: one list per DataFrame column.
data = {'Path': [], 'Emotion': []}

# sorted() pins the row order; os.listdir() returns entries in arbitrary order.
for audio_file in sorted(os.listdir(TESS)):
    # Only .wav files are treated as recordings.
    if not audio_file.endswith('.wav'):
        continue

    # Third underscore-separated field, with everything from the first dot
    # onwards removed.
    emotion = audio_file.split('_')[2].split('.')[0]

    data['Path'].append(os.path.join(TESS, audio_file))
    # An unknown tag raises KeyError instead of being dropped silently.
    data['Emotion'].append(emotion_mapping[emotion])

df_tess = pd.DataFrame(data)

df_tess

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_TESS\OAF_back_angry.wav,angry
1,./Dataset/Audio/Speech_TESS\OAF_back_disgust.wav,disgust
2,./Dataset/Audio/Speech_TESS\OAF_back_fear.wav,fearful
3,./Dataset/Audio/Speech_TESS\OAF_back_happy.wav,happy
4,./Dataset/Audio/Speech_TESS\OAF_back_neutral.wav,neutral
...,...,...
2795,./Dataset/Audio/Speech_TESS\YAF_youth_fear.wav,fearful
2796,./Dataset/Audio/Speech_TESS\YAF_youth_happy.wav,happy
2797,./Dataset/Audio/Speech_TESS\YAF_youth_neutral.wav,neutral
2798,./Dataset/Audio/Speech_TESS\YAF_youth_ps.wav,surprised


## ✨ 1.4 URDU Dataset

🔎 Dataset ini saya tambahkan karena setelah inference model masih terdapat banyak klasifikasi yang kurang tepat.

In [37]:
# Build a (Path, Emotion) table for every URDU recording found on disk.
#
# Layout walked here: URDU/<emotion folder>/<file>.wav. The label comes from
# the folder name (lower-cased), not from the file name.

# Lower-cased folder name -> label. Defined in this cell so the lookup does
# not depend on whatever `emotion_mapping` an earlier cell left behind in the
# notebook namespace.
# NOTE(review): the rendered output only confirms "Angry" and "Sad" folders;
# the remaining keys are assumed folder names -- confirm against the dataset.
emotion_mapping = {
    'angry': 'angry',
    'disgust': 'disgust',
    'fear': 'fearful',
    'happy': 'happy',
    'neutral': 'neutral',
    'sad': 'sad',
}

# Column-oriented accumulator: one list per DataFrame column.
data = {'Path': [], 'Emotion': []}

# sorted() pins the row order; os.listdir() returns entries in arbitrary order.
for emotion_folder in sorted(os.listdir(URDU)):
    emotion_path = os.path.join(URDU, emotion_folder)

    # Skip anything that is not a folder.
    if not os.path.isdir(emotion_path):
        continue

    # Resolve the label lazily, on the first .wav file, so a folder without
    # recordings never triggers a lookup (and never raises KeyError).
    label = None

    for audio_file in sorted(os.listdir(emotion_path)):
        # Only .wav files are treated as recordings.
        if not audio_file.endswith('.wav'):
            continue

        if label is None:
            # An unknown folder name raises KeyError instead of being
            # dropped silently.
            label = emotion_mapping[emotion_folder.lower()]

        data['Path'].append(os.path.join(emotion_path, audio_file))
        data['Emotion'].append(label)

df_urdu = pd.DataFrame(data)

df_urdu

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_URDU\Angry\SM1_F10_A010...,angry
1,./Dataset/Audio/Speech_URDU\Angry\SM1_F11_A011...,angry
2,./Dataset/Audio/Speech_URDU\Angry\SM1_F12_A012...,angry
3,./Dataset/Audio/Speech_URDU\Angry\SM1_F13_A013...,angry
4,./Dataset/Audio/Speech_URDU\Angry\SM1_F14_A014...,angry
...,...,...
395,./Dataset/Audio/Speech_URDU\Sad\SM27_F4_S096.wav,sad
396,./Dataset/Audio/Speech_URDU\Sad\SM6_F1_S097.wav,sad
397,./Dataset/Audio/Speech_URDU\Sad\SM6_F2_S098.wav,sad
398,./Dataset/Audio/Speech_URDU\Sad\SM6_F3_S099.wav,sad


# 🎯 **Step 3: Combine the datasets**
---

In [38]:
# Stack the four per-corpus tables into one. ignore_index=True renumbers the
# rows from 0 instead of repeating each table's own index.
frames = [df_ravdess, df_tess, df_savee, df_urdu]
df_combined = pd.concat(frames, ignore_index=True)

df_combined

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
1,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
2,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
3,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
4,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,calm
...,...,...
5115,./Dataset/Audio/Speech_URDU\Sad\SM27_F4_S096.wav,sad
5116,./Dataset/Audio/Speech_URDU\Sad\SM6_F1_S097.wav,sad
5117,./Dataset/Audio/Speech_URDU\Sad\SM6_F2_S098.wav,sad
5118,./Dataset/Audio/Speech_URDU\Sad\SM6_F3_S099.wav,sad


# 🎯 **Step 4: Save the data in CSV**
---

In [39]:
# Write the combined table to disk; index=False keeps the row index out of
# the file.
combined_csv_path = './Dataset/df_combined.csv'
df_combined.to_csv(combined_csv_path, index=False)

## ✨ 4.1 RAVDESS Dataset

In [40]:
# Write the RAVDESS table to disk; index=False keeps the row index out of
# the file.
ravdess_csv_path = './Dataset/df_ravdess.csv'
df_ravdess.to_csv(ravdess_csv_path, index=False)

## ✨ 4.2 SAVEE Dataset

In [41]:
# Write the SAVEE table to disk; index=False keeps the row index out of
# the file.
savee_csv_path = './Dataset/df_savee.csv'
df_savee.to_csv(savee_csv_path, index=False)

## ✨ 4.3 TESS Dataset

In [42]:
# Write the TESS table to disk; index=False keeps the row index out of
# the file.
tess_csv_path = './Dataset/df_tess.csv'
df_tess.to_csv(tess_csv_path, index=False)

## ✨ 4.4 URDU Dataset

In [43]:
# Write the URDU table to disk; index=False keeps the row index out of
# the file.
urdu_csv_path = './Dataset/df_urdu.csv'
df_urdu.to_csv(urdu_csv_path, index=False)