# 📢 File explanation!!
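Di dalam file ini, saya melakukan balancing data dengan metode oversampling. Oversampling dilakukan melalui augmentasi data audio (time shift, perubahan kecepatan, pitch, dan volume) yang diimplementasikan dengan librosa dan NumPy; library NLPAug ikut diimpor tetapi tidak dipakai pada kode di bawah.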

# 🎯 **Step 0: Import library**
---

In [272]:
import pandas as pd
import librosa
import librosa.display as librosa_display
import matplotlib.pyplot as plt
import numpy as np
import nlpaug.augmenter.audio as naa
import os
import soundfile as sf
import random
import shutil



# 🎯 **Step 1: Load Dataset Combined**
---

In [273]:
# Load the combined metadata table; the preview below shows a 'Path' and an
# 'Emotion' column for each audio file.
df_combined = pd.read_csv('./Dataset/df_combined.csv')

# Notebook display: preview the first rows.
df_combined.head()

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
1,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
2,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
3,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
4,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,calm


# 🎯 **Step 2: Data augmentation**
---

In [274]:
import librosa
import numpy as np
import soundfile as sf

def load_audio(file_path, sr=22050):
    """Load an audio file with librosa.

    Args:
        file_path: Path of the audio file to read.
        sr: Sampling rate requested from ``librosa.load``.

    Returns:
        The ``(audio, sr)`` tuple returned by ``librosa.load``.
    """
    # Forward the requested rate: it used to be accepted but silently ignored,
    # so callers asking for a non-default rate still got librosa's default.
    audio, sr = librosa.load(file_path, sr=sr)
    return audio, sr

def save_audio(audio, output_path, sr):
    """Write ``audio`` to ``output_path`` with sample rate ``sr`` via soundfile."""
    sf.write(file=output_path, data=audio, samplerate=sr)

def add_noise(audio, noise_level=0.00005):
    """Return ``audio`` with zero-mean Gaussian noise added to every sample.

    Args:
        audio: One-dimensional sample array.
        noise_level: Standard deviation of the noise.

    Returns:
        A new array ``audio + noise`` with one noise value per sample.
    """
    n_samples = len(audio)
    return audio + np.random.normal(loc=0, scale=noise_level, size=n_samples)

def time_shift(audio, shift_max_ms=100):
    """Circularly shift ``audio`` by a random number of samples.

    The shift is drawn uniformly from ``[0, max_shift)`` where
    ``max_shift = int(shift_max_ms * 0.001 * len(audio))``; samples pushed
    past the end wrap around to the start (``np.roll``).

    NOTE(review): despite the ``_ms`` suffix, the bound scales with the clip
    length (the default 100 means up to 10 % of the samples), not with a
    sample rate. Behaviour is kept as-is; confirm whether real milliseconds
    were intended.

    Args:
        audio: One-dimensional sample array.
        shift_max_ms: Scale of the maximum shift (see note above).

    Returns:
        A new array of the same length as ``audio``.
    """
    max_shift = int(shift_max_ms * 0.001 * len(audio))
    if max_shift <= 0:
        # np.random.randint(0) raises ValueError, so empty or very short
        # clips are returned unshifted (as a copy, like np.roll would).
        return np.array(audio, copy=True)

    shift = np.random.randint(max_shift)
    return np.roll(audio, shift)

def speed_change(audio, speed_factor_range=(0.8, 1.2)):
    """Time-stretch ``audio`` by a random rate.

    Args:
        audio: Sample array passed to ``librosa.effects.time_stretch``.
        speed_factor_range: ``(low, high)`` bounds from which the stretch
            rate is drawn uniformly. Exposed as a parameter (with the former
            hard-coded value as default) to match ``pitch_change`` and
            ``volume_change``.

    Returns:
        The time-stretched audio returned by librosa.
    """
    speed_factor = np.random.uniform(*speed_factor_range)
    return librosa.effects.time_stretch(y=audio, rate=speed_factor)

def pitch_change(audio, pitch_factor_range=(-2, 2), sr=22050):
    """Pitch-shift ``audio`` by a random number of steps.

    Args:
        audio: Sample array passed to ``librosa.effects.pitch_shift``.
        pitch_factor_range: ``(low, high)`` bounds from which ``n_steps`` is
            drawn uniformly.
        sr: Sampling rate of ``audio``. Defaults to 22050, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The pitch-shifted audio returned by librosa.
    """
    pitch_factor = np.random.uniform(*pitch_factor_range)
    # Pass the signal by keyword, consistent with speed_change.
    return librosa.effects.pitch_shift(y=audio, sr=sr, n_steps=pitch_factor)

def volume_change(audio, volume_factor_range=(0.5, 1.5)):
    """Scale ``audio`` by a random gain.

    Args:
        audio: Sample array (or any value supporting ``*`` with a float).
        volume_factor_range: Bounds unpacked into ``np.random.uniform`` to
            draw the gain.

    Returns:
        ``audio`` multiplied by the drawn gain.
    """
    gain = np.random.uniform(*volume_factor_range)
    return audio * gain

def mixup(audio1, audio2, alpha=0.5):
    """Blend two inputs as ``alpha * audio1 + (1 - alpha) * audio2``.

    Args:
        audio1: First operand of the blend.
        audio2: Second operand of the blend.
        alpha: Weight given to ``audio1``; ``audio2`` gets ``1 - alpha``.

    Returns:
        The weighted sum of the two inputs.
    """
    first_part = alpha * audio1
    second_part = (1 - alpha) * audio2
    return first_part + second_part

# Example usage:

# input_file = './Dataset/Audio/Speech_RAVDESS/Actor_01/03-01-01-01-01-01-01.wav'
# output_file = './'
# file_path = input_file
# original_audio, sr = load_audio(file_path)

# # Time domain augmentations
# augmented_audio_noise = add_noise(original_audio)
# augmented_audio_shifted = time_shift(original_audio)
# augmented_audio_speed = speed_change(original_audio)
# augmented_audio_pitch = pitch_change(original_audio)
# augmented_audio_volume = volume_change(original_audio)

# # # Spectrogram domain augmentations
# # file_path2 = input_file
# # audio2 = load_audio(file_path2)

# # spec1 = np.abs(librosa.stft(original_audio))
# # spec2 = np.abs(librosa.stft(audio2))

# # augmented_spec_mixup = mixup(spec1, spec2)

# # SpecAugment can be applied as well, but it requires a bit more complex implementation

# # Save augmented audio examples (you can save them to different files)
# save_audio(augmented_audio_noise, './augmented_audio_noise.wav', sr)
# save_audio(augmented_audio_shifted, './augmented_audio_shifted.wav', sr)
# save_audio(augmented_audio_speed, './augmented_audio_speed.wav', sr)
# save_audio(augmented_audio_pitch, './augmented_audio_pitch.wav', sr)
# save_audio(augmented_audio_volume, './augmented_audio_volume.wav', sr)

# aug_types = {
#     'noise': add_noise,
#     'shifted': time_shift,
#     'speed': speed_change,
#     'pitch': pitch_change,
#     'volume': volume_change,
# }

# data, sr = librosa.load(input_file)

# print(data)

# aug_type = np.random.choice(list(aug_types.keys()))

# augmented_audio = aug_types[aug_type](data)

# print(augmented_audio)


In [275]:

def augment_data(input_data, target_emotion, target_count, mayority_count):
    """Write augmented copies of one emotion's clips to disk.

    Files are saved under ``./Dataset/Audio/Speech_AUG/<target_emotion>`` as
    ``augmented_audio__<aug_type>_<j+1>_<i+1>.wav`` where ``i`` is the round
    and ``j`` the position inside the round.

    The function runs ``mayority_count // target_count`` rounds, each
    producing ``min(mayority_count - target_count, target_count)`` files.
    The total can therefore exceed the deficit
    ``mayority_count - target_count``; trimming the surplus is left to the
    caller.

    Args:
        input_data: Table-like object with ``'Path'`` and ``'Emotion'``
            columns (e.g. a pandas DataFrame).
        target_emotion: Emotion label whose clips are augmented.
        target_count: Current number of clips of ``target_emotion``.
        mayority_count: Number of clips in the largest class.

    Raises:
        ValueError: If ``target_count`` is not positive or no row of
            ``input_data`` has the requested emotion.
    """
    if target_count <= 0:
        raise ValueError(f'target_count must be positive, got {target_count}')

    # Select the paths that belong to the requested emotion.
    emotions = np.asarray(input_data['Emotion'])
    target_data = np.asarray(input_data['Path'])[emotions == target_emotion]
    if len(target_data) == 0:
        raise ValueError(f'no rows with emotion {target_emotion!r} to augment')

    rounds = mayority_count // target_count
    # Never produce more files per round than there are source clips counted.
    per_round = min(mayority_count - target_count, target_count)

    # Create the output folder for this emotion (parents included).
    output_folder = './Dataset/Audio/Speech_AUG'
    output_subfolder = os.path.join(output_folder, target_emotion)
    os.makedirs(output_subfolder, exist_ok=True)

    # Loop-invariant table of available augmentations; add_noise is
    # currently disabled.
    aug_types = {
        'shifted': time_shift,
        'speed': speed_change,
        'pitch': pitch_change,
        'volume': volume_change,
    }
    aug_names = list(aug_types)

    for i in range(rounds):
        for j in range(per_round):
            print(j)

            # Walk through the source clips with the inner index. The
            # previous code indexed with the round number ``i``, so every
            # file of a round was derived from one and the same clip.
            source_path = target_data[j % len(target_data)]
            data, sr = librosa.load(source_path)

            aug_type = np.random.choice(aug_names)
            augmented_audio = aug_types[aug_type](data)

            save_audio(augmented_audio, f'{output_subfolder}/augmented_audio__{aug_type}_{j+1}_{i+1}.wav', sr)


In [276]:
# Derive the class sizes from the loaded data instead of hard-coding them, so
# the numbers cannot drift out of sync with df_combined.
emotion_counts = df_combined['Emotion'].value_counts()

mayority_count = int(emotion_counts.max())

target_calm_count = int(emotion_counts['calm'])
target_neutral_count = int(emotion_counts['neutral'])

# Generate augmented clips for the under-represented emotions (neutral, calm).

augment_data(df_combined, 'neutral', target_neutral_count, mayority_count)
augment_data(df_combined, 'calm', target_calm_count, mayority_count)

0
1
2
3
4
5
6
7
8
9
10
11
12


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
9

In [277]:
# Root folder of the augmented audio; augment_data writes one sub-folder per
# emotion below it.
AUG = "./Dataset/Audio/Speech_AUG"

In [278]:
# Collect the path and emotion label of every augmented audio file.
data = {'Path': [], 'Emotion': []}

# Each sub-folder of AUG is named after an emotion. os.listdir returns entries
# in arbitrary order, so sort them to keep the row order reproducible.
for emotion in sorted(os.listdir(AUG)):
    emotion_path = os.path.join(AUG, emotion)

    # Ignore anything that is not a folder.
    if not os.path.isdir(emotion_path):
        continue

    for audio_file in sorted(os.listdir(emotion_path)):
        # Only .wav files are picked up.
        if not audio_file.endswith('.wav'):
            continue

        # The label is the name of the containing folder.
        data['Path'].append(os.path.join(emotion_path, audio_file))
        data['Emotion'].append(emotion)

# Build the DataFrame from the collected rows.
df_aug = pd.DataFrame(data)

df_aug.head()

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
1,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
2,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
3,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
4,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm


In [279]:
# Split the augmented rows into one frame per emotion label.
df_calm, df_neutral = (
    df_aug[df_aug['Emotion'] == label] for label in ('calm', 'neutral')
)

In [280]:
# Number of augmented calm clips needed to reach the majority class size, and
# how many generated clips exceed that (previously a hard-coded 116).
calm_needed = mayority_count - target_calm_count
calm_excess = max(len(df_calm) - calm_needed, 0)

# Randomly pick the surplus rows, without replacement.
indeks_hapus = np.random.choice(df_calm.index, calm_excess, replace=False)

# Drop the randomly selected rows.
df_calm = df_calm.drop(indeks_hapus)

In [281]:
# Stack the trimmed calm rows on top of the neutral rows with a fresh index.
df_aug = pd.concat((df_calm, df_neutral), axis=0, ignore_index=True)

df_aug.head()

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
1,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
2,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
3,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm
4,./Dataset/Audio/Speech_AUG\calm\augmented_audi...,calm


In [282]:
# Save the augmented-file table without the DataFrame index column.
df_aug.to_csv('./Dataset/df_aug.csv', index=False)

In [283]:
# Append the augmented rows to the original table with a fresh index.
df_combined_after_aug = pd.concat((df_combined, df_aug), axis=0, ignore_index=True)

df_combined_after_aug.head()

Unnamed: 0,Path,Emotion
0,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
1,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
2,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
3,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,neutral
4,./Dataset/Audio/Speech_RAVDESS\Actor_01\03-01-...,calm


In [284]:
# Save the original-plus-augmented table without the DataFrame index column.
df_combined_after_aug.to_csv('./Dataset/df_combined_after_aug.csv', index=False)

In [285]:
# Count the rows per emotion after augmentation, largest class first.
emotions_count_combined = df_combined_after_aug['Emotion'].value_counts(sort=True, ascending=False)

emotions_count_combined

Emotion
neutral      652
calm         652
happy        652
sad          652
angry        652
fearful      652
disgust      652
surprised    652
Name: count, dtype: int64