In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used in the cells
# below all live under "/content/drive/My Drive/".
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Bare expression: only displays the Drive datasets directory path as the cell
# output; the value is not bound to any name.
"/content/drive/My Drive/Datasets/"


'/content/drive/My Drive/Datasets/'

In [None]:
# Drive locations of the speech and song audio folders (actors 01-24).
speech_folder, song_folder = (
    "/content/drive/My Drive/Datasets/Audio_Speech_Actors_01-24",
    "/content/drive/My Drive/Datasets/Audio_Song_Actors_01-24",
)


In [None]:
import os
import librosa
import soundfile as sf
import shutil

# Source folders on Drive (re-declared here so this cell runs on its own).
speech_folder = "/content/drive/My Drive/Datasets/Audio_Speech_Actors_01-24"
song_folder   = "/content/drive/My Drive/Datasets/Audio_Song_Actors_01-24"

extract_dirs = [speech_folder, song_folder]
reduced_dir = "/content/ravdess_reduced"

# Start from an empty output folder so files left over from an earlier run
# (e.g. with a different actor/emotion selection) cannot leak into the archive.
shutil.rmtree(reduced_dir, ignore_errors=True)
os.makedirs(reduced_dir, exist_ok=True)

keep_actors = ['01', '02', '03', '04']
keep_emotions = ['01', '03', '04', '05']  # neutral, happy, sad, angry
target_sr = 16000


def _emotion_and_actor(filename):
    """Extract the emotion and actor codes from a dash-separated file name.

    The emotion code is the third dash-separated field and the actor code is
    the last field with everything from the first "." removed
    (e.g. "...-01.wav" -> "01").

    Returns:
        ``(emotion, actor_number)`` as strings, or ``None`` when the name has
        fewer than three dash-separated fields and cannot be parsed.
    """
    parts = filename.split("-")
    if len(parts) < 3:
        return None
    return parts[2], parts[-1].split(".")[0]


for base in extract_dirs:
    # os.walk silently yields nothing for a missing folder; say so explicitly
    # instead of quietly producing an incomplete archive.
    if not os.path.isdir(base):
        print(f"Warning: source folder not found, skipping: {base}")
        continue

    for root, _dirs, files in os.walk(base):
        for file in files:
            if not file.endswith(".wav"):
                continue

            codes = _emotion_and_actor(file)
            if codes is None:
                # Stray .wav that does not follow the expected naming scheme.
                continue
            emotion, actor_number = codes

            if actor_number not in keep_actors or emotion not in keep_emotions:
                continue

            filepath = os.path.join(root, file)

            # load + downsample (librosa resamples to target_sr while loading)
            audio, _sr = librosa.load(filepath, sr=target_sr)

            save_dir = os.path.join(reduced_dir, f"Actor_{actor_number}")
            os.makedirs(save_dir, exist_ok=True)

            sf.write(os.path.join(save_dir, file), audio, target_sr)

# Create zip; make_archive appends ".zip" to the base name and returns the
# path of the archive it wrote, which is displayed as the cell output.
output_zip = "/content/drive/My Drive/Datasets/ravdess_reduced"
archive_path = shutil.make_archive(output_zip, 'zip', reduced_dir)

archive_path


'/content/drive/My Drive/Datasets/ravdess_reduced.zip'

In [None]:
# Show the entries currently present in the Drive datasets folder.
os.listdir("/content/drive/My Drive/Datasets")


['fer2013', 'Audio_Song_Actors_01-24', 'Audio_Speech_Actors_01-24']

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so RAW_AUDIO (defined below) is readable.
drive.mount('/content/drive')

# RAW_AUDIO: speech recordings folder on Drive (input).
# CLEAN_AUDIO: local folder that receives the preprocessed files (output).
RAW_AUDIO, CLEAN_AUDIO = (
    "/content/drive/My Drive/Datasets/Audio_Speech_Actors_01-24",
    "/content/audio_cleaned",
)


Mounted at /content/drive


In [None]:
import os
# Create the output folder for cleaned audio if it does not exist yet.
# CLEAN_AUDIO is defined in an earlier cell, which must have been run first.
os.makedirs(CLEAN_AUDIO, exist_ok=True)


In [None]:
import librosa
import soundfile as sf
import os

def preprocess_audio(inpath, outpath, target_sr=16000, top_db=60):
    """Load an audio file, trim silent edges, resample it and write it back out.

    Args:
        inpath: Path of the audio file to read.
        outpath: Path of the file to write; its parent folder is created if
            it does not exist yet.
        target_sr: Sample rate (Hz) of the written file. Defaults to 16000.
        top_db: Threshold in dB below the reference level that
            ``librosa.effects.trim`` treats as silence. Defaults to 60, which
            is librosa's own default.
    """
    # sr=None keeps the file's native sample rate; mono=True downmixes channels.
    y, sr = librosa.load(inpath, sr=None, mono=True)

    # Drop leading and trailing silence; the returned index interval is unused.
    y, _ = librosa.effects.trim(y, top_db=top_db)

    # Resample only when the native rate differs from the requested one.
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    # Do not depend on another cell having created the output folder.
    out_dir = os.path.dirname(outpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    sf.write(outpath, y, target_sr)


In [7]:
# Walk the raw audio tree and preprocess every .wav file, writing each result
# directly into CLEAN_AUDIO under its original file name (sub-folder structure
# is not reproduced).
for folder, _subdirs, names in os.walk(RAW_AUDIO):
    for name in names:
        if not name.endswith(".wav"):
            continue
        preprocess_audio(
            os.path.join(folder, name),
            os.path.join(CLEAN_AUDIO, name),
        )
