In [None]:
import librosa
import os
import pandas as pd
import numpy as np

# Define the base directory.
# Raw string literal: in a normal literal the "\U" of "C:\Users" begins a
# \UXXXXXXXX unicode escape, which makes the whole file a SyntaxError.
base_dir = r'C:\Users\张佳\source\repos\emotion-speech-recognition\Emotion Speech Dataset'


def _load_label_mapping(mapping_file):
    """Read a speaker's label file into a ``{"<name>.wav": label}`` dict.

    Each well-formed line holds exactly three tab-separated fields: the first
    is the audio file name without extension and the third is the emotion
    label. Lines with any other number of fields are reported and skipped.

    Args:
        mapping_file: Path of the tab-separated label file.

    Returns:
        Dict mapping audio file names (with ".wav" appended) to labels.
    """
    mapping = {}
    # NOTE(review): the file is decoded with the platform default encoding,
    # exactly as before. Confirm the label files' real encoding and pass
    # ``encoding=...`` explicitly if it differs from the locale default.
    with open(mapping_file, 'r') as file:
        for line in file:
            parts = line.strip().split('\t')
            if len(parts) != 3:
                print(f"Skipping malformed line: {line}")
                continue
            # Append ".wav" so keys match the names returned by os.listdir().
            mapping[parts[0].strip() + ".wav"] = parts[2].strip()
    return mapping


def _extract_features(file_path, n_mfcc=13, hop_length=512, n_fft=2048):
    """Return one time-averaged feature vector for an audio file.

    MFCCs, frame-wise RMS energy and delta-MFCCs are stacked along the
    feature axis and then averaged over all frames.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients.
        hop_length: Hop between analysis frames, in samples.
        n_fft: Analysis window length, in samples.

    Returns:
        1-D NumPy array with one mean value per stacked feature row.

    Raises:
        Whatever librosa raises for unreadable or too-short audio; the caller
        reports the error and moves on to the next file.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)
    # Use the same framing as the MFCCs so the frame counts line up for
    # vstack (these values equal librosa's defaults, so results are unchanged).
    rmse = librosa.feature.rms(y=y, frame_length=n_fft, hop_length=hop_length)
    delta_mfccs = librosa.feature.delta(mfccs)

    features = np.vstack([mfccs, rmse, delta_mfccs])
    # Collapse the time axis: one mean per feature row.
    return np.mean(features.T, axis=0)


# Initialize a list to store [feature_vector, emotion_label] rows
data = []

# Iterate over each speaker's folder (sorted so the row order is reproducible)
for speaker_folder in sorted(os.listdir(base_dir)):
    speaker_path = os.path.join(base_dir, speaker_folder)
    if not os.path.isdir(speaker_path):
        # Stray files next to the speaker folders are not speakers.
        continue

    # Read the corresponding text file for the speaker
    mapping_file = os.path.join(speaker_path, f"{speaker_folder}.txt")
    if not os.path.isfile(mapping_file):
        # Same best-effort policy as for unreadable audio: report and go on.
        print(f"Skipping speaker folder without label file: {speaker_path}")
        continue
    mapping = _load_label_mapping(mapping_file)

    # Process each audio file in the emotion folders
    for emotion_folder in sorted(os.listdir(speaker_path)):
        emotion_path = os.path.join(speaker_path, emotion_folder)
        if not os.path.isdir(emotion_path):
            continue

        for audio_file in sorted(os.listdir(emotion_path)):
            # Only files listed in the speaker's label file have a label.
            if audio_file not in mapping:
                continue

            file_path = os.path.join(emotion_path, audio_file)
            try:
                features_processed = _extract_features(file_path)
            except Exception as e:
                # Deliberately broad: one bad file must not abort the run.
                print(f"Error processing file {audio_file}: {e}")
                continue

            # Append the features and the correct emotion label to the data list
            data.append([features_processed, mapping[audio_file]])

# Convert to a Pandas DataFrame
df = pd.DataFrame(data, columns=['Features', 'Emotion'])

# Now df is your dataset ready for further processing and model training