In [1]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
print('2')

2


In [40]:


# Path to your copy of the RAVDESS dataset.
DATASET_PATH = "RAVDESS Emotional speech audio"

# Emotion name -> integer code, numbered from 1 in the order listed below.
# Adjust the names here if a different label set is needed.
LABELS = {
    emotion: code
    for code, emotion in enumerate(
        (
            "neutral", "calm", "happy", "sad",
            "angry", "fearful", "disgust", "surprised",
        ),
        start=1,
    )
}

def extract_features(file_path, max_pad_length=128, sr=22050, n_mels=128,
                     hop_length=512, top_db=80.0):
    """Load an audio file and return a fixed-size, normalised log-mel spectrogram.

    Args:
        file_path: Path of the audio file to load.
        max_pad_length: Number of time frames in the result. Shorter
            spectrograms are zero-padded on the right, longer ones are
            truncated.
        sr: Target sample rate passed to ``librosa.load``.
        n_mels: Number of mel bands (rows of the result).
        hop_length: Hop between successive frames, in samples.
        top_db: Dynamic range, in dB, kept below the spectrogram peak. The
            same value is used for the dB conversion and for the [0, 1]
            scaling so the two can never drift apart.

    Returns:
        2-D array of shape ``(n_mels, max_pad_length)`` with values in [0, 1].

    Raises:
        ValueError: If ``max_pad_length`` is smaller than 1 or ``top_db`` is
            not strictly positive.
    """
    if max_pad_length < 1:
        raise ValueError(f"max_pad_length must be >= 1, got {max_pad_length}")
    if top_db <= 0:
        raise ValueError(f"top_db must be > 0, got {top_db}")

    # Read the audio (sr is re-read from the loader so sr=None also works).
    signal, sr = librosa.load(file_path, sr=sr)

    # Mel power spectrogram.
    mel_spec = librosa.feature.melspectrogram(
        y=signal, sr=sr, n_mels=n_mels, hop_length=hop_length
    )

    # ref=np.max places the peak at 0 dB and top_db floors everything at
    # -top_db, so the dB values lie in [-top_db, 0].
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max, top_db=top_db)

    # Rescale [-top_db, 0] dB to [0, 1].
    mel_spec_db = (mel_spec_db + top_db) / top_db

    # Bring every clip to the same width. Padding with 0 is equivalent to
    # padding with the -top_db floor (i.e. silence) after the rescaling above.
    n_frames = mel_spec_db.shape[1]
    if n_frames < max_pad_length:
        missing = max_pad_length - n_frames
        mel_spec_db = np.pad(mel_spec_db, ((0, 0), (0, missing)), mode='constant')
    else:
        mel_spec_db = mel_spec_db[:, :max_pad_length]

    return mel_spec_db

X, y = [], []
valid_codes = set(LABELS.values())

# os.listdir returns entries in arbitrary order; sorting makes the sample
# order (and anything downstream that depends on it, such as an order-based
# validation split) identical on every machine and every run.
for actor_folder in sorted(os.listdir(DATASET_PATH)):
    actor_path = os.path.join(DATASET_PATH, actor_folder)
    if not os.path.isdir(actor_path):
        continue  # skip anything that is not a folder

    for file in sorted(os.listdir(actor_path)):
        if not file.endswith(".wav"):
            continue

        # The emotion class is the 3rd dash-separated field of the file name.
        emotion_code = int(file.split("-")[2])
        if emotion_code not in valid_codes:
            # Fail early: an out-of-range code would otherwise be mis-encoded
            # (or crash) only later, when the labels are one-hot encoded.
            raise ValueError(
                f"Unexpected emotion code {emotion_code} in file name {file!r}"
            )

        # Compute the mel-spectrogram feature for this clip.
        X.append(extract_features(os.path.join(actor_path, file)))
        y.append(emotion_code)

if not X:
    raise FileNotFoundError(f"No .wav files found under {DATASET_PATH!r}")

# Convert to NumPy arrays.
X = np.array(X)  # (n_samples, 128, max_pad_length)
X = np.expand_dims(X, axis=-1)  # add a channel axis for the CNN: (n_samples, 128, max_pad_length, 1)
y = np.array(y)

print(f"✅ X shape: {X.shape}")  # expected (n_samples, 128, 128, 1) with the default settings
print(f"✅ y shape: {y.shape}")
print(f"✅ X min: {np.min(X)}, X max: {np.max(X)}")  # values should lie in [0, 1]


✅ X shape: (1440, 128, 128, 1)
✅ y shape: (1440,)
✅ X min: 0.0, X max: 1.0


In [43]:
# The emotion codes are 1-based (see LABELS), whereas to_categorical expects
# class indices that start at 0, so the codes are shifted down by one first.
#
# The ndim guard makes this cell safe to re-run: without it, a second run
# would subtract 1 from the already one-hot matrix and encode it again,
# silently producing a wrongly shaped target array.
y = np.asarray(y)
if y.ndim == 1:
    y = to_categorical(y - 1, num_classes=len(LABELS))

# Confirm the encoded shape: (n_samples, number of emotion classes).
print(f"✅ y shape after one-hot encoding: {y.shape}")


✅ y shape after one-hot encoding: (1440, 8)


In [48]:
# Seed NumPy and TensorFlow so that weight initialisation, dropout masks and
# batch shuffling are as repeatable as the backend allows.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Build the CNN: three conv/max-pool stages followed by a dense classifier.
# The input shape is taken from the data so it cannot get out of sync with
# the feature-extraction settings.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X.shape[1:]),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(LABELS), activation='softmax')  # output layer: one unit per entry in LABELS
])

# Compile the model.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model.
# NOTE: validation_split holds out the LAST 20% of X / y, taken before any
# shuffling, so the validation set is determined by the order in which the
# samples were loaded.
# The History object is kept so the learning curves can be inspected later
# instead of being discarded as the cell's output.
history = model.fit(X, y, batch_size=32, epochs=30, validation_split=0.2)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x22d149daa60>