In [1]:
import librosa 
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


## 定义一个用于提取特征的函数

In [89]:
def extract_feature(file_name, mfcc, chroma, mel):
    X, sample_rate= librosa.load(file_name, sr= None)
    if chroma:
        stft= np.abs(librosa.stft(X))#短时傅里叶变换的绝对值
    result= np.array([])
    if mfcc:
        mfccs= np.mean(librosa.feature.mfcc(y= X, sr= sample_rate, n_mfcc=13).T,axis=0)#最终mfcc的维度是(13,)
        result= np.hstack((result,mfccs))
    if mel:
        mel= np.mean(librosa.feature.melspectrogram(y= X, sr= sample_rate).T, axis=0)#这里提取了梅尔功率谱特征，不知道这个特征有什么特殊的地方
        result= np.hstack([result, mel])
    if chroma:
        chroma= np.mean(librosa.feature.chroma_stft(S=stft, sr= sample_rate).T, axis=0)
        result= np.hstack((result, chroma))
    return result

In [126]:
emotions= {
    '01':'neutral',
    '02':'calm',
    '03':'happy',
    '04':'sad',
    '05':'angry',
    '06':'fearful',
    '07':'disgust',
    '08':'surprised'
}
observed_emotions= ['calm','happy','fearful','disgust']


In [90]:
def load_data():
    x, y=[],[]
    for file in glob.glob('C:\\Users\\Administrator\\Desktop\\RAVDESS\\Actor_*\\*.wav'):
        file_name= os.path.basename(file)
        emotion= emotions[file_name.split('-')[2]]
        if emotion not in observed_emotions:
            continue
        feature= extract_feature(file, mfcc= True, chroma=False, mel= False)
        x.append(feature)
        y.append(emotion)
    return np.array(x),y

In [127]:
data, label=load_data()

In [135]:
from sklearn.preprocessing import StandardScaler
std= StandardScaler()
data_std= std.fit_transform(data)

In [136]:
from sklearn.preprocessing import MinMaxScaler
MinM= MinMaxScaler()
data_MinM= MinM.fit_transform(data)

In [138]:
x_train, x_test, y_train, y_test= train_test_split(data_std, label, random_state=0, test_size=0.25)

In [139]:
model= MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,hidden_layer_sizes=(300,),learning_rate='adaptive',max_iter=500)
model.fit(x_train,y_train)



MLPClassifier(activation='relu', alpha=0.01, batch_size=256, beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(300,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [140]:
y_pred= model.predict(x_test)

In [141]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

        calm       0.96      0.96      0.96        46
     disgust       0.82      0.73      0.78        45
     fearful       0.71      0.74      0.73        50
       happy       0.67      0.71      0.69        51

   micro avg       0.78      0.78      0.78       192
   macro avg       0.79      0.78      0.79       192
weighted avg       0.78      0.78      0.78       192

