In [5]:
import librosa 
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn. ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

## 定义一个用于提取特征的函数

In [2]:
def extract_feature(file_name, mfcc, chroma, mel):
    X, sample_rate= librosa.load(file_name, sr= None)
    if chroma:
        stft= np.abs(librosa.stft(X))#短时傅里叶变换的绝对值
    result= np.array([])
    if mfcc:
        mfccs= np.mean(librosa.feature.mfcc(y= X, sr= sample_rate, n_mfcc=13).T,axis=0)#最终mfcc的维度是(13,)
        result= np.hstack((result,mfccs))
    if mel:
        mel= np.mean(librosa.feature.melspectrogram(y= X, sr= sample_rate).T, axis=0)#这里提取了梅尔功率谱特征，不知道这个特征有什么特殊的地方
        result= np.hstack([result, mel])
    if chroma:
        chroma= np.mean(librosa.feature.chroma_stft(S=stft, sr= sample_rate).T, axis=0)
        result= np.hstack((result, chroma))
    return result

In [4]:
emotions= {
    '01':'neutral',
    '02':'calm',
    '03':'happiness',
    '04':'sadness',
    '05':'anger',
    '06':'fear',
    '07':'disgust',
    '08':'surprised'
}
observed_emotions= ['anger','disgust','fear','happiness','sadness','neutral']


In [4]:
def load_RAVDESS(Normalization=False, Standardization=False):
    x, y=[],[]
    for file in glob.glob('C:\\Users\\Administrator\\Desktop\\RAVDESS\\Actor_*\\*.wav'):
        file_name= os.path.basename(file)
        emotion= emotions[file_name.split('-')[2]]
        if emotion not in observed_emotions:
            continue
        feature= extract_feature(file, mfcc= True, chroma=False, mel= False)
        x.append(feature)
        y.append(emotion)
    x= np.array(x)
    if Normalization:
        return MinMaxScaler().fit_transform(x),y
    elif Standardization:
        return StandardScaler().fit_transform(x),y
    else:
        return x,y

In [5]:
data, label=load_data(Normalization=False, Standardization=True)

In [7]:
x_train, x_test, y_train, y_test= train_test_split(data, label, random_state=0, test_size=0.25)

## 多层感知机分类

In [14]:
model= MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,hidden_layer_sizes=(300,),learning_rate='adaptive',max_iter=500,random_state=9)
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

        calm       0.90      0.93      0.91        46
     disgust       0.84      0.69      0.76        45
     fearful       0.70      0.76      0.73        50
       happy       0.68      0.71      0.69        51

    accuracy                           0.77       192
   macro avg       0.78      0.77      0.77       192
weighted avg       0.77      0.77      0.77       192





## 支持向量机分类

In [22]:
model= SVC(kernel='rbf',C=10, gamma=0.1, random_state=9)
print(cross_val_score(model, data, label, cv=100).mean())

0.765


In [23]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

        calm       0.88      0.93      0.91        46
     disgust       0.89      0.87      0.88        45
     fearful       0.78      0.76      0.77        50
       happy       0.72      0.71      0.71        51

    accuracy                           0.81       192
   macro avg       0.81      0.82      0.82       192
weighted avg       0.81      0.81      0.81       192



## 随机森林

In [33]:
model= RandomForestClassifier(n_estimators=100, random_state=9)
print(cross_val_score(model, data, label, cv=100,scoring='accuracy').mean())

0.71375


In [32]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

        calm       0.76      0.98      0.86        46
     disgust       0.72      0.58      0.64        45
     fearful       0.57      0.54      0.56        50
       happy       0.68      0.67      0.67        51

    accuracy                           0.69       192
   macro avg       0.68      0.69      0.68       192
weighted avg       0.68      0.69      0.68       192



In [34]:
temp=glob.glob('C:\\Users\\Administrator\\Desktop\\EMO-DB\\wav\\*.wav')

In [36]:
trail= temp[0]

In [39]:
temp[0]

'C:\\Users\\Administrator\\Desktop\\EMO-DB\\wav\\03a01Fa.wav'

In [43]:
os.path.basename(trail).split('.')[0][-2]

'F'

# 用EMO-DB数据库

In [2]:
emotions= {
    'W':'anger',
    'L':'boredom',
    'E':'disgust',
    'A':'fear',
    'F':'happiness',
    'T':'sadness',
    'N':'neutral'
}
def extract_feature(file_name, mfcc, chroma, mel):
    X, sample_rate= librosa.load(file_name, sr= None)
    if chroma:
        stft= np.abs(librosa.stft(X))#短时傅里叶变换的绝对值
    result= np.array([])
    if mfcc:
        mfccs= np.mean(librosa.feature.mfcc(y= X, sr= sample_rate, n_mfcc=13).T,axis=0)#最终mfcc的维度是(13,)
        result= np.hstack((result,mfccs))
    if mel:
        mel= np.mean(librosa.feature.melspectrogram(y= X, sr= sample_rate).T, axis=0)#这里提取了梅尔功率谱特征，不知道这个特征有什么特殊的地方
        result= np.hstack([result, mel])
    if chroma:
        chroma= np.mean(librosa.feature.chroma_stft(S=stft, sr= sample_rate).T, axis=0)
        result= np.hstack((result, chroma))
    return result
#导入数据，返回提取出的特征数组以及标签
def load_emoDB(Normalization=False, Standardization=False):
    x, y=[],[]
    for file in glob.glob('C:\\Users\\Administrator\\Desktop\\EMO-DB\\wav\\*.wav'):
        file_name= os.path.basename(file)
        emotion= emotions[file_name.split('.')[0][-2]]
        feature= extract_feature(file, mfcc= True, chroma=False, mel= False)
        x.append(feature)
        y.append(emotion)
    x= np.array(x)
    if Normalization:
        return MinMaxScaler().fit_transform(x),y
    elif Standardization:
        return StandardScaler().fit_transform(x),y
    else:
        return x,y

In [3]:
data,label= load_emoDB(Standardization=True)

In [4]:
print('数据集维度：',data.shape)

数据集维度： (535, 13)


In [5]:
x_train, x_test, y_train, y_test= train_test_split(data, label, random_state=0, test_size=0.25)

## MLP分类

In [51]:
model= MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,hidden_layer_sizes=(300,),learning_rate='adaptive',max_iter=500,random_state=9)
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.81      0.90      0.85        29
     boredom       0.74      0.64      0.68        22
     disgust       0.67      0.60      0.63        10
        fear       0.87      0.72      0.79        18
   happiness       0.65      0.61      0.63        18
     neutral       0.75      0.83      0.79        18
     sadness       0.77      0.89      0.83        19

    accuracy                           0.76       134
   macro avg       0.75      0.74      0.74       134
weighted avg       0.76      0.76      0.76       134





## 支持向量机分类

In [87]:
model= SVC(kernel='rbf',random_state=9,C=1, gamma=0.1)
print(cross_val_score(model, data, label, cv=10,scoring='balanced_accuracy').mean())

0.6296284667713239


In [88]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.74      1.00      0.85        29
     boredom       0.89      0.36      0.52        22
     disgust       0.75      0.60      0.67        10
        fear       0.71      0.67      0.69        18
   happiness       0.82      0.50      0.62        18
     neutral       0.58      1.00      0.73        18
     sadness       0.89      0.89      0.89        19

    accuracy                           0.74       134
   macro avg       0.77      0.72      0.71       134
weighted avg       0.77      0.74      0.72       134



In [86]:
grid = GridSearchCV(SVC(random_state=9,kernel='rbf'), param_grid={"C":[0.1, 1, 10], "gamma": [1, 0.1, 0.01]}, cv=10)
grid.fit(data, label)
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'C': 1, 'gamma': 0.1} with a score of 0.64




## 随机森林分类

In [49]:
grid = GridSearchCV(RandomForestClassifier(random_state=9), param_grid={'n_estimators':[50,100,150,200,300]}, cv=10)
grid.fit(data, label)
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))



The best parameters are {'n_estimators': 200} with a score of 0.63


In [51]:
model= RandomForestClassifier(n_estimators=200,random_state=9)
print(cross_val_score(model, data, label, cv=10,scoring='balanced_accuracy').mean())

0.6015214547357404


In [52]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.67      1.00      0.81        29
     boredom       0.62      0.36      0.46        22
     disgust       0.71      0.50      0.59        10
        fear       0.70      0.39      0.50        18
   happiness       0.85      0.61      0.71        18
     neutral       0.55      0.89      0.68        18
     sadness       0.89      0.89      0.89        19

    accuracy                           0.69       134
   macro avg       0.71      0.66      0.66       134
weighted avg       0.71      0.69      0.67       134



## XgBoost分类

In [60]:
grid = GridSearchCV(XGBClassifier(random_state=9,learning_rate=0.1,n_estimators=300), param_grid={'min_child_weight':[1,1.5,2,2.5],'max_depth':[4,5,6,7,8]}, cv=10)
grid.fit(data, label)
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))



The best parameters are {'max_depth': 5, 'min_child_weight': 1} with a score of 0.62


In [64]:
model= XGBClassifier(n_estimators=300,random_state=9)
print(cross_val_score(model, data, label, cv=10,scoring='balanced_accuracy').mean())

0.6139368567939997


In [65]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.71      0.93      0.81        29
     boredom       0.47      0.36      0.41        22
     disgust       0.56      0.50      0.53        10
        fear       0.79      0.61      0.69        18
   happiness       0.59      0.56      0.57        18
     neutral       0.54      0.72      0.62        18
     sadness       0.87      0.68      0.76        19

    accuracy                           0.65       134
   macro avg       0.65      0.62      0.63       134
weighted avg       0.65      0.65      0.64       134



## K近邻分类

In [70]:
param_grid = [
    {
        'weights':['uniform'],
        'n_neighbors':[i for i in range(1,20)]
    },
    {
        'weights':['distance'],
        'n_neighbors':[i for i in range(1,20)],
        'p':[i for i in range(1,6)]
    }
]
grid = GridSearchCV(KNeighborsClassifier(),param_grid, n_jobs=-1,cv=10)
grid.fit(data, label)
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'n_neighbors': 10, 'p': 1, 'weights': 'distance'} with a score of 0.60




In [73]:
model=KNeighborsClassifier(n_neighbors=10,weights='distance',p=1)
print(cross_val_score(model, data, label, cv=10,scoring='balanced_accuracy').mean())

0.5918707482993197


In [74]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.69      0.93      0.79        29
     boredom       0.67      0.36      0.47        22
     disgust       0.89      0.80      0.84        10
        fear       0.67      0.44      0.53        18
   happiness       0.69      0.61      0.65        18
     neutral       0.55      0.89      0.68        18
     sadness       0.88      0.79      0.83        19

    accuracy                           0.69       134
   macro avg       0.72      0.69      0.69       134
weighted avg       0.71      0.69      0.68       134



## 逻辑回归分类

In [78]:
grid = GridSearchCV(LogisticRegression(),param_grid={'C':[0.1,1,10],'solver':['lbfgs','newton-cg','liblinear']},n_jobs=-1,cv=10)
grid.fit(data, label)
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'C': 1, 'solver': 'lbfgs'} with a score of 0.60




In [84]:
model=LogisticRegression(solver='lbfgs',C=1,multi_class='auto')
print(cross_val_score(model, data, label, cv=10,scoring='balanced_accuracy').mean())

0.5815982033839178


In [85]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.68      0.93      0.78        29
     boredom       0.50      0.27      0.35        22
     disgust       0.62      0.50      0.56        10
        fear       0.59      0.56      0.57        18
   happiness       0.57      0.44      0.50        18
     neutral       0.52      0.67      0.59        18
     sadness       0.85      0.89      0.87        19

    accuracy                           0.63       134
   macro avg       0.62      0.61      0.60       134
weighted avg       0.62      0.63      0.61       134



## 线性判别分析分类器

In [94]:
grid = GridSearchCV(LinearDiscriminantAnalysis(),param_grid={'n_components':[1,2,3,4,5,6]},n_jobs=-1,cv=10)
grid.fit(data, label)
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'n_components': 1} with a score of 0.55




In [104]:
model=LinearDiscriminantAnalysis(n_components=2)
print(cross_val_score(model, data, label, cv=10,scoring='balanced_accuracy').mean())

0.5341941391941392


In [105]:
model.fit(x_train,y_train)
y_pred= model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

       anger       0.70      0.90      0.79        29
     boredom       0.37      0.32      0.34        22
     disgust       0.67      0.40      0.50        10
        fear       0.62      0.44      0.52        18
   happiness       0.59      0.56      0.57        18
     neutral       0.56      0.78      0.65        18
     sadness       0.76      0.68      0.72        19

    accuracy                           0.61       134
   macro avg       0.61      0.58      0.58       134
weighted avg       0.61      0.61      0.60       134

