In [1]:
import librosa
import soundfile
import pandas as pd
import os, glob, pickle
import numpy as np
import json
import seaborn as sns
import parselmouth
import matplotlib.pyplot as plt
from parselmouth.praat import call
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import learning_curve
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error


In [2]:

def extract_feature(gender, snd, file_name, mfcc, chroma, mel, pm):
    """Build one flat feature vector for a single audio file.

    Parameters
    ----------
    gender : str
        Speaker gender label; encoded as 1.0 when equal to "male", else 0.0.
    snd : parselmouth.Sound
        Sound object for the same file; only used when ``pm`` is true.
    file_name : str
        Path of the audio file, read with ``soundfile``.
    mfcc, chroma, mel, pm : bool
        Switches for the time-averaged MFCC (40 coefficients), chroma and
        mel-spectrogram features, and the Praat measures from ``measurePitch``.

    Returns
    -------
    numpy.ndarray
        1-D float array: the enabled feature groups in the order
        mfcc, chroma, mel, pm, followed by the gender flag.
    """
    # Read the samples, then release the file handle before the heavy work.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    # NOTE(review): multi-channel files would yield a 2-D array here; the
    # librosa calls below presumably expect mono input -- confirm the dataset.

    # Leading empty float64 array keeps the result float64 even when every
    # feature group is switched off (same dtype as the original accumulator).
    parts = [np.array([])]
    if mfcc:
        mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
        parts.append(np.mean(mfccs.T, axis=0))
    if chroma:
        # The magnitude spectrogram is only needed for the chroma features.
        stft = np.abs(librosa.stft(y=X))
        chroma_frames = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
        parts.append(np.mean(chroma_frames.T, axis=0))
    if mel:
        # The audio must be passed by keyword (y=...); recent librosa releases
        # reject a positional signal argument.
        mel_frames = librosa.feature.melspectrogram(y=X, sr=sample_rate)
        parts.append(np.mean(mel_frames.T, axis=0))
    if pm:
        parts.append(measurePitch(snd, 75, 500, "Hertz"))

    # Gender is appended last as a numeric flag.
    parts.append(1.0 if gender == "male" else 0.0)
    return np.hstack(parts)

In [3]:
def measurePitch(snd, f0min, f0max, unit):
    """Compute Praat voice-quality measures for one sound.

    Parameters
    ----------
    snd : parselmouth.Sound
        Sound to analyse.
    f0min, f0max : float
        Pitch floor and ceiling in Hz. They bound the pitch, harmonicity and
        point-process analyses, and define the LTAS bandwidth and mean range.
    unit : str
        Unit passed to the Praat pitch queries (the notebook uses "Hertz").

    Returns
    -------
    numpy.ndarray
        16 values in this order: mean F0, F0 standard deviation, mean HNR,
        five jitter measures (local, local absolute, rap, ppq5, ddp),
        six shimmer measures (local, local dB, apq3, apq5, apq11, dda),
        mean intensity, mean LTAS energy between f0min and f0max.
    """
    # Use the caller's pitch range; the original called to_pitch() with
    # library defaults and silently ignored f0min/f0max here.
    pitch = snd.to_pitch(pitch_floor=f0min, pitch_ceiling=f0max)
    meanF0 = call(pitch, "Get mean", 0, 0, unit)
    stdevF0 = call(pitch, "Get standard deviation", 0, 0, unit)

    # Harmonics-to-noise ratio; minimum pitch follows f0min instead of a
    # hard-coded 75 (identical for the notebook's only caller).
    harmonicity = call(snd, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)

    # Glottal pulse positions, shared by the jitter and shimmer queries.
    pointProcess = call(snd, "To PointProcess (periodic, cc)", f0min, f0max)

    jitter_args = (0, 0, 0.0001, 0.02, 1.3)
    localJitter = call(pointProcess, "Get jitter (local)", *jitter_args)
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", *jitter_args)
    rapJitter = call(pointProcess, "Get jitter (rap)", *jitter_args)
    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", *jitter_args)
    ddpJitter = call(pointProcess, "Get jitter (ddp)", *jitter_args)

    shimmer_args = (0, 0, 0.0001, 0.02, 1.3, 1.6)
    localShimmer = call([snd, pointProcess], "Get shimmer (local)", *shimmer_args)
    localdbShimmer = call([snd, pointProcess], "Get shimmer (local_dB)", *shimmer_args)
    apq3Shimmer = call([snd, pointProcess], "Get shimmer (apq3)", *shimmer_args)
    aqpq5Shimmer = call([snd, pointProcess], "Get shimmer (apq5)", *shimmer_args)
    apq11Shimmer = call([snd, pointProcess], "Get shimmer (apq11)", *shimmer_args)
    ddaShimmer = call([snd, pointProcess], "Get shimmer (dda)", *shimmer_args)

    intensity = snd.to_intensity()
    meanIntense = call(intensity, "Get mean", snd.xmin, snd.xmax)

    # Long-term average spectrum with a bandwidth of (f0max - f0min) Hz.
    # The unused bin-count query and bin-43/bin-1 ratio were dropped: they
    # never reached the output and the ratio could fail on a zero or
    # missing bin.
    Ltas = call(snd, "To Ltas", f0max - f0min)
    meanLtas = call(Ltas, "Get mean", f0min, f0max, "energy")

    return np.hstack((
        meanF0, stdevF0, hnr,
        localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter,
        localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer,
        meanIntense, meanLtas,
    ))

In [4]:


# Emotion label groups used to filter the dataset by the label in each file name.
# NOTE(review): "p"/"s"/"f" presumably stand for primary/secondary/full -- confirm.
observed_p_emotions = ['angry', 'excited', 'happy', 'neutral', 'sad']
observed_s_emotions = ['anxious', 'apologetic', 'confident', 'enthusiastic', 'worried']
# The full set is the first group followed by the second, in that order.
observed_f_emotions = observed_p_emotions + observed_s_emotions

# Active label set for this run; point it at one of the lists above.
observed_emotions = observed_s_emotions


In [5]:
def load_data(test_size=0.2, data_dir="C:\\Users\\Binu\\Desktop\\emudbset"):
    """Extract features for every usable .wav file and split into train/test.

    File names are expected to look like ``<speaker>_<emotion>_...wav``: the
    text before the first underscore is the speaker id, whose last character
    is stripped to obtain the gender label, and the second field is the
    emotion label.

    Parameters
    ----------
    test_size : float
        Fraction of samples held out for testing.
    data_dir : str
        Directory containing the .wav files. Defaults to the original
        hard-coded location; pass your own path instead of editing the code.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` from ``train_test_split``
        (fixed ``random_state=9``).

    Raises
    ------
    ValueError
        If no file in ``data_dir`` matches ``observed_emotions``.
    """
    x, y = [], []
    # Sort the paths: glob order depends on the file system, and the seeded
    # split is only reproducible if samples always arrive in the same order.
    for file in sorted(glob.glob(os.path.join(data_dir, "*.wav"))):
        file_name = os.path.basename(file)
        name_parts = file_name.split("_")
        if len(name_parts) < 2:
            # Name does not follow <speaker>_<emotion>_...; cannot be labelled.
            continue
        speaker, emotion = name_parts[0], name_parts[1]
        if emotion not in observed_emotions:
            continue
        gender = speaker[:-1]
        # Load the Praat sound only for files that pass the emotion filter.
        snd = parselmouth.Sound(file)
        feature = extract_feature(gender, snd, file, mfcc=True, chroma=True, mel=True, pm=False)
        x.append(feature)
        y.append(emotion)
    if not x:
        raise ValueError(
            "No .wav files with an observed emotion were found in {!r}".format(data_dir)
        )
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [6]:
# Build the train/test split (20% held out) and inspect it.
x_train, x_test, y_train, y_test = load_data(test_size=0.2)
print(x_train, x_test, sep="\n")

# Number of samples in (train, test).
print((len(x_train), len(x_test)))

[[-5.14100281e+02  1.50521591e+02  1.67134247e+01 ...  2.82133250e-08
   1.96503098e-08  1.00000000e+00]
 [-5.53610352e+02  9.37813568e+01  7.36310434e+00 ...  1.29587619e-07
   5.07491862e-08  0.00000000e+00]
 [-5.05681122e+02  1.30654572e+02  2.10833206e+01 ...  8.97011034e-08
   4.37831247e-08  1.00000000e+00]
 ...
 [-4.95490784e+02  1.00166779e+02 -9.86926651e+00 ...  1.92148988e-07
   1.03354608e-07  1.00000000e+00]
 [-5.67913574e+02  9.17978897e+01 -5.26031256e+00 ...  7.82804008e-08
   3.68323150e-08  0.00000000e+00]
 [-6.21082764e+02  1.03507332e+02  3.22704544e+01 ...  3.69058384e-08
   2.26339019e-08  0.00000000e+00]]
[[-5.05843231e+02  8.68745728e+01 -2.06283531e+01 ...  9.98686858e-08
   4.61146676e-08  0.00000000e+00]
 [-5.28930237e+02  1.19846954e+02 -7.69809914e+00 ...  1.40363426e-07
   6.26404457e-08  0.00000000e+00]
 [-5.66152771e+02  1.11656982e+02  1.85005226e+01 ...  5.44499201e-08
   2.82806418e-08  0.00000000e+00]
 ...
 [-5.39561829e+02  1.02567551e+02 -1.3439302

In [8]:
# Width of the feature matrix, i.e. the length of each vector built by extract_feature.
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 181


In [9]:
# SVM classifier tuned with a small grid search over kernel and C.
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
svc = svm.SVC()
model = GridSearchCV(svc, parameters)
# NOTE: the former `model.bestestimator.featureimportances` line was removed.
# The attribute names had lost their underscores, `best_estimator_` only exists
# after `model.fit(...)`, and an SVC exposes no `feature_importances_`
# (those belong to tree ensembles such as ExtraTreesClassifier).

In [10]:
# Train the model on the training split.
# NOTE(review): scikit-learn's fit() returns the fitted estimator itself, so
# `history` is presumably just another reference to `model`, not a Keras-style
# History object -- confirm before using `history.history` anywhere.
history = model.fit(x_train,y_train)

In [11]:
# Predict an emotion label for every sample in the held-out test split.
y_pred=model.predict(x_test)

In [12]:

# Share of test samples whose predicted label matches the true one, as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 81.25%


In [13]:

#nopm_mfcc40_sizes = [1,40,52,300, 500, 700, 864]

#train_sizes, train_scores, test_scores = learning_curve(MLPClassifier(),x_train, y_train,cv=10, scoring='accuracy', n_jobs= -1, train_sizes = nopm_mfcc40_sizes ) 

In [None]:
#train_mean = np.mean(train_scores,axis = 1)
#test_mean = np.mean(test_scores,axis = 1)

In [None]:
#plt.plot(train_sizes, train_mean, label = 'Training Score')
#plt.plot(train_sizes, test_mean, label = 'Cross-Validation Score')

#plt.title('Learning Curve')
#plt.xlabel('Training Size')
#plt.ylabel('Accuracy Score')
#plt.legend(loc = 'lower right')
#plt.savefig("test.svg", transparent=True)

In [14]:
# Confusion matrix (rows = true label, columns = predicted label).
# Pass labels= explicitly: without it scikit-learn orders classes by sorted
# label value, which only matches observed_emotions when that list happens to
# be alphabetical (observed_f_emotions is not), so the table would be mislabelled.
cm = confusion_matrix(y_test, y_pred, labels=observed_emotions)
# Optional: per-row percentages instead of raw counts.
#cm = np.round(cm / cm.sum(axis=1, keepdims=True) * 100, 2)
df = pd.DataFrame(cm,
                  columns=observed_emotions,
                  index=observed_emotions)
print(df)

              anxious  apologetic  confident  enthusiastic  worried
anxious            42           0          2             1        0
apologetic          0          37          4             0        8
confident           0           6         36             5        3
enthusiastic        6           3          2            40        1
worried             0           0          2             2       40


In [15]:
def highlight_diag(df, color='salmon'):
    """Return a CSS style table that highlights the main diagonal of ``df``.

    Intended for ``df.style.apply(highlight_diag, axis=None)``; extra keyword
    arguments given to ``Styler.apply`` (e.g. ``color='gold'``) are forwarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame being styled; only its shape, index and columns are used.
    color : str
        CSS colour for the diagonal cells' background. Defaults to 'salmon'.

    Returns
    -------
    pandas.DataFrame
        Same shape/index/columns as ``df``; diagonal cells hold
        ``'background-color: <color>'`` and all other cells ``''``.
    """
    css = f'background-color: {color}'
    # np.where sizes the string dtype to fit `css`, so long colour names are
    # not truncated the way a fixed-width '<U24' buffer would truncate them.
    on_diagonal = np.eye(*df.shape, dtype=bool)
    styles = np.where(on_diagonal, css, '')
    return pd.DataFrame(styles, index=df.index, columns=df.columns)

In [16]:
# Styler that applies highlight_diag to the whole table at once (axis=None).
s = df.style.apply(highlight_diag, axis=None)


In [17]:
# Shade each cell by its count using a light-to-salmon colormap.
# Use a dedicated name for the colormap: the original rebound `cm`, silently
# replacing the confusion matrix computed earlier with a colormap object.
salmon_cmap = sns.light_palette("salmon", as_cmap=True)
s = df.style.background_gradient(cmap=salmon_cmap)
s

Unnamed: 0,anxious,apologetic,confident,enthusiastic,worried
anxious,42,0,2,1,0
apologetic,0,37,4,0,8
confident,0,6,36,5,3
enthusiastic,6,3,2,40,1
worried,0,0,2,2,40


In [None]:
# Re-score the model on the held-out split.
prd_r = model.predict(x_test)
test_acc = accuracy_score(y_test, prd_r) * 100.

# `loss_curve_` is recorded by iterative estimators such as MLPClassifier; a
# GridSearchCV wrapper or an SVC does not have it, so look on the refit best
# estimator when there is one and skip the plot instead of raising.
fitted_estimator = getattr(model, "best_estimator_", model)
loss_values = getattr(fitted_estimator, "loss_curve_", None)
if loss_values is None:
    print(f"{type(fitted_estimator).__name__} does not record a training loss curve; nothing to plot.")
else:
    plt.plot(loss_values, label='train')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.legend()
    plt.show()

In [None]:
# Train/validation loss plot for Keras-style training logs.
# `history` in this notebook is whatever `model.fit(...)` returned; scikit-learn
# estimators have no `.history` dict, so only plot when one is present rather
# than failing with AttributeError.
training_log = getattr(history, "history", None)
if training_log is None:
    print(f"{type(history).__name__} has no Keras-style training history; nothing to plot.")
else:
    plt.title('Loss / Mean Squared Error')
    plt.plot(training_log['loss'], label='train')
    plt.plot(training_log['val_loss'], label='test')
    plt.legend()
    plt.show()