In [21]:
import librosa
import soundfile
import os,glob2,pickle
import numpy as np

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [23]:
def extract_feature(file_name,mfcc,chroma,mel):
    with soundfile.SoundFile(file_name) as sound_file:
       #X=sound_file.read(dtype="float32") 
      # sample_rate=sound_file.samplerate
       X,sample_rate=librosa.load(file_name)
       if chroma:
         stft=np.abs(librosa.stft(X))
       result=np.array([])  
       if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X,sr=sample_rate,n_mfcc=40).T,axis=0)
            result=np.hstack((result,mfccs))
       if chroma:
        chroma=np.mean(librosa.feature.chroma_stft(S=stft,sr=sample_rate).T,axis=0)
        result=np.hstack((result,mfccs))
        
       if mel:
         mel=np.mean(librosa.feature.melspectrogram(X,sr=sample_rate).T,axis=0)
         result=np.hstack((result,mel))   
    return result   

In [24]:
emotions={
    '01':'neutral',
    '02':'calm',
    '03':'happy',
    '04':'sad',
    '05':'angry',
    '06':'fearful',
    '07':'disgust',
    '08':'surprised'
}


In [25]:
#Emotions to observe
observed_emotions=['calm','happy','fearful','disgust']
print(observed_emotions)

['calm', 'happy', 'fearful', 'disgust']


In [26]:
#load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob2.glob("Actor_*\\*.wav"):
       file_name=os.path.basename(file) 
       emotion=emotions[file_name.split("-")[2]]
       if emotion not in observed_emotions:
        continue;
       feature=extract_feature(file,mfcc=True,chroma=True,mel=True) 
       x.append(feature)
       y.append(emotion)
    return train_test_split(np.array(x),y,test_size=test_size,random_state=9)

x_train,x_test,y_train,y_test=load_data(test_size=0.2)

#DataFlair - Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

#DataFlair - Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500
                   )
#DataFlair - Train the model
model.fit(x_train,y_train)

#DataFlair - Calculate the accuracy of our model
y_pred=model.predict(x_test)
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))
print(y_pred)

    

(537, 231)
Features extracted: 208
Accuracy: 54.98%
['disgust' 'calm' 'disgust' 'calm' 'disgust' 'disgust' 'calm' 'happy'
 'disgust' 'disgust' 'happy' 'fearful' 'fearful' 'happy' 'disgust'
 'fearful' 'calm' 'disgust' 'disgust' 'calm' 'disgust' 'disgust' 'disgust'
 'calm' 'happy' 'disgust' 'calm' 'happy' 'calm' 'fearful' 'happy'
 'disgust' 'disgust' 'calm' 'disgust' 'disgust' 'calm' 'fearful' 'calm'
 'disgust' 'fearful' 'disgust' 'disgust' 'calm' 'disgust' 'disgust'
 'disgust' 'calm' 'disgust' 'happy' 'fearful' 'fearful' 'disgust'
 'fearful' 'disgust' 'calm' 'disgust' 'disgust' 'calm' 'calm' 'disgust'
 'calm' 'disgust' 'disgust' 'disgust' 'disgust' 'disgust' 'disgust'
 'disgust' 'happy' 'fearful' 'fearful' 'fearful' 'fearful' 'fearful'
 'disgust' 'fearful' 'happy' 'calm' 'fearful' 'calm' 'calm' 'disgust'
 'calm' 'disgust' 'calm' 'disgust' 'fearful' 'disgust' 'disgust' 'disgust'
 'disgust' 'disgust' 'fearful' 'disgust' 'fearful' 'calm' 'calm' 'calm'
 'calm' 'fearful' 'calm' 'disgust' 'di