In [1]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [3]:
#DataFlair - Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

#DataFlair - Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [22]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("C:\\Users\\DELL\\Desktop\\MiniProject\\Data\\Actor_*\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [23]:
#DataFlair - Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.2)

In [6]:
#DataFlair - Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(614, 154)


In [7]:
#DataFlair - Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [8]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [9]:
#DataFlair - Train the model
model.fit(x_train,y_train)

MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [10]:
#DataFlair - Predict for the test set
y_pred=model.predict(x_test)

In [11]:
y_pred

array(['happy', 'calm', 'happy', 'happy', 'disgust', 'calm', 'happy',
       'disgust', 'calm', 'happy', 'happy', 'fearful', 'fearful', 'happy',
       'disgust', 'happy', 'calm', 'happy', 'happy', 'calm', 'calm',
       'disgust', 'disgust', 'calm', 'happy', 'happy', 'calm', 'happy',
       'fearful', 'fearful', 'happy', 'fearful', 'happy', 'calm', 'happy',
       'calm', 'calm', 'fearful', 'calm', 'disgust', 'happy', 'calm',
       'calm', 'calm', 'fearful', 'calm', 'disgust', 'happy', 'calm',
       'happy', 'fearful', 'fearful', 'fearful', 'happy', 'happy',
       'fearful', 'calm', 'happy', 'calm', 'calm', 'disgust', 'calm',
       'happy', 'calm', 'disgust', 'calm', 'calm', 'calm', 'disgust',
       'happy', 'fearful', 'fearful', 'fearful', 'fearful', 'fearful',
       'disgust', 'fearful', 'happy', 'calm', 'fearful', 'disgust',
       'calm', 'fearful', 'calm', 'disgust', 'calm', 'disgust', 'fearful',
       'disgust', 'fearful', 'disgust', 'calm', 'calm', 'happy',
       'disgu

In [12]:
#DataFlair - Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 75.32%


In [13]:
x_train


array([[-5.18490479e+02,  5.00726852e+01, -1.14204607e+01, ...,
         4.87677426e-05,  3.57282370e-05,  2.55957166e-05],
       [-6.42915588e+02,  7.66249466e+01,  2.75691390e+00, ...,
         4.64339200e-06,  2.52479435e-06,  1.17451077e-06],
       [-6.24726013e+02,  6.19622231e+01,  1.62521229e+01, ...,
         3.24165521e-05,  3.35850164e-05,  1.60543987e-05],
       ...,
       [-5.50096191e+02,  1.70297680e+01, -1.14575644e+01, ...,
         1.51764645e-04,  1.16828553e-04,  8.47479387e-05],
       [-5.55371155e+02,  4.71378479e+01,  1.10560827e+01, ...,
         1.61086486e-04,  1.04962528e-04,  6.52812014e-05],
       [-5.04864716e+02,  3.52971039e+01, -1.44038277e+01, ...,
         6.08151604e-04,  5.55269711e-04,  4.47782280e-04]])

In [24]:
file_path = "C:\\Users\\DELL\\Desktop\\MiniProject\\audio\\test.wav"

fs=extract_feature(file_path, mfcc=True, chroma=True, mel=True)
example_x = []
example_x.append(fs)

test.wav


In [25]:
print(example_x)

[array([-7.36547791e+02,  6.72349243e+01,  1.50432024e+01,  2.05692387e+01,
        9.34642029e+00,  4.38416386e+00, -7.77738237e+00, -3.56373221e-01,
        2.81057537e-01, -5.70972013e+00,  1.83892238e+00,  1.86935675e+00,
       -3.60532737e+00,  2.92290139e+00, -4.95185316e-01,  2.20956063e+00,
       -4.23454237e+00,  7.80095398e-01,  5.93093514e-01, -2.84174466e+00,
       -4.28347683e+00, -1.40699399e+00, -2.44453573e+00,  3.35919023e-01,
       -2.33561969e+00, -3.06902677e-01, -6.90777957e-01, -2.02748775e+00,
       -1.25453210e+00, -1.31771672e+00, -2.29047489e+00, -3.42989981e-01,
       -1.54778790e+00, -1.30557287e+00, -3.52091742e+00, -2.17956829e+00,
       -1.26315558e+00, -1.58135593e+00, -2.07610774e+00, -2.98832250e+00,
        5.85396230e-01,  5.15201092e-01,  4.70274597e-01,  4.77667511e-01,
        4.90583420e-01,  5.27075827e-01,  5.66979229e-01,  5.74921906e-01,
        5.60593128e-01,  5.47505796e-01,  5.29757202e-01,  5.66312075e-01,
        4.29448883e-05, 

In [27]:
model.predict(example_x)[0]

'calm'