### Importing Libraries needed for the project 

In [2]:
pip install librosa scikit-learn soundfile numpy

Collecting librosa
  Using cached librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting soundfile
  Using cached soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl.metadata (14 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.3.7-cp311-cp311-macosx_10_9_x86_64.whl.metadata (5.5 kB)
Downloading librosa-0.10.2.post1-py3-none-any.whl (260 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m260.1/260.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading audioread-3.0.1-py3-none-any.whl (23 kB)
Downloading pooch-1.8

In [140]:
# Importing necessary libraries
import librosa
import os, glob, pickle
import numpy as np
import soundfile as sf
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

### Creating a function to extract features from an audio file


In [87]:
#Extract features (mfcc, chroma, mel) from a audio file
def extract_feature(file_name, mfcc, chroma, mel):
    with sf.SoundFile(file_name) as sound_file:
        audio = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate

        if chroma:
            stft=np.abs(librosa.stft(audio))
        result=np.array([])
        
        # Extract MFCC features
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
            
        # Extract Chroma features
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
            
        # Extract Mel features
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [80]:
#Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

#DataFlair - Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [88]:
#Loading the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("speech-emotion-recognition-ravdess-data/Actor_*/*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=42)

### Getting the dataset ready for classifiers

In [89]:
# Split the dataset into training and testing sets
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

In [127]:
#Displaying the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(576, 192)


In [128]:
#Getting the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [130]:
# Creating a SVM classifier
model = make_pipeline(StandardScaler(), SVC(kernel='linear', probability=True))

In [131]:
# Training the model
model.fit(x_train, y_train)

In [132]:
# Predict the emotions of the test set
y_pred = model.predict(x_test)

In [133]:
# Print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        calm       0.71      0.87      0.78        52
     disgust       0.58      0.60      0.59        48
     fearful       0.57      0.53      0.55        47
       happy       0.49      0.38      0.42        45

    accuracy                           0.60       192
   macro avg       0.59      0.59      0.59       192
weighted avg       0.59      0.60      0.59       192



In [134]:
#Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 60.42%


In [135]:
# Creating a Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [136]:
# Train the model
model.fit(x_train, y_train)

In [137]:
# Predict the emotions of the test set
y_pred = model.predict(x_test)

In [138]:
# Print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        calm       0.62      0.96      0.75        52
     disgust       0.73      0.23      0.35        48
     fearful       0.57      0.74      0.65        47
       happy       0.54      0.42      0.48        45

    accuracy                           0.60       192
   macro avg       0.62      0.59      0.56       192
weighted avg       0.62      0.60      0.56       192



In [139]:
#Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 59.90%
