<a href="https://colab.research.google.com/github/jaybeberry9591/advance_machine_learning/blob/main/SER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#RAVDESS: Ryerson Audio-Visual Database of Emotional Speech and Song
#Data Preprocessing - Importing the Libraries

import librosa    #Python Library for analysing audio
import soundfile
import os, glob, pickle
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import librosa.display
import IPython.display as ipd
from itertools import cycle

# Apply the seaborn "white" theme to all subsequent plots.
sns.set_theme(style="white", palette=None)
# Colours of the active matplotlib property cycle: once as a plain list,
# once as an endless iterator over that same list.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(color_pal)

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

In [None]:
def extract_features(X, sample_rate, mfcc, chroma, mel):
    """Build one flat feature vector for an audio signal.

    Every enabled feature is computed with librosa, averaged across frames,
    and appended to the output in a fixed order: MFCC, chroma, mel.

    Parameters
    ----------
    X : audio samples, handed to librosa as ``y``.
    sample_rate : sampling rate of ``X``.
    mfcc : if truthy, include the mean of 40 MFCCs.
    chroma : if truthy, include the mean chromagram computed from ``|STFT(X)|``.
    mel : if truthy, include the mean mel spectrogram.

    Returns
    -------
    numpy.ndarray
        1-D concatenation of the selected features; an empty array when every
        flag is falsy.
    """
    # The leading empty array plays the role of the initial accumulator, so the
    # stacked result has the same dtype as when features are appended one by one.
    pieces = [np.array([])]

    if mfcc:
        mfcc_frames = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
        pieces.append(np.mean(mfcc_frames.T, axis=0))

    if chroma:
        # chroma_stft is fed the magnitude of the short-time Fourier transform.
        magnitude = np.abs(librosa.stft(X))
        chroma_frames = librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)
        pieces.append(np.mean(chroma_frames.T, axis=0))

    if mel:
        mel_frames = librosa.feature.melspectrogram(y=X, sr=sample_rate)
        pieces.append(np.mean(mel_frames.T, axis=0))

    return np.hstack(pieces)

In [None]:
# Emotions in the RAVDESS dataset, keyed by their two-digit code.
# Codes are numbered from '01' in the order the labels are listed here.

emotions = {
    f'{code:02d}': label
    for code, label in enumerate(
        ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised'],
        start=1,
    )
}

In [None]:
# Subset of emotion labels the model is trained and evaluated on

observed_emotions = [
    'neutral',
    'happy',
    'angry',
]

In [None]:
# Mount Google Drive at /content/drive/ (Colab only); the dataset paths used
# elsewhere in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
#importing the data and extracting features for each sound file
def load_data(test_size=0.20, data_glob="/content/drive/MyDrive/Dataset/Dataset/*/*.wav"):
    """Load the audio files, extract their features and split them into train/test sets.

    The emotion of a file is looked up in ``emotions`` from the third
    dash-separated field of its base name; files whose emotion is not in
    ``observed_emotions`` are skipped.

    Parameters
    ----------
    test_size : float
        Fraction of the samples held out for testing. All remaining samples
        form the training set.
    data_glob : str
        Glob pattern selecting the ``.wav`` files to load.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split``; the ``y`` parts hold emotion names.

    Raises
    ------
    ValueError
        If no file matching ``data_glob`` carries an observed emotion.
    KeyError, IndexError
        If a matched file name does not follow the expected naming scheme.
    """
    X, y = [], []
    # glob returns paths in filesystem order; sorting makes the seeded split
    # below reproducible from one run/machine to the next.
    for file in sorted(glob.glob(data_glob)):
        file_name = os.path.basename(file)
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        # sr=None: load at the file's own sampling rate instead of resampling.
        audio_data, sample_rate = librosa.load(file, sr=None, dtype=np.float32)
        feature = extract_features(audio_data, sample_rate, mfcc=True, chroma=True, mel=True)
        X.append(feature)
        y.append(emotion)

    if not X:
        raise ValueError(
            "No audio files with an observed emotion matched {!r}".format(data_glob)
        )

    # No fixed train_size: a hard-coded 0.75 silently discarded samples whenever
    # test_size < 0.25 and raised whenever test_size > 0.25. Letting the training
    # set be the complement gives the same split for test_size = 0.25.
    return train_test_split(np.array(X), y, test_size=test_size, random_state=9)

In [None]:
#Splitting the dataset: 25% of the samples are held out for testing
X_train, X_test, y_train, y_test = load_data(test_size = 0.25)

In [None]:
# Number of samples in the training and testing sets, shown as (train, test)

print(tuple(part.shape[0] for part in (X_train, X_test)))

(360, 120)


In [None]:
# Length of each feature vector (second dimension of the training matrix)

print(f'Features extracted: {np.shape(X_train)[1]}')

Features extracted: 180


In [None]:
#Initializing the Multi Layer Perceptron Classifier
# (MLPClassifier is already imported in the notebook's first cell.)

# random_state pins the weight initialisation and batch shuffling so that a
# re-run reproduces the same model; 9 is the seed also used for the data split.
# NOTE(review): learning_rate='adaptive' only takes effect with solver='sgd';
# with the default solver it is ignored - confirm whether 'sgd' was intended.
classifier = MLPClassifier(alpha = 0.01, batch_size = 256, epsilon = 1e-08, hidden_layer_sizes = (300,),
                    learning_rate='adaptive', max_iter = 500, random_state = 9)

In [None]:
#Training the model on the training split

classifier.fit(X_train,y_train)

In [None]:
#Prediction for the test set (one predicted label per test sample)

y_pred = classifier.predict(X_test)

In [None]:
#Calculating the accuracy of our model: the fraction of test samples whose
#predicted label equals the true label.
# (accuracy_score is already imported in the notebook's first cell.)

accuracy = accuracy_score(y_true = y_test, y_pred = y_pred)

In [None]:
# Report the accuracy as a percentage with two decimal places

print("Accuracy: {:.2f}%".format(100 * accuracy))

Accuracy: 84.17%


In [None]:
# Step 1: Load the new audio file and extract features
def predict_emotion(file_path):
    """Return the feature vector for the audio file at ``file_path``.

    Despite its name, this function performs no prediction: it loads the file
    as float32 without resampling (``sr=None``) and returns the MFCC + chroma +
    mel features produced by ``extract_features``.
    """
    samples, native_rate = librosa.load(file_path, sr=None, dtype=np.float32)
    return extract_features(samples, native_rate, mfcc=True, chroma=True, mel=True)

# Step 2: Use the trained classifier to predict the emotion
def predict_with_classifier(classifier, features):
    """Predict the label for a single feature vector.

    ``features`` is wrapped in a one-element list before being passed to
    ``classifier.predict``; the return value is whatever ``predict`` returns
    for that one-sample batch.
    """
    single_sample_batch = [features]
    return classifier.predict(single_sample_batch)

# Example of predicting a new audio file
new_file_path = "/content/drive/MyDrive/Dataset/Dataset/Actor_20/03-01-01-01-01-01-20.wav"
new_data_features = predict_emotion(new_file_path)

# Use the trained classifier to predict the emotion
predicted_label = predict_with_classifier(classifier, new_data_features)

# The classifier is trained on emotion names (the values of `emotions`), so the
# predicted label already is the emotion name. Looking it up in the reversed
# dictionary would turn it back into its numeric code (e.g. '01').
predicted_emotion = predicted_label[0]

# Reverse lookup (emotion name -> code), kept available for later cells.
emotions_inverse = {v: k for k, v in emotions.items()}

print("Predicted emotion:", predicted_emotion)


Predicted emotion: 01
