In [1]:
pip install librosa scikit-learn soundfile numpy


Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting soundfile
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-win_amd64.whl.metadata (5.6 kB)
Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------- ----------------------------- 0.3/1.0 MB ? eta -:--:--
   ---------- ----------------------------- 0.3/1.0 MB ? eta -:--:--
   -------------------- -----------------

In [1]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler  # For feature scaling
import joblib  # To save the model

# Path to your TESS dataset
DATA_PATH = r"D:\speech emotion recognition project\speech emotion recognition\TESS\TESS Toronto emotional speech set data"

# List of emotions to detect
emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'ps', 'sad', 'surprise']

def get_emotion_from_filename(file_name):
    """
    Extract emotion from the filename.
    """
    for emotion in emotions:
        if emotion in file_name.lower():
            return emotion
    return None

def extract_features(file_path):
    """
    Extract features (MFCC) from the audio file.
    """
    try:
        audio, sr = librosa.load(file_path, duration=3, offset=0.5)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        return np.mean(mfccs.T, axis=0)
    except Exception as e:
        print(f"Error extracting features from {file_path}: {e}")
        return None

def prepare_data():
    """
    Prepare the dataset by loading audio files and extracting features.
    """
    X, y = [], []
    for folder in os.listdir(DATA_PATH):
        folder_path = os.path.join(DATA_PATH, folder)
        if not os.path.isdir(folder_path):
            continue
        for file in os.listdir(folder_path):
            if file.endswith(".wav"):
                emotion = get_emotion_from_filename(file)
                if emotion:
                    features = extract_features(os.path.join(folder_path, file))
                    if features is not None:
                        X.append(features)
                        y.append(emotion)
    return np.array(X), np.array(y)

# Prepare data
X, y = prepare_data()

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# Feature scaling - Standardize the data (important for SVM)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model
model = SVC(kernel='linear', random_state=42)  # Setting random state for reproducibility
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save the model and scaler
joblib.dump(model, 'emotion_recognition_model.joblib')
joblib.dump(scaler, 'scaler.joblib')  # Save the scaler to use during prediction
print("Model and scaler saved successfully.")


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


Accuracy: 0.9892857142857143
              precision    recall  f1-score   support

       angry       0.96      0.99      0.98        80
     disgust       0.98      1.00      0.99        80
        fear       1.00      1.00      1.00        80
       happy       0.99      0.96      0.97        80
     neutral       1.00      1.00      1.00        80
          ps       1.00      0.97      0.99        80
         sad       1.00      1.00      1.00        80

    accuracy                           0.99       560
   macro avg       0.99      0.99      0.99       560
weighted avg       0.99      0.99      0.99       560

Model and scaler saved successfully.


In [12]:
pip install joblib


Note: you may need to restart the kernel to use updated packages.


In [13]:
import joblib

# Save the trained model
joblib.dump(model, 'emotion_recognition_model.joblib')


['emotion_recognition_model.joblib']

In [15]:
import joblib

# Load the trained model
model = joblib.load('emotion_recognition_model.joblib')


In [16]:
# Assuming you have already extracted features from the new audio
features = extract_features('path_to_new_audio.wav')

# Predict the emotion from the new features
emotion_prediction = model.predict([features])  # Predict returns an array, so we need to put features inside a list

print("Predicted Emotion:", emotion_prediction[0])


  audio, sr = librosa.load(file_path, duration=3, offset=0.5)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_new_audio.wav'