In [7]:
import os
import librosa
import numpy as np
import sounddevice as sd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from scipy.io.wavfile import write

# Step 1: Emotion mapping based on the RAVDESS filename convention
emotion_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

# Step 2: Function to extract features from each audio file
def extract_features(audio_file):
    y, sr = librosa.load(audio_file, sr=None)
    
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    rms = librosa.feature.rms(y=y)
    
    features = np.concatenate([
        np.mean(mfcc, axis=1),
        np.mean(chroma, axis=1),
        np.mean(spectral_contrast, axis=1),
        np.mean(zcr, axis=1),
        np.mean(rms, axis=1)
    ])
    
    return features

# Step 3: Load RAVDESS dataset and extract features
def load_data(data_dir):
    features = []
    labels = []
    
    for subdir, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith(".wav"):
                file_path = os.path.join(subdir, file)
                
                emotion_code = file.split("-")[2]
                emotion_label = emotion_map.get(emotion_code)
                
                if emotion_label:  # Process only if emotion label is valid
                    features.append(extract_features(file_path))
                    labels.append(emotion_label)
    
    return np.array(features), np.array(labels)

# Step 4: Load the RAVDESS dataset
data_dir = r"C:\Users\Sahilsharma\Desktop\Ravdess Dataset"  # Set the correct path to your RAVDESS dataset
X, y = load_data(data_dir)

# Step 5: Encode labels into numerical form
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Step 6: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Step 7: Train a Support Vector Machine (SVM) classifier
classifier = SVC(kernel='linear')
classifier.fit(X_train, y_train)

# Step 8: Evaluate the model's performance on the test set
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Step 9: Function to record audio from microphone
def record_voice(filename="output.wav", duration=10, fs=44100):  # Change duration to 10
    print("Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()  # Wait until the recording is finished
    write(filename, fs, recording)  # Save as a WAV file
    print("Recording complete.")
    return filename


# Step 10: Real-time prediction from recorded voice input
def predict_emotion_from_voice(classifier, label_encoder):
    # Record voice
    audio_file = record_voice()
    
    # Extract features from the recorded audio file
    features = extract_features(audio_file).reshape(1, -1)
    
    # Predict the emotion
    predicted_emotion_code = classifier.predict(features)[0]
    predicted_emotion = label_encoder.inverse_transform([predicted_emotion_code])[0]
    
    print(f"Predicted emotion: {predicted_emotion}")
    return predicted_emotion

# Step 11: Run real-time emotion recognition
predict_emotion_from_voice(classifier, le)


Accuracy: 54.51%
Classification Report:
              precision    recall  f1-score   support

       angry       0.68      0.66      0.67        79
        calm       0.51      0.68      0.58        69
     disgust       0.55      0.52      0.54        84
     fearful       0.54      0.65      0.59        80
       happy       0.65      0.57      0.61        82
     neutral       0.46      0.26      0.33        42
         sad       0.33      0.39      0.36        61
   surprised       0.59      0.47      0.52        79

    accuracy                           0.55       576
   macro avg       0.54      0.53      0.53       576
weighted avg       0.55      0.55      0.54       576

Recording...
Recording complete.
Predicted emotion: surprised


'surprised'