In [2]:
import os
import numpy as np
import numpy as np
import librosa
import soundfile




In [3]:
# Lookup table from the two-digit emotion code in a recording's filename
# (the third dash-separated field, as parsed by load_data) to its label.
emotions = dict((
    ('01', 'neutral'),
    ('02', 'calm'),
    ('03', 'happy'),
    ('04', 'sad'),
    ('05', 'angry'),
    ('06', 'fearful'),
    ('07', 'disgust'),
    ('08', 'surprised'),
))


In [4]:
def extract_features(file_name):
    """Build a fixed-length feature vector for one audio file.

    The file is read with ``soundfile`` at its native sample rate. Three
    librosa features are then computed and averaged over time: 40 MFCCs, a
    chroma STFT and a mel spectrogram. The three mean vectors are concatenated
    into a single 1-D array.

    Multi-channel recordings are down-mixed to mono first. ``SoundFile.read``
    returns them shaped ``(frames, channels)``, while librosa expects the
    samples on the last axis, so passing the raw array through would produce
    wrong features or an error.

    Args:
        file_name: Path of an audio file that ``soundfile`` can open.

    Returns:
        numpy.ndarray: 1-D array holding the time-averaged MFCC, chroma and
        mel values, in that order.
    """
    # Only the read needs the open handle; close the file before the
    # (comparatively slow) feature computation.
    with soundfile.SoundFile(file_name) as sound_file:
        audio_data = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    if audio_data.ndim > 1:
        # (frames, channels) -> (frames,): average the channels into mono.
        audio_data = audio_data.mean(axis=1)

    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
    mel = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)

    # Each feature matrix is (n_features, n_frames); averaging the transposed
    # matrix over axis 0 collapses the time axis.
    return np.hstack((
        np.mean(mfccs.T, axis=0),
        np.mean(chroma.T, axis=0),
        np.mean(mel.T, axis=0)
    ))

In [5]:
def load_data(dataset_path, verbose=True):
    """Walk a dataset folder and build the feature matrix and label vector.

    Every ``.wav`` file (extension matched case-insensitively) found under
    ``dataset_path`` is considered. The third dash-separated field of the
    filename is looked up in ``emotions``; files whose code is not in the
    table are skipped silently. Files that fail to parse or to load are
    reported and skipped, so one bad recording does not abort the whole run.

    Directories and files are visited in sorted order. ``os.walk`` otherwise
    yields them in a filesystem-dependent order, which would make the row
    order of the returned arrays (and any seeded split made from them) differ
    between machines.

    Args:
        dataset_path: Root folder to search recursively.
        verbose: When true (the default), print each file path as it is
            processed.

    Returns:
        tuple[numpy.ndarray, numpy.ndarray]: ``(x, y)`` where ``x`` stacks one
        feature vector per processed file and ``y`` holds the matching
        emotion labels. Both are empty when no file was processed.
    """
    x, y = [], []
    for root, dirs, files in os.walk(dataset_path):
        dirs.sort()  # in-place, so os.walk descends in a stable order
        for file in sorted(files):
            if not file.lower().endswith(".wav"):
                continue
            try:
                emotion_code = file.split("-")[2]
                emotion = emotions.get(emotion_code)
                if not emotion:
                    continue
                file_path = os.path.join(root, file)
                if verbose:
                    print("Processing:", file_path)
                features = extract_features(file_path)
            except Exception as e:
                # Deliberate best-effort: report the file and carry on.
                print("Error in", file, ":", e)
                continue
            x.append(features)
            y.append(emotion)
    print(f"✅ Total processed files: {len(x)}")
    return np.array(x), np.array(y)

In [None]:
# Extract one feature vector and one label per recording under the "wav" folder.
X, Y = load_data("wav")

# Fail fast with a clear message: a wrong or empty dataset folder makes
# load_data return empty arrays, which would otherwise only surface later as
# a confusing error when the data is split.
assert len(X) > 0, "No audio files were loaded - check the dataset folder path"
assert len(X) == len(Y), "Feature and label counts differ"

print("X shape:", X.shape)
print("Y shape:", Y.shape)


Processing: wav\Actor_01\03-01-01-01-01-01-01.wav


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Turn the emotion names in Y into integer class ids; the fitted encoder is
# kept so predictions can be mapped back to names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(Y)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    random_state=42,
    test_size=0.2,
)

print("Training shape:", X_train.shape, y_train.shape)
print("Testing shape:", X_test.shape, y_test.shape)


Training shape: (1529, 180) (1529,)
Testing shape: (383, 180) (383,)


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Fit a linear-kernel support vector classifier on the training split.
# SVC.fit returns the fitted estimator, so construction and fitting chain.
svm_model = SVC(C=1.0, kernel='linear').fit(X_train, y_train)

# Class predictions for the held-out test split.
y_pred = svm_model.predict(X_test)


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# NOTE: this cell used to repeat the SVM training cell above line for line.
# `svm_model` and `y_pred` are already defined there, and re-fitting the
# identical SVC on the same split only repeated the work, so the redundant
# training has been removed.


In [None]:
# Share of test samples the SVM classified correctly, shown as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy: {:.2f}%".format(100 * accuracy))

# Per-class precision / recall / F1, labelled with the original emotion names.
print("\n📋 Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=label_encoder.classes_,
    )
)


✅ Accuracy: 61.10%

📋 Classification Report:
              precision    recall  f1-score   support

       angry       0.69      0.76      0.72        45
        calm       0.69      0.76      0.72        55
     disgust       0.58      0.65      0.61        51
     fearful       0.58      0.60      0.59        53
       happy       0.67      0.56      0.61        52
     neutral       0.39      0.41      0.40        22
         sad       0.56      0.53      0.55        45
   surprised       0.60      0.52      0.55        60

    accuracy                           0.61       383
   macro avg       0.60      0.60      0.60       383
weighted avg       0.61      0.61      0.61       383



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree random forest on the same split; the seed makes the
# forest repeatable from run to run.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model.fit(X_train, y_train)

# Class predictions for the held-out test split.
y_pred_rf = rf_model.predict(X_test)

# Accuracy as a percentage, followed by the per-class breakdown.
print("✅ RF Accuracy:", accuracy_score(y_test, y_pred_rf) * 100)
print("\n📋 Classification Report:")
print(
    classification_report(
        y_test,
        y_pred_rf,
        target_names=label_encoder.classes_,
    )
)


✅ RF Accuracy: 74.41253263707573

📋 Classification Report:
              precision    recall  f1-score   support

       angry       0.74      0.78      0.76        45
        calm       0.72      0.93      0.81        55
     disgust       0.66      0.76      0.71        51
     fearful       0.79      0.58      0.67        53
       happy       0.74      0.71      0.73        52
     neutral       1.00      0.77      0.87        22
         sad       0.67      0.64      0.66        45
   surprised       0.81      0.77      0.79        60

    accuracy                           0.74       383
   macro avg       0.77      0.74      0.75       383
weighted avg       0.75      0.74      0.74       383



In [None]:
import sounddevice as sd
from scipy.io.wavfile import write
import librosa 

In [None]:
def record_audio(filename, duration=5, sample_rate=22050, fs=44100):
    """Record one channel of audio with sounddevice and save it as a WAV file.

    Args:
        filename: Destination path handed to ``scipy.io.wavfile.write``.
        duration: Recording length in seconds.
        sample_rate: Not used by this function; both the capture and the
            saved file use ``fs``.
        fs: Sampling frequency used for the capture and written to the file.
    """
    print(f"Recording {duration} seconds of audio...")
    frame_count = int(duration * fs)
    captured = sd.rec(frame_count, samplerate=fs, channels=1)
    sd.wait()  # block here until the recording has finished
    write(filename, fs, captured)
    print(f"Audio recorded and saved as {filename}")

def predict_emotion(filename, model):
    """Predict, print and return the emotion of a single audio file.

    Features are produced by ``extract_features`` so they are computed the
    same way as the training data. They are passed to the model as-is: the
    classifiers in this notebook are fitted on the unscaled feature matrix,
    and no ``scaler`` object is defined anywhere, so applying one here would
    raise ``NameError``.

    Args:
        filename: Path of the audio file to classify.
        model: Fitted classifier exposing ``predict`` (e.g. ``rf_model``).

    Returns:
        The predicted emotion label, decoded with ``label_encoder``.
    """
    # The model expects a 2-D (n_samples, n_features) array; this is one sample.
    features = extract_features(filename).reshape(1, -1)
    prediction = model.predict(features)
    emotion = label_encoder.inverse_transform(prediction)[0]
    print("🧠 Predicted Emotion:", emotion)
    return emotion

In [None]:
import sounddevice as sd
from scipy.io.wavfile import write

def record_voice(filename="test.wav", duration=5, fs=44100):
    """Capture one channel of audio with sounddevice and write it to a WAV file.

    Args:
        filename: Output path for the recording.
        duration: Length of the recording in seconds.
        fs: Sampling frequency used for the capture and stored in the file.
    """
    print("🎙️ Recording for", duration, "seconds...")
    total_frames = int(duration * fs)
    audio = sd.rec(total_frames, samplerate=fs, channels=1)
    sd.wait()  # returns once the capture is complete
    write(filename, fs, audio)
    print("✅ Audio saved to", filename)


In [None]:
def predict_emotion_from_file(filename, model):
    """Classify the emotion in an audio file; print and return the label.

    The feature vector comes from ``extract_features``, the same routine used
    to build the training matrix, so training and inference share one feature
    pipeline. No scaling step is applied: the models in this notebook are
    trained on unscaled features, and no ``scaler`` is defined anywhere, so
    referencing one here would raise ``NameError``.

    Args:
        filename: Path of the audio file to classify.
        model: Fitted classifier exposing ``predict`` (e.g. ``rf_model``).

    Returns:
        The predicted emotion label, decoded with ``label_encoder``.
    """
    # Reshape the 1-D vector into a single-row matrix, as predict() requires.
    features = extract_features(filename).reshape(1, -1)
    prediction = model.predict(features)
    emotion = label_encoder.inverse_transform(prediction)[0]
    print("🧠 Predicted Emotion:", emotion)
    return emotion



In [None]:
# Record a short clip to "test.wav", then classify it with the random forest.
record_voice(filename="test.wav")
predict_emotion_from_file("test.wav", rf_model)


NameError: name 'record_voice' is not defined