The dataset used in this code is from SAVEE datset

In [28]:
!pip install librosa matplotlib scikit-learn tensorflow



In [None]:
import zipfile
import os

zip_file = "ALL.zip"
if zipfile.is_zipfile(zip_file):
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall("/content/ALL")
        print("ZIP extracted successfully.")
else:
    print("Not a valid ZIP file")

!ls /content/ALL


In [27]:
import librosa
import numpy as np
import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def extract_features(file_path):
    audio, sr = librosa.load(file_path, sr=16000)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    return np.mean(mfccs.T, axis=0)

emotion_map = {
    'a': 'angry', 'd': 'disgust', 'f': 'fear',
    'h': 'happy', 'n': 'neutral', 'sa': 'sad', 'su': 'surprise'
}

X = []
y = []

files = glob.glob('/content/ALL/**/*.wav', recursive=True)
print(f"Found {len(files)} files")

for file in files:
    fname = os.path.basename(file).lower()
    if '_' in fname:
        code = fname.split('_')[1][:2]
        label = emotion_map.get(code)
        if label:
            try:
                mfccs = extract_features(file)
                X.append(mfccs)
                y.append(label)
            except Exception as e:
                print(f"Error processing {file}: {e}")

print("Finished extracting features.")
print("Total samples:", len(X))

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(np.array(X), y_encoded, test_size=0.2, random_state=42)

# Define model
model = Sequential([
    Dense(256, activation='relu', input_shape=(40,)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])

# Compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")


Found 514 files
Error processing /content/ALL/KL_sa10.wav: 


  audio, sr = librosa.load(file_path, sr=16000)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Finished extracting features.
Total samples: 133
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.5158 - loss: 22.9484 - val_accuracy: 0.5185 - val_loss: 14.0793
Epoch 2/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5190 - loss: 8.5429 - val_accuracy: 0.4815 - val_loss: 3.7084
Epoch 3/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5637 - loss: 5.9732 - val_accuracy: 0.7778 - val_loss: 0.4038
Epoch 4/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5966 - loss: 3.7420 - val_accuracy: 0.8519 - val_loss: 0.4066
Epoch 5/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4603 - loss: 3.5078 - val_accuracy: 0.7407 - val_loss: 0.5558
Epoch 6/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5099 - loss: 2.9755 - val_accuracy: 0.9259 - val_loss: 0.1979
Epoch 7/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━