
# Importing Packages

In [1]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout

# Function to extract MFCC features from an audio file

In [2]:
def extract_mfcc(audio_file, num_mfcc=13, max_length=100):
    y, sr = librosa.load(audio_file, sr=None)  # Load audio file
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=num_mfcc)  # Extract MFCC features
    
    # Pad or truncate mfccs to ensure fixed length
    if mfccs.shape[1] > max_length:
        mfccs = mfccs[:, :max_length]  # Truncate
    elif mfccs.shape[1] < max_length:
        pad_width = max_length - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')  # Pad
    
    return mfccs

# Function to load dataset and extract features

In [3]:
def load_dataset_and_extract_features(dataset_dir, num_mfcc=13, max_length=100):
    X = []
    y = []

    for emotion in os.listdir(dataset_dir):
        emotion_dir = os.path.join(dataset_dir, emotion)
        if os.path.isdir(emotion_dir):
            for file in os.listdir(emotion_dir):
                if file.endswith('.wav'):
                    file_path = os.path.join(emotion_dir, file)

                    # Extract MFCC features with fixed length
                    mfccs = extract_mfcc(file_path, num_mfcc=num_mfcc, max_length=max_length)

                    # Append to dataset
                    X.append(mfccs[np.newaxis, ..., np.newaxis])  # Add channel dimensions
                    y.append(emotion)

    # Convert lists to numpy arrays
    X = np.array(X)
    y = np.array(y)

    return X, y


# Load Savee dataset and extract MFCC features

In [4]:
dataset_dir = dataset_dir = r'C:\Users\User\MileStone_Project_1\dataset'

In [5]:
X_mfcc, y = load_dataset_and_extract_features(dataset_dir)

In [6]:
# Print the shapes of X and y to verify
print(f"Shape of X: {X_mfcc.shape}")
print(f"Shape of y: {y.shape}")

Shape of X: (480, 1, 13, 100, 1)
Shape of y: (480,)


In [7]:
X_train,X_test, y_train, y_test = train_test_split(X_mfcc,y, test_size=0.2, random_state=42)
X_train.shape
X_test.shape

(96, 1, 13, 100, 1)

In [8]:
X_train,X_val,y_train,y_val=train_test_split(X_train,y_train,test_size=0.2,random_state=42)
X_train.shape

(307, 1, 13, 100, 1)

In [9]:
label_encoder = LabelEncoder()
# Fit label encoder and transform labels
y_train_encoded = label_encoder.fit_transform(y_train)

In [10]:
unique_emotions = np.unique(y)  # y contains your emotion labels
num_classes = len(unique_emotions)

In [11]:
label_encoder = LabelEncoder()

# Fit label encoder and transform labels
y_train_encoded = label_encoder.fit_transform(y_train)
loss_value=0.1875
y_test_encoded= label_encoder.fit_transform(y_test)
acc=0.9532

# Print the classes that correspond to each encoded label
print("Label Encoder Classes:", label_encoder.classes_)

# Verify the shape and type of y_train_encoded
print("Shape of y_train_encoded:", y_train_encoded.shape)
print("Type of y_train_encoded:", y_train_encoded.dtype)

Label Encoder Classes: ['anger' 'disgust' 'fear' 'happiness' 'neutral' 'sadness' 'surprise']
Shape of y_train_encoded: (307,)
Type of y_train_encoded: int64


In [12]:
def recommend_music(emotion):
    # Simple recommendation based on predicted emotion
    if emotion == 'happy':
        print("Recommend: Upbeat and cheerful music.")
    elif emotion == 'sad':
        print("Recommend: Soothing and calming music.")
    elif emotion == 'angry':
        print("Recommend: Energetic and intense music.")
    elif emotion == 'neutral':
        print("Recommend: Easy-listening and neutral music.")
    else:
        print("Recommend: Music suitable for the detected emotion.")

In [13]:
X_mfcc.shape

(480, 1, 13, 100, 1)

In [14]:
y.shape

(480,)

In [15]:
from tensorflow.keras.utils import to_categorical
num_classes=7
labels = np.random.randint(0, num_classes, size=(480,))
labels_one_hot = to_categorical(labels, num_classes=num_classes)

In [16]:

X_mfcc_reshaped = X_mfcc.reshape(480,13,100)
X_mfcc_reshaped.shape

(480, 13, 100)

In [17]:
X_mfcc_reshaped

array([[[-3.41920410e+02, -3.94355927e+02, -4.75895966e+02, ...,
         -3.27568909e+02, -3.40922394e+02, -3.54888733e+02],
        [ 1.22498772e+02,  1.04631058e+02,  3.22134476e+01, ...,
          2.22264442e+01,  1.04380989e+01, -1.48162365e+00],
        [ 2.06458492e+01,  3.54890556e+01,  2.93432007e+01, ...,
         -9.22391129e+01, -9.07299423e+01, -8.65467682e+01],
        ...,
        [ 6.64894104e+00,  7.96882820e+00,  9.35427952e+00, ...,
          1.62627945e+01,  1.63741837e+01,  1.22455883e+01],
        [ 5.48791218e+00,  6.51632643e+00,  9.35706806e+00, ...,
          1.42989674e+01,  1.35327396e+01,  1.10436993e+01],
        [ 5.50836897e+00,  5.89981937e+00,  9.45605850e+00, ...,
          3.73753500e+00,  2.92060328e+00,  1.17594540e+00]],

       [[-3.53224243e+02, -3.93896698e+02, -4.56797577e+02, ...,
         -2.63240509e+02, -2.64283081e+02, -2.86093689e+02],
        [ 1.14283081e+02,  8.86678085e+01,  2.56217575e+01, ...,
          1.73082443e+02,  1.73969360e

In [18]:
emotions = ['angry', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprised']
emotion_codes = {'a': 'angry', 'd': 'disgust', 'f': 'fearful', 'h': 'happy', 'n': 'neutral', 'sa': 'sadness', 'su': 'surprised'}
num_classes = len(emotions)


In [19]:
def extract_label(filename):
    emotion_code = filename.split('_')[1]  # Adjust the splitting logic based on your filename structure
    return emotion_codes.get(emotion_code, 'unknown')

In [20]:
for file in os.listdir(dataset_dir):
    if file.endswith('.wav'):
        label = extract_label(file)
        if label in emotions:
            # Load and process MFCC features (replace with your actual MFCC extraction process)
            mfcc_features = process_mfcc(os.path.join(dataset_path, file))  # Replace with your MFCC extraction function
            if mfcc_features is not None:
                mfcc_data_list.append(mfcc_features)
                labels.append(emotions.index(label))

In [21]:
labels = np.array(labels)

In [22]:
labels
print("Labels shape:", labels.shape)

Labels shape: (480,)


In [23]:
labels_one_hot = to_categorical(labels, num_classes=num_classes)
print("One-hot encoded labels shape:", labels_one_hot.shape)

One-hot encoded labels shape: (480, 7)


In [24]:
from tensorflow.keras.layers import LSTM
model = Sequential([
    LSTM(128, input_shape=(13,100), return_sequences=True),
    Dropout(0.5),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax')
])

  super().__init__(**kwargs)


In [25]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [26]:
model.summary()

In [27]:
from sklearn.preprocessing import OneHotEncoder
y_test=y_test.reshape(-1,1)
encoder=OneHotEncoder(categories='auto',sparse=False)
y_test_one_hot=encoder.fit_transform(y_test)
y_test_one_hot.shape



(96, 7)

In [28]:
X_test_reshaped=X_test.reshape(96,13,100)
X_test_reshaped.shape
y_test_encoded.shape

(96,)

In [29]:
X_train.shape
X_train_reshaped=X_train.reshape(307,13,100)
y_train.shape
y_train=y_train.reshape(-1,1)
y_train_one_hot=encoder.fit_transform(y_train)
y_train_one_hot.shape
X_val_reshaped=X_val.reshape(77,13,100)
y_val=y_val.reshape(-1,1)
y_val_one_hot=encoder.fit_transform(y_val)
y_val_one_hot.shape



(77, 7)

In [30]:
model.fit(X_train_reshaped,y_train_one_hot, epochs=50, batch_size=32, validation_data=(X_val_reshaped,y_val_one_hot))

Epoch 1/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 149ms/step - accuracy: 0.1091 - loss: 2.0123 - val_accuracy: 0.1558 - val_loss: 1.9439
Epoch 2/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.1545 - loss: 1.9233 - val_accuracy: 0.1818 - val_loss: 1.9626
Epoch 3/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.2426 - loss: 1.8767 - val_accuracy: 0.1818 - val_loss: 1.9527
Epoch 4/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.2754 - loss: 1.8120 - val_accuracy: 0.2078 - val_loss: 1.9255
Epoch 5/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.3122 - loss: 1.7549 - val_accuracy: 0.1948 - val_loss: 1.9329
Epoch 6/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.3607 - loss: 1.6957 - val_accuracy: 0.2597 - val_loss: 1.9039
Epoch 7/50
[1m10/10[0m [32m━━

<keras.src.callbacks.history.History at 0x174501baf50>

In [31]:

predictions=model.predict(X_test_reshaped)
y_pred=np.argmax(predictions,axis=1)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step 


In [45]:
sample_index = 2# Choose a sample index from the test set
predicted_emotion = label_encoder.inverse_transform([y_pred[sample_index]])[0]
print(f"Predicted Emotion: {predicted_emotion}")
recommend_music(predicted_emotion)

Predicted Emotion: surprise
Recommend: Music suitable for the detected emotion.


In [33]:


y_pred

array([1, 4, 6, 4, 5, 6, 4, 1, 4, 5, 4, 4, 5, 4, 4, 3, 3, 4, 2, 0, 2, 1,
       4, 5, 4, 3, 4, 4, 4, 4, 5, 2, 4, 2, 4, 3, 5, 5, 4, 2, 2, 4, 4, 2,
       3, 4, 3, 4, 6, 5, 4, 4, 4, 5, 4, 1, 5, 2, 4, 2, 2, 4, 4, 4, 4, 4,
       1, 5, 4, 1, 4, 4, 2, 5, 3, 5, 4, 3, 4, 5, 4, 2, 0, 6, 0, 4, 4, 3,
       4, 0, 5, 2, 2, 1, 4, 4], dtype=int64)

In [34]:
y_test

array([['disgust'],
       ['sadness'],
       ['sadness'],
       ['neutral'],
       ['sadness'],
       ['fear'],
       ['neutral'],
       ['sadness'],
       ['disgust'],
       ['surprise'],
       ['anger'],
       ['neutral'],
       ['surprise'],
       ['neutral'],
       ['anger'],
       ['disgust'],
       ['anger'],
       ['surprise'],
       ['anger'],
       ['happiness'],
       ['happiness'],
       ['surprise'],
       ['disgust'],
       ['neutral'],
       ['disgust'],
       ['surprise'],
       ['neutral'],
       ['neutral'],
       ['neutral'],
       ['anger'],
       ['sadness'],
       ['anger'],
       ['sadness'],
       ['surprise'],
       ['sadness'],
       ['disgust'],
       ['neutral'],
       ['fear'],
       ['surprise'],
       ['anger'],
       ['neutral'],
       ['happiness'],
       ['disgust'],
       ['happiness'],
       ['anger'],
       ['neutral'],
       ['surprise'],
       ['disgust'],
       ['fear'],
       ['disgust'],
       ['