In [1]:
import numpy as np
import librosa

def extract_mfcc(file_path, n_mfcc=13, sr=16000):
    try:
        audio, _ = librosa.load(file_path, sr=sr)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        mfccs_mean = np.mean(mfccs.T, axis=0)
        return mfccs_mean
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None


In [4]:
import numpy as np
import os
import librosa
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Function to extract MFCC features
def extract_mfcc(file_path, n_mfcc=13, sr=16000):
    try:
        audio, _ = librosa.load(file_path, sr=sr)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        mfccs_mean = np.mean(mfccs.T, axis=0)
        return mfccs_mean
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Directory containing the audio files
audio_dir = 'D:\\ai ml prac\\actual\\data set\\processed audio'

X = []
y_gender = []
y_emotion = []

# Loop through each actor directory
for actor in os.listdir(audio_dir):
    actor_dir = os.path.join(audio_dir, actor)
    if os.path.isdir(actor_dir):
        # Extract the actor number from the directory name
        try:
            actor_number = int(actor.split('_')[1])
            gender = 'male' if actor_number % 2 != 0 else 'female'  # Odd-numbered actors are male, even-numbered are female
        except ValueError:
            print(f"Skipping directory with invalid format: {actor}")
            continue
        
        for file_name in os.listdir(actor_dir):
            if file_name.endswith('.wav'):
                file_path = os.path.join(actor_dir, file_name)
                features = extract_mfcc(file_path)
                if features is not None:
                    X.append(features)
                    y_gender.append(gender)
                    # Assume emotion labels are embedded in file names, e.g., "happy" or "sad"
                    emotion = file_name.split('_')[0]  # Adjust according to your naming convention
                    y_emotion.append(emotion)

# Convert lists to NumPy arrays
X = np.array(X)
y_gender = np.array(y_gender)
y_emotion = np.array(y_emotion)

# Encode labels
le_gender = LabelEncoder()
le_emotion = LabelEncoder()
y_gender_encoded = le_gender.fit_transform(y_gender)
y_emotion_encoded = le_emotion.fit_transform(y_emotion)

# Split data into training and testing sets
X_train, X_test, y_gender_train, y_gender_test, y_emotion_train, y_emotion_test = train_test_split(
    X, y_gender_encoded, y_emotion_encoded, test_size=0.2, random_state=42
)


In [6]:
# Evaluate the model
evaluation_results = model.evaluate(
    X_test, {'gender': y_gender_test, 'emotion': y_emotion_test}
)

# Print evaluation results
print("Evaluation Results:", evaluation_results)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - emotion_accuracy: 0.0000e+00 - gender_accuracy: 0.9358 - loss: 18.1175 
Evaluation Results: [18.108379364013672, 0.0, 0.9375]


In [8]:
# Evaluate the model
evaluation_results = model.evaluate(
    X_test, {'gender': y_gender_test, 'emotion': y_emotion_test}
)

# Print evaluation results
print("Evaluation Results:")
print(f"Test Loss: {evaluation_results[0]}")
print(f"Gender Loss: {evaluation_results[1]}")
print(f"Emotion Loss: {evaluation_results[2]}")

# If accuracy metrics are not returned, you might need to add them to your model and re-evaluate


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - emotion_accuracy: 0.0000e+00 - gender_accuracy: 0.9358 - loss: 18.1175 
Evaluation Results:
Test Loss: 18.108379364013672
Gender Loss: 0.0
Emotion Loss: 0.9375


In [10]:
model.compile(
    optimizer='adam',
    loss={'gender': 'sparse_categorical_crossentropy', 'emotion': 'sparse_categorical_crossentropy'},
    metrics={'gender': 'accuracy', 'emotion': 'accuracy'}
)


In [13]:
# Evaluate the model
evaluation_results = model.evaluate(
    X_test, {'gender': y_gender_test, 'emotion': y_emotion_test}
)

# Print evaluation results
print("Evaluation Results:")
print(f"Test Loss: {evaluation_results[0]}")
print(f"Gender Loss: {evaluation_results[1]}")
print(f"Emotion Loss: {evaluation_results[2]}")

# Optionally, check if the evaluation_results length matches the expected metrics
if len(evaluation_results) >= 4:
    print(f"Gender Accuracy: {evaluation_results[3]}")
if len(evaluation_results) >= 5:
    print(f"Emotion Accuracy: {evaluation_results[4]}")
else:
    print("Accuracy metrics are not available.")


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - emotion_accuracy: 0.0000e+00 - gender_accuracy: 0.9358 - loss: 18.1175 
Evaluation Results:
Test Loss: 18.108379364013672
Gender Loss: 0.0
Emotion Loss: 0.9375
Accuracy metrics are not available.


In [14]:
model.compile(
    optimizer='adam',
    loss={'gender': 'sparse_categorical_crossentropy', 'emotion': 'sparse_categorical_crossentropy'},
    metrics={'gender': 'accuracy', 'emotion': 'accuracy'}
)


In [15]:
history = model.fit(
    X_train, {'gender': y_gender_train, 'emotion': y_emotion_train},
    epochs=10,
    batch_size=32,
    validation_split=0.2
)


Epoch 1/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - emotion_accuracy: 0.0012 - gender_accuracy: 0.8988 - loss: 6.3168 - val_emotion_accuracy: 0.0000e+00 - val_gender_accuracy: 0.9134 - val_loss: 12.4517
Epoch 2/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - emotion_accuracy: 0.0194 - gender_accuracy: 0.9295 - loss: 6.0683 - val_emotion_accuracy: 0.0000e+00 - val_gender_accuracy: 0.9351 - val_loss: 18.8242
Epoch 3/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - emotion_accuracy: 0.0285 - gender_accuracy: 0.9421 - loss: 5.7716 - val_emotion_accuracy: 0.0000e+00 - val_gender_accuracy: 0.9048 - val_loss: 18.9595
Epoch 4/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - emotion_accuracy: 0.0426 - gender_accuracy: 0.9434 - loss: 5.5317 - val_emotion_accuracy: 0.0000e+00 - val_gender_accuracy: 0.9481 - val_loss: 20.9841
Epoch 5/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━

In [16]:
evaluation_results = model.evaluate(
    X_test, {'gender': y_gender_test, 'emotion': y_emotion_test}
)

print("Evaluation Results:")
print(f"Test Loss: {evaluation_results[0]}")
print(f"Gender Loss: {evaluation_results[1]}")
print(f"Emotion Loss: {evaluation_results[2]}")

if len(evaluation_results) >= 4:
    print(f"Gender Accuracy: {evaluation_results[3]}")
if len(evaluation_results) >= 5:
    print(f"Emotion Accuracy: {evaluation_results[4]}")
else:
    print("Accuracy metrics are not available.")


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - emotion_accuracy: 0.0000e+00 - gender_accuracy: 0.9200 - loss: 30.9143 
Evaluation Results:
Test Loss: 30.90627098083496
Gender Loss: 0.0
Emotion Loss: 0.9305555820465088
Accuracy metrics are not available.


In [17]:
model.summary()


In [18]:
from tensorflow.keras import metrics

model.compile(
    optimizer='adam',
    loss={'gender': 'sparse_categorical_crossentropy', 'emotion': 'sparse_categorical_crossentropy'},
    metrics={'gender': metrics.SparseCategoricalAccuracy(), 'emotion': metrics.SparseCategoricalAccuracy()}
)


In [19]:
model.save('gender_emotion_classifier_model.h5')




In [20]:
from tensorflow.keras.models import load_model
import numpy as np
import librosa

# Load the trained model
model = load_model('gender_emotion_classifier_model.h5')

def extract_mfcc(file_path, n_mfcc=13, sr=16000):
    try:
        audio, sr = librosa.load(file_path, sr=sr)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        mfccs_mean = np.mean(mfccs.T, axis=0)
        return mfccs_mean
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

def predict_gender_emotion(audio_file):
    mfccs = extract_mfcc(audio_file)
    if mfccs is not None:
        prediction = model.predict(np.expand_dims(mfccs, axis=0))
        gender_pred = np.argmax(prediction[0])
        emotion_pred = np.argmax(prediction[1])
        return gender_pred, emotion_pred
    return None, None


