IMPORTING LIBRARIES

In [1]:
import os
import tensorflow as tf
import numpy as np
import librosa
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import joblib
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import register_keras_serializable

FEATURE EXTRACTION AND PREDICTION HELPERS

In [2]:
def extract_features(file_path, n_mfcc=40):
    """Build a single 1-D feature vector for one audio file.

    The file is loaded with ``sr=None`` (librosa keeps the file's own
    sampling rate instead of resampling). MFCCs and a mel spectrogram are
    then computed, each is averaged down to one value per coefficient/band,
    and the two averaged vectors are concatenated.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        1-D NumPy array: averaged MFCCs followed by the averaged
        mel-spectrogram bands.
    """
    def _average(feature_matrix):
        # Mean over axis 0 of the transpose, i.e. over the matrix's last axis.
        return np.mean(feature_matrix.T, axis=0)

    signal, sample_rate = librosa.load(file_path, sr=None)

    mfcc_mean = _average(
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    )
    mel_mean = _average(
        librosa.feature.melspectrogram(y=signal, sr=sample_rate)
    )

    return np.concatenate([mfcc_mean, mel_mean])

In [6]:
def predict_emotions(model, file_path, scaler, label_encoder, n_mfcc=40):
    """Predict the three most likely emotion labels for one audio file.

    Features are extracted with ``extract_features``, scaled with ``scaler``,
    reshaped to ``(1, 1, n_features)`` and passed to ``model.predict``. The
    three highest-scoring class indices are mapped back to labels with
    ``label_encoder.inverse_transform``.

    Args:
        model: Object exposing ``predict``; the first row of its output is
            used as the per-class scores.
        file_path: Path of the audio file to classify.
        scaler: Fitted scaler exposing ``transform``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.
        n_mfcc: Number of MFCC coefficients forwarded to ``extract_features``.

    Returns:
        dict with keys ``'top_emotion'``, ``'top_emotion_confidence'`` and
        ``'top_3_predictions'`` (a list of ``{'emotion', 'confidence'}``
        dicts ordered from most to least confident).
    """
    feature_vector = extract_features(file_path=file_path, n_mfcc=n_mfcc)
    scaled_features = scaler.transform(feature_vector.reshape(1, -1))
    n_features = scaled_features.shape[1]
    model_input = scaled_features.reshape(1, 1, n_features)

    class_scores = model.predict(model_input)[0]

    # argsort is ascending: keep the last three indices, then flip them so
    # the best-scoring class comes first.
    ranked_indices = class_scores.argsort()[-3:][::-1]
    ranked_emotions = label_encoder.inverse_transform(ranked_indices)
    ranked_confidences = class_scores[ranked_indices]
    print("scaled features shape", scaled_features.shape)

    ranked_predictions = []
    for emotion, confidence in zip(ranked_emotions, ranked_confidences):
        ranked_predictions.append(
            {'emotion': emotion, 'confidence': float(confidence)}
        )

    return {
        'top_emotion': ranked_emotions[0],
        'top_emotion_confidence': float(ranked_confidences[0]),
        'top_3_predictions': ranked_predictions,
    }

LOADING THE MODEL AND PREDICTING ON AN AUDIO FILE

In [14]:

# Main function
def main(
    artifacts_path='E:/final year project/model_artifacts',
    model_path='E:/final year project/models/model_latest_0.99.keras',
    file_path=r'E:\final year project\dataset\tess\Neutral\OA_bite_neutral_augmented.wav',
):
    """Load the trained model and its artifacts, then classify one audio file.

    The locations used to be hard-coded inside the body; they are now
    keyword parameters whose defaults are the original values, so calling
    ``main()`` with no arguments behaves as before.

    Args:
        artifacts_path: Directory containing ``minmax_scaler.pkl`` and
            ``label_encoder.pkl``.
        model_path: Path of the saved ``.keras`` model file.
        file_path: Path of the audio file to classify.

    Returns:
        The prediction dict produced by ``predict_emotions`` (it is also
        printed).
    """

    class ExpandDimsLayer(tf.keras.layers.Layer):
        """Custom layer that inserts a new axis at position 1 of its input."""

        def call(self, inputs):
            return tf.expand_dims(inputs, axis=1)

    class AttentionLayer(tf.keras.layers.Layer):
        """Custom layer that pools axis 1 using learned softmax weights."""

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

        def build(self, input_shape):
            # Weight names are kept unchanged in case the saved model's
            # weights are matched by name.
            self.W = self.add_weight(name='att_weight', shape=(input_shape[-1],), initializer='normal', trainable=True)
            self.b = self.add_weight(name='att_bias', shape=(input_shape[-2],), initializer='zeros', trainable=True)
            super().build(input_shape)

        def call(self, x):
            # Score each position along axis 1, normalise the scores with a
            # softmax over that axis, then return the weighted sum over it.
            e = tf.nn.tanh(tf.tensordot(x, self.W, axes=1) + self.b)
            a = tf.nn.softmax(e, axis=1)
            return tf.reduce_sum(x * tf.expand_dims(a, -1), axis=1)

    # Custom focal loss factory; only needed so the saved model can be
    # deserialized (it is supplied below under the 'loss_fn' key).
    @register_keras_serializable()
    def focal_loss(gamma=2.0, alpha=0.25):
        def loss_fn(y_true, y_pred):
            # NOTE(review): CategoricalCrossentropy() is built with its
            # default reduction, so the focal modulation is presumably applied
            # to an already-reduced value rather than per sample. Confirm
            # against the training code before changing it.
            cce = CategoricalCrossentropy()
            cross_entropy = cce(y_true, y_pred)
            pt = tf.exp(-cross_entropy)
            focal_loss_value = alpha * (1 - pt) ** gamma * cross_entropy
            return focal_loss_value
        return loss_fn

    # Load the trained model.
    # NOTE(review): safe_mode=False disables Keras' safe deserialization
    # checks; only load model files from a trusted source.
    model = tf.keras.models.load_model(
        model_path,
        custom_objects={
            'AttentionLayer': AttentionLayer,
            'ExpandDimsLayer': ExpandDimsLayer,
            'loss_fn': focal_loss(gamma=2.0, alpha=0.25)
        },
        safe_mode=False
    )
    print("model input shape", model.input_shape)

    # Load scaler and label encoder.
    # NOTE(review): joblib.load unpickles the file and can execute arbitrary
    # code; only load artifacts from a trusted source.
    scaler = joblib.load(os.path.join(artifacts_path, 'minmax_scaler.pkl'))
    label_encoder = joblib.load(os.path.join(artifacts_path, 'label_encoder.pkl'))
    print(model.output_shape)

    # Single file prediction
    single_prediction = predict_emotions(model, file_path, scaler, label_encoder)
    print("Single File Prediction:", single_prediction)
    return single_prediction

# Run the demo when this file/cell is executed directly rather than imported.
if __name__ == "__main__":
    main()

model input shape (None, 1, 168, 1)
(None, 7)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 974ms/step
scaled features shape (1, 168)
Single File Prediction: {'top_emotion': 'Neutral', 'top_emotion_confidence': 0.9999837875366211, 'top_3_predictions': [{'emotion': 'Neutral', 'confidence': 0.9999837875366211}, {'emotion': 'Surprised', 'confidence': 8.875104867911432e-06}, {'emotion': 'Sad', 'confidence': 3.5404621030465933e-06}]}
