In [7]:
import tensorflow as tf
import numpy as np

# Audio feature extraction below relies on librosa.
# If it is missing from the environment: pip install librosa
import librosa

# Locations of the trained classifier and of the recording to analyse.
model_path = "D:/AI_interview/audio_confidence/AI_audio2.h5"
audio_path = "D:/AI_interview/recorded/answer_4.wav"

# Emotion label for each output index of the classifier.
# NOTE(review): the order presumably mirrors the label encoding used at
# training time -- confirm against the training notebook.
emotion_classes = [
    "happy",
    "angry",
    "sad",
    "neutral",
    "calm",
    "fear",
    "disgust",
    "surprise",
]

# Restore the trained Keras model from disk.
model = tf.keras.models.load_model(model_path)

# Load the audio file and extract features
def extract_features(audio_path, n_mfcc=13, max_frames=162):
    """Compute a fixed-width MFCC matrix for one audio file.

    The file is loaded with librosa, MFCCs are computed, and the time axis
    (axis 1) is zero-padded on the right or truncated so that the result
    always has exactly ``max_frames`` columns.

    Args:
        audio_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute (rows of the result).
        max_frames: Number of frames to keep (columns of the result).

    Returns:
        A NumPy array of shape ``(n_mfcc, max_frames)``.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    # NOTE(review): the number of frames and the MFCC values therefore depend
    # on the recording's sample rate -- confirm it matches what the model was
    # trained on.
    audio, sr = librosa.load(audio_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    n_frames = mfccs.shape[1]
    if n_frames < max_frames:
        # Short clip: append zero-valued frames at the end of the time axis.
        mfccs = np.pad(
            mfccs,
            ((0, 0), (0, max_frames - n_frames)),
            mode="constant",
        )
    else:
        # Long (or exact-length) clip: keep only the first max_frames frames.
        mfccs = mfccs[:, :max_frames]

    return mfccs

# Preprocess the audio for model prediction
input_data = extract_features(audio_path)
# Add a trailing channel dimension: (coefficients, frames) -> (coefficients, frames, 1).
# NOTE(review): no batch axis is added here, so the model presumably treats the
# leading (MFCC coefficient) axis as the batch axis and returns one prediction
# row per coefficient -- confirm against the model's expected input shape and
# the feature pipeline used for training.
input_data = np.expand_dims(input_data, axis=-1)

# Make predictions
predictions = model.predict(input_data)

# Map predictions to emotion classes.
# The prediction array may hold several rows. A bare np.argmax() would index
# the *flattened* array and could yield an index outside emotion_classes, so
# the rows are first averaged into a single per-class score vector.
class_scores = np.asarray(predictions)
class_scores = class_scores.reshape(-1, class_scores.shape[-1]).mean(axis=0)
if class_scores.shape[0] != len(emotion_classes):
    raise ValueError(
        f"Model produced {class_scores.shape[0]} class scores but "
        f"{len(emotion_classes)} emotion classes are defined"
    )
predicted_class_index = int(np.argmax(class_scores))
predicted_emotion = emotion_classes[predicted_class_index]

# Score the prediction.
# NOTE(review): confidence_score is 100 only for the positive emotions and 0
# for everything else, exactly as before -- confirm this is the intended rule.
if predicted_emotion in ["happy", "calm", "neutral"]:
    emotion_score, confidence_score = 100, 100
elif predicted_emotion in ["sad", "fear", "surprise"]:
    emotion_score, confidence_score = 60, 0
else:  # ["disgust", "angry"]
    emotion_score, confidence_score = 40, 0

# Print the results
print(f"Predicted Emotion: {predicted_emotion}")
print(f"Emotion Score: {emotion_score}")
print(f"Confidence Score: {confidence_score}")


Predicted Emotion: happy
Emotion Score: 100
Confidence Score: 100
