# In [1]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# Restore the trained network from its saved file; this runs once when the
# cell/module is executed.
model = load_model('best_model.keras')

# Class names for the model output. The prediction code indexes this list
# with the argmax of the model's output, so the order here must match the
# order of the model's output units.
emotion_labels = [
    'neutral',
    'calm',
    'happy',
    'sad',
    'angry',
    'fearful',
    'disgust',
    'surprised',
]

# Feature extraction for video frames
def extract_video_features(frames, sequence_length=20):
    """Build a fixed-length, model-ready batch from a sequence of video frames.

    Only the first ``sequence_length`` frames are used. Each of them is
    resized to 112x112 and written into a zero-initialised buffer, so a clip
    shorter than ``sequence_length`` (including an empty one) is padded with
    all-zero frames at the end.

    Args:
        frames: Iterable of images accepted by ``cv2.resize``. Each frame is
            expected to have 3 channels so that it fits a ``(112, 112, 3)``
            slot; anything else raises when it is stored.
        sequence_length: Number of frames in the returned sequence.

    Returns:
        A ``float32`` array of shape ``(1, sequence_length, 112, 112, 3)``.
        The dtype is the same whether or not padding was required.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1, or a resized
            frame does not fit a ``(112, 112, 3)`` slot.
    """
    if sequence_length < 1:
        raise ValueError('sequence_length must be a positive integer')

    # Pre-allocating zeros makes padding implicit: slots that never receive a
    # frame simply stay zero.
    features = np.zeros((sequence_length, 112, 112, 3), dtype=np.float32)

    # zip() against range() stops after sequence_length frames, so frames that
    # would be thrown away are never resized.
    for index, frame in zip(range(sequence_length), frames):
        features[index] = cv2.resize(frame, (112, 112))

    # Add the leading batch axis expected by the caller.
    return features[np.newaxis, ...]

# Function to play video and predict emotion once for the entire video
def play_video_and_predict_final_emotion(video_file):
    """Predict one emotion for a video, then play it with the label overlaid.

    Every frame of ``video_file`` is read into memory. The frames are passed
    to ``extract_video_features`` (which uses the leading ``SEQUENCE_LENGTH``
    frames) and the result is fed to the module-level ``model`` together with
    an all-zero audio input. The label chosen by argmax over the model output
    is then drawn on each frame while the video is shown in an OpenCV window.
    Press ``q`` to stop playback early.

    Args:
        video_file: Path (or other source accepted by ``cv2.VideoCapture``)
            of the video to analyse and play.

    Returns:
        None. The result is only shown on screen.
    """
    SEQUENCE_LENGTH = 20  # Number of frames to use for the prediction

    cap = cv2.VideoCapture(video_file)
    frames = []
    try:
        # Collect all frames from the video; they are reused for playback.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Release the capture even if reading fails part-way through.
        cap.release()

    try:
        if not frames:
            # Unreadable path or empty video: report it instead of silently
            # doing nothing.
            print(f'No frames could be read from {video_file!r}')
            return

        video_features = extract_video_features(frames, sequence_length=SEQUENCE_LENGTH)
        # Placeholder for audio features (all zeros); adjust as needed.
        audio_features = np.zeros((1, 40, 44, 1))

        # Make a single prediction for the entire video.
        prediction = model.predict([audio_features, video_features])
        predicted_emotion = emotion_labels[np.argmax(prediction)]

        # The caption is identical for every frame, so build it once.
        caption = f'Predicted Emotion: {predicted_emotion}'

        for frame in frames:
            height, width = frame.shape[:2]
            # White banner along the bottom edge so the red text stays legible.
            cv2.rectangle(frame, (10, height - 40), (width - 10, height - 10),
                          (255, 255, 255), cv2.FILLED)
            cv2.putText(frame, caption, (20, height - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            cv2.imshow('Emotion Detection', frame)
            # Wait 25 ms between frames; 'q' aborts playback.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Always tear down the display window, even if prediction or drawing
        # raised.
        cv2.destroyAllWindows()

# Demo run: analyse one sample clip and play it back with the predicted
# emotion drawn on every frame.
play_video_and_predict_final_emotion(
    video_file='dataset/videos/Actor_16/02-02-06-01-01-01-16.mp4',
)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
