In [8]:
import cv2
import numpy as np
import time
import os
import tkinter as tk 
from tkinter import filedialog
from collections import deque
from tensorflow.keras.models import load_model 
from tensorflow.keras.applications.resnet_v2 import preprocess_input as preprocess_emotion 
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as preprocess_action 

# --- 1. SETTINGS & COLORS ---
# Saved Keras model files loaded at start-up.
ACTION_MODEL_PATH = 'final_sgd_model.h5'
EMOTION_MODEL_PATH = 'Emotions_best2.h5'

# Target sizes handed to cv2.resize for the action frames and the face crops.
IMG_SIZE_ACTION = (112, 112)
IMG_SIZE_EMOTION = (112, 112)

# Number of consecutive frames that make up one action-model input.
SEQUENCE_LENGTH = 16

# An action label is only displayed when its top score exceeds this value;
# otherwise the overlay shows "Neutral".
ACTION_THRESHOLD = 0.5

# UI colors, written in OpenCV's (B, G, R) channel order.
COLOR_BLACK = (0, 0, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLUE = (255, 0, 0)
COLOR_GREEN = (0, 255, 0)
COLOR_CYAN = (255, 255, 0)

# --- 2. LOAD MODELS & WARM UP ---
# Loads both Keras models and the OpenCV frontal-face Haar cascade, then runs
# one throw-away prediction through each model before the main loop starts.
print(" Loading Models... Please wait.", flush=True)
emotion_model = load_model(EMOTION_MODEL_PATH)
action_model = load_model(ACTION_MODEL_PATH)

face_classifier = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
# CascadeClassifier does not raise when the XML file is missing or unreadable;
# it simply stays empty and detectMultiScale fails later with an opaque
# assertion. Fail here instead, with a message that names the real problem.
if face_classifier.empty():
    raise RuntimeError(
        "Could not load 'haarcascade_frontalface_default.xml' from "
        f"{cv2.data.haarcascades!r}"
    )

print(" Warming up models...", flush=True)
# Warm-up inputs are derived from the settings so they always match what the
# main loop feeds the models. cv2.resize sizes are (width, height) while array
# shapes are (height, width), hence the swapped indices.
dummy_action = np.zeros(
    (1, SEQUENCE_LENGTH, IMG_SIZE_ACTION[1], IMG_SIZE_ACTION[0], 3),
    dtype='float32',
)
dummy_emotion = np.zeros(
    (1, IMG_SIZE_EMOTION[1], IMG_SIZE_EMOTION[0], 1),
    dtype='float32',
)
action_model.predict(dummy_action, verbose=0)
emotion_model.predict(dummy_emotion, verbose=0)
print(" Models Ready!", flush=True)

# Labels
# Class names for the emotion model, indexed by the argmax of its output.
EMOTION_LABELS = [
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Neutral',
    'Sad',
    'Surprise',
]

# Class names for the action model, indexed by the argmax of its output.
# The names are passed through sorted() so the index order is the sorted order.
ACTION_LABELS = sorted([
    'ApplyEyeMakeup',
    'ApplyLipstick',
    'Archery',
    'BabyCrawling',
    'BalanceBeam',
    'BandMarching',
    'BaseballPitch',
    'Basketball',
    'BasketballDunk',
    'BenchPress',
    'Biking',
    'Billiards',
    'BlowDryHair',
    'BlowingCandles',
    'BodyWeightSquats',
    'Bowling',
    'BoxingPunchingBag',
    'BoxingSpeedBag',
    'BreastStroke',
    'BrushingTeeth',
    'CleanAndJerk',
    'CliffDiving',
    'CricketBowling',
    'CricketShot',
    'CuttingInKitchen',
    'Diving',
    'Drumming',
    'Fencing',
    'FieldHockeyPenalty',
    'FloorGymnastics',
    'FrisbeeCatch',
    'FrontCrawl',
    'GolfSwing',
    'Haircut',
    'HammerThrow',
    'Hammering',
    'HandstandPushups',
    'HandstandWalking',
    'HeadMassage',
    'HighJump',
    'HorseRace',
    'HorseRiding',
    'HulaHoop',
    'IceDancing',
    'JavelinThrow',
    'JugglingBalls',
    'JumpRope',
    'JumpingJack',
    'Kayaking',
    'Knitting',
    'LongJump',
    'Lunges',
    'MilitaryParade',
    'Mixing',
    'MoppingFloor',
    'Nunchucks',
    'ParallelBars',
    'PizzaTossing',
    'PlayingCello',
    'PlayingDaf',
    'PlayingDhol',
    'PlayingFlute',
    'PlayingGuitar',
    'PlayingPiano',
    'PlayingSitar',
    'PlayingTabla',
    'PlayingViolin',
    'PoleVault',
    'PommelHorse',
    'PullUps',
    'Punch',
    'PushUps',
    'Rafting',
    'RockClimbingIndoor',
    'RopeClimbing',
    'Rowing',
    'SalsaSpin',
    'ShavingBeard',
    'Shotput',
    'SkateBoarding',
    'Skiing',
    'Skijet',
    'SkyDiving',
    'SoccerJuggling',
    'SoccerPenalty',
    'StillRings',
    'SumoWrestling',
    'Surfing',
    'Swing',
    'TableTennisShot',
    'TaiChi',
    'TennisSwing',
    'ThrowDiscus',
    'TrampolineJumping',
    'Typing',
    'UnevenBars',
    'VolleyballSpiking',
    'WalkingWithDog',
    'WallPushups',
    'WritingOnBoard',
    'YoYo',
])

# --- 3. INPUT SELECTION ---
# Asks on the console whether to read from the webcam or from a video file
# picked through a Tk file dialog, then opens the matching cv2.VideoCapture.
# Any answer other than '2', or a cancelled dialog, falls back to the webcam.
print("\n-----------------------------------", flush=True)
print(" SYSTEM INPUT SELECTION", flush=True)
print("-----------------------------------", flush=True)
print("1. Live Webcam", flush=True)
print("2. Video File", flush=True)

# Strip whitespace so an answer such as "2 " is not mistaken for "not 2".
choice = input("Enter choice (1 or 2): ").strip()

video_source = 0  # Default to Webcam (capture device index 0)

if choice == '2':
    print(" Opening file selector...", flush=True)
    # The dialog needs a Tk root window; it is hidden right away and destroyed
    # once the dialog returns, so no invisible window is left behind.
    root = tk.Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(
            title="Select Video File",
            filetypes=[("Video Files", "*.mp4 *.avi *.mov *.mkv")]
        )
    finally:
        root.destroy()

    if file_path:
        video_source = file_path
        print(f" Selected: {os.path.basename(file_path)}", flush=True)
    else:
        print(" No file selected. Reverting to Webcam.", flush=True)

cap = cv2.VideoCapture(video_source)
# VideoCapture does not raise when the device or file cannot be opened, so
# report it here instead of leaving the failure to show up as missing frames.
if not cap.isOpened():
    print(f" Warning: could not open input source: {video_source}", flush=True)

# --- 4. MAIN LOOP ---
# Reads frames from `cap` until 'q' is pressed or the source runs dry. For each
# frame it:
#   * appends a preprocessed copy to a rolling window and, once the window is
#     full, periodically asks the action model for a label;
#   * detects faces, outlines each one and asks the emotion model for a label
#     (the overlay shows the label of the last face processed);
#   * draws both labels on a black banner and shows the frame in a window.
# A video file is rewound and replayed when it ends; a webcam stream that stops
# delivering frames ends the loop.

# Set to True to draw a frames-per-second readout in the top-right corner.
SHOW_FPS = False
# The action model is only run on every Nth frame.
ACTION_PREDICT_EVERY = 4

frames_queue = deque(maxlen=SEQUENCE_LENGTH)
current_action = "Neutral"
current_emotion = "Neutral"

prev_frame_time = time.time()
frame_count = 0
frames_since_rewind = 0    # frames decoded since the source was (re)started
last_emotion_error = None  # last reported emotion failure, to avoid log spam

print("\n System Started. Press 'q' to quit.", flush=True)

try:
    while True:
        ret, frame = cap.read()

        if not ret:
            # Only rewind a file that actually produced frames; otherwise an
            # unreadable file would be "replayed" forever.
            if isinstance(video_source, str) and frames_since_rewind > 0:
                print(" Replaying video...", flush=True)
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                frames_since_rewind = 0
                continue
            if frame_count == 0:
                print(" No frames could be read from the input source.", flush=True)
            break

        frames_since_rewind += 1
        width = frame.shape[1]

        # --- ACTION RECOGNITION ---
        resized_frame = cv2.resize(frame, IMG_SIZE_ACTION)
        rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
        processed_frame = preprocess_action(rgb_frame.astype('float32'))
        frames_queue.append(processed_frame)

        if len(frames_queue) == SEQUENCE_LENGTH and frame_count % ACTION_PREDICT_EVERY == 0:
            input_sequence = np.expand_dims(np.array(frames_queue), axis=0)
            preds = action_model.predict(input_sequence, verbose=0)[0]

            if np.max(preds) > ACTION_THRESHOLD:
                current_action = ACTION_LABELS[np.argmax(preds)]
            else:
                current_action = "Neutral"

        # --- EMOTION RECOGNITION ---
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_classifier.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), COLOR_BLUE, 3)

            try:
                roi_gray = cv2.resize(
                    gray[y:y + h, x:x + w], IMG_SIZE_EMOTION, interpolation=cv2.INTER_AREA
                )
                if not roi_gray.any():
                    continue  # completely black crop: nothing to classify

                # Scale 0..255 pixels to -1..1 and add channel and batch axes.
                roi = roi_gray.astype('float32')
                roi = np.expand_dims(roi, axis=-1)
                roi = roi / 127.5 - 1.0
                roi = np.expand_dims(roi, axis=0)

                e_preds = emotion_model.predict(roi, verbose=0)[0]
                current_emotion = EMOTION_LABELS[np.argmax(e_preds)]
            except Exception as exc:
                # Best effort: one bad face crop must not stop the video, but
                # the failure is reported (once per distinct message) instead
                # of being silently discarded. KeyboardInterrupt / SystemExit
                # are deliberately not caught.
                message = f"{type(exc).__name__}: {exc}"
                if message != last_emotion_error:
                    print(f" Emotion prediction skipped: {message}", flush=True)
                    last_emotion_error = message

        # --- UI DRAWING ---
        new_frame_time = time.time()
        elapsed = new_frame_time - prev_frame_time
        prev_frame_time = new_frame_time

        cv2.rectangle(frame, (0, 0), (width, 85), COLOR_BLACK, -1)

        cv2.putText(frame, f"ACTION: {current_action}", (10, 35),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, COLOR_GREEN, 2)
        cv2.putText(frame, f"EMOTION: {current_emotion}", (10, 75),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, COLOR_CYAN, 2)

        # Two timestamps can be identical on coarse clocks, so guard the
        # division rather than risk a ZeroDivisionError.
        if SHOW_FPS and elapsed > 0:
            fps_text = f"FPS: {int(1 / elapsed)}"
            (text_w, _text_h), _ = cv2.getTextSize(fps_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
            cv2.putText(frame, fps_text, (width - text_w - 20, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, COLOR_WHITE, 2)

        cv2.imshow('Integrated model', frame)

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera/file handle and close the window, even when the
    # loop is left through an exception or a keyboard interrupt.
    cap.release()
    cv2.destroyAllWindows()

 Loading Models... Please wait.




 Warming up models...




 Models Ready!

-----------------------------------
 SYSTEM INPUT SELECTION
-----------------------------------
1. Live Webcam
2. Video File

 System Started. Press 'q' to quit.
