In [19]:
import cv2
import mediapipe as mp #face detector
import math
import numpy as np
import time
import os

import warnings
warnings.simplefilter("ignore", UserWarning)

# torch
import torch
from PIL import Image
from torchvision import transforms

In [20]:
def pth_processing(fp):
    class PreprocessInput(torch.nn.Module):
        def init(self):
            super(PreprocessInput, self).init()

        def forward(self, x):
            x = x.to(torch.float32)
            x = torch.flip(x, dims=(0,))
            x[0, :, :] -= 91.4953
            x[1, :, :] -= 103.8827
            x[2, :, :] -= 131.0912
            return x

    def get_img_torch(img):
        ttransform = transforms.Compose([
            transforms.PILToTensor(),
            PreprocessInput()
        ])
        img = img.resize((224, 224), Image.Resampling.NEAREST)
        img = ttransform(img)
        img = torch.unsqueeze(img, 0)
        return img
    return get_img_torch(fp)

def norm_coordinates(normalized_x, normalized_y, image_width, image_height):
    x_px = min(math.floor(normalized_x * image_width), image_width - 1)
    y_px = min(math.floor(normalized_y * image_height), image_height - 1)
    return x_px, y_px

def get_box(fl, w, h):
    idx_to_coors = {}
    for idx, landmark in enumerate(fl.landmark):
        landmark_px = norm_coordinates(landmark.x, landmark.y, w, h)
        if landmark_px:
            idx_to_coors[idx] = landmark_px

    x_min = np.min(np.asarray(list(idx_to_coors.values()))[:,0])
    y_min = np.min(np.asarray(list(idx_to_coors.values()))[:,1])
    endX = np.max(np.asarray(list(idx_to_coors.values()))[:,0])
    endY = np.max(np.asarray(list(idx_to_coors.values()))[:,1])

    (startX, startY) = (max(0, x_min), max(0, y_min))
    (endX, endY) = (min(w - 1, endX), min(h - 1, endY))
    
    return startX, startY, endX, endY

def display_EMO_PRED(img, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), line_width=2):
    lw = line_width or max(round(sum(img.shape) / 2 * 0.003), 2)
    text2_color = (255, 0, 255)
    p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
    cv2.rectangle(img, p1, p2, text2_color, thickness=lw, lineType=cv2.LINE_AA)
    font = cv2.FONT_HERSHEY_SIMPLEX

    tf = max(lw - 1, 1)
    text_fond = (0, 0, 0)
    text_width_2, text_height_2 = cv2.getTextSize(label, font, lw / 3, tf)
    text_width_2 = text_width_2[0] + round(((p2[0] - p1[0]) * 10) / 360)
    center_face = p1[0] + round((p2[0] - p1[0]) / 2)

    cv2.putText(img, label,
                (center_face - round(text_width_2 / 2), p1[1] - round(((p2[0] - p1[0]) * 20) / 360)), font,
                lw / 3, text_fond, thickness=tf, lineType=cv2.LINE_AA)
    cv2.putText(img, label,
                (center_face - round(text_width_2 / 2), p1[1] - round(((p2[0] - p1[0]) * 20) / 360)), font,
                lw / 3, text2_color, thickness=tf, lineType=cv2.LINE_AA)
    return img

def display_FPS(img, text, margin=1.0, box_scale=1.0):
    img_h, img_w, _ = img.shape
    line_width = int(min(img_h, img_w) * 0.001)
    thickness = max(int(line_width / 3), 1)

    font_face = cv2.FONT_HERSHEY_SIMPLEX
    font_color = (0, 0, 0)
    font_scale = thickness / 1.5

    t_w, t_h = cv2.getTextSize(text, font_face, font_scale, None)[0]

    margin_n = int(t_h * margin)
    sub_img = img[0 + margin_n: 0 + margin_n + t_h + int(2 * t_h * box_scale),
              img_w - t_w - margin_n - int(2 * t_h * box_scale): img_w - margin_n]

    white_rect = np.ones(sub_img.shape, dtype=np.uint8) * 255

    img[0 + margin_n: 0 + margin_n + t_h + int(2 * t_h * box_scale),
    img_w - t_w - margin_n - int(2 * t_h * box_scale):img_w - margin_n] = cv2.addWeighted(sub_img, 0.5, white_rect, .5, 1.0)

    cv2.putText(img=img,
                text=text,
                org=(img_w - t_w - margin_n - int(2 * t_h * box_scale) // 2,
                     0 + margin_n + t_h + int(2 * t_h * box_scale) // 2),
                fontFace=font_face,
                fontScale=font_scale,
                color=font_color,
                thickness=thickness,
                lineType=cv2.LINE_AA,
                bottomLeftOrigin=False)

    return img

In [24]:
import cv2
import numpy as np
import torch
import time
import mediapipe as mp
from PIL import Image
from collections import defaultdict

def calculate_emotion_percentages(emotion_counts):
    """
    Calculate percentage of each emotion detected.
    """
    total_detections = sum(emotion_counts.values())
    if total_detections == 0:
        return {}
    
    emotion_percentages = {
        emotion: round((count / total_detections) * 100, 2) 
        for emotion, count in emotion_counts.items()
    }
    return emotion_percentages

def classify_interest_level(emotion_percentages):
    """
    Classify interest level based on emotion percentages.
    """
    # Define weights for different emotions
    interest_weights = {
        'Neutral': 0.5,
        'Happiness': 1.0,
        'Surprise': 0.7,
        'Sadness': -0.8,
        'Fear': -1.0,
        'Disgust': -1.0,
        'Anger': -0.9
    }
    
    # Calculate weighted interest score
    interest_score = sum(
        interest_weights.get(emotion, 0) * percentage 
        for emotion, percentage in emotion_percentages.items()
    )
    
    # Classify interest level with more detailed descriptions
    if interest_score > 0.5:
        return "Highly Engaged", "Showing high enthusiasm and  positive emotional response", (0, 255, 0)
    elif interest_score > 0:
        return "Moderately Engaged", "Displaying mild interest and positive signals", (0, 200, 255)
    elif interest_score == 0:
        return "Neutral", "Maintaining a balanced, non-committal emotional state", (128, 128, 128)
    elif interest_score > -0.5:
        return "Slightly Disengaged", "Exhibiting reduced interest and potential discomfort", (255, 165, 0)
    else:
        return "Highly Disengaged", "Showing strong signs of low interest or negative emotions", (0, 0, 255)

def create_emotion_bar(emotion_percentages, width=300, height=200):
    """
    Create a bar graph of emotion percentages.
    """
    # Create a white background
    bar_graph = np.ones((height, width, 3), dtype=np.uint8) * 255
    
    # Define colors for emotions
    emotion_colors = {
        'Neutral': (128, 128, 128),
        'Happiness': (0, 255, 0),
        'Sadness': (255, 0, 0),
        'Surprise': (255, 165, 0),
        'Fear': (128, 0, 128),
        'Disgust': (75, 0, 130),
        'Anger': (255, 0, 255)
    }
    
    # Calculate bar heights
    max_percentage = max(emotion_percentages.values()) if emotion_percentages else 100
    bar_width = width // len(emotion_percentages)
    
    # Draw bars
    for i, (emotion, percentage) in enumerate(emotion_percentages.items()):
        bar_height = int((percentage / max_percentage) * (height - 50))
        color = emotion_colors.get(emotion, (200, 200, 200))
        
        # Draw bar
        start_x = i * bar_width
        cv2.rectangle(bar_graph, 
                      (start_x, height - bar_height), 
                      (start_x + bar_width - 5, height), 
                      color, -1)
        
        # Add text
        cv2.putText(bar_graph, 
                    f'{emotion}\n{percentage}%', 
                    (start_x, height - bar_height - 10), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)
    
    return bar_graph

def main():
    # Previous initialization code remains the same
    mp_face_mesh = mp.solutions.face_mesh
    name_backbone_model = '0_66_49_wo_gl'
    name_LSTM_model = 'IEMOCAP'
    
    # Load models (previous code)
    pth_backbone_model = torch.jit.load(r'C:\Users\prajw\Documents\Codefinalyearproject\Final_code_large\EMO-AffectNetModel\tf\torchscript_model_0_66_37_wo_gl.pth')
    pth_backbone_model.eval()
    
    pth_LSTM_model = torch.jit.load(r'C:\Users\prajw\Documents\Codefinalyearproject\Final_code_large\EMO-AffectNetModel\tf\Aff-Wild2.pth')
    pth_LSTM_model.eval()
    
    DICT_EMO = {0: 'Neutral', 1: 'Happiness', 2: 'Sadness', 3: 'Surprise', 4: 'Fear', 5: 'Disgust', 6: 'Anger'}
    
    # Initialize emotion tracking
    emotion_counts = defaultdict(int)
    
    # Increase frame size
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))
    
    path_save_video = 'result_multi.mp4'
    vid_writer = cv2.VideoWriter(path_save_video, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    
    # Dictionary to store LSTM features for each face
    face_features = {}
    
    with mp_face_mesh.FaceMesh(
        max_num_faces=20,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as face_mesh:
        while cap.isOpened():
            t1 = time.time()
            success, frame = cap.read()
            if frame is None:
                break
            
            # Create a semi-transparent overlay for better readability
            overlay = frame.copy()
            cv2.rectangle(overlay, (0, 0), (400, h), (200, 200, 200), -1)
            cv2.addWeighted(overlay, 0.5, frame, 0.5, 0, frame)
            
            frame_copy = frame.copy()
            frame_copy.flags.writeable = False
            frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_copy)
            frame_copy.flags.writeable = True
            
            if results.multi_face_landmarks:
                # Process each detected face
                for face_idx, fl in enumerate(results.multi_face_landmarks):
                    startX, startY, endX, endY = get_box(fl, w, h)
                    cur_face = frame_copy[startY:endY, startX:endX]
                    
                    # Process face with the model
                    cur_face = pth_processing(Image.fromarray(cur_face))
                    features = torch.nn.functional.relu(pth_backbone_model.extract_features(cur_face)).cpu().detach().numpy()
                    
                    # Initialize or update LSTM features for this face
                    if face_idx not in face_features:
                        face_features[face_idx] = [features] * 10
                    else:
                        face_features[face_idx] = face_features[face_idx][1:] + [features]
                    
                    # Process with LSTM
                    lstm_f = torch.from_numpy(np.vstack(face_features[face_idx]))
                    lstm_f = torch.unsqueeze(lstm_f, 0)
                    output = pth_LSTM_model(lstm_f).cpu().detach().numpy()
                    
                    cl = np.argmax(output)
                    label = DICT_EMO[cl]
                    
                    # Track emotion counts
                    emotion_counts[label] += 1
                    
                    # Display emotion prediction on frame with improved bounding box
                    cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 3)
                    cv2.putText(frame, label, (startX, startY-10), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            
            # Calculate emotion percentages
            emotion_percentages = calculate_emotion_percentages(emotion_counts)
            
            # Classify interest level
            interest_level, explanation, color = classify_interest_level(emotion_percentages)
            
            # Create emotion bar graph
            emotion_bar = create_emotion_bar(emotion_percentages)
            
            # Resize and place emotion bar on the frame
            emotion_bar_resized = cv2.resize(emotion_bar, (400, 200))
            frame[h-250:h-50, 0:400] = emotion_bar_resized
            
            # Display interest level with enhanced styling
            cv2.rectangle(frame, (0, 0), (400, 50), color, -1)
            cv2.putText(frame, f"{interest_level}", (10, 35), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            
            # Display explanation
            cv2.putText(frame, explanation, (10, 80), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
            
            t2 = time.time()
            # FPS display
            cv2.putText(frame, f'FPS: {1 / (t2 - t1):.1f}', (w-150, 50), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            
            vid_writer.write(frame)
            cv2.imshow('Emotion Detection', frame)
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        # Print final emotion percentages and interest level
        print("\nFinal Emotion Percentages:")
        for emotion, percentage in emotion_percentages.items():
            print(f"{emotion}: {percentage}%")
        
        print(f"\nOverall Interest Level: {interest_level}")
        print(f"Explanation: {explanation}")
        
        vid_writer.release()
        cap.release()
        cv2.destroyAllWindows()s

if __name__ == "__main__":
    main()


Final Emotion Percentages:
Anger: 39.75%
Neutral: 29.61%
Sadness: 1.42%
Happiness: 21.91%
Fear: 0.97%
Surprise: 3.43%
Disgust: 2.91%

Overall Interest Level: Highly Disengaged
Explanation: Showing strong signs of low interest or negative emotions
