# In [None]:
import cv2
import dlib
import numpy as np
from scipy.ndimage import zoom
from time import time
import mediapipe as mp
import csv
import sys

from tensorflow.keras.models import load_model
from imutils import face_utils
from threading import Thread
from multiprocessing import Process

def process_video(frame, output_file):
    """Run facial-emotion recognition over a video and save the annotated result.

    Every frame is searched for faces with dlib's frontal face detector.  Each
    face is cropped from the grayscale frame, resized to 48x48, scaled by its
    maximum value and passed to the Keras model loaded from ``video.h5``.  The
    frame is annotated with the face box, the landmark points, the model's
    per-class scores and the top label, shown in a window named 'Video' and
    appended to ``output_file``.  Pressing ``q`` stops processing early.

    After the loop, the wall-clock time attributed to each predicted class and
    the class with the largest total are printed.

    Args:
        frame: Video source handed to ``cv2.VideoCapture`` (the caller in this
            file passes a file path).  The parameter name is kept for backward
            compatibility even though it is not a single frame.
        output_file: Path of the annotated video, written with the ``mp4v``
            codec.
    """
    shape_x = 48
    shape_y = 48

    # Index i of every tuple names class i of the model output.
    # NOTE(review): class 1 is shown as "Disgust" in the score panel but as
    # "Confusion" on the face label and in the summary; both spellings are
    # kept exactly as they were -- confirm which one matches the trained model.
    panel_names = ("Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral")
    face_names = ("Angry", "Confusion", "Fear", "Happy", "Sad", "Surprise", "Neutral")
    summary_names = ("Anger", "Confusion", "Fear", "Happiness", "Sadness", "Surprise", "Neutral")
    font = cv2.FONT_HERSHEY_SIMPLEX

    # keras model is loaded, this model is for videos
    model = load_model('/Users/prithvika/Downloads/video.h5')
    face_detect = dlib.get_frontal_face_detector()
    predictor_landmarks = dlib.shape_predictor("/Users/prithvika/Downloads/face_landmarks.dat")

    # input the captured video
    video_capture = cv2.VideoCapture(frame)

    # Size and pace the writer from the source: VideoWriter only encodes frames
    # whose size matches the one it was opened with.  The previous fixed values
    # (640x480 at 20 fps) remain as fallbacks when the source reports nothing.
    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
    source_fps = video_capture.get(cv2.CAP_PROP_FPS)
    if not source_fps > 0:
        source_fps = 20.0
    out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), source_fps,
                          (frame_width, frame_height))

    start_time = time()
    emotion_start_time = time()
    emotion_durations = {index: 0 for index in range(len(summary_names))}

    try:
        while True:
            # Capture frame-by-frame
            ret, image = video_capture.read()
            if not ret:
                # Break the loop if the video is finished
                break

            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            rects = face_detect(gray, 1)

            for (i, rect) in enumerate(rects):
                shape = face_utils.shape_to_np(predictor_landmarks(gray, rect))

                # Identify face coordinates
                (x, y, w, h) = face_utils.rect_to_bb(rect)

                # The detector may return a box that extends past the frame
                # border; clamp it so negative values are not interpreted as
                # "count from the end" slice indices.
                face = gray[max(y, 0):max(y + h, 0), max(x, 0):max(x + w, 0)]
                if face.size == 0:
                    continue

                # Zoom on extracted face, cast to float and scale to [0, 1]
                face = zoom(face, (shape_x / face.shape[0], shape_y / face.shape[1]))
                face = face.astype(np.float32)
                peak = float(face.max())
                if peak > 0:
                    # An all-black crop would otherwise divide by zero.
                    face /= peak
                face = np.reshape(face.flatten(), (1, shape_x, shape_y, 1))

                # Make Prediction
                prediction = model.predict(face)
                prediction_result = int(np.argmax(prediction))

                # Rectangle around the face
                cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(image, "Face #{}".format(i + 1), (x - 10, y - 10), font, 0.5,
                            (0, 255, 0), 2)

                for (j, k) in shape:
                    cv2.circle(image, (int(j), int(k)), 1, (0, 0, 255), -1)

                # 1. Add prediction probabilities: one text row every 20 px,
                # one 180 px panel per face.
                panel_lines = ["----------------", "Emotional report : Face #" + str(i + 1)]
                panel_lines += [name + " : " + str(round(prediction[0][k], 3))
                                for k, name in enumerate(panel_names)]
                for row, line in enumerate(panel_lines):
                    cv2.putText(image, line, (40, 100 + 180 * i + 20 * row), font, 0.5, 155, 1)

                # Attribute the wall-clock time elapsed since the previous
                # prediction to the class that was just predicted.
                current_time = time()
                emotion_durations[prediction_result] += current_time - emotion_start_time
                emotion_start_time = current_time

                # 2. Annotate main image with a label
                cv2.putText(image, face_names[prediction_result], (x + w - 10, y - 10), font, 1,
                            (0, 255, 0), 2)

            cv2.putText(image, 'Number of Faces : ' + str(len(rects)), (40, 40), font, 1, 155, 1)
            cv2.imshow('Video', image)
            # VideoWriter expects BGR frames, which is what VideoCapture
            # delivers; converting to RGB here swapped red and blue in the file.
            out.write(image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        total_duration = time() - start_time
    finally:
        # When everything is done (or on error), release the capture
        video_capture.release()
        out.release()
        cv2.destroyAllWindows()

    # Calculate and print total duration of the video
    print(f"Total Duration of the Video: {total_duration} seconds")

    print(f"Output for Emotion-Detection")
    # Print cumulative duration of each emotion
    for emotion, duration in emotion_durations.items():
        print(f"Duration of {summary_names[emotion]} is (seconds): {duration}")

    # Find emotion with the maximum duration
    max_emotion = max(emotion_durations, key=emotion_durations.get)
    print(f"The emotion that was observed most is: {summary_names[max_emotion]}")
    
def _head_pose_direction(x_rotation, y_rotation):
    """Map the scaled X/Y rotation values to one of the five pose names."""
    if y_rotation < -200:
        return "Left"
    if y_rotation > 200:
        return "Right"
    if x_rotation < -150:
        return "Down"
    if x_rotation > 350:
        return "Up"
    return "Forward"


def face_position(input_file):
    """Estimate the head pose in each frame of a video and report time per direction.

    Each frame is mirrored, run through MediaPipe Face Mesh, and six landmarks
    are passed to ``cv2.solvePnP``.  The resulting rotation is classified as
    forward / left / right / up / down, drawn on the frame together with a
    line from the nose, and shown in the 'Head Pose Estimation' window.
    Pressing Esc stops early.  Every classified face adds one frame period
    (``1 / fps``) to its direction; the totals and the most frequent direction
    are printed at the end.

    Args:
        input_file: Video source handed to ``cv2.VideoCapture``.
    """
    mp_face_mesh = mp.solutions.face_mesh

    # Landmark indices passed to solvePnP; index 1 is also used as the nose
    # point for the direction line.
    pose_landmark_ids = (33, 263, 1, 61, 291, 199)

    # Insertion order doubles as the tie-break order for "most observed".
    seconds_by_pose = {"Forward": 0, "Left": 0, "Right": 0, "Up": 0, "Down": 0}

    cap = cv2.VideoCapture(input_file)

    # The frame rate is constant for the capture, so read it once.  Keep the
    # fractional part (e.g. 29.97) and fall back to an assumed 30 fps when the
    # container reports none, instead of dividing by zero.
    fps = cap.get(cv2.CAP_PROP_FPS)
    seconds_per_frame = 1.0 / fps if fps > 0 else 1.0 / 30.0

    try:
        with mp_face_mesh.FaceMesh(min_detection_confidence=0.5,
                                   min_tracking_confidence=0.5) as face_mesh:
            while cap.isOpened():
                success, image = cap.read()

                if not success:
                    print("Failed to read frame")
                    break

                # Flip the image horizontally for a later selfie-view display
                # Also convert the color space from BGR to RGB
                image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

                # To improve performance
                image.flags.writeable = False
                results = face_mesh.process(image)
                image.flags.writeable = True

                # Convert the color space from RGB to BGR
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                img_h, img_w, img_c = image.shape

                for face_landmarks in results.multi_face_landmarks or ():
                    # Fresh point lists per face: they are turned into arrays
                    # below, so reusing them for a second face would fail.
                    face_2d = []
                    face_3d = []

                    for idx, lm in enumerate(face_landmarks.landmark):
                        if idx not in pose_landmark_ids:
                            continue
                        if idx == 1:
                            nose_2d = (lm.x * img_w, lm.y * img_h)
                            nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 8000)

                        x, y = int(lm.x * img_w), int(lm.y * img_h)
                        face_2d.append([x, y])
                        face_3d.append([x, y, lm.z])

                    face_2d = np.array(face_2d, dtype=np.float64)
                    face_3d = np.array(face_3d, dtype=np.float64)

                    # The camera matrix
                    # NOTE(review): the principal point is conventionally
                    # (img_w / 2, img_h / 2); here the two are swapped.  Left
                    # unchanged because the angle thresholds were presumably
                    # tuned against this matrix -- confirm before fixing.
                    focal_length = 1 * img_w
                    cam_matrix = np.array([[focal_length, 0, img_h / 2],
                                           [0, focal_length, img_w / 2],
                                           [0, 0, 1]])

                    # The Distance Matrix
                    dist_matrix = np.zeros((4, 1), dtype=np.float64)

                    # Solve PnP
                    _solved, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix,
                                                               dist_matrix)

                    # Get rotational matrix, then the angles
                    rmat, jac = cv2.Rodrigues(rot_vec)
                    angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

                    # See where the user's head is tilting
                    pose = _head_pose_direction(angles[0] * 10000, angles[1] * 10000)
                    seconds_by_pose[pose] += seconds_per_frame
                    text = "Forward" if pose == "Forward" else f"Looking {pose}"

                    # Display the nose direction
                    nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec,
                                                                     cam_matrix, dist_matrix)

                    p1 = (int(nose_2d[0]), int(nose_2d[1]))
                    p2 = (int(nose_3d_projection[0][0][0]), int(nose_3d_projection[0][0][1]))

                    cv2.line(image, p1, p2, (255, 0, 0), 2)

                    # Add the text on the image
                    cv2.putText(image, text, (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                cv2.imshow('Head Pose Estimation', image)

                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        cap.release()

    print(f"Output for Head-Pose-Detection")
    for pose, seconds in seconds_by_pose.items():
        print(f"Duration of Time Looking {pose}: {seconds} seconds")

    # max() returns the first key holding the largest value, so ties resolve
    # in the order Forward, Left, Right, Up, Down.
    most_observed = max(seconds_by_pose, key=seconds_by_pose.get)
    print(f"The most observed head-pose is: Looking {most_observed}")

from concurrent.futures import ProcessPoolExecutor

def run_cpu_tasks_in_parallel(tasks):
    """Submit every zero-argument callable in ``tasks`` to a process pool and wait for all.

    All tasks are submitted before any result is awaited.  Return values are
    discarded; ``Future.result()`` is called on each task in submission order,
    so an exception raised by a task propagates to the caller.
    """
    with ProcessPoolExecutor() as pool:
        pending = []
        for job in tasks:
            pending.append(pool.submit(job))
        for future in pending:
            future.result()
#---------------------------------

def eye_tracking(input_file):
    """Classify the eye state in each frame of a video and report time per state.

    For the first face dlib finds in a frame, the eye aspect ratio (EAR) of
    both eyes is averaged.  Below 0.2 the frame counts as "Eyes Closed";
    otherwise the horizontal position of the eye landmarks relative to the
    frame width selects "Looking Left" (< 0.4), "Looking Right" (> 0.6) or
    "Looking Straight".  The state is drawn on the frame, the eye landmarks
    are marked, and the frame is shown in the 'Frame' window.  Pressing ``q``
    stops early.  Each classified frame adds one frame period (``1 / fps``) to
    its state; the totals and the most frequent state are printed at the end.

    Args:
        input_file: Video source handed to ``cv2.VideoCapture``.
    """
    # Load dlib face detector and facial landmarks predictor
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("/Users/prithvika/Downloads/shape_predictor_68_face_landmarks.dat")

    def detect_eyes(frame):
        """Return landmark points 36-41 and 42-47 of the first face, or (None, None)."""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)
        if not faces:
            return None, None
        shape = predictor(gray, faces[0])
        return shape.parts()[36:42], shape.parts()[42:48]

    def calculate_ear(eye):
        """Return the Eye Aspect Ratio: mean of the two vertical spans over the horizontal span."""
        eye = np.array([(point.x, point.y) for point in eye])
        vertical_a = np.linalg.norm(eye[1] - eye[5])
        vertical_b = np.linalg.norm(eye[2] - eye[4])
        horizontal = np.linalg.norm(eye[0] - eye[3])
        return (vertical_a + vertical_b) / (2.0 * horizontal)

    # Threshold on the average EAR below which the eyes count as closed
    # (you may need to adjust this)
    distraction_threshold = 0.2
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Insertion order doubles as the tie-break order for "most observed".
    seconds_by_state = {"Looking Right": 0, "Looking Left": 0, "Looking Straight": 0,
                        "Eyes Closed": 0}

    cap = cv2.VideoCapture(input_file)

    # Keep the fractional frame rate and fall back to an assumed 30 fps when
    # the container reports none, instead of dividing by zero.
    fps = cap.get(cv2.CAP_PROP_FPS)
    seconds_per_frame = 1.0 / fps if fps > 0 else 1.0 / 30.0

    try:
        while True:
            ret, frame = cap.read()

            if not ret:
                break

            left_eye, right_eye = detect_eyes(frame)

            if left_eye is not None and right_eye is not None:
                # Calculate the average EAR for both eyes
                avg_ear = (calculate_ear(left_eye) + calculate_ear(right_eye)) / 2.0

                if avg_ear < distraction_threshold:
                    state, color = "Eyes Closed", (0, 0, 255)
                else:
                    # Check gaze direction; the frame's own width is used so a
                    # capture that reports width 0 cannot cause a division by zero.
                    horizontal_ratio = (left_eye[0].x + right_eye[3].x) / 2 / frame.shape[1]
                    if horizontal_ratio < 0.4:
                        state = "Looking Left"
                    elif horizontal_ratio > 0.6:
                        state = "Looking Right"
                    else:
                        state = "Looking Straight"
                    color = (0, 255, 0)

                cv2.putText(frame, state, (10, 30), font, 0.8, color, 2)
                seconds_by_state[state] += seconds_per_frame

                # Mark the eye landmark points
                for eye in (left_eye, right_eye):
                    for point in eye:
                        cv2.circle(frame, (point.x, point.y), 3, (0, 255, 0), -1)

            cv2.imshow('Frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the video capture object even if a frame fails to process
        cap.release()

    print(f"Output for Eye-Tracking")
    print(f"Duration taken looking right: {seconds_by_state['Looking Right']} sec")
    print(f"Duration taken looking left: {seconds_by_state['Looking Left']} sec")
    print(f"Duration taken closed eyes: {seconds_by_state['Eyes Closed']} sec")
    print(f"Duration taken looking straight: {seconds_by_state['Looking Straight']} sec")

    # max() returns the first key holding the largest value, so ties resolve
    # in the order Right, Left, Straight, Closed.
    most_observed = max(seconds_by_state, key=seconds_by_state.get)
    print(f"Most observed eye movement is: {most_observed}")
#output_video.release()
    #cv2.destroyAllWindows()
    #-------------------------------------------
    

    
def display_on_same_window(frame, text1, text2, text3):
    """Draw three text lines on ``frame`` (30 px apart) and show it in 'Combined Output'."""
    for row, caption in enumerate((text1, text2, text3), start=1):
        cv2.putText(frame, caption, (10, 30 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    cv2.imshow('Combined Output', frame)

def main():
    """Run the three video analyses and write their printed reports to output.txt.

    Everything the analyses print is redirected to
    ``/Users/prithvika/Downloads/output.txt``; standard output is restored
    when the block exits, including when an analysis raises.
    """
    from contextlib import redirect_stdout

    input_video_file = '/Users/prithvika/Downloads/Emotion Videos/WhatsApp Video 2023-11-26 at 20.32.15.mp4'
    output_video_file = '/Users/prithvika/Downloads/output_video.mp4'

    # Redirect standard output to the file
    with open('/Users/prithvika/Downloads/output.txt', 'w') as f, redirect_stdout(f):
        print(f"New Instance:")
        # The previous code built a list of *call results*, so the analyses
        # already ran one after another while the list was created, and the
        # process pool was then handed three ``None`` values, which failed
        # with "TypeError: 'NoneType' object is not callable".  The analyses
        # are therefore called directly, in the same order.  Moving them into
        # worker processes would also need per-process output handling,
        # because this redirection only affects the current process.
        process_video(input_video_file, output_video_file)
        face_position(input_video_file)
        eye_tracking(input_video_file)
    
    #Thread(target = process_video(input_video_file, output_video_file)).start()
    #Thread(target = face_position(input_video_file)).start()

# Run the analyses only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# I0000 00:00:1701914199.708232       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 83), renderer: Apple M2
