In [3]:
import cv2
import dlib
import numpy as np
from IPython.display import display, clear_output
from scipy.spatial import distance
from imutils import face_utils
from scipy.ndimage import zoom
from tensorflow.keras.models import load_model
import mediapipe as mp
import time
import csv

# Function to detect eyes in a frame
def detect_eyes(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)

    if faces:
        shape = predictor(gray, faces[0])
        left_eye = shape.parts()[36:42]
        right_eye = shape.parts()[42:48]
        return left_eye, right_eye
    else:
        return None, None

# Function to calculate Eye Aspect Ratio (EAR)
def calculate_ear(eye):
    eye = np.array([(point.x, point.y) for point in eye])
    A = np.linalg.norm(eye[1] - eye[5])
    B = np.linalg.norm(eye[2] - eye[4])
    C = np.linalg.norm(eye[0] - eye[3])
    ear = (A + B) / (2.0 * C)
    return ear

# Load dlib face detector and facial landmarks predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("/Users/prithvika/Downloads/shape_predictor_68_face_landmarks.dat")

# Load emotion detection model
emotion_model = load_model('/Users/prithvika/Downloads/video.h5')

# Initialize video capture from the camera
cap = cv2.VideoCapture(0)  # 0 corresponds to the default camera (you can change it if you have multiple cameras)

# Get video properties for the output video
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize video writer for the output video
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter('/content/outputvideo.avi', fourcc, fps, (width, height))

# Initialize variables to record durations
duration_eyes_closed = 0
duration_looking_left = 0
duration_looking_right = 0
duration_looking_straight = 0

# Initialize variables for counting eye movement
count_left = 0
count_right = 0
count_straight = 0

# Load face detector and shape predictor for emotion detection
face_detector = dlib.get_frontal_face_detector()
shape_predictor = dlib.shape_predictor("/Users/prithvika/Downloads/face_landmarks.dat")

# Initialize head pose estimation
official_start_time = time.time()
start_time = time.time()
end_time = 0

#Variables to track emotion detected
emotion_start_time = time.time()
e_start_time = time.time()
e_end_time = 0
angry_emotion = 0
sad_emotion = 0
happy_emotion = 0
fear_emotion = 0
disgust_emotion = 0
neutral_emotion = 0
surprise_emotion = 0

# Variables to track time spent in different head pose directions
time_forward_seconds = 0
time_left_seconds = 0
time_right_seconds = 0
time_up_seconds = 0
time_down_seconds = 0

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)

while True:
    ret, frame = cap.read()

    if not ret:
        break

    # Eye tracking
    left_eye, right_eye = detect_eyes(frame)

    if left_eye is not None and right_eye is not None:
        ear_left = calculate_ear(left_eye)
        ear_right = calculate_ear(right_eye)

        # Calculate the average EAR for both eyes
        avg_ear = (ear_left + ear_right) / 2.0

        # Set a threshold for distraction detection (you may need to adjust this)
        distraction_threshold = 0.2

        # Check if the person is distracted
        if avg_ear < distraction_threshold:
            cv2.putText(frame, "Eyes Closed", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 2)
            duration_eyes_closed += 1 / fps  # Increment the duration
            count_straight += 1

        else:
            # Check gaze direction
            horizontal_ratio = (left_eye[0].x + right_eye[3].x) / 2 / width
            if horizontal_ratio < 0.4:
                cv2.putText(frame, "Looking Left", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
                duration_looking_left += 1 / fps  # Increment the duration
                count_left += 1
            elif horizontal_ratio > 0.6:
                cv2.putText(frame, "Looking Right", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
                duration_looking_right += 1 / fps  # Increment the duration
                count_right += 1
            else:
                cv2.putText(frame, "Looking Straight", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
                duration_looking_straight += 1 / fps  # Increment the duration

        # Draw contours around eyes
        for eye in [left_eye, right_eye]:
            for point in eye:
                x, y = point.x, point.y
                cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)

    # Emotion detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    rects = face_detector(gray, 1)

    for (i, rect) in enumerate(rects):
        shape = shape_predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)

        (x, y, w, h) = face_utils.rect_to_bb(rect)
        face = gray[y:y + h, x:x + w]
        face = zoom(face, (48 / face.shape[0], 48 / face.shape[1]))
        face = face.astype(np.float32)
        face /= float(face.max())
        face = np.reshape(face.flatten(), (1, 48, 48, 1))

        prediction = emotion_model.predict(face)
        prediction_result = np.argmax(prediction)

        # Rectangle around the face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Annotate main image with emotion label
        if prediction_result == 0:
            cv2.putText(frame, "Angry", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            angry_emotion += time.time() - e_start_time
            e_start_time = time.time()
        elif prediction_result == 1:
            cv2.putText(frame, "Disgust", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            disgust_emotion += time.time() - e_start_time
            e_start_time = time.time()
        elif prediction_result == 2:
            cv2.putText(frame, "Fear", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            fear_emotion += time.time() - e_start_time 
            e_start_time = time.time()
        elif prediction_result == 3:
            cv2.putText(frame, "Happy", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            happy_emotion += time.time() - e_start_time 
            e_start_time = time.time()
        elif prediction_result == 4:
            cv2.putText(frame, "Sad", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            sad_emotion += time.time() - e_start_time 
            e_start_time = time.time()
        elif prediction_result == 5:
            cv2.putText(frame, "Surprise", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            surprise_emotion += time.time() - e_start_time 
            e_start_time = time.time()
        else:
            cv2.putText(frame, "Neutral", (x + w - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            neutral_emotion += time.time() - e_start_time 
            e_start_time = time.time()

    # Head pose estimation
    startTime = time.time()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) #it was 1
#     frame = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
    frame.flags.writeable = False
    results = face_mesh.process(frame)
    frame.flags.writeable = True
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    img_h, img_w, img_c = frame.shape
    face_3d = []
    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 8000)

                    x, y = int(lm.x * img_w), int(lm.y * img_h)

                    # Get the 2D Coordinates
                    face_2d.append([x, y])

                    # Get the 3D Coordinates
                    face_3d.append([x, y, lm.z])

            face_2d = np.array(face_2d, dtype=np.float64)
            face_3d = np.array(face_3d, dtype=np.float64)
            focal_length = 1 * img_w

            cam_matrix = np.array([[focal_length, 0, img_h / 2],
                                   [0, focal_length, img_w / 2],
                                   [0, 0, 1]])

            dist_matrix = np.zeros((4, 1), dtype=np.float64)
            success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)
            rmat, jac = cv2.Rodrigues(rot_vec)

            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

            print(f"X Rotation: {angles[0] * 10000}")
            print(f"Y Rotation: {angles[1] * 10000}")

            if angles[1] * 10000 < -200:
                text = "Looking Left"
                time_left_seconds += time.time() - start_time
                start_time = time.time()

            elif angles[1] * 10000 > 200:
                text = "Looking Right"
                time_right_seconds += time.time() - start_time
                start_time = time.time()

            elif angles[0] * 10000 < -150:
                text = "Looking Down"
                time_down_seconds += time.time() - start_time
                start_time = time.time()

            elif angles[0] * 10000 > 350:
                text = "Looking Up"
                time_up_seconds += time.time() - start_time
                start_time = time.time()

            else:
                text = "Forward"
                time_forward_seconds += time.time() - start_time
                start_time = time.time()

            # Display the nose direction
            nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)

            p1 = (int(nose_2d[0]), int(nose_2d[1]))
            p2 = (int(nose_3d_projection[0][0][0]), int(nose_3d_projection[0][0][1]))

            cv2.line(frame, p1, p2, (255, 0, 0), 2)

            cv2.putText(frame, text, (width - 250, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 2)

    # Open the CSV file in write mode and append the angles to it
    with open('headPoses.csv', mode='a', newline='') as file:
        writer = csv.writer(file)

        # Write the header row if the file is empty
        if file.tell() == 0:
            writer.writerow(["X Rotation", "Y Rotation"])

        # Write the angles to the CSV file
        writer.writerow([angles[0] * 10000, angles[1] * 10000])

    output_video.write(frame)  # Write the frame to the output video

    # Display the frame without modifying color
    cv2.imshow('Frame', frame)
    # Clear the previous output
    clear_output(wait=True)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture object, video writer, and close all windows
cap.release()
output_video.release()
cv2.destroyAllWindows()


# Print the durations and most observed features for emotion detection
print(f"\nEmotion Detection:")
print(f"Duration of Happiness: {happy_emotion} seconds")
print(f"Duration of Sadness: {sad_emotion} seconds")
print(f"Duration of Disgust: {disgust_emotion} seconds")
print(f"Duration of Fear: {fear_emotion} seconds")
print(f"Duration of Anger: {angry_emotion} seconds")
print(f"Duration of Neutral: {neutral_emotion} seconds")
print(f"Duration of Surprise: {surprise_emotion} seconds")

# Determine the most observed emotions movement
max_eye_duration = max(happy_emotion, sad_emotion, disgust_emotion, fear_emotion, angry_emotion, neutral_emotion, surprise_emotion)
if max_eye_duration == happy_emotion:
    print("The most observed emotion: Happiness")
elif max_eye_duration == sad_emotion:
    print("The most observed emotion: Sadness")
elif max_eye_duration == disgust_emotion:
    print("The most observed emotion: Disgust")
elif max_eye_duration == fear_emotion:
    print("The most observed emotion: Fear")
elif max_eye_duration == angry_emotion:
    print("The most observed emotion: Anger")
elif max_eye_duration == surprise_emotion:
    print("The most observed emotion: Surprise")
else:
    print("The most observed emotion: Neutral")


# Print the durations and most observed features for eyes
print(f"\nEye Movements:")
print(f"Duration taken looking right: {duration_looking_right} sec")
print(f"Duration taken closed eyes: {duration_eyes_closed} sec")
print(f"Duration taken looking left: {duration_looking_left} sec")
print(f"Duration taken looking straight: {duration_looking_straight} sec")

# Determine the most observed eye movement
max_eye_duration = max(duration_looking_right, duration_eyes_closed, duration_looking_left, duration_looking_straight)
if max_eye_duration == duration_looking_right:
    print("The most observed eye movement: Looking Right")
elif max_eye_duration == duration_eyes_closed:
    print("The most observed eye movement: Eyes Closed")
elif max_eye_duration == duration_looking_left:
    print("The most observed eye movement: Looking Left")
else:
    print("The most observed eye movement: Looking Straight")

# Print the durations and most observed features for head pose
print(f"\nHead Pose Estimation:")
print(f"Duration of Time Looking Forward: {time_forward_seconds} seconds")
print(f"Duration of Time Looking Up: {time_up_seconds} seconds")
print(f"Duration of Time Looking Left: q{time_left_seconds} seconds")
print(f"Duration of Time Looking Right: {time_right_seconds} seconds")
print(f"Duration of Time Looking Down: {time_down_seconds} seconds")

# Determine the most observed eye movement
max_eye_duration = max(time_forward_seconds, time_up_seconds, time_left_seconds, time_right_seconds, time_down_seconds)
if max_eye_duration == time_forward_seconds:
    print("The most observed head pose: Facing Forward")
elif max_eye_duration == time_up_seconds:
    print("The most observed head pose: Facing Upwards")
elif max_eye_duration == time_left_seconds:
    print("The most observed head pose: Facing Left")
elif max_eye_duration == time_right_seconds:
    print("The most observed head pose: Facing Right")
else:
    print("The most observed head pose: Facing Downwards")



Emotion Detection:
Duration of Happiness: 0.3005650043487549 seconds
Duration of Sadness: 2.74582839012146 seconds
Duration of Disgust: 0 seconds
Duration of Fear: 2.559276580810547 seconds
Duration of Anger: 0.30039095878601074 seconds
Duration of Neutral: 5.419421195983887 seconds
Duration of Surprise: 9.801420211791992 seconds
The most observed emotion: Surprise

Eye Movements:
Duration taken looking right: 0 sec
Duration taken closed eyes: 1.2666666666666666 sec
Duration taken looking left: 1.4 sec
Duration taken looking straight: 1.5999999999999999 sec
The most observed eye movement: Looking Straight

Head Pose Estimation:
Duration of Time Looking Forward: 13.040417909622192 seconds
Duration of Time Looking Up: 0 seconds
Duration of Time Looking Left: q7.1054840087890625 seconds
Duration of Time Looking Right: 1.2610559463500977 seconds
Duration of Time Looking Down: 0 seconds
The most observed head pose: Facing Forward
