In [1]:
import cv2
import mediapipe as mp
from mediapipe.tasks.python import vision
import tensorflow as tf
import time
import winsound

In [2]:
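# One-off conversion of the exported SavedModel into the .tflite file loaded in the next cell.
# Kept commented out for reference; uncomment and run once if the .tflite file has to be regenerated.
# converter = tf.lite.TFLiteConverter.from_saved_model('eye_detection_model_export')
# tflite_model = converter.convert()

# with open('eye_detection_model_export.tflite', 'wb') as f:
#   f.write(tflite_model)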

In [3]:
# Load the TFLite model used by check_eye_open and reserve memory for its tensors.
interpreter = tf.lite.Interpreter(
  model_path='eye_detection_model_export.tflite',
)
interpreter.allocate_tensors()

# Tensor metadata; the 'index' entries are what set_tensor / get_tensor need later on.
input_details, output_details = (
  interpreter.get_input_details(),
  interpreter.get_output_details(),
)

In [4]:
# Indices into the per-face landmark list returned by the face landmarker.
# Each list holds the 16 points used to build the bounding box of one eye.
# NOTE(review): "left"/"right" presumably follow the subject's point of view - confirm.
LEFT_EYE_LANDMARKS = [
  362, 398, 384, 385, 386, 387, 388, 466,
  263, 249, 390, 373, 374, 380, 381, 382,
]
RIGHT_EYE_LANDMARKS = [
  33, 246, 161, 160, 159, 158, 157, 173,
  133, 155, 154, 153, 145, 144, 163, 7,
]

In [5]:
def check_eye_open(image, top_left_corner, bottom_right_corner, invert=False):
  """Classify a rectangular eye region of `image` with the TFLite model.

  The region is cropped, optionally mirrored, resized to 64x64, converted to
  grayscale, scaled to [0, 1] and fed to the module-level `interpreter`.

  Args:
    image: Frame to crop from, indexed as image[y, x] (NumPy-style array with
      a `.shape`).
    top_left_corner: (x, y) of the crop's top-left corner, in pixels.
    bottom_right_corner: (x, y) of the crop's bottom-right corner, in pixels.
    invert: When True the crop is flipped horizontally before classification.

  Returns:
    Integer array shaped like the model output, holding 1 where the model
    score is >= 0.5 and 0 elsewhere.
    NOTE(review): presumably 1 means "open" - confirm against the model labels.

  Raises:
    ValueError: If the requested region does not overlap the image.
  """
  # Negative bounds would be interpreted by slicing as "count from the end"
  # and silently select the wrong pixels, so clamp them to the image origin.
  # Bounds past the far edge are already truncated by slicing itself.
  left = max(0, int(top_left_corner[0]))
  top = max(0, int(top_left_corner[1]))
  right = max(0, int(bottom_right_corner[0]))
  bottom = max(0, int(bottom_right_corner[1]))

  image = image[top:bottom, left:right]

  if image.shape[0] == 0 or image.shape[1] == 0:
    raise ValueError(
      f'Eye region {tuple(top_left_corner)}-{tuple(bottom_right_corner)} does not overlap the image'
    )

  if invert:
    image = image[:, ::-1] # flip the image horizontally

  IMAGE_SIZE = (64, 64)

  image = tf.image.resize(image, IMAGE_SIZE)
  # NOTE(review): rgb_to_grayscale assumes RGB channel order, while frames read
  # with OpenCV are BGR. Left unchanged because the model may have been trained
  # on the same ordering - confirm against the training pipeline.
  image = tf.image.rgb_to_grayscale(image)
  image = tf.cast(image, tf.float32) / 255.0

  # Add the batch dimension (from [64, 64, 1] to [1, 64, 64, 1])
  image = tf.expand_dims(image, axis=0)  # [1, 64, 64, 1]

  # The TFLite interpreter is documented to take NumPy arrays, so convert the
  # eager tensor explicitly instead of relying on implicit coercion.
  interpreter.set_tensor(input_details[0]['index'], image.numpy())

  interpreter.invoke()

  prediction = interpreter.get_tensor(output_details[0]['index'])

  return (prediction >= 0.5).astype(int)

In [20]:
def func(landmarks, frame, is_left_eye):
  """Locate one eye in `frame`, classify it, and draw debug overlays.

  The eye's bounding box is computed from the landmark indices in
  LEFT_EYE_LANDMARKS / RIGHT_EYE_LANDMARKS, enlarged by a margin, clamped to
  the frame and handed to check_eye_open. The landmark points and the box are
  then drawn onto `frame` in place (after classification, so the overlays do
  not end up inside this eye's crop).

  Args:
    landmarks: Indexable collection of landmarks exposing normalized `.x` and
      `.y` attributes (they are multiplied by the frame width / height here).
    frame: Image array indexed as frame[y, x]; modified in place by drawing.
    is_left_eye: True to process LEFT_EYE_LANDMARKS, False for
      RIGHT_EYE_LANDMARKS.

  Returns:
    Whatever check_eye_open returns for the eye region.
  """
  frame_height, frame_width = frame.shape[:2]

  color = (0, 255, 0) if is_left_eye else (255, 0, 0)
  eye_indices = LEFT_EYE_LANDMARKS if is_left_eye else RIGHT_EYE_LANDMARKS

  # Convert normalized landmark coordinates to integer pixel positions.
  coord_circles = [
    (int(landmarks[index].x * frame_width), int(landmarks[index].y * frame_height))
    for index in eye_indices
  ]

  xs = [x for x, _ in coord_circles]
  ys = [y for _, y in coord_circles]

  # Margins around the tight landmark box: 10 px horizontally, 20 px vertically.
  margin_x, margin_y = 10, 20

  # Clamp the enlarged box to the frame. Without this, an eye close to the
  # border yields negative coordinates, which the crop in check_eye_open would
  # treat as "count from the end" and select the wrong pixels.
  # Image y grows downward, so the smallest y is the top edge.
  left = min(frame_width, max(0, min(xs) - margin_x))
  top = min(frame_height, max(0, min(ys) - margin_y))
  right = min(frame_width, max(0, max(xs) + margin_x))
  bottom = min(frame_height, max(0, max(ys) + margin_y))

  eye_top_left_with_margin = (left, top)
  eye_bottom_right_with_margin = (right, bottom)

  # The left-eye crop is mirrored before classification.
  # NOTE(review): presumably so both eyes reach the model in the same
  # orientation - confirm against how the model was trained.
  is_eye_open = check_eye_open(frame, eye_top_left_with_margin, eye_bottom_right_with_margin, is_left_eye)

  for coord in coord_circles:
    cv2.circle(frame, coord, 1, color)

  cv2.rectangle(frame, eye_top_left_with_margin, eye_bottom_right_with_margin, color, 2)

  return is_eye_open

In [21]:
# Drowsiness monitor: read the video frame by frame, find the face landmarks,
# classify both eyes and sound an alarm when they stay closed for too long.
video = cv2.VideoCapture('video2.mp4')

# Fail loudly instead of silently processing zero frames.
if not video.isOpened():
  raise OSError("Could not open video source 'video2.mp4'")

options = vision.FaceLandmarkerOptions(
  base_options = mp.tasks.BaseOptions(model_asset_path='face_landmarker.task'),
  running_mode=vision.RunningMode.VIDEO
)

original_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
original_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Frames are downscaled before processing and display.
scale_factor = 0.3

new_width = int(original_width * scale_factor)
new_height = int(original_height * scale_factor)

# Value of the closed-eyes counter at which the alarm sounds.
EYES_CLOSED_ALARM_FRAMES = 70

try:
  with vision.FaceLandmarker.create_from_options(options) as landmarker:
    # monotonic() cannot jump backwards the way wall-clock time can.
    start_time = time.monotonic()
    last_timestamp_ms = -1
    eyes_closed_frames = 0

    while True:
      success, frame = video.read()

      if not success:
        break

      frame = cv2.resize(frame, (new_width, new_height))

      # OpenCV delivers BGR, while the image is declared as SRGB below, so
      # convert the channel order for the landmarker. `frame` itself stays
      # BGR for drawing and display.
      rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

      # VIDEO mode requires increasing timestamps; force a strict increase in
      # case two frames land in the same millisecond.
      elapsed_ms = int((time.monotonic() - start_time) * 1000)
      current_time_ms = max(elapsed_ms, last_timestamp_ms + 1)
      last_timestamp_ms = current_time_ms
      face_landmarker_result = landmarker.detect_for_video(mp_image, current_time_ms)

      faces = face_landmarker_result.face_landmarks

      if not faces:
        cv2.putText(frame, 'Face not found', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
      else:
        landmarks = faces[0]

        try:
          is_left_eye_open = func(landmarks, frame, True)
          is_right_eye_open = func(landmarks, frame, False)
        except (ValueError, tf.errors.InvalidArgumentError):
          # The eye region could not be cropped/classified (e.g. it lies
          # outside the frame); treat it like a frame without a usable face.
          # Any other error is a real bug and is allowed to propagate.
          cv2.putText(frame, 'Face not found', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
          if not is_right_eye_open and not is_left_eye_open:
            if eyes_closed_frames == EYES_CLOSED_ALARM_FRAMES:
              # The counter is held at the threshold, so the alarm repeats on
              # every frame while the eyes stay closed. Beep blocks for its
              # duration (1000 ms), pausing playback meanwhile.
              winsound.Beep(2500, 1000)
            else:
              eyes_closed_frames += 1
          elif eyes_closed_frames > 0:
            eyes_closed_frames -= 1

          # Keep the label inside the (downscaled) frame: a fixed y of 540 is
          # invisible whenever the frame is 540 px tall or shorter.
          score_y = min(540, frame.shape[0] - 10)
          cv2.putText(frame, f'Score: {int(eyes_closed_frames / 10)}', (90, score_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

      cv2.imshow('Result', frame)

      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
finally:
  # Always free the capture handle and close the preview window, even when
  # processing stops because of an error.
  video.release()
  cv2.destroyAllWindows()