## Import Dependencies

In [None]:
import os
from glob import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
import dlib
import datetime as dt

## Hands on Sorting

### Task 1
Dari bagian kode ini:
```python
list_imgs = sorted(list_imgs, key=lambda x: int(x.split('/')[-1].split('.')[0]))
print(f"5 path pertama setelah diurutkan: {list_imgs[:5]}")
print(f"Total jumlah gambar: {len(list_imgs)}")
```

Kode ini bertujuan untuk mengurutkan (sorting) daftar gambar berdasarkan _key_ yang ditentukan dalam _lambda function_ seperti berikut.
- `x.split('/')[-1]`, berfungsi untuk memisahkan string path menjadi beberapa item dalam list, lalu mengambil item terakhir (nama file).
- `.split('.')[0]`, berfungsi untuk memisahkan nama file dari ekstensinya, lalu mengambil nama file (tanpa ekstensi) sebagai bahan _key_.
- `int()`, berfungsi untuk mengonversi string tersebut menjadi nilai integer, sehingga pengurutan dilakukan secara numerik (misalnya `2` sebelum `10`), bukan secara alfabetis.

Lihat pada kasus ini, misalkan `list_imgs` berisi:
```python
list_imgs = ['path/to/image/1.jpg', 'path/to/image/10.jpg', 'path/to/image/2.jpg']
```
Proses untuk setiap item akan seperti berikut:
1. Split `/`:
`x.split('/')` akan memberikan `['path', 'to', 'image', '1.jpg']`
2. Split `.`:
`'1.jpg'.split('.')` akan menghasilkan daftar: ['1', 'jpg']
Mengambil elemen pertama dengan [0]: `1`
3. Konversi ke Integer:
`int('1')` akan menghasilkan: 1


## Dataset Nafas
| Nafas-ke | Second | Milisecond  |
|----------|--------|-------------|
|        1 |      7 |          89 |
|        2 |     14 |          46 |
|        3 |     19 |          67 |
|        4 |     25 |          20 |
|        5 |     30 |          10 |
|        6 |     35 |          90 |
|        7 |     40 |          77 |
|        8 |     46 |          15 |
|        9 |     50 |          86 |
|       10 |     56 |          93 |

## Precautionary Warnings
When you work with a large number of images that make up a video (rather than with the video file itself), be careful about how many frames you load at once: holding too many images in memory can exhaust your RAM.
> Warning!
> Mungkin komputer Anda akan kehabisan memori jika jumlah gambar yang dijadikan video terlalu banyak. Jika hal ini terjadi, Anda bisa mengurangi jumlah gambar yang dijadikan video atau menggunakan komputer dengan spesifikasi yang lebih tinggi.

Here is the code that writes a sequence of images out as video frames:


In [None]:
# save_loc = os.path.join(os.getcwd(), 'data', 'toby-rgb.mp4')
# fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# height, width, layers = images_array[0].shape
# video = cv2.VideoWriter(save_loc, fourcc, 30, (width, height))

# for image in images_array:
#     video.write(image)

# video.release()

### Task 2

Jelaskan tentang bagian kode berikut:
```python
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
```

Dalam `OpenCV`, `FOURCC` (Four Character Code) adalah 4-byte code yang digunakan untuk encoding / decoding dari sebuah video. Normalnya, setiap video containers memiliki format codec-nya sendiri seperti berikut:
- XVID: MPEG-4 codec (often used for AVI files).
- MJPG: Motion JPEG codec.
- DIVX: DivX MPEG-4 codec.
- H264: H.264 codec (requires additional software support).
- MP4V: MPEG-4 codec for MP4 files.
- I420: Uncompressed YUV format.

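Apa yang terjadi jika codec tidak sesuai dengan video container? Biasanya tidak ada kesalahan fatal (exception), karena banyak media player saat ini mendukung berbagai macam codec, terlepas dari format container-nya. Namun, pada OpenCV kombinasi FourCC dan container yang tidak didukung backend dapat membuat `cv2.VideoWriter` gagal dibuka (`isOpened()` bernilai `False`) atau menghasilkan file yang tidak bisa diputar, sehingga sebaiknya selalu periksa `isOpened()` setelah membuat writer.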


## Save frames from a video as images

To save a sequence of images from a video

In [None]:
## Export the first frames of the sample video as numbered PNG images.

## Set the video path and the image sequence path
video_path = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')
image_sequence_path = os.path.join(os.getcwd(), 'data', 'image_sequence')

## Create the output folder (no error if it already exists)
os.makedirs(image_sequence_path, exist_ok=True)

## Setup a videoCapture object and frame count
videoCapture = cv2.VideoCapture(video_path)
frame_count = 0

try:
    ## Report an unreadable video instead of silently saving nothing
    if not videoCapture.isOpened():
        print(f"Error: Could not open video: {video_path}")

    ## Read the video and save at most the first 100 frames
    while frame_count < 100:
        ret, frame = videoCapture.read()
        if not ret:
            # End of video (or the capture could not be opened)
            break
        cv2.imwrite(os.path.join(image_sequence_path, f'frame_{frame_count:03d}.png'), frame)
        frame_count += 1
finally:
    ## Release the videoCapture object even if reading or writing failed
    videoCapture.release()

### Task 3

Convert a video into a lower-FPS grayscale video with a moving red dot.

In [12]:
import cv2
import numpy as np
import os

# Build a lower-FPS copy of the sample video: keep every 3rd frame, resize it,
# make it grayscale and paint a red dot that moves from left to right.

# Paths (adjust these as per your directory structure)
original_video_path = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')
low_fps_video_path = os.path.join(os.getcwd(), 'data', 'video_low_fps.mp4')

# Keep one frame out of every `frame_stride` source frames
frame_stride = 3

# Define frame size
frame_size = (1280, 720)  # (width, height)

# Radius of the red dot and the pixel offsets that form the filled disk.
# The offsets never change, so they are computed once instead of per frame.
dot_radius = 10
dot_offsets = [
    (dx, dy)
    for dy in range(-dot_radius, dot_radius + 1)
    for dx in range(-dot_radius, dot_radius + 1)
    if dx**2 + dy**2 <= dot_radius**2
]

# Make sure the output folder exists, otherwise the writer cannot open the file
os.makedirs(os.path.dirname(low_fps_video_path), exist_ok=True)

# Initialize video capture
videoCapture = cv2.VideoCapture(original_video_path)
output = None

try:
    # Check if video opened successfully.
    # Raise instead of calling exit(): exit() shuts down a notebook kernel.
    if not videoCapture.isOpened():
        raise RuntimeError("Error: Could not open video.")

    # Get original FPS and frame count
    fps = videoCapture.get(cv2.CAP_PROP_FPS)
    frame_count = int(videoCapture.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f'Original FPS: {fps}')
    print(f'Total Frames: {frame_count}')

    # Calculate new FPS (reduced because only every 3rd frame is kept)
    new_fps = fps / frame_stride
    print(f'New FPS: {new_fps}')

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
    output = cv2.VideoWriter(low_fps_video_path, fourcc, new_fps, frame_size, isColor=True)

    if not output.isOpened():
        raise RuntimeError("Error: Could not open video writer.")

    frame_index = 0
    processed_frame_num = 0  # To track the number of frames being written

    # Frames 0, 3, 6, ... are kept, i.e. ceil(frame_count / frame_stride) frames
    expected_frames = (frame_count + frame_stride - 1) // frame_stride

    # Calculate step for moving the dot across frames.
    # The dot moves from left (0) to right (frame_size[0]-1) over all kept frames.
    if expected_frames > 1:
        step = (frame_size[0] - 1) / (expected_frames - 1)
    else:
        step = 0  # Avoid division by zero if at most one frame is processed

    while True:
        ret, frame = videoCapture.read()
        if not ret:
            break

        # Process every 3rd frame
        if frame_index % frame_stride == 0:
            # Resize to 1280x720
            resized_frame = cv2.resize(frame, frame_size)

            # Convert to grayscale
            gray_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)

            # Convert grayscale back to BGR (to maintain 3 channels)
            bgr_frame = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2BGR)

            # Calculate the position of the red dot
            x = int(processed_frame_num * step)
            y = frame_size[1] // 2  # Center row (height)

            # Ensure x is within bounds (the reported frame count may be inexact)
            x = min(x, frame_size[0] - 1)

            # Paint the red dot manually as a filled disk around (x, y).
            # OpenCV uses BGR, so red is [0, 0, 255].
            # Built-in alternative:
            # cv2.circle(bgr_frame, (x, y), dot_radius, (0, 0, 255), -1)
            for dx, dy in dot_offsets:
                nx, ny = x + dx, y + dy
                # Skip the parts of the disk that fall outside the frame
                if 0 <= nx < frame_size[0] and 0 <= ny < frame_size[1]:
                    bgr_frame[ny, nx] = [0, 0, 255]

            # Debug: Print the BGR value at the centre of the dot
            print(f'Processed Frame {processed_frame_num}: Red dot at ({x}, {y}) with BGR value {bgr_frame[y, x]}')

            # Write the frame to the output video
            output.write(bgr_frame)

            processed_frame_num += 1

        frame_index += 1

    print(f'Processed {processed_frame_num} frames.')
finally:
    # Release resources even when an error interrupted the processing
    videoCapture.release()
    if output is not None:
        output.release()
    cv2.destroyAllWindows()


Original FPS: 30.0
Total Frames: 1800
New FPS: 10.0
Processed Frame 0: Red dot at (0, 360) with BGR value [  0   0 255]
Processed Frame 1: Red dot at (2, 360) with BGR value [  0   0 255]
Processed Frame 2: Red dot at (4, 360) with BGR value [  0   0 255]
Processed Frame 3: Red dot at (6, 360) with BGR value [  0   0 255]
Processed Frame 4: Red dot at (8, 360) with BGR value [  0   0 255]
Processed Frame 5: Red dot at (10, 360) with BGR value [  0   0 255]
Processed Frame 6: Red dot at (12, 360) with BGR value [  0   0 255]
Processed Frame 7: Red dot at (14, 360) with BGR value [  0   0 255]
Processed Frame 8: Red dot at (17, 360) with BGR value [  0   0 255]
Processed Frame 9: Red dot at (19, 360) with BGR value [  0   0 255]
Processed Frame 10: Red dot at (21, 360) with BGR value [  0   0 255]
Processed Frame 11: Red dot at (23, 360) with BGR value [  0   0 255]
Processed Frame 12: Red dot at (25, 360) with BGR value [  0   0 255]
Processed Frame 13: Red dot at (27, 360) with BGR val

In [None]:
# Load the low-FPS video back into memory and compare its colour channels.
low_fps_video_path = os.path.join(os.getcwd(), 'data', 'video_low_fps.mp4')

cap = cv2.VideoCapture(low_fps_video_path)
frames = []
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
finally:
    cap.release()

frames_array = np.array(frames)

print(f"Shape of frames_array: {frames_array.shape}")

if frames_array.ndim != 4:
    # No frame could be read, so there is no channel axis to split
    print(f"Error: No frames could be read from {low_fps_video_path}")
else:
    # Extract the colour channels from the LAST axis.
    # frames_array is (frame, row, column, channel) and OpenCV decodes to BGR.
    b_channel = frames_array[..., 0]
    g_channel = frames_array[..., 1]
    r_channel = frames_array[..., 2]

    # Check if all channels are identical.
    # True only for purely gray pixels; the red dot drawn into the frames
    # (and possibly lossy encoding) can make these comparisons False.
    print(np.array_equal(r_channel, g_channel))
    print(np.array_equal(g_channel, b_channel))




In [None]:
## Inspect the first frame: its shape, then the pixel values of row index 3
first_frame = frames_array[0]
print(f"Length of a frame: {first_frame.shape}")
print(f"RGB value of the frame: {first_frame[3]}")

## Take a copy of frame 200 and convert it from BGR to RGB for matplotlib
single_img = cv2.cvtColor(frames_array[200].copy(), cv2.COLOR_BGR2RGB)

## Show the frame without axes
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(single_img)
ax.axis('off')
plt.show()

## Explanation

Video yang digunakan mempunyai durasi 60 detik dengan 30 fps.
Karena hanya diambil 1 frame dari setiap 3 frame, maka idealnya fps akan berkurang menjadi $\frac{30}{3} = 10$ fps. Hasil akhir video memang menunjukkan 10 fps.

Untuk grayscale sendiri, saya baru tahu bahwa konversi dari RGB / BGR ke grayscale menggabungkan 3 channel warna menjadi 1 channel intensitas, sedangkan konversi sebaliknya (grayscale ke BGR) hanya menyalin nilai intensitas tersebut ke 3 channel warna.

Informasi mengenai nilai RGB frame sebelumnya sudah hilang ketika dilakukan konversi dari RGB ke grayscale. Nilai grayscale tersebut akan tetap sama meskipun dikonversi balik menjadi format RGB / BGR; hanya saja sekarang terdapat 3 channel warna yang berisi nilai yang sama.

Setelah frame kembali memiliki 3 channel warna, kita dapat menambahkan titik berwarna pada pixel tertentu dan memperbesar titik tersebut sehingga dapat terlihat dengan mudah di layar.
```python
    # Calculate the position of the red dot
    x = int(processed_frame_num * step)
    y = frame_size[1] // 2  # Center row (height)

    # Ensure x is within bounds
    x = min(x, frame_size[0] - 1)

    # Add the red dot manually
    # OpenCV uses BGR, so Red is [0, 0, 255]
    bgr_frame[y, x] = [0, 0, 255]
```
Proses ini akan menghitung posisi titik merah berdasarkan nomor frame yang diproses (karena kita hanya mengambil 1 dari setiap 3 frame, titik merah digambar pada setiap frame yang diambil tersebut, lalu bergerak ke kanan dari frame ke frame).

Di sisi lain, dot merah ini masih terlalu kecil, maka akan dilakukan amplifikasi untuk menambah ukuran dari dot tersebut.
```python
dot_radius = 10
for dy in range(-dot_radius, dot_radius + 1):
    for dx in range(-dot_radius, dot_radius + 1):
        if dx**2 + dy**2 <= dot_radius**2:
            nx, ny = x + dx, y + dy
            if 0 <= nx < frame_size[0] and 0 <= ny < frame_size[1]:
                bgr_frame[ny, nx] = [0, 0, 255]
```
Proses di sini cukup sederhana: setiap pixel yang jaraknya dari pusat titik tidak lebih dari `dot_radius` (dan masih berada di dalam frame) diwarnai merah, sehingga titik tersebut menjadi sebuah lingkaran penuh yang mudah terlihat.

### Deteksi DLIB
Using dlib for Detecting faces

In [None]:
## Load every frame of the sample video into a NumPy array (RGB order)
video_path = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')

videoCapture = cv2.VideoCapture(video_path)
video_frames = []

## Keep reading until the capture reports that no frame is left
ret, frame = videoCapture.read()
while ret:
    ## OpenCV decodes to BGR; store the frame as RGB
    video_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    ret, frame = videoCapture.read()

## Stack the collected frames into one array, then free the capture
frames_array = np.array(video_frames)
videoCapture.release()

## Report the shape of the stacked frames
print(f"Shape of frames_array: {frames_array.shape}")

In [None]:
## Display a copy of frame 200 without axes
single_img = frames_array[200].copy()
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(single_img)
ax.axis('off')
plt.show()

## Detection with DLIB
using dlib for face detection

In [None]:
## Detect faces in `single_img` and outline each one
detector = dlib.get_frontal_face_detector()

## The second argument asks dlib to upsample the image once before detecting
faces = detector(single_img, 1)

## Handle every detected face (there may be more than one)
for face in faces:
    x, y = face.left(), face.top()
    w, h = face.width(), face.height()
    ## Bottom-right corner of the face box
    x2, y2 = x + w, y + h
    cv2.rectangle(single_img, (x, y), (x2, y2), (255, 0, 0), 5)

## Show the annotated image without axes
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(single_img)
ax.axis('off')
plt.show()

## Handling hair that is not covered by the face box
The cell below enlarges the bounding box upward so that it also covers the hair.

In [None]:
## Take a fresh copy of frame 200 and draw a face box extended upward for the hair
single_img = frames_array[200].copy()
faces = detector(single_img, 1)
for face in faces:
    x, y, w, h = face.left(), face.top(), face.width(), face.height()

    ## Right edge of THIS face (do not reuse a stale x2 from another cell)
    x2 = x + w

    ## Extend the box upward by half the face height, clamped to the image top
    y_hair = max(0, int(y - (0.5 * h)))
    ## Keep the bottom edge at the bottom of the face box
    h_hair = (y + h) - y_hair
    cv2.rectangle(single_img, (x, y_hair), (x2, h_hair + y_hair), (255, 0, 0), 5)


In [None]:
## Display the image with its bounding box, axes hidden
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(single_img)
ax.axis('off')
plt.show()

### Task 4

Based on the Face ROI (Region of Interest) and expand for shoulder and chest

In [None]:
## Take a fresh copy of frame 200 and detect the faces in it
single_img = frames_array[200].copy()
faces = detector(single_img, 1)
img_height, img_width = single_img.shape[:2]

## Loop through the faces and draw the bounding boxes
for face in faces:
    x, y, w, h = (face.left(), face.top(), face.width(), face.height())

    ## Right edge of THIS face (do not reuse a stale x2 from another cell)
    x2 = x + w

    ## Face box extended upward by half the face height to include the hair;
    ## the bottom edge stays at the bottom of the face box
    y_hair = max(0, int(y - (0.5 * h)))
    h_hair = (y + h) - y_hair
    cv2.rectangle(single_img, (x, y_hair), (x2, h_hair + y_hair), (255, 0, 0), 5)

    # ROI for shoulders and chest: twice the face width, centred on the face,
    # starting right below the face box.
    # NOTE(review): the original used roi_y = y * 2, which depends on where the
    # face sits in the image rather than on the face box - confirm y + h is intended.
    roi_left = int(x - (0.5 * w))
    roi_x = max(0, roi_left)
    roi_y = min(y + h, img_height)
    roi_w = max(0, min(roi_left + int(w * 2), img_width) - roi_x)
    roi_h = int(h * 2.5)  # Adjust the height to include shoulders and chest

    # Ensure the ROI does not go out of frame bounds (and never gets negative)
    roi_h = max(0, min(roi_h, img_height - roi_y))

    # Draw rectangle around the adjusted ROI
    cv2.rectangle(single_img, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h), (0, 255, 0), 2)

# Display the image with the adjusted bounding boxes
plt.figure(figsize=(10, 10))
plt.imshow(single_img)
plt.axis('off')
plt.show()

### Explanation

```python
## Iteration
for i, face in enumerate(faces):
```
- **`for i, face in enumerate(faces)`**: Ini adalah loop yang akan berjalan sebanyak jumlah wajah yang terdeteksi.
  - **`enumerate(faces)`**: `enumerate` memberikan dua nilai untuk setiap iterasi loop: 
    - `i`: Indeks dari wajah yang terdeteksi (dimulai dari 0).
    - `face`: Objek bounding box dari setiap wajah yang terdeteksi. Objek ini memiliki koordinat posisi wajah dalam gambar.

```python
## Set domain
x, y, w, h = face.left(), face.top(), face.width(), face.height()
```
- **`face.left()`**: Mengambil koordinat x dari tepi kiri kotak pembatas (bounding box) wajah.
- **`face.top()`**: Mengambil koordinat y dari tepi atas kotak pembatas wajah.
- **`face.width()`**: Mengambil lebar bounding box wajah.
- **`face.height()`**: Mengambil tinggi bounding box wajah.
- **`x, y, w, h`**: Variabel ini menyimpan posisi dan ukuran dari bounding box wajah yang terdeteksi, di mana:
  - `x`: Koordinat x dari sudut kiri atas wajah.
  - `y`: Koordinat y dari sudut kiri atas wajah.
  - `w`: Lebar bounding box wajah.
  - `h`: Tinggi bounding box wajah.

```python
## Set bouding for width
x2 = x + w
```
- **`x2 = x + w`**: Menghitung koordinat x dari sudut kanan bawah bounding box. Ini didapat dengan menjumlahkan nilai `x` (koordinat kiri atas) dengan `w` (lebar bounding box).

```python
## Set bouding for height
y2 = y + h
```
- **`y2 = y + h`**: Menghitung koordinat y dari sudut kanan bawah bounding box. Ini didapat dengan menjumlahkan nilai `y` (koordinat kiri atas) dengan `h` (tinggi bounding box).

```python
## Set bounding box for the face 
cv2.rectangle(single_img, (x, y), (x2, y2), (255, 0, 0), 5)
```
- **`cv2.rectangle(single_img, (x, y), (x2, y2), (255, 0, 0), 5)`**:
  - Ini menggunakan OpenCV (`cv2`) untuk menggambar kotak persegi panjang (bounding box) di sekitar wajah yang terdeteksi.
  - **`single_img`**: Gambar tempat kotak akan digambar.
  - **`(x, y)`**: Titik sudut kiri atas dari bounding box (posisi awal persegi).
  - **`(x2, y2)`**: Titik sudut kanan bawah dari bounding box (posisi akhir persegi).
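  - **`(255, 0, 0)`**: Warna persegi panjang. OpenCV sendiri memakai urutan BGR, tetapi `single_img` di sini sudah dikonversi ke RGB (`cv2.COLOR_BGR2RGB`) dan ditampilkan dengan matplotlib, sehingga (255, 0, 0) berarti merah.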
  - **`5`**: Ketebalan garis persegi panjang.

### Bagian bahu
Untuk bagian dada dan bahu, kita dapat membuat bounding box secara manual: ambil nilai `x, y, w, h` dari bounding box wajah, lalu geser dan perbesar nilai-nilai tersebut sehingga kotaknya mencakup bahu dan dada.


### Mendeteksi Wajah pada Video

In [None]:
# ## Getting the video path dan output path
# video_path = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')
# output_path = os.path.join(os.getcwd(), 'data', 'output.mp4')

# ## Setting the videoCapture object and its fps
# videoCapture = cv2.VideoCapture(video_path)
# fps = videoCapture.get(cv2.CAP_PROP_FPS)

# ## Setting the frame width and height
# frame_width = int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH))
# frame_height = int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT))

# ## Setting the videoWriter object 
# fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# output = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# ## total_frame_diproses = fps * 5 # 5 detik
# frame_count = 0

# start_time = dt.datetime.now()

# while True:
#     ret, frame = videoCapture.read()
#     if not ret:
#         break

#     ## Processes Frame
#     faces = detector(frame, 1)
#     for face in faces:
#         x, y, w, h = (face.left(), face.top(), face.width(), face.height())

#         ## Creating a rectangle
#         cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 5)
#     ## End of Frame Processing

#     ## Write the frame to the output video
#     output.write(frame)

#     frame_count += 1
#     if cv2.waitKey(25) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()

# print(f"Waktu yang diperlukan: {dt.datetime.now() - start_time}")

## Using Facial Tracking

Running dlib detection on every frame is painfully slow, so we can use `facial tracking` instead. Facial tracking is a technique for following a face through a video.

With facial tracking, we can locate the face in every frame of the video without re-running face detection on each frame.

In [None]:
import cv2
import dlib
import numpy as np
import os
import datetime as dt

## Track one face through the first 10 seconds of the video: detect it with
## dlib every 3 seconds and follow it with Lucas-Kanade optical flow in between.

## Setting input and output path
video_path = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')
output_path = os.path.join(os.getcwd(), 'data', 'output.mp4')

## Setting up the detector
detector = dlib.get_frontal_face_detector()

## Setting up the videoCapture object
videoCapture = cv2.VideoCapture(video_path)
if not videoCapture.isOpened():
    videoCapture.release()
    raise RuntimeError(f"Could not open video: {video_path}")
fps = videoCapture.get(cv2.CAP_PROP_FPS)
frame_width = int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT))

## Setting the videoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

## Process frame
processed_frame_total = 10 * fps # 10 seconds video processed
frame_count = 0

## Start time
start_time = dt.datetime.now()

## Becomes True once a face has been found at least once
is_face_detected = False

## Tracking points and previous grayscale frame for Lucas-Kanade optical flow
p0 = None
prev_gray = None
lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

## Variables for face detection.
## The interval is a whole number of frames so the countdown reliably reaches
## zero even when fps is fractional (e.g. 29.97).
detection_interval = max(1, int(round(fps * 3))) # Redetect every 3 seconds
## Start at zero so that the very first frame triggers a detection
detection_countdown = 0

try:
    ## Main Loop
    while frame_count < processed_frame_total:
        ret, frame = videoCapture.read()
        if not ret:
            break

        ## Converting to grayscale for easier processing
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        ## Detection is due on the first frame, every `detection_interval`
        ## frames, and on every frame while a due detection keeps failing
        redetected = False
        if detection_countdown <= 0:

            ## Detect faces (image upsampled once) and use the first one
            faces = detector(frame, 1)
            if faces:
                face = faces[0]

                ## Clamp the box to the frame: a negative coordinate would
                ## otherwise wrap around when slicing the ROI
                right = face.left() + face.width()
                bottom = face.top() + face.height()
                x = max(0, face.left())
                y = max(0, face.top())
                w = min(right, frame_width) - x
                h = min(bottom, frame_height) - y

                if w > 0 and h > 0:
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

                    ## Initial keypoints detected in the face ROI
                    face_roi = gray[y:y+h, x:x+w]
                    p0 = cv2.goodFeaturesToTrack(face_roi, maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)

                    ## Keypoints are relative to the cropped ROI; shift them into
                    ## full-frame coordinates. p0 is None when no corner was found.
                    if p0 is not None:
                        p0[:, :, 0] += x  # Adjust the x-coordinates
                        p0[:, :, 1] += y  # Adjust the y-coordinates

                    ## Reset the detection countdown
                    is_face_detected = True
                    detection_countdown = detection_interval
                    redetected = True

        ## Otherwise follow the previously found face with optical flow
        can_track = (is_face_detected and prev_gray is not None
                     and p0 is not None and len(p0) > 0)
        if not redetected and can_track:

            ## Lucas-Kanade optical flow estimates where each keypoint of the
            ## previous frame has moved to in the current frame
            p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, gray, p0, None, **lk_params)

            good_new = p1[st == 1]  # Successfully tracked keypoints in the current frame.
            good_old = p0[st == 1]  # Corresponding keypoints from the previous frame.

            ## Move the box by the median keypoint shift in x and y
            if len(good_new) > 1:
                x_shift = np.median(good_new[:, 0] - good_old[:, 0])
                y_shift = np.median(good_new[:, 1] - good_old[:, 1])

                x = int(x + x_shift)
                y = int(y + y_shift)

                ## Ensure the bounding box remains within the frame
                x = max(0, min(x, frame_width - w))
                y = max(0, min(y, frame_height - h))

                ## Draw the rectangle around the tracked face
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            else:
                ## Tracking is lost: force a fresh detection on the next frame
                detection_countdown = 0

            # Update the tracking points for the next frame
            p0 = good_new.reshape(-1, 1, 2)

        # Write the processed frame to the output video
        output.write(frame)

        ## Update the previous frame for optical flow in the next iteration
        prev_gray = gray.copy()

        ## Update the frame count and detection countdown.
        ## (No cv2.waitKey here: no window is shown, so it only added a delay.)
        frame_count += 1
        detection_countdown -= 1
finally:
    ## Release the resources even when processing failed
    videoCapture.release()
    output.release()
    cv2.destroyAllWindows()

print(f"Time taken: {dt.datetime.now() - start_time}")

---

## Facial Tracking for Seconds 25 - 40

In [8]:
import cv2
import dlib
import numpy as np
import os
import datetime as dt

## Track one face between seconds 25 and 40 of the video: detect it with dlib
## every 3 seconds and follow it with Lucas-Kanade optical flow in between.

## Setting input and output path
video_path = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')
output_path = os.path.join(os.getcwd(), 'data', 'output.mp4')

## Setting up the detector
detector = dlib.get_frontal_face_detector()

## Setting up the videoCapture object
videoCapture = cv2.VideoCapture(video_path)
if not videoCapture.isOpened():
    videoCapture.release()
    raise RuntimeError(f"Could not open video: {video_path}")
fps = videoCapture.get(cv2.CAP_PROP_FPS)
frame_width = int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT))

## Setting the videoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

## NOTE(review): not used by the loop below (the range is start_frame..end_frame)
processed_frame_total = 10 * fps # 10 seconds video processed
frame_count = 0

# Calculate frame range for 25 to 40 seconds
start_frame = 25 * fps
end_frame = 40 * fps

## Start time
start_time = dt.datetime.now()

## Becomes True once a face has been found at least once
is_face_detected = False

## Tracking points and previous grayscale frame for Lucas-Kanade optical flow
p0 = None
prev_gray = None
lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

## Variables for face detection.
## The interval is a whole number of frames so the countdown reliably reaches
## zero even when fps is fractional (e.g. 29.97).
detection_interval = max(1, int(round(fps * 3))) # Redetect every 3 seconds
## Start at zero so that the first frame of the 25-40 s window triggers a
## detection (the old `frame_count == 0` test never fired inside the window)
detection_countdown = 0

try:
    ## Main Loop
    while True:
        ret, frame = videoCapture.read()
        if not ret or frame_count > end_frame:
            break

        ## Frames before the window are read only to advance the video
        if frame_count >= start_frame:

            ## Converting to grayscale for easier processing
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            ## Detection is due on the first processed frame, every
            ## `detection_interval` frames, and on every frame while a due
            ## detection keeps failing
            redetected = False
            if detection_countdown <= 0:

                ## Detect faces (image upsampled once) and use the first one
                faces = detector(frame, 1)
                if faces:
                    face = faces[0]

                    ## Clamp the box to the frame: a negative coordinate would
                    ## otherwise wrap around when slicing the ROI
                    right = face.left() + face.width()
                    bottom = face.top() + face.height()
                    x = max(0, face.left())
                    y = max(0, face.top())
                    w = min(right, frame_width) - x
                    h = min(bottom, frame_height) - y

                    if w > 0 and h > 0:
                        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

                        ## Initial keypoints detected in the face ROI
                        face_roi = gray[y:y+h, x:x+w]
                        p0 = cv2.goodFeaturesToTrack(face_roi, maxCorners=60,
                                                     qualityLevel=0.15,
                                                     minDistance=3,
                                                     blockSize=7)

                        ## Keypoints are relative to the cropped ROI; shift them
                        ## into full-frame coordinates. p0 is None when no
                        ## corner was found.
                        if p0 is not None:
                            p0[:, :, 0] += x  # Adjust the x-coordinates
                            p0[:, :, 1] += y  # Adjust the y-coordinates

                        ## Reset the detection countdown
                        is_face_detected = True
                        detection_countdown = detection_interval
                        redetected = True

            ## Otherwise follow the previously found face with optical flow
            can_track = (is_face_detected and prev_gray is not None
                         and p0 is not None and len(p0) > 0)
            if not redetected and can_track:

                ## Lucas-Kanade optical flow estimates where each keypoint of
                ## the previous frame has moved to in the current frame
                p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, gray, p0, None, **lk_params)

                good_new = p1[st == 1]  # Successfully tracked keypoints in the current frame.
                good_old = p0[st == 1]  # Corresponding keypoints from the previous frame.

                ## Move the box by the median keypoint shift in x and y
                if len(good_new) > 1:
                    x_shift = np.median(good_new[:, 0] - good_old[:, 0])
                    y_shift = np.median(good_new[:, 1] - good_old[:, 1])

                    x = int(x + x_shift)
                    y = int(y + y_shift)

                    ## Ensure the bounding box remains within the frame
                    x = max(0, min(x, frame_width - w))
                    y = max(0, min(y, frame_height - h))

                    ## Draw the rectangle around the tracked face
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                else:
                    ## Tracking is lost: force a fresh detection on the next frame
                    detection_countdown = 0

                # Update the tracking points for the next frame
                p0 = good_new.reshape(-1, 1, 2)

            # Write the processed frame to the output video
            output.write(frame)

            ## Update the previous frame for optical flow in the next iteration
            prev_gray = gray.copy()

            detection_countdown -= 1

        ## Update the frame count.
        ## (No cv2.waitKey here: no window is shown, so it only added a delay,
        ## including on every skipped frame before the window.)
        frame_count += 1
finally:
    ## Release the resources even when processing failed
    videoCapture.release()
    output.release()
    cv2.destroyAllWindows()

print(f"Time taken: {dt.datetime.now() - start_time}")

Time taken: 0:01:33.353214


## Using the DLIB with Camera and with the Shape Predictor Facial Landmark
Use the webcam as the input source, with dlib's face detector and (optionally) the shape predictor for facial landmarks.

In [1]:
import cv2
import dlib

# Live face detection on the webcam feed; press 'q' in the window to stop.

# dlib's frontal face detector (the landmark predictor stays disabled)
face_detector = dlib.get_frontal_face_detector()
# landmark_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Open the default camera (index 0)
cap = cv2.VideoCapture(0)

# Keep going for as long as the camera delivers frames
ret, frame = cap.read()
while ret:
    # The detector is run on a grayscale copy of the frame
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Find the faces in this frame
    faces = face_detector(gray)

    for face in faces:
        # Outline the face on the colour frame
        x, y = face.left(), face.top()
        w, h = face.width(), face.height()
        top_left = (x, y)
        bottom_right = (x + w, y + h)
        cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)

        # # Detect landmarks
        # landmarks = landmark_predictor(gray, face)

        # # Loop through each landmark and draw them
        # for n in range(68):  # 68 points
        #     x = landmarks.part(n).x
        #     y = landmarks.part(n).y
        #     cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)

    # Display the annotated frame
    cv2.imshow("Facial Tracking", frame)

    # Stop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    ret, frame = cap.read()

# Free the camera and close the preview window
cap.release()
cv2.destroyAllWindows()


Another approach: track the face with optical flow and re-detect only when tracking fails.

In [5]:
import dlib
import cv2
import numpy as np

# Webcam demo: detect a face once with dlib, then follow the four corners of
# its bounding box with Lucas-Kanade optical flow, re-running the detector
# only when fewer than half of the corners are tracked successfully. The
# annotated frames are shown on screen and written to 'output.avi'.

# Initialize dlib's face detector
detector = dlib.get_frontal_face_detector()

# Initialize video capture
cap = cv2.VideoCapture(0)

# Get video dimensions
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), 20.0, (frame_width, frame_height))

# Parameters for Lucas-Kanade Optical Flow
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))


def face_to_bbox_points(face):
    """Return the corners of a dlib rectangle as a (4, 1, 2) float32 array.

    Corner order is top-left, top-right, bottom-right, bottom-left. The
    array is used as the point set handed to cv2.calcOpticalFlowPyrLK.
    """
    return np.array([[face.left(), face.top()],
                     [face.right(), face.top()],
                     [face.right(), face.bottom()],
                     [face.left(), face.bottom()]], dtype=np.float32).reshape(-1, 1, 2)


# try/finally guarantees the camera, the writer and the preview window are
# released on every exit path, including exceptions and the early exits below.
try:
    # Process the first frame
    ret, frame = cap.read()
    if not ret:
        # Without this check a failed read hands None to cvtColor, which
        # fails with an unrelated-looking OpenCV assertion.
        raise SystemExit("Could not read a frame from the camera!")
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Initial detection
    faces = detector(gray_frame)
    if len(faces) > 0:
        face = faces[0]
        bbox_points = face_to_bbox_points(face)
    else:
        print("No faces detected!")
        # Same effect as exit() in a script (status 0) without relying on the
        # site-provided helper; cleanup happens in the finally block.
        # NOTE(review): in a notebook this should stop only the cell rather
        # than the kernel - confirm on your Jupyter version.
        raise SystemExit

    old_gray = gray_frame

    # Main loop
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Use optical flow to track the bounding box corners
        new_points, status, _ = cv2.calcOpticalFlowPyrLK(old_gray, gray_frame, bbox_points, None, **lk_params)

        # Check if tracking fails
        if np.sum(status) < len(bbox_points) * 0.5:  # Re-detect if less than 50% points are tracked
            faces = detector(gray_frame)
            if len(faces) > 0:
                face = faces[0]
                bbox_points = face_to_bbox_points(face)
            # No face found: keep the last known box and try again next frame
        else:
            bbox_points = new_points

        # Get the minimum enclosing rectangle for the bounding box points
        x, y, w, h = cv2.boundingRect(bbox_points.astype(np.int32))

        # Draw the bounding box
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Write the frame to the video file
        out.write(frame)

        # Display the video
        cv2.imshow("Tracking", frame)
        old_gray = gray_frame

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    out.release()
    cv2.destroyAllWindows()


---
## Using the Method with Tracking Failure

The first method, which re-detects the face every 3 frames, is inefficient: it runs the detector even when the face is still in the same place. It also gets worse when the face moves suddenly, because the bounding box ends up offset as the frames differ (we keep no temporary value for the bounding box).

One suggestion is to use face tracking with tracking-failure detection: when the number of successfully tracked points drops below a threshold, we run dlib again to keep the box on the face. It uses Lucas-Kanade optical flow, whose per-point status tells us whether tracking is valid (1) or not (0), to decide before performing the re-detection.

### Current Codebase

First, we run an initial detection with dlib to get our initial bounding box.

Next, we use Lucas-Kanade optical flow to check that the tracked points are still on the face in the next frame:
```python
# Use optical flow to track the bounding box corners
new_points, status, _ = cv2.calcOpticalFlowPyrLK(old_gray, gray_frame, bbox_points, None, **lk_params)
```
This method returns our `new_points` and the `status` values: a status of 1 means tracking succeeded and 0 means it failed. We can use the status values as the condition for re-detection:
```python
# Check if tracking fails
if np.sum(status) < len(bbox_points) * 0.5:  # Re-detect if less than 50% points are tracked
    faces = detector(gray_frame)
    if len(faces) > 0:
        face = faces[0]
        bbox_points = np.array([[face.left(), face.top()],
                                [face.right(), face.top()],
                                [face.right(), face.bottom()],
                                [face.left(), face.bottom()]], dtype=np.float32).reshape(-1, 1, 2)
else:
    bbox_points = new_points
```
Lastly, we can draw the bounding box.

Compared with the earlier face-tracking code, this version is superior: re-detecting dynamically, only when tracking fails, is much more efficient and less error-prone than re-detecting every 3 frames. (With re-detection every 3 frames, when the object moves suddenly the tracker loses the ground truth and the box is abruptly offset and disrupted.)

Alternatively, you can use `dlib.correlation_tracker()` for object tracking, which is lighter and faster than optical flow.

In [7]:
import dlib
import cv2
import numpy as np
import os

# Video-file demo: detect a face once with dlib, then follow the four corners
# of its bounding box with Lucas-Kanade optical flow, re-running the detector
# only when fewer than half of the corners are tracked successfully. The
# annotated frames are shown on screen and written to 'output.avi'.

# Initialize dlib's face detector
detector = dlib.get_frontal_face_detector()

# Initialize video capture
VID_PATH = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')

cap = cv2.VideoCapture(VID_PATH)

# Get video dimensions
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write the output at the source's frame rate so playback speed matches the
# input; cap.get() yields 0 when the rate is unknown, so fall back to the
# previous fixed value of 30.0 in that case.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create VideoWriter object
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, (frame_width, frame_height))

# Parameters for Lucas-Kanade Optical Flow
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))


def face_to_bbox_points(face):
    """Return the corners of a dlib rectangle as a (4, 1, 2) float32 array.

    Corner order is top-left, top-right, bottom-right, bottom-left. The
    array is used as the point set handed to cv2.calcOpticalFlowPyrLK.
    """
    return np.array([[face.left(), face.top()],
                     [face.right(), face.top()],
                     [face.right(), face.bottom()],
                     [face.left(), face.bottom()]], dtype=np.float32).reshape(-1, 1, 2)


# try/finally guarantees the capture, the writer and the preview window are
# released on every exit path, including exceptions and the early exits below.
try:
    # Process the first frame
    ret, frame = cap.read()
    if not ret:
        # Without this check a missing/unreadable file hands None to cvtColor,
        # which fails with an unrelated-looking OpenCV assertion.
        raise SystemExit(f"Could not read a frame from: {VID_PATH}")
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Initial detection
    faces = detector(gray_frame)

    ## If face is detected, make a bounding box, else exit
    if len(faces) > 0:
        face = faces[0]
        bbox_points = face_to_bbox_points(face)
    else:
        print("No faces detected!")
        # Same effect as exit() in a script (status 0) without relying on the
        # site-provided helper; cleanup happens in the finally block.
        # NOTE(review): in a notebook this should stop only the cell rather
        # than the kernel - confirm on your Jupyter version.
        raise SystemExit

    old_gray = gray_frame

    # Main loop
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Use optical flow to track the bounding box corners
        new_points, status, _ = cv2.calcOpticalFlowPyrLK(old_gray, gray_frame, bbox_points, None, **lk_params)

        # Check if tracking fails
        if np.sum(status) < len(bbox_points) * 0.5:  # Re-detect if less than 50% points are tracked
            faces = detector(gray_frame)
            if len(faces) > 0:
                face = faces[0]
                bbox_points = face_to_bbox_points(face)
            # No face found: keep the last known box and try again next frame
        else:
            bbox_points = new_points

        # Get the minimum enclosing rectangle for the bounding box points
        x, y, w, h = cv2.boundingRect(bbox_points.astype(np.int32))

        # Draw the bounding box
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Write the frame to the video file
        out.write(frame)

        # Display the video
        cv2.imshow("Tracking", frame)
        old_gray = gray_frame

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    out.release()
    cv2.destroyAllWindows()


### Alternative Method for Tracking
Alternatively, you can use `dlib.correlation_tracker()`, a built-in method for tracking an object (in this case, a face). It is much lighter and faster than optical flow, though this is only offered as an idea.

In [4]:
import os  # VID_PATH below is built with os.path, so import it in this cell

import cv2
import dlib

# Video-file demo: detect a face with dlib until one is found, then follow it
# with dlib's correlation tracker and draw the tracked box on each frame.

# Initialize video capture and dlib components
VID_PATH = os.path.join(os.getcwd(), 'attachment', 'sample-renamed.mp4')

cap = cv2.VideoCapture(VID_PATH)
detector = dlib.get_frontal_face_detector()
tracker = dlib.correlation_tracker()

initialized = False  # To check if tracking has started

# try/finally guarantees the capture and the preview window are released even
# if a frame raises part-way through the loop.
try:
    if not cap.isOpened():
        # The loop below ends immediately in this case (read() returns False);
        # say why instead of finishing silently.
        print(f"Could not open video: {VID_PATH}")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        if not initialized:
            # Detect the face
            faces = detector(gray)
            if len(faces) > 0:
                # Initialize the tracker with the first detected face. The
                # detector already yields dlib rectangles, so the detection
                # is passed straight to start_track without copying it.
                tracker.start_track(frame, faces[0])
                initialized = True
        else:
            # Update the tracker
            tracker.update(frame)
            pos = tracker.get_position()

            # Get the coordinates of the tracked region (converted to int
            # before being handed to cv2.rectangle)
            x1, y1, x2, y2 = int(pos.left()), int(pos.top()), int(pos.right()), int(pos.bottom())
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.imshow("Tracking", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


---

Ref: 
- [1](https://pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/)
- [2](https://www.youtube.com/watch?v=6wMoHgpVUn8)
- [3](https://chatgpt.com/share/6745671f-2b68-800c-a771-1aa674644e83)
- [4](https://chatgpt.com/c/67466eb4-1f78-800c-a440-426e349d7dc7)