In [7]:
import os
import cv2
import mediapipe as mp
import json

# Initialize MediaPipe FaceMesh and Hands
mp_face_mesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5)
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)

# Define input and output directories
input_dir = "/scratch2/bsow/Documents/ACSR/data/handshapes/images"
output_dir = "/scratch2/bsow/Documents/ACSR/data/handshapes/coordinates"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each image and extract hand landmarks, nose coordinates, and additional face information
for i in range(1, 9):  # Loop through handshape_1 to handshape_8
    image_path = os.path.join(input_dir, f"handshape_{i}.jpg")
    output_path = os.path.join(output_dir, f"handshape_{i}.json")

    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image {image_path}")
        continue

    # Convert the image to RGB
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Process the image with MediaPipe FaceMesh to get nose coordinates and face bounding box
    face_results = face_mesh.process(rgb_image)
    nose_landmarks = None
    face_bbox = None
    eye_distance = None

    if face_results.multi_face_landmarks:
        for face_landmarks in face_results.multi_face_landmarks:
            # Extract nose landmark (landmark 1)
            nose_landmarks = [
                face_landmarks.landmark[1].x,
                face_landmarks.landmark[1].y,
                face_landmarks.landmark[1].z,
            ]

            # save landmarks 227 (left), 454 (right), 10 (top), 159 (-)
            # Extract all x and y coordinates of the face landmarks
            x_coords = [landmark.x for landmark in face_landmarks.landmark]
            y_coords = [landmark.y for landmark in face_landmarks.landmark]

            # Calculate the bounding box of the face
            face_bbox = {
                "x_min": min(x_coords),
                "x_max": max(x_coords),
                "y_min": min(y_coords),
                "y_max": max(y_coords),
            }

            # Calculate the distance between the eyes (landmarks 33 and 263)
            left_eye = face_landmarks.landmark[33]
            right_eye = face_landmarks.landmark[263]
            eye_distance = ((left_eye.x - right_eye.x) ** 2 + (left_eye.y - right_eye.y) ** 2) ** 0.5
            break

    # Process the image with MediaPipe Hands to get hand landmarks
    hand_results = hands.process(rgb_image)
    hand_landmarks = None

    if hand_results.multi_hand_landmarks:
        for hand_landmarks in hand_results.multi_hand_landmarks:
            # Extract 3D coordinates of the hand landmarks
            landmarks = []
            for landmark in hand_landmarks.landmark:
                landmarks.append([landmark.x, landmark.y, landmark.z])
            hand_landmarks = landmarks
            break

    # Save the nose, hand landmarks, face bounding box, and eye distance to a JSON file
    if nose_landmarks and hand_landmarks and face_bbox and eye_distance:
        data = {
            "nose_landmarks": nose_landmarks,
            "hand_landmarks": hand_landmarks,
            "face_bbox": face_bbox,
            "eye_distance": eye_distance,
        }
        with open(output_path, "w") as f:
            json.dump(data, f)
        print(f"Saved landmarks and additional information for handshape_{i} to {output_path}")
    else:
        print(f"No face or hand detected in {image_path}")

Saved landmarks and additional information for handshape_1 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_1.json
Saved landmarks and additional information for handshape_2 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_2.json
Saved landmarks and additional information for handshape_3 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_3.json
Saved landmarks and additional information for handshape_4 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_4.json
Saved landmarks and additional information for handshape_5 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_5.json
Saved landmarks and additional information for handshape_6 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_6.json
Saved landmarks and additional information for handshape_7 to /scratch2/bsow/Documents/ACSR/data/handshapes/coordinates/handshape_7.json
Saved landmarks and additional informatio

In [10]:
import cv2
import mediapipe as mp
import numpy as np
import os

def process_all_videos(root_dir):
    # Path configuration
    mp4_base = os.path.join(root_dir, "mp4")
    output_base = os.path.join(root_dir, "lip_rois_mp4")
    
    # Create output directory if it doesn't exist
    os.makedirs(output_base, exist_ok=True)

    # Initialize MediaPipe FaceMesh
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5
    )

    # Define lip landmark indices
    LIP_LANDMARKS = [61, 78, 95, 88, 87, 14, 317, 402, 324, 308, 
                    0, 267, 269, 270, 409, 40, 37, 39, 40, 185, 
                    17, 314, 405, 321, 375, 291, 84, 181, 91, 146, 
                    80, 81, 82, 13, 312, 311, 319, 308]

    # Walk through directory structure
    for root, dirs, files in os.walk(mp4_base):
        for file in files:
            if file.endswith(".mp4"):
                # Construct paths
                input_path = os.path.join(root, file)
                relative_path = os.path.relpath(root, mp4_base)
                output_dir = os.path.join(output_base, relative_path, os.path.splitext(file)[0])
                
                # Create output directory
                os.makedirs(output_dir, exist_ok=True)
                
                # Process video
                print(f"Processing: {input_path}")
                process_video(input_path, output_dir, face_mesh, LIP_LANDMARKS)

    face_mesh.close()

def process_video(input_path, output_dir, face_mesh, lip_landmarks, padding=0.15):
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error opening video: {input_path}")
        return

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_name = os.path.splitext(os.path.basename(input_path))[0]
    
    prev_bbox = None
    frame_number = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Process frame
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)
        
        if results.multi_face_landmarks:
            landmarks = results.multi_face_landmarks[0].landmark
            lip_points = np.array([(landmarks[i].x * width, landmarks[i].y * height) 
                                 for i in lip_landmarks], dtype=np.int32)

            # Calculate bounding box with padding
            min_x, min_y = np.min(lip_points, axis=0)
            max_x, max_y = np.max(lip_points, axis=0)
            
            # Add padding and clamp to image boundaries
            w = max_x - min_x
            h = max_y - min_y
            min_x = max(0, int(min_x - w * padding))
            min_y = max(0, int(min_y - h * padding))
            max_x = min(width, int(max_x + w * padding))
            max_y = min(height, int(max_y + h * padding))
            
            prev_bbox = (min_x, min_y, max_x, max_y)
        else:
            if prev_bbox is None:
                frame_number += 1
                continue
            min_x, min_y, max_x, max_y = prev_bbox

        # Crop and save image
        lip_roi = frame[min_y:max_y, min_x:max_x]
        if lip_roi.size > 0:
            output_filename = f"{video_name}_lips_{frame_number:04d}.png"
            output_path = os.path.join(output_dir, output_filename)
            cv2.imwrite(output_path, lip_roi)

        frame_number += 1

    cap.release()
    print(f"Saved {frame_number} frames to {output_dir}")

# Run the processing
root_directory = "/scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train"
process_all_videos(root_directory)

Processing: /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/mp4/csf001/csf001.mp4
Saved 293 frames to /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/lip_rois_mp4/csf001/csf001
Processing: /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/mp4/csf002/csf002.mp4
Saved 294 frames to /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/lip_rois_mp4/csf002/csf002
Processing: /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/mp4/csf007/csf007.mp4
Saved 296 frames to /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/lip_rois_mp4/csf007/csf007
Processing: /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/mp4/csf003/csf003.mp4
Saved 294 frames to /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/lip_rois_mp4/csf003/csf003
Processing: /scratch2/bsow/Documents/ACSR/data/training_videos/CSF22_train/mp4/csf004/csf004.mp4
Saved 296 frames to /scratch2/bsow/Documents/ACSR/data/training_videos/CSF2

In [1]:
import os
import cv2
import numpy as np
import pandas as pd

def get_yellow_pixel_coordinates(image_path):
    """
    Identify yellow pixels in the given image.
    Args:
        image_path (str): Path to the input image.
    Returns:
        np.ndarray: Array of yellow pixel coordinates.
    """
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    
    # Convert to HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    # Define the range for yellow color in HSV
    lower_yellow = np.array([20, 100, 100])  # Lower bound for yellow
    upper_yellow = np.array([30, 255, 255])  # Upper bound for yellow
    
    # Create a mask for yellow pixels
    yellow_mask = cv2.inRange(hsv_image, lower_yellow, upper_yellow)
    
    # Find the coordinates of yellow pixels
    yellow_pixels = np.column_stack(np.where(yellow_mask > 0))
    
    return yellow_pixels

def save_yellow_pixels_to_csv(hand_images_dir, output_csv):
    """
    Process all hand images in the directory, identify one yellow pixel per image,
    and save the results to a CSV file.
    Args:
        hand_images_dir (str): Directory containing hand images.
        output_csv (str): Path to save the CSV file.
    """
    # List to store results
    results = []
    
    # Loop through all hand images in the directory
    for hand_image_name in os.listdir(hand_images_dir):
        if hand_image_name.endswith(".png"):  # Process only PNG files
            hand_image_path = os.path.join(hand_images_dir, hand_image_name)
            
            # Get yellow pixel coordinates
            yellow_pixels = get_yellow_pixel_coordinates(hand_image_path)
            
            if len(yellow_pixels) > 0:
                # Select the first yellow pixel as the representative
                representative_pixel = yellow_pixels[0]
                
                # Append the result (image name and pixel coordinates)
                results.append({
                    "image_name": hand_image_name,
                    "yellow_pixel_x": representative_pixel[1],  # Column index
                    "yellow_pixel_y": representative_pixel[0]   # Row index
                })
            else:
                print(f"No yellow pixels found in {hand_image_name}")
    
    # Convert results to a DataFrame
    results_df = pd.DataFrame(results)
    
    # Save to CSV
    results_df.to_csv(output_csv, index=False)
    print(f"Yellow pixel coordinates saved to {output_csv}")

# Example usage
hand_images_dir = "/scratch2/bsow/Documents/ACSR/data/handshapes/hand_images"
output_csv = "/scratch2/bsow/Documents/ACSR/data/handshapes/yellow_pixels.csv"
save_yellow_pixels_to_csv(hand_images_dir, output_csv)

Yellow pixel coordinates saved to /scratch2/bsow/Documents/ACSR/data/handshapes/yellow_pixels.csv
