In [4]:
!pip install tqdm

Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1


In [9]:
import cv2
import mediapipe as mp
import time
from tqdm import tqdm  # For progress bar
import pandas as pd

# Module-level store for the rows collected by process_video(). That function
# rebinds this global to a fresh list on every call, so it only ever holds the
# results of the most recently processed video.
landmarks2 = []
# NOTE(review): `count` is not read anywhere in the visible cells — confirm it
# is still needed before relying on it.
count = 0

# Initialize MediaPipe
# One Pose estimator is created here and shared by every process_video() call.
# static_image_mode=False is presumably MediaPipe's video-stream (tracking)
# mode — confirm against the MediaPipe Pose documentation.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)

# Function to calculate face direction
def calculate_face_direction(nose, left_eye, right_eye, left_ear, right_ear,
                             yaw_tolerance_ratio=0.5, pitch_tolerance=0.0):
    """Classify head orientation from 2-D facial landmark coordinates.

    Every landmark is an ``(x, y)`` pair; only index 0 (x) and index 1 (y)
    are read. The nose position is compared with the midpoint between the
    two eyes.

    Args:
        nose: ``(x, y)`` of the nose.
        left_eye: ``(x, y)`` of the left eye.
        right_eye: ``(x, y)`` of the right eye.
        left_ear: Accepted for interface compatibility; currently unused.
        right_ear: Accepted for interface compatibility; currently unused.
        yaw_tolerance_ratio: Fraction of the horizontal eye distance within
            which the nose still counts as "forward". Defaults to 0.5
            (50 % of the eye distance), the value that was previously
            hard-coded.
        pitch_tolerance: Half-width of the vertical dead zone around the eye
            line, in the same units as the y coordinates. The default of 0.0
            keeps the original behaviour, where "forward" pitch is reported
            only when the nose y equals the eye-midpoint y exactly.

    Returns:
        A ``(yaw, pitch)`` tuple of strings. ``yaw`` is "left" when the nose
        x is smaller than the eye midpoint x by more than the tolerance,
        "right" when it is larger by more than the tolerance, otherwise
        "forward". ``pitch`` is "up" when the nose y is smaller than the eye
        midpoint y, "down" when it is larger, otherwise "forward".
    """
    # Midpoint between the eyes is the reference for both axes.
    eyes_mid_x = (left_eye[0] + right_eye[0]) / 2
    eyes_mid_y = (left_eye[1] + right_eye[1]) / 2

    # Scale the yaw dead zone with the apparent eye distance so that the
    # classification does not depend on how large the face is in the frame.
    eye_distance = abs(right_eye[0] - left_eye[0])
    tolerance = eye_distance * yaw_tolerance_ratio

    # Horizontal direction (yaw)
    nose_offset = nose[0] - eyes_mid_x
    if abs(nose_offset) <= tolerance:
        yaw = "forward"
    elif nose_offset < -tolerance:
        yaw = "left"
    else:
        yaw = "right"

    # Vertical direction (pitch)
    # NOTE(review): if y grows downwards (image coordinates), an upright face
    # has its nose below the eye line and is reported as "down" — confirm
    # whether a non-zero pitch_tolerance or a different reference is wanted.
    if nose[1] < eyes_mid_y - pitch_tolerance:
        pitch = "up"
    elif nose[1] > eyes_mid_y + pitch_tolerance:
        pitch = "down"
    else:
        pitch = "forward"

    return yaw, pitch
 

def _landmark_xy(landmarks, landmark_id):
    """Return the ``(x, y)`` pair of the pose landmark named by ``landmark_id``."""
    point = landmarks[landmark_id.value]
    return (point.x, point.y)


def process_video(input_path, output_path):
    """Collect pose landmarks for every frame in which the head is turned.

    Each frame of ``input_path`` is run through the module-level ``pose``
    estimator. When a pose is found and ``calculate_face_direction`` reports
    a yaw other than "forward", the frame's landmarks are recorded and the
    frame itself is written to ``output_path``.

    Side effects:
        Rebinds the module-level ``landmarks2`` to a new list (the same list
        that is returned) and creates/overwrites the video at ``output_path``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the 'mp4v'-encoded video that receives the
            non-forward frames.

    Returns:
        A list of ``[extracted_landmarks, frame_number]`` entries, where
        ``extracted_landmarks`` is a list of ``(x, y, z, visibility)`` tuples
        and ``frame_number`` is the 1-based index of the frame in the input.

    Raises:
        ValueError: If ``input_path`` cannot be opened.
    """
    global landmarks2
    landmarks2 = []  # Reset landmarks list
    frame_number = 0  # 1-based after the first increment below

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {input_path}")

    out = None
    try:
        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the playback speed of the written video.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also catches NaN / unknown frame rate
            fps = 30.0
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # Process frames with progress bar. The reported frame count can be
        # missing for some containers, in which case tqdm runs without a total.
        bar_total = total_frames if total_frames > 0 else None
        with tqdm(total=bar_total, desc="Processing frames", unit="frame") as progress:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_number += 1
                progress.update(1)

                # MediaPipe expects RGB input; OpenCV decodes frames as BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                pose_results = pose.process(rgb_frame)
                if not pose_results.pose_landmarks:
                    continue

                landmarks = pose_results.pose_landmarks.landmark
                yaw, _pitch = calculate_face_direction(
                    _landmark_xy(landmarks, mp_pose.PoseLandmark.NOSE),
                    _landmark_xy(landmarks, mp_pose.PoseLandmark.LEFT_EYE),
                    _landmark_xy(landmarks, mp_pose.PoseLandmark.RIGHT_EYE),
                    _landmark_xy(landmarks, mp_pose.PoseLandmark.LEFT_EAR),
                    _landmark_xy(landmarks, mp_pose.PoseLandmark.RIGHT_EAR),
                )
                if yaw == "forward":
                    continue

                # Store plain numbers rather than MediaPipe landmark objects.
                extracted_landmarks = [(lm.x, lm.y, lm.z, lm.visibility) for lm in landmarks]
                landmarks2.append([extracted_landmarks, frame_number])
                # Keep the output video in step with the recorded rows.
                out.write(frame)
    finally:
        # Release the capture and writer even if a frame fails to process.
        # No HighGUI window is created here, so there is no key polling or
        # window teardown to perform.
        cap.release()
        if out is not None:
            out.release()

    return landmarks2

# Usage
# Run the extraction on one clip. `landmarks` receives the list returned by
# process_video(): one [landmark tuples, frame_number] entry per frame whose
# yaw was not "forward". Paths are relative to the notebook's working directory.
input_video = "trimmedVideo/frontView-1.mp4"  # Replace with your video path
output_video = "frontView-Leftright.mp4"
landmarks = process_video(input_video, output_video)
# Inspect a small slice (e.g. landmarks[:1]) rather than printing the whole list.


In [None]:
import pandas as pd

# Define column names: frame_number followed by x/y/z/visibility per landmark
columns = ['frame_number']
for i in range(33):  # 33 landmarks (0 to 32)
    columns.extend([f'{i}_x', f'{i}_y', f'{i}_z', f'{i}_visibility'])

# Flatten each [frame_landmarks, frame_number] entry of `landmarks` (the list
# returned by process_video) into one row. The loop variable must not be called
# `landmarks`: rebinding that name would replace the input list with the last
# frame's landmarks and break any re-run of this cell.
structured_data = []
for frame_landmarks, frame_number in landmarks:
    row = [frame_number]  # Start with frame number
    for lm in frame_landmarks:  # lm is (x, y, z, visibility)
        row.extend(lm)
    structured_data.append(row)

# Create DataFrame
landmarks_df = pd.DataFrame(structured_data, columns=columns)

landmarks_df.to_csv("dataset/1-frontViewTrimmed_frameNo.csv", index=False)

print("CSV file saved")


CSV file saved
