In [114]:
import boto3
import json
import os
from backend.image_processing import *
import dlib
import cv2
import pandas as pd
from pathlib import Path
import pandas as pd
import numpy as np
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('./backend/shape_predictor_68_face_landmarks.dat')

def get_metadata(bucket_name, metadata_file_key, s3_client):
    try:
        metadata_object = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_key)
        metadata_content = metadata_object['Body'].read().decode('utf-8')
        metadata = json.loads(metadata_content)
        return metadata
    except Exception as e:
        print(f"Error retrieving metadata from S3: {e}")
        return None


In [115]:

# Your existing get_camera_info function
def get_camera_info(metadata):
    camera_matrix = np.array(metadata['cameraInfo'][0], dtype='double')
    dist_coeffs = np.array(metadata['cameraInfo'][1], dtype='double')
    return camera_matrix, dist_coeffs


In [116]:
def pre_process_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    dlib_faces = detector(gray)

    processed_data = []

    for dlib_face in dlib_faces:
        shape = predictor(gray, dlib_face)

        for (i, (start, end)) in enumerate([(36,42), (42,48)]):
            eye_image, (eye_min_x, eye_min_y, eye_max_x, eye_max_y) = extract_eye_region(gray, shape, range(start, end))
            eye_image_b = convert_eye_to_binary(eye_image)
            pupil_center, _ = detect_pupil(eye_image_b)

            if pupil_center:
                pupil_center_global = (pupil_center[0] + eye_min_x, pupil_center[1] + eye_min_y)
                pupil_center_global = tuple(pc.item() if isinstance(pc, np.generic) else pc for pc in pupil_center_global)
                bounding_box = (eye_min_x, eye_min_y, eye_max_x - eye_min_x, eye_max_y - eye_min_y)
                bounding_box = tuple(bb.item() if isinstance(bb, np.generic) else bb for bb in bounding_box)


                eye_data = {
                    'eye_position': 'left' if i == 0 else 'right',
                    'pupil_center': pupil_center_global,
                    'bounding_box': bounding_box
                }
                processed_data.append(eye_data)

                left_eye_info = None
                right_eye_info = None
                left_eye_bbox = None
                right_eye_bbox = None

                for eye_data in processed_data:
                    if eye_data['eye_position'] == 'left':
                        left_eye_info = eye_data['pupil_center']
                        left_eye_bbox = eye_data['bounding_box']
                    else:
                        right_eye_info = eye_data['pupil_center']
                        right_eye_bbox = eye_data['bounding_box']
        break
    return processed_data, left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, shape

In [117]:
def format_calibration_data_row( left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, head_pose):
    rotation_vector, translation_vector = head_pose if head_pose else (np.zeros((3, 1)), np.zeros((3, 1)))

    rotation_vector_str = ','.join(map(str, rotation_vector.flatten()))
    translation_vector_str = ','.join(map(str, translation_vector.flatten()))

    data_row = [
        left_eye_info[0], left_eye_info[1],
        *left_eye_bbox,
        right_eye_info[0], right_eye_info[1],
        *right_eye_bbox,
        rotation_vector_str, translation_vector_str
    ]
    return data_row

def format_eye_gaze_data_row(left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, head_pose):
    rotation_vector, translation_vector = head_pose if head_pose else (np.zeros((3, 1)), np.zeros((3, 1)))

    rotation_vector_str = ','.join(map(str, rotation_vector.flatten()))
    translation_vector_str = ','.join(map(str, translation_vector.flatten()))

    data_row = [
        left_eye_info[0], left_eye_info[1],
        *left_eye_bbox,
        right_eye_info[0], right_eye_info[1],
        *right_eye_bbox,
        rotation_vector_str, translation_vector_str
    ]
    return data_row

In [118]:

def update_csv(csv_path, data_dict, num_existing_columns):
    # Load the existing CSV data, handling variable number of columns
    try:
        existing_data = pd.read_csv(csv_path, header=None)
    except pd.errors.EmptyDataError:
        # Create a new DataFrame if the CSV is empty or doesn't exist
        existing_data = pd.DataFrame()

    # If the existing data has fewer columns than expected, pad with NaNs
    if existing_data.shape[1] < num_existing_columns:
        existing_data = existing_data.reindex(columns=range(num_existing_columns), fill_value=np.nan)
    
    # Iterate over the data dictionary and update/add rows
    for image_name, new_data in data_dict.items():
        # Find the row index if the image name already exists in the CSV
        row_index = existing_data[existing_data[0] == image_name].index
        if row_index.empty:
            # If the image name does not exist, append a new row
            new_row = [image_name] + new_data
            existing_data.loc[len(existing_data)] = new_row
        else:
            # Update the existing row with new data
            existing_data.loc[row_index[0], 1:] = new_data

    # Write the updated data back to the CSV file
    existing_data.to_csv(csv_path, index=False, header=False)


In [119]:
def process_image_and_update_csv(image_path, csv_path, camera_matrix, dist_coeffs, num_existing_columns, formatting_function):
    image = cv2.imread(image_path)
    processed_data, left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, shape = pre_process_image(image)
    head_pose = get_head_pose(shape, camera_matrix, dist_coeffs)  # Assuming get_head_pose is defined

    # Format data row
    row = formatting_function(left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, head_pose)
    
    # Update CSV
    data_dict = {Path(image_path).name: row}
    update_csv(csv_path, data_dict, num_existing_columns)


In [120]:
def process_calibration_images(calibration_image_paths, calibration_csv_path, camera_matrix, dist_coeffs, num_existing_columns_calibration):
    for image_path in calibration_image_paths:
        if os.path.isfile(image_path):
            process_image_and_update_csv(image_path, calibration_csv_path, camera_matrix, dist_coeffs, num_existing_columns_calibration, format_calibration_data_row)

def process_eye_gaze_images(eye_gaze_image_paths, eye_gaze_csv_path, camera_matrix, dist_coeffs, num_existing_columns_eye_gaze):
    for image_path in eye_gaze_image_paths:
        if os.path.isfile(image_path):
            process_image_and_update_csv(image_path, eye_gaze_csv_path, camera_matrix, dist_coeffs, num_existing_columns_eye_gaze, format_eye_gaze_data_row)


In [121]:
def flag_for_processing(key, needs_processing_dir):
    # Here you would implement logic to flag the directory for processing.
    # This could involve appending the key to a list in a file.
    flag_file_path = os.path.join(needs_processing_dir, 'to_process.txt')
    with open(flag_file_path, 'a') as file:
        file.write(key + '\n')
    print(f"Directory flagged for processing: {key}")


def download_and_process_data(key_prefix, local_base_dir, s3_client, bucket_name, metadata):
    calibration_images = []
    eye_gaze_images = []

    is_calibration_data = "calibration" in key_prefix
    camera_matrix, dist_coeffs = get_camera_info(metadata)

    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=key_prefix):
        for obj in page.get('Contents', []):
            s3_object_key = obj['Key']
            if s3_object_key.endswith('/'):
                continue
            local_file_path = os.path.join(local_base_dir, s3_object_key)
            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

            try:
                local_file_info = os.stat(local_file_path)
                s3_object_info = s3_client.head_object(Bucket=bucket_name, Key=s3_object_key)
                if local_file_info.st_size == s3_object_info['ContentLength']:
                    print(f"File {s3_object_key} already exists locally and is up to date. Skipping download.")
                    continue
                s3_client.download_file(bucket_name, s3_object_key, local_file_path)
                print(f"Downloaded {s3_object_key} to {local_file_path}")

                if is_calibration_data:
                    calibration_images.append(local_file_path)
                else:
                    eye_gaze_images.append(local_file_path)
            except (FileNotFoundError):
                pass

    if is_calibration_data:
        num_existing_columns_calibration = 3
        calibration_csv_path = os.path.join(local_base_dir, "calibration_data.csv")
        process_calibration_images(calibration_images, calibration_csv_path, camera_matrix, dist_coeffs, num_existing_columns_calibration)
    else:
        num_existing_columns_eye_gaze = 3
        eye_gaze_csv_path = os.path.join(local_base_dir, "eye_gaze_data.csv")
        process_eye_gaze_images(eye_gaze_images, eye_gaze_csv_path, camera_matrix, dist_coeffs, num_existing_columns_eye_gaze)


In [122]:
def process_s3_bucket_data(bucket_name):
    s3_client = boto3.client('s3')
    paginator = s3_client.get_paginator('list_objects_v2')
    print(f"Retrieving all S3 objects from bucket: {bucket_name}")
    page_iterator = paginator.paginate(Bucket=bucket_name)

    for page in page_iterator:
        for content in page.get('Contents', []):
            key = content['Key']
            if key.endswith('/metadata.json'):
                metadata = get_metadata(bucket_name, key, s3_client)
                if metadata and 'cameraInfo' in metadata:
                    data_directory = '/'.join(key.split('/')[:-1]) + '/'
                    download_and_process_data(data_directory, './data', s3_client, bucket_name, metadata)
            else:
                print(f"Processing key: {key}")

In [123]:
# Main execution
bucket_name = 'eye-gaze-data'
process_s3_bucket_data(bucket_name)

Retrieving all S3 objects from bucket: eye-gaze-data
Processing key: data/Hossein/calibration_data.csv
Processing key: data/Hossein/calibration_images/Hossein_04abe5a2-0e23-4e8c-87a5-5dbb8e48ba55.png
Processing key: data/Hossein/calibration_images/Hossein_08cb8659-ee4e-44d9-9547-6260ac36c5ce.png
Processing key: data/Hossein/calibration_images/Hossein_0b8c9fa8-7184-44ed-bc15-af08563f6e17.png
Processing key: data/Hossein/calibration_images/Hossein_0eef3086-8737-4426-b427-2069c3c38e22.png
Processing key: data/Hossein/calibration_images/Hossein_19561d89-1c54-4f78-91d2-41ae8405974c.png
Processing key: data/Hossein/calibration_images/Hossein_24eaef3e-24f3-46df-8738-19cb73a20327.png
Processing key: data/Hossein/calibration_images/Hossein_2990d0e8-73be-49b6-a63d-5800f4842e6d.png
Processing key: data/Hossein/calibration_images/Hossein_31e81440-74de-4433-a639-2afd728022ca.png
Processing key: data/Hossein/calibration_images/Hossein_33e932e7-305d-40e6-bc77-7162a24916f5.png
Processing key: data/Hos

# Process un-processed images

In [71]:


# Process all subdirectories in needs_processing
def process_all_subdirectories(needs_processing_dir):
    for subdir_name in os.listdir(needs_processing_dir):
        subdir_path = os.path.join(needs_processing_dir, subdir_name)
        if os.path.isdir(subdir_path):
            # Assuming that metadata.json is directly under each subdirectory
            metadata_file_path = os.path.join(subdir_path, 'metadata.json')
            camera_matrix, dist_coeffs = get_camera_info(metadata_file_path)
            process_directory(subdir_path, camera_matrix, dist_coeffs)


In [72]:
needs_processing_dir = './data/needs_processing/data/'
process_all_subdirectories(needs_processing_dir)

ValueError: Must have equal len keys and value when setting with an iterable