In [18]:
import boto3
import json
import os
def get_metadata(bucket_name, metadata_file_key, s3_client):
    try:
        metadata_object = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_key)
        metadata_content = metadata_object['Body'].read().decode('utf-8')
        metadata = json.loads(metadata_content)
        return metadata
    except Exception as e:
        print(f"Error retrieving metadata from S3: {e}")
        return None


In [19]:
def flag_for_processing(key, needs_processing_dir):
    # Here you would implement logic to flag the directory for processing.
    # This could involve appending the key to a list in a file.
    flag_file_path = os.path.join(needs_processing_dir, 'to_process.txt')
    with open(flag_file_path, 'a') as file:
        file.write(key + '\n')
    print(f"Directory flagged for processing: {key}")

import os
from botocore.exceptions import ClientError

def download_data(key_prefix, local_base_dir, s3_client, bucket_name):
    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=key_prefix):
        for obj in page.get('Contents', []):
            s3_object_key = obj['Key']
            if s3_object_key.endswith('/'):
                continue
            local_file_path = os.path.join(local_base_dir, s3_object_key)
            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

            # Check if file exists and the size matches
            try:
                local_file_info = os.stat(local_file_path)
                s3_object_info = s3_client.head_object(Bucket=bucket_name, Key=s3_object_key)
                if local_file_info.st_size == s3_object_info['ContentLength']:
                    print(f"File {s3_object_key} already exists locally and is up to date. Skipping download.")
                    continue
            except (FileNotFoundError, ClientError):
                # If the file is not found locally or there's an error getting info from S3, proceed to download
                pass

            s3_client.download_file(bucket_name, s3_object_key, local_file_path)
            print(f"Downloaded {s3_object_key} to {local_file_path}")


In [23]:
def process_s3_bucket_data(bucket_name, processed_dir, needs_processing_dir):
    s3_client = boto3.client('s3')
    
    paginator = s3_client.get_paginator('list_objects_v2')
    print(f"Retrieving all S3 objects from bucket: {bucket_name}")
    page_iterator = paginator.paginate(Bucket=bucket_name)

    for page in page_iterator:
        for content in page.get('Contents', []):
            key = content['Key']
            if key.endswith('/metadata.json'):  # It's a metadata file
                metadata = get_metadata(bucket_name, key, s3_client)
                if metadata and 'cameraInfo' in metadata:
                    # New data directory, flag for processing
                    flag_for_processing('/'.join(key.split('/')[:-1]) + '/', needs_processing_dir)
                elif metadata:
                    # Old data directory, download as is
                    download_data('/'.join(key.split('/')[:-1]) + '/', processed_dir, s3_client, bucket_name)
            else:
                print(f"Processing key: {key}")

In [16]:
# Example usage:
bucket_name = 'eye-gaze-data'
processed_dir = './data/processed'
needs_processing_dir = './data/needs_processing'

# os.makedirs(processed_dir, exist_ok=True)
# os.makedirs(needs_processing_dir, exist_ok=True)

# process_s3_bucket_data(bucket_name, processed_dir, needs_processing_dir)

# Process un-processed images

In [14]:
from image_processing import * 
import dlib
import cv2
import numpy as np
import os
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')


In [7]:
def read_directories_to_process(file_path):
    with open(file_path, 'r') as file:
        directories = file.read().splitlines()
    return directories


In [8]:
def get_camera_info(metadata_file_path):
    with open(metadata_file_path, 'r') as metadata_file:
        metadata = json.load(metadata_file)
    return metadata['screenData'], metadata['cameraInfo']

In [9]:
def pre_process_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    dlib_faces = detector(gray)

    processed_data = []

    for dlib_face in dlib_faces:
        shape = predictor(gray, dlib_face)

        for (i, (start, end)) in enumerate([(36,42), (42,48)]):
            eye_image, (eye_min_x, eye_min_y, eye_max_x, eye_max_y) = extract_eye_region(gray, shape, range(start, end))
            eye_image_b = convert_eye_to_binary(eye_image)
            pupil_center, _ = detect_pupil(eye_image_b)

            if pupil_center:
                pupil_center_global = (pupil_center[0] + eye_min_x, pupil_center[1] + eye_min_y)
                pupil_center_global = tuple(pc.item() if isinstance(pc, np.generic) else pc for pc in pupil_center_global)
                bounding_box = (eye_min_x, eye_min_y, eye_max_x - eye_min_x, eye_max_y - eye_min_y)
                bounding_box = tuple(bb.item() if isinstance(bb, np.generic) else bb for bb in bounding_box)


                eye_data = {
                    'eye_position': 'left' if i == 0 else 'right',
                    'pupil_center': pupil_center_global,
                    'bounding_box': bounding_box
                }
                processed_data.append(eye_data)

                left_eye_info = None
                right_eye_info = None
                left_eye_bbox = None
                right_eye_bbox = None

                for eye_data in processed_data:
                    if eye_data['eye_position'] == 'left':
                        left_eye_info = eye_data['pupil_center']
                        left_eye_bbox = eye_data['bounding_box']
                    else:
                        right_eye_info = eye_data['pupil_center']
                        right_eye_bbox = eye_data['bounding_box']
        break
    return processed_data, left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, shape

In [10]:
import pandas as pd

def process_images_in_directory(directory_path, camera_matrix, dist_coeffs, processed_dir):
    # Assuming the structure has subfolders for calibration and eye-gaze data
    calibration_dir = os.path.join(directory_path, 'calibration_images')
    eye_gaze_dir = os.path.join(directory_path, 'eye_gaze_images')

    processed_results = []

    for image_dir in [calibration_dir, eye_gaze_dir]:
        if os.path.exists(image_dir):
            image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith('.png')]

            for image_file in image_files:
                image = cv2.imread(image_file)
                if image is not None:
                    processed_data, left_eye_info, right_eye_info, left_eye_bbox, right_eye_bbox, shape = pre_process_image(image, camera_matrix, dist_coeffs)
                    processed_results.append({
                        'image_file': image_file,
                        'processed_data': processed_data,
                        'type': 'calibration' if image_dir == calibration_dir else 'eye_gaze',
                        'left_eye_info': left_eye_info,
                        'right_eye_info': right_eye_info,
                        'left_eye_bbox': left_eye_bbox,
                        'right_eye_bbox': right_eye_bbox,
                        'shape': shape
                    })

                    # Save the processed image if needed
                    processed_image_path = os.path.join(processed_dir, os.path.relpath(image_file, directory_path))
                    os.makedirs(os.path.dirname(processed_image_path), exist_ok=True)
                    cv2.imwrite(processed_image_path, image)  # Assuming you want to save the original image

    return processed_results


In [22]:
def save_processed_data_to_csv(processed_results, processed_dir):
    for result in processed_results:
        csv_data = {
            # Extract and structure the data you want to save to CSV
            'image_file': result['image_file'],
            'eye_position': result['processed_data']['eye_position'] if 'eye_position' in result['processed_data'] else '',
            'pupil_center_x': result['processed_data']['pupil_center'][0] if 'pupil_center' in result['processed_data'] else '',
            'pupil_center_y': result['processed_data']['pupil_center'][1] if 'pupil_center' in result['processed_data'] else '',
            # Add other fields as needed
        }
        csv_file_path = os.path.join(processed_dir, os.path.basename(result['image_file']).replace('.png', '.csv'))
        df = pd.DataFrame([csv_data])
        df.to_csv(csv_file_path, index=False)

def process_directories(s3_client, bucket_name, directories, processed_dir):
    print(f"Processing {directories} directories")
    for directory in directories:
        # Handle metadata and download images
        metadata_file_path = os.path.join(directory, 'metadata.json')
        screen_data, camera_info = get_camera_info(metadata_file_path)
        camera_matrix = np.array(camera_info[0])
        dist_coeffs = np.array(camera_info[1])
        download_data(directory, processed_dir, s3_client, bucket_name)

        # Process images in the directory
        directory_path = os.path.join(processed_dir, directory)
        processed_results = process_images_in_directory(directory_path, camera_matrix, dist_coeffs, processed_dir)

        # Save processed results and d
        save_processed_data_to_csv(processed_results, processed_dir)


In [24]:
process_directories('s3', 'eye-gaze-data', read_directories_to_process(os.path.join(needs_processing_dir, 'to_process.txt')), processed_dir)

Processing ['data/test/'] directories


FileNotFoundError: [Errno 2] No such file or directory: './data/processed\\data/test/metadata.json'