In [6]:
import boto3
import json
import os
def get_metadata(bucket_name, metadata_file_key, s3_client):
    """Fetch a JSON object from S3 and return its parsed content.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        metadata_file_key: Key of the JSON object inside the bucket.
        s3_client: Client exposing a boto3-style ``get_object`` method.

    Returns:
        The value produced by ``json.loads`` for the object's UTF-8 body, or
        ``None`` if anything goes wrong (the error is printed, not raised).
    """
    try:
        s3_response = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_key)
        raw_bytes = s3_response['Body'].read()
        return json.loads(raw_bytes.decode('utf-8'))
    except Exception as e:
        # Best-effort: report the failure and fall through to the None result.
        print(f"Error retrieving metadata from S3: {e}")
    return None


In [3]:
def list_eye_gaze_images(bucket_name, eye_gaze_images_folder, s3_client):
    """List the keys of every object stored under a prefix in an S3 bucket.

    ``list_objects_v2`` returns results one page at a time, so this follows
    the continuation token until the listing is no longer truncated instead
    of silently returning only the first page.

    Args:
        bucket_name: Name of the S3 bucket to list.
        eye_gaze_images_folder: Key prefix to list under.
        s3_client: Client exposing a boto3-style ``list_objects_v2`` method.

    Returns:
        A list of object keys in listing order. An empty list is returned
        when nothing matches or when any request fails (the error is printed,
        not raised).
    """
    request = {'Bucket': bucket_name, 'Prefix': eye_gaze_images_folder}
    image_paths = []
    try:
        while True:
            response = s3_client.list_objects_v2(**request)
            image_paths.extend(item['Key'] for item in response.get('Contents', []))

            next_token = response.get('NextContinuationToken')
            if not response.get('IsTruncated') or not next_token:
                break
            # Ask for the page that follows the one just consumed.
            request['ContinuationToken'] = next_token
        return image_paths
    except Exception as e:
        print(f"Error listing eye gaze images from S3: {e}")
        return []


In [4]:
def get_eye_gaze_data(bucket_name, csv_file_key, s3_client):
    """Download an object from S3 and return its body as UTF-8 text.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        csv_file_key: Key of the CSV object inside the bucket.
        s3_client: Client exposing a boto3-style ``get_object`` method.

    Returns:
        The decoded object body as a single string (no CSV parsing is done),
        or ``None`` if anything goes wrong (the error is printed, not raised).
    """
    try:
        s3_response = s3_client.get_object(Bucket=bucket_name, Key=csv_file_key)
        return s3_response['Body'].read().decode('utf-8')
    except Exception as e:
        # Best-effort: report the failure and fall through to the None result.
        print(f"Error retrieving eye gaze CSV from S3: {e}")
    return None


In [5]:
# Locations of the test recording inside the bucket.
bucket_name = 'eye-gaze-data'
metadata_file_key = 'data/test/metadata.json'
eye_gaze_images_folder = 'data/test/eye_gaze_images/'
csv_file_key = 'data/test/eye_gaze_data.csv'

# One shared boto3 S3 client for every request below.
s3_client = boto3.client('s3')

# Parsed metadata.json (None if the fetch failed).
metadata = get_metadata(bucket_name, metadata_file_key, s3_client)
print("Metadata:", metadata)

# Keys of the objects stored under the eye gaze images prefix.
eye_gaze_image_paths = list_eye_gaze_images(
    bucket_name, eye_gaze_images_folder, s3_client
)
print("Eye Gaze Image Paths:", eye_gaze_image_paths)

# Raw CSV text of the eye gaze samples (None if the fetch failed).
eye_gaze_csv_data = get_eye_gaze_data(bucket_name, csv_file_key, s3_client)
print("Eye Gaze CSV Data:", eye_gaze_csv_data)


Metadata: {'screenData': {'screenWidth': 1707, 'screenHeight': 960, 'devicePixelRatio': 1.5}, 'cameraInfo': [[[560, 0, 320], [0, 560, 240], [0, 0, 1]], [0, 0, 0, 0, 0]]}
Eye Gaze Image Paths: ['data/test/eye_gaze_images/test_1630a44c-f4e9-471a-84a6-2a09290dc1e6.png', 'data/test/eye_gaze_images/test_1a12493e-555f-489c-81af-24594304d193.png', 'data/test/eye_gaze_images/test_271aa562-c66d-4843-a8a4-f48ed9e7ca0f.png', 'data/test/eye_gaze_images/test_2b8fc677-37df-426c-8ab7-cbe3c3d4bf38.png', 'data/test/eye_gaze_images/test_6d47b255-3db7-4ef9-98f7-fc0e354151f1.png', 'data/test/eye_gaze_images/test_6e1d913c-afb9-4b9b-bdc6-b96b1eb66ec9.png', 'data/test/eye_gaze_images/test_8e398e94-4534-47bb-89ab-094c648b36ec.png', 'data/test/eye_gaze_images/test_a5ae277a-73c2-4cba-b49e-141d474b8f7d.png', 'data/test/eye_gaze_images/test_ba22109f-025a-41ed-9669-ab357da4f12d.png', 'data/test/eye_gaze_images/test_c7856637-9789-4f75-b56d-41cb51434e08.png', 'data/test/eye_gaze_images/test_f1ea7c01-3b27-460d-b638-5

In [None]:
def flag_for_processing(key, needs_processing_dir):
    """Record an S3 key as needing processing.

    Appends ``key`` as one line to ``to_process.txt`` inside
    ``needs_processing_dir`` and prints a confirmation.

    Args:
        key: S3 key (directory prefix) to flag.
        needs_processing_dir: Local directory holding the flag file. It is
            created, including missing parents, if it does not exist yet.
    """
    # Opening a file in append mode does not create its parent directories,
    # so make sure the target directory exists first.
    os.makedirs(needs_processing_dir, exist_ok=True)

    flag_file_path = os.path.join(needs_processing_dir, 'to_process.txt')
    # Explicit UTF-8 so non-ASCII keys are written the same way on every platform.
    with open(flag_file_path, 'a', encoding='utf-8') as file:
        file.write(key + '\n')
    print(f"Directory flagged for processing: {key}")

def download_data(key, processed_dir, s3_client, bucket=None):
    """Download every object under an S3 prefix into a local directory.

    Each object is saved in ``processed_dir`` under the last path segment of
    its key, and a confirmation is printed per file.

    Args:
        key: Key prefix shared by all objects to download.
        processed_dir: Local destination directory. It is created, including
            missing parents, if it does not exist yet.
        s3_client: Client exposing boto3-style ``list_objects_v2`` and
            ``download_file`` methods.
        bucket: Bucket to read from. When omitted, the module-level
            ``bucket_name`` is used, which is what this function always did
            before the parameter existed.
    """
    if bucket is None:
        bucket = bucket_name

    os.makedirs(processed_dir, exist_ok=True)

    request = {'Bucket': bucket, 'Prefix': key}
    while True:
        response = s3_client.list_objects_v2(**request)

        for obj in response.get('Contents', []):
            file_key = obj['Key']
            file_name = file_key.split('/')[-1]  # Extract the file name
            if not file_name:
                # Keys ending in '/' are folder placeholders with nothing to save.
                continue
            # NOTE: the layout is flattened, so objects in different
            # sub-prefixes that share a file name overwrite each other.
            local_file_path = os.path.join(processed_dir, file_name)

            s3_client.download_file(bucket, file_key, local_file_path)
            print(f"Downloaded {file_name} to {local_file_path}")

        # Listings are paginated; keep going until the last page is consumed.
        next_token = response.get('NextContinuationToken')
        if not response.get('IsTruncated') or not next_token:
            break
        request['ContinuationToken'] = next_token




In [None]:

def process_s3_bucket_data(bucket_name, processed_dir, needs_processing_dir):
    """Sort the data directories of a bucket into "flag" and "download".

    Every object named ``metadata.json`` marks a data directory (the key
    prefix it lives under). If the parsed metadata contains ``cameraInfo``
    the directory is treated as new data and flagged via
    ``flag_for_processing``; otherwise it is treated as old data and fetched
    via ``download_data``. Directories whose metadata cannot be read are
    skipped with a message.

    Args:
        bucket_name: Name of the S3 bucket to scan.
        processed_dir: Local directory that receives old-format data.
        needs_processing_dir: Local directory holding the flag file for
            new-format data.
    """
    s3_client = boto3.client('s3')
    metadata_file_name = 'metadata.json'

    request = {'Bucket': bucket_name}
    while True:
        # Get one page of the objects in the bucket
        bucket_contents = s3_client.list_objects_v2(**request)

        for content in bucket_contents.get('Contents', []):
            key = content['Key']
            # A key cannot both end with '/' and name the metadata file, so
            # match on the file itself and derive its directory from it.
            if key.split('/')[-1] != metadata_file_name:
                continue
            # Prefix of the containing directory; '' means the bucket root.
            directory_key = key[:-len(metadata_file_name)]

            metadata = get_metadata(bucket_name, key, s3_client)
            if metadata is None:
                # get_metadata already printed the cause; the directory
                # cannot be classified without readable metadata.
                print(f"Skipping {directory_key}: metadata could not be read")
                continue

            if 'cameraInfo' in metadata:
                # This is a new data directory, flag for processing
                flag_for_processing(directory_key, needs_processing_dir)
            else:
                # This is an old data directory, download as is.
                # NOTE: with this call download_data falls back to the
                # module-level bucket_name, not this function's argument.
                download_data(directory_key, processed_dir, s3_client)

        # Listings are paginated; keep going until the last page is consumed.
        next_token = bucket_contents.get('NextContinuationToken')
        if not bucket_contents.get('IsTruncated') or not next_token:
            break
        request['ContinuationToken'] = next_token
                    



In [None]:
# Example usage: replace the bucket placeholder and local paths before running.
bucket_name = 'your-bucket-name'
needs_processing_dir = './data/needs_processing'
processed_dir = './data/processed'

process_s3_bucket_data(
    bucket_name=bucket_name,
    processed_dir=processed_dir,
    needs_processing_dir=needs_processing_dir,
)