In [1]:
"""
This notebook shows an example of using Boto3 to query the Griot and Grits S3 bucket.
Going through the S3 API is the PREFERRED way of getting data from the object storage.
It is preferred because it is not tied to a specific object storage offering;
the S3 API abstracts the backend in case we switch the underlying object storage
technology in the future.
"""

!pip install boto3

import os
import boto3

# Connection settings come from the environment so that no credentials are
# stored in the notebook itself. A variable that is not set yields None.
AWS_S3_ENDPOINT = os.environ.get("AWS_S3_ENDPOINT")              # endpoint of the object store
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")          # access key passed to the client
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")  # secret key passed to the client
AWS_S3_BUCKET = os.environ.get("AWS_S3_BUCKET")                  # bucket name


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Function to recursively list objects
def list_objects_recursively(bucket_name, prefix="", client=None):
    """Print the key of every object in a bucket, one key per line.

    The listing goes through the ``list_objects_v2`` paginator, so every page
    of results is visited rather than only the first one.

    Args:
        bucket_name: Name of the bucket, passed as ``Bucket`` to the paginator.
        prefix: Passed as ``Prefix`` to the paginator. The default empty string
            applies no prefix restriction.
        client: Object providing ``get_paginator`` (for example a boto3 S3
            client). When omitted, the notebook-level ``s3_client`` is looked
            up at call time, which keeps existing two-argument calls working.

    Returns:
        None. The keys are written to standard output.
    """
    if client is None:
        # Backward-compatible fallback to the global created in a later cell;
        # passing ``client`` explicitly avoids that hidden dependency.
        client = s3_client

    paginator = client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # Tolerate pages that carry no 'Contents' entry.
        for obj in page.get('Contents', []):
            print(obj['Key'])

In [3]:
# Initialize the boto3 client with the endpoint URL.
# os.getenv() yields None for an unset variable, and 'https://' + None would fail
# with an unhelpful TypeError, so report a missing endpoint explicitly.
if not AWS_S3_ENDPOINT:
    raise ValueError(
        "AWS_S3_ENDPOINT is not set; export it before running this notebook"
    )

# Accept the endpoint with or without a scheme, so a value that already starts
# with http(s):// does not become 'https://https://...'. Default to https.
if AWS_S3_ENDPOINT.startswith(('http://', 'https://')):
    endpoint_url = AWS_S3_ENDPOINT
else:
    endpoint_url = 'https://' + AWS_S3_ENDPOINT

s3_client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    endpoint_url=endpoint_url,
    # NOTE(review): region is hard-coded; confirm it suits the configured endpoint.
    region_name='us-east-1',
)

# Walk every bucket returned by list_buckets() and print each object key in it.
response = s3_client.list_buckets()
for bucket in response['Buckets']:
    print('Bucket:\n' + bucket['Name'] + '\n')

    print('Objects in bucket: ')
    # Call the function
    list_objects_recursively(bucket['Name'])

Bucket:
griot-grits-catalog

Objects in bucket: 
assets/raw/recordings/mcduffie-sharon-20241221-1of2.mp4
assets/raw/recordings/sample_video.mp4
