In [1]:
import boto3
from typing import Tuple

In [9]:
# Target deployment environment. It is interpolated into the AWS profile name
# (f"data-{env}") when the profile-based S3 session is created further down.
env = "prod"

In [10]:
# S3 URI of the table root. Later cells split it into bucket and key prefix and
# append "_delta_log/" to probe for the table's Delta log directory.
# NOTE(review): the bucket name hard-codes "prod" independently of `env` —
# confirm whether it is meant to follow `env`.
output_path = "s3://heroes-dl-bronze-prod/amazon/settlement_report_data"

In [11]:
def get_bucket_and_prefix(path: str) -> Tuple[str, str]:
    """
    Split a ``scheme://bucket/key`` URI into its bucket and key prefix.

    Args:
        path (str): URI of the form ``<scheme>://<bucket>[/<prefix>]``,
            e.g. ``s3://my-bucket/my-folder``.

    Returns:
        Tuple[str, str]: ``(bucket, prefix)``. ``prefix`` is everything after
        the first "/" that follows the bucket, returned verbatim (trailing
        slashes are kept); it is ``""`` when the URI names only a bucket.

    Raises:
        ValueError: If ``path`` has no ``scheme://`` part or no bucket name.

    Examples:
        >>> get_bucket_and_prefix("s3://my-bucket/my-folder/my-file.txt")
        ('my-bucket', 'my-folder/my-file.txt')
        >>> get_bucket_and_prefix("s3://my-bucket")
        ('my-bucket', '')
    """
    # Validate the shape explicitly instead of indexing into path.split("/"):
    # positional indexing raises an opaque IndexError on short inputs and
    # silently returns the wrong segment as the bucket for inputs such as
    # "s3:/bucket/key" or "/a/b/c".
    scheme, separator, remainder = path.partition("://")
    if not separator or not scheme or "/" in scheme:
        raise ValueError(f"Expected a URI like 's3://bucket/prefix', got {path!r}")

    # The bucket is the first segment after the scheme; the rest is the prefix.
    bucket, _, prefix = remainder.partition("/")
    if not bucket:
        raise ValueError(f"No bucket name found in {path!r}")

    return bucket, prefix

In [22]:
def check_delta_table_exists(bucket_name: str, prefix: str, s3_client=None) -> bool:
    """
    Check whether at least one object exists under a prefix in an S3 bucket.

    Args:
        bucket_name (str): The name of the S3 bucket to check.
        prefix (str): The key prefix to look for. S3 matches prefixes as plain
            strings, so end it with "/" (e.g. ``table/_delta_log/``) to avoid
            also matching sibling keys such as ``table/_delta_log_test/``.
        s3_client: Optional S3 client to use (for example one created from a
            profile-specific ``boto3.Session``). When omitted, a client is
            created with ``boto3.client("s3")``.

    Returns:
        bool: True if at least one key starts with ``prefix``, False otherwise.

    Raises:
        Exception: Any error raised by the S3 call is logged and re-raised, so
            a failed lookup is never mistaken for "path does not exist".

    Example:
        >>> check_delta_table_exists('my-s3-bucket', 'path/to/check/')
        True
    """
    if s3_client is None:
        s3_client = boto3.client("s3")

    try:
        # A single matching key is enough to prove existence, so cap the
        # listing at one key instead of fetching up to a full page.
        response = s3_client.list_objects_v2(
            Bucket=bucket_name, Prefix=prefix, MaxKeys=1
        )
    except Exception as e:
        print(f"An error occurred when validating delta path: {e}")
        # Propagate: returning None here would break the bool contract and let
        # callers treat an access or network failure as a missing table.
        raise

    # The response only contains a "Contents" entry when a key matched.
    if "Contents" in response:
        print(f"The path '{prefix}' exists in the S3 bucket '{bucket_name}'.")
        return True

    print(f"The path '{prefix}' does not exist in the S3 bucket '{bucket_name}'.")
    return False

In [26]:
# Derive the `_delta_log/` key prefix from the table root. The table-root prefix
# gets its own name so that `prefix` only ever refers to the `_delta_log/` prefix.
bucket, table_prefix = get_bucket_and_prefix(output_path)

# Strip any trailing "/" from the table root so the result never contains "//";
# S3 keys are literal strings, so a doubled slash would match nothing.
prefix = f"{table_prefix.rstrip('/')}/_delta_log/"

print(bucket, prefix)

heroes-dl-bronze-prod amazon/settlement_report_data/_delta_log/


In [27]:
# Look for objects under the `_delta_log/` prefix computed above and show the result.
exists = check_delta_table_exists(bucket_name=bucket, prefix=prefix)
print(exists)

The path 'amazon/settlement_report_data/_delta_log/' exists in the S3 bucket 'heroes-dl-bronze-prod'.
True


In [19]:
# Ad-hoc existence probe that talks to S3 through the named profile for `env`.
s3_client = boto3.Session(profile_name=f"data-{env}").client("s3")

# Target location: a prefix under the settlement-report table root.
bucket_name = "heroes-dl-bronze-prod"
path_to_check = "amazon/settlement_report_data/_delta_log_test/"

try:
    # Ask S3 for the keys that start with the prefix and dump the raw reply.
    response = s3_client.list_objects_v2(Prefix=path_to_check, Bucket=bucket_name)
    print(response)

    # The reply carries a "Contents" entry only when at least one key matched.
    if "Contents" not in response:
        print(
            f"The path '{path_to_check}' does not exist in the S3 bucket '{bucket_name}'."
        )
    else:
        print(f"The path '{path_to_check}' exists in the S3 bucket '{bucket_name}'.")
except Exception as err:
    print(f"An error occurred: {err}")

{'ResponseMetadata': {'RequestId': '93P26S3DTP3M79FR', 'HostId': '7iSCXIpvCTeivh0mWfWWV6GVGZkLD1ZiXj41CNBAwN9bkVlILyYI0KpSJJLJysuS0LTJybmBJyo=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': '7iSCXIpvCTeivh0mWfWWV6GVGZkLD1ZiXj41CNBAwN9bkVlILyYI0KpSJJLJysuS0LTJybmBJyo=', 'x-amz-request-id': '93P26S3DTP3M79FR', 'date': 'Thu, 07 Sep 2023 08:40:34 GMT', 'x-amz-bucket-region': 'eu-west-1', 'content-type': 'application/xml', 'transfer-encoding': 'chunked', 'server': 'AmazonS3'}, 'RetryAttempts': 0}, 'IsTruncated': False, 'Name': 'heroes-dl-bronze-prod', 'Prefix': 'amazon/settlement_report_data/_delta_log_test/', 'MaxKeys': 1000, 'EncodingType': 'url', 'KeyCount': 0}
The path 'amazon/settlement_report_data/_delta_log_test/' does not exist in the S3 bucket 'heroes-dl-bronze-prod'.
