In [None]:
import os
import boto3
from datetime import datetime

In [None]:
# Prerequisite (run in a shell before executing this cell):
#   aws sso login --profile beta
# NOTE(review): the login hint names "beta" while the code below uses
# "sandbox" — confirm which profile is intended.

# Only export AWS_PROFILE when the caller has not already chosen one.
if "AWS_PROFILE" not in os.environ:
    os.environ["AWS_PROFILE"] = "sandbox"

# Session explicitly bound to the "sandbox" profile.
session = boto3.Session(profile_name="sandbox")

In [None]:
# Initialize the S3 client from the profile-bound session created above.
# Using `session.client` (rather than `boto3.client`) guarantees the client
# uses the "sandbox" profile even when AWS_PROFILE was already exported with a
# different value, in which case the earlier `setdefault` leaves it untouched.
s3_client = session.client("s3")

In [None]:
def list_object_versions(bucket_name, prefix=None):
    """Collect every object version and delete marker listed for a bucket.

    Uses the module-level ``s3_client`` and its ``list_object_versions``
    paginator, so all result pages are consumed.

    Args:
        bucket_name: Bucket to list.
        prefix: Optional key prefix; when falsy the whole bucket is listed.

    Returns:
        list[dict]: One record per entry, in page order (a page's versions
        come before its delete markers). Version records carry ``Key``,
        ``VersionId``, ``IsLatest``, ``LastModified`` and ``Size``; delete
        marker records carry the first four plus ``IsDeleteMarker: True``.
    """
    request = {"Bucket": bucket_name}
    if prefix:
        request["Prefix"] = prefix

    records = []
    for page in s3_client.get_paginator("list_object_versions").paginate(**request):
        # Either key may be absent from a page, hence the empty defaults.
        records.extend(
            {
                "Key": entry["Key"],
                "VersionId": entry["VersionId"],
                "IsLatest": entry["IsLatest"],
                "LastModified": entry["LastModified"],
                "Size": entry["Size"],
            }
            for entry in page.get("Versions", [])
        )
        records.extend(
            {
                "Key": marker["Key"],
                "VersionId": marker["VersionId"],
                "IsLatest": marker["IsLatest"],
                "LastModified": marker["LastModified"],
                "IsDeleteMarker": True,
            }
            for marker in page.get("DeleteMarkers", [])
        )

    return records

In [None]:
def delete_marker_version(bucket_name, key, version_id):
    """Delete one specific version of a key via the module-level ``s3_client``.

    Intended for removing delete markers, but the call itself simply deletes
    whatever version ``version_id`` identifies.

    Args:
        bucket_name: Bucket that holds the key.
        key: Object key.
        version_id: Version id to remove.

    Returns:
        The ``delete_object`` response, or ``None`` when the call raised (the
        error is printed rather than propagated).
    """
    request = {"Bucket": bucket_name, "Key": key, "VersionId": version_id}
    try:
        # Remove exactly the requested version of the key.
        result = s3_client.delete_object(**request)
        print(f"Deleted delete marker for {key} with VersionId {version_id}")
    except Exception as err:
        print(f"Error deleting delete marker: {err}")
        return None
    return result

In [None]:
# Bucket and key prefix to inspect.
bucket_name = "bb2-sandbox-datalake-raw"
prefix = "dynamo_sls_home_financing_mortgage/"

# Gather every version and delete marker found under the prefix.
object_versions = list_object_versions(bucket_name=bucket_name, prefix=prefix)

# Print one record per line for a quick visual check.
for version in object_versions:
    print(version)

In [None]:
# Identify delete markers and delete them
# for version in object_versions:
#     if version.get("IsDeleteMarker", False):  # Check if it’s a delete marker
#         delete_marker_version(bucket_name, version["Key"], version["VersionId"])

In [None]:
def delete_markers_and_versions(bucket_name, cutoff_timestamp, prefix=None, client=None):
    """
    Permanently delete object versions and delete markers older than a cutoff.

    Every version and delete marker whose ``LastModified`` is strictly earlier
    than the cutoff is removed with ``delete_object``. This is destructive and
    cannot be undone.

    Args:
        bucket_name (str): Name of the S3 bucket.
        cutoff_timestamp (int | float): Cutoff in seconds since the Unix epoch,
            interpreted as UTC. Entries modified before it are deleted.
        prefix (str, optional): Restrict the cleanup to keys under this prefix.
            Defaults to ``None``, which processes the whole bucket.
        client (optional): S3 client to use. Defaults to ``None``, in which
            case a new ``boto3.client("s3")`` is created.

    Returns:
        None. Progress and errors are printed; errors raised while listing or
        deleting are reported and stop the cleanup instead of propagating.
    """
    # Local import: the file's top-level import only brings in `datetime`.
    from datetime import timezone

    try:
        # boto3 returns timezone-aware `LastModified` values, so the cutoff
        # must be aware too: comparing a naive datetime with an aware one
        # raises TypeError, which previously aborted every run.
        cutoff_date = datetime.fromtimestamp(cutoff_timestamp, tz=timezone.utc)
    except (ValueError, OverflowError, OSError):
        # fromtimestamp signals out-of-range values with any of these.
        print("Invalid timestamp.")
        return

    s3 = client if client is not None else boto3.client("s3")
    paginator = s3.get_paginator("list_object_versions")

    request = {"Bucket": bucket_name}
    if prefix:
        request["Prefix"] = prefix

    try:
        # Walk all versions and delete markers page by page.
        for page in paginator.paginate(**request):
            groups = (
                ("version", page.get("Versions", [])),
                ("delete marker", page.get("DeleteMarkers", [])),
            )
            for label, entries in groups:
                for entry in entries:
                    if entry["LastModified"] < cutoff_date:
                        print(
                            f"Deleting {label}: Key={entry['Key']}, VersionId={entry['VersionId']}"
                        )
                        s3.delete_object(
                            Bucket=bucket_name,
                            Key=entry["Key"],
                            VersionId=entry["VersionId"],
                        )

        print("Cleanup complete.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [None]:
# Cutoff in epoch seconds; 1704067200 is 2024-01-01 00:00:00 UTC.
# Replace it with the desired timestamp before running.
cutoff_timestamp = 1_704_067_200

# Only the bucket name is passed, so this call covers the whole bucket,
# not just `prefix`.
delete_markers_and_versions(bucket_name, cutoff_timestamp)