# Data Lake Example 3 - Lifecycle Management

## 0. Load libraries and common configuration

In [1]:
# Install necessary packages
!pip install boto3 certifi



In [2]:
import boto3
from botocore.client import Config
import datetime
from botocore.exceptions import ClientError
import os
import ssl
import certifi
import sys
import warnings
# Suppress all Python warnings for the rest of the notebook.
# NOTE: this also hides any warning about unverified HTTPS requests, so keep
# it to demo environments.
warnings.filterwarnings('ignore')

# SSL verification errors may appear with the client if Python is not properly configured.
# The line below swaps the standard library's default HTTPS context factory
# for the unverified one to skip such errors.
# WARNING: this turns off certificate checking for HTTPS contexts created
# through that default factory -- acceptable for a lab setup, not for production.
ssl._create_default_https_context = ssl._create_unverified_context


# MinIO server connection information.
# Each value can be overridden with an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks are
# the demo values.
minio_url = os.environ.get('MINIO_URL', 'https://s3api.scene.local')  # MinIO instance URL
access_key = os.environ.get('MINIO_ACCESS_KEY', 'testuser')           # access key
secret_key = os.environ.get('MINIO_SECRET_KEY', 'testscene')          # secret key


# Initialize a session using boto3
session = boto3.session.Session()

# Create a client for the MinIO server.
# verify=False disables TLS certificate verification for this client (handy
# with self-signed certificates); remove it once the server certificate is trusted.
s3_client = session.client(
    's3',
    verify=False,
    endpoint_url=minio_url,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    config=Config(signature_version='s3v4'),
    region_name='us-east-1'  # Any region name works; not applicable here
)
print("Libraries loaded successfully")

Libraries loaded successfully


## 1.  Python Script to Move Objects from testbucket to coldbucket
This script checks testbucket for objects older than X days (e.g. X=30), copies each of them to coldbucket, and then deletes the original from testbucket.

In [3]:
# Settings for the archival step
archive_days = 30  # Age threshold in days; older objects go to cold storage
source_bucket, target_bucket = "testbucket", "coldbucket"  # where objects come from / where they are archived

def _list_all_objects(bucket):
    """Return every object entry of ``bucket``, following pagination.

    A single ``list_objects_v2`` call returns only one page of keys, so the
    call is repeated with the continuation token until the response is no
    longer flagged as truncated.
    """
    objects = []
    request = {'Bucket': bucket}
    while True:
        response = s3_client.list_objects_v2(**request)
        # 'Contents' is absent when a page holds no objects
        objects.extend(response.get('Contents', []))
        if not response.get('IsTruncated'):
            return objects
        request['ContinuationToken'] = response['NextContinuationToken']


def move_to_coldbucket():
    """Archive old objects from ``source_bucket`` into ``target_bucket``.

    Every object in ``source_bucket`` whose age exceeds ``archive_days`` whole
    days is copied to ``target_bucket`` under the same key and then deleted
    from ``source_bucket``. The three settings and ``s3_client`` are read from
    the notebook's global namespace.

    Returns:
        list[str]: a log of the actions taken. It holds one "Moved ..." entry
        per archived object (or a "No objects found" entry), an
        "Error occurred" entry if a ``ClientError`` interrupted the run, and
        always ends with the total number of objects moved.
    """
    # Initialize an action log and counter
    action_log = []
    moved_count = 0
    current_date = datetime.datetime.now(datetime.timezone.utc)

    try:
        print(f"Listing objects in bucket: {source_bucket}")
        # Take the full listing up front: all pages are collected before any
        # object is deleted, so deletions cannot interfere with pagination.
        objects = _list_all_objects(source_bucket)

        # Check if the bucket has contents
        if objects:
            print(f"Found {len(objects)} objects in {source_bucket}")
            for obj in objects:
                print(f"Processing object: {obj['Key']} - Last Modified: {obj['LastModified']}")

                # Calculate object age (both timestamps are timezone-aware)
                object_age = current_date - obj['LastModified']
                print(f"Object age: {object_age.days} days")

                # Check if the object is older than the archive threshold
                if object_age.days > archive_days:
                    # Copy object to target (cold) bucket
                    copy_source = {'Bucket': source_bucket, 'Key': obj['Key']}
                    s3_client.copy_object(
                        CopySource=copy_source,
                        Bucket=target_bucket,
                        Key=obj['Key']
                    )
                    # Delete the original only after the copy call returned
                    s3_client.delete_object(Bucket=source_bucket, Key=obj['Key'])

                    # Increment counter and log action
                    moved_count += 1
                    action_log.append(f"Moved {obj['Key']} from {source_bucket} to {target_bucket}")
        else:
            print("No objects found in the source bucket.")
            action_log.append("No objects found in the source bucket.")
    except ClientError as e:
        # A failure stops the run; objects moved so far stay moved
        print("Error occurred:", e)
        action_log.append(f"Error occurred: {e}")

    # Record the final count of moved objects
    action_log.append(f"Total objects moved to {target_bucket}: {moved_count}")

    return action_log  # Return the log of actions

# Execute the archival pass, then echo every recorded action on its own line
log = move_to_coldbucket()
for message in log:
    print(message)



Listing objects in bucket: testbucket
Found 2 objects in testbucket
Processing object: athens.png - Last Modified: 2024-11-06 08:28:15.989000+00:00
Object age: 153 days
Processing object: athens.webm - Last Modified: 2025-04-08 17:55:50.220000+00:00
Object age: 0 days
Moved athens.png from testbucket to coldbucket
Total objects moved to coldbucket: 1


## 2. Apply a Deletion Policy on coldbucket
Set a lifecycle policy on coldbucket to automatically delete objects older than X days (e.g. 90 days). The example call below does not pass a value, so the function's default of 1 day is applied.

In [4]:
def apply_deletion_policy(bucket_name, expiration_days=1):
    """Put an expiration lifecycle rule on a bucket.

    Builds a lifecycle configuration with a single enabled rule
    ('DeleteOldObjects') that uses an empty prefix filter and expires objects
    after ``expiration_days`` days, then sends it with
    ``s3_client.put_bucket_lifecycle_configuration``.

    Args:
        bucket_name: name of the bucket that receives the configuration.
        expiration_days: number of days used in the rule's 'Expiration'
            entry (default 1).

    Returns:
        None. Progress and any ``ClientError`` are reported via ``print``;
        the error is not re-raised.

    NOTE(review): per the S3 API this call is documented to replace any
    lifecycle configuration already present on the bucket -- confirm before
    using on a bucket that has other rules.
    """
    # One rule; the empty prefix makes it apply to all objects in the bucket
    expiration = {'Days': expiration_days}
    rule = {
        'ID': 'DeleteOldObjects',
        'Filter': {'Prefix': ''},
        'Status': 'Enabled',
        'Expiration': expiration,
    }
    lifecycle = {'Rules': [rule]}

    print(f"Applying deletion policy to bucket: {bucket_name}")
    print(f"Policy Details: Objects will be deleted after {expiration_days} days")

    try:
        # Send the lifecycle configuration to the specified bucket
        s3_client.put_bucket_lifecycle_configuration(
            Bucket=bucket_name,
            LifecycleConfiguration=lifecycle,
        )
    except ClientError as e:
        # Report the failure with the server's error details
        print(f"Error occurred while applying deletion policy to {bucket_name}: {e}")
    else:
        print(f"Deletion policy successfully applied to {bucket_name}.")


# Install the expiration rule on the cold-storage bucket (default retention period)
apply_deletion_policy(bucket_name="coldbucket")


Applying deletion policy to bucket: coldbucket
Policy Details: Objects will be deleted after 1 days
Deletion policy successfully applied to coldbucket.
