<a href="https://colab.research.google.com/github/ankit-rathi/Data-Engineering-with-AWS/blob/main/Try_S3_Bucket_Setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install boto3 library
!pip install boto3

Collecting boto3
  Downloading boto3-1.35.40-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<1.36.0,>=1.35.40 (from boto3)
  Downloading botocore-1.35.40-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.3-py3-none-any.whl.metadata (1.7 kB)
Downloading boto3-1.35.40-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.1/139.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading botocore-1.35.40-py3-none-any.whl (12.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Downloading s3transfer-0.10.3-py3-none-any.whl (82 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.6/82.6 kB[0m [31m936.2 kB/s[0m eta [36m0

In [2]:
# Mount google drive
from google.colab import drive
drive.mount('/content/drive')
import os

import pandas as pd

project_path = '/content/drive/My Drive/Personal'
os.chdir(project_path)

Mounted at /content/drive


In [9]:
# Import required libraries
import boto3
import pandas as pd
import json
import os

# Load AWS credentials from CSV
aws_keys_df = pd.read_csv('aws-rootkey.csv')

AWS_ACCESS_KEY_ID = aws_keys_df['Access_key_ID'][0]
AWS_SECRET_ACCESS_KEY = aws_keys_df['Secret_access_key'][0]
REGION_NAME = aws_keys_df['Region'][0]

# Initialize boto3 client
s3_client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=REGION_NAME
)

s3_resource = boto3.resource(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=REGION_NAME
)

# Define bucket name
bucket_name = 'my-bucket-ar'

# Step 1: Create S3 Bucket
def create_s3_bucket(bucket_name):
    try:
        response = s3_client.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': REGION_NAME}
        )
        print(f"Bucket '{bucket_name}' created successfully.")
    except Exception as e:
        print(f"Error creating bucket: {str(e)}")

# Step 2: Enable versioning for the bucket
def enable_versioning(bucket_name):
    try:
        versioning = s3_client.put_bucket_versioning(
            Bucket=bucket_name,
            VersioningConfiguration={'Status': 'Enabled'}
        )
        print(f"Versioning enabled on bucket '{bucket_name}'.")
    except Exception as e:
        print(f"Error enabling versioning: {str(e)}")

# Step 3: Upload files (CSV and JSON) to the bucket
def upload_files_to_s3(bucket_name, files):
    try:
        for file in files:
            file_name = os.path.basename(file)
            s3_client.upload_file(file, bucket_name, file_name)
            print(f"File '{file_name}' uploaded successfully.")
    except Exception as e:
        print(f"Error uploading files: {str(e)}")

# Step 4a: Disable block public access for the bucket
def disable_block_public_access(bucket_name):
    try:
        s3_client.put_public_access_block(
            Bucket=bucket_name,
            PublicAccessBlockConfiguration={
                'BlockPublicAcls': False,
                'IgnorePublicAcls': False,
                'BlockPublicPolicy': False,
                'RestrictPublicBuckets': False
            }
        )
        print(f"Public access block disabled for bucket '{bucket_name}'.")
    except Exception as e:
        print(f"Error disabling public access block: {str(e)}")

# Step 4b: Set bucket policy for public read access (Example)
def set_bucket_policy(bucket_name):
    bucket_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": "*",
                "Action": "s3:GetObject",
                "Resource": f"arn:aws:s3:::{bucket_name}/*"
            }
        ]
    }

    # Convert the policy to JSON format
    bucket_policy_json = json.dumps(bucket_policy)

    try:
        s3_client.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_json)
        print(f"Bucket policy applied to '{bucket_name}' for public read access.")
    except Exception as e:
        print(f"Error setting bucket policy: {str(e)}")

# Step 5: Delete all versions of objects and then delete the S3 bucket (Cleanup)
def cleanup_s3_bucket(bucket_name):
    try:
        bucket = s3_resource.Bucket(bucket_name)

        # Delete all versions of objects
        bucket.object_versions.delete()

        # Delete the bucket itself
        s3_client.delete_bucket(Bucket=bucket_name)
        print(f"Bucket '{bucket_name}' and its contents deleted successfully.")
    except Exception as e:
        print(f"Error cleaning up bucket: {str(e)}")


# Main execution
if __name__ == "__main__":
    # Define some local files to upload
    csv_file = '/content/drive/My Drive/Personal/aws-rootkey.csv'  # Change path accordingly
    #json_file = '/content/drive/My Drive/Personal/sample.json'  # Change path accordingly
    files_to_upload = [csv_file] #, json_file]

    # Step 1: Create the S3 bucket
    create_s3_bucket(bucket_name)

    # Step 2: Enable versioning for the bucket
    enable_versioning(bucket_name)

    # Step 3: Upload files to the S3 bucket
    upload_files_to_s3(bucket_name, files_to_upload)

    # Step 4a: Disable block public access for the bucket
    disable_block_public_access(bucket_name)

    # Step 4b: Set public read access bucket policy
    set_bucket_policy(bucket_name)

    # Uncomment the following line if you want to clean up the resources after testing:
    cleanup_s3_bucket(bucket_name)


Bucket 'my-bucket-ar' created successfully.
Versioning enabled on bucket 'my-bucket-ar'.
File 'aws-rootkey.csv' uploaded successfully.
Public access block disabled for bucket 'my-bucket-ar'.
Bucket policy applied to 'my-bucket-ar' for public read access.
Bucket 'my-bucket-ar' and its contents deleted successfully.
