# Install essential libraries

In [5]:
import importlib
import subprocess

def import_or_install(package_name):
    """Import a module by name, installing it with pip if the import fails.

    The same name is used for the import attempt and for ``pip install``.
    The module is NOT bound to any name by this function; callers still need
    a regular ``import`` statement afterwards.

    Args:
        package_name: Module name to import and, on failure, to install.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
    """
    # Local import: only needed to locate the interpreter that is running us.
    import sys

    try:
        print(f"Importing package {package_name}")
        importlib.import_module(package_name)
    except ImportError:
        print(f"Package not installed. Installing package: {package_name}")
        # Run pip through the current interpreter so the package lands in the
        # environment this process imports from, not whichever "pip" happens
        # to be first on PATH.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", package_name],
            check=True,
        )
        # Let the import system notice the freshly installed files.
        importlib.invalidate_caches()


# Make sure every module this notebook needs is importable, installing on
# demand, so pipelines do not require a bespoke runtime image.
# NOTE: "os" and "json" are part of the standard library, so only "boto3"
# should ever reach the install path.
for required_module in ("boto3", "os", "json"):
    import_or_install(required_module)

# Bind the modules checked above to names in this namespace.
import os
import boto3
import json

print("Libraries loaded")

Importing package boto3
Importing package os
Importing package json
Libraries loaded


In [9]:
def get_secrets(file_name):
    """Read the AWS credentials from the process environment.

    Looks up ``AWS_SECRET_ACCESS_KEY`` and ``AWS_ACCESS_KEY_ID`` and prints,
    for each, whether it was found. Missing variables are reported but do not
    raise; the corresponding value is returned as ``None``.

    Args:
        file_name: Not used by this function; kept so existing callers that
            pass it continue to work.

    Returns:
        A ``(access_key, secret_key)`` tuple; either element may be ``None``.
    """
    secret_var = 'AWS_SECRET_ACCESS_KEY'
    key_id_var = 'AWS_ACCESS_KEY_ID'

    # Fetch both values up front; a missing variable yields None.
    aws_secret = os.environ.get(secret_var)
    aws_key_id = os.environ.get(key_id_var)

    # Report on the secret key first, then on the access key id.
    if aws_secret is None:
        print(f"The environment variable {secret_var} does not exist.")
    else:
        print(f"Retrieved environment variable: {secret_var} successfully")

    if aws_key_id is None:
        print(f"The environment variable {key_id_var} does not exist.")
    else:
        print(f"Retrieved environment variable {key_id_var} successfully.")

    return aws_key_id, aws_secret


def connect_to_aws_s3(access_key, secret_key):
    """Build a boto3 S3 client from an explicit key pair.

    Args:
        access_key: Value passed as ``aws_access_key_id``.
        secret_key: Value passed as ``aws_secret_access_key``.

    Returns:
        The client object returned by ``boto3.client('s3', ...)``.
    """
    credentials = {
        "aws_access_key_id": access_key,
        "aws_secret_access_key": secret_key,
    }
    return boto3.client('s3', **credentials)


def upload_to_s3(bucket_name, file_name, data, s3):
    """Store ``data`` as an object in an S3 bucket.

    Any exception raised during the upload is caught and reported on stdout
    rather than propagated, so callers are not interrupted by a failure.

    Args:
        bucket_name: Bucket to write to.
        file_name: Object key to write under.
        data: Payload passed as the object body.
        s3: Client object exposing ``put_object``.
    """
    put_kwargs = {"Body": data, "Bucket": bucket_name, "Key": file_name}
    try:
        s3.put_object(**put_kwargs)
        print(f"Data uploaded successfully to s3://{bucket_name}/{file_name}")
    except Exception as err:
        # Best-effort: report the failure and carry on.
        print(f"Error uploading data to S3: {err}")


def generate_addresses():
    """Return the built-in sample address records.

    The records are produced in code because the pipeline does not have
    access to any files.

    Returns:
        A new list of dicts, each with ``name``, ``address`` and ``city`` keys.
    """
    columns = ("name", "address", "city")
    sample_rows = (
        ("John Doe", "123 Main St", "New York"),
        ("Jane Smith", "456 Oak Ave", "Los Angeles"),
        # Add more rows as needed
    )
    return [dict(zip(columns, row)) for row in sample_rows]


def main():
    """Generate the sample addresses and upload them to S3 as JSON.

    Steps, in order: read the AWS credentials from the environment, create
    the S3 client, build the address records, serialise them with
    ``json.dumps``, and upload the result to a fixed bucket and key.
    """
    # Credentials come from the environment; the argument is passed through
    # to get_secrets unchanged.
    aws_key_id, aws_secret = get_secrets("secrets.yaml")
    client = connect_to_aws_s3(aws_key_id, aws_secret)

    # The payload is generated in code rather than read from a file, then
    # serialised to JSON text.
    payload = json.dumps(generate_addresses())

    # Destination of the upload.
    target_bucket = 'brbaker-s3-demo-bucket'
    target_key = 'addresses-new2.txt'

    upload_to_s3(target_bucket, target_key, payload, client)


In [10]:
#if __name__ == "__main__":
#    print("Running main()")
#    main()

In [11]:
# Run the upload workflow. main() is called unconditionally here, without an
# `if __name__ == "__main__"` guard.
# NOTE: upload_to_s3 catches and prints upload errors, so the completion
# banner below is printed even when the upload itself failed.
print("Running main()")
main()
print("***** COMPLETED *****")

Running main()
Retrieved environment variable: AWS_SECRET_ACCESS_KEY successfully
Retrieved environment variable AWS_ACCESS_KEY_ID successfully.
Data uploaded successfully to s3://brbaker-s3-demo-bucket/addresses-new2.txt
***** COMPLETED *****
