## Build and Register Docker image to run geospatial processing on SageMaker

In [None]:
import boto3
import sagemaker

# Session object used to talk to SageMaker
sagemaker_session = sagemaker.Session()

# Region the session is bound to; the ECR registry address below is built from it
ECR_REGION = sagemaker_session.boto_region_name

# Ask STS for the caller's identity to obtain the AWS account ID
sts_client = boto3.client("sts")
ECR_ACCOUNT_ID = sts_client.get_caller_identity()["Account"]

# Image build and naming parameters
DISTRO = "cpu"
REPO_NAME = "geo-img-cpu"
IMG_NAME = f"{REPO_NAME}:latest"

# Echo the resolved settings so they can be checked before building
print(f"Region: {ECR_REGION}")
print(f"Account Number: {ECR_ACCOUNT_ID}")
print(f"ECR Repository Name: {REPO_NAME}")
print(f"Image Name: {IMG_NAME}")

### Log into ECR

In [None]:
# Fetch an ECR login password with the AWS CLI and pipe it to `docker login`
# via stdin, authenticating Docker against this account's registry in ECR_REGION.
!aws ecr get-login-password --region {ECR_REGION} | docker login --username AWS --password-stdin {ECR_ACCOUNT_ID}.dkr.ecr.{ECR_REGION}.amazonaws.com

### Build the image

Note: this will take up to 15 minutes

In [None]:
# Build from Dockerfilev2 with the current directory as build context, passing
# DISTRO as the DISTRIBUTION_TYPE build argument and tagging the result IMG_NAME.
!docker build --quiet -f Dockerfilev2 --build-arg DISTRIBUTION_TYPE={DISTRO} -t {IMG_NAME} .

### Tag the image

In [None]:
# Give the local image a second tag that includes the ECR registry address,
# which is the name used by the push step.
!docker tag {IMG_NAME} {ECR_ACCOUNT_ID}.dkr.ecr.{ECR_REGION}.amazonaws.com/{IMG_NAME}

### Push the image to ECR
Ensure that the ECR repository exists. Create it if it does not.

In [None]:
def ensure_ecr_repository(repository_name, region=ECR_REGION):
    """
    Return the details of an ECR repository, creating it first if it is missing.

    The repository is looked up by name. When the lookup reports that it does
    not exist, a new repository is created with scan-on-push enabled and
    AES256 encryption.

    Args:
        repository_name (str): Name of the ECR repository
        region (str, optional): AWS region passed to the ECR client.
            Defaults to ECR_REGION.

    Returns:
        dict: Repository details, taken from the describe response when the
            repository already exists or from the create response otherwise

    Raises:
        Exception: Any error raised while building the client or calling ECR
            is printed and then re-raised unchanged.
    """
    try:
        client = boto3.client('ecr', region_name=region)

        try:
            # Existence check: describing a missing repository raises
            described = client.describe_repositories(
                repositoryNames=[repository_name]
            )
        except client.exceptions.RepositoryNotFoundException:
            # Not found, so create it with the settings used by this notebook
            print(f"Creating repository '{repository_name}'...")
            created = client.create_repository(
                repositoryName=repository_name,
                imageScanningConfiguration={'scanOnPush': True},
                encryptionConfiguration={'encryptionType': 'AES256'}
            )
            print(f"Repository '{repository_name}' created successfully")
            return created['repository']
        else:
            print(f"Repository '{repository_name}' already exists")
            return described['repositories'][0]

    except Exception as e:
        # Report the failure, then let the caller decide how to handle it
        print(f"Error managing ECR repository: {str(e)}")
        raise

In [None]:
# Make sure the target repository exists and show its URI. A failure is
# reported here but not re-raised, so execution continues past this block.
try:
    repository = ensure_ecr_repository(REPO_NAME)
    print(f"Repository URI: {repository['repositoryUri']}")
except Exception as e:
    print(f"Failed to ensure repository exists: {str(e)}")

In [None]:
# Upload the registry-qualified image tag to the ECR repository.
!docker push {ECR_ACCOUNT_ID}.dkr.ecr.{ECR_REGION}.amazonaws.com/{IMG_NAME}

### Verify Upload to ECR

In [None]:
import boto3

def list_ecr_images(repository_name, registry_id=None, region=None):
    """
    List the image identifiers stored in an ECR repository.

    Args:
        repository_name (str): Name of the ECR repository to query
        registry_id (str, optional): Registry ID to include in the request.
            Left out of the request when not provided.
        region (str, optional): AWS region passed to the ECR client. If None,
            the client is created without an explicit region, as before.

    Returns:
        list: The 'imageIds' entries collected from every page of results
    """
    # Accept a region so the lookup can target the same region the image was
    # pushed to, mirroring ensure_ecr_repository
    ecr_client = boto3.client('ecr', region_name=region)

    # Only send registryId when the caller supplied one
    params = {'repositoryName': repository_name}
    if registry_id:
        params['registryId'] = registry_id

    # Walk every page so large repositories are listed completely
    images = []
    paginator = ecr_client.get_paginator('list_images')
    for page in paginator.paginate(**params):
        images.extend(page['imageIds'])

    return images

# Verify the push by listing the images in the repository.
# Reuse REPO_NAME rather than repeating the literal name, so this check keeps
# pointing at the repository the image was pushed to if the name is changed.
repository_name = REPO_NAME
images = list_ecr_images(repository_name)
print("Images in repository:", images)
