## Build and Register Docker image to run geospatial processing on SageMaker

In [1]:
import boto3
import sagemaker

# Open a SageMaker session; the region is read from it below
sagemaker_session = sagemaker.Session()

# Region used for every ECR command in this notebook
ECR_REGION = sagemaker_session.boto_region_name

# Account ID of the calling identity, looked up through STS
sts_client = boto3.client('sts')
ECR_ACCOUNT_ID = sts_client.get_caller_identity()["Account"]

# Build settings: DISTRO is forwarded to `docker build` as DISTRIBUTION_TYPE,
# REPO_NAME is the ECR repository, IMG_NAME is the local "<repo>:<tag>" name
DISTRO = "cpu"
REPO_NAME = "geo-img-cpu"
IMG_NAME = f"{REPO_NAME}:latest"

# Echo the resolved settings, one per line
print(
    f"Region: {ECR_REGION}",
    f"Account Number: {ECR_ACCOUNT_ID}",
    f"ECR Repository Name: {REPO_NAME}",
    f"Image Name: {IMG_NAME}",
    sep="\n",
)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


Region: us-west-2
Account Number: 785721480234
ECR Repository Name: geo-img-cpu
Image Name: geo-img-cpu:latest


### Log in to ECR

In [2]:
# Pipe the ECR password from the AWS CLI straight into `docker login` via stdin so it is not passed as a command-line argument
!aws ecr get-login-password --region {ECR_REGION} | docker login --username AWS --password-stdin {ECR_ACCOUNT_ID}.dkr.ecr.{ECR_REGION}.amazonaws.com

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


### Build the image

Note: the build can take up to 15 minutes.

In [9]:
# Build from ./Dockerfile, passing DISTRO as the DISTRIBUTION_TYPE build argument and tagging the result as IMG_NAME; --quiet limits the output to the image ID
!docker build --quiet -f Dockerfile --build-arg DISTRIBUTION_TYPE={DISTRO} -t {IMG_NAME} .

sha256:44a88117332c0d3b28abed2e0daba308c0ee491c908dd952f753fa8b253f218c


### Tag the image

In [10]:
# Give the local image a second name that includes the ECR registry host, which is the name pushed later
!docker tag {IMG_NAME} {ECR_ACCOUNT_ID}.dkr.ecr.{ECR_REGION}.amazonaws.com/{IMG_NAME}

### Push the image to ECR
Ensure that the ECR repository exists. Create it if it does not.

In [11]:
def ensure_ecr_repository(repository_name, region=ECR_REGION):
    """
    Return the ECR repository called ``repository_name``, creating it first
    when it does not exist yet.

    Args:
        repository_name (str): Name of the ECR repository.
        region (str, optional): AWS region passed to the ECR client.
            Defaults to the value ECR_REGION had when this function was
            defined.

    Returns:
        dict: The repository description returned by ECR (either the
        existing entry or the newly created one).

    Raises:
        Exception: Any error from boto3 other than "repository not found"
        is printed and then re-raised unchanged.
    """
    try:
        client = boto3.client('ecr', region_name=region)

        try:
            # Look the repository up by name; a miss raises
            # RepositoryNotFoundException
            described = client.describe_repositories(
                repositoryNames=[repository_name]
            )
        except client.exceptions.RepositoryNotFoundException:
            # Not there yet: create it with scan-on-push and AES256 encryption
            print(f"Creating repository '{repository_name}'...")
            created = client.create_repository(
                repositoryName=repository_name,
                imageScanningConfiguration={'scanOnPush': True},
                encryptionConfiguration={'encryptionType': 'AES256'}
            )
            print(f"Repository '{repository_name}' created successfully")
            return created['repository']
        else:
            print(f"Repository '{repository_name}' already exists")
            return described['repositories'][0]

    except Exception as err:
        # Report the failure, then let the caller see the original exception
        print(f"Error managing ECR repository: {str(err)}")
        raise

In [12]:
# Make sure the target repository exists before pushing to it.
try:
    repository = ensure_ecr_repository(REPO_NAME)
    print(f"Repository URI: {repository['repositoryUri']}")
except Exception as e:
    print(f"Failed to ensure repository exists: {str(e)}")
    # Re-raise so that "Run All" stops here instead of continuing to
    # `docker push` against a repository that may not exist.
    raise

Repository 'geo-img-cpu' already exists
Repository URI: 785721480234.dkr.ecr.us-west-2.amazonaws.com/geo-img-cpu


In [13]:
# Push the registry-qualified tag to ECR
!docker push {ECR_ACCOUNT_ID}.dkr.ecr.{ECR_REGION}.amazonaws.com/{IMG_NAME}

The push refers to repository [785721480234.dkr.ecr.us-west-2.amazonaws.com/geo-img-cpu]

[1B210397b0: Preparing 
[1B1e5b6c3c: Preparing 
[1B96e47299: Preparing 
[1Bc8a3dd17: Preparing 
[1Bc1acf612: Preparing 
[1Be86d5dea: Preparing 
[1Bff6ebbe4: Preparing 
[1B3d83e73f: Preparing 
[1B9e5e53a8: Preparing 
[1B3e83b06e: Preparing 
[1B84b2c0a2: Preparing 
[1Bdd34e367: Preparing 
[1Bebc6ea1d: Preparing 
[1B2cb03344: Preparing 
[1B70f2ec05: Preparing 
[1B863bd54f: Preparing 
[1B1a0619c5: Preparing 
[1Be676a5cc: Preparing 
[1Bbf18a086: Preparing 
[1Bd12b5716: Preparing 
[1Bb625e049: Preparing 
[4Bbf18a086: Preparing 
[1Bedd17b1e: Preparing 
[1B617540f6: Preparing 
[17Be5e53a8: Pushed    6.46GB/6.306GB[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[

### Verify Upload to ECR

In [14]:
import boto3

def list_ecr_images(repository_name, registry_id=None, region=None):
    """
    List the image identifiers stored in an ECR repository.

    Args:
        repository_name (str): Name of the ECR repository.
        registry_id (str, optional): Registry (account) ID that owns the
            repository. Omitted from the request when falsy.
        region (str, optional): AWS region passed to the ECR client as
            ``region_name``. ``None`` leaves region selection to boto3.

    Returns:
        list: The ``imageIds`` entries from every page of ``list_images``,
        in the order they were returned.
    """
    # Build the client for the requested region (None = boto3's own default)
    ecr_client = boto3.client('ecr', region_name=region)

    # Only send registryId when the caller supplied one
    params = {'repositoryName': repository_name}
    if registry_id:
        params['registryId'] = registry_id

    # Walk every page so repositories with many images are listed completely
    paginator = ecr_client.get_paginator('list_images')
    return [
        image_id
        for page in paginator.paginate(**params)
        for image_id in page.get('imageIds', [])
    ]

# List what is now stored in the repository configured at the top of the
# notebook (REPO_NAME), so this check follows any change made there.
repository_name = REPO_NAME
images = list_ecr_images(repository_name)
print("Images in repository:", images)


Images in repository: [{'imageDigest': 'sha256:9af0699c6e45b132e8f28e47b3a6108807d88994194ddee9536cf8fd643f4913', 'imageTag': 'latest'}]
