# Build custom SageMaker image for geospatial processing

## Setup environment

In [1]:
import os
import json
import boto3
import sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [10]:
sm_client = boto3.client('sagemaker')

In [11]:
NOTEBOOK_METADATA_FILE = "/opt/ml/metadata/resource-metadata.json"
domain_id = None

if os.path.exists(NOTEBOOK_METADATA_FILE):
    with open(NOTEBOOK_METADATA_FILE, "rb") as f:
        metadata = json.loads(f.read())
        domain_id = metadata.get('DomainId')
        space_name = metadata.get('SpaceName')
        print(f"SageMaker domain id: {domain_id}")

if not space_name:
    raise Exception(f"Cannot find the current space name. Make sure you run this notebook in a JupyterLab in the SageMaker Studio")
else:
    print(f"Space name: {space_name}")
    
r = sm_client.describe_space(DomainId=domain_id, SpaceName=space_name)
user_profile_name = r['OwnershipSettings']['OwnerUserProfileName']

assert(user_profile_name)
print(f"User profile: {user_profile_name}")

%store domain_id
%store space_name
%store user_profile_name

SageMaker domain id: d-jpbpjo9kmrwy
Space name: ts-space
User profile: studio-user-geo-5e62ef30
Stored 'domain_id' (str)
Stored 'space_name' (str)
Stored 'user_profile_name' (str)


In [12]:
role = sagemaker.get_execution_role()

In [4]:
repo_name = "smd-custom-geo"
image_name = "smd-custom-geo"
account_id = boto3.client("sts").get_caller_identity()["Account"]
region = md["ResourceArn"].split(":")[3]
full_name = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{repo_name}:{image_name}"

print(full_name)

070672918432.dkr.ecr.us-west-2.amazonaws.com/smd-custom-geo:smd-custom-geo


### Install Docker

In [13]:
# check that docker enabled in the SageMaker domain
docker_settings = sm_client.describe_domain(DomainId=domain_id)['DomainSettings'].get('DockerSettings')
docker_enabled = False

if docker_settings:
    if docker_settings.get('EnableDockerAccess') in ['ENABLED']:
        print(f"The docker access is ENABLED in the domain {domain_id}")
        docker_enabled = True

if not docker_enabled:
    raise Exception(f"You must enable docker access in the domain to use Studio local mode")

The docker access is ENABLED in the domain d-jpbpjo9kmrwy


In [14]:
%%bash

# see https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository
sudo apt-get update
sudo apt-get install -y ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources:
echo \
  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
  $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update

## Currently only Docker version 20.10.X is supported in Studio: see https://docs.aws.amazon.com/sagemaker/latest/dg/studio-updated-local.html
# pick the latest patch from:
# apt-cache madison docker-ce | awk '{ print $3 }' | grep -i 20.10
VERSION_STRING=5:20.10.24~3-0~ubuntu-jammy
sudo apt-get install docker-ce-cli=$VERSION_STRING docker-compose-plugin -y

# validate the Docker Client is able to access Docker Server at [unix:///docker/proxy.sock]
docker version

Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:2 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:5 http://security.ubuntu.com/ubuntu jammy-security/multiverse amd64 Packages [45.2 kB]
Get:6 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1229 kB]
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1521 kB]
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [3742 kB]
Get:9 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2604 kB]
Get:10 http://archive.ubuntu.com/ubuntu jammy-updates/multiverse amd64 Packages [53.3 kB]
Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [2904 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [3606 kB]
Get:13 http://arch

debconf: delaying package configuration, since apt-utils is not installed


Fetched 1830 kB in 1s (1333 kB/s)
Selecting previously unselected package openssl.
(Reading database ... 13790 files and directories currently installed.)
Preparing to unpack .../openssl_3.0.2-0ubuntu1.18_amd64.deb ...
Unpacking openssl (3.0.2-0ubuntu1.18) ...
Selecting previously unselected package ca-certificates.
Preparing to unpack .../ca-certificates_20240203~22.04.1_all.deb ...
Unpacking ca-certificates (20240203~22.04.1) ...
Preparing to unpack .../curl_7.81.0-1ubuntu1.20_amd64.deb ...
Unpacking curl (7.81.0-1ubuntu1.20) over (7.81.0-1ubuntu1.18) ...
Preparing to unpack .../libcurl4_7.81.0-1ubuntu1.20_amd64.deb ...
Unpacking libcurl4:amd64 (7.81.0-1ubuntu1.20) over (7.81.0-1ubuntu1.18) ...
Setting up libcurl4:amd64 (7.81.0-1ubuntu1.20) ...
Setting up curl (7.81.0-1ubuntu1.20) ...
Setting up openssl (3.0.2-0ubuntu1.18) ...
Setting up ca-certificates (20240203~22.04.1) ...
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the d

debconf: delaying package configuration, since apt-utils is not installed


Fetched 56.0 MB in 1s (64.5 MB/s)
Selecting previously unselected package docker-ce-cli.
(Reading database ... 14266 files and directories currently installed.)
Preparing to unpack .../docker-ce-cli_5%3a20.10.24~3-0~ubuntu-jammy_amd64.deb ...
Unpacking docker-ce-cli (5:20.10.24~3-0~ubuntu-jammy) ...
Selecting previously unselected package docker-compose-plugin.
Preparing to unpack .../docker-compose-plugin_2.32.4-1~ubuntu.22.04~jammy_amd64.deb ...
Unpacking docker-compose-plugin (2.32.4-1~ubuntu.22.04~jammy) ...
Setting up docker-compose-plugin (2.32.4-1~ubuntu.22.04~jammy) ...
Setting up docker-ce-cli (5:20.10.24~3-0~ubuntu-jammy) ...
Client: Docker Engine - Community
 Version:           20.10.24
 API version:       1.41
 Go version:        go1.19.7
 Git commit:        297e128
 Built:             Tue Apr  4 18:21:03 2023
 OS/Arch:           linux/amd64
 Context:           default
 Experimental:      true

Server:
 Engine:
  Version:          25.0.6
  API version:      1.44 (minimum ve

## Build the Docker image

In [16]:
%%bash

docker system prune -af

Error response from daemon: The request for current resource is not allowed on SageMaker Studio.


CalledProcessError: Command 'b'\ndocker system prune -af\n'' returned non-zero exit status 1.

In [17]:
%%bash

# Region, defaults to us-east-1
REGION=$(aws configure get region)
REGION=${REGION:-us-east-1}

ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
REPO_NAME=smd-custom-geo
IMAGE_NAME=smd-custom-geo

echo "REPO_NAME: $REPO_NAME"
echo "ACCOUNT_ID: $ACCOUNT_ID"
echo "REGION: $REGION"

SM_DIST_TYPE=cpu
TAG=${REPO_NAME}:latest-${SM_DIST_TYPE}
FULL_NAME="${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${TAG}"

echo "IMAGE TAG: $TAG"
echo "ECR TARGET: ${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${TAG}"


# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${REPO_NAME}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${REPO_NAME}" > /dev/null
fi

# Login to ECR
aws --region ${REGION} ecr get-login-password | docker login --username AWS --password-stdin ${FULL_NAME}

# Build and push the image
set -x
docker build -f Dockerfile --build-arg DISTRIBUTION_TYPE=$SM_DIST_TYPE -t $TAG .
set +x

docker push ${FULL_NAME}

echo ""
echo "Created image pushed to ECR image URI: ${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${TAG}"
echo ""
echo "Done"

REPO_NAME: smd-custom-geo
ACCOUNT_ID: 070672918432
REGION: us-east-1
IMAGE TAG: smd-custom-geo:latest-cpu
ECR TARGET: 070672918432.dkr.ecr.us-east-1.amazonaws.com/smd-custom-geo:latest-cpu


https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded


+ docker build -f Dockerfile --build-arg DISTRIBUTION_TYPE=cpu -t smd-custom-geo:latest-cpu .


Sending build context to Docker daemon  30.21kB


Error response from daemon: {"message":"Forbidden. Reason: [ImageBuild] 'sagemaker' is the only user allowed network input"}
+ set +x


The push refers to repository [070672918432.dkr.ecr.us-east-1.amazonaws.com/smd-custom-geo]

Created image pushed to ECR image URI: 070672918432.dkr.ecr.us-east-1.amazonaws.com/smd-custom-geo:latest-cpu

Done


In [None]:
!aws ecr list-images --repository-name {repo_name}