In [1]:
!cat container/Dockerfile

# Build an image that can do training and inference in SageMaker
# This is a Python 3 image that uses the nginx, gunicorn, flask stack
# for serving inferences in a stable way.

FROM ubuntu:18.04

MAINTAINER Amazon AI <sage-learner@amazon.com>


RUN apt-get -y update && apt-get install -y --no-install-recommends \
         wget \
         python3-pip \
         python3-setuptools \
         nginx \
         ca-certificates \
    && rm -rf /var/lib/apt/lists/*

RUN ln -s /usr/bin/python3 /usr/bin/python
RUN ln -s /usr/bin/pip3 /usr/bin/pip

# Here we get all python packages.
# There's substantial overlap between scipy and numpy that we eliminate by
# linking them together. Likewise, pip leaves the install caches populated which uses
# a significant amount of space. These optimizations save a fair amount of space in the
# image, which reduces start up time.
RUN pip --no-cache-dir install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.20.2 pandas flask gunicorn

# Set some environment variabl

In [2]:
%%sh

# The name of our algorithm
algorithm_name=sagemaker-decision-trees

cd container

chmod +x decision_trees/train
chmod +x decision_trees/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname}

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -t ${algorithm_name} .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded


#0 building with "default" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile:
#1 transferring dockerfile: 1.56kB 0.0s done
#1 DONE 0.1s

#2 [internal] load metadata for docker.io/library/ubuntu:18.04
#2 DONE 0.5s

#3 [internal] load .dockerignore
#3 transferring context: 2B done
#3 DONE 0.0s

#4 [internal] load build context
#4 transferring context: 10.51kB done
#4 DONE 0.0s

#5 [1/7] FROM docker.io/library/ubuntu:18.04@sha256:152dc042452c496007f07ca9127571cb9c29697f42acbfad72324b2bb2e43c98
#5 resolve docker.io/library/ubuntu:18.04@sha256:152dc042452c496007f07ca9127571cb9c29697f42acbfad72324b2bb2e43c98 0.0s done
#5 sha256:152dc042452c496007f07ca9127571cb9c29697f42acbfad72324b2bb2e43c98 1.33kB / 1.33kB done
#5 sha256:dca176c9663a7ba4c1f0e710986f5a25e672842963d95b960191e2d9f7185ebe 424B / 424B done
#5 sha256:f9a80a55f492e823bf5d51f1bd5f87ea3eed1cb31788686aa99a2fb61a27af6a 2.30kB / 2.30kB done
#5 sha256:7c457f213c7634afb95a0fb2410

The push refers to repository [034362035459.dkr.ecr.us-east-2.amazonaws.com/sagemaker-decision-trees]
5f70bf18a086: Preparing
5ac80cdeb6a7: Preparing
92f5f20e3af3: Preparing
bdcbc27af591: Preparing
f90608acc66a: Preparing
b0f540da9d8e: Preparing
548a79621a42: Preparing
b0f540da9d8e: Waiting
548a79621a42: Waiting
5ac80cdeb6a7: Pushed
f90608acc66a: Pushed
bdcbc27af591: Pushed
5f70bf18a086: Pushed
548a79621a42: Pushed
b0f540da9d8e: Pushed
92f5f20e3af3: Pushed
latest: digest: sha256:1637835fbc193e71e19338edb9556295d86331536d02261785b0cfdfdf2b461e size: 1781


In [3]:
# S3 prefix
prefix = "winequality-red"

# Define IAM role
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

role = get_execution_role()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

In [5]:
WORK_DIRECTORY = "data"

data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

In [6]:
from sagemaker.sklearn import SKLearn

sk_estimator = SKLearn(
    entry_point="container/decision_trees/predictor.py",
    role=role,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    py_version="py3",
    framework_version="1.0-1",
    hyperparameters={"estimators": 20},
)

In [7]:
sk_estimator.fit(data_location)

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-08-31-22-23-31-647


2024-08-31 22:23:36 Starting - Starting the training job...
2024-08-31 22:23:50 Starting - Preparing the instances for training...
2024-08-31 22:24:24 Downloading - Downloading the training image.....[34m2024-08-31 22:25:18,722 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-08-31 22:25:18,726 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-31 22:25:18,728 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-08-31 22:25:18,744 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-08-31 22:25:18,989 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-31 22:25:18,992 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-08-31 22:25:19,009 sagemaker-training-toolkit INFO     No GPUs detected (normal if no

In [8]:
predictor = sk_estimator.deploy(1, "ml.m4.xlarge")

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-08-31-22-27-39-230


ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Could not find model data at s3://sagemaker-us-east-2-034362035459/sagemaker-scikit-learn-2024-08-31-22-23-31-647/output/model.tar.gz.