In [1]:
import pandas as pd
import awswrangler as wr
import sagemaker
import boto3
from sagemaker.amazon.amazon_estimator import image_uris 
from sagemaker.session import s3_input, Session
import urllib
import os
import numpy as np
from sagemaker.predictor import csv_serializer
from time import gmtime, strftime

In [2]:
!cp ../credentials.py credentials.py 
import credentials as cr
import importlib
importlib.reload(cr)

<module 'credentials' from '/Users/gaby/Documents/GitHub/sm/aws-docker/credentials.py'>

In [3]:
bucket_name = 'aws-sm-bucket' 
my_region = boto3.session.Session().region_name 
print(my_region)

us-east-2


In [4]:
prefix = 'catboost-algo'
output_path ='s3://{}/{}/output'.format(bucket_name, prefix)
print(output_path)

s3://aws-sm-bucket/catboost-algo/output


## Read local data to S3

In [5]:
name_data = 'test_data'
#boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, f'{name}/{name}.csv')).upload_file(f'{name}.csv')


## Create model file

In [6]:
%%writefile model.py
import numpy as np
import csv
import os
import pandas as pd
import pickle
default_model_path = '/opt/ml'

model_cache = {}

def load_model(algorithm, model_path):
    if model_cache.get(algorithm) is None:
        model_filename = os.path.join(model_path, 'model.pkl')
        with open(model_filename, newline='') as file:
            model_cache[algorithm] = pickle.load(open(model_filename, 'rb'))
    
    return model_cache[algorithm]


def __read_csv_list(filename):
    with open(filename, newline='') as file:
        reader = csv.reader(file)
        return list(reader)[0]
        
    return []

predictors_cache = {}

def load_predictors(algorithm, model_path):
    if predictors_cache.get(algorithm) is None:
        predictors_filename = os.path.join(model_path, 'predictors.csv')
        predictors_cache[algorithm] = __read_csv_list(predictors_filename)
            
    return predictors_cache[algorithm]

to_cat_cache = {}

def load_to_cat(algorithm, model_path):
    if to_cat_cache.get(algorithm) is None:
        to_cat_filename = os.path.join(model_path, 'to_cat.csv')
        to_cat_cache[algorithm] = __read_csv_list(to_cat_filename)
            
    return to_cat_cache[algorithm]


def predict(data, model_path = default_model_path):
    print("Predict: start")
        
    algorithm = "algorithm_catboost"
    
    model = load_model(algorithm, model_path)
    print("Predict: model loaded!")
    predictors = load_predictors(algorithm, model_path)
    print(f"Predict: predictors = {predictors}")
    to_cat = load_to_cat(algorithm, model_path)
    print(f"Predict: to_cat = {to_cat}")
    
    if data.shape[0] == 0:
        return pd.DataFrame()

    for x in to_cat:
        data[x] = data[x].astype(str)

    y_pred_probs = model.predict_proba( data[predictors].values )
    probabilities = [item[1] for item in y_pred_probs]
    data['pd'] = probabilities
    print("Predict: return data")
    
    
    return data

Overwriting model.py


In [7]:
%%writefile app.py
import pandas as pd
import pickle
import csv
import sys
import os
import io
import bjoern
import bottle
from bottle import run, request, post, get

# adds the model.py path to the list
model_path = os.path.dirname(os.getcwd())
if 'MODEL_PATH' in os.environ:
    model_path = os.environ['MODEL_PATH']

sys.path.insert(0,model_path)

import model

@get('/ping')
def ping():
    return "Ok"

@post('/invocations')
def invoke():
    # load image from POST and convert it to json
    try:
        print(f"request received: \n\t content size = {request.content_length}\n\t content type = {request.content_type}")
        req = request.body

        data = pd.read_csv(req, sep=',', low_memory=False, error_bad_lines=False)
        print(f"request received: dataframe size = {data.shape}")
        predictions = model.predict(data, model_path)
        print(f"request received: prediction done!")
        
        return predictions.to_csv(sep=',', index=False)
    except Exception as e:
        print(f"Error: {str(e)}" )
        print("Unexpected error:", sys.exc_info())
        return bottle.HTTPResponse(status=500)
    

if __name__ == '__main__':
    
    if len(sys.argv) == 2 and ( not sys.argv[1] in [ "serve", "train"] ):
        raise Exception("Invalid argument: you must inform 'train' for fake training mode or 'serve' predicting mode") 

    train = len(sys.argv) == 2 and (sys.argv[1] == "train")
    
    if train:
        print("copy local model")
        try:
            os.makedirs('/opt/ml/model/', exist_ok=True)
            shutil.copy2('/opt/program/model.pkl', '/opt/ml/model/model.pkl')
        except Exception as e:
            print(e)
            
                
        print( "Fake training completed" )
    else:
        print("Server started")
        if 'PORT' in os.environ: 
            port = int(os.environ['PORT'])
        else:
            port = 8080
        
        print(f"Port: {port}")
        print(f"Model path: {model_path}")
        bjoern.run(bottle.app(), "0.0.0.0", port)
        
        

Overwriting app.py


In [8]:

# copy model data that will be in docker file
!cp ../local-model/model.pkl ./
!cp ../local-model/predictors.csv ./
!cp ../local-model/to_cat.csv ./

In [9]:
%%writefile Dockerfile.aws2
FROM python:3.9.1

RUN apt-get update -y && apt-get install -y libev-dev
RUN pip install bottle
RUN pip install bjoern
RUN pip install pandas==1.2.2
RUN pip install numpy==1.20.1
RUN pip install catboost==0.24.4

RUN mkdir -p /opt/program
RUN mkdir -p /opt/ml

ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/program:${PATH}"
ENV MODEL_PATH='/opt/ml'

COPY app.py /opt/program
COPY model.py /opt/ml
COPY model.pkl /opt/ml
COPY predictors.csv /opt/ml
COPY to_cat.csv /opt/ml

WORKDIR /opt/program

ENTRYPOINT ["python", "app.py"]

Overwriting Dockerfile.aws2


## Build Dockerfile

In [10]:
!docker build -f Dockerfile.aws2 -t containeraws2:v0.0.1 .

Sending build context to Docker daemon  2.274MB
Step 1/20 : FROM python:3.9.1
 ---> 2a93c239d591
Step 2/20 : RUN apt-get update -y && apt-get install -y libev-dev
 ---> Using cache
 ---> 6596e41441ae
Step 3/20 : RUN pip install bottle
 ---> Using cache
 ---> 9c477085055a
Step 4/20 : RUN pip install bjoern
 ---> Using cache
 ---> 8458496a66ce
Step 5/20 : RUN pip install pandas==1.2.2
 ---> Using cache
 ---> f114805d12d7
Step 6/20 : RUN pip install numpy==1.20.1
 ---> Using cache
 ---> 7e192144e093
Step 7/20 : RUN pip install catboost==0.24.4
 ---> Using cache
 ---> 9d83467dc052
Step 8/20 : RUN mkdir -p /opt/program
 ---> Using cache
 ---> 6efac8035fb6
Step 9/20 : RUN mkdir -p /opt/ml
 ---> Using cache
 ---> df2caa0db5de
Step 10/20 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 2a4e8d9623a2
Step 11/20 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> cd203f5fb80e
Step 12/20 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> 623ce389700e
Step 13/20 : ENV MODEL_PATH='/

In [11]:
container = f'{cr.account_id}.dkr.ecr.{cr.region}.amazonaws.com/aws2-repository'

In [12]:
!docker tag containeraws2:v0.0.1 {container}

In [13]:
!docker run -p 8081:8080 -d --rm --name test_img {container}

78c5ba9eb3d0fbebe99435d95ae19f221769426f471baaf98d535e4e3e960984


## Test locally

In [14]:
import urllib.request
contents = urllib.request.urlopen("http://localhost:8081/ping").read()
contents

b'Ok'

In [16]:
model_path = os.path.dirname(os.getcwd())
from io import StringIO
with open(os.path.join(model_path,'local-model/X_test.csv'), newline='') as file:
    #data = pd.read_csv(file, sep=',', low_memory=False, error_bad_lines=False)
    data = file.read().encode('utf-8')

    # create request
    req = urllib.request.Request("http://localhost:8081/invocations", data = data)
    

# execute request
resp = str(urllib.request.urlopen(req).read(),'utf-8')
resp = StringIO(resp) 
res = pd.read_csv(resp)
res

Unnamed: 0,x1,x3,y1,x4,x2,x5,y,flag_train,pd
0,1.527905,-0.732013,3.507635,-0.010777,1.134899,D,1,2,0.916793
1,-0.100697,0.359118,2.718173,0.507330,0.526648,B,1,2,0.853656
2,0.303793,-0.450724,2.095225,0.165429,1.590835,B,1,2,0.865216
3,-0.764048,0.538282,1.699226,-0.337947,1.398086,B,1,2,0.871932
4,1.064482,0.223539,2.646861,-0.204668,-0.050285,D,1,2,0.906335
...,...,...,...,...,...,...,...,...,...
194,0.855495,-1.220430,5.829651,0.270106,-1.789519,D,1,2,0.893840
195,0.783989,1.232337,5.536935,-0.046588,0.883816,D,1,2,0.939172
196,-1.800726,-1.168124,-1.656599,-0.302814,-1.160325,B,1,2,0.243705
197,1.002345,2.190936,-4.664816,-0.068465,-1.399620,D,0,2,0.207043


In [17]:
!docker stop test_img

test_img


## Register repo on aws and push it

In [18]:
!aws ecr create-repository --repository-name aws2-repository --region us-east-2    


An error occurred (RepositoryAlreadyExistsException) when calling the CreateRepository operation: The repository with name 'aws2-repository' already exists in the registry with id '342342686540'


In [19]:
## the 1st time run:
#sudo usermod -a -G docker ec2-user

In [20]:
!aws ecr get-login-password --region {cr.region} | docker login --username AWS --password-stdin {cr.account_id}.dkr.ecr.{cr.region}.amazonaws.com

Login Succeeded


In [21]:
!docker push {container}

The push refers to repository [342342686540.dkr.ecr.us-east-2.amazonaws.com/aws2-repository]

[1B98a2ba0c: Preparing 
[1Bf7826e12: Preparing 
[1B7680ef31: Preparing 
[1Bee5a451b: Preparing 
[1B63f3754a: Preparing 
[1Bd9b70a9a: Preparing 
[1B88084211: Preparing 
[1B665d4f94: Preparing 
[1B1b58dc35: Preparing 
[1B1011f5b4: Preparing 
[1B97b8ef9e: Preparing 
[1B2bb2dcdf: Preparing 
[1Bc2412b54: Preparing 
[1Ba4895ec9: Preparing 
[1B38a52ffe: Preparing 
[1B8789e54a: Preparing 
[1B3f29c674: Preparing 
[1B86ff9f6a: Preparing 
[1B4bc8bc80: Preparing 
[1B1dc52d99: Preparing 
[1B93c71745: Preparing 
[3B1dc52d99: Layer already exists 6MB[19A[2K[20A[2K[20A[2K[18A[2K[22A[2K[21A[2K[17A[2K[20A[2K[10A[2K[11A[2K[13A[2K[7A[2K[2A[2K[1A[2Klatest: digest: sha256:c17b2485b512dd726a24e890403d053bcdd395fe832ce67ea5d244a24724c722 size: 4935


## Create model and endpoint

In [22]:
client = boto3.client('sagemaker')

In [23]:
model_name='aws2-repository-model'
client.create_model(
    ModelName=model_name,
    ExecutionRoleArn = cr.role,
    PrimaryContainer = { 'Image': container }
)

{'ModelArn': 'arn:aws:sagemaker:us-east-2:342342686540:model/aws2-repository-model',
 'ResponseMetadata': {'RequestId': '11d45ab6-4aae-41d8-9666-e09fd9be7e28',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '11d45ab6-4aae-41d8-9666-e09fd9be7e28',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '83',
   'date': 'Wed, 24 Mar 2021 15:08:15 GMT'},
  'RetryAttempts': 0}}

In [24]:
endpoint_config_name='aws2-repository-config'
instance_type='ml.t2.medium'
client.create_endpoint_config(
    EndpointConfigName = endpoint_config_name,
    ProductionVariants=[{
        'InstanceType':instance_type,
        'InitialInstanceCount':1,
        'InitialVariantWeight':1,
        'ModelName': model_name,
        'VariantName':'AllTraffic'}])

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:342342686540:endpoint-config/aws2-repository-config',
 'ResponseMetadata': {'RequestId': '40c020ae-154b-4776-84b6-ee78f2dbb0e0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '40c020ae-154b-4776-84b6-ee78f2dbb0e0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '103',
   'date': 'Wed, 24 Mar 2021 15:08:17 GMT'},
  'RetryAttempts': 0}}

## Deploy endpoint

In [25]:
import time

endpoint_name = 'aws2-repository-endpoint-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_name)
create_endpoint_response = client.create_endpoint(EndpointName = endpoint_name, 
                                                  EndpointConfigName = endpoint_config_name)

print(create_endpoint_response['EndpointArn'])

resp = client.describe_endpoint(EndpointName=endpoint_name)
status = resp['EndpointStatus']
print("Status: " + status)

while status=='Creating':
    time.sleep(60)
    resp = client.describe_endpoint(EndpointName = endpoint_name)
    status = resp['EndpointStatus']
    print("Status: " + status)

print("Arn: " + resp['EndpointArn'])
print("Status: " + status)

aws2-repository-endpoint-2021-03-24-15-09-09
arn:aws:sagemaker:us-east-2:342342686540:endpoint/aws2-repository-endpoint-2021-03-24-15-09-09
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: InService
Arn: arn:aws:sagemaker:us-east-2:342342686540:endpoint/aws2-repository-endpoint-2021-03-24-15-09-09
Status: InService


In [26]:
endpoints = client.list_endpoints(SortBy='Status')['Endpoints']
endpoints

[{'EndpointName': 'aws2-repository-endpoint-2021-03-24-15-09-09',
  'EndpointArn': 'arn:aws:sagemaker:us-east-2:342342686540:endpoint/aws2-repository-endpoint-2021-03-24-15-09-09',
  'CreationTime': datetime.datetime(2021, 3, 24, 16, 9, 14, 751000, tzinfo=tzlocal()),
  'LastModifiedTime': datetime.datetime(2021, 3, 24, 16, 17, 28, 211000, tzinfo=tzlocal()),
  'EndpointStatus': 'InService'}]

In [None]:
#delete model, endpoint, endpoint configuraiton y el registry (ver)

In [None]:
#sagemaker.Session().delete_endpoint('aws2-repository-endpoint-2021-03-23-22-22-15')

## Test container

In [29]:
import os
from io import StringIO
runtime_client = boto3.client('sagemaker-runtime')
model_path = os.path.dirname(os.getcwd())

In [30]:
with open(os.path.join(model_path,'local-model/X_test.csv')) as f:
    payload = f.read().strip()

response = runtime_client.invoke_endpoint(
                            EndpointName = endpoint_name, 
                            ContentType='text/csv', 
                            Body=payload.encode('utf-8'))
result = response['Body'].read()

In [37]:
s=str(result,'utf-8')
pd.read_csv(StringIO(s) )

Unnamed: 0,x1,x3,y1,x4,x2,x5,y,flag_train,pd
0,1.527905,-0.732013,3.507635,-0.010777,1.134899,D,1,2,0.916793
1,-0.100697,0.359118,2.718173,0.507330,0.526648,B,1,2,0.853656
2,0.303793,-0.450724,2.095225,0.165429,1.590835,B,1,2,0.865216
3,-0.764048,0.538282,1.699226,-0.337947,1.398086,B,1,2,0.871932
4,1.064482,0.223539,2.646861,-0.204668,-0.050285,D,1,2,0.906335
...,...,...,...,...,...,...,...,...,...
194,0.855495,-1.220430,5.829651,0.270106,-1.789519,D,1,2,0.893840
195,0.783989,1.232337,5.536935,-0.046588,0.883816,D,1,2,0.939172
196,-1.800726,-1.168124,-1.656599,-0.302814,-1.160325,B,1,2,0.243705
197,1.002345,2.190936,-4.664816,-0.068465,-1.399620,D,0,2,0.207043
