## Train a model locally or remotely

In [1]:
# Change into the repository's dev/ folder so relative paths (model file, `../` install) resolve.
# NOTE(review): the recorded output shows this path did not exist on the original machine and the
# working directory was .../easy-amazon-sagemaker-deployments-master/dev - adjust to your checkout.
%cd ~/SageMaker/easy-amazon-sagemaker-deployments/dev/

[Errno 2] No such file or directory: '/home/ec2-user/SageMaker/easy-amazon-sagemaker-deployments/dev/'
/home/ec2-user/SageMaker/easy-amazon-sagemaker-deployments-master/dev


In [2]:
# Remove whatever is installed under the names `sklearn` and `scikit-learn` so the pinned
# version installed in the next cell is the only one present in this kernel's environment.
%pip uninstall -y sklearn scikit-learn

[0mFound existing installation: scikit-learn 1.2.0
Uninstalling scikit-learn-1.2.0:
  Successfully uninstalled scikit-learn-1.2.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Upgrade pip first, then install the exact scikit-learn version that the Deploy
# `requirements` further down pin (1.2.1).
# NOTE(review): sagemaker is left unpinned here; per the recorded output of the editable
# ezsmdeploy install later in this notebook, that install pulls sagemaker==2.136.0.
%pip install --upgrade pip
%pip install --upgrade scikit-learn==1.2.1 sagemaker

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting pip
  Downloading pip-23.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m59.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.3.1
    Uninstalling pip-22.3.1:
      Successfully uninstalled pip-22.3.1
Successfully installed pip-23.1
Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting scikit-learn==1.2.1
  Downloading scikit_learn-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m87.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
Collecting sagemaker
  Downloading sagemaker-2.147.0.tar.gz (718 kB)
[2K     [90m━━━━━━

In [4]:
import sklearn

### Make sure these versions match when you use ezsmdeploy

In [5]:
# Print the Python, scikit-learn and dependency versions of this kernel; compare them with the
# `requirements` list passed to ezsmdeploy.Deploy later in the notebook.
sklearn.show_versions()


System:
    python: 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:26:04) [GCC 10.4.0]
executable: /home/ec2-user/anaconda3/envs/python3/bin/python
   machine: Linux-5.10.157-139.675.amzn2.x86_64-x86_64-with-glibc2.26

Python dependencies:
      sklearn: 1.2.1
          pip: 23.1
   setuptools: 65.6.3
        numpy: 1.22.3
        scipy: 1.10.0
       Cython: 0.29.33
       pandas: 1.5.2
   matplotlib: 3.6.2
       joblib: 1.2.0
threadpoolctl: 3.1.0

Built with OpenMP: True

threadpoolctl info:
       user_api: openmp
   internal_api: openmp
         prefix: libgomp
       filepath: /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0
        version: None
    num_threads: 8

       user_api: blas
   internal_api: mkl
         prefix: libmkl_rt
       filepath: /home/ec2-user/anaconda3/envs/python3/lib/libmkl_rt.so.2
        version: 2023.0-Product
threading_layer: intel
    num_threads: 4

       user_api: openmp
  

In [6]:
from sklearn import datasets, svm

# Load the bundled handwritten-digits dataset (each sample is a flat row of 64 pixel values).
digits = datasets.load_digits()

# probability=True makes SVC fit an internal, randomised cross-validated calibration;
# fixing random_state keeps the saved model reproducible across Restart & Run All.
clf = svm.SVC(gamma=0.001, C=100., probability=True, random_state=0)

# Train on everything except the last sample, which is held out as the test payload.
clf.fit(digits.data[:-1], digits.target[:-1])

# Sanity check: classify the held-out sample.
clf.predict(digits.data[-1:])

array([8])

In [7]:
# Display the held-out last sample (a single row of 64 feature values); the same slice is
# reused later as the prediction payload.
digits.data[-1:]

array([[ 0.,  0., 10., 14.,  8.,  1.,  0.,  0.,  0.,  2., 16., 14.,  6.,
         1.,  0.,  0.,  0.,  0., 15., 15.,  8., 15.,  0.,  0.,  0.,  0.,
         5., 16., 16., 10.,  0.,  0.,  0.,  0., 12., 15., 15., 12.,  0.,
         0.,  0.,  4., 16.,  6.,  4., 16.,  6.,  0.,  0.,  8., 16., 10.,
         8., 16.,  8.,  0.,  0.,  1.,  8., 12., 14., 12.,  1.,  0.]])

### Save model file

In [8]:
!pip install joblib
from joblib import dump
dump(clf, 'model.joblib')

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


['model.joblib']

## Step 1 : Write a model transform script

#### Make sure you have a ...

- "load_model" function
    - input args are model path
    - returns loaded model object
    - model name is the same as what you saved the model file as (see above step)
<br><br>
- "predict" function
    - input args are the loaded model object and a payload
    - returns the result of model.predict
    - make sure you format the return value as a single string (for real-time inference) or as multiple strings (for mini-batch), inside a list
    - from a client, a list, string, or np.array that is sent for prediction arrives as bytes; convert it back to a list, string, or np.array as needed
    - return the error for debugging


In [9]:
%%writefile modelscript_sklearn.py
import sklearn
from joblib import load
import numpy as np
import os

#Return loaded model
def load_model(modelpath):
    """Load the serialized model stored in the given directory.

    Args:
        modelpath: Directory that contains the ``model.joblib`` file.

    Returns:
        The object deserialized from ``<modelpath>/model.joblib``.
    """
    print(modelpath)
    model_file = os.path.join(modelpath, 'model.joblib')
    model = load(model_file)
    print("loaded")
    return model

# return prediction based on loaded model (from the step above) and an input payload
def predict(model, payload):
    """Classify one sample delivered as a raw buffer.

    Args:
        model: Object returned by ``load_model``; only its ``predict`` method is used.
        payload: Bytes (or any object exposing the buffer protocol, such as a NumPy
            array) that decodes with ``np.frombuffer``'s default dtype into exactly
            64 values.

    Returns:
        The predicted label as a string (e.g. ``'8'``) on success. On failure, a
        two-item list of strings ``[payload type, error message]`` for debugging.
    """
    print(type(payload))
    try:
        # Decode the buffer once and reuse it for logging and inference.
        flat = np.frombuffer(payload)
        print(flat)

        features = flat.reshape((1, 64))
        print(features)

        # Run inference a single time; the result is both logged and returned.
        prediction = model.predict(features)
        print(prediction)

        # Extract the single predicted label explicitly instead of calling int() on an
        # array, which NumPy deprecates for arrays with ndim > 0.
        out = str(int(np.asarray(prediction).item()))

    except Exception as e:
        # Return strings only: a raw ``type`` object is not a string and does not
        # honour the "strings inside a list" contract expected from this function.
        out = [str(type(payload)), str(e)] #useful for debugging!

    return out

Overwriting modelscript_sklearn.py


## Does this work locally? (not "_in a container locally_", but _actually_ locally)

In [10]:
# Import only the two entry points defined in the model script instead of `import *`,
# so the script's own imports do not leak into the notebook namespace.
from modelscript_sklearn import load_model, predict

# The model file was saved to the current working directory.
model = load_model('.')

.
loaded


In [11]:
# Local smoke test of the handler. The NumPy array is passed directly here (the recorded
# output shows the payload type as numpy.ndarray); the endpoint calls further down send
# the same sample as raw bytes via .tobytes().
predict(model,digits.data[-1:])

<class 'numpy.ndarray'>
[ 0.  0. 10. 14.  8.  1.  0.  0.  0.  2. 16. 14.  6.  1.  0.  0.  0.  0.
 15. 15.  8. 15.  0.  0.  0.  0.  5. 16. 16. 10.  0.  0.  0.  0. 12. 15.
 15. 12.  0.  0.  0.  4. 16.  6.  4. 16.  6.  0.  0.  8. 16. 10.  8. 16.
  8.  0.  0.  1.  8. 12. 14. 12.  1.  0.]
[[ 0.  0. 10. 14.  8.  1.  0.  0.  0.  2. 16. 14.  6.  1.  0.  0.  0.  0.
  15. 15.  8. 15.  0.  0.  0.  0.  5. 16. 16. 10.  0.  0.  0.  0. 12. 15.
  15. 12.  0.  0.  0.  4. 16.  6.  4. 16.  6.  0.  0.  8. 16. 10.  8. 16.
   8.  0.  0.  1.  8. 12. 14. 12.  1.  0.]]
[8]


'8'

### OK, great! Now let's install ezsmdeploy
In some cases, installation fails because of an already-installed package called greenlet.
It is not a direct dependency of ezsmdeploy, but it interferes with the installation.
To fix this, either install into a clean virtualenv, or run:

`pip install ezsmdeploy[locust] --ignore-installed greenlet`

In [12]:
!pip uninstall -y ezsmdeploy

[0m

### Install local dev version 

In [13]:
# Editable install of ezsmdeploy from the parent directory; this relies on the working
# directory being the repository's dev/ folder (set by the %cd cell at the top).
%pip install -e ../

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Obtaining file:///home/ec2-user/SageMaker/easy-amazon-sagemaker-deployments-master
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting sagemaker==2.136.0 (from ezsmdeploy==2.0.dev0)
  Downloading sagemaker-2.136.0.tar.gz (683 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m683.1/683.1 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting yaspin==0.16.0 (from ezsmdeploy==2.0.dev0)
  Downloading yaspin-0.16.0-py2.py3-none-any.whl (18 kB)
Collecting shortuuid==1.0.1 (from ezsmdeploy==2.0.dev0)
  Downloading shortuuid-1.0.1-py3-none-any.whl (7.5 kB)
Collecting sagemaker-studio-image-build==0.5.0 (from ezsmdeploy==2.0.dev0)
  Downloading sagemaker_studio_image_build-0.5.0.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sagemaker, sagemaker-studio-image-build
  Bui

### Note: you may need to restart the kernel to use updated packages.

In [1]:
import ezsmdeploy

#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict

In [2]:
!docker container stop $(docker container ls -aq) >/dev/null

"docker container stop" requires at least 1 argument.
See 'docker container stop --help'.

Usage:  docker container stop [OPTIONS] CONTAINER [CONTAINER...]

Stop one or more running containers


## Deploy locally

In [None]:
# Pin the same versions that sklearn.show_versions() reported for the training kernel
# (scikit-learn 1.2.1, numpy 1.22.3, joblib 1.2.0) so the container unpickles the model
# with the libraries it was saved with.
ez = ezsmdeploy.Deploy(model = 'model.joblib', # if you intend to add models later, pass model as list, otherwise str
                  script = 'modelscript_sklearn.py',
                  requirements = ['scikit-learn==1.2.1','numpy==1.22.3','joblib==1.2.0'], #or pass in the path to requirements.txt
                  instance_type = 'local',
                  autoscale = True,
                  wait = True)

[K0:00:00.154328 | compressed model(s)
[K0:00:00.299236 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.299731 | added requirements file
[K0:00:00.300807 | added source file
[K0:00:00.301614 | added Dockerfile
[K0:00:00.302651 | added model_handler and docker utils
[K0:00:00.302695 | building docker container
[32m∙∙∙[0m [K

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



[32m∙∙∙[0m [K

## Test containerized version locally

In [None]:
import sagemaker
# Replace the predictor's serializer with IdentitySerializer before sending the payload.
# NOTE(review): presumably this forwards the bytes built with .tobytes() unchanged, which is
# what the model script's np.frombuffer call expects - confirm against the SageMaker SDK docs.
ez.predictor.serializer = sagemaker.serializers.IdentitySerializer()

In [None]:
# The kernel is restarted after installing ezsmdeploy, so `digits` from the training section
# no longer exists; reload the dataset here so this cell works on a fresh kernel.
from sklearn import datasets
digits = datasets.load_digits()

# Send the held-out last sample as raw bytes, the format the model script decodes.
out = ez.predictor.predict(digits.data[-1:].tobytes())#.decode()
out

In [None]:
!docker container stop $(docker container ls -aq) >/dev/null

## Deploy on SageMaker

In [None]:
# Pin the same versions that sklearn.show_versions() reported for the training kernel
# (scikit-learn 1.2.1, numpy 1.22.3, joblib 1.2.0) so the container unpickles the model
# with the libraries it was saved with.
ezonsm = ezsmdeploy.Deploy(model = 'model.joblib', # if you intend to add models later, pass model as list, otherwise str
                  script = 'modelscript_sklearn.py',
                  requirements = ['scikit-learn==1.2.1','numpy==1.22.3','joblib==1.2.0'], #or pass in the path to requirements.txt
                  autoscale = True,
                  wait = True)

To debug docker build errors try this....

In [None]:
# !./src/build-docker.sh test

In [None]:
# The kernel is restarted after installing ezsmdeploy, so `digits` from the training section
# may not exist; reload the dataset here so this cell works on a fresh kernel.
from sklearn import datasets
digits = datasets.load_digits()

# Send the held-out last sample as raw bytes, the format the model script decodes.
out = ezonsm.predictor.predict(digits.data[-1:].tobytes())#.decode()
out

### Don't leave resources running

In [None]:
# Tear down the endpoint behind the `ezonsm` deployment so it is not left running.
ezonsm.predictor.delete_endpoint()