# Amazon Web Services

[![Index](https://img.shields.io/badge/Index-blue)](../index.ipynb)
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/digillia/Digillia-Colab/blob/main/tools/aws.ipynb)

> <span style='color:red'>ATTENTION: notebook en cours d'élaboration!</span>

Le code ci-après requiert un compte avec Amazon Web Services:
- https://console.aws.amazon.com

TODO:
- https://aws.amazon.com/blogs/machine-learning/train-and-host-scikit-learn-models-in-amazon-sagemaker-by-building-a-scikit-docker-container/
- https://www.youtube.com/watch?v=C_AtgCm43Nk
- https://github.com/learn-mikegchambers-com/aws-mls-c01/tree/master/8-SageMaker/SageMaker-Script-Mode
- https://sagemaker.readthedocs.io/en/stable/frameworks/sklearn/using_sklearn.html
- https://aws.amazon.com/fr/blogs/machine-learning/reduce-the-time-taken-to-deploy-your-models-to-amazon-sagemaker-for-testing/

In [3]:
from IPython import get_ipython

# Supprimer les commentaires pour installer
# !pip3 install -q -U scikit-learn

# À installer dans tous les cas pour Google Colab
if 'google.colab' in str(get_ipython()):
    !pip3 install -q -U boto3
    !pip3 install -q -U sagemaker
    #!pip3 install -q -U aws-cdk.aws-s3 # https://docs.aws.amazon.com/cdk/v2/guide/serverless_example.html

In [None]:
import os

# On Google Colab, copy the AWS credentials and the default region from the
# Colab user-data store into this process's environment variables.
if 'google.colab' in str(get_ipython()):
    from google.colab import userdata

    for secret_name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_DEFAULT_REGION'):
        os.environ[secret_name] = userdata.get(secret_name)

In [85]:
# Les variables python sont accessibles depuis les commandes shell
work_directory = './aws'

!mkdir -p $work_directory

## Création et sérialisation d'un modèle SciKit-Learn

In [86]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

# Synthetic regression data: 100 samples, one feature, fixed random seed
X, y = make_regression(random_state=123, n_features=1, n_samples=100)

# Train a scikit-learn linear regression on the synthetic data;
# the trailing expression displays the fitted estimator.
model = LinearRegression().fit(X, y)
model

In [87]:
import pickle

# Serialize the fitted model. The context manager guarantees the file is
# flushed and closed as soon as the dump completes.
with open(f'{work_directory}/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Alternatively:
# import joblib
# joblib.dump(model, f'{work_directory}/model.joblib')

In [88]:
!tar -czvf $work_directory/model.tar.gz -C $work_directory model.pkl
#!tar -xzvf $work_directory/model.tar.gz -C $work_directory

a model.pkl


## Configuration de SageMaker

In [89]:
%%writefile $work_directory/script.py
import pickle

def model_fn(model_dir):
    """
    Loads the model for inference.

    Args:
        model_dir: Directory containing the ``model.pkl`` file.

    Returns:
        The object deserialized from ``model.pkl``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted artifacts.
    # The context manager closes the file handle even if unpickling fails.
    with open(model_dir + "/model.pkl", 'rb') as model_file:
        return pickle.load(model_file)


def predict_fn(input_data, model):
    """
    Runs inference on the deserialized request object with the loaded model.

    Args:
        input_data: Deserialized request payload, passed as-is to ``model.predict``.
        model: Object exposing a ``predict`` method.

    Returns:
        Whatever ``model.predict(input_data)`` returns.
    """
    predictions = model.predict(input_data)
    return predictions

Writing ./aws/script.py


In [90]:
import boto3
import json

iam = boto3.client('iam')
role_name = 'SageMakerExecutionRole'

# Trust policy: allows the SageMaker service to assume this role
role_policy = {
    'Version': '2012-10-17',
    'Statement': [{
        'Effect': 'Allow',
        'Principal': {'Service': 'sagemaker.amazonaws.com'},
        'Action': 'sts:AssumeRole'
    }]
}

# create_role fails with EntityAlreadyExists when the role is still around
# (e.g. after an interrupted run); reuse it so the cell can be re-run.
# Both calls return a dict with the same 'Role' structure.
try:
    role = iam.create_role(
        Path='/',
        RoleName=role_name,
        Description='Allows SageMaker to call AWS services on your behalf',
        AssumeRolePolicyDocument=json.dumps(role_policy)
    )
except iam.exceptions.EntityAlreadyExistsException:
    role = iam.get_role(RoleName=role_name)

# Attach the AWS managed SageMaker policy to the role
iam.attach_role_policy(
    RoleName=role_name,
    PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
)

print(role['Role']['Arn'])


arn:aws:iam::215711614536:role/SageMakerExecutionRole


In [91]:
from sagemaker.session import Session
from sagemaker.sklearn import SKLearnModel
from sagemaker.serializers import NumpySerializer
from sagemaker.deserializers import NumpyDeserializer
import numpy as np

session = Session()
bucket = session.default_bucket()

# Upload the model archive to S3 object storage; upload_data returns its S3 URI
model_uri = session.upload_data(
    path=f'{work_directory}/model.tar.gz',
    bucket=bucket,
    key_prefix='sklearn-model'
)

# Create the scikit-learn model in SageMaker.
# A dedicated name keeps `model` bound to the fitted scikit-learn estimator,
# so re-running the serialization cell still pickles the right object.
# The session is given to the constructor, which is where the SDK reads it.
sklearn_model = SKLearnModel(
    model_data=model_uri,
    role=role['Role']['Arn'],
    framework_version='1.2-1',
    py_version='py3',
    entry_point='script.py',
    source_dir=work_directory,
    sagemaker_session=session
)

# Create a SageMaker endpoint serving the model
predictor = sklearn_model.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name='sklearn-endpoint',
    serializer=NumpySerializer(),
    deserializer=NumpyDeserializer()
)

# Prediction: send a column vector of four inputs to the endpoint
x = np.array([[1.0], [2.0], [3.0], [4.0]])
response = predictor.predict(x)
response


--------!

array([ 33.86708459,  67.73416918, 101.60125377, 135.46833837])

In [92]:
# Delete the SageMaker model first (it is looked up through the endpoint
# configuration), then the endpoint together with its configuration.
predictor.delete_model()
predictor.delete_endpoint(delete_endpoint_config=True)

# Delete the IAM role; attached managed policies must be detached beforehand
iam.detach_role_policy(
    RoleName=role_name,
    PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
)
iam.delete_role(RoleName=role_name)

# Delete the S3 storage.
# WARNING: this empties and removes the whole SageMaker default bucket,
# including any object not created by this notebook.
s3_resource = boto3.resource('s3')
s3_resource.Bucket(bucket).objects.all().delete()
s3_client = boto3.client('s3')
s3_client.delete_bucket(Bucket=bucket)

{'ResponseMetadata': {'RequestId': 'V7X220QM1ZNDWNXV',
  'HostId': 'QAVWTy+HlDUntQn5w4AV6hTenmy7lAVw+w45KpWzaCDp3g4/n8/MBjsx8NRqCVjENsmuCHOSv2Q=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': 'QAVWTy+HlDUntQn5w4AV6hTenmy7lAVw+w45KpWzaCDp3g4/n8/MBjsx8NRqCVjENsmuCHOSv2Q=',
   'x-amz-request-id': 'V7X220QM1ZNDWNXV',
   'date': 'Thu, 22 Feb 2024 17:53:56 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

In [93]:
# Ménage
!rm -rf $work_directory