# Amazon Web Services

[![Index](https://img.shields.io/badge/Index-blue)](../index.ipynb)
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/digillia/Digillia-Colab/blob/main/tools/aws.ipynb)

Le code ci-après requiert un compte avec Amazon Web Services:
- https://console.aws.amazon.com

TODO:
- https://aws.amazon.com/blogs/machine-learning/train-and-host-scikit-learn-models-in-amazon-sagemaker-by-building-a-scikit-docker-container/
- https://www.youtube.com/watch?v=C_AtgCm43Nk
- https://github.com/learn-mikegchambers-com/aws-mls-c01/tree/master/8-SageMaker/SageMaker-Script-Mode
- https://sagemaker.readthedocs.io/en/stable/frameworks/sklearn/using_sklearn.html
- https://aws.amazon.com/fr/blogs/machine-learning/reduce-the-time-taken-to-deploy-your-models-to-amazon-sagemaker-for-testing/

The most recent version of scikit-learn supported by SageMaker is v1.2.1
(see https://docs.aws.amazon.com/sagemaker/latest/dg/sklearn.html).

Consequently, this notebook requires Python 3.11 or earlier: scikit-learn 1.2.1 depends on `distutils`, which was removed in Python 3.12.
Consider creating a virtual environment with a compatible version of Python:

```bash
/usr/bin/python3 -m venv .aws_env && \
    source .aws_env/bin/activate
```

In [29]:
import os
import sys

# Supprimer les commentaires pour installer (requirements.txt)
# Sagemaker does not support recent versions of scikit-learn: 
!pip3 install -U scikit-learn==1.2.1 # requires python <=3.11 because distutils was removed in 3.12
!pip3 install -U numpy==1.26.4 # prevents sagemaker_containers._errors.ClientError: No module named 'numpy._core.multiarray'

# À installer dans tous les cas pour Google Colab et Github
if ('google.colab' in sys.modules) or ('CI' in os.environ):
    !pip3 install -q -U boto3
    !pip3 install -q -U sagemaker
    #!pip3 install -q -U aws-cdk.aws-s3 # https://docs.aws.amazon.com/cdk/v2/guide/serverless_example.html

Collecting scikit-learn==1.2.1
  Using cached scikit_learn-1.2.1-cp39-cp39-macosx_12_0_arm64.whl (8.4 MB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.0
    Uninstalling scikit-learn-1.6.0:
      Successfully uninstalled scikit-learn-1.6.0
Successfully installed scikit-learn-1.2.1


In [84]:
# On Google Colab, copy the AWS credentials and region from the Colab
# `userdata` store into environment variables of the same names.
if 'google.colab' in sys.modules:
    from google.colab import userdata
    for _secret_name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_DEFAULT_REGION'):
        os.environ[_secret_name] = userdata.get(_secret_name)

In [85]:
# Les variables python sont accessibles depuis les commandes shell
work_directory = './aws'

!mkdir -p $work_directory

## Création et sérialisation d'un modèle SciKit-Learn

In [87]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

# Synthetic regression data: 100 samples, a single feature, fixed seed
X, y = make_regression(random_state=123, n_features=1, n_samples=100)

# Fit a linear regression model with scikit-learn
model = LinearRegression()
model.fit(X=X, y=y)

In [88]:
import pickle
# Serialize the trained model. The context manager closes the file handle,
# so the pickle is fully written to disk before the archive is built.
with open(f'{work_directory}/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Alternatively:
# import joblib
# joblib.dump(model, f'{work_directory}/model.joblib')

In [89]:
!tar -czvf $work_directory/model.tar.gz -C $work_directory model.pkl
#!tar -xzvf $work_directory/model.tar.gz -C $work_directory

a model.pkl


## Configuration de SageMaker

### Création du script d'entrée

In [90]:
%%writefile $work_directory/script.py
import pickle

def model_fn(model_dir):
    """
    Load the pickled model used for inference.

    Args:
        model_dir: directory that contains ``model.pkl``.

    Returns:
        The object unpickled from ``<model_dir>/model.pkl``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted artifacts.
    # The context manager guarantees the file handle is closed after loading.
    with open(model_dir + "/model.pkl", 'rb') as model_file:
        return pickle.load(model_file)


def predict_fn(input_data, model):
    """
    Run inference: pass the deserialized request object to the loaded model
    and return what its ``predict`` method produces.
    """
    predictions = model.predict(input_data)
    return predictions

Overwriting ./aws/script.py


### Création du rôle avec attachement de la policy

In [91]:
import boto3
import json
import botocore

iam_client = boto3.client('iam')
role_name = 'SageMakerExecutionRole'

# Trust policy: allows the SageMaker service to assume the role
role_policy = dict(
    Version='2012-10-17',
    Statement=[
        dict(
            Effect='Allow',
            Principal=dict(Service='sagemaker.amazonaws.com'),
            Action='sts:AssumeRole',
        ),
    ],
)

def clean_up_role(role_name, policy_arn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'):
    """
    Delete an IAM role after detaching its managed policy.

    Args:
        role_name: name of the IAM role to delete.
        policy_arn: ARN of the managed policy to detach first; defaults to
            AmazonSageMakerFullAccess.

    Client errors are printed rather than raised, so clean-up stays best-effort.
    """
    try:
        try:
            iam_client.detach_role_policy(RoleName=role_name, PolicyArn=policy_arn)
        except iam_client.exceptions.NoSuchEntityException:
            # The policy is not attached (or the role is missing). Keep going:
            # a role left without the policy must still be deleted, and a
            # missing role is reported by delete_role below.
            pass
        iam_client.delete_role(RoleName=role_name)
        print(f"Role {role_name} deleted successfully.")
    except botocore.exceptions.ClientError as e:
        print(f"Error deleting role: {e}")

# Look up the role; if it was already created, remove it before recreating it
try:
    role = iam_client.get_role(RoleName=role_name)
except iam_client.exceptions.NoSuchEntityException:
    print(f'Role {role_name} not found.')
else:
    print(f'Found existing role: {role_name}')
    clean_up_role(role_name)

try:
    # Create the role with the trust policy defined above
    print(f'Creating new role: {role_name}')
    role = iam_client.create_role(
        Path='/',
        RoleName=role_name,
        Description='Allows SageMaker to call AWS services on your behalf',
        AssumeRolePolicyDocument=json.dumps(role_policy),
    )
    try:
        # Attach the AmazonSageMakerFullAccess managed policy to the role
        iam_client.attach_role_policy(
            PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',
            RoleName=role_name,
        )
    except botocore.exceptions.ClientError as attach_error:
        print(f'Error attaching policy: {attach_error}')
        # Roll back: delete the role when the policy could not be attached
        iam_client.delete_role(RoleName=role_name)
        raise
except botocore.exceptions.ClientError as create_error:
    print(f'Unexpected error: {create_error}')
    raise

print(f"Role ARN: {role['Role']['Arn']}")

Found existing role: SageMakerExecutionRole
Role SageMakerExecutionRole deleted successfully.
Creating new role: SageMakerExecutionRole
Role ARN: arn:aws:iam::215711614536:role/SageMakerExecutionRole


### Stockage dans S3

In [60]:
from sagemaker.session import Session

session = Session()
# default_bucket() creates the bucket if needed
bucket = session.default_bucket()

# Upload the model archive to S3 object storage
model_data = session.upload_data(
    key_prefix='sklearn-model',
    bucket=bucket,
    path=f'{work_directory}/model.tar.gz',
)
print('Uploaded model to S3: {}'.format(model_data))

Uploaded model to S3: s3://sagemaker-us-east-1-215711614536/sklearn-model/model.tar.gz


### Création du modèle dans SageMaker

> <span style="color:red">ATTENTION! L'exécution de la cellule suivante peut prendre une dizaine de minutes.</span>

In [92]:
from sagemaker.sklearn import SKLearnModel
from sagemaker.serializers import NumpySerializer
from sagemaker.deserializers import NumpyDeserializer
# import numpy as np

endpoint_name = 'sklearn-endpoint'

# Describe the scikit-learn model to SageMaker.
# The session is passed to the model constructor: deploy() does not document
# a sagemaker_session argument, so passing it there appears to have no effect
# (NOTE(review): confirm against the SDK version in use).
# NOTE: this rebinds `model`, which previously held the fitted LinearRegression.
model = SKLearnModel(
    model_data=model_data,
    role=role['Role']['Arn'],
    framework_version='1.2-1',
    py_version='py3',
    entry_point='script.py',
    source_dir=work_directory,
    sagemaker_session=session,
)

# Create the SageMaker endpoint (this can take about ten minutes).
# NumPy (de)serializers let predict() take and return NumPy arrays.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name=endpoint_name,
    serializer=NumpySerializer(),
    deserializer=NumpyDeserializer(),
)

--------------!

### Prédiction

Les prédictions peuvent aussi se faire par une requête HTTP POST sur `https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/sklearn-endpoint/invocations`.
Voir https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html

In [94]:
import numpy as np

# Four sample inputs as a column vector (one feature per row)
x = np.arange(1.0, 5.0).reshape(-1, 1)
response = predictor.predict(x)
response

array([ 33.86708459,  67.73416918, 101.60125377, 135.46833837])

### Libération des ressources

In [95]:
# boto3 clients for S3 and SageMaker, used by the clean-up helpers
s3_client, sm_client = (boto3.client(service) for service in ('s3', 'sagemaker'))

def clean_up_s3(bucket):
    """
    Empty the given S3 bucket, then delete the bucket itself.

    A botocore ClientError is printed instead of being raised.
    """
    try:
        s3_bucket = boto3.resource('s3').Bucket(bucket)
        s3_bucket.objects.all().delete()
        s3_client.delete_bucket(Bucket=bucket)
    except botocore.exceptions.ClientError as err:
        print(f'Error deleting bucket: {err}')
    else:
        print(f'Bucket {bucket} deleted')

def get_model_name_from_model_data(model_data):
    """
    Find the SageMaker model whose primary container uses the given artifact.

    Args:
        model_data: S3 URL of the model archive, compared with the
            ``ModelDataUrl`` of each model's ``PrimaryContainer``.

    Returns:
        The name of the first matching model, or None when nothing matches.
    """
    # Paginate so that models beyond the first page of results are also checked
    for page in sm_client.get_paginator('list_models').paginate():
        for summary in page['Models']:
            details = sm_client.describe_model(ModelName=summary['ModelName'])
            # Some model descriptions may carry no 'PrimaryContainer' or no
            # 'ModelDataUrl'; skip those instead of failing with a KeyError.
            container = details.get('PrimaryContainer') or {}
            data_url = container.get('ModelDataUrl')
            if data_url is not None and data_url == model_data:
                return details['ModelName']
    return None

def clean_up_sagemaker(endpoint_name):
    """
    Delete the SageMaker endpoint, its endpoint config and the related model.

    Args:
        endpoint_name: name shared by the endpoint and its endpoint config.

    The model is looked up from the notebook-level ``model_data`` S3 URL.
    Every step is best-effort: failures are printed, never raised, so the
    remaining resources are still cleaned up.
    """
    def _attempt(label, delete_call):
        # Run one deletion and report the outcome without raising.
        try:
            delete_call()
            print(f'{label} deleted')
        except sm_client.exceptions.ClientError as e:
            # Show the real cause: a ClientError is not always "not found"
            print(f'{label} not deleted: {e}')
        except Exception as e:
            print(f'{type(e).__name__}: {e}')

    _attempt(
        f'Endpoint {endpoint_name}',
        lambda: sm_client.delete_endpoint(EndpointName=endpoint_name),
    )
    _attempt(
        f'Endpoint config for {endpoint_name}',
        lambda: sm_client.delete_endpoint_config(EndpointConfigName=endpoint_name),
    )

    try:
        model_name = get_model_name_from_model_data(model_data)
    except Exception as e:
        # Includes NameError when `model_data` is not defined in this kernel
        print(f'{type(e).__name__}: {e}')
        model_name = None

    if model_name is not None:
        _attempt(
            f'Model {model_name}',
            lambda: sm_client.delete_model(ModelName=model_name),
        )

In [96]:
# Resource identifiers (same values as used earlier in the notebook)
role_name = 'SageMakerExecutionRole'
endpoint_name = 'sklearn-endpoint'
session = Session()
bucket = session.default_bucket()

# Release the AWS resources: SageMaker first, then the bucket, then the IAM role
clean_up_sagemaker(endpoint_name)
clean_up_s3(bucket)
clean_up_role(role_name)

Endpoint sklearn-endpoint deleted
Endpoint config for sklearn-endpoint deleted
Model sagemaker-scikit-learn-2024-12-13-18-49-27-220 deleted
Bucket sagemaker-us-east-1-215711614536 deleted
Role SageMakerExecutionRole deleted successfully.


In [97]:
# Ménage
!rm -rf $work_directory