# Amazon SageMaker administration and security workshop: Lab 3

This notebook contains hands-on exercises for the workshop **Amazon SageMaker administration and security** – Lab 3.

## Import packages and load variables

In [20]:
import time
import os
import json
import boto3
import numpy as np  
import pandas as pd 
import sagemaker
from sagemaker.network import NetworkConfig
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput

# Display the version of the imported SageMaker Python SDK as the cell output
sagemaker.__version__

'2.144.0'

In [15]:
%store -r 

%store

try:
    initialized
except NameError:
    print("++++++++++++++++++++++++++++++++++++++++++")
    print("[ERROR] YOU HAVE TO RUN 01-lab-01 notebook         ")
    print("++++++++++++++++++++++++++++++++++++++++++")

Stored variables and their in-db values:
bucket_name                   -> 'sagemaker-us-east-1-949335012047'
bucket_prefix                 -> 'from-idea-to-prod/xgboost'
domain_id                     -> 'd-dech5fdx5938'
initialized                   -> True
input_s3_url                  -> 's3://sagemaker-us-east-1-949335012047/sm-admin-wo
region                        -> 'us-east-1'
sm_role                       -> 'arn:aws:iam::949335012047:role/sagemaker-admin-wo
target_col                    -> 'y'
test_s3_url                   -> 's3://sagemaker-us-east-1-949335012047/sm-admin-wo
train_s3_url                  -> 's3://sagemaker-us-east-1-949335012047/sm-admin-wo
validation_s3_url             -> 's3://sagemaker-us-east-1-949335012047/sm-admin-wo


In [22]:
# Get some variables you need to interact with SageMaker service.
# NOTE: `region`, `bucket_name`, `bucket_prefix` and `sm_role` are assigned here
# unconditionally, so they replace any values with the same names restored by `%store -r`.
boto_session = boto3.Session()
region = boto_session.region_name

# Build the SageMaker session once and reuse it for the default bucket and the
# execution role lookup instead of constructing a new session for each call.
sm_session = sagemaker.Session()
bucket_name = sm_session.default_bucket()
bucket_prefix = "sm-admin-workshop/xgboost"

# Create both service clients from the same boto3 session for consistency.
sm_client = boto_session.client("sagemaker")
ssm = boto_session.client("ssm")

sm_role = sagemaker.get_execution_role(sagemaker_session=sm_session)

## Logging and monitoring

### Logging with CloudTrail
Follow the instructions in the workshop lab 3 - Step 1. 
You can run the following step to generate `DescribeDomain` API access log entries in the CloudTrail event history.

In [11]:
# Call DescribeDomain for the restored domain id and show the response
domain_description = sm_client.describe_domain(DomainId=domain_id)
domain_description

{'DomainArn': 'arn:aws:sagemaker:us-east-1:949335012047:domain/d-dech5fdx5938',
 'DomainId': 'd-dech5fdx5938',
 'DomainName': 'sagemaker-admin-workshop-domain',
 'HomeEfsFileSystemId': 'fs-0e33dc594dfbf102d',
 'Status': 'InService',
 'CreationTime': datetime.datetime(2023, 4, 2, 20, 35, 42, 179000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 4, 10, 13, 54, 16, 899000, tzinfo=tzlocal()),
 'AuthMode': 'IAM',
 'DefaultUserSettings': {'ExecutionRole': 'arn:aws:iam::949335012047:role/sagemaker-admin-workshop-iam-StudioRoleDefault-F49QJ95LURXC',
  'SecurityGroups': ['sg-094cc28a340257059'],
  'SharingSettings': {'NotebookOutputOption': 'Allowed',
   'S3OutputPath': 's3://sagemaker-studio-949335012047-x2td9ne824/sharing',
   'S3KmsKeyId': 'arn:aws:kms:us-east-1:949335012047:key/acadcfc8-a091-4d23-917e-7bf0964151a5'},
  'JupyterServerAppSettings': {'DefaultResourceSpec': {'SageMakerImageArn': 'arn:aws:sagemaker:us-east-1:081325390199:image/jupyter-server-3',
    'InstanceTy

## Security controls

### Preventive
In this section you experiment with IAM policies and condition keys. Follow the instructions in the workshop lab 3 - Step 2.

In [44]:
# Look up the AWS account id of the caller through STS
sts_client = boto3.client("sts")
caller_identity = sts_client.get_caller_identity()
account_id = caller_identity["Account"]

# Region configured for the default boto3 session
region = boto3.Session().region_name

(account_id, region)

('949335012047', 'us-east-1')

In [45]:
# Names of the SSM parameters that hold the workshop network settings
sg_param_name = f"sagemaker-admin-workshop-{region}-{account_id}-sagemaker-sg-ids"
subnet_param_name = f"sagemaker-admin-workshop-{region}-{account_id}-private-subnet-ids"

# Each parameter value is a string; it is split on commas when the NetworkConfig is built
sg_param = ssm.get_parameter(Name=sg_param_name)
subnet_param = ssm.get_parameter(Name=subnet_param_name)

security_group_ids = sg_param["Parameter"]["Value"]
private_subnet_ids = subnet_param["Parameter"]["Value"]

(security_group_ids, private_subnet_ids)

('sg-094cc28a340257059', 'subnet-0dbfb5fab7b6ae14e,subnet-0324af06a736e9404')

In [46]:
# Turn the comma-separated id strings into lists and build the NetworkConfig
# with the values for your environment
sg_id_list = security_group_ids.split(",")
subnet_id_list = private_subnet_ids.split(",")

network_config = NetworkConfig(
    subnets=subnet_id_list,
    security_group_ids=sg_id_list,
    enable_network_isolation=False,
    encrypt_inter_container_traffic=True,
)

In [47]:
# Settings passed to the SKLearnProcessor below
processing_instance_count = 1
processing_instance_type = "ml.m5.large"
framework_version = "0.23-1"

In [48]:
# Input: the dataset at input_s3_url is made available under /opt/ml/processing/input
raw_data_input = ProcessingInput(
    source=input_s3_url,
    destination="/opt/ml/processing/input",
    s3_input_mode="File",
    s3_data_distribution_type="ShardedByS3Key",
)
processing_inputs = [raw_data_input]

# Outputs: (output name, path inside the container, S3 destination) for each data split
output_channels = [
    ("train_data", "/opt/ml/processing/output/train", train_s3_url),
    ("validation_data", "/opt/ml/processing/output/validation", validation_s3_url),
    ("test_data", "/opt/ml/processing/output/test", test_s3_url),
]

processing_outputs = [
    ProcessingOutput(output_name=channel_name, source=container_path, destination=s3_url)
    for channel_name, container_path, s3_url in output_channels
]

In [49]:
# Collect the processor settings, then create the scikit-learn processor
# that runs inside the VPC described by network_config
processor_settings = dict(
    base_job_name="sm-admin-workshop-processing",
    framework_version=framework_version,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    network_config=network_config,
    role=sm_role,
    sagemaker_session=sm_session,
)

sklearn_processor = SKLearnProcessor(**processor_settings)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [39]:
# Launch the processing job with preprocessing.py and block until it finishes
sklearn_processor.run(
    code="preprocessing.py",
    inputs=processing_inputs,
    outputs=processing_outputs,
    wait=True,
)

INFO:sagemaker:Creating processing-job with name sm-admin-workshop-processing-2023-04-11-11-01-05-042


...........................[34mData split > train:(28831, 60) | validation:(8238, 60) | test:(4119, 60)[0m
[34m## Processing complete. Exiting.[0m



#### Enforce the designated subnets
Let's implement the enforcement of the specific values in the network configuration. Update the preventive IAM policy attached to the user profile execution role as instructed in the workshop lab.

In [41]:
# Replace the subnet ids read from SSM with a different, hard-coded subnet id
private_subnet_ids = "subnet-011e4fcfca10fffea"

In [42]:
# Rebuild the NetworkConfig from the current security_group_ids and private_subnet_ids values
sg_id_list = security_group_ids.split(",")
subnet_id_list = private_subnet_ids.split(",")

network_config = NetworkConfig(
    subnets=subnet_id_list,
    security_group_ids=sg_id_list,
    enable_network_isolation=False,
    encrypt_inter_container_traffic=True,
)

In [None]:
# Create a processor that uses the current network_config
sklearn_processor = SKLearnProcessor(
    framework_version=framework_version,
    role=sm_role,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    base_job_name='sm-admin-workshop-processing',
    sagemaker_session=sm_session,
    network_config=network_config
)

# Start the processing job - this is expected to be rejected with an AccessDeniedException.
# The expected error is caught and displayed so that "Run All" can continue to the
# remaining cells; any other error is re-raised unchanged.
try:
    sklearn_processor.run(
        inputs=processing_inputs,
        outputs=processing_outputs,
        code='preprocessing.py',
        wait=True,
    )
except sm_client.exceptions.ClientError as error:
    error_code = error.response["Error"]["Code"]
    if error_code != "AccessDeniedException":
        raise
    print(f"[EXPECTED] {error_code}: {error.response['Error']['Message']}")
else:
    # Reaching this branch means the job request was accepted
    print("[WARNING] The processing job was not denied. Check that the preventive IAM policy is attached to the execution role.")

## Shutdown kernel

In [None]:
%%html

<!-- The button below is hidden (display:none) and carries data-commandlinker-command="kernelmenu:shutdown"; the script clicks it when this output is rendered. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    // Click the first element in the document that has the "sm-command-button" class.
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp: any error raised by the lookup or the click is deliberately ignored.
}    
</script>