# Prepare session

In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.local import LocalSession
import s3fs
import subprocess
from sagemaker.s3 import S3Downloader, S3Uploader
from pathlib import Path
import json

# --- Names and S3 locations shared by the cells below ---
image_name = "sagemaker-test"
ecr_namespace = f"{image_name}/"
default_bucket = "prod-test"
bucket = default_bucket
default_uri = f"s3://{default_bucket}"
atf_s3_uri = f"{default_uri}/sagemaker"
data_location_uri = f"{default_uri}/training_data/full"

# --- AWS identity and sessions ---
role = get_execution_role()
# Field 4 of the ':'-separated role string; assumes the usual
# arn:aws:iam::<account-id>:role/<name> layout.
account_id = role.split(":")[4]
boto_session = boto3.Session()
region = boto_session.region_name
sagemaker_session = sagemaker.Session(boto_session=boto_session, default_bucket=default_bucket)
s3_helper = s3fs.S3FileSystem()

# Echo the resolved configuration, one value per line.
print(
    account_id,
    region,
    role,
    sagemaker_session,
    default_uri,
    atf_s3_uri,
    data_location_uri,
    sep="\n",
)

# Develop on real SageMaker

## Build and push image

In [None]:
# Run build_image.sh from the container/ directory, passing the value of
# `image_name` as its only argument.
! cd container && bash build_image.sh $image_name

In [None]:
# Run push_image.sh from the container/ directory, passing the value of
# `image_name` as its only argument.
! cd container && bash push_image.sh $image_name

In [None]:
# Look up the `latest` tag in the ECR repository named by `image_name` and
# display when it was pushed (useful to confirm the push above took effect).
ecr_client = boto3.client("ecr")
latest_tag = {"imageTag": "latest"}
response = ecr_client.describe_images(repositoryName=image_name, imageIds=[latest_tag])
pushed_at = response["imageDetails"][0]["imagePushedAt"]
str(pushed_at)

## Define parameters

In [None]:
# Container-side root directory; the processing inputs/outputs below are
# mounted under it.
opt_ml_dir = "/opt/ml/processing"
# Label embedded in the S3 prefixes of this run's prepared data and evaluation.
execution_id = "exp-real-sm"
# ECR URI of the `latest` tag of our image in this account and region.
image_uri = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest"
print(image_uri)

In [6]:
# Instance settings shared by the processing jobs (data preparation, evaluation).
processing_instance_count = 1
processing_instance_type = "ml.m5.2xlarge"
# Instance type used for the training job and, further down, for the endpoint.
training_instance_type = "ml.m5.2xlarge"

## Prepare data

In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Processing job that runs the data-preparation script inside our custom image.
processor = ScriptProcessor(
    base_job_name="prepare-data-processor",
    image_uri=image_uri,
    # IMPORTANT: the interpreter name depends on the Dockerfile; do not use python3.
    command=["python"],
    role=role,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    max_runtime_in_seconds=1200,
)

# IMPORTANT (original author's note): each ProcessingOutput source must be a
# folder without any nested folder inside; otherwise the job raises
# "Permission Denied" during its post-processing step.
# Example: source cannot be "/opt/ml/processing/output/prepared_data", because
# that involves two nested folders.

# S3 prefix that receives this run's prepared train/test splits.
prepared_data_uri = f"{atf_s3_uri}/prepared_data/{execution_id}"

processor.run(
    code="container/code/prepare_data.py",
    inputs=[
        ProcessingInput(source=data_location_uri, destination=f"{opt_ml_dir}/input"),
    ],
    # One output per split: <opt_ml_dir>/<split> is uploaded to <prepared_data_uri>/<split>.
    outputs=[
        ProcessingOutput(
            output_name=split,
            source=f"{opt_ml_dir}/{split}",
            destination=f"{prepared_data_uri}/{split}",
        )
        for split in ("train", "test")
    ],
    wait=True,
    logs=True,
)

In [None]:
# Inspect uploaded data
preprocessing_job_description = processor.jobs[-1].describe()
output_config = preprocessing_job_description["ProcessingOutputConfig"]
for output in output_config["Outputs"]:
    if output["OutputName"] == "train":
        train_data_uri = output["S3Output"]["S3Uri"]
    if output["OutputName"] == "test":
        test_data_uri = output["S3Output"]["S3Uri"]

! aws s3 ls $train_data_uri/
! aws s3 ls $test_data_uri/

## Train

In [None]:
import sagemaker
import json

# JSON encode hyperparameters
def json_encode_hyperparameters(hyperparameters):
    """Return a new dict with stringified keys and JSON-encoded values.

    Args:
        hyperparameters: Mapping of hyperparameter name to a JSON-serializable
            value.

    Returns:
        A dict mapping ``str(key)`` to ``json.dumps(value)`` for every item.
    """
    encoded = {}
    for name, value in hyperparameters.items():
        encoded[str(name)] = json.dumps(value)
    return encoded

# Hyperparameters for the training job; every value is JSON-encoded to a
# string by json_encode_hyperparameters before being handed to the Estimator.
hyperparameters = json_encode_hyperparameters({
    "learning_rate": 0.05,
})

# Estimator backed by our custom image; model artifacts are written under
# <atf_s3_uri>/model.
est = sagemaker.estimator.Estimator(
    image_uri,
    role,
    instance_count=1,
    instance_type=training_instance_type,
    hyperparameters=hyperparameters,
    output_path=atf_s3_uri + "/model",
    sagemaker_session=sagemaker_session,
    max_run=600,  # training timeout in seconds
    disable_profiler=True,
    use_spot_instances=True,
    # Total seconds to wait for the spot job. SageMaker requires
    # max_wait >= max_run (the previous comment stated the inverse).
    max_wait=600,
)

# Train using only the "train" channel produced by the data-preparation job.
est.fit({"train": train_data_uri})

### Inspect trained model artifact

In [None]:
# Name of the most recent training job launched through `est`.
job_name = est.latest_training_job.name
print(job_name)

# Print the job's overall and secondary status from its description.
training_job_description = est.jobs[-1].describe()
for status_field in ("TrainingJobStatus", "SecondaryStatus"):
    print(training_job_description[status_field])

# S3 location of the trained model artifact, as reported by the job description.
model_data_s3_uri = training_job_description["ModelArtifacts"]["S3ModelArtifacts"]
print(model_data_s3_uri)

In [None]:
# List the objects under this training job's output prefix.
s3_helper.listdir(f"{atf_s3_uri}/model/{job_name}/output")

In [None]:
# Print the CloudWatch log messages of the training job.
logs = boto3.client("logs")
training_log_group = "/aws/sagemaker/TrainingJobs"

# Log streams of this job are the ones whose name starts with the job name.
log_res = logs.describe_log_streams(
    logGroupName=training_log_group,
    logStreamNamePrefix=job_name,
)

for log_stream in log_res["logStreams"]:
    # Events returned by a single get_log_events call for this stream
    # (no pagination is performed).
    log_event = logs.get_log_events(
        logGroupName=training_log_group,
        logStreamName=log_stream["logStreamName"],
    )

    # Print the message of every returned event that carries one.
    for ev in log_event["events"]:
        if "message" in ev:
            print(ev["message"])

## Evaluate

In [None]:
# Show the processing root directory and define the evaluation file name.
print(opt_ml_dir)
# NOTE(review): presumably the file written by evaluate.py; it is not
# referenced by any other cell visible in this notebook - confirm.
evaluation_filename = "eval.json"
print(evaluation_filename)

In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Processing job that runs the evaluation script inside our custom image.
eval_processor = ScriptProcessor(
    base_job_name="evaluate-processor",
    image_uri=image_uri,
    command=["python"],
    role=role,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    max_runtime_in_seconds=1200,
)

# Inputs: the trained model artifact and the prepared test split, mounted at
# <opt_ml_dir>/model and <opt_ml_dir>/test respectively.
eval_inputs = [
    ProcessingInput(source=s3_source, destination=f"{opt_ml_dir}/{local_name}")
    for s3_source, local_name in (
        (model_data_s3_uri, "model"),
        (test_data_uri, "test"),
    )
]

# Output: whatever the script writes to <opt_ml_dir>/evaluation is uploaded to
# this run's evaluation prefix.
eval_outputs = [
    ProcessingOutput(
        output_name="evaluation",
        source=f"{opt_ml_dir}/evaluation",
        destination=f"{atf_s3_uri}/evaluation/{execution_id}",
    ),
]

eval_processor.run(
    code="container/code/evaluate.py",
    inputs=eval_inputs,
    outputs=eval_outputs,
    wait=True,
    logs=True,
)

In [None]:
eval_job_description = eval_processor.jobs[-1].describe()
eval_output_config = eval_job_description["ProcessingOutputConfig"]
for output in eval_output_config["Outputs"]:
    if output["OutputName"] == "evaluation":
        eval_uri = output["S3Output"]["S3Uri"]
        
! aws s3 ls $eval_uri/

## Deploy

In [None]:
from sagemaker.predictor import CSVSerializer
# Deploy the trained estimator on a single instance (same instance type as
# training) and keep the returned predictor; request payloads are serialized
# as CSV.
predictor = est.deploy(
    initial_instance_count=1,
    instance_type=training_instance_type,
    serializer=CSVSerializer(),
)

In [None]:
# Keep the endpoint name and create a low-level runtime client so the endpoint
# can also be invoked without going through `predictor`.
endpoint_name = predictor.endpoint_name
# NOTE(review): this builds a fresh boto3.Session() rather than reusing
# `boto_session` from the setup cell.
runtime = boto3.Session().client("runtime.sagemaker")
print(endpoint_name)
print(runtime)

## Test endpoint

In [None]:
import pandas as pd
# Download train.csv from this run's prepared-data "train" prefix and load it
# into a DataFrame to use as sample requests for the endpoint.
# NOTE(review): assumes prepare_data.py wrote a file named train.csv - confirm.
s3 = boto3.client('s3')
obj = s3.get_object(Bucket=default_bucket, Key=f'sagemaker/prepared_data/{execution_id}/train/train.csv')
train_df = pd.read_csv(obj['Body']) # 'Body' is the response entry holding the object's content stream
train_df.head()

In [None]:
# Build the request payload by dropping the first column of the training frame
# (presumably the label column - confirm against prepare_data.py).
first_column = train_df.columns[[0]]
test_data = train_df.drop(columns=first_column)
test_data.head()

### Test endpoint using predict function

In [None]:
def format_results(results):
    """Parse newline-separated numeric text into a list of floats.

    Every non-blank line is converted with ``float``. Unlike slicing off the
    last element of ``split('\\n')``, this does not lose the final value when
    the text has no trailing newline.

    Args:
        results: Text containing one number per line.

    Returns:
        A list of floats, one per non-blank line, in input order.

    Raises:
        ValueError: If a non-blank line is not a valid float literal.
    """
    return [float(line) for line in results.splitlines() if line.strip()]
# Send the sample rows through the predictor, decode the raw bytes as UTF-8,
# and display the parsed predictions.
raw_prediction = predictor.predict(test_data.values)
results = raw_prediction.decode("utf-8")
format_results(results)

### Test endpoint using invoke_endpoint function

In [None]:
# Invoke the endpoint through the low-level runtime client: serialize the
# sample rows as CSV, send them, then decode and parse the response body.
csv_body = CSVSerializer().serialize(test_data.values)
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=csv_body,
    ContentType="text/csv",
)
response_text = response["Body"].read().decode()
format_results(response_text)

### Test endpoint using the `invoke-endpoint` AWS CLI command

In [None]:
# Serialize the sample rows as CSV and write them to a local payload file.
test_data_str = CSVSerializer().serialize(test_data.values)
payload_file = "./payload"
with open(payload_file, "w") as f:
    f.write(test_data_str)
# Invoke the endpoint with the AWS CLI, sending the payload file as the request
# body; the response is written to outfile.txt and then printed.
# NOTE(review): the command hard-codes ./payload instead of using
# `payload_file`, so the two must be kept in sync by hand.
! aws sagemaker-runtime invoke-endpoint --endpoint-name $endpoint_name --body fileb://./payload --content-type text/csv outfile.txt && cat outfile.txt

In [None]:
# Print the CloudWatch log messages of the deployed endpoint.
logs = boto3.client("logs")
endpoint_log_group = f"/aws/sagemaker/Endpoints/{endpoint_name}"

log_res = logs.describe_log_streams(logGroupName=endpoint_log_group)

for log_stream in log_res["logStreams"]:
    # Events returned by a single get_log_events call for this stream
    # (no pagination is performed).
    log_event = logs.get_log_events(
        logGroupName=endpoint_log_group,
        logStreamName=log_stream["logStreamName"],
    )

    # Print the message of every returned event that carries one.
    for ev in log_event["events"]:
        if "message" in ev:
            print(ev["message"])

In [24]:
# Tear down the endpoint created by est.deploy() above once testing is done.
predictor.delete_endpoint()

In [25]:
! rm payload && rm outfile.txt