# Prepare session

In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.local import LocalSession
import s3fs
import subprocess
from sagemaker.s3 import S3Downloader, S3Uploader
from pathlib import Path
import json

# Names and S3 locations shared by the cells of this notebook.
image_name = "sagemaker-test"
ecr_namespace = f"{image_name}/"
default_bucket = "prod-test"
default_uri = f"s3://{default_bucket}"
atf_s3_uri = f"{default_uri}/sagemaker"
data_location_uri = f"{default_uri}/training_data/full"

# Identity and region, taken from the execution role and the boto3 session.
role = get_execution_role()
account_id = role.split(":")[4]  # fifth ":"-separated field of the role string
boto_session = boto3.Session()
region = boto_session.region_name
bucket = default_bucket

# Local-mode SageMaker session; its default bucket is forced to default_bucket.
sagemaker_session = LocalSession(boto_session=boto_session)
sagemaker_session._default_bucket = default_bucket

s3_helper = s3fs.S3FileSystem()

# Echo the resolved settings so a misconfiguration is visible right away.
for setting in (
    account_id,
    region,
    role,
    sagemaker_session,
    default_uri,
    atf_s3_uri,
    data_location_uri,
):
    print(setting)

# Dev in script mode
(Still runs locally, but through the SageMaker API)

## Build image

In [None]:
# Shell magic: run container/build_image.sh from inside the container/ folder,
# passing the notebook's image_name as its only argument.
! cd container && bash build_image.sh $image_name

## Prepare data

In [None]:
# Settings reused by the processing, training and evaluation cells below.
opt_ml_dir = "/opt/ml/processing"  # root of the processing input/output paths
execution_id = "exp-local-sm"  # run identifier used in the S3 output prefixes
image_uri = image_name + ":latest"
print(image_uri)

In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Processor that runs a Python script inside the image built above, in local mode.
processor = ScriptProcessor(
    base_job_name="prepare-data-processor",
    image_uri=image_uri,
    # IMPORTANT: the interpreter name depends on the Dockerfile; do NOT use python3.
    command=["python"],
    role=role,
    instance_count=1,
    instance_type="local",
    max_runtime_in_seconds=1200,
)

# IMPORTANT: every ProcessingOutput source must be a folder WITHOUT any nested
# folder inside. Otherwise a Permission Denied error is raised during the
# post-processing step.
# Example: the source CANNOT be "/opt/ml/processing/output/prepared_data",
# because that involves 2 nested folders.

processor.run(
    code="container/code/prepare_data.py",
    inputs=[
        ProcessingInput(
            source=data_location_uri,
            destination=f"{opt_ml_dir}/input",
        ),
    ],
    # One output per data split; each split has its own folder and S3 prefix.
    outputs=[
        ProcessingOutput(
            output_name=split,
            source=f"{opt_ml_dir}/{split}",
            destination=f"{atf_s3_uri}/prepared_data/{execution_id}/{split}",
        )
        for split in ("train", "test")
    ],
    arguments=[""],
    wait=True,
    logs=True,
)

In [None]:
# Inspect uploaded data
preprocessing_job_description = processor.jobs[-1].describe()
output_config = preprocessing_job_description["ProcessingOutputConfig"]
for output in output_config["Outputs"]:
    if output["OutputName"] == "train":
        train_data_uri = output["S3Output"]["S3Uri"]
    if output["OutputName"] == "test":
        test_data_uri = output["S3Output"]["S3Uri"]

! aws s3 ls $train_data_uri/
! aws s3 ls $test_data_uri/

## Train

In [None]:
import sagemaker
import json

# JSON encode hyperparameters
def json_encode_hyperparameters(hyperparameters):
    """Return a new dict with string keys and JSON-encoded values.

    Each key is converted with ``str`` and each value is serialized with
    ``json.dumps``; for example ``{"learning_rate": 0.05}`` becomes
    ``{"learning_rate": "0.05"}``.
    """
    encoded = {}
    for name, value in hyperparameters.items():
        encoded[str(name)] = json.dumps(value)
    return encoded

# Hyperparameters are handed to the estimator as JSON-encoded strings.
hyperparameters = json_encode_hyperparameters({"learning_rate": 0.05})

# Estimator for the locally built image, running in local mode with the
# notebook's LocalSession; model artifacts go under the "/model" prefix.
est = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type="local",
    hyperparameters=hyperparameters,
    output_path=f"{atf_s3_uri}/model",
    sagemaker_session=sagemaker_session,
)

# Train using the prepared "train" output as the "train" channel.
est.fit(inputs={"train": train_data_uri})

In [None]:
# Name of the estimator's latest training job.
job_name = est.latest_training_job.name
print(job_name)

In [None]:
# Describe the last job recorded on the estimator and extract the S3 location
# of its model artifacts (displayed as the cell result).
training_job_description = est.jobs[-1].describe()
model_data_s3_uri = str(training_job_description["ModelArtifacts"]["S3ModelArtifacts"])
model_data_s3_uri

## Evaluate

In [None]:
# Show the S3 location of the prepared test data before using it for evaluation.
print(test_data_uri)

In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Processor that runs the evaluation script inside the same image, in local mode.
eval_processor = ScriptProcessor(
    base_job_name="evaluate-processor",
    image_uri=image_uri,
    command=["python"],
    role=role,
    instance_count=1,
    instance_type="local",
    max_runtime_in_seconds=1200,
)

# Inputs are the trained model artifacts and the prepared test data; the single
# output is the "evaluation" folder, uploaded under this run's execution_id.
eval_processor.run(
    code="container/code/evaluate.py",
    inputs=[
        ProcessingInput(source=model_data_s3_uri, destination=f"{opt_ml_dir}/model"),
        ProcessingInput(source=test_data_uri, destination=f"{opt_ml_dir}/test"),
    ],
    outputs=[
        ProcessingOutput(
            output_name="evaluation",
            source=f"{opt_ml_dir}/evaluation",
            destination=f"{atf_s3_uri}/evaluation/{execution_id}",
        ),
    ],
    arguments=[""],
    wait=True,
    logs=True,
)

In [None]:
eval_job_description = eval_processor.jobs[-1].describe()
eval_output_config = eval_job_description["ProcessingOutputConfig"]
for output in eval_output_config["Outputs"]:
    if output["OutputName"] == "evaluation":
        eval_uri = output["S3Output"]["S3Uri"]
        
! aws s3 ls $eval_uri/

## Deploy

In [None]:
# Import CSVSerializer from sagemaker.serializers, the module that defines it,
# rather than relying on sagemaker.predictor happening to re-export the name.
from sagemaker.serializers import CSVSerializer

# Deploy the trained estimator as a local-mode endpoint; requests sent through
# the returned predictor are serialized as CSV.
predictor = est.deploy(
    initial_instance_count=1,
    instance_type="local",
    serializer=CSVSerializer(),
)

In [None]:
# Name of the endpoint behind the predictor returned by est.deploy().
endpoint_name = predictor.endpoint_name
print(endpoint_name)

## Test endpoint

In [None]:
import pandas as pd
# Fetch the prepared training CSV from S3 and load it into a DataFrame.
s3 = boto3.client("s3")
obj = s3.get_object(
    Bucket=default_bucket,
    Key=f"sagemaker/prepared_data/{execution_id}/train/train.csv",
)
# "Body" is the key of the get_object response that holds the object's content.
train_df = pd.read_csv(obj["Body"])
train_df.head()

In [None]:
# Build the request features by dropping the first column of the training data
# (presumably the label column; confirm against prepare_data.py).
first_column = train_df.columns[[0]]
test_data = train_df.drop(columns=first_column)
test_data.head()

### Test endpoint using predict function

In [None]:
# Send the feature rows to the endpoint and decode the response bytes as UTF-8.
results = predictor.predict(test_data.values).decode('utf-8')
# Parse one float per non-blank line. Filtering blank lines (instead of always
# discarding the final element) keeps the last prediction even when the
# response does not end with a newline.
[float(line) for line in results.split('\n') if line.strip()]

### Test endpoint using Curl

In [None]:
# Serialize the feature rows as CSV and save them as the request payload file.
payload_file = "./payload"
test_data_str = CSVSerializer().serialize(test_data.values)
Path(payload_file).write_text(test_data_str)

# Single-line curl command that POSTs the payload file as text/csv to the
# /invocations route on localhost:8080.
curl_str = " ".join(
    [
        "curl -X POST",
        "http://localhost:8080/invocations",
        f"--data-binary @{payload_file}",
        "-H 'Content-Type: text/csv'",
    ]
)
curl_str

In [None]:
# Run the curl command through the shell with both streams piped.
# CompletedProcess.stdout/.stderr are only populated when the streams are
# captured; without piping, .stderr is always None.
curl_result = subprocess.run(
    curl_str,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    universal_newlines=True,  # decode the captured streams as text
)
print(curl_result.stdout)  # whatever the endpoint returned
curl_result.stderr  # curl's own diagnostics, displayed as the cell result

In [None]:
# Tear down the endpoint behind this predictor once testing is finished.
predictor.delete_endpoint()

In [21]:
# Shell magic: delete the payload file written for the curl test.
! rm payload