### AWS Account Info

In [None]:
import sagemaker
import boto3
# SageMaker session reused by the later cells (default bucket, training jobs).
sess = sagemaker.Session()

# Resolve the IAM role ARN used to launch the estimators.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError: look the 'Developer' role up
    # through IAM and use its ARN instead.
    iam = boto3.client('iam')
    developer_role = iam.get_role(RoleName='Developer')
    role = developer_role['Role']['Arn']
    print("Get role successfully")

# Account id and region are read from the boto session behind `sess`;
# both are used later to build the ECR image URI.
boto_session = sess.boto_session
caller_identity = boto_session.client('sts').get_caller_identity()
account = caller_identity['Account']
region = boto_session.region_name

### Build & Push Docker Image

- This section deals with the variables related to Docker images that will be pushed to the Elastic Container Registry (ECR) after their build.
- Usually, there's no need to build the Docker image more than once, because all source code is packed and sent to S3 storage.
- Any changes made to the source code will not affect the Docker image.
- In the "Build Image" section, set the `is_build` variable to `"false"` unless we want to build and push the image during every run.
- Please change the image name (the `image` variable below) as needed.

#### Variables for Docker Image

In [None]:
image = 'cog_verse'
bucket_name   = sess.default_bucket()
base_job_name = 'cog-verse-training'
%env image {image}
%env account {account}
%env region {region}
%env bucket_name {bucket_name}
%env base_job_name = {base_job_name}

#### Build Image

In [None]:
# Enable/disable the docker build.
# Kept as the string "true"/"false" rather than a bool: the value is passed as
# the second argument to build_and_push.sh and is compared against "true"
# before the docker push.
is_build = "false"

In [None]:
%%sh -s "$image" "$is_build"
bash ./build_and_push.sh $1 $2

#### Push Image to ECR

In [None]:
if is_build == "true":
    !docker push $account.dkr.ecr.$region.amazonaws.com/${image}:latest

### Pack Source Code

- This section deals with packing only the necessary code for running on Sagemaker.
- We send this code to a predetermined location on S3.
- Sagemaker will start the run and download the source code, saving it to the main directory.
- During the packing process, it will ignore all cache and dot files.

In [None]:
from cloud.sagemaker_utils import pack_archive, upload_to_s3, delete_archive
# Root of the project; also used as the directory the archive is written to.
project_dir = "."

# Top-level folders and files that go into the archive.
source_dir_names = [
    "actors",
    "cogment_verse",
    "config",
    "environments",
    "runs",
    "tests",
    "main.py",
    "simple_mlflow.py",
]
ignore_folders = ["node_modules"]
archive_name = "source_code.tar.gz"

# Single location of the archive on disk, shared by the upload and the cleanup
# so the two cannot drift apart when project_dir changes.
# NOTE(review): assumes pack_archive writes to <output_path>/<archive_name> - confirm
# against cloud.sagemaker_utils.
archive_path = f"{project_dir}/{archive_name}"

# Pack all source code to run cogment verse
pack_archive(
    project_dir=project_dir,
    main_dir=project_dir,
    output_path=project_dir,
    source_dir_names=source_dir_names,
    ignore_folders=ignore_folders,
    archive_name=archive_name,
)

# Upload to S3, then always remove the local archive - even when the upload
# raises - so a failed run does not leave a stale tarball behind.
s3_key = f"{image}/input/data/{archive_name}"
try:
    upload_to_s3(local_path=archive_path, bucket=bucket_name, s3_key=s3_key)
finally:
    delete_archive(archive_path=archive_path)


### Training

#### User Inputs

In [None]:
# Passed to the SageMaker Estimator as `hyperparameters` in the training cells.
hyperparameters = {
    "main_args": "+experiment=ppo_atari_pz/pong_pz",
    "s3_bucket": bucket_name,
    "repo": image,
}

# Gates the "Local Test" cell: its body only runs when this is True.
run_local_test = True

#### Local Test

In [None]:
if run_local_test:
    # S3 locations and ECR image URI for this run.
    output_path = f"s3://{bucket_name}/{image}/output"
    input_path = f"s3://{bucket_name}/{image}/input/data"
    image_name = f"{account}.dkr.ecr.{region}.amazonaws.com/{image}:latest"

    # Estimator configured with instance_type='local'.
    local_estimator_args = {
        "image_uri": image_name,
        "base_job_name": base_job_name,
        "role": role,
        "instance_count": 1,
        "output_path": output_path,
        "instance_type": 'local',
        "hyperparameters": hyperparameters,
    }
    estimator = sagemaker.estimator.Estimator(**local_estimator_args)
    estimator.fit(inputs={"training": input_path})

    # Echo the resolved locations once fit() has returned.
    for label, value in (
        ("input_path", input_path),
        ("output_path", output_path),
        ("image_name", image_name),
    ):
        print(f"{label}: {value}")

#### AWS Run

In [None]:
from cloud.sagemaker_utils import download_and_extract_data_from_s3
# S3 locations, ECR image URI, resource tags and job-name prefix for this run.
output_path = f"s3://{bucket_name}/{image}/output"
input_path = f"s3://{bucket_name}/{image}/input/data"
image_name = f"{account}.dkr.ecr.{region}.amazonaws.com/{image}:latest"
tag_name = [{'Key': 'cog-verse', 'Value': 'cog-verse-training'}]
base_job_name = 'cog-verse-training'

# Launch the training job on an ml.m5.xlarge instance.
# fit() is called with wait=True, so this cell blocks until fit() returns.
aws_estimator_args = {
    "image_uri": image_name,
    "base_job_name": base_job_name,
    "role": role,
    "instance_count": 1,
    "instance_type": 'ml.m5.xlarge',
    "tags": tag_name,
    "output_path": output_path,
    "sagemaker_session": sess,
    "hyperparameters": hyperparameters,
}
estimator = sagemaker.estimator.Estimator(**aws_estimator_args)
estimator.fit(inputs={"training": input_path}, wait=True)

# Disabled: polling loop that syncs mlflow data from S3 while the job runs.
# NOTE(review): it would only be reachable during the job if fit() were
# called with wait=False.
# # Sync mlflow data from S3 to local machine
# mlflow_archive_name = "mlflow_db.tar.gz" # this name is set in sagemaker_main.py
# mlflow_s3_folder = f"{image}/mlflow/{mlflow_archive_name}" # this name is set in sagemaker_main.py
# local_path = f".cogment_verse/mlflow/{mlflow_archive_name}"
# while True:
#     # Get training job info
#     training_job_info = estimator.latest_training_job.describe()

#     # Stop syncing process when the job is done running
#     if training_job_info["TrainingJobStatus"] in ['Completed', 'Failed', 'Stopped']:
#         break

#     # Sync mlflow data from S3 to local machine
#     download_and_extract_data_from_s3(bucket=bucket_name, s3_key=mlflow_s3_folder, local_path=local_path)


# Echo where the job output was written.
print(f"output_path: {output_path}")
