## Package your Python function, Python file, or Jupyter notebook as a Docker image

## Environment Setup

In [1]:
import sys
import time
from kubeflow import fairing
from kubeflow.fairing import TrainJob
from kubeflow.fairing.backends import KubeflowAWSBackend

In [2]:
# Base image: the Docker Hub Python image whose tag is this kernel's
# major.minor.micro version.
PY_VERSION = ".".join(map(str, sys.version_info[:3]))
BASE_IMAGE = 'registry.hub.docker.com/library/python:{}'.format(PY_VERSION)

# AWS Elastic Container Registry (ECR) that stores the images built below.
# Change AWS_REGION to the region of your own registry.
# NOTE: guess_account_id() calls AWS (the captured output shows an STS
# GetCallerIdentity request), so valid AWS credentials must be configured.
AWS_ACCOUNT_ID = fairing.cloud.aws.guess_account_id()
AWS_REGION = 'us-east-1'
DOCKER_REGISTRY = '{}.dkr.ecr.{}.amazonaws.com'.format(AWS_ACCOUNT_ID, AWS_REGION)

[I 200828 14:56:30 credentials:1209] Found credentials in shared credentials file: ~/.aws/credentials


ClientError: An error occurred (InvalidClientTokenId) when calling the GetCallerIdentity operation: The security token included in the request is invalid.

## Convert Python function

In [None]:
def train():
    """Toy training entry point: prints a single message."""
    print("simple train job!")

# Package `train` into an image based on BASE_IMAGE, push it to DOCKER_REGISTRY
# and submit it. KubeflowAWSBackend is the backend imported in the first cell;
# `AWSBackend` and `job_config` were never imported or defined in this notebook,
# so referencing them raised NameError on a fresh kernel.
job = TrainJob(
    train,
    base_docker_image=BASE_IMAGE,
    docker_registry=DOCKER_REGISTRY,
    backend=KubeflowAWSBackend(),
)
job.submit()

## Convert Python file

In [None]:
%%writefile train.py
print("hello world!")

In [None]:
# Package the train.py script into an image based on BASE_IMAGE, push it to
# DOCKER_REGISTRY and submit it. KubeflowAWSBackend is the backend imported in
# the first cell; `AWSBackend` and `job_config` were never imported or defined
# in this notebook, so referencing them raised NameError on a fresh kernel.
job = TrainJob(
    "train.py",
    base_docker_image=BASE_IMAGE,
    docker_registry=DOCKER_REGISTRY,
    backend=KubeflowAWSBackend(),
)
job.submit()

## Convert Jupyter Notebook

In [None]:
%%writefile requirements.txt
papermill
jupyter

In [None]:
# train.ipynb is expected to already exist in the same folder as this notebook.
# requirements.txt is passed via input_files so it is shipped with the job.
# KubeflowAWSBackend is the backend imported in the first cell; `AWSBackend`
# and `job_config` were never imported or defined in this notebook, so
# referencing them raised NameError on a fresh kernel.
job = TrainJob(
    "train.ipynb",
    input_files=["requirements.txt"],
    base_docker_image=BASE_IMAGE,
    docker_registry=DOCKER_REGISTRY,
    backend=KubeflowAWSBackend(),
)
job.submit()