<!--
#  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License").
#    You may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.
-->

# Sample notebook to build a Jupyter Image with GPU enabled.

## Content
1. [Configuration](#Configuration)
2. [Build Image](#Build-Image)
3. [Running Container using the profile](#Running-container)


### Configuration

In [1]:
# Name of the custom image, and of the sample folder used as its build directory.
image_name = "gpu1"
folder_name = "gpu1"

### Build Image

Let's see how `orbit build image` works.

In [2]:
# Show the usage text and available options of the `orbit build image` command.
!orbit build image --help

Usage: orbit build image [OPTIONS]

  Build and Deploy a new Docker image into ECR.

Options:
  -e, --env TEXT        Orbit Environment.  [required]
  -d, --dir TEXT        Dockerfile directory.  [required]
  -n, --name TEXT       Image name.  [required]
  -s, --script TEXT     Build script to run before the image build.
  -t, --team TEXT       One or more Teams to deploy the image to (can de
                        declared multiple times).

  --build-arg TEXT      One or more --build-arg parameters to pass to the
                        Docker build command.

  --debug / --no-debug  Enable detailed logging.  [default: False]
  --help                Show this message and exit.


Get our Orbit environment and team names.

In [3]:
env_name = %env AWS_ORBIT_ENV
team_name = %env AWS_ORBIT_TEAM_SPACE
(env_name,team_name)

('dev-env', 'lake-user')

The repository name is built from the image name, prefixed by the environment context. Users can only manipulate ECR repositories whose names start with `orbit-{env_name}-users-`.

In [4]:
# ECR repository for this image: the environment-scoped user prefix followed by the image name.
repository_name = f"orbit-{env_name}-users-{image_name}"
repository_name

'orbit-dev-env-users-gpu1'

In [5]:
# Delete the ECR repository left over from a previous run, if any.
# When the repository does not exist, the CLI reports RepositoryNotFoundException.
!aws ecr delete-repository --repository-name $repository_name --force


An error occurred (RepositoryNotFoundException) when calling the DeleteRepository operation: The repository with name 'orbit-dev-env-users-gpu1' does not exist in the registry with id '495869084367'


In [6]:
# Change into the sample folder for this image.
%cd ~/shared/samples/notebooks/I-Image/$folder_name

/efs/shared/samples/notebooks/I-Image/gpu1


Capture the current directory; it is passed to the build below as the Dockerfile directory.

In [7]:
# Keep the current directory: it is passed to `orbit build image -d` and used as the run's sourcePath.
pwd = %pwd
pwd

'/efs/shared/samples/notebooks/I-Image/gpu1'

Now let's run the build command.

In [10]:
%%time

output = !orbit build image -e dev-env -d $pwd -n $image_name --build-arg BASE_IMAGE=cschranz/gpu-jupyter:v1.3_cuda-10.1_ubuntu-18.04_python-only
output

CPU times: user 14.7 ms, sys: 6.64 ms, total: 21.3 ms
Wall time: 10min 42s


['',
 'Deploying Docker Image |\x1b[32m                   \x1b[0m|   0% \x1b[0m',
 '                                                  ',
 '',
 'Deploying Docker Image |\x1b[32m▏                  \x1b[0m|   1% \x1b[0m',
 'Deploying Docker Image |\x1b[32m▌                  \x1b[0m|   3% \x1b[0m',
 'Deploying Docker Image |\x1b[32m▉                  \x1b[0m|   5% \x1b[0m',
 'Deploying Docker Image |\x1b[32m███▊               \x1b[0m|  20% \x1b[0m',
 'Deploying Docker Image |\x1b[32m█████▏             \x1b[0m|  27% \x1b[0m',
 'Deploying Docker Image |\x1b[32m█████████▌         \x1b[0m|  50% \x1b[0m',
 'Deploying Docker Image |\x1b[32m██████████████████▊\x1b[0m|  99% \x1b[0m',
 '                                                  ',
 '',
 'Deploying Docker Image |\x1b[32m██████████████████▊\x1b[0m|  99% \x1b[0m',
 '                                                  ',
 '',
 'Deploying Docker Image |\x1b[32m██████████████████▊\x1b[0m|  99% \x1b[0m',
 '                                           

Let's get the image address from the output of the previous command.

In [11]:
# Scan the captured build output for the line announcing the ECR image address and
# keep everything after the marker. If several lines match, the last one wins.
look_for = 'ECR Image Address='
image = None
for o in output:
    if look_for in o:
        image = o[o.index(look_for) + len(look_for):]
        print(image)

# Stop here with a clear message rather than building a profile with a null image.
assert image is not None, f"'{look_for}' not found in the build output"
    

495869084367.dkr.ecr.us-west-2.amazonaws.com/orbit-dev-env-users-gpu1


In [12]:
# check that the image was built
import json
print(repository_name)
images = !aws ecr list-images --repository-name $repository_name
images = "".join(images)
im = json.loads(images)
print(im['imageIds'])
assert(len(im['imageIds']) > 0)

orbit-dev-env-users-gpu1
[{'imageDigest': 'sha256:312a44902cce515f1ea2fbf345c7ce2369d176a32148ea940a4e1149ffdebe62', 'imageTag': 'latest'}]


### Building the profile for the Image

In [22]:
import json


def _gpu_profile(image, display_name, slug, gpu_label, gpu_resource):
    """Return one profile entry with 4 CPU, 8G memory and one unit of `gpu_resource`.

    image        -- container image address stored in the kubespawner override
    display_name -- value of the profile's "display_name" field
    slug         -- value of the profile's "slug" field (the run spec refers to a profile by it)
    gpu_label    -- label interpolated into the description, e.g. "GPU" or "vGPU"
    gpu_resource -- resource name used for both the limit and the guarantee
    """
    return {
        "display_name": display_name,
        "slug": slug,
        "description": f"4 CPU + 8G MEM + 1 {gpu_label}",
        "kubespawner_override": {
            "image": image,
            "cpu_guarantee": 4,
            "cpu_limit": 4,
            "mem_guarantee": "8G",
            "mem_limit": "8G",
            "extra_resource_limits": {gpu_resource: "1"},
            "extra_resource_guarantees": {gpu_resource: "1"},
        },
    }


# Two profiles on the freshly built image: one requesting a full GPU, one a virtual GPU.
profile = [
    _gpu_profile(image, "Small (GPU Enabled)", "small-gpu", "GPU", "nvidia.com/gpu"),
    _gpu_profile(image, "Small (vGPU Enabled)", "small-vgpu", "vGPU", "k8s.amazonaws.com/vgpu"),
]

# Keys are sorted on output, so the file content does not depend on dict insertion order.
with open("profile.json", 'w') as f:
    json.dump(profile, f, indent=4, sort_keys=True)


In [23]:
# Print the generated profile file for inspection.
!cat profile.json

[
    {
        "description": "4 CPU + 8G MEM + 1 GPU",
        "display_name": "Small (GPU Enabled)",
        "kubespawner_override": {
            "cpu_guarantee": 4,
            "cpu_limit": 4,
            "extra_resource_guarantees": {
                "nvidia.com/gpu": "1"
            },
            "extra_resource_limits": {
                "nvidia.com/gpu": "1"
            },
            "image": "495869084367.dkr.ecr.us-west-2.amazonaws.com/orbit-dev-env-users-gpu1",
            "mem_guarantee": "8G",
            "mem_limit": "8G"
        },
        "slug": "small-gpu"
    },
    {
        "description": "4 CPU + 8G MEM + 1 vGPU",
        "display_name": "Small (vGPU Enabled)",
        "kubespawner_override": {
            "cpu_guarantee": 4,
            "cpu_limit": 4,
            "extra_resource_guarantees": {
                "k8s.amazonaws.com/vgpu": "1"
            },
            "extra_resource_limits": {
                "k8s.amazonaws.com/vgpu": "1"
            },
       

In [24]:
# Submit the profiles in profile.json for the team in this environment.
!orbit build profile --env $env_name --team $team_name profile.json

[[39m[22m[24m Info [0m] Retrieving existing profiles0% [0m
[[94m[22m[24m Tip [0m] Profile added Small (GPU Enabled)[0m
[[94m[22m[24m Tip [0m] Profile added Small (vGPU Enabled)0m
                                                  0m| 100% [0m
Adding profile |[32m███████████████████████████[0m| 100% [0m


In [25]:
# List the profiles currently registered for the team.
!orbit list profile --env $env_name --team $team_name

Team profiles:
[
    {
        "description": "Use simple custom image",
        "display_name": "simple_image",
        "kubespawner_override": {
            "cpu_guarantee": 2,
            "cpu_limit": 2,
            "image": "495869084367.dkr.ecr.us-west-2.amazonaws.com/orbit-dev-env-users-custom_image2",
            "mem_guarantee": "1G",
            "mem_limit": "1G"
        },
        "slug": "nano"
    },
    {
        "description": "Use for spark kernel",
        "display_name": "spark",
        "kubespawner_override": {
            "cpu_guarantee": 2,
            "cpu_limit": 2,
            "image": "495869084367.dkr.ecr.us-west-2.amazonaws.com/orbit-dev-env-users-spark",
            "mem_guarantee": "1G",
            "mem_limit": "1G"
        },
        "slug": "spark"
    },
    {
        "description": "Use for spark kernel",
        "display_name": "gpu1",
        "kubespawner_override": {
            "cpu_guarantee": 2,
            "cpu_limit": 2,
            "image": "4

### Running container 

Let's run a container using the profile and image we created.

In [26]:
import json

# Compute settings for the run; "profile" is the slug of the GPU profile.
compute_spec = {
    "container": {"p_concurrent": "1"},
    "compute_type": "ecs",
    "node_type": "ec2",
    "profile": "small-gpu",
}

# The notebook to execute: read from the current sample folder, written to the regression folder.
notebook_task = {
    "notebookName": "test-image.ipynb",
    "sourcePath": pwd,
    "targetPath": f"/efs/shared/regression/notebooks/I-Image/{folder_name}",
    "params": {},
    "ExecutionType": "ecs",
}

run = {"compute": compute_spec, "tasks": [notebook_task]}

# Persist the run specification for the `orbit run notebook` command.
with open("run.json", 'w') as f:
    json.dump(run, f)


In [28]:
%%time

# Submit the run described in run.json for the team in this environment.
# If the command exits non-zero, the shell fallback (`|| echo $?`) prints its exit status.
!orbit run notebook --env $env_name --team $team_name --user testing --wait --tail-logs run.json || echo  $?


INFO:root:using profile small-gpu
INFO:root:Waiting for 1 tasks [{'ExecutionType': 'eks', 'Identifier': 'orbit-lake-user-ec2-runner-7k2g9', 'NodeType': 'ec2', 'tasks': [{'notebookName': 'test-image.ipynb', 'sourcePath': '/efs/shared/samples/notebooks/I-Image/gpu1', 'targetPath': '/efs/shared/regression/notebooks/I-Image/gpu1', 'params': {}, 'ExecutionType': 'ecs'}]}]
INFO:root:Watching task: 'orbit-lake-user-ec2-runner-7k2g9'
INFO:root:Running: 1 Completed: 0 Errored: 0
INFO:root:waiting for [{'ExecutionType': 'eks', 'Identifier': 'orbit-lake-user-ec2-runner-7k2g9', 'NodeType': 'ec2', 'tasks': [{'notebookName': 'test-image.ipynb', 'sourcePath': '/efs/shared/samples/notebooks/I-Image/gpu1', 'targetPath': '/efs/shared/regression/notebooks/I-Image/gpu1', 'params': {}, 'ExecutionType': 'ecs'}]}]
INFO:root:Task {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-user-ec2-runner-7k2g9', 'NodeType': 'ec2', 'tasks': [{'notebookName': 'test-image.ipynb', 'sourcePath': '/efs/shared/samples/notebo