# Example 8: Running Notebooks with Orbit SDK

### Author: AWS Professional Services 
### Date: January 30 2019

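In this example, we demonstrate how users can run notebooks on a remote container and schedule them to run on a cron schedule, using either the Orbit SDK `controller` API or the Orbit Jupyter magics.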

In [None]:
import os
import sys
import boto3
from aws_orbit_sdk import controller
from aws_orbit_sdk.common import get_workspace,get_scratch_database
from aws_orbit_sdk.magics.orbit import OrbitWorkbenchMagics 

import json
import time

env_name = %env AWS_ORBIT_ENV
team_name = %env AWS_ORBIT_TEAM_SPACE
(env_name,team_name)

!orbit list profile --env $env_name -t $team_name

### Running a notebook on a remote container

In [None]:
# Run specification handed to controller.run_notebooks: a "compute" section
# describing where the work runs and a "tasks" list with one entry per notebook.
notebooksToRun = {
    "compute": {
        "container": {
            # Number of threads that run on the container to execute tasks
            "p_concurrent": "1"
        },
        # "fargate" is an option if it is enabled by your orbit team deployment
        "node_type": "ec2",
        "podsetting": "orbit-runner-support-small"
    },
    "tasks": [
        {
            # Name of the notebook to run
            "notebookName": "Example-1-SQL-Analysis-Athena.ipynb",
            # EFS folder in shared where the notebook resides
            "sourcePath": "shared/samples/notebooks/B-DataAnalyst",
            # EFS target location where the output should be written
            "targetPath": "shared/regression/notebooks/B-DataAnalyst",
            # Prefix added to the name of the output notebook
            "targetPrefix": "ttt",
            # Values that replace the variables defined in the cell tagged 'parameters'
            "params": {
                "glue_db": "cms_raw_db",
                "target_db": "users"
            }
        }
    ]
}


In [None]:
containers = controller.run_notebooks(notebooksToRun) # Starts a single container to execute the given tasks

In [None]:
# Display the value returned by controller.run_notebooks.
containers

In [None]:
# The 'Identifier' entry of the run_notebooks result is kept as the job name
# that later cells pass to kubectl and to the controller.
job_name = containers['Identifier']
# Plain string: the message has no placeholders, so no f-string is needed.
print("This would be the id of the K8 Job")
job_name

In [None]:
# Wait until the job ends and tail the log of the container.
# NOTE(review): the positional arguments 60, 60, True are presumably the polling
# delay, the maximum number of attempts, and a tail-log flag — confirm against
# the SDK signature.
controller.wait_for_tasks_to_complete([containers], 60,60, True)

### Scheduling a notebook to run with a cron schedule

In [None]:
%%schedule_notebook -cron 0/2 * 1/1 * ?  -id ttt
{
    "compute": {
        "node_type": "ec2",
        "container": 
            {
            "p_concurrent" :1
            },
        "podsetting":"orbit-runner-support-small"
    },
      "tasks":  [
            {
                  "notebookName": "Example-1-SQL-Analysis-Athena.ipynb",
                  "sourcePath": "shared/samples/notebooks/B-DataAnalyst",
                  "targetPath": "shared/regression/notebooks/B-DataAnalyst",
                  "targetPrefix": "ttt",
                  "params": {
                        "glue_db" : "cms_raw_db",
                        "target_db" : "users"
                  }      
            }
      ]  
}


In [None]:
# `_` is IPython's "output of the previously executed cell" variable, so this
# cell must be run immediately after the %%schedule_notebook cell above;
# otherwise `_` holds some other value.
# NOTE(review): assumes that cell's output is a mapping with an 'Identifier' key — confirm.
cronjob_name=_['Identifier']
cronjob_name

### Different ways of accessing Kubernetes Job status

In [None]:
# Fetch the result of list_my_running_jobs and require it to be non-empty
# before displaying it.
jobs = controller.list_my_running_jobs()
assert len(jobs) > 0
jobs

In [None]:
# Fetch and print the result of list_team_running_jobs.
jobs = controller.list_team_running_jobs()
print(jobs)
# The emptiness check below is intentionally left disabled.
# NOTE(review): presumably because other team members may have jobs running — confirm.
# assert(len(jobs) == 0)

In [None]:
# Fetch the result of list_running_cronjobs and require it to be non-empty
# before displaying it.
cronjobs = controller.list_running_cronjobs()
assert len(cronjobs) > 0
cronjobs

In [None]:
# Show the cronjobs as reported by kubectl.
!kubectl get cronjob 

### Deleting a [scheduled] Job

In [None]:
# Delete the cronjob whose identifier was stored in `cronjob_name`.
controller.delete_cronjob(job_name=cronjob_name)

In [None]:
# Show the cronjobs again, now that delete_cronjob has been called.
!kubectl get cronjob 

In [None]:
# Describe the job started earlier by controller.run_notebooks.
!kubectl describe job $job_name

In [None]:
# Query the pods for the job using a `job-name=<job_name>` filter string
# (presumably a Kubernetes label selector — confirm against the SDK).
pods = controller.list_current_pods(f'job-name={job_name}')
# Exactly one pod is expected for this job.
assert len(pods) == 1
# Remember the pod name for the kubectl cells that follow.
pod_id = pods[0]['metadata']['name']
print(pod_id)
pods

In [None]:
# Block for up to 120 seconds until the pod reports the Ready condition.
!kubectl wait --for=condition=Ready --timeout=120s pod/$pod_id

In [None]:
# Best-effort cleanup: the job may already be gone (the original message
# attributes this to a TTL setting), so a failure here must not stop the notebook.
try:
    controller.delete_job(job_name)
except Exception as e:
    print(f'Job {job_name} deleted by ttl setting')
    # Surface the underlying error too, so a failure that is not caused by
    # TTL cleanup is not silently hidden.
    print(f'delete_job raised {type(e).__name__}: {e}')

In [None]:
# Block for up to 120 seconds until the pod has been deleted.
!kubectl wait --for=delete --timeout=120s pod/$pod_id

In [None]:
# Display the pod name and job name together as a tuple.
pod_id,job_name

In [None]:
# Re-run the pod query for the job now that the job has been deleted, and display the result.
pods = controller.list_current_pods(f'job-name={job_name}')
pods

In [None]:
# The pod query must come back empty.
assert len(pods) == 0

### Running notebooks using Jupyter magic

In [None]:
%%run_notebook
{
    "compute": {
        "node_type": "ec2",
        "container": 
            {
            "p_concurrent" :1
            },
        "podsetting":"orbit-runner-support-small"
    },
      "tasks":  [
            {
                  "notebookName": "Example-1-SQL-Analysis-Athena.ipynb",
                  "sourcePath": "shared/samples/notebooks/B-DataAnalyst",
                  "targetPath": "shared/regression/notebooks/B-DataAnalyst",
                  "targetPrefix": "ttt",
                  "params": {
                        "glue_db" : "cms_raw_db",
                        "target_db" : "users"
                  }      
            }
      ]  
}
