# Master test notebook to run all admin (M-Admin) notebooks
    

### Author: AWS Professional Services Emerging Technology and Intelligent Platforms Group
### Date: Feb 4 2021

In [1]:
import os
import sys
import boto3
from aws_orbit_sdk import controller
from aws_orbit_sdk.common import get_workspace,get_scratch_database
s3 = boto3.client('s3')


In [2]:
# Resolve the workspace this notebook runs in, plus its scratch resources.
workspace = get_workspace()
scratch_glue_db = get_scratch_database()
team_space = workspace['team_space']
# This notebook MUST run in the lake-creator team space: the guard below
# aborts the run anywhere else.
assert team_space == 'lake-creator', (
    f"This notebook must run in the 'lake-creator' team space, not {team_space!r}"
)
scratch_bucket = workspace['ScratchBucket']
# Show the resolved team space and scratch bucket as the cell output.
(team_space, scratch_bucket, )

('lake-creator', 'orbit-foundation-dev-env-public-scratch-495869084367-f23514')

In [3]:
def checkNotebooks(executions, expected_count):
    """Validate an execution-history table.

    Asserts that ``executions`` holds exactly ``expected_count`` rows, then
    raises ``AssertionError`` for the first row whose ``relativePath``
    contains the ``error@`` marker. Prints ``SUCCESS`` when no row does.
    """
    row_total = len(executions)
    assert row_total == expected_count

    # Stop at the first offending path, exactly like an early ``raise`` in a loop.
    offending = next(
        (
            record['relativePath']
            for _, record in executions.iterrows()
            if 'error@' in record['relativePath']
        ),
        None,
    )
    if offending is not None:
        raise AssertionError('error in ' + offending)

    print("SUCCESS")

### Cleanup

In [4]:
# Delete every regular file under the M-Admin regression output directory.
!find /efs/shared/regression/notebooks/M-Admin -type f -exec rm -fR {} \;
# Remove the marker file that the last cell of this notebook appends "PASSED" to.
!rm -f /efs/shared/regression/ADMIN_PASSED

### Configure regression run

In [5]:
%%bash --out output --err error

# List the notebooks in the sibling M-Admin folder. Per the cell-magic flags,
# stdout is stored in the Python variable `output` and stderr in `error`.
ls ../M-Admin/*.ipynb 

In [6]:
output

'../M-Admin/1-EBS.ipynb\n../M-Admin/2-Image_with_apps.ipynb\n../M-Admin/3-FSX.ipynb\n../M-Admin/4-querybook.ipynb\n'

In [7]:
# Regression-run configuration. Filter precedence: black_list beats white_list.
# NOTE(review): "optional_list" and "minimum_successful" are not read by any
# cell visible in this notebook — confirm whether they are still needed.
notebooks_run_config = {
    "black_list": ["3-FSX"],   # notebook names to skip. Example: ["Example-7-Data-Profiling"]
    "white_list": [],          # if not empty, only these notebooks run. Example: ["Example-7-Data-Profiling"]
    "optional_list": [],       # notebooks whose failure is ignored. Example: ["Example-6-Schedule-Notebook", "Example-8-LakeFormation-Security"]
    "minimum_successful": 1,   # minimum number of completed notebooks for the whole test not to fail (only matters when larger than the number of mandatory notebooks)
    "maxRetries": 3,           # max number of attempts to execute a notebook
    "notebooks_to_run": []     # all notebooks selected for execution (filled in below)
}

# `output` holds the captured notebook listing, one path per line.
for p in output.splitlines():
    p = p.strip()
    if len(p) < 2:
        # skip blank / degenerate lines
        continue
    # Use os.path so the result does not depend on how many directory levels
    # precede the notebook file, and the extension is stripped by the stdlib.
    nb_folder = os.path.basename(os.path.dirname(p))
    nb_name = os.path.splitext(os.path.basename(p))[0]
    if nb_name in notebooks_run_config["black_list"]:
        # the black list has the highest priority and overrides the white list
        continue
    if not notebooks_run_config["white_list"] or nb_name in notebooks_run_config["white_list"]:
        # run the notebook if the white list is empty or names this notebook
        notebooks_run_config["notebooks_to_run"].append({"folder": nb_folder, "name": nb_name})
notebooks_run_config

{'black_list': [],
 'white_list': [],
 'optional_list': [],
 'minimum_successful': 1,
 'maxRetries': 3,
 'notebooks_to_run': [{'folder': 'M-Admin', 'name': '1-EBS'},
  {'folder': 'M-Admin', 'name': '2-Image_with_apps'},
  {'folder': 'M-Admin', 'name': '3-FSX'},
  {'folder': 'M-Admin', 'name': '4-querybook'}]}

In [8]:
# New implementation of the cell below
import time

def start_notebooks(run_config):
    """Submit every notebook listed in ``run_config["notebooks_to_run"]``.

    One ``controller.run_notebooks`` request is issued per notebook. Each
    request carries a single task that reads the notebook from
    ``shared/samples/notebooks/<folder>`` and targets
    ``shared/regression/notebooks/<folder>``, with compute ``node_type`` "ec2".

    Returns the values returned by ``controller.run_notebooks``, in
    submission order.
    """

    def _request_for(entry):
        # Build a fresh request per notebook so no dict is shared between submissions.
        name, folder = entry['name'], entry['folder']
        task = {
            "notebookName": f"{name}.ipynb",
            "sourcePath": f"shared/samples/notebooks/{folder}",
            "targetPath": f"shared/regression/notebooks/{folder}",
            "params": {},
        }
        compute = {
            "container": {"p_concurrent": "1"},
            "node_type": "ec2",
        }
        return {"tasks": [task], "compute": compute}

    submitted = []
    for entry in run_config["notebooks_to_run"]:
        handle = controller.run_notebooks(_request_for(entry))
        print("notebookName: " + str(handle))
        submitted.append(handle)
    return submitted


def update_run_config(run_config, execution_results):
    """Narrow ``run_config["notebooks_to_run"]`` to the notebooks that failed.

    ``run_config`` is modified in place and also returned. After the call its
    ``"notebooks_to_run"`` list holds only the previously executed entries
    whose ``"name"`` appears in ``execution_results["failed"]``; it is empty
    when nothing failed.

    ``execution_results["failed"]`` may contain ``{"folder": ..., "name": ...}``
    dicts (the shape built by ``get_execution_results``) or plain notebook
    names. The previous implementation compared the name string against the
    dict entries directly, which never matched, so failures were never
    scheduled for a retry.
    """
    executed = run_config['notebooks_to_run']
    run_config['notebooks_to_run'] = []  # reset notebooks for the next execution

    failed = execution_results['failed']
    # if nothing failed there is nothing to retry
    if not failed:
        return run_config

    # Normalise both supported entry shapes to a set of notebook names.
    failed_names = {
        entry['name'] if isinstance(entry, dict) else entry
        for entry in failed
    }
    run_config['notebooks_to_run'] = [
        nb for nb in executed if nb['name'] in failed_names
    ]
    return run_config

In [9]:
def get_execution_results(run_config):
    """Classify each notebook in ``run_config["notebooks_to_run"]``.

    For every notebook the execution history is fetched with
    ``controller.get_execution_history`` and each row's ``relativePath`` is
    inspected:

    * a path containing ``error@`` marks the notebook as failed, unless its
      folder contains ``Failure-Behavior`` (there an error is the expected
      outcome);
    * a path without ``error@`` inside a ``Failure-Behavior`` folder marks
      the notebook as failed.

    A notebook with no history rows is reported as failed: nothing shows that
    it ran. Previously that case raised ``UnboundLocalError`` or reused the
    folder of the preceding notebook.

    Returns ``{"success": [...], "failed": [...]}`` where each entry is
    ``{"folder": <directory of the last inspected path>, "name": <notebook
    name from run_config>}``. The name is taken from ``run_config`` so it can
    be matched back to the entries scheduled for execution.

    NOTE(review): the history presumably also contains rows from earlier
    attempts; if so, a stale ``error@`` row keeps a notebook failed after a
    successful retry — confirm against ``get_execution_history``.
    """
    result = {"success": [], "failed": []}
    for test in run_config['notebooks_to_run']:
        target_folder = f"shared/regression/notebooks/{test['folder']}"
        executions = controller.get_execution_history(target_folder, f"{test['name']}.ipynb")
        nb_name = test['name']
        # Fallback folder, used only when the history has no rows.
        folder_name = target_folder
        # No recorded execution at all counts as a failure.
        failed = len(executions) == 0
        for index, row in executions.iterrows():
            res = row.get('relativePath')
            folder_name = os.path.dirname(res)
            has_error = 'error@' in res
            expects_failure = 'Failure-Behavior' in folder_name
            # Failed when the outcome differs from what the folder expects.
            if has_error != expects_failure:
                failed = True
        entry = {"folder": folder_name, "name": nb_name}
        result["failed" if failed else "success"].append(entry)
    return result

In [10]:
# Override the retry budget set in the configuration cell (3) for this run.
notebooks_run_config["maxRetries"] = 2

In [11]:
%%time
# Run the selected notebooks, retrying only the failed ones, until everything
# passes or the configured number of attempts is used up.
run_config = notebooks_run_config
containers = []
attempt = 0
success = False
while not success and attempt < notebooks_run_config["maxRetries"]:
    attempt = attempt + 1
    print(f"Starting notebooks. Attempt {attempt}. Run config: {run_config}")
    containers = start_notebooks(run_config)
    # Block until the submitted containers finish.
    controller.wait_for_tasks_to_complete(containers, 120, 45, False)
    results = get_execution_results(run_config)
    print(f'Attemp {attempt} finished. Results: {results}')
    # Keep only the notebooks that still need another attempt.
    run_config = update_run_config(run_config, results)
    success = len(run_config["notebooks_to_run"]) == 0


Starting notebooks. Attempt 1. Run config: {'black_list': [], 'white_list': [], 'optional_list': [], 'minimum_successful': 1, 'maxRetries': 2, 'notebooks_to_run': [{'folder': 'M-Admin', 'name': '1-EBS'}, {'folder': 'M-Admin', 'name': '2-Image_with_apps'}, {'folder': 'M-Admin', 'name': '3-FSX'}, {'folder': 'M-Admin', 'name': '4-querybook'}]}


INFO:root:using default profile {'display_name': 'Micro', 'slug': 'micro', 'description': '2 CPU + 2G MEM', 'kubespawner_override': {'cpu_guarantee': 2, 'cpu_limit': 2, 'mem_guarantee': '2G', 'mem_limit': '2G'}, 'default': True}
INFO:root:volumes:[{"name": "efs-volume", "persistentVolumeClaim": {"claimName": "jupyterhub"}}]
INFO:root:volume_mounts:[{"mountPath": "/efs", "name": "efs-volume"}]


notebookName: {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-5v7hh', 'NodeType': 'ec2', 'tasks': [{'notebookName': '1-EBS.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}


INFO:root:using default profile {'display_name': 'Micro', 'slug': 'micro', 'description': '2 CPU + 2G MEM', 'kubespawner_override': {'cpu_guarantee': 2, 'cpu_limit': 2, 'mem_guarantee': '2G', 'mem_limit': '2G'}, 'default': True}
INFO:root:volumes:[{"name": "efs-volume", "persistentVolumeClaim": {"claimName": "jupyterhub"}}]
INFO:root:volume_mounts:[{"mountPath": "/efs", "name": "efs-volume"}]


notebookName: {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-tj6lq', 'NodeType': 'ec2', 'tasks': [{'notebookName': '2-Image_with_apps.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}


INFO:root:using default profile {'display_name': 'Micro', 'slug': 'micro', 'description': '2 CPU + 2G MEM', 'kubespawner_override': {'cpu_guarantee': 2, 'cpu_limit': 2, 'mem_guarantee': '2G', 'mem_limit': '2G'}, 'default': True}
INFO:root:volumes:[{"name": "efs-volume", "persistentVolumeClaim": {"claimName": "jupyterhub"}}]
INFO:root:volume_mounts:[{"mountPath": "/efs", "name": "efs-volume"}]


notebookName: {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-4jbkd', 'NodeType': 'ec2', 'tasks': [{'notebookName': '3-FSX.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}


INFO:root:using default profile {'display_name': 'Micro', 'slug': 'micro', 'description': '2 CPU + 2G MEM', 'kubespawner_override': {'cpu_guarantee': 2, 'cpu_limit': 2, 'mem_guarantee': '2G', 'mem_limit': '2G'}, 'default': True}
INFO:root:volumes:[{"name": "efs-volume", "persistentVolumeClaim": {"claimName": "jupyterhub"}}]
INFO:root:volume_mounts:[{"mountPath": "/efs", "name": "efs-volume"}]
INFO:root:Waiting2 for 4 tasks [{'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-5v7hh', 'NodeType': 'ec2', 'tasks': [{'notebookName': '1-EBS.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}, {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-tj6lq', 'NodeType': 'ec2', 'tasks': [{'notebookName': '2-Image_with_apps.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}, {'ExecutionType': 'eks', 'Identifier': 'or

notebookName: {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-mrbvm', 'NodeType': 'ec2', 'tasks': [{'notebookName': '4-querybook.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}


INFO:root:Task {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-5v7hh', 'NodeType': 'ec2', 'tasks': [{'notebookName': '1-EBS.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]} is running with status {'active': 1,
 'completion_time': None,
 'conditions': None,
 'failed': None,
 'start_time': datetime.datetime(2021, 4, 16, 2, 59, 57, tzinfo=tzlocal()),
 'succeeded': None}
INFO:root:Task {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-tj6lq', 'NodeType': 'ec2', 'tasks': [{'notebookName': '2-Image_with_apps.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]} is running with status {'active': 1,
 'completion_time': None,
 'conditions': None,
 'failed': None,
 'start_time': datetime.datetime(2021, 4, 16, 2, 59, 58, tzinfo=tzlocal()),
 'succeeded': None}
INFO:root:Task {'ExecutionType': 'eks', 'Identif

Attemp 1 finished. Results: {'success': [{'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/1-EBS', 'name': '1-EBS'}, {'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/2-Image_with_apps', 'name': '2-Image_with_apps'}, {'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/3-FSX', 'name': '3-FSX'}, {'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/4-querybook', 'name': '4-querybook'}], 'failed': []}
CPU times: user 1.23 s, sys: 115 ms, total: 1.34 s
Wall time: 26min 3s


In [12]:
%%time
# Waits once more on the containers from the retry loop, with the same arguments.
# NOTE(review): the recorded run returned in ~113 ms, so this looks redundant
# after the loop's own wait — confirm before removing.
controller.wait_for_tasks_to_complete(containers, 120,45, False)

INFO:root:Waiting2 for 4 tasks [{'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-5v7hh', 'NodeType': 'ec2', 'tasks': [{'notebookName': '1-EBS.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}, {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-tj6lq', 'NodeType': 'ec2', 'tasks': [{'notebookName': '2-Image_with_apps.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}, {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-4jbkd', 'NodeType': 'ec2', 'tasks': [{'notebookName': '3-FSX.ipynb', 'sourcePath': 'shared/samples/notebooks/M-Admin', 'targetPath': 'shared/regression/notebooks/M-Admin', 'params': {}}]}, {'ExecutionType': 'eks', 'Identifier': 'orbit-lake-creator-ec2-runner-mrbvm', 'NodeType': 'ec2', 'tasks': [{'notebookName': '4-querybook.ipynb', 'sourcePath': 'shared/samples/note

CPU times: user 72.2 ms, sys: 0 ns, total: 72.2 ms
Wall time: 113 ms


True

In [13]:
results

{'success': [{'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/1-EBS',
   'name': '1-EBS'},
  {'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/2-Image_with_apps',
   'name': '2-Image_with_apps'},
  {'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/3-FSX',
   'name': '3-FSX'},
  {'folder': '/home/jovyan/shared/regression/notebooks/M-Admin/4-querybook',
   'name': '4-querybook'}],
 'failed': []}

In [14]:
# Fail the regression run if any notebook is still marked as failed, and say which.
assert len(results['failed']) == 0, f"Notebooks failed: {results['failed']}"

In [15]:
# Append a "PASSED" line to the admin marker file (deleted by the cleanup cell near the top).
!echo "PASSED" >> /efs/shared/regression/ADMIN_PASSED
# Show the contents of the regression directory.
!ls /efs/shared/regression/
# Pause for 15 seconds before the notebook ends.
!sleep 15s

ADMIN_PASSED  CREATOR_PASSED  notebooks


# End of notebook
