# Master test notebook to run all user notebooks
    

### Author: AWS Professional Services Emerging Technology and Intelligent Platforms Group
### Date: Feb 4 2021

In [None]:
import os
import sys
import boto3
from aws_orbit_sdk import controller
from aws_orbit_sdk.common import get_workspace,get_scratch_database
# S3 client created once for the notebook session; none of the cells visible
# in this notebook reference it.
s3 = boto3.client('s3')


In [None]:
# Look up the current workspace and the scratch database through the SDK
# helpers imported above.
workspace = get_workspace()
scratch_glue_db = get_scratch_database()  # not referenced again in the visible cells
team_space = workspace['team_space']
# DO NOT RUN THIS NOTEBOOK IN LAKE CREATOR TEAM SPACE
# The assert stops the notebook here unless the team space is 'lake-user'.
assert team_space == 'lake-user'
scratch_bucket = workspace['ScratchBucket']
# Bare tuple as the last expression of the cell so Jupyter displays both values.
(team_space, scratch_bucket, )

In [None]:
def checkNotebooks(executions, expected_count):
    """Validate an execution-history table and print "SUCCESS" when it is clean.

    Args:
        executions: table exposing ``len()`` and ``iterrows()`` whose rows
            carry a ``relativePath`` entry (a pandas DataFrame fits).
        expected_count: number of rows the table must contain.

    Raises:
        AssertionError: if the row count differs from ``expected_count`` or
            if any ``relativePath`` contains the marker ``error@``.
    """
    assert len(executions) == expected_count
    for _, record in executions.iterrows():
        path = record['relativePath']
        # Failed executions are recognised by the "error@" marker in the path.
        if 'error@' in path:
            raise AssertionError('error in ' + path)
    print("SUCCESS")

### Cleanup

In [None]:
# Shell escape: run `rm -fR` on every path that `find` reports under the
# B-DataAnalyst regression folder (find reports the folder itself as well).
# NOTE(review): presumably this is the /efs mount of the
# "shared/regression/notebooks/<folder>" target path used further down — confirm.
!find /efs/shared/regression/notebooks/B-DataAnalyst/ -exec rm -fR {} \;

### Configure regression run

In [None]:
%%bash --out output --err error

# List the sample notebooks of the B-DataAnalyst folder. Because of the
# --out/--err flags above, stdout is stored in the Python variable `output`
# and stderr in `error` instead of being printed.
ls ../B-DataAnalyst/*.ipynb 

In [None]:
# Display the captured `ls` listing (one notebook path per line).
output

In [None]:
# Settings for the regression run. "notebooks_to_run" starts empty and is
# filled by the loop below from the `ls` listing captured in `output`.
# NOTE(review): "optional_list" and "minimum_successful" are not consulted by
# any cell visible in this notebook — confirm whether they are still needed.
notebooks_run_config = {
    "black_list": [],          # notebook names to skip. Example: ["Example-7-Data-Profiling"]
    "white_list": [],          # if not empty, only these notebooks run. Example: ["Example-7-Data-Profiling"]
    "optional_list": [],       # notebooks whose failure is ignored. Example: ["Example-6-Schedule-Notebook", "Example-8-LakeFormation-Security"]
    "minimum_successful": 1,   # minimum number of notebooks that must complete for the whole test not to count as failed (only matters when larger than the number of mandatory notebooks)
    "maxRetries": 3,           # maximum number of attempts to execute a notebook
    "notebooks_to_run": []     # all notebooks selected for execution
}

for p in output.split('\n'):
    # Skip the empty trailing entry (and any other degenerate line).
    if len(p) < 2:
        continue
    # A listing line looks like "../<folder>/<name>.ipynb". Take the folder and
    # the extension-less file name from the end of the path so the parsing does
    # not depend on how many leading components the path has.
    nb_folder = os.path.basename(os.path.dirname(p))
    nb_name = os.path.splitext(os.path.basename(p))[0]
    if nb_name in notebooks_run_config["black_list"]:
        # The black list has the highest priority: it wins over the white list.
        continue
    if not notebooks_run_config["white_list"] or nb_name in notebooks_run_config["white_list"]:
        # Run the notebook if the white list is empty or names this notebook.
        notebooks_run_config["notebooks_to_run"].append({"folder": nb_folder, "name": nb_name})
# Display the resulting configuration.
notebooks_run_config

In [None]:
# New implementation of the cell below
import time

def start_notebooks(run_config):
    """Submit one notebook-execution request per entry of the run configuration.

    Args:
        run_config: dict whose ``"notebooks_to_run"`` list holds dicts with
            ``"name"`` (notebook name without extension) and ``"folder"``.

    Returns:
        list: the values returned by ``controller.run_notebooks``, one per
        submitted notebook, in submission order.
    """
    launched = []
    for entry in run_config["notebooks_to_run"]:
        name = entry['name']
        folder = entry['folder']
        # Source comes from the samples tree; results go to the regression tree.
        task = {
            "notebookName": f"{name}.ipynb",
            "sourcePath": f"shared/samples/notebooks/{folder}",
            "targetPath": f"shared/regression/notebooks/{folder}",
            "params": {},
        }
        handle = controller.run_notebooks({"tasks": [task]})
        print("notebookName: " + str(handle))
        launched.append(handle)
    return launched


def update_run_config(run_config, execution_results):
    """Narrow ``run_config["notebooks_to_run"]`` to the notebooks that failed.

    The configuration dict is modified in place and also returned.

    Args:
        run_config: dict with a ``"notebooks_to_run"`` list of notebook dicts
            (each carrying a ``"name"`` key).
        execution_results: dict with a ``"failed"`` collection of notebook names.

    Returns:
        dict: the same ``run_config`` object; its ``"notebooks_to_run"`` list is
        empty when nothing failed, otherwise it holds the failed entries in
        their original order.
    """
    previous_batch = run_config['notebooks_to_run']
    # Start the next batch from scratch; it stays empty when nothing failed.
    retry_batch = run_config['notebooks_to_run'] = []

    failed_names = execution_results['failed']
    if failed_names:
        retry_batch.extend(
            nb for nb in previous_batch if nb['name'] in failed_names
        )

    return run_config

In [None]:
def get_execution_results(run_config):
    """Classify every notebook of the run configuration as succeeded or failed.

    For each entry of ``run_config["notebooks_to_run"]`` the execution history
    is fetched with ``controller.get_execution_history`` and each history row's
    ``relativePath`` is inspected:

    * a path containing ``error@`` marks a failure, unless the path's directory
      contains ``Failure-Behavior``;
    * for a directory containing ``Failure-Behavior`` the rule is inverted: a
      path *without* ``error@`` marks a failure.

    A notebook with no history rows is reported as successful.

    Results are always recorded under the notebook's configured ``name`` so
    that ``update_run_config`` (which matches on that same key) can select the
    failed notebooks for a retry.

    Args:
        run_config: dict whose ``"notebooks_to_run"`` list holds dicts with
            ``"name"`` and ``"folder"`` keys.

    Returns:
        dict: ``{"success": [names...], "failed": [names...]}``.
    """
    result = {"success": [], "failed": []}
    for test in run_config['notebooks_to_run']:
        executions = controller.get_execution_history(
            f"shared/regression/notebooks/{test['folder']}",
            f"{test['name']}.ipynb",
        )
        failed = False
        for _, row in executions.iterrows():
            res = row.get('relativePath')
            folder_name = os.path.dirname(res)
            has_error = 'error@' in res
            error_expected = 'Failure-Behavior' in folder_name
            # A run is wrong when it errored unexpectedly, or when a notebook
            # that is supposed to fail completed without an error.
            if has_error != error_expected:
                failed = True
        # Use the configured name, not one derived from the result path, so the
        # entry can be matched against the run configuration afterwards.
        if failed:
            result["failed"].append(test['name'])
        else:
            result["success"].append(test['name'])
    return result

In [None]:
# Override the retry budget from the configuration cell (3) with 2 attempts.
notebooks_run_config["maxRetries"] = 2

In [None]:
%%time
# Retry driver: run every selected notebook, then re-run only the ones that
# failed, for at most notebooks_run_config["maxRetries"] attempts in total.
success = False
attempt = 0
# Alias, not a copy: update_run_config() rewrites "notebooks_to_run" on this
# very dict, so notebooks_run_config shrinks to the failed notebooks as well.
run_config = notebooks_run_config
containers = []
while attempt < notebooks_run_config["maxRetries"] and not success:
    attempt += 1
    print(f"Starting notebooks. Attempt {attempt}. Run config: {run_config}")
    containers = start_notebooks(run_config)
    # NOTE(review): the meaning of the arguments 120, 45, False is defined by
    # the SDK's wait_for_tasks_to_complete — confirm against its signature.
    controller.wait_for_tasks_to_complete(containers, 120, 45, False)
    results = get_execution_results(run_config)
    print(f'Attempt {attempt} finished. Results: {results}')
    # Keep only the failed notebooks for the next attempt; an empty list means
    # everything passed and the loop can stop.
    run_config = update_run_config(run_config, results)
    success = not run_config["notebooks_to_run"]


In [None]:
%%time
# Wait once more on the containers of the last attempt. The retry loop has
# already waited on this same list with the same arguments.
controller.wait_for_tasks_to_complete(containers, 120,45, False)

In [None]:
# Display the success/failed classification produced by the last attempt.
results

In [None]:
# Fail the regression run if any notebook is still marked as failed after the
# last attempt; the message names the offending notebooks.
assert len(results['failed']) == 0, f"Failed notebooks: {results['failed']}"

In [None]:
# Reached only when the assertion cell before it did not raise: append a
# "PASSED" line to the marker file and list the regression folder.
!echo "PASSED" >> /efs/shared/regression/PASSED
!ls /efs/shared/regression/
# NOTE(review): the reason for the 15 second pause is not evident from this
# notebook — presumably it gives the marker file time to become visible; confirm.
!sleep 15s

# End of notebook
