# Use this notebook to parallelize ingestion

In [0]:
from concurrent.futures import ThreadPoolExecutor

class NotebookData:
    """Describe a single notebook run and how it should be launched.

    Attributes (stored unchanged from the constructor arguments):
        path: notebook path passed to ``dbutils.notebook.run``.
        timeout: timeout value passed to ``dbutils.notebook.run``.
        parameters: optional arguments passed to ``dbutils.notebook.run``;
            left out of the call entirely when falsy (``None`` or empty).
        retry: how many additional attempts are allowed after a failure.
    """

    def __init__(self, path, timeout, parameters=None, retry=0):
        self.path = path
        self.timeout = timeout
        self.parameters = parameters
        self.retry = retry

    # Deliberately left as a plain function (no ``self``, no decorator) so that
    # both ``NotebookData.submitNotebook(nb)`` and ``nb.submitNotebook()`` keep
    # working exactly as before.
    def submitNotebook(notebook):
        """Run ``notebook`` through ``dbutils.notebook.run``, retrying on failure.

        Each failed attempt consumes one unit of ``notebook.retry`` (the
        attribute is decremented in place). Once no retries remain, the
        exception from the last attempt propagates to the caller.

        Args:
            notebook: a ``NotebookData`` instance (or any object exposing
                ``path``, ``timeout``, ``parameters`` and ``retry``).

        Returns:
            Whatever ``dbutils.notebook.run`` returns for the first
            successful attempt.

        Raises:
            Exception: the error raised by the final attempt when retries
                are exhausted.
        """
        # Retry in a loop instead of recursing: the bare name
        # ``submitNotebook`` is not visible from inside a function defined in
        # a class body, so the recursive call raised NameError and the retry
        # result was never returned.
        while True:
            print("Running notebook %s" % notebook.path)
            try:
                if notebook.parameters:
                    return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
                return dbutils.notebook.run(notebook.path, notebook.timeout)
            except Exception:
                if notebook.retry < 1:
                    raise
            print("Retrying notebook %s" % notebook.path)
            notebook.retry = notebook.retry - 1

def parallelNotebooks(notebooks, numInParallel):
    """Submit every notebook to a thread pool and return the futures.

    Args:
        notebooks: iterable of objects to hand to
            ``NotebookData.submitNotebook``, one task per element.
        numInParallel: ``max_workers`` for the ``ThreadPoolExecutor``.

    Returns:
        A list of futures in the same order as ``notebooks``. The pool is
        used as a context manager, so it is shut down (waiting for the
        submitted tasks) before this function returns.
    """
    futures = []
    with ThreadPoolExecutor(max_workers=numInParallel) as pool:
        for entry in notebooks:
            futures.append(pool.submit(NotebookData.submitNotebook, entry))
    return futures

def partition_parameters(parameters, num_partitions):
    """Lazily yield consecutive slices of ``parameters``.

    Note that ``num_partitions`` is used as the *length of each slice*, not
    as the number of slices produced; the final slice may be shorter.

    Args:
        parameters: a sliceable sequence supporting ``len()``.
        num_partitions: step between slice starts, i.e. the slice length.

    Yields:
        ``parameters[start:start + num_partitions]`` for each start offset.
    """
    offsets = range(0, len(parameters), num_partitions)
    for start in offsets:
        stop = start + num_partitions
        yield parameters[start:stop]


In [0]:
/Workspace/Users/arturlauth@gmail.com/lagodedadosalttab/Ingestor

In [0]:
# One NotebookData entry per notebook to launch: path, timeout, then optional
# parameters and retry count.
notebooks = [
    NotebookData(
        "../path/to/Notebook1",
        1200,
        {"Parameter1": "Parameter1Value", "Parameter2": "Parameter2Value"},
    ),
    NotebookData(
        "../path/to/Notebook2",
        1200,
        {
            "Parameter1": "Parameter1Value",
            "Parameter2": "Parameter2Value",
            "Parameter3": "Parameter3Value",
        },
    ),
    NotebookData("../path/to/Notebook3", 1200, retry=2),
]

# Run with at most 4 notebooks in flight at once.
res = parallelNotebooks(notebooks, 4)

# result() blocks until the future has finished (up to the given timeout) and
# re-raises any exception the notebook run ended with.
result = [future.result(timeout=3600) for future in res]
print(result)