In [58]:
#%%time
# Create the remote Dask scheduler and worker containers.
# This takes at least one minute, because delays are added on purpose to give the containers time to spawn.
# On a successful run the scheduler URL is printed.

#!python3 daskmaster.py

In [59]:
# Notebook-global URL of the Dask scheduler.
# The empty string is the "nothing launched" sentinel: dask_distributed_launch()
# checks it before starting anything and stores the scheduler URL here on
# success; dask_stop_workers() resets it back to "".
daskschurl = ""

In [60]:
import cdsw
import os
import time

def dask_distributed_launch(nworkers=1, startup_wait=30):
    """Launch a Dask scheduler and Dask workers as CDSW workers.

    One CDSW worker is started running ``daskschedular.py``; its IP address
    is looked up via ``cdsw.list_workers()`` and turned into a scheduler URL
    (``tcp://<ip>:8786``). Then ``nworkers`` CDSW workers are started running
    ``daskworker.py`` with that URL passed in the ``DASKSCHURL`` environment
    variable. On success the URL is also stored in the module-level
    ``daskschurl`` so later cells can use it.

    If ``daskschurl`` is already non-empty the function launches nothing and
    simply returns the stored URL. To relaunch, call ``dask_stop_workers()``
    first.

    Parameters
    ----------
    nworkers : int, optional
        Number of Dask worker engines to launch (default 1).
    startup_wait : int or float, optional
        Seconds to sleep after launching the scheduler and again after
        launching the workers, giving the containers time to come up
        (default 30, the previously hard-coded value).

    Returns
    -------
    str
        The scheduler URL.

    Raises
    ------
    RuntimeError
        If CDSW returns no scheduler worker, or the scheduler's IP address
        cannot be found after waiting. ``daskschurl`` is left empty in that
        case; any engine already started is NOT stopped here, so call
        ``dask_stop_workers()`` before retrying.
    """
    # modify the module-level copy
    global daskschurl

    # check if already running
    if daskschurl != "":
        print(" Dask Scheduler Already Launched " + daskschurl)
        return daskschurl

    # Launch CDSW workers. These are engines that will run in
    # the same project, execute a given code or script, and exit.
    # Scheduler engine will keep running in background until session is closed
    dask_scheduler = cdsw.launch_workers(n=1, cpu=2, memory=4,
                                         kernel="python3",
                                         script="daskschedular.py")
    if not dask_scheduler:
        raise RuntimeError("CDSW did not return a worker for the Dask scheduler")

    # IP of launched container comes up unknown for a while
    # Wait for a while so IP is available in data structure
    time.sleep(startup_wait)

    # Get scheduler IP. Start from None so a missing entry is detected
    # explicitly instead of surfacing as an UnboundLocalError further down.
    schedulerid = dask_scheduler[0]["id"]
    schedulerip = None
    for worker in cdsw.list_workers():
        if worker["id"] == schedulerid:
            schedulerip = worker["ip_address"]

    if not schedulerip:
        raise RuntimeError(
            "Could not determine the IP address of Dask scheduler worker {0}; "
            "call dask_stop_workers() and retry, possibly with a larger "
            "startup_wait".format(schedulerid)
        )

    print(" Scheduler IP: " + schedulerip)

    # Scheduler protocol and port - defaults from Dask
    schproto = "tcp://"
    schport = ":8786"

    schloc = schproto + schedulerip + schport
    print(" Scheduler URL: " + schloc)

    # Launch at least one Dask Worker; each worker script receives the
    # scheduler URL through the DASKSCHURL environment variable.
    cdsw.launch_workers(n=nworkers, cpu=2, memory=4,
                        kernel="python3", script="daskworker.py",
                        env={"DASKSCHURL": schloc})

    # wait for a while until the container is launched successfully
    time.sleep(startup_wait)

    # Record the URL only after both launches went through, so a failed
    # attempt does not look like a running cluster on the next call.
    daskschurl = schloc

    # return scheduler URL
    return schloc


def dask_stop_workers():
    """Stop the CDSW workers and forget the stored scheduler URL.

    Calls ``cdsw.stop_workers()`` with no arguments and then resets the
    module-level ``daskschurl`` to the empty string, which is the value
    ``dask_distributed_launch()`` treats as "not launched yet".
    """
    global daskschurl

    # Shut the engines down first; the URL is cleared only once this returns.
    cdsw.stop_workers()

    # Empty string == nothing launched.
    daskschurl = ""
    

In [61]:
def dask_test():
    """Run a small Dask array computation and print its result.

    Builds a 40000 x 40000 array of random values split into 1000 x 1000
    chunks, sums ``exp`` of every element, and prints the computed total.

    NOTE(review): this function does not create a ``dask.distributed.Client``
    itself; presumably one has to be registered beforehand for the work to
    run on the launched workers -- confirm.
    """
    # dask is imported at call time, so defining this function does not need it.
    import dask.array as da

    x = da.random.random((40000, 40000), chunks=(1000, 1000))
    y = da.exp(x).sum()

    # compute() returns a number, so it must be converted before being
    # concatenated to the message (str + float raises TypeError).
    print(" Result of DASK distributed array test " + str(y.compute()))


In [62]:
# Stop the CDSW workers and clear the stored scheduler URL before launching,
# so that dask_distributed_launch() does not short-circuit on a stale URL.
dask_stop_workers()

In [63]:
# Launch the scheduler and worker container(s); the number of workers can be
# passed as an argument (default is 1).
# To relaunch, first call dask_stop_workers() and then call this again.
dask_distributed_launch()

 Scheduler IP: 10.10.175.136
 Scheduler URL: tcp://10.10.175.136:8786


'tcp://10.10.175.136:8786'

In [65]:
# Check that the global variable holds the scheduler URL.
# It is used below to register a client.
print(daskschurl)

tcp://10.10.175.136:8786


In [66]:
# Register a Dask client against the scheduler URL and run a test:
# sum exp() over a 40000 x 40000 random array split into 1000 x 1000 chunks.
from dask.distributed import Client
client = Client(daskschurl)
import dask.array as da
x = da.random.random((40000,40000),chunks=(1000,1000))
y = da.exp(x).sum()
y.compute()

2749244551.6216931