# Benchmarking: VIKING20X workload

In [None]:
import xarray
import dask.distributed
import dask_gateway

## Open Dask cluster

In [None]:
# Connection settings for the Dask Gateway; authentication is delegated to
# JupyterHub.
gateway_settings = {
    'address': 'http://traefik-daskgateway2-dask-gateway:80/services/dask-gateway',
    'public_address': '/services/dask-gateway/',  # dashboard!
    'auth': 'jupyterhub',
}
gateway = dask_gateway.Gateway(**gateway_settings)

In [None]:
# Display the cluster options offered by the gateway (shown as the cell output).
gateway.cluster_options()

In [None]:
%%time
# Note: this spawns the Dask scheduler and might take up to a few minutes because of pod/node/image provisioning.
# Allocatable resources per node in the dask-worker-pool: 7.91 CPU and 29.79 GB (= 27.744099 GiB).
# The worker size requested below (7 cores, 27 memory units) stays just under those limits,
# presumably so that one worker fits on one node -- TODO confirm the unit of `worker_memory`.
cluster = gateway.new_cluster(worker_cores=7, worker_memory=27) 

In [None]:
# Attach a distributed client to the gateway cluster and show it as the cell output.
client = dask.distributed.Client(cluster)
client

## Specify VIKING20X dataset

In [None]:
# Root URL of the bucket holding the VIKING20X Zarr stores.
# NOTE(review): 'gs://...' looks like a redacted placeholder -- set the real bucket before running.
gcloud_bucket = 'gs://...'

In [None]:
# Open the grid_T Zarr store from the bucket.
tgrid_store = gcloud_bucket+'/VIKING20X.L46-KFS003_1m_grid_T.zarr'
ds_tgrid = xarray.open_zarr(tgrid_store)

In [None]:
# Open the grid_U Zarr store from the bucket.
ugrid_store = gcloud_bucket+'/VIKING20X.L46-KFS003_1m_grid_U.zarr'
ds_ugrid = xarray.open_zarr(ugrid_store)

In [None]:
# Open the grid_V Zarr store from the bucket.
vgrid_store = gcloud_bucket+'/VIKING20X.L46-KFS003_1m_grid_V.zarr'
ds_vgrid = xarray.open_zarr(vgrid_store)

Uncompressed size of each dataset:

In [None]:
# Report the in-memory (uncompressed) size of each dataset, in units of 1e9 bytes.
for dataset in (ds_tgrid, ds_ugrid, ds_vgrid):
    print(dataset.nbytes/1e9, 'in GB')

## Run performance experiment

In [None]:
# Show the client summary once more right before starting the experiment.
client

In [None]:
import datetime
import time

import tqdm

# --- Experiment configuration -------------------------------------------------

exp_name = 'gcloud'
no_of_realizations = 25   # timed repetitions per worker count
no_of_warmups = 3         # untimed repetitions before measuring

# The full sweep is [20, 40, 60]. Only the 60-worker case is run here: the
# Kubernetes cluster nodes had to be repaired during the experiment and this
# cell was restarted manually for the remaining worker count.
target_worker_number = [60]

# Timestamp used in the log file name, and the result rows (CSV header first).
# Both must be defined in this cell: the loop below appends to `walltime`, and
# the cell that writes the results to disk reads `now` and `walltime`.
# Note that re-running this cell therefore starts a new, empty result list.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
walltime = ['machine,workers,walltime']


def count_workers():
    """Return the number of workers currently listed in the scheduler info."""
    return len(cluster.scheduler_info.get('workers', {}))


def compute_global_means():
    """Run the benchmark workload once.

    Computes the mean over depth, time, x and y of temperature and salinity
    (grid_T), and of the two velocity components (grid_U and grid_V). The
    results are discarded; only the elapsed time is of interest.
    """
    ds_tgrid['votemper'].mean(['deptht', 'time_counter', 'x', 'y']).compute()
    ds_tgrid['vosaline'].mean(['deptht', 'time_counter', 'x', 'y']).compute()
    ds_ugrid['vozocrtx'].mean(['depthu', 'time_counter', 'x', 'y']).compute()
    ds_vgrid['vomecrty'].mean(['depthv', 'time_counter', 'x', 'y']).compute()


# --- Measuring ----------------------------------------------------------------

for no_of_workers in target_worker_number:

    # Request the target number of workers. Without this call the wait loop
    # below never terminates on a freshly created cluster; requesting a size
    # the cluster already has does not add workers.
    cluster.scale(no_of_workers)

    # Wait for the workers to be available.
    while count_workers() < no_of_workers:
        time.sleep(5)

    # Warming up (not timed).
    for _ in range(no_of_warmups):
        compute_global_means()

    # Do the timed calculation.
    for _ in tqdm.tqdm(range(no_of_realizations)):

        # Record the worker count actually present for this realization.
        workers = count_workers()

        # perf_counter is monotonic, so the interval cannot be distorted by
        # system clock adjustments (unlike time.time()).
        start_time_in_sec = time.perf_counter()
        compute_global_means()
        elapsed_time_in_sec = time.perf_counter() - start_time_in_sec

        walltime.append("{},{},{}".format(exp_name, workers, elapsed_time_in_sec))

Example node structure:

In [None]:
# Print the host address of every worker listed in the scheduler info.
for worker_info in cluster.scheduler_info.get('workers').values():
    print(worker_info['host'])

In [None]:
# $ kubectl get pods --namespace jupyterhub2 -o wide && kubectl get nodes --namespace jupyterhub2 -o wide
# NAME                                                    READY   STATUS    RESTARTS   AGE    IP           NODE                                              NOMINATED NODE   READINESS GATES
# api-daskgateway2-dask-gateway-589fd758b4-ng7qz          1/1     Running   0          21h    10.28.2.94   gke-jupyterhub2-core-pool-eee381a1-2ghj           <none>           <none>
# continuous-image-puller-g8s6k                           1/1     Running   0          136m   10.28.3.2    gke-jupyterhub2-jupyter-user-pool-431878cf-ntpb   <none>           <none>
# controller-daskgateway2-dask-gateway-84f7747c9d-d4s6g   1/1     Running   0          21h    10.28.2.95   gke-jupyterhub2-core-pool-eee381a1-2ghj           <none>           <none>
# dask-scheduler-be688c00e55b406d993e3ea07346698e         1/1     Running   0          41m    10.28.3.13   gke-jupyterhub2-jupyter-user-pool-431878cf-ntpb   <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-2sjvb      1/1     Running   0          15m    10.28.8.2    gke-jupyterhub2-dask-worker-pool-98e0de38-hllf    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-46vsp      1/1     Running   0          41m    10.28.1.2    gke-jupyterhub2-dask-worker-pool-98e0de38-rnvw    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-62k6z      1/1     Running   0          28m    10.28.5.2    gke-jupyterhub2-dask-worker-pool-98e0de38-jgxn    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-9wqpk      1/1     Running   0          41m    10.28.0.2    gke-jupyterhub2-dask-worker-pool-98e0de38-rmj5    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-bdzvn      1/1     Running   0          10m    10.28.11.2   gke-jupyterhub2-dask-worker-pool-98e0de38-tvlk    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-bnd7m      1/1     Running   0          21m    10.28.6.2    gke-jupyterhub2-dask-worker-pool-98e0de38-zgz2    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-dx8gp      1/1     Running   0          10m    10.28.10.2   gke-jupyterhub2-dask-worker-pool-98e0de38-2nzz    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-km6l8      1/1     Running   0          15m    10.28.9.2    gke-jupyterhub2-dask-worker-pool-98e0de38-9pcj    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-q7v2c      1/1     Running   0          28m    10.28.4.2    gke-jupyterhub2-dask-worker-pool-98e0de38-x7mc    <none>           <none>
# dask-worker-be688c00e55b406d993e3ea07346698e-q9tvk      1/1     Running   0          21m    10.28.7.2    gke-jupyterhub2-dask-worker-pool-98e0de38-wpzv    <none>           <none>
# hub-cbb556995-hd76f                                     1/1     Running   0          21h    10.28.2.97   gke-jupyterhub2-core-pool-eee381a1-2ghj           <none>           <none>
# jupyter-khoeflich                                       1/1     Running   0          138m   10.28.3.3    gke-jupyterhub2-jupyter-user-pool-431878cf-ntpb   <none>           <none>
# proxy-db9d9bbc4-lkm22                                   1/1     Running   0          87d    10.28.2.4    gke-jupyterhub2-core-pool-eee381a1-2ghj           <none>           <none>
# traefik-daskgateway2-dask-gateway-849fdb566d-sdh4w      1/1     Running   0          87d    10.28.2.5    gke-jupyterhub2-core-pool-eee381a1-2ghj           <none>           <none>
# NAME                                              STATUS   ROLES    AGE     VERSION             INTERNAL-IP     EXTERNAL-IP      OS-IMAGE                             KERNEL-VERSION   CONTAINER-RUNTIME
# gke-jupyterhub2-core-pool-eee381a1-2ghj           Ready    <none>   87d     v1.20.10-gke.1600   10.156.15.227   35.234.114.125   Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-2nzz    Ready    <none>   9m15s   v1.20.10-gke.1600   10.156.15.240   34.159.61.210    Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-9pcj    Ready    <none>   14m     v1.20.10-gke.1600   10.156.15.239   34.159.32.44     Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-hllf    Ready    <none>   14m     v1.20.10-gke.1600   10.156.15.238   34.159.16.170    Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-jgxn    Ready    <none>   28m     v1.20.10-gke.1600   10.156.0.23     34.159.202.71    Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-rmj5    Ready    <none>   40m     v1.20.10-gke.1600   10.156.0.21     35.198.77.238    Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-rnvw    Ready    <none>   40m     v1.20.10-gke.1600   10.156.0.22     34.159.93.15     Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-tvlk    Ready    <none>   9m14s   v1.20.10-gke.1600   10.156.15.241   34.159.192.187   Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-wpzv    Ready    <none>   20m     v1.20.10-gke.1600   10.156.15.237   34.159.55.51     Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-x7mc    Ready    <none>   28m     v1.20.10-gke.1600   10.156.15.235   34.89.208.255    Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-dask-worker-pool-98e0de38-zgz2    Ready    <none>   20m     v1.20.10-gke.1600   10.156.15.236   34.159.230.32    Container-Optimized OS from Google   5.4.120+         containerd://1.4.4
# gke-jupyterhub2-jupyter-user-pool-431878cf-ntpb   Ready    <none>   137m    v1.20.10-gke.1600   10.156.0.17     34.159.19.48     Container-Optimized OS from Google   5.4.120+         containerd://1.4.4

In [None]:
# Disconnect the client first, then close the cluster.
client.close()
cluster.close()

Write results to disk:

In [None]:
import csv
import os

# Write the collected rows (header + one row per realization) as a CSV log
# named after the experiment timestamp and name.
log_dir = './viking20x_logs'
os.makedirs(log_dir, exist_ok=True)  # do not fail when the log directory is missing

log_path = log_dir + '/' + now + '_' + exp_name + '.log'

# Each entry of `walltime` is an already comma-joined row, so it is split back
# into fields and written as a regular CSV row. newline='' together with an
# explicit '\n' line terminator gives consistent line endings on every
# platform (no stray '\r' after the last row).
with open(log_path, 'w', newline='') as file:
    wr = csv.writer(file, lineterminator='\n')
    wr.writerows(row.split(',') for row in walltime)

## Python environment

In [None]:
# Record the installed Python packages for reproducibility (IPython automagic for `%pip list`).
pip list

In [None]:
# Record the conda environment for reproducibility (runs `conda` in a shell).
!conda list --explicit