# Common code to copy paste to other notebooks

# Dask Setup

In [1]:
# workers x memory_per_worker <= available memory
# threads per worker == 1 if workload is CPU intensive
# dashboard port might need to change if running multiple dask instances within lab
WORKERS = 8
MEMORY_PER_WORKER = "4GB"
THREADS_PER_WORKER = 1
DASHBOARD_PORT = ":8787"

## Local Dask cluster setup for analysis

* Install bokeh, spawn cluster, provide access point to dashboards
* Access jupyter hub at the address - https://jupyter.olcf.ornl.gov/hub/user-redirect/proxy/8787/status")
* Or access point for the Dask jupyter extension - /proxy/8787

In [2]:
# General prerequisites we want to have loaded from the get go
!pip install bokeh



In [3]:
# Cleanup
try:
    client.shutdown()
    client.close()
except Exception as e:
    pass

In [4]:
# Setup block
import os
import pwd
import glob
import pandas as pd
from distributed import LocalCluster, Client
import dask
import dask.dataframe as dd

#LOCALDIR = "/gpfs/alpine/stf218/scratch/shinw/.tmp/dask-interactive"
LOCALDIR = "/tmp/dask"

In [5]:
dask.config.set({'worker.memory': {'target': False, 'spill': False, 'pause': 0.8, 'terminate': 0.95}})
#dask.config.config

<dask.config.set at 0x7fd14c61b610>

In [6]:
# Cluster creation
cluster = LocalCluster(processes=True, n_workers=WORKERS, threads_per_worker=THREADS_PER_WORKER,
                       dashboard_address=DASHBOARD_PORT, local_directory=LOCALDIR,
                       memory_limit=MEMORY_PER_WORKER)

client = Client(cluster)
cluster
print("Access jupyter hub at the address - https://jupyter.olcf.ornl.gov/hub/user-redirect/proxy/8787/status")
print("Dask jupyter extension - /proxy/8787")
client

Access jupyter hub at the address - https://jupyter.olcf.ornl.gov/hub/user-redirect/proxy/8787/status
Dask jupyter extension - /proxy/8787


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8788/status,

0,1
Dashboard: http://127.0.0.1:8788/status,Workers: 8
Total threads: 8,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:37891,Workers: 8
Dashboard: http://127.0.0.1:8788/status,Total threads: 8
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:38961,Total threads: 1
Dashboard: http://127.0.0.1:46137/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32863,
Local directory: /tmp/dask/dask-worker-space/worker-ad19p2oq,Local directory: /tmp/dask/dask-worker-space/worker-ad19p2oq

0,1
Comm: tcp://127.0.0.1:34285,Total threads: 1
Dashboard: http://127.0.0.1:41197/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37191,
Local directory: /tmp/dask/dask-worker-space/worker-el0y17d6,Local directory: /tmp/dask/dask-worker-space/worker-el0y17d6

0,1
Comm: tcp://127.0.0.1:43759,Total threads: 1
Dashboard: http://127.0.0.1:43281/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32969,
Local directory: /tmp/dask/dask-worker-space/worker-s31fmvkf,Local directory: /tmp/dask/dask-worker-space/worker-s31fmvkf

0,1
Comm: tcp://127.0.0.1:35279,Total threads: 1
Dashboard: http://127.0.0.1:39357/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32817,
Local directory: /tmp/dask/dask-worker-space/worker-axdfalgb,Local directory: /tmp/dask/dask-worker-space/worker-axdfalgb

0,1
Comm: tcp://127.0.0.1:44947,Total threads: 1
Dashboard: http://127.0.0.1:40729/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35501,
Local directory: /tmp/dask/dask-worker-space/worker-ysyc6azv,Local directory: /tmp/dask/dask-worker-space/worker-ysyc6azv

0,1
Comm: tcp://127.0.0.1:44567,Total threads: 1
Dashboard: http://127.0.0.1:41447/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:33009,
Local directory: /tmp/dask/dask-worker-space/worker-hiw3t6gj,Local directory: /tmp/dask/dask-worker-space/worker-hiw3t6gj

0,1
Comm: tcp://127.0.0.1:45057,Total threads: 1
Dashboard: http://127.0.0.1:36919/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:36755,
Local directory: /tmp/dask/dask-worker-space/worker-lbz8q9e6,Local directory: /tmp/dask/dask-worker-space/worker-lbz8q9e6

0,1
Comm: tcp://127.0.0.1:39095,Total threads: 1
Dashboard: http://127.0.0.1:36227/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40201,
Local directory: /tmp/dask/dask-worker-space/worker-2mllhglb,Local directory: /tmp/dask/dask-worker-space/worker-2mllhglb


## Preloading tools & libraries

In [7]:
import sys
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
print("seaborn version: {}".format(sns.__version__))
print("Python version:\n{}\n".format(sys.version))
print("matplotlib version: {}".format(matplotlib.__version__))
print("pandas version: {}".format(pd.__version__))
print("numpy version: {}".format(np.__version__))

seaborn version: 0.11.2
Python version:
3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) 
[GCC 9.3.0]

matplotlib version: 3.4.2
pandas version: 1.3.1
numpy version: 1.19.5


# Data location

In [1]:
DATA_BASE_PATH = "../data"