### Setup for Log Files

Creating necessary directories to store log files.

In [3]:
from os.path import exists
from pathlib import Path
import os

# Directory under the user's home folder that receives the Dask worker logs.
home = str(Path.home())
dasklogs = f"{home}/dask-test-logs"
# makedirs(exist_ok=True) is idempotent: re-running the cell is a no-op, and it
# avoids the check-then-create race of a separate exists()/mkdir() pair.
os.makedirs(dasklogs, exist_ok=True)

## Initializing the Slurm Cluster

Dask allows users to specify parameters of the SLURM cluster. [Other parameters](https://jobqueue.dask.org/en/latest/generated/dask_jobqueue.SLURMCluster.html) besides the ones below can also be specified for the SLURM cluster. 

In [None]:
import dask
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Options handed to SLURMCluster; they determine the job script that is
# submitted to SLURM for the workers.
slurm_options = {
    "cores": 4,
    "memory": "8GB",
    "processes": 2,
    "queue": "normal",
    "shebang": '#!/usr/bin/env bash',
    "local_directory": '/tmp',
    "death_timeout": "15s",
    "interface": "ib0",
    "log_directory": dasklogs,
    "project": "boc",
}
cluster = SLURMCluster(**slurm_options)

# Attach a client to the cluster; the bare name below renders its summary.
client = Client(cluster)
client

### Slurm Job Script

Displaying the job script for the SLURMCluster that was created above.

In [5]:
# Fetch the generated SLURM batch script from the cluster, then print it.
job_script = cluster.job_script()
print(job_script)

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e /home/asd/stha/dask-test-logs/dask-worker-%J.err
#SBATCH -o /home/asd/stha/dask-test-logs/dask-worker-%J.out
#SBATCH -p normal
#SBATCH -A boc
#SBATCH -n 1
#SBATCH --cpus-per-task=4
#SBATCH --mem=8G
#SBATCH -t 00:30:00

/usr/local/tools/anaconda3/2021.05/bin/python -m distributed.cli.dask_worker tcp://10.55.50.8:37579 --nthreads 2 --nprocs 2 --memory-limit 3.73GiB --name dummy-name --nanny --death-timeout 15s --local-directory /tmp --interface ib0 --protocol tcp://



## Scaling the cluster to 1 node

This requests 2 Dask workers. Because each SLURM job starts 2 worker processes (`processes=2`), both workers fit in a single job.

In [6]:
# Request 2 workers from the cluster.
cluster.scale(2)
# scale() returns before the SLURM job has started, so block until both workers
# have connected; otherwise later cells may run on fewer workers than intended.
# NOTE: this waits for as long as the job stays queued in SLURM.
client.wait_for_workers(2)

## Reading the data into the dataframe

Reading the csv file into a dataframe with the read_csv() function.  

In [7]:
import dask
import dask.dataframe as dd

# dtypes specified to read csv properly
hpcmod_dtypes = {'_score': 'float64', '_source.time': 'float64'}

# dataframe for hpcmod
df = dd.read_csv(
    'data/hpcmod.csv',
    parse_dates=["_source.Date"],
    dtype=hpcmod_dtypes,
)

### Displaying the Dataframe

The head() function displays the beginning of the dataframe.

In [None]:
# Preview the beginning of the dataframe.
df.head()

### Dataframe Computations
### Length of the dataframe

Length of the dataframe is calculated with the len() function.

In [None]:
dflen = len(df)

In [None]:
%%time

# Shows the length of the dataframe
print("Length of dataframe:  " + str(dflen))

In [None]:
# Display the client summary to inspect the current state of the cluster.
client

### Calculates the number of users that use each module

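Uses the count() function to count, for each module, the number of records with a non-null `_source.user` value. Note that this is the number of usage records per module rather than the number of distinct users (which would require `nunique()`).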

In [11]:
# Group the records by module, then count the non-null `_source.user` entries
# in each group. The result is lazy until .compute() is called on it.
# NOTE(review): count() tallies records, not distinct users -- confirm whether
# nunique() was intended.
records_by_module = df.groupby("_source.module")
count_user = records_by_module['_source.user'].count()

In [12]:
%%time
# Trigger the lazy per-module count and report how long the computation takes.
count_user.compute()

CPU times: user 25.4 ms, sys: 1.56 ms, total: 26.9 ms
Wall time: 522 ms


_source.module
IRIS/20151016_AEBS              182
R/3.4.0                         142
R/3.5.1                          70
R/3.6.3                         122
R/4.0.0                          44
anaconda2/4.3.1                  96
anaconda3/2019.07                48
anaconda3/2020.02               168
anaconda3/2020.11                 2
anaconda3/5.0.1                  27
atom/1.19.4                      95
cuda/10.2                         2
cuda/11.0                         2
cuda/8.0                         10
cuda/9.2                        410
do-not-load/2020                  1
do-not-load/20200811              1
do-not-load/4.6.1                 1
do-not-load/98                   11
do-not-load/julia-1.5.3-mkl       7
dynare/4.5.4                     26
dynareOBC/3.30.53.1962            6
fame/11r5                       183
firefox/62.0.3                    1
firefox/76.0.1                   72
gcc/10.1.0                      183
gcc/7.1.0                       128
gcc/9.3      

In [13]:
# Display the client summary (workers, threads, memory) after the computation.
client

0,1
Connection method: Cluster object,Cluster type: SLURMCluster
Dashboard: http://10.55.50.6:37426/status,

0,1
Dashboard: http://10.55.50.6:37426/status,Workers: 2
Total threads:  4,Total memory:  7.46 GiB

0,1
Comm: tcp://10.55.50.6:33057,Workers: 2
Dashboard: http://10.55.50.6:37426/status,Total threads:  4
Started:  1 minute ago,Total memory:  7.46 GiB

0,1
Comm: tcp://10.55.50.2:34587,Total threads: 2
Dashboard: http://10.55.50.2:38738/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:35751,
Local directory: /tmp/dask-worker-space/worker-foaakttw,Local directory: /tmp/dask-worker-space/worker-foaakttw

0,1
Comm: tcp://10.55.50.2:34937,Total threads: 2
Dashboard: http://10.55.50.2:33996/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:35491,
Local directory: /tmp/dask-worker-space/worker-7xyj6i1h,Local directory: /tmp/dask-worker-space/worker-7xyj6i1h


### Calculates the number of modules that each user uses

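Uses the count() function to count, for each user, the number of records with a non-null `_source.module` value. Note that this is the number of module-usage records per user rather than the number of distinct modules (which would require `nunique()`).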

In [15]:
# Group the records by user, then count the non-null `_source.module` entries
# in each group. The result is lazy until .compute() is called on it.
# NOTE(review): count() tallies records, not distinct modules -- confirm whether
# nunique() was intended.
records_by_user = df.groupby('_source.user')
count_mod = records_by_user['_source.module'].count()

In [16]:
%%time
# Trigger the lazy per-user count and report how long the computation takes.
count_mod.compute()

CPU times: user 17.2 ms, sys: 571 µs, total: 17.8 ms
Wall time: 178 ms


_source.user
ackm    1401
allj       2
asmo       6
bahs       4
balv       4
brak      17
brys       5
cedw     755
chbo       1
chex       1
cken       1
cori       2
dech     238
gvod       7
hoso     590
kais       6
kart     238
kolm      66
kump       4
manu     368
ninz       4
pavc       3
petb       1
pugt     756
raie      44
ralc       3
reza      93
scec    3788
schl       6
shch       3
shli       3
shrp      58
shxi      21
smfe     152
soyl      12
suyi     112
tery      15
tuzc       6
utan     927
valg       7
wagj      24
walc       4
webl     134
witm       1
xuya      92
ziva      15
Name: _source.module, dtype: int64

In [17]:
# Display the client summary (workers, threads, memory) after the computation.
client

0,1
Connection method: Cluster object,Cluster type: SLURMCluster
Dashboard: http://10.55.50.6:37426/status,

0,1
Dashboard: http://10.55.50.6:37426/status,Workers: 2
Total threads:  4,Total memory:  7.46 GiB

0,1
Comm: tcp://10.55.50.6:33057,Workers: 2
Dashboard: http://10.55.50.6:37426/status,Total threads:  4
Started:  1 minute ago,Total memory:  7.46 GiB

0,1
Comm: tcp://10.55.50.2:34587,Total threads: 2
Dashboard: http://10.55.50.2:38738/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:35751,
Local directory: /tmp/dask-worker-space/worker-foaakttw,Local directory: /tmp/dask-worker-space/worker-foaakttw

0,1
Comm: tcp://10.55.50.2:34937,Total threads: 2
Dashboard: http://10.55.50.2:33996/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:35491,
Local directory: /tmp/dask-worker-space/worker-7xyj6i1h,Local directory: /tmp/dask-worker-space/worker-7xyj6i1h


## Scaling the cluster to 8 nodes

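This distributes the work across 16 workers, i.e. 8 SLURM jobs with 2 worker processes each (several jobs may be placed on the same physical node).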

In [19]:
# Request 16 workers from the cluster.
cluster.scale(16)
# scale() returns before the additional SLURM jobs have started. Block until all
# 16 workers have connected so that the timed computation in the next cell
# really runs on the enlarged cluster and not on the workers that existed before.
# NOTE: this waits for as long as the jobs stay queued in SLURM.
client.wait_for_workers(16)

### Calculating users per module with 8 nodes

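Re-runs the same per-module count() (number of non-null `_source.user` records for each module) on the scaled-up cluster so that the wall time can be compared with the 2-worker run.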

In [20]:
%%time
# Re-run the lazy per-module count after scaling and report how long it takes.
count_user.compute()

CPU times: user 29.5 ms, sys: 3.43 ms, total: 32.9 ms
Wall time: 177 ms


_source.module
IRIS/20151016_AEBS              182
R/3.4.0                         142
R/3.5.1                          70
R/3.6.3                         122
R/4.0.0                          44
anaconda2/4.3.1                  96
anaconda3/2019.07                48
anaconda3/2020.02               168
anaconda3/2020.11                 2
anaconda3/5.0.1                  27
atom/1.19.4                      95
cuda/10.2                         2
cuda/11.0                         2
cuda/8.0                         10
cuda/9.2                        410
do-not-load/2020                  1
do-not-load/20200811              1
do-not-load/4.6.1                 1
do-not-load/98                   11
do-not-load/julia-1.5.3-mkl       7
dynare/4.5.4                     26
dynareOBC/3.30.53.1962            6
fame/11r5                       183
firefox/62.0.3                    1
firefox/76.0.1                   72
gcc/10.1.0                      183
gcc/7.1.0                       128
gcc/9.3      

In [21]:
# Display the client summary to confirm the number of workers after scaling.
client

0,1
Connection method: Cluster object,Cluster type: SLURMCluster
Dashboard: http://10.55.50.6:37426/status,

0,1
Dashboard: http://10.55.50.6:37426/status,Workers: 16
Total threads:  32,Total memory:  59.68 GiB

0,1
Comm: tcp://10.55.50.6:33057,Workers: 16
Dashboard: http://10.55.50.6:37426/status,Total threads:  32
Started:  2 minutes ago,Total memory:  59.68 GiB

0,1
Comm: tcp://10.55.50.2:34587,Total threads: 2
Dashboard: http://10.55.50.2:38738/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:35751,
Local directory: /tmp/dask-worker-space/worker-foaakttw,Local directory: /tmp/dask-worker-space/worker-foaakttw

0,1
Comm: tcp://10.55.50.2:34937,Total threads: 2
Dashboard: http://10.55.50.2:33996/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:35491,
Local directory: /tmp/dask-worker-space/worker-7xyj6i1h,Local directory: /tmp/dask-worker-space/worker-7xyj6i1h

0,1
Comm: tcp://10.55.50.3:34508,Total threads: 2
Dashboard: http://10.55.50.3:40466/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:35216,
Local directory: /tmp/dask-worker-space/worker-8g07ekmj,Local directory: /tmp/dask-worker-space/worker-8g07ekmj

0,1
Comm: tcp://10.55.50.3:41136,Total threads: 2
Dashboard: http://10.55.50.3:33602/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:37077,
Local directory: /tmp/dask-worker-space/worker-la48bkpu,Local directory: /tmp/dask-worker-space/worker-la48bkpu

0,1
Comm: tcp://10.55.50.3:39248,Total threads: 2
Dashboard: http://10.55.50.3:33885/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:33391,
Local directory: /tmp/dask-worker-space/worker-53xzq6ah,Local directory: /tmp/dask-worker-space/worker-53xzq6ah

0,1
Comm: tcp://10.55.50.3:41981,Total threads: 2
Dashboard: http://10.55.50.3:41239/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:38035,
Local directory: /tmp/dask-worker-space/worker-9pof9owx,Local directory: /tmp/dask-worker-space/worker-9pof9owx

0,1
Comm: tcp://10.55.50.3:40614,Total threads: 2
Dashboard: http://10.55.50.3:46580/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:39216,
Local directory: /tmp/dask-worker-space/worker-werwglmn,Local directory: /tmp/dask-worker-space/worker-werwglmn

0,1
Comm: tcp://10.55.50.3:45812,Total threads: 2
Dashboard: http://10.55.50.3:37888/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:44218,
Local directory: /tmp/dask-worker-space/worker-83s0a55c,Local directory: /tmp/dask-worker-space/worker-83s0a55c

0,1
Comm: tcp://10.55.50.3:33156,Total threads: 2
Dashboard: http://10.55.50.3:38306/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:40325,
Local directory: /tmp/dask-worker-space/worker-zixtkunt,Local directory: /tmp/dask-worker-space/worker-zixtkunt

0,1
Comm: tcp://10.55.50.3:40305,Total threads: 2
Dashboard: http://10.55.50.3:34068/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.3:36440,
Local directory: /tmp/dask-worker-space/worker-v4s1wdjh,Local directory: /tmp/dask-worker-space/worker-v4s1wdjh

0,1
Comm: tcp://10.55.50.2:42824,Total threads: 2
Dashboard: http://10.55.50.2:46115/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:33583,
Local directory: /tmp/dask-worker-space/worker-b6fkqpt2,Local directory: /tmp/dask-worker-space/worker-b6fkqpt2

0,1
Comm: tcp://10.55.50.2:37241,Total threads: 2
Dashboard: http://10.55.50.2:37005/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:42528,
Local directory: /tmp/dask-worker-space/worker-hyyomgvv,Local directory: /tmp/dask-worker-space/worker-hyyomgvv

0,1
Comm: tcp://10.55.50.2:43204,Total threads: 2
Dashboard: http://10.55.50.2:45696/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:33051,
Local directory: /tmp/dask-worker-space/worker-6d31pjam,Local directory: /tmp/dask-worker-space/worker-6d31pjam

0,1
Comm: tcp://10.55.50.2:43995,Total threads: 2
Dashboard: http://10.55.50.2:35472/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:37910,
Local directory: /tmp/dask-worker-space/worker-0311moy9,Local directory: /tmp/dask-worker-space/worker-0311moy9

0,1
Comm: tcp://10.55.50.2:33815,Total threads: 2
Dashboard: http://10.55.50.2:33192/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:34711,
Local directory: /tmp/dask-worker-space/worker-v29ud0xd,Local directory: /tmp/dask-worker-space/worker-v29ud0xd

0,1
Comm: tcp://10.55.50.2:37782,Total threads: 2
Dashboard: http://10.55.50.2:35473/status,Memory: 3.73 GiB
Nanny: tcp://10.55.50.2:34199,
Local directory: /tmp/dask-worker-space/worker-79enzd5a,Local directory: /tmp/dask-worker-space/worker-79enzd5a
