## Initialize the Dask Client Dashboard
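Sets up a local Dask cluster that runs the computations. Displaying the client shows the cluster details (workers, threads, memory) along with a link to the dashboard, where the computations can be monitored.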

In [1]:
from dask.distributed import Client, progress

# Start a local cluster: 4 workers with 2 threads each, every worker capped at 8GB.
client = Client(
    n_workers=4,
    threads_per_worker=2,
    memory_limit='8GB',
)
# Leaving the client as the last expression renders its summary
# (dashboard link, worker count, threads and memory).
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 33999 instead


0,1
Connection method: Cluster object,Cluster type: LocalCluster
Dashboard: http://127.0.0.1:33999/status,

0,1
Status: running,Using processes: True
Dashboard: http://127.0.0.1:33999/status,Workers: 4
Total threads:  8,Total memory:  29.80 GiB

0,1
Comm: tcp://127.0.0.1:40471,Workers: 4
Dashboard: http://127.0.0.1:33999/status,Total threads:  8
Started:  Just now,Total memory:  29.80 GiB

0,1
Comm: tcp://127.0.0.1:41517,Total threads: 2
Dashboard: http://127.0.0.1:37185/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:46301,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-cuxcjjsg,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-cuxcjjsg

0,1
Comm: tcp://127.0.0.1:34387,Total threads: 2
Dashboard: http://127.0.0.1:32911/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45159,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-0_ra_jby,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-0_ra_jby

0,1
Comm: tcp://127.0.0.1:34411,Total threads: 2
Dashboard: http://127.0.0.1:34921/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:37755,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-w3bz3b9y,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-w3bz3b9y

0,1
Comm: tcp://127.0.0.1:45988,Total threads: 2
Dashboard: http://127.0.0.1:44061/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:44942,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-istj1r0g,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-istj1r0g


## Reading the data into the dataframe

Imports the Dask dataframe module and reads the CSV file into a Dask dataframe.

In [1]:
import dask
import dask.dataframe as dd

# Dataframe for hpcmod.
# The date column is parsed as datetimes, and the two numeric columns are
# given explicit float dtypes so the csv is read properly.
df = dd.read_csv(
    'data/hpcmod.csv',
    parse_dates=["_source.Date"],
    dtype={'_score': 'float64', '_source.time': 'float64'},
)

## Display the dataframe

The `head()` method displays the first few rows of the dataframe.

In [3]:
# Preview the first five rows (the default for head) of the dataframe
df.head(n=5)

Unnamed: 0,_index,_type,_id,_score,_source.Date,_source.host,_source.module,_source.path,_source.time,_source.user
0,hpcmod,hpc,0CrMz3cBcBNwXO6mF8LV,2.0,2021-01-06 11:23:00,n0,vscode/1.25.1,/usr/local/tools/modulefiles/vscode/1.25.1,1609950000.0,ackm
1,hpcmod,hpc,0SrMz3cBcBNwXO6mF8LV,2.0,2021-01-06 11:23:00,n0,cuda/9.2,/usr/local/tools/modulefiles/cuda/9.2,1609950000.0,ackm
2,hpcmod,hpc,0irMz3cBcBNwXO6mF8LV,2.0,2021-01-06 11:23:00,n0,anaconda3/2020.02,/usr/local/tools/modulefiles/anaconda3/2020.02,1609950000.0,ackm
3,hpcmod,hpc,0yrMz3cBcBNwXO6mF8LV,2.0,2021-01-06 11:23:00,n0,intelmpi/2017/u1,/usr/local/tools/modulefiles/intelmpi/2017/u1,1609950000.0,ackm
4,hpcmod,hpc,1CrMz3cBcBNwXO6mF8LV,2.0,2021-01-06 11:23:00,n0,R/3.5.1,/usr/local/tools/modulefiles/R/3.5.1,1609950000.0,ackm


## Dataframe Computations
### Length of the dataframe

The length of the dataframe (its number of rows) is calculated with the `len()` function.

In [4]:
# Number of rows in the dataframe, stored so a later cell can print it.
dflen = len(df)

In [5]:
%%time
# Shows the length of the dataframe
print("Length of dataframe:  " + str(dflen))

Length of dataframe:  10000
CPU times: user 240 µs, sys: 0 ns, total: 240 µs
Wall time: 198 µs


In [6]:
# Display the client summary again to inspect the cluster after the length computation.
client

0,1
Connection method: Cluster object,Cluster type: LocalCluster
Dashboard: http://127.0.0.1:33999/status,

0,1
Status: running,Using processes: True
Dashboard: http://127.0.0.1:33999/status,Workers: 4
Total threads:  8,Total memory:  29.80 GiB

0,1
Comm: tcp://127.0.0.1:40471,Workers: 4
Dashboard: http://127.0.0.1:33999/status,Total threads:  8
Started:  1 minute ago,Total memory:  29.80 GiB

0,1
Comm: tcp://127.0.0.1:41517,Total threads: 2
Dashboard: http://127.0.0.1:37185/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:46301,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-cuxcjjsg,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-cuxcjjsg

0,1
Comm: tcp://127.0.0.1:34387,Total threads: 2
Dashboard: http://127.0.0.1:32911/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45159,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-0_ra_jby,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-0_ra_jby

0,1
Comm: tcp://127.0.0.1:34411,Total threads: 2
Dashboard: http://127.0.0.1:34921/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:37755,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-w3bz3b9y,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-w3bz3b9y

0,1
Comm: tcp://127.0.0.1:45988,Total threads: 2
Dashboard: http://127.0.0.1:44061/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:44942,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-istj1r0g,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-istj1r0g


### Calculates the number of users that use each module

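Uses the `count()` function to count the user entries recorded for each module. Note that `count()` counts every non-null row in the group, so a user who appears several times for the same module is counted each time; it is not a count of distinct users.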

In [7]:
# Per-module count of user entries: group the rows by module, then count
# the values in the user column of each group.
count_user = (
    df.groupby("_source.module")['_source.user']
    .count()
)

In [8]:
%%time
# Evaluate the per-module counts and display the resulting series; %%time reports how long the evaluation takes.
count_user.compute()

CPU times: user 29.9 ms, sys: 5.75 ms, total: 35.6 ms
Wall time: 600 ms


_source.module
IRIS/20151016_AEBS              182
R/3.4.0                         142
R/3.5.1                          70
R/3.6.3                         122
R/4.0.0                          44
anaconda2/4.3.1                  96
anaconda3/2019.07                48
anaconda3/2020.02               168
anaconda3/2020.11                 2
anaconda3/5.0.1                  27
atom/1.19.4                      95
cuda/10.2                         2
cuda/11.0                         2
cuda/8.0                         10
cuda/9.2                        410
do-not-load/2020                  1
do-not-load/20200811              1
do-not-load/4.6.1                 1
do-not-load/98                   11
do-not-load/julia-1.5.3-mkl       7
dynare/4.5.4                     26
dynareOBC/3.30.53.1962            6
fame/11r5                       183
firefox/62.0.3                    1
firefox/76.0.1                   72
gcc/10.1.0                      183
gcc/7.1.0                       128
gcc/9.3      

In [9]:
# Display the client summary again to inspect the cluster after the per-module count.
client

0,1
Connection method: Cluster object,Cluster type: LocalCluster
Dashboard: http://127.0.0.1:33999/status,

0,1
Status: running,Using processes: True
Dashboard: http://127.0.0.1:33999/status,Workers: 4
Total threads:  8,Total memory:  29.80 GiB

0,1
Comm: tcp://127.0.0.1:40471,Workers: 4
Dashboard: http://127.0.0.1:33999/status,Total threads:  8
Started:  1 minute ago,Total memory:  29.80 GiB

0,1
Comm: tcp://127.0.0.1:41517,Total threads: 2
Dashboard: http://127.0.0.1:37185/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:46301,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-cuxcjjsg,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-cuxcjjsg

0,1
Comm: tcp://127.0.0.1:34387,Total threads: 2
Dashboard: http://127.0.0.1:32911/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45159,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-0_ra_jby,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-0_ra_jby

0,1
Comm: tcp://127.0.0.1:34411,Total threads: 2
Dashboard: http://127.0.0.1:34921/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:37755,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-w3bz3b9y,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-w3bz3b9y

0,1
Comm: tcp://127.0.0.1:45988,Total threads: 2
Dashboard: http://127.0.0.1:44061/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:44942,
Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-istj1r0g,Local directory: /home/asd/stha/pyslurm_decorators/test/dask-worker-space/worker-istj1r0g


### Calculates the number of modules that each user uses

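Uses the `count()` function to count the module entries recorded for each user. Note that `count()` counts every non-null row in the group, so a module that appears several times for the same user is counted each time; it is not a count of distinct modules.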

In [None]:
# Per-user count of module entries: group the rows by user, then count
# the values in the module column of each group.
count_mod = (
    df.groupby('_source.user')['_source.module']
    .count()
)

In [None]:
%%time
# Evaluate the per-user counts and display the result; %%time reports how long the evaluation takes.
count_mod.compute()

In [None]:
# Display the client summary again to inspect the cluster after the per-user count.
client

Across all of these examples, the client output is unchanged: there is no change in how the workload is distributed for the client.