## Grouby queries from h2o-benchmarks (parquet)
## Memory Usage Dask==2022.9.1, with AMM

In [60]:
import os

import coiled
import dask.dataframe as dd
from dask.distributed import Client, performance_report
from distributed.diagnostics import MemorySampler
import pandas as pd

In [61]:
cluster = coiled.Cluster(
    name="h2o-benchmarks",
    n_workers=10,
    worker_vm_types=["t3.large"],  # 2CPU, 8GiB
    scheduler_vm_types=["t3.large"],
    package_sync=True,
)

Output()

In [66]:
client = Client(cluster)
client.amm.start()
client

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 9
Total threads: 18,Total memory: 64.34 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 9
Dashboard: http://10.0.15.59:8787/status,Total threads: 18
Started: Just now,Total memory: 64.34 GiB

0,1
Comm: tls://10.0.6.194:40359,Total threads: 2
Dashboard: http://10.0.6.194:35397/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-2b5ry_tq,Local directory: /scratch/dask-worker-space/worker-2b5ry_tq

0,1
Comm: tls://10.0.5.228:37143,Total threads: 2
Dashboard: http://10.0.5.228:40829/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-xlyk2qz3,Local directory: /scratch/dask-worker-space/worker-xlyk2qz3

0,1
Comm: tls://10.0.8.83:34647,Total threads: 2
Dashboard: http://10.0.8.83:34441/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-oipcvemv,Local directory: /scratch/dask-worker-space/worker-oipcvemv

0,1
Comm: tls://10.0.9.144:38357,Total threads: 2
Dashboard: http://10.0.9.144:43629/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-n6b3xm0k,Local directory: /scratch/dask-worker-space/worker-n6b3xm0k

0,1
Comm: tls://10.0.11.43:35197,Total threads: 2
Dashboard: http://10.0.11.43:37547/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-j8t0xwt6,Local directory: /scratch/dask-worker-space/worker-j8t0xwt6

0,1
Comm: tls://10.0.15.240:36341,Total threads: 2
Dashboard: http://10.0.15.240:38393/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-3evd01ai,Local directory: /scratch/dask-worker-space/worker-3evd01ai

0,1
Comm: tls://10.0.12.251:33797,Total threads: 2
Dashboard: http://10.0.12.251:35765/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-wed27fww,Local directory: /scratch/dask-worker-space/worker-wed27fww

0,1
Comm: tls://10.0.11.253:37377,Total threads: 2
Dashboard: http://10.0.11.253:45171/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-bewcwisl,Local directory: /scratch/dask-worker-space/worker-bewcwisl

0,1
Comm: tls://10.0.13.17:39595,Total threads: 2
Dashboard: http://10.0.13.17:36627/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-fvkyj6i3,Local directory: /scratch/dask-worker-space/worker-fvkyj6i3


In [67]:
data_size = {
    "05GB": "s3://coiled-datasets/h2o-benchmark/N_1e7_K_1e2_parquet/*.parquet",
    "5GB": "s3://coiled-datasets/h2o-benchmark/N_1e8_K_1e2_parquet/*.parquet",
    "50GB": "s3://coiled-datasets/h2o-benchmark/N_1e9_K_1e2_parquet/*.parquet",
}

In [68]:
ds = "50GB" # choose "05GB" , "5GB" or "50GB"
# report_dir = "performance-reports-2"

In [69]:
ddf = dd.read_parquet(
    data_size[ds],
    engine="pyarrow",
    storage_options={"anon": True},
)
ddf

Unnamed: 0_level_0,id1,id2,id3,id4,id5,id6,v1,v2,v3
npartitions=2000,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
,category[unknown],category[unknown],category[unknown],Int32,Int32,Int32,Int32,Int32,float64
,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...


In [70]:
ms = MemorySampler()

### Q1

In [71]:
with ms.sample("q1_sept_release"):
    ddf_q1 = ddf.groupby(
        "id1", dropna=False, observed=True
    ).agg({"v1": "sum"}).compute()

In [72]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 1 minute ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:42147,Total threads: 2
Dashboard: http://10.0.6.194:45757/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-o4wff__1,Local directory: /scratch/dask-worker-space/worker-o4wff__1

0,1
Comm: tls://10.0.5.228:38567,Total threads: 2
Dashboard: http://10.0.5.228:39285/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-yv_uwtec,Local directory: /scratch/dask-worker-space/worker-yv_uwtec

0,1
Comm: tls://10.0.8.83:38625,Total threads: 2
Dashboard: http://10.0.8.83:32897/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-vruo4a4p,Local directory: /scratch/dask-worker-space/worker-vruo4a4p

0,1
Comm: tls://10.0.9.144:44619,Total threads: 2
Dashboard: http://10.0.9.144:34299/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-42et4rl_,Local directory: /scratch/dask-worker-space/worker-42et4rl_

0,1
Comm: tls://10.0.10.45:43425,Total threads: 2
Dashboard: http://10.0.10.45:44489/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-dyuazqaw,Local directory: /scratch/dask-worker-space/worker-dyuazqaw

0,1
Comm: tls://10.0.11.43:38431,Total threads: 2
Dashboard: http://10.0.11.43:46697/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-cx7jgtia,Local directory: /scratch/dask-worker-space/worker-cx7jgtia

0,1
Comm: tls://10.0.15.240:36889,Total threads: 2
Dashboard: http://10.0.15.240:43757/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-vetrg8pd,Local directory: /scratch/dask-worker-space/worker-vetrg8pd

0,1
Comm: tls://10.0.12.251:37537,Total threads: 2
Dashboard: http://10.0.12.251:46837/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-p59ius39,Local directory: /scratch/dask-worker-space/worker-p59ius39

0,1
Comm: tls://10.0.11.253:41285,Total threads: 2
Dashboard: http://10.0.11.253:40315/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-s7tfy7lr,Local directory: /scratch/dask-worker-space/worker-s7tfy7lr

0,1
Comm: tls://10.0.13.17:34509,Total threads: 2
Dashboard: http://10.0.13.17:46337/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-ast6cjmt,Local directory: /scratch/dask-worker-space/worker-ast6cjmt


In [73]:
client.amm.start()

### Q2

In [74]:
with ms.sample("q2_sept_release"):    
    ddf_q2 = (ddf.groupby(["id1", "id2"], dropna=False, observed=True)
                .agg({"v1": "sum"})
                .compute()
             )

In [75]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 2 minutes ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:46071,Total threads: 2
Dashboard: http://10.0.6.194:43023/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-uwou58_h,Local directory: /scratch/dask-worker-space/worker-uwou58_h

0,1
Comm: tls://10.0.5.228:42145,Total threads: 2
Dashboard: http://10.0.5.228:40893/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-uvoe8m5q,Local directory: /scratch/dask-worker-space/worker-uvoe8m5q

0,1
Comm: tls://10.0.8.83:41391,Total threads: 2
Dashboard: http://10.0.8.83:36335/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-e_04cbsx,Local directory: /scratch/dask-worker-space/worker-e_04cbsx

0,1
Comm: tls://10.0.9.144:33469,Total threads: 2
Dashboard: http://10.0.9.144:46183/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-livugocv,Local directory: /scratch/dask-worker-space/worker-livugocv

0,1
Comm: tls://10.0.10.45:45071,Total threads: 2
Dashboard: http://10.0.10.45:39711/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-pfi1k0g5,Local directory: /scratch/dask-worker-space/worker-pfi1k0g5

0,1
Comm: tls://10.0.11.43:43527,Total threads: 2
Dashboard: http://10.0.11.43:38973/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-8onpe5u8,Local directory: /scratch/dask-worker-space/worker-8onpe5u8

0,1
Comm: tls://10.0.15.240:42181,Total threads: 2
Dashboard: http://10.0.15.240:42287/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-77317cwb,Local directory: /scratch/dask-worker-space/worker-77317cwb

0,1
Comm: tls://10.0.12.251:33803,Total threads: 2
Dashboard: http://10.0.12.251:36461/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-iwtwbmux,Local directory: /scratch/dask-worker-space/worker-iwtwbmux

0,1
Comm: tls://10.0.11.253:37685,Total threads: 2
Dashboard: http://10.0.11.253:37523/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-_1ktnaw6,Local directory: /scratch/dask-worker-space/worker-_1ktnaw6

0,1
Comm: tls://10.0.13.17:46245,Total threads: 2
Dashboard: http://10.0.13.17:38691/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-93bdyl0p,Local directory: /scratch/dask-worker-space/worker-93bdyl0p


In [76]:
client.amm.start()

### Q3 

Unable to run Q3 at all in June release due to high cardinality GroupBy issues.  However, in 2022.9.1 release, split_out > 1
defaults to shuffle-based GroupBy

In [77]:
ddf['id3'] = ddf['id3'].astype(str)

In [79]:
# Original query 3
with ms.sample("q3_sept_release"):
    ddf_q3 = (
        ddf.groupby("id3", dropna=False, observed=True)
        .agg({"v1": "sum", "v3": "mean"}, split_out=5)
        .compute()
    )

In [80]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 43 minutes ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:46071,Total threads: 2
Dashboard: http://10.0.6.194:43023/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-uwou58_h,Local directory: /scratch/dask-worker-space/worker-uwou58_h

0,1
Comm: tls://10.0.5.228:42145,Total threads: 2
Dashboard: http://10.0.5.228:40893/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-uvoe8m5q,Local directory: /scratch/dask-worker-space/worker-uvoe8m5q

0,1
Comm: tls://10.0.8.83:41391,Total threads: 2
Dashboard: http://10.0.8.83:36335/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-e_04cbsx,Local directory: /scratch/dask-worker-space/worker-e_04cbsx

0,1
Comm: tls://10.0.9.144:33469,Total threads: 2
Dashboard: http://10.0.9.144:46183/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-livugocv,Local directory: /scratch/dask-worker-space/worker-livugocv

0,1
Comm: tls://10.0.10.45:45071,Total threads: 2
Dashboard: http://10.0.10.45:39711/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-pfi1k0g5,Local directory: /scratch/dask-worker-space/worker-pfi1k0g5

0,1
Comm: tls://10.0.11.43:43527,Total threads: 2
Dashboard: http://10.0.11.43:38973/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-8onpe5u8,Local directory: /scratch/dask-worker-space/worker-8onpe5u8

0,1
Comm: tls://10.0.15.240:42181,Total threads: 2
Dashboard: http://10.0.15.240:42287/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-77317cwb,Local directory: /scratch/dask-worker-space/worker-77317cwb

0,1
Comm: tls://10.0.12.251:33803,Total threads: 2
Dashboard: http://10.0.12.251:36461/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-iwtwbmux,Local directory: /scratch/dask-worker-space/worker-iwtwbmux

0,1
Comm: tls://10.0.11.253:37685,Total threads: 2
Dashboard: http://10.0.11.253:37523/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-_1ktnaw6,Local directory: /scratch/dask-worker-space/worker-_1ktnaw6

0,1
Comm: tls://10.0.13.17:46245,Total threads: 2
Dashboard: http://10.0.13.17:38691/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-93bdyl0p,Local directory: /scratch/dask-worker-space/worker-93bdyl0p


In [81]:
client.amm.start()

### Q4

In [82]:
with ms.sample("q4_sept_release"):
    ddf_q4 = (
        ddf.groupby("id4", dropna=False, observed=True)
        .agg({"v1": "mean", "v2": "mean", "v3": "mean"}, split_out=4)
        .compute()
    )

In [83]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 49 minutes ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:46071,Total threads: 2
Dashboard: http://10.0.6.194:43023/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-uwou58_h,Local directory: /scratch/dask-worker-space/worker-uwou58_h

0,1
Comm: tls://10.0.5.228:42145,Total threads: 2
Dashboard: http://10.0.5.228:40893/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-uvoe8m5q,Local directory: /scratch/dask-worker-space/worker-uvoe8m5q

0,1
Comm: tls://10.0.8.83:41391,Total threads: 2
Dashboard: http://10.0.8.83:36335/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-e_04cbsx,Local directory: /scratch/dask-worker-space/worker-e_04cbsx

0,1
Comm: tls://10.0.9.144:33469,Total threads: 2
Dashboard: http://10.0.9.144:46183/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-livugocv,Local directory: /scratch/dask-worker-space/worker-livugocv

0,1
Comm: tls://10.0.10.45:45071,Total threads: 2
Dashboard: http://10.0.10.45:39711/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-pfi1k0g5,Local directory: /scratch/dask-worker-space/worker-pfi1k0g5

0,1
Comm: tls://10.0.11.43:43527,Total threads: 2
Dashboard: http://10.0.11.43:38973/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-8onpe5u8,Local directory: /scratch/dask-worker-space/worker-8onpe5u8

0,1
Comm: tls://10.0.15.240:42181,Total threads: 2
Dashboard: http://10.0.15.240:42287/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-77317cwb,Local directory: /scratch/dask-worker-space/worker-77317cwb

0,1
Comm: tls://10.0.12.251:33803,Total threads: 2
Dashboard: http://10.0.12.251:36461/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-iwtwbmux,Local directory: /scratch/dask-worker-space/worker-iwtwbmux

0,1
Comm: tls://10.0.11.253:37685,Total threads: 2
Dashboard: http://10.0.11.253:37523/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-_1ktnaw6,Local directory: /scratch/dask-worker-space/worker-_1ktnaw6

0,1
Comm: tls://10.0.13.17:46245,Total threads: 2
Dashboard: http://10.0.13.17:38691/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-93bdyl0p,Local directory: /scratch/dask-worker-space/worker-93bdyl0p


In [84]:
client.amm.start()

### Q5

In [85]:
with ms.sample("q5_sept_release"):
    ddf_q5 =(
        ddf.groupby("id6", dropna=False, observed=True)
        .agg({"v1": "sum", "v2": "sum", "v3": "sum"}, split_out=4)
        .compute()
    )

In [86]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 59 minutes ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:46071,Total threads: 2
Dashboard: http://10.0.6.194:43023/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-uwou58_h,Local directory: /scratch/dask-worker-space/worker-uwou58_h

0,1
Comm: tls://10.0.5.228:42145,Total threads: 2
Dashboard: http://10.0.5.228:40893/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-uvoe8m5q,Local directory: /scratch/dask-worker-space/worker-uvoe8m5q

0,1
Comm: tls://10.0.8.83:41391,Total threads: 2
Dashboard: http://10.0.8.83:36335/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-e_04cbsx,Local directory: /scratch/dask-worker-space/worker-e_04cbsx

0,1
Comm: tls://10.0.9.144:33469,Total threads: 2
Dashboard: http://10.0.9.144:46183/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-livugocv,Local directory: /scratch/dask-worker-space/worker-livugocv

0,1
Comm: tls://10.0.10.45:45071,Total threads: 2
Dashboard: http://10.0.10.45:39711/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-pfi1k0g5,Local directory: /scratch/dask-worker-space/worker-pfi1k0g5

0,1
Comm: tls://10.0.11.43:43527,Total threads: 2
Dashboard: http://10.0.11.43:38973/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-8onpe5u8,Local directory: /scratch/dask-worker-space/worker-8onpe5u8

0,1
Comm: tls://10.0.15.240:42181,Total threads: 2
Dashboard: http://10.0.15.240:42287/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-77317cwb,Local directory: /scratch/dask-worker-space/worker-77317cwb

0,1
Comm: tls://10.0.12.251:33803,Total threads: 2
Dashboard: http://10.0.12.251:36461/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-iwtwbmux,Local directory: /scratch/dask-worker-space/worker-iwtwbmux

0,1
Comm: tls://10.0.11.253:37685,Total threads: 2
Dashboard: http://10.0.11.253:37523/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-_1ktnaw6,Local directory: /scratch/dask-worker-space/worker-_1ktnaw6

0,1
Comm: tls://10.0.13.17:46245,Total threads: 2
Dashboard: http://10.0.13.17:38691/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-93bdyl0p,Local directory: /scratch/dask-worker-space/worker-93bdyl0p


In [87]:
client.amm.start()

In [88]:
### Q6 -- Unable to run in June or Sept release b/c of median not implemented

### Q7 

In [89]:
with ms.sample("q7_sept_release"):
    ddf_q7 = (
        ddf.groupby("id3", dropna=False, observed=True)
        .agg({"v1": "max", "v2": "min"}, split_out=4)
        .assign(range_v1_v2=lambda x: x["v1"] - x["v2"])[["range_v1_v2"]]
        .compute()
    )

In [101]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 2 hours ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:46071,Total threads: 2
Dashboard: http://10.0.6.194:43023/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-uwou58_h,Local directory: /scratch/dask-worker-space/worker-uwou58_h

0,1
Comm: tls://10.0.5.228:42145,Total threads: 2
Dashboard: http://10.0.5.228:40893/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-uvoe8m5q,Local directory: /scratch/dask-worker-space/worker-uvoe8m5q

0,1
Comm: tls://10.0.8.83:41391,Total threads: 2
Dashboard: http://10.0.8.83:36335/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-e_04cbsx,Local directory: /scratch/dask-worker-space/worker-e_04cbsx

0,1
Comm: tls://10.0.9.144:33469,Total threads: 2
Dashboard: http://10.0.9.144:46183/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-livugocv,Local directory: /scratch/dask-worker-space/worker-livugocv

0,1
Comm: tls://10.0.10.45:45071,Total threads: 2
Dashboard: http://10.0.10.45:39711/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-pfi1k0g5,Local directory: /scratch/dask-worker-space/worker-pfi1k0g5

0,1
Comm: tls://10.0.11.43:43527,Total threads: 2
Dashboard: http://10.0.11.43:38973/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-8onpe5u8,Local directory: /scratch/dask-worker-space/worker-8onpe5u8

0,1
Comm: tls://10.0.15.240:42181,Total threads: 2
Dashboard: http://10.0.15.240:42287/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-77317cwb,Local directory: /scratch/dask-worker-space/worker-77317cwb

0,1
Comm: tls://10.0.12.251:33803,Total threads: 2
Dashboard: http://10.0.12.251:36461/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-iwtwbmux,Local directory: /scratch/dask-worker-space/worker-iwtwbmux

0,1
Comm: tls://10.0.11.253:37685,Total threads: 2
Dashboard: http://10.0.11.253:37523/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-_1ktnaw6,Local directory: /scratch/dask-worker-space/worker-_1ktnaw6

0,1
Comm: tls://10.0.13.17:46245,Total threads: 2
Dashboard: http://10.0.13.17:38691/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-93bdyl0p,Local directory: /scratch/dask-worker-space/worker-93bdyl0p


In [102]:
client.amm.start()

### Q8

In [103]:
with ms.sample("q8_sept_release"):    
    ddf_q8 = ddf[["id6", "v1", "v2", "v3"]]
    (
        ddf_q8[~ddf_q8["v3"].isna()][["id6", "v3"]]
        .groupby("id6", dropna=False, observed=True)
        .apply(
            lambda x: x.nlargest(2, columns="v3"),
            meta={"id6": "Int64", "v3": "float64"},
        )[["v3"]]
        .compute()
    )

In [104]:
client.restart()

0,1
Connection method: Cluster object,Cluster type: coiled.ClusterBeta
Dashboard: http://18.221.196.1:8787,

0,1
Dashboard: http://18.221.196.1:8787,Workers: 10
Total threads: 20,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.15.59:8786,Workers: 10
Dashboard: http://10.0.15.59:8787/status,Total threads: 20
Started: 2 hours ago,Total memory: 71.49 GiB

0,1
Comm: tls://10.0.6.194:46071,Total threads: 2
Dashboard: http://10.0.6.194:43023/status,Memory: 7.15 GiB
Nanny: tls://10.0.6.194:34495,
Local directory: /scratch/dask-worker-space/worker-uwou58_h,Local directory: /scratch/dask-worker-space/worker-uwou58_h

0,1
Comm: tls://10.0.5.228:42145,Total threads: 2
Dashboard: http://10.0.5.228:40893/status,Memory: 7.16 GiB
Nanny: tls://10.0.5.228:43731,
Local directory: /scratch/dask-worker-space/worker-uvoe8m5q,Local directory: /scratch/dask-worker-space/worker-uvoe8m5q

0,1
Comm: tls://10.0.8.83:41391,Total threads: 2
Dashboard: http://10.0.8.83:36335/status,Memory: 7.14 GiB
Nanny: tls://10.0.8.83:46077,
Local directory: /scratch/dask-worker-space/worker-e_04cbsx,Local directory: /scratch/dask-worker-space/worker-e_04cbsx

0,1
Comm: tls://10.0.9.144:33469,Total threads: 2
Dashboard: http://10.0.9.144:46183/status,Memory: 7.15 GiB
Nanny: tls://10.0.9.144:44055,
Local directory: /scratch/dask-worker-space/worker-livugocv,Local directory: /scratch/dask-worker-space/worker-livugocv

0,1
Comm: tls://10.0.10.45:45071,Total threads: 2
Dashboard: http://10.0.10.45:39711/status,Memory: 7.15 GiB
Nanny: tls://10.0.10.45:39163,
Local directory: /scratch/dask-worker-space/worker-pfi1k0g5,Local directory: /scratch/dask-worker-space/worker-pfi1k0g5

0,1
Comm: tls://10.0.11.43:43527,Total threads: 2
Dashboard: http://10.0.11.43:38973/status,Memory: 7.14 GiB
Nanny: tls://10.0.11.43:38445,
Local directory: /scratch/dask-worker-space/worker-8onpe5u8,Local directory: /scratch/dask-worker-space/worker-8onpe5u8

0,1
Comm: tls://10.0.15.240:42181,Total threads: 2
Dashboard: http://10.0.15.240:42287/status,Memory: 7.15 GiB
Nanny: tls://10.0.15.240:43769,
Local directory: /scratch/dask-worker-space/worker-77317cwb,Local directory: /scratch/dask-worker-space/worker-77317cwb

0,1
Comm: tls://10.0.12.251:33803,Total threads: 2
Dashboard: http://10.0.12.251:36461/status,Memory: 7.15 GiB
Nanny: tls://10.0.12.251:32845,
Local directory: /scratch/dask-worker-space/worker-iwtwbmux,Local directory: /scratch/dask-worker-space/worker-iwtwbmux

0,1
Comm: tls://10.0.11.253:37685,Total threads: 2
Dashboard: http://10.0.11.253:37523/status,Memory: 7.15 GiB
Nanny: tls://10.0.11.253:40469,
Local directory: /scratch/dask-worker-space/worker-_1ktnaw6,Local directory: /scratch/dask-worker-space/worker-_1ktnaw6

0,1
Comm: tls://10.0.13.17:46245,Total threads: 2
Dashboard: http://10.0.13.17:38691/status,Memory: 7.17 GiB
Nanny: tls://10.0.13.17:44077,
Local directory: /scratch/dask-worker-space/worker-93bdyl0p,Local directory: /scratch/dask-worker-space/worker-93bdyl0p


In [105]:
client.amm.start()

### Q9

In [106]:
# try:
with ms.sample("q9_sept_release"):
    ddf_q9 = ddf[["id2", "id4", "v1", "v2"]]
    (
        ddf_q9[["id2", "id4", "v1", "v2"]]
        .groupby(["id2", "id4"], dropna=False, observed=True)
        .apply(
            lambda x: pd.Series({"r2": x.corr()["v1"]["v2"] ** 2}),
            meta={"r2": "float64"},
        )
        .compute()
    )

In [110]:
ms.to_pandas(align=True).to_csv("sept_2022.csv")

2022-09-20 11:36:54,970 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
2022-09-20 11:36:54,974 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
Traceback (most recent call last):
  File "/Users/greghayes/mambaforge/envs/coiled-runtime/lib/python3.10/site-packages/distributed/comm/tcp.py", line 491, in connect
    stream = await self.client.connect(
  File "/Users/greghayes/mambaforge/envs/coiled-runtime/lib/python3.10/site-packages/tornado/tcpclient.py", line 275, in connect
    af, addr, stream = await connector.start(connect_timeout=timeout)
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/greghayes/mambaforge/envs/coiled-runtime/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct c