## Boilerplate code for using Dask to read from Snowflake

In [None]:
import dask.dataframe as dd
# Read the `accounts` table from Snowflake into a Dask DataFrame.
# The connection URI is a placeholder: substitute real account, warehouse and
# role values (and avoid committing real credentials to the notebook).
# `id` is used as the index column and the read is split into 10 partitions.
read_options = {'index_col': 'id', 'npartitions': 10}
df = dd.read_sql_table(
    'accounts',
    'snowflake://user:pass@...warehouse=...role=...',
    **read_options
)

In [None]:
## Please check out the API documentation at https://yourdomain.hyperplane.dev/hyperplane_docs/api.html for more details

from hyperplane import notebook_common as nc
# Number of nodes to spin up.
num_workers = 2

# Node-specific parameters; pick the values that match the node type:
#   16_128 nodes: 110 GB allocatable, 15 cores
#   16_16 nodes:   12 GB allocatable, 15 cores
#   32_32 nodes:   27 GB allocatable, 28 cores
total_memory = 110
cors_per_worker = 15

# Each node runs `nprocs` processes. Memory is divided evenly between them and
# each process gets cores / nprocs threads (truncated to an integer).
nprocs = 15
nthreads = int(cors_per_worker / nprocs)
ram_gb_per_proc = total_memory / nprocs

print(f'initializing with {num_workers} num_workers, {nprocs} nprocs each proc has {ram_gb_per_proc} GB')

# Collect the settings once, then hand them to the cluster initializer.
cluster_options = dict(
    num_workers=num_workers,
    nprocs=nprocs,
    nthreads=nthreads,
    ram_gb_per_proc=ram_gb_per_proc,
    cores_per_worker=cors_per_worker,
)
client, cluster = nc.initialize_cluster(**cluster_options)

In [None]:
import snowflake.connector
from dask.dataframe import from_delayed
from dask.distributed import delayed

@delayed
def load(connection_info, query, start, end):
    """Run one partition of a Snowflake query and return it as a pandas DataFrame.

    The function is wrapped with ``delayed``, so calling it builds a task
    rather than running the query immediately.

    Args:
        connection_info: Mapping of keyword arguments forwarded to
            ``snowflake.connector.connect``.
        query: SQL text with two bind placeholders for the partition bounds.
        start: First bind value (the partition's start bound).
        end: Second bind value (the partition's end bound).

    Returns:
        The result of ``cursor.fetch_pandas_all()`` for this partition.
    """
    conn = snowflake.connector.connect(**connection_info)
    try:
        cur = conn.cursor()
        try:
            # Bind values must be passed as ONE sequence. Passing them as
            # separate positional arguments would make `end` fill a different
            # execute() parameter instead of being bound into the query.
            cur.execute(query, (start, end))
            return cur.fetch_pandas_all()
        finally:
            cur.close()
    finally:
        # Always release the session, even if the query or fetch fails, so
        # each partition task does not leak a Snowflake connection.
        conn.close()
# NOTE: `connection_info`, `query` and `partitions` (an iterable of
# (start, end) pairs) are not defined in the visible cells and must be set
# before this cell is run.
# from_delayed expects the delayed partitions as a single list; unpacking the
# list with `*` would pass the second partition as `meta`, the third as
# `divisions`, and so on.
ddf = from_delayed([load(connection_info, query, st, ed) for st, ed in partitions])
# persist() returns a new collection rather than modifying `ddf` in place, so
# rebind the name to keep a reference to the persisted data.
ddf = ddf.persist()

In [None]:
# Tear down in order: the client first, then the cluster it was attached to.
for resource in (client, cluster):
    resource.close()