# Flexible Distributed Computing with `client.run()`

## Launch Cloud Computing Resources

In [1]:
import coiled

In [2]:
# Settings for the Coiled cluster used throughout this notebook:
# a named cluster with five workers and package sync enabled.
cluster_options = {
    "name": "client-run",
    "n_workers": 5,
    "package_sync": True,
}
cluster = coiled.Cluster(**cluster_options)

Output()

In [3]:
from distributed import Client
# Connect a Dask client to the cluster created above; every later
# `client.run(...)` call in this notebook goes through this object.
client = Client(address=cluster)


+---------+--------+-----------+---------+
| Package | client | scheduler | workers |
+---------+--------+-----------+---------+
| lz4     | 4.0.0  | 4.0.2     | 4.0.2   |
+---------+--------+-----------+---------+


## Do Some Dask Things

In [4]:
import dask.dataframe as dd

In [6]:
# Location of the Parquet dataset read below.
GITHUB_ARCHIVE_URI = "s3://coiled-datasets/github-archive/github-archive-2015.parq/"

ddf = dd.read_parquet(GITHUB_ARCHIVE_URI)

# Preview the first few rows.
ddf.head()

Unnamed: 0,user,repo,created_at,message,author
0,soumith,soumith/fbcunn,2015-01-01T01:00:00Z,"back to old structure, except lua files moved out",Soumith Chintala
1,soumith,soumith/fbcunn,2015-01-01T01:00:00Z,...,Soumith Chintala
2,soumith,soumith/fbcunn,2015-01-01T01:00:00Z,...,Soumith Chintala
3,soumith,soumith/fbcunn,2015-01-01T01:00:00Z,...,Soumith Chintala
4,radix,radix/effect,2015-01-01T01:00:00Z,put the auto-generated API docs in the reposit...,Christopher Armstrong


In [7]:
# Count the non-null entries of every column per user, then preview the result.
rows_per_user = ddf.groupby('user').count()
rows_per_user.head()

Unnamed: 0_level_0,repo,created_at,message,author
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1995parham,873,873,873,873
247321453,78,78,78,78
3DJakob,36,36,36,36
3ft9,13,13,13,13
501st-alpha1,451,451,451,451


## Do Some Generic Python Things

In [13]:
def create_txt_file(content):
    """Write ``content`` to ``myfile.txt`` in the current working directory.

    Any existing ``myfile.txt`` is truncated first.

    Parameters
    ----------
    content : str
        Text to store in the file.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``content`` is not a string (raised by the underlying ``write``).
    """
    # The context manager closes the handle even when ``write`` raises,
    # so a failed call does not leak an open file descriptor.
    with open('myfile.txt', 'w') as file:
        file.write(content)

In [14]:
# Run `create_txt_file` through `client.run`; the result is a dict keyed by
# worker address, and each value is the function's return value (None).
file_content = "Add some content to our file."
client.run(create_txt_file, file_content)

{'tls://10.0.11.72:40443': None,
 'tls://10.0.13.9:34641': None,
 'tls://10.0.2.42:41165': None,
 'tls://10.0.7.127:40571': None,
 'tls://10.0.9.206:34523': None}

In [16]:
def read_file(filename):
    """Print the full contents of the text file at ``filename``.

    The text is written to the standard output of whichever process executes
    the function; nothing is returned.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If the file cannot be opened (for example, it does not exist).
    """
    # Use a context manager so the handle is always closed; the previous
    # version opened the file and never closed it.
    with open(filename, "r") as file:
        print(file.read())

In [17]:
# `read_file` prints the text and returns nothing, so every value in the
# dict returned by `client.run` is None.
target_file = "myfile.txt"
client.run(read_file, target_file)

{'tls://10.0.11.72:40443': None,
 'tls://10.0.13.9:34641': None,
 'tls://10.0.2.42:41165': None,
 'tls://10.0.7.127:40571': None,
 'tls://10.0.9.206:34523': None}

In [18]:
# `os` must be imported before this cell runs; without it a fresh
# top-to-bottom execution fails here with NameError.
import os

# Ask every worker for the id of the process it is running in.
client.run(os.getpid)

{'tls://10.0.11.72:40443': 36,
 'tls://10.0.13.9:34641': 36,
 'tls://10.0.2.42:41165': 36,
 'tls://10.0.7.127:40571': 36,
 'tls://10.0.9.206:34523': 36}

## Load a Model

## Do Some System Things

In [9]:
import os

In [10]:
# Create the directory on every worker. `os.makedirs(..., exist_ok=True)` is
# used instead of `os.mkdir` so re-running this cell does not raise
# FileExistsError once the directory already exists. The extra keyword
# argument is forwarded by `client.run` to the function.
client.run(os.makedirs, "my-directory", exist_ok=True)

{'tls://10.0.11.72:40443': None,
 'tls://10.0.13.9:34641': None,
 'tls://10.0.2.42:41165': None,
 'tls://10.0.7.127:40571': None,
 'tls://10.0.9.206:34523': None}

In [15]:
# `os.system` returns the command's exit status, not its output, so the
# result maps each worker address to the status of `ls`.
shell_command = "ls"
exit_statuses = client.run(os.system, shell_command)
exit_statuses

{'tls://10.0.11.72:40443': 0,
 'tls://10.0.13.9:34641': 0,
 'tls://10.0.2.42:41165': 0,
 'tls://10.0.7.127:40571': 0,
 'tls://10.0.9.206:34523': 0}

## Do Some Dask Debugging

In [8]:
# find where each worker is spilling data to disk
def get_local_directory(dask_worker):
    """Return the `local_directory` attribute of the given worker object."""
    return dask_worker.local_directory


client.run(get_local_directory)

{'tls://10.0.11.72:40443': '/scratch/dask-worker-space/worker-a9vxc2xf',
 'tls://10.0.13.9:34641': '/scratch/dask-worker-space/worker-re_nsb7_',
 'tls://10.0.2.42:41165': '/scratch/dask-worker-space/worker-1siihsgu',
 'tls://10.0.7.127:40571': '/scratch/dask-worker-space/worker-fj7utl8b',
 'tls://10.0.9.206:34523': '/scratch/dask-worker-space/worker-u0x0seip'}