# idact - Prometheus sandbox

## Initial setup

Add `idact` to path:

In [1]:
import sys
import os
import bitmath
import getpass
import contextlib
import fabric
import logging
import subprocess
from pprint import pprint

def append_idact_path():
    """Make the parent of the current working directory importable.

    The resolved (real) path of ``<cwd>/..`` is appended to ``sys.path`` so
    that the ``idact`` package imported below can be found when the notebook
    is run from a sub-directory of the repository.

    The call is idempotent: re-running the cell does not add the same
    directory to ``sys.path`` a second time.
    """
    idact_path = os.path.realpath(os.path.join(os.getcwd(), os.pardir))
    # Guard against duplicates, notebook cells are routinely re-executed.
    if idact_path not in sys.path:
        sys.path.append(idact_path)


append_idact_path()

from idact import *
from idact.detail.auth.set_password import set_password

# Point IDACT_KEY_LOCATION at a notebook-specific directory next to the
# current working directory, so keys generated for this sandbox stay separate
# from the user's own ~/.ssh (swap in the commented line to use ~/.ssh).
os.environ['IDACT_KEY_LOCATION'] = os.path.join(os.getcwd(), '../.notebook-ssh')
# os.environ['IDACT_KEY_LOCATION'] = os.path.expanduser('~/.ssh')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race, creates missing parents, and is safe to re-run.
os.makedirs(os.environ['IDACT_KEY_LOCATION'], exist_ok=True)

USER = 'plggarstka'

Hide debug information and set up the context manager stack (for testing purposes).

## Add cluster (only first run)

In [2]:
key = KeyType.RSA  # Generate RSA key
# key = os.path.join(os.path.expanduser('~/.ssh'), 'id_rsa')

In [3]:
# Register the cluster under the short name "pro", authenticating with the
# key chosen in the previous cell; install_key=True asks idact to install
# that key on the cluster.
cluster = add_cluster(
    name="pro",
    user=USER,
    host="pro.cyfronet.pl",
    port=22,
    auth=AuthMethod.PUBLIC_KEY,
    key=key,
    install_key=True,
    scratch="$SCRATCH",
)

# Persist the environment so later runs can load it instead of adding the
# cluster again.
save_environment('.idact-env')

2018-11-11 23:41:54 INFO: Generating public-private key pair.


## Load cluster (subsequent runs)

In [4]:
load_environment('.idact-env')
cluster = show_cluster("pro")
cluster

Cluster(pro.cyfronet.pl, 22, plggarstka, auth=AuthMethod.PUBLIC_KEY, key='E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh\\id_rsa_wu', install_key=True, disable_sshd=False)

In [5]:
set_log_level(logging.INFO)
#set_log_level(logging.DEBUG)
save_environment('.idact-env')

In [6]:
node = cluster.get_access_node()
node

Node(pro.cyfronet.pl:22, None)

On your first action, you will be asked for a password to install the key.
You can connect explicitly (optional) to do this right now:

In [7]:
node.connect()

2018-11-11 23:42:00 INFO: Installing key using password authentication.
Password for plggarstka@pro.cyfronet.pl:22: 


In [8]:
node.run('whoami')

'plggarstka'

In [9]:
node.run('hostname')

'login01.pro.cyfronet.pl'

## Allocate nodes

In [10]:
# Request two nodes with two cores and 10 GiB of memory each, for 20 minutes.
# native_args carries extra scheduler options (partition and account here);
# presumably they are forwarded to the workload manager as-is.
nodes = cluster.allocate_nodes(
    nodes=2,
    cores=2,
    memory_per_node=bitmath.GiB(10),
    walltime=Walltime(minutes=20),
    native_args={
        '--partition': 'plgrid-testing',
        '--account': 'intdata',
    },
)

2018-11-11 23:42:23 INFO: Creating the ssh directory.


In [11]:
nodes

Nodes([Node(NotAllocated),Node(NotAllocated)], SlurmAllocation(job_id=14132870))

In [12]:
nodes.wait()
nodes

Nodes([Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00),Node(p0665:59822, 2018-11-11 23:02:33.539260+00:00)], SlurmAllocation(job_id=14132870))

## Run commands

In [13]:
nodes[0].run('whoami')

'plggarstka'

In [14]:
nodes[0].run('hostname')

'p0654'

In [15]:
nodes[1].run('squeue')

'JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)\n          14132870 plgrid-te     wrap plggarst  R       0:13      2 p[0654,0665]'

In [16]:
nodes[1].run('hostname')

'p0665'

## Examine node resources

In [17]:
nodes[0].resources.memory_total

GiB(10.0)

In [18]:
nodes[0].resources.memory_usage

GiB(0.02233123779296875)

In [19]:
nodes[0].resources.cpu_cores

2

In [20]:
nodes[0].resources.cpu_usage

1.0

## Tunnel

In [21]:
tunnel = nodes[0].tunnel(here=9000, there=10000)

In [22]:
tunnel

MultiHopTunnel(9000:10000)

In [23]:
tunnel.close()

## Deploy notebook

One-time config step (cluster-specific):

In [24]:
cluster.config.setup_actions.jupyter = ['module load plgrid/tools/python-intel/3.6.2']
save_environment('.idact-env')

To run Jupyter Notebook on the cluster:

In [25]:
nb = nodes[0].deploy_notebook(local_port=8080)
nb

JupyterDeployment(8080 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00)

In [26]:
nodes[0].resources.memory_usage

GiB(0.0843048095703125)

In [27]:
nb.local_port

8080

To open the deployed notebook server in a new tab:

In [28]:
nb.open_in_browser()

In [29]:
nodes[0].resources.memory_usage

GiB(0.08779525756835938)

### Push and pull notebook

You can access the deployed notebook from multiple places by first pushing it:

In [30]:
cluster.push_deployment(nb)

2018-11-11 23:43:20 INFO: Pushing deployment: JupyterDeployment(8080 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00)


And then pulling:

In [31]:
deployments = cluster.pull_deployments()
deployments.jupyter_deployments

2018-11-11 23:43:32 INFO: Pulling deployments.
2018-11-11 23:43:35 INFO: Creating the ssh directory.
2018-11-11 23:43:45 INFO: Pulled Jupyter deployment: JupyterDeployment(65463 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00)


[JupyterDeployment(65463 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00)]

In [32]:
nb_2 = deployments.jupyter_deployments[0]
nb_2

JupyterDeployment(65463 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00)

In [33]:
nb_2.open_in_browser()

In [34]:
nb_2.cancel()

2018-11-11 23:43:58 INFO: Cancelling Jupyter deployment.


More on pushing and pulling deployments in the next section.

## Push and pull nodes

In order to work with Dask, you would usually need a notebook running on the cluster, as shown above.

To access the allocated nodes from a notebook running on the cluster, you need to push their deployment first, just as with the notebook deployment above:

In [35]:
cluster.push_deployment(nodes)

2018-11-11 23:44:06 INFO: Pushing deployment: Nodes([Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00),Node(p0665:59822, 2018-11-11 23:02:33.539260+00:00)], SlurmAllocation(job_id=14132870))


Then, you would pull the deployment on the cluster:

In [36]:
deployments = cluster.pull_deployments()
deployments

2018-11-11 23:44:17 INFO: Pulling deployments.
2018-11-11 23:44:20 INFO: Creating the ssh directory.
2018-11-11 23:44:30 INFO: Pulled allocation deployment: Nodes([Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00),Node(p0665:59822, 2018-11-11 23:02:33.539260+00:00)], SlurmAllocation(job_id=14132870))
2018-11-11 23:44:30 INFO: Pulled Jupyter deployment: JupyterDeployment(65494 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00)
2018-11-11 23:44:38 INFO: Discarding a Jupyter deployment, because it is no longer functional: JupyterDeployment(65494 -> Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00).


SynchronizedDeployments(nodes=1, jupyter_deployments=0)

In [37]:
nodes = deployments.nodes[0]
nodes

Nodes([Node(p0654:37987, 2018-11-11 23:02:33.539260+00:00),Node(p0665:59822, 2018-11-11 23:02:33.539260+00:00)], SlurmAllocation(job_id=14132870))

Essentially, this feature is intended for using an allocation in multiple notebooks at once.

Deployments are cleared automatically once they expire or are cancelled. They can also be cleared manually by running:

In [38]:
cluster.clear_pushed_deployments()

2018-11-11 23:44:44 INFO: Clearing deployments.


## idact-notebook app

You can deploy nodes and notebook automatically using the following command:
```
idact-notebook
```
or its equivalent, if you didn't install idact using pip:
```
python -m idact.notebook
```
Help message:

In [39]:
# Capture the CLI help text by running `-m idact.notebook --help` with the
# same interpreter as this kernel, from the parent directory.
# An argument list plus cwd= replaces the former shell string
# ("cd .. && <python> ..."): it keeps working when sys.executable contains
# spaces and does not depend on shell-specific command chaining.
# stderr is merged into stdout and the trailing newline is dropped, matching
# what subprocess.getoutput() returned.
help_message = subprocess.run(
    [sys.executable, '-m', 'idact.notebook', '--help'],
    cwd='..',
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    universal_newlines=True).stdout.rstrip('\n')
print(help_message)

Usage: notebook.py [OPTIONS] CLUSTER_NAME

  A console script that executes a Jupyter Notebook instance on an allocated
  cluster node, and makes it accessible in the local browser.

  CLUSTER_NAME argument is the cluster name to execute the notebook on. It
  must already be present in the config file.

Options:
  -e, --environment TEXT  Environment path. Default: ~/.idact.conf or the
                          value of IDACT_CONFIG_PATH.
  --save-defaults         Save allocation parameters as defaults for next
                          time.
  --reset-defaults        Reset unspecified allocation parameters to defaults.
  --nodes INTEGER         Cluster node count. [Allocation parameter]. Jupyter
                          notebook will be deployed on the first node.
                          Default: 1.
  --cores INTEGER         CPU core count per node. [Allocation parameter].
                          Default: 1
  --memory-per-node TEXT  Memory per node. [Allocation parameter]. Default

For example, to deploy a notebook on a cluster with the same parameters as above, you could call:

```
python -m idact.notebook pro --save-defaults --environment notebooks/.idact-env --nodes 2 --cores 2 --memory-per-node 10GiB --walltime 0:20:00 --native-arg --partition plgrid-testing --native-arg --account intdata
```

The `--save-defaults` flag is optional; it saves the allocation parameters as defaults, so that next time the following shorter command has the same effect:
```
python -m idact.notebook pro --environment notebooks/.idact-env
```
The `--environment` argument is optional if you use the default environment location.

The allocation and notebook the application deploys can be pulled from the cluster.

## Deploy Dask

One-time config step (cluster-specific):

In [40]:
cluster.config.setup_actions.dask = ['module load plgrid/tools/python-intel/3.6.2']
cluster.config.scratch = '$SCRATCH'
save_environment('.idact-env')

In [41]:
dd = deploy_dask(nodes)
dd

2018-11-11 23:44:58 INFO: Deploying Dask on 2 nodes.
2018-11-11 23:44:58 INFO: Connecting to p0654:37987 (1/2).
2018-11-11 23:44:58 INFO: Connecting to p0665:59822 (2/2).
2018-11-11 23:44:58 INFO: Deploying scheduler on the first node: p0654.
2018-11-11 23:45:17 INFO: Checking scheduler connectivity from p0654 (1/2).
2018-11-11 23:45:17 INFO: Checking scheduler connectivity from p0665 (2/2).
2018-11-11 23:45:17 INFO: Deploying workers.
2018-11-11 23:45:17 INFO: Deploying worker 1/2.
2018-11-11 23:45:34 INFO: Deploying worker 2/2.
2018-11-11 23:45:47 INFO: Validating worker 1/2.
2018-11-11 23:45:47 INFO: Validating worker 2/2.


DaskDeployment(scheduler=tcp://localhost:65509/tcp://172.20.66.144:59497, workers=2)

In [42]:
nodes[0].resources.memory_usage

GiB(0.3331871032714844)

Get Dask client:

In [43]:
client = dd.get_client()
client

0,1
Client  Scheduler: tcp://localhost:65509  Dashboard: http://localhost:45771/status,Cluster  Workers: 2  Cores: 4  Memory: 21.47 GB


In [44]:
nodes[0].resources.cpu_usage

7.0

Computation will work only if Python and library versions match:

In [45]:
# Example, left disabled because it only works when the local and remote
# Python/library versions match. The submitted function must accept the
# argument passed to submit(), hence `lambda value: ...`.
#x = client.submit(lambda value: value + 1, 10)
#x.result() == 11

Diagnostics servers are tunnelled:

In [46]:
dd.diagnostics.addresses

['http://localhost:45771', 'http://localhost:65523', 'http://localhost:65529']

To open diagnostics servers in new tabs:

In [47]:
dd.diagnostics.open_all()

In [48]:
dd.cancel()

2018-11-11 23:46:16 INFO: Cancelling worker deployment on p0665.
2018-11-11 23:46:23 INFO: Cancelling worker deployment on p0654.
2018-11-11 23:46:30 INFO: Cancelling scheduler deployment on p0654.


## Close

In [49]:
nodes.running()

True

In [50]:
nodes.cancel()

2018-11-11 23:46:38 INFO: Cancelling job 14132870.


In [51]:
nodes.running()

False

In [52]:
node.run('squeue')

'JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)'

## Push and pull the environment

When working on a cluster, it may be useful to synchronize idact config with the local machine. Pushing the environment will merge the local environment into the remote environment.

In [53]:
push_environment(cluster, path='~/.idact-notebook-env')

2018-11-11 23:46:53 INFO: Pushing the environment to cluster.
2018-11-11 23:46:55 ERROR: Failure: Getting file from node pro.cyfronet.pl: /net/people/plggarstka/.idact-notebook-env
2018-11-11 23:46:55 ERROR: Failure: Deserializing the environment from cluster.
2018-11-11 23:46:55 INFO: Remote environment is missing, current environment will be copied to cluster.


In [54]:
print(node.run('cat ~/.idact-notebook-env'))

{
    "clusters": {
        "pro": {
            "auth": "PUBLIC_KEY",
            "disableSshd": false,
            "host": "pro.cyfronet.pl",
            "installKey": true,
            "key": null,
            "notebookDefaults": {},
            "port": 22,
            "portInfoRetries": 5,
            "scratch": "$SCRATCH",
            "setupActions": {
                "dask": [
                    "module load plgrid/tools/python-intel/3.6.2"
                ],
                "jupyter": [
                    "module load plgrid/tools/python-intel/3.6.2"
                ]
            },
            "user": "plggarstka"
        }
    },
    "logLevel": 20
}


The reverse operation is pulling the environment, which merges the remote environment into the local environment. Machine-specific information like the private key path is skipped when pushing or pulling.

In [55]:
pull_environment(cluster, path='~/.idact-notebook-env')

2018-11-11 23:47:09 INFO: Pulling the environment from cluster.


The 'path' parameter is optional. It defaults to ~/.idact.conf, or the value of the remote IDACT_CONFIG_PATH environment variable.

In [56]:
node.run('rm -v ~/.idact-notebook-env')

'removed ‘/net/people/plggarstka/.idact-notebook-env’'

## Remove cluster

A cluster can be removed from the environment.

In [57]:
add_cluster(name='fake',
            user='fakeuser',
            host='fakehost',
            port=2222)

2018-11-11 23:47:15 INFO: No auth method specified, defaulting to password-based.


Cluster(fakehost, 2222, fakeuser, auth=AuthMethod.ASK, key=None, install_key=True, disable_sshd=False)

In [58]:
show_clusters()

{'pro': Cluster(pro.cyfronet.pl, 22, plggarstka, auth=AuthMethod.PUBLIC_KEY, key='E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh\\id_rsa_wu', install_key=False, disable_sshd=False),
 'fake': Cluster(fakehost, 2222, fakeuser, auth=AuthMethod.ASK, key=None, install_key=True, disable_sshd=False)}

In [59]:
remove_cluster('fake')

In [60]:
show_clusters()

{'pro': Cluster(pro.cyfronet.pl, 22, plggarstka, auth=AuthMethod.PUBLIC_KEY, key='E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh\\id_rsa_wu', install_key=False, disable_sshd=False)}