# idact - Prometheus sandbox

## Initial setup

Add `idact` to path:

In [1]:
import sys
import os
import bitmath
import getpass
import contextlib
import fabric
import logging
from pprint import pprint

def append_idact_path():
    idact_path = os.path.realpath(os.path.join(os.getcwd(), '../'))
    sys.path.append(idact_path)
append_idact_path()

from idact import *
from idact.detail.auth.set_password import set_password

os.environ['IDACT_KEY_LOCATION'] = os.path.join(os.getcwd(), '../.notebook-ssh')
try:
    os.mkdir(os.environ['IDACT_KEY_LOCATION'])
except OSError as e:
    print(e)

USER = 'plggarstka'

[WinError 183] Cannot create a file when that file already exists: 'E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh'


Hide debug information, setup context manager stack (for testing purposes)

## Add cluster (only first run)

In [2]:
cluster = add_cluster(name="pro",
                      user=USER,
                      host="pro.cyfronet.pl",
                      port=22,
                      auth=AuthMethod.PUBLIC_KEY,
                      key=KeyType.RSA,
                      install_key=True,
                      scratch="$SCRATCH")
save_environment('.idact-env')

2018-09-01 02:54:53 INFO: Generating public-private key pair.


## Load cluster (subsequent runs)

In [3]:
load_environment('.idact-env')
cluster = show_cluster("pro")
cluster

Cluster(pro.cyfronet.pl, 22, plggarstka, auth=AuthMethod.PUBLIC_KEY, key='E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh\\id_rsa_no', install_key=True, disable_sshd=False)

In [4]:
set_log_level(logging.INFO)
#set_log_level(logging.DEBUG)
save_environment('.idact-env')

In [5]:
node = cluster.get_access_node()
node

Node(pro.cyfronet.pl:22, None)

In [6]:
node.run('whoami')

Password for plggarstka@pro.cyfronet.pl:22: 


'plggarstka'

In [7]:
node.run('hostname')

'login01.pro.cyfronet.pl'

## Allocate nodes

In [8]:
nodes = cluster.allocate_nodes(nodes=2,
                               cores=2,
                               memory_per_node=bitmath.GiB(10),
                               walltime=Walltime(minutes=20),
                               native_args={
                                   '--partition': 'plgrid-testing',
                                   '--account': 'intdata'
                               })

Node(pro.cyfronet.pl:22, None)


In [9]:
nodes

Nodes([Node(NotAllocated),Node(NotAllocated)])

In [10]:
nodes.wait()
nodes

Nodes([Node(p2225:55446, 2018-09-01 01:15:14.763337+00:00),Node(p2226:49771, 2018-09-01 01:15:14.763337+00:00)])

## Run commands

In [11]:
nodes[0].run('whoami')

'plggarstka'

In [12]:
nodes[0].run('hostname')

'p2225'

In [13]:
nodes[1].run('squeue')

'JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)\n          12752942 plgrid-te     wrap plggarst  R       0:12      2 p[2225-2226]'

In [14]:
nodes[1].run('hostname')

'p2226'

## Tunnel

In [15]:
tunnel = nodes[0].tunnel(here=9000, there=10000)

In [16]:
tunnel

MultiHopTunnel(9000:10000)

In [17]:
tunnel.close()

## Deploy notebook

One-time config step (cluster-specific):

In [18]:
cluster.config.setup_actions.jupyter = ['module load plgrid/tools/python-intel/3.6.2']
save_environment('.idact-env')

To run Jupyter Notebook on the cluster:

In [19]:
nb = nodes[0].deploy_notebook(local_port=8080)
nb



JupyterDeployment(8080 -> Node(p2225:55446, 2018-09-01 01:15:14.763337+00:00))

In [20]:
nb.local_port

8080

To open the deployed notebook server in a new tab:

In [21]:
nb.open_in_browser()

In [22]:
nb.cancel()

## Deploy Dask

One-time config step (cluster-specific):

In [23]:
cluster.config.setup_actions.dask = ['module load plgrid/tools/python-intel/3.6.2']
cluster.config.scratch = '$SCRATCH'
save_environment('.idact-env')

In [24]:
dd = deploy_dask(nodes)
dd

2018-09-01 02:56:06 INFO: Deploying scheduler...
2018-09-01 02:56:22 INFO: Deploying workers...
2018-09-01 02:56:22 INFO: Deploying worker 1/2...
2018-09-01 02:56:38 INFO: Deploying worker 2/2...


DaskDeployment(scheduler=tcp://localhost:52710/tcp://172.20.72.185:37419, workers=2)

Get Dask client:

In [25]:
client = dd.get_client()
client

0,1
Client  Scheduler: tcp://localhost:52710  Dashboard: http://localhost:47644/status,Cluster  Workers: 1  Cores: 2  Memory: 10.74 GB


Computation will work only if Python and library versions match:

In [26]:
#x = client.submit(lambda: value + 1, 10)
#x.result() == 11

Diagnostics servers are tunnelled:

In [27]:
dd.diagnostics.addresses

['http://localhost:47644', 'http://localhost:52722', 'http://localhost:52728']

To open diagnostics servers in new tabs:

In [None]:
dd.diagnostics.open_all()

In [29]:
dd.cancel()

## Close

In [30]:
nodes.running()

True

In [31]:
nodes.cancel()

In [32]:
nodes.running()

False

In [34]:
node.run('squeue')

'JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)'

## Push and pull the environment

When working on a cluster, it may be useful to synchronize idact config with the local machine. Pushing the environment will merge the local environment into the remote environment.

In [35]:
push_environment(cluster, path='~/.idact-notebook-env')

2018-09-01 02:58:30 INFO: Remote environment is missing, current environment will be copied to cluster.


In [36]:
print(node.run('cat ~/.idact-notebook-env'))

{
    "clusters": {
        "pro": {
            "auth": "PUBLIC_KEY",
            "disableSshd": false,
            "host": "pro.cyfronet.pl",
            "installKey": true,
            "key": null,
            "port": 22,
            "scratch": "$SCRATCH",
            "setupActions": {
                "dask": [
                    "module load plgrid/tools/python-intel/3.6.2"
                ],
                "jupyter": [
                    "module load plgrid/tools/python-intel/3.6.2"
                ]
            },
            "user": "plggarstka"
        }
    },
    "logLevel": 20
}


The reverse operation is pulling the environment, which merges the remote environment into the local environment. Machine-specific information like the private key path is skipped when pushing or pulling.

In [37]:
pull_environment(cluster, path='~/.idact-notebook-env')

The 'path' parameter is optional. It defaults to ~/.idact.conf, or the value of the remote IDACT_CONFIG_PATH environment variable.

In [38]:
node.run('rm -v ~/.idact-notebook-env')

'removed ‘/net/people/plggarstka/.idact-notebook-env’'

## Remove cluster

A cluster can be removed from the environment.

In [39]:
add_cluster(name='fake',
            user='fakeuser',
            host='fakehost',
            port=2222)

2018-09-01 02:58:52 INFO: No auth method specified, defaulting to password-based.


Cluster(fakehost, 2222, fakeuser, auth=AuthMethod.ASK, key=None, install_key=True, disable_sshd=False)

In [40]:
show_clusters()

{'pro': Cluster(pro.cyfronet.pl, 22, plggarstka, auth=AuthMethod.PUBLIC_KEY, key='E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh\\id_rsa_no', install_key=False, disable_sshd=False),
 'fake': Cluster(fakehost, 2222, fakeuser, auth=AuthMethod.ASK, key=None, install_key=True, disable_sshd=False)}

In [41]:
remove_cluster('fake')

In [42]:
show_clusters()

{'pro': Cluster(pro.cyfronet.pl, 22, plggarstka, auth=AuthMethod.PUBLIC_KEY, key='E:\\shared\\uni\\eng-project\\notebooks\\../.notebook-ssh\\id_rsa_no', install_key=False, disable_sshd=False)}