# Parsl Monitoring Demo

Parsl can be configured to capture fine-grained monitoring information about workflows and resource usage. To enable monitoring, you must add the monitoring hub to the configuration.

Note: in this example we set the resource monitoring interval to 1 second so that we can capture resource information from short-running tasks. In practice you will likely use a longer interval.

In [None]:
import parsl
from parsl import python_app
from parsl.monitoring.monitoring import MonitoringHub
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.providers import LocalProvider
from parsl.addresses import address_by_hostname
import logging

In [None]:
# Parsl configuration for this demo: one local HighThroughputExecutor plus a
# MonitoringHub so that workflow and resource information is recorded.
config = Config(
   executors=[
       HighThroughputExecutor(
           label="local_htex",
           # Loopback address: the executor is only reachable from this machine.
           address='127.0.0.1',
           max_workers=4,
           # Exactly one block is requested from the local provider
           # (init_blocks == max_blocks == 1).
           provider=LocalProvider(
               init_blocks=1,
               max_blocks=1,
               # NOTE(review): 'parsl-issue' looks like the author's own conda
               # environment name -- replace it with an environment that
               # exists on your machine.
               worker_init='source activate parsl-issue',
           )
       )
   ],
   monitoring=MonitoringHub(
       hub_address=address_by_hostname(),
       hub_port=6553,
       # A short sampling interval so that the short tasks in this demo
       # still produce resource records.
       resource_monitoring_interval=1,
   )
)

In [None]:
# Load the configuration built above before any app is invoked.
parsl.load(config)

@python_app
def inc(x):
    """Spin for a random whole number of seconds (1-10), counting as we go.

    ``x`` is incremented once per loop iteration for as long as the chosen
    duration has not elapsed, so the loop stays busy for the whole window
    instead of sleeping. The returned value is ``x`` plus the number of
    iterations that fit into that window, and therefore varies per call.
    """
    import random
    import time

    duration = random.randint(1, 10)
    began = time.time()
    # Keep counting until at least `duration` seconds have passed.
    while time.time() - began < duration:
        x += 1
    return x

@python_app
def slow_add(inputs=[]):
    """Pause for a random 1-10 seconds, then return the sum of ``inputs``.

    The default list is only read, never modified.
    """
    from random import randint
    from time import sleep

    sleep(randint(1, 10))
    return sum(inputs)

In [None]:
# Four-stage pipeline: fan out -> reduce -> fan out -> reduce.
num_tasks = 4

# Stage 1: one `inc` task per integer in [0, num_tasks).
futures_1 = list(map(inc, range(num_tasks)))

# Stage 2: add up the results of stage 1.
futures_2 = slow_add(inputs=futures_1)

# Stage 3: launch `num_tasks` further `inc` tasks, each seeded with the
# stage-2 future.
futures_3 = [inc(futures_2) for _ in range(num_tasks)]

# Stage 4: add up the results of stage 3.
futures_4 = slow_add(inputs=futures_3)

# .result() blocks until the final future has completed.
print(futures_4.result())
print("Done")

In [None]:
# List the working directory; the monitoring database file opened in the
# next cell should show up here.
!ls

In [None]:
import sqlite3
import pandas as pd

# Open the SQLite database in the current working directory.
# NOTE: sqlite3.connect() creates a new, empty database file if the path does
# not exist, so a missing file only shows up later as "no such table" errors.
conn = sqlite3.connect('monitoring.db')

In [None]:
# Read the whole `workflow` table into a DataFrame and preview the first rows.
df_workflow = pd.read_sql_query('SELECT * from workflow', conn)
df_workflow.head()

In [None]:
# Pick the run to inspect: the `run_id` of the last row returned by the
# workflow query (presumably the most recent run -- the query has no ORDER BY).
run_id = df_workflow['run_id'].iloc[-1]

# Bind run_id as a query parameter instead of formatting it into the SQL text.
# This avoids quoting problems and does not rely on SQLite's legacy acceptance
# of double-quoted string literals.
df_task = pd.read_sql_query(
    'SELECT * FROM task WHERE run_id = ?', conn, params=(run_id,)
)
df_task.head()

In [None]:
# Rows of the `node` table for the selected run. The frame is named after the
# table it holds. run_id is bound as a query parameter rather than formatted
# into the SQL text, which avoids quoting problems and SQLite's legacy
# double-quoted string literals.
df_node = pd.read_sql_query(
    'SELECT * FROM node WHERE run_id = ?', conn, params=(run_id,)
)
df_node.head()

In [None]:
# Rows of the `status` table for the selected run. The frame is named after
# the table it holds. run_id is bound as a query parameter rather than
# formatted into the SQL text, which avoids quoting problems and SQLite's
# legacy double-quoted string literals.
df_status = pd.read_sql_query(
    'SELECT * FROM status WHERE run_id = ?', conn, params=(run_id,)
)
df_status.head()

In [None]:
# Rows of the `resource` table for the selected run. run_id is bound as a
# query parameter rather than formatted into the SQL text, which avoids
# quoting problems and SQLite's legacy double-quoted string literals.
df_resource = pd.read_sql_query(
    'SELECT * FROM resource WHERE run_id = ?', conn, params=(run_id,)
)
df_resource.head()

In [None]:
# Release the SQLite connection now that the queries above have been run.
conn.close()

In [1]:
# Check whether the `parsl-visualize` command is available on PATH.
!which parsl-visualize

which: no parsl-visualize in (/software/python-2.7.12-nompi-el7-x86_64/bin:/home/zhuozhao/cctools/bin:/home/zhuozhao/google-cloud-sdk/bin:/software/slurm-current-el7-x86_64/bin:/bin:/home/zhuozhao/cctools/bin:/home/zhuozhao/google-cloud-sdk/bin:/software/postgresql-10.6-el7-x86_64/bin:/software/python-2.7.12-nompi-el7-x86_64/bin:/software/Anaconda3-5.3.0-el7-x86_64/bin:/bin:/software/git-2.10-el7-x86_64/bin:/software/subversion-1.9.4-el7-x86_64/bin:/software/bin:/srv/adm/bin:/usr/lib64/qt-3.3/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/lpp/mmfs/bin:/home/zhuozhao/bin)
