In [214]:
# Full code and examples can be found here: https://github.com/flux-framework/flux-workflow-examples.git

import json
import os
import re
import sys
import flux
from flux.job import JobspecV1

In [None]:
# Start a Flux session with 4 brokers on the local node.
# Stop this cell manually once the output shows a shell prompt such as
# "ecpuser@ip-172-31-59-124:~$" (it may be preceded by terminal escape residue
# like "]0;ecpuser@ip-172-31-59-124: ~"), in order to proceed to the next cell.

!flux start --size=4

]0;ecpuser@ip-172-31-59-124: ~[01;32mecpuser@ip-172-31-59-124[00m:[01;34m~[00m$ 

In [189]:
# View the total resources available to the current instance,
# summarized as counts of machines, cores, and PUs.
!flux hwloc info

4 Machines, 4 Cores, 8 PUs


In [190]:
# View the status of resources (which ranks/nodes are available).
!flux resource status
# View the scheduler's perspective on resources (free, allocated, down).
!flux resource list
# View the queue: whether submission and scheduling are enabled, plus request and job counts.
!flux queue status -v

    STATUS NNODES RANKS           NODELIST
     avail      4 0-3             ip-172-31-59-[124,124,124,124]
     STATE NNODES   NCORES    NGPUS NODELIST
      free      4        4        0 ip-172-31-59-[124,124,124,124]
 allocated      0        0        0 
      down      0        0        0 
flux-queue: Job submission is enabled
flux-queue: Scheduling is enabled
flux-queue: 0 alloc requests queued
flux-queue: 0 alloc requests pending to scheduler
flux-queue: 0 free requests pending to scheduler
flux-queue: 0 running jobs


In [191]:
# The size, current rank, comms URIs, logging levels,
# and other instance parameters are termed "attributes".
# They can be viewed and manipulated with the lsattr, getattr, and setattr commands, for example:
!flux getattr rank
!flux getattr size
!flux getattr local-uri
# List the attribute names together with their current values.
!flux lsattr -v

0
4
local:///tmp/flux-262769-fhXK0A/0/local
broker.pid                              262770
broker.quorum                           0-3
broker.rc1_path                         /usr/local/etc/flux/rc1
broker.rc3_path                         /usr/local/etc/flux/rc3
broker.rundir                           /tmp/flux-262769-fhXK0A/0
conf.connector_path                     /usr/local/lib/flux/connectors
conf.exec_path                          /usr/local/libexec/flux/cmd
conf.module_path                        /usr/local/lib/flux/modules
conf.pmi_library_path                   /usr/local/lib/flux/libpmi.so
conf.shell_initrc                       /usr/local/etc/flux/shell/initrc.lua
conf.shell_pluginpath                   /usr/local/lib/flux/shell/plugins
config.path                             -
content.acct-dirty                      0
content.acct-entries                    49209
content.acct-size                       16771952
content.acct-valid                      49209
content.backing-mo

In [192]:
# Services within a Flux session are implemented by modules.
# To query and manage broker modules, use `flux module`; `list` shows the loaded modules.
!flux module list

Module                       Size Digest  Idle  S Service
kvs                       1848984 E1DBB40 idle  R 
job-manager               1742384 787395B   11  R 
cron                      1417024 7DC39EF idle  R 
job-exec                  1523984 8C4FF58 idle  R 
sched-fluxion-qmanager    6368144 359C846   12  R sched
kvs-watch                 1542048 0FBE270 idle  R 
job-list                  1720976 0E49D20 idle  R 
connector-local           1311960 43C0C1F    0  R 
sched-fluxion-resource   31140360 FE3A64C   12  R 
job-info                  1649272 54F3340 idle  R 
job-ingest                1462032 01130CA idle  R 
barrier                   1321776 157B0EA idle  R 
content-sqlite            1340296 02AB110 idle  R content-backing,kvs-checkpoint
resource                  1714600 19E52CC   13  R 
heartbeat                 1301248 8F9B7A3    0  R 


In [193]:
# The key-value store (KVS) is a core component of a Flux instance.
# The `flux kvs` command provides a utility to list and manipulate values of the KVS.

# List the top-level keys.
!flux kvs ls 
# List the keys under "resource".
!flux kvs ls resource
# Print the value stored at "resource.R" (a JSON description of the instance's resources).
!flux kvs get resource.R

job         resource
R           eventlog
{"version": 1, "execution": {"R_lite": [{"rank": "0-3", "children": {"core": "0"}}], "starttime": 0.0, "expiration": 0.0, "nodelist": ["ip-172-31-59-[124,124,124,124]"]}}


In [194]:
# `flux exec` or `flux mini` can be used to launch jobs. `flux exec` is primarily used for test purposes.
# Here it runs `flux getattr rank` once per broker rank.
!flux exec flux getattr rank
# `flux mini` provides several options, such as run, submit, bulksubmit and batch.
# !flux help mini
# Run `hostname` as 4 tasks, prefixing each output line with its task rank.
!flux mini run -n4 --label-io hostname

0
1
2
3
2: ip-172-31-59-124
3: ip-172-31-59-124
1: ip-172-31-59-124
0: ip-172-31-59-124


In [195]:
# Partitioned as well as overlapping submission is supported with Flux.
# For instance, in a 4-node allocation, multiple jobs can be submitted as follows in a non-overlapping way.
# Each `flux mini submit` prints the ID of the job it submitted.
!flux mini submit --nodes=2 --ntasks=2 --cores-per-task=1 ./flux-workflow-examples/job-submit-cli/compute.lua 30
!flux mini submit --nodes=1 --ntasks=1 --cores-per-task=1 ./flux-workflow-examples/job-submit-cli/io-forwarding.lua 30

ƒ29v4KWfkb
ƒ29vNeVnoq


In [196]:
# List the current jobs with their state, task/node counts, runtime, and node list.
!flux jobs
# Show how the instance's resources are now split between free and allocated.
!flux resource list

       JOBID USER     NAME       ST NTASKS NNODES  RUNTIME NODELIST
  ƒ29vNeVnoq ecpuser  io-forward  R      1      1   5.543s ip-172-31-59-124
  ƒ29v4KWfkb ecpuser  compute.lu  R      2      2   6.256s ip-172-31-59-[124,124]
     STATE NNODES   NCORES    NGPUS NODELIST
      free      1        1        0 ip-172-31-59-124
 allocated      3        3        0 ip-172-31-59-[124,124,124]
      down      0        0        0 


In [197]:
!flux jobs

# Copy the Job ID here to examine the job's resources.
!flux job info <job_id> R

       JOBID USER     NAME       ST NTASKS NNODES  RUNTIME NODELIST
/bin/bash: job_id: No such file or directory


In [198]:
# For batch (non-interactive) submissions, we can use flux mini batch.
# Internally, Flux will create a nested Flux instance allocated to the requested resources 
# per batch job and run the batch script inside that nested instance. 
# While a batch script is expected to launch parallel jobs using flux mini run 
# or flux mini submit at this level, nothing prevents the script from further batching 
# other sub-batch-jobs using the flux mini batch interface, if desired.

``` bash 
    #!/bin/bash
  
    echo "Starting my batch job"
    echo "Print the resources allocated to this batch job"
    flux hwloc info

    echo "Use sleep to emulate a parallel program"
    echo "Run the program at a total of 2 processes each requiring"
    echo "1 core. These processes are equally spread across 2 nodes."
    flux mini run -N 2 -n 2 sleep 30
    flux mini run -N 2 -n 2 sleep 30
```

In [202]:
# Submit the batch script twice; each submission requests 2 slots of 1 core each across 2 nodes
# and prints the ID of the batch job it created.
!flux mini batch --nslots=2 --cores-per-slot=1 --nodes=2 ./sleep_batch.sh
!flux mini batch --nslots=2 --cores-per-slot=1 --nodes=2 ./sleep_batch.sh

ƒ2CEdvKPGb
ƒ2CEzEwYJj


In [205]:
!flux jobs

# Copy the Job ID here to examine the job's resources.
!flux job info <job_id> R

       JOBID USER     NAME       ST NTASKS NNODES  RUNTIME NODELIST
  ƒ2CEdvKPGb ecpuser  sleep_batc PD      2      -        - -
  ƒ2CEzEwYJj ecpuser  sleep_batc PD      2      -        - -
  ƒ2BFzPtzsq ecpuser  sleep_batc  R      2      2   2.448m ip-172-31-59-[124,124]
  ƒ2BFbmqxUf ecpuser  sleep_batc  R      2      2   2.463m ip-172-31-59-[124,124]
/bin/bash: job_id: No such file or directory


In [206]:
# The job-submit API can be used to submit jobs from Python.
# The following script shows this with the "flux.job.submit()" call.

# Open a handle to the Flux instance named by the FLUX_URI environment variable;
# when FLUX_URI is not set, fall back to the demo instance's socket path.
f = flux.Flux(url=os.environ.get("FLUX_URI", "local:///tmp/flux-demo/0/local"))

# Describe the job: one task of compute.py (argument "120") on one node with one core.
compute_jreq = JobspecV1.from_command(
    command=["./flux-workflow-examples/job-submit-api/compute.py", "120"],
    num_tasks=1,
    num_nodes=1,
    cores_per_task=1,
)
# Run the job from the notebook's working directory with the notebook's environment.
compute_jreq.cwd = os.getcwd()
compute_jreq.environment = dict(os.environ)

# submit() returns the ID of the new job, which is printed here.
print(flux.job.submit(f, compute_jreq))

16327499688443904


In [207]:
# List the jobs to check on the job just submitted through the Python API.
!flux jobs

       JOBID USER     NAME       ST NTASKS NNODES  RUNTIME NODELIST
  ƒ2CEdvKPGb ecpuser  sleep_batc PD      2      -        - -
  ƒ2CEzEwYJj ecpuser  sleep_batc PD      2      -        - -
  ƒ2BFzPtzsq ecpuser  sleep_batc  R      2      2   2.605m ip-172-31-59-[124,124]
  ƒ2BFbmqxUf ecpuser  sleep_batc  R      2      2    2.62m ip-172-31-59-[124,124]


In [208]:
# Open a handle to the Flux instance named by the FLUX_URI environment variable;
# when FLUX_URI is not set, fall back to the demo instance's socket path.
f = flux.Flux(url=os.environ.get("FLUX_URI", "local:///tmp/flux-demo/0/local"))


def make_jobspec(script, duration, **resources):
    """Build a JobspecV1 that runs `script duration` from the notebook's cwd and environment.

    Args:
        script: Path of the executable to run.
        duration: Single command-line argument passed to the script (a string).
        **resources: Resource keywords forwarded to JobspecV1.from_command
            (num_tasks, num_nodes, cores_per_task).

    Returns:
        The configured JobspecV1, ready to pass to flux.job.submit().
    """
    jobspec = JobspecV1.from_command(command=[script, duration], **resources)
    jobspec.cwd = os.getcwd()
    jobspec.environment = dict(os.environ)
    return jobspec


# NOTE(review): this asks for 4 tasks x 2 cores on 2 nodes (8 cores), while the
# `flux hwloc info` output earlier reports 4 cores in total - confirm the request
# is satisfiable on the target instance.
compute_jobreq = make_jobspec(
    "./flux-workflow-examples/job-submit-api/compute.py",
    "120",
    num_tasks=4,
    num_nodes=2,
    cores_per_task=2,
)
print(flux.job.submit(f, compute_jobreq))

io_jobreq = make_jobspec(
    "./flux-workflow-examples/job-submit-api/io-forwarding.py",
    "120",
    num_tasks=1,
    num_nodes=1,
    cores_per_task=1,
)
print(flux.job.submit(f, io_jobreq))

16327963595243520
16327963863678976


In [None]:
# List the jobs again to check on the jobs submitted through the Python API.
!flux jobs

In [None]:
# We can use the FluxExecutor class to submit large numbers of jobs to Flux. 
# Each executor.submit() call returns a Python future for the submitted job, as in the snippet below.

``` python 
with FluxExecutor() as executor:
        compute_jobspec = JobspecV1.from_command(args.command)
        futures = [executor.submit(compute_jobspec) for _ in range(args.njobs)]
        # wait for the jobid for each job, as a proxy for the job being submitted
        for fut in futures:
            fut.jobid()
        # all jobs submitted - print timings
```

In [None]:
# Run the FluxExecutor-based bulk-submit example script.
# With -n200 and the command `/bin/sleep 0`, it submits 200 jobs and reports the submission rate.
%run ./flux-workflow-examples/async-bulk-job-submit/bulksubmit_executor.py -n200 /bin/sleep 0

bulksubmit_executor: submitted 200 jobs in 0.72s. 279.43job/s
