In [1]:
!mkdir -p ~/agave/funwave-tvd-docker

%cd ~/agave

!pip3 install setvar

import re
import os
import sys
from setvar import *
from time import sleep

# This cell enables inline plotting in the notebook
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
loadvar()
!auth-tokens-refresh

/home/jovyan/agave
CMD=jobs-output-get 6221185708420501991-242ac11b-0001-007 fork-command-1.out
INPUTS={}
JOB_FILE=job-remote-8787.txt
JOB_ID=6221185708420501991-242ac11b-0001-007
OUTPUT=Successfully submitted job 6221185708420501991-242ac11b-0001-007
REMOTE_COMMAND=lscpu
REQUESTBIN_URL=https://requestbin.agaveapi.co/1gcoe8j1
STAT=FINISHED
VM_IPADDRESS=52.15.194.28
[1;0mToken for agave.prod:stevenrbrandt successfully refreshed and cached for 14400 seconds
6673cf576d19fa38c68a4324cb6721e[0m


<h2>Creating the Docker Image</h2>
To start, we need a Dockerfile, which has a number of simple commands.
It starts with "FROM", which can specify any Docker image available from DockerHub. That includes not only basic operating systems such as "ubuntu", "fedora", "centos", etc., but also specialized containers made by anyone with a DockerHub account. I've provided "science-base", which has OpenMPI 2.1.1 and some standard compilers, i.e. gfortran, gcc, and g++.

MAINTAINER is a bit of metadata that (hopefully) will allow you to contact the container's creator, if need be.

WORKDIR is the Dockerfile equivalent of the "cd" command. Note that running "cd" inside a RUN command will not change the directory for later commands, because its effect ends when that RUN command finishes.

RUN simply runs the command that follows. Because the container is saved after each step, we want to avoid creating files that we don't want to keep (we want containers to be as small as possible).

USER specifies the user id for running subsequent RUN commands.

COPY can be used to copy files into the container from the build directory.

ENTRYPOINT is a script that runs when the container starts up. What our script does is create a new user on the docker image with a user id and name that is convenient.

In [2]:
# Write the Dockerfile for the FUNWAVE-TVD image into the funwave-tvd-docker
# build directory (the path is relative to the current working directory).
# NOTE(review): writefile() is not defined in this notebook; presumably it comes
# from `from setvar import *` -- confirm.
# The literal below is not a raw string, so Python consumes every
# backslash-newline pair: the multi-line RUN instruction reaches the Dockerfile
# as a single long line, which is still one valid RUN command.
writefile("funwave-tvd-docker/Dockerfile","""
FROM stevenrbrandt/science-base
USER root
RUN mkdir -p /home/install
RUN chown jovyan /home/install
USER jovyan

MAINTAINER Steven R. Brandt <sbrandt@cct.lsu.edu>
RUN cd /home/install && \
    git clone https://github.com/fengyanshi/FUNWAVE-TVD && \
    cd FUNWAVE-TVD/src && \
    perl -p -i -e 's/FLAG_8 = -DCOUPLING/#$&/' Makefile && \
    make

WORKDIR /home/install/FUNWAVE-TVD/src
RUN mkdir -p /home/jovyan/rundir
WORKDIR /home/jovyan/rundir
""")

Writing file `funwave-tvd-docker/Dockerfile'


Now that we've created our Dockerfile and entrypoint.sh, bundle them up in a tarball and send them somewhere that Agave can access them.

In [3]:
# Pack the Dockerfile into a tarball. -C makes tar read it from the build
# directory, so the archive holds just "Dockerfile" with no leading path.
!tar -czf dockerjob.tgz -C funwave-tvd-docker Dockerfile
# Create the folder on the Agave storage system, then upload the tarball into it.
# ${AGAVE_STORAGE_SYSTEM_ID} is expanded by the shell from the environment.
!files-mkdir -S ${AGAVE_STORAGE_SYSTEM_ID} -N funwave-tvd-docker
!files-upload -F dockerjob.tgz -S ${AGAVE_STORAGE_SYSTEM_ID} funwave-tvd-docker/

[1;0mSuccessfully created folder funwave-tvd-docker[0m
Uploading dockerjob.tgz...
######################################################################## 100.0%


In [4]:
import importlib

import runagavecmd as r

# Re-import the helper module so that edits made to runagavecmd.py after the
# kernel started are picked up. importlib.reload replaces imp.reload: the `imp`
# module is deprecated and no longer exists in Python 3.12+.
importlib.reload(r)

<module 'runagavecmd' from '/home/jovyan/agave/runagavecmd.py'>

Run the docker build command. We will "tag" this build with the name "funwave-tvd-2" when it is complete.

In [5]:
# Submit the image build as a remote job through the runagavecmd helper.
# First argument: the shell command to run remotely (echoed in the log as
# REMOTE_COMMAND). Second argument: the Agave URL of the input file to stage
# (echoed in the log as INPUTS={"datafile": ...}).
# The command unpacks the uploaded tarball and builds the image from the
# Dockerfile in the job directory, tagging the result "funwave-tvd-2".
# The recorded log shows the helper printing STAT lines until the job reaches
# FINISHED and then printing the job's fork-command-1.out.
r.runagavecmd(
    "tar xzf dockerjob.tgz && sudo docker build --rm -t funwave-tvd-2 .",
    "agave://${AGAVE_STORAGE_SYSTEM_ID}/funwave-tvd-docker/dockerjob.tgz"
)

REMOTE_COMMAND=tar xzf dockerjob.tgz && sudo docker build --rm -t funwave-tvd-2 .
REQUESTBIN_URL=https://requestbin.agaveapi.co/17yxi9o1

 ** QUERY STRING FOR REQUESTBIN **
https://requestbin.agaveapi.co/17yxi9o1?inspect

INPUTS={"datafile":"agave://nectar-storage-stevenrbrandt/funwave-tvd-docker/dockerjob.tgz"}
JOB_FILE=job-remote-19335.txt
Writing file `job-remote-19335.txt'
OUTPUT=Successfully submitted job 4976813210810969625-242ac11b-0001-007
JOB_ID=4976813210810969625-242ac11b-0001-007
STAT=PENDING
STAT=PENDING
STAT=PROCESSING_INPUTS
STAT=STAGING_INPUTS
STAT=STAGED
STAT=STAGED
STAT=SUBMITTING
STAT=SUBMITTING
STAT=SUBMITTING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=FINISHED
CMD=jobs-output-get 4976813210810969625-242ac11b-0001-007 fork-command-1.out
All done! Output follows.
Reading file `fork-comma

In [6]:
# Download the job's stderr capture and print it, to check the build for errors.
# NOTE(review): ${JOB_ID} is expanded by the shell, so it is assumed to have been
# exported to the environment by the preceding runagavecmd call -- confirm.
!jobs-output-get ${JOB_ID} fork-command-1.err
!cat fork-command-1.err




<h2>Running the Docker Image</h2>
It is possible to run docker interactively, but that isn't convenient inside scripts. So instead, we start it in detached mode, with the -d flag.

Because your docker image has its own internal file system, it can't see files on the host machine. You can, however, transfer them using the "docker cp" command.

Running docker is slightly tricky. When a Docker image starts up, you can execute any command you want--but when you type "exit" all the changes you've made to the file system vanish. Therefore it's necessary to copy them out before the docker container stops.

In [7]:
# Write the driver script that runs FUNWAVE-TVD inside the Docker image and
# copies the results out before the container is stopped.
#
# The script text is a raw string so that every "\$" reaches writefile() with
# its backslash intact. In a normal string literal "\$" is an invalid escape
# sequence: Python keeps the backslash but emits a DeprecationWarning
# (a SyntaxWarning from Python 3.12 on). The string value is unchanged.
# NOTE(review): the backslashes are presumably there because writefile()
# performs its own $-substitution and "\$" protects a literal dollar sign --
# confirm against setvar.
writefile("rundock.sh",r"""
rm -fr cid.txt out.tgz

# Start a docker image running in detached mode, write the container id to cid.txt
sudo docker run -d -it --rm --cidfile cid.txt funwave-tvd-2 bash

# Store the container id in CID for convenience
CID=\$(cat cid.txt)

# Copy the input.txt file into the running image
sudo docker cp input.txt \$CID:/home/jovyan/rundir/

# Run funwave on the image
sudo docker exec --user jovyan \$CID mpirun -np 4 /home/install/FUNWAVE-TVD/src/funwave_vessel

# Extract the output files from the running image
# Having them in a tgz makes it more convenient to fetch them with jobs-output-get
sudo docker exec --user jovyan \$CID tar czf - output > out.tgz

# Stop the image
sudo docker stop \$CID

# List the output files
tar tzf out.tgz
""")

Writing file `rundock.sh'


Upload the input.txt file and the rundock.sh script.

In [8]:
# Bundle the driver script together with the FUNWAVE input file, then upload
# the bundle to the funwave-tvd-docker folder on the Agave storage system.
!tar czf rundock.tgz rundock.sh input.txt
!files-upload -F rundock.tgz -S ${AGAVE_STORAGE_SYSTEM_ID} funwave-tvd-docker/

Uploading rundock.tgz...
######################################################################## 100.0%


Execute the rundock.sh script

In [9]:
# Submit a remote job that unpacks the uploaded bundle and runs rundock.sh.
# First argument: the remote shell command; second argument: the Agave URL of
# the bundle to stage as the job's input file.
# rundock.sh leaves its results in out.tgz in the job directory.
r.runagavecmd(
    "tar xzf rundock.tgz && bash rundock.sh",
    "agave://${AGAVE_STORAGE_SYSTEM_ID}/funwave-tvd-docker/rundock.tgz")

REMOTE_COMMAND=tar xzf rundock.tgz && bash rundock.sh
REQUESTBIN_URL=https://requestbin.agaveapi.co/1b5ygym1

 ** QUERY STRING FOR REQUESTBIN **
https://requestbin.agaveapi.co/1b5ygym1?inspect

INPUTS={"datafile":"agave://nectar-storage-stevenrbrandt/funwave-tvd-docker/rundock.tgz"}
JOB_FILE=job-remote-19335.txt
Writing file `job-remote-19335.txt'
OUTPUT=Successfully submitted job 1982451147916775911-242ac11b-0001-007
JOB_ID=1982451147916775911-242ac11b-0001-007
STAT=PENDING
STAT=PENDING
STAT=PENDING
STAT=PROCESSING_INPUTS
STAT=STAGING_INPUTS
STAT=STAGED
STAT=STAGED
STAT=SUBMITTING
STAT=SUBMITTING
STAT=SUBMITTING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=FINISHED
CMD=jobs-output-get 1982451147916775911-242ac11b-0001-007 fork-command-1.out
All done! Output follows.
Reading file `fork-command-1.out'
de4cf8203ac6f33620fe179721bd540d6c1440fdd99acdd6a18e70085416e085
 BATHY_CORRECTION DOES NOT EXIST. USE DEFAULT 

Get the output of the job back to our local machine

In [10]:
# List the files the job left behind, download the results archive written by
# rundock.sh, and unpack it locally (it contains the "output" directory).
# NOTE(review): ${JOB_ID} is read from the shell environment; it is assumed to
# refer to the job submitted by the previous cell -- confirm.
!jobs-output-list ${JOB_ID}
!jobs-output-get ${JOB_ID} out.tgz
!tar xzf out.tgz

[1;0m.agave.archive
.agave.log
cid.txt
fork-command-1.err
fork-command-1.ipcexe
fork-command-1.out
fork-command-1.pid
fork-test.txt
fork-wrapper.txt
input.txt
out.tgz
rundock.sh
rundock.tgz[0m
######################################################################## 100.0%############################                                   55.2%


In [11]:
# Sanity check: show the first lines of one extracted output file
# (the lines are long rows of floating-point values).
!head output/eta_00010

   -0.118542E-05   -0.138177E-05   -0.783976E-06   -0.861506E-06   -0.117859E-05   -0.107787E-05   -0.448627E-06    0.278551E-06    0.124279E-05    0.202840E-05    0.268012E-05    0.259127E-05    0.203988E-05    0.957982E-06    0.156101E-08   -0.576096E-06   -0.282764E-06    0.917940E-07    0.761391E-06    0.191816E-05    0.299091E-05    0.343208E-05    0.371947E-05    0.403985E-05    0.374277E-05    0.307659E-05    0.207817E-05    0.154556E-05    0.222579E-05    0.370476E-05    0.602939E-05    0.785637E-05    0.861384E-05    0.872056E-05    0.778165E-05    0.688376E-05    0.742257E-05    0.969570E-05    0.125245E-04    0.146335E-04    0.160979E-04    0.175916E-04    0.196113E-04    0.217679E-04    0.240357E-04    0.264326E-04    0.293413E-04    0.330053E-04    0.369342E-04    0.412014E-04    0.456433E-04    0.502850E-04    0.552674E-04    0.609594E-04    0.679434E-04    0.762600E-04    0.854564E-04    0.953497E-04    0.105646E-03    0.116700E-03    0.128782E-03    0.142463E-03    0.15

<h2>Running with Singularity</h2>
If we have a public docker image, we can run it directly with Singularity. Singularity is designed to be more HPC friendly than Docker. First, because it doesn't allow the running user to access any user id but their own inside the container, and second, because Singularity images can be run through MPI, making it easier to scale up to a distributed cluster.

In this first step, we build the singularity installation. Because the result of this job is intended to be an installation for subsequent jobs, we install it to a hard-coded directory rather than using the normal Agave job directory.

In [12]:
!files-mkdir -S ${AGAVE_STORAGE_SYSTEM_ID} -N sing
!files-upload -F input.txt -S ${AGAVE_STORAGE_SYSTEM_ID} sing/
r.runagavecmd(
            "mkdir -p ~/singu && "+
            "cd ~/singu && "+
            "rm -f funwave-tvd.img && "+
            "singularity create funwave-tvd.img --size 2000 && "+
            "singularity import funwave-tvd.img docker://stevenrbrandt/funwave-tvd-2:latest")

[1;0mSuccessfully created folder sing[0m
Uploading input.txt...
######################################################################## 100.0%
REMOTE_COMMAND=mkdir -p ~/singu && cd ~/singu && rm -f funwave-tvd.img && singularity create funwave-tvd.img --size 2000 && singularity import funwave-tvd.img docker://stevenrbrandt/funwave-tvd-2:latest
REQUESTBIN_URL=https://requestbin.agaveapi.co/1bbyred1

 ** QUERY STRING FOR REQUESTBIN **
https://requestbin.agaveapi.co/1bbyred1?inspect

INPUTS={}
JOB_FILE=job-remote-19335.txt
Writing file `job-remote-19335.txt'
OUTPUT=Successfully submitted job 7217571934888455705-242ac11b-0001-007
JOB_ID=7217571934888455705-242ac11b-0001-007
STAT=PENDING
STAT=PENDING
STAT=STAGED
STAT=STAGED
STAT=SUBMITTING
STAT=SUBMITTING
STAT=SUBMITTING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING

Now that the Singularity image is built, we can run it with mpi. Notice that mpi executes the singularity command. The tricky part here is to make sure you've got the same version of mpi running inside and outside the container.

In [13]:
!files-upload -F input.txt -S ${AGAVE_STORAGE_SYSTEM_ID} ./
r.runagavecmd(
    "export LD_LIBRARY_PATH=/usr/local/lib && "+
    "mpirun -np 4 singularity exec ~/singu/funwave-tvd.img /home/install/FUNWAVE-TVD/src/funwave_vessel && "+
    "tar cvzf singout.tgz output",
    "agave://${AGAVE_STORAGE_SYSTEM_ID}/input.txt"
)

Uploading input.txt...
######################################################################## 100.0%
REMOTE_COMMAND=export LD_LIBRARY_PATH=/usr/local/lib && mpirun -np 4 singularity exec ~/singu/funwave-tvd.img /home/install/FUNWAVE-TVD/src/funwave_vessel && tar cvzf singout.tgz output
REQUESTBIN_URL=https://requestbin.agaveapi.co/1c1lhq41

 ** QUERY STRING FOR REQUESTBIN **
https://requestbin.agaveapi.co/1c1lhq41?inspect

INPUTS={"datafile":"agave://nectar-storage-stevenrbrandt/input.txt"}
JOB_FILE=job-remote-19335.txt
Writing file `job-remote-19335.txt'
OUTPUT=Successfully submitted job 6123288553621941785-242ac11b-0001-007
JOB_ID=6123288553621941785-242ac11b-0001-007
STAT=PENDING
STAT=PENDING
STAT=PENDING
STAT=STAGING_INPUTS
STAT=STAGED
STAT=STAGED
STAT=SUBMITTING
STAT=SUBMITTING
STAT=SUBMITTING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=FINISHED
CMD=jobs-output-get 61232885536

In [14]:
# Download the results archive of the Singularity run.
!jobs-output-get ${JOB_ID} singout.tgz
# Remove the previously extracted output directory first, so that what is
# inspected next comes only from this archive.
!rm -fr output
!tar xzf singout.tgz

######################################################################## 100.0%                                                     5.7%                                        47.1%


In [15]:
# Sanity check: show the first lines of one output file from the Singularity run.
!head output/v_00010

   -0.636563E-07   -0.330850E-07   -0.296523E-07   -0.691184E-07   -0.120820E-06   -0.124831E-06   -0.689702E-07    0.937098E-08    0.201210E-07   -0.284433E-07   -0.814082E-07   -0.881270E-07   -0.844808E-07   -0.794556E-07   -0.134686E-06   -0.175823E-06   -0.150980E-06   -0.480652E-07    0.229902E-07   -0.385864E-08   -0.836134E-07   -0.898827E-07   -0.653383E-07   -0.846806E-07   -0.131815E-06   -0.146338E-06   -0.123343E-06   -0.994128E-07   -0.296112E-07   -0.196120E-07   -0.776221E-07   -0.115648E-06   -0.653215E-07    0.212359E-07   -0.265608E-07   -0.876686E-07   -0.117301E-06   -0.169011E-06   -0.194777E-06   -0.134258E-06   -0.103027E-06   -0.118780E-06   -0.142776E-06   -0.116136E-06   -0.641824E-07   -0.986682E-07   -0.175108E-06   -0.211580E-06   -0.187794E-06   -0.168825E-06   -0.146359E-06   -0.140036E-06   -0.174069E-06   -0.235798E-06   -0.285793E-06   -0.294652E-06   -0.286607E-06   -0.267424E-06   -0.310425E-06   -0.378203E-06   -0.452576E-06   -0.519177E-06   -0.56

   -0.327707E-07    0.111215E-06    0.191404E-06    0.130427E-06    0.127739E-08   -0.140447E-06   -0.245134E-06   -0.272102E-06   -0.188938E-06   -0.822897E-08    0.175375E-06    0.236239E-06    0.175816E-06    0.419505E-07   -0.127139E-06   -0.269253E-06   -0.350077E-06   -0.377451E-06   -0.292173E-06   -0.147818E-06    0.263987E-07    0.208273E-06    0.329736E-06    0.361294E-06    0.286752E-06    0.130794E-06   -0.243166E-07   -0.149705E-06   -0.237911E-06   -0.264824E-06   -0.211172E-06   -0.204120E-06   -0.261045E-06   -0.324477E-06   -0.452198E-06   -0.518700E-06   -0.404472E-06   -0.164448E-06    0.388902E-07    0.177833E-06    0.189487E-06    0.402058E-07   -0.230468E-06   -0.513054E-06   -0.722585E-06   -0.843241E-06   -0.822762E-06   -0.707150E-06   -0.582091E-06   -0.558202E-06   -0.676475E-06   -0.920445E-06   -0.122279E-05   -0.150280E-05   -0.176551E-05   -0.199398E-05   -0.217761E-05   -0.232939E-05   -0.242210E-05   -0.257512E-05   -0.281831E-05   -0.318940E-05   -0.36

In this next and final singularity example, we get around the problem of needing to port MPI by using the same MPI that's in the container to launch the containers. The trick is this code, which comes at the end of the .bashrc. What it does is effectively replace our shell on the machine with an execution of bash inside the singularity image.

<pre>
# Put the full path to a singularity image in the file $HOME/work/sing.txt.
if [ -r $HOME/work/sing.txt ]
then
    IMAGE=$(cat $HOME/work/sing.txt)
fi
if [ "$IMAGE" != "" ]
then
    if [ -r "$IMAGE" ]
    then
        # If the SINGULARITY_CONTAINER variable is set,
        # then we are already in the container
        if [ "$SINGULARITY_CONTAINER" = "" ]
        then
            # Switch to running inside singularity
            exec singularity exec $IMAGE bash --login
        fi
    else
        echo Could not read image file $IMAGE
    fi
fi
</pre>

In [16]:
# Record the image path in ~/work/sing.txt, the file the .bashrc snippet above
# reads to decide whether to re-exec the login shell inside Singularity.
# NOTE(review): the absolute path assumes the home directory is /home/jovyan,
# matching the ~/singu directory used when the image was built -- confirm.
!echo /home/jovyan/singu/funwave-tvd.img > ~/work/sing.txt

There's no need to call singularity explicitly, as it's called by each invocation of .bashrc. Note that the funwave-tvd we are executing is the one from inside the image.

In [17]:
!files-upload -F input.txt -S ${AGAVE_STORAGE_SYSTEM_ID} ./
r.runagavecmd(
    "rm -fr output && "+
    "mpirun -np 4 /home/install/FUNWAVE-TVD/src/funwave_vessel && "+
    "tar cvzf singout.tgz output",
    "agave://${AGAVE_STORAGE_SYSTEM_ID}/input.txt"
)

Uploading input.txt...
######################################################################## 100.0%
REMOTE_COMMAND=rm -fr output && mpirun -np 4 /home/install/FUNWAVE-TVD/src/funwave_vessel && tar cvzf singout.tgz output
REQUESTBIN_URL=https://requestbin.agaveapi.co/1ghh12i1

 ** QUERY STRING FOR REQUESTBIN **
https://requestbin.agaveapi.co/1ghh12i1?inspect

INPUTS={"datafile":"agave://nectar-storage-stevenrbrandt/input.txt"}
JOB_FILE=job-remote-19335.txt
Writing file `job-remote-19335.txt'
OUTPUT=Successfully submitted job 1348365403518603751-242ac11b-0001-007
JOB_ID=1348365403518603751-242ac11b-0001-007
STAT=PENDING
STAT=PENDING
STAT=PROCESSING_INPUTS
STAT=STAGING_INPUTS
STAT=STAGED
STAT=STAGED
STAT=SUBMITTING
STAT=SUBMITTING
STAT=SUBMITTING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=RUNNING
STAT=FINISHED
CMD=jobs-output-get 1348365403518603751-242ac11b-0001-007 fork-command-1.

In [18]:
# Discard the local results of the previous run so nothing stale is listed.
!rm -fr output singout.tgz
# Download and unpack the archive produced by the latest job, then list the
# extracted result files.
!jobs-output-get ${JOB_ID} singout.tgz
!tar xzf singout.tgz
!ls output

######################################################################## 100.0%######################                81.4%
eta_00001  eta_00006  eta_00011  u_00005  u_00010  v_00004  v_00009
eta_00002  eta_00007  u_00001	 u_00006  u_00011  v_00005  v_00010
eta_00003  eta_00008  u_00002	 u_00007  v_00001  v_00006  v_00011
eta_00004  eta_00009  u_00003	 u_00008  v_00002  v_00007
eta_00005  eta_00010  u_00004	 u_00009  v_00003  v_00008


In [19]:
# Clean up so that we don't boot into the singularity image without intending to.
# The .bashrc hook only switches into Singularity while ~/work/sing.txt is
# readable; -f keeps this cell from failing if the file is already gone.
!rm -f ~/work/sing.txt