### Run Every Time
A lot of detailed configuration needs to be
set if we're to use Agave to run on a remote
machine.

In [1]:
export MACHINE=rostam.cct.lsu.edu
export AGAVE_USERNAME=stevenrbrandt
export MACHINE_USERNAME=sbrandt
export APP_NAME=drawgau
export PORT=8000
export WORK_DIR=/home/sbrandt
export HOME_DIR=/home/sbrandt
export SCRATCH_DIR=/home/sbrandt
export EMAIL=sbrandt@cct.lsu.edu
export DEPLOYMENT_PATH=agave-deployment

In [2]:
# The short version of the machine name
export MACH=$(echo $MACHINE|perl -p -e 's/\..*//')
export NETWORK=$(echo $MACHINE|perl -p -e 's/.*?\.//')
echo "Configuring for machine $MACH (full name is $MACHINE) on network '${NETWORK}'"

Configuring for machine rostam (full name is rostam.cct.lsu.edu) on network 'cct.lsu.edu'


In [4]:
export PATH=$HOME/agave/cli/bin:$PATH

In [3]:
mkdir -p $HOME/agave
cd $HOME/agave
export AGAVE_JSON_PARSER=jq

## The next few cells only needs to be  run the first time

Before you run them, create a password for yourself and copy it into the paste buffer.

Here's a nice password generator: https://www.cct.lsu.edu/~sbrandt/passwds.php

In [4]:
git clone https://bitbucket.org/agaveapi/cli.git

fatal: destination path 'cli' already exists and is not an empty directory.


: 128

In [6]:
# Store the password in the environment variable without displaying it.
export PASSWORD=$(xclip -selection clilpboard -o)

In [11]:
tenants-init -t agave.prod

[1;0mYou are now configured to interact with the APIs at https://public.agaveapi.co/[0m


In [12]:
# You don't need to delete the client you haven't created yet. Attempting
# to do so won't actually do anything. However, it's useful to know how to it.
clients-delete -u stevenrbrandt -p $PASSWORD ${APP_NAME}-${MACH}

[1;0mSuccessfully deleted client drawgau-rostam[0m


In [13]:
clients-create -S -N ${APP_NAME}-${MACH} -u ${AGAVE_USERNAME} -p "${PASSWORD}"

[1;0m[1;0mSuccessfully created client drawgau-rostam
key: 4m6grCr7ow3T36o6o0uU4H0lP5ca 
secret: cU8E1D__I5U3FV2MYG7xZXxLXE8a[0m[0m


In [14]:
auth-tokens-create -u ${AGAVE_USERNAME} -p "${PASSWORD}"

[1;0mToken for agave.prod:stevenrbrandt successfully refreshed and cached for 14400 seconds
68b7224ab42810f1857bc4b2208fd5b[0m


If you already have an Agave account and have previously logged in, you can just refresh your tokens

In [17]:
# The public key appears below. You need to append the public
# key to the end of your ~/.ssh/authorized_keys file on ${MACHINE}.
ssh-keygen -y -f ${MACH}-key -P "" < /dev/null

ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC5ran1lIYppauEXL2pSWvBvZCgHGBsvSkioCgJ4fAbxTbIEEOqo6AiCPHCXczch9aswZ4oCI+gVJTCaYrYlyw3Uv1nSy7C8dYo6+IqhP+d4Rmzeo/s//nFMVT8L2+0PEMboFO8XNPT5FaoeIR+JrGcDR7U1feFaaI4pnLEt3/fsWjcHGTyxLbwnsdG9eVFqrnJY8QsESohiN27z5c6XZ3r3fk3zjihUhk+4vWXlGLW4jsyo9HB2lAGL/KOBbOAwa3hhiZZ2W8ZlZC0Mw34dE3WbEHD0iEAgUfMZLy93+UKvSmSJP8hJ+EvVfWm2Yj/Y8qsDUs+7Y//dUnPW5p7kSnh


In [45]:
echo Attempting to login to ${MACHINE_USERNAME}@${MACHINE} on port ${PORT}
ssh -o "BatchMode=yes" -o "IdentityFile=${MACH}-key.pub" ${MACHINE_USERNAME}@${MACHINE} -p ${PORT} \
  grep "$(cat ${MACH}-key.pub|cut -d' ' -f2)" .ssh/authorized_keys > /dev/null
if [ "$?" = 0 ]; then echo Key is installed ; else echo Key not installed; fi

Attempting to login to sbrandt@rostam.cct.lsu.edu on port 8000
Key is installed


In [18]:
jsonpki --public ${MACH}-key.pub > ${MACH}-key.pub.txt
jsonpki --private ${MACH}-key > ${MACH}-key.txt

## Run Every Time

In [5]:
auth-tokens-refresh

[1;0mToken for agave.prod:stevenrbrandt successfully refreshed and cached for 14400 seconds
d8e88712ec8b9b214eb1749bcd470[0m


In [6]:
PUB_KEY=$(cat ${MACH}-key.pub.txt)
PRIV_KEY=$(cat ${MACH}-key.txt)

### Configure the Storage Machine

In [19]:
cat > ${MACH}-storage-${MACHINE_USERNAME}.txt << EOF
{
    "id": "${MACH}-storage-${MACHINE_USERNAME}",
    "name": "${MACH} storage (${MACHINE_USERNAME})",
    "description": "The ${MACH} computer",
    "site": "${NETWORK}",
    "type": "STORAGE",
    "storage": {
        "host": "${MACHINE}",
        "port": ${PORT},
        "protocol": "SFTP",
        "rootDir": "/",
        "homeDir": "/home/${MACHINE_USERNAME}",
        "auth": {
          "username" : "${MACHINE_USERNAME}",
          "publicKey" : "${PUB_KEY}",
          "privateKey" : "${PRIV_KEY}",
          "type" : "SSHKEYS"
        }
    }
}
EOF

In [20]:
systems-addupdate -F ${MACH}-storage-${MACHINE_USERNAME}.txt

[1;0m[1;0mSuccessfully added system rostam-storage-sbrandt[0m[0m


In [21]:
# Test to see if this worked...
files-list -S ${MACH}-storage-${MACHINE_USERNAME} ./ | head -5

.
.bash_history
.bashrc
.cache
.cling_history


### Configure the Execution Machine

In [22]:
# Assuming ${MACHINE} uses slurm as a queuing system, this will get info about the queues
ssh ${MACHINE_USERNAME}@${MACHINE} -p ${PORT} sinfo 

PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST
rostam*      up 24-00:00:0     18  alloc bahram,beowulf[00-14],marvin[00,08]
rostam*      up 24-00:00:0     26   idle ariel[00-01],carson,leo[00-02],lyra[00-01],marvin[01-07,09-15],reno,trillian[00-01],tycho
cuda         up 24-00:00:0      1  alloc bahram
cuda         up 24-00:00:0      2   idle reno,tycho
sc13         up 24-00:00:0      2   idle carson,reno
phi          up 24-00:00:0      2   idle carson,reno
leo          up 24-00:00:0      3   idle leo[00-02]
ariel        up 24-00:00:0      2   idle ariel[00-01]
beowulf      up 24-00:00:0     15  alloc beowulf[00-14]
lyra         up 24-00:00:0      2   idle lyra[00-01]
trillian     up 24-00:00:0      2   idle trillian[00-01]
tycho        up 24-00:00:0      1   idle tycho
bahram       up 24-00:00:0      1  alloc bahram
carson       up 24-00:00:0      1   idle carson
reno         up 24-00:00:0      1   idle reno
marvin       up 24-00:00:0      1  alloc marvin00
marvin       up 24-00:00:

In [23]:
# Gather info about the machine
# Executing this cell is essential
export QUEUE=rostam # Name of default queue
export NODES=17 # Number of nodes in queue
export MAX_TIME=24:00:00 # Max duration of a job
PROCS=$(ssh ${MACHINE_USERNAME}@${MACHINE} -p ${PORT} grep '^processor' /proc/cpuinfo|cut -d: -f2|tail -1)
PROCS=$(($PROCS+1))
echo Procs: ${PROCS}

Procs: 16


In [25]:
# Edit any parts of this file that you know need to be changed for your machine.
cat > ${MACH}-exec-${MACHINE_USERNAME}.txt << EOF
{
    "id": "${MACH}-exec-${MACHINE_USERNAME}",
    "name": "${MACH} (${MACHINE_USERNAME})",
    "description": "The ${MACH} computer",
    "site": "${NETWORK}",
    "public": false,
    "status": "UP",
    "type": "EXECUTION",
    "executionType": "HPC",
    "scheduler" : "SLURM",
    "environment": null,
    "scratchDir" : "${SCRATCH_DIR}",
    "queues": [
        {
            "name": "${QUEUE}",
            "default": true,
            "maxJobs": 1,
            "maxUserJobs": 1,
            "maxNodes": ${NODES},
            "maxProcessorsPerNode": ${PROCS},
            "minProcessorsPerNode": 1,
            "maxRequestedTime": "${MAX_TIME}"
        }
    ],
    "login": {
        "auth": {
          "username" : "sbrandt",
          "publicKey" : "${PUB_KEY}",
          "privateKey" : "${PRIV_KEY}",
          "type" : "SSHKEYS"
        },
        "host": "${MACHINE}",
        "port": ${PORT},
        "protocol": "SSH"
    },
    "maxSystemJobs": 50,
    "maxSystemJobsPerUser": 50,
    "storage": {
        "host": "rostam.cct.lsu.edu",
        "port": ${PORT},
        "protocol": "SFTP",
        "rootDir": "/",
        "homeDir": "${HOME_DIR}",
        "auth": {
          "username" : "${MACHINE_USERNAME}",
          "publicKey" : "${PUB_KEY}",
          "privateKey" : "${PRIV_KEY}",
          "type" : "SSHKEYS"
        }
    },
    "workDir": "${WORK_DIR}"
}
EOF

In [26]:
systems-addupdate -F ${MACH}-exec-${MACHINE_USERNAME}.txt

[1;0m[1;0mSuccessfully added system rostam-exec-sbrandt[0m[0m


In [27]:
# Test to see if this worked...
files-list -S ${MACH}-exec-${MACHINE_USERNAME} ./ | head -5

.
.bash_history
.bashrc
.cache
.cling_history


### Configure the Application
This will have a number of steps and pieces.
First we are creating the wrapper script. This
is the thing that will be submitted to the
resource scheduler on the remote machine.
Typically, it is a bash script.

In [12]:
cat > drawgau-wrapper.txt << EOF 
#!/bin/bash
echo 'Executing Draw Gau Code'
# Setting the x flag will echo every
# command onto stderr. This is
# for debugging, so we can see what's
# going on.
set -x
echo ==ENV=============
# The env command prints out the
# entire execution environment. This
# is also present for debugging purposes.
env
echo ==PWD=============
# We also print out the execution
# directory. Again, for debugging purposes.
pwd
module add openmpi/1.10.4
if [ "\${parfile}" = "" ]
then
  echo parfile was not set
else
  mpirun ~/drawgau/drawgau \${parfile}
fi
EOF

In [13]:
files-mkdir -S ${MACH}-storage-${MACHINE_USERNAME} -N ${DEPLOYMENT_PATH}
files-upload -F drawgau-wrapper.txt -S ${MACH}-storage-${MACHINE_USERNAME} ${DEPLOYMENT_PATH}/

[1;0mSuccessfully created folder agave-deployment[0m
Uploading drawgau-wrapper.txt...
######################################################################## 100.0%


#### The test file
Next we create and configure a test script. It simply names the wrapper and suggested parfile.

In [132]:
cat > test.txt << EOF
parfile="input.txt"
drawgau-wrapper.txt
EOF

In [30]:
files-mkdir -S ${MACH}-storage-${MACHINE_USERNAME} -N ${DEPLOYMENT_PATH}
files-upload -F test.txt -S ${MACH}-storage-${MACHINE_USERNAME} ${DEPLOYMENT_PATH}/

[1;0mSuccessfully created folder agave-deployment[0m
Uploading test.txt...
######################################################################## 100.0%


#### The Application Description
This tells Agave what it needs to know
about the application in order to run it.

In [28]:
cat > drawgau-app.txt << EOF
{  
   "name":"drawgau-${MACH}",
   "version":"1.0",
   "label":"Draw Gaussian Curve",
   "shortDescription":"Just draws a Gaussian",
   "longDescription":"",
   "deploymentSystem":"${MACH}-storage-${MACHINE_USERNAME}",
   "deploymentPath":"${DEPLOYMENT_PATH}",
   "templatePath":"drawgau-wrapper.txt",
   "testPath":"test.txt",
   "executionSystem":"${MACH}-exec-${MACHINE_USERNAME}",
   "executionType":"HPC",
   "parallelism":"SERIAL",
   "modules":[],
   "inputs":[
         {   
         "id":"parfile",
         "details":{  
            "label":"Draw Gau Parameter File: ",
            "description":"",
            "argument":null,
            "showArgument":false
         },
         "value":{  
            "default":"testInput.txt",
            "order":0,
            "required":true,
            "validator":"",
            "visible":true
         }
      }   

   ],
   "parameters":[],
   "outputs":[  
      {  
         "id":"Output",
         "details":{  
            "description":"The output",
            "label":"x y file"
         },
         "value":{  
            "default":"drawgau.txt",
            "validator":""
         }
      }
   ]
}
EOF

In [29]:
apps-addupdate -F drawgau-app.txt

[1;0m[1;0mSuccessfully added app drawgau-rostam-1.0[0m[0m


#### The Application Itself
Typically, your application will be some Fortran or C/C++ executable that you compile and run on the remote machine.
The example for this tutorial is drawgau, a C++ program that uses MPI and does nothing but draw a Guassian curve
(i.e. it writes out a file with x-y pairs that matplot lib can use to display a Gaussian).

In [72]:
cat > drawgau.cpp << EOF
#include <mpi.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <math.h>
#include <stdlib.h>

using namespace std;

void usage() {
  std::cerr << "usage: drawgau input.txt" << std::endl;
  exit(1);
}

int main(int argc, char **argv) {
  if(argc != 2)
    usage();
  ifstream i(argv[1]);

  if(!i.good())
    usage();

  MPI_Init(&argc,&argv);
  int rank,size;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);

  double minx, maxx, delx;
  i >> minx >> maxx >> delx;
  if(rank == 0) {
    cout << "minx=" << minx << endl;
    cout << "maxx=" << maxx << endl;
    cout << "delx=" << delx << endl;
    cout << "mpi size=" << size << endl;
  }

  stringstream s;
  s << "drawgau" << rank << ".txt";
  string fname = s.str();
  cout << "FILE: " << fname << endl;
  ofstream f(fname,ofstream::out);
  double delxp = (maxx - minx)/size;
  double minxp = minx+delxp*rank;
  double maxxp = minxp + delxp;
  cout << "rank=" << rank << " minxp=" << minxp << " maxxp=" << maxxp << endl;
  for(double x = minxp; x < maxxp; x += delx) {
    f << x << " " << exp(-x*x) << endl;
  }
  f.close();
  MPI_Finalize();
  return 0;
}
EOF

#### A Script to Build the Application

In [86]:
cat > build.sh << EOF
#!/bin/bash
module add openmpi/1.10.4
cd ~/drawgau
mpicxx -std=c++11 -o drawgau drawgau.cpp
EOF

#### A sample input parameter file

In [89]:
# The parameter file just has three numbers
# low-x-value high-x-value delta-x-value
echo -1 1 .01 > input.txt

In [90]:
files-mkdir -S ${MACH}-storage-${MACHINE_USERNAME} -N drawgau
files-upload -F drawgau.cpp -S ${MACH}-storage-${MACHINE_USERNAME} drawgau/
files-upload -F build.sh -S ${MACH}-storage-${MACHINE_USERNAME} drawgau/
files-upload -F input.txt -S ${MACH}-storage-${MACHINE_USERNAME} drawgau/
ssh ${MACHINE_USERNAME}@${MACHINE} -p ${PORT} bash --login ./drawgau/build.sh

[1;0mSuccessfully created folder drawgau[0m
Uploading drawgau.cpp...
######################################################################## 100.0%
Uploading build.sh...
######################################################################## 100.0%
Uploading input.txt...
######################################################################## 100.0%


### Running Jobs
We'll do this two ways. First, the command line.
To start, we must make a json descriptor for the job we want to run.

In [31]:
cat > job.txt << EOF
{
  "name":"test-jobc-1",
  "appId": "drawgau-${MACH}-1.0",
  "executionSystem": "${MACH}-exec-${MACHINE_USERNAME}",
  "batchQueue": "${QUEUE}",
  "maxRunTime": "${MAX_TIME}",
  "nodeCount": 1,
  "processorsPerNode": 1,
  "archive": false,
  "archiveSystem": "${MACH}-storage-${MACHINE_USERNAME}",
  "inputs": {
    "parfile": "agave://${MACH}-storage-${MACHINE_USERNAME}/${HOME_DIR}/drawgau/input.txt"
  },
  "parameters": {
  },
  "notifications": [
    {
      "url":"${EMAIL}",
      "event":"FINISHED",
      "persistent":false
    },
    {
      "url":"${EMAIL}",
      "event":"FAILED",
      "persistent":false
    }
  ]
}
EOF

#### Next, submit the job...

In [37]:
# Capture the output of the job submit command
OUTPUT=$(jobs-submit -F job.txt)
# Display the output of the job submit command
echo $OUTPUT
# Parse out the job id from the output
JOB_ID=$(echo $OUTPUT | cut -d' ' -f4)

Successfully submitted job 5668974957858525671-242ac11c-0001-007


#### Next, monitor its progress (optional)

In [38]:
# Watch the job to see when it finishes
while true
do
  sleep 3
  # Get the status of the job. It will always be one word.
  STAT=$(jobs-status $JOB_ID)
  echo $STAT
  
  # We are done when it finishes or fails.
  if [ "${STAT}" = FINISHED ] ;  then break ; fi
  if [ "${STAT}" = FAILED ] ;  then break ; fi
done

PENDING
PENDING
PENDING
STAGING_INPUTS
STAGED
STAGED
STAGED
STAGED
SUBMITTING
SUBMITTING
FINISHED


#### Job listing
List the various files generated in the output directory

In [39]:
jobs-output-list $JOB_ID

[1;0m.agave.archive
.agave.log
agave
drawgau-wrapper.txt
drawgau0.txt
input.txt
test-jobc-1-5668974957858525671-242ac11c-0001-007.err
test-jobc-1-5668974957858525671-242ac11c-0001-007.out
test-jobc-1.ipcexe
test.txt[0m


#### Retrieve and edit the stderr from the job

In [40]:
jobs-output-get ${JOB_ID} test-jobc-1-${JOB_ID}.err
cat test-jobc-1-${JOB_ID}.err

######################################################################## 100.0%
+ env
+ pwd
+ module add openmpi/1.10.4
++ /usr/bin/modulecmd bash add openmpi/1.10.4
+ eval LD_LIBRARY_PATH=/opt/mn/openmpi/1.10.4//lib:/opt/rh/rh-python35/root/usr/lib64:. ';export' 'LD_LIBRARY_PATH;'
++ LD_LIBRARY_PATH=/opt/mn/openmpi/1.10.4//lib:/opt/rh/rh-python35/root/usr/lib64:.
++ export LD_LIBRARY_PATH
+ '[' input.txt = '' ']'
+ mpirun /home/sbrandt/drawgau/drawgau input.txt
++ curl -sSk 'https://public.agaveapi.co/jobs/v2/trigger/job/5668974957858525671-242ac11c-0001-007/token/7523a9a5-2618-4508-af2a-42c13c5da1fa/status/CLEANING_UP?filter=id,status'
+ agave_log_response '{"status":"success","message":null,"version":"2.2.0-r5daaf3f","result":{"id":"5668974957858525671-242ac11c-0001-007","status":"FINISHED"}}'


In [41]:
jobs-output-get ${JOB_ID} drawgau0.txt

######################################################################## 100.0%


### The web-based GUI way to run a job...

In [43]:
echo Click the link to submit and run a job
echo https://togo.agaveapi.co/app/#/apps/drawgau-${MACH}-1.0/run

Click the link to submit and run a job
https://togo.agaveapi.co/app/#/apps/drawgau-rostam-1.0/run
