Permalink
Browse files

Enabled the workflow to be executed on Jetstream

  • Loading branch information...
1 parent f96b5c9 commit b428b9d797af7ceaa174ee2193e4212ef20c1d47 @rynge rynge committed Dec 13, 2016
Showing with 161 additions and 58 deletions.
  1. +13 −0 sites.xml.template
  2. +81 −41 submit
  3. +5 −0 tools/StringTie
  4. +5 −0 tools/cuff
  5. +17 −7 tools/dax-level-1
  6. +15 −7 tools/dax-level-2
  7. +5 −0 tools/hisat2
  8. +5 −0 tools/merge
  9. +5 −1 tools/merge-bridge
  10. +5 −2 tools/sra-download
  11. +5 −0 tools/tophat
View
@@ -34,5 +34,18 @@
<profile namespace="env" key="http_proxy" ></profile>
<profile namespace="env" key="OSG_SQUID_LOCATION" ></profile>
</site>
+
+ <site handle="jetstream-staging" arch="x86_64" os="LINUX">
+ <directory type="shared-scratch" path="/tmp/staging-${USER}">
+ <file-server operation="all" url="scp://${USER}@${HOSTNAME}/tmp/staging-${USER}"/>
+ </directory>
+ </site>
+
+ <site handle="jetstream-condorpool" arch="x86_64" os="LINUX">
+ <profile namespace="pegasus" key="style" >condor</profile>
+ <profile namespace="condor" key="universe" >vanilla</profile>
+ <profile namespace="condor" key="request_memory" >2 GB</profile>
+ <profile namespace="condor" key="request_disk" >1 GB</profile>
+ </site>
</sitecatalog>
View
122 submit
@@ -2,49 +2,89 @@
set -e
-# needed for stashcp to be picked up the site catalog for the local site
-module load xrootd
-module load stashcp
+# are we on OSG Connect?
+if [ -e /stash2 ]; then
-export PATH=/home/rynge/software/pegasus-4.7.0dev/bin:$PATH
-#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/rynge/software/pegasus-4.7.0dev/bin
+ # needed for stashcp to be picked up by the site catalog for the local site
+ module load xrootd
+ module load stashcp
+
+ export PATH=/home/rynge/software/pegasus-4.7.0dev/bin:$PATH
+
+ TOPDIR=`pwd`
+
+ export RUN_ID=osg-gem-`date +'%s'`
+
+ export RUN_DIR=/local-scratch/$USER/workflows/$RUN_ID
+ mkdir -p $RUN_DIR/scratch/$RUN_ID/level-2
+ # make the data available over http
+ mkdir -p /stash2/user/$USER/public/$RUN_ID/data
+ ln -s /stash2/user/$USER/public/$RUN_ID/data $RUN_DIR/data
+
+ # generate the site catalog
+ SC=$RUN_DIR/sites.xml
+ envsubst <sites.xml.template >$SC
+
+ # generate the dax
+ export PYTHONPATH=`pegasus-config --python`
+ ./tools/dax-level-1 $RUN_ID $RUN_DIR $RUN_DIR/data
+
+ echo
+ echo "An 'Output's directory will be created within the base of the workflow directory."
+ echo "This directory, $RUN_DIR/outputs"
+ echo "will have a 'merged_GEM.tab' file, an expression vector for each individual file,"
+ echo "and all standard output files from trimmomatic/hisat2 jobs."
+
+ # plan and submit the workflow
+ echo
+ pegasus-plan \
+ -Dpegasus.catalog.site.file=$SC \
+ --conf pegasus.conf \
+ --relative-dir $RUN_ID \
+ --sites condorpool \
+ --staging-site stash \
+ --output-site local \
+ --dir $RUN_DIR/workflow \
+ --dax dax.xml \
+ --submit
+else
+ # jetstream
-TOPDIR=`pwd`
-
-export RUN_ID=osg-gem-`date +'%s'`
-
-export RUN_DIR=/local-scratch/$USER/workflows/$RUN_ID
-mkdir -p $RUN_DIR/scratch/$RUN_ID/level-2
-# make the data availabile over http
-mkdir -p /stash2/user/$USER/public/$RUN_ID/data
-ln -s /stash2/user/$USER/public/$RUN_ID/data $RUN_DIR/data
-
-# generate the site catalog
-SC=$RUN_DIR/sites.xml
-envsubst <sites.xml.template >$SC
-
-# generate the dax
-export PYTHONPATH=`pegasus-config --python`
-./tools/dax-level-1 $RUN_ID $RUN_DIR $RUN_DIR/data
-
-echo
-echo "An 'Output's directory will be created within the base of the workflow directory."
-echo "This directory, $RUN_DIR/outputs"
-echo "will have a 'merged_GEM.tab' file, an expression vector for each individual file,"
-echo "and all standard output files from trimmomatic/hisat2 jobs."
-
-# plan and submit the workflow
-echo
-pegasus-plan \
- -Dpegasus.catalog.site.file=$SC \
- --conf pegasus.conf \
- --relative-dir $RUN_ID \
- --sites condorpool \
- --staging-site stash \
- --output-site local \
- --dir $RUN_DIR/workflow \
- --dax dax.xml \
- --submit
+ TOPDIR=`pwd`
+
+ export RUN_ID=osg-gem-`date +'%s'`
+
+ export RUN_DIR=$HOME/workflows/$RUN_ID
+ mkdir -p $RUN_DIR/scratch/$RUN_ID/level-2
+ mkdir -p $RUN_DIR/data
+
+ # generate the site catalog
+ SC=$RUN_DIR/sites.xml
+ envsubst <sites.xml.template >$SC
+
+ # generate the dax
+ export PYTHONPATH=`pegasus-config --python`
+ ./tools/dax-level-1 $RUN_ID $RUN_DIR $RUN_DIR/data
+
+ echo
+ echo "An 'Output's directory will be created within the base of the workflow directory."
+ echo "This directory, $RUN_DIR/outputs"
+ echo "will have a 'merged_GEM.tab' file, an expression vector for each individual file,"
+ echo "and all standard output files from trimmomatic/hisat2 jobs."
+
+ # plan and submit the workflow
+ echo
+ pegasus-plan \
+ -Dpegasus.catalog.site.file=$SC \
+ --conf pegasus.conf \
+ --relative-dir $RUN_ID \
+ --sites jetstream-condorpool \
+ --staging-site jetstream-staging \
+ --output-site local \
+ --dir $RUN_DIR/workflow \
+ --dax dax.xml \
+ --submit
+fi
View
@@ -1,5 +1,10 @@
#!/bin/bash
+# module init required when running on non-OSG resources, and has to be sourced
+# before set -e as sometimes it exits non-0 when a module environment is
+# already set up
+. /cvmfs/oasis.opensciencegrid.org/osg/sw/module-init.sh
+
set -e
module load java/8u25
View
@@ -1,5 +1,10 @@
#!/bin/bash
+# module init required when running on non-OSG resources, and has to be sourced
+# before set -e as sometimes it exits non-0 when a module environment is
+# already set up
+. /cvmfs/oasis.opensciencegrid.org/osg/sw/module-init.sh
+
set -e
module load java/8u25
View
@@ -142,13 +142,23 @@ for job in prepare_jobs:
# sub workflow job
j3 = DAX("level-2.dax")
-j3.addArguments("-Dpegasus.catalog.site.file=%s/sites.xml" % (run_dir),
- "--sites", "condorpool",
- "--staging-site", "stash",
- "--output-site", "local",
- "--basename", "level-2",
- "--force",
- "--cleanup", "none")
+# are we on OSG Connect?
+if os.path.exists("/stash2"):
+ j3.addArguments("-Dpegasus.catalog.site.file=%s/sites.xml" % (run_dir),
+ "--sites", "condorpool",
+ "--staging-site", "stash",
+ "--output-site", "local",
+ "--basename", "level-2",
+ "--force",
+ "--cleanup", "none")
+else:
+ j3.addArguments("-Dpegasus.catalog.site.file=%s/sites.xml" % (run_dir),
+ "--sites", "jetstream-condorpool",
+ "--staging-site", "jetstream-staging",
+ "--output-site", "local",
+ "--basename", "level-2",
+ "--force",
+ "--cleanup", "none")
j3.uses(subdax_file, link=Link.INPUT)
dax.addDAX(j3)
dax.depends(parent=j2, child=j3)
View
@@ -236,8 +236,11 @@ def main():
exe.addProfile(Profile(Namespace.CONDOR, "request_memory", memory))
dax.addExecutable(exe)
- # has to be the one under public/
- data_dir = "/stash2/user/" + getpass.getuser() + "/public/" + run_id + "/data"
+ # on OSG Connect, has to be the one under public/
+ if os.path.exists("/stash2"):
+ data_dir = "/stash2/user/" + getpass.getuser() + "/public/" + run_id + "/data"
+ else:
+ data_dir = run_dir + "/data"
# we need a bunch of workflows, and one merge/cuff for each base input
for base_name in os.listdir(data_dir):
@@ -256,20 +259,25 @@ def main():
continue
# use stash urls for the data so we can bypass and grab it directly from
- # the jobs
- base_url = "http://stash.osgconnect.net/~" + getpass.getuser() + "/" + run_id + \
- "/data/" + base_name + "/" + part
+ # the jobs (OSG Connect uses stash)
+ if os.path.exists("/stash2"):
+ base_url = "http://stash.osgconnect.net/~" + getpass.getuser() + "/" + run_id + \
+ "/data/" + base_name + "/" + part
+ site_name = "stash"
+ else:
+ base_url = "file://" + run_dir + "/data/" + base_name + "/" + part
+ site_name = "local"
common_part = in_name
common_part = re.sub(".*-forward\-", "", common_part)
common_part = re.sub("\.gz", "", common_part)
for_file = File(base_name + "-forward-" + common_part)
- for_file.addPFN(PFN(base_url + "/" + base_name + "-forward-" + common_part, "stash"))
+ for_file.addPFN(PFN(base_url + "/" + base_name + "-forward-" + common_part, site_name))
dax.addFile(for_file)
rev_file = File(base_name + "-reverse-" + common_part)
- rev_file.addPFN(PFN(base_url + "/" + base_name + "-reverse-" + common_part, "stash"))
+ rev_file.addPFN(PFN(base_url + "/" + base_name + "-reverse-" + common_part, site_name))
dax.addFile(rev_file)
if conf.getboolean("config", "hisat2"):
View
@@ -1,5 +1,10 @@
#!/bin/bash
+# module init required when running on non-OSG resources, and has to be sourced
+# before set -e as sometimes it exits non-0 when a module environment is
+# already set up
+. /cvmfs/oasis.opensciencegrid.org/osg/sw/module-init.sh
+
set -e
module load samtools/1.3.1
View
@@ -1,5 +1,10 @@
#!/bin/bash
+# module init required when running on non-OSG resources, and has to be sourced
+# before set -e as sometimes it exits non-0 when a module environment is
+# already set up
+. /cvmfs/oasis.opensciencegrid.org/osg/sw/module-init.sh
+
set -e
module load samtools/1.3.1
View
@@ -9,7 +9,11 @@ BASE_NAME="$4"
START_DIR=`pwd`
-cd /stash2/user/$CONNECT_USER/public/$RUN_ID/level-2
+if [ -e /stash2/user/$CONNECT_USER/public/$RUN_ID/level-2 ]; then
+ cd /stash2/user/$CONNECT_USER/public/$RUN_ID/level-2
+else
+ cd $HOME/workflows/$RUN_ID/data
+fi
for DIR in `ls -d $BASE_NAME-*`; do
cd $DIR
View
@@ -1,12 +1,15 @@
#!/bin/bash
+# module init required when running on non-OSG resources, and has to be sourced
+# before set -e as sometimes it exits non-0 when a module environment is
+# already set up
. /cvmfs/oasis.opensciencegrid.org/osg/sw/module-init.sh
+set -e
+
module load java/8u25
module load sra/2.5.4
-set -e
-
SRA_ID=$1
FORWARD=$2
REVERSE=$3
View
@@ -1,5 +1,10 @@
#!/bin/bash
+# module init required when running on non-OSG resources, and has to be sourced
+# before set -e as sometimes it exits non-0 when a module environment is
+# already set up
+. /cvmfs/oasis.opensciencegrid.org/osg/sw/module-init.sh
+
set -e
module load tophat/2.1.1

0 comments on commit b428b9d

Please sign in to comment.