Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into dp-scaffold
Browse files Browse the repository at this point in the history
  • Loading branch information
dpark01 committed Feb 25, 2016
2 parents 0be49fc + a9f9df6 commit e8c20ce
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 55 deletions.
54 changes: 54 additions & 0 deletions easy-deploy-broad/README.md
@@ -0,0 +1,54 @@
## Easy deployment of viral-ngs for Broad Institute users

**viral-ngs** can be deployed on the Broad Institute cluster with help from the script in this directory, `viral-ngs-broad.sh`. This script will install an independent copy of viral-ngs from the latest source, install all dependencies, and make it simple to activate the viral-ngs environment and create projects.

### Dependencies

The script, `viral-ngs-broad.sh`, is intended to run on the Broad Institute cluster. It depends on the dotkits present on the Broad cluster and will not function properly in a different environment.

### One-line command

This one-line command will download the `viral-ngs-broad.sh` script and set up viral-ngs in the current working directory. Simply ssh to one of the login nodes and paste this command:

wget https://raw.githubusercontent.com/broadinstitute/viral-ngs/master/easy-deploy-broad/viral-ngs-broad.sh && chmod a+x ./viral-ngs-broad.sh && reuse UGER && qrsh -cwd -N "viral-ngs_deploy" -q interactive ./viral-ngs-broad.sh setup

**Note:** The script will run the install on a UGER interactive node, so you must be able to start a new interactive session. A project can be specified via `qrsh -P "<project_name>"`.

### Usage

* `viral-ngs-broad.sh setup` Installs a fresh copy of viral-ngs, installs all dependencies, and creates a directory, `viral-ngs-analysis-software`, in the current working directory.

**Resulting directories**:

```
viral-ngs-analysis-software/
venv/
viral-ngs/
```

* `source viral-ngs-broad.sh load` Loads the dotkits needed by viral-ngs and activates the Python virtual environment

* `viral-ngs-broad.sh create-project <project_name>` Creates a directory for a new Snakemake-compatible project, with data directories and symlinked run scripts. Copies in the files `Snakefile` and `config.yaml`

**Resulting directories**:

```
viral-ngs-analysis-software/
projects/
<project_name>/
Snakefile
bin/ (symlink)
config.yaml
data/
log/
reports/
run-pipe_LSF.sh (symlink)
run-pipe_UGER.sh (symlink)
samples-assembly-failures.txt
samples-assembly.txt
samples-depletion.txt
samples-runs.txt
tmp/
venv/ (symlink)
[...other project files...]
```
223 changes: 223 additions & 0 deletions easy-deploy-broad/viral-ngs-broad.sh
@@ -0,0 +1,223 @@
#!/bin/bash

# Remember the directory the script was invoked (or sourced) from.
# activate_pyenv and activate_environment cd back here on their failure paths.
STARTING_DIR=$(pwd)

# way to get the absolute path to this script that should
# work regardless of whether or not this script has been sourced
# (BASH_SOURCE[0] is used instead of $0 because $0 names the calling shell
# when the file is sourced; --canonicalize-existing is a GNU readlink long
# option, so this presumably requires GNU coreutils -- confirm for other hosts)
SCRIPT="$(readlink --canonicalize-existing "${BASH_SOURCE[0]}")"
SCRIPTPATH="$(dirname "$SCRIPT")"

# Names of the directories created next to this script by "setup" and
# "create-project".
CONTAINING_DIR="viral-ngs-analysis-software"
VIRAL_NGS_DIR="viral-ngs"
PYTHON_VENV_DIR="venv"
PROJECTS_DIR="projects"

# Absolute locations derived from the script's own directory, so every
# sub-command operates on the same install regardless of the caller's cwd.
PYTHON_VENV_PATH="$SCRIPTPATH/$CONTAINING_DIR/$PYTHON_VENV_DIR"
PROJECTS_PATH="$SCRIPTPATH/$CONTAINING_DIR/$PROJECTS_DIR"
VIRAL_NGS_PATH="$SCRIPTPATH/$CONTAINING_DIR/$VIRAL_NGS_DIR"

# determine if this script has been sourced
# via: http://stackoverflow.com/a/28776166/2328433
# Sets sourced=1 when the file is being sourced and sourced=0 when it is
# executed. Three shell-specific checks are OR-ed together:
#   zsh:  ZSH_EVAL_CONTEXT ends in ":file"
#   ksh:  the resolved path of $0 differs from ${.sh.file}
#   bash: $0 differs from $BASH_SOURCE
# Each check is guarded by a test that the shell's own variable is non-empty,
# so the shell-specific part is only evaluated in the matching shell.
([[ -n $ZSH_EVAL_CONTEXT && $ZSH_EVAL_CONTEXT =~ :file$ ]] ||
[[ -n $KSH_VERSION && $(cd "$(dirname -- "$0")" &&
printf '%s' "${PWD%/}/")$(basename -- "$0") != "${.sh.file}" ]] ||
[[ -n $BASH_VERSION && $0 != "$BASH_SOURCE" ]]) && sourced=1 || sourced=0

# Load the Broad dotkits that viral-ngs depends on and export the tool
# locations GATK_PATH and NOVOALIGN_PATH.
#
# Globals:
#   GATK_PATH       (read/exported) left untouched when already non-empty;
#                   otherwise derived from the GenomeAnalysisTK alias.
#   NOVOALIGN_PATH  (read/exported) left untouched when already non-empty;
#                   otherwise the directory containing `novoalign`.
# Outputs:
#   Informational messages on stdout; a warning on stderr when a tool
#   location cannot be determined.
#
# The function modifies the calling shell's environment, so it only has a
# lasting effect when this file is sourced.
function load_dotkits(){
    # Provides the `reuse` command used below.
    source /broad/software/scripts/useuse
    #reuse .anaconda3-2.5.0
    reuse .anaconda-2.1.0
    reuse .oracle-java-jdk-1.7.0-51-x86-64
    reuse .bzip2-1.0.6
    reuse .zlib-1.2.6
    reuse .gcc-4.5.3
    reuse .python-3.4.3

    if [ -z "$GATK_PATH" ]; then
        reuse .gatk3-2.2
        # the Broad sets an alias for GATK, so we need to parse out the path
        local gatk_jar
        gatk_jar="$(alias | grep GenomeAnalysisTK | perl -lape 's/(.*)\ (\/.*.jar).*/$2/g')"
        if [ -n "$gatk_jar" ]; then
            # Quoted so a jar path containing whitespace is not word-split
            # into several dirname arguments.
            export GATK_PATH="$(dirname "$gatk_jar")"
        else
            echo "WARNING: could not determine GATK_PATH from the GenomeAnalysisTK alias." >&2
        fi
    else
        echo "GATK_PATH is set to '$GATK_PATH'"
        echo "Continuing..."
    fi

    if [ -z "$NOVOALIGN_PATH" ]; then
        reuse .novocraft-3.02.08
        local novoalign_bin
        novoalign_bin="$(which novoalign)"
        if [ -n "$novoalign_bin" ]; then
            export NOVOALIGN_PATH="$(dirname "$novoalign_bin")"
        else
            echo "WARNING: novoalign was not found on PATH; NOVOALIGN_PATH is not set." >&2
        fi
    else
        echo "NOVOALIGN_PATH is set to '$NOVOALIGN_PATH'"
        echo "Continuing..."
    fi
}

# Create the directory skeleton for a new Snakemake-compatible project.
#
# Arguments:
#   $1 - project folder name; created as $PROJECTS_PATH/$1
# Globals (read):
#   PROJECTS_PATH, VIRAL_NGS_PATH, PYTHON_VENV_PATH
# Returns:
#   0 when the whole skeleton was created, non-zero as soon as any step
#   fails (including when the project directory already exists).
#
# Everything is addressed by absolute path, so the caller's working
# directory is never changed.
function create_project(){
    # first arg is project folder name
    local project_name="$1"
    if [ -z "$project_name" ]; then
        echo "create_project: a project name is required." >&2
        return 1
    fi

    local project_path="$PROJECTS_PATH/$project_name"
    local data_path="$project_path/data"

    mkdir -p "$PROJECTS_PATH" || return 1
    # Deliberately no -p: an existing project must not be overwritten.
    mkdir "$project_path" || return 1

    mkdir "$data_path" "$project_path/log" "$project_path/reports" "$project_path/tmp" || return 1
    mkdir "$data_path/00_raw" \
          "$data_path/01_cleaned" \
          "$data_path/01_per_sample" \
          "$data_path/02_align_to_self" \
          "$data_path/02_assembly" \
          "$data_path/03_align_to_ref" \
          "$data_path/03_interhost" \
          "$data_path/04_intrahost" || return 1

    # Empty sample lists for the user to fill in.
    touch "$project_path/samples-depletion.txt" \
          "$project_path/samples-assembly.txt" \
          "$project_path/samples-runs.txt" \
          "$project_path/samples-assembly-failures.txt" || return 1

    # The pipeline definition and its config are copied (not linked) so
    # each project can edit its own.
    cp "$VIRAL_NGS_PATH/pipes/config.yaml" "$VIRAL_NGS_PATH/pipes/Snakefile" "$project_path/" || return 1

    # Shared software and run scripts are symlinked back to the install.
    ln -s "$VIRAL_NGS_PATH/" "$project_path/bin" || return 1
    ln -s "$PYTHON_VENV_PATH/" "$project_path/venv" || return 1
    ln -s "$VIRAL_NGS_PATH/pipes/Broad_UGER/run-pipe.sh" "$project_path/run-pipe_UGER.sh" || return 1
    ln -s "$VIRAL_NGS_PATH/pipes/Broad_LSF/run-pipe.sh" "$project_path/run-pipe_LSF.sh" || return 1
}

# Activate the Python virtual environment located at $PYTHON_VENV_PATH.
#
# Globals (read):
#   PYTHON_VENV_PATH - directory of the virtual environment
#   STARTING_DIR     - directory to cd back to when the venv is missing
# Returns:
#   the status of sourcing bin/activate when the venv directory exists;
#   1 (after changing back to $STARTING_DIR) when it does not.
#
# Paths are quoted so an install located under a directory whose name
# contains whitespace can still be activated.
function activate_pyenv(){
    if [ -d "$PYTHON_VENV_PATH" ]; then
        source "$PYTHON_VENV_PATH/bin/activate"
    else
        echo "$PYTHON_VENV_PATH/ does not exist. Exiting."
        cd "$STARTING_DIR"
        return 1
    fi
}

# Prepare the calling shell for viral-ngs: load the dotkits, change into
# the install directory and activate the Python virtual environment.
#
# Globals (read):
#   SCRIPTPATH, CONTAINING_DIR - together locate the install directory
#   STARTING_DIR               - directory to cd back to on failure
# Returns:
#   the status of activate_pyenv on success; 1 when the install directory
#   is missing or cannot be entered.
function activate_environment(){
    load_dotkits

    local install_dir="$SCRIPTPATH/$CONTAINING_DIR"

    echo "$install_dir"
    if [ -d "$install_dir" ]; then
        # Quoted so an install path containing whitespace works; a failed
        # cd is reported instead of silently activating from the wrong place.
        cd "$install_dir" || return 1
    else
        echo "viral-ngs parent directory not found: $CONTAINING_DIR not found."
        echo "Have you run the setup?"
        echo "Usage: $0 setup"
        cd "$STARTING_DIR"
        return 1
    fi

    activate_pyenv
}

# Print the one-line usage summary to stdout.
#
# Globals (read):
#   SCRIPT - path to this script; only its file name is shown. Quoted so a
#            path containing whitespace is not split into basename's
#            NAME and SUFFIX arguments.
function print_usage(){
    echo "Usage: $(basename "$SCRIPT") {setup,load,create-project}"
}

# Command dispatch: the first positional argument selects the sub-command.
#
#   setup                          must be executed (not sourced)
#   load                           must be sourced
#   create-project <project_name>  may be executed or sourced
#
# Every bail-out uses `exit` when the file is executed and `return` when it
# is sourced, because `exit` in a sourced file would close the user's shell.
# All usage errors and failed steps finish with a non-zero status.
if [ $# -eq 0 ]; then
    print_usage
    if [[ $sourced -eq 0 ]]; then
        exit 1
    else
        return 1
    fi
else
    case "$1" in
        "setup")
            if [ $# -eq 1 ]; then
                if [[ $sourced -eq 1 ]]; then
                    echo "ABORTING. $(basename "$SCRIPT") must not be sourced during setup"
                    echo "Usage: $(basename "$SCRIPT") setup"
                    return 1
                else
                    # This branch only runs when executed, so `exit` is safe.
                    mkdir -p "$SCRIPTPATH/$CONTAINING_DIR" || exit 1
                    cd "$SCRIPTPATH/$CONTAINING_DIR" || exit 1

                    # clone viral-ngs if it does not already exist
                    if [ ! -d "$VIRAL_NGS_PATH" ]; then
                        git clone https://github.com/broadinstitute/viral-ngs.git || exit 1
                    else
                        echo "$VIRAL_NGS_DIR/ already exists. Skipping clone."
                    fi

                    load_dotkits

                    if [ ! -d "$PYTHON_VENV_PATH" ]; then
                        pyvenv "$PYTHON_VENV_PATH" || exit 1
                    else
                        echo "$PYTHON_VENV_PATH/ already exists. Skipping python venv setup."
                    fi

                    # Stop here if the venv could not be activated; otherwise
                    # pip would install into whatever Python is on PATH.
                    activate_pyenv || exit 1

                    pip install -r "$VIRAL_NGS_PATH/requirements.txt" || exit 1
                    pip install -r "$VIRAL_NGS_PATH/requirements-pipes.txt" || exit 1

                    # install tools
                    nosetests "$VIRAL_NGS_PATH/test/unit/test_tools.py"

                    echo "Setup complete. Do you want to start a project? Run:"
                    echo "$0 create-project <project_name>"
                    echo ""
                fi
            else
                echo "Usage: $(basename "$SCRIPT") setup"
                if [[ $sourced -eq 0 ]]; then
                    exit 1
                else
                    return 1
                fi
            fi
            ;;
        "load")
            if [ $# -eq 1 ]; then
                if [[ $sourced -eq 0 ]]; then
                    echo "ABORTING. $(basename "$SCRIPT") must be sourced."
                    echo "Usage: source $(basename "$SCRIPT") load"
                    # Executing "load" cannot affect the caller's shell, so
                    # report it as a failure.
                    exit 1
                else
                    activate_environment || return 1
                    ls -lah
                    return 0
                fi
            else
                echo "Usage: source $(basename "$SCRIPT") load"
                if [[ $sourced -eq 0 ]]; then
                    exit 1
                else
                    return 1
                fi
            fi
            ;;
        "create-project")
            if [ $# -ne 2 ]; then
                echo "Usage: $(basename "$SCRIPT") create-project <project_name>"
                if [[ $sourced -eq 0 ]]; then
                    exit 1
                else
                    return 1
                fi
            else
                if [ ! -d "$PROJECTS_PATH/$2" ]; then
                    create_project "$2" && echo "Project created: $PROJECTS_PATH/$2" && echo "OK"
                else
                    echo "WARNING: $PROJECTS_PATH/$2/ already exists."
                    echo "Skipping project creation."
                fi

                echo ""

                if [[ "$VIRTUAL_ENV" != "$PYTHON_VENV_PATH" ]]; then
                    echo "It looks like the viral-ngs environment is not active."
                    echo "To use viral-ngs with your project, source this file."
                    echo "Example: source $(basename "$SCRIPT") load"
                else
                    # if the viral-ngs environment is active and we have sourced this file
                    if [[ $sourced -eq 1 ]]; then
                        # change to the project directory
                        if [ -d "$PROJECTS_PATH/$2" ]; then
                            cd "$PROJECTS_PATH/$2"
                        fi
                        return 0
                    fi
                fi
            fi
            ;;
        *)
            # Unknown sub-command: show usage and signal the error.
            print_usage
            if [[ $sourced -eq 0 ]]; then
                exit 1
            else
                return 1
            fi
            ;;
    esac
fi

28 changes: 4 additions & 24 deletions pipes/Broad_LSF/run-pipe.sh
Expand Up @@ -2,37 +2,17 @@
# Wrappers around Snakemake for use on the Broad LSF cluster

# determine the directory of this script
SCRIPT_DIRECTORY=$(dirname $0)
SCRIPT_DIRECTORY=$(dirname $(readlink --canonicalize-existing $0))

# load necessary Broad dotkits
source /broad/software/scripts/useuse
reuse -q LSF
source $SCRIPT_DIRECTORY/../Broad_common/setup_dotkits.sh

# uses the first argument as the config file path, is specified
if [[ ! -z "$1" && "$1" != " " ]]; then
CONFIG_FILE=$1
else
# otherwise the config file is assumed to be "config.yaml" in the cwd
CONFIG_FILE="config.yaml"
fi

# resolve the config file path in full
CONFIG_FILE=`python -c "import os; print( os.path.realpath(os.path.expanduser(\"$CONFIG_FILE\"))) "`

# if the config file does not exist
# it is either not in the cwd, or what the user passed in does not exist
if [[ ! -f $CONFIG_FILE ]]; then
echo "Config file does not exist: $CONFIG_FILE"
echo " Usage: $(basename $0) [path/to/config.yaml]"
echo " A file called 'config.yaml' must exist in the current directory, or be passed in."
exit 1
fi

# load config dirs from config.yaml. After using the conda dotkit, we should have PyYAML
VENVDIR=`python -c "import yaml, os;f=open(\"$CONFIG_FILE\");print(os.path.realpath(yaml.safe_load(f)['venv_dir']));f.close()"`
BINDIR=`python -c "import yaml, os;f=open(\"$CONFIG_FILE\");print(os.path.realpath(yaml.safe_load(f)['bin_dir']));f.close()"`
DATADIR=`python -c "import yaml, os; f=open(\"$CONFIG_FILE\");print(os.path.realpath(yaml.safe_load(f)['data_dir']));f.close()"`
VENVDIR=`python -c "import yaml, os;f=open(\"config.yaml\");print(os.path.realpath(yaml.safe_load(f)['venv_dir']));f.close()"`
BINDIR=`python -c "import yaml, os;f=open(\"config.yaml\");print(os.path.realpath(yaml.safe_load(f)['bin_dir']));f.close()"`
DATADIR=`python -c "import yaml, os; f=open(\"config.yaml\");print(os.path.realpath(yaml.safe_load(f)['data_dir']));f.close()"`

# load Python virtual environment
source "$VENVDIR/bin/activate"
Expand Down
28 changes: 4 additions & 24 deletions pipes/Broad_UGER/run-pipe.sh
Expand Up @@ -2,37 +2,17 @@
# Wrappers around Snakemake for use on the Broad UGER cluster

# determine the directory of this script
SCRIPT_DIRECTORY=$(dirname $0)
SCRIPT_DIRECTORY=$(dirname $(readlink --canonicalize-existing $0))

# load necessary Broad dotkits
source /broad/software/scripts/useuse
reuse -q UGER
source $SCRIPT_DIRECTORY/../Broad_common/setup_dotkits.sh

# uses the first argument as the config file path, is specified
if [[ ! -z "$1" && "$1" != " " ]]; then
CONFIG_FILE=$1
else
# otherwise the config file is assumed to be "config.yaml" in the cwd
CONFIG_FILE="config.yaml"
fi

# resolve the config file path in full
CONFIG_FILE=`python -c "import os; print( os.path.realpath(os.path.expanduser(\"$CONFIG_FILE\"))) "`

# if the config file does not exist
# it is either not in the cwd, or what the user passed in does not exist
if [[ ! -f $CONFIG_FILE ]]; then
echo "Config file does not exist: $CONFIG_FILE"
echo " Usage: $(basename $0) [path/to/config.yaml]"
echo " A file called 'config.yaml' must exist in the current directory, or be passed in."
exit 1
fi

# load config dirs from config.yaml. After using the conda dotkit, we should have PyYAML
VENVDIR=`python -c "import yaml, os;f=open(\"$CONFIG_FILE\");print(os.path.realpath(yaml.safe_load(f)['venv_dir']));f.close()"`
BINDIR=`python -c "import yaml, os;f=open(\"$CONFIG_FILE\");print(os.path.realpath(yaml.safe_load(f)['bin_dir']));f.close()"`
DATADIR=`python -c "import yaml, os; f=open(\"$CONFIG_FILE\");print(os.path.realpath(yaml.safe_load(f)['data_dir']));f.close()"`
VENVDIR=`python -c "import yaml, os;f=open(\"config.yaml\");print(os.path.realpath(yaml.safe_load(f)['venv_dir']));f.close()"`
BINDIR=`python -c "import yaml, os;f=open(\"config.yaml\");print(os.path.realpath(yaml.safe_load(f)['bin_dir']));f.close()"`
DATADIR=`python -c "import yaml, os; f=open(\"config.yaml\");print(os.path.realpath(yaml.safe_load(f)['data_dir']));f.close()"`

# load Python virtual environment
source "$VENVDIR/bin/activate"
Expand Down
2 changes: 1 addition & 1 deletion requirements-pipes.txt
@@ -1,5 +1,5 @@
boto==2.38.0
filechunkio==1.6
snakemake==3.5.1
snakemake==3.5.5
yappi==0.94
PyYAML==3.11

0 comments on commit e8c20ce

Please sign in to comment.