-
Notifications
You must be signed in to change notification settings - Fork 66
/
easy-deploy-viral-ngs.sh
388 lines (345 loc) · 14.7 KB
/
easy-deploy-viral-ngs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#!/bin/bash
#set -e -o pipefail

# Directory the caller was in when this file was run or sourced; used by
# activate_env to go back when activation cannot proceed.
STARTING_DIR=$(pwd)

# Work out the absolute path to this script in a way that should work
# regardless of whether or not the script has been sourced.
# Find the original directory of the bash script, resolving symlinks:
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in/246128#246128
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
    if [[ "$OSTYPE" == "darwin"* ]]; then
        # Darwin branch: plain readlink, one link level per loop iteration
        SOURCE="$(readlink "$SOURCE")"
    else
        SOURCE="$(readlink -f "$SOURCE")"
    fi
    # if $SOURCE was a relative symlink, we need to resolve it relative to
    # the path where the symlink file was located
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
done
SCRIPT=$SOURCE
SCRIPT_DIRNAME="$(dirname "$SOURCE")"
# The directory name is passed to cd quoted and verbatim; routing it through
# an unquoted `echo` would word-split and glob-expand it, breaking on
# directories whose names contain spaces or wildcard characters.
SCRIPTPATH="$(cd -P "$SCRIPT_DIRNAME" &> /dev/null && pwd)"
SCRIPT="$SCRIPTPATH/$(basename "$SCRIPT")"
# Upper bound on the length of the miniconda install prefix; the prefix
# length check further down refuses to continue at or above this value.
CONDA_PREFIX_LENGTH_LIMIT=80

# Names of the directories this script manages. Everything except the conda
# cache lives under <script dir>/<CONTAINING_DIR>/.
CONTAINING_DIR="viral-ngs-etc"
VIRAL_NGS_DIR="viral-ngs"
CONDA_ENV_BASENAME="conda-env"
CONDA_ENV_CACHE="conda-cache"
PROJECTS_DIR="projects"
MINICONDA_DIR="mc3"

# Absolute paths assembled from the names above.
VIRAL_CONDA_ENV_PATH="${SCRIPTPATH}/${CONTAINING_DIR}/${CONDA_ENV_BASENAME}"
VIRAL_CONDA_CACHE_PATH="/broad/hptmp/$(whoami)/${CONTAINING_DIR}/${CONDA_ENV_CACHE}"
PROJECTS_PATH="${SCRIPTPATH}/${CONTAINING_DIR}/${PROJECTS_DIR}"
VIRAL_NGS_PATH="${SCRIPTPATH}/${CONTAINING_DIR}/${VIRAL_NGS_DIR}"
MINICONDA_PATH="${SCRIPTPATH}/${CONTAINING_DIR}/${MINICONDA_DIR}"

# part of the prefix length hack
#ALT_CONDA_LOCATION="/home/unix/$(whoami)/.vgs-miniconda-pathhack"
# determine if this script has been sourced
# via: http://stackoverflow.com/a/28776166/2328433
#
# Sets the global `sourced` to 1 when any of the three shell-specific tests
# below succeeds, and to 0 otherwise:
#   * zsh:  ZSH_EVAL_CONTEXT is non-empty and ends in ":file"
#   * ksh:  KSH_VERSION is non-empty and the absolute path built from $0
#           differs from ${.sh.file}
#   * bash: BASH_VERSION is non-empty and $0 differs from $BASH_SOURCE
# The tests are grouped in ( ... ), i.e. evaluated in a subshell; only the
# group's exit status is used to choose between the two assignments.
# The rest of the file reads `sourced` to choose between `exit` (executed)
# and `return` (sourced).
([[ -n $ZSH_EVAL_CONTEXT && $ZSH_EVAL_CONTEXT =~ :file$ ]] ||
[[ -n $KSH_VERSION && $(cd "$(dirname -- "$0")" &&
printf '%s' "${PWD%/}/")$(basename -- "$0") != "${.sh.file}" ]] ||
[[ -n $BASH_VERSION && $0 != "$BASH_SOURCE" ]]) && sourced=1 || sourced=0
# TODO: check that we are on a machine with sufficient RAM
#######################################
# Report the length of a string through the function's exit status.
# LANG is switched to C while the length is taken and restored afterwards.
# Arguments: $1 - string to measure
# Returns:   the length of $1, clamped to 255. Exit statuses are 8 bits
#            wide, so an unclamped length of 256 or more would wrap around
#            (300 would be reported as 44) and look like a short string.
#######################################
function strLen() {
    local bytlen oLang=$LANG
    LANG=C
    bytlen=${#1}
    LANG=$oLang
    if (( bytlen > 255 )); then
        bytlen=255
    fi
    return $bytlen # int can be returned
}
# Refuse to continue when the miniconda install prefix is too long.
# The length is taken with ${#...} inside a command substitution (LC_ALL=C
# is set only in that subshell) instead of through a function exit status:
# exit statuses wrap at 256, and passing the path unquoted would measure
# only its first word when the path contains spaces.
current_prefix_length=$(LC_ALL=C; printf '%s' "${#MINICONDA_PATH}")
if [ "$current_prefix_length" -ge "$CONDA_PREFIX_LENGTH_LIMIT" ]; then
    echo "ERROR: The conda path to be created by this script is too long to work with conda ($current_prefix_length characters):"
    echo "$MINICONDA_PATH"
    echo "This is a known bug in conda ($CONDA_PREFIX_LENGTH_LIMIT character limit): "
    echo "https://github.com/conda/conda-build/pull/877"
    echo "To prevent this error, move this script higher in the filesystem hierarchy."
    # When this file is sourced, `exit` would close the caller's shell, so
    # only exit when the script is being executed.
    if [[ "${sourced:-0}" -eq 1 ]]; then
        return 1
    else
        exit 1
    fi
    # semi-working symlink hack below
    # echo ""
    # echo "To get around this we are creating a symlink $ALT_CONDA_LOCATION and installing there (though the files will reside in the correct location)."
    # mkdir -p "$(dirname $ALT_CONDA_LOCATION)"
    # mkdir -p "$MINICONDA_PATH"
    # if [ ! -L "$ALT_CONDA_LOCATION" ]; then
    #     ln -s "$MINICONDA_PATH" "$ALT_CONDA_LOCATION"
    #     echo "ln -s \"$MINICONDA_PATH\" \"$ALT_CONDA_LOCATION\""
    # else
    #     touch -h "$ALT_CONDA_LOCATION"
    # fi
    # export MINICONDA_PATH="$ALT_CONDA_LOCATION"
fi
#######################################
# Export LANG, every LC_* category listed below, and LC_ALL with one value.
# Arguments: $1 - locale name to assign
#######################################
function set_locale(){
    local locale_name="$1"
    local category
    for category in \
        LANG LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_MESSAGES \
        LC_PAPER LC_NAME LC_ADDRESS LC_TELEPHONE LC_MEASUREMENT \
        LC_IDENTIFICATION LC_ALL
    do
        export "${category}=${locale_name}"
    done
}
# Pick the locale name by platform: OSTYPE values starting with "darwin"
# get "en_US.UTF-8", everything else gets "en_US.utf8".
case "$OSTYPE" in
    darwin*)
        set_locale "en_US.UTF-8"
        ;;
    *)
        set_locale "en_US.utf8"
        ;;
esac
#######################################
# Put the miniconda bin directory at the front of PATH and update conda.
# Globals:   MINICONDA_PATH (read), PATH (written, exported),
#            sourced (read; 1 when this file was sourced)
# Returns:   status of `conda update` on success. When the miniconda
#            directory is missing: returns 1 if the file was sourced, and
#            exits the script with status 1 otherwise. Returning instead of
#            exiting matters because this function runs in the caller's
#            own shell during `source ... load`; `exit` there would close
#            that shell.
#######################################
function prepend_miniconda(){
    if [ ! -d "$MINICONDA_PATH/bin" ]; then
        echo "Miniconda directory not found."
        if [[ "${sourced:-0}" -eq 1 ]]; then
            return 1
        fi
        exit 1
    fi

    echo "Miniconda installed."
    echo "Prepending miniconda to PATH..."
    export PATH="$MINICONDA_PATH/bin:$PATH"
    # forget cached command locations so the new PATH entry is honoured
    hash -r
    # update to the latest conda this way, since the shell script
    # is often months out of date
    conda update -y conda
}
#######################################
# Download and install Miniconda into MINICONDA_PATH unless its bin/
# directory already exists, then put it on PATH via prepend_miniconda.
# Globals:   MINICONDA_PATH (read)
# Returns:   status of prepend_miniconda; exits the script with status 1
#            when MINICONDA_PATH/bin does not exist after the attempt.
#######################################
function install_miniconda(){
    local flavor platform miniconda_url installer_dir installer

    if [ -d "$MINICONDA_PATH/bin" ]; then
        echo "Miniconda directory exists."
    else
        echo "Downloading and installing Miniconda..."

        # Match the installer to the major version of the python found on
        # PATH: "Miniconda" for python 2, "Miniconda3" otherwise (including
        # when no python is found).
        if [[ "$(python -c 'import sys; print(sys.version_info[0])' 2> /dev/null)" == 2* ]]; then
            flavor="Miniconda"
        else
            flavor="Miniconda3"
        fi

        # `uname -s` reports the same system name as python's os.uname()[0]
        # without requiring python to be installed.
        if [[ "$(uname -s)" == "Darwin" ]]; then
            platform="MacOSX"
        else
            platform="Linux"
        fi
        miniconda_url="https://repo.continuum.io/miniconda/${flavor}-latest-${platform}-x86_64.sh"

        # Give wget the full output path: with -O the file name is used as
        # given, so a bare name lands in the current directory rather than
        # next to MINICONDA_PATH.
        installer_dir="$(dirname "$MINICONDA_PATH")"
        installer="$installer_dir/Miniconda3-latest-x86_64.sh"
        mkdir -p "$installer_dir"

        if wget "$miniconda_url" -O "$installer"; then
            # run through bash so the download needs no execute permission
            bash "$installer" -b -f -p "$MINICONDA_PATH"
        else
            echo "Could not download $miniconda_url"
        fi
        rm -f "$installer"
    fi

    if [ -d "$MINICONDA_PATH/bin" ]; then
        prepend_miniconda
    else
        echo "It looks like the Miniconda installation failed"
        exit 1
    fi
}
#######################################
# Create and populate a new project directory under PROJECTS_PATH.
# The directory gets data/ (with its numbered stage folders), log/,
# reports/ and tmp/, four empty sample list files, copies of config.yaml
# and Snakefile from the viral-ngs pipes/ folder, and symlinks to the
# viral-ngs checkout, the conda environment, miniconda and the two
# run-pipe scripts.
# Globals:   PROJECTS_PATH, VIRAL_NGS_PATH, VIRAL_CONDA_ENV_PATH,
#            MINICONDA_PATH (all read)
# Arguments: $1 - project folder name
# Returns:   0 when every step succeeded, 1 as soon as one fails. The
#            working directory of the caller is never changed.
#######################################
function create_project(){
    local project_name="$1"
    local project_path="$PROJECTS_PATH/$project_name"
    local pipes_path="$VIRAL_NGS_PATH/pipes"

    echo "Populating project directory..."
    if [ -z "$project_name" ]; then
        echo "No project name given."
        return 1
    fi

    # All paths are absolute and quoted, so no `cd` is needed and names
    # containing spaces are handled.
    mkdir -p "$PROJECTS_PATH" || return 1
    mkdir "$project_path" || return 1
    mkdir "$project_path"/{data,log,reports,tmp} || return 1
    mkdir "$project_path/data"/{00_raw,01_cleaned,01_per_sample,02_align_to_self,02_assembly,03_align_to_ref,03_interhost,04_intrahost} || return 1

    touch "$project_path/samples-depletion.txt" \
          "$project_path/samples-assembly.txt" \
          "$project_path/samples-runs.txt" \
          "$project_path/samples-assembly-failures.txt" || return 1

    cp "$pipes_path/config.yaml" "$pipes_path/Snakefile" "$project_path/" || return 1

    ln -s "$VIRAL_NGS_PATH/" "$project_path/bin" || return 1
    ln -s "$VIRAL_CONDA_ENV_PATH/" "$project_path/conda-env" || return 1
    ln -s "$MINICONDA_PATH/" "$project_path/mc3" || return 1
    ln -s "$pipes_path/Broad_UGER/run-pipe.sh" "$project_path/run-pipe_UGER.sh" || return 1
    ln -s "$pipes_path/Broad_LSF/run-pipe.sh" "$project_path/run-pipe_LSF.sh" || return 1
}
#######################################
# Change to the script directory and activate the viral-ngs conda
# environment unless a conda environment is already active.
# Globals:   SCRIPTPATH, CONTAINING_DIR, VIRAL_CONDA_ENV_PATH, STARTING_DIR,
#            CONDA_DEFAULT_ENV, SCRIPT (all read)
# Returns:   1 when the containing directory or the environment directory
#            is missing (after changing back to STARTING_DIR), or when
#            miniconda could not be put on PATH;
#            0 when some conda environment is already active (a message
#            says whether it is the viral-ngs one);
#            otherwise the status of `source activate`.
#######################################
function activate_env(){
    if [ ! -d "$SCRIPTPATH/$CONTAINING_DIR" ]; then
        echo "viral-ngs parent directory not found: $CONTAINING_DIR not found."
        echo "Have you run the setup?"
        echo "Usage: $0 setup"
        cd "$STARTING_DIR"
        return 1
    fi
    # paths are quoted so directories containing spaces work
    cd "$SCRIPTPATH" || return 1

    if [ ! -d "$VIRAL_CONDA_ENV_PATH" ]; then
        echo "$VIRAL_CONDA_ENV_PATH/ does not exist. Exiting."
        cd "$STARTING_DIR"
        return 1
    fi

    if [ -z "$CONDA_DEFAULT_ENV" ]; then
        echo "Activating viral-ngs environment..."
        # without miniconda on PATH there is no `activate` to source
        prepend_miniconda || return 1
        source activate "$VIRAL_CONDA_ENV_PATH"
    else
        if [[ "$CONDA_DEFAULT_ENV" != "$VIRAL_CONDA_ENV_PATH" ]]; then
            echo "It looks like a conda environment is already active,"
            echo "however it is not the viral-ngs environment."
            echo "To use viral-ngs with your project, deactivate the"
            echo "current environment and then source this file."
            echo "Example: source deactivate && source $(basename "$SCRIPT") load"
        else
            echo "The viral-ngs environment is already active."
        fi
        return 0
    fi
}
#######################################
# Print the one-line usage summary to stdout.
# Globals:   SCRIPT (read) - path of this script; only its file name is
#            shown. The path is quoted so that a script stored under a
#            directory containing spaces still shows the right name.
#######################################
function print_usage(){
    echo "Usage: $(basename -- "$SCRIPT") {load,create-project,setup}"
}
# ---------------------------------------------------------------------------
# Command dispatch.
#
#   setup                          install miniconda and the viral-ngs conda
#                                  environment; must be executed, not sourced
#   load                           activate the environment in the calling
#                                  shell; must be sourced
#   create-project <project_name>  create a project directory
#
# Usage errors end with a non-zero status: `exit 1` when the script is
# executed, `return 1` when it is sourced (an `exit` would close the
# caller's shell).
# ---------------------------------------------------------------------------
if [ $# -eq 0 ]; then
    print_usage
    if [[ $sourced -eq 0 ]]; then
        exit 1
    else
        return 1
    fi
else
    case "$1" in
        "setup")
            if [ $# -eq 1 ]; then
                if [[ $sourced -eq 1 ]]; then
                    echo "ABORTING. $(basename "$SCRIPT") must not be sourced during setup"
                    echo "Usage: $(basename "$SCRIPT") setup"
                    return 1
                else
                    if [ ! -z "$CONDA_DEFAULT_ENV" ]; then
                        echo "The viral-ngs setup cannot be run while a conda environment is active."
                        echo "The current environment must first be disabled via 'source deactivate'"
                        exit 1
                    fi

                    mkdir -p "$SCRIPTPATH/$CONTAINING_DIR"
                    cd "$SCRIPTPATH/$CONTAINING_DIR" || exit 1

                    install_miniconda

                    if [ ! -d "$VIRAL_CONDA_ENV_PATH" ]; then
                        conda create -c bioconda -y -p "$VIRAL_CONDA_ENV_PATH" viral-ngs
                    else
                        echo "$VIRAL_CONDA_ENV_PATH/ already exists. Skipping python venv setup."
                    fi

                    # the conda-installed viral-ngs folder resides within the
                    # opt/ directory of the conda environment, but it contains
                    # a version number, so we'll ls and grep for the name
                    # and assume it's the first one to show up
                    # TODO: parse out the version number from
                    #       conda list
                    if [ ! -L "$VIRAL_NGS_PATH" ]; then
                        EXPECTED_VIRAL_NGS_VERSION=$(conda list | grep viral-ngs | awk -F" " '{print $2}')
                        VIRAL_NGS_CONDA_PATH="$VIRAL_CONDA_ENV_PATH/opt/"$(ls -1 "$VIRAL_CONDA_ENV_PATH/opt/" | grep -m 1 "viral-ngs")

                        if [ -d "$VIRAL_NGS_CONDA_PATH" ]; then
                            ln -s "$VIRAL_NGS_CONDA_PATH" "$VIRAL_NGS_PATH"
                        else
                            echo "Could not find viral-ngs install in conda env:"
                            echo "$VIRAL_NGS_CONDA_PATH"
                            exit 1
                        fi
                    else
                        echo "$VIRAL_NGS_DIR/ symlink already exists. Skipping link."
                    fi

                    # everything below needs the environment; stop if it
                    # could not be activated
                    activate_env || exit 1

                    # install tools
                    py.test "$VIRAL_NGS_PATH/test/unit/test_tools.py"

                    # get the version of gatk expected based on the installed conda package
                    EXPECTED_GATK_VERSION=$(conda list | grep gatk | awk -F" " '{print $2}')

                    if [ -z "$GATK_JAR_PATH" ]; then
                        # if the env var is not set, try to get the jar location using the default Broad path
                        # (stderr of dnsdomainname is dropped: the command is not available everywhere)
                        if [[ "$(dnsdomainname 2> /dev/null)" == *"broadinstitute.org" || "$HOSTNAME" == *".broadinstitute.org" || "$DOMAINNAME" == "broadinstitute.org" ]]; then
                            echo "This script is being run on a Broad Institute system."
                            echo "Trying to find GATK..."
                            # NOTE(review): the ls + sleep before find presumably gives a
                            # network filesystem time to mount - confirm
                            export GATK_JAR_PATH="$(ls /humgen/gsa-hpprojects/GATK/bin &> /dev/null && sleep 5 && find /humgen/gsa-hpprojects/GATK/bin/GenomeAnalysisTK-"$EXPECTED_GATK_VERSION"-* -maxdepth 0 -type d)/GenomeAnalysisTK.jar"
                        fi
                    fi

                    # if the gatk jar file exists, call gatk-register
                    if [ -e "$GATK_JAR_PATH" ]; then
                        echo "GATK found: $GATK_JAR_PATH"
                        gatk-register "$GATK_JAR_PATH"
                    else
                        echo "GATK jar could not be found on this system for GATK version $EXPECTED_GATK_VERSION"
                        echo "Please activate the viral-ngs conda environment and 'gatk-register /path/to/GenomeAnalysisTK.jar'"
                        # no exit here: a missing GATK jar should not prevent the
                        # Novoalign license step below from running
                    fi

                    echo ""
                    if [ ! -z "$NOVOALIGN_PATH" ]; then
                        novoalign-license-register "$NOVOALIGN_PATH/novoalign.lic"
                    elif [ ! -z "$NOVOALIGN_LICENSE_PATH" ]; then
                        novoalign-license-register "$NOVOALIGN_LICENSE_PATH"
                    else
                        echo "No Novoalign license found via NOVOALIGN_PATH or NOVOALIGN_LICENSE_PATH"
                        echo "Please activate the viral-ngs conda environment and run 'novoalign-license-register /path/to/novoalign.lic'"
                    fi

                    echo "Setup complete. Do you want to start a project? Run:"
                    echo "$0 create-project <project_name>"
                    echo ""
                fi
            else
                echo "Usage: $(basename "$SCRIPT") setup"
                if [[ $sourced -eq 0 ]]; then
                    exit 1
                else
                    return 1
                fi
            fi
            ;;
        "load")
            if [ $# -eq 1 ]; then
                if [[ $sourced -eq 0 ]]; then
                    echo "ABORTING. $(basename "$SCRIPT") must be sourced."
                    echo "Usage: source $(basename "$SCRIPT") load"
                    # aborting is a failure; report it in the exit status
                    exit 1
                else
                    activate_env || return 1
                    ls -lah
                    return 0
                fi
            else
                echo "Usage: source $(basename "$SCRIPT") load"
                if [[ $sourced -eq 0 ]]; then
                    exit 1
                else
                    return 1
                fi
            fi
            ;;
        "create-project")
            if [ $# -ne 2 ]; then
                echo "Usage: $(basename "$SCRIPT") create-project <project_name>"
                if [[ $sourced -eq 0 ]]; then
                    exit 1
                else
                    return 1
                fi
            else
                if [ ! -d "$PROJECTS_PATH/$2" ]; then
                    create_project "$2" && echo "Project created: $PROJECTS_PATH/$2" && echo "OK"
                else
                    echo "WARNING: $PROJECTS_PATH/$2/ already exists."
                    echo "Skipping project creation."
                fi

                echo ""

                if [[ "$CONDA_DEFAULT_ENV" != "$VIRAL_CONDA_ENV_PATH" ]]; then
                    echo "It looks like the viral-ngs environment is not active."
                    echo "To use viral-ngs with your project, source this file."
                    echo "Example: source $(basename "$SCRIPT") load"
                else
                    # if the viral-ngs environment is active and we have sourced this file
                    if [[ $sourced -eq 1 ]]; then
                        # change to the project directory
                        if [ -d "$PROJECTS_PATH/$2" ]; then
                            cd "$PROJECTS_PATH/$2"
                        fi
                        return 0
                    fi
                fi
            fi
            ;;
        *)
            # unknown command: show usage and fail like the no-argument case
            print_usage
            if [[ $sourced -eq 0 ]]; then
                exit 1
            else
                return 1
            fi
            ;;
    esac
fi