-
Notifications
You must be signed in to change notification settings - Fork 1
/
reprocess.sh
231 lines (200 loc) · 6.63 KB
/
reprocess.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/bin/bash
# Shared settings and constants for the reprocessing run.

# Abort on the first failing command and on any use of an unset variable.
set -eu

# Work from the directory holding this script; the relative paths used
# further down (../results, submit_all.sh, 08.cleanup.sh) are resolved from it.
cd "$(dirname "$0")"

# --- Study identifiers -------------------------------------------------
# Several identifiers are combined into one value by joining them with ".".
STUDY_TMI="10317"
STUDY_HADZA="11358"
STUDY_BUILTENV="10333.10423"
STUDY_VERTEBRATES="11166"
STUDY_SOL="11666"
STUDY_FINRISK="12142"
STUDY_HMPWGS="1926"
STUDY_HMP2="11484"
STUDY_MANARY="11405"
STUDY_SANDBORN="11546"

# Study collections used by the meta-analyses.
# NOTE(review): 10581 appears twice in the multipop 16S list below; confirm
# whether the repetition is intentional before changing it.
STUDY_MULTIPOP_16S_FECAL="${STUDY_HADZA}.850.2024.11993.10581.10352.11757.1481.12015.10052.1448.1718.10217.11210.10581"
STUDY_MULTIPOP_WGS_FECAL="${STUDY_HADZA}.${STUDY_SOL}.${STUDY_FINRISK}.${STUDY_HMPWGS}.${STUDY_HMP2}.${STUDY_MANARY}.${STUDY_SANDBORN}"
STUDY_LIFESTAGE_16S_FECAL="850.10297.10080.10300.12524.11076.11882.11884.12496.1454.10249.11937.2024.10581"
STUDY_LIFESTAGE_WGS_FECAL="${STUDY_HADZA}.${STUDY_SOL}.${STUDY_FINRISK}.${STUDY_HMPWGS}.${STUDY_HMP2}.${STUDY_MANARY}"

# --- Data types --------------------------------------------------------
TYPE_16S="16S"
TYPE_WGS="WGS"

# --- Sample-type labels (used when composing dataset names) ------------
SAMPLETYPE_GUT="gut"
SAMPLETYPE_ORAL="oral"
SAMPLETYPE_SKIN="skin"
SAMPLETYPE_ALL="allsamples"

# --- Dataset labels (used when composing dataset names) ----------------
DATASET_TMI="tmi"
DATASET_HADZA="hadza"
DATASET_MULTIPOP="multipop"
DATASET_LIFESTAGE="lifestage"
DATASET_BUILTENV="builtenv"
# The variable name is misspelled ("VERTERATES"), but the rest of the script
# refers to it by exactly this name, so it is kept as is.
DATASET_VERTERATES="vertebrates"

# --- Environment packages ----------------------------------------------
# Combined packages are again "."-joined lists of the individual ones.
HUMAN_GUT="human-gut"
HUMAN_SKIN="human-skin"
HUMAN_ORAL="human-oral"
HUMAN_MIXED="${HUMAN_GUT}.${HUMAN_ORAL}.${HUMAN_SKIN}"
# Contains a space; anything building a path from it must account for that.
BUILTENV="built environment"
BUILTENV_HUMAN_MIXED="${HUMAN_MIXED}.${BUILTENV}"
VERTEBRATE="host-associated"
HOST_GUT="${HUMAN_GUT}.${VERTEBRATE}"
# Create today's result tree under ../results and point the "current"
# symlink at it. The run is refused when a result set for today's date tag
# already exists, so an earlier run from the same day is never overwritten.
mkdir -p ../results
pushd ../results
# Date tag in day/abbreviated-month/year form, as produced by date(1).
datetag=$(date +"%d%b%Y")
if [[ -d "${datetag}" ]]; then
  # Diagnostics belong on stderr so they are not mixed into regular output.
  echo "${datetag} result set already exists!" >&2
  exit 1
fi
mkdir "${datetag}"
# Re-point "current" at the new result set.
rm -f current
ln -s "${datetag}" current
# One sub-directory per environment package. The built-environment package
# contains a space, which is replaced by "_" for its directory name.
for env_dir in \
  "${HUMAN_GUT}" \
  "${HUMAN_SKIN}" \
  "${HUMAN_ORAL}" \
  "${HUMAN_MIXED}" \
  "${BUILTENV_HUMAN_MIXED// /_}" \
  "${HOST_GUT}"; do
  mkdir -p "${datetag}/${env_dir}"
done
popd
# Version string made available to child processes through the environment
# (presumably read by the submission scripts; confirm there).
QIIME_VERSION="2022.2"
export QIIME_VERSION
# Seconds to pause after each submission.
SUBMIT_DELAY="10"
# Join strings with a delimiter, see
# https://stackoverflow.com/a/17841619
#
# Arguments:
#   $1   - delimiter; may be empty or longer than one character
#   $2.. - the strings to join
# Outputs:
#   the joined string followed by a newline on stdout (an empty line when
#   no strings are given)
function join_by {
  local delimiter="${1-}"
  if (( $# > 0 )); then
    shift
  fi
  local joined="" item is_first=1
  # Concatenate explicitly instead of relying on IFS and "$*": IFS only
  # honours the first character of the delimiter.
  for item in "$@"; do
    if (( is_first )); then
      joined="${item}"
      is_first=0
    else
      joined+="${delimiter}${item}"
    fi
  done
  # printf rather than echo, so a result such as "-n" is printed verbatim
  # instead of being taken as an echo option.
  printf '%s\n' "${joined}"
}
# Holds everything printed by the submit_all.sh invocations below; the
# cleanup job scheduled at the end of the script uses these entries as its
# job dependency list.
declare -a jobs

#######################################
# Configure one dataset through the environment and submit it.
# Globals:
#   TMI_DATATYPE, ENV_PACKAGE, STUDIES, TMI_TITLE, TMI_NAME (exported so
#     that submit_all.sh inherits them)
#   jobs (appended to)
#   SUBMIT_DELAY (read)
# Arguments:
#   $1 - data type
#   $2 - environment package
#   $3 - "."-joined study identifiers
#   $4 - title
#   $5 - dataset name
# Outputs:
#   writes the dataset name to stdout
#######################################
function submit_dataset {
  export TMI_DATATYPE="$1"
  export ENV_PACKAGE="$2"
  export STUDIES="$3"
  export TMI_TITLE="$4"
  export TMI_NAME="$5"
  echo "${TMI_NAME}"

  # Capture separately from the array append so a failing submit_all.sh
  # aborts the script under `set -e`.
  local submitted
  submitted=$(sh submit_all.sh)
  # Word splitting is intentional: each whitespace-separated token printed
  # by submit_all.sh becomes its own entry.
  # shellcheck disable=SC2206
  jobs+=(${submitted})

  # Pause between submissions.
  sleep "${SUBMIT_DELAY}"
}

# Datasets that are currently disabled are kept as commented-out calls;
# uncomment a call to include that dataset in the run.

# Microsetta WGS specific
submit_dataset "${TYPE_WGS}" "${HUMAN_GUT}" "${STUDY_TMI}" \
  "Microsetta WGS fecal samples" \
  "${DATASET_TMI}-${TYPE_WGS}-${SAMPLETYPE_GUT}"

# submit_dataset "${TYPE_WGS}" "${HUMAN_SKIN}" "${STUDY_TMI}" \
#   "Microsetta WGS skin samples" \
#   "${DATASET_TMI}-${TYPE_WGS}-${SAMPLETYPE_SKIN}"

# submit_dataset "${TYPE_WGS}" "${HUMAN_ORAL}" "${STUDY_TMI}" \
#   "Microsetta WGS oral samples" \
#   "${DATASET_TMI}-${TYPE_WGS}-${SAMPLETYPE_ORAL}"

submit_dataset "${TYPE_WGS}" "${HUMAN_MIXED}" "${STUDY_TMI}" \
  "Microsetta WGS all samples" \
  "${DATASET_TMI}-${TYPE_WGS}-${SAMPLETYPE_ALL}"

# submit_dataset "${TYPE_WGS}" "${HUMAN_GUT}" "${STUDY_HADZA}" \
#   "Hadza WGS fecal samples" \
#   "${DATASET_HADZA}-${TYPE_WGS}-${SAMPLETYPE_GUT}"

submit_dataset "${TYPE_WGS}" "${HUMAN_GUT}" \
  "${STUDY_TMI}.${STUDY_MULTIPOP_WGS_FECAL}" \
  "Meta-analysis WGS fecal samples multipop" \
  "${DATASET_MULTIPOP}-${TYPE_WGS}-${SAMPLETYPE_GUT}"

submit_dataset "${TYPE_WGS}" "${HUMAN_GUT}" \
  "${STUDY_TMI}.${STUDY_LIFESTAGE_WGS_FECAL}" \
  "Meta-analysis WGS fecal samples lifestage" \
  "${DATASET_LIFESTAGE}-${TYPE_WGS}-${SAMPLETYPE_GUT}"

# Microsetta 16S specific
submit_dataset "${TYPE_16S}" "${HUMAN_SKIN}" "${STUDY_TMI}" \
  "Microsetta 16S skin samples" \
  "${DATASET_TMI}-${TYPE_16S}-${SAMPLETYPE_SKIN}"

submit_dataset "${TYPE_16S}" "${HUMAN_ORAL}" "${STUDY_TMI}" \
  "Microsetta 16S oral samples" \
  "${DATASET_TMI}-${TYPE_16S}-${SAMPLETYPE_ORAL}"

submit_dataset "${TYPE_16S}" "${HUMAN_GUT}" "${STUDY_TMI}" \
  "Microsetta 16S fecal samples" \
  "${DATASET_TMI}-${TYPE_16S}-${SAMPLETYPE_GUT}"

submit_dataset "${TYPE_16S}" "${HUMAN_MIXED}" "${STUDY_TMI}" \
  "Microsetta 16S all samples" \
  "${DATASET_TMI}-${TYPE_16S}-${SAMPLETYPE_ALL}"

# Multipop gut 16S
submit_dataset "${TYPE_16S}" "${HUMAN_GUT}" \
  "${STUDY_TMI}.${STUDY_MULTIPOP_16S_FECAL}" \
  "Meta-analysis 16S fecal samples" \
  "${DATASET_MULTIPOP}-${TYPE_16S}-${SAMPLETYPE_GUT}"

submit_dataset "${TYPE_16S}" "${HUMAN_GUT}" \
  "${STUDY_TMI}.${STUDY_LIFESTAGE_16S_FECAL}" \
  "Meta-analysis 16S lifestage fecal samples" \
  "${DATASET_LIFESTAGE}-${TYPE_16S}-${SAMPLETYPE_GUT}"

# submit_dataset "${TYPE_16S}" "${HUMAN_GUT}" "${STUDY_HADZA}" \
#   "Hadza 16S fecal samples" \
#   "${DATASET_HADZA}-${TYPE_16S}-${SAMPLETYPE_GUT}"

# non-human comparisons
# submit_dataset "${TYPE_16S}" "${HOST_GUT}" \
#   "${STUDY_TMI}.${STUDY_VERTEBRATES}" \
#   "Meta-analysis 16S vertebrate fecal" \
#   "${DATASET_VERTERATES}-${TYPE_16S}-${SAMPLETYPE_GUT}"

# Built environment
submit_dataset "${TYPE_16S}" "${BUILTENV_HUMAN_MIXED}" \
  "${STUDY_TMI}.${STUDY_BUILTENV}" \
  "Meta-analysis 16S built environment and multi-bodysite samples" \
  "${DATASET_BUILTENV}-${TYPE_16S}-${SAMPLETYPE_ALL}"
# Schedule the cleanup step: a one-node, one-core Slurm job that runs
# 08.cleanup.sh from the current directory once every collected job has
# finished successfully (afterok). DATETAG is handed to it via --export.
if (( ${#jobs[@]} == 0 )); then
  # Without any job ID the dependency list would be the invalid "afterok:".
  echo "no jobs were submitted; not scheduling the cleanup job" >&2
  exit 1
fi
# Colon-separated job list, the form expected after "afterok:".
dependency=$(join_by : "${jobs[@]}")
cwd=$(pwd)
# The batch script is fed to sbatch on stdin. %q shell-quotes the directory
# so that a path containing spaces or other special characters still works.
printf '#!/bin/bash\ncd %q\nbash 08.cleanup.sh\n' "${cwd}" \
  | sbatch --dependency="afterok:${dependency}" --export="DATETAG=${datetag}" -N 1 -c 1 --mem=1g --time=1:00:00 --job-name TMI-cleanup