Sis task 7 and 11 #5

Closed
wants to merge 20 commits into from
3 changes: 3 additions & 0 deletions .gitignore
@@ -25,3 +25,6 @@ status/
._*
.snapshot
.ipynb_checkpoints

# vim backup files:
.*.sw?
2 changes: 1 addition & 1 deletion batman.sh
@@ -45,7 +45,7 @@ case "$1" in
addsamples)
mkdir -p --mode=2770 ${clusterdir}/${working}/samples/
cp -vrf --link ${clusterdir}/${sampleset}/*/ ${clusterdir}/${working}/samples/ ## failure: "no rule to create {SAMPLE}/extract/R1.fastq"
sort -u ${clusterdir}/${sampleset}/samples.{${lastmonth},${thismonth}}*.tsv > ${clusterdir}/${working}/samples.recent.tsv
cat ${clusterdir}/${sampleset}/samples.{${lastmonth},${thismonth}}*.tsv | sort -u > ${clusterdir}/${working}/samples.recent.tsv
sort -u ${clusterdir}/${sampleset}/samples.*.tsv > ${clusterdir}/${working}/samples.tsv
;;
vpipe)
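For context, the revised addsamples step builds samples.recent.tsv from only last month's and this month's sample sheets before deduplicating, while samples.tsv still aggregates every sheet. A minimal sketch of the same pipeline, with the month values and paths shortened purely for illustration:

    lastmonth=202110; thismonth=202111   # hypothetical values; batman.sh derives these itself
    cat samples.{${lastmonth},${thismonth}}*.tsv | sort -u > samples.recent.tsv
    sort -u samples.*.tsv > samples.tsv  # the full list still covers every sheet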
14 changes: 8 additions & 6 deletions belfry
@@ -12,6 +12,7 @@ declare -A lab
: ${releasedir:?}
: ${storgrp:?}
: ${parallel:=16}
: ${parallelpull:=${parallel}}
: ${contimeout:=300}
: ${retries:=10}
: ${iotimeout:=300}
@@ -271,7 +272,7 @@ case "$1" in
conda deactivate
;;
uploadrequests)
echo "Hangling raw-read upload requests for Viollier"
echo "Handling raw-read upload requests for Viollier"
. $baseconda/miniconda3/bin/activate ""
${scriptdir}/handle_request_raw_viollier -c viollier.conf
conda deactivate
@@ -310,6 +311,7 @@ case "$1" in
fi
if (( ${lab[fgcz]} )); then
# # TODO config file
. <(grep '^google_sheet_patches=' fgcz.conf)
# staging=staging-fcgz
#
# # safety check
@@ -319,7 +321,7 @@ case "$1" in
# exit 2
# fi
#
${scriptdir}/google_sheet_patches
(( google_sheet_patches )) && ${scriptdir}/google_sheet_patches
# ${scriptdir}/sort_samples_bfabric_tsv -c fgcz.conf ${force} ${recent} && bash ${basedir}/${staging}/movedatafiles.sh && cp -rf --link ${basedir}/${staging}/* ${basedir}/${sampleset}/ || fail=1
${scriptdir}/sort_samples_bfabric_tsv -c fgcz.conf ${force} ${recent} && bash ${basedir}/${sampleset}/movedatafiles.sh || fail=1
else
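A note on the gating idiom introduced above: the line '. <(grep '^google_sheet_patches=' fgcz.conf)' sources only the google_sheet_patches assignment from the config file, and the arithmetic test then skips the helper unless the flag is non-zero (fgcz.conf sets it to 0 later in this diff). A self-contained sketch of the pattern, using a hypothetical demo.conf:

    printf 'google_sheet_patches=0\n' > demo.conf    # hypothetical config fragment
    . <(grep '^google_sheet_patches=' demo.conf)     # defines the flag without sourcing the whole file
    if (( google_sheet_patches )); then
        echo 'would run the google_sheet_patches helper'
    else
        echo 'patch download disabled'
    fi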
@@ -447,8 +449,8 @@ case "$1" in
${basedir}/${working}/ || (( ++err ))
echo "samples:"
cut -s --fields=1 "${sheets[@]}"|sort -u| \
gawk -v P=$(( parallel * 4 )) '{i=(NR-1);b=i%P;o[b]=(o[b] " \"" $1 "\"")};END{for(i=0;i<P;i++){printf("%s\0",o[i])}}'| \
xargs -0 -P $parallel -I '{@LIST@}' -- \
gawk -v P=$(( parallelpull * 4 )) '{i=(NR-1);b=i%P;o[b]=(o[b] " \"" $1 "\"")};END{for(i=0;i<P;i++){printf("%s\0",o[i])}}'| \
xargs -0 -P $parallelpull -I '{@LIST@}' -- \
bash -c "callpullrsync_noshorah {@LIST@} " || (( ++err ))
if (( err )); then
echo "Error: ${err} rsync job(s) failed"
@@ -502,8 +504,8 @@ case "$1" in
${basedir}/${working}/ || (( ++err ))
echo "samples:"
cut -s --fields=1 "${sheets[@]}"|sort -u| \
gawk -v P=$(( parallel * 4 )) '{i=(NR-1);b=i%P;o[b]=(o[b] " \"" $1 "\"")};END{for(i=0;i<P;i++){printf("%s\0",o[i])}}'| \
xargs -0 -P $parallel -I '{@LIST@}' -- \
gawk -v P=$(( parallelpull * 4 )) '{i=(NR-1);b=i%P;o[b]=(o[b] " \"" $1 "\"")};END{for(i=0;i<P;i++){printf("%s\0",o[i])}}'| \
xargs -0 -P $parallelpull -I '{@LIST@}' -- \
bash -c "callpullrsync {@LIST@} " || (( ++err ))
if (( err )); then
echo "Error: ${err} rsync job(s) failed"
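Both pull blocks above share one batching idiom, now sized by the new parallelpull knob instead of parallel: gawk spreads the unique sample IDs round-robin over parallelpull*4 NUL-terminated batches, and xargs hands those batches to at most parallelpull concurrent callpullrsync jobs. A standalone sketch of the idiom, with made-up sample names and echo standing in for the rsync wrapper (empty batches simply become no-op calls):

    parallelpull=2
    printf '%s\n' sampleA sampleB sampleC sampleD sampleE | sort -u | \
        gawk -v P=$(( parallelpull * 4 )) '{i=(NR-1);b=i%P;o[b]=(o[b] " \"" $1 "\"")};END{for(i=0;i<P;i++){printf("%s\0",o[i])}}' | \
        xargs -0 -P $parallelpull -I '{@LIST@}' -- \
        bash -c "echo pulling: {@LIST@}"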
21 changes: 15 additions & 6 deletions carillon
@@ -21,7 +21,7 @@ lockfile=${statusdir}/carillon_lock
remote_batman="ssh -ni ${HOME}/.ssh/id_ed25519_batman euler.ethz.ch --"
remote_belfry="ssh -ni ${HOME}/.ssh/id_ed25519_belfry bs-bewi09.ethz.ch --"


touch ${statusdir}/oh_hai_im_lopping

if false; then # disable block
#
@@ -61,10 +61,11 @@ echo '========='
echo 'Data sync'
echo '========='

${scriptdir}/belfry synch2030 # --recent : there are no old archive at Health2030 anyway
${scriptdir}/belfry syncviollier # --recent : there are no old archive, we're even supposed to perform the deletion
${scriptdir}/belfry syncfgcz --recent
${scriptdir}/belfry syncopenbis --recent
[[ -n $skipsync ]] && echo "Hack: ${skipsync} will be skipped."
[[ $skipsync != h2030 ]] && ${scriptdir}/belfry synch2030 # --recent : there are no old archive at Health2030 anyway
[[ $skipsync != viollier ]] && ${scriptdir}/belfry syncviollier # --recent : there are no old archive, we're even supposed to perform the deletion
[[ $skipsync != fgcz ]] && ${scriptdir}/belfry syncfgcz --recent
[[ $skipsync != gfb ]] && ${scriptdir}/belfry syncopenbis --recent
${scriptdir}/belfry sortsamples --recent $([[ ${statusdir}/syncopenbis_last -nt ${statusdir}/syncopenbis_new ]] && echo '--summary')
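The new guards make each lab sync skippable: when skipsync matches the keyword tested on a given line (h2030, viollier, fgcz or gfb), that belfry call is bypassed and the hack is announced. Where skipsync is meant to be defined is not shown in this diff; a purely hypothetical way to inject it for a single run would be:

    # hypothetical: skip the FGCZ download this round, assuming carillon is invoked directly
    skipsync=fgcz ./carillon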

# uploading requests require prior successful download
@@ -276,8 +277,16 @@ if [[ ( ( ! -e ${statusdir}/vpipe_ended ) && ( ! -e ${statusdir}/vpipe_started ) ]]
fi
done

# are we allowed to submit jobs ?
if (( donotsubmit )); then
echo -e '\e[35;1mWill NOT submit jobs\e[0m...'
if (( mustrun )); then
echo '...but there are new jobs that should be started'
else
echo '...and there is nothing to run anyway'
fi
# start jobs ?
if (( mustrun )); then
elif (( mustrun )); then
echo 'Will start new job'

# Sanity check
2 changes: 1 addition & 1 deletion cojac
6 changes: 4 additions & 2 deletions fgcz.conf
@@ -8,7 +8,7 @@ srvport=666
# experiment name in sftp store
#expname=/projects/p23212
expname=/projects
projlist=( 'p23224' 'p23212' 'p24991' 'p25650' )
projlist=( 'p23224' 'p23212' 'p24991' 'p25650' 'p26177' )
# base directory (default: cwd)
basedir=/links/shared/covid19-pangolin/backup
# sub-directory to hold the unsorted downloaded datasets
@@ -18,7 +18,7 @@ download=bfabric-downloads
sampleset=sampleset
# delay after which orders aren't considered for merging anymore
fusedays=9
nofuselist=iSeq195_COV19_p23212_o25355,NOV816_COV19_p23212_o25355,NovaSeq_20210917_NOV929_o26053_DataDelivery,NovaSeq_20210917_NOV928_o26053_DataDelivery,NovaSeq_20210910_NOV920_o26053_DataDelivery
nofuselist=iSeq195_COV19_p23212_o25355,NOV816_COV19_p23212_o25355,NovaSeq_20210917_NOV929_o26053_DataDelivery,NovaSeq_20210917_NOV928_o26053_DataDelivery,NovaSeq_20210910_NOV920_o26053_DataDelivery,NovaSeq_20211119_NOV1039_o26703_DataDelivery,NovaSeq_20211119_NOV1039_o26712_DataDelivery,NovaSeq_20211122_NOV1041_o26703_DataDelivery,NovaSeq_20211122_NOV1041_o26712_DataDelivery
## linking instead of copying ?
## --reflink for CoW filesystems (ZFS, BTRFS)
## --hardlink for most unix-like filesystems
@@ -34,3 +34,5 @@ iotimeout=300
# mark bad runs
badlist=MiSeq_210122_MS562_o23881_DataDelivery,Fastqc_53742_2021-01-27--15-56-09,MiSeq_210129_MS564_o23775_DataDelivery,NOV657_COV19_o24384,MiSeq_210317_MS586_ww_o24329_o24329_DataDelivery,NovaSeq_20210806_NOV873_o25679_DataDelivery,NovaSeq_20210806_NOV873_o25697_DataDelivery
forcelist=NOV641,NOV674_COV19_repeats,NOV741_COVID
# do we download patchmap lists from google sheet?
google_sheet_patches=0
4 changes: 3 additions & 1 deletion gfb.conf
@@ -7,6 +7,8 @@ fileserver=bs-openbis04.ethz.ch
srvport=2222
# address of the _Web_ server whose API we are calling
apiurl=https://openbis-dsu.ethz.ch/openbis/
# prefix for SFTP servers that serve files from a special subdirectory (e.g.: /pub)
prefix=/DEFAULT
# experiment name in OpenBIS
expname=/BSSE_STADLER_COVID/STADLER_COVID/COVID_V
# the type through which we need to search for the experiment
@@ -26,7 +28,7 @@ suffix=_MM_1
# group on the storage (inside download and sampleset)
storgrp=bsse-covid19-pangolin@d.ethz.ch
# parallel copy jobs (default: 16)
parallel=16
parallel=8
# SSH connection timeout (default: 300)
contimeout=300
# IO timeout (default: 300)
6 changes: 6 additions & 0 deletions python/.gitignore
@@ -0,0 +1,6 @@
venv*
*.pyc
__pycache__
.*.sw?
.defaults
sars_cov_2.db
20 changes: 20 additions & 0 deletions python/init.sh
@@ -0,0 +1,20 @@
#!/bin/bash
#
# init.sh
# Copyright (C) 2021 Uwe Schmitt <uwe.schmitt@id.ethz.ch>
#
# Distributed under terms of the MIT license.
#

set -e

# https://stackoverflow.com/questions/59895/
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

cd ${SCRIPT_DIR}

test -d venv || python -m venv venv

source venv/bin/activate

pip install -r requirements.txt
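A hypothetical invocation of the new bootstrap script from a checkout of the repository; later shells reuse the virtualenv it creates:

    bash python/init.sh              # creates python/venv and installs the pinned requirements
    source python/venv/bin/activate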
2 changes: 2 additions & 0 deletions python/requirements.txt
@@ -0,0 +1,2 @@
sqlalchemy
psycopg2