Skip to content

Commit

Permalink
rename output_links/ to out/ for brevity
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Jul 3, 2020
1 parent 8b1643d commit 5445500
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 29 deletions.
8 changes: 4 additions & 4 deletions WDL/runtime/config_templates/default.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ root = /
# in situ & read-only. Needed if tasks want to write/move/rename input files, but costs time and
# disk space. --copy-input-files
copy_input_files = false
# Each succeeded run directory has an "output_links/" folder containing (by default) a symbolic
# link to each output file in its original working location. If output_hardlinks is true, then
# output_links/ is populated with hardlinks instead of symlinks. Beware the potential confusion
# arising from files with multiple hardlinks! See also delete_work, below.
# Each succeeded run directory has an "out/" folder containing (by default) a symbolic link to each
# output file in its original working location. If output_hardlinks is true, then out/ is populated
# with hardlinks instead of symlinks. Beware the potential confusion arising from files with
# multiple hardlinks! See also delete_work, below.
output_hardlinks = false
# Delete task working directory upon completion. The task container's working directory is a
# bind-mounted host directory, so files written into it are left behind after the container is torn
Expand Down
8 changes: 3 additions & 5 deletions WDL/runtime/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,7 @@ def map_files(v: Value.Base, dn: str) -> Value.Base:
assert os.path.isfile(hardlink)
newlink = os.path.join(dn, os.path.basename(v.value))
os.makedirs(dn, exist_ok=False)
if not hardlinks and path_really_within(hardlink, run_dir):
if not hardlinks and path_really_within(hardlink, os.path.dirname(run_dir)):
# make symlink relative
hardlink = os.path.relpath(hardlink, start=os.path.realpath(dn))
(os.link if hardlinks else os.symlink)(hardlink, newlink)
Expand Down Expand Up @@ -1344,13 +1344,11 @@ def map_files(v: Value.Base, dn: str) -> Value.Base:
v.value[key] = map_files(v.value[key], os.path.join(dn, key))
return v

os.makedirs(os.path.join(run_dir, "output_links"), exist_ok=False)
os.makedirs(os.path.join(run_dir, "out"), exist_ok=False)
return outputs.map(
lambda binding: Env.Binding(
binding.name,
map_files(
copy.deepcopy(binding.value), os.path.join(run_dir, "output_links", binding.name),
),
map_files(copy.deepcopy(binding.value), os.path.join(run_dir, "out", binding.name),),
)
)

Expand Down
10 changes: 5 additions & 5 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,19 @@ The standard output from `miniwdl run` provides the subdirectory along with JSON
"assemble_refbased.assembly_length": 18865,
"assemble_refbased.assembly_length_unambiguous": 18865,
"assemble_refbased.assembly_mean_coverage": 94.95885858958806,
"assemble_refbased.assembly_fasta": "/tmp/viral-pipelines-2.1.0.2/20200604_132146_assemble_refbased/output_links/assembly_fasta/G5012.3.fasta",
"assemble_refbased.assembly_fasta": "/tmp/viral-pipelines-2.1.0.2/20200604_132146_assemble_refbased/out/assembly_fasta/G5012.3.fasta",
"assemble_refbased.reference_genome_length": 18959,
...
},
"dir": "/tmp/viral-pipelines-2.1.0.2/20200604_132146_assemble_refbased"
}
```

This is also stored in `outputs.json` in the subdirectory. For your convenience, miniwdl furthermore generates a symbolic link `_LAST` pointing to the timestamped subdirectory for most recent run; and an `output_links` directory tree containing symbolic links to the output files.
This is also stored in `outputs.json` in the subdirectory. For your convenience, miniwdl furthermore generates a symbolic link `_LAST` pointing to the timestamped subdirectory for the most recent run; and an `out` directory tree containing symbolic links to the output files.

```
$ tree _LAST/output_links/
_LAST/output_links/
$ tree _LAST/out/
_LAST/out/
├── align_to_ref_merged_aligned_trimmed_only_bam
│   └── G5012.3.align_to_ref.trimmed.bam -> ../../call-merge_align_to_ref/work/G5012.3.align_to_ref.trimmed.bam
├── align_to_ref_merged_coverage_plot
Expand All @@ -126,7 +126,7 @@ _LAST/output_links/
└── G5012.3.fasta -> ../../call-call_consensus/work/G5012.3.fasta
```

The `output_links` are often more convenient to consume than the JSON, but they only capture outputs that are files. Individual tasks and sub-workflows run in their own nested subdirectories, each with a similar structure.
The `out` links are often more convenient to consume than the JSON, but they only capture outputs that are files. Individual tasks and sub-workflows run in their own nested subdirectories, each with a similar structure.

## Next steps

Expand Down
4 changes: 2 additions & 2 deletions docs/runner_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ For tasks, the run directory also contains:
* `download/` with any files downloaded from URIs in task inputs
* `work/` the working directory mounted into the task container, where the command leaves its output files
* `stdout.txt` and `stderr.txt` from the task command, streamed as it runs.
* `output_links/` if the task succeeded, symbolic links to the individual output files, organized in a directory tree mirroring the WDL output values & associated JSON structure
* `out/` if the task succeeded, symbolic links to the individual output files, organized in a directory tree reflecting the WDL output declarations

For workflows,

* `workflow.log`
* `write_/` and `download/` as above
* subdirectories for each call to a task or sub-workflow, each structured similarly
* `output_links/` with links reaching into the call subdirectories where each output file was generated
* `out/` with links reaching into the call subdirectories where each output file was generated

The top-level run directory also contains:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ def task(cfg, logger, run_id, run_dir, task, **recv):
s3prefix = cfg["s3_progressive_upload"]["uri_prefix"]
assert s3prefix.startswith("s3://"), "MINIWDL__S3_PROGRESSIVE_UPLOAD__URI_PREFIX invalid"

# for each file under output_links
# for each file under out/
def _raise(ex):
raise ex

links_dir = os.path.join(run_dir, "output_links")
links_dir = os.path.join(run_dir, "out")
for (dn, subdirs, files) in os.walk(links_dir, onerror=_raise):
assert dn == links_dir or dn.startswith(links_dir + "/")
for fn in files:
Expand Down
10 changes: 5 additions & 5 deletions examples/upload_output_files.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/bash

# This example illustrates a simple technique to automatically upload a workflow's output files to
# Amazon S3. It suggests recursively uploading the "output_links" directory generated by miniwdl
# run, which contains tidy symlinks to the output files in their original locations (typically
# scattered throughout the run subdirectories of individual tasks and sub-workflows).
# Amazon S3. It suggests recursively uploading the "out" directory generated by miniwdl run, which
# contains tidy symlinks to the output files in their original locations (typically scattered
# throughout the run subdirectories of individual tasks and sub-workflows).

S3_DEST="s3://YOUR-BUCKET/miniwdl_upload_output_files_test/"

Expand Down Expand Up @@ -46,10 +46,10 @@ EOF
# miniwdl run and capture its stdout JSON
miniwdl_stdout=$(miniwdl run /tmp/hello.wdl who=Alice who=Bob --dir=/tmp)

# upload output files to S3 by recursively walking the generated output_links directory and
# upload output files to S3 by recursively walking the generated "out" directory and
# following the symlinks therein
dir_to_upload=$(jq -r .dir <(echo "${miniwdl_stdout}"))
dir_to_upload="${dir_to_upload}/output_links/"
dir_to_upload="${dir_to_upload}/out/"
aws s3 sync --follow-symlinks "$dir_to_upload" "$S3_DEST"

# Rewrite File paths in the outputs JSON to the new S3 URIs.
Expand Down
2 changes: 1 addition & 1 deletion tests/applied/SARS-CoV-2.t
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ $miniwdl run viral-pipelines/pipes/WDL/tasks/tasks_ncbi_tools.wdl "SRA_ID=${SRR_
is "$?" "0" "fetch SRA run"

$miniwdl run viral-pipelines/pipes/WDL/workflows/assemble_denovo_with_isnv_calling.wdl \
"reads_unmapped_bam=${SRR_ID}/output_links/reads_ubam/${SRR_ID}.bam" \
"reads_unmapped_bam=${SRR_ID}/out/reads_ubam/${SRR_ID}.bam" \
filter_to_taxon.lastal_db_fasta=NC_045512.2.fa \
assemble.trim_clip_db=viral-pipelines/test/input/clipDb.fasta \
scaffold.reference_genome_fasta=NC_045512.2.fa \
Expand Down
10 changes: 5 additions & 5 deletions tests/runner.t
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ is "$(ls $f1)" "$f1" "task product brown file"
f1=$(jq -r '.["echo.out_f"][2]' taskrun/outputs.json)
is "$(basename $f1)" "fox" "task product fox"
is "$(ls $f1)" "$f1" "task product fox file"
is "$(ls taskrun/output_links/out_f/2)" "fox" "task product fox link"
is "$(ls taskrun/out/out_f/2)" "fox" "task product fox link"

cat << 'EOF' > sleep.wdl
version 1.0
Expand Down Expand Up @@ -128,7 +128,7 @@ is "$(ls $f1)" "$f1" "workflow product brown file"
f1=$(jq -r '.["echo.t.out_f"][2]' workflowrun/outputs.json)
is "$(basename $f1)" "fox" "workflow product fox"
is "$(ls $f1)" "$f1" "workflow product fox file"
is "$(ls workflowrun/output_links/t.out_f/2)" "fox" "workflow product fox link"
is "$(ls workflowrun/out/t.out_f/2)" "fox" "workflow product fox link"
is "$(cat workflowrun/rerun)" "pushd $DN && miniwdl run --dir workflowrun/. echo.wdl t.s=foo t.f=quick t.a_s=bar t.a_f=brown --empty a_s; popd"

cat << 'EOF' > scatter_echo.wdl
Expand All @@ -149,9 +149,9 @@ workflow echo {
EOF
MINIWDL__FILE_IO__OUTPUT_HARDLINKS=true $miniwdl run --dir scatterrun/. scatter_echo.wdl n=2 t.s=foo t.f=quick t.a_s=bar t.a_f=brown | tee stdout
is "$?" "0" "scatter run"
is "$(ls scatterrun/output_links/t.out_f/0/2)" "fox" "scatter product 0 fox link"
is "$(ls scatterrun/output_links/t.out_f/1/2)" "fox" "scatter product 1 fox link"
is "$(find scatterrun/output_links -type l | wc -l)" "0" "scatter product hardlinks"
is "$(ls scatterrun/out/t.out_f/0/2)" "fox" "scatter product 0 fox link"
is "$(ls scatterrun/out/t.out_f/1/2)" "fox" "scatter product 1 fox link"
is "$(find scatterrun/out -type l | wc -l)" "0" "scatter product hardlinks"
is "$(find scatterrun/ | xargs -n 1 stat -c %U | sort | uniq)" "$(whoami)" "scatter files all owned by $(whoami)"
cmp -s scatter_echo.wdl scatterrun/wdl/scatter_echo.wdl
is "$?" "0" "copy_source scatter_echo.wdl"
Expand Down

0 comments on commit 5445500

Please sign in to comment.