rename output_links/ to out/ for brevity

chanzuckerberg · Jul 3, 2020 · 5445500 · 5445500
1 parent 8b1643d
commit 5445500
Show file tree

Hide file tree

Showing 8 changed files with 27 additions and 29 deletions.
diff --git a/WDL/runtime/config_templates/default.cfg b/WDL/runtime/config_templates/default.cfg
@@ -40,10 +40,10 @@ root = /
 # in situ & read-only. Needed if tasks want to write/move/rename input files, but costs time and
 # disk space. --copy-input-files
 copy_input_files = false
-# Each succeeded run directory has an "output_links/" folder containing (by default) a symbolic
-# link to each output file in its original working location. If output_hardlinks is true, then
-# output_links/ is populated with hardlinks instead of symlinks. Beware the potential confusion
-# arising from files with multiple hardlinks! See also delete_work, below.
+# Each succeeded run directory has an "out/" folder containing (by default) a symbolic link to each
+# output file in its original working location. If output_hardlinks is true, then out/ is populated
+# with hardlinks instead of symlinks. Beware the potential confusion arising from files with
+# multiple hardlinks! See also delete_work, below.
 output_hardlinks = false
 # Delete task working directory upon completion. The task container's working directory is a
 # bind-mounted host directory, so files written into it are left behind after the container is torn

diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py
@@ -1305,7 +1305,7 @@ def map_files(v: Value.Base, dn: str) -> Value.Base:
                 assert os.path.isfile(hardlink)
                 newlink = os.path.join(dn, os.path.basename(v.value))
                 os.makedirs(dn, exist_ok=False)
-                if not hardlinks and path_really_within(hardlink, run_dir):
+                if not hardlinks and path_really_within(hardlink, os.path.dirname(run_dir)):
                     # make symlink relative
                     hardlink = os.path.relpath(hardlink, start=os.path.realpath(dn))
                 (os.link if hardlinks else os.symlink)(hardlink, newlink)
@@ -1344,13 +1344,11 @@ def map_files(v: Value.Base, dn: str) -> Value.Base:
                 v.value[key] = map_files(v.value[key], os.path.join(dn, key))
         return v
 
-    os.makedirs(os.path.join(run_dir, "output_links"), exist_ok=False)
+    os.makedirs(os.path.join(run_dir, "out"), exist_ok=False)
     return outputs.map(
         lambda binding: Env.Binding(
             binding.name,
-            map_files(
-                copy.deepcopy(binding.value), os.path.join(run_dir, "output_links", binding.name),
-            ),
+            map_files(copy.deepcopy(binding.value), os.path.join(run_dir, "out", binding.name),),
         )
     )
 

diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -90,19 +90,19 @@ The standard output from `miniwdl run` provides the subdirectory along with JSON
     "assemble_refbased.assembly_length": 18865,
     "assemble_refbased.assembly_length_unambiguous": 18865,
     "assemble_refbased.assembly_mean_coverage": 94.95885858958806,
-    "assemble_refbased.assembly_fasta": "/tmp/viral-pipelines-2.1.0.2/20200604_132146_assemble_refbased/output_links/assembly_fasta/G5012.3.fasta",
+    "assemble_refbased.assembly_fasta": "/tmp/viral-pipelines-2.1.0.2/20200604_132146_assemble_refbased/out/assembly_fasta/G5012.3.fasta",
     "assemble_refbased.reference_genome_length": 18959,
     ...
   },
   "dir": "/tmp/viral-pipelines-2.1.0.2/20200604_132146_assemble_refbased"
 }
 ```
 
-This is also stored in `outputs.json` in the subdirectory. For your convenience, miniwdl furthermore generates a symbolic link `_LAST` pointing to the timestamped subdirectory for most recent run; and an `output_links` directory tree containing symbolic links to the output files.
+This is also stored in `outputs.json` in the subdirectory. For your convenience, miniwdl furthermore generates a symbolic link `_LAST` pointing to the timestamped subdirectory for the most recent run; and an `out` directory tree containing symbolic links to the output files.
 
 ```
-$ tree _LAST/output_links/
-_LAST/output_links/
+$ tree _LAST/out/
+_LAST/out/
 ├── align_to_ref_merged_aligned_trimmed_only_bam
 │   └── G5012.3.align_to_ref.trimmed.bam -> ../../call-merge_align_to_ref/work/G5012.3.align_to_ref.trimmed.bam
 ├── align_to_ref_merged_coverage_plot
@@ -126,7 +126,7 @@ _LAST/output_links/
     └── G5012.3.fasta -> ../../call-call_consensus/work/G5012.3.fasta
 ```
 
-The `output_links` are often more convenient to consume than the JSON, but they only capture outputs that are files. Individual tasks and sub-workflows run in their own nested subdirectories, each with a similar structure.
+The `out` links are often more convenient to consume than the JSON, but they only capture outputs that are files. Individual tasks and sub-workflows run in their own nested subdirectories, each with a similar structure.
 
 ## Next steps
 

diff --git a/docs/runner_reference.md b/docs/runner_reference.md
@@ -13,14 +13,14 @@ For tasks, the run directory also contains:
 * `download/` with any files downloaded from URIs in task inputs
 * `work/` the working directory mounted into the task container, where the command leaves its output files
 * `stdout.txt` and `stderr.txt` from the task command, streamed as it runs.
-* `output_links/` if the task succeeded, symbolic links to the individual output files, organized in a directory tree mirroring the WDL output values & associated JSON structure
+* `out/` if the task succeeded, symbolic links to the individual output files, organized in a directory tree reflecting the WDL output declarations
 
 For workflows,
 
 * `workflow.log`
 * `write_/` and `download/` as above
 * subdirectories for each call to a task or sub-workflow, each structured similarly
-* `output_links/` with links reaching into the call subdirectories where each output file was generated
+* `out/` with links reaching into the call subdirectories where each output file was generated
 
 The top-level run directory also contains:
 

diff --git a/examples/plugin_s3_progressive_upload/miniwdl_s3_progressive_upload.py b/examples/plugin_s3_progressive_upload/miniwdl_s3_progressive_upload.py
@@ -54,11 +54,11 @@ def task(cfg, logger, run_id, run_dir, task, **recv):
         s3prefix = cfg["s3_progressive_upload"]["uri_prefix"]
         assert s3prefix.startswith("s3://"), "MINIWDL__S3_PROGRESSIVE_UPLOAD__URI_PREFIX invalid"
 
-        # for each file under output_links
+        # for each file under out/
         def _raise(ex):
             raise ex
 
-        links_dir = os.path.join(run_dir, "output_links")
+        links_dir = os.path.join(run_dir, "out")
         for (dn, subdirs, files) in os.walk(links_dir, onerror=_raise):
             assert dn == links_dir or dn.startswith(links_dir + "/")
             for fn in files:

diff --git a/examples/upload_output_files.sh b/examples/upload_output_files.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
 # This example illustrates a simple technique to automatically upload a workflow's output files to
-# Amazon S3. It suggests recursively uploading the "output_links" directory generated by miniwdl
-# run, which contains tidy symlinks to the output files in their original locations (typically
-# scattered throughout the run subdirectories of individual tasks and sub-workflows).
+# Amazon S3. It suggests recursively uploading the "out" directory generated by miniwdl run, which
+# contains tidy symlinks to the output files in their original locations (typically scattered
+# throughout the run subdirectories of individual tasks and sub-workflows).
 
 S3_DEST="s3://YOUR-BUCKET/miniwdl_upload_output_files_test/"
 
@@ -46,10 +46,10 @@ EOF
 # miniwdl run and capture its stdout JSON
 miniwdl_stdout=$(miniwdl run /tmp/hello.wdl who=Alice who=Bob --dir=/tmp)
 
-# upload output files to S3 by recursively walking the generated output_links directory and
+# upload output files to S3 by recursively walking the generated "out" directory and
 # following the symlinks therein
 dir_to_upload=$(jq -r .dir <(echo "${miniwdl_stdout}"))
-dir_to_upload="${dir_to_upload}/output_links/"
+dir_to_upload="${dir_to_upload}/out/"
 aws s3 sync --follow-symlinks "$dir_to_upload" "$S3_DEST"
 
 # Rewrite File paths in the outputs JSON to the new S3 URIs.

diff --git a/tests/applied/SARS-CoV-2.t b/tests/applied/SARS-CoV-2.t
@@ -27,7 +27,7 @@ $miniwdl run viral-pipelines/pipes/WDL/tasks/tasks_ncbi_tools.wdl "SRA_ID=${SRR_
 is "$?" "0" "fetch SRA run"
 
 $miniwdl run viral-pipelines/pipes/WDL/workflows/assemble_denovo_with_isnv_calling.wdl \
-    "reads_unmapped_bam=${SRR_ID}/output_links/reads_ubam/${SRR_ID}.bam" \
+    "reads_unmapped_bam=${SRR_ID}/out/reads_ubam/${SRR_ID}.bam" \
     filter_to_taxon.lastal_db_fasta=NC_045512.2.fa \
     assemble.trim_clip_db=viral-pipelines/test/input/clipDb.fasta \
     scaffold.reference_genome_fasta=NC_045512.2.fa \

diff --git a/tests/runner.t b/tests/runner.t
@@ -76,7 +76,7 @@ is "$(ls $f1)" "$f1" "task product brown file"
 f1=$(jq -r '.["echo.out_f"][2]' taskrun/outputs.json)
 is "$(basename $f1)" "fox" "task product fox"
 is "$(ls $f1)" "$f1" "task product fox file"
-is "$(ls taskrun/output_links/out_f/2)" "fox" "task product fox link"
+is "$(ls taskrun/out/out_f/2)" "fox" "task product fox link"
 
 cat << 'EOF' > sleep.wdl
 version 1.0
@@ -128,7 +128,7 @@ is "$(ls $f1)" "$f1" "workflow product brown file"
 f1=$(jq -r '.["echo.t.out_f"][2]' workflowrun/outputs.json)
 is "$(basename $f1)" "fox" "workflow product fox"
 is "$(ls $f1)" "$f1" "workflow product fox file"
-is "$(ls workflowrun/output_links/t.out_f/2)" "fox" "workflow product fox link"
+is "$(ls workflowrun/out/t.out_f/2)" "fox" "workflow product fox link"
 is "$(cat workflowrun/rerun)" "pushd $DN && miniwdl run --dir workflowrun/. echo.wdl t.s=foo t.f=quick t.a_s=bar t.a_f=brown --empty a_s; popd"
 
 cat << 'EOF' > scatter_echo.wdl
@@ -149,9 +149,9 @@ workflow echo {
 EOF
 MINIWDL__FILE_IO__OUTPUT_HARDLINKS=true $miniwdl run --dir scatterrun/. scatter_echo.wdl n=2 t.s=foo t.f=quick t.a_s=bar t.a_f=brown | tee stdout
 is "$?" "0" "scatter run"
-is "$(ls scatterrun/output_links/t.out_f/0/2)" "fox" "scatter product 0 fox link"
-is "$(ls scatterrun/output_links/t.out_f/1/2)" "fox" "scatter product 1 fox link"
-is "$(find scatterrun/output_links -type l | wc -l)" "0" "scatter product hardlinks"
+is "$(ls scatterrun/out/t.out_f/0/2)" "fox" "scatter product 0 fox link"
+is "$(ls scatterrun/out/t.out_f/1/2)" "fox" "scatter product 1 fox link"
+is "$(find scatterrun/out -type l | wc -l)" "0" "scatter product hardlinks"
 is "$(find scatterrun/ | xargs -n 1 stat -c %U | sort | uniq)" "$(whoami)" "scatter files all owned by $(whoami)"
 cmp -s scatter_echo.wdl scatterrun/wdl/scatter_echo.wdl
 is "$?" "0" "copy_source scatter_echo.wdl"