-
Notifications
You must be signed in to change notification settings - Fork 27
/
CWLHandler.java
1190 lines (1062 loc) · 56.1 KB
/
CWLHandler.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright 2017 OICR
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.dockstore.webservice.languages;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.gson.Gson;
import com.google.gson.JsonParseException;
import io.dockstore.common.DescriptorLanguage;
import io.dockstore.common.DockerImageReference;
import io.dockstore.common.LanguageHandlerHelper;
import io.dockstore.common.VersionTypeValidation;
import io.dockstore.webservice.CustomWebApplicationException;
import io.dockstore.webservice.core.Author;
import io.dockstore.webservice.core.DescriptionSource;
import io.dockstore.webservice.core.FileFormat;
import io.dockstore.webservice.core.ParsedInformation;
import io.dockstore.webservice.core.SourceFile;
import io.dockstore.webservice.core.Validation;
import io.dockstore.webservice.core.Version;
import io.dockstore.webservice.helpers.SourceCodeRepoInterface;
import io.dockstore.webservice.jdbi.ToolDAO;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.validator.routines.UrlValidator;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3id.cwl.cwl1_2.CommandLineTool;
import org.w3id.cwl.cwl1_2.DockerRequirement;
import org.w3id.cwl.cwl1_2.ExpressionTool;
import org.w3id.cwl.cwl1_2.Operation;
import org.w3id.cwl.cwl1_2.Process;
import org.w3id.cwl.cwl1_2.Workflow;
import org.w3id.cwl.cwl1_2.WorkflowOutputParameter;
import org.w3id.cwl.cwl1_2.WorkflowStep;
import org.w3id.cwl.cwl1_2.WorkflowStepInput;
import org.w3id.cwl.cwl1_2.utils.Fetcher;
import org.w3id.cwl.cwl1_2.utils.LoadingOptions;
import org.w3id.cwl.cwl1_2.utils.LoadingOptionsBuilder;
import org.w3id.cwl.cwl1_2.utils.RootLoader;
import org.w3id.cwl.cwl1_2.utils.ValidationException;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.SafeConstructor;
import org.yaml.snakeyaml.error.YAMLException;
/**
 * Language handler for CWL descriptors: extracts metadata, resolves imports, and builds DAG and tool-table content.
 */
public class CWLHandler extends AbstractLanguageHandler implements LanguageHandlerInterface {
// Prefix that the "cwlVersion" value of a descriptor must start with to be accepted.
public static final String CWL_VERSION_PREFIX = "v1";
public static final Logger LOG = LoggerFactory.getLogger(CWLHandler.class);
// Error message (prefix) used when a descriptor cannot be parsed.
public static final String CWL_PARSE_ERROR = "Unable to parse CWL workflow, ";
// Error message prefix used when "cwlVersion" is present but unsupported; the detected version is appended by the caller.
public static final String CWL_VERSION_ERROR = "CWL descriptor should contain a cwlVersion starting with " + CWLHandler.CWL_VERSION_PREFIX + ", detected version ";
// Error message used when "cwlVersion" is absent.
public static final String CWL_NO_VERSION_ERROR = "CWL descriptor should contain a cwlVersion";
// Error message prefix used when a workflow step's "run" is neither a file name nor a process.
public static final String CWL_PARSE_SECONDARY_ERROR = "Syntax incorrect. Run command should specify a file name or process: ";
// Value of the "class" field that identifies the dockstore metadata hint among a process's hints.
public static final String METADATA_HINT_CLASS = "_dockstore_metadata";
// Prefix prepended to step ids when they are used as node ids.
private static final String NODE_PREFIX = "dockstore_";
// Step type labels, one per kind of CWL process.
private static final String TOOL_TYPE = "tool";
private static final String WORKFLOW_TYPE = "workflow";
private static final String EXPRESSION_TOOL_TYPE = "expressionTool";
private static final String OPERATION_TYPE = "operation";
// NOTE(review): not referenced in the visible part of this file -- confirm where it is used.
private static final int CODE_SNIPPET_LENGTH = 50;
/**
 * Identifies the descriptor file type that this handler works with.
 *
 * @return the CWL descriptor file type
 */
@Override
protected DescriptorLanguage.FileType getFileType() {
    final DescriptorLanguage.FileType cwlDescriptorType = DescriptorLanguage.FileType.DOCKSTORE_CWL;
    return cwlDescriptorType;
}
/**
 * Picks the first candidate that is neither null nor the empty string.
 *
 * @param values candidates, in priority order
 * @return the first usable candidate, or null when there is none
 */
private String firstNonNullAndNonEmpty(String... values) {
    return Arrays.stream(values)
        .filter(candidate -> candidate != null && !candidate.isEmpty())
        .findFirst()
        .orElse(null);
}
/**
 * Extracts the description and author from a CWL descriptor and stores them on the given version.
 * <p>
 * The description is the first non-empty value among "doc", "description" and "label", in that order.
 * A YAML/JSON parse failure, NullPointerException or ClassCastException raised while extracting
 * is recorded on the version as a failed validation instead of being propagated.
 *
 * @param filePath path of the descriptor, used for preprocessing and as the key of the validation message
 * @param content descriptor content; nothing is done when it is null or empty
 * @param sourceFiles source files made available to the preprocessor
 * @param version version to update
 * @return the version instance that was passed in
 */
@Override
public Version parseWorkflowContent(String filePath, String content, Set<SourceFile> sourceFiles, Version version) {
    // nothing to extract from an absent or empty descriptor
    if (content == null || content.isEmpty()) {
        return version;
    }
    try {
        // Parse, expand $import/$include etc, then retarget to the main process if necessary
        Map<String, Object> descriptor = parseAsMap(content);
        descriptor = preprocess(descriptor, filePath, new Preprocessor(sourceFiles));
        descriptor = findMainProcess(descriptor);

        // "description:" is a draft-3 construct
        String description = null;
        final Object rawDescription = descriptor.get("description");
        if (rawDescription instanceof String) {
            description = (String)rawDescription;
        } else if (rawDescription != null) {
            LOG.debug("\"description:\" is malformed, but was only in CWL draft-3 anyway");
        }

        String label = null;
        final Object rawLabel = descriptor.get("label");
        if (rawLabel instanceof String) {
            label = (String)rawLabel;
        } else if (rawLabel != null) {
            LOG.debug("\"label:\" is malformed");
        }

        // "doc:" was added in CWL 1.0, and may be an array since CWL 1.1
        String doc = null;
        final Object rawDoc = descriptor.get("doc");
        if (rawDoc instanceof String) {
            doc = (String)rawDoc;
        } else if (rawDoc instanceof List) {
            List docLines = (List)rawDoc;
            doc = String.join(System.getProperty("line.separator"), docLines);
        }

        final String chosenDescription = firstNonNullAndNonEmpty(doc, description, label);
        if (chosenDescription == null) {
            LOG.info("Description not found!");
        } else {
            version.setDescriptionAndDescriptionSource(chosenDescription, DescriptionSource.DESCRIPTOR);
        }

        // Authors: the schema.org key takes precedence over the Dublin Core key
        final String schemaKey = "s:author";
        final String dctKey = "dct:creator";
        if (descriptor.containsKey(schemaKey)) {
            processAuthor(version, descriptor, schemaKey, "s:name", "s:email", "Author not found!");
        } else if (descriptor.containsKey(dctKey)) {
            processAuthor(version, descriptor, dctKey, "foaf:name", "foaf:mbox", "Creator not found!");
        }
        LOG.info("Repository has Dockstore.cwl");
    } catch (YAMLException | JsonParseException | NullPointerException | ClassCastException ex) {
        // prefer the underlying cause when there is one; otherwise describe the exception itself
        final Throwable cause = ex.getCause();
        final String message = cause != null ? cause.toString() : ex.toString();
        LOG.info("CWL file is malformed " + message);
        // report the malformed workflow as a failed validation rather than failing outright
        final Map<String, String> validationMessageObject = new HashMap<>();
        validationMessageObject.put(filePath, "CWL file is malformed or missing, cannot extract metadata: " + message);
        version.addOrUpdateValidation(new Validation(DescriptorLanguage.FileType.DOCKSTORE_CWL, false, validationMessageObject));
    }
    return version;
}
/**
 * Reads author information from the descriptor metadata and adds it to the version.
 * <p>
 * The value stored under {@code dctKey} may be a single mapping or a list of mappings; when it is a list,
 * only its first element is used. When no author entry is available, {@code errorMessage} is logged
 * and the version is left untouched.
 *
 * @param version version that receives the author
 * @param map descriptor metadata
 * @param dctKey key under which the author entry (or list of entries) is stored
 * @param authorKey key, within the author entry, of the author's name
 * @param emailKey key, within the author entry, of the author's email; a leading "mailto:" is stripped
 * @param errorMessage message logged when no author entry is available
 * @throws ClassCastException if the author entry is not a mapping, or the name/email is not a string
 */
private void processAuthor(Version version, Map map, String dctKey, String authorKey, String emailKey, String errorMessage) {
    Object entry = map.get(dctKey);
    if (entry instanceof List) {
        final List<?> entries = (List<?>)entry;
        // Guard the empty list: get(0) would throw IndexOutOfBoundsException, which parseWorkflowContent does not catch.
        entry = entries.isEmpty() ? null : entries.get(0);
    }
    // A non-mapping entry deliberately still raises ClassCastException, which parseWorkflowContent reports as a malformed descriptor.
    final Map<?, ?> authorEntry = (Map<?, ?>)entry;
    if (authorEntry == null) {
        LOG.info(errorMessage);
        return;
    }
    final String author = (String)authorEntry.get(authorKey);
    final Author newAuthor = new Author(author);
    final String email = (String)authorEntry.get(emailKey);
    if (!Strings.isNullOrEmpty(email)) {
        newAuthor.setEmail(email.replaceFirst("^mailto:", ""));
    }
    version.addAuthor(newAuthor);
}
/**
 * Collects the files imported (directly or transitively) by a descriptor.
 *
 * @param repositoryId identifies the source repository
 * @param content content of the descriptor to scan
 * @param version version of the files to get
 * @param sourceCodeRepoInterface used to retrieve the imported files
 * @param workingDirectoryForFile path against which relative imports are resolved
 * @return the imports that were found, as populated by processImport
 */
@Override
public Map<String, SourceFile> processImports(String repositoryId, String content, Version version,
    SourceCodeRepoInterface sourceCodeRepoInterface, String workingDirectoryForFile) {
    final Map<String, SourceFile> collectedImports = new HashMap<>();
    processImport(repositoryId, content, version, sourceCodeRepoInterface, workingDirectoryForFile, collectedImports);
    return collectedImports;
}
/**
 * Parses one descriptor and records the imports it references into {@code imports}.
 * Content that cannot be parsed is logged and otherwise ignored.
 */
private void processImport(String repositoryId, String content, Version version,
    SourceCodeRepoInterface sourceCodeRepoInterface, String workingDirectoryForFile, Map<String, SourceFile> imports) {
    try {
        final Map<String, Object> parsedDescriptor = parseAsMap(content);
        handleMap(repositoryId, workingDirectoryForFile, version, imports, parsedDescriptor, sourceCodeRepoInterface);
    } catch (YAMLException | JsonParseException parseFailure) {
        SourceCodeRepoInterface.LOG.error("Could not process content from workflow as yaml", parseFailure);
    }
}
/**
 * Collects the file formats declared in the inputs or outputs section of a single CWL descriptor.
 * The section may be written either as a mapping or as a list; anything else is logged and yields no formats.
 *
 * @param content contents of a CWL descriptor file
 * @param type name of the section to inspect, either "inputs" or "outputs"
 * @return the file formats found; empty when none are declared or the content cannot be processed
 */
public Set<FileFormat> getFileFormats(String content, String type) {
    final Set<FileFormat> formats = new HashSet<>();
    try {
        final Map<String, Object> descriptor = parseAsMap(content);
        final Object section = descriptor.get(type);
        final Collection<?> entries;
        if (section instanceof Map) {
            entries = ((Map<String, ?>)section).values();
        } else if (section instanceof List) {
            entries = (List<?>)section;
        } else {
            LOG.debug(type + " is not comprehensible.");
            entries = Collections.emptyList();
        }
        for (Object entry : entries) {
            handlePotentialFormatEntry(formats, entry);
        }
    } catch (YAMLException | JsonParseException | NullPointerException e) {
        LOG.error("Could not process content from entry as yaml", e);
    }
    return formats;
}
/**
 * Adds a file format to the set when the given value is a string; any other value is logged and skipped.
 */
private void addFileFormat(Set<FileFormat> fileFormats, Object format) {
    if (!(format instanceof String)) {
        LOG.debug("malformed file format value");
        return;
    }
    final FileFormat parsedFormat = new FileFormat();
    parsedFormat.setValue((String)format);
    fileFormats.add(parsedFormat);
}
/**
 * Records the "format" of one input/output entry, if the entry is a mapping.
 * The format may be a single value or a list of values.
 */
private void handlePotentialFormatEntry(Set<FileFormat> fileFormats, Object v) {
    if (!(v instanceof Map)) {
        return;
    }
    final Object declaredFormat = ((Map<String, Object>)v).get("format");
    if (declaredFormat instanceof List) {
        for (Object singleFormat : (List<?>)declaredFormat) {
            addFileFormat(fileFormats, singleFormat);
        }
        return;
    }
    addFileFormat(fileFormats, declaredFormat);
}
/**
 * Runs the preprocessor over a parsed descriptor and returns the expanded mapping.
 *
 * @throws CustomWebApplicationException (422) when the preprocessed result is not a mapping, i.e. the CWL is not valid
 */
private Map<String, Object> preprocess(Map<String, Object> mapping, String mainDescriptorPath, Preprocessor preprocessor) {
    final Object expanded = preprocessor.preprocess(mapping, mainDescriptorPath, null, 0);
    if (expanded instanceof Map) {
        return (Map<String, Object>)expanded;
    }
    final String message = "CWL file is malformed";
    LOG.error(message);
    throw new CustomWebApplicationException(message, HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
/**
 * Builds a "safe" LoadingOptions whose embedded Fetcher never retrieves anything:
 * `urlJoin` returns the url unchanged and `fetchText` logs the attempt and returns an empty quoted string.
 * This keeps the cwljava parser from contacting remote servers; the preprocessor should already have
 * retrieved and inlined the files that are needed.
 */
private LoadingOptions constructSafeLoadingOptions() {
    final Fetcher inertFetcher = new Fetcher() {
        @Override
        public String urlJoin(final String baseUrl, final String url) {
            return url;
        }

        @Override
        public String fetchText(final String url) {
            LOG.error("cwljava attempted to fetch url " + url);
            return "\"\"";
        }
    };
    final LoadingOptionsBuilder builder = new LoadingOptionsBuilder();
    return builder.setFetcher(inertFetcher).build();
}
/**
 * Produces the content for either the DAG or the tool table of a CWL descriptor.
 * <p>
 * The descriptor is parsed, preprocessed, checked for a supported cwlVersion, retargeted to its main process,
 * wrapped as a single-step workflow if it is not a Workflow, and finally parsed with cwljava before its steps are processed.
 *
 * @param mainDescriptorPath path of the main descriptor, passed to the preprocessor
 * @param mainDescriptor content of the main descriptor
 * @param secondarySourceFiles source files made available to the preprocessor
 * @param type Type.DAG to return the output of setupJSONDAG; any other value returns the output of getJSONTableToolContent
 * @param dao passed through to processWorkflow, which uses it when resolving docker URLs
 * @return the generated content; always present when this method returns normally
 * @throws CustomWebApplicationException (422) when cwlVersion is missing or unsupported, or when the descriptor cannot be parsed
 */
@Override
@SuppressWarnings("checkstyle:methodlength")
public Optional<String> getContent(String mainDescriptorPath, String mainDescriptor, Set<SourceFile> secondarySourceFiles, LanguageHandlerInterface.Type type,
ToolDAO dao) {
try {
// Initialize data structures for DAG
Map<String, ToolInfo> toolInfoMap = new HashMap<>(); // Mapping of stepId -> array of dependencies for the step
List<Pair<String, String>> nodePairs = new ArrayList<>(); // List of pairings of step id and dockerPull url
Map<String, String> stepToType = new HashMap<>(); // Map of stepId -> type (expression tool, tool, workflow)
// Initialize data structures for Tool table
Map<String, DockerInfo> nodeDockerInfo = new HashMap<>(); // map of stepId -> (run path, docker image, docker url, docker specifier)
// Convert CWL to object representation
Map<String, Object> mapping = parseAsMap(mainDescriptor);
// Expand "$import", "$include", "run:", etc
Preprocessor preprocessor = new Preprocessor(secondarySourceFiles);
mapping = preprocess(mapping, mainDescriptorPath, preprocessor);
// Verify cwl version is correctly specified
final Object cwlVersion = mapping.get("cwlVersion");
if (cwlVersion != null) {
final boolean startsWith = cwlVersion.toString().startsWith(CWLHandler.CWL_VERSION_PREFIX);
if (!startsWith) {
LOG.error(CWLHandler.CWL_VERSION_ERROR + cwlVersion.toString());
throw new CustomWebApplicationException(CWLHandler.CWL_VERSION_ERROR
+ cwlVersion.toString(), HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
} else {
LOG.error(CWLHandler.CWL_NO_VERSION_ERROR);
throw new CustomWebApplicationException(CWLHandler.CWL_NO_VERSION_ERROR, HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
// Retarget to the main process, if necessary.
mapping = findMainProcess(mapping);
// If the descriptor describes something other than a workflow, wrap and process it as a single-step workflow
final Object cwlClass = mapping.get("class");
if (!"Workflow".equals(cwlClass)) {
mapping = convertToolToSingleStepWorkflow(mapping);
}
// Parse the preprocessed document using cwljava
Object rootObject;
try {
// Parse the document using a LoadingOptions instance which neutralizes any file loads, since all files should have already been inlined by the preprocesser.
rootObject = RootLoader.loadDocument(mapping, "/", constructSafeLoadingOptions());
} catch (ValidationException e) {
// The validation details are logged only; the exception thrown to the caller carries just the generic parse error.
LOG.error("Validation exception: " + e.getMessage(), e);
throw new CustomWebApplicationException(CWL_PARSE_ERROR, HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
// The parse should always produce a Workflow object, because we converted any non-workflow to a one-step workflow, above.
if (!(rootObject instanceof Workflow)) {
LOG.error("Top level object was not a Workflow, class " + className(rootObject));
throw new CustomWebApplicationException(CWL_PARSE_ERROR, HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
// Process the parse workflow
Workflow workflow = (Workflow)rootObject;
processWorkflow(workflow, null, null, 0, type, dao, nodePairs, toolInfoMap, stepToType, nodeDockerInfo);
// Return the requested information
if (type == LanguageHandlerInterface.Type.DAG) {
// Determine steps that point to end
List<String> endDependencies = new ArrayList<>();
if (workflow.getOutputs() != null) {
for (Object outputParameterObj : workflow.getOutputs()) {
if (outputParameterObj instanceof WorkflowOutputParameter) {
WorkflowOutputParameter outputParameter = (WorkflowOutputParameter)outputParameterObj;
Object sources = outputParameter.getOutputSource();
processDependencies(NODE_PREFIX, endDependencies, sources, 2);
}
}
}
// Synthetic end node: depends on every step that feeds a workflow output.
toolInfoMap.put("UniqueEndKey", new ToolInfo(null, endDependencies));
nodePairs.add(new MutablePair<>("UniqueEndKey", ""));
// connect start node with them
// Any node that has no recorded dependencies is attached to the synthetic begin node.
for (Pair<String, String> node : nodePairs) {
if (toolInfoMap.get(node.getLeft()) == null) {
toolInfoMap.put(node.getLeft(), new ToolInfo(null, Lists.newArrayList("UniqueBeginKey")));
}
}
// The begin node is added after the loop above, so it does not receive a dependency on itself.
nodePairs.add(new MutablePair<>("UniqueBeginKey", ""));
return Optional.of(setupJSONDAG(nodePairs, toolInfoMap, stepToType, nodeDockerInfo));
} else {
// Tool table: keep only the steps whose type is "tool".
Map<String, DockerInfo> toolDockerInfo = nodeDockerInfo.entrySet().stream().filter(e -> "tool".equals(stepToType.get(e.getKey()))).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
return Optional.of(getJSONTableToolContent(toolDockerInfo));
}
} catch (ClassCastException | YAMLException | JsonParseException ex) {
final String exMsg = CWLHandler.CWL_PARSE_ERROR + ex.getMessage();
LOG.error(exMsg, ex);
throw new CustomWebApplicationException(exMsg, HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
}
/**
 * Wraps a non-workflow process in a synthetic workflow that has a single step named "tool".
 *
 * @param tool the process to wrap; it becomes the "run" of the single step
 * @return a mapping describing the wrapper workflow, with empty inputs and outputs
 */
private Map<String, Object> convertToolToSingleStepWorkflow(Map<String, Object> tool) {
    final Map<String, Object> onlyStep = Map.of("run", tool, "in", List.of(), "out", List.of());
    final Map<String, Object> wrapper = new HashMap<>();
    wrapper.put("cwlVersion", "v1.2");
    wrapper.put("id", "_dockstore_wrapper");
    wrapper.put("class", "Workflow");
    wrapper.put("inputs", Map.of());
    wrapper.put("outputs", Map.of());
    wrapper.put("steps", Map.of("tool", onlyStep));
    return wrapper;
}
/**
 * Returns the class name of an object for use in log messages, tolerating null.
 *
 * @param obj any object, possibly null
 * @return the fully qualified class name, or "null object" for null
 */
private String className(Object obj) {
    if (obj == null) {
        return "null object";
    }
    return obj.getClass().getName();
}
/**
 * Converts a workflow step ID as returned by cwljava, which includes the IDs of the enclosing workflows,
 * into the form used internally.
 * For example, given the id "/#W1/S1/W2/S2", where W1 and W2 are the parent and child workflow IDs and
 * S1 and S2 are the corresponding workflow step IDs, the step IDs are joined as "S1.S2" and the result is
 * that string preceded by NODE_PREFIX.
 */
private String convertStepId(String cwljavaStepId) {
    final String[] segments = cwljavaStepId.replaceFirst("^/", "").split("/");
    final List<String> stepIds = new ArrayList<>();
    // workflow IDs sit at the even positions, step IDs at the odd ones
    for (int position = 1; position < segments.length; position += 2) {
        stepIds.add(segments[position]);
    }
    return NODE_PREFIX + String.join(".", stepIds);
}
/**
 * Walks the steps of a workflow, recursing into nested workflows, and fills the supplied collections
 * with per-step dependency, type and docker information.
 *
 * @param workflow workflow whose steps are processed
 * @param parentRequirementState requirement state inherited from the enclosing scope; may be null
 * @param parentHintState hint state inherited from the enclosing scope; may be null
 * @param depth nesting depth, 0 for the top-level workflow
 * @param type kind of content being built; node pairs are only recorded for Type.DAG
 * @param dao passed to getURLFromEntry when resolving a docker URL
 * @param nodePairs output: (step id, docker url) pairs, recorded for top-level steps only
 * @param toolInfoMap output: step id -> dependency information, recorded for top-level steps only
 * @param stepToType output: step id -> process type, recorded at every depth
 * @param nodeDockerInfo output: step id -> docker information, recorded at every depth
 * @throws CustomWebApplicationException (422) when a step's "run" is neither a process nor a string
 */
@SuppressWarnings("checkstyle:ParameterNumber")
private void processWorkflow(Workflow workflow, RequirementOrHintState parentRequirementState, RequirementOrHintState parentHintState, int depth, LanguageHandlerInterface.Type type, ToolDAO dao, List<Pair<String, String>> nodePairs, Map<String, ToolInfo> toolInfoMap, Map<String, String> stepToType, Map<String, DockerInfo> nodeDockerInfo) {
// Join parent and current requirements and hints.
RequirementOrHintState requirementState = addToRequirementOrHintState(parentRequirementState, workflow.getRequirements());
RequirementOrHintState hintState = addToRequirementOrHintState(parentHintState, workflow.getHints());
// Iterate through steps to find dependencies and docker requirements
for (Object workflowStepObj: workflow.getSteps()) {
WorkflowStep workflowStep = (WorkflowStep)workflowStepObj; // per the spec, the only possible type is WorkflowStep
String workflowStepId = convertStepId(deOptionalize(workflowStep.getId()));
// Dependencies are only collected for the steps of the top-level workflow.
if (depth == 0) {
ArrayList<String> stepDependencies = new ArrayList<>();
// Iterate over source and get the dependencies
if (workflowStep.getIn() != null) {
for (Object stepInputObj : workflowStep.getIn()) {
if (stepInputObj instanceof WorkflowStepInput) {
WorkflowStepInput stepInput = (WorkflowStepInput)stepInputObj;
Object sources = stepInput.getSource();
processDependencies(NODE_PREFIX, stepDependencies, sources, 1);
}
}
if (stepDependencies.size() > 0) {
// Append to an existing entry if there is one, otherwise create the entry.
toolInfoMap.computeIfPresent(workflowStepId, (toolId, toolInfo) -> {
toolInfo.toolDependencyList.addAll(stepDependencies);
return toolInfo;
});
toolInfoMap.computeIfAbsent(workflowStepId, toolId -> new ToolInfo(null, stepDependencies));
}
}
}
// Check workflow step for docker requirement and hints
RequirementOrHintState stepRequirementState = addToRequirementOrHintState(requirementState, workflowStep.getRequirements());
RequirementOrHintState stepHintState = addToRequirementOrHintState(hintState, workflowStep.getHints());
String stepDockerPath = getDockerPull(stepRequirementState, stepHintState);
// Check for docker requirement within workflow step file
Object run = workflowStep.getRun();
String currentPath;
if (run instanceof Process) {
// If the run object is an instance of Process, it's either a Workflow, CommandLineTool, ExpressionTool, or Operation.
Process process = (Process)run;
// The process's own requirements and hints are layered on top of the step's when choosing the docker image.
stepDockerPath = getDockerPull(
addToRequirementOrHintState(stepRequirementState, process.getRequirements()),
addToRequirementOrHintState(stepHintState, process.getHints()));
stepToType.put(workflowStepId, computeProcessType(process));
currentPath = getDockstoreMetadataHintValue(deOptionalize(process.getHints()), "path");
if (process instanceof Workflow) {
// Recurse with the step-level state; the nested call adds the nested workflow's own requirements and hints itself.
processWorkflow((Workflow)process, stepRequirementState, stepHintState, depth + 1, type, dao, nodePairs, toolInfoMap, stepToType, nodeDockerInfo);
}
} else if (run instanceof String) {
stepToType.put(workflowStepId, "n/a");
currentPath = run.toString();
} else {
String message = CWLHandler.CWL_PARSE_SECONDARY_ERROR + "in workflow step " + workflowStepId;
LOG.error("Type of run object: " + className(run));
LOG.error(message);
throw new CustomWebApplicationException(message, HttpStatus.SC_UNPROCESSABLE_ENTITY);
}
if (currentPath == null) {
currentPath = "";
}
DockerSpecifier dockerSpecifier = null;
String dockerUrl = null;
// Only workflows and command line tools with a docker image get a specifier and URL.
if ((run instanceof Workflow || run instanceof CommandLineTool) && !Strings.isNullOrEmpty(stepDockerPath)) {
// CWL doesn't support parameterized docker pulls. Must be a string.
dockerSpecifier = LanguageHandlerInterface.determineImageSpecifier(stepDockerPath, DockerImageReference.LITERAL);
dockerUrl = getURLFromEntry(stepDockerPath, dao, dockerSpecifier);
}
if (depth == 0 && type == LanguageHandlerInterface.Type.DAG) {
nodePairs.add(new MutablePair<>(workflowStepId, dockerUrl));
}
nodeDockerInfo.put(workflowStepId, new DockerInfo(currentPath, stepDockerPath, dockerUrl, dockerSpecifier));
}
}
/**
 * Looks up a value in the dockstore metadata hint, which was added by the preprocessor.
 *
 * @param hints hints of a process; may be null
 * @param key key to read from the metadata hint
 * @return the value, or null when there are no hints, no metadata hint, or no such key
 */
private String getDockstoreMetadataHintValue(List<Object> hints, String key) {
    final Map<String, String> metadata = hints == null ? null : findMapInList(hints, "class", METADATA_HINT_CLASS);
    return metadata == null ? null : metadata.get(key);
}
/**
 * Finds the first element of the list that is a map holding the given value under the given key.
 *
 * @return the matching map, or null when no element matches
 */
private static Map findMapInList(List<Object> list, Object key, Object value) {
    for (Object element : list) {
        if (element instanceof Map && value.equals(((Map)element).get(key))) {
            return (Map)element;
        }
    }
    return null;
}
/**
 * Maps a CWL process to the step type label used for it.
 *
 * @return the label for a Workflow, CommandLineTool, ExpressionTool or Operation (checked in that order), otherwise "n/a"
 */
private String computeProcessType(Process process) {
    final String processType;
    if (process instanceof Workflow) {
        processType = WORKFLOW_TYPE;
    } else if (process instanceof CommandLineTool) {
        processType = TOOL_TYPE;
    } else if (process instanceof ExpressionTool) {
        processType = EXPRESSION_TOOL_TYPE;
    } else if (process instanceof Operation) {
        processType = OPERATION_TYPE;
    } else {
        processType = "n/a";
    }
    return processType;
}
/**
 * Adds to {@code endDependencies} the node(s) that the given source(s) depend on.
 * A single source is given as a string; anything else is treated as a list of source strings and delegated to filterDependent.
 *
 * @param nodePrefix prefix to attach to extracted dependencies
 * @param endDependencies list that receives the dependencies
 * @param sources a source string, a list of source strings, or null (in which case nothing is added)
 * @param skip number of leading slash-separated name components to skip
 */
private void processDependencies(String nodePrefix, List<String> endDependencies, Object sources, int skip) {
    if (sources == null) {
        return;
    }
    if (!(sources instanceof String)) {
        endDependencies.addAll(filterDependent((List<String>)sources, nodePrefix, skip));
        return;
    }
    final String[] components = ((String)sources).replaceFirst("^/", "").split("/");
    final int firstKept = Math.min(skip, components.length);
    final String[] remaining = Arrays.copyOfRange(components, firstKept, components.length);
    // a dependency exists only when something follows the first remaining component
    if (remaining.length > 1) {
        endDependencies.add(nodePrefix + remaining[0].replaceFirst("#", ""));
    }
}
/**
 * Iterates over a map of CWL file content looking for imports. When an import is found, the imported file is
 * retrieved and is itself scanned for imports.
 * @param repositoryId identifies the git repository that we wish to use, normally something like 'organization/repo_name`
 * @param parentFilePath absolute path to the parent file which references the imported file
 * @param version version of the files to get
 * @param imports mapping of filenames to imports
 * @param fileContentMap CWL file mapping
 * @param sourceCodeRepoInterface used to retrieve imports
 */
private void handleMap(String repositoryId, String parentFilePath, Version version, Map<String, SourceFile> imports, Map<String, ?> fileContentMap,
    SourceCodeRepoInterface sourceCodeRepoInterface) {
    Set<String> importKeywords = Sets.newHashSet("$import", "$include", "$mixin", "import", "include", "mixin");
    ParsedInformation parsedInformation = getParsedInformation(version, DescriptorLanguage.CWL);
    for (Map.Entry<String, ?> e : fileContentMap.entrySet()) {
        final String key = e.getKey();
        final Object mapValue = e.getValue();
        String absoluteImportPath;
        // Lower-case with Locale.ROOT so that keyword matching does not depend on the JVM's default locale
        // (under a Turkish locale, for example, "I" lower-cases to a dotless i and "IMPORT" would not match).
        if (importKeywords.contains(key.toLowerCase(Locale.ROOT))) {
            // handle imports and includes
            if (mapValue instanceof String) {
                setImportsBasedOnMapValue(parsedInformation, (String)mapValue);
                absoluteImportPath = unsafeConvertRelativePathToAbsolutePath(parentFilePath, (String)mapValue);
                handleAndProcessImport(repositoryId, absoluteImportPath, version, imports, (String)mapValue, sourceCodeRepoInterface);
            }
        } else if (key.equalsIgnoreCase("run")) {
            // for workflows, bare files may be referenced. See https://github.com/dockstore/dockstore/issues/208
            //ex:
            //  run: {import: revtool.cwl}
            //  run: revtool.cwl
            if (mapValue instanceof String) {
                setImportsBasedOnMapValue(parsedInformation, (String)mapValue);
                absoluteImportPath = unsafeConvertRelativePathToAbsolutePath(parentFilePath, (String)mapValue);
                handleAndProcessImport(repositoryId, absoluteImportPath, version, imports, (String)mapValue, sourceCodeRepoInterface);
            } else if (mapValue instanceof Map) {
                // this handles the case where an import is used
                handleMap(repositoryId, parentFilePath, version, imports, (Map)mapValue, sourceCodeRepoInterface);
            }
        } else {
            // any other key: descend into nested maps and lists
            handleMapValue(repositoryId, parentFilePath, version, imports, mapValue, sourceCodeRepoInterface);
        }
    }
}
/**
 * Retrieves an imported file, unless it has already been recorded, and then scans it for further imports.
 * Because already-recorded paths are skipped, a file is processed at most once.
 */
private void handleAndProcessImport(String repositoryId, String absolutePath, Version version, Map<String, SourceFile> imports, String relativePath, SourceCodeRepoInterface sourceCodeRepoInterface) {
    if (imports.containsKey(absolutePath)) {
        return;
    }
    handleImport(repositoryId, version, imports, relativePath, sourceCodeRepoInterface, absolutePath);
    final SourceFile retrieved = imports.get(absolutePath);
    if (retrieved == null) {
        // nothing was recorded for this path, so there is nothing to scan
        return;
    }
    processImport(repositoryId, retrieved.getContent(), version, sourceCodeRepoInterface, absolutePath, imports);
}
/**
 * Flags the kind of import on the parsed information, based on the import string.
 * A valid http/https URL is flagged as an HTTP import; anything else as a local import.
 * @param parsedInformation parsed information to update
 * @param mapValue import string (should be either a local import or an HTTP(S) import)
 */
public static void setImportsBasedOnMapValue(ParsedInformation parsedInformation, String mapValue) {
    final UrlValidator httpUrlValidator = new UrlValidator(new String[] {"http", "https"});
    final boolean isHttpImport = httpUrlValidator.isValid(mapValue);
    if (!isHttpImport) {
        parsedInformation.setHasLocalImports(true);
        return;
    }
    parsedInformation.setHasHTTPImports(true);
}
/**
 * Descends into a value of the CWL content, passing every mapping it finds on to be checked for imports.
 * Lists are traversed element by element; values that are neither maps nor lists are ignored.
 * @param repositoryId identifies the git repository that we wish to use, normally something like 'organization/repo_name`
 * @param parentFilePath absolute path to the parent file which references the imported file
 * @param version version of the files to get
 * @param imports mapping of filenames to imports
 * @param mapValue CWL file object
 * @param sourceCodeRepoInterface used to retrieve imports
 */
private void handleMapValue(String repositoryId, String parentFilePath, Version version, Map<String, SourceFile> imports,
    Object mapValue, SourceCodeRepoInterface sourceCodeRepoInterface) {
    if (mapValue instanceof Map) {
        handleMap(repositoryId, parentFilePath, version, imports, (Map)mapValue, sourceCodeRepoInterface);
        return;
    }
    if (mapValue instanceof List) {
        ((List<?>)mapValue).forEach(
            member -> handleMapValue(repositoryId, parentFilePath, version, imports, member, sourceCodeRepoInterface));
    }
}
/**
 * Chooses the dockerPull value from the requirement and hint states; a requirement wins over a hint.
 *
 * @param requirementState accumulated requirement state
 * @param hintState accumulated hint state
 * @return docker image name from the requirements if set, otherwise whatever the hints hold
 */
private String getDockerPull(RequirementOrHintState requirementState, RequirementOrHintState hintState) {
    final String fromRequirements = requirementState.getDockerPull();
    return fromRequirements != null ? fromRequirements : hintState.getDockerPull();
}
/**
 * Unwraps an Optional into a plain, possibly null, value.
 * The Optional reference itself may be null: the cwljava parser has been seen to return one.
 *
 * @return the contained value, or null when the Optional is null or empty
 */
private <T> T deOptionalize(Optional<T> optional) {
    return optional == null ? null : optional.orElse(null);
}
/**
 * Derives a new requirement/hint state by folding in the information-of-interest from a list of CWL requirements/hints.
 * When there is nothing to add, the existing state (or a fresh empty state, if none was given) is returned as is.
 */
private RequirementOrHintState addToRequirementOrHintState(RequirementOrHintState existing, Optional<List<Object>> optionalAdds) {
    final RequirementOrHintState base = existing != null ? existing : new RequirementOrHintState();
    final List<Object> additions = deOptionalize(optionalAdds);
    if (additions == null || additions.isEmpty()) {
        return base;
    }
    final RequirementOrHintState combined = new RequirementOrHintState(base);
    for (Object addition : additions) {
        // cwljava does not parse an equivalent requirement and hint to the same representation,
        // so both the DockerRequirement object and the equivalent Map have to be recognized.
        if (addition instanceof DockerRequirement) {
            combined.setDockerPull(deOptionalize(((DockerRequirement)addition).getDockerPull()));
        }
        if (addition instanceof Map) {
            final Map asMap = (Map)addition;
            if ("DockerRequirement".equals(asMap.get("class"))) {
                final Object dockerPull = asMap.get("dockerPull");
                if (dockerPull instanceof String) {
                    combined.setDockerPull((String)dockerPull);
                }
            }
        }
    }
    return combined;
}
/**
 * Checks that the CWL content declares a supported version, i.e. that its top-level "cwlVersion" value starts
 * with the expected version prefix. Content that cannot be parsed as a map, or that has no "cwlVersion", is
 * reported as invalid.
 * @param content the CWL descriptor content
 * @return true if file is valid CWL version, false otherwise
 */
private boolean isValidCwl(String content) {
    try {
        final Object declaredVersion = parseAsMap(content).get("cwlVersion");
        if (declaredVersion == null) {
            return false;
        }
        final String versionText = declaredVersion.toString();
        if (versionText.startsWith(CWLHandler.CWL_VERSION_PREFIX)) {
            return true;
        }
        LOG.error("detected invalid version: " + versionText);
        return false;
    } catch (ClassCastException | YAMLException | JsonParseException e) {
        // Unparseable content cannot declare a valid version.
        return false;
    }
}
/**
 * Extracts dependency node names from a list of source names. Each source is split on slashes (after dropping
 * one leading slash), the first {@code skip} components are discarded, and if more than one component remains,
 * the first remaining component (minus its first '#') is emitted with the node prefix attached.
 * @param sources list of sources
 * @param nodePrefix prefix to attach to extracted dependencies
 * @param skip number of slash-separated name components to skip
 * @return filtered list of dependent sources
 */
private List<String> filterDependent(List<String> sources, String nodePrefix, int skip) {
    final List<String> dependents = new ArrayList<>();
    sources.forEach(source -> {
        final String withoutLeadingSlash = source.replaceFirst("^/", "");
        final String[] components = withoutLeadingSlash.split("/");
        final int firstKept = Math.min(skip, components.length);
        final String[] remaining = Arrays.copyOfRange(components, firstKept, components.length);
        // A lone remaining component names no upstream node, so it is not a dependency.
        if (remaining.length >= 2) {
            final String nodeName = remaining[0].replaceFirst("#", "");
            dependents.add(nodePrefix + nodeName);
        }
    });
    return dependents;
}
/**
 * Validates that the primary descriptor is a CWL process of the expected kind and passes the CWL version check.
 *
 * <p>The primary descriptor is looked up, by exact path match, among the source files that remain after filtering
 * for the CWL descriptor file type. Validation fails if the descriptor is not present, is blank, cannot be parsed,
 * declares a {@code class} that is not in {@code processClasses} (or declares no {@code class} at all), or fails
 * the version check.
 *
 * @param sourceFiles source files of the version being validated
 * @param primaryDescriptorFilePath path of the primary descriptor; also the key of the returned validation message
 * @param processType human-readable name of the expected kind of process, used in the validation message
 * @param processClasses CWL {@code class} values that are acceptable for the expected kind of process
 * @param oppositeType human-readable name of the other kind of process, suggested when the class matches it instead
 * @param oppositeClasses CWL {@code class} values that belong to the other kind of process
 * @return a {@code VersionTypeValidation} built with {@code true} and no messages when no problem was found,
 *     otherwise one built with {@code false} and a single message keyed by the primary descriptor path
 */
private VersionTypeValidation validateProcessSet(Set<SourceFile> sourceFiles, String primaryDescriptorFilePath,
    String processType, Set<String> processClasses, String oppositeType, Set<String> oppositeClasses) {
    List<DescriptorLanguage.FileType> fileTypes = new ArrayList<>(Collections.singletonList(DescriptorLanguage.FileType.DOCKSTORE_CWL));
    Set<SourceFile> filteredSourcefiles = filterSourcefiles(sourceFiles, fileTypes);
    Optional<SourceFile> mainDescriptor = filteredSourcefiles.stream().filter((sourceFile -> Objects.equals(sourceFile.getPath(), primaryDescriptorFilePath))).findFirst();
    String validationMessage = null;
    if (mainDescriptor.isPresent()) {
        String content = mainDescriptor.get().getContent();
        if (StringUtils.isBlank(content)) {
            validationMessage = "Primary descriptor is empty.";
        } else {
            try {
                Map<String, Object> parsed = findMainProcess(parseAsMap(content));
                Object klass = parsed.get("class");
                // A descriptor without a "class" entry yields null here. Immutable sets such as those produced by
                // Set.of(...) throw NullPointerException from contains(null), so null is handled explicitly and
                // reported as a class mismatch rather than being looked up.
                if (klass == null || !processClasses.contains(klass)) {
                    validationMessage = String.format("A CWL %s requires %s.", processType, processClasses.stream().map(s -> String.format("'class: %s'", s)).collect(Collectors.joining(" or ")));
                    if (klass != null && oppositeClasses.contains(klass)) {
                        validationMessage += String.format(" This file contains 'class: %s'. Did you mean to register a %s?", klass, oppositeType);
                    }
                } else if (!this.isValidCwl(content)) {
                    validationMessage = "Invalid CWL version.";
                }
            } catch (YAMLException | JsonParseException | ClassCastException e) {
                LOG.error("An unsafe or malformed YAML was attempted to be parsed", e);
                validationMessage = "CWL file is malformed or missing, cannot extract metadata: " + e.getMessage();
            }
        }
    } else {
        validationMessage = "Primary CWL descriptor is not present.";
    }
    if (validationMessage == null) {
        return new VersionTypeValidation(true, Collections.emptyMap());
    } else {
        return new VersionTypeValidation(false, Map.of(primaryDescriptorFilePath, validationMessage));
    }
}
/**
 * Validates the primary descriptor as a CWL workflow: its class must be "Workflow", and a descriptor whose
 * class is one of the tool classes results in a hint that a tool may have been intended.
 */
@Override
public VersionTypeValidation validateWorkflowSet(Set<SourceFile> sourceFiles, String primaryDescriptorFilePath) {
    final Set<String> workflowClasses = Set.of("Workflow");
    final Set<String> toolClasses = Set.of("CommandLineTool", "ExpressionTool");
    return validateProcessSet(sourceFiles, primaryDescriptorFilePath, "workflow", workflowClasses, "tool", toolClasses);
}
/**
 * Validates the primary descriptor as a CWL tool: its class must be "CommandLineTool" or "ExpressionTool",
 * and a descriptor whose class is "Workflow" results in a hint that a workflow may have been intended.
 */
@Override
public VersionTypeValidation validateToolSet(Set<SourceFile> sourceFiles, String primaryDescriptorFilePath) {
    final Set<String> toolClasses = Set.of("CommandLineTool", "ExpressionTool");
    final Set<String> workflowClasses = Set.of("Workflow");
    return validateProcessSet(sourceFiles, primaryDescriptorFilePath, "tool", toolClasses, "workflow", workflowClasses);
}
/**
 * Validates the test parameter files by delegating to the shared JSON/YAML validity check for the
 * CWL test parameter file type.
 */
@Override
public VersionTypeValidation validateTestParameterSet(Set<SourceFile> sourceFiles) {
    final DescriptorLanguage.FileType testParameterType = DescriptorLanguage.FileType.CWL_TEST_JSON;
    return checkValidJsonAndYamlFiles(sourceFiles, testParameterType);
}
/**
 * Locates the root process of a parsed CWL document.
 *
 * <p>For a document packed with the "$graph" syntax (https://www.commonwl.org/v1.2/Workflow.html#Packed_documents),
 * the root is the process whose id is "#main". If no such process exists, the first entry of the graph is used as
 * a best guess, provided it is a map. In every other case the document itself is taken to be the process.
 *
 * @param mapping the parsed CWL document
 * @return the mapping that represents the main process
 */
private Map<String, Object> findMainProcess(Map<String, Object> mapping) {
    final Object graph = mapping.get("$graph");
    if (!(graph instanceof List)) {
        // Not a packed document, so assume this is a normal CWL file.
        return mapping;
    }
    final List<?> candidates = (List<?>)graph;
    for (Object candidate : candidates) {
        if (candidate instanceof Map && "#main".equals(((Map<?, ?>)candidate).get("id"))) {
            return (Map<String, Object>)candidate;
        }
    }
    // No process with id "#main": fall back to the first graph entry. Not perfect, but better than nothing.
    final Object first = candidates.isEmpty() ? null : candidates.get(0);
    return first instanceof Map ? (Map<String, Object>)first : mapping;
}
/**
 * Cheap syntactic test for whether the text looks like a JSON object: ignoring surrounding whitespace,
 * it must open with '{' and close with '}'. The content in between is not inspected.
 *
 * @param yamlOrJson the text to examine
 * @return true if the trimmed text is delimited by braces
 */
private static boolean isJsonObject(String yamlOrJson) {
    final String body = yamlOrJson.trim();
    if (!body.startsWith("{")) {
        return false;
    }
    return body.endsWith("}");
}
/**
 * Parses descriptor text that may be either JSON or YAML. Text that looks like a JSON object is parsed with Gson
 * into a Map; anything else is parsed as YAML.
 *
 * @param yamlOrJson the text to parse
 * @return the parsed representation
 */
private static Object parse(String yamlOrJson) {
    if (!isJsonObject(yamlOrJson)) {
        // The YAML is loaded twice. The first load uses a SafeConstructor and its result is discarded;
        // NOTE(review): presumably it exists only to throw on unsafe documents before the unrestricted load below - confirm.
        final Yaml safeYaml = new Yaml(new SafeConstructor());
        safeYaml.load(yamlOrJson);
        final Yaml yaml = new Yaml();
        return yaml.load(yamlOrJson);
    }
    return new Gson().fromJson(yamlOrJson, Map.class);
}
/**
 * Parses descriptor text and insists that the top-level construct is a mapping.
 *
 * @param yamlOrJson the JSON or YAML text to parse
 * @return the parsed top-level mapping
 * @throws YAMLException if the parsed result is not a Map; the message carries an abbreviated snippet of the input
 */
private static Map<String, Object> parseAsMap(String yamlOrJson) {
    final Object document = parse(yamlOrJson);
    if (document instanceof Map) {
        return (Map<String, Object>)document;
    }
    throw new YAMLException("Unexpected construct: " + StringUtils.abbreviate(yamlOrJson, CODE_SNIPPET_LENGTH));
}
/**
 * Holds the information-of-interest gathered from CWL requirements or hints; currently just the docker image name.
 */
static class RequirementOrHintState {

    // Docker image name, or null when none has been recorded.
    private String dockerPull;

    /** Creates a state with no docker image recorded. */
    RequirementOrHintState() {
    }

    /**
     * Creates a state that starts out with the same content as another state.
     * @param src the state to copy from
     */
    RequirementOrHintState(RequirementOrHintState src) {
        this();
        final String inherited = src.getDockerPull();
        setDockerPull(inherited);
    }

    public String getDockerPull() {
        return dockerPull;
    }

    public void setDockerPull(String dockerPull) {
        this.dockerPull = dockerPull;
    }
}
/**
* Implements a preprocessor which "expands" a CWL, replacing $import, $include, $mixin, and "run" directives per the CWL
* spec https://www.commonwl.org/v1.2/Workflow.html, using the content of the referenced source files, with the exception
* of a "run" directive that points to a missing source file, which is normalized to the "run: file" syntax and otherwise
* left unchanged.
*
* <p>To facilitate the extraction of information from a CWL that is missing files, if an $import references a missing file,
* it is replaced by the empty Map. If an $include references a missing file, it is replaced by the empty string.
*
* <p>Typically, a Preprocessor instance is one-time-use: a new Preprocessor instance is created to expand each root CWL
* descriptor.
*
* <p>As the preprocessor expands the CWL, for each process (workflow or tool) it encounters, it ensures that the process
* has an id (by assigning a UUID if necessary), then adds the current file path to a special dockstore metadata hint.
* This metadata hint is valid CWL and will propagate to a parsed representation, so we can later determine what file the
* process came from.
*
* <p>During expansion, the preprocessor tracks three quantities to prevent denial-of-service attacks or infinite
* loops due to a recursive CWL: the file import depth, the (approximate) total length of the expanded CWL in characters, and
* total number of files expanded (incremented for each $import, $include, $mixin, and "run" directive). If any of those
* quantities exceed the maximum value, the preprocessor will call the handleMax function, the base implementation of which
* will throw an exception.
*/
public static class Preprocessor {
// Keys recognized as an import directive; a map carrying one is replaced by the parsed and preprocessed content of the referenced file.
private static final List<String> IMPORT_KEYS = Arrays.asList("$import", "import");
// Keys recognized as an include directive; a map carrying one is replaced by the literal text of the referenced file.
private static final List<String> INCLUDE_KEYS = Arrays.asList("$include", "include");
// Keys for the mixin directive. NOTE(review): its handling is outside this excerpt - presumably looked up the same way as the two lists above.
private static final List<String> MIXIN_KEYS = Arrays.asList("$mixin", "mixin");
// Limits applied by the single-argument constructor.
private static final int DEFAULT_MAX_DEPTH = 10;
// 4 * 1024 * 1024 = 4,194,304 characters.
private static final long DEFAULT_MAX_CHAR_COUNT = 4L * 1024L * 1024L;
private static final long DEFAULT_MAX_FILE_COUNT = 1000L;
// Files to search when expanding $import, $include, etc.
private final Set<SourceFile> sourceFiles;
// Running totals for this preprocessor instance; both start at zero in the constructor.
private long charCount;
private long fileCount;
// Thresholds that the file depth, expanded character count, and expanded file count are checked against.
private final int maxDepth;
private final long maxCharCount;
private final long maxFileCount;
/**
 * Creates a CWL Preprocessor with explicit limits. The class javadoc describes what each limit guards against.
 * The running character and file counts start at zero.
 * @param sourceFiles files to search when expanding $import, $include, etc
 * @param maxDepth the maximum file depth
 * @param maxCharCount the maximum number of expanded characters (approximate)
 * @param maxFileCount the maximum number of files expanded
 */
public Preprocessor(Set<SourceFile> sourceFiles, int maxDepth, long maxCharCount, long maxFileCount) {
    // Limits
    this.maxDepth = maxDepth;
    this.maxCharCount = maxCharCount;
    this.maxFileCount = maxFileCount;
    // Files to resolve directives against
    this.sourceFiles = sourceFiles;
    // Running totals
    this.fileCount = 0;
    this.charCount = 0;
}
/**
 * Creates a CWL Preprocessor that uses the default maximum depth, character count, and file count.
 * @param sourceFiles files to search when expanding $import, $include, etc
 */
public Preprocessor(Set<SourceFile> sourceFiles) {
    this(
        sourceFiles,
        DEFAULT_MAX_DEPTH,
        DEFAULT_MAX_CHAR_COUNT,
        DEFAULT_MAX_FILE_COUNT
    );
}
/**
 * Preprocesses a root-level CWL, recursively expanding the directives described in the class javadoc.
 * Processing starts at file depth zero with no parent CWL version.
 * The specified CWL may, but will not necessarily, be modified in place.
 * @param cwl a representation of the CWL file content, typically a Map, List, or String and the result of new Yaml().load(content)
 * @param currentPath the path of the CWL file
 * @return the preprocessed CWL
 */
public Object preprocess(Object cwl, String currentPath) {
    // The root file has no parent entry to inherit a CWL version from.
    final String parentVersion = null;
    final int rootDepth = 0;
    return preprocess(cwl, currentPath, parentVersion, rootDepth);
}
/**
* Preprocess the specified CWL, recursively expanding various directives as noted in the class javadoc.
* This method may, but does not necessarily, process the specified CWL in place.
* @param cwl a representation of the CWL file content or portion thereof, typically a Map, List, or String and the result of new Yaml().load(content)
* @param currentPath the path of the CWL file
* @param version the CWL version of the parent entry, null if there is no parent entry
* @param depth the current file depth, where the root file is at depth 0, and the depth increases by one for each $import or $mixin
* @return the preprocessed CWL
*/
private Object preprocess(Object cwl, String currentPath, String version, int depth) {
if (depth > maxDepth) {
handleMax(String.format("maximum file depth (%d) exceeded", maxDepth));
}
if (cwl instanceof Map) {
Map<String, Object> map = (Map<String, Object>)cwl;
// If the map represents a workflow or tool, make sure it has an ID, record the path in the metadata, and determine the CWL version
if (isProcess(map)) {
setIdIfAbsent(map);
setMetadataHint(map, Map.of("path", stripLeadingSlashes(currentPath)));
version = (String)map.getOrDefault("cwlVersion", version);
}
// Process $import, which is replaced by the parsed+preprocessed file content
String importPath = findString(IMPORT_KEYS, map);
if (importPath != null) {
return loadFileAndPreprocess(resolvePath(importPath, currentPath), emptyMap(), version, depth);
}
// Process $include, which is replaced by the literal string representation of the file content
String includePath = findString(INCLUDE_KEYS, map);
if (includePath != null) {
return loadFile(resolvePath(includePath, currentPath), "");