diff --git a/wdl2cwl/main.py b/wdl2cwl/main.py index f00a6473..0cf522ed 100644 --- a/wdl2cwl/main.py +++ b/wdl2cwl/main.py @@ -297,7 +297,7 @@ def get_step_input_expr( """ with WDLSourceLine(wf_expr, ConversionException): if isinstance(wf_expr, WDL.Expr.String): - return self.get_expr_string(wf_expr, False)[1:-1], None + return self.get_expr_string(wf_expr, False)[0][1:-1], None elif isinstance(wf_expr, WDL.Expr.Get): if isinstance(wf_expr.expr, WDL.Expr.Ident): member = None @@ -315,7 +315,7 @@ def get_step_input_expr( ident = cast(WDL.Expr.Ident, wf_expr.expr.expr) id_name = ident.name elif isinstance(wf_expr, WDL.Expr.Apply): - expr_str, _ = self.get_expr(wf_expr) + expr_str, _, _ = self.get_expr(wf_expr) if expr_str.count("inputs") == 1: id_name = re.match(r"inputs\.*?[ \.](.*?)[. ]", expr_str).groups()[ 0 @@ -358,7 +358,9 @@ def load_wdl_workflow(self, obj: WDL.Tree.Workflow) -> cwl.Workflow: obj.meta.pop("description") if "description" in obj.meta else None ) for body_part in obj.body: - if not isinstance(body_part, (WDL.Tree.Call, WDL.Tree.Scatter)): + if not isinstance( + body_part, (WDL.Tree.Call, WDL.Tree.Scatter, WDL.Tree.Conditional) + ): _logger.warning( WDLSourceLine(body_part).makeError( "Warning: unhandled Workflow node type:" @@ -368,7 +370,23 @@ def load_wdl_workflow(self, obj: WDL.Tree.Workflow) -> cwl.Workflow: ) continue with WDLSourceLine(body_part, ConversionException): - if isinstance(body_part, WDL.Tree.Call): + if isinstance(body_part, WDL.Tree.Conditional): + if len(body_part.body) > 1: + raise ConversionException( + "Multi-task conditionals are not yet supported. Please open an issue with an example!" 
+ ) + step = self.get_workflow_call(body_part.body[0]) # type: ignore + if isinstance(body_part.expr, WDL.Expr.Apply): + when, sources = self.get_expr_apply(body_part.expr, True) + step.when = f"$({when})" + wf_steps.append(step) + else: + raise ConversionException( + "Conditional expression type: " + f"{type(body_part.expr)} is not yet handled. " + "Please open an issue with an example." + ) + elif isinstance(body_part, WDL.Tree.Call): step = self.get_workflow_call(body_part) wf_steps.append(step) for inp in step.in_: @@ -571,7 +589,7 @@ def get_time_minutes_requirement( with WDLSourceLine(time_minutes, ConversionException): if isinstance(time_minutes, (WDL.Expr.Int, WDL.Expr.Float)): return cast(int, get_literal_value(time_minutes)) * 60 - time_minutes_str, _ = self.get_expr(time_minutes) + time_minutes_str, _, _ = self.get_expr(time_minutes) return f"$({time_minutes_str} * 60)" def get_memory_requirement( @@ -584,7 +602,7 @@ def get_memory_requirement( return ram_min_literal elif isinstance(memory_runtime, WDL.Expr.Apply): expr, unit = memory_runtime.arguments - ram_min, _ = self.get_expr(expr) + ram_min, _, _ = self.get_expr(expr) return self.get_ram_min_js( ram_min, str(get_literal_value(unit)).strip() ) @@ -599,11 +617,11 @@ def get_memory_literal(self, memory_runtime: WDL.Expr.String) -> Union[float, st else: _, amount, _, unit, _ = memory_runtime.parts if isinstance(amount, WDL.Expr.Placeholder): - amount_str, _ = self.get_expr(amount) + amount_str, _, _ = self.get_expr(amount) else: amount_str = amount if isinstance(unit, WDL.Expr.Placeholder): - unit_str, _ = self.get_expr(unit) + unit_str, _, _ = self.get_expr(unit) else: unit_str = unit.strip() return self.get_ram_min_js(amount_str, unit_str) @@ -632,7 +650,7 @@ def get_outdir_requirement( ): # If it contains an apply expr we don't want to process the _add function # that concatenates it to the chars in the string - expr_str, _ = self.get_expr(outdir) + expr_str, _, _ = self.get_expr(outdir) if 
isinstance(outdir.type, WDL.Type.String): return f"$(parseFloat({expr_str}) * 1024)" else: @@ -651,7 +669,7 @@ def get_outdir_requirement( # avoid python strings only WDL expressions are handled. return self.get_outdir_requirement(obj) elif isinstance(outdir, (WDL.Expr.Get, WDL.Expr.Placeholder)): - expr_str, _ = self.get_expr(outdir) + expr_str, _, _ = self.get_expr(outdir) return ( f"$(({expr_str}) * 1024)" if not expr_str.isdigit() @@ -754,20 +772,24 @@ def get_workflow_outputs( ) if isinstance(item_expr, WDL.Expr.Apply): new_output_name = f"_{output_name}_{str(item_expr.function_name)}" + apply_expr, apply_sources = self.get_expr_apply(item_expr, False) extra_step = cwl.WorkflowStep( in_=[ cwl.WorkflowStepInput( - id="target", - # source=output_source, TODO: refactor get_apply_expr to also return the names of the sources + id="_".join(source.split("/")), source=source ) + for source in apply_sources ], out=["result"], run=cwl.ExpressionTool( inputs=[ - cwl.WorkflowInputParameter(type_="Any", id="target") + cwl.WorkflowInputParameter( + type_="Any", id="_".join(source.split("/")) + ) + for source in apply_sources ], expression='${ return {"result": ' - + self.get_expr_apply(item_expr, False) + + self.get_expr_apply(item_expr, False)[0] + "}; }", outputs=[ cwl.ExpressionToolOutputParameter( @@ -834,21 +856,31 @@ def get_expr( wdl_expr: WDL.Expr.Base, target_type: Optional[WDL.Type.Base] = None, top: bool = False, - ) -> Tuple[str, Optional[WDL.Type.Base]]: - """Translate WDL Expressions.""" + ) -> Tuple[str, Optional[WDL.Type.Base], List[str]]: + """ + Translate WDL Expressions. + + :param wdl_expr: The WDL expression to translate + :param target_type: The target WDL type + :param top: True, if the wdl_expr is a standalone (non-embedded) + WDL expression. + :return: A CWL Expression, and the target WDL type if known, and the list of sources used. 
+ """ wdl_type: Optional[WDL.Type.Base] = None + sources: List[str] = [] if isinstance(wdl_expr, WDL.Expr.Apply): - result = self.get_expr_apply(wdl_expr, top) + result, sources = self.get_expr_apply(wdl_expr, top) elif isinstance(wdl_expr, WDL.Expr.Get): - result, wdl_type = self.get_expr_get(wdl_expr, top) + result, wdl_type, sources = self.get_expr_get(wdl_expr, top) elif isinstance(wdl_expr, WDL.Expr.IfThenElse): - result = self.get_expr_ifthenelse(wdl_expr) + result, sources = self.get_expr_ifthenelse(wdl_expr) elif isinstance(wdl_expr, WDL.Expr.Placeholder): - result, wdl_type = self.translate_wdl_placeholder(wdl_expr, top) + result, wdl_type, sources = self.translate_wdl_placeholder(wdl_expr, top) elif isinstance(wdl_expr, WDL.Expr.String): - result = self.get_expr_string(wdl_expr, top) + result, sources = self.get_expr_string(wdl_expr, top) elif isinstance(wdl_expr, WDL.Expr.Boolean) and wdl_expr.literal: result = str(wdl_expr.literal) # "true" not "True" + # no sources elif ( isinstance( wdl_expr, @@ -861,30 +893,35 @@ def get_expr( and wdl_expr.literal ): result = str(wdl_expr.literal.value) + # no sources elif isinstance(wdl_expr, WDL.Expr.Array): - result = ( - "[ " - + ", ".join(self.get_expr(item)[0] for item in wdl_expr.items) - + " ]" - ) + items = [] + for item in wdl_expr.items: + expression, _, item_sources = self.get_expr(item) + items.append(expression) + sources.extend(item_sources) + result = "[ " + ", ".join(items) + " ]" elif isinstance(wdl_expr, WDL.Expr.Map): - result = ( - "{ " - + ", ".join( - f"{self.get_expr(key)[0]}: {self.get_expr(value)[0]}" - for key, value in wdl_expr.items - ) - + " }" - ) + decls = [] + for map_key, map_value in wdl_expr.items: + key_expr, _, key_sources = self.get_expr(map_key) + value_expr, _, value_sources = self.get_expr(map_value) + decls.append(f"{key_expr}: {value_expr}") + sources.extend(key_sources) + sources.extend(value_sources) + result = "{ " + ", ".join(decls) + " }" elif isinstance(wdl_expr, 
WDL.Expr.Struct) and isinstance( target_type, WDL.Type.StructInstance ): - result = "{ " - result += ", ".join( - f'"{key}": {self.get_expr(wdl_expr.members[key], member_type)[0]}' - for key, member_type in getattr(target_type, "members", {}).items() - ) - result += " }" + decls = [] + assert target_type.members is not None + for struct_key, member_type in target_type.members.items(): + key_expr, _, key_sources = self.get_expr( + wdl_expr.members[struct_key], member_type + ) + decls.append(f'"{struct_key}": {key_expr}') + sources.extend(key_sources) + result = "{ " + ", ".join(decls) + " }" else: # pragma: no cover raise WDLSourceLine(wdl_expr, ConversionException).makeError( f"The expression '{wdl_expr}' is not handled yet." @@ -893,36 +930,62 @@ def get_expr( return ( '{ "class": "File", "path": runtime.outdir+"/"+' + result + " }", wdl_type, + sources, ) - return result, wdl_type + return result, wdl_type, sources - def get_expr_string(self, wdl_expr_string: WDL.Expr.String, top: bool) -> str: - """Translate WDL String Expressions.""" + def get_expr_string( + self, wdl_expr_string: WDL.Expr.String, top: bool + ) -> Tuple[str, List[str]]: + """ + Translate WDL String Expressions. 
+ + :return: The CWL Expression and a list of sources + """ if wdl_expr_string.literal is not None: - return str(wdl_expr_string.literal) + return str(wdl_expr_string.literal), [] parts = wdl_expr_string.parts q = cast(str, parts[0])[0] - string = ( - f"{q}{parts[1]}{q}" - if isinstance(parts[1], str) - else self.get_expr(parts[1], None, top)[0] - ) + sources: List[str] = [] + if isinstance(parts[1], str): + string = f"{q}{parts[1]}{q}" + else: + string, _, sources = self.get_expr(parts[1], None, top) if parts[2:-1]: - string += " + " + " + ".join( - f"{q}{part}{q}" if isinstance(part, str) else self.get_expr(part)[0] - for part in parts[2:-1] - ) - return string + string_parts = [] + for part in parts[2:-1]: + if isinstance(part, str): + string_parts.append(f"{q}{part}{q}") + else: + part_expr, _, part_sources = self.get_expr(part) + string_parts.append(part_expr) + sources.extend(part_sources) + string += " + " + " + ".join(string_parts) + return string, sources + + def get_expr_ifthenelse( + self, wdl_ifthenelse: WDL.Expr.IfThenElse + ) -> Tuple[str, List[str]]: + """ + Translate WDL IfThenElse Expressions. - def get_expr_ifthenelse(self, wdl_ifthenelse: WDL.Expr.IfThenElse) -> str: - """Translate WDL IfThenElse Expressions.""" - condition, _ = self.get_expr(wdl_ifthenelse.condition) - if_true, _ = self.get_expr(wdl_ifthenelse.consequent) - if_false, _ = self.get_expr(wdl_ifthenelse.alternative) - return f"{condition} ? {if_true} : {if_false}" + :return: The CWL expression and a list of sources + """ + condition, _, sources = self.get_expr(wdl_ifthenelse.condition) + if_true, _, sources_true = self.get_expr(wdl_ifthenelse.consequent) + if_false, _, sources_false = self.get_expr(wdl_ifthenelse.alternative) + sources.extend(sources_true) + sources.extend(sources_false) + return f"{condition} ? 
{if_true} : {if_false}", sources + + def get_expr_apply( + self, wdl_apply_expr: WDL.Expr.Apply, top: bool + ) -> Tuple[str, List[str]]: + """ + Translate WDL Apply Expressions. - def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: - """Translate WDL Apply Expressions.""" + :return: The CWL Expression and a list of source names. + """ binary_ops = { "_gt": ">", "_lor": "||", @@ -951,15 +1014,19 @@ def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: treat_as_optional = wdl_apply_expr.type.optional if function_name == "_add": add_left_operand = arguments[0] - add_right_operand, _ = self.get_expr(arguments[1]) - add_left_operand_value, _ = self.get_expr(add_left_operand) + add_left_operand_value, _, left_sources = self.get_expr(add_left_operand) + add_right_operand, _, right_sources = self.get_expr(arguments[1]) + add_sources = left_sources + right_sources if getattr(add_left_operand, "function_name", None) == "basename": referer = wdl_apply_expr.parent.name # type: ignore[attr-defined] treat_as_optional = True if referer in self.non_static_values else False return ( - f"{add_left_operand_value} + {add_right_operand}" - if not treat_as_optional - else f"{get_input(referer)} === null ? {add_left_operand_value} + {add_right_operand} : {get_input(referer)}" + ( + f"{add_left_operand_value} + {add_right_operand}" + if not treat_as_optional + else f"{get_input(referer)} === null ? 
{add_left_operand_value} + {add_right_operand} : {get_input(referer)}" + ), + add_sources, ) elif function_name == "basename": if len(arguments) == 1: @@ -969,104 +1036,158 @@ def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: only_operand.expr, WDL.Expr.Ident ): only_operand_name = get_expr_name(only_operand.expr) + solo_basename_sources = [str(only_operand.expr.name)] else: - only_operand_name, _ = self.get_expr(only_operand) + only_operand_name, _, solo_basename_sources = self.get_expr( + only_operand + ) return ( - f"{only_operand_name}.basename" - if is_file - else f"{only_operand_name}.split('/').reverse()[0]" + ( + f"{only_operand_name}.basename" + if is_file + else f"{only_operand_name}.split('/').reverse()[0]" + ), + solo_basename_sources, ) else: basename_target, suffix = arguments is_file = isinstance(basename_target.type, WDL.Type.File) + basename_sources: List[str] = [] if isinstance(basename_target, WDL.Expr.Get): if isinstance(basename_target.expr, WDL.Expr.Ident): basename_target_name = get_expr_name(basename_target.expr) + basename_sources = [str(basename_target.expr.name)] else: - basename_target_name = f"{self.get_expr(basename_target.expr)[0]}.{basename_target.member}" + basename_target_expr, _, basename_sources = self.get_expr( + basename_target.expr + ) + basename_target_name = ( + f"{basename_target_expr}.{basename_target.member}" + ) elif isinstance(basename_target, WDL.Expr.Apply): - basename_target_name, _ = self.get_expr(basename_target, None, top) + basename_target_name, _, basename_sources = self.get_expr( + basename_target, None, top + ) suffix_str = str(get_literal_value(suffix)) regex_str = re.escape(suffix_str) return ( - f"{basename_target_name}.basename.replace(/{regex_str}$/, '') " - if is_file - else f"{basename_target_name}.split('/').reverse()[0].replace(/{regex_str}$/, '')" + ( + f"{basename_target_name}.basename.replace(/{regex_str}$/, '') " + if is_file + else 
f"{basename_target_name}.split('/').reverse()[0].replace(/{regex_str}$/, '')" + ), + basename_sources, ) elif function_name == "defined": only_operand = arguments[0] assert isinstance(only_operand, WDL.Expr.Get) and isinstance( # nosec only_operand.expr, WDL.Expr.Ident ) - return f"{get_expr_name(only_operand.expr)} !== null" + return f"{get_expr_name(only_operand.expr)} !== null", [ + str(only_operand.expr.name) + ] elif function_name == "_interpolation_add": arg_value, arg_name = arguments + iadd_sources: List[str] = [] if isinstance(arg_name, WDL.Expr.String) and isinstance( arg_value, (WDL.Expr.Apply, WDL.Expr.String) ): - return f"{self.get_expr(arg_value)[0]} + {self.get_expr(arg_name)[0]}" + value_expr, _, iadd_value_sources = self.get_expr(arg_value) + name_expr, _, iadd_name_sources = self.get_expr(arg_name) + return ( + f"{value_expr} + {name_expr}", + iadd_value_sources + iadd_name_sources, + ) if isinstance(arg_name, (WDL.Expr.Placeholder, WDL.Expr.Get)): just_arg_name = get_expr_name(arg_name.expr) # type: ignore[arg-type] + iadd_sources.append(str(arg_name.expr.name)) # type: ignore[attr-defined] arg_name_with_file_check = get_expr_name_with_is_file_check( arg_name.expr # type: ignore ) elif isinstance(arg_value, (WDL.Expr.Placeholder, WDL.Expr.Get)): just_arg_name = get_expr_name(arg_value.expr) # type: ignore[arg-type] + iadd_sources.append(str(arg_value.expr.name)) # type: ignore[attr-defined] arg_name_with_file_check = get_expr_name_with_is_file_check( arg_value.expr # type: ignore ) arg_value = arg_name with WDLSourceLine(arg_value, ConversionException): - arg_value_str, _ = self.get_expr(arg_value) + arg_value_str, _, iadd_value_sources = self.get_expr(arg_value) + iadd_sources.extend(iadd_value_sources) return ( - f'{just_arg_name} === null ? "" : {arg_value_str} + {arg_name_with_file_check}' - if treat_as_optional - else f"{arg_value_str} + {arg_name_with_file_check}" + ( + f'{just_arg_name} === null ? 
"" : {arg_value_str} + {arg_name_with_file_check}' + if treat_as_optional + else f"{arg_value_str} + {arg_name_with_file_check}" + ), + iadd_sources, ) elif function_name == "sub": wdl_apply, arg_string, arg_sub = arguments - sub_expr, _ = self.get_expr(wdl_apply, None, top) - arg_string_expr, _ = self.get_expr(arg_string) - arg_sub_expr, _ = self.get_expr(arg_sub) - return f"{sub_expr}.replace({arg_string_expr}, {arg_sub_expr}) " - + sub_expr, _, sub_sources = self.get_expr(wdl_apply, None, top) + arg_string_expr, _, sub_string_sources = self.get_expr(arg_string) + arg_sub_expr, _, sub_expr_sources = self.get_expr(arg_sub) + sub_sources.extend(sub_string_sources) + sub_sources.extend(sub_expr_sources) + return ( + f"{sub_expr}.replace({arg_string_expr}, {arg_sub_expr}) ", + sub_sources, + ) elif function_name == "_at": iterable_object, index = arguments - iterable_object_expr, _ = self.get_expr(iterable_object) - index_expr, _ = self.get_expr(index) - return f"{iterable_object_expr}[{index_expr}]" + iterable_object_expr, _, at_sources = self.get_expr(iterable_object) + index_expr, _, at_index_sources = self.get_expr(index) + at_sources.extend(at_index_sources) + return f"{iterable_object_expr}[{index_expr}]", at_sources elif function_name in binary_ops: left_operand, right_operand = arguments - left_operand_expr, _ = self.get_expr(left_operand) - right_operand_expr, _ = self.get_expr(right_operand) + left_operand_expr, _, bops_sources = self.get_expr(left_operand) + right_operand_expr, _, bops_right_sources = self.get_expr(right_operand) + bops_sources.extend(bops_right_sources) return ( - f"{left_operand_expr} {binary_ops[function_name]} {right_operand_expr}" + f"{left_operand_expr} {binary_ops[function_name]} {right_operand_expr}", + bops_sources, ) elif function_name == "length": - only_arg_expr, _ = self.get_expr_get(arguments[0], False) # type: ignore - return f"{only_arg_expr}.length" + only_arg_expr, _, lsources = self.get_expr_get(arguments[0], False) # 
type: ignore + return f"{only_arg_expr}.length", lsources elif function_name == "round": - only_arg_expr, _ = self.get_expr(arguments[0]) - return f"Math.round({only_arg_expr})" + only_arg_expr, _, rsources = self.get_expr(arguments[0]) + return f"Math.round({only_arg_expr})", rsources elif function_name in single_arg_fn: only_arg = arguments[0] - return self.get_expr(only_arg)[0] + expression, _, saf_sources = self.get_expr(only_arg) + return expression, saf_sources elif function_name == "select_first": array_obj = cast(WDL.Expr.Array, arguments[0]) - array_items = [str(self.get_expr(item)[0]) for item in array_obj.items] + sf_sources = [] + array_items = [] + for item in array_obj.items: + array_item, _, array_item_sources = self.get_expr(item) + array_items.append(str(array_item)) + sf_sources.extend(array_item_sources) items_str = ", ".join(array_items) return ( f"[{items_str}].find(function(element) {{ return element !== null }}) " - ) + ), sf_sources elif function_name == "select_all": array_obj = cast(WDL.Expr.Array, arguments[0]) - array_items = [str(self.get_expr(item)[0]) for item in array_obj.items] - items_str = ", ".join(array_items) - return f"[{items_str}].filter(function(element) {{ return element !== null }}) " + sa_sources: List[str] = [] + sa_array_items: List[str] = [] + for item in array_obj.items: + item_expr, _, item_sources = self.get_expr(item) + sa_array_items.append(str(item_expr)) + sa_sources.extend(item_sources) + items_str = ", ".join(sa_array_items) + return ( + f"[{items_str}].filter(function(element) {{ return element !== null }}) ", + sa_sources, + ) elif function_name == "ceil": - only_arg, _ = self.get_expr(arguments[0]) # type: ignore - return f"Math.ceil({only_arg}) " + only_arg, _, csources = self.get_expr(arguments[0]) # type: ignore + return f"Math.ceil({only_arg}) ", csources elif function_name == "size": + ssources: List[str] = [] if len(arguments) == 1: left_operand = arguments[0] unit_value = "1" @@ -1076,11 +1197,15 @@ 
def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: unit_base, unit_exponent = get_mem_in_bytes(right_value) unit_value = f"{unit_base}^{unit_exponent}" if isinstance(left_operand, WDL.Expr.Array): - array_items = [self.get_expr(item)[0] for item in left_operand.items] - left = ", ".join(array_items) + sarray_items: List[str] = [] + for item in left_operand.items: + item_expr, _, item_sources = self.get_expr(item) + sarray_items.append(item_expr) + ssources.extend(item_sources) + left = ", ".join(sarray_items) left_str = f"[{left}]" else: - left_str, _ = self.get_expr(left_operand) + left_str, _, ssources = self.get_expr(left_operand) return ( "(function(size_of=0)" + "{" @@ -1088,12 +1213,13 @@ def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: + "{ if (element) {" + "size_of += element.size" + "}})}" - + f") / {unit_value}" + + f") / {unit_value}", + ssources, ) elif function_name == "flatten": flatten_array = arguments[0] with WDLSourceLine(flatten_array, ConversionException): - items_str, _ = self.get_expr(flatten_array) + items_str, _, fsources = self.get_expr(flatten_array) result = ( "(function () {var new_array = []; " + items_str @@ -1101,7 +1227,7 @@ def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: "{value.forEach(function(sub_value, sub_index, sub_obj) " "{new_array.push(sub_value);});}); return new_array;})()" ) - return result + return result, fsources elif function_name == "sep": sep, array = arguments if isinstance(array, WDL.Expr.Get) and isinstance( @@ -1114,50 +1240,72 @@ def get_expr_apply(self, wdl_apply_expr: WDL.Expr.Apply, top: bool) -> str: ) sep_str = get_literal_value(sep) or "" if isinstance(item_type, WDL.Type.File): + array_expr, _, sep_sources = self.get_expr(array) return ( - f"{self.get_expr(array)[0]}.map(" + f"{array_expr}.map(" + 'function(el) {return el.path}).join("' + sep_str - + '")' + + '")', + sep_sources, ) else: - return 
f'{self.get_expr(array)[0]}.join("{sep_str}")' + sep_expr, _, sep_sources = self.get_expr(array) + return f'{sep_expr}.join("{sep_str}")', sep_sources raise WDLSourceLine(wdl_apply_expr, ConversionException).makeError( f"Function name '{function_name}' not yet handled." ) def get_expr_get( self, wdl_get_expr: WDL.Expr.Get, top: bool - ) -> Tuple[str, WDL.Type.Base]: - """Translate WDL Get Expressions.""" + ) -> Tuple[str, WDL.Type.Base, List[str]]: + """ + Translate WDL Get Expressions. + + :return: The CWL Expression, the WDL target type, and a list of sources + """ member = wdl_get_expr.member if not member: - return self.get_expr_ident(wdl_get_expr.expr, top), wdl_get_expr.type # type: ignore - struct_name, _ = self.get_expr(wdl_get_expr.expr) - return f"{struct_name}.{member}", wdl_get_expr.type + expression, sources = self.get_expr_ident(wdl_get_expr.expr, top) # type: ignore + return expression, wdl_get_expr.type, sources + struct_name, _, sources = self.get_expr(wdl_get_expr.expr) + return f"{struct_name}.{member}", wdl_get_expr.type, sources + + def get_expr_ident( + self, wdl_ident_expr: WDL.Expr.Ident, top: bool + ) -> Tuple[str, List[str]]: + """ + Translate WDL Ident Expressions. - def get_expr_ident(self, wdl_ident_expr: WDL.Expr.Ident, top: bool) -> str: - """Translate WDL Ident Expressions.""" + :param top: True, if the wdl_ident_expr is a standalone (non-embedded) + WDL expression. Will be used to append a ".path" to the CWL + expression, if the type is File. 
+ :return: A CWL expression and a list of sources + """ id_name = wdl_ident_expr.name referee = wdl_ident_expr.referee optional = wdl_ident_expr.type.optional + sources: List[str] = [] if referee: with WDLSourceLine(referee, ConversionException): if isinstance(referee, WDL.Tree.Call): - return id_name + return id_name, sources if isinstance(referee, WDL.Tree.Gather): - return "/".join(id_name.rsplit(".", maxsplit=1)) + return "_".join(id_name.split(".")), [ + "/".join(id_name.rsplit(".", maxsplit=1)) + ] if referee.expr and ( wdl_ident_expr.name in self.optional_cwl_null or wdl_ident_expr.name not in self.non_static_values ): - return self.get_expr(referee.expr, None, top)[0] + expression, _, sources = self.get_expr(referee.expr, None, top) + return expression, sources ident_name = get_input(id_name) + sources.append(str(id_name)) if optional and isinstance(wdl_ident_expr.type, WDL.Type.File): # To prevent null showing on the terminal for inputs of type File name_with_file_check = get_expr_name_with_is_file_check(wdl_ident_expr) - return f'{ident_name} === null ? "" : {name_with_file_check}' + return f'{ident_name} === null ? "" : {name_with_file_check}', sources return ( f"{ident_name}.path" if ( @@ -1166,7 +1314,7 @@ def get_expr_ident(self, wdl_ident_expr: WDL.Expr.Ident, top: bool) -> str: and ".path" not in ident_name ) else ident_name - ) + ), sources def get_cpu_requirement(self, cpu_runtime: WDL.Expr.Base) -> Union[int, float, str]: """Translate WDL Runtime CPU requirement to CWL Resource Requirement.""" @@ -1200,10 +1348,20 @@ def get_cwl_command_requirements( def translate_wdl_placeholder( self, wdl_placeholder: WDL.Expr.Placeholder, top: bool = False - ) -> Tuple[str, Optional[WDL.Type.Base]]: - """Translate WDL Expr Placeholder to a valid CWL expression.""" + ) -> Tuple[str, Optional[WDL.Type.Base], List[str]]: + """ + Translate WDL Expr Placeholder. + + :param top: `True`, if the wdl_placeholder is a standalone (non-embedded) + WDL expression. 
Will be used to append a ".path" to the CWL + expression, if the type is File. + + :return: A valid CWL expression, the WDL target type if known, and a list of sources + """ expr = wdl_placeholder.expr - placeholder_expr, placeholder_expr_type = self.get_expr(expr, None, top) + placeholder_expr, placeholder_expr_type, sources = self.get_expr( + expr, None, top + ) options = wdl_placeholder.options if options: if "true" in options: @@ -1219,16 +1377,21 @@ def translate_wdl_placeholder( and expr.function_name != "defined" # optimization ) if not is_optional: - return test_str, placeholder_expr_type + return test_str, placeholder_expr_type, sources else: if "default" in options: return ( - f"{placeholder_expr} === null ? " - f"{nice_quote(options['default'])} : {test_str}" - ), placeholder_expr_type + ( + f"{placeholder_expr} === null ? " + f"{nice_quote(options['default'])} : {test_str}" + ), + placeholder_expr_type, + sources, + ) return ( f'{placeholder_expr} === null ? "" : {test_str}', placeholder_expr_type, + sources, ) elif "sep" in options: separator = options["sep"] @@ -1244,21 +1407,27 @@ def translate_wdl_placeholder( if "default" in options and (expr.type.optional or item_type.optional): return ( f"{placeholder_expr} === null ? " - f"{nice_quote(options['default'])} : {pl_holder_str}" - ), placeholder_expr_type - return pl_holder_str, placeholder_expr_type + f"{nice_quote(options['default'])} : {pl_holder_str}", + placeholder_expr_type, + sources, + ) + return pl_holder_str, placeholder_expr_type, sources # options must contain only "default", no "sep" or "true"/"false" return ( - f"{placeholder_expr} === null ? " - f"{nice_quote(options['default'])} : {placeholder_expr}" - ), placeholder_expr_type + ( + f"{placeholder_expr} === null ? 
" + f"{nice_quote(options['default'])} : {placeholder_expr}" + ), + placeholder_expr_type, + sources, + ) if ( top is True and isinstance(placeholder_expr_type, WDL.Type.File) and ".path" not in placeholder_expr ): placeholder_expr += ".path" - return placeholder_expr, placeholder_expr_type + return placeholder_expr, placeholder_expr_type, sources def get_cwl_workflow_inputs( self, @@ -1453,7 +1622,7 @@ def set_cwl_task_outputs( ): glob_str = tool.stdout = "_stdout" else: - glob_expr, _ = self.get_expr(wdl_output.expr) + glob_expr, _, _ = self.get_expr(wdl_output.expr) is_literal = wdl_output.expr.arguments[0].literal if is_literal: glob_str = str(get_literal_value(wdl_output.expr.arguments[0])) diff --git a/wdl2cwl/tests/cwl_files/blast.cwl b/wdl2cwl/tests/cwl_files/blast.cwl new file mode 100644 index 00000000..995914a9 --- /dev/null +++ b/wdl2cwl/tests/cwl_files/blast.cwl @@ -0,0 +1,914 @@ +cwlVersion: v1.2 +id: blast +class: Workflow +requirements: + - class: InlineJavascriptRequirement +inputs: + - id: blast_docker_override + type: + - string + - 'null' + - id: blast_docker + type: + - string + - 'null' + - id: queryfa + type: File + - id: fname + default: /sfs/blastdb/2019_ncov/nucl/v6/ncov + type: string + - id: method + default: blastn + type: string + - id: outfmt + default: 7 + type: int + - id: evalue + default: 10.0 + type: float + - id: Outfile + type: + - string + - 'null' + - id: threads + default: 8 + type: int + - id: runblastp.max_target_seqs + default: 100 + type: int + - id: runblastp.word_size + default: 6 + type: int + - id: runblastp.seg + default: no + type: string + - id: runblastp.comp_based_stats + default: '2' + type: string + - id: runblastp.matrix + default: BLOSUM62 + type: string + - id: runblastp.gapopen + default: 11 + type: int + - id: runblastp.gapextend + default: 1 + type: int + - id: runblastp.max_hsps + type: + - int + - 'null' + - id: runblastp.taxids + type: + - string + - 'null' + - id: runblastp.negative_taxids + type: + 
- string + - 'null' + - id: runblastp.lcase_masking + default: false + type: boolean + - id: runblastn.max_target_seqs + default: 100 + type: int + - id: runblastn.word_size + default: 28 + type: int + - id: runblastn.reward + default: 1 + type: int + - id: runblastn.penalty + default: -2 + type: int + - id: runblastn.strand + default: both + type: string + - id: runblastn.gapopen + default: 0 + type: int + - id: runblastn.gapextend + default: 0 + type: int + - id: runblastn.dust + default: "'20 64 1'" + type: string + - id: runblastn.max_hsps + type: + - int + - 'null' + - id: runblastn.tasks + default: megablast + type: string + - id: runblastn.taxids + type: + - string + - 'null' + - id: runblastn.negative_taxids + type: + - string + - 'null' + - id: runblastn.lcase_masking + default: false + type: boolean + - id: runblastx.max_target_seqs + default: 100 + type: int + - id: runblastx.word_size + default: 6 + type: int + - id: runblastx.seg + default: "'12 2.2 2.5'" + type: string + - id: runblastx.comp_based_stats + default: '2' + type: string + - id: runblastx.matrix + default: BLOSUM62 + type: string + - id: runblastx.gapopen + default: 11 + type: int + - id: runblastx.gapextend + default: 1 + type: int + - id: runblastx.taxids + type: + - string + - 'null' + - id: runblastx.negative_taxids + type: + - string + - 'null' + - id: runblastx.max_hsps + type: + - int + - 'null' + - id: runblastx.lcase_masking + default: false + type: boolean + - id: runtblastn.max_target_seqs + default: 100 + type: int + - id: runtblastn.word_size + default: 6 + type: int + - id: runtblastn.seg + default: "'12 2.2 2.5'" + type: string + - id: runtblastn.comp_based_stats + default: '2' + type: string + - id: runtblastn.matrix + default: BLOSUM62 + type: string + - id: runtblastn.gapopen + default: 11 + type: int + - id: runtblastn.gapextend + default: 1 + type: int + - id: runtblastn.lcase_masking + default: false + type: boolean + - id: runtblastn.max_hsps + type: + - int + - 
'null' + - id: runtblastn.taxids + type: + - string + - 'null' + - id: runtblastn.negative_taxids + type: + - string + - 'null' + - id: runtblastx.taxids + type: + - string + - 'null' + - id: runtblastx.word_size + default: 3 + type: int + - id: runtblastx.max_target_seqs + default: 100 + type: int + - id: runtblastx.seg + default: "'12 2.2 2.5'" + type: string + - id: runtblastx.matrix + default: BLOSUM62 + type: string + - id: runtblastx.lcase_masking + default: false + type: boolean + - id: runtblastx.negative_taxids + type: + - string + - 'null' + - id: runtblastx.max_hsps + type: + - int + - 'null' +steps: + - id: _fina_output_select_first + in: + - id: runtblastx_out + source: runtblastx/out + - id: runblastp_out + source: runblastp/out + - id: runblastn_out + source: runblastn/out + - id: runblastx_out + source: runblastx/out + - id: runtblastn_out + source: runtblastn/out + out: + - result + run: + class: ExpressionTool + inputs: + - id: runtblastx_out + type: Any + - id: runblastp_out + type: Any + - id: runblastn_out + type: Any + - id: runblastx_out + type: Any + - id: runtblastn_out + type: Any + outputs: + - id: result + type: File + expression: '${ return {"result": [runtblastx_out, runblastp_out, runblastn_out, + runblastx_out, runtblastn_out].find(function(element) { return element + !== null }) }; }' + - id: runblastp + in: + - id: docker + source: blast_docker + - id: Queryfa + source: queryfa + - id: Fname + source: fname + - id: outfmt + source: outfmt + - id: Outfile + source: Outfile + - id: evalue + source: evalue + - id: threads + source: threads + - id: max_target_seqs + source: runblastp.max_target_seqs + - id: word_size + source: runblastp.word_size + - id: seg + source: runblastp.seg + - id: comp_based_stats + source: runblastp.comp_based_stats + - id: matrix + source: runblastp.matrix + - id: gapopen + source: runblastp.gapopen + - id: gapextend + source: runblastp.gapextend + - id: max_hsps + source: runblastp.max_hsps + - id: taxids + 
source: runblastp.taxids + - id: negative_taxids + source: runblastp.negative_taxids + - id: lcase_masking + source: runblastp.lcase_masking + out: + - id: out + run: + class: CommandLineTool + id: runblastp + inputs: + - id: docker + type: string + - id: Queryfa + type: File + - id: Fname + type: string + - id: outfmt + type: int + - id: Outfile + type: string + - id: evalue + type: float + - id: threads + type: int + - id: max_target_seqs + default: 100 + type: int + - id: word_size + default: 6 + type: int + - id: seg + default: no + type: string + - id: comp_based_stats + default: '2' + type: string + - id: matrix + default: BLOSUM62 + type: string + - id: gapopen + default: 11 + type: int + - id: gapextend + default: 1 + type: int + - id: max_hsps + type: + - int + - 'null' + - id: taxids + type: + - string + - 'null' + - id: negative_taxids + type: + - string + - 'null' + - id: lcase_masking + default: false + type: boolean + outputs: + - id: out + type: File + outputBinding: + glob: $(inputs.Outfile) + requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: script.bash + entry: |4+ + + set -e + blastp -db "$(inputs.Fname)" \ + -query $(inputs.Queryfa.path) \ + -outfmt $(inputs.outfmt) \ + -out $(inputs.Outfile) \ + -max_target_seqs $(inputs.max_target_seqs) \ + -comp_based_stats $(inputs.comp_based_stats) \ + -evalue $(inputs.evalue) \ + -word_size $(inputs.word_size) \ + -matrix $(inputs.matrix) \ + -seg $(inputs.seg) \ + -gapopen $(inputs.gapopen) \ + -gapextend $(inputs.gapextend) \ + -num_threads $(inputs.threads) \ + $(inputs.lcase_masking ? "-lcase_masking" : "") $(inputs.max_hsps === null ? "" : "-max_hsps " + inputs.max_hsps) $(inputs.taxids === null ? "" : "-taxids " + inputs.taxids) $(inputs.negative_taxids === null ? 
"" : "-negative_taxids " + inputs.negative_taxids) \ + + - class: InlineJavascriptRequirement + - class: NetworkAccess + networkAccess: true + hints: + - class: ResourceRequirement + coresMin: 8 + ramMin: 15258.7890625 + outdirMin: 1024 + cwlVersion: v1.2 + baseCommand: + - bash + - script.bash + when: $(inputs.method === "blastp") + - id: runblastn + in: + - id: docker + source: blast_docker + - id: Queryfa + source: queryfa + - id: Fname + source: fname + - id: Outfile + source: Outfile + - id: threads + source: threads + - id: outfmt + source: outfmt + - id: max_target_seqs + source: runblastn.max_target_seqs + - id: evalue + source: evalue + - id: word_size + source: runblastn.word_size + - id: reward + source: runblastn.reward + - id: penalty + source: runblastn.penalty + - id: strand + source: runblastn.strand + - id: gapopen + source: runblastn.gapopen + - id: gapextend + source: runblastn.gapextend + - id: dust + source: runblastn.dust + - id: max_hsps + source: runblastn.max_hsps + - id: tasks + source: runblastn.tasks + - id: taxids + source: runblastn.taxids + - id: negative_taxids + source: runblastn.negative_taxids + - id: lcase_masking + source: runblastn.lcase_masking + out: + - id: out + run: + class: CommandLineTool + id: runblastn + inputs: + - id: docker + type: string + - id: Queryfa + type: File + - id: Fname + type: string + - id: Outfile + type: string + - id: threads + type: int + - id: outfmt + type: int + - id: max_target_seqs + default: 100 + type: int + - id: evalue + type: float + - id: word_size + default: 28 + type: int + - id: reward + default: 1 + type: int + - id: penalty + default: -2 + type: int + - id: strand + default: both + type: string + - id: gapopen + default: 0 + type: int + - id: gapextend + default: 0 + type: int + - id: dust + default: "'20 64 1'" + type: string + - id: max_hsps + type: + - int + - 'null' + - id: tasks + default: megablast + type: string + - id: taxids + type: + - string + - 'null' + - id: 
negative_taxids + type: + - string + - 'null' + - id: lcase_masking + default: false + type: boolean + outputs: + - id: out + type: File + outputBinding: + glob: $(inputs.Outfile) + requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: script.bash + entry: |4+ + + set -e + blastn -db "$(inputs.Fname)" \ + -show_gis \ + -query $(inputs.Queryfa.path) \ + -outfmt $(inputs.outfmt) \ + -out $(inputs.Outfile) \ + -max_target_seqs $(inputs.max_target_seqs) \ + -evalue $(inputs.evalue) \ + -word_size $(inputs.word_size) \ + -penalty $(inputs.penalty) \ + -reward $(inputs.reward) \ + -dust $(inputs.dust) \ + -gapopen $(inputs.gapopen) \ + -gapextend $(inputs.gapextend) \ + -task $(inputs.tasks) \ + -strand $(inputs.strand) \ + -num_threads $(inputs.threads) \ + $(inputs.lcase_masking ? "-lcase_masking" : "") $(inputs.max_hsps === null ? "" : "-max_hsps " + inputs.max_hsps) $(inputs.taxids === null ? "" : "-taxids " + inputs.taxids) $(inputs.negative_taxids === null ? "" : "-negative_taxids " + inputs.negative_taxids)\ + + - class: InlineJavascriptRequirement + - class: NetworkAccess + networkAccess: true + hints: + - class: ResourceRequirement + coresMin: 8 + ramMin: 15258.7890625 + outdirMin: 1024 + cwlVersion: v1.2 + baseCommand: + - bash + - script.bash + when: $(inputs.method === "blastn") + - id: runblastx + in: + - id: Queryfa + source: queryfa + - id: Fname + source: fname + - id: outfmt + source: outfmt + - id: evalue + source: evalue + - id: Outfile + source: Outfile + - id: docker + source: blast_docker + - id: threads + source: threads + - id: max_target_seqs + source: runblastx.max_target_seqs + - id: word_size + source: runblastx.word_size + - id: seg + source: runblastx.seg + - id: comp_based_stats + source: runblastx.comp_based_stats + - id: matrix + source: runblastx.matrix + - id: gapopen + source: runblastx.gapopen + - id: gapextend + source: runblastx.gapextend + - id: taxids + source: runblastx.taxids + - id: negative_taxids + 
source: runblastx.negative_taxids + - id: max_hsps + source: runblastx.max_hsps + - id: lcase_masking + source: runblastx.lcase_masking + out: + - id: out + run: + class: CommandLineTool + id: runblastx + inputs: + - id: Queryfa + type: File + - id: Fname + type: string + - id: outfmt + type: int + - id: evalue + type: float + - id: Outfile + type: string + - id: docker + type: string + - id: threads + type: int + - id: max_target_seqs + default: 100 + type: int + - id: word_size + default: 6 + type: int + - id: seg + default: "'12 2.2 2.5'" + type: string + - id: comp_based_stats + default: '2' + type: string + - id: matrix + default: BLOSUM62 + type: string + - id: gapopen + default: 11 + type: int + - id: gapextend + default: 1 + type: int + - id: taxids + type: + - string + - 'null' + - id: negative_taxids + type: + - string + - 'null' + - id: max_hsps + type: + - int + - 'null' + - id: lcase_masking + default: false + type: boolean + outputs: + - id: out + type: File + outputBinding: + glob: $("$" + inputs.Outfile) + requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: script.bash + entry: |4+ + + set -e + blastx -db "$(inputs.Fname)" \ + -query $(inputs.Queryfa.path) \ + -outfmt $(inputs.outfmt) \ + -out $(inputs.Outfile) \ + -max_target_seqs $(inputs.max_target_seqs) \ + -comp_based_stats $(inputs.comp_based_stats) \ + -evalue $(inputs.evalue) \ + -word_size $(inputs.word_size) \ + -matrix $(inputs.matrix) \ + -seg $(inputs.seg) \ + -gapopen $(inputs.gapopen) \ + -gapextend $(inputs.gapextend) \ + -num_threads $(inputs.threads) \ + $(inputs.lcase_masking ? "-lcase_masking" : "") $(inputs.max_hsps === null ? "" : "-max_hsps " + inputs.max_hsps) $(inputs.taxids === null ? "" : "-taxids " + inputs.taxids) $(inputs.negative_taxids === null ? 
"" : "-negative_taxids " + inputs.negative_taxids)\ + + - class: InlineJavascriptRequirement + - class: NetworkAccess + networkAccess: true + hints: + - class: ResourceRequirement + coresMin: 8 + ramMin: 15258.7890625 + outdirMin: 1024 + cwlVersion: v1.2 + baseCommand: + - bash + - script.bash + when: $(inputs.method === "blastx") + - id: runtblastn + in: + - id: Queryfa + source: queryfa + - id: Fname + source: fname + - id: outfmt + source: outfmt + - id: evalue + source: evalue + - id: Outfile + source: Outfile + - id: docker + source: blast_docker + - id: threads + source: threads + - id: max_target_seqs + source: runtblastn.max_target_seqs + - id: word_size + source: runtblastn.word_size + - id: seg + source: runtblastn.seg + - id: comp_based_stats + source: runtblastn.comp_based_stats + - id: matrix + source: runtblastn.matrix + - id: gapopen + source: runtblastn.gapopen + - id: gapextend + source: runtblastn.gapextend + - id: lcase_masking + source: runtblastn.lcase_masking + - id: max_hsps + source: runtblastn.max_hsps + - id: taxids + source: runtblastn.taxids + - id: negative_taxids + source: runtblastn.negative_taxids + out: + - id: out + run: + class: CommandLineTool + id: runtblastn + inputs: + - id: Queryfa + type: File + - id: Fname + type: string + - id: outfmt + type: int + - id: evalue + type: float + - id: Outfile + type: string + - id: docker + type: string + - id: threads + type: int + - id: max_target_seqs + default: 100 + type: int + - id: word_size + default: 6 + type: int + - id: seg + default: "'12 2.2 2.5'" + type: string + - id: comp_based_stats + default: '2' + type: string + - id: matrix + default: BLOSUM62 + type: string + - id: gapopen + default: 11 + type: int + - id: gapextend + default: 1 + type: int + - id: lcase_masking + default: false + type: boolean + - id: max_hsps + type: + - int + - 'null' + - id: taxids + type: + - string + - 'null' + - id: negative_taxids + type: + - string + - 'null' + outputs: + - id: out + type: File 
+ outputBinding: + glob: $(inputs.Outfile) + requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: script.bash + entry: |4+ + + set -e + tblastn -db "$(inputs.Fname)" \ + -query $(inputs.Queryfa.path) \ + -outfmt $(inputs.outfmt) \ + -out $(inputs.Outfile) \ + -max_target_seqs $(inputs.max_target_seqs) \ + -comp_based_stats $(inputs.comp_based_stats) \ + -evalue $(inputs.evalue) \ + -word_size $(inputs.word_size) \ + -matrix $(inputs.matrix) \ + -seg $(inputs.seg) \ + -gapopen $(inputs.gapopen) \ + -gapextend $(inputs.gapextend) \ + -num_threads $(inputs.threads) \ + $(inputs.lcase_masking ? "-lcase_masking" : "") $(inputs.max_hsps === null ? "" : "-max_hsps " + inputs.max_hsps) $(inputs.taxids === null ? "" : "-taxids " + inputs.taxids) $(inputs.negative_taxids === null ? "" : "-negative_taxids " + inputs.negative_taxids)\ + + - class: InlineJavascriptRequirement + - class: NetworkAccess + networkAccess: true + hints: + - class: ResourceRequirement + coresMin: 8 + ramMin: 15258.7890625 + outdirMin: 1024 + cwlVersion: v1.2 + baseCommand: + - bash + - script.bash + when: $(inputs.method === "queryfa") + - id: runtblastx + in: + - id: Queryfa + source: queryfa + - id: Fname + source: fname + - id: outfmt + source: outfmt + - id: Outfile + source: Outfile + - id: threads + source: threads + - id: evalue + source: evalue + - id: docker + source: blast_docker + - id: taxids + source: runtblastx.taxids + - id: word_size + source: runtblastx.word_size + - id: max_target_seqs + source: runtblastx.max_target_seqs + - id: seg + source: runtblastx.seg + - id: matrix + source: runtblastx.matrix + - id: lcase_masking + source: runtblastx.lcase_masking + - id: negative_taxids + source: runtblastx.negative_taxids + - id: max_hsps + source: runtblastx.max_hsps + out: + - id: out + run: + class: CommandLineTool + id: runtblastx + inputs: + - id: Queryfa + type: File + - id: Fname + type: string + - id: outfmt + type: int + - id: Outfile + type: string + - id: 
threads + type: int + - id: evalue + type: float + - id: docker + type: string + - id: taxids + type: + - string + - 'null' + - id: word_size + default: 3 + type: int + - id: max_target_seqs + default: 100 + type: int + - id: seg + default: "'12 2.2 2.5'" + type: string + - id: matrix + default: BLOSUM62 + type: string + - id: lcase_masking + default: false + type: boolean + - id: negative_taxids + type: + - string + - 'null' + - id: max_hsps + type: + - int + - 'null' + outputs: + - id: out + type: File + outputBinding: + glob: $(inputs.Outfile) + requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: script.bash + entry: |4 + + set -e + tblastx -db "$(inputs.Fname)" \ + -query $(inputs.Queryfa.path) \ + -outfmt $(inputs.outfmt) \ + -out $(inputs.Outfile) \ + -max_target_seqs $(inputs.max_target_seqs) \ + -evalue $(inputs.evalue) \ + -word_size $(inputs.word_size) \ + -matrix $(inputs.matrix) \ + -seg $(inputs.seg) \ + -num_threads $(inputs.threads) \ + $(inputs.lcase_masking ? "-lcase_masking" : "") $(inputs.max_hsps === null ? "" : "-max_hsps " + inputs.max_hsps) $(inputs.taxids === null ? "" : "-taxids " + inputs.taxids) $(inputs.negative_taxids === null ? 
"" : "-negative_taxids " + inputs.negative_taxids)\ + - class: InlineJavascriptRequirement + - class: NetworkAccess + networkAccess: true + hints: + - class: ResourceRequirement + coresMin: 8 + ramMin: 15258.7890625 + outdirMin: 1024 + cwlVersion: v1.2 + baseCommand: + - bash + - script.bash + when: $(inputs.method === "tblastx") +outputs: + - id: blast.fina_output + outputSource: _fina_output_select_first/result + type: File diff --git a/wdl2cwl/tests/test_cwl.py b/wdl2cwl/tests/test_cwl.py index 4eba95cd..4188c318 100644 --- a/wdl2cwl/tests/test_cwl.py +++ b/wdl2cwl/tests/test_cwl.py @@ -24,6 +24,7 @@ def test_meta(caplog: pytest.LogCaptureFixture) -> None: ("align_and_count.wdl"), ("array_nonempty.wdl"), ("ATAC.wdl"), + ("blast.wdl"), ("bcftools.wdl"), ("bowtie.wdl"), ("BuildCembaReferences.wdl"), diff --git a/wdl2cwl/tests/wdl_files/blast.wdl b/wdl2cwl/tests/wdl_files/blast.wdl new file mode 100644 index 00000000..450c1661 --- /dev/null +++ b/wdl2cwl/tests/wdl_files/blast.wdl @@ -0,0 +1,325 @@ +version 1.0 + +workflow blast { + input { + String? 
blast_docker_override + String blast_docker= select_first([blast_docker_override,"swr.cn-south-1.myhuaweicloud.com/cngbdb/blast:1.2"]) + File queryfa + String fname = '/sfs/blastdb/2019_ncov/nucl/v6/ncov' + String method = 'blastn' + Int outfmt = 7 + Float evalue = 10 + String Outfile = basename(queryfa)+'.blast_result.txt' + Int threads = 8 + } + if (method == 'blastp') { + call runblastp{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'blastn'){ + call runblastn{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'blastx'){ + call runblastx{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'queryfa'){ + call runtblastn{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'tblastx'){ + call runtblastx{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + output { + File fina_output =select_first([runtblastx.out,runblastp.out,runblastn.out,runblastx.out,runtblastn.out]) + } +} + +task runblastn { + input { + String docker + File Queryfa + String Fname + String Outfile + Int threads + #blast optional + Int outfmt + Int max_target_seqs = 100 + Float evalue + Int word_size = 28 + Int reward = 1 + Int penalty = -2 + String strand = 'both' + Int gapopen = 0 + Int gapextend = 0 + String dust = "'20 64 1'" + Int? max_hsps + String tasks = "megablast" + String? taxids + String? 
negative_taxids + Boolean lcase_masking = false + } + runtime{ + docker : docker + cpu : "8" + memory : "16G" + } + command { + set -e + blastn -db "${Fname}" \ + -show_gis \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -penalty ${penalty} \ + -reward ${reward} \ + -dust ${dust} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -task ${tasks} \ + -strand ${strand} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + + } + output { + File out = "${Outfile}" + } +} + +task runblastp { + input { + String docker + File Queryfa + String Fname + #blast optional + Int outfmt + String Outfile + Float evalue + Int threads + Int max_target_seqs = 100 + Int word_size = 6 + String seg = "no" + String comp_based_stats = "2" + String matrix = "BLOSUM62" + Int gapopen = 11 + Int gapextend = 1 + Int? max_hsps + String? taxids + String? 
negative_taxids + Boolean lcase_masking = false + } + runtime{ + docker : docker + cpu : "8" + memory : "16G" + } + command { + set -e + blastp -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -comp_based_stats ${comp_based_stats} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids} \ + + } + output { + File out = "${Outfile}" + } +} + +task runblastx { + input { + File Queryfa + String Fname + Int outfmt + Float evalue + String Outfile + String docker + Int threads + Int max_target_seqs = 100 + Int word_size = 6 + String seg = "'12 2.2 2.5'" + String comp_based_stats = "2" + String matrix = "BLOSUM62" + Int gapopen = 11 + Int gapextend = 1 + String? taxids + String? negative_taxids + Int? 
max_hsps + Boolean lcase_masking = false + } + runtime{ + docker : docker + cpu : "8" + memory : "16G" + } + command { + set -e + blastx -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -comp_based_stats ${comp_based_stats} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + + } + output { + File out = "$${Outfile}" + } +} + +task runtblastn { + input { + File Queryfa + String Fname + Int outfmt + Float evalue + String Outfile + String docker + Int threads + Int max_target_seqs = 100 + Int word_size = 6 + String seg = "'12 2.2 2.5'" + String comp_based_stats = "2" + String matrix = "BLOSUM62" + Int gapopen = 11 + Int gapextend = 1 + Boolean lcase_masking = false + Int? max_hsps + String? taxids + String? negative_taxids + } + runtime{ + docker :docker + cpu : "8" + memory : "16G" + } + command { + set -e + tblastn -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -comp_based_stats ${comp_based_stats} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + + } + output { + File out = "${Outfile}" + } +} + +task runtblastx { + input { + File Queryfa + String Fname + Int outfmt + String Outfile + Int threads + Float evalue + String docker + String? taxids + Int word_size = 3 + Int max_target_seqs = 100 + String seg = "'12 2.2 2.5'" + String matrix = "BLOSUM62" + Boolean lcase_masking = false + String? 
negative_taxids + Int? max_hsps + } + runtime{ + docker :docker + cpu : "8" + memory : "16G" + } + command { + set -e + tblastx -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + } + output { + File out = "${Outfile}" + } +} + +