diff --git a/deps/pash b/deps/pash index 8e48fb65..ec992916 160000 --- a/deps/pash +++ b/deps/pash @@ -1 +1 @@ -Subproject commit 8e48fb6527de422eb964d9993b6f471488c8dde3 +Subproject commit ec9929164cb1aa52057ae226c25cd312908c007c diff --git a/parallel-orch/analysis.py b/parallel-orch/analysis.py new file mode 100644 index 00000000..73f823e5 --- /dev/null +++ b/parallel-orch/analysis.py @@ -0,0 +1,54 @@ +import logging + +from shasta.ast_node import * +from shasta.json_to_ast import to_ast_node + +import libdash.parser + +## Keeps track of the first time we call the parser +first_time_calling_parser = True + +## Parses straight a shell script to an AST +## through python without calling it as an executable +def parse_shell_to_asts(input_script_path): + global first_time_calling_parser + + try: + ## The libdash parser must not be initialized when called the second + ## time because it hangs! + new_ast_objects = libdash.parser.parse(input_script_path, init=first_time_calling_parser) + first_time_calling_parser = False + + ## Transform the untyped ast objects to typed ones + typed_ast_objects = [] + for untyped_ast, original_text, linno_before, linno_after, in new_ast_objects: + typed_ast = to_ast_node(untyped_ast) + typed_ast_objects.append((typed_ast, original_text, linno_before, linno_after)) + + return typed_ast_objects + except libdash.parser.ParsingException as e: + logging.error(f'Parsing error: {e}') + exit(1) + + +## Returns true if the script is safe to speculate and execute outside +## of the original shell context. +def safe_to_execute(asts) -> bool: + logging.debug(f'Asts in question: {asts}') + ## TODO: Expand and check whether the asts contain + ## a command substitution or a primitive. + ## If so, then we need to tell the original script to execute the command. + ## + ## TODO: Write my analysis here, then move it to expand, + ## and then move expand to its own library + ## + ## TODO: Also, add a test with a break and see it fail + + ## TODO: Push the changes that modify the tests to show stderr incrementally + + ## KK 2023-05-26 We need to keep in mind that whenever we execute something + ## in the original shell, then we cannot speculate anything + ## after it, because we cannot track read-write dependencies + ## in the original shell. + return True + diff --git a/parallel-orch/partial_program_order.py b/parallel-orch/partial_program_order.py index afbb7786..12460f4d 100644 --- a/parallel-orch/partial_program_order.py +++ b/parallel-orch/partial_program_order.py @@ -1,9 +1,13 @@ import copy import logging import os +import sys + +import analysis import executor import trace -import sys + +from shasta.ast_node import AstNode class CompletedNodeInfo: def __init__(self, exit_code, variable_file, stdout_file): @@ -144,9 +148,15 @@ def parse_node_id(node_id_str: str) -> NodeId: return NodeId(int(node_id_str), LoopStack()) class Node: - def __init__(self, id, cmd, loop_context: LoopStack): - self.cmd = cmd + id: NodeId + cmd: str + asts: "list[AstNode]" + loop_context: LoopStack + + def __init__(self, id, cmd, asts, loop_context: LoopStack): self.id = id + self.cmd = cmd + self.asts = asts self.cmd_no_redir = trace.remove_command_redir(self.cmd) self.loop_context = loop_context ## Keep track of how many iterations of this loop node we have unrolled @@ -875,7 +885,7 @@ def unroll_single_loop(self, loop_id: int, nodes_subset: "list[NodeId]"): new_node_loop_contexts.pop_outer() ## Create the new node - self.nodes[new_loop_node_id] = Node(new_loop_node_id, node.cmd, new_node_loop_contexts) + self.nodes[new_loop_node_id] = Node(new_loop_node_id, node.cmd, node.asts, new_node_loop_contexts) self.executions[new_loop_node_id] = 0 logging.debug(f'New loop ids: {node_mappings}') @@ -1139,23 +1149,31 @@ def run_all_frontier_cmds(self): def run_cmd_non_blocking(self, node_id: NodeId): ## A command should only be run if it's in the frontier, otherwise it should be spec run assert(self.is_frontier(node_id)) - node = self.get_node(node_id) - cmd = node.get_cmd() - logging.debug(f'Running command: {node_id} {self.get_node(node_id)}') + logging.trace(f'Running command: {node_id} {self.get_node(node_id)}') logging.trace(f"ExecutingAdd|{node_id}") - self.executions[node_id] += 1 - proc, trace_file, stdout, stderr, variable_file = executor.async_run_and_trace_command_return_trace(cmd, node_id) - logging.debug(f'Read trace from: {trace_file}') - self.commands_currently_executing[node_id] = (proc, trace_file, stdout, stderr, variable_file) + self.execute_cmd_core(node_id, speculate=False) ## Run a command and add it to the dictionary of executing ones def speculate_cmd_non_blocking(self, node_id: NodeId): + logging.debug(f'Speculating command: {node_id} {self.get_node(node_id)}') + ## TODO: Since these (this and the function above) + ## are relevant for the report maker, + ## add them in some library (e.g., trace_for_report) + ## so that we don't accidentally delete them. + logging.trace(f"ExecutingSandboxAdd|{node_id}") + self.execute_cmd_core(node_id, speculate=True) + + def execute_cmd_core(self, node_id: NodeId, speculate=False): node = self.get_node(node_id) + ## TODO: Do something with the result of this analysis + is_safe = analysis.safe_to_execute(node.asts) cmd = node.get_cmd() - logging.debug(f'Speculating command: {node_id} {self.get_node(node_id)}') self.executions[node_id] += 1 - proc, trace_file, stdout, stderr, variable_file = executor.async_run_and_trace_command_return_trace_in_sandbox(cmd, node_id) - logging.trace(f"ExecutingSandboxAdd|{node_id}") + if speculate: + execute_func = executor.async_run_and_trace_command_return_trace_in_sandbox + else: + execute_func = executor.async_run_and_trace_command_return_trace + proc, trace_file, stdout, stderr, variable_file = execute_func(cmd, node_id) logging.debug(f'Read trace from: {trace_file}') self.commands_currently_executing[node_id] = (proc, trace_file, stdout, stderr, variable_file) @@ -1313,10 +1331,12 @@ def log_executions(self): ## TODO: Try to move those to PaSh and import them here -def parse_cmd_from_file(file_path: str) -> str: +def parse_cmd_from_file(file_path: str) -> "tuple[str,list[AstNode]]": + logging.debug(f'Parsing: {file_path}') with open(file_path) as f: cmd = f.read() - return cmd + asts = analysis.parse_shell_to_asts(file_path) + return cmd, asts def parse_edge_line(line: str) -> "tuple[int, int]": from_str, to_str = line.split(" -> ") @@ -1369,9 +1389,11 @@ def parse_partial_program_order_from_file(file_path: str) -> PartialProgramOrder nodes = {} for i in range(number_of_nodes): file_path = f'{cmds_directory}/{i}' - cmd = parse_cmd_from_file(file_path) + cmd, asts = parse_cmd_from_file(file_path) loop_ctx = loop_contexts[i] - nodes[NodeId(i)] = Node(NodeId(i), cmd, LoopStack(loop_ctx)) + nodes[NodeId(i)] = Node(NodeId(i), cmd, + asts=asts, + loop_context=LoopStack(loop_ctx)) edges = {NodeId(i) : [] for i in range(number_of_nodes)} for edge_line in edge_lines: diff --git a/parallel-orch/run_command.sh b/parallel-orch/run_command.sh index 7860fc21..7bd5dbe8 100755 --- a/parallel-orch/run_command.sh +++ b/parallel-orch/run_command.sh @@ -26,10 +26,10 @@ if [ $sandbox_flag -eq 1 ]; then ## Generate a temporary directory to store the workfiles mkdir -p /tmp/pash_spec export SANDBOX_DIR="$(mktemp -d /tmp/pash_spec/sandbox_XXXXXXX)/" - "${PASH_SPEC_TOP}/deps/try/try" -D "${SANDBOX_DIR}" "${PASH_SPEC_TOP}/parallel-orch/template_script_to_execute.sh" > "${STDOUT_FILE}" + ## We need to execute `try` with bash to keep the exported functions + bash "${PASH_SPEC_TOP}/deps/try/try" -D "${SANDBOX_DIR}" "${PASH_SPEC_TOP}/parallel-orch/template_script_to_execute.sh" > "${STDOUT_FILE}" exit_code=$? else - export SANDBOX_DIR="" "${PASH_SPEC_TOP}/parallel-orch/template_script_to_execute.sh" > "${STDOUT_FILE}" exit_code=$? fi diff --git a/parallel-orch/scheduler_server.py b/parallel-orch/scheduler_server.py index d6f352c3..cc50381e 100644 --- a/parallel-orch/scheduler_server.py +++ b/parallel-orch/scheduler_server.py @@ -222,10 +222,10 @@ def main(): # Set debug level if args.debug_level == 1: logging.getLogger().setLevel(logging.INFO) - elif args.debug_level == 2: + elif args.debug_level >= 2: logging.getLogger().setLevel(logging.DEBUG) - elif args.debug_level >= 3: - logging.getLogger().setLevel(logging.TRACE) + # elif args.debug_level >= 3: + # logging.getLogger().setLevel(logging.TRACE) scheduler = Scheduler(config.SCHEDULER_SOCKET) scheduler.run() diff --git a/parallel-orch/template_script_to_execute_in_overlay.sh b/parallel-orch/template_script_to_execute_in_overlay.sh deleted file mode 100755 index b9bd9677..00000000 --- a/parallel-orch/template_script_to_execute_in_overlay.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -## TODO: Delete this file - -## Save the script to execute in the sandboxdir -echo $CMD_STRING > ./Rikerfile - -## Call Riker to execute the command -# rkr --debug --log all --show -rkr -## TODO: Run with gdb for debugging -## ```sh -# gdb rkr -## (gdb) break wrappers.hh:readlink -## (gdb) run --debug --log all --show -# (gdb) print std::string::max_size() -## ``` -## Call Riker to get the trace -## TODO: Normally we would like to plug in Riker and get the actual Trace data structure -rkr --debug trace -o "$TRACE_FILE" > /dev/null - -## Failing test case: -## CMD_STRING="./test/misc/append_a_line.sh" ./overlay-sandbox/run-sandboxed.sh ./parallel-orch/template_script_to_execute_in_overlay.sh diff --git a/test/test_orch.sh b/test/test_orch.sh index 637ff9ca..669a2013 100755 --- a/test/test_orch.sh +++ b/test/test_orch.sh @@ -13,8 +13,8 @@ echo "Test script directory: $TEST_SCRIPT_DIR" DEBUG=${DEBUG:-0} bash="bash" -## Debug needs to be set to 100 because otherwise repetitions cannot be checked -orch="$ORCH_TOP/pash-spec.sh -d 100" +## Debug needs to be set to 2 because otherwise repetitions cannot be checked +orch="$ORCH_TOP/pash-spec.sh -d 2" # Generated test scripts are saved here test_dir_orch="$ORCH_TOP/test/test_scripts_orch" test_dir_bash="$ORCH_TOP/test/test_scripts_bash" @@ -78,13 +78,13 @@ run_test() # Run test with orch export test_output_dir="$WORKING_DIR/output_orch" stderr_file="$(mktemp)" - $test "$orch" "$TEST_SCRIPT_DIR" "$test_output_dir" > "$test_output_dir/stdout" 2> "$stderr_file" - test_orch_ec=$? - ## Print stderr - ## TODO: Fix this to print the stderr continuously by doing the execution checking inside pash-spec if [ $DEBUG -ge 1 ]; then - cat "$stderr_file" 1>&2 + $test "$orch" "$TEST_SCRIPT_DIR" "$test_output_dir" 2>&1 > "$test_output_dir/stdout" | tee "$stderr_file" 1>&2 + test_orch_ec=$? + else + $test "$orch" "$TEST_SCRIPT_DIR" "$test_output_dir" 2>"$stderr_file" > "$test_output_dir/stdout" + test_orch_ec=$? fi diff -q "$WORKING_DIR/output_bash/" "$WORKING_DIR/output_orch/" > /dev/null @@ -302,6 +302,12 @@ test_loop() $shell $2/test_loop.sh } +test_break() +{ + local shell=$1 + $shell $2/test_break.sh +} + ## TODO: make more loop tests with nested loops and commands after the loop # We run all tests composed with && to exit on the first that fails @@ -333,6 +339,7 @@ if [ "$#" -eq 0 ]; then run_test test9_3 run_test test_stdout run_test test_loop + # run_test test_break else for testname in $@ do diff --git a/test/test_scripts/test_break.sh b/test/test_scripts/test_break.sh new file mode 100644 index 00000000..3659c415 --- /dev/null +++ b/test/test_scripts/test_break.sh @@ -0,0 +1,5 @@ +for i in 1 2; do + echo hi1 + break + echo hi2 +done