Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions parallel-orch/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging
import sys

from shasta.ast_node import *
from shasta.json_to_ast import to_ast_node

import libdash.parser

## Tracks whether this is the first call to the libdash parser.
## The parser must be initialized exactly once: passing init=True on a
## second call makes libdash hang (see parse_shell_to_asts below).
first_time_calling_parser = True

## Parses a shell script straight to an AST through python
## without calling the parser as an executable
def parse_shell_to_asts(input_script_path):
    """Parse the shell script at ``input_script_path`` into typed ASTs.

    Returns a list of tuples
    ``(typed_ast, original_text, linno_before, linno_after)``, one per
    top-level command in the script.

    On a parsing error this logs the error and terminates the whole
    process with exit code 1 (via ``sys.exit``).
    """
    global first_time_calling_parser

    try:
        ## The libdash parser must not be initialized when called the
        ## second time because it hangs!
        untyped_asts = libdash.parser.parse(input_script_path,
                                            init=first_time_calling_parser)
        first_time_calling_parser = False

        ## Transform the untyped ast objects to typed ones
        return [(to_ast_node(untyped_ast), original_text, linno_before, linno_after)
                for untyped_ast, original_text, linno_before, linno_after in untyped_asts]
    except libdash.parser.ParsingException as e:
        logging.error(f'Parsing error: {e}')
        ## sys.exit instead of the site-injected exit(): exit() is not
        ## guaranteed to exist when python runs without the site module.
        sys.exit(1)


## Returns true if the script is safe to speculate and execute outside
## of the original shell context.
def safe_to_execute(asts) -> bool:
    """Report whether ``asts`` can safely be executed speculatively,
    outside of the original shell context.

    Currently a stub: it only logs the ASTs and always answers True.
    """
    logging.debug(f'Asts in question: {asts}')
    ## TODO: Expand and check whether the asts contain a command
    ##       substitution or a primitive. If so, the original script
    ##       must be told to execute the command itself.
    ##
    ## TODO: Write the analysis here, then move it to expand, and then
    ##       move expand to its own library.
    ##
    ## TODO: Also add a test with a break and see it fail.
    ## TODO: Push the changes that modify the tests to show stderr
    ##       incrementally.
    ## KK 2023-05-26 Keep in mind: once something executes in the
    ##       original shell we cannot speculate anything after it,
    ##       because we cannot track read-write dependencies inside
    ##       the original shell.
    return True

58 changes: 40 additions & 18 deletions parallel-orch/partial_program_order.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import copy
import logging
import os
import sys

import analysis
import executor
import trace
import sys

from shasta.ast_node import AstNode

class CompletedNodeInfo:
def __init__(self, exit_code, variable_file, stdout_file):
Expand Down Expand Up @@ -144,9 +148,15 @@ def parse_node_id(node_id_str: str) -> NodeId:
return NodeId(int(node_id_str), LoopStack())

class Node:
def __init__(self, id, cmd, loop_context: LoopStack):
self.cmd = cmd
id: NodeId
cmd: str
asts: "list[AstNode]"
loop_context: LoopStack

def __init__(self, id, cmd, asts, loop_context: LoopStack):
self.id = id
self.cmd = cmd
self.asts = asts
self.cmd_no_redir = trace.remove_command_redir(self.cmd)
self.loop_context = loop_context
## Keep track of how many iterations of this loop node we have unrolled
Expand Down Expand Up @@ -875,7 +885,7 @@ def unroll_single_loop(self, loop_id: int, nodes_subset: "list[NodeId]"):
new_node_loop_contexts.pop_outer()

## Create the new node
self.nodes[new_loop_node_id] = Node(new_loop_node_id, node.cmd, new_node_loop_contexts)
self.nodes[new_loop_node_id] = Node(new_loop_node_id, node.cmd, node.asts, new_node_loop_contexts)
self.executions[new_loop_node_id] = 0
logging.debug(f'New loop ids: {node_mappings}')

Expand Down Expand Up @@ -1139,23 +1149,31 @@ def run_all_frontier_cmds(self):
def run_cmd_non_blocking(self, node_id: NodeId):
## A command should only be run if it's in the frontier, otherwise it should be spec run
assert(self.is_frontier(node_id))
node = self.get_node(node_id)
cmd = node.get_cmd()
logging.debug(f'Running command: {node_id} {self.get_node(node_id)}')
logging.trace(f'Running command: {node_id} {self.get_node(node_id)}')
logging.trace(f"ExecutingAdd|{node_id}")
self.executions[node_id] += 1
proc, trace_file, stdout, stderr, variable_file = executor.async_run_and_trace_command_return_trace(cmd, node_id)
logging.debug(f'Read trace from: {trace_file}')
self.commands_currently_executing[node_id] = (proc, trace_file, stdout, stderr, variable_file)
self.execute_cmd_core(node_id, speculate=False)

## Run a command and add it to the dictionary of executing ones
def speculate_cmd_non_blocking(self, node_id: NodeId):
logging.debug(f'Speculating command: {node_id} {self.get_node(node_id)}')
## TODO: Since these (this and the function above)
## are relevant for the report maker,
## add them in some library (e.g., trace_for_report)
## so that we don't accidentally delete them.
logging.trace(f"ExecutingSandboxAdd|{node_id}")
self.execute_cmd_core(node_id, speculate=True)

def execute_cmd_core(self, node_id: NodeId, speculate=False):
node = self.get_node(node_id)
## TODO: Do something with the result of this analysis
is_safe = analysis.safe_to_execute(node.asts)
cmd = node.get_cmd()
logging.debug(f'Speculating command: {node_id} {self.get_node(node_id)}')
self.executions[node_id] += 1
proc, trace_file, stdout, stderr, variable_file = executor.async_run_and_trace_command_return_trace_in_sandbox(cmd, node_id)
logging.trace(f"ExecutingSandboxAdd|{node_id}")
if speculate:
execute_func = executor.async_run_and_trace_command_return_trace_in_sandbox
else:
execute_func = executor.async_run_and_trace_command_return_trace
proc, trace_file, stdout, stderr, variable_file = execute_func(cmd, node_id)
logging.debug(f'Read trace from: {trace_file}')
self.commands_currently_executing[node_id] = (proc, trace_file, stdout, stderr, variable_file)

Expand Down Expand Up @@ -1313,10 +1331,12 @@ def log_executions(self):


## TODO: Try to move those to PaSh and import them here
def parse_cmd_from_file(file_path: str) -> str:
def parse_cmd_from_file(file_path: str) -> "tuple[str,list[AstNode]]":
logging.debug(f'Parsing: {file_path}')
with open(file_path) as f:
cmd = f.read()
return cmd
asts = analysis.parse_shell_to_asts(file_path)
return cmd, asts

def parse_edge_line(line: str) -> "tuple[int, int]":
from_str, to_str = line.split(" -> ")
Expand Down Expand Up @@ -1369,9 +1389,11 @@ def parse_partial_program_order_from_file(file_path: str) -> PartialProgramOrder
nodes = {}
for i in range(number_of_nodes):
file_path = f'{cmds_directory}/{i}'
cmd = parse_cmd_from_file(file_path)
cmd, asts = parse_cmd_from_file(file_path)
loop_ctx = loop_contexts[i]
nodes[NodeId(i)] = Node(NodeId(i), cmd, LoopStack(loop_ctx))
nodes[NodeId(i)] = Node(NodeId(i), cmd,
asts=asts,
loop_context=LoopStack(loop_ctx))

edges = {NodeId(i) : [] for i in range(number_of_nodes)}
for edge_line in edge_lines:
Expand Down
4 changes: 2 additions & 2 deletions parallel-orch/run_command.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ if [ $sandbox_flag -eq 1 ]; then
## Generate a temporary directory to store the workfiles
mkdir -p /tmp/pash_spec
export SANDBOX_DIR="$(mktemp -d /tmp/pash_spec/sandbox_XXXXXXX)/"
"${PASH_SPEC_TOP}/deps/try/try" -D "${SANDBOX_DIR}" "${PASH_SPEC_TOP}/parallel-orch/template_script_to_execute.sh" > "${STDOUT_FILE}"
## We need to execute `try` with bash to keep the exported functions
bash "${PASH_SPEC_TOP}/deps/try/try" -D "${SANDBOX_DIR}" "${PASH_SPEC_TOP}/parallel-orch/template_script_to_execute.sh" > "${STDOUT_FILE}"
exit_code=$?
else
export SANDBOX_DIR=""
"${PASH_SPEC_TOP}/parallel-orch/template_script_to_execute.sh" > "${STDOUT_FILE}"
exit_code=$?
fi
Expand Down
6 changes: 3 additions & 3 deletions parallel-orch/scheduler_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ def main():
# Set debug level
if args.debug_level == 1:
logging.getLogger().setLevel(logging.INFO)
elif args.debug_level == 2:
elif args.debug_level >= 2:
logging.getLogger().setLevel(logging.DEBUG)
elif args.debug_level >= 3:
logging.getLogger().setLevel(logging.TRACE)
# elif args.debug_level >= 3:
# logging.getLogger().setLevel(logging.TRACE)

scheduler = Scheduler(config.SCHEDULER_SOCKET)
scheduler.run()
Expand Down
23 changes: 0 additions & 23 deletions parallel-orch/template_script_to_execute_in_overlay.sh

This file was deleted.

21 changes: 14 additions & 7 deletions test/test_orch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ echo "Test script directory: $TEST_SCRIPT_DIR"
DEBUG=${DEBUG:-0}

bash="bash"
## Debug needs to be set to 100 because otherwise repetitions cannot be checked
orch="$ORCH_TOP/pash-spec.sh -d 100"
## Debug needs to be set to 2 because otherwise repetitions cannot be checked
orch="$ORCH_TOP/pash-spec.sh -d 2"
# Generated test scripts are saved here
test_dir_orch="$ORCH_TOP/test/test_scripts_orch"
test_dir_bash="$ORCH_TOP/test/test_scripts_bash"
Expand Down Expand Up @@ -78,13 +78,13 @@ run_test()
# Run test with orch
export test_output_dir="$WORKING_DIR/output_orch"
stderr_file="$(mktemp)"
$test "$orch" "$TEST_SCRIPT_DIR" "$test_output_dir" > "$test_output_dir/stdout" 2> "$stderr_file"
test_orch_ec=$?

## Print stderr
## TODO: Fix this to print the stderr continuously by doing the execution checking inside pash-spec
if [ $DEBUG -ge 1 ]; then
cat "$stderr_file" 1>&2
$test "$orch" "$TEST_SCRIPT_DIR" "$test_output_dir" 2>&1 > "$test_output_dir/stdout" | tee "$stderr_file" 1>&2
test_orch_ec=$?
else
$test "$orch" "$TEST_SCRIPT_DIR" "$test_output_dir" 2>"$stderr_file" > "$test_output_dir/stdout"
test_orch_ec=$?
fi

diff -q "$WORKING_DIR/output_bash/" "$WORKING_DIR/output_orch/" > /dev/null
Expand Down Expand Up @@ -302,6 +302,12 @@ test_loop()
$shell $2/test_loop.sh
}

test_break()
{
local shell=$1
$shell $2/test_break.sh
}

## TODO: make more loop tests with nested loops and commands after the loop

# We run all tests composed with && to exit on the first that fails
Expand Down Expand Up @@ -333,6 +339,7 @@ if [ "$#" -eq 0 ]; then
run_test test9_3
run_test test_stdout
run_test test_loop
# run_test test_break
else
for testname in $@
do
Expand Down
5 changes: 5 additions & 0 deletions test/test_scripts/test_break.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Regression fixture for `break` inside a loop: the body should run
# once, print "hi1", and the break should both skip "hi2" and stop the
# loop before the second iteration. Expected output: a single "hi1".
for i in 1 2; do
echo hi1
break
echo hi2
done