binpash · angelhof · May 11, 2023 · Apr 27, 2023 · Apr 27, 2023 · May 2, 2023
diff --git a/deps/pash b/deps/pash
diff --git a/parallel-orch/partial_program_order.py b/parallel-orch/partial_program_order.py
diff --git a/parallel-orch/scheduler_server.py b/parallel-orch/scheduler_server.py
@@ -1,9 +1,11 @@
 import argparse
+import copy
 import logging
 import signal
 from util import *
 import config
-from partial_program_order import parse_partial_program_order_from_file
+import sys
+from partial_program_order import parse_partial_program_order_from_file, NodeId, parse_node_id
 
 ##
 ## A scheduler server
@@ -66,19 +68,43 @@ def handle_init(self, input_cmd: str):
         partial_order_file = input_cmd.split(":")[1].rstrip()
         logging.debug(f'Scheduler: Received partial_order_file: {partial_order_file}')
         self.partial_program_order = parse_partial_program_order_from_file(partial_order_file)
-        self.partial_program_order.init_workset()
-        logging.debug(f'Parsed partial program order:')
-        self.partial_program_order.populate_to_be_resolved_dict([])
-        logging.debug(f'To be resolved sets per node:')
-        logging.debug(self.partial_program_order.to_be_resolved)
+        self.partial_program_order.init_partial_order()
+
+    def __parse_wait(self, input_cmd: str):
+        """Parse '<tag>:<node_id>|<tag>:<c1-c2-...>' (or 'None') into (NodeId, list of int counters)."""
+        try:
+            node_id_component, loop_iter_counter_component = input_cmd.rstrip().split("|")
+            node_id = NodeId(int(node_id_component.split(":")[1].rstrip()))
+            loop_counters_str = loop_iter_counter_component.split(":")[1].rstrip()
+            ## The literal string "None" yields an empty list of loop counters
+            if loop_counters_str == "None":
+                return node_id, []
+            return node_id, [int(cnt) for cnt in loop_counters_str.split("-")]
+        except Exception as e:  ## not a bare except: let KeyboardInterrupt/SystemExit propagate
+            raise Exception(f'Parsing failure for line: {input_cmd}') from e
 
     def handle_wait(self, input_cmd: str, connection):
         assert(input_cmd.startswith("Wait"))
         ## We have received this message by the JIT, which waits for a node_id to
         ## finish execution.
-        node_id = int(input_cmd.split(":")[1].rstrip())
-        logging.debug(f'Scheduler: Received wait for node_id: {node_id}')
-
+        raw_node_id, loop_counters = self.__parse_wait(input_cmd)        
+        logging.debug(f'Scheduler: Received wait for node_id: {raw_node_id} with loop counters: {loop_counters}')
+
+        if self.partial_program_order.is_loop_node(raw_node_id):  
+            node_id = NodeId(raw_node_id.id, loop_counters)
+            if not self.partial_program_order.is_node_id(node_id):
+                ## TODO: This unrolling could also be done speculatively (i.e., moved to speculation).
+                ##       For now we are being conservative, which is why it only happens here.
+                ## TODO: Move this to scheduler.schedule_work(): if a loop node is waiting for a response and is not yet unrolled, unroll it to create work.
+                self.partial_program_order.unroll_loop_node(raw_node_id)
+        else:
+            ## If we are not in a loop, then the node id corresponds to the concrete node
+            node_id = raw_node_id
+
+        ## Inform the partial order that we received a wait for a node so that it can push loops
+        ## forward and so on.
+        self.partial_program_order.wait_received(node_id)
+
         ## If the node_id is already committed, just return its exit code
         if node_id in self.partial_program_order.get_committed():
             logging.debug(f'Node: {node_id} found in committed, responding immediately!')
@@ -94,7 +120,7 @@ def handle_wait(self, input_cmd: str, connection):
     def __parse_command_exec_complete(self, input_cmd: str) -> "tuple[int, int]":
         try:
             components = input_cmd.rstrip().split("|")
-            command_id = int(components[0].split(":")[1])
+            command_id = parse_node_id(components[0].split(":")[1])
             exit_code = int(components[1].split(":")[1])
             sandbox_dir = components[2].split(":")[1]
             return command_id, exit_code, sandbox_dir

diff --git a/parallel-orch/template_script_to_execute.sh b/parallel-orch/template_script_to_execute.sh
@@ -2,6 +2,11 @@
 
 ## TODO: Pass frontier flag here instead of separate scripts
 
+## Clean up the riker directory
+## KK 2023-05-04: Should this be done somewhere else? Could this interfere with the overlay fs?
+## TODO: Can we just ask riker to use a different cache (or point the cache at /dev/null),
+##       since we never really want it to take the cache into account?
+rm -rf ./.rkr
 
 ## Save the script to execute in the sandboxdir
 echo $CMD_STRING > ./Rikerfile

diff --git a/scripts/install_deps_ubuntu20.sh b/scripts/install_deps_ubuntu20.sh
@@ -1,5 +1,8 @@
 #!/bin/bash
 
+export PASH_SPEC_TOP=${PASH_SPEC_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)}
+export PASH_TOP=${PASH_TOP:-$PASH_SPEC_TOP/deps/pash}
+
 ## Install Riker's dependencies
 sudo apt-get update
 sudo apt install -y make clang llvm git gcc python3-cram file graphviz

diff --git a/test/test_orch.sh b/test/test_orch.sh
@@ -82,6 +82,7 @@ run_test()
     test_orch_ec=$?
 
     ## Print stderr
+    ## TODO: Fix this to print the stderr continuously by doing the execution checking inside pash-spec
     if [ $DEBUG -ge 1 ]; then 
         cat "$stderr_file" 1>&2
     fi
@@ -295,6 +296,14 @@ test_stdout()
     $shell $2/test_stdout.sh
 }
 
+test_loop()
+{
+    local shell=$1
+    $shell $2/test_loop.sh
+}
+
+## TODO: make more loop tests with nested loops and commands after the loop
+
 # We run all tests composed with && to exit on the first that fails
 if [ "$#" -eq 0 ]; then
     run_test test1_1 # "1 2 2 1"
@@ -323,6 +332,7 @@ if [ "$#" -eq 0 ]; then
     run_test test9_2
     run_test test9_3
     run_test test_stdout
+    run_test test_loop
 else
     for testname in $@
     do

diff --git a/test/test_scripts/test_loop.sh b/test/test_scripts/test_loop.sh
@@ -0,0 +1,13 @@
+echo hi
+for i in 1 2 3; do
+    echo hi1
+    sleep 1
+    echo hi2
+done
+echo hi3
+
+## Future loop tests must include:
+## 1. A single loop with a single command without anything else 
+## 2. Multiple commands in the same loop
+## 3. Nested loops
+## 4. Commands before and after a loop
+10 −0		compiler/env_var_names.py
+6 −1		compiler/orchestrator_runtime/speculative/speculative_runtime.sh
+2 −2		compiler/preprocessor/preprocessor.py
+116 −22		compiler/shell_ast/ast_to_ast.py
+55 −0		compiler/shell_ast/ast_util.py
+53 −32		compiler/speculative/util_spec.py
+1 −1		scripts/setup-pash.sh