From 7c8a54ef2185584e83397b49f76f6a73d1f7f893 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 6 Dec 2022 18:02:20 -0500
Subject: [PATCH 001/188] Extending CMake configuration to use CTest

After compiling, if ctest is enabled, one can simply run:

> cd build/
> ctest

And all tests will be run.

CTest runs the same tests as the autoconf version except for the
archived models (under `src/network-workloads/archived/`), which are not
compiled by CMake.
---
 CMakeLists.txt                             | 10 ++-
 tests/CMakeLists.txt                       | 96 ++++++++++++++++++++++
 tests/conf/modelnet-test-simplep2p.conf    |  2 +-
 tests/modelnet-p2p-bw-loggp.sh             |  2 +-
 tests/modelnet-prio-sched-test.sh          |  4 +-
 tests/modelnet-simplep2p-test.sh           |  8 ++
 tests/modelnet-test-dragonfly-synthetic.sh | 10 ++-
 tests/modelnet-test-dragonfly.sh           |  4 +-
 tests/modelnet-test-em.sh                  |  2 +-
 tests/modelnet-test-fattree-synthetic.sh   | 13 ++-
 tests/modelnet-test-loggp.sh               |  2 +-
 tests/modelnet-test-slimfly-synthetic.sh   |  9 +-
 tests/modelnet-test-slimfly.sh             |  2 +-
 tests/modelnet-test-torus.sh               |  2 +-
 tests/modelnet-test.sh                     |  2 +-
 tests/rc-stack-test.sh                     |  8 ++
 tests/run-test.sh.in                       |  6 ++
 tests/workload/codes-workload-test.sh      | 10 ++-
 18 files changed, 171 insertions(+), 21 deletions(-)
 create mode 100644 tests/CMakeLists.txt
 create mode 100755 tests/modelnet-simplep2p-test.sh
 create mode 100755 tests/rc-stack-test.sh
 create mode 100755 tests/run-test.sh.in

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86b4b5fe..d2310c06 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.17)
 
 # set the project name and version
 project(codes LANGUAGES C CXX VERSION 2.0)
@@ -103,4 +103,10 @@ add_subdirectory(src)
 configure_file(codes_config.h.in codes_config.h)
 
 
-
+string(COMPARE NOTEQUAL "RELEASE" "${CMAKE_BUILD_TYPE}" not_release)
+if(BUILD_TESTING AND not_release)
+    include(CTest)
+    set(CODES_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+    set(CODES_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+    add_subdirectory(tests)
+endif()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 00000000..92e38b9b
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,96 @@
+enable_testing()
+
+configure_file(run-test.sh.in run-test.sh)
+
+include_directories("${ROSS_INCLUDE_DIRS}" "${CODES_SOURCE_DIR}")
+
+# Unfortunatelly, CMake doesn't support iteration of a key-pair
+# structure, otherwise the following lists could be easily
+# compressed into a single list/dictionary/structure. Instead
+# each C file name MUST match each binary file name. This would
+# be handled differently if maintaining the autoconfig build
+# was not a MUST (in which case, the name of the file and its
+# binary should match, leaving us with a single list!)
+set(test-c-files
+    lp-io-test.c
+    mapping_test.c
+    jobmap-test.c
+    map-ctx-test.c
+    modelnet-prio-sched-test.c
+    modelnet-test-dragonfly.c
+    modelnet-test.c
+    resource-test.c
+    rc-stack-test.c
+    modelnet-p2p-bw.c
+    modelnet-simplep2p-test.c
+    local-storage-model-test.c
+    )
+
+set(test-bin-names
+    lp-io-test
+    mapping_test
+    jobmap-test
+    map-ctx-test
+    modelnet-prio-sched-test
+    modelnet-test-dragonfly
+    modelnet-test
+    resource-test
+    rc-stack-test
+    modelnet-p2p-bw
+    modelnet-simplep2p-test
+    lsm-test
+    )
+
+foreach(testname cfile IN ZIP_LISTS test-bin-names test-c-files)
+    add_executable(${testname} ${cfile})
+    target_link_libraries(${testname} PUBLIC codes)
+endforeach()
+
+# Additional binary which is not self contained
+add_executable(codes-workload-test
+    workload/codes-workload-test.c
+    workload/codes-workload-test-svr-lp.c
+    workload/codes-workload-test-cn-lp.c
+    )
+target_link_libraries(codes-workload-test PUBLIC codes)
+
+# Tests are also not consistent with the files to compile, but
+# that's ok, there are more tests than binary files
+set(test-shell-files
+    # All binaries have an associated sh test script
+    lp-io-test.sh
+    mapping_test.sh
+    jobmap-test.sh
+    map-ctx-test.sh
+    modelnet-prio-sched-test.sh
+    modelnet-test-dragonfly.sh
+    modelnet-test.sh
+    resource-test.sh
+    modelnet-p2p-bw-loggp.sh
+    modelnet-simplep2p-test.sh
+    rc-stack-test.sh
+    lsm-test.sh
+    # These tests correspond to archived models / no binaries are being generated by CMake
+    #modelnet-test-dragonfly-custom-synthetic.sh
+    #modelnet-test-dragonfly-dally-synthetic.sh
+    #modelnet-test-dragonfly-plus-synthetic.sh
+    modelnet-test-torus.sh
+    modelnet-test-em.sh
+    modelnet-test-loggp.sh
+    modelnet-test-slimfly.sh
+    # These tests are not supported by the makefile/autoconf machinery either
+    #modelnet-test-dragonfly-custom-traces.sh
+    #modelnet-test-dragonfly-traces.sh
+    #modelnet-test-slimfly-traces.sh
+    #modelnet-test-torus-traces.sh
+    modelnet-test-dragonfly-synthetic.sh
+    modelnet-test-fattree-synthetic.sh
+    modelnet-test-slimfly-synthetic.sh
+    workload/codes-workload-test.sh
+    )
+
+foreach(testname ${test-shell-files})
+    add_test(NAME ${testname}
+        COMMAND "${CMAKE_CURRENT_BINARY_DIR}/run-test.sh" "${CMAKE_CURRENT_SOURCE_DIR}/${testname}"
+        WORKING_DIRECTORY "${CODES_BINARY_DIR}")
+endforeach()
diff --git a/tests/conf/modelnet-test-simplep2p.conf b/tests/conf/modelnet-test-simplep2p.conf
index b0c22f93..9907aabd 100644
--- a/tests/conf/modelnet-test-simplep2p.conf
+++ b/tests/conf/modelnet-test-simplep2p.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-    message_size="312";
+    message_size="400";
     packet_size="1024";
     modelnet_order=("simplep2p");
     # scheduler options
diff --git a/tests/modelnet-p2p-bw-loggp.sh b/tests/modelnet-p2p-bw-loggp.sh
index 2972521f..8fc150ec 100755
--- a/tests/modelnet-p2p-bw-loggp.sh
+++ b/tests/modelnet-p2p-bw-loggp.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-tests/modelnet-p2p-bw --sync=1 -- tests/conf/modelnet-p2p-bw-loggp.conf
+tests/modelnet-p2p-bw --sync=1 -- $srcdir/tests/conf/modelnet-p2p-bw-loggp.conf
diff --git a/tests/modelnet-prio-sched-test.sh b/tests/modelnet-prio-sched-test.sh
index f9939322..205d7e2b 100755
--- a/tests/modelnet-prio-sched-test.sh
+++ b/tests/modelnet-prio-sched-test.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
 
 tests/modelnet-prio-sched-test --sync=1 -- \
-    tests/conf/modelnet-prio-sched-test.conf
+    $srcdir/tests/conf/modelnet-prio-sched-test.conf
 err=$?
 if [[ $err -ne 0 ]]; then
     exit $err
 fi
 
 mpirun -np 2 tests/modelnet-prio-sched-test --sync=3 -- \
-    tests/conf/modelnet-prio-sched-test.conf
+    $srcdir/tests/conf/modelnet-prio-sched-test.conf
 err=$?
 if [[ $err -ne 0 ]]; then
     exit $err
diff --git a/tests/modelnet-simplep2p-test.sh b/tests/modelnet-simplep2p-test.sh
new file mode 100755
index 00000000..dff8a366
--- /dev/null
+++ b/tests/modelnet-simplep2p-test.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+if [[ -z $srcdir ]] ; then
+    echo srcdir variable not set
+    exit 1
+fi
+
+tests/modelnet-simplep2p-test --sync=1 -- $srcdir/tests/conf/modelnet-test-simplep2p.conf
diff --git a/tests/modelnet-test-dragonfly-synthetic.sh b/tests/modelnet-test-dragonfly-synthetic.sh
index 8859ecbf..fa4b31dc 100755
--- a/tests/modelnet-test-dragonfly-synthetic.sh
+++ b/tests/modelnet-test-dragonfly-synthetic.sh
@@ -1,3 +1,11 @@
 #!/bin/bash
 
-src/network-workloads/model-net-synthetic --sync=1 --num_messages=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf 
+# Binaries generated by CMake are located in a different place
+# to those of autoconf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bin_dir=src/network-workloads
+else
+    bin_dir=src
+fi
+
+$bin_dir/model-net-synthetic --sync=1 --num_messages=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf 
diff --git a/tests/modelnet-test-dragonfly.sh b/tests/modelnet-test-dragonfly.sh
index 9362a821..9ed392c4 100755
--- a/tests/modelnet-test-dragonfly.sh
+++ b/tests/modelnet-test-dragonfly.sh
@@ -1,5 +1,3 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-dragonfly.conf
-
-
+tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-dragonfly.conf
diff --git a/tests/modelnet-test-em.sh b/tests/modelnet-test-em.sh
index 158ab272..fed720a9 100755
--- a/tests/modelnet-test-em.sh
+++ b/tests/modelnet-test-em.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-em.conf
+tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-em.conf
 
 
diff --git a/tests/modelnet-test-fattree-synthetic.sh b/tests/modelnet-test-fattree-synthetic.sh
index 9b76acdf..bd3acc9a 100755
--- a/tests/modelnet-test-fattree-synthetic.sh
+++ b/tests/modelnet-test-fattree-synthetic.sh
@@ -5,10 +5,15 @@ if [ -z $srcdir ]; then
               exit 1
  fi
 
-source $srcdir/tests/download-traces.sh
+# Binaries generated by CMake are located in a different place
+# to those of autoconf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bin_dir=src/network-workloads
+else
+    bin_dir=src
+fi
 
-src/network-workloads/model-net-synthetic-fattree --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-fattree.conf 
+$bin_dir/model-net-synthetic-fattree --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-fattree.conf 
 
+#source $srcdir/tests/download-traces.sh
 #src/network-workloads/model-net-mpi-replay --sync=1 --num_net_traces=27 --workload_file=/tmp/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00- --workload_type="dumpi" -- $srcdir/src/network-workloads/conf/modelnet-mpi-test-fattree.conf 
-
-
diff --git a/tests/modelnet-test-loggp.sh b/tests/modelnet-test-loggp.sh
index 4da704bd..03d98286 100755
--- a/tests/modelnet-test-loggp.sh
+++ b/tests/modelnet-test-loggp.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-loggp.conf
+tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-loggp.conf
diff --git a/tests/modelnet-test-slimfly-synthetic.sh b/tests/modelnet-test-slimfly-synthetic.sh
index a610e63a..df9a4436 100755
--- a/tests/modelnet-test-slimfly-synthetic.sh
+++ b/tests/modelnet-test-slimfly-synthetic.sh
@@ -1,4 +1,11 @@
 #!/bin/bash
 
-src/network-workloads/model-net-synthetic-slimfly --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf 
+# Binaries generated by CMake are located in a different place
+# to those of autoconf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bin_dir=src/network-workloads
+else
+    bin_dir=src
+fi
 
+$bin_dir/model-net-synthetic-slimfly --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf 
diff --git a/tests/modelnet-test-slimfly.sh b/tests/modelnet-test-slimfly.sh
index 938802ae..3fabc8b5 100755
--- a/tests/modelnet-test-slimfly.sh
+++ b/tests/modelnet-test-slimfly.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-slimfly.conf
+tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-slimfly.conf
diff --git a/tests/modelnet-test-torus.sh b/tests/modelnet-test-torus.sh
index 942a881e..9c6997f8 100755
--- a/tests/modelnet-test-torus.sh
+++ b/tests/modelnet-test-torus.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-torus.conf
+tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-torus.conf
 
diff --git a/tests/modelnet-test.sh b/tests/modelnet-test.sh
index bf327fec..0919c7a1 100755
--- a/tests/modelnet-test.sh
+++ b/tests/modelnet-test.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- tests/conf/modelnet-test.conf
+tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test.conf
diff --git a/tests/rc-stack-test.sh b/tests/rc-stack-test.sh
new file mode 100755
index 00000000..8c95e82b
--- /dev/null
+++ b/tests/rc-stack-test.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+if [[ -z $srcdir ]] ; then
+    echo srcdir variable not set
+    exit 1
+fi
+
+tests/modelnet-simplep2p-test
diff --git a/tests/run-test.sh.in b/tests/run-test.sh.in
new file mode 100755
index 00000000..ae2bec46
--- /dev/null
+++ b/tests/run-test.sh.in
@@ -0,0 +1,6 @@
+#!/bin/bash -x
+
+export srcdir="${CMAKE_SOURCE_DIR}"
+export GENERATED_USING_CMAKE=1
+
+bash -x "$1"
diff --git a/tests/workload/codes-workload-test.sh b/tests/workload/codes-workload-test.sh
index 1b58abfb..8709c560 100755
--- a/tests/workload/codes-workload-test.sh
+++ b/tests/workload/codes-workload-test.sh
@@ -1,3 +1,11 @@
 #!/bin/bash
 
-tests/workload/codes-workload-test --sync=1 $srcdir/tests/workload/codes-workload-test.conf
+# Binaries generated by CMake are located in a different place
+# to those of autoconf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bin_dir=tests/workload
+else
+    bin_dir=tests
+fi
+
+$bin_dir/codes-workload-test --sync=1 $srcdir/tests/workload/codes-workload-test.conf

From 703177843f7b87764a79af9cb27c0087dd51f8c5 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 14 Dec 2022 13:39:47 -0500
Subject: [PATCH 002/188] Packet source terminal is notified on packet delay
 delivery

To test the changes, I've used the synthetic ping pong example, which
has been modified to allow for a more random pattern.

Printing to screen the delay of each packet is a temporal change.
---
 CMakeLists.txt                             |  1 +
 doc/example/CMakeLists.txt                 |  9 +++
 doc/example/tutorial-synthetic-ping-pong.c | 78 ++++++++++++----------
 src/networks/model-net/dragonfly-dally.C   | 35 ++++++++++
 src/util/rc-stack.c                        |  2 +-
 5 files changed, 88 insertions(+), 37 deletions(-)
 create mode 100644 doc/example/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d2310c06..26936630 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,6 +102,7 @@ add_subdirectory(src)
 
 configure_file(codes_config.h.in codes_config.h)
 
+add_subdirectory(doc/example)
 
 string(COMPARE NOTEQUAL "RELEASE" "${CMAKE_BUILD_TYPE}" not_release)
 if(BUILD_TESTING AND not_release)
diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
new file mode 100644
index 00000000..c3f00579
--- /dev/null
+++ b/doc/example/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(example-files
+    example
+    tutorial-synthetic-ping-pong
+    )
+
+foreach(namefile ${example-files})
+    add_executable(${namefile} ${namefile}.c)
+    target_link_libraries(${namefile} PUBLIC codes)
+endforeach()
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 7f219aa3..fe8b8103 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2019 Neil McGlohon
+ * Mantained/edited by Elkin Cruz (2022)
  * See LICENSE notice in top-level directory
  */
 
@@ -34,8 +35,8 @@ static int group_index, lp_type_index, rep_id, offset;
 enum svr_event
 {
     KICKOFF = 1,
-    PING,          
-    PONG        
+    PING,
+    PONG
 };
 
 struct svr_msg
@@ -127,10 +128,15 @@ static void svr_init(svr_state * s, tw_lp * lp)
 
 static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
+    /* // This bit is just for testing. It allows to send a PING event only to the first LP/server
+     *if (lp->gid != 0) {
+     *    return;
+     *}
+     */
     s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW
 
-    svr_msg * ping_msg = malloc(sizeof(svr_msg)); //allocate memory for new message
-    
+    svr_msg ping_msg;
+
     tw_lpid local_dest = -1; //ID of a sever, relative to only servers
     tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs
 
@@ -142,23 +148,22 @@ static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     assert(local_dest < num_nodes);
     assert(local_dest != s->svr_id);
 
-    ping_msg->sender_id = s->svr_id; //encode our server ID into the new ping message
-    ping_msg->svr_event_type = PING; //set it to type PING
-    ping_msg->payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10]
-    
+    ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message
+    ping_msg.svr_event_type = PING; //set it to type PING
+    ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10]
+
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
     s->ping_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)ping_msg, 0, NULL, lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
 }
 
 static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
-    tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
-    tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
-
-    s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+    s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
+    tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
+    tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
 }
 
 static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -168,23 +173,22 @@ static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
     int original_sender = m->sender_id; //this is the server we need to send a PONG message back to
     s->payload_sum += m->payload_value; //increment our running sum of payload values received
 
-    svr_msg * pong_msg = malloc(sizeof(svr_msg)); //allocate memory for new message
-    pong_msg->sender_id = s->svr_id;
-    pong_msg->svr_event_type = PONG;
+    svr_msg pong_msg;
+    pong_msg.sender_id = s->svr_id;
+    pong_msg.svr_event_type = PONG;
     // only ping messages contain a payload value - not every value in a message struct must be utilized by all messages!
 
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0);
     s->pong_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)pong_msg, 0, NULL, lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
 }
 
 static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
+    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
     s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received
     s->payload_sum -= m->payload_value; //undo the increment of the payload sum
-
-    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
 }
 
 static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -197,30 +201,32 @@ static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
         return;
     }
 
-    //Now we need to send another ping message back to the sender of the pong
-    int pong_sender = m->sender_id; //this is the sender of the PONG message that we want to send another PING message to
-    
-    svr_msg * ping_msg = malloc(sizeof(svr_msg)); //allocate memory for new message
-    ping_msg->sender_id = s->svr_id; //encode our server ID into the new ping message
-    ping_msg->svr_event_type = PING; //set it to type PING
-    ping_msg->payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it
-    
+    //Now we need to send another ping message, to someone new (just to spice the simulation)
+    tw_lpid send_to = tw_rand_integer(lp->rng, 1, num_nodes - 2);
+    send_to = (s->svr_id + send_to) % num_nodes;
+
+    svr_msg ping_msg;
+    ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message
+    ping_msg.svr_event_type = PING; //set it to type PING
+    ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it
+
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
-    tw_lpid global_dest = codes_mapping_get_lpid_from_relative(pong_sender, group_name, lp_type_name, NULL, 0);
+    tw_lpid global_dest = codes_mapping_get_lpid_from_relative(send_to, group_name, lp_type_name, NULL, 0);
     s->ping_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)ping_msg, 0, NULL, lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
 }
 
 static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
-    s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received
+    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+    s->ping_msg_sent_count--;
+    tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
+    tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
 
     if (b->c1) //if we flipped the c1 flag in the forward event
         return; //then we don't need to undo any rngs or state change
 
-    tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
-    s->ping_msg_sent_count--;
-    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+    s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received
 }
 
 static void svr_finalize(svr_state * s, tw_lp * lp)
@@ -231,7 +237,7 @@ static void svr_finalize(svr_state * s, tw_lp * lp)
     int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
     tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
 
-    printf("Sever LPID:%llu svr_id:%d sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n", (unsigned long long)lp->gid, s->svr_id, total_msg_size_sent, 
+    printf("Sever LPID:%llu svr_id:%d sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n", (unsigned long long)lp->gid, s->svr_id, total_msg_size_sent,
         time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum);
 }
 
@@ -316,7 +322,7 @@ int main(int argc, char **argv)
     net_id = *net_ids;
     free(net_ids);
 
-    /* 1 day of simulation time is drastically huge but it will ensure 
+    /* 1 day of simulation time is drastically huge but it will ensure
        that the simulation doesn't try to end before all packets are delivered */
     g_tw_ts_end = s_to_ns(24 * 60 * 60);
 
@@ -339,4 +345,4 @@ int main(int argc, char **argv)
 
     tw_end();
     return 0;
-}
\ No newline at end of file
+}
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index a45c48d3..8ef922db 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -332,6 +332,7 @@ typedef enum event_t
     R_BW_HALT,
     T_BANDWIDTH,
     R_SNAPSHOT, //used for timed statistic outputs
+    T_NOTIFY_TOTAL_DELAY,
 } event_t;
 
 /* whether the last hop of a packet was global, local or a terminal */
@@ -2705,6 +2706,17 @@ void terminal_dally_commit(terminal_state * s,
             }
         }
     }
+
+    if(msg->type == T_NOTIFY_TOTAL_DELAY)
+    {
+        assert(lp->gid == msg->src_terminal_id);
+        assert(s->terminal_id == msg->dfdally_src_terminal_id);
+        printf("Terminal LPID:%llu (terminal_id:%u) Packet ID:%llu sent to LPID:%llu (terminal_id:%u) at %f delivered at %f delayed by %f in %d hops\n",
+                (unsigned long long) lp->gid, s->terminal_id, msg->packet_ID,
+                (unsigned long long) msg->dest_terminal_lpid, msg->dfdally_dest_terminal_id, 
+                msg->travel_start_time, msg->travel_end_time, msg->travel_end_time - msg->travel_start_time,
+                msg->my_N_hop);
+    }
 }
 
 void router_dally_commit(router_state * s,
@@ -3853,6 +3865,19 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     return;
 }
 
+static void send_total_delay_from_src_lp(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf)
+{
+    terminal_dally_message * new_msg;
+    tw_event *e = model_net_method_event_new(
+            msg->src_terminal_id, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
+
+    memcpy(new_msg, msg, sizeof(terminal_dally_message));
+    new_msg->type = T_NOTIFY_TOTAL_DELAY;
+    new_msg->magic = terminal_magic_num;
+    strcpy(new_msg->category, msg->category);
+    tw_event_send(e); 
+}
+
 //used by packet_arrive()
 static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size)
 {
@@ -4213,6 +4238,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
+            send_total_delay_from_src_lp(s, msg, lp, bf);
             send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
         }
         /* Remove the hash entry */
@@ -5342,6 +5368,7 @@ terminal_dally_event( terminal_state * s,
     s->ross_sample.fwd_events++;
     //*(int *)bf = (int)0;
     assert(msg->magic == terminal_magic_num);
+    //printf("LPID: %llu Event type %d processed at %f\n", lp->gid, msg->type, tw_now(lp));
 
     rc_stack_gc(lp, s->st);
     switch(msg->type)
@@ -5365,6 +5392,10 @@ terminal_dally_event( terminal_state * s,
         case T_BANDWIDTH:
             issue_bw_monitor_event(s, bf, msg, lp);
         break;
+    
+        case T_NOTIFY_TOTAL_DELAY:
+        //    We don't process the message, we only store the message when committing
+        break;
         default:
             printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type);
             tw_error(TW_LOC, "Msg type not supported");
@@ -5451,6 +5482,10 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
         case T_BANDWIDTH:
             issue_bw_monitor_event_rc(s,bf, msg, lp);
             break;
+    
+        case T_NOTIFY_TOTAL_DELAY:
+        //    We don't process the message, we only store the message when committing
+        break;
 
         default:
             tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type);
diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c
index 8df52463..7dcef16a 100644
--- a/src/util/rc-stack.c
+++ b/src/util/rc-stack.c
@@ -63,7 +63,7 @@ void rc_stack_push(
     if (s->mode != RC_NONOPT || free_fn == NULL) {
         rc_entry * ent = (rc_entry*)malloc(sizeof(*ent));
         assert(ent);
-        ent->e_sig = tw_now_sig(lp);
+        ent->e_sig = tw_now_sig(lp);  // NOTE(helq): This should fail if USE_RAND_TIEBREAKER is deactivated, shouldn't it?
         ent->data = data;
         ent->free_fn = free_fn;
         qlist_add_tail(&ent->ql, &s->head);

From eaffbb5bc6d6c808d5141574e93778b4e02390fd Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 19 Dec 2022 14:49:44 -0500
Subject: [PATCH 003/188] Improve strategy to store latency of packets from
 terminal to terminal

Previously, the latency (delay) of the packet was assumed to be the
latency of the last chunk to arrive at the destination terminal. This is
wrong. We must store the time at which the first chunk is sent and the
time at which the last chunk is received. This change paves the way to
implement a strategy to feed a predictor with latencies in the order in
which the packets where sent (not delievered).
---
 doc/example/tutorial-synthetic-ping-pong.c |  39 ++++----
 src/networks/model-net/dragonfly-dally.C   | 111 +++++++++++++++++++--
 2 files changed, 123 insertions(+), 27 deletions(-)

diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index fe8b8103..0dd67d63 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -49,11 +49,11 @@ struct svr_msg
 
 struct svr_state
 {
-    int svr_id;           /* the ID of this server */
+    tw_lpid svr_id;            /* the ID of this server */
     int ping_msg_sent_count;   /* PING messages sent */
     int ping_msg_recvd_count;  /* PING messages received */
     int pong_msg_sent_count;   /* PONG messages sent */
-    int pong_msg_recvd_count; /* PONG messages received */
+    int pong_msg_recvd_count;  /* PONG messages received */
     tw_stime start_ts;    /* time that this LP started sending requests */
     tw_stime end_ts;      /* time that this LP ended sending requests */
     int payload_sum;      /* the running sum of all payloads received */
@@ -128,11 +128,11 @@ static void svr_init(svr_state * s, tw_lp * lp)
 
 static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
-    /* // This bit is just for testing. It allows to send a PING event only to the first LP/server
-     *if (lp->gid != 0) {
-     *    return;
-     *}
-     */
+    (void) b;
+    // This bit is just for testing. It allows to send a PING event only to the first LP/server
+    //if (lp->gid != 0) {
+    //    return;
+    //}
     s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW
 
     svr_msg ping_msg;
@@ -160,6 +160,7 @@ static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
 
 static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
+    (void) b;
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
     s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
     tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
@@ -168,6 +169,7 @@ static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_l
 
 static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
+    (void) b;
     s->ping_msg_recvd_count++; //increment the counter for ping messages received
 
     int original_sender = m->sender_id; //this is the server we need to send a PONG message back to
@@ -186,9 +188,11 @@ static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 
 static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
+    (void) b;
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
-    s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received
+    s->pong_msg_sent_count--;
     s->payload_sum -= m->payload_value; //undo the increment of the payload sum
+    s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received
 }
 
 static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -218,13 +222,13 @@ static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 
 static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
-    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
-    s->ping_msg_sent_count--;
-    tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
-    tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
-
-    if (b->c1) //if we flipped the c1 flag in the forward event
-        return; //then we don't need to undo any rngs or state change
+    if (! b->c1) { //if we didn't flip the c1 flag in the forward event
+        model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+        s->ping_msg_sent_count--;
+        tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
+        tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
+        b->c1 = 0;
+    }
 
     s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received
 }
@@ -237,8 +241,9 @@ static void svr_finalize(svr_state * s, tw_lp * lp)
     int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
     tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
 
-    printf("Sever LPID:%llu svr_id:%d sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n", (unsigned long long)lp->gid, s->svr_id, total_msg_size_sent,
-        time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum);
+    printf("Sever LPID:%lu svr_id:%lu sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n",
+            (unsigned long)lp->gid, (unsigned long)s->svr_id, total_msg_size_sent,
+            time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum);
 }
 
 static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 8ef922db..a9186e47 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -30,6 +30,8 @@
 #include <map>
 #include <set>
 #include <algorithm>
+#include <queue>
+#include <deque>
 
 #include "codes/network-manager/dragonfly-network-manager.h"
 #include "codes/congestion-controller-model.h"
@@ -86,6 +88,7 @@ static int max_global_hops_minimal = 1;
 static long num_local_packets_sr = 0;
 static long num_local_packets_sg = 0;
 static long num_remote_packets = 0;
+static FILE * stats_file;
 
 static long global_stalled_chunk_counter = 0;
 
@@ -332,7 +335,7 @@ typedef enum event_t
     R_BW_HALT,
     T_BANDWIDTH,
     R_SNAPSHOT, //used for timed statistic outputs
-    T_NOTIFY_TOTAL_DELAY,
+    T_NOTIFY_TOTAL_LATENCY,
 } event_t;
 
 /* whether the last hop of a packet was global, local or a terminal */
@@ -438,6 +441,25 @@ static bool isRoutingNonminimalExplicit(int alg)
         return false;
 }
 
+struct packet_start {
+    uint64_t packet_ID;
+    // tw_lpid dest_terminal_id;  // ROSS id; LPID for terminal
+    unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
+    double travel_start_time;
+};
+
+struct packet_end {
+    uint64_t packet_ID;
+    double travel_end_time;
+};
+
+// Comparison function object to use in min-heap of packet_end's
+struct {
+    bool operator() (struct packet_end const l, struct packet_end const r) const {
+        return l.packet_ID > r.packet_ID;
+    }
+} packet_end_greater_cmp;
+
 /* handles terminal and router events like packet generate/send/receive/buffer */
 typedef struct terminal_state terminal_state;
 typedef struct router_state router_state;
@@ -535,6 +557,14 @@ struct terminal_state
     tw_stime fin_chunks_time_ross_sample;
     tw_stime *busy_time_ross_sample;
     struct dfly_cn_sample ross_sample;
+
+    // Variables to recover latency of packets sent to other terminals
+    // Sent packets (to be populated at by commit handler of packet sender)
+    deque<struct packet_start> sent_packets;
+    // min-heap for latencies of packets once they arrive (some packets might
+    // arrive faster than others, so a list like the one above is not feasible
+    // to store in order efficiently their arrival)
+    priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)> sent_packets_latency;
 };
 
 struct router_state
@@ -1229,6 +1259,15 @@ static int dfdally_get_assigned_router_id_from_terminal(const dragonfly_param *p
         {
             return (term_gid / num_cn_per_router) + (rail_id * routers_per_plane);
         }
+        // NOTE(helq): The compiler has been bothering me about the lack of a
+        // return statement here, so I added a message to something that
+        // (hopefully) will never happen.
+        else
+        {
+            tw_error(TW_LOC, "Error: this should have never happened. We couldn't "
+                    "figure out to which router does a terminal belong to :S");
+            return -1;
+        }
     }
     
 }
@@ -2176,6 +2215,15 @@ void dragonfly_dally_configure() {
 #ifdef ENABLE_CORTEX
 	model_net_topology = dragonfly_dally_cortex_topology;
 #endif
+
+    char const fmt[] = "packets-delay-gid=%lu.txt";
+    int sz = snprintf(NULL, 0, fmt, g_tw_mynode);
+    char filename_path[sz + 1]; // `+ 1` for terminating null byte
+    snprintf(filename_path, sizeof(filename_path), fmt, g_tw_mynode);
+    stats_file = fopen(filename_path, "w+");
+    if(!stats_file) {
+        tw_error(TW_LOC, "File %s could not be opened", filename_path);
+    }
 }
 
 /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
@@ -2210,6 +2258,7 @@ void dragonfly_dally_report_stats()
     // long long total_stalled_chunks; //helpful for debugging and determinism checking
     // MPI_Reduce( &global_stalled_chunk_counter, &total_stalled_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
 
+    fclose(stats_file);
     /* print statistics */
     if(!g_tw_mynode)
     {	
@@ -2626,6 +2675,26 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
     return -1;
 }
 
+static void packet_latency_save_to_file(unsigned int terminal_id, struct packet_start start, struct packet_end end)
+{
+    assert(start.packet_ID == end.packet_ID);
+    fprintf(stats_file, "%u,%u,%llu,%f,%f,%f\n",
+            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
+            start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
+}
+
+static void process_packet_latencies(terminal_state * s)
+{
+    while( !s->sent_packets.empty()
+        && !s->sent_packets_latency.empty()
+        && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID)
+    {
+        packet_latency_save_to_file(s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top());
+        s->sent_packets.pop_front();
+        s->sent_packets_latency.pop();
+    }
+}
+
 //Snapshot pattern
 //Sends a snapshot event - this wakes the router at the specified time to store its data somewhere
 //this storage place could be in the event or elsewehre so long as the data is over-writeable
@@ -2707,15 +2776,15 @@ void terminal_dally_commit(terminal_state * s,
         }
     }
 
-    if(msg->type == T_NOTIFY_TOTAL_DELAY)
+    if(msg->type == T_NOTIFY_TOTAL_LATENCY)
     {
         assert(lp->gid == msg->src_terminal_id);
         assert(s->terminal_id == msg->dfdally_src_terminal_id);
-        printf("Terminal LPID:%llu (terminal_id:%u) Packet ID:%llu sent to LPID:%llu (terminal_id:%u) at %f delivered at %f delayed by %f in %d hops\n",
-                (unsigned long long) lp->gid, s->terminal_id, msg->packet_ID,
-                (unsigned long long) msg->dest_terminal_lpid, msg->dfdally_dest_terminal_id, 
-                msg->travel_start_time, msg->travel_end_time, msg->travel_end_time - msg->travel_start_time,
-                msg->my_N_hop);
+        s->sent_packets_latency.push({
+                .packet_ID = msg->packet_ID,
+                .travel_end_time = msg->travel_end_time});
+
+        process_packet_latencies(s);
     }
 }
 
@@ -2919,6 +2988,14 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp )
         s->local_congestion_controller = (tlc_state*)calloc(1,sizeof(tlc_state));
         cc_terminal_local_controller_init(s->local_congestion_controller, lp, s->terminal_id, &s->workloads_finished_flag);
     }
+
+    // This doesn't allocate any memory, it calls the constructor on the
+    // previously allocated memory (by ROSS)
+    // In the future calling the constructor could be done with:
+    // std::construct_at, for now this syntax suffices and works
+    // (see https://en.cppreference.com/w/cpp/memory/construct_at)
+    new (&s->sent_packets) deque<struct packet_start>();
+    new (&s->sent_packets_latency) priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)>();
     return;
 }
 
@@ -3210,6 +3287,8 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     packet_gen--;
     s->packet_counter--;
 
+    s->sent_packets.pop_back();
+
     if(bf->c2)
         num_local_packets_sr--;
     if(bf->c3)
@@ -3478,6 +3557,12 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     msg->my_g_hop = 0;
     msg->my_hops_cur_group = 0;
 
+    // Storing packet info to be sent. Once packets arrive back, we can compute
+    // the latency of sending them
+    s->sent_packets.push_back({
+        .packet_ID = msg->packet_ID,
+        .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
+        .travel_start_time = tw_now(lp)});
 
     //qos stuff
     int num_qos_levels = s->params->num_qos_levels;
@@ -3872,7 +3957,7 @@ static void send_total_delay_from_src_lp(terminal_state * s, terminal_dally_mess
             msg->src_terminal_id, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
 
     memcpy(new_msg, msg, sizeof(terminal_dally_message));
-    new_msg->type = T_NOTIFY_TOTAL_DELAY;
+    new_msg->type = T_NOTIFY_TOTAL_LATENCY;
     new_msg->magic = terminal_magic_num;
     strcpy(new_msg->category, msg->category);
     tw_event_send(e); 
@@ -4385,6 +4470,12 @@ dragonfly_dally_terminal_final( terminal_state * s,
     free(s->vc_occupancy);
     free(s->terminal_msgs);
     free(s->terminal_msgs_tail);
+
+    // Calling destructors for data. There is no need to free data, the
+    // destructors do it themselves. ROSS allocated space for the datatypes and
+    // it doesn't need to be freed
+    s->sent_packets.~deque();
+    s->sent_packets_latency.~priority_queue();
 }
 
 void dragonfly_dally_router_final(router_state * s, tw_lp * lp){
@@ -5393,7 +5484,7 @@ terminal_dally_event( terminal_state * s,
             issue_bw_monitor_event(s, bf, msg, lp);
         break;
     
-        case T_NOTIFY_TOTAL_DELAY:
+        case T_NOTIFY_TOTAL_LATENCY:
         //    We don't process the message, we only store the message when committing
         break;
         default:
@@ -5483,7 +5574,7 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
             issue_bw_monitor_event_rc(s,bf, msg, lp);
             break;
     
-        case T_NOTIFY_TOTAL_DELAY:
+        case T_NOTIFY_TOTAL_LATENCY:
         //    We don't process the message, we only store the message when committing
         break;
 

From 90f623f733d7a62491b547f78ae0c92ee866917d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 21 Dec 2022 15:46:02 -0500
Subject: [PATCH 004/188] Sending packages directly to terminal instead of
 network

`g_is_surrogate_on` turns on or off the behaviour. When off, the
simulation runs as usual. When it is on, packages are sent directly to
the terminal destination skipping the network completely
---
 src/networks/model-net/dragonfly-dally.C | 134 ++++++++++++++++++++++-
 1 file changed, 128 insertions(+), 6 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index a9186e47..d380fc92 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -181,6 +181,9 @@ static char router_sample_file[MAX_NAME_LENGTH];
 //don't do overhead here - job of MPI layer
 static tw_stime mpi_soft_overhead = 0;
 
+// Parameters to tune surrogate mode
+static bool g_is_surrogate_on = false;
+
 typedef struct terminal_dally_message_list terminal_dally_message_list;
 struct terminal_dally_message_list {
     terminal_dally_message msg;
@@ -2776,14 +2779,15 @@ void terminal_dally_commit(terminal_state * s,
         }
     }
 
-    if(msg->type == T_NOTIFY_TOTAL_LATENCY)
+    if(!g_is_surrogate_on && msg->type == T_NOTIFY_TOTAL_LATENCY)
     {
         assert(lp->gid == msg->src_terminal_id);
         assert(s->terminal_id == msg->dfdally_src_terminal_id);
+        // TODO(helq): assert that msg->packet_ID to be present in s->sent_packets
+
         s->sent_packets_latency.push({
                 .packet_ID = msg->packet_ID,
                 .travel_end_time = msg->travel_end_time});
-
         process_packet_latencies(s);
     }
 }
@@ -3276,6 +3280,109 @@ static tw_stime dragonfly_dally_packet_event(
     return xfer_to_nic_time;
 }
 
+static double predict_latency(unsigned long src_terminal, unsigned long dest_terminal) {
+    // source and destination share the same router
+    if (src_terminal / 2 == dest_terminal / 2) {
+        return 2108.74;
+    } 
+    // source and destination are in the same group
+    else if (src_terminal / 8 == dest_terminal / 8) {
+        return 2390.13;
+    }
+    // source and destination are in different groups
+    else {
+        return 4162.77;
+    }
+}
+
+static void packet_generate_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
+{
+    struct mn_stats * stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
+    stat->send_count--;
+    stat->send_bytes -= msg->packet_size;
+    stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size;
+
+    s->packet_counter--;
+    s->total_gen_size -= msg->packet_size;
+    s->packet_gen--;
+    packet_gen--;
+}
+
+/* generates packet at the current dragonfly compute node */
+static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
+    packet_gen++;
+    s->packet_gen++;
+    s->total_gen_size += msg->packet_size;
+
+    assert(lp->gid != msg->dest_terminal_lpid);
+    const dragonfly_param *p = s->params;
+
+    msg->packet_ID = s->packet_counter;
+    s->packet_counter++;
+
+    // these actually don't matter because we are bypassing the network
+    msg->my_N_hop = -1;
+    msg->my_l_hop = -1;
+    msg->my_g_hop = -1;
+    msg->my_hops_cur_group = -1;
+
+    // determining injection delay
+    tw_stime injection_ts;
+    double bandwidth_coef = 1;
+    if (g_congestion_control_enabled) {
+        if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
+            bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
+        }
+        injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
+    }
+    else {
+        injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
+    }
+    tw_stime const nic_ts = injection_ts;
+
+    // Scheduling idle event to allow next message to be sent
+    bool const is_from_remote = false;
+    model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
+
+    // Sending packet directly to destination terminal
+    tw_stime const ts = 0;
+    terminal_dally_message * m;
+    void * remote_event;
+    void const * const m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+    tw_event * const e = model_net_method_event_new(
+            msg->dest_terminal_lpid, predict_latency(lp->gid, msg->dfdally_dest_terminal_id),
+            lp, DRAGONFLY_DALLY, (void**)&m, &remote_event);
+    memcpy(m, msg, sizeof(terminal_dally_message));
+    if (msg->remote_event_size_bytes) {
+        memcpy(remote_event, m_data_src, msg->remote_event_size_bytes);
+    }
+    m->magic = terminal_magic_num;
+    m->type = T_ARRIVE;
+    m->src_terminal_id = lp->gid;
+    m->dfdally_src_terminal_id = s->terminal_id; //m->travel_start_time = tw_now(lp);
+    //m->rail_id = msg->rail_id;
+    //m->vc_index = vcg;
+    // m->last_hop = TERMINAL;
+    m->path_type = -1;
+    m->local_event_size_bytes = 0;
+    m->is_intm_visited = 0;
+    m->intm_grp_id = -1;
+    m->intm_rtr_id = -1; //for legacy prog-adaptive
+    tw_event_send(e);
+
+    const int total_event_size = model_net_get_msg_sz(DRAGONFLY_DALLY)
+        + msg->remote_event_size_bytes + msg->local_event_size_bytes;
+    mn_stats* stat;
+    stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
+    stat->send_count++;
+    stat->send_bytes += msg->packet_size;
+    stat->send_time += (1/p->cn_bandwidth) * msg->packet_size;
+    if(stat->max_event_size < total_event_size)
+        stat->max_event_size = total_event_size;
+
+    return;
+}
+
 static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     int num_qos_levels = s->params->num_qos_levels;
@@ -4323,7 +4430,9 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
-            send_total_delay_from_src_lp(s, msg, lp, bf);
+            if (!g_is_surrogate_on) {
+                send_total_delay_from_src_lp(s, msg, lp, bf);
+            }
             send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
         }
         /* Remove the hash entry */
@@ -5465,11 +5574,20 @@ terminal_dally_event( terminal_state * s,
     switch(msg->type)
         {
         case T_GENERATE:
-            packet_generate(s,bf,msg,lp);
+            if (g_is_surrogate_on) {
+                packet_generate_predicted(s,bf,msg,lp);
+            } else {
+                packet_generate(s,bf,msg,lp);
+            }
         break;
         
         case T_ARRIVE:
-            packet_arrive(s,bf,msg,lp);
+            if (g_is_surrogate_on) {
+                void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+                send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
+            } else {
+                packet_arrive(s,bf,msg,lp);
+            }
         break;
         
         case T_SEND:
@@ -5555,7 +5673,11 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
     switch(msg->type)
     {
         case T_GENERATE:
-            packet_generate_rc(s, bf, msg, lp); 
+            if (g_is_surrogate_on) {
+                packet_generate_predicted_rc(s,bf,msg,lp);
+            } else {
+                packet_generate_rc(s, bf, msg, lp); 
+            }
             break;
 
         case T_SEND:

From 46bf23dda1d349bff3bb0a4b1f95c28fe41bffde Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 23 Dec 2022 14:29:15 -0500
Subject: [PATCH 005/188] "Network bypassing" implementation can be toggled
 on/off now

---
 codes/net/dragonfly-dally.h              |   8 +-
 src/networks/model-net/dragonfly-dally.C | 138 +++++++++++++++++++----
 2 files changed, 124 insertions(+), 22 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 9f89f60b..6f8949b8 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -12,6 +12,7 @@ extern "C" {
 #endif
 
 #include <ross.h>
+#include <stdbool.h>
 
 typedef struct terminal_dally_message terminal_dally_message;
 
@@ -92,7 +93,7 @@ struct terminal_dally_message
    int path_type;
    int saved_app_id;
 
-   /* for reverse computation */   
+   /* for reverse computation */
    short num_rngs;
    short num_cll;
 
@@ -112,12 +113,15 @@ struct terminal_dally_message
    tw_stime saved_min_lat;
    tw_stime saved_avg_time;
    tw_stime saved_rcv_time;
-   tw_stime saved_busy_time; 
+   tw_stime saved_busy_time;
    tw_stime saved_total_time;
    tw_stime saved_sample_time;
    tw_stime msg_start_time;
    tw_stime saved_busy_time_ross;
    tw_stime saved_fin_chunks_ross;
+
+   /* If we predict the latency from terminal to terminal of a packet, the event should be processed by the corresponding `_predicted` event handler */
+   bool is_predicted; // the event has been processed on surrogate mode ON
 };
 
 #ifdef __cplusplus
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index d380fc92..ac3b3515 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -46,7 +46,6 @@
 #define DFLY_HASH_TABLE_SIZE 4999
 // debugging parameters
 #define BW_MONITOR 1
-#define DEBUG_LP 892
 #define T_ID -1
 #define TRACK -1
 #define TRACK_PKT -1
@@ -88,7 +87,6 @@ static int max_global_hops_minimal = 1;
 static long num_local_packets_sr = 0;
 static long num_local_packets_sg = 0;
 static long num_remote_packets = 0;
-static FILE * stats_file;
 
 static long global_stalled_chunk_counter = 0;
 
@@ -182,6 +180,8 @@ static char router_sample_file[MAX_NAME_LENGTH];
 static tw_stime mpi_soft_overhead = 0;
 
 // Parameters to tune surrogate mode
+static bool is_terminal_to_terminal_latency_on = false;
+static FILE * terminal_to_terminal_latency_f = NULL;
 static bool g_is_surrogate_on = false;
 
 typedef struct terminal_dally_message_list terminal_dally_message_list;
@@ -2219,13 +2219,15 @@ void dragonfly_dally_configure() {
 	model_net_topology = dragonfly_dally_cortex_topology;
 #endif
 
-    char const fmt[] = "packets-delay-gid=%lu.txt";
-    int sz = snprintf(NULL, 0, fmt, g_tw_mynode);
-    char filename_path[sz + 1]; // `+ 1` for terminating null byte
-    snprintf(filename_path, sizeof(filename_path), fmt, g_tw_mynode);
-    stats_file = fopen(filename_path, "w+");
-    if(!stats_file) {
-        tw_error(TW_LOC, "File %s could not be opened", filename_path);
+    if (is_terminal_to_terminal_latency_on) {
+        char const fmt[] = "packets-delay-gid=%lu.txt";
+        int sz = snprintf(NULL, 0, fmt, g_tw_mynode);
+        char filename_path[sz + 1]; // `+ 1` for terminating null byte
+        snprintf(filename_path, sizeof(filename_path), fmt, g_tw_mynode);
+        terminal_to_terminal_latency_f = fopen(filename_path, "w+");
+        if(!terminal_to_terminal_latency_f) {
+            tw_error(TW_LOC, "File %s could not be opened", filename_path);
+        }
     }
 }
 
@@ -2261,7 +2263,9 @@ void dragonfly_dally_report_stats()
     // long long total_stalled_chunks; //helpful for debugging and determinism checking
     // MPI_Reduce( &global_stalled_chunk_counter, &total_stalled_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
 
-    fclose(stats_file);
+    if (is_terminal_to_terminal_latency_on) {
+        fclose(terminal_to_terminal_latency_f);
+    }
     /* print statistics */
     if(!g_tw_mynode)
     {	
@@ -2681,9 +2685,11 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
 static void packet_latency_save_to_file(unsigned int terminal_id, struct packet_start start, struct packet_end end)
 {
     assert(start.packet_ID == end.packet_ID);
-    fprintf(stats_file, "%u,%u,%llu,%f,%f,%f\n",
-            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
-            start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
+    if (is_terminal_to_terminal_latency_on) {
+        fprintf(terminal_to_terminal_latency_f, "%u,%u,%lu,%f,%f,%f\n",
+                terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
+                start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
+    }
 }
 
 static void process_packet_latencies(terminal_state * s)
@@ -3665,7 +3671,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     msg->my_hops_cur_group = 0;
 
     // Storing packet info to be sent. Once packets arrive back, we can compute
-    // the latency of sending them
+    // the latency of sending the packet
     s->sent_packets.push_back({
         .packet_ID = msg->packet_ID,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
@@ -3988,10 +3994,12 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     tw_event_send(e);
 
 
+#if DEBUG == 1
     if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID)
         printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %llu", 
                 cur_entry->msg.packet_ID, s->terminal_id, LLU(cur_entry->msg.dest_terminal_lpid),
                 LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id[msg->rail_id], LLU(router_id));
+#endif
 
     if(cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) 
     {
@@ -4057,7 +4065,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     return;
 }
 
-static void send_total_delay_from_src_lp(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf)
+static void notify_src_lp_on_total_latency(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf)
 {
     terminal_dally_message * new_msg;
     tw_event *e = model_net_method_event_new(
@@ -4100,6 +4108,79 @@ static void send_remote_event(terminal_state * s, terminal_dally_message * msg,
     return;
 }
 
+static void packet_arrive_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
+{
+    if(bf->c4) {
+        model_net_event_rc2(lp, &msg->event_rc);
+    }
+
+    s->finished_msgs--;
+    s->total_msg_size -= msg->total_size;
+    total_msg_sz -= msg->total_size;
+    N_finished_msgs--;
+    s->data_size_ross_sample -= msg->total_size;
+    s->ross_sample.data_size_sample -= msg->total_size;
+    s->data_size_sample -= msg->total_size;
+
+    s->finished_packets--;
+    N_finished_packets--;
+    
+    mn_stats * stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
+
+    stat->recv_bytes -= msg->packet_size;
+    stat->recv_count--;
+
+    stat->recv_time = msg->saved_rcv_time;
+
+    packet_fin--;
+    s->packet_fin--;
+}
+
+/* packet arrives at the destination terminal */
+static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) 
+{
+    assert(lp->gid == msg->dest_terminal_lpid);
+    /* WE do not allow self messages through dragonfly */
+    assert(lp->gid != msg->src_terminal_id);
+
+#if DEBUG == 1
+    if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
+        printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop);
+#endif
+    
+    s->packet_fin++;
+    packet_fin++;
+
+    //record for commit_f file IO
+    msg->travel_end_time = tw_now(lp);
+    tw_stime ete_latency = msg->travel_end_time - msg->travel_start_time;
+
+    mn_stats* stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
+    msg->saved_rcv_time = stat->recv_time;
+    stat->recv_time += ete_latency;
+
+    void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+
+    stat->recv_count++;
+    stat->recv_bytes += msg->packet_size;
+
+    N_finished_packets++;
+    s->finished_packets++;
+
+    s->data_size_sample += msg->total_size;
+    s->ross_sample.data_size_sample += msg->total_size;
+    s->data_size_ross_sample += msg->total_size;
+    N_finished_msgs++;
+    total_msg_sz += msg->total_size;
+    s->total_msg_size += msg->total_size;
+    s->finished_msgs++;
+    
+    // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
+    if(m_data_src && msg->remote_event_size_bytes > 0) {
+        send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
+    }
+}
+
 static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     if (g_congestion_control_enabled)
@@ -4262,8 +4343,10 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     }*/
     assert(lp->gid == msg->dest_terminal_lpid);
 
+#if DEBUG == 1
     if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
         printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop);
+#endif
     
     tw_stime ts = s->params->cn_credit_delay;
 
@@ -4431,7 +4514,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
             if (!g_is_surrogate_on) {
-                send_total_delay_from_src_lp(s, msg, lp, bf);
+                notify_src_lp_on_total_latency(s, msg, lp, bf);
             }
             send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
         }
@@ -5113,8 +5196,10 @@ static void router_packet_receive( router_state * s,
         tw_error(TW_LOC, "\n Output channel %d great than available VCs %d", output_chan, s->params->num_vcs - 1);
                 //cur_chunk->msg.packet_ID, output_chan, output_port, s->router_id, dest_router_id, cur_chunk->msg.path_type, src_grp_id, dest_grp_id, msg->src_terminal_id);
 
+#if DEBUG == 1
     if(cur_chunk->msg.packet_ID == LLU(TRACK_PKT) && cur_chunk->msg.src_terminal_id == T_ID)
             printf("\n Packet %llu arrived at router %u next stop %d final stop %d local hops %d global hops %d", cur_chunk->msg.packet_ID, s->router_id, next_stop, dest_router_id, cur_chunk->msg.my_l_hop, cur_chunk->msg.my_g_hop);
+#endif
 
     if(msg->remote_event_size_bytes > 0) {
         void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY_ROUTER, msg);
@@ -5413,8 +5498,10 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
 
     s->total_chunks[output_port]++;
 
+#if DEBUG == 1
     if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && cur_entry->msg.src_terminal_id == T_ID)
         printf("\n Queuing at the router %d ", s->router_id);
+#endif
 
     m->rail_id = msg->rail_id;
 
@@ -5575,16 +5662,17 @@ terminal_dally_event( terminal_state * s,
         {
         case T_GENERATE:
             if (g_is_surrogate_on) {
+                msg->is_predicted = true;
                 packet_generate_predicted(s,bf,msg,lp);
             } else {
+                msg->is_predicted = false;
                 packet_generate(s,bf,msg,lp);
             }
         break;
         
         case T_ARRIVE:
-            if (g_is_surrogate_on) {
-                void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
-                send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
+            if (msg->is_predicted) {
+                packet_arrive_predicted(s,bf,msg,lp);
             } else {
                 packet_arrive(s,bf,msg,lp);
             }
@@ -5685,7 +5773,11 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
             break;
 
         case T_ARRIVE:
-            packet_arrive_rc(s, bf, msg, lp);
+            if (g_is_surrogate_on) {
+                packet_arrive_predicted_rc(s, bf, msg, lp);
+            } else {
+                packet_arrive_rc(s, bf, msg, lp);
+            }
             break;
 
         case T_BUFFER:
@@ -6528,8 +6620,10 @@ static tw_lpid get_next_stop_legacy(router_state *s, tw_lp *lp, tw_bf *bf, termi
         codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, next_stop / num_routers_per_mgrp,
             next_stop % num_routers_per_mgrp, &router_dest_id);
     
+#if DEBUG == 1
         if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
                 printf("\n Next stop is %d ", next_stop);
+#endif
         
         return router_dest_id;
     }
@@ -6586,8 +6680,10 @@ static tw_lpid get_next_stop_legacy(router_state *s, tw_lp *lp, tw_bf *bf, termi
         dest_lp = dests;
     }
 
+#if DEBUG == 1
     if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
         printf("\n Next stop is %d ", dest_lp);
+#endif
     codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_lp / num_routers_per_mgrp,
         dest_lp % num_routers_per_mgrp, &router_dest_id);
 
@@ -6993,8 +7089,10 @@ static Connection dfdally_prog_adaptive_legacy_routing(router_state *s, tw_bf *b
   
     next_stop = get_next_stop_legacy(s, lp, bf, msg, dest_router_id, adap_chan, do_chan_selection, get_direct_con, &(msg->num_rngs));
 
+#if DEBUG == 1
     if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
         printf("\n Packet %llu arrived at router %u next stop %d final stop %d local hops %d global hops %d", msg->packet_ID, s->router_id, next_stop, dest_router_id, msg->my_l_hop, msg->my_g_hop);
+#endif
 
     output_port = get_output_port_legacy(s, msg, lp, bf, next_stop, &(msg->num_rngs)); 
     assert(output_port >= 0);

From a85b983e81333af7c9f76abfd8aa44e0e03889db Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 15 Jan 2023 15:08:32 -0500
Subject: [PATCH 006/188] Scaffolding for custom latency predictor done

---
 codes/net/dragonfly-dally.h                   |  12 +
 codes/surrogate.h                             |  71 +++
 doc/Doxyfile                                  | 376 ++++++++++++++++
 doc/example/CMakeLists.txt                    |   1 +
 .../tutorial-synthetic-ping-pong-surrogate.c  | 414 ++++++++++++++++++
 doc/example/tutorial-synthetic-ping-pong.c    |   6 +-
 .../determine_mean_std.py                     |  59 +++
 .../sort-delays.py                            |  38 ++
 src/CMakeLists.txt                            |   1 +
 src/networks/model-net/dragonfly-dally.C      | 178 +++++---
 src/util/surrogate.c                          |   3 +
 11 files changed, 1095 insertions(+), 64 deletions(-)
 create mode 100644 codes/surrogate.h
 create mode 100644 doc/Doxyfile
 create mode 100644 doc/example/tutorial-synthetic-ping-pong-surrogate.c
 create mode 100644 scripts/terminal-to-terminal-latency/determine_mean_std.py
 create mode 100644 scripts/terminal-to-terminal-latency/sort-delays.py
 create mode 100644 src/util/surrogate.c

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 6f8949b8..8aded418 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -13,6 +13,8 @@ extern "C" {
 
 #include <ross.h>
 #include <stdbool.h>
+#include <model-net.h>
+#include <codes/surrogate.h>
 
 typedef struct terminal_dally_message terminal_dally_message;
 
@@ -124,6 +126,16 @@ struct terminal_dally_message
    bool is_predicted; // the event has been processed on surrogate mode ON
 };
 
+struct dragonfly_dally_surrogate_configure_st {
+    director_init_f                   director_init;
+    struct packet_latency_predictor * latency_predictor;
+};
+
+void dragonfly_dally_surrogate_configure(
+        struct dragonfly_dally_surrogate_configure_st);
+
+void dragonfly_dally_save_packet_latency_to_file(char * dir_to_save);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/codes/surrogate.h b/codes/surrogate.h
new file mode 100644
index 00000000..30b91981
--- /dev/null
+++ b/codes/surrogate.h
@@ -0,0 +1,71 @@
+#ifndef CODES_SURROGATE_H
+#define CODES_SURROGATE_H
+
+/**
+ * surrogate.h -- Defining all functions to implement in order to run CODES in surrogate mode
+ * Elkin Cruz
+ *
+ * Copyright (c) 2023 Rensselaer Polytechnic Institute
+ */
+#include <ross.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Terminal-to-terminal packet latency prediction machinery
+ */
+
+// Packet latencies
+struct packet_start {
+    uint64_t packet_ID;
+    // tw_lpid dest_terminal_id;  // ROSS id; LPID for terminal
+    unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
+    double travel_start_time;
+};
+
+struct packet_end {
+    uint64_t packet_ID;
+    double travel_end_time;
+};
+
+// Definition of functions needed to define a predictor
+typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
+typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start, struct packet_end); // Feeds known latency for packet sent at `now`
+typedef double (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start); // Get prediction for packet sent to `destination` at `now`
+typedef void (*predict_rc_pred_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
+
+// Each network model defines its own way to setup the packet latency predictor
+struct packet_latency_predictor {
+    init_pred_f        init;
+    feed_pred_f        feed;
+    predict_pred_f     predict;
+    predict_rc_pred_f  predict_rc;
+    size_t             predictor_data_sz; // `predictor_data` size
+};
+
+/**
+ * Director machinery.
+ * The director is in charge of switching back and forth from
+ * surrogate mode to "high-def simulation"/vanilla mode
+ */
+
+// Functions that director should have access to
+typedef void (*switch_surrogate_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
+typedef bool (*is_surrogate_on_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
+
+struct director_data {
+    switch_surrogate_f  switch_surrogate;
+    is_surrogate_on_f   is_surrogate_on;
+};
+
+typedef void (*director_init_f) (struct director_data self);
+typedef void (*director_f) (void); // This is the function that is to be called at each GVT computation
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/doc/Doxyfile b/doc/Doxyfile
new file mode 100644
index 00000000..fce3f842
--- /dev/null
+++ b/doc/Doxyfile
@@ -0,0 +1,376 @@
+# Doxyfile 1.9.1
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+DOXYFILE_ENCODING      = UTF-8
+PROJECT_NAME           = "CODES"
+PROJECT_NUMBER         =
+PROJECT_BRIEF          =
+PROJECT_LOGO           =
+OUTPUT_DIRECTORY       = doc
+CREATE_SUBDIRS         = NO
+ALLOW_UNICODE_NAMES    = NO
+OUTPUT_LANGUAGE        = English
+OUTPUT_TEXT_DIRECTION  = None
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = YES
+STRIP_FROM_PATH        =
+STRIP_FROM_INC_PATH    =
+SHORT_NAMES            = NO
+JAVADOC_AUTOBRIEF      = YES
+JAVADOC_BANNER         = NO
+QT_AUTOBRIEF           = NO
+MULTILINE_CPP_IS_BRIEF = NO
+PYTHON_DOCSTRING       = YES
+INHERIT_DOCS           = YES
+SEPARATE_MEMBER_PAGES  = NO
+TAB_SIZE               = 4
+ALIASES                =
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+OPTIMIZE_FOR_FORTRAN   = NO
+OPTIMIZE_OUTPUT_VHDL   = NO
+OPTIMIZE_OUTPUT_SLICE  = NO
+EXTENSION_MAPPING      =
+MARKDOWN_SUPPORT       = YES
+TOC_INCLUDE_HEADINGS   = 5
+AUTOLINK_SUPPORT       = YES
+BUILTIN_STL_SUPPORT    = NO
+CPP_CLI_SUPPORT        = NO
+SIP_SUPPORT            = NO
+IDL_PROPERTY_SUPPORT   = YES
+DISTRIBUTE_GROUP_DOC   = NO
+GROUP_NESTED_COMPOUNDS = NO
+SUBGROUPING            = YES
+INLINE_GROUPED_CLASSES = NO
+INLINE_SIMPLE_STRUCTS  = NO
+TYPEDEF_HIDES_STRUCT   = NO
+LOOKUP_CACHE_SIZE      = 0
+NUM_PROC_THREADS       = 1
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = YES
+EXTRACT_PRIV_VIRTUAL   = YES
+EXTRACT_PACKAGE        = YES
+EXTRACT_STATIC         = YES
+EXTRACT_LOCAL_CLASSES  = YES
+EXTRACT_LOCAL_METHODS  = YES
+EXTRACT_ANON_NSPACES   = YES
+RESOLVE_UNNAMED_PARAMS = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = YES
+HIDE_SCOPE_NAMES       = NO
+HIDE_COMPOUND_REFERENCE= NO
+SHOW_INCLUDE_FILES     = YES
+SHOW_GROUPED_MEMB_INC  = NO
+FORCE_LOCAL_INCLUDES   = NO
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+SORT_BRIEF_DOCS        = NO
+SORT_MEMBERS_CTORS_1ST = NO
+SORT_GROUP_NAMES       = NO
+SORT_BY_SCOPE_NAME     = NO
+STRICT_PROTO_MATCHING  = NO
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ENABLED_SECTIONS       =
+MAX_INITIALIZER_LINES  = 30
+SHOW_USED_FILES        = YES
+SHOW_FILES             = YES
+SHOW_NAMESPACES        = YES
+FILE_VERSION_FILTER    =
+LAYOUT_FILE            =
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_NO_PARAMDOC       = NO
+WARN_AS_ERROR          = NO
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+INPUT                  = src/ codes/ doc/example/
+INPUT_ENCODING         = UTF-8
+FILE_PATTERNS          = *.c \
+                         *.C \
+                         *.h \
+                         *.py
+RECURSIVE              = YES
+EXCLUDE                =
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       =
+EXCLUDE_SYMBOLS        =
+EXAMPLE_PATH           =
+EXAMPLE_PATTERNS       = *
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             =
+INPUT_FILTER           =
+FILTER_PATTERNS        =
+FILTER_SOURCE_FILES    = NO
+FILTER_SOURCE_PATTERNS =
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+SOURCE_BROWSER         = YES
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = NO
+REFERENCES_RELATION    = NO
+REFERENCES_LINK_SOURCE = YES
+SOURCE_TOOLTIPS        = YES
+USE_HTAGS              = NO
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+ALPHABETICAL_INDEX     = YES
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            =
+HTML_FOOTER            =
+HTML_STYLESHEET        =
+HTML_EXTRA_STYLESHEET  =
+HTML_EXTRA_FILES       =
+HTML_COLORSTYLE_HUE    = 220
+HTML_COLORSTYLE_SAT    = 100
+HTML_COLORSTYLE_GAMMA  = 80
+HTML_TIMESTAMP         = NO
+HTML_DYNAMIC_MENUS     = YES
+HTML_DYNAMIC_SECTIONS  = NO
+HTML_INDEX_NUM_ENTRIES = 100
+GENERATE_DOCSET        = NO
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+DOCSET_PUBLISHER_NAME  = Publisher
+GENERATE_HTMLHELP      = NO
+CHM_FILE               =
+HHC_LOCATION           =
+GENERATE_CHI           = NO
+CHM_INDEX_ENCODING     =
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+GENERATE_QHP           = NO
+QCH_FILE               =
+QHP_NAMESPACE          = org.doxygen.Project
+QHP_VIRTUAL_FOLDER     = doc
+QHP_CUST_FILTER_NAME   =
+QHP_CUST_FILTER_ATTRS  =
+QHP_SECT_FILTER_ATTRS  =
+QHG_LOCATION           =
+GENERATE_ECLIPSEHELP   = NO
+ECLIPSE_DOC_ID         = org.doxygen.Project
+DISABLE_INDEX          = NO
+GENERATE_TREEVIEW      = NO
+ENUM_VALUES_PER_LINE   = 4
+TREEVIEW_WIDTH         = 250
+EXT_LINKS_IN_WINDOW    = NO
+HTML_FORMULA_FORMAT    = png
+FORMULA_FONTSIZE       = 10
+FORMULA_TRANSPARENT    = YES
+FORMULA_MACROFILE      =
+USE_MATHJAX            = NO
+MATHJAX_FORMAT         = HTML-CSS
+MATHJAX_RELPATH        = https://cdn.jsdelivr.net/npm/mathjax@2
+MATHJAX_EXTENSIONS     =
+MATHJAX_CODEFILE       =
+SEARCHENGINE           = YES
+SERVER_BASED_SEARCH    = NO
+EXTERNAL_SEARCH        = NO
+SEARCHENGINE_URL       =
+SEARCHDATA_FILE        = searchdata.xml
+EXTERNAL_SEARCH_ID     =
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+GENERATE_LATEX         = NO
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         =
+MAKEINDEX_CMD_NAME     = makeindex
+LATEX_MAKEINDEX_CMD    = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4
+EXTRA_PACKAGES         =
+LATEX_HEADER           =
+LATEX_FOOTER           =
+LATEX_EXTRA_STYLESHEET =
+LATEX_EXTRA_FILES      =
+PDF_HYPERLINKS         = YES
+USE_PDFLATEX           = YES
+LATEX_BATCHMODE        = NO
+LATEX_HIDE_INDICES     = NO
+LATEX_SOURCE_CODE      = NO
+LATEX_BIB_STYLE        = plain
+LATEX_TIMESTAMP        = NO
+LATEX_EMOJI_DIRECTORY  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    =
+RTF_EXTENSIONS_FILE    =
+RTF_SOURCE_CODE        = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_SUBDIR             =
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+GENERATE_XML           = NO
+XML_OUTPUT             = xml
+XML_PROGRAMLISTING     = YES
+XML_NS_MEMB_FILE_SCOPE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+GENERATE_DOCBOOK       = NO
+DOCBOOK_OUTPUT         = docbook
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = NO
+INCLUDE_PATH           =
+INCLUDE_FILE_PATTERNS  =
+PREDEFINED             =
+EXPAND_AS_DEFINED      =
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+TAGFILES               =
+GENERATE_TAGFILE       =
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+EXTERNAL_PAGES         = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+CLASS_DIAGRAMS         = YES
+DIA_PATH               =
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = YES
+DOT_NUM_THREADS        = 0
+DOT_FONTNAME           = Helvetica
+DOT_FONTSIZE           = 10
+DOT_FONTPATH           =
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+GROUP_GRAPHS           = YES
+UML_LOOK               = NO
+UML_LIMIT_NUM_FIELDS   = 10
+DOT_UML_DETAILS        = NO
+DOT_WRAP_THRESHOLD     = 17
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+CALLER_GRAPH           = NO
+GRAPHICAL_HIERARCHY    = YES
+DIRECTORY_GRAPH        = YES
+DOT_IMAGE_FORMAT       = png
+INTERACTIVE_SVG        = NO
+DOT_PATH               =
+DOTFILE_DIRS           =
+MSCFILE_DIRS           =
+DIAFILE_DIRS           =
+PLANTUML_JAR_PATH      =
+PLANTUML_CFG_FILE      =
+PLANTUML_INCLUDE_PATH  =
+DOT_GRAPH_MAX_NODES    = 50
+MAX_DOT_GRAPH_DEPTH    = 0
+DOT_TRANSPARENT        = NO
+DOT_MULTI_TARGETS      = NO
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
index c3f00579..2cd6c0c9 100644
--- a/doc/example/CMakeLists.txt
+++ b/doc/example/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(example-files
     example
     tutorial-synthetic-ping-pong
+    tutorial-synthetic-ping-pong-surrogate
     )
 
 foreach(namefile ${example-files})
diff --git a/doc/example/tutorial-synthetic-ping-pong-surrogate.c b/doc/example/tutorial-synthetic-ping-pong-surrogate.c
new file mode 100644
index 00000000..ed34fcb8
--- /dev/null
+++ b/doc/example/tutorial-synthetic-ping-pong-surrogate.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2019 Neil McGlohon
+ * Mantained/edited by Elkin Cruz (2022-2023)
+ * See LICENSE notice in top-level directory
+ */
+
+#include "codes/model-net.h"
+#include "codes/codes_mapping.h"
+#include "codes/surrogate.h"
+#include "codes/net/dragonfly-dally.h"
+
+
+static int net_id = 0;
+static int PAYLOAD_SZ = 4096;
+static unsigned long long num_nodes = 0;
+
+static char lp_io_dir[256] = {'\0'};
+static lp_io_handle io_handle;
+static unsigned int lp_io_use_suffix = 0;
+static int do_lp_io = 0;
+
+static int num_msgs = 20;
+
+typedef struct svr_msg svr_msg;
+typedef struct svr_state svr_state;
+
+/* global variables for codes mapping */
+static char group_name[MAX_NAME_LENGTH];
+static char lp_type_name[MAX_NAME_LENGTH];
+static int group_index, lp_type_index, rep_id, offset;
+
+/* type of events */
+enum svr_event
+{
+    KICKOFF = 1,
+    PING,
+    PONG
+};
+
+struct svr_msg
+{
+    enum svr_event svr_event_type; //KICKOFF, PING, or PONG
+    int sender_id; //ID of the sender workload LP to know who to send a PONG message back to
+    int payload_value; //Some value that we will encode as an example
+    model_net_event_return event_rc; //helper to encode data relating to CODES rng usage
+};
+
+struct svr_state
+{
+    tw_lpid svr_id;            /* the ID of this server */
+    int ping_msg_sent_count;   /* PING messages sent */
+    int ping_msg_recvd_count;  /* PING messages received */
+    int pong_msg_sent_count;   /* PONG messages sent */
+    int pong_msg_recvd_count;  /* PONG messages received */
+    tw_stime start_ts;    /* time that this LP started sending requests */
+    tw_stime end_ts;      /* time that this LP ended sending requests */
+    int payload_sum;      /* the running sum of all payloads received */
+};
+
+/* declaration of functions */
+static void svr_init(svr_state * s, tw_lp * lp);
+static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
+static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
+static void svr_finalize(svr_state * s, tw_lp * lp);
+static tw_stime ns_to_s(tw_stime ns);
+static tw_stime s_to_ns(tw_stime s);
+
+/* ROSS lptype function callback mapping */
+tw_lptype svr_lp = {
+    (init_f) svr_init,
+    (pre_run_f) NULL,
+    (event_f) svr_event,
+    (revent_f) svr_rev_event,
+    (commit_f) NULL,
+    (final_f)  svr_finalize,
+    (map_f) codes_mapping,
+    sizeof(svr_state),
+};
+
+const tw_optdef app_opt [] =
+{
+        TWOPT_GROUP("Model net synthetic traffic " ),
+    	TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "),
+    	TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "),
+        TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
+        TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
+        TWOPT_END()
+};
+
+const tw_lptype* svr_get_lp_type()
+{
+    return(&svr_lp);
+}
+
+static void svr_add_lp_type()
+{
+  lp_type_register("nw-lp", svr_get_lp_type());
+}
+
+// === START OF surrogate functions
+//
+static double predict_latency(void * data, tw_lp * lp, unsigned int src_terminal, struct packet_start packet_dest) {
+    (void) data;
+    (void) lp;
+
+    unsigned int dest_terminal = packet_dest.dfdally_dest_terminal_id;
+
+    // source and destination share the same router
+    if (src_terminal / 2 == dest_terminal / 2) {
+        return 2108.74;
+    }
+    // source and destination are in the same group
+    else if (src_terminal / 8 == dest_terminal / 8) {
+        return 2390.13;
+    }
+    // source and destination are in different groups
+    else {
+        return 4162.77;
+    }
+}
+
+static void init_pred(void * data, tw_lp * lp, unsigned int src_terminal) {
+    (void) data;
+    (void) lp;
+    (void) src_terminal;
+}
+
+static void feed_pred(void * data, tw_lp * lp, unsigned int src_terminal, struct packet_start start, struct packet_end end) {
+    (void) data;
+    (void) lp;
+    (void) src_terminal;
+    (void) start;
+    (void) end;
+}
+
+static void predict_latency_rc(void * data, tw_lp * lp) {
+    (void) data;
+    (void) lp;
+}
+
+
+struct packet_latency_predictor latency_predictor = {
+    .init              = init_pred,
+    .feed              = feed_pred,
+    .predict           = predict_latency,
+    .predict_rc        = predict_latency_rc,
+    .predictor_data_sz = 0
+};
+
+void director_init(struct director_data self) {
+    assert(! self.is_surrogate_on());
+    self.switch_surrogate();
+}
+//
+// === END OF surrogate functions
+
+static void svr_init(svr_state * s, tw_lp * lp)
+{
+    //Initialize State
+    s->ping_msg_sent_count = 0;
+    s->ping_msg_recvd_count = 0;
+    s->pong_msg_sent_count = 0;
+    s->pong_msg_recvd_count = 0;
+    s->start_ts = 0.0;
+    s->end_ts = 0.0;
+    s->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); /* turns the LP Global ID into the server ID */
+    s->payload_sum = 0;
+
+    //Now we create and send a self KICKOFF message - this is a PDES coordination event and thus doesn't need to be injected into the connected network
+    //so we won't use model_net_event(), that's reserved for stuff we want to send across the network
+
+    /* Set a time from now when this message is to be received by the recipient (self in this cae.) add some tiny random noise to help avoid event ties (different events with same timestamp) */
+    //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window
+    tw_stime kickoff_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001);
+
+    tw_event *e;
+    svr_msg *m;
+    e = tw_event_new(lp->gid, kickoff_time, lp); //ROSS method to create a new event
+    m = tw_event_data(e); //Gives you a pointer to the data encoded within event e
+    m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received
+    tw_event_send(e); //ROSS method to send off the event e with the encoded data in m
+}
+
+static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    // This bit is just for testing. It allows to send a PING event only to the first LP/server
+    //if (lp->gid != 0) {
+    //    return;
+    //}
+    s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW
+
+    svr_msg ping_msg;
+
+    tw_lpid local_dest = -1; //ID of a sever, relative to only servers
+    tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs
+
+    //We want to make sure we're not accidentally picking ourselves
+    local_dest = tw_rand_integer(lp->rng, 1, num_nodes - 2);
+    local_dest = (s->svr_id + local_dest) % num_nodes;
+    //local_dest is now a number [0,num_nodes) but is assuredly not s->svr_id
+    assert(local_dest >= 0);
+    assert(local_dest < num_nodes);
+    assert(local_dest != s->svr_id);
+
+    ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message
+    ping_msg.svr_event_type = PING; //set it to type PING
+    ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10]
+
+    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
+    global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
+    s->ping_msg_sent_count++;
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
+}
+
+static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+    s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
+    tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
+    tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
+}
+
+static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    s->ping_msg_recvd_count++; //increment the counter for ping messages received
+
+    int original_sender = m->sender_id; //this is the server we need to send a PONG message back to
+    s->payload_sum += m->payload_value; //increment our running sum of payload values received
+
+    svr_msg pong_msg;
+    pong_msg.sender_id = s->svr_id;
+    pong_msg.svr_event_type = PONG;
+    // only ping messages contain a payload value - not every value in a message struct must be utilized by all messages!
+
+    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
+    tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0);
+    s->pong_msg_sent_count++;
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
+}
+
+static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+    s->pong_msg_sent_count--;
+    s->payload_sum -= m->payload_value; //undo the increment of the payload sum
+    s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received
+}
+
+static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    s->pong_msg_recvd_count++; //increment the counter for ping messages received
+
+    if(s->ping_msg_sent_count >= num_msgs) //if we've sent enough ping messages, then we stop and don't send any more
+    {
+        b->c1 = 1; //flag that we didn't really do anything in this event so that if this event gets reversed, we don't over-aggressively revert state or RNGs
+        return;
+    }
+
+    //Now we need to send another ping message, to someone new (just to spice the simulation)
+    tw_lpid send_to = tw_rand_integer(lp->rng, 1, num_nodes - 2);
+    send_to = (s->svr_id + send_to) % num_nodes;
+
+    svr_msg ping_msg;
+    ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message
+    ping_msg.svr_event_type = PING; //set it to type PING
+    ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it
+
+    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
+    tw_lpid global_dest = codes_mapping_get_lpid_from_relative(send_to, group_name, lp_type_name, NULL, 0);
+    s->ping_msg_sent_count++;
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
+}
+
+static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    if (! b->c1) { //if we didn't flip the c1 flag in the forward event
+        model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+        s->ping_msg_sent_count--;
+        tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
+        tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
+        b->c1 = 0;
+    }
+
+    s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received
+}
+
+static void svr_finalize(svr_state * s, tw_lp * lp)
+{
+    s->end_ts = tw_now(lp);
+
+    int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count;
+    int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
+    tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
+
+    printf("Sever LPID:%lu svr_id:%lu sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n",
+            (unsigned long)lp->gid, (unsigned long)s->svr_id, total_msg_size_sent,
+            time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum);
+}
+
+static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    switch (m->svr_event_type)
+    {
+        case KICKOFF:
+            handle_kickoff_event(s, b, m, lp);
+            break;
+        case PING:
+            handle_ping_event(s, b, m, lp);
+            break;
+        case PONG:
+            handle_pong_event(s, b, m, lp);
+            break;
+        default:
+            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
+            break;
+    }
+}
+
+static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    switch (m->svr_event_type)
+    {
+        case KICKOFF:
+            handle_kickoff_rev_event(s, b, m, lp);
+            break;
+        case PING:
+            handle_ping_rev_event(s, b, m, lp);
+            break;
+        case PONG:
+            handle_pong_rev_event(s, b, m, lp);
+            break;
+        default:
+            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
+            break;
+    }
+}
+
+/* convert ns to seconds */
+static tw_stime ns_to_s(tw_stime ns)
+{
+    return(ns / (1000.0 * 1000.0 * 1000.0));
+}
+static tw_stime s_to_ns(tw_stime s)
+{
+    return(s*1000.0*1000.0*1000.0);
+}
+
+int main(int argc, char **argv)
+{
+    int nprocs;
+    int rank;
+    int num_nets;
+    int *net_ids;
+
+    tw_opt_add(app_opt);
+    tw_init(&argc, &argv);
+
+    codes_comm_update();
+
+    dragonfly_dally_save_packet_latency_to_file("pingpong");
+    //dragonfly_dally_surrogate_configure((struct dragonfly_dally_surrogate_configure_st){
+    //    .director_init = director_init,
+    //    .latency_predictor = &latency_predictor
+    //});
+
+    if(argc < 2)
+    {
+            printf("\n Usage: mpirun <args> --sync=1/2/3 -- <config_file.conf> ");
+            MPI_Finalize();
+            return 0;
+    }
+
+    MPI_Comm_rank(MPI_COMM_CODES, &rank);
+    MPI_Comm_size(MPI_COMM_CODES, &nprocs);
+
+    configuration_load(argv[2], MPI_COMM_CODES, &config);
+
+    model_net_register();
+    svr_add_lp_type();
+
+    codes_mapping_setup();
+
+    net_ids = model_net_configure(&num_nets);
+    net_id = *net_ids;
+    free(net_ids);
+
+    /* 1 day of simulation time is drastically huge but it will ensure
+       that the simulation doesn't try to end before all packets are delivered */
+    g_tw_ts_end = s_to_ns(24 * 60 * 60);
+
+    num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
+    assert(num_nodes);
+
+    if(lp_io_dir[0])
+    {
+        do_lp_io = 1;
+        int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0;
+        int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES);
+        assert(ret == 0 || !"lp_io_prepare failure");
+    }
+    tw_run();
+    if (do_lp_io){
+        int ret = lp_io_flush(io_handle, MPI_COMM_CODES);
+        assert(ret == 0 || !"lp_io_flush failure");
+    }
+    model_net_report_stats(net_id);
+
+    tw_end();
+    return 0;
+}
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 0dd67d63..4ae1ecbc 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -1,15 +1,11 @@
 /*
  * Copyright (C) 2019 Neil McGlohon
- * Mantained/edited by Elkin Cruz (2022)
+ * Mantained/edited by Elkin Cruz (2022-2023)
  * See LICENSE notice in top-level directory
  */
 
 #include "codes/model-net.h"
-#include "codes/lp-io.h"
-#include "codes/codes.h"
 #include "codes/codes_mapping.h"
-#include "codes/configuration.h"
-#include "codes/lp-type-lookup.h"
 
 
 static int net_id = 0;
diff --git a/scripts/terminal-to-terminal-latency/determine_mean_std.py b/scripts/terminal-to-terminal-latency/determine_mean_std.py
new file mode 100644
index 00000000..a8058f84
--- /dev/null
+++ b/scripts/terminal-to-terminal-latency/determine_mean_std.py
@@ -0,0 +1,59 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def mean_and_std(array: np.array) -> tuple[float, float]:
+    return np.mean(array), np.std(array)  # type: ignore
+
+
+if __name__ == '__main__':
+    delays = np.loadtxt("packets-delay.csv", skiprows=1, delimiter=",")
+
+    # Distribution
+    delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
+    delays_same_group = np.bitwise_xor(
+        (delays[:, 0] // 8) == (delays[:, 1] // 8),
+        delays_same_router)
+    delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8)
+
+    mean, std = mean_and_std(delays[:, 4])
+    print(f"total mean: {mean:.2f} std: {std:.2f}")
+    print()
+
+    delays0 = delays[delays[:, 0] == 0]
+    mean, std = mean_and_std(delays0[:, 4])
+    print(f"terminal 0 mean: {mean:.2f} std: {std:.2f}")
+    print()
+
+    fig, axs = plt.subplots(2, 2)
+    axs[0, 0].set_title("Latency from all terminals to all")
+    # axs[0, 0].set_xlabel("latency")
+    axs[0, 0].hist(delays[:, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[0, 1].set_title("Latency to terminals in same router")
+    # axs[0, 1].set_xlabel("latency")
+    axs[0, 1].hist(delays[delays_same_router, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[1, 0].set_title("Latency to terminals in same group")
+    axs[1, 0].set_xlabel("latency")
+    axs[1, 0].hist(delays[delays_same_group, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[1, 1].set_title("Latency to terminals in other groups")
+    axs[1, 1].set_xlabel("latency")
+    axs[1, 1].hist(delays[delays_out_group, 4], bins=50, density=True, alpha=0.6, color='b')
+    plt.show()
+
+    buckets = [delays0[delays0[:, 1] == i] for i in range(1, 72)]
+    buckets_processed = np.array([mean_and_std(b[:, 4]) for b in buckets])
+    print("Destination, Means and stds for terminal 0")
+    for i, (mean, std) in enumerate(buckets_processed):
+        print(f"{i+1}, {mean:.2f}, {std:.2f}")
+    print()
+
+    mean, std = mean_and_std(delays[delays_same_router, 4])
+    print(f"same router mean: {mean:.2f} std: {std:.2f}")
+    print()
+
+    mean, std = mean_and_std(delays[delays_same_group, 4])
+    print(f"same group mean: {mean:.2f} std: {std:.2f} (excluding same router)")
+    print()
+
+    mean, std = mean_and_std(delays[delays_out_group, 4])
+    print(f"other groups mean: {mean:.2f} std: {std:.2f}")
diff --git a/scripts/terminal-to-terminal-latency/sort-delays.py b/scripts/terminal-to-terminal-latency/sort-delays.py
new file mode 100644
index 00000000..28bff70c
--- /dev/null
+++ b/scripts/terminal-to-terminal-latency/sort-delays.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+import numpy as np
+import glob
+import fileinput
+import sys
+import pathlib
+
+from typing import Any
+
+
+def collect_data_numpy(
+    path: pathlib.Path | str,
+    filepreffix: str,
+    delimiter: str | None = None,
+    dtype: Any = int
+) -> np.ndarray[Any, Any]:
+    escaped_path = pathlib.Path(glob.escape(path))  # type: ignore
+    stat_files = glob.glob(str(escaped_path / f"{filepreffix}-gid=*.txt"))
+    if not stat_files:
+        print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr)
+        exit(1)
+
+    return np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype)
+
+
+if __name__ == '__main__':
+    delays = collect_data_numpy('.', 'packets-delay', delimiter=',',
+                                dtype=np.dtype('float'))
+    # sorting by source terminal and packet id
+    sorted_indx = np.lexsort((delays[:, 2], delays[:, 0]))
+    delays = delays[sorted_indx]
+
+    # saving some columns
+    np.savetxt("packets-delay.csv", delays[:, (0, 1, 2, 3, 5)],
+               fmt="%d,%d,%d,%f,%f",
+               header='src_terminal,dst_terminal,packet_id,start_time,delay',
+               comments='')
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d7d34112..6c2b7011 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,6 +54,7 @@ list(APPEND SRCS
   	util/codes-comm.c
     util/rc-stack.c
     util/congestion-controller.C
+    util/surrogate.c
 
     iokernellang/codesparser.h
     iokernellang/codesparser.c
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index ac3b3515..b61a9840 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -19,9 +19,9 @@
 #include "codes/jenkins-hash.h"
 #include "codes/codes_mapping.h"
 #include "codes/codes.h"
-#include "codes/model-net.h"
 #include "codes/model-net-method.h"
 #include "codes/model-net-lp.h"
+#include "codes/surrogate.h"
 #include "codes/net/dragonfly-dally.h"
 #include "sys/file.h"
 #include "codes/quickhash.h"
@@ -32,6 +32,8 @@
 #include <algorithm>
 #include <queue>
 #include <deque>
+#include <errno.h>
+#include <sys/stat.h>
 
 #include "codes/network-manager/dragonfly-network-manager.h"
 #include "codes/congestion-controller-model.h"
@@ -176,13 +178,19 @@ static int sample_rtr_bytes_written = 0;
 static char cn_sample_file[MAX_NAME_LENGTH];
 static char router_sample_file[MAX_NAME_LENGTH];
 
-//don't do overhead here - job of MPI layer
-static tw_stime mpi_soft_overhead = 0;
+// File to store packet latency from terminal-to-terminal
+// NOTE: Only non-predicted latencies are saved to file
+static FILE * packet_latency_f = NULL;
 
-// Parameters to tune surrogate mode
-static bool is_terminal_to_terminal_latency_on = false;
-static FILE * terminal_to_terminal_latency_f = NULL;
-static bool g_is_surrogate_on = false;
+// ==== START OF Parameters to tune surrogate mode ====
+// 
+static bool surrogate_configured = false;
+static bool is_surrogate_on = false;
+static struct packet_latency_predictor * terminal_predictor = NULL;
+static void switch_surrogate(void);
+static bool is_surrogate_on_fun(void);
+//
+// ==== END OF Parameters to tune surrogate mode ====
 
 typedef struct terminal_dally_message_list terminal_dally_message_list;
 struct terminal_dally_message_list {
@@ -444,20 +452,12 @@ static bool isRoutingNonminimalExplicit(int alg)
         return false;
 }
 
-struct packet_start {
-    uint64_t packet_ID;
-    // tw_lpid dest_terminal_id;  // ROSS id; LPID for terminal
-    unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
-    double travel_start_time;
-};
-
-struct packet_end {
-    uint64_t packet_ID;
-    double travel_end_time;
-};
+/**
+ * Surrogate definitions and data
+ */
 
 // Comparison function object to use in min-heap of packet_end's
-struct {
+static struct {
     bool operator() (struct packet_end const l, struct packet_end const r) const {
         return l.packet_ID > r.packet_ID;
     }
@@ -568,6 +568,9 @@ struct terminal_state
     // arrive faster than others, so a list like the one above is not feasible
     // to store in order efficiently their arrival)
     priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)> sent_packets_latency;
+
+    // Predictor data
+    void * predictor_data;
 };
 
 struct router_state
@@ -2219,16 +2222,49 @@ void dragonfly_dally_configure() {
 	model_net_topology = dragonfly_dally_cortex_topology;
 #endif
 
-    if (is_terminal_to_terminal_latency_on) {
-        char const fmt[] = "packets-delay-gid=%lu.txt";
-        int sz = snprintf(NULL, 0, fmt, g_tw_mynode);
-        char filename_path[sz + 1]; // `+ 1` for terminating null byte
-        snprintf(filename_path, sizeof(filename_path), fmt, g_tw_mynode);
-        terminal_to_terminal_latency_f = fopen(filename_path, "w+");
-        if(!terminal_to_terminal_latency_f) {
-            tw_error(TW_LOC, "File %s could not be opened", filename_path);
+}
+
+void dragonfly_dally_surrogate_configure(
+        struct dragonfly_dally_surrogate_configure_st conf) {
+
+    assert(conf.director_init != NULL);
+    assert(conf.latency_predictor != NULL);
+    assert(conf.latency_predictor->init != NULL);
+    assert(conf.latency_predictor->feed != NULL);
+    assert(conf.latency_predictor->predict != NULL);
+    assert(conf.latency_predictor->predict_rc != NULL);
+    assert(! surrogate_configured);
+
+    conf.director_init({
+        .switch_surrogate = switch_surrogate,
+        .is_surrogate_on = is_surrogate_on_fun});
+    terminal_predictor = conf.latency_predictor;
+    
+    surrogate_configured = true;
+}
+
+void dragonfly_dally_save_packet_latency_to_file(char * dir_to_save) {
+    assert(packet_latency_f == NULL);
+    // checking 
+    int const NO_ERROR = 0;
+    struct stat st;
+    memset(&st, 0, sizeof(struct stat));
+    if(g_tw_mynode == 0 && stat(dir_to_save, &st) == -1) {
+        int res = mkdir(dir_to_save, 0700);
+        if (res != NO_ERROR) {
+            tw_error(TW_LOC, "Error (%d) occurred when attempting to mkdir folder `%s`", errno, dir_to_save);
         }
     }
+    MPI_Barrier(MPI_COMM_CODES);
+
+    char const fmt[] = "%s/packets-delay-gid=%lu.txt";
+    int sz = snprintf(NULL, 0, fmt, dir_to_save, g_tw_mynode);
+    char filename_path[sz + 1]; // `+ 1` for terminating null byte
+    snprintf(filename_path, sizeof(filename_path), fmt, dir_to_save, g_tw_mynode);
+    packet_latency_f = fopen(filename_path, "w+");
+    if(!packet_latency_f) {
+        tw_error(TW_LOC, "File %s could not be opened", filename_path);
+    }
 }
 
 /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
@@ -2263,8 +2299,8 @@ void dragonfly_dally_report_stats()
     // long long total_stalled_chunks; //helpful for debugging and determinism checking
     // MPI_Reduce( &global_stalled_chunk_counter, &total_stalled_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
 
-    if (is_terminal_to_terminal_latency_on) {
-        fclose(terminal_to_terminal_latency_f);
+    if (packet_latency_f) {
+        fclose(packet_latency_f);
     }
     /* print statistics */
     if(!g_tw_mynode)
@@ -2685,24 +2721,42 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
 static void packet_latency_save_to_file(unsigned int terminal_id, struct packet_start start, struct packet_end end)
 {
     assert(start.packet_ID == end.packet_ID);
-    if (is_terminal_to_terminal_latency_on) {
-        fprintf(terminal_to_terminal_latency_f, "%u,%u,%lu,%f,%f,%f\n",
-                terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
-                start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
-    }
+    fprintf(packet_latency_f, "%u,%u,%lu,%f,%f,%f\n",
+            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
+            start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
+}
+
+// ==== START OF Surrogate functions definition ====
+
+static void switch_surrogate(void) {
+    is_surrogate_on = ! is_surrogate_on;
+    // TODO: `sent_packets` and `sent_packets_latency` have to be cleaned on switches. This won't be an apparent problem until switching to and from surrogate mode happens in a very short amount of time
+}
+
+static bool is_surrogate_on_fun(void) {
+    return is_surrogate_on;
 }
 
-static void process_packet_latencies(terminal_state * s)
+// Goes through all received packet latencies and process them in order in which they were sent through the network
+static void process_packet_latencies(terminal_state * s, tw_lp * lp)
 {
     while( !s->sent_packets.empty()
         && !s->sent_packets_latency.empty()
         && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID)
     {
-        packet_latency_save_to_file(s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top());
+        if (packet_latency_f) {
+            packet_latency_save_to_file(s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top());
+        }
+        if (surrogate_configured && !is_surrogate_on) {
+            assert(terminal_predictor != NULL);
+            terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top());
+        }
         s->sent_packets.pop_front();
         s->sent_packets_latency.pop();
     }
 }
+//
+// ==== END OF Surrogate functions definition ====
 
 //Snapshot pattern
 //Sends a snapshot event - this wakes the router at the specified time to store its data somewhere
@@ -2785,7 +2839,7 @@ void terminal_dally_commit(terminal_state * s,
         }
     }
 
-    if(!g_is_surrogate_on && msg->type == T_NOTIFY_TOTAL_LATENCY)
+    if(msg->type == T_NOTIFY_TOTAL_LATENCY)
     {
         assert(lp->gid == msg->src_terminal_id);
         assert(s->terminal_id == msg->dfdally_src_terminal_id);
@@ -2794,7 +2848,7 @@ void terminal_dally_commit(terminal_state * s,
         s->sent_packets_latency.push({
                 .packet_ID = msg->packet_ID,
                 .travel_end_time = msg->travel_end_time});
-        process_packet_latencies(s);
+        process_packet_latencies(s, lp);
     }
 }
 
@@ -3006,6 +3060,14 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp )
     // (see https://en.cppreference.com/w/cpp/memory/construct_at)
     new (&s->sent_packets) deque<struct packet_start>();
     new (&s->sent_packets_latency) priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)>();
+
+    // alloc'ing memory for predictor, calling initiliazer for predictor
+    if (terminal_predictor != NULL && terminal_predictor->predictor_data_sz > 0) {
+        s->predictor_data = calloc(1, sizeof terminal_predictor->predictor_data_sz);
+        terminal_predictor->init(s->predictor_data, lp, s->terminal_id);
+    } else {
+        s->predictor_data = NULL;
+    }
     return;
 }
 
@@ -3286,21 +3348,6 @@ static tw_stime dragonfly_dally_packet_event(
     return xfer_to_nic_time;
 }
 
-static double predict_latency(unsigned long src_terminal, unsigned long dest_terminal) {
-    // source and destination share the same router
-    if (src_terminal / 2 == dest_terminal / 2) {
-        return 2108.74;
-    } 
-    // source and destination are in the same group
-    else if (src_terminal / 8 == dest_terminal / 8) {
-        return 2390.13;
-    }
-    // source and destination are in different groups
-    else {
-        return 4162.77;
-    }
-}
-
 static void packet_generate_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     struct mn_stats * stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
@@ -3308,6 +3355,8 @@ static void packet_generate_predicted_rc(terminal_state * s, tw_bf * bf, termina
     stat->send_bytes -= msg->packet_size;
     stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size;
 
+    terminal_predictor->predict_rc(s->predictor_data, lp);
+
     s->packet_counter--;
     s->total_gen_size -= msg->packet_size;
     s->packet_gen--;
@@ -3350,14 +3399,21 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     bool const is_from_remote = false;
     model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
 
+    // Using predictor to find latency
+    double const latency = 
+        terminal_predictor->predict(s->predictor_data, lp, s->terminal_id,
+          {.packet_ID = msg->packet_ID,
+           .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
+           .travel_start_time = tw_now(lp)
+          });
+
     // Sending packet directly to destination terminal
     tw_stime const ts = 0;
     terminal_dally_message * m;
     void * remote_event;
     void const * const m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
     tw_event * const e = model_net_method_event_new(
-            msg->dest_terminal_lpid, predict_latency(lp->gid, msg->dfdally_dest_terminal_id),
-            lp, DRAGONFLY_DALLY, (void**)&m, &remote_event);
+            msg->dest_terminal_lpid, latency, lp, DRAGONFLY_DALLY, (void**)&m, &remote_event);
     memcpy(m, msg, sizeof(terminal_dally_message));
     if (msg->remote_event_size_bytes) {
         memcpy(remote_event, m_data_src, msg->remote_event_size_bytes);
@@ -4513,7 +4569,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
-            if (!g_is_surrogate_on) {
+            if (packet_latency_f || surrogate_configured) {
                 notify_src_lp_on_total_latency(s, msg, lp, bf);
             }
             send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
@@ -4668,6 +4724,10 @@ dragonfly_dally_terminal_final( terminal_state * s,
     // it doesn't need to be freed
     s->sent_packets.~deque();
     s->sent_packets_latency.~priority_queue();
+
+    if (s->predictor_data) {
+        free(s->predictor_data);
+    }
 }
 
 void dragonfly_dally_router_final(router_state * s, tw_lp * lp){
@@ -5661,7 +5721,7 @@ terminal_dally_event( terminal_state * s,
     switch(msg->type)
         {
         case T_GENERATE:
-            if (g_is_surrogate_on) {
+            if (is_surrogate_on) {
                 msg->is_predicted = true;
                 packet_generate_predicted(s,bf,msg,lp);
             } else {
@@ -5761,7 +5821,7 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
     switch(msg->type)
     {
         case T_GENERATE:
-            if (g_is_surrogate_on) {
+            if (msg->is_predicted) {
                 packet_generate_predicted_rc(s,bf,msg,lp);
             } else {
                 packet_generate_rc(s, bf, msg, lp); 
@@ -5773,7 +5833,7 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
             break;
 
         case T_ARRIVE:
-            if (g_is_surrogate_on) {
+            if (msg->is_predicted) {
                 packet_arrive_predicted_rc(s, bf, msg, lp);
             } else {
                 packet_arrive_rc(s, bf, msg, lp);
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
new file mode 100644
index 00000000..18dfb4d6
--- /dev/null
+++ b/src/util/surrogate.c
@@ -0,0 +1,3 @@
+#include <codes/surrogate.h>
+
+// This is file is empty because the header doesn't need a .c file. Yet, it exists to make sure that the header has all its includes fulfilled

From 43d152bc19ecbcfc8145f94a854e9e223f184dc0 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 16 Jan 2023 19:36:28 -0500
Subject: [PATCH 007/188] Basic Director implementation to switch to and from
 surrogate mode

- This needs ROSS commit 178e3c0
- The director is a function that it's called at GVT by ROSS
- Average latency predictor implemented
---
 codes/net/dragonfly-dally.h                   |   3 +-
 codes/surrogate.h                             |  10 +-
 .../tutorial-synthetic-ping-pong-surrogate.c  | 126 ++++++++++++++----
 src/CMakeLists.txt                            |   2 +-
 src/networks/model-net/dragonfly-dally.C      |  19 +--
 5 files changed, 116 insertions(+), 44 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 8aded418..6ca36040 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -128,13 +128,14 @@ struct terminal_dally_message
 
 struct dragonfly_dally_surrogate_configure_st {
     director_init_f                   director_init;
+    director_f                        director_call;
     struct packet_latency_predictor * latency_predictor;
 };
 
 void dragonfly_dally_surrogate_configure(
         struct dragonfly_dally_surrogate_configure_st);
 
-void dragonfly_dally_save_packet_latency_to_file(char * dir_to_save);
+void dragonfly_dally_save_packet_latency_to_file(char const * const dir_to_save);
 
 #ifdef __cplusplus
 }
diff --git a/codes/surrogate.h b/codes/surrogate.h
index 30b91981..8148b43d 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -33,16 +33,16 @@ struct packet_end {
 
 // Definition of functions needed to define a predictor
 typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
-typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start, struct packet_end); // Feeds known latency for packet sent at `now`
-typedef double (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start); // Get prediction for packet sent to `destination` at `now`
-typedef void (*predict_rc_pred_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
+typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start *, struct packet_end *); // Feeds known latency for packet sent at `now`
+typedef double (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start *); // Get prediction for packet sent to `destination` at `now`
+typedef void (*predict_pred_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
 
 // Each network model defines its own way to setup the packet latency predictor
 struct packet_latency_predictor {
     init_pred_f        init;
     feed_pred_f        feed;
     predict_pred_f     predict;
-    predict_rc_pred_f  predict_rc;
+    predict_pred_rc_f  predict_rc;
     size_t             predictor_data_sz; // `predictor_data` size
 };
 
@@ -62,7 +62,7 @@ struct director_data {
 };
 
 typedef void (*director_init_f) (struct director_data self);
-typedef void (*director_f) (void); // This is the function that is to be called at each GVT computation
+typedef void (*director_f) (tw_pe * pe); // This is the function that is to be called at each GVT computation
 
 #ifdef __cplusplus
 }
diff --git a/doc/example/tutorial-synthetic-ping-pong-surrogate.c b/doc/example/tutorial-synthetic-ping-pong-surrogate.c
index ed34fcb8..8d866318 100644
--- a/doc/example/tutorial-synthetic-ping-pong-surrogate.c
+++ b/doc/example/tutorial-synthetic-ping-pong-surrogate.c
@@ -61,6 +61,7 @@ struct svr_state
 static void svr_init(svr_state * s, tw_lp * lp);
 static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
 static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
+static void svr_commit(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
 static void svr_finalize(svr_state * s, tw_lp * lp);
 static tw_stime ns_to_s(tw_stime ns);
 static tw_stime s_to_ns(tw_stime s);
@@ -71,7 +72,7 @@ tw_lptype svr_lp = {
     (pre_run_f) NULL,
     (event_f) svr_event,
     (revent_f) svr_rev_event,
-    (commit_f) NULL,
+    (commit_f) svr_commit,
     (final_f)  svr_finalize,
     (map_f) codes_mapping,
     sizeof(svr_state),
@@ -99,12 +100,46 @@ static void svr_add_lp_type()
 
 // === START OF surrogate functions
 //
-static double predict_latency(void * data, tw_lp * lp, unsigned int src_terminal, struct packet_start packet_dest) {
-    (void) data;
+#define N_TERMINALS 72
+struct latency_surrogate {
+    double sum_latency[N_TERMINALS];
+    unsigned int total_msgs[N_TERMINALS];
+};
+
+static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) {
     (void) lp;
+    (void) src_terminal;
+    assert(data->sum_latency[0] == 0);
+    assert(data->total_msgs[0] == 0);
+}
 
-    unsigned int dest_terminal = packet_dest.dfdally_dest_terminal_id;
+static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * start, struct packet_end * end) {
+    (void) lp;
+    (void) src_terminal;
+
+    unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
+    double const latency = end->travel_end_time - start->travel_start_time;
+    assert(dest_terminal < N_TERMINALS);
 
+    data->sum_latency[dest_terminal] += latency;
+    data->total_msgs[dest_terminal]++;
+}
+
+static double predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * packet_dest) {
+    (void) lp;
+
+    unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
+    assert(dest_terminal < N_TERMINALS);
+
+    // In case we have any data to determine the average
+    unsigned int const total_datapoints = data->total_msgs[dest_terminal];
+    if (total_datapoints > 0) {
+        double const sum_latency = data->sum_latency[dest_terminal];
+        return sum_latency / total_datapoints;
+    }
+
+    // Otherwise, use "sensible" results from another simulation
+    // This assumes the network is a 72 nodes 1D-DragonFly (9 groups, with 4 routers, and 2 terminals per router)
     // source and destination share the same router
     if (src_terminal / 2 == dest_terminal / 2) {
         return 2108.74;
@@ -119,37 +154,58 @@ static double predict_latency(void * data, tw_lp * lp, unsigned int src_terminal
     }
 }
 
-static void init_pred(void * data, tw_lp * lp, unsigned int src_terminal) {
-    (void) data;
-    (void) lp;
-    (void) src_terminal;
-}
-
-static void feed_pred(void * data, tw_lp * lp, unsigned int src_terminal, struct packet_start start, struct packet_end end) {
-    (void) data;
-    (void) lp;
-    (void) src_terminal;
-    (void) start;
-    (void) end;
-}
-
-static void predict_latency_rc(void * data, tw_lp * lp) {
+static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
     (void) data;
     (void) lp;
 }
 
 
 struct packet_latency_predictor latency_predictor = {
-    .init              = init_pred,
-    .feed              = feed_pred,
-    .predict           = predict_latency,
-    .predict_rc        = predict_latency_rc,
-    .predictor_data_sz = 0
+    .init              = (init_pred_f) init_pred,
+    .feed              = (feed_pred_f) feed_pred,
+    .predict           = (predict_pred_f) predict_latency,
+    .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
+    .predictor_data_sz = sizeof(struct latency_surrogate)
 };
 
+struct director_data my_director_data;
+int ping_msg_sent_count = 0;
+
 void director_init(struct director_data self) {
     assert(! self.is_surrogate_on());
-    self.switch_surrogate();
+    //self.switch_surrogate();
+    //printf("Starting on %s mode\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
+    my_director_data = self;
+}
+
+void director_fun(tw_pe * pe) {
+    //static int i = 0;
+    //if (g_tw_mynode == 0) {
+    //    printf(".");
+    //    fflush(stdout);
+    //    //printf("GVT %d at %f with snt_count=%d\n", i++, pe->GVT_sig.recv_ts, ping_msg_sent_count);
+    //}
+
+    // Do not process if the simulation ended
+    if (pe->GVT_sig.recv_ts >= g_tw_ts_end) {
+        return;
+    }
+
+    // Switching to and from surrogate mode at `switch_at`
+    int const switch_at[] = {10};
+    size_t const switch_total = sizeof(switch_at) / sizeof(switch_at[0]);
+    static size_t switch_i = 0;
+    if (switch_i < switch_total) {
+        // Finding the "largest" ping_msg_sent_count across all PEs
+        int max_msg_count = 0;
+        MPI_Allreduce(&ping_msg_sent_count, &max_msg_count, 1, MPI_INT, MPI_MAX, MPI_COMM_ROSS);
+        if (max_msg_count > switch_at[switch_i]) {
+            //printf("\nswitching");
+            my_director_data.switch_surrogate();
+            //printf(" to %s\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
+            switch_i++;
+        }
+    }
 }
 //
 // === END OF surrogate functions
@@ -288,6 +344,16 @@ static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received
 }
 
+static void svr_commit(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    (void) lp;
+
+    if (s->svr_id == 0 && m->svr_event_type == PONG) {
+        ping_msg_sent_count = s->ping_msg_sent_count;
+    }
+}
+
 static void svr_finalize(svr_state * s, tw_lp * lp)
 {
     s->end_ts = tw_now(lp);
@@ -361,11 +427,13 @@ int main(int argc, char **argv)
 
     codes_comm_update();
 
+    //g_tw_gvt_arbitrary_fun = director_fun;
     dragonfly_dally_save_packet_latency_to_file("pingpong");
-    //dragonfly_dally_surrogate_configure((struct dragonfly_dally_surrogate_configure_st){
-    //    .director_init = director_init,
-    //    .latency_predictor = &latency_predictor
-    //});
+    dragonfly_dally_surrogate_configure((struct dragonfly_dally_surrogate_configure_st){
+        .director_init = director_init,
+        .director_call = director_fun,
+        .latency_predictor = &latency_predictor
+    });
 
     if(argc < 2)
     {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6c2b7011..28b5d2a5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -185,7 +185,7 @@ endforeach()
 # configure_file(modelconfig/configparser.c ${CMAKE_CURRENT_BINARY_DIR}/modelconfig/configparser.c COPYONLY)
 # configure_file(modelconfig/configparser.h ${CMAKE_CURRENT_BINARY_DIR}/modelconfig/configparser.h COPYONLY)
 
-install(DIRECTORY "${CMAKE_SOURCE_DIR}/codes" DESTINATION "${CMAKE_BINARY_DIR}")
+install(DIRECTORY "${CMAKE_SOURCE_DIR}/codes" DESTINATION include)
 
 install(TARGETS ${CODES_TARGETS} DESTINATION bin)
 
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index b61a9840..d2d8496d 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2228,6 +2228,7 @@ void dragonfly_dally_surrogate_configure(
         struct dragonfly_dally_surrogate_configure_st conf) {
 
     assert(conf.director_init != NULL);
+    assert(conf.director_call != NULL);
     assert(conf.latency_predictor != NULL);
     assert(conf.latency_predictor->init != NULL);
     assert(conf.latency_predictor->feed != NULL);
@@ -2239,11 +2240,12 @@ void dragonfly_dally_surrogate_configure(
         .switch_surrogate = switch_surrogate,
         .is_surrogate_on = is_surrogate_on_fun});
     terminal_predictor = conf.latency_predictor;
+    g_tw_gvt_arbitrary_fun = conf.director_call;
     
     surrogate_configured = true;
 }
 
-void dragonfly_dally_save_packet_latency_to_file(char * dir_to_save) {
+void dragonfly_dally_save_packet_latency_to_file(char const * const dir_to_save) {
     assert(packet_latency_f == NULL);
     // checking 
     int const NO_ERROR = 0;
@@ -2749,7 +2751,8 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
         }
         if (surrogate_configured && !is_surrogate_on) {
             assert(terminal_predictor != NULL);
-            terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top());
+            auto end = s->sent_packets_latency.top();
+            terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &s->sent_packets.front(), &end);
         }
         s->sent_packets.pop_front();
         s->sent_packets_latency.pop();
@@ -3063,7 +3066,7 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp )
 
     // alloc'ing memory for predictor, calling initiliazer for predictor
     if (terminal_predictor != NULL && terminal_predictor->predictor_data_sz > 0) {
-        s->predictor_data = calloc(1, sizeof terminal_predictor->predictor_data_sz);
+        s->predictor_data = calloc(1, terminal_predictor->predictor_data_sz);
         terminal_predictor->init(s->predictor_data, lp, s->terminal_id);
     } else {
         s->predictor_data = NULL;
@@ -3400,15 +3403,15 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
 
     // Using predictor to find latency
-    double const latency = 
-        terminal_predictor->predict(s->predictor_data, lp, s->terminal_id,
-          {.packet_ID = msg->packet_ID,
+    auto start = (struct packet_start) {.packet_ID = msg->packet_ID,
            .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
            .travel_start_time = tw_now(lp)
-          });
+          };
+    double const latency = 
+        terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
     // Sending packet directly to destination terminal
-    tw_stime const ts = 0;
+    //tw_stime const ts = 0;
     terminal_dally_message * m;
     void * remote_event;
     void const * const m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);

From d7d972aec35570f128a9f03c6d37deb6380e634d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 20 Jan 2023 16:49:40 -0500
Subject: [PATCH 008/188] Allowing code to compile when tie breaker is
 deactivated

---
 src/util/rc-stack.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c
index 8df52463..ebb2131f 100644
--- a/src/util/rc-stack.c
+++ b/src/util/rc-stack.c
@@ -16,7 +16,11 @@ enum rc_stack_mode {
 };
 
 typedef struct rc_entry_s {
+#ifdef USE_RAND_TIEBREAKER
     tw_event_sig e_sig; // ROSS 2D event timestamp (.recv_ts & .event_tiebreaker)
+#else
+    tw_stime time;
+#endif
     void * data;
     void (*free_fn)(void*);
     struct qlist_head ql;
@@ -63,7 +67,11 @@ void rc_stack_push(
     if (s->mode != RC_NONOPT || free_fn == NULL) {
         rc_entry * ent = (rc_entry*)malloc(sizeof(*ent));
         assert(ent);
+#ifdef USE_RAND_TIEBREAKER
         ent->e_sig = tw_now_sig(lp);
+#else
+        ent->time = tw_now(lp);
+#endif
         ent->data = data;
         ent->free_fn = free_fn;
         qlist_add_tail(&ent->ql, &s->head);

From 3fe1b8ed1d1145d5219f53f18dc71d98e0f063de Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 30 Jan 2023 15:37:39 -0500
Subject: [PATCH 009/188] Moving surrogate from ping-pong to global file

Now we can configure the surrogate via the config .conf file. All
workloads using dragonfly-dally have access to the surrogate now!
---
 codes/net/dragonfly-dally.h                   |  14 -
 codes/surrogate.h                             |  19 +-
 doc/example/CMakeLists.txt                    |   1 -
 doc/example/tutorial-ping-pong-surrogate.conf |  66 +++
 .../tutorial-synthetic-ping-pong-surrogate.c  | 482 ------------------
 doc/example/tutorial-synthetic-ping-pong.c    |   9 +-
 src/networks/model-net/dragonfly-dally.C      | 144 ++++--
 src/util/surrogate.c                          | 237 ++++++++-
 8 files changed, 418 insertions(+), 554 deletions(-)
 create mode 100644 doc/example/tutorial-ping-pong-surrogate.conf
 delete mode 100644 doc/example/tutorial-synthetic-ping-pong-surrogate.c

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 6ca36040..0f8698e5 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -121,22 +121,8 @@ struct terminal_dally_message
    tw_stime msg_start_time;
    tw_stime saved_busy_time_ross;
    tw_stime saved_fin_chunks_ross;
-
-   /* If we predict the latency from terminal to terminal of a packet, the event should be processed by the corresponding `_predicted` event handler */
-   bool is_predicted; // the event has been processed on surrogate mode ON
-};
-
-struct dragonfly_dally_surrogate_configure_st {
-    director_init_f                   director_init;
-    director_f                        director_call;
-    struct packet_latency_predictor * latency_predictor;
 };
 
-void dragonfly_dally_surrogate_configure(
-        struct dragonfly_dally_surrogate_configure_st);
-
-void dragonfly_dally_save_packet_latency_to_file(char const * const dir_to_save);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/codes/surrogate.h b/codes/surrogate.h
index 8148b43d..50ef48a7 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -24,6 +24,7 @@ struct packet_start {
     // tw_lpid dest_terminal_id;  // ROSS id; LPID for terminal
     unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
     double travel_start_time;
+    uint32_t packet_size;
 };
 
 struct packet_end {
@@ -57,12 +58,22 @@ typedef void (*switch_surrogate_f) (void); // Switches back and forth from surro
 typedef bool (*is_surrogate_on_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
 
 struct director_data {
-    switch_surrogate_f  switch_surrogate;
-    is_surrogate_on_f   is_surrogate_on;
+    switch_surrogate_f  switch_surrogate; // this function switches the model to and from surrogate-mode on a PE basis. It has to be called on all PEs to switch the entire simulation to its surrogate version
+    is_surrogate_on_f   is_surrogate_on;  // determines if the model has switched or not
 };
 
-typedef void (*director_init_f) (struct director_data self);
-typedef void (*director_f) (tw_pe * pe); // This is the function that is to be called at each GVT computation
+
+/**
+ * Configuration specifics
+ */
+
+/** Loads surrogate configuration, including packet latency predictor. */
+void surrogate_config(
+        const char * annotation,
+        const struct director_data d,  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
+        const int total_terminals,  //!< total number of terminals
+        struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it
+);
 
 #ifdef __cplusplus
 }
diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
index 2cd6c0c9..c3f00579 100644
--- a/doc/example/CMakeLists.txt
+++ b/doc/example/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(example-files
     example
     tutorial-synthetic-ping-pong
-    tutorial-synthetic-ping-pong-surrogate
     )
 
 foreach(namefile ${example-files})
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf b/doc/example/tutorial-ping-pong-surrogate.conf
new file mode 100644
index 00000000..ec5019be
--- /dev/null
+++ b/doc/example/tutorial-ping-pong-surrogate.conf
@@ -0,0 +1,66 @@
+LPGROUPS
+{
+   MODELNET_GRP
+   {
+      repetitions="36";
+# name of this lp changes according to the model
+      nw-lp="2";
+# these lp names will be the same for dragonfly-custom model
+      modelnet_dragonfly_dally="2";
+      modelnet_dragonfly_dally_router="1";
+   }
+}
+PARAMS
+{
+# packet size in the network
+   packet_size="4096";
+   modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
+   # scheduler options
+   modelnet_scheduler="fcfs";
+# chunk size in the network (when chunk size = packet size, packets will not be
+# divided into chunks)
+   chunk_size="64";
+# modelnet_scheduler="round-robin";
+# number of routers in group
+   num_routers="4";
+# number of groups in the network
+   num_groups="9";
+# buffer size in bytes for local virtual channels
+   local_vc_size="16384";
+#buffer size in bytes for global virtual channels
+   global_vc_size="16384";
+#buffer size in bytes for compute node virtual channels
+   cn_vc_size="32768";
+#bandwidth in GiB/s for local channels
+   local_bandwidth="2.0";
+# bandwidth in GiB/s for global channels
+   global_bandwidth="2.0";
+# bandwidth in GiB/s for compute node-router channels
+   cn_bandwidth="2.0";
+# ROSS message size
+   message_size="736";
+# number of compute nodes connected to router, dictated by dragonfly config
+# file
+   num_cns_per_router="2";
+# number of global channels per router
+   num_global_channels="2";
+# network config file for intra-group connections
+   intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
+# network config file for inter-group connections
+   inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
+# routing protocol to be used
+   routing="prog-adaptive";
+# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
+   save_packet_latency_path="packet-latency-trace/";
+}
+SURROGATE {
+# determines the director switching from surrogate to high-def simulation strategy
+   director_mode="at-fixed-virtual-times";
+
+# director configuration for: director_mode == "at-fixed-virtual-times"
+# timestamps at which to switch to surrogate-mode and back
+   fixed_switch_timestamps=( "100e4", "8900e4" );
+
+# latency predictor to use
+   packet_latency_predictor="average";
+}
diff --git a/doc/example/tutorial-synthetic-ping-pong-surrogate.c b/doc/example/tutorial-synthetic-ping-pong-surrogate.c
deleted file mode 100644
index 8d866318..00000000
--- a/doc/example/tutorial-synthetic-ping-pong-surrogate.c
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * Copyright (C) 2019 Neil McGlohon
- * Mantained/edited by Elkin Cruz (2022-2023)
- * See LICENSE notice in top-level directory
- */
-
-#include "codes/model-net.h"
-#include "codes/codes_mapping.h"
-#include "codes/surrogate.h"
-#include "codes/net/dragonfly-dally.h"
-
-
-static int net_id = 0;
-static int PAYLOAD_SZ = 4096;
-static unsigned long long num_nodes = 0;
-
-static char lp_io_dir[256] = {'\0'};
-static lp_io_handle io_handle;
-static unsigned int lp_io_use_suffix = 0;
-static int do_lp_io = 0;
-
-static int num_msgs = 20;
-
-typedef struct svr_msg svr_msg;
-typedef struct svr_state svr_state;
-
-/* global variables for codes mapping */
-static char group_name[MAX_NAME_LENGTH];
-static char lp_type_name[MAX_NAME_LENGTH];
-static int group_index, lp_type_index, rep_id, offset;
-
-/* type of events */
-enum svr_event
-{
-    KICKOFF = 1,
-    PING,
-    PONG
-};
-
-struct svr_msg
-{
-    enum svr_event svr_event_type; //KICKOFF, PING, or PONG
-    int sender_id; //ID of the sender workload LP to know who to send a PONG message back to
-    int payload_value; //Some value that we will encode as an example
-    model_net_event_return event_rc; //helper to encode data relating to CODES rng usage
-};
-
-struct svr_state
-{
-    tw_lpid svr_id;            /* the ID of this server */
-    int ping_msg_sent_count;   /* PING messages sent */
-    int ping_msg_recvd_count;  /* PING messages received */
-    int pong_msg_sent_count;   /* PONG messages sent */
-    int pong_msg_recvd_count;  /* PONG messages received */
-    tw_stime start_ts;    /* time that this LP started sending requests */
-    tw_stime end_ts;      /* time that this LP ended sending requests */
-    int payload_sum;      /* the running sum of all payloads received */
-};
-
-/* declaration of functions */
-static void svr_init(svr_state * s, tw_lp * lp);
-static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
-static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
-static void svr_commit(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp);
-static void svr_finalize(svr_state * s, tw_lp * lp);
-static tw_stime ns_to_s(tw_stime ns);
-static tw_stime s_to_ns(tw_stime s);
-
-/* ROSS lptype function callback mapping */
-tw_lptype svr_lp = {
-    (init_f) svr_init,
-    (pre_run_f) NULL,
-    (event_f) svr_event,
-    (revent_f) svr_rev_event,
-    (commit_f) svr_commit,
-    (final_f)  svr_finalize,
-    (map_f) codes_mapping,
-    sizeof(svr_state),
-};
-
-const tw_optdef app_opt [] =
-{
-        TWOPT_GROUP("Model net synthetic traffic " ),
-    	TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "),
-    	TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "),
-        TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
-        TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
-        TWOPT_END()
-};
-
-const tw_lptype* svr_get_lp_type()
-{
-    return(&svr_lp);
-}
-
-static void svr_add_lp_type()
-{
-  lp_type_register("nw-lp", svr_get_lp_type());
-}
-
-// === START OF surrogate functions
-//
-#define N_TERMINALS 72
-struct latency_surrogate {
-    double sum_latency[N_TERMINALS];
-    unsigned int total_msgs[N_TERMINALS];
-};
-
-static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) {
-    (void) lp;
-    (void) src_terminal;
-    assert(data->sum_latency[0] == 0);
-    assert(data->total_msgs[0] == 0);
-}
-
-static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * start, struct packet_end * end) {
-    (void) lp;
-    (void) src_terminal;
-
-    unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
-    double const latency = end->travel_end_time - start->travel_start_time;
-    assert(dest_terminal < N_TERMINALS);
-
-    data->sum_latency[dest_terminal] += latency;
-    data->total_msgs[dest_terminal]++;
-}
-
-static double predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * packet_dest) {
-    (void) lp;
-
-    unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
-    assert(dest_terminal < N_TERMINALS);
-
-    // In case we have any data to determine the average
-    unsigned int const total_datapoints = data->total_msgs[dest_terminal];
-    if (total_datapoints > 0) {
-        double const sum_latency = data->sum_latency[dest_terminal];
-        return sum_latency / total_datapoints;
-    }
-
-    // Otherwise, use "sensible" results from another simulation
-    // This assumes the network is a 72 nodes 1D-DragonFly (9 groups, with 4 routers, and 2 terminals per router)
-    // source and destination share the same router
-    if (src_terminal / 2 == dest_terminal / 2) {
-        return 2108.74;
-    }
-    // source and destination are in the same group
-    else if (src_terminal / 8 == dest_terminal / 8) {
-        return 2390.13;
-    }
-    // source and destination are in different groups
-    else {
-        return 4162.77;
-    }
-}
-
-static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
-    (void) data;
-    (void) lp;
-}
-
-
-struct packet_latency_predictor latency_predictor = {
-    .init              = (init_pred_f) init_pred,
-    .feed              = (feed_pred_f) feed_pred,
-    .predict           = (predict_pred_f) predict_latency,
-    .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
-    .predictor_data_sz = sizeof(struct latency_surrogate)
-};
-
-struct director_data my_director_data;
-int ping_msg_sent_count = 0;
-
-void director_init(struct director_data self) {
-    assert(! self.is_surrogate_on());
-    //self.switch_surrogate();
-    //printf("Starting on %s mode\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
-    my_director_data = self;
-}
-
-void director_fun(tw_pe * pe) {
-    //static int i = 0;
-    //if (g_tw_mynode == 0) {
-    //    printf(".");
-    //    fflush(stdout);
-    //    //printf("GVT %d at %f with snt_count=%d\n", i++, pe->GVT_sig.recv_ts, ping_msg_sent_count);
-    //}
-
-    // Do not process if the simulation ended
-    if (pe->GVT_sig.recv_ts >= g_tw_ts_end) {
-        return;
-    }
-
-    // Switching to and from surrogate mode at `switch_at`
-    int const switch_at[] = {10};
-    size_t const switch_total = sizeof(switch_at) / sizeof(switch_at[0]);
-    static size_t switch_i = 0;
-    if (switch_i < switch_total) {
-        // Finding the "largest" ping_msg_sent_count across all PEs
-        int max_msg_count = 0;
-        MPI_Allreduce(&ping_msg_sent_count, &max_msg_count, 1, MPI_INT, MPI_MAX, MPI_COMM_ROSS);
-        if (max_msg_count > switch_at[switch_i]) {
-            //printf("\nswitching");
-            my_director_data.switch_surrogate();
-            //printf(" to %s\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
-            switch_i++;
-        }
-    }
-}
-//
-// === END OF surrogate functions
-
-static void svr_init(svr_state * s, tw_lp * lp)
-{
-    //Initialize State
-    s->ping_msg_sent_count = 0;
-    s->ping_msg_recvd_count = 0;
-    s->pong_msg_sent_count = 0;
-    s->pong_msg_recvd_count = 0;
-    s->start_ts = 0.0;
-    s->end_ts = 0.0;
-    s->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); /* turns the LP Global ID into the server ID */
-    s->payload_sum = 0;
-
-    //Now we create and send a self KICKOFF message - this is a PDES coordination event and thus doesn't need to be injected into the connected network
-    //so we won't use model_net_event(), that's reserved for stuff we want to send across the network
-
-    /* Set a time from now when this message is to be received by the recipient (self in this cae.) add some tiny random noise to help avoid event ties (different events with same timestamp) */
-    //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window
-    tw_stime kickoff_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001);
-
-    tw_event *e;
-    svr_msg *m;
-    e = tw_event_new(lp->gid, kickoff_time, lp); //ROSS method to create a new event
-    m = tw_event_data(e); //Gives you a pointer to the data encoded within event e
-    m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received
-    tw_event_send(e); //ROSS method to send off the event e with the encoded data in m
-}
-
-static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    // This bit is just for testing. It allows to send a PING event only to the first LP/server
-    //if (lp->gid != 0) {
-    //    return;
-    //}
-    s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW
-
-    svr_msg ping_msg;
-
-    tw_lpid local_dest = -1; //ID of a sever, relative to only servers
-    tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs
-
-    //We want to make sure we're not accidentally picking ourselves
-    local_dest = tw_rand_integer(lp->rng, 1, num_nodes - 2);
-    local_dest = (s->svr_id + local_dest) % num_nodes;
-    //local_dest is now a number [0,num_nodes) but is assuredly not s->svr_id
-    assert(local_dest >= 0);
-    assert(local_dest < num_nodes);
-    assert(local_dest != s->svr_id);
-
-    ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message
-    ping_msg.svr_event_type = PING; //set it to type PING
-    ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10]
-
-    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
-    global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
-    s->ping_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
-}
-
-static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
-    s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
-    tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
-    tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
-}
-
-static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    s->ping_msg_recvd_count++; //increment the counter for ping messages received
-
-    int original_sender = m->sender_id; //this is the server we need to send a PONG message back to
-    s->payload_sum += m->payload_value; //increment our running sum of payload values received
-
-    svr_msg pong_msg;
-    pong_msg.sender_id = s->svr_id;
-    pong_msg.svr_event_type = PONG;
-    // only ping messages contain a payload value - not every value in a message struct must be utilized by all messages!
-
-    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
-    tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0);
-    s->pong_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
-}
-
-static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
-    s->pong_msg_sent_count--;
-    s->payload_sum -= m->payload_value; //undo the increment of the payload sum
-    s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received
-}
-
-static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    s->pong_msg_recvd_count++; //increment the counter for ping messages received
-
-    if(s->ping_msg_sent_count >= num_msgs) //if we've sent enough ping messages, then we stop and don't send any more
-    {
-        b->c1 = 1; //flag that we didn't really do anything in this event so that if this event gets reversed, we don't over-aggressively revert state or RNGs
-        return;
-    }
-
-    //Now we need to send another ping message, to someone new (just to spice the simulation)
-    tw_lpid send_to = tw_rand_integer(lp->rng, 1, num_nodes - 2);
-    send_to = (s->svr_id + send_to) % num_nodes;
-
-    svr_msg ping_msg;
-    ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message
-    ping_msg.svr_event_type = PING; //set it to type PING
-    ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it
-
-    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
-    tw_lpid global_dest = codes_mapping_get_lpid_from_relative(send_to, group_name, lp_type_name, NULL, 0);
-    s->ping_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
-}
-
-static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    if (! b->c1) { //if we didn't flip the c1 flag in the forward event
-        model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
-        s->ping_msg_sent_count--;
-        tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
-        tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
-        b->c1 = 0;
-    }
-
-    s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received
-}
-
-static void svr_commit(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    (void) lp;
-
-    if (s->svr_id == 0 && m->svr_event_type == PONG) {
-        ping_msg_sent_count = s->ping_msg_sent_count;
-    }
-}
-
-static void svr_finalize(svr_state * s, tw_lp * lp)
-{
-    s->end_ts = tw_now(lp);
-
-    int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count;
-    int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
-    tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
-
-    printf("Sever LPID:%lu svr_id:%lu sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n",
-            (unsigned long)lp->gid, (unsigned long)s->svr_id, total_msg_size_sent,
-            time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum);
-}
-
-static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    switch (m->svr_event_type)
-    {
-        case KICKOFF:
-            handle_kickoff_event(s, b, m, lp);
-            break;
-        case PING:
-            handle_ping_event(s, b, m, lp);
-            break;
-        case PONG:
-            handle_pong_event(s, b, m, lp);
-            break;
-        default:
-            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
-            break;
-    }
-}
-
-static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
-{
-    switch (m->svr_event_type)
-    {
-        case KICKOFF:
-            handle_kickoff_rev_event(s, b, m, lp);
-            break;
-        case PING:
-            handle_ping_rev_event(s, b, m, lp);
-            break;
-        case PONG:
-            handle_pong_rev_event(s, b, m, lp);
-            break;
-        default:
-            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
-            break;
-    }
-}
-
-/* convert ns to seconds */
-static tw_stime ns_to_s(tw_stime ns)
-{
-    return(ns / (1000.0 * 1000.0 * 1000.0));
-}
-static tw_stime s_to_ns(tw_stime s)
-{
-    return(s*1000.0*1000.0*1000.0);
-}
-
-int main(int argc, char **argv)
-{
-    int nprocs;
-    int rank;
-    int num_nets;
-    int *net_ids;
-
-    tw_opt_add(app_opt);
-    tw_init(&argc, &argv);
-
-    codes_comm_update();
-
-    //g_tw_gvt_arbitrary_fun = director_fun;
-    dragonfly_dally_save_packet_latency_to_file("pingpong");
-    dragonfly_dally_surrogate_configure((struct dragonfly_dally_surrogate_configure_st){
-        .director_init = director_init,
-        .director_call = director_fun,
-        .latency_predictor = &latency_predictor
-    });
-
-    if(argc < 2)
-    {
-            printf("\n Usage: mpirun <args> --sync=1/2/3 -- <config_file.conf> ");
-            MPI_Finalize();
-            return 0;
-    }
-
-    MPI_Comm_rank(MPI_COMM_CODES, &rank);
-    MPI_Comm_size(MPI_COMM_CODES, &nprocs);
-
-    configuration_load(argv[2], MPI_COMM_CODES, &config);
-
-    model_net_register();
-    svr_add_lp_type();
-
-    codes_mapping_setup();
-
-    net_ids = model_net_configure(&num_nets);
-    net_id = *net_ids;
-    free(net_ids);
-
-    /* 1 day of simulation time is drastically huge but it will ensure
-       that the simulation doesn't try to end before all packets are delivered */
-    g_tw_ts_end = s_to_ns(24 * 60 * 60);
-
-    num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
-    assert(num_nodes);
-
-    if(lp_io_dir[0])
-    {
-        do_lp_io = 1;
-        int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0;
-        int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES);
-        assert(ret == 0 || !"lp_io_prepare failure");
-    }
-    tw_run();
-    if (do_lp_io){
-        int ret = lp_io_flush(io_handle, MPI_COMM_CODES);
-        assert(ret == 0 || !"lp_io_flush failure");
-    }
-    model_net_report_stats(net_id);
-
-    tw_end();
-    return 0;
-}
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 4ae1ecbc..ab58b817 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -41,6 +41,8 @@ struct svr_msg
     int sender_id; //ID of the sender workload LP to know who to send a PONG message back to
     int payload_value; //Some value that we will encode as an example
     model_net_event_return event_rc; //helper to encode data relating to CODES rng usage
+    // Used for rollback
+    tw_stime previous_ts;
 };
 
 struct svr_state
@@ -231,8 +233,6 @@ static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
 
 static void svr_finalize(svr_state * s, tw_lp * lp)
 {
-    s->end_ts = tw_now(lp);
-
     int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count;
     int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
     tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
@@ -244,6 +244,9 @@ static void svr_finalize(svr_state * s, tw_lp * lp)
 
 static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
+    m->previous_ts = s->end_ts;
+    s->end_ts = tw_now(lp);
+
     switch (m->svr_event_type)
     {
         case KICKOFF:
@@ -278,6 +281,8 @@ static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
             tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
             break;
     }
+
+    s->end_ts = m->previous_ts;
 }
 
 /* convert ns to seconds */
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index d2d8496d..8b937ca8 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -181,6 +181,8 @@ static char router_sample_file[MAX_NAME_LENGTH];
 // File to store packet latency from terminal-to-terminal
 // NOTE: Only non-predicted latencies are saved to file
 static FILE * packet_latency_f = NULL;
+static void setup_packet_latency_path(char const * const dir_to_save);
+
 
 // ==== START OF Parameters to tune surrogate mode ====
 // 
@@ -346,7 +348,9 @@ typedef enum event_t
     R_BW_HALT,
     T_BANDWIDTH,
     R_SNAPSHOT, //used for timed statistic outputs
-    T_NOTIFY_TOTAL_LATENCY,
+    T_NOTIFY_TOTAL_LATENCY,  // used to notify a terminal of the total delay of a packet
+    T_ARRIVE_PREDICTED,  // this event is generated by a latency predictor instead of traversing the network
+    T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event
 } event_t;
 
 /* whether the last hop of a packet was global, local or a terminal */
@@ -1246,7 +1250,7 @@ static int dfdally_get_assigned_router_id_from_terminal(const dragonfly_param *p
     int num_rails = params->num_rails;
 
     int total_routers = params->total_routers;
-    int total_terminals = params->total_terminals;
+    //int total_terminals = params->total_terminals;
     int num_cn_per_router = params->num_cn;
 
     if(num_planes == 1) //then all rails go to the same router //TODO: this could change - could be cool!
@@ -2198,6 +2202,32 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     }
     // END CONGESTION CONTROL
 
+    // Packet latency path to store configuration
+    char packet_latency_path[MAX_NAME_LENGTH];
+    packet_latency_path[0] = '\0';
+    configuration_get_value(&config, "PARAMS", "save_packet_latency_path", anno, packet_latency_path, MAX_NAME_LENGTH);
+    if(strlen(packet_latency_path) > 0) {
+        setup_packet_latency_path(packet_latency_path);
+    }
+
+    // START Surrogate configuration
+    char director_mode[MAX_NAME_LENGTH];
+    director_mode[0] = '\0';
+    int director_mode_len = configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    // if surrogate mode has been set up
+    if (director_mode_len > 0) {
+        surrogate_config(anno,
+                {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
+                p->total_terminals,
+                &terminal_predictor);
+        if (terminal_predictor) {
+            surrogate_configured = true;
+        } else {
+            tw_error(TW_LOC, "Latency predictor is NULL. Something during surrogate configuration failed.");
+        }
+    }
+    // END Surrogate configuration
+
     if (PRINT_CONFIG && !myRank) {
         dragonfly_print_params(p,stderr);
     }
@@ -2224,28 +2254,7 @@ void dragonfly_dally_configure() {
 
 }
 
-void dragonfly_dally_surrogate_configure(
-        struct dragonfly_dally_surrogate_configure_st conf) {
-
-    assert(conf.director_init != NULL);
-    assert(conf.director_call != NULL);
-    assert(conf.latency_predictor != NULL);
-    assert(conf.latency_predictor->init != NULL);
-    assert(conf.latency_predictor->feed != NULL);
-    assert(conf.latency_predictor->predict != NULL);
-    assert(conf.latency_predictor->predict_rc != NULL);
-    assert(! surrogate_configured);
-
-    conf.director_init({
-        .switch_surrogate = switch_surrogate,
-        .is_surrogate_on = is_surrogate_on_fun});
-    terminal_predictor = conf.latency_predictor;
-    g_tw_gvt_arbitrary_fun = conf.director_call;
-    
-    surrogate_configured = true;
-}
-
-void dragonfly_dally_save_packet_latency_to_file(char const * const dir_to_save) {
+static void setup_packet_latency_path(char const * const dir_to_save) {
     assert(packet_latency_f == NULL);
     // checking 
     int const NO_ERROR = 0;
@@ -2723,8 +2732,8 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
 static void packet_latency_save_to_file(unsigned int terminal_id, struct packet_start start, struct packet_end end)
 {
     assert(start.packet_ID == end.packet_ID);
-    fprintf(packet_latency_f, "%u,%u,%lu,%f,%f,%f\n",
-            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
+    fprintf(packet_latency_f, "%u,%u,%lu,%u,%f,%f,%f\n",
+            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID, start.packet_size,
             start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
 }
 
@@ -3403,10 +3412,12 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
 
     // Using predictor to find latency
-    auto start = (struct packet_start) {.packet_ID = msg->packet_ID,
-           .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
-           .travel_start_time = tw_now(lp)
-          };
+    auto start = (struct packet_start) {
+        .packet_ID = msg->packet_ID,
+        .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
+        .travel_start_time = tw_now(lp),
+        .packet_size = msg->packet_size
+    };
     double const latency = 
         terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
@@ -3422,7 +3433,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         memcpy(remote_event, m_data_src, msg->remote_event_size_bytes);
     }
     m->magic = terminal_magic_num;
-    m->type = T_ARRIVE;
+    m->type = T_ARRIVE_PREDICTED;
     m->src_terminal_id = lp->gid;
     m->dfdally_src_terminal_id = s->terminal_id; //m->travel_start_time = tw_now(lp);
     //m->rail_id = msg->rail_id;
@@ -3734,7 +3745,8 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     s->sent_packets.push_back({
         .packet_ID = msg->packet_ID,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
-        .travel_start_time = tw_now(lp)});
+        .travel_start_time = tw_now(lp),
+        .packet_size = msg->packet_size});
 
     //qos stuff
     int num_qos_levels = s->params->num_qos_levels;
@@ -4124,16 +4136,32 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     return;
 }
 
-static void notify_src_lp_on_total_latency(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf)
+static void notify_src_lp_on_total_latency(terminal_state * s, terminal_dally_message * msg, tw_lp * lp)
 {
     terminal_dally_message * new_msg;
     tw_event *e = model_net_method_event_new(
             msg->src_terminal_id, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
 
-    memcpy(new_msg, msg, sizeof(terminal_dally_message));
-    new_msg->type = T_NOTIFY_TOTAL_LATENCY;
+    //memcpy(new_msg, msg, sizeof(terminal_dally_message));
+    //strcpy(new_msg->category, msg->category);
+    new_msg->type                    = T_NOTIFY_TOTAL_LATENCY;
+    new_msg->magic                   = terminal_magic_num;
+    new_msg->packet_ID               = msg->packet_ID;
+    new_msg->travel_end_time         = msg->travel_end_time;
+    new_msg->src_terminal_id         = msg->src_terminal_id;
+    new_msg->dfdally_src_terminal_id = msg->dfdally_src_terminal_id;
+    tw_event_send(e); 
+}
+
+// This function triggers an event that is completely ignored when processed later. The number of events produced by a terminal/router DOES alter the simulation results. (The number of events processed by an LP shouldn't be a parameter to the simulation itself, but it is weirdly).
+static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * msg, tw_lp * lp)
+{
+    terminal_dally_message * new_msg;
+    tw_event *e = model_net_method_event_new(
+            lp->gid, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
+
+    new_msg->type  = T_VACUOUS_EVENT;
     new_msg->magic = terminal_magic_num;
-    strcpy(new_msg->category, msg->category);
     tw_event_send(e); 
 }
 
@@ -4573,7 +4601,16 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
             if (packet_latency_f || surrogate_configured) {
-                notify_src_lp_on_total_latency(s, msg, lp, bf);
+                notify_src_lp_on_total_latency(s, msg, lp);
+            } else {
+                // This vacuous msg is necessary just to keep simulations with
+                // and without the latency notification the same. Notifying the
+                // latency does not impact the simulation (unless the data is
+                // fed to a predictor, later to be used). If the latency
+                // notification is deactivated, the simulation will produce
+                // the same number of events (a bit wasteful), a parameter
+                // that model-net or dragonfly-dally for some reason use :S
+                //vacuous_msg_to_itself(s, msg, lp);
             }
             send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
         }
@@ -5725,20 +5762,19 @@ terminal_dally_event( terminal_state * s,
         {
         case T_GENERATE:
             if (is_surrogate_on) {
-                msg->is_predicted = true;
+                bf->c10 = 1;
                 packet_generate_predicted(s,bf,msg,lp);
             } else {
-                msg->is_predicted = false;
                 packet_generate(s,bf,msg,lp);
             }
         break;
         
         case T_ARRIVE:
-            if (msg->is_predicted) {
-                packet_arrive_predicted(s,bf,msg,lp);
-            } else {
-                packet_arrive(s,bf,msg,lp);
-            }
+            packet_arrive(s,bf,msg,lp);
+        break;
+
+        case T_ARRIVE_PREDICTED:
+            packet_arrive_predicted(s,bf,msg,lp);
         break;
         
         case T_SEND:
@@ -5756,6 +5792,10 @@ terminal_dally_event( terminal_state * s,
         case T_NOTIFY_TOTAL_LATENCY:
         //    We don't process the message, we only store the message when committing
         break;
+
+        case T_VACUOUS_EVENT:
+        break;
+
         default:
             printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type);
             tw_error(TW_LOC, "Msg type not supported");
@@ -5824,7 +5864,7 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
     switch(msg->type)
     {
         case T_GENERATE:
-            if (msg->is_predicted) {
+            if (bf->c10) {
                 packet_generate_predicted_rc(s,bf,msg,lp);
             } else {
                 packet_generate_rc(s, bf, msg, lp); 
@@ -5836,11 +5876,11 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
             break;
 
         case T_ARRIVE:
-            if (msg->is_predicted) {
-                packet_arrive_predicted_rc(s, bf, msg, lp);
-            } else {
-                packet_arrive_rc(s, bf, msg, lp);
-            }
+            packet_arrive_rc(s, bf, msg, lp);
+            break;
+
+        case T_ARRIVE_PREDICTED:
+            packet_arrive_predicted_rc(s, bf, msg, lp);
             break;
 
         case T_BUFFER:
@@ -5855,11 +5895,15 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
         //    We don't process the message, we only store the message when committing
         break;
 
+        case T_VACUOUS_EVENT:
+        break;
+
         default:
             tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type);
     }
     msg->num_cll = 0;
     msg->num_rngs = 0;
+    bf->c10 = 0;
 }
 
 /* Reverse computation handler for a router event */
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 18dfb4d6..f0e618e8 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -1,3 +1,238 @@
+#include <assert.h>
+#include <codes/configuration.h>
+#include <codes/codes_mapping.h>
 #include <codes/surrogate.h>
 
-// This is file is empty because the header doesn't need a .c file. Yet, it exists to make sure that the header has all its includes fulfilled
+// Basic level of debugging is 1. It should be always turned on
+// because it tells us when a switch to or from surrogate-mode happened.
+// It can be deactivated (set to 0) if it ends up being too obnoxious
+// Level 0: don't show anything
+// Level 1: show when surrogate-mode is activated and deactivated
+// Level 2: level 1 and some information at each GVT
+// Level 3: level 1 and show extended information at each GVT
+#define DEBUG_DIRECTOR 1
+
+// Global variables
+int total_terminals = 0;
+
+// === Average packet latency functionality
+//
+struct aggregated_latency_one_terminal {
+    double sum_latency;
+    unsigned int total_msgs;
+};
+
+struct latency_surrogate {
+    struct aggregated_latency_one_terminal aggregated_latency_for_all;
+    unsigned int num_terminals;
+    struct aggregated_latency_one_terminal aggregated_latency[];
+};
+
+static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) {
+    (void) lp;
+    (void) src_terminal;
+    assert(data->num_terminals == 0);
+    assert(data->aggregated_latency_for_all.sum_latency == 0);
+    assert(data->aggregated_latency_for_all.total_msgs == 0);
+    assert(data->aggregated_latency[0].sum_latency == 0);
+    assert(data->aggregated_latency[0].total_msgs == 0);
+
+    data->num_terminals = total_terminals;
+}
+
+static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * start, struct packet_end * end) {
+    (void) lp;
+    (void) src_terminal;
+
+    unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
+    double const latency = end->travel_end_time - start->travel_start_time;
+    assert(dest_terminal < data->num_terminals);
+
+    data->aggregated_latency[dest_terminal].sum_latency += latency;
+    data->aggregated_latency[dest_terminal].total_msgs++;
+
+    data->aggregated_latency_for_all.sum_latency += latency;
+    data->aggregated_latency_for_all.total_msgs++;
+}
+
+static double predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * packet_dest) {
+    (void) lp;
+
+    unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
+    assert(dest_terminal < data->num_terminals);
+
+    // In case we have any data to determine the average for a specific terminal
+    unsigned int const total_datapoints = data->aggregated_latency[dest_terminal].total_msgs;
+    if (total_datapoints > 0) {
+        double const sum_latency = data->aggregated_latency[dest_terminal].sum_latency;
+        return sum_latency / total_datapoints;
+    }
+
+    // If no information for that terminal exists, use average from all message
+    unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs;
+    if (total_total_datapoints > 0) {
+        double const sum_latency = data->aggregated_latency_for_all.sum_latency;
+        return sum_latency / total_total_datapoints;
+    }
+
+    // otherwise, we have no data to approximate the latency
+    tw_error(TW_LOC, "The terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
+    return -1.0;
+
+    // TODO(elkin): this (below) is wrong, bad bad. I'm not entirely sure how to do this rn in a non-hardcoded manner, but given time, this should be left in better terms
+    // THIS HAS BEEN HARDCODED FOR THE CASE OF 72-node DRAGONFLY
+
+    //// Otherwise, use "sensible" results from another simulation
+    //// This assumes the network is a 72 nodes 1D-DragonFly (9 groups, with 4 routers, and 2 terminals per router)
+    //// source and destination share the same router
+    //if (src_terminal / 2 == dest_terminal / 2) {
+    //    return 2108.74;
+    //}
+    //// source and destination are in the same group
+    //else if (src_terminal / 8 == dest_terminal / 8) {
+    //    return 2390.13;
+    //}
+    //// source and destination are in different groups
+    //else {
+    //    return 4162.77;
+    //}
+}
+
+static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
+    (void) data;
+    (void) lp;
+}
+
+
+struct packet_latency_predictor average_latency_predictor = {
+    .init              = (init_pred_f) init_pred,
+    .feed              = (feed_pred_f) feed_pred,
+    .predict           = (predict_pred_f) predict_latency,
+    .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
+    .predictor_data_sz = sizeof(struct latency_surrogate) + 72 * sizeof(struct aggregated_latency_one_terminal)
+};
+//
+// === END OF Average packet latency functionality
+
+
+// === Director functionality
+//
+struct director_data my_director_data;
+
+static struct {
+    size_t current_i;
+    size_t total;
+    double * time_stampts; // list of precise timestamps at which to switch
+} switch_at;
+
+
+void director_fun(tw_pe * pe) {
+    static int i = 0;
+    if (g_tw_mynode == 0) {
+        if (DEBUG_DIRECTOR == 2) {
+            printf(".");
+            fflush(stdout);
+        }
+        if (DEBUG_DIRECTOR == 3) {
+            printf("GVT %d at %f in %s\n", i++, pe->GVT_sig.recv_ts,
+                    my_director_data.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+        }
+    }
+
+    // Do not process if the simulation ended
+    if (pe->GVT_sig.recv_ts >= g_tw_ts_end) {
+        return;
+    }
+
+    // Switching to and from surrogate mode at times determined by `switch_at`
+    if (switch_at.current_i < switch_at.total) {
+        double const now = pe->GVT_sig.recv_ts;
+        double const next_switch = switch_at.time_stampts[switch_at.current_i];
+        if (now > next_switch) {
+            if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+                printf("\nswitching at %g", now);
+            }
+            my_director_data.switch_surrogate();
+            if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+                printf(" to %s\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
+            }
+            switch_at.current_i++;
+        }
+    }
+}
+//
+// === END OF Director functionality
+
+
+// === All things Surrogate Configuration
+void surrogate_config(
+        const char * anno,
+        const struct director_data d,
+        const int total_terminals_,
+        struct packet_latency_predictor ** pl_pred
+) {
+    // This is the only place where the director data should be setup
+    my_director_data = d;
+    total_terminals = total_terminals_;
+
+    // Determining which director mode to set up
+    char director_mode[MAX_NAME_LENGTH];
+    director_mode[0] = '\0';
+    configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
+        if(!g_tw_mynode) {
+            fprintf(stderr, "\nSurrogate activated switching at fixed virtual times: ");
+        }
+
+        // Loading timestamps
+        char **timestamps;
+        size_t len;
+        configuration_get_multivalue(&config, "SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
+
+        switch_at.current_i = 0;
+        switch_at.total = len;
+        switch_at.time_stampts = malloc(len * sizeof(double));
+
+        for (size_t i = 0; i < len; i++) {
+            errno = 0;
+            switch_at.time_stampts[i] = strtod(timestamps[i], NULL);
+            if (errno == ERANGE || errno == EILSEQ){
+                tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
+            }
+
+            if(!g_tw_mynode) {
+                fprintf(stderr, "%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
+            }
+        }
+        if(!g_tw_mynode) {
+            fprintf(stderr, "\n");
+        }
+
+        // Injecting into ROSS function to be called at GVT
+        g_tw_gvt_arbitrary_fun = director_fun;
+
+        // freeing timestamps before it dissapears
+        for (size_t i = 0; i < len; i++) {
+            free(timestamps[i]);
+        }
+        free(timestamps);
+    } else {
+        tw_error(TW_LOC, "Unknown director mode `%s`", director_mode);
+    }
+
+    // Determining which predictor to set up and return
+    char latency_pred_name[MAX_NAME_LENGTH];
+    latency_pred_name[0] = '\0';
+    configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
+    if (strcmp(latency_pred_name, "average") == 0) {
+        *pl_pred = &average_latency_predictor;
+    } else {
+        tw_error(TW_LOC, "Unknown predictor for packet latency `%s`", latency_pred_name);
+    }
+
+    //my_director_data.switch_surrogate();
+    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+        fprintf(stderr, "Simulation starting on %s mode\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
+    }
+}
+// === END OF All things Surrogate Configuration

From eff87841e0e9f15b089a78714cb387184ba35ea8 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 2 Feb 2023 14:23:14 -0500
Subject: [PATCH 010/188] Tracking new parameter: timestamp at which a packet
 was injected in model-net by a workload

---
 codes/net/dragonfly-dally.h                   |  3 +--
 codes/surrogate.h                             |  1 +
 doc/example/tutorial-ping-pong-surrogate.conf |  6 ++++-
 .../determine_mean_std.py                     | 26 ++++++++++++-------
 .../sort-delays.py                            | 12 +++++----
 src/networks/model-net/dragonfly-dally.C      |  8 +++++-
 src/util/surrogate.c                          | 25 +++++++++++++++---
 7 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 0f8698e5..4e9c1a8a 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -14,7 +14,6 @@ extern "C" {
 #include <ross.h>
 #include <stdbool.h>
 #include <model-net.h>
-#include <codes/surrogate.h>
 
 typedef struct terminal_dally_message terminal_dally_message;
 
@@ -23,7 +22,7 @@ struct terminal_dally_message
 {
   /* magic number */
   int magic;
-  /* flit travel start time*/
+  /* message travel start time*/
   tw_stime travel_start_time;
   /* flit travel end time*/
   tw_stime travel_end_time;
diff --git a/codes/surrogate.h b/codes/surrogate.h
index 50ef48a7..e417ab18 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -24,6 +24,7 @@ struct packet_start {
     // tw_lpid dest_terminal_id;  // ROSS id; LPID for terminal
     unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
     double travel_start_time;
+    double workload_injection_time; // this is when the workload passed down the event to model-net
     uint32_t packet_size;
 };
 
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf b/doc/example/tutorial-ping-pong-surrogate.conf
index ec5019be..360f2294 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf
+++ b/doc/example/tutorial-ping-pong-surrogate.conf
@@ -1,3 +1,6 @@
+# Run this example with:
+# > cd path-to-codes/build
+# > mpirun -np 2 doc/example/tutorial-synthetic-ping-pong --synch=3 --num_messages=10000 -- ../doc/example/tutorial-ping-pong-surrogate.conf
 LPGROUPS
 {
    MODELNET_GRP
@@ -59,7 +62,8 @@ SURROGATE {
 
 # director configuration for: director_mode == "at-fixed-virtual-times"
 # timestamps at which to switch to surrogate-mode and back
-   fixed_switch_timestamps=( "100e4", "8900e4" );
+   #fixed_switch_timestamps=( "100e4", "8900e4" );  # the first switch happens at around 100 ping messages, the second at approx. 9900 pings
+   fixed_switch_timestamps=( "1000e4", "8900e4" );  # the first switch happens at around 1000 ping messages, the second at approx. 9900 pings
 
 # latency predictor to use
    packet_latency_predictor="average";
diff --git a/scripts/terminal-to-terminal-latency/determine_mean_std.py b/scripts/terminal-to-terminal-latency/determine_mean_std.py
index a8058f84..cc6bace5 100644
--- a/scripts/terminal-to-terminal-latency/determine_mean_std.py
+++ b/scripts/terminal-to-terminal-latency/determine_mean_std.py
@@ -8,6 +8,12 @@ def mean_and_std(array: np.array) -> tuple[float, float]:
 
 if __name__ == '__main__':
     delays = np.loadtxt("packets-delay.csv", skiprows=1, delimiter=",")
+    start_col = 5
+    delay_col = 6
+
+    # Filtering data to some interval
+    # delays = delays[np.bitwise_and(delays[:, start_col] > 250000,
+    #                                delays[:, start_col] + delays[:, delay_col] < 500000)]
 
     # Distribution
     delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
@@ -16,44 +22,44 @@ def mean_and_std(array: np.array) -> tuple[float, float]:
         delays_same_router)
     delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8)
 
-    mean, std = mean_and_std(delays[:, 4])
+    mean, std = mean_and_std(delays[:, delay_col])
     print(f"total mean: {mean:.2f} std: {std:.2f}")
     print()
 
     delays0 = delays[delays[:, 0] == 0]
-    mean, std = mean_and_std(delays0[:, 4])
+    mean, std = mean_and_std(delays0[:, delay_col])
     print(f"terminal 0 mean: {mean:.2f} std: {std:.2f}")
     print()
 
     fig, axs = plt.subplots(2, 2)
     axs[0, 0].set_title("Latency from all terminals to all")
     # axs[0, 0].set_xlabel("latency")
-    axs[0, 0].hist(delays[:, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[0, 0].hist(delays[:, delay_col], bins=50, density=True, alpha=0.6, color='b')
     axs[0, 1].set_title("Latency to terminals in same router")
     # axs[0, 1].set_xlabel("latency")
-    axs[0, 1].hist(delays[delays_same_router, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[0, 1].hist(delays[delays_same_router, delay_col], bins=50, density=True, alpha=0.6, color='b')
     axs[1, 0].set_title("Latency to terminals in same group")
     axs[1, 0].set_xlabel("latency")
-    axs[1, 0].hist(delays[delays_same_group, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[1, 0].hist(delays[delays_same_group, delay_col], bins=50, density=True, alpha=0.6, color='b')
     axs[1, 1].set_title("Latency to terminals in other groups")
     axs[1, 1].set_xlabel("latency")
-    axs[1, 1].hist(delays[delays_out_group, 4], bins=50, density=True, alpha=0.6, color='b')
+    axs[1, 1].hist(delays[delays_out_group, delay_col], bins=50, density=True, alpha=0.6, color='b')
     plt.show()
 
     buckets = [delays0[delays0[:, 1] == i] for i in range(1, 72)]
-    buckets_processed = np.array([mean_and_std(b[:, 4]) for b in buckets])
+    buckets_processed = np.array([mean_and_std(b[:, delay_col]) for b in buckets])
     print("Destination, Means and stds for terminal 0")
     for i, (mean, std) in enumerate(buckets_processed):
         print(f"{i+1}, {mean:.2f}, {std:.2f}")
     print()
 
-    mean, std = mean_and_std(delays[delays_same_router, 4])
+    mean, std = mean_and_std(delays[delays_same_router, delay_col])
     print(f"same router mean: {mean:.2f} std: {std:.2f}")
     print()
 
-    mean, std = mean_and_std(delays[delays_same_group, 4])
+    mean, std = mean_and_std(delays[delays_same_group, delay_col])
     print(f"same group mean: {mean:.2f} std: {std:.2f} (excluding same router)")
     print()
 
-    mean, std = mean_and_std(delays[delays_out_group, 4])
+    mean, std = mean_and_std(delays[delays_out_group, delay_col])
     print(f"other groups mean: {mean:.2f} std: {std:.2f}")
diff --git a/scripts/terminal-to-terminal-latency/sort-delays.py b/scripts/terminal-to-terminal-latency/sort-delays.py
index 28bff70c..68e9f369 100644
--- a/scripts/terminal-to-terminal-latency/sort-delays.py
+++ b/scripts/terminal-to-terminal-latency/sort-delays.py
@@ -21,7 +21,8 @@ def collect_data_numpy(
         print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr)
         exit(1)
 
-    return np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype)
+    return np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype,
+                      comments='#')
 
 
 if __name__ == '__main__':
@@ -32,7 +33,8 @@ def collect_data_numpy(
     delays = delays[sorted_indx]
 
     # saving some columns
-    np.savetxt("packets-delay.csv", delays[:, (0, 1, 2, 3, 5)],
-               fmt="%d,%d,%d,%f,%f",
-               header='src_terminal,dst_terminal,packet_id,start_time,delay',
-               comments='')
+    np.savetxt(
+        "packets-delay.csv", delays[:, (0, 1, 2, 3, 4, 5, 7)],
+        fmt="%d,%d,%d,%d,%f,%f,%f",
+        header='src_terminal,dst_terminal,packet_id,packet_size,injection_time,start_time,delay',
+        comments='')
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 8b937ca8..9bf4e1ea 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2276,6 +2276,8 @@ static void setup_packet_latency_path(char const * const dir_to_save) {
     if(!packet_latency_f) {
         tw_error(TW_LOC, "File %s could not be opened", filename_path);
     }
+
+    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,size,workload_injection,start,end,latency\n");
 }
 
 /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
@@ -2732,8 +2734,9 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
 static void packet_latency_save_to_file(unsigned int terminal_id, struct packet_start start, struct packet_end end)
 {
     assert(start.packet_ID == end.packet_ID);
-    fprintf(packet_latency_f, "%u,%u,%lu,%u,%f,%f,%f\n",
+    fprintf(packet_latency_f, "%u,%u,%lu,%u,%f,%f,%f,%f\n",
             terminal_id, start.dfdally_dest_terminal_id, start.packet_ID, start.packet_size,
+            start.workload_injection_time,
             start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
 }
 
@@ -3416,6 +3419,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         .packet_ID = msg->packet_ID,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
+        .workload_injection_time = msg->msg_start_time,
         .packet_size = msg->packet_size
     };
     double const latency = 
@@ -3742,10 +3746,12 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
 
     // Storing packet info to be sent. Once packets arrive back, we can compute
     // the latency of sending the packet
+    //assert(tw_now(lp) == msg->travel_start_time);
     s->sent_packets.push_back({
         .packet_ID = msg->packet_ID,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
+        .workload_injection_time = msg->msg_start_time,
         .packet_size = msg->packet_size});
 
     //qos stuff
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index f0e618e8..f5ad04cd 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -14,6 +14,7 @@
 
 // Global variables
 int total_terminals = 0;
+double ignore_until = 0;
 
 // === Average packet latency functionality
 //
@@ -44,6 +45,10 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     (void) lp;
     (void) src_terminal;
 
+    if (start->travel_start_time < ignore_until) {
+        return;
+    }
+
     unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
     double const latency = end->travel_end_time - start->travel_start_time;
     assert(dest_terminal < data->num_terminals);
@@ -150,7 +155,10 @@ void director_fun(tw_pe * pe) {
         double const next_switch = switch_at.time_stampts[switch_at.current_i];
         if (now > next_switch) {
             if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-                printf("\nswitching at %g", now);
+                if (DEBUG_DIRECTOR == 2) {
+                    printf("\n");
+                }
+                printf("switching at %g", now);
             }
             my_director_data.switch_surrogate();
             if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -180,7 +188,7 @@ void surrogate_config(
     director_mode[0] = '\0';
     configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
     if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
-        if(!g_tw_mynode) {
+        if(g_tw_mynode == 0) {
             fprintf(stderr, "\nSurrogate activated switching at fixed virtual times: ");
         }
 
@@ -200,11 +208,11 @@ void surrogate_config(
                 tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
             }
 
-            if(!g_tw_mynode) {
+            if(g_tw_mynode == 0) {
                 fprintf(stderr, "%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
             }
         }
-        if(!g_tw_mynode) {
+        if(g_tw_mynode == 0) {
             fprintf(stderr, "\n");
         }
 
@@ -226,6 +234,15 @@ void surrogate_config(
     configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
     if (strcmp(latency_pred_name, "average") == 0) {
         *pl_pred = &average_latency_predictor;
+
+        // Finding out whether to ignore some packet latencies
+        int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
+        if (rc) {
+            ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
+        }
+        if (g_tw_mynode == 0) {
+            fprintf(stderr, "Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
+        }
     } else {
         tw_error(TW_LOC, "Unknown predictor for packet latency `%s`", latency_pred_name);
     }

From 64230f2d2bf22f99a4c5abafe5379f06643f8cc0 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 6 Feb 2023 18:51:00 -0500
Subject: [PATCH 011/188] Capturing in queue (input terminal buffer) delay per
 packet

---
 codes/model-net.h                             |  2 +
 codes/net/dragonfly-dally.h                   |  4 ++
 codes/surrogate.h                             |  1 +
 doc/example/tutorial-ping-pong.conf           |  2 +-
 .../determine_mean_std.py                     |  8 +--
 .../sort-delays.py                            |  7 +-
 src/networks/model-net/core/model-net-lp.c    |  3 +-
 src/networks/model-net/dragonfly-dally.C      | 72 +++++++++++++++----
 8 files changed, 76 insertions(+), 23 deletions(-)

diff --git a/codes/model-net.h b/codes/model-net.h
index 577ae5ae..7071e2c9 100644
--- a/codes/model-net.h
+++ b/codes/model-net.h
@@ -130,6 +130,8 @@ typedef struct model_net_request {
     tw_lpid  src_lp;
     // time the source event was called
     tw_stime msg_start_time;
+    // time the event was created
+    tw_stime msg_new_mn_event;
     uint64_t msg_size;
     uint64_t pull_size;
     uint64_t packet_size;
diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 4e9c1a8a..440c7485 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -120,6 +120,10 @@ struct terminal_dally_message
    tw_stime msg_start_time;
    tw_stime saved_busy_time_ross;
    tw_stime saved_fin_chunks_ross;
+
+   // To use in rollback calls
+   tw_stime saved_last_in_queue_time;
+   tw_stime msg_new_mn_event;
 };
 
 #ifdef __cplusplus
diff --git a/codes/surrogate.h b/codes/surrogate.h
index e417ab18..eda13519 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -25,6 +25,7 @@ struct packet_start {
     unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
     double travel_start_time;
     double workload_injection_time; // this is when the workload passed down the event to model-net
+    double delay_at_queue_head;  // delay for this packet to be processed from previous packet in the queue
     uint32_t packet_size;
 };
 
diff --git a/doc/example/tutorial-ping-pong.conf b/doc/example/tutorial-ping-pong.conf
index ada5ccbd..8ac8a9dd 100644
--- a/doc/example/tutorial-ping-pong.conf
+++ b/doc/example/tutorial-ping-pong.conf
@@ -19,7 +19,7 @@ PARAMS
    modelnet_scheduler="fcfs";
 # chunk size in the network (when chunk size = packet size, packets will not be
 # divided into chunks)
-   chunk_size="4096";
+   chunk_size="64";
 # modelnet_scheduler="round-robin";
 # number of routers in group
    num_routers="4";
diff --git a/scripts/terminal-to-terminal-latency/determine_mean_std.py b/scripts/terminal-to-terminal-latency/determine_mean_std.py
index cc6bace5..aeef7750 100644
--- a/scripts/terminal-to-terminal-latency/determine_mean_std.py
+++ b/scripts/terminal-to-terminal-latency/determine_mean_std.py
@@ -8,12 +8,12 @@ def mean_and_std(array: np.array) -> tuple[float, float]:
 
 if __name__ == '__main__':
     delays = np.loadtxt("packets-delay.csv", skiprows=1, delimiter=",")
-    start_col = 5
-    delay_col = 6
+    start_col = 8
+    delay_col = 9
 
     # Filtering data to some interval
-    # delays = delays[np.bitwise_and(delays[:, start_col] > 250000,
-    #                                delays[:, start_col] + delays[:, delay_col] < 500000)]
+    delays = delays[np.bitwise_and(delays[:, start_col] > 200e3,
+                                   delays[:, start_col] + delays[:, delay_col] < 500e3)]
 
     # Distribution
     delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
diff --git a/scripts/terminal-to-terminal-latency/sort-delays.py b/scripts/terminal-to-terminal-latency/sort-delays.py
index 68e9f369..a75d6358 100644
--- a/scripts/terminal-to-terminal-latency/sort-delays.py
+++ b/scripts/terminal-to-terminal-latency/sort-delays.py
@@ -34,7 +34,8 @@ def collect_data_numpy(
 
     # saving some columns
     np.savetxt(
-        "packets-delay.csv", delays[:, (0, 1, 2, 3, 4, 5, 7)],
-        fmt="%d,%d,%d,%d,%f,%f,%f",
-        header='src_terminal,dst_terminal,packet_id,packet_size,injection_time,start_time,delay',
+        "packets-delay.csv", delays[:, (0, 1, 2, 3, 4, 5, 6, 7, 8, 10)],
+        fmt="%d,%d,%d,%d,%d,%d,%f,%f,%f,%f",
+        header='src_terminal,dst_terminal,packet_id,is_surrogate_on,is_predicted,'
+               'packet_size,injection_time,delay_at_queue_head,start_time,delay',
         comments='')
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index d469fa10..10e22bd3 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -133,7 +133,7 @@ static void model_net_commit_event(model_net_base_state * ns, tw_bf *b,  model_n
     {
         void * sub_msg;
         sub_msg = ((char*)m)+msg_offsets[ns->net_id];
-    
+
         if(ns->sub_type->commit != NULL)
             ns->sub_type->commit(ns->sub_state, b, sub_msg, lp);
     }
@@ -755,6 +755,7 @@ void handle_new_msg(
     // simply pass down to the scheduler
     model_net_request *r = &m->msg.m_base.req;
     // don't forget to set packet size, now that we're responsible for it!
+    r->msg_new_mn_event = tw_now(lp);
     r->packet_size = ns->params->packet_size;
     r->msg_id = ns->msg_id++;
     void * m_data = m+1;
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 9bf4e1ea..99fadd13 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -573,6 +573,8 @@ struct terminal_state
     // to store in order efficiently their arrival)
     priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)> sent_packets_latency;
 
+    // Stores the last time in which a packet was processed (time at which a T_GENERATE event was processed)
+    double last_in_queue_time;
     // Predictor data
     void * predictor_data;
 };
@@ -2277,7 +2279,7 @@ static void setup_packet_latency_path(char const * const dir_to_save) {
         tw_error(TW_LOC, "File %s could not be opened", filename_path);
     }
 
-    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,size,workload_injection,start,end,latency\n");
+    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,is_surrogate_on,is_predicted,size,workload_injection,delay_at_queue_head,start,end,latency\n");
 }
 
 /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
@@ -2731,12 +2733,18 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
     return -1;
 }
 
-static void packet_latency_save_to_file(unsigned int terminal_id, struct packet_start start, struct packet_end end)
-{
+static void packet_latency_save_to_file(
+        unsigned int terminal_id,
+        struct packet_start start,
+        struct packet_end end,
+        bool is_predicted
+) {
     assert(start.packet_ID == end.packet_ID);
-    fprintf(packet_latency_f, "%u,%u,%lu,%u,%f,%f,%f,%f\n",
-            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID, start.packet_size,
-            start.workload_injection_time,
+    fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f\n",
+            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
+            is_surrogate_on, is_predicted,
+            start.packet_size,
+            start.workload_injection_time, start.delay_at_queue_head,
             start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
 }
 
@@ -2759,7 +2767,7 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
         && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID)
     {
         if (packet_latency_f) {
-            packet_latency_save_to_file(s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top());
+            packet_latency_save_to_file(s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top(), false);
         }
         if (surrogate_configured && !is_surrogate_on) {
             assert(terminal_predictor != NULL);
@@ -3083,6 +3091,7 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp )
     } else {
         s->predictor_data = NULL;
     }
+    s->last_in_queue_time = 0;
     return;
 }
 
@@ -3340,6 +3349,7 @@ static tw_stime dragonfly_dally_packet_event(
     msg->pull_size = req->pull_size;
     msg->magic = terminal_magic_num; 
     msg->msg_start_time = req->msg_start_time;
+    msg->msg_new_mn_event = req->msg_new_mn_event;
     msg->rail_id = req->queue_offset;
     msg->app_id = req->app_id;
 
@@ -3370,6 +3380,8 @@ static void packet_generate_predicted_rc(terminal_state * s, tw_bf * bf, termina
     stat->send_bytes -= msg->packet_size;
     stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size;
 
+    s->last_in_queue_time = msg->saved_last_in_queue_time;
+
     terminal_predictor->predict_rc(s->predictor_data, lp);
 
     s->packet_counter--;
@@ -3398,8 +3410,8 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
 
     // determining injection delay
     tw_stime injection_ts;
-    double bandwidth_coef = 1;
     if (g_congestion_control_enabled) {
+        double bandwidth_coef = 1;
         if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
             bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
         }
@@ -3409,22 +3421,36 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
     }
     tw_stime const nic_ts = injection_ts;
-
-    // Scheduling idle event to allow next message to be sent
-    bool const is_from_remote = false;
-    model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
+    //printf("injection_ts = %f\n", injection_ts);
 
     // Using predictor to find latency
+    tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
     auto start = (struct packet_start) {
         .packet_ID = msg->packet_ID,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
-        .packet_size = msg->packet_size
+        .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
+        .packet_size = msg->packet_size,
     };
+
+    // Scheduling idle event for next packet to be processed
+    bool const is_from_remote = false;
+    // TODO(helq): estimate from data collected before, new nic_ts
+    model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
+    msg->saved_last_in_queue_time = s->last_in_queue_time;
+    s->last_in_queue_time = tw_now(lp);
+
     double const latency = 
         terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
+    // Saving
+    auto end = (struct packet_end) {
+        .packet_ID = msg->packet_ID,
+        .travel_end_time = tw_now(lp) + latency,
+    };
+    packet_latency_save_to_file(s->terminal_id, start, end, true);
+
     // Sending packet directly to destination terminal
     //tw_stime const ts = 0;
     terminal_dally_message * m;
@@ -3525,6 +3551,12 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
         if(bf->c8)
             s->last_buf_full[msg->rail_id] = msg->saved_busy_time;
     }
+
+    if (bf->c13) {
+        s->last_in_queue_time = msg->saved_last_in_queue_time;
+        bf->c13 = 0;
+    }
+
     struct mn_stats* stat;
     stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
     stat->send_count--;
@@ -3747,12 +3779,15 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     // Storing packet info to be sent. Once packets arrive back, we can compute
     // the latency of sending the packet
     //assert(tw_now(lp) == msg->travel_start_time);
+    tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
     s->sent_packets.push_back({
         .packet_ID = msg->packet_ID,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
-        .packet_size = msg->packet_size});
+        .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
+        .packet_size = msg->packet_size,
+        });
 
     //qos stuff
     int num_qos_levels = s->params->num_qos_levels;
@@ -3837,6 +3872,9 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
             if(s->terminal_length[j][vcg] < s->params->cn_vc_size && s->issueIdle[j] == 0)
             {
                 model_net_method_idle_event2(nic_ts, 0, j, lp);
+                msg->saved_last_in_queue_time = s->last_in_queue_time;
+                s->last_in_queue_time = tw_now(lp);
+                bf->c13 = 1;
             }
             else
             {
@@ -3855,6 +3893,9 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     else {
         if (s->terminal_length[msg->rail_id][vcg] < s->params->cn_vc_size) {
             model_net_method_idle_event2(nic_ts, 0, msg->rail_id, lp);
+            msg->saved_last_in_queue_time = s->last_in_queue_time;
+            s->last_in_queue_time = tw_now(lp);
+            bf->c13 = 1;
         } else {
             bf->c11 = 1;
             s->issueIdle[msg->rail_id] = 1;
@@ -3956,6 +3997,7 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag
     if(bf->c5)
     {
         s->issueIdle[msg->rail_id] = 1;
+        s->last_in_queue_time = msg->saved_last_in_queue_time;
         if(bf->c6)
         {
             s->busy_time[msg->rail_id] = msg->saved_total_time;
@@ -4123,6 +4165,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
         bf->c5 = 1;
         s->issueIdle[msg->rail_id] = 0;
         model_net_method_idle_event2(injection_ts, 0, msg->rail_id, lp);
+        msg->saved_last_in_queue_time = s->last_in_queue_time;
+        s->last_in_queue_time = tw_now(lp);
     
         if(s->last_buf_full[msg->rail_id] > 0.0)
         {

From 6e23955850c3edf22b62d0e2b87cd084e0fca1b6 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 20 Feb 2023 17:00:22 -0500
Subject: [PATCH 012/188] Initial implementation of network freezing

This requires the introduction of zombie events which inform the
terminals of what packets to ignore but to keep simulating their
behaviour anyway.
---
 codes/lp-type-lookup.h                     |   2 +
 codes/model-net-lp.h                       |  11 +
 codes/net/dragonfly-dally.h                |   8 +-
 codes/surrogate.h                          |  41 +-
 doc/example/tutorial-synthetic-ping-pong.c |   8 +-
 src/networks/model-net/core/model-net-lp.c |  84 ++++
 src/networks/model-net/dragonfly-dally.C   | 465 ++++++++++++++++++---
 src/util/lp-type-lookup.c                  |   2 -
 src/util/surrogate.c                       | 394 +++++++++++++++--
 9 files changed, 898 insertions(+), 117 deletions(-)

diff --git a/codes/lp-type-lookup.h b/codes/lp-type-lookup.h
index 1fc11483..b8799e2e 100644
--- a/codes/lp-type-lookup.h
+++ b/codes/lp-type-lookup.h
@@ -14,6 +14,8 @@ extern "C" {
 
 #include "ross.h"
 
+#define MAX_LP_TYPES 64
+
 /* look up the lp type registered through lp_type_register. Mostly used
  * internally */
 const tw_lptype* lp_type_lookup(const char* name);
diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index f713caaa..faed58c7 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -118,6 +118,16 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
     void **msg_data,
     void **extra_data);
 
+// Function to call when switching from highdef to surrogate
+void model_net_method_switch_to_surrogate(tw_lp * lp);
+
+// Function to call when switching from surrogate to highdef
+void model_net_method_switch_to_highdef(tw_lp * lp);
+
+// It will call the function (pointer) on the internal structure/network model.
+// The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype
+void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp));
+
 /// The following functions/data structures should not need to be used by
 /// model developers - they are just provided so other internal components can
 /// use them
@@ -142,6 +152,7 @@ typedef struct model_net_base_msg {
     model_net_request req;
     int is_from_remote;
     int isQueueReq;
+    int created_during_surrogate; // if the MN_BASE_SCHED_NEXT event was created in surrogate mode, this variable contains the surrogate iteration at which it was created; otherwise it's -1
     tw_stime save_ts;
     // parameters to pass to new messages (via model_net_set_msg_params)
     // TODO: make this a union for multiple types of parameters
diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 440c7485..8c285287 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -28,12 +28,12 @@ struct terminal_dally_message
   tw_stime travel_end_time;
  /* packet ID of the flit  */
   unsigned long long packet_ID;
-  /* event type of the flit */
-  short  type;
+  /* event type of the flit. Actual type is `enum dfdally_event_t` */
+  short type;
+  /* if the type==T_NOTIFY then we have to find out what type of notification is it. Actual type is `enum dfdally_notify_t` */
+  short notify_type;
   /* category: comes from codes */
   char category[CATEGORY_NAME_MAX];
-  /* store category hash in the event */
-  uint32_t category_hash;
   /* final destination LP ID, this comes from codes can be a server or any other LP type*/
   tw_lpid final_dest_gid;
   /*sending LP ID from CODES, can be a server or any other LP type */
diff --git a/codes/surrogate.h b/codes/surrogate.h
index eda13519..afbb17bd 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -9,11 +9,23 @@
  */
 #include <ross.h>
 #include <stdbool.h>
+#include "codes/codes_mapping.h"
+#include "codes/lp-type-lookup.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+/**
+ * Variable definitions
+ */
+
+// When true (below), the network state will be frozen at switch time (from
+// high-def to surrogate) and later reanimated on the switch back (from
+// surrogate to high-def). If not, all events will be kept in the network while
+// on surrogate mode, which means that the network will vacate completely
+#define FREEZE_NETWORK_STATE 1
+
 /**
  * Terminal-to-terminal packet latency prediction machinery
  */
@@ -21,12 +33,14 @@ extern "C" {
 // Packet latencies
 struct packet_start {
     uint64_t packet_ID;
-    // tw_lpid dest_terminal_id;  // ROSS id; LPID for terminal
+    tw_lpid dest_terminal_lpid;  // ROSS id; LPID for terminal
     unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
     double travel_start_time;
     double workload_injection_time; // this is when the workload passed down the event to model-net
     double delay_at_queue_head;  // delay for this packet to be processed from previous packet in the queue
     uint32_t packet_size;
+    void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
+    void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
 };
 
 struct packet_end {
@@ -69,11 +83,28 @@ struct director_data {
  * Configuration specifics
  */
 
+typedef void (*model_switch_f) (void * data, tw_lp * lp); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
+typedef void (*model_ask_if_freeze_f) (void * data_model, void * data_model_net, tw_lp * lp, bool * ret); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
+
+struct lp_types_switch {
+    char lpname[MAX_NAME_LENGTH];
+    bool is_modelnet;
+    model_switch_f        highdef_to_surrogate;
+    model_switch_f        surrogate_to_highdef;
+    model_ask_if_freeze_f should_event_be_frozen;
+};
+
+struct surrogate_config {
+    struct director_data director;  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
+    int total_terminals;  //!< total number of terminals
+    size_t n_lp_types;
+    struct lp_types_switch lp_types[MAX_LP_TYPES];
+};
+
 /** Loads surrogate configuration, including packet latency predictor. */
-void surrogate_config(
-        const char * annotation,
-        const struct director_data d,  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
-        const int total_terminals,  //!< total number of terminals
+void surrogate_configure(
+        char const * const annotation,
+        struct surrogate_config * const config,
         struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it
 );
 
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index ab58b817..070624e4 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -302,6 +302,10 @@ int main(int argc, char **argv)
     int num_nets;
     int *net_ids;
 
+    /* 1 day of simulation time is drastically huge but it will ensure
+       that the simulation doesn't try to end before all packets are delivered */
+    g_tw_ts_end = s_to_ns(24 * 60 * 60);
+
     tw_opt_add(app_opt);
     tw_init(&argc, &argv);
 
@@ -328,10 +332,6 @@ int main(int argc, char **argv)
     net_id = *net_ids;
     free(net_ids);
 
-    /* 1 day of simulation time is drastically huge but it will ensure
-       that the simulation doesn't try to end before all packets are delivered */
-    g_tw_ts_end = s_to_ns(24 * 60 * 60);
-
     num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
     assert(num_nodes);
 
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 10e22bd3..3fb7aa62 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -12,6 +12,7 @@
 #include "codes/model-net-sched.h"
 #include "codes/codes_mapping.h"
 #include "codes/jenkins-hash.h"
+#include "codes/surrogate.h"
 
 #define MN_NAME "model_net_base"
 
@@ -39,6 +40,8 @@ typedef struct model_net_base_params_s {
 static int                       num_params = 0;
 static const char              * annos[CONFIGURATION_MAX_ANNOS];
 static model_net_base_params     all_params[CONFIGURATION_MAX_ANNOS];
+static bool is_surrogate_on = false;
+static int num_surrogate = 0;
 
 static tw_stime mn_sample_interval = 0.0;
 static tw_stime mn_sample_end = 0.0;
@@ -63,6 +66,8 @@ typedef struct model_net_base_state {
     void *sub_state;
     tw_stime next_available_time;
     tw_stime *node_copy_next_available_time;
+    // Copy of in_sched_send_loop before switching to surrogate mode
+    int * sched_loop_pre_surrogate, sched_recv_loop_pre_surrogate;
 } model_net_base_state;
 
 
@@ -504,6 +509,7 @@ void model_net_base_lp_init(
     }
 
     ns->in_sched_send_loop = (int *)malloc(ns->params->num_queues * sizeof(int));
+    ns->sched_loop_pre_surrogate = (int *)malloc(ns->params->num_queues * sizeof(int));
     ns->sched_send = (model_net_sched**)malloc(ns->params->num_queues * sizeof(model_net_sched*));
     for(int i = 0; i < ns->params->num_queues; i++) {
         ns->sched_send[i] = (model_net_sched*)malloc(sizeof(model_net_sched));
@@ -815,6 +821,7 @@ void handle_new_msg(
 #if DEBUG
         printf("%llu handle_shed_next() from handle_new_msg()\n",LLU(tw_now(lp)));
 #endif
+        m->msg.m_base.created_during_surrogate = is_surrogate_on ? num_surrogate : -1;
         handle_sched_next(ns, b, m, lp);
         assert(*in_sched_loop);
     }
@@ -862,6 +869,16 @@ void handle_sched_next(
 #if DEBUG
     printf("%llu handle sched_next function\n",LLU(tw_now(lp)));
 #endif
+    if (FREEZE_NETWORK_STATE) {
+        // This event should not be processed outside of the surrogate environment it was created, and it must be processed if it was generated during vanilla high-def simulation mode
+        bool const from_same_surrogate_instance = is_surrogate_on && m->msg.m_base.created_during_surrogate == num_surrogate;
+        bool const highdef_created_during_highdef = !is_surrogate_on && m->msg.m_base.created_during_surrogate == -1;
+        if (!from_same_surrogate_instance && !highdef_created_during_highdef) {
+            b->c12 = 1;
+            return;
+        }
+    }
+
     tw_stime poffset;
     model_net_request *r = &m->msg.m_base.req;
     int is_from_remote = m->msg.m_base.is_from_remote;
@@ -901,6 +918,13 @@ void handle_sched_next_rc(
         tw_bf *b,
         model_net_wrap_msg * m,
         tw_lp * lp){
+
+    // Handling event was skipped
+    if (b->c12) {
+        b->c12 = 0;
+        return;
+    }
+
     model_net_request *r = &m->msg.m_base.req;
     int is_from_remote = m->msg.m_base.is_from_remote;
     model_net_sched * ss = is_from_remote ? ns->sched_recv : ns->sched_send[r->queue_offset];
@@ -1008,6 +1032,7 @@ void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
     msg_set_header(model_net_base_magic, MN_BASE_SCHED_NEXT, lp->gid,
             &m_wrap->h);
     m_wrap->msg.m_base.is_from_remote = is_recv_queue;
+    m_wrap->msg.m_base.created_during_surrogate = is_surrogate_on ? num_surrogate : -1;
     r_wrap->queue_offset = queue_offset;
     tw_event_send(e);
 }
@@ -1082,6 +1107,65 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 
 }
 
+void model_net_method_switch_to_surrogate(tw_lp * lp) {
+    model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
+
+    //printf("PID %d in_sched_send_loop = [", lp->gid);
+    for (int i = 0; i < ns->params->num_queues; i++) {
+        //printf("%d ", ns->in_sched_send_loop[i]);
+        ns->sched_loop_pre_surrogate[i] = ns->in_sched_send_loop[i];
+        // scheduling an idle event to prevent getting stuck in the middle of a scheduling loop
+        if (ns->sched_loop_pre_surrogate[i]) {
+            // TODO: change zero-offset event for something a bit more sensible
+            model_net_method_idle_event(0.0, 0, lp);
+        }
+        ns->in_sched_send_loop[i] = 0;
+    }
+    //printf("]\n");
+
+    ns->sched_recv_loop_pre_surrogate = ns->in_sched_recv_loop;
+    if (ns->in_sched_recv_loop) {
+        model_net_method_idle_event(0.0, 1, lp);
+    }
+    ns->in_sched_recv_loop = 0;
+
+    is_surrogate_on = true;
+    num_surrogate++;
+}
+
+void model_net_method_switch_to_highdef(tw_lp * lp) {
+    model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
+
+    //printf("PID %d in_sched_send_loop = [", lp->gid);
+    for (int i = 0; i < ns->params->num_queues; i++) {
+        //printf("%d ", ns->in_sched_send_loop[i]);
+        // We have to duplicate an idle event that was produced in surrogate-mode, but not yet processed by the time we switch to high-def again, if that event was in the middle of the loop (asking for the next packet to inject) and in no other case
+        // TODO: THIS MUST HAVE SOME SECONDARY EFFECT THAT WILL PROVOQUE A CURRUPT SIMULATION, but not so far. Which is weird
+        //model_net_method_idle_event(0.0, 0, lp);
+        if (ns->in_sched_send_loop[i] == 1 && ns->sched_loop_pre_surrogate[i] == 0) {
+            model_net_method_idle_event(0.0, 0, lp);
+        }// else {
+        ns->in_sched_send_loop[i] = ns->sched_loop_pre_surrogate[i];
+        //    ns->in_sched_send_loop[i] = ns->sched_loop_pre_surrogate[i];
+        //}
+    }
+    //printf("]\n");
+
+    if (ns->in_sched_recv_loop == 1 && ns->sched_recv_loop_pre_surrogate == 0) {
+        model_net_method_idle_event(0.0, 1, lp);
+    } //else {
+        ns->in_sched_recv_loop = ns->sched_recv_loop_pre_surrogate;
+    //}
+
+    is_surrogate_on = false;
+}
+
+void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp)) {
+    model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
+
+    fun(ns->sub_state, lp);
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 99fadd13..94752b0f 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -77,6 +77,10 @@
 #define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_DALLY_ROUTER])
 #define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_DALLY_ROUTER])
 
+/* handles terminal and router events like packet generate/send/receive/buffer */
+typedef struct terminal_state terminal_state;
+typedef struct router_state router_state;
+
 static int max_lvc_src_g = 1;
 static int max_lvc_intm_g = 3;
 static int min_gvc_src_g = 0;
@@ -191,6 +195,8 @@ static bool is_surrogate_on = false;
 static struct packet_latency_predictor * terminal_predictor = NULL;
 static void switch_surrogate(void);
 static bool is_surrogate_on_fun(void);
+static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp);
+static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp);
 //
 // ==== END OF Parameters to tune surrogate mode ====
 
@@ -335,7 +341,7 @@ typedef enum route_scoring_metric_t
 } route_scoring_metric_t;
 
 /* Enumeration of types of events sent between model LPs */
-typedef enum event_t
+enum event_t
 {
     T_GENERATE=1,
     T_ARRIVE,
@@ -348,10 +354,17 @@ typedef enum event_t
     R_BW_HALT,
     T_BANDWIDTH,
     R_SNAPSHOT, //used for timed statistic outputs
-    T_NOTIFY_TOTAL_LATENCY,  // used to notify a terminal of the total delay of a packet
+    T_NOTIFY,  // used to notify a source or destination terminal about packets status (useful for informing about latency, zombie packet or delete a zombie packet)
     T_ARRIVE_PREDICTED,  // this event is generated by a latency predictor instead of traversing the network
     T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event
-} event_t;
+};
+
+// Types of notifications between terminals
+enum notify_t {
+    NOTIFY_LATENCY,  // Notifying the source terminal of the total latency to deliver the packet
+    NOTIFY_ZOMBIE,   // Notifying the destination terminal of a packet that should be treated as a zombie
+    NOTIFY_ZOMBIE_DEL,  // Notifying the destination terminal that the packet we thought was a zombie was in fact not (just to clean space)
+};
 
 /* whether the last hop of a packet was global, local or a terminal */
 enum last_hop
@@ -467,9 +480,15 @@ static struct {
     }
 } packet_end_greater_cmp;
 
-/* handles terminal and router events like packet generate/send/receive/buffer */
-typedef struct terminal_state terminal_state;
-typedef struct router_state router_state;
+struct packet_id {
+    uint64_t packet_ID;
+    unsigned int dfdally_src_terminal_id;
+};
+bool operator<(struct packet_id const &lk, struct packet_id const &rk) {
+    return lk.packet_ID == rk.packet_ID ? lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id : lk.packet_ID < rk.packet_ID;
+}
+// Some more function declarations
+static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_message * msg, enum notify_t notification);
 
 /* dragonfly compute node data structure */
 struct terminal_state
@@ -575,8 +594,14 @@ struct terminal_state
 
     // Stores the last time in which a packet was processed (time at which a T_GENERATE event was processed)
     double last_in_queue_time;
-    // Predictor data
+    // The predictor kicks in on surrogate mode and predicts the time a packet will take to its destination
     void * predictor_data;
+
+    // Zombie events appear when the network traffic is displaced to the future. By then, all packets that were in the network should have been already delievered, thus zombies
+    set<struct packet_id> zombies;
+
+    // Events that will arrive to this terminal
+    set<struct packet_id> arrived_here;
 };
 
 struct router_state
@@ -2218,10 +2243,23 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     int director_mode_len = configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
     // if surrogate mode has been set up
     if (director_mode_len > 0) {
-        surrogate_config(anno,
-                {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
-                p->total_terminals,
-                &terminal_predictor);
+        struct surrogate_config surr_conf = {
+            .director = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
+            .total_terminals = p->total_terminals,
+            .n_lp_types = 2,
+            .lp_types = {
+                {.lpname = "modelnet_dragonfly_dally",
+                 .is_modelnet = true,
+                 .highdef_to_surrogate = (model_switch_f) dragonfly_dally_terminal_highdef_to_surrogate,
+                 .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef},
+                {.lpname = "modelnet_dragonfly_dally_router",
+                 .is_modelnet = true,
+                 .highdef_to_surrogate = NULL,
+                 .surrogate_to_highdef = NULL},
+                0
+            }
+        };
+        surrogate_configure(anno, &surr_conf, &terminal_predictor);
         if (terminal_predictor) {
             surrogate_configured = true;
         } else {
@@ -2386,7 +2424,7 @@ static void dragonfly_dally_router_congestion_event_commit(router_state *s, tw_b
     cc_router_local_congestion_event_commit(s->local_congestion_controller, bf, msg, lp);
 }
 
-int get_vcg_from_category(terminal_dally_message * msg)
+static int get_vcg_from_category(terminal_dally_message * msg)
 {
    if(strcmp(msg->category, "high") == 0)
        return Q_HIGH;
@@ -2439,7 +2477,7 @@ static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int outp
     return percent_bw;
 }
 
-void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
+static void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     int num_qos_levels = s->params->num_qos_levels;
     int num_rails = s->params->num_rails;
@@ -2463,7 +2501,7 @@ void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     
 }
 /* resets the bandwidth numbers recorded so far */
-void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
+static void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     int num_qos_levels = s->params->num_qos_levels;
     int num_rails = s->params->num_rails;
@@ -2507,7 +2545,7 @@ void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_messa
     }
 }
 
-void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
+static void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
 {
     int radix = s->params->radix;
     int num_qos_levels = s->params->num_qos_levels;
@@ -2528,7 +2566,8 @@ void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_me
         msg->rc_is_qos_set = 0;
     }
 }
-void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
+
+static void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
 {
     int radix = s->params->radix;
     int num_qos_levels = s->params->num_qos_levels;
@@ -2752,7 +2791,6 @@ static void packet_latency_save_to_file(
 
 static void switch_surrogate(void) {
     is_surrogate_on = ! is_surrogate_on;
-    // TODO: `sent_packets` and `sent_packets_latency` have to be cleaned on switches. This won't be an apparent problem until switching to and from surrogate mode happens in a very short amount of time
 }
 
 static bool is_surrogate_on_fun(void) {
@@ -2766,18 +2804,121 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
         && !s->sent_packets_latency.empty()
         && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID)
     {
+        auto start = s->sent_packets.front();
         if (packet_latency_f) {
-            packet_latency_save_to_file(s->terminal_id, s->sent_packets.front(), s->sent_packets_latency.top(), false);
+            packet_latency_save_to_file(s->terminal_id, start, s->sent_packets_latency.top(), false);
         }
         if (surrogate_configured && !is_surrogate_on) {
             assert(terminal_predictor != NULL);
             auto end = s->sent_packets_latency.top();
-            terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &s->sent_packets.front(), &end);
+            terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &start, &end);
+        }
+
+        // Deallocating memory
+        if (start.message_data) {
+            free(start.message_data);
+        }
+        if (start.remote_event_data) {
+            free(start.remote_event_data);
         }
+
         s->sent_packets.pop_front();
         s->sent_packets_latency.pop();
     }
 }
+
+// This function never rollsback because it's called at GVT
+static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp) {
+    process_packet_latencies(s, lp);
+
+    // Going through every packet that was sent but not yet received, remove it
+    // from the list, send it to its destination using the predictor, and
+    // notify of its zombie status.
+    while(!s->sent_packets.empty()) {
+        struct packet_start start = s->sent_packets.front();
+        s->sent_packets.pop_front();
+        assert(start.message_data);
+
+        // The predictor is asked to predict the latency of the packet regardless if it is a zombie or not.
+        // (This makes it so that we feed the predictor only during high-def mode, and never a switching time)
+        double latency = 
+            terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
+
+        // The packet was delievered and its latency is known (we were notified). Delete packet from stack
+        if (!s->sent_packets_latency.empty() && start.packet_ID == s->sent_packets_latency.top().packet_ID) {
+            auto const end = s->sent_packets_latency.top();
+            s->sent_packets_latency.pop();
+            packet_latency_save_to_file(s->terminal_id, start, end, false);
+        } else {
+            // The packet has not been delievered, or we haven't received the notification yet.
+            // Send directly to destination and notify of zombie event
+
+            double arrival = start.travel_start_time + latency;
+            if (arrival < tw_now(lp)) {
+                arrival = tw_now(lp);
+                latency = 0;
+            }
+            
+            auto const end = (struct packet_end) {
+                .packet_ID = start.packet_ID,
+                .travel_end_time = arrival,
+            };
+            packet_latency_save_to_file(s->terminal_id, start, end, true);
+
+            assert(start.message_data);
+            terminal_dally_message * const msg_data = (terminal_dally_message*) start.message_data;
+            terminal_dally_message * m;
+            void * remote_event;
+            tw_event * const e = model_net_method_event_new(
+                    start.dest_terminal_lpid, latency, lp, DRAGONFLY_DALLY, (void**)&m, &remote_event);
+            memcpy(m, msg_data, sizeof(terminal_dally_message));
+            if (m->remote_event_size_bytes) {
+                memcpy(remote_event, start.remote_event_data, m->remote_event_size_bytes);
+            }
+            m->magic = terminal_magic_num;
+            m->type = T_ARRIVE_PREDICTED;
+            m->src_terminal_id = lp->gid;
+            //m->travel_start_time = tw_now(lp);
+            //m->rail_id = msg->rail_id;
+            //m->vc_index = vcg;
+            //m->last_hop = TERMINAL;
+            m->path_type = -1;
+            m->local_event_size_bytes = 0;
+            m->is_intm_visited = 0;
+            m->intm_grp_id = -1;
+            m->intm_rtr_id = -1; //for legacy prog-adaptive
+            assert(m->dfdally_src_terminal_id  == s->terminal_id);
+            assert(m->packet_ID                == start.packet_ID);
+            assert(m->dest_terminal_lpid       == start.dest_terminal_lpid);
+            assert(m->dfdally_dest_terminal_id == start.dfdally_dest_terminal_id);
+            //assert(m->travel_start_time        >= start.travel_start_time);
+            assert(m->packet_size              == start.packet_size);
+            tw_event_send(e);
+
+            //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", start.dest_terminal_lpid, start.dfdally_dest_terminal_id);
+            notify_dest_lp_of(s, lp, m, NOTIFY_ZOMBIE);
+        }
+
+        // Deallocating memory from packet_start
+        if (start.message_data) {
+            free(start.message_data);
+        }
+        if (start.remote_event_data) {
+            free(start.remote_event_data);
+        }
+    }
+
+    // TODO: Find out how to schedule an idle event (AND how to remove one when rolling back!)
+
+    assert(s->sent_packets_latency.empty());
+};
+
+// This function never rollsback because it's called at GVT
+static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp) {
+    (void) s;
+    (void) lp;
+    //printf("Terminal %d (PID: %d) switching back to high-def at %e\n", s->terminal_id, lp->gid, tw_now(lp));
+};
 //
 // ==== END OF Surrogate functions definition ====
 
@@ -2786,7 +2927,7 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
 //this storage place could be in the event or elsewehre so long as the data is over-writeable
 //in case the event gets rolled back and replayed.
 //On commit of the snapshot event, the commit function looks where the data was stored and outputs to lpio
-void router_send_snapshot_events(router_state *s, tw_lp *lp)
+static void router_send_snapshot_events(router_state *s, tw_lp *lp)
 {
     int len = sprintf(snapshot_filename, "dragonfly-snapshots.csv");
     snapshot_filename[len] = '\0';
@@ -2815,7 +2956,7 @@ void router_send_snapshot_events(router_state *s, tw_lp *lp)
     // printf("%d: sending snapshot events\n",s->router_id);
 }
 
-void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
+static void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
 {
     for(int i = 0; i < s->params->radix; i++)
     {
@@ -2829,7 +2970,7 @@ void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_mes
     }
 }
 
-void terminal_dally_commit(terminal_state * s,
+static void terminal_dally_commit(terminal_state * s,
 		tw_bf * bf, 
 		terminal_dally_message * msg, 
         tw_lp * lp)
@@ -2862,20 +3003,24 @@ void terminal_dally_commit(terminal_state * s,
         }
     }
 
-    if(msg->type == T_NOTIFY_TOTAL_LATENCY)
+    if(msg->type == T_NOTIFY && msg->notify_type == NOTIFY_LATENCY)
     {
         assert(lp->gid == msg->src_terminal_id);
         assert(s->terminal_id == msg->dfdally_src_terminal_id);
-        // TODO(helq): assert that msg->packet_ID to be present in s->sent_packets
+        if (!s->sent_packets.empty() && s->sent_packets.front().packet_ID <= msg->packet_ID) {
+            s->sent_packets_latency.push({
+                    .packet_ID = msg->packet_ID,
+                    .travel_end_time = msg->travel_end_time});
 
-        s->sent_packets_latency.push({
-                .packet_ID = msg->packet_ID,
-                .travel_end_time = msg->travel_end_time});
-        process_packet_latencies(s, lp);
+            process_packet_latencies(s, lp);
+        } else {
+            // The notification for zombie deletion should have been sent
+            assert(bf->c14);
+        }
     }
 }
 
-void router_dally_commit(router_state * s,
+static void router_dally_commit(router_state * s,
 		tw_bf * bf, 
 		terminal_dally_message * msg, 
         tw_lp * lp)
@@ -2934,7 +3079,7 @@ void router_dally_commit(router_state * s,
 }
 
 /* initialize a dragonfly compute node terminal */
-void terminal_dally_init( terminal_state * s, tw_lp * lp )
+static void terminal_dally_init( terminal_state * s, tw_lp * lp )
 {
     s->packet_gen = 0;
     s->packet_fin = 0;
@@ -3083,6 +3228,8 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp )
     // (see https://en.cppreference.com/w/cpp/memory/construct_at)
     new (&s->sent_packets) deque<struct packet_start>();
     new (&s->sent_packets_latency) priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)>();
+    new (&s->zombies) set<struct packet_id>();
+    new (&s->arrived_here) set<struct packet_id>();
 
     // alloc'ing memory for predictor, calling initiliazer for predictor
     if (terminal_predictor != NULL && terminal_predictor->predictor_data_sz > 0) {
@@ -3097,7 +3244,7 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp )
 
 /* sets up the router virtual channels, global channels, 
  * local channels, compute node channels */
-void router_dally_init(router_state * r, tw_lp * lp)
+static void router_dally_init(router_state * r, tw_lp * lp)
 {
     char anno[MAX_NAME_LENGTH];
     codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL,
@@ -3427,11 +3574,12 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
     auto start = (struct packet_start) {
         .packet_ID = msg->packet_ID,
+        .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
         .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
-        .packet_size = msg->packet_size,
+        .packet_size = msg->packet_size
     };
 
     // Scheduling idle event for next packet to be processed
@@ -3445,7 +3593,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
     // Saving
-    auto end = (struct packet_end) {
+    auto const end = (struct packet_end) {
         .packet_ID = msg->packet_ID,
         .travel_end_time = tw_now(lp) + latency,
     };
@@ -3468,7 +3616,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     m->dfdally_src_terminal_id = s->terminal_id; //m->travel_start_time = tw_now(lp);
     //m->rail_id = msg->rail_id;
     //m->vc_index = vcg;
-    // m->last_hop = TERMINAL;
+    //m->last_hop = TERMINAL;
     m->path_type = -1;
     m->local_event_size_bytes = 0;
     m->is_intm_visited = 0;
@@ -3778,15 +3926,25 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
 
     // Storing packet info to be sent. Once packets arrive back, we can compute
     // the latency of sending the packet
+    void * msg_data = malloc(sizeof(terminal_dally_message));
+    memcpy(msg_data, msg, sizeof(terminal_dally_message));
+    void * remote_data = NULL;
+    if (msg->remote_event_size_bytes) {
+        remote_data = malloc(msg->remote_event_size_bytes);
+        memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
+    }
     //assert(tw_now(lp) == msg->travel_start_time);
     tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
-    s->sent_packets.push_back({
+    s->sent_packets.push_back((struct packet_start){
         .packet_ID = msg->packet_ID,
+        .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
         .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
         .packet_size = msg->packet_size,
+        .message_data = msg_data,
+        .remote_event_data = remote_data
         });
 
     //qos stuff
@@ -4186,23 +4344,122 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     return;
 }
 
-static void notify_src_lp_on_total_latency(terminal_state * s, terminal_dally_message * msg, tw_lp * lp)
+static void notify_dest_lp_of(
+        terminal_state * s,
+        tw_lp * lp,
+        terminal_dally_message * msg,
+        enum notify_t notification
+        ) {
+    assert(NOTIFY_ZOMBIE == notification || notification == NOTIFY_ZOMBIE_DEL);
+    double offset = -1.0;
+    switch (notification) {
+        case NOTIFY_ZOMBIE:
+            offset = 0.0; // Zero-offset events are ugly, but we want to guarantee correctness, so this prevents funky stuff from happening (like, events just arriving)
+            break;
+        case NOTIFY_ZOMBIE_DEL:
+            offset = g_tw_lookahead; // We don't care how long this will take. This is just to clean the zombies set in the destination terminal
+            break;
+        default:
+            tw_error(TW_LOC, "The notification event with type %d couldn't be created", notification);
+    }
+
+    terminal_dally_message * new_msg;
+    tw_event *e = model_net_method_event_new(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
+
+    memcpy(new_msg, msg, sizeof(terminal_dally_message)); // Just making sure that if the simulation breaks because we didn't set some value below, it breaks in a spectacular manner (~0 can be -1)
+    assert(new_msg->dfdally_src_terminal_id == s->terminal_id);
+    new_msg->type        = T_NOTIFY;
+    new_msg->notify_type = notification;
+    new_msg->magic       = terminal_magic_num;
+    tw_event_send(e); 
+}
+
+static void notify_src_lp_on_total_latency(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     terminal_dally_message * new_msg;
     tw_event *e = model_net_method_event_new(
             msg->src_terminal_id, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
 
-    //memcpy(new_msg, msg, sizeof(terminal_dally_message));
+    // We copy all the data from the original message but will change the flags for the message
+    // (Some of the data that we care about: packet_ID, src_terminal_lpid, dest_terminal_lpid, dfdally_src_terminal_id, dfdally_dest_terminal_id)
+    memcpy(new_msg, msg, sizeof(terminal_dally_message));
     //strcpy(new_msg->category, msg->category);
-    new_msg->type                    = T_NOTIFY_TOTAL_LATENCY;
+    new_msg->type                    = T_NOTIFY;
+    new_msg->notify_type             = NOTIFY_LATENCY;
     new_msg->magic                   = terminal_magic_num;
-    new_msg->packet_ID               = msg->packet_ID;
-    new_msg->travel_end_time         = msg->travel_end_time;
-    new_msg->src_terminal_id         = msg->src_terminal_id;
-    new_msg->dfdally_src_terminal_id = msg->dfdally_src_terminal_id;
     tw_event_send(e); 
 }
 
+static void process_terminal_notification_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
+    switch ((enum notify_t) msg->notify_type) {
+        case NOTIFY_LATENCY:
+            bf->c14 = 0;
+            break;
+
+        case NOTIFY_ZOMBIE:
+            {
+            struct packet_id const zombie = {
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id};
+            assert(s->zombies.count(zombie) == 1);
+            s->zombies.erase(zombie);
+            }
+            break;
+
+        case NOTIFY_ZOMBIE_DEL:
+            s->zombies.emplace((struct packet_id){
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id});
+            break;
+    }
+}
+
+static void process_terminal_notification_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
+    switch ((enum notify_t) msg->notify_type) {
+        case NOTIFY_LATENCY:
+            // We thought this packet wouldn't arrive; we thought it was a still on the network
+            // when we sent the zombie notification, but it wasn't! The latency information
+            // simply hadn't arrived to us. Gotta inform the destination LP
+            if (s->sent_packets.empty() || s->sent_packets.front().packet_ID > msg->packet_ID) {
+                //printf("notifying zombie del: packet dest id %d dest gid %d\n", msg->dest_terminal_lpid, msg->dfdally_dest_terminal_id);
+                // Notice that even though we received this message from the destination lp, the destination lp
+                // did not change any parameters from what it received, so the message (mostly) contains the same
+                // information from the original one, the one that we sent
+                notify_dest_lp_of(s, lp, msg, NOTIFY_ZOMBIE_DEL);
+                bf->c14 = 1;
+            }
+            break;
+
+        case NOTIFY_ZOMBIE: {
+            // Adding new zombie to LP list of zombies
+            assert(lp->gid == msg->dest_terminal_lpid);
+            assert(s->terminal_id == msg->dfdally_dest_terminal_id);
+
+            //printf("INSERTING zombie alert: LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
+            struct packet_id const zombie = {
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id};
+            assert(s->zombies.count(zombie) == 0);
+            s->zombies.insert(zombie);
+            }
+            break;
+
+        case NOTIFY_ZOMBIE_DEL: {
+            // Removing previously thought zombie (just a cleanup operation, to not waste memory, but it should not affect the correctness of the simulation)
+            assert(lp->gid == msg->dest_terminal_lpid);
+            assert(s->terminal_id == msg->dfdally_dest_terminal_id);
+
+            printf("DELETING zombie alert: LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
+            struct packet_id const zombie = {
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id};
+            assert(s->zombies.count(zombie) == 1);
+            s->zombies.erase(zombie);
+            }
+            break;
+    }
+}
+
 // This function triggers an event that is completely ignored when processed later. The number of events produced by a terminal/router DOES alter the simulation results. (The number of events processed by an LP shouldn't be a parameter to the simulation itself, but it is weirdly).
 static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * msg, tw_lp * lp)
 {
@@ -4247,6 +4504,15 @@ static void send_remote_event(terminal_state * s, terminal_dally_message * msg,
 
 static void packet_arrive_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
+    if(bf->c15) {
+        struct packet_id const packet = {
+            .packet_ID = msg->packet_ID,
+            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+        };
+        assert(s->arrived_here.count(packet) == 1);
+        s->arrived_here.erase(packet);
+        bf->c15 = 0;
+    }
     if(bf->c4) {
         model_net_event_rc2(lp, &msg->event_rc);
     }
@@ -4314,7 +4580,18 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
     
     // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
     if(m_data_src && msg->remote_event_size_bytes > 0) {
-        send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
+        struct packet_id const packet = {
+            .packet_ID = msg->packet_ID,
+            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+        };
+        bool const had_arrived_before = s->arrived_here.count(packet) == 1;
+        if (!had_arrived_before) {
+            send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
+            s->arrived_here.insert(packet);
+            bf->c15 = 1;
+        } else {
+            fprintf(stderr, "We got a packet twice! This is unfortunate, but might happen due to surrogate switching\n");
+        }
     }
 }
 
@@ -4393,6 +4670,14 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         s->ross_sample.data_size_sample -= msg->total_size;
         s->data_size_ross_sample -= msg->total_size;
 
+        if(bf->c14) {
+            s->zombies.emplace((struct packet_id &&) {
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+            });
+            bf->c14 = 0;
+        }
+
         struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st);
         qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link));
         s->rank_tbl_pop++; 
@@ -4414,6 +4699,16 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         free_tmp(tmp);	
         s->rank_tbl_pop--;
     }
+
+    if(bf->c15) {
+        struct packet_id const packet = {
+            .packet_ID = msg->packet_ID,
+            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+        };
+        assert(s->arrived_here.count(packet) == 1);
+        s->arrived_here.erase(packet);
+        bf->c15 = 0;
+    }
     
     return;
 }
@@ -4650,19 +4945,45 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
-            if (packet_latency_f || surrogate_configured) {
-                notify_src_lp_on_total_latency(s, msg, lp);
+            struct packet_id const zombie_packet = {
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+            };
+            int const is_zombie = s->zombies.count(zombie_packet) == 1;
+            // Not notifying in case it's a zombie
+            if (is_zombie) {
+                // Ignore packet, do not send forward if it has already been delievered
+                //printf("We got a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
+                s->zombies.erase(zombie_packet);
+                bf->c14 = 1;
             } else {
-                // This vacuous msg is necessary just to keep simulations with
-                // and without the latency notification the same. Notifying the
-                // latency does not impact the simulation (unless the data is
-                // fed to a predictor, later to be used). If the latency
-                // notification is deactivated, the simulation will produce
-                // the same number of events (a bit wasteful), a parameter
-                // that model-net or dragonfly-dally for some reason use :S
-                //vacuous_msg_to_itself(s, msg, lp);
-            }
-            send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
+                //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
+
+                struct packet_id const packet = {
+                    .packet_ID = msg->packet_ID,
+                    .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+                };
+                bool const had_arrived_before = s->arrived_here.count(packet) == 1;
+                if (!had_arrived_before) {
+                    if (packet_latency_f || surrogate_configured) {
+                        notify_src_lp_on_total_latency(s, bf, msg, lp);
+                    } else {
+                        // This vacuous msg is necessary just to keep simulations with
+                        // and without the latency notification the same. Notifying the
+                        // latency does not impact the simulation (unless the data is
+                        // fed to a predictor, later to be used). If the latency
+                        // notification is deactivated, the simulation will produce
+                        // the same number of events (a bit wasteful), a parameter
+                        // that model-net or dragonfly-dally for some reason use :S
+                        //vacuous_msg_to_itself(s, msg, lp);
+                    }
+                    send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
+                    s->arrived_here.insert(packet);
+                    bf->c15 = 1;
+                } else {
+                    fprintf(stderr, "We got a packet twice! This is unfortunate, but might happen due to surrogate switching\n");
+                }
+             }
         }
         /* Remove the hash entry */
         qhash_del(hash_link);
@@ -4723,8 +5044,7 @@ static void terminal_buf_update(terminal_state * s,
     return;
 }
 
-void 
-dragonfly_dally_terminal_final( terminal_state * s, 
+static void dragonfly_dally_terminal_final( terminal_state * s, 
       tw_lp * lp )
 {
     // printf("terminal id %d\n",s->terminal_id);
@@ -4812,6 +5132,17 @@ dragonfly_dally_terminal_final( terminal_state * s,
     // Calling destructors for data. There is no need to free data, the
     // destructors do it themselves. ROSS allocated space for the datatypes and
     // it doesn't need to be freed
+    // TODO (elkin): Actually, deallocate memory for `message_data` and `remote_event_data`
+    //printf("terminal %d - arrived_here (size=%d) = [", s->terminal_id, s->arrived_here.size());
+    //for (auto&& z: s->arrived_here) {
+    //    printf("(%d %d) ", z.packet_ID, z.dfdally_src_terminal_id);
+    //}
+    //printf("]\n");
+    //printf("terminal %d - zombies = [", s->terminal_id);
+    //for (auto&& z: s->zombies) {
+    //    printf("(%d %d) ", z.packet_ID, z.dfdally_src_terminal_id);
+    //}
+    //printf("]\n");
     s->sent_packets.~deque();
     s->sent_packets_latency.~priority_queue();
 
@@ -5792,7 +6123,7 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa
     return;
 }
 
-void 
+static void 
 terminal_dally_event( terminal_state * s, 
 		tw_bf * bf, 
 		terminal_dally_message * msg, 
@@ -5839,8 +6170,8 @@ terminal_dally_event( terminal_state * s,
             issue_bw_monitor_event(s, bf, msg, lp);
         break;
     
-        case T_NOTIFY_TOTAL_LATENCY:
-        //    We don't process the message, we only store the message when committing
+        case T_NOTIFY:
+            process_terminal_notification_event(s, bf, msg, lp);
         break;
 
         case T_VACUOUS_EVENT:
@@ -5852,7 +6183,7 @@ terminal_dally_event( terminal_state * s,
         }
 }
 
-void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, 
+static void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, 
     tw_lp * lp) 
 {
     msg->num_cll = 0;
@@ -5901,7 +6232,7 @@ void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * m
 }
 
 /* Reverse computation handler for a terminal event */
-void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) 
+static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) 
 {
     for(int i = 0; i < msg->num_rngs; i++)
         tw_rand_reverse_unif(lp->rng);
@@ -5911,7 +6242,7 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
 
     s->rev_events++;
     s->ross_sample.rev_events++;
-    switch(msg->type)
+    switch((enum event_t) msg->type)
     {
         case T_GENERATE:
             if (bf->c10) {
@@ -5941,8 +6272,8 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
             issue_bw_monitor_event_rc(s,bf, msg, lp);
             break;
     
-        case T_NOTIFY_TOTAL_LATENCY:
-        //    We don't process the message, we only store the message when committing
+        case T_NOTIFY:
+            process_terminal_notification_event_rc(s, bf, msg, lp);
         break;
 
         case T_VACUOUS_EVENT:
@@ -5957,7 +6288,7 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da
 }
 
 /* Reverse computation handler for a router event */
-void router_dally_rc_event_handler(router_state * s, tw_bf * bf, 
+static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, 
   terminal_dally_message * msg, tw_lp * lp) 
 {
     for(int i = 0; i < msg->num_rngs; i++)
@@ -5969,7 +6300,7 @@ void router_dally_rc_event_handler(router_state * s, tw_bf * bf,
     s->rev_events++;
     s->ross_rsample.rev_events++;
 
-    switch(msg->type) {
+    switch((enum event_t) msg->type) {
         case R_SEND: 
             router_packet_send_rc(s, bf, msg, lp);
         break;
diff --git a/src/util/lp-type-lookup.c b/src/util/lp-type-lookup.c
index 8d00e4f5..6bc80136 100644
--- a/src/util/lp-type-lookup.c
+++ b/src/util/lp-type-lookup.c
@@ -10,8 +10,6 @@
 #include "ross.h"
 #include "codes/lp-type-lookup.h"
 
-#define MAX_LP_TYPES 64
-
 struct lp_name_mapping
 {
     const char* name;
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index f5ad04cd..1576899c 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -1,6 +1,17 @@
+/**
+ * This entire file is in charge of switching a high-definition simulation
+ * (a vanilla CODES simulation) into surrogate-mode where a secondary piece
+ * of software (a surrogate, a collection of functions), and back.
+ * For the switch to happen, we have to inspect some of the "hidden"
+ * structure of PDES (ROSS) and thus the code in here relies on a very
+ * specific version of ROSS. In a sense, we are abusing the non-documented
+ * ABI of ROSS.
+ */
+
 #include <assert.h>
 #include <codes/configuration.h>
 #include <codes/codes_mapping.h>
+#include <codes/model-net-lp.h>
 #include <codes/surrogate.h>
 
 // Basic level of debugging is 1. It should be always turned on
@@ -13,8 +24,8 @@
 #define DEBUG_DIRECTOR 1
 
 // Global variables
-int total_terminals = 0;
-double ignore_until = 0;
+static double ignore_until = 0;
+static struct surrogate_config surr_config = {0};
 
 // === Average packet latency functionality
 //
@@ -38,7 +49,7 @@ static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     assert(data->aggregated_latency[0].sum_latency == 0);
     assert(data->aggregated_latency[0].total_msgs == 0);
 
-    data->num_terminals = total_terminals;
+    data->num_terminals = surr_config.total_terminals;
 }
 
 static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * start, struct packet_end * end) {
@@ -81,7 +92,7 @@ static double predict_latency(struct latency_surrogate * data, tw_lp * lp, unsig
     }
 
     // otherwise, we have no data to approximate the latency
-    tw_error(TW_LOC, "The terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
+    tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
     return -1.0;
 
     // TODO(elkin): this (below) is wrong, bad bad. I'm not entirely sure how to do this rn in a non-hardcoded manner, but given time, this should be left in better terms
@@ -122,7 +133,6 @@ struct packet_latency_predictor average_latency_predictor = {
 
 // === Director functionality
 //
-struct director_data my_director_data;
 
 static struct {
     size_t current_i;
@@ -131,7 +141,272 @@ static struct {
 } switch_at;
 
 
-void director_fun(tw_pe * pe) {
+// To be treated as a linked list. Use `->next` to access the next event
+static bool is_workload_event(tw_event * event) {
+    char const * lp_type_name;
+    int rep_id, offset; // unused
+    codes_mapping_get_lp_info2(event->dest_lpid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+
+    return strncmp("modelnet_", lp_type_name, 9) != 0;
+}
+
+
+static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) {
+    int events_processed = 0;
+    int events_modified = 0;
+    for (unsigned int i = 0; i < g_tw_nkp; i++) {
+        tw_kp * const this_kp = g_tw_kp[i];
+
+        // All events in pevent_q are sent into the future
+        assert((this_kp->pevent_q.tail == NULL) == (this_kp->pevent_q.size == 0));
+        tw_event * cur_event = this_kp->pevent_q.tail;
+        while (cur_event) {
+            if (!is_workload_event(cur_event) && tw_event_sig_compare(cur_event->sig, gvt) > 0) {
+                cur_event->recv_ts += switch_offset;
+                cur_event->sig.recv_ts = cur_event->recv_ts;
+                events_modified++;
+            }
+
+            cur_event = cur_event->prev;
+            events_processed++;
+        }
+    }
+    if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) {
+        printf("PE %lu: Total events from causality modified %d (from total processed %d)\n", g_tw_mynode, events_modified, events_processed);
+    }
+}
+
+
+static struct lp_types_switch const * get_type_switch(char const * const name) {
+    for (size_t i = 0; i < surr_config.n_lp_types; i++) {
+        //printf("THIS %s and %s\n", surr_config.lp_types[i].lpname, name);
+        if (strcmp(surr_config.lp_types[i].lpname, name) == 0) {
+            return &surr_config.lp_types[i];
+        }
+    }
+    return NULL;
+}
+
+
+// MPI barrier to determine if anyone has a true value `val`. Returns true if anyone says "TRUE"
+static inline bool does_any_pe(bool val) {
+    bool global_val;
+    if(MPI_Allreduce(&val, &global_val, 1, MPI_C_BOOL, MPI_LOR, MPI_COMM_ROSS) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce for custom rollback and cleanup failed");
+    }
+    return global_val;
+}
+
+
+static inline bool do_all_pes(bool val) {
+    bool global_val;
+    if(MPI_Allreduce(&val, &global_val, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_ROSS) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce for custom rollback and cleanup failed");
+    }
+    return global_val;
+}
+
+
+static void rollback_and_cancel_events_pe(tw_pe * pe) {
+    // Backtracking the simulation to GVT
+    for (unsigned int i = 0; i < g_tw_nkp; i++) {
+        tw_kp_rollback_to_sig(g_tw_kp[i], pe->GVT_sig);
+    }
+
+    // Making sure that everything gets cleaned up properly (AVL tree should be empty by the end)
+    do {
+        if (tw_nnodes() > 1) {
+            double const start = tw_clock_read();
+            tw_net_read(pe);
+            pe->stats.s_net_read += tw_clock_read() - start;
+        }
+
+        pe->gvt_status = 1;
+        tw_sched_event_q(pe);
+        tw_sched_cancel_q(pe);
+        tw_gvt_step2(pe);
+
+        if (DEBUG_DIRECTOR > 1) {
+            printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, pe->GVT_sig.recv_ts, pe->avl_tree_size);
+        }
+    } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0));
+
+    if (DEBUG_DIRECTOR > 1) {
+        printf("PE %lu: All events rolledbacked and cancelled\n", g_tw_mynode);
+    }
+}
+
+static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
+    tw_event * next_event = tw_pq_dequeue(pe->pq);
+
+    // If there aren't any events left to process, the simulation has already finished and we have nothing to do
+    if (next_event == NULL) {
+        return;
+    }
+
+    tw_event * frozen_events = NULL;  // Linked list of frozen events
+    tw_event * workload_events = NULL; // Linked list of workload events, to be placed again in the queue
+
+    int events_dequeued = 0;
+    // Traversing all events stored in the queue
+    while (next_event) {
+        // Filtering events to freeze
+        tw_event * const prev_event = next_event;
+        next_event = tw_pq_dequeue(pe->pq);
+        assert(prev_event->next == NULL);
+
+        if (is_workload_event(prev_event)) {
+            // store event in events to inject immediately back to the queue (in reverse order, because the queue will take the youngest event first)
+            if (!workload_events) {
+                workload_events = prev_event;
+            } else {
+                prev_event->prev = workload_events;
+                workload_events = prev_event;
+            }
+        } else {
+            // store event in frozen events, to be forwarded to the future
+            if (!frozen_events) {
+                frozen_events = prev_event;
+            } else {
+                prev_event->prev = frozen_events;
+                frozen_events = prev_event;
+            }
+        }
+        events_dequeued++;
+    }
+
+    // We have to put the events back into the queue after we switch back, but if we never
+    // switch back they will never get to be processed and thus we can clean them
+    double switch_offset = g_tw_ts_end;
+    if (switch_at.current_i + 1 < switch_at.total) {
+        double const next_switch = switch_at.time_stampts[switch_at.current_i + 1];
+        double const pre_switch_time = gvt.recv_ts;  // pe->GVT_sig.recv_ts;
+        switch_offset = next_switch - pre_switch_time;
+        assert(pre_switch_time < next_switch);
+        //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
+    }
+
+    int events_enqueued = 0;
+    // shifting time stamps of network events to the future
+    //printf("Events in the future ");
+    while (frozen_events) {
+        tw_event * const prev_event = frozen_events;
+        frozen_events = frozen_events->prev;
+
+        //printf("%c", tw_event_sig_compare(gvt, prev_event->sig) < 0 ? '.' : 'x');
+        if(tw_event_sig_compare(prev_event->sig, gvt) > 0) {
+            assert(prev_event->recv_ts == prev_event->sig.recv_ts);
+            prev_event->recv_ts += switch_offset;
+            prev_event->sig.recv_ts = prev_event->recv_ts;
+        }
+
+        prev_event->prev = NULL;
+        tw_pq_enqueue(pe->pq, prev_event);
+
+        events_enqueued++;
+    }
+
+    // Reinjecting workload events into simulation
+    while (workload_events) {
+        tw_event * const prev_event = workload_events;
+        workload_events = workload_events->prev;
+        prev_event->prev = NULL;
+        tw_pq_enqueue(pe->pq, prev_event);
+
+        events_enqueued++;
+    }
+
+    if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) {
+        printf("PE %lu: Discrepancy on number of events processed %d (%d dequeued and %d enqueued)\n",
+                g_tw_mynode, events_dequeued - events_enqueued, events_dequeued, events_enqueued);
+    }
+
+    // shifting time stamps of events in causality list (one list per KP)
+    offset_future_events_in_causality_list(switch_offset, gvt);
+}
+
+
+// Switching from a (vanilla) high-def simulation to surrogate mode
+// consists of:
+// - Cancel all events that have to be cancelled and clean everything
+// - Looking at all events in the PE, "freezing" those in the network model
+//   and letting the workload events be processed further
+// - Going through every LP and calling their respective functions
+static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
+    if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
+        tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
+    }
+
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
+        rollback_and_cancel_events_pe(pe);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
+    }
+
+    shift_events_to_future_pe(pe, gvt);
+
+    // Going through all LPs in PE and running their specific functions
+    for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
+        tw_lp * const lp = g_tw_lp[local_lpid];
+        assert(local_lpid == lp->id);
+
+        // Modifying current time for LPs (technically, KPs) so that they
+        // coincide with current GVT (the current GVT often does not
+        // correspond to the (last) time stored in KPs).
+        lp->kp->last_sig = gvt;
+
+        char const * lp_type_name;
+        int rep_id, offset; // unused
+        codes_mapping_get_lp_info2(lp->gid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+        struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
+
+        if (lp_type_switch && lp_type_switch->highdef_to_surrogate) {
+            if (lp_type_switch->is_modelnet) {
+                model_net_method_switch_to_surrogate(lp);
+                model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate);
+            } else {
+                lp_type_switch->highdef_to_surrogate(lp->cur_state, lp);
+            }
+        }
+    }
+}
+
+
+static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
+    (void) pe;
+
+    // Going through all LPs in PE and running their specific functions
+    for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
+        tw_lp * const lp = g_tw_lp[local_lpid];
+        assert(local_lpid == lp->id);
+
+        // Modifying current time for LPs (technically, KPs) so that they
+        // coincide with current GVT (the current GVT often does not
+        // correspond to the (last) time stored in KPs).
+        tw_event_sig const previous_sig = lp->kp->last_sig;
+        lp->kp->last_sig = gvt;
+
+        char const * lp_type_name;
+        int rep_id, offset; // unused
+        codes_mapping_get_lp_info2(lp->gid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+        struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
+
+        if (lp_type_switch && lp_type_switch->surrogate_to_highdef) {
+            if (lp_type_switch->is_modelnet) {
+                model_net_method_switch_to_highdef(lp);
+                model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef);
+            } else {
+                lp_type_switch->surrogate_to_highdef(lp->cur_state, lp);
+            }
+        }
+
+        lp->kp->last_sig = previous_sig;
+    }
+}
+
+
+static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     static int i = 0;
     if (g_tw_mynode == 0) {
         if (DEBUG_DIRECTOR == 2) {
@@ -139,33 +414,77 @@ void director_fun(tw_pe * pe) {
             fflush(stdout);
         }
         if (DEBUG_DIRECTOR == 3) {
-            printf("GVT %d at %f in %s\n", i++, pe->GVT_sig.recv_ts,
-                    my_director_data.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+            printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt.recv_ts,
+                    surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+
+            switch (g_tw_trigger_arbitrary_fun.active) {
+                case ARBITRARY_FUN_enabled:
+                    printf("enabled\n");
+                    break;
+                case ARBITRARY_FUN_disabled:
+                    printf("disabled\n");
+                    break;
+                case ARBITRARY_FUN_triggered:
+                    printf("triggered\n");
+                    break;
+            }
         }
     }
 
+    // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic
+    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT_sig.recv_ts == gvt.recv_ts));
+
     // Do not process if the simulation ended
-    if (pe->GVT_sig.recv_ts >= g_tw_ts_end) {
+    if (gvt.recv_ts >= g_tw_ts_end) {
         return;
     }
 
-    // Switching to and from surrogate mode at times determined by `switch_at`
-    if (switch_at.current_i < switch_at.total) {
-        double const now = pe->GVT_sig.recv_ts;
-        double const next_switch = switch_at.time_stampts[switch_at.current_i];
-        if (now > next_switch) {
-            if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-                if (DEBUG_DIRECTOR == 2) {
-                    printf("\n");
-                }
-                printf("switching at %g", now);
-            }
-            my_director_data.switch_surrogate();
-            if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-                printf(" to %s\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
-            }
-            switch_at.current_i++;
+    // Detecting if we are going to switch
+    if (switch_at.current_i < switch_at.total
+            && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
+        // double const now = gvt.recv_ts;
+        // double const switch_at = switch_at.time_stampts[switch_at.current_i];
+        // assert(now + 1000 >= switch_at);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
+    } else {
+        return;
+    }
+
+    // Asking the director/model to switch
+    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+        if (DEBUG_DIRECTOR == 2) {
+            printf("\n");
         }
+        printf("Switching at %g", gvt.recv_ts);
+    }
+    surr_config.director.switch_surrogate();
+    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+        printf(" to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "vanilla");
+    }
+
+    // "Freezing" network events and activating LP's switch functions
+    if (FREEZE_NETWORK_STATE) {
+        if (surr_config.director.is_surrogate_on()) {
+            events_high_def_to_surrogate_switch(pe, gvt);
+        } else {
+            events_surrogate_to_high_def_switch(pe, gvt);
+        }
+    }
+
+    // Activating next switch
+    if (++switch_at.current_i < switch_at.total) {
+        double const next_switch = switch_at.time_stampts[switch_at.current_i];
+        // Setting trigger for next switch
+        tw_event_sig time_stamp = {0};
+        time_stamp.recv_ts = next_switch;
+        //printf("Adding a trigger to activate next switch!\n");
+        tw_trigger_arbitrary_fun_at(time_stamp);
+    }
+
+    if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) {
+        printf("Switch completed!\n");
+    }
+    if (DEBUG_DIRECTOR > 1) {
+        printf("PE %lu: Switch completed!\n", g_tw_mynode);
     }
 }
 //
@@ -173,15 +492,16 @@ void director_fun(tw_pe * pe) {
 
 
 // === All things Surrogate Configuration
-void surrogate_config(
-        const char * anno,
-        const struct director_data d,
-        const int total_terminals_,
+void surrogate_configure(
+        char const * const anno,
+        struct surrogate_config * const sc,
         struct packet_latency_predictor ** pl_pred
 ) {
-    // This is the only place where the director data should be setup
-    my_director_data = d;
-    total_terminals = total_terminals_;
+    assert(sc);
+    assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
+
+    // This is the only place where the director data should be loaded and set up
+    surr_config = *sc;
 
     // Determining which director mode to set up
     char director_mode[MAX_NAME_LENGTH];
@@ -216,9 +536,13 @@ void surrogate_config(
             fprintf(stderr, "\n");
         }
 
-        // Injecting into ROSS function to be called at GVT
+        // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
         g_tw_gvt_arbitrary_fun = director_fun;
 
+        tw_event_sig time_stamp = {0};
+        time_stamp.recv_ts = switch_at.time_stampts[0];
+        tw_trigger_arbitrary_fun_at(time_stamp);
+
         // freeing timestamps before it dissapears
         for (size_t i = 0; i < len; i++) {
             free(timestamps[i]);
@@ -247,9 +571,9 @@ void surrogate_config(
         tw_error(TW_LOC, "Unknown predictor for packet latency `%s`", latency_pred_name);
     }
 
-    //my_director_data.switch_surrogate();
+    //surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        fprintf(stderr, "Simulation starting on %s mode\n", my_director_data.is_surrogate_on() ? "surrogate" : "vanilla");
+        fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "vanilla");
     }
 }
 // === END OF All things Surrogate Configuration

From bb91444990c929bacfb3175cbb8127511c8ac266 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 20 Feb 2023 23:50:20 -0500
Subject: [PATCH 013/188] Some assertions to check for sanity of gvt trigger

---
 src/networks/model-net/dragonfly-dally.C |  2 +-
 src/util/surrogate.c                     | 17 +++++------------
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 94752b0f..22794e8c 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4671,7 +4671,7 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         s->data_size_ross_sample -= msg->total_size;
 
         if(bf->c14) {
-            s->zombies.emplace((struct packet_id &&) {
+            s->zombies.emplace((struct packet_id) {
                 .packet_ID = msg->packet_ID,
                 .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
             });
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 1576899c..a5c84c55 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -198,15 +198,6 @@ static inline bool does_any_pe(bool val) {
 }
 
 
-static inline bool do_all_pes(bool val) {
-    bool global_val;
-    if(MPI_Allreduce(&val, &global_val, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_ROSS) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce for custom rollback and cleanup failed");
-    }
-    return global_val;
-}
-
-
 static void rollback_and_cancel_events_pe(tw_pe * pe) {
     // Backtracking the simulation to GVT
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
@@ -302,6 +293,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
 
         prev_event->prev = NULL;
         tw_pq_enqueue(pe->pq, prev_event);
+        assert(prev_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts);
 
         events_enqueued++;
     }
@@ -442,9 +434,10 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     // Detecting if we are going to switch
     if (switch_at.current_i < switch_at.total
             && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
-        // double const now = gvt.recv_ts;
-        // double const switch_at = switch_at.time_stampts[switch_at.current_i];
-        // assert(now + 1000 >= switch_at);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
+        double const now = gvt.recv_ts;
+        double const switch_time = switch_at.time_stampts[switch_at.current_i];
+        assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
+        assert(now >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
     } else {
         return;
     }

From 59d339910f1e629dd997606a9e1e42e710d0b785 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 21 Feb 2023 18:55:05 -0500
Subject: [PATCH 014/188] The state of the network simulation is now being
 *truly* frozen

This is buggy. With some models this will work, with others it will
definetely not. The problem resides on model-net's complexity. Knowing
when to trigger the "next event" event is dependent on the state of
model-net and its future messages already in the queue (the most
important of which is the workload's new event)
---
 codes/model-net-lp.h                       | 10 +--
 src/networks/model-net/core/model-net-lp.c | 50 ++++++------
 src/networks/model-net/dragonfly-dally.C   | 91 ++++++++++++++++++++--
 src/util/surrogate.c                       |  6 +-
 4 files changed, 115 insertions(+), 42 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index faed58c7..46b01bae 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -118,11 +118,11 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
     void **msg_data,
     void **extra_data);
 
-// Function to call when switching from highdef to surrogate
-void model_net_method_switch_to_surrogate(tw_lp * lp);
-
-// Function to call when switching from surrogate to highdef
-void model_net_method_switch_to_highdef(tw_lp * lp);
+// Functions to call when switching from highdef to surrogate, and surrogate to highdef
+void model_net_method_switch_to_surrogate_lp(tw_lp * lp);
+void model_net_method_switch_to_highdef_lp(tw_lp * lp);
+void model_net_method_switch_to_surrogate(void);
+void model_net_method_switch_to_highdef(void);
 
 // It will call the function (pointer) on the internal structure/network model.
 // The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 3fb7aa62..1124e17c 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -870,7 +870,7 @@ void handle_sched_next(
     printf("%llu handle sched_next function\n",LLU(tw_now(lp)));
 #endif
     if (FREEZE_NETWORK_STATE) {
-        // This event should not be processed outside of the surrogate environment it was created, and it must be processed if it was generated during vanilla high-def simulation mode
+        // The event should not be processed outside of the surrogate environment it was created, and it must be processed if it was generated during vanilla high-def simulation mode
         bool const from_same_surrogate_instance = is_surrogate_on && m->msg.m_base.created_during_surrogate == num_surrogate;
         bool const highdef_created_during_highdef = !is_surrogate_on && m->msg.m_base.created_during_surrogate == -1;
         if (!from_same_surrogate_instance && !highdef_created_during_highdef) {
@@ -1107,7 +1107,16 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 
 }
 
-void model_net_method_switch_to_surrogate(tw_lp * lp) {
+void model_net_method_switch_to_surrogate(void) {
+    is_surrogate_on = true;
+    num_surrogate++;
+}
+
+void model_net_method_switch_to_highdef(void) {
+    is_surrogate_on = false;
+}
+
+void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
     //printf("PID %d in_sched_send_loop = [", lp->gid);
@@ -1115,49 +1124,34 @@ void model_net_method_switch_to_surrogate(tw_lp * lp) {
         //printf("%d ", ns->in_sched_send_loop[i]);
         ns->sched_loop_pre_surrogate[i] = ns->in_sched_send_loop[i];
         // scheduling an idle event to prevent getting stuck in the middle of a scheduling loop
-        if (ns->sched_loop_pre_surrogate[i]) {
-            // TODO: change zero-offset event for something a bit more sensible
-            model_net_method_idle_event(0.0, 0, lp);
-        }
+        //if (ns->sched_loop_pre_surrogate[i]) <- this is too restrictive, although the right idea.
+        // TODO: change zero-offset event for something a bit more sensible
+        model_net_method_idle_event(1.0, 0, lp);
+        //}
         ns->in_sched_send_loop[i] = 0;
     }
     //printf("]\n");
 
     ns->sched_recv_loop_pre_surrogate = ns->in_sched_recv_loop;
-    if (ns->in_sched_recv_loop) {
-        model_net_method_idle_event(0.0, 1, lp);
-    }
+    //if (ns->in_sched_recv_loop)
+    model_net_method_idle_event(1.0, 1, lp);
     ns->in_sched_recv_loop = 0;
-
-    is_surrogate_on = true;
-    num_surrogate++;
 }
 
-void model_net_method_switch_to_highdef(tw_lp * lp) {
+void model_net_method_switch_to_highdef_lp(tw_lp * lp) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
     //printf("PID %d in_sched_send_loop = [", lp->gid);
     for (int i = 0; i < ns->params->num_queues; i++) {
         //printf("%d ", ns->in_sched_send_loop[i]);
         // We have to duplicate an idle event that was produced in surrogate-mode, but not yet processed by the time we switch to high-def again, if that event was in the middle of the loop (asking for the next packet to inject) and in no other case
-        // TODO: THIS MUST HAVE SOME SECONDARY EFFECT THAT WILL PROVOQUE A CURRUPT SIMULATION, but not so far. Which is weird
-        //model_net_method_idle_event(0.0, 0, lp);
-        if (ns->in_sched_send_loop[i] == 1 && ns->sched_loop_pre_surrogate[i] == 0) {
-            model_net_method_idle_event(0.0, 0, lp);
-        }// else {
+        // TODO: Not all LPs need an event like this!
+        model_net_method_idle_event(1.0, 0, lp);
         ns->in_sched_send_loop[i] = ns->sched_loop_pre_surrogate[i];
-        //    ns->in_sched_send_loop[i] = ns->sched_loop_pre_surrogate[i];
-        //}
     }
-    //printf("]\n");
 
-    if (ns->in_sched_recv_loop == 1 && ns->sched_recv_loop_pre_surrogate == 0) {
-        model_net_method_idle_event(0.0, 1, lp);
-    } //else {
-        ns->in_sched_recv_loop = ns->sched_recv_loop_pre_surrogate;
-    //}
-
-    is_surrogate_on = false;
+    model_net_method_idle_event(1.0, 1, lp);
+    ns->in_sched_recv_loop = ns->sched_recv_loop_pre_surrogate;
 }
 
 void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp)) {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 22794e8c..93611e7a 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -602,6 +602,9 @@ struct terminal_state
 
     // Events that will arrive to this terminal
     set<struct packet_id> arrived_here;
+
+    // Variable to save the entire state of the terminal into before switching to surrogate mode. During surrogate-mode, the terminal should not access the state of the network
+    terminal_state * frozen_state;
 };
 
 struct router_state
@@ -2907,17 +2910,74 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
             free(start.remote_event_data);
         }
     }
-
-    // TODO: Find out how to schedule an idle event (AND how to remove one when rolling back!)
-
     assert(s->sent_packets_latency.empty());
+
+    // Hide current state and clean current state. Hidding the network information is in principle
+    // the same as freezing the state of the network.
+    assert(s->frozen_state == NULL);
+    terminal_state * frozen_state = (terminal_state*) malloc(sizeof(terminal_state));
+    memcpy(frozen_state, s, sizeof(terminal_state));
+    memset(s, 0, sizeof(terminal_state));
+    for (size_t i = 0; i < CATEGORY_MAX; i++) {
+        s->dragonfly_stats_array[i] = frozen_state->dragonfly_stats_array[i];
+    }
+    s->packet_gen                   = frozen_state->packet_gen;
+    s->total_gen_size               = frozen_state->total_gen_size;
+    s->params                       = frozen_state->params;
+    s->packet_counter               = frozen_state->packet_counter;
+    s->local_congestion_controller  = frozen_state->local_congestion_controller;
+    s->last_in_queue_time           = frozen_state->last_in_queue_time;
+    s->predictor_data               = frozen_state->predictor_data;
+    s->terminal_id                  = frozen_state->terminal_id;
+    s->packet_fin                   = frozen_state->packet_fin;
+    s->finished_packets             = frozen_state->finished_packets;
+    s->data_size_sample             = frozen_state->data_size_sample;
+    s->ross_sample.data_size_sample = frozen_state->ross_sample.data_size_sample;
+    s->data_size_ross_sample        = frozen_state->data_size_ross_sample;
+    s->total_msg_size               = frozen_state->total_msg_size;
+    s->finished_msgs                = frozen_state->finished_msgs;
+    memcpy(&s->arrived_here,         &frozen_state->arrived_here,         sizeof(s->arrived_here));
+    memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
+    memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
+    memcpy(&s->sent_packets_latency, &frozen_state->sent_packets_latency, sizeof(s->sent_packets_latency));
+
+    s->frozen_state = frozen_state;
 };
 
 // This function never rollsback because it's called at GVT
 static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp) {
-    (void) s;
     (void) lp;
     //printf("Terminal %d (PID: %d) switching back to high-def at %e\n", s->terminal_id, lp->gid, tw_now(lp));
+
+    // Re-instanciating pre-transition (before surrogate was turned on) terminal state
+    assert(s->frozen_state != NULL);
+    terminal_state * frozen_state = s->frozen_state;
+    for (size_t i = 0; i < CATEGORY_MAX; i++) {
+        frozen_state->dragonfly_stats_array[i] = s->dragonfly_stats_array[i];
+    }
+    frozen_state->packet_gen                   = s->packet_gen;
+    frozen_state->total_gen_size               = s->total_gen_size;
+    frozen_state->params                       = s->params;
+    frozen_state->packet_counter               = s->packet_counter;
+    frozen_state->local_congestion_controller  = s->local_congestion_controller;
+    frozen_state->last_in_queue_time           = s->last_in_queue_time;
+    frozen_state->predictor_data               = s->predictor_data;
+    frozen_state->terminal_id                  = s->terminal_id;
+    frozen_state->packet_fin                   = s->packet_fin;
+    frozen_state->finished_packets             = s->finished_packets;
+    frozen_state->data_size_sample             = s->data_size_sample;
+    frozen_state->ross_sample.data_size_sample = s->ross_sample.data_size_sample;
+    frozen_state->data_size_ross_sample        = s->data_size_ross_sample;
+    frozen_state->total_msg_size               = s->total_msg_size;
+    frozen_state->finished_msgs                = s->finished_msgs;
+    memcpy(&frozen_state->arrived_here,         &s->arrived_here,         sizeof(s->arrived_here));
+    memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
+    memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
+    memcpy(&frozen_state->sent_packets_latency, &s->sent_packets_latency, sizeof(s->sent_packets_latency));
+    memcpy(s, frozen_state, sizeof(terminal_state));
+    memset(frozen_state, 0, sizeof(terminal_state));
+    free(frozen_state);
+    assert(s->frozen_state == NULL);
 };
 //
 // ==== END OF Surrogate functions definition ====
@@ -3230,6 +3290,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     new (&s->sent_packets_latency) priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)>();
     new (&s->zombies) set<struct packet_id>();
     new (&s->arrived_here) set<struct packet_id>();
+    s->frozen_state = NULL;
 
     // alloc'ing memory for predictor, calling initiliazer for predictor
     if (terminal_predictor != NULL && terminal_predictor->predictor_data_sz > 0) {
@@ -6138,7 +6199,16 @@ terminal_dally_event( terminal_state * s,
     assert(msg->magic == terminal_magic_num);
     //printf("LPID: %llu Event type %d processed at %f\n", lp->gid, msg->type, tw_now(lp));
 
-    rc_stack_gc(lp, s->st);
+    if (is_surrogate_on && FREEZE_NETWORK_STATE) {
+        // This event will be reversed. It comes from the past, it has been forwarded to the future
+        // by the surrogate freezing the network procedure and should not be taken into account
+        if (! (msg->type == T_GENERATE || msg->type == T_ARRIVE_PREDICTED || msg->type == T_NOTIFY)) {
+            bf->c20 = 1;
+            return;
+        }
+    } else {
+        rc_stack_gc(lp, s->st);
+    }
     switch(msg->type)
         {
         case T_GENERATE:
@@ -6234,14 +6304,21 @@ static void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_mess
 /* Reverse computation handler for a terminal event */
 static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) 
 {
+    s->rev_events++;
+    s->ross_sample.rev_events++;
+
+    // In case the event was skipped above, skip now
+    if (bf->c20) {
+        bf->c20 = 0;
+        return;
+    }
+
     for(int i = 0; i < msg->num_rngs; i++)
         tw_rand_reverse_unif(lp->rng);
 
     for(int i = 0; i < msg->num_cll; i++)
         codes_local_latency_reverse(lp);
 
-    s->rev_events++;
-    s->ross_sample.rev_events++;
     switch((enum event_t) msg->type)
     {
         case T_GENERATE:
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index a5c84c55..dfd684e9 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -337,6 +337,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
     }
 
     shift_events_to_future_pe(pe, gvt);
+    model_net_method_switch_to_surrogate();
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -355,7 +356,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
 
         if (lp_type_switch && lp_type_switch->highdef_to_surrogate) {
             if (lp_type_switch->is_modelnet) {
-                model_net_method_switch_to_surrogate(lp);
+                model_net_method_switch_to_surrogate_lp(lp);
                 model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate);
             } else {
                 lp_type_switch->highdef_to_surrogate(lp->cur_state, lp);
@@ -367,6 +368,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
 
 static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
     (void) pe;
+    model_net_method_switch_to_highdef();
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -386,7 +388,7 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
 
         if (lp_type_switch && lp_type_switch->surrogate_to_highdef) {
             if (lp_type_switch->is_modelnet) {
-                model_net_method_switch_to_highdef(lp);
+                model_net_method_switch_to_highdef_lp(lp);
                 model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef);
             } else {
                 lp_type_switch->surrogate_to_highdef(lp->cur_state, lp);

From 04103ab6a4d448f0625bef747f3531647083e814 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 22 Feb 2023 17:30:08 -0500
Subject: [PATCH 015/188] Sequential implementation of switching mechanism
 completed

---
 codes/model-net-lp.h                       |  3 +-
 doc/example/tutorial-synthetic-ping-pong.c | 19 ++++++----
 src/networks/model-net/core/model-net-lp.c | 34 +++++++++---------
 src/networks/model-net/dragonfly-dally.C   | 40 ++++++++++++++++++----
 src/util/surrogate.c                       |  2 +-
 5 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index 46b01bae..6c235cc9 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -152,7 +152,6 @@ typedef struct model_net_base_msg {
     model_net_request req;
     int is_from_remote;
     int isQueueReq;
-    int created_during_surrogate; // if the MN_BASE_SCHED_NEXT event was created in surrogate mode, this variable contains the surrogate iteration at which it was created; otherwise it's -1
     tw_stime save_ts;
     // parameters to pass to new messages (via model_net_set_msg_params)
     // TODO: make this a union for multiple types of parameters
@@ -180,6 +179,8 @@ typedef struct model_net_wrap_msg {
     } msg;
 } model_net_wrap_msg;
 
+bool model_net_is_this_base_event(model_net_wrap_msg *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 070624e4..88d4f150 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -18,6 +18,7 @@ static unsigned int lp_io_use_suffix = 0;
 static int do_lp_io = 0;
 
 static int num_msgs = 20;
+static int num_initial_msgs = 1;
 
 typedef struct svr_msg svr_msg;
 typedef struct svr_state svr_state;
@@ -81,6 +82,7 @@ const tw_optdef app_opt [] =
 {
         TWOPT_GROUP("Model net synthetic traffic " ),
     	TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "),
+    	TWOPT_UINT("num_initial_messages", num_initial_msgs, "Number of PING messages to be injected initially at the start (larger = more congestion)"),
     	TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "),
         TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
         TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
@@ -116,12 +118,16 @@ static void svr_init(svr_state * s, tw_lp * lp)
     //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window
     tw_stime kickoff_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001);
 
-    tw_event *e;
-    svr_msg *m;
-    e = tw_event_new(lp->gid, kickoff_time, lp); //ROSS method to create a new event
-    m = tw_event_data(e); //Gives you a pointer to the data encoded within event e
-    m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received
-    tw_event_send(e); //ROSS method to send off the event e with the encoded data in m
+    for (int i = 1; i <= num_initial_msgs && i <= num_msgs; i++) {
+        tw_event *e;
+        svr_msg *m;
+        e = tw_event_new(lp->gid, kickoff_time * i, lp); //ROSS method to create a new event
+        m = tw_event_data(e); //Gives you a pointer to the data encoded within event e
+        m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received
+        tw_event_send(e); //ROSS method to send off the event e with the encoded data in m
+    }
+
+    s->start_ts = kickoff_time; // the time when we're starting this LP's work is when the first ping is generated
 }
 
 static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -131,7 +137,6 @@ static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     //if (lp->gid != 0) {
     //    return;
     //}
-    s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW
 
     svr_msg ping_msg;
 
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 1124e17c..5031cd35 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -821,7 +821,6 @@ void handle_new_msg(
 #if DEBUG
         printf("%llu handle_shed_next() from handle_new_msg()\n",LLU(tw_now(lp)));
 #endif
-        m->msg.m_base.created_during_surrogate = is_surrogate_on ? num_surrogate : -1;
         handle_sched_next(ns, b, m, lp);
         assert(*in_sched_loop);
     }
@@ -869,15 +868,6 @@ void handle_sched_next(
 #if DEBUG
     printf("%llu handle sched_next function\n",LLU(tw_now(lp)));
 #endif
-    if (FREEZE_NETWORK_STATE) {
-        // The event should not be processed outside of the surrogate environment it was created, and it must be processed if it was generated during vanilla high-def simulation mode
-        bool const from_same_surrogate_instance = is_surrogate_on && m->msg.m_base.created_during_surrogate == num_surrogate;
-        bool const highdef_created_during_highdef = !is_surrogate_on && m->msg.m_base.created_during_surrogate == -1;
-        if (!from_same_surrogate_instance && !highdef_created_during_highdef) {
-            b->c12 = 1;
-            return;
-        }
-    }
 
     tw_stime poffset;
     model_net_request *r = &m->msg.m_base.req;
@@ -1032,7 +1022,6 @@ void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
     msg_set_header(model_net_base_magic, MN_BASE_SCHED_NEXT, lp->gid,
             &m_wrap->h);
     m_wrap->msg.m_base.is_from_remote = is_recv_queue;
-    m_wrap->msg.m_base.created_during_surrogate = is_surrogate_on ? num_surrogate : -1;
     r_wrap->queue_offset = queue_offset;
     tw_event_send(e);
 }
@@ -1124,17 +1113,18 @@ void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
         //printf("%d ", ns->in_sched_send_loop[i]);
         ns->sched_loop_pre_surrogate[i] = ns->in_sched_send_loop[i];
         // scheduling an idle event to prevent getting stuck in the middle of a scheduling loop
-        //if (ns->sched_loop_pre_surrogate[i]) <- this is too restrictive, although the right idea.
+        if (ns->sched_loop_pre_surrogate[i]) { // <- this can be more finely tuned
         // TODO: change zero-offset event for something a bit more sensible
-        model_net_method_idle_event(1.0, 0, lp);
-        //}
+            model_net_method_idle_event(1.0, 0, lp);
+        }
         ns->in_sched_send_loop[i] = 0;
     }
     //printf("]\n");
 
     ns->sched_recv_loop_pre_surrogate = ns->in_sched_recv_loop;
-    //if (ns->in_sched_recv_loop)
-    model_net_method_idle_event(1.0, 1, lp);
+    if (ns->in_sched_recv_loop) {
+        model_net_method_idle_event(1.0, 1, lp);
+    }
     ns->in_sched_recv_loop = 0;
 }
 
@@ -1146,11 +1136,15 @@ void model_net_method_switch_to_highdef_lp(tw_lp * lp) {
         //printf("%d ", ns->in_sched_send_loop[i]);
         // We have to duplicate an idle event that was produced in surrogate-mode, but not yet processed by the time we switch to high-def again, if that event was in the middle of the loop (asking for the next packet to inject) and in no other case
         // TODO: Not all LPs need an event like this!
-        model_net_method_idle_event(1.0, 0, lp);
+        if (ns->sched_loop_pre_surrogate[i] == 1 && ns->in_sched_send_loop[i] == 0) {
+            model_net_method_idle_event(1.0, 0, lp);
+        }
         ns->in_sched_send_loop[i] = ns->sched_loop_pre_surrogate[i];
     }
 
-    model_net_method_idle_event(1.0, 1, lp);
+    if (ns->sched_recv_loop_pre_surrogate == 1 && ns->in_sched_recv_loop == 0) {
+        model_net_method_idle_event(1.0, 1, lp);
+    }
     ns->in_sched_recv_loop = ns->sched_recv_loop_pre_surrogate;
 }
 
@@ -1160,6 +1154,10 @@ void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp *
     fun(ns->sub_state, lp);
 }
 
+bool model_net_is_this_base_event(model_net_wrap_msg * msg) {
+    return msg->h.event_type == MN_BASE_NEW_MSG || msg->h.event_type == MN_BASE_SCHED_NEXT;
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 93611e7a..6984b958 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -96,9 +96,9 @@ static long num_remote_packets = 0;
 
 static long global_stalled_chunk_counter = 0;
 
-#define OUTPUT_SNAPSHOT 0
-const static int num_snapshots = 0;
-tw_stime snapshot_times[num_snapshots] = {};
+#define OUTPUT_SNAPSHOT 1
+const static int num_snapshots = 3;
+tw_stime snapshot_times[num_snapshots] = {100e3, 475e3, 1990e3};
 char snapshot_filename[128];
 
 /* time in nanosecs */
@@ -2945,6 +2945,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
 };
 
 // This function never rollsback because it's called at GVT
+// Note: this function CANNOT generate any events, because it is to be used in `dragonfly_dally_terminal_final`
 static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp) {
     (void) lp;
     //printf("Terminal %d (PID: %d) switching back to high-def at %e\n", s->terminal_id, lp->gid, tw_now(lp));
@@ -2995,11 +2996,33 @@ static void router_send_snapshot_events(router_state *s, tw_lp *lp)
     {
         if (OUTPUT_SNAPSHOT)
         {
-            char snapshot_line[1024];
-            int written;
+            // Finding size of snapshot line
+            int line_sz = 28;  // This is the size of '#Time of snapshot,Router ID,'
+            for (int i = 0, j = 0; i < s->params->radix; ) {
+                int sz = snprintf(NULL, 0, "Port %d VC %d,", i, j);
+                line_sz += sz;
+
+                j++;
+                if(j >= s->params->num_vcs) { i++; j = 0; }
+            }
 
-            written = sprintf(snapshot_line, "#Time of snapshot, Router ID, Port 0 VC 0, Port 0 VC 1 ... Port N VC M\n#Radix = %d  Num VCs = %d\n",s->params->radix, s->params->num_vcs);
-            lp_io_write(lp->gid, snapshot_filename, written, snapshot_line);
+            // Creating snapshot line
+            char snapshot_line[line_sz + 1];  // extra space for '\0'
+            int offset = 28;
+            snprintf(snapshot_line, sizeof(snapshot_line), "#Time of snapshot,Router ID,");
+            for (int i = 0, j = 0; i < s->params->radix; ) {
+                int sz = snprintf(snapshot_line + offset, sizeof(snapshot_line) - offset, "Port %d VC %d,", i, j);
+                offset += sz;
+
+                j++;
+                if(j >= s->params->num_vcs) { i++; j = 0; }
+            }
+            assert(line_sz == offset);
+            snapshot_line[line_sz - 1] = '\n';  // replacing last ',' for '\n'
+            snapshot_line[line_sz] = '\0';  // just in case it's treated as a null terminating string
+
+            // "Saving" snapshot line
+            lp_io_write(lp->gid, snapshot_filename, line_sz, snapshot_line);
         }
     }
 
@@ -5108,6 +5131,9 @@ static void terminal_buf_update(terminal_state * s,
 static void dragonfly_dally_terminal_final( terminal_state * s, 
       tw_lp * lp )
 {
+    if (is_surrogate_on) {
+        dragonfly_dally_terminal_surrogate_to_highdef(s, lp);
+    }
     // printf("terminal id %d\n",s->terminal_id);
     dragonfly_total_time += s->total_time; //increment the PE level time counter
     
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index dfd684e9..018e469f 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -285,7 +285,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
         frozen_events = frozen_events->prev;
 
         //printf("%c", tw_event_sig_compare(gvt, prev_event->sig) < 0 ? '.' : 'x');
-        if(tw_event_sig_compare(prev_event->sig, gvt) > 0) {
+        if(tw_event_sig_compare(prev_event->sig, gvt) > 0 && !model_net_is_this_base_event(tw_event_data(prev_event))) {
             assert(prev_event->recv_ts == prev_event->sig.recv_ts);
             prev_event->recv_ts += switch_offset;
             prev_event->sig.recv_ts = prev_event->recv_ts;

From baa787ad69acc465890d78dbae50f60f9d5b196b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 22 Feb 2023 18:19:17 -0500
Subject: [PATCH 016/188] Loading router buffer occupancies via the config file

---
 doc/example/tutorial-ping-pong-surrogate.conf |  4 +-
 doc/example/tutorial-ping-pong.conf           |  2 +
 doc/example/tutorial-ping-pong.conf.in        |  2 +
 src/networks/model-net/dragonfly-dally.C      | 38 ++++++++++++++++++-
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/doc/example/tutorial-ping-pong-surrogate.conf b/doc/example/tutorial-ping-pong-surrogate.conf
index 360f2294..d1a2937c 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf
+++ b/doc/example/tutorial-ping-pong-surrogate.conf
@@ -1,6 +1,6 @@
 # Run this example with:
 # > cd path-to-codes/build
-# > mpirun -np 2 doc/example/tutorial-synthetic-ping-pong --synch=3 --num_messages=10000 -- ../doc/example/tutorial-ping-pong-surrogate.conf
+# > mpirun -np 2 doc/example/tutorial-synthetic-ping-pong --synch=3 --num_messages=10000 --lp-io-dir=codes-output -- ../doc/example/tutorial-ping-pong-surrogate.conf
 LPGROUPS
 {
    MODELNET_GRP
@@ -55,6 +55,8 @@ PARAMS
    routing="prog-adaptive";
 # folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
    save_packet_latency_path="packet-latency-trace/";
+# router buffer occupancy snapshots
+   router_buffer_snapshots=( "50e4", "60e4" );
 }
 SURROGATE {
 # determines the director switching from surrogate to high-def simulation strategy
diff --git a/doc/example/tutorial-ping-pong.conf b/doc/example/tutorial-ping-pong.conf
index 8ac8a9dd..3aabd871 100644
--- a/doc/example/tutorial-ping-pong.conf
+++ b/doc/example/tutorial-ping-pong.conf
@@ -50,4 +50,6 @@ PARAMS
    inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
 # routing protocol to be used
    routing="prog-adaptive";
+# router buffer occupancy snapshots
+   router_buffer_snapshots=( "50e4", "60e4" );
 }
diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in
index d8757459..40272ecd 100644
--- a/doc/example/tutorial-ping-pong.conf.in
+++ b/doc/example/tutorial-ping-pong.conf.in
@@ -50,4 +50,6 @@ PARAMS
    inter-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
 # routing protocol to be used
    routing="prog-adaptive";
+# router buffer occupancy snapshots
+   router_buffer_snapshots=( "50e4", "60e4" );
 }
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 6984b958..a0fb5810 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -97,8 +97,8 @@ static long num_remote_packets = 0;
 static long global_stalled_chunk_counter = 0;
 
 #define OUTPUT_SNAPSHOT 1
-const static int num_snapshots = 3;
-tw_stime snapshot_times[num_snapshots] = {100e3, 475e3, 1990e3};
+static int num_snapshots = 0;
+tw_stime * snapshot_times;
 char snapshot_filename[128];
 
 /* time in nanosecs */
@@ -2232,6 +2232,40 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     }
     // END CONGESTION CONTROL
 
+    // Router buffer occupancy configuration
+    if (OUTPUT_SNAPSHOT) {
+        char **timestamps;
+        size_t len;
+        rc = configuration_get_multivalue(&config, "PARAMS", "router_buffer_snapshots", anno, &timestamps, &len);
+        assert((len > 0) == (timestamps != NULL));
+        if (rc) {  // counter-intuitively, configuration_get_multivalue returns 1 if it found the key!
+            num_snapshots = len;
+            snapshot_times = (tw_stime*) malloc(len * sizeof(tw_stime));
+
+            for (size_t i = 0; i < len; i++) {
+                errno = 0;
+                snapshot_times[i] = strtod(timestamps[i], NULL);
+                if (errno == ERANGE || errno == EILSEQ){
+                    tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
+                }
+            }
+
+            if(!myRank) {
+                fprintf(stderr, "\nRouter snaptshots activated for timestamps = ");
+                for (size_t i = 0; i < len; i++) {
+                    fprintf(stderr, "%g%s", snapshot_times[i], i == len-1 ? "" : ", ");
+                }
+                fprintf(stderr, "\n");
+            }
+
+            // freeing some memory
+            for (size_t i = 0; i < len; i++) {
+                free(timestamps[i]);
+            }
+            free(timestamps);
+        }
+    }
+
     // Packet latency path to store configuration
     char packet_latency_path[MAX_NAME_LENGTH];
     packet_latency_path[0] = '\0';

From e152bc683596ecb6a551397dcf026839b2e031fa Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 22 Feb 2023 22:54:40 -0500
Subject: [PATCH 017/188] Fixing implementation of switching mechanism (mostly
 in ROSS)

ROSS version 557e0d4
---
 codes/model-net-lp.h                       |  2 -
 doc/example/tutorial-ping-pong.conf.in     |  2 +-
 src/networks/model-net/core/model-net-lp.c | 20 -----
 src/networks/model-net/dragonfly-dally.C   |  4 +-
 src/util/surrogate.c                       | 88 ++++++++++++++--------
 5 files changed, 61 insertions(+), 55 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index 6c235cc9..6a2b548a 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -121,8 +121,6 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 // Functions to call when switching from highdef to surrogate, and surrogate to highdef
 void model_net_method_switch_to_surrogate_lp(tw_lp * lp);
 void model_net_method_switch_to_highdef_lp(tw_lp * lp);
-void model_net_method_switch_to_surrogate(void);
-void model_net_method_switch_to_highdef(void);
 
 // It will call the function (pointer) on the internal structure/network model.
 // The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype
diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in
index 40272ecd..f44acfb7 100644
--- a/doc/example/tutorial-ping-pong.conf.in
+++ b/doc/example/tutorial-ping-pong.conf.in
@@ -19,7 +19,7 @@ PARAMS
    modelnet_scheduler="fcfs";
 # chunk size in the network (when chunk size = packet size, packets will not be
 # divided into chunks)
-   chunk_size="4096";
+   chunk_size="64";
 # modelnet_scheduler="round-robin";
 # number of routers in group
    num_routers="4";
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 5031cd35..7172c0ee 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -12,7 +12,6 @@
 #include "codes/model-net-sched.h"
 #include "codes/codes_mapping.h"
 #include "codes/jenkins-hash.h"
-#include "codes/surrogate.h"
 
 #define MN_NAME "model_net_base"
 
@@ -40,8 +39,6 @@ typedef struct model_net_base_params_s {
 static int                       num_params = 0;
 static const char              * annos[CONFIGURATION_MAX_ANNOS];
 static model_net_base_params     all_params[CONFIGURATION_MAX_ANNOS];
-static bool is_surrogate_on = false;
-static int num_surrogate = 0;
 
 static tw_stime mn_sample_interval = 0.0;
 static tw_stime mn_sample_end = 0.0;
@@ -868,7 +865,6 @@ void handle_sched_next(
 #if DEBUG
     printf("%llu handle sched_next function\n",LLU(tw_now(lp)));
 #endif
-
     tw_stime poffset;
     model_net_request *r = &m->msg.m_base.req;
     int is_from_remote = m->msg.m_base.is_from_remote;
@@ -908,13 +904,6 @@ void handle_sched_next_rc(
         tw_bf *b,
         model_net_wrap_msg * m,
         tw_lp * lp){
-
-    // Handling event was skipped
-    if (b->c12) {
-        b->c12 = 0;
-        return;
-    }
-
     model_net_request *r = &m->msg.m_base.req;
     int is_from_remote = m->msg.m_base.is_from_remote;
     model_net_sched * ss = is_from_remote ? ns->sched_recv : ns->sched_send[r->queue_offset];
@@ -1096,15 +1085,6 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 
 }
 
-void model_net_method_switch_to_surrogate(void) {
-    is_surrogate_on = true;
-    num_surrogate++;
-}
-
-void model_net_method_switch_to_highdef(void) {
-    is_surrogate_on = false;
-}
-
 void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index a0fb5810..70517752 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5165,7 +5165,7 @@ static void terminal_buf_update(terminal_state * s,
 static void dragonfly_dally_terminal_final( terminal_state * s, 
       tw_lp * lp )
 {
-    if (is_surrogate_on) {
+    if (FREEZE_NETWORK_STATE && is_surrogate_on) {
         dragonfly_dally_terminal_surrogate_to_highdef(s, lp);
     }
     // printf("terminal id %d\n",s->terminal_id);
@@ -6264,6 +6264,7 @@ terminal_dally_event( terminal_state * s,
         // by the surrogate freezing the network procedure and should not be taken into account
         if (! (msg->type == T_GENERATE || msg->type == T_ARRIVE_PREDICTED || msg->type == T_NOTIFY)) {
             bf->c20 = 1;
+            printf("This shouldn't happen! :( (time stamp = %e)\n", tw_now(lp));
             return;
         }
     } else {
@@ -6370,6 +6371,7 @@ static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, term
     // In case the event was skipped above, skip now
     if (bf->c20) {
         bf->c20 = 0;
+        printf("Has been rolledback! :)\n");
         return;
     }
 
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 018e469f..d57352c3 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -151,30 +151,33 @@ static bool is_workload_event(tw_event * event) {
 }
 
 
-static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) {
-    int events_processed = 0;
-    int events_modified = 0;
-    for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        tw_kp * const this_kp = g_tw_kp[i];
-
-        // All events in pevent_q are sent into the future
-        assert((this_kp->pevent_q.tail == NULL) == (this_kp->pevent_q.size == 0));
-        tw_event * cur_event = this_kp->pevent_q.tail;
-        while (cur_event) {
-            if (!is_workload_event(cur_event) && tw_event_sig_compare(cur_event->sig, gvt) > 0) {
-                cur_event->recv_ts += switch_offset;
-                cur_event->sig.recv_ts = cur_event->recv_ts;
-                events_modified++;
-            }
-
-            cur_event = cur_event->prev;
-            events_processed++;
-        }
-    }
-    if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) {
-        printf("PE %lu: Total events from causality modified %d (from total processed %d)\n", g_tw_mynode, events_modified, events_processed);
-    }
-}
+//static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) {
+//    (void) switch_offset;
+//    (void) gvt;
+//    int events_processed = 0;
+//    int events_modified = 0;
+//    for (unsigned int i = 0; i < g_tw_nkp; i++) {
+//        tw_kp * const this_kp = g_tw_kp[i];
+//
+//        //assert(this_kp->pevent_q.size == 0);
+//        // All events in pevent_q are sent into the future
+//        assert((this_kp->pevent_q.tail == NULL) == (this_kp->pevent_q.size == 0));
+//        tw_event * cur_event = this_kp->pevent_q.tail;
+//        while (cur_event) {
+//            if (!is_workload_event(cur_event) && tw_event_sig_compare(cur_event->sig, gvt) > 0) {
+//                cur_event->recv_ts += switch_offset;
+//                cur_event->sig.recv_ts = cur_event->recv_ts;
+//                events_modified++;
+//            }
+//
+//            cur_event = cur_event->prev;
+//            events_processed++;
+//        }
+//    }
+//    if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) {
+//        printf("PE %lu: Total events from causality modified %d (from total processed %d)\n", g_tw_mynode, events_modified, events_processed);
+//    }
+//}
 
 
 static struct lp_types_switch const * get_type_switch(char const * const name) {
@@ -198,11 +201,28 @@ static inline bool does_any_pe(bool val) {
 }
 
 
-static void rollback_and_cancel_events_pe(tw_pe * pe) {
+static tw_event_sig find_sig_smallest_larger_than(double switch_, tw_kp * kp, tw_event_sig gvt) {
+    //printf("Just testing, I'm here! size=%d\n", kp->pevent_q.size);
+    tw_event * cur_event = kp->pevent_q.tail;
+    while (cur_event) {
+        //printf("Current timestamp to rollback (%e) and gvt (%e)\n", cur_event->sig.recv_ts, gvt.recv_ts);
+        if (tw_event_sig_compare(cur_event->sig, gvt) < 0 && switch_ <= cur_event->sig.recv_ts) {
+            gvt = cur_event->sig;
+        }
+        cur_event = cur_event->prev;
+    }
+    return gvt;
+}
+
+
+static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
     // Backtracking the simulation to GVT
+    double const switch_ = switch_at.time_stampts[switch_at.current_i];
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        tw_kp_rollback_to_sig(g_tw_kp[i], pe->GVT_sig);
+        tw_event_sig const smallest = find_sig_smallest_larger_than(switch_, g_tw_kp[i], gvt);
+        tw_kp_rollback_to_sig(g_tw_kp[i], smallest);
     }
+    assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
 
     // Making sure that everything gets cleaned up properly (AVL tree should be empty by the end)
     do {
@@ -285,7 +305,8 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
         frozen_events = frozen_events->prev;
 
         //printf("%c", tw_event_sig_compare(gvt, prev_event->sig) < 0 ? '.' : 'x');
-        if(tw_event_sig_compare(prev_event->sig, gvt) > 0 && !model_net_is_this_base_event(tw_event_data(prev_event))) {
+        assert(tw_event_sig_compare(prev_event->sig, gvt) >= 0);
+        if(!model_net_is_this_base_event(tw_event_data(prev_event))) {
             assert(prev_event->recv_ts == prev_event->sig.recv_ts);
             prev_event->recv_ts += switch_offset;
             prev_event->sig.recv_ts = prev_event->recv_ts;
@@ -314,7 +335,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
     }
 
     // shifting time stamps of events in causality list (one list per KP)
-    offset_future_events_in_causality_list(switch_offset, gvt);
+    // offset_future_events_in_causality_list(switch_offset, gvt);
 }
 
 
@@ -331,13 +352,12 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
 
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
-        rollback_and_cancel_events_pe(pe);
+        rollback_and_cancel_events_pe(pe, gvt);
         //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
         assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
     }
 
     shift_events_to_future_pe(pe, gvt);
-    model_net_method_switch_to_surrogate();
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -368,7 +388,13 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
 
 static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
     (void) pe;
-    model_net_method_switch_to_highdef();
+
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
+        rollback_and_cancel_events_pe(pe, gvt);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
+    }
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {

From b151224334181da9d670c6c24220ae5ac80a4b5f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 23 Feb 2023 15:22:15 -0500
Subject: [PATCH 018/188] Selection of events to freeze is now done by the
 topology/network model

---
 codes/model-net-lp.h                       |  22 ++-
 codes/surrogate.h                          |   6 +-
 src/networks/model-net/core/model-net-lp.c |  19 ++-
 src/networks/model-net/dragonfly-dally.C   |  39 +++++-
 src/util/surrogate.c                       | 155 +++++++++------------
 5 files changed, 137 insertions(+), 104 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index 6a2b548a..41db732a 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -131,18 +131,18 @@ void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp)
 /// use them
 
 enum model_net_base_event_type {
-    MN_BASE_NEW_MSG,
+    MN_BASE_NEW_MSG = 1,
     // schedule next packet
-    MN_BASE_SCHED_NEXT,
+    MN_BASE_SCHED_NEXT = 2,
     // gather a sample from the underlying model
-    MN_BASE_SAMPLE,
+    MN_BASE_SAMPLE = 4,
     // message goes directly down to topology-specific event handler
-    MN_BASE_PASS,
+    MN_BASE_PASS = 8,
     /* message goes directly to topology-specific event handler for ending the simulation
        usefull if there is an infinite heartbeat pattern */
-    MN_BASE_END_NOTIF,
+    MN_BASE_END_NOTIF = 16,
     // message calls congestion request method on topology specific handler
-    MN_CONGESTION_EVENT
+    MN_CONGESTION_EVENT = 32
 };
 
 typedef struct model_net_base_msg {
@@ -177,7 +177,15 @@ typedef struct model_net_wrap_msg {
     } msg;
 } model_net_wrap_msg;
 
-bool model_net_is_this_base_event(model_net_wrap_msg *);
+typedef bool (*should_msg_be_frozen_f) (void*); // topology-specific should it be frozen question
+
+// Determines if given event should be frozen. It will return true for events of a type contained in `freeze_types`, it will optionally call the topology-specific `should_freeze_question` to check if the event is to be frozen (active only if MN_BASE_PASS is not contained in `freeze_types`)
+bool model_net_should_event_be_frozen(
+        tw_lp * lp,
+        model_net_wrap_msg * msg,  // message to check if has to be frozen
+        int freeze_types,  // events of type "contained" in this will be frozen. An example is the "enum" `MN_BASE_SAMPLE | MN_CONGESTION_EVENT | MN_BASE_END_NOTIF` which will freeze events of those three types and will check on the supplied function below whether the internal model decides to freeze or not
+        should_msg_be_frozen_f should_freeze_question  // this function will be called if the type of the message is MN_BASE_PASS and it hasn't been indicated above that it will be frozen. If NULL and MN_BASE_PASS has not being indicated above, then it won't be frozen
+);
 
 #ifdef __cplusplus
 }
diff --git a/codes/surrogate.h b/codes/surrogate.h
index afbb17bd..87eb43d3 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -84,14 +84,14 @@ struct director_data {
  */
 
 typedef void (*model_switch_f) (void * data, tw_lp * lp); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
-typedef void (*model_ask_if_freeze_f) (void * data_model, void * data_model_net, tw_lp * lp, bool * ret); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
+typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
 
 struct lp_types_switch {
     char lpname[MAX_NAME_LENGTH];
-    bool is_modelnet;
+    bool trigger_idle_modelnet;  // Trigger idle events for model-net (prevents a model to be stuck in a schedule loop if it is to process packets during surrogate-mode). If this is true and the lpname does not start with 'modelnet_', the behaviour is undefined
     model_switch_f        highdef_to_surrogate;
     model_switch_f        surrogate_to_highdef;
-    model_ask_if_freeze_f should_event_be_frozen;
+    model_ask_if_freeze_f should_event_be_frozen;  // NULL means event from LP type shouldn't be frozen
 };
 
 struct surrogate_config {
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 7172c0ee..61aee8db 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1134,8 +1134,23 @@ void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp *
     fun(ns->sub_state, lp);
 }
 
-bool model_net_is_this_base_event(model_net_wrap_msg * msg) {
-    return msg->h.event_type == MN_BASE_NEW_MSG || msg->h.event_type == MN_BASE_SCHED_NEXT;
+bool model_net_should_event_be_frozen(
+        tw_lp * lp,
+        model_net_wrap_msg * msg,
+        int freeze_types,
+        should_msg_be_frozen_f should_freeze_question
+) {
+    model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
+
+    if (msg->h.event_type & freeze_types) { // Finding out whether current event type is one of freeze types
+        return true;
+    } else if (msg->h.event_type & MN_BASE_PASS) { // pass down to topology-specific event handler
+        if (should_freeze_question) {
+            void * const sub_msg = ((char*)msg)+msg_offsets[ns->net_id];
+            return should_freeze_question(sub_msg);
+        }
+    }
+    return false;
 }
 
 /*
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 70517752..4791d9fe 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -197,6 +197,8 @@ static void switch_surrogate(void);
 static bool is_surrogate_on_fun(void);
 static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp);
 static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp);
+static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event);
+static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event);
 //
 // ==== END OF Parameters to tune surrogate mode ====
 
@@ -2286,13 +2288,15 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
             .n_lp_types = 2,
             .lp_types = {
                 {.lpname = "modelnet_dragonfly_dally",
-                 .is_modelnet = true,
+                 .trigger_idle_modelnet = true,
                  .highdef_to_surrogate = (model_switch_f) dragonfly_dally_terminal_highdef_to_surrogate,
-                 .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef},
+                 .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef,
+                 .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen},
                 {.lpname = "modelnet_dragonfly_dally_router",
-                 .is_modelnet = true,
+                 .trigger_idle_modelnet = false,
                  .highdef_to_surrogate = NULL,
-                 .surrogate_to_highdef = NULL},
+                 .surrogate_to_highdef = NULL,
+                 .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen},
                 0
             }
         };
@@ -3014,6 +3018,27 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw
     free(frozen_state);
     assert(s->frozen_state == NULL);
 };
+
+static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event) {
+    (void) lp;
+    assert(lp->gid == event->dest_lpid);
+    int const event_types_to_freeze = MN_BASE_SAMPLE | MN_BASE_PASS | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT;
+    return model_net_should_event_be_frozen(lp, (model_net_wrap_msg *) tw_event_data(event), event_types_to_freeze, NULL);
+}
+
+static bool dragonfly_dally_router_should_event_be_frozen_internal(terminal_dally_message * msg) {
+    if (msg->type == R_SNAPSHOT) { // Snapshots will stay unaltered, never frozen
+        return false;
+    }
+    return true;
+}
+
+static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event) {
+    assert(lp->gid == event->dest_lpid);
+    int const event_types_to_freeze = MN_BASE_NEW_MSG | MN_BASE_SCHED_NEXT | MN_BASE_SAMPLE | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT;
+    return model_net_should_event_be_frozen(lp, (model_net_wrap_msg *) tw_event_data(event), event_types_to_freeze,
+            (should_msg_be_frozen_f) dragonfly_dally_router_should_event_be_frozen_internal);
+}
 //
 // ==== END OF Surrogate functions definition ====
 
@@ -3189,7 +3214,11 @@ static void router_dally_commit(router_state * s,
                     written += sprintf(snapshot_line+written, "%d, ", this_vc_snapshot_data);
                 }
             }
-            written += sprintf(snapshot_line+written, "\n");
+            assert(written <= 8192);
+            assert(snapshot_line[written - 2] == ',');
+            snapshot_line[written - 2] = '\n';  // Replacing ',' for new line
+            written -= 1;
+
             lp_io_write(lp->gid, snapshot_filename, written, snapshot_line);
         }
     }
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index d57352c3..ce1d0cd5 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -141,16 +141,6 @@ static struct {
 } switch_at;
 
 
-// To be treated as a linked list. Use `->next` to access the next event
-static bool is_workload_event(tw_event * event) {
-    char const * lp_type_name;
-    int rep_id, offset; // unused
-    codes_mapping_get_lp_info2(event->dest_lpid, NULL, &lp_type_name, NULL, &rep_id, &offset);
-
-    return strncmp("modelnet_", lp_type_name, 9) != 0;
-}
-
-
 //static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) {
 //    (void) switch_offset;
 //    (void) gvt;
@@ -201,26 +191,26 @@ static inline bool does_any_pe(bool val) {
 }
 
 
-static tw_event_sig find_sig_smallest_larger_than(double switch_, tw_kp * kp, tw_event_sig gvt) {
-    //printf("Just testing, I'm here! size=%d\n", kp->pevent_q.size);
-    tw_event * cur_event = kp->pevent_q.tail;
-    while (cur_event) {
-        //printf("Current timestamp to rollback (%e) and gvt (%e)\n", cur_event->sig.recv_ts, gvt.recv_ts);
-        if (tw_event_sig_compare(cur_event->sig, gvt) < 0 && switch_ <= cur_event->sig.recv_ts) {
-            gvt = cur_event->sig;
-        }
-        cur_event = cur_event->prev;
-    }
-    return gvt;
-}
+//static tw_event_sig find_sig_smallest_larger_than(double switch_, tw_kp * kp, tw_event_sig gvt) {
+//    //printf("Just testing, I'm here! size=%d\n", kp->pevent_q.size);
+//    tw_event * cur_event = kp->pevent_q.tail;
+//    while (cur_event) {
+//        //printf("Current timestamp to rollback (%e) and gvt (%e)\n", cur_event->sig.recv_ts, gvt.recv_ts);
+//        if (tw_event_sig_compare(cur_event->sig, gvt) < 0 && switch_ <= cur_event->sig.recv_ts) {
+//            gvt = cur_event->sig;
+//        }
+//        cur_event = cur_event->prev;
+//    }
+//    return gvt;
+//}
 
 
 static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
     // Backtracking the simulation to GVT
-    double const switch_ = switch_at.time_stampts[switch_at.current_i];
+    //double const switch_ = switch_at.time_stampts[switch_at.current_i];
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        tw_event_sig const smallest = find_sig_smallest_larger_than(switch_, g_tw_kp[i], gvt);
-        tw_kp_rollback_to_sig(g_tw_kp[i], smallest);
+        //tw_event_sig const smallest = find_sig_smallest_larger_than(switch_, g_tw_kp[i], gvt);
+        tw_kp_rollback_to_sig(g_tw_kp[i], gvt);
     }
     assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
 
@@ -255,37 +245,6 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
         return;
     }
 
-    tw_event * frozen_events = NULL;  // Linked list of frozen events
-    tw_event * workload_events = NULL; // Linked list of workload events, to be placed again in the queue
-
-    int events_dequeued = 0;
-    // Traversing all events stored in the queue
-    while (next_event) {
-        // Filtering events to freeze
-        tw_event * const prev_event = next_event;
-        next_event = tw_pq_dequeue(pe->pq);
-        assert(prev_event->next == NULL);
-
-        if (is_workload_event(prev_event)) {
-            // store event in events to inject immediately back to the queue (in reverse order, because the queue will take the youngest event first)
-            if (!workload_events) {
-                workload_events = prev_event;
-            } else {
-                prev_event->prev = workload_events;
-                workload_events = prev_event;
-            }
-        } else {
-            // store event in frozen events, to be forwarded to the future
-            if (!frozen_events) {
-                frozen_events = prev_event;
-            } else {
-                prev_event->prev = frozen_events;
-                frozen_events = prev_event;
-            }
-        }
-        events_dequeued++;
-    }
-
     // We have to put the events back into the queue after we switch back, but if we never
     // switch back they will never get to be processed and thus we can clean them
     double switch_offset = g_tw_ts_end;
@@ -297,32 +256,42 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
         //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
     }
 
-    int events_enqueued = 0;
-    // shifting time stamps of network events to the future
-    //printf("Events in the future ");
-    while (frozen_events) {
-        tw_event * const prev_event = frozen_events;
-        frozen_events = frozen_events->prev;
-
-        //printf("%c", tw_event_sig_compare(gvt, prev_event->sig) < 0 ? '.' : 'x');
-        assert(tw_event_sig_compare(prev_event->sig, gvt) >= 0);
-        if(!model_net_is_this_base_event(tw_event_data(prev_event))) {
-            assert(prev_event->recv_ts == prev_event->sig.recv_ts);
-            prev_event->recv_ts += switch_offset;
-            prev_event->sig.recv_ts = prev_event->recv_ts;
+    tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue
+    int events_dequeued = 0;  // for stats on code correctness
+    // Traversing all events stored in the queue
+    while (next_event) {
+        // Filtering events to freeze
+        assert(next_event->prev == NULL);
+        assert(tw_event_sig_compare(next_event->sig, gvt) >= 0);
+
+        // finding out lp type
+        char const * lp_type_name;
+        int rep_id, offset; // unused
+        codes_mapping_get_lp_info2(next_event->dest_lpid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+        struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
+
+        // shifting time stamps to the future for events to freeze
+        if (lp_type_switch && lp_type_switch->should_event_be_frozen
+                && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) {
+            assert(next_event->recv_ts == next_event->sig.recv_ts);
+            next_event->recv_ts += switch_offset;
+            next_event->sig.recv_ts = next_event->recv_ts;
         }
+        assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts);
 
-        prev_event->prev = NULL;
-        tw_pq_enqueue(pe->pq, prev_event);
-        assert(prev_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts);
+        // store event in deque_events to inject immediately back to the queue
+        next_event->prev = dequed_events;
+        dequed_events = next_event;
+        events_dequeued++;
 
-        events_enqueued++;
+        next_event = tw_pq_dequeue(pe->pq);
     }
 
-    // Reinjecting workload events into simulation
-    while (workload_events) {
-        tw_event * const prev_event = workload_events;
-        workload_events = workload_events->prev;
+    int events_enqueued = 0;
+    // Reinjecting events into simulation
+    while (dequed_events) {
+        tw_event * const prev_event = dequed_events;
+        dequed_events = dequed_events->prev;
         prev_event->prev = NULL;
         tw_pq_enqueue(pe->pq, prev_event);
 
@@ -372,14 +341,20 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
         char const * lp_type_name;
         int rep_id, offset; // unused
         codes_mapping_get_lp_info2(lp->gid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+        bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0;
         struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
 
-        if (lp_type_switch && lp_type_switch->highdef_to_surrogate) {
-            if (lp_type_switch->is_modelnet) {
+        if (lp_type_switch) {
+            if (lp_type_switch->trigger_idle_modelnet) {
+                assert(is_lp_modelnet);
                 model_net_method_switch_to_surrogate_lp(lp);
-                model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate);
-            } else {
-                lp_type_switch->highdef_to_surrogate(lp->cur_state, lp);
+            }
+            if (lp_type_switch->surrogate_to_highdef) {
+                if (is_lp_modelnet) {
+                    model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate);
+                } else {
+                    lp_type_switch->highdef_to_surrogate(lp->cur_state, lp);
+                }
             }
         }
     }
@@ -410,14 +385,20 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
         char const * lp_type_name;
         int rep_id, offset; // unused
         codes_mapping_get_lp_info2(lp->gid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+        bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0;
         struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
 
-        if (lp_type_switch && lp_type_switch->surrogate_to_highdef) {
-            if (lp_type_switch->is_modelnet) {
+        if (lp_type_switch) {
+            if (lp_type_switch->trigger_idle_modelnet) {
+                assert(is_lp_modelnet);
                 model_net_method_switch_to_highdef_lp(lp);
-                model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef);
-            } else {
-                lp_type_switch->surrogate_to_highdef(lp->cur_state, lp);
+            }
+            if (lp_type_switch->surrogate_to_highdef) {
+                if (is_lp_modelnet) {
+                    model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef);
+                } else {
+                    lp_type_switch->surrogate_to_highdef(lp->cur_state, lp);
+                }
             }
         }
 

From 25af13a2a2618c7ae338fdbf8c69371cc0e04b05 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 23 Feb 2023 17:33:32 -0500
Subject: [PATCH 019/188] Moving procedure to save packet delay info to memory
 out of event handler and into commit

---
 codes/net/dragonfly-dally.h              |  1 +
 src/networks/model-net/dragonfly-dally.C | 61 ++++++++++++++++--------
 2 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 8c285287..6d009008 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -123,6 +123,7 @@ struct terminal_dally_message
 
    // To use in rollback calls
    tw_stime saved_last_in_queue_time;
+   tw_stime saved_in_queue_delay;
    tw_stime msg_new_mn_event;
 };
 
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 4791d9fe..4fff5879 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -596,6 +596,7 @@ struct terminal_state
 
     // Stores the last time in which a packet was processed (time at which a T_GENERATE event was processed)
     double last_in_queue_time;
+    double in_queue_delay;
     // The predictor kicks in on surrogate mode and predicts the time a packet will take to its destination
     void * predictor_data;
 
@@ -2974,6 +2975,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
     s->data_size_ross_sample        = frozen_state->data_size_ross_sample;
     s->total_msg_size               = frozen_state->total_msg_size;
     s->finished_msgs                = frozen_state->finished_msgs;
+    s->in_queue_delay               = frozen_state->in_queue_delay;
     memcpy(&s->arrived_here,         &frozen_state->arrived_here,         sizeof(s->arrived_here));
     memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
     memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
@@ -3009,6 +3011,7 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw
     frozen_state->data_size_ross_sample        = s->data_size_ross_sample;
     frozen_state->total_msg_size               = s->total_msg_size;
     frozen_state->finished_msgs                = s->finished_msgs;
+    frozen_state->in_queue_delay               = s->in_queue_delay;
     memcpy(&frozen_state->arrived_here,         &s->arrived_here,         sizeof(s->arrived_here));
     memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
     memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
@@ -3145,6 +3148,25 @@ static void terminal_dally_commit(terminal_state * s,
         }
     }
 
+    if(msg->type == T_GENERATE && bf->c10) {  // if the packet was sent as a prediction, store the prediction in memory
+        auto start = (struct packet_start) {
+            .packet_ID = msg->packet_ID,
+            .dest_terminal_lpid = msg->dest_terminal_lpid,
+            .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
+            .travel_start_time = msg->travel_start_time,
+            .workload_injection_time = msg->msg_start_time,
+            .delay_at_queue_head = s->in_queue_delay,
+            .packet_size = msg->packet_size
+        };
+
+        // Saving
+        auto const end = (struct packet_end) {
+            .packet_ID = msg->packet_ID,
+            .travel_end_time = msg->travel_end_time,
+        };
+        packet_latency_save_to_file(s->terminal_id, start, end, true);
+    }
+
     if(msg->type == T_NOTIFY && msg->notify_type == NOTIFY_LATENCY)
     {
         assert(lp->gid == msg->src_terminal_id);
@@ -3703,18 +3725,19 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     msg->my_hops_cur_group = -1;
 
     // determining injection delay
-    tw_stime injection_ts;
-    if (g_congestion_control_enabled) {
-        double bandwidth_coef = 1;
-        if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
-            bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
-        }
-        injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
-    }
-    else {
-        injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
-    }
-    tw_stime const nic_ts = injection_ts;
+    //tw_stime injection_ts;
+    //if (g_congestion_control_enabled) {
+    //    double bandwidth_coef = 1;
+    //    if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
+    //        bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
+    //    }
+    //    injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
+    //}
+    //else {
+    //    injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
+    //}
+    //tw_stime const nic_ts = injection_ts;
+    tw_stime const nic_ts = s->in_queue_delay;
     //printf("injection_ts = %f\n", injection_ts);
 
     // Using predictor to find latency
@@ -3739,12 +3762,9 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     double const latency = 
         terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
-    // Saving
-    auto const end = (struct packet_end) {
-        .packet_ID = msg->packet_ID,
-        .travel_end_time = tw_now(lp) + latency,
-    };
-    packet_latency_save_to_file(s->terminal_id, start, end, true);
+    // Info to be used at commit time to save into file
+    msg->travel_start_time = tw_now(lp);
+    msg->travel_end_time = tw_now(lp) + latency;
 
     // Sending packet directly to destination terminal
     //tw_stime const ts = 0;
@@ -3795,6 +3815,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     packet_gen--;
     s->packet_counter--;
 
+    s->in_queue_delay = msg->saved_in_queue_delay;
     s->sent_packets.pop_back();
 
     if(bf->c2)
@@ -4082,13 +4103,15 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     }
     //assert(tw_now(lp) == msg->travel_start_time);
     tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
+    msg->saved_in_queue_delay = s->in_queue_delay;
+    s->in_queue_delay = tw_now(lp) - time_at_queue_head;
     s->sent_packets.push_back((struct packet_start){
         .packet_ID = msg->packet_ID,
         .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
-        .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
+        .delay_at_queue_head = s->in_queue_delay,
         .packet_size = msg->packet_size,
         .message_data = msg_data,
         .remote_event_data = remote_data

From 0dfb747327b2c28e9c66da62e81ef5211ce84329 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 23 Feb 2023 19:31:17 -0500
Subject: [PATCH 020/188] Adding some `free`s that were previously missing on
 rollback!

---
 src/networks/model-net/dragonfly-dally.C | 27 ++++++++++++++----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 4fff5879..1806104f 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3816,6 +3816,11 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     s->packet_counter--;
 
     s->in_queue_delay = msg->saved_in_queue_delay;
+    struct packet_start start = s->sent_packets.back();
+    if (start.remote_event_data) {
+        free(start.remote_event_data);
+    }
+    free(start.message_data);
     s->sent_packets.pop_back();
 
     if(bf->c2)
@@ -4845,7 +4850,15 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
                 .packet_ID = msg->packet_ID,
                 .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
             });
-            bf->c14 = 0;
+        }
+
+        if(bf->c15) {
+            struct packet_id const packet = {
+                .packet_ID = msg->packet_ID,
+                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+            };
+            assert(s->arrived_here.count(packet) == 1);
+            s->arrived_here.erase(packet);
         }
 
         struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st);
@@ -4869,16 +4882,6 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         free_tmp(tmp);	
         s->rank_tbl_pop--;
     }
-
-    if(bf->c15) {
-        struct packet_id const packet = {
-            .packet_ID = msg->packet_ID,
-            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-        };
-        assert(s->arrived_here.count(packet) == 1);
-        s->arrived_here.erase(packet);
-        bf->c15 = 0;
-    }
     
     return;
 }
@@ -4968,6 +4971,8 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     bf->c3 = 0;
     bf->c4 = 0;
     bf->c7 = 0;
+    bf->c14 = 0;
+    bf->c15 = 0;
 
     /* Total overall finished chunks in simulation */
     N_finished_chunks++;

From 8f755ebf5585e5214215ebcb311eb8f4f568b6f2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 23 Feb 2023 21:54:38 -0500
Subject: [PATCH 021/188] Fiddling a bit more with the idle events. They are
 tricky to get right, to work properly

---
 codes/model-net-lp.h                       |  3 ++
 src/networks/model-net/core/model-net-lp.c | 35 +++++++++++++++++-----
 src/networks/model-net/dragonfly-dally.C   |  2 +-
 src/util/surrogate.c                       |  2 ++
 4 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index 41db732a..147ce248 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -121,6 +121,8 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 // Functions to call when switching from highdef to surrogate, and surrogate to highdef
 void model_net_method_switch_to_surrogate_lp(tw_lp * lp);
 void model_net_method_switch_to_highdef_lp(tw_lp * lp);
+void model_net_method_switch_to_surrogate(void);
+void model_net_method_switch_to_highdef(void);
 
 // It will call the function (pointer) on the internal structure/network model.
 // The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype
@@ -155,6 +157,7 @@ typedef struct model_net_base_msg {
     // TODO: make this a union for multiple types of parameters
     mn_sched_params sched_params;
     model_net_sched_rc rc; // rc for scheduling events
+    int created_in_surrogate;
 } model_net_base_msg;
 
 typedef struct model_net_wrap_msg {
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 61aee8db..801926a1 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -21,6 +21,8 @@
 int model_net_base_magic;
 int mn_sample_enabled = 0;
 
+static int is_surrogate_on = false;
+
 // message-type specific offsets - don't want to get bitten later by alignment
 // issues...
 static int msg_offsets[MAX_NETS];
@@ -574,6 +576,10 @@ void model_net_base_event(
 
     assert(m->h.magic == model_net_base_magic);
 
+    if(!is_surrogate_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
+        return;
+    }
+
     void * sub_msg;
     switch (m->h.event_type){
         case MN_BASE_NEW_MSG:
@@ -617,6 +623,10 @@ void model_net_base_event_rc(
         tw_lp * lp){
     assert(m->h.magic == model_net_base_magic);
 
+    if(!is_surrogate_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
+        return;
+    }
+
     void * sub_msg;
     switch (m->h.event_type){
         case MN_BASE_NEW_MSG:
@@ -1012,6 +1022,7 @@ void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
             &m_wrap->h);
     m_wrap->msg.m_base.is_from_remote = is_recv_queue;
     r_wrap->queue_offset = queue_offset;
+    m_wrap->msg.m_base.created_in_surrogate = is_surrogate_on;
     tw_event_send(e);
 }
 
@@ -1085,6 +1096,14 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 
 }
 
+void model_net_method_switch_to_surrogate(void) {
+    is_surrogate_on = true;
+}
+
+void model_net_method_switch_to_highdef(void) {
+    is_surrogate_on = false;
+}
+
 void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
@@ -1095,7 +1114,7 @@ void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
         // scheduling an idle event to prevent getting stuck in the middle of a scheduling loop
         if (ns->sched_loop_pre_surrogate[i]) { // <- this can be more finely tuned
         // TODO: change zero-offset event for something a bit more sensible
-            model_net_method_idle_event(1.0, 0, lp);
+            model_net_method_idle_event2(0.0, 0, i, lp);
         }
         ns->in_sched_send_loop[i] = 0;
     }
@@ -1103,7 +1122,7 @@ void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
 
     ns->sched_recv_loop_pre_surrogate = ns->in_sched_recv_loop;
     if (ns->in_sched_recv_loop) {
-        model_net_method_idle_event(1.0, 1, lp);
+        model_net_method_idle_event(0.0, 1, lp);
     }
     ns->in_sched_recv_loop = 0;
 }
@@ -1116,16 +1135,16 @@ void model_net_method_switch_to_highdef_lp(tw_lp * lp) {
         //printf("%d ", ns->in_sched_send_loop[i]);
         // We have to duplicate an idle event that was produced in surrogate-mode, but not yet processed by the time we switch to high-def again, if that event was in the middle of the loop (asking for the next packet to inject) and in no other case
         // TODO: Not all LPs need an event like this!
-        if (ns->sched_loop_pre_surrogate[i] == 1 && ns->in_sched_send_loop[i] == 0) {
-            model_net_method_idle_event(1.0, 0, lp);
+        if (ns->sched_loop_pre_surrogate[i] == 0 && ns->in_sched_send_loop[i] == 1) {
+            model_net_method_idle_event2(0.0, 0, i, lp);
         }
-        ns->in_sched_send_loop[i] = ns->sched_loop_pre_surrogate[i];
+        ns->in_sched_send_loop[i] |= ns->sched_loop_pre_surrogate[i];
     }
 
-    if (ns->sched_recv_loop_pre_surrogate == 1 && ns->in_sched_recv_loop == 0) {
-        model_net_method_idle_event(1.0, 1, lp);
+    if (ns->sched_recv_loop_pre_surrogate == 0 && ns->in_sched_recv_loop == 1) {
+        model_net_method_idle_event(0.0, 1, lp);
     }
-    ns->in_sched_recv_loop = ns->sched_recv_loop_pre_surrogate;
+    ns->in_sched_recv_loop |= ns->sched_recv_loop_pre_surrogate;
 }
 
 void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp)) {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 1806104f..a4886873 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3025,7 +3025,7 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw
 static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event) {
     (void) lp;
     assert(lp->gid == event->dest_lpid);
-    int const event_types_to_freeze = MN_BASE_SAMPLE | MN_BASE_PASS | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT;
+    int const event_types_to_freeze = MN_BASE_SCHED_NEXT | MN_BASE_SAMPLE | MN_BASE_PASS | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT;
     return model_net_should_event_be_frozen(lp, (model_net_wrap_msg *) tw_event_data(event), event_types_to_freeze, NULL);
 }
 
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index ce1d0cd5..7ed098e9 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -466,8 +466,10 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     // "Freezing" network events and activating LP's switch functions
     if (FREEZE_NETWORK_STATE) {
         if (surr_config.director.is_surrogate_on()) {
+            model_net_method_switch_to_surrogate();
             events_high_def_to_surrogate_switch(pe, gvt);
         } else {
+            model_net_method_switch_to_highdef();
             events_surrogate_to_high_def_switch(pe, gvt);
         }
     }

From 80362d6991b098f50adae5da1c72d6bb20afd31f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 23 Feb 2023 22:41:01 -0500
Subject: [PATCH 022/188] Using injection bandwith infraestructure instead of
 by hand strategy

---
 scripts/dragonfly-snapshots.py           | 21 ++++++++++++++++
 src/networks/model-net/dragonfly-dally.C | 32 ++++++++++++------------
 2 files changed, 37 insertions(+), 16 deletions(-)
 create mode 100644 scripts/dragonfly-snapshots.py

diff --git a/scripts/dragonfly-snapshots.py b/scripts/dragonfly-snapshots.py
new file mode 100644
index 00000000..fe142ac2
--- /dev/null
+++ b/scripts/dragonfly-snapshots.py
@@ -0,0 +1,21 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+if __name__ == '__main__':
+    port_utilization = np.loadtxt("dragonfly-snapshots.csv", delimiter=',', dtype=float, skiprows=1)
+
+    # finding all snapshot timestamps
+    timestamps = np.unique(port_utilization[:, 0])
+    assert len(timestamps.shape) == 1
+
+    # Finding total utilization per snapshot
+    total_utilization = np.zeros_like(timestamps)
+    for i, ts in enumerate(timestamps):
+        total_utilization[i] = port_utilization[port_utilization[:, 0] == ts, 2:].sum()
+
+    # plotting
+    plt.plot(timestamps, total_utilization)
+    plt.xlabel('snapshot time (ns)')
+    plt.ylabel('total buffer port occupancy')
+    plt.show()
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index a4886873..f5bf3ecb 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3155,7 +3155,7 @@ static void terminal_dally_commit(terminal_state * s,
             .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
             .travel_start_time = msg->travel_start_time,
             .workload_injection_time = msg->msg_start_time,
-            .delay_at_queue_head = s->in_queue_delay,
+            .delay_at_queue_head = msg->saved_in_queue_delay,
             .packet_size = msg->packet_size
         };
 
@@ -3725,19 +3725,21 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     msg->my_hops_cur_group = -1;
 
     // determining injection delay
-    //tw_stime injection_ts;
-    //if (g_congestion_control_enabled) {
-    //    double bandwidth_coef = 1;
-    //    if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
-    //        bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
-    //    }
-    //    injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
-    //}
-    //else {
-    //    injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
-    //}
-    //tw_stime const nic_ts = injection_ts;
-    tw_stime const nic_ts = s->in_queue_delay;
+    tw_stime injection_ts;
+    if (g_congestion_control_enabled) {
+        double bandwidth_coef = 1;
+        if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
+            bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
+        }
+        injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
+    }
+    else {
+        injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
+    }
+    tw_stime const nic_ts = injection_ts;
+    msg->saved_in_queue_delay = injection_ts;
+    //tw_stime const nic_ts = s->in_queue_delay;
+    //msg->saved_in_queue_delay = s->in_queue_delay;
     //printf("injection_ts = %f\n", injection_ts);
 
     // Using predictor to find latency
@@ -3747,8 +3749,6 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
-        .workload_injection_time = msg->msg_start_time,
-        .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
         .packet_size = msg->packet_size
     };
 

From cac772fc913e043f0c18306bb2b8ff6bb7034c4e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 24 Feb 2023 16:41:32 -0500
Subject: [PATCH 023/188] Anothe quick fix and a configuration option to turn
 on or off the freezing of the network

---
 codes/surrogate.h                          |  2 +-
 src/networks/model-net/core/model-net-lp.c |  4 +-
 src/networks/model-net/dragonfly-dally.C   |  8 +--
 src/util/surrogate.c                       | 60 +++++++++++++++-------
 4 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/codes/surrogate.h b/codes/surrogate.h
index 87eb43d3..3175f395 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -24,7 +24,7 @@ extern "C" {
 // high-def to surrogate) and later reanimated on the switch back (from
 // surrogate to high-def). If not, all events will be kept in the network while
 // on surrogate mode, which means that the network will vacate completely
-#define FREEZE_NETWORK_STATE 1
+extern bool freeze_network_on_switch;
 
 /**
  * Terminal-to-terminal packet latency prediction machinery
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 801926a1..34be0cf9 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1116,7 +1116,7 @@ void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
         // TODO: change zero-offset event for something a bit more sensible
             model_net_method_idle_event2(0.0, 0, i, lp);
         }
-        ns->in_sched_send_loop[i] = 0;
+        //ns->in_sched_send_loop[i] = 0;
     }
     //printf("]\n");
 
@@ -1124,7 +1124,7 @@ void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {
     if (ns->in_sched_recv_loop) {
         model_net_method_idle_event(0.0, 1, lp);
     }
-    ns->in_sched_recv_loop = 0;
+    //ns->in_sched_recv_loop = 0;
 }
 
 void model_net_method_switch_to_highdef_lp(tw_lp * lp) {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index f5bf3ecb..cce397ee 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2239,9 +2239,9 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     if (OUTPUT_SNAPSHOT) {
         char **timestamps;
         size_t len;
-        rc = configuration_get_multivalue(&config, "PARAMS", "router_buffer_snapshots", anno, &timestamps, &len);
+        configuration_get_multivalue(&config, "PARAMS", "router_buffer_snapshots", anno, &timestamps, &len);
         assert((len > 0) == (timestamps != NULL));
-        if (rc) {  // counter-intuitively, configuration_get_multivalue returns 1 if it found the key!
+        if (len) {  // counter-intuitively, configuration_get_multivalue returns 1 if it found the key!
             num_snapshots = len;
             snapshot_times = (tw_stime*) malloc(len * sizeof(tw_stime));
 
@@ -5222,7 +5222,7 @@ static void terminal_buf_update(terminal_state * s,
 static void dragonfly_dally_terminal_final( terminal_state * s, 
       tw_lp * lp )
 {
-    if (FREEZE_NETWORK_STATE && is_surrogate_on) {
+    if (freeze_network_on_switch && is_surrogate_on) {
         dragonfly_dally_terminal_surrogate_to_highdef(s, lp);
     }
     // printf("terminal id %d\n",s->terminal_id);
@@ -6316,7 +6316,7 @@ terminal_dally_event( terminal_state * s,
     assert(msg->magic == terminal_magic_num);
     //printf("LPID: %llu Event type %d processed at %f\n", lp->gid, msg->type, tw_now(lp));
 
-    if (is_surrogate_on && FREEZE_NETWORK_STATE) {
+    if (is_surrogate_on && freeze_network_on_switch) {
         // This event will be reversed. It comes from the past, it has been forwarded to the future
         // by the surrogate freezing the network procedure and should not be taken into account
         if (! (msg->type == T_GENERATE || msg->type == T_ARRIVE_PREDICTED || msg->type == T_NOTIFY)) {
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 7ed098e9..368723cc 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -14,6 +14,9 @@
 #include <codes/model-net-lp.h>
 #include <codes/surrogate.h>
 
+// A simple macro to clarify code a bit
+#define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); }
+
 // Basic level of debugging is 1. It should be always turned on
 // because it tells us when a switch to or from surrogate-mode happened.
 // It can be deactivated (set to 0) if it ends up being too obnoxious
@@ -24,6 +27,7 @@
 #define DEBUG_DIRECTOR 1
 
 // Global variables
+bool freeze_network_on_switch = true;
 static double ignore_until = 0;
 static struct surrogate_config surr_config = {0};
 
@@ -464,7 +468,7 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     }
 
     // "Freezing" network events and activating LP's switch functions
-    if (FREEZE_NETWORK_STATE) {
+    if (freeze_network_on_switch) {
         if (surr_config.director.is_surrogate_on()) {
             model_net_method_switch_to_surrogate();
             events_high_def_to_surrogate_switch(pe, gvt);
@@ -512,9 +516,7 @@ void surrogate_configure(
     director_mode[0] = '\0';
     configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
     if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
-        if(g_tw_mynode == 0) {
-            fprintf(stderr, "\nSurrogate activated switching at fixed virtual times: ");
-        }
+        PRINTF_ONCE("\nSurrogate activated switching at fixed virtual times: ");
 
         // Loading timestamps
         char **timestamps;
@@ -532,13 +534,9 @@ void surrogate_configure(
                 tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
             }
 
-            if(g_tw_mynode == 0) {
-                fprintf(stderr, "%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
-            }
-        }
-        if(g_tw_mynode == 0) {
-            fprintf(stderr, "\n");
+            PRINTF_ONCE("%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
         }
+        PRINTF_ONCE("\n");
 
         // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
         g_tw_gvt_arbitrary_fun = director_fun;
@@ -560,19 +558,43 @@ void surrogate_configure(
     char latency_pred_name[MAX_NAME_LENGTH];
     latency_pred_name[0] = '\0';
     configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
-    if (strcmp(latency_pred_name, "average") == 0) {
+    if (*latency_pred_name) {
+        if (strcmp(latency_pred_name, "average") == 0) {
+            *pl_pred = &average_latency_predictor;
+
+            // Finding out whether to ignore some packet latencies
+            int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
+            if (rc) {
+                ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
+                PRINTF_ONCE("Enabling average packet latency predictor\n");
+            } else {
+                PRINTF_ONCE("Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
+            }
+        } else {
+            tw_error(TW_LOC, "Unknown predictor for packet latency `%s` (possibilities include: average)", latency_pred_name);
+        }
+    } else {
         *pl_pred = &average_latency_predictor;
+        PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n");
+    }
 
-        // Finding out whether to ignore some packet latencies
-        int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
-        if (rc) {
-            ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
-        }
-        if (g_tw_mynode == 0) {
-            fprintf(stderr, "Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
+    // Determining which predictor to set up and return
+    char network_treatment_name[MAX_NAME_LENGTH];
+    network_treatment_name[0] = '\0';
+    configuration_get_value(&config, "SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH);
+    if (*network_treatment_name) {
+        if (strcmp(network_treatment_name, "freeze") == 0) {
+            freeze_network_on_switch = true;
+            PRINTF_ONCE("The network will be frozen on switch to surrogate\n");
+        } else if (strcmp(network_treatment_name, "nothing") == 0) {
+            freeze_network_on_switch = false;
+            PRINTF_ONCE("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n");
+        } else {
+            tw_error(TW_LOC, "Unknown network treatment `%s` (possibilities include: frezee or nothing)", network_treatment_name);
         }
     } else {
-        tw_error(TW_LOC, "Unknown predictor for packet latency `%s`", latency_pred_name);
+        freeze_network_on_switch = true;
+        PRINTF_ONCE("The network will be frozen on switch to surrogate (default behaviour)\n");
     }
 
     //surr_config.director.switch_surrogate();

From 062bef7d3f96999a051e67d6a8f1bf5b6073f373 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sat, 25 Feb 2023 18:41:58 -0500
Subject: [PATCH 024/188] Adding surrogate stats output for models

---
 codes/surrogate.h                            |  1 +
 doc/example/tutorial-synthetic-ping-pong.c   |  4 ++++
 src/network-workloads/model-net-mpi-replay.c |  2 ++
 src/networks/model-net/dragonfly-dally.C     |  2 +-
 src/util/surrogate.c                         | 16 ++++++++++++++++
 5 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/codes/surrogate.h b/codes/surrogate.h
index 3175f395..258cbd9b 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -25,6 +25,7 @@ extern "C" {
 // surrogate to high-def). If not, all events will be kept in the network while
 // on surrogate mode, which means that the network will vacate completely
 extern bool freeze_network_on_switch;
+void print_surrogate_stats(void);
 
 /**
  * Terminal-to-terminal packet latency prediction machinery
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 88d4f150..8383c983 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -6,6 +6,7 @@
 
 #include "codes/model-net.h"
 #include "codes/codes_mapping.h"
+#include "codes/surrogate.h"  // just needed for stats on surrogate-mode
 
 
 static int net_id = 0;
@@ -354,6 +355,9 @@ int main(int argc, char **argv)
     }
     model_net_report_stats(net_id);
 
+    // Printing some stats
+    print_surrogate_stats();
+
     tw_end();
     return 0;
 }
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index bfc368ab..1f613466 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -3611,6 +3611,8 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    if(alloc_spec)
        codes_jobmap_destroy(jobmap_ctx);
 
+   print_surrogate_stats();
+
 #ifdef USE_RDAMARIS
     } // end if(g_st_ross_rank)
 #endif
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index cce397ee..ce59dfe1 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2241,7 +2241,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
         size_t len;
         configuration_get_multivalue(&config, "PARAMS", "router_buffer_snapshots", anno, &timestamps, &len);
         assert((len > 0) == (timestamps != NULL));
-        if (len) {  // counter-intuitively, configuration_get_multivalue returns 1 if it found the key!
+        if (len) {
             num_snapshots = len;
             snapshot_times = (tw_stime*) malloc(len * sizeof(tw_stime));
 
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 368723cc..b6c07385 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -28,6 +28,8 @@
 
 // Global variables
 bool freeze_network_on_switch = true;
+static bool is_surrogate_configured = false;
+static double surrogate_switching_time = 0.0;
 static double ignore_until = 0;
 static struct surrogate_config surr_config = {0};
 
@@ -412,6 +414,8 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
 
 
 static void director_fun(tw_pe * pe, tw_event_sig gvt) {
+    assert(is_surrogate_configured);
+
     static int i = 0;
     if (g_tw_mynode == 0) {
         if (DEBUG_DIRECTOR == 2) {
@@ -455,6 +459,7 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
         return;
     }
 
+    double const start = tw_clock_read();
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
         if (DEBUG_DIRECTOR == 2) {
@@ -494,6 +499,7 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: Switch completed!\n", g_tw_mynode);
     }
+    surrogate_switching_time += tw_clock_read() - start;
 }
 //
 // === END OF Director functionality
@@ -507,6 +513,7 @@ void surrogate_configure(
 ) {
     assert(sc);
     assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
+    is_surrogate_configured = true;
 
     // This is the only place where the director data should be loaded and set up
     surr_config = *sc;
@@ -603,3 +610,12 @@ void surrogate_configure(
     }
 }
 // === END OF All things Surrogate Configuration
+
+
+// === Stats!
+void print_surrogate_stats(void) {
+    if(is_surrogate_configured && g_tw_mynode == 0) {
+        printf("\nTotal time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
+    }
+}
+// === END OF Stats!

From f64161f828ca410d4adb3a9605ec394bea48ab43 Mon Sep 17 00:00:00 2001
From: Xin Wang <xwang149@hawk.iit.edu>
Date: Tue, 11 Apr 2023 11:24:10 -0500
Subject: [PATCH 025/188] merge IIT updates to kronos branch, and add support
 of Union in dragonfly dally model

---
 Makefile.am                                   |   13 +-
 codes/model-net.h                             |    2 +-
 codes/net/dragonfly-custom.h                  |    8 +
 codes/net/dragonfly-dally.h                   |    9 +
 configure.ac                                  |   40 +-
 maint/codes.pc.in                             |    7 +-
 src/iokernellang/codeslexer.c                 |   72 +-
 src/iokernellang/codeslexer.h                 |   19 +-
 src/network-workloads/model-net-mpi-replay.c  |   49 +-
 src/networks/model-net/core/model-net.c       |    6 +
 src/networks/model-net/dragonfly-custom.C     |  206 ++
 src/networks/model-net/dragonfly-dally.C      |  220 +-
 src/workload/codes-workload-dump.c            |    4 +-
 src/workload/codes-workload.c                 |   16 +-
 .../methods/codes-conc-online-comm-wrkld.C    | 1887 +++++++++++++++++
 .../methods/codes-online-comm-wrkld.C         |    4 +-
 16 files changed, 2491 insertions(+), 71 deletions(-)
 create mode 100644 src/workload/methods/codes-conc-online-comm-wrkld.C

diff --git a/Makefile.am b/Makefile.am
index 28157735..f18d8d63 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -60,10 +60,19 @@ src_libcodes_la_SOURCES += src/workload/methods/codes-recorder-io-wrkld.c
 endif
 
 if USE_ONLINE
-AM_CPPFLAGS += ${ARGOBOTS_CFLAGS} ${SWM_CFLAGS} -DUSE_ONLINE=1
-LDADD += ${SWM_LIBS} ${ARGOBOTS_LIBS}
+AM_CPPFLAGS += ${ARGOBOTS_CFLAGS} -DUSE_ONLINE=1
+LDADD += ${ARGOBOTS_LIBS}
+if USE_SWM
+AM_CPPFLAGS +=  ${SWM_CFLAGS} -DUSE_SWM=1
+LDADD += ${SWM_LIBS}
 src_libcodes_la_SOURCES += src/workload/methods/codes-online-comm-wrkld.C
 endif
+if USE_UNION
+src_libcodes_la_SOURCES += src/workload/methods/codes-conc-online-comm-wrkld.C
+AM_CPPFLAGS += ${UNION_CFLAGS} ${SWM_CFLAGS} -DUSE_UNION=1
+LDADD += ${UNION_LIBS} ${SWM_LIBS}
+endif
+endif
 
 if USE_DUMPI
 AM_CPPFLAGS += ${DUMPI_CFLAGS} -DUSE_DUMPI=1
diff --git a/codes/model-net.h b/codes/model-net.h
index 772e2072..df52f228 100644
--- a/codes/model-net.h
+++ b/codes/model-net.h
@@ -135,7 +135,7 @@ typedef struct model_net_request {
     int      self_event_size;
     char     category[CATEGORY_NAME_MAX];
 
-    //for counting msg app id
+    //Xin: passing app id to routers
     int     app_id;
 
 } model_net_request;
diff --git a/codes/net/dragonfly-custom.h b/codes/net/dragonfly-custom.h
index 96875f1d..af38d98f 100644
--- a/codes/net/dragonfly-custom.h
+++ b/codes/net/dragonfly-custom.h
@@ -87,6 +87,14 @@ struct terminal_custom_message
    tw_stime msg_start_time;
    tw_stime saved_busy_time_ross;
    tw_stime saved_fin_chunks_ross;
+
+   //Yao: for counting msg app id
+   int app_id;
+   tw_stime last_received_time;
+   tw_stime last_sent_time;
+   //Xin: for busy time recording
+   tw_stime last_bufupdate_time;
+
 };
 
 #ifdef __cplusplus
diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index ed55c2ea..38b9fff1 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -103,6 +103,15 @@ struct terminal_dally_message
    tw_stime msg_start_time;
    tw_stime saved_busy_time_ross;
    tw_stime saved_fin_chunks_ross;
+
+   //Yao: for counting msg app id
+   int app_id;
+   tw_stime last_received_time;
+   tw_stime last_sent_time;
+   
+   //Xin: for busy time recording
+   tw_stime last_bufupdate_time;
+
 };
 
 #ifdef __cplusplus
diff --git a/configure.ac b/configure.ac
index c74056a5..16561e61 100755
--- a/configure.ac
+++ b/configure.ac
@@ -113,27 +113,55 @@ if test "x${use_darshan}" = xyes ; then
 fi
 AM_CONDITIONAL(USE_DARSHAN, [test "x${use_darshan}" = xyes])
 
-
 # check for Argobots
 AC_ARG_WITH([online],[AS_HELP_STRING([--with-online@<:@=DIR@:>@],
-                        [Build with the online workloads and argobots support])],
-                      [use_online=yes],[use_online=no])
-if test "x${use_online}" != "xno" ; then
+                        [Build with the online workloads and argobots support])])
+if test "x${with_online}" != "x" ; then
     AM_CONDITIONAL(USE_ONLINE, true)
     AX_BOOST_BASE([1.66])
     AX_CXX_COMPILE_STDCXX(11, noext, mandatory)
     PKG_CHECK_MODULES_STATIC([ARGOBOTS], [argobots], [],
                       [AC_MSG_ERROR([Could not find working argobots installation via pkg-config])])
+else
+    AM_CONDITIONAL(USE_ONLINE, false)
+fi
+
+#check for SWM
+AC_ARG_WITH([swm],[AS_HELP_STRING([--with-swm@<:@=DIR@:>@],
+                        [location of SWM installation])])
+if test "x${with_swm}" != "x" ; then
+    AM_CONDITIONAL(USE_SWM, true)
     PKG_CHECK_MODULES_STATIC([SWM], [swm], [],
                       [AC_MSG_ERROR([Could not find working swm installation via pkg-config])])
     PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [],
               [AC_MSG_ERROR[Could not find shared directory in SWM]])
     AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json
-                        data files])
+                    data files])
 else
-    AM_CONDITIONAL(USE_ONLINE, false)
+  AM_CONDITIONAL(USE_SWM, false)
 fi
 
+#check for UNION 
+AC_ARG_WITH([union],[AS_HELP_STRING([--with-union@<:@=DIR@:>@],
+                        [location of Union installation])])
+if test "x${with_union}" != "x" ; then
+    AM_CONDITIONAL(USE_UNION, true)
+    PKG_CHECK_MODULES_STATIC([UNION], [union], [],
+                      [AC_MSG_ERROR([Could not find working Union installation via pkg-config])])
+    PKG_CHECK_VAR([UNION_DATADIR], [union], [datarootdir], [],
+              [AC_MSG_ERROR[Could not find shared directory in UNION]])
+    AC_DEFINE_UNQUOTED([UNION_DATADIR], ["$UNION_DATADIR"], [if using json data files])
+    PKG_CHECK_MODULES_STATIC([SWM], [swm], [],
+                      [AC_MSG_ERROR([Could not find working swm installation via pkg-config])])
+    PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [],
+              [AC_MSG_ERROR[Could not find shared directory in SWM]])
+    AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json
+                    data files])
+else
+    AM_CONDITIONAL(USE_UNION, false)
+fi
+
+
 # check for Recorder
 AM_CONDITIONAL(USE_RECORDER, true)
 RECORDER_CPPFLAGS="-DUSE_RECORDER=1"
diff --git a/maint/codes.pc.in b/maint/codes.pc.in
index 451a6c0f..97cf7d1d 100644
--- a/maint/codes.pc.in
+++ b/maint/codes.pc.in
@@ -19,11 +19,14 @@ argobots_cflags=@ARGOBOTS_CFLAGS@
 swm_libs=@SWM_LIBS@
 swm_cflags=@SWM_CFLAGS@
 swm_datarootdir=@SWM_DATAROOTDIR@
+union_libs=@UNION_LIBS@
+union_cflags=@UNION_CFLAGS@
+union_datadir=@UNION_DATADIR@
 
 Name: codes-base
 Description: Base functionality for CODES storage simulation
 Version: @PACKAGE_VERSION@
 URL: https://github.com/codes-org/codes
 Requires:
-Libs: -L${libdir} -lcodes ${ross_libs} ${argobots_libs} ${swm_libs} ${darshan_libs} ${dumpi_libs} ${cortex_libs}
-Cflags: -I${includedir} -I${swm_datarootdir} ${ross_cflags} ${darshan_cflags} ${swm_cflags} ${argobots_cflags} ${dumpi_cflags} ${cortex_cflags}
+Libs: -L${libdir} -lcodes ${ross_libs} ${argobots_libs} ${swm_libs} ${union_libs} ${darshan_libs} ${dumpi_libs} ${cortex_libs}
+Cflags: -I${includedir} -I${swm_datarootdir} ${union_datadir} ${ross_cflags} ${darshan_cflags} ${swm_cflags} ${union_cflags} ${argobots_cflags} ${dumpi_cflags} ${cortex_cflags}
diff --git a/src/iokernellang/codeslexer.c b/src/iokernellang/codeslexer.c
index 96f594a7..c4abdb8d 100644
--- a/src/iokernellang/codeslexer.c
+++ b/src/iokernellang/codeslexer.c
@@ -9,7 +9,7 @@
 #define FLEX_SCANNER
 #define YY_FLEX_MAJOR_VERSION 2
 #define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
+#define YY_FLEX_SUBMINOR_VERSION 37
 #if YY_FLEX_SUBMINOR_VERSION > 0
 #define FLEX_BETA
 #endif
@@ -54,7 +54,6 @@ typedef int flex_int32_t;
 typedef unsigned char flex_uint8_t; 
 typedef unsigned short int flex_uint16_t;
 typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
 
 /* Limits of integral types. */
 #ifndef INT8_MIN
@@ -85,6 +84,8 @@ typedef unsigned int flex_uint32_t;
 #define UINT32_MAX             (4294967295U)
 #endif
 
+#endif /* ! C99 */
+
 #endif /* ! FLEXINT_H */
 
 #ifdef __cplusplus
@@ -170,6 +171,11 @@ typedef void* yyscan_t;
 typedef struct yy_buffer_state *YY_BUFFER_STATE;
 #endif
 
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
 #define EOB_ACT_CONTINUE_SCAN 0
 #define EOB_ACT_END_OF_FILE 1
 #define EOB_ACT_LAST_MATCH 2
@@ -205,11 +211,6 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE;
 
 #define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner )
 
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
 #ifndef YY_STRUCT_YY_BUFFER_STATE
 #define YY_STRUCT_YY_BUFFER_STATE
 struct yy_buffer_state
@@ -227,7 +228,7 @@ struct yy_buffer_state
 	/* Number of characters read into yy_ch_buf, not including EOB
 	 * characters.
 	 */
-	int yy_n_chars;
+	yy_size_t yy_n_chars;
 
 	/* Whether we "own" the buffer - i.e., we know we created it,
 	 * and can realloc() it to grow it, and should free() it to
@@ -306,7 +307,7 @@ static void CodesIOKernel__init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t y
 
 YY_BUFFER_STATE CodesIOKernel__scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
 YY_BUFFER_STATE CodesIOKernel__scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
-YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
+YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
 
 void *CodesIOKernel_alloc (yy_size_t ,yyscan_t yyscanner );
 void *CodesIOKernel_realloc (void *,yy_size_t ,yyscan_t yyscanner );
@@ -338,7 +339,7 @@ void CodesIOKernel_free (void * ,yyscan_t yyscanner );
 
 /* Begin user sect3 */
 
-#define CodesIOKernel_wrap(n) 1
+#define CodesIOKernel_wrap(yyscanner) 1
 #define YY_SKIP_YYWRAP
 
 typedef unsigned char YY_CHAR;
@@ -559,7 +560,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[37] =
         #define YY_USER_ACTION /* no user action */;
 #endif
 
-#line 563 "../src/iokernellang/codeslexer.c"
+#line 564 "../src/iokernellang/codeslexer.c"
 
 #define INITIAL 0
 
@@ -588,8 +589,8 @@ struct yyguts_t
     size_t yy_buffer_stack_max; /**< capacity of stack. */
     YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
     char yy_hold_char;
-    int yy_n_chars;
-    int yyleng_r;
+    yy_size_t yy_n_chars;
+    yy_size_t yyleng_r;
     char *yy_c_buf_p;
     int yy_init;
     int yy_start;
@@ -646,7 +647,7 @@ FILE *CodesIOKernel_get_out (yyscan_t yyscanner );
 
 void CodesIOKernel_set_out  (FILE * out_str ,yyscan_t yyscanner );
 
-int CodesIOKernel_get_leng (yyscan_t yyscanner );
+yy_size_t CodesIOKernel_get_leng (yyscan_t yyscanner );
 
 char *CodesIOKernel_get_text (yyscan_t yyscanner );
 
@@ -654,6 +655,10 @@ int CodesIOKernel_get_lineno (yyscan_t yyscanner );
 
 void CodesIOKernel_set_lineno (int line_number ,yyscan_t yyscanner );
 
+int CodesIOKernel_get_column  (yyscan_t yyscanner );
+
+void CodesIOKernel_set_column (int column_no ,yyscan_t yyscanner );
+
 YYSTYPE * CodesIOKernel_get_lval (yyscan_t yyscanner );
 
 void CodesIOKernel_set_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner );
@@ -713,7 +718,7 @@ static int input (yyscan_t yyscanner );
 	if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
 		{ \
 		int c = '*'; \
-		unsigned n; \
+		size_t n; \
 		for ( n = 0; n < max_size && \
 			     (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
 			buf[n] = (char) c; \
@@ -801,7 +806,7 @@ YY_DECL
 #line 43 "../src/iokernellang/codeslexer.l"
 
 
-#line 805 "../src/iokernellang/codeslexer.c"
+#line 810 "../src/iokernellang/codeslexer.c"
 
     yylval = yylval_param;
 
@@ -879,7 +884,7 @@ YY_DECL
 
 		if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] )
 			{
-			int yyl;
+			yy_size_t yyl;
 			for ( yyl = 0; yyl < yyleng; ++yyl )
 				if ( yytext[yyl] == '\n' )
 					   
@@ -1112,7 +1117,7 @@ YY_RULE_SETUP
 #line 117 "../src/iokernellang/codeslexer.l"
 ECHO;
 	YY_BREAK
-#line 1116 "../src/iokernellang/codeslexer.c"
+#line 1121 "../src/iokernellang/codeslexer.c"
 case YY_STATE_EOF(INITIAL):
 	yyterminate();
 
@@ -1299,21 +1304,21 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 
 	else
 		{
-			int num_to_read =
+			yy_size_t num_to_read =
 			YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
 
 		while ( num_to_read <= 0 )
 			{ /* Not enough room in the buffer - grow it. */
 
 			/* just a shorter name for the current buffer */
-			YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+			YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
 
 			int yy_c_buf_p_offset =
 				(int) (yyg->yy_c_buf_p - b->yy_ch_buf);
 
 			if ( b->yy_is_our_buffer )
 				{
-				int new_size = b->yy_buf_size * 2;
+				yy_size_t new_size = b->yy_buf_size * 2;
 
 				if ( new_size <= 0 )
 					b->yy_buf_size += b->yy_buf_size / 8;
@@ -1344,7 +1349,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 
 		/* Read in more data. */
 		YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
-			yyg->yy_n_chars, (size_t) num_to_read );
+			yyg->yy_n_chars, num_to_read );
 
 		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
 		}
@@ -1441,6 +1446,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 	yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
 	yy_is_jam = (yy_current_state == 133);
 
+	(void)yyg;
 	return yy_is_jam ? 0 : yy_current_state;
 }
 
@@ -1469,7 +1475,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 
 		else
 			{ /* need more input */
-			int offset = yyg->yy_c_buf_p - yyg->yytext_ptr;
+			yy_size_t offset = yyg->yy_c_buf_p - yyg->yytext_ptr;
 			++yyg->yy_c_buf_p;
 
 			switch ( yy_get_next_buffer( yyscanner ) )
@@ -1640,10 +1646,6 @@ static void CodesIOKernel__load_buffer_state  (yyscan_t yyscanner)
 	CodesIOKernel_free((void *) b ,yyscanner );
 }
 
-#ifndef __cplusplus
-extern int isatty (int );
-#endif /* __cplusplus */
-    
 /* Initializes or reinitializes a buffer.
  * This function is sometimes called more than once on the same buffer,
  * such as during a CodesIOKernel_restart() or at EOF.
@@ -1760,7 +1762,7 @@ void CodesIOKernel_pop_buffer_state (yyscan_t yyscanner)
  */
 static void CodesIOKernel_ensure_buffer_stack (yyscan_t yyscanner)
 {
-	int num_to_alloc;
+	yy_size_t num_to_alloc;
     struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
 
 	if (!yyg->yy_buffer_stack) {
@@ -1853,17 +1855,17 @@ YY_BUFFER_STATE CodesIOKernel__scan_string (yyconst char * yystr , yyscan_t yysc
 
 /** Setup the input buffer state to scan the given bytes. The next call to CodesIOKernel_lex() will
  * scan from a @e copy of @a bytes.
- * @param bytes the byte buffer to scan
- * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
  * @param yyscanner The scanner object.
  * @return the newly allocated buffer state object.
  */
-YY_BUFFER_STATE CodesIOKernel__scan_bytes  (yyconst char * yybytes, int  _yybytes_len , yyscan_t yyscanner)
+YY_BUFFER_STATE CodesIOKernel__scan_bytes  (yyconst char * yybytes, yy_size_t  _yybytes_len , yyscan_t yyscanner)
 {
 	YY_BUFFER_STATE b;
 	char *buf;
 	yy_size_t n;
-	int i;
+	yy_size_t i;
     
 	/* Get memory for full buffer, including space for trailing EOB's. */
 	n = _yybytes_len + 2;
@@ -1973,7 +1975,7 @@ FILE *CodesIOKernel_get_out  (yyscan_t yyscanner)
 /** Get the length of the current token.
  * @param yyscanner The scanner object.
  */
-int CodesIOKernel_get_leng  (yyscan_t yyscanner)
+yy_size_t CodesIOKernel_get_leng  (yyscan_t yyscanner)
 {
     struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
     return yyleng;
@@ -2009,7 +2011,7 @@ void CodesIOKernel_set_lineno (int  line_number , yyscan_t yyscanner)
 
         /* lineno is only valid if an input buffer exists. */
         if (! YY_CURRENT_BUFFER )
-           yy_fatal_error( "CodesIOKernel_set_lineno called with no buffer" , yyscanner); 
+           YY_FATAL_ERROR( "CodesIOKernel_set_lineno called with no buffer" );
     
     yylineno = line_number;
 }
@@ -2024,7 +2026,7 @@ void CodesIOKernel_set_column (int  column_no , yyscan_t yyscanner)
 
         /* column is only valid if an input buffer exists. */
         if (! YY_CURRENT_BUFFER )
-           yy_fatal_error( "CodesIOKernel_set_column called with no buffer" , yyscanner); 
+           YY_FATAL_ERROR( "CodesIOKernel_set_column called with no buffer" );
     
     yycolumn = column_no;
 }
diff --git a/src/iokernellang/codeslexer.h b/src/iokernellang/codeslexer.h
index 034abc67..c3de58e9 100644
--- a/src/iokernellang/codeslexer.h
+++ b/src/iokernellang/codeslexer.h
@@ -13,7 +13,7 @@
 #define FLEX_SCANNER
 #define YY_FLEX_MAJOR_VERSION 2
 #define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
+#define YY_FLEX_SUBMINOR_VERSION 37
 #if YY_FLEX_SUBMINOR_VERSION > 0
 #define FLEX_BETA
 #endif
@@ -58,7 +58,6 @@ typedef int flex_int32_t;
 typedef unsigned char flex_uint8_t; 
 typedef unsigned short int flex_uint16_t;
 typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
 
 /* Limits of integral types. */
 #ifndef INT8_MIN
@@ -89,6 +88,8 @@ typedef unsigned int flex_uint32_t;
 #define UINT32_MAX             (4294967295U)
 #endif
 
+#endif /* ! C99 */
+
 #endif /* ! FLEXINT_H */
 
 #ifdef __cplusplus
@@ -161,7 +162,7 @@ struct yy_buffer_state
 	/* Number of characters read into yy_ch_buf, not including EOB
 	 * characters.
 	 */
-	int yy_n_chars;
+	yy_size_t yy_n_chars;
 
 	/* Whether we "own" the buffer - i.e., we know we created it,
 	 * and can realloc() it to grow it, and should free() it to
@@ -205,7 +206,7 @@ void CodesIOKernel_pop_buffer_state (yyscan_t yyscanner );
 
 YY_BUFFER_STATE CodesIOKernel__scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
 YY_BUFFER_STATE CodesIOKernel__scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
-YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
+YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
 
 void *CodesIOKernel_alloc (yy_size_t ,yyscan_t yyscanner );
 void *CodesIOKernel_realloc (void *,yy_size_t ,yyscan_t yyscanner );
@@ -213,7 +214,7 @@ void CodesIOKernel_free (void * ,yyscan_t yyscanner );
 
 /* Begin user sect3 */
 
-#define CodesIOKernel_wrap(n) 1
+#define CodesIOKernel_wrap(yyscanner) 1
 #define YY_SKIP_YYWRAP
 
 #define yytext_ptr yytext_r
@@ -260,7 +261,7 @@ FILE *CodesIOKernel_get_out (yyscan_t yyscanner );
 
 void CodesIOKernel_set_out  (FILE * out_str ,yyscan_t yyscanner );
 
-int CodesIOKernel_get_leng (yyscan_t yyscanner );
+yy_size_t CodesIOKernel_get_leng (yyscan_t yyscanner );
 
 char *CodesIOKernel_get_text (yyscan_t yyscanner );
 
@@ -268,6 +269,10 @@ int CodesIOKernel_get_lineno (yyscan_t yyscanner );
 
 void CodesIOKernel_set_lineno (int line_number ,yyscan_t yyscanner );
 
+int CodesIOKernel_get_column  (yyscan_t yyscanner );
+
+void CodesIOKernel_set_column (int column_no ,yyscan_t yyscanner );
+
 YYSTYPE * CodesIOKernel_get_lval (yyscan_t yyscanner );
 
 void CodesIOKernel_set_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner );
@@ -340,6 +345,6 @@ extern int CodesIOKernel_lex \
 #line 117 "../src/iokernellang/codeslexer.l"
 
 
-#line 344 "../src/iokernellang/codeslexer.h"
+#line 349 "../src/iokernellang/codeslexer.h"
 #undef CodesIOKernel_IN_HEADER
 #endif /* CodesIOKernel_HEADER_H */
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 9f6488d9..ebc36665 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -2147,7 +2147,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
 	strcpy(params_d.cortex_gen, cortex_gen);
 #endif
    }
-   else if(strcmp(workload_type, "online") == 0){
+   else if(strcmp(workload_type, "swm-online") == 0){
            
        online_comm_params oc_params;
        
@@ -2166,7 +2166,27 @@ void nw_test_init(nw_state* s, tw_lp* lp)
         * online, it is the number of ranks to be simulated. */
        oc_params.nprocs = num_traces_of_job[lid.job]; 
        params = (char*)&oc_params;
-       strcpy(type_name, "online_comm_workload");
+       strcpy(type_name, "swm_online_comm_workload");
+   }
+   //Xin: add conceputual online workload
+   else if(strcmp(workload_type, "conc-online") == 0){
+           
+       online_comm_params oc_params;
+       
+       if(strlen(workload_name) > 0)
+       {
+           strcpy(oc_params.workload_name, workload_name); 
+       }
+       else if(strlen(workloads_conf_file) > 0)
+       {
+            strcpy(oc_params.workload_name, file_name_of_job[lid.job]);      
+       }
+       /*TODO: nprocs is different for dumpi and online workload. for
+        * online, it is the number of ranks to be simulated. */
+       // printf("conc-online num_traces_of_job %d\n", num_traces_of_job[lid.job]);
+       oc_params.nprocs = num_traces_of_job[lid.job]; 
+       params = (char*)&oc_params;
+       strcpy(type_name, "conc_online_comm_workload");
    }
 
    int rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", NULL, &num_qos_levels);
@@ -2637,16 +2657,22 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
             return;
         if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0)
             avg_msg_time = (s->send_time / s->num_recvs);
-        else if(strcmp(workload_type, "online") == 0) 
-        codes_workload_finalize("online_comm_workload", params, s->app_id, s->local_rank);
+        else if(strcmp(workload_type, "swm-online") == 0) 
+            codes_workload_finalize("swm_online_comm_workload", params, s->app_id, s->local_rank);
+        //Xin: for conceptual online workload
+        else if(strcmp(workload_type, "conc-online") == 0)
+            codes_workload_finalize("conc_online_comm_workload", params, s->app_id, s->local_rank);
     }
     else
     {
         if(s->nw_id >= (tw_lpid)num_net_traces)
             return;
         
-        if(strcmp(workload_type, "online") == 0) 
-            codes_workload_finalize("online_comm_workload", params, s->app_id, s->local_rank);
+        if(strcmp(workload_type, "swm-online") == 0) 
+            codes_workload_finalize("swm_online_comm_workload", params, s->app_id, s->local_rank);
+        //Xin: for conceptual online workload
+        if(strcmp(workload_type, "conc-online") == 0)
+            codes_workload_finalize("conc_online_comm_workload", params, s->app_id, s->local_rank); 
     }
 
         struct msg_size_info * tmp_msg = NULL; 
@@ -2973,12 +2999,12 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
     { // keep damaris ranks from running code between here up until tw_end()
 #endif
   codes_comm_update();
-
-  if(strcmp(workload_type, "dumpi") != 0 && strcmp(workload_type, "online") != 0)
+  //Xin: add conceptual online workload
+  if(strcmp(workload_type, "dumpi") != 0 && strcmp(workload_type, "swm-online") != 0 && strcmp(workload_type, "conc-online") != 0)
     {
 	if(tw_ismaster())
 		printf("Usage: mpirun -np n ./modelnet-mpi-replay --sync=1/3"
-                " --workload_type=dumpi/online"
+                " --workload_type=dumpi/swm-online/conc-online"
 		" --workload_conf_file=prefix-workload-file-name"
                 " --alloc_file=alloc-file-name"
 #ifdef ENABLE_CORTEX_PYTHON
@@ -2993,6 +3019,11 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 	return -1;
     }
 
+    /* Xin: Currently rendezvous protocol cannot work with Conceptual online workloads */
+    if(strcmp(workload_type, "conc-online") == 0) {
+        EAGER_THRESHOLD = INT64_MAX;
+    }
+
 	jobmap_ctx = NULL; // make sure it's NULL if it's not used
 
     sprintf(sampling_dir, "sampling-dir");
diff --git a/src/networks/model-net/core/model-net.c b/src/networks/model-net/core/model-net.c
index 99baf2b5..ea7dfc99 100644
--- a/src/networks/model-net/core/model-net.c
+++ b/src/networks/model-net/core/model-net.c
@@ -369,6 +369,12 @@ static model_net_event_return model_net_event_impl_base(
     strncpy(r->category, category, CATEGORY_NAME_MAX-1);
     r->category[CATEGORY_NAME_MAX-1]='\0';
 
+    //Xin: passing app_id to the request struct
+    int ptroffset;
+    configuration_get_value_int(&config, "PARAMS", "offset", NULL, &ptroffset);
+    const int *id_ptr = remote_event+ptroffset;
+    r->app_id = *id_ptr;
+
     if (is_msg_params_set[MN_MSG_PARAM_START_TIME])
         r->msg_start_time = start_time_param;
     else
diff --git a/src/networks/model-net/dragonfly-custom.C b/src/networks/model-net/dragonfly-custom.C
index a2deb05b..6609b725 100644
--- a/src/networks/model-net/dragonfly-custom.C
+++ b/src/networks/model-net/dragonfly-custom.C
@@ -179,6 +179,13 @@ struct dragonfly_param
     double global_credit_delay;
     double cn_credit_delay;
     double router_delay;
+
+    //Xin: parameters for message counters of apps
+    int counting_bool;
+    tw_stime counting_start; 
+    tw_stime counting_interval; 
+    int counting_windows;
+    int num_apps;
 };
 
 struct dfly_hash_key
@@ -369,12 +376,23 @@ struct router_state
    
    char output_buf[4096];
    char output_buf2[4096];
+   //Xin: buffer for output data
+   char output_buf3[4096];
+   char output_buf4[4096];
+   char output_buf5[4096];
+   char output_buf6[4096];
 
    struct dfly_router_sample * rsamples;
    
    long fwd_events;
    long rev_events;
 
+   //Xin: msg couters for apps
+   int **recv_msg_counters;
+   int **send_msg_counters;
+   tw_stime **agg_busy_time;
+   int64_t **agg_link_traffic;
+
    /* following used for ROSS model-level stats collection */
    tw_stime* busy_time_ross_sample;
    int64_t * link_traffic_ross_sample;
@@ -649,6 +667,23 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params){
         routing = -1;
     }
 
+    //Xin: app msgs counting on routers
+    rc = configuration_get_value_int(&config, "PARAMS", "counting_bool", anno, &p->counting_bool);
+    if(p->counting_bool) {
+        int rc1 = configuration_get_value_double(&config, "PARAMS", "counting_start", anno, &p->counting_start);
+        int rc2 = configuration_get_value_int(&config, "PARAMS", "counting_windows", anno, &p->counting_windows);
+        int rc3 = configuration_get_value_double(&config, "PARAMS", "counting_interval", anno, &p->counting_interval);
+        int rc4 = configuration_get_value_int(&config, "PARAMS", "num_apps", anno, &p->num_apps);
+        if(rc1 || rc2 || rc3 || rc4)
+            tw_error(TW_LOC, "\n Missing couting values, (counting_start/windows/interval/num_apps) check for config files\n");
+
+        //convert us to ns
+        p->counting_start = p->counting_start * 1000;
+        p->counting_interval = p->counting_interval * 1000;
+
+        //printf("start %f, end %f, interval %f\n", p->counting_start, p->counting_end, p->counting_interval);
+    }
+
     // rc = configuration_get_value_int(&config, "PARAMS", "num_vcs_override", anno, &p->num_vcs);
     // if(rc) {
     //     if(routing == PROG_ADAPTIVE)
@@ -1155,6 +1190,25 @@ void router_custom_setup(router_state * r, tw_lp * lp)
    r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime));
    r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t));
 
+    //Xin: msg counters for apps 
+    if(p->counting_bool > 0)
+    {   
+        r->recv_msg_counters = (int **) calloc(p->counting_windows, sizeof(int *));
+        r->send_msg_counters = (int **) calloc(p->counting_windows, sizeof(int *));
+        r->agg_link_traffic = (int64_t **) calloc(p->counting_windows, sizeof(int64_t *));
+        r->agg_busy_time = (tw_stime **) malloc (p->counting_windows * sizeof(tw_stime *));
+
+        for (int i = 0; i < p->counting_windows; ++i)
+        {
+            r->recv_msg_counters[i] = (int*) calloc(p->num_apps, sizeof(int));
+            r->send_msg_counters[i] = (int*) calloc(p->num_apps, sizeof(int));
+            r->agg_link_traffic[i] = (int64_t*) calloc(p->radix, sizeof(int64_t));
+            r->agg_busy_time[i] = (tw_stime*) malloc(p->radix * sizeof(tw_stime));
+            for(int j = 0; j < p->radix; j++)
+              r->agg_busy_time[i][j] = 0.0;
+        }
+    }
+
    rc_stack_create(&r->st);
 
    for(int i = 0; i < p->num_router_rows; i++)
@@ -1237,6 +1291,9 @@ static tw_stime dragonfly_custom_packet_event(
     msg->magic = terminal_magic_num; 
     msg->msg_start_time = req->msg_start_time;
 
+    //Xin: for msg counters of apps, find app ids from lp id
+    msg->app_id = req->app_id;
+
     if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/
       {
         if(req->remote_event_size > 0)
@@ -2510,6 +2567,59 @@ void dragonfly_custom_router_final(router_state * s,
         written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d]));
 
     lp_io_write(lp->gid, (char*)"dragonfly-router-traffic", written, s->output_buf2);
+
+    //Xin: output msg counters of apps
+    if(s->params->counting_bool)
+    {
+      // for received msgs
+      if(!s->router_id) {
+          written = sprintf(s->output_buf3, "# Format <LP ID> <Group ID> <Router ID> <Timestamp> <Msg Counters>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-recv-msgs", written, s->output_buf3);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+        written = sprintf(s->output_buf3, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id, (p->counting_start+(i+1)*p->counting_interval));
+        for (int d=0; d < p->num_apps; d++)
+          written += sprintf(s->output_buf3 + written, " %d", (s->recv_msg_counters[i][d]));
+        lp_io_write(lp->gid, (char*)"dragonfly-router-recv-msgs", written, s->output_buf3);
+      }
+
+      // for send msgs
+      if(!s->router_id){
+          written = sprintf(s->output_buf4, "# Format <LP ID> <Group ID> <Router ID> <Window ID> <Msg Counters>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-send-msgs", written, s->output_buf4);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+          written = sprintf(s->output_buf4, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id, (p->counting_start+(i+1)*p->counting_interval));
+          for (int d=0; d < p->num_apps; d++)
+              written += sprintf(s->output_buf4 + written, " %d", (s->send_msg_counters[i][d]));
+          lp_io_write(lp->gid, (char*)"dragonfly-router-send-msgs", written, s->output_buf4);
+      } 
+
+      // for link traffic
+      if(!s->router_id){
+          written = sprintf(s->output_buf5, "# Format <LP ID> <Group ID> <Router ID> <Window ID> <Link Traffic>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+          written = sprintf(s->output_buf5, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval));
+          for (int d=0; d < p->radix; d++)
+              written += sprintf(s->output_buf5 + written, " %d", (s->agg_link_traffic[i][d]));
+          lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5);
+      } 
+
+      // for link busy time
+      if(!s->router_id){
+          written = sprintf(s->output_buf6, "# Format <LP ID> <Group ID> <Router ID> <Window ID> <Link Busy Time>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+          written = sprintf(s->output_buf6, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval));
+          for (int d=0; d < p->radix; d++)
+              written += sprintf(s->output_buf6 + written, " %lf", (s->agg_busy_time[i][d]));
+          lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6);
+      }       
+    }
+
 }
 
 static vector<int> get_intra_router(router_state * s, int src_router_id, int dest_router_id, int num_rtrs_per_grp)
@@ -3036,6 +3146,13 @@ static void router_packet_receive_rc(router_state * s,
     int output_port = msg->saved_vc;
     int output_chan = msg->saved_channel;
 
+    //Xin: reverse msg couters 
+    if(s->params->counting_bool>0 && msg->last_received_time >= s->params->counting_start) {
+        int current_window = (int) ((msg->last_received_time-s->params->counting_start)/s->params->counting_interval);
+        if(current_window < s->params->counting_windows)
+          s->recv_msg_counters[current_window][msg->app_id]--;    
+    }
+
     tw_rand_reverse_unif(lp->rng);
 
     if(bf->c20)
@@ -3109,6 +3226,14 @@ router_packet_receive( router_state * s,
 
   terminal_custom_message_list * cur_chunk = (terminal_custom_message_list*)calloc(sizeof(terminal_custom_message_list), 1);
   init_terminal_custom_message_list(cur_chunk, msg);
+
+  //Xin: count packets received & identify their app id 
+  msg->last_received_time = tw_now(lp);
+  if(s->params->counting_bool>0 && msg->last_received_time >= s->params->counting_start) {
+      int current_window = (int) ((msg->last_received_time - s->params->counting_start)/s->params->counting_interval);
+      if(current_window < s->params->counting_windows)
+          s->recv_msg_counters[current_window][msg->app_id]++;
+  } 
   
   if(routing == MINIMAL || 
      routing == NON_MINIMAL)	
@@ -3356,6 +3481,18 @@ static void router_packet_send_rc(router_state * s,
       
     terminal_custom_message_list * cur_entry = (terminal_custom_message_list *)rc_stack_pop(s->st);
     assert(cur_entry);
+
+    //Xin: reverse msg couters 
+    bool rolback = false;
+    int current_window = -1;
+    const dragonfly_param *p = s->params;
+    if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) {
+        current_window = (int) ((msg->last_sent_time-s->params->counting_start)/s->params->counting_interval);
+        if(current_window < s->params->counting_windows) {
+          s->send_msg_counters[current_window][msg->app_id]--;  
+          rolback = true;
+        }
+    } 
     
     if(bf->c11)
     {
@@ -3363,6 +3500,11 @@ static void router_packet_send_rc(router_state * s,
         s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
         s->ross_rsample.link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
         s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
+        //Xin: reverse msg couters 
+        if(rolback && current_window >= 0){
+          // s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] -= cur_entry->msg.packet_size % s->params->chunk_size;
+          s->agg_link_traffic[current_window][output_port] -= cur_entry->msg.packet_size % s->params->chunk_size;
+        }
     }
     if(bf->c12)
     {
@@ -3370,6 +3512,11 @@ static void router_packet_send_rc(router_state * s,
         s->link_traffic_sample[output_port] -= s->params->chunk_size;
         s->ross_rsample.link_traffic_sample[output_port] -= s->params->chunk_size;
         s->link_traffic_ross_sample[output_port] -= s->params->chunk_size;
+        //Xin: reverse msg couters 
+        if(rolback && current_window >= 0){
+          // s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] -= s->params->chunk_size;
+          s->agg_link_traffic[current_window][output_port] -= s->params->chunk_size;
+        }
     }
     s->next_output_available_time[output_port] = msg->saved_available_time;
 
@@ -3483,6 +3630,19 @@ router_packet_send( router_state * s,
   m->intm_lp_id = lp->gid;
   m->magic = router_magic_num;
 
+  //Xin: count packets sent & identify their app id 
+  msg->last_sent_time = tw_now(lp);
+  bool update = false;
+  int current_window = -1;
+  const dragonfly_param *p = s->params;
+  if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) {
+      current_window = (int) ((msg->last_sent_time - s->params->counting_start)/s->params->counting_interval);
+      if(current_window < s->params->counting_windows) {
+          s->send_msg_counters[current_window][msg->app_id]++;
+          update = true;
+        }
+  }
+
   if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) {
       bf->c11 = 1;
       s->link_traffic[output_port] +=  (cur_entry->msg.packet_size %
@@ -3493,14 +3653,30 @@ router_packet_send( router_state * s,
                s->params->chunk_size);
       s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % 
                s->params->chunk_size);
+      //Xin: update port traffic data
+      if(update && current_window >= 0){
+        // s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] += (cur_entry->msg.packet_size %
+             // s->params->chunk_size);
+        if (s->router_id==0 && to_terminal)
+          // printf("Router %d: update port %d with app %d traffic to dest %d\n", s->router_id, output_port, msg->app_id, msg->final_dest_gid);
+        s->agg_link_traffic[current_window][output_port] += (cur_entry->msg.packet_size %
+             s->params->chunk_size);
+    }
   } else {
     bf->c12 = 1;
     s->link_traffic[output_port] += s->params->chunk_size;
     s->link_traffic_sample[output_port] += s->params->chunk_size;
     s->ross_rsample.link_traffic_sample[output_port] += s->params->chunk_size;
     s->link_traffic_ross_sample[output_port] += s->params->chunk_size;
+    //Xin: update port traffic data
+    if(update && current_window >= 0){
+      // s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] += s->params->chunk_size;
+      s->agg_link_traffic[current_window][output_port] += s->params->chunk_size;
+    }
   }
 
+
+
   if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && cur_entry->msg.src_terminal_id == T_ID)
       printf("\n Queuing at the router %d ", s->router_id);
   /* Determine the event type. If the packet has arrived at the final 
@@ -3563,6 +3739,16 @@ static void router_buf_update_rc(router_state * s,
         s->ross_rsample.busy_time[indx] = msg->saved_sample_time;
         s->busy_time_ross_sample[indx] = msg->saved_busy_time_ross;
         s->last_buf_full[indx][output_chan] = msg->saved_busy_time;
+
+        //Xin: reverse agg busytime (not working for cross window reverse)
+        const dragonfly_param *p = s->params;
+        if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) {
+            int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval);
+            if(current_window < s->params->counting_windows) {
+              // s->agg_busy_time[current_window][msg->app_id*p->radix+indx] = msg->saved_rcv_time;
+              s->agg_busy_time[current_window][indx] = msg->saved_rcv_time;
+            }
+        }
       }
       if(bf->c1) {
         terminal_custom_message_list* head = return_tail(s->pending_msgs[indx],
@@ -3596,6 +3782,26 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_custom_mess
     s->busy_time_sample[indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]);
     s->ross_rsample.busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]);
     s->busy_time_ross_sample[indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]);
+
+    //Xin: agg busy time
+    const dragonfly_param *p = s->params;
+    msg->last_bufupdate_time = tw_now(lp);
+    if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) {
+        int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval);
+        if(current_window < s->params->counting_windows) {
+            int full_window = (int) ((s->last_buf_full[indx][output_chan] - s->params->counting_start)/s->params->counting_interval);
+            if(full_window==current_window) {
+                // s->agg_busy_time[current_window][msg->app_id*p->radix+indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]);
+              s->agg_busy_time[current_window][indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]);
+            } else {
+                // s->agg_busy_time[current_window][msg->app_id*p->radix+indx] += (tw_now(lp) - (s->params->counting_start+current_window*s->params->counting_interval));
+                // s->agg_busy_time[full_window][msg->app_id*p->radix+indx] += ((s->params->counting_start+current_window*s->params->counting_interval) - s->last_buf_full[indx][output_chan]);
+                s->agg_busy_time[current_window][indx] += (tw_now(lp) - (s->params->counting_start+current_window*s->params->counting_interval));
+                s->agg_busy_time[full_window][indx] += ((s->params->counting_start+current_window*s->params->counting_interval) - s->last_buf_full[indx][output_chan]);
+            }
+          }
+    }
+    
     s->last_buf_full[indx][output_chan] = 0.0;
   }
   if(s->queued_msgs[indx][output_chan] != NULL) {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 45dc400f..426eccc0 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -213,6 +213,14 @@ struct dragonfly_param
     double router_delay;
 
     int max_hops_notify; //maximum number of hops allowed before notifying via printout
+
+    //Xin: parameters for message counters of apps
+    int counting_bool;
+    tw_stime counting_start; 
+    tw_stime counting_interval; 
+    int counting_windows;
+    int num_apps;
+
 };
 
 static const dragonfly_param* stored_params;
@@ -509,6 +517,17 @@ struct router_state
     long fwd_events;
     long rev_events;
 
+    //Xin: buffer for output data
+    char output_buf3[4096];
+    char output_buf4[4096];
+    char output_buf5[4096];
+    char output_buf6[4096];
+    //Xin: msg couters for apps
+    int **recv_msg_counters;
+    int **send_msg_counters;
+    tw_stime **agg_busy_time;
+    int64_t **agg_link_traffic;
+
     /* following used for ROSS model-level stats collection */
     tw_stime* busy_time_ross_sample;
     int64_t * link_traffic_ross_sample;
@@ -1520,6 +1539,24 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     if(p->num_qos_levels > 1)
         p->num_vcs = p->num_qos_levels * p->num_vcs;
 
+    //Xin: app msgs counting on routers
+    rc = configuration_get_value_int(&config, "PARAMS", "counting_bool", anno, &p->counting_bool);
+    if(p->counting_bool) {
+        int rc1 = configuration_get_value_double(&config, "PARAMS", "counting_start", anno, &p->counting_start);
+        int rc2 = configuration_get_value_int(&config, "PARAMS", "counting_windows", anno, &p->counting_windows);
+        int rc3 = configuration_get_value_double(&config, "PARAMS", "counting_interval", anno, &p->counting_interval);
+        int rc4 = configuration_get_value_int(&config, "PARAMS", "num_apps", anno, &p->num_apps);
+        if(rc1 || rc2 || rc3 || rc4)
+            tw_error(TW_LOC, "\n Missing couting values, (counting_start/windows/interval/num_apps) check for config files\n");
+
+        //convert us to ns
+        p->counting_start = p->counting_start * 1000;
+        p->counting_interval = p->counting_interval * 1000;
+
+        //printf("start %f, end %f, interval %f\n", p->counting_start, p->counting_end, p->counting_interval);
+    }
+
+
     rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups);
     if(rc) {
         tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n");
@@ -2425,6 +2462,26 @@ void router_dally_init(router_state * r, tw_lp * lp)
 
     r->connMan->solidify_connections();
 
+    //Xin: msg counters for apps 
+    if(p->counting_bool > 0)
+    {   
+        r->recv_msg_counters = (int **) calloc(p->counting_windows, sizeof(int *));
+        r->send_msg_counters = (int **) calloc(p->counting_windows, sizeof(int *));
+        r->agg_link_traffic = (int64_t **) calloc(p->counting_windows, sizeof(int64_t *));
+        r->agg_busy_time = (tw_stime **) malloc (p->counting_windows * sizeof(tw_stime *));
+
+        for (int i = 0; i < p->counting_windows; ++i)
+        {
+            r->recv_msg_counters[i] = (int*) calloc(p->num_apps, sizeof(int));
+            r->send_msg_counters[i] = (int*) calloc(p->num_apps, sizeof(int));
+            // r->agg_link_traffic[i] = (int64_t*) calloc(p->radix, sizeof(int64_t));
+            r->agg_link_traffic[i] = (int64_t*) calloc(p->radix*p->num_apps, sizeof(int64_t));
+            r->agg_busy_time[i] = (tw_stime*) malloc(p->radix * sizeof(tw_stime));
+            for(int j = 0; j < p->radix; j++)
+              r->agg_busy_time[i][j] = 0.0;
+        }
+    }
+
     return;
 }	
 
@@ -2477,6 +2534,9 @@ static tw_stime dragonfly_dally_packet_event(
     msg->magic = terminal_magic_num; 
     msg->msg_start_time = req->msg_start_time;
 
+    //Xin: for msg counters of apps, find app ids from lp id
+    msg->app_id = req->app_id;
+
     if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/
     {
         if(req->remote_event_size > 0)
@@ -3513,6 +3573,59 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp)
     //             dragonfly_print_params(s->params);
     //     }
     // }
+
+    //Xin: output msg counters of apps
+    if(p->counting_bool)
+    {
+      // for received msgs
+      if(!s->router_id) {
+          written = sprintf(s->output_buf3, "# Format <LP ID> <Group ID> <Router ID> <Timestamp> <Msg Counters>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-recv-msgs", written, s->output_buf3);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+        written = sprintf(s->output_buf3, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id, (p->counting_start+(i+1)*p->counting_interval));
+        for (int d=0; d < p->num_apps; d++)
+          written += sprintf(s->output_buf3 + written, " %d", (s->recv_msg_counters[i][d]));
+        lp_io_write(lp->gid, (char*)"dragonfly-router-recv-msgs", written, s->output_buf3);
+      }
+
+      // for send msgs
+      if(!s->router_id){
+          written = sprintf(s->output_buf4, "# Format <LP ID> <Group ID> <Router ID> <Window ID> <Msg Counters>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-send-msgs", written, s->output_buf4);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+          written = sprintf(s->output_buf4, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id, (p->counting_start+(i+1)*p->counting_interval));
+          for (int d=0; d < p->num_apps; d++)
+              written += sprintf(s->output_buf4 + written, " %d", (s->send_msg_counters[i][d]));
+          lp_io_write(lp->gid, (char*)"dragonfly-router-send-msgs", written, s->output_buf4);
+      } 
+
+      // for link traffic
+      if(!s->router_id){
+          written = sprintf(s->output_buf5, "# Format <LP ID> <Group ID> <Router ID> <Window ID> <Link Traffic>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+          written = sprintf(s->output_buf5, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval));
+          for (int d=0; d < p->radix*p->num_apps; d++)
+              written += sprintf(s->output_buf5 + written, " %d", (s->agg_link_traffic[i][d]));
+          lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5);
+      } 
+
+      // for link busy time
+      if(!s->router_id){
+          written = sprintf(s->output_buf6, "# Format <LP ID> <Group ID> <Router ID> <Window ID> <Link Busy Time>\n");
+          lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6);
+      }
+      for(int i=0; i < p->counting_windows; i++) {
+          written = sprintf(s->output_buf6, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval));
+          for (int d=0; d < p->radix; d++)
+              written += sprintf(s->output_buf6 + written, " %lf", (s->agg_busy_time[i][d]));
+          lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6);
+      }       
+    }
+
 }
 
 static Connection do_dfdally_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id)
@@ -3712,6 +3825,13 @@ static void router_packet_receive_rc(router_state * s,
     int output_port = msg->saved_vc;
     int output_chan = msg->saved_channel;
 
+    //Xin: reverse msg couters 
+    if(s->params->counting_bool>0 && msg->last_received_time >= s->params->counting_start) {
+        int current_window = (int) ((msg->last_received_time-s->params->counting_start)/s->params->counting_interval);
+        if(current_window < s->params->counting_windows)
+          s->recv_msg_counters[current_window][msg->app_id]--;    
+    }
+
     for(int i = 0 ; i < msg->num_cll; i++)
         codes_local_latency_reverse(lp);
 
@@ -3786,6 +3906,15 @@ static void router_packet_receive( router_state * s,
 
     terminal_dally_message_list * cur_chunk = (terminal_dally_message_list*)calloc(1, sizeof(terminal_dally_message_list));
     init_terminal_dally_message_list(cur_chunk, msg);
+
+
+    //Xin: count packets received & identify their app id 
+    msg->last_received_time = tw_now(lp);
+    if(s->params->counting_bool>0 && msg->last_received_time >= s->params->counting_start) {
+      int current_window = (int) ((msg->last_received_time - s->params->counting_start)/s->params->counting_interval);
+      if(current_window < s->params->counting_windows)
+          s->recv_msg_counters[current_window][msg->app_id]++;
+    } 
     
     if(cur_chunk->msg.last_hop == TERMINAL) // We are first router in the path
         cur_chunk->msg.path_type = MINIMAL; // Route always starts as minimal
@@ -3930,7 +4059,19 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
     
     if(msg->last_saved_qos)
        s->last_qos_lvl[output_port] = msg->last_saved_qos; 
-     
+
+    //Xin: reverse msg couters 
+    bool rolback = false;
+    int current_window = -1;
+    const dragonfly_param *p = s->params;
+    if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) {
+        current_window = (int) ((msg->last_sent_time-s->params->counting_start)/s->params->counting_interval);
+        if(current_window < s->params->counting_windows) {
+          s->send_msg_counters[current_window][msg->app_id]--;  
+          rolback = true;
+        }
+    }      
+
     if(bf->c1) {
         s->in_send_loop[output_port] = 1;
         if(bf->c2) {
@@ -3952,6 +4093,11 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
         s->busy_time_sample[output_port] = msg->saved_sample_time;
         s->ross_rsample.busy_time[output_port] = msg->saved_sample_time;
         s->last_buf_full[output_port] = msg->saved_busy_time;
+
+        //Xin: reverse msg couters 
+        if(rolback && current_window >= 0){
+          s->agg_busy_time[current_window][output_port] = msg->saved_rcv_time;
+        }
     }
       
     terminal_dally_message_list * cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st);
@@ -3974,6 +4120,12 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
         s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
         s->ross_rsample.link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
         s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
+
+        //Xin: reverse msg couters 
+        if(rolback && current_window >= 0){
+          s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] -= cur_entry->msg.packet_size % s->params->chunk_size;
+          // s->agg_link_traffic[current_window][output_port] -= cur_entry->msg.packet_size % s->params->chunk_size;
+        }
     }
     if(bf->c12)
     {
@@ -3981,6 +4133,12 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
         s->link_traffic_sample[output_port] -= s->params->chunk_size;
         s->ross_rsample.link_traffic_sample[output_port] -= s->params->chunk_size;
         s->link_traffic_ross_sample[output_port] -= s->params->chunk_size;
+
+        //Xin: reverse msg couters 
+        if(rolback && current_window >= 0){
+          s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] -= s->params->chunk_size;
+          // s->agg_link_traffic[current_window][output_port] -= s->params->chunk_size;
+        }
     }
 
     prepend_to_terminal_dally_message_list(s->pending_msgs[output_port],
@@ -4012,6 +4170,19 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
     
     msg->saved_vc = output_port;
     msg->saved_channel = output_chan;
+
+    //Xin: count packets sent & identify their app id 
+    msg->last_sent_time = tw_now(lp);
+    bool update = false;
+    int current_window = -1;
+    const dragonfly_param *p = s->params;
+    if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) {
+      current_window = (int) ((msg->last_sent_time - s->params->counting_start)/s->params->counting_interval);
+      if(current_window < s->params->counting_windows) {
+          s->send_msg_counters[current_window][msg->app_id]++;
+          update = true;
+        }
+    }
     
     if(output_chan < 0) 
     {
@@ -4040,6 +4211,11 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         s->busy_time_sample[output_port] += (tw_now(lp) - s->last_buf_full[output_port]);
         s->ross_rsample.busy_time[output_port] += (tw_now(lp) - s->last_buf_full[output_port]);
         s->last_buf_full[output_port] = 0.0;
+
+        //Xin: update data
+        if(update && current_window >= 0){
+            s->agg_busy_time[current_window][output_port] += (tw_now(lp) - s->last_buf_full[output_port]); 
+        }
     }
 
     int vcg = 0;
@@ -4121,6 +4297,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         s->ross_rsample.link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size);
         s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size);
         msg_size = cur_entry->msg.packet_size % s->params->chunk_size;
+
+        //Xin: update data
+        if(update && current_window >= 0){
+            // s->agg_link_traffic[current_window][output_port] += (cur_entry->msg.packet_size % s->params->chunk_size);           
+            s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] += (cur_entry->msg.packet_size % s->params->chunk_size);
+        }
     } 
     else {
         bf->c12 = 1;
@@ -4128,6 +4310,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         s->link_traffic_sample[output_port] += s->params->chunk_size;
         s->ross_rsample.link_traffic_sample[output_port] += s->params->chunk_size;
         s->link_traffic_ross_sample[output_port] += s->params->chunk_size;
+
+        //Xin: update data
+        if(update && current_window >= 0){
+            // s->agg_link_traffic[current_window][output_port] += s->params->chunk_size;
+            s->agg_link_traffic[current_window][msg->app_id*p->radix+output_port] += s->params->chunk_size;
+        }
     }
 
     if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && cur_entry->msg.src_terminal_id == T_ID)
@@ -4198,6 +4386,16 @@ static void router_buf_update_rc(router_state * s,
         s->ross_rsample.busy_time[indx] = msg->saved_sample_time;
         s->busy_time_ross_sample[indx] = msg->saved_busy_time_ross;
         s->last_buf_full[indx] = msg->saved_busy_time;
+
+        //Xin: reverse agg busytime (not working for cross window reverse)
+        const dragonfly_param *p = s->params;
+        if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) {
+            int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval);
+            if(current_window < s->params->counting_windows) {
+              // s->agg_busy_time[current_window][msg->app_id*p->radix+indx] = msg->saved_rcv_time;
+              s->agg_busy_time[current_window][indx] = msg->saved_rcv_time;
+            }
+        }
     }
     if(bf->c1) {
         terminal_dally_message_list* head = return_tail(s->pending_msgs[indx],
@@ -4232,6 +4430,26 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa
         s->busy_time_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]);
         s->ross_rsample.busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx]);
         s->busy_time_ross_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]);
+
+        //Xin: agg busy time
+        const dragonfly_param *p = s->params;
+        msg->last_bufupdate_time = tw_now(lp);
+        if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) {
+            int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval);
+            if(current_window < s->params->counting_windows) {
+                int full_window = (int) ((s->last_buf_full[indx] - s->params->counting_start)/s->params->counting_interval);
+                if(full_window==current_window) {
+                    // s->agg_busy_time[current_window][msg->app_id*p->radix+indx] += (tw_now(lp) - s->last_buf_full[indx]);
+                  s->agg_busy_time[current_window][indx] += (tw_now(lp) - s->last_buf_full[indx]);
+                } else {
+                    // s->agg_busy_time[current_window][msg->app_id*p->radix+indx] += (tw_now(lp) - (s->params->counting_start+current_window*s->params->counting_interval));
+                    // s->agg_busy_time[full_window][msg->app_id*p->radix+indx] += ((s->params->counting_start+current_window*s->params->counting_interval) - s->last_buf_full[indx]);
+                    s->agg_busy_time[current_window][indx] += (tw_now(lp) - (s->params->counting_start+current_window*s->params->counting_interval));
+                    s->agg_busy_time[full_window][indx] += ((s->params->counting_start+current_window*s->params->counting_interval) - s->last_buf_full[indx]);
+                }
+            }
+        }
+
         s->last_buf_full[indx] = 0.0;
     }
 
diff --git a/src/workload/codes-workload-dump.c b/src/workload/codes-workload-dump.c
index 18756bba..73631869 100644
--- a/src/workload/codes-workload-dump.c
+++ b/src/workload/codes-workload-dump.c
@@ -215,7 +215,7 @@ int main(int argc, char *argv[])
             wparams = (char*)&d_params;
         }
     }
-    else if(strcmp(type, "online_comm_workload") == 0){
+    else if(strcmp(type, "swm_online_comm_workload") == 0 || strcmp(type, "conc_online_comm_workload") == 0){
         if (n == -1){
             fprintf(stderr,
                     "Expected \"--num-ranks\" argument for online workload\n");
@@ -448,7 +448,7 @@ int main(int argc, char *argv[])
             }
         } while (op.op_type != CODES_WK_END);
 
-    if(strcmp(type, "online_comm_workload") == 0)
+    if(strcmp(type, "swm_online_comm_workload") == 0 || strcmp(type, "conc_online_comm_workload") == 0)
     {
         codes_workload_finalize(type, wparams, 0, i);
     }
diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c
index 190993d3..21c6d493 100644
--- a/src/workload/codes-workload.c
+++ b/src/workload/codes-workload.c
@@ -34,9 +34,14 @@ extern struct codes_workload_method darshan_mpi_io_workload_method;
 #ifdef USE_RECORDER
 extern struct codes_workload_method recorder_io_workload_method;
 #endif
-#ifdef USE_ONLINE
-extern struct codes_workload_method online_comm_workload_method;
+
+#ifdef USE_SWM
+extern struct codes_workload_method swm_online_comm_workload_method;
+#endif
+#ifdef USE_UNION
+extern struct codes_workload_method conc_online_comm_workload_method;
 #endif
+
 extern struct codes_workload_method checkpoint_workload_method;
 extern struct codes_workload_method iomock_workload_method;
 
@@ -58,8 +63,11 @@ static struct codes_workload_method const * method_array_default[] =
 #endif
 
 #endif
-#ifdef USE_ONLINE
-    &online_comm_workload_method,
+#ifdef USE_SWM
+    &swm_online_comm_workload_method,
+#endif
+#ifdef USE_UNION
+    &conc_online_comm_workload_method,
 #endif
 #ifdef USE_RECORDER
     &recorder_io_workload_method,
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
new file mode 100644
index 00000000..144db2d5
--- /dev/null
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -0,0 +1,1887 @@
+/*
+ * Copyright (C) 2014 University of Chicago
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <mpi.h>
+#include <ross.h>
+#include <assert.h>
+#include <deque>
+#include <iostream>
+#include <inttypes.h>
+#include <fstream>
+#include <boost/property_tree/json_parser.hpp>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/foreach.hpp>
+#include "codes/codes-workload.h"
+#include "codes/quickhash.h"
+#include "codes/codes-jobmap.h"
+#include "codes_config.h"
+#include "union_util.h"
+
+//#ifdef USE_SWM
+#include "lammps.h"
+#include "nekbone_swm_user_code.h"
+#include "nearest_neighbor_swm_user_code.h"
+#include "all_to_one_swm_user_code.h"
+#include "milc_swm_user_code.h"
+//#endif
+
+#define ALLREDUCE_SHORT_MSG_SIZE 2048
+
+#define DBG_COMM 0
+
+#define THISMIN(a,b) ((a) < (b)) ? (a) : (b)
+
+using namespace std;
+
+static struct qhash_table *rank_tbl = NULL;
+static int rank_tbl_pop = 0;
+static int total_rank_cnt = 0;
+static ABT_thread global_prod_thread = NULL;
+static ABT_xstream self_es;
+static long cpu_freq = 1.0;
+static long num_allreduce = 0;
+static long num_isends = 0;
+static long num_irecvs = 0;
+static long num_barriers = 0;
+static long num_sends = 0;
+static long num_recvs = 0;
+static long num_sendrecv = 0;
+static long num_waitalls = 0;
+
+//static std::map<int64_t, int> send_count;
+//static std::map<int64_t, int> isend_count;
+//static std::map<int64_t, int> allreduce_count;
+
+struct shared_context {
+    int my_rank;
+    uint32_t wait_id;
+    int num_ranks;
+    char workload_name[MAX_NAME_LENGTH_WKLD];
+    void * swm_obj;
+    void * conc_params;
+    bool isconc;
+    ABT_thread      producer;
+    std::deque<struct codes_workload_op*> fifo;
+};
+
+struct rank_mpi_context {
+    struct qhash_head hash_link;
+    int app_id;
+    struct shared_context sctx;
+};
+
+typedef struct rank_mpi_compare {
+    int app_id;
+    int rank;
+} rank_mpi_compare;
+
+
+/* Conceptual online workload implementations */
+void UNION_MPI_Comm_size (UNION_Comm comm, int *size) 
+{
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err;
+
+    err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    *size = sctx->num_ranks;
+    // printf("ranks %d\n", sctx->num_ranks);
+}
+
+void UNION_MPI_Comm_rank( UNION_Comm comm, int *rank ) 
+{
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err;
+
+    err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    *rank = sctx->my_rank;
+}
+
+void UNION_MPI_Finalize()
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_END;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("FINALIZE src %d\n", sctx->my_rank);
+        // printf("num_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n", 
+        //         num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
+        // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+void UNION_Compute(long cycle_count)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_DELAY;
+    wrkld_per_rank.u.delay.nsecs = cycle_count;
+    wrkld_per_rank.u.delay.seconds = (cycle_count) / (1000.0 * 1000.0 * 1000.0);
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+    if(DBG_COMM){
+        printf("COMPUTE src %d: %ld ns\n", sctx->my_rank, cycle_count);
+    }
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+void UNION_MPI_Send(const void *buf, 
+            int count, 
+            UNION_Datatype datatype, 
+            int dest, 
+            int tag,
+            UNION_Comm comm)
+{
+    /* add an event in the shared queue and then yield */
+    //    printf("\n Sending to rank %d ", comm_id);
+    struct codes_workload_op wrkld_per_rank;
+
+    int datatypesize;
+    MPI_Type_size(datatype, &datatypesize);
+
+    wrkld_per_rank.op_type = CODES_WK_SEND;
+    wrkld_per_rank.u.send.tag = tag;
+    wrkld_per_rank.u.send.count = count;
+    wrkld_per_rank.u.send.data_type = datatype;
+    wrkld_per_rank.u.send.num_bytes = count * datatypesize;
+    wrkld_per_rank.u.send.dest_rank = dest;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+    if(DBG_COMM){
+        printf("SEND src %d dst %d: %lld bytes\n", sctx->my_rank, dest,
+                wrkld_per_rank.u.send.num_bytes);
+    // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
+    }
+    int rc = ABT_thread_yield_to(global_prod_thread);
+    num_sends++;    
+}
+
+void UNION_MPI_Recv(void *buf, 
+            int count, 
+            UNION_Datatype datatype, 
+            int source, 
+            int tag,
+            UNION_Comm comm, 
+            UNION_Status *status)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    int datatypesize;
+    MPI_Type_size(datatype, &datatypesize);
+
+    wrkld_per_rank.op_type = CODES_WK_RECV;
+    wrkld_per_rank.u.recv.tag = tag;
+    wrkld_per_rank.u.recv.source_rank = source;
+    wrkld_per_rank.u.recv.data_type = datatype;
+    wrkld_per_rank.u.recv.count = count;
+    wrkld_per_rank.u.recv.num_bytes = count * datatypesize;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.recv.dest_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+    if(DBG_COMM){
+        printf("RECV src %d dst %d: %lld bytes\n", source, sctx->my_rank, 
+            wrkld_per_rank.u.recv.num_bytes);
+    // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_recvs++;    
+}
+
+void UNION_MPI_Sendrecv(const void *sendbuf, 
+            int sendcount, 
+            UNION_Datatype sendtype,
+            int dest, 
+            int sendtag,
+            void *recvbuf, 
+            int recvcount, 
+            UNION_Datatype recvtype,
+            int source, 
+            int recvtag,
+            UNION_Comm comm, 
+            UNION_Status *status)
+{
+    /* sendrecv events */
+    struct codes_workload_op send_op;
+
+    int datatypesize1, datatypesize2;
+    MPI_Type_size(sendtype, &datatypesize1);
+    MPI_Type_size(recvtype, &datatypesize2);
+
+    send_op.op_type = CODES_WK_SEND;
+    send_op.u.send.tag = sendtag;
+    send_op.u.send.count = sendcount;
+    send_op.u.send.data_type = sendtype;
+    send_op.u.send.num_bytes = sendcount * datatypesize1;
+    send_op.u.send.dest_rank = dest;
+
+    struct codes_workload_op recv_op;
+
+    recv_op.op_type = CODES_WK_RECV;
+    recv_op.u.recv.tag = recvtag;
+    recv_op.u.recv.source_rank = source;
+    recv_op.u.recv.count = recvcount;
+    recv_op.u.recv.data_type = recvtype;
+    recv_op.u.recv.num_bytes = recvcount * datatypesize2;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    /* Add an event in the shared queue and then yield */
+    recv_op.u.recv.dest_rank = sctx->my_rank;
+    send_op.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&send_op);
+    sctx->fifo.push_back(&recv_op);
+    if(DBG_COMM){
+        printf("SENDRECV ssrc %d sdst %d: %lld bytes; rsrc %d rdst %d: %lld bytes\n", sctx->my_rank, dest,
+                send_op.u.send.num_bytes, source, sctx->my_rank, recv_op.u.recv.num_bytes);
+    }
+    ABT_thread_yield_to(global_prod_thread);
+    num_sendrecv++;
+}
+
+
+void UNION_MPI_Barrier(UNION_Comm comm)
+{
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err;
+    int rank, size, src, dest, mask;
+
+    err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    rank = sctx->my_rank;
+    size = sctx->num_ranks;
+    mask = 0x1;
+
+    while(mask < size) {
+        dest = (rank + mask) % size;
+        src = (rank - mask + size) % size;
+
+        UNION_MPI_Sendrecv(NULL, 0, MPI_INT, dest, 1234, NULL, 0, MPI_INT, src, 1234,
+                comm, NULL);
+
+        mask <<= 1;
+    }
+    num_barriers++; 
+    if(DBG_COMM){
+        printf("BARRIER src %d\n", sctx->my_rank);
+    }
+}
+
+void UNION_MPI_Isend(const void *buf, 
+            int count, 
+            UNION_Datatype datatype, 
+            int dest, 
+            int tag,
+            UNION_Comm comm, 
+            UNION_Request *request)
+{
+    /* add an event in the shared queue and then yield */
+    //    printf("\n Sending to rank %d ", comm_id);
+    struct codes_workload_op wrkld_per_rank;
+
+    int datatypesize;
+    MPI_Type_size(datatype, &datatypesize);
+
+    wrkld_per_rank.op_type = CODES_WK_ISEND;
+    wrkld_per_rank.u.send.tag = tag;    
+    wrkld_per_rank.u.send.count = count;
+    wrkld_per_rank.u.send.data_type = datatype;
+    wrkld_per_rank.u.send.num_bytes = count * datatypesize;
+    wrkld_per_rank.u.send.dest_rank = dest;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    *request = sctx->wait_id;
+    wrkld_per_rank.u.send.req_id = *request;
+    sctx->wait_id++;
+    if(DBG_COMM){
+        printf("ISEND src %d dst %d: %lld bytes\n", sctx->my_rank, dest,
+                wrkld_per_rank.u.send.num_bytes);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_isends++;
+}
+
+void UNION_MPI_Irecv(void *buf, 
+            int count, 
+            UNION_Datatype datatype, 
+            int source, 
+            int tag,
+            UNION_Comm comm, 
+            UNION_Request *request)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    int datatypesize;
+    MPI_Type_size(datatype, &datatypesize);
+
+    wrkld_per_rank.op_type = CODES_WK_IRECV;
+    wrkld_per_rank.u.recv.tag = tag;
+    wrkld_per_rank.u.recv.source_rank = source;
+    wrkld_per_rank.u.recv.count = count;
+    wrkld_per_rank.u.recv.data_type = datatype;
+    wrkld_per_rank.u.recv.num_bytes = count * datatypesize;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.recv.dest_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+    
+    *request = sctx->wait_id;
+    wrkld_per_rank.u.recv.req_id = *request;
+    sctx->wait_id++;
+    if(DBG_COMM){
+        printf("IRECV src %d dst %d: %lld bytes\n", source, sctx->my_rank, 
+                wrkld_per_rank.u.recv.num_bytes);    
+    }
+    ABT_thread_yield_to(global_prod_thread);
+    num_irecvs++;    
+}
+
+void UNION_MPI_Wait(UNION_Request *request,
+        UNION_Status *status)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_WAIT;
+    wrkld_per_rank.u.wait.req_id = *(UNION_Request *)request;   
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+    if(DBG_COMM){
+        printf("WAIT src %d\n",sctx->my_rank);    
+    }
+    ABT_thread_yield_to(global_prod_thread);       
+}
+
+void UNION_MPI_Waitall(int count, 
+            UNION_Request array_of_requests[], 
+            UNION_Status array_of_statuses[])
+{
+    num_waitalls++;
+    for(int i = 0; i < count; i++)
+        UNION_MPI_Wait(&array_of_requests[i], UNION_STATUSES_IGNORE);
+
+    if(DBG_COMM){
+        printf("WAITALL count %d\n", count);    
+    }  
+}
+
+void UNION_MPI_Reduce(const void *sendbuf, 
+            void *recvbuf, 
+            int count, 
+            UNION_Datatype datatype,
+            UNION_Op op, 
+            int root, 
+            UNION_Comm comm)
+{
+    //todo
+}
+
+void UNION_MPI_Allreduce(const void *sendbuf, 
+            void *recvbuf, 
+            int count, 
+            UNION_Datatype datatype,
+            UNION_Op op, 
+            UNION_Comm comm)
+{
+    int comm_size, rank, type_size, i, send_idx, recv_idx, last_idx, send_cnt, recv_cnt;
+    int pof2, mask, rem, newrank, newdst, dst, *cnts, *disps;
+
+    UNION_MPI_Comm_size(comm, &comm_size);
+    UNION_MPI_Comm_rank(comm, &rank);
+    MPI_Type_size(datatype, &type_size);
+
+    cnts = disps = NULL;
+    
+    pof2 = 1;
+    while (pof2 <= comm_size) pof2 <<= 1;
+    pof2 >>=1;
+
+    rem = comm_size - pof2;
+
+    /* In the non-power-of-two case, all even-numbered
+       processes of rank < 2*rem send their data to
+       (rank+1). These even-numbered processes no longer
+       participate in the algorithm until the very end. The
+       remaining processes form a nice power-of-two. */
+    if (rank < 2*rem) {
+        if (rank % 2 == 0) { /* even */
+            UNION_MPI_Send(NULL, count, datatype, rank+1, -1002, comm);
+            newrank = -1;
+        } else { /* odd */
+            UNION_MPI_Recv(NULL, count, datatype, rank-1, -1002, comm, NULL);
+            newrank = rank / 2;
+        }
+    } else {
+        newrank = rank - rem;
+    }
+
+    /* If op is user-defined or count is less than pof2, use
+       recursive doubling algorithm. Otherwise do a reduce-scatter
+       followed by allgather. (If op is user-defined,
+       derived datatypes are allowed and the user could pass basic
+       datatypes on one process and derived on another as long as
+       the type maps are the same. Breaking up derived
+       datatypes to do the reduce-scatter is tricky, therefore
+       using recursive doubling in that case.) */
+    if (newrank != -1) { 
+        if ((count*type_size <= 81920 ) || (count < pof2)) {
+            mask = 0x1;
+            while (mask < pof2) {
+                newdst = newrank ^ mask;
+                dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;
+
+                UNION_MPI_Sendrecv(NULL, count, datatype, dst, -1002, NULL, count, datatype, dst, -1002, comm, NULL);
+                mask <<= 1;
+            }
+        } else {
+            /* do a reduce-scatter followed by allgather */
+            /* for the reduce-scatter, calculate the count that
+            each process receives and the displacement within
+            the buffer */
+
+            cnts = (int*)malloc(pof2*sizeof(int));
+            disps = (int*)malloc(pof2*sizeof(int));
+            
+            for (i=0; i<(pof2-1); i++)
+                cnts[i] = count/pof2;
+            cnts[pof2-1] = count - (count/pof2)*(pof2-1);
+            
+            disps[0] = 0;
+            for (i=1; i<pof2; i++)
+                disps[i] = disps[i-1] + cnts[i-1];
+
+            mask = 0x1;
+            send_idx = recv_idx = 0;
+            last_idx = pof2;
+            while (mask < pof2) {
+                newdst = newrank ^ mask;
+                dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;
+                send_cnt = recv_cnt = 0;
+                if (newrank < newdst) {
+                    send_idx = recv_idx + pof2/(mask*2);
+                    for (i=send_idx; i<last_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<send_idx; i++)
+                        recv_cnt += cnts[i];
+                } else {
+                    recv_idx = send_idx + pof2/(mask*2);
+                    for (i=send_idx; i<recv_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<last_idx; i++)
+                        recv_cnt += cnts[i];
+                }
+
+                UNION_MPI_Sendrecv(NULL, send_cnt, datatype, dst, -1002, NULL, recv_cnt, datatype, dst, -1002, comm, NULL);
+
+                send_idx = recv_idx;
+                mask <<= 1;
+    
+                if(mask < pof2)
+                    last_idx = recv_idx + pof2/mask;
+            }
+        
+            /* now do the allgather */
+            mask >>= 1;
+            while (mask > 0) {
+                newdst = newrank ^ mask;
+                /* find real rank of dest */
+                dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;
+
+                send_cnt = recv_cnt = 0;
+                if (newrank < newdst) {
+                    if (mask != pof2/2)
+                        last_idx = last_idx + pof2/(mask*2);
+                
+                    recv_idx = send_idx + pof2/(mask*2);
+                    for (i=send_idx; i<recv_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<last_idx; i++)
+                        recv_cnt += cnts[i];
+                } else {
+                    recv_idx = send_idx - pof2/(mask*2);
+                    for (i=send_idx; i<last_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<send_idx; i++)
+                        recv_cnt += cnts[i];
+                }
+
+                UNION_MPI_Sendrecv(NULL, send_cnt, datatype, dst, -1002, NULL, recv_cnt, datatype, dst, -1002, comm, NULL);
+
+                if (newrank > newdst) send_idx = recv_idx;
+                mask >>= 1;
+            }
+        }
+    } 
+
+    if(rank < 2*rem) {
+        if(rank % 2) {/* odd */
+            UNION_MPI_Send(NULL, count, datatype, rank-1, -1002, comm);
+        } else {
+            UNION_MPI_Recv(NULL, count, datatype, rank+1, -1002, comm, NULL);
+        }
+    }
+
+    if(cnts) free(cnts);
+    if(disps) free(disps);    
+}
+
+
+void bcast_binomial(void *buffer,
+              int rank,
+              int count,
+              UNION_Datatype datatype,
+              int root,
+              UNION_Comm comm)
+{
+  int comm_size, src, dst, relative_rank, mask;
+  UNION_Status status;
+  UNION_MPI_Comm_size(comm, &comm_size);
+
+  relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;
+
+  mask = 0x1;
+  while(mask < comm_size)
+  {
+    if(relative_rank & mask)
+    {
+      src = rank - mask;
+      if(src < 0) src += comm_size;
+      UNION_MPI_Recv(buffer,count,datatype,src,-1005,comm, &status);
+      break;
+    }
+    mask <<= 1;
+  }
+
+  mask >>=1;
+  while(mask > 0)
+  {
+    if(relative_rank + mask < comm_size)
+    {
+      dst = rank + mask;
+      if(dst >= comm_size) dst -= comm_size;
+      UNION_MPI_Send(buffer,count,datatype,dst,-1005,comm);
+    }
+    mask >>= 1;
+  }
+}
+
+void bcast_scatter_doubling_allgather(void *buffer,
+              int rank,
+              int count,
+              UNION_Datatype datatype,
+              int root,
+              UNION_Comm comm)
+{
+  int comm_size, dst, relative_rank, mask, scatter_size, curr_size, recvcount, recv_size = 0;
+  UNION_Status status;
+  int j, k, i, tmp_mask;
+  int type_size, nbytes = 0;
+  int relative_dst, dst_tree_root, my_tree_root, send_offset, recv_offset;
+
+  MPI_Type_size(datatype, &type_size);
+  UNION_MPI_Comm_size(comm, &comm_size);
+
+  relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;
+  
+  if(comm_size == 1) return;
+
+  nbytes = type_size * count;
+  if(nbytes == 0) return;
+
+  scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */
+  curr_size = THISMIN(scatter_size, (nbytes - (relative_rank * scatter_size)));
+
+  if (curr_size < 0) curr_size = 0;
+
+  mask = 0x1;
+  i = 0;
+
+  while(mask < comm_size) {
+    relative_dst = relative_rank ^ mask;
+    dst = (relative_dst + root) % comm_size;
+
+    dst_tree_root = relative_dst >> i;
+    dst_tree_root <<= i;
+
+    my_tree_root = relative_rank >> i;
+    my_tree_root <<= i;
+  
+    send_offset = my_tree_root * scatter_size;
+    recv_offset = dst_tree_root * scatter_size;
+
+    if(relative_dst < comm_size)
+    {
+      recvcount = (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset);
+      UNION_MPI_Sendrecv(buffer,curr_size,MPI_BYTE,dst,-1005,buffer,recvcount,MPI_BYTE,dst,-1005,comm,&status);
+      curr_size += recv_size;
+    }
+
+    mask <<= 1;
+    i++;
+  }
+}
+
+void bcast_scatter_ring_allgather(void *buffer,
+              int rank,
+              int count,
+              UNION_Datatype datatype,
+              int root,
+              UNION_Comm comm)
+{
+  int comm_size, scatter_size, j, i, nbytes, type_size;
+  int left, right, jnext, curr_size = 0;
+  int recvd_size;
+  UNION_Status status;
+
+  MPI_Type_size(datatype, &type_size);
+  UNION_MPI_Comm_size(comm, &comm_size);
+
+  if(comm_size == 1) return;
+
+  nbytes = type_size * count;
+  if (nbytes == 0) return;
+
+  scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */
+
+  curr_size = THISMIN(scatter_size,  nbytes - ((rank - root + comm_size) % comm_size) * scatter_size);
+  if(curr_size < 0) curr_size = 0;
+
+  left  = (comm_size + rank - 1) % comm_size;
+  right = (rank + 1) % comm_size;
+  j = rank;
+  jnext = left;
+
+  for (i=1; i<comm_size; i++)
+  {
+    int left_count, right_count, left_disp, right_disp, rel_j, rel_jnext;
+    rel_j     = (j     - root + comm_size) % comm_size;
+    rel_jnext = (jnext - root + comm_size) % comm_size;
+    left_count = THISMIN(scatter_size, (nbytes - rel_jnext * scatter_size));
+    if(left_count < 0) left_count = 0;
+    left_disp = rel_jnext * scatter_size;
+    right_count = THISMIN(scatter_size, (nbytes - rel_j * scatter_size));
+    if(right_count < 0) right_count = 0;
+    right_disp = rel_j * scatter_size;
+
+    UNION_MPI_Sendrecv(buffer,right_count,MPI_BYTE,right,-1005,buffer,left_count,MPI_BYTE,left,-1005,comm,&status);  
+    curr_size += recvd_size;
+    j = jnext;
+    jnext = (comm_size + jnext - 1) % comm_size;
+  }
+}
+
+
+void UNION_MPI_Bcast(void *buffer, 
+            int count, 
+            UNION_Datatype datatype, 
+            int root, 
+            UNION_Comm comm)
+{
+    int type_size, comm_size, rank;
+    MPI_Type_size(datatype, &type_size);
+    UNION_MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    UNION_MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
+    int nbytes = count * type_size;
+
+    if((nbytes < 12288) || (comm_size < 8)) {
+    //use binomial algorithm
+    bcast_binomial(buffer,rank,count,datatype,root,comm);
+    } else if((nbytes < 524288) && !(comm_size & (comm_size - 1))) {
+    //use scatter followed by recursive doubling allgather
+    bcast_scatter_doubling_allgather(buffer,rank,count,datatype,root,comm);
+    } else {
+    //use scatter followed by ring allgather
+    bcast_scatter_ring_allgather(buffer,rank,count,datatype,root,comm);
+    }
+    if(DBG_COMM){
+        printf("BCAST src %d\n", root);    
+    }  
+}
+
+void UNION_MPI_Alltoallv(const void *sendbuf, 
+            const int *sendcounts, 
+            const int *sdispls,
+            UNION_Datatype sendtype, 
+            void *recvbuf, 
+            const int *recvcounts,
+            const int *rdispls, 
+            UNION_Datatype recvtype, 
+            UNION_Comm comm)
+{
+    int comm_size, i, j;
+    int dst, rank, req_cnt, req_num = 1;
+    int ii, ss, bblock;
+    int type_size;
+
+    bblock = 32; //equivalent of MPIR_CVAR_ALLTOALL_THROTTLE in Mpich
+
+    UNION_Status starray[2*bblock];
+    UNION_Request reqarray[2*bblock];
+
+    UNION_MPI_Comm_size(comm, &comm_size);
+    UNION_MPI_Comm_rank(comm, &rank);
+
+
+    for(ii=0; ii<comm_size; ii+=bblock) {
+
+        req_cnt = 0;
+        ss = comm_size-ii < bblock ? comm_size-ii : bblock;
+
+        for ( i=0; i<ss; i++ ) {
+            dst = (rank+i+ii) % comm_size;
+            if (recvcounts[dst]) {
+                req_num++; // hopefuly the program is not doing other requests at the same time...
+                reqarray[req_cnt] = req_num;
+                UNION_MPI_Irecv(NULL, recvcounts[dst], recvtype, dst, -1003, comm, &req_num);
+                req_cnt++;
+            }
+        }
+
+        for ( i=0; i<ss; i++ ) {
+            dst = (rank-i-ii+comm_size) % comm_size;
+            if (sendcounts[dst]) {
+                req_num++;
+                reqarray[req_cnt] = req_num;
+                UNION_MPI_Isend(NULL, sendcounts[dst], sendtype, dst, -1003, comm, &req_num);
+                req_cnt++;
+            }
+        }
+        // UNION_MPI_Waitall(req_cnt, reqarray, starray);
+        UNION_MPI_Barrier(comm);
+    } 
+}
+
+void UNION_MPI_Alltoall(const void *sendbuf, 
+            int sendcount, 
+            UNION_Datatype sendtype, 
+            void *recvbuf,
+            int recvcount, 
+            UNION_Datatype recvtype, 
+            UNION_Comm comm)
+{
+    int *sendcounts, *sdispls, *recvcounts, *rdispls;
+    int i, comm_size;
+    UNION_MPI_Comm_size(comm, &comm_size);
+
+    sendcounts = (int *)malloc( comm_size * sizeof(int) );
+    recvcounts = (int *)malloc( comm_size * sizeof(int) );
+    rdispls = (int *)malloc( comm_size * sizeof(int) );
+    sdispls = (int *)malloc( comm_size * sizeof(int) );
+ 
+    for (i=0; i<comm_size; i++) {
+        sendcounts[i] = sendcount;
+        recvcounts[i] = recvcount;
+        rdispls[i] = i * recvcount;
+        sdispls[i] = i * sendcount;
+    }
+    UNION_MPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm);
+   
+    free( sdispls );
+    free( rdispls );
+    free( recvcounts );
+    free( sendcounts );
+}
+
+
+
+//#ifdef USE_SWM
+
+/*
+ * peer: the receiving peer id 
+ * comm_id: the communicator id being used
+ * tag: tag id 
+ * reqvc: virtual channel being used by the message (to be ignored)
+ * rspvc: virtual channel being used by the message (to be ignored)
+ * buf: the address of sender's buffer in memory
+ * bytes: number of bytes to be sent 
+ * reqrt and rsprt: routing types (to be ignored) */
+
+void SWM_Send(SWM_PEER peer,
+        SWM_COMM_ID comm_id,
+        SWM_TAG tag,
+        SWM_VC reqvc,
+        SWM_VC rspvc,
+        SWM_BUF buf,
+        SWM_BYTES bytes,
+        SWM_BYTES pktrspbytes,
+        SWM_ROUTING_TYPE reqrt,
+        SWM_ROUTING_TYPE rsprt)
+{
+    /* add an event in the shared queue and then yield */
+    //    printf("\n Sending to rank %d ", comm_id);
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_SEND;
+    wrkld_per_rank.u.send.tag = tag;
+    wrkld_per_rank.u.send.num_bytes = bytes;
+    wrkld_per_rank.u.send.dest_rank = peer;
+
+#ifdef DBG_COMM
+/*    if(tag != 1235 && tag != 1234) 
+    {
+        auto it = send_count.find(bytes);
+        if(it == send_count.end())
+        {
+            send_count.insert(std::make_pair(bytes, 1));
+        }
+        else
+        {
+            it->second = it->second + 1;
+        }
+    }*/
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("SEND src %d dst %d: %lld bytes\n", sctx->my_rank, peer,
+                wrkld_per_rank.u.send.num_bytes);
+    // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_sends++;
+}
+
+/*
+ * @param comm_id: communicator ID (For now, MPI_COMM_WORLD)
+ * reqvc and rspvc: virtual channel IDs for request and response (ignore for
+ * our purpose)
+ * buf: buffer location for the call (ignore for our purpose)
+ * reqrt and rsprt: routing types, ignore and use routing from config file instead. 
+ * */
+void SWM_Barrier(
+        SWM_COMM_ID comm_id,
+        SWM_VC reqvc,
+        SWM_VC rspvc,
+        SWM_BUF buf, 
+        SWM_UNKNOWN auto1,
+        SWM_UNKNOWN2 auto2,
+        SWM_ROUTING_TYPE reqrt, 
+        SWM_ROUTING_TYPE rsprt)
+{
+    /* Add an event in the shared queue and then yield */
+#if 0
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_DELAY;
+    /* TODO: Check how to convert cycle count into delay? */
+    wrkld_per_rank.u.delay.nsecs = 0.1;
+
+#ifdef DBG_COMM
+    printf("\n Barrier delay %lf ", wrkld_per_rank.u.delay.nsecs);
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    ABT_thread_yield_to(global_prod_thread);
+#endif
+#ifdef DBG_COMM
+//     printf("\n barrier ");
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err;
+    int rank, size, src, dest, mask;
+
+    err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    rank = sctx->my_rank;
+    size = sctx->num_ranks;
+    mask = 0x1;
+
+    while(mask < size) {
+        dest = (rank + mask) % size;
+        src = (rank - mask + size) % size;
+
+        SWM_Sendrecv(comm_id, dest, 1234, reqvc, rspvc, 0, 0, 0,
+                src,  1234, 0,  reqrt, rsprt);
+        mask <<= 1;
+    }
+    num_barriers++;
+}
+
+void SWM_Isend(SWM_PEER peer,
+        SWM_COMM_ID comm_id,
+        SWM_TAG tag,
+        SWM_VC reqvc,
+        SWM_VC rspvc,
+        SWM_BUF buf,
+        SWM_BYTES bytes,
+        SWM_BYTES pktrspbytes,
+        uint32_t * handle,
+        SWM_ROUTING_TYPE reqrt,
+        SWM_ROUTING_TYPE rsprt)
+{
+    /* add an event in the shared queue and then yield */
+    //    printf("\n Sending to rank %d ", comm_id);
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_ISEND;
+    wrkld_per_rank.u.send.tag = tag;
+    wrkld_per_rank.u.send.num_bytes = bytes;
+    wrkld_per_rank.u.send.dest_rank = peer;
+
+#ifdef DBG_COMM
+/*    if(tag != 1235 && tag != 1234) 
+    {
+        auto it = isend_count.find(bytes);
+        if(it == isend_count.end())
+        {
+            isend_count.insert(std::make_pair(bytes, 1));
+        }
+        else
+        {
+            it->second = it->second + 1;
+        }
+    }*/
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    *handle = sctx->wait_id;
+    wrkld_per_rank.u.send.req_id = *handle;
+    sctx->wait_id++;
+
+    if(DBG_COMM){
+        printf("ISEND src %d dst %d: %lld bytes\n", sctx->my_rank, peer,
+                wrkld_per_rank.u.send.num_bytes);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_isends++;
+}
+void SWM_Recv(SWM_PEER peer,
+        SWM_COMM_ID comm_id,
+        SWM_TAG tag,
+        SWM_BUF buf)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_RECV;
+    wrkld_per_rank.u.recv.tag = tag;
+    wrkld_per_rank.u.recv.source_rank = peer;
+    wrkld_per_rank.u.recv.num_bytes = 0;
+
+#ifdef DBG_COMM
+    //printf("\n recv op tag: %d source: %d ", tag, peer);
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.recv.dest_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("RECV src %d dst %d: %lld bytes\n", peer, sctx->my_rank, 
+                wrkld_per_rank.u.recv.num_bytes);    
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_recvs++;
+}
+
+/* handle is for the request ID */
+void SWM_Irecv(SWM_PEER peer,
+        SWM_COMM_ID comm_id,
+        SWM_TAG tag,
+        SWM_BUF buf, 
+        uint32_t* handle)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_IRECV;
+    wrkld_per_rank.u.recv.tag = tag;
+    wrkld_per_rank.u.recv.source_rank = peer;
+    wrkld_per_rank.u.recv.num_bytes = 0;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.recv.dest_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    
+    *handle = sctx->wait_id;
+    wrkld_per_rank.u.recv.req_id = *handle;
+    sctx->wait_id++;
+
+    if(DBG_COMM){
+        printf("IRECV src %d dst %d: %lld bytes\n", peer, sctx->my_rank, 
+                wrkld_per_rank.u.recv.num_bytes);    
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_irecvs++;
+}
+
+void SWM_Compute(long cycle_count)
+{
+    //NM: noting that cpu_frequency has been loaded in comm_online_workload_load() as GHz, e.g. cpu_freq = 2.0 means 2.0GHz
+    if(!cpu_freq)
+        cpu_freq = 2.0;
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    double cpu_freq_hz = cpu_freq * 1000.0 * 1000.0 * 1000.0;
+    double delay_in_seconds = cycle_count / cpu_freq_hz;
+    double delay_in_ns = delay_in_seconds * 1000.0 * 1000.0 * 1000.0;
+
+    wrkld_per_rank.op_type = CODES_WK_DELAY;
+    /* TODO: Check how to convert cycle count into delay? */
+    wrkld_per_rank.u.delay.nsecs = delay_in_ns;
+    wrkld_per_rank.u.delay.seconds = delay_in_seconds;
+#ifdef DBG_COMM
+    // printf("\n Compute op delay: %f ", delay_in_ns);
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+    
+    ABT_thread_yield_to(global_prod_thread);
+
+}
+
+void SWM_Wait(uint32_t req_id)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_WAIT;
+    /* TODO: Check how to convert cycle count into delay? */
+    wrkld_per_rank.u.wait.req_id = req_id;
+
+#ifdef DBG_COMM
+//    printf("\n wait op req_id: %"PRIu32"\n", req_id);
+//      printf("\n wait ");
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("WAIT src %d\n",sctx->my_rank);    
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+void SWM_Waitall(int len, uint32_t * req_ids)
+{
+    num_waitalls++;
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_WAITALL;
+    /* TODO: Check how to convert cycle count into delay? */
+    wrkld_per_rank.u.waits.count = len;
+    wrkld_per_rank.u.waits.req_ids = (unsigned int*)calloc(len, sizeof(int));    
+
+    for(int i = 0; i < len; i++)
+        wrkld_per_rank.u.waits.req_ids[i] = req_ids[i];
+
+#ifdef DBG_COMM
+//    for(int i = 0; i < len; i++)
+//        printf("\n wait op len %d req_id: %"PRIu32"\n", len, req_ids[i]);
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("WAITALL src %d: count %d\n",sctx->my_rank, len);    
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+void SWM_Sendrecv(
+        SWM_COMM_ID comm_id,
+        SWM_PEER sendpeer,
+        SWM_TAG sendtag,
+        SWM_VC sendreqvc,
+        SWM_VC sendrspvc,
+        SWM_BUF sendbuf,
+        SWM_BYTES sendbytes,
+        SWM_BYTES pktrspbytes,
+        SWM_PEER recvpeer,
+        SWM_TAG recvtag,
+        SWM_BUF recvbuf,
+        SWM_ROUTING_TYPE reqrt,
+        SWM_ROUTING_TYPE rsprt)
+{
+    //    printf("\n Sending to %d receiving from %d ", sendpeer, recvpeer);
+    struct codes_workload_op send_op;
+
+    send_op.op_type = CODES_WK_SEND;
+    send_op.u.send.tag = sendtag;
+    send_op.u.send.num_bytes = sendbytes;
+    send_op.u.send.dest_rank = sendpeer;
+
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op recv_op;
+
+    recv_op.op_type = CODES_WK_RECV;
+    recv_op.u.recv.tag = recvtag;
+    recv_op.u.recv.source_rank = recvpeer;
+    recv_op.u.recv.num_bytes = 0;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    recv_op.u.recv.dest_rank = sctx->my_rank;
+    send_op.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&send_op);
+    sctx->fifo.push_back(&recv_op);
+
+    if(DBG_COMM){
+        printf("SENDRECV ssrc %d sdst %d: %d bytes; rsrc %d rdst %d: %lld bytes\n", sctx->my_rank, sendpeer,
+                sendbytes, recvpeer, sctx->my_rank, recv_op.u.recv.num_bytes);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+    num_sendrecv++;
+}
+
+/* @param count: number of bytes in Allreduce
+ * @param respbytes: number of bytes to be sent in response (ignore for our
+ * purpose)
+ * $params comm_id: communicator ID (MPI_COMM_WORLD for our case)
+ * @param sendreqvc: virtual channel of the sender request (ignore for our
+ * purpose)
+ * @param sendrspvc: virtual channel of the response request (ignore for our
+ * purpose)
+ * @param sendbuf and rcvbuf: buffers for send and receive calls (ignore for
+ * our purpose) */
+void SWM_Allreduce(
+        SWM_BYTES count,
+        SWM_BYTES respbytes,
+        SWM_COMM_ID comm_id,
+        SWM_VC sendreqvc,
+        SWM_VC sendrspvc,
+        SWM_BUF sendbuf,
+        SWM_BUF rcvbuf)
+{
+#if 0
+    /* TODO: For now, simulate a constant delay for ALlreduce*/
+    //    printf("\n Allreduce bytes %d ", bytes);
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_DELAY;
+    /* TODO: Check how to convert cycle count into delay? */
+    wrkld_per_rank.u.delay.nsecs = bytes + 0.1;
+
+#ifdef DBG_COMM
+    printf("\n Allreduce delay %lf ", wrkld_per_rank.u.delay.nsecs);
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    ABT_thread_yield_to(global_prod_thread);
+#endif
+
+#ifdef DBG_COMM
+        /*
+        auto it = allreduce_count.find(count);
+        if(it == allreduce_count.end())
+        {
+            allreduce_count.insert(std::make_pair(count, 1));
+        }
+        else
+        {
+            it->second = it->second + 1;
+        }
+        */
+#endif
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    int comm_size, i, send_idx, recv_idx, last_idx, send_cnt, recv_cnt;
+    int pof2, mask, rem, newrank, newdst, dst, *cnts, *disps;
+    int rank = sctx->my_rank;
+    comm_size = sctx->num_ranks;
+
+    cnts = disps = NULL;
+
+    pof2 = 1;
+    while (pof2 <= comm_size) pof2 <<= 1;
+    pof2 >>=1;
+
+    rem = comm_size - pof2;
+
+    /* In the non-power-of-two case, all even-numbered
+       processes of rank < 2*rem send their data to
+       (rank+1). These even-numbered processes no longer
+       participate in the algorithm until the very end. The
+       remaining processes form a nice power-of-two. */
+    if (rank < 2*rem) {
+        if (rank % 2 == 0) { /* even */
+            SWM_Send(rank+1, comm_id, 1235, sendreqvc, sendrspvc, 0, count, 1, 0, 0);
+            newrank = -1;
+        } else { /* odd */
+            SWM_Recv(rank-1, comm_id, 1235, 0);
+            newrank = rank / 2;
+        }
+    } else {
+        newrank = rank - rem;
+    }
+
+    /* If op is user-defined or count is less than pof2, use
+       recursive doubling algorithm. Otherwise do a reduce-scatter
+       followed by allgather. (If op is user-defined,
+       derived datatypes are allowed and the user could pass basic
+       datatypes on one process and derived on another as long as
+       the type maps are the same. Breaking up derived
+       datatypes to do the reduce-scatter is tricky, therefore
+       using recursive doubling in that case.) */
+    if (newrank != -1) {
+        if ((count <= ALLREDUCE_SHORT_MSG_SIZE) || (count < pof2)) {
+
+            mask = 0x1;
+            while (mask < pof2) {
+                newdst = newrank ^ mask;
+                dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;
+
+                SWM_Sendrecv(comm_id, dst, 1235, sendreqvc, sendrspvc, 0,
+                        count, 1, dst, 1235, 0, 0, 0);
+
+                mask <<= 1;
+            }
+        } else {
+            /* do a reduce-scatter followed by allgather */
+            /* for the reduce-scatter, calculate the count that
+               each process receives and the displacement within
+               the buffer */
+
+            cnts = (int*)malloc(pof2*sizeof(int));
+            disps = (int*)malloc(pof2*sizeof(int));
+
+            for (i=0; i<(pof2-1); i++)
+                cnts[i] = count/pof2;
+            cnts[pof2-1] = count - (count/pof2)*(pof2-1);
+
+            disps[0] = 0;
+            for (i=1; i<pof2; i++)
+                disps[i] = disps[i-1] + cnts[i-1];
+
+            mask = 0x1;
+            send_idx = recv_idx = 0;
+            last_idx = pof2;
+            while (mask < pof2) {
+                newdst = newrank ^ mask;
+                dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;
+                send_cnt = recv_cnt = 0;
+                if (newrank < newdst) {
+                    send_idx = recv_idx + pof2/(mask*2);
+                    for (i=send_idx; i<last_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<send_idx; i++)
+                        recv_cnt += cnts[i];
+                } else {
+                    recv_idx = send_idx + pof2/(mask*2);
+                    for (i=send_idx; i<recv_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<last_idx; i++)
+                        recv_cnt += cnts[i];
+                }
+
+                SWM_Sendrecv(comm_id, dst, 1235, sendreqvc, sendrspvc, 0,
+                        send_cnt, 1, dst, 1235, 0, 0, 0);
+
+                send_idx = recv_idx;
+                mask <<= 1;
+
+                if(mask < pof2)
+                    last_idx = recv_idx + pof2/mask;
+            }
+
+            /* now do the allgather */
+            mask >>= 1;
+            while (mask > 0) {
+                newdst = newrank ^ mask;
+                /* find real rank of dest */
+                dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;
+
+                send_cnt = recv_cnt = 0;
+                if (newrank < newdst) {
+                    if (mask != pof2/2)
+                        last_idx = last_idx + pof2/(mask*2);
+
+                    recv_idx = send_idx + pof2/(mask*2);
+                    for (i=send_idx; i<recv_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<last_idx; i++)
+                        recv_cnt += cnts[i];
+                } else {
+                    recv_idx = send_idx - pof2/(mask*2);
+                    for (i=send_idx; i<last_idx; i++)
+                        send_cnt += cnts[i];
+                    for (i=recv_idx; i<send_idx; i++)
+                        recv_cnt += cnts[i];
+                }
+
+                SWM_Sendrecv(comm_id, dst, 1235, sendreqvc, sendrspvc, 0,
+                        send_cnt, 1, dst, 1235, 0, 0, 0);
+
+                if (newrank > newdst) send_idx = recv_idx;
+
+                mask >>= 1;
+            }
+        }
+    }
+
+    if(rank < 2*rem) {
+        if(rank % 2) {/* odd */
+            SWM_Send(rank-1, comm_id, 1235, sendreqvc, sendrspvc, 0, count, 1, 0, 0);
+        } else {
+            SWM_Recv(rank+1, comm_id, 1235, 0);
+        }
+    }
+
+    if(cnts) free(cnts);
+    if(disps) free(disps);
+
+    num_allreduce++;
+}
+
+void SWM_Allreduce(
+        SWM_BYTES bytes,
+        SWM_BYTES respbytes,
+        SWM_COMM_ID comm_id,
+        SWM_VC sendreqvc,
+        SWM_VC sendrspvc,
+        SWM_BUF sendbuf,
+        SWM_BUF rcvbuf,
+        SWM_UNKNOWN auto1,
+        SWM_UNKNOWN2 auto2,
+        SWM_ROUTING_TYPE reqrt,
+        SWM_ROUTING_TYPE rsprt)
+{
+    SWM_Allreduce(bytes, respbytes, comm_id, sendreqvc, sendrspvc, sendbuf, rcvbuf);
+}
+
+void SWM_Finalize()
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_END;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        /*    
+        auto it = allreduce_count.begin();
+        for(; it != allreduce_count.end(); it++)
+        {
+            cout << "\n Allreduce " << it->first << " " << it->second;
+        }
+        
+        it = send_count.begin();
+        for(; it != send_count.end(); it++)
+        {
+            cout << "\n Send " << it->first << " " << it->second;
+        }
+        
+        it = isend_count.begin();
+        for(; it != isend_count.end(); it++)
+        {
+            cout << "\n isend " << it->first << " " << it->second;
+        }*/
+        printf("\n finalize workload for rank %d ", sctx->my_rank);
+        //printf("\n finalize workload for rank %d num_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld", sctx->my_rank, num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
+    }
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+
+//#endif
+
+
+static int hash_rank_compare(void *key, struct qhash_head *link)
+{
+    rank_mpi_compare *in = (rank_mpi_compare*)key;
+    rank_mpi_context *tmp;
+
+    tmp = qhash_entry(link, rank_mpi_context, hash_link);
+    if (tmp->sctx.my_rank == in->rank && tmp->app_id == in->app_id)
+        return 1;
+    return 0;
+}
+static void workload_caller(void * arg)
+{
+    shared_context* sctx = static_cast<shared_context*>(arg);
+
+    // printf("\n workload name %s ", sctx->workload_name);
+    if(strncmp(sctx->workload_name, "conceptual", 10) == 0)
+    {
+        union_bench_param * conc_params = static_cast<union_bench_param*> (sctx->conc_params);
+        // printf("program: %s\n",conc_params->conc_program);
+        // printf("argc: %d\n",conc_params->conc_argc);
+        int i;
+        for (i=0; i<conc_params->conc_argc; i++){
+            conc_params->conc_argv[i] = conc_params->config_in[i];
+        }
+        // conc_params->argv = &conc_params->conc_argv;
+        union_conc_bench_load(conc_params->conc_program, 
+                        conc_params->conc_argc, 
+                        conc_params->conc_argv);
+    } else if(strcmp(sctx->workload_name, "lammps") == 0)
+    {
+        LAMMPS_SWM * lammps_swm = static_cast<LAMMPS_SWM*>(sctx->swm_obj);
+        lammps_swm->call();
+    }
+    else if(strcmp(sctx->workload_name, "nekbone") == 0) 
+    {
+        NEKBONESWMUserCode * nekbone_swm = static_cast<NEKBONESWMUserCode*>(sctx->swm_obj);
+        nekbone_swm->call();
+    }
+    else if(strcmp(sctx->workload_name, "milc") == 0)
+    {
+        MilcSWMUserCode * milc_swm = static_cast<MilcSWMUserCode*>(sctx->swm_obj);
+        milc_swm->call();
+    }
+    else if(strcmp(sctx->workload_name, "nearest_neighbor") == 0)
+    {
+       NearestNeighborSWMUserCode * nn_swm = static_cast<NearestNeighborSWMUserCode*>(sctx->swm_obj);
+       nn_swm->call();
+    }
+    else if(strcmp(sctx->workload_name, "incast") == 0 || strcmp(sctx->workload_name, "incast1") == 0 || strcmp(sctx->workload_name, "incast2") == 0)
+    {
+       AllToOneSWMUserCode * incast_swm = static_cast<AllToOneSWMUserCode*>(sctx->swm_obj);
+       incast_swm->call();
+    }
+}
+
+static int comm_online_workload_load(const char * params, int app_id, int rank)
+{
+    /* LOAD parameters from JSON file*/
+    online_comm_params * o_params = (online_comm_params*)params;
+    int nprocs = o_params->nprocs;
+
+    rank_mpi_context *my_ctx = new rank_mpi_context;
+    //my_ctx = (rank_mpi_context*)caloc(1, sizeof(rank_mpi_context));  
+    assert(my_ctx); 
+    my_ctx->sctx.my_rank = rank; 
+    my_ctx->sctx.num_ranks = nprocs;
+    my_ctx->sctx.wait_id = 0;
+    my_ctx->app_id = app_id;
+
+    // printf("my_ctx nprocs %d\n", my_ctx->sctx.num_ranks);
+
+    void** generic_ptrs;
+    int array_len = 1;
+    generic_ptrs = (void**)calloc(array_len,  sizeof(void*));
+    generic_ptrs[0] = (void*)&rank;
+
+    strcpy(my_ctx->sctx.workload_name, o_params->workload_name);
+    boost::property_tree::ptree root, child;
+    string swm_path, conc_path;
+    bool isconc=0;
+
+    // printf("workload name: %s\n", o_params->workload_name);
+    swm_path.append(SWM_DATAROOTDIR);
+    if(strcmp(o_params->workload_name, "lammps") == 0)
+    {
+        swm_path.append("/lammps_workload.json");
+    }
+    else if(strcmp(o_params->workload_name, "nekbone") == 0)
+    {
+        swm_path.append("/workload.json"); 
+    }
+    else if(strcmp(o_params->workload_name, "milc") == 0)
+    {
+        swm_path.append("/milc_skeleton.json");
+    }
+    else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0)
+    {
+        swm_path.append("/skeleton.json"); 
+    }
+    else if(strcmp(o_params->workload_name, "incast") == 0)
+    {
+        swm_path.append("/incast.json"); 
+    }
+    else if(strcmp(o_params->workload_name, "incast1") == 0)
+    {
+        swm_path.append("/incast1.json"); 
+    }
+    else if(strcmp(o_params->workload_name, "incast2") == 0)
+    {
+        swm_path.append("/incast2.json"); 
+    }    
+    else if(strncmp(o_params->workload_name, "conceptual", 10) == 0)
+    {
+        conc_path.append(UNION_DATADIR);
+        conc_path.append("/conceptual.json");
+        isconc = 1;
+    }
+    else
+        tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name);
+
+    // printf("\nUnion jason path %s\n", conc_path.c_str());
+    if(isconc){
+        try {
+            std::ifstream jsonFile(conc_path.c_str());
+            boost::property_tree::json_parser::read_json(jsonFile, root);
+
+            // printf("workload_name: %s\n", o_params->workload_name);
+            union_bench_param *tmp_params = (union_bench_param *) calloc(1, sizeof(union_bench_param));
+            strcpy(tmp_params->conc_program, &o_params->workload_name[11]);
+            child = root.get_child(tmp_params->conc_program);
+            tmp_params->conc_argc = child.get<int>("argc");
+            int i = 0;
+            BOOST_FOREACH(boost::property_tree::ptree::value_type &v, child.get_child("argv"))
+            {
+                assert(v.first.empty()); // array elements have no names
+                // tmp_params->conc_argv[i] = (char *) v.second.data().c_str();
+                strcpy(tmp_params->config_in[i], v.second.data().c_str());
+                i += 1;           
+            }
+            my_ctx->sctx.conc_params = (void*) tmp_params;
+            my_ctx->sctx.isconc = 1;
+        }
+        catch(std::exception & e)
+        {
+            printf("%s \n", e.what());
+            return -1;
+        }
+    }
+    else {
+        try {
+            std::ifstream jsonFile(swm_path.c_str());
+            boost::property_tree::json_parser::read_json(jsonFile, root);
+            uint32_t process_cnt = root.get<uint32_t>("jobs.size", 1);
+            cpu_freq = root.get<double>("jobs.cfg.cpu_freq") / 1e9; 
+        }
+        catch(std::exception & e)
+        {
+            printf("%s \n", e.what());
+            return -1;
+        }
+        my_ctx->sctx.isconc = 0;
+        if(strcmp(o_params->workload_name, "lammps") == 0)
+        {
+            LAMMPS_SWM * lammps_swm = new LAMMPS_SWM(root, generic_ptrs);
+            my_ctx->sctx.swm_obj = (void*)lammps_swm;
+        }
+        else if(strcmp(o_params->workload_name, "nekbone") == 0)
+        {
+            NEKBONESWMUserCode * nekbone_swm = new NEKBONESWMUserCode(root, generic_ptrs);
+            my_ctx->sctx.swm_obj = (void*)nekbone_swm;
+        }
+        else if(strcmp(o_params->workload_name, "milc") == 0)
+        {   
+            MilcSWMUserCode * milc_swm = new MilcSWMUserCode(root, generic_ptrs);
+            my_ctx->sctx.swm_obj = (void*)milc_swm;
+        }
+        else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0)
+        {
+            NearestNeighborSWMUserCode * nn_swm = new NearestNeighborSWMUserCode(root, generic_ptrs);
+            my_ctx->sctx.swm_obj = (void*)nn_swm;
+        }
+        else if(strcmp(o_params->workload_name, "incast") == 0 || strcmp(o_params->workload_name, "incast1") == 0 || strcmp(o_params->workload_name, "incast2") == 0)
+        {
+            AllToOneSWMUserCode * incast_swm = new AllToOneSWMUserCode(root, generic_ptrs);
+            my_ctx->sctx.swm_obj = (void*)incast_swm;
+        }
+    }
+
+    if(global_prod_thread == NULL)
+    {
+        ABT_xstream_self(&self_es);
+        ABT_thread_self(&global_prod_thread);
+    }
+    ABT_thread_create_on_xstream(self_es, 
+            &workload_caller, (void*)&(my_ctx->sctx),
+            ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer));
+
+    // printf("Rank %d create app thread %p\n", rank, my_ctx->sctx.producer);
+    rank_mpi_compare cmp;
+    cmp.app_id = app_id;
+    cmp.rank = rank;
+
+    if(!rank_tbl)
+    {
+        rank_tbl = qhash_init(hash_rank_compare, quickhash_64bit_hash, nprocs);
+        if(!rank_tbl)
+            return -1;
+    }
+    qhash_add(rank_tbl, &cmp, &(my_ctx->hash_link));
+    rank_tbl_pop++;
+
+    return 0;
+}
+
+static void comm_online_workload_get_next(int app_id, int rank, struct codes_workload_op * op)
+{
+    /* At this point, we will use the "call" function. The send/receive/wait
+     * definitions will be replaced by our own function definitions that will do a
+     * yield to argobots if an event is not available. */
+    /* if shared queue is empty then yield */
+
+    rank_mpi_context * temp_data;
+    struct qhash_head * hash_link = NULL;
+    rank_mpi_compare cmp;
+    cmp.rank = rank;
+    cmp.app_id = app_id;
+    hash_link = qhash_search(rank_tbl, &cmp);
+    if(!hash_link)
+    {
+        printf("\n not found for rank id %d , %d", rank, app_id);
+        op->op_type = CODES_WK_END;
+        return;
+    }
+    temp_data = qhash_entry(hash_link, rank_mpi_context, hash_link);
+    assert(temp_data);
+    while(temp_data->sctx.fifo.empty())
+    {
+        // printf("Rank %d fifo empty, yield to app %p\n", rank, temp_data->sctx.producer);
+        int rc = ABT_thread_yield_to(temp_data->sctx.producer); 
+    }
+    struct codes_workload_op * front_op = temp_data->sctx.fifo.front();
+    assert(front_op);
+    // printf("Pop op %d to CODES\n", front_op->op_type);
+    *op = *front_op;
+    temp_data->sctx.fifo.pop_front();
+    return;
+}
+static int comm_online_workload_get_rank_cnt(const char *params, int app_id)
+{
+    online_comm_params * o_params = (online_comm_params*)params;
+    int nprocs = o_params->nprocs;
+    return nprocs;
+}
+
+static int comm_online_workload_finalize(const char* params, int app_id, int rank)
+{
+    // printf("Rank %d: Finalize workload for app %d\n", rank, app_id);
+    rank_mpi_context * temp_data;
+    struct qhash_head * hash_link = NULL;
+    rank_mpi_compare cmp;
+    cmp.rank = rank;
+    cmp.app_id = app_id;
+    hash_link = qhash_search(rank_tbl, &cmp);
+    if(!hash_link)
+    {
+        printf("\n not found for rank id %d , %d", rank, app_id);
+        return -1;
+    }
+    temp_data = qhash_entry(hash_link, rank_mpi_context, hash_link);
+    assert(temp_data);
+
+    int rc;
+    rc = ABT_thread_join(temp_data->sctx.producer);    
+    // printf("thread terminate rc=%d\n", rc);
+    rc = ABT_thread_free(&(temp_data->sctx.producer));
+    // printf("thread free rc=%d\n", rc);
+    if (temp_data->sctx.isconc){
+        // printf("free conceptual params\n");
+        free(temp_data->sctx.conc_params);   
+    }                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
+    return 0;
+}
+extern "C" {
+/* workload method name and function pointers for the CODES workload API */
+struct codes_workload_method conc_online_comm_workload_method =
+{
+    //.method_name =
+    (char*)"conc_online_comm_workload",
+    //.codes_workload_read_config = 
+    NULL,
+    //.codes_workload_load = 
+    comm_online_workload_load,
+    //.codes_workload_get_next = 
+    comm_online_workload_get_next,
+    // .codes_workload_get_next_rc2 = 
+    NULL,
+    // .codes_workload_get_rank_cnt
+    comm_online_workload_get_rank_cnt,
+    // .codes_workload_finalize = 
+    comm_online_workload_finalize
+};
+} // closing brace for extern "C"
+
diff --git a/src/workload/methods/codes-online-comm-wrkld.C b/src/workload/methods/codes-online-comm-wrkld.C
index 1ded7a92..7e012552 100644
--- a/src/workload/methods/codes-online-comm-wrkld.C
+++ b/src/workload/methods/codes-online-comm-wrkld.C
@@ -950,10 +950,10 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran
 }
 extern "C" {
 /* workload method name and function pointers for the CODES workload API */
-struct codes_workload_method online_comm_workload_method =
+struct codes_workload_method swm_online_comm_workload_method =
 {
     //.method_name =
-    (char*)"online_comm_workload",
+    (char*)"swm_online_comm_workload",
     //.codes_workload_read_config = 
     NULL,
     //.codes_workload_load = 

From 4e889d712e026102009199b1fda8352338576b94 Mon Sep 17 00:00:00 2001
From: Xin Wang <xwang149@hawk.iit.edu>
Date: Tue, 11 Apr 2023 12:10:16 -0500
Subject: [PATCH 026/188] add doc for union online workload

---
 doc/workload/union_online_workload.txt | 67 ++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 doc/workload/union_online_workload.txt

diff --git a/doc/workload/union_online_workload.txt b/doc/workload/union_online_workload.txt
new file mode 100644
index 00000000..1dcbac08
--- /dev/null
+++ b/doc/workload/union_online_workload.txt
@@ -0,0 +1,67 @@
+This document serves the following purposes:
+* CODES updates to accommodate Union online simulations
+* Installation tutorial
+* Known issues
+
+= CODES updates
+
+The code modifications are started with comment text "Xin:"
+
+== Header file
+
+Added parameters for collecting router traffic data, including:
+* codes/model-net.h
+* codes/net/dragonfly-custom.h
+* codes/net/dragonfly-dally.h
+
+== Makefile
+
+Added checking for Union installation in the autoconf configure script configure.ac
+Added src/workload/methods/codes-conc-online-comm-wrkld.C to code base if compile with Union in Makefile.am
+
+== Union online workload
+
+We add a pluggable workload module "src/workload/methods/codes-conc-online-comm-wrkld.C" into CODES workload generator to hold the actual implementation of Union communication events, such that the messages from Union skeletons can be emitted as simulation events in CODES.
+
+== Router status collection for dragonfly custom and dragonfly dally
+
+Added supportive functions for collecting traffic data on router port on the following network models:
+* dragonfly custom at src/networks/model-net/dragonfly-custom.C
+* dragonfly dally at src/networks/model-net/dragonfly-dally.C
+
+== Updates in MPI replay
+
+Added Union online workload type in MPI workload replay at src/network-workloads/model-net-mpi-replay.c
+
+== Configurations
+
+We added the following items in the CODES configuration file for collecting router traffic information during simulation.
+
+* counting_bool - flag to enable/disable the collection of trouter traffic
+* counting_start - the start time in microsecond for collecting traffic data
+* counting_interval - the time window size in microsecond for collection traffic data
+* counting_windows - the number of time windows for collecting traffic data
+* num_apps - the number of applications in the simulation workload
+* offset - supportive parameter for getting the application id of each packet 
+
+An example configuration can be found at: https://github.com/SPEAR-IIT/Union/blob/master/test/df1d-72-adp.conf
+
+= Installation tutorial
+
+Please follow the Readme at: https://github.com/SPEAR-IIT/Union to install Union and run test simulation of Union online workloads.
+
+= Completed Experiments 
+
+We have completed the following experiments with Union online workload simulation:
+* simulate Conceptual skeletons alone
+* simulate Conceptual and SWM skeletons simultaneously
+* simulate Conceptual and SWM skeletons simultaneously with different synthetic traffic patterns
+
+The above experiments have been done on both dragonfly custom and dragonfly dally network models, with sequential mode and optimistic mode.
+
+= Known Issues
+
+Currently the rendezvous protocol in MPI replay cannot work with Union online workloads.
+The reverse function router_buf_update_rc() does not take care of the cross window reverses for aggregated busytime on port.
+
+

From 72db8aaf8e9eb5669de12059406d6e4167c22ecb Mon Sep 17 00:00:00 2001
From: Xin Wang <xwang149@hawk.iit.edu>
Date: Tue, 11 Apr 2023 12:15:24 -0500
Subject: [PATCH 027/188] typo fix

---
 doc/workload/union_online_workload.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/workload/union_online_workload.txt b/doc/workload/union_online_workload.txt
index 1dcbac08..fcd9938f 100644
--- a/doc/workload/union_online_workload.txt
+++ b/doc/workload/union_online_workload.txt
@@ -1,6 +1,7 @@
 This document serves the following purposes:
 * CODES updates to accommodate Union online simulations
 * Installation tutorial
+* Completed Experiments
 * Known issues
 
 = CODES updates

From 45899ef76537c81d9a547ba29e5b867eb262d6ac Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 24 Apr 2023 12:23:47 -0400
Subject: [PATCH 028/188] Enabling surrogate switch even when tie-breaker
 mechanism is disabled

---
 src/util/rc-stack.c  |  10 +++-
 src/util/surrogate.c | 114 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c
index 7dcef16a..ebb2131f 100644
--- a/src/util/rc-stack.c
+++ b/src/util/rc-stack.c
@@ -16,7 +16,11 @@ enum rc_stack_mode {
 };
 
 typedef struct rc_entry_s {
+#ifdef USE_RAND_TIEBREAKER
     tw_event_sig e_sig; // ROSS 2D event timestamp (.recv_ts & .event_tiebreaker)
+#else
+    tw_stime time;
+#endif
     void * data;
     void (*free_fn)(void*);
     struct qlist_head ql;
@@ -63,7 +67,11 @@ void rc_stack_push(
     if (s->mode != RC_NONOPT || free_fn == NULL) {
         rc_entry * ent = (rc_entry*)malloc(sizeof(*ent));
         assert(ent);
-        ent->e_sig = tw_now_sig(lp);  // NOTE(helq): This should fail if USE_RAND_TIEBREAKER is deactivated, shouldn't it?
+#ifdef USE_RAND_TIEBREAKER
+        ent->e_sig = tw_now_sig(lp);
+#else
+        ent->time = tw_now(lp);
+#endif
         ent->data = data;
         ent->free_fn = free_fn;
         qlist_add_tail(&ent->ql, &s->head);
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index b6c07385..f03e49bb 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -211,6 +211,7 @@ static inline bool does_any_pe(bool val) {
 //}
 
 
+#ifdef USE_RAND_TIEBREAKER
 static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
     // Backtracking the simulation to GVT
     //double const switch_ = switch_at.time_stampts[switch_at.current_i];
@@ -219,6 +220,16 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
         tw_kp_rollback_to_sig(g_tw_kp[i], gvt);
     }
     assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
+#else
+static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
+    // Backtracking the simulation to GVT
+    //double const switch_ = switch_at.time_stampts[switch_at.current_i];
+    for (unsigned int i = 0; i < g_tw_nkp; i++) {
+        //tw_event_sig const smallest = find_sig_smallest_larger_than(switch_, g_tw_kp[i], gvt);
+        tw_kp_rollback_to(g_tw_kp[i], gvt);
+    }
+    assert(pe->GVT == gvt);
+#endif
 
     // Making sure that everything gets cleaned up properly (AVL tree should be empty by the end)
     do {
@@ -234,7 +245,11 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
         tw_gvt_step2(pe);
 
         if (DEBUG_DIRECTOR > 1) {
+#ifdef USE_RAND_TIEBREAKER
             printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, pe->GVT_sig.recv_ts, pe->avl_tree_size);
+#else
+            printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, pe->GVT, pe->avl_tree_size);
+#endif
         }
     } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0));
 
@@ -243,7 +258,11 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
     }
 }
 
+#ifdef USE_RAND_TIEBREAKER
 static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
+#else
+static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
+#endif
     tw_event * next_event = tw_pq_dequeue(pe->pq);
 
     // If there aren't any events left to process, the simulation has already finished and we have nothing to do
@@ -256,7 +275,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
     double switch_offset = g_tw_ts_end;
     if (switch_at.current_i + 1 < switch_at.total) {
         double const next_switch = switch_at.time_stampts[switch_at.current_i + 1];
+#ifdef USE_RAND_TIEBREAKER
         double const pre_switch_time = gvt.recv_ts;  // pe->GVT_sig.recv_ts;
+#else
+        double const pre_switch_time = gvt;
+#endif
         switch_offset = next_switch - pre_switch_time;
         assert(pre_switch_time < next_switch);
         //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
@@ -268,7 +291,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
     while (next_event) {
         // Filtering events to freeze
         assert(next_event->prev == NULL);
+#ifdef USE_RAND_TIEBREAKER
         assert(tw_event_sig_compare(next_event->sig, gvt) >= 0);
+#else
+        assert(next_event->recv_ts >= gvt);
+#endif
 
         // finding out lp type
         char const * lp_type_name;
@@ -279,11 +306,17 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
         // shifting time stamps to the future for events to freeze
         if (lp_type_switch && lp_type_switch->should_event_be_frozen
                 && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) {
+#ifdef USE_RAND_TIEBREAKER
             assert(next_event->recv_ts == next_event->sig.recv_ts);
             next_event->recv_ts += switch_offset;
             next_event->sig.recv_ts = next_event->recv_ts;
         }
         assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts);
+#else
+            next_event->recv_ts += switch_offset;
+        }
+        assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at);
+#endif
 
         // store event in deque_events to inject immediately back to the queue
         next_event->prev = dequed_events;
@@ -320,6 +353,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
 // - Looking at all events in the PE, "freezing" those in the network model
 //   and letting the workload events be processed further
 // - Going through every LP and calling their respective functions
+#ifdef USE_RAND_TIEBREAKER
 static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
     if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
@@ -331,6 +365,19 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
         //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
         assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
     }
+#else
+static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
+    if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
+        tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
+    }
+
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(pe->GVT == gvt);
+        rollback_and_cancel_events_pe(pe, gvt);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
+        assert(pe->GVT == gvt);
+    }
+#endif
 
     shift_events_to_future_pe(pe, gvt);
 
@@ -342,7 +389,11 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
         // Modifying current time for LPs (technically, KPs) so that they
         // coincide with current GVT (the current GVT often does not
         // correspond to the (last) time stored in KPs).
+#ifdef USE_RAND_TIEBREAKER
         lp->kp->last_sig = gvt;
+#else
+        lp->kp->last_time = gvt;
+#endif
 
         char const * lp_type_name;
         int rep_id, offset; // unused
@@ -367,6 +418,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
 }
 
 
+#ifdef USE_RAND_TIEBREAKER
 static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
     (void) pe;
 
@@ -376,6 +428,17 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
         //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
         assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
     }
+#else
+static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
+    (void) pe;
+
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(pe->GVT == gvt);
+        rollback_and_cancel_events_pe(pe, gvt);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
+        assert(pe->GVT == gvt);
+    }
+#endif
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -385,8 +448,13 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
         // Modifying current time for LPs (technically, KPs) so that they
         // coincide with current GVT (the current GVT often does not
         // correspond to the (last) time stored in KPs).
+#ifdef USE_RAND_TIEBREAKER
         tw_event_sig const previous_sig = lp->kp->last_sig;
         lp->kp->last_sig = gvt;
+#else
+        tw_stime const previous_time = lp->kp->last_time;
+        lp->kp->last_time = gvt;
+#endif
 
         char const * lp_type_name;
         int rep_id, offset; // unused
@@ -408,12 +476,20 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
             }
         }
 
+#ifdef USE_RAND_TIEBREAKER
         lp->kp->last_sig = previous_sig;
+#else
+        lp->kp->last_time = previous_time;
+#endif
     }
 }
 
 
+#ifdef USE_RAND_TIEBREAKER
 static void director_fun(tw_pe * pe, tw_event_sig gvt) {
+#else
+static void director_fun(tw_pe * pe, tw_stime gvt) {
+#endif
     assert(is_surrogate_configured);
 
     static int i = 0;
@@ -423,8 +499,13 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
             fflush(stdout);
         }
         if (DEBUG_DIRECTOR == 3) {
+#ifdef USE_RAND_TIEBREAKER
             printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt.recv_ts,
                     surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+#else
+            printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt,
+                    surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+#endif
 
             switch (g_tw_trigger_arbitrary_fun.active) {
                 case ARBITRARY_FUN_enabled:
@@ -440,6 +521,7 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
         }
     }
 
+#ifdef USE_RAND_TIEBREAKER
     // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic
     assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT_sig.recv_ts == gvt.recv_ts));
 
@@ -458,6 +540,25 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     } else {
         return;
     }
+#else
+    // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic
+    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT == gvt));
+
+    // Do not process if the simulation ended
+    if (gvt >= g_tw_ts_end) {
+        return;
+    }
+
+    // Detecting if we are going to switch
+    if (switch_at.current_i < switch_at.total
+            && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
+        double const switch_time = switch_at.time_stampts[switch_at.current_i];
+        assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]);
+        assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
+    } else {
+        return;
+    }
+#endif
 
     double const start = tw_clock_read();
     // Asking the director/model to switch
@@ -465,7 +566,11 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
         if (DEBUG_DIRECTOR == 2) {
             printf("\n");
         }
+#ifdef USE_RAND_TIEBREAKER
         printf("Switching at %g", gvt.recv_ts);
+#else
+        printf("Switching at %g", gvt);
+#endif
     }
     surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -487,10 +592,15 @@ static void director_fun(tw_pe * pe, tw_event_sig gvt) {
     if (++switch_at.current_i < switch_at.total) {
         double const next_switch = switch_at.time_stampts[switch_at.current_i];
         // Setting trigger for next switch
+#ifdef USE_RAND_TIEBREAKER
         tw_event_sig time_stamp = {0};
         time_stamp.recv_ts = next_switch;
         //printf("Adding a trigger to activate next switch!\n");
         tw_trigger_arbitrary_fun_at(time_stamp);
+#else
+        //printf("Adding a trigger to activate next switch!\n");
+        tw_trigger_arbitrary_fun_at(next_switch);
+#endif
     }
 
     if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) {
@@ -548,9 +658,13 @@ void surrogate_configure(
         // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
         g_tw_gvt_arbitrary_fun = director_fun;
 
+#ifdef USE_RAND_TIEBREAKER
         tw_event_sig time_stamp = {0};
         time_stamp.recv_ts = switch_at.time_stampts[0];
         tw_trigger_arbitrary_fun_at(time_stamp);
+#else
+        tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]);
+#endif
 
         // freeing timestamps before it dissapears
         for (size_t i = 0; i < len; i++) {

From 27f9e9e6c02b3a6a3eb5d2dd9f400e3cd4f2c08f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 3 May 2023 15:51:47 -0400
Subject: [PATCH 029/188] Extending functionality to compile and run without
 tie-breaker mechanism

---
 src/networks/model-net/dragonfly-dally.C |  3 +
 src/util/surrogate.c                     | 74 +++++++++---------------
 2 files changed, 29 insertions(+), 48 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index ce59dfe1..fade296b 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4638,6 +4638,8 @@ static void process_terminal_notification_event(terminal_state * s, tw_bf * bf,
 // This function triggers an event that is completely ignored when processed later. The number of events produced by a terminal/router DOES alter the simulation results. (The number of events processed by an LP shouldn't be a parameter to the simulation itself, but it is weirdly).
 static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * msg, tw_lp * lp)
 {
+    (void) s;
+    (void) msg;
     terminal_dally_message * new_msg;
     tw_event *e = model_net_method_event_new(
             lp->gid, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
@@ -4650,6 +4652,7 @@ static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * m
 //used by packet_arrive()
 static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size)
 {
+    (void) s;
     void * tmp_ptr = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
     
     tw_stime ts = 0;
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index f03e49bb..c5c7b21d 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -212,20 +212,18 @@ static inline bool does_any_pe(bool val) {
 
 
 #ifdef USE_RAND_TIEBREAKER
-static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt) {
+static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) {
+    tw_stime const gvt = gvt_sig.recv_ts;
     // Backtracking the simulation to GVT
-    //double const switch_ = switch_at.time_stampts[switch_at.current_i];
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        //tw_event_sig const smallest = find_sig_smallest_larger_than(switch_, g_tw_kp[i], gvt);
-        tw_kp_rollback_to_sig(g_tw_kp[i], gvt);
+        tw_kp_rollback_to_sig(g_tw_kp[i], gvt_sig);
     }
-    assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
+    assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+    assert(pe->GVT_sig.recv_ts == gvt);  // redundant but needed because compiler cries that gvt is never used
 #else
 static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
     // Backtracking the simulation to GVT
-    //double const switch_ = switch_at.time_stampts[switch_at.current_i];
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        //tw_event_sig const smallest = find_sig_smallest_larger_than(switch_, g_tw_kp[i], gvt);
         tw_kp_rollback_to(g_tw_kp[i], gvt);
     }
     assert(pe->GVT == gvt);
@@ -245,11 +243,7 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
         tw_gvt_step2(pe);
 
         if (DEBUG_DIRECTOR > 1) {
-#ifdef USE_RAND_TIEBREAKER
-            printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, pe->GVT_sig.recv_ts, pe->avl_tree_size);
-#else
-            printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, pe->GVT, pe->avl_tree_size);
-#endif
+            printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size);
         }
     } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0));
 
@@ -259,7 +253,8 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
 }
 
 #ifdef USE_RAND_TIEBREAKER
-static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt) {
+static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt_sig) {
+    tw_stime gvt = gvt_sig.recv_ts;  // pe->GVT_sig.recv_ts;
 #else
 static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
 #endif
@@ -275,11 +270,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
     double switch_offset = g_tw_ts_end;
     if (switch_at.current_i + 1 < switch_at.total) {
         double const next_switch = switch_at.time_stampts[switch_at.current_i + 1];
-#ifdef USE_RAND_TIEBREAKER
-        double const pre_switch_time = gvt.recv_ts;  // pe->GVT_sig.recv_ts;
-#else
         double const pre_switch_time = gvt;
-#endif
         switch_offset = next_switch - pre_switch_time;
         assert(pre_switch_time < next_switch);
         //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
@@ -292,7 +283,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
         // Filtering events to freeze
         assert(next_event->prev == NULL);
 #ifdef USE_RAND_TIEBREAKER
-        assert(tw_event_sig_compare(next_event->sig, gvt) >= 0);
+        assert(tw_event_sig_compare(next_event->sig, gvt_sig) >= 0);
 #else
         assert(next_event->recv_ts >= gvt);
 #endif
@@ -486,7 +477,8 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
 
 
 #ifdef USE_RAND_TIEBREAKER
-static void director_fun(tw_pe * pe, tw_event_sig gvt) {
+static void director_fun(tw_pe * pe, tw_event_sig gvt_sig) {
+    tw_stime const gvt = gvt_sig.recv_ts;
 #else
 static void director_fun(tw_pe * pe, tw_stime gvt) {
 #endif
@@ -499,13 +491,8 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
             fflush(stdout);
         }
         if (DEBUG_DIRECTOR == 3) {
-#ifdef USE_RAND_TIEBREAKER
-            printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt.recv_ts,
-                    surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
-#else
             printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt,
                     surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
-#endif
 
             switch (g_tw_trigger_arbitrary_fun.active) {
                 case ARBITRARY_FUN_enabled:
@@ -521,28 +508,12 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
         }
     }
 
-#ifdef USE_RAND_TIEBREAKER
     // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic
-    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT_sig.recv_ts == gvt.recv_ts));
-
-    // Do not process if the simulation ended
-    if (gvt.recv_ts >= g_tw_ts_end) {
-        return;
-    }
-
-    // Detecting if we are going to switch
-    if (switch_at.current_i < switch_at.total
-            && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
-        double const now = gvt.recv_ts;
-        double const switch_time = switch_at.time_stampts[switch_at.current_i];
-        assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
-        assert(now >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
-    } else {
-        return;
-    }
+#ifdef USE_RAND_TIEBREAKER
+    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT_sig.recv_ts == gvt));
 #else
-    // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic
     assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT == gvt));
+#endif
 
     // Do not process if the simulation ended
     if (gvt >= g_tw_ts_end) {
@@ -553,12 +524,15 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
     if (switch_at.current_i < switch_at.total
             && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
         double const switch_time = switch_at.time_stampts[switch_at.current_i];
+#ifdef USE_RAND_TIEBREAKER
+        assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
+#else
         assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]);
+#endif
         assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
     } else {
         return;
     }
-#endif
 
     double const start = tw_clock_read();
     // Asking the director/model to switch
@@ -566,11 +540,7 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
         if (DEBUG_DIRECTOR == 2) {
             printf("\n");
         }
-#ifdef USE_RAND_TIEBREAKER
-        printf("Switching at %g", gvt.recv_ts);
-#else
         printf("Switching at %g", gvt);
-#endif
     }
     surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -581,10 +551,18 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
     if (freeze_network_on_switch) {
         if (surr_config.director.is_surrogate_on()) {
             model_net_method_switch_to_surrogate();
+#ifdef USE_RAND_TIEBREAKER
+            events_high_def_to_surrogate_switch(pe, gvt_sig);
+#else
             events_high_def_to_surrogate_switch(pe, gvt);
+#endif
         } else {
             model_net_method_switch_to_highdef();
+#ifdef USE_RAND_TIEBREAKER
+            events_surrogate_to_high_def_switch(pe, gvt_sig);
+#else
             events_surrogate_to_high_def_switch(pe, gvt);
+#endif
         }
     }
 

From 92355ca5508bdcd7e880971bdd1f46c8af3423fc Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 10 May 2023 07:15:28 -0400
Subject: [PATCH 030/188] Simplifying zombie notification strategy

The previous strategy required to keep track of all packets on arrival.
This was done to prevent duplicated packets from being processed at the
destination terminal. A notification of arrival is sent once the packet
is processed, but this notification in not zero-offset, so the thought
was "when switching to surrogate, a source terminal might not get the
notification a notification (it's scheduled for a future point in time)
and thus the source terminal will assume that the event has not being
processed and has to be notified of being a zombie, which means that we
have to keep track of all packets processed at the destination to
prevent duplicates".

The reasoning was correct, but there is another workaround. We can look
at the future and see if the notification has been sent, yet not
processed by the source terminal! This reduces the complexity on the
network model side as well as reduces memory consumption and time :).
---
 codes/model-net-lp.h                       |  14 +-
 codes/surrogate.h                          |   5 +-
 src/networks/model-net/core/model-net-lp.c |  27 +--
 src/networks/model-net/dragonfly-dally.C   | 234 +++++++++------------
 src/util/surrogate.c                       |  85 +++++++-
 5 files changed, 200 insertions(+), 165 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index 147ce248..2f02ee4f 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -126,7 +126,7 @@ void model_net_method_switch_to_highdef(void);
 
 // It will call the function (pointer) on the internal structure/network model.
 // The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype
-void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp));
+void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp, tw_event **), tw_event **);
 
 /// The following functions/data structures should not need to be used by
 /// model developers - they are just provided so other internal components can
@@ -180,15 +180,11 @@ typedef struct model_net_wrap_msg {
     } msg;
 } model_net_wrap_msg;
 
-typedef bool (*should_msg_be_frozen_f) (void*); // topology-specific should it be frozen question
+// Returns the (hidden) event type of the current event
+int model_net_get_event_type_lp(model_net_wrap_msg *);
 
-// Determines if given event should be frozen. It will return true for events of a type contained in `freeze_types`, it will optionally call the topology-specific `should_freeze_question` to check if the event is to be frozen (active only if MN_BASE_PASS is not contained in `freeze_types`)
-bool model_net_should_event_be_frozen(
-        tw_lp * lp,
-        model_net_wrap_msg * msg,  // message to check if has to be frozen
-        int freeze_types,  // events of type "contained" in this will be frozen. An example is the "enum" `MN_BASE_SAMPLE | MN_CONGESTION_EVENT | MN_BASE_END_NOTIF` which will freeze events of those three types and will check on the supplied function below whether the internal model decides to freeze or not
-        should_msg_be_frozen_f should_freeze_question  // this function will be called if the type of the message is MN_BASE_PASS and it hasn't been indicated above that it will be frozen. If NULL and MN_BASE_PASS has not being indicated above, then it won't be frozen
-);
+// Extracting message contained within event MN_BASE_PASS
+void * model_net_method_msg_from_tw_event(tw_lp *, model_net_wrap_msg *);
 
 #ifdef __cplusplus
 }
diff --git a/codes/surrogate.h b/codes/surrogate.h
index 258cbd9b..cb79f242 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -84,7 +84,10 @@ struct director_data {
  * Configuration specifics
  */
 
-typedef void (*model_switch_f) (void * data, tw_lp * lp); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
+// Switches back and forth from surrogate mode as defined by network model
+// (e.g, by dragonfly-dally.C)
+// Parameters: `data` corresponds to the lp sub-state, lp is the lp pointer, and the array of events in queue (to be processed)
+typedef void (*model_switch_f) (void * data, tw_lp * lp, tw_event **);
 typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
 
 struct lp_types_switch {
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 34be0cf9..73c39b2f 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1147,29 +1147,24 @@ void model_net_method_switch_to_highdef_lp(tw_lp * lp) {
     ns->in_sched_recv_loop |= ns->sched_recv_loop_pre_surrogate;
 }
 
-void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp)) {
+void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp, tw_event **), tw_event ** lp_events) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
-    fun(ns->sub_state, lp);
+    fun(ns->sub_state, lp, lp_events);
 }
 
-bool model_net_should_event_be_frozen(
-        tw_lp * lp,
-        model_net_wrap_msg * msg,
-        int freeze_types,
-        should_msg_be_frozen_f should_freeze_question
-) {
+int model_net_get_event_type_lp(model_net_wrap_msg * msg) {
+    return msg->h.event_type;
+}
+
+void * model_net_method_msg_from_tw_event(tw_lp * lp, model_net_wrap_msg * msg) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
-    if (msg->h.event_type & freeze_types) { // Finding out whether current event type is one of freeze types
-        return true;
-    } else if (msg->h.event_type & MN_BASE_PASS) { // pass down to topology-specific event handler
-        if (should_freeze_question) {
-            void * const sub_msg = ((char*)msg)+msg_offsets[ns->net_id];
-            return should_freeze_question(sub_msg);
-        }
+    if (msg->h.event_type & MN_BASE_PASS) { // grab sub message
+        void * const sub_msg = ((char*)msg)+msg_offsets[ns->net_id];
+        return sub_msg;
     }
-    return false;
+    return NULL;
 }
 
 /*
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index fade296b..03c50641 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -195,8 +195,8 @@ static bool is_surrogate_on = false;
 static struct packet_latency_predictor * terminal_predictor = NULL;
 static void switch_surrogate(void);
 static bool is_surrogate_on_fun(void);
-static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp);
-static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp);
+static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp, tw_event **);
+static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp, tw_event **);
 static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event);
 static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event);
 //
@@ -358,14 +358,13 @@ enum event_t
     R_SNAPSHOT, //used for timed statistic outputs
     T_NOTIFY,  // used to notify a source or destination terminal about packets status (useful for informing about latency, zombie packet or delete a zombie packet)
     T_ARRIVE_PREDICTED,  // this event is generated by a latency predictor instead of traversing the network
-    T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event
+    T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event that allows us to keep the number of events produced in a simulation the same regardless of whether packet latency is activated (can be safely removed)
 };
 
 // Types of notifications between terminals
 enum notify_t {
     NOTIFY_LATENCY,  // Notifying the source terminal of the total latency to deliver the packet
     NOTIFY_ZOMBIE,   // Notifying the destination terminal of a packet that should be treated as a zombie
-    NOTIFY_ZOMBIE_DEL,  // Notifying the destination terminal that the packet we thought was a zombie was in fact not (just to clean space)
 };
 
 /* whether the last hop of a packet was global, local or a terminal */
@@ -603,9 +602,6 @@ struct terminal_state
     // Zombie events appear when the network traffic is displaced to the future. By then, all packets that were in the network should have been already delievered, thus zombies
     set<struct packet_id> zombies;
 
-    // Events that will arrive to this terminal
-    set<struct packet_id> arrived_here;
-
     // Variable to save the entire state of the terminal into before switching to surrogate mode. During surrogate-mode, the terminal should not access the state of the network
     terminal_state * frozen_state;
 };
@@ -1206,6 +1202,8 @@ static tw_stime gen_noise(tw_lp *lp, short* rng_counter)
     (*rng_counter)++;
     return noise;
 #else
+    (void) lp;
+    (void) rng_counter;
     return 0;
 #endif
 }
@@ -2869,10 +2867,39 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
     }
 }
 
+// Constructs a hashmap with all the T_NOTIFY events to be processed.
+// The key of the list is the GID for the source terminal. The value of the
+// hash is the end time
+static map<uint64_t, double> construct_map_of_T_NOTIFY_events(
+        tw_lp * lp, tw_event ** const terminal_events) {
+    // hash map to store T_NOTIFY events found (`packet_ID` and `travel_end_time`)
+    map<uint64_t, double> notification_events_map;
+
+    for (size_t i = 0; terminal_events && terminal_events[i] != NULL; i++) {
+        assert(terminal_events[i]->dest_lpid == lp->gid);
+        tw_event * event = terminal_events[i];
+        int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event));
+        // if event is T_NOTIFY, add event relevant data into hash map for T_NOTIFY event
+        if (event_type == MN_BASE_PASS) {
+            terminal_dally_message * msg = (terminal_dally_message *)
+                model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event));
+            if (msg->type == T_NOTIFY) {
+                assert(msg->notify_type == NOTIFY_LATENCY);
+                notification_events_map[msg->packet_ID] = msg->travel_end_time;
+            }
+        }
+    }
+
+    return notification_events_map;
+}
+
 // This function never rollsback because it's called at GVT
-static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp) {
+static void dragonfly_dally_terminal_highdef_to_surrogate(
+        terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
     process_packet_latencies(s, lp);
 
+    auto notification_events_map = construct_map_of_T_NOTIFY_events(lp, terminal_events);
+
     // Going through every packet that was sent but not yet received, remove it
     // from the list, send it to its destination using the predictor, and
     // notify of its zombie status.
@@ -2886,15 +2913,26 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
         double latency = 
             terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
+        bool const in_sent_packets_latency =
+            !s->sent_packets_latency.empty() && start.packet_ID == s->sent_packets_latency.top().packet_ID;
+        // Finding out whether the T_NOTIFY is on the list of messages to be processed
+        bool const in_events_to_process = !in_sent_packets_latency &&
+            notification_events_map.count(start.packet_ID) == 1;
+
         // The packet was delievered and its latency is known (we were notified). Delete packet from stack
-        if (!s->sent_packets_latency.empty() && start.packet_ID == s->sent_packets_latency.top().packet_ID) {
+        if (in_sent_packets_latency) {
             auto const end = s->sent_packets_latency.top();
             s->sent_packets_latency.pop();
             packet_latency_save_to_file(s->terminal_id, start, end, false);
+        } else if (in_events_to_process) {
+            auto const end = (struct packet_end) {
+                .packet_ID = start.packet_ID,
+                .travel_end_time = notification_events_map[start.packet_ID],
+            };
+            packet_latency_save_to_file(s->terminal_id, start, end, false);
+        // The packet has not been delievered, or we haven't received the notification yet.
+        // Send directly to destination and notify of zombie event
         } else {
-            // The packet has not been delievered, or we haven't received the notification yet.
-            // Send directly to destination and notify of zombie event
-
             double arrival = start.travel_start_time + latency;
             if (arrival < tw_now(lp)) {
                 arrival = tw_now(lp);
@@ -2976,7 +3014,6 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
     s->total_msg_size               = frozen_state->total_msg_size;
     s->finished_msgs                = frozen_state->finished_msgs;
     s->in_queue_delay               = frozen_state->in_queue_delay;
-    memcpy(&s->arrived_here,         &frozen_state->arrived_here,         sizeof(s->arrived_here));
     memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
     memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
     memcpy(&s->sent_packets_latency, &frozen_state->sent_packets_latency, sizeof(s->sent_packets_latency));
@@ -2986,8 +3023,10 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
 
 // This function never rollsback because it's called at GVT
 // Note: this function CANNOT generate any events, because it is to be used in `dragonfly_dally_terminal_final`
-static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp) {
+static void dragonfly_dally_terminal_surrogate_to_highdef(
+        terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
     (void) lp;
+    (void) terminal_events;
     //printf("Terminal %d (PID: %d) switching back to high-def at %e\n", s->terminal_id, lp->gid, tw_now(lp));
 
     // Re-instanciating pre-transition (before surrogate was turned on) terminal state
@@ -3012,7 +3051,6 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw
     frozen_state->total_msg_size               = s->total_msg_size;
     frozen_state->finished_msgs                = s->finished_msgs;
     frozen_state->in_queue_delay               = s->in_queue_delay;
-    memcpy(&frozen_state->arrived_here,         &s->arrived_here,         sizeof(s->arrived_here));
     memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
     memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
     memcpy(&frozen_state->sent_packets_latency, &s->sent_packets_latency, sizeof(s->sent_packets_latency));
@@ -3025,22 +3063,34 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw
 static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event) {
     (void) lp;
     assert(lp->gid == event->dest_lpid);
+    // Freeze everything except for MN_BASE_NEW_MSG's
     int const event_types_to_freeze = MN_BASE_SCHED_NEXT | MN_BASE_SAMPLE | MN_BASE_PASS | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT;
-    return model_net_should_event_be_frozen(lp, (model_net_wrap_msg *) tw_event_data(event), event_types_to_freeze, NULL);
-}
-
-static bool dragonfly_dally_router_should_event_be_frozen_internal(terminal_dally_message * msg) {
-    if (msg->type == R_SNAPSHOT) { // Snapshots will stay unaltered, never frozen
-        return false;
+    int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event));
+    if (event_type & event_types_to_freeze) { // Finding out whether current event type is one of freeze types
+        return true;
     }
-    return true;
+    return false;
 }
 
+// Freezing all events except for R_SNAPSHOT's!!
 static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event) {
     assert(lp->gid == event->dest_lpid);
+    // Freeze everything except for MN_BASE_PASS!
     int const event_types_to_freeze = MN_BASE_NEW_MSG | MN_BASE_SCHED_NEXT | MN_BASE_SAMPLE | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT;
-    return model_net_should_event_be_frozen(lp, (model_net_wrap_msg *) tw_event_data(event), event_types_to_freeze,
-            (should_msg_be_frozen_f) dragonfly_dally_router_should_event_be_frozen_internal);
+    int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event));
+    if (event_type & event_types_to_freeze) { // Finding out whether current event type is one of freeze types
+        return true;
+    }
+
+    // Ignore (ie, do not freeze) R_SNAPSHOT's!
+    assert(event_type == MN_BASE_PASS);
+    terminal_dally_message * msg = (terminal_dally_message *) model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event));
+    assert(msg != NULL);
+    if (msg->type == R_SNAPSHOT) { // Snapshots will stay unaltered, never frozen
+        return false;
+    }
+
+    return true;
 }
 //
 // ==== END OF Surrogate functions definition ====
@@ -3177,9 +3227,6 @@ static void terminal_dally_commit(terminal_state * s,
                     .travel_end_time = msg->travel_end_time});
 
             process_packet_latencies(s, lp);
-        } else {
-            // The notification for zombie deletion should have been sent
-            assert(bf->c14);
         }
     }
 }
@@ -3397,7 +3444,6 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     new (&s->sent_packets) deque<struct packet_start>();
     new (&s->sent_packets_latency) priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)>();
     new (&s->zombies) set<struct packet_id>();
-    new (&s->arrived_here) set<struct packet_id>();
     s->frozen_state = NULL;
 
     // alloc'ing memory for predictor, calling initiliazer for predictor
@@ -4525,14 +4571,11 @@ static void notify_dest_lp_of(
         terminal_dally_message * msg,
         enum notify_t notification
         ) {
-    assert(NOTIFY_ZOMBIE == notification || notification == NOTIFY_ZOMBIE_DEL);
+    assert(NOTIFY_ZOMBIE == notification);
     double offset = -1.0;
     switch (notification) {
         case NOTIFY_ZOMBIE:
-            offset = 0.0; // Zero-offset events are ugly, but we want to guarantee correctness, so this prevents funky stuff from happening (like, events just arriving)
-            break;
-        case NOTIFY_ZOMBIE_DEL:
-            offset = g_tw_lookahead; // We don't care how long this will take. This is just to clean the zombies set in the destination terminal
+            offset = 0.0; // Zero-offset events are ugly, but we want to guarantee correctness, so this prevents funky stuff from happening (like in the case of events arriving and being processed before the notification reaches them)
             break;
         default:
             tw_error(TW_LOC, "The notification event with type %d couldn't be created", notification);
@@ -4549,7 +4592,7 @@ static void notify_dest_lp_of(
     tw_event_send(e); 
 }
 
-static void notify_src_lp_on_total_latency(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
+static void notify_src_lp_on_total_latency(tw_lp * lp, terminal_dally_message * msg)
 {
     terminal_dally_message * new_msg;
     tw_event *e = model_net_method_event_new(
@@ -4568,7 +4611,6 @@ static void notify_src_lp_on_total_latency(terminal_state * s, tw_bf * bf, termi
 static void process_terminal_notification_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
     switch ((enum notify_t) msg->notify_type) {
         case NOTIFY_LATENCY:
-            bf->c14 = 0;
             break;
 
         case NOTIFY_ZOMBIE:
@@ -4580,29 +4622,12 @@ static void process_terminal_notification_event_rc(terminal_state * s, tw_bf * b
             s->zombies.erase(zombie);
             }
             break;
-
-        case NOTIFY_ZOMBIE_DEL:
-            s->zombies.emplace((struct packet_id){
-                .packet_ID = msg->packet_ID,
-                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id});
-            break;
     }
 }
 
 static void process_terminal_notification_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
     switch ((enum notify_t) msg->notify_type) {
         case NOTIFY_LATENCY:
-            // We thought this packet wouldn't arrive; we thought it was a still on the network
-            // when we sent the zombie notification, but it wasn't! The latency information
-            // simply hadn't arrived to us. Gotta inform the destination LP
-            if (s->sent_packets.empty() || s->sent_packets.front().packet_ID > msg->packet_ID) {
-                //printf("notifying zombie del: packet dest id %d dest gid %d\n", msg->dest_terminal_lpid, msg->dfdally_dest_terminal_id);
-                // Notice that even though we received this message from the destination lp, the destination lp
-                // did not change any parameters from what it received, so the message (mostly) contains the same
-                // information from the original one, the one that we sent
-                notify_dest_lp_of(s, lp, msg, NOTIFY_ZOMBIE_DEL);
-                bf->c14 = 1;
-            }
             break;
 
         case NOTIFY_ZOMBIE: {
@@ -4618,20 +4643,6 @@ static void process_terminal_notification_event(terminal_state * s, tw_bf * bf,
             s->zombies.insert(zombie);
             }
             break;
-
-        case NOTIFY_ZOMBIE_DEL: {
-            // Removing previously thought zombie (just a cleanup operation, to not waste memory, but it should not affect the correctness of the simulation)
-            assert(lp->gid == msg->dest_terminal_lpid);
-            assert(s->terminal_id == msg->dfdally_dest_terminal_id);
-
-            printf("DELETING zombie alert: LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
-            struct packet_id const zombie = {
-                .packet_ID = msg->packet_ID,
-                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id};
-            assert(s->zombies.count(zombie) == 1);
-            s->zombies.erase(zombie);
-            }
-            break;
     }
 }
 
@@ -4682,15 +4693,6 @@ static void send_remote_event(terminal_state * s, terminal_dally_message * msg,
 
 static void packet_arrive_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
-    if(bf->c15) {
-        struct packet_id const packet = {
-            .packet_ID = msg->packet_ID,
-            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-        };
-        assert(s->arrived_here.count(packet) == 1);
-        s->arrived_here.erase(packet);
-        bf->c15 = 0;
-    }
     if(bf->c4) {
         model_net_event_rc2(lp, &msg->event_rc);
     }
@@ -4758,18 +4760,7 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
     
     // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
     if(m_data_src && msg->remote_event_size_bytes > 0) {
-        struct packet_id const packet = {
-            .packet_ID = msg->packet_ID,
-            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-        };
-        bool const had_arrived_before = s->arrived_here.count(packet) == 1;
-        if (!had_arrived_before) {
-            send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
-            s->arrived_here.insert(packet);
-            bf->c15 = 1;
-        } else {
-            fprintf(stderr, "We got a packet twice! This is unfortunate, but might happen due to surrogate switching\n");
-        }
+        send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
     }
 }
 
@@ -4855,15 +4846,6 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
             });
         }
 
-        if(bf->c15) {
-            struct packet_id const packet = {
-                .packet_ID = msg->packet_ID,
-                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-            };
-            assert(s->arrived_here.count(packet) == 1);
-            s->arrived_here.erase(packet);
-        }
-
         struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st);
         qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link));
         s->rank_tbl_pop++; 
@@ -4975,7 +4957,6 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     bf->c4 = 0;
     bf->c7 = 0;
     bf->c14 = 0;
-    bf->c15 = 0;
 
     /* Total overall finished chunks in simulation */
     N_finished_chunks++;
@@ -5136,31 +5117,19 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
                 bf->c14 = 1;
             } else {
                 //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
-
-                struct packet_id const packet = {
-                    .packet_ID = msg->packet_ID,
-                    .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-                };
-                bool const had_arrived_before = s->arrived_here.count(packet) == 1;
-                if (!had_arrived_before) {
-                    if (packet_latency_f || surrogate_configured) {
-                        notify_src_lp_on_total_latency(s, bf, msg, lp);
-                    } else {
-                        // This vacuous msg is necessary just to keep simulations with
-                        // and without the latency notification the same. Notifying the
-                        // latency does not impact the simulation (unless the data is
-                        // fed to a predictor, later to be used). If the latency
-                        // notification is deactivated, the simulation will produce
-                        // the same number of events (a bit wasteful), a parameter
-                        // that model-net or dragonfly-dally for some reason use :S
-                        //vacuous_msg_to_itself(s, msg, lp);
-                    }
-                    send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
-                    s->arrived_here.insert(packet);
-                    bf->c15 = 1;
+                if (packet_latency_f || surrogate_configured) {
+                    notify_src_lp_on_total_latency(lp, msg);
                 } else {
-                    fprintf(stderr, "We got a packet twice! This is unfortunate, but might happen due to surrogate switching\n");
+                    // This vacuous msg is necessary just to keep simulations with
+                    // and without the latency notification the same. Notifying the
+                    // latency does not impact the simulation (unless the data is
+                    // fed to a predictor, later to be used). If the latency
+                    // notification is deactivated, the simulation will produce
+                    // the same number of events (a bit wasteful), a parameter
+                    // that model-net or dragonfly-dally for some reason use :S
+                    //vacuous_msg_to_itself(s, msg, lp);
                 }
+                send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
              }
         }
         /* Remove the hash entry */
@@ -5226,7 +5195,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
       tw_lp * lp )
 {
     if (freeze_network_on_switch && is_surrogate_on) {
-        dragonfly_dally_terminal_surrogate_to_highdef(s, lp);
+        dragonfly_dally_terminal_surrogate_to_highdef(s, lp, NULL);
     }
     // printf("terminal id %d\n",s->terminal_id);
     dragonfly_total_time += s->total_time; //increment the PE level time counter
@@ -5313,17 +5282,18 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
     // Calling destructors for data. There is no need to free data, the
     // destructors do it themselves. ROSS allocated space for the datatypes and
     // it doesn't need to be freed
-    // TODO (elkin): Actually, deallocate memory for `message_data` and `remote_event_data`
-    //printf("terminal %d - arrived_here (size=%d) = [", s->terminal_id, s->arrived_here.size());
-    //for (auto&& z: s->arrived_here) {
-    //    printf("(%d %d) ", z.packet_ID, z.dfdally_src_terminal_id);
-    //}
-    //printf("]\n");
-    //printf("terminal %d - zombies = [", s->terminal_id);
-    //for (auto&& z: s->zombies) {
-    //    printf("(%d %d) ", z.packet_ID, z.dfdally_src_terminal_id);
-    //}
-    //printf("]\n");
+#if 0
+    // Checking that there aren't any zombies left in the simulation
+    printf("terminal %d - zombies = [", s->terminal_id);
+    for (auto&& z: s->zombies) {
+        printf("(%d %d) ", z.packet_ID, z.dfdally_src_terminal_id);
+    }
+    printf("]\n");
+#endif
+    for (auto&& start: s->sent_packets) {
+        if (start.message_data) { free(start.message_data); }
+        if (start.remote_event_data) { free(start.remote_event_data); }
+    }
     s->sent_packets.~deque();
     s->sent_packets_latency.~priority_queue();
 
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index c5c7b21d..5b039e0f 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -328,7 +328,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
         events_enqueued++;
     }
 
-    if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) {
+    if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: Discrepancy on number of events processed %d (%d dequeued and %d enqueued)\n",
                 g_tw_mynode, events_dequeued - events_enqueued, events_dequeued, events_enqueued);
     }
@@ -338,6 +338,71 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
 }
 
 
+// Returns an array of size `g_tw_nlp`, where each element is a null-terminated
+// array containing all the events that each LP has for processing
+static tw_event *** order_events_per_lps(tw_pe * pe) {
+    // 0. Create array for linked list of size g_tw_nlp to store events per lp
+    tw_event ** lp_queue_events = (tw_event **) calloc(g_tw_nlp, sizeof(tw_event *));
+    // 0b. Create simple array (size g_tw_lp) to store number of events per lp
+    size_t * num_lp_queue_events = (size_t *) calloc(g_tw_nlp, sizeof(size_t));
+
+    // 1. loop extracting events from queue
+    //   a. check from which local lp does the event belong
+    //   b. add event to reversed linked-list of given lp and increase lp counter
+    tw_event * next_event = tw_pq_dequeue(pe->pq);
+    size_t events_dequeued = 0;
+    while (next_event) {
+        // Filtering events to freeze
+        assert(next_event->prev == NULL);
+
+        // finding out lp type
+        assert(tw_getlocal_lp(next_event->dest_lpid) == next_event->dest_lp);
+        tw_lpid const lpid = next_event->dest_lp->id;
+
+        // store event in lp_queue_events
+        next_event->prev = lp_queue_events[lpid];
+        lp_queue_events[lpid] = next_event;
+        num_lp_queue_events[lpid]++;
+        events_dequeued++;
+
+        next_event = tw_pq_dequeue(pe->pq);
+    }
+
+    // 2. create array (triple pointer type, **) of size `g_tw_nlp + total events`
+    //    to store events per lp, null-terminated
+    tw_event *** lps_events = (tw_event ** *) calloc(g_tw_nlp, sizeof(tw_event **));
+    tw_event ** all_events_mem = (tw_event * *) calloc(g_tw_nlp + events_dequeued, sizeof(tw_event *));
+
+    // 3. loop through each linked-list insert each event back into the
+    //   queue and store address copy into lp array
+    size_t event_i = 0;
+    for (size_t lpid = 0; lpid < g_tw_nlp; lpid++) {
+        lps_events[lpid] = &all_events_mem[event_i];
+
+        tw_event * dequed_events = lp_queue_events[lpid];
+        while (dequed_events) {
+            // event address copy
+            all_events_mem[event_i] = dequed_events;
+
+            // placing back into queue
+            tw_event * const prev_event = dequed_events;
+            dequed_events = dequed_events->prev;
+            prev_event->prev = NULL;
+            tw_pq_enqueue(pe->pq, prev_event);
+
+            event_i++;
+        }
+        event_i++;
+    }
+    assert(event_i == g_tw_nlp + events_dequeued);
+
+    assert(g_tw_nlp > 0 && lps_events[0] == all_events_mem);
+    free(lp_queue_events);
+    free(num_lp_queue_events);
+    return lps_events;
+}
+
+
 // Switching from a (vanilla) high-def simulation to surrogate mode
 // consists of:
 // - Cancel all events that have to be cancelled and clean everything
@@ -370,6 +435,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
     }
 #endif
 
+    tw_event *** lps_events = order_events_per_lps(pe);
     shift_events_to_future_pe(pe, gvt);
 
     // Going through all LPs in PE and running their specific functions
@@ -399,13 +465,20 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
             }
             if (lp_type_switch->surrogate_to_highdef) {
                 if (is_lp_modelnet) {
-                    model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate);
+                    model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate, lps_events[local_lpid]);
                 } else {
-                    lp_type_switch->highdef_to_surrogate(lp->cur_state, lp);
+                    lp_type_switch->highdef_to_surrogate(lp->cur_state, lp, lps_events[local_lpid]);
                 }
             }
         }
     }
+
+    // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process)
+    rollback_and_cancel_events_pe(pe, gvt);
+
+    assert(lps_events[0] != NULL);
+    free(lps_events[0]);
+    free(lps_events);
 }
 
 
@@ -416,7 +489,6 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
         rollback_and_cancel_events_pe(pe, gvt);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
         assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
     }
 #else
@@ -426,7 +498,6 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         assert(pe->GVT == gvt);
         rollback_and_cancel_events_pe(pe, gvt);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
         assert(pe->GVT == gvt);
     }
 #endif
@@ -460,9 +531,9 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
             }
             if (lp_type_switch->surrogate_to_highdef) {
                 if (is_lp_modelnet) {
-                    model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef);
+                    model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef, NULL);
                 } else {
-                    lp_type_switch->surrogate_to_highdef(lp->cur_state, lp);
+                    lp_type_switch->surrogate_to_highdef(lp->cur_state, lp, NULL);
                 }
             }
         }

From 4fe5d527e58e78ff5cf1bd7852c9d2adbea1bef6 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 21 May 2023 11:28:02 -0400
Subject: [PATCH 031/188] Extending predictor to predict delay to process next
 packet in the queue

---
 codes/surrogate.h                        |   6 +-
 src/networks/model-net/core/model-net.c  |   4 +-
 src/networks/model-net/dragonfly-dally.C | 120 ++++++++++++-----------
 src/util/surrogate.c                     |  62 ++++++------
 4 files changed, 94 insertions(+), 98 deletions(-)

diff --git a/codes/surrogate.h b/codes/surrogate.h
index cb79f242..16957338 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -45,14 +45,14 @@ struct packet_start {
 };
 
 struct packet_end {
-    uint64_t packet_ID;
     double travel_end_time;
+    double delay_at_queue_head_next;  // Delay to start processing next packet
 };
 
 // Definition of functions needed to define a predictor
 typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
-typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start *, struct packet_end *); // Feeds known latency for packet sent at `now`
-typedef double (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start *); // Get prediction for packet sent to `destination` at `now`
+typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now`
+typedef struct packet_end (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now`
 typedef void (*predict_pred_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
 
 // Each network model defines its own way to setup the packet latency predictor
diff --git a/src/networks/model-net/core/model-net.c b/src/networks/model-net/core/model-net.c
index 5ee14979..b145c6cb 100644
--- a/src/networks/model-net/core/model-net.c
+++ b/src/networks/model-net/core/model-net.c
@@ -325,7 +325,7 @@ static model_net_event_return model_net_event_impl_base(
         void const * self_event,
         tw_lp *sender) {
 
-    
+
     if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg)
             > g_tw_msg_sz){
         tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
@@ -386,7 +386,7 @@ static model_net_event_return model_net_event_impl_base(
     if (congestion_control_is_jobmap_set()) { //perhaps make jobmap a global set regardless of congestion control
         struct codes_jobmap_ctx *ctx;
         ctx = congestion_control_get_jobmap();
-        struct codes_jobmap_id jid; 
+        struct codes_jobmap_id jid;
         jid = codes_jobmap_to_local_id(codes_mapping_get_lp_relative_id(sender->gid, 0, 0), ctx);
         r->app_id = jid.job;
     }
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 03c50641..5dc1190c 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -474,12 +474,16 @@ static bool isRoutingNonminimalExplicit(int alg)
  * Surrogate definitions and data
  */
 
-// Comparison function object to use in min-heap of packet_end's
+struct packet_double_val {
+    uint64_t packet_ID;
+    double value; // This can either be packet delivery latency or delay in queue to be processed
+};
+// Comparison function object to use in min-heap of sent_packets_latency
 static struct {
-    bool operator() (struct packet_end const l, struct packet_end const r) const {
+    bool operator() (struct packet_double_val const l, struct packet_double_val const r) const {
         return l.packet_ID > r.packet_ID;
     }
-} packet_end_greater_cmp;
+} packet_double_val_greater_cmp;
 
 struct packet_id {
     uint64_t packet_ID;
@@ -591,7 +595,7 @@ struct terminal_state
     // min-heap for latencies of packets once they arrive (some packets might
     // arrive faster than others, so a list like the one above is not feasible
     // to store in order efficiently their arrival)
-    priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)> sent_packets_latency;
+    priority_queue<struct packet_double_val, vector<struct packet_double_val>, decltype(packet_double_val_greater_cmp)> sent_packets_latency;
 
     // Stores the last time in which a packet was processed (time at which a T_GENERATE event was processed)
     double last_in_queue_time;
@@ -2818,7 +2822,6 @@ static void packet_latency_save_to_file(
         struct packet_end end,
         bool is_predicted
 ) {
-    assert(start.packet_ID == end.packet_ID);
     fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f\n",
             terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
             is_surrogate_on, is_predicted,
@@ -2840,17 +2843,22 @@ static bool is_surrogate_on_fun(void) {
 // Goes through all received packet latencies and process them in order in which they were sent through the network
 static void process_packet_latencies(terminal_state * s, tw_lp * lp)
 {
-    while( !s->sent_packets.empty()
+    while( s->sent_packets.size() >= 2  // We need at least two packets to determine the delay of the next packet to be processed
         && !s->sent_packets_latency.empty()
-        && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID)
+        && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID
+        )
     {
         auto start = s->sent_packets.front();
+        double const delay_at_queue_head_next = s->sent_packets[1].delay_at_queue_head;
+        struct packet_end end = {
+            .travel_end_time = s->sent_packets_latency.top().value,
+            .delay_at_queue_head_next = delay_at_queue_head_next,
+        };
         if (packet_latency_f) {
-            packet_latency_save_to_file(s->terminal_id, start, s->sent_packets_latency.top(), false);
+            packet_latency_save_to_file(s->terminal_id, start, end, false);
         }
         if (surrogate_configured && !is_surrogate_on) {
             assert(terminal_predictor != NULL);
-            auto end = s->sent_packets_latency.top();
             terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &start, &end);
         }
 
@@ -2870,7 +2878,7 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
 // Constructs a hashmap with all the T_NOTIFY events to be processed.
 // The key of the list is the GID for the source terminal. The value of the
 // hash is the end time
-static map<uint64_t, double> construct_map_of_T_NOTIFY_events(
+static map<uint64_t, double> construct_map_of_NOTIFY_LATENCY_events(
         tw_lp * lp, tw_event ** const terminal_events) {
     // hash map to store T_NOTIFY events found (`packet_ID` and `travel_end_time`)
     map<uint64_t, double> notification_events_map;
@@ -2898,7 +2906,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
         terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
     process_packet_latencies(s, lp);
 
-    auto notification_events_map = construct_map_of_T_NOTIFY_events(lp, terminal_events);
+    auto notification_events_map = construct_map_of_NOTIFY_LATENCY_events(lp, terminal_events);
 
     // Going through every packet that was sent but not yet received, remove it
     // from the list, send it to its destination using the predictor, and
@@ -2910,40 +2918,45 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
 
         // The predictor is asked to predict the latency of the packet regardless if it is a zombie or not.
         // (This makes it so that we feed the predictor only during high-def mode, and never a switching time)
-        double latency = 
+        struct packet_end predicted_end = 
             terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
 
         bool const in_sent_packets_latency =
             !s->sent_packets_latency.empty() && start.packet_ID == s->sent_packets_latency.top().packet_ID;
-        // Finding out whether the T_NOTIFY is on the list of messages to be processed
+        // Finding out whether the packet-latency is on the list of messages to be processed
         bool const in_events_to_process = !in_sent_packets_latency &&
             notification_events_map.count(start.packet_ID) == 1;
 
-        // The packet was delievered and its latency is known (we were notified). Delete packet from stack
-        if (in_sent_packets_latency) {
-            auto const end = s->sent_packets_latency.top();
-            s->sent_packets_latency.pop();
-            packet_latency_save_to_file(s->terminal_id, start, end, false);
-        } else if (in_events_to_process) {
-            auto const end = (struct packet_end) {
-                .packet_ID = start.packet_ID,
-                .travel_end_time = notification_events_map[start.packet_ID],
-            };
+        // The packet was delievered and its latency is known (we were notified)
+        if (in_sent_packets_latency || in_events_to_process) {
+            struct packet_end end;
+            // Delete packet from stack
+            if (in_sent_packets_latency) {
+                auto const latency_q = s->sent_packets_latency.top();
+                end.travel_end_time = latency_q.value;
+                s->sent_packets_latency.pop();
+            } else {
+                end.travel_end_time = notification_events_map[start.packet_ID];
+            }
+            if (s->sent_packets.size() >= 2) {
+                end.delay_at_queue_head_next = s->sent_packets[1].delay_at_queue_head;
+            } else {
+                end.delay_at_queue_head_next = -1;
+            }
             packet_latency_save_to_file(s->terminal_id, start, end, false);
+        }
         // The packet has not been delievered, or we haven't received the notification yet.
         // Send directly to destination and notify of zombie event
-        } else {
-            double arrival = start.travel_start_time + latency;
+        else {
+            double latency = predicted_end.travel_end_time - start.travel_start_time;
+            double arrival = start.travel_start_time + latency; // this is "equivalent" to end.travel_end_time (we do it because floating point operations are weird, and it's better to err on the side of spending some cycles computing the addition rather than assuming that things will work out correctly)
             if (arrival < tw_now(lp)) {
                 arrival = tw_now(lp);
                 latency = 0;
             }
             
-            auto const end = (struct packet_end) {
-                .packet_ID = start.packet_ID,
-                .travel_end_time = arrival,
-            };
-            packet_latency_save_to_file(s->terminal_id, start, end, true);
+            predicted_end.travel_end_time = arrival;
+            packet_latency_save_to_file(s->terminal_id, start, predicted_end, true);
 
             assert(start.message_data);
             terminal_dally_message * const msg_data = (terminal_dally_message*) start.message_data;
@@ -3211,7 +3224,6 @@ static void terminal_dally_commit(terminal_state * s,
 
         // Saving
         auto const end = (struct packet_end) {
-            .packet_ID = msg->packet_ID,
             .travel_end_time = msg->travel_end_time,
         };
         packet_latency_save_to_file(s->terminal_id, start, end, true);
@@ -3224,7 +3236,7 @@ static void terminal_dally_commit(terminal_state * s,
         if (!s->sent_packets.empty() && s->sent_packets.front().packet_ID <= msg->packet_ID) {
             s->sent_packets_latency.push({
                     .packet_ID = msg->packet_ID,
-                    .travel_end_time = msg->travel_end_time});
+                    .value = msg->travel_end_time});
 
             process_packet_latencies(s, lp);
         }
@@ -3442,7 +3454,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     // std::construct_at, for now this syntax suffices and works
     // (see https://en.cppreference.com/w/cpp/memory/construct_at)
     new (&s->sent_packets) deque<struct packet_start>();
-    new (&s->sent_packets_latency) priority_queue<struct packet_end, vector<struct packet_end>, decltype(packet_end_greater_cmp)>();
+    new (&s->sent_packets_latency) priority_queue<struct packet_double_val, vector<struct packet_double_val>, decltype(packet_double_val_greater_cmp)>();
     new (&s->zombies) set<struct packet_id>();
     s->frozen_state = NULL;
 
@@ -3770,24 +3782,6 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     msg->my_g_hop = -1;
     msg->my_hops_cur_group = -1;
 
-    // determining injection delay
-    tw_stime injection_ts;
-    if (g_congestion_control_enabled) {
-        double bandwidth_coef = 1;
-        if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
-            bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
-        }
-        injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
-    }
-    else {
-        injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
-    }
-    tw_stime const nic_ts = injection_ts;
-    msg->saved_in_queue_delay = injection_ts;
-    //tw_stime const nic_ts = s->in_queue_delay;
-    //msg->saved_in_queue_delay = s->in_queue_delay;
-    //printf("injection_ts = %f\n", injection_ts);
-
     // Using predictor to find latency
     tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
     auto start = (struct packet_start) {
@@ -3795,22 +3789,31 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
+        .workload_injection_time = msg->msg_start_time,
+        .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
         .packet_size = msg->packet_size
     };
 
+    struct packet_end const end = 
+        terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
+    double const latency = end.travel_end_time - start.travel_start_time;
+    double const arrival = start.travel_start_time + latency; // this is "equivalent" to end.travel_end_time
+    assert(arrival >= tw_now(lp));
+
+    // determining injection delay
+    double const nic_ts = end.delay_at_queue_head_next;
+    msg->saved_in_queue_delay = nic_ts;
+
     // Scheduling idle event for next packet to be processed
     bool const is_from_remote = false;
-    // TODO(helq): estimate from data collected before, new nic_ts
     model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp);
     msg->saved_last_in_queue_time = s->last_in_queue_time;
     s->last_in_queue_time = tw_now(lp);
 
-    double const latency = 
-        terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
-
     // Info to be used at commit time to save into file
+    msg->saved_in_queue_delay = tw_now(lp) - time_at_queue_head;
     msg->travel_start_time = tw_now(lp);
-    msg->travel_end_time = tw_now(lp) + latency;
+    msg->travel_end_time = arrival;
 
     // Sending packet directly to destination terminal
     //tw_stime const ts = 0;
@@ -3844,10 +3847,9 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     stat->send_count++;
     stat->send_bytes += msg->packet_size;
     stat->send_time += (1/p->cn_bandwidth) * msg->packet_size;
-    if(stat->max_event_size < total_event_size)
+    if(stat->max_event_size < total_event_size) {
         stat->max_event_size = total_event_size;
-
-    return;
+    }
 }
 
 static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 5b039e0f..d5f4aac4 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -41,6 +41,7 @@ struct aggregated_latency_one_terminal {
 };
 
 struct latency_surrogate {
+    double sum_delay_at_queue_head_next;
     struct aggregated_latency_one_terminal aggregated_latency_for_all;
     unsigned int num_terminals;
     struct aggregated_latency_one_terminal aggregated_latency[];
@@ -56,9 +57,10 @@ static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     assert(data->aggregated_latency[0].total_msgs == 0);
 
     data->num_terminals = surr_config.total_terminals;
+    data->sum_delay_at_queue_head_next = 0;
 }
 
-static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * start, struct packet_end * end) {
+static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
     (void) lp;
     (void) src_terminal;
 
@@ -75,49 +77,41 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
 
     data->aggregated_latency_for_all.sum_latency += latency;
     data->aggregated_latency_for_all.total_msgs++;
+
+    data->sum_delay_at_queue_head_next += end->delay_at_queue_head_next;
 }
 
-static double predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start * packet_dest) {
+static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) {
     (void) lp;
 
     unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
     assert(dest_terminal < data->num_terminals);
 
+    unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs;
+    if (total_total_datapoints == 0) {
+        // otherwise, we have no data to approximate the latency
+        tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
+        return (struct packet_end) {
+            .travel_end_time = -1.0,
+            .delay_at_queue_head_next = -1.0,
+        };
+    }
+
     // In case we have any data to determine the average for a specific terminal
-    unsigned int const total_datapoints = data->aggregated_latency[dest_terminal].total_msgs;
-    if (total_datapoints > 0) {
-        double const sum_latency = data->aggregated_latency[dest_terminal].sum_latency;
-        return sum_latency / total_datapoints;
+    unsigned int const total_datapoints_for_term = data->aggregated_latency[dest_terminal].total_msgs;
+    double latency = -1.0;
+    if (total_datapoints_for_term > 0) {
+        latency = data->aggregated_latency[dest_terminal].sum_latency / total_datapoints_for_term;
+    } else {
+        // If no information for that terminal exists, use average from all message
+        latency = data->aggregated_latency_for_all.sum_latency / total_total_datapoints;
     }
 
-    // If no information for that terminal exists, use average from all message
-    unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs;
-    if (total_total_datapoints > 0) {
-        double const sum_latency = data->aggregated_latency_for_all.sum_latency;
-        return sum_latency / total_total_datapoints;
-    }
-
-    // otherwise, we have no data to approximate the latency
-    tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
-    return -1.0;
-
-    // TODO(elkin): this (below) is wrong, bad bad. I'm not entirely sure how to do this rn in a non-hardcoded manner, but given time, this should be left in better terms
-    // THIS HAS BEEN HARDCODED FOR THE CASE OF 72-node DRAGONFLY
-
-    //// Otherwise, use "sensible" results from another simulation
-    //// This assumes the network is a 72 nodes 1D-DragonFly (9 groups, with 4 routers, and 2 terminals per router)
-    //// source and destination share the same router
-    //if (src_terminal / 2 == dest_terminal / 2) {
-    //    return 2108.74;
-    //}
-    //// source and destination are in the same group
-    //else if (src_terminal / 8 == dest_terminal / 8) {
-    //    return 2390.13;
-    //}
-    //// source and destination are in different groups
-    //else {
-    //    return 4162.77;
-    //}
+    double const delay_at_queue_head_next = data->sum_delay_at_queue_head_next / total_total_datapoints;
+    return (struct packet_end) {
+        .travel_end_time = packet_dest->travel_start_time + latency,
+        .delay_at_queue_head_next = delay_at_queue_head_next,
+    };
 }
 
 static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {

From 3080130846b087e5c4c196b02998846ae0c7ea7b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 9 Jul 2023 06:49:23 -0400
Subject: [PATCH 032/188] Including reproducibility pads23 scripts

---
 scripts/reproducibility-pads23/README.md      |  97 +++++++
 .../conf-files/72-dragonfly-full.alloc        |   2 +
 ...terminal-dragonfly-72-surrogate-v5.conf.in |  73 +++++
 .../terminal-dragonfly-72-v5.conf.in          |  57 ++++
 ...play_72-node-dragonfly_synthetic1-100ms.sh |  75 ++++++
 ...eplay_72-node-dragonfly_synthetic1-10ms.sh |  74 +++++
 .../python-scripts/delay-in-window.py         | 138 ++++++++++
 .../python-scripts/generate-table.py          | 110 ++++++++
 .../python-scripts/plot-packet-latency.py     | 255 ++++++++++++++++++
 .../python-scripts/port-occupancy.py          | 176 ++++++++++++
 scripts/reproducibility-pads23/reproduce.sh   |  45 ++++
 11 files changed, 1102 insertions(+)
 create mode 100644 scripts/reproducibility-pads23/README.md
 create mode 100644 scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc
 create mode 100644 scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in
 create mode 100644 scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in
 create mode 100644 scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh
 create mode 100644 scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh
 create mode 100644 scripts/reproducibility-pads23/python-scripts/delay-in-window.py
 create mode 100644 scripts/reproducibility-pads23/python-scripts/generate-table.py
 create mode 100644 scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
 create mode 100644 scripts/reproducibility-pads23/python-scripts/port-occupancy.py
 create mode 100644 scripts/reproducibility-pads23/reproduce.sh

diff --git a/scripts/reproducibility-pads23/README.md b/scripts/reproducibility-pads23/README.md
new file mode 100644
index 00000000..38751076
--- /dev/null
+++ b/scripts/reproducibility-pads23/README.md
@@ -0,0 +1,97 @@
+# Reproducing results of PADS23 paper
+
+This document contains the instructions to follow in order to compile, run the experiments
+and generate the figures and table that appears on the paper: Hybrid PDES Simulation of
+HPC Networks Using Zombie Packets, by Cruz-Camacho et. al 2023.
+
+The artifacts associated with this submission are:
+
+- The PDES simulator [ROSS](https://github.com/ross-org/ross) (Licensed under the
+    BSD-3-clause licence)
+- The HPC network simulator [CODES](https://github.com/codes-org/codes) (Licensed under
+    the BSD-3-clause licence)
+
+All models included with the simulators are licensed under the same licence, namely
+BSD-3-clause.
+
+A copy of these artifacts are available via [Zenodo](https://about.zenodo.org) with
+[doi:10.5281/zenodo.7879224](https://doi.org/10.5281/zenodo.7879224). Zenodo's policies on
+long-time storage and availability of the artificats can be found in:
+<https://about.zenodo.org/policies/>.
+
+The code has been tested in two systems: a 20-core IBM Power9 processor (using 9 of its
+cores), and an Intel core i7 vPro 8th Gen (a change in the number of available cores/slots
+was needed in `experiments` as the processor does not have 9 available
+cores).
+
+## Build
+
+To compile CODES (and ROSS), you need a CMake, and a C and C++ MPI-aware compiler.
+
+We have succesfully compiled CODES in a system with a XLC_r compiler (version 16.1.1) and
+the Spectrum MPI (version 10.4) library, and in a x64 system with GCC (12.2.1) and Open
+MPI (4.1.5).
+
+We assume that all commands are executed under base CODES directory:
+
+```bash
+cd path-to-this/CODES
+```
+
+First compile ROSS:
+
+```bash
+mkdir ROSS/build
+pushd ROSS/build
+cmake .. -DROSS_BUILD_MODELS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+    -DCMAKE_INSTALL_PREFIX="$(pwd -P)/bin" -DCMAKE_CXX_COMPILER=mpicxx \
+    -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug
+make
+make install
+popd
+```
+
+Then compile CODES:
+
+```bash
+mkdir build
+pushd build
+cmake .. -DCMAKE_PREFIX_PATH="$PWD/../ROSS/build/bin" \
+    -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc \
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug \
+    -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX="$(pwd -P)/bin"
+make
+# make install is NOT necessary
+popd
+```
+
+## Run and generate figures/tables
+
+The experiments, figure generation and table generation are contained in the script
+`reproduce.sh`. The script calls the bash scripts in `experiments` which run the CODES
+binary. If there is a need to change any parameter on the experiments (eg, number of
+cores), these files are the place to do so.
+
+Python 3 is needed to generate the figures. The Python libraries: NumPy and matplotlib are
+also required. (Tested on Python 3.10, NumPy 1.24.2 and Matplotlib 3.7.1.) An additional
+external tool is `wc`, which is used to count the total number of lines/packets in the
+simulation. (Tested on GNU `wc` versions 8.3 and 9.2.)
+
+To run the script simply:
+
+```bash
+cd scripts/reproducibility-pads23/
+bash -x reproduce.sh
+```
+
+The total runtime for the script is dependent on machine resources. A runtime of 30
+minutes has been reported for a system running on Intel i9-12900K (16 cores, 5.20 GHz),
+while for smaller systems, like Intel i7-8650U (4 cores, 4.2 GHz), the runtime has been of
+around 2 to 4 hours. The experiments take up to 3 GBs of space in disk. If CODES was
+compiled in a folder other than the one suggested (`build/`), you must change the variable
+`CODES_BUILD_DIR` in the script.
+
+### Results
+
+The figures can be found in the directory `figures` and the table results in the file text
+`results/sumarized-table.txt`
diff --git a/scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc b/scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc
new file mode 100644
index 00000000..5d1a3e8a
--- /dev/null
+++ b/scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc
@@ -0,0 +1,2 @@
+0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
+
diff --git a/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in
new file mode 100644
index 00000000..6ed72c9f
--- /dev/null
+++ b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in
@@ -0,0 +1,73 @@
+LPGROUPS
+{
+   MODELNET_GRP
+   {
+      repetitions="36";
+# name of this lp changes according to the model
+      nw-lp="2";
+# these lp names will be the same for dragonfly-custom model
+      modelnet_dragonfly_dally="2";
+      modelnet_dragonfly_dally_router="1";
+   }
+}
+PARAMS
+{
+# packet size in the network
+   packet_size="4096";
+   modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
+   # scheduler options
+   modelnet_scheduler="fcfs";
+# chunk size in the network (when chunk size = packet size, packets will not be
+# divided into chunks)
+   chunk_size="${CHUNK_SIZE}";
+# modelnet_scheduler="round-robin";
+# number of routers in group
+   num_routers="4";
+# number of groups in the network
+   num_groups="9";
+# buffer size in bytes for local virtual channels
+   local_vc_size="16384";
+#buffer size in bytes for global virtual channels
+   global_vc_size="16384";
+#buffer size in bytes for compute node virtual channels
+   cn_vc_size="32768";
+#bandwidth in GiB/s for local channels
+   local_bandwidth="2.0";
+# bandwidth in GiB/s for global channels
+   global_bandwidth="2.0";
+# bandwidth in GiB/s for compute node-router channels
+   cn_bandwidth="2.0";
+# ROSS message size
+   message_size="736";
+# number of compute nodes connected to router, dictated by dragonfly config
+# file
+   num_cns_per_router="2";
+# number of global channels per router
+   num_global_channels="2";
+# network config file for intra-group connections
+   intra-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
+# network config file for inter-group connections
+   inter-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
+# routing protocol to be used
+   routing="prog-adaptive";
+# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
+   save_packet_latency_path="${PACKET_LATENCY_PATH}";
+# router buffer occupancy snapshots
+   router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} );
+}
+SURROGATE {
+# determines the director switching from surrogate to high-def simulation strategy
+   director_mode="at-fixed-virtual-times";
+
+# director configuration for: director_mode == "at-fixed-virtual-times"
+# timestamps at which to switch to surrogate-mode and back
+   fixed_switch_timestamps=( ${SWITCH_TIMESTAMPS} );
+
+# latency predictor to use
+   packet_latency_predictor="average";
+# some workload models need some time to stabilize, a point where the network behaviour stabilizes. The predictor will ignore all packet latencies that arrive during this period
+   ignore_until="${IGNORE_UNTIL}";
+
+# selecting network treatment on switching to surrogate
+   network_treatment_on_switch="${NETWORK_TREATMENT}";
+}
diff --git a/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in
new file mode 100644
index 00000000..9c833f43
--- /dev/null
+++ b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in
@@ -0,0 +1,57 @@
+LPGROUPS
+{
+   MODELNET_GRP
+   {
+      repetitions="36";
+# name of this lp changes according to the model
+      nw-lp="2";
+# these lp names will be the same for dragonfly-custom model
+      modelnet_dragonfly_dally="2";
+      modelnet_dragonfly_dally_router="1";
+   }
+}
+PARAMS
+{
+# packet size in the network
+   packet_size="4096";
+   modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
+   # scheduler options
+   modelnet_scheduler="fcfs";
+# chunk size in the network (when chunk size = packet size, packets will not be
+# divided into chunks)
+   chunk_size="${CHUNK_SIZE}";
+# modelnet_scheduler="round-robin";
+# number of routers in group
+   num_routers="4";
+# number of groups in the network
+   num_groups="9";
+# buffer size in bytes for local virtual channels
+   local_vc_size="16384";
+#buffer size in bytes for global virtual channels
+   global_vc_size="16384";
+#buffer size in bytes for compute node virtual channels
+   cn_vc_size="32768";
+#bandwidth in GiB/s for local channels
+   local_bandwidth="2.0";
+# bandwidth in GiB/s for global channels
+   global_bandwidth="2.0";
+# bandwidth in GiB/s for compute node-router channels
+   cn_bandwidth="2.0";
+# ROSS message size
+   message_size="736";
+# number of compute nodes connected to router, dictated by dragonfly config
+# file
+   num_cns_per_router="2";
+# number of global channels per router
+   num_global_channels="2";
+# network config file for intra-group connections
+   intra-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
+# network config file for inter-group connections
+   inter-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
+# routing protocol to be used
+   routing="prog-adaptive";
+# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
+   save_packet_latency_path="${PACKET_LATENCY_PATH}";
+# router buffer occupancy snapshots
+   router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} );
+}
diff --git a/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh
new file mode 100644
index 00000000..fb407ebd
--- /dev/null
+++ b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/bash -x
+
+np=3
+
+# CONFIGURATION
+# exported env variables are to be used by `envsubst` below
+PATH_TO_CODES_BUILD="$1"
+export PATH_TO_CODES_SRC="$2"
+CONF_FILE_TEMPLATES="$3"
+export CHUNK_SIZE=64
+
+# configuration file for high-fidelity codes
+export BUFFER_SNAPSHOTS='"1e6", "2e6", "3e6", "4e6", "5e6", "6e6", "7e6", "8e6", "9e6", "10e6", "11e6", "12e6", "13e6", "14e6", "15e6", "16e6", "17e6", "18e6", "19e6", "20e6", "21e6", "22e6", "23e6", "24e6", "25e6", "26e6", "27e6", "28e6", "29e6", "30e6", "31e6", "32e6", "33e6", "34e6", "35e6", "36e6", "37e6", "38e6", "39e6", "40e6", "41e6", "42e6", "43e6", "44e6", "45e6", "46e6", "47e6", "48e6", "49e6", "50e6", "51e6", "52e6", "53e6", "54e6", "55e6", "56e6", "57e6", "58e6", "59e6", "60e6", "61e6", "62e6", "63e6", "64e6", "65e6", "66e6", "67e6", "68e6", "69e6", "70e6", "71e6", "72e6", "73e6", "74e6", "75e6", "76e6", "77e6", "78e6", "79e6", "80e6", "81e6", "82e6", "83e6", "84e6", "85e6", "86e6", "87e6", "88e6", "89e6", "90e6", "91e6", "92e6", "93e6", "94e6", "95e6", "96e6", "97e6", "98e6", "99e6", "99.9e6"'
+export PACKET_LATENCY_PATH='high-fidelity/packet-latency-trace'
+cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-v5.conf.in | envsubst > terminal-dragonfly-72.conf
+
+# configuration file for hybrid-lite and hybrid codes
+#export BUFFER_SNAPSHOTS='"1e6", "2e6", "3e6", "4e6", "5e6", "6e6", "7e6", "8e6", "9e6", "10e6", "11e6", "12e6", "13e6", "14e6", "15e6", "16e6", "17e6", "18e6", "19e6", "91e6", "92e6", "93e6", "94e6", "95e6", "96e6", "97e6", "98e6", "99e6", "99.9e6"'
+export IGNORE_UNTIL=10e6
+export SWITCH_TIMESTAMPS='"20e6", "90e6"'
+export NETWORK_TREATMENT=freeze
+export PACKET_LATENCY_PATH='hybrid/packet-latency-trace'
+cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid.conf
+
+# configuration file for hybrid-lite
+export NETWORK_TREATMENT=nothing
+export PACKET_LATENCY_PATH='hybrid-lite/packet-latency-trace'
+cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid-lite.conf
+
+# yet more configuration files
+cp "$CONF_FILE_TEMPLATES"/72-dragonfly-full.alloc .
+
+# creating dirs
+mkdir -p high-fidelity hybrid hybrid-lite
+
+# RUNNING SIMULATION
+period=480
+
+# Creating custom/individual configuration files
+work_alloc_file="72-dragonfly-period=${period}.synthetic.conf"
+cat > "$work_alloc_file" <<END
+72 synthetic1 0 ${period}
+END
+
+lookahead=200
+# Note: --extramem is only required for simulations with a very short period as they generate many, many, many events (and they keep on accumulating)
+extramem=10000
+
+# RUNNING CODES
+# Note: cons-lookahead is used as the offset to process packet latency events (the event is scheduled back to the sender, thus a smaller offset will force GVT more often; too large of an offset and the predictor will be behind significantly)
+mpirun -np $np \
+  "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \
+     --workload_type=online --workload_conf_file="$work_alloc_file" \
+     --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \
+     --alloc_file=72-dragonfly-full.alloc --end='100.001e6' \
+     --extramem=$extramem --lp-io-dir=high-fidelity/codes-output \
+     -- terminal-dragonfly-72.conf > high-fidelity/model-result.txt 2> high-fidelity/model-result.stderr.txt
+
+# RUNNING CODES with SURROGATE MODEL
+mpirun -np $np \
+  "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \
+     --workload_type=online --workload_conf_file="$work_alloc_file" \
+     --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \
+     --alloc_file=72-dragonfly-full.alloc --end='100.001e6' \
+     --extramem=$extramem --lp-io-dir=hybrid/codes-output \
+     -- terminal-dragonfly-72-hybrid.conf > hybrid/model-result.txt 2> hybrid/model-result.stderr.txt
+
+# SAME AS BEFORE BUT NONFREEZING
+mpirun -np $np \
+  "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \
+     --workload_type=online --workload_conf_file="$work_alloc_file" \
+     --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \
+     --alloc_file=72-dragonfly-full.alloc --end='100.001e6' \
+     --extramem=$extramem --lp-io-dir=hybrid-lite/codes-output \
+     -- terminal-dragonfly-72-hybrid-lite.conf > hybrid-lite/model-result.txt 2> hybrid-lite/model-result.stderr.txt
diff --git a/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh
new file mode 100644
index 00000000..2b920be8
--- /dev/null
+++ b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/bash -x
+
+np=3
+
+# CONFIGURATION
+# exported env variables are to be used by `envsubst` below
+PATH_TO_CODES_BUILD="$1"
+export PATH_TO_CODES_SRC="$2"
+CONF_FILE_TEMPLATES="$3"
+export CHUNK_SIZE=64
+
+# configuration file for high-fidelity codes
+export BUFFER_SNAPSHOTS='"100e3", "200e3", "300e3", "400e3", "500e3", "600e3", "700e3", "800e3", "900e3", "1e6", "1.1e6", "1.2e6", "1.3e6", "1.4e6", "1.5e6", "1.6e6", "1.7e6", "1.8e6", "1.9e6", "2e6", "2.1e6", "2.2e6", "2.3e6", "2.4e6", "2.5e6", "2.6e6", "2.7e6", "2.8e6", "2.9e6", "3e6", "3.1e6", "3.2e6", "3.3e6", "3.4e6", "3.5e6", "3.6e6", "3.7e6", "3.8e6", "3.9e6", "4e6", "4.1e6", "4.2e6", "4.3e6", "4.4e6", "4.5e6", "4.6e6", "4.7e6", "4.8e6", "4.9e6", "5e6", "5.1e6", "5.2e6", "5.3e6", "5.4e6", "5.5e6", "5.6e6", "5.7e6", "5.8e6", "5.9e6", "6e6", "6.1e6", "6.2e6", "6.3e6", "6.4e6", "6.5e6", "6.6e6", "6.7e6", "6.8e6", "6.9e6", "7e6", "7.1e6", "7.2e6", "7.3e6", "7.4e6", "7.5e6", "7.6e6", "7.7e6", "7.8e6", "7.9e6", "8e6", "8.1e6", "8.2e6", "8.3e6", "8.4e6", "8.5e6", "8.6e6", "8.7e6", "8.8e6", "8.9e6", "9e6", "9.1e6", "9.2e6", "9.3e6", "9.4e6", "9.5e6", "9.6e6", "9.7e6", "9.8e6", "9.9e6", "9.990e6"'
+export PACKET_LATENCY_PATH='high-fidelity/packet-latency-trace'
+cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-v5.conf.in | envsubst > terminal-dragonfly-72.conf
+
+# configuration file for hybrid-lite and hybrid codes
+export IGNORE_UNTIL=2000e3
+export SWITCH_TIMESTAMPS='"3000e3", "8000e3"'
+export NETWORK_TREATMENT=freeze
+export PACKET_LATENCY_PATH='hybrid/packet-latency-trace'
+cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid.conf
+
+# configuration file for hybrid-lite
+export NETWORK_TREATMENT=nothing
+export PACKET_LATENCY_PATH='hybrid-lite/packet-latency-trace'
+cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid-lite.conf
+
+# yet more configuration files
+cp "$CONF_FILE_TEMPLATES"/72-dragonfly-full.alloc .
+
+# creating dirs
+mkdir -p high-fidelity hybrid hybrid-lite
+
+# RUNNING SIMULATION
+period=480
+
+# Creating custom/individual configuration files
+work_alloc_file="72-dragonfly-period=${period}.synthetic.conf"
+cat > "$work_alloc_file" <<END
+72 synthetic1 0 ${period}
+END
+
+lookahead=200
+# Note: --extramem is only required for simulations with a very short period as they generate many, many, many events (and they keep on accumulating)
+extramem=10000
+
+# RUNNING CODES
+# Note: cons-lookahead is used as the offset to process packet latency events (the event is scheduled back to the sender, thus a smaller offset will force GVT more often; too large of an offset and the predictor will be behind significantly)
+mpirun -np $np \
+  "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \
+     --workload_type=online --workload_conf_file="$work_alloc_file" \
+     --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \
+     --alloc_file=72-dragonfly-full.alloc --end=10000.01e3 \
+     --extramem=$extramem --lp-io-dir=high-fidelity/codes-output \
+     -- terminal-dragonfly-72.conf > high-fidelity/model-result.txt 2> high-fidelity/model-result.stderr.txt
+
+# RUNNING CODES with SURROGATE MODEL
+mpirun -np $np \
+  "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \
+     --workload_type=online --workload_conf_file="$work_alloc_file" \
+     --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \
+     --alloc_file=72-dragonfly-full.alloc --end=10000.01e3 \
+     --extramem=$extramem --lp-io-dir=hybrid/codes-output \
+     -- terminal-dragonfly-72-hybrid.conf > hybrid/model-result.txt 2> hybrid/model-result.stderr.txt
+
+# SAME AS BEFORE BUT NONFREEZING
+mpirun -np $np \
+  "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \
+     --workload_type=online --workload_conf_file="$work_alloc_file" \
+     --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \
+     --alloc_file=72-dragonfly-full.alloc --end=10000.01e3 \
+     --extramem=$extramem --lp-io-dir=hybrid-lite/codes-output \
+     -- terminal-dragonfly-72-hybrid-lite.conf > hybrid-lite/model-result.txt 2> hybrid-lite/model-result.stderr.txt
diff --git a/scripts/reproducibility-pads23/python-scripts/delay-in-window.py b/scripts/reproducibility-pads23/python-scripts/delay-in-window.py
new file mode 100644
index 00000000..b59061de
--- /dev/null
+++ b/scripts/reproducibility-pads23/python-scripts/delay-in-window.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+import glob
+import sys
+import fileinput
+import pathlib
+from typing import Any
+import argparse
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+ndarray = np.ndarray[Any, np.dtype[np.float64]]
+
+
+def collect_data_numpy(
+    path: pathlib.Path | str,
+    filepreffix: str,
+    delimiter: str | None = None,
+    dtype: Any = int
+) -> np.ndarray[Any, Any]:
+    escaped_path = pathlib.Path(glob.escape(path))  # type: ignore
+    stat_files = glob.glob(str(escaped_path / f"{filepreffix}-gid=*.txt"))
+    if not stat_files:
+        print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr)
+        exit(1)
+
+    return np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype,
+                      comments='#')
+
+
+def mean_and_std(array: ndarray) -> tuple[float, float]:
+    return np.mean(array), np.std(array)  # type: ignore
+
+
+def find_mean_and_std_through_window(
+    delays: ndarray,
+    n_windows: int = 100,
+    start_time: float = 0.0,
+    end_time: float | None = None,
+    start_time_col: int = 8,
+    delay_col: int = 9,
+) -> tuple[ndarray, ndarray, ndarray]:
+
+    if end_time is None:
+        end_time = delays[:, start_time_col].max()
+
+    window_size = (end_time - start_time) / n_windows
+    windows = window_size * (np.arange(n_windows) + 1)
+    mean_and_std_through_windows = np.zeros((n_windows, 2))
+    for i in range(n_windows):
+        delays_within_window = np.bitwise_and(i * window_size <= delays[:, start_time_col],
+                                              delays[:, start_time_col] < (i+1) * window_size)
+        if delays_within_window.sum() > 0:
+            mean_and_std_through_windows[i] = mean_and_std(delays[delays_within_window, delay_col])
+        else:
+            mean_and_std_through_windows[i] = -1
+
+    last_good, = np.where(mean_and_std_through_windows[:, 0] == -1)
+    if last_good.size > 0:
+        windows = windows[:last_good[0]]
+        mean_and_std_through_windows = mean_and_std_through_windows[:last_good[0]]
+
+    return windows, mean_and_std_through_windows[:, 0], mean_and_std_through_windows[:, 1]
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--latencies', type=pathlib.Path, help='Folder to latencies',
+                        required=True)
+    parser.add_argument('--output', type=pathlib.Path, help='Directory to save aggregated stats',
+                        required=True)
+    parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
+                        default=100)
+    parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
+                        required=True)
+    args = parser.parse_args()
+
+    # experiment = 'vanilla-synthetic1-10ms'  # name of experiment
+    plotting = False
+    dist_type = 'all'  # options: all, same_router, same_group, other_group
+    computing = True
+    loading = not computing
+    raw_data = True
+    # end_time = 10e6  # 10 ms
+    # end_time = 100e6  # 100 ms
+    end_time = args.end
+    # n_windows = 100
+    n_windows = args.windows
+
+    # out_file_name = f"{experiment}_windowed_packet_latency_{dist_type}.npz"
+    out_file_name = f"{args.output}.npz"
+
+    if computing:
+        if raw_data:
+            # Columns within the csv file that matter to us
+            start_time_col = 8
+            delay_col = 10
+            delays = collect_data_numpy(args.latencies, 'packets-delay', delimiter=',',
+                                        dtype=np.dtype('float'))
+        else:
+            start_time_col = 8
+            delay_col = 9
+            delays = np.loadtxt("packets-delay.csv", skiprows=1, delimiter=",")
+
+        # Delays distributions
+        if dist_type != 'all':
+            delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
+            delays_same_group = np.bitwise_xor(
+                (delays[:, 0] // 8) == (delays[:, 1] // 8),
+                delays_same_router)
+            delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8)
+
+            # Selecting which distribution to display
+            if dist_type == 'same_router':
+                distribution = delays_same_router
+            elif dist_type == 'same_group':
+                distribution = delays_same_group
+            elif dist_type == 'other_group':
+                distribution = delays_out_group
+
+        # Computing windowed mean and stds + plotting
+        windows, means, stds = find_mean_and_std_through_window(
+            delays if dist_type == 'all' else delays[distribution],
+            n_windows=n_windows, delay_col=delay_col, end_time=end_time)
+
+        # Save
+        np.savez(out_file_name,
+                 windows=windows, means=means, stds=stds)
+
+    if loading:
+        data = np.load(out_file_name)
+        windows, means, stds = data['windows'], data['means'], data['stds']
+
+    if plotting:
+        plt.errorbar(windows, means, yerr=.2*stds)
+        plt.show()
diff --git a/scripts/reproducibility-pads23/python-scripts/generate-table.py b/scripts/reproducibility-pads23/python-scripts/generate-table.py
new file mode 100644
index 00000000..58a73bee
--- /dev/null
+++ b/scripts/reproducibility-pads23/python-scripts/generate-table.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import argparse
+import pathlib
+from subprocess import check_output
+from glob import glob
+import csv
+
+import numpy as np
+
+
+def determine_mse(
+    condensed: pathlib.Path, cut_off: int = 80, check_last: bool = True
+) -> tuple[float, float]:
+    """Returns in us**2 (not ns**2)"""
+    data_high_fidelity = np.load(f"{condensed}/packet_latency-high-fidelity.npz")
+    data_hybrid = np.load(f"{condensed}/packet_latency-hybrid.npz")
+    data_hybrid_lite = np.load(f"{condensed}/packet_latency-hybrid-lite.npz")
+
+    windows_hf, means_hf = data_high_fidelity['windows'], data_high_fidelity['means']
+    windows_hybrid, means_hybrid = data_hybrid['windows'], data_hybrid['means']
+    means_hybrid_lite = data_hybrid_lite['means']
+
+    assert np.all(windows_hf == windows_hybrid)
+    if check_last:
+        n_windows = windows_hf.shape[0]
+        means_hybrid_lite = means_hybrid_lite[:n_windows]
+
+    n = means_hf[cut_off:].shape[0]
+    mse_hybrid_lite = np.sum((means_hf[cut_off:] - means_hybrid_lite[cut_off:])**2) / n
+    mse_hybrid = np.sum((means_hf[cut_off:] - means_hybrid[cut_off:])**2) / n
+
+    return mse_hybrid / 1e6, mse_hybrid_lite / 1e6
+
+
+def get_runtimes(path: pathlib.Path) -> tuple[float, float, float]:
+    with open(path, newline='') as f:
+        reader = csv.reader(f)
+        csv_file = [row for row in reader]
+
+    assert len(csv_file) == 4
+    assert csv_file[0][8] == 'runtime'
+    return float(csv_file[1][8]), float(csv_file[2][8]), float(csv_file[3][8])
+
+
+def get_total_packets(latencies_dir: pathlib.Path) -> int:
+    out = check_output(
+        ['wc', '-l', '--total=always'] + glob(str(latencies_dir / "packets-delay-*"))
+    ).split()
+    assert out[-1] == b'total'
+    return int(out[-2])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--folder-10ms', type=pathlib.Path, required=True,
+                        help='Execution folder for 10 ms')
+    parser.add_argument('--folder-100ms', type=pathlib.Path, required=True,
+                        help='Execution folder for 100 ms')
+    args = parser.parse_args()
+
+    packets_hf = get_total_packets(args.folder_10ms / 'high-fidelity' / 'packet-latency-trace')
+    packets_hybrid = get_total_packets(args.folder_10ms / 'hybrid' / 'packet-latency-trace')
+    packets_hybrid_lite = get_total_packets(
+        args.folder_10ms / 'hybrid-lite' / 'packet-latency-trace')
+    throughput_hf = packets_hf * 1024 / 1024**3 * 100
+    throughput_hybrid = packets_hybrid * 1024 / 1024**3 * 100
+    throughput_hybrid_lite = packets_hybrid_lite * 1024 / 1024**3 * 100
+    runtime_hf, runtime_hybrid, runtime_hybrid_lite = get_runtimes(args.folder_10ms / 'ross.csv')
+    throughput_hybrid_dis = (throughput_hybrid / throughput_hf - 1) * 100
+    throughput_hybrid_lite_dis = (throughput_hybrid_lite / throughput_hf - 1) * 100
+
+    mse_hybrid, mse_hybrid_lite = determine_mse(args.folder_10ms / 'condensed')
+    print("10 ms Results")
+    print("Throughput (GB/s) high-fidelity:", throughput_hf)
+    print("Throughput (GB/s) hybrid:", throughput_hybrid)
+    print("Throughput (GB/s) hybrid-lite:", throughput_hybrid_lite)
+    print("Throughput (%) hybrid discrepancy:", throughput_hybrid_dis)
+    print("Throughput (%) hybrid-lite discrepancy:", throughput_hybrid_lite_dis)
+    print("Runtime (s) high-fidelity:", runtime_hf)
+    print("Runtime (s) hybrid:", runtime_hybrid)
+    print("Runtime (s) hybrid-lite:", runtime_hybrid_lite)
+    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
+    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
+    print()
+
+    packets_hf = get_total_packets(args.folder_100ms / 'high-fidelity' / 'packet-latency-trace')
+    packets_hybrid = get_total_packets(args.folder_100ms / 'hybrid' / 'packet-latency-trace')
+    packets_hybrid_lite = get_total_packets(
+        args.folder_100ms / 'hybrid-lite' / 'packet-latency-trace')
+    throughput_hf = packets_hf * 1024 / 1024**3 * 10
+    throughput_hybrid = packets_hybrid * 1024 / 1024**3 * 10
+    throughput_hybrid_lite = packets_hybrid_lite * 1024 / 1024**3 * 10
+    runtime_hf, runtime_hybrid, runtime_hybrid_lite = get_runtimes(args.folder_100ms / 'ross.csv')
+    throughput_hybrid_dis = (throughput_hybrid / throughput_hf - 1) * 100
+    throughput_hybrid_lite_dis = (throughput_hybrid_lite / throughput_hf - 1) * 100
+
+    print("100 ms Results")
+    print("Throughput (GB/s) high-fidelity:", throughput_hf)
+    print("Throughput (GB/s) hybrid:", throughput_hybrid)
+    print("Throughput (GB/s) hybrid-lite:", throughput_hybrid_lite)
+    print("Throughput (%) hybrid discrepancy:", throughput_hybrid_dis)
+    print("Throughput (%) hybrid-lite discrepancy:", throughput_hybrid_lite_dis)
+    print("Runtime (s) high-fidelity:", runtime_hf)
+    print("Runtime (s) hybrid:", runtime_hybrid)
+    print("Runtime (s) hybrid-lite:", runtime_hybrid_lite)
+    mse_hybrid, mse_hybrid_lite = determine_mse(args.folder_100ms / 'condensed',
+                                                cut_off=90, check_last=False)
+    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
+    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
new file mode 100644
index 00000000..fe7b424e
--- /dev/null
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -0,0 +1,255 @@
+from __future__ import annotations
+
+import argparse
+import pathlib
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.ticker import EngFormatter
+
+
+time_formatter_ns = EngFormatter()
+time_formatter_ns.ENG_PREFIXES = {0: 'ns', 3: 'us', 6: 'ms', 9: 's'}
+
+
+if True and __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--latencies', type=pathlib.Path, help='Folder with condensed latencies',
+                        required=True)
+    parser.add_argument('--output', type=pathlib.Path, help='Name of output figure',
+                        required=True)
+    args = parser.parse_args()
+
+    latex = True
+
+    if latex:
+        matplotlib.use("pgf")
+        matplotlib.rcParams.update({
+            "pgf.texsystem": "pdflatex",
+            'font.family': 'serif',
+            'font.size': 16,
+            'text.usetex': True,
+            'pgf.rcfonts': False,
+        })
+
+    data_high_fidelity = np.load(f"{args.latencies}/packet_latency-high-fidelity.npz")
+    data_hybrid = np.load(f"{args.latencies}/packet_latency-hybrid.npz")
+    data_hybrid_lite = np.load(f"{args.latencies}/packet_latency-hybrid-lite.npz")
+
+    windows_hf, means_hf, stds_hf = \
+        data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds']
+    windows_hybrid, means_hybrid, stds_hybrid = \
+        data_hybrid['windows'], data_hybrid['means'], data_hybrid['stds']
+    windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \
+        data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds']
+
+    assert np.all(windows_hf == windows_hybrid)
+    n_windows = windows_hf.shape[0]
+    windows_hybrid_lite = windows_hybrid_lite[:n_windows]
+    means_hybrid_lite = means_hybrid_lite[:n_windows]
+    stds_hybrid_lite = stds_hybrid_lite[:n_windows]
+    assert np.all(windows_hybrid_lite == windows_hybrid)
+
+    std_factor = 0.2
+
+    fig, ax = plt.subplots(figsize=(7, 3.8))
+    ax.vlines = ax.vlines([2e6, 3e6, 8e6], -3e3, 125e3, color='#AAA', ls='-')
+    ax.vlines.set_clip_on(False)
+
+    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    ax.annotate("", xy=(1.95e6, 80e3), xytext=(1.0e6, 98e3), **arrow_color)
+    ax.annotate("switch", xy=(3.1e6, 118e3), xytext=(4.8e6, 105e3), **arrow_color)
+    ax.annotate("", xy=(7.9e6, 118e3), xytext=(6.0e6, 110e3), **arrow_color)
+    ax.text(1.9e6, 1e5, "start\ntracking", color='#333', ha='right')
+
+    # plt.errorbar(windows_hf, means_hf, yerr=std_factor*stds_hf)
+    # plt.errorbar(windows_hybrid, means_hybrid, yerr=std_factor*stds_hybrid)
+    # plt.errorbar(windows_hybrid_lite, means_hybrid_lite,
+    #              yerr=std_factor*stds_hybrid_lite)
+    ax.plot(windows_hf, means_hf, label='high-fidelity only')
+    ax.fill_between(windows_hf,
+                    means_hf - std_factor*stds_hybrid,
+                    means_hf + std_factor*stds_hybrid,
+                    color='#00F5')
+    ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite')
+    ax.fill_between(windows_hybrid_lite,
+                    means_hybrid_lite - std_factor*stds_hybrid,
+                    means_hybrid_lite + std_factor*stds_hybrid,
+                    color='#F005')
+    ax.plot(windows_hybrid, means_hybrid, label='hybrid')
+    ax.fill_between(windows_hybrid,
+                    means_hybrid - std_factor*stds_hybrid,
+                    means_hybrid + std_factor*stds_hybrid,
+                    color='#0F05')
+
+    # ax.text(2e6, 125e3, "start latency tracking", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(3e6, 125e3, "switch to surrogate", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(8e6, 130e3, "switch to\nhigh-definition", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Average Packet Latency')
+    ax.set_ylim(0, 122e3)
+    ax.legend(bbox_to_anchor=(.54, .02), loc='lower center', borderaxespad=0)
+    ax.yaxis.set_major_formatter(time_formatter_ns)
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+
+    n = means_hf[80:].shape[0]
+    mse_hybrid_lite = \
+        np.sum((means_hf[80:] - means_hybrid_lite[80:])**2) / n
+    mse_hybrid = \
+        np.sum((means_hf[80:] - means_hybrid[80:])**2) / n
+    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
+    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
+
+    if latex:
+        plt.tight_layout()
+        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
+        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
+    else:
+        plt.show()
+
+
+if False and __name__ == '__main__':
+    data_high_fidelity = \
+        np.load("data/vanilla-synthetic1-100ms_windowed_packet_latency_all.npz")
+    data_hybrid = \
+        np.load("data/surrogate-freezing-synthetic1-100ms_windowed_packet_latency_all.npz")
+    data_hybrid_lite = \
+        np.load("data/surrogate-nonfrozen-synthetic1-100ms_windowed_packet_latency_all.npz")
+
+    windows_hf, means_hf, stds_hf = \
+        data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds']
+    windows_hybrid, means_hybrid, stds_hybrid = \
+        data_hybrid['windows'], data_hybrid['means'], data_hybrid['stds']
+    windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \
+        data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds']
+
+    assert np.all(windows_hf == windows_hybrid) \
+        and np.all(windows_hybrid_lite == windows_hybrid)
+
+    std_factor = 0.2
+
+    fig, ax = plt.subplots(figsize=(7, 6))
+    # ax.vlines = ax.vlines([2e6, 3e6, 8e6], -3e3, 125e3, color='#AAA', ls='-')
+    # ax.vlines.set_clip_on(False)
+
+    ax.plot(windows_hf, means_hf, label='high-fidelity only')
+    ax.fill_between(windows_hf,
+                    means_hf - std_factor*stds_hybrid,
+                    means_hf + std_factor*stds_hybrid,
+                    color='#00F5')
+    ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite')
+    ax.fill_between(windows_hybrid_lite,
+                    means_hybrid_lite - std_factor*stds_hybrid,
+                    means_hybrid_lite + std_factor*stds_hybrid,
+                    color='#F005')
+    ax.plot(windows_hybrid, means_hybrid, label='hybrid')
+    ax.fill_between(windows_hybrid,
+                    means_hybrid - std_factor*stds_hybrid,
+                    means_hybrid + std_factor*stds_hybrid,
+                    color='#0F05')
+
+    ax.yaxis.set_major_formatter(time_formatter_ns)
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+
+    n = means_hf[90:].shape[0]
+    mse_hybrid_lite = \
+        np.sum((means_hf[90:] - means_hybrid_lite[90:])**2) / n
+    mse_hybrid = \
+        np.sum((means_hf[90:] - means_hybrid[90:])**2) / n
+    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
+    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
+
+    plt.show()
+
+
+if False and __name__ == '__main__':
+    latex = True
+
+    if latex:
+        matplotlib.use("pgf")
+        matplotlib.rcParams.update({
+            "pgf.texsystem": "pdflatex",
+            'font.family': 'serif',
+            'font.size': 16,
+            'text.usetex': True,
+            'pgf.rcfonts': False,
+        })
+
+    data_high_fidelity = \
+        np.load("data/vanilla-ping-pong-10ms_windowed_packet_latency_all.npz")
+    data_hybrid = \
+        np.load("data/surrogate-freezing-ping-pong-10ms_windowed_packet_latency_all.npz")
+    data_hybrid_lite = \
+        np.load("data/surrogate-nonfrozen-ping-pong-10ms_windowed_packet_latency_all.npz")
+
+    windows_hf, means_hf, stds_hf = \
+        data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds']
+    windows_hybrid, means_hybrid, stds_hybrid = \
+        data_hybrid['windows'], data_hybrid['means'], data_hybrid['stds']
+    windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \
+        data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds']
+
+    assert np.all(windows_hf == windows_hybrid) \
+        and np.all(windows_hybrid_lite == windows_hybrid)
+
+    std_factor = 0.2
+
+    fig, ax = plt.subplots(figsize=(7, 3.8))
+    ax.vlines = ax.vlines([0, 1e6, 8e6], 2.55e3, 4.45e3, color='#AAA', ls='-')
+    ax.vlines.set_clip_on(False)
+
+    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    ax.annotate("", xy=(0.1e6, 2.65e3), xytext=(1.5e6, 2.95e3), **arrow_color)
+    ax.text(1.5e6, 2.95e3, "start latency tracking", color='#333', ha='left')
+    ax.annotate("switch", xy=(1.1e6, 2.65e3), xytext=(4.0e6, 2.75e3), **arrow_color)
+    ax.annotate("", xy=(7.9e6, 2.65e3), xytext=(5.2e6, 2.75e3), **arrow_color)
+
+    ax.plot(windows_hf, means_hf, label='high-fidelity only')
+    ax.fill_between(windows_hf,
+                    means_hf - std_factor*stds_hybrid,
+                    means_hf + std_factor*stds_hybrid,
+                    color='#00F5')
+    ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite')
+    ax.fill_between(windows_hybrid_lite,
+                    means_hybrid_lite - std_factor*stds_hybrid,
+                    means_hybrid_lite + std_factor*stds_hybrid,
+                    color='#F005')
+    ax.plot(windows_hybrid, means_hybrid, label='hybrid')
+    ax.fill_between(windows_hybrid,
+                    means_hybrid - std_factor*stds_hybrid,
+                    means_hybrid + std_factor*stds_hybrid,
+                    color='#0F05')
+
+    # plt.text(0, 4.5e3, "start latency tracking", color='#333', rotation=40,
+    #          rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # plt.text(1e6, 4.5e3, "switch to surrogate", color='#333', rotation=40,
+    #          rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # plt.text(8e6, 4.5e3, "switch to\nhigh-definition", color='#333', rotation=40,
+    #          rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Average Packet Latency')
+    ax.set_ylim(2.6e3, 4.4e3)
+    ax.legend(bbox_to_anchor=(.50, .28), loc='lower center', borderaxespad=0)
+    ax.yaxis.set_major_formatter(time_formatter_ns)
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+
+    n = means_hf[90:].shape[0]
+    mse_hybrid_lite = \
+        np.sum((means_hf[90:] - means_hybrid_lite[90:])**2) / n
+    mse_hybrid = \
+        np.sum((means_hf[90:] - means_hybrid[90:])**2) / n
+    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
+    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
+
+    if latex:
+        plt.tight_layout()
+        plt.savefig('figures/windowed-delay-ping-pong-10ms.pgf', bbox_inches='tight')
+        plt.savefig('figures/windowed-delay-ping-pong-10ms.pdf', bbox_inches='tight')
+    else:
+        plt.show()
diff --git a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
new file mode 100644
index 00000000..827f59b3
--- /dev/null
+++ b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
@@ -0,0 +1,176 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.ticker import EngFormatter
+import pathlib
+import argparse
+
+from typing import Any
+array_type = np.ndarray[Any, Any]
+
+
+time_formatter_ns = EngFormatter()
+time_formatter_ns.ENG_PREFIXES = {0: 'ns', 3: 'us', 6: 'ms', 9: 's'}
+bytes_formater = EngFormatter(unit='B')
+
+
+def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_type, array_type]:
+    port_utilization = np.loadtxt(filename, delimiter=',', dtype=float, skiprows=1)
+
+    # finding all snapshot timestamps
+    timestamps = np.unique(port_utilization[:, 0])
+    assert len(timestamps.shape) == 1
+
+    # Finding total utilization per snapshot
+    total_utilization = np.zeros_like(timestamps)
+    for i, ts in enumerate(timestamps):
+        total_utilization[i] = port_utilization[port_utilization[:, 0] == ts, 2:].sum()
+
+    return timestamps, total_utilization
+
+
+if True and __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--experiment-folder', type=pathlib.Path,
+                        help='Folder where experiment was run',
+                        required=True)
+    parser.add_argument('--output', type=pathlib.Path, help='Name of output figure',
+                        required=True)
+    args = parser.parse_args()
+
+    latex = True
+    dir_data = args.experiment_folder
+    # dir_data = pathlib.Path('data/synthetic1')
+    cut1 = 30
+    cut2 = 79
+
+    if latex:
+        matplotlib.use("pgf")
+        matplotlib.rcParams.update({
+            "pgf.texsystem": "pdflatex",
+            'font.family': 'serif',
+            'font.size': 16,
+            'text.usetex': True,
+            'pgf.rcfonts': False,
+        })
+
+    ts1, utilization_hf = load_aggregated_utilization(
+        dir_data / "high-fidelity" / "codes-output" / "dragonfly-snapshots.csv")
+    ts2, utilization_hybrid = load_aggregated_utilization(
+        dir_data / "hybrid" / "codes-output" / "dragonfly-snapshots.csv")
+    ts3, utilization_hybrid_lite = load_aggregated_utilization(
+        dir_data / "hybrid-lite" / "codes-output" / "dragonfly-snapshots.csv")
+
+    # plotting
+    fig, ax = plt.subplots(figsize=(7, 3.8))
+    vlines = ax.vlines([2e6, 3e6, 8e6], -0.4e6, 7.15e6, color='#AAA', ls='-')
+    vlines.set_clip_on(False)
+
+    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    ax.annotate("", xy=(2.1e6, 0e6), xytext=(3.5e6, 1.1e6), **arrow_color)
+    ax.annotate("switch", xy=(3.1e6, 0.1e6), xytext=(4.8e6, 0.5e6), **arrow_color)
+    ax.annotate("", xy=(7.9e6, 0.1e6), xytext=(6.0e6, 0.5e6), **arrow_color)
+    ax.text(3.5e6, 1.1e6, "start latency tracking", color='#333', ha='left')
+
+    ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue')
+
+    ax.plot(ts3[:cut1], utilization_hybrid_lite[:cut1],
+            label="hybrid-lite", color='red')
+    ax.plot(ts3[cut1-1:cut2+1], utilization_hybrid_lite[cut1-1:cut2+1],
+            color='red', ls='--')
+    ax.plot(ts3[cut2:], utilization_hybrid_lite[cut2:], color='red')
+
+    ax.plot(ts2[:cut1], utilization_hybrid[:cut1], label="hybrid",
+            color='green')
+    ax.plot(ts2[cut1-1:cut2+1], utilization_hybrid[cut1-1:cut2+1], color='green', ls='--')
+    ax.plot(ts2[cut2:], utilization_hybrid[cut2:], color='green')
+
+    # ax.text(2e6, 7.4e6, "start latency tracking", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(3e6, 7.4e6, "switch to surrogate", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(8e6, 7.4e6, "switch to\nhigh-definition", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Total Buffer Port Occupancy')
+    ax.set_ylim(-0.2e6, 6.9e6)
+    ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0)
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+    ax.yaxis.set_major_formatter(bytes_formater)
+
+    if latex:
+        plt.tight_layout()
+        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
+        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
+    else:
+        plt.show()
+
+
+if False and __name__ == '__main__':
+    latex = True
+    dir_data = pathlib.Path('data/ping-pong')
+    cut1 = 10
+    cut2 = 79
+
+    if latex:
+        matplotlib.use("pgf")
+        matplotlib.rcParams.update({
+            "pgf.texsystem": "pdflatex",
+            'font.family': 'serif',
+            'font.size': 16,
+            'text.usetex': True,
+            'pgf.rcfonts': False,
+        })
+
+    ts1, utilization_hf = \
+        load_aggregated_utilization(dir_data / "router-snapshots-vanilla.csv")
+    ts2, utilization_hybrid = \
+        load_aggregated_utilization(dir_data / "router-snapshots-surrogate-freezing.csv")
+    ts3, utilization_hybrid_lite = \
+        load_aggregated_utilization(dir_data / "router-snapshots-surrogate-nonfrozen.csv")
+
+    # plotting
+    fig, ax = plt.subplots(figsize=(7, 3.8))
+    vlines = ax.vlines([0, 1e6, 8e6], -0.05e5, 1.11e5, color='#AAA', ls='-')
+    vlines.set_clip_on(False)
+
+    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    ax.annotate("", xy=(0.1e6, 0e5), xytext=(2e6, 0.16e5), **arrow_color)
+    ax.annotate("switch", xy=(1.1e6, 0.0e5), xytext=(4.8e6, 0.06e5), **arrow_color)
+    ax.annotate("", xy=(7.9e6, 0.0e5), xytext=(6.0e6, 0.06e5), **arrow_color)
+    ax.text(2e6, 0.16e5, "start latency tracking", color='#333', ha='left')
+
+    ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue')
+
+    ax.plot(ts3[:cut1], utilization_hybrid_lite[:cut1],
+            label="hybrid-lite", color='red')
+    ax.plot(ts3[cut1-1:cut2+1], utilization_hybrid_lite[cut1-1:cut2+1],
+            color='red', ls='--')
+    ax.plot(ts3[cut2:], utilization_hybrid_lite[cut2:], color='red')
+
+    ax.plot(ts2[:cut1], utilization_hybrid[:cut1], label="hybrid",
+            color='green')
+    ax.plot(ts2[cut1-1:cut2+1], utilization_hybrid[cut1-1:cut2+1], color='green', ls='--')
+    ax.plot(ts2[cut2:], utilization_hybrid[cut2:], color='green')
+
+    # ax.text(0, 1.15e5, "start latency tracking", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(1e6, 1.15e5, "switch to surrogate", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(8e6, 1.15e5, "switch to\nhigh-definition", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Total Buffer Port Occupancy')
+    ax.set_ylim(-0.02e5, 1.08e5)
+    ax.legend(bbox_to_anchor=(.48, .28), loc='lower center', borderaxespad=0)
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+    ax.yaxis.set_major_formatter(bytes_formater)
+
+    if latex:
+        plt.tight_layout()
+        plt.savefig('figures/port-occupancy-ping-pong.pgf', bbox_inches='tight')
+        plt.savefig('figures/port-occupancy-ping-pong.pdf', bbox_inches='tight')
+    else:
+        plt.show()
diff --git a/scripts/reproducibility-pads23/reproduce.sh b/scripts/reproducibility-pads23/reproduce.sh
new file mode 100644
index 00000000..b0bd2e4f
--- /dev/null
+++ b/scripts/reproducibility-pads23/reproduce.sh
@@ -0,0 +1,45 @@
+CODES_SOURCE_DIR="$PWD/../.."
+CODES_BUILD_DIR="$PWD/../../build"
+EXP_SCRIPTS="$PWD/experiments"
+
+
+# Running experiments
+mkdir -p results/10ms results/100ms
+
+pushd results/10ms
+bash -x "$EXP_SCRIPTS"/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh \
+  "$CODES_BUILD_DIR" "$CODES_SOURCE_DIR" "$EXP_SCRIPTS/conf-files/"
+popd
+
+pushd results/100ms
+bash -x "$EXP_SCRIPTS"/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh \
+  "$CODES_BUILD_DIR" "$CODES_SOURCE_DIR" "$EXP_SCRIPTS/conf-files/"
+popd
+
+
+# Generating figures
+mkdir results/10ms/condensed results/100ms/condensed
+
+for exp in {10,100}; do
+  for kind in {high-fidelity,hybrid,hybrid-lite}; do
+    python python-scripts/delay-in-window.py \
+      --latencies results/${exp}ms/$kind/packet-latency-trace \
+      --output results/${exp}ms/condensed/packet_latency-$kind \
+      --end ${exp}e6
+  done
+done
+
+mkdir figures
+
+python python-scripts/plot-packet-latency.py \
+  --latencies results/10ms/condensed \
+  --output figures/packet_latency-10ms
+
+python python-scripts/port-occupancy.py \
+  --experiment-folder results/10ms --output figures/port-occupancy-10ms
+
+
+# Generating table
+python python-scripts/generate-table.py \
+  --folder-10ms results/10ms --folder-100ms results/100ms \
+  > results/sumarized-table.txt

From d6c979eae31976ef835f8bdf6f84b8bc6eb78cb1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 9 Jul 2023 06:49:38 -0400
Subject: [PATCH 033/188] Dragonfly's surrogate model can now handle msg_sz >
 packet_sz

The actual variable names in `src/networks/model-net/dragonfly-dally.C'
are:

- msg->total_size  (for the size of the message)
- msg->packet_size
---
 codes/net/dragonfly-dally.h                |   4 +-
 codes/surrogate.h                          |   4 +-
 src/networks/model-net/core/model-net-lp.c |   1 +
 src/networks/model-net/dragonfly-dally.C   | 612 +++++++++++++--------
 src/util/surrogate.c                       |  18 +-
 5 files changed, 413 insertions(+), 226 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 6d009008..55078e13 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -109,6 +109,7 @@ struct terminal_dally_message
    unsigned long long * rc_qos_data;
    int * rc_qos_status;
 
+   // TODO (elkin): all these fields to store information for rollback purposes got out of control, the rc_stack was created for things like this! Refactor this out!
    short saved_send_loop;
    tw_stime saved_available_time;
    tw_stime saved_min_lat;
@@ -123,8 +124,9 @@ struct terminal_dally_message
 
    // To use in rollback calls
    tw_stime saved_last_in_queue_time;
-   tw_stime saved_in_queue_delay;
+   tw_stime saved_next_packet_delay;
    tw_stime msg_new_mn_event;
+   uint64_t saved_remaining_packet_chunks;
 };
 
 #ifdef __cplusplus
diff --git a/codes/surrogate.h b/codes/surrogate.h
index 16957338..79255bcd 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -38,7 +38,7 @@ struct packet_start {
     unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
     double travel_start_time;
     double workload_injection_time; // this is when the workload passed down the event to model-net
-    double delay_at_queue_head;  // delay for this packet to be processed from previous packet in the queue
+    double processing_packet_delay;  // delay for this packet to be processed from previous packet in the queue
     uint32_t packet_size;
     void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
     void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
@@ -46,7 +46,7 @@ struct packet_start {
 
 struct packet_end {
     double travel_end_time;
-    double delay_at_queue_head_next;  // Delay to start processing next packet
+    double next_packet_delay;  // Delay to start processing next packet
 };
 
 // Definition of functions needed to define a predictor
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 73c39b2f..eee8b3f8 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -570,6 +570,7 @@ void model_net_base_event(
         tw_bf * b,
         model_net_wrap_msg * m,
         tw_lp * lp){
+    memset(b, 0, sizeof(tw_bf));
 
     if(m->h.magic != model_net_base_magic)
         printf("\n LP ID mismatched %llu\n", LLU(lp->gid));
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 5dc1190c..9aa47fe1 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3,7 +3,7 @@
  * See LICENSE in top-level directory
  * 
  * Originally written by Misbah Mubarak
- * Updated by Neil McGlohon
+ * Updated by Neil McGlohon and Elkin Cruz-Camacho
  *  
  * A 1D specific dragonfly custom model - diverged from dragonfly-custom.C
  * Differs from dragonfly.C in that it allows for the custom features typically found in
@@ -596,10 +596,11 @@ struct terminal_state
     // arrive faster than others, so a list like the one above is not feasible
     // to store in order efficiently their arrival)
     priority_queue<struct packet_double_val, vector<struct packet_double_val>, decltype(packet_double_val_greater_cmp)> sent_packets_latency;
+    // received (and not completed, yet) packets. The value associated to a key is the remaining number of "bytes" to receive before the packet is consumed totally. If a packet size == chunk size, this map will never be used/filled
+    map<struct packet_id, uint32_t> remaining_sz_packets;
 
     // Stores the last time in which a packet was processed (time at which a T_GENERATE event was processed)
     double last_in_queue_time;
-    double in_queue_delay;
     // The predictor kicks in on surrogate mode and predicts the time a packet will take to its destination
     void * predictor_data;
 
@@ -2361,7 +2362,7 @@ static void setup_packet_latency_path(char const * const dir_to_save) {
         tw_error(TW_LOC, "File %s could not be opened", filename_path);
     }
 
-    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,is_surrogate_on,is_predicted,size,workload_injection,delay_at_queue_head,start,end,latency\n");
+    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,is_surrogate_on,is_predicted,size,workload_injection,next_packet_delay,start,end,latency\n");
 }
 
 /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
@@ -2820,13 +2821,15 @@ static void packet_latency_save_to_file(
         unsigned int terminal_id,
         struct packet_start start,
         struct packet_end end,
+        bool surrogate_on,
         bool is_predicted
 ) {
+    if (end.travel_end_time > g_tw_ts_end) { return; } // This packet could never arrive to its destination!
     fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f\n",
             terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
-            is_surrogate_on, is_predicted,
+            surrogate_on, is_predicted,
             start.packet_size,
-            start.workload_injection_time, start.delay_at_queue_head,
+            start.workload_injection_time, end.next_packet_delay,
             start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
 }
 
@@ -2849,13 +2852,13 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
         )
     {
         auto start = s->sent_packets.front();
-        double const delay_at_queue_head_next = s->sent_packets[1].delay_at_queue_head;
+        double const next_packet_delay = s->sent_packets[1].processing_packet_delay;
         struct packet_end end = {
             .travel_end_time = s->sent_packets_latency.top().value,
-            .delay_at_queue_head_next = delay_at_queue_head_next,
+            .next_packet_delay = next_packet_delay,
         };
         if (packet_latency_f) {
-            packet_latency_save_to_file(s->terminal_id, start, end, false);
+            packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, false);
         }
         if (surrogate_configured && !is_surrogate_on) {
             assert(terminal_predictor != NULL);
@@ -2939,24 +2942,22 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
                 end.travel_end_time = notification_events_map[start.packet_ID];
             }
             if (s->sent_packets.size() >= 2) {
-                end.delay_at_queue_head_next = s->sent_packets[1].delay_at_queue_head;
+                end.next_packet_delay = s->sent_packets[1].processing_packet_delay;
             } else {
-                end.delay_at_queue_head_next = -1;
+                end.next_packet_delay = -1;
             }
-            packet_latency_save_to_file(s->terminal_id, start, end, false);
+            packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, false);
         }
         // The packet has not been delievered, or we haven't received the notification yet.
         // Send directly to destination and notify of zombie event
-        else {
-            double latency = predicted_end.travel_end_time - start.travel_start_time;
-            double arrival = start.travel_start_time + latency; // this is "equivalent" to end.travel_end_time (we do it because floating point operations are weird, and it's better to err on the side of spending some cycles computing the addition rather than assuming that things will work out correctly)
-            if (arrival < tw_now(lp)) {
-                arrival = tw_now(lp);
+        else if (freeze_network_on_switch) {
+            double latency = predicted_end.travel_end_time - tw_now(lp);
+            if (predicted_end.travel_end_time < tw_now(lp) || latency < 0) {
+                predicted_end.travel_end_time = tw_now(lp);
                 latency = 0;
             }
-            
-            predicted_end.travel_end_time = arrival;
-            packet_latency_save_to_file(s->terminal_id, start, predicted_end, true);
+
+            packet_latency_save_to_file(s->terminal_id, start, predicted_end, is_surrogate_on, true);
 
             assert(start.message_data);
             terminal_dally_message * const msg_data = (terminal_dally_message*) start.message_data;
@@ -3026,16 +3027,19 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
     s->data_size_ross_sample        = frozen_state->data_size_ross_sample;
     s->total_msg_size               = frozen_state->total_msg_size;
     s->finished_msgs                = frozen_state->finished_msgs;
-    s->in_queue_delay               = frozen_state->in_queue_delay;
+    s->rank_tbl_pop                 = frozen_state->rank_tbl_pop;
     memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
     memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
     memcpy(&s->sent_packets_latency, &frozen_state->sent_packets_latency, sizeof(s->sent_packets_latency));
+    memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets));
+    memcpy(&s->rank_tbl,             &frozen_state->rank_tbl,             sizeof(s->rank_tbl));
+    memcpy(&s->st,                   &frozen_state->st,                   sizeof(s->st));
 
     s->frozen_state = frozen_state;
 };
 
 // This function never rollsback because it's called at GVT
-// Note: this function CANNOT generate any events, because it is to be used in `dragonfly_dally_terminal_final`
+// Note: this function CANNOT generate any events, because it is to be used in `dragonfly_dally_terminal_final` too
 static void dragonfly_dally_terminal_surrogate_to_highdef(
         terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
     (void) lp;
@@ -3063,10 +3067,13 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(
     frozen_state->data_size_ross_sample        = s->data_size_ross_sample;
     frozen_state->total_msg_size               = s->total_msg_size;
     frozen_state->finished_msgs                = s->finished_msgs;
-    frozen_state->in_queue_delay               = s->in_queue_delay;
+    frozen_state->rank_tbl_pop                 = s->rank_tbl_pop;
     memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
     memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
     memcpy(&frozen_state->sent_packets_latency, &s->sent_packets_latency, sizeof(s->sent_packets_latency));
+    memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets));
+    memcpy(&frozen_state->rank_tbl,             &s->rank_tbl,             sizeof(s->rank_tbl));
+    memcpy(&frozen_state->st,                   &s->st,                   sizeof(s->st));
     memcpy(s, frozen_state, sizeof(terminal_state));
     memset(frozen_state, 0, sizeof(terminal_state));
     free(frozen_state);
@@ -3218,15 +3225,16 @@ static void terminal_dally_commit(terminal_state * s,
             .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
             .travel_start_time = msg->travel_start_time,
             .workload_injection_time = msg->msg_start_time,
-            .delay_at_queue_head = msg->saved_in_queue_delay,
+            .processing_packet_delay = -1,
             .packet_size = msg->packet_size
         };
 
         // Saving
         auto const end = (struct packet_end) {
             .travel_end_time = msg->travel_end_time,
+            .next_packet_delay = msg->saved_next_packet_delay,
         };
-        packet_latency_save_to_file(s->terminal_id, start, end, true);
+        packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, true);
     }
 
     if(msg->type == T_NOTIFY && msg->notify_type == NOTIFY_LATENCY)
@@ -3395,7 +3403,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     s->in_send_loop = (int*)calloc(p->num_rails, sizeof(int));
     s->issueIdle = (int*)calloc(p->num_rails, sizeof(int));
 
-    s->rank_tbl = NULL;
+    s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE);
     s->terminal_msgs = 
         (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**));
     s->terminal_msgs_tail = 
@@ -3455,6 +3463,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     // (see https://en.cppreference.com/w/cpp/memory/construct_at)
     new (&s->sent_packets) deque<struct packet_start>();
     new (&s->sent_packets_latency) priority_queue<struct packet_double_val, vector<struct packet_double_val>, decltype(packet_double_val_greater_cmp)>();
+    new (&s->remaining_sz_packets) set<struct packet_id, uint32_t>();
     new (&s->zombies) set<struct packet_id>();
     s->frozen_state = NULL;
 
@@ -3783,14 +3792,14 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     msg->my_hops_cur_group = -1;
 
     // Using predictor to find latency
-    tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
+    double const processing_packet_delay = tw_now(lp) - s->last_in_queue_time;
     auto start = (struct packet_start) {
         .packet_ID = msg->packet_ID,
         .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
-        .delay_at_queue_head = tw_now(lp) - time_at_queue_head,
+        .processing_packet_delay = processing_packet_delay,
         .packet_size = msg->packet_size
     };
 
@@ -3801,8 +3810,22 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     assert(arrival >= tw_now(lp));
 
     // determining injection delay
-    double const nic_ts = end.delay_at_queue_head_next;
-    msg->saved_in_queue_delay = nic_ts;
+    //tw_stime injection_ts;
+    //if (g_congestion_control_enabled) {
+    //    double bandwidth_coef = 1;
+    //    if (cc_terminal_is_abatement_active(s->local_congestion_controller)) {
+    //         bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller);
+    //    }
+    //    injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth);
+    //}
+    //else {
+    //    injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth);
+    //}
+    //tw_stime const nic_ts = injection_ts;
+    // The code above does a good job at limiting the speed in which packets are injected, so it produces good
+    // results when running in surrogate. A good model should produce similar `nic`s to what the code above
+    // does (the average predictor does just that!)
+    double const nic_ts = end.next_packet_delay;
 
     // Scheduling idle event for next packet to be processed
     bool const is_from_remote = false;
@@ -3811,7 +3834,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     s->last_in_queue_time = tw_now(lp);
 
     // Info to be used at commit time to save into file
-    msg->saved_in_queue_delay = tw_now(lp) - time_at_queue_head;
+    msg->saved_next_packet_delay = end.next_packet_delay;
     msg->travel_start_time = tw_now(lp);
     msg->travel_end_time = arrival;
 
@@ -3863,7 +3886,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     packet_gen--;
     s->packet_counter--;
 
-    s->in_queue_delay = msg->saved_in_queue_delay;
+    s->last_in_queue_time = msg->saved_last_in_queue_time;
     struct packet_start start = s->sent_packets.back();
     if (start.remote_event_data) {
         free(start.remote_event_data);
@@ -3921,11 +3944,6 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
             s->last_buf_full[msg->rail_id] = msg->saved_busy_time;
     }
 
-    if (bf->c13) {
-        s->last_in_queue_time = msg->saved_last_in_queue_time;
-        bf->c13 = 0;
-    }
-
     struct mn_stats* stat;
     stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
     stat->send_count--;
@@ -4155,16 +4173,16 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
         memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
     }
     //assert(tw_now(lp) == msg->travel_start_time);
-    tw_stime const time_at_queue_head = msg->msg_new_mn_event > s->last_in_queue_time ? msg->msg_new_mn_event : s->last_in_queue_time;
-    msg->saved_in_queue_delay = s->in_queue_delay;
-    s->in_queue_delay = tw_now(lp) - time_at_queue_head;
+    double const processing_packet_delay = tw_now(lp) - s->last_in_queue_time;
+    msg->saved_last_in_queue_time = s->last_in_queue_time;
+    s->last_in_queue_time = tw_now(lp);
     s->sent_packets.push_back((struct packet_start){
         .packet_ID = msg->packet_ID,
         .dest_terminal_lpid = msg->dest_terminal_lpid,
         .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
-        .delay_at_queue_head = s->in_queue_delay,
+        .processing_packet_delay = processing_packet_delay,
         .packet_size = msg->packet_size,
         .message_data = msg_data,
         .remote_event_data = remote_data
@@ -4253,9 +4271,6 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
             if(s->terminal_length[j][vcg] < s->params->cn_vc_size && s->issueIdle[j] == 0)
             {
                 model_net_method_idle_event2(nic_ts, 0, j, lp);
-                msg->saved_last_in_queue_time = s->last_in_queue_time;
-                s->last_in_queue_time = tw_now(lp);
-                bf->c13 = 1;
             }
             else
             {
@@ -4274,9 +4289,6 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     else {
         if (s->terminal_length[msg->rail_id][vcg] < s->params->cn_vc_size) {
             model_net_method_idle_event2(nic_ts, 0, msg->rail_id, lp);
-            msg->saved_last_in_queue_time = s->last_in_queue_time;
-            s->last_in_queue_time = tw_now(lp);
-            bf->c13 = 1;
         } else {
             bf->c11 = 1;
             s->issueIdle[msg->rail_id] = 1;
@@ -4378,7 +4390,6 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag
     if(bf->c5)
     {
         s->issueIdle[msg->rail_id] = 1;
-        s->last_in_queue_time = msg->saved_last_in_queue_time;
         if(bf->c6)
         {
             s->busy_time[msg->rail_id] = msg->saved_total_time;
@@ -4546,8 +4557,6 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
         bf->c5 = 1;
         s->issueIdle[msg->rail_id] = 0;
         model_net_method_idle_event2(injection_ts, 0, msg->rail_id, lp);
-        msg->saved_last_in_queue_time = s->last_in_queue_time;
-        s->last_in_queue_time = tw_now(lp);
     
         if(s->last_buf_full[msg->rail_id] > 0.0)
         {
@@ -4695,34 +4704,60 @@ static void send_remote_event(terminal_state * s, terminal_dally_message * msg,
 
 static void packet_arrive_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
+    struct dfly_hash_key key = {
+        .message_id = msg->message_id,
+        .sender_id = msg->sender_lp,
+    };
+    struct dfly_qhash_entry * tmp = NULL;
+    struct qhash_head * hash_link = NULL;
+
+    // If entry was removed from hash
+    if(bf->c8) {
+        struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *) rc_stack_pop(s->st);
+        qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link));
+        s->rank_tbl_pop++;
+
+        if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE)
+            tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model");
+
+        hash_link = &(d_entry_pop->hash_link);
+        tmp = d_entry_pop;
+    // In case it was not deleted, and we accessed it
+    } else if (bf->c9 || bf->c5) {
+        assert(!tmp);
+        hash_link = qhash_search(s->rank_tbl, &key);
+
+        tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link);
+    }
+    assert((bf->c9 || bf->c5) == bf->c6);
+
     if(bf->c4) {
         model_net_event_rc2(lp, &msg->event_rc);
     }
 
-    s->finished_msgs--;
-    s->total_msg_size -= msg->total_size;
-    total_msg_sz -= msg->total_size;
-    N_finished_msgs--;
-    s->data_size_ross_sample -= msg->total_size;
-    s->ross_sample.data_size_sample -= msg->total_size;
-    s->data_size_sample -= msg->total_size;
-
-    s->finished_packets--;
-    N_finished_packets--;
-    
-    mn_stats * stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
-
-    stat->recv_bytes -= msg->packet_size;
-    stat->recv_count--;
+    if(bf->c7) {
+        s->finished_msgs--;
+        s->total_msg_size -= msg->total_size;
+        total_msg_sz -= msg->total_size;
+        N_finished_msgs--;
+        s->data_size_ross_sample -= msg->total_size;
+        s->ross_sample.data_size_sample -= msg->total_size;
+        s->data_size_sample -= msg->total_size;
+    }
 
-    stat->recv_time = msg->saved_rcv_time;
+    if(bf->c6) {
+        tmp->num_chunks -= msg->saved_remaining_packet_chunks;
+    }
 
-    packet_fin--;
-    s->packet_fin--;
+    if(bf->c5) {
+        qhash_del(hash_link);
+        free_tmp(tmp);
+        s->rank_tbl_pop--;
+    }
 }
 
 /* packet arrives at the destination terminal */
-static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) 
+static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
 {
     assert(lp->gid == msg->dest_terminal_lpid);
     /* WE do not allow self messages through dragonfly */
@@ -4732,37 +4767,111 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
     if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
         printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop);
 #endif
-    
-    s->packet_fin++;
-    packet_fin++;
 
     //record for commit_f file IO
     msg->travel_end_time = tw_now(lp);
-    tw_stime ete_latency = msg->travel_end_time - msg->travel_start_time;
 
-    mn_stats* stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
-    msg->saved_rcv_time = stat->recv_time;
-    stat->recv_time += ete_latency;
+    // packets arrive as one event not as multiple events (ie, predicted packets are not broken into chunks)
+    struct packet_id const packet_key = {
+        .packet_ID = msg->packet_ID,
+        .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+    };
+    bool const has_remaining_sz = s->remaining_sz_packets.count(packet_key) == 1;
 
-    void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+    // Finding out how many bytes are left to receive for this packet
+    int remaining_sz = 0;
+    if (has_remaining_sz) {
+        remaining_sz = s->remaining_sz_packets[packet_key];
+    } else {
+        remaining_sz = msg->packet_size;
+    }
 
-    stat->recv_count++;
-    stat->recv_bytes += msg->packet_size;
+    uint64_t const chunk_size = s->params->chunk_size;
+    uint64_t remaining_packet_chunks = remaining_sz / chunk_size + (remaining_sz % chunk_size ? 1 : 0);
+    uint64_t total_chunks = msg->total_size / chunk_size + (msg->total_size % chunk_size ? 1 : 0);
+    if (remaining_packet_chunks == 0) { remaining_packet_chunks = 1; }
+    if (total_chunks == 0) { total_chunks = 1; }
+    msg->saved_remaining_packet_chunks = remaining_packet_chunks;
 
-    N_finished_packets++;
-    s->finished_packets++;
+    // The table has to have been initialized already, if not, what the heck!
+    struct dfly_hash_key key = {
+        .message_id = msg->message_id,
+        .sender_id = msg->sender_lp,
+    };
 
-    s->data_size_sample += msg->total_size;
-    s->ross_sample.data_size_sample += msg->total_size;
-    s->data_size_ross_sample += msg->total_size;
-    N_finished_msgs++;
-    total_msg_sz += msg->total_size;
-    s->total_msg_size += msg->total_size;
-    s->finished_msgs++;
-    
-    // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
-    if(m_data_src && msg->remote_event_size_bytes > 0) {
-        send_remote_event(s, msg, lp, bf, (char *) m_data_src, msg->remote_event_size_bytes);
+    // Finding out if message is in hash
+    struct qhash_head * hash_link = qhash_search(s->rank_tbl, &key);
+    struct dfly_qhash_entry * tmp = NULL;
+    if(hash_link) {
+        bf->c9 = 1;
+        tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link);
+    // We create an entry into the hash only if it makes sense to do so (ie, only when the message needs multiple packets to be completed)
+    } else if (msg->total_size > msg->packet_size) {
+        bf->c5 = 1;
+        assert(remaining_sz == msg->packet_size);
+
+        struct dfly_qhash_entry * const d_entry = (dfly_qhash_entry *) calloc(1, sizeof (struct dfly_qhash_entry));
+        d_entry->num_chunks = 0;
+        d_entry->key = key;
+        d_entry->remote_event_data = NULL;
+        d_entry->remote_event_size = 0;
+        qhash_add(s->rank_tbl, &key, &(d_entry->hash_link));
+        s->rank_tbl_pop++;
+
+        if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) {
+            tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model");
+        }
+
+        hash_link = &(d_entry->hash_link);
+        tmp = d_entry;
+    // Just for completion, checking invariant
+    } else {
+        assert(msg->total_size == msg->packet_size);
+    }
+
+    // Increasing the number of chunks received
+    if (tmp) {
+        bf->c6 = 1;
+        tmp->num_chunks += remaining_packet_chunks;
+
+        /* retrieve the event data, all chunks from the same packet carry the `remote_event_data` */
+        if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data)
+        {
+            /* Now retreieve the number of chunks completed from the hash and update them */
+            void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+
+            /* Retreive the remote event entry */
+            tmp->remote_event_data = (char*) calloc(1, msg->remote_event_size_bytes);
+            assert(tmp->remote_event_data);
+            tmp->remote_event_size = msg->remote_event_size_bytes;
+            memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes);
+        }
+    }
+
+    bool const is_msg_completed = tmp ? tmp->num_chunks >= total_chunks : true;
+    assert(tmp || total_chunks == remaining_packet_chunks);
+
+    if(is_msg_completed) {
+        bf->c7 = 1;
+        s->data_size_sample += msg->total_size;
+        s->ross_sample.data_size_sample += msg->total_size;
+        s->data_size_ross_sample += msg->total_size;
+        N_finished_msgs++;
+        total_msg_sz += msg->total_size;
+        s->total_msg_size += msg->total_size;
+        s->finished_msgs++;
+
+        // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
+        if (tmp->remote_event_data && tmp->remote_event_size > 0) {
+            send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
+        }
+
+        if (tmp) {
+            bf->c8 = 1;
+            qhash_del(hash_link);
+            rc_stack_push(lp, tmp, free_tmp, s->st);
+            s->rank_tbl_pop--;
+        }
     }
 }
 
@@ -4770,12 +4879,6 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
 {
     if (g_congestion_control_enabled)
         cc_terminal_send_ack_rc(s->local_congestion_controller);
-    
-    if(bf->c31)
-    {
-        s->packet_fin--;
-        packet_fin--;
-    }
 
     if(msg->path_type == MINIMAL)
         minimal_count--;
@@ -4815,6 +4918,8 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
 
     if(bf->c1)
     {
+        s->packet_fin--;
+        packet_fin--;
         stat->recv_count--;
         stat->recv_bytes -= msg->packet_size;
         N_finished_packets--;
@@ -4827,11 +4932,35 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
 	{
           s->max_latency = msg->saved_available_time;
 	} 
-    if(bf->c7)
-    {
+
+    struct packet_id const packet_key = {
+        .packet_ID = msg->packet_ID,
+        .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+    };
+
+    if (bf->c28) {
+        if (bf->c29) {
+            s->remaining_sz_packets[packet_key] = 0;
+        }
+        s->remaining_sz_packets[packet_key] += s->params->chunk_size;
+    } else {
+        if (bf->c29) {
+            s->remaining_sz_packets[packet_key] += s->params->chunk_size;
+        }
+    }
+
+    if (bf->c14) {
+        s->zombies.emplace((struct packet_id) {
+            .packet_ID = msg->packet_ID,
+            .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+        });
+    }
+    if (bf->c15) {
+        return;
+    }
+
+    if(bf->c7) {
         //assert(!hash_link);
-        if(bf->c4)
-            model_net_event_rc2(lp, &msg->event_rc);
         
         N_finished_msgs--;
         s->finished_msgs--;
@@ -4841,11 +4970,8 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         s->ross_sample.data_size_sample -= msg->total_size;
         s->data_size_ross_sample -= msg->total_size;
 
-        if(bf->c14) {
-            s->zombies.emplace((struct packet_id) {
-                .packet_ID = msg->packet_ID,
-                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-            });
+        if(bf->c4) {
+            model_net_event_rc2(lp, &msg->event_rc);
         }
 
         struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st);
@@ -4869,14 +4995,11 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         free_tmp(tmp);	
         s->rank_tbl_pop--;
     }
-    
-    return;
 }
 
 /* packet arrives at the destination terminal */
 static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) 
 {
-
     // if(isRoutingMinimal(routing) && msg->my_N_hop > 4)
     // {
     //     printf("TERMINAL RECEIVED A NONMINIMAL LENGTH PACKET\n");
@@ -4898,48 +5021,13 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     if (g_congestion_control_enabled)
         cc_terminal_send_ack(s->local_congestion_controller, msg->src_terminal_id);
 
-    // NIC aggregation - should this be a separate function?
-    // Trigger an event on receiving server
-
-    if(!s->rank_tbl)
-        s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE);
-    
-    struct dfly_hash_key key;
-    key.message_id = msg->message_id; 
-    key.sender_id = msg->sender_lp;
-    
-    struct qhash_head *hash_link = NULL;
-    struct dfly_qhash_entry * tmp = NULL;
-      
-    hash_link = qhash_search(s->rank_tbl, &key);
-    
-    if(hash_link)
-        tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link);
-
-    uint64_t total_chunks = msg->total_size / s->params->chunk_size;
-
-    if(msg->total_size % s->params->chunk_size)
-          total_chunks++;
-
-    if(!total_chunks)
-          total_chunks = 1;
-
-    /*if(tmp)
-    {
-        if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0)
-        {
-           //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid);
-           tw_lp_suspend(lp, 0, 0);
-           return;
-        }
-    }*/
     assert(lp->gid == msg->dest_terminal_lpid);
 
 #if DEBUG == 1
     if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID)
         printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop);
 #endif
-    
+
     tw_stime ts = s->params->cn_credit_delay;
 
     // no method_event here - message going to router
@@ -4954,12 +5042,6 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     buf_msg->type = R_BUFFER;
     tw_event_send(buf_e);
 
-    bf->c1 = 0;
-    bf->c3 = 0;
-    bf->c4 = 0;
-    bf->c7 = 0;
-    bf->c14 = 0;
-
     /* Total overall finished chunks in simulation */
     N_finished_chunks++;
     /* Finished chunks on a LP basis */
@@ -4973,6 +5055,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     /* WE do not allow self messages through dragonfly */
     assert(lp->gid != msg->src_terminal_id);
 
+    // TODO (elkin): this is wrong, this is _not_ finding the number of chunks, consider: chunk_size = 2 and packet_size = 5. There should be 3 chunks, but the code outputs 2!
     uint64_t num_chunks = msg->packet_size / s->params->chunk_size;
     if (msg->packet_size < s->params->chunk_size)
         num_chunks++;
@@ -4983,12 +5066,6 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     if(msg->path_type == NON_MINIMAL)
         nonmin_count++;
 
-    if(msg->chunk_id == num_chunks - 1)
-    {
-        bf->c31 = 1;
-        s->packet_fin++;
-        packet_fin++;
-    }
     if(msg->path_type != MINIMAL && msg->path_type != NON_MINIMAL)
         printf("\n Wrong message path type %d ", msg->path_type);
 
@@ -5016,6 +5093,20 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     msg->saved_rcv_time = stat->recv_time;
     stat->recv_time += ete_latency;
 
+    // Chunk with the last id has been received (not the last chunk to receive necessarily)
+    if(msg->chunk_id == num_chunks - 1)
+    {
+        bf->c1 = 1;
+        s->packet_fin++;
+        packet_fin++;
+
+        stat->recv_count++;
+        stat->recv_bytes += msg->packet_size;
+
+        N_finished_packets++;
+        s->finished_packets++;
+    }
+
 #if DEBUG == 1
     if( msg->packet_ID == TRACK 
             && msg->chunk_id == num_chunks-1
@@ -5031,9 +5122,94 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     }
 #endif
 
-    /* Now retreieve the number of chunks completed from the hash and update
-        * them */
-    void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+    if(s->min_latency > ete_latency) {
+        bf->c21 = 1;
+        msg->saved_min_lat = s->min_latency;
+        s->min_latency = ete_latency;
+    }
+
+    if(s->max_latency < ete_latency) {
+        bf->c22 = 1;
+        msg->saved_available_time = s->max_latency;
+        s->max_latency = ete_latency;
+    }
+
+    struct packet_id const packet_key = {
+        .packet_ID = msg->packet_ID,
+        .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
+    };
+    bool const is_zombie = s->zombies.count(packet_key) == 1;
+    bool const has_remaining_sz = s->remaining_sz_packets.count(packet_key) == 1;
+
+    // Finding out if the packet is complete
+    bool is_packet_completed = false;
+    int const chunk_size = s->params->chunk_size;
+    if (has_remaining_sz) {
+        bf->c28 = 1;
+        assert(s->remaining_sz_packets[packet_key] >= chunk_size);
+        s->remaining_sz_packets[packet_key] -= chunk_size;
+
+        // if `remaining == 0`, ie, if the packet has been completed
+        if (s->remaining_sz_packets[packet_key] == 0) {
+            bf->c29 = 1;
+            is_packet_completed = true;
+            s->remaining_sz_packets.erase(packet_key);
+        }
+    } else {
+        if (chunk_size < msg->packet_size) {
+            bf->c29 = 1;
+            s->remaining_sz_packets[packet_key] = msg->packet_size - chunk_size;
+        } else {
+            is_packet_completed = true;
+        }
+    }
+
+    // Zombies don't generate delay notifications, and they don't modify the state of `s->rank_tbl` (`packet_arrive_predicted` should have removed the msg entry already)
+    if (is_zombie) {
+        struct dfly_hash_key key = {
+            .message_id = msg->message_id,
+            .sender_id = msg->sender_lp,
+        };
+        //printf("We got a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
+
+        if (is_packet_completed) {
+            s->zombies.erase(packet_key);
+            bf->c14 = 1;
+        }
+        bf->c15 = 1;
+        return;
+    }
+
+    struct dfly_hash_key key = {
+        .message_id = msg->message_id,
+        .sender_id = msg->sender_lp,
+    };
+
+    struct qhash_head *hash_link = NULL;
+    struct dfly_qhash_entry * tmp = NULL;
+
+    hash_link = qhash_search(s->rank_tbl, &key);
+
+    if(hash_link)
+        tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link);
+
+    uint64_t total_chunks = msg->total_size / s->params->chunk_size;
+
+    if(msg->total_size % s->params->chunk_size)
+          total_chunks++;
+
+    if(!total_chunks)
+          total_chunks = 1;
+
+    /*if(tmp)
+    {
+        if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0)
+        {
+           //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid);
+           tw_lp_suspend(lp, 0, 0);
+           return;
+        }
+    }*/
 
     /* If an entry does not exist then create one */
     if(!tmp)
@@ -5057,45 +5233,38 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     assert(tmp);
     tmp->num_chunks++;
 
-    if(msg->chunk_id == num_chunks - 1)
-    {
-        bf->c1 = 1;
-        stat->recv_count++;
-        stat->recv_bytes += msg->packet_size;
-
-        N_finished_packets++;
-        s->finished_packets++;
-    }
-
-    /* if its the last chunk of the packet then handle the remote event data */
+    /* retrieve the event data, all chunks from the same packet carry the `remote_event_data` */
     if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data)
     {
+        /* Now retreieve the number of chunks completed from the hash and update
+            * them */
+        void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+
         /* Retreive the remote event entry */
         tmp->remote_event_data = (char*)calloc(1, msg->remote_event_size_bytes);
         assert(tmp->remote_event_data);
         tmp->remote_event_size = msg->remote_event_size_bytes; 
         memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes);
     }
-    
-    if(s->min_latency > ete_latency) {
-        bf->c21 = 1;
-        msg->saved_min_lat = s->min_latency;
-		s->min_latency = ete_latency;	
-	}
 
-	if(s->max_latency < ete_latency) {
-        bf->c22 = 1;
-        msg->saved_available_time = s->max_latency;
-        s->max_latency = ete_latency;
-	}
-    /* If all chunks of a message have arrived then send a remote event to the
-     * callee*/
-    //assert(tmp->num_chunks <= total_chunks);
+    // if the packet is complete (ie, this `msg` is the last piece of the packet)
+    if (is_packet_completed) {
+        //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
+        if (packet_latency_f || surrogate_configured) {
+            notify_src_lp_on_total_latency(lp, msg);
+        //} else {
+        //    // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact
+        //    // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce
+        //    // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S
+        //    vacuous_msg_to_itself(s, msg, lp);
+        }
+    }
 
+    // if the message is complete (ie, this `msg` is the last piece of the message)
+    /* If all chunks of a message have arrived then send a remote event to the callee */
     if(tmp->num_chunks >= total_chunks)
     {
         bf->c7 = 1;
-
         s->data_size_sample += msg->total_size;
         s->ross_sample.data_size_sample += msg->total_size;
         s->data_size_ross_sample += msg->total_size;
@@ -5103,43 +5272,16 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         total_msg_sz += msg->total_size;
         s->total_msg_size += msg->total_size;
         s->finished_msgs++;
-        
+
         //assert(tmp->remote_event_data && tmp->remote_event_size > 0);
         if(tmp->remote_event_data && tmp->remote_event_size > 0) {
-            struct packet_id const zombie_packet = {
-                .packet_ID = msg->packet_ID,
-                .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-            };
-            int const is_zombie = s->zombies.count(zombie_packet) == 1;
-            // Not notifying in case it's a zombie
-            if (is_zombie) {
-                // Ignore packet, do not send forward if it has already been delievered
-                //printf("We got a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
-                s->zombies.erase(zombie_packet);
-                bf->c14 = 1;
-            } else {
-                //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
-                if (packet_latency_f || surrogate_configured) {
-                    notify_src_lp_on_total_latency(lp, msg);
-                } else {
-                    // This vacuous msg is necessary just to keep simulations with
-                    // and without the latency notification the same. Notifying the
-                    // latency does not impact the simulation (unless the data is
-                    // fed to a predictor, later to be used). If the latency
-                    // notification is deactivated, the simulation will produce
-                    // the same number of events (a bit wasteful), a parameter
-                    // that model-net or dragonfly-dally for some reason use :S
-                    //vacuous_msg_to_itself(s, msg, lp);
-                }
-                send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
-             }
+            send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
         }
         /* Remove the hash entry */
         qhash_del(hash_link);
         rc_stack_push(lp, tmp, free_tmp, s->st);
         s->rank_tbl_pop--;
-   }
-  return;
+     }
 }
 
 static void terminal_buf_update_rc(terminal_state * s,
@@ -5262,6 +5404,43 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
 
     lp_io_write(lp->gid, (char*)"dragonfly-cn-stats", written, s->output_buf2); 
 
+    if (packet_latency_f) {
+        // Storing the missing packets into io file
+        while(!s->sent_packets.empty()) {
+            struct packet_start start = s->sent_packets.front();
+            s->sent_packets.pop_front();
+            assert(start.message_data);
+
+            struct packet_end end = {
+                .travel_end_time = -1,
+                .next_packet_delay = -1,
+            };
+
+            // The packet was delievered and its latency is known (we were notified)
+            if (!s->sent_packets_latency.empty()
+                    && start.packet_ID == s->sent_packets_latency.top().packet_ID)
+            {
+                auto const latency_q = s->sent_packets_latency.top();
+                s->sent_packets_latency.pop();
+
+                end.travel_end_time = latency_q.value;
+
+                if (s->sent_packets.size() >= 2) {
+                    end.next_packet_delay = s->sent_packets[1].processing_packet_delay;
+                }
+
+                packet_latency_save_to_file(s->terminal_id, start, end, false, false);
+            }
+            // The packet has not been delievered yet (that we know of)
+            else {
+                packet_latency_save_to_file(s->terminal_id, start, end, false, false);
+            }
+
+            // Deallocating memory from packet_start
+            if (start.message_data) { free(start.message_data); }
+            if (start.remote_event_data) { free(start.remote_event_data); }
+        }
+    }
 
     //if(s->packet_gen != s->packet_fin)
     //    printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin);
@@ -5298,6 +5477,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
     }
     s->sent_packets.~deque();
     s->sent_packets_latency.~priority_queue();
+    s->remaining_sz_packets.~map();
 
     if (s->predictor_data) {
         free(s->predictor_data);
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index d5f4aac4..d9f26b71 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -41,7 +41,7 @@ struct aggregated_latency_one_terminal {
 };
 
 struct latency_surrogate {
-    double sum_delay_at_queue_head_next;
+    double sum_next_packet_delay;
     struct aggregated_latency_one_terminal aggregated_latency_for_all;
     unsigned int num_terminals;
     struct aggregated_latency_one_terminal aggregated_latency[];
@@ -57,7 +57,7 @@ static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     assert(data->aggregated_latency[0].total_msgs == 0);
 
     data->num_terminals = surr_config.total_terminals;
-    data->sum_delay_at_queue_head_next = 0;
+    data->sum_next_packet_delay = 0;
 }
 
 static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
@@ -71,6 +71,7 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
     double const latency = end->travel_end_time - start->travel_start_time;
     assert(dest_terminal < data->num_terminals);
+    assert(end->travel_end_time > start->travel_start_time);
 
     data->aggregated_latency[dest_terminal].sum_latency += latency;
     data->aggregated_latency[dest_terminal].total_msgs++;
@@ -78,7 +79,7 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     data->aggregated_latency_for_all.sum_latency += latency;
     data->aggregated_latency_for_all.total_msgs++;
 
-    data->sum_delay_at_queue_head_next += end->delay_at_queue_head_next;
+    data->sum_next_packet_delay += end->next_packet_delay;
 }
 
 static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) {
@@ -93,7 +94,7 @@ static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp
         tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
         return (struct packet_end) {
             .travel_end_time = -1.0,
-            .delay_at_queue_head_next = -1.0,
+            .next_packet_delay = -1.0,
         };
     }
 
@@ -106,11 +107,12 @@ static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp
         // If no information for that terminal exists, use average from all message
         latency = data->aggregated_latency_for_all.sum_latency / total_total_datapoints;
     }
+    assert(latency >= 0);
 
-    double const delay_at_queue_head_next = data->sum_delay_at_queue_head_next / total_total_datapoints;
+    double const next_packet_delay = data->sum_next_packet_delay / total_total_datapoints;
     return (struct packet_end) {
         .travel_end_time = packet_dest->travel_start_time + latency,
-        .delay_at_queue_head_next = delay_at_queue_head_next,
+        .next_packet_delay = next_packet_delay,
     };
 }
 
@@ -468,7 +470,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
     }
 
     // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process)
-    rollback_and_cancel_events_pe(pe, gvt);
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        rollback_and_cancel_events_pe(pe, gvt);
+    }
 
     assert(lps_events[0] != NULL);
     free(lps_events[0]);

From d78644ad134364623473648c02bbd3ad5647362a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 25 Jul 2023 19:10:41 -0400
Subject: [PATCH 034/188] Extending unit tests to include several cases for
 ping-pong workload

It has not been tested to work with autoconf
---
 .gitignore                                    |  5 +-
 doc/example/CMakeLists.txt                    | 16 +++++
 doc/example/example.conf                      |  2 +-
 ...f => tutorial-ping-pong-surrogate.conf.in} | 24 ++++---
 doc/example/tutorial-ping-pong.conf           | 55 ----------------
 doc/example/tutorial-ping-pong.conf.in        | 12 ++--
 src/networks/model-net/dragonfly-dally.C      |  7 +--
 tests/CMakeLists.txt                          | 18 +++---
 tests/conf/modelnet-p2p-bw-loggp.conf         |  2 +-
 tests/conf/modelnet-prio-sched-test.conf      |  2 +-
 tests/conf/modelnet-test-dragonfly.conf       |  2 +-
 tests/conf/modelnet-test-loggp.conf           |  2 +-
 tests/conf/modelnet-test-simplep2p.conf       |  2 +-
 tests/conf/modelnet-test-slimfly.conf         |  2 +-
 tests/conf/modelnet-test-torus.conf           |  2 +-
 tests/conf/modelnet-test.conf                 |  2 +-
 tests/example-ping-pong-determinism.sh        | 39 ++++++++++++
 tests/example-ping-pong-no-logging.sh         | 25 ++++++++
 tests/example-ping-pong-surrogate-1.sh        | 61 ++++++++++++++++++
 tests/example-ping-pong-surrogate-2.sh        | 61 ++++++++++++++++++
 tests/example-ping-pong-surrogate-3.sh        | 62 +++++++++++++++++++
 tests/jobmap-test.sh                          |  6 +-
 tests/lp-io-test.sh                           |  6 +-
 tests/lsm-test.sh                             |  6 +-
 tests/map-ctx-test.sh                         |  6 +-
 tests/mapping_test.sh                         | 11 +++-
 tests/modelnet-p2p-bw-loggp.sh                |  6 +-
 tests/modelnet-prio-sched-test.sh             |  8 ++-
 tests/modelnet-simplep2p-test.sh              |  6 +-
 tests/modelnet-test-dragonfly-synthetic.sh    |  6 +-
 tests/modelnet-test-dragonfly.sh              |  6 +-
 tests/modelnet-test-em.sh                     |  6 +-
 tests/modelnet-test-fattree-synthetic.sh      |  6 +-
 tests/modelnet-test-loggp.sh                  |  6 +-
 tests/modelnet-test-slimfly-synthetic.sh      |  6 +-
 tests/modelnet-test-slimfly.sh                |  6 +-
 tests/modelnet-test-torus.sh                  |  5 +-
 tests/modelnet-test.sh                        |  6 +-
 tests/rc-stack-test.sh                        |  6 +-
 tests/resource-test.sh                        |  6 +-
 tests/run-test.sh.in                          | 33 ++++++++++
 tests/workload/codes-workload-test.sh         |  6 +-
 42 files changed, 440 insertions(+), 122 deletions(-)
 rename doc/example/{tutorial-ping-pong-surrogate.conf => tutorial-ping-pong-surrogate.conf.in} (51%)
 delete mode 100644 doc/example/tutorial-ping-pong.conf
 create mode 100755 tests/example-ping-pong-determinism.sh
 create mode 100755 tests/example-ping-pong-no-logging.sh
 create mode 100755 tests/example-ping-pong-surrogate-1.sh
 create mode 100755 tests/example-ping-pong-surrogate-2.sh
 create mode 100755 tests/example-ping-pong-surrogate-3.sh

diff --git a/.gitignore b/.gitignore
index 4d3a6923..51a9c2eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,4 +37,7 @@
 # generated files from test runs
 ross.csv
 
-install-mastiff/include/codes/model-net-method.h
\ No newline at end of file
+install-mastiff/include/codes/model-net-method.h
+
+# commonly used building folder
+/build/
diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
index c3f00579..49451d91 100644
--- a/doc/example/CMakeLists.txt
+++ b/doc/example/CMakeLists.txt
@@ -7,3 +7,19 @@ foreach(namefile ${example-files})
     add_executable(${namefile} ${namefile}.c)
     target_link_libraries(${namefile} PUBLIC codes)
 endforeach()
+
+# Saving default config files to run experiments with
+configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.template.conf.in @ONLY)
+configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.template.conf.in @ONLY)
+
+set(single_quote "'")
+set(double_quote "\"")
+
+set(PACKET_SIZE "4096")
+set(CHUNK_SIZE "64")
+set(NETWORK_TREATMENT "freeze")
+set(PACKET_LATENCY_TRACE_PATH "packet-latency-trace/")
+set(IGNORE_UNTIL "200e4")
+string(REPLACE ${single_quote} ${double_quote} SWITCH_TIMESTAMPS "'1000e4', '8900e4'")
+configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.conf)
+configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.conf)
diff --git a/doc/example/example.conf b/doc/example/example.conf
index 161ab626..5f739647 100644
--- a/doc/example/example.conf
+++ b/doc/example/example.conf
@@ -24,7 +24,7 @@ PARAMS
    # - message_size: ROSS expects you to upper bound your event message size.
    #                 Going over this size will crash or otherwise destroy your 
    #                 simulation.
-   message_size="368";
+   message_size="432";
    # - pe_mem_factor: this is a multiplier to the event memory allocation that
    #                  ROSS does up front (multiplier is per-PE). Increase this 
    #                  (or change the associated mem_factor variable in
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf b/doc/example/tutorial-ping-pong-surrogate.conf.in
similarity index 51%
rename from doc/example/tutorial-ping-pong-surrogate.conf
rename to doc/example/tutorial-ping-pong-surrogate.conf.in
index d1a2937c..69bfaa96 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf
+++ b/doc/example/tutorial-ping-pong-surrogate.conf.in
@@ -1,6 +1,6 @@
 # Run this example with:
 # > cd path-to-codes/build
-# > mpirun -np 2 doc/example/tutorial-synthetic-ping-pong --synch=3 --num_messages=10000 --lp-io-dir=codes-output -- ../doc/example/tutorial-ping-pong-surrogate.conf
+# > mpirun -np 3 doc/example/tutorial-synthetic-ping-pong --synch=3 --num_messages=10000 --lp-io-dir=codes-output -- doc/example/tutorial-ping-pong-surrogate.conf
 LPGROUPS
 {
    MODELNET_GRP
@@ -16,13 +16,13 @@ LPGROUPS
 PARAMS
 {
 # packet size in the network
-   packet_size="4096";
+   packet_size="${PACKET_SIZE}";
    modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
    # scheduler options
    modelnet_scheduler="fcfs";
 # chunk size in the network (when chunk size = packet size, packets will not be
 # divided into chunks)
-   chunk_size="64";
+   chunk_size="${CHUNK_SIZE}";
 # modelnet_scheduler="round-robin";
 # number of routers in group
    num_routers="4";
@@ -41,22 +41,22 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="2.0";
 # ROSS message size
-   message_size="736";
+   message_size="408";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
 # number of global channels per router
    num_global_channels="2";
 # network config file for intra-group connections
-   intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
+   intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
 # network config file for inter-group connections
-   inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
+   inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
 # routing protocol to be used
    routing="prog-adaptive";
 # folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
-   save_packet_latency_path="packet-latency-trace/";
+   save_packet_latency_path="${PACKET_LATENCY_TRACE_PATH}";
 # router buffer occupancy snapshots
-   router_buffer_snapshots=( "50e4", "60e4" );
+   router_buffer_snapshots=( "100e3", "200e3", "300e3", "400e3", "500e3", "600e3", "700e3", "800e3", "900e3", "1e6", "1.1e6", "1.2e6", "1.3e6", "1.4e6", "1.5e6", "1.6e6", "1.7e6", "1.8e6", "1.9e6", "2e6", "2.1e6", "2.2e6", "2.3e6", "2.4e6", "2.5e6", "2.6e6", "2.7e6", "2.8e6", "2.9e6", "3e6", "3.1e6", "3.2e6", "3.3e6", "3.4e6", "3.5e6", "3.6e6", "3.7e6", "3.8e6", "3.9e6", "4e6", "4.1e6", "4.2e6", "4.3e6", "4.4e6", "4.5e6", "4.6e6", "4.7e6", "4.8e6", "4.9e6", "5e6", "5.1e6", "5.2e6", "5.3e6", "5.4e6", "5.5e6", "5.6e6", "5.7e6", "5.8e6", "5.9e6", "6e6", "6.1e6", "6.2e6", "6.3e6", "6.4e6", "6.5e6", "6.6e6", "6.7e6", "6.8e6", "6.9e6", "7e6", "7.1e6", "7.2e6", "7.3e6", "7.4e6", "7.5e6", "7.6e6", "7.7e6", "7.8e6", "7.9e6", "8e6", "8.1e6", "8.2e6", "8.3e6", "8.4e6", "8.5e6", "8.6e6", "8.7e6", "8.8e6", "8.9e6", "9e6", "9.1e6", "9.2e6", "9.3e6", "9.4e6", "9.5e6", "9.6e6", "9.7e6", "9.8e6", "9.9e6", "9.990e6" );
 }
 SURROGATE {
 # determines the director switching from surrogate to high-def simulation strategy
@@ -65,8 +65,14 @@ SURROGATE {
 # director configuration for: director_mode == "at-fixed-virtual-times"
 # timestamps at which to switch to surrogate-mode and back
    #fixed_switch_timestamps=( "100e4", "8900e4" );  # the first switch happens at around 100 ping messages, the second at approx. 9900 pings
-   fixed_switch_timestamps=( "1000e4", "8900e4" );  # the first switch happens at around 1000 ping messages, the second at approx. 9900 pings
+   #fixed_switch_timestamps=( "1000e4", "8900e4" );  # the first switch happens at around 1000 ping messages, the second at approx. 9900 pings
+   fixed_switch_timestamps=( ${SWITCH_TIMESTAMPS} );
 
 # latency predictor to use
    packet_latency_predictor="average";
+# some workload models need some time to stabilize, a point where the network behaviour stabilizes. The predictor will ignore all packet latencies that arrive during this period
+   ignore_until="${IGNORE_UNTIL}";
+
+# selecting network treatment on switching to surrogate. Options: frezee, nothing
+   network_treatment_on_switch="${NETWORK_TREATMENT}";
 }
diff --git a/doc/example/tutorial-ping-pong.conf b/doc/example/tutorial-ping-pong.conf
deleted file mode 100644
index 3aabd871..00000000
--- a/doc/example/tutorial-ping-pong.conf
+++ /dev/null
@@ -1,55 +0,0 @@
-LPGROUPS
-{
-   MODELNET_GRP
-   {
-      repetitions="36";
-# name of this lp changes according to the model
-      nw-lp="2";
-# these lp names will be the same for dragonfly-custom model
-      modelnet_dragonfly_dally="2";
-      modelnet_dragonfly_dally_router="1";
-   }
-}
-PARAMS
-{
-# packet size in the network
-   packet_size="4096";
-   modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
-   # scheduler options
-   modelnet_scheduler="fcfs";
-# chunk size in the network (when chunk size = packet size, packets will not be
-# divided into chunks)
-   chunk_size="64";
-# modelnet_scheduler="round-robin";
-# number of routers in group
-   num_routers="4";
-# number of groups in the network
-   num_groups="9";
-# buffer size in bytes for local virtual channels
-   local_vc_size="16384";
-#buffer size in bytes for global virtual channels
-   global_vc_size="16384";
-#buffer size in bytes for compute node virtual channels
-   cn_vc_size="32768";
-#bandwidth in GiB/s for local channels
-   local_bandwidth="2.0";
-# bandwidth in GiB/s for global channels
-   global_bandwidth="2.0";
-# bandwidth in GiB/s for compute node-router channels
-   cn_bandwidth="2.0";
-# ROSS message size
-   message_size="736";
-# number of compute nodes connected to router, dictated by dragonfly config
-# file
-   num_cns_per_router="2";
-# number of global channels per router
-   num_global_channels="2";
-# network config file for intra-group connections
-   intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
-# network config file for inter-group connections
-   inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
-# routing protocol to be used
-   routing="prog-adaptive";
-# router buffer occupancy snapshots
-   router_buffer_snapshots=( "50e4", "60e4" );
-}
diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in
index f44acfb7..507094f5 100644
--- a/doc/example/tutorial-ping-pong.conf.in
+++ b/doc/example/tutorial-ping-pong.conf.in
@@ -13,13 +13,13 @@ LPGROUPS
 PARAMS
 {
 # packet size in the network
-   packet_size="4096";
+   packet_size="${PACKET_SIZE}";
    modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
    # scheduler options
    modelnet_scheduler="fcfs";
 # chunk size in the network (when chunk size = packet size, packets will not be
 # divided into chunks)
-   chunk_size="64";
+   chunk_size="${CHUNK_SIZE}";
 # modelnet_scheduler="round-robin";
 # number of routers in group
    num_routers="4";
@@ -38,18 +38,20 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="2.0";
 # ROSS message size
-   message_size="736";
+   message_size="408";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
 # number of global channels per router
    num_global_channels="2";
 # network config file for intra-group connections
-   intra-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
+   intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
 # network config file for inter-group connections
-   inter-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
+   inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
 # routing protocol to be used
    routing="prog-adaptive";
 # router buffer occupancy snapshots
    router_buffer_snapshots=( "50e4", "60e4" );
+# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
+   save_packet_latency_path="${PACKET_LATENCY_TRACE_PATH}";
 }
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 9aa47fe1..484dd7ba 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2817,13 +2817,14 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
     return -1;
 }
 
-static void packet_latency_save_to_file(
+static inline void packet_latency_save_to_file(
         unsigned int terminal_id,
         struct packet_start start,
         struct packet_end end,
         bool surrogate_on,
         bool is_predicted
 ) {
+    if (!packet_latency_f) { return; } // Don't save if there isn't a file to save to
     if (end.travel_end_time > g_tw_ts_end) { return; } // This packet could never arrive to its destination!
     fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f\n",
             terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
@@ -2857,9 +2858,7 @@ static void process_packet_latencies(terminal_state * s, tw_lp * lp)
             .travel_end_time = s->sent_packets_latency.top().value,
             .next_packet_delay = next_packet_delay,
         };
-        if (packet_latency_f) {
-            packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, false);
-        }
+        packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, false);
         if (surrogate_configured && !is_surrogate_on) {
             assert(terminal_predictor != NULL);
             terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &start, &end);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 92e38b9b..b196c823 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -4,13 +4,12 @@ configure_file(run-test.sh.in run-test.sh)
 
 include_directories("${ROSS_INCLUDE_DIRS}" "${CODES_SOURCE_DIR}")
 
-# Unfortunatelly, CMake doesn't support iteration of a key-pair
-# structure, otherwise the following lists could be easily
-# compressed into a single list/dictionary/structure. Instead
-# each C file name MUST match each binary file name. This would
-# be handled differently if maintaining the autoconfig build
-# was not a MUST (in which case, the name of the file and its
-# binary should match, leaving us with a single list!)
+# Unfortunatelly, CMake doesn't support iteration of a key-pair structure,
+# otherwise the following lists could be easily compressed into a single
+# list/dictionary/structure. Instead each C file name **MUST** match each
+# binary file name. This would be handled differently if maintaining the
+# autoconfig build was not a **MUST** (in which case, the name of the file and
+# its binary should match, leaving us with a single list!)
 set(test-c-files
     lp-io-test.c
     mapping_test.c
@@ -87,6 +86,11 @@ set(test-shell-files
     modelnet-test-fattree-synthetic.sh
     modelnet-test-slimfly-synthetic.sh
     workload/codes-workload-test.sh
+    example-ping-pong-determinism.sh
+    example-ping-pong-surrogate-1.sh
+    example-ping-pong-surrogate-2.sh
+    example-ping-pong-surrogate-3.sh
+    example-ping-pong-no-logging.sh
     )
 
 foreach(testname ${test-shell-files})
diff --git a/tests/conf/modelnet-p2p-bw-loggp.conf b/tests/conf/modelnet-p2p-bw-loggp.conf
index 93da757b..54071dc0 100644
--- a/tests/conf/modelnet-p2p-bw-loggp.conf
+++ b/tests/conf/modelnet-p2p-bw-loggp.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="2147483648";
-   message_size="384";
+   message_size="416";
    modelnet_order=( "loggp" );
    # scheduler options
    modelnet_scheduler="fcfs";
diff --git a/tests/conf/modelnet-prio-sched-test.conf b/tests/conf/modelnet-prio-sched-test.conf
index b6cdd9b2..ff90e99d 100644
--- a/tests/conf/modelnet-prio-sched-test.conf
+++ b/tests/conf/modelnet-prio-sched-test.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="512";
-   message_size="416";
+   message_size="448";
    modelnet_order=( "simplenet" );
    # scheduler options
    modelnet_scheduler="priority";
diff --git a/tests/conf/modelnet-test-dragonfly.conf b/tests/conf/modelnet-test-dragonfly.conf
index 3c7e9a3b..3afe172d 100644
--- a/tests/conf/modelnet-test-dragonfly.conf
+++ b/tests/conf/modelnet-test-dragonfly.conf
@@ -23,6 +23,6 @@ PARAMS
    local_bandwidth="5.25";
    global_bandwidth="4.7";
    cn_bandwidth="5.25";
-   message_size="400";
+   message_size="432";
    routing="nonminimal";
 }
diff --git a/tests/conf/modelnet-test-loggp.conf b/tests/conf/modelnet-test-loggp.conf
index 139641fb..eb3bc675 100644
--- a/tests/conf/modelnet-test-loggp.conf
+++ b/tests/conf/modelnet-test-loggp.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-   message_size="400";
+   message_size="432";
    modelnet_order=( "loggp" );
    # scheduler options
    modelnet_scheduler="fcfs-full";
diff --git a/tests/conf/modelnet-test-simplep2p.conf b/tests/conf/modelnet-test-simplep2p.conf
index 9907aabd..168c219c 100644
--- a/tests/conf/modelnet-test-simplep2p.conf
+++ b/tests/conf/modelnet-test-simplep2p.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-    message_size="400";
+    message_size="432";
     packet_size="1024";
     modelnet_order=("simplep2p");
     # scheduler options
diff --git a/tests/conf/modelnet-test-slimfly.conf b/tests/conf/modelnet-test-slimfly.conf
index 7d01910b..5b76af9b 100644
--- a/tests/conf/modelnet-test-slimfly.conf
+++ b/tests/conf/modelnet-test-slimfly.conf
@@ -30,6 +30,6 @@ PARAMS
    global_bandwidth="9.0";
    cn_bandwidth="9.0";
    link_delay = "0";
-   message_size="400";
+   message_size="432";
    routing="minimal";
 }
diff --git a/tests/conf/modelnet-test-torus.conf b/tests/conf/modelnet-test-torus.conf
index 2d8d6cdf..798bf39e 100644
--- a/tests/conf/modelnet-test-torus.conf
+++ b/tests/conf/modelnet-test-torus.conf
@@ -14,7 +14,7 @@ PARAMS
    # scheduler options
    modelnet_scheduler="fcfs";
    # modelnet_scheduler="round-robin";
-   message_size="400";
+   message_size="432";
    n_dims="3";
    dim_length="4,2,2";
    link_bandwidth="2.0";
diff --git a/tests/conf/modelnet-test.conf b/tests/conf/modelnet-test.conf
index 938b9fe2..84c65289 100644
--- a/tests/conf/modelnet-test.conf
+++ b/tests/conf/modelnet-test.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="512";
-   message_size="400";
+   message_size="432";
    modelnet_order=( "simplenet" );
    # scheduler options
    modelnet_scheduler="fcfs";
diff --git a/tests/example-ping-pong-determinism.sh b/tests/example-ping-pong-determinism.sh
new file mode 100755
index 00000000..58c2c973
--- /dev/null
+++ b/tests/example-ping-pong-determinism.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Running simulation twice with the same parameters
+
+mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \
+    --num_messages=10 --payload_sz=8192 \
+    -- "$bindir/doc/example/tutorial-ping-pong.conf" \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \
+    --num_messages=10 --payload_sz=8192 \
+    -- "$bindir/doc/example/tutorial-ping-pong.conf" \
+    > model-output-2.txt 2> model-output-2-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# This checks for the number of events processed. If they are different, then
+# the simulation is not deterministic (so this should fail!). As always, just
+# a unit test
+diff <(grep 'Net Events Processed' model-output-1.txt) \
+    <(grep 'Net Events Processed' model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
diff --git a/tests/example-ping-pong-no-logging.sh b/tests/example-ping-pong-no-logging.sh
new file mode 100755
index 00000000..0fb0be8d
--- /dev/null
+++ b/tests/example-ping-pong-no-logging.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Test: checking simulation runs without any problem when "packet latency path" is not given
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Configuring surrogate instance
+export PACKET_SIZE=4096
+export CHUNK_SIZE=4096
+export PACKET_LATENCY_TRACE_PATH=
+cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
+
+# Running simulation twice with the same parameters
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=4096 \
+    -- tutorial-ping-pong.conf \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh
new file mode 100755
index 00000000..4e1299e6
--- /dev/null
+++ b/tests/example-ping-pong-surrogate-1.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Test: checking whether surrogate mode behaves the same as high-fidelity
+# Should take at most 1 minute to run
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Configuring surrogate instance
+export PACKET_SIZE=4096
+export CHUNK_SIZE=64
+export NETWORK_TREATMENT=freeze
+export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
+export IGNORE_UNTIL=0.0
+export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+
+export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/
+cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
+
+# Running simulation twice with the same parameters
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=16320 \
+    -- tutorial-ping-pong.conf \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=16320 \
+    -- tutorial-ping-pong-surrogate.conf \
+    > model-output-2.txt 2> model-output-2-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# This checks for the number of events processed. If they are different, then
+# the simulation is not deterministic (so this should fail!). As always, just
+# a unit test
+to_remove_from_output=' sent [0-9]* bytes in [0-9.]* seconds'
+diff <(grep "Sever LPID:" model-output-1.txt | sed "s/${to_remove_from_output}//") \
+     <(grep "Sever LPID:" model-output-2.txt | sed "s/${to_remove_from_output}//")
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The surrogate gave different results from high-fidelity"
+    exit $err
+fi
+
+# This checks for an equal number of packets transmitted
+diff <(packet-latency-surrogate/*.txt | wc -l) <(packet-latency-highdef/*.txt | wc -l)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The surrogate gave different results from high-fidelity on the number of packets transmitted"
+    exit $err
+fi
diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh
new file mode 100755
index 00000000..fb829993
--- /dev/null
+++ b/tests/example-ping-pong-surrogate-2.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Test: checking whether surrogate mode behaves the same as high-fidelity
+# Should take at most 1 minute to run
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Configuring surrogate instance
+export PACKET_SIZE=128
+export CHUNK_SIZE=64
+export NETWORK_TREATMENT=freeze
+export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
+export IGNORE_UNTIL=0.0
+export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+
+export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/
+cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
+
+# Running simulation twice with the same parameters
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=16320 \
+    -- tutorial-ping-pong.conf \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=16320 \
+    -- tutorial-ping-pong-surrogate.conf \
+    > model-output-2.txt 2> model-output-2-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# This checks for the number of events processed. If they are different, then
+# the simulation is not deterministic (so this should fail!). As always, just
+# a unit test
+to_remove_from_output=' sent [0-9]* bytes in [0-9.]* seconds'
+diff <(grep "Sever LPID:" model-output-1.txt | sed "s/${to_remove_from_output}//") \
+     <(grep "Sever LPID:" model-output-2.txt | sed "s/${to_remove_from_output}//")
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The surrogate gave different results from high-fidelity"
+    exit $err
+fi
+
+# This checks for an equal number of packets transmitted
+diff <(packet-latency-surrogate/*.txt | wc -l) <(packet-latency-highdef/*.txt | wc -l)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The surrogate gave different results from high-fidelity on the number of packets transmitted"
+    exit $err
+fi
diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh
new file mode 100755
index 00000000..9c024e6e
--- /dev/null
+++ b/tests/example-ping-pong-surrogate-3.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Test: checking whether freezing the network works equally well as not freezing the network (in terms of packets processed)
+# Should take at most 1 minute to run
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Configuring surrogate instance
+export PACKET_SIZE=128
+export CHUNK_SIZE=64
+export NETWORK_TREATMENT=freeze
+export PACKET_LATENCY_TRACE_PATH=packet-latency-freeze/
+export IGNORE_UNTIL=0.0
+export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+
+export NETWORK_TREATMENT=nothing
+export PACKET_LATENCY_TRACE_PATH=packet-latency-non-freeze/
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-non-freeze.conf
+
+# Running simulation twice with the same parameters
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=16320 \
+    -- tutorial-ping-pong-surrogate.conf \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \
+    --num_messages=10 --payload_sz=16320 \
+    -- tutorial-ping-pong-surrogate-non-freeze.conf \
+    > model-output-2.txt 2> model-output-2-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# This checks for the number of events processed. If they are different, then
+# the simulation is not deterministic (so this should fail!). As always, just
+# a unit test
+to_remove_from_output=' sent [0-9]* bytes in [0-9.]* seconds'
+diff <(grep "Sever LPID:" model-output-1.txt | sed "s/${to_remove_from_output}//") \
+     <(grep "Sever LPID:" model-output-2.txt | sed "s/${to_remove_from_output}//")
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "Freezing the network leads to a different result than not doing it"
+    exit $err
+fi
+
+# This checks for an equal number of packets transmitted
+diff <(packet-latency-freeze/*.txt | wc -l) <(packet-latency-non-freeze/*.txt | wc -l)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The two modes (freezing and not) are processing a different number of packets"
+    exit $err
+fi
diff --git a/tests/jobmap-test.sh b/tests/jobmap-test.sh
index 21ee0c45..a56e1698 100755
--- a/tests/jobmap-test.sh
+++ b/tests/jobmap-test.sh
@@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then
     exit 1
 fi
 
-tests/jobmap-test $srcdir/tests/conf/jobmap-test-list.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/jobmap-test "$srcdir"/tests/conf/jobmap-test-list.conf
diff --git a/tests/lp-io-test.sh b/tests/lp-io-test.sh
index db932e46..da7f52d5 100755
--- a/tests/lp-io-test.sh
+++ b/tests/lp-io-test.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
-tests/lp-io-test --sync=1
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/lp-io-test --sync=1
diff --git a/tests/lsm-test.sh b/tests/lsm-test.sh
index b703970b..6b4c2afb 100755
--- a/tests/lsm-test.sh
+++ b/tests/lsm-test.sh
@@ -5,4 +5,8 @@ if [ -z $srcdir ]; then
     exit 1
 fi
 
-tests/lsm-test --sync=1 --conf=$srcdir/tests/conf/lsm-test.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/lsm-test --sync=1 --conf="$srcdir"/tests/conf/lsm-test.conf
diff --git a/tests/map-ctx-test.sh b/tests/map-ctx-test.sh
index 8fd7dd0d..db73071d 100755
--- a/tests/map-ctx-test.sh
+++ b/tests/map-ctx-test.sh
@@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then
     exit 1
 fi
 
-tests/map-ctx-test $srcdir/tests/conf/map-ctx-test.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/map-ctx-test "$srcdir"/tests/conf/map-ctx-test.conf
diff --git a/tests/mapping_test.sh b/tests/mapping_test.sh
index 60d233ff..5d97a70f 100755
--- a/tests/mapping_test.sh
+++ b/tests/mapping_test.sh
@@ -1,12 +1,17 @@
 #!/bin/bash
 
-tst=$srcdir/tests
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+tst="$srcdir/tests"
 set -e
-tests/mapping_test --sync=1 --codes-config=$tst/conf/mapping_test.conf \
+
+"$bindir"/tests/mapping_test --sync=1 --codes-config="$tst"/conf/mapping_test.conf \
     2> mapping_test.err \
     1| grep TEST > mapping_test.out
 
-diff $tst/expected/mapping_test.out mapping_test.out
+diff "$tst"/expected/mapping_test.out mapping_test.out
 err=$?
 
 if [ -s mapping_test.err ] ; then
diff --git a/tests/modelnet-p2p-bw-loggp.sh b/tests/modelnet-p2p-bw-loggp.sh
index 8fc150ec..3850a260 100755
--- a/tests/modelnet-p2p-bw-loggp.sh
+++ b/tests/modelnet-p2p-bw-loggp.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-p2p-bw --sync=1 -- $srcdir/tests/conf/modelnet-p2p-bw-loggp.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-p2p-bw --sync=1 -- "$srcdir"/tests/conf/modelnet-p2p-bw-loggp.conf
diff --git a/tests/modelnet-prio-sched-test.sh b/tests/modelnet-prio-sched-test.sh
index 205d7e2b..49706be0 100755
--- a/tests/modelnet-prio-sched-test.sh
+++ b/tests/modelnet-prio-sched-test.sh
@@ -1,13 +1,17 @@
 #!/bin/bash
 
-tests/modelnet-prio-sched-test --sync=1 -- \
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-prio-sched-test --sync=1 -- \
     $srcdir/tests/conf/modelnet-prio-sched-test.conf
 err=$?
 if [[ $err -ne 0 ]]; then
     exit $err
 fi
 
-mpirun -np 2 tests/modelnet-prio-sched-test --sync=3 -- \
+mpirun -np 2 "$bindir"/tests/modelnet-prio-sched-test --sync=3 -- \
     $srcdir/tests/conf/modelnet-prio-sched-test.conf
 err=$?
 if [[ $err -ne 0 ]]; then
diff --git a/tests/modelnet-simplep2p-test.sh b/tests/modelnet-simplep2p-test.sh
index dff8a366..7c2efa81 100755
--- a/tests/modelnet-simplep2p-test.sh
+++ b/tests/modelnet-simplep2p-test.sh
@@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then
     exit 1
 fi
 
-tests/modelnet-simplep2p-test --sync=1 -- $srcdir/tests/conf/modelnet-test-simplep2p.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-simplep2p-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-simplep2p.conf
diff --git a/tests/modelnet-test-dragonfly-synthetic.sh b/tests/modelnet-test-dragonfly-synthetic.sh
index fa4b31dc..a18ebad9 100755
--- a/tests/modelnet-test-dragonfly-synthetic.sh
+++ b/tests/modelnet-test-dragonfly-synthetic.sh
@@ -3,9 +3,9 @@
 # Binaries generated by CMake are located in a different place
 # to those of autoconf
 if [ -z $GENERATED_USING_CMAKE ]; then
-    bin_dir=src/network-workloads
+    bindir=src/network-workloads
 else
-    bin_dir=src
+    bindir="$bindir"/src
 fi
 
-$bin_dir/model-net-synthetic --sync=1 --num_messages=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf 
+"$bindir"/model-net-synthetic --sync=1 --num_messages=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf
diff --git a/tests/modelnet-test-dragonfly.sh b/tests/modelnet-test-dragonfly.sh
index 9ed392c4..68614d0e 100755
--- a/tests/modelnet-test-dragonfly.sh
+++ b/tests/modelnet-test-dragonfly.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-dragonfly.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-dragonfly.conf
diff --git a/tests/modelnet-test-em.sh b/tests/modelnet-test-em.sh
index fed720a9..6209d0dc 100755
--- a/tests/modelnet-test-em.sh
+++ b/tests/modelnet-test-em.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-em.conf
-
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
 
+"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-em.conf
diff --git a/tests/modelnet-test-fattree-synthetic.sh b/tests/modelnet-test-fattree-synthetic.sh
index bd3acc9a..cd9b73cb 100755
--- a/tests/modelnet-test-fattree-synthetic.sh
+++ b/tests/modelnet-test-fattree-synthetic.sh
@@ -8,12 +8,12 @@ if [ -z $srcdir ]; then
 # Binaries generated by CMake are located in a different place
 # to those of autoconf
 if [ -z $GENERATED_USING_CMAKE ]; then
-    bin_dir=src/network-workloads
+    bindir=src/network-workloads
 else
-    bin_dir=src
+    bindir="$bindir"/src
 fi
 
-$bin_dir/model-net-synthetic-fattree --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-fattree.conf 
+"$bindir"/model-net-synthetic-fattree --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-fattree.conf
 
 #source $srcdir/tests/download-traces.sh
 #src/network-workloads/model-net-mpi-replay --sync=1 --num_net_traces=27 --workload_file=/tmp/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00- --workload_type="dumpi" -- $srcdir/src/network-workloads/conf/modelnet-mpi-test-fattree.conf 
diff --git a/tests/modelnet-test-loggp.sh b/tests/modelnet-test-loggp.sh
index 03d98286..656c0912 100755
--- a/tests/modelnet-test-loggp.sh
+++ b/tests/modelnet-test-loggp.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-loggp.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-loggp.conf
diff --git a/tests/modelnet-test-slimfly-synthetic.sh b/tests/modelnet-test-slimfly-synthetic.sh
index df9a4436..a8545acd 100755
--- a/tests/modelnet-test-slimfly-synthetic.sh
+++ b/tests/modelnet-test-slimfly-synthetic.sh
@@ -3,9 +3,9 @@
 # Binaries generated by CMake are located in a different place
 # to those of autoconf
 if [ -z $GENERATED_USING_CMAKE ]; then
-    bin_dir=src/network-workloads
+    bindir=src/network-workloads
 else
-    bin_dir=src
+    bindir="$bindir"/src
 fi
 
-$bin_dir/model-net-synthetic-slimfly --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf 
+"$bindir"/model-net-synthetic-slimfly --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf
diff --git a/tests/modelnet-test-slimfly.sh b/tests/modelnet-test-slimfly.sh
index 3fabc8b5..87f91d08 100755
--- a/tests/modelnet-test-slimfly.sh
+++ b/tests/modelnet-test-slimfly.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-slimfly.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-slimfly.conf
diff --git a/tests/modelnet-test-torus.sh b/tests/modelnet-test-torus.sh
index 9c6997f8..fd3934ed 100755
--- a/tests/modelnet-test-torus.sh
+++ b/tests/modelnet-test-torus.sh
@@ -1,4 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test-torus.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
 
+"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-torus.conf
diff --git a/tests/modelnet-test.sh b/tests/modelnet-test.sh
index 0919c7a1..bcc3351b 100755
--- a/tests/modelnet-test.sh
+++ b/tests/modelnet-test.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
-tests/modelnet-test --sync=1 -- $srcdir/tests/conf/modelnet-test.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test.conf
diff --git a/tests/rc-stack-test.sh b/tests/rc-stack-test.sh
index 8c95e82b..b16cdfd8 100755
--- a/tests/rc-stack-test.sh
+++ b/tests/rc-stack-test.sh
@@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then
     exit 1
 fi
 
-tests/modelnet-simplep2p-test
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/modelnet-simplep2p-test
diff --git a/tests/resource-test.sh b/tests/resource-test.sh
index c39213ca..4b2cba1b 100755
--- a/tests/resource-test.sh
+++ b/tests/resource-test.sh
@@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then
     exit 1
 fi
 
-tests/resource-test --sync=1 --codes-config=$srcdir/tests/conf/buffer_test.conf
+if [ -z $GENERATED_USING_CMAKE ]; then
+    bindir=.
+fi
+
+"$bindir"/tests/resource-test --sync=1 --codes-config="$srcdir"/tests/conf/buffer_test.conf
diff --git a/tests/run-test.sh.in b/tests/run-test.sh.in
index ae2bec46..81259559 100755
--- a/tests/run-test.sh.in
+++ b/tests/run-test.sh.in
@@ -1,6 +1,39 @@
 #!/bin/bash -x
 
+# This file is called when running a test using CTest.
+#
+# To run without deleting test directories set DONT_DELETE_TEST_DIR
+# > DONT_DELETE_TEST_DIR=1 ctest
+# instead of
+# > ctest
+
 export srcdir="${CMAKE_SOURCE_DIR}"
+export bindir="${CMAKE_BINARY_DIR}"
 export GENERATED_USING_CMAKE=1
 
+# Creating temporal folder in order to save output without colliding with
+# some other process that would like to generate the same output
+mkdir -p testing-output
+tmpdir="$(mktemp -d testing-output/test-XXXXXX)"
+
+pushd $tmpdir
+
+# running experiment
 bash -x "$1"
+
+# checking for exit error
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# checking if ross.csv exists and is empty
+if [[ -f ross.csv ]] && [[ ! -s ross.csv ]]; then
+    >&2 echo "The experiment seems to have failed. \`ross.csv' is empty, i.e., the binary died before saving any output"
+    exit 1
+fi
+
+popd
+
+# deleting temporal dir (this should only happen if there were no errors)
+if [ -z $DONT_DELETE_TEST_DIR ]; then
+    rm -r $tmpdir
+fi
diff --git a/tests/workload/codes-workload-test.sh b/tests/workload/codes-workload-test.sh
index 8709c560..a594716b 100755
--- a/tests/workload/codes-workload-test.sh
+++ b/tests/workload/codes-workload-test.sh
@@ -3,9 +3,9 @@
 # Binaries generated by CMake are located in a different place
 # to those of autoconf
 if [ -z $GENERATED_USING_CMAKE ]; then
-    bin_dir=tests/workload
+    bindir=tests/workload
 else
-    bin_dir=tests
+    bindir=$bindir/tests
 fi
 
-$bin_dir/codes-workload-test --sync=1 $srcdir/tests/workload/codes-workload-test.conf
+"$bindir"/codes-workload-test --sync=1 "$srcdir"/tests/workload/codes-workload-test.conf

From e2537e77b38b879673ec8b9bcda7a9ce0816baf4 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 11 Aug 2023 11:59:49 -0400
Subject: [PATCH 035/188] Fixing additional bugs on surrogate mode

The two bugs fixed are:
- Some packets carry "local data" (local_event_size_bytes) and this
  triggers a message back to the sender. The surrogate was not handling
  this before, it does it now
- The surrogate was not handling well the case for when the size of the
  packet and the size of the message are the same
---
 src/networks/model-net/dragonfly-dally.C | 28 +++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 484dd7ba..462e7ddb 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3872,6 +3872,17 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     if(stat->max_event_size < total_event_size) {
         stat->max_event_size = total_event_size;
     }
+
+    if(msg->local_event_size_bytes > 0)
+    {
+        // TODO (Elkin): This delay is wrong. It might take quite a bit longer in some cases as all the chunks are processed until we get to this. Create a better estimate based on the number of total chunks!
+        tw_stime local_ts = 0;
+        tw_event *e_new = tw_event_new(msg->sender_lp, local_ts, lp);
+        void *m_new = tw_event_data(e_new);
+        void *local_event = (char*) model_net_method_get_edata(DRAGONFLY_DALLY, msg) + msg->remote_event_size_bytes;
+        memcpy(m_new, local_event, msg->local_event_size_bytes);
+        tw_event_send(e_new);
+    }
 }
 
 static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp)
@@ -4825,6 +4836,7 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
         tmp = d_entry;
     // Just for completion, checking invariant
     } else {
+        // packet sz == message sz
         assert(msg->total_size == msg->packet_size);
     }
 
@@ -4860,16 +4872,22 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
         s->total_msg_size += msg->total_size;
         s->finished_msgs++;
 
-        // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
-        if (tmp->remote_event_data && tmp->remote_event_size > 0) {
-            send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
-        }
-
         if (tmp) {
+            // This should always be true. It sends the message to the server/workload or communicates to the model-net layer
+            if (tmp->remote_event_data && tmp->remote_event_size > 0) {
+                send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
+            }
+
             bf->c8 = 1;
             qhash_del(hash_link);
             rc_stack_push(lp, tmp, free_tmp, s->st);
             s->rank_tbl_pop--;
+        } else { // packet sz == message sz
+            if (msg->remote_event_size_bytes > 0) {
+                void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg);
+                assert(m_data_src);
+                send_remote_event(s, msg, lp, bf, (char*) m_data_src, msg->remote_event_size_bytes);
+            }
         }
     }
 }

From 480f259418fe47716b90243676db4bf1c2f2bbd3 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 11 Aug 2023 12:04:47 -0400
Subject: [PATCH 036/188] Small reformating of surrogate and two bugs found

---
 src/util/surrogate.c                          | 54 ++++++++-----------
 tests/CMakeLists.txt                          |  3 ++
 ...ample-ping-pong-surrogate-determinism-1.sh | 51 ++++++++++++++++++
 ...ample-ping-pong-surrogate-determinism-2.sh | 51 ++++++++++++++++++
 4 files changed, 126 insertions(+), 33 deletions(-)
 create mode 100755 tests/example-ping-pong-surrogate-determinism-1.sh
 create mode 100755 tests/example-ping-pong-surrogate-determinism-2.sh

diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index d9f26b71..7aea6330 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -243,6 +243,8 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
         }
     } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0));
 
+    tw_pe_fossil_collect();
+
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: All events rolledbacked and cancelled\n", g_tw_mynode);
     }
@@ -407,30 +409,13 @@ static tw_event *** order_events_per_lps(tw_pe * pe) {
 // - Going through every LP and calling their respective functions
 #ifdef USE_RAND_TIEBREAKER
 static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
-    if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
-        tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
-    }
-
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
-        rollback_and_cancel_events_pe(pe, gvt);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
-    }
 #else
 static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
+#endif
     if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
     }
 
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(pe->GVT == gvt);
-        rollback_and_cancel_events_pe(pe, gvt);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
-        assert(pe->GVT == gvt);
-    }
-#endif
-
     tw_event *** lps_events = order_events_per_lps(pe);
     shift_events_to_future_pe(pe, gvt);
 
@@ -482,23 +467,10 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
 
 #ifdef USE_RAND_TIEBREAKER
 static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
-    (void) pe;
-
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
-        rollback_and_cancel_events_pe(pe, gvt);
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt) == 0);
-    }
 #else
 static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
-    (void) pe;
-
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(pe->GVT == gvt);
-        rollback_and_cancel_events_pe(pe, gvt);
-        assert(pe->GVT == gvt);
-    }
 #endif
+    (void) pe;
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -611,9 +583,25 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
         }
         printf("Switching at %g", gvt);
     }
+    // Rollback if in optimistic mode
+#ifdef USE_RAND_TIEBREAKER
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+        rollback_and_cancel_events_pe(pe, gvt_sig);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0);
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+    }
+#else
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(pe->GVT == gvt);
+        rollback_and_cancel_events_pe(pe, gvt);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
+        assert(pe->GVT == gvt);
+    }
+#endif
     surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        printf(" to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "vanilla");
+        printf(" to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
 
     // "Freezing" network events and activating LP's switch functions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b196c823..238d988f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -91,6 +91,9 @@ set(test-shell-files
     example-ping-pong-surrogate-2.sh
     example-ping-pong-surrogate-3.sh
     example-ping-pong-no-logging.sh
+    # These are aspirational unit tests. The switching mechanism is not fully deterministic
+    #example-ping-pong-surrogate-determinism-1.sh # bug: not all processed events are commited before the switch happens, this might alter the behaviour of the predictor, thus the simulation diverges at switch (no longer deterministic)
+    #example-ping-pong-surrogate-determinism-2.sh # bug: incoming packets (`T_ARRIVE_PREDICTED` events scheduled by `dragonfly_dally_terminal_highdef_to_surrogate`) might tie some times, the tie is not resolved deterministically
     )
 
 foreach(testname ${test-shell-files})
diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh
new file mode 100755
index 00000000..283791aa
--- /dev/null
+++ b/tests/example-ping-pong-surrogate-determinism-1.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Configuring surrogate instance
+export PACKET_SIZE=1024
+export CHUNK_SIZE=1024
+export NETWORK_TREATMENT=nothing
+export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
+export IGNORE_UNTIL=0.0
+export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
+
+export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
+
+# Running simulation twice with the same parameters
+
+mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \
+    --num_messages=100 --payload_sz=8192 \
+    -- tutorial-ping-pong-surrogate-1.conf \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \
+    --num_messages=100 --payload_sz=8192 \
+    -- tutorial-ping-pong-surrogate-2.conf \
+    > model-output-2.txt 2> model-output-2-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# This checks for the number of events processed. If they are different, then
+# the simulation is not deterministic (so this should fail!). As always, just
+# a unit test
+diff <(grep 'Net Events Processed' model-output-1.txt) \
+    <(grep 'Net Events Processed' model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh
new file mode 100755
index 00000000..169ba8bf
--- /dev/null
+++ b/tests/example-ping-pong-surrogate-determinism-2.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+# Configuring surrogate instance
+export PACKET_SIZE=1024
+export CHUNK_SIZE=1024
+export NETWORK_TREATMENT=freeze
+export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
+export IGNORE_UNTIL=0.0
+export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
+
+export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
+
+# Running simulation twice with the same parameters
+
+mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \
+    --num_messages=100 --payload_sz=8192 \
+    -- tutorial-ping-pong-surrogate-1.conf \
+    > model-output-1.txt 2> model-output-1-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \
+    --num_messages=100 --payload_sz=8192 \
+    -- tutorial-ping-pong-surrogate-2.conf \
+    > model-output-2.txt 2> model-output-2-error.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# This checks for the number of events processed. If they are different, then
+# the simulation is not deterministic (so this should fail!). As always, just
+# a unit test
+diff <(grep 'Net Events Processed' model-output-1.txt) \
+    <(grep 'Net Events Processed' model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi

From a6922c9b5db3ecb0bc733f1c3ec922895ae4efd1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 17 Aug 2023 08:14:23 -0400
Subject: [PATCH 037/188] Ignoring next-in-line delay for packel-latency
 average predictor

---
 codes/model-net-method.h                      |  3 +-
 codes/net/dragonfly-dally.h                   |  3 ++
 codes/surrogate.h                             |  1 +
 .../tutorial-ping-pong-surrogate.conf.in      |  2 +-
 doc/example/tutorial-ping-pong.conf.in        |  2 +-
 .../model-net/core/model-net-sched-impl.c     | 33 ++++++++++---------
 src/networks/model-net/dragonfly-custom.C     |  3 +-
 src/networks/model-net/dragonfly-dally.C      | 18 ++++++----
 src/networks/model-net/dragonfly-plus.C       |  3 +-
 src/networks/model-net/express-mesh.C         |  3 +-
 src/util/surrogate.c                          | 16 ++++++---
 tests/conf/modelnet-p2p-bw-loggp.conf         |  2 +-
 tests/conf/modelnet-prio-sched-test.conf      |  2 +-
 tests/conf/modelnet-test-dragonfly.conf       |  2 +-
 tests/conf/modelnet-test-loggp.conf           |  2 +-
 tests/conf/modelnet-test-simplep2p.conf       |  2 +-
 tests/conf/modelnet-test-slimfly.conf         |  2 +-
 tests/conf/modelnet-test-torus.conf           |  2 +-
 tests/conf/modelnet-test.conf                 |  2 +-
 19 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/codes/model-net-method.h b/codes/model-net-method.h
index fdf09557..b6bb01ab 100644
--- a/codes/model-net-method.h
+++ b/codes/model-net-method.h
@@ -39,7 +39,8 @@ struct model_net_method
             void const * remote_event,
             void const * self_event,
             tw_lp *sender,
-            int is_last_pckt);
+            int is_last_pckt,
+            bool is_there_another_pckt_in_queue);
     void (*model_net_method_packet_event_rc)(tw_lp *sender);
     tw_stime (*model_net_method_recv_msg_event)(
             const char * category,
diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 55078e13..87a611fa 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -94,6 +94,9 @@ struct terminal_dally_message
    int path_type;
    int saved_app_id;
 
+   // For packet latency predictor (surrogate)
+   bool is_there_another_pckt_in_queue;
+
    /* for reverse computation */
    short num_rngs;
    short num_cll;
diff --git a/codes/surrogate.h b/codes/surrogate.h
index 79255bcd..1f9bae92 100644
--- a/codes/surrogate.h
+++ b/codes/surrogate.h
@@ -40,6 +40,7 @@ struct packet_start {
     double workload_injection_time; // this is when the workload passed down the event to model-net
     double processing_packet_delay;  // delay for this packet to be processed from previous packet in the queue
     uint32_t packet_size;
+    bool is_there_another_pckt_in_queue; // is there another packet in queue
     void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
     void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
 };
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in
index 69bfaa96..d0e0b1af 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf.in
+++ b/doc/example/tutorial-ping-pong-surrogate.conf.in
@@ -41,7 +41,7 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="2.0";
 # ROSS message size
-   message_size="408";
+   message_size="416";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in
index 507094f5..9ed21c84 100644
--- a/doc/example/tutorial-ping-pong.conf.in
+++ b/doc/example/tutorial-ping-pong.conf.in
@@ -38,7 +38,7 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="2.0";
 # ROSS message size
-   message_size="408";
+   message_size="416";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c
index 7bcf4d16..3c3d25a9 100644
--- a/src/networks/model-net/core/model-net-sched-impl.c
+++ b/src/networks/model-net/core/model-net-sched-impl.c
@@ -20,7 +20,7 @@
         if (MN_SCHED_DEBUG_VERBOSE) printf(_fmt, ##__VA_ARGS__); \
     } while(0)
 
-/// scheduler-specific data structures 
+/// scheduler-specific data structures
 
 typedef struct mn_sched_qitem {
     model_net_request req;
@@ -28,7 +28,7 @@ typedef struct mn_sched_qitem {
     // remaining bytes to send
     uint64_t rem;
     tw_stime entry_time;
-    // pointers to event structures 
+    // pointers to event structures
     // sizes are given in the request struct
     void * remote_event;
     void * local_event;
@@ -56,7 +56,7 @@ typedef struct mn_sched_prio {
 /// FCFS
 // void used to avoid ptr-to-ptr conv warnings
 static void fcfs_init (
-        const struct model_net_method     * method, 
+        const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
         int                                 is_recv_queue,
         void                             ** sched);
@@ -86,7 +86,7 @@ static void fcfs_next_rc(
 
 // ROUND-ROBIN
 static void rr_init (
-        const struct model_net_method     * method, 
+        const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
         int                                 is_recv_queue,
         void                             ** sched);
@@ -114,7 +114,7 @@ static void rr_next_rc (
         const model_net_sched_rc * rc,
         tw_lp                    * lp);
 static void prio_init (
-        const struct model_net_method     * method, 
+        const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
         int                                 is_recv_queue,
         void                             ** sched);
@@ -143,9 +143,9 @@ static void prio_next_rc (
         tw_lp                    * lp);
 
 /// function tables (names defined by X macro in model-net-sched.h)
-static const model_net_sched_interface fcfs_tab = 
+static const model_net_sched_interface fcfs_tab =
 { &fcfs_init, &fcfs_destroy, &fcfs_add, &fcfs_add_rc, &fcfs_next, &fcfs_next_rc};
-static const model_net_sched_interface rr_tab = 
+static const model_net_sched_interface rr_tab =
 { &rr_init, &rr_destroy, &rr_add, &rr_add_rc, &rr_next, &rr_next_rc};
 static const model_net_sched_interface prio_tab =
 { &prio_init, &prio_destroy, &prio_add, &prio_add_rc, &prio_next, &prio_next_rc};
@@ -156,10 +156,10 @@ const model_net_sched_interface * sched_interfaces[] = {
 };
 #undef X
 
-/// FCFS implementation 
+/// FCFS implementation
 
 void fcfs_init(
-        const struct model_net_method     * method, 
+        const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
         int                                 is_recv_queue,
         void                             ** sched){
@@ -219,7 +219,7 @@ void fcfs_add_rc(void *sched, const model_net_sched_rc *rc, tw_lp *lp){
     mn_sched_qitem *q = qlist_entry(ent, mn_sched_qitem, ql);
     dprintf("%llu (mn): rc adding request from %llu to %llu\n", LLU(lp->gid),
             LLU(q->req.src_lp), LLU(q->req.final_dest_lp));
-    // free'ing NULLs is a no-op 
+    // free'ing NULLs is a no-op
     free(q->remote_event);
     free(q->local_event);
     free(q);
@@ -251,6 +251,8 @@ int fcfs_next(
         is_last_packet = 0;
     }
 
+    bool const is_there_another_pckt_in_queue = !is_last_packet || s->queue_len > 1;
+
     if (s->is_recv_queue){
         dprintf("%llu (mn):    receiving message of size %llu (of %llu) "
                 "from %llu to %llu at %1.5e (last:%d)\n",
@@ -270,7 +272,8 @@ int fcfs_next(
                 LLU(q->req.final_dest_lp), tw_now(lp), is_last_packet);
         *poffset = s->method->model_net_method_packet_event(&q->req,
                 q->req.msg_size - q->rem, psize, 0.0, &q->sched_params,
-                q->remote_event, q->local_event, lp, is_last_packet);
+                q->remote_event, q->local_event, lp, is_last_packet,
+                is_there_another_pckt_in_queue);
     }
 
     // if last packet - remove from list, free, save for rc
@@ -362,7 +365,7 @@ void fcfs_next_rc(
 }
 
 void rr_init (
-        const struct model_net_method     * method, 
+        const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
         int                                 is_recv_queue,
         void                             ** sched){
@@ -427,7 +430,7 @@ void rr_next_rc (
 }
 
 void prio_init (
-        const struct model_net_method     * method, 
+        const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
         int                                 is_recv_queue,
         void                             ** sched){
@@ -465,7 +468,7 @@ void prio_add (
     mn_sched_prio *ss = sched;
     int prio = sched_params->prio;
     if (prio == -1){
-        // default prio - lowest possible 
+        // default prio - lowest possible
         prio = ss->params.num_prios-1;
     }
     else if (prio >= ss->params.num_prios){
@@ -504,7 +507,7 @@ int prio_next(
         }
     }
     rc->prio = -1;
-    return -1; // all sub schedulers had no work 
+    return -1; // all sub schedulers had no work
 }
 
 void prio_next_rc (
diff --git a/src/networks/model-net/dragonfly-custom.C b/src/networks/model-net/dragonfly-custom.C
index a2deb05b..3b6fac9d 100644
--- a/src/networks/model-net/dragonfly-custom.C
+++ b/src/networks/model-net/dragonfly-custom.C
@@ -1206,7 +1206,8 @@ static tw_stime dragonfly_custom_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
     (void)sched_params;
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 462e7ddb..24cbfc48 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2362,7 +2362,7 @@ static void setup_packet_latency_path(char const * const dir_to_save) {
         tw_error(TW_LOC, "File %s could not be opened", filename_path);
     }
 
-    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,is_surrogate_on,is_predicted,size,workload_injection,next_packet_delay,start,end,latency\n");
+    fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,is_surrogate_on,is_predicted,size,workload_injection,next_packet_delay,start,end,latency,is_there_another_pckt_in_queue\n");
 }
 
 /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
@@ -2826,12 +2826,13 @@ static inline void packet_latency_save_to_file(
 ) {
     if (!packet_latency_f) { return; } // Don't save if there isn't a file to save to
     if (end.travel_end_time > g_tw_ts_end) { return; } // This packet could never arrive to its destination!
-    fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f\n",
+    fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f,%d\n",
             terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
             surrogate_on, is_predicted,
             start.packet_size,
             start.workload_injection_time, end.next_packet_delay,
-            start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time);
+            start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time,
+            start.is_there_another_pckt_in_queue);
 }
 
 // ==== START OF Surrogate functions definition ====
@@ -3225,7 +3226,8 @@ static void terminal_dally_commit(terminal_state * s,
             .travel_start_time = msg->travel_start_time,
             .workload_injection_time = msg->msg_start_time,
             .processing_packet_delay = -1,
-            .packet_size = msg->packet_size
+            .packet_size = msg->packet_size,
+            .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue
         };
 
         // Saving
@@ -3695,7 +3697,8 @@ static tw_stime dragonfly_dally_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
     (void)sched_params;
@@ -3734,6 +3737,7 @@ static tw_stime dragonfly_dally_packet_event(
     msg->msg_new_mn_event = req->msg_new_mn_event;
     msg->rail_id = req->queue_offset;
     msg->app_id = req->app_id;
+    msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue;
 
     if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/
     {
@@ -3799,7 +3803,8 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
         .travel_start_time = tw_now(lp),
         .workload_injection_time = msg->msg_start_time,
         .processing_packet_delay = processing_packet_delay,
-        .packet_size = msg->packet_size
+        .packet_size = msg->packet_size,
+        .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue
     };
 
     struct packet_end const end = 
@@ -4194,6 +4199,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
         .workload_injection_time = msg->msg_start_time,
         .processing_packet_delay = processing_packet_delay,
         .packet_size = msg->packet_size,
+        .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue,
         .message_data = msg_data,
         .remote_event_data = remote_data
         });
diff --git a/src/networks/model-net/dragonfly-plus.C b/src/networks/model-net/dragonfly-plus.C
index 7a80d65a..96334f0a 100644
--- a/src/networks/model-net/dragonfly-plus.C
+++ b/src/networks/model-net/dragonfly-plus.C
@@ -3151,7 +3151,8 @@ static tw_stime dragonfly_plus_packet_event(model_net_request const *req,
                                             void const *remote_event,
                                             void const *self_event,
                                             tw_lp *sender,
-                                            int is_last_pckt)
+                                            int is_last_pckt,
+                                            bool is_there_another_pckt_in_queue)
 {
     (void) message_offset;
     (void) sched_params;
diff --git a/src/networks/model-net/express-mesh.C b/src/networks/model-net/express-mesh.C
index 1e36afd8..e8b7392b 100644
--- a/src/networks/model-net/express-mesh.C
+++ b/src/networks/model-net/express-mesh.C
@@ -722,7 +722,8 @@ static tw_stime local_packet_event(
     void const * remote_event,
     void const * self_event,
     tw_lp *sender,
-    int is_last_pckt)
+    int is_last_pckt,
+    bool is_there_another_pckt_in_queue)
 {
   (void)message_offset;
   (void)sched_params;
diff --git a/src/util/surrogate.c b/src/util/surrogate.c
index 7aea6330..fd01cee1 100644
--- a/src/util/surrogate.c
+++ b/src/util/surrogate.c
@@ -41,7 +41,7 @@ struct aggregated_latency_one_terminal {
 };
 
 struct latency_surrogate {
-    double sum_next_packet_delay;
+    struct aggregated_latency_one_terminal aggregated_next_packet_delay;
     struct aggregated_latency_one_terminal aggregated_latency_for_all;
     unsigned int num_terminals;
     struct aggregated_latency_one_terminal aggregated_latency[];
@@ -55,9 +55,10 @@ static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     assert(data->aggregated_latency_for_all.total_msgs == 0);
     assert(data->aggregated_latency[0].sum_latency == 0);
     assert(data->aggregated_latency[0].total_msgs == 0);
+    assert(data->aggregated_next_packet_delay.total_msgs == 0);
+    assert(data->aggregated_next_packet_delay.sum_latency == 0);
 
     data->num_terminals = surr_config.total_terminals;
-    data->sum_next_packet_delay = 0;
 }
 
 static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
@@ -73,13 +74,19 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     assert(dest_terminal < data->num_terminals);
     assert(end->travel_end_time > start->travel_start_time);
 
+    // For average latency per terminal
     data->aggregated_latency[dest_terminal].sum_latency += latency;
     data->aggregated_latency[dest_terminal].total_msgs++;
 
+    // For average total latency (used in case there is no data for a specific node)
     data->aggregated_latency_for_all.sum_latency += latency;
     data->aggregated_latency_for_all.total_msgs++;
 
-    data->sum_next_packet_delay += end->next_packet_delay;
+    // We ignore the delay if there are no more packets in the queue
+    if (start->is_there_another_pckt_in_queue) {
+        data->aggregated_next_packet_delay.sum_latency += end->next_packet_delay;
+        data->aggregated_next_packet_delay.total_msgs ++;
+    }
 }
 
 static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) {
@@ -109,7 +116,8 @@ static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp
     }
     assert(latency >= 0);
 
-    double const next_packet_delay = data->sum_next_packet_delay / total_total_datapoints;
+    double const next_packet_delay =
+        data->aggregated_next_packet_delay.sum_latency / data->aggregated_next_packet_delay.total_msgs;
     return (struct packet_end) {
         .travel_end_time = packet_dest->travel_start_time + latency,
         .next_packet_delay = next_packet_delay,
diff --git a/tests/conf/modelnet-p2p-bw-loggp.conf b/tests/conf/modelnet-p2p-bw-loggp.conf
index 54071dc0..6e0f6859 100644
--- a/tests/conf/modelnet-p2p-bw-loggp.conf
+++ b/tests/conf/modelnet-p2p-bw-loggp.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="2147483648";
-   message_size="416";
+   message_size="424";
    modelnet_order=( "loggp" );
    # scheduler options
    modelnet_scheduler="fcfs";
diff --git a/tests/conf/modelnet-prio-sched-test.conf b/tests/conf/modelnet-prio-sched-test.conf
index ff90e99d..c038f36c 100644
--- a/tests/conf/modelnet-prio-sched-test.conf
+++ b/tests/conf/modelnet-prio-sched-test.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="512";
-   message_size="448";
+   message_size="456";
    modelnet_order=( "simplenet" );
    # scheduler options
    modelnet_scheduler="priority";
diff --git a/tests/conf/modelnet-test-dragonfly.conf b/tests/conf/modelnet-test-dragonfly.conf
index 3afe172d..ef8bd1a5 100644
--- a/tests/conf/modelnet-test-dragonfly.conf
+++ b/tests/conf/modelnet-test-dragonfly.conf
@@ -23,6 +23,6 @@ PARAMS
    local_bandwidth="5.25";
    global_bandwidth="4.7";
    cn_bandwidth="5.25";
-   message_size="432";
+   message_size="440";
    routing="nonminimal";
 }
diff --git a/tests/conf/modelnet-test-loggp.conf b/tests/conf/modelnet-test-loggp.conf
index eb3bc675..bfa39245 100644
--- a/tests/conf/modelnet-test-loggp.conf
+++ b/tests/conf/modelnet-test-loggp.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-   message_size="432";
+   message_size="440";
    modelnet_order=( "loggp" );
    # scheduler options
    modelnet_scheduler="fcfs-full";
diff --git a/tests/conf/modelnet-test-simplep2p.conf b/tests/conf/modelnet-test-simplep2p.conf
index 168c219c..532ff74b 100644
--- a/tests/conf/modelnet-test-simplep2p.conf
+++ b/tests/conf/modelnet-test-simplep2p.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-    message_size="432";
+    message_size="440";
     packet_size="1024";
     modelnet_order=("simplep2p");
     # scheduler options
diff --git a/tests/conf/modelnet-test-slimfly.conf b/tests/conf/modelnet-test-slimfly.conf
index 5b76af9b..ecbc72b2 100644
--- a/tests/conf/modelnet-test-slimfly.conf
+++ b/tests/conf/modelnet-test-slimfly.conf
@@ -30,6 +30,6 @@ PARAMS
    global_bandwidth="9.0";
    cn_bandwidth="9.0";
    link_delay = "0";
-   message_size="432";
+   message_size="440";
    routing="minimal";
 }
diff --git a/tests/conf/modelnet-test-torus.conf b/tests/conf/modelnet-test-torus.conf
index 798bf39e..e523f176 100644
--- a/tests/conf/modelnet-test-torus.conf
+++ b/tests/conf/modelnet-test-torus.conf
@@ -14,7 +14,7 @@ PARAMS
    # scheduler options
    modelnet_scheduler="fcfs";
    # modelnet_scheduler="round-robin";
-   message_size="432";
+   message_size="440";
    n_dims="3";
    dim_length="4,2,2";
    link_bandwidth="2.0";
diff --git a/tests/conf/modelnet-test.conf b/tests/conf/modelnet-test.conf
index 84c65289..e8761379 100644
--- a/tests/conf/modelnet-test.conf
+++ b/tests/conf/modelnet-test.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="512";
-   message_size="432";
+   message_size="440";
    modelnet_order=( "simplenet" );
    # scheduler options
    modelnet_scheduler="fcfs";

From d977677bc9af1d88ae5135bd191f98d28b172496 Mon Sep 17 00:00:00 2001
From: Xin Wang <xwang149@hawk.iit.edu>
Date: Thu, 31 Aug 2023 10:40:43 -0500
Subject: [PATCH 038/188] output iteration time to file

---
 src/network-workloads/model-net-mpi-replay.c  |  78 +++++-
 .../methods/codes-conc-online-comm-wrkld.C    | 254 +++++++++++++++---
 2 files changed, 286 insertions(+), 46 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 20258060..a7eefdc4 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -20,6 +20,7 @@
 #include "codes/congestion-controller-core.h"
 
 /* turning on track lp will generate a lot of output messages */
+#define DBG_COMM 1
 #define MN_LP_NM "modelnet_dragonfly_custom"
 #define CONTROL_MSG_SZ 64
 #define TRACE -1
@@ -134,6 +135,7 @@ static int syn_type = 0;
 
 FILE * workload_log = NULL;
 FILE * msg_size_log = NULL;
+FILE * iteration_log = NULL;
 FILE * workload_agg_log = NULL;
 FILE * workload_meta_log = NULL;
 
@@ -1252,7 +1254,11 @@ static int notify_posted_wait(nw_state* s,
                 if(wait_elem->num_completed >= wait_elem->count)
                 {
                     if(enable_debug)
-                        fprintf(workload_log, "\n(%lf) APP ID %d MPI WAITALL COMPLETED AT %llu ", tw_now(lp), s->app_id, LLU(s->nw_id));
+                    {
+                        // fprintf(workload_log, "\n(%lf) APP ID %d MPI WAITALL COMPLETED AT %llu ", tw_now(lp), s->app_id, LLU(s->nw_id));
+                        fprintf(workload_log, "\n (%lf) APP ID %d MPI WAITALL SOURCE %d COMPLETED", 
+                          tw_now(lp), s->app_id, s->local_rank);
+                    }
                     wait_completed = 1;
                 }
                 m->fwd.wait_completed = 1; //This is just the individual request handle - not the entire wait.
@@ -1299,7 +1305,12 @@ static void codes_exec_mpi_wait(nw_state* s, tw_bf * bf, nw_message * m, tw_lp*
 {
     /* check in the completed receives queue if the request ID has already been completed.*/
                 
-//    printf("\n Wait posted rank id %d ", s->nw_id);
+    if(enable_debug)
+    {
+      fprintf(workload_log, "\n (%lf) APP ID %d MPI WAIT POSTED SOURCE %d", 
+            tw_now(lp), s->app_id, s->local_rank);
+    }
+
     assert(!s->wait_op);
     unsigned int req_id = mpi_op->u.wait.req_id;
 
@@ -1383,7 +1394,11 @@ static void codes_exec_mpi_wait_all(
         struct codes_workload_op * mpi_op)
 {
   if(enable_debug)
-    fprintf(workload_log, "\n MPI WAITALL POSTED AT %llu ", LLU(s->nw_id));
+  {
+    // fprintf(workload_log, "\n MPI WAITALL POSTED AT %llu ", LLU(s->nw_id));
+    fprintf(workload_log, "\n (%lf) APP ID %d MPI WAITALL POSTED SOURCE %d", 
+          tw_now(lp), s->app_id, s->local_rank);
+  }
 
   if(enable_sampling)
   {
@@ -1659,6 +1674,12 @@ static void codes_exec_comp_delay(
 	//ts += g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise);
     // assert(ts > 0);
 
+  if(enable_debug)
+  {
+    fprintf(workload_log, "\n (%lf) APP %d MPI DELAY SOURCE %d DURATION %lf",
+              tw_now(lp), s->app_id, s->local_rank, ts);
+  }
+
 	e = tw_event_new( lp->gid, ts , lp );
 	msg = (nw_message*)tw_event_data(e);
 	msg->msg_type = MPI_OP_GET_NEXT;
@@ -1754,6 +1775,20 @@ static void codes_exec_mpi_recv(
 //        printf("\n Receive op posted num bytes %llu source %d ", recv_op->num_bytes,
 //                recv_op->source_rank);
 
+  if(enable_debug)
+  {
+      if(mpi_op->op_type == CODES_WK_RECV)
+      {
+        fprintf(workload_log, "\n (%lf) APP %d MPI RECV SOURCE %d DEST %d BYTES %"PRId64,
+                  tw_now(lp), s->app_id, recv_op->source_rank, recv_op->dest_rank, recv_op->num_bytes);
+      }
+      else
+      {
+        fprintf(workload_log, "\n (%lf) APP ID %d MPI IRECV SOURCE %d DEST %d BYTES %"PRId64,
+                  tw_now(lp), s->app_id, recv_op->source_rank, recv_op->dest_rank, recv_op->num_bytes);
+      }
+  }
+
 	int found_matching_sends = rm_matching_send(s, bf, m, lp, recv_op);
 
 	       /* for mpi irecvs, this is a non-blocking receive so just post it and move on with the trace read. */
@@ -1762,6 +1797,8 @@ static void codes_exec_mpi_recv(
         bf->c6 = 1;
 	    codes_issue_next_event(lp);
     }
+
+
 	/* save the req id inserted in the completed queue for reverse computation. */
 	if(found_matching_sends < 0)
 	  {
@@ -1982,12 +2019,18 @@ static void codes_exec_mpi_send(nw_state* s,
     {
         if(mpi_op->op_type == CODES_WK_ISEND)
         {
-            fprintf(workload_log, "\n (%lf) APP %d MPI ISEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64,
-                    tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes);
+            // fprintf(workload_log, "\n (%lf) APP %d MPI ISEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64,
+            //         tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes);
+          fprintf(workload_log, "\n (%lf) APP %d MPI ISEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64,
+                    tw_now(lp), s->app_id, LLU(remote_m.fwd.src_rank), remote_m.fwd.dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes);
         }
         else
-            fprintf(workload_log, "\n (%lf) APP ID %d MPI SEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64,
-                    tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes);
+        {
+            // fprintf(workload_log, "\n (%lf) APP ID %d MPI SEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64,
+            //         tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes);
+          fprintf(workload_log, "\n (%lf) APP ID %d MPI SEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64,
+                    tw_now(lp), s->app_id, LLU(remote_m.fwd.src_rank), remote_m.fwd.dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes);
+        }
     }
     if(is_rend || is_eager)    
     {
@@ -2502,7 +2545,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
         e = tw_event_new(lp->gid, ts, lp);
         m_new = (nw_message*)tw_event_data(e);
         m_new->msg_type = CLI_BCKGND_GEN;
-        printf("\naddress difference = %d\n", (&m_new->fwd.app_id - (int *)m_new));
+        printf("\naddress difference = %ld\n", (&m_new->fwd.app_id - (int *)m_new));
         tw_event_send(e);
         is_synthetic = 1;
 
@@ -2908,8 +2951,11 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 
 		case CODES_WK_MARK:
 			{
-				printf("\n MARK_%d node %llu job %d rank %d time %lf ", mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, tw_now(lp));
+				// printf("\n MARK_%d node %llu job %d rank %d time %lf \n", mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, tw_now(lp));
+                // m->rc.saved_marker_time = tw_now(lp);
+        fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, tw_now(lp));
                 m->rc.saved_marker_time = tw_now(lp);
+
 				codes_issue_next_event(lp);
 			}
 			break;
@@ -3136,7 +3182,7 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
 
                     char tag_line[32];
                     int written;
-                    written = sprintf(tag_line, "%d %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time);
+                    written = sprintf(tag_line, "%llu %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time);
                     lp_io_write(lp->gid, marker_filename, written, tag_line);
                 }
             }
@@ -3409,7 +3455,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
         jobmap_p.alloc_file = alloc_file;
         jobmap_ctx = codes_jobmap_configure(CODES_JOBMAP_LIST, &jobmap_p);
 
-
         if(strlen(workloads_timer_file) > 0){
             FILE *timer_file = fopen(workloads_timer_file, "r");
             if(!timer_file)
@@ -3489,6 +3534,15 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 
    modelnet_mpi_replay_read_config();
 
+   //Xin: output iteration time into log file
+   iteration_log = fopen("iteration-logs", "w+");
+   if(!iteration_log)
+   {
+       printf("\n Error logging iteration times... quitting ");
+       MPI_Finalize();
+       return -1;
+   }
+
    if(enable_debug)
    {
        workload_log = fopen("mpi-op-logs", "w+");
@@ -3572,6 +3626,8 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 
    tw_run();
 
+    fclose(iteration_log); //Xin
+    
     if(enable_debug)
         fclose(workload_log);
 
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index 144db2d5..1c2836b4 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -34,6 +34,10 @@
 #define ALLREDUCE_SHORT_MSG_SIZE 2048
 
 #define DBG_COMM 0
+#define DBG_LINKING 0
+#define DBG_TMP 0
+#define CHECKPOINT_HASH_TABLE_SIZE 251
+#define DEFAULT_WR_BUF_SIZE (16 * 1024 * 1024)   /* 16 MiB default */
 
 #define THISMIN(a,b) ((a) < (b)) ? (a) : (b)
 
@@ -134,9 +138,9 @@ void UNION_MPI_Finalize()
     sctx->fifo.push_back(&wrkld_per_rank);
 
     if(DBG_COMM){
-        printf("FINALIZE src %d\n", sctx->my_rank);
-        // printf("num_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n", 
-        //         num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
+        printf("\nUNION FINALIZE src %d ", sctx->my_rank);
+        printf("\nnum_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n", 
+                num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
         // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
     }
 
@@ -161,11 +165,136 @@ void UNION_Compute(long cycle_count)
     struct shared_context * sctx = static_cast<shared_context*>(arg);
     sctx->fifo.push_back(&wrkld_per_rank);
     if(DBG_COMM){
-        printf("COMPUTE src %d: %ld ns\n", sctx->my_rank, cycle_count);
+        printf("\nUNION COMPUTE src %d: %ld ns ", sctx->my_rank, cycle_count);
     }
     ABT_thread_yield_to(global_prod_thread);
 }
 
+void UNION_Mark_Iteration(UNION_TAG iter_tag)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_MARK;
+    wrkld_per_rank.u.send.tag = iter_tag;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("\nUNION MARKITERATION src %d ", sctx->my_rank);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+
+void UNION_IO_OPEN_FILE(int fid)
+{
+    struct codes_workload_op op;
+    op.op_type = CODES_WK_OPEN;
+    op.u.open.file_id = fid;
+    op.u.open.create_flag = 1;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&op);
+
+    if(DBG_TMP){
+        printf("\nUNION IO OPEN src %d ", sctx->my_rank);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+
+}
+
+void UNION_IO_WRITE(int fid, long size)
+{
+    struct codes_workload_op op;
+    op.op_type = CODES_WK_WRITE;
+    op.u.write.file_id = fid;
+    op.u.write.offset = 0;
+    op.u.write.size = size;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&op);
+    
+    if(DBG_TMP){
+        printf("\nUNION IO WRITE src %d ", sctx->my_rank);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+void UNION_IO_READ(int fid, long size)
+{
+    struct codes_workload_op op;
+    op.op_type = CODES_WK_READ;
+    op.u.read.file_id = fid;
+    op.u.read.offset = 0;
+    op.u.read.size = size;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&op);
+    
+    if(DBG_TMP){
+        printf("\nUNION IO READ src %d ", sctx->my_rank);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
+void UNION_IO_CLOSE_FILE(int fid)
+{
+    struct codes_workload_op op;
+    op.op_type = CODES_WK_CLOSE;
+    op.u.close.file_id = fid;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->fifo.push_back(&op);
+    
+    if(DBG_TMP){
+        printf("\nUNION IO READ src %d ", sctx->my_rank);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);    
+}
+
 void UNION_MPI_Send(const void *buf, 
             int count, 
             UNION_Datatype datatype, 
@@ -174,7 +303,6 @@ void UNION_MPI_Send(const void *buf,
             UNION_Comm comm)
 {
     /* add an event in the shared queue and then yield */
-    //    printf("\n Sending to rank %d ", comm_id);
     struct codes_workload_op wrkld_per_rank;
 
     int datatypesize;
@@ -197,8 +325,8 @@ void UNION_MPI_Send(const void *buf,
     struct shared_context * sctx = static_cast<shared_context*>(arg);
     wrkld_per_rank.u.send.source_rank = sctx->my_rank;
     sctx->fifo.push_back(&wrkld_per_rank);
-    if(DBG_COMM){
-        printf("SEND src %d dst %d: %lld bytes\n", sctx->my_rank, dest,
+    if(DBG_TMP){
+        printf("\nUNION SEND src %d dst %d: %lld bytes ", sctx->my_rank, dest,
                 wrkld_per_rank.u.send.num_bytes);
     // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
     }
@@ -238,7 +366,7 @@ void UNION_MPI_Recv(void *buf,
     wrkld_per_rank.u.recv.dest_rank = sctx->my_rank;
     sctx->fifo.push_back(&wrkld_per_rank);
     if(DBG_COMM){
-        printf("RECV src %d dst %d: %lld bytes\n", source, sctx->my_rank, 
+        printf("\nUNION RECV src %d dst %d: %lld bytes ", source, sctx->my_rank, 
             wrkld_per_rank.u.recv.num_bytes);
     // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
     }
@@ -298,7 +426,7 @@ void UNION_MPI_Sendrecv(const void *sendbuf,
     sctx->fifo.push_back(&send_op);
     sctx->fifo.push_back(&recv_op);
     if(DBG_COMM){
-        printf("SENDRECV ssrc %d sdst %d: %lld bytes; rsrc %d rdst %d: %lld bytes\n", sctx->my_rank, dest,
+        printf("\nUNION SENDRECV ssrc %d sdst %d: %lld bytes; rsrc %d rdst %d: %lld bytes ", sctx->my_rank, dest,
                 send_op.u.send.num_bytes, source, sctx->my_rank, recv_op.u.recv.num_bytes);
     }
     ABT_thread_yield_to(global_prod_thread);
@@ -334,9 +462,9 @@ void UNION_MPI_Barrier(UNION_Comm comm)
         mask <<= 1;
     }
     num_barriers++; 
-    if(DBG_COMM){
-        printf("BARRIER src %d\n", sctx->my_rank);
-    }
+    // if(DBG_COMM){
+    //     printf("UNION BARRIER src %d\n", sctx->my_rank);
+    // }
 }
 
 void UNION_MPI_Isend(const void *buf, 
@@ -376,7 +504,7 @@ void UNION_MPI_Isend(const void *buf,
     wrkld_per_rank.u.send.req_id = *request;
     sctx->wait_id++;
     if(DBG_COMM){
-        printf("ISEND src %d dst %d: %lld bytes\n", sctx->my_rank, dest,
+        printf("\nUNION ISEND src %d dst %d: %lld bytes ", sctx->my_rank, dest,
                 wrkld_per_rank.u.send.num_bytes);
     }
 
@@ -420,7 +548,7 @@ void UNION_MPI_Irecv(void *buf,
     wrkld_per_rank.u.recv.req_id = *request;
     sctx->wait_id++;
     if(DBG_COMM){
-        printf("IRECV src %d dst %d: %lld bytes\n", source, sctx->my_rank, 
+        printf("\nUNION IRECV src %d dst %d: %lld bytes ", source, sctx->my_rank, 
                 wrkld_per_rank.u.recv.num_bytes);    
     }
     ABT_thread_yield_to(global_prod_thread);
@@ -446,7 +574,7 @@ void UNION_MPI_Wait(UNION_Request *request,
     struct shared_context * sctx = static_cast<shared_context*>(arg);
     sctx->fifo.push_back(&wrkld_per_rank);
     if(DBG_COMM){
-        printf("WAIT src %d\n",sctx->my_rank);    
+        printf("\nUNION WAIT src %d ",sctx->my_rank);    
     }
     ABT_thread_yield_to(global_prod_thread);       
 }
@@ -459,9 +587,9 @@ void UNION_MPI_Waitall(int count,
     for(int i = 0; i < count; i++)
         UNION_MPI_Wait(&array_of_requests[i], UNION_STATUSES_IGNORE);
 
-    if(DBG_COMM){
-        printf("WAITALL count %d\n", count);    
-    }  
+    // if(DBG_COMM){
+    //     printf("UNION WAITALL count %d\n", count);    
+    // }  
 }
 
 void UNION_MPI_Reduce(const void *sendbuf, 
@@ -792,9 +920,9 @@ void UNION_MPI_Bcast(void *buffer,
     //use scatter followed by ring allgather
     bcast_scatter_ring_allgather(buffer,rank,count,datatype,root,comm);
     }
-    if(DBG_COMM){
-        printf("BCAST src %d\n", root);    
-    }  
+    // if(DBG_COMM){
+    //     printf("BCAST src %d\n", root);    
+    // }  
 }
 
 void UNION_MPI_Alltoallv(const void *sendbuf, 
@@ -941,7 +1069,7 @@ void SWM_Send(SWM_PEER peer,
     sctx->fifo.push_back(&wrkld_per_rank);
 
     if(DBG_COMM){
-        printf("SEND src %d dst %d: %lld bytes\n", sctx->my_rank, peer,
+        printf("\nSWM SEND src %d dst %d: %lld bytes ", sctx->my_rank, peer,
                 wrkld_per_rank.u.send.num_bytes);
     // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
     }
@@ -1071,7 +1199,7 @@ void SWM_Isend(SWM_PEER peer,
     sctx->wait_id++;
 
     if(DBG_COMM){
-        printf("ISEND src %d dst %d: %lld bytes\n", sctx->my_rank, peer,
+        printf("\nSWM ISEND src %d dst %d: %lld bytes ", sctx->my_rank, peer,
                 wrkld_per_rank.u.send.num_bytes);
     }
 
@@ -1106,7 +1234,7 @@ void SWM_Recv(SWM_PEER peer,
     sctx->fifo.push_back(&wrkld_per_rank);
 
     if(DBG_COMM){
-        printf("RECV src %d dst %d: %lld bytes\n", peer, sctx->my_rank, 
+        printf("\nSWM RECV src %d dst %d: %lld bytes ", peer, sctx->my_rank, 
                 wrkld_per_rank.u.recv.num_bytes);    
     }
 
@@ -1146,7 +1274,7 @@ void SWM_Irecv(SWM_PEER peer,
     sctx->wait_id++;
 
     if(DBG_COMM){
-        printf("IRECV src %d dst %d: %lld bytes\n", peer, sctx->my_rank, 
+        printf("\nSWM IRECV src %d dst %d: %lld bytes ", peer, sctx->my_rank, 
                 wrkld_per_rank.u.recv.num_bytes);    
     }
 
@@ -1182,6 +1310,10 @@ void SWM_Compute(long cycle_count)
     assert(err == ABT_SUCCESS);
     struct shared_context * sctx = static_cast<shared_context*>(arg);
     sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("\nSWM COMPUTE src %d: %lld ns ", sctx->my_rank, delay_in_ns);    
+    }
     
     ABT_thread_yield_to(global_prod_thread);
 
@@ -1211,7 +1343,7 @@ void SWM_Wait(uint32_t req_id)
     sctx->fifo.push_back(&wrkld_per_rank);
 
     if(DBG_COMM){
-        printf("WAIT src %d\n",sctx->my_rank);    
+        printf("\nSWM WAIT src %d ",sctx->my_rank);    
     }
 
     ABT_thread_yield_to(global_prod_thread);
@@ -1246,7 +1378,7 @@ void SWM_Waitall(int len, uint32_t * req_ids)
     sctx->fifo.push_back(&wrkld_per_rank);
 
     if(DBG_COMM){
-        printf("WAITALL src %d: count %d\n",sctx->my_rank, len);    
+        printf("\nSWM WAITALL src %d: count %d ",sctx->my_rank, len);    
     }
 
     ABT_thread_yield_to(global_prod_thread);
@@ -1297,7 +1429,7 @@ void SWM_Sendrecv(
     sctx->fifo.push_back(&recv_op);
 
     if(DBG_COMM){
-        printf("SENDRECV ssrc %d sdst %d: %d bytes; rsrc %d rdst %d: %lld bytes\n", sctx->my_rank, sendpeer,
+        printf("\nSWM SENDRECV ssrc %d sdst %d: %d bytes; rsrc %d rdst %d: %lld bytes ", sctx->my_rank, sendpeer,
                 sendbytes, recvpeer, sctx->my_rank, recv_op.u.recv.num_bytes);
     }
 
@@ -1572,12 +1704,38 @@ void SWM_Finalize()
         {
             cout << "\n isend " << it->first << " " << it->second;
         }*/
-        printf("\n finalize workload for rank %d ", sctx->my_rank);
-        //printf("\n finalize workload for rank %d num_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld", sctx->my_rank, num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
+        printf("\nSWM FINALIZE src %d ", sctx->my_rank);
+        printf("\nnum_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n", 
+                num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
     }
     ABT_thread_yield_to(global_prod_thread);
 }
 
+void SWM_Mark_Iteration(SWM_TAG iter_tag)
+{
+    /* Add an event in the shared queue and then yield */
+    struct codes_workload_op wrkld_per_rank;
+
+    wrkld_per_rank.op_type = CODES_WK_MARK;
+    wrkld_per_rank.u.send.tag = iter_tag;
+
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    wrkld_per_rank.u.send.source_rank = sctx->my_rank;
+    sctx->fifo.push_back(&wrkld_per_rank);
+
+    if(DBG_COMM){
+        printf("\nSWM MARKITERATION src %d ", sctx->my_rank);
+    }
+
+    ABT_thread_yield_to(global_prod_thread);
+}
 
 //#endif
 
@@ -1592,6 +1750,7 @@ static int hash_rank_compare(void *key, struct qhash_head *link)
         return 1;
     return 0;
 }
+
 static void workload_caller(void * arg)
 {
     shared_context* sctx = static_cast<shared_context*>(arg);
@@ -1607,6 +1766,10 @@ static void workload_caller(void * arg)
             conc_params->conc_argv[i] = conc_params->config_in[i];
         }
         // conc_params->argv = &conc_params->conc_argv;
+        if(DBG_LINKING)
+        {
+            printf("\nLoad Union Benchmark: %s: %s", conc_params->conc_program, conc_params->conc_argv[1]);
+        }        
         union_conc_bench_load(conc_params->conc_program, 
                         conc_params->conc_argc, 
                         conc_params->conc_argv);
@@ -1775,11 +1938,14 @@ static int comm_online_workload_load(const char * params, int app_id, int rank)
         ABT_xstream_self(&self_es);
         ABT_thread_self(&global_prod_thread);
     }
-    ABT_thread_create_on_xstream(self_es, 
+    int rcode = ABT_thread_create_on_xstream(self_es, 
             &workload_caller, (void*)&(my_ctx->sctx),
             ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer));
 
-    // printf("Rank %d create app thread %p\n", rank, my_ctx->sctx.producer);
+    if(DBG_LINKING)
+    {
+        printf("\nRank %d create app thread? %d", rank, rcode);
+    }
     rank_mpi_compare cmp;
     cmp.app_id = app_id;
     cmp.rank = rank;
@@ -1819,12 +1985,30 @@ static void comm_online_workload_get_next(int app_id, int rank, struct codes_wor
     assert(temp_data);
     while(temp_data->sctx.fifo.empty())
     {
-        // printf("Rank %d fifo empty, yield to app %p\n", rank, temp_data->sctx.producer);
+        if(DBG_COMM){
+            // void * arg;
+            // int err =  ABT_thread_get_arg(temp_data->sctx.producer, &arg);
+            // assert(err == ABT_SUCCESS);
+            // struct shared_context * sctx = static_cast<shared_context*>(arg);
+            printf("\nFIFO que empty, yield to rank %d ", rank);
+        }
         int rc = ABT_thread_yield_to(temp_data->sctx.producer); 
     }
     struct codes_workload_op * front_op = temp_data->sctx.fifo.front();
     assert(front_op);
-    // printf("Pop op %d to CODES\n", front_op->op_type);
+    if(DBG_COMM)
+    {
+        switch(front_op->op_type)
+        {
+            case CODES_WK_ISEND: printf("\nFIFO pop operation ISEND src %d ", rank);
+            case CODES_WK_SEND: printf("\nFIFO pop operation SEND src %d ", rank);
+            case CODES_WK_RECV: printf("\nFIFO pop operation RECV src %d ", rank);
+            case CODES_WK_IRECV: printf("\nFIFO pop operation IRECV src %d ", rank);
+            case CODES_WK_DELAY: printf("\nFIFO pop operation COMPUTE src %d ", rank);
+            case CODES_WK_WAIT: printf("\nFIFO pop operation WAIT src %d ", rank);
+            case CODES_WK_WAITALL: printf("\nFIFO pop operation WAITALL src %d ", rank);
+        }
+    }
     *op = *front_op;
     temp_data->sctx.fifo.pop_front();
     return;
@@ -1847,7 +2031,7 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran
     hash_link = qhash_search(rank_tbl, &cmp);
     if(!hash_link)
     {
-        printf("\n not found for rank id %d , %d", rank, app_id);
+        printf("\n not found for rank id %d , %d ", rank, app_id);
         return -1;
     }
     temp_data = qhash_entry(hash_link, rank_mpi_context, hash_link);

From 7edd3332f3877e6539ad8ddae43a5529e9b3fb18 Mon Sep 17 00:00:00 2001
From: Xin Wang <xwang149@hawk.iit.edu>
Date: Thu, 31 Aug 2023 11:29:08 -0500
Subject: [PATCH 039/188] add temporary test folder

---
 tmptest/README.md                             |  98 +++++
 tmptest/conf/conceptual.json                  |  65 ++++
 tmptest/conf/dfdally-72-inter                 | Bin 0 -> 576 bytes
 tmptest/conf/dfdally-72-intra                 | Bin 0 -> 144 bytes
 tmptest/conf/dfdally-72-par.conf              |  64 ++++
 tmptest/conf/jacobi_MILC.conf                 |   2 +
 tmptest/conf/milc_skeleton.json               |  17 +
 .../conf/rand_node0-1d-72-jacobi_MILC.conf    |   2 +
 tmptest/expected/iteration-logs               | 360 ++++++++++++++++++
 .../avg-all-reduce-time                       |   0
 .../dragonfly-cn-stats                        |  73 ++++
 .../dragonfly-link-stats                      | 326 ++++++++++++++++
 .../model-net-category-all                    |  72 ++++
 .../model-net-category-high                   |  72 ++++
 .../mpi-replay-stats                          |  73 ++++
 tmptest/expected/tmptest-jacobiS_MILC.output  | 240 ++++++++++++
 16 files changed, 1464 insertions(+)
 create mode 100644 tmptest/README.md
 create mode 100644 tmptest/conf/conceptual.json
 create mode 100644 tmptest/conf/dfdally-72-inter
 create mode 100644 tmptest/conf/dfdally-72-intra
 create mode 100644 tmptest/conf/dfdally-72-par.conf
 create mode 100644 tmptest/conf/jacobi_MILC.conf
 create mode 100644 tmptest/conf/milc_skeleton.json
 create mode 100644 tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf
 create mode 100644 tmptest/expected/iteration-logs
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
 create mode 100644 tmptest/expected/tmptest-jacobiS_MILC.output

diff --git a/tmptest/README.md b/tmptest/README.md
new file mode 100644
index 00000000..bf25a012
--- /dev/null
+++ b/tmptest/README.md
@@ -0,0 +1,98 @@
+# Union
+Workload Manager for Integration of Conceptual as an Online Workload for CODES
+
+
+# Installation
+
+### Installing Conceptual (mandatory)
+
+Download Conceptual at https://ccsweb.lanl.gov/~pakin/software/conceptual/download.html (version 1.5.1 or greater)
+
+```bash
+tar xvf conceptual-1.5.1.tar.gz
+cd conceptual-1.5.1
+./configure --prefix=/path/to/conceptual/install
+make
+make install
+```
+
+### Installing Boost-Python (currently mandatory, we may remove this soon)
+
+Download boost at http://www.boost.org/users/download/ (version 1.68 or greater)
+
+```bash
+tar xvf boost_1_68_0.tar.gz
+cd boost_1_68_0 
+./bootstrap.sh --prefix=/path/to/boost/install  --with-libraries=python
+./b2 install
+```
+
+### Installing Union    
+```bash
+cd union
+./prepare.sh
+./configure --with-boost=/path/to/boost/install --with-conceptual=/path/to/conceptual/install --prefix=/path/to/union/install CC=mpicc CXX=mpicxx
+make
+make install
+```
+
+# Workload Simulation with CODES
+
+### Installing ROSS
+
+```bash
+git clone https://github.com/carothersc/ROSS.git 
+mkdir build-ross
+cd build-ross
+cmake -DCMAKE_INSTALL_PREFIX:path=path/to/ross/install -DCMAKE_C_COMPILER=$(which mpicc) -DCMAKE_CXX_COMPILER=$(which mpicxx) ../ROSS
+make install
+```
+
+### Installing Argobots
+
+```bash
+git clone https://github.com/pmodels/argobots.git
+./autogen.sh
+./configure --prefix=/path/to/argobots/install
+make
+make install
+```
+
+### Installing SWM workloads
+
+```bash
+git clone https://github.com/codes-org/SWM-workloads.git
+cd swm
+./prepare.sh
+./configure --with-boost=/path/to/boost/install --prefix=/path/to/swm/install CC=mpicc CXX=mpicxx
+make
+make install
+```
+
+### Installing CODES (kronos-union branch)
+
+```bash
+git clone https://github.com/codes-org/codes.git
+cd codes
+./prepare.sh
+mkdir build
+cd build
+../configure --with-online=true --with-boost=/path/to/boost/install PKG_CONFIG_PATH=/home/path/to/argobots/install/lib/pkgconfig:/path/to/ross/install/lib/pkgconfig:/path/to/union/install/lib/pkgconfig:/path/to/swm/install/lib/pkgconfig --with-union=true --prefix=/path/to/codes/install CC=mpicc CXX=mpicxx 
+make
+make install
+```
+
+### Run Test Simulations
+The tmptest directory includes all necessary configuration files to run the test simulation.
+
+Copy milc_skeleton.json to /path/to/swm/install/share/
+Copy conceptual.json to /path/to/union/install/share/
+Change the path for "intra-group-connections" and "intra-group-connections" in dfdally-72-par.conf
+Run the following command:
+
+```bash
+/path/to/codes/install/bin/model-net-mpi-replay --sync=1 --workload_type=conc-online --lp-io-use-suffix=1 --workload_conf_file=/path/to/codes/tmptest/conf/jacobi_MILC.conf --alloc_file=/path/to/codes/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf --lp-io-dir=tmptest-jacobiS_MILC -- /path/to/codes/tmptest/conf/dfdally-72-par.conf > tmptest-jacobiS_MILC.output 
+```
+
+
+
diff --git a/tmptest/conf/conceptual.json b/tmptest/conf/conceptual.json
new file mode 100644
index 00000000..ad786bce
--- /dev/null
+++ b/tmptest/conf/conceptual.json
@@ -0,0 +1,65 @@
+{
+  "latency": {
+    "argc": 5,
+    "argv": [
+      "latency",
+      "--reps",
+      "100",
+      "--maxbytes",
+      "1M"
+    ]
+  },
+  "cosmoflow": {
+    "argc": 7,
+    "argv": [
+      "cosmoflow",
+      "--msgsize",
+      "7379200",
+      "--reps",
+      "10",
+      "--compute",
+      "129"
+    ]
+  },  
+  "cosmo": {
+    "argc": 5,
+    "argv": [
+      "cosmo",
+      "5",
+      "7379200",
+      "129000000"
+    ]
+  },
+  "jacobi3d": {
+    "argc": 11,
+    "argv": [
+      "jacobi3d",
+      "400",
+      "300",
+      "300",
+      "100",
+      "100",
+      "100",
+      "125000",
+      "5",
+      "2000000",
+      "barrier"
+    ]
+  },
+  "alexnet": {
+    "argc": 2,
+    "argv": [
+      "alexnet",
+      "10"
+    ]
+  },
+  "checkpoint": {
+    "argc": 4,
+    "argv": [
+      "checkpoint",
+      "1",
+      "50000000000", 
+      "100000000"
+    ]
+  }  
+}
diff --git a/tmptest/conf/dfdally-72-inter b/tmptest/conf/dfdally-72-inter
new file mode 100644
index 0000000000000000000000000000000000000000..f95b989c64812d8936d00541ad6808c590c2a0dd
GIT binary patch
literal 576
zcmXxhSxy5%6hqO107)Q_nR#f){`ZKF)Nbj@{bTvrHO3gr;Cl#H;c3=G^g5hpeTy!_
z=d4roCh*a1xD9;tIot(4`W0kNvqr`$@X=#f2R`~9?gJnF4YGXA+~l8+zJ$lXN56wE
zeC;Hwx60fFKKc>%fsg(LU2M`$vU;n`Q{baN!B%|TOqXt<ldRq<^EL3%OR$yAyO}QC
tLMK_hRi?M#>zmlBTiZ;RZlRN`-YRnn-jsS9TXk!j>C!E9lGR&f{sV<~3E2Pu

literal 0
HcmV?d00001

diff --git a/tmptest/conf/dfdally-72-intra b/tmptest/conf/dfdally-72-intra
new file mode 100644
index 0000000000000000000000000000000000000000..37ea2848b53d14494cf91262078ab9fe67ac4d9d
GIT binary patch
literal 144
xcmYj|fe8RG2*bMh|Icf{zyxW~G)Yd^xk7W6|39n)>v*FA+ktgV99Rdo{{RVK03`qb

literal 0
HcmV?d00001

diff --git a/tmptest/conf/dfdally-72-par.conf b/tmptest/conf/dfdally-72-par.conf
new file mode 100644
index 00000000..4059a519
--- /dev/null
+++ b/tmptest/conf/dfdally-72-par.conf
@@ -0,0 +1,64 @@
+LPGROUPS
+{
+   MODELNET_GRP
+   {
+      repetitions="36";
+# name of this lp changes according to the model
+      nw-lp="2";
+# these lp names will be the same for dragonfly-custom model
+      modelnet_dragonfly_dally="2";
+      modelnet_dragonfly_dally_router="1";
+   }
+}
+PARAMS
+{
+# packet size in the network
+   packet_size="4096";
+   modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
+   # scheduler options
+   modelnet_scheduler="fcfs";
+# chunk size in the network (when chunk size = packet size, packets will not be
+# divided into chunks)
+   chunk_size="4096";
+# modelnet_scheduler="round-robin";
+# number of routers in group
+   num_routers="4";
+# number of groups in the network
+   num_groups="9";
+# buffer size in bytes for local virtual channels
+   local_vc_size="16384";
+#buffer size in bytes for global virtual channels
+   global_vc_size="16384";
+#buffer size in bytes for compute node virtual channels
+   cn_vc_size="32768";
+#bandwidth in GiB/s for local channels
+   local_bandwidth="5.25";
+# bandwidth in GiB/s for global channels
+   global_bandwidth="4.7";
+# bandwidth in GiB/s for compute node-router channels
+   cn_bandwidth="5.25";
+# ROSS message size
+   message_size="792";
+# number of compute nodes connected to router, dictated by dragonfly config
+# file
+   num_cns_per_router="2";
+# number of global channels per router
+   num_global_channels="2";
+# network config file for intra-group connections
+   intra-group-connections="/home/ac.xwang/tools/codes-new/tmptest/conf/dfdally-72-intra";
+# network config file for inter-group connections
+   inter-group-connections="/home/ac.xwang/tools/codes-new/tmptest/conf/dfdally-72-inter";
+# routing protocol to be used
+   routing="prog-adaptive";
+   minimal-bias="1";
+   df-dally-vc = "1";
+# counting msgs recv to/send from routers
+   counting_bool="0";
+   counting_start="0";
+   counting_windows="1800";
+   #interval in us
+   counting_interval="300";
+   num_apps="2";
+   #offset for app_id: model-net-mpi-replay is 88, synthetic-dfly-plus is 24
+   offset="144";
+}
diff --git a/tmptest/conf/jacobi_MILC.conf b/tmptest/conf/jacobi_MILC.conf
new file mode 100644
index 00000000..93c60688
--- /dev/null
+++ b/tmptest/conf/jacobi_MILC.conf
@@ -0,0 +1,2 @@
+36 conceptual-jacobi3d 1 0
+36 milc 1 0
diff --git a/tmptest/conf/milc_skeleton.json b/tmptest/conf/milc_skeleton.json
new file mode 100644
index 00000000..b3a42297
--- /dev/null
+++ b/tmptest/conf/milc_skeleton.json
@@ -0,0 +1,17 @@
+{
+"jobs" : {
+        "dll_path": "${FABSIM_APPS_PATH}/dll/milc.so",
+        "size": 36,
+        "cfg": {
+            "app": "milc",
+            "iteration_cnt": 5,
+            "compute_delay": 100,
+            "dimension_cnt": 4,
+            "dimension_sizes": [2,2,3,3],
+            "msg_size": 497664,
+            "max_dimension_distance": 1,
+            "randomize_communication_order": false,
+	    "cpu_freq" : 4e9
+       }
+    }
+}
diff --git a/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf b/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf
new file mode 100644
index 00000000..07e490d0
--- /dev/null
+++ b/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf
@@ -0,0 +1,2 @@
+12 24 38 2 65 18 3 70 39 11 69 67 48 21 8 45 71 55 6 1 49 68 32 5 61 46 23 9 7 26 15 62 52 28 13 25 
+0 42 31 10 57 20 41 40 66 64 4 22 33 58 37 59 47 43 54 50 14 35 44 16 63 56 36 30 19 51 27 34 17 29 53 60 
diff --git a/tmptest/expected/iteration-logs b/tmptest/expected/iteration-logs
new file mode 100644
index 00000000..83179b2f
--- /dev/null
+++ b/tmptest/expected/iteration-logs
@@ -0,0 +1,360 @@
+ITERATION 0 node 53 job 1 rank 34 time 4512686.215045
+ITERATION 0 node 54 job 1 rank 18 time 4513749.276363
+ITERATION 0 node 4 job 1 rank 10 time 4517871.443080
+ITERATION 0 node 56 job 1 rank 25 time 4538165.950297
+ITERATION 0 node 51 job 1 rank 29 time 4540989.136935
+ITERATION 0 node 29 job 1 rank 33 time 4542208.517079
+ITERATION 0 node 64 job 1 rank 9 time 4543495.002481
+ITERATION 0 node 34 job 1 rank 31 time 4545035.368923
+ITERATION 0 node 58 job 1 rank 13 time 4545114.472426
+ITERATION 0 node 59 job 1 rank 15 time 4548339.211403
+ITERATION 0 node 35 job 1 rank 21 time 4554419.528244
+ITERATION 0 node 10 job 1 rank 3 time 4561111.532285
+ITERATION 0 node 60 job 1 rank 35 time 4562346.412949
+ITERATION 0 node 30 job 1 rank 27 time 4564867.979829
+ITERATION 0 node 16 job 1 rank 23 time 4567439.860843
+ITERATION 0 node 31 job 1 rank 2 time 4573504.960624
+ITERATION 0 node 22 job 1 rank 11 time 4574627.826180
+ITERATION 0 node 37 job 1 rank 14 time 4579930.283097
+ITERATION 0 node 27 job 1 rank 30 time 4587175.167871
+ITERATION 0 node 20 job 1 rank 5 time 4589753.028610
+ITERATION 0 node 57 job 1 rank 4 time 4593050.336040
+ITERATION 0 node 17 job 1 rank 32 time 4604738.524840
+ITERATION 0 node 19 job 1 rank 28 time 4607122.579619
+ITERATION 0 node 63 job 1 rank 24 time 4616051.899458
+ITERATION 0 node 44 job 1 rank 22 time 4629534.947693
+ITERATION 0 node 33 job 1 rank 12 time 4630801.551756
+ITERATION 0 node 14 job 1 rank 20 time 4634810.847629
+ITERATION 0 node 43 job 1 rank 17 time 4637426.227568
+ITERATION 0 node 50 job 1 rank 19 time 4645719.045367
+ITERATION 0 node 42 job 1 rank 1 time 4657866.214120
+ITERATION 0 node 0 job 1 rank 0 time 4662904.772657
+ITERATION 0 node 36 job 1 rank 26 time 4669557.114654
+ITERATION 0 node 47 job 1 rank 16 time 4682491.551200
+ITERATION 0 node 40 job 1 rank 7 time 4725442.291027
+ITERATION 0 node 41 job 1 rank 6 time 4726998.347349
+ITERATION 0 node 66 job 1 rank 8 time 4766523.879336
+ITERATION 0 node 67 job 0 rank 11 time 5565984.178337
+ITERATION 0 node 5 job 0 rank 23 time 5575875.812174
+ITERATION 0 node 9 job 0 rank 27 time 5579506.018872
+ITERATION 0 node 62 job 0 rank 31 time 5584465.243288
+ITERATION 0 node 45 job 0 rank 15 time 5618600.555287
+ITERATION 0 node 1 job 0 rank 19 time 5717281.741377
+ITERATION 0 node 70 job 0 rank 7 time 5721216.605169
+ITERATION 0 node 3 job 0 rank 6 time 5727197.597463
+ITERATION 0 node 68 job 0 rank 21 time 5728040.094749
+ITERATION 0 node 26 job 0 rank 29 time 5732165.171144
+ITERATION 0 node 46 job 0 rank 25 time 5736556.257391
+ITERATION 0 node 21 job 0 rank 13 time 5736620.081453
+ITERATION 0 node 2 job 0 rank 3 time 5742325.644168
+ITERATION 0 node 55 job 0 rank 17 time 5749700.425050
+ITERATION 0 node 25 job 0 rank 35 time 5767346.740335
+ITERATION 0 node 38 job 0 rank 2 time 5788871.660345
+ITERATION 0 node 28 job 0 rank 33 time 5811545.306529
+ITERATION 0 node 32 job 0 rank 22 time 5813306.845833
+ITERATION 0 node 49 job 0 rank 20 time 5816982.805562
+ITERATION 0 node 61 job 0 rank 24 time 5820868.818799
+ITERATION 0 node 52 job 0 rank 32 time 5824994.468838
+ITERATION 0 node 7 job 0 rank 28 time 5829521.253890
+ITERATION 0 node 23 job 0 rank 26 time 5830679.689756
+ITERATION 0 node 71 job 0 rank 16 time 5830833.289263
+ITERATION 0 node 18 job 0 rank 5 time 5831517.990821
+ITERATION 0 node 13 job 0 rank 34 time 5836011.317489
+ITERATION 0 node 69 job 0 rank 10 time 5848873.775015
+ITERATION 0 node 39 job 0 rank 8 time 5852519.517052
+ITERATION 0 node 65 job 0 rank 4 time 5853828.568648
+ITERATION 0 node 24 job 0 rank 1 time 5854716.580830
+ITERATION 0 node 12 job 0 rank 0 time 5859659.053022
+ITERATION 0 node 6 job 0 rank 18 time 5863641.542198
+ITERATION 0 node 15 job 0 rank 30 time 5891375.888033
+ITERATION 0 node 8 job 0 rank 14 time 5894381.087555
+ITERATION 0 node 48 job 0 rank 12 time 5895134.332795
+ITERATION 0 node 11 job 0 rank 9 time 5942306.311091
+ITERATION 1 node 66 job 1 rank 8 time 8297180.376270
+ITERATION 1 node 14 job 1 rank 20 time 8300140.917016
+ITERATION 1 node 64 job 1 rank 9 time 8300849.048957
+ITERATION 1 node 19 job 1 rank 28 time 8301305.476039
+ITERATION 1 node 63 job 1 rank 24 time 8301305.711761
+ITERATION 1 node 33 job 1 rank 12 time 8301733.793207
+ITERATION 1 node 10 job 1 rank 3 time 8302129.790711
+ITERATION 1 node 42 job 1 rank 1 time 8302470.511443
+ITERATION 1 node 47 job 1 rank 16 time 8303046.409280
+ITERATION 1 node 35 job 1 rank 21 time 8303817.637378
+ITERATION 1 node 37 job 1 rank 14 time 8303944.501174
+ITERATION 1 node 4 job 1 rank 10 time 8303944.806055
+ITERATION 1 node 43 job 1 rank 17 time 8304146.840707
+ITERATION 1 node 56 job 1 rank 25 time 8304344.924724
+ITERATION 1 node 59 job 1 rank 15 time 8305202.188843
+ITERATION 1 node 20 job 1 rank 5 time 8305346.120632
+ITERATION 1 node 31 job 1 rank 2 time 8305427.098141
+ITERATION 1 node 22 job 1 rank 11 time 8305464.857400
+ITERATION 1 node 16 job 1 rank 23 time 8305465.061184
+ITERATION 1 node 58 job 1 rank 13 time 8305909.329333
+ITERATION 1 node 44 job 1 rank 22 time 8306179.488712
+ITERATION 1 node 40 job 1 rank 7 time 8306815.016916
+ITERATION 1 node 36 job 1 rank 26 time 8307242.113385
+ITERATION 1 node 54 job 1 rank 18 time 8307242.230485
+ITERATION 1 node 0 job 1 rank 0 time 8307509.069980
+ITERATION 1 node 27 job 1 rank 30 time 8308069.836665
+ITERATION 1 node 41 job 1 rank 6 time 8308371.073239
+ITERATION 1 node 51 job 1 rank 29 time 8308510.790046
+ITERATION 1 node 57 job 1 rank 4 time 8308643.428061
+ITERATION 1 node 30 job 1 rank 27 time 8308762.164730
+ITERATION 1 node 17 job 1 rank 32 time 8308912.995878
+ITERATION 1 node 53 job 1 rank 34 time 8309964.008052
+ITERATION 1 node 34 job 1 rank 31 time 8311068.775441
+ITERATION 1 node 50 job 1 rank 19 time 8311331.100457
+ITERATION 1 node 29 job 1 rank 33 time 8311707.229825
+ITERATION 1 node 60 job 1 rank 35 time 8317686.769451
+ITERATION 1 node 8 job 0 rank 14 time 11235088.810240
+ITERATION 1 node 18 job 0 rank 5 time 11235317.411859
+ITERATION 1 node 15 job 0 rank 30 time 11237472.894623
+ITERATION 1 node 24 job 0 rank 1 time 11245995.227219
+ITERATION 1 node 61 job 0 rank 24 time 11248511.493893
+ITERATION 1 node 71 job 0 rank 16 time 11249063.598574
+ITERATION 1 node 2 job 0 rank 3 time 11249988.785889
+ITERATION 1 node 11 job 0 rank 9 time 11250588.161742
+ITERATION 1 node 21 job 0 rank 13 time 11251495.763839
+ITERATION 1 node 49 job 0 rank 20 time 11252879.124696
+ITERATION 1 node 38 job 0 rank 2 time 11253481.695522
+ITERATION 1 node 52 job 0 rank 32 time 11254930.185172
+ITERATION 1 node 26 job 0 rank 29 time 11259777.124063
+ITERATION 1 node 39 job 0 rank 8 time 11266061.281928
+ITERATION 1 node 32 job 0 rank 22 time 11270470.549169
+ITERATION 1 node 68 job 0 rank 21 time 11286125.385978
+ITERATION 1 node 48 job 0 rank 12 time 11294811.908230
+ITERATION 1 node 46 job 0 rank 25 time 11300192.747257
+ITERATION 1 node 12 job 0 rank 0 time 11317107.807555
+ITERATION 1 node 7 job 0 rank 28 time 11322189.096862
+ITERATION 1 node 67 job 0 rank 11 time 11333239.622073
+ITERATION 1 node 70 job 0 rank 7 time 11334381.123429
+ITERATION 1 node 45 job 0 rank 15 time 11339259.471444
+ITERATION 1 node 1 job 0 rank 19 time 11340523.986731
+ITERATION 1 node 62 job 0 rank 31 time 11345350.350138
+ITERATION 1 node 69 job 0 rank 10 time 11351306.613121
+ITERATION 1 node 23 job 0 rank 26 time 11361640.366497
+ITERATION 1 node 25 job 0 rank 35 time 11363595.692666
+ITERATION 1 node 3 job 0 rank 6 time 11372326.537226
+ITERATION 1 node 9 job 0 rank 27 time 11388501.387550
+ITERATION 1 node 6 job 0 rank 18 time 11391110.891033
+ITERATION 1 node 5 job 0 rank 23 time 11392483.556208
+ITERATION 1 node 65 job 0 rank 4 time 11392558.633627
+ITERATION 1 node 13 job 0 rank 34 time 11396149.449470
+ITERATION 1 node 55 job 0 rank 17 time 11417714.983024
+ITERATION 1 node 28 job 0 rank 33 time 11421840.318515
+ITERATION 2 node 58 job 1 rank 13 time 12316899.292981
+ITERATION 2 node 35 job 1 rank 21 time 12316899.576287
+ITERATION 2 node 64 job 1 rank 9 time 12316899.606397
+ITERATION 2 node 10 job 1 rank 3 time 12317127.807697
+ITERATION 2 node 33 job 1 rank 12 time 12320196.737845
+ITERATION 2 node 56 job 1 rank 25 time 12320196.913727
+ITERATION 2 node 59 job 1 rank 15 time 12320197.104361
+ITERATION 2 node 43 job 1 rank 17 time 12320197.230276
+ITERATION 2 node 66 job 1 rank 8 time 12320197.233370
+ITERATION 2 node 40 job 1 rank 7 time 12320425.112677
+ITERATION 2 node 31 job 1 rank 2 time 12320425.115126
+ITERATION 2 node 42 job 1 rank 1 time 12320426.025358
+ITERATION 2 node 51 job 1 rank 29 time 12321024.628471
+ITERATION 2 node 16 job 1 rank 23 time 12321024.717490
+ITERATION 2 node 22 job 1 rank 11 time 12321024.945440
+ITERATION 2 node 14 job 1 rank 20 time 12321025.218551
+ITERATION 2 node 41 job 1 rank 6 time 12321981.169000
+ITERATION 2 node 34 job 1 rank 31 time 12323494.411691
+ITERATION 2 node 0 job 1 rank 0 time 12323723.332787
+ITERATION 2 node 19 job 1 rank 28 time 12324322.073336
+ITERATION 2 node 30 job 1 rank 27 time 12324322.252770
+ITERATION 2 node 47 job 1 rank 16 time 12324322.491049
+ITERATION 2 node 29 job 1 rank 33 time 12324322.565767
+ITERATION 2 node 63 job 1 rank 24 time 12324322.568861
+ITERATION 2 node 50 job 1 rank 19 time 12325150.326534
+ITERATION 2 node 44 job 1 rank 22 time 12326063.258270
+ITERATION 2 node 20 job 1 rank 5 time 12326291.816256
+ITERATION 2 node 4 job 1 rank 10 time 12326891.771792
+ITERATION 2 node 37 job 1 rank 14 time 12326892.126731
+ITERATION 2 node 17 job 1 rank 32 time 12328447.826540
+ITERATION 2 node 60 job 1 rank 35 time 12329275.662024
+ITERATION 2 node 57 job 1 rank 4 time 12329589.123686
+ITERATION 2 node 54 job 1 rank 18 time 12330188.984627
+ITERATION 2 node 36 job 1 rank 26 time 12330189.079122
+ITERATION 2 node 27 job 1 rank 30 time 12331017.462222
+ITERATION 2 node 53 job 1 rank 34 time 12332573.069010
+ITERATION 3 node 64 job 1 rank 9 time 16846408.366519
+ITERATION 3 node 35 job 1 rank 21 time 16846408.565540
+ITERATION 3 node 58 job 1 rank 13 time 16848977.637086
+ITERATION 3 node 66 job 1 rank 8 time 16849705.670921
+ITERATION 3 node 56 job 1 rank 25 time 16849705.673849
+ITERATION 3 node 59 job 1 rank 15 time 16849705.765384
+ITERATION 3 node 43 job 1 rank 17 time 16849705.775639
+ITERATION 3 node 42 job 1 rank 1 time 16849934.209991
+ITERATION 3 node 40 job 1 rank 7 time 16849935.003134
+ITERATION 3 node 22 job 1 rank 11 time 16850534.153202
+ITERATION 3 node 16 job 1 rank 23 time 16850535.146788
+ITERATION 3 node 41 job 1 rank 6 time 16851491.059456
+ITERATION 3 node 34 job 1 rank 31 time 16853003.072714
+ITERATION 3 node 51 job 1 rank 29 time 16853102.972576
+ITERATION 3 node 0 job 1 rank 0 time 16853231.517421
+ITERATION 3 node 33 job 1 rank 12 time 16853284.355810
+ITERATION 3 node 47 job 1 rank 16 time 16853830.997593
+ITERATION 3 node 63 job 1 rank 24 time 16853831.006412
+ITERATION 3 node 29 job 1 rank 33 time 16853831.111130
+ITERATION 3 node 19 job 1 rank 28 time 16853831.239840
+ITERATION 3 node 20 job 1 rank 5 time 16854059.856564
+ITERATION 3 node 4 job 1 rank 10 time 16854659.040052
+ITERATION 3 node 44 job 1 rank 22 time 16856400.617505
+ITERATION 3 node 50 job 1 rank 19 time 16856401.051372
+ITERATION 3 node 57 job 1 rank 4 time 16857357.163993
+ITERATION 3 node 37 job 1 rank 14 time 16857724.883755
+ITERATION 3 node 54 job 1 rank 18 time 16857956.265580
+ITERATION 3 node 36 job 1 rank 26 time 16857956.347382
+ITERATION 3 node 17 job 1 rank 32 time 16859697.584190
+ITERATION 3 node 53 job 1 rank 34 time 16860340.349963
+ITERATION 3 node 60 job 1 rank 35 time 16860526.386863
+ITERATION 3 node 30 job 1 rank 27 time 16862439.401431
+ITERATION 3 node 27 job 1 rank 30 time 16862631.359863
+ITERATION 3 node 10 job 1 rank 3 time 16874057.983505
+ITERATION 3 node 31 job 1 rank 2 time 16877355.290935
+ITERATION 3 node 14 job 1 rank 20 time 16894510.197562
+ITERATION 2 node 5 job 0 rank 23 time 17110727.735133
+ITERATION 2 node 32 job 0 rank 22 time 17114852.209292
+ITERATION 2 node 62 job 0 rank 31 time 17114852.599159
+ITERATION 2 node 9 job 0 rank 27 time 17114852.774364
+ITERATION 2 node 68 job 0 rank 21 time 17114853.074428
+ITERATION 2 node 49 job 0 rank 20 time 17118149.308069
+ITERATION 2 node 45 job 0 rank 15 time 17118149.906489
+ITERATION 2 node 46 job 0 rank 25 time 17118150.389777
+ITERATION 2 node 25 job 0 rank 35 time 17118150.407391
+ITERATION 2 node 70 job 0 rank 7 time 17118379.051555
+ITERATION 2 node 15 job 0 rank 30 time 17118977.516634
+ITERATION 2 node 23 job 0 rank 26 time 17118977.597908
+ITERATION 2 node 67 job 0 rank 11 time 17118978.109855
+ITERATION 2 node 26 job 0 rank 29 time 17118978.321161
+ITERATION 2 node 8 job 0 rank 14 time 17121361.601018
+ITERATION 2 node 61 job 0 rank 24 time 17121446.508622
+ITERATION 2 node 11 job 0 rank 9 time 17121447.697106
+ITERATION 2 node 28 job 0 rank 33 time 17121448.334170
+ITERATION 2 node 7 job 0 rank 28 time 17122274.470849
+ITERATION 2 node 13 job 0 rank 34 time 17122274.835607
+ITERATION 2 node 1 job 0 rank 19 time 17122275.742882
+ITERATION 2 node 18 job 0 rank 5 time 17122503.338803
+ITERATION 2 node 2 job 0 rank 3 time 17122504.165793
+ITERATION 2 node 3 job 0 rank 6 time 17122504.387146
+ITERATION 2 node 69 job 0 rank 10 time 17123102.933399
+ITERATION 2 node 21 job 0 rank 13 time 17123103.656652
+ITERATION 2 node 39 job 0 rank 8 time 17124743.815952
+ITERATION 2 node 52 job 0 rank 32 time 17125571.717580
+ITERATION 2 node 55 job 0 rank 17 time 17125573.669661
+ITERATION 2 node 24 job 0 rank 1 time 17125800.744680
+ITERATION 2 node 38 job 0 rank 2 time 17125801.473223
+ITERATION 2 node 48 job 0 rank 12 time 17126399.806339
+ITERATION 2 node 6 job 0 rank 18 time 17126400.171098
+ITERATION 2 node 65 job 0 rank 4 time 17126628.674394
+ITERATION 2 node 71 job 0 rank 16 time 17128869.024909
+ITERATION 2 node 12 job 0 rank 0 time 17129926.080271
+ITERATION 4 node 4 job 1 rank 10 time 21068851.547471
+ITERATION 4 node 44 job 1 rank 22 time 21071037.824821
+ITERATION 4 node 54 job 1 rank 18 time 21073270.434306
+ITERATION 4 node 37 job 1 rank 14 time 21097612.783438
+ITERATION 4 node 27 job 1 rank 30 time 21102424.565486
+ITERATION 4 node 20 job 1 rank 5 time 21108249.546302
+ITERATION 4 node 33 job 1 rank 12 time 21110027.342829
+ITERATION 4 node 42 job 1 rank 1 time 21110256.073660
+ITERATION 4 node 57 job 1 rank 4 time 21111546.853731
+ITERATION 4 node 66 job 1 rank 8 time 21113501.882887
+ITERATION 4 node 14 job 1 rank 20 time 21113973.869283
+ITERATION 4 node 19 job 1 rank 28 time 21114152.678320
+ITERATION 4 node 58 job 1 rank 13 time 21115387.315874
+ITERATION 4 node 35 job 1 rank 21 time 21115387.760180
+ITERATION 4 node 10 job 1 rank 3 time 21115616.245472
+ITERATION 4 node 47 job 1 rank 16 time 21118843.796707
+ITERATION 4 node 63 job 1 rank 24 time 21119441.870921
+ITERATION 4 node 0 job 1 rank 0 time 21119597.570694
+ITERATION 4 node 17 job 1 rank 32 time 21127435.068013
+ITERATION 4 node 51 job 1 rank 29 time 21129060.332514
+ITERATION 4 node 36 job 1 rank 26 time 21143479.925267
+ITERATION 4 node 53 job 1 rank 34 time 21154862.434499
+ITERATION 4 node 31 job 1 rank 2 time 21156014.514993
+ITERATION 4 node 59 job 1 rank 15 time 21161740.594529
+ITERATION 4 node 40 job 1 rank 7 time 21161969.568395
+ITERATION 4 node 41 job 1 rank 6 time 21163525.624717
+ITERATION 4 node 16 job 1 rank 23 time 21173654.388369
+ITERATION 4 node 64 job 1 rank 9 time 21185533.014478
+ITERATION 4 node 43 job 1 rank 17 time 21189278.967485
+ITERATION 4 node 56 job 1 rank 25 time 21189811.925489
+ITERATION 4 node 34 job 1 rank 31 time 21192311.784227
+ITERATION 4 node 29 job 1 rank 33 time 21238412.751629
+ITERATION 4 node 22 job 1 rank 11 time 21253653.349842
+ITERATION 4 node 30 job 1 rank 27 time 21256950.657171
+ITERATION 4 node 50 job 1 rank 19 time 21257778.307985
+ITERATION 4 node 60 job 1 rank 35 time 21356838.612638
+ITERATION 3 node 49 job 0 rank 20 time 22283924.781893
+ITERATION 3 node 32 job 0 rank 22 time 22287221.893064
+ITERATION 3 node 61 job 0 rank 24 time 22287222.081563
+ITERATION 3 node 68 job 0 rank 21 time 22287222.429961
+ITERATION 3 node 7 job 0 rank 28 time 22288050.215038
+ITERATION 3 node 39 job 0 rank 8 time 22290519.388893
+ITERATION 3 node 15 job 0 rank 30 time 22291346.949060
+ITERATION 3 node 23 job 0 rank 26 time 22291347.443031
+ITERATION 3 node 26 job 0 rank 29 time 22291347.569684
+ITERATION 3 node 52 job 0 rank 32 time 22291347.632683
+ITERATION 3 node 24 job 0 rank 1 time 22291576.365970
+ITERATION 3 node 48 job 0 rank 12 time 22292175.550529
+ITERATION 3 node 5 job 0 rank 23 time 22292260.691330
+ITERATION 3 node 46 job 0 rank 25 time 22292261.166656
+ITERATION 3 node 8 job 0 rank 14 time 22293731.033444
+ITERATION 3 node 71 job 0 rank 16 time 22294644.940013
+ITERATION 3 node 13 job 0 rank 34 time 22294645.243202
+ITERATION 3 node 18 job 0 rank 5 time 22294873.423611
+ITERATION 3 node 2 job 0 rank 3 time 22294873.612687
+ITERATION 3 node 69 job 0 rank 10 time 22295472.778521
+ITERATION 3 node 21 job 0 rank 13 time 22295472.905174
+ITERATION 3 node 28 job 0 rank 33 time 22295558.271868
+ITERATION 3 node 11 job 0 rank 9 time 22295558.473985
+ITERATION 3 node 12 job 0 rank 0 time 22295701.701561
+ITERATION 3 node 9 job 0 rank 27 time 22296386.567242
+ITERATION 3 node 62 job 0 rank 31 time 22296386.592726
+ITERATION 3 node 38 job 0 rank 2 time 22298170.920117
+ITERATION 3 node 6 job 0 rank 18 time 22298770.578693
+ITERATION 3 node 65 job 0 rank 4 time 22298998.759202
+ITERATION 3 node 55 job 0 rank 17 time 22299683.607359
+ITERATION 3 node 45 job 0 rank 15 time 22299683.900055
+ITERATION 3 node 25 job 0 rank 35 time 22299683.915598
+ITERATION 3 node 70 job 0 rank 7 time 22299911.908515
+ITERATION 3 node 67 job 0 rank 11 time 22300511.902733
+ITERATION 3 node 1 job 0 rank 19 time 22303809.251089
+ITERATION 3 node 3 job 0 rank 6 time 22304037.244106
+ITERATION 4 node 46 job 0 rank 25 time 27793249.825077
+ITERATION 4 node 68 job 0 rank 21 time 27795614.122037
+ITERATION 4 node 11 job 0 rank 9 time 27795614.960123
+ITERATION 4 node 61 job 0 rank 24 time 27796442.932740
+ITERATION 4 node 9 job 0 rank 27 time 27796462.525210
+ITERATION 4 node 28 job 0 rank 33 time 27798203.258478
+ITERATION 4 node 49 job 0 rank 20 time 27799739.567011
+ITERATION 4 node 26 job 0 rank 29 time 27799739.840442
+ITERATION 4 node 39 job 0 rank 8 time 27799740.240069
+ITERATION 4 node 55 job 0 rank 17 time 27799740.572089
+ITERATION 4 node 67 job 0 rank 11 time 27800568.269665
+ITERATION 4 node 5 job 0 rank 23 time 27800568.290579
+ITERATION 4 node 52 job 0 rank 32 time 27800568.291383
+ITERATION 4 node 23 job 0 rank 26 time 27800568.764657
+ITERATION 4 node 25 job 0 rank 35 time 27800587.792281
+ITERATION 4 node 2 job 0 rank 3 time 27803265.147279
+ITERATION 4 node 21 job 0 rank 13 time 27803865.175933
+ITERATION 4 node 7 job 0 rank 28 time 27803865.393650
+ITERATION 4 node 71 job 0 rank 16 time 27803865.598713
+ITERATION 4 node 13 job 0 rank 34 time 27803865.779107
+ITERATION 4 node 1 job 0 rank 19 time 27803865.949695
+ITERATION 4 node 62 job 0 rank 31 time 27804693.839746
+ITERATION 4 node 69 job 0 rank 10 time 27804694.100148
+ITERATION 4 node 32 job 0 rank 22 time 27804694.147620
+ITERATION 4 node 38 job 0 rank 2 time 27806562.454709
+ITERATION 4 node 24 job 0 rank 1 time 27807391.000446
+ITERATION 4 node 48 job 0 rank 12 time 27807990.729141
+ITERATION 4 node 6 job 0 rank 18 time 27807991.114598
+ITERATION 4 node 45 job 0 rank 15 time 27807991.147076
+ITERATION 4 node 70 job 0 rank 7 time 27808219.847694
+ITERATION 4 node 15 job 0 rank 30 time 27808819.476032
+ITERATION 4 node 8 job 0 rank 14 time 27811203.560416
+ITERATION 4 node 12 job 0 rank 0 time 27811516.336037
+ITERATION 4 node 3 job 0 rank 6 time 27812345.183284
+ITERATION 4 node 18 job 0 rank 5 time 27812345.583060
+ITERATION 4 node 65 job 0 rank 4 time 27816470.918651
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time
new file mode 100644
index 00000000..e69de29b
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
new file mode 100644
index 00000000..b06aa026
--- /dev/null
+++ b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
@@ -0,0 +1,73 @@
+# Format <LP id> <Terminal ID> <Total Data Sent> <Total Data Received> <Avg packet latency> <Max packet Latency> <Min packet Latency> <# Packets finished> <Avg Hops> <Avg Busy Time (over rails)>
+2 0 39813200 39813200 36107.138499 461417.952062 3297.307430 9770 4.376561 17587784.331581
+3 1 30000200 30000200 39284.494061 434701.820268 2379.827009 7387 3.682144 18356179.307331
+7 2 30000240 30000240 62318.782725 640866.354244 2379.827009 7392 4.470373 19113473.070391
+8 3 30000040 30000040 40925.980015 515505.949898 3291.464725 7367 3.945975 17940780.437224
+12 4 39813520 39813520 34994.103721 469254.313408 3297.307430 9810 4.381040 16536392.029545
+13 5 30000200 30000200 34635.404728 544989.362677 2379.827009 7387 4.055909 14938820.108085
+17 6 30000200 30000200 44767.801473 576653.794469 2384.084484 7387 3.656288 18814126.770707
+18 7 30000200 30000200 51387.382471 546505.844848 3291.464725 7387 4.206444 22205724.648773
+22 8 30000200 30000200 56515.671426 456563.833012 2379.827009 7387 4.338568 15243839.322545
+23 9 30000200 30000200 62747.573596 732304.908878 2464.855722 7387 4.501963 22518624.337887
+27 10 39813600 39813600 38206.513802 595234.314223 3297.307430 9820 4.071996 15912055.432753
+28 11 30000200 30000200 57688.459239 460987.598043 3291.464725 7387 4.624882 21982147.565134
+32 12 30000040 30000040 48120.746995 460788.732751 1553.218006 7367 4.426904 21978848.089413
+33 13 30000200 30000200 48728.737706 528097.477557 2379.827009 7387 4.224042 17300716.492549
+37 14 39813520 39813520 42482.617734 504077.082789 3297.307430 9810 4.500408 16254727.926633
+38 15 30000200 30000200 55858.592637 544988.314605 2379.827009 7387 3.945986 10437047.462660
+42 16 39813520 39813520 42849.647075 408832.282689 2469.279268 9810 4.165036 15447231.594722
+43 17 39813520 39813520 38338.085471 341206.976962 2384.084484 9810 4.015087 16773305.735538
+47 18 30000240 30000240 42373.430620 629220.891776 3291.464725 7392 4.535173 15995328.015708
+48 19 39813520 39813520 40423.608697 315897.493880 2384.084484 9810 4.176860 14328352.862862
+52 20 39813600 39813600 43520.623476 541086.894327 3297.307430 9820 4.555601 13780387.971014
+53 21 30000200 30000200 44476.828958 349439.122320 2379.827009 7387 4.475430 18712483.166522
+57 22 39813520 39813520 46535.756921 308890.720074 3297.307430 9810 4.331906 12926592.020511
+58 23 30000200 30000200 33511.399102 322937.974241 2469.279268 7387 3.786517 18523348.301746
+62 24 30000240 30000240 36774.377417 461518.474685 1553.218006 7392 4.146239 19158269.234675
+63 25 30000200 30000200 41059.457855 278051.087009 2379.827009 7387 4.351157 19270573.497551
+67 26 30000200 30000200 40614.046110 381768.605366 2379.827009 7387 4.125220 19112482.493963
+68 27 39813520 39813520 41995.882873 371667.257947 2469.279268 9810 4.376962 14646355.081486
+72 28 30000200 30000200 31321.605985 365266.392194 2379.827009 7387 4.039123 19400082.478809
+73 29 39813520 39813520 32959.171255 451587.798712 3297.307430 9810 4.179613 16095202.695306
+77 30 39813520 39813520 37427.649193 351365.395737 2469.279268 9810 4.407339 15586679.540493
+78 31 39813200 39813200 41862.918330 387855.048741 3297.307430 9770 4.525077 16586112.004836
+82 32 30000200 30000200 45060.956994 427249.367222 3291.464725 7387 4.514146 19189374.516511
+83 33 39813520 39813520 49395.064664 469382.320131 2384.084484 9810 4.012232 17850768.194652
+87 34 39813520 39813520 44079.438999 342285.995523 2469.279268 9810 4.520387 15144572.054951
+88 35 39813520 39813520 38971.920099 300516.633956 3297.307430 9810 4.351682 16091038.039457
+92 36 39813520 39813520 39426.347282 309758.764907 2643.131510 9810 4.074822 15678132.783902
+93 37 39813520 39813520 34995.061880 346958.053999 2384.084484 9810 3.428746 16876971.125963
+97 38 30000040 30000040 35905.285204 495198.570009 2379.827009 7367 3.970273 22512609.412966
+98 39 30000200 30000200 42447.183602 377731.969067 2469.279268 7387 4.671179 22714730.942416
+102 40 39813600 39813600 34687.355890 335707.416147 1556.056322 9820 3.577800 14259163.848822
+103 41 39813200 39813200 38175.362430 382249.538761 1556.056322 9770 3.537769 13573738.064970
+107 42 39813600 39813600 44532.014760 369370.892239 3297.307430 9820 4.442770 15313358.299072
+108 43 39813520 39813520 38517.783131 536152.927723 2384.084484 9810 3.930887 16854266.067963
+112 44 39813520 39813520 48072.153707 551643.077958 3297.307430 9810 4.406932 13784797.518310
+113 45 30000200 30000200 38361.576186 337612.463862 3291.464725 7387 4.412481 22709598.612240
+117 46 30000200 30000200 47063.934511 323522.751165 3291.464725 7387 4.315690 15178598.555845
+118 47 39813520 39813520 32468.101294 470956.241085 2384.084484 9810 3.637717 16103495.317657
+122 48 30000200 30000200 34763.145246 355505.175562 2379.827009 7387 3.586165 14099343.817718
+123 49 30000200 30000200 32178.416645 415926.699100 1553.218006 7387 3.282523 13642975.324074
+127 50 39813520 39813520 36042.175157 333399.692314 2384.084484 9810 3.894903 15924037.373174
+128 51 39813520 39813520 47426.976684 384894.236551 3297.307430 9810 4.524771 14800606.559240
+132 52 30000200 30000200 50648.202686 683306.228547 3291.464725 7387 4.051712 18051629.450587
+133 53 39813520 39813520 43688.807523 397577.483030 2384.084484 9810 4.367992 15294739.661280
+137 54 39813520 39813520 41274.721778 410620.945162 2384.084484 9810 3.614985 15348607.758912
+138 55 30000200 30000200 47460.273307 393172.959339 3291.464725 7387 4.455665 22327681.850808
+142 56 39813520 39813520 30353.003214 423224.921981 2384.084484 9810 3.627217 14875339.838529
+143 57 39813200 39813200 41978.905805 362220.406331 3297.307430 9770 4.289662 13439725.026830
+147 58 39813520 39813520 32600.221245 394656.517832 1556.056322 9810 3.366769 17388431.077442
+148 59 39813520 39813520 34844.964516 437962.421017 1556.056322 9810 3.642712 15275599.714230
+152 60 39813520 39813520 38828.762813 380963.287056 2469.279268 9810 4.269521 14057550.559304
+153 61 30000200 30000200 38425.563290 437052.170191 3291.464725 7387 4.298227 18690662.547655
+157 62 30000200 30000200 37163.192817 486901.376592 3291.464725 7387 4.302423 22529024.421436
+158 63 39813520 39813520 32177.129431 313859.556601 2384.084484 9810 4.102345 16088278.649605
+162 64 39813520 39813520 36307.283720 473982.706414 2384.084484 9810 3.756575 13805423.148064
+163 65 30000040 30000040 39266.258111 541660.942240 3291.464725 7367 3.542012 22415755.472424
+167 66 39813520 39813520 35784.342668 486343.405857 2384.084484 9810 3.758104 17791245.347997
+168 67 30000200 30000200 35113.840119 558234.396436 2379.827009 7387 3.712739 15175940.132678
+172 68 30000200 30000200 46259.577398 529697.867518 3291.464725 7387 4.381752 11926841.048741
+173 69 30000200 30000200 41583.798735 474531.461375 2384.084484 7387 4.093001 18609998.026356
+177 70 30000240 30000240 30915.717460 551227.348696 2379.827009 7392 3.543561 18582548.588579
+178 71 30000200 30000200 36220.251632 387764.380366 2464.855722 7387 3.623392 14445475.496982
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
new file mode 100644
index 00000000..7864d91b
--- /dev/null
+++ b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
@@ -0,0 +1,326 @@
+# Format <source_id> <source_type> <dest_id> < dest_type>  <link_type> <link_traffic> <link_saturation> <stalled_chunks>
+
+0 T 0 R CN 40017920 17587784.331581 8837
+1 T 0 R CN 30257152 18356179.307331 6359
+0 R 1 R L 43550704 4874052.951440 4664
+0 R 2 R L 45137320 4738075.959451 5558
+0 R 3 R L 47388976 5040625.986632 6123
+0 R 7 R G 47388976 5040625.986632 6123
+0 R 11 R G 41983848 4420584.424223 4472
+0 R 0 T CN 39813200 1311579.280498 3696
+0 R 1 T CN 30049352 627821.545675 1868
+2 T 1 R CN 30277632 19113473.070391 6755
+3 T 1 R CN 30175232 17940780.437224 6736
+1 R 0 R L 45287672 4986264.334545 5638
+1 R 2 R L 41987648 4053798.568995 5472
+1 R 3 R L 44837432 4310280.411427 4833
+1 R 14 R G 44837432 4310280.411427 4833
+1 R 18 R G 40681472 5840547.653511 5126
+1 R 2 T CN 30049392 1001716.940524 3112
+1 R 3 T CN 30049192 766538.802992 2606
+4 T 2 R CN 40181760 16536392.029545 8204
+5 T 2 R CN 30257152 14938820.108085 6439
+2 R 0 R L 45136984 3987872.001758 3633
+2 R 1 R L 33825320 5132507.548055 3802
+2 R 3 R L 48316064 5229299.015873 6004
+2 R 21 R G 48316064 5229299.015873 6004
+2 R 25 R G 38686264 4817680.782145 4226
+2 R 4 T CN 39813520 1501521.379594 4519
+2 R 5 T CN 30049352 556600.125602 1835
+6 T 3 R CN 30257152 18814126.770707 6964
+7 T 3 R CN 30257152 22205724.648773 6984
+3 R 0 R L 45473184 4549998.796036 5237
+3 R 1 R L 38859592 4033811.151346 4587
+3 R 2 R L 36569560 4293046.016019 3998
+3 R 28 R G 47455304 5109010.661862 5571
+3 R 32 R G 48449992 6234288.585361 7036
+3 R 6 T CN 30049352 821409.664124 2409
+3 R 7 T CN 30049352 969930.916156 3276
+8 T 4 R CN 30257152 15243839.322545 6972
+9 T 4 R CN 30257152 22518624.337887 6513
+4 R 5 R L 36860984 4721092.963003 4346
+4 R 6 R L 32135288 3583080.888814 3568
+4 R 7 R L 58009616 6102408.455236 8020
+4 R 11 R G 58009616 6102408.455236 8020
+4 R 15 R G 52375248 5544874.795086 7249
+4 R 8 T CN 30049352 1128444.125886 3975
+4 R 9 T CN 30049352 932709.088273 3051
+10 T 5 R CN 40222720 15912055.432753 8427
+11 T 5 R CN 30257152 21982147.565134 6710
+5 R 4 R L 53993696 6136982.915861 7661
+5 R 6 R L 42998872 5104126.663520 4877
+5 R 7 R L 46448488 5646843.629053 6270
+5 R 18 R G 46448488 5646843.629053 6270
+5 R 22 R G 51448536 5797029.042031 6137
+5 R 10 T CN 39813600 1349237.793212 4270
+5 R 11 T CN 30049352 1248376.533293 3652
+12 T 6 R CN 30175232 21978848.089413 7147
+13 T 6 R CN 30257152 17300716.492549 6711
+6 R 4 R L 40487440 5812805.144307 5433
+6 R 5 R L 36769744 5264234.631795 4172
+6 R 7 R L 36385608 4752852.854935 3298
+6 R 25 R G 36385608 4752852.854935 3298
+6 R 29 R G 40053384 5005853.637480 4483
+6 R 12 T CN 30049192 903354.038109 3055
+6 R 13 T CN 30049352 712938.420808 2057
+14 T 7 R CN 40181760 16254727.926633 8254
+15 T 7 R CN 30257152 10437047.462660 6653
+7 R 4 R L 34675992 4691030.184385 3995
+7 R 5 R L 43513016 4301021.299787 4870
+7 R 6 R L 35973648 3934225.668027 3041
+7 R 0 R G 42749416 4505780.268095 5345
+7 R 32 R G 38144576 4584423.547847 5166
+7 R 14 T CN 39813520 1387873.491301 4062
+7 R 15 T CN 30049352 1037738.118919 3344
+16 T 8 R CN 40181760 15447231.594722 8402
+17 T 8 R CN 40181760 16773305.735538 9077
+8 R 9 R L 37801352 5059668.991064 4600
+8 R 10 R L 37946560 6152744.148682 4069
+8 R 11 R L 55297048 5864020.845953 7606
+8 R 15 R G 55297048 5864020.845953 7606
+8 R 19 R G 45039696 4606939.448035 4488
+8 R 16 T CN 39813520 1003951.885126 3174
+8 R 17 T CN 39813520 1200737.527034 3904
+18 T 9 R CN 30277632 15995328.015708 7099
+19 T 9 R CN 40181760 14328352.862862 8584
+9 R 8 R L 49385696 6808787.447933 6557
+9 R 10 R L 33253400 5479264.726710 3535
+9 R 11 R L 52364520 6489474.843136 7766
+9 R 22 R G 52364520 6489474.843136 7766
+9 R 26 R G 49267624 6524810.669739 6733
+9 R 18 T CN 30049392 972448.000381 3110
+9 R 19 T CN 39813520 963301.128882 3233
+20 T 10 R CN 40222720 13780387.971014 8866
+21 T 10 R CN 30257152 18712483.166522 6914
+10 R 8 R L 38678080 5240244.555583 4041
+10 R 9 R L 49890448 5799177.766866 6788
+10 R 11 R L 48753776 5150370.905425 6152
+10 R 29 R G 48753776 5150370.905425 6152
+10 R 33 R G 41425648 6389805.183540 5013
+10 R 20 T CN 39813600 1377361.309928 4429
+10 R 21 T CN 30049352 1175058.415210 3308
+22 T 11 R CN 40181760 12926592.020511 8104
+23 T 11 R CN 30257152 18523348.301746 6773
+11 R 8 R L 40560208 4846836.691540 3797
+11 R 9 R L 34124224 4763859.453014 3292
+11 R 10 R L 38605016 5277021.621061 4322
+11 R 0 R G 40478280 5086777.914766 4111
+11 R 4 R G 49943824 5905101.806403 6563
+11 R 22 T CN 39813520 1424222.517660 4265
+11 R 23 T CN 30049352 1028261.496375 3530
+24 T 12 R CN 30277632 19158269.234675 7090
+25 T 12 R CN 30257152 19270573.497551 7116
+12 R 13 R L 32047456 3750265.764293 2849
+12 R 14 R L 50178184 7309130.184921 7564
+12 R 15 R L 58034400 5843698.462804 8436
+12 R 19 R G 58034400 5843698.462804 8436
+12 R 23 R G 44575160 6092245.688980 5725
+12 R 24 T CN 30049392 806566.838442 2811
+12 R 25 T CN 30049352 1213202.131881 3583
+26 T 13 R CN 30257152 19112482.493963 7014
+27 T 13 R CN 40181760 14646355.081486 8988
+13 R 12 R L 44527192 6423030.861185 6482
+13 R 14 R L 46636984 5807093.011567 6138
+13 R 15 R L 46974000 6554687.676421 5454
+13 R 26 R G 46974000 6554687.676421 5454
+13 R 30 R G 52447448 4836595.765817 6980
+13 R 26 T CN 30049352 1125329.914129 3288
+13 R 27 T CN 39813520 1512104.949816 4844
+28 T 14 R CN 30257152 19400082.478809 7092
+29 T 14 R CN 40181760 16095202.695306 9170
+14 R 12 R L 34606080 5950705.752400 4000
+14 R 13 R L 43123232 5680350.634564 5580
+14 R 15 R L 38261328 4981625.686206 3350
+14 R 1 R G 45992208 4930271.366009 5573
+14 R 33 R G 38261328 4981625.686206 3350
+14 R 28 T CN 30049352 637814.555154 2221
+14 R 29 T CN 39813520 1249106.103622 3833
+30 T 15 R CN 40181760 15586679.540493 9411
+31 T 15 R CN 40017920 16586112.004836 9223
+15 R 12 R L 51077496 6550042.514575 7713
+15 R 13 R L 47073208 4832118.412440 5895
+15 R 14 R L 45116952 4837185.355505 4833
+15 R 4 R G 52746688 6709131.321456 8350
+15 R 8 R G 51692976 6953577.442845 7000
+15 R 30 T CN 39813520 1283130.365744 4192
+15 R 31 T CN 39813200 1457038.449892 4765
+32 T 16 R CN 30257152 19189374.516511 7113
+33 T 16 R CN 40181760 17850768.194652 7596
+16 R 17 R L 54025952 5905546.258479 7114
+16 R 18 R L 34160632 4064251.783900 3198
+16 R 19 R L 45741616 6374696.422823 6186
+16 R 23 R G 45741616 6374696.422823 6186
+16 R 27 R G 41321480 5702481.030882 5627
+16 R 32 T CN 30049352 1532449.420789 4360
+16 R 33 T CN 39813520 1199169.584479 3474
+34 T 17 R CN 40181760 15144572.054951 9081
+35 T 17 R CN 40181760 16091038.039457 9000
+17 R 16 R L 46232920 5971728.548924 6388
+17 R 18 R L 44728608 6438892.891589 6745
+17 R 19 R L 51983616 5427979.947518 6365
+17 R 30 R G 51983616 5427979.947518 6365
+17 R 34 R G 44317208 5188154.376478 5087
+17 R 34 T CN 39813520 1485890.868749 4665
+17 R 35 T CN 39813520 1130833.183301 3732
+36 T 18 R CN 40181760 15678132.783902 8490
+37 T 18 R CN 40181760 16876971.125963 8122
+18 R 16 R L 46143600 5514466.847122 6984
+18 R 17 R L 42394152 3865669.619331 4840
+18 R 19 R L 45794064 5340218.948968 5313
+18 R 1 R G 45794064 5340218.948968 5313
+18 R 5 R G 49523608 5463627.902051 6131
+18 R 36 T CN 39813520 852504.013955 2683
+18 R 37 T CN 39813520 805790.300623 2486
+38 T 19 R CN 30175232 22512609.412966 6847
+39 T 19 R CN 30257152 22714730.942416 7121
+19 R 16 R L 34957960 5526707.603444 4004
+19 R 17 R L 47349408 6034758.983252 6333
+19 R 18 R L 46919416 5608445.885587 6075
+19 R 8 R G 44365616 5296334.999713 4604
+19 R 12 R G 56495544 6082973.442639 8366
+19 R 38 T CN 30049192 1092387.377044 3270
+19 R 39 T CN 30049352 1068280.894725 3140
+40 T 20 R CN 40222720 14259163.848822 8729
+41 T 20 R CN 40017920 13573738.064970 8591
+20 R 21 R L 39728944 4697512.855911 5353
+20 R 22 R L 41903320 5045588.986343 5348
+20 R 23 R L 52146088 7359369.800037 7983
+20 R 27 R G 52146088 7359369.800037 7983
+20 R 31 R G 47184128 5131759.262767 5973
+20 R 40 T CN 39813600 883030.699000 2892
+20 R 41 T CN 39813200 784350.806184 2677
+42 T 21 R CN 40222720 15313358.299072 9214
+43 T 21 R CN 40181760 16854266.067963 8718
+21 R 20 R L 39164632 5944643.875686 5577
+21 R 22 R L 38028176 4427084.605373 4537
+21 R 23 R L 34169112 4269694.137535 2856
+21 R 2 R G 48270464 5223801.490295 5763
+21 R 34 R G 34169112 4269694.137535 2856
+21 R 42 T CN 39813600 1364767.638329 4459
+21 R 43 T CN 39813520 857971.397901 2662
+44 T 22 R CN 40181760 13784797.518310 8666
+45 T 22 R CN 30257152 22709598.612240 6328
+22 R 20 R L 41212736 5760616.725519 4330
+22 R 21 R L 44899648 5571184.216750 5490
+22 R 23 R L 48191408 6286261.674063 6674
+22 R 5 R G 48191408 6286261.674063 6674
+22 R 9 R G 51169760 5237963.175649 7226
+22 R 44 T CN 39813520 1542534.243019 4936
+22 R 45 T CN 30049352 1265047.999267 3871
+46 T 23 R CN 30257152 15178598.555845 6169
+47 T 23 R CN 40181760 16103495.317657 8422
+23 R 20 R L 40333504 6603454.297476 5211
+23 R 21 R L 36006888 4843963.251430 3295
+23 R 22 R L 40413664 5142267.725370 4994
+23 R 12 R G 45082392 5304702.183129 5115
+23 R 16 R G 50801776 5131874.483025 7138
+23 R 46 T CN 30049352 1382693.916307 4027
+23 R 47 T CN 39813520 1038551.205006 3128
+48 T 24 R CN 30257152 14099343.817718 6106
+49 T 24 R CN 30257152 13642975.324074 6045
+24 R 25 R L 39016768 3857944.974915 4288
+24 R 26 R L 34965464 5003889.506839 4262
+24 R 27 R L 47190024 5454167.775527 6871
+24 R 31 R G 47190024 5454167.775527 6871
+24 R 35 R G 41646064 5992850.984984 5408
+24 R 48 T CN 30049352 667188.968666 2499
+24 R 49 T CN 30049352 881431.111138 2993
+50 T 25 R CN 40181760 15924037.373174 9323
+51 T 25 R CN 40181760 14800606.559240 8762
+25 R 24 R L 35274088 5828284.233935 3900
+25 R 26 R L 51652152 5817462.082088 7529
+25 R 27 R L 39214576 4326894.031401 4324
+25 R 2 R G 39214576 4326894.031401 4324
+25 R 6 R G 35298432 4524630.058634 3493
+25 R 50 T CN 39813520 966485.561719 2871
+25 R 51 T CN 39813520 1404936.336504 4156
+52 T 26 R CN 30257152 18051629.450587 6976
+53 T 26 R CN 40181760 15294739.661280 9009
+26 R 24 R L 38439736 6450489.640662 5018
+26 R 25 R L 43021968 4912180.431768 4539
+26 R 27 R L 49255576 5506754.761772 6905
+26 R 9 R G 49255576 5506754.761772 6905
+26 R 13 R G 46517152 4931759.758126 5515
+26 R 52 T CN 30049352 1089113.603708 3137
+26 R 53 T CN 39813520 1068108.929478 3544
+54 T 27 R CN 40181760 15348607.758912 8357
+55 T 27 R CN 30257152 22327681.850808 6890
+27 R 24 R L 31797088 5550718.157644 3272
+27 R 25 R L 52961704 5033719.493260 5860
+27 R 26 R L 40313048 4772700.110949 4746
+27 R 16 R G 46071704 4936679.422089 6250
+27 R 20 R G 51076808 5847799.836050 6668
+27 R 54 T CN 39813520 1055478.134521 3643
+27 R 55 T CN 30049352 1389171.083011 3761
+56 T 28 R CN 40181760 14875339.838529 8758
+57 T 28 R CN 40017920 13439725.026830 8197
+28 R 29 R L 36407792 4562925.391117 4531
+28 R 30 R L 54929192 5839356.894003 8374
+28 R 31 R L 47104064 5457596.779440 5298
+28 R 3 R G 44037240 3666217.594051 4157
+28 R 35 R G 47104064 5457596.779440 5298
+28 R 56 T CN 39813520 589240.204071 1810
+28 R 57 T CN 39813200 1253920.506312 4022
+58 T 29 R CN 40181760 17388431.077442 8594
+59 T 29 R CN 40181760 15275599.714230 8708
+29 R 28 R L 36743304 4747217.925107 3404
+29 R 30 R L 39226976 4588611.200791 4500
+29 R 31 R L 43193824 5690009.346772 4949
+29 R 6 R G 43193824 5690009.346772 4949
+29 R 10 R G 46395064 5499095.428036 5989
+29 R 58 T CN 39813520 813051.646961 2322
+29 R 59 T CN 39813520 916003.412645 3024
+60 T 30 R CN 40181760 14057550.559304 8373
+61 T 30 R CN 30257152 18690662.547655 6874
+30 R 28 R L 37206752 5500248.346157 3199
+30 R 29 R L 46057680 5746289.642082 5828
+30 R 31 R L 51234608 5090602.876008 6564
+30 R 13 R G 51234608 5090602.876008 6564
+30 R 17 R G 50215328 5459724.442375 7315
+30 R 60 T CN 39813520 1135713.228615 3800
+30 R 61 T CN 30049352 1610036.399930 4445
+62 T 31 R CN 30257152 22529024.421436 6870
+63 T 31 R CN 40181760 16088278.649605 8550
+31 R 28 R L 54027816 4799689.111254 6056
+31 R 29 R L 38985584 5655226.937628 4649
+31 R 30 R L 47371424 4792407.927307 5839
+31 R 20 R G 46661320 5892242.729819 5976
+31 R 24 R G 49969336 6454067.586342 6779
+31 R 62 T CN 30049352 1152784.313173 3566
+31 R 63 T CN 39813520 803243.110496 2380
+64 T 32 R CN 40181760 13805423.148064 6702
+65 T 32 R CN 30175232 22415755.472424 6932
+32 R 33 R L 39495072 4060223.754083 4027
+32 R 34 R L 38318680 3832552.194477 3756
+32 R 35 R L 51263104 5519808.640647 7544
+32 R 3 R G 51263104 5519808.640647 7544
+32 R 7 R G 39328112 5219095.840084 5052
+32 R 64 T CN 39813520 1210251.667998 3458
+32 R 65 T CN 30049192 630422.140586 2292
+66 T 33 R CN 40181760 17791245.347997 6309
+67 T 33 R CN 30257152 15175940.132678 6274
+33 R 32 R L 55929688 5194979.684028 5947
+33 R 34 R L 32785920 4097967.225898 2747
+33 R 35 R L 39697960 5994401.587308 5430
+33 R 10 R G 39697960 5994401.587308 5430
+33 R 14 R G 38884784 4379182.216837 4004
+33 R 66 T CN 39813520 1107798.113758 3239
+33 R 67 T CN 30049352 811674.087607 2529
+68 T 34 R CN 30257152 11926841.048741 6879
+69 T 34 R CN 30257152 18609998.026356 6837
+34 R 32 R L 39777456 5046916.308178 5315
+34 R 33 R L 34270784 5365204.443651 4172
+34 R 35 R L 42249888 4560827.376285 5055
+34 R 17 R G 42249888 4560827.376285 5055
+34 R 21 R G 36831104 4824175.442723 3634
+34 R 68 T CN 30049352 1078687.231229 3249
+34 R 69 T CN 30049352 739703.645466 2337
+70 T 35 R CN 30277632 18582548.588579 6780
+71 T 35 R CN 30257152 14445475.496982 6863
+35 R 32 R L 39508896 5032306.409923 6202
+35 R 33 R L 34469632 5572253.621625 4066
+35 R 34 R L 33126928 4824327.757457 3462
+35 R 24 R G 41519936 4246448.961771 4535
+35 R 28 R G 43743104 4729620.821349 5151
+35 R 70 T CN 30049392 447139.490875 1640
+35 R 71 T CN 30049352 1020380.529942 3401
\ No newline at end of file
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
new file mode 100644
index 00000000..b2df970f
--- /dev/null
+++ b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
@@ -0,0 +1,72 @@
+lp:2	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:352766743.133932	max_event_size:792
+lp:3	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:290194557.627888	max_event_size:792
+lp:7	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:460660441.899865	max_event_size:792
+lp:8	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:301501694.772019	max_event_size:792
+lp:12	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343292157.501219	max_event_size:792
+lp:13	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:255851734.723889	max_event_size:792
+lp:17	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:330699749.483186	max_event_size:792
+lp:18	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:379598594.313395	max_event_size:792
+lp:22	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:417481264.821944	max_event_size:792
+lp:23	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:463516326.154559	max_event_size:792
+lp:27	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:375187965.536660	max_event_size:792
+lp:28	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:426144648.397676	max_event_size:792
+lp:32	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:354505543.112376	max_event_size:792
+lp:33	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:359959185.432986	max_event_size:792
+lp:37	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:416754479.974724	max_event_size:792
+lp:38	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:412627423.811912	max_event_size:792
+lp:42	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:420355037.806001	max_event_size:792
+lp:43	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:376096618.471747	max_event_size:792
+lp:47	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:313224399.141498	max_event_size:792
+lp:48	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:396555601.315165	max_event_size:792
+lp:52	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:427372522.534280	max_event_size:792
+lp:53	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:328550335.514686	max_event_size:792
+lp:57	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:456515775.394763	max_event_size:792
+lp:58	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:247548705.168367	max_event_size:792
+lp:62	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:271836197.869304	max_event_size:792
+lp:63	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:303306215.174348	max_event_size:792
+lp:67	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:300015958.614822	max_event_size:792
+lp:68	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:411979610.984809	max_event_size:792
+lp:72	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:231372703.409270	max_event_size:792
+lp:73	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:323329470.014433	max_event_size:792
+lp:77	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:367165238.584887	max_event_size:792
+lp:78	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:409000712.088329	max_event_size:792
+lp:82	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:332865289.311507	max_event_size:792
+lp:83	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:484565584.355521	max_event_size:792
+lp:87	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:432419296.580878	max_event_size:792
+lp:88	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:382314536.175355	max_event_size:792
+lp:92	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:386772466.832815	max_event_size:792
+lp:93	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343301557.045121	max_event_size:792
+lp:97	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:264514236.096660	max_event_size:792
+lp:98	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:313557345.269660	max_event_size:792
+lp:102	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:340629834.844444	max_event_size:792
+lp:103	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:372973290.936664	max_event_size:792
+lp:107	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:437304384.944908	max_event_size:792
+lp:108	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:377859452.518662	max_event_size:792
+lp:112	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:471587827.861975	max_event_size:792
+lp:113	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283376963.287888	max_event_size:792
+lp:117	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:347661284.230018	max_event_size:792
+lp:118	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:318512073.696441	max_event_size:792
+lp:122	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:256795353.934300	max_event_size:792
+lp:123	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:237701963.756680	max_event_size:792
+lp:127	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:353573738.286471	max_event_size:792
+lp:128	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:465258641.273453	max_event_size:792
+lp:132	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:374138273.240924	max_event_size:792
+lp:133	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:428587201.804590	max_event_size:792
+lp:137	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:404905020.644931	max_event_size:792
+lp:138	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:350589038.917737	max_event_size:792
+lp:142	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:297762961.526255	max_event_size:792
+lp:143	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:410133909.716090	max_event_size:792
+lp:147	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:319808170.413551	max_event_size:792
+lp:148	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:341829101.902517	max_event_size:792
+lp:152	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:380910163.193983	max_event_size:792
+lp:153	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283849636.026727	max_event_size:792
+lp:157	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:274524505.335948	max_event_size:792
+lp:158	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:315657639.722941	max_event_size:792
+lp:162	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:356174453.289787	max_event_size:792
+lp:163	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:289274523.506280	max_event_size:792
+lp:167	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:351044401.571382	max_event_size:792
+lp:168	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:259385936.957835	max_event_size:792
+lp:172	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:341719498.237854	max_event_size:792
+lp:173	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:307179521.253780	max_event_size:792
+lp:177	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:228528983.462069	max_event_size:792
+lp:178	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:267558998.807704	max_event_size:792
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
new file mode 100644
index 00000000..b2df970f
--- /dev/null
+++ b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
@@ -0,0 +1,72 @@
+lp:2	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:352766743.133932	max_event_size:792
+lp:3	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:290194557.627888	max_event_size:792
+lp:7	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:460660441.899865	max_event_size:792
+lp:8	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:301501694.772019	max_event_size:792
+lp:12	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343292157.501219	max_event_size:792
+lp:13	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:255851734.723889	max_event_size:792
+lp:17	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:330699749.483186	max_event_size:792
+lp:18	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:379598594.313395	max_event_size:792
+lp:22	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:417481264.821944	max_event_size:792
+lp:23	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:463516326.154559	max_event_size:792
+lp:27	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:375187965.536660	max_event_size:792
+lp:28	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:426144648.397676	max_event_size:792
+lp:32	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:354505543.112376	max_event_size:792
+lp:33	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:359959185.432986	max_event_size:792
+lp:37	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:416754479.974724	max_event_size:792
+lp:38	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:412627423.811912	max_event_size:792
+lp:42	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:420355037.806001	max_event_size:792
+lp:43	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:376096618.471747	max_event_size:792
+lp:47	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:313224399.141498	max_event_size:792
+lp:48	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:396555601.315165	max_event_size:792
+lp:52	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:427372522.534280	max_event_size:792
+lp:53	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:328550335.514686	max_event_size:792
+lp:57	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:456515775.394763	max_event_size:792
+lp:58	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:247548705.168367	max_event_size:792
+lp:62	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:271836197.869304	max_event_size:792
+lp:63	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:303306215.174348	max_event_size:792
+lp:67	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:300015958.614822	max_event_size:792
+lp:68	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:411979610.984809	max_event_size:792
+lp:72	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:231372703.409270	max_event_size:792
+lp:73	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:323329470.014433	max_event_size:792
+lp:77	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:367165238.584887	max_event_size:792
+lp:78	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:409000712.088329	max_event_size:792
+lp:82	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:332865289.311507	max_event_size:792
+lp:83	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:484565584.355521	max_event_size:792
+lp:87	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:432419296.580878	max_event_size:792
+lp:88	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:382314536.175355	max_event_size:792
+lp:92	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:386772466.832815	max_event_size:792
+lp:93	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343301557.045121	max_event_size:792
+lp:97	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:264514236.096660	max_event_size:792
+lp:98	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:313557345.269660	max_event_size:792
+lp:102	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:340629834.844444	max_event_size:792
+lp:103	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:372973290.936664	max_event_size:792
+lp:107	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:437304384.944908	max_event_size:792
+lp:108	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:377859452.518662	max_event_size:792
+lp:112	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:471587827.861975	max_event_size:792
+lp:113	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283376963.287888	max_event_size:792
+lp:117	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:347661284.230018	max_event_size:792
+lp:118	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:318512073.696441	max_event_size:792
+lp:122	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:256795353.934300	max_event_size:792
+lp:123	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:237701963.756680	max_event_size:792
+lp:127	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:353573738.286471	max_event_size:792
+lp:128	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:465258641.273453	max_event_size:792
+lp:132	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:374138273.240924	max_event_size:792
+lp:133	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:428587201.804590	max_event_size:792
+lp:137	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:404905020.644931	max_event_size:792
+lp:138	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:350589038.917737	max_event_size:792
+lp:142	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:297762961.526255	max_event_size:792
+lp:143	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:410133909.716090	max_event_size:792
+lp:147	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:319808170.413551	max_event_size:792
+lp:148	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:341829101.902517	max_event_size:792
+lp:152	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:380910163.193983	max_event_size:792
+lp:153	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283849636.026727	max_event_size:792
+lp:157	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:274524505.335948	max_event_size:792
+lp:158	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:315657639.722941	max_event_size:792
+lp:162	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:356174453.289787	max_event_size:792
+lp:163	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:289274523.506280	max_event_size:792
+lp:167	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:351044401.571382	max_event_size:792
+lp:168	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:259385936.957835	max_event_size:792
+lp:172	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:341719498.237854	max_event_size:792
+lp:173	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:307179521.253780	max_event_size:792
+lp:177	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:228528983.462069	max_event_size:792
+lp:178	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:267558998.807704	max_event_size:792
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
new file mode 100644
index 00000000..fed80ff6
--- /dev/null
+++ b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
@@ -0,0 +1,73 @@
+# Format <LP ID> <Terminal ID> <Job ID> <Local Rank> <Total sends> <Total Recvs> <Bytes sent> <Bytes recvd> <Send time> <Comm. time> <Compute time> <Avg msg time> <Max Msg Time>
+ 0 0 1 0 90 90 39813200 39813200 89237007.320510 21119347.570794 250.000000 0.000000 0.000000
+ 1 1 0 19 67 67 30000200 30000200 56475874.301444 17852333.411932 10000000.000000 0.000000 0.000000
+ 5 2 0 3 72 72 30000240 30000240 62621556.489293 17852174.837306 10000000.000000 0.000000 0.000000
+ 6 3 0 6 47 47 30000040 30000040 53717217.531453 17853913.164114 10000000.000000 0.000000 0.000000
+ 10 4 1 10 130 130 39813520 39813520 82212308.371531 21068601.547571 250.000000 0.000000 0.000000
+ 11 5 0 23 67 67 30000200 30000200 56605447.229800 17856451.510284 10000000.000000 0.000000 0.000000
+ 15 6 0 18 67 67 30000200 30000200 59377020.615150 17849212.584262 10000000.000000 0.000000 0.000000
+ 16 7 0 28 67 67 30000200 30000200 56611204.332201 17853245.667187 10000000.000000 0.000000 0.000000
+ 20 8 0 14 67 67 30000200 30000200 58046006.712762 17853330.657890 10000000.000000 0.000000 0.000000
+ 21 9 0 27 67 67 30000200 30000200 54666576.648049 17857362.848268 10000000.000000 0.000000 0.000000
+ 25 10 1 3 140 140 39813600 39813600 79806532.629068 21115366.245572 250.000000 0.000000 0.000000
+ 26 11 0 9 67 67 30000200 30000200 55190353.587108 17851269.424881 10000000.000000 0.000000 0.000000
+ 30 12 0 0 47 47 30000040 30000040 56355045.144427 17849847.891871 10000000.000000 0.000000 0.000000
+ 31 13 0 34 67 67 30000200 30000200 57319838.732274 17853331.023495 10000000.000000 0.000000 0.000000
+ 35 14 1 20 130 130 39813520 39813520 79825582.529869 21113723.869383 250.000000 0.000000 0.000000
+ 36 15 0 30 67 67 30000200 30000200 50539798.306285 17855710.850404 10000000.000000 0.000000 0.000000
+ 40 16 1 23 130 130 39813520 39813520 96570614.385612 21173404.388469 250.000000 0.000000 0.000000
+ 41 17 1 32 130 130 39813520 39813520 104174353.511400 21127185.068113 250.000000 0.000000 0.000000
+ 45 18 0 5 72 72 30000240 30000240 60746833.137984 17855387.498509 10000000.000000 0.000000 0.000000
+ 46 19 1 28 130 130 39813520 39813520 90532100.834860 21113902.678420 250.000000 0.000000 0.000000
+ 50 20 1 5 140 140 39813600 39813600 93215021.859260 21107999.546402 250.000000 0.000000 0.000000
+ 51 21 0 13 67 67 30000200 30000200 62524142.281874 17854071.877755 10000000.000000 0.000000 0.000000
+ 55 22 1 11 130 130 39813520 39813520 87108820.845512 21253403.349942 250.000000 0.000000 0.000000
+ 56 23 0 26 67 67 30000200 30000200 58124303.599644 17850680.445220 10000000.000000 0.000000 0.000000
+ 60 24 0 1 72 72 30000240 30000240 73825732.655678 17853237.674558 10000000.000000 0.000000 0.000000
+ 61 25 0 35 67 67 30000200 30000200 74632319.393684 17856451.858593 10000000.000000 0.000000 0.000000
+ 65 26 0 29 67 67 30000200 30000200 60973047.224376 17856451.550016 10000000.000000 0.000000 0.000000
+ 66 27 1 30 130 130 39813520 39813520 97183327.200930 21102174.565586 250.000000 0.000000 0.000000
+ 70 28 0 33 67 67 30000200 30000200 69666737.099805 17855617.501467 10000000.000000 0.000000 0.000000
+ 71 29 1 33 130 130 39813520 39813520 100178218.078153 21238162.751729 250.000000 0.000000 0.000000
+ 75 30 1 27 130 130 39813520 39813520 98877667.184731 21256700.657271 250.000000 0.000000 0.000000
+ 76 31 1 2 90 90 39813200 39813200 100942318.305743 21155764.515093 250.000000 0.000000 0.000000
+ 80 32 0 22 67 67 30000200 30000200 65591630.997276 17854157.642763 10000000.000000 0.000000 0.000000
+ 81 33 1 12 130 130 39813520 39813520 78974801.715336 21109777.342929 250.000000 0.000000 0.000000
+ 85 34 1 31 130 130 39813520 39813520 100689518.650071 21192061.784327 250.000000 0.000000 0.000000
+ 86 35 1 21 130 130 39813520 39813520 101556407.296841 21115137.760280 250.000000 0.000000 0.000000
+ 90 36 1 26 130 130 39813520 39813520 87181078.575814 21143229.925367 250.000000 0.000000 0.000000
+ 91 37 1 14 130 130 39813520 39813520 91281082.849771 21097362.783538 250.000000 0.000000 0.000000
+ 95 38 0 2 47 47 30000040 30000040 63940848.873793 17847374.542274 10000000.000000 0.000000 0.000000
+ 96 39 0 8 67 67 30000200 30000200 71690442.429131 17849121.063364 10000000.000000 0.000000 0.000000
+ 100 40 1 7 140 140 39813600 39813600 88967018.559046 21161719.568495 250.000000 0.000000 0.000000
+ 101 41 1 6 90 90 39813200 39813200 85527779.462703 21163275.624817 250.000000 0.000000 0.000000
+ 105 42 1 1 140 140 39813600 39813600 100457403.678861 21110006.073760 250.000000 0.000000 0.000000
+ 106 43 1 17 130 130 39813520 39813520 99253629.669866 21189028.967585 250.000000 0.000000 0.000000
+ 110 44 1 22 130 130 39813520 39813520 88807135.248057 21070787.824921 250.000000 0.000000 0.000000
+ 111 45 0 15 67 67 30000200 30000200 55657802.114682 17856451.485560 10000000.000000 0.000000 0.000000
+ 115 46 0 25 67 67 30000200 30000200 57569424.228786 17853238.309627 10000000.000000 0.000000 0.000000
+ 116 47 1 16 130 130 39813520 39813520 94714665.003806 21118593.796807 250.000000 0.000000 0.000000
+ 120 48 0 12 67 67 30000200 30000200 49329790.727020 17848300.919883 10000000.000000 0.000000 0.000000
+ 121 49 0 20 67 67 30000200 30000200 49076027.353583 17848300.513379 10000000.000000 0.000000 0.000000
+ 125 50 1 19 130 130 39813520 39813520 92110607.580828 21257528.308085 250.000000 0.000000 0.000000
+ 126 51 1 29 130 130 39813520 39813520 94697768.477917 21128810.332614 250.000000 0.000000 0.000000
+ 130 52 0 32 67 67 30000200 30000200 52119612.770477 17853139.356496 10000000.000000 0.000000 0.000000
+ 131 53 1 34 130 130 39813520 39813520 87422795.515014 21154612.434599 250.000000 0.000000 0.000000
+ 135 54 1 18 130 130 39813520 39813520 78422965.566418 21073020.434406 250.000000 0.000000 0.000000
+ 136 55 0 17 67 67 30000200 30000200 53390682.328147 17853238.062964 10000000.000000 0.000000 0.000000
+ 140 56 1 25 130 130 39813520 39813520 89493603.447990 21189561.925589 250.000000 0.000000 0.000000
+ 141 57 1 4 90 90 39813200 39813200 85436564.237106 21111296.853831 250.000000 0.000000 0.000000
+ 145 58 1 13 130 130 39813520 39813520 96802622.211033 21115137.315974 250.000000 0.000000 0.000000
+ 146 59 1 15 130 130 39813520 39813520 96583487.482583 21161490.594629 250.000000 0.000000 0.000000
+ 150 60 1 35 130 130 39813520 39813520 90266817.624509 21356588.612738 250.000000 0.000000 0.000000
+ 151 61 0 24 67 67 30000200 30000200 58285049.554882 17852412.897185 10000000.000000 0.000000 0.000000
+ 155 62 0 31 67 67 30000200 30000200 66320862.530225 17860569.932221 10000000.000000 0.000000 0.000000
+ 156 63 1 24 130 130 39813520 39813520 98077004.277963 21119191.871021 250.000000 0.000000 0.000000
+ 160 64 1 9 130 130 39813520 39813520 71635687.154524 21185283.014578 250.000000 0.000000 0.000000
+ 161 65 0 4 47 47 30000040 30000040 55102180.614745 17851269.485238 10000000.000000 0.000000 0.000000
+ 165 66 1 8 130 130 39813520 39813520 68646777.969893 21113251.882987 250.000000 0.000000 0.000000
+ 166 67 0 11 67 67 30000200 30000200 51878884.318374 17855809.464314 10000000.000000 0.000000 0.000000
+ 170 68 0 21 67 67 30000200 30000200 59776460.886694 17852418.990265 10000000.000000 0.000000 0.000000
+ 171 69 0 10 67 67 30000200 30000200 57589326.703996 17849795.090486 10000000.000000 0.000000 0.000000
+ 175 70 0 7 72 72 30000240 30000240 54267360.186473 17850595.235628 10000000.000000 0.000000 0.000000
+ 176 71 0 16 67 67 30000200 30000200 56663635.212701 17849021.295227 10000000.000000 0.000000 0.000000
\ No newline at end of file
diff --git a/tmptest/expected/tmptest-jacobiS_MILC.output b/tmptest/expected/tmptest-jacobiS_MILC.output
new file mode 100644
index 00000000..b8fb808a
--- /dev/null
+++ b/tmptest/expected/tmptest-jacobiS_MILC.output
@@ -0,0 +1,240 @@
+/home/ac.xwang/install/codes-new/bin/model-net-mpi-replay --sync=1 --workload_type=conc-online --lp-io-use-suffix=1 --workload_conf_file=/home/ac.xwang/tools/codes-new/tmptest/conf/jacobi_MILC.conf --alloc_file=/home/ac.xwang/tools/codes-new/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf --lp-io-dir=tmptest-jacobiS_MILC -- /home/ac.xwang/tools/codes-new/tmptest/conf/dfdally-72-par.conf 
+
+Thu Aug 31 11:16:21 2023
+
+ROSS Version: v8.0.0-dirty
+
+tw_net_start: Found world size to be 1 
+
+ num_net_traces 72; num_dumpi_traces 72NIC num injection port not specified, setting to 1
+NIC seq delay not specified, setting to 10.000000
+NIC num copy queues not specified, setting to 1
+Dragonfly rail selection is 3
+within node transfer per byte delay is 0.190476
+
+ROSS Core Configuration: 
+	Total PEs                                                    1
+	Total KPs                                          [Nodes (1) x KPs (16)] 16
+	Total LPs                                                  180
+	Simulation End Time                                3600000000000.00
+	LP-to-PE Mapping                                   model defined
+
+
+ROSS Event Memory Allocation:
+	Model events                                             46081
+	Network events                                              16
+	Total events                                             46096
+
+*** START SEQUENTIAL SIMULATION ***
+
+Jacobi3D: Running Jacobi on 36 processors with (4, 3, 3) elements
+Jacobi3D: Array Dimensions: 400 300 300
+Jacobi3D: Block Dimensions: 100 100 100
+Set num_servers per router 2, servers per injection queue per router 2, servers per node copy queue per node 1, num nics 2
+
+ Network node 10 Rank 4 App 1 finished at 21068851.547571 
+ Network node 22 Rank 44 App 1 finished at 21071037.824921 
+ Network node 18 Rank 54 App 1 finished at 21073270.434406 
+ Network node 14 Rank 37 App 1 finished at 21097612.783538 
+ Network node 30 Rank 27 App 1 finished at 21102424.565586 
+ Network node 5 Rank 20 App 1 finished at 21108249.546402 
+ Network node 12 Rank 33 App 1 finished at 21110027.342929 
+ Network node 1 Rank 42 App 1 finished at 21110256.073760 
+ Network node 4 Rank 57 App 1 finished at 21111546.853831 
+ Network node 8 Rank 66 App 1 finished at 21113501.882987 
+ Network node 20 Rank 14 App 1 finished at 21113973.869383 
+ Network node 28 Rank 19 App 1 finished at 21114152.678420 
+ Network node 13 Rank 58 App 1 finished at 21115387.315974 
+ Network node 21 Rank 35 App 1 finished at 21115387.760280 
+ Network node 3 Rank 10 App 1 finished at 21115616.245572 
+ Network node 16 Rank 47 App 1 finished at 21118843.796807 
+ Network node 24 Rank 63 App 1 finished at 21119441.871021 
+ Network node 0 Rank 0 App 1 finished at 21119597.570794 
+ Network node 32 Rank 17 App 1 finished at 21127435.068113 
+ Network node 29 Rank 51 App 1 finished at 21129060.332614 
+ Network node 26 Rank 36 App 1 finished at 21143479.925367 
+ Network node 34 Rank 53 App 1 finished at 21154862.434599 
+ Network node 2 Rank 31 App 1 finished at 21156014.515093 
+ Network node 15 Rank 59 App 1 finished at 21161740.594629 
+ Network node 7 Rank 40 App 1 finished at 21161969.568495 
+ Network node 6 Rank 41 App 1 finished at 21163525.624817 
+ Network node 23 Rank 16 App 1 finished at 21173654.388469 
+ Network node 9 Rank 64 App 1 finished at 21185533.014578 
+ Network node 17 Rank 43 App 1 finished at 21189278.967585 
+ Network node 25 Rank 56 App 1 finished at 21189811.925589 
+ Network node 31 Rank 34 App 1 finished at 21192311.784327 
+ Network node 33 Rank 29 App 1 finished at 21238412.751729 
+ Network node 11 Rank 22 App 1 finished at 21253653.349942 
+ Network node 27 Rank 30 App 1 finished at 21256950.657271 
+ Network node 19 Rank 50 App 1 finished at 21257778.308085 
+ Network node 35 Rank 60 App 1 finished at 21356838.612738 App 0: Received finished workload notificationThere is still a nonsynethic workload left. 1 != 2
+
+ Network node 2 Rank 38 App 0 finished at 27847374.542274 
+ Network node 20 Rank 49 App 0 finished at 27848300.513379 
+ Network node 12 Rank 48 App 0 finished at 27848300.919883 
+ Network node 16 Rank 71 App 0 finished at 27849021.295227 
+ Network node 8 Rank 39 App 0 finished at 27849121.063364 
+ Network node 18 Rank 6 App 0 finished at 27849212.584262 
+ Network node 10 Rank 69 App 0 finished at 27849795.090486 
+ Network node 0 Rank 12 App 0 finished at 27849847.891871 
+ Network node 7 Rank 70 App 0 finished at 27850595.235628 
+ Network node 26 Rank 23 App 0 finished at 27850680.445220 
+ Network node 9 Rank 11 App 0 finished at 27851269.424881 
+ Network node 4 Rank 65 App 0 finished at 27851269.485238 
+ Network node 3 Rank 2 App 0 finished at 27852174.837306 
+ Network node 19 Rank 1 App 0 finished at 27852333.411932 
+ Network node 24 Rank 61 App 0 finished at 27852412.897185 
+ Network node 21 Rank 68 App 0 finished at 27852418.990265 
+ Network node 32 Rank 52 App 0 finished at 27853139.356496 
+ Network node 1 Rank 24 App 0 finished at 27853237.674558 
+ Network node 17 Rank 55 App 0 finished at 27853238.062964 
+ Network node 25 Rank 46 App 0 finished at 27853238.309627 
+ Network node 28 Rank 7 App 0 finished at 27853245.667187 
+ Network node 14 Rank 8 App 0 finished at 27853330.657890 
+ Network node 34 Rank 13 App 0 finished at 27853331.023495 
+ Network node 6 Rank 3 App 0 finished at 27853913.164114 
+ Network node 13 Rank 21 App 0 finished at 27854071.877755 
+ Network node 22 Rank 32 App 0 finished at 27854157.642763 
+ Network node 5 Rank 18 App 0 finished at 27855387.498509 
+ Network node 33 Rank 28 App 0 finished at 27855617.501467 
+ Network node 30 Rank 15 App 0 finished at 27855710.850404 
+ Network node 11 Rank 67 App 0 finished at 27855809.464314 
+ Network node 15 Rank 45 App 0 finished at 27856451.485560 
+ Network node 23 Rank 5 App 0 finished at 27856451.510284 
+ Network node 29 Rank 26 App 0 finished at 27856451.550016 
+ Network node 35 Rank 25 App 0 finished at 27856451.858593 
+ Network node 27 Rank 9 App 0 finished at 27857362.848268 
+ Network node 31 Rank 62 App 0 finished at 27860569.932221 App 0: Received finished workload notificationApp 0: All non-synthetic workloads have completed
+*** END SIMULATION ***
+
+
+	: Running Time = 35.7901 seconds
+
+TW Library Statistics:
+	Total Events Processed                                11315021
+	Events Aborted (part of RBs)                                 0
+	Events Rolled Back                                           0
+	Event Ties Detected in PE Queues                             0
+	Efficiency                                              100.00 %
+	Total Remote (shared mem) Events Processed                   0
+	Percent Remote Events                                     0.00 %
+	Total Remote (network) Events Processed                      0
+	Percent Remote Events                                     0.00 %
+
+	Total Roll Backs                                             0
+	Primary Roll Backs                                           0
+	Secondary Roll Backs                                         0
+	Fossil Collect Attempts                                      0
+	Total GVT Computations                                       0
+
+	Net Events Processed                                  11315021
+	Event Rate (events/sec)                               316149.8
+	Total Events Scheduled Past End Time                         0
+
+TW Memory Statistics:
+	Events Allocated                                         46097
+	Memory Allocated                                         80000
+	Memory Wasted                                              101
+
+TW Data Structure sizes in bytes (sizeof):
+	PE struct                                                 3888
+	KP struct                                                  960
+	LP struct                                                  960
+	LP Model struct                                             96
+	LP RNGs                                                     80
+	Total LP                                                  1136
+	Event struct                                               976
+	Event struct with Model                                   1768
+
+TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz):
+	Initialization                                          0.3151
+	Priority Queue (enq/deq)                                5.0614
+	AVL Tree (insert/delete)                                0.0000
+	LZ4 (de)compression                                     0.0000
+	Buddy system                                            0.0000
+	Event Processing                                        0.0000
+	Event Cancel                                            0.0000
+	Event Abort                                             0.0000
+
+	GVT                                                     0.0000
+	Fossil Collect                                          0.0000
+	Primary Rollbacks                                       0.0000
+	Network Read                                            0.0000
+	Other Network                                           0.0000
+	Instrumentation (computation)                           0.0000
+	Instrumentation (write)                                 0.0000
+	Total Time (Note: Using Running Time above for Speedup)     74.9855
+
+TW GVT Statistics: MPI AllReduce
+	GVT Interval                                                16
+	GVT Real Time Interval (cycles)                    0
+	GVT Real Time Interval (sec)                        0.00000000
+	Batch Size                                                  16
+
+	Forced GVT                                                   0
+	Total GVT Computations                                       0
+	Total All Reduce Calls                                       0
+	Average Reduction / GVT                                   -nan
+
+ Total bytes sent 2513292480 recvd 2513292480 
+ max runtime 27860569.932221 ns avg runtime 24501344.282682 
+ max comm time 21356588.612738 avg comm time 19501219.282682 
+ max send time 104174353.511400 avg send time 74765835.724547 
+ max recv time 100173347.933146 avg recv time 74419141.320531 
+ max wait time 19215338.524462 avg wait time 16456309.234173 
+
+----------
+Per App Max Elapsed Times:
+	App 0: 27860569.9322
+	App 1: 21356838.6127
+----------
+LP-IO: writing output to tmptest-jacobiS_MILC-25331-1693498581/
+LP-IO: data files:
+   tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
+   tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
+   tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
+   tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
+   tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time
+   tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
+
+------------------ Dragonfly Dally Parameters ---------
+	num_routers =            4
+	local_bandwidth =        5.25
+	global_bandwidth =       4.70
+	cn_bandwidth =           5.25
+	num_vcs =                4
+	num_qos_levels =         1
+	local_vc_size =          16384
+	global_vc_size =         16384
+	cn_vc_size =             32768
+	chunk_size =             4096
+	num_cn =                 2
+	cn_radix =               2
+	intra_grp_radix =        3
+	num_groups =             9
+	total_groups =           9
+	virtual radix =          7
+	total_routers =          36
+	total_terminals =        72
+	num_global_channels =    2
+	num_injection_queues =   1
+	num_rails =              1
+	num_planes =             1
+	cn_delay =               726.61
+	local_delay =            726.61
+	global_delay =           811.64
+	local credit_delay =     1.42
+	global credit_delay =    1.42
+	cn credit_delay =        1.42
+	router_delay =           100.00
+	routing =                PROG_ADAPTIVE
+	adaptive_threshold =     0
+	max hops notification =  2147483647
+------------------------------------------------------
+
+
+Average number of hops traversed 4.101973 average chunk latency 40.897694 us maximum chunk latency 732.304909 us avg message size 363612.937500 bytes finished messages 6912 finished chunks 618912
+
+ADAPTIVE ROUTING STATS: 349554 chunks routed minimally 269358 chunks routed non-minimally completed packets 618912 
+
+Total packets generated 618912 finished 618912 Locally routed- same router 14696 different-router 56472 Remote (inter-group) 547744 

From 3518083e0a82bcb3ecc694cf2367e79e77e1f881 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 1 Sep 2023 10:48:00 -0500
Subject: [PATCH 040/188] Updating to allow Union to run alongside surrogate

---
 CMakeLists.txt                                |  37 +++-
 codes_config.h.cmake.in                       |  20 ++
 codes_config.h.in                             | 173 ------------------
 .../tutorial-ping-pong-surrogate.conf.in      |   2 +-
 doc/example/tutorial-ping-pong.conf.in        |   2 +-
 src/CMakeLists.txt                            |  18 +-
 .../methods/codes-conc-online-comm-wrkld.C    |  33 ++--
 .../methods/codes-online-comm-wrkld.C         |   2 +-
 tests/conf/modelnet-p2p-bw-loggp.conf         |   2 +-
 tests/conf/modelnet-prio-sched-test.conf      |   2 +-
 tests/conf/modelnet-test-dragonfly.conf       |   2 +-
 tests/conf/modelnet-test-loggp.conf           |   2 +-
 tests/conf/modelnet-test-simplep2p.conf       |   2 +-
 tests/conf/modelnet-test-slimfly.conf         |   2 +-
 tests/conf/modelnet-test-torus.conf           |   2 +-
 tests/conf/modelnet-test.conf                 |   2 +-
 16 files changed, 94 insertions(+), 209 deletions(-)
 create mode 100644 codes_config.h.cmake.in
 delete mode 100644 codes_config.h.in

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 26936630..fd4f4e56 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,12 +22,13 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 
 set(ROSS_PKG_CONFIG_PATH "" CACHE PATH "Where is ROSS PKG_CONFIG is installed?")
 set(SWM_PKG_CONFIG_PATH "" CACHE PATH "Where is the SWM PKG_CONFIG installed?")
+set(UNION_PKG_CONFIG_PATH "" CACHE PATH "Where is the Union PKG_CONFIG installed?")
 set(ARGOBOTS_PKG_CONFIG_PATH "" CACHE PATH "Where is argobots PKG_COPNFIG installed? Necessary for SWM")
 set(DAMARIS_PKG_CONFIG_PATH "" CACHE PATH "Where is the damaris PKG_CONFIG installed?")
 
 
 find_package(PkgConfig REQUIRED)
-set(ENV{PKG_CONFIG_PATH} "${ROSS_PKG_CONFIG_PATH}:${SWM_PKG_CONFIG_PATH}:${ARGOBOTS_PKG_CONFIG_PATH}")
+set(ENV{PKG_CONFIG_PATH} "${ROSS_PKG_CONFIG_PATH}:${SWM_PKG_CONFIG_PATH}:${UNION_PKG_CONFIG_PATH}:${ARGOBOTS_PKG_CONFIG_PATH}")
 pkg_check_modules(ROSS REQUIRED IMPORTED_TARGET ross)
 
 # MPI
@@ -57,7 +58,7 @@ else(DUMPI_LIB)
         set(USE_DUMPI true)
 endif()
 
-## SWM
+# SWM
 pkg_check_modules(SWM IMPORTED_TARGET swm)
 if(NOT SWM_FOUND)
         message(STATUS "SWM Library Not Found, Online workloads disabled")
@@ -72,10 +73,34 @@ else(SWM_FOUND)
 
                 pkg_get_variable(SWM_DATAROOTDIR swm datarootdir)
                 cmake_print_variables(SWM_DATAROOTDIR)
-        
+
                 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SWM_CFLAGS} -I${SWM_INCLUDE}")
-                add_definitions(-DUSE_ONLINE=1)
-                set(USE_ONLINE true)
+                #add_definitions(-DUSE_SWM=1)
+                #set(USE_SWM true)
+        endif()
+endif()
+
+pkg_check_modules(UNION IMPORTED_TARGET union)
+if(NOT UNION_FOUND)
+        message(STATUS "UNION Library Not Found, Online workloads disabled")
+else(UNION_FOUND)
+        message(STATUS "UNION Library Found: ${UNION_LIBRARIES}")
+        pkg_check_modules(ARGOBOTS REQUIRED IMPORTED_TARGET argobots)
+        if(NOT ARGOBOTS_FOUND)
+                message(STATUS "Argobots Library Not Found, Union workloads disabled")
+        else(ARGOBOTS_FOUND)
+                message(STATUS "Argobots Library Found: ${ARGOBOTS_LIBRARIES}")
+                #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGOBOTS_CFLAGS} -I${ARGOBOTS_INCLUDE}")
+
+                pkg_get_variable(UNION_DATAROOTDIR union datarootdir)
+                cmake_print_variables(UNION_DATAROOTDIR)
+        
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${UNION_INCLUDE}")
+                foreach(INCLUDE_OPT ${UNION_CFLAGS})
+                    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${INCLUDE_OPT}")
+                endforeach(INCLUDE_OPT)
+                add_definitions(-DUSE_UNION=1)
+                set(USE_UNION true)
         endif()
 endif()
 
@@ -100,7 +125,7 @@ cmake_print_variables(CMAKE_C_FLAGS)
 add_subdirectory(src)
 
 
-configure_file(codes_config.h.in codes_config.h)
+configure_file(codes_config.h.cmake.in codes_config.h)
 
 add_subdirectory(doc/example)
 
diff --git a/codes_config.h.cmake.in b/codes_config.h.cmake.in
new file mode 100644
index 00000000..1a687a23
--- /dev/null
+++ b/codes_config.h.cmake.in
@@ -0,0 +1,20 @@
+
+
+
+// ross
+
+// dumpi
+
+// swm
+#define SWM_DATAROOTDIR "${SWM_DATAROOTDIR}"
+// union
+#define UNION_DATADIR "${UNION_DATAROOTDIR}"
+
+
+// damaris
+
+
+// darshan
+
+
+// cortex
diff --git a/codes_config.h.in b/codes_config.h.in
deleted file mode 100644
index c3853929..00000000
--- a/codes_config.h.in
+++ /dev/null
@@ -1,173 +0,0 @@
-/* codes_config.h.in.  Generated from configure.ac by autoheader.  */
-
-/* define if the Boost library is available */
-#undef HAVE_BOOST
-
-/* define if the compiler supports basic C++11 syntax */
-#undef HAVE_CXX11
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#undef HAVE_DLFCN_H
-
-/* Define to 1 if you have the <execinfo.h> header file. */
-#undef HAVE_EXECINFO_H
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the `m' library (-lm). */
-#undef HAVE_LIBM
-
-/* Define to 1 if you have the `pthread' library (-lpthread). */
-#undef HAVE_LIBPTHREAD
-
-/* Define to 1 if you have the `undumpi' library (-lundumpi). */
-#undef HAVE_LIBUNDUMPI
-
-/* Define to 1 if you have the <malloc.h> header file. */
-#undef HAVE_MALLOC_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have the `memset' function. */
-#undef HAVE_MEMSET
-
-/* Define to 1 if you have the <pthread.h> header file. */
-#undef HAVE_PTHREAD_H
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* If old-style pure reentrant parser syntax is supported by ${YACC} */
-#undef HAVE_YACC_OLD_PURE
-
-/* If old-style push parser syntax is supported by ${YACC} */
-#undef HAVE_YACC_OLD_PUSH
-
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
-   */
-#undef LT_OBJDIR
-
-/* Define to 1 if your C compiler doesn't accept -c and -o together. */
-#undef NO_MINUS_C_MINUS_O
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the home page for this package. */
-#undef PACKAGE_URL
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* if using json data files */
-#undef SWM_DATAROOTDIR
-
-/* if using json data files */
-#undef UNION_DATADIR
-
-/* If enabling valgrind-clean build */
-#undef VALGRIND
-
-/* Version number of package */
-#undef VERSION
-
-/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
-   `char[]'. */
-#undef YYTEXT_POINTER
-
-/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
-   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
-   #define below would cause a syntax error. */
-#undef _UINT32_T
-
-/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
-   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
-   #define below would cause a syntax error. */
-#undef _UINT64_T
-
-/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
-   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
-   #define below would cause a syntax error. */
-#undef _UINT8_T
-
-/* Define to empty if `const' does not conform to ANSI C. */
-#undef const
-
-/* Define to `__inline__' or `__inline' if that's what the C compiler
-   calls it, or to nothing if 'inline' is not supported under any name.  */
-#ifndef __cplusplus
-#undef inline
-#endif
-
-/* Define to the type of a signed integer type of width exactly 16 bits if
-   such a type exists and the standard includes do not define it. */
-#undef int16_t
-
-/* Define to the type of a signed integer type of width exactly 32 bits if
-   such a type exists and the standard includes do not define it. */
-#undef int32_t
-
-/* Define to the type of a signed integer type of width exactly 64 bits if
-   such a type exists and the standard includes do not define it. */
-#undef int64_t
-
-/* Define to the type of a signed integer type of width exactly 8 bits if such
-   a type exists and the standard includes do not define it. */
-#undef int8_t
-
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-#undef size_t
-
-/* Define to `int' if <sys/types.h> does not define. */
-#undef ssize_t
-
-/* Define to the type of an unsigned integer type of width exactly 16 bits if
-   such a type exists and the standard includes do not define it. */
-#undef uint16_t
-
-/* Define to the type of an unsigned integer type of width exactly 32 bits if
-   such a type exists and the standard includes do not define it. */
-#undef uint32_t
-
-/* Define to the type of an unsigned integer type of width exactly 64 bits if
-   such a type exists and the standard includes do not define it. */
-#undef uint64_t
-
-/* Define to the type of an unsigned integer type of width exactly 8 bits if
-   such a type exists and the standard includes do not define it. */
-#undef uint8_t
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in
index d0e0b1af..7afc569c 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf.in
+++ b/doc/example/tutorial-ping-pong-surrogate.conf.in
@@ -41,7 +41,7 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="2.0";
 # ROSS message size
-   message_size="416";
+   message_size="440";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in
index 9ed21c84..6de3c4ff 100644
--- a/doc/example/tutorial-ping-pong.conf.in
+++ b/doc/example/tutorial-ping-pong.conf.in
@@ -38,7 +38,7 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="2.0";
 # ROSS message size
-   message_size="416";
+   message_size="440";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 28b5d2a5..a7d5944a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -89,11 +89,17 @@ if(USE_DUMPI)
     list(APPEND LIBS_TO_LINK ${DUMPI_LIB})
 endif()
 
-if(USE_ONLINE)
+if(USE_SWM)
     list(APPEND SRCS workload/methods/codes-online-comm-wrkld.C)
     list(APPEND LIBS_TO_LINK PkgConfig::SWM)
     list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS)
 endif()
+if(USE_UNION)
+    list(APPEND SRCS workload/methods/codes-conc-online-comm-wrkld.C)
+    list(APPEND LIBS_TO_LINK PkgConfig::SWM)
+    list(APPEND LIBS_TO_LINK PkgConfig::UNION)
+    list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS)
+endif()
 
 
 if(USE_RECORDER)
@@ -122,12 +128,18 @@ if(USE_DUMPI)
     target_include_directories(codes PUBLIC ${DUMPI_INCLUDE})
 endif()
 
-#LINK ARGOBOTS and SWM ONLINE
+#LINK ARGOBOTS, SWM and UNION
 # target_link_libraries(codes PUBLIC PkgConfig::ARGOBOTS)
-if(USE_ONLINE)
+if(USE_SWM)
+    target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS})
+    # target_link_libraries(codes PUBLIC PkgConfig::SWM)
+    target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS})
+endif()
+if(USE_UNION)
     target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS})
     # target_link_libraries(codes PUBLIC PkgConfig::SWM)
     target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS})
+    target_include_directories(codes PUBLIC ${UNION_INCLUDE_DIRS})
 endif()
 
 #LINK ROSS
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index 1c2836b4..d341cb7a 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -29,6 +29,7 @@
 #include "nearest_neighbor_swm_user_code.h"
 #include "all_to_one_swm_user_code.h"
 #include "milc_swm_user_code.h"
+#include "abt.h"
 //#endif
 
 #define ALLREDUCE_SHORT_MSG_SIZE 2048
@@ -306,7 +307,7 @@ void UNION_MPI_Send(const void *buf,
     struct codes_workload_op wrkld_per_rank;
 
     int datatypesize;
-    MPI_Type_size(datatype, &datatypesize);
+    UNION_Type_size(datatype, &datatypesize);
 
     wrkld_per_rank.op_type = CODES_WK_SEND;
     wrkld_per_rank.u.send.tag = tag;
@@ -346,7 +347,7 @@ void UNION_MPI_Recv(void *buf,
     struct codes_workload_op wrkld_per_rank;
 
     int datatypesize;
-    MPI_Type_size(datatype, &datatypesize);
+    UNION_Type_size(datatype, &datatypesize);
 
     wrkld_per_rank.op_type = CODES_WK_RECV;
     wrkld_per_rank.u.recv.tag = tag;
@@ -392,8 +393,8 @@ void UNION_MPI_Sendrecv(const void *sendbuf,
     struct codes_workload_op send_op;
 
     int datatypesize1, datatypesize2;
-    MPI_Type_size(sendtype, &datatypesize1);
-    MPI_Type_size(recvtype, &datatypesize2);
+    UNION_Type_size(sendtype, &datatypesize1);
+    UNION_Type_size(recvtype, &datatypesize2);
 
     send_op.op_type = CODES_WK_SEND;
     send_op.u.send.tag = sendtag;
@@ -456,7 +457,7 @@ void UNION_MPI_Barrier(UNION_Comm comm)
         dest = (rank + mask) % size;
         src = (rank - mask + size) % size;
 
-        UNION_MPI_Sendrecv(NULL, 0, MPI_INT, dest, 1234, NULL, 0, MPI_INT, src, 1234,
+        UNION_MPI_Sendrecv(NULL, 0, UNION_Int, dest, 1234, NULL, 0, UNION_Int, src, 1234,
                 comm, NULL);
 
         mask <<= 1;
@@ -480,7 +481,7 @@ void UNION_MPI_Isend(const void *buf,
     struct codes_workload_op wrkld_per_rank;
 
     int datatypesize;
-    MPI_Type_size(datatype, &datatypesize);
+    UNION_Type_size(datatype, &datatypesize);
 
     wrkld_per_rank.op_type = CODES_WK_ISEND;
     wrkld_per_rank.u.send.tag = tag;    
@@ -524,7 +525,7 @@ void UNION_MPI_Irecv(void *buf,
     struct codes_workload_op wrkld_per_rank;
 
     int datatypesize;
-    MPI_Type_size(datatype, &datatypesize);
+    UNION_Type_size(datatype, &datatypesize);
 
     wrkld_per_rank.op_type = CODES_WK_IRECV;
     wrkld_per_rank.u.recv.tag = tag;
@@ -615,7 +616,7 @@ void UNION_MPI_Allreduce(const void *sendbuf,
 
     UNION_MPI_Comm_size(comm, &comm_size);
     UNION_MPI_Comm_rank(comm, &rank);
-    MPI_Type_size(datatype, &type_size);
+    UNION_Type_size(datatype, &type_size);
 
     cnts = disps = NULL;
     
@@ -805,7 +806,7 @@ void bcast_scatter_doubling_allgather(void *buffer,
   int type_size, nbytes = 0;
   int relative_dst, dst_tree_root, my_tree_root, send_offset, recv_offset;
 
-  MPI_Type_size(datatype, &type_size);
+  UNION_Type_size(datatype, &type_size);
   UNION_MPI_Comm_size(comm, &comm_size);
 
   relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;
@@ -839,7 +840,7 @@ void bcast_scatter_doubling_allgather(void *buffer,
     if(relative_dst < comm_size)
     {
       recvcount = (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset);
-      UNION_MPI_Sendrecv(buffer,curr_size,MPI_BYTE,dst,-1005,buffer,recvcount,MPI_BYTE,dst,-1005,comm,&status);
+      UNION_MPI_Sendrecv(buffer,curr_size,UNION_Byte,dst,-1005,buffer,recvcount,UNION_Byte,dst,-1005,comm,&status);
       curr_size += recv_size;
     }
 
@@ -860,7 +861,7 @@ void bcast_scatter_ring_allgather(void *buffer,
   int recvd_size;
   UNION_Status status;
 
-  MPI_Type_size(datatype, &type_size);
+  UNION_Type_size(datatype, &type_size);
   UNION_MPI_Comm_size(comm, &comm_size);
 
   if(comm_size == 1) return;
@@ -890,7 +891,7 @@ void bcast_scatter_ring_allgather(void *buffer,
     if(right_count < 0) right_count = 0;
     right_disp = rel_j * scatter_size;
 
-    UNION_MPI_Sendrecv(buffer,right_count,MPI_BYTE,right,-1005,buffer,left_count,MPI_BYTE,left,-1005,comm,&status);  
+    UNION_MPI_Sendrecv(buffer,right_count,UNION_Byte,right,-1005,buffer,left_count,UNION_Byte,left,-1005,comm,&status);  
     curr_size += recvd_size;
     j = jnext;
     jnext = (comm_size + jnext - 1) % comm_size;
@@ -905,9 +906,9 @@ void UNION_MPI_Bcast(void *buffer,
             UNION_Comm comm)
 {
     int type_size, comm_size, rank;
-    MPI_Type_size(datatype, &type_size);
-    UNION_MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    UNION_MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
+    UNION_Type_size(datatype, &type_size);
+    UNION_MPI_Comm_rank(UNION_Comm_World, &rank);
+    UNION_MPI_Comm_size(UNION_Comm_World, &comm_size);
     int nbytes = count * type_size;
 
     if((nbytes < 12288) || (comm_size < 8)) {
@@ -1079,7 +1080,7 @@ void SWM_Send(SWM_PEER peer,
 }
 
 /*
- * @param comm_id: communicator ID (For now, MPI_COMM_WORLD)
+ * @param comm_id: communicator ID (For now, UNION_Comm_World)
  * reqvc and rspvc: virtual channel IDs for request and response (ignore for
  * our purpose)
  * buf: buffer location for the call (ignore for our purpose)
diff --git a/src/workload/methods/codes-online-comm-wrkld.C b/src/workload/methods/codes-online-comm-wrkld.C
index 6b850793..0ac3e318 100644
--- a/src/workload/methods/codes-online-comm-wrkld.C
+++ b/src/workload/methods/codes-online-comm-wrkld.C
@@ -29,7 +29,7 @@
 #include "milc_swm_user_code.h"
 #include "allreduce.h"
 #include "periodic_aggressor.h"
-// #include "abt.h"
+#include "abt.h"
 #include "layered_allbroadcast.h"
 
 #define ALLREDUCE_SHORT_MSG_SIZE 2048
diff --git a/tests/conf/modelnet-p2p-bw-loggp.conf b/tests/conf/modelnet-p2p-bw-loggp.conf
index 6e0f6859..f4b8030b 100644
--- a/tests/conf/modelnet-p2p-bw-loggp.conf
+++ b/tests/conf/modelnet-p2p-bw-loggp.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="2147483648";
-   message_size="424";
+   message_size="448";
    modelnet_order=( "loggp" );
    # scheduler options
    modelnet_scheduler="fcfs";
diff --git a/tests/conf/modelnet-prio-sched-test.conf b/tests/conf/modelnet-prio-sched-test.conf
index c038f36c..3a5dc7f6 100644
--- a/tests/conf/modelnet-prio-sched-test.conf
+++ b/tests/conf/modelnet-prio-sched-test.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="512";
-   message_size="456";
+   message_size="480";
    modelnet_order=( "simplenet" );
    # scheduler options
    modelnet_scheduler="priority";
diff --git a/tests/conf/modelnet-test-dragonfly.conf b/tests/conf/modelnet-test-dragonfly.conf
index ef8bd1a5..bf9ee298 100644
--- a/tests/conf/modelnet-test-dragonfly.conf
+++ b/tests/conf/modelnet-test-dragonfly.conf
@@ -23,6 +23,6 @@ PARAMS
    local_bandwidth="5.25";
    global_bandwidth="4.7";
    cn_bandwidth="5.25";
-   message_size="440";
+   message_size="464";
    routing="nonminimal";
 }
diff --git a/tests/conf/modelnet-test-loggp.conf b/tests/conf/modelnet-test-loggp.conf
index bfa39245..a7431e33 100644
--- a/tests/conf/modelnet-test-loggp.conf
+++ b/tests/conf/modelnet-test-loggp.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-   message_size="440";
+   message_size="464";
    modelnet_order=( "loggp" );
    # scheduler options
    modelnet_scheduler="fcfs-full";
diff --git a/tests/conf/modelnet-test-simplep2p.conf b/tests/conf/modelnet-test-simplep2p.conf
index 532ff74b..e77c6752 100644
--- a/tests/conf/modelnet-test-simplep2p.conf
+++ b/tests/conf/modelnet-test-simplep2p.conf
@@ -9,7 +9,7 @@ LPGROUPS
 }
 PARAMS
 {
-    message_size="440";
+    message_size="464";
     packet_size="1024";
     modelnet_order=("simplep2p");
     # scheduler options
diff --git a/tests/conf/modelnet-test-slimfly.conf b/tests/conf/modelnet-test-slimfly.conf
index ecbc72b2..89892c5e 100644
--- a/tests/conf/modelnet-test-slimfly.conf
+++ b/tests/conf/modelnet-test-slimfly.conf
@@ -30,6 +30,6 @@ PARAMS
    global_bandwidth="9.0";
    cn_bandwidth="9.0";
    link_delay = "0";
-   message_size="440";
+   message_size="464";
    routing="minimal";
 }
diff --git a/tests/conf/modelnet-test-torus.conf b/tests/conf/modelnet-test-torus.conf
index e523f176..da5980ba 100644
--- a/tests/conf/modelnet-test-torus.conf
+++ b/tests/conf/modelnet-test-torus.conf
@@ -14,7 +14,7 @@ PARAMS
    # scheduler options
    modelnet_scheduler="fcfs";
    # modelnet_scheduler="round-robin";
-   message_size="440";
+   message_size="464";
    n_dims="3";
    dim_length="4,2,2";
    link_bandwidth="2.0";
diff --git a/tests/conf/modelnet-test.conf b/tests/conf/modelnet-test.conf
index e8761379..7113709f 100644
--- a/tests/conf/modelnet-test.conf
+++ b/tests/conf/modelnet-test.conf
@@ -10,7 +10,7 @@ LPGROUPS
 PARAMS
 {
    packet_size="512";
-   message_size="440";
+   message_size="464";
    modelnet_order=( "simplenet" );
    # scheduler options
    modelnet_scheduler="fcfs";

From 2a236fc24cdb1e6e1b2320273881105eb92171c7 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 1 Sep 2023 11:26:13 -0500
Subject: [PATCH 041/188] Updated compilation instructions

---
 CMakeLists.txt              | 74 ++++++++++++++++++-------------------
 codes/net/dragonfly-dally.h |  2 -
 src/Makefile.subdir         |  2 +
 3 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd4f4e56..2041d792 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,51 +58,47 @@ else(DUMPI_LIB)
         set(USE_DUMPI true)
 endif()
 
-# SWM
+# SWM and UNION (both require ARGOBOTS to function)
 pkg_check_modules(SWM IMPORTED_TARGET swm)
 if(NOT SWM_FOUND)
-        message(STATUS "SWM Library Not Found, Online workloads disabled")
+    message(STATUS "SWM Library Not Found, Online workloads disabled")
+
 else(SWM_FOUND)
-        message(STATUS "SWM Library Found: ${SWM_LIBRARIES}")
-        pkg_check_modules(ARGOBOTS REQUIRED IMPORTED_TARGET argobots)
-        if(NOT ARGOBOTS_FOUND)
-                message(STATUS "Argobots Library Not Found, Online workloads disabled")
-        else(ARGOBOTS_FOUND)
-                message(STATUS "Argobots Library Found: ${ARGOBOTS_LIBRARIES}")
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGOBOTS_CFLAGS} -I${ARGOBOTS_INCLUDE}")
-
-                pkg_get_variable(SWM_DATAROOTDIR swm datarootdir)
-                cmake_print_variables(SWM_DATAROOTDIR)
-
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SWM_CFLAGS} -I${SWM_INCLUDE}")
-                #add_definitions(-DUSE_SWM=1)
-                #set(USE_SWM true)
+    message(STATUS "SWM Library Found: ${SWM_LIBRARIES}")
+    pkg_check_modules(ARGOBOTS REQUIRED IMPORTED_TARGET argobots)
+    if(NOT ARGOBOTS_FOUND)
+        message(STATUS "Argobots Library Not Found, Online workloads disabled")
+
+    else(ARGOBOTS_FOUND)
+        message(STATUS "Argobots Library Found: ${ARGOBOTS_LIBRARIES}")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGOBOTS_CFLAGS} -I${ARGOBOTS_INCLUDE}")
+
+        pkg_get_variable(SWM_DATAROOTDIR swm datarootdir)
+        cmake_print_variables(SWM_DATAROOTDIR)
+
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SWM_CFLAGS} -I${SWM_INCLUDE}")
+
+        pkg_check_modules(UNION IMPORTED_TARGET union)
+        if(NOT UNION_FOUND)
+            message(STATUS "UNION Library Not Found, SWM-only online workloads enabled")
+            add_definitions(-DUSE_SWM=1)
+            set(USE_SWM true)
+        else(UNION_FOUND)
+            message(STATUS "UNION Library Found: ${UNION_LIBRARIES}")
+            pkg_get_variable(UNION_DATAROOTDIR union datarootdir)
+            cmake_print_variables(UNION_DATAROOTDIR)
+            
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${UNION_INCLUDE}")
+            foreach(INCLUDE_OPT ${UNION_CFLAGS})
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${INCLUDE_OPT}")
+            endforeach()
+
+            add_definitions(-DUSE_UNION=1)
+            set(USE_UNION true)
         endif()
+    endif()
 endif()
 
-pkg_check_modules(UNION IMPORTED_TARGET union)
-if(NOT UNION_FOUND)
-        message(STATUS "UNION Library Not Found, Online workloads disabled")
-else(UNION_FOUND)
-        message(STATUS "UNION Library Found: ${UNION_LIBRARIES}")
-        pkg_check_modules(ARGOBOTS REQUIRED IMPORTED_TARGET argobots)
-        if(NOT ARGOBOTS_FOUND)
-                message(STATUS "Argobots Library Not Found, Union workloads disabled")
-        else(ARGOBOTS_FOUND)
-                message(STATUS "Argobots Library Found: ${ARGOBOTS_LIBRARIES}")
-                #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGOBOTS_CFLAGS} -I${ARGOBOTS_INCLUDE}")
-
-                pkg_get_variable(UNION_DATAROOTDIR union datarootdir)
-                cmake_print_variables(UNION_DATAROOTDIR)
-        
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${UNION_INCLUDE}")
-                foreach(INCLUDE_OPT ${UNION_CFLAGS})
-                    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${INCLUDE_OPT}")
-                endforeach(INCLUDE_OPT)
-                add_definitions(-DUSE_UNION=1)
-                set(USE_UNION true)
-        endif()
-endif()
 
 ## RECORDER
 option(USE_RECORDER "use recorder io workload" ON)
diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 104b3e8b..656d99d8 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -12,8 +12,6 @@ extern "C" {
 #endif
 
 #include <ross.h>
-#include <stdbool.h>
-#include <model-net.h>
 
 typedef struct terminal_dally_message terminal_dally_message;
 
diff --git a/src/Makefile.subdir b/src/Makefile.subdir
index 9342c919..5721f7df 100644
--- a/src/Makefile.subdir
+++ b/src/Makefile.subdir
@@ -83,6 +83,7 @@ nobase_include_HEADERS = \
 	codes/resource-lp.h \
 	codes/local-storage-model.h \
 	codes/rc-stack.h \
+	codes/surrogate.h \
 	codes/codes-jobmap.h \
 	codes/codes-callback.h \
 	codes/codes-mapping-context.h \
@@ -161,6 +162,7 @@ src_libcodes_la_SOURCES =  \
 	src/workload/methods/codes-iomock-wrkld.c \
 	codes/rc-stack.h \
 	src/util/rc-stack.c \
+        src/util/surrogate.c \
 	src/networks/model-net/network-managers/dragonfly-network-manager.C \
 	src/networks/model-net/core/model-net.c \
 	src/networks/model-net/common-net.c \

From 1b14c9f4c2569001c4a4332cce4921615070b36e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 1 Sep 2023 15:14:21 -0500
Subject: [PATCH 042/188] Fixing UNION and SWM compilation of Argobots

---
 CMakeLists.txt     |  2 ++
 src/CMakeLists.txt | 44 ++++++++++++++++++++++++--------------------
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2041d792..2f8cb97a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,6 +77,8 @@ else(SWM_FOUND)
         cmake_print_variables(SWM_DATAROOTDIR)
 
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SWM_CFLAGS} -I${SWM_INCLUDE}")
+        add_definitions(-DUSE_ONLINE=1)
+        set(USE_ONLINE true)
 
         pkg_check_modules(UNION IMPORTED_TARGET union)
         if(NOT UNION_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a7d5944a..bbb381ba 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -89,16 +89,18 @@ if(USE_DUMPI)
     list(APPEND LIBS_TO_LINK ${DUMPI_LIB})
 endif()
 
-if(USE_SWM)
-    list(APPEND SRCS workload/methods/codes-online-comm-wrkld.C)
-    list(APPEND LIBS_TO_LINK PkgConfig::SWM)
-    list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS)
-endif()
-if(USE_UNION)
-    list(APPEND SRCS workload/methods/codes-conc-online-comm-wrkld.C)
-    list(APPEND LIBS_TO_LINK PkgConfig::SWM)
-    list(APPEND LIBS_TO_LINK PkgConfig::UNION)
-    list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS)
+if(USE_ONLINE)
+    if(USE_SWM)
+        list(APPEND SRCS workload/methods/codes-online-comm-wrkld.C)
+        list(APPEND LIBS_TO_LINK PkgConfig::SWM)
+        list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS)
+    endif()
+    if(USE_UNION)
+        list(APPEND SRCS workload/methods/codes-conc-online-comm-wrkld.C)
+        list(APPEND LIBS_TO_LINK PkgConfig::SWM)
+        list(APPEND LIBS_TO_LINK PkgConfig::UNION)
+        list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS)
+    endif()
 endif()
 
 
@@ -130,16 +132,18 @@ endif()
 
 #LINK ARGOBOTS, SWM and UNION
 # target_link_libraries(codes PUBLIC PkgConfig::ARGOBOTS)
-if(USE_SWM)
-    target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS})
-    # target_link_libraries(codes PUBLIC PkgConfig::SWM)
-    target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS})
-endif()
-if(USE_UNION)
-    target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS})
-    # target_link_libraries(codes PUBLIC PkgConfig::SWM)
-    target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS})
-    target_include_directories(codes PUBLIC ${UNION_INCLUDE_DIRS})
+if(USE_ONLINE)
+    if(USE_SWM)
+        target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS})
+        # target_link_libraries(codes PUBLIC PkgConfig::SWM)
+        target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS})
+    endif()
+    if(USE_UNION)
+        target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS})
+        # target_link_libraries(codes PUBLIC PkgConfig::SWM)
+        target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS})
+        target_include_directories(codes PUBLIC ${UNION_INCLUDE_DIRS})
+    endif()
 endif()
 
 #LINK ROSS

From 166d3dc680f32bbe199437202a311af727409545 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 1 Sep 2023 15:24:12 -0500
Subject: [PATCH 043/188] Harcoded application surrogate. Skipping several
 iterations of simulation

---
 src/network-workloads/model-net-mpi-replay.c | 102 +++++++++++++++----
 1 file changed, 82 insertions(+), 20 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 1f613466..01d56fb0 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -185,7 +185,9 @@ enum MPI_NW_EVENTS
     CLI_BCKGND_GEN,
     CLI_BCKGND_CHANGE,
     CLI_NBR_FINISH,
-    CLI_OTHER_FINISH //received when another workload has finished
+    CLI_OTHER_FINISH, //received when another workload has finished
+    // Surrogate events
+    SURR_SKIP_ITERATION, // skips one (several) iteration(s) of simulation
 };
 
 /* type of synthetic traffic */
@@ -1112,6 +1114,46 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
         }
     }
 }
+
+void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
+    // TODO: implement!!
+}
+
+void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
+{
+	struct codes_workload_op * mpi_op = (struct codes_workload_op*) malloc(sizeof(struct codes_workload_op));
+    m->mpi_op = mpi_op;
+
+    // consuming all events until iteration 95 from iteration 4
+    bool reached_end = false;
+    while (!reached_end) {
+        codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op);
+
+        switch (mpi_op->op_type) {
+            case CODES_WK_MARK:
+                if (mpi_op->u.send.tag == 95) {
+                    reached_end = true;
+                }
+                break;
+            // If we reach the end of simulation, rollback once to allow the operation to be processed normally
+            case CODES_WK_END:
+                codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op);
+                reached_end = true;
+                break;
+            default:
+        }
+    }
+
+    tw_event *e = tw_event_new(lp->gid, 0.0, lp);
+    nw_message* msg = (nw_message*) tw_event_data(e);
+    msg->msg_type = MPI_OP_GET_NEXT;
+    tw_event_send(e);
+}
+
+bool have_we_hit_surrogate_switch(struct codes_workload_op * mpi_op) {
+    return mpi_op->u.send.tag == 4;
+}
+
 /* Debugging functions, may generate unused function warning */
 /*static void print_waiting_reqs(uint32_t * reqs, int count)
 {
@@ -2653,6 +2695,9 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
         case CLI_OTHER_FINISH:
             handle_other_finish(s, lp, bf, m);
             break;
+
+        case SURR_SKIP_ITERATION:
+            skip_iteration(s, lp, bf, m);
 	}
 }
 
@@ -2785,12 +2830,11 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
                 bf->c9 = 1;
                 return;
             }
-            
+
             /* Notify ranks from other job that checkpoint traffic has
              * completed */
-             printf("\n Network node %d Rank %llu App %d finished at %lf ", s->local_rank, LLU(s->nw_id), s->app_id, tw_now(lp));
-            int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); 
-
+            //int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
+            m->rc.saved_marker_time = tw_now(lp);
             notify_root_rank(s, lp, bf, m);
             // printf("Client rank %llu completed workload, local rank %d .\n", s->nw_id, s->local_rank);
 
@@ -2887,9 +2931,17 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 
 		case CODES_WK_MARK:
 			{
-				printf("\n MARK_%d node %llu job %d rank %d time %lf ", mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, tw_now(lp));
                 m->rc.saved_marker_time = tw_now(lp);
-				codes_issue_next_event(lp);
+
+                // If we have reached the surrogate switch time, skip next iteration(s)
+                if (have_we_hit_surrogate_switch(mpi_op)) {
+                    tw_event *e = tw_event_new(lp->gid, 1560888.53 * 91, lp);
+                    nw_message* msg = (nw_message*) tw_event_data(e);
+                    msg->msg_type = SURR_SKIP_ITERATION;
+                    tw_event_send(e);
+                } else {
+                    codes_issue_next_event(lp);
+                }
 			}
 			break;
 
@@ -3099,19 +3151,29 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
     switch(m->msg_type)
     {
         case MPI_OP_GET_NEXT:
-            if (m->mpi_op->op_type == CODES_WK_MARK) {
-                if (OUTPUT_MARKS)
-                {
-                    int written1;
-                    char marker_filename[128];
-                    written1 = sprintf(marker_filename, "mpi-replay-marker-tag-times");
-                    marker_filename[written1] = '\0';
-
-                    char tag_line[32];
-                    int written;
-                    written = sprintf(tag_line, "%d %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time);
-                    lp_io_write(lp->gid, marker_filename, written, tag_line);
-                }
+            switch (m->mpi_op->op_type) {
+                case CODES_WK_END:
+                    printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.saved_marker_time);
+                    break;
+
+                case CODES_WK_MARK:
+				    printf("MARK_%d node %llu job %d rank %d time %lf \n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.saved_marker_time);
+
+                    if (OUTPUT_MARKS)
+                    {
+                        int written1;
+                        char marker_filename[128];
+                        written1 = sprintf(marker_filename, "mpi-replay-marker-tag-times");
+                        marker_filename[written1] = '\0';
+
+                        char tag_line[32];
+                        int written;
+                        written = sprintf(tag_line, "%d %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time);
+                        lp_io_write(lp->gid, marker_filename, written, tag_line);
+                    }
+                    break;
+
+                default:
             }
 
 

From f96ac238c0fb53959ed7f019ca5cbe9f5e15c149 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 1 Sep 2023 16:02:05 -0500
Subject: [PATCH 044/188] In some systems the an assert assumption was not
 valid

---
 src/networks/model-net/dragonfly-dally.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index c179cca4..cbf7f31a 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2276,8 +2276,8 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
         char **timestamps;
         size_t len;
         configuration_get_multivalue(&config, "PARAMS", "router_buffer_snapshots", anno, &timestamps, &len);
-        assert((len > 0) == (timestamps != NULL));
         if (len) {
+            assert(timestamps != NULL);
             num_snapshots = len;
             snapshot_times = (tw_stime*) malloc(len * sizeof(tw_stime));
 

From 1f352686a30f7fd26362493691f818e9ffb5354e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 15 Sep 2023 10:04:45 -0400
Subject: [PATCH 045/188] Fixing non-determinism bug on dragonfly-dally

---
 src/networks/model-net/dragonfly-dally.C | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index cbf7f31a..117441ca 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5928,13 +5928,6 @@ static void router_packet_receive_rc(router_state * s,
 {  
     int output_port = msg->saved_vc;
     int output_chan = msg->saved_channel;
-
-    for(int i = 0 ; i < msg->num_cll; i++)
-        codes_local_latency_reverse(lp);
-
-    for(int i = 0; i < msg->num_rngs; i++)
-        tw_rand_reverse_unif(lp->rng);
-
     int src_term_id = msg->dfdally_src_terminal_id;
     int app_id = msg->saved_app_id;
 

From 54ea14885df2ad999dc6f2a88074e8356b14cf23 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 15 Sep 2023 14:46:06 -0400
Subject: [PATCH 046/188] Updating tests to run in AiMOS with no errors

Binaries compiled to run with MPI won't run on their own in AiMOS, they
need all variables properly set up, which requires to run them through
`mpirun`
---
 tests/lp-io-test.sh                        | 2 +-
 tests/lsm-test.sh                          | 2 +-
 tests/map-ctx-test.c                       | 2 ++
 tests/map-ctx-test.sh                      | 2 +-
 tests/mapping_test.sh                      | 2 +-
 tests/modelnet-p2p-bw-loggp.sh             | 2 +-
 tests/modelnet-prio-sched-test.sh          | 2 +-
 tests/modelnet-simplep2p-test.sh           | 2 +-
 tests/modelnet-test-dragonfly-synthetic.sh | 2 +-
 tests/modelnet-test-dragonfly.sh           | 2 +-
 tests/modelnet-test-em.sh                  | 2 +-
 tests/modelnet-test-fattree-synthetic.sh   | 2 +-
 tests/modelnet-test-loggp.sh               | 2 +-
 tests/modelnet-test-slimfly-synthetic.sh   | 2 +-
 tests/modelnet-test-slimfly.sh             | 2 +-
 tests/modelnet-test-torus.sh               | 2 +-
 tests/modelnet-test.sh                     | 2 +-
 tests/rc-stack-test.sh                     | 2 +-
 tests/resource-test.sh                     | 2 +-
 19 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/tests/lp-io-test.sh b/tests/lp-io-test.sh
index da7f52d5..c9a6d4fa 100755
--- a/tests/lp-io-test.sh
+++ b/tests/lp-io-test.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/lp-io-test --sync=1
+mpirun -np 1 "$bindir"/tests/lp-io-test --sync=1
diff --git a/tests/lsm-test.sh b/tests/lsm-test.sh
index 6b4c2afb..168a2411 100755
--- a/tests/lsm-test.sh
+++ b/tests/lsm-test.sh
@@ -9,4 +9,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/lsm-test --sync=1 --conf="$srcdir"/tests/conf/lsm-test.conf
+mpirun -np 1 "$bindir"/tests/lsm-test --sync=1 --conf="$srcdir"/tests/conf/lsm-test.conf
diff --git a/tests/map-ctx-test.c b/tests/map-ctx-test.c
index 1e754089..3ce14b95 100644
--- a/tests/map-ctx-test.c
+++ b/tests/map-ctx-test.c
@@ -161,6 +161,8 @@ int main(int argc, char *argv[])
     CHECK("group_direct_anno");
     CHECK("group_direct_anno");
 
+    MPI_Finalize();
+
     return 0;
 }
 
diff --git a/tests/map-ctx-test.sh b/tests/map-ctx-test.sh
index db73071d..def90c7f 100755
--- a/tests/map-ctx-test.sh
+++ b/tests/map-ctx-test.sh
@@ -9,4 +9,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/map-ctx-test "$srcdir"/tests/conf/map-ctx-test.conf
+mpirun -np 1 "$bindir"/tests/map-ctx-test "$srcdir"/tests/conf/map-ctx-test.conf
diff --git a/tests/mapping_test.sh b/tests/mapping_test.sh
index 5d97a70f..40d62908 100755
--- a/tests/mapping_test.sh
+++ b/tests/mapping_test.sh
@@ -7,7 +7,7 @@ fi
 tst="$srcdir/tests"
 set -e
 
-"$bindir"/tests/mapping_test --sync=1 --codes-config="$tst"/conf/mapping_test.conf \
+mpirun -np 1 "$bindir"/tests/mapping_test --sync=1 --codes-config="$tst"/conf/mapping_test.conf \
     2> mapping_test.err \
     1| grep TEST > mapping_test.out
 
diff --git a/tests/modelnet-p2p-bw-loggp.sh b/tests/modelnet-p2p-bw-loggp.sh
index 3850a260..b49c80e9 100755
--- a/tests/modelnet-p2p-bw-loggp.sh
+++ b/tests/modelnet-p2p-bw-loggp.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-p2p-bw --sync=1 -- "$srcdir"/tests/conf/modelnet-p2p-bw-loggp.conf
+mpirun -np 1 "$bindir"/tests/modelnet-p2p-bw --sync=1 -- "$srcdir"/tests/conf/modelnet-p2p-bw-loggp.conf
diff --git a/tests/modelnet-prio-sched-test.sh b/tests/modelnet-prio-sched-test.sh
index 49706be0..1d033d97 100755
--- a/tests/modelnet-prio-sched-test.sh
+++ b/tests/modelnet-prio-sched-test.sh
@@ -4,7 +4,7 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-prio-sched-test --sync=1 -- \
+mpirun -np 1 "$bindir"/tests/modelnet-prio-sched-test --sync=1 -- \
     $srcdir/tests/conf/modelnet-prio-sched-test.conf
 err=$?
 if [[ $err -ne 0 ]]; then
diff --git a/tests/modelnet-simplep2p-test.sh b/tests/modelnet-simplep2p-test.sh
index 7c2efa81..94e4cada 100755
--- a/tests/modelnet-simplep2p-test.sh
+++ b/tests/modelnet-simplep2p-test.sh
@@ -9,4 +9,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-simplep2p-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-simplep2p.conf
+mpirun -np 1 "$bindir"/tests/modelnet-simplep2p-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-simplep2p.conf
diff --git a/tests/modelnet-test-dragonfly-synthetic.sh b/tests/modelnet-test-dragonfly-synthetic.sh
index a18ebad9..a5b4b072 100755
--- a/tests/modelnet-test-dragonfly-synthetic.sh
+++ b/tests/modelnet-test-dragonfly-synthetic.sh
@@ -8,4 +8,4 @@ else
     bindir="$bindir"/src
 fi
 
-"$bindir"/model-net-synthetic --sync=1 --num_messages=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf
+mpirun -np 1 "$bindir"/model-net-synthetic --sync=1 --num_messages=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf
diff --git a/tests/modelnet-test-dragonfly.sh b/tests/modelnet-test-dragonfly.sh
index 68614d0e..8731a87a 100755
--- a/tests/modelnet-test-dragonfly.sh
+++ b/tests/modelnet-test-dragonfly.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-dragonfly.conf
+mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-dragonfly.conf
diff --git a/tests/modelnet-test-em.sh b/tests/modelnet-test-em.sh
index 6209d0dc..d4411d6d 100755
--- a/tests/modelnet-test-em.sh
+++ b/tests/modelnet-test-em.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-em.conf
+mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-em.conf
diff --git a/tests/modelnet-test-fattree-synthetic.sh b/tests/modelnet-test-fattree-synthetic.sh
index cd9b73cb..3f7744d9 100755
--- a/tests/modelnet-test-fattree-synthetic.sh
+++ b/tests/modelnet-test-fattree-synthetic.sh
@@ -13,7 +13,7 @@ else
     bindir="$bindir"/src
 fi
 
-"$bindir"/model-net-synthetic-fattree --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-fattree.conf
+mpirun -np 1 "$bindir"/model-net-synthetic-fattree --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-fattree.conf
 
 #source $srcdir/tests/download-traces.sh
 #src/network-workloads/model-net-mpi-replay --sync=1 --num_net_traces=27 --workload_file=/tmp/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00- --workload_type="dumpi" -- $srcdir/src/network-workloads/conf/modelnet-mpi-test-fattree.conf 
diff --git a/tests/modelnet-test-loggp.sh b/tests/modelnet-test-loggp.sh
index 656c0912..2eef34bd 100755
--- a/tests/modelnet-test-loggp.sh
+++ b/tests/modelnet-test-loggp.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-loggp.conf
+mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-loggp.conf
diff --git a/tests/modelnet-test-slimfly-synthetic.sh b/tests/modelnet-test-slimfly-synthetic.sh
index a8545acd..21ea0458 100755
--- a/tests/modelnet-test-slimfly-synthetic.sh
+++ b/tests/modelnet-test-slimfly-synthetic.sh
@@ -8,4 +8,4 @@ else
     bindir="$bindir"/src
 fi
 
-"$bindir"/model-net-synthetic-slimfly --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf
+mpirun -np 1 "$bindir"/model-net-synthetic-slimfly --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf
diff --git a/tests/modelnet-test-slimfly.sh b/tests/modelnet-test-slimfly.sh
index 87f91d08..2a5b6a8c 100755
--- a/tests/modelnet-test-slimfly.sh
+++ b/tests/modelnet-test-slimfly.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-slimfly.conf
+mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-slimfly.conf
diff --git a/tests/modelnet-test-torus.sh b/tests/modelnet-test-torus.sh
index fd3934ed..2b102bcc 100755
--- a/tests/modelnet-test-torus.sh
+++ b/tests/modelnet-test-torus.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-torus.conf
+mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-torus.conf
diff --git a/tests/modelnet-test.sh b/tests/modelnet-test.sh
index bcc3351b..248f5117 100755
--- a/tests/modelnet-test.sh
+++ b/tests/modelnet-test.sh
@@ -4,4 +4,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test.conf
+mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test.conf
diff --git a/tests/rc-stack-test.sh b/tests/rc-stack-test.sh
index b16cdfd8..ec254542 100755
--- a/tests/rc-stack-test.sh
+++ b/tests/rc-stack-test.sh
@@ -9,4 +9,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/modelnet-simplep2p-test
+mpirun -np 1 "$bindir"/tests/modelnet-simplep2p-test
diff --git a/tests/resource-test.sh b/tests/resource-test.sh
index 4b2cba1b..4125e0f8 100755
--- a/tests/resource-test.sh
+++ b/tests/resource-test.sh
@@ -9,4 +9,4 @@ if [ -z $GENERATED_USING_CMAKE ]; then
     bindir=.
 fi
 
-"$bindir"/tests/resource-test --sync=1 --codes-config="$srcdir"/tests/conf/buffer_test.conf
+mpirun -np 1 "$bindir"/tests/resource-test --sync=1 --codes-config="$srcdir"/tests/conf/buffer_test.conf

From 04fe07af9118c93a56b0f2540e53b21a9644f30d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 18 Sep 2023 17:58:48 -0400
Subject: [PATCH 047/188] Fixing compilation error for some non-compliant
 compilers (empty default tag in switch)

---
 src/network-workloads/model-net-mpi-replay.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 1c0803f1..c5fe93a7 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -1143,6 +1143,7 @@ void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
                 reached_end = true;
                 break;
             default:
+                break;
         }
     }
 
@@ -1153,7 +1154,8 @@ void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
 }
 
 bool have_we_hit_surrogate_switch(struct codes_workload_op * mpi_op) {
-    return mpi_op->u.send.tag == 4;
+    //return mpi_op->u.send.tag == 4;
+    return false;
 }
 
 /* Debugging functions, may generate unused function warning */
@@ -3244,6 +3246,7 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
                     break;
 
                 default:
+                    break;
             }
 
 

From e1e136ed76fd7901ea753482fae68bc70881aa30 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 22 Sep 2023 18:15:14 -0400
Subject: [PATCH 048/188] Refactoring surrogate code to separate it into
 multiple files

---
 codes/surrogate.h                             | 120 --------
 codes/surrogate/init.h                        |  57 ++++
 .../packet-latency-predictor/average.h        |  26 ++
 .../packet-latency-predictor/common.h         |  59 ++++
 codes/surrogate/switch.h                      |  63 ++++
 doc/example/tutorial-synthetic-ping-pong.c    |   2 +-
 src/CMakeLists.txt                            |   6 +-
 src/networks/model-net/dragonfly-dally.C      |   2 +-
 src/surrogate/init.c                          | 127 ++++++++
 .../packet-latency-predictor/average.c        | 112 ++++++++
 .../packet-latency-predictor/common.c         |   1 +
 src/{util/surrogate.c => surrogate/switch.c}  | 272 +-----------------
 12 files changed, 456 insertions(+), 391 deletions(-)
 delete mode 100644 codes/surrogate.h
 create mode 100644 codes/surrogate/init.h
 create mode 100644 codes/surrogate/packet-latency-predictor/average.h
 create mode 100644 codes/surrogate/packet-latency-predictor/common.h
 create mode 100644 codes/surrogate/switch.h
 create mode 100644 src/surrogate/init.c
 create mode 100644 src/surrogate/packet-latency-predictor/average.c
 create mode 100644 src/surrogate/packet-latency-predictor/common.c
 rename src/{util/surrogate.c => surrogate/switch.c} (63%)

diff --git a/codes/surrogate.h b/codes/surrogate.h
deleted file mode 100644
index 1f9bae92..00000000
--- a/codes/surrogate.h
+++ /dev/null
@@ -1,120 +0,0 @@
-#ifndef CODES_SURROGATE_H
-#define CODES_SURROGATE_H
-
-/**
- * surrogate.h -- Defining all functions to implement in order to run CODES in surrogate mode
- * Elkin Cruz
- *
- * Copyright (c) 2023 Rensselaer Polytechnic Institute
- */
-#include <ross.h>
-#include <stdbool.h>
-#include "codes/codes_mapping.h"
-#include "codes/lp-type-lookup.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * Variable definitions
- */
-
-// When true (below), the network state will be frozen at switch time (from
-// high-def to surrogate) and later reanimated on the switch back (from
-// surrogate to high-def). If not, all events will be kept in the network while
-// on surrogate mode, which means that the network will vacate completely
-extern bool freeze_network_on_switch;
-void print_surrogate_stats(void);
-
-/**
- * Terminal-to-terminal packet latency prediction machinery
- */
-
-// Packet latencies
-struct packet_start {
-    uint64_t packet_ID;
-    tw_lpid dest_terminal_lpid;  // ROSS id; LPID for terminal
-    unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
-    double travel_start_time;
-    double workload_injection_time; // this is when the workload passed down the event to model-net
-    double processing_packet_delay;  // delay for this packet to be processed from previous packet in the queue
-    uint32_t packet_size;
-    bool is_there_another_pckt_in_queue; // is there another packet in queue
-    void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
-    void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
-};
-
-struct packet_end {
-    double travel_end_time;
-    double next_packet_delay;  // Delay to start processing next packet
-};
-
-// Definition of functions needed to define a predictor
-typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
-typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now`
-typedef struct packet_end (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now`
-typedef void (*predict_pred_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
-
-// Each network model defines its own way to setup the packet latency predictor
-struct packet_latency_predictor {
-    init_pred_f        init;
-    feed_pred_f        feed;
-    predict_pred_f     predict;
-    predict_pred_rc_f  predict_rc;
-    size_t             predictor_data_sz; // `predictor_data` size
-};
-
-/**
- * Director machinery.
- * The director is in charge of switching back and forth from
- * surrogate mode to "high-def simulation"/vanilla mode
- */
-
-// Functions that director should have access to
-typedef void (*switch_surrogate_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
-typedef bool (*is_surrogate_on_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
-
-struct director_data {
-    switch_surrogate_f  switch_surrogate; // this function switches the model to and from surrogate-mode on a PE basis. It has to be called on all PEs to switch the entire simulation to its surrogate version
-    is_surrogate_on_f   is_surrogate_on;  // determines if the model has switched or not
-};
-
-
-/**
- * Configuration specifics
- */
-
-// Switches back and forth from surrogate mode as defined by network model
-// (e.g, by dragonfly-dally.C)
-// Parameters: `data` corresponds to the lp sub-state, lp is the lp pointer, and the array of events in queue (to be processed)
-typedef void (*model_switch_f) (void * data, tw_lp * lp, tw_event **);
-typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
-
-struct lp_types_switch {
-    char lpname[MAX_NAME_LENGTH];
-    bool trigger_idle_modelnet;  // Trigger idle events for model-net (prevents a model to be stuck in a schedule loop if it is to process packets during surrogate-mode). If this is true and the lpname does not start with 'modelnet_', the behaviour is undefined
-    model_switch_f        highdef_to_surrogate;
-    model_switch_f        surrogate_to_highdef;
-    model_ask_if_freeze_f should_event_be_frozen;  // NULL means event from LP type shouldn't be frozen
-};
-
-struct surrogate_config {
-    struct director_data director;  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
-    int total_terminals;  //!< total number of terminals
-    size_t n_lp_types;
-    struct lp_types_switch lp_types[MAX_LP_TYPES];
-};
-
-/** Loads surrogate configuration, including packet latency predictor. */
-void surrogate_configure(
-        char const * const annotation,
-        struct surrogate_config * const config,
-        struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it
-);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* end of include guard */
diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h
new file mode 100644
index 00000000..df260e87
--- /dev/null
+++ b/codes/surrogate/init.h
@@ -0,0 +1,57 @@
+#ifndef CODES_SURROGATE_INIT_H
+#define CODES_SURROGATE_INIT_H
+
+/**
+ * init.h -- Config/initialization point
+ * Elkin Cruz
+ *
+ * Copyright (c) 2023 Rensselaer Polytechnic Institute
+ */
+#include "codes/surrogate/packet-latency-predictor/common.h"
+#include "codes/surrogate/switch.h"
+
+// A simple macro to clarify code a bit
+#define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); }
+
+// Basic level of debugging is 1. It should be always turned on
+// because it tells us when a switch to or from surrogate-mode happened.
+// It can be deactivated (set to 0) if it ends up being too obnoxious
+// Level 0: don't show anything
+// Level 1: show when surrogate-mode is activated and deactivated
+// Level 2: level 1 and some information at each GVT
+// Level 3: level 1 and show extended information at each GVT
+#define DEBUG_DIRECTOR 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Variable definitions
+ */
+
+void print_surrogate_stats(void);
+
+struct surrogate_config {
+    struct director_data director;  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
+    int total_terminals;  //!< total number of terminals
+    size_t n_lp_types;
+    struct lp_types_switch lp_types[MAX_LP_TYPES];
+};
+
+/** Loads surrogate configuration, including packet latency predictor. */
+void surrogate_configure(
+        char const * const annotation,
+        struct surrogate_config * const config,
+        struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it
+);
+
+extern struct surrogate_config surr_config;
+extern bool is_surrogate_configured;
+extern double surrogate_switching_time;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/codes/surrogate/packet-latency-predictor/average.h b/codes/surrogate/packet-latency-predictor/average.h
new file mode 100644
index 00000000..5d3d0b17
--- /dev/null
+++ b/codes/surrogate/packet-latency-predictor/average.h
@@ -0,0 +1,26 @@
+#ifndef CODES_SURROGATE_LATENCY_PREDICTOR_AVERAGE_H
+#define CODES_SURROGATE_LATENCY_PREDICTOR_AVERAGE_H
+
+/**
+ * average.h -- implements a strategy to determine how long will it take for a
+ * packet to arrive at its destination based on averaging the time that takes
+ * to send packets from source to destination terminals
+ * -Elkin Cruz
+ *
+ * Copyright (c) 2023 Rensselaer Polytechnic Institute
+ */
+
+#include "codes/surrogate/packet-latency-predictor/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern struct packet_latency_predictor average_latency_predictor;
+extern double ignore_until;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h
new file mode 100644
index 00000000..4812a12a
--- /dev/null
+++ b/codes/surrogate/packet-latency-predictor/common.h
@@ -0,0 +1,59 @@
+#ifndef CODES_SURROGATE_LATENCY_PREDICTOR_COMMON_H
+#define CODES_SURROGATE_LATENCY_PREDICTOR_COMMON_H
+
+/**
+ * common.h -- common datatypes and functionality to all latency predictors
+ * -Elkin Cruz
+ *
+ * Copyright (c) 2023 Rensselaer Polytechnic Institute
+ */
+#include <ross.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Terminal-to-terminal packet latency prediction machinery
+ */
+
+// Packet latencies
+struct packet_start {
+    uint64_t packet_ID;
+    tw_lpid dest_terminal_lpid;  // ROSS id; LPID for terminal
+    unsigned int dfdally_dest_terminal_id; // number in [0, total terminals)
+    double travel_start_time;
+    double workload_injection_time; // this is when the workload passed down the event to model-net
+    double processing_packet_delay;  // delay for this packet to be processed from previous packet in the queue
+    uint32_t packet_size;
+    bool is_there_another_pckt_in_queue; // is there another packet in queue
+    void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
+    void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
+};
+
+struct packet_end {
+    double travel_end_time;
+    double next_packet_delay;  // Delay to start processing next packet
+};
+
+// Definition of functions needed to define a predictor
+typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
+typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now`
+typedef struct packet_end (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now`
+typedef void (*predict_pred_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
+
+// Each network model defines its own way to setup the packet latency predictor
+struct packet_latency_predictor {
+    init_pred_f        init;
+    feed_pred_f        feed;
+    predict_pred_f     predict;
+    predict_pred_rc_f  predict_rc;
+    size_t             predictor_data_sz; // `predictor_data` size
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h
new file mode 100644
index 00000000..34991556
--- /dev/null
+++ b/codes/surrogate/switch.h
@@ -0,0 +1,63 @@
+#ifndef CODES_SURROGATE_SWITCH_H
+#define CODES_SURROGATE_SWITCH_H
+
+/**
+ * switch.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes
+ * Elkin Cruz
+ *
+ * Copyright (c) 2023 Rensselaer Polytechnic Institute
+ */
+
+#include <ross.h>
+#include <stdbool.h>
+#include "codes/codes_mapping.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// When true (below), the network state will be frozen at switch time (from
+// high-def to surrogate) and later reanimated on the switch back (from
+// surrogate to high-def). If not, all events will be kept in the network while
+// on surrogate mode, which means that the network will vacate completely
+extern bool freeze_network_on_switch;
+
+// Functions that director should have access to
+typedef void (*switch_surrogate_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
+typedef bool (*is_surrogate_on_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
+
+struct director_data {
+    switch_surrogate_f  switch_surrogate; // this function switches the model to and from surrogate-mode on a PE basis. It has to be called on all PEs to switch the entire simulation to its surrogate version
+    is_surrogate_on_f   is_surrogate_on;  // determines if the model has switched or not
+};
+
+
+// Switches back and forth from surrogate mode as defined by network model
+// (e.g, by dragonfly-dally.C)
+// Parameters: `data` corresponds to the lp sub-state, lp is the lp pointer, and the array of events in queue (to be processed)
+typedef void (*model_switch_f) (void * data, tw_lp * lp, tw_event **);
+typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
+
+struct lp_types_switch {
+    char lpname[MAX_NAME_LENGTH];
+    bool trigger_idle_modelnet;  // Trigger idle events for model-net (prevents a model to be stuck in a schedule loop if it is to process packets during surrogate-mode). If this is true and the lpname does not start with 'modelnet_', the behaviour is undefined
+    model_switch_f        highdef_to_surrogate;
+    model_switch_f        surrogate_to_highdef;
+    model_ask_if_freeze_f should_event_be_frozen;  // NULL means event from LP type shouldn't be frozen
+};
+
+static struct {
+    size_t current_i;
+    size_t total;
+    double * time_stampts; // list of precise timestamps at which to switch
+} switch_at;
+
+
+// Switch
+void director_switch(tw_pe * pe, tw_event_sig gvt_sig);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 8383c983..1aaf0528 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -6,7 +6,7 @@
 
 #include "codes/model-net.h"
 #include "codes/codes_mapping.h"
-#include "codes/surrogate.h"  // just needed for stats on surrogate-mode
+#include "codes/surrogate/init.h"  // just needed for stats on surrogate-mode
 
 
 static int net_id = 0;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bbb381ba..bd9c86da 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,7 +54,11 @@ list(APPEND SRCS
   	util/codes-comm.c
     util/rc-stack.c
     util/congestion-controller.C
-    util/surrogate.c
+
+    surrogate/init.c
+    surrogate/switch.c
+    surrogate/packet-latency-predictor/common.c
+    surrogate/packet-latency-predictor/average.c
 
     iokernellang/codesparser.h
     iokernellang/codesparser.c
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 117441ca..621506cb 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -21,7 +21,7 @@
 #include "codes/codes.h"
 #include "codes/model-net-method.h"
 #include "codes/model-net-lp.h"
-#include "codes/surrogate.h"
+#include "codes/surrogate/init.h"
 #include "codes/net/dragonfly-dally.h"
 #include "sys/file.h"
 #include "codes/quickhash.h"
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
new file mode 100644
index 00000000..5b24d9cf
--- /dev/null
+++ b/src/surrogate/init.c
@@ -0,0 +1,127 @@
+#include <codes/surrogate/init.h>
+#include <codes/surrogate/packet-latency-predictor/average.h>
+
+bool freeze_network_on_switch = true;
+struct surrogate_config surr_config = {0};
+bool is_surrogate_configured = false;
+double surrogate_switching_time = 0.0;
+
+
+// === Stats!
+void print_surrogate_stats(void) {
+    if(is_surrogate_configured && g_tw_mynode == 0) {
+        printf("\nTotal time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
+    }
+}
+// === END OF Stats!
+
+
+// === All things Surrogate Configuration
+void surrogate_configure(
+        char const * const anno,
+        struct surrogate_config * const sc,
+        struct packet_latency_predictor ** pl_pred
+) {
+    assert(sc);
+    assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
+    is_surrogate_configured = true;
+
+    // This is the only place where the director data should be loaded and set up
+    surr_config = *sc;
+
+    // Determining which director mode to set up
+    char director_mode[MAX_NAME_LENGTH];
+    director_mode[0] = '\0';
+    configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
+        PRINTF_ONCE("\nSurrogate activated switching at fixed virtual times: ");
+
+        // Loading timestamps
+        char **timestamps;
+        size_t len;
+        configuration_get_multivalue(&config, "SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
+
+        switch_at.current_i = 0;
+        switch_at.total = len;
+        switch_at.time_stampts = malloc(len * sizeof(double));
+
+        for (size_t i = 0; i < len; i++) {
+            errno = 0;
+            switch_at.time_stampts[i] = strtod(timestamps[i], NULL);
+            if (errno == ERANGE || errno == EILSEQ){
+                tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
+            }
+
+            PRINTF_ONCE("%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
+        }
+        PRINTF_ONCE("\n");
+
+        // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
+        g_tw_gvt_arbitrary_fun = director_switch;
+
+#ifdef USE_RAND_TIEBREAKER
+        tw_event_sig time_stamp = {0};
+        time_stamp.recv_ts = switch_at.time_stampts[0];
+        tw_trigger_arbitrary_fun_at(time_stamp);
+#else
+        tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]);
+#endif
+
+        // freeing timestamps before it dissapears
+        for (size_t i = 0; i < len; i++) {
+            free(timestamps[i]);
+        }
+        free(timestamps);
+    } else {
+        tw_error(TW_LOC, "Unknown director mode `%s`", director_mode);
+    }
+
+    // Determining which predictor to set up and return
+    char latency_pred_name[MAX_NAME_LENGTH];
+    latency_pred_name[0] = '\0';
+    configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
+    if (*latency_pred_name) {
+        if (strcmp(latency_pred_name, "average") == 0) {
+            *pl_pred = &average_latency_predictor;
+
+            // Finding out whether to ignore some packet latencies
+            int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
+            if (rc) {
+                ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
+                PRINTF_ONCE("Enabling average packet latency predictor\n");
+            } else {
+                PRINTF_ONCE("Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
+            }
+        } else {
+            tw_error(TW_LOC, "Unknown predictor for packet latency `%s` (possibilities include: average)", latency_pred_name);
+        }
+    } else {
+        *pl_pred = &average_latency_predictor;
+        PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n");
+    }
+
+    // Determining which predictor to set up and return
+    char network_treatment_name[MAX_NAME_LENGTH];
+    network_treatment_name[0] = '\0';
+    configuration_get_value(&config, "SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH);
+    if (*network_treatment_name) {
+        if (strcmp(network_treatment_name, "freeze") == 0) {
+            freeze_network_on_switch = true;
+            PRINTF_ONCE("The network will be frozen on switch to surrogate\n");
+        } else if (strcmp(network_treatment_name, "nothing") == 0) {
+            freeze_network_on_switch = false;
+            PRINTF_ONCE("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n");
+        } else {
+            tw_error(TW_LOC, "Unknown network treatment `%s` (possibilities include: frezee or nothing)", network_treatment_name);
+        }
+    } else {
+        freeze_network_on_switch = true;
+        PRINTF_ONCE("The network will be frozen on switch to surrogate (default behaviour)\n");
+    }
+
+    //surr_config.director.switch_surrogate();
+    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+        fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "vanilla");
+    }
+}
+// === END OF All things Surrogate Configuration
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
new file mode 100644
index 00000000..4bf3704c
--- /dev/null
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -0,0 +1,112 @@
+#include <codes/surrogate/init.h>
+#include <codes/surrogate/packet-latency-predictor/average.h>
+
+double ignore_until = 0;
+
+
+// === Average packet latency functionality
+//
+struct aggregated_latency_one_terminal {
+    double sum_latency;
+    unsigned int total_msgs;
+};
+
+struct latency_surrogate {
+    struct aggregated_latency_one_terminal aggregated_next_packet_delay;
+    struct aggregated_latency_one_terminal aggregated_latency_for_all;
+    unsigned int num_terminals;
+    struct aggregated_latency_one_terminal aggregated_latency[];
+};
+
+static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) {
+    (void) lp;
+    (void) src_terminal;
+    assert(data->num_terminals == 0);
+    assert(data->aggregated_latency_for_all.sum_latency == 0);
+    assert(data->aggregated_latency_for_all.total_msgs == 0);
+    assert(data->aggregated_latency[0].sum_latency == 0);
+    assert(data->aggregated_latency[0].total_msgs == 0);
+    assert(data->aggregated_next_packet_delay.total_msgs == 0);
+    assert(data->aggregated_next_packet_delay.sum_latency == 0);
+
+    data->num_terminals = surr_config.total_terminals;
+}
+
+static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
+    (void) lp;
+    (void) src_terminal;
+
+    if (start->travel_start_time < ignore_until) {
+        return;
+    }
+
+    unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
+    double const latency = end->travel_end_time - start->travel_start_time;
+    assert(dest_terminal < data->num_terminals);
+    assert(end->travel_end_time > start->travel_start_time);
+
+    // For average latency per terminal
+    data->aggregated_latency[dest_terminal].sum_latency += latency;
+    data->aggregated_latency[dest_terminal].total_msgs++;
+
+    // For average total latency (used in case there is no data for a specific node)
+    data->aggregated_latency_for_all.sum_latency += latency;
+    data->aggregated_latency_for_all.total_msgs++;
+
+    // We ignore the delay if there are no more packets in the queue
+    if (start->is_there_another_pckt_in_queue) {
+        data->aggregated_next_packet_delay.sum_latency += end->next_packet_delay;
+        data->aggregated_next_packet_delay.total_msgs ++;
+    }
+}
+
+static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) {
+    (void) lp;
+
+    unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
+    assert(dest_terminal < data->num_terminals);
+
+    unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs;
+    if (total_total_datapoints == 0) {
+        // otherwise, we have no data to approximate the latency
+        tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
+        return (struct packet_end) {
+            .travel_end_time = -1.0,
+            .next_packet_delay = -1.0,
+        };
+    }
+
+    // In case we have any data to determine the average for a specific terminal
+    unsigned int const total_datapoints_for_term = data->aggregated_latency[dest_terminal].total_msgs;
+    double latency = -1.0;
+    if (total_datapoints_for_term > 0) {
+        latency = data->aggregated_latency[dest_terminal].sum_latency / total_datapoints_for_term;
+    } else {
+        // If no information for that terminal exists, use average from all message
+        latency = data->aggregated_latency_for_all.sum_latency / total_total_datapoints;
+    }
+    assert(latency >= 0);
+
+    double const next_packet_delay =
+        data->aggregated_next_packet_delay.sum_latency / data->aggregated_next_packet_delay.total_msgs;
+    return (struct packet_end) {
+        .travel_end_time = packet_dest->travel_start_time + latency,
+        .next_packet_delay = next_packet_delay,
+    };
+}
+
+static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
+    (void) data;
+    (void) lp;
+}
+
+
+struct packet_latency_predictor average_latency_predictor = {
+    .init              = (init_pred_f) init_pred,
+    .feed              = (feed_pred_f) feed_pred,
+    .predict           = (predict_pred_f) predict_latency,
+    .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
+    .predictor_data_sz = sizeof(struct latency_surrogate) + 72 * sizeof(struct aggregated_latency_one_terminal)
+};
+//
+// === END OF Average packet latency functionality
diff --git a/src/surrogate/packet-latency-predictor/common.c b/src/surrogate/packet-latency-predictor/common.c
new file mode 100644
index 00000000..b78d2c31
--- /dev/null
+++ b/src/surrogate/packet-latency-predictor/common.c
@@ -0,0 +1 @@
+#include <codes/surrogate/packet-latency-predictor/common.h>
diff --git a/src/util/surrogate.c b/src/surrogate/switch.c
similarity index 63%
rename from src/util/surrogate.c
rename to src/surrogate/switch.c
index fd01cee1..098f0e49 100644
--- a/src/util/surrogate.c
+++ b/src/surrogate/switch.c
@@ -1,155 +1,11 @@
-/**
- * This entire file is in charge of switching a high-definition simulation
- * (a vanilla CODES simulation) into surrogate-mode where a secondary piece
- * of software (a surrogate, a collection of functions), and back.
- * For the switch to happen, we have to inspect some of the "hidden"
- * structure of PDES (ROSS) and thus the code in here relies on a very
- * specific version of ROSS. In a sense, we are abusing the non-documented
- * ABI of ROSS.
- */
-
-#include <assert.h>
-#include <codes/configuration.h>
-#include <codes/codes_mapping.h>
+#include <codes/surrogate/init.h>
+#include <codes/surrogate/switch.h>
 #include <codes/model-net-lp.h>
-#include <codes/surrogate.h>
-
-// A simple macro to clarify code a bit
-#define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); }
-
-// Basic level of debugging is 1. It should be always turned on
-// because it tells us when a switch to or from surrogate-mode happened.
-// It can be deactivated (set to 0) if it ends up being too obnoxious
-// Level 0: don't show anything
-// Level 1: show when surrogate-mode is activated and deactivated
-// Level 2: level 1 and some information at each GVT
-// Level 3: level 1 and show extended information at each GVT
-#define DEBUG_DIRECTOR 1
-
-// Global variables
-bool freeze_network_on_switch = true;
-static bool is_surrogate_configured = false;
-static double surrogate_switching_time = 0.0;
-static double ignore_until = 0;
-static struct surrogate_config surr_config = {0};
-
-// === Average packet latency functionality
-//
-struct aggregated_latency_one_terminal {
-    double sum_latency;
-    unsigned int total_msgs;
-};
-
-struct latency_surrogate {
-    struct aggregated_latency_one_terminal aggregated_next_packet_delay;
-    struct aggregated_latency_one_terminal aggregated_latency_for_all;
-    unsigned int num_terminals;
-    struct aggregated_latency_one_terminal aggregated_latency[];
-};
-
-static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) {
-    (void) lp;
-    (void) src_terminal;
-    assert(data->num_terminals == 0);
-    assert(data->aggregated_latency_for_all.sum_latency == 0);
-    assert(data->aggregated_latency_for_all.total_msgs == 0);
-    assert(data->aggregated_latency[0].sum_latency == 0);
-    assert(data->aggregated_latency[0].total_msgs == 0);
-    assert(data->aggregated_next_packet_delay.total_msgs == 0);
-    assert(data->aggregated_next_packet_delay.sum_latency == 0);
-
-    data->num_terminals = surr_config.total_terminals;
-}
-
-static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
-    (void) lp;
-    (void) src_terminal;
-
-    if (start->travel_start_time < ignore_until) {
-        return;
-    }
-
-    unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
-    double const latency = end->travel_end_time - start->travel_start_time;
-    assert(dest_terminal < data->num_terminals);
-    assert(end->travel_end_time > start->travel_start_time);
-
-    // For average latency per terminal
-    data->aggregated_latency[dest_terminal].sum_latency += latency;
-    data->aggregated_latency[dest_terminal].total_msgs++;
-
-    // For average total latency (used in case there is no data for a specific node)
-    data->aggregated_latency_for_all.sum_latency += latency;
-    data->aggregated_latency_for_all.total_msgs++;
-
-    // We ignore the delay if there are no more packets in the queue
-    if (start->is_there_another_pckt_in_queue) {
-        data->aggregated_next_packet_delay.sum_latency += end->next_packet_delay;
-        data->aggregated_next_packet_delay.total_msgs ++;
-    }
-}
-
-static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) {
-    (void) lp;
-
-    unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
-    assert(dest_terminal < data->num_terminals);
-
-    unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs;
-    if (total_total_datapoints == 0) {
-        // otherwise, we have no data to approximate the latency
-        tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal);
-        return (struct packet_end) {
-            .travel_end_time = -1.0,
-            .next_packet_delay = -1.0,
-        };
-    }
-
-    // In case we have any data to determine the average for a specific terminal
-    unsigned int const total_datapoints_for_term = data->aggregated_latency[dest_terminal].total_msgs;
-    double latency = -1.0;
-    if (total_datapoints_for_term > 0) {
-        latency = data->aggregated_latency[dest_terminal].sum_latency / total_datapoints_for_term;
-    } else {
-        // If no information for that terminal exists, use average from all message
-        latency = data->aggregated_latency_for_all.sum_latency / total_total_datapoints;
-    }
-    assert(latency >= 0);
-
-    double const next_packet_delay =
-        data->aggregated_next_packet_delay.sum_latency / data->aggregated_next_packet_delay.total_msgs;
-    return (struct packet_end) {
-        .travel_end_time = packet_dest->travel_start_time + latency,
-        .next_packet_delay = next_packet_delay,
-    };
-}
-
-static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
-    (void) data;
-    (void) lp;
-}
-
-
-struct packet_latency_predictor average_latency_predictor = {
-    .init              = (init_pred_f) init_pred,
-    .feed              = (feed_pred_f) feed_pred,
-    .predict           = (predict_pred_f) predict_latency,
-    .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
-    .predictor_data_sz = sizeof(struct latency_surrogate) + 72 * sizeof(struct aggregated_latency_one_terminal)
-};
-//
-// === END OF Average packet latency functionality
 
 
 // === Director functionality
 //
 
-static struct {
-    size_t current_i;
-    size_t total;
-    double * time_stampts; // list of precise timestamps at which to switch
-} switch_at;
-
 
 //static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) {
 //    (void) switch_offset;
@@ -526,10 +382,10 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
 
 
 #ifdef USE_RAND_TIEBREAKER
-static void director_fun(tw_pe * pe, tw_event_sig gvt_sig) {
+void director_switch(tw_pe * pe, tw_event_sig gvt_sig) {
     tw_stime const gvt = gvt_sig.recv_ts;
 #else
-static void director_fun(tw_pe * pe, tw_stime gvt) {
+void director_switch(tw_pe * pe, tw_stime gvt) {
 #endif
     assert(is_surrogate_configured);
 
@@ -656,123 +512,3 @@ static void director_fun(tw_pe * pe, tw_stime gvt) {
 }
 //
 // === END OF Director functionality
-
-
-// === All things Surrogate Configuration
-void surrogate_configure(
-        char const * const anno,
-        struct surrogate_config * const sc,
-        struct packet_latency_predictor ** pl_pred
-) {
-    assert(sc);
-    assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
-    is_surrogate_configured = true;
-
-    // This is the only place where the director data should be loaded and set up
-    surr_config = *sc;
-
-    // Determining which director mode to set up
-    char director_mode[MAX_NAME_LENGTH];
-    director_mode[0] = '\0';
-    configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
-    if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
-        PRINTF_ONCE("\nSurrogate activated switching at fixed virtual times: ");
-
-        // Loading timestamps
-        char **timestamps;
-        size_t len;
-        configuration_get_multivalue(&config, "SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
-
-        switch_at.current_i = 0;
-        switch_at.total = len;
-        switch_at.time_stampts = malloc(len * sizeof(double));
-
-        for (size_t i = 0; i < len; i++) {
-            errno = 0;
-            switch_at.time_stampts[i] = strtod(timestamps[i], NULL);
-            if (errno == ERANGE || errno == EILSEQ){
-                tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
-            }
-
-            PRINTF_ONCE("%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
-        }
-        PRINTF_ONCE("\n");
-
-        // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
-        g_tw_gvt_arbitrary_fun = director_fun;
-
-#ifdef USE_RAND_TIEBREAKER
-        tw_event_sig time_stamp = {0};
-        time_stamp.recv_ts = switch_at.time_stampts[0];
-        tw_trigger_arbitrary_fun_at(time_stamp);
-#else
-        tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]);
-#endif
-
-        // freeing timestamps before it dissapears
-        for (size_t i = 0; i < len; i++) {
-            free(timestamps[i]);
-        }
-        free(timestamps);
-    } else {
-        tw_error(TW_LOC, "Unknown director mode `%s`", director_mode);
-    }
-
-    // Determining which predictor to set up and return
-    char latency_pred_name[MAX_NAME_LENGTH];
-    latency_pred_name[0] = '\0';
-    configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
-    if (*latency_pred_name) {
-        if (strcmp(latency_pred_name, "average") == 0) {
-            *pl_pred = &average_latency_predictor;
-
-            // Finding out whether to ignore some packet latencies
-            int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
-            if (rc) {
-                ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
-                PRINTF_ONCE("Enabling average packet latency predictor\n");
-            } else {
-                PRINTF_ONCE("Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
-            }
-        } else {
-            tw_error(TW_LOC, "Unknown predictor for packet latency `%s` (possibilities include: average)", latency_pred_name);
-        }
-    } else {
-        *pl_pred = &average_latency_predictor;
-        PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n");
-    }
-
-    // Determining which predictor to set up and return
-    char network_treatment_name[MAX_NAME_LENGTH];
-    network_treatment_name[0] = '\0';
-    configuration_get_value(&config, "SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH);
-    if (*network_treatment_name) {
-        if (strcmp(network_treatment_name, "freeze") == 0) {
-            freeze_network_on_switch = true;
-            PRINTF_ONCE("The network will be frozen on switch to surrogate\n");
-        } else if (strcmp(network_treatment_name, "nothing") == 0) {
-            freeze_network_on_switch = false;
-            PRINTF_ONCE("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n");
-        } else {
-            tw_error(TW_LOC, "Unknown network treatment `%s` (possibilities include: frezee or nothing)", network_treatment_name);
-        }
-    } else {
-        freeze_network_on_switch = true;
-        PRINTF_ONCE("The network will be frozen on switch to surrogate (default behaviour)\n");
-    }
-
-    //surr_config.director.switch_surrogate();
-    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "vanilla");
-    }
-}
-// === END OF All things Surrogate Configuration
-
-
-// === Stats!
-void print_surrogate_stats(void) {
-    if(is_surrogate_configured && g_tw_mynode == 0) {
-        printf("\nTotal time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
-    }
-}
-// === END OF Stats!

From a570ca0bc1aef3898a27fd97cb993d53e305d90f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 22 Sep 2023 18:40:51 -0400
Subject: [PATCH 049/188] Connecting (Py)Torch model to packet latency
 surrogate

---
 .../packet-latency-predictor/torch-jit.h      |  19 +++
 src/CMakeLists.txt                            |   3 +
 src/surrogate/init.c                          |  18 ++-
 .../packet-latency-predictor/torch-jit.C      | 111 ++++++++++++++++++
 tests/example-ping-pong-surrogate-1.sh        |   1 +
 tests/example-ping-pong-surrogate-2.sh        |   1 +
 tests/example-ping-pong-surrogate-3.sh        |   1 +
 ...ample-ping-pong-surrogate-determinism-1.sh |   1 +
 ...ample-ping-pong-surrogate-determinism-2.sh |   1 +
 9 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 codes/surrogate/packet-latency-predictor/torch-jit.h
 create mode 100644 src/surrogate/packet-latency-predictor/torch-jit.C

diff --git a/codes/surrogate/packet-latency-predictor/torch-jit.h b/codes/surrogate/packet-latency-predictor/torch-jit.h
new file mode 100644
index 00000000..80e532a5
--- /dev/null
+++ b/codes/surrogate/packet-latency-predictor/torch-jit.h
@@ -0,0 +1,19 @@
+#ifndef CODES_SURROGATE_TORCHJIT_H
+#define CODES_SURROGATE_TORCHJIT_H
+
+#include <ross.h>
+#include "codes/surrogate/init.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void surrogate_torch_init(char const * dir);
+
+extern struct packet_latency_predictor torch_latency_predictor;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bd9c86da..58b4850a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_print_variables(CMAKE_CURRENT_SOURCE_DIR)
 
 find_package(FLEX REQUIRED)
 find_package(BISON REQUIRED)
+find_package(Torch REQUIRED)
 
 flex_target(codes_lexer ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configlex.l ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configlex.c DEFINES_FILE ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configlex.h)
 bison_target(codes_parser ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configparser.y ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configparser.c DEFINES_FILE ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configparser.h)
@@ -59,6 +60,7 @@ list(APPEND SRCS
     surrogate/switch.c
     surrogate/packet-latency-predictor/common.c
     surrogate/packet-latency-predictor/average.c
+    surrogate/packet-latency-predictor/torch-jit.C
 
     iokernellang/codesparser.h
     iokernellang/codesparser.c
@@ -119,6 +121,7 @@ endif()
 add_library(codes STATIC ${SRCS})
 
 list(APPEND LIBS_TO_LINK ${MPI_C_LIBRARIES})
+list(APPEND LIBS_TO_LINK ${TORCH_LIBRARIES})
 target_include_directories(codes INTERFACE ${MPI_C_INCLUDE_PATH})
 
 # set(LIBS_TO_LINK
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 5b24d9cf..cdc5b7c2 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -1,5 +1,6 @@
 #include <codes/surrogate/init.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
+#include <codes/surrogate/packet-latency-predictor/torch-jit.h>
 
 bool freeze_network_on_switch = true;
 struct surrogate_config surr_config = {0};
@@ -92,8 +93,23 @@ void surrogate_configure(
             } else {
                 PRINTF_ONCE("Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
             }
+        } else if (strcmp(latency_pred_name, "torch-jit") == 0) {
+            char torch_jit_mode[MAX_NAME_LENGTH];
+            torch_jit_mode[0] = '\0';
+            configuration_get_value(&config, "SURROGATE", "torch_jit_mode", anno, torch_jit_mode, MAX_NAME_LENGTH);
+            if (strcmp(torch_jit_mode, "single-static-model-for-all-terminals") != 0) {
+                tw_error(TW_LOC, "Unknown torch-jit mode `%s`", torch_jit_mode);
+            }
+
+            char torch_jit_model_path[MAX_NAME_LENGTH];
+            torch_jit_model_path[0] = '\0';
+            configuration_get_value(&config, "SURROGATE", "torch_jit_model_path", anno, torch_jit_model_path, MAX_NAME_LENGTH);
+            surrogate_torch_init(torch_jit_model_path);
+
+            *pl_pred = &torch_latency_predictor;
         } else {
-            tw_error(TW_LOC, "Unknown predictor for packet latency `%s` (possibilities include: average)", latency_pred_name);
+            tw_error(TW_LOC, "Unknown predictor for packet latency `%s` "
+                    "(possibilities include: average, torch-jit)", latency_pred_name);
         }
     } else {
         *pl_pred = &average_latency_predictor;
diff --git a/src/surrogate/packet-latency-predictor/torch-jit.C b/src/surrogate/packet-latency-predictor/torch-jit.C
new file mode 100644
index 00000000..e704bf4d
--- /dev/null
+++ b/src/surrogate/packet-latency-predictor/torch-jit.C
@@ -0,0 +1,111 @@
+#include <torch/csrc/jit/serialization/import.h>
+#include <torch/csrc/autograd/generated/variable_factories.h>
+#include <ATen/Parallel.h>
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include <codes/surrogate/packet-latency-predictor/torch-jit.h>
+
+static torch::jit::Module packet_latency_model;
+
+
+inline void assert_correct_dims(at::Tensor * t) {
+    int const dims = t->ndimension();
+
+    for (int i = 0; i < dims-1; i++) {
+        assert(at::size(*t, i) == 1);
+    } 
+    assert(at::size(*t, dims - 1) == 2);
+}
+
+
+void surrogate_torch_init(char const * dir) {
+    std::cout << "Loading Torch-JIT model\n";
+    try {
+        // Deserialize the ScriptModule from a file
+        packet_latency_model = torch::jit::load(dir);
+    }
+    catch (const c10::Error& e) {
+        std::cerr << "Error loading Torch-JIT model\n";
+        return;
+    }
+
+    // Configuring to run on a single thread
+    at::set_num_threads(1);
+
+    // === Checking consistency of model with dummy input
+    float data_input[] = {0.0, 0.0, 0.0, 0.0};
+    size_t const n_input = sizeof(data_input) / sizeof(float);
+
+    std::vector<torch::jit::IValue> inputs;
+    inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kFloat));
+
+    // Predicting value
+    at::Tensor output = packet_latency_model.forward(inputs).toTensor();
+    assert_correct_dims(&output);
+    // === End of check
+    std::cout << "Torch-JIT model loaded successfully\n";
+}
+
+
+static struct packet_end surrogate_torch_predict(void *, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) {
+    //auto t_start = std::chrono::high_resolution_clock::now();
+
+    // Create a vector of inputs.
+    float data_input[] = {
+        src_terminal,
+        packet_dest->dfdally_dest_terminal_id,
+        packet_dest->packet_size,
+        packet_dest->is_there_another_pckt_in_queue
+    };
+    size_t n_input = sizeof(data_input) / sizeof(float);
+
+    std::vector<torch::jit::IValue> inputs;
+    inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kFloat));
+
+    at::Tensor output = packet_latency_model.forward(inputs).toTensor();
+    //assert_correct_dims(&output);
+
+    auto *out_data = output.data_ptr<float>();
+    return (struct packet_end) {
+        .travel_end_time = packet_dest->travel_start_time + (out_data[0] > 0 ? out_data[0] : 10),
+        .next_packet_delay = out_data[1] > 0 ? out_data[1] : 200,
+    };
+
+    //auto t_end = std::chrono::high_resolution_clock::now();
+    //double total = std::chrono::duration<double, std::milli>(t_end-t_start).count();
+}
+
+
+// Dummies to use when no actual data is fed
+static void init_pred_dummy(void * data, tw_lp * lp, unsigned int src_terminal) {
+    (void) data;
+    (void) lp;
+    (void) src_terminal;
+}
+
+
+static void feed_pred_dummy(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
+    (void) data;
+    (void) lp;
+    (void) src_terminal;
+    (void) start;
+    (void) end;
+}
+
+
+static void predict_latency_rc_dummy(struct latency_surrogate * data, tw_lp * lp) {
+    (void) data;
+    (void) lp;
+}
+
+
+struct packet_latency_predictor torch_latency_predictor = {
+    .init              = (init_pred_f) init_pred_dummy,
+    .feed              = (feed_pred_f) feed_pred_dummy,
+    .predict           = (predict_pred_f) surrogate_torch_predict,
+    .predict_rc        = (predict_pred_rc_f) predict_latency_rc_dummy,
+    .predictor_data_sz = 0
+};
diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh
index 4e1299e6..87b6e32c 100755
--- a/tests/example-ping-pong-surrogate-1.sh
+++ b/tests/example-ping-pong-surrogate-1.sh
@@ -11,6 +11,7 @@ fi
 export PACKET_SIZE=4096
 export CHUNK_SIZE=64
 export NETWORK_TREATMENT=freeze
+export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh
index fb829993..33001c4a 100755
--- a/tests/example-ping-pong-surrogate-2.sh
+++ b/tests/example-ping-pong-surrogate-2.sh
@@ -11,6 +11,7 @@ fi
 export PACKET_SIZE=128
 export CHUNK_SIZE=64
 export NETWORK_TREATMENT=freeze
+export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh
index 9c024e6e..dbcc09d0 100755
--- a/tests/example-ping-pong-surrogate-3.sh
+++ b/tests/example-ping-pong-surrogate-3.sh
@@ -11,6 +11,7 @@ fi
 export PACKET_SIZE=128
 export CHUNK_SIZE=64
 export NETWORK_TREATMENT=freeze
+export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-freeze/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh
index 283791aa..c87568a8 100755
--- a/tests/example-ping-pong-surrogate-determinism-1.sh
+++ b/tests/example-ping-pong-surrogate-determinism-1.sh
@@ -9,6 +9,7 @@ fi
 export PACKET_SIZE=1024
 export CHUNK_SIZE=1024
 export NETWORK_TREATMENT=nothing
+export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh
index 169ba8bf..6869907b 100755
--- a/tests/example-ping-pong-surrogate-determinism-2.sh
+++ b/tests/example-ping-pong-surrogate-determinism-2.sh
@@ -9,6 +9,7 @@ fi
 export PACKET_SIZE=1024
 export CHUNK_SIZE=1024
 export NETWORK_TREATMENT=freeze
+export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'

From 816ecbae3e083082e86ebe85c37ec22b533709f3 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 27 Sep 2023 18:28:47 -0400
Subject: [PATCH 050/188] Fixing bug on switch to and from surrogate failure

---
 codes/surrogate/switch.h                         | 6 ++++--
 doc/example/tutorial-ping-pong-surrogate.conf.in | 8 ++++++--
 src/surrogate/init.c                             | 3 +++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h
index 34991556..2b396489 100644
--- a/codes/surrogate/switch.h
+++ b/codes/surrogate/switch.h
@@ -46,11 +46,13 @@ struct lp_types_switch {
     model_ask_if_freeze_f should_event_be_frozen;  // NULL means event from LP type shouldn't be frozen
 };
 
-static struct {
+struct switch_at_struct {
     size_t current_i;
     size_t total;
     double * time_stampts; // list of precise timestamps at which to switch
-} switch_at;
+};
+
+extern struct switch_at_struct switch_at;
 
 
 // Switch
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in
index 7afc569c..0797324b 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf.in
+++ b/doc/example/tutorial-ping-pong-surrogate.conf.in
@@ -68,11 +68,15 @@ SURROGATE {
    #fixed_switch_timestamps=( "1000e4", "8900e4" );  # the first switch happens at around 1000 ping messages, the second at approx. 9900 pings
    fixed_switch_timestamps=( ${SWITCH_TIMESTAMPS} );
 
-# latency predictor to use
-   packet_latency_predictor="average";
+# latency predictor to use. Options: average, torch-jit
+   packet_latency_predictor="${PREDICTOR_TYPE}";
 # some workload models need some time to stabilize, a point where the network behaviour stabilizes. The predictor will ignore all packet latencies that arrive during this period
    ignore_until="${IGNORE_UNTIL}";
 
+# parameters for torch-jit latency predictor
+   torch_jit_mode="single-static-model-for-all-terminals";
+   torch_jit_model_path="${TORCH_JIT_MODEL_PATH}";
+
 # selecting network treatment on switching to surrogate. Options: frezee, nothing
    network_treatment_on_switch="${NETWORK_TREATMENT}";
 }
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index cdc5b7c2..4c7ea1aa 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -1,4 +1,5 @@
 #include <codes/surrogate/init.h>
+#include <codes/surrogate/switch.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
 #include <codes/surrogate/packet-latency-predictor/torch-jit.h>
 
@@ -7,6 +8,8 @@ struct surrogate_config surr_config = {0};
 bool is_surrogate_configured = false;
 double surrogate_switching_time = 0.0;
 
+struct switch_at_struct switch_at;
+
 
 // === Stats!
 void print_surrogate_stats(void) {

From 5bf3c73c2f0c96266d009ad4aade860456401b74 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 28 Sep 2023 16:50:54 -0400
Subject: [PATCH 051/188] Adding extra check to surrogate tests (making them
 regression unit tests)

---
 tests/example-ping-pong-surrogate-1.sh             | 5 +++++
 tests/example-ping-pong-surrogate-2.sh             | 5 +++++
 tests/example-ping-pong-surrogate-3.sh             | 5 +++++
 tests/example-ping-pong-surrogate-determinism-1.sh | 5 +++++
 tests/example-ping-pong-surrogate-determinism-2.sh | 5 +++++
 5 files changed, 25 insertions(+)

diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh
index 87b6e32c..7f3a5f6d 100755
--- a/tests/example-ping-pong-surrogate-1.sh
+++ b/tests/example-ping-pong-surrogate-1.sh
@@ -41,6 +41,11 @@ grep 'Net Events Processed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
+# Checking that the surrogate switched properly
+grep 'Switch completed' model-output-2.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
 # This checks for the number of events processed. If they are different, then
 # the simulation is not deterministic (so this should fail!). As always, just
 # a unit test
diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh
index 33001c4a..f987bedf 100755
--- a/tests/example-ping-pong-surrogate-2.sh
+++ b/tests/example-ping-pong-surrogate-2.sh
@@ -41,6 +41,11 @@ grep 'Net Events Processed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
+# Checking that the surrogate switched properly
+grep 'Switch completed' model-output-2.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
 # This checks for the number of events processed. If they are different, then
 # the simulation is not deterministic (so this should fail!). As always, just
 # a unit test
diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh
index dbcc09d0..19212e9e 100755
--- a/tests/example-ping-pong-surrogate-3.sh
+++ b/tests/example-ping-pong-surrogate-3.sh
@@ -42,6 +42,11 @@ grep 'Net Events Processed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
+# Checking that the surrogate switched properly
+grep 'Switch completed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
 # This checks for the number of events processed. If they are different, then
 # the simulation is not deterministic (so this should fail!). As always, just
 # a unit test
diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh
index c87568a8..cd219272 100755
--- a/tests/example-ping-pong-surrogate-determinism-1.sh
+++ b/tests/example-ping-pong-surrogate-determinism-1.sh
@@ -39,6 +39,11 @@ grep 'Net Events Processed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
+# Checking that the surrogate switched properly
+grep 'Switch completed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
 # This checks for the number of events processed. If they are different, then
 # the simulation is not deterministic (so this should fail!). As always, just
 # a unit test
diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh
index 6869907b..b86f0dfd 100755
--- a/tests/example-ping-pong-surrogate-determinism-2.sh
+++ b/tests/example-ping-pong-surrogate-determinism-2.sh
@@ -39,6 +39,11 @@ grep 'Net Events Processed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
+# Checking that the surrogate switched properly
+grep 'Switch completed' model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
 # This checks for the number of events processed. If they are different, then
 # the simulation is not deterministic (so this should fail!). As always, just
 # a unit test

From 9e8c0e717798e8e24f9c94a50ee80819f66eb4ce Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 2 Oct 2023 15:59:25 -0400
Subject: [PATCH 052/188] Tracking time spent in surrogate mode

---
 codes/surrogate/init.h   |  1 -
 codes/surrogate/switch.h |  5 +++++
 src/surrogate/init.c     |  5 ++---
 src/surrogate/switch.c   | 21 ++++++++++++++++++++-
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h
index df260e87..11ad5027 100644
--- a/codes/surrogate/init.h
+++ b/codes/surrogate/init.h
@@ -48,7 +48,6 @@ void surrogate_configure(
 
 extern struct surrogate_config surr_config;
 extern bool is_surrogate_configured;
-extern double surrogate_switching_time;
 
 #ifdef __cplusplus
 }
diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h
index 2b396489..553f3a11 100644
--- a/codes/surrogate/switch.h
+++ b/codes/surrogate/switch.h
@@ -16,6 +16,11 @@
 extern "C" {
 #endif
 
+// Time spent switching from high-fidelity to surrogate and viceversa
+extern double surrogate_switching_time;
+// Total time spent in surrogate mode (between switches)
+extern double time_in_surrogate;
+
 // When true (below), the network state will be frozen at switch time (from
 // high-def to surrogate) and later reanimated on the switch back (from
 // surrogate to high-def). If not, all events will be kept in the network while
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 4c7ea1aa..8bd8bbe1 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -6,15 +6,14 @@
 bool freeze_network_on_switch = true;
 struct surrogate_config surr_config = {0};
 bool is_surrogate_configured = false;
-double surrogate_switching_time = 0.0;
-
 struct switch_at_struct switch_at;
 
 
 // === Stats!
 void print_surrogate_stats(void) {
     if(is_surrogate_configured && g_tw_mynode == 0) {
-        printf("\nTotal time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
+        printf("\nTotal time spent on surrogate-mode: %.4f\n", (double) time_in_surrogate / g_tw_clock_rate);
+        printf("Total time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
     }
 }
 // === END OF Stats!
diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 098f0e49..49a37f2c 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -2,6 +2,9 @@
 #include <codes/surrogate/switch.h>
 #include <codes/model-net-lp.h>
 
+double surrogate_switching_time = 0.0;
+double time_in_surrogate = 0.0;
+static double surrogate_time_last = 0.0;
 
 // === Director functionality
 //
@@ -422,6 +425,10 @@ void director_switch(tw_pe * pe, tw_stime gvt) {
 
     // Do not process if the simulation ended
     if (gvt >= g_tw_ts_end) {
+        // If the simulation ended and the surrogate is still on, stop timer checking surrogate time
+        if (surr_config.director.is_surrogate_on()) {
+            time_in_surrogate += tw_clock_read() - surrogate_time_last;
+        }
         return;
     }
 
@@ -439,6 +446,8 @@ void director_switch(tw_pe * pe, tw_stime gvt) {
         return;
     }
 
+    // ---- Past this means that we are in fact switching ----
+
     double const start = tw_clock_read();
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -508,7 +517,17 @@ void director_switch(tw_pe * pe, tw_stime gvt) {
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: Switch completed!\n", g_tw_mynode);
     }
-    surrogate_switching_time += tw_clock_read() - start;
+    double const end = tw_clock_read();
+    surrogate_switching_time += end - start;
+
+    // Determining time in surrogate
+    if (surr_config.director.is_surrogate_on()) {
+        // Start tracking time spent in surrogate mode
+        surrogate_time_last = end;
+    } else {
+        // We are done tracking time spent in surrogate mode
+        time_in_surrogate += start - surrogate_time_last;
+    }
 }
 //
 // === END OF Director functionality

From 9794a3200aa6dc5f659010fcd60e02314290b060 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 5 Oct 2023 16:46:01 -0400
Subject: [PATCH 053/188] Disabling Torch-JIT if it is not found in the system

---
 CMakeLists.txt       |  8 ++++++++
 src/CMakeLists.txt   |  8 +++++---
 src/surrogate/init.c | 13 ++++++++++++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f8cb97a..048cb9d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -119,6 +119,14 @@ endif()
 #         set(USE_DAMARIS true)
 # endif()
 
+## TORCH loading ML models
+find_package(Torch)
+if(Torch_FOUND)
+    add_definitions(-DUSE_TORCH)
+    set(USE_TORCH true)
+endif()
+
+
 cmake_print_variables(CMAKE_C_FLAGS)
 add_subdirectory(src)
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 58b4850a..d82c2584 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,7 +2,6 @@ cmake_print_variables(CMAKE_CURRENT_SOURCE_DIR)
 
 find_package(FLEX REQUIRED)
 find_package(BISON REQUIRED)
-find_package(Torch REQUIRED)
 
 flex_target(codes_lexer ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configlex.l ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configlex.c DEFINES_FILE ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configlex.h)
 bison_target(codes_parser ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configparser.y ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configparser.c DEFINES_FILE ${CMAKE_CURRENT_SOURCE_DIR}/modelconfig/configparser.h)
@@ -60,7 +59,6 @@ list(APPEND SRCS
     surrogate/switch.c
     surrogate/packet-latency-predictor/common.c
     surrogate/packet-latency-predictor/average.c
-    surrogate/packet-latency-predictor/torch-jit.C
 
     iokernellang/codesparser.h
     iokernellang/codesparser.c
@@ -118,10 +116,14 @@ endif()
 #     list(APPEND SRCS workload/methods/codes-darshan3-io-wrkld.c)
 # endif()
 
+if(USE_TORCH)
+    list(APPEND SRCS surrogate/packet-latency-predictor/torch-jit.C)
+    list(APPEND LIBS_TO_LINK ${TORCH_LIBRARIES})
+endif()
+
 add_library(codes STATIC ${SRCS})
 
 list(APPEND LIBS_TO_LINK ${MPI_C_LIBRARIES})
-list(APPEND LIBS_TO_LINK ${TORCH_LIBRARIES})
 target_include_directories(codes INTERFACE ${MPI_C_INCLUDE_PATH})
 
 # set(LIBS_TO_LINK
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 8bd8bbe1..23b0dff9 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -1,7 +1,10 @@
 #include <codes/surrogate/init.h>
 #include <codes/surrogate/switch.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
+
+#ifdef USE_TORCH
 #include <codes/surrogate/packet-latency-predictor/torch-jit.h>
+#endif
 
 bool freeze_network_on_switch = true;
 struct surrogate_config surr_config = {0};
@@ -95,6 +98,8 @@ void surrogate_configure(
             } else {
                 PRINTF_ONCE("Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
             }
+
+#ifdef USE_TORCH
         } else if (strcmp(latency_pred_name, "torch-jit") == 0) {
             char torch_jit_mode[MAX_NAME_LENGTH];
             torch_jit_mode[0] = '\0';
@@ -109,9 +114,15 @@ void surrogate_configure(
             surrogate_torch_init(torch_jit_model_path);
 
             *pl_pred = &torch_latency_predictor;
+#endif
+
         } else {
             tw_error(TW_LOC, "Unknown predictor for packet latency `%s` "
-                    "(possibilities include: average, torch-jit)", latency_pred_name);
+                    "(possibilities include: average"
+#ifdef USE_TORCH
+                    ", torch-jit"
+#endif
+                    ")", latency_pred_name);
         }
     } else {
         *pl_pred = &average_latency_predictor;

From 8848b554e51f69c0b73f6cfa81d9d0b4b8cad8da Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 11 Oct 2023 05:13:48 -0400
Subject: [PATCH 054/188] Updating torch-jit predictor to feed model an integer

---
 .../packet-latency-predictor/torch-jit.C      | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/surrogate/packet-latency-predictor/torch-jit.C b/src/surrogate/packet-latency-predictor/torch-jit.C
index e704bf4d..e2c1384c 100644
--- a/src/surrogate/packet-latency-predictor/torch-jit.C
+++ b/src/surrogate/packet-latency-predictor/torch-jit.C
@@ -1,5 +1,6 @@
 #include <torch/csrc/jit/serialization/import.h>
 #include <torch/csrc/autograd/generated/variable_factories.h>
+#include <torch/csrc/api/include/torch/utils.h>
 #include <ATen/Parallel.h>
 
 #include <iostream>
@@ -7,6 +8,7 @@
 #include <vector>
 
 #include <codes/surrogate/packet-latency-predictor/torch-jit.h>
+#include <ross.h>
 
 static torch::jit::Module packet_latency_model;
 
@@ -28,19 +30,26 @@ void surrogate_torch_init(char const * dir) {
         packet_latency_model = torch::jit::load(dir);
     }
     catch (const c10::Error& e) {
-        std::cerr << "Error loading Torch-JIT model\n";
-        return;
+        tw_error(TW_LOC, "Error loading Torch-JIT model");
     }
 
     // Configuring to run on a single thread
     at::set_num_threads(1);
 
     // === Checking consistency of model with dummy input
-    float data_input[] = {0.0, 0.0, 0.0, 0.0};
-    size_t const n_input = sizeof(data_input) / sizeof(float);
+    if (packet_latency_model.is_training()) {
+        std::cerr << "The Torch-JIT model was saved before running .eval(). "
+            "The output from the model will be as if it was in training mode, "
+            "meaning, it might be faulty."
+            << std::endl;
+    }
+
+    long int data_input[] = {0, 0, 0, 0};
+    size_t const n_input = sizeof(data_input) / sizeof(long int);
 
     std::vector<torch::jit::IValue> inputs;
-    inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kFloat));
+    torch::NoGradGuard no_grad;
+    inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kLong));
 
     // Predicting value
     at::Tensor output = packet_latency_model.forward(inputs).toTensor();
@@ -54,16 +63,16 @@ static struct packet_end surrogate_torch_predict(void *, tw_lp * lp, unsigned in
     //auto t_start = std::chrono::high_resolution_clock::now();
 
     // Create a vector of inputs.
-    float data_input[] = {
+    long int data_input[] = {
         src_terminal,
         packet_dest->dfdally_dest_terminal_id,
         packet_dest->packet_size,
         packet_dest->is_there_another_pckt_in_queue
     };
-    size_t n_input = sizeof(data_input) / sizeof(float);
+    size_t n_input = sizeof(data_input) / sizeof(long int);
 
     std::vector<torch::jit::IValue> inputs;
-    inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kFloat));
+    inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kLong));
 
     at::Tensor output = packet_latency_model.forward(inputs).toTensor();
     //assert_correct_dims(&output);

From 50961f32eeeee874a751cde399464ae474e11072 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 15 Oct 2023 18:23:39 -0400
Subject: [PATCH 055/188] Fixiing average predictor table size

It was assumed that the maximum number of terminal nodes was 72. That is
fixed now
---
 .../packet-latency-predictor/average.h        |  3 ++-
 src/surrogate/init.c                          | 27 ++++++++++---------
 .../packet-latency-predictor/average.c        |  8 +++---
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/codes/surrogate/packet-latency-predictor/average.h b/codes/surrogate/packet-latency-predictor/average.h
index 5d3d0b17..f793bfa3 100644
--- a/codes/surrogate/packet-latency-predictor/average.h
+++ b/codes/surrogate/packet-latency-predictor/average.h
@@ -16,9 +16,10 @@
 extern "C" {
 #endif
 
-extern struct packet_latency_predictor average_latency_predictor;
 extern double ignore_until;
 
+struct packet_latency_predictor average_latency_predictor(int num_terminals);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 23b0dff9..50b90801 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -10,6 +10,7 @@ bool freeze_network_on_switch = true;
 struct surrogate_config surr_config = {0};
 bool is_surrogate_configured = false;
 struct switch_at_struct switch_at;
+struct packet_latency_predictor current_predictor = {0};
 
 
 // === Stats!
@@ -88,16 +89,8 @@ void surrogate_configure(
     configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
     if (*latency_pred_name) {
         if (strcmp(latency_pred_name, "average") == 0) {
-            *pl_pred = &average_latency_predictor;
-
-            // Finding out whether to ignore some packet latencies
-            int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
-            if (rc) {
-                ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
-                PRINTF_ONCE("Enabling average packet latency predictor\n");
-            } else {
-                PRINTF_ONCE("Enabling average packet latency predictor with ignore_until=%g\n", ignore_until);
-            }
+            current_predictor = average_latency_predictor(surr_config.total_terminals);
+            *pl_pred = &current_predictor;
 
 #ifdef USE_TORCH
         } else if (strcmp(latency_pred_name, "torch-jit") == 0) {
@@ -125,10 +118,20 @@ void surrogate_configure(
                     ")", latency_pred_name);
         }
     } else {
-        *pl_pred = &average_latency_predictor;
+        current_predictor = average_latency_predictor(surr_config.total_terminals);
+        *pl_pred = &current_predictor;
         PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n");
     }
 
+    // Finding out whether to ignore some packet latencies
+    int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
+    if (rc) {
+        ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
+        PRINTF_ONCE("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n");
+    } else {
+        PRINTF_ONCE("ignore_until=%g a packet delievered before this time stamp will not be used in training any predictor\n", ignore_until);
+    }
+
     // Determining which predictor to set up and return
     char network_treatment_name[MAX_NAME_LENGTH];
     network_treatment_name[0] = '\0';
@@ -150,7 +153,7 @@ void surrogate_configure(
 
     //surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "vanilla");
+        fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
 }
 // === END OF All things Surrogate Configuration
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index 4bf3704c..23e93f32 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -101,12 +101,14 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
 }
 
 
-struct packet_latency_predictor average_latency_predictor = {
+struct packet_latency_predictor average_latency_predictor(int num_terminals) {
+    return (struct packet_latency_predictor) {
     .init              = (init_pred_f) init_pred,
     .feed              = (feed_pred_f) feed_pred,
     .predict           = (predict_pred_f) predict_latency,
     .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
-    .predictor_data_sz = sizeof(struct latency_surrogate) + 72 * sizeof(struct aggregated_latency_one_terminal)
-};
+    .predictor_data_sz = sizeof(struct latency_surrogate) + num_terminals * sizeof(struct aggregated_latency_one_terminal)
+    };
+}
 //
 // === END OF Average packet latency functionality

From 628907dd0f4f7a8c96f5cb995ed554c898365e28 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Oct 2023 13:02:39 -0400
Subject: [PATCH 056/188] Tracking packets not chunks to determine if message
 has been completed

The difference makes the zombie code work even when the surrogate time
is tiny in comparison
---
 codes/model-net-lp.h                       | 10 +++
 codes/net/dragonfly-dally.h                |  1 -
 src/networks/model-net/core/model-net-lp.c | 20 ++++++
 src/networks/model-net/dragonfly-dally.C   | 74 ++++++++++++----------
 4 files changed, 69 insertions(+), 36 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index 2f02ee4f..a7585ce4 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -60,6 +60,16 @@ tw_event * model_net_method_event_new(
         void **msg_data,
         void **extra_data);
 
+// Same as `model_net_method_event_new` extended to use user priorities to enforce ordering of some simulatenous events (USE WITH CARE!!)
+tw_event * model_net_method_event_new_user_prio(
+        tw_lpid dest_gid,
+        tw_stime offset_ts,
+        tw_lp *sender,
+        int net_id,
+        void **msg_data,
+        void **extra_data,
+        tw_stime prio);
+
 // Construct a model-net-specific event, similar to model_net_method_event_new.
 // The primary differences are:
 // - the event gets sent to final_dest_lp and put on it's receiver queue
diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 656d99d8..b5d93b88 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -127,7 +127,6 @@ struct terminal_dally_message
    tw_stime saved_last_in_queue_time;
    tw_stime saved_next_packet_delay;
    tw_stime msg_new_mn_event;
-   uint64_t saved_remaining_packet_chunks;
 
    //Yao: for counting msg app id
    tw_stime last_received_time;
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index eee8b3f8..e1b2e4e7 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -932,6 +932,26 @@ void handle_sched_next_rc(
 
 /**** END IMPLEMENTATIONS ****/
 
+tw_event * model_net_method_event_new_user_prio(
+        tw_lpid dest_gid,
+        tw_stime offset_ts,
+        tw_lp *sender,
+        int net_id,
+        void **msg_data,
+        void **extra_data,
+        tw_stime prio){
+    tw_event *e = tw_event_new_user_prio(dest_gid, offset_ts, sender, prio);
+    model_net_wrap_msg *m_wrap = tw_event_data(e);
+    msg_set_header(model_net_base_magic, MN_BASE_PASS, sender->gid,
+            &m_wrap->h);
+    *msg_data = ((char*)m_wrap)+msg_offsets[net_id];
+    // extra_data is optional
+    if (extra_data != NULL){
+        *extra_data = m_wrap + 1;
+    }
+    return e;
+}
+
 tw_event * model_net_method_event_new(
         tw_lpid dest_gid,
         tw_stime offset_ts,
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 621506cb..72430f95 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -239,6 +239,7 @@ struct dragonfly_param
     int global_vc_size; /* buffer size of the global channels */
     int cn_vc_size; /* buffer size of the compute node channels */
     int chunk_size; /* full-sized packets are broken into smaller chunks.*/
+    int packet_size; /* maximum size of a packet, although we have no control over it. It is model-net who is in charge of generating packets of at most this size */
     int global_k_picks; /* k number of connections to select from when doing local adaptive routing */
     int adaptive_threshold; 
     int rail_select; // method by which rails are selected
@@ -315,6 +316,7 @@ struct dfly_qhash_entry
     struct dfly_hash_key key;
     char * remote_event_data;
     int num_chunks;
+    int remaining_packets;
     int remote_event_size;
     struct qhash_head hash_link;
 };
@@ -1731,6 +1733,13 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
             fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size);
     }
 
+    rc = configuration_get_value_int(&config, "PARAMS", "packet_size", anno, &p->packet_size);
+    if(rc) {
+        p->chunk_size = 512;
+        if(!myRank)
+            fprintf(stderr, "Packet size not specificied, it is assumed to be %d\n", p->packet_size);
+    }
+
     rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth);
     if(rc) {
         p->local_bandwidth = 5.25;
@@ -4658,7 +4667,9 @@ static void notify_dest_lp_of(
     }
 
     terminal_dally_message * new_msg;
-    tw_event *e = model_net_method_event_new(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL);
+    // Lower value in priority means that it will be processed first
+    // This event will be processed before any predicted packet arrives (even if scheduled at the same timestamp)
+    tw_event *e = model_net_method_event_new_user_prio(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL, 0.5);
 
     memcpy(new_msg, msg, sizeof(terminal_dally_message)); // Just making sure that if the simulation breaks because we didn't set some value below, it breaks in a spectacular manner (~0 can be -1)
     assert(new_msg->dfdally_src_terminal_id == s->terminal_id);
@@ -4811,7 +4822,7 @@ static void packet_arrive_predicted_rc(terminal_state * s, tw_bf * bf, terminal_
     }
 
     if(bf->c6) {
-        tmp->num_chunks -= msg->saved_remaining_packet_chunks;
+        tmp->remaining_packets++;
     }
 
     if(bf->c5) {
@@ -4836,28 +4847,6 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
     //record for commit_f file IO
     msg->travel_end_time = tw_now(lp);
 
-    // packets arrive as one event not as multiple events (ie, predicted packets are not broken into chunks)
-    struct packet_id const packet_key = {
-        .packet_ID = msg->packet_ID,
-        .dfdally_src_terminal_id = msg->dfdally_src_terminal_id
-    };
-    bool const has_remaining_sz = s->remaining_sz_packets.count(packet_key) == 1;
-
-    // Finding out how many bytes are left to receive for this packet
-    int remaining_sz = 0;
-    if (has_remaining_sz) {
-        remaining_sz = s->remaining_sz_packets[packet_key];
-    } else {
-        remaining_sz = msg->packet_size;
-    }
-
-    uint64_t const chunk_size = s->params->chunk_size;
-    uint64_t remaining_packet_chunks = remaining_sz / chunk_size + (remaining_sz % chunk_size ? 1 : 0);
-    uint64_t total_chunks = msg->total_size / chunk_size + (msg->total_size % chunk_size ? 1 : 0);
-    if (remaining_packet_chunks == 0) { remaining_packet_chunks = 1; }
-    if (total_chunks == 0) { total_chunks = 1; }
-    msg->saved_remaining_packet_chunks = remaining_packet_chunks;
-
     // The table has to have been initialized already, if not, what the heck!
     struct dfly_hash_key key = {
         .message_id = msg->message_id,
@@ -4873,13 +4862,17 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
     // We create an entry into the hash only if it makes sense to do so (ie, only when the message needs multiple packets to be completed)
     } else if (msg->total_size > msg->packet_size) {
         bf->c5 = 1;
-        assert(remaining_sz == msg->packet_size);
+
+        uint64_t const packet_size = s->params->packet_size;
+        uint64_t total_packets = msg->total_size / packet_size + (msg->total_size % packet_size ? 1 : 0);
+        if (total_packets == 0) { total_packets = 1; }
 
         struct dfly_qhash_entry * const d_entry = (dfly_qhash_entry *) calloc(1, sizeof (struct dfly_qhash_entry));
         d_entry->num_chunks = 0;
         d_entry->key = key;
         d_entry->remote_event_data = NULL;
         d_entry->remote_event_size = 0;
+        d_entry->remaining_packets = total_packets;
         qhash_add(s->rank_tbl, &key, &(d_entry->hash_link));
         s->rank_tbl_pop++;
 
@@ -4895,10 +4888,10 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
         assert(msg->total_size == msg->packet_size);
     }
 
-    // Increasing the number of chunks received
+    // Decreasing the number of remaining packets
     if (tmp) {
         bf->c6 = 1;
-        tmp->num_chunks += remaining_packet_chunks;
+        tmp->remaining_packets--;
 
         /* retrieve the event data, all chunks from the same packet carry the `remote_event_data` */
         if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data)
@@ -4914,8 +4907,7 @@ static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dal
         }
     }
 
-    bool const is_msg_completed = tmp ? tmp->num_chunks >= total_chunks : true;
-    assert(tmp || total_chunks == remaining_packet_chunks);
+    bool const is_msg_completed = tmp ? tmp->remaining_packets == 0 : true;
 
     if(is_msg_completed) {
         bf->c7 = 1;
@@ -5061,6 +5053,10 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
     assert(tmp);
     tmp->num_chunks--;
 
+    if (bf->c13) {
+        tmp->remaining_packets++;
+    }
+
     if(bf->c5)
     {
         qhash_del(hash_link);
@@ -5238,12 +5234,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
 
     // Zombies don't generate delay notifications, and they don't modify the state of `s->rank_tbl` (`packet_arrive_predicted` should have removed the msg entry already)
     if (is_zombie) {
-        struct dfly_hash_key key = {
-            .message_id = msg->message_id,
-            .sender_id = msg->sender_lp,
-        };
         //printf("We got a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
-
         if (is_packet_completed) {
             s->zombies.erase(packet_key);
             bf->c14 = 1;
@@ -5287,11 +5278,16 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     if(!tmp)
     {
         bf->c5 = 1;
+        uint64_t const packet_size = s->params->packet_size;
+        uint64_t total_packets = msg->total_size / packet_size + (msg->total_size % packet_size ? 1 : 0);
+        if (total_packets == 0) { total_packets = 1; }
+
         struct dfly_qhash_entry * d_entry = (dfly_qhash_entry *)calloc(1, sizeof (struct dfly_qhash_entry));
         d_entry->num_chunks = 0;
         d_entry->key = key;
         d_entry->remote_event_data = NULL;
         d_entry->remote_event_size = 0;
+        d_entry->remaining_packets = total_packets;
         qhash_add(s->rank_tbl, &key, &(d_entry->hash_link));
         s->rank_tbl_pop++;
                 
@@ -5321,6 +5317,10 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
 
     // if the packet is complete (ie, this `msg` is the last piece of the packet)
     if (is_packet_completed) {
+        bf->c13 = 1;
+
+        tmp->remaining_packets--;
+
         //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
         if (packet_latency_f || surrogate_configured) {
             notify_src_lp_on_total_latency(lp, msg);
@@ -5334,9 +5334,13 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
 
     // if the message is complete (ie, this `msg` is the last piece of the message)
     /* If all chunks of a message have arrived then send a remote event to the callee */
-    if(tmp->num_chunks >= total_chunks)
+    //if(tmp->num_chunks >= total_chunks)  // this was the test before, it is a good test assumming the network is never frozen
+    if(tmp->remaining_packets == 0)
     {
         bf->c7 = 1;
+
+        assert(tmp->num_chunks <= total_chunks);
+
         s->data_size_sample += msg->total_size;
         s->ross_sample.data_size_sample += msg->total_size;
         s->data_size_ross_sample += msg->total_size;

From 3010118463ec330b1794a02b15620ee74ab690c9 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 22 Oct 2023 13:30:57 -0400
Subject: [PATCH 057/188] Extending examples with uniform random traffic
 example

---
 doc/example/CMakeLists.txt                    |   9 +-
 ...ate.conf.in => tutorial-surrogate.conf.in} |   2 +-
 doc/example/tutorial-synthetic-ping-pong.c    |  34 +-
 doc/example/tutorial-synthetic-uniform.c      | 346 ++++++++++++++++++
 ...ial-ping-pong.conf.in => tutorial.conf.in} |   2 +-
 src/networks/model-net/dragonfly-dally.C      |   4 +-
 tests/example-ping-pong-no-logging.sh         |   2 +-
 tests/example-ping-pong-surrogate-1.sh        |   4 +-
 tests/example-ping-pong-surrogate-2.sh        |   4 +-
 tests/example-ping-pong-surrogate-3.sh        |   4 +-
 ...ample-ping-pong-surrogate-determinism-1.sh |   4 +-
 ...ample-ping-pong-surrogate-determinism-2.sh |   4 +-
 12 files changed, 396 insertions(+), 23 deletions(-)
 rename doc/example/{tutorial-ping-pong-surrogate.conf.in => tutorial-surrogate.conf.in} (78%)
 create mode 100644 doc/example/tutorial-synthetic-uniform.c
 rename doc/example/{tutorial-ping-pong.conf.in => tutorial.conf.in} (97%)

diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
index 49451d91..6db84fd5 100644
--- a/doc/example/CMakeLists.txt
+++ b/doc/example/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(example-files
     example
     tutorial-synthetic-ping-pong
+    tutorial-synthetic-uniform
     )
 
 foreach(namefile ${example-files})
@@ -9,8 +10,8 @@ foreach(namefile ${example-files})
 endforeach()
 
 # Saving default config files to run experiments with
-configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.template.conf.in @ONLY)
-configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.template.conf.in @ONLY)
+configure_file(tutorial.conf.in tutorial.template.conf.in @ONLY)
+configure_file(tutorial-surrogate.conf.in tutorial-surrogate.template.conf.in @ONLY)
 
 set(single_quote "'")
 set(double_quote "\"")
@@ -21,5 +22,5 @@ set(NETWORK_TREATMENT "freeze")
 set(PACKET_LATENCY_TRACE_PATH "packet-latency-trace/")
 set(IGNORE_UNTIL "200e4")
 string(REPLACE ${single_quote} ${double_quote} SWITCH_TIMESTAMPS "'1000e4', '8900e4'")
-configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.conf)
-configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.conf)
+configure_file(tutorial.conf.in tutorial-ping-pong.conf)
+configure_file(tutorial-surrogate.conf.in tutorial-ping-pong-surrogate.conf)
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-surrogate.conf.in
similarity index 78%
rename from doc/example/tutorial-ping-pong-surrogate.conf.in
rename to doc/example/tutorial-surrogate.conf.in
index 0797324b..6d2b3e58 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf.in
+++ b/doc/example/tutorial-surrogate.conf.in
@@ -56,7 +56,7 @@ PARAMS
 # folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
    save_packet_latency_path="${PACKET_LATENCY_TRACE_PATH}";
 # router buffer occupancy snapshots
-   router_buffer_snapshots=( "100e3", "200e3", "300e3", "400e3", "500e3", "600e3", "700e3", "800e3", "900e3", "1e6", "1.1e6", "1.2e6", "1.3e6", "1.4e6", "1.5e6", "1.6e6", "1.7e6", "1.8e6", "1.9e6", "2e6", "2.1e6", "2.2e6", "2.3e6", "2.4e6", "2.5e6", "2.6e6", "2.7e6", "2.8e6", "2.9e6", "3e6", "3.1e6", "3.2e6", "3.3e6", "3.4e6", "3.5e6", "3.6e6", "3.7e6", "3.8e6", "3.9e6", "4e6", "4.1e6", "4.2e6", "4.3e6", "4.4e6", "4.5e6", "4.6e6", "4.7e6", "4.8e6", "4.9e6", "5e6", "5.1e6", "5.2e6", "5.3e6", "5.4e6", "5.5e6", "5.6e6", "5.7e6", "5.8e6", "5.9e6", "6e6", "6.1e6", "6.2e6", "6.3e6", "6.4e6", "6.5e6", "6.6e6", "6.7e6", "6.8e6", "6.9e6", "7e6", "7.1e6", "7.2e6", "7.3e6", "7.4e6", "7.5e6", "7.6e6", "7.7e6", "7.8e6", "7.9e6", "8e6", "8.1e6", "8.2e6", "8.3e6", "8.4e6", "8.5e6", "8.6e6", "8.7e6", "8.8e6", "8.9e6", "9e6", "9.1e6", "9.2e6", "9.3e6", "9.4e6", "9.5e6", "9.6e6", "9.7e6", "9.8e6", "9.9e6", "9.990e6" );
+   router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} );
 }
 SURROGATE {
 # determines the director switching from surrogate to high-def simulation strategy
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 1aaf0528..664d6f03 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -11,6 +11,8 @@
 
 static int net_id = 0;
 static int PAYLOAD_SZ = 4096;
+static int RANDOM_PAYLOAD_SZ = 0; // If turned on, it assumes that PAYLOAD_SZ is a multiple of CHUNK_SIZE
+static int CHUNK_SIZE = 64; // This value depends on the network being used
 static unsigned long long num_nodes = 0;
 
 static char lp_io_dir[256] = {'\0'};
@@ -85,6 +87,7 @@ const tw_optdef app_opt [] =
     	TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "),
     	TWOPT_UINT("num_initial_messages", num_initial_msgs, "Number of PING messages to be injected initially at the start (larger = more congestion)"),
     	TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "),
+    	TWOPT_UINT("random_payload_sz", RANDOM_PAYLOAD_SZ, "whether payloads are a random number between 1 and payload_sz (default 0)"),
         TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
         TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
         TWOPT_END()
@@ -100,6 +103,21 @@ static void svr_add_lp_type()
   lp_type_register("nw-lp", svr_get_lp_type());
 }
 
+static long payload_size_forward(tw_lp * lp) {
+    long payload_size = PAYLOAD_SZ;
+    if (RANDOM_PAYLOAD_SZ) {
+        payload_size = tw_rand_integer(lp->rng, 0, PAYLOAD_SZ > CHUNK_SIZE ? PAYLOAD_SZ / CHUNK_SIZE : 1);
+        payload_size *= CHUNK_SIZE;
+    }
+    return payload_size;
+}
+
+static void payload_size_rev(tw_lp * lp) {
+    if (RANDOM_PAYLOAD_SZ) {
+        tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload size
+    }
+}
+
 static void svr_init(svr_state * s, tw_lp * lp)
 {
     //Initialize State
@@ -159,7 +177,8 @@ static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
     s->ping_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
+    long const payload_size = payload_size_forward(lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, payload_size, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
 }
 
 static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -167,6 +186,7 @@ static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_l
     (void) b;
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
     s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
+    payload_size_rev(lp);
     tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
     tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
 }
@@ -187,12 +207,14 @@ static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0);
     s->pong_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
+    long const payload_size = payload_size_forward(lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, payload_size, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
 }
 
 static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
     (void) b;
+    payload_size_rev(lp);
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
     s->pong_msg_sent_count--;
     s->payload_sum -= m->payload_value; //undo the increment of the payload sum
@@ -221,7 +243,8 @@ static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     tw_lpid global_dest = codes_mapping_get_lpid_from_relative(send_to, group_name, lp_type_name, NULL, 0);
     s->ping_msg_sent_count++;
-    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
+    long const payload_size = payload_size_forward(lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, payload_size, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
 }
 
 static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -229,6 +252,7 @@ static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     if (! b->c1) { //if we didn't flip the c1 flag in the forward event
         model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
         s->ping_msg_sent_count--;
+        payload_size_rev(lp);
         tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
         tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
         b->c1 = 0;
@@ -240,6 +264,7 @@ static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
 static void svr_finalize(svr_state * s, tw_lp * lp)
 {
     int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count;
+    // TODO (Elkin): this is wrong for random payload sizes
     int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
     tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
 
@@ -341,6 +366,9 @@ int main(int argc, char **argv)
     num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
     assert(num_nodes);
 
+    int rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", NULL, &CHUNK_SIZE);
+    if(rc) { CHUNK_SIZE = 512; }
+
     if(lp_io_dir[0])
     {
         do_lp_io = 1;
diff --git a/doc/example/tutorial-synthetic-uniform.c b/doc/example/tutorial-synthetic-uniform.c
new file mode 100644
index 00000000..0dd6bcab
--- /dev/null
+++ b/doc/example/tutorial-synthetic-uniform.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (C) 2019 Neil McGlohon - 2023 Elkin Cruz
+ * Based on tutorial-synthetic-ping-pong.c by 2019 Neil McGlohon
+ * See LICENSE notice in top-level directory
+ */
+
+#include "codes/model-net.h"
+#include "codes/codes_mapping.h"
+#include "codes/surrogate/init.h"  // just needed for stats on surrogate-mode
+
+
+static int net_id = 0;
+static int PAYLOAD_SZ = 4096;
+static int RANDOM_PAYLOAD_SZ = 0; // If turned on, it assumes that PAYLOAD_SZ is a multiple of CHUNK_SIZE
+static int CHUNK_SIZE = 512; // This value depends on the network configuration
+static unsigned long long num_nodes = 0;
+
+static char lp_io_dir[256] = {'\0'};
+static lp_io_handle io_handle;
+static unsigned int lp_io_use_suffix = 0;
+
+static int num_msgs = 10000;
+static int terminal_queue_size = 3;
+
+/* global variables for codes mapping */
+static char group_name[MAX_NAME_LENGTH];
+static char lp_type_name[MAX_NAME_LENGTH];
+static int group_index, lp_type_index, rep_id, offset;
+
+/* type of events */
+enum SVR_EVENT
+{
+    SVR_EVENT_send = 1,
+    SVR_EVENT_msg
+};
+
+struct svr_msg
+{
+    enum SVR_EVENT svr_event_type; // kickoff, heartbeat, msg
+    int sender_id; //ID of the sender workload LP to know who to send a PONG message back to
+    int payload_value; //Some value that we will encode as an example
+    // Used for rollback
+    int payload_size; //Size of payload (the actual event is not of this size, this is just a number we decide on)
+    model_net_event_return event_rc; //helper to encode data relating to CODES rng usage
+    tw_stime previous_ts;
+};
+
+struct svr_state
+{
+    tw_lpid svr_id;       /* the ID of this server */
+    int msg_sent_count;   /* messages sent */
+    int msg_recvd_count;  /* messages received */
+    int total_bytes_sent; /* total bytes sent */
+    tw_stime start_ts;    /* time that this LP started sending requests */
+    tw_stime end_ts;      /* time that this LP ended sending requests */
+    int payload_sum;      /* the running sum of all payloads received */
+};
+
+/* declaration of functions */
+static void svr_init(struct svr_state * s, tw_lp * lp);
+static void svr_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp);
+static void svr_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp);
+static void svr_finalize(struct svr_state * s, tw_lp * lp);
+static tw_stime ns_to_s(tw_stime ns);
+static tw_stime s_to_ns(tw_stime s);
+
+/* ROSS lptype function callback mapping */
+tw_lptype svr_lp = {
+    (init_f) svr_init,
+    (pre_run_f) NULL,
+    (event_f) svr_event,
+    (revent_f) svr_rev_event,
+    (commit_f) NULL,
+    (final_f)  svr_finalize,
+    (map_f) codes_mapping,
+    sizeof(struct svr_state),
+};
+
+const tw_optdef app_opt [] =
+{
+        TWOPT_GROUP("Model net synthetic traffic " ),
+        TWOPT_UINT("num_messages", num_msgs, "Number of messages to be sent from terminal"),
+        TWOPT_UINT("injection_queue_size", terminal_queue_size, "Number of packets in a terminal's queue at any point in time (default 2)"),
+        TWOPT_UINT("payload_sz", PAYLOAD_SZ, "size of the message being sent "),
+        TWOPT_UINT("random_payload_sz", RANDOM_PAYLOAD_SZ, "whether payloads are a random number between 'chunk_size' and payload_sz (default 0 -> deactivated)"),
+        TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
+        TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
+        TWOPT_END()
+};
+
+const tw_lptype* svr_get_lp_type()
+{
+    return(&svr_lp);
+}
+
+static void svr_add_lp_type()
+{
+  lp_type_register("nw-lp", svr_get_lp_type());
+}
+
+static long payload_size_forward(tw_lp * lp) {
+    long payload_size = PAYLOAD_SZ;
+    if (RANDOM_PAYLOAD_SZ) {
+        payload_size = tw_rand_integer(lp->rng, 0, PAYLOAD_SZ > CHUNK_SIZE ? PAYLOAD_SZ / CHUNK_SIZE : 1);
+        payload_size *= CHUNK_SIZE;
+    }
+    return payload_size;
+}
+
+static void payload_size_rev(tw_lp * lp) {
+    if (RANDOM_PAYLOAD_SZ) {
+        tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload size
+    }
+}
+
+static void svr_init(struct svr_state * s, tw_lp * lp)
+{
+    //Initialize State
+    s->msg_sent_count = 0;
+    s->msg_recvd_count = 0;
+    s->total_bytes_sent = 0;
+    s->start_ts = 0.0;
+    s->end_ts = 0.0;
+    s->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); /* turns the LP Global ID into the server ID */
+    s->payload_sum = 0;
+
+    // This bit is just for testing. Only the first terminal (0) sends events
+    //if (lp->gid != 0) {
+    //    return;
+    //}
+
+    //Now we create and send a self "kickoff" message - this is a PDES coordination event and thus doesn't need to be injected into the connected network
+    //so we won't use model_net_event(), that's reserved for stuff we want to send across the network
+
+    /* Set a time from now when this message is to be received by the recipient (self in this cae.) add some tiny random noise to help avoid event ties (different events with same timestamp) */
+    //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window
+    tw_stime send_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001);
+
+    for (int i = 1; i <= terminal_queue_size && i <= num_msgs; i++) {
+        tw_event *e;
+        struct svr_msg *m;
+        e = tw_event_new(lp->gid, send_time * i, lp); //ROSS method to create a new event
+        m = tw_event_data(e); //Gives you a pointer to the data encoded within event e
+        m->sender_id = s->svr_id; //Set the event type so we can know how to classify the event when received
+        m->svr_event_type = SVR_EVENT_send; //Set the event type so we can know how to classify the event when received
+        tw_event_send(e); //ROSS method to send off the event e with the encoded data in m
+    }
+
+    s->start_ts = send_time; // the time when we're starting this LP's work is when the first ping is generated
+}
+
+static void handle_send_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+
+    if(s->msg_sent_count >= num_msgs) {//if we've sent enough messages, then we stop and don't send any more
+        b->c1 = 1; //flag that we didn't really do anything in this event so that if this event gets reversed, we don't over-aggressively revert state or RNGs
+        return;
+    }
+    assert((tw_lpid) m->sender_id == s->svr_id);
+
+    tw_lpid local_dest = -1; //ID of a sever, relative to only servers
+    tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs
+
+    //We want to make sure we're not accidentally picking ourselves
+    local_dest = tw_rand_integer(lp->rng, 1, num_nodes - 2);
+    local_dest = (s->svr_id + local_dest) % num_nodes;
+    //local_dest is now a number [0,num_nodes) but is assuredly not s->svr_id
+    assert(local_dest >= 0);
+    assert(local_dest < num_nodes);
+    assert(local_dest != s->svr_id);
+
+    // Message to send to random terminal
+    struct svr_msg msg_to_send;
+    msg_to_send.sender_id = s->svr_id; //encode our server ID into the new ping message
+    msg_to_send.svr_event_type = SVR_EVENT_msg; //set it to type MSG
+    msg_to_send.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10]
+    long const payload_size = payload_size_forward(lp);
+    m->payload_size = payload_size;
+    s->total_bytes_sent += payload_size;
+
+    // Message to send to self, in order to inject more another packet
+    struct svr_msg msg_to_self;
+    msg_to_self.sender_id = s->svr_id;
+    msg_to_self.svr_event_type = SVR_EVENT_send; // when the packet finally leaves the terminal, this event will be sent back to us
+
+    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
+    global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
+    s->msg_sent_count++;
+    m->event_rc = model_net_event(
+            net_id, "test", global_dest, payload_size, 0.0,
+            sizeof(struct svr_msg), (const void*)&msg_to_send,
+            sizeof(struct svr_msg), (const void*)&msg_to_self, lp);
+}
+
+static void handle_send_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    if (! b->c1) { //if we didn't flip the c1 flag in the forward event
+        model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
+        s->msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
+        s->total_bytes_sent -= m->payload_size;
+        payload_size_rev(lp);
+        tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
+        tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
+        b->c1 = 0;
+    }
+}
+
+static void handle_recv_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    (void) lp;
+    s->msg_recvd_count++; //increment the counter for ping messages received
+    s->payload_sum += m->payload_value; //increment our running sum of payload values received
+}
+
+static void handle_recv_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
+{
+    (void) b;
+    (void) lp;
+    s->payload_sum -= m->payload_value; //undo the increment of the payload sum
+    s->msg_recvd_count--; //undo the increment of the counter for ping messages received
+}
+
+static void svr_finalize(struct svr_state * s, tw_lp * lp)
+{
+    tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
+
+    printf("Server LPID:%lu svr_id:%lu sent %d bytes in %f seconds, MSGs Sent: %d; MSGs Received: %d Payload Sum: %d\n",
+            (unsigned long)lp->gid, (unsigned long)s->svr_id, s->total_bytes_sent,
+            time_in_seconds_sent, s->msg_sent_count, s->msg_recvd_count, s->payload_sum);
+}
+
+static void svr_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
+{
+    m->previous_ts = s->end_ts;
+    s->end_ts = tw_now(lp);
+
+    switch (m->svr_event_type)
+    {
+        case SVR_EVENT_send:
+            handle_send_event(s, b, m, lp);
+            break;
+        case SVR_EVENT_msg:
+            handle_recv_event(s, b, m, lp);
+            break;
+        default:
+            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
+            break;
+    }
+}
+
+static void svr_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
+{
+    switch (m->svr_event_type)
+    {
+        case SVR_EVENT_send:
+            handle_send_rev_event(s, b, m, lp);
+            break;
+        case SVR_EVENT_msg:
+            handle_recv_rev_event(s, b, m, lp);
+            break;
+        default:
+            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
+            break;
+    }
+
+    s->end_ts = m->previous_ts;
+}
+
+/* convert ns to seconds */
+static tw_stime ns_to_s(tw_stime ns)
+{
+    return(ns / (1000.0 * 1000.0 * 1000.0));
+}
+static tw_stime s_to_ns(tw_stime s)
+{
+    return(s*1000.0*1000.0*1000.0);
+}
+
+int main(int argc, char **argv)
+{
+    int nprocs;
+    int rank;
+    int num_nets;
+    int *net_ids;
+
+    /* 1 day of simulation time is drastically huge but it will ensure
+       that the simulation doesn't try to end before all packets are delivered */
+    g_tw_ts_end = s_to_ns(24 * 60 * 60);
+
+    tw_opt_add(app_opt);
+    tw_init(&argc, &argv);
+
+    codes_comm_update();
+
+    if(argc < 2)
+    {
+            printf("\n Usage: mpirun <args> --sync=1/2/3 -- <config_file.conf> ");
+            MPI_Finalize();
+            return 0;
+    }
+
+    MPI_Comm_rank(MPI_COMM_CODES, &rank);
+    MPI_Comm_size(MPI_COMM_CODES, &nprocs);
+
+    configuration_load(argv[2], MPI_COMM_CODES, &config);
+
+    model_net_register();
+    svr_add_lp_type();
+
+    codes_mapping_setup();
+
+    net_ids = model_net_configure(&num_nets);
+    net_id = *net_ids;
+    free(net_ids);
+
+    num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
+    assert(num_nodes);
+
+    int rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", NULL, &CHUNK_SIZE);
+    if(rc) { CHUNK_SIZE = 512; }
+
+    bool do_lp_io = 0;
+    if(lp_io_dir[0])
+    {
+        do_lp_io = 1;
+        int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0;
+        int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES);
+        assert(ret == 0 || !"lp_io_prepare failure");
+    }
+    tw_run();
+    if (do_lp_io){
+        int ret = lp_io_flush(io_handle, MPI_COMM_CODES);
+        assert(ret == 0 || !"lp_io_flush failure");
+    }
+    model_net_report_stats(net_id);
+
+    // Printing some stats
+    print_surrogate_stats();
+
+    tw_end();
+    return 0;
+}
+
diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial.conf.in
similarity index 97%
rename from doc/example/tutorial-ping-pong.conf.in
rename to doc/example/tutorial.conf.in
index 6de3c4ff..e8e2ce4e 100644
--- a/doc/example/tutorial-ping-pong.conf.in
+++ b/doc/example/tutorial.conf.in
@@ -51,7 +51,7 @@ PARAMS
 # routing protocol to be used
    routing="prog-adaptive";
 # router buffer occupancy snapshots
-   router_buffer_snapshots=( "50e4", "60e4" );
+   router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} );
 # folder path to store packet latency from terminal to terminal, if no value is given it won't save anything
    save_packet_latency_path="${PACKET_LATENCY_TRACE_PATH}";
 }
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 72430f95..0922337e 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3938,9 +3938,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
 
     if(msg->local_event_size_bytes > 0)
     {
-        // TODO (Elkin): This delay is wrong. It might take quite a bit longer in some cases as all the chunks are processed until we get to this. Create a better estimate based on the number of total chunks!
-        tw_stime local_ts = 0;
-        tw_event *e_new = tw_event_new(msg->sender_lp, local_ts, lp);
+        tw_event *e_new = tw_event_new(msg->sender_lp, nic_ts, lp);
         void *m_new = tw_event_data(e_new);
         void *local_event = (char*) model_net_method_get_edata(DRAGONFLY_DALLY, msg) + msg->remote_event_size_bytes;
         memcpy(m_new, local_event, msg->local_event_size_bytes);
diff --git a/tests/example-ping-pong-no-logging.sh b/tests/example-ping-pong-no-logging.sh
index 0fb0be8d..3fe24d69 100755
--- a/tests/example-ping-pong-no-logging.sh
+++ b/tests/example-ping-pong-no-logging.sh
@@ -10,7 +10,7 @@ fi
 export PACKET_SIZE=4096
 export CHUNK_SIZE=4096
 export PACKET_LATENCY_TRACE_PATH=
-cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
+cat "$bindir/doc/example"/tutorial.template.conf.in | envsubst > tutorial-ping-pong.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh
index 7f3a5f6d..67f8830e 100755
--- a/tests/example-ping-pong-surrogate-1.sh
+++ b/tests/example-ping-pong-surrogate-1.sh
@@ -15,10 +15,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/
-cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
+cat "$bindir/doc/example"/tutorial.template.conf.in | envsubst > tutorial-ping-pong.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh
index f987bedf..c3c5fe5f 100755
--- a/tests/example-ping-pong-surrogate-2.sh
+++ b/tests/example-ping-pong-surrogate-2.sh
@@ -15,10 +15,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/
-cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
+cat "$bindir/doc/example"/tutorial.template.conf.in | envsubst > tutorial-ping-pong.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh
index 19212e9e..12e2bd64 100755
--- a/tests/example-ping-pong-surrogate-3.sh
+++ b/tests/example-ping-pong-surrogate-3.sh
@@ -15,11 +15,11 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-freeze/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
 
 export NETWORK_TREATMENT=nothing
 export PACKET_LATENCY_TRACE_PATH=packet-latency-non-freeze/
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-non-freeze.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-non-freeze.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh
index cd219272..8f926795 100755
--- a/tests/example-ping-pong-surrogate-determinism-1.sh
+++ b/tests/example-ping-pong-surrogate-determinism-1.sh
@@ -13,10 +13,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh
index b86f0dfd..3fb00206 100755
--- a/tests/example-ping-pong-surrogate-determinism-2.sh
+++ b/tests/example-ping-pong-surrogate-determinism-2.sh
@@ -13,10 +13,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/
-cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
+cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
 
 # Running simulation twice with the same parameters
 

From 4fd9ea44c95fc1adef38f087a94bf0b70b41955a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 27 Oct 2023 17:47:59 -0400
Subject: [PATCH 058/188] Fixed NaN bug when packet predictor hasn't been fed a
 packet with another one in queue

---
 .../model-net-synthetic-dragonfly-all.c         | 17 +++++++++--------
 src/networks/model-net/dragonfly-dally.C        |  1 +
 src/surrogate/init.c                            |  2 +-
 .../packet-latency-predictor/average.c          |  3 ++-
 src/surrogate/switch.c                          |  6 ++----
 5 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/network-workloads/model-net-synthetic-dragonfly-all.c b/src/network-workloads/model-net-synthetic-dragonfly-all.c
index b763accc..bcebafa4 100644
--- a/src/network-workloads/model-net-synthetic-dragonfly-all.c
+++ b/src/network-workloads/model-net-synthetic-dragonfly-all.c
@@ -381,15 +381,16 @@ static void handle_kickoff_event(
     char anno[MAX_NAME_LENGTH];
     tw_lpid local_dest = -1, global_dest = -1;
 
-    svr_msg * m_local = malloc(sizeof(svr_msg));
-    svr_msg * m_remote = malloc(sizeof(svr_msg));
+    svr_msg m_local;
+    svr_msg m_remote;
 
-    m_local->svr_event_type = LOCAL;
-    m_local->src = lp->gid;
-    m_local->msg_start_time = tw_now(lp);
+    m_local.svr_event_type = LOCAL;
+    m_local.src = lp->gid;
+    m_local.msg_start_time = tw_now(lp);
 
-    memcpy(m_remote, m_local, sizeof(svr_msg));
-    m_remote->svr_event_type = REMOTE;
+    m_remote.svr_event_type = REMOTE;
+    m_remote.src = lp->gid;
+    m_remote.msg_start_time = tw_now(lp);
 
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, anno, &rep_id, &offset);
     int local_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);
@@ -459,7 +460,7 @@ static void handle_kickoff_event(
 
    ns->msg_sent_count++;
    ns->last_send_ts = tw_now(lp);
-   m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)m_remote, sizeof(svr_msg), (const void*)m_local, lp);
+   m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
    issue_event(ns, lp);
    return;
 }
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 0922337e..3ce88aac 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3888,6 +3888,7 @@ static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_d
     // results when running in surrogate. A good model should produce similar `nic`s to what the code above
     // does (the average predictor does just that!)
     double const nic_ts = end.next_packet_delay;
+    assert(nic_ts > 0);
 
     // Scheduling idle event for next packet to be processed
     bool const is_from_remote = false;
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 50b90801..79bb7e71 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -10,7 +10,7 @@ bool freeze_network_on_switch = true;
 struct surrogate_config surr_config = {0};
 bool is_surrogate_configured = false;
 struct switch_at_struct switch_at;
-struct packet_latency_predictor current_predictor = {0};
+static struct packet_latency_predictor current_predictor = {0};
 
 
 // === Stats!
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index 23e93f32..60dc3441 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -87,7 +87,8 @@ static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp
     }
     assert(latency >= 0);
 
-    double const next_packet_delay =
+    // TODO (Elkin): 10 is an arbitrary small value, but it should be nic_ts as implemented in `packet_getenerate` in dragonfly-dally
+    double const next_packet_delay = data->aggregated_next_packet_delay.total_msgs == 0 ? 10 :
         data->aggregated_next_packet_delay.sum_latency / data->aggregated_next_packet_delay.total_msgs;
     return (struct packet_end) {
         .travel_end_time = packet_dest->travel_start_time + latency,
diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 49a37f2c..4b29ab18 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -106,12 +106,10 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
         tw_gvt_step2(pe);
 
         if (DEBUG_DIRECTOR > 1) {
-            printf("PE %lu: Time stamp at the end of GVT time: %e - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size);
+            printf("PE %lu: Time stamp at the end of GVT time: %f - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size);
         }
     } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0));
 
-    tw_pe_fossil_collect();
-
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: All events rolledbacked and cancelled\n", g_tw_mynode);
     }
@@ -454,7 +452,7 @@ void director_switch(tw_pe * pe, tw_stime gvt) {
         if (DEBUG_DIRECTOR == 2) {
             printf("\n");
         }
-        printf("Switching at %g", gvt);
+        printf("Switching at %f", gvt);
     }
     // Rollback if in optimistic mode
 #ifdef USE_RAND_TIEBREAKER

From 1707f1feb06fc13d2249cc681c560a9de415f17a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 1 Nov 2023 20:47:20 -0400
Subject: [PATCH 059/188] New strategy to feed predictor implemented

Previously, the predictor would only be given packet information (packet
id, latency, etc) in an increasing order on packet id. Now it's
different. The predictor is fed the packet information whenever is
available.

Sometimes, the first packet injected in the network takes far too long
to be delievered. In those cases, the predictor never receives any info
for long periods of time.
---
 .../packet-latency-predictor/common.h         |   2 -
 src/networks/model-net/dragonfly-dally.C      | 507 ++++++++++--------
 2 files changed, 282 insertions(+), 227 deletions(-)

diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h
index 4812a12a..aae0f0d7 100644
--- a/codes/surrogate/packet-latency-predictor/common.h
+++ b/codes/surrogate/packet-latency-predictor/common.h
@@ -28,8 +28,6 @@ struct packet_start {
     double processing_packet_delay;  // delay for this packet to be processed from previous packet in the queue
     uint32_t packet_size;
     bool is_there_another_pckt_in_queue; // is there another packet in queue
-    void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
-    void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
 };
 
 struct packet_end {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 3ce88aac..a2676fcd 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -30,8 +30,6 @@
 #include <map>
 #include <set>
 #include <algorithm>
-#include <queue>
-#include <deque>
 #include <errno.h>
 #include <sys/stat.h>
 
@@ -484,19 +482,15 @@ static bool isRoutingNonminimalExplicit(int alg)
  * Surrogate definitions and data
  */
 
-struct packet_double_val {
-    uint64_t packet_ID;
-    double value; // This can either be packet delivery latency or delay in queue to be processed
+struct packet_sent {
+    struct packet_start start;
+    double next_packet_delay; // When the packet is initially sent, this value is -1, when the next packet is sent this value is updated to the actual delay to process the next packet
+    void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
+    void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
 };
-// Comparison function object to use in min-heap of sent_packets_latency
-static struct {
-    bool operator() (struct packet_double_val const l, struct packet_double_val const r) const {
-        return l.packet_ID > r.packet_ID;
-    }
-} packet_double_val_greater_cmp;
 
 struct packet_id {
-    uint64_t packet_ID;
+    int64_t packet_ID;
     unsigned int dfdally_src_terminal_id;
 };
 bool operator<(struct packet_id const &lk, struct packet_id const &rk) {
@@ -508,7 +502,7 @@ static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_mes
 /* dragonfly compute node data structure */
 struct terminal_state
 {
-    uint64_t packet_counter;
+    int64_t packet_counter;
 
     int packet_gen;
     int packet_fin;
@@ -601,11 +595,13 @@ struct terminal_state
 
     // Variables to recover latency of packets sent to other terminals
     // Sent packets (to be populated at by commit handler of packet sender)
-    deque<struct packet_start> sent_packets;
-    // min-heap for latencies of packets once they arrive (some packets might
-    // arrive faster than others, so a list like the one above is not feasible
-    // to store in order efficiently their arrival)
-    priority_queue<struct packet_double_val, vector<struct packet_double_val>, decltype(packet_double_val_greater_cmp)> sent_packets_latency;
+    map<int64_t, struct packet_sent> sent_packets;
+    int64_t last_packet_sent_id;
+    // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency)
+    struct {
+        int64_t packet_ID;
+        double travel_end_time;
+    } arrival_of_last_packet;
     // received (and not completed, yet) packets. The value associated to a key is the remaining number of "bytes" to receive before the packet is consumed totally. If a packet size == chunk size, this map will never be used/filled
     map<struct packet_id, uint32_t> remaining_sz_packets;
 
@@ -2861,20 +2857,20 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
 
 static inline void packet_latency_save_to_file(
         unsigned int terminal_id,
-        struct packet_start start,
-        struct packet_end end,
+        struct packet_start * start,
+        struct packet_end * end,
         bool surrogate_on,
         bool is_predicted
 ) {
     if (!packet_latency_f) { return; } // Don't save if there isn't a file to save to
-    if (end.travel_end_time > g_tw_ts_end) { return; } // This packet could never arrive to its destination!
+    if (end->travel_end_time > g_tw_ts_end) { return; } // This packet could never arrive to its destination!
     fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f,%d\n",
-            terminal_id, start.dfdally_dest_terminal_id, start.packet_ID,
-            surrogate_on, is_predicted,
-            start.packet_size,
-            start.workload_injection_time, end.next_packet_delay,
-            start.travel_start_time, end.travel_end_time, end.travel_end_time - start.travel_start_time,
-            start.is_there_another_pckt_in_queue);
+            terminal_id, start->dfdally_dest_terminal_id, start->packet_ID,
+            surrogate_on, is_predicted, start->packet_size,
+            start->workload_injection_time,
+            end->next_packet_delay, start->travel_start_time,
+            end->travel_end_time, end->travel_end_time - start->travel_start_time,
+            start->is_there_another_pckt_in_queue);
 }
 
 // ==== START OF Surrogate functions definition ====
@@ -2887,46 +2883,36 @@ static bool is_surrogate_on_fun(void) {
     return is_surrogate_on;
 }
 
-// Goes through all received packet latencies and process them in order in which they were sent through the network
-static void process_packet_latencies(terminal_state * s, tw_lp * lp)
-{
-    while( s->sent_packets.size() >= 2  // We need at least two packets to determine the delay of the next packet to be processed
-        && !s->sent_packets_latency.empty()
-        && s->sent_packets.front().packet_ID == s->sent_packets_latency.top().packet_ID
-        )
-    {
-        auto start = s->sent_packets.front();
-        double const next_packet_delay = s->sent_packets[1].processing_packet_delay;
-        struct packet_end end = {
-            .travel_end_time = s->sent_packets_latency.top().value,
-            .next_packet_delay = next_packet_delay,
-        };
-        packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, false);
-        if (surrogate_configured && !is_surrogate_on) {
-            assert(terminal_predictor != NULL);
-            terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &start, &end);
-        }
+static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, int64_t packet_ID, double end_time) {
+    assert(s->sent_packets.count(packet_ID) == 1); // packet_ID is in s->sent_packets
+    auto sent = s->sent_packets[packet_ID];
+    struct packet_end end = {
+        .travel_end_time = end_time,
+        .next_packet_delay = sent.next_packet_delay,
+    };
 
-        // Deallocating memory
-        if (start.message_data) {
-            free(start.message_data);
-        }
-        if (start.remote_event_data) {
-            free(start.remote_event_data);
-        }
+    packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_surrogate_on, false);
+    if (surrogate_configured && !is_surrogate_on) {
+        assert(terminal_predictor != NULL);
+        terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &sent.start, &end);
+    }
 
-        s->sent_packets.pop_front();
-        s->sent_packets_latency.pop();
+    // Deallocating memory
+    if (sent.message_data) {
+        free(sent.message_data);
+    }
+    if (sent.remote_event_data) {
+        free(sent.remote_event_data);
     }
 }
 
 // Constructs a hashmap with all the T_NOTIFY events to be processed.
 // The key of the list is the GID for the source terminal. The value of the
 // hash is the end time
-static map<uint64_t, double> construct_map_of_NOTIFY_LATENCY_events(
+static map<int64_t, double> construct_map_of_NOTIFY_LATENCY_events(
         tw_lp * lp, tw_event ** const terminal_events) {
     // hash map to store T_NOTIFY events found (`packet_ID` and `travel_end_time`)
-    map<uint64_t, double> notification_events_map;
+    map<int64_t, double> notification_events_map;
 
     for (size_t i = 0; terminal_events && terminal_events[i] != NULL; i++) {
         assert(terminal_events[i]->dest_lpid == lp->gid);
@@ -2949,67 +2935,56 @@ static map<uint64_t, double> construct_map_of_NOTIFY_LATENCY_events(
 // This function never rollsback because it's called at GVT
 static void dragonfly_dally_terminal_highdef_to_surrogate(
         terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
-    process_packet_latencies(s, lp);
 
     auto notification_events_map = construct_map_of_NOTIFY_LATENCY_events(lp, terminal_events);
 
+    if (s->arrival_of_last_packet.packet_ID != -1) {
+        assert(s->sent_packets.count(s->arrival_of_last_packet.packet_ID) == 1); // packet_ID is in s->sent_packets
+        assert(s->sent_packets[s->arrival_of_last_packet.packet_ID].next_packet_delay < 0); // next_packet_delay is -1
+
+        double const travel_end_time = s->arrival_of_last_packet.travel_end_time;
+        feed_packet_to_predictor(s, lp, s->arrival_of_last_packet.packet_ID, travel_end_time);
+        s->sent_packets.erase(s->arrival_of_last_packet.packet_ID);
+        s->arrival_of_last_packet.packet_ID = -1;
+    }
+
     // Going through every packet that was sent but not yet received, remove it
     // from the list, send it to its destination using the predictor, and
     // notify of its zombie status.
-    while(!s->sent_packets.empty()) {
-        struct packet_start start = s->sent_packets.front();
-        s->sent_packets.pop_front();
-        assert(start.message_data);
-
-        // The predictor is asked to predict the latency of the packet regardless if it is a zombie or not.
-        // (This makes it so that we feed the predictor only during high-def mode, and never a switching time)
-        struct packet_end predicted_end = 
-            terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start);
-
-        bool const in_sent_packets_latency =
-            !s->sent_packets_latency.empty() && start.packet_ID == s->sent_packets_latency.top().packet_ID;
+    // (deleting all elements from s->sent_packets as we go)
+    for (auto it = s->sent_packets.begin(); it != s->sent_packets.end(); it = s->sent_packets.erase(it)) {
+        int64_t packet_ID = it->first;
+        auto & sent = it->second;
+
+        assert(packet_ID == sent.start.packet_ID);
+
         // Finding out whether the packet-latency is on the list of messages to be processed
-        bool const in_events_to_process = !in_sent_packets_latency &&
-            notification_events_map.count(start.packet_ID) == 1;
-
-        // The packet was delievered and its latency is known (we were notified)
-        if (in_sent_packets_latency || in_events_to_process) {
-            struct packet_end end;
-            // Delete packet from stack
-            if (in_sent_packets_latency) {
-                auto const latency_q = s->sent_packets_latency.top();
-                end.travel_end_time = latency_q.value;
-                s->sent_packets_latency.pop();
-            } else {
-                end.travel_end_time = notification_events_map[start.packet_ID];
-            }
-            if (s->sent_packets.size() >= 2) {
-                end.next_packet_delay = s->sent_packets[1].processing_packet_delay;
-            } else {
-                end.next_packet_delay = -1;
-            }
-            packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, false);
-        }
-        // The packet has not been delievered, or we haven't received the notification yet.
-        // Send directly to destination and notify of zombie event
-        else if (freeze_network_on_switch) {
+        bool const in_events_to_process = notification_events_map.count(packet_ID) == 1;
+        if (in_events_to_process) {
+            feed_packet_to_predictor(s, lp, packet_ID, notification_events_map[sent.start.packet_ID]);
+
+        // The packet has not been delievered. Send directly to destination and notify of zombie event
+        } else if (freeze_network_on_switch) {
+            struct packet_end predicted_end = 
+                terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &sent.start);
+
             double latency = predicted_end.travel_end_time - tw_now(lp);
             if (predicted_end.travel_end_time < tw_now(lp) || latency < 0) {
                 predicted_end.travel_end_time = tw_now(lp);
                 latency = 0;
             }
 
-            packet_latency_save_to_file(s->terminal_id, start, predicted_end, is_surrogate_on, true);
+            packet_latency_save_to_file(s->terminal_id, &sent.start, &predicted_end, is_surrogate_on, true);
 
-            assert(start.message_data);
-            terminal_dally_message * const msg_data = (terminal_dally_message*) start.message_data;
+            assert(sent.message_data);
+            terminal_dally_message * const msg_data = (terminal_dally_message*) sent.message_data;
             terminal_dally_message * m;
             void * remote_event;
             tw_event * const e = model_net_method_event_new(
-                    start.dest_terminal_lpid, latency, lp, DRAGONFLY_DALLY, (void**)&m, &remote_event);
+                    sent.start.dest_terminal_lpid, latency, lp, DRAGONFLY_DALLY, (void**)&m, &remote_event);
             memcpy(m, msg_data, sizeof(terminal_dally_message));
             if (m->remote_event_size_bytes) {
-                memcpy(remote_event, start.remote_event_data, m->remote_event_size_bytes);
+                memcpy(remote_event, sent.remote_event_data, m->remote_event_size_bytes);
             }
             m->magic = terminal_magic_num;
             m->type = T_ARRIVE_PREDICTED;
@@ -3024,26 +2999,26 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
             m->intm_grp_id = -1;
             m->intm_rtr_id = -1; //for legacy prog-adaptive
             assert(m->dfdally_src_terminal_id  == s->terminal_id);
-            assert(m->packet_ID                == start.packet_ID);
-            assert(m->dest_terminal_lpid       == start.dest_terminal_lpid);
-            assert(m->dfdally_dest_terminal_id == start.dfdally_dest_terminal_id);
-            //assert(m->travel_start_time        >= start.travel_start_time);
-            assert(m->packet_size              == start.packet_size);
+            assert(m->packet_ID                == sent.start.packet_ID);
+            assert(m->dest_terminal_lpid       == sent.start.dest_terminal_lpid);
+            assert(m->dfdally_dest_terminal_id == sent.start.dfdally_dest_terminal_id);
+            //assert(m->travel_start_time        >= sent.start.travel_start_time);
+            assert(m->packet_size              == sent.start.packet_size);
             tw_event_send(e);
 
-            //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", start.dest_terminal_lpid, start.dfdally_dest_terminal_id);
+            //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", sent.start.dest_terminal_lpid, sent.start.dfdally_dest_terminal_id);
             notify_dest_lp_of(s, lp, m, NOTIFY_ZOMBIE);
         }
 
         // Deallocating memory from packet_start
-        if (start.message_data) {
-            free(start.message_data);
+        if (sent.message_data) {
+            free(sent.message_data);
         }
-        if (start.remote_event_data) {
-            free(start.remote_event_data);
+        if (sent.remote_event_data) {
+            free(sent.remote_event_data);
         }
     }
-    assert(s->sent_packets_latency.empty());
+    assert(s->sent_packets.empty());
 
     // Hide current state and clean current state. Hidding the network information is in principle
     // the same as freezing the state of the network.
@@ -3070,9 +3045,10 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
     s->total_msg_size               = frozen_state->total_msg_size;
     s->finished_msgs                = frozen_state->finished_msgs;
     s->rank_tbl_pop                 = frozen_state->rank_tbl_pop;
+    s->last_packet_sent_id          = frozen_state->last_packet_sent_id;
+    memcpy(&s->arrival_of_last_packet, &frozen_state->arrival_of_last_packet, sizeof(s->arrival_of_last_packet));
     memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
     memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
-    memcpy(&s->sent_packets_latency, &frozen_state->sent_packets_latency, sizeof(s->sent_packets_latency));
     memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets));
     memcpy(&s->rank_tbl,             &frozen_state->rank_tbl,             sizeof(s->rank_tbl));
     memcpy(&s->st,                   &frozen_state->st,                   sizeof(s->st));
@@ -3110,9 +3086,10 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(
     frozen_state->total_msg_size               = s->total_msg_size;
     frozen_state->finished_msgs                = s->finished_msgs;
     frozen_state->rank_tbl_pop                 = s->rank_tbl_pop;
+    frozen_state->last_packet_sent_id          = s->last_packet_sent_id;
+    memcpy(&frozen_state->arrival_of_last_packet, &s->arrival_of_last_packet, sizeof(s->arrival_of_last_packet));
     memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
     memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
-    memcpy(&frozen_state->sent_packets_latency, &s->sent_packets_latency, sizeof(s->sent_packets_latency));
     memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets));
     memcpy(&frozen_state->rank_tbl,             &s->rank_tbl,             sizeof(s->rank_tbl));
     memcpy(&frozen_state->st,                   &s->st,                   sizeof(s->st));
@@ -3227,70 +3204,172 @@ static void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_da
     }
 }
 
+static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
+    if (!packet_latency_f && !surrogate_configured) {
+        return;
+    }
+
+    // Storing packet info of sent packet. Once packets arrive back, we can compute
+    // the latency of sending the packet
+    void * msg_data = malloc(sizeof(terminal_dally_message));
+    memcpy(msg_data, msg, sizeof(terminal_dally_message));
+    void * remote_data = NULL;
+    if (msg->remote_event_size_bytes) {
+        remote_data = malloc(msg->remote_event_size_bytes);
+        memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
+    }
+    double const processing_packet_delay = s->last_in_queue_time - msg->saved_last_in_queue_time;
+    s->sent_packets.insert({
+        msg->packet_ID,
+        (struct packet_sent) {
+            .start = (struct packet_start) {
+                .packet_ID = msg->packet_ID,
+                .dest_terminal_lpid = msg->dest_terminal_lpid,
+                .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
+                .travel_start_time = tw_now(lp),
+                .workload_injection_time = msg->msg_start_time,
+                .processing_packet_delay = processing_packet_delay,
+                .packet_size = msg->packet_size,
+                .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue,
+            },
+            .next_packet_delay = -1,
+            .message_data = msg_data,
+            .remote_event_data = remote_data
+        }
+    });
+
+    // Set next_packet_delay for the last past sent packet
+    if (s->sent_packets.count(s->last_packet_sent_id) == 1) {
+        assert(s->sent_packets[s->last_packet_sent_id].next_packet_delay == -1);
+        s->sent_packets[s->last_packet_sent_id].next_packet_delay = processing_packet_delay;
+    }
+    
+    // If we already received the (previous) last packet latency, we inject it now into the predictor
+    if (s->arrival_of_last_packet.packet_ID != -1) {
+        assert(s->arrival_of_last_packet.packet_ID == s->last_packet_sent_id);
+        assert(s->arrival_of_last_packet.travel_end_time > 0);
+
+        double const travel_end_time = s->arrival_of_last_packet.travel_end_time;
+        feed_packet_to_predictor(s, lp, s->arrival_of_last_packet.packet_ID, travel_end_time);
+        s->sent_packets.erase(s->arrival_of_last_packet.packet_ID);
+        s->arrival_of_last_packet.packet_ID = -1;
+    }
+}
+
 static void terminal_dally_commit(terminal_state * s,
 		tw_bf * bf, 
 		terminal_dally_message * msg, 
         tw_lp * lp)
 {
-    if(msg->type == T_BANDWIDTH)
-    {
-        if(msg->rc_is_qos_set == 1) {
-            free(msg->rc_qos_data);
-            free(msg->rc_qos_status);
-            msg->rc_is_qos_set = 0;
-        }
-    }
 
-    if(msg->type == T_ARRIVE)
-    {
-        if (OUTPUT_END_END_LATENCIES)
-        {
-            if (msg->message_id % OUTPUT_LATENCY_MODULO == 0) {
-                int written1;
-                char end_end_filename[128];
-                written1 = sprintf(end_end_filename, "end-to-end-latency-hops");
-                end_end_filename[written1] = '\0';
-
-                char latency[32];
-                int written;
-                tw_stime lat = msg->travel_end_time-msg->travel_start_time;
-                written = sprintf(latency, "%d %.5f %d\n",msg->app_id, msg->travel_end_time-msg->travel_start_time,msg->my_N_hop);
-                lp_io_write(lp->gid, end_end_filename, written, latency);
+    switch (msg->type) {
+        case T_GENERATE:
+            if(bf->c10) {  // if the packet was sent as a prediction, store the prediction in memory
+                assert(surrogate_configured);
+                auto start = (struct packet_start) {
+                    .packet_ID = msg->packet_ID,
+                    .dest_terminal_lpid = msg->dest_terminal_lpid,
+                    .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
+                    .travel_start_time = msg->travel_start_time,
+                    .workload_injection_time = msg->msg_start_time,
+                    .processing_packet_delay = -1,
+                    .packet_size = msg->packet_size,
+                    .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue
+                };
+
+                // Saving
+                auto end = (struct packet_end) {
+                    .travel_end_time = msg->travel_end_time,
+                    .next_packet_delay = msg->saved_next_packet_delay,
+                };
+                packet_latency_save_to_file(s->terminal_id, &start, &end, is_surrogate_on, true);
+
+                // If we had latency info for the last packet transmitted, then we have to store it into memory and clean the variable
+                if (s->arrival_of_last_packet.packet_ID != -1) {
+                    assert(s->arrival_of_last_packet.packet_ID == s->last_packet_sent_id);
+                    assert(s->arrival_of_last_packet.travel_end_time > 0);
+
+                    auto sent = s->sent_packets[s->arrival_of_last_packet.packet_ID];
+
+                    struct packet_end end = {
+                        .travel_end_time = s->arrival_of_last_packet.travel_end_time,
+                        .next_packet_delay = -1,
+                    };
+
+                    packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_surrogate_on, false);
+
+                    s->sent_packets.erase(s->arrival_of_last_packet.packet_ID);
+                    s->arrival_of_last_packet.packet_ID = -1;
+                }
+
+            // If the packet info is to be stored in memory to compute terminal delay
+            } else {
+                terminal_commit_packet_generate(s, bf, msg, lp);
             }
-        }
-    }
 
-    if(msg->type == T_GENERATE && bf->c10) {  // if the packet was sent as a prediction, store the prediction in memory
-        auto start = (struct packet_start) {
-            .packet_ID = msg->packet_ID,
-            .dest_terminal_lpid = msg->dest_terminal_lpid,
-            .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
-            .travel_start_time = msg->travel_start_time,
-            .workload_injection_time = msg->msg_start_time,
-            .processing_packet_delay = -1,
-            .packet_size = msg->packet_size,
-            .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue
-        };
+            assert(msg->packet_ID - 1 == s->last_packet_sent_id);
+            s->last_packet_sent_id = msg->packet_ID;
+        break;
 
-        // Saving
-        auto const end = (struct packet_end) {
-            .travel_end_time = msg->travel_end_time,
-            .next_packet_delay = msg->saved_next_packet_delay,
-        };
-        packet_latency_save_to_file(s->terminal_id, start, end, is_surrogate_on, true);
-    }
+        case T_ARRIVE:
+            if (OUTPUT_END_END_LATENCIES) {
+                if (msg->message_id % OUTPUT_LATENCY_MODULO == 0) {
+                    int written1;
+                    char end_end_filename[128];
+                    written1 = sprintf(end_end_filename, "end-to-end-latency-hops");
+                    end_end_filename[written1] = '\0';
 
-    if(msg->type == T_NOTIFY && msg->notify_type == NOTIFY_LATENCY)
-    {
-        assert(lp->gid == msg->src_terminal_id);
-        assert(s->terminal_id == msg->dfdally_src_terminal_id);
-        if (!s->sent_packets.empty() && s->sent_packets.front().packet_ID <= msg->packet_ID) {
-            s->sent_packets_latency.push({
-                    .packet_ID = msg->packet_ID,
-                    .value = msg->travel_end_time});
+                    char latency[32];
+                    int written;
+                    tw_stime lat = msg->travel_end_time-msg->travel_start_time;
+                    written = sprintf(latency, "%d %.5f %d\n",msg->app_id, msg->travel_end_time-msg->travel_start_time,msg->my_N_hop);
+                    lp_io_write(lp->gid, end_end_filename, written, latency);
+                }
+            }
+        break;
 
-            process_packet_latencies(s, lp);
-        }
+        case T_ARRIVE_PREDICTED:
+        break;
+        
+        case T_SEND:
+        break;
+        
+        case T_BUFFER:
+        break;
+    
+        case T_BANDWIDTH:
+            if(msg->rc_is_qos_set == 1) {
+                free(msg->rc_qos_data);
+                free(msg->rc_qos_status);
+                msg->rc_is_qos_set = 0;
+            }
+        break;
+    
+        case T_NOTIFY:
+            if(msg->notify_type == NOTIFY_LATENCY) {
+                assert(lp->gid == msg->src_terminal_id);
+                assert(s->terminal_id == msg->dfdally_src_terminal_id);
+                int64_t packet_ID = msg->packet_ID;
+
+                if (s->sent_packets.count(packet_ID) == 1) { // packet_ID is in s->sent_packets
+                    if (packet_ID == s->last_packet_sent_id) { // packet_ID is last, we cannot compute the next_packet_delay
+                        assert(s->arrival_of_last_packet.packet_ID == -1);
+                        s->arrival_of_last_packet.packet_ID = packet_ID;
+                        s->arrival_of_last_packet.travel_end_time = msg->travel_end_time;
+                    } else {
+                        feed_packet_to_predictor(s, lp, packet_ID, msg->travel_end_time);
+                        s->sent_packets.erase(packet_ID);
+                    }
+                }
+            }
+        break;
+
+        case T_VACUOUS_EVENT:
+        break;
+
+        default:
+            printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type);
+            tw_error(TW_LOC, "Msg type not supported");
     }
 }
 
@@ -3504,9 +3583,11 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     // In the future calling the constructor could be done with:
     // std::construct_at, for now this syntax suffices and works
     // (see https://en.cppreference.com/w/cpp/memory/construct_at)
-    new (&s->sent_packets) deque<struct packet_start>();
-    new (&s->sent_packets_latency) priority_queue<struct packet_double_val, vector<struct packet_double_val>, decltype(packet_double_val_greater_cmp)>();
-    new (&s->remaining_sz_packets) set<struct packet_id, uint32_t>();
+    s->last_packet_sent_id = -1;
+    s->arrival_of_last_packet.packet_ID = -1;
+    s->arrival_of_last_packet.travel_end_time = -1;
+    new (&s->sent_packets) map<int64_t, struct packet_sent>();
+    new (&s->remaining_sz_packets) map<struct packet_id, uint32_t>();
     new (&s->zombies) set<struct packet_id>();
     s->frozen_state = NULL;
 
@@ -3959,12 +4040,6 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     s->packet_counter--;
 
     s->last_in_queue_time = msg->saved_last_in_queue_time;
-    struct packet_start start = s->sent_packets.back();
-    if (start.remote_event_data) {
-        free(start.remote_event_data);
-    }
-    free(start.message_data);
-    s->sent_packets.pop_back();
 
     if(bf->c2)
         num_local_packets_sr--;
@@ -4235,31 +4310,10 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     msg->my_g_hop = 0;
     msg->my_hops_cur_group = 0;
 
-    // Storing packet info to be sent. Once packets arrive back, we can compute
-    // the latency of sending the packet
-    void * msg_data = malloc(sizeof(terminal_dally_message));
-    memcpy(msg_data, msg, sizeof(terminal_dally_message));
-    void * remote_data = NULL;
-    if (msg->remote_event_size_bytes) {
-        remote_data = malloc(msg->remote_event_size_bytes);
-        memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
-    }
     //assert(tw_now(lp) == msg->travel_start_time);
-    double const processing_packet_delay = tw_now(lp) - s->last_in_queue_time;
+    // This is to be later used to determine 
     msg->saved_last_in_queue_time = s->last_in_queue_time;
     s->last_in_queue_time = tw_now(lp);
-    s->sent_packets.push_back((struct packet_start){
-        .packet_ID = msg->packet_ID,
-        .dest_terminal_lpid = msg->dest_terminal_lpid,
-        .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
-        .travel_start_time = tw_now(lp),
-        .workload_injection_time = msg->msg_start_time,
-        .processing_packet_delay = processing_packet_delay,
-        .packet_size = msg->packet_size,
-        .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue,
-        .message_data = msg_data,
-        .remote_event_data = remote_data
-        });
 
     //qos stuff
     int num_qos_levels = s->params->num_qos_levels;
@@ -5480,40 +5534,44 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
     lp_io_write(lp->gid, (char*)"dragonfly-cn-stats", written, s->output_buf2); 
 
     if (packet_latency_f) {
-        // Storing the missing packets into io file
-        while(!s->sent_packets.empty()) {
-            struct packet_start start = s->sent_packets.front();
-            s->sent_packets.pop_front();
-            assert(start.message_data);
+        // If the last packet transmitted actually received a latency notification (was delievered)
+        if (s->arrival_of_last_packet.packet_ID != -1) {
+            auto sent = s->sent_packets[s->arrival_of_last_packet.packet_ID];
+            assert(s->sent_packets.count(s->arrival_of_last_packet.packet_ID) == 1); // packet_ID is in s->sent_packets
+            assert(sent.next_packet_delay < 0); // next_packet_delay is -1
 
+            double const travel_end_time = s->arrival_of_last_packet.travel_end_time;
             struct packet_end end = {
-                .travel_end_time = -1,
+                .travel_end_time = travel_end_time,
                 .next_packet_delay = -1,
             };
 
-            // The packet was delievered and its latency is known (we were notified)
-            if (!s->sent_packets_latency.empty()
-                    && start.packet_ID == s->sent_packets_latency.top().packet_ID)
-            {
-                auto const latency_q = s->sent_packets_latency.top();
-                s->sent_packets_latency.pop();
+            packet_latency_save_to_file(s->terminal_id, &sent.start, &end, false, false);
 
-                end.travel_end_time = latency_q.value;
+            // Deallocating memory from packet_start
+            if (sent.message_data) { free(sent.message_data); }
+            if (sent.remote_event_data) { free(sent.remote_event_data); }
 
-                if (s->sent_packets.size() >= 2) {
-                    end.next_packet_delay = s->sent_packets[1].processing_packet_delay;
-                }
+            s->sent_packets.erase(s->arrival_of_last_packet.packet_ID);
+            s->arrival_of_last_packet.packet_ID = -1;
+        }
 
-                packet_latency_save_to_file(s->terminal_id, start, end, false, false);
-            }
-            // The packet has not been delievered yet (that we know of)
-            else {
-                packet_latency_save_to_file(s->terminal_id, start, end, false, false);
-            }
+        // Storing all other missing packets into io file (deleting all elements from s->sent_packets as we go)
+        for (auto it = s->sent_packets.begin(); it != s->sent_packets.end(); it = s->sent_packets.erase(it)) {
+            auto& sent = it->second;
+            int64_t packet_ID = it->first;
+            assert(sent.message_data);
+
+            struct packet_end end = {
+                .travel_end_time = -1,
+                .next_packet_delay = sent.next_packet_delay,
+            };
+
+            packet_latency_save_to_file(s->terminal_id, &sent.start, &end, false, false);
 
             // Deallocating memory from packet_start
-            if (start.message_data) { free(start.message_data); }
-            if (start.remote_event_data) { free(start.remote_event_data); }
+            if (sent.message_data) { free(sent.message_data); }
+            if (sent.remote_event_data) { free(sent.remote_event_data); }
         }
     }
 
@@ -5546,12 +5604,11 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
     }
     printf("]\n");
 #endif
-    for (auto&& start: s->sent_packets) {
-        if (start.message_data) { free(start.message_data); }
-        if (start.remote_event_data) { free(start.remote_event_data); }
+    for (auto&& kv: s->sent_packets) {
+        if (kv.second.message_data) { free(kv.second.message_data); }
+        if (kv.second.remote_event_data) { free(kv.second.remote_event_data); }
     }
-    s->sent_packets.~deque();
-    s->sent_packets_latency.~priority_queue();
+    s->sent_packets.~map();
     s->remaining_sz_packets.~map();
 
     if (s->predictor_data) {

From 0b53bb845373dd097d6435f33faf6b4005f15754 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 2 Nov 2023 10:28:09 -0400
Subject: [PATCH 060/188] Small fixes to allow more experiment configurations
 to run

---
 src/networks/model-net/dragonfly-dally.C      | 56 +++++++++----------
 .../packet-latency-predictor/average.c        |  2 +-
 2 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index a2676fcd..764eb066 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -490,7 +490,7 @@ struct packet_sent {
 };
 
 struct packet_id {
-    int64_t packet_ID;
+    uint64_t packet_ID;
     unsigned int dfdally_src_terminal_id;
 };
 bool operator<(struct packet_id const &lk, struct packet_id const &rk) {
@@ -595,11 +595,11 @@ struct terminal_state
 
     // Variables to recover latency of packets sent to other terminals
     // Sent packets (to be populated at by commit handler of packet sender)
-    map<int64_t, struct packet_sent> sent_packets;
-    int64_t last_packet_sent_id;
+    map<uint64_t, struct packet_sent> sent_packets;
+    uint64_t last_packet_sent_id;
     // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency)
     struct {
-        int64_t packet_ID;
+        uint64_t packet_ID;
         double travel_end_time;
     } arrival_of_last_packet;
     // received (and not completed, yet) packets. The value associated to a key is the remaining number of "bytes" to receive before the packet is consumed totally. If a packet size == chunk size, this map will never be used/filled
@@ -2883,7 +2883,7 @@ static bool is_surrogate_on_fun(void) {
     return is_surrogate_on;
 }
 
-static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, int64_t packet_ID, double end_time) {
+static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t packet_ID, double end_time) {
     assert(s->sent_packets.count(packet_ID) == 1); // packet_ID is in s->sent_packets
     auto sent = s->sent_packets[packet_ID];
     struct packet_end end = {
@@ -2909,10 +2909,10 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, int64_t pac
 // Constructs a hashmap with all the T_NOTIFY events to be processed.
 // The key of the list is the GID for the source terminal. The value of the
 // hash is the end time
-static map<int64_t, double> construct_map_of_NOTIFY_LATENCY_events(
+static map<uint64_t, double> construct_map_of_NOTIFY_LATENCY_events(
         tw_lp * lp, tw_event ** const terminal_events) {
     // hash map to store T_NOTIFY events found (`packet_ID` and `travel_end_time`)
-    map<int64_t, double> notification_events_map;
+    map<uint64_t, double> notification_events_map;
 
     for (size_t i = 0; terminal_events && terminal_events[i] != NULL; i++) {
         assert(terminal_events[i]->dest_lpid == lp->gid);
@@ -2953,7 +2953,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
     // notify of its zombie status.
     // (deleting all elements from s->sent_packets as we go)
     for (auto it = s->sent_packets.begin(); it != s->sent_packets.end(); it = s->sent_packets.erase(it)) {
-        int64_t packet_ID = it->first;
+        uint64_t packet_ID = it->first;
         auto & sent = it->second;
 
         assert(packet_ID == sent.start.packet_ID);
@@ -3219,24 +3219,22 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term
         memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
     }
     double const processing_packet_delay = s->last_in_queue_time - msg->saved_last_in_queue_time;
-    s->sent_packets.insert({
-        msg->packet_ID,
-        (struct packet_sent) {
-            .start = (struct packet_start) {
-                .packet_ID = msg->packet_ID,
-                .dest_terminal_lpid = msg->dest_terminal_lpid,
-                .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id,
-                .travel_start_time = tw_now(lp),
-                .workload_injection_time = msg->msg_start_time,
-                .processing_packet_delay = processing_packet_delay,
-                .packet_size = msg->packet_size,
-                .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue,
-            },
-            .next_packet_delay = -1,
-            .message_data = msg_data,
-            .remote_event_data = remote_data
-        }
-    });
+
+    // TODO (elkin): In the future, this ugly initialization could be done all in a single "line" instead of setting all values one by one. The reason to do it this way is because some old compilers do not understand other ways of initializing
+    struct packet_sent sent;
+    sent.start.packet_ID = msg->packet_ID;
+    sent.start.dest_terminal_lpid = msg->dest_terminal_lpid;
+    sent.start.dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id;
+    sent.start.travel_start_time = tw_now(lp);
+    sent.start.workload_injection_time = msg->msg_start_time;
+    sent.start.processing_packet_delay = processing_packet_delay;
+    sent.start.packet_size = msg->packet_size;
+    sent.start.is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue;
+    sent.next_packet_delay = -1;
+    sent.message_data = msg_data;
+    sent.remote_event_data = remote_data;
+
+    s->sent_packets[msg->packet_ID] = sent;
 
     // Set next_packet_delay for the last past sent packet
     if (s->sent_packets.count(s->last_packet_sent_id) == 1) {
@@ -3349,7 +3347,7 @@ static void terminal_dally_commit(terminal_state * s,
             if(msg->notify_type == NOTIFY_LATENCY) {
                 assert(lp->gid == msg->src_terminal_id);
                 assert(s->terminal_id == msg->dfdally_src_terminal_id);
-                int64_t packet_ID = msg->packet_ID;
+                uint64_t packet_ID = msg->packet_ID;
 
                 if (s->sent_packets.count(packet_ID) == 1) { // packet_ID is in s->sent_packets
                     if (packet_ID == s->last_packet_sent_id) { // packet_ID is last, we cannot compute the next_packet_delay
@@ -3586,7 +3584,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     s->last_packet_sent_id = -1;
     s->arrival_of_last_packet.packet_ID = -1;
     s->arrival_of_last_packet.travel_end_time = -1;
-    new (&s->sent_packets) map<int64_t, struct packet_sent>();
+    new (&s->sent_packets) map<uint64_t, struct packet_sent>();
     new (&s->remaining_sz_packets) map<struct packet_id, uint32_t>();
     new (&s->zombies) set<struct packet_id>();
     s->frozen_state = NULL;
@@ -5559,7 +5557,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
         // Storing all other missing packets into io file (deleting all elements from s->sent_packets as we go)
         for (auto it = s->sent_packets.begin(); it != s->sent_packets.end(); it = s->sent_packets.erase(it)) {
             auto& sent = it->second;
-            int64_t packet_ID = it->first;
+            uint64_t packet_ID = it->first;
             assert(sent.message_data);
 
             struct packet_end end = {
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index 60dc3441..88f084d3 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -36,7 +36,7 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     (void) lp;
     (void) src_terminal;
 
-    if (start->travel_start_time < ignore_until) {
+    if (end->travel_end_time < ignore_until) {
         return;
     }
 

From 2f23503d21b444b159ec07ed75cfb3bfd87b6f8a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 2 Nov 2023 11:25:47 -0400
Subject: [PATCH 061/188] Fixing double deallocation (free())

---
 src/networks/model-net/dragonfly-dally.C | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 764eb066..ff85fe35 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3008,14 +3008,14 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
 
             //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", sent.start.dest_terminal_lpid, sent.start.dfdally_dest_terminal_id);
             notify_dest_lp_of(s, lp, m, NOTIFY_ZOMBIE);
-        }
 
-        // Deallocating memory from packet_start
-        if (sent.message_data) {
-            free(sent.message_data);
-        }
-        if (sent.remote_event_data) {
-            free(sent.remote_event_data);
+            // Deallocating memory from packet_start
+            if (sent.message_data) {
+                free(sent.message_data);
+            }
+            if (sent.remote_event_data) {
+                free(sent.remote_event_data);
+            }
         }
     }
     assert(s->sent_packets.empty());

From 46d72f861e93f7e457c9bd622a2941921165bca5 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 3 Nov 2023 11:44:15 -0400
Subject: [PATCH 062/188] Updating scripts to check output data (packet latency
 / port occupancy) from experiments

---
 ...{delay-in-window.py => delay_in_window.py} |  19 +-
 .../python-scripts/plot-packet-latency.py     | 292 ++++++++----------
 .../python-scripts/port-occupancy.py          | 152 ++++-----
 scripts/reproducibility-pads23/reproduce.sh   |   4 +-
 .../determine_mean_std.py                     |  23 +-
 5 files changed, 226 insertions(+), 264 deletions(-)
 rename scripts/reproducibility-pads23/python-scripts/{delay-in-window.py => delay_in_window.py} (90%)

diff --git a/scripts/reproducibility-pads23/python-scripts/delay-in-window.py b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
similarity index 90%
rename from scripts/reproducibility-pads23/python-scripts/delay-in-window.py
rename to scripts/reproducibility-pads23/python-scripts/delay_in_window.py
index b59061de..407d54ce 100644
--- a/scripts/reproducibility-pads23/python-scripts/delay-in-window.py
+++ b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
@@ -19,15 +19,19 @@ def collect_data_numpy(
     filepreffix: str,
     delimiter: str | None = None,
     dtype: Any = int
-) -> np.ndarray[Any, Any]:
+) -> tuple[list[str], np.ndarray[Any, Any]]:
     escaped_path = pathlib.Path(glob.escape(path))  # type: ignore
     stat_files = glob.glob(str(escaped_path / f"{filepreffix}-gid=*.txt"))
     if not stat_files:
         print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr)
         exit(1)
 
-    return np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype,
+    data = np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype,
                       comments='#')
+    with open(stat_files[0], 'r') as f:
+        header = f.readline()[1:].split(',')
+
+    return header, data
 
 
 def mean_and_std(array: ndarray) -> tuple[float, float]:
@@ -95,10 +99,11 @@ def find_mean_and_std_through_window(
     if computing:
         if raw_data:
             # Columns within the csv file that matter to us
-            start_time_col = 8
-            delay_col = 10
-            delays = collect_data_numpy(args.latencies, 'packets-delay', delimiter=',',
-                                        dtype=np.dtype('float'))
+            header, delays = collect_data_numpy(
+                args.latencies, 'packets-delay', delimiter=',',
+                dtype=np.dtype('float'))
+            start_time_col = header.index('start')
+            delay_col = header.index('latency')
         else:
             start_time_col = 8
             delay_col = 9
@@ -135,4 +140,4 @@ def find_mean_and_std_through_window(
 
     if plotting:
         plt.errorbar(windows, means, yerr=.2*stds)
-        plt.show()
+        plt.show()  # type: ignore
diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
index fe7b424e..cb397a33 100644
--- a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -2,175 +2,128 @@
 
 import argparse
 import pathlib
+import sys
 
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib
 from matplotlib.ticker import EngFormatter
 
+from delay_in_window import collect_data_numpy, find_mean_and_std_through_window
+
 
 time_formatter_ns = EngFormatter()
 time_formatter_ns.ENG_PREFIXES = {0: 'ns', 3: 'us', 6: 'ms', 9: 's'}
 
 
-if True and __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--latencies', type=pathlib.Path, help='Folder with condensed latencies',
-                        required=True)
-    parser.add_argument('--output', type=pathlib.Path, help='Name of output figure',
-                        required=True)
-    args = parser.parse_args()
+if __name__ == '__main__':
+    this_binary = sys.argv[0]
+    commands = {
+        'plotfromraw': 'Generates a single packet-latency plot given the raw latency data',
+        'plotfromzip': 'Generates a single packet-latency plot given a zipped file (NPZ).'
+                       ' (npz file geterated by delay_in_window.py)',
+        'pads23': 'Generates plot that appears on PADS23 paper'
+    }
+    parser = argparse.ArgumentParser(
+        usage=f'{this_binary} <command> [<args>]\n\n'
+        'The available commands are:\n'
+        + '\n'.join(f'  {cmd}\t {desc}' for cmd, desc in commands.items()))
+    parser.add_argument('command', help='Subcommand to run')
+    main_args = parser.parse_args(sys.argv[1:2])
 
-    latex = True
+    if main_args.command not in commands:
+        print("Unrecognized command:", main_args.command, file=sys.stderr)
+        exit(1)
 
-    if latex:
-        matplotlib.use("pgf")
-        matplotlib.rcParams.update({
-            "pgf.texsystem": "pdflatex",
-            'font.family': 'serif',
-            'font.size': 16,
-            'text.usetex': True,
-            'pgf.rcfonts': False,
-        })
 
-    data_high_fidelity = np.load(f"{args.latencies}/packet_latency-high-fidelity.npz")
-    data_hybrid = np.load(f"{args.latencies}/packet_latency-hybrid.npz")
-    data_hybrid_lite = np.load(f"{args.latencies}/packet_latency-hybrid-lite.npz")
+if main_args.command == 'plotfromraw':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--latencies-dir', type=pathlib.Path, required=True,
+                        help='Folder with raw latency data')
+    parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
+                        default=100)
+    parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
+                        required=True)
+    parser.add_argument('--std-factor', type=float, default=0.2,
+                        help='Size of variance to show as an std factor')
+    args = parser.parse_args(sys.argv[2:])
 
-    windows_hf, means_hf, stds_hf = \
-        data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds']
-    windows_hybrid, means_hybrid, stds_hybrid = \
-        data_hybrid['windows'], data_hybrid['means'], data_hybrid['stds']
-    windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \
-        data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds']
+    std_factor = args.std_factor
 
-    assert np.all(windows_hf == windows_hybrid)
-    n_windows = windows_hf.shape[0]
-    windows_hybrid_lite = windows_hybrid_lite[:n_windows]
-    means_hybrid_lite = means_hybrid_lite[:n_windows]
-    stds_hybrid_lite = stds_hybrid_lite[:n_windows]
-    assert np.all(windows_hybrid_lite == windows_hybrid)
+    header, delays = collect_data_numpy(args.latencies_dir, 'packets-delay', delimiter=',',
+                                        dtype=np.dtype('float'))
 
-    std_factor = 0.2
+    # Cleaning data
+    next_packet_delay_col = header.index('next_packet_delay')
+    delays = delays[delays[:, next_packet_delay_col] > 0]
 
-    fig, ax = plt.subplots(figsize=(7, 3.8))
-    ax.vlines = ax.vlines([2e6, 3e6, 8e6], -3e3, 125e3, color='#AAA', ls='-')
-    ax.vlines.set_clip_on(False)
+    delay_col = header.index('latency')
+    windows, means, stds = find_mean_and_std_through_window(
+        delays, n_windows=args.windows, delay_col=delay_col, end_time=args.end)
 
-    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
-    ax.annotate("", xy=(1.95e6, 80e3), xytext=(1.0e6, 98e3), **arrow_color)
-    ax.annotate("switch", xy=(3.1e6, 118e3), xytext=(4.8e6, 105e3), **arrow_color)
-    ax.annotate("", xy=(7.9e6, 118e3), xytext=(6.0e6, 110e3), **arrow_color)
-    ax.text(1.9e6, 1e5, "start\ntracking", color='#333', ha='right')
+    fig, ax = plt.subplots()
 
-    # plt.errorbar(windows_hf, means_hf, yerr=std_factor*stds_hf)
-    # plt.errorbar(windows_hybrid, means_hybrid, yerr=std_factor*stds_hybrid)
-    # plt.errorbar(windows_hybrid_lite, means_hybrid_lite,
-    #              yerr=std_factor*stds_hybrid_lite)
-    ax.plot(windows_hf, means_hf, label='high-fidelity only')
-    ax.fill_between(windows_hf,
-                    means_hf - std_factor*stds_hybrid,
-                    means_hf + std_factor*stds_hybrid,
+    # plt.errorbar(windows, means, yerr=std_factor*stds)
+    ax.plot(windows, means, label='high-fidelity only')
+    ax.fill_between(windows,
+                    means - std_factor*stds,
+                    means + std_factor*stds,
                     color='#00F5')
-    ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite')
-    ax.fill_between(windows_hybrid_lite,
-                    means_hybrid_lite - std_factor*stds_hybrid,
-                    means_hybrid_lite + std_factor*stds_hybrid,
-                    color='#F005')
-    ax.plot(windows_hybrid, means_hybrid, label='hybrid')
-    ax.fill_between(windows_hybrid,
-                    means_hybrid - std_factor*stds_hybrid,
-                    means_hybrid + std_factor*stds_hybrid,
-                    color='#0F05')
-
-    # ax.text(2e6, 125e3, "start latency tracking", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # ax.text(3e6, 125e3, "switch to surrogate", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # ax.text(8e6, 130e3, "switch to\nhigh-definition", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
 
     ax.set_xlabel('Virtual time')
     ax.set_ylabel('Average Packet Latency')
-    ax.set_ylim(0, 122e3)
-    ax.legend(bbox_to_anchor=(.54, .02), loc='lower center', borderaxespad=0)
     ax.yaxis.set_major_formatter(time_formatter_ns)
     ax.xaxis.set_major_formatter(time_formatter_ns)
 
-    n = means_hf[80:].shape[0]
-    mse_hybrid_lite = \
-        np.sum((means_hf[80:] - means_hybrid_lite[80:])**2) / n
-    mse_hybrid = \
-        np.sum((means_hf[80:] - means_hybrid[80:])**2) / n
-    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
-    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
-
-    if latex:
-        plt.tight_layout()
-        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
-        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
-    else:
-        plt.show()
+    plt.show()  # type: ignore
 
 
-if False and __name__ == '__main__':
-    data_high_fidelity = \
-        np.load("data/vanilla-synthetic1-100ms_windowed_packet_latency_all.npz")
-    data_hybrid = \
-        np.load("data/surrogate-freezing-synthetic1-100ms_windowed_packet_latency_all.npz")
-    data_hybrid_lite = \
-        np.load("data/surrogate-nonfrozen-synthetic1-100ms_windowed_packet_latency_all.npz")
-
-    windows_hf, means_hf, stds_hf = \
-        data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds']
-    windows_hybrid, means_hybrid, stds_hybrid = \
-        data_hybrid['windows'], data_hybrid['means'], data_hybrid['stds']
-    windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \
-        data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds']
+if main_args.command == 'plotfromzip':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--latencies', type=pathlib.Path, required=True,
+                        help='NPZ file containing packet-latency data')
+    parser.add_argument('--std-factor', type=float, default=0.2,
+                        help='Size of variance to show as an std factor')
+    args = parser.parse_args(sys.argv[2:])
 
-    assert np.all(windows_hf == windows_hybrid) \
-        and np.all(windows_hybrid_lite == windows_hybrid)
+    std_factor = args.std_factor
 
-    std_factor = 0.2
+    data_npz = np.load(args.latencies)
+    windows, means, stds = data_npz['windows'], data_npz['means'], data_npz['stds']
 
-    fig, ax = plt.subplots(figsize=(7, 6))
-    # ax.vlines = ax.vlines([2e6, 3e6, 8e6], -3e3, 125e3, color='#AAA', ls='-')
-    # ax.vlines.set_clip_on(False)
+    fig, ax = plt.subplots()
 
-    ax.plot(windows_hf, means_hf, label='high-fidelity only')
-    ax.fill_between(windows_hf,
-                    means_hf - std_factor*stds_hybrid,
-                    means_hf + std_factor*stds_hybrid,
+    # plt.errorbar(windows, means, yerr=std_factor*stds)
+    ax.plot(windows, means, label='high-fidelity only')
+    ax.fill_between(windows,
+                    means - std_factor*stds,
+                    means + std_factor*stds,
                     color='#00F5')
-    ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite')
-    ax.fill_between(windows_hybrid_lite,
-                    means_hybrid_lite - std_factor*stds_hybrid,
-                    means_hybrid_lite + std_factor*stds_hybrid,
-                    color='#F005')
-    ax.plot(windows_hybrid, means_hybrid, label='hybrid')
-    ax.fill_between(windows_hybrid,
-                    means_hybrid - std_factor*stds_hybrid,
-                    means_hybrid + std_factor*stds_hybrid,
-                    color='#0F05')
 
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Average Packet Latency')
     ax.yaxis.set_major_formatter(time_formatter_ns)
     ax.xaxis.set_major_formatter(time_formatter_ns)
 
-    n = means_hf[90:].shape[0]
-    mse_hybrid_lite = \
-        np.sum((means_hf[90:] - means_hybrid_lite[90:])**2) / n
-    mse_hybrid = \
-        np.sum((means_hf[90:] - means_hybrid[90:])**2) / n
-    print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
-    print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
+    plt.show()  # type: ignore
 
-    plt.show()
 
+if main_args.command == 'pads23':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--latencies', type=pathlib.Path, help='Folder with condensed latencies',
+                        required=True)
+    parser.add_argument('--output', type=pathlib.Path, help='Name of output figure',
+                        default=None)
+    parser.add_argument('--std-factor', type=float, default=0.2,
+                        help='Size of variance to show as an std factor')
+    parser.add_argument('--started-tracking', type=float, default=2e6)
+    parser.add_argument('--switch', type=float, default=3e6)
+    parser.add_argument('--switch-back', type=float, default=8e6)
+    args = parser.parse_args(sys.argv[2:])
 
-if False and __name__ == '__main__':
-    latex = True
+    std_factor = args.std_factor
 
-    if latex:
+    if args.output:
         matplotlib.use("pgf")
         matplotlib.rcParams.update({
             "pgf.texsystem": "pdflatex",
@@ -180,12 +133,9 @@
             'pgf.rcfonts': False,
         })
 
-    data_high_fidelity = \
-        np.load("data/vanilla-ping-pong-10ms_windowed_packet_latency_all.npz")
-    data_hybrid = \
-        np.load("data/surrogate-freezing-ping-pong-10ms_windowed_packet_latency_all.npz")
-    data_hybrid_lite = \
-        np.load("data/surrogate-nonfrozen-ping-pong-10ms_windowed_packet_latency_all.npz")
+    data_high_fidelity = np.load(f"{args.latencies}/packet_latency-high-fidelity.npz")
+    data_hybrid = np.load(f"{args.latencies}/packet_latency-hybrid.npz")
+    data_hybrid_lite = np.load(f"{args.latencies}/packet_latency-hybrid-lite.npz")
 
     windows_hf, means_hf, stds_hf = \
         data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds']
@@ -194,25 +144,23 @@
     windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \
         data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds']
 
-    assert np.all(windows_hf == windows_hybrid) \
-        and np.all(windows_hybrid_lite == windows_hybrid)
-
-    std_factor = 0.2
+    assert np.all(windows_hf == windows_hybrid)
+    n_windows = windows_hf.shape[0]
+    windows_hybrid_lite = windows_hybrid_lite[:n_windows]
+    means_hybrid_lite = means_hybrid_lite[:n_windows]
+    stds_hybrid_lite = stds_hybrid_lite[:n_windows]
+    assert np.all(windows_hybrid_lite == windows_hybrid)
 
     fig, ax = plt.subplots(figsize=(7, 3.8))
-    ax.vlines = ax.vlines([0, 1e6, 8e6], 2.55e3, 4.45e3, color='#AAA', ls='-')
-    ax.vlines.set_clip_on(False)
-
-    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
-    ax.annotate("", xy=(0.1e6, 2.65e3), xytext=(1.5e6, 2.95e3), **arrow_color)
-    ax.text(1.5e6, 2.95e3, "start latency tracking", color='#333', ha='left')
-    ax.annotate("switch", xy=(1.1e6, 2.65e3), xytext=(4.0e6, 2.75e3), **arrow_color)
-    ax.annotate("", xy=(7.9e6, 2.65e3), xytext=(5.2e6, 2.75e3), **arrow_color)
 
+    # plt.errorbar(windows_hf, means_hf, yerr=std_factor*stds_hf)
+    # plt.errorbar(windows_hybrid, means_hybrid, yerr=std_factor*stds_hybrid)
+    # plt.errorbar(windows_hybrid_lite, means_hybrid_lite,
+    #              yerr=std_factor*stds_hybrid_lite)
     ax.plot(windows_hf, means_hf, label='high-fidelity only')
     ax.fill_between(windows_hf,
-                    means_hf - std_factor*stds_hybrid,
-                    means_hf + std_factor*stds_hybrid,
+                    means_hf - std_factor*stds_hf,
+                    means_hf + std_factor*stds_hf,
                     color='#00F5')
     ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite')
     ax.fill_between(windows_hybrid_lite,
@@ -221,35 +169,51 @@
                     color='#F005')
     ax.plot(windows_hybrid, means_hybrid, label='hybrid')
     ax.fill_between(windows_hybrid,
-                    means_hybrid - std_factor*stds_hybrid,
-                    means_hybrid + std_factor*stds_hybrid,
+                    means_hybrid - std_factor*stds_hybrid_lite,
+                    means_hybrid + std_factor*stds_hybrid_lite,
                     color='#0F05')
 
-    # plt.text(0, 4.5e3, "start latency tracking", color='#333', rotation=40,
-    #          rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # plt.text(1e6, 4.5e3, "switch to surrogate", color='#333', rotation=40,
-    #          rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # plt.text(8e6, 4.5e3, "switch to\nhigh-definition", color='#333', rotation=40,
-    #          rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    height_plot = ax.get_ylim()[1]
+    ax.vlines = ax.vlines([args.started_tracking, args.switch, args.switch_back],
+                          -3e3, height_plot, color='#AAA', ls='-')
+    ax.vlines.set_clip_on(False)
+
+    middle = (args.switch + args.switch_back) / 2
+    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    ax.annotate("", xy=(args.started_tracking * .95, 80e3),
+                xytext=(args.started_tracking * .6, 98e3), **arrow_color)
+    ax.annotate("switch", xy=(args.switch*1.04, 118e3),
+                xytext=(middle, 105e3), **arrow_color)
+    ax.annotate("", xy=(args.switch_back * 0.96, 118e3),
+                xytext=(middle, 110e3), **arrow_color)
+    ax.text(args.started_tracking * .9, 1e5, "start\ntracking", color='#333', ha='right')
+
+    ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333', rotation=40,
+            rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    ax.text(args.switch, height_plot, "switch to surrogate", color='#333', rotation=40,
+            rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    ax.text(args.switch_back, 1.03 * height_plot, "switch to\nhigh-definition", color='#333',
+            rotation=40, rotation_mode='anchor', horizontalalignment='left',
+            verticalalignment='center')
 
     ax.set_xlabel('Virtual time')
     ax.set_ylabel('Average Packet Latency')
-    ax.set_ylim(2.6e3, 4.4e3)
-    ax.legend(bbox_to_anchor=(.50, .28), loc='lower center', borderaxespad=0)
+    # ax.set_ylim(0, 122e3)
+    # ax.legend(bbox_to_anchor=(.54, .02), loc='lower center', borderaxespad=0)
     ax.yaxis.set_major_formatter(time_formatter_ns)
     ax.xaxis.set_major_formatter(time_formatter_ns)
 
-    n = means_hf[90:].shape[0]
+    n = means_hf[80:].shape[0]
     mse_hybrid_lite = \
-        np.sum((means_hf[90:] - means_hybrid_lite[90:])**2) / n
+        np.sum((means_hf[80:] - means_hybrid_lite[80:])**2) / n
     mse_hybrid = \
-        np.sum((means_hf[90:] - means_hybrid[90:])**2) / n
+        np.sum((means_hf[80:] - means_hybrid[80:])**2) / n
     print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
     print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
 
-    if latex:
+    if args.output:
         plt.tight_layout()
-        plt.savefig('figures/windowed-delay-ping-pong-10ms.pgf', bbox_inches='tight')
-        plt.savefig('figures/windowed-delay-ping-pong-10ms.pdf', bbox_inches='tight')
+        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
+        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
     else:
-        plt.show()
+        plt.show()  # type: ignore
diff --git a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
index 827f59b3..5fede6e1 100644
--- a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
+++ b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
@@ -2,8 +2,10 @@
 import matplotlib.pyplot as plt
 import matplotlib
 from matplotlib.ticker import EngFormatter
+
 import pathlib
 import argparse
+import sys
 
 from typing import Any
 array_type = np.ndarray[Any, Any]
@@ -29,30 +31,78 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
     return timestamps, total_utilization
 
 
-if True and __name__ == '__main__':
+if __name__ == '__main__':
+    this_binary = sys.argv[0]
+    commands = {
+        'singleplot': 'Displays port occupancy plot (needs full path for csv)',
+        'pads23': 'Generates plot that appears on PADS23 paper'
+    }
+    parser = argparse.ArgumentParser(
+        usage=f'{this_binary} <command> [<args>]\n\n'
+        'The available commands are:\n'
+        + '\n'.join(f'  {cmd}\t {desc}' for cmd, desc in commands.items()))
+    parser.add_argument('command', help='Subcommand to run')
+    main_args = parser.parse_args(sys.argv[1:2])
+
+    if main_args.command not in commands:
+        print("Unrecognized command:", main_args.command, file=sys.stderr)
+        exit(1)
+
+
+if main_args.command == 'singleplot':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--csv', type=pathlib.Path,
+                        help='Buffer occupancy CSV results',
+                        required=True)
+    args = parser.parse_args(sys.argv[2:])
+
+    ts1, utilization_hf = load_aggregated_utilization(args.csv)
+
+    # plotting
+    fig, ax = plt.subplots(figsize=(7, 3.8))
+    # vlines = ax.vlines([2e6, 3e6, 8e6], -0.4e6, 7.15e6, color='#AAA', ls='-')
+    # vlines.set_clip_on(False)
+
+    # arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    # ax.annotate("", xy=(2.1e6, 0e6), xytext=(3.5e6, 1.1e6), **arrow_color)
+    # ax.annotate("switch", xy=(3.1e6, 0.1e6), xytext=(4.8e6, 0.5e6), **arrow_color)
+    # ax.annotate("", xy=(7.9e6, 0.1e6), xytext=(6.0e6, 0.5e6), **arrow_color)
+    # ax.text(3.5e6, 1.1e6, "start latency tracking", color='#333', ha='left')
+
+    ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue')
+
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Total Buffer Port Occupancy')
+    # ax.set_ylim(-0.2e6, 6.9e6)
+    # ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0)
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+    ax.yaxis.set_major_formatter(bytes_formater)
+
+    plt.show()
+
+
+if main_args.command == 'pads23':
     parser = argparse.ArgumentParser()
     parser.add_argument('--experiment-folder', type=pathlib.Path,
                         help='Folder where experiment was run',
                         required=True)
     parser.add_argument('--output', type=pathlib.Path, help='Name of output figure',
                         required=True)
-    args = parser.parse_args()
+    args = parser.parse_args(sys.argv[2:])
 
-    latex = True
     dir_data = args.experiment_folder
     # dir_data = pathlib.Path('data/synthetic1')
     cut1 = 30
     cut2 = 79
 
-    if latex:
-        matplotlib.use("pgf")
-        matplotlib.rcParams.update({
-            "pgf.texsystem": "pdflatex",
-            'font.family': 'serif',
-            'font.size': 16,
-            'text.usetex': True,
-            'pgf.rcfonts': False,
-        })
+    matplotlib.use("pgf")
+    matplotlib.rcParams.update({
+        "pgf.texsystem": "pdflatex",
+        'font.family': 'serif',
+        'font.size': 16,
+        'text.usetex': True,
+        'pgf.rcfonts': False,
+    })
 
     ts1, utilization_hf = load_aggregated_utilization(
         dir_data / "high-fidelity" / "codes-output" / "dragonfly-snapshots.csv")
@@ -99,78 +149,6 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
     ax.xaxis.set_major_formatter(time_formatter_ns)
     ax.yaxis.set_major_formatter(bytes_formater)
 
-    if latex:
-        plt.tight_layout()
-        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
-        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
-    else:
-        plt.show()
-
-
-if False and __name__ == '__main__':
-    latex = True
-    dir_data = pathlib.Path('data/ping-pong')
-    cut1 = 10
-    cut2 = 79
-
-    if latex:
-        matplotlib.use("pgf")
-        matplotlib.rcParams.update({
-            "pgf.texsystem": "pdflatex",
-            'font.family': 'serif',
-            'font.size': 16,
-            'text.usetex': True,
-            'pgf.rcfonts': False,
-        })
-
-    ts1, utilization_hf = \
-        load_aggregated_utilization(dir_data / "router-snapshots-vanilla.csv")
-    ts2, utilization_hybrid = \
-        load_aggregated_utilization(dir_data / "router-snapshots-surrogate-freezing.csv")
-    ts3, utilization_hybrid_lite = \
-        load_aggregated_utilization(dir_data / "router-snapshots-surrogate-nonfrozen.csv")
-
-    # plotting
-    fig, ax = plt.subplots(figsize=(7, 3.8))
-    vlines = ax.vlines([0, 1e6, 8e6], -0.05e5, 1.11e5, color='#AAA', ls='-')
-    vlines.set_clip_on(False)
-
-    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
-    ax.annotate("", xy=(0.1e6, 0e5), xytext=(2e6, 0.16e5), **arrow_color)
-    ax.annotate("switch", xy=(1.1e6, 0.0e5), xytext=(4.8e6, 0.06e5), **arrow_color)
-    ax.annotate("", xy=(7.9e6, 0.0e5), xytext=(6.0e6, 0.06e5), **arrow_color)
-    ax.text(2e6, 0.16e5, "start latency tracking", color='#333', ha='left')
-
-    ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue')
-
-    ax.plot(ts3[:cut1], utilization_hybrid_lite[:cut1],
-            label="hybrid-lite", color='red')
-    ax.plot(ts3[cut1-1:cut2+1], utilization_hybrid_lite[cut1-1:cut2+1],
-            color='red', ls='--')
-    ax.plot(ts3[cut2:], utilization_hybrid_lite[cut2:], color='red')
-
-    ax.plot(ts2[:cut1], utilization_hybrid[:cut1], label="hybrid",
-            color='green')
-    ax.plot(ts2[cut1-1:cut2+1], utilization_hybrid[cut1-1:cut2+1], color='green', ls='--')
-    ax.plot(ts2[cut2:], utilization_hybrid[cut2:], color='green')
-
-    # ax.text(0, 1.15e5, "start latency tracking", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # ax.text(1e6, 1.15e5, "switch to surrogate", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # ax.text(8e6, 1.15e5, "switch to\nhigh-definition", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-
-    ax.set_xlabel('Virtual time')
-    ax.set_ylabel('Total Buffer Port Occupancy')
-    ax.set_ylim(-0.02e5, 1.08e5)
-    ax.legend(bbox_to_anchor=(.48, .28), loc='lower center', borderaxespad=0)
-    ax.xaxis.set_major_formatter(time_formatter_ns)
-    ax.yaxis.set_major_formatter(bytes_formater)
-
-    if latex:
-        plt.tight_layout()
-        plt.savefig('figures/port-occupancy-ping-pong.pgf', bbox_inches='tight')
-        plt.savefig('figures/port-occupancy-ping-pong.pdf', bbox_inches='tight')
-    else:
-        plt.show()
+    plt.tight_layout()
+    plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
+    plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
diff --git a/scripts/reproducibility-pads23/reproduce.sh b/scripts/reproducibility-pads23/reproduce.sh
index b0bd2e4f..5dc4a9f6 100644
--- a/scripts/reproducibility-pads23/reproduce.sh
+++ b/scripts/reproducibility-pads23/reproduce.sh
@@ -31,11 +31,11 @@ done
 
 mkdir figures
 
-python python-scripts/plot-packet-latency.py \
+python python-scripts/plot-packet-latency.py pads23 \
   --latencies results/10ms/condensed \
   --output figures/packet_latency-10ms
 
-python python-scripts/port-occupancy.py \
+python python-scripts/port-occupancy.py pads23 \
   --experiment-folder results/10ms --output figures/port-occupancy-10ms
 
 
diff --git a/scripts/terminal-to-terminal-latency/determine_mean_std.py b/scripts/terminal-to-terminal-latency/determine_mean_std.py
index aeef7750..09449143 100644
--- a/scripts/terminal-to-terminal-latency/determine_mean_std.py
+++ b/scripts/terminal-to-terminal-latency/determine_mean_std.py
@@ -7,13 +7,15 @@ def mean_and_std(array: np.array) -> tuple[float, float]:
 
 
 if __name__ == '__main__':
-    delays = np.loadtxt("packets-delay.csv", skiprows=1, delimiter=",")
+    delays = np.loadtxt("packets-delay.txt", skiprows=1, delimiter=",")
     start_col = 8
-    delay_col = 9
+    delay_col = 10
+    size_col = 5
 
     # Filtering data to some interval
-    delays = delays[np.bitwise_and(delays[:, start_col] > 200e3,
-                                   delays[:, start_col] + delays[:, delay_col] < 500e3)]
+    delays = delays[delays[:, start_col] > 200e3]
+    # delays = delays[np.bitwise_and(delays[:, start_col] > 200e3,
+    #                                delays[:, start_col] + delays[:, delay_col] < 500e3)]
 
     # Distribution
     delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
@@ -46,6 +48,19 @@ def mean_and_std(array: np.array) -> tuple[float, float]:
     axs[1, 1].hist(delays[delays_out_group, delay_col], bins=50, density=True, alpha=0.6, color='b')
     plt.show()
 
+    delays01 = delays0[delays0[:, 1] == 1]
+    delays056 = delays0[delays0[:, 1] == 56]
+    plt.scatter(delays01[:, size_col], delays01[:, delay_col])
+    plt.title("Packet size vs terminal to terminal delay. Terminal 0 to terminal 1")
+    plt.xlabel("Packet size")
+    plt.ylabel("Latency")
+    plt.show()
+    plt.scatter(delays056[:, size_col], delays056[:, delay_col])
+    plt.title("Packet size vs terminal to terminal delay. Terminal 0 to terminal 56")
+    plt.xlabel("Packet size")
+    plt.ylabel("Latency")
+    plt.show()
+
     buckets = [delays0[delays0[:, 1] == i] for i in range(1, 72)]
     buckets_processed = np.array([mean_and_std(b[:, delay_col]) for b in buckets])
     print("Destination, Means and stds for terminal 0")

From b0be25b7539e0d45fcbbe75b7c68c2c2abdb07bb Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 3 Nov 2023 11:57:11 -0400
Subject: [PATCH 063/188] Revert "Extending examples with uniform random
 traffic example"

This reverts commit 3010118463ec330b1794a02b15620ee74ab690c9.
---
 doc/example/CMakeLists.txt                    |   9 +-
 ...n => tutorial-ping-pong-surrogate.conf.in} |   0
 ...ial.conf.in => tutorial-ping-pong.conf.in} |   0
 doc/example/tutorial-synthetic-ping-pong.c    |  34 +-
 doc/example/tutorial-synthetic-uniform.c      | 346 ------------------
 tests/example-ping-pong-no-logging.sh         |   2 +-
 tests/example-ping-pong-surrogate-1.sh        |   4 +-
 tests/example-ping-pong-surrogate-2.sh        |   4 +-
 tests/example-ping-pong-surrogate-3.sh        |   4 +-
 ...ample-ping-pong-surrogate-determinism-1.sh |   4 +-
 ...ample-ping-pong-surrogate-determinism-2.sh |   4 +-
 11 files changed, 18 insertions(+), 393 deletions(-)
 rename doc/example/{tutorial-surrogate.conf.in => tutorial-ping-pong-surrogate.conf.in} (100%)
 rename doc/example/{tutorial.conf.in => tutorial-ping-pong.conf.in} (100%)
 delete mode 100644 doc/example/tutorial-synthetic-uniform.c

diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
index 6db84fd5..49451d91 100644
--- a/doc/example/CMakeLists.txt
+++ b/doc/example/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(example-files
     example
     tutorial-synthetic-ping-pong
-    tutorial-synthetic-uniform
     )
 
 foreach(namefile ${example-files})
@@ -10,8 +9,8 @@ foreach(namefile ${example-files})
 endforeach()
 
 # Saving default config files to run experiments with
-configure_file(tutorial.conf.in tutorial.template.conf.in @ONLY)
-configure_file(tutorial-surrogate.conf.in tutorial-surrogate.template.conf.in @ONLY)
+configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.template.conf.in @ONLY)
+configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.template.conf.in @ONLY)
 
 set(single_quote "'")
 set(double_quote "\"")
@@ -22,5 +21,5 @@ set(NETWORK_TREATMENT "freeze")
 set(PACKET_LATENCY_TRACE_PATH "packet-latency-trace/")
 set(IGNORE_UNTIL "200e4")
 string(REPLACE ${single_quote} ${double_quote} SWITCH_TIMESTAMPS "'1000e4', '8900e4'")
-configure_file(tutorial.conf.in tutorial-ping-pong.conf)
-configure_file(tutorial-surrogate.conf.in tutorial-ping-pong-surrogate.conf)
+configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.conf)
+configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.conf)
diff --git a/doc/example/tutorial-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in
similarity index 100%
rename from doc/example/tutorial-surrogate.conf.in
rename to doc/example/tutorial-ping-pong-surrogate.conf.in
diff --git a/doc/example/tutorial.conf.in b/doc/example/tutorial-ping-pong.conf.in
similarity index 100%
rename from doc/example/tutorial.conf.in
rename to doc/example/tutorial-ping-pong.conf.in
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 664d6f03..1aaf0528 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -11,8 +11,6 @@
 
 static int net_id = 0;
 static int PAYLOAD_SZ = 4096;
-static int RANDOM_PAYLOAD_SZ = 0; // If turned on, it assumes that PAYLOAD_SZ is a multiple of CHUNK_SIZE
-static int CHUNK_SIZE = 64; // This value depends on the network being used
 static unsigned long long num_nodes = 0;
 
 static char lp_io_dir[256] = {'\0'};
@@ -87,7 +85,6 @@ const tw_optdef app_opt [] =
     	TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "),
     	TWOPT_UINT("num_initial_messages", num_initial_msgs, "Number of PING messages to be injected initially at the start (larger = more congestion)"),
     	TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "),
-    	TWOPT_UINT("random_payload_sz", RANDOM_PAYLOAD_SZ, "whether payloads are a random number between 1 and payload_sz (default 0)"),
         TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
         TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
         TWOPT_END()
@@ -103,21 +100,6 @@ static void svr_add_lp_type()
   lp_type_register("nw-lp", svr_get_lp_type());
 }
 
-static long payload_size_forward(tw_lp * lp) {
-    long payload_size = PAYLOAD_SZ;
-    if (RANDOM_PAYLOAD_SZ) {
-        payload_size = tw_rand_integer(lp->rng, 0, PAYLOAD_SZ > CHUNK_SIZE ? PAYLOAD_SZ / CHUNK_SIZE : 1);
-        payload_size *= CHUNK_SIZE;
-    }
-    return payload_size;
-}
-
-static void payload_size_rev(tw_lp * lp) {
-    if (RANDOM_PAYLOAD_SZ) {
-        tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload size
-    }
-}
-
 static void svr_init(svr_state * s, tw_lp * lp)
 {
     //Initialize State
@@ -177,8 +159,7 @@ static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
     s->ping_msg_sent_count++;
-    long const payload_size = payload_size_forward(lp);
-    m->event_rc = model_net_event(net_id, "test", global_dest, payload_size, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
 }
 
 static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -186,7 +167,6 @@ static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_l
     (void) b;
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
     s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
-    payload_size_rev(lp);
     tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
     tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
 }
@@ -207,14 +187,12 @@ static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0);
     s->pong_msg_sent_count++;
-    long const payload_size = payload_size_forward(lp);
-    m->event_rc = model_net_event(net_id, "test", global_dest, payload_size, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp);
 }
 
 static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
 {
     (void) b;
-    payload_size_rev(lp);
     model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
     s->pong_msg_sent_count--;
     s->payload_sum -= m->payload_value; //undo the increment of the payload sum
@@ -243,8 +221,7 @@ static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
     codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
     tw_lpid global_dest = codes_mapping_get_lpid_from_relative(send_to, group_name, lp_type_name, NULL, 0);
     s->ping_msg_sent_count++;
-    long const payload_size = payload_size_forward(lp);
-    m->event_rc = model_net_event(net_id, "test", global_dest, payload_size, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
+    m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp);
 }
 
 static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp)
@@ -252,7 +229,6 @@ static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
     if (! b->c1) { //if we didn't flip the c1 flag in the forward event
         model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
         s->ping_msg_sent_count--;
-        payload_size_rev(lp);
         tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value
         tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to
         b->c1 = 0;
@@ -264,7 +240,6 @@ static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp *
 static void svr_finalize(svr_state * s, tw_lp * lp)
 {
     int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count;
-    // TODO (Elkin): this is wrong for random payload sizes
     int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent;
     tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
 
@@ -366,9 +341,6 @@ int main(int argc, char **argv)
     num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
     assert(num_nodes);
 
-    int rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", NULL, &CHUNK_SIZE);
-    if(rc) { CHUNK_SIZE = 512; }
-
     if(lp_io_dir[0])
     {
         do_lp_io = 1;
diff --git a/doc/example/tutorial-synthetic-uniform.c b/doc/example/tutorial-synthetic-uniform.c
deleted file mode 100644
index 0dd6bcab..00000000
--- a/doc/example/tutorial-synthetic-uniform.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Copyright (C) 2019 Neil McGlohon - 2023 Elkin Cruz
- * Based on tutorial-synthetic-ping-pong.c by 2019 Neil McGlohon
- * See LICENSE notice in top-level directory
- */
-
-#include "codes/model-net.h"
-#include "codes/codes_mapping.h"
-#include "codes/surrogate/init.h"  // just needed for stats on surrogate-mode
-
-
-static int net_id = 0;
-static int PAYLOAD_SZ = 4096;
-static int RANDOM_PAYLOAD_SZ = 0; // If turned on, it assumes that PAYLOAD_SZ is a multiple of CHUNK_SIZE
-static int CHUNK_SIZE = 512; // This value depends on the network configuration
-static unsigned long long num_nodes = 0;
-
-static char lp_io_dir[256] = {'\0'};
-static lp_io_handle io_handle;
-static unsigned int lp_io_use_suffix = 0;
-
-static int num_msgs = 10000;
-static int terminal_queue_size = 3;
-
-/* global variables for codes mapping */
-static char group_name[MAX_NAME_LENGTH];
-static char lp_type_name[MAX_NAME_LENGTH];
-static int group_index, lp_type_index, rep_id, offset;
-
-/* type of events */
-enum SVR_EVENT
-{
-    SVR_EVENT_send = 1,
-    SVR_EVENT_msg
-};
-
-struct svr_msg
-{
-    enum SVR_EVENT svr_event_type; // kickoff, heartbeat, msg
-    int sender_id; //ID of the sender workload LP to know who to send a PONG message back to
-    int payload_value; //Some value that we will encode as an example
-    // Used for rollback
-    int payload_size; //Size of payload (the actual event is not of this size, this is just a number we decide on)
-    model_net_event_return event_rc; //helper to encode data relating to CODES rng usage
-    tw_stime previous_ts;
-};
-
-struct svr_state
-{
-    tw_lpid svr_id;       /* the ID of this server */
-    int msg_sent_count;   /* messages sent */
-    int msg_recvd_count;  /* messages received */
-    int total_bytes_sent; /* total bytes sent */
-    tw_stime start_ts;    /* time that this LP started sending requests */
-    tw_stime end_ts;      /* time that this LP ended sending requests */
-    int payload_sum;      /* the running sum of all payloads received */
-};
-
-/* declaration of functions */
-static void svr_init(struct svr_state * s, tw_lp * lp);
-static void svr_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp);
-static void svr_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp);
-static void svr_finalize(struct svr_state * s, tw_lp * lp);
-static tw_stime ns_to_s(tw_stime ns);
-static tw_stime s_to_ns(tw_stime s);
-
-/* ROSS lptype function callback mapping */
-tw_lptype svr_lp = {
-    (init_f) svr_init,
-    (pre_run_f) NULL,
-    (event_f) svr_event,
-    (revent_f) svr_rev_event,
-    (commit_f) NULL,
-    (final_f)  svr_finalize,
-    (map_f) codes_mapping,
-    sizeof(struct svr_state),
-};
-
-const tw_optdef app_opt [] =
-{
-        TWOPT_GROUP("Model net synthetic traffic " ),
-        TWOPT_UINT("num_messages", num_msgs, "Number of messages to be sent from terminal"),
-        TWOPT_UINT("injection_queue_size", terminal_queue_size, "Number of packets in a terminal's queue at any point in time (default 2)"),
-        TWOPT_UINT("payload_sz", PAYLOAD_SZ, "size of the message being sent "),
-        TWOPT_UINT("random_payload_sz", RANDOM_PAYLOAD_SZ, "whether payloads are a random number between 'chunk_size' and payload_sz (default 0 -> deactivated)"),
-        TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
-        TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
-        TWOPT_END()
-};
-
-const tw_lptype* svr_get_lp_type()
-{
-    return(&svr_lp);
-}
-
-static void svr_add_lp_type()
-{
-  lp_type_register("nw-lp", svr_get_lp_type());
-}
-
-static long payload_size_forward(tw_lp * lp) {
-    long payload_size = PAYLOAD_SZ;
-    if (RANDOM_PAYLOAD_SZ) {
-        payload_size = tw_rand_integer(lp->rng, 0, PAYLOAD_SZ > CHUNK_SIZE ? PAYLOAD_SZ / CHUNK_SIZE : 1);
-        payload_size *= CHUNK_SIZE;
-    }
-    return payload_size;
-}
-
-static void payload_size_rev(tw_lp * lp) {
-    if (RANDOM_PAYLOAD_SZ) {
-        tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload size
-    }
-}
-
-static void svr_init(struct svr_state * s, tw_lp * lp)
-{
-    //Initialize State
-    s->msg_sent_count = 0;
-    s->msg_recvd_count = 0;
-    s->total_bytes_sent = 0;
-    s->start_ts = 0.0;
-    s->end_ts = 0.0;
-    s->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); /* turns the LP Global ID into the server ID */
-    s->payload_sum = 0;
-
-    // This bit is just for testing. Only the first terminal (0) sends events
-    //if (lp->gid != 0) {
-    //    return;
-    //}
-
-    //Now we create and send a self "kickoff" message - this is a PDES coordination event and thus doesn't need to be injected into the connected network
-    //so we won't use model_net_event(), that's reserved for stuff we want to send across the network
-
-    /* Set a time from now when this message is to be received by the recipient (self in this cae.) add some tiny random noise to help avoid event ties (different events with same timestamp) */
-    //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window
-    tw_stime send_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001);
-
-    for (int i = 1; i <= terminal_queue_size && i <= num_msgs; i++) {
-        tw_event *e;
-        struct svr_msg *m;
-        e = tw_event_new(lp->gid, send_time * i, lp); //ROSS method to create a new event
-        m = tw_event_data(e); //Gives you a pointer to the data encoded within event e
-        m->sender_id = s->svr_id; //Set the event type so we can know how to classify the event when received
-        m->svr_event_type = SVR_EVENT_send; //Set the event type so we can know how to classify the event when received
-        tw_event_send(e); //ROSS method to send off the event e with the encoded data in m
-    }
-
-    s->start_ts = send_time; // the time when we're starting this LP's work is when the first ping is generated
-}
-
-static void handle_send_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-
-    if(s->msg_sent_count >= num_msgs) {//if we've sent enough messages, then we stop and don't send any more
-        b->c1 = 1; //flag that we didn't really do anything in this event so that if this event gets reversed, we don't over-aggressively revert state or RNGs
-        return;
-    }
-    assert((tw_lpid) m->sender_id == s->svr_id);
-
-    tw_lpid local_dest = -1; //ID of a sever, relative to only servers
-    tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs
-
-    //We want to make sure we're not accidentally picking ourselves
-    local_dest = tw_rand_integer(lp->rng, 1, num_nodes - 2);
-    local_dest = (s->svr_id + local_dest) % num_nodes;
-    //local_dest is now a number [0,num_nodes) but is assuredly not s->svr_id
-    assert(local_dest >= 0);
-    assert(local_dest < num_nodes);
-    assert(local_dest != s->svr_id);
-
-    // Message to send to random terminal
-    struct svr_msg msg_to_send;
-    msg_to_send.sender_id = s->svr_id; //encode our server ID into the new ping message
-    msg_to_send.svr_event_type = SVR_EVENT_msg; //set it to type MSG
-    msg_to_send.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10]
-    long const payload_size = payload_size_forward(lp);
-    m->payload_size = payload_size;
-    s->total_bytes_sent += payload_size;
-
-    // Message to send to self, in order to inject more another packet
-    struct svr_msg msg_to_self;
-    msg_to_self.sender_id = s->svr_id;
-    msg_to_self.svr_event_type = SVR_EVENT_send; // when the packet finally leaves the terminal, this event will be sent back to us
-
-    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server
-    global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);
-    s->msg_sent_count++;
-    m->event_rc = model_net_event(
-            net_id, "test", global_dest, payload_size, 0.0,
-            sizeof(struct svr_msg), (const void*)&msg_to_send,
-            sizeof(struct svr_msg), (const void*)&msg_to_self, lp);
-}
-
-static void handle_send_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    if (! b->c1) { //if we didn't flip the c1 flag in the forward event
-        model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message
-        s->msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state
-        s->total_bytes_sent -= m->payload_size;
-        payload_size_rev(lp);
-        tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value;
-        tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest
-        b->c1 = 0;
-    }
-}
-
-static void handle_recv_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    (void) lp;
-    s->msg_recvd_count++; //increment the counter for ping messages received
-    s->payload_sum += m->payload_value; //increment our running sum of payload values received
-}
-
-static void handle_recv_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
-{
-    (void) b;
-    (void) lp;
-    s->payload_sum -= m->payload_value; //undo the increment of the payload sum
-    s->msg_recvd_count--; //undo the increment of the counter for ping messages received
-}
-
-static void svr_finalize(struct svr_state * s, tw_lp * lp)
-{
-    tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts);
-
-    printf("Server LPID:%lu svr_id:%lu sent %d bytes in %f seconds, MSGs Sent: %d; MSGs Received: %d Payload Sum: %d\n",
-            (unsigned long)lp->gid, (unsigned long)s->svr_id, s->total_bytes_sent,
-            time_in_seconds_sent, s->msg_sent_count, s->msg_recvd_count, s->payload_sum);
-}
-
-static void svr_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
-{
-    m->previous_ts = s->end_ts;
-    s->end_ts = tw_now(lp);
-
-    switch (m->svr_event_type)
-    {
-        case SVR_EVENT_send:
-            handle_send_event(s, b, m, lp);
-            break;
-        case SVR_EVENT_msg:
-            handle_recv_event(s, b, m, lp);
-            break;
-        default:
-            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
-            break;
-    }
-}
-
-static void svr_rev_event(struct svr_state * s, tw_bf * b, struct svr_msg * m, tw_lp * lp)
-{
-    switch (m->svr_event_type)
-    {
-        case SVR_EVENT_send:
-            handle_send_rev_event(s, b, m, lp);
-            break;
-        case SVR_EVENT_msg:
-            handle_recv_rev_event(s, b, m, lp);
-            break;
-        default:
-            tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type);
-            break;
-    }
-
-    s->end_ts = m->previous_ts;
-}
-
-/* convert ns to seconds */
-static tw_stime ns_to_s(tw_stime ns)
-{
-    return(ns / (1000.0 * 1000.0 * 1000.0));
-}
-static tw_stime s_to_ns(tw_stime s)
-{
-    return(s*1000.0*1000.0*1000.0);
-}
-
-int main(int argc, char **argv)
-{
-    int nprocs;
-    int rank;
-    int num_nets;
-    int *net_ids;
-
-    /* 1 day of simulation time is drastically huge but it will ensure
-       that the simulation doesn't try to end before all packets are delivered */
-    g_tw_ts_end = s_to_ns(24 * 60 * 60);
-
-    tw_opt_add(app_opt);
-    tw_init(&argc, &argv);
-
-    codes_comm_update();
-
-    if(argc < 2)
-    {
-            printf("\n Usage: mpirun <args> --sync=1/2/3 -- <config_file.conf> ");
-            MPI_Finalize();
-            return 0;
-    }
-
-    MPI_Comm_rank(MPI_COMM_CODES, &rank);
-    MPI_Comm_size(MPI_COMM_CODES, &nprocs);
-
-    configuration_load(argv[2], MPI_COMM_CODES, &config);
-
-    model_net_register();
-    svr_add_lp_type();
-
-    codes_mapping_setup();
-
-    net_ids = model_net_configure(&num_nets);
-    net_id = *net_ids;
-    free(net_ids);
-
-    num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1);  //get the number of nodes so we can use this value during the simulation
-    assert(num_nodes);
-
-    int rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", NULL, &CHUNK_SIZE);
-    if(rc) { CHUNK_SIZE = 512; }
-
-    bool do_lp_io = 0;
-    if(lp_io_dir[0])
-    {
-        do_lp_io = 1;
-        int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0;
-        int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES);
-        assert(ret == 0 || !"lp_io_prepare failure");
-    }
-    tw_run();
-    if (do_lp_io){
-        int ret = lp_io_flush(io_handle, MPI_COMM_CODES);
-        assert(ret == 0 || !"lp_io_flush failure");
-    }
-    model_net_report_stats(net_id);
-
-    // Printing some stats
-    print_surrogate_stats();
-
-    tw_end();
-    return 0;
-}
-
diff --git a/tests/example-ping-pong-no-logging.sh b/tests/example-ping-pong-no-logging.sh
index 3fe24d69..0fb0be8d 100755
--- a/tests/example-ping-pong-no-logging.sh
+++ b/tests/example-ping-pong-no-logging.sh
@@ -10,7 +10,7 @@ fi
 export PACKET_SIZE=4096
 export CHUNK_SIZE=4096
 export PACKET_LATENCY_TRACE_PATH=
-cat "$bindir/doc/example"/tutorial.template.conf.in | envsubst > tutorial-ping-pong.conf
+cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh
index 67f8830e..7f3a5f6d 100755
--- a/tests/example-ping-pong-surrogate-1.sh
+++ b/tests/example-ping-pong-surrogate-1.sh
@@ -15,10 +15,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/
-cat "$bindir/doc/example"/tutorial.template.conf.in | envsubst > tutorial-ping-pong.conf
+cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh
index c3c5fe5f..f987bedf 100755
--- a/tests/example-ping-pong-surrogate-2.sh
+++ b/tests/example-ping-pong-surrogate-2.sh
@@ -15,10 +15,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/
-cat "$bindir/doc/example"/tutorial.template.conf.in | envsubst > tutorial-ping-pong.conf
+cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh
index 12e2bd64..19212e9e 100755
--- a/tests/example-ping-pong-surrogate-3.sh
+++ b/tests/example-ping-pong-surrogate-3.sh
@@ -15,11 +15,11 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-freeze/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf
 
 export NETWORK_TREATMENT=nothing
 export PACKET_LATENCY_TRACE_PATH=packet-latency-non-freeze/
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-non-freeze.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-non-freeze.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh
index 8f926795..cd219272 100755
--- a/tests/example-ping-pong-surrogate-determinism-1.sh
+++ b/tests/example-ping-pong-surrogate-determinism-1.sh
@@ -13,10 +13,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
 
 # Running simulation twice with the same parameters
 
diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh
index 3fb00206..b86f0dfd 100755
--- a/tests/example-ping-pong-surrogate-determinism-2.sh
+++ b/tests/example-ping-pong-surrogate-determinism-2.sh
@@ -13,10 +13,10 @@ export PREDICTOR_TYPE=average
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/
 export IGNORE_UNTIL=0.0
 export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"'
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf
 
 export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/
-cat "$bindir/doc/example"/tutorial-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
+cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf
 
 # Running simulation twice with the same parameters
 

From ff831a829ebeef3d6915ae8c8c0a266c0903be5f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 8 Nov 2023 18:53:49 -0500
Subject: [PATCH 064/188] Updating scripts to find out packet latency

---
 .../python-scripts/delay_in_window.py         | 100 ++++++++++--------
 .../file_read_cython/__init__.py              |   0
 .../read_mean_std_from_file.pyx               |  93 ++++++++++++++++
 .../python-scripts/plot-packet-latency.py     |  18 ++--
 scripts/reproducibility-pads23/reproduce.sh   |   2 +-
 5 files changed, 154 insertions(+), 59 deletions(-)
 create mode 100644 scripts/reproducibility-pads23/python-scripts/file_read_cython/__init__.py
 create mode 100644 scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx

diff --git a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
index 407d54ce..1695cd9d 100644
--- a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
+++ b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
@@ -34,8 +34,8 @@ def collect_data_numpy(
     return header, data
 
 
-def mean_and_std(array: ndarray) -> tuple[float, float]:
-    return np.mean(array), np.std(array)  # type: ignore
+def mean_and_std(array: ndarray) -> tuple[float, float, float]:
+    return np.mean(array), np.std(array), float(array.shape[0])  # type: ignore
 
 
 def find_mean_and_std_through_window(
@@ -43,19 +43,19 @@ def find_mean_and_std_through_window(
     n_windows: int = 100,
     start_time: float = 0.0,
     end_time: float | None = None,
-    start_time_col: int = 8,
-    delay_col: int = 9,
-) -> tuple[ndarray, ndarray, ndarray]:
+    end_time_col: int = 9,
+    delay_col: int = 10,
+) -> tuple[ndarray, ndarray, ndarray, ndarray]:
 
     if end_time is None:
-        end_time = delays[:, start_time_col].max()
+        end_time = delays[:, end_time_col].max()
 
-    window_size = (end_time - start_time) / n_windows
-    windows = window_size * (np.arange(n_windows) + 1)
-    mean_and_std_through_windows = np.zeros((n_windows, 2))
+    window_time = (end_time - start_time) / n_windows
+    windows = window_time * (np.arange(n_windows) + 1)
+    mean_and_std_through_windows = np.zeros((n_windows, 3))
     for i in range(n_windows):
-        delays_within_window = np.bitwise_and(i * window_size <= delays[:, start_time_col],
-                                              delays[:, start_time_col] < (i+1) * window_size)
+        delays_within_window = np.bitwise_and(i * window_time <= delays[:, end_time_col],
+                                              delays[:, end_time_col] < (i+1) * window_time)
         if delays_within_window.sum() > 0:
             mean_and_std_through_windows[i] = mean_and_std(delays[delays_within_window, delay_col])
         else:
@@ -66,7 +66,8 @@ def find_mean_and_std_through_window(
         windows = windows[:last_good[0]]
         mean_and_std_through_windows = mean_and_std_through_windows[:last_good[0]]
 
-    return windows, mean_and_std_through_windows[:, 0], mean_and_std_through_windows[:, 1]
+    return windows, mean_and_std_through_windows[:, 0], mean_and_std_through_windows[:, 1], \
+        mean_and_std_through_windows[:, 2].astype(np.int32)
 
 
 if __name__ == '__main__':
@@ -77,62 +78,67 @@ def find_mean_and_std_through_window(
                         required=True)
     parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
                         default=100)
+    parser.add_argument('--start', type=float, help='Total (virtual) simulation time',
+                        required=True)
     parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
                         required=True)
     args = parser.parse_args()
 
-    # experiment = 'vanilla-synthetic1-10ms'  # name of experiment
     plotting = False
-    dist_type = 'all'  # options: all, same_router, same_group, other_group
     computing = True
+    use_cython = True
+
     loading = not computing
-    raw_data = True
-    # end_time = 10e6  # 10 ms
-    # end_time = 100e6  # 100 ms
     end_time = args.end
-    # n_windows = 100
     n_windows = args.windows
 
-    # out_file_name = f"{experiment}_windowed_packet_latency_{dist_type}.npz"
+    # Hardcoded values for 72-node dragonfly network
+    dist_type = 'all'  # options: all, same_router, same_group, other_group
+
     out_file_name = f"{args.output}.npz"
 
     if computing:
-        if raw_data:
+        if use_cython:
+            import pyximport; pyximport.install(language_level='3str')  # noqa: E702
+            from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
+
+            windows, n_samples, samples = load_mean_and_std_through_window(
+                str(args.latencies), args.start, args.end, num_windows=args.windows)
+            means, stds = samples[:, 0], samples[:, 1]
+
+        else:
             # Columns within the csv file that matter to us
             header, delays = collect_data_numpy(
                 args.latencies, 'packets-delay', delimiter=',',
                 dtype=np.dtype('float'))
-            start_time_col = header.index('start')
+            end_time_col = header.index('end')
             delay_col = header.index('latency')
-        else:
-            start_time_col = 8
-            delay_col = 9
-            delays = np.loadtxt("packets-delay.csv", skiprows=1, delimiter=",")
-
-        # Delays distributions
-        if dist_type != 'all':
-            delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
-            delays_same_group = np.bitwise_xor(
-                (delays[:, 0] // 8) == (delays[:, 1] // 8),
-                delays_same_router)
-            delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8)
-
-            # Selecting which distribution to display
-            if dist_type == 'same_router':
-                distribution = delays_same_router
-            elif dist_type == 'same_group':
-                distribution = delays_same_group
-            elif dist_type == 'other_group':
-                distribution = delays_out_group
-
-        # Computing windowed mean and stds + plotting
-        windows, means, stds = find_mean_and_std_through_window(
-            delays if dist_type == 'all' else delays[distribution],
-            n_windows=n_windows, delay_col=delay_col, end_time=end_time)
+
+            # Delays distributions
+            if dist_type != 'all':
+                delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
+                delays_same_group = np.bitwise_xor(
+                    (delays[:, 0] // 8) == (delays[:, 1] // 8),
+                    delays_same_router)
+                delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8)
+
+                # Selecting which distribution to display
+                if dist_type == 'same_router':
+                    distribution = delays_same_router
+                elif dist_type == 'same_group':
+                    distribution = delays_same_group
+                elif dist_type == 'other_group':
+                    distribution = delays_out_group
+
+            # Computing windowed mean and stds + plotting
+            windows, means, stds, n_samples = find_mean_and_std_through_window(
+                delays if dist_type == 'all' else delays[distribution],
+                n_windows=n_windows, end_time_col=end_time_col,
+                delay_col=delay_col, end_time=end_time)
 
         # Save
         np.savez(out_file_name,
-                 windows=windows, means=means, stds=stds)
+                 windows=windows, means=means, stds=stds, n_samples=n_samples)
 
     if loading:
         data = np.load(out_file_name)
diff --git a/scripts/reproducibility-pads23/python-scripts/file_read_cython/__init__.py b/scripts/reproducibility-pads23/python-scripts/file_read_cython/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx b/scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx
new file mode 100644
index 00000000..d20b51e9
--- /dev/null
+++ b/scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx
@@ -0,0 +1,93 @@
+from pathlib import Path
+import glob
+import fileinput
+import numpy as np
+
+from libc.math cimport floor, sqrt
+cimport cython
+
+
+@cython.boundscheck(False)  # turn off bounds-checking for entire function
+@cython.wraparound(False)   # turn off wrapping (negative numbers) for entire function
+def load_mean_and_std_through_window(
+    str filepath,
+    double start_time,
+    double end_time,
+    int num_windows = 100,
+    int max_rows = 10000
+):
+    cdef int num_lines
+    cdef int i
+    cdef int window_j
+    cdef double[:] windows
+    cdef double[:, :] packet_latency_data
+    cdef double[:, :] samples
+    cdef int[:] n_samples
+    cdef double window_time = (end_time - start_time) / num_windows
+
+    samples = np.zeros((num_windows, 2), dtype=np.double)
+    n_samples = np.zeros((num_windows,), dtype=np.int32)
+    windows = np.zeros((num_windows,), dtype=np.double)
+
+    stat_files = glob.glob(str(Path(filepath) / "packets-delay-gid=*.txt"))
+
+    with open(stat_files[0], 'r') as f:
+        header = f.readline()[1:].split(',')
+    cdef int end_time_col = header.index('end')
+    cdef int delay_col = header.index('latency')
+
+    # Finding mean of data
+    raw_files = fileinput.input(stat_files, mode='rb')
+    while True:
+        data_raw = np.loadtxt(
+            raw_files, delimiter=',', dtype=np.double,
+            comments='#', max_rows=max_rows)
+        if data_raw.size == 0 or len(data_raw.shape) != 2:
+            break
+
+        packet_latency_data = data_raw
+        num_lines = packet_latency_data.shape[0]
+
+        assert(num_lines != 0)
+
+        for i in range(num_lines):
+            window_j = int(floor((packet_latency_data[i, end_time_col] - start_time) / window_time))
+            if window_j < 0 or window_j >= num_windows:
+                continue
+            samples[window_j, 0] += packet_latency_data[i, delay_col]
+            n_samples[window_j] += 1
+    raw_files.close()
+
+    for i in range(num_windows):
+        # Computing mean
+        if n_samples[i] > 0:
+            samples[i, 0] /= n_samples[i]
+        windows[i] = (i+1) * window_time
+
+    # Finding mean of data
+    raw_files = fileinput.input(stat_files, mode='rb')
+    while True:
+        data_raw = np.loadtxt(
+            raw_files, delimiter=',', dtype=np.double,
+            comments='#', max_rows=10000)
+        if data_raw.size == 0 or len(data_raw.shape) != 2:
+            break
+
+        packet_latency_data = data_raw
+        num_lines = packet_latency_data.shape[0]
+
+        assert(num_lines != 0)
+
+        for i in range(num_lines):
+            window_j = int(floor((packet_latency_data[i, end_time_col] - start_time) / window_time))
+            if window_j < 0 or window_j >= num_windows:
+                continue
+            samples[window_j, 1] += (packet_latency_data[i, delay_col] - samples[window_j, 0]) ** 2
+    raw_files.close()
+
+    for i in range(num_windows):
+        # Computing std
+        if n_samples[i] > 0:
+            samples[i, 1] = sqrt(samples[i, 1] / n_samples[i])
+
+    return np.asarray(windows), np.asarray(n_samples), np.asarray(samples)
diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
index cb397a33..b029d8a7 100644
--- a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -9,7 +9,8 @@
 import matplotlib
 from matplotlib.ticker import EngFormatter
 
-from delay_in_window import collect_data_numpy, find_mean_and_std_through_window
+import pyximport; pyximport.install(language_level='3str')  # noqa: E702
+from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
 
 
 time_formatter_ns = EngFormatter()
@@ -42,6 +43,8 @@
                         help='Folder with raw latency data')
     parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
                         default=100)
+    parser.add_argument('--start', type=float, help='Total (virtual) simulation time',
+                        required=True)
     parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
                         required=True)
     parser.add_argument('--std-factor', type=float, default=0.2,
@@ -50,16 +53,9 @@
 
     std_factor = args.std_factor
 
-    header, delays = collect_data_numpy(args.latencies_dir, 'packets-delay', delimiter=',',
-                                        dtype=np.dtype('float'))
-
-    # Cleaning data
-    next_packet_delay_col = header.index('next_packet_delay')
-    delays = delays[delays[:, next_packet_delay_col] > 0]
-
-    delay_col = header.index('latency')
-    windows, means, stds = find_mean_and_std_through_window(
-        delays, n_windows=args.windows, delay_col=delay_col, end_time=args.end)
+    windows, n_samples, samples = load_mean_and_std_through_window(
+        str(args.latencies_dir), args.start, args.end, num_windows=args.windows)
+    means, stds = samples[:, 0], samples[:, 1]
 
     fig, ax = plt.subplots()
 
diff --git a/scripts/reproducibility-pads23/reproduce.sh b/scripts/reproducibility-pads23/reproduce.sh
index 5dc4a9f6..c3912d5e 100644
--- a/scripts/reproducibility-pads23/reproduce.sh
+++ b/scripts/reproducibility-pads23/reproduce.sh
@@ -25,7 +25,7 @@ for exp in {10,100}; do
     python python-scripts/delay-in-window.py \
       --latencies results/${exp}ms/$kind/packet-latency-trace \
       --output results/${exp}ms/condensed/packet_latency-$kind \
-      --end ${exp}e6
+      --start 0.0 --end ${exp}e6
   done
 done
 

From 2a48197a82535b0a2a623cd31ca5f2cef4222a52 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 10 Nov 2023 23:01:57 -0500
Subject: [PATCH 065/188] Another update to the plotting scripts (more general)

---
 .../python-scripts/delay_in_window.py         | 93 +++++++++++++------
 .../python-scripts/plot-packet-latency.py     | 61 +++++++++---
 .../python-scripts/port-occupancy.py          |  4 +-
 3 files changed, 117 insertions(+), 41 deletions(-)

diff --git a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
index 1695cd9d..589c9774 100644
--- a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
+++ b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
@@ -4,22 +4,23 @@
 import sys
 import fileinput
 import pathlib
-from typing import Any
 import argparse
+from enum import Enum
+import typing as t
 
 import numpy as np
 import matplotlib.pyplot as plt
 
 
-ndarray = np.ndarray[Any, np.dtype[np.float64]]
+ndarray: t.TypeAlias = 'np.ndarray[t.Any, np.dtype[np.float64]]'
 
 
 def collect_data_numpy(
     path: pathlib.Path | str,
     filepreffix: str,
     delimiter: str | None = None,
-    dtype: Any = int
-) -> tuple[list[str], np.ndarray[Any, Any]]:
+    dtype: t.Any = int
+) -> tuple[list[str], np.ndarray[t.Any, t.Any]]:
     escaped_path = pathlib.Path(glob.escape(path))  # type: ignore
     stat_files = glob.glob(str(escaped_path / f"{filepreffix}-gid=*.txt"))
     if not stat_files:
@@ -70,6 +71,44 @@ def find_mean_and_std_through_window(
         mean_and_std_through_windows[:, 2].astype(np.int32)
 
 
+class SrcDestRelationship(Enum):
+    Any = 0
+    SameRouter = 1
+    SameGroup = 2
+    DifferentGroup = 3
+
+
+def break_delay_data_into(
+    delays: np.ndarray[t.Any, t.Any],
+    src_dest_rel: SrcDestRelationship,
+    nodes_per_router: int = 2,
+    nodes_per_group: int = 8
+) -> np.ndarray[t.Any, t.Any]:
+    if src_dest_rel == SrcDestRelationship.Any:
+        return delays
+
+    elif src_dest_rel == SrcDestRelationship.DifferentGroup:
+        delays_out_group = (delays[:, 0] // nodes_per_group) != (delays[:, 1] // nodes_per_group)
+        return delays[delays_out_group]  # type: ignore
+
+    else:
+
+        delays_same_router = \
+            (delays[:, 0] // nodes_per_router) == (delays[:, 1] // nodes_per_router)
+
+        if src_dest_rel == SrcDestRelationship.SameRouter:
+            return delays[delays_same_router]  # type: ignore
+
+        else:
+            assert src_dest_rel == SrcDestRelationship.SameGroup
+
+            delays_same_group = np.bitwise_xor(
+                (delays[:, 0] // nodes_per_group) == (delays[:, 1] // nodes_per_group),
+                delays_same_router)
+
+            return delays[delays_same_group]  # type: ignore
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--latencies', type=pathlib.Path, help='Folder to latencies',
@@ -82,28 +121,40 @@ def find_mean_and_std_through_window(
                         required=True)
     parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
                         required=True)
+    # The following aims to plot different portions of the packet delay data
+    parser.add_argument('--src-dest-relationship',
+                        help='Process only packets of related relationship',
+                        choices=[rel.name for rel in SrcDestRelationship], default='Any')
+    parser.add_argument('--nodes-per-group', type=int, help='Assuming a 1-D dragonfly network, '
+                        'this indicates the number of nodes per group (only useful with '
+                        '--src-dest-relationship)', default=8)
+    parser.add_argument('--nodes-per-router', type=int, help='Assuming a 1-D dragonfly network, '
+                        'this indicates the number of nodes per router (only useful with '
+                        '--src-dest-relationship)', default=2)
+    parser.add_argument('--use-cython', type=bool, help='Total (virtual) simulation time',
+                        default=False)
     args = parser.parse_args()
 
     plotting = False
     computing = True
-    use_cython = True
 
     loading = not computing
     end_time = args.end
     n_windows = args.windows
 
-    # Hardcoded values for 72-node dragonfly network
-    dist_type = 'all'  # options: all, same_router, same_group, other_group
+    dist_type = getattr(SrcDestRelationship, args.src_dest_relationship)
 
     out_file_name = f"{args.output}.npz"
 
     if computing:
-        if use_cython:
+        if args.use_cython:
+            assert dist_type == SrcDestRelationship.Any
             import pyximport; pyximport.install(language_level='3str')  # noqa: E702
             from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
 
             windows, n_samples, samples = load_mean_and_std_through_window(
-                str(args.latencies), args.start, args.end, num_windows=args.windows)
+                str(args.latencies), args.start, args.end, num_windows=args.windows,
+                max_rows=100000)
             means, stds = samples[:, 0], samples[:, 1]
 
         else:
@@ -111,29 +162,19 @@ def find_mean_and_std_through_window(
             header, delays = collect_data_numpy(
                 args.latencies, 'packets-delay', delimiter=',',
                 dtype=np.dtype('float'))
+            next_packet_delay_col = header.index('next_packet_delay')
             end_time_col = header.index('end')
             delay_col = header.index('latency')
 
-            # Delays distributions
-            if dist_type != 'all':
-                delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2)
-                delays_same_group = np.bitwise_xor(
-                    (delays[:, 0] // 8) == (delays[:, 1] // 8),
-                    delays_same_router)
-                delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8)
-
-                # Selecting which distribution to display
-                if dist_type == 'same_router':
-                    distribution = delays_same_router
-                elif dist_type == 'same_group':
-                    distribution = delays_same_group
-                elif dist_type == 'other_group':
-                    distribution = delays_out_group
+            delays = delays[delays[:, next_packet_delay_col] > 0]
+            delays = delays[delays[:, end_time_col] > 0]
+            delays = break_delay_data_into(
+                delays, dist_type,
+                nodes_per_group=args.nodes_per_group, nodes_per_router=args.nodes_per_router)
 
             # Computing windowed mean and stds + plotting
             windows, means, stds, n_samples = find_mean_and_std_through_window(
-                delays if dist_type == 'all' else delays[distribution],
-                n_windows=n_windows, end_time_col=end_time_col,
+                delays, n_windows=n_windows, end_time_col=end_time_col,
                 delay_col=delay_col, end_time=end_time)
 
         # Save
diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
index b029d8a7..cd905230 100644
--- a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -8,9 +8,7 @@
 import matplotlib.pyplot as plt
 import matplotlib
 from matplotlib.ticker import EngFormatter
-
-import pyximport; pyximport.install(language_level='3str')  # noqa: E702
-from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
+from delay_in_window import SrcDestRelationship
 
 
 time_formatter_ns = EngFormatter()
@@ -49,22 +47,59 @@
                         required=True)
     parser.add_argument('--std-factor', type=float, default=0.2,
                         help='Size of variance to show as an std factor')
+    parser.add_argument('--use-cython', type=bool, help='Total (virtual) simulation time',
+                        default=False)
     args = parser.parse_args(sys.argv[2:])
 
     std_factor = args.std_factor
 
-    windows, n_samples, samples = load_mean_and_std_through_window(
-        str(args.latencies_dir), args.start, args.end, num_windows=args.windows)
-    means, stds = samples[:, 0], samples[:, 1]
+    scatter_plot = True
+    relationship_to_show = SrcDestRelationship.Any
+    nodes_per_router = 2
+    nodes_per_group = 8
+
+    if args.use_cython:
+        import pyximport; pyximport.install(language_level='3str')  # noqa: E702
+        from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
+
+        windows, n_samples, samples = load_mean_and_std_through_window(
+            str(args.latencies_dir), args.start, args.end, num_windows=args.windows)
+        means, stds = samples[:, 0], samples[:, 1]
+
+    else:
+        from delay_in_window import collect_data_numpy, find_mean_and_std_through_window, \
+            break_delay_data_into
+        header, delays = collect_data_numpy(args.latencies_dir, 'packets-delay', delimiter=',',
+                                            dtype=np.dtype('float'))
+
+        # Cleaning data
+        next_packet_delay_col = header.index('next_packet_delay')
+        end_col = header.index('end')
+        delay_col = header.index('latency')
+
+        # Cleaning input
+        delays = delays[delays[:, next_packet_delay_col] > 0]
+        delays = delays[delays[:, end_col] > 0]
+        delays = break_delay_data_into(
+            delays, relationship_to_show,
+            nodes_per_group=nodes_per_group, nodes_per_router=nodes_per_router)
+
+        windows, means, stds, n_samples = find_mean_and_std_through_window(
+            delays, n_windows=args.windows, end_time=args.end, end_time_col=end_col,
+            delay_col=delay_col)
 
     fig, ax = plt.subplots()
 
-    # plt.errorbar(windows, means, yerr=std_factor*stds)
-    ax.plot(windows, means, label='high-fidelity only')
-    ax.fill_between(windows,
-                    means - std_factor*stds,
-                    means + std_factor*stds,
-                    color='#00F5')
+    if scatter_plot:
+        assert not args.use_cython
+        ax.scatter(delays[:, end_col], delays[:, delay_col])
+    else:
+        # plt.errorbar(windows, means, yerr=std_factor*stds)
+        ax.plot(windows, means)
+        ax.fill_between(windows,
+                        means - std_factor*stds,
+                        means + std_factor*stds,
+                        color='#00F5')
 
     ax.set_xlabel('Virtual time')
     ax.set_ylabel('Average Packet Latency')
@@ -90,7 +125,7 @@
     fig, ax = plt.subplots()
 
     # plt.errorbar(windows, means, yerr=std_factor*stds)
-    ax.plot(windows, means, label='high-fidelity only')
+    ax.plot(windows, means)
     ax.fill_between(windows,
                     means - std_factor*stds,
                     means + std_factor*stds,
diff --git a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
index 5fede6e1..ce3f46ec 100644
--- a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
+++ b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
@@ -78,7 +78,7 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
     ax.xaxis.set_major_formatter(time_formatter_ns)
     ax.yaxis.set_major_formatter(bytes_formater)
 
-    plt.show()
+    plt.show()  # type: ignore
 
 
 if main_args.command == 'pads23':
@@ -144,7 +144,7 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
 
     ax.set_xlabel('Virtual time')
     ax.set_ylabel('Total Buffer Port Occupancy')
-    ax.set_ylim(-0.2e6, 6.9e6)
+    # ax.set_ylim(-0.2e6, 6.9e6)
     ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0)
     ax.xaxis.set_major_formatter(time_formatter_ns)
     ax.yaxis.set_major_formatter(bytes_formater)

From 0c2678ce62a33dcc9e3866bc0859343f089abfcf Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 13 Nov 2023 06:18:50 -0700
Subject: [PATCH 066/188] Refactoring plot generation scripts

---
 .../python-scripts/delay_in_window.py         | 119 ++++++++++--------
 .../python-scripts/plot-packet-latency.py     |  95 +++++---------
 .../python-scripts/port-occupancy.py          |  84 ++++++++-----
 scripts/reproducibility-pads23/reproduce.sh   |   4 +-
 4 files changed, 152 insertions(+), 150 deletions(-)

diff --git a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
index 589c9774..aba053b6 100644
--- a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
+++ b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
@@ -9,7 +9,6 @@
 import typing as t
 
 import numpy as np
-import matplotlib.pyplot as plt
 
 
 ndarray: t.TypeAlias = 'np.ndarray[t.Any, np.dtype[np.float64]]'
@@ -109,58 +108,68 @@ def break_delay_data_into(
             return delays[delays_same_group]  # type: ignore
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--latencies', type=pathlib.Path, help='Folder to latencies',
-                        required=True)
-    parser.add_argument('--output', type=pathlib.Path, help='Directory to save aggregated stats',
-                        required=True)
-    parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
-                        default=100)
-    parser.add_argument('--start', type=float, help='Total (virtual) simulation time',
-                        required=True)
-    parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
-                        required=True)
-    # The following aims to plot different portions of the packet delay data
-    parser.add_argument('--src-dest-relationship',
-                        help='Process only packets of related relationship',
-                        choices=[rel.name for rel in SrcDestRelationship], default='Any')
-    parser.add_argument('--nodes-per-group', type=int, help='Assuming a 1-D dragonfly network, '
-                        'this indicates the number of nodes per group (only useful with '
-                        '--src-dest-relationship)', default=8)
-    parser.add_argument('--nodes-per-router', type=int, help='Assuming a 1-D dragonfly network, '
-                        'this indicates the number of nodes per router (only useful with '
-                        '--src-dest-relationship)', default=2)
-    parser.add_argument('--use-cython', type=bool, help='Total (virtual) simulation time',
-                        default=False)
-    args = parser.parse_args()
-
-    plotting = False
-    computing = True
-
-    loading = not computing
-    end_time = args.end
-    n_windows = args.windows
-
-    dist_type = getattr(SrcDestRelationship, args.src_dest_relationship)
-
-    out_file_name = f"{args.output}.npz"
-
-    if computing:
+class ProcessedPacketLatencyData(t.NamedTuple):
+    windows: ndarray
+    means: ndarray
+    stds: ndarray
+    n_samples: np.ndarray[t.Any, np.dtype[np.int32]]
+    header: list[str] | None = None
+    delays: ndarray | None = None
+
+
+class MainGetDataLatencies(object):
+    def __init__(self) -> None:
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--latencies-dir', type=pathlib.Path, help='Folder to latencies',
+                            required=True)
+        parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
+                            default=100)
+        parser.add_argument('--start', type=float, help='Total (virtual) simulation time',
+                            required=True)
+        parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
+                            required=True)
+        # The following aims to plot different portions of the packet delay data
+        parser.add_argument('--src-dest-relationship',
+                            help='Process only packets of related relationship',
+                            choices=[rel.name for rel in SrcDestRelationship], default='Any')
+        parser.add_argument('--nodes-per-group', type=int, help='Assuming a 1-D dragonfly network, '
+                            'this indicates the number of nodes per group (only useful with '
+                            '--src-dest-relationship)', default=8)
+        parser.add_argument('--nodes-per-router', type=int, help='Assuming a 1-D dragonfly '
+                            'network, this indicates the number of nodes per router (only '
+                            'useful with --src-dest-relationship)', default=2)
+        parser.add_argument('--use-cython', type=bool, help='Total (virtual) simulation time',
+                            default=False)
+
+        self.parser = parser
+        self.args: argparse.Namespace | None = None
+
+    def run(
+        self,
+        argv: list[str],
+    ) -> ProcessedPacketLatencyData:
+        self.args = args = self.parser.parse_args(argv)
+
+        end_time = args.end
+        n_windows = args.windows
+
+        dist_type = getattr(SrcDestRelationship, args.src_dest_relationship)
+
         if args.use_cython:
             assert dist_type == SrcDestRelationship.Any
             import pyximport; pyximport.install(language_level='3str')  # noqa: E702
-            from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
+            from file_read_cython.read_mean_std_from_file import \
+                load_mean_and_std_through_window
 
             windows, n_samples, samples = load_mean_and_std_through_window(
-                str(args.latencies), args.start, args.end, num_windows=args.windows,
+                str(args.latencies_dir), args.start, args.end, num_windows=args.windows,
                 max_rows=100000)
             means, stds = samples[:, 0], samples[:, 1]
 
         else:
             # Columns within the csv file that matter to us
             header, delays = collect_data_numpy(
-                args.latencies, 'packets-delay', delimiter=',',
+                args.latencies_dir, 'packets-delay', delimiter=',',
                 dtype=np.dtype('float'))
             next_packet_delay_col = header.index('next_packet_delay')
             end_time_col = header.index('end')
@@ -172,19 +181,25 @@ def break_delay_data_into(
                 delays, dist_type,
                 nodes_per_group=args.nodes_per_group, nodes_per_router=args.nodes_per_router)
 
-            # Computing windowed mean and stds + plotting
+            # Computing windowed mean and stds
             windows, means, stds, n_samples = find_mean_and_std_through_window(
                 delays, n_windows=n_windows, end_time_col=end_time_col,
                 delay_col=delay_col, end_time=end_time)
 
-        # Save
-        np.savez(out_file_name,
-                 windows=windows, means=means, stds=stds, n_samples=n_samples)
+        if 'header' in vars():
+            return ProcessedPacketLatencyData(windows, means, stds, n_samples, header, delays)
+        else:
+            return ProcessedPacketLatencyData(windows, means, stds, n_samples)
 
-    if loading:
-        data = np.load(out_file_name)
-        windows, means, stds = data['windows'], data['means'], data['stds']
 
-    if plotting:
-        plt.errorbar(windows, means, yerr=.2*stds)
-        plt.show()  # type: ignore
+if __name__ == '__main__':
+    main = MainGetDataLatencies()
+    main.parser.add_argument(
+        '--output', type=pathlib.Path, help='Directory to save aggregated stats',
+        required=True)
+    data = main.run(argv=sys.argv[1:])
+
+    assert main.args is not None
+    out_file_name = f"{main.args.output}.npz"
+    np.savez(out_file_name,
+             windows=data.windows, means=data.means, stds=data.stds, n_samples=data.n_samples)
diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
index cd905230..2906fc54 100644
--- a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -8,7 +8,7 @@
 import matplotlib.pyplot as plt
 import matplotlib
 from matplotlib.ticker import EngFormatter
-from delay_in_window import SrcDestRelationship
+from delay_in_window import MainGetDataLatencies
 
 
 time_formatter_ns = EngFormatter()
@@ -36,69 +36,33 @@
 
 
 if main_args.command == 'plotfromraw':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--latencies-dir', type=pathlib.Path, required=True,
-                        help='Folder with raw latency data')
-    parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
-                        default=100)
-    parser.add_argument('--start', type=float, help='Total (virtual) simulation time',
-                        required=True)
-    parser.add_argument('--end', type=float, help='Total (virtual) simulation time',
-                        required=True)
-    parser.add_argument('--std-factor', type=float, default=0.2,
-                        help='Size of variance to show as an std factor')
-    parser.add_argument('--use-cython', type=bool, help='Total (virtual) simulation time',
-                        default=False)
-    args = parser.parse_args(sys.argv[2:])
-
+    main = MainGetDataLatencies()
+    main.parser.add_argument('--std-factor', type=float, default=0.2,
+                             help='Size of variance to show as an std factor')
+    main.parser.add_argument('--scatter-plot', action='store_true')
+    data = main.run(argv=sys.argv[2:])
+
+    assert main.args is not None
+    args = main.args
     std_factor = args.std_factor
 
-    scatter_plot = True
-    relationship_to_show = SrcDestRelationship.Any
-    nodes_per_router = 2
-    nodes_per_group = 8
-
-    if args.use_cython:
-        import pyximport; pyximport.install(language_level='3str')  # noqa: E702
-        from file_read_cython.read_mean_std_from_file import load_mean_and_std_through_window
-
-        windows, n_samples, samples = load_mean_and_std_through_window(
-            str(args.latencies_dir), args.start, args.end, num_windows=args.windows)
-        means, stds = samples[:, 0], samples[:, 1]
-
-    else:
-        from delay_in_window import collect_data_numpy, find_mean_and_std_through_window, \
-            break_delay_data_into
-        header, delays = collect_data_numpy(args.latencies_dir, 'packets-delay', delimiter=',',
-                                            dtype=np.dtype('float'))
-
-        # Cleaning data
-        next_packet_delay_col = header.index('next_packet_delay')
-        end_col = header.index('end')
-        delay_col = header.index('latency')
-
-        # Cleaning input
-        delays = delays[delays[:, next_packet_delay_col] > 0]
-        delays = delays[delays[:, end_col] > 0]
-        delays = break_delay_data_into(
-            delays, relationship_to_show,
-            nodes_per_group=nodes_per_group, nodes_per_router=nodes_per_router)
-
-        windows, means, stds, n_samples = find_mean_and_std_through_window(
-            delays, n_windows=args.windows, end_time=args.end, end_time_col=end_col,
-            delay_col=delay_col)
-
     fig, ax = plt.subplots()
 
-    if scatter_plot:
-        assert not args.use_cython
-        ax.scatter(delays[:, end_col], delays[:, delay_col])
+    if args.scatter_plot:
+        if args.use_cython:
+            raise Exception("To scatter-plot raw data, we must have access to raw data. "
+                            "This is not possible when loading using Cython.")
+        assert data.delays is not None and data.header is not None
+
+        end_col = data.header.index('end')
+        delay_col = data.header.index('latency')
+        ax.scatter(data.delays[:, end_col], data.delays[:, delay_col])
     else:
         # plt.errorbar(windows, means, yerr=std_factor*stds)
-        ax.plot(windows, means)
-        ax.fill_between(windows,
-                        means - std_factor*stds,
-                        means + std_factor*stds,
+        ax.plot(data.windows, data.means)
+        ax.fill_between(data.windows,
+                        data.means - std_factor*data.stds,
+                        data.means + std_factor*data.stds,
                         color='#00F5')
 
     ax.set_xlabel('Virtual time')
@@ -206,18 +170,19 @@
 
     height_plot = ax.get_ylim()[1]
     ax.vlines = ax.vlines([args.started_tracking, args.switch, args.switch_back],
-                          -3e3, height_plot, color='#AAA', ls='-')
+                          -height_plot*0.04, height_plot, color='#AAA', ls='-')
     ax.vlines.set_clip_on(False)
+    # ax.set_ylim((0.0, height_plot))
 
     middle = (args.switch + args.switch_back) / 2
     arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
     ax.annotate("", xy=(args.started_tracking * .95, 80e3),
-                xytext=(args.started_tracking * .6, 98e3), **arrow_color)
-    ax.annotate("switch", xy=(args.switch*1.04, 118e3),
-                xytext=(middle, 105e3), **arrow_color)
-    ax.annotate("", xy=(args.switch_back * 0.96, 118e3),
-                xytext=(middle, 110e3), **arrow_color)
-    ax.text(args.started_tracking * .9, 1e5, "start\ntracking", color='#333', ha='right')
+                xytext=(args.started_tracking * .6, height_plot*.3), **arrow_color)
+    ax.annotate("switch", xy=(args.switch*1.04, height_plot*.03),
+                xytext=(middle, height_plot*.08), **arrow_color)
+    ax.annotate("", xy=(args.switch_back * 0.96, height_plot*.03),
+                xytext=(middle, height_plot*.08), **arrow_color)
+    ax.text(args.started_tracking * .9, height_plot*.3, "start\ntracking", color='#333', ha='right')
 
     ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333', rotation=40,
             rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
diff --git a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
index ce3f46ec..9164a6f2 100644
--- a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
+++ b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
@@ -87,22 +87,26 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
                         help='Folder where experiment was run',
                         required=True)
     parser.add_argument('--output', type=pathlib.Path, help='Name of output figure',
-                        required=True)
+                        default=False)
+    parser.add_argument('--started-tracking', type=float, default=2e6)
+    parser.add_argument('--switch', type=float, default=3e6)
+    parser.add_argument('--switch-back', type=float, default=8e6)
+    parser.add_argument('--show-switch-labels', action='store_true')
+    parser.add_argument('--no-show-legend', dest='show_legend', action='store_false')
     args = parser.parse_args(sys.argv[2:])
 
     dir_data = args.experiment_folder
     # dir_data = pathlib.Path('data/synthetic1')
-    cut1 = 30
-    cut2 = 79
-
-    matplotlib.use("pgf")
-    matplotlib.rcParams.update({
-        "pgf.texsystem": "pdflatex",
-        'font.family': 'serif',
-        'font.size': 16,
-        'text.usetex': True,
-        'pgf.rcfonts': False,
-    })
+
+    if args.output:
+        matplotlib.use("pgf")
+        matplotlib.rcParams.update({
+            "pgf.texsystem": "pdflatex",
+            'font.family': 'serif',
+            'font.size': 16,
+            'text.usetex': True,
+            'pgf.rcfonts': False,
+        })
 
     ts1, utilization_hf = load_aggregated_utilization(
         dir_data / "high-fidelity" / "codes-output" / "dragonfly-snapshots.csv")
@@ -111,17 +115,13 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
     ts3, utilization_hybrid_lite = load_aggregated_utilization(
         dir_data / "hybrid-lite" / "codes-output" / "dragonfly-snapshots.csv")
 
+    # Where to start and finish making the dotted line
+    assert np.all(np.abs(ts1 - ts2) < 1e-6) and np.all(np.abs(ts1 - ts3) < 1e-6)
+    cut1 = np.abs(ts1 - args.switch).argmin() + 1  # at switch
+    cut2 = np.abs(ts1 - args.switch_back).argmin()
+
     # plotting
     fig, ax = plt.subplots(figsize=(7, 3.8))
-    vlines = ax.vlines([2e6, 3e6, 8e6], -0.4e6, 7.15e6, color='#AAA', ls='-')
-    vlines.set_clip_on(False)
-
-    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
-    ax.annotate("", xy=(2.1e6, 0e6), xytext=(3.5e6, 1.1e6), **arrow_color)
-    ax.annotate("switch", xy=(3.1e6, 0.1e6), xytext=(4.8e6, 0.5e6), **arrow_color)
-    ax.annotate("", xy=(7.9e6, 0.1e6), xytext=(6.0e6, 0.5e6), **arrow_color)
-    ax.text(3.5e6, 1.1e6, "start latency tracking", color='#333', ha='left')
-
     ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue')
 
     ax.plot(ts3[:cut1], utilization_hybrid_lite[:cut1],
@@ -135,20 +135,42 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
     ax.plot(ts2[cut1-1:cut2+1], utilization_hybrid[cut1-1:cut2+1], color='green', ls='--')
     ax.plot(ts2[cut2:], utilization_hybrid[cut2:], color='green')
 
-    # ax.text(2e6, 7.4e6, "start latency tracking", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # ax.text(3e6, 7.4e6, "switch to surrogate", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    # ax.text(8e6, 7.4e6, "switch to\nhigh-definition", color='#333', rotation=40,
-    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    height_plot = ax.get_ylim()[1]
+    vlines = ax.vlines([args.started_tracking, args.switch, args.switch_back],
+                       -height_plot*0.04, height_plot, color='#AAA', ls='-')
+    vlines.set_clip_on(False)
+
+    middle = (args.switch + args.switch_back) / 2
+    arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
+    ax.annotate("", xy=(args.started_tracking * .95, 0e6),
+                xytext=(args.started_tracking * .6, height_plot*.3), **arrow_color)
+    ax.annotate("switch", xy=(args.switch*1.04, height_plot*.03),
+                xytext=(middle, height_plot*.08), **arrow_color)
+    ax.annotate("", xy=(args.switch_back * 0.96, height_plot*.03),
+                xytext=(middle, height_plot*.08), **arrow_color)
+    ax.text(args.started_tracking * .9, height_plot*.3, "start\ntracking", color='#333', ha='right')
+
+    if args.show_switch_labels:
+        ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333',
+                rotation=40, rotation_mode='anchor', horizontalalignment='left',
+                verticalalignment='center')
+        ax.text(args.switch, height_plot, "switch to surrogate", color='#333', rotation=40,
+                rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+        ax.text(args.switch_back, height_plot, "switch to\nhigh-definition", color='#333',
+                rotation=40, rotation_mode='anchor', horizontalalignment='left',
+                verticalalignment='center')
 
     ax.set_xlabel('Virtual time')
     ax.set_ylabel('Total Buffer Port Occupancy')
     # ax.set_ylim(-0.2e6, 6.9e6)
-    ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0)
+    if args.show_legend:
+        ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0)
     ax.xaxis.set_major_formatter(time_formatter_ns)
     ax.yaxis.set_major_formatter(bytes_formater)
 
-    plt.tight_layout()
-    plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
-    plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
+    if args.output:
+        plt.tight_layout()
+        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
+        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
+    else:
+        plt.show()  # type: ignore
diff --git a/scripts/reproducibility-pads23/reproduce.sh b/scripts/reproducibility-pads23/reproduce.sh
index c3912d5e..42b5609b 100644
--- a/scripts/reproducibility-pads23/reproduce.sh
+++ b/scripts/reproducibility-pads23/reproduce.sh
@@ -22,8 +22,8 @@ mkdir results/10ms/condensed results/100ms/condensed
 
 for exp in {10,100}; do
   for kind in {high-fidelity,hybrid,hybrid-lite}; do
-    python python-scripts/delay-in-window.py \
-      --latencies results/${exp}ms/$kind/packet-latency-trace \
+    python python-scripts/delay_in_window.py \
+      --latencies-dir results/${exp}ms/$kind/packet-latency-trace \
       --output results/${exp}ms/condensed/packet_latency-$kind \
       --start 0.0 --end ${exp}e6
   done

From 59ffe08de5779c2a5e1363b7e02e34a82ceefa5d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 24 Nov 2023 19:26:05 -0500
Subject: [PATCH 067/188] Misc changes to plot packet latency scripts

---
 .../python-scripts/delay_in_window.py         | 54 ++++++++++++-------
 .../python-scripts/plot-packet-latency.py     | 18 +++----
 2 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
index aba053b6..4bc19f0e 100644
--- a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
+++ b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py
@@ -15,16 +15,20 @@
 
 
 def collect_data_numpy(
-    path: pathlib.Path | str,
-    filepreffix: str,
+    path: str,
+    filepreffix: str | None = None,
+    filepostfix: str = "-gid=*.txt",
     delimiter: str | None = None,
     dtype: t.Any = int
 ) -> tuple[list[str], np.ndarray[t.Any, t.Any]]:
-    escaped_path = pathlib.Path(glob.escape(path))  # type: ignore
-    stat_files = glob.glob(str(escaped_path / f"{filepreffix}-gid=*.txt"))
-    if not stat_files:
-        print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr)
-        exit(1)
+    if filepreffix is None:
+        stat_files = [path]
+    else:
+        escaped_path = pathlib.Path(glob.escape(path))
+        stat_files = glob.glob(str(escaped_path / f"{filepreffix}{filepostfix}"))
+        if not stat_files:
+            print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr)
+            exit(1)
 
     data = np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype,
                       comments='#')
@@ -61,10 +65,10 @@ def find_mean_and_std_through_window(
         else:
             mean_and_std_through_windows[i] = -1
 
-    last_good, = np.where(mean_and_std_through_windows[:, 0] == -1)
-    if last_good.size > 0:
-        windows = windows[:last_good[0]]
-        mean_and_std_through_windows = mean_and_std_through_windows[:last_good[0]]
+    # Removing all windows for which there is no data
+    good_res = mean_and_std_through_windows[:, 0] != -1
+    windows = windows[good_res]
+    mean_and_std_through_windows = mean_and_std_through_windows[good_res]
 
     return windows, mean_and_std_through_windows[:, 0], mean_and_std_through_windows[:, 1], \
         mean_and_std_through_windows[:, 2].astype(np.int32)
@@ -120,8 +124,11 @@ class ProcessedPacketLatencyData(t.NamedTuple):
 class MainGetDataLatencies(object):
     def __init__(self) -> None:
         parser = argparse.ArgumentParser()
-        parser.add_argument('--latencies-dir', type=pathlib.Path, help='Folder to latencies',
-                            required=True)
+        source_group = parser.add_mutually_exclusive_group(required=True)
+        source_group.add_argument('--latencies-dir', type=pathlib.Path,
+                                  help='Folder to latencies (CSV file)')
+        source_group.add_argument('--latencies-file', type=pathlib.Path,
+                                  help='(CSV) File with latencies')
         parser.add_argument('--windows', type=int, help='Total windows to break simulation in',
                             default=100)
         parser.add_argument('--start', type=float, help='Total (virtual) simulation time',
@@ -138,8 +145,8 @@ def __init__(self) -> None:
         parser.add_argument('--nodes-per-router', type=int, help='Assuming a 1-D dragonfly '
                             'network, this indicates the number of nodes per router (only '
                             'useful with --src-dest-relationship)', default=2)
-        parser.add_argument('--use-cython', type=bool, help='Total (virtual) simulation time',
-                            default=False)
+        parser.add_argument('--use-cython', action='store_true',
+                            help='Total (virtual) simulation time')
 
         self.parser = parser
         self.args: argparse.Namespace | None = None
@@ -157,6 +164,7 @@ def run(
 
         if args.use_cython:
             assert dist_type == SrcDestRelationship.Any
+            assert args.latencies_dir is not None
             import pyximport; pyximport.install(language_level='3str')  # noqa: E702
             from file_read_cython.read_mean_std_from_file import \
                 load_mean_and_std_through_window
@@ -168,15 +176,21 @@ def run(
 
         else:
             # Columns within the csv file that matter to us
-            header, delays = collect_data_numpy(
-                args.latencies_dir, 'packets-delay', delimiter=',',
-                dtype=np.dtype('float'))
-            next_packet_delay_col = header.index('next_packet_delay')
+            if args.latencies_dir:
+                header, delays = collect_data_numpy(
+                    args.latencies_dir, 'packets-delay', delimiter=',',
+                    dtype=np.dtype('float'))
+            else:
+                assert args.latencies_file is not None
+                header, delays = collect_data_numpy(
+                    args.latencies_file, delimiter=',', dtype=np.dtype('float'))
+            # next_packet_delay_col = header.index('next_packet_delay')
             end_time_col = header.index('end')
             delay_col = header.index('latency')
 
-            delays = delays[delays[:, next_packet_delay_col] > 0]
+            # delays = delays[delays[:, next_packet_delay_col] > 0]
             delays = delays[delays[:, end_time_col] > 0]
+            delays = delays[delays[:, end_time_col] < end_time]
             delays = break_delay_data_into(
                 delays, dist_type,
                 nodes_per_group=args.nodes_per_group, nodes_per_router=args.nodes_per_router)
diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
index 2906fc54..21edae8a 100644
--- a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -36,14 +36,14 @@
 
 
 if main_args.command == 'plotfromraw':
-    main = MainGetDataLatencies()
-    main.parser.add_argument('--std-factor', type=float, default=0.2,
-                             help='Size of variance to show as an std factor')
-    main.parser.add_argument('--scatter-plot', action='store_true')
-    data = main.run(argv=sys.argv[2:])
-
-    assert main.args is not None
-    args = main.args
+    main_delay = MainGetDataLatencies()
+    main_delay.parser.add_argument(
+        '--std-factor', type=float, default=0.2, help='Size of variance to show as an std factor')
+    main_delay.parser.add_argument('--scatter-plot', action='store_true')
+    data = main_delay.run(argv=sys.argv[2:])
+
+    assert main_delay.args is not None
+    args = main_delay.args
     std_factor = args.std_factor
 
     fig, ax = plt.subplots()
@@ -66,7 +66,7 @@
                         color='#00F5')
 
     ax.set_xlabel('Virtual time')
-    ax.set_ylabel('Average Packet Latency')
+    ax.set_ylabel('Packet Latency' if args.scatter_plot else 'Average Packet Latency')
     ax.yaxis.set_major_formatter(time_formatter_ns)
     ax.xaxis.set_major_formatter(time_formatter_ns)
 

From 97830b4443ae322a0e978b768cdd46a899cabbd1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 27 Nov 2023 14:44:51 -0500
Subject: [PATCH 068/188] Allowing to disable torch if library is present

---
 CMakeLists.txt | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 048cb9d0..20afe8d1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -120,13 +120,21 @@ endif()
 # endif()
 
 ## TORCH loading ML models
-find_package(Torch)
-if(Torch_FOUND)
-    add_definitions(-DUSE_TORCH)
-    set(USE_TORCH true)
+if((NOT DEFINED USE_TORCH) OR USE_TORCH)
+    find_package(Torch)
+    if(Torch_FOUND)
+        set(CMAKE_CXX_STANDARD 17)
+        add_definitions(-DUSE_TORCH)
+        set(USE_TORCH true)
+        message(STATUS "Loading TORCH models enabled.")
+    else()
+        set(USE_TORCH false)
+        message(STATUS "Torch library not found. Loading TORCH models disabled.")
+    endif()
+else()
+    message(STATUS "Loading TORCH models NOT enabled.")
 endif()
 
-
 cmake_print_variables(CMAKE_C_FLAGS)
 add_subdirectory(src)
 

From f924aca97c3a23473d86e72e91960d1d3b2239a6 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 27 Nov 2023 18:22:57 -0500
Subject: [PATCH 069/188] Quick (and good enough) fix to keep the buffer at the
 terminal small

Without this fix, the latency for some packets (especially when the
network is being flooded with too many packets at once) will register
incorrectly, and thus the surrogate will fail to produce decent results.
---
 src/networks/model-net/dragonfly-dally.C | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index ff85fe35..edb6e409 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4678,7 +4678,11 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
         bf->c4 = 1;
         s->in_send_loop[msg->rail_id] = 0;
     }
-    if(s->issueIdle[msg->rail_id]) {
+    // TODO (elkin): The check on vcg only properly works for `num_qos_levels == 1`. Ideally, we should be checking if there is enough
+    // space for the next packet in the queue (the packet determines in which queue it's going to be injected), but that is not
+    // possible, because we only know the queue at `packet_generate`. This might not present a big problem for most applications but
+    // those that are fed at a rate higher than what they can process can see the queue, potentially, grow very large.
+    if(s->issueIdle[msg->rail_id] && s->terminal_length[msg->rail_id][vcg] < s->params->cn_vc_size) {
         bf->c5 = 1;
         s->issueIdle[msg->rail_id] = 0;
         model_net_method_idle_event2(injection_ts, 0, msg->rail_id, lp);

From d3b192ba8223b2542bb62462badc597552d134f8 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 28 Nov 2023 16:53:12 -0500
Subject: [PATCH 070/188] Forcing variable to not be optimized out

A variable was declared and defined under a scope that was disregarded
later on when the variable was used again.

This expression was never executed:

> oc_params.nprocs = num_traces_of_job[lid.job];

because `oc_params.nprocs` was never executed. The line:

> params = (char*)&oc_params;

after was copying the `oc_params` variable into a new variable to be
used later. `oc_params` is not guaranteed to exist after the scope
closes, and this would provoke a segfault.
---
 codes/codes-workload.h                        |  4 ++--
 src/network-workloads/model-net-mpi-replay.c  | 24 +++++++------------
 src/workload/codes-workload.c                 |  2 +-
 src/workload/methods/codes-checkpoint-wrkld.c |  4 ++--
 .../methods/codes-conc-online-comm-wrkld.C    |  2 +-
 .../methods/codes-darshan3-io-wrkld.c         |  4 ++--
 .../methods/codes-dumpi-trace-nw-wrkld.c      |  4 ++--
 src/workload/methods/codes-iolang-wrkld.c     |  4 ++--
 src/workload/methods/codes-iomock-wrkld.c     |  2 +-
 .../methods/codes-online-comm-wrkld.C         |  2 +-
 .../methods/codes-recorder-io-wrkld.c         |  4 ++--
 src/workload/methods/test-workload-method.c   |  4 ++--
 12 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/codes/codes-workload.h b/codes/codes-workload.h
index e97ec88c..2361ac4b 100644
--- a/codes/codes-workload.h
+++ b/codes/codes-workload.h
@@ -302,7 +302,7 @@ void codes_workload_free_config_return(codes_workload_config_return *c);
  */
 int codes_workload_load(
         const char* type,
-        const char* params,
+        const void* params,
         int app_id,
         int rank);
 
@@ -361,7 +361,7 @@ struct codes_workload_method
     void * (*codes_workload_read_config) (
             ConfigHandle *handle, char const * section_name,
             char const * annotation, int num_ranks);
-    int (*codes_workload_load)(const char* params, int app_id, int rank);
+    int (*codes_workload_load)(const void* params, int app_id, int rank);
     void (*codes_workload_get_next)(int app_id, int rank, struct codes_workload_op *op);
     void (*codes_workload_get_next_rc2)(int app_id, int rank);
     int (*codes_workload_get_rank_cnt)(const char* params, int app_id);
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index c5fe93a7..1433b2a3 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -77,7 +77,7 @@ static tw_stime mean_interval = 100000;
 static int payload_sz = 1024;
 
 /* Doing LP IO*/
-static char * params = NULL;
+static void * params = NULL;
 static char lp_io_dir[256] = {'\0'};
 static char sampling_dir[32] = {'\0'};
 static char mpi_msg_dir[32] = {'\0'};
@@ -2414,6 +2414,8 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    assert(num_net_traces <= num_mpi_lps);
 
    struct codes_jobmap_id lid;
+   online_comm_params oc_params;
+   dumpi_trace_params params_d;
 
    if(alloc_spec)
    {
@@ -2443,14 +2445,10 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    s->known_completed_jobs = calloc(num_jobs, sizeof(int));
 
    if (strcmp(workload_type, "dumpi") == 0){
-       dumpi_trace_params params_d;
        strcpy(params_d.file_name, file_name_of_job[lid.job]);
        params_d.num_net_traces = num_traces_of_job[lid.job];
-       params_d.nprocs = nprocs; 
-       params = (char*)&params_d;
-       strcpy(params_d.file_name, file_name_of_job[lid.job]);
-       params_d.num_net_traces = num_traces_of_job[lid.job];
-       params = (char*)&params_d;
+       params_d.nprocs = nprocs;
+       params = (void*)&params_d;
        strcpy(type_name, "dumpi-trace-workload");
 
        if(strlen(workloads_conf_file) > 0)
@@ -2466,9 +2464,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
 #endif
    }
    else if(strcmp(workload_type, "swm-online") == 0){
-           
-       online_comm_params oc_params;
-       
+
        if(strlen(workload_name) > 0)
        {
            strcpy(oc_params.workload_name, workload_name); 
@@ -2504,14 +2500,12 @@ void nw_test_init(nw_state* s, tw_lp* lp)
        /*TODO: nprocs is different for dumpi and online workload. for
         * online, it is the number of ranks to be simulated. */
        oc_params.nprocs = num_traces_of_job[lid.job]; 
-       params = (char*)&oc_params;
+       params = (void*)&oc_params;
        strcpy(type_name, "swm_online_comm_workload");
    }
    //Xin: add conceputual online workload
    else if(strcmp(workload_type, "conc-online") == 0){
-           
-       online_comm_params oc_params;
-       
+
        if(strlen(workload_name) > 0)
        {
            strcpy(oc_params.workload_name, workload_name); 
@@ -2524,7 +2518,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
         * online, it is the number of ranks to be simulated. */
        // printf("conc-online num_traces_of_job %d\n", num_traces_of_job[lid.job]);
        oc_params.nprocs = num_traces_of_job[lid.job]; 
-       params = (char*)&oc_params;
+       params = (void*)&oc_params;
        strcpy(type_name, "conc_online_comm_workload");
    }
 
diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c
index a7f96506..aec7108e 100644
--- a/src/workload/codes-workload.c
+++ b/src/workload/codes-workload.c
@@ -174,7 +174,7 @@ void codes_workload_free_config_return(codes_workload_config_return *c)
 
 int codes_workload_load(
         const char* type,
-        const char* params,
+        const void* params,
         int app_id,
         int rank)
 {
diff --git a/src/workload/methods/codes-checkpoint-wrkld.c b/src/workload/methods/codes-checkpoint-wrkld.c
index 672f9bbc..82e5fdb4 100644
--- a/src/workload/methods/codes-checkpoint-wrkld.c
+++ b/src/workload/methods/codes-checkpoint-wrkld.c
@@ -31,7 +31,7 @@ static void * checkpoint_workload_read_config(
         char const * section_name,
         char const * annotation,
         int num_ranks);
-static int checkpoint_workload_load(const char* params, int app_id, int rank);
+static int checkpoint_workload_load(const void* params, int app_id, int rank);
 static void checkpoint_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
 static void checkpoint_workload_get_next_rc2(int app_id, int rank);
 
@@ -112,7 +112,7 @@ static void * checkpoint_workload_read_config(
     return p;
 }
 
-static int checkpoint_workload_load(const char* params, int app_id, int rank)
+static int checkpoint_workload_load(const void* params, int app_id, int rank)
 {
     checkpoint_wrkld_params *c_params = (checkpoint_wrkld_params *)params;
     struct checkpoint_state* new_state;
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index d341cb7a..7cb36466 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -1801,7 +1801,7 @@ static void workload_caller(void * arg)
     }
 }
 
-static int comm_online_workload_load(const char * params, int app_id, int rank)
+static int comm_online_workload_load(const void * params, int app_id, int rank)
 {
     /* LOAD parameters from JSON file*/
     online_comm_params * o_params = (online_comm_params*)params;
diff --git a/src/workload/methods/codes-darshan3-io-wrkld.c b/src/workload/methods/codes-darshan3-io-wrkld.c
index 9e0d60d6..5fc924c1 100644
--- a/src/workload/methods/codes-darshan3-io-wrkld.c
+++ b/src/workload/methods/codes-darshan3-io-wrkld.c
@@ -53,7 +53,7 @@ static void * darshan_io_workload_read_config(
         char const * annotation,
         int num_ranks);
 /* Darshan workload generator's implementation of the CODES workload API */
-static int darshan_psx_io_workload_load(const char *params, int app_id, int rank);
+static int darshan_psx_io_workload_load(const void *params, int app_id, int rank);
 static void darshan_psx_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
 static int darshan_psx_io_workload_get_rank_cnt(const char *params, int app_id);
 static int darshan_rank_hash_compare(void *key, struct qhash_head *link);
@@ -179,7 +179,7 @@ static int darshan_psx_io_workload_get_time(const char *params, int app_id, int
 }
 
 /* load the workload generator for this rank, given input params */
-static int darshan_psx_io_workload_load(const char *params, int app_id, int rank)
+static int darshan_psx_io_workload_load(const void *params, int app_id, int rank)
 {
     darshan_params *d_params = (darshan_params *)params;
     darshan_fd logfile_fd = NULL;
diff --git a/src/workload/methods/codes-dumpi-trace-nw-wrkld.c b/src/workload/methods/codes-dumpi-trace-nw-wrkld.c
index f32291da..e6710f52 100644
--- a/src/workload/methods/codes-dumpi-trace-nw-wrkld.c
+++ b/src/workload/methods/codes-dumpi-trace-nw-wrkld.c
@@ -119,7 +119,7 @@ static inline double time_to_ns_lf(dumpi_clock t){
 }*/
 
 /* load the trace */
-static int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank);
+static int dumpi_trace_nw_workload_load(const void* params, int app_id, int rank);
 
 /* dumpi implementation of get next operation in the workload */
 static void dumpi_trace_nw_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
@@ -770,7 +770,7 @@ static int hash_rank_compare(void *key, struct qhash_head *link)
     return 0;
 }
 
-int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank)
+int dumpi_trace_nw_workload_load(const void* params, int app_id, int rank)
 {
 	libundumpi_callbacks callbacks;
 	libundumpi_cbpair callarr[DUMPI_END_OF_STREAM];
diff --git a/src/workload/methods/codes-iolang-wrkld.c b/src/workload/methods/codes-iolang-wrkld.c
index 45c647b1..c4920571 100644
--- a/src/workload/methods/codes-iolang-wrkld.c
+++ b/src/workload/methods/codes-iolang-wrkld.c
@@ -27,7 +27,7 @@ static void * iolang_io_workload_read_config(
         int num_ranks);
 
 /* load the workload file */
-static int iolang_io_workload_load(const char* params, int app_id, int rank);
+static int iolang_io_workload_load(const void* params, int app_id, int rank);
 
 /* get next operation */
 static void iolang_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
@@ -87,7 +87,7 @@ static void * iolang_io_workload_read_config(
 }
 
 /* loads the workload file for each simulated MPI rank/ compute node LP */
-int iolang_io_workload_load(const char* params, int app_id, int rank)
+int iolang_io_workload_load(const void* params, int app_id, int rank)
 {
     int t = -1;
     iolang_params* i_param = (struct iolang_params*)params;
diff --git a/src/workload/methods/codes-iomock-wrkld.c b/src/workload/methods/codes-iomock-wrkld.c
index 4c9ebc80..90ee6a99 100644
--- a/src/workload/methods/codes-iomock-wrkld.c
+++ b/src/workload/methods/codes-iomock-wrkld.c
@@ -132,7 +132,7 @@ static void * iomock_workload_read_config(
 }
 
 /* load the workload file */
-static int iomock_workload_load(const char* params, int app_id, int rank)
+static int iomock_workload_load(const void* params, int app_id, int rank)
 {
     iomock_params const * p = (iomock_params const *) params;
 
diff --git a/src/workload/methods/codes-online-comm-wrkld.C b/src/workload/methods/codes-online-comm-wrkld.C
index 0ac3e318..ca6978c6 100644
--- a/src/workload/methods/codes-online-comm-wrkld.C
+++ b/src/workload/methods/codes-online-comm-wrkld.C
@@ -933,7 +933,7 @@ string get_default_path(online_comm_params * o_params)
 }
 
 
-static int comm_online_workload_load(const char * params, int app_id, int rank)
+static int comm_online_workload_load(const void * params, int app_id, int rank)
 {
     /* LOAD parameters from JSON file*/
     online_comm_params * o_params = (online_comm_params*)params;
diff --git a/src/workload/methods/codes-recorder-io-wrkld.c b/src/workload/methods/codes-recorder-io-wrkld.c
index 0f88408e..d6c76a6b 100644
--- a/src/workload/methods/codes-recorder-io-wrkld.c
+++ b/src/workload/methods/codes-recorder-io-wrkld.c
@@ -53,7 +53,7 @@ struct rank_traces_context
 };
 
 /* CODES workload API functions for workloads generated from recorder traces*/
-static int recorder_io_workload_load(const char *params, int app_id, int rank);
+static int recorder_io_workload_load(const void *params, int app_id, int rank);
 static void recorder_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
 
 /* helper functions for recorder workload CODES API */
@@ -73,7 +73,7 @@ static struct qhash_table *rank_tbl = NULL;
 static int rank_tbl_pop = 0;
 
 /* load the workload generator for this rank, given input params */
-static int recorder_io_workload_load(const char *params, int app_id, int rank)
+static int recorder_io_workload_load(const void *params, int app_id, int rank)
 {
     recorder_params *r_params = (recorder_params *) params;
     struct rank_traces_context *newv = NULL;
diff --git a/src/workload/methods/test-workload-method.c b/src/workload/methods/test-workload-method.c
index 5081d1c3..c0659982 100644
--- a/src/workload/methods/test-workload-method.c
+++ b/src/workload/methods/test-workload-method.c
@@ -14,7 +14,7 @@
 #include "ross.h"
 #include "codes/codes-workload.h"
 
-static int test_workload_load(const char* params, int app_id, int rank);
+static int test_workload_load(const void* params, int app_id, int rank);
 static void test_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
 
 /* state information for each rank that is retrieving requests */
@@ -38,7 +38,7 @@ struct codes_workload_method test_workload_method =
     .codes_workload_get_next = test_workload_get_next,
 };
 
-static int test_workload_load(const char* params, int app_id, int rank)
+static int test_workload_load(const void* params, int app_id, int rank)
 {
     /* no params in this case; this example will work with any number of
      * ranks

From 3f63c9c580e576198adff1019a43c1f2857f69f7 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 30 Nov 2023 15:40:48 -0500
Subject: [PATCH 071/188] Fixed little bug on next in queue time

The calculation should be done at event processing not at event commit
time.
---
 src/networks/model-net/dragonfly-dally.C | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index edb6e409..08c7da42 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3218,7 +3218,7 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term
         remote_data = malloc(msg->remote_event_size_bytes);
         memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
     }
-    double const processing_packet_delay = s->last_in_queue_time - msg->saved_last_in_queue_time;
+    double const processing_packet_delay = msg->saved_next_packet_delay;
 
     // TODO (elkin): In the future, this ugly initialization could be done all in a single "line" instead of setting all values one by one. The reason to do it this way is because some old compilers do not understand other ways of initializing
     struct packet_sent sent;
@@ -4309,7 +4309,8 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     msg->my_hops_cur_group = 0;
 
     //assert(tw_now(lp) == msg->travel_start_time);
-    // This is to be later used to determine 
+    // This in here is NOT next_packet_delay but processing packet delay!!
+    msg->saved_next_packet_delay = tw_now(lp) - s->last_in_queue_time;
     msg->saved_last_in_queue_time = s->last_in_queue_time;
     s->last_in_queue_time = tw_now(lp);
 

From 8e0f4501acbefb3682e1a94238ef00cd957623a5 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 11 Dec 2023 09:16:18 -0500
Subject: [PATCH 072/188] Partial fix for progressive adaptive's algo

During some specific circumstances, like when running an example of the
synthetic workload "nearest neighbors", there will be packets that are
never transmitted to their destination. This patch fixes that case but:

- Does not work for when QoS is enabled, and
- Fails to run in parallel
---
 src/networks/model-net/dragonfly-dally.C | 28 ++++++++++--------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 08c7da42..90d980f9 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2748,7 +2748,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message *
     }
     /* TODO: If none of the vcg is exceeding bandwidth limit then select high
     * priority traffic first. */
-    if(BW_MONITOR == 1)
+    if(BW_MONITOR == 1 && num_qos_levels > 1)
     {
         for(int i = 0; i < num_qos_levels; i++)
         {
@@ -2792,7 +2792,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
     int chunk_size = s->params->chunk_size;
     int bw_consumption[num_qos_levels];
     /* First make sure the bandwidth consumptions are up to date. */
-    if(BW_MONITOR == 1)
+    if(BW_MONITOR == 1 && num_qos_levels > 1)
     {
         for(int k = 0; k < num_qos_levels; k++)
         {
@@ -2832,25 +2832,21 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
     }
         
     /* All vcgs are exceeding their bandwidth limits*/
-    msg->last_saved_qos = s->last_qos_lvl[output_port];
-    int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels;
+    msg->last_saved_qos = s->last_qos_lvl[output_port]; // last_qos_lvl stores a vc# not a qos# for routers. Terminals store qos#
+    //int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels;
+    int next_rr_vc = (s->last_qos_lvl[output_port] + 1) % s->params->num_vcs;
 
-    for(int i = 0; i < num_qos_levels; i++)
+    for(int i = 0; i < s->params->num_vcs; i++)
     {
-        base_limit = next_rr_vcg * vcs_per_qos; 
-        for(int k = base_limit; k < base_limit + vcs_per_qos; k++)
+        if(s->pending_msgs[output_port][next_rr_vc] != NULL)
         {
-            if(s->pending_msgs[output_port][k] != NULL)
-            {
-                if(msg->last_saved_qos < 0)
-                    msg->last_saved_qos = s->last_qos_lvl[output_port]; 
+            if(msg->last_saved_qos < 0)
+                msg->last_saved_qos = s->last_qos_lvl[output_port];
 
-                s->last_qos_lvl[output_port] = next_rr_vcg;
-                return k;
-            }
+            s->last_qos_lvl[output_port] = next_rr_vc;
+            return next_rr_vc;
         }
-        next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels;
-        assert(next_rr_vcg < 2);
+        next_rr_vc = (next_rr_vc + 1) % s->params->num_vcs;
     }
     return -1;
 }

From 3d1f29c5ada3ee14a306454f78ba2b19bf340174 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 13 Dec 2023 12:08:54 -0500
Subject: [PATCH 073/188] Tweaking the plotting script for packet-latency

---
 .../python-scripts/plot-packet-latency.py     | 34 +++++++++++--------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
index 21edae8a..67c41326 100644
--- a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
+++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py
@@ -114,6 +114,7 @@
     parser.add_argument('--started-tracking', type=float, default=2e6)
     parser.add_argument('--switch', type=float, default=3e6)
     parser.add_argument('--switch-back', type=float, default=8e6)
+    parser.add_argument('--no-show-legend', dest='show_legend', action='store_false')
     args = parser.parse_args(sys.argv[2:])
 
     std_factor = args.std_factor
@@ -176,34 +177,39 @@
 
     middle = (args.switch + args.switch_back) / 2
     arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'}
-    ax.annotate("", xy=(args.started_tracking * .95, 80e3),
-                xytext=(args.started_tracking * .6, height_plot*.3), **arrow_color)
+    ax.annotate("", xy=(args.started_tracking * .95, height_plot*.03),
+                xytext=(args.started_tracking * .6, height_plot*.1), **arrow_color)
     ax.annotate("switch", xy=(args.switch*1.04, height_plot*.03),
                 xytext=(middle, height_plot*.08), **arrow_color)
     ax.annotate("", xy=(args.switch_back * 0.96, height_plot*.03),
                 xytext=(middle, height_plot*.08), **arrow_color)
-    ax.text(args.started_tracking * .9, height_plot*.3, "start\ntracking", color='#333', ha='right')
+    ax.text(args.started_tracking * .9, height_plot*.1, "start\ntracking", color='#333', ha='right')
 
-    ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333', rotation=40,
-            rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    ax.text(args.switch, height_plot, "switch to surrogate", color='#333', rotation=40,
-            rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
-    ax.text(args.switch_back, 1.03 * height_plot, "switch to\nhigh-definition", color='#333',
-            rotation=40, rotation_mode='anchor', horizontalalignment='left',
-            verticalalignment='center')
+    # ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333',
+    #         rotation=40, rotation_mode='anchor', horizontalalignment='left',
+    #         verticalalignment='center')
+    # ax.text(args.switch, height_plot, "switch to surrogate", color='#333', rotation=40,
+    #         rotation_mode='anchor', horizontalalignment='left', verticalalignment='center')
+    # ax.text(args.switch_back, 1.03 * height_plot, "switch to\nhigh-definition", color='#333',
+    #         rotation=40, rotation_mode='anchor', horizontalalignment='left',
+    #         verticalalignment='center')
 
     ax.set_xlabel('Virtual time')
     ax.set_ylabel('Average Packet Latency')
     # ax.set_ylim(0, 122e3)
-    # ax.legend(bbox_to_anchor=(.54, .02), loc='lower center', borderaxespad=0)
+    if args.show_legend:
+        ax.legend(bbox_to_anchor=(.54, .02), loc='lower center', borderaxespad=0)
     ax.yaxis.set_major_formatter(time_formatter_ns)
     ax.xaxis.set_major_formatter(time_formatter_ns)
 
-    n = means_hf[80:].shape[0]
+    # Finding when we switch back to high-fidelity from surrogate
+    cut_back = np.abs(windows_hf - args.switch_back).argmin() + 1
+
+    n = means_hf[cut_back:].shape[0]
     mse_hybrid_lite = \
-        np.sum((means_hf[80:] - means_hybrid_lite[80:])**2) / n
+        np.sum((means_hf[cut_back:] - means_hybrid_lite[cut_back:])**2) / n
     mse_hybrid = \
-        np.sum((means_hf[80:] - means_hybrid[80:])**2) / n
+        np.sum((means_hf[cut_back:] - means_hybrid[cut_back:])**2) / n
     print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2")
     print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2")
 

From 98aba5e10ad767c94f4ca6e1ca3aeddab52a9148 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 12 Jan 2024 16:13:45 -0500
Subject: [PATCH 074/188] Fixing reverse computation VC patch

From 8b33e80f1f087e48a463395cf90d92c066cc9421 Mon Sep 17 00:00:00 2001
From: "Kevin A. Brown" <kabrown@anl.gov>
Date: Fri, 15 Dec 2023 21:15:29 +0000
Subject: [PATCH] dfdally: partial vc starvation fix for tomacs paper
---
 src/networks/model-net/dragonfly-dally.C | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 90d980f9..25bccfe6 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2748,7 +2748,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message *
     }
     /* TODO: If none of the vcg is exceeding bandwidth limit then select high
     * priority traffic first. */
-    if(BW_MONITOR == 1 && num_qos_levels > 1)
+    if(BW_MONITOR == 1)
     {
         for(int i = 0; i < num_qos_levels; i++)
         {
@@ -2840,9 +2840,6 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
     {
         if(s->pending_msgs[output_port][next_rr_vc] != NULL)
         {
-            if(msg->last_saved_qos < 0)
-                msg->last_saved_qos = s->last_qos_lvl[output_port];
-
             s->last_qos_lvl[output_port] = next_rr_vc;
             return next_rr_vc;
         }
@@ -6276,6 +6273,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
         }
         return;  
     }
+    s->last_qos_lvl[output_port] = msg->last_saved_qos;
 
     int output_chan = msg->saved_channel;
     if(bf->c8)
@@ -6560,7 +6558,23 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
     s->next_output_available_time[output_port] -= s->params->router_delay;
     injection_ts -= s->params->router_delay;
 
-    int next_output_chan = get_next_router_vcg(s, bf, msg, lp); 
+    int next_output_chan = -1;
+    int base_limit = 0;
+    int vcs_per_qos = s->params->num_vcs / num_qos_levels;
+    for(int i = 0; i < num_qos_levels; i++)
+    {
+        base_limit = i * vcs_per_qos;
+        for(int k = base_limit; k < base_limit + vcs_per_qos; k ++)
+        {
+            if(s->pending_msgs[output_port][k] != NULL)
+            {
+                next_output_chan = k;
+                break;
+            }
+        }
+        if(next_output_chan >= 0)
+            break;
+    }
 
     if(next_output_chan < 0)
     {

From 4fcda47d792da6c619e94e9e0a50f673e105bfe9 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 8 Feb 2024 15:38:23 -0500
Subject: [PATCH 075/188] Updating port-occupancy script

---
 doc/example/CMakeLists.txt                    |  1 +
 .../python-scripts/port-occupancy.py          | 31 ++++++++++++++++---
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt
index 49451d91..f665d234 100644
--- a/doc/example/CMakeLists.txt
+++ b/doc/example/CMakeLists.txt
@@ -20,6 +20,7 @@ set(CHUNK_SIZE "64")
 set(NETWORK_TREATMENT "freeze")
 set(PACKET_LATENCY_TRACE_PATH "packet-latency-trace/")
 set(IGNORE_UNTIL "200e4")
+set(PREDICTOR_TYPE "average")
 string(REPLACE ${single_quote} ${double_quote} SWITCH_TIMESTAMPS "'1000e4', '8900e4'")
 configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.conf)
 configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.conf)
diff --git a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
index 9164a6f2..88a045f3 100644
--- a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
+++ b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py
@@ -34,8 +34,9 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
 if __name__ == '__main__':
     this_binary = sys.argv[0]
     commands = {
-        'singleplot': 'Displays port occupancy plot (needs full path for csv)',
-        'pads23': 'Generates plot that appears on PADS23 paper'
+        'singleplot': 'Displays port occupancy plot (needs full path of csv)',
+        'multipleplot': 'Displays port occupancy plot (needs full path of csv\'s)',
+        'pads23': 'Generates plot that appears on PADS23 paper',
     }
     parser = argparse.ArgumentParser(
         usage=f'{this_binary} <command> [<args>]\n\n'
@@ -78,7 +79,29 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
     ax.xaxis.set_major_formatter(time_formatter_ns)
     ax.yaxis.set_major_formatter(bytes_formater)
 
-    plt.show()  # type: ignore
+    plt.show()
+
+
+if main_args.command == 'multipleplot':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--csv', type=pathlib.Path,
+                        help='Buffer occupancy CSV results (multiple csvs are possible)',
+                        action='append', required=True)
+    args = parser.parse_args(sys.argv[2:])
+
+    # plotting
+    fig, ax = plt.subplots(figsize=(7, 3.8))
+
+    for csv in args.csv:
+        ts, utilization_hf = load_aggregated_utilization(csv)
+        ax.plot(ts, utilization_hf, label="high-fidelity")
+
+    ax.set_xlabel('Virtual time')
+    ax.set_ylabel('Total Buffer Port Occupancy')
+    ax.xaxis.set_major_formatter(time_formatter_ns)
+    ax.yaxis.set_major_formatter(bytes_formater)
+
+    plt.show()
 
 
 if main_args.command == 'pads23':
@@ -173,4 +196,4 @@ def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_typ
         plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
         plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
     else:
-        plt.show()  # type: ignore
+        plt.show()

From 94ae872107fc96d2f73f463d27b6ae32a55de37e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 12 Feb 2024 18:48:17 -0500
Subject: [PATCH 076/188] Renaming variable to avoid confusion

---
 src/networks/model-net/core/model-net-lp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index e1b2e4e7..3ff97f37 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -21,7 +21,7 @@
 int model_net_base_magic;
 int mn_sample_enabled = 0;
 
-static int is_surrogate_on = false;
+static int is_freezing_on = false;
 
 // message-type specific offsets - don't want to get bitten later by alignment
 // issues...
@@ -577,7 +577,7 @@ void model_net_base_event(
 
     assert(m->h.magic == model_net_base_magic);
 
-    if(!is_surrogate_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
+    if(!is_freezing_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
         return;
     }
 
@@ -624,7 +624,7 @@ void model_net_base_event_rc(
         tw_lp * lp){
     assert(m->h.magic == model_net_base_magic);
 
-    if(!is_surrogate_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
+    if(!is_freezing_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
         return;
     }
 
@@ -1043,7 +1043,7 @@ void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
             &m_wrap->h);
     m_wrap->msg.m_base.is_from_remote = is_recv_queue;
     r_wrap->queue_offset = queue_offset;
-    m_wrap->msg.m_base.created_in_surrogate = is_surrogate_on;
+    m_wrap->msg.m_base.created_in_surrogate = is_freezing_on;
     tw_event_send(e);
 }
 
@@ -1118,11 +1118,11 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 }
 
 void model_net_method_switch_to_surrogate(void) {
-    is_surrogate_on = true;
+    is_freezing_on = true;
 }
 
 void model_net_method_switch_to_highdef(void) {
-    is_surrogate_on = false;
+    is_freezing_on = false;
 }
 
 void model_net_method_switch_to_surrogate_lp(tw_lp * lp) {

From 6a67816c7b25fba8c8756f5cdac7fd34777d3eb2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 26 Feb 2024 18:42:50 -0500
Subject: [PATCH 077/188] Refactoring director function to generalize (first
 step to define API for director function)

---
 .gitignore               |   2 +-
 codes/surrogate/switch.h |   2 +-
 src/surrogate/init.c     |   2 +-
 src/surrogate/switch.c   | 175 ++++++++++++++++++++++-----------------
 4 files changed, 102 insertions(+), 79 deletions(-)

diff --git a/.gitignore b/.gitignore
index 51a9c2eb..70223fe9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,4 +40,4 @@ ross.csv
 install-mastiff/include/codes/model-net-method.h
 
 # commonly used building folder
-/build/
+/build*/
diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h
index 553f3a11..3a56360a 100644
--- a/codes/surrogate/switch.h
+++ b/codes/surrogate/switch.h
@@ -61,7 +61,7 @@ extern struct switch_at_struct switch_at;
 
 
 // Switch
-void director_switch(tw_pe * pe, tw_event_sig gvt_sig);
+void director_call(tw_pe * pe, tw_event_sig gvt_sig);
 
 #ifdef __cplusplus
 }
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 79bb7e71..16772fc2 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -64,7 +64,7 @@ void surrogate_configure(
         PRINTF_ONCE("\n");
 
         // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
-        g_tw_gvt_arbitrary_fun = director_switch;
+        g_tw_gvt_arbitrary_fun = director_call;
 
 #ifdef USE_RAND_TIEBREAKER
         tw_event_sig time_stamp = {0};
diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 4b29ab18..32086f46 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -382,11 +382,92 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
 }
 
 
+// This is an impure function, calling it twice WILL give different results. Only call it once!
+bool hit_trigger(tw_stime gvt) {
+    if ( switch_at.current_i < switch_at.total
+        && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
+        double const switch_time = switch_at.time_stampts[switch_at.current_i];
+#ifdef USE_RAND_TIEBREAKER
+        assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
+#else
+        assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]);
+#endif
+        assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
+
+        // Activating next switch
+        if (++switch_at.current_i < switch_at.total) {
+            double const next_switch = switch_at.time_stampts[switch_at.current_i];
+            // Setting trigger for next switch
+    #ifdef USE_RAND_TIEBREAKER
+            tw_event_sig time_stamp = {0};
+            time_stamp.recv_ts = next_switch;
+            //printf("Adding a trigger to activate next switch!\n");
+            tw_trigger_arbitrary_fun_at(time_stamp);
+    #else
+            //printf("Adding a trigger to activate next switch!\n");
+            tw_trigger_arbitrary_fun_at(next_switch);
+    #endif
+        }
+        //
+        return true;
+    } else {
+        return false;
+    }
+}
+
+
+#ifdef USE_RAND_TIEBREAKER
+void switch_model(tw_pe * pe, tw_event_sig gvt_sig) {
+#else
+void switch_model(tw_pe * pe, tw_stime gvt) {
+#endif
+    // Rollback if in optimistic mode
+#ifdef USE_RAND_TIEBREAKER
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+        rollback_and_cancel_events_pe(pe, gvt_sig);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0);
+        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+    }
+#else
+    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+        assert(pe->GVT == gvt);
+        rollback_and_cancel_events_pe(pe, gvt);
+        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
+        assert(pe->GVT == gvt);
+    }
+#endif
+    surr_config.director.switch_surrogate();
+    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+        printf("Switching to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
+    }
+
+    // "Freezing" network events and activating LP's switch functions
+    if (freeze_network_on_switch) {
+        if (surr_config.director.is_surrogate_on()) {
+            model_net_method_switch_to_surrogate();
+#ifdef USE_RAND_TIEBREAKER
+            events_high_def_to_surrogate_switch(pe, gvt_sig);
+#else
+            events_high_def_to_surrogate_switch(pe, gvt);
+#endif
+        } else {
+            model_net_method_switch_to_highdef();
 #ifdef USE_RAND_TIEBREAKER
-void director_switch(tw_pe * pe, tw_event_sig gvt_sig) {
+            events_surrogate_to_high_def_switch(pe, gvt_sig);
+#else
+            events_surrogate_to_high_def_switch(pe, gvt);
+#endif
+        }
+    }
+}
+
+
+#ifdef USE_RAND_TIEBREAKER
+void director_call(tw_pe * pe, tw_event_sig gvt_sig) {
     tw_stime const gvt = gvt_sig.recv_ts;
 #else
-void director_switch(tw_pe * pe, tw_stime gvt) {
+void director_call(tw_pe * pe, tw_stime gvt) {
 #endif
     assert(is_surrogate_configured);
 
@@ -431,83 +512,24 @@ void director_switch(tw_pe * pe, tw_stime gvt) {
     }
 
     // Detecting if we are going to switch
-    if (switch_at.current_i < switch_at.total
-            && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
-        double const switch_time = switch_at.time_stampts[switch_at.current_i];
-#ifdef USE_RAND_TIEBREAKER
-        assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
-#else
-        assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]);
-#endif
-        assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
-    } else {
+    if (! hit_trigger(gvt)) {
         return;
     }
-
     // ---- Past this means that we are in fact switching ----
+    bool const pre_switch_status = surr_config.director.is_surrogate_on();
 
-    double const start = tw_clock_read();
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
         if (DEBUG_DIRECTOR == 2) {
             printf("\n");
         }
-        printf("Switching at %f", gvt);
-    }
-    // Rollback if in optimistic mode
-#ifdef USE_RAND_TIEBREAKER
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
-        rollback_and_cancel_events_pe(pe, gvt_sig);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0);
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
-    }
-#else
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(pe->GVT == gvt);
-        rollback_and_cancel_events_pe(pe, gvt);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
-        assert(pe->GVT == gvt);
-    }
-#endif
-    surr_config.director.switch_surrogate();
-    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        printf(" to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
+        printf("Switching at %f\n", gvt);
     }
 
-    // "Freezing" network events and activating LP's switch functions
-    if (freeze_network_on_switch) {
-        if (surr_config.director.is_surrogate_on()) {
-            model_net_method_switch_to_surrogate();
-#ifdef USE_RAND_TIEBREAKER
-            events_high_def_to_surrogate_switch(pe, gvt_sig);
-#else
-            events_high_def_to_surrogate_switch(pe, gvt);
-#endif
-        } else {
-            model_net_method_switch_to_highdef();
-#ifdef USE_RAND_TIEBREAKER
-            events_surrogate_to_high_def_switch(pe, gvt_sig);
-#else
-            events_surrogate_to_high_def_switch(pe, gvt);
-#endif
-        }
-    }
-
-    // Activating next switch
-    if (++switch_at.current_i < switch_at.total) {
-        double const next_switch = switch_at.time_stampts[switch_at.current_i];
-        // Setting trigger for next switch
-#ifdef USE_RAND_TIEBREAKER
-        tw_event_sig time_stamp = {0};
-        time_stamp.recv_ts = next_switch;
-        //printf("Adding a trigger to activate next switch!\n");
-        tw_trigger_arbitrary_fun_at(time_stamp);
-#else
-        //printf("Adding a trigger to activate next switch!\n");
-        tw_trigger_arbitrary_fun_at(next_switch);
-#endif
-    }
+    double const start = tw_clock_read();
+    switch_model(pe, gvt_sig);
+    double const end = tw_clock_read();
+    surrogate_switching_time += end - start;
 
     if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) {
         printf("Switch completed!\n");
@@ -515,17 +537,18 @@ void director_switch(tw_pe * pe, tw_stime gvt) {
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: Switch completed!\n", g_tw_mynode);
     }
-    double const end = tw_clock_read();
-    surrogate_switching_time += end - start;
 
     // Determining time in surrogate
-    if (surr_config.director.is_surrogate_on()) {
-        // Start tracking time spent in surrogate mode
-        surrogate_time_last = end;
-    } else {
-        // We are done tracking time spent in surrogate mode
-        time_in_surrogate += start - surrogate_time_last;
+    if (pre_switch_status != surr_config.director.is_surrogate_on()) {
+        if (surr_config.director.is_surrogate_on()) {
+            // Start tracking time spent in surrogate mode
+            surrogate_time_last = end;
+        } else {
+            // We are done tracking time spent in surrogate mode
+            time_in_surrogate += start - surrogate_time_last;
+        }
     }
 }
 //
 // === END OF Director functionality
+// vim: set tabstop=4 shiftwidth=4 expandtab :

From 9b32a71cf69a79e3a28cee0f02aecf41dd26d5e7 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 11 Apr 2024 12:42:09 -0400
Subject: [PATCH 078/188] Hardcoded example skipping iterations for TWO
 applications (MILC and Jacobi)

---
 .../print-iterations.py                       | 142 ++++++++++++++++++
 src/network-workloads/model-net-mpi-replay.c  |  63 +++++++-
 2 files changed, 197 insertions(+), 8 deletions(-)
 create mode 100644 scripts/workload-iteration-times/print-iterations.py

diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py
new file mode 100644
index 00000000..a5e88010
--- /dev/null
+++ b/scripts/workload-iteration-times/print-iterations.py
@@ -0,0 +1,142 @@
+# Adapted from example from matplotlib lib
+
+from typing import Any, TextIO
+import argparse
+import pathlib
+
+import matplotlib.pyplot as plt
+import matplotlib
+import numpy as np
+
+
+def plot_sequence(ax: Any, seq: Any, names: Any, height: Any, color: str = 'red', print_names: bool = True):
+    ax.vlines(seq, 0, height, color=f"tab:{color}")  # The vertical stems.
+    ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w")
+    
+    # annotate lines
+    if print_names:
+        for d, h, r in zip(seq, height, names):
+            ax.annotate(r, xy=(d, h),
+                        xytext=(3, np.sign(h)*3), textcoords="offset points",
+                        horizontalalignment="right",
+                        verticalalignment="bottom" if h > 0 else "top")
+
+
+# hardcoded data
+def iterations_count_example():
+    iterations = np.array([5700202, 11141148, 16735521, 22248304, 28018657, 33344653, 39131394, 44535575, 49924184, 55265978, 60797003, 65999354, 71477966, 77089252, 82388323, 87510575, 92672984, 97968684, 103413575, 108791049, 114191370, 119281369, 124947369, 130269516, 135814413, 140706572, 146191543, 152244928, 157549505, 163252774])
+    names = np.arange(iterations.size)
+    # height = np.ones_like(names)
+    height = iterations.astype(np.float64)
+    height[1:] -= iterations[:-1]
+    # mean_height = height.mean()
+    # height /= mean_height
+    
+    iterations2 = np.array([4475938, 8527507, 12500772, 16932824, 21122232, 24629352, 28727112, 32812390, 37119760, 40873748, 44831210, 49236742, 53495581, 57186915, 61102874, 65089296, 69034116, 72827668, 77306215, 81505333, 84962239, 88817963, 92788913, 97258245, 101298185, 105234798, 109230081, 113176951, 117033360, 120922482, 125158680, 129445759, 132927795, 136967719, 140707240, 144980904, 148570317, 152949619, 157429076, 161858572, 165599534, 169169124, 172576205, 176267989, 179822127, 183531146, 187147511, 190685445, 194270774, 197863388, 201349592, 204959427, 208557228, 212286717, 215720477, 219201662, 222629090, 226452092, 230156036, 233856397, 237545455, 241265332, 245016561, 248662995, 252212229, 255620388, 259105490, 262543988, 266118703, 269713894, 273230378, 276923706, 280425248, 284046990, 287508037, 291266834, 294812966, 298512239, 302113836, 305636975, 309307151, 312842662, 316463094, 320055020, 323542940, 327139573, 330811189, 334388299, 337788549, 341498322, 345104703, 348880050, 352448690, 356106442, 359506153, 363094952, 366703208, 370233755, 373770752, 377222496])
+    names2 = np.arange(iterations2.size)
+    # height2 = -1 * np.ones_like(names2)
+    height2 = iterations2.astype(np.float64)
+    height2[1:] -= iterations2[:-1]
+    # height2 /= mean_height
+    height2 *= -1
+
+    return (iterations, names, height), (iterations2, names2, height2)
+
+
+# class JobAvgIterations(TypedDict):
+#     iterations: 
+
+
+# typing cannot be done for structured arrays :S
+def parse_iteration_log(log_file: TextIO):
+    log_pattern = r'ITERATION (\d+) node \d+ job (\d+) rank \d+ time (\d*\.?\d+)\n'
+    log_iters = np.fromregex(log_file, log_pattern, [('iter', np.int64), ('job', np.int64), ('time', np.float64)])
+
+    def get_avg_for_iters(job: np.int64):
+        def avg(it: np.int64) -> np.float64:
+            matched_iters = log_iters[np.bitwise_and(log_iters['job'] == job, log_iters['iter'] == it)]
+            return np.mean(matched_iters['time'].astype(np.float64))
+        return avg
+
+    jobs: dict[int, np.ndarray[Any, Any]] = {}
+    for job in np.unique(log_iters['job']):
+        iterations = np.unique(log_iters[log_iters['job'] == job]['iter'])
+        # avg_timestamp = np.vectorize(get_avg_for_iters(job), otypes=(np.float64,))(iterations)
+        avg_timestamp = np.array([get_avg_for_iters(job)(it) for it in iterations])
+        assert(iterations.size == avg_timestamp.size)
+
+        # finding time that each iteration took
+        avg_iter_time = avg_timestamp.copy()
+        avg_iter_time[1:] -= avg_timestamp[:-1]
+        # "removing" iterations for which we don't know how much they actually took
+        to_rem = iterations.copy()
+        to_rem[1:] -= to_rem[:-1] + 1
+        to_rem[0] = 0  # Assuming the first value hasn't been skipped
+        avg_iter_time[to_rem != 0] = 0
+
+        combined = np.zeros_like(iterations, dtype=[('iter', np.int64), ('time', np.float64), ('iter_time', np.float64)])
+        combined['iter'] = iterations
+        combined['time'] = avg_timestamp
+        combined['iter_time'] = avg_iter_time
+        jobs[int(job)] = combined
+
+    return jobs
+
+
+# if __name__ == "__main__":
+#     (iterations, names, height), (iterations2, names2, height2) = iterations_count_example()
+#     fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained")
+#     plot_sequence(ax, iterations, names, height, 'blue')
+#     plot_sequence(ax, iterations2, names2, height2, 'red')
+#     plt.setp(ax.get_xticklabels(), rotation=30, ha="right")
+#     plt.show()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    _ = parser.add_argument('file', type=argparse.FileType('r'))
+    _ = parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', default=None)
+    _ = parser.add_argument('--no-iter-count', dest='iter_count', action='store_false')
+    args = parser.parse_args()
+
+    if args.output:
+        matplotlib.use("pgf")
+        matplotlib.rcParams.update({
+            "pgf.texsystem": "pdflatex",
+            'font.family': 'serif',
+            'font.size': 16,
+            'text.usetex': True,
+            'pgf.rcfonts': False,
+        })
+
+    parsed_logs = parse_iteration_log(args.file)
+
+    # Creating plot with data
+    fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained")
+    ax.set_xlabel("Total virtual time (ns)")
+    ax.set_ylabel("Virtual time per iteration (ns)")
+    #ax.set(title="")
+    smallest_timestamp = list(parsed_logs.values())[0]['time'][0]
+    ax.plot([0, smallest_timestamp], [0, 0], "-", color="k", markerfacecolor="w")
+
+    color_table = ['red', 'blue', 'green', 'black']
+    for i, job in enumerate(parsed_logs.keys()):
+        # Flipping second sequence if there are only two jobs
+        mul = -1 if len(parsed_logs) == 2 and i == 1 else 1
+        plot_sequence(
+            ax,
+            parsed_logs[job]['time'],
+            parsed_logs[job]['iter'],
+            mul * parsed_logs[job]['iter_time'],
+            color=color_table[i],
+            print_names=args.iter_count)
+    
+    plt.setp(ax.get_xticklabels(), rotation=30, ha="right")
+    
+    #ax.margins(y=0.1)
+    if args.output:
+        plt.tight_layout()
+        plt.savefig(f'{args.output}.pgf', bbox_inches='tight')
+        plt.savefig(f'{args.output}.pdf', bbox_inches='tight')
+    else:
+        plt.show()
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 1433b2a3..b58f572c 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -1117,24 +1117,60 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
     }
 }
 
-void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
+// Surrogate switiching structure
+struct AvgSurrogateSwitchingTimesForApp {
+    int app_id;
+    int skip_at_iter;
+    int resume_at_iter;
+    double time_per_iter;
+    bool done[72]; // This is a flag to indicate whethe we already completed this skipping stage
+};
+
+static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) {
+    return avgSur->resume_at_iter - avgSur->skip_at_iter;
+}
+
+static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = {
+    // done, app_id, skip_at_iter, resume_at_iter, time_per_iter
+    {0,  3,  21, 14403235, {false}},
+    {1,  7,  59,  4982017, {false}},
+    {1, 79, 195,  3581337, {false}},
+};
+
+struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {
+    int n_jumps = (sizeof(skip_iter_config)/sizeof(skip_iter_config[0]));
+    for (int i=0; i < n_jumps; i++) {
+        struct AvgSurrogateSwitchingTimesForApp * jump = &skip_iter_config[i];
+        if (!jump->done[s->local_rank] && jump->app_id == s->app_id) {
+            return jump;
+        }
+    }
+    return NULL;
+}
+
+static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
     // TODO: implement!!
 }
 
-void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
+static void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
 {
 	struct codes_workload_op * mpi_op = (struct codes_workload_op*) malloc(sizeof(struct codes_workload_op));
     m->mpi_op = mpi_op;
 
-    // consuming all events until iteration 95 from iteration 4
+    struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
+    assert(switch_config != NULL);
+    int const resume_at_iter = switch_config->resume_at_iter;
+
+    // consuming all events until indicated iteration is reached
     bool reached_end = false;
     while (!reached_end) {
         codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op);
 
         switch (mpi_op->op_type) {
             case CODES_WK_MARK:
-                if (mpi_op->u.send.tag == 95) {
+                if (mpi_op->u.send.tag == resume_at_iter) {
                     reached_end = true;
+                    codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op);
                 }
                 break;
             // If we reach the end of simulation, rollback once to allow the operation to be processed normally
@@ -1147,17 +1183,28 @@ void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
         }
     }
 
+    switch_config->done[s->local_rank] = true;
+
     tw_event *e = tw_event_new(lp->gid, 0.0, lp);
     nw_message* msg = (nw_message*) tw_event_data(e);
     msg->msg_type = MPI_OP_GET_NEXT;
     tw_event_send(e);
 }
 
-bool have_we_hit_surrogate_switch(struct codes_workload_op * mpi_op) {
-    //return mpi_op->u.send.tag == 4;
+static bool have_we_hit_surrogate_switch(struct nw_state* s, struct codes_workload_op * mpi_op) {
+    struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
+    if (switch_config != NULL) {
+        return mpi_op->u.send.tag == switch_config->skip_at_iter;
+    }
     return false;
 }
 
+static double time_to_skip_iterations(struct nw_state* s, struct codes_workload_op * mpi_op) {
+    struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
+    assert(switch_config != NULL);
+    return switch_config->time_per_iter * iters_skipped(switch_config);
+}
+
 /* Debugging functions, may generate unused function warning */
 /*static void print_waiting_reqs(uint32_t * reqs, int count)
 {
@@ -2994,8 +3041,8 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
                 m->rc.saved_marker_time = tw_now(lp);
 
                 // If we have reached the surrogate switch time, skip next iteration(s)
-                if (have_we_hit_surrogate_switch(mpi_op)) {
-                    tw_event *e = tw_event_new(lp->gid, 2076575.16 * 91, lp);
+                if (have_we_hit_surrogate_switch(s, mpi_op)) {
+                    tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s, mpi_op), lp);
                     nw_message* msg = (nw_message*) tw_event_data(e);
                     msg->msg_type = SURR_SKIP_ITERATION;
                     tw_event_send(e);

From 42f7cd57ea11726f8396693c97d81413e593c7fb Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 18 Apr 2024 09:53:54 -0400
Subject: [PATCH 079/188] Improving figure generation script

---
 .gitignore                                    |  5 +-
 scripts/workload-iteration-times/README.txt   |  6 ++
 .../print-iterations.py                       | 78 +++++++++++++++----
 src/network-workloads/model-net-mpi-replay.c  |  8 +-
 4 files changed, 79 insertions(+), 18 deletions(-)
 create mode 100644 scripts/workload-iteration-times/README.txt

diff --git a/.gitignore b/.gitignore
index 70223fe9..b023d553 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,5 +39,8 @@ ross.csv
 
 install-mastiff/include/codes/model-net-method.h
 
-# commonly used building folder
+# commonly used building stuff
 /build*/
+/build*
+.cache
+compile_commands.json
diff --git a/scripts/workload-iteration-times/README.txt b/scripts/workload-iteration-times/README.txt
new file mode 100644
index 00000000..bc2d5a16
--- /dev/null
+++ b/scripts/workload-iteration-times/README.txt
@@ -0,0 +1,6 @@
+To reproduce figures:
+
+```bash
+python print-iterations.py /home/helq/Research/HPC/code/kronos/2024-feb-22/experiments/union/milc-jacobi/results/exp-007/iteration-logs --output figures/milc-jacobi-hf --legends Jacobi MILC
+python print-iterations.py /home/helq/Research/HPC/code/kronos/2024-feb-22/experiments/union/milc-jacobi/results/exp-003/iteration-logs --output figures/milc-jacobi-surrogate --legends Jacobi MILC
+```
diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py
index a5e88010..b60bc5ae 100644
--- a/scripts/workload-iteration-times/print-iterations.py
+++ b/scripts/workload-iteration-times/print-iterations.py
@@ -3,19 +3,59 @@
 from typing import Any, TextIO
 import argparse
 import pathlib
+import colorsys
 
 import matplotlib.pyplot as plt
 import matplotlib
+from matplotlib.patches import Rectangle
+from matplotlib.lines import Line2D
 import numpy as np
-
-
-def plot_sequence(ax: Any, seq: Any, names: Any, height: Any, color: str = 'red', print_names: bool = True):
-    ax.vlines(seq, 0, height, color=f"tab:{color}")  # The vertical stems.
-    ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w")
+import matplotlib.colors as mc
+
+
+def adjust_lightness(color: str | tuple[float, float, float], amount: float = 0.5):
+    """
+    Taken from: https://stackoverflow.com/a/49601444
+    Smaller than 1 amounts darkness, larger than 1 lightens
+    Examples:
+    >> adjust_lightness('g', 1.3)
+    >> adjust_lightness('#F034A3', 0.6)
+    >> adjust_lightness((.3,.55,.1), 1.5)
+    """
+    try:
+        c = mc.cnames[color]  # type: ignore[reportArgumentType]
+    except:
+        c = color
+    c = colorsys.rgb_to_hls(*mc.to_rgb(c))
+    return colorsys.hls_to_rgb(c[0], max(0, min(1, amount * c[1])), c[2])
+
+
+def plot_sequence(
+        ax: Any,
+        seq: Any,
+        names: Any,
+        height: Any,
+        color: str = 'red',
+        print_names: bool = True
+):
+    box = Rectangle((0, 0), seq[0], height[0], color=adjust_lightness(color, 1.7))
+    ax.add_patch(box)
+    for start, end, heit in zip(seq, height[1:], height[1:]):
+        box = Rectangle((start, 0), end, heit, color=adjust_lightness(color, 1.7))
+        ax.add_patch(box)
+
+    ax.vlines(seq, 0, height, color=adjust_lightness(color, 1.3))
+
+    non_zero_height = height != 0
+    cleaned_seq = seq[non_zero_height]
+    cleaned_height = height[non_zero_height]
+    ax.scatter(cleaned_seq, cleaned_height, marker='.', color=color)
+    # ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w")
     
     # annotate lines
     if print_names:
-        for d, h, r in zip(seq, height, names):
+        cleaned_names = names[non_zero_height]
+        for d, h, r in zip(cleaned_seq, cleaned_height, cleaned_names):
             ax.annotate(r, xy=(d, h),
                         xytext=(3, np.sign(h)*3), textcoords="offset points",
                         horizontalalignment="right",
@@ -96,7 +136,8 @@ def avg(it: np.int64) -> np.float64:
     parser = argparse.ArgumentParser()
     _ = parser.add_argument('file', type=argparse.FileType('r'))
     _ = parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', default=None)
-    _ = parser.add_argument('--no-iter-count', dest='iter_count', action='store_false')
+    _ = parser.add_argument('--iter-count', dest='iter_count', action='store_true')
+    _ = parser.add_argument('--legends', nargs='+', help='Application names', required=False)
     args = parser.parse_args()
 
     if args.output:
@@ -112,17 +153,18 @@ def avg(it: np.int64) -> np.float64:
     parsed_logs = parse_iteration_log(args.file)
 
     # Creating plot with data
-    fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained")
+    fig, ax = plt.subplots(figsize=(6, 3), layout="constrained")
     ax.set_xlabel("Total virtual time (ns)")
-    ax.set_ylabel("Virtual time per iteration (ns)")
+    ax.set_ylabel("Virtual time \nper iteration (ns)")
     #ax.set(title="")
-    smallest_timestamp = list(parsed_logs.values())[0]['time'][0]
-    ax.plot([0, smallest_timestamp], [0, 0], "-", color="k", markerfacecolor="w")
+    largest_timestamp = max(v['time'].max() for v in parsed_logs.values())
+    ax.plot([0, largest_timestamp], [0, 0], "-", color="k", markerfacecolor="w")
 
-    color_table = ['red', 'blue', 'green', 'black']
+    color_table = ['tab:red', 'tab:blue', 'tab:green', 'tab:black']
     for i, job in enumerate(parsed_logs.keys()):
         # Flipping second sequence if there are only two jobs
-        mul = -1 if len(parsed_logs) == 2 and i == 1 else 1
+        # mul = -1 if len(parsed_logs) == 2 and i == 1 else 1
+        mul = 1
         plot_sequence(
             ax,
             parsed_logs[job]['time'],
@@ -132,6 +174,16 @@ def avg(it: np.int64) -> np.float64:
             print_names=args.iter_count)
     
     plt.setp(ax.get_xticklabels(), rotation=30, ha="right")
+
+    if args.legends:
+        custom_lines = []
+        legends = []
+        for legend, color in zip(args.legends, color_table):
+            # Finding legend for application with ID i
+            legend: str
+            legends.append(legend)
+            custom_lines.append(Line2D([0], [0], color=color))
+        ax.legend(custom_lines, legends)
     
     #ax.margins(y=0.1)
     if args.output:
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index b58f572c..7f9f554b 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -1131,10 +1131,10 @@ static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) {
 }
 
 static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = {
-    // done, app_id, skip_at_iter, resume_at_iter, time_per_iter
-    {0,  3,  21, 14403235, {false}},
-    {1,  7,  59,  4982017, {false}},
-    {1, 79, 195,  3581337, {false}},
+    // app_id, skip_at_iter, resume_at_iter, time_per_iter, done
+    //{0,  3,  21, 14403235, {false}},
+    //{1,  7,  59,  4982017, {false}},
+    //{1, 79, 195,  3581337, {false}},
 };
 
 struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {

From 44d5f69acad51f74a2ec8406068f7b62fb9fad10 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 29 Apr 2024 12:42:22 -0400
Subject: [PATCH 080/188] Fix: replacing O(n) table lookup for O(1)

When calling the function `jobmap_list_to_local`, this would go through
the entire list of IDs until it finds a matching ID. This is O(n) in the
average and worst cases. For small networks, this won't take much time,
so it never flared up as an issue. When running larger network
simulations, at 8K nodes, there was a significant slowdown. This
function was found, after extensive profiling, to be the principal
culprit.

The fix is simple, make a table where looking for an ID is O(1). A
simple array does the trick. After running some experiments, there's a
significant speedup of 30% for a network of 8448 with a job using all
nodes. The job was uniform random and the simulation was run for 10ms
(virtual time).
---
 src/util/jobmap-impl/jobmap-list.c | 56 +++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/util/jobmap-impl/jobmap-list.c b/src/util/jobmap-impl/jobmap-list.c
index 7876727f..5ab3abf4 100644
--- a/src/util/jobmap-impl/jobmap-list.c
+++ b/src/util/jobmap-impl/jobmap-list.c
@@ -31,6 +31,12 @@ struct jobmap_list {
     int num_jobs;
     int *rank_counts;
     int **global_ids;
+
+    // This is a look up table containing the same info as above, but with O(1) access.
+    // It is used by `jobmap_list_to_local`. This solves a scalibility bug that appears
+    // when all jobs combined have many nodes (> 8K nodes)
+    int highest_global_id;
+    struct codes_jobmap_id * id_to_jobmap;
 };
 
 #define COND_REALLOC(_len_expr, _cap_var, _buf_var) \
@@ -148,6 +154,37 @@ static int jobmap_list_configure(void const * params, void ** ctx)
         }
     } while (!feof(f));
 
+    // === Building id_to_jobmap lookup table ===
+    // There's some room for improvement (we can probably loop fewer times and fuze some
+    // loops together), but they are relatively inexpensive when done once at the start
+    // of the simulation, so this acceptable
+
+    // Finding highest global id. Although we should be able to get this from the network
+    // configuration file, we look it up in here to keep different parts of CODES separated/modularized
+    lst->highest_global_id = -1;
+    for(int i=0; i<lst->num_jobs; i++) {
+        for(int j=0; j < lst->rank_counts[i]; j++) {
+            if(lst->highest_global_id < lst->global_ids[i][j]) {
+                lst->highest_global_id = lst->global_ids[i][j];
+            }
+        }
+    }
+    lst->id_to_jobmap = calloc(lst->highest_global_id + 1, sizeof(*lst->id_to_jobmap));
+    for (int i=0; i<=lst->highest_global_id; i++) {
+        lst->id_to_jobmap[i].job = -1;
+        lst->id_to_jobmap[i].rank = -1;
+    }
+    // Finally, filling up the table
+    for(int i=0; i<lst->num_jobs; i++) {
+        for(int j=0; j < lst->rank_counts[i]; j++) {
+            int const id = lst->global_ids[i][j];
+            lst->id_to_jobmap[id].job = i;
+            lst->id_to_jobmap[id].rank = j;
+        }
+    }
+    // === ===
+
+    // returning if everything went alright
     if (rc == 0) {
         fclose(f);
         free(line_buf);
@@ -160,6 +197,7 @@ static int jobmap_list_configure(void const * params, void ** ctx)
         }
         free(lst->global_ids);
         free(lst->rank_counts);
+        free(lst->id_to_jobmap);
         free(lst);
         *ctx = NULL;
         return -1;
@@ -168,23 +206,14 @@ static int jobmap_list_configure(void const * params, void ** ctx)
 
 static struct codes_jobmap_id jobmap_list_to_local(int id, void const * ctx)
 {
-    struct codes_jobmap_id rtn;
-    rtn.job = -1;
-    rtn.rank = -1;
-
     struct jobmap_list const *lst = (struct jobmap_list const *)ctx;
 
-    for(int i=0; i<lst->num_jobs; i++) {
-        for(int j=0; j < lst->rank_counts[i]; j++) {
-            if(id == lst->global_ids[i][j]) {
-                rtn.job = i;
-                rtn.rank = j;
-                return rtn;
-            }
-        }
+    // invalid id from what we got in the config
+    if (id < 0 || lst->highest_global_id < id) {
+        return (struct codes_jobmap_id) { .job = -1, .rank = -1 };
     }
 
-    return rtn;
+    return lst->id_to_jobmap[id];
 }
 
 static int jobmap_list_to_global(struct codes_jobmap_id id, void const * ctx)
@@ -221,6 +250,7 @@ static void jobmap_list_destroy(void * ctx)
 
     free(lst->global_ids);
     free(lst->rank_counts);
+    free(lst->id_to_jobmap);
     free(ctx);
 }
 

From a155f6d93076e3355485601502d9cf034fdc282a Mon Sep 17 00:00:00 2001
From: Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
Date: Tue, 7 May 2024 11:14:12 -0500
Subject: [PATCH 081/188] zmqml src

---
 src/surrogate/zmqml/demozmqmlrequester.cpp |  75 +++++++++++
 src/surrogate/zmqml/zmqmlrequester.cpp     | 140 +++++++++++++++++++++
 2 files changed, 215 insertions(+)
 create mode 100644 src/surrogate/zmqml/demozmqmlrequester.cpp
 create mode 100644 src/surrogate/zmqml/zmqmlrequester.cpp

diff --git a/src/surrogate/zmqml/demozmqmlrequester.cpp b/src/surrogate/zmqml/demozmqmlrequester.cpp
new file mode 100644
index 00000000..b856802f
--- /dev/null
+++ b/src/surrogate/zmqml/demozmqmlrequester.cpp
@@ -0,0 +1,75 @@
+#include "zmqmlrequester.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <cmath>
+#include <chrono>
+#include <thread>
+
+using namespace std;
+
+static void test_blockingcall() {
+    vector<string> args = {"sleep", "1"};
+    vector<string> result = zmqml_request("execute", args);
+
+    cout << "status:" << result[0] << endl;
+}
+
+
+#if 0
+static void test_nonblockingcall() {
+    vector<string> args = {"sleep", "3"};
+    vector<string> ret = zmqml_request("launch", args);
+    
+    string status = ret[0];
+    int id = ret[1];
+    cout << "status=" << status << " id=" << id << endl;
+
+    int cnt = 0;
+    while (true) {
+        ret = zmqml_request("query", {id});
+        status = ret[0];
+        cout << "status=" << status << endl;
+        if (status == "done") {
+            break;
+        }
+        this_thread::sleep_for(chrono::milliseconds(500));
+        cnt++;
+    }
+    cout << "done cnt=" << cnt << endl;
+}
+#endif
+
+static void measure_latency() {
+    cout << "measure latency" << endl;
+    vector<double> tss;
+
+    int n = 1000;
+    for (int i = 0; i < n; ++i) {
+        auto start_time = chrono::steady_clock::now();
+        vector<string> result = zmqml_request("nothing");
+        auto end_time = chrono::steady_clock::now();
+        auto duration = chrono::duration<double>(end_time - start_time).count();
+        tss.push_back(duration);
+    }
+    double sum = 0;
+    for (double ts : tss) sum += ts;
+    double mean = sum / tss.size();
+    double sum_sq_diff = 0;
+    for (double ts : tss) sum_sq_diff += (ts - mean) * (ts - mean);
+    double std_dev = sqrt(sum_sq_diff / tss.size());
+    cout << "zmqcmd latency: mean = " << mean << ", std deviation = " << std_dev << endl;
+}
+
+int main () {
+
+    test_blockingcall();
+
+    //test_nonblockingcall();
+
+    measure_latency();
+
+    zmqml_request("exit");
+    return 0;
+}
diff --git a/src/surrogate/zmqml/zmqmlrequester.cpp b/src/surrogate/zmqml/zmqmlrequester.cpp
new file mode 100644
index 00000000..004b4eca
--- /dev/null
+++ b/src/surrogate/zmqml/zmqmlrequester.cpp
@@ -0,0 +1,140 @@
+#include "zmqmlrequester.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <chrono>
+#include <thread>
+#include <cmath>
+#include <numeric>
+#include <regex>
+
+#include <zmq.hpp>
+#include "rapidjson/document.h"
+#include "rapidjson/writer.h"
+#include "rapidjson/stringbuffer.h"
+
+using namespace std;
+using namespace rapidjson;
+
+static string endpoint = "tcp://localhost:5555";
+static int debug = 0;
+
+/**
+ * See zmqmlrequester.h
+ */
+vector<string> zmqml_request(const string& cmd,
+                             const vector<string>& args,
+                             const string& bindata) {
+    zmq::context_t context(1);
+    zmq::socket_t socket(context, ZMQ_REQ);
+    socket.connect(endpoint);
+
+    Document msg;
+    msg.SetObject();
+    auto& allocator = msg.GetAllocator();
+
+    Value cmdValue;
+    cmdValue.SetString(cmd.c_str(), cmd.length(), msg.GetAllocator());
+    msg.AddMember("cmd", cmdValue, msg.GetAllocator());
+
+    if (args == std::vector<std::string>()) {
+        Value argsArray(kArrayType);
+        argsArray.PushBack(Value("dummy", allocator), allocator);
+        msg.AddMember("args", argsArray, allocator);
+    } else {
+        Value argsArray(kArrayType);
+        for(const auto& arg: args) {
+            argsArray.PushBack(Value(arg.c_str(), allocator), allocator);
+        }
+        msg.AddMember("args", argsArray, allocator);
+    }
+
+    StringBuffer buffer;
+    Writer<StringBuffer> writer(buffer);
+    msg.Accept(writer);
+
+	if (debug) cout << buffer.GetString() << endl;
+
+    string bufferstr = buffer.GetString();
+    const char delimiter = '\0';
+    string jsonbinmsg = bufferstr + delimiter + bindata;
+
+    zmq::message_t reqmsg(jsonbinmsg.begin(), jsonbinmsg.end());
+    socket.send(reqmsg, zmq::send_flags::none);
+
+    zmq::message_t reply;
+    socket.recv(reply);
+
+    string tmp(static_cast<char*>(reply.data()), reply.size());
+    Document response;
+    response.Parse(tmp.c_str());
+
+    vector<string> ret;
+
+    if (response.HasMember("status")) {
+        ret.push_back(response["status"].GetString());
+
+        if (response.HasMember("et")) {
+            ret.push_back(to_string(response["et"].GetDouble()));
+        }
+
+        if (response.HasMember("id")) {
+            ret.push_back(to_string(response["id"].GetInt()));
+        }
+    } else {
+        ret.push_back("failed");
+    }
+
+    return ret;
+}
+
+
+// void test_training() {
+//     std::cout << "test training" << std::endl;
+//     send_cmd("start_training");
+//     int cnt = 0;
+
+//     while (true) {
+//         auto result = send_cmd("status_training");
+//         std::string r = result.first;
+//         if (r == "True") break;
+// 		//cout << r << endl;
+// 		std::this_thread::sleep_for(std::chrono::seconds(1));
+//         ++cnt;
+//     }
+//     std::cout << "done cnt=" << cnt << std::endl;
+// }
+
+
+
+#if 0
+/**
+ * @brief Finds all occurrences of a regex pattern within a given
+ * input string and returns them.
+ *
+ * This function searches for all matches of the `pattern` within the
+ * `input` string, extracting the first captured group from each
+ * match. Each match found by applying the regular expression is added
+ * to a vector of strings, which is then returned.
+ *
+ * @param pattern The regular expression pattern to search for within
+ * the input string. The pattern should include at least one capturing group.
+ * @param input The string to search within for the pattern.
+ * @return A `std::vector<std::string>` containing all the matches
+ *         found. Each element in the vector is the first captured
+ *         group from a match of the pattern in the input.
+ */
+static std::vector<std::string> findall(const std::string& pattern, const std::string& input) {
+    std::vector<std::string> matches;
+    std::regex re(pattern);
+    auto words_begin = std::sregex_iterator(input.begin(), input.end(), re);
+    auto words_end = std::sregex_iterator();
+
+    for (auto it = words_begin; it != words_end; ++it) {
+        std::smatch match = *it;
+        matches.push_back(match.str(1)); // Extract the first captured group
+    }
+    return matches;
+}
+#endif

From b7cba7c61e4b71c76a64b1ecf511488d482f22a0 Mon Sep 17 00:00:00 2001
From: Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
Date: Tue, 7 May 2024 11:15:59 -0500
Subject: [PATCH 082/188] additional zmqml src

---
 src/surrogate/zmqml/Makefile            |  26 +++
 src/surrogate/zmqml/pyzmqmltest.py      | 125 +++++++++++++++
 src/surrogate/zmqml/runcppdemo.sh       |   2 +
 src/surrogate/zmqml/runmlpacketdelay.py |  29 ++++
 src/surrogate/zmqml/zmqmlrequester.h    |  38 +++++
 src/surrogate/zmqml/zmqmlserver.py      | 201 ++++++++++++++++++++++++
 6 files changed, 421 insertions(+)
 create mode 100644 src/surrogate/zmqml/Makefile
 create mode 100755 src/surrogate/zmqml/pyzmqmltest.py
 create mode 100755 src/surrogate/zmqml/runcppdemo.sh
 create mode 100644 src/surrogate/zmqml/runmlpacketdelay.py
 create mode 100644 src/surrogate/zmqml/zmqmlrequester.h
 create mode 100755 src/surrogate/zmqml/zmqmlserver.py

diff --git a/src/surrogate/zmqml/Makefile b/src/surrogate/zmqml/Makefile
new file mode 100644
index 00000000..85915bdb
--- /dev/null
+++ b/src/surrogate/zmqml/Makefile
@@ -0,0 +1,26 @@
+
+CXX=g++
+CXXFLAGS=-g -Wall -O2 -std=c++11 $(shell pkg-config --cflags libzmq) -Wdeprecated-declarations
+LDFLAGS=$(shell pkg-config --libs libzmq) -lm
+TARGETS=libzmqmlrequester.so demozmqmlrequester
+
+all: $(TARGETS)
+
+libzmqmlrequester.so: zmqmlrequester.o
+	$(CXX) -shared -o $@ $^
+
+zmqmlrequester.o: zmqmlrequester.cpp zmqmlrequester.h
+	$(CXX) $(CXXFLAGS) -fPIC -c $< -o $@
+
+demozmqmlrequester: demozmqmlrequester.cpp libzmqmlrequester.so
+	$(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS) -L./ -lzmqmlrequester
+
+# doxygen
+# sphinx
+
+clean:
+	rm -f $(TARGETS)
+	rm -f *.o
+
+distclean: clean
+	rm -f *~
diff --git a/src/surrogate/zmqml/pyzmqmltest.py b/src/surrogate/zmqml/pyzmqmltest.py
new file mode 100755
index 00000000..01e21d26
--- /dev/null
+++ b/src/surrogate/zmqml/pyzmqmltest.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+
+#
+# pyrequester : a requester sample implementation to ZeroMQ-based ML
+# task dispatching server
+#
+# Written by Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
+#
+
+import zmq
+import json
+import time
+import numpy as np
+import sys
+import re
+
+debug = False # XXX: make this argument
+
+endpoint = "tcp://localhost:5555" # XXX: make this configurable
+
+def zmqml_request(cmd, args=None, bindata=b"None"):
+    """
+    Sends a command to a specified endpoint using ZeroMQ and waits for a response.
+
+    :param cmd: the command to be sent.
+    :type cmd: str
+    :param args: A list of arguments for the command where the first argument is the function name. Defaults to None.
+    :type args: list, optional
+    :return: A tuple containing the results extracted from the response and the elapsed time in seconds.
+    :rtype: tuple
+    :raises zmq.ZMQError: Raises an exception if there is an issue with the ZeroMQ communication.
+
+    Example usage:
+    >>> zmqml_request("execute", ["mlpacketdelay", "param1", "param2"])
+    """
+
+    context = zmq.Context()
+    socket = context.socket(zmq.REQ)
+    socket.connect(endpoint)
+
+    # the first arg in args is the function name (e.g., mlpacketdelay)
+    msg = {"cmd":cmd, "args":args}
+    msgencoded = json.dumps(msg).encode('utf-8')
+
+    delimiter = b'\x00'
+    payload = msgencoded + delimiter + bindata
+    socket.send(payload)
+
+    response = socket.recv_json()
+    status = response["status"]
+    if debug:
+        print("status:", status)
+
+    socket.close()
+
+    return response
+
+#
+#
+def measure_latency():
+    print("measure latency")
+    tss = []
+    n = 1000
+    for i in range(0,n):
+        st = time.time()
+        zmqml_request("nothing") # blocking
+        tss.append(time.time() - st)
+    print('zmqcmd latency:', np.mean(tss), np.std(tss))
+
+#
+#
+def test_blocking_sleep():
+    print("sleep")
+
+    target = ["sleep", "1"] # this works like args to main() in C
+
+    ret = zmqml_request("execute", target) # blocking
+    print(f'status={ret["status"]} et={ret["et"]}')
+    print("done")
+
+#
+#
+def test_nonblocking_sleep():
+    print("test nonblocking")
+
+    target = ["sleep", "2"]
+
+    ret = zmqml_request("launch", target)
+    status = ret["status"]
+    id = ret["id"]
+    print(f'status={status} id={id}')
+
+    cnt = 0
+    while True:
+        ret = zmqml_request("query", [id])
+        status = ret["status"]
+        print(f"status={status}")
+        if status == "done":
+            break
+        time.sleep(.5)
+        cnt = cnt + 1
+    print(f"done cnt={cnt}")
+
+#
+#
+def test_send_binary():
+    print("test nonblocking")
+
+    data = b""
+    with open('ml-model.pt', 'rb') as f:
+        data = f.read()
+    
+    ret = zmqml_request("send", ["foobar.dat"], data)
+    status = ret["status"]
+    print(f"status={status}")
+
+    
+if __name__ == "__main__":
+    test_send_binary()
+    measure_latency()
+    test_blocking_sleep()
+    test_nonblocking_sleep()
+    #test_mlpacketdelay_training()
+    zmqml_request("exit")
+    sys.exit(0)
diff --git a/src/surrogate/zmqml/runcppdemo.sh b/src/surrogate/zmqml/runcppdemo.sh
new file mode 100755
index 00000000..fe460392
--- /dev/null
+++ b/src/surrogate/zmqml/runcppdemo.sh
@@ -0,0 +1,2 @@
+make
+LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`  ./demozmqmlrequester
diff --git a/src/surrogate/zmqml/runmlpacketdelay.py b/src/surrogate/zmqml/runmlpacketdelay.py
new file mode 100644
index 00000000..cd9d2b5e
--- /dev/null
+++ b/src/surrogate/zmqml/runmlpacketdelay.py
@@ -0,0 +1,29 @@
+
+import argparse
+from pathlib import Path
+
+from model import mlpacketdelay
+
+def run_training(done_event):
+    parser = argparse.ArgumentParser(description="Delay Prediction")
+    parser.add_argument('--method', type=str, default='MLP', choices=['MLP','Average'])
+    parser.add_argument('--epoch', type=int, default=10, help='epochs to train')
+    parser.add_argument('--h-dim', type=int, default=16, help='dimension of the hidden layer')
+    parser.add_argument('--seed', type=int, default=0)
+    parser.add_argument('--pck_size', type=int, default=4096, help='maximum packet size in simulation')
+    parser.add_argument('--terminals', type=int, default=72, help='total number of terminals in the network')
+    parser.add_argument('--input-file', type=Path, default=Path('packet-delays.txt'))
+#    parser.add_argument('--load-model', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--load-model', action='store_true', default=False,
+                        help='whether to load model from file or start from scratch')
+    parser.add_argument('--model-path', type=Path, default=Path('MLP_Surrogate-combined.pt'))
+#    parser.add_argument('--plot-weights', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--plot-weights', action='store_true', default=False,
+                        help='whether to show weights from source to destination')
+
+    args = parser.parse_args(["--method", "MLP", "--epoch", "1", # 50
+                              "--input-file", "model/data/packets-delay.csv",
+                              "--model-path", "ml-model.pt"])
+
+    mlpacketdelay.main_func(args)
+    done_event.set()
diff --git a/src/surrogate/zmqml/zmqmlrequester.h b/src/surrogate/zmqml/zmqmlrequester.h
new file mode 100644
index 00000000..220f1a51
--- /dev/null
+++ b/src/surrogate/zmqml/zmqmlrequester.h
@@ -0,0 +1,38 @@
+#ifndef __ZMQREQUESTER_H_DEFINED__
+#define __ZMQREQUESTER_H_DEFINED__
+
+#include <string>
+#include <vector>
+
+/**
+ * @brief Sends a request over ZeroMQ with the specified command and arguments,
+ *        receives a reply
+ *
+ * This function constructs a JSON message with the provided command
+ * and arguments, sends it over a ZeroMQ REQ socket, waits for the
+ * reply, parses the JSON response, and extracts the 'status', 'et'
+ * (if present), and 'id' (if present) fields. It constructs a vector
+ * of strings containing these fields for the return value. If the
+ * 'status' field is not present in the response, it returns a vector
+ * containing "failed".
+ *
+ * @param cmd zmqml request command: 'query', 'launch', execute', send', 'nothing', 'exit'
+ * @param args the arguments for launch and execute
+ * @param bindata binary data from send
+ * @return vector<string> A vector containing the 'status' field and
+ *         optionally 'et' and 'id'.  'status' is not present, returns
+ *         a vector with "failed".
+ *
+ * @exception std::runtime_error Thrown if there are any issues with ZeroMQ communication.
+ * @exception rapidjson::ParseErrorException Thrown if parsing the JSON response fails.
+ * @note This function assumes that the 'endpoint' variable (used in
+ *       socket.connect) is defined externally and is accessible
+ *       within this function scope. Ensure 'endpoint' is properly
+ *       configured before calling this function.
+ * @note If 'debug' is true, the JSON message sent is printed to standard output.
+ */
+extern std::vector<std::string> zmqml_request(const std::string& cmd,
+                                              const std::vector<std::string>& args = std::vector<std::string>(),
+                                              const std::string& bindata = "None"
+                                              );
+#endif
diff --git a/src/surrogate/zmqml/zmqmlserver.py b/src/surrogate/zmqml/zmqmlserver.py
new file mode 100755
index 00000000..90ca0088
--- /dev/null
+++ b/src/surrogate/zmqml/zmqmlserver.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+
+#
+# zmqmlserver : ZeroMQ-based ML task dispatching server
+#
+# Written by Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
+#
+
+import zmq
+import json
+import threading
+import sys
+import time
+from itertools import count # generate unit id
+# from dataclasses import dataclass
+
+# TODO: abstract a mechanism to call training
+from runmlpacketdelay import run_training
+
+#import os
+#model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "model"))
+#sys.path.insert(0, model_dir)
+
+endpoint = "tcp://*:5555"
+
+debug = False
+
+#
+#
+#
+launch_id = count(start=1) # unique for launched thread
+launched_threads = {} # id:obj. keep track of active threads. remove the thread once it finished
+
+class LaunchCMD:
+    def __init__(self):
+        # thread event
+        self.done_ev   = threading.Event() # successfully done
+
+    def launch(self, func, func_args):
+        self.thread = threading.Thread(target=func, args=(self.done_ev, func_args))
+        try:
+            self.thread.start()
+            self.st = time.time()
+            self.id = next(launch_id)
+            launched_threads[self.id] = self
+        except RuntimeError as e:
+            print(f"Failed to launch: {e}")
+            self.id = -1
+
+        return self.id
+
+    def query(self):
+        res = self.done_ev.is_set()
+        status = "running"
+        if res:
+            self.thread.join()
+            del launched_threads[self.id]
+            if debug:
+                print("thread joined")
+            status = "done"
+
+        return (status, time.time() - self.st)
+
+#
+# launchable functions by LaunchCMD here
+#
+def launch_sleep(done_event, args):
+    if debug:
+        print("Dummy started")
+    time.sleep(int(args[0]))
+    if debug:
+        print("Dummy done")
+    done_event.set()
+
+list_nonblockingcalls = {
+    "sleep": launch_sleep
+}
+
+#
+#
+#
+def nonblockingcall(args):
+    func = args[0]   # the 1st arg is the target func
+    func_args = args[1:]
+
+    status = "failed"
+
+    threadid = -1
+    if func in list_nonblockingcalls:
+        launchcmd = LaunchCMD()
+
+        threadid = launchcmd.launch(
+            list_nonblockingcalls[func],  # func
+            func_args                # args
+        )
+        if threadid > 0:
+            launched_threads[threadid] = launchcmd
+            status = "done"
+
+    return (status, threadid)
+
+#
+# define blocking-call functions here
+#
+def func_sleep(args):
+    time.sleep(int(args[0]))
+    return True
+
+#
+# register blocking call functions to list_blockingcalls
+#
+list_blockingcalls = {
+    "sleep" : func_sleep
+}
+
+def blockingcall(args):
+    func = args[0]   # the 1st arg is the target func
+    func_args = args[1:]
+
+    status = "failed"
+    st = time.time()
+    if func in list_blockingcalls:
+        if func_sleep(func_args):
+            status = "done"
+
+    elapsed_time = time.time() - st
+    return (status, elapsed_time)
+
+
+#
+# receive bindata
+#
+def receivedata(args, bindata):
+    destfn = args[0]
+    status = "failed"
+    st = time.time()
+    with open(destfn, "wb") as f:
+        f.write(bindata)
+        status = "done"
+
+    elapsed_time = time.time() - st
+    return (status, elapsed_time)
+
+
+#
+# main listener loop
+# XXX: add mechanisms for multiple requesters
+#
+def zmq_cmd_listener():
+    context = zmq.Context()
+    socket = context.socket(zmq.REP)
+    socket.bind(endpoint)
+
+    while True:
+        tmp = socket.recv()
+        delimiter = b'\x00'
+        msgraw, bindata = tmp.split(delimiter, 1)
+        msg = json.loads(msgraw.decode('utf-8'))
+        cmd = msg["cmd"]
+        args = msg.get("args",[])
+
+        if debug:
+            print(f"Received cmd:{cmd} args:{args}")
+
+        retmsg = {"status":"none"} # empty status
+
+        if cmd == "nothing": # this cmd does nothing. to measure the latency
+            retmsg = {"status":"done"}
+        elif cmd == "execute":
+            (status, et) = blockingcall(args)
+            retmsg = {"status":status, "et":et}
+        elif cmd == "launch":
+            (status, id) = nonblockingcall(args)
+            retmsg = {"status":status, "id":id}
+        elif cmd == "query":
+            targetid = args[0]
+            (status, et) = launched_threads[targetid].query()
+            retmsg = {"status":status, "et":et}
+        elif cmd == "send":
+            destfn = args[0]
+            (status, et) = receivedata(args, bindata)
+            retmsg = {"status":status, "et":et}
+
+        # send response back to the requester
+        socket.send_json(retmsg)
+
+        if cmd == "exit":
+            # XXX: add codes to kill active threads
+            break
+
+#
+#
+#
+if __name__ == "__main__":
+    if debug:
+        print("start zmq_cmd_listener")
+
+    zmq_cmd_listener()
+
+    if debug:
+        print("done")

From 8184b44a83ead52a84f190bba587436cab948850 Mon Sep 17 00:00:00 2001
From: Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
Date: Tue, 7 May 2024 15:38:20 -0500
Subject: [PATCH 083/188] C++ API fixes and training demo files

---
 src/surrogate/zmqml/Makefile                  |   1 +
 src/surrogate/zmqml/demozmqmlrequester.cpp    |  78 +++++-
 src/surrogate/zmqml/model/ml-model.pt         | Bin 0 -> 32881 bytes
 src/surrogate/zmqml/model/mlpacketdelay.py    | 258 ++++++++++++++++++
 src/surrogate/zmqml/model/train.sh            |   3 +
 ...pyzmqmltest.py => pydemozmqmlrequester.py} |  45 ++-
 src/surrogate/zmqml/runmlpacketdelay.py       |  14 +-
 src/surrogate/zmqml/zmqmlrequester.cpp        |  20 +-
 src/surrogate/zmqml/zmqmlserver.py            |  30 +-
 9 files changed, 399 insertions(+), 50 deletions(-)
 create mode 100644 src/surrogate/zmqml/model/ml-model.pt
 create mode 100644 src/surrogate/zmqml/model/mlpacketdelay.py
 create mode 100644 src/surrogate/zmqml/model/train.sh
 rename src/surrogate/zmqml/{pyzmqmltest.py => pydemozmqmlrequester.py} (74%)

diff --git a/src/surrogate/zmqml/Makefile b/src/surrogate/zmqml/Makefile
index 85915bdb..4c28ed54 100644
--- a/src/surrogate/zmqml/Makefile
+++ b/src/surrogate/zmqml/Makefile
@@ -21,6 +21,7 @@ demozmqmlrequester: demozmqmlrequester.cpp libzmqmlrequester.so
 clean:
 	rm -f $(TARGETS)
 	rm -f *.o
+	rm -f tmptestsend.dat
 
 distclean: clean
 	rm -f *~
diff --git a/src/surrogate/zmqml/demozmqmlrequester.cpp b/src/surrogate/zmqml/demozmqmlrequester.cpp
index b856802f..0c9ffb35 100644
--- a/src/surrogate/zmqml/demozmqmlrequester.cpp
+++ b/src/surrogate/zmqml/demozmqmlrequester.cpp
@@ -1,6 +1,7 @@
 #include "zmqmlrequester.h"
 
 #include <iostream>
+#include <fstream>
 #include <string>
 #include <vector>
 #include <cmath>
@@ -10,20 +11,22 @@
 using namespace std;
 
 static void test_blockingcall() {
+    cout << "* test_blockingcall" << endl;
+    
     vector<string> args = {"sleep", "1"};
     vector<string> result = zmqml_request("execute", args);
 
     cout << "status:" << result[0] << endl;
 }
 
-
-#if 0
 static void test_nonblockingcall() {
+    cout << "* test_nonblockingcall" << endl;
+
     vector<string> args = {"sleep", "3"};
     vector<string> ret = zmqml_request("launch", args);
     
     string status = ret[0];
-    int id = ret[1];
+    string id = ret[1];
     cout << "status=" << status << " id=" << id << endl;
 
     int cnt = 0;
@@ -39,10 +42,34 @@ static void test_nonblockingcall() {
     }
     cout << "done cnt=" << cnt << endl;
 }
-#endif
+
+static void test_send_binary() {
+    cout << "* test_send_binary" << endl;
+    
+    string data;
+    ifstream file("model/ml-model.pt", ios::binary);
+
+    if (file) {
+        file.seekg(0, ios::end);
+        data.resize(file.tellg());
+        file.seekg(0, ios::beg);
+        file.read(&data[0], data.size());
+        file.close();
+    } else {
+        cerr << "Failed to open the file." << endl;
+        return;
+    }
+
+    vector<string> ret = zmqml_request("send",
+                                       {"tmptestsend.dat"}, // dest filename
+                                       data);
+    string status = ret[0];
+    cout << "status=" << status << endl;
+}
 
 static void measure_latency() {
-    cout << "measure latency" << endl;
+    cout << "* measure_latency" << endl;
+
     vector<double> tss;
 
     int n = 1000;
@@ -62,14 +89,47 @@ static void measure_latency() {
     cout << "zmqcmd latency: mean = " << mean << ", std deviation = " << std_dev << endl;
 }
 
-int main () {
 
-    test_blockingcall();
+void test_mlpacketdelay_training() {
+    std::cout << "* test_mlpacketdelay_training" << std::endl;
+
+    vector<string> args = {"mlpacketdelay_training", 
+                           "--method", "MLP", "--epoch", "1",
+                            "--input-file", "model/data/packets-delay.csv",
+                            "--model-path", "ml-model.pt"};
+
+    vector<string> ret = zmqml_request("launch", args);
+    
+    string status = ret[0];
+    string id = ret[1];
+    cout << "status=" << status << " id=" << id << endl;
+
+    int cnt = 0;
+    while (true) {
+        ret = zmqml_request("query", {id});
+        status = ret[0];
+        cout << "status=" << status << endl;
+        if (status == "done") {
+            break;
+        }
+        this_thread::sleep_for(chrono::milliseconds(500));
+        cnt++;
+    }
+    cout << "done cnt=" << cnt << endl;
+}
+
 
-    //test_nonblockingcall();
 
-    measure_latency();
+int main () {
+    if(0) {
+        test_send_binary();
+        test_blockingcall();
+        test_nonblockingcall();
+        measure_latency();
+    }
 
+    test_mlpacketdelay_training();
+    
     zmqml_request("exit");
     return 0;
 }
diff --git a/src/surrogate/zmqml/model/ml-model.pt b/src/surrogate/zmqml/model/ml-model.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2008092704019a409ae59771bcdf711a86a624bd
GIT binary patch
literal 32881
zcmZUaXFQj0*v1hdM5N4&>`fZV?>MseEM@P#x1>S4h)Sisho%N??X97m_Mdi2d#Lo>
z&&%iYc;)r)zOL&$#`g^L=_M&9CL<&E|Na!kRK&6}EwgeGlQOLn;|k-fZQPv|#lrvZ
zU%l;rqL-I9pe3_}pIsvPxT6uP(%Ug}l`4X@<d|m}hV6c#n0!-`2Zm_zr0roiwT-2(
z<{Z@ioyajE(Tt2RM`GQ7*r%;ZwX#7x_Rfq(W#D9g6-u1i4?hcEZdxdZM%KY;+auhT
zZ@_{_7m%i4jbHm-<4%MT(`)8pd~t7T7$&g0Mv8?R{W&v4g5{n%w0}L0LvK#z-m4L`
zt|(>3ODRUS{}U~FaTp_=BB?8@K*OB<xY9oubFRr@?@e=@%e{xco`K9C5=8&QA4PIO
zp$wWDgYJ|LoM~*vo~d1^+tP`5-QPt6jO<|l;2#X<Dspeb9(XPv!D}=BihOqW;kVRc
zCN8c+!n1q^4RGMmEwWfOD4dhtNU$_xAF^{YXdds0Zz46?o%{ep?MR9}wPa|I2QRb^
z=Vr|g^#Ah`TMmtaqUu;an~}kq%t%UqO~Fi+0erQ!5Sb4*;LMgfOd2<g=bV?|`8;{5
z{O!lM&`>TbOF~`bWb}`(gR5L5_YA*)y~VjKJwK8={%LT(uPldrdJf%Hs(knM37iWQ
zsZpUx&)!bBCszj@`7D0CI0_GMO7M(L1t+=L^3=V)%o(DCudd!SUXcm&L(lMG`z7cf
z`htk%vaGzFL;LAkR52~#$AC~ydovr&7EZK|OG4n!Mno*N;>rz{y#FbNrnj#^u|t8)
zFPu1|Ng2Ntllk?#FMj>$%TvBTaU^y-#wlvjHad%?FK*%HlO_14(G1lWe?(quojGUd
z9$b@~glCTP@jhV;x7h^qo@p9)&Kb_{;x{qz$rI#tS<#BwTzc1xZ?`w$!N1-3)Y%uR
zeGI6%Y8f8Q)j+6n0o!g@V%+ZE5Fchu_p|*tIVl;Z8hkOiEu9k_b@}o}J};!VAg#xO
z8Lb^iYLlehu?6TkW5*>f77QOP!{T}E2$)%p*2~d!^NNF_o(mtRUBT}0`!WBx7EgPu
zNB2Nuj`G%H-K$vScIxw!i96kgh-h5+8)M>KFzHJdGF^t#Y~l?Bt`l*ZQ7kvxt%k9S
zIHhu(Idb#>Zm3aX;WtfQzVCzF!U3G88AEC344$4cj*q?!;@6+v?Du0Q8V0zaAY~NZ
zd@bhnsPTNUuMqFD!Q9np%=%i!*=70su9i+Q-*@<@(u&bGxs2Zv%<8Q*SoEcUbql=M
z`%nO%<j+9<AR9`ozKH%qvUs}cD=hYda@m78uicp@L06!@FOYd_(h&XKkgFw%AUpQC
z$VSHvf2`AZcR>J^Ru^z(MmCqsl}E5jF?vP4N6+<gB!|Q^JS_|k-Hphtd?(`Cjo1~R
z$Ljr=Y#SfTjr)^%)+?KRn>(PmYcyXyFX7U5J7z~tVXRpICtMBXVX@0tvC4<eO?~-3
z<pZSDmLkJ{HCB5>K;>5q&tHpSN$4Lmj#z|SE@|A77Q*%_Wj5*VMnX~nyCXoU&F)Zq
zGnr=~d~f|z=-v>_g|}CuX7x8Lnv_W&byZ%g%*O^^hnQyv{O^2$;uuXnk1nNHRDUMu
zl;iC5{(S4}$<!g9Jg{92b?&cVAvqo=MjgknxMA!sn}ZvAqnKHl#g&Uy_|vHfTG}nJ
zO*WyG|2RC+@ne0H4%{oHu&PlVW9ENEzvGb<4-&(4mpr;1IE+`X%&GE7nF>|5*z>Z0
zS`EIOVmgV#ihMY#xC~#XWOHuu00unwVTwWoHnv^Cy)9iLSE;jDcfp7YdLD}67TNLZ
zLSJ6GuEi_+3UJ_p2mHp}K=&*sZkjZi0Ubf?S}MVh=d3y2_Lpe5s~z6ideFUkF)r?O
zXSM%CjBMKq$I*5$X!PferxH9Zl|ti(<M`|G7`{sHM}PfCct7$JCO4{aaH#N}4JyN(
z3%PvrR*V}fhGKurYAjdE<;kJ>{5$lu$TK;Xd+zx#JuZZ|`sA{u_h!tnvu1ULGwVZc
zBQ4dIH=FNc*0B&SwP=TZ@MRdxHR5yyYj%`xf%kPaip~ae*6nWW9B9ks-``<DuoWeK
zX5*U4Qta8W2K_Zkc=pbHsQUNg3acI1XE2#j$>mgd9tZLFzPLNu2hvdrTyy+5F8`^)
zg_h&^G(eAo-)zQZaYMcu@)VLT+c6+cn|0f=C_QHu4s2Cn-Ov_<tSh7F(o0dsJOdai
zOrgBiG93PHK*`m;D0#ROxl^8^y<j|htrouf(_Yy8W+9esxDA{L;^h1SScH{wt;Zv%
zCcVd&^b{@$^5w^fJid?*;ef^#NS0+|^y}YPt&q*(wP*2nfHt-!i1;z1l=Ite!BxWz
z!FIi%a?FUDccd62zYU+;44I=Q-1qJ@VnZswPVa%`8%Nkzn&OH@DMz(BbNNtDj=k&%
z?Ni0<c|M7nTMF6fY>DFNQ+PeU95&laInV1j*1l|nw#c84bhn_7_YO>0`38<wvOJ-f
z1cQm$Tz+>a)>e#S<L3efRQS-bONXuTIh^gU&Y=HxLf?N96`vT<cIFq+pk3`anpcjQ
zR|^;&{~Ob!wcy^^oBL0M(k;!LwVD?EcV9Tetq#l@`WkPg&%vd^2#P^r=(y|54~bQ9
z(a_<@S590oR*{d)XCQuBA>WGDAZJGg507`n66fo9*5b?g(~co6A)k5w#be*@k;2>4
zi1p$6jHxVU-!}~~HL>N~syq11=dcY+;GQvIP_7%uo~=bZH{&l<9;ONJ`$MP?NaF{!
zFQT8{f8d4kTcm%sV)=s+G>%Q-qoY5tZ<rjnuZZWzny2tOlf^%Fd5H9Vj&mDjIj+i`
z4`;`4|D12I8}EoEe?0iP)Rc9{Mstov4eF$#Snpj*@#Ma&ULc2K88O^>Iv?NsO!**Q
zc<)Cx;ZTV+;|5tkJAE+!9L|7Qr8unyYV(X*9u*U|L0>$aH{$+b`k`I$zGRGED!)W?
zK2PM-k^jNUYyi8utte#`PUXLOw71UUEAM!mRr-tMu*FcBSc`EPrmQ?Si4XiEV5}3y
zkl!n?^lv9@ccyc<{~N4{9LQr6a`4YAhsjAj*c6bCqx~(I_NxGYG`*1LGLWhJ6FD;G
zCO*ILVWpsXjz5d!lFBVu+E<L$%Uf}GpEtZ_D$xFECM{aC7*MaqXk~4F{cslFpC)k3
z^8@e~%RwJC9d7dv;R9tIv_%zh?}rh*@uL9yTi@X4<UAVIL|~HIBZwB*Gc-p7>SlqA
zy64F-wNO6Mo{oU7GOj3F0%e(L)Ra-clQcuT^%&1tgEZ*a+@Cv_Y{bFVW0*4g2`=Ay
zFFG?riNBL{IJHrcGfvs?;KgEg_cr5ty$dKcIfS^<Ntl>87xNbWf?oAEG*!4WzWE3?
z>1FUzS0*xNDRBCUBDS06(pz^0PK4=j^=WnC4c&uPrZ-{r%#!sVO0nKapHU(1yz#05
zU*4s1E2a6&(1p8aOy!Z#HfXO4;=&D2;bGK+#I9~^?oyyb(nTbS4dU*H%A7bSly*Zu
z;90gh3u?|{tK?7=st)6w8CuN0wi`cHuRzJcl<i@i5Z8?2l-hnA{9zP-_aQU;bYkbk
zQG9wQfvK9Rw30L62s>e34Nc@flYRL2ay8yYyU}%z9a75_xaYxMR1FX1s={xgxQa16
zIG}<llMFd>y%g_PPGytyVW_!D@TBS<JQ~-H{754{e6GoYd#`b$<eJFYZvgEs^r2#d
z7Rxe|=xbBX_^zp_^OfXg-NUFVQirB~I;>5LShziq>aToQViCaX@4%OZ187}1fp68;
z;o1FC2J|xF`*(4)_HD$XP40}?Vt{v>q<L?q6{Ck*Q>r72O&2C}_S^}SXo}>>g^3*R
z(1Yb)1E9U|Dz+{81+CueaMaO<OSg@rWPu)c<QQ}5juP5_b6~cE1>|leWBWW08vf8E
zXLvDc*f-JfyXqKV9?q>@VVrfkFM4Z?;u=d|B$}qu&)$la8Ivg5BZ07z528oMVwgWQ
zms@<oxcG21C2m+CqcVeGo&B*#!hlLqG0^I2#`6X}?yo<FNv)qS`?EN*&)gIBpEZiU
z)kiQzVg?|76vMMzVdr3tQNo!WWaz`?wpyGTwiZvW*b4JZnJ6fL!&yVVJT`&%1Eiog
z?w?3{l{zI_;~8u=4;g8uoZb*e+3Qxc3m8nxk}33h62S$su}HGh;NfjPT(O{lVynN2
zc8(6@k+jFi{@y9t|HOdMpO435wHPin2QkZ{Ad-~i8HJmW(;Lef7{H3v$qYR_nD16!
z!GY9duKTM>0~13I?r4GR&Av4F`W~@&B)D#^7gj&(L%E~o*i-!ieyhfE&*lQCuAhOy
zolnqje;dC3(BX$=T3oB*!zGxBM=27VKgySukr}L>)0=a~Er+9o88?TH;rr%tG+eC1
zlm*U|{wc+$N}8~`zY-tkk7mgG+ZeewiV-mx%+=Co`A8|A_rHpL*QDuNWkByuX{d{M
z4LMzF%pW?APM>C>VVmGjem=!h`4yOw=gI5ab!eF$%Pl!W*<EaoqqE0zX2Mt09NrI&
z`4MyqPosmn2BOAEFmZS`&6hbcbw&ZL-`qm~$ICHnM;1};N|_fEsM@>-|K(O<z)W%4
zjFU(BNGJZBpTOW)6BfSg&H9c2Zn{{50Xmb}u<xhHrhXu*Yu~``OA0MEYfxd7i1}-S
zQTuin=_ASX;ZFQ$A4N5L4_-6v#U<mUcw}=ehP0o9Y_CZ65zdZcX9&;DJdW#iuMk?V
z$l^*>B=?o)l@uxN>HQb>ufJfUb28_sB(hLch{_orteHF#?}CQo%0_D@o#@Lot$Xn`
z>!qmV$wllG4Q2TtP1u*H!^cyL&lAg#|J)eIzaGbyi+AzRQv}}^6L7ynmF@*{sFeE%
z<1JP2y*3%oHaZdNW6^g(7NY7yIOehejU`K{aiASt30v{as~1=3EAjFA6z*QoE*dqi
z7d5xK^P|;jq)pt4!n5v7)cYd(t!>P^Gu+`N@5^4d&9HBF6m>3(qyD-rTCMtW#jrG*
zZn4IvWxHXKUPKq~*GTJjqK&HzOyn-%;)0FXbSIw)jiVT6UWTe8>dY1VafbW}7~Nft
z%_bMIeqah0EsckYb|D&*??C2zUpjf1@qD8aGn=0xxbHmF*uO!^)MH4kN~2nW1dN;K
zqxKCEZvGm_&!o~$BAa{V2SV(GEIVDtGP~ZDN%vEkC@aU#%zWh4$8fXkRit}*@$uY8
zcvu65R_3$RFpIJ$)S!5I7S=32h^u~;_<p;TZx2gQ<J%N8XYYnd`U<?9HJa^iL*O+;
z6(0`@8t<kP2VD1KM!|e^%bK!#T^w^3YV&l*TkKC$rptw1bnEos(z%8BebSew=Z~dz
zule|*l+LV?<>b9)SdA9+x!{@V^*wNEzlh7V3|T)poOjpQ;@H~&%suQ*L$@IeY*j<P
z^m0fY?2k;Pe8iPEqG3xqR|x*?x8RZFohQ;D&Yg>&>!NFgH)ne}Qd=>FD}Hy0F1m%V
z<XINWytHuiy*d@$in!@t3Tk6*sdv+v=2dz$W;u@)r_<$8F4I2rV4HA{tX!qhnz;g5
z+OJ@5@gI)53tHl}CzVbL_fRsHAFriBIz^n}!GqY?X-4B{4@y5ch?Z@)5cVaNPm7*G
zJ~9ry!(w=Aej@8vJcLW>d#E*K<EUr^CQWhV)iH-KG&hwu4hG@*qR&`wtHiX6UJTup
z!`-QIv?%xB^0WJ}a#=KFJ&mDkt->dU6ZxO>E4(|{n^T1BBkqkQ)2$<!X(Wwt-4(E}
zNFjb0F~3`eo@@0fo7IgAnbY7~B!i@sFeY34M&Da!;i>QdBeFL@MdKl|x4EF`K|D{M
z8O@l_8=<aw2`A2aFi`#oF4dSa5rerwV*xZBgfsbj03~1MP)YkbMg+GbQ$L5I7DpuS
zQl)DBWS;uD2}&}>yxCL4zUNCg<dFn38%r4RpaI>FE@RGN2d2Exqxv^LhCeK2(u#SQ
z|I$P7##UV4?8mn=J*l_mDE75SBY1Qgw}lEGs5_hr`gcVugn7F2u@XlX>TupIC4QP{
z$)T_R;OIMJ?iezWt?o0CyH1WXzMVmvL;-uf$l+E=S-Rf8jCS$)C@PERS>fF-yfTq#
zH36&`RKUF}leyV{7ACt1+E(=-{(CFTU#%}#l-ZvhrZdpBQ3D@khw#v2GtO01r`FAA
zj?y}Y@j?#ux_B`<!whMp@(%u+!Wh(3z{w7URNkvZ-|a!X_2-tTs&+BHwdyiGS)QHa
zqtVsA6RV#H-*jh-=%unc``bmZFmg1<CU3&vg?f1I;g4h1N_-n1%h)5$=wGu8-V0Y?
z%T)>9z0fJzTdjkw>67{L+%Bm0{esHc4yYIyW7eTAX#KHd&y3TsDwO4v1qyuK(jjv3
zNu+P@d|G<NQF2fVwwDJmz(5uU);~qJ-8Y!6xCW((PJCIj8%O_0&{XF>#_Ssml>^`L
zwU;kO1_o1XasVGk7xKF33Vv6ra7dLd$BI|*MOrBbWUNEcEJr+U9E^%ndypoU!^#Uo
z8UA}RcMMBlr?Mi~j+=$X`9-YSl}rQ402Ynhh-KXw9J*luuRe(4MaNN`s^rJ7FNdI8
zp$<b^Lx?;7;h4G=j+vD(S4W!eAt{ucFdDT5!#HQw6|6e38;8~2!LR-}JZl4a<?;)x
zC{JbdIRmcz^%>Qd1noWD2wp4hVO5I`YO}w>Fi)9z?<LvwU?*<ON@k8-AX9F>fMdof
z>}VQKmD$C#7v6Of`4719Rf==$ui<7?0oTj)KrAJf*+XUNrzc56^Hs>%eH|wk4&lb-
zu6(tl4}a}0<_g_%G%pl##+)J^>GuO6jRnkdQsh~U9OxJ<!@$uF3=BMo^lTwB=v~h9
z%j}u<Sd39$4QP9IDKzvPF~PY<G{bZ}=%vSm&$^t^(u}yF4@7-Mf$Z{K0;iBq_;$$*
zPo6GA$r5L#-Bv=|>RPyJrL#MDBaZnUK(J3cZ0j94dqo9&CaChz#F13Umu8Tg7Khfx
zVdfQg2HzUa^<`r0=VAqPi`XL`NlmjZ{Id3;*Kd0Uh&7_5Oojtwr*iAHzWm@l4rk^_
zGR(q{K`yru+p`H>@3WYb?#B(*zmU9Kh00wLLbhhi8e=6|9b1k$6AkHW(vNn919@e{
zC79ToP;>ZrxFy^0kED<-G>9|XEC%ld&zT_1yd(o7<h-7QZ@UZmM>dg@l!GX?Hl9YA
zPAqW_<b>4`ykGVj9}1r$Oeuv?d*rwvpbD$3$D(#{ApLDJnfN1_;}dk)78lRvqxM*y
z9|upLZ=#Ow0&bWj%WZ|5(EQtmV;7&r$c{N!8R5v#1Vh?QFTvi8(;+V5L#w((HeS`B
z@A36Wvhbl?sunXISi`}7GQLeRrki*)ZJ)Q{Xum*)X)VUI(PeC~iKWw$!CdL)%3U+9
zIkfL$#N3sq#c>l#*nYsKJrfut<o1?6dZ^Rdg~G?mD0Ix^{fCVx$eD;Eg0Gvnv>bb7
zHe#WIFLylk$1Q&^jvJoHs~(DsDtF_x`Z`>?oyMd=<-8kgz-f*pod5NsXk{-k7MUB+
zrr8XmPx$bz<XN0g-3X7yC{#r|b4kDPn02-j>g%0(<8&(Dr<*YD^i52^)eNWj5H>U!
z(DYFn=X@B>j^)X8zT*a??b&>PVLMjNd4Q|xf-c+Kj`_lQtob*A@(s1nn`g~?*Ss**
zR+AYOIUJoD%USJtf)^btWK_L5Rmcce$Q;6s${g;?^QO|P3N$`Q=gFUf5j-s~<ZVx}
z;N5c^z1NR<@AjZk+yt`QwqteV2V9z##7FT7yt}drF+!IjUHA-hB3D8??F|N2Ud7s*
zV>#~Mc7(KRv!-MvdYx{;1D!NxPv40*=VZ8Q#Uv`+7{>QjoA7<kY@E+*L9tmIUPwD}
z-H!Ldy~*Upsq3-ja|7C5I`Wq&g&iqnJn>PJ8)vBU%gw{s*(ZonVRe}3{1och7Fg48
zU34|km;2@^(;zsNI{mXbwKIrgi^wO=cOgA6m~yv$c=Y5j?kMQP($_j1QQntxc1*%t
zCv#r?;tjKQU&fWp$8fo;D7>}@rt5{=d(U)yzvIhWWj;c75zDbMCY+-^hELqnSP`wy
z&c@Ny)h|P0@nbmYtI_kECgYY}$CIXR(R9H(eAq$O=4VmtvlSvry;wV35tsEOk+pCh
zWa|DPrQL!*4+wW)Zz8i_ma<p75+&!S^3tMRuorZ|<28R#b!`BDHKrj=)dTj&ocTuG
zijM^^TNiT>jYBMXIsY^^dirqP8d-icP39}pAWoOaVP1+4dsp`7I<q3q-?114CyOcj
zU?RQVO{7u%7NkUN#6MS6iZ8Ha_z7R0a0gGU%;zV&=eT+M7Cg=E_`uzetqs*!ePsaG
z7suoJ<onR`ZbaR+Tu$9!h3V?^u=ny?(P}$$R!TVH<{TT&ahr%YN`mf;5oV^ZCJ#+7
zWl!xC*4xU^!Thi2_>)#RMqGmRv@ZP28_uVBzKjtv4^Jx#zHK>*G0$Z<s9B3U9&bih
zz7+fV_P|<B5*m{tIKe`a_C@yi*pxwctc8h!B?o>kWs{g9w^W+5Z0l$)S(L!ro9|=K
z_hlGmEY8syiTp7}oA;K-vi)FRZdhhV^EC>5D*V<mPx@eqzXB}RXz=PmZ@zjI$tx=q
z`QWrGb(BZ*wyqoQe&~<t;9O+I^yL(x&pGt*Hlikdg-qf|p+7XC;)xR4A1P%2#hGx_
zl;lPs*QuGF%a1~K`)SEx#IhDH+6gSUd`s|3C-77=nity&SgqTOY1gOWz=$mJ+7(Rm
zIRf)#laY614;FNW3;tvc);g-f(BlPy20L-Jb~uv0C{XF_b;wA1F+DYl*LU||xZgU|
zJ9}Vd)FMPKv1H77L+*AA=3c{pux|IKYMBWPKbiAJ*AzAmQ-aRh4e+;&U}*b4yxp-1
zO8VifUM#~R@BS3KeGdVpN!&AMEY}~@VbgqFUUCoM$~9wnrTsV}9*OhfY!^CfZ^cR3
zNjzI#j<#R!{FaoB$3JXn_9>pH9uK6R<qxR-9m^^8x%eY4czSsoSUy^bqwNYTu|5Ls
zG3EY5C5FGxVE6e(jD7tWJH;aSGwlHucu(M|;Pd!$NuEyoI&gBfH(RuaGtOGbXLO>N
zmmtLhqaR>#A3470isks{lX>8oH=j*0VDB^IDPMaKXJ)KJc<oKBA2kalL)~b*Ta6Bv
zFJh*u6kpXyv*%4NBW4uyetkU5W(}wP3kQDR;y`gB16x&W&L3~g`1D30pU2DK>$3BB
zKDr#aH>V>!&zSorgwWqQ8TOwe_&~@>s|RkzKZ`-k{=60{M+S4K>|Tg(K8g5k6&SqB
z6%O?&+*9!f2mUG0@4tH3EJ|d&pqrb0ccaxf6}DqNkU64|KZb?izc;@?^=GJwaiG;M
zQ=ZgMLCAaIEUn!q@_(<vU?FSC33`P&fpIiCoWnT_)M+~57C!GypkH<rYrpH$eozN$
zclTpmXA+was8Rm$E9_o2Ovr}TVATC|y4Lie!x1Si_ZdZ_!P8+a{Qv6k9aMMtQ7%4(
z;k`m>CX>v%4pXc)Ifn)}Z64cT4~4r{{L_+&P9Jx+p3LOcC!YA)ApDNo+&Lz7I=)($
zQdf7b$n*FT{CT|r-gPqk5~a<sko%C`B!>Ske}nbZEQYpaa6zai2a9WQ-53*&P<6o|
z7iVV8*e@!%atdBD?f9LOz{~Xtw5iw+`KUp>cFG7LRgdBQ+=N^$!?kt2SlATGA&tq%
ztX&DWVWn*J)92#*Z?H=%fHx+GFk#PUh^$Xxo!(De5%S^KG6^bqjG^sfE2LNLf~j-{
z6Zfd`;p4%)FEf%(W1eHfxMsANTTt2iK5kt(i&8}meiJ-aK~pa-wVH!QpKrLeY&A|D
z^W*AQ(_v?o%?}5i7~7)@zZJ>s{p^+~aHgP#b>~1^+zykb51{|h{phMr;Lgsmcqa6z
zcSrb9BP^d`j~`;$ixo(|JsGZxa~Kk^3)5}Bz;4-R)Jz)0KAVgo)00KfiBPEhE9XeJ
zi@4}|8gJKp!c=t&?$P-I?@#+g(Zvb;o->txK0#F4Rl(*0V;&vQi@Ns(UwV509yJ!?
zXT>wTmkncdRU>u{kfZB-A@})Iik<(-(5Gr5<(bBqm{eZ%|Bdc3eK~V)Ajg#7KwHiU
zl(gs2?P!1M44;cjUIkR^r_Zw|0=d{q6_U<_*zPlyTmOjByR-%ln%PWs@aA1LOCGsx
z!o|)CJQWf^XX#GV@2W#T9}yR|8B=d7*lX4w^tJel+5dYVyG{8xBo<wZ)Ue-U6qoHv
z!RII$9*h#Y^muh9Z;arUdUdAf4`%!UaX!AT!O5vM)Gv=_kemq)NF2ubw!ZLPb`&jd
zB3Rc)7u)J=q4g`AI@_1wzia7SWoXT(%0`rnOJ`W|ACa?kG#}>K^0@hAL>HN{=FkJI
zuRVv_d1Bl?c00n47t(f30h=SGII2bk4oxzIFbCCdMv@0rXdQ3PV@(zuF=8e9yDi0V
zNed*~$_ek8887UTq0--c_8RdXlg`h@!tHw0s(OZhpZ=oVx}3&yWm)t(o3fuZIO+R2
zBpi!pS6eM|)%(%a#eg9bTq&9+hnwq__&Mq+wm*(!tGERp_c$_P*ckrnd<TQ&U6}MY
ziy7-&uwiBzu6<GA?<YsFZN3U^bSKbWLxL~Qs^MDM7VMo`zz+!>xY4i^>2B@}@Ym#w
zL4v01Q$URu0j!^u&-o`kc>nBOTz-EIGK;<WddwFrTdB)8Hqwxis)vTip8uXpbFIb^
zY+Wjx_uru`3iyPp_OftJDWSqfZ`uvXNB^T~7%TL4)Bls_^~W`cz8*xmZSOJSqM#wx
zZG_Jj7xdhyK<Y$E?z)nX_Pb>~c151MY>p#n$#AOY%3@0AI!wy2VPl>Mi{^Xrx33z7
zIt^>4-Vq)39Kuh-y!fE03zlVi_@1)}9nKoU+d7c59Su2l)L<^Dm*T^F0ZbKoZ<z(T
z9J{HMZmli2a{W9`boAhCk`=eC7x8ICZ;s9{hr4_PoI=Z)>f4UG2jU!ZW+9XVx1lM-
zfKvy#Fw^oZkar&Dr{nl9qaJIrC&2SuH-5KI;I5DQ6g#BN#N=N5lp4l?f_5BrKZf^S
znKHUsjYDDv@yM11xTlvu(~b}>IGO}^ab@JKeT}tJ0^4A-6?(Rl_@Z?O=BxIGQ9n2S
zep`oL>A%srq7D9`60FoR#p2`g`25*Jm<tm8*Hr`a-Ey3}QwN%7O&L7gpN11BvF$wI
zAT7ZxeSg;8C}4@G1QoBHI5EMMk}t-vz0X7>Y>(xr-#ZcXZV?>b7;<1vCVltM#o!GA
z-2UYs25h%t?x`F)3$w1bmm?->K0t$l5jQTd!mfV0%uL&Y`_taQ>y8~u%52e5KMNO2
zU1+#Y4E1XTUvR@1TKD>}PlGXC5{5A}y%y;=wFQ6ZOgHa5?v{#0{~z1Y)?zJWqJn>z
zS`ObvD>VEQVdst{m`Cr$`J4(Yt1`ofxvCuhD~J)Jg!_0@ofr1YGyJ3%Z4M`6VW<j~
z5=(ezODtCwoe(V^CwMd;B^s%<BKTtkl2%TF*!d(nM&3bHtTgv65V)PWbKreSji&-b
zct$iIwoPWdH%{oNQvx~gt?(Ws8Zk0Sl^N25A+yJUceak@-i-4Qb50qhZ<fs;yWZkU
zqanrj-A13DP^Nv(<^9O<^o+R%`wubP*W^iq8v`kE+Y{3s9l^@x4$=AiFlq^&u;imJ
z1DmZlI?I^)?M2jCwHqTGufuP*Hk49-LP}DVk7~>rQjmv{)5SPqAR!et9Ob{%sU-79
z6n@Bp6|?o>9Z-ZKuMbdO(F+4-rPK687MJ^N7dRs$VNM0`@;xh>3>M?y3MCBdo5;2|
z7I6Ce6^)wx`Q&gb#tpfSig{M3o~=riXfc|N&KEMLOGrPH!~qq`{4=i+N$!s^%tMM&
z7vwlOu^X4yT65>89Ol2(Vq(W|Y75%ON#O8K>~~?Dwii45P2hrdJJEx0qZlaNn<Gwq
z!LRkr!dwnxb!8bX-#Oswavuf^i=cmh1MVNJ$XWZ+SuYfiyX9VCpqwPDEo@j-X~au=
zr78Kqj<%`hYzYsB&KF~5C57<YeocD6`G?I0$^5X!0<-h?!(M$qoKEC2rMNdvhpTZ?
zbst<jC&dQ^qu4y|1ycNkJ1z7USGKRi`d=ozf7G9E#>jKUntX1%zYu5hT=_Y04D|}U
z_`+PCHo2zsH4ewAP%k)XN^w{1FPxT6rs!%s4_uAn%|YAHtmp&%zu!e40vrUMNgDx~
z-J&1r+adN#ADT-}L2cp!D8FBeGqzJ0c23YT$-->6Riwqt9*mT4!m+#4VXxzdx{+~A
z`*{tw?ulXYzEZAS)t74D%Hj7j97f>|km=CDm(DC6X^*AzvjJ4-9*TuY*YKsPTXZDR
zj*~Y(hQjMlhzvRp3*p@TJRZo;@rE?n@el83^k!P60>^u2z(KE+=Z77^!(HLL?zkMw
zJ}B^Au08Ef33+^i3(fbr<88hMH-^5%xZz_sVq7`JQuV2m+K2X6>QVlE7=LLtAg0xc
z(Vcs7?M*Ddu1Th4-8cr!QR4cK9{kX$k6hs#YCpaXjUx_J5x9Xg&HI>nqX8TJ!?-?u
zCG-rxVt#@z?+LuZa9?9|HFSyIDC|SK<y0zWWn<~FAimz1%JPmSnE&x4BK-4kCP$w^
zSAOA4UnMRLa_3rcXXgL9i^VesFh<FYZD-|qe2^a38y|svb}A*q{y_G{Z!E7>g<5+t
zT9zu&KkyTl-EP3v@&3&9`-`Sc!<jCy1=C~%b})7a?7n?KsF5^_G}mFEWE(!XDDdig
zC+=H6ktZsXdAd=9tsb(xDKh2iAJRN%{tp3S!+6`*P3Yb2p>w=1zZKs=k>IP&cgy0G
zbreq(%JIR#4bc8`469tts9sWvBUAdZ>vkWe@3-Zp|0wA4x$x}k%9JfBtm_^qFkHSI
zu>CB0J{$0$f;qS5>_v}$FcY^*;Bv(&?2@mB{Kv&uP&x<Yv7L^?GW4i2za5LBq!Ij2
zk=OM*F>H?`7rB@rr`I)1J7Gva|Iw^YQRG4?BgQ=}5_rIKIA$8lTd#5`8)=S}e+Sdy
zY7jpa|3Q25IEKgeV{NZwYF<&|v-Q>Z?^8C%9<k=&3k7sjH{<Ku{`AoaX8RspzPEpe
z&0q8xc_9h4!&E32cnu28s@&ob0Fgup`*in*vbQUpn#VKVvoC6miul+f2FaEmU|j6S
zkIt?5rc}f>t)tjKvm4$1Rp{Ta4^!gGXlRi@%{Eom_8!PPM_iCsmcmb^zfpFm5tH15
z&^<hhy*0e(wpr+4?S$TF-5ktp15@T|F;do+(;vRZ0-M>0tV`zcbwk*bz8_!A2lDLZ
zE(}F4DvS-{7MV~e%oKQErEn&Vl;eb1d$8V1ml2ke@b6VDA-f!w)3s^RERL9??o3dM
z;r)mt`khecJo|C1`#p?tK{0%CX^b$F2jb{-HO@NGg0j4wh}a&_ogTm86Euj4W+D8y
zE|T)Eefj+II~<#8ir5MjAuF@snP-CUzAy>-@*Z?kok*vFV>$2CKe&i%bB*>acvgEd
z+BXuvic@&mavjug9AAHGprzq14BqwP$|X&(NlWBC<*~eU+W|=(f8ka+lp7XK;>Tmp
z@GWLMHy6nA>-BR;ZVO?`<KCFmCyyWFD|j-X9sL*BarM@6R$1B7`mqDouNlTBr{kz`
zGNJRL&6p`@rxnK!Af(2C&!mMeD%G0-A%ce=7Rb|o?qO5Q5;Uv2a!zp#ZpIBq$y*WC
zPgTM_q8ajqcX9H$3bjAofbNkN6eYfb_q$l?eX?Lw&`uounFr|&efeUx9S=q*GFi|@
zE5u6J{>F-N7{;UJV;K4NoRA?6qOL_AC%@Z`U;|aY)oMmblM}pL0~kL$ng3Lm!~M$$
zHVXW1zp7D8crE0aTk`p~G?5GJtTF4vDFjU#!~56Q2!33L4gtr|p=ZDbUk`e0Hs`xJ
z(cGK!3)(|{S)5=)<;!iD^<^rq#}u&qcPkpwVyQhko?ATo@JN##4*aJ|jS0K4Y`Y<~
zex3v^5~=2=4|&svs0=mayf5m!G&`4L*55<rXICs77S2UCgt_)5l>Nk(!|de($S0jZ
zXzBoV4z}Q?^{eqjB*k$Z-(h23#6xpVLUm#}Gagk#XTTB686S(0asoH@Uj<qYyRfk3
z5`ykeqMh*WrIrz`M|627Po330vUnjgn48_oxS_&|m&YqIGyW|cCRp%!=o-9Hp30Xl
zg}kxBgANxD!8_-*$l7ZarkyHbU`{_Y_WTmX+V<wW0XDoT^;<M6LjhB!$Wf~2A{y>Q
z^TuWiw$9T;iIqB19)@%C&X4dty$1_jP57+gD%@55x#Ei%V^kV&aJLhb8-rM6kdIqt
zAMyO9274thL7;mOqpD-^(aMag#`UAF=SaRDD(ncf^ke3cU|#5$hI@hrJ3dK)`*L#m
zT;&Zkj?1y+U?aS?Jwr>m1;;=hQY({~p&P?3LRFoln8<?Zt@tW%96$7xDQ20E*K^On
zYwcN7r9Q^?q3Y-vn~Tydz4&jFFoVv#7d1ByrpBW{ZZyv2ujPfz^U6U^$uUe^vjsuW
zV|%_CZR~Z}d@F=r@&1&4x)K(Rv5bq4p?E)SO34^tQvL$CG@5gCu@xm>P2h(xZ^{YX
z$)o<#^iWRW++QalR!WNH?Lq2$1s>e&L-o%-R9@DBUC(=S!Q4gIHq@OvPsCC2hAsD*
z{uD8K3=U6-;_}Tukx>%G(u+cu-Es!i%ho_Or8k;(jHK?5AXXjxjI7SjqMVnL;hQVX
z^da6nE7^)47V{zNmIZhHT$-ym@#so{?|xAO=LjEs`mV#>&$l65;OR#l_zHhnOUS?1
z#o5{b>_63v`jA0f6toX^5mJ2ZA;vRfiWxfM2;5XAa<5uAisJ>X|K6O%OR913geNzg
z-Gp^#GI&3{lz(ojP)5&)+m!R^Eik^JpNC>ghAfTEv{3XUg)vUAArmXZ-{CXScgaeu
zTByl1om7Em-Hbow1NmZT5%<{_aqO3?c(<yE%BSr3UU&=g&dc*j_AsP9yN8HDmK?Tr
z46{PtisVg`_##RYI}d~~du}<6giK*;(FaTtb~2ovwPBD{CY)sAu=b7z>5{~8vdWxi
z_D$q%_)XNK{sv-G{W;<2EL<ox=b(Mtgsdi$`j;PJ+oKL7yc`N+T{Y-jSLSO&EzVZ<
zBdv9D`|%(y7$eR;`D0k~awh^LEZMndID>0^`K{wN{u8sN*ZNPG?rq8?*UPZ=ofDdN
z+amsK6~@RMh2Ao4{uuHRTfS8xf0`omr(Z+df-s&-bj4)dt57nT359{0tkf95x(aC;
zHkI<(frHR!{v-4`0w;Vlm(r3cj9%N95d|&qFt=td;;3dV#~|q-?wKffksF&4viLQO
zBYPuwnlalRB=A7p2MibH;=&?ZKJAugzoRK^Yggy=s`H|+Bh2x1vl8Si`%=8xpWiP0
zg;&R9n(Y0IO~dq9bwyyx-}-Xw4;h}hRY-?_i;%r&6t}Glr{jxiG*|Y4Wrihn!!AK_
zMm=sh8Blv;FdF-pP`pr#N%Ch<Q&mLUWx}i*J&0!FJJFU@%;+Ciux3;qmkB-YuOWUM
zuM^LzJ+W|DRV3^RNnk=@H03jMY1wuP-&btH1Or|AFY*)gxe-I3y@&7p+4vop!w(rn
zm^k^1=yTv<1ZxUf#LkgF_kkwK<59On@H8_wL85#weyw!EmS97k_qdIlYKri%s)CRD
zd`vAngq-*aXz%`o5#biX-hc;W&iuocq<h$@vmHx5KF8&02e3e-$lg~I(f7q*rh1pq
zGdP=;;YZLi(1PZHU7~j(*&IEgOBC@=ALggb@IX@meFa8thr%=bTxZNb<D~hZS&n6j
z7ZKAr6SmtuamK-m?KSJ*^5+%a$a-<I%zqfS?GE}V&%g-7VF))Ba<eBNaG@@YJNl^b
zOK(M*o-)L~^uF}0c>)cwWmq`$4KBY{<g@D|ky)w1*+1f_ou|vVgi@|Q>Pbm852W~>
zK<@AV5H2h5y^%5;=$_8=)6{wB_$U}JyacmL!%*7Pg3__R%nI+rU2ooq^p2SddG|10
z{c8v*xzW%#Q-wgkV)`fE!lKDXVCGbexgnvH>ii7_fv0nxX~?T_R@|r`&D1`sRJyFm
zUImg|S)B|yA**WZEMrMnG1k7D4WAjGMB;1u^QD_EvJ@A>(5Hx%3E)c|UxsAraa*Mb
zg*$g4<)R<XO%`V03~LrWzKOh(6ZrB_Hje2!;hIMU3mU93?0z0yyDaE>tS`2Tin%}g
z5iDLFMbW3R*uKsfax3)deMt=_eZaV#&tNa`@S29tq26nRkdw;MM3`xNhMmI0JJD!!
z)ZpOxRk(j{vB1v%5=}pQ3nPRKyj{?mw@u^uaK14o?#<?~Z65U5)(45RdjxIq4dOM+
z@l=?%{|av7(2iY5*HPfbN7HanF@qjGVN4R3ppyX~p)0at#x7~vWr(me$dz5siy^u3
zI8tN|L$qB53W_THvssI|-+K$4TryW&R^;>j-(lQ;6L4fP*6IGn$AXo(-82isGsL*6
z=MCy~=3|D9u-~GR!}+tyard+fUr6mk>E)le_(Wj!)wH>8!#mM-?NWg+k!K$^z;oM5
zJUURwS+kTF{5^niU!piZ?f}B(s?#cW77D|&=&88|L&rz*xR7sN(TL>o{bg`?Db40n
zzVxz|W#3>K-YONC^|oZ3-gyNRH+oX+xEG6umr{Ch7pg`Yapm5vSk=~rrb}BfK_-^V
zefu&~!HH(YLEQGhkhbY+RGKZ#k6ptM`9Pn$G@e2Bf;oa4iy1bq2MZf_p*&L>6=DiF
zbZI2dgzv?yr9y8Rw+Jq_nLI!DIlQMwGOsb6z4wgAfjKceaO)jLNR=|S+K*08x<!_U
z^mu=vCAYhsgLy_0CnWnIu>1`EOs=3`YXzr14Cdw&^RdXl5|`HpL*~dUe4G=)cdEk9
zh`@Flo-5@e>kMkWJpirmCopfMB2NUHFriwWd;1Tk@nwNeTKyRt(>$5-U5l>&IkLCE
zh^lvlJ&=D*0`KZV<p<U97~;YPbs?uLh-2CEk*wVC95V{XFwEl~GFo-GW4}3hdmxiv
z81jdLETxxUgZYy{=A04qtlUU0T^r8d_ZH%P)N8D}BTpCM4$G{Khugk|*l+m`)2;1s
zBh4Ldd!qQLt{*r3cEXg=f8aJ!mL2`hpkc*G49E(k{hXh8SzgLdW5;3nug%Cf-HXdr
zD<JzXg+1HiIkRm7P0Q!P>Si*J-cMvhMKk_*s?#CJn-90fu{h(GXrKNJuI^|<%HUDF
z*;0*1ovzT`Tmpap@x1nK8_sSGp`EKOkBJ<4Eczw-G_@kNbsyvmm!QZXm(ObZ@X+lh
zh|f@f`lWuXh`cM>G_Ws!+z6y;;6?-`1kk$aHUb46+H(3!QPeqa8p&j$*IRG8KhdDg
zG=E--$w0ShF=JO95Ls4-Vfe35s_6G;gVAc7aqPl3Jq!3eHNd@#uQ0(Tg<h>D!keu{
zHJhjK_AlYKkMcaNIE3msiu9`0p@Q&DdYdHk&z%rPnrd?0h>@Hku%1%4qS<oNlHFNJ
zJofS<gmq1f6Iksb4#RmkJDCNp;QhBF*|j>89iPv^?M(?|7y8m(Ife=Yqxm=^03&3)
z*ecA<!5^n%$bKQ0QJar#C*C6Rn-2Hg3F5tm$JmsqhScOBDrg4t`RLCW^*Nt2b<=p`
z&RewHQ0Ao`9ii*^jKB{kG3d?_91`}dW(rwK@5DAd7h8{sNqIcxr^WR9@w8o9%pF^Q
zV<5EX=vq#zgc8IW-9qH5L0n2nR$Vlw&9y)VK01zviUawuGncnJmtpsaiP$kO2A_k+
zvvG?mCRPjFM^y~hcGV(WDw_{Gn$ReCvF}1xK4n24o~;VwjD>~VHCpg~2IE+G$AE{Q
z%Q7@+5`G@GrutZ6k8URLPUr_dRf^$Jsu8EXe1HpcTZO)J0t<Uy<K=~FxHS#n4Z#;h
z^?!u(-^VcZxG7mS582j5R5OZU_1hAh_Vc9N8z;)Xnu=AvIgmLPjFR4dT<)aHB}1J#
zAodVGTui2PmpV;4uZg~Fd5z8WzMLFj#5;pJk@{*1mKRqc?QIr6->$_Hr+CUqUcmNx
zCALrfCrZn_fSpx^d=WAbXODX@{<9_ajT5~8rNOY<79q@Jf9y)Uf)RZNG4pOJTB^+X
zWm_oY7iI`~b^$~EBl*<vA+~K5=Yb75-2T^ta-q-gSU;URE}LO}NIq}P7(?y21bk`=
zMpI2X19Hl^yrn-+DIdWrVW+M7kuy&Z|B2#FnVc{rj4%I~vErPtv-d(0x^uVU!AVVy
zK4;Fs@?UXg=x8=gw&jjP8mJGqqObdJym^-eg9v4|-n<LPV{34r#}FGH=WzdzMHnRP
zER0D_<+Cq6*l}(KbbV$zuB_E(X3GvNJ)$J^O+y$f^$@?)qj~(rFTDD79#R*}@mk+T
z*bl3M$s7X)SV^${o4^t6-HiGGVK?q&56TbKqL-~B6P=pzeI{5jeH>Glg)l7kEA(pv
zXmxr8x>J9P&U_T-=E2ceWf_B2iNY?dLl$EVPeaC0g3&_WJ<VN%d%Uc<yP*T0R8#46
zh^X%^unpUmK(8yGW><44UN(idkC*Uk8JVaSBKU+#w8WaRu13gSddBm#t1AaAGUJ`I
zLlD@U$Spf0Xq|T$&H4Vc_fX-w4+rsfnF*I~%|##W3CtHS<jU2aJpILwMw%}~=_#ua
z;wB-WjT>R=FGAh$XzuJco~uU-^D4Rty@V{?DD4llH$H`~wLb?=5ZHq?>F|gx=7-Z#
zj1M#B#h21BD^%q;@k6jznvLE4PQWHyo>OC<V!Zf7U|<64e62X8+5pD~8E}J(3FA$a
zcwn|U#YRf<ze0C`<1P^rss_A}N@n2983_IqL~oN~9)HjqW7q6M@zo+GpO@z0HG}!k
zU6W&VM)Bp_lL$!DVw_ey-FwS&k*zdZG>7qoz(${O{sd9)F-$w8Pc_qd@H)2{mqz}@
z-_+Y^obnQ*#zf;rYXT#MUb>&LF_+01@l3bC*L_K1#ByW0e%7P@jHUQqZO6beW9)or
zLoXF`^!{@gQ)*p!P-8icpU&hPVaMEHekoI4n^P(5C>kTesN|@DM^EO%BBBb8UTQQD
z-tUf$dN}Z-f`fhK8MU<@8x55CYW!aOS76Td{YG*8%ezo>??=V)!CW%A5=V}|#*#gs
zFt7hS?0!9fYdckV(a;G&wgPjZK9wE9?{h$G38wEX=ccD2C~w(>);57zou5I=xCvbO
zp@6?-S7ZD}Gxm<3i<^2%Trw+zHqU)|^J_n-pb5KoTH_5odH%*P>~G70!|5A%wAqGJ
zE)3ubqjElhzzqpIqXB7m(X&=yiS{eAqac?<1RiVqh;>M79fCtAB(Wn&$Q>fIxiqSj
z`TzCeOZlOkEodJ7>`b^$^MdGrh&9bV%uU*b3Ke0mZHO3xe+e__?O+DPD{^%AH!Q7o
zVus;EluM*>iu)+c&tDDguZ}RQF~A}5R960di<;$es0=cqw0HvkB&)LZgEMcv|A6LC
z0t0Vlz+)Hvp><4;zx#*t$>QfI4p8Iery3N0pvpO41<%{|TlB;yfo&U|xyfA6oPz%9
zyL>cmYIorBK`EZDF=FhtTI^02dV%h1DBK;zseuD=PDX?MeIwZwH6Ai?ax~X>7y4Bf
zD*V-C#R*~ex*`>DETD6>5*>d<{7-970#D_(y`jt^Lo#H}JkLXCZ<*&gvySN)k7GKf
zgeZ|RMj=BK8l<R{gvu03qDbb<Wz0~b{6BSHm)^biy?g)f_<g^_@vU#Iwbx#IueH~3
zU^Jc=67Ug14kA^kgB*qgTsZ=FYjcC)NM=ZE7mC;?hoHxh156Ix3!3&&!xN<rutC@n
zGVCz}=U>=EohOTcB|{46JMhAMJd(p7X#}+?;(+8?C%`!z3Pi6M!DlCx0kZ=Q)Z}^%
zgv!<7-Bv!x+nEJ2rMTeN@gP|4&k4~(H^3B*7V4$2f;javz-WCIGz_OBYZe}AF>%96
zBtKVEa0;vuFN3%pf=E63Ah?3m%5_pwg1+lc@H|pGf)%0x4A`fjVxI@t6^Gb@Nr>%-
zHH4Q7S3%WBHJF&m2kTE8KqZEQppHu(ihF)TMaG>0BjJ_6i*X&KK3hStuu;RB=bW$<
zsb@TTNgQ55a_nD6EF!h!SCE_!0gyrG01hz}1Clm1&_MJI1Yb3TSQAmGA%Vn)7$e~-
zd=A3&>p?~uJ7f+=Vo3TXupF_m4bXc5Z@neZ*NFgQZh~<AmIsXg<__)c`oT(kHfV0D
z0B|u5tR#;E>KlR}K-?If9Ap8BNIl7wrNfY-$^vK|<A)50-8bYD0e1-KLx}4}>I_Yx
z>jVirg5*;_d`gR)n{&eXqi9Gri-H|Uo{doi8;pvifNEX<=6(=>Bk`2rV*)e0v*Qui
z7%KuJ$oXFCBVOp0%Lb_yt^!3xHOOBQ4h_%mg%T?2P|cDbe5gGH+oun}$4G8(b<{za
zI6DbAC&*zxGFQTeo#5pY5r9&~4U!|d&M2gw!)~VrP?g&cZz8p_;+I`vqB#{3<K7RI
zm8ju)dI}hYoXNYM+YOtA)PcH86mYP<4<_=N;H<JTG^kz#{E4Li@eZIJTN4V$?gHuQ
z2w|fh8lJxB4kxFXVL>c%PQsK8Huj?-U%olCd$j;wAUW7-3nZ{Z#2sGpQG;YDNKEb#
zDX{MehCHgs|1|Yr#pr%mh(>y9+z)}ix?h3mq9Cwg4TDQk<G`K12(a(Ag-tm|kV0TD
zG=5DAQ|4xX^8(^8McTndesieDcNm^+D*`O)oUmk21o&u}K#}Kzs4THD6n{uH%ErnH
z?$2k1ksY+~YZY>ieV_#}a3O0Y*(rcBo<MSZ&H!#r8u*^o3r>&7f}}W2sEXu|X-igv
z1IIa`?*lH_lZ}N(CpJKMTRTw5Y5?A`yCEBsAJDWi0vpdu!KxV&XJd#3WDV)yUX=s9
z%MlFqzxIM_k-|VTs|E}q^|U+sCE<vDFL<9N0@}-afQ7+Z5WL3}swrFttVp~ewSy79
ze*GD=2#SGEqmeK|d>$kYTSBt58(@cSH!3{F8N~Eo0nEOg;6>yT&=K~5&a#xC{eUhU
zWDI~wPUP_MgD}9LLJj9Tkot{1nlJ|24N__zgUh*B!EnkVYLrD9?lQ9lzRDL7f1e!Y
zsCh!>EFU;M;SJ<u@i3IiAMWQtYMJ_&;E4b?=yhEW?!aS#(SsWxJd_Tq>qP^G$ab*d
zgoR+~AQVOR2o9cIFy&D`NZV@<t9+3BoIFlw%6%I!^8(1%a0ob=yF&#IKj=Fu2R+Yv
z!d))r&^+WJXjota`3CH;|E?)4?Iwg;tW2;V#0mP=s6t|XPxw@49}Gp}>W0dLfC2Lv
zoX*GuOP3s>&hSxaB&h^-FuPz|Dw4Bsq!A@~atu7`_XNHI2SNMBCqQz30dO($K+U26
zFl^!orvL+poGb&e=;wg`JT;u_RfUq;!yx4yCtPtK2dLC1pds)Zc(%d`2PNO5b_hlR
z>XYs;2I+-Tc@T-U7gEDIZ8n%p%L~OS@PHnvCs{Zm1UUJPfB^9d&_Hs}v~*k{PK^`R
zaxZ{)f#lH8*#y=|wgDH-d4MyR0B8HP0kQBmU|hq3B05?CyzU+cCz@U1D_%w*+j9hF
zNGU+R0W6@~aED|Gb70S94ZzrooRb*n!I9<_6gG(vCM=_1!b~&h_F{&r5)VLMKp3!F
z^nfJC%)tg5QqM3h22ljrptepEJ}Wi{Z=MH1;%Xhpqxczg8LEOqezZuvw+vJaC;*q<
ztbyK{x2Vq>Rj9^EBqvA27ph*NfP$ZpIH2DhuzscqL=+H0{fs<dTVsp(3|5fwn;lTV
zi$dx663~T-2gk2}24az|Nd4nQaA1xNI@w#ngZ)e3{173$Im!ehTJ8e}NfUV64mtm6
z-vRvv=wXht7u<;Sg0^_XKX_CLF07IOW@B?0cfuV$tgHp;_KGl;kONv!Yk&mDgRlt6
zf0&}9ga>g<u&JCErX6*HH2VV~I-MOFH|s*Lh+d@T*&HUJ8bR0>74W=O2TJxIfSmjJ
zAwh!*WJ^KT&~9;9EeN1{V=kDzLkxi}2lT|ugD0iRAe@E~=H0P?!5hV3-8T}FBK}y4
zuqsTAYXsCoTHvjI6W~GY7<rCc;MLP0$d!H?khujRHOF<}ArhN~8@IroOCfOZvM!t^
zSOa^k4Pc^_4vaW_9UQlv0<8E5xN_GFTD&R(s;bQ(2%5mQ3?tZ9Ko2W!#(^asSI~+_
z`XAMy;Ced&lp{X?8263>Pu2*Sp(_eL5ZeL=Lo39O3`P86CcxXu58hKCy+?8*;fek{
zFdTvOD_TNo^Tr#%jn}V0>q#7V)<F%!Z-hYeSQWU2<QD7lk-*u58-S7fGnlk6f~BnA
zfSa>AsJ3kZuuT~b*agB6&pFVyAqiAOF+jrF1nfe5be(UG@bPUVFRxe;nb&sEe8~o>
z_q2qxtj4gXKm<}r>BGADV<2}X8JHlk&%uja@b!uWaI^>o77~hpc*F&&J+pz?3q{}_
z`#z|S=&8b#H$3RQfzmXx1rh_QK!(>6Lh1^{C$WOZZ3H2oMIj(2cn%at?*pZHHF$iZ
z1&A6-Ktn>L--(7U$ha8+B3%7J-FrPaTEh<~K2ShQw$lI=HjQ!{Yy|dR9)PfY5>@|*
z0vt=01SPxqLE;B7cvVIMQX_pdzQwMg8j7u<8DcxKS{NWU_ZTP*)`O(GR!~E8Qt-l%
z2OJ`Y@Td|M_#oQ{tiV^GcMn;otDXT8QV!^(`wDEZeg-G|DG>i96L1)X0HZ5dXnxKg
z9uy#j#rnPwU+4n&RGtA}zcc_d$s@qzlsJ6mN(<R7J_BAGH#mT<2fofZ;Ddq>u>O((
zj^03WCJL{BP>OEw7Rd|VebWx)Xv6~6bvKxIx&`<wUj`4tkRA!y3jiX0Q<@4<FdL%5
z;LbppHctR50_EX}s~w=Wwi3u|1VaMi1=L0g6C7}}gTz+?5I<EL+z(}gNma*?d>IlT
zJ;MR_B*y{#<UXk4p@wSb(TD1p6JU`L85ezWXh}i<3pzdk5i%aI{G=9~)kOL@e1%9o
zHWr#VodAzQ9|AVa7a$O<2YNiH;b=uMcsijDqJ=3TJvtCNGa>aJB8Z-slSBFHYOuT8
z2VQZ{212LkfY5DT_?*!MCZ0(HqUuU8&#(iiBJ+Oo>kJ?`G6R~YWdQDhFl-|10cMmK
zIK9RR^`A-twfi1GM*9+Io7RDuX~`fFRDw8qqy``F3r<Vl1!Rx$ut6gqPz^l?m}bPE
zdn^biWZhw65k0uor3>Zim%*@A3UC&<12W&ygX&%fc)8;vcx69};u7?NdNWo~;yi@o
z3E|M!As9*_eN2{-J#q1hCp3)ag2ZNIa7K|H$q{=1PF~W0_OlESRV)B)HBJNcLOt;L
z<N@n%?}hwBO7MIZ2Pg@y0?qN%kduu7p5=Fh-{!l3^#>e$u{sSn)RdtXTNDVf+y@B@
zyHL85NN%-AGT56b1k1RYVD2GCNR)xp(7ngPD`v-_hY)}ZNMD?N*`$!A`UOzR;DM{u
zXsBhq0?rOvfqEJxn3g&Ub~hq@F?tR|OHyLk;gSPxn8pCn<|B}f)DDs({j;ta)4;o$
z6!4I~DV)CwfPp_1R4JE&lw3$m-;M<yi?|M`kCB6UZKOsmHv-yewS#(p4sa0Z2eG#5
z3~J3|0X;?p2EW|^hrbfT56XcsdSntT)|$g>X14%=p*!r?5{0%20WhLY5h~_A2jW>8
z&`=ZyIf;2-f21ZXK8OQ*$W36^w-2a~%K`8ri5k3bvj-CNY@lWW10a~Og);X-;bSCT
zW&(v^{A?>&8@~yPa#3)cRvZ@4(m+p1q}J9*0ve|>fF@%;(242*&9b(z)yfdo=dpru
zst!=ZvH^-`1!3rdA3P^W1HWA`MDnzd{z(h9AQ5#67$7}3sy$Y~YLOb$9(035N10$r
zff<xXa&7r9n#0n`UGN2i6TCne26gtiB6S*J&=#rTYF4ubPdJd=9&J;kE?Ne@IL!fZ
zjx^w$J#yZBauB2-wx8+i2sm+B2D*{>z`?}>aI6uD2NsebF$Lt_NCNmsfCd_3-U2@F
zN?=rN4rfv7Q1#(G@We$3#I$Qc>2DjT{Y(^)^dgdjc*7ft<om#ziAG><Ckx2`k_M8Z
zMPUd%Vhg?<19wln1UG!SVdvXYP&cFjU-BX~FrRlqGZ`PKQ=bbq5Pw)4>DN;=`3iia
zlYtV-ae&f<3D&&hhq%dLIQ-x|2(r!r!v`;bwSEdPp3MTY7Dxd)s|z?hDFH-L3=ne?
ziP=_rgN?m!!ITdXY#>TOYHWnyY9P`ZFB%P;-h04jNRIF1$6jEH*ljGeEJ#m60;ta(
z1zwbR!>9K&V8U@V@Wd4Z4z35mi$%!(ZZiXN_#7afM+}~^QGlODy<uehPQ*S5g~ocM
zWQP*9Z3#?}28zgK{1^Qc)qd%xD5<Fmf9<EZy}ROn{!8vGPFv?#rRF9biyrzkn~fYd
zufL*0`4>8*G*#vQDIN1^ds{O;^{Q^-*EF7c@=x$f|3C0gUP`E<Kbs38^cHJ`&yUx<
z7mKRlezbw&Nt8g{MtY|3Fuy=GhFgNYYG&2phAaoFrKxMX8tQ6J+zqG^xtfn^47&y}
zU&lf4!c5J4(@msWbOBXV)d$uLWox{gkp1NJo@&>p(tw?Z2Zpkd*UmFNsL?KAta%pT
zTGQgX0WN3N*7(xL)f|Xvs~N;n)H0z`YK)QIKibFYk$jdDK<l93ftPLhHLoe3qPz$w
zYTM4gu36W=S3{JR0<Jlq0Gs2M8DHV^2$9+5^;dHHb^gfw59SYKK3>DRYE&&5w-YX4
zq~Og<o?suFG7#iF50(d7YtFbhgATE=n)gG`YQ*kj)MQyVf^}dGmY8qWghf6^ZPK5e
zY^$V#(7t*79r|Ve2lV5RUV;QqK@}ar5^8rMHz0V)4t6r#0^#Bn)w>2@wSlJzXlFGC
zlMROf_d7?>BP0x}AF9?QyFNTno90=4CJk4!@k|>8lJEhl3%)gE*C2`}C#X7<3txR!
zII-F+#tS8r&s5V_R)yLVL<sB$NI~DZQIsw^yvCs1yk?Vsl9Sp;k0HEoUVn#wx&HzG
zpV}S4n=^w4Ht8sMzEc06^=~(U;&%NP^Yy}Ns>=Se72p_=Ga8Ne@ppDdqs6}WiWT$5
z`Z_v$Vem-bSjSKb1ASu(KLWw1Z~-e)WPE}#9&YaV06#+K??7VcK)eSwfCBB0aSHUn
zx}fnGT!4>1I!KDbk4WGLNC0xDqZ`JL_y-7uCf3IhZ}#Iu3cnpzgveth{B~LqBk#_>
zfqo<gDhzlNKT<sc0X;%JZ9Q$g0P+d)3n4zv+1Uq+#W>?Vd~g92M*c1se~gQ|hclkS
zk4(@MZ|X;GwK;%ze@73T2hPop;*`}M<ZrY`0NOPWi$yyjj0HRTyP(~De7yW94Xvn=
z|Kfc7z0ux|A!w{e0N#(v9iiKu*!>4xl77_gBzi;$IlDH=A;Kdh;ZM2$Kt{6#+ynOm
zoOT<yl;7^(!0G-SoZgCNlU5(RBNiDpe{T<*BR0SfVf6dwn*{xk13JJ1i5@Z<5YT>S
z&e`1&hr=Lu?$IOsK4$(HH$SE=Qv4wp9Bxybfmlp{n6nQKj|>RLpTZ2|7l^^(JshzV
zetS1%>P~UWozl>K=U;(i5l}~e3jH5%e$3zJ0trI;kGbH-vW;bFKURcg31r0BHd&_C
zBSyxIcvE)2joLmdq+v0ElZRt~ANw{)89xpLk`{sF{0{kjtp5eY^@GwtCqHg?>c67a
z5t$r8j++yN!W5%_h{BKOcRmqlUiaO91^t6pG4#K5!rKw&hQ**IesaJ6`_s0wEbGUI
z5YCAZ&i`G?2oIZ#|IV_2`*u#{`~-iS7DB&Ii+^z{{F74=gkDkP5)<289lCEf9xXz2
zWb@iwr5KQ|&zqBd`%8=f{JuIkBfX!+|HTSg{6{P3ZR#!N8>*?Q^v{-#@5W8~p<di2
z3EJ>EORI3AQw8mpr!?M2JhrInn6+g{VDn`$cQ$jb=#BWAxDcbKY7t75<kpt$OcR^k
zS;Qqi7^x{<?yh(`uau1N%GCnKPJLN|(M)V9v|2sV{yb2%!v0BV|6E-udF76e>Jic}
zwPUz$xC`no#Fyoy^}R}`p0;gs)LOhpbB#hld-dcR^t9JKxZ23eqQG@%C*xPjhu9_B
zSGcry?rM4likz6f%YC$hF^%^Lua@oWY)?ar)Grb<MpM%<O6wEeF<&L*oXNdP>+w2A
z|LnM_yLr9&m($hv^e0nUOm_C3tv1&*d?iuC;p8AV;?W>Vc(gcAkUc0Gv!l05G}USD
zu!cUarn0+O%=y|>W(LU=Z(rdPqF_<XGsnurzUcFcr&X^ja9OMFSx7$CLTvFUIeNH%
zl59_NTFi|Ykx`*8Hurs5R6}&-mCX+ldv-qyN>me9Zza3sJEJsk_@V@B0aM9KH{O9Z
z9lJJdw&7(KB=xbkt$gokJCc-d(*E)@=gzeWy)rkq6or06A-1LUPsYmM4%hGL)V12r
zZ>=~S%#^)!<GM8sV}cR&>^hhC$VL1Yd!>f6^S+wcb1`BqaCgaB!T1sXNWzV=`Na0T
zr6@rXbL~Zc&Cajn29M3M_tsXFuqQS)qmzz*oWfy;US<d%;5|;P?HY$`N-S64%Qa(L
z)>_2uc+j}OW_EN&_3rzV`=116AF=%4Nbe+k{BD?*zgMf%+!10<#bZzB8oOR&nbgV_
zTNOkz`~q7{&@L56nGR6MiAox29lodg=6=91SDS_*R^`#FHl;FUrt2S03-)}@{CK(Q
z%Nx3;<2?HZNKDKhmb>!>9=|j)b}*=`cI_I@we?MULgd+5Db2^n@(dQVJU$yKL@&dh
z-6fPHZ=wY6Dc||fMw;~CzEJo6U9YR_f=bhd<y3Pomnpn(4z$S?Xa8`#Bv`?Q;Z2AK
z%grxemp5?E{=1WxbI-PZp{EV-mY91tbqDc5H<fUoTf~bGM98X5{#Qz<Kp_4%l~BwD
z*+1RT-*<B1zoU)`hwQOzmW%`F4gGJ^^*Y%b3r4BdVYgZ3&7+NDUwL@)7oXRhRlOP4
z=UC1j#-E$hqa480RN6AwS{TEBLg41S%1|ZQDbmY(%#*(~stGds#PKK7dS15}i^;9J
zZJ%eY>}gpMT(g!lr##UYHu1Si5#%<z<VFN#qO5P{)gH1f&$})<_~m_peVG4n<^^tN
zzsu6xt^5nE^)Iefxt7MfETX?*Q}*Fzx`DX+-NJoU1^Cyrm4`2W)Ane)pmdE}ZR&Fc
z{!$T>LIgJM0Gd0>KIzH;<-y_3bP3-XO@SMS<Et!N$H&skneSW(>JNVRCSjsbgt_3+
z+|hIWvkkhVjnu_sHeH34<(cajZLgM($0pusuXyS+Q*IyVysFre-Fy8RVa<AGL|DnJ
zkh$)9LCmg?PTg;4)v{A~&G^QBE<JxB$clT1!zq<bWk`yBDP@ki5E9=}66+oL`r$;@
zg%tim){E^}C8>|>dz~`!3`6C5YnE(kEQ2pIz1LRTyWV52jlvEr5(NjoxFLU&|C3Ya
zau=(V=6%avS+(gAF~9w}QfJ;LP;|u>S_$4#wpC0oF&_|S#=VR9cDMih)83$9CGEtO
zvh*6_9!2uoS(iPPXr|AZ4f@iPs<92-3l0q{OVFaLte`*fB_F;>*P4*5fBfn~SLbq2
z5j#nU!a1&83*yNtU&}y$<DKy0`6G4YMBPbxh2>nDpVjlyrgg13{5$#t(0$6KwRMR_
zkuPzt<t1cpY4mzu>U9Jib48~WpU`{~y?NyMs-oJNLb7rVtyKmt8<U9v{DU)n%Q^QL
zd6E`x<E6XxOG=dQb2m}WqaODJWxrTrR1vl8=RA{@n8fNd6^P$A;jB(uS-x-Qz637A
z`JI=qCES{_kk6i}pt|4~*njqd;~kU!H|HX(g4d74$=`V}qv>d7et~S=v(pI22$XHp
zyUr;zizhehpVltAZRqwv1C?X6@Nt$}r}T6ioo)4`?!;jSnqo7$?De8pH!n@YTeB;%
zgE_V@t=oLdn!pt0%ngT{TmkFMDF&ODlob(PN5#(tOU|e2bS?PfyY<GW2TpLceH0m&
ztyhd7sa+1*5uzs2^w6`bk8{_h)X*uGJ>q4WXOn_Fr`hApUZ%X6RX4Kzz{HnyC1+Ht
z(}r#FhG<pMvKi`Y2hVcnx|K~Hm35d7YxZ29x_A3g{7$CNj{#|R8dGpuQk(aTsTgxX
zeQ{Nb3(L}}yeX_Nr=>CM4DUX21Vw=FzkQVK6u;9nuT?Eyi>X~Xr<svhces(o^f}AH
zdHa#Wy^EF$-sivcQJIukh**lLkx1#9E2BTx4Lp;RA+~;cke$N#1mWHjTzg4~SB5(y
z6plsGJmK0KWuic(<v_JyvW_BYa%7WwySO9U@|_g+QAKo#)0`XY`C+4Zx1EX&pnb?}
zT#cIj(Yfe{P8=rV&eUx8Z(h9kW<+sE?MWykAhhz_&bio5OWMct17XAaG`|4eDHaS%
z+Je9$_wz+lq5%_$D{s_xlq6%c$|+7Vgk7*%mwo;<!GEr(ves)hQY6XUs%+svs(ktM
z&J!n{jK*{X<rgoPX|(jU&~NmnS?52aOe^cm%oaOCJ8L#Qc3^5LUFH*y=b~}IT{iZp
z=&x@QPM%g=VS7RxpRk9v=A3h+N{DgUO$Ke*?#{mGlQ$>cu#(XT2Jt)}n0^vWn<%hy
zl&L1hm%*q+Md$&Vvv<^t@8XnN%bku0?i^9E<mbsAv$n6sCy<lY(OtqTOw>&uu|hY8
z;+!^$r=%Mg&UgB9zu?=QD0t53c7xbD_mfNU&x3|Vo;b%pXVM|>j=#M2A<bGKbxB@e
z(fsY4t5UC13cfaN9A|bR@cF3&YwZ3_jBA8rhl?;@k+Lj<>k*oj_hddTAGp4U;f(Og
z3$cf>`TPuFNpedOS!<j(){|G}O|sT9T>Id$KGUfFgR_^NEJ?_MwE_-Dq10y?1`bkx
zB?==S_IPOLvC_-(?f#>PsClZ2bk|6|@M6-lt4HVzAA2Yq54bHa&6JjLylMDB^zHcY
zO4i^#ZBgX(669tQ)Dy!*b~`9_CC*-P^|Od?dUHxdId<-YVoBJoGUxJ=$MQpvLZ2;;
zC1k!Jnw$;Cf}OHfJ~d`Gu57*-PJK8AEtFw8EqiU18WSO}t+XEN8nLt1QsPF2r23N;
z>fNp`aqcx~5<bG~O^-cU9+p1ddDQx9)J6RZo^od;b}%u9rQ6izX9fqtb50utxQ(J`
zk@O@7$-P)L=PB0JAD?kDqB>hKA$U8P;MFss3Z;i<&EtiyeI(!9MEUpgliorch!Mp9
z`EyHFmO%7RH_`7djyMjtY0_@GIO0EDobLv$;D5h^=A=BvJ&~YQpF7$RPoHun{qjBS
z8K!o2n<J|3m{fyXzFk*m?z|uDmQ*)Z@os4DYU`X1vGbf_d)N_XG@btHHow_&R;m-x
zu{S!xwjW*gaUXc(xv%&_O=;wrhw1T*^2U!E_o7r<9Q(ZzSgmJ4;SFD}XJV5tPI|1q
zHjznv)~I)riuCqtnViz&JO%O<ecbUO6s|8D^$pz{cyCzMk7TK>oIfs7{Uz>UNPa`F
zY|?t3ps%`3a{`}!Ql-$fe7T^IPtQ_HPY+J()`3@A7HpA@?D_-@!PVjpI@4zihGrLh
zMGN9R;(Cm=XTAHRV%r_cyUX=NS?{i!w$j-2DSU2|FsT20LLsM)JXM2NS*;_bY&5Jj
z>gkI(tP#~$PEV)$-Y=&x0xJzu57wIHHzmgyQV}PF`0LLR6X?$+2MGlKndG)wLHqwZ
z85$(%>D8$)#1t<a8u8+arc56aigw-8A9v;A&}lE>u4A{28)Y)(oI7>+G6e$1#Gv85
zZyfvv7ptl4G8X#-=2qpN%BfC%$?vW4om$KZ)646dwVycofa&}vOS&HQR&BYbxwvzs
z$ChtIWJetFaBvfO&tYT7TdAZVS$HtW^NI+y^(BR;JVoT!)n%o$R@{=y`8vC-6W*q0
z3Qjb4QDsP_7Z*4gRH_WzIawD{Ki|IAa_B((LLB966ZgDL#;GK#>k7+tU6P3*H2ltt
zjRj+DBWHwz(n7wqv);;jtUBZwdh%}6$NNa0*O@Wyo>yKi57RXDjg^{hN$k0=zxG?U
z7d4a9F^kM`6&iE$6+53U<E`?|<@jP-x*Gi~4Ug-JKl{!!O2!3!H|sLAchQ4m1dRTH
z9ZOypG_8c%35drX*o~V-9-+CqFB0PjV>Q-Zk`)t+YbqS-upTR}usKoKQbeAhT3RPI
z&FE`>C(_MfzdXeR$L2I(O&X~8L#6@4Urqz5zp#S;O@V)Nyf@v^zh(ah8l$}GBxw5=
zg?Aar-H$e%bkJ@x^e)D7k@bKE?w01s<L!}yXGc$pWX{f32hRb%>#E1Z)vg>}n;Ca0
zV+|rOEp53jthsbh_f1_{Wxk?#*;GDr0tK6qhCJVmxdN&~`>1?XDB7e`my-gQT9Wgn
zoi7E*^=5DemRlrxKIlu!8++Fmv&x|r^4UH`R!rlnNyDgQRXB_3in~B=$ovOoN-EUY
zU`DRV@gPujkMIV2$m-<!9c@8f2K9F~hiOlUa_nA~L<PI8$u?{VD}}7S>Bzs;`#S7t
zSAj*0K$DMe8|7q5H}4ha4<!|!KV2|Ee|gaVT>wMr(K8<r$NwTCgg@8A0|dPPy&nD)
zz*Z&X{EteAGHf+O#vgB!yGoRZRB(CmEUUaGzM_4{yofVhd(>0|_fD=Tai8+8LyDZZ
zRjP<|-V=!yS^YVguOot^g$W)lVJb5|5BK&(QlY<&t%vUUa(1nll=|Vt)Tp575AhB$
z8Qw#hnJC$^^U`HgYGSvaB<8<xDb=Uhh3gHPesAd8+32~~m6B(efn1DP_Avc)-4}|t
z(lrL(Vr7d%U%c*jI8^9YV0tom>Gmz5LkeT5TDpz+xXksZWrz55Px6R-W^p6EAfTwu
zlO51X!kiF<Z?&O0M^`K)rhfNx7y*Vf+-X^m&}2kquCG91<fWlRu)Xi@SUa1?0UzE=
z`;19e@g6&7nkR4m`dzDzXN<ySHI*B|r^J?bR`T3Vz39I%!mAZ<Ra|e0a#LkEnAQDx
z5ov9z&7Vt5jzHnRm)P$oUEeo7ng1xOKZ-_qCrKI+OH##r`bLoy+_76SZ-+C#g9_%_
zwWsF;+|9cc70Kfwjp(d|0(F;i2R~t(GUy(5wl>YPbU%@4Qzr0|H-Bi#XVCS8p3TVK
zKK_%usRb3?D7~wd)2JLn>T*nv*reFtmFuXckgMNp8I;zs=5Ep0Gl|#k(?lP|zBJRA
zO)*=v{5W)}VktT9tKIbg^WciErS7;4FSI-}Uh$~oE)#ua=WDA2#viFjQyBtJ#7=C?
z_1Ujkwa#*e@9tK4s(GrlU9fY|hW-rddfI7Sjp+!&&B0E|rU-tCsE6`j4z>!x!GAc|
zTU7-0kB0rHC=AZL(5sVRXzE<8ii{$qL4`~{l0AO7Q3dm)h{wXwVp>DD%T-RyAWJ6a
z+1Y#R)rV0%=cK}S93EIZ3g@mf<q#3LCY^SmYZPCc7mJ`^T8!;g$nNVi+V6Xbh1f%*
z?tx-pq03mwi7$_Y0`;C)Fzp)CC~Pv+4iPjTWVQ<&EE5e&8T2p^ED&zU$t)(j-e=+E
zeUje4r|;u~e4o~8vTnX->V`uskrp<iMB{0l>hlb1$@2!}ca3$%lQ=KwSz!bD?$0GS
z=*TiNC|HlkcVIXtB$zxJ-tRP&>31;L`6>QJU-!pH8k`=^1In)t+qJtqkE<>b&~t8)
zy;N8+mcRC%u3CkFkSFPF*TcCI!l$aJ9V`!HO?i@D%4V_bp#IVv^m6D~OAx1m&q=ov
z)GS#SH}a@TpjIpW$XB9GF?+?SDk&f$zWkSBRwvl=-;4RjDu|q!{uUGY*ZJ>v4Ws)5
zQ3o1V;p!x)`W6{a@U!~HRH%^>PrbQ1lO{?=)>DM#lEO?zU5rHQ1jUPGY7gWjzM|GV
zLX(%J^WaNs;!ESseFU_YQ=_U;*;*%fvahpFdF(nwdFW=L(XuH%-2A;!TD}44zU;W_
zK=S4<8v(wix5ot?tMBslx+e*-9Is|8m7mhgzEREhtyd~1!a|pYl~^d1OQ{9NkyWVZ
zwd?$y&$0_y93A35r#N(?%EhX>bI-i*Sa|%vQInyN!nG`o?4iW|-BH2K+@Vq@UO1h;
zW)KlN!czAYO?{wx&fD@0Dwbyy<8*<-<lUR$KBjh)2icCwF1?4DHiG0mTdT(Bzb|WC
zuKV|{A(~eHmm)AEApf6;V5{Ek{2`3LvH$qBrlW^xbW|tZ3w!!At!FIx?-LQ|qtBf_
z#ZmM8Yt#H|bX%TVKCFy9(*B52h}oV0ekqwnTn`B@t;dW$Op*Qq7?l#9-up#=yk%)<
zsKUFcbasclcgab|3JE&Hl%Z6U5!uI}<mN@xNbhK*RY2A!yMAhmLoED4m=L7a=(=f)
znONv$7ya0b<|H;-Oo5BSljxg%LAOq&s>wW2yoXJG>8^FJWZ6N`@7g<6^V7AY#zF1;
z7I}2Evb&brL@A`%XLba1?_~0OTF7|dUMX0#V85+LL?*Bts&+`UCg-w6oSM>(S4YlI
znxd1HrG&|an4qD*%=s_n>b0A?;^th^v339X<L`_VLt?-`zccce=)q5u%{37BJpkv3
z{jL=U{y-~!E^q(w7Z;2xr#}W8DBywf4a6&PntzWUicoMOFXSVvqccW{)5*sNi_%1*
zV;DhBQSjS6O2|DPu0L=34&jdp2*lz!QJg<wV4Dv{|9G+>@_!^~hS`MTL@@r_eZSx4
zkHH7}<9@q`Vsr3#MVde6BeQ(-`t!|OVQXOazZ$%)^Z6gfZ^KOtd^%NV@AeiM+~Y_)
zN8EI1Xh=Ij^l4$dfN8RTx$W^Zl0Zey?lS(6X4jeJ(u2bU)vFY>QPTXMqp)=RUlnfI
zc*rO(4m=DEyQ|7!b1v{>#YFJh;|7n7K;MD6oSIO^r7H2Z!@8~cMOd8+LHcoyaREay
zb?O$AyJn9Ydql(;wA7I)g(MfTag3Zz7IuHvwRr8)@lgtIW+JTNfV)ho&bgtHDo$R@
ztKJgYr7v+YGp@{T7jGFpB0D9+DitT`M)ZV@Dca+J$ca$Hog}-QX&)@d2>LE45~Rw$
zw>Xd!7pog6Ry!q=L}y8MkJN2WY(e7rg2Hpcri*Hg>8D4PZdh_&Vr`~=Z!XCCj@SJ0
z87`|c)D*kN&#rnJ+z!h5a%c@3*sdW)rC-lB%b9bPKlZ+Vq2l6L5$X_@yHe*<>3)$C
zExMOY_cX6Ydo408QWuXN+)vpo8h_0~BA0bqE_^9+n(l$xC#DSrwCli`QL%6zio>ai
zk6B~TlsSoJKF@ZXHr*ZTO;hhS_MEI&i`|V_{K6zJqxYw$X8LNvl$T6C@4m)vMCLQB
z|Fj>=J?Bw5K`Wf)^`-s}tBVMWpUUy*1qBDyfTA<WYi*>Y_#~@(p|Niw`g6BkY&hkt
zxvsk;yn3qCgSZ+xv_0ItF0oc-?WG&{ELw>3-hSh#;x*FsPhtBUGlR~Y_Rz8NzrE7$
zQj6WV*Zg+>s^9e$Y3sJGj|Zo|eUDm*k#{6>A-<aDZ=u6KR{lK%tXq{I7l3!f;RAkT
z7cl*qk{=@y{2rb37ALpdjO%V*n=10hC}_7vCxbBl0Z5qk{}+Fh`L3XQ$oN<g{s=a&
zKgUkD6}wX?9^<o_K=A*=kM+Rgv6#(8A_g_WycreWy#Aa9j;*I!V&A`(Hvr?01mr!A
zd=FZqJzO+Z|Hk>1gtVlrl&qqJq@=8jyp)urjI6Y*gruCDq@t{}w49=>gtWAboQ#z8
zCWWnbibn+y>NYQ9J;Kew`uWYlTXWzx(-OA7e(niBzDM|1;H`;k+kuUdl)CM}lD`6P
z%{to-{N~?)rG5q8ntHVz*vj-*%*YsQ=Xq=1({|uwvtNN_eg)o|G_)Pq*!)*u*<XRT
zX8QaBeCStTxnF^|CIal<CZ{*Q0&gns5Aff!1-6DT#kPU3SZ~4o%LewxxdXpGd+W)r
z!nS9J+5g>V|Gqi?ie>95r0g~<k6iv7%TN2|kIsyLMX~jS3vNSk#PuJd_;Y@?p5{nx
zLl))!H<0~2!TFV$t!EDhwmqN7^PfEb&pFxZc*||WbjRy&VA^)F@hdl5y*`y~IJmt3
zDULtqXsd&xz6~3h&)>lI7k=fh{B5<=4Yy%D?fbvM_$#-+?*dy5-R*0GjQ@Xudz)wQ
zD_>iUknL-OU%=me_MdaG)ehNA{rmkT8MF<{7W3u@rN58DR%?Rx_Xi-qrEPu2Z}!Wt
z&|B>Qx^2*O!CNT!X@C3*zjc@9*am-YEBqhZ4#s*UBqF~7u@j5_1mFAHZ~uDl-pu7h
kUJ4;U-VwO(pU@op@i*emBPPPmO<RUwFY>c_|BrkB4~N;s=l}o!

literal 0
HcmV?d00001

diff --git a/src/surrogate/zmqml/model/mlpacketdelay.py b/src/surrogate/zmqml/model/mlpacketdelay.py
new file mode 100644
index 00000000..4cab7c46
--- /dev/null
+++ b/src/surrogate/zmqml/model/mlpacketdelay.py
@@ -0,0 +1,258 @@
+import argparse
+import os
+import random
+import time
+import warnings
+from itertools import product
+from pathlib import Path
+
+import pandas as pd
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from sklearn.preprocessing import MinMaxScaler
+
+
+warnings.filterwarnings("ignore")
+
+class MLP(nn.Module):
+    def __init__(self, total_terminals, max_packet_size, h_dim, out_dim, norm_max_list,
+                 norm_min_list, channels=1):
+        super().__init__()
+        self.norm_max_list = norm_max_list
+        self.norm_min_list = norm_min_list
+        self.total_terminals = total_terminals
+        self.max_packet_size = max_packet_size
+        self.channels = channels
+        self.weights = nn.Parameter(torch.Tensor(channels, total_terminals, total_terminals))
+        self.reg = nn.Sequential(
+                nn.Linear(channels + 2, h_dim),
+                nn.ReLU(),
+                nn.Linear(h_dim, out_dim),
+            )
+
+        nn.init.uniform_(self.weights, 0, 1)
+
+    def forward(self, input_seq):
+        tt = self.total_terminals
+        input_src_terminal = F.one_hot(input_seq[:, 0], num_classes=tt)
+        input_src_terminal = input_src_terminal.reshape((-1, 1, 1, tt)).float()
+        input_dest_terminal = F.one_hot(input_seq[:, 1], num_classes=tt)
+        input_dest_terminal = input_dest_terminal.reshape((-1, 1, tt, 1)).float()
+
+        # assuming the input was a single row, it could be written as matrix
+        # multiplication as
+        # combined = input_src_terminal @ self.weights @ input_dest_terminal.T
+        combined = torch.matmul(torch.matmul(input_src_terminal, self.weights), input_dest_terminal)
+
+        input_size = input_seq[:, 2].reshape((-1, 1)).float() / self.max_packet_size
+        input_is_there_another = input_seq[:, 3].reshape((-1, 1)).float()
+
+        input_seq = torch.concat(
+            (combined.reshape((-1, self.channels)),
+             input_size,  # size
+             input_is_there_another,  # is_there_another_pckt_in_queue
+             ),
+            dim=1
+        ).float()
+
+        pred = self.reg(input_seq)
+
+        if not self.training:
+            pred = self.denormalize(pred)
+        return pred
+
+    def denormalize(self, pred_norm):
+        pred = torch.zeros(pred_norm.shape)
+        for i in range(pred_norm.shape[1]):
+            pred[:, i] = pred_norm[:, i]*(self.norm_max_list[i] - self.norm_min_list[i]) + self.norm_min_list[i]
+
+        return pred
+
+def split(data):
+    # removing packets with no info and shuffle data
+    noinfo_index = (data['next_packet_delay'] != -1)
+    noinfo2_index = (data['is_there_another_pckt_in_queue'] != 0)
+    data = data[np.bitwise_and(noinfo_index, noinfo2_index)]
+    data = data.sample(frac=1, random_state=1)
+
+    #split data
+    train_data = data[0:int(0.8*len(data))]
+    test_data = data[int(0.8*len(data)):]
+
+    return train_data, test_data
+
+def extract_process_data(train_data, X_columns, Y_columns):
+    # encode input data with one-hot encoding
+    # categories = np.unique(train_data[X_columns[0]].values)
+    # X_train = np.zeros((train_data.shape[0], len(X_columns), len(categories)))
+    # for i in range(len(X_columns)):
+    #     column = train_data[X_columns[i]].values
+    #     categories = np.unique(column)
+    #     X_train[:, i, :] = np.array([np.array(item == categories, dtype=int) for item in column])
+    X_train = train_data[X_columns].values
+
+    # normalize output data with minimax
+    scaler = MinMaxScaler()
+    Y_train = scaler.fit_transform(train_data[Y_columns].values) # x_normalized = (x-x_min)/(x_max-x_min)
+    norm_max_list, norm_min_list = scaler.data_max_, scaler.data_min_
+
+    return X_train, Y_train, norm_max_list, norm_min_list
+
+def extract_data(test_data, X_columns, Y_columns):
+    X_test = test_data[X_columns].values
+    Y_test = test_data[Y_columns].values
+
+    return X_test, Y_test
+
+def cal_rmse(pred, test):
+    return np.sqrt(np.mean(np.square(pred - test)))
+
+
+def main_func(args):
+
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.manual_seed(args.seed)
+        torch.cuda.manual_seed_all(args.seed)
+    os.environ['PYTHONHASHSEED'] = str(args.seed)
+    os.environ['OMP_NUM_THREADS'] = '1'
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+    data = pd.read_csv(args.input_file)
+    # print(data)
+
+    train_data, test_data = split(data)
+
+    start_time = time.time()
+
+    if args.method == 'MLP':
+        X_columns = ['src_terminal', 'dest_terminal', 'size', 'is_there_another_pckt_in_queue']
+        Y_columns = ['latency', 'next_packet_delay']
+        X_train, Y_train, norm_max_list, norm_min_list = extract_process_data(train_data, X_columns, Y_columns)
+        X_test, Y_test = extract_data(test_data, X_columns, Y_columns)
+        y_dim = Y_train.shape[1]
+
+        if args.load_model:
+            print("Loading model from disk")
+            mlp = torch.jit.load(args.model_path)
+        else:
+            print("Generating model from scratch")
+            mlp = MLP(args.terminals, args.pck_size, args.h_dim, y_dim, torch.FloatTensor(norm_max_list), torch.FloatTensor(norm_min_list))
+
+        optimizer = torch.optim.Adam(mlp.parameters(), lr=0.001)
+        loss_function = nn.MSELoss()
+        mlp.train()
+
+        all_idx = list(range(len(X_train)))
+        random.shuffle(all_idx)
+
+        batch_size = 1024
+        batch_num = len(X_train) // batch_size if len(X_train) % batch_size == 0 else len(X_train) // batch_size + 1
+
+        for i in range(args.epoch):
+            epoch_loss = 0
+            for batch_idx in range(batch_num):
+                is_final_batch = (batch_idx == (batch_num - 1))
+
+                if not is_final_batch:
+                    idx = all_idx[batch_idx * batch_size: (batch_idx + 1) * batch_size]
+                else:
+                    idx = all_idx[batch_idx * batch_size:]
+
+                x, y = X_train[idx], Y_train[idx]
+
+                x, y = torch.LongTensor(x), torch.FloatTensor(y)
+
+                optimizer.zero_grad()
+                y_pred = mlp(x)
+                loss = loss_function(y_pred, y)
+                loss.backward()
+                optimizer.step()
+                epoch_loss = epoch_loss + loss
+
+            print(i, epoch_loss)
+
+        # EVAL has to be called before saving the state of the network
+        mlp.eval()
+
+        mlp_scripted = torch.jit.script(mlp)
+        mlp_scripted.save(args.model_path)
+
+        X_test = torch.LongTensor(X_test)
+        with torch.no_grad():
+            Y_pred = mlp(X_test).numpy()
+
+        rmse = cal_rmse(Y_pred, Y_test)
+
+    elif args.method == 'Average':
+        train_data = train_data[['src_terminal', 'dest_terminal', 'latency']]
+        test_data = test_data[['src_terminal', 'dest_terminal', 'latency']]
+
+        mean_src_dest = train_data.groupby(['src_terminal', 'dest_terminal']).mean()
+        mean_src = train_data.groupby(['src_terminal']).mean()
+        mean_dest = train_data.groupby(['dest_terminal']).mean()
+        total_avg = train_data.values.mean()
+
+        terminal2terminal = np.zeros((72, 72))
+        # terminal2terminal = np.ones((72, 72)) * total_avg
+        for i, j in product(range(72), range(72)):
+            if mean_src_dest.index.isin([(i, j)]).any():
+                latency = mean_src_dest.loc[(i, j), 'latency']
+            elif mean_src.index.isin([i]).any() == True and  mean_dest.index.isin([j]).any()== False:
+                latency = mean_src.loc[i, 'latency'].item()
+            elif mean_src.index.isin([i]).any() == False and  mean_dest.index.isin([j]).any()== True:
+                latency = mean_dest.loc[j, 'latency'].item()
+            else:
+                latency = total_avg
+            terminal2terminal[i, j] = latency
+
+        items = test_data[['src_terminal', 'dest_terminal']].values
+        src = items[:, 0]
+        dest = items[:, 1]
+        pred = terminal2terminal[src, dest]
+
+        rmse = cal_rmse(pred, test_data['latency'].values)
+
+    end_time = time.time()
+
+    print('rmse:', rmse)
+    print('Time:', end_time - start_time)
+
+    if args.plot_weights:
+        if args.method == 'MLP':
+            with torch.no_grad():
+                terminal2terminal = mlp.weights.numpy()
+                terminal2terminal = terminal2terminal[0, :, :]  # extracting first channel weights
+
+        import matplotlib.pyplot as plt
+        fig, ax = plt.subplots()
+        c = ax.imshow(terminal2terminal, cmap='RdBu', interpolation='nearest')
+        fig.colorbar(c, ax=ax)
+        plt.show()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Delay Prediction")
+    parser.add_argument('--method', type=str, default='MLP', choices=['MLP','Average'])
+    parser.add_argument('--epoch', type=int, default=10, help='epochs to train')
+    parser.add_argument('--h-dim', type=int, default=16, help='dimension of the hidden layer')
+    parser.add_argument('--seed', type=int, default=0)
+    parser.add_argument('--pck_size', type=int, default=4096, help='maximum packet size in simulation')
+    parser.add_argument('--terminals', type=int, default=72, help='total number of terminals in the network')
+    parser.add_argument('--input-file', type=Path, default=Path('packet-delays.txt'))
+#    parser.add_argument('--load-model', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--load-model', action='store_true', default=False,
+                        help='whether to load model from file or start from scratch')
+    parser.add_argument('--model-path', type=Path, default=Path('MLP_Surrogate-combined.pt'))
+#    parser.add_argument('--plot-weights', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--plot-weights', action='store_true', default=False,
+                        help='whether to show weights from source to destination')
+
+    args = parser.parse_args()
+
+    main_func(args)
diff --git a/src/surrogate/zmqml/model/train.sh b/src/surrogate/zmqml/model/train.sh
new file mode 100644
index 00000000..0b52b953
--- /dev/null
+++ b/src/surrogate/zmqml/model/train.sh
@@ -0,0 +1,3 @@
+python mlpacketdelay.py --method MLP --epoch 50 \
+	   --input-file data/packets-delay.csv \
+	   --model-path ml-model.pt
diff --git a/src/surrogate/zmqml/pyzmqmltest.py b/src/surrogate/zmqml/pydemozmqmlrequester.py
similarity index 74%
rename from src/surrogate/zmqml/pyzmqmltest.py
rename to src/surrogate/zmqml/pydemozmqmlrequester.py
index 01e21d26..feaab869 100755
--- a/src/surrogate/zmqml/pyzmqmltest.py
+++ b/src/surrogate/zmqml/pydemozmqmlrequester.py
@@ -58,7 +58,7 @@ def zmqml_request(cmd, args=None, bindata=b"None"):
 #
 #
 def measure_latency():
-    print("measure latency")
+    print("* measure_latency")
     tss = []
     n = 1000
     for i in range(0,n):
@@ -70,7 +70,7 @@ def measure_latency():
 #
 #
 def test_blocking_sleep():
-    print("sleep")
+    print("* test_blocking_sleep")
 
     target = ["sleep", "1"] # this works like args to main() in C
 
@@ -81,7 +81,7 @@ def test_blocking_sleep():
 #
 #
 def test_nonblocking_sleep():
-    print("test nonblocking")
+    print("* test_nonblocking_sleep")
 
     target = ["sleep", "2"]
 
@@ -101,25 +101,54 @@ def test_nonblocking_sleep():
         cnt = cnt + 1
     print(f"done cnt={cnt}")
 
+
+#
+#
+def test_mlpacketdelay_training():
+    print("* test_mlpacketdelay_training")
+
+    target = ["mlpacketdelay_training", 
+              "--method", "MLP", "--epoch", "1",
+              "--input-file", "model/data/packets-delay.csv",
+              "--model-path", "ml-model.pt"]
+    
+    ret = zmqml_request("launch", target)
+    status = ret["status"]
+    id = ret["id"]
+    print(f'status={status} id={id}')
+
+    cnt = 0
+    while True:
+        ret = zmqml_request("query", [id])
+        status = ret["status"]
+        print(f"status={status}")
+        if status == "done":
+            break
+        time.sleep(.5)
+        cnt = cnt + 1
+    print(f"done cnt={cnt}")
+    
 #
 #
 def test_send_binary():
-    print("test nonblocking")
+    print("* test_send_binary")
 
     data = b""
-    with open('ml-model.pt', 'rb') as f:
+    with open('model/ml-model.pt', 'rb') as f:
         data = f.read()
     
-    ret = zmqml_request("send", ["foobar.dat"], data)
+    ret = zmqml_request("send", ["tmptestsend.dat"], data)
     status = ret["status"]
     print(f"status={status}")
 
     
 if __name__ == "__main__":
+    test_mlpacketdelay_training()
+    
     test_send_binary()
-    measure_latency()
     test_blocking_sleep()
     test_nonblocking_sleep()
-    #test_mlpacketdelay_training()
+    measure_latency()
+
     zmqml_request("exit")
     sys.exit(0)
diff --git a/src/surrogate/zmqml/runmlpacketdelay.py b/src/surrogate/zmqml/runmlpacketdelay.py
index cd9d2b5e..11a1ce33 100644
--- a/src/surrogate/zmqml/runmlpacketdelay.py
+++ b/src/surrogate/zmqml/runmlpacketdelay.py
@@ -4,7 +4,7 @@
 
 from model import mlpacketdelay
 
-def run_training(done_event):
+def run_mlpacketdelay_training(args):
     parser = argparse.ArgumentParser(description="Delay Prediction")
     parser.add_argument('--method', type=str, default='MLP', choices=['MLP','Average'])
     parser.add_argument('--epoch', type=int, default=10, help='epochs to train')
@@ -21,9 +21,11 @@ def run_training(done_event):
     parser.add_argument('--plot-weights', action='store_true', default=False,
                         help='whether to show weights from source to destination')
 
-    args = parser.parse_args(["--method", "MLP", "--epoch", "1", # 50
-                              "--input-file", "model/data/packets-delay.csv",
-                              "--model-path", "ml-model.pt"])
+    # parsed_args = parser.parse_args(["--method", "MLP", "--epoch", "1", # 50
+    #                           "--input-file", "model/data/packets-delay.csv",
+    #                           "--model-path", "model/ml-model.pt"])
+
+    parsed_args = parser.parse_args(args)
+
+    mlpacketdelay.main_func(parsed_args)
 
-    mlpacketdelay.main_func(args)
-    done_event.set()
diff --git a/src/surrogate/zmqml/zmqmlrequester.cpp b/src/surrogate/zmqml/zmqmlrequester.cpp
index 004b4eca..6f43758c 100644
--- a/src/surrogate/zmqml/zmqmlrequester.cpp
+++ b/src/surrogate/zmqml/zmqmlrequester.cpp
@@ -76,11 +76,11 @@ vector<string> zmqml_request(const string& cmd,
         ret.push_back(response["status"].GetString());
 
         if (response.HasMember("et")) {
-            ret.push_back(to_string(response["et"].GetDouble()));
+            ret.push_back(response["et"].GetString());
         }
 
         if (response.HasMember("id")) {
-            ret.push_back(to_string(response["id"].GetInt()));
+            ret.push_back(response["id"].GetString());
         }
     } else {
         ret.push_back("failed");
@@ -90,22 +90,6 @@ vector<string> zmqml_request(const string& cmd,
 }
 
 
-// void test_training() {
-//     std::cout << "test training" << std::endl;
-//     send_cmd("start_training");
-//     int cnt = 0;
-
-//     while (true) {
-//         auto result = send_cmd("status_training");
-//         std::string r = result.first;
-//         if (r == "True") break;
-// 		//cout << r << endl;
-// 		std::this_thread::sleep_for(std::chrono::seconds(1));
-//         ++cnt;
-//     }
-//     std::cout << "done cnt=" << cnt << std::endl;
-// }
-
 
 
 #if 0
diff --git a/src/surrogate/zmqml/zmqmlserver.py b/src/surrogate/zmqml/zmqmlserver.py
index 90ca0088..066b0512 100755
--- a/src/surrogate/zmqml/zmqmlserver.py
+++ b/src/surrogate/zmqml/zmqmlserver.py
@@ -15,7 +15,7 @@
 # from dataclasses import dataclass
 
 # TODO: abstract a mechanism to call training
-from runmlpacketdelay import run_training
+from runmlpacketdelay import run_mlpacketdelay_training
 
 #import os
 #model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "model"))
@@ -66,14 +66,26 @@ def query(self):
 #
 def launch_sleep(done_event, args):
     if debug:
-        print("Dummy started")
+        print("sleep started")
     time.sleep(int(args[0]))
     if debug:
-        print("Dummy done")
+        print("sleep done")
     done_event.set()
 
+def launch_mlpacketdelay_training(done_event, args):
+    if debug:
+        print("mlpacketdelay_training started")
+
+    run_mlpacketdelay_training(args)
+
+    if debug:
+        print("mlpacketdelay_training done")
+    done_event.set()
+
+    
 list_nonblockingcalls = {
-    "sleep": launch_sleep
+    "sleep": launch_sleep,
+    "mlpacketdelay_training": launch_mlpacketdelay_training,
 }
 
 #
@@ -168,18 +180,18 @@ def zmq_cmd_listener():
             retmsg = {"status":"done"}
         elif cmd == "execute":
             (status, et) = blockingcall(args)
-            retmsg = {"status":status, "et":et}
+            retmsg = {"status":status, "et":str(et)}
         elif cmd == "launch":
             (status, id) = nonblockingcall(args)
-            retmsg = {"status":status, "id":id}
+            retmsg = {"status":status, "id":str(id)}
         elif cmd == "query":
-            targetid = args[0]
+            targetid = int(args[0])
             (status, et) = launched_threads[targetid].query()
-            retmsg = {"status":status, "et":et}
+            retmsg = {"status":status, "et":str(et)}
         elif cmd == "send":
             destfn = args[0]
             (status, et) = receivedata(args, bindata)
-            retmsg = {"status":status, "et":et}
+            retmsg = {"status":status, "et":str(et)}
 
         # send response back to the requester
         socket.send_json(retmsg)

From b51ffbf3dc237210229846efe3f55224a00e3930 Mon Sep 17 00:00:00 2001
From: Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
Date: Tue, 7 May 2024 15:49:24 -0500
Subject: [PATCH 084/188] notes

---
 src/surrogate/zmqml/NOTES.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 src/surrogate/zmqml/NOTES.txt

diff --git a/src/surrogate/zmqml/NOTES.txt b/src/surrogate/zmqml/NOTES.txt
new file mode 100644
index 00000000..1a903673
--- /dev/null
+++ b/src/surrogate/zmqml/NOTES.txt
@@ -0,0 +1,15 @@
+
+Please copy packets-delay.csv to src/surrogate/zmqml/model/data/
+before starting the demo.
+
+You need to open two terminals for this demo.
+
+In the first terminal,
+$ ./zmqmlserver.py
+
+In the second terminal,
+$ ./runcppdemo.sh
+
+
+
+

From 32f04828224f49c2d1537a077f633de925a729f2 Mon Sep 17 00:00:00 2001
From: Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
Date: Tue, 7 May 2024 15:50:42 -0500
Subject: [PATCH 085/188] data location change

---
 src/surrogate/zmqml/NOTES.txt               | 4 ++--
 src/surrogate/zmqml/demozmqmlrequester.cpp  | 2 +-
 src/surrogate/zmqml/pydemozmqmlrequester.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/surrogate/zmqml/NOTES.txt b/src/surrogate/zmqml/NOTES.txt
index 1a903673..5e82b2eb 100644
--- a/src/surrogate/zmqml/NOTES.txt
+++ b/src/surrogate/zmqml/NOTES.txt
@@ -1,6 +1,6 @@
 
-Please copy packets-delay.csv to src/surrogate/zmqml/model/data/
-before starting the demo.
+Please copy packets-delay.csv to src/surrogate/zmqml/model/ before
+starting the demo.
 
 You need to open two terminals for this demo.
 
diff --git a/src/surrogate/zmqml/demozmqmlrequester.cpp b/src/surrogate/zmqml/demozmqmlrequester.cpp
index 0c9ffb35..95e866a4 100644
--- a/src/surrogate/zmqml/demozmqmlrequester.cpp
+++ b/src/surrogate/zmqml/demozmqmlrequester.cpp
@@ -95,7 +95,7 @@ void test_mlpacketdelay_training() {
 
     vector<string> args = {"mlpacketdelay_training", 
                            "--method", "MLP", "--epoch", "1",
-                            "--input-file", "model/data/packets-delay.csv",
+                            "--input-file", "model/packets-delay.csv",
                             "--model-path", "ml-model.pt"};
 
     vector<string> ret = zmqml_request("launch", args);
diff --git a/src/surrogate/zmqml/pydemozmqmlrequester.py b/src/surrogate/zmqml/pydemozmqmlrequester.py
index feaab869..b6929279 100755
--- a/src/surrogate/zmqml/pydemozmqmlrequester.py
+++ b/src/surrogate/zmqml/pydemozmqmlrequester.py
@@ -109,7 +109,7 @@ def test_mlpacketdelay_training():
 
     target = ["mlpacketdelay_training", 
               "--method", "MLP", "--epoch", "1",
-              "--input-file", "model/data/packets-delay.csv",
+              "--input-file", "model/packets-delay.csv",
               "--model-path", "ml-model.pt"]
     
     ret = zmqml_request("launch", target)

From c589d49a7996f03765cf7baabaadbbcd59b4e443 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 8 May 2024 18:48:26 -0400
Subject: [PATCH 086/188] Injecting iteration time as an argument

---
 .../workload-iteration-times/print-iterations.py  |  7 +++++++
 src/network-workloads/model-net-mpi-replay.c      | 15 ++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py
index b60bc5ae..efb637e5 100644
--- a/scripts/workload-iteration-times/print-iterations.py
+++ b/scripts/workload-iteration-times/print-iterations.py
@@ -138,6 +138,7 @@ def avg(it: np.int64) -> np.float64:
     _ = parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', default=None)
     _ = parser.add_argument('--iter-count', dest='iter_count', action='store_true')
     _ = parser.add_argument('--legends', nargs='+', help='Application names', required=False)
+    _ = parser.add_argument('--no-show-plot', dest='show_plot', action='store_false')
     args = parser.parse_args()
 
     if args.output:
@@ -152,6 +153,12 @@ def avg(it: np.int64) -> np.float64:
 
     parsed_logs = parse_iteration_log(args.file)
 
+    final_timestamp = float(max(job['time'].max() for job in parsed_logs.values()))
+    print("Simulation end =", final_timestamp)
+
+    if not args.show_plot:
+        exit(0)
+
     # Creating plot with data
     fig, ax = plt.subplots(figsize=(6, 3), layout="constrained")
     ax.set_xlabel("Total virtual time (ns)")
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 7f9f554b..6a063d6e 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -158,6 +158,10 @@ static double sampling_interval = 5000000;
 static double sampling_end_time = 3000000000;
 static int enable_debug = 0;
 
+// More hardcoded values for surrogate switch
+static int start_iter_skip_app = -1;
+static double avg_time_app = -1.0;
+
 /* set group context */
 struct codes_mctx mapping_context;
 enum MAPPING_CONTEXTS
@@ -1132,9 +1136,9 @@ static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) {
 
 static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = {
     // app_id, skip_at_iter, resume_at_iter, time_per_iter, done
-    //{0,  3,  21, 14403235, {false}},
-    //{1,  7,  59,  4982017, {false}},
-    //{1, 79, 195,  3581337, {false}},
+    {0,  3,  21, 14403235, {false}},
+    {1,  7,  59,  4982017, {false}},
+    {1, 79, 195,  3581337, {false}},
 };
 
 struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {
@@ -3327,6 +3331,8 @@ const tw_optdef app_opt [] =
     TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
     TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
 	TWOPT_CHAR("offset_file", offset_file, "offset file name"),
+    TWOPT_UINT("start-iter-skip-app", start_iter_skip_app, "Hardcoded value to indicate when to switch to surrogate for app 1"),
+    TWOPT_STIME("avg-time-app", avg_time_app, "Hardcoded value for Avg. iteration time for app 1"),
 #ifdef ENABLE_CORTEX_PYTHON
 	TWOPT_CHAR("cortex-file", cortex_file, "Python file (without .py) containing the CoRtEx translation class"),
 	TWOPT_CHAR("cortex-class", cortex_class, "Python class implementing the CoRtEx translator"),
@@ -3470,6 +3476,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
   tw_opt_add(app_opt);
   tw_opt_add(cc_app_opt);
   tw_init(argc, argv);
+  skip_iter_config[2].skip_at_iter = start_iter_skip_app;
+  skip_iter_config[2].time_per_iter = avg_time_app;
+
 #ifdef USE_RDAMARIS
     if(g_st_ross_rank)
     { // keep damaris ranks from running code between here up until tw_end()

From 9f605f09421a8c74da2e308aad9c2fb4f9d71a10 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 7 Jul 2024 19:26:37 -0400
Subject: [PATCH 087/188] Fixing compilation warning
 `incompatible-pointer-types`

This bug was introduced when building the network surrogate. To build
the surrogate, we need to track the input queue "size" (the input
message queue to the routers from the workloads).

If the network surrogate wouldn't live down in specific network models
(it has been implemented right now only on dragonfly-dally), it should
actually reside within the model-net layer, and thus, individual models
shouldn't need to track the state of the input queue.

Hopefully, we can move the network surrogate from dragonfly-dally into
model-net.
---
 src/network-workloads/model-net-mpi-replay.c | 1 +
 src/networks/model-net/dragonfly.c           | 3 ++-
 src/networks/model-net/fattree.c             | 3 ++-
 src/networks/model-net/loggp.c               | 6 ++++--
 src/networks/model-net/simplenet-upd.c       | 7 +++++--
 src/networks/model-net/simplep2p.c           | 6 ++++--
 src/networks/model-net/slimfly.c             | 3 ++-
 src/networks/model-net/torus.c               | 3 ++-
 8 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 1433b2a3..26d31694 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -18,6 +18,7 @@
 #include "codes/quickhash.h"
 #include "codes/codes-jobmap.h"
 #include "codes/congestion-controller-core.h"
+#include "codes/surrogate/init.h"
 
 /* turning on track lp will generate a lot of output messages */
 #define DBG_COMM 1
diff --git a/src/networks/model-net/dragonfly.c b/src/networks/model-net/dragonfly.c
index eb5e81bb..faee79d8 100644
--- a/src/networks/model-net/dragonfly.c
+++ b/src/networks/model-net/dragonfly.c
@@ -1087,7 +1087,8 @@ static tw_stime dragonfly_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
     (void)sched_params;
diff --git a/src/networks/model-net/fattree.c b/src/networks/model-net/fattree.c
index eb1c49b5..e7db6c61 100644
--- a/src/networks/model-net/fattree.c
+++ b/src/networks/model-net/fattree.c
@@ -1527,7 +1527,8 @@ static tw_stime fattree_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
 #if DEBUG_RC
   packet_event_f++;
diff --git a/src/networks/model-net/loggp.c b/src/networks/model-net/loggp.c
index 22904287..def3eb22 100644
--- a/src/networks/model-net/loggp.c
+++ b/src/networks/model-net/loggp.c
@@ -119,7 +119,8 @@ static tw_stime loggp_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt);
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue);
 static void loggp_packet_event_rc(tw_lp *sender);
 
 tw_stime loggp_recv_msg_event(
@@ -611,7 +612,8 @@ static tw_stime loggp_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
      tw_event * e_new;
diff --git a/src/networks/model-net/simplenet-upd.c b/src/networks/model-net/simplenet-upd.c
index 5b5edc78..5955f228 100644
--- a/src/networks/model-net/simplenet-upd.c
+++ b/src/networks/model-net/simplenet-upd.c
@@ -100,7 +100,8 @@ static tw_stime simplenet_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt);
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue);
 
 static void simplenet_packet_event_rc(tw_lp *sender);
 
@@ -523,7 +524,8 @@ static tw_stime simplenet_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
      (void)message_offset; // unused...
      (void)sched_params; // unused...
@@ -549,6 +551,7 @@ static tw_stime simplenet_packet_event(
      msg->event_type = SN_MSG_START;
      msg->is_pull = req->is_pull;
      msg->pull_size = req->pull_size;
+     //msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue;
 
      /*Fill in simplenet information*/
      if(is_last_pckt) /* Its the last packet so pass in remote event information*/
diff --git a/src/networks/model-net/simplep2p.c b/src/networks/model-net/simplep2p.c
index e7609870..6eb9ac0d 100644
--- a/src/networks/model-net/simplep2p.c
+++ b/src/networks/model-net/simplep2p.c
@@ -132,7 +132,8 @@ static tw_stime simplep2p_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt);
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue);
 
 static void simplep2p_packet_event_rc(tw_lp *sender);
 
@@ -807,7 +808,8 @@ static tw_stime simplep2p_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
     (void)sched_params;
diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c
index da122ec6..94188942 100644
--- a/src/networks/model-net/slimfly.c
+++ b/src/networks/model-net/slimfly.c
@@ -1158,7 +1158,8 @@ static tw_stime slimfly_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     // printf("slim packet event\n");
 
diff --git a/src/networks/model-net/torus.c b/src/networks/model-net/torus.c
index 7db338e6..6ae6c7e6 100644
--- a/src/networks/model-net/torus.c
+++ b/src/networks/model-net/torus.c
@@ -498,7 +498,8 @@ static tw_stime torus_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset; // not using atm...
     (void)sched_params; // not using atm...

From 1df7bb7ce933119f5b54b8a9aa43a6ce6d6acc7d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 8 Jul 2024 22:17:38 -0400
Subject: [PATCH 088/188] Updating code after ROSS change on gvt hook

---
 codes/surrogate/switch.h |  2 +-
 src/surrogate/init.c     | 12 ++---
 src/surrogate/switch.c   | 97 ++++++++++++++++------------------------
 3 files changed, 42 insertions(+), 69 deletions(-)

diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h
index 3a56360a..c538e769 100644
--- a/codes/surrogate/switch.h
+++ b/codes/surrogate/switch.h
@@ -61,7 +61,7 @@ extern struct switch_at_struct switch_at;
 
 
 // Switch
-void director_call(tw_pe * pe, tw_event_sig gvt_sig);
+void director_call(tw_pe * pe);
 
 #ifdef __cplusplus
 }
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 16772fc2..4ed587c6 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -64,15 +64,9 @@ void surrogate_configure(
         PRINTF_ONCE("\n");
 
         // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
-        g_tw_gvt_arbitrary_fun = director_call;
-
-#ifdef USE_RAND_TIEBREAKER
-        tw_event_sig time_stamp = {0};
-        time_stamp.recv_ts = switch_at.time_stampts[0];
-        tw_trigger_arbitrary_fun_at(time_stamp);
-#else
-        tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]);
-#endif
+        g_tw_gvt_hook = director_call;
+
+        tw_trigger_gvt_hook_at(switch_at.time_stampts[0]);
 
         // freeing timestamps before it dissapears
         for (size_t i = 0; i < len; i++) {
diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 32086f46..f88df3ed 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -74,8 +74,9 @@ static inline bool does_any_pe(bool val) {
 //}
 
 
+static void rollback_and_cancel_events_pe(tw_pe * pe) {
 #ifdef USE_RAND_TIEBREAKER
-static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) {
+    tw_event_sig const gvt_sig = pe->GVT_sig;
     tw_stime const gvt = gvt_sig.recv_ts;
     // Backtracking the simulation to GVT
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
@@ -84,7 +85,7 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) {
     assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
     assert(pe->GVT_sig.recv_ts == gvt);  // redundant but needed because compiler cries that gvt is never used
 #else
-static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
+    tw_stime const gvt = pe->GVT;
     // Backtracking the simulation to GVT
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
         tw_kp_rollback_to(g_tw_kp[i], gvt);
@@ -115,11 +116,12 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) {
     }
 }
 
+static void shift_events_to_future_pe(tw_pe * pe) {
 #ifdef USE_RAND_TIEBREAKER
-static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt_sig) {
-    tw_stime gvt = gvt_sig.recv_ts;  // pe->GVT_sig.recv_ts;
+    tw_event_sig gvt_sig = pe->GVT_sig;
+    tw_stime gvt = gvt_sig.recv_ts;
 #else
-static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
+    tw_stime gvt = pe->GVT;
 #endif
     tw_event * next_event = tw_pq_dequeue(pe->pq);
 
@@ -165,11 +167,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
             next_event->recv_ts += switch_offset;
             next_event->sig.recv_ts = next_event->recv_ts;
         }
-        assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts);
+        assert(next_event->recv_ts >= g_tw_trigger_gvt_hook.sig_at.recv_ts);
 #else
             next_event->recv_ts += switch_offset;
         }
-        assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at);
+        assert(next_event->recv_ts >= g_tw_trigger_gvt_hook.at);
 #endif
 
         // store event in deque_events to inject immediately back to the queue
@@ -272,17 +274,18 @@ static tw_event *** order_events_per_lps(tw_pe * pe) {
 // - Looking at all events in the PE, "freezing" those in the network model
 //   and letting the workload events be processed further
 // - Going through every LP and calling their respective functions
+static void events_high_def_to_surrogate_switch(tw_pe * pe) {
 #ifdef USE_RAND_TIEBREAKER
-static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
+    tw_event_sig gvt_sig = pe->GVT_sig;
 #else
-static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
+    tw_stime gvt = pe->GVT;
 #endif
     if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
     }
 
     tw_event *** lps_events = order_events_per_lps(pe);
-    shift_events_to_future_pe(pe, gvt);
+    shift_events_to_future_pe(pe);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -293,7 +296,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
         // coincide with current GVT (the current GVT often does not
         // correspond to the (last) time stored in KPs).
 #ifdef USE_RAND_TIEBREAKER
-        lp->kp->last_sig = gvt;
+        lp->kp->last_sig = gvt_sig;
 #else
         lp->kp->last_time = gvt;
 #endif
@@ -321,7 +324,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
 
     // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process)
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        rollback_and_cancel_events_pe(pe, gvt);
+        rollback_and_cancel_events_pe(pe);
     }
 
     assert(lps_events[0] != NULL);
@@ -330,12 +333,12 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
 }
 
 
+static void events_surrogate_to_high_def_switch(tw_pe * pe) {
 #ifdef USE_RAND_TIEBREAKER
-static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) {
+    tw_event_sig gvt_sig = pe->GVT_sig;
 #else
-static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
+    tw_stime gvt = pe->GVT;
 #endif
-    (void) pe;
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -347,7 +350,7 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
         // correspond to the (last) time stored in KPs).
 #ifdef USE_RAND_TIEBREAKER
         tw_event_sig const previous_sig = lp->kp->last_sig;
-        lp->kp->last_sig = gvt;
+        lp->kp->last_sig = gvt_sig;
 #else
         tw_stime const previous_time = lp->kp->last_time;
         lp->kp->last_time = gvt;
@@ -385,12 +388,12 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) {
 // This is an impure function, calling it twice WILL give different results. Only call it once!
 bool hit_trigger(tw_stime gvt) {
     if ( switch_at.current_i < switch_at.total
-        && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) {
+        && g_tw_trigger_gvt_hook.active == GVT_HOOK_triggered) {
         double const switch_time = switch_at.time_stampts[switch_at.current_i];
 #ifdef USE_RAND_TIEBREAKER
-        assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
+        assert(g_tw_trigger_gvt_hook.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
 #else
-        assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]);
+        assert(g_tw_trigger_gvt_hook.at == switch_at.time_stampts[switch_at.current_i]);
 #endif
         assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
 
@@ -398,15 +401,8 @@ bool hit_trigger(tw_stime gvt) {
         if (++switch_at.current_i < switch_at.total) {
             double const next_switch = switch_at.time_stampts[switch_at.current_i];
             // Setting trigger for next switch
-    #ifdef USE_RAND_TIEBREAKER
-            tw_event_sig time_stamp = {0};
-            time_stamp.recv_ts = next_switch;
             //printf("Adding a trigger to activate next switch!\n");
-            tw_trigger_arbitrary_fun_at(time_stamp);
-    #else
-            //printf("Adding a trigger to activate next switch!\n");
-            tw_trigger_arbitrary_fun_at(next_switch);
-    #endif
+            tw_trigger_gvt_hook_at(next_switch);
         }
         //
         return true;
@@ -416,25 +412,15 @@ bool hit_trigger(tw_stime gvt) {
 }
 
 
-#ifdef USE_RAND_TIEBREAKER
-void switch_model(tw_pe * pe, tw_event_sig gvt_sig) {
-#else
-void switch_model(tw_pe * pe, tw_stime gvt) {
-#endif
+void switch_model(tw_pe * pe) {
     // Rollback if in optimistic mode
 #ifdef USE_RAND_TIEBREAKER
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
-        rollback_and_cancel_events_pe(pe, gvt_sig);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0);
-        assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+        rollback_and_cancel_events_pe(pe);
     }
 #else
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        assert(pe->GVT == gvt);
-        rollback_and_cancel_events_pe(pe, gvt);
-        //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0);
-        assert(pe->GVT == gvt);
+        rollback_and_cancel_events_pe(pe);
     }
 #endif
     surr_config.director.switch_surrogate();
@@ -446,30 +432,23 @@ void switch_model(tw_pe * pe, tw_stime gvt) {
     if (freeze_network_on_switch) {
         if (surr_config.director.is_surrogate_on()) {
             model_net_method_switch_to_surrogate();
-#ifdef USE_RAND_TIEBREAKER
-            events_high_def_to_surrogate_switch(pe, gvt_sig);
-#else
-            events_high_def_to_surrogate_switch(pe, gvt);
-#endif
+            events_high_def_to_surrogate_switch(pe);
         } else {
             model_net_method_switch_to_highdef();
-#ifdef USE_RAND_TIEBREAKER
-            events_surrogate_to_high_def_switch(pe, gvt_sig);
-#else
-            events_surrogate_to_high_def_switch(pe, gvt);
-#endif
+            events_surrogate_to_high_def_switch(pe);
         }
     }
 }
 
 
+void director_call(tw_pe * pe) {
+    assert(is_surrogate_configured);
+
 #ifdef USE_RAND_TIEBREAKER
-void director_call(tw_pe * pe, tw_event_sig gvt_sig) {
-    tw_stime const gvt = gvt_sig.recv_ts;
+    tw_stime gvt = pe->GVT_sig.recv_ts;
 #else
-void director_call(tw_pe * pe, tw_stime gvt) {
+    tw_stime gvt = pe->GVT;
 #endif
-    assert(is_surrogate_configured);
 
     static int i = 0;
     if (g_tw_mynode == 0) {
@@ -481,14 +460,14 @@ void director_call(tw_pe * pe, tw_stime gvt) {
             printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt,
                     surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
 
-            switch (g_tw_trigger_arbitrary_fun.active) {
-                case ARBITRARY_FUN_enabled:
+            switch (g_tw_trigger_gvt_hook.active) {
+                case GVT_HOOK_enabled:
                     printf("enabled\n");
                     break;
-                case ARBITRARY_FUN_disabled:
+                case GVT_HOOK_disabled:
                     printf("disabled\n");
                     break;
-                case ARBITRARY_FUN_triggered:
+                case GVT_HOOK_triggered:
                     printf("triggered\n");
                     break;
             }
@@ -527,7 +506,7 @@ void director_call(tw_pe * pe, tw_stime gvt) {
     }
 
     double const start = tw_clock_read();
-    switch_model(pe, gvt_sig);
+    switch_model(pe);
     double const end = tw_clock_read();
     surrogate_switching_time += end - start;
 

From 6af7eb115ab996ead941a231a759f2a09684c71b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 7 Jul 2024 19:26:37 -0400
Subject: [PATCH 089/188] Fixing compilation warning
 `incompatible-pointer-types`

This bug was introduced when building the network surrogate. To build
the surrogate, we need to track the input queue "size" (the input
message queue to the routers from the workloads).

If the network surrogate wouldn't live down in specific network models
(it has been implemented right now only on dragonfly-dally), it should
actually reside within the model-net layer, and thus, individual models
shouldn't need to track the state of the input queue.

Hopefully, we can move the network surrogate from dragonfly-dally into
model-net.
---
 src/network-workloads/model-net-mpi-replay.c | 1 +
 src/networks/model-net/dragonfly.c           | 3 ++-
 src/networks/model-net/fattree.c             | 3 ++-
 src/networks/model-net/loggp.c               | 6 ++++--
 src/networks/model-net/simplenet-upd.c       | 7 +++++--
 src/networks/model-net/simplep2p.c           | 6 ++++--
 src/networks/model-net/slimfly.c             | 3 ++-
 src/networks/model-net/torus.c               | 3 ++-
 8 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 6a063d6e..bf6860b9 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -18,6 +18,7 @@
 #include "codes/quickhash.h"
 #include "codes/codes-jobmap.h"
 #include "codes/congestion-controller-core.h"
+#include "codes/surrogate/init.h"
 
 /* turning on track lp will generate a lot of output messages */
 #define DBG_COMM 1
diff --git a/src/networks/model-net/dragonfly.c b/src/networks/model-net/dragonfly.c
index eb5e81bb..faee79d8 100644
--- a/src/networks/model-net/dragonfly.c
+++ b/src/networks/model-net/dragonfly.c
@@ -1087,7 +1087,8 @@ static tw_stime dragonfly_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
     (void)sched_params;
diff --git a/src/networks/model-net/fattree.c b/src/networks/model-net/fattree.c
index eb1c49b5..e7db6c61 100644
--- a/src/networks/model-net/fattree.c
+++ b/src/networks/model-net/fattree.c
@@ -1527,7 +1527,8 @@ static tw_stime fattree_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
 #if DEBUG_RC
   packet_event_f++;
diff --git a/src/networks/model-net/loggp.c b/src/networks/model-net/loggp.c
index 22904287..def3eb22 100644
--- a/src/networks/model-net/loggp.c
+++ b/src/networks/model-net/loggp.c
@@ -119,7 +119,8 @@ static tw_stime loggp_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt);
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue);
 static void loggp_packet_event_rc(tw_lp *sender);
 
 tw_stime loggp_recv_msg_event(
@@ -611,7 +612,8 @@ static tw_stime loggp_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
      tw_event * e_new;
diff --git a/src/networks/model-net/simplenet-upd.c b/src/networks/model-net/simplenet-upd.c
index 5b5edc78..5955f228 100644
--- a/src/networks/model-net/simplenet-upd.c
+++ b/src/networks/model-net/simplenet-upd.c
@@ -100,7 +100,8 @@ static tw_stime simplenet_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt);
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue);
 
 static void simplenet_packet_event_rc(tw_lp *sender);
 
@@ -523,7 +524,8 @@ static tw_stime simplenet_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
      (void)message_offset; // unused...
      (void)sched_params; // unused...
@@ -549,6 +551,7 @@ static tw_stime simplenet_packet_event(
      msg->event_type = SN_MSG_START;
      msg->is_pull = req->is_pull;
      msg->pull_size = req->pull_size;
+     //msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue;
 
      /*Fill in simplenet information*/
      if(is_last_pckt) /* Its the last packet so pass in remote event information*/
diff --git a/src/networks/model-net/simplep2p.c b/src/networks/model-net/simplep2p.c
index e7609870..6eb9ac0d 100644
--- a/src/networks/model-net/simplep2p.c
+++ b/src/networks/model-net/simplep2p.c
@@ -132,7 +132,8 @@ static tw_stime simplep2p_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt);
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue);
 
 static void simplep2p_packet_event_rc(tw_lp *sender);
 
@@ -807,7 +808,8 @@ static tw_stime simplep2p_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset;
     (void)sched_params;
diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c
index da122ec6..94188942 100644
--- a/src/networks/model-net/slimfly.c
+++ b/src/networks/model-net/slimfly.c
@@ -1158,7 +1158,8 @@ static tw_stime slimfly_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     // printf("slim packet event\n");
 
diff --git a/src/networks/model-net/torus.c b/src/networks/model-net/torus.c
index 7db338e6..6ae6c7e6 100644
--- a/src/networks/model-net/torus.c
+++ b/src/networks/model-net/torus.c
@@ -498,7 +498,8 @@ static tw_stime torus_packet_event(
         void const * remote_event,
         void const * self_event,
         tw_lp *sender,
-        int is_last_pckt)
+        int is_last_pckt,
+        bool is_there_another_pckt_in_queue)
 {
     (void)message_offset; // not using atm...
     (void)sched_params; // not using atm...

From 472cc5ae7f99012661f62333c195bd55d0951b2a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 23 Jan 2025 15:01:46 -0500
Subject: [PATCH 090/188] Removing hardcoded test and we can pass a config file
 now

The configuration file should be of the form:
> %d %d %d %f
where each value corresponds to
> job_id skip_at_iter resume_at_iter time_per_iter

The configuration file is passed through the --skipping-iterations-file
parameter.
---
 src/network-workloads/model-net-mpi-replay.c | 158 ++++++++++++++++---
 1 file changed, 137 insertions(+), 21 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index bf6860b9..ab5d7e62 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -5,6 +5,7 @@
  */
 #include <ross.h>
 #include <inttypes.h>
+#include <stddef.h>
 #include <sys/stat.h>
 #include <sys/resource.h>
 #include "codes/codes-workload.h"
@@ -101,6 +102,7 @@ int period_count[MAX_JOBS];
 long period_time[MAX_JOBS][64];
 float period_interval[MAX_JOBS][64];
 char file_name_of_job[MAX_JOBS][8192];
+char skipping_iterations_file[8192];
 
 tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0};
 
@@ -159,9 +161,9 @@ static double sampling_interval = 5000000;
 static double sampling_end_time = 3000000000;
 static int enable_debug = 0;
 
-// More hardcoded values for surrogate switch
-static int start_iter_skip_app = -1;
-static double avg_time_app = -1.0;
+// We can skip multiple iterations using an average as our predicted iteration time. This will skip ahead to a future step in the simulation
+static struct AvgSurrogateSwitchingTimesForApp *skip_iter_config;
+static size_t skip_iter_config_size = 0;
 
 /* set group context */
 struct codes_mctx mapping_context;
@@ -371,6 +373,10 @@ struct nw_state
     char output_buf[512];
     char col_stats[64];
     struct ross_model_sample ross_sample;
+
+    // Configuration to tell the node when to skip some iterations
+    struct AvgSurrogateSwitchingTimesForApp *switch_config;
+    size_t switch_config_size;
 };
 
 /* data for handling reverse computation.
@@ -1128,25 +1134,41 @@ struct AvgSurrogateSwitchingTimesForApp {
     int skip_at_iter;
     int resume_at_iter;
     double time_per_iter;
-    bool done[72]; // This is a flag to indicate whethe we already completed this skipping stage
+    bool done; // This is a flag to indicate whethe we already completed this skipping stage
 };
 
+static int comp_AvgSurrogateSwitchingTimesForApp(
+    struct AvgSurrogateSwitchingTimesForApp *left,
+    struct AvgSurrogateSwitchingTimesForApp *right
+) {
+    if (left->app_id < right->app_id) {
+        return -1;
+    }
+    if (left->app_id > right->app_id) {
+        return 1;
+    }
+    // else: left->app_id == right->app_id
+
+    if (left->skip_at_iter < right->skip_at_iter) {
+        return -1;
+    }
+    if (left->skip_at_iter > right->skip_at_iter) {
+        return 1;
+    }
+
+    return 0;
+}
+
 static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) {
     return avgSur->resume_at_iter - avgSur->skip_at_iter;
 }
 
-static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = {
-    // app_id, skip_at_iter, resume_at_iter, time_per_iter, done
-    {0,  3,  21, 14403235, {false}},
-    {1,  7,  59,  4982017, {false}},
-    {1, 79, 195,  3581337, {false}},
-};
-
-struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {
-    int n_jumps = (sizeof(skip_iter_config)/sizeof(skip_iter_config[0]));
-    for (int i=0; i < n_jumps; i++) {
-        struct AvgSurrogateSwitchingTimesForApp * jump = &skip_iter_config[i];
-        if (!jump->done[s->local_rank] && jump->app_id == s->app_id) {
+static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {
+    assert(s->switch_config != NULL);
+    for (int i=0; i < s->switch_config_size; i++) {
+        struct AvgSurrogateSwitchingTimesForApp * jump = &s->switch_config[i];
+        assert(jump->app_id == s->app_id);
+        if (!jump->done) {
             return jump;
         }
     }
@@ -1188,7 +1210,7 @@ static void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
         }
     }
 
-    switch_config->done[s->local_rank] = true;
+    switch_config->done = true;
 
     tw_event *e = tw_event_new(lp->gid, 0.0, lp);
     nw_message* msg = (nw_message*) tw_event_data(e);
@@ -2669,6 +2691,32 @@ void nw_test_init(nw_state* s, tw_lp* lp)
                    " num_sends num_bytes_sent sample_end_time");
        }
    }
+
+   if (skip_iter_config_size > 0) {
+       size_t size = 0;
+       // Finding number of times to skip for this job
+       for (size_t i = 0; i < skip_iter_config_size; i++) {
+           if (lid.job == skip_iter_config[i].app_id) {
+               size++;
+           }
+       }
+       // Constructing switch_config
+       s->switch_config_size = size;
+       if (size > 0) {
+          s->switch_config = malloc(size * sizeof(struct AvgSurrogateSwitchingTimesForApp));
+          size_t j = 0;
+          for (size_t i = 0; i < skip_iter_config_size; i++) {
+              if (lid.job == skip_iter_config[i].app_id) {
+                  s->switch_config[j] = skip_iter_config[i];
+                  j++;
+              }
+          }
+       }
+   } else {
+       s->switch_config = NULL;
+       s->switch_config_size = 0;
+   }
+
    return;
 }
 
@@ -2810,6 +2858,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
 
         case SURR_SKIP_ITERATION:
             skip_iteration(s, lp, bf, m);
+            break;
 	}
 }
 
@@ -3192,6 +3241,10 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
 //	    rc_stack_destroy(s->indices);
 	    rc_stack_destroy(s->processed_ops);
 	    rc_stack_destroy(s->processed_wait_op);
+
+    if (s->switch_config != NULL) {
+        free(s->switch_config);
+    }
 }
 
 void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
@@ -3261,6 +3314,10 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l
         case CLI_OTHER_FINISH:
             handle_other_finish_rc(s, lp, bf, m);
             break;
+
+        case SURR_SKIP_ITERATION:
+            skip_iteration_rc(s, lp, bf, m);
+            break;
 	}
 }
 
@@ -3332,13 +3389,12 @@ const tw_optdef app_opt [] =
     TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"),
     TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
 	TWOPT_CHAR("offset_file", offset_file, "offset file name"),
-    TWOPT_UINT("start-iter-skip-app", start_iter_skip_app, "Hardcoded value to indicate when to switch to surrogate for app 1"),
-    TWOPT_STIME("avg-time-app", avg_time_app, "Hardcoded value for Avg. iteration time for app 1"),
 #ifdef ENABLE_CORTEX_PYTHON
 	TWOPT_CHAR("cortex-file", cortex_file, "Python file (without .py) containing the CoRtEx translation class"),
 	TWOPT_CHAR("cortex-class", cortex_class, "Python class implementing the CoRtEx translator"),
 	TWOPT_CHAR("cortex-gen", cortex_gen, "Python function to pre-generate MPI events"),
 #endif
+	TWOPT_CHAR("skipping-iterations-file", skipping_iterations_file, "Configuration file name for which steps to skip"),
 	TWOPT_END()
 };
 
@@ -3477,8 +3533,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
   tw_opt_add(app_opt);
   tw_opt_add(cc_app_opt);
   tw_init(argc, argv);
-  skip_iter_config[2].skip_at_iter = start_iter_skip_app;
-  skip_iter_config[2].time_per_iter = avg_time_app;
 
 #ifdef USE_RDAMARIS
     if(g_st_ross_rank)
@@ -3629,6 +3683,64 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
     }
 
 
+    // Loading surrogacy configuration
+    if(strlen(skipping_iterations_file) > 0) {
+        FILE *file = fopen(skipping_iterations_file, "r");
+        if(!file) {
+            tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file);
+        }
+
+        // Finding number of skipping iteration rows
+        int i = 0;
+        for(; !feof(file); i++) {
+            struct AvgSurrogateSwitchingTimesForApp skip_row;
+
+            int ref = fscanf(file, "%d %d %d %lf", &skip_row.app_id, &skip_row.skip_at_iter, &skip_row.resume_at_iter, &skip_row.time_per_iter);
+
+            if (ref != 4) { // We couldn't read all four values
+                fprintf(stderr, "Warning: Couldn't read a row of 'skipping-iterations-file'. Stopping after reading %d rows.\n", i);
+                break;
+            }
+        }
+
+        skip_iter_config_size = i;
+
+        skip_iter_config = malloc(skip_iter_config_size * sizeof(struct AvgSurrogateSwitchingTimesForApp));
+
+        fseek(file, 0, SEEK_SET);
+        for(i = 0; !feof(file); i++) {
+            struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i];
+
+            int ref = fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter);
+
+            skip_row->done = false;
+
+            if (ref != 4) { // We couldn't read all four values
+                break;
+            }
+        }
+        assert(i == skip_iter_config_size);
+        fclose(file);
+
+        // Sorting. To skip iterations we asume that all skips for a specific job appear in increasing order
+        qsort(
+            skip_iter_config,
+            skip_iter_config_size,
+            sizeof(struct AvgSurrogateSwitchingTimesForApp),
+            (int (*)(const void *, const void *)) comp_AvgSurrogateSwitchingTimesForApp);
+
+        // Printing configuration
+        if(!g_tw_mynode && skip_iter_config_size) {
+            printf("\n\nConfiguration for skipping selected iterations of one or more jobs has been loaded.\n");
+            printf("| job_id skip_at_iter resume_at_iter time_per_iter\n");
+            for (size_t i=0; i<skip_iter_config_size; i++) {
+                struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i];
+                printf("| %d %d %d %lf\n", skip_row->app_id, skip_row->skip_at_iter, skip_row->resume_at_iter, skip_row->time_per_iter);
+            }
+            printf("\n");
+        }
+    }
+
     MPI_Comm_rank(MPI_COMM_CODES, &rank);
     MPI_Comm_size(MPI_COMM_CODES, &nprocs);
 
@@ -3813,6 +3925,10 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    if(alloc_spec)
        codes_jobmap_destroy(jobmap_ctx);
 
+   if (skip_iter_config != NULL) {
+       free(skip_iter_config);
+   }
+
    print_surrogate_stats();
 
 #ifdef USE_RDAMARIS

From 57fc7e3aa03dd4221d5c2393c0b34b17a49a8edf Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 24 Jan 2025 07:05:29 -0500
Subject: [PATCH 091/188] Fixing a memory bug when reading from file

Reading data from `skipping_iterations_file` happens at two stages,
first we find how much data to load into memory, then we malloc the
space and load the data. One extra row of data had been loaded, which
overwrote a couple of bytes for some other structure. This ocassionally
would mean a segfault (which only showed up when running the simulation
in parallel).
---
 src/network-workloads/model-net-mpi-replay.c | 40 ++++++++++----------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index ab5d7e62..d0dea3ec 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -426,6 +426,7 @@ struct nw_message
        int saved_syn_length;
        unsigned long saved_prev_switch;
        double saved_prev_max_time;
+       struct AvgSurrogateSwitchingTimesForApp * switch_config_used;
    } rc;
 };
 
@@ -1176,33 +1177,33 @@ static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_sta
 }
 
 static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
-    // TODO: implement!!
+    m->rc.switch_config_used->done = false;
 }
 
-static void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
+static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
 {
-	struct codes_workload_op * mpi_op = (struct codes_workload_op*) malloc(sizeof(struct codes_workload_op));
-    m->mpi_op = mpi_op;
+    struct codes_workload_op mpi_op;
 
     struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
     assert(switch_config != NULL);
     int const resume_at_iter = switch_config->resume_at_iter;
+    m->rc.switch_config_used = switch_config;
 
     // consuming all events until indicated iteration is reached
     bool reached_end = false;
     while (!reached_end) {
-        codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op);
+        codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op);
 
-        switch (mpi_op->op_type) {
+        switch (mpi_op.op_type) {
             case CODES_WK_MARK:
-                if (mpi_op->u.send.tag == resume_at_iter) {
+                if (mpi_op.u.send.tag == resume_at_iter) {
                     reached_end = true;
-                    codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op);
+                    codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op);
                 }
                 break;
             // If we reach the end of simulation, rollback once to allow the operation to be processed normally
             case CODES_WK_END:
-                codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op);
+                codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op);
                 reached_end = true;
                 break;
             default:
@@ -1226,7 +1227,7 @@ static bool have_we_hit_surrogate_switch(struct nw_state* s, struct codes_worklo
     return false;
 }
 
-static double time_to_skip_iterations(struct nw_state* s, struct codes_workload_op * mpi_op) {
+static double time_to_skip_iterations(struct nw_state* s) {
     struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
     assert(switch_config != NULL);
     return switch_config->time_per_iter * iters_skipped(switch_config);
@@ -2857,7 +2858,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
             break;
 
         case SURR_SKIP_ITERATION:
-            skip_iteration(s, lp, bf, m);
+            skip_to_iteration(s, lp, bf, m);
             break;
 	}
 }
@@ -3096,7 +3097,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 
                 // If we have reached the surrogate switch time, skip next iteration(s)
                 if (have_we_hit_surrogate_switch(s, mpi_op)) {
-                    tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s, mpi_op), lp);
+                    tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp);
                     nw_message* msg = (nw_message*) tw_event_data(e);
                     msg->msg_type = SURR_SKIP_ITERATION;
                     tw_event_send(e);
@@ -3356,6 +3357,8 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
 
             free(m->mpi_op);
         break;
+        case SURR_SKIP_ITERATION:
+            break;
     }
 }
 
@@ -3683,7 +3686,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
     }
 
 
-    // Loading surrogacy configuration
+    // Loading skipping iterations configuration
     if(strlen(skipping_iterations_file) > 0) {
         FILE *file = fopen(skipping_iterations_file, "r");
         if(!file) {
@@ -3707,19 +3710,14 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 
         skip_iter_config = malloc(skip_iter_config_size * sizeof(struct AvgSurrogateSwitchingTimesForApp));
 
+        // Loading in memory all times to skip iterations
         fseek(file, 0, SEEK_SET);
-        for(i = 0; !feof(file); i++) {
+        for(i = 0; i < skip_iter_config_size; i++) {
             struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i];
 
-            int ref = fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter);
-
+            fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter);
             skip_row->done = false;
-
-            if (ref != 4) { // We couldn't read all four values
-                break;
-            }
         }
-        assert(i == skip_iter_config_size);
         fclose(file);
 
         // Sorting. To skip iterations we asume that all skips for a specific job appear in increasing order

From 2711b6bcb71281930401b62eedff27287c3c5152 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 24 Jan 2025 08:30:25 -0500
Subject: [PATCH 092/188] Allowing to run without skipping configuration file

---
 src/network-workloads/model-net-mpi-replay.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index d0dea3ec..b62715c3 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -1165,7 +1165,9 @@ static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) {
 }
 
 static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {
-    assert(s->switch_config != NULL);
+    if (s->switch_config == NULL) {
+        return NULL;
+    }
     for (int i=0; i < s->switch_config_size; i++) {
         struct AvgSurrogateSwitchingTimesForApp * jump = &s->switch_config[i];
         assert(jump->app_id == s->app_id);

From 1412a4e9a943a0df40cdc6f0af9c4ac92c4bf026 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 24 Jan 2025 15:50:55 -0500
Subject: [PATCH 093/188] Saving apps iteration logs into single files per PE

---
 src/network-workloads/model-net-mpi-replay.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index b62715c3..d5d1b8b1 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -3760,7 +3760,21 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    modelnet_mpi_replay_read_config();
 
    //Xin: output iteration time into log file
-   iteration_log = fopen("iteration-logs", "w+");
+
+   char const iteration_dir[] = "iteration-logs";
+   if (!g_tw_mynode) {
+        int ret = mkdir("iteration-logs", 0775);
+        if(ret != 0)
+        {
+            tw_error(TW_LOC, "mkdir(\"%s/\")", iteration_dir);
+        }
+   }
+   MPI_Barrier(MPI_COMM_CODES);
+   int buffer_size = snprintf(NULL, 0, "%s/pe=%d.txt", iteration_dir, g_tw_mynode) + 1;
+   char *iteration_log_path = malloc(buffer_size);
+   snprintf(iteration_log_path, buffer_size, "%s/pe=%d.txt", iteration_dir, g_tw_mynode);
+   iteration_log = fopen(iteration_log_path, "w+");
+   free(iteration_log_path);
    if(!iteration_log)
    {
        printf("\n Error logging iteration times... quitting ");

From bb5b369fe11280afa0a9a00fb71c707370581793 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sat, 25 Jan 2025 15:28:38 -0500
Subject: [PATCH 094/188] Guaranteeing that "workload period" config works in
 parallel

---
 src/network-workloads/model-net-mpi-replay.c | 25 ++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index d5d1b8b1..364707f6 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -37,6 +37,7 @@
 #define BAR_TAG 1234
 #define PRINT_SYNTH_TRAFFIC 1
 #define MAX_JOBS 64
+#define MAX_PERIODS_PER_APP 512
 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine
 #define OUTPUT_MARKS 0
 
@@ -99,8 +100,8 @@ float mean_interval_of_job[MAX_JOBS];
 long job_timer1[MAX_JOBS];
 long job_timer2[MAX_JOBS];
 int period_count[MAX_JOBS];
-long period_time[MAX_JOBS][64];
-float period_interval[MAX_JOBS][64];
+long period_time[MAX_JOBS][MAX_PERIODS_PER_APP];
+float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP];
 char file_name_of_job[MAX_JOBS][8192];
 char skipping_iterations_file[8192];
 
@@ -2672,8 +2673,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
 			e2 = tw_event_new(lp->gid, ts2, lp);
 			m_new2 = (nw_message*)tw_event_data(e2);
 			m_new2->msg_type = CLI_BCKGND_CHANGE;
-			m_new2->fwd.msg_send_time = period_interval[lid.job][k];
-			m_new2->rc.saved_send_time = mean_interval_of_job[s->app_id];
+			m_new2->fwd.msg_send_time = period_interval[lid.job][k];  // Warning: this is overwriting a variable meant for message type MPI_SEND_ARRIVED_CB
 			tw_event_send(e2);
 		}
 	}
@@ -2839,9 +2839,10 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
         break;
 
         case CLI_BCKGND_CHANGE:
-		mean_interval_of_job[s->app_id] = m->fwd.msg_send_time;
-		printf("======== CHANGE [now: %lf] App:%d | Interval: %f\n", tw_now(lp), s->app_id, mean_interval_of_job[s->app_id]);
-	break;
+            m->rc.saved_send_time = mean_interval_of_job[s->app_id];  // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE
+            mean_interval_of_job[s->app_id] = m->fwd.msg_send_time;
+            m->rc.saved_marker_time = tw_now(lp);
+            break;
 
         case CLI_BCKGND_ARRIVE:
             arrive_syn_tr(s, bf, m, lp);
@@ -3361,6 +3362,10 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
         break;
         case SURR_SKIP_ITERATION:
             break;
+
+        case CLI_BCKGND_CHANGE:
+            printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.saved_marker_time, s->app_id, m->fwd.msg_send_time);
+            break;
     }
 }
 
@@ -3651,7 +3656,13 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
             char ref2 = '\n';
             while(!feof(period_file))
             {
+                if (j >= MAX_JOBS) {
+                    tw_error(TW_LOC, "Exceeded number of max workloads in workloads period file. Max: %d", MAX_JOBS);
+                }
                 ref2 = fscanf(period_file, "%d", &period_count[j]);
+                if (period_count[j] > MAX_PERIODS_PER_APP) {
+                    tw_error(TW_LOC, "Too many periods for workload app %d", period_count[j]);
+                }
                 if(ref2 != EOF){
                     printf("======== [ID: %d] Period count: %d\n", j, period_count[j]);
                     for(int k = 0; k < period_count[j]; k++){

From a4e052a4f483064d7a9ebf071b2ce1592b94fb41 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sat, 25 Jan 2025 16:22:34 -0500
Subject: [PATCH 095/188] Changing time in period file to double (from long)

---
 src/network-workloads/model-net-mpi-replay.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 364707f6..a68bc0c4 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -100,7 +100,7 @@ float mean_interval_of_job[MAX_JOBS];
 long job_timer1[MAX_JOBS];
 long job_timer2[MAX_JOBS];
 int period_count[MAX_JOBS];
-long period_time[MAX_JOBS][MAX_PERIODS_PER_APP];
+double period_time[MAX_JOBS][MAX_PERIODS_PER_APP];
 float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP];
 char file_name_of_job[MAX_JOBS][8192];
 char skipping_iterations_file[8192];
@@ -3666,8 +3666,8 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
                 if(ref2 != EOF){
                     printf("======== [ID: %d] Period count: %d\n", j, period_count[j]);
                     for(int k = 0; k < period_count[j]; k++){
-                        fscanf(period_file, "%ld:%f", &period_time[j][k], &period_interval[j][k]);
-                        printf("======== [ID: %d] Period time and interval: %ld and %f\n", j, period_time[j][k], period_interval[j][k]);
+                        fscanf(period_file, "%lf:%f", &period_time[j][k], &period_interval[j][k]);
+                        printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]);
                     }
                 }
                 j++;

From 795628ddafb8873ac81c39d2503218411dbda02d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 18 Feb 2025 14:02:32 -0500
Subject: [PATCH 096/188] Stdout for surrogate only from PE 0

---
 src/network-workloads/model-net-mpi-replay.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index a68bc0c4..19724ad8 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -3656,18 +3656,22 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
             char ref2 = '\n';
             while(!feof(period_file))
             {
-                if (j >= MAX_JOBS) {
+                if (j >= MAX_JOBS && !g_tw_mynode) {
                     tw_error(TW_LOC, "Exceeded number of max workloads in workloads period file. Max: %d", MAX_JOBS);
                 }
                 ref2 = fscanf(period_file, "%d", &period_count[j]);
-                if (period_count[j] > MAX_PERIODS_PER_APP) {
+                if (period_count[j] > MAX_PERIODS_PER_APP && !g_tw_mynode) {
                     tw_error(TW_LOC, "Too many periods for workload app %d", period_count[j]);
                 }
                 if(ref2 != EOF){
-                    printf("======== [ID: %d] Period count: %d\n", j, period_count[j]);
+                    if (!g_tw_mynode) {
+                        printf("======== [ID: %d] Period count: %d\n", j, period_count[j]);
+                    }
                     for(int k = 0; k < period_count[j]; k++){
                         fscanf(period_file, "%lf:%f", &period_time[j][k], &period_interval[j][k]);
-                        printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]);
+                        if (!g_tw_mynode) {
+                            printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]);
+                        }
                     }
                 }
                 j++;

From a7121ec6643811e0b3e424a9080800957e2606cd Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 18 Feb 2025 17:58:40 -0500
Subject: [PATCH 097/188] Implementing custom LP status printing for
 model-net-lps

---
 src/networks/model-net/core/model-net-lp.c | 111 +++++++++++++++++++++
 src/util/rc-stack.c                        |  23 ++++-
 2 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 3ff97f37..e49035e3 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -131,6 +131,22 @@ tw_lptype model_net_base_lp = {
     sizeof(model_net_base_state),
 };
 
+// Functionality to check for correct implementation of reverse event handler 
+static void print_model_net_state(FILE * out, model_net_base_state * state);
+static void print_event_state(FILE * out, model_net_wrap_msg * state);
+
+// ROSS function pointer table to check reverse event handler
+crv_checkpointer model_net_chkptr = {
+    &model_net_base_lp,
+    0,
+    (save_checkpoint_state_f) NULL,
+    (clean_checkpoint_state_f) NULL,
+    (check_states_f) NULL,
+    (print_lpstate_f) print_model_net_state,
+    (print_checkpoint_state_f) print_model_net_state,
+    (print_event_f) print_event_state,
+};
+
 static void model_net_commit_event(model_net_base_state * ns, tw_bf *b,  model_net_wrap_msg * m, tw_lp * lp)
 {
     if(m->h.event_type == MN_BASE_PASS)
@@ -268,6 +284,7 @@ void model_net_base_register(int *do_config_nets){
             }
         }
     }
+    crv_add_custom_state_checkpoint(&model_net_chkptr);
 }
 
 static void base_read_config(const char * anno, model_net_base_params *p){
@@ -1117,6 +1134,100 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 
 }
 
+/* START Checking reverse handler functionality */
+static void print_model_net_state(FILE * out, model_net_base_state * state) {
+    fprintf(out, "             net_id = %d\n", state->net_id);
+    fprintf(out, "    nics_per_router = %d\n", state->nics_per_router);
+    fprintf(out, "*in_sched_send_loop = %p\n", state->in_sched_send_loop);
+    fprintf(out, " in_sched_recv_loop = %d\n", state->in_sched_recv_loop);
+    fprintf(out, "             msg_id = %lu\n", state->msg_id);
+    fprintf(out, "**       sched_send = %p\n", state->sched_send);
+    fprintf(out, "*        sched_recv = %p\n", state->sched_recv);
+    fprintf(out, "*            params = %p\n", state->params);
+    fprintf(out, "*          sub_type = %p\n", state->sub_type);
+    fprintf(out, "*    sub_model_type = %p\n", state->sub_model_type);
+    fprintf(out, "*         sub_state = %p\n", state->sub_state);
+    fprintf(out, "next_available_time = %f\n", state->next_available_time);
+    fprintf(out, "*node_copy_next_available_time = %p\n", state->node_copy_next_available_time);
+    fprintf(out, "*sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate);
+    fprintf(out, "sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate);
+}
+
+static void print_type(FILE * out, enum model_net_base_event_type type) {
+    switch (type) {
+        case MN_BASE_NEW_MSG:
+            fprintf(out, "MN_BASE_NEW_MSG");
+            break;
+        case MN_BASE_SCHED_NEXT:
+            fprintf(out, "MN_BASE_SCHED_NEXT");
+            break;
+        case MN_BASE_SAMPLE:
+            fprintf(out, "MN_BASE_SAMPLE");
+            break;
+        case MN_BASE_PASS:
+            fprintf(out, "MN_BASE_PASS");
+            break;
+        case MN_BASE_END_NOTIF:
+            fprintf(out, "MN_BASE_END_NOTIF");
+            break;
+        case MN_CONGESTION_EVENT:
+            fprintf(out, "MN_CONGESTION_EVENT");
+            break;
+    }
+}
+
+static void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) {
+    fprintf(out, "%sfinal_dest_lp = %ld\n", starts_with, req->final_dest_lp);
+    fprintf(out, "%sdest_mn_lp = %ld\n", starts_with, req->dest_mn_lp);
+    fprintf(out, "%ssrc_lp = %ld\n", starts_with, req->src_lp);
+    fprintf(out, "%smsg_start_time = %f\n", starts_with, req->msg_start_time);
+    fprintf(out, "%smsg_new_mn_event = %f\n", starts_with, req->msg_new_mn_event);
+    fprintf(out, "%smsg_size = %ld\n", starts_with, req->msg_size);
+    fprintf(out, "%spull_size = %ld\n", starts_with, req->pull_size);
+    fprintf(out, "%spacket_size = %ld\n", starts_with, req->packet_size);
+    fprintf(out, "%smsg_id = %ld\n", starts_with, req->msg_id);
+    fprintf(out, "%snet_id = %d\n", starts_with, req->net_id);
+    fprintf(out, "%sis_pull = %d\n", starts_with, req->is_pull);
+    fprintf(out, "%squeue_offset = %d\n", starts_with, req->queue_offset);
+    fprintf(out, "%sremote_event_size = %d\n", starts_with, req->remote_event_size);
+    fprintf(out, "%sself_event_size = %d\n", starts_with, req->self_event_size);
+    fprintf(out, "%scategory = '%s'\n", starts_with, req->category);
+    fprintf(out, "%sapp_id = %d\n", starts_with, req->app_id);
+}
+
+static void print_event_state(FILE * out, model_net_wrap_msg * msg) {
+    fprintf(out, "h\n");
+    fprintf(out, "|.src = %lu\n", msg->h.src);
+    fprintf(out, "|.event_type = %d (", msg->h.event_type);
+    print_type(out, msg->h.event_type);
+    fprintf(out, ")\n");
+    fprintf(out, "|.magic = %d\n", msg->h.magic);
+    switch (msg->h.event_type) {
+        case MN_BASE_NEW_MSG:
+        case MN_BASE_SCHED_NEXT:
+            // We can check m_base values
+            fprintf(out, "m_base\n");
+            fprintf(out, "     |.req\n");
+            print_model_net_request(out, "     |   |.", &msg->msg.m_base.req);
+            fprintf(out, "     |.is_from_remote = %d\n", msg->msg.m_base.is_from_remote);
+            fprintf(out, "     |.isQueueReq = %d\n", msg->msg.m_base.isQueueReq);
+            fprintf(out, "     |.save_ts = %f\n", msg->msg.m_base.save_ts);
+            fprintf(out, "     |.sched_params.prio = %d\n", msg->msg.m_base.sched_params.prio);
+            fprintf(out, "     |.rc\n");
+            fprintf(out, "     |  |.req\n");
+            print_model_net_request(out, "     |  |   |.", &msg->msg.m_base.rc.req);
+            fprintf(out, "     |  |.sched_params.prio = %d\n", msg->msg.m_base.rc.sched_params.prio);
+            fprintf(out, "     |  |.rtn = %d\n", msg->msg.m_base.rc.rtn);
+            fprintf(out, "     |  |.prio = %d\n", msg->msg.m_base.rc.prio);
+            fprintf(out, "     |.created_in_surrogate = %d\n", msg->msg.m_base.created_in_surrogate);
+            break;
+        default:
+            fprintf(out, "The content of this message cannot be deciphered yet with the information given\n");
+    }
+}
+
+/* END checking reverse handler functionality */
+
 void model_net_method_switch_to_surrogate(void) {
     is_freezing_on = true;
 }
diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c
index ebb2131f..7b0540e7 100644
--- a/src/util/rc-stack.c
+++ b/src/util/rc-stack.c
@@ -12,7 +12,8 @@
 enum rc_stack_mode {
     RC_NONOPT, // not in optimistic mode
     RC_OPT, // optimistic mode
-    RC_OPT_DBG // optimistic *debug* mode (requires special handling)
+    RC_OPT_DBG, // optimistic *debug* mode (requires special handling)
+    RC_SEQ_RV_DBG, // sequential rollback chek, a *debug* mode that requires special handling
 };
 
 typedef struct rc_entry_s {
@@ -40,11 +41,12 @@ void rc_stack_create(struct rc_stack **s){
     }
     switch (g_tw_synchronization_protocol) {
         case OPTIMISTIC:
-            ss->mode = RC_OPT;
-            break;
         case OPTIMISTIC_REALTIME:
             ss->mode = RC_OPT;
             break;
+        case SEQUENTIAL_ROLLBACK_CHECK:
+            ss->mode = RC_SEQ_RV_DBG;
+            break;
         case OPTIMISTIC_DEBUG:
             ss->mode = RC_OPT_DBG;
             break;
@@ -103,6 +105,21 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) {
     if (s->mode == RC_OPT_DBG)
         return;
 
+    // rollback until only one event is left
+    if (s->mode == RC_SEQ_RV_DBG) {
+        struct qlist_head *ent = s->head.next;
+        while (ent->next != &s->head) {
+            rc_entry *r = qlist_entry(ent, rc_entry, ql);
+            qlist_del(ent);
+            if (r->free_fn) r->free_fn(r->data);
+            free(r);
+            s->count--;
+            ent = s->head.next;
+        }
+        return;
+    }
+
+    // Removing all stored rollback events from stack
     struct qlist_head *ent = s->head.next;
     while (ent != &s->head) {
         rc_entry *r = qlist_entry(ent, rc_entry, ql);

From ca303200d57d6ce0c459b55eda17b6c23d92ecaa Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 18 Feb 2025 18:06:05 -0500
Subject: [PATCH 098/188] Fixing small bug found when rollbacking
 model-net-event

---
 src/networks/model-net/core/model-net-lp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index e49035e3..8a52c7da 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -788,6 +788,7 @@ void handle_new_msg(
     // don't forget to set packet size, now that we're responsible for it!
     r->msg_new_mn_event = tw_now(lp);
     r->packet_size = ns->params->packet_size;
+    b->c30 = 1;
     r->msg_id = ns->msg_id++;
     void * m_data = m+1;
     void *remote = NULL, *local = NULL;
@@ -881,6 +882,10 @@ void handle_new_msg_rc(
         *in_sched_loop = 0;
     }
     model_net_sched_add_rc(ss, &m->msg.m_base.rc, lp);
+
+    if (b->c30) {
+        ns->msg_id--;
+    }
 }
 
 /// bitfields used

From c2afcd1f6415dd0ac4866af25eacebffdebad7f2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 24 Feb 2025 11:02:33 -0500
Subject: [PATCH 099/188] Cleaning up some structs and fixing a reverse handler
 case

---
 src/network-workloads/model-net-mpi-replay.c | 21 +++++++-------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 19724ad8..34c6a61d 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -228,7 +228,6 @@ struct mpi_msgs_queue
     int source_rank;
     int dest_rank;
     int64_t num_bytes;
-    int64_t seq_id;
     tw_stime req_init_time;
 	dumpi_req_id req_id;
     struct qlist_head ql;
@@ -238,8 +237,8 @@ struct mpi_msgs_queue
 struct completed_requests
 {
 	unsigned int req_id;
+    int index; // for rollbacking
     struct qlist_head ql;
-    int index;
 };
 
 /* for wait operations, store the pending operation and number of completed waits so far. */
@@ -250,7 +249,6 @@ struct pending_waits
 	int num_completed;
 	int count;
     tw_stime start_time;
-    struct qlist_head ql;
 };
 
 struct msg_size_info
@@ -387,7 +385,7 @@ struct nw_state
 struct nw_message
 {
    // forward message handler
-   int msg_type;
+   enum MPI_NW_EVENTS msg_type;
    int op_type;
    int num_rngs;
    model_net_event_return event_rc;
@@ -399,7 +397,6 @@ struct nw_message
        int dest_rank;
        int64_t num_bytes;
        int num_matched;
-       int data_type;
        double sim_start_time;
        // for callbacks - time message was received
        double msg_send_time;
@@ -919,7 +916,6 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
             {
                 // printf("%d - %d >= %d\n",s->gen_data,s->prev_switch,perm_switch_thresh);
                 bf->c2 = 1;
-                m->rc.saved_prev_switch = s->prev_switch;
                 s->prev_switch = s->gen_data; //Amount of data pushed at time when switch initiated
                 dest_svr[0] = tw_rand_integer(lp->rng, 0, num_clients - 1);
                 if(dest_svr[0] == s->local_rank)
@@ -1352,7 +1348,6 @@ static int notify_posted_wait(nw_state* s,
     if(op_type == CODES_WK_WAIT &&
             (wait_elem->req_ids[0] == completed_req))
     {
-            m->fwd.wait_completed = 1;
             wait_completed = 1;
     }
     else if(op_type == CODES_WK_WAITALL
@@ -1365,6 +1360,7 @@ static int notify_posted_wait(nw_state* s,
             if(wait_elem->req_ids[i] == completed_req)
             {
                 wait_elem->num_completed++;
+                m->fwd.wait_completed++; //This is just the individual request handle - not the entire wait.
                 if(wait_elem->num_completed > wait_elem->count)
                     printf("\n Num completed %d count %d LP %llu ",
                             wait_elem->num_completed,
@@ -1383,7 +1379,6 @@ static int notify_posted_wait(nw_state* s,
                     }
                     wait_completed = 1;
                 }
-                m->fwd.wait_completed = 1; //This is just the individual request handle - not the entire wait.
             }
         }
     }
@@ -1827,8 +1822,6 @@ static void codes_exec_mpi_recv_rc(
 
     if(m->fwd.found_match >= 0)
 	{
-		ns->recv_time = m->rc.saved_recv_time;
-		ns->ross_sample.recv_time = m->rc.saved_recv_time_sample;
         //int queue_count = qlist_count(&ns->arrival_queue);
 
         mpi_msgs_queue * qi = (mpi_msgs_queue*)rc_stack_pop(ns->processed_ops);
@@ -1880,7 +1873,6 @@ static void codes_exec_mpi_recv(
 
     m->rc.saved_recv_time = s->recv_time;
     m->rc.saved_recv_time_sample = s->ross_sample.recv_time;
-    m->rc.saved_num_bytes = mpi_op->u.recv.num_bytes;
 
     mpi_msgs_queue * recv_op = (mpi_msgs_queue*) malloc(sizeof(mpi_msgs_queue));
     recv_op->req_init_time = tw_now(lp);
@@ -2199,8 +2191,9 @@ static void update_completed_queue_rc(nw_state * s, tw_bf * bf, nw_message * m,
        add_completed_reqs(s, lp, m->fwd.num_matched);
        codes_issue_next_event_rc(lp);
     }
-    if(m->fwd.wait_completed > 0)
-           s->wait_op->num_completed--;
+    if(m->fwd.wait_completed > 0) {
+       s->wait_op->num_completed -= m->fwd.wait_completed;
+    }
 }
 
 static void update_completed_queue(nw_state* s,
@@ -2733,7 +2726,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
     rc_stack_gc(lp, s->processed_ops);
     rc_stack_gc(lp, s->processed_wait_op);
 
-    switch(m->msg_type)
+    switch((enum MPI_NW_EVENTS) m->msg_type)
 	{
 		case MPI_SEND_ARRIVED:
 			update_arrival_queue(s, bf, m, lp);

From c4c1491317b06f527dcc39ef0673a5ec5f47f05d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 24 Feb 2025 14:17:34 -0500
Subject: [PATCH 100/188] Refactoring struct in model-net-mpi-replay

The struct nw_message was messy. It kept on getting longer and longer as
more and more values were stored in the struct to use later for
rollback. Now, it is more managable and it uses less memory than before.
---
 src/network-workloads/model-net-mpi-replay.c | 208 ++++++++++++-------
 1 file changed, 130 insertions(+), 78 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 34c6a61d..be15fa30 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -408,23 +408,73 @@ struct nw_message
        short wait_completed;
        short rend_send;
    } fwd;
-   struct
-   {
-       int saved_perm;
-       double saved_send_time;
-       double saved_send_time_sample;
-       double saved_recv_time;
-       double saved_recv_time_sample;
-       double saved_wait_time;
-       double saved_wait_time_sample;
-       double saved_delay;
-       double saved_delay_sample;
-       double saved_marker_time;
-       int64_t saved_num_bytes;
-       int saved_syn_length;
-       unsigned long saved_prev_switch;
-       double saved_prev_max_time;
-       struct AvgSurrogateSwitchingTimesForApp * switch_config_used;
+
+   // A different struct for each type of MPI_NW_EVENTS
+   union {
+       // For CLI_BCKGND_GEN
+       struct {
+           int saved_syn_length;
+           int saved_perm;  // Used by PERMUTATION
+           unsigned long saved_prev_switch;  // Used by PERMUTATION
+       } gen;
+
+       // For CLI_BCKGND_ARRIVE and MPI_SEND_ARRIVED_CB
+       struct {
+           double saved_prev_max_time;
+           double saved_send_time;
+           double saved_send_time_sample;
+       } arrive;
+
+       // For CLI_BCKGND_CHANGE
+       struct {
+           double saved_send_time;
+           double saved_marker_time;
+       } change;
+
+       // For MPI_OP_GET_NEXT there are also different types
+       struct {
+	       double saved_elapsed_time;
+           union {
+               // CODES_WK_ALLREDUCE
+               struct {
+                   double saved_send_time;
+                   double saved_delay;
+               } all_reduce;
+               // CODES_WK_RECV and CODES_WK_IRECV
+               struct {
+                   double saved_recv_time;
+                   double saved_recv_time_sample;
+               } recv;
+               // CODES_WK_DELAY
+               struct {
+                   double saved_delay;
+                   double saved_delay_sample;
+               } delay;
+               // CODES_WK_END and CODES_WK_MARK
+               struct {
+                   double saved_marker_time;
+               } mark;
+           };
+       } mpi_next;
+
+       // For MPI_SEND_ARRIVED and MPI_REND_ARRIVED and MPI_SEND_POSTED
+       struct {
+           double saved_wait_time;
+           double saved_wait_time_sample;
+           double saved_recv_time;
+           double saved_recv_time_sample;
+           int64_t saved_num_bytes;
+       } mpi_send;
+
+       // For MPI_REND_ACK_ARRIVED
+       struct {
+           int64_t saved_num_bytes;
+       } mpi_ack;
+
+       // Surrogate variables
+       struct {
+           struct AvgSurrogateSwitchingTimesForApp * config_used;
+       } surr;
    } rc;
 };
 
@@ -838,12 +888,12 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp
     }
     if(bf->c2)
     {
-        s->prev_switch = m->rc.saved_prev_switch;
-        s->saved_perm_dest = m->rc.saved_perm;
+        s->prev_switch = m->rc.gen.saved_prev_switch;
+        s->saved_perm_dest = m->rc.gen.saved_perm;
         tw_rand_reverse_unif(lp->rng);
     }
     int i;
-    for (i=0; i < m->rc.saved_syn_length; i++){
+    for (i=0; i < m->rc.gen.saved_syn_length; i++){
         model_net_event_rc2(lp, &m->event_rc);
         s->gen_data -= payload_sz;
         num_syn_bytes_sent -= payload_sz;
@@ -856,8 +906,10 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp
 
      if(bf->c5)
         finish_bckgnd_traffic_rc(s, bf, m, lp);
-    if(bf->c7)
+    if(bf->c7) {
+        s->saved_perm_dest = m->rc.gen.saved_perm;
         tw_rand_reverse_unif(lp->rng);
+    }
 }
 
 /* generate synthetic traffic */
@@ -897,8 +949,8 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
 
         case PERMUTATION:
         {
-            m->rc.saved_prev_switch = s->prev_switch; //for reverse computation
-            m->rc.saved_perm = s->saved_perm_dest;
+            m->rc.gen.saved_prev_switch = s->prev_switch; //for reverse computation
+            m->rc.gen.saved_perm = s->saved_perm_dest;
 
             length = 1;
             dest_svr = (int*) calloc(1, sizeof(int));
@@ -984,7 +1036,7 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
             tw_error(TW_LOC, "Undefined traffic pattern");
     }   
     /* Record length for reverse handler*/
-    m->rc.saved_syn_length = length;
+    m->rc.gen.saved_syn_length = length;
 
     char prio[12];
 	switch(s->qos_level){
@@ -1075,23 +1127,23 @@ void arrive_syn_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
     num_syn_bytes_recvd -= data;
     s->num_bytes_recvd -= data;
     s->ross_sample.num_bytes_recvd -= data;
-    s->send_time = m->rc.saved_send_time;
-    s->ross_sample.send_time = m->rc.saved_send_time_sample;
+    s->send_time = m->rc.arrive.saved_send_time;
+    s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample;
     if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time)
     {
-        s->max_time = m->rc.saved_prev_max_time;
-        s->ross_sample.max_time = m->rc.saved_prev_max_time;
+        s->max_time = m->rc.arrive.saved_prev_max_time;
+        s->ross_sample.max_time = m->rc.arrive.saved_prev_max_time;
     }
 }
 void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
 {
     (void)bf;
     (void)lp;
-    m->rc.saved_send_time = s->send_time;
-    m->rc.saved_send_time_sample = s->ross_sample.send_time;
+    m->rc.arrive.saved_send_time = s->send_time;
+    m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time;
     if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time)
     {
-        m->rc.saved_prev_max_time = s->max_time;
+        m->rc.arrive.saved_prev_max_time = s->max_time;
         s->max_time = tw_now(lp) - m->fwd.sim_start_time;
         s->ross_sample.max_time = tw_now(lp) - m->fwd.sim_start_time;
     }
@@ -1176,7 +1228,7 @@ static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_sta
 }
 
 static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
-    m->rc.switch_config_used->done = false;
+    m->rc.surr.config_used->done = false;
 }
 
 static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
@@ -1186,7 +1238,7 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message *
     struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
     assert(switch_config != NULL);
     int const resume_at_iter = switch_config->resume_at_iter;
-    m->rc.switch_config_used = switch_config;
+    m->rc.surr.config_used = switch_config;
 
     // consuming all events until indicated iteration is reached
     bool reached_end = false;
@@ -1643,8 +1695,8 @@ static int rm_matching_rcv(nw_state * ns,
         else
         {
             bf->c12 = 1;
-            m->rc.saved_recv_time = ns->recv_time;
-            m->rc.saved_recv_time_sample = ns->ross_sample.recv_time;
+            m->rc.mpi_send.saved_recv_time = ns->recv_time;
+            m->rc.mpi_send.saved_recv_time_sample = ns->ross_sample.recv_time;
             ns->recv_time += (tw_now(lp) - m->fwd.sim_start_time);
             ns->ross_sample.recv_time += (tw_now(lp) - m->fwd.sim_start_time);
         }
@@ -1711,8 +1763,8 @@ static int rm_matching_send(nw_state * ns,
             send_ack_back(ns, bf, m, lp, qi, qitem->req_id);
         }
 
-        m->rc.saved_recv_time = ns->recv_time;
-        m->rc.saved_recv_time_sample = ns->ross_sample.recv_time;
+        m->rc.mpi_next.recv.saved_recv_time = ns->recv_time;
+        m->rc.mpi_next.recv.saved_recv_time_sample = ns->ross_sample.recv_time;
         ns->recv_time += (tw_now(lp) - qitem->req_init_time);
         ns->ross_sample.recv_time += (tw_now(lp) - qitem->req_init_time);
 
@@ -1774,8 +1826,8 @@ static void codes_exec_comp_delay(
 	tw_stime ts;
 	nw_message* msg;
 
-    m->rc.saved_delay = s->compute_time;
-    m->rc.saved_delay_sample = s->ross_sample.compute_time;
+    m->rc.mpi_next.delay.saved_delay = s->compute_time;
+    m->rc.mpi_next.delay.saved_delay_sample = s->ross_sample.compute_time;
     s->compute_time += (mpi_op->u.delay.nsecs/compute_time_speedup);
     s->ross_sample.compute_time += (mpi_op->u.delay.nsecs/compute_time_speedup);
     ts = (mpi_op->u.delay.nsecs/compute_time_speedup);
@@ -1811,8 +1863,8 @@ static void codes_exec_mpi_recv_rc(
         nw_message* m,
         tw_lp* lp)
 {
-	ns->recv_time = m->rc.saved_recv_time;
-	ns->ross_sample.recv_time = m->rc.saved_recv_time_sample;
+	ns->recv_time = m->rc.mpi_next.recv.saved_recv_time;
+	ns->ross_sample.recv_time = m->rc.mpi_next.recv.saved_recv_time_sample;
 
     if(bf->c11)
         codes_issue_next_event_rc(lp);
@@ -1871,8 +1923,8 @@ static void codes_exec_mpi_recv(
    If no matching isend is found, the receive operation is queued in the pending queue of
    receive operations. */
 
-    m->rc.saved_recv_time = s->recv_time;
-    m->rc.saved_recv_time_sample = s->ross_sample.recv_time;
+    m->rc.mpi_next.recv.saved_recv_time = s->recv_time;
+    m->rc.mpi_next.recv.saved_recv_time_sample = s->ross_sample.recv_time;
 
     mpi_msgs_queue * recv_op = (mpi_msgs_queue*) malloc(sizeof(mpi_msgs_queue));
     recv_op->req_init_time = tw_now(lp);
@@ -1942,7 +1994,7 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_
            int indx = s->sampling_indx;
 
            s->mpi_wkld_samples[indx].num_sends_sample--;
-           s->mpi_wkld_samples[indx].num_bytes_sample -= m->rc.saved_num_bytes;
+           s->mpi_wkld_samples[indx].num_bytes_sample -= m->rc.mpi_ack.saved_num_bytes;
 
            if(bf->c1)
            {
@@ -1968,9 +2020,9 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_
 
         if(bf->c3)
         {
-            s->num_bytes_sent -= m->rc.saved_num_bytes;
-            s->ross_sample.num_bytes_sent -= m->rc.saved_num_bytes;
-            num_bytes_sent -= m->rc.saved_num_bytes;
+            s->num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes;
+            s->ross_sample.num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes;
+            num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes;
         }
 }
 /* executes MPI send and isend operations */
@@ -2033,7 +2085,7 @@ static void codes_exec_mpi_send(nw_state* s,
 
     if(lp->gid == TRACK_LP)
         printf("\n Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag);
-    m->rc.saved_num_bytes = mpi_op->u.send.num_bytes;
+        m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes;
 	/* model-net event */
 	tw_lpid dest_rank = codes_mapping_get_lpid_from_relative(global_dest_rank, NULL, "nw-lp", NULL, 0);
 
@@ -2186,8 +2238,8 @@ static void update_completed_queue_rc(nw_state * s, tw_bf * bf, nw_message * m,
     {
        struct pending_waits* wait_elem = (struct pending_waits*)rc_stack_pop(s->processed_wait_op);
        s->wait_op = wait_elem;
-       s->wait_time = m->rc.saved_wait_time;
-       s->ross_sample.wait_time = m->rc.saved_wait_time_sample;
+       s->wait_time = m->rc.mpi_send.saved_wait_time;
+       s->ross_sample.wait_time = m->rc.mpi_send.saved_wait_time_sample;
        add_completed_reqs(s, lp, m->fwd.num_matched);
        codes_issue_next_event_rc(lp);
     }
@@ -2228,8 +2280,8 @@ static void update_completed_queue(nw_state* s,
             bf->c31 = 1;
             m->fwd.num_matched = clear_completed_reqs(s, lp, s->wait_op->req_ids, s->wait_op->count);
     
-            m->rc.saved_wait_time = s->wait_time;
-            m->rc.saved_wait_time_sample = s->ross_sample.wait_time;
+            m->rc.mpi_send.saved_wait_time = s->wait_time;
+            m->rc.mpi_send.saved_wait_time_sample = s->ross_sample.wait_time;
             s->wait_time += (tw_now(lp) - s->wait_op->start_time);
             s->ross_sample.wait_time += (tw_now(lp) - s->wait_op->start_time);
 
@@ -2345,8 +2397,8 @@ static void update_arrival_queue_rc(nw_state* s,
         }
         if(bf->c12)
         {
-            s->recv_time = m->rc.saved_recv_time;
-            s->ross_sample.recv_time = m->rc.saved_recv_time_sample;
+            s->recv_time = m->rc.mpi_send.saved_recv_time;
+            s->ross_sample.recv_time = m->rc.mpi_send.saved_recv_time_sample;
         }
         
         //if(bf->c10)
@@ -2374,8 +2426,8 @@ static void update_arrival_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
 
     //if(s->local_rank != m->fwd.dest_rank)
     //    printf("\n Dest rank %d local rank %d ", m->fwd.dest_rank, s->local_rank);
-    m->rc.saved_recv_time = s->recv_time;
-    m->rc.saved_recv_time_sample = s->ross_sample.recv_time;
+    m->rc.mpi_send.saved_recv_time = s->recv_time;
+    m->rc.mpi_send.saved_recv_time_sample = s->ross_sample.recv_time;
     s->num_bytes_recvd += m->fwd.num_bytes;
     s->ross_sample.num_bytes_recvd += m->fwd.num_bytes;
     num_bytes_recvd += m->fwd.num_bytes;
@@ -2438,8 +2490,8 @@ static void update_message_time(
     (void)bf;
     (void)lp;
 
-    m->rc.saved_send_time = s->send_time;
-    m->rc.saved_send_time_sample = s->ross_sample.send_time;
+    m->rc.arrive.saved_send_time = s->send_time;
+    m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time;
     s->send_time += m->fwd.msg_send_time;
     s->ross_sample.send_time += m->fwd.msg_send_time;
 }
@@ -2452,8 +2504,8 @@ static void update_message_time_rc(
 {
     (void)bf;
     (void)lp;
-    s->send_time = m->rc.saved_send_time;
-    s->ross_sample.send_time = m->rc.saved_send_time_sample;
+    s->send_time = m->rc.arrive.saved_send_time;
+    s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample;
 }
 
 /* initializes the network node LP, loads the trace file in the structs, calls the first MPI operation to be executed */
@@ -2772,8 +2824,8 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
                 codes_issue_next_event(lp);
             }
             
-            m->rc.saved_recv_time = s->recv_time;
-            m->rc.saved_recv_time_sample = s->ross_sample.recv_time;
+            m->rc.mpi_send.saved_recv_time = s->recv_time;
+            m->rc.mpi_send.saved_recv_time_sample = s->ross_sample.recv_time;
             s->recv_time += (tw_now(lp) - m->fwd.sim_start_time);
             s->ross_sample.recv_time += (tw_now(lp) - m->fwd.sim_start_time);
 
@@ -2832,9 +2884,9 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
         break;
 
         case CLI_BCKGND_CHANGE:
-            m->rc.saved_send_time = mean_interval_of_job[s->app_id];  // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE
+            m->rc.change.saved_send_time = mean_interval_of_job[s->app_id];  // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE
             mean_interval_of_job[s->app_id] = m->fwd.msg_send_time;
-            m->rc.saved_marker_time = tw_now(lp);
+            m->rc.change.saved_marker_time = tw_now(lp);
             break;
 
         case CLI_BCKGND_ARRIVE:
@@ -2904,8 +2956,8 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
             {
                 // if (bf->c28)
                 //     tw_rand_reverse_unif(lp->rng);
-                s->compute_time = m->rc.saved_delay;
-                s->ross_sample.compute_time = m->rc.saved_delay_sample;
+                s->compute_time = m->rc.mpi_next.delay.saved_delay;
+                s->ross_sample.compute_time = m->rc.mpi_next.delay.saved_delay_sample;
             }
 		}
 		break;
@@ -2914,8 +2966,8 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
             if(bf->c27)
             {
                 s->num_all_reduce--;
-                s->col_time = m->rc.saved_send_time; 
-                s->all_reduce_time = m->rc.saved_delay;
+                s->col_time = m->rc.mpi_next.all_reduce.saved_send_time;
+                s->all_reduce_time = m->rc.mpi_next.all_reduce.saved_delay;
             }
             else
             {
@@ -2992,7 +3044,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
             /* Notify ranks from other job that checkpoint traffic has
              * completed */
             //int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
-            m->rc.saved_marker_time = tw_now(lp);
+            m->rc.mpi_next.mark.saved_marker_time = tw_now(lp);
             notify_root_rank(s, lp, bf, m);
             // printf("Client rank %llu completed workload, local rank %d .\n", s->nw_id, s->local_rank);
 
@@ -3060,9 +3112,9 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
                 if(s->col_time > 0)
                 {
                     bf->c27 = 1;
-                    m->rc.saved_delay = s->all_reduce_time;
+                    m->rc.mpi_next.all_reduce.saved_delay = s->all_reduce_time;
                     s->all_reduce_time += tw_now(lp) - s->col_time;
-                    m->rc.saved_send_time = s->col_time;
+                    m->rc.mpi_next.all_reduce.saved_send_time = s->col_time;
                     s->col_time = 0;
                     s->num_all_reduce++;
                 }
@@ -3089,7 +3141,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 
 		case CODES_WK_MARK:
 			{
-                m->rc.saved_marker_time = tw_now(lp);
+                m->rc.mpi_next.mark.saved_marker_time = tw_now(lp);
 
                 // If we have reached the surrogate switch time, skip next iteration(s)
                 if (have_we_hit_surrogate_switch(s, mpi_op)) {
@@ -3279,8 +3331,8 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l
             if(bf->c8)
                 update_completed_queue_rc(s, bf, m, lp);
             
-            s->recv_time = m->rc.saved_recv_time;
-            s->ross_sample.recv_time = m->rc.saved_recv_time_sample;
+            s->recv_time = m->rc.mpi_send.saved_recv_time;
+            s->ross_sample.recv_time = m->rc.mpi_send.saved_recv_time_sample;
         }
         break;
 
@@ -3293,7 +3345,7 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l
             break;
 
         case CLI_BCKGND_CHANGE:
-	    mean_interval_of_job[s->app_id] = m->rc.saved_send_time;
+	    mean_interval_of_job[s->app_id] = m->rc.change.saved_send_time;
 	    break;
 
         case CLI_BCKGND_ARRIVE:
@@ -3325,11 +3377,11 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
         case MPI_OP_GET_NEXT:
             switch (m->mpi_op->op_type) {
                 case CODES_WK_END:
-                    printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.saved_marker_time);
+                    printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.mpi_next.mark.saved_marker_time);
                     break;
 
                 case CODES_WK_MARK:
-                    fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.saved_marker_time);
+                    fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time);
 
                     if (OUTPUT_MARKS)
                     {
@@ -3340,7 +3392,7 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
 
                         char tag_line[32];
                         int written;
-                        written = sprintf(tag_line, "%llu %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time);
+                        written = sprintf(tag_line, "%llu %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time);
                         lp_io_write(lp->gid, marker_filename, written, tag_line);
                     }
                     break;

From 9a5bf98ac37b8631be47c51642acb00fe8494da1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 24 Feb 2025 14:22:44 -0500
Subject: [PATCH 101/188] Print function for struct codes_workload_op and enum
 codes_workload_op_type

---
 codes/codes-workload.h        |   4 +
 src/workload/codes-workload.c | 150 ++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git a/codes/codes-workload.h b/codes/codes-workload.h
index 2361ac4b..5ac6b333 100644
--- a/codes/codes-workload.h
+++ b/codes/codes-workload.h
@@ -381,6 +381,10 @@ void codes_workload_add_method(struct codes_workload_method const * method);
  * will shut down automatically once they have issued their last event.
  */
 
+/* Printing event :) */
+void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin);
+char const * const op_type_string(enum codes_workload_op_type op_type);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c
index aec7108e..a66e517a 100644
--- a/src/workload/codes-workload.c
+++ b/src/workload/codes-workload.c
@@ -550,6 +550,156 @@ void codes_workload_add_method(struct codes_workload_method const * method)
     method_array[num_user_methods++] = method;
 }
 
+char const * const op_type_string(enum codes_workload_op_type op_type) {
+    switch(op_type) {
+        case CODES_WK_END:        return "CODES_WK_END";
+        case CODES_WK_DELAY:      return "CODES_WK_DELAY";
+        case CODES_WK_BARRIER:    return "CODES_WK_BARRIER";
+        case CODES_WK_OPEN:       return "CODES_WK_OPEN";
+        case CODES_WK_CLOSE:      return "CODES_WK_CLOSE";
+        case CODES_WK_WRITE:      return "CODES_WK_WRITE";
+        case CODES_WK_READ:       return "CODES_WK_READ";
+        case CODES_WK_SEND:       return "CODES_WK_SEND";
+        case CODES_WK_RECV:       return "CODES_WK_RECV";
+        case CODES_WK_ISEND:      return "CODES_WK_ISEND";
+        case CODES_WK_IRECV:      return "CODES_WK_IRECV";
+        case CODES_WK_BCAST:      return "CODES_WK_BCAST";
+        case CODES_WK_ALLGATHER:  return "CODES_WK_ALLGATHER";
+        case CODES_WK_ALLGATHERV: return "CODES_WK_ALLGATHERV";
+        case CODES_WK_ALLTOALL:   return "CODES_WK_ALLTOALL";
+        case CODES_WK_ALLTOALLV:  return "CODES_WK_ALLTOALLV";
+        case CODES_WK_REDUCE:     return "CODES_WK_REDUCE";
+        case CODES_WK_ALLREDUCE:  return "CODES_WK_ALLREDUCE";
+        case CODES_WK_COL:        return "CODES_WK_COL";
+        case CODES_WK_WAITALL:    return "CODES_WK_WAITALL";
+        case CODES_WK_WAIT:       return "CODES_WK_WAIT";
+        case CODES_WK_WAITSOME:   return "CODES_WK_WAITSOME";
+        case CODES_WK_WAITANY:    return "CODES_WK_WAITANY";
+        case CODES_WK_TESTALL:    return "CODES_WK_TESTALL";
+        case CODES_WK_REQ_FREE:   return "CODES_WK_REQ_FREE";
+        case CODES_WK_IGNORE:     return "CODES_WK_IGNORE";
+        case CODES_WK_MPI_OPEN:   return "CODES_WK_MPI_OPEN";
+        case CODES_WK_MPI_CLOSE:  return "CODES_WK_MPI_CLOSE";
+        case CODES_WK_MPI_WRITE:  return "CODES_WK_MPI_WRITE";
+        case CODES_WK_MPI_READ:   return "CODES_WK_MPI_READ";
+        case CODES_WK_MPI_COLL_OPEN:  return "CODES_WK_MPI_COLL_OPEN";
+        case CODES_WK_MPI_COLL_WRITE: return "CODES_WK_MPI_COLL_WRITE";
+        case CODES_WK_MPI_COLL_READ:  return "CODES_WK_MPI_COLL_READ";
+        case CODES_WK_MARK: return "CODES_WK_MARK";
+        default: return "UNKNOWN!!";
+    }
+}
+
+// Initial implementation by Claude.ai
+void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin) {
+    if (op == NULL) {
+        return;
+    }
+
+    // Print common fields first
+    fprintf(out, "%sop_type = %s\n", begin, op_type_string(op->op_type));
+
+    fprintf(out, "%s          start_time = %f\n", begin, op->start_time);
+    fprintf(out, "%s            end_time = %f\n", begin, op->end_time);
+    fprintf(out, "%s      sim_start_time = %f\n", begin, op->sim_start_time);
+    fprintf(out, "%s         sequence_id = %ld\n", begin, op->sequence_id);
+
+    // Print union fields based on op_type
+    switch(op->op_type) {
+        case CODES_WK_DELAY:
+            fprintf(out, "%s       delay.seconds = %f\n", begin, op->u.delay.seconds);
+            fprintf(out, "%s         delay.nsecs = %f\n", begin, op->u.delay.nsecs);
+            break;
+
+        case CODES_WK_BARRIER:
+            fprintf(out, "%s       barrier.count = %d\n", begin, op->u.barrier.count);
+            fprintf(out, "%s        barrier.root = %d\n", begin, op->u.barrier.root);
+            break;
+
+        case CODES_WK_OPEN:
+        case CODES_WK_MPI_OPEN:
+        case CODES_WK_MPI_COLL_OPEN:
+            fprintf(out, "%s        open.file_id = %lu\n", begin, op->u.open.file_id);
+            fprintf(out, "%s    open.create_flag = %d\n", begin, op->u.open.create_flag);
+            break;
+
+        case CODES_WK_WRITE:
+        case CODES_WK_MPI_WRITE:
+        case CODES_WK_MPI_COLL_WRITE:
+            fprintf(out, "%s       write.file_id = %lu\n", begin, op->u.write.file_id);
+            fprintf(out, "%s        write.offset = %ld\n", begin, op->u.write.offset);
+            fprintf(out, "%s          write.size = %zu\n", begin, op->u.write.size);
+            break;
+
+        case CODES_WK_READ:
+        case CODES_WK_MPI_READ:
+        case CODES_WK_MPI_COLL_READ:
+            fprintf(out, "%s        read.file_id = %lu\n", begin, op->u.read.file_id);
+            fprintf(out, "%s         read.offset = %ld\n", begin, op->u.read.offset);
+            fprintf(out, "%s           read.size = %zu\n", begin, op->u.read.size);
+            break;
+
+        case CODES_WK_CLOSE:
+        case CODES_WK_MPI_CLOSE:
+            fprintf(out, "%s       close.file_id = %lu\n", begin, op->u.close.file_id);
+            break;
+
+        case CODES_WK_SEND:
+        case CODES_WK_ISEND:
+            fprintf(out, "%s    send.source_rank = %d\n", begin, op->u.send.source_rank);
+            fprintf(out, "%s      send.dest_rank = %d\n", begin, op->u.send.dest_rank);
+            fprintf(out, "%s      send.num_bytes = %ld\n", begin, op->u.send.num_bytes);
+            fprintf(out, "%s      send.data_type = %d\n", begin, op->u.send.data_type);
+            fprintf(out, "%s          send.count = %d\n", begin, op->u.send.count);
+            fprintf(out, "%s            send.tag = %d\n", begin, op->u.send.tag);
+            fprintf(out, "%s         send.req_id = %u\n", begin, op->u.send.req_id);
+            break;
+
+        case CODES_WK_RECV:
+        case CODES_WK_IRECV:
+            fprintf(out, "%s    recv.source_rank = %d\n", begin, op->u.recv.source_rank);
+            fprintf(out, "%s      recv.dest_rank = %d\n", begin, op->u.recv.dest_rank);
+            fprintf(out, "%s      recv.num_bytes = %ld\n", begin, op->u.recv.num_bytes);
+            fprintf(out, "%s      recv.data_type = %d\n", begin, op->u.recv.data_type);
+            fprintf(out, "%s          recv.count = %d\n", begin, op->u.recv.count);
+            fprintf(out, "%s            recv.tag = %d\n", begin, op->u.recv.tag);
+            fprintf(out, "%s         recv.req_id = %u\n", begin, op->u.recv.req_id);
+            break;
+
+        case CODES_WK_COL:
+        case CODES_WK_BCAST:
+        case CODES_WK_ALLGATHER:
+        case CODES_WK_ALLGATHERV:
+        case CODES_WK_ALLTOALL:
+        case CODES_WK_ALLTOALLV:
+        case CODES_WK_REDUCE:
+        case CODES_WK_ALLREDUCE:
+            fprintf(out, "%scollective.num_bytes = %d\n", begin, op->u.collective.num_bytes);
+            break;
+
+        case CODES_WK_WAITALL:
+        case CODES_WK_WAITSOME:
+        case CODES_WK_WAITANY:
+        case CODES_WK_TESTALL:
+            fprintf(out, "%s         waits.count = %d\n", begin, op->u.waits.count);
+            fprintf(out, "%s      waits.req_ids  = %p\n", begin, op->u.waits.req_ids);
+            break;
+
+        case CODES_WK_WAIT:
+            fprintf(out, "%s         wait.req_id = %u\n", begin, op->u.wait.req_id);
+            break;
+
+        case CODES_WK_REQ_FREE:
+            fprintf(out, "%s         free.req_id = %u\n", begin, op->u.free.req_id);
+            break;
+
+        case CODES_WK_END:
+        case CODES_WK_IGNORE:
+        case CODES_WK_MARK:
+            break;
+    }
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4

From 9da3d364c085f59ab67051ee9ee5d2615a2d38a0 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 24 Feb 2025 14:28:22 -0500
Subject: [PATCH 102/188] Implementing deep copy/check/print for LP state:
 nw_state

---
 codes/quicklist.h                            |  10 +-
 src/network-workloads/model-net-mpi-replay.c | 513 ++++++++++++++++++-
 2 files changed, 521 insertions(+), 2 deletions(-)

diff --git a/codes/quicklist.h b/codes/quicklist.h
index e2647648..5ca78730 100644
--- a/codes/quicklist.h
+++ b/codes/quicklist.h
@@ -193,6 +193,14 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head
 #define qlist_entry(ptr, type, member) \
     ((type *)((char *)(ptr)-(unsigned long)((&((type *)0)->member))))
 
+/**
+ * QLIST_OFFSET - get offset to the member that holds qlist_header
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the qlist_struct within the struct.
+ */
+#define QLIST_OFFSET(type, member) \
+    (unsigned long)((&((type *)0)->member))
+
 /**
  * qlist_for_each	-	iterate over a qlist
  * @pos:	the &struct qlist_head to use as a loop counter.
@@ -252,7 +260,7 @@ static inline int qlist_exists(struct qlist_head *list, struct qlist_head *qlink
     return 0;
 }
 
-static inline int qlist_count(struct qlist_head *list)
+static inline int qlist_count(struct qlist_head const *list)
 {
     struct qlist_head *pos;
     int count = 0;
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index be15fa30..8f944a3c 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -6,6 +6,7 @@
 #include <ross.h>
 #include <inttypes.h>
 #include <stddef.h>
+#include <string.h>
 #include <sys/stat.h>
 #include <sys/resource.h>
 #include "codes/codes-workload.h"
@@ -40,6 +41,7 @@
 #define MAX_PERIODS_PER_APP 512
 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine
 #define OUTPUT_MARKS 0
+#define LP_DEBUG 1
 
 static int msg_size_hash_compare(
             void *key, struct qhash_head *link);
@@ -286,6 +288,9 @@ typedef struct pending_waits pending_waits;
 /* state of the network LP. It contains the pointers to send/receive lists */
 struct nw_state
 {
+#if LP_DEBUG
+	size_t num_events_processed;
+#endif /* if LP_DEBUG */
 	long num_events_per_lp;
 	tw_lpid nw_id;
 	short wrkld_end;
@@ -2771,6 +2776,9 @@ void nw_test_init(nw_state* s, tw_lp* lp)
 void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
 {
     assert(s->app_id >= 0 && s->local_rank >= 0);
+#if LP_DEBUG
+    s->num_events_processed++;
+#endif /* if LP_DEBUG */
 
     //*(int *)bf = (int)0;
     rc_stack_gc(lp, s->matched_reqs);
@@ -3298,6 +3306,10 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
 
 void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
 {
+#if LP_DEBUG
+    s->num_events_processed--;
+#endif /* if LP_DEBUG */
+
 	switch(m->msg_type)
 	{
 		case MPI_SEND_ARRIVED:
@@ -3409,7 +3421,492 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
             break;
 
         case CLI_BCKGND_CHANGE:
-            printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.saved_marker_time, s->app_id, m->fwd.msg_send_time);
+            printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.change.saved_marker_time, s->app_id, m->fwd.msg_send_time);
+            break;
+    }
+}
+
+static void make_qlist_cpy(struct qlist_head * into, struct qlist_head const * from, unsigned int sizeof_elem, unsigned int offset_ql) {
+    assert(sizeof_elem > offset_ql);
+
+    int const num_elems = qlist_count(from);
+    INIT_QLIST_HEAD(into);
+    if (num_elems) {
+        char * pending_recvs = malloc(num_elems * sizeof_elem);
+        if (pending_recvs == NULL) {
+            tw_error(TW_LOC, "Malloc failed!");
+        }
+
+        char * new_entry = pending_recvs;
+        int i = 0;
+        struct qlist_head * ent;
+        qlist_for_each(ent, from) {
+            char * entry = ((char*)ent) - offset_ql;
+
+            mempcpy(new_entry, entry, sizeof_elem);
+            struct qlist_head * new_entry_ql = (void*) (new_entry + offset_ql);
+            new_entry_ql->prev = (void*)(new_entry - sizeof_elem + offset_ql);
+            new_entry_ql->next = (void*)(new_entry + sizeof_elem + offset_ql);
+            i++;
+            new_entry += sizeof_elem;
+        }
+        assert(i == num_elems);
+
+        struct qlist_head * first_ql = (void*)(pending_recvs + offset_ql);
+        struct qlist_head * last_ql = (void*)(pending_recvs + (num_elems - 1) * sizeof_elem + offset_ql);
+        into->next = first_ql;
+        into->prev = last_ql;
+        first_ql->prev = into;
+        last_ql->next = into;
+    }
+}
+
+static void free_qlist_cpy(struct qlist_head * into, unsigned int offset_ql) {
+    if (! qlist_empty(into)) {
+        void * entry = (char *)(into->next) - offset_ql;
+        free(entry);
+    }
+}
+
+// Assumes that ql is at the end of entry!!
+static bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) {
+    int const num_elems = qlist_count(left);
+    if (num_elems != qlist_count(right)) {
+        return false;
+    }
+
+    // Checking element by element
+    int i = 0;
+    struct qlist_head * elem_left = left->next;
+    struct qlist_head * elem_right = right->next;
+    while (elem_left != left) {
+        char * entry_left = (char *)(elem_left) - offset_ql;
+        char * entry_right = (char *)(elem_right) - offset_ql;
+
+        if (!cmp(entry_left, entry_right)) {
+            return false;
+        }
+
+        elem_left = elem_left->next;
+        elem_right = elem_right->next;
+        i++;
+    }
+    assert(i == num_elems);
+    assert(elem_right == right);
+
+    return true;
+}
+
+bool compare_pending_waits(struct pending_waits const * before, struct pending_waits const * after) {
+    // if one is null and the other isn't, then they're not equal
+    if ((before == NULL) != (after == NULL)) {
+        return false;
+    }
+    // only check values if they are not nul
+    if (before == NULL) {
+        return true;
+    }
+
+    bool is_same = true;
+
+    is_same &= before->op_type == after->op_type;
+    is_same &= before->num_completed == after->num_completed;
+    is_same &= before->count == after->count;
+    is_same &= before->start_time == after->start_time;
+
+    for (int i=0; i<before->count; i++) {
+        is_same &= before->req_ids[i] == after->req_ids[i];
+    }
+
+    return is_same;
+}
+
+static bool compare_mpi_msg_queues(mpi_msgs_queue * left, mpi_msgs_queue * right) {
+    bool is_same = true;
+    is_same &= left->op_type == right->op_type;
+    is_same &= left->tag == right->tag;
+    is_same &= left->source_rank == right->source_rank;
+    is_same &= left->dest_rank == right->dest_rank;
+    is_same &= left->num_bytes == right->num_bytes;
+    is_same &= left->req_init_time == right->req_init_time;
+    is_same &= left->req_id == right->req_id;
+    return is_same;
+}
+
+static bool compare_completed_requests(completed_requests * left, completed_requests * right) {
+    bool is_same = true;
+    is_same &= left->req_id == right->req_id;
+    return is_same;
+}
+
+static bool compare_msg_size_info(struct msg_size_info * left, struct msg_size_info * right) {
+    bool is_same = true;
+    is_same &= left->msg_size == right->msg_size;
+    is_same &= left->num_msgs == right->num_msgs;
+    is_same &= left->agg_latency == right->agg_latency;
+    is_same &= left->avg_latency == right->avg_latency;
+    is_same &= left->hash_link.next == right->hash_link.next; // This is not correct, we have to do deep copy this and chek that it is the same
+    is_same &= left->hash_link.prev == right->hash_link.prev;
+    return is_same;
+}
+
+// Deep-copy of nw_state!!
+// Functionality to check for correct implementation of reverse event handler
+static void save_nw_lp_state(nw_state * into, nw_state const * from) {
+    memcpy(into, from, sizeof(nw_state));
+
+    make_qlist_cpy(&into->arrival_queue, &from->arrival_queue,sizeof(mpi_msgs_queue), QLIST_OFFSET(mpi_msgs_queue, ql));
+    make_qlist_cpy(&into->pending_recvs_queue, &from->pending_recvs_queue, sizeof(mpi_msgs_queue), QLIST_OFFSET(mpi_msgs_queue, ql));
+    make_qlist_cpy(&into->completed_reqs, &from->completed_reqs, sizeof(completed_requests), QLIST_OFFSET(completed_requests, ql));
+    make_qlist_cpy(&into->msg_sz_list, &from->msg_sz_list, sizeof(struct msg_size_info), QLIST_OFFSET(struct msg_size_info, ql));
+    // No need to copy msg_sz_table because all data is also in msg_sz_list
+
+    int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
+    into->known_completed_jobs = malloc(num_jobs * sizeof(int));
+    memcpy(into->known_completed_jobs, from->known_completed_jobs, num_jobs * sizeof(int));
+    if (from->wait_op != NULL) {
+        into->wait_op = malloc(sizeof(pending_waits));
+        memcpy(into->wait_op, from->wait_op, sizeof(pending_waits));
+    }
+
+    // Don't forget to make deep copies of any new complex data types that nw_state points to
+}
+
+static void print_mpi_msgs_queue(FILE * out, struct qlist_head * head, char const * before) {
+    mpi_msgs_queue * current = NULL;
+    qlist_for_each_entry(current, head, ql) {
+         fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", before, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id);
+    }
+}
+
+// Cleaning up deep-copy
+static void clean_nw_lp_state(nw_state * into) {
+    free_qlist_cpy(&into->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql));
+    free_qlist_cpy(&into->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql));
+    free_qlist_cpy(&into->completed_reqs, QLIST_OFFSET(completed_requests, ql));
+    free_qlist_cpy(&into->msg_sz_list, QLIST_OFFSET(struct msg_size_info, ql));
+    free(into->known_completed_jobs);
+    if (into->wait_op != NULL) {
+        free(into->wait_op);
+    }
+}
+
+// Checking that deep-copy is the same as original!!
+// Originally filled with a prompt on Claude
+static bool check_nw_lp_state(nw_state * before, nw_state const * after) {
+    bool is_same = true;
+
+    // Basic fields
+    is_same &= (before->num_events_per_lp == after->num_events_per_lp);
+    is_same &= (before->nw_id == after->nw_id);
+    is_same &= (before->wrkld_end == after->wrkld_end);
+    is_same &= (before->app_id == after->app_id);
+    is_same &= (before->local_rank == after->local_rank);
+    is_same &= (before->qos_level == after->qos_level);
+
+    // Pattern and completion flags
+    is_same &= (before->synthetic_pattern == after->synthetic_pattern);
+    is_same &= (before->is_finished == after->is_finished);
+    is_same &= (before->num_own_job_ranks_completed == after->num_own_job_ranks_completed);
+
+    // Operation counts
+    is_same &= (before->num_sends == after->num_sends);
+    is_same &= (before->num_recvs == after->num_recvs);
+    is_same &= (before->num_cols == after->num_cols);
+    is_same &= (before->num_delays == after->num_delays);
+    is_same &= (before->num_wait == after->num_wait);
+    is_same &= (before->num_waitall == after->num_waitall);
+    is_same &= (before->num_waitsome == after->num_waitsome);
+
+    // Timing information
+    is_same &= (before->start_time == after->start_time);
+    is_same &= (before->col_time == after->col_time);
+    is_same &= (before->reduce_time == after->reduce_time);
+    is_same &= (before->num_reduce == after->num_reduce);
+    is_same &= (before->all_reduce_time == after->all_reduce_time);
+    is_same &= (before->num_all_reduce == after->num_all_reduce);
+    is_same &= (before->elapsed_time == after->elapsed_time);
+    is_same &= (before->compute_time == after->compute_time);
+    is_same &= (before->send_time == after->send_time);
+    is_same &= (before->max_time == after->max_time);
+    is_same &= (before->recv_time == after->recv_time);
+    is_same &= (before->wait_time == after->wait_time);
+
+    // Interval and current state
+    is_same &= (before->cur_interval_end == after->cur_interval_end);
+
+    // Data statistics
+    is_same &= (before->num_bytes_sent == after->num_bytes_sent);
+    is_same &= (before->num_bytes_recvd == after->num_bytes_recvd);
+    is_same &= (before->syn_data == after->syn_data);
+    is_same &= (before->gen_data == after->gen_data);
+
+    // Switch and routing information
+    is_same &= (before->prev_switch == after->prev_switch);
+    is_same &= (before->saved_perm_dest == after->saved_perm_dest);
+    is_same &= (before->rc_perm == after->rc_perm);
+
+    // Sampling information
+    is_same &= (before->sampling_indx == after->sampling_indx);
+    //is_same &= (before->max_arr_size == after->max_arr_size);
+
+    // Compare string buffers
+    is_same &= (strcmp(before->output_buf, after->output_buf) == 0);
+    is_same &= (strcmp(before->col_stats, after->col_stats) == 0);
+
+    // Compare switch configuration size
+    is_same &= (before->switch_config_size == after->switch_config_size);
+
+    // Complex elements
+    is_same &= are_qlist_equal(&before->arrival_queue, &after->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues);
+    is_same &= are_qlist_equal(&before->pending_recvs_queue, &after->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues);
+    is_same &= are_qlist_equal(&before->completed_reqs, &after->completed_reqs, QLIST_OFFSET(completed_requests, ql), (bool (*) (void *, void *)) compare_completed_requests);
+    is_same &= are_qlist_equal(&before->msg_sz_list, &after->msg_sz_list, QLIST_OFFSET(struct msg_size_info, ql), (bool (*) (void *, void *)) compare_msg_size_info);
+
+    is_same &= !memcmp(&before->ross_sample, &after->ross_sample, sizeof(struct ross_model_sample));
+
+    int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
+    is_same &= !memcmp(before->known_completed_jobs, after->known_completed_jobs, num_jobs * sizeof(int));
+    is_same &= compare_pending_waits(before->wait_op, after->wait_op);
+
+    // Skipped pointer comparisons (used in reverse computation):
+    // - processed_ops
+    // - processed_wait_op
+    // - matched_reqs
+    // - msg_sz_table
+    // Pointers used in some data collection (IO) or outside of PDES loop
+    // - mpi_wkld_samples
+    // - switch_config
+
+    // There is no need to implement msg_sz_table as all values are already
+    // accounted for in msg_sz_list. We can safely ignore all values in msg_sz_list
+
+    return is_same;
+}
+
+// Originally implemneted with a prompt on Claude.ai (tedious code, easy to check and produce)
+static void print_nw_lp_state(FILE * out, nw_state * state) {
+    int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
+
+#if LP_DEBUG
+    fprintf(out, "  num_events_processed = %zu\n", state->num_events_processed);
+#endif /* if LP_DEBUG */
+    fprintf(out, "     num_events_per_lp = %ld\n", state->num_events_per_lp);
+    fprintf(out, "                 nw_id = %lu\n", state->nw_id);
+    fprintf(out, "             wrkld_end = %d\n", state->wrkld_end);
+    fprintf(out, "                app_id = %d\n", state->app_id);
+    fprintf(out, "            local_rank = %d\n", state->local_rank);
+    fprintf(out, "             qos_level = %d\n", state->qos_level);
+    fprintf(out, "     synthetic_pattern = %d\n", state->synthetic_pattern);
+    fprintf(out, "           is_finished = %d\n", state->is_finished);
+    fprintf(out, "num_own_job_ranks_completed = %d\n", state->num_own_job_ranks_completed);
+    fprintf(out, "  known_completed_jobs[%d] = [", num_jobs);
+    for(int i=0; i<num_jobs; i++) {
+        fprintf(out, "%d%s", state->known_completed_jobs[i], i+1==num_jobs ? "" : ", ");
+    }
+    fprintf(out, "]\n");
+    fprintf(out, "        *processed_ops = %p\n", state->processed_ops);
+    fprintf(out, "    *processed_wait_op = %p\n", state->processed_wait_op);
+    fprintf(out, "         *matched_reqs = %p\n", state->matched_reqs);
+
+    // Operation counts
+    fprintf(out, "             num_sends = %lu\n", state->num_sends);
+    fprintf(out, "             num_recvs = %lu\n", state->num_recvs);
+    fprintf(out, "              num_cols = %lu\n", state->num_cols);
+    fprintf(out, "            num_delays = %lu\n", state->num_delays);
+    fprintf(out, "              num_wait = %lu\n", state->num_wait);
+    fprintf(out, "           num_waitall = %lu\n", state->num_waitall);
+    fprintf(out, "          num_waitsome = %lu\n", state->num_waitsome);
+
+    // Timing information
+    fprintf(out, "            start_time = %g\n", state->start_time);
+    fprintf(out, "              col_time = %g\n", state->col_time);
+    fprintf(out, "           reduce_time = %g\n", state->reduce_time);
+    fprintf(out, "            num_reduce = %d\n", state->num_reduce);
+    fprintf(out, "       all_reduce_time = %g\n", state->all_reduce_time);
+    fprintf(out, "        num_all_reduce = %d\n", state->num_all_reduce);
+    fprintf(out, "          elapsed_time = %g\n", state->elapsed_time);
+    fprintf(out, "          compute_time = %g\n", state->compute_time);
+    fprintf(out, "             send_time = %g\n", state->send_time);
+    fprintf(out, "              max_time = %g\n", state->max_time);
+    fprintf(out, "             recv_time = %g\n", state->recv_time);
+    fprintf(out, "             wait_time = %g\n", state->wait_time);
+
+    // Queue heads
+    fprintf(out, "         arrival_queue[%d] = [\n", qlist_count(&state->arrival_queue));
+    print_mpi_msgs_queue(out, &state->arrival_queue, "            ");
+    fprintf(out, "]\n");
+    fprintf(out, "   pending_recvs_queue[%d] = [\n", qlist_count(&state->pending_recvs_queue));
+    print_mpi_msgs_queue(out, &state->pending_recvs_queue, "            ");
+    fprintf(out, "]\n");
+
+    fprintf(out, "        completed_reqs[%d] = [\n", qlist_count(&state->completed_reqs));
+    completed_requests * current = NULL;
+    qlist_for_each_entry(current, &state->completed_reqs, ql) {
+         fprintf(out, "            Req: req_id: %u\n", current->req_id);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "      cur_interval_end = %g\n", state->cur_interval_end);
+    fprintf(out, "              *wait_op = %p\n", state->wait_op);
+    if (state->wait_op != NULL) {
+        fprintf(out, "                     |.op_type = %d\n", state->wait_op->op_type);
+        fprintf(out, "                     |.req_ids = [");
+        for(int i = 0; i < state->wait_op->count; i++) {
+            fprintf(out, "%d%s", state->wait_op->req_ids[i], i+1==state->wait_op->count ? "" : ", ");
+        }
+        fprintf(out, "]\n");
+        fprintf(out, "                     |.num_completed = %d\n", state->wait_op->num_completed);
+        fprintf(out, "                     |.count = %d\n", state->wait_op->count);
+        fprintf(out, "                     |.start_time = %g\n", state->wait_op->start_time);
+    }
+    fprintf(out, "           msg_sz_list[%d] = [\n", qlist_count(&state->completed_reqs));
+    struct msg_size_info * ms_info = NULL;
+    qlist_for_each_entry(ms_info, &state->msg_sz_list, ql) {
+         fprintf(out, "            MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p  hash_link.prev: %p\n", ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev);
+    }
+    fprintf(out, "]\n");
+
+    // Data statistics
+    fprintf(out, "        num_bytes_sent = %llu\n", state->num_bytes_sent);
+    fprintf(out, "       num_bytes_recvd = %llu\n", state->num_bytes_recvd);
+    fprintf(out, "              syn_data = %llu\n", state->syn_data);
+    fprintf(out, "              gen_data = %llu\n", state->gen_data);
+
+    fprintf(out, "           prev_switch = %lu\n", state->prev_switch);
+    fprintf(out, "       saved_perm_dest = %d\n", state->saved_perm_dest);
+    fprintf(out, "               rc_perm = %lu\n", state->rc_perm);
+
+    // Sampling information
+    fprintf(out, "         sampling_indx = %d\n", state->sampling_indx);
+    fprintf(out, "          max_arr_size = %d\n", state->max_arr_size);
+    fprintf(out, "*     mpi_wkld_samples = %p\n", state->mpi_wkld_samples);
+    fprintf(out, "            output_buf = %.512s...\n", state->output_buf);
+    fprintf(out, "             col_stats = %.64s...\n", state->col_stats);
+
+    fprintf(out, "ross_sample.\n");
+    fprintf(out, "           |          .nw_id = %lu\n", state->ross_sample.nw_id);
+    fprintf(out, "           |         .app_id = %d\n", state->ross_sample.app_id);
+    fprintf(out, "           |     .local_rank = %d\n", state->ross_sample.local_rank);
+    fprintf(out, "           |      .num_sends = %lu\n", state->ross_sample.num_sends);
+    fprintf(out, "           |      .num_recvs = %lu\n", state->ross_sample.num_recvs);
+    fprintf(out, "           | .num_bytes_sent = %llu\n", state->ross_sample.num_bytes_sent);
+    fprintf(out, "           |.num_bytes_recvd = %llu\n", state->ross_sample.num_bytes_recvd);
+    fprintf(out, "           |      .send_time = %g\n", state->ross_sample.send_time);
+    fprintf(out, "           |      .recv_time = %g\n", state->ross_sample.recv_time);
+    fprintf(out, "           |      .wait_time = %g\n", state->ross_sample.wait_time);
+    fprintf(out, "           |   .compute_time = %g\n", state->ross_sample.compute_time);
+    fprintf(out, "           |      .comm_time = %g\n", state->ross_sample.comm_time);
+    fprintf(out, "           |       .max_time = %g\n", state->ross_sample.max_time);
+    fprintf(out, "           |   .avg_msg_time = %g\n", state->ross_sample.avg_msg_time);
+
+    // Configuration
+    fprintf(out, "*        switch_config = %p\n", state->switch_config);
+    fprintf(out, "    switch_config_size = %zu\n", state->switch_config_size);
+}
+
+static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) {
+
+    switch (event_type) {
+        case MPI_OP_GET_NEXT:      return "MPI_OP_GET_NEXT";
+        case MPI_SEND_ARRIVED:     return "MPI_SEND_ARRIVED";
+        case MPI_SEND_ARRIVED_CB:  return "MPI_SEND_ARRIVED_CB";
+        case MPI_SEND_POSTED:      return "MPI_SEND_POSTED";
+        case MPI_REND_ARRIVED:     return "MPI_REND_ARRIVED";
+        case MPI_REND_ACK_ARRIVED: return "MPI_REND_ACK_ARRIVED";
+        case CLI_BCKGND_FIN:       return "CLI_BCKGND_FIN";
+        case CLI_BCKGND_ARRIVE:    return "CLI_BCKGND_ARRIVE";
+        case CLI_BCKGND_GEN:       return "CLI_BCKGND_GEN";
+        case CLI_BCKGND_CHANGE:    return "CLI_BCKGND_CHANGE";
+        case CLI_NBR_FINISH:       return "CLI_NBR_FINISH";
+        case CLI_OTHER_FINISH:     return "CLI_OTHER_FINISH";
+        case SURR_SKIP_ITERATION:  return "SURR_SKIP_ITERATION";
+        default: return "UNKNOWN!!";
+    }
+
+}
+
+// Original printing function from Claude.ai
+static void print_nw_message(FILE * out, struct nw_message * msg) {
+    // Print main fields
+    fprintf(out, "msg_type = %s\n", MPI_NW_EVENTS_to_string(msg->msg_type));
+    fprintf(out, " op_type = %s\n", op_type_string(msg->op_type));
+    fprintf(out, "num_rngs = %d\n", msg->num_rngs);
+    fprintf(out, "event_rc = %d\n", msg->event_rc);
+    fprintf(out, "  mpi_op = %p\n", msg->mpi_op);
+    fprint_codes_workload_op(out, msg->mpi_op, "        |");
+
+    fprintf(out, "fwd\n");
+    fprintf(out, "  |      .src_rank = %lu\n", msg->fwd.src_rank);
+    fprintf(out, "  |     .dest_rank = %d\n", msg->fwd.dest_rank);
+    fprintf(out, "  |     .num_bytes = %ld\n", msg->fwd.num_bytes);
+    fprintf(out, "  |   .num_matched = %d\n", msg->fwd.num_matched);
+    fprintf(out, "  |.sim_start_time = %g\n", msg->fwd.sim_start_time);
+    fprintf(out, "  | .msg_send_time = %g\n", msg->fwd.msg_send_time);
+    fprintf(out, "  |        .req_id = %u\n", msg->fwd.req_id);
+    fprintf(out, "  |   .matched_req = %d\n", msg->fwd.matched_req);
+    fprintf(out, "  |           .tag = %d\n", msg->fwd.tag);
+    fprintf(out, "  |        .app_id = %d\n", msg->fwd.app_id);
+    fprintf(out, "  |   .found_match = %d\n", msg->fwd.found_match);
+    fprintf(out, "  |.wait_completed = %d\n", msg->fwd.wait_completed);
+    fprintf(out, "  |     .rend_send = %d\n", msg->fwd.rend_send);
+
+    fprintf(out, "rc\n");
+    switch(msg->msg_type) {
+        case CLI_BCKGND_GEN:
+            fprintf(out, "  |.gen\n");
+            fprintf(out, "      | .saved_syn_length = %d\n", msg->rc.gen.saved_syn_length);
+            fprintf(out, "      |       .saved_perm = %d\n", msg->rc.gen.saved_perm);
+            fprintf(out, "      |.saved_prev_switch = %lu\n", msg->rc.gen.saved_prev_switch);
+            break;
+
+        case CLI_BCKGND_ARRIVE:
+        case MPI_SEND_ARRIVED_CB:
+            fprintf(out, "  |arrive.saved_prev_max_time = %g\n", msg->rc.arrive.saved_prev_max_time);
+            fprintf(out, "  |    arrive.saved_send_time = %g\n", msg->rc.arrive.saved_send_time);
+            fprintf(out, "  |arrive.saved_send_time_sample = %g\n", msg->rc.arrive.saved_send_time_sample);
+            break;
+
+        case CLI_BCKGND_CHANGE:
+            fprintf(out, "  |   change.saved_send_time = %g\n", msg->rc.change.saved_send_time);
+            fprintf(out, "  | change.saved_marker_time = %g\n", msg->rc.change.saved_marker_time);
+            break;
+
+        case MPI_OP_GET_NEXT:
+            fprintf(out, "   .mpi_next\n");
+            fprintf(out, "           |.saved_elapsed_time = %g\n", msg->rc.mpi_next.saved_elapsed_time);
+            fprintf(out, "           |.all_reduce.saved_send_time = %g\n", msg->rc.mpi_next.all_reduce.saved_send_time);
+            fprintf(out, "           |.all_reduce.saved_delay = %g\n", msg->rc.mpi_next.all_reduce.saved_delay);
+
+            fprintf(out, "           |.recv.saved_recv_time = %g\n", msg->rc.mpi_next.recv.saved_recv_time);
+            fprintf(out, "           |.recv.saved_recv_time_sample = %g\n", msg->rc.mpi_next.recv.saved_recv_time_sample);
+
+            fprintf(out, "           |.delay.saved_delay = %g\n", msg->rc.mpi_next.delay.saved_delay);
+            fprintf(out, "           |.delay.saved_delay_sample = %g\n", msg->rc.mpi_next.delay.saved_delay_sample);
+
+            fprintf(out, "           |.mark.saved_marker_time = %g\n", msg->rc.mpi_next.mark.saved_marker_time);
+            break;
+
+        case MPI_SEND_ARRIVED:
+        case MPI_REND_ARRIVED:
+        case MPI_SEND_POSTED:
+            fprintf(out, "  |.mpi_send\n");
+            fprintf(out, "           |       .saved_wait_time = %g\n", msg->rc.mpi_send.saved_wait_time);
+            fprintf(out, "           |.saved_wait_time_sample = %g\n", msg->rc.mpi_send.saved_wait_time_sample);
+            fprintf(out, "           |       .saved_recv_time = %g\n", msg->rc.mpi_send.saved_recv_time);
+            fprintf(out, "           |.saved_recv_time_sample = %g\n", msg->rc.mpi_send.saved_recv_time_sample);
+            fprintf(out, "           |       .saved_num_bytes = %lu\n", msg->rc.mpi_send.saved_num_bytes);
+            break;
+
+        case MPI_REND_ACK_ARRIVED:
+            fprintf(out, "  |  mpi_ack.saved_num_bytes = %ld\n", msg->rc.mpi_ack.saved_num_bytes);
+            break;
+
+        case SURR_SKIP_ITERATION:
+            fprintf(out, "  |        surr.config_used = %p\n", msg->rc.surr.config_used);
+            break;
+
+        default:
             break;
     }
 }
@@ -3469,9 +3966,23 @@ const tw_lptype* nw_get_lp_type()
             return(&nw_lp);
 }
 
+// ROSS function pointer table to check reverse event handler
+crv_checkpointer nw_lp_chkptr = {
+    &nw_lp,
+    0,
+    (save_checkpoint_state_f) save_nw_lp_state,
+    (clean_checkpoint_state_f) clean_nw_lp_state,
+    (check_states_f) check_nw_lp_state,
+    (print_lpstate_f) print_nw_lp_state,
+    (print_checkpoint_state_f) print_nw_lp_state,
+    (print_event_f) print_nw_message,
+};
+
 static void nw_add_lp_type()
 {
   lp_type_register("nw-lp", nw_get_lp_type());
+  // registering custom print for nw_lp LPs
+  crv_add_custom_state_checkpoint(&nw_lp_chkptr);
 }
 
 /* setup for the ROSS event tracing

From 6e97889fa403b719bbe7e6db6cf7d0ba0fc09164 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 24 Feb 2025 14:31:16 -0500
Subject: [PATCH 103/188] Fixing minor reversibility bugs in LP type nw_state

---
 codes/quicklist.h                            | 19 +++++++
 src/network-workloads/model-net-mpi-replay.c | 53 ++++++++------------
 2 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/codes/quicklist.h b/codes/quicklist.h
index 5ca78730..bacc2c44 100644
--- a/codes/quicklist.h
+++ b/codes/quicklist.h
@@ -276,6 +276,25 @@ static inline int qlist_count(struct qlist_head const *list)
     return count;
 }
 
+static inline void qlist_add_at_index(struct qlist_head *newi, struct qlist_head *list, int index)
+{
+    if (index < 0)
+    {
+        while(index++)
+        {
+            list = list->prev;
+        }
+    }
+    else
+    {
+        while(index--)
+        {
+            list = list->next;
+        }
+    }
+    __qlist_add(newi, list, list->next);
+}
+
 static inline struct qlist_head * qlist_find(
     struct qlist_head *list,
     int (*compare)(struct qlist_head *, void *),
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 8f944a3c..902e82d9 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -864,6 +864,7 @@ void finish_bckgnd_traffic_rc(
         (void)lp;
 
         ns->is_finished = 0;
+        ns->elapsed_time = msg->rc.mpi_next.saved_elapsed_time;
         return;
 }
 void finish_bckgnd_traffic(
@@ -875,6 +876,7 @@ void finish_bckgnd_traffic(
         (void)b;
         (void)msg;
         ns->is_finished = 1;
+        msg->rc.mpi_next.saved_elapsed_time = ns->elapsed_time;
         ns->elapsed_time = tw_now(lp) - ns->start_time;
 
         printf("\n LP %llu App %d completed sending data %llu completed at time %lf ", LLU(lp->gid),ns->app_id, ns->gen_data, tw_now(lp));
@@ -1331,39 +1333,24 @@ static int clear_completed_reqs(nw_state * s,
     (void)s;
     (void)lp;
 
-    int i, matched = 0;
+    int matched = 0;
 
-    for( i = 0; i < count; i++)
-    {
-      struct qlist_head * ent = NULL;
-      struct completed_requests * current = NULL;
-      struct completed_requests * prev = NULL;
+    struct qlist_head * ent, * _;
+    struct completed_requests * current = NULL;
 
-      int index = 0;
-      qlist_for_each(ent, &s->completed_reqs)
-       {
-           if(prev)
-           {
-              rc_stack_push(lp, prev, free, s->matched_reqs);
-              prev = NULL;
-           }
-            
-           current = qlist_entry(ent, completed_requests, ql);
-           current->index = index; 
-            if(current->req_id == reqs[i])
-            {
+    int index = 0;
+    qlist_for_each_safe(ent, _, &s->completed_reqs) {
+        current = qlist_entry(ent, completed_requests, ql);
+        for(int i = 0; i < count; i++) {
+            if(current->req_id == reqs[i]) {
+                current->index = index;
                 ++matched;
-                qlist_del(&current->ql);
-                prev = current;
+                qlist_del(ent);
+                rc_stack_push(lp, current, free, s->matched_reqs);
+                break;
             }
-            ++index;
-       }
-
-      if(prev)
-      {
-         rc_stack_push(lp, prev, free, s->matched_reqs);
-         prev = NULL;
-      }
+        }
+        index++;
     }
     return matched;
 }
@@ -1376,7 +1363,7 @@ static void add_completed_reqs(nw_state * s,
     {
        struct completed_requests * req = (struct completed_requests*)rc_stack_pop(s->matched_reqs);
        // turn on only if wait-all unmatched error arises in optimistic mode.
-       qlist_add(&req->ql, &s->completed_reqs);
+       qlist_add_at_index(&req->ql, &s->completed_reqs, req->index - count + i + 1);
     }//end for
 }
 
@@ -1677,6 +1664,7 @@ static int rm_matching_rcv(nw_state * ns,
                 && ((qi->source_rank == qitem->source_rank) || qi->source_rank == -1))
         {
             matched = 1;
+            m->rc.mpi_send.saved_num_bytes = qi->num_bytes;
             qi->num_bytes = qitem->num_bytes;
             break;
         }
@@ -2090,7 +2078,7 @@ static void codes_exec_mpi_send(nw_state* s,
 
     if(lp->gid == TRACK_LP)
         printf("\n Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag);
-        m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes;
+    m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes;
 	/* model-net event */
 	tw_lpid dest_rank = codes_mapping_get_lpid_from_relative(global_dest_rank, NULL, "nw-lp", NULL, 0);
 
@@ -2380,6 +2368,7 @@ static void update_arrival_queue_rc(nw_state* s,
     if(m->fwd.found_match >= 0)
 	{
         mpi_msgs_queue * qi = (mpi_msgs_queue*)rc_stack_pop(s->processed_ops);
+        qi->num_bytes = m->rc.mpi_send.saved_num_bytes;
 //        int queue_count = qlist_count(&s->pending_recvs_queue);
 
         if(m->fwd.found_match == 0)
@@ -2926,6 +2915,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
 	if(m->op_type == CODES_WK_END)
     {
         s->is_finished = 0;
+        s->elapsed_time = m->rc.mpi_next.saved_elapsed_time;
 
         if(bf->c9)
             return;
@@ -3040,6 +3030,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 	
         if(mpi_op->op_type == CODES_WK_END)
         {
+            m->rc.mpi_next.saved_elapsed_time = s->elapsed_time;
             s->elapsed_time = tw_now(lp) - s->start_time;
             s->is_finished = 1;
 

From a3e638e73fbe513a1e55bc46d7082c51879c53a9 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 2 Mar 2025 15:48:39 -0500
Subject: [PATCH 104/188] Adding checkpointer functionality to model-net
 sub-models

---
 codes/model-net-method.h                  |   1 +
 src/networks/model-net/dragonfly-custom.C |   2 +
 src/networks/model-net/dragonfly-dally.C  | 183 +++++++++++++++++++++-
 src/networks/model-net/dragonfly-plus.C   |   2 +
 src/networks/model-net/slimfly.c          |   2 +
 5 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/codes/model-net-method.h b/codes/model-net-method.h
index b6bb01ab..eab2dc7c 100644
--- a/codes/model-net-method.h
+++ b/codes/model-net-method.h
@@ -71,6 +71,7 @@ struct model_net_method
     event_f cc_congestion_event_fn;
     revent_f cc_congestion_event_rc_fn;
     commit_f cc_congestion_event_commit_fn;
+    crv_checkpointer * checkpointer;
 };
 
 extern struct model_net_method * method_array[];
diff --git a/src/networks/model-net/dragonfly-custom.C b/src/networks/model-net/dragonfly-custom.C
index 934827a2..cf7bf21b 100644
--- a/src/networks/model-net/dragonfly-custom.C
+++ b/src/networks/model-net/dragonfly-custom.C
@@ -4022,6 +4022,7 @@ struct model_net_method dragonfly_custom_method =
     NULL,//(final_f)dragonfly_custom_sample_fin
     custom_dragonfly_register_model_types,
     custom_dragonfly_get_model_types,
+    NULL,
 };
 
 struct model_net_method dragonfly_custom_router_method =
@@ -4044,6 +4045,7 @@ struct model_net_method dragonfly_custom_router_method =
     NULL,//(final_f)dragonfly_custom_rsample_fin
     custom_router_register_model_types,
     custom_dfly_router_get_model_types,
+    NULL,
 };
 
 #ifdef ENABLE_CORTEX
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 25bccfe6..76496e9f 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -6924,8 +6924,163 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf,
     msg->num_rngs = 0;
 }
 
+//*** ---------- START OF reverse handler checking functions ---------- ***
+// Print fuction originally constructed with help from Claude.ai
+static void print_terminal_state(FILE * out, terminal_state * state) {
+    fprintf(out, "terminal_state (dragonfly-dally) ->\n");
+    fprintf(out, "  |           packet_counter = %ld\n", state->packet_counter);
+    fprintf(out, "  |               packet_gen = %d\n", state->packet_gen);
+    fprintf(out, "  |               packet_fin = %d\n", state->packet_fin);
+    fprintf(out, "  |           total_gen_size = %d\n", state->total_gen_size);
+    fprintf(out, "  | *              router_lp = %p\n", state->router_lp);
+    fprintf(out, "  | *              router_id = %p\n", state->router_id);
+    fprintf(out, "  |              terminal_id = %u\n", state->terminal_id);
+    fprintf(out, "  |                  connMan = <DragonflyConnectionManager object>\n");
+    fprintf(out, "  | *local_congestion_controller = %p\n", state->local_congestion_controller);
+    fprintf(out, "  |  workload_lpid_to_app_id = <map object>\n");
+    fprintf(out, "  |                  app_ids = <set object>\n");
+    fprintf(out, "  |  workloads_finished_flag = %d\n", state->workloads_finished_flag);
+    fprintf(out, "  | **          vc_occupancy = %p\n", state->vc_occupancy);
+    fprintf(out, "  | *terminal_available_time = %p\n", state->terminal_available_time);
+    fprintf(out, "  | ***        terminal_msgs = %p\n", state->terminal_msgs);
+    fprintf(out, "  | ***   terminal_msgs_tail = %p\n", state->terminal_msgs_tail);
+    fprintf(out, "  | *           in_send_loop = %p\n", state->in_send_loop);
+    fprintf(out, "  |    dragonfly_stats_array = <mn_stats array>\n");
+    fprintf(out, "  | **            qos_status = %p\n", state->qos_status);
+    fprintf(out, "  | **              qos_data = %p\n", state->qos_data);
+    fprintf(out, "  | *           last_qos_lvl = %p\n", state->last_qos_lvl);
+    fprintf(out, "  |         is_monitoring_bw = %d\n", state->is_monitoring_bw);
+    fprintf(out, "  | *                     st = %p\n", state->st);
+    fprintf(out, "  | *                  cc_st = %p\n", state->cc_st);
+    fprintf(out, "  | *              issueIdle = %p\n", state->issueIdle);
+    fprintf(out, "  | **       terminal_length = %p\n", state->terminal_length);
+    fprintf(out, "  | *                   anno = %s\n", state->anno ? state->anno : "(nil)");
+    fprintf(out, "  | *                 params = %p\n", state->params);
+    fprintf(out, "  | *               rank_tbl = %p\n", state->rank_tbl);
+    fprintf(out, "  |             rank_tbl_pop = %lu\n", state->rank_tbl_pop);
+    fprintf(out, "  |               total_time = %f\n", state->total_time);
+    fprintf(out, "  |           total_msg_size = %lu\n", state->total_msg_size);
+    fprintf(out, "  |               total_hops = %f\n", state->total_hops);
+    fprintf(out, "  |            finished_msgs = %ld\n", state->finished_msgs);
+    fprintf(out, "  |          finished_chunks = %ld\n", state->finished_chunks);
+    fprintf(out, "  |         finished_packets = %ld\n", state->finished_packets);
+    fprintf(out, "  | *          last_buf_full = %p\n", state->last_buf_full);
+    fprintf(out, "  | *              busy_time = %p\n", state->busy_time);
+    fprintf(out, "  | *           link_traffic = %p\n", state->link_traffic);
+    fprintf(out, "  | *           total_chunks = %p\n", state->total_chunks);
+    fprintf(out, "  | *         stalled_chunks = %p\n", state->stalled_chunks);
+    fprintf(out, "  |          injected_chunks = %lu\n", state->injected_chunks);
+    fprintf(out, "  |           ejected_chunks = %lu\n", state->ejected_chunks);
+    fprintf(out, "  |              max_latency = %f\n", state->max_latency);
+    fprintf(out, "  |              min_latency = %f\n", state->min_latency);
+    fprintf(out, "  |               output_buf = '%.4096s'\n", state->output_buf);
+    fprintf(out, "  |              output_buf2 = '%.4096s'\n", state->output_buf2);
+    fprintf(out, "  |        fin_chunks_sample = %ld\n", state->fin_chunks_sample);
+    fprintf(out, "  |         data_size_sample = %ld\n", state->data_size_sample);
+    fprintf(out, "  |          fin_hops_sample = %f\n", state->fin_hops_sample);
+    fprintf(out, "  |          fin_chunks_time = %f\n", state->fin_chunks_time);
+    fprintf(out, "  | *       busy_time_sample = %p\n", state->busy_time_sample);
+    fprintf(out, "  |               sample_buf = '%.4096s'\n", state->sample_buf);
+    fprintf(out, "  | *            sample_stat = %p\n", state->sample_stat);
+    fprintf(out, "  |              op_arr_size = %d\n", state->op_arr_size);
+    fprintf(out, "  |             max_arr_size = %d\n", state->max_arr_size);
+    fprintf(out, "  |               fwd_events = %ld\n", state->fwd_events);
+    fprintf(out, "  |               rev_events = %ld\n", state->rev_events);
+    fprintf(out, "  |   fin_chunks_ross_sample = %ld\n", state->fin_chunks_ross_sample);
+    fprintf(out, "  |    data_size_ross_sample = %ld\n", state->data_size_ross_sample);
+    fprintf(out, "  |     fin_hops_ross_sample = %ld\n", state->fin_hops_ross_sample);
+    fprintf(out, "  | fin_chunks_time_ross_sample = %f\n", state->fin_chunks_time_ross_sample);
+    fprintf(out, "  | *  busy_time_ross_sample = %p\n", state->busy_time_ross_sample);
+    fprintf(out, "  |              ross_sample = <dfly_cn_sample object>\n");
+    fprintf(out, "  |             sent_packets = <map object>\n");
+    fprintf(out, "  |      last_packet_sent_id = %lu\n", state->last_packet_sent_id);
+    fprintf(out, "  |   arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time);
+    fprintf(out, "  |     remaining_sz_packets = <map object>\n");
+    fprintf(out, "  |       last_in_queue_time = %f\n", state->last_in_queue_time);
+    fprintf(out, "  | *         predictor_data = %p\n", state->predictor_data);
+    fprintf(out, "  |                  zombies = <set object>\n");
+    fprintf(out, "  | *           frozen_state = %p\n", state->frozen_state);
+}
+
+// Print fuction originally constructed with help from Claude.ai
+static void print_terminal_dally_message(FILE * out, struct terminal_dally_message * msg) {
+    fprintf(out, "terminal_dally_message ->\n");
+    fprintf(out, "  |                      magic = %d\n", msg->magic);
+    fprintf(out, "  |          travel_start_time = %f\n", msg->travel_start_time);
+    fprintf(out, "  |            travel_end_time = %f\n", msg->travel_end_time);
+    fprintf(out, "  |                  packet_ID = %llu\n", msg->packet_ID);
+    fprintf(out, "  |                       type = %d\n", msg->type);
+    fprintf(out, "  |                notify_type = %d\n", msg->notify_type);
+    fprintf(out, "  |                   category = %s\n", msg->category);
+    fprintf(out, "  |             final_dest_gid = %lu\n", msg->final_dest_gid);
+    fprintf(out, "  |                  sender_lp = %lu\n", msg->sender_lp);
+    fprintf(out, "  |               sender_mn_lp = %lu\n", msg->sender_mn_lp);
+    fprintf(out, "  |         dest_terminal_lpid = %lu\n", msg->dest_terminal_lpid);
+    fprintf(out, "  |    dfdally_src_terminal_id = %u\n", msg->dfdally_src_terminal_id);
+    fprintf(out, "  |   dfdally_dest_terminal_id = %u\n", msg->dfdally_dest_terminal_id);
+    fprintf(out, "  |            src_terminal_id = %u\n", msg->src_terminal_id);
+    fprintf(out, "  |           origin_router_id = %u\n", msg->origin_router_id);
+    fprintf(out, "  |                     app_id = %d\n", msg->app_id);
+    fprintf(out, "  |                   my_N_hop = %d\n", msg->my_N_hop);
+    fprintf(out, "  |                   my_l_hop = %d\n", msg->my_l_hop);
+    fprintf(out, "  |                   my_g_hop = %d\n", msg->my_g_hop);
+    fprintf(out, "  |          my_hops_cur_group = %d\n", msg->my_hops_cur_group);
+    fprintf(out, "  |              saved_channel = %d\n", msg->saved_channel);
+    fprintf(out, "  |                   saved_vc = %d\n", msg->saved_vc);
+    fprintf(out, "  |                  next_stop = %d\n", msg->next_stop);
+    fprintf(out, "  |        this_router_arrival = %f\n", msg->this_router_arrival);
+    fprintf(out, "  |    this_router_ptp_latency = %f\n", msg->this_router_ptp_latency);
+    fprintf(out, "  |                 intm_lp_id = %u\n", msg->intm_lp_id);
+    fprintf(out, "  |                   last_hop = %d\n", msg->last_hop);
+    fprintf(out, "  |            is_intm_visited = %d\n", msg->is_intm_visited);
+    fprintf(out, "  |                intm_rtr_id = %d\n", msg->intm_rtr_id);
+    fprintf(out, "  |                intm_grp_id = %d\n", msg->intm_grp_id);
+    fprintf(out, "  |             saved_src_dest = %d\n", msg->saved_src_dest);
+    fprintf(out, "  |             saved_src_chan = %d\n", msg->saved_src_chan);
+    fprintf(out, "  |                   chunk_id = %u\n", msg->chunk_id);
+    fprintf(out, "  |                packet_size = %u\n", msg->packet_size);
+    fprintf(out, "  |                 message_id = %u\n", msg->message_id);
+    fprintf(out, "  |                 total_size = %u\n", msg->total_size);
+    fprintf(out, "  |    remote_event_size_bytes = %d\n", msg->remote_event_size_bytes);
+    fprintf(out, "  |     local_event_size_bytes = %d\n", msg->local_event_size_bytes);
+    fprintf(out, "  |                   vc_index = %d\n", msg->vc_index);
+    fprintf(out, "  |                    rail_id = %d\n", msg->rail_id);
+    fprintf(out, "  |                output_chan = %d\n", msg->output_chan);
+    fprintf(out, "  |                   event_rc = <model_net_event_return object>\n");
+    fprintf(out, "  |                    is_pull = %d\n", msg->is_pull);
+    fprintf(out, "  |                  pull_size = %u\n", msg->pull_size);
+    fprintf(out, "  |                  path_type = %d\n", msg->path_type);
+    fprintf(out, "  |               saved_app_id = %d\n", msg->saved_app_id);
+    fprintf(out, "  | is_there_another_pckt_in_queue = %s\n", msg->is_there_another_pckt_in_queue ? "true" : "false");
+    fprintf(out, "  |                   num_rngs = %d\n", msg->num_rngs);
+    fprintf(out, "  |                    num_cll = %d\n", msg->num_cll);
+    fprintf(out, "  |             last_saved_qos = %d\n", msg->last_saved_qos);
+    fprintf(out, "  |                 qos_reset1 = %d\n", msg->qos_reset1);
+    fprintf(out, "  |                 qos_reset2 = %d\n", msg->qos_reset2);
+    fprintf(out, "  |              rc_is_qos_set = %d\n", msg->rc_is_qos_set);
+    fprintf(out, "  | *              rc_qos_data = %p\n", msg->rc_qos_data);
+    fprintf(out, "  | *            rc_qos_status = %p\n", msg->rc_qos_status);
+    fprintf(out, "  |            saved_send_loop = %d\n", msg->saved_send_loop);
+    fprintf(out, "  |       saved_available_time = %f\n", msg->saved_available_time);
+    fprintf(out, "  |              saved_min_lat = %f\n", msg->saved_min_lat);
+    fprintf(out, "  |             saved_avg_time = %f\n", msg->saved_avg_time);
+    fprintf(out, "  |             saved_rcv_time = %f\n", msg->saved_rcv_time);
+    fprintf(out, "  |            saved_busy_time = %f\n", msg->saved_busy_time);
+    fprintf(out, "  |           saved_total_time = %f\n", msg->saved_total_time);
+    fprintf(out, "  |          saved_sample_time = %f\n", msg->saved_sample_time);
+    fprintf(out, "  |             msg_start_time = %f\n", msg->msg_start_time);
+    fprintf(out, "  |       saved_busy_time_ross = %f\n", msg->saved_busy_time_ross);
+    fprintf(out, "  |      saved_fin_chunks_ross = %f\n", msg->saved_fin_chunks_ross);
+    fprintf(out, "  |   saved_last_in_queue_time = %f\n", msg->saved_last_in_queue_time);
+    fprintf(out, "  |    saved_next_packet_delay = %f\n", msg->saved_next_packet_delay);
+    fprintf(out, "  |           msg_new_mn_event = %f\n", msg->msg_new_mn_event);
+    fprintf(out, "  |         last_received_time = %f\n", msg->last_received_time);
+    fprintf(out, "  |             last_sent_time = %f\n", msg->last_sent_time);
+    fprintf(out, "  |        last_bufupdate_time = %f\n", msg->last_bufupdate_time);
+}
+//*** ---------- END OF reverse handler checking functions ---------- ***
+
 /* dragonfly compute node and router LP types */
-extern "C" {
 tw_lptype dragonfly_dally_lps[] =
 {
     // Terminal handling functions
@@ -6951,7 +7106,29 @@ tw_lptype dragonfly_dally_lps[] =
     },
     {NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0},
 };
-}
+
+crv_checkpointer dragonfly_dally_checkpointers[] = {
+    {
+        &dragonfly_dally_lps[0],
+        0,
+        (save_checkpoint_state_f) NULL,
+        (clean_checkpoint_state_f) NULL,
+        (check_states_f) NULL,
+        (print_lpstate_f) print_terminal_state,
+        (print_checkpoint_state_f) NULL,
+        (print_event_f) print_terminal_dally_message,
+    },
+    {
+        &dragonfly_dally_lps[1],
+        0,
+        (save_checkpoint_state_f) NULL,
+        (clean_checkpoint_state_f) NULL,
+        (check_states_f) NULL,
+        (print_lpstate_f) NULL,
+        (print_checkpoint_state_f) NULL,
+        (print_event_f) NULL,
+    },
+};
 
 /* returns the dragonfly lp type for lp registration */
 static const tw_lptype* dragonfly_dally_get_cn_lp_type(void)
@@ -8221,6 +8398,7 @@ struct model_net_method dragonfly_dally_method =
     (event_f)dragonfly_dally_terminal_congestion_event,
     (revent_f)dragonfly_dally_terminal_congestion_event_rc,
     (commit_f)dragonfly_dally_terminal_congestion_event_commit,
+    &dragonfly_dally_checkpointers[0],
 };
 
 struct model_net_method dragonfly_dally_router_method =
@@ -8248,6 +8426,7 @@ struct model_net_method dragonfly_dally_router_method =
     (event_f)dragonfly_dally_router_congestion_event,
     (revent_f)dragonfly_dally_router_congestion_event_rc,
     (commit_f)dragonfly_dally_router_congestion_event_commit,
+    &dragonfly_dally_checkpointers[1],
 };
 
 // #ifdef ENABLE_CORTEX
diff --git a/src/networks/model-net/dragonfly-plus.C b/src/networks/model-net/dragonfly-plus.C
index 96334f0a..141b7ce8 100644
--- a/src/networks/model-net/dragonfly-plus.C
+++ b/src/networks/model-net/dragonfly-plus.C
@@ -6619,6 +6619,7 @@ struct model_net_method dragonfly_plus_method = {
     NULL, //(final_f)dragonfly_plus_sample_fin,
     dfly_plus_register_model_types,
     dfly_plus_get_model_types,
+    NULL,
 };
 
 struct model_net_method dragonfly_plus_router_method = {
@@ -6640,6 +6641,7 @@ struct model_net_method dragonfly_plus_router_method = {
     NULL, //(final_f)dragonfly_plus_rsample_fin,
     dfly_plus_router_register_model_types,
     dfly_plus_router_get_model_types,
+    NULL,
 };
 
 // #ifdef ENABLE_CORTEX
diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c
index 94188942..eee9cd74 100644
--- a/src/networks/model-net/slimfly.c
+++ b/src/networks/model-net/slimfly.c
@@ -4045,6 +4045,7 @@ struct model_net_method slimfly_method =
     NULL,
     slimfly_register_model_types,
     slimfly_get_cn_model_types,
+    NULL,
 };
 
 struct model_net_method slimfly_router_method =
@@ -4067,6 +4068,7 @@ struct model_net_method slimfly_router_method =
     NULL,
     slimfly_router_register_model_types,
     slimfly_get_router_model_types,
+    NULL,
 };
 
 

From e430feade5935287734ae913c1c146887ed70b04 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 2 Mar 2025 15:51:08 -0500
Subject: [PATCH 105/188] Moving implementation of linked list equality to
 quicklist.h

---
 codes/quicklist.h                            | 33 ++++++++++++++++++++
 src/network-workloads/model-net-mpi-replay.c | 29 -----------------
 2 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/codes/quicklist.h b/codes/quicklist.h
index bacc2c44..0a73b761 100644
--- a/codes/quicklist.h
+++ b/codes/quicklist.h
@@ -30,6 +30,8 @@ extern "C" {
 #endif
 
 #include <stdlib.h>
+#include <assert.h>
+#include <stdbool.h>
 
 struct qlist_head {
         struct qlist_head *next, *prev;
@@ -311,6 +313,37 @@ static inline struct qlist_head * qlist_find(
     return NULL;
 }
 
+/**
+ * are_qlist_equal - determine if two qlists have the same elements
+ */
+static inline bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) {
+    int const num_elems = qlist_count(left);
+    if (num_elems != qlist_count(right)) {
+        return false;
+    }
+
+    // Checking element by element
+    int i = 0;
+    struct qlist_head * elem_left = left->next;
+    struct qlist_head * elem_right = right->next;
+    while (elem_left != left) {
+        char * entry_left = (char *)(elem_left) - offset_ql;
+        char * entry_right = (char *)(elem_right) - offset_ql;
+
+        if (!cmp(entry_left, entry_right)) {
+            return false;
+        }
+
+        elem_left = elem_left->next;
+        elem_right = elem_right->next;
+        i++;
+    }
+    assert(i == num_elems);
+    assert(elem_right == right);
+
+    return true;
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 902e82d9..41597968 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -3459,35 +3459,6 @@ static void free_qlist_cpy(struct qlist_head * into, unsigned int offset_ql) {
     }
 }
 
-// Assumes that ql is at the end of entry!!
-static bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) {
-    int const num_elems = qlist_count(left);
-    if (num_elems != qlist_count(right)) {
-        return false;
-    }
-
-    // Checking element by element
-    int i = 0;
-    struct qlist_head * elem_left = left->next;
-    struct qlist_head * elem_right = right->next;
-    while (elem_left != left) {
-        char * entry_left = (char *)(elem_left) - offset_ql;
-        char * entry_right = (char *)(elem_right) - offset_ql;
-
-        if (!cmp(entry_left, entry_right)) {
-            return false;
-        }
-
-        elem_left = elem_left->next;
-        elem_right = elem_right->next;
-        i++;
-    }
-    assert(i == num_elems);
-    assert(elem_right == right);
-
-    return true;
-}
-
 bool compare_pending_waits(struct pending_waits const * before, struct pending_waits const * after) {
     // if one is null and the other isn't, then they're not equal
     if ((before == NULL) != (after == NULL)) {

From 8b95a700a58941580dca3f3508b19a9dd4aa939e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 2 Mar 2025 15:54:15 -0500
Subject: [PATCH 106/188] Fixing some potential memory errors (from Valgrind)

---
 src/networks/model-net/dragonfly-dally.C | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 76496e9f..6675ca4b 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -1858,7 +1858,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     if (p->num_rails % p->num_planes != 0)
         tw_error(TW_LOC, "Number of rails not evenly divisible by number of planes!\n");
 
-    char rail_select_str[MAX_NAME_LENGTH];
+    char rail_select_str[MAX_NAME_LENGTH] = {'\0'};
     rc = configuration_get_value(&config, "PARAMS", "rail_select", anno, rail_select_str,
             MAX_NAME_LENGTH);
     if(strcmp(rail_select_str, "dedicated") == 0)
@@ -1883,7 +1883,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
             fprintf(stderr, "global_k_picks for global adaptive routing not specified, setting to %d\n",p->global_k_picks);
     }
 
-    char scoring_str[MAX_NAME_LENGTH];
+    char scoring_str[MAX_NAME_LENGTH] = {'\0'};
     configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH);
     if (strcmp(scoring_str, "alpha") == 0) {
         scoring = ALPHA;
@@ -1978,7 +1978,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
 
     // read intra group connections, store from a router's perspective
     // all links to the same router form a vector
-    char intraFile[MAX_NAME_LENGTH];
+    char intraFile[MAX_NAME_LENGTH] = {'\0'};
     configuration_get_value(&config, "PARAMS", "intra-group-connections", 
         anno, intraFile, MAX_NAME_LENGTH);
     if (strlen(intraFile) <= 0) {
@@ -2035,7 +2035,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
 
     // read inter group connections, store from a router's perspective
     // also create a group level table that tells all the connecting routers
-    char interFile[MAX_NAME_LENGTH];
+    char interFile[MAX_NAME_LENGTH] = {'\0'};
     configuration_get_value(&config, "PARAMS", "inter-group-connections", 
         anno, interFile, MAX_NAME_LENGTH);
     if(strlen(interFile) <= 0) {
@@ -2100,7 +2100,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
 
 
     //read link failure file
-    char failureFileName[MAX_NAME_LENGTH];   
+    char failureFileName[MAX_NAME_LENGTH] = {'\0'};
     failureFileName[0] = '\0';
 
     if (strlen(g_nm_link_failure_filepath) == 0) //was this defined already via a command line argument?

From c9729d81a7213e452ad4755330fe355331aa3661 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 2 Mar 2025 16:06:12 -0500
Subject: [PATCH 107/188] Extending implementation of model-net checkpointer

---
 codes/model-net-sched.h                       |   7 +
 src/networks/model-net/core/model-net-lp.c    | 164 +++++++++++++++---
 src/networks/model-net/core/model-net-sched.c |  60 +++++++
 3 files changed, 211 insertions(+), 20 deletions(-)

diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h
index 9f685b85..da28ddc2 100644
--- a/codes/model-net-sched.h
+++ b/codes/model-net-sched.h
@@ -197,6 +197,13 @@ void model_net_sched_add_rc(
 // set default parameters for messages that don't specify any
 void model_net_sched_set_default_params(mn_sched_params *sched_params);
 
+// Reverse handler functionality
+void save_model_net_sched(model_net_sched *before, model_net_sched const *after);
+void clean_model_net_sched(model_net_sched *before);
+bool check_model_net_sched(model_net_sched *before, model_net_sched *after);
+void print_model_net_sched(FILE * out, model_net_sched *sched);
+void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched);
+
 extern char * sched_names[];
 
 #ifdef __cplusplus
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 8a52c7da..966137fe 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -131,19 +131,23 @@ tw_lptype model_net_base_lp = {
     sizeof(model_net_base_state),
 };
 
-// Functionality to check for correct implementation of reverse event handler 
+// Functionality to check for correct implementation of reverse event handler
+static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from);
+static void clean_state_net_state(model_net_base_state * state);
+static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after);
 static void print_model_net_state(FILE * out, model_net_base_state * state);
+static void print_model_net_checkpoint(FILE * out, model_net_base_state * state);
 static void print_event_state(FILE * out, model_net_wrap_msg * state);
 
 // ROSS function pointer table to check reverse event handler
 crv_checkpointer model_net_chkptr = {
     &model_net_base_lp,
     0,
-    (save_checkpoint_state_f) NULL,
-    (clean_checkpoint_state_f) NULL,
-    (check_states_f) NULL,
+    (save_checkpoint_state_f) save_state_net_state,
+    (clean_checkpoint_state_f) clean_state_net_state,
+    (check_states_f) check_model_net_state,
     (print_lpstate_f) print_model_net_state,
-    (print_checkpoint_state_f) print_model_net_state,
+    (print_checkpoint_state_f) print_model_net_checkpoint,
     (print_event_f) print_event_state,
 };
 
@@ -1140,22 +1144,140 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid,
 }
 
 /* START Checking reverse handler functionality */
+static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from) {
+    memcpy(into, from, sizeof(model_net_base_state));
+
+    into->in_sched_send_loop = malloc(from->params->num_queues * sizeof(int));
+    for (int i=0; i < from->params->num_queues; i++) {
+        into->in_sched_send_loop[i] = from->in_sched_send_loop[i];
+    }
+
+    into->sched_send = malloc(from->params->num_queues * sizeof(model_net_sched*));
+    if (from->params->num_queues > 0) {
+        model_net_sched * sched_send_array = malloc(from->params->num_queues * sizeof(model_net_sched));
+        for(int i = 0; i < from->params->num_queues; i++) {
+            into->sched_send[i] = &sched_send_array[i];
+            save_model_net_sched(into->sched_send[i], from->sched_send[i]);
+        }
+    }
+
+    into->sched_recv = malloc(sizeof(model_net_sched));
+    save_model_net_sched(into->sched_recv, from->sched_recv);
+
+    into->sub_state = NULL;
+    crv_checkpointer * chptr = method_array[from->net_id]->checkpointer;
+    if (chptr && chptr->check_lps) {
+        into->sub_state = calloc(1, from->sub_type->state_sz);
+        chptr->save_lp(into->sub_state, from->sub_state);
+    }
+
+    into->node_copy_next_available_time = malloc(from->params->node_copy_queues * sizeof(tw_stime));
+    for (int i=0; i < from->params->node_copy_queues; i++) {
+        into->node_copy_next_available_time[i] = from->node_copy_next_available_time[i];
+    }
+}
+
+static void clean_state_net_state(model_net_base_state * state) {
+    free(state->in_sched_send_loop);
+
+    if (state->params->num_queues > 0) {
+        for(int i = 0; i < state->params->num_queues; i++) {
+            clean_model_net_sched(state->sched_send[i]);
+        }
+    }
+    free(state->sched_send[0]);
+    free(state->sched_send);
+    clean_model_net_sched(state->sched_recv);
+    free(state->sched_recv);
+
+    if (state->sub_state != NULL) {
+        free(state->sub_state);
+    }
+    free(state->node_copy_next_available_time);
+}
+
+static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after) {
+    bool is_same = true;
+    is_same &= before->net_id == after->net_id;
+    is_same &= before->nics_per_router == after->nics_per_router;
+    for (int i=0; i < before->params->num_queues; i++) {
+        is_same &= before->in_sched_send_loop[i] == after->in_sched_send_loop[i];
+    }
+    is_same &= before->in_sched_recv_loop == after->in_sched_recv_loop;
+    is_same &= before->msg_id == after->msg_id;
+    for(int i = 0; i < before->params->num_queues; i++) {
+        is_same &= check_model_net_sched(before->sched_send[i], after->sched_send[i]);
+    }
+    is_same &= check_model_net_sched(before->sched_recv, after->sched_recv);
+    crv_checkpointer * chptr = method_array[before->net_id]->checkpointer;
+    if (chptr && before->sub_state != NULL && chptr->check_lps) {
+        is_same &= chptr->check_lps(before->sub_state, after->sub_state);
+    } else {
+        tw_error(TW_LOC, "Network of type \"%s\" has not been configured to be checkpointed", model_net_method_names[before->net_id]);
+    }
+    is_same &= before->next_available_time == after->next_available_time;
+    for (int i=0; i < before->params->node_copy_queues; i++) {
+        is_same &= before->node_copy_next_available_time[i] == after->node_copy_next_available_time[i];
+    }
+
+    return is_same;
+}
+
+static void __print_model_net(FILE * out, model_net_base_state * state, bool is_lp_state) {
+    fprintf(out, "model_net_state ->\n");
+    fprintf(out, "  |              net_id = %d\n", state->net_id);
+    fprintf(out, "  |     nics_per_router = %d\n", state->nics_per_router);
+    fprintf(out, "  | *in_sched_send_loop[%d] = [", state->params->num_queues);  // (done) deep-all
+    for (int i=0; i < state->params->num_queues; i++) {
+        fprintf(out, "%d%s", state->in_sched_send_loop[i], i==state->params->num_queues-1 ? "" : ", ");
+    }
+    fprintf(out, "]\n");
+    fprintf(out, "  |  in_sched_recv_loop = %d\n", state->in_sched_recv_loop);
+    fprintf(out, "  |              msg_id = %lu\n", state->msg_id);
+    fprintf(out, "  | **       sched_send = %p\n", state->sched_send);  // (done) deep-all
+    fprintf(out, "  | *        sched_recv = %p\n", state->sched_recv);  // (done) deep-all
+    fprintf(out, "  | *            params = %p\n", state->params);
+    fprintf(out, "  | *          sub_type = %p\n", state->sub_type);
+    fprintf(out, "  | *    sub_model_type = %p\n", state->sub_model_type);
+    fprintf(out, "  | *         sub_state = %p\n", state->sub_state);  // deep-all
+    fprintf(out, "  | next_available_time = %f\n", state->next_available_time);
+    fprintf(out, "  | *node_copy_next_available_time[%d] = [", state->params->num_queues);  // (done) deep-all
+    for (int i=0; i < state->params->node_copy_queues; i++) {
+        fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? "" : ", ");
+    }
+    fprintf(out, "]\n");
+    fprintf(out, "  | *sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate);  // no need to check
+    fprintf(out, "  | sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate);  // no need to check
+
+    void (*print_modelnet) (FILE * out, model_net_sched *sched) = is_lp_state ? print_model_net_sched : print_model_net_sched_checkpoint;
+
+    fprintf(out, "\n");
+    for(int i = 0; i < state->params->num_queues; i++) {
+        fprintf(out, "==== CONTENT for sched_send[%d]:\n", i);
+        print_modelnet(stderr, state->sched_send[i]);
+    }
+
+    fprintf(out, "\n==== CONTENT for sched_recv:\n");
+    print_modelnet(stderr, state->sched_recv);
+
+    crv_checkpointer * chptr = method_array[state->net_id]->checkpointer;
+    if (chptr && state->sub_state != NULL) {
+        if (is_lp_state && chptr->print_lp) {
+            fprintf(out, "\n==== CONTENT for sub_state:\n");
+            chptr->print_lp(out, state->sub_state);
+        }
+        if (!is_lp_state && chptr->print_checkpoint) {
+            fprintf(out, "\n==== CONTENT for sub_state:\n");
+            chptr->print_checkpoint(out, state->sub_state);
+        }
+    }
+}
+
 static void print_model_net_state(FILE * out, model_net_base_state * state) {
-    fprintf(out, "             net_id = %d\n", state->net_id);
-    fprintf(out, "    nics_per_router = %d\n", state->nics_per_router);
-    fprintf(out, "*in_sched_send_loop = %p\n", state->in_sched_send_loop);
-    fprintf(out, " in_sched_recv_loop = %d\n", state->in_sched_recv_loop);
-    fprintf(out, "             msg_id = %lu\n", state->msg_id);
-    fprintf(out, "**       sched_send = %p\n", state->sched_send);
-    fprintf(out, "*        sched_recv = %p\n", state->sched_recv);
-    fprintf(out, "*            params = %p\n", state->params);
-    fprintf(out, "*          sub_type = %p\n", state->sub_type);
-    fprintf(out, "*    sub_model_type = %p\n", state->sub_model_type);
-    fprintf(out, "*         sub_state = %p\n", state->sub_state);
-    fprintf(out, "next_available_time = %f\n", state->next_available_time);
-    fprintf(out, "*node_copy_next_available_time = %p\n", state->node_copy_next_available_time);
-    fprintf(out, "*sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate);
-    fprintf(out, "sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate);
+    __print_model_net(out, state, true);
+}
+static void print_model_net_checkpoint(FILE * out, model_net_base_state * state) {
+    __print_model_net(out, state, false);
 }
 
 static void print_type(FILE * out, enum model_net_base_event_type type) {
@@ -1229,6 +1351,8 @@ static void print_event_state(FILE * out, model_net_wrap_msg * msg) {
         default:
             fprintf(out, "The content of this message cannot be deciphered yet with the information given\n");
     }
+    // TODO: print internal state of message
+    // void * sub_msg = ((char*)msg) + msg_offsets[state->net_id];
 }
 
 /* END checking reverse handler functionality */
diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c
index ca31659a..9fefa30d 100644
--- a/src/networks/model-net/core/model-net-sched.c
+++ b/src/networks/model-net/core/model-net-sched.c
@@ -80,6 +80,66 @@ void model_net_sched_set_default_params(mn_sched_params *sched_params){
     sched_params->prio = -1;
 }
 
+/* START Checking reverse handler functionality */
+void save_model_net_sched(model_net_sched *into, model_net_sched const *from) {
+    into->type = from->type;
+
+    into->dat = NULL;
+    crv_checkpointer const * chptr = sched_checkpointers[from->type];
+    if (chptr && chptr->save_lp) {
+        into->dat = malloc(chptr->sz_storage);
+        chptr->save_lp(into->dat, from->dat);
+    }
+}
+
+void clean_model_net_sched(model_net_sched *state) {
+    if (state->dat) {
+        crv_checkpointer const * chptr = sched_checkpointers[state->type];
+        assert (chptr && chptr->clean_lp);
+        chptr->clean_lp(state->dat);
+        free(state->dat);
+    }
+}
+
+bool check_model_net_sched(
+    model_net_sched *before,
+    model_net_sched *after
+) {
+    crv_checkpointer const * chptr = sched_checkpointers[before->type];
+    if (before->dat != NULL && chptr && chptr->check_lps) {
+        return chptr->check_lps(before->dat, after->dat);
+    }
+    tw_error(TW_LOC, "Scheduler of type \"%s\" has not been configured to be checkpointed", sched_names[before->type]);
+    return false;
+}
+
+static void __print_model_net_sched(
+    FILE * out,
+    model_net_sched *sched,
+    bool is_lp_state
+) {
+    crv_checkpointer const * chptr = sched_checkpointers[sched->type];
+    fprintf(out, "model_net_sched.sched_type = %d\n", sched->type);
+    fprintf(out, "model_net_sched.\n");
+    if (chptr) {
+        if (is_lp_state && chptr->print_lp) {
+            chptr->print_lp(out, sched->dat);
+        }
+        if (!is_lp_state && chptr->print_checkpoint) {
+            chptr->print_checkpoint(out, sched->dat);
+        }
+    }
+}
+
+void print_model_net_sched(FILE * out, model_net_sched *sched) {
+    __print_model_net_sched(out, sched, true);
+}
+
+void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched) {
+    __print_model_net_sched(out, sched, false);
+}
+/* STOP Checking reverse handler functionality */
+
 /*
  * Local variables:
  *  c-indent-level: 4

From 7bc29c21df10be26586528bddcdbdd386d934a6e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 2 Mar 2025 16:08:30 -0500
Subject: [PATCH 108/188] Implementing FCFS checkpointer

---
 codes/model-net-sched.h                       |  15 +-
 codes/model-net.h                             |   3 +
 src/networks/model-net/core/model-net-lp.c    |  26 +++-
 .../model-net/core/model-net-sched-impl.c     | 128 +++++++++++++++++-
 src/networks/model-net/core/model-net-sched.c |   2 +-
 5 files changed, 164 insertions(+), 10 deletions(-)

diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h
index da28ddc2..ad7ccf6e 100644
--- a/codes/model-net-sched.h
+++ b/codes/model-net-sched.h
@@ -23,16 +23,16 @@ typedef struct mn_sched_params_s mn_sched_params;
 #include "model-net-method.h"
 
 /// types of schedulers
-/// format: enum type, config string, function pointer names
+/// format: enum type, config string, function pointer names, crv_checkpointer instance
 /// fcfs-full eschews packetization
 #define SCHEDULER_TYPES \
-    X(MN_SCHED_FCFS,      "fcfs",        &fcfs_tab) \
-    X(MN_SCHED_FCFS_FULL, "fcfs-full",   &fcfs_tab) \
-    X(MN_SCHED_RR,        "round-robin", &rr_tab) \
-    X(MN_SCHED_PRIO,      "priority",    &prio_tab) \
-    X(MAX_SCHEDS,         NULL,          NULL)
+    X(MN_SCHED_FCFS,      "fcfs",        &fcfs_tab, &fcfs_chptr) \
+    X(MN_SCHED_FCFS_FULL, "fcfs-full",   &fcfs_tab, &fcfs_chptr) \
+    X(MN_SCHED_RR,        "round-robin", &rr_tab,   NULL) \
+    X(MN_SCHED_PRIO,      "priority",    &prio_tab, NULL) \
+    X(MAX_SCHEDS,         NULL,          NULL,      NULL)
 
-#define X(a,b,c) a,
+#define X(a,b,c,d) a,
 enum sched_type {
     SCHEDULER_TYPES
 };
@@ -205,6 +205,7 @@ void print_model_net_sched(FILE * out, model_net_sched *sched);
 void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched);
 
 extern char * sched_names[];
+extern const crv_checkpointer * sched_checkpointers[];
 
 #ifdef __cplusplus
 }
diff --git a/codes/model-net.h b/codes/model-net.h
index a529627c..f003cc10 100644
--- a/codes/model-net.h
+++ b/codes/model-net.h
@@ -163,6 +163,9 @@ struct mn_stats
     long max_event_size;
 };
 
+bool check_model_net_request(model_net_request const * before, model_net_request const * after);
+void print_model_net_request(FILE * out, char const * before, model_net_request * item);
+
 /* Registers all model-net LPs in ROSS. Should be called after
  * configuration_load, but before codes_mapping_setup */
 void model_net_register();
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 966137fe..cbf49b54 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1303,7 +1303,31 @@ static void print_type(FILE * out, enum model_net_base_event_type type) {
     }
 }
 
-static void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) {
+// Used Claude for an initial draft of this function
+bool check_model_net_request(model_net_request const * before, model_net_request const * after) {
+    bool is_same = true;
+
+    is_same &= (before->final_dest_lp == after->final_dest_lp);
+    is_same &= (before->dest_mn_lp == after->dest_mn_lp);
+    is_same &= (before->src_lp == after->src_lp);
+    is_same &= (before->msg_start_time == after->msg_start_time);
+    is_same &= (before->msg_new_mn_event == after->msg_new_mn_event);
+    is_same &= (before->msg_size == after->msg_size);
+    is_same &= (before->pull_size == after->pull_size);
+    is_same &= (before->packet_size == after->packet_size);
+    is_same &= (before->msg_id == after->msg_id);
+    is_same &= (before->net_id == after->net_id);
+    is_same &= (before->is_pull == after->is_pull);
+    is_same &= (before->queue_offset == after->queue_offset);
+    is_same &= (before->remote_event_size == after->remote_event_size);
+    is_same &= (before->self_event_size == after->self_event_size);
+    is_same &= (before->app_id == after->app_id);
+    is_same &= (strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0);
+
+    return is_same;
+}
+
+void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) {
     fprintf(out, "%sfinal_dest_lp = %ld\n", starts_with, req->final_dest_lp);
     fprintf(out, "%sdest_mn_lp = %ld\n", starts_with, req->dest_mn_lp);
     fprintf(out, "%ssrc_lp = %ld\n", starts_with, req->src_lp);
diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c
index 3c3d25a9..e5f2d9e2 100644
--- a/src/networks/model-net/core/model-net-sched-impl.c
+++ b/src/networks/model-net/core/model-net-sched-impl.c
@@ -83,6 +83,10 @@ static void fcfs_next_rc(
         const void               * rc_event_save,
         const model_net_sched_rc * rc,
         tw_lp                    * lp);
+static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from);
+static void clean_state_fcfs_state(mn_sched_queue * into);
+static bool check_fcfs_state(mn_sched_queue *before, mn_sched_queue *after);
+static void print_fcfs_state(FILE * out, mn_sched_queue *sched);
 
 // ROUND-ROBIN
 static void rr_init (
@@ -150,12 +154,29 @@ static const model_net_sched_interface rr_tab =
 static const model_net_sched_interface prio_tab =
 { &prio_init, &prio_destroy, &prio_add, &prio_add_rc, &prio_next, &prio_next_rc};
 
-#define X(a,b,c) c,
+static const crv_checkpointer fcfs_chptr = {
+    NULL,
+    sizeof(mn_sched_queue),
+    (save_checkpoint_state_f) save_state_fcfs_state,
+    (clean_checkpoint_state_f) clean_state_fcfs_state,
+    (check_states_f) check_fcfs_state,
+    (print_lpstate_f) print_fcfs_state,
+    (print_checkpoint_state_f) print_fcfs_state,
+    NULL,
+};
+
+#define X(a,b,c,d) c,
 const model_net_sched_interface * sched_interfaces[] = {
     SCHEDULER_TYPES
 };
 #undef X
 
+#define X(a,b,c,d) d,
+const crv_checkpointer * sched_checkpointers[] = {
+    SCHEDULER_TYPES
+};
+#undef X
+
 /// FCFS implementation
 
 void fcfs_init(
@@ -192,11 +213,13 @@ void fcfs_add (
     q->req = *req;
     q->sched_params = *sched_params;
     q->rem = req->msg_size;
+    assert(req->remote_event_size == remote_event_size);
     if (remote_event_size > 0){
         q->remote_event = malloc(remote_event_size);
         memcpy(q->remote_event, remote_event, remote_event_size);
     }
     else { q->remote_event = NULL; }
+    assert(req->self_event_size == local_event_size);
     if (local_event_size > 0){
         q->local_event = malloc(local_event_size);
         memcpy(q->local_event, local_event, local_event_size);
@@ -364,6 +387,109 @@ void fcfs_next_rc(
     }
 }
 
+static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * from) {
+    into->req = from->req;
+    into->sched_params = from->sched_params;
+    into->rem = from->rem;
+    into->entry_time = from->entry_time;
+    if (from->remote_event != NULL) {
+        assert(from->req.remote_event_size > 0);
+        into->remote_event = malloc(from->req.remote_event_size);
+        memcpy(into->remote_event, from->remote_event, from->req.remote_event_size);
+    }
+    if (from->local_event != NULL) {
+        assert(from->req.self_event_size > 0);
+        into->local_event = malloc(from->req.self_event_size);
+        memcpy(into->local_event, from->local_event, from->req.self_event_size);
+    }
+}
+
+static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from) {
+    into->method = from->method;
+    into->is_recv_queue = from->is_recv_queue;
+    into->queue_len = from->queue_len;
+    INIT_QLIST_HEAD(&into->reqs);
+
+    mn_sched_qitem * sched_qitem = NULL;
+    qlist_for_each_entry(sched_qitem, &from->reqs, ql) {
+        mn_sched_qitem * new_sched_qitem = malloc(sizeof(mn_sched_qitem));
+        save_mn_sched_qitem(new_sched_qitem, sched_qitem);
+        qlist_add_tail(&new_sched_qitem->ql, &into->reqs);
+    }
+}
+
+static void clean_mn_sched_qitem(mn_sched_qitem * into) {
+    if (into->remote_event != NULL) {
+        free(into->remote_event);
+    }
+    if (into->local_event != NULL) {
+        free(into->local_event);
+    }
+}
+
+static void clean_state_fcfs_state(mn_sched_queue * into) {
+    mn_sched_qitem * sched_qitem = NULL;
+    mn_sched_qitem * _ = NULL;
+    qlist_for_each_entry_safe(sched_qitem, _, &into->reqs, ql) {
+        clean_mn_sched_qitem(sched_qitem);
+        qlist_del(&sched_qitem->ql);
+        free(sched_qitem);
+    }
+}
+
+static bool check_mn_sched_qitem(mn_sched_qitem * before, mn_sched_qitem * after) {
+    bool is_same = true;
+
+    is_same &= check_model_net_request(&before->req, &after->req);
+    is_same &= before->sched_params.prio == after->sched_params.prio;
+    is_same &= before->rem == after->rem;
+    is_same &= before->entry_time == after->entry_time;
+    is_same &= !memcmp(before->remote_event, after->remote_event, before->req.remote_event_size);
+    is_same &= !memcmp(before->local_event, after->local_event, before->req.self_event_size);
+    return is_same;
+}
+
+static bool check_fcfs_state(mn_sched_queue * before, mn_sched_queue * after) {
+    bool is_same = true;
+
+    is_same &= before->is_recv_queue == after->is_recv_queue;
+    is_same &= before->queue_len == after->queue_len;
+
+    if (qlist_count(&before->reqs) != qlist_count(&before->reqs)) {
+        return false;
+    }
+
+    is_same &= are_qlist_equal(&before->reqs, &after->reqs, QLIST_OFFSET(mn_sched_qitem, ql), (bool (*) (void *, void *)) check_mn_sched_qitem);
+
+    return is_same;
+}
+
+static void print_mn_sched_qitem(FILE * out, mn_sched_qitem * item) {
+    fprintf(out, "     mn_sched_qitem\n");
+    fprintf(out, "       | .req\n");
+    print_model_net_request(out, "       |     |.", &item->req);
+    fprintf(out, "       | sched_params.prio = %d\n", item->sched_params.prio);
+    fprintf(out, "       |               rem = %lu\n", item->rem);
+    fprintf(out, "       |        entry_time = %g\n", item->entry_time);
+    fprintf(out, "       |      remote_event = %p (contents below)\n", item->remote_event);
+    tw_fprint_binary_array(out, item->remote_event, item->req.remote_event_size);
+    fprintf(out, "       |       local_event = %p (contents below)\n", item->local_event);
+    tw_fprint_binary_array(out, item->local_event, item->req.self_event_size);
+}
+
+static void print_fcfs_state(FILE * out, mn_sched_queue *sched) {
+    fprintf(out, "FCFS:\n");
+    fprintf(out, "   |        .method = %p\n", sched->method);
+    fprintf(out, "   | .is_recv_queue = %d\n", sched->is_recv_queue);
+    fprintf(out, "   |     .queue_len = %d\n", sched->queue_len);
+    fprintf(out, "   |      .reqs[%d] = {\n", qlist_count(&sched->reqs));
+    mn_sched_qitem * sched_qitem = NULL;
+    qlist_for_each_entry(sched_qitem, &sched->reqs, ql) {
+         print_mn_sched_qitem(out, sched_qitem);
+    }
+    fprintf(out, "}\n");
+}
+
 void rr_init (
         const struct model_net_method     * method,
         const model_net_sched_cfg_params  * params,
diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c
index 9fefa30d..ed280e19 100644
--- a/src/networks/model-net/core/model-net-sched.c
+++ b/src/networks/model-net/core/model-net-sched.c
@@ -14,7 +14,7 @@
 #include "codes/model-net-sched-impl.h"
 #include "codes/quicklist.h"
 
-#define X(a,b,c) b,
+#define X(a,b,c,d) b,
 char * sched_names [] = {
     SCHEDULER_TYPES
 };

From d48898a4944f13e6de6c95e5794ea92c7af107cb Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 4 Mar 2025 12:03:52 -0500
Subject: [PATCH 109/188] Removing never used struct param `entry_time`

---
 src/network-workloads/model-net-mpi-replay.c       |  2 +-
 src/networks/model-net/core/model-net-lp.c         | 13 +++++++++++++
 src/networks/model-net/core/model-net-sched-impl.c |  5 -----
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 41597968..2501c5ac 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -41,7 +41,7 @@
 #define MAX_PERIODS_PER_APP 512
 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine
 #define OUTPUT_MARKS 0
-#define LP_DEBUG 1
+#define LP_DEBUG 0
 
 static int msg_size_hash_compare(
             void *key, struct qhash_head *link);
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index cbf49b54..2cc1d516 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -16,6 +16,7 @@
 #define MN_NAME "model_net_base"
 
 #define DEBUG 0
+#define MODELNET_LP_DEBUG 1
 /**** BEGIN SIMULATION DATA STRUCTURES ****/
 
 int model_net_base_magic;
@@ -48,6 +49,9 @@ static int servers_per_node_queue = -1;
 extern tw_stime codes_cn_delay;
 
 typedef struct model_net_base_state {
+#if MODELNET_LP_DEBUG
+	size_t num_events_processed;
+#endif /* if MODELNET_LP_DEBUG */
     int net_id, nics_per_router;
     // whether scheduler loop is running
     int *in_sched_send_loop, in_sched_recv_loop;
@@ -592,6 +596,9 @@ void model_net_base_event(
         model_net_wrap_msg * m,
         tw_lp * lp){
     memset(b, 0, sizeof(tw_bf));
+#if MODELNET_LP_DEBUG
+    ns->num_events_processed++;
+#endif /* if MODELNET_LP_DEBUG */
 
     if(m->h.magic != model_net_base_magic)
         printf("\n LP ID mismatched %llu\n", LLU(lp->gid));
@@ -644,6 +651,9 @@ void model_net_base_event_rc(
         model_net_wrap_msg * m,
         tw_lp * lp){
     assert(m->h.magic == model_net_base_magic);
+#if MODELNET_LP_DEBUG
+    ns->num_events_processed--;
+#endif /* if MODELNET_LP_DEBUG */
 
     if(!is_freezing_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) {
         return;
@@ -1225,6 +1235,9 @@ static bool check_model_net_state(model_net_base_state * before, model_net_base_
 
 static void __print_model_net(FILE * out, model_net_base_state * state, bool is_lp_state) {
     fprintf(out, "model_net_state ->\n");
+#if MODELNET_LP_DEBUG
+    fprintf(out, "  |num_events_processed = %zu\n", state->num_events_processed);
+#endif /* if MODELNET_LP_DEBUG */
     fprintf(out, "  |              net_id = %d\n", state->net_id);
     fprintf(out, "  |     nics_per_router = %d\n", state->nics_per_router);
     fprintf(out, "  | *in_sched_send_loop[%d] = [", state->params->num_queues);  // (done) deep-all
diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c
index e5f2d9e2..c23bd935 100644
--- a/src/networks/model-net/core/model-net-sched-impl.c
+++ b/src/networks/model-net/core/model-net-sched-impl.c
@@ -27,7 +27,6 @@ typedef struct mn_sched_qitem {
     mn_sched_params sched_params;
     // remaining bytes to send
     uint64_t rem;
-    tw_stime entry_time;
     // pointers to event structures
     // sizes are given in the request struct
     void * remote_event;
@@ -209,7 +208,6 @@ void fcfs_add (
         tw_lp                   * lp){
     (void)rc; // unneeded for fcfs
     mn_sched_qitem *q = malloc(sizeof(mn_sched_qitem));
-    q->entry_time = tw_now(lp);
     q->req = *req;
     q->sched_params = *sched_params;
     q->rem = req->msg_size;
@@ -391,7 +389,6 @@ static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * fr
     into->req = from->req;
     into->sched_params = from->sched_params;
     into->rem = from->rem;
-    into->entry_time = from->entry_time;
     if (from->remote_event != NULL) {
         assert(from->req.remote_event_size > 0);
         into->remote_event = malloc(from->req.remote_event_size);
@@ -443,7 +440,6 @@ static bool check_mn_sched_qitem(mn_sched_qitem * before, mn_sched_qitem * after
     is_same &= check_model_net_request(&before->req, &after->req);
     is_same &= before->sched_params.prio == after->sched_params.prio;
     is_same &= before->rem == after->rem;
-    is_same &= before->entry_time == after->entry_time;
     is_same &= !memcmp(before->remote_event, after->remote_event, before->req.remote_event_size);
     is_same &= !memcmp(before->local_event, after->local_event, before->req.self_event_size);
     return is_same;
@@ -470,7 +466,6 @@ static void print_mn_sched_qitem(FILE * out, mn_sched_qitem * item) {
     print_model_net_request(out, "       |     |.", &item->req);
     fprintf(out, "       | sched_params.prio = %d\n", item->sched_params.prio);
     fprintf(out, "       |               rem = %lu\n", item->rem);
-    fprintf(out, "       |        entry_time = %g\n", item->entry_time);
     fprintf(out, "       |      remote_event = %p (contents below)\n", item->remote_event);
     tw_fprint_binary_array(out, item->remote_event, item->req.remote_event_size);
     fprintf(out, "       |       local_event = %p (contents below)\n", item->local_event);

From fab09e8143cad00bf2424ecb0628659befad8b71 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 4 Mar 2025 20:51:01 -0500
Subject: [PATCH 110/188] Printing lp states and events with a prefix (prettier
 printing)

---
 codes/codes-workload.h                        |   2 +-
 codes/model-net-sched.h                       |   4 +-
 src/network-workloads/model-net-mpi-replay.c  | 289 +++++++++--------
 src/networks/model-net/core/model-net-lp.c    | 207 ++++++------
 .../model-net/core/model-net-sched-impl.c     |  42 +--
 src/networks/model-net/core/model-net-sched.c |  22 +-
 src/networks/model-net/dragonfly-dally.C      | 296 +++++++++---------
 src/workload/codes-workload.c                 |  76 ++---
 8 files changed, 479 insertions(+), 459 deletions(-)

diff --git a/codes/codes-workload.h b/codes/codes-workload.h
index 5ac6b333..4722b5a4 100644
--- a/codes/codes-workload.h
+++ b/codes/codes-workload.h
@@ -382,7 +382,7 @@ void codes_workload_add_method(struct codes_workload_method const * method);
  */
 
 /* Printing event :) */
-void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin);
+void fprint_codes_workload_op(FILE * out, char const * prefix, struct codes_workload_op * op);
 char const * const op_type_string(enum codes_workload_op_type op_type);
 
 #ifdef __cplusplus
diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h
index ad7ccf6e..576c57eb 100644
--- a/codes/model-net-sched.h
+++ b/codes/model-net-sched.h
@@ -201,8 +201,8 @@ void model_net_sched_set_default_params(mn_sched_params *sched_params);
 void save_model_net_sched(model_net_sched *before, model_net_sched const *after);
 void clean_model_net_sched(model_net_sched *before);
 bool check_model_net_sched(model_net_sched *before, model_net_sched *after);
-void print_model_net_sched(FILE * out, model_net_sched *sched);
-void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched);
+void print_model_net_sched(FILE * out, char const * prefix, model_net_sched *sched);
+void print_model_net_sched_checkpoint(FILE * out, char const * prefix, model_net_sched *sched);
 
 extern char * sched_names[];
 extern const crv_checkpointer * sched_checkpointers[];
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 2501c5ac..653f6f31 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -3534,10 +3534,10 @@ static void save_nw_lp_state(nw_state * into, nw_state const * from) {
     // Don't forget to make deep copies of any new complex data types that nw_state points to
 }
 
-static void print_mpi_msgs_queue(FILE * out, struct qlist_head * head, char const * before) {
+static void print_mpi_msgs_queue(FILE * out, char const * prefix, struct qlist_head * head) {
     mpi_msgs_queue * current = NULL;
     qlist_for_each_entry(current, head, ql) {
-         fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", before, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id);
+         fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", prefix, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id);
     }
 }
 
@@ -3647,124 +3647,130 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) {
 }
 
 // Originally implemneted with a prompt on Claude.ai (tedious code, easy to check and produce)
-static void print_nw_lp_state(FILE * out, nw_state * state) {
+static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state) {
     int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
 
+    fprintf(out, "%snw-lp state ->\n", prefix);
 #if LP_DEBUG
-    fprintf(out, "  num_events_processed = %zu\n", state->num_events_processed);
-#endif /* if LP_DEBUG */
-    fprintf(out, "     num_events_per_lp = %ld\n", state->num_events_per_lp);
-    fprintf(out, "                 nw_id = %lu\n", state->nw_id);
-    fprintf(out, "             wrkld_end = %d\n", state->wrkld_end);
-    fprintf(out, "                app_id = %d\n", state->app_id);
-    fprintf(out, "            local_rank = %d\n", state->local_rank);
-    fprintf(out, "             qos_level = %d\n", state->qos_level);
-    fprintf(out, "     synthetic_pattern = %d\n", state->synthetic_pattern);
-    fprintf(out, "           is_finished = %d\n", state->is_finished);
-    fprintf(out, "num_own_job_ranks_completed = %d\n", state->num_own_job_ranks_completed);
-    fprintf(out, "  known_completed_jobs[%d] = [", num_jobs);
+    fprintf(out, "%s |  num_events_processed = %zu\n", prefix, state->num_events_processed);
+#endif /* if LP_DE%sBUG */
+    fprintf(out, "%s |     num_events_per_lp = %ld\n", prefix, state->num_events_per_lp);
+    fprintf(out, "%s |                 nw_id = %lu\n", prefix, state->nw_id);
+    fprintf(out, "%s |             wrkld_end = %d\n", prefix, state->wrkld_end);
+    fprintf(out, "%s |                app_id = %d\n", prefix, state->app_id);
+    fprintf(out, "%s |            local_rank = %d\n", prefix, state->local_rank);
+    fprintf(out, "%s |             qos_level = %d\n", prefix, state->qos_level);
+    fprintf(out, "%s |     synthetic_pattern = %d\n", prefix, state->synthetic_pattern);
+    fprintf(out, "%s |           is_finished = %d\n", prefix, state->is_finished);
+    fprintf(out, "%s |num_own_job_ranks_completed = %d\n", prefix, state->num_own_job_ranks_completed);
+    fprintf(out, "%s |  known_completed_jobs[%d] = [", prefix, num_jobs);
     for(int i=0; i<num_jobs; i++) {
-        fprintf(out, "%d%s", state->known_completed_jobs[i], i+1==num_jobs ? "" : ", ");
+        fprintf(out, "%s%d%s", prefix, state->known_completed_jobs[i], i+1==num_jobs ? "" : ", ");
     }
     fprintf(out, "]\n");
-    fprintf(out, "        *processed_ops = %p\n", state->processed_ops);
-    fprintf(out, "    *processed_wait_op = %p\n", state->processed_wait_op);
-    fprintf(out, "         *matched_reqs = %p\n", state->matched_reqs);
+    fprintf(out, "%s |        *processed_ops = %p\n", prefix, state->processed_ops);
+    fprintf(out, "%s |    *processed_wait_op = %p\n", prefix, state->processed_wait_op);
+    fprintf(out, "%s |         *matched_reqs = %p\n", prefix, state->matched_reqs);
 
     // Operation counts
-    fprintf(out, "             num_sends = %lu\n", state->num_sends);
-    fprintf(out, "             num_recvs = %lu\n", state->num_recvs);
-    fprintf(out, "              num_cols = %lu\n", state->num_cols);
-    fprintf(out, "            num_delays = %lu\n", state->num_delays);
-    fprintf(out, "              num_wait = %lu\n", state->num_wait);
-    fprintf(out, "           num_waitall = %lu\n", state->num_waitall);
-    fprintf(out, "          num_waitsome = %lu\n", state->num_waitsome);
+    fprintf(out, "%s |             num_sends = %lu\n", prefix, state->num_sends);
+    fprintf(out, "%s |             num_recvs = %lu\n", prefix, state->num_recvs);
+    fprintf(out, "%s |              num_cols = %lu\n", prefix, state->num_cols);
+    fprintf(out, "%s |            num_delays = %lu\n", prefix, state->num_delays);
+    fprintf(out, "%s |              num_wait = %lu\n", prefix, state->num_wait);
+    fprintf(out, "%s |           num_waitall = %lu\n", prefix, state->num_waitall);
+    fprintf(out, "%s |          num_waitsome = %lu\n", prefix, state->num_waitsome);
 
     // Timing information
-    fprintf(out, "            start_time = %g\n", state->start_time);
-    fprintf(out, "              col_time = %g\n", state->col_time);
-    fprintf(out, "           reduce_time = %g\n", state->reduce_time);
-    fprintf(out, "            num_reduce = %d\n", state->num_reduce);
-    fprintf(out, "       all_reduce_time = %g\n", state->all_reduce_time);
-    fprintf(out, "        num_all_reduce = %d\n", state->num_all_reduce);
-    fprintf(out, "          elapsed_time = %g\n", state->elapsed_time);
-    fprintf(out, "          compute_time = %g\n", state->compute_time);
-    fprintf(out, "             send_time = %g\n", state->send_time);
-    fprintf(out, "              max_time = %g\n", state->max_time);
-    fprintf(out, "             recv_time = %g\n", state->recv_time);
-    fprintf(out, "             wait_time = %g\n", state->wait_time);
+    fprintf(out, "%s |            start_time = %g\n", prefix, state->start_time);
+    fprintf(out, "%s |              col_time = %g\n", prefix, state->col_time);
+    fprintf(out, "%s |           reduce_time = %g\n", prefix, state->reduce_time);
+    fprintf(out, "%s |            num_reduce = %d\n", prefix, state->num_reduce);
+    fprintf(out, "%s |       all_reduce_time = %g\n", prefix, state->all_reduce_time);
+    fprintf(out, "%s |        num_all_reduce = %d\n", prefix, state->num_all_reduce);
+    fprintf(out, "%s |          elapsed_time = %g\n", prefix, state->elapsed_time);
+    fprintf(out, "%s |          compute_time = %g\n", prefix, state->compute_time);
+    fprintf(out, "%s |             send_time = %g\n", prefix, state->send_time);
+    fprintf(out, "%s |              max_time = %g\n", prefix, state->max_time);
+    fprintf(out, "%s |             recv_time = %g\n", prefix, state->recv_time);
+    fprintf(out, "%s |             wait_time = %g\n", prefix, state->wait_time);
 
     // Queue heads
-    fprintf(out, "         arrival_queue[%d] = [\n", qlist_count(&state->arrival_queue));
-    print_mpi_msgs_queue(out, &state->arrival_queue, "            ");
-    fprintf(out, "]\n");
-    fprintf(out, "   pending_recvs_queue[%d] = [\n", qlist_count(&state->pending_recvs_queue));
-    print_mpi_msgs_queue(out, &state->pending_recvs_queue, "            ");
-    fprintf(out, "]\n");
-
-    fprintf(out, "        completed_reqs[%d] = [\n", qlist_count(&state->completed_reqs));
+    char addprefix[] = " |  | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+    char subprefix[len_subprefix];
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix);
+
+    fprintf(out, "%s |         arrival_queue[%d] = [\n", prefix, qlist_count(&state->arrival_queue));
+    print_mpi_msgs_queue(out, subprefix, &state->arrival_queue);
+    fprintf(out, "%s | ]\n", prefix);
+    fprintf(out, "%s |   pending_recvs_queue[%d] = [\n", prefix, qlist_count(&state->pending_recvs_queue));
+    print_mpi_msgs_queue(out, subprefix, &state->pending_recvs_queue);
+    fprintf(out, "%s | ]\n", prefix);
+
+    fprintf(out, "%s |        completed_reqs[%d] = [\n", prefix, qlist_count(&state->completed_reqs));
     completed_requests * current = NULL;
     qlist_for_each_entry(current, &state->completed_reqs, ql) {
-         fprintf(out, "            Req: req_id: %u\n", current->req_id);
+         fprintf(out, "%s |  | Req: req_id: %u\n", prefix, current->req_id);
     }
-    fprintf(out, "]\n");
+    fprintf(out, "%s | ]\n", prefix);
 
-    fprintf(out, "      cur_interval_end = %g\n", state->cur_interval_end);
-    fprintf(out, "              *wait_op = %p\n", state->wait_op);
+    fprintf(out, "%s |      cur_interval_end = %g\n", prefix, state->cur_interval_end);
+    fprintf(out, "%s |              *wait_op = %p\n", prefix, state->wait_op);
     if (state->wait_op != NULL) {
-        fprintf(out, "                     |.op_type = %d\n", state->wait_op->op_type);
-        fprintf(out, "                     |.req_ids = [");
+        fprintf(out, "%s |                  |.op_type = %d\n", prefix, state->wait_op->op_type);
+        fprintf(out, "%s |                  |.req_ids = [", prefix);
         for(int i = 0; i < state->wait_op->count; i++) {
             fprintf(out, "%d%s", state->wait_op->req_ids[i], i+1==state->wait_op->count ? "" : ", ");
         }
         fprintf(out, "]\n");
-        fprintf(out, "                     |.num_completed = %d\n", state->wait_op->num_completed);
-        fprintf(out, "                     |.count = %d\n", state->wait_op->count);
-        fprintf(out, "                     |.start_time = %g\n", state->wait_op->start_time);
+        fprintf(out, "%s |                  |.num_completed = %d\n", prefix, state->wait_op->num_completed);
+        fprintf(out, "%s |                  |.count   = %d\n", prefix, state->wait_op->count);
+        fprintf(out, "%s |                  |.start_time = %g\n", prefix, state->wait_op->start_time);
     }
-    fprintf(out, "           msg_sz_list[%d] = [\n", qlist_count(&state->completed_reqs));
+    fprintf(out, "%s |           msg_sz_list[%d] = [\n", prefix, qlist_count(&state->completed_reqs));
     struct msg_size_info * ms_info = NULL;
     qlist_for_each_entry(ms_info, &state->msg_sz_list, ql) {
-         fprintf(out, "            MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p  hash_link.prev: %p\n", ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev);
+         fprintf(out, "%s |  | MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p  hash_link.prev: %p\n", prefix, ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev);
     }
-    fprintf(out, "]\n");
+    fprintf(out, "%s | ]\n", prefix);
 
     // Data statistics
-    fprintf(out, "        num_bytes_sent = %llu\n", state->num_bytes_sent);
-    fprintf(out, "       num_bytes_recvd = %llu\n", state->num_bytes_recvd);
-    fprintf(out, "              syn_data = %llu\n", state->syn_data);
-    fprintf(out, "              gen_data = %llu\n", state->gen_data);
+    fprintf(out, "%s |        num_bytes_sent = %llu\n", prefix, state->num_bytes_sent);
+    fprintf(out, "%s |       num_bytes_recvd = %llu\n", prefix, state->num_bytes_recvd);
+    fprintf(out, "%s |              syn_data = %llu\n", prefix, state->syn_data);
+    fprintf(out, "%s |              gen_data = %llu\n", prefix, state->gen_data);
 
-    fprintf(out, "           prev_switch = %lu\n", state->prev_switch);
-    fprintf(out, "       saved_perm_dest = %d\n", state->saved_perm_dest);
-    fprintf(out, "               rc_perm = %lu\n", state->rc_perm);
+    fprintf(out, "%s |           prev_switch = %lu\n", prefix, state->prev_switch);
+    fprintf(out, "%s |       saved_perm_dest = %d\n", prefix, state->saved_perm_dest);
+    fprintf(out, "%s |               rc_perm = %lu\n", prefix, state->rc_perm);
 
     // Sampling information
-    fprintf(out, "         sampling_indx = %d\n", state->sampling_indx);
-    fprintf(out, "          max_arr_size = %d\n", state->max_arr_size);
-    fprintf(out, "*     mpi_wkld_samples = %p\n", state->mpi_wkld_samples);
-    fprintf(out, "            output_buf = %.512s...\n", state->output_buf);
-    fprintf(out, "             col_stats = %.64s...\n", state->col_stats);
-
-    fprintf(out, "ross_sample.\n");
-    fprintf(out, "           |          .nw_id = %lu\n", state->ross_sample.nw_id);
-    fprintf(out, "           |         .app_id = %d\n", state->ross_sample.app_id);
-    fprintf(out, "           |     .local_rank = %d\n", state->ross_sample.local_rank);
-    fprintf(out, "           |      .num_sends = %lu\n", state->ross_sample.num_sends);
-    fprintf(out, "           |      .num_recvs = %lu\n", state->ross_sample.num_recvs);
-    fprintf(out, "           | .num_bytes_sent = %llu\n", state->ross_sample.num_bytes_sent);
-    fprintf(out, "           |.num_bytes_recvd = %llu\n", state->ross_sample.num_bytes_recvd);
-    fprintf(out, "           |      .send_time = %g\n", state->ross_sample.send_time);
-    fprintf(out, "           |      .recv_time = %g\n", state->ross_sample.recv_time);
-    fprintf(out, "           |      .wait_time = %g\n", state->ross_sample.wait_time);
-    fprintf(out, "           |   .compute_time = %g\n", state->ross_sample.compute_time);
-    fprintf(out, "           |      .comm_time = %g\n", state->ross_sample.comm_time);
-    fprintf(out, "           |       .max_time = %g\n", state->ross_sample.max_time);
-    fprintf(out, "           |   .avg_msg_time = %g\n", state->ross_sample.avg_msg_time);
+    fprintf(out, "%s |         sampling_indx = %d\n", prefix, state->sampling_indx);
+    fprintf(out, "%s |          max_arr_size = %d\n", prefix, state->max_arr_size);
+    fprintf(out, "%s |*     mpi_wkld_samples = %p\n", prefix, state->mpi_wkld_samples);
+    fprintf(out, "%s |            output_buf = %.512s...\n", prefix, state->output_buf);
+    fprintf(out, "%s |             col_stats = %.64s...\n", prefix, state->col_stats);
+
+    fprintf(out, "%s |ross_sample.\n", prefix);
+    fprintf(out, "%s |    |           nw_id = %lu\n", prefix, state->ross_sample.nw_id);
+    fprintf(out, "%s |    |          app_id = %d\n", prefix, state->ross_sample.app_id);
+    fprintf(out, "%s |    |      local_rank = %d\n", prefix, state->ross_sample.local_rank);
+    fprintf(out, "%s |    |       num_sends = %lu\n", prefix, state->ross_sample.num_sends);
+    fprintf(out, "%s |    |       num_recvs = %lu\n", prefix, state->ross_sample.num_recvs);
+    fprintf(out, "%s |    |  num_bytes_sent = %llu\n", prefix, state->ross_sample.num_bytes_sent);
+    fprintf(out, "%s |    | num_bytes_recvd = %llu\n", prefix, state->ross_sample.num_bytes_recvd);
+    fprintf(out, "%s |    |       send_time = %g\n", prefix, state->ross_sample.send_time);
+    fprintf(out, "%s |    |       recv_time = %g\n", prefix, state->ross_sample.recv_time);
+    fprintf(out, "%s |    |       wait_time = %g\n", prefix, state->ross_sample.wait_time);
+    fprintf(out, "%s |    |    compute_time = %g\n", prefix, state->ross_sample.compute_time);
+    fprintf(out, "%s |    |       comm_time = %g\n", prefix, state->ross_sample.comm_time);
+    fprintf(out, "%s |    |        max_time = %g\n", prefix, state->ross_sample.max_time);
+    fprintf(out, "%s |    |    avg_msg_time = %g\n", prefix, state->ross_sample.avg_msg_time);
 
     // Configuration
-    fprintf(out, "*        switch_config = %p\n", state->switch_config);
-    fprintf(out, "    switch_config_size = %zu\n", state->switch_config_size);
+    fprintf(out, "%s |*        switch_config = %p\n", prefix, state->switch_config);
+    fprintf(out, "%s |    switch_config_size = %zu\n", prefix, state->switch_config_size);
 }
 
 static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) {
@@ -3789,83 +3795,88 @@ static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type)
 }
 
 // Original printing function from Claude.ai
-static void print_nw_message(FILE * out, struct nw_message * msg) {
-    // Print main fields
-    fprintf(out, "msg_type = %s\n", MPI_NW_EVENTS_to_string(msg->msg_type));
-    fprintf(out, " op_type = %s\n", op_type_string(msg->op_type));
-    fprintf(out, "num_rngs = %d\n", msg->num_rngs);
-    fprintf(out, "event_rc = %d\n", msg->event_rc);
-    fprintf(out, "  mpi_op = %p\n", msg->mpi_op);
-    fprint_codes_workload_op(out, msg->mpi_op, "        |");
-
-    fprintf(out, "fwd\n");
-    fprintf(out, "  |      .src_rank = %lu\n", msg->fwd.src_rank);
-    fprintf(out, "  |     .dest_rank = %d\n", msg->fwd.dest_rank);
-    fprintf(out, "  |     .num_bytes = %ld\n", msg->fwd.num_bytes);
-    fprintf(out, "  |   .num_matched = %d\n", msg->fwd.num_matched);
-    fprintf(out, "  |.sim_start_time = %g\n", msg->fwd.sim_start_time);
-    fprintf(out, "  | .msg_send_time = %g\n", msg->fwd.msg_send_time);
-    fprintf(out, "  |        .req_id = %u\n", msg->fwd.req_id);
-    fprintf(out, "  |   .matched_req = %d\n", msg->fwd.matched_req);
-    fprintf(out, "  |           .tag = %d\n", msg->fwd.tag);
-    fprintf(out, "  |        .app_id = %d\n", msg->fwd.app_id);
-    fprintf(out, "  |   .found_match = %d\n", msg->fwd.found_match);
-    fprintf(out, "  |.wait_completed = %d\n", msg->fwd.wait_completed);
-    fprintf(out, "  |     .rend_send = %d\n", msg->fwd.rend_send);
-
-    fprintf(out, "rc\n");
+static void print_nw_message(FILE * out, char const * prefix, struct nw_message * msg) {
+    fprintf(out, "%snw_message ->\n", prefix);
+    fprintf(out, "%s | msg_type = %s\n", prefix, MPI_NW_EVENTS_to_string(msg->msg_type));
+    fprintf(out, "%s |  op_type = %s\n", prefix, op_type_string(msg->op_type));
+    fprintf(out, "%s | num_rngs = %d\n", prefix, msg->num_rngs);
+    fprintf(out, "%s | event_rc = %d\n", prefix, msg->event_rc);
+    fprintf(out, "%s |   mpi_op = %p\n", prefix, msg->mpi_op);
+
+    char addprefix[] = " |   | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+    char subprefix[len_subprefix];
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix);
+    fprint_codes_workload_op(out, subprefix, msg->mpi_op);
+
+    fprintf(out, "%s | fwd\n", prefix);
+    fprintf(out, "%s |   |       src_rank = %lu\n", prefix, msg->fwd.src_rank);
+    fprintf(out, "%s |   |      dest_rank = %d\n", prefix, msg->fwd.dest_rank);
+    fprintf(out, "%s |   |      num_bytes = %ld\n", prefix, msg->fwd.num_bytes);
+    fprintf(out, "%s |   |    num_matched = %d\n", prefix, msg->fwd.num_matched);
+    fprintf(out, "%s |   | sim_start_time = %g\n", prefix, msg->fwd.sim_start_time);
+    fprintf(out, "%s |   |  msg_send_time = %g\n", prefix, msg->fwd.msg_send_time);
+    fprintf(out, "%s |   |         req_id = %u\n", prefix, msg->fwd.req_id);
+    fprintf(out, "%s |   |    matched_req = %d\n", prefix, msg->fwd.matched_req);
+    fprintf(out, "%s |   |            tag = %d\n", prefix, msg->fwd.tag);
+    fprintf(out, "%s |   |         app_id = %d\n", prefix, msg->fwd.app_id);
+    fprintf(out, "%s |   |    found_match = %d\n", prefix, msg->fwd.found_match);
+    fprintf(out, "%s |   | wait_completed = %d\n", prefix, msg->fwd.wait_completed);
+    fprintf(out, "%s |   |      rend_send = %d\n", prefix, msg->fwd.rend_send);
+
+    fprintf(out, "%s | rc\n", prefix);
     switch(msg->msg_type) {
         case CLI_BCKGND_GEN:
-            fprintf(out, "  |.gen\n");
-            fprintf(out, "      | .saved_syn_length = %d\n", msg->rc.gen.saved_syn_length);
-            fprintf(out, "      |       .saved_perm = %d\n", msg->rc.gen.saved_perm);
-            fprintf(out, "      |.saved_prev_switch = %lu\n", msg->rc.gen.saved_prev_switch);
+            fprintf(out, "%s |   | gen\n", prefix);
+            fprintf(out, "%s |       |  saved_syn_length = %d\n", prefix, msg->rc.gen.saved_syn_length);
+            fprintf(out, "%s |       |        saved_perm = %d\n", prefix, msg->rc.gen.saved_perm);
+            fprintf(out, "%s |       | saved_prev_switch = %lu\n", prefix, msg->rc.gen.saved_prev_switch);
             break;
 
         case CLI_BCKGND_ARRIVE:
         case MPI_SEND_ARRIVED_CB:
-            fprintf(out, "  |arrive.saved_prev_max_time = %g\n", msg->rc.arrive.saved_prev_max_time);
-            fprintf(out, "  |    arrive.saved_send_time = %g\n", msg->rc.arrive.saved_send_time);
-            fprintf(out, "  |arrive.saved_send_time_sample = %g\n", msg->rc.arrive.saved_send_time_sample);
+            fprintf(out, "%s |   |arrive.saved_prev_max_time = %g\n", prefix, msg->rc.arrive.saved_prev_max_time);
+            fprintf(out, "%s |   |    arrive.saved_send_time = %g\n", prefix, msg->rc.arrive.saved_send_time);
+            fprintf(out, "%s |   |arrive.saved_send_time_sample = %g\n", prefix, msg->rc.arrive.saved_send_time_sample);
             break;
 
         case CLI_BCKGND_CHANGE:
-            fprintf(out, "  |   change.saved_send_time = %g\n", msg->rc.change.saved_send_time);
-            fprintf(out, "  | change.saved_marker_time = %g\n", msg->rc.change.saved_marker_time);
+            fprintf(out, "%s |   |   change.saved_send_time = %g\n", prefix, msg->rc.change.saved_send_time);
+            fprintf(out, "%s |   | change.saved_marker_time = %g\n", prefix, msg->rc.change.saved_marker_time);
             break;
 
         case MPI_OP_GET_NEXT:
-            fprintf(out, "   .mpi_next\n");
-            fprintf(out, "           |.saved_elapsed_time = %g\n", msg->rc.mpi_next.saved_elapsed_time);
-            fprintf(out, "           |.all_reduce.saved_send_time = %g\n", msg->rc.mpi_next.all_reduce.saved_send_time);
-            fprintf(out, "           |.all_reduce.saved_delay = %g\n", msg->rc.mpi_next.all_reduce.saved_delay);
+            fprintf(out, "%s |     mpi_next\n", prefix);
+            fprintf(out, "%s |            | saved_elapsed_time = %g\n", prefix, msg->rc.mpi_next.saved_elapsed_time);
+            fprintf(out, "%s |            | all_reduce.saved_send_time = %g\n", prefix, msg->rc.mpi_next.all_reduce.saved_send_time);
+            fprintf(out, "%s |            | all_reduce.saved_delay = %g\n", prefix, msg->rc.mpi_next.all_reduce.saved_delay);
 
-            fprintf(out, "           |.recv.saved_recv_time = %g\n", msg->rc.mpi_next.recv.saved_recv_time);
-            fprintf(out, "           |.recv.saved_recv_time_sample = %g\n", msg->rc.mpi_next.recv.saved_recv_time_sample);
+            fprintf(out, "%s |            | recv.saved_recv_time = %g\n", prefix, msg->rc.mpi_next.recv.saved_recv_time);
+            fprintf(out, "%s |            | recv.saved_recv_time_sample = %g\n", prefix, msg->rc.mpi_next.recv.saved_recv_time_sample);
 
-            fprintf(out, "           |.delay.saved_delay = %g\n", msg->rc.mpi_next.delay.saved_delay);
-            fprintf(out, "           |.delay.saved_delay_sample = %g\n", msg->rc.mpi_next.delay.saved_delay_sample);
+            fprintf(out, "%s |            | delay.saved_delay = %g\n", prefix, msg->rc.mpi_next.delay.saved_delay);
+            fprintf(out, "%s |            | delay.saved_delay_sample = %g\n", prefix, msg->rc.mpi_next.delay.saved_delay_sample);
 
-            fprintf(out, "           |.mark.saved_marker_time = %g\n", msg->rc.mpi_next.mark.saved_marker_time);
+            fprintf(out, "%s |            | mark.saved_marker_time = %g\n", prefix, msg->rc.mpi_next.mark.saved_marker_time);
             break;
 
         case MPI_SEND_ARRIVED:
         case MPI_REND_ARRIVED:
         case MPI_SEND_POSTED:
-            fprintf(out, "  |.mpi_send\n");
-            fprintf(out, "           |       .saved_wait_time = %g\n", msg->rc.mpi_send.saved_wait_time);
-            fprintf(out, "           |.saved_wait_time_sample = %g\n", msg->rc.mpi_send.saved_wait_time_sample);
-            fprintf(out, "           |       .saved_recv_time = %g\n", msg->rc.mpi_send.saved_recv_time);
-            fprintf(out, "           |.saved_recv_time_sample = %g\n", msg->rc.mpi_send.saved_recv_time_sample);
-            fprintf(out, "           |       .saved_num_bytes = %lu\n", msg->rc.mpi_send.saved_num_bytes);
+            fprintf(out, "%s |   | mpi_send\n", prefix);
+            fprintf(out, "%s |            |        saved_wait_time = %g\n", prefix, msg->rc.mpi_send.saved_wait_time);
+            fprintf(out, "%s |            | saved_wait_time_sample = %g\n", prefix, msg->rc.mpi_send.saved_wait_time_sample);
+            fprintf(out, "%s |            |        saved_recv_time = %g\n", prefix, msg->rc.mpi_send.saved_recv_time);
+            fprintf(out, "%s |            | saved_recv_time_sample = %g\n", prefix, msg->rc.mpi_send.saved_recv_time_sample);
+            fprintf(out, "%s |            |        saved_num_bytes = %lu\n", prefix, msg->rc.mpi_send.saved_num_bytes);
             break;
 
         case MPI_REND_ACK_ARRIVED:
-            fprintf(out, "  |  mpi_ack.saved_num_bytes = %ld\n", msg->rc.mpi_ack.saved_num_bytes);
+            fprintf(out, "%s |   |  mpi_ack.saved_num_bytes = %ld\n", prefix, msg->rc.mpi_ack.saved_num_bytes);
             break;
 
         case SURR_SKIP_ITERATION:
-            fprintf(out, "  |        surr.config_used = %p\n", msg->rc.surr.config_used);
+            fprintf(out, "%s |   |        surr.config_used = %p\n", prefix, msg->rc.surr.config_used);
             break;
 
         default:
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 2cc1d516..1a065c8f 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -139,9 +139,9 @@ tw_lptype model_net_base_lp = {
 static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from);
 static void clean_state_net_state(model_net_base_state * state);
 static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after);
-static void print_model_net_state(FILE * out, model_net_base_state * state);
-static void print_model_net_checkpoint(FILE * out, model_net_base_state * state);
-static void print_event_state(FILE * out, model_net_wrap_msg * state);
+static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state);
+static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state);
+static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * state);
 
 // ROSS function pointer table to check reverse event handler
 crv_checkpointer model_net_chkptr = {
@@ -1233,87 +1233,77 @@ static bool check_model_net_state(model_net_base_state * before, model_net_base_
     return is_same;
 }
 
-static void __print_model_net(FILE * out, model_net_base_state * state, bool is_lp_state) {
-    fprintf(out, "model_net_state ->\n");
+static void __print_model_net(FILE * out, char const * prefix, model_net_base_state * state, bool is_lp_state) {
+    fprintf(out, "%smodel_net_state ->\n", prefix);
 #if MODELNET_LP_DEBUG
-    fprintf(out, "  |num_events_processed = %zu\n", state->num_events_processed);
-#endif /* if MODELNET_LP_DEBUG */
-    fprintf(out, "  |              net_id = %d\n", state->net_id);
-    fprintf(out, "  |     nics_per_router = %d\n", state->nics_per_router);
-    fprintf(out, "  | *in_sched_send_loop[%d] = [", state->params->num_queues);  // (done) deep-all
+    fprintf(out, "%s  |num_events_processed = %zu\n", prefix, state->num_events_processed);
+#endif /* if MODEL%sNET_LP_DEBUG */
+
+    void (*print_modelnet) (FILE *, char const *, model_net_sched *) = is_lp_state ? print_model_net_sched : print_model_net_sched_checkpoint;
+
+    fprintf(out, "%s  |              net_id = %d\n", prefix, state->net_id);
+    fprintf(out, "%s  |     nics_per_router = %d\n", prefix, state->nics_per_router);
+    fprintf(out, "%s  | *in_sched_send_loop[%d] = [", prefix, state->params->num_queues);  // deep-all
     for (int i=0; i < state->params->num_queues; i++) {
         fprintf(out, "%d%s", state->in_sched_send_loop[i], i==state->params->num_queues-1 ? "" : ", ");
     }
     fprintf(out, "]\n");
-    fprintf(out, "  |  in_sched_recv_loop = %d\n", state->in_sched_recv_loop);
-    fprintf(out, "  |              msg_id = %lu\n", state->msg_id);
-    fprintf(out, "  | **       sched_send = %p\n", state->sched_send);  // (done) deep-all
-    fprintf(out, "  | *        sched_recv = %p\n", state->sched_recv);  // (done) deep-all
-    fprintf(out, "  | *            params = %p\n", state->params);
-    fprintf(out, "  | *          sub_type = %p\n", state->sub_type);
-    fprintf(out, "  | *    sub_model_type = %p\n", state->sub_model_type);
-    fprintf(out, "  | *         sub_state = %p\n", state->sub_state);  // deep-all
-    fprintf(out, "  | next_available_time = %f\n", state->next_available_time);
-    fprintf(out, "  | *node_copy_next_available_time[%d] = [", state->params->num_queues);  // (done) deep-all
-    for (int i=0; i < state->params->node_copy_queues; i++) {
-        fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? "" : ", ");
-    }
-    fprintf(out, "]\n");
-    fprintf(out, "  | *sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate);  // no need to check
-    fprintf(out, "  | sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate);  // no need to check
-
-    void (*print_modelnet) (FILE * out, model_net_sched *sched) = is_lp_state ? print_model_net_sched : print_model_net_sched_checkpoint;
-
-    fprintf(out, "\n");
+    fprintf(out, "%s  |  in_sched_recv_loop = %d\n", prefix, state->in_sched_recv_loop);
+    fprintf(out, "%s  |              msg_id = %lu\n", prefix, state->msg_id);
+    fprintf(out, "%s  | **       sched_send = %p\n", prefix, state->sched_send);  // deep-all
+    //
+    int len_subprefix = snprintf(NULL, 0, "%s  |    | ", prefix) + 1;
+    char subprefix[len_subprefix];
+    snprintf(subprefix, len_subprefix, "%s  |    | ", prefix);
     for(int i = 0; i < state->params->num_queues; i++) {
-        fprintf(out, "==== CONTENT for sched_send[%d]:\n", i);
-        print_modelnet(stderr, state->sched_send[i]);
-    }
-
-    fprintf(out, "\n==== CONTENT for sched_recv:\n");
-    print_modelnet(stderr, state->sched_recv);
-
+        fprintf(out, "%ssched_send[%d]:\n", subprefix, i);
+        print_modelnet(out, subprefix, state->sched_send[i]);
+    }
+    //
+    fprintf(out, "%s  | *        sched_recv = %p\n", prefix, state->sched_recv);  // deep-all
+    print_modelnet(out, subprefix, state->sched_recv);
+    fprintf(out, "%s  | *            params = %p\n", prefix, state->params);
+    fprintf(out, "%s  | *          sub_type = %p\n", prefix, state->sub_type);
+    fprintf(out, "%s  | *    sub_model_type = %p\n", prefix, state->sub_model_type);
+    fprintf(out, "%s  | *         sub_state = %p\n", prefix, state->sub_state);  // deep-all
+    //
     crv_checkpointer * chptr = method_array[state->net_id]->checkpointer;
     if (chptr && state->sub_state != NULL) {
         if (is_lp_state && chptr->print_lp) {
-            fprintf(out, "\n==== CONTENT for sub_state:\n");
-            chptr->print_lp(out, state->sub_state);
+            chptr->print_lp(out, subprefix, state->sub_state);
         }
         if (!is_lp_state && chptr->print_checkpoint) {
-            fprintf(out, "\n==== CONTENT for sub_state:\n");
-            chptr->print_checkpoint(out, state->sub_state);
+            chptr->print_checkpoint(out, subprefix, state->sub_state);
         }
     }
+    //
+    fprintf(out, "%s  | next_available_time = %f\n", prefix, state->next_available_time);
+    fprintf(out, "%s  | *node_copy_next_available_time[%d] = [", prefix, state->params->num_queues);  // (done) deep-all
+    for (int i=0; i < state->params->node_copy_queues; i++) {
+        fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? "" : ", ");
+    }
+    fprintf(out, "]\n");
+    fprintf(out, "%s  | *sched_loop_pre_surrogate = %p\n", prefix, state->sched_loop_pre_surrogate);  // no need to check
+    fprintf(out, "%s  | sched_recv_loop_pre_surrogate = %d\n", prefix, state->sched_recv_loop_pre_surrogate);  // no need to check
 }
 
-static void print_model_net_state(FILE * out, model_net_base_state * state) {
-    __print_model_net(out, state, true);
+static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state) {
+    __print_model_net(out, prefix, state, true);
 }
-static void print_model_net_checkpoint(FILE * out, model_net_base_state * state) {
-    __print_model_net(out, state, false);
+static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state) {
+    __print_model_net(out, prefix, state, false);
 }
 
-static void print_type(FILE * out, enum model_net_base_event_type type) {
+static char const * const event_type_string(enum model_net_base_event_type type) {
     switch (type) {
-        case MN_BASE_NEW_MSG:
-            fprintf(out, "MN_BASE_NEW_MSG");
-            break;
-        case MN_BASE_SCHED_NEXT:
-            fprintf(out, "MN_BASE_SCHED_NEXT");
-            break;
-        case MN_BASE_SAMPLE:
-            fprintf(out, "MN_BASE_SAMPLE");
-            break;
-        case MN_BASE_PASS:
-            fprintf(out, "MN_BASE_PASS");
-            break;
-        case MN_BASE_END_NOTIF:
-            fprintf(out, "MN_BASE_END_NOTIF");
-            break;
-        case MN_CONGESTION_EVENT:
-            fprintf(out, "MN_CONGESTION_EVENT");
-            break;
-    }
+        case MN_BASE_NEW_MSG:     return "MN_BASE_NEW_MSG";
+        case MN_BASE_SCHED_NEXT:  return "MN_BASE_SCHED_NEXT";
+        case MN_BASE_SAMPLE:      return "MN_BASE_SAMPLE";
+        case MN_BASE_PASS:        return "MN_BASE_PASS";
+        case MN_BASE_END_NOTIF:   return "MN_BASE_END_NOTIF";
+        case MN_CONGESTION_EVENT: return "MN_CONGESTION_EVENT";
+    }
+    return "UNKNOWN TYPE!!";
 }
 
 // Used Claude for an initial draft of this function
@@ -1340,53 +1330,62 @@ bool check_model_net_request(model_net_request const * before, model_net_request
     return is_same;
 }
 
-void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) {
-    fprintf(out, "%sfinal_dest_lp = %ld\n", starts_with, req->final_dest_lp);
-    fprintf(out, "%sdest_mn_lp = %ld\n", starts_with, req->dest_mn_lp);
-    fprintf(out, "%ssrc_lp = %ld\n", starts_with, req->src_lp);
-    fprintf(out, "%smsg_start_time = %f\n", starts_with, req->msg_start_time);
-    fprintf(out, "%smsg_new_mn_event = %f\n", starts_with, req->msg_new_mn_event);
-    fprintf(out, "%smsg_size = %ld\n", starts_with, req->msg_size);
-    fprintf(out, "%spull_size = %ld\n", starts_with, req->pull_size);
-    fprintf(out, "%spacket_size = %ld\n", starts_with, req->packet_size);
-    fprintf(out, "%smsg_id = %ld\n", starts_with, req->msg_id);
-    fprintf(out, "%snet_id = %d\n", starts_with, req->net_id);
-    fprintf(out, "%sis_pull = %d\n", starts_with, req->is_pull);
-    fprintf(out, "%squeue_offset = %d\n", starts_with, req->queue_offset);
-    fprintf(out, "%sremote_event_size = %d\n", starts_with, req->remote_event_size);
-    fprintf(out, "%sself_event_size = %d\n", starts_with, req->self_event_size);
-    fprintf(out, "%scategory = '%s'\n", starts_with, req->category);
-    fprintf(out, "%sapp_id = %d\n", starts_with, req->app_id);
+void print_model_net_request(FILE * out, char const * prefix, model_net_request * req) {
+    fprintf(out, "%sfinal_dest_lp = %ld\n", prefix, req->final_dest_lp);
+    fprintf(out, "%sdest_mn_lp = %ld\n", prefix, req->dest_mn_lp);
+    fprintf(out, "%ssrc_lp = %ld\n", prefix, req->src_lp);
+    fprintf(out, "%smsg_start_time = %f\n", prefix, req->msg_start_time);
+    fprintf(out, "%smsg_new_mn_event = %f\n", prefix, req->msg_new_mn_event);
+    fprintf(out, "%smsg_size = %ld\n", prefix, req->msg_size);
+    fprintf(out, "%spull_size = %ld\n", prefix, req->pull_size);
+    fprintf(out, "%spacket_size = %ld\n", prefix, req->packet_size);
+    fprintf(out, "%smsg_id = %ld\n", prefix, req->msg_id);
+    fprintf(out, "%snet_id = %d\n", prefix, req->net_id);
+    fprintf(out, "%sis_pull = %d\n", prefix, req->is_pull);
+    fprintf(out, "%squeue_offset = %d\n", prefix, req->queue_offset);
+    fprintf(out, "%sremote_event_size = %d\n", prefix, req->remote_event_size);
+    fprintf(out, "%sself_event_size = %d\n", prefix, req->self_event_size);
+    fprintf(out, "%scategory = '%s'\n", prefix, req->category);
+    fprintf(out, "%sapp_id = %d\n", prefix, req->app_id);
 }
 
-static void print_event_state(FILE * out, model_net_wrap_msg * msg) {
-    fprintf(out, "h\n");
-    fprintf(out, "|.src = %lu\n", msg->h.src);
-    fprintf(out, "|.event_type = %d (", msg->h.event_type);
-    print_type(out, msg->h.event_type);
-    fprintf(out, ")\n");
-    fprintf(out, "|.magic = %d\n", msg->h.magic);
+static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * msg) {
+    fprintf(out, "%sh\n", prefix);
+    fprintf(out, "%s| src = %lu\n", prefix, msg->h.src);
+    fprintf(out, "%s| event_type = %d (%s)\n", prefix, msg->h.event_type, event_type_string(msg->h.event_type));
+    fprintf(out, "%s| magic = %d\n", prefix, msg->h.magic);
+
+    char addprefix[] = "     |   | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+    char subprefix[len_subprefix];
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix);
+
+    char addprefix_2[] = "     |  |   | ";
+    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    char subprefix_2[len_subprefix];
+    snprintf(subprefix_2, len_subprefix, "%s%s", prefix, addprefix_2);
+
     switch (msg->h.event_type) {
         case MN_BASE_NEW_MSG:
         case MN_BASE_SCHED_NEXT:
             // We can check m_base values
-            fprintf(out, "m_base\n");
-            fprintf(out, "     |.req\n");
-            print_model_net_request(out, "     |   |.", &msg->msg.m_base.req);
-            fprintf(out, "     |.is_from_remote = %d\n", msg->msg.m_base.is_from_remote);
-            fprintf(out, "     |.isQueueReq = %d\n", msg->msg.m_base.isQueueReq);
-            fprintf(out, "     |.save_ts = %f\n", msg->msg.m_base.save_ts);
-            fprintf(out, "     |.sched_params.prio = %d\n", msg->msg.m_base.sched_params.prio);
-            fprintf(out, "     |.rc\n");
-            fprintf(out, "     |  |.req\n");
-            print_model_net_request(out, "     |  |   |.", &msg->msg.m_base.rc.req);
-            fprintf(out, "     |  |.sched_params.prio = %d\n", msg->msg.m_base.rc.sched_params.prio);
-            fprintf(out, "     |  |.rtn = %d\n", msg->msg.m_base.rc.rtn);
-            fprintf(out, "     |  |.prio = %d\n", msg->msg.m_base.rc.prio);
-            fprintf(out, "     |.created_in_surrogate = %d\n", msg->msg.m_base.created_in_surrogate);
+            fprintf(out, "%sm_base\n", prefix);
+            fprintf(out, "%s     | req\n", prefix);
+            print_model_net_request(out, subprefix, &msg->msg.m_base.req);
+            fprintf(out, "%s     | is_from_remote = %d\n", prefix, msg->msg.m_base.is_from_remote);
+            fprintf(out, "%s     | isQueueReq = %d\n", prefix, msg->msg.m_base.isQueueReq);
+            fprintf(out, "%s     | save_ts = %f\n", prefix, msg->msg.m_base.save_ts);
+            fprintf(out, "%s     | sched_params.prio = %d\n", prefix, msg->msg.m_base.sched_params.prio);
+            fprintf(out, "%s     | rc\n", prefix);
+            fprintf(out, "%s     |  | req\n", prefix);
+            print_model_net_request(out, subprefix_2, &msg->msg.m_base.rc.req);
+            fprintf(out, "%s     |  | sched_params.prio = %d\n", prefix, msg->msg.m_base.rc.sched_params.prio);
+            fprintf(out, "%s     |  | rtn = %d\n", prefix, msg->msg.m_base.rc.rtn);
+            fprintf(out, "%s     |  | prio = %d\n", prefix, msg->msg.m_base.rc.prio);
+            fprintf(out, "%s     | created_in_surrogate = %d\n", prefix, msg->msg.m_base.created_in_surrogate);
             break;
         default:
-            fprintf(out, "The content of this message cannot be deciphered yet with the information given\n");
+            fprintf(out, "%sThe content of this message cannot be deciphered yet with the information given\n", prefix);
     }
     // TODO: print internal state of message
     // void * sub_msg = ((char*)msg) + msg_offsets[state->net_id];
diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c
index c23bd935..a3ff4fde 100644
--- a/src/networks/model-net/core/model-net-sched-impl.c
+++ b/src/networks/model-net/core/model-net-sched-impl.c
@@ -85,7 +85,7 @@ static void fcfs_next_rc(
 static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from);
 static void clean_state_fcfs_state(mn_sched_queue * into);
 static bool check_fcfs_state(mn_sched_queue *before, mn_sched_queue *after);
-static void print_fcfs_state(FILE * out, mn_sched_queue *sched);
+static void print_fcfs_state(FILE * out, char const * prefix, mn_sched_queue *sched);
 
 // ROUND-ROBIN
 static void rr_init (
@@ -460,29 +460,33 @@ static bool check_fcfs_state(mn_sched_queue * before, mn_sched_queue * after) {
     return is_same;
 }
 
-static void print_mn_sched_qitem(FILE * out, mn_sched_qitem * item) {
-    fprintf(out, "     mn_sched_qitem\n");
-    fprintf(out, "       | .req\n");
-    print_model_net_request(out, "       |     |.", &item->req);
-    fprintf(out, "       | sched_params.prio = %d\n", item->sched_params.prio);
-    fprintf(out, "       |               rem = %lu\n", item->rem);
-    fprintf(out, "       |      remote_event = %p (contents below)\n", item->remote_event);
-    tw_fprint_binary_array(out, item->remote_event, item->req.remote_event_size);
-    fprintf(out, "       |       local_event = %p (contents below)\n", item->local_event);
-    tw_fprint_binary_array(out, item->local_event, item->req.self_event_size);
+static void print_mn_sched_qitem(FILE * out, char const * prefix, mn_sched_qitem * item) {
+    int len_subprefix = snprintf(NULL, 0, "%s       |     | ", prefix) + 1;
+    char subprefix[len_subprefix];
+    snprintf(subprefix, len_subprefix, "%s       |     | ", prefix);
+
+    fprintf(out, "%s     mn_sched_qitem\n", prefix);
+    fprintf(out, "%s       | .req\n", prefix);
+    print_model_net_request(out, subprefix, &item->req);
+    fprintf(out, "%s       | sched_params.prio = %d\n", prefix, item->sched_params.prio);
+    fprintf(out, "%s       |               rem = %lu\n", prefix, item->rem);
+    fprintf(out, "%s       |      remote_event = %p (contents below)\n", prefix, item->remote_event);
+    tw_fprint_binary_array(out, subprefix, item->remote_event, item->req.remote_event_size);
+    fprintf(out, "%s       |       local_event = %p (contents below)\n", prefix, item->local_event);
+    tw_fprint_binary_array(out, subprefix, item->local_event, item->req.self_event_size);
 }
 
-static void print_fcfs_state(FILE * out, mn_sched_queue *sched) {
-    fprintf(out, "FCFS:\n");
-    fprintf(out, "   |        .method = %p\n", sched->method);
-    fprintf(out, "   | .is_recv_queue = %d\n", sched->is_recv_queue);
-    fprintf(out, "   |     .queue_len = %d\n", sched->queue_len);
-    fprintf(out, "   |      .reqs[%d] = {\n", qlist_count(&sched->reqs));
+static void print_fcfs_state(FILE * out, char const * prefix, mn_sched_queue *sched) {
+    fprintf(out, "%sFCFS:\n", prefix);
+    fprintf(out, "%s   |        .method = %p\n", prefix, sched->method);
+    fprintf(out, "%s   | .is_recv_queue = %d\n", prefix, sched->is_recv_queue);
+    fprintf(out, "%s   |     .queue_len = %d\n", prefix, sched->queue_len);
+    fprintf(out, "%s   |      .reqs[%d] = {\n", prefix, qlist_count(&sched->reqs));
     mn_sched_qitem * sched_qitem = NULL;
     qlist_for_each_entry(sched_qitem, &sched->reqs, ql) {
-         print_mn_sched_qitem(out, sched_qitem);
+         print_mn_sched_qitem(out, prefix, sched_qitem);
     }
-    fprintf(out, "}\n");
+    fprintf(out, "%s   | }\n", prefix);
 }
 
 void rr_init (
diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c
index ed280e19..4868fcbf 100644
--- a/src/networks/model-net/core/model-net-sched.c
+++ b/src/networks/model-net/core/model-net-sched.c
@@ -115,28 +115,34 @@ bool check_model_net_sched(
 
 static void __print_model_net_sched(
     FILE * out,
+    char const * prefix,
     model_net_sched *sched,
     bool is_lp_state
 ) {
     crv_checkpointer const * chptr = sched_checkpointers[sched->type];
-    fprintf(out, "model_net_sched.sched_type = %d\n", sched->type);
-    fprintf(out, "model_net_sched.\n");
+    fprintf(out, "%smodel_net_sched.sched_type = %d\n", prefix, sched->type);
+    fprintf(out, "%smodel_net_sched.dat = %p\n", prefix, sched->dat);
+
+    int len_subprefix = snprintf(NULL, 0, "%s  | ", prefix) + 1;
+    char subprefix[len_subprefix];
+    snprintf(subprefix, len_subprefix, "%s  | ", prefix);
+
     if (chptr) {
         if (is_lp_state && chptr->print_lp) {
-            chptr->print_lp(out, sched->dat);
+            chptr->print_lp(out, subprefix, sched->dat);
         }
         if (!is_lp_state && chptr->print_checkpoint) {
-            chptr->print_checkpoint(out, sched->dat);
+            chptr->print_checkpoint(out, subprefix, sched->dat);
         }
     }
 }
 
-void print_model_net_sched(FILE * out, model_net_sched *sched) {
-    __print_model_net_sched(out, sched, true);
+void print_model_net_sched(FILE * out, char const * prefix, model_net_sched *sched) {
+    __print_model_net_sched(out, prefix, sched, true);
 }
 
-void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched) {
-    __print_model_net_sched(out, sched, false);
+void print_model_net_sched_checkpoint(FILE * out, char const * prefix, model_net_sched *sched) {
+    __print_model_net_sched(out, prefix, sched, false);
 }
 /* STOP Checking reverse handler functionality */
 
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 6675ca4b..bb067dcd 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -6926,157 +6926,157 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf,
 
 //*** ---------- START OF reverse handler checking functions ---------- ***
 // Print fuction originally constructed with help from Claude.ai
-static void print_terminal_state(FILE * out, terminal_state * state) {
-    fprintf(out, "terminal_state (dragonfly-dally) ->\n");
-    fprintf(out, "  |           packet_counter = %ld\n", state->packet_counter);
-    fprintf(out, "  |               packet_gen = %d\n", state->packet_gen);
-    fprintf(out, "  |               packet_fin = %d\n", state->packet_fin);
-    fprintf(out, "  |           total_gen_size = %d\n", state->total_gen_size);
-    fprintf(out, "  | *              router_lp = %p\n", state->router_lp);
-    fprintf(out, "  | *              router_id = %p\n", state->router_id);
-    fprintf(out, "  |              terminal_id = %u\n", state->terminal_id);
-    fprintf(out, "  |                  connMan = <DragonflyConnectionManager object>\n");
-    fprintf(out, "  | *local_congestion_controller = %p\n", state->local_congestion_controller);
-    fprintf(out, "  |  workload_lpid_to_app_id = <map object>\n");
-    fprintf(out, "  |                  app_ids = <set object>\n");
-    fprintf(out, "  |  workloads_finished_flag = %d\n", state->workloads_finished_flag);
-    fprintf(out, "  | **          vc_occupancy = %p\n", state->vc_occupancy);
-    fprintf(out, "  | *terminal_available_time = %p\n", state->terminal_available_time);
-    fprintf(out, "  | ***        terminal_msgs = %p\n", state->terminal_msgs);
-    fprintf(out, "  | ***   terminal_msgs_tail = %p\n", state->terminal_msgs_tail);
-    fprintf(out, "  | *           in_send_loop = %p\n", state->in_send_loop);
-    fprintf(out, "  |    dragonfly_stats_array = <mn_stats array>\n");
-    fprintf(out, "  | **            qos_status = %p\n", state->qos_status);
-    fprintf(out, "  | **              qos_data = %p\n", state->qos_data);
-    fprintf(out, "  | *           last_qos_lvl = %p\n", state->last_qos_lvl);
-    fprintf(out, "  |         is_monitoring_bw = %d\n", state->is_monitoring_bw);
-    fprintf(out, "  | *                     st = %p\n", state->st);
-    fprintf(out, "  | *                  cc_st = %p\n", state->cc_st);
-    fprintf(out, "  | *              issueIdle = %p\n", state->issueIdle);
-    fprintf(out, "  | **       terminal_length = %p\n", state->terminal_length);
-    fprintf(out, "  | *                   anno = %s\n", state->anno ? state->anno : "(nil)");
-    fprintf(out, "  | *                 params = %p\n", state->params);
-    fprintf(out, "  | *               rank_tbl = %p\n", state->rank_tbl);
-    fprintf(out, "  |             rank_tbl_pop = %lu\n", state->rank_tbl_pop);
-    fprintf(out, "  |               total_time = %f\n", state->total_time);
-    fprintf(out, "  |           total_msg_size = %lu\n", state->total_msg_size);
-    fprintf(out, "  |               total_hops = %f\n", state->total_hops);
-    fprintf(out, "  |            finished_msgs = %ld\n", state->finished_msgs);
-    fprintf(out, "  |          finished_chunks = %ld\n", state->finished_chunks);
-    fprintf(out, "  |         finished_packets = %ld\n", state->finished_packets);
-    fprintf(out, "  | *          last_buf_full = %p\n", state->last_buf_full);
-    fprintf(out, "  | *              busy_time = %p\n", state->busy_time);
-    fprintf(out, "  | *           link_traffic = %p\n", state->link_traffic);
-    fprintf(out, "  | *           total_chunks = %p\n", state->total_chunks);
-    fprintf(out, "  | *         stalled_chunks = %p\n", state->stalled_chunks);
-    fprintf(out, "  |          injected_chunks = %lu\n", state->injected_chunks);
-    fprintf(out, "  |           ejected_chunks = %lu\n", state->ejected_chunks);
-    fprintf(out, "  |              max_latency = %f\n", state->max_latency);
-    fprintf(out, "  |              min_latency = %f\n", state->min_latency);
-    fprintf(out, "  |               output_buf = '%.4096s'\n", state->output_buf);
-    fprintf(out, "  |              output_buf2 = '%.4096s'\n", state->output_buf2);
-    fprintf(out, "  |        fin_chunks_sample = %ld\n", state->fin_chunks_sample);
-    fprintf(out, "  |         data_size_sample = %ld\n", state->data_size_sample);
-    fprintf(out, "  |          fin_hops_sample = %f\n", state->fin_hops_sample);
-    fprintf(out, "  |          fin_chunks_time = %f\n", state->fin_chunks_time);
-    fprintf(out, "  | *       busy_time_sample = %p\n", state->busy_time_sample);
-    fprintf(out, "  |               sample_buf = '%.4096s'\n", state->sample_buf);
-    fprintf(out, "  | *            sample_stat = %p\n", state->sample_stat);
-    fprintf(out, "  |              op_arr_size = %d\n", state->op_arr_size);
-    fprintf(out, "  |             max_arr_size = %d\n", state->max_arr_size);
-    fprintf(out, "  |               fwd_events = %ld\n", state->fwd_events);
-    fprintf(out, "  |               rev_events = %ld\n", state->rev_events);
-    fprintf(out, "  |   fin_chunks_ross_sample = %ld\n", state->fin_chunks_ross_sample);
-    fprintf(out, "  |    data_size_ross_sample = %ld\n", state->data_size_ross_sample);
-    fprintf(out, "  |     fin_hops_ross_sample = %ld\n", state->fin_hops_ross_sample);
-    fprintf(out, "  | fin_chunks_time_ross_sample = %f\n", state->fin_chunks_time_ross_sample);
-    fprintf(out, "  | *  busy_time_ross_sample = %p\n", state->busy_time_ross_sample);
-    fprintf(out, "  |              ross_sample = <dfly_cn_sample object>\n");
-    fprintf(out, "  |             sent_packets = <map object>\n");
-    fprintf(out, "  |      last_packet_sent_id = %lu\n", state->last_packet_sent_id);
-    fprintf(out, "  |   arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time);
-    fprintf(out, "  |     remaining_sz_packets = <map object>\n");
-    fprintf(out, "  |       last_in_queue_time = %f\n", state->last_in_queue_time);
-    fprintf(out, "  | *         predictor_data = %p\n", state->predictor_data);
-    fprintf(out, "  |                  zombies = <set object>\n");
-    fprintf(out, "  | *           frozen_state = %p\n", state->frozen_state);
+static void print_terminal_state(FILE * out, char const * prefix, terminal_state * state) {
+    fprintf(out, "%sterminal_state (dragonfly-dally) ->\n", prefix);
+    fprintf(out, "%s  |           packet_counter = %ld\n", prefix, state->packet_counter);
+    fprintf(out, "%s  |               packet_gen = %d\n", prefix, state->packet_gen);
+    fprintf(out, "%s  |               packet_fin = %d\n", prefix, state->packet_fin);
+    fprintf(out, "%s  |           total_gen_size = %d\n", prefix, state->total_gen_size);
+    fprintf(out, "%s  | *              router_lp = %p\n", prefix, state->router_lp);
+    fprintf(out, "%s  | *              router_id = %p\n", prefix, state->router_id);
+    fprintf(out, "%s  |              terminal_id = %u\n", prefix, state->terminal_id);
+    fprintf(out, "%s  |                  connMan = <DragonflyConnectionManager object>\n", prefix);
+    fprintf(out, "%s  | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller);
+    fprintf(out, "%s  |  workload_lpid_to_app_id = <map object>\n", prefix);
+    fprintf(out, "%s  |                  app_ids = <set object>\n", prefix);
+    fprintf(out, "%s  |  workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag);
+    fprintf(out, "%s  | **          vc_occupancy = %p\n", prefix, state->vc_occupancy);
+    fprintf(out, "%s  | *terminal_available_time = %p\n", prefix, state->terminal_available_time);
+    fprintf(out, "%s  | ***        terminal_msgs = %p\n", prefix, state->terminal_msgs);
+    fprintf(out, "%s  | ***   terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail);
+    fprintf(out, "%s  | *           in_send_loop = %p\n", prefix, state->in_send_loop);
+    fprintf(out, "%s  |    dragonfly_stats_array = <mn_stats array>\n", prefix);
+    fprintf(out, "%s  | **            qos_status = %p\n", prefix, state->qos_status);
+    fprintf(out, "%s  | **              qos_data = %p\n", prefix, state->qos_data);
+    fprintf(out, "%s  | *           last_qos_lvl = %p\n", prefix, state->last_qos_lvl);
+    fprintf(out, "%s  |         is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw);
+    fprintf(out, "%s  | *                     st = %p\n", prefix, state->st);
+    fprintf(out, "%s  | *                  cc_st = %p\n", prefix, state->cc_st);
+    fprintf(out, "%s  | *              issueIdle = %p\n", prefix, state->issueIdle);
+    fprintf(out, "%s  | **       terminal_length = %p\n", prefix, state->terminal_length);
+    fprintf(out, "%s  | *                   anno = %s\n", prefix, state->anno ? state->anno : "(nil)");
+    fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
+    fprintf(out, "%s  | *               rank_tbl = %p\n", prefix, state->rank_tbl);
+    fprintf(out, "%s  |             rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop);
+    fprintf(out, "%s  |               total_time = %f\n", prefix, state->total_time);
+    fprintf(out, "%s  |           total_msg_size = %lu\n", prefix, state->total_msg_size);
+    fprintf(out, "%s  |               total_hops = %f\n", prefix, state->total_hops);
+    fprintf(out, "%s  |            finished_msgs = %ld\n", prefix, state->finished_msgs);
+    fprintf(out, "%s  |          finished_chunks = %ld\n", prefix, state->finished_chunks);
+    fprintf(out, "%s  |         finished_packets = %ld\n", prefix, state->finished_packets);
+    fprintf(out, "%s  | *          last_buf_full = %p\n", prefix, state->last_buf_full);
+    fprintf(out, "%s  | *              busy_time = %p\n", prefix, state->busy_time);
+    fprintf(out, "%s  | *           link_traffic = %p\n", prefix, state->link_traffic);
+    fprintf(out, "%s  | *           total_chunks = %p\n", prefix, state->total_chunks);
+    fprintf(out, "%s  | *         stalled_chunks = %p\n", prefix, state->stalled_chunks);
+    fprintf(out, "%s  |          injected_chunks = %lu\n", prefix, state->injected_chunks);
+    fprintf(out, "%s  |           ejected_chunks = %lu\n", prefix, state->ejected_chunks);
+    fprintf(out, "%s  |              max_latency = %f\n", prefix, state->max_latency);
+    fprintf(out, "%s  |              min_latency = %f\n", prefix, state->min_latency);
+    fprintf(out, "%s  |               output_buf = '%.4096s'\n", prefix, state->output_buf);
+    fprintf(out, "%s  |              output_buf2 = '%.4096s'\n", prefix, state->output_buf2);
+    fprintf(out, "%s  |        fin_chunks_sample = %ld\n", prefix, state->fin_chunks_sample);
+    fprintf(out, "%s  |         data_size_sample = %ld\n", prefix, state->data_size_sample);
+    fprintf(out, "%s  |          fin_hops_sample = %f\n", prefix, state->fin_hops_sample);
+    fprintf(out, "%s  |          fin_chunks_time = %f\n", prefix, state->fin_chunks_time);
+    fprintf(out, "%s  | *       busy_time_sample = %p\n", prefix, state->busy_time_sample);
+    fprintf(out, "%s  |               sample_buf = '%.4096s'\n", prefix, state->sample_buf);
+    fprintf(out, "%s  | *            sample_stat = %p\n", prefix, state->sample_stat);
+    fprintf(out, "%s  |              op_arr_size = %d\n", prefix, state->op_arr_size);
+    fprintf(out, "%s  |             max_arr_size = %d\n", prefix, state->max_arr_size);
+    fprintf(out, "%s  |               fwd_events = %ld\n", prefix, state->fwd_events);
+    fprintf(out, "%s  |               rev_events = %ld\n", prefix, state->rev_events);
+    fprintf(out, "%s  |   fin_chunks_ross_sample = %ld\n", prefix, state->fin_chunks_ross_sample);
+    fprintf(out, "%s  |    data_size_ross_sample = %ld\n", prefix, state->data_size_ross_sample);
+    fprintf(out, "%s  |     fin_hops_ross_sample = %ld\n", prefix, state->fin_hops_ross_sample);
+    fprintf(out, "%s  | fin_chunks_time_ross_sample = %f\n", prefix, state->fin_chunks_time_ross_sample);
+    fprintf(out, "%s  | *  busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample);
+    fprintf(out, "%s  |              ross_sample = <dfly_cn_sample object>\n", prefix);
+    fprintf(out, "%s  |             sent_packets = <map object>\n", prefix);
+    fprintf(out, "%s  |      last_packet_sent_id = %lu\n", prefix, state->last_packet_sent_id);
+    fprintf(out, "%s  |   arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time);
+    fprintf(out, "%s  |     remaining_sz_packets = <map object>\n", prefix);
+    fprintf(out, "%s  |       last_in_queue_time = %f\n", prefix, state->last_in_queue_time);
+    fprintf(out, "%s  | *         predictor_data = %p\n", prefix, state->predictor_data);
+    fprintf(out, "%s  |                  zombies = <set object>\n", prefix);
+    fprintf(out, "%s  | *           frozen_state = %p\n", prefix, state->frozen_state);
 }
 
 // Print fuction originally constructed with help from Claude.ai
-static void print_terminal_dally_message(FILE * out, struct terminal_dally_message * msg) {
-    fprintf(out, "terminal_dally_message ->\n");
-    fprintf(out, "  |                      magic = %d\n", msg->magic);
-    fprintf(out, "  |          travel_start_time = %f\n", msg->travel_start_time);
-    fprintf(out, "  |            travel_end_time = %f\n", msg->travel_end_time);
-    fprintf(out, "  |                  packet_ID = %llu\n", msg->packet_ID);
-    fprintf(out, "  |                       type = %d\n", msg->type);
-    fprintf(out, "  |                notify_type = %d\n", msg->notify_type);
-    fprintf(out, "  |                   category = %s\n", msg->category);
-    fprintf(out, "  |             final_dest_gid = %lu\n", msg->final_dest_gid);
-    fprintf(out, "  |                  sender_lp = %lu\n", msg->sender_lp);
-    fprintf(out, "  |               sender_mn_lp = %lu\n", msg->sender_mn_lp);
-    fprintf(out, "  |         dest_terminal_lpid = %lu\n", msg->dest_terminal_lpid);
-    fprintf(out, "  |    dfdally_src_terminal_id = %u\n", msg->dfdally_src_terminal_id);
-    fprintf(out, "  |   dfdally_dest_terminal_id = %u\n", msg->dfdally_dest_terminal_id);
-    fprintf(out, "  |            src_terminal_id = %u\n", msg->src_terminal_id);
-    fprintf(out, "  |           origin_router_id = %u\n", msg->origin_router_id);
-    fprintf(out, "  |                     app_id = %d\n", msg->app_id);
-    fprintf(out, "  |                   my_N_hop = %d\n", msg->my_N_hop);
-    fprintf(out, "  |                   my_l_hop = %d\n", msg->my_l_hop);
-    fprintf(out, "  |                   my_g_hop = %d\n", msg->my_g_hop);
-    fprintf(out, "  |          my_hops_cur_group = %d\n", msg->my_hops_cur_group);
-    fprintf(out, "  |              saved_channel = %d\n", msg->saved_channel);
-    fprintf(out, "  |                   saved_vc = %d\n", msg->saved_vc);
-    fprintf(out, "  |                  next_stop = %d\n", msg->next_stop);
-    fprintf(out, "  |        this_router_arrival = %f\n", msg->this_router_arrival);
-    fprintf(out, "  |    this_router_ptp_latency = %f\n", msg->this_router_ptp_latency);
-    fprintf(out, "  |                 intm_lp_id = %u\n", msg->intm_lp_id);
-    fprintf(out, "  |                   last_hop = %d\n", msg->last_hop);
-    fprintf(out, "  |            is_intm_visited = %d\n", msg->is_intm_visited);
-    fprintf(out, "  |                intm_rtr_id = %d\n", msg->intm_rtr_id);
-    fprintf(out, "  |                intm_grp_id = %d\n", msg->intm_grp_id);
-    fprintf(out, "  |             saved_src_dest = %d\n", msg->saved_src_dest);
-    fprintf(out, "  |             saved_src_chan = %d\n", msg->saved_src_chan);
-    fprintf(out, "  |                   chunk_id = %u\n", msg->chunk_id);
-    fprintf(out, "  |                packet_size = %u\n", msg->packet_size);
-    fprintf(out, "  |                 message_id = %u\n", msg->message_id);
-    fprintf(out, "  |                 total_size = %u\n", msg->total_size);
-    fprintf(out, "  |    remote_event_size_bytes = %d\n", msg->remote_event_size_bytes);
-    fprintf(out, "  |     local_event_size_bytes = %d\n", msg->local_event_size_bytes);
-    fprintf(out, "  |                   vc_index = %d\n", msg->vc_index);
-    fprintf(out, "  |                    rail_id = %d\n", msg->rail_id);
-    fprintf(out, "  |                output_chan = %d\n", msg->output_chan);
-    fprintf(out, "  |                   event_rc = <model_net_event_return object>\n");
-    fprintf(out, "  |                    is_pull = %d\n", msg->is_pull);
-    fprintf(out, "  |                  pull_size = %u\n", msg->pull_size);
-    fprintf(out, "  |                  path_type = %d\n", msg->path_type);
-    fprintf(out, "  |               saved_app_id = %d\n", msg->saved_app_id);
-    fprintf(out, "  | is_there_another_pckt_in_queue = %s\n", msg->is_there_another_pckt_in_queue ? "true" : "false");
-    fprintf(out, "  |                   num_rngs = %d\n", msg->num_rngs);
-    fprintf(out, "  |                    num_cll = %d\n", msg->num_cll);
-    fprintf(out, "  |             last_saved_qos = %d\n", msg->last_saved_qos);
-    fprintf(out, "  |                 qos_reset1 = %d\n", msg->qos_reset1);
-    fprintf(out, "  |                 qos_reset2 = %d\n", msg->qos_reset2);
-    fprintf(out, "  |              rc_is_qos_set = %d\n", msg->rc_is_qos_set);
-    fprintf(out, "  | *              rc_qos_data = %p\n", msg->rc_qos_data);
-    fprintf(out, "  | *            rc_qos_status = %p\n", msg->rc_qos_status);
-    fprintf(out, "  |            saved_send_loop = %d\n", msg->saved_send_loop);
-    fprintf(out, "  |       saved_available_time = %f\n", msg->saved_available_time);
-    fprintf(out, "  |              saved_min_lat = %f\n", msg->saved_min_lat);
-    fprintf(out, "  |             saved_avg_time = %f\n", msg->saved_avg_time);
-    fprintf(out, "  |             saved_rcv_time = %f\n", msg->saved_rcv_time);
-    fprintf(out, "  |            saved_busy_time = %f\n", msg->saved_busy_time);
-    fprintf(out, "  |           saved_total_time = %f\n", msg->saved_total_time);
-    fprintf(out, "  |          saved_sample_time = %f\n", msg->saved_sample_time);
-    fprintf(out, "  |             msg_start_time = %f\n", msg->msg_start_time);
-    fprintf(out, "  |       saved_busy_time_ross = %f\n", msg->saved_busy_time_ross);
-    fprintf(out, "  |      saved_fin_chunks_ross = %f\n", msg->saved_fin_chunks_ross);
-    fprintf(out, "  |   saved_last_in_queue_time = %f\n", msg->saved_last_in_queue_time);
-    fprintf(out, "  |    saved_next_packet_delay = %f\n", msg->saved_next_packet_delay);
-    fprintf(out, "  |           msg_new_mn_event = %f\n", msg->msg_new_mn_event);
-    fprintf(out, "  |         last_received_time = %f\n", msg->last_received_time);
-    fprintf(out, "  |             last_sent_time = %f\n", msg->last_sent_time);
-    fprintf(out, "  |        last_bufupdate_time = %f\n", msg->last_bufupdate_time);
+static void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) {
+    fprintf(out, "%sterminal_dally_message ->\n", prefix);
+    fprintf(out, "%s  |                      magic = %d\n", prefix, msg->magic);
+    fprintf(out, "%s  |          travel_start_time = %f\n", prefix, msg->travel_start_time);
+    fprintf(out, "%s  |            travel_end_time = %f\n", prefix, msg->travel_end_time);
+    fprintf(out, "%s  |                  packet_ID = %llu\n", prefix, msg->packet_ID);
+    fprintf(out, "%s  |                       type = %d\n", prefix, msg->type);
+    fprintf(out, "%s  |                notify_type = %d\n", prefix, msg->notify_type);
+    fprintf(out, "%s  |                   category = %s\n", prefix, msg->category);
+    fprintf(out, "%s  |             final_dest_gid = %lu\n", prefix, msg->final_dest_gid);
+    fprintf(out, "%s  |                  sender_lp = %lu\n", prefix, msg->sender_lp);
+    fprintf(out, "%s  |               sender_mn_lp = %lu\n", prefix, msg->sender_mn_lp);
+    fprintf(out, "%s  |         dest_terminal_lpid = %lu\n", prefix, msg->dest_terminal_lpid);
+    fprintf(out, "%s  |    dfdally_src_terminal_id = %u\n", prefix, msg->dfdally_src_terminal_id);
+    fprintf(out, "%s  |   dfdally_dest_terminal_id = %u\n", prefix, msg->dfdally_dest_terminal_id);
+    fprintf(out, "%s  |            src_terminal_id = %u\n", prefix, msg->src_terminal_id);
+    fprintf(out, "%s  |           origin_router_id = %u\n", prefix, msg->origin_router_id);
+    fprintf(out, "%s  |                     app_id = %d\n", prefix, msg->app_id);
+    fprintf(out, "%s  |                   my_N_hop = %d\n", prefix, msg->my_N_hop);
+    fprintf(out, "%s  |                   my_l_hop = %d\n", prefix, msg->my_l_hop);
+    fprintf(out, "%s  |                   my_g_hop = %d\n", prefix, msg->my_g_hop);
+    fprintf(out, "%s  |          my_hops_cur_group = %d\n", prefix, msg->my_hops_cur_group);
+    fprintf(out, "%s  |              saved_channel = %d\n", prefix, msg->saved_channel);
+    fprintf(out, "%s  |                   saved_vc = %d\n", prefix, msg->saved_vc);
+    fprintf(out, "%s  |                  next_stop = %d\n", prefix, msg->next_stop);
+    fprintf(out, "%s  |        this_router_arrival = %f\n", prefix, msg->this_router_arrival);
+    fprintf(out, "%s  |    this_router_ptp_latency = %f\n", prefix, msg->this_router_ptp_latency);
+    fprintf(out, "%s  |                 intm_lp_id = %u\n", prefix, msg->intm_lp_id);
+    fprintf(out, "%s  |                   last_hop = %d\n", prefix, msg->last_hop);
+    fprintf(out, "%s  |            is_intm_visited = %d\n", prefix, msg->is_intm_visited);
+    fprintf(out, "%s  |                intm_rtr_id = %d\n", prefix, msg->intm_rtr_id);
+    fprintf(out, "%s  |                intm_grp_id = %d\n", prefix, msg->intm_grp_id);
+    fprintf(out, "%s  |             saved_src_dest = %d\n", prefix, msg->saved_src_dest);
+    fprintf(out, "%s  |             saved_src_chan = %d\n", prefix, msg->saved_src_chan);
+    fprintf(out, "%s  |                   chunk_id = %u\n", prefix, msg->chunk_id);
+    fprintf(out, "%s  |                packet_size = %u\n", prefix, msg->packet_size);
+    fprintf(out, "%s  |                 message_id = %u\n", prefix, msg->message_id);
+    fprintf(out, "%s  |                 total_size = %u\n", prefix, msg->total_size);
+    fprintf(out, "%s  |    remote_event_size_bytes = %d\n", prefix, msg->remote_event_size_bytes);
+    fprintf(out, "%s  |     local_event_size_bytes = %d\n", prefix, msg->local_event_size_bytes);
+    fprintf(out, "%s  |                   vc_index = %d\n", prefix, msg->vc_index);
+    fprintf(out, "%s  |                    rail_id = %d\n", prefix, msg->rail_id);
+    fprintf(out, "%s  |                output_chan = %d\n", prefix, msg->output_chan);
+    fprintf(out, "%s  |                   event_rc = <model_net_event_return object>\n", prefix);
+    fprintf(out, "%s  |                    is_pull = %d\n", prefix, msg->is_pull);
+    fprintf(out, "%s  |                  pull_size = %u\n", prefix, msg->pull_size);
+    fprintf(out, "%s  |                  path_type = %d\n", prefix, msg->path_type);
+    fprintf(out, "%s  |               saved_app_id = %d\n", prefix, msg->saved_app_id);
+    fprintf(out, "%s  | is_there_another_pckt_in_queue = %s\n", prefix, msg->is_there_another_pckt_in_queue ? "true" : "false");
+    fprintf(out, "%s  |                   num_rngs = %d\n", prefix, msg->num_rngs);
+    fprintf(out, "%s  |                    num_cll = %d\n", prefix, msg->num_cll);
+    fprintf(out, "%s  |             last_saved_qos = %d\n", prefix, msg->last_saved_qos);
+    fprintf(out, "%s  |                 qos_reset1 = %d\n", prefix, msg->qos_reset1);
+    fprintf(out, "%s  |                 qos_reset2 = %d\n", prefix, msg->qos_reset2);
+    fprintf(out, "%s  |              rc_is_qos_set = %d\n", prefix, msg->rc_is_qos_set);
+    fprintf(out, "%s  | *              rc_qos_data = %p\n", prefix, msg->rc_qos_data);
+    fprintf(out, "%s  | *            rc_qos_status = %p\n", prefix, msg->rc_qos_status);
+    fprintf(out, "%s  |            saved_send_loop = %d\n", prefix, msg->saved_send_loop);
+    fprintf(out, "%s  |       saved_available_time = %f\n", prefix, msg->saved_available_time);
+    fprintf(out, "%s  |              saved_min_lat = %f\n", prefix, msg->saved_min_lat);
+    fprintf(out, "%s  |             saved_avg_time = %f\n", prefix, msg->saved_avg_time);
+    fprintf(out, "%s  |             saved_rcv_time = %f\n", prefix, msg->saved_rcv_time);
+    fprintf(out, "%s  |            saved_busy_time = %f\n", prefix, msg->saved_busy_time);
+    fprintf(out, "%s  |           saved_total_time = %f\n", prefix, msg->saved_total_time);
+    fprintf(out, "%s  |          saved_sample_time = %f\n", prefix, msg->saved_sample_time);
+    fprintf(out, "%s  |             msg_start_time = %f\n", prefix, msg->msg_start_time);
+    fprintf(out, "%s  |       saved_busy_time_ross = %f\n", prefix, msg->saved_busy_time_ross);
+    fprintf(out, "%s  |      saved_fin_chunks_ross = %f\n", prefix, msg->saved_fin_chunks_ross);
+    fprintf(out, "%s  |   saved_last_in_queue_time = %f\n", prefix, msg->saved_last_in_queue_time);
+    fprintf(out, "%s  |    saved_next_packet_delay = %f\n", prefix, msg->saved_next_packet_delay);
+    fprintf(out, "%s  |           msg_new_mn_event = %f\n", prefix, msg->msg_new_mn_event);
+    fprintf(out, "%s  |         last_received_time = %f\n", prefix, msg->last_received_time);
+    fprintf(out, "%s  |             last_sent_time = %f\n", prefix, msg->last_sent_time);
+    fprintf(out, "%s  |        last_bufupdate_time = %f\n", prefix, msg->last_bufupdate_time);
 }
 //*** ---------- END OF reverse handler checking functions ---------- ***
 
diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c
index a66e517a..45efc8c0 100644
--- a/src/workload/codes-workload.c
+++ b/src/workload/codes-workload.c
@@ -591,79 +591,79 @@ char const * const op_type_string(enum codes_workload_op_type op_type) {
 }
 
 // Initial implementation by Claude.ai
-void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin) {
+void fprint_codes_workload_op(FILE * out, char const * prefix, struct codes_workload_op * op) {
     if (op == NULL) {
         return;
     }
 
     // Print common fields first
-    fprintf(out, "%sop_type = %s\n", begin, op_type_string(op->op_type));
+    fprintf(out, "%sop_type = %s\n", prefix, op_type_string(op->op_type));
 
-    fprintf(out, "%s          start_time = %f\n", begin, op->start_time);
-    fprintf(out, "%s            end_time = %f\n", begin, op->end_time);
-    fprintf(out, "%s      sim_start_time = %f\n", begin, op->sim_start_time);
-    fprintf(out, "%s         sequence_id = %ld\n", begin, op->sequence_id);
+    fprintf(out, "%s          start_time = %f\n", prefix, op->start_time);
+    fprintf(out, "%s            end_time = %f\n", prefix, op->end_time);
+    fprintf(out, "%s      sim_start_time = %f\n", prefix, op->sim_start_time);
+    fprintf(out, "%s         sequence_id = %ld\n", prefix, op->sequence_id);
 
     // Print union fields based on op_type
     switch(op->op_type) {
         case CODES_WK_DELAY:
-            fprintf(out, "%s       delay.seconds = %f\n", begin, op->u.delay.seconds);
-            fprintf(out, "%s         delay.nsecs = %f\n", begin, op->u.delay.nsecs);
+            fprintf(out, "%s       delay.seconds = %f\n", prefix, op->u.delay.seconds);
+            fprintf(out, "%s         delay.nsecs = %f\n", prefix, op->u.delay.nsecs);
             break;
 
         case CODES_WK_BARRIER:
-            fprintf(out, "%s       barrier.count = %d\n", begin, op->u.barrier.count);
-            fprintf(out, "%s        barrier.root = %d\n", begin, op->u.barrier.root);
+            fprintf(out, "%s       barrier.count = %d\n", prefix, op->u.barrier.count);
+            fprintf(out, "%s        barrier.root = %d\n", prefix, op->u.barrier.root);
             break;
 
         case CODES_WK_OPEN:
         case CODES_WK_MPI_OPEN:
         case CODES_WK_MPI_COLL_OPEN:
-            fprintf(out, "%s        open.file_id = %lu\n", begin, op->u.open.file_id);
-            fprintf(out, "%s    open.create_flag = %d\n", begin, op->u.open.create_flag);
+            fprintf(out, "%s        open.file_id = %lu\n", prefix, op->u.open.file_id);
+            fprintf(out, "%s    open.create_flag = %d\n", prefix, op->u.open.create_flag);
             break;
 
         case CODES_WK_WRITE:
         case CODES_WK_MPI_WRITE:
         case CODES_WK_MPI_COLL_WRITE:
-            fprintf(out, "%s       write.file_id = %lu\n", begin, op->u.write.file_id);
-            fprintf(out, "%s        write.offset = %ld\n", begin, op->u.write.offset);
-            fprintf(out, "%s          write.size = %zu\n", begin, op->u.write.size);
+            fprintf(out, "%s       write.file_id = %lu\n", prefix, op->u.write.file_id);
+            fprintf(out, "%s        write.offset = %ld\n", prefix, op->u.write.offset);
+            fprintf(out, "%s          write.size = %zu\n", prefix, op->u.write.size);
             break;
 
         case CODES_WK_READ:
         case CODES_WK_MPI_READ:
         case CODES_WK_MPI_COLL_READ:
-            fprintf(out, "%s        read.file_id = %lu\n", begin, op->u.read.file_id);
-            fprintf(out, "%s         read.offset = %ld\n", begin, op->u.read.offset);
-            fprintf(out, "%s           read.size = %zu\n", begin, op->u.read.size);
+            fprintf(out, "%s        read.file_id = %lu\n", prefix, op->u.read.file_id);
+            fprintf(out, "%s         read.offset = %ld\n", prefix, op->u.read.offset);
+            fprintf(out, "%s           read.size = %zu\n", prefix, op->u.read.size);
             break;
 
         case CODES_WK_CLOSE:
         case CODES_WK_MPI_CLOSE:
-            fprintf(out, "%s       close.file_id = %lu\n", begin, op->u.close.file_id);
+            fprintf(out, "%s       close.file_id = %lu\n", prefix, op->u.close.file_id);
             break;
 
         case CODES_WK_SEND:
         case CODES_WK_ISEND:
-            fprintf(out, "%s    send.source_rank = %d\n", begin, op->u.send.source_rank);
-            fprintf(out, "%s      send.dest_rank = %d\n", begin, op->u.send.dest_rank);
-            fprintf(out, "%s      send.num_bytes = %ld\n", begin, op->u.send.num_bytes);
-            fprintf(out, "%s      send.data_type = %d\n", begin, op->u.send.data_type);
-            fprintf(out, "%s          send.count = %d\n", begin, op->u.send.count);
-            fprintf(out, "%s            send.tag = %d\n", begin, op->u.send.tag);
-            fprintf(out, "%s         send.req_id = %u\n", begin, op->u.send.req_id);
+            fprintf(out, "%s    send.source_rank = %d\n", prefix, op->u.send.source_rank);
+            fprintf(out, "%s      send.dest_rank = %d\n", prefix, op->u.send.dest_rank);
+            fprintf(out, "%s      send.num_bytes = %ld\n", prefix, op->u.send.num_bytes);
+            fprintf(out, "%s      send.data_type = %d\n", prefix, op->u.send.data_type);
+            fprintf(out, "%s          send.count = %d\n", prefix, op->u.send.count);
+            fprintf(out, "%s            send.tag = %d\n", prefix, op->u.send.tag);
+            fprintf(out, "%s         send.req_id = %u\n", prefix, op->u.send.req_id);
             break;
 
         case CODES_WK_RECV:
         case CODES_WK_IRECV:
-            fprintf(out, "%s    recv.source_rank = %d\n", begin, op->u.recv.source_rank);
-            fprintf(out, "%s      recv.dest_rank = %d\n", begin, op->u.recv.dest_rank);
-            fprintf(out, "%s      recv.num_bytes = %ld\n", begin, op->u.recv.num_bytes);
-            fprintf(out, "%s      recv.data_type = %d\n", begin, op->u.recv.data_type);
-            fprintf(out, "%s          recv.count = %d\n", begin, op->u.recv.count);
-            fprintf(out, "%s            recv.tag = %d\n", begin, op->u.recv.tag);
-            fprintf(out, "%s         recv.req_id = %u\n", begin, op->u.recv.req_id);
+            fprintf(out, "%s    recv.source_rank = %d\n", prefix, op->u.recv.source_rank);
+            fprintf(out, "%s      recv.dest_rank = %d\n", prefix, op->u.recv.dest_rank);
+            fprintf(out, "%s      recv.num_bytes = %ld\n", prefix, op->u.recv.num_bytes);
+            fprintf(out, "%s      recv.data_type = %d\n", prefix, op->u.recv.data_type);
+            fprintf(out, "%s          recv.count = %d\n", prefix, op->u.recv.count);
+            fprintf(out, "%s            recv.tag = %d\n", prefix, op->u.recv.tag);
+            fprintf(out, "%s         recv.req_id = %u\n", prefix, op->u.recv.req_id);
             break;
 
         case CODES_WK_COL:
@@ -674,23 +674,23 @@ void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char co
         case CODES_WK_ALLTOALLV:
         case CODES_WK_REDUCE:
         case CODES_WK_ALLREDUCE:
-            fprintf(out, "%scollective.num_bytes = %d\n", begin, op->u.collective.num_bytes);
+            fprintf(out, "%scollective.num_bytes = %d\n", prefix, op->u.collective.num_bytes);
             break;
 
         case CODES_WK_WAITALL:
         case CODES_WK_WAITSOME:
         case CODES_WK_WAITANY:
         case CODES_WK_TESTALL:
-            fprintf(out, "%s         waits.count = %d\n", begin, op->u.waits.count);
-            fprintf(out, "%s      waits.req_ids  = %p\n", begin, op->u.waits.req_ids);
+            fprintf(out, "%s         waits.count = %d\n", prefix, op->u.waits.count);
+            fprintf(out, "%s      waits.req_ids  = %p\n", prefix, op->u.waits.req_ids);
             break;
 
         case CODES_WK_WAIT:
-            fprintf(out, "%s         wait.req_id = %u\n", begin, op->u.wait.req_id);
+            fprintf(out, "%s         wait.req_id = %u\n", prefix, op->u.wait.req_id);
             break;
 
         case CODES_WK_REQ_FREE:
-            fprintf(out, "%s         free.req_id = %u\n", begin, op->u.free.req_id);
+            fprintf(out, "%s         free.req_id = %u\n", prefix, op->u.free.req_id);
             break;
 
         case CODES_WK_END:

From ca89cf14591bcae883f3b535c6ff64763b04af6b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 7 Mar 2025 17:14:43 -0500
Subject: [PATCH 111/188] Small implementation fixes (typo and exporting
 function name)

---
 codes/net/dragonfly-dally.h                | 2 ++
 src/networks/model-net/core/model-net-lp.c | 6 +++++-
 src/networks/model-net/dragonfly-dally.C   | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index b5d93b88..dc73d145 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -136,6 +136,8 @@ struct terminal_dally_message
    tw_stime last_bufupdate_time;
 };
 
+void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 1a065c8f..92d3dfe1 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1176,7 +1176,7 @@ static void save_state_net_state(model_net_base_state * into, model_net_base_sta
 
     into->sub_state = NULL;
     crv_checkpointer * chptr = method_array[from->net_id]->checkpointer;
-    if (chptr && chptr->check_lps) {
+    if (chptr && chptr->save_lp) {
         into->sub_state = calloc(1, from->sub_type->state_sz);
         chptr->save_lp(into->sub_state, from->sub_state);
     }
@@ -1201,6 +1201,10 @@ static void clean_state_net_state(model_net_base_state * state) {
     free(state->sched_recv);
 
     if (state->sub_state != NULL) {
+        crv_checkpointer * chptr = method_array[state->net_id]->checkpointer;
+        if (chptr && chptr->clean_lp) {
+            chptr->clean_lp(state->sub_state);
+        }
         free(state->sub_state);
     }
     free(state->node_copy_next_available_time);
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index bb067dcd..9cb3a6fe 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -7003,7 +7003,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 }
 
 // Print fuction originally constructed with help from Claude.ai
-static void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) {
+void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) {
     fprintf(out, "%sterminal_dally_message ->\n", prefix);
     fprintf(out, "%s  |                      magic = %d\n", prefix, msg->magic);
     fprintf(out, "%s  |          travel_start_time = %f\n", prefix, msg->travel_start_time);

From 2dd6db52f5dcc3ec6cf2d470191736b42e122438 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 7 Mar 2025 17:16:55 -0500
Subject: [PATCH 112/188] Implementing base deep-copy/clean/comparison/print
 for dragonfly lps

---
 src/networks/model-net/dragonfly-dally.C | 216 +++++++++++++++++------
 1 file changed, 158 insertions(+), 58 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 9cb3a6fe..b986ab32 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -517,9 +517,6 @@ struct terminal_state
     DragonflyConnectionManager connMan;
     tlc_state *local_congestion_controller;
 
-    map<tw_lpid, int> workload_lpid_to_app_id;
-    set<int> app_ids;
-
     int workloads_finished_flag;
 
     int** vc_occupancy; // vc_occupancies [rail_id][qos_level]
@@ -596,10 +593,10 @@ struct terminal_state
     // Variables to recover latency of packets sent to other terminals
     // Sent packets (to be populated at by commit handler of packet sender)
     map<uint64_t, struct packet_sent> sent_packets;
-    uint64_t last_packet_sent_id;
+    int64_t last_packet_sent_id;
     // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency)
     struct {
-        uint64_t packet_ID;
+        int64_t packet_ID;
         double travel_end_time;
     } arrival_of_last_packet;
     // received (and not completed, yet) packets. The value associated to a key is the remaining number of "bytes" to receive before the packet is consumed totally. If a packet size == chunk size, this map will never be used/filled
@@ -3039,12 +3036,12 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
     s->finished_msgs                = frozen_state->finished_msgs;
     s->rank_tbl_pop                 = frozen_state->rank_tbl_pop;
     s->last_packet_sent_id          = frozen_state->last_packet_sent_id;
+    s->rank_tbl                     = frozen_state->rank_tbl;
+    s->st                           = frozen_state->st;
     memcpy(&s->arrival_of_last_packet, &frozen_state->arrival_of_last_packet, sizeof(s->arrival_of_last_packet));
     memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
     memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
     memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets));
-    memcpy(&s->rank_tbl,             &frozen_state->rank_tbl,             sizeof(s->rank_tbl));
-    memcpy(&s->st,                   &frozen_state->st,                   sizeof(s->st));
 
     s->frozen_state = frozen_state;
 };
@@ -3080,12 +3077,12 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(
     frozen_state->finished_msgs                = s->finished_msgs;
     frozen_state->rank_tbl_pop                 = s->rank_tbl_pop;
     frozen_state->last_packet_sent_id          = s->last_packet_sent_id;
+    frozen_state->rank_tbl                     = s->rank_tbl;
+    frozen_state->st                           = s->st;
     memcpy(&frozen_state->arrival_of_last_packet, &s->arrival_of_last_packet, sizeof(s->arrival_of_last_packet));
     memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
     memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
     memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets));
-    memcpy(&frozen_state->rank_tbl,             &s->rank_tbl,             sizeof(s->rank_tbl));
-    memcpy(&frozen_state->st,                   &s->st,                   sizeof(s->st));
     memcpy(s, frozen_state, sizeof(terminal_state));
     memset(frozen_state, 0, sizeof(terminal_state));
     free(frozen_state);
@@ -3472,9 +3469,6 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
         codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 1, s->router_id[i] / num_routers_per_mgrp, s->router_id[i] % num_routers_per_mgrp, &s->router_lp[i]);
     }
 
-    s->workload_lpid_to_app_id = map<tw_lpid, int>();
-    s->app_ids = set<int>();
-
     s->terminal_available_time = (tw_stime*)calloc(p->num_rails, sizeof(tw_stime));
     s->packet_counter = 0;
     s->min_latency = INT_MAX;
@@ -6925,6 +6919,103 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf,
 }
 
 //*** ---------- START OF reverse handler checking functions ---------- ***
+static void save_terminal_state(terminal_state *into, terminal_state const *from) {
+    memcpy(into, from, sizeof(terminal_state));
+}
+
+static void clean_terminal_state(terminal_state *state) {
+}
+
+static bool check_terminal_state(terminal_state *before, terminal_state *after) {
+    bool is_same = true;
+
+    // Compare scalar values
+    is_same &= (before->packet_counter == after->packet_counter);
+    is_same &= (before->packet_gen == after->packet_gen);
+    is_same &= (before->packet_fin == after->packet_fin);
+    is_same &= (before->total_gen_size == after->total_gen_size);
+    is_same &= (before->terminal_id == after->terminal_id);
+    is_same &= (before->workloads_finished_flag == after->workloads_finished_flag);
+    is_same &= (before->is_monitoring_bw == after->is_monitoring_bw);
+    is_same &= (before->rank_tbl_pop == after->rank_tbl_pop);
+    is_same &= (before->total_time == after->total_time);
+    is_same &= (before->total_msg_size == after->total_msg_size);
+    is_same &= (before->total_hops == after->total_hops);
+    is_same &= (before->finished_msgs == after->finished_msgs);
+    is_same &= (before->finished_chunks == after->finished_chunks);
+    is_same &= (before->finished_packets == after->finished_packets);
+    is_same &= (before->injected_chunks == after->injected_chunks);
+    is_same &= (before->ejected_chunks == after->ejected_chunks);
+    is_same &= (before->max_latency == after->max_latency);
+    is_same &= (before->min_latency == after->min_latency);
+    is_same &= (before->fin_chunks_sample == after->fin_chunks_sample);
+    is_same &= (before->data_size_sample == after->data_size_sample);
+    is_same &= (before->fin_hops_sample == after->fin_hops_sample);
+    is_same &= (before->fin_chunks_time == after->fin_chunks_time);
+    is_same &= (before->op_arr_size == after->op_arr_size);
+    is_same &= (before->max_arr_size == after->max_arr_size);
+    //is_same &= (before->fwd_events == after->fwd_events);  // This is used for statistics, they are never changed when rollbacking
+    //is_same &= (before->rev_events == after->rev_events);  // This is used for statistics, they are never changed when rollbacking
+    is_same &= (before->fin_chunks_ross_sample == after->fin_chunks_ross_sample);
+    is_same &= (before->data_size_ross_sample == after->data_size_ross_sample);
+    is_same &= (before->fin_hops_ross_sample == after->fin_hops_ross_sample);
+    is_same &= (before->fin_chunks_time_ross_sample == after->fin_chunks_time_ross_sample);
+    is_same &= (before->last_packet_sent_id == after->last_packet_sent_id);
+    is_same &= (before->last_in_queue_time == after->last_in_queue_time);
+
+    // Compare arrival_of_last_packet struct
+    is_same &= (before->arrival_of_last_packet.packet_ID == after->arrival_of_last_packet.packet_ID);
+    is_same &= (before->arrival_of_last_packet.travel_end_time == after->arrival_of_last_packet.travel_end_time);
+
+    // Compare arrays (assumes params is the same for both)
+    assert(before->params == after->params);
+    //if (before->params && after->params && before->params->num_rails == after->params->num_rails) {
+    //    for (int i = 0; i < before->params->num_rails; i++) {
+    //        is_same &= (before->router_lp[i] == after->router_lp[i]);
+    //        is_same &= (before->router_id[i] == after->router_id[i]);
+    //    }
+    //} else {
+    //    is_same = false;
+    //}
+
+    // Compare string buffers
+    is_same &= (strncmp(before->output_buf, after->output_buf, 4096) == 0);
+    is_same &= (strncmp(before->output_buf2, after->output_buf2, 4096) == 0);
+    is_same &= (strncmp(before->sample_buf, after->sample_buf, 4096) == 0);
+
+    // Compare anno strings (handling NULL case)
+    if (before->anno && after->anno) {
+        is_same &= (strcmp(before->anno, after->anno) == 0);
+    } else {
+        is_same &= (before->anno == after->anno);
+    }
+
+    // Compare pointers (just checking if they're both NULL or both non-NULL)
+    //is_same &= ((before->local_congestion_controller == NULL) == (after->local_congestion_controller == NULL));
+    //is_same &= ((before->vc_occupancy == NULL) == (after->vc_occupancy == NULL));
+    //is_same &= ((before->terminal_available_time == NULL) == (after->terminal_available_time == NULL));
+    //is_same &= ((before->terminal_msgs == NULL) == (after->terminal_msgs == NULL));
+    //is_same &= ((before->in_send_loop == NULL) == (after->in_send_loop == NULL));
+    //is_same &= ((before->qos_status == NULL) == (after->qos_status == NULL));
+    //is_same &= ((before->qos_data == NULL) == (after->qos_data == NULL));
+    //is_same &= ((before->last_qos_lvl == NULL) == (after->last_qos_lvl == NULL));
+    //is_same &= ((before->issueIdle == NULL) == (after->issueIdle == NULL));
+    //is_same &= ((before->terminal_length == NULL) == (after->terminal_length == NULL));
+    //is_same &= ((before->rank_tbl == NULL) == (after->rank_tbl == NULL));
+    //is_same &= ((before->last_buf_full == NULL) == (after->last_buf_full == NULL));
+    //is_same &= ((before->busy_time == NULL) == (after->busy_time == NULL));
+    //is_same &= ((before->link_traffic == NULL) == (after->link_traffic == NULL));
+    //is_same &= ((before->total_chunks == NULL) == (after->total_chunks == NULL));
+    //is_same &= ((before->stalled_chunks == NULL) == (after->stalled_chunks == NULL));
+    //is_same &= ((before->busy_time_sample == NULL) == (after->busy_time_sample == NULL));
+    //is_same &= ((before->sample_stat == NULL) == (after->sample_stat == NULL));
+    //is_same &= ((before->busy_time_ross_sample == NULL) == (after->busy_time_ross_sample == NULL));
+    //is_same &= ((before->predictor_data == NULL) == (after->predictor_data == NULL));
+    is_same &= ((before->frozen_state == NULL) && (after->frozen_state == NULL));
+
+    return is_same;
+}
+
 // Print fuction originally constructed with help from Claude.ai
 static void print_terminal_state(FILE * out, char const * prefix, terminal_state * state) {
     fprintf(out, "%sterminal_state (dragonfly-dally) ->\n", prefix);
@@ -6932,13 +7023,22 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |               packet_gen = %d\n", prefix, state->packet_gen);
     fprintf(out, "%s  |               packet_fin = %d\n", prefix, state->packet_fin);
     fprintf(out, "%s  |           total_gen_size = %d\n", prefix, state->total_gen_size);
-    fprintf(out, "%s  | *              router_lp = %p\n", prefix, state->router_lp);
-    fprintf(out, "%s  | *              router_id = %p\n", prefix, state->router_id);
+
+    fprintf(out, "%s  | *              router_lp[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%lu", i ? ", " : "", state->router_lp[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *              router_id[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%u", i ? ", " : "", state->router_id[i]);
+    }
+    fprintf(out, "]\n");
+
     fprintf(out, "%s  |              terminal_id = %u\n", prefix, state->terminal_id);
     fprintf(out, "%s  |                  connMan = <DragonflyConnectionManager object>\n", prefix);
     fprintf(out, "%s  | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller);
-    fprintf(out, "%s  |  workload_lpid_to_app_id = <map object>\n", prefix);
-    fprintf(out, "%s  |                  app_ids = <set object>\n", prefix);
     fprintf(out, "%s  |  workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag);
     fprintf(out, "%s  | **          vc_occupancy = %p\n", prefix, state->vc_occupancy);
     fprintf(out, "%s  | *terminal_available_time = %p\n", prefix, state->terminal_available_time);
@@ -6958,9 +7058,9 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
     fprintf(out, "%s  | *               rank_tbl = %p\n", prefix, state->rank_tbl);
     fprintf(out, "%s  |             rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop);
-    fprintf(out, "%s  |               total_time = %f\n", prefix, state->total_time);
+    fprintf(out, "%s  |               total_time = %g\n", prefix, state->total_time);
     fprintf(out, "%s  |           total_msg_size = %lu\n", prefix, state->total_msg_size);
-    fprintf(out, "%s  |               total_hops = %f\n", prefix, state->total_hops);
+    fprintf(out, "%s  |               total_hops = %g\n", prefix, state->total_hops);
     fprintf(out, "%s  |            finished_msgs = %ld\n", prefix, state->finished_msgs);
     fprintf(out, "%s  |          finished_chunks = %ld\n", prefix, state->finished_chunks);
     fprintf(out, "%s  |         finished_packets = %ld\n", prefix, state->finished_packets);
@@ -6971,17 +7071,17 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  | *         stalled_chunks = %p\n", prefix, state->stalled_chunks);
     fprintf(out, "%s  |          injected_chunks = %lu\n", prefix, state->injected_chunks);
     fprintf(out, "%s  |           ejected_chunks = %lu\n", prefix, state->ejected_chunks);
-    fprintf(out, "%s  |              max_latency = %f\n", prefix, state->max_latency);
-    fprintf(out, "%s  |              min_latency = %f\n", prefix, state->min_latency);
+    fprintf(out, "%s  |              max_latency = %g\n", prefix, state->max_latency);
+    fprintf(out, "%s  |              min_latency = %g\n", prefix, state->min_latency);
     fprintf(out, "%s  |               output_buf = '%.4096s'\n", prefix, state->output_buf);
     fprintf(out, "%s  |              output_buf2 = '%.4096s'\n", prefix, state->output_buf2);
     fprintf(out, "%s  |        fin_chunks_sample = %ld\n", prefix, state->fin_chunks_sample);
     fprintf(out, "%s  |         data_size_sample = %ld\n", prefix, state->data_size_sample);
-    fprintf(out, "%s  |          fin_hops_sample = %f\n", prefix, state->fin_hops_sample);
-    fprintf(out, "%s  |          fin_chunks_time = %f\n", prefix, state->fin_chunks_time);
+    fprintf(out, "%s  |          fin_hops_sample = %g\n", prefix, state->fin_hops_sample);
+    fprintf(out, "%s  |          fin_chunks_time = %g\n", prefix, state->fin_chunks_time);
     fprintf(out, "%s  | *       busy_time_sample = %p\n", prefix, state->busy_time_sample);
     fprintf(out, "%s  |               sample_buf = '%.4096s'\n", prefix, state->sample_buf);
-    fprintf(out, "%s  | *            sample_stat = %p\n", prefix, state->sample_stat);
+    fprintf(out, "%s  | *            sample_stat = %p\n", prefix, state->sample_stat);  // ingnoring as this part of the code is never used. Originally part of instrumentation
     fprintf(out, "%s  |              op_arr_size = %d\n", prefix, state->op_arr_size);
     fprintf(out, "%s  |             max_arr_size = %d\n", prefix, state->max_arr_size);
     fprintf(out, "%s  |               fwd_events = %ld\n", prefix, state->fwd_events);
@@ -6989,14 +7089,14 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |   fin_chunks_ross_sample = %ld\n", prefix, state->fin_chunks_ross_sample);
     fprintf(out, "%s  |    data_size_ross_sample = %ld\n", prefix, state->data_size_ross_sample);
     fprintf(out, "%s  |     fin_hops_ross_sample = %ld\n", prefix, state->fin_hops_ross_sample);
-    fprintf(out, "%s  | fin_chunks_time_ross_sample = %f\n", prefix, state->fin_chunks_time_ross_sample);
-    fprintf(out, "%s  | *  busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample);
-    fprintf(out, "%s  |              ross_sample = <dfly_cn_sample object>\n", prefix);
+    fprintf(out, "%s  | fin_chunks_time_ross_sample = %g\n", prefix, state->fin_chunks_time_ross_sample);
+    fprintf(out, "%s  | *  busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample);  // ingnoring as this part of the code is never used. Originally part of instrumentation
+    fprintf(out, "%s  |              ross_sample = <dfly_cn_sample object>\n", prefix);  // ingnoring as this part of the code is never used. Originally part of instrumentation
     fprintf(out, "%s  |             sent_packets = <map object>\n", prefix);
-    fprintf(out, "%s  |      last_packet_sent_id = %lu\n", prefix, state->last_packet_sent_id);
-    fprintf(out, "%s  |   arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time);
+    fprintf(out, "%s  |      last_packet_sent_id = %ld\n", prefix, state->last_packet_sent_id);
+    fprintf(out, "%s  |   arrival_of_last_packet = {packet_ID: %ld, travel_end_time: %g}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time);
     fprintf(out, "%s  |     remaining_sz_packets = <map object>\n", prefix);
-    fprintf(out, "%s  |       last_in_queue_time = %f\n", prefix, state->last_in_queue_time);
+    fprintf(out, "%s  |       last_in_queue_time = %g\n", prefix, state->last_in_queue_time);
     fprintf(out, "%s  | *         predictor_data = %p\n", prefix, state->predictor_data);
     fprintf(out, "%s  |                  zombies = <set object>\n", prefix);
     fprintf(out, "%s  | *           frozen_state = %p\n", prefix, state->frozen_state);
@@ -7006,12 +7106,12 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) {
     fprintf(out, "%sterminal_dally_message ->\n", prefix);
     fprintf(out, "%s  |                      magic = %d\n", prefix, msg->magic);
-    fprintf(out, "%s  |          travel_start_time = %f\n", prefix, msg->travel_start_time);
-    fprintf(out, "%s  |            travel_end_time = %f\n", prefix, msg->travel_end_time);
+    fprintf(out, "%s  |          travel_start_time = %g\n", prefix, msg->travel_start_time);
+    fprintf(out, "%s  |            travel_end_time = %g\n", prefix, msg->travel_end_time);
     fprintf(out, "%s  |                  packet_ID = %llu\n", prefix, msg->packet_ID);
     fprintf(out, "%s  |                       type = %d\n", prefix, msg->type);
     fprintf(out, "%s  |                notify_type = %d\n", prefix, msg->notify_type);
-    fprintf(out, "%s  |                   category = %s\n", prefix, msg->category);
+    fprintf(out, "%s  |                   category = %.16s\n", prefix, msg->category);
     fprintf(out, "%s  |             final_dest_gid = %lu\n", prefix, msg->final_dest_gid);
     fprintf(out, "%s  |                  sender_lp = %lu\n", prefix, msg->sender_lp);
     fprintf(out, "%s  |               sender_mn_lp = %lu\n", prefix, msg->sender_mn_lp);
@@ -7028,8 +7128,8 @@ void print_terminal_dally_message(FILE * out, char const * prefix, struct termin
     fprintf(out, "%s  |              saved_channel = %d\n", prefix, msg->saved_channel);
     fprintf(out, "%s  |                   saved_vc = %d\n", prefix, msg->saved_vc);
     fprintf(out, "%s  |                  next_stop = %d\n", prefix, msg->next_stop);
-    fprintf(out, "%s  |        this_router_arrival = %f\n", prefix, msg->this_router_arrival);
-    fprintf(out, "%s  |    this_router_ptp_latency = %f\n", prefix, msg->this_router_ptp_latency);
+    fprintf(out, "%s  |        this_router_arrival = %g\n", prefix, msg->this_router_arrival);
+    fprintf(out, "%s  |    this_router_ptp_latency = %g\n", prefix, msg->this_router_ptp_latency);
     fprintf(out, "%s  |                 intm_lp_id = %u\n", prefix, msg->intm_lp_id);
     fprintf(out, "%s  |                   last_hop = %d\n", prefix, msg->last_hop);
     fprintf(out, "%s  |            is_intm_visited = %d\n", prefix, msg->is_intm_visited);
@@ -7046,12 +7146,12 @@ void print_terminal_dally_message(FILE * out, char const * prefix, struct termin
     fprintf(out, "%s  |                   vc_index = %d\n", prefix, msg->vc_index);
     fprintf(out, "%s  |                    rail_id = %d\n", prefix, msg->rail_id);
     fprintf(out, "%s  |                output_chan = %d\n", prefix, msg->output_chan);
-    fprintf(out, "%s  |                   event_rc = <model_net_event_return object>\n", prefix);
+    fprintf(out, "%s  |                   event_rc = %d\n", prefix, msg->event_rc);
     fprintf(out, "%s  |                    is_pull = %d\n", prefix, msg->is_pull);
     fprintf(out, "%s  |                  pull_size = %u\n", prefix, msg->pull_size);
     fprintf(out, "%s  |                  path_type = %d\n", prefix, msg->path_type);
     fprintf(out, "%s  |               saved_app_id = %d\n", prefix, msg->saved_app_id);
-    fprintf(out, "%s  | is_there_another_pckt_in_queue = %s\n", prefix, msg->is_there_another_pckt_in_queue ? "true" : "false");
+    fprintf(out, "%s  | is_there_another_pckt_in_queue = %d\n", prefix, msg->is_there_another_pckt_in_queue);
     fprintf(out, "%s  |                   num_rngs = %d\n", prefix, msg->num_rngs);
     fprintf(out, "%s  |                    num_cll = %d\n", prefix, msg->num_cll);
     fprintf(out, "%s  |             last_saved_qos = %d\n", prefix, msg->last_saved_qos);
@@ -7061,22 +7161,22 @@ void print_terminal_dally_message(FILE * out, char const * prefix, struct termin
     fprintf(out, "%s  | *              rc_qos_data = %p\n", prefix, msg->rc_qos_data);
     fprintf(out, "%s  | *            rc_qos_status = %p\n", prefix, msg->rc_qos_status);
     fprintf(out, "%s  |            saved_send_loop = %d\n", prefix, msg->saved_send_loop);
-    fprintf(out, "%s  |       saved_available_time = %f\n", prefix, msg->saved_available_time);
-    fprintf(out, "%s  |              saved_min_lat = %f\n", prefix, msg->saved_min_lat);
-    fprintf(out, "%s  |             saved_avg_time = %f\n", prefix, msg->saved_avg_time);
-    fprintf(out, "%s  |             saved_rcv_time = %f\n", prefix, msg->saved_rcv_time);
-    fprintf(out, "%s  |            saved_busy_time = %f\n", prefix, msg->saved_busy_time);
-    fprintf(out, "%s  |           saved_total_time = %f\n", prefix, msg->saved_total_time);
-    fprintf(out, "%s  |          saved_sample_time = %f\n", prefix, msg->saved_sample_time);
-    fprintf(out, "%s  |             msg_start_time = %f\n", prefix, msg->msg_start_time);
-    fprintf(out, "%s  |       saved_busy_time_ross = %f\n", prefix, msg->saved_busy_time_ross);
-    fprintf(out, "%s  |      saved_fin_chunks_ross = %f\n", prefix, msg->saved_fin_chunks_ross);
-    fprintf(out, "%s  |   saved_last_in_queue_time = %f\n", prefix, msg->saved_last_in_queue_time);
-    fprintf(out, "%s  |    saved_next_packet_delay = %f\n", prefix, msg->saved_next_packet_delay);
-    fprintf(out, "%s  |           msg_new_mn_event = %f\n", prefix, msg->msg_new_mn_event);
-    fprintf(out, "%s  |         last_received_time = %f\n", prefix, msg->last_received_time);
-    fprintf(out, "%s  |             last_sent_time = %f\n", prefix, msg->last_sent_time);
-    fprintf(out, "%s  |        last_bufupdate_time = %f\n", prefix, msg->last_bufupdate_time);
+    fprintf(out, "%s  |       saved_available_time = %g\n", prefix, msg->saved_available_time);
+    fprintf(out, "%s  |              saved_min_lat = %g\n", prefix, msg->saved_min_lat);
+    fprintf(out, "%s  |             saved_avg_time = %g\n", prefix, msg->saved_avg_time);
+    fprintf(out, "%s  |             saved_rcv_time = %g\n", prefix, msg->saved_rcv_time);
+    fprintf(out, "%s  |            saved_busy_time = %g\n", prefix, msg->saved_busy_time);
+    fprintf(out, "%s  |           saved_total_time = %g\n", prefix, msg->saved_total_time);
+    fprintf(out, "%s  |          saved_sample_time = %g\n", prefix, msg->saved_sample_time);
+    fprintf(out, "%s  |             msg_start_time = %g\n", prefix, msg->msg_start_time);
+    fprintf(out, "%s  |       saved_busy_time_ross = %g\n", prefix, msg->saved_busy_time_ross);
+    fprintf(out, "%s  |      saved_fin_chunks_ross = %g\n", prefix, msg->saved_fin_chunks_ross);
+    fprintf(out, "%s  |   saved_last_in_queue_time = %g\n", prefix, msg->saved_last_in_queue_time);
+    fprintf(out, "%s  |    saved_next_packet_delay = %g\n", prefix, msg->saved_next_packet_delay);
+    fprintf(out, "%s  |           msg_new_mn_event = %g\n", prefix, msg->msg_new_mn_event);
+    fprintf(out, "%s  |         last_received_time = %g\n", prefix, msg->last_received_time);
+    fprintf(out, "%s  |             last_sent_time = %g\n", prefix, msg->last_sent_time);
+    fprintf(out, "%s  |        last_bufupdate_time = %g\n", prefix, msg->last_bufupdate_time);
 }
 //*** ---------- END OF reverse handler checking functions ---------- ***
 
@@ -7110,17 +7210,17 @@ tw_lptype dragonfly_dally_lps[] =
 crv_checkpointer dragonfly_dally_checkpointers[] = {
     {
         &dragonfly_dally_lps[0],
-        0,
-        (save_checkpoint_state_f) NULL,
-        (clean_checkpoint_state_f) NULL,
-        (check_states_f) NULL,
+        sizeof(terminal_state),
+        (save_checkpoint_state_f) save_terminal_state,
+        (clean_checkpoint_state_f) clean_terminal_state,
+        (check_states_f) check_terminal_state,
         (print_lpstate_f) print_terminal_state,
-        (print_checkpoint_state_f) NULL,
+        (print_checkpoint_state_f) print_terminal_state,
         (print_event_f) print_terminal_dally_message,
     },
     {
         &dragonfly_dally_lps[1],
-        0,
+        sizeof(router_state),
         (save_checkpoint_state_f) NULL,
         (clean_checkpoint_state_f) NULL,
         (check_states_f) NULL,

From 7aa4c1141677ecbc420759c43e1c677b750327ac Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 7 Mar 2025 18:02:02 -0500
Subject: [PATCH 113/188] Printing sub_message contents of model-net message

---
 codes/net/dragonfly-dally.h                  |  2 +-
 src/network-workloads/model-net-mpi-replay.c |  2 +-
 src/networks/model-net/core/model-net-lp.c   | 32 ++++++++++++++++----
 src/networks/model-net/dragonfly-dally.C     | 26 ++++++++++++++--
 4 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index dc73d145..2647c4df 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -136,7 +136,7 @@ struct terminal_dally_message
    tw_stime last_bufupdate_time;
 };
 
-void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg);
+void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg);
 
 #ifdef __cplusplus
 }
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 653f6f31..55649581 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -3795,7 +3795,7 @@ static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type)
 }
 
 // Original printing function from Claude.ai
-static void print_nw_message(FILE * out, char const * prefix, struct nw_message * msg) {
+static void print_nw_message(FILE * out, char const * prefix, nw_state* s, struct nw_message * msg) {
     fprintf(out, "%snw_message ->\n", prefix);
     fprintf(out, "%s | msg_type = %s\n", prefix, MPI_NW_EVENTS_to_string(msg->msg_type));
     fprintf(out, "%s |  op_type = %s\n", prefix, op_type_string(msg->op_type));
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 92d3dfe1..6a2cf4f8 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -141,7 +141,7 @@ static void clean_state_net_state(model_net_base_state * state);
 static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after);
 static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state);
 static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state);
-static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * state);
+static void print_event_state(FILE * out, char const * prefix, model_net_base_state * s, model_net_wrap_msg * msg);
 
 // ROSS function pointer table to check reverse event handler
 crv_checkpointer model_net_chkptr = {
@@ -1353,7 +1353,7 @@ void print_model_net_request(FILE * out, char const * prefix, model_net_request
     fprintf(out, "%sapp_id = %d\n", prefix, req->app_id);
 }
 
-static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * msg) {
+static void print_event_state(FILE * out, char const * prefix, model_net_base_state * state, model_net_wrap_msg * msg) {
     fprintf(out, "%sh\n", prefix);
     fprintf(out, "%s| src = %lu\n", prefix, msg->h.src);
     fprintf(out, "%s| event_type = %d (%s)\n", prefix, msg->h.event_type, event_type_string(msg->h.event_type));
@@ -1369,6 +1369,8 @@ static void print_event_state(FILE * out, char const * prefix, model_net_wrap_ms
     char subprefix_2[len_subprefix];
     snprintf(subprefix_2, len_subprefix, "%s%s", prefix, addprefix_2);
 
+    crv_checkpointer * chptr;
+    void * sub_msg;
     switch (msg->h.event_type) {
         case MN_BASE_NEW_MSG:
         case MN_BASE_SCHED_NEXT:
@@ -1388,11 +1390,29 @@ static void print_event_state(FILE * out, char const * prefix, model_net_wrap_ms
             fprintf(out, "%s     |  | prio = %d\n", prefix, msg->msg.m_base.rc.prio);
             fprintf(out, "%s     | created_in_surrogate = %d\n", prefix, msg->msg.m_base.created_in_surrogate);
             break;
-        default:
-            fprintf(out, "%sThe content of this message cannot be deciphered yet with the information given\n", prefix);
+
+        case MN_BASE_SAMPLE:
+        case MN_BASE_PASS:
+        case MN_BASE_END_NOTIF:
+            // printing sub_msg
+            fprintf(out, "%ssub_msg ->\n", prefix);
+            chptr = method_array[state->net_id]->checkpointer;
+            sub_msg = ((char*)msg)+msg_offsets[state->net_id];
+            if (chptr && chptr->print_event) {
+                char addprefix[] = "    | ";
+                int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+                char subprefix[len_subprefix];
+                snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix);
+                chptr->print_event(out, subprefix, state->sub_state, sub_msg);
+            } else {
+                fprintf(out, "%s    | == cannot print the submessage (event print function not yet defined for network of type %s) ==\n", prefix, model_net_method_names[state->net_id]);
+            }
+            break;
+
+        case MN_CONGESTION_EVENT:
+            // Nothing to print
+            break;
     }
-    // TODO: print internal state of message
-    // void * sub_msg = ((char*)msg) + msg_offsets[state->net_id];
 }
 
 /* END checking reverse handler functionality */
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index b986ab32..6674abb3 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -7102,14 +7102,36 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  | *           frozen_state = %p\n", prefix, state->frozen_state);
 }
 
+char const * const string_event_t(enum event_t type) {
+    switch (type) {
+        case T_GENERATE:         return "T_GENERATE";
+        case T_ARRIVE:           return "T_ARRIVE";
+        case T_SEND:             return "T_SEND";
+        case T_BUFFER:           return "T_BUFFER";
+        case R_SEND:             return "R_SEND";
+        case R_ARRIVE:           return "R_ARRIVE";
+        case R_BUFFER:           return "R_BUFFER";
+        case R_BANDWIDTH:        return "R_BANDWIDTH";
+        case R_BW_HALT:          return "R_BW_HALT";
+        case T_BANDWIDTH:        return "T_BANDWIDTH";
+        case R_SNAPSHOT:         return "R_SNAPSHOT";
+        case T_NOTIFY:           return "T_NOTIFY";
+        case T_ARRIVE_PREDICTED: return "T_ARRIVE_PREDICTED";
+        case T_VACUOUS_EVENT:    return "T_VACUOUS_EVENT";
+        default:                 return "UNKNOWN TYPE!!";
+    }
+}
+
 // Print fuction originally constructed with help from Claude.ai
-void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) {
+void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg) {
+    //terminal_state * ns = (terminal_state *) s;
+
     fprintf(out, "%sterminal_dally_message ->\n", prefix);
     fprintf(out, "%s  |                      magic = %d\n", prefix, msg->magic);
     fprintf(out, "%s  |          travel_start_time = %g\n", prefix, msg->travel_start_time);
     fprintf(out, "%s  |            travel_end_time = %g\n", prefix, msg->travel_end_time);
     fprintf(out, "%s  |                  packet_ID = %llu\n", prefix, msg->packet_ID);
-    fprintf(out, "%s  |                       type = %d\n", prefix, msg->type);
+    fprintf(out, "%s  |                       type = %d (%s)\n", prefix, msg->type, string_event_t((enum event_t) msg->type));
     fprintf(out, "%s  |                notify_type = %d\n", prefix, msg->notify_type);
     fprintf(out, "%s  |                   category = %.16s\n", prefix, msg->category);
     fprintf(out, "%s  |             final_dest_gid = %lu\n", prefix, msg->final_dest_gid);

From f3818d0ce93261e73a487449a494731c530942d3 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 10 Mar 2025 11:35:02 -0400
Subject: [PATCH 114/188] Implementing (an almost complete) deep-copy of
 terminal_state

---
 codes/congestion-controller-model.h        |   5 +
 codes/model-net.h                          |   5 +-
 src/networks/model-net/core/model-net-lp.c |  34 +-
 src/networks/model-net/dragonfly-dally.C   | 357 +++++++++++++++++----
 src/util/congestion-controller.C           |  53 +++
 5 files changed, 394 insertions(+), 60 deletions(-)

diff --git a/codes/congestion-controller-model.h b/codes/congestion-controller-model.h
index e8b673b5..ff5f6f8f 100644
--- a/codes/congestion-controller-model.h
+++ b/codes/congestion-controller-model.h
@@ -156,6 +156,11 @@ typedef struct tlc_state
     double current_injection_bandwidth_coef;
 } tlc_state;
 
+void save_tlc_state(tlc_state * into, tlc_state const * from);
+void clean_tlc_state(tlc_state * into);
+bool check_tlc_state(tlc_state * before, tlc_state * after);
+void print_tlc_state(FILE * out, char const * prefix, tlc_state * state);
+
 congestion_control_message* cc_msg_rc_storage_create();
 void cc_msg_rc_storage_delete(void * ptr);
 
diff --git a/codes/model-net.h b/codes/model-net.h
index f003cc10..abb630ce 100644
--- a/codes/model-net.h
+++ b/codes/model-net.h
@@ -164,7 +164,10 @@ struct mn_stats
 };
 
 bool check_model_net_request(model_net_request const * before, model_net_request const * after);
-void print_model_net_request(FILE * out, char const * before, model_net_request * item);
+void print_model_net_request(FILE * out, char const * prefix, model_net_request * item);
+
+bool check_mn_stats(struct mn_stats const * before, struct mn_stats const * after);
+void print_mn_stats(FILE * out, char const * prefix, struct mn_stats * item);
 
 /* Registers all model-net LPs in ROSS. Should be called after
  * configuration_load, but before codes_mapping_setup */
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 6a2cf4f8..536c44a1 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1210,6 +1210,8 @@ static void clean_state_net_state(model_net_base_state * state) {
     free(state->node_copy_next_available_time);
 }
 
+static bool warned_no_lp_checking_defined[MAX_NETS];
+
 static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after) {
     bool is_same = true;
     is_same &= before->net_id == after->net_id;
@@ -1226,8 +1228,10 @@ static bool check_model_net_state(model_net_base_state * before, model_net_base_
     crv_checkpointer * chptr = method_array[before->net_id]->checkpointer;
     if (chptr && before->sub_state != NULL && chptr->check_lps) {
         is_same &= chptr->check_lps(before->sub_state, after->sub_state);
-    } else {
-        tw_error(TW_LOC, "Network of type \"%s\" has not been configured to be checkpointed", model_net_method_names[before->net_id]);
+    // Warning once that checking for LP subtype has not been fully implemented
+    } else if (!warned_no_lp_checking_defined[before->net_id]) {
+        fprintf(stderr, "Warning: Network of type \"%s\" has not been fully configured to be checkpointed (Running this model under SEQUENTIAL_ROLLBACK_CHECK won't capture any issues that arise from the reverse event handlers).\n", model_net_method_names[before->net_id]);
+        warned_no_lp_checking_defined[before->net_id] = true;
     }
     is_same &= before->next_available_time == after->next_available_time;
     for (int i=0; i < before->params->node_copy_queues; i++) {
@@ -1353,6 +1357,32 @@ void print_model_net_request(FILE * out, char const * prefix, model_net_request
     fprintf(out, "%sapp_id = %d\n", prefix, req->app_id);
 }
 
+bool check_mn_stats(struct mn_stats const * before, struct mn_stats const * after) {
+    bool is_same = true;
+
+    is_same &= (strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0);
+    is_same &= (before->send_count == after->send_count);
+    is_same &= (before->send_bytes == after->send_bytes);
+    is_same &= (before->send_time == after->send_time);
+    is_same &= (before->recv_count == after->recv_count);
+    is_same &= (before->recv_bytes == after->recv_bytes);
+    is_same &= (before->recv_time == after->recv_time);
+    is_same &= (before->max_event_size == after->max_event_size);
+
+    return is_same;
+}
+
+void print_mn_stats(FILE * out, char const * prefix, struct mn_stats * req) {
+    fprintf(out, "%scategory = '%s'\n", prefix, req->category);
+    fprintf(out, "%ssend_count = %ld\n", prefix, req->send_count);
+    fprintf(out, "%ssend_bytes = %ld\n", prefix, req->send_bytes);
+    fprintf(out, "%ssend_time = %g\n", prefix, req->send_time);
+    fprintf(out, "%srecv_count = %ld\n", prefix, req->recv_count);
+    fprintf(out, "%srecv_bytes = %ld\n", prefix, req->recv_bytes);
+    fprintf(out, "%srecv_time = %g\n", prefix, req->recv_time);
+    fprintf(out, "%smax_event_size = %ld\n", prefix, req->max_event_size);
+}
+
 static void print_event_state(FILE * out, char const * prefix, model_net_base_state * state, model_net_wrap_msg * msg) {
     fprintf(out, "%sh\n", prefix);
     fprintf(out, "%s| src = %lu\n", prefix, msg->h.src);
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 6674abb3..409f063a 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -496,6 +496,9 @@ struct packet_id {
 bool operator<(struct packet_id const &lk, struct packet_id const &rk) {
     return lk.packet_ID == rk.packet_ID ? lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id : lk.packet_ID < rk.packet_ID;
 }
+bool operator==(struct packet_id const &lk, struct packet_id const &rk) {
+    return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id;
+}
 // Some more function declarations
 static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_message * msg, enum notify_t notification);
 
@@ -3558,6 +3561,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
             fprintf(dragonfly_term_bw_log, "\n term-id time-stamp port-id busy-time");
         }*/
 
+    s->local_congestion_controller = NULL;
     if (g_congestion_control_enabled) {
         s->local_congestion_controller = (tlc_state*)calloc(1,sizeof(tlc_state));
         cc_terminal_local_controller_init(s->local_congestion_controller, lp, s->terminal_id, &s->workloads_finished_flag);
@@ -6919,17 +6923,132 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf,
 }
 
 //*** ---------- START OF reverse handler checking functions ---------- ***
+bool warn_incomplete_definition_terminal_state_check = false;
+
 static void save_terminal_state(terminal_state *into, terminal_state const *from) {
+    if (!warn_incomplete_definition_terminal_state_check) {
+        fprintf(stderr, "Warning: Deep-cloning and comparing has not been fully implemented for the (sub)LP type: `terminal_state` (Running this model under SEQUENTIAL_ROLLBACK_CHECK might not capture issues that arise from its reverse event handler).\n");
+        warn_incomplete_definition_terminal_state_check = true;
+    }
+
+    // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at
+    // from->predictor_data
+    // from->sample_stat
+    // from->ross_sample
+    // from->busy_time_ross_sample
+
     memcpy(into, from, sizeof(terminal_state));
-}
 
+    dragonfly_param const * p = into->params;
+    int const num_qos_levels = p->num_qos_levels;
+    int const num_rails = p->num_rails;
+
+    into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*));
+    into->terminal_length = (int**) malloc(num_rails * sizeof(int*));
+    into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+    into->in_send_loop = (int*) malloc(num_rails * sizeof(int));
+    into->issueIdle = (int*) malloc(num_rails * sizeof(int));
+    into->qos_status = (int**) malloc(num_rails * sizeof(int*));
+    into->qos_data = (int**) malloc(num_rails * sizeof(int*));
+    into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int));
+    into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+    into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
+    into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
+    into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+    //into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
+
+    for(int i = 0; i < num_rails; i++) {
+        into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int));
+        into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int));
+        into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
+        into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
+        for (int j = 0; j<num_qos_levels; j++) {
+            into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
+            into->terminal_length[i][j] = from->terminal_length[i][j];
+            into->qos_data[i][j] = from->qos_data[i][j];
+            into->qos_status[i][j] = from->qos_status[i][j];
+        }
+        into->last_buf_full[i] = from->last_buf_full[i];
+        into->in_send_loop[i] = from->in_send_loop[i];
+        into->issueIdle[i] = from->issueIdle[i];
+        into->last_qos_lvl[i] = from->last_qos_lvl[i];
+        into->terminal_available_time[i] = from->terminal_available_time[i];
+        into->stalled_chunks[i] = from->stalled_chunks[i];
+        into->total_chunks[i] = from->total_chunks[i];
+        into->busy_time[i] = from->busy_time[i];
+    }
+
+    into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t));
+    for (int i = 0; i < p->radix; i++) {
+        into->link_traffic[i] = from->link_traffic[i];
+    }
+
+    if (from->local_congestion_controller != NULL) {
+        assert(g_congestion_control_enabled);
+        into->local_congestion_controller = (tlc_state*) malloc(sizeof(tlc_state));
+        save_tlc_state(into->local_congestion_controller, from->local_congestion_controller);
+    }
+
+    // Magic deep-copy using C++ mechanisms (the values do not point to any pointers)
+    into->remaining_sz_packets = from->remaining_sz_packets;
+    into->zombies = from->zombies;
+}
+
+// Partially written by Claude
 static void clean_terminal_state(terminal_state *state) {
+    dragonfly_param const * p = state->params;
+    int const num_rails = p->num_rails;
+
+    // Free all allocated memory
+    for (int i = 0; i < num_rails; i++) {
+        free(state->vc_occupancy[i]);
+        free(state->terminal_length[i]);
+        free(state->qos_status[i]);
+        free(state->qos_data[i]);
+    }
+
+    free(state->vc_occupancy);
+    free(state->terminal_length);
+    free(state->last_buf_full);
+    free(state->in_send_loop);
+    free(state->issueIdle);
+    free(state->qos_status);
+    free(state->qos_data);
+    free(state->last_qos_lvl);
+    free(state->terminal_available_time);
+    free(state->stalled_chunks);
+    free(state->total_chunks);
+    free(state->busy_time);
+    free(state->link_traffic);
+
+    if (state->local_congestion_controller != NULL) {
+        clean_tlc_state(state->local_congestion_controller);
+        free(state->local_congestion_controller);
+    }
+
+    // Finish cleaning (free memory), and check and print!!
+    state->remaining_sz_packets.~map();
+    state->zombies.~set();
 }
 
 static bool check_terminal_state(terminal_state *before, terminal_state *after) {
     bool is_same = true;
 
-    // Compare scalar values
+    // There is no need to deep-copy the following. They're never modified
+    assert(before->params == after->params);
+    assert(before->router_lp == after->router_lp);
+    assert(before->router_id == after->router_id);
+
+    // We ignore the comparison of the following. They are not meant to be rolled-back
+    // before->fwd_events
+    // before->rev_events
+    // before->sent_packets
+    // before->last_packet_sent_id
+    // before->arrival_of_last_packet
+    // before->anno
+    assert(before->frozen_state == after->frozen_state);
+
+    // Comparing all other elements of the struct
     is_same &= (before->packet_counter == after->packet_counter);
     is_same &= (before->packet_gen == after->packet_gen);
     is_same &= (before->packet_fin == after->packet_fin);
@@ -6954,30 +7073,12 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
     is_same &= (before->fin_chunks_time == after->fin_chunks_time);
     is_same &= (before->op_arr_size == after->op_arr_size);
     is_same &= (before->max_arr_size == after->max_arr_size);
-    //is_same &= (before->fwd_events == after->fwd_events);  // This is used for statistics, they are never changed when rollbacking
-    //is_same &= (before->rev_events == after->rev_events);  // This is used for statistics, they are never changed when rollbacking
     is_same &= (before->fin_chunks_ross_sample == after->fin_chunks_ross_sample);
     is_same &= (before->data_size_ross_sample == after->data_size_ross_sample);
     is_same &= (before->fin_hops_ross_sample == after->fin_hops_ross_sample);
     is_same &= (before->fin_chunks_time_ross_sample == after->fin_chunks_time_ross_sample);
-    is_same &= (before->last_packet_sent_id == after->last_packet_sent_id);
     is_same &= (before->last_in_queue_time == after->last_in_queue_time);
 
-    // Compare arrival_of_last_packet struct
-    is_same &= (before->arrival_of_last_packet.packet_ID == after->arrival_of_last_packet.packet_ID);
-    is_same &= (before->arrival_of_last_packet.travel_end_time == after->arrival_of_last_packet.travel_end_time);
-
-    // Compare arrays (assumes params is the same for both)
-    assert(before->params == after->params);
-    //if (before->params && after->params && before->params->num_rails == after->params->num_rails) {
-    //    for (int i = 0; i < before->params->num_rails; i++) {
-    //        is_same &= (before->router_lp[i] == after->router_lp[i]);
-    //        is_same &= (before->router_id[i] == after->router_id[i]);
-    //    }
-    //} else {
-    //    is_same = false;
-    //}
-
     // Compare string buffers
     is_same &= (strncmp(before->output_buf, after->output_buf, 4096) == 0);
     is_same &= (strncmp(before->output_buf2, after->output_buf2, 4096) == 0);
@@ -6990,28 +7091,47 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
         is_same &= (before->anno == after->anno);
     }
 
+    dragonfly_param const * p = before->params;
+    int const num_qos_levels = p->num_qos_levels;
+    int const num_rails = p->num_rails;
+
+    for (int i = 0; i < num_rails; i++) {
+        for (int j = 0; j < num_qos_levels; j++) {
+            is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]);
+            is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]);
+            is_same &= (before->qos_status[i][j] == after->qos_status[i][j]);
+            is_same &= (before->qos_data[i][j] == after->qos_data[i][j]);
+        }
+
+        is_same &= (before->last_buf_full[i] == after->last_buf_full[i]);
+        is_same &= (before->in_send_loop[i] == after->in_send_loop[i]);
+        is_same &= (before->issueIdle[i] == after->issueIdle[i]);
+        is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]);
+        is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]);
+        is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]);
+        is_same &= (before->total_chunks[i] == after->total_chunks[i]);
+        is_same &= (before->busy_time[i] == after->busy_time[i]);
+    }
+
+    for (int i = 0; i < p->radix; i++) {
+        is_same &= (before->link_traffic[i] == after->link_traffic[i]);
+    }
+
+    // Ignoring model statistics. In general, we don't care if there are errors in the statistics, as they are only approximate. The stastistics don't interferee with the state of the model. There is a bug within the statistics when rolbacking though. A parameters is never reversed properly
+    //for (size_t i = 0; i < CATEGORY_MAX; i++) {
+    //    is_same &= check_mn_stats(&before->dragonfly_stats_array[i], &after->dragonfly_stats_array[i]);
+    //}
+
+    if (after->local_congestion_controller != NULL) {
+        is_same &= check_tlc_state(before->local_congestion_controller, after->local_congestion_controller);
+    }
+
+    is_same &= before->remaining_sz_packets == after->remaining_sz_packets;
+    is_same &= before->zombies == after->zombies;
+
     // Compare pointers (just checking if they're both NULL or both non-NULL)
-    //is_same &= ((before->local_congestion_controller == NULL) == (after->local_congestion_controller == NULL));
-    //is_same &= ((before->vc_occupancy == NULL) == (after->vc_occupancy == NULL));
-    //is_same &= ((before->terminal_available_time == NULL) == (after->terminal_available_time == NULL));
     //is_same &= ((before->terminal_msgs == NULL) == (after->terminal_msgs == NULL));
-    //is_same &= ((before->in_send_loop == NULL) == (after->in_send_loop == NULL));
-    //is_same &= ((before->qos_status == NULL) == (after->qos_status == NULL));
-    //is_same &= ((before->qos_data == NULL) == (after->qos_data == NULL));
-    //is_same &= ((before->last_qos_lvl == NULL) == (after->last_qos_lvl == NULL));
-    //is_same &= ((before->issueIdle == NULL) == (after->issueIdle == NULL));
-    //is_same &= ((before->terminal_length == NULL) == (after->terminal_length == NULL));
     //is_same &= ((before->rank_tbl == NULL) == (after->rank_tbl == NULL));
-    //is_same &= ((before->last_buf_full == NULL) == (after->last_buf_full == NULL));
-    //is_same &= ((before->busy_time == NULL) == (after->busy_time == NULL));
-    //is_same &= ((before->link_traffic == NULL) == (after->link_traffic == NULL));
-    //is_same &= ((before->total_chunks == NULL) == (after->total_chunks == NULL));
-    //is_same &= ((before->stalled_chunks == NULL) == (after->stalled_chunks == NULL));
-    //is_same &= ((before->busy_time_sample == NULL) == (after->busy_time_sample == NULL));
-    //is_same &= ((before->sample_stat == NULL) == (after->sample_stat == NULL));
-    //is_same &= ((before->busy_time_ross_sample == NULL) == (after->busy_time_ross_sample == NULL));
-    //is_same &= ((before->predictor_data == NULL) == (after->predictor_data == NULL));
-    is_same &= ((before->frozen_state == NULL) && (after->frozen_state == NULL));
 
     return is_same;
 }
@@ -7024,13 +7144,13 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |               packet_fin = %d\n", prefix, state->packet_fin);
     fprintf(out, "%s  |           total_gen_size = %d\n", prefix, state->total_gen_size);
 
-    fprintf(out, "%s  | *              router_lp[%d] = [", prefix, state->params->num_rails);
+    fprintf(out, "%s  | *          router_lp[%d] = [", prefix, state->params->num_rails);
     for (int i=0; i<state->params->num_rails; i++) {
         fprintf(out, "%s%lu", i ? ", " : "", state->router_lp[i]);
     }
     fprintf(out, "]\n");
 
-    fprintf(out, "%s  | *              router_id[%d] = [", prefix, state->params->num_rails);
+    fprintf(out, "%s  | *          router_id[%d] = [", prefix, state->params->num_rails);
     for (int i=0; i<state->params->num_rails; i++) {
         fprintf(out, "%s%u", i ? ", " : "", state->router_id[i]);
     }
@@ -7038,22 +7158,101 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 
     fprintf(out, "%s  |              terminal_id = %u\n", prefix, state->terminal_id);
     fprintf(out, "%s  |                  connMan = <DragonflyConnectionManager object>\n", prefix);
+
+    char addprefix[] = "  | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+    char * subprefix = (char *) malloc(len_subprefix * sizeof(char));
     fprintf(out, "%s  | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller);
+    if (state->local_congestion_controller != NULL) {
+        print_tlc_state(out, subprefix, state->local_congestion_controller);
+    }
+    free(subprefix);
+
     fprintf(out, "%s  |  workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag);
-    fprintf(out, "%s  | **          vc_occupancy = %p\n", prefix, state->vc_occupancy);
-    fprintf(out, "%s  | *terminal_available_time = %p\n", prefix, state->terminal_available_time);
+
+    fprintf(out, "%s  | **  vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s  |        rail %d: [", prefix, i);
+        for (int j=0; j<state->params->num_qos_levels; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |     ]\n", prefix);
+
+    fprintf(out, "%s  | *terminal_available_time[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->terminal_available_time[i]);
+    }
+    fprintf(out, "]\n");
+
     fprintf(out, "%s  | ***        terminal_msgs = %p\n", prefix, state->terminal_msgs);
     fprintf(out, "%s  | ***   terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail);
-    fprintf(out, "%s  | *           in_send_loop = %p\n", prefix, state->in_send_loop);
-    fprintf(out, "%s  |    dragonfly_stats_array = <mn_stats array>\n", prefix);
-    fprintf(out, "%s  | **            qos_status = %p\n", prefix, state->qos_status);
-    fprintf(out, "%s  | **              qos_data = %p\n", prefix, state->qos_data);
-    fprintf(out, "%s  | *           last_qos_lvl = %p\n", prefix, state->last_qos_lvl);
+
+    fprintf(out, "%s  | *       in_send_loop[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]);
+    }
+    fprintf(out, "]\n");
+
+    char addprefix_2[] = "  |    | ";
+    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+    fprintf(out, "%s  |    dragonfly_stats_array = [\n", prefix);
+    for (int i = 0; i < CATEGORY_MAX; i++) {
+        fprintf(out, "%s  |    %d:\n", prefix, i);
+        print_mn_stats(out, subprefix, &state->dragonfly_stats_array[i]);
+    }
+    fprintf(out, "%s  |    ]\n", prefix);
+    free(subprefix);
+
+    fprintf(out, "%s  | **      qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s  |          rail %d: [", prefix, i);
+        for (int j=0; j<state->params->num_qos_levels; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |       ]\n", prefix);
+
+    fprintf(out, "%s  | **        qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s  |            rail %d: [", prefix, i);
+        for (int j=0; j<state->params->num_qos_levels; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |         ]\n", prefix);
+
+    fprintf(out, "%s  | *        last_qos_lvl[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]);
+    }
+    fprintf(out, "]\n");
+
     fprintf(out, "%s  |         is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw);
     fprintf(out, "%s  | *                     st = %p\n", prefix, state->st);
     fprintf(out, "%s  | *                  cc_st = %p\n", prefix, state->cc_st);
-    fprintf(out, "%s  | *              issueIdle = %p\n", prefix, state->issueIdle);
-    fprintf(out, "%s  | **       terminal_length = %p\n", prefix, state->terminal_length);
+
+    fprintf(out, "%s  | *           issueIdle[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s  |       rail %d: [", prefix, i);
+        for (int j=0; j<state->params->num_qos_levels; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->terminal_length[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |    ]\n", prefix);
+
     fprintf(out, "%s  | *                   anno = %s\n", prefix, state->anno ? state->anno : "(nil)");
     fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
     fprintf(out, "%s  | *               rank_tbl = %p\n", prefix, state->rank_tbl);
@@ -7064,11 +7263,37 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |            finished_msgs = %ld\n", prefix, state->finished_msgs);
     fprintf(out, "%s  |          finished_chunks = %ld\n", prefix, state->finished_chunks);
     fprintf(out, "%s  |         finished_packets = %ld\n", prefix, state->finished_packets);
-    fprintf(out, "%s  | *          last_buf_full = %p\n", prefix, state->last_buf_full);
-    fprintf(out, "%s  | *              busy_time = %p\n", prefix, state->busy_time);
-    fprintf(out, "%s  | *           link_traffic = %p\n", prefix, state->link_traffic);
-    fprintf(out, "%s  | *           total_chunks = %p\n", prefix, state->total_chunks);
-    fprintf(out, "%s  | *         stalled_chunks = %p\n", prefix, state->stalled_chunks);
+
+    fprintf(out, "%s  | *       last_buf_full[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *           busy_time[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *        link_traffic[%d] = [", prefix, state->params->radix);
+    for (int i=0; i<state->params->radix; i++) {
+        fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *        total_chunks[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *      stalled_chunks[%d] = [", prefix, state->params->num_rails);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]);
+    }
+    fprintf(out, "]\n");
+
     fprintf(out, "%s  |          injected_chunks = %lu\n", prefix, state->injected_chunks);
     fprintf(out, "%s  |           ejected_chunks = %lu\n", prefix, state->ejected_chunks);
     fprintf(out, "%s  |              max_latency = %g\n", prefix, state->max_latency);
@@ -7092,13 +7317,31 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  | fin_chunks_time_ross_sample = %g\n", prefix, state->fin_chunks_time_ross_sample);
     fprintf(out, "%s  | *  busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample);  // ingnoring as this part of the code is never used. Originally part of instrumentation
     fprintf(out, "%s  |              ross_sample = <dfly_cn_sample object>\n", prefix);  // ingnoring as this part of the code is never used. Originally part of instrumentation
+
+    // modified outside of process and reverse computation (at commit and at surrogate change)
     fprintf(out, "%s  |             sent_packets = <map object>\n", prefix);
+
     fprintf(out, "%s  |      last_packet_sent_id = %ld\n", prefix, state->last_packet_sent_id);
     fprintf(out, "%s  |   arrival_of_last_packet = {packet_ID: %ld, travel_end_time: %g}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time);
-    fprintf(out, "%s  |     remaining_sz_packets = <map object>\n", prefix);
+
+    fprintf(out, "%s  |     remaining_sz_packets = {\n", prefix);
+    std::map<struct packet_id, uint32_t>::iterator it_map;
+    for (it_map = state->remaining_sz_packets.begin(); it_map != state->remaining_sz_packets.end(); ++it_map) {
+        fprintf(out, "%s  |         {packet_ID: %lu, dfdally_src_terminal_id: %u} -> %d,\n", prefix, it_map->first.packet_ID, it_map->first.dfdally_src_terminal_id, it_map->second);
+
+    }
+    fprintf(out, "%s  |     }\n", prefix);
+
     fprintf(out, "%s  |       last_in_queue_time = %g\n", prefix, state->last_in_queue_time);
     fprintf(out, "%s  | *         predictor_data = %p\n", prefix, state->predictor_data);
-    fprintf(out, "%s  |                  zombies = <set object>\n", prefix);
+
+    fprintf(out, "%s  |                  zombies = [\n", prefix);
+    std::set<struct packet_id>::iterator it;
+    for (it = state->zombies.begin(); it != state->zombies.end(); ++it) {
+        fprintf(out, "%s  |                    {packet_ID: %lu, dfdally_src_terminal_id: %u},\n", prefix, it->packet_ID, it->dfdally_src_terminal_id);
+    }
+    fprintf(out, "%s  |                  ]\n", prefix);
+
     fprintf(out, "%s  | *           frozen_state = %p\n", prefix, state->frozen_state);
 }
 
diff --git a/src/util/congestion-controller.C b/src/util/congestion-controller.C
index a0dab10c..d8ff1a1a 100644
--- a/src/util/congestion-controller.C
+++ b/src/util/congestion-controller.C
@@ -906,6 +906,59 @@ static double calculate_bandwidth_usage_percent(int bytes_transmitted, double ma
     return percent_bw;
 }
 
+void save_tlc_state(tlc_state * into, tlc_state const * from) {
+    memcpy(into, from, sizeof(tlc_state));
+    into->ejected_rate_windows = (double*) malloc(cc_bandwidth_rolling_window_count * sizeof(double));
+    for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) {
+        into->ejected_rate_windows[i] = from->ejected_rate_windows[i];
+    }
+}
+
+void clean_tlc_state(tlc_state * state) {
+    free(state->ejected_rate_windows);
+}
+
+bool check_tlc_state(tlc_state * before, tlc_state * after) {
+    bool is_same = true;
+
+    is_same &= before->terminal_id == after->terminal_id;
+    is_same &= before->app_id == after->app_id;
+    is_same &= before->abatement_signal_count == after->abatement_signal_count;
+    is_same &= before->window_epoch == after->window_epoch;
+    is_same &= before->ejected_packet_bytes == after->ejected_packet_bytes;
+
+    for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) {
+        is_same &= before->ejected_rate_windows[i] == after->ejected_rate_windows[i];
+    }
+
+    is_same &= before->cur_average_rate == after->cur_average_rate;
+    is_same &= before->is_abatement_active == after->is_abatement_active;
+    is_same &= *before->workloads_finished_flag_ptr == *after->workloads_finished_flag_ptr;
+    is_same &= before->current_injection_bandwidth_coef == after->current_injection_bandwidth_coef;
+
+    return is_same;
+}
+
+void print_tlc_state(FILE * out, char const * prefix, tlc_state * state) {
+    fprintf(out, "%s tlc_state ->\n", prefix);
+    fprintf(out, "%s  | terminal_id = %d\n", prefix, state->terminal_id);
+    fprintf(out, "%s  | app_id = %d\n", prefix, state->app_id);
+    fprintf(out, "%s  | abatement_signal_count = %d\n", prefix, state->abatement_signal_count);
+    fprintf(out, "%s  | window_epoch = %ud\n", prefix, state->window_epoch);
+    fprintf(out, "%s  | ejected_packet_bytes = %ud\n", prefix, state->ejected_packet_bytes);
+
+    fprintf(out, "%s  | ejected_rate_windows[%d] = [", prefix, cc_bandwidth_rolling_window_count);
+    for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) {
+        fprintf(out, "%g%s", state->ejected_rate_windows[i], i == cc_bandwidth_rolling_window_count - 1 ? "" : ", ");
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | cur_average_rate = %g\n", prefix, state->cur_average_rate);
+    fprintf(out, "%s  | is_abatement_active = %d\n", prefix, state->is_abatement_active);
+    fprintf(out, "%s  | workloads_finished_flag_ptr = %d\n", prefix, *state->workloads_finished_flag_ptr);
+    fprintf(out, "%s  | current_injection_bandwidth_coef = %g\n", prefix, state->current_injection_bandwidth_coef);
+}
+
 void cc_terminal_process_bandwidth_check(tlc_state *s, congestion_control_message *msg, tw_lp *lp)
 {
     double usage_percent = calculate_bandwidth_usage_percent(s->ejected_packet_bytes, s->params->terminal_configured_bandwidth, 1); //multiplier for multiple rails but right now we're just using 1

From 0898c37d3fb962ad35766dd555a72c25c71bd9b9 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 10 Mar 2025 12:30:00 -0400
Subject: [PATCH 115/188] Fixing reversibility bug in terminal_state
 (dragonfly-dally)

---
 src/networks/model-net/dragonfly-dally.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 409f063a..cba515bb 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4468,7 +4468,7 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag
     if(msg->qos_reset2)
         s->qos_status[msg->rail_id][1] = Q_ACTIVE;
     
-    if(msg->last_saved_qos)
+    if(msg->last_saved_qos >= 0)
         s->last_qos_lvl[msg->rail_id] = msg->last_saved_qos;
 
     if(bf->c1) {

From d3d76217aa0c314a7226e1f8a86bd90adea913ea Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 11 Mar 2025 19:16:13 -0400
Subject: [PATCH 116/188] Commenting what has is left to be implemented to
 fully deep-copy `struct terminal_state`

---
 src/networks/model-net/dragonfly-dally.C | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index cba515bb..63e1080d 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5580,6 +5580,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
     for(int i = 0; i < s->params->num_rails; i++)
     {
         free(s->vc_occupancy[i]);
+        // TODO: terminal_msgs are not properly freed if there are messages left. Correct this!
         free(s->terminal_msgs[i]);
         free(s->terminal_msgs_tail[i]);
     }
@@ -6931,6 +6932,10 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
         warn_incomplete_definition_terminal_state_check = true;
     }
 
+    // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents
+    // from->terminal_msgs
+    // from->rank_tbl
+
     // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at
     // from->predictor_data
     // from->sample_stat
@@ -6955,7 +6960,6 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
     into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
     into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
     into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
-    //into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
 
     for(int i = 0; i < num_rails; i++) {
         into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int));
@@ -7129,10 +7133,6 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
     is_same &= before->remaining_sz_packets == after->remaining_sz_packets;
     is_same &= before->zombies == after->zombies;
 
-    // Compare pointers (just checking if they're both NULL or both non-NULL)
-    //is_same &= ((before->terminal_msgs == NULL) == (after->terminal_msgs == NULL));
-    //is_same &= ((before->rank_tbl == NULL) == (after->rank_tbl == NULL));
-
     return is_same;
 }
 

From 41680da53bc38389644a1cec41e433f1f6642ee0 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:23:57 -0400
Subject: [PATCH 117/188] Implementing deep-copy of member terminal_msgs in
 terminal_state

---
 codes/net/dragonfly-dally.h                   |   1 +
 .../model-net/core/model-net-sched-impl.c     |   2 +
 src/networks/model-net/dragonfly-dally.C      | 202 +++++++++++++++++-
 3 files changed, 197 insertions(+), 8 deletions(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 2647c4df..504446b0 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -137,6 +137,7 @@ struct terminal_dally_message
 };
 
 void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg);
+bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after);
 
 #ifdef __cplusplus
 }
diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c
index a3ff4fde..ffe71d7a 100644
--- a/src/networks/model-net/core/model-net-sched-impl.c
+++ b/src/networks/model-net/core/model-net-sched-impl.c
@@ -389,6 +389,8 @@ static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * fr
     into->req = from->req;
     into->sched_params = from->sched_params;
     into->rem = from->rem;
+    into->remote_event = NULL;
+    into->local_event = NULL;
     if (from->remote_event != NULL) {
         assert(from->req.remote_event_size > 0);
         into->remote_event = malloc(from->req.remote_event_size);
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 63e1080d..4ccc80e0 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -497,7 +497,7 @@ bool operator<(struct packet_id const &lk, struct packet_id const &rk) {
     return lk.packet_ID == rk.packet_ID ? lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id : lk.packet_ID < rk.packet_ID;
 }
 bool operator==(struct packet_id const &lk, struct packet_id const &rk) {
-    return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id;
+    return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id == rk.dfdally_src_terminal_id;
 }
 // Some more function declarations
 static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_message * msg, enum notify_t notification);
@@ -1621,6 +1621,119 @@ static terminal_dally_message_list* return_tail(
     return tail;
 }
 
+// Copies a list and returns the tail
+static terminal_dally_message_list * copy_terminal_dally_message_list(terminal_dally_message_list ** into_thisq, terminal_dally_message_list const * from_thisq) {
+    if (from_thisq == NULL) {
+        *into_thisq = NULL;
+        return NULL;
+    }
+
+    terminal_dally_message_list const * from_head = from_thisq;
+    terminal_dally_message_list * prev = NULL;
+    while(from_head != NULL) {
+        terminal_dally_message_list * copy_head = (terminal_dally_message_list *) malloc(sizeof(terminal_dally_message_list));
+
+        //copy_head->msg = from_head->msg;
+        memcpy(copy_head, from_head, sizeof(terminal_dally_message_list));
+        copy_head->prev = prev;
+
+        if (from_head->event_data != NULL) {
+            int const message_size = from_head->msg.remote_event_size_bytes + from_head->msg.local_event_size_bytes;
+            assert(message_size > 0);
+            copy_head->event_data = (char *) malloc(message_size);
+            memcpy(copy_head->event_data, from_head->event_data, message_size);
+        }
+
+        if (prev == NULL) {
+            *into_thisq = copy_head;
+        } else {
+            prev->next = copy_head;
+        }
+
+        prev = copy_head;
+        from_head = from_head->next;
+    }
+    prev->next = NULL;
+
+    return prev;
+}
+
+static void clean_terminal_dally_message_list(terminal_dally_message_list * thisq) {
+    if (thisq == NULL) {
+        return;
+    }
+
+    terminal_dally_message_list * prev = thisq;
+    terminal_dally_message_list * head = prev->next;
+    free(prev->event_data);
+    while (head != NULL) {
+        free(head->event_data);
+        free(prev);
+        prev = head;
+        head = head->next;
+    }
+    free(prev);
+}
+
+static bool check_terminal_dally_message_list(terminal_dally_message_list * before, terminal_dally_message_list * after) {
+    bool is_same = true;
+
+    terminal_dally_message_list * head_before = before;
+    terminal_dally_message_list * head_after = after;
+    while (head_before != NULL && head_after != NULL) {
+        is_same &= check_terminal_dally_message(&head_before->msg, &head_after->msg);
+        is_same &= (head_before->event_data == NULL) == (head_after->event_data == NULL);
+
+        int const message_size = head_before->msg.remote_event_size_bytes + head_before->msg.local_event_size_bytes;
+        int const message_size_after = head_after->msg.remote_event_size_bytes + head_after->msg.local_event_size_bytes;
+        is_same &= message_size == message_size_after;
+
+        if (is_same && head_before->event_data != NULL) {
+            assert(message_size > 0);
+
+            is_same &= !memcmp(head_before->event_data, head_after->event_data, message_size);
+        }
+
+        head_before = head_before->next;
+        head_after = head_after->next;
+    }
+
+    if (head_before != NULL || head_after != NULL) {
+        is_same = false; // at least one of them is longer than the other
+    }
+
+    return is_same;
+}
+
+static void print_terminal_dally_message_list(FILE * out, char const * prefix, terminal_state * ns, terminal_dally_message_list * thisq) {
+    if (thisq == NULL) {
+        return;
+    }
+
+    char addprefix_2[] = " | | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    char * subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+
+    terminal_dally_message_list * head = thisq;
+    while (head != NULL) {
+        fprintf(out, "%s{\n", prefix);
+        fprintf(out, "%s | msg:\n", prefix);
+        print_terminal_dally_message(out, subprefix, ns, &head->msg);
+        fprintf(out, "%s | event_data = %p\n", prefix, head->event_data);
+        int const message_size = head->msg.remote_event_size_bytes + head->msg.local_event_size_bytes;
+        if (head->event_data != NULL) {
+            assert(message_size > 0);
+            tw_fprint_binary_array(out, subprefix, head->event_data, message_size);
+        }
+        fprintf(out, "%s},\n", prefix);
+        head = head->next;
+    }
+
+    free(subprefix);
+}
+
+
 static tw_stime* buff_time_storage_create(terminal_state *s)
 {
     tw_stime* storage = (tw_stime*)malloc(s->params->num_rails * sizeof(tw_stime));
@@ -6933,7 +7046,6 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
     }
 
     // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents
-    // from->terminal_msgs
     // from->rank_tbl
 
     // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at
@@ -6960,17 +7072,20 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
     into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
     into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
     into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+    into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
 
     for(int i = 0; i < num_rails; i++) {
         into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int));
         into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int));
         into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
         into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
+        into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*));
         for (int j = 0; j<num_qos_levels; j++) {
             into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
             into->terminal_length[i][j] = from->terminal_length[i][j];
             into->qos_data[i][j] = from->qos_data[i][j];
             into->qos_status[i][j] = from->qos_status[i][j];
+            copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]);
         }
         into->last_buf_full[i] = from->last_buf_full[i];
         into->in_send_loop[i] = from->in_send_loop[i];
@@ -7002,6 +7117,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
 static void clean_terminal_state(terminal_state *state) {
     dragonfly_param const * p = state->params;
     int const num_rails = p->num_rails;
+    int const num_qos_levels = p->num_qos_levels;
 
     // Free all allocated memory
     for (int i = 0; i < num_rails; i++) {
@@ -7009,6 +7125,10 @@ static void clean_terminal_state(terminal_state *state) {
         free(state->terminal_length[i]);
         free(state->qos_status[i]);
         free(state->qos_data[i]);
+        for (int j = 0; j<num_qos_levels; j++) {
+            clean_terminal_dally_message_list(state->terminal_msgs[i][j]);
+        }
+        free(state->terminal_msgs[i]);
     }
 
     free(state->vc_occupancy);
@@ -7024,13 +7144,13 @@ static void clean_terminal_state(terminal_state *state) {
     free(state->total_chunks);
     free(state->busy_time);
     free(state->link_traffic);
+    free(state->terminal_msgs);
 
     if (state->local_congestion_controller != NULL) {
         clean_tlc_state(state->local_congestion_controller);
         free(state->local_congestion_controller);
     }
 
-    // Finish cleaning (free memory), and check and print!!
     state->remaining_sz_packets.~map();
     state->zombies.~set();
 }
@@ -7105,6 +7225,7 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
             is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]);
             is_same &= (before->qos_status[i][j] == after->qos_status[i][j]);
             is_same &= (before->qos_data[i][j] == after->qos_data[i][j]);
+            is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]);
         }
 
         is_same &= (before->last_buf_full[i] == after->last_buf_full[i]);
@@ -7186,7 +7307,21 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     }
     fprintf(out, "]\n");
 
-    fprintf(out, "%s  | ***        terminal_msgs = %p\n", prefix, state->terminal_msgs);
+    char addprefix_2[] = "  |    |  | ";
+    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+    fprintf(out, "%s  | ***        terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+    for (int i=0; i<state->params->num_rails; i++) {
+        fprintf(out, "%s  |   rail %d: [\n", prefix, i);
+        for (int j=0; j<state->params->num_qos_levels; j++) {
+            fprintf(out, "%s  |    | qos level %d\n", prefix, j);
+            print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]);
+        }
+    }
+    fprintf(out, "%s  | ]\n", prefix);
+    free(subprefix);
+
     fprintf(out, "%s  | ***   terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail);
 
     fprintf(out, "%s  | *       in_send_loop[%d] = [", prefix, state->params->num_rails);
@@ -7195,10 +7330,10 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     }
     fprintf(out, "]\n");
 
-    char addprefix_2[] = "  |    | ";
-    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    char addprefix_3[] = "  |    | ";
+    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_3) + 1;
     subprefix = (char *) malloc(len_subprefix * sizeof(char));
-    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_3);
     fprintf(out, "%s  |    dragonfly_stats_array = [\n", prefix);
     for (int i = 0; i < CATEGORY_MAX; i++) {
         fprintf(out, "%s  |    %d:\n", prefix, i);
@@ -7328,7 +7463,6 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     std::map<struct packet_id, uint32_t>::iterator it_map;
     for (it_map = state->remaining_sz_packets.begin(); it_map != state->remaining_sz_packets.end(); ++it_map) {
         fprintf(out, "%s  |         {packet_ID: %lu, dfdally_src_terminal_id: %u} -> %d,\n", prefix, it_map->first.packet_ID, it_map->first.dfdally_src_terminal_id, it_map->second);
-
     }
     fprintf(out, "%s  |     }\n", prefix);
 
@@ -7365,6 +7499,58 @@ char const * const string_event_t(enum event_t type) {
     }
 }
 
+// Built with help of Claude
+bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after) {
+    bool is_same = true;
+
+    // Compare all fields
+    is_same &= before->magic == after->magic;
+    is_same &= before->travel_start_time == after->travel_start_time;
+    is_same &= before->travel_end_time == after->travel_end_time;
+    is_same &= before->packet_ID == after->packet_ID;
+    is_same &= before->type == after->type;
+    is_same &= before->notify_type == after->notify_type;
+    is_same &= strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0;
+    is_same &= before->final_dest_gid == after->final_dest_gid;
+    is_same &= before->sender_lp == after->sender_lp;
+    is_same &= before->sender_mn_lp == after->sender_mn_lp;
+    is_same &= before->dest_terminal_lpid == after->dest_terminal_lpid;
+    is_same &= before->dfdally_src_terminal_id == after->dfdally_src_terminal_id;
+    is_same &= before->dfdally_dest_terminal_id == after->dfdally_dest_terminal_id;
+    is_same &= before->src_terminal_id == after->src_terminal_id;
+    is_same &= before->origin_router_id == after->origin_router_id;
+    is_same &= before->app_id == after->app_id;
+    is_same &= before->my_N_hop == after->my_N_hop;
+    is_same &= before->my_l_hop == after->my_l_hop;
+    is_same &= before->my_g_hop == after->my_g_hop;
+    is_same &= before->my_hops_cur_group == after->my_hops_cur_group;
+    is_same &= before->next_stop == after->next_stop;
+    is_same &= before->this_router_arrival == after->this_router_arrival;
+    is_same &= before->this_router_ptp_latency == after->this_router_ptp_latency;
+    is_same &= before->intm_lp_id == after->intm_lp_id;
+    is_same &= before->last_hop == after->last_hop;
+    is_same &= before->is_intm_visited == after->is_intm_visited;
+    is_same &= before->intm_rtr_id == after->intm_rtr_id;
+    is_same &= before->intm_grp_id == after->intm_grp_id;
+    is_same &= before->chunk_id == after->chunk_id;
+    is_same &= before->packet_size == after->packet_size;
+    is_same &= before->message_id == after->message_id;
+    is_same &= before->total_size == after->total_size;
+    is_same &= before->remote_event_size_bytes == after->remote_event_size_bytes;
+    is_same &= before->local_event_size_bytes == after->local_event_size_bytes;
+    is_same &= before->vc_index == after->vc_index;
+    is_same &= before->rail_id == after->rail_id;
+    is_same &= before->output_chan == after->output_chan;
+    is_same &= before->is_pull == after->is_pull;
+    is_same &= before->pull_size == after->pull_size;
+    is_same &= before->path_type == after->path_type;
+    is_same &= before->is_there_another_pckt_in_queue == after->is_there_another_pckt_in_queue;
+    is_same &= before->qos_reset1 == after->qos_reset1;
+    is_same &= before->qos_reset2 == after->qos_reset2;
+
+    return is_same;
+}
+
 // Print fuction originally constructed with help from Claude.ai
 void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg) {
     //terminal_state * ns = (terminal_state *) s;

From f8c5163b1185eae2f5533231b353d77c4b019ebd Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:33:26 -0400
Subject: [PATCH 118/188] Fixing copy of C++ non-initialized members

---
 src/networks/model-net/dragonfly-dally.C | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 4ccc80e0..321908c3 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -7108,9 +7108,26 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
         save_tlc_state(into->local_congestion_controller, from->local_congestion_controller);
     }
 
-    // Magic deep-copy using C++ mechanisms (the values do not point to any pointers)
-    into->remaining_sz_packets = from->remaining_sz_packets;
-    into->zombies = from->zombies;
+    // I would use the C++ amgic to copy these containers but they don't work as well :S
+    new (&into->remaining_sz_packets) map<struct packet_id, uint32_t>();
+    new (&into->zombies) set<struct packet_id>();
+
+    // Sorry const, I promise not to change the state of remaining_sz_packets
+    map<struct packet_id, uint32_t> * from_remaining_sz_packets = (map<struct packet_id, uint32_t> *) &from->remaining_sz_packets;
+    set<struct packet_id> * from_zombies = (set<struct packet_id> *) &from->zombies;
+
+    std::map<struct packet_id, uint32_t>::iterator it_map;
+    for (it_map = from_remaining_sz_packets->begin(); it_map != from_remaining_sz_packets->end(); ++it_map) {
+        into->remaining_sz_packets[it_map->first] = it_map->second;
+    }
+
+    std::set<struct packet_id>::iterator it_set;
+    for (it_set = from_zombies->begin(); it_set != from_zombies->end(); ++it_set) {
+        struct packet_id const zombie = {
+            .packet_ID = it_set->packet_ID,
+            .dfdally_src_terminal_id = it_set->dfdally_src_terminal_id};
+        into->zombies.insert(zombie);
+    }
 }
 
 // Partially written by Claude

From 4a1819b431341482e5264052e9536a0bbda2e24b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:35:50 -0400
Subject: [PATCH 119/188] Some members of terminal_state are not be deep-copied
 in surrogate mode

---
 src/networks/model-net/dragonfly-dally.C | 381 +++++++++++++----------
 1 file changed, 208 insertions(+), 173 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 321908c3..6df7ac68 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -7060,46 +7060,48 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
     int const num_qos_levels = p->num_qos_levels;
     int const num_rails = p->num_rails;
 
-    into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*));
-    into->terminal_length = (int**) malloc(num_rails * sizeof(int*));
-    into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
-    into->in_send_loop = (int*) malloc(num_rails * sizeof(int));
-    into->issueIdle = (int*) malloc(num_rails * sizeof(int));
-    into->qos_status = (int**) malloc(num_rails * sizeof(int*));
-    into->qos_data = (int**) malloc(num_rails * sizeof(int*));
-    into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int));
-    into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
-    into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
-    into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
-    into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
-    into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
-
-    for(int i = 0; i < num_rails; i++) {
-        into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int));
-        into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int));
-        into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
-        into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
-        into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*));
-        for (int j = 0; j<num_qos_levels; j++) {
-            into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
-            into->terminal_length[i][j] = from->terminal_length[i][j];
-            into->qos_data[i][j] = from->qos_data[i][j];
-            into->qos_status[i][j] = from->qos_status[i][j];
-            copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]);
-        }
-        into->last_buf_full[i] = from->last_buf_full[i];
-        into->in_send_loop[i] = from->in_send_loop[i];
-        into->issueIdle[i] = from->issueIdle[i];
-        into->last_qos_lvl[i] = from->last_qos_lvl[i];
-        into->terminal_available_time[i] = from->terminal_available_time[i];
-        into->stalled_chunks[i] = from->stalled_chunks[i];
-        into->total_chunks[i] = from->total_chunks[i];
-        into->busy_time[i] = from->busy_time[i];
-    }
-
-    into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t));
-    for (int i = 0; i < p->radix; i++) {
-        into->link_traffic[i] = from->link_traffic[i];
+    if (!is_surrogate_on) {
+        into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*));
+        into->terminal_length = (int**) malloc(num_rails * sizeof(int*));
+        into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+        into->in_send_loop = (int*) malloc(num_rails * sizeof(int));
+        into->issueIdle = (int*) malloc(num_rails * sizeof(int));
+        into->qos_status = (int**) malloc(num_rails * sizeof(int*));
+        into->qos_data = (int**) malloc(num_rails * sizeof(int*));
+        into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int));
+        into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+        into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
+        into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
+        into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
+        into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
+
+        for(int i = 0; i < num_rails; i++) {
+            into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int));
+            into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int));
+            into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
+            into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
+            into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*));
+            for (int j = 0; j<num_qos_levels; j++) {
+                into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
+                into->terminal_length[i][j] = from->terminal_length[i][j];
+                into->qos_data[i][j] = from->qos_data[i][j];
+                into->qos_status[i][j] = from->qos_status[i][j];
+                copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]);
+            }
+            into->last_buf_full[i] = from->last_buf_full[i];
+            into->in_send_loop[i] = from->in_send_loop[i];
+            into->issueIdle[i] = from->issueIdle[i];
+            into->last_qos_lvl[i] = from->last_qos_lvl[i];
+            into->terminal_available_time[i] = from->terminal_available_time[i];
+            into->stalled_chunks[i] = from->stalled_chunks[i];
+            into->total_chunks[i] = from->total_chunks[i];
+            into->busy_time[i] = from->busy_time[i];
+        }
+
+        into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t));
+        for (int i = 0; i < p->radix; i++) {
+            into->link_traffic[i] = from->link_traffic[i];
+        }
     }
 
     if (from->local_congestion_controller != NULL) {
@@ -7136,32 +7138,33 @@ static void clean_terminal_state(terminal_state *state) {
     int const num_rails = p->num_rails;
     int const num_qos_levels = p->num_qos_levels;
 
-    // Free all allocated memory
-    for (int i = 0; i < num_rails; i++) {
-        free(state->vc_occupancy[i]);
-        free(state->terminal_length[i]);
-        free(state->qos_status[i]);
-        free(state->qos_data[i]);
-        for (int j = 0; j<num_qos_levels; j++) {
-            clean_terminal_dally_message_list(state->terminal_msgs[i][j]);
-        }
-        free(state->terminal_msgs[i]);
-    }
-
-    free(state->vc_occupancy);
-    free(state->terminal_length);
-    free(state->last_buf_full);
-    free(state->in_send_loop);
-    free(state->issueIdle);
-    free(state->qos_status);
-    free(state->qos_data);
-    free(state->last_qos_lvl);
-    free(state->terminal_available_time);
-    free(state->stalled_chunks);
-    free(state->total_chunks);
-    free(state->busy_time);
-    free(state->link_traffic);
-    free(state->terminal_msgs);
+    if (!is_surrogate_on) {
+        for (int i = 0; i < num_rails; i++) {
+            free(state->vc_occupancy[i]);
+            free(state->terminal_length[i]);
+            free(state->qos_status[i]);
+            free(state->qos_data[i]);
+            for (int j = 0; j<num_qos_levels; j++) {
+                clean_terminal_dally_message_list(state->terminal_msgs[i][j]);
+            }
+            free(state->terminal_msgs[i]);
+        }
+
+        free(state->vc_occupancy);
+        free(state->terminal_length);
+        free(state->last_buf_full);
+        free(state->in_send_loop);
+        free(state->issueIdle);
+        free(state->qos_status);
+        free(state->qos_data);
+        free(state->last_qos_lvl);
+        free(state->terminal_available_time);
+        free(state->stalled_chunks);
+        free(state->total_chunks);
+        free(state->busy_time);
+        free(state->link_traffic);
+        free(state->terminal_msgs);
+    }
 
     if (state->local_congestion_controller != NULL) {
         clean_tlc_state(state->local_congestion_controller);
@@ -7232,31 +7235,33 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
         is_same &= (before->anno == after->anno);
     }
 
-    dragonfly_param const * p = before->params;
-    int const num_qos_levels = p->num_qos_levels;
-    int const num_rails = p->num_rails;
+    if (!is_surrogate_on) {
+        dragonfly_param const * p = before->params;
+        int const num_qos_levels = p->num_qos_levels;
+        int const num_rails = p->num_rails;
 
-    for (int i = 0; i < num_rails; i++) {
-        for (int j = 0; j < num_qos_levels; j++) {
-            is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]);
-            is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]);
-            is_same &= (before->qos_status[i][j] == after->qos_status[i][j]);
-            is_same &= (before->qos_data[i][j] == after->qos_data[i][j]);
-            is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]);
-        }
+        for (int i = 0; i < num_rails; i++) {
+            for (int j = 0; j < num_qos_levels; j++) {
+                is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]);
+                is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]);
+                is_same &= (before->qos_status[i][j] == after->qos_status[i][j]);
+                is_same &= (before->qos_data[i][j] == after->qos_data[i][j]);
+                is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]);
+            }
 
-        is_same &= (before->last_buf_full[i] == after->last_buf_full[i]);
-        is_same &= (before->in_send_loop[i] == after->in_send_loop[i]);
-        is_same &= (before->issueIdle[i] == after->issueIdle[i]);
-        is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]);
-        is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]);
-        is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]);
-        is_same &= (before->total_chunks[i] == after->total_chunks[i]);
-        is_same &= (before->busy_time[i] == after->busy_time[i]);
-    }
+            is_same &= (before->last_buf_full[i] == after->last_buf_full[i]);
+            is_same &= (before->in_send_loop[i] == after->in_send_loop[i]);
+            is_same &= (before->issueIdle[i] == after->issueIdle[i]);
+            is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]);
+            is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]);
+            is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]);
+            is_same &= (before->total_chunks[i] == after->total_chunks[i]);
+            is_same &= (before->busy_time[i] == after->busy_time[i]);
+        }
 
-    for (int i = 0; i < p->radix; i++) {
-        is_same &= (before->link_traffic[i] == after->link_traffic[i]);
+        for (int i = 0; i < p->radix; i++) {
+            is_same &= (before->link_traffic[i] == after->link_traffic[i]);
+        }
     }
 
     // Ignoring model statistics. In general, we don't care if there are errors in the statistics, as they are only approximate. The stastistics don't interferee with the state of the model. There is a bug within the statistics when rolbacking though. A parameters is never reversed properly
@@ -7308,44 +7313,54 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 
     fprintf(out, "%s  |  workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag);
 
-    fprintf(out, "%s  | **  vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s  |        rail %d: [", prefix, i);
-        for (int j=0; j<state->params->num_qos_levels; j++) {
-            fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]);
+    if (is_surrogate_on) {
+        fprintf(out, "%s  | **          vc_occupancy = %p\n", prefix, state->vc_occupancy);
+        fprintf(out, "%s  | *terminal_available_time = %p\n", prefix, state->terminal_available_time);
+        fprintf(out, "%s  | ***        terminal_msgs = %p\n", prefix, state->terminal_msgs);
+    } else {
+        fprintf(out, "%s  | **  vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s  |        rail %d: [", prefix, i);
+            for (int j=0; j<state->params->num_qos_levels; j++) {
+                fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]);
+            }
+            fprintf(out, "]\n");
         }
-        fprintf(out, "]\n");
-    }
-    fprintf(out, "%s  |     ]\n", prefix);
+        fprintf(out, "%s  |     ]\n", prefix);
 
-    fprintf(out, "%s  | *terminal_available_time[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%g", i ? ", " : "", state->terminal_available_time[i]);
-    }
-    fprintf(out, "]\n");
+        fprintf(out, "%s  | *terminal_available_time[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%g", i ? ", " : "", state->terminal_available_time[i]);
+        }
+        fprintf(out, "]\n");
 
-    char addprefix_2[] = "  |    |  | ";
-    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
-    subprefix = (char *) malloc(len_subprefix * sizeof(char));
-    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
-    fprintf(out, "%s  | ***        terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s  |   rail %d: [\n", prefix, i);
-        for (int j=0; j<state->params->num_qos_levels; j++) {
-            fprintf(out, "%s  |    | qos level %d\n", prefix, j);
-            print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]);
+        char addprefix_2[] = "  |    |  | ";
+        len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+        subprefix = (char *) malloc(len_subprefix * sizeof(char));
+        snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+        fprintf(out, "%s  | ***        terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s  |   rail %d: [\n", prefix, i);
+            for (int j=0; j<state->params->num_qos_levels; j++) {
+                fprintf(out, "%s  |    | qos level %d\n", prefix, j);
+                print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]);
+            }
         }
+        fprintf(out, "%s  | ]\n", prefix);
+        free(subprefix);
     }
-    fprintf(out, "%s  | ]\n", prefix);
-    free(subprefix);
 
     fprintf(out, "%s  | ***   terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail);
 
-    fprintf(out, "%s  | *       in_send_loop[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]);
+    if (is_surrogate_on) {
+        fprintf(out, "%s  | *          in_send_loop = %p\n", prefix, state->in_send_loop);
+    } else {
+        fprintf(out, "%s  | *       in_send_loop[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]);
+        }
+        fprintf(out, "]\n");
     }
-    fprintf(out, "]\n");
 
     char addprefix_3[] = "  |    | ";
     len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_3) + 1;
@@ -7359,51 +7374,62 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |    ]\n", prefix);
     free(subprefix);
 
-    fprintf(out, "%s  | **      qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s  |          rail %d: [", prefix, i);
-        for (int j=0; j<state->params->num_qos_levels; j++) {
-            fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]);
+    if (is_surrogate_on) {
+        fprintf(out, "%s  | **           qos_status = %p\n", prefix, state->qos_status);
+        fprintf(out, "%s  | **             qos_data = %p\n", prefix, state->qos_data);
+        fprintf(out, "%s  | *          last_qos_lvl = %p\n", prefix, state->last_qos_lvl);
+    } else {
+        fprintf(out, "%s  | **      qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s  |          rail %d: [", prefix, i);
+            for (int j=0; j<state->params->num_qos_levels; j++) {
+                fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]);
+            }
+            fprintf(out, "]\n");
         }
-        fprintf(out, "]\n");
-    }
-    fprintf(out, "%s  |       ]\n", prefix);
+        fprintf(out, "%s  |       ]\n", prefix);
 
-    fprintf(out, "%s  | **        qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s  |            rail %d: [", prefix, i);
-        for (int j=0; j<state->params->num_qos_levels; j++) {
-            fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]);
+        fprintf(out, "%s  | **        qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s  |            rail %d: [", prefix, i);
+            for (int j=0; j<state->params->num_qos_levels; j++) {
+                fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]);
+            }
+            fprintf(out, "]\n");
+        }
+        fprintf(out, "%s  |         ]\n", prefix);
+
+        fprintf(out, "%s  | *        last_qos_lvl[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]);
         }
         fprintf(out, "]\n");
     }
-    fprintf(out, "%s  |         ]\n", prefix);
-
-    fprintf(out, "%s  | *        last_qos_lvl[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]);
-    }
-    fprintf(out, "]\n");
 
     fprintf(out, "%s  |         is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw);
     fprintf(out, "%s  | *                     st = %p\n", prefix, state->st);
     fprintf(out, "%s  | *                  cc_st = %p\n", prefix, state->cc_st);
 
-    fprintf(out, "%s  | *           issueIdle[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]);
-    }
-    fprintf(out, "]\n");
-
-    fprintf(out, "%s  | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s  |       rail %d: [", prefix, i);
-        for (int j=0; j<state->params->num_qos_levels; j++) {
-            fprintf(out, "%s%d", j ? ", " : "", state->terminal_length[i][j]);
+    if (is_surrogate_on) {
+        fprintf(out, "%s  | *             issueIdle = %p\n", prefix, state->issueIdle);
+        fprintf(out, "%s  | **      terminal_length = %p\n", prefix, state->terminal_length);
+    } else {
+        fprintf(out, "%s  | *           issueIdle[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]);
         }
         fprintf(out, "]\n");
+
+        fprintf(out, "%s  | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s  |       rail %d: [", prefix, i);
+            for (int j=0; j<state->params->num_qos_levels; j++) {
+                fprintf(out, "%s%d", j ? ", " : "", state->terminal_length[i][j]);
+            }
+            fprintf(out, "]\n");
+        }
+        fprintf(out, "%s  |    ]\n", prefix);
     }
-    fprintf(out, "%s  |    ]\n", prefix);
 
     fprintf(out, "%s  | *                   anno = %s\n", prefix, state->anno ? state->anno : "(nil)");
     fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
@@ -7416,35 +7442,44 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |          finished_chunks = %ld\n", prefix, state->finished_chunks);
     fprintf(out, "%s  |         finished_packets = %ld\n", prefix, state->finished_packets);
 
-    fprintf(out, "%s  | *       last_buf_full[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]);
-    }
-    fprintf(out, "]\n");
+    if (is_surrogate_on) {
+        fprintf(out, "%s  | **      terminal_length = %p\n", prefix, state->terminal_length);
+        fprintf(out, "%s  | *         last_buf_full = %p\n", prefix, state->last_buf_full);
+        fprintf(out, "%s  | *             busy_time = %p\n", prefix, state->busy_time);
+        fprintf(out, "%s  | *          link_traffic = %p\n", prefix, state->link_traffic);
+        fprintf(out, "%s  | *          total_chunks = %p\n", prefix, state->total_chunks);
+        fprintf(out, "%s  | *        stalled_chunks = %p\n", prefix, state->stalled_chunks);
+    } else {
+        fprintf(out, "%s  | *       last_buf_full[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]);
+        }
+        fprintf(out, "]\n");
 
-    fprintf(out, "%s  | *           busy_time[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]);
-    }
-    fprintf(out, "]\n");
+        fprintf(out, "%s  | *           busy_time[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]);
+        }
+        fprintf(out, "]\n");
 
-    fprintf(out, "%s  | *        link_traffic[%d] = [", prefix, state->params->radix);
-    for (int i=0; i<state->params->radix; i++) {
-        fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]);
-    }
-    fprintf(out, "]\n");
+        fprintf(out, "%s  | *        link_traffic[%d] = [", prefix, state->params->radix);
+        for (int i=0; i<state->params->radix; i++) {
+            fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]);
+        }
+        fprintf(out, "]\n");
 
-    fprintf(out, "%s  | *        total_chunks[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]);
-    }
-    fprintf(out, "]\n");
+        fprintf(out, "%s  | *        total_chunks[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]);
+        }
+        fprintf(out, "]\n");
 
-    fprintf(out, "%s  | *      stalled_chunks[%d] = [", prefix, state->params->num_rails);
-    for (int i=0; i<state->params->num_rails; i++) {
-        fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]);
+        fprintf(out, "%s  | *      stalled_chunks[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
+            fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]);
+        }
+        fprintf(out, "]\n");
     }
-    fprintf(out, "]\n");
 
     fprintf(out, "%s  |          injected_chunks = %lu\n", prefix, state->injected_chunks);
     fprintf(out, "%s  |           ejected_chunks = %lu\n", prefix, state->ejected_chunks);

From d2cf6ae0e0e701651706623fa0a90797037cb360 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:37:10 -0400
Subject: [PATCH 120/188] Fixing surrogate switch

No simulation would run in hybrid mode because of a previous change on
how the switch is done.
---
 src/surrogate/switch.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 32086f46..5754e77a 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -131,13 +131,16 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
     // We have to put the events back into the queue after we switch back, but if we never
     // switch back they will never get to be processed and thus we can clean them
     double switch_offset = g_tw_ts_end;
-    if (switch_at.current_i + 1 < switch_at.total) {
-        double const next_switch = switch_at.time_stampts[switch_at.current_i + 1];
+    if (switch_at.current_i < switch_at.total) {
+        double const next_switch = switch_at.time_stampts[switch_at.current_i];
         double const pre_switch_time = gvt;
         switch_offset = next_switch - pre_switch_time;
         assert(pre_switch_time < next_switch);
         //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
     }
+    assert(0 < switch_at.current_i && switch_at.current_i <= switch_at.total);
+    double const current_switch_time = switch_at.time_stampts[switch_at.current_i - 1];
+    assert(current_switch_time == gvt);
 
     tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue
     int events_dequeued = 0;  // for stats on code correctness
@@ -164,13 +167,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
             assert(next_event->recv_ts == next_event->sig.recv_ts);
             next_event->recv_ts += switch_offset;
             next_event->sig.recv_ts = next_event->recv_ts;
-        }
-        assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts);
 #else
             next_event->recv_ts += switch_offset;
-        }
-        assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at);
 #endif
+        }
+        assert(next_event->recv_ts >= current_switch_time);
 
         // store event in deque_events to inject immediately back to the queue
         next_event->prev = dequed_events;
@@ -277,7 +278,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) {
 #else
 static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) {
 #endif
-    if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) {
+    if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL && g_tw_synchronization_protocol != SEQUENTIAL_ROLLBACK_CHECK) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
     }
 
@@ -497,9 +498,9 @@ void director_call(tw_pe * pe, tw_stime gvt) {
 
     // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic
 #ifdef USE_RAND_TIEBREAKER
-    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT_sig.recv_ts == gvt));
+    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK) || (pe->GVT_sig.recv_ts == gvt));
 #else
-    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT == gvt));
+    assert((g_tw_synchronization_protocol == SEQUENTIAL) || (g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK) || (pe->GVT == gvt));
 #endif
 
     // Do not process if the simulation ended

From 4b6bc9a915903ef5fb67be8adaa9887119f92c0c Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:38:46 -0400
Subject: [PATCH 121/188] Fixing state that wasn't properly reversed

---
 src/networks/model-net/dragonfly-dally.C | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 6df7ac68..5a63bb41 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4605,6 +4605,7 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag
 
     terminal_dally_message_list* cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st);
     
+    cur_entry->msg.travel_start_time = msg->saved_avg_time;
     int data_size = s->params->chunk_size;
     if(cur_entry->msg.packet_size < s->params->chunk_size)
         data_size = cur_entry->msg.packet_size % s->params->chunk_size;
@@ -4671,6 +4672,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size;
     if(cur_entry->msg.packet_size < s->params->chunk_size)
         num_chunks++;
+    msg->saved_avg_time = cur_entry->msg.travel_start_time;  // reusing field saved_avg_time. It is only used in another event handler path (arrive). So, no interruptions here
     cur_entry->msg.travel_start_time = tw_now(lp);
 
     double bandwidth_coef = 1;

From 8c8ccbc3be1811e3afe7cd069a19299045a05da5 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:40:03 -0400
Subject: [PATCH 122/188] Fixing rollback of member `remaining_sz_packets` in
 terminal_state

---
 src/networks/model-net/dragonfly-dally.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 5a63bb41..576da098 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5171,7 +5171,7 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess
         s->remaining_sz_packets[packet_key] += s->params->chunk_size;
     } else {
         if (bf->c29) {
-            s->remaining_sz_packets[packet_key] += s->params->chunk_size;
+            s->remaining_sz_packets.erase(packet_key);
         }
     }
 

From ba77a088e4d5ea6122960c416d5ef1f363a06c38 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 12 Mar 2025 14:41:31 -0400
Subject: [PATCH 123/188] Fixing faulty logic when rollbacking event for
 background traffic

---
 src/network-workloads/model-net-mpi-replay.c | 5 +++--
 src/networks/model-net/core/model-net-lp.c   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 55649581..6468d7ff 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -1136,7 +1136,7 @@ void arrive_syn_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
     s->ross_sample.num_bytes_recvd -= data;
     s->send_time = m->rc.arrive.saved_send_time;
     s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample;
-    if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time)
+    if(bf->c0)
     {
         s->max_time = m->rc.arrive.saved_prev_max_time;
         s->ross_sample.max_time = m->rc.arrive.saved_prev_max_time;
@@ -1150,6 +1150,7 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
     m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time;
     if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time)
     {
+        bf->c0 = 1;
         m->rc.arrive.saved_prev_max_time = s->max_time;
         s->max_time = tw_now(lp) - m->fwd.sim_start_time;
         s->ross_sample.max_time = tw_now(lp) - m->fwd.sim_start_time;
@@ -2769,7 +2770,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
     s->num_events_processed++;
 #endif /* if LP_DEBUG */
 
-    //*(int *)bf = (int)0;
+    memset(bf, 0, sizeof(tw_bf));
     rc_stack_gc(lp, s->matched_reqs);
 //    rc_stack_gc(lp, s->indices);
     rc_stack_gc(lp, s->processed_ops);
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index 536c44a1..b513675b 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -16,7 +16,7 @@
 #define MN_NAME "model_net_base"
 
 #define DEBUG 0
-#define MODELNET_LP_DEBUG 1
+#define MODELNET_LP_DEBUG 0
 /**** BEGIN SIMULATION DATA STRUCTURES ****/
 
 int model_net_base_magic;

From ddf198130db6d76cc6399781b41eb984ac5de3b2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 13 Mar 2025 08:23:48 -0400
Subject: [PATCH 124/188] Fixing condition for surrogate switch

---
 src/surrogate/switch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 5754e77a..2481e28b 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -140,7 +140,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) {
     }
     assert(0 < switch_at.current_i && switch_at.current_i <= switch_at.total);
     double const current_switch_time = switch_at.time_stampts[switch_at.current_i - 1];
-    assert(current_switch_time == gvt);
+    assert(current_switch_time <= gvt);
 
     tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue
     int events_dequeued = 0;  // for stats on code correctness

From 03e5fd4f6d48f189a1f84a62ac79fadf45c77b78 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 18 Mar 2025 11:32:23 -0400
Subject: [PATCH 125/188] Fixing the switch from high-fidelity to surrogate

---
 src/networks/model-net/dragonfly-dally.C |  2 +-
 src/surrogate/switch.c                   | 45 ++++++++++++++----------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 576da098..83b6a46f 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4831,7 +4831,7 @@ static void notify_dest_lp_of(
     terminal_dally_message * new_msg;
     // Lower value in priority means that it will be processed first
     // This event will be processed before any predicted packet arrives (even if scheduled at the same timestamp)
-    tw_event *e = model_net_method_event_new_user_prio(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL, 0.5);
+    tw_event *e = model_net_method_event_new_user_prio(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL, 1);
 
     memcpy(new_msg, msg, sizeof(terminal_dally_message)); // Just making sure that if the simulation breaks because we didn't set some value below, it breaks in a spectacular manner (~0 can be -1)
     assert(new_msg->dfdally_src_terminal_id == s->terminal_id);
diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index c2b9a626..e05436da 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -134,14 +134,14 @@ static void shift_events_to_future_pe(tw_pe * pe) {
     // switch back they will never get to be processed and thus we can clean them
     double switch_offset = g_tw_ts_end;
     if (switch_at.current_i < switch_at.total) {
-        double const next_switch = switch_at.time_stampts[switch_at.current_i];
+        double const next_switch = switch_at.time_stampts[switch_at.current_i + 1];
         double const pre_switch_time = gvt;
         switch_offset = next_switch - pre_switch_time;
         assert(pre_switch_time < next_switch);
         //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
     }
-    assert(0 < switch_at.current_i && switch_at.current_i <= switch_at.total);
-    double const current_switch_time = switch_at.time_stampts[switch_at.current_i - 1];
+    assert(0 <= switch_at.current_i && switch_at.current_i < switch_at.total);
+    double const current_switch_time = switch_at.time_stampts[switch_at.current_i];
     assert(current_switch_time <= gvt);
 
     tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue
@@ -155,6 +155,9 @@ static void shift_events_to_future_pe(tw_pe * pe) {
 #else
         assert(next_event->recv_ts >= gvt);
 #endif
+        if (next_event->event_id && next_event->state.remote) {
+            tw_hash_remove(pe->hash_t, next_event, next_event->send_pe);
+        }
 
         // finding out lp type
         char const * lp_type_name;
@@ -191,6 +194,10 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         prev_event->prev = NULL;
         tw_pq_enqueue(pe->pq, prev_event);
 
+        if (prev_event->event_id && prev_event->state.remote) {
+            tw_hash_insert(pe->hash_t, prev_event, prev_event->send_pe);
+        }
+
         events_enqueued++;
     }
 
@@ -286,7 +293,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
     }
 
     tw_event *** lps_events = order_events_per_lps(pe);
+    printf("PE %d - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size);
     shift_events_to_future_pe(pe);
+    printf("PE %d - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -308,6 +317,10 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
         bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0;
         struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
 
+        pe->cur_event = pe->abort_event;
+        pe->cur_event->caused_by_me = NULL;
+        pe->cur_event->sig = pe->GVT_sig;
+
         if (lp_type_switch) {
             if (lp_type_switch->trigger_idle_modelnet) {
                 assert(is_lp_modelnet);
@@ -363,6 +376,10 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
         bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0;
         struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
 
+        pe->cur_event = pe->abort_event;
+        pe->cur_event->caused_by_me = NULL;
+        pe->cur_event->sig = pe->GVT_sig;
+
         if (lp_type_switch) {
             if (lp_type_switch->trigger_idle_modelnet) {
                 assert(is_lp_modelnet);
@@ -385,8 +402,6 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
     }
 }
 
-
-// This is an impure function, calling it twice WILL give different results. Only call it once!
 bool hit_trigger(tw_stime gvt) {
     if ( switch_at.current_i < switch_at.total
         && g_tw_trigger_gvt_hook.active == GVT_HOOK_triggered) {
@@ -398,14 +413,6 @@ bool hit_trigger(tw_stime gvt) {
 #endif
         assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
 
-        // Activating next switch
-        if (++switch_at.current_i < switch_at.total) {
-            double const next_switch = switch_at.time_stampts[switch_at.current_i];
-            // Setting trigger for next switch
-            //printf("Adding a trigger to activate next switch!\n");
-            tw_trigger_gvt_hook_at(next_switch);
-        }
-        //
         return true;
     } else {
         return false;
@@ -415,15 +422,9 @@ bool hit_trigger(tw_stime gvt) {
 
 void switch_model(tw_pe * pe) {
     // Rollback if in optimistic mode
-#ifdef USE_RAND_TIEBREAKER
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         rollback_and_cancel_events_pe(pe);
     }
-#else
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        rollback_and_cancel_events_pe(pe);
-    }
-#endif
     surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
         printf("Switching to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
@@ -511,6 +512,12 @@ void director_call(tw_pe * pe) {
     double const end = tw_clock_read();
     surrogate_switching_time += end - start;
 
+    // Setting trigger for next switch
+    if (++switch_at.current_i < switch_at.total) {
+        double next_switch = switch_at.time_stampts[switch_at.current_i];
+        tw_trigger_gvt_hook_at(next_switch);
+    }
+
     if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) {
         printf("Switch completed!\n");
     }

From a4cac4dcd9fd7eaa0251b1b33f6c66dcf5ad498a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 18 Mar 2025 11:33:51 -0400
Subject: [PATCH 126/188] Adding ability to delete events at director call

---
 codes/surrogate/switch.h                 |  1 +
 src/networks/model-net/dragonfly-dally.C |  8 ++++++--
 src/surrogate/switch.c                   | 17 +++++++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h
index c538e769..d23abb00 100644
--- a/codes/surrogate/switch.h
+++ b/codes/surrogate/switch.h
@@ -49,6 +49,7 @@ struct lp_types_switch {
     model_switch_f        highdef_to_surrogate;
     model_switch_f        surrogate_to_highdef;
     model_ask_if_freeze_f should_event_be_frozen;  // NULL means event from LP type shouldn't be frozen
+    model_ask_if_freeze_f should_event_be_deleted;  // NULL means event from LP type shouldn't be deleted
 };
 
 struct switch_at_struct {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 83b6a46f..2a3d0b9a 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2446,12 +2446,16 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                  .trigger_idle_modelnet = true,
                  .highdef_to_surrogate = (model_switch_f) dragonfly_dally_terminal_highdef_to_surrogate,
                  .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef,
-                 .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen},
+                 .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen,
+                 .should_event_be_deleted = NULL,
+                },
                 {.lpname = "modelnet_dragonfly_dally_router",
                  .trigger_idle_modelnet = false,
                  .highdef_to_surrogate = NULL,
                  .surrogate_to_highdef = NULL,
-                 .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen},
+                 .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen,
+                 .should_event_be_deleted = NULL,
+                },
                 0
             }
         };
diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index e05436da..babbc31f 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -166,6 +166,7 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
 
         // shifting time stamps to the future for events to freeze
+        bool deleted = false;
         if (lp_type_switch && lp_type_switch->should_event_be_frozen
                 && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) {
 #ifdef USE_RAND_TIEBREAKER
@@ -175,13 +176,21 @@ static void shift_events_to_future_pe(tw_pe * pe) {
 #else
             next_event->recv_ts += switch_offset;
 #endif
+            assert(next_event->recv_ts >= current_switch_time);
+        // deleting event if we need to
+        } else if (lp_type_switch && lp_type_switch->should_event_be_deleted
+                && lp_type_switch->should_event_be_deleted(next_event->dest_lp, next_event)) {
+            tw_event_free(pe, next_event);
+            deleted = true;
         }
-        assert(next_event->recv_ts >= current_switch_time);
 
         // store event in deque_events to inject immediately back to the queue
-        next_event->prev = dequed_events;
-        dequed_events = next_event;
-        events_dequeued++;
+        if (!deleted) {
+             next_event->prev = dequed_events;
+             dequed_events = next_event;
+             events_dequeued++;
+             assert(next_event->recv_ts >= current_switch_time);
+        }
 
         next_event = tw_pq_dequeue(pe->pq);
     }

From dde0551527a5d6569f4bf83486066666dab77ec8 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 18 Mar 2025 11:46:02 -0400
Subject: [PATCH 127/188] Fixing some debug output in surrogate switch

---
 src/surrogate/switch.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index babbc31f..088de48b 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -210,7 +210,7 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         events_enqueued++;
     }
 
-    if (DEBUG_DIRECTOR > 1) {
+    if (DEBUG_DIRECTOR > 0 && events_dequeued != events_enqueued) {
         printf("PE %lu: Discrepancy on number of events processed %d (%d dequeued and %d enqueued)\n",
                 g_tw_mynode, events_dequeued - events_enqueued, events_dequeued, events_enqueued);
     }
@@ -302,9 +302,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
     }
 
     tw_event *** lps_events = order_events_per_lps(pe);
-    printf("PE %d - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size);
+    printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size);
     shift_events_to_future_pe(pe);
-    printf("PE %d - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size);
+    printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {

From 18d300e00db60461e61bdc7c364e176132d858b9 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 20 Mar 2025 21:25:47 -0400
Subject: [PATCH 128/188] Adding deep-copy/check/print functions for
 router_state

---
 src/networks/model-net/dragonfly-dally.C | 536 ++++++++++++++++++++++-
 1 file changed, 520 insertions(+), 16 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 2a3d0b9a..e33e6da3 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3895,6 +3895,7 @@ static void router_dally_init(router_state * r, tw_lp * lp)
 
     }
 
+    r->snapshot_data = NULL;
     if (num_snapshots) {
         r->snapshot_data = (int**)calloc(num_snapshots, sizeof(int*));
         for(int i = 0; i < num_snapshots; i++)
@@ -3905,6 +3906,8 @@ static void router_dally_init(router_state * r, tw_lp * lp)
     }
 
     //Xin: msg counters for apps 
+    r->agg_link_traffic = NULL;
+    r->agg_busy_time = NULL;
     if(p->counting_bool > 0)
     {   
         r->agg_link_traffic = (int64_t **) calloc(p->counting_windows, sizeof(int64_t *));
@@ -7059,6 +7062,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
     // from->sample_stat
     // from->ross_sample
     // from->busy_time_ross_sample
+    // from->busy_time_sample
 
     memcpy(into, from, sizeof(terminal_state));
 
@@ -7080,6 +7084,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
         into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
         into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
         into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
+        into->link_traffic = (uint64_t*) malloc(num_rails * sizeof(uint64_t));
 
         for(int i = 0; i < num_rails; i++) {
             into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int));
@@ -7102,10 +7107,6 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
             into->stalled_chunks[i] = from->stalled_chunks[i];
             into->total_chunks[i] = from->total_chunks[i];
             into->busy_time[i] = from->busy_time[i];
-        }
-
-        into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t));
-        for (int i = 0; i < p->radix; i++) {
             into->link_traffic[i] = from->link_traffic[i];
         }
     }
@@ -7263,9 +7264,6 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
             is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]);
             is_same &= (before->total_chunks[i] == after->total_chunks[i]);
             is_same &= (before->busy_time[i] == after->busy_time[i]);
-        }
-
-        for (int i = 0; i < p->radix; i++) {
             is_same &= (before->link_traffic[i] == after->link_traffic[i]);
         }
     }
@@ -7468,8 +7466,8 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
         }
         fprintf(out, "]\n");
 
-        fprintf(out, "%s  | *        link_traffic[%d] = [", prefix, state->params->radix);
-        for (int i=0; i<state->params->radix; i++) {
+        fprintf(out, "%s  | *        link_traffic[%d] = [", prefix, state->params->num_rails);
+        for (int i=0; i<state->params->num_rails; i++) {
             fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]);
         }
         fprintf(out, "]\n");
@@ -7537,6 +7535,509 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  | *           frozen_state = %p\n", prefix, state->frozen_state);
 }
 
+// Original function implemented by Claude
+static void save_router_state(router_state *into, router_state const *from) {
+    // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents
+    // from->local_congestion_controller
+
+    // Missing deep-clone/comparison/print members.
+    // from->rsamples
+    // from->ross_rsample
+    // from->busy_time_sample
+    // from->link_traffic_sample
+    // from->link_traffic_ross_sample
+
+    memcpy(into, from, sizeof(router_state));
+
+    dragonfly_param const * p = into->params;
+    int const radix = p->radix;
+    int const num_qos_levels = p->num_qos_levels;
+
+    into->global_channel = (int*) malloc(p->num_global_channels * sizeof(int));
+
+    for (int i = 0; i < p->num_global_channels; i++) {
+        into->global_channel[i] = from->global_channel[i];
+    }
+
+    into->next_output_available_time = (tw_stime*) malloc(radix * sizeof(tw_stime));
+    into->last_buf_full = (tw_stime*) malloc(radix * sizeof(tw_stime));
+    into->busy_time = (tw_stime*) malloc(radix * sizeof(tw_stime));
+    into->stalled_chunks = (unsigned long*) malloc(radix * sizeof(unsigned long));
+    into->total_chunks = (unsigned long*) malloc(radix * sizeof(unsigned long));
+    into->in_send_loop = (int*) malloc(radix * sizeof(int));
+    into->queued_count = (int*) malloc(radix * sizeof(int));
+    into->port_bandwidths = (double*) malloc(radix * sizeof(double));
+    into->vc_max_sizes = (int*) malloc(radix * sizeof(int));
+    into->link_traffic = (int64_t*) malloc(radix * sizeof(int64_t));
+    into->last_qos_lvl = (int*) malloc(radix * sizeof(int));
+    into->vc_occupancy = (int**) malloc(radix * sizeof(int*));
+    into->qos_status = (int**) malloc(radix * sizeof(int*));
+    into->qos_data = (int**) malloc(radix * sizeof(int*));
+    into->pending_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**));
+    into->queued_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**));
+
+    for (int i = 0; i < radix; i++) {
+        into->next_output_available_time[i] = from->next_output_available_time[i];
+        into->last_buf_full[i] = from->last_buf_full[i];
+        into->busy_time[i] = from->busy_time[i];
+        into->stalled_chunks[i] = from->stalled_chunks[i];
+        into->total_chunks[i] = from->total_chunks[i];
+        into->in_send_loop[i] = from->in_send_loop[i];
+        into->queued_count[i] = from->queued_count[i];
+        into->port_bandwidths[i] = from->port_bandwidths[i];
+        into->vc_max_sizes[i] = from->vc_max_sizes[i];
+        into->link_traffic[i] = from->link_traffic[i];
+        into->last_qos_lvl[i] = from->last_qos_lvl[i];
+
+        into->vc_occupancy[i] = (int*) malloc(p->num_vcs * sizeof(int));
+        into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
+        into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
+
+        into->pending_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*));
+        into->queued_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*));
+
+        for (int j = 0; j < p->num_vcs; j++) {
+            into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
+            copy_terminal_dally_message_list(&into->pending_msgs[i][j], from->pending_msgs[i][j]);
+            copy_terminal_dally_message_list(&into->queued_msgs[i][j], from->queued_msgs[i][j]);
+        }
+        for (int j = 0; j < num_qos_levels; j++) {
+            into->qos_status[i][j] = from->qos_status[i][j];
+            into->qos_data[i][j] = from->qos_data[i][j];
+        }
+    }
+
+    into->snapshot_data = NULL;
+    if (num_snapshots) {
+        into->snapshot_data = (int**) malloc(num_snapshots * sizeof(int*));
+        int size_snapshot = from->params->num_vcs * from->params->radix;
+        for (int i = 0; i < num_snapshots; i++) {
+            into->snapshot_data[i] = (int*) malloc(size_snapshot * sizeof(int));
+            memcpy(into->snapshot_data[i], from->snapshot_data[i], size_snapshot * sizeof(int));
+        }
+    }
+
+    if (p->counting_bool > 0) {
+        assert(from->agg_busy_time != NULL);
+        assert(from->agg_link_traffic != NULL);
+        into->agg_busy_time = (tw_stime**) malloc(p->counting_windows * sizeof(tw_stime*));
+        into->agg_link_traffic = (int64_t**) malloc(p->counting_windows * sizeof(int64_t*));
+
+        for (int i = 0; i < p->counting_windows; i++) {
+            into->agg_busy_time[i] = (tw_stime*) malloc(radix * sizeof(tw_stime));
+            into->agg_link_traffic[i] = (int64_t*) malloc(radix * sizeof(int64_t));
+            memcpy(into->agg_busy_time[i], from->agg_busy_time[i], radix * sizeof(tw_stime));
+            memcpy(into->agg_link_traffic[i], from->agg_link_traffic[i], radix * sizeof(int64_t));
+        }
+    }
+
+    //if (from->local_congestion_controller != NULL) {
+    //    assert(g_congestion_control_enabled);
+    //    into->local_congestion_controller = (rlc_state*) malloc(sizeof(rlc_state));
+    //    save_rlc_state(into->local_congestion_controller, from->local_congestion_controller);
+    //}
+}
+
+// Original function implemented by Claude
+static void clean_router_state(router_state *state) {
+    dragonfly_param const * p = state->params;
+    int const radix = p->radix;
+
+    // Free simple arrays
+    free(state->global_channel);
+    free(state->next_output_available_time);
+    free(state->last_buf_full);
+    free(state->busy_time);
+    free(state->stalled_chunks);
+    free(state->total_chunks);
+    free(state->in_send_loop);
+    free(state->queued_count);
+    free(state->port_bandwidths);
+    free(state->vc_max_sizes);
+    free(state->link_traffic);
+    free(state->last_qos_lvl);
+
+    // Clean and free 2D arrays
+    for (int i = 0; i < radix; i++) {
+        free(state->vc_occupancy[i]);
+        free(state->qos_status[i]);
+        free(state->qos_data[i]);
+
+        for (int j = 0; j < p->num_vcs; j++) {
+            clean_terminal_dally_message_list(state->pending_msgs[i][j]);
+            clean_terminal_dally_message_list(state->queued_msgs[i][j]);
+        }
+
+        free(state->pending_msgs[i]);
+        free(state->queued_msgs[i]);
+    }
+
+    free(state->vc_occupancy);
+    free(state->qos_status);
+    free(state->qos_data);
+    free(state->pending_msgs);
+    free(state->queued_msgs);
+
+    if (num_snapshots) {
+        for (int i = 0; i < num_snapshots; i++) {
+            free(state->snapshot_data[i]);
+        }
+        free(state->snapshot_data);
+    }
+
+    if (p->counting_bool > 0) {
+        for (int i = 0; i < p->counting_windows; i++) {
+            free(state->agg_busy_time[i]);
+            free(state->agg_link_traffic[i]);
+        }
+        free(state->agg_busy_time);
+        free(state->agg_link_traffic);
+    }
+
+    //if (state->local_congestion_controller != NULL) {
+    //    clean_rlc_state(state->local_congestion_controller);
+    //    free(state->local_congestion_controller);
+    //}
+}
+
+// Original function implemented by Claude
+static bool check_router_state(router_state const *before, router_state const *after) {
+    dragonfly_param const * p = before->params;
+    int const radix = p->radix;
+    int const num_qos_levels = p->num_qos_levels;
+
+    if (before->router_id != after->router_id ||
+        before->group_id != after->group_id ||
+        before->plane_id != after->plane_id ||
+        before->op_arr_size != after->op_arr_size ||
+        before->max_arr_size != after->max_arr_size ||
+        before->workloads_finished_flag != after->workloads_finished_flag ||
+        before->is_monitoring_bw != after->is_monitoring_bw ||
+        before->last_time != after->last_time) {
+        return false;
+    }
+
+    for (int i = 0; i < p->num_global_channels; i++) {
+        if (before->global_channel[i] != after->global_channel[i]) {
+            return false;
+        }
+    }
+
+    for (int i = 0; i < radix; i++) {
+        if (before->next_output_available_time[i] != after->next_output_available_time[i] ||
+            before->last_buf_full[i] != after->last_buf_full[i] ||
+            before->busy_time[i] != after->busy_time[i] ||
+            before->stalled_chunks[i] != after->stalled_chunks[i] ||
+            before->total_chunks[i] != after->total_chunks[i] ||
+            before->in_send_loop[i] != after->in_send_loop[i] ||
+            before->queued_count[i] != after->queued_count[i] ||
+            before->port_bandwidths[i] != after->port_bandwidths[i] ||
+            before->vc_max_sizes[i] != after->vc_max_sizes[i] ||
+            before->link_traffic[i] != after->link_traffic[i] ||
+            before->last_qos_lvl[i] != after->last_qos_lvl[i]) {
+            return false;
+        }
+
+        for (int j = 0; j < p->num_vcs; j++) {
+            if (before->vc_occupancy[i][j] != after->vc_occupancy[i][j]) {
+                return false;
+            }
+
+            if (!check_terminal_dally_message_list(before->pending_msgs[i][j], after->pending_msgs[i][j]) ||
+                !check_terminal_dally_message_list(before->queued_msgs[i][j], after->queued_msgs[i][j])) {
+                return false;
+            }
+        }
+
+        for (int j = 0; j < num_qos_levels; j++) {
+            if (before->qos_status[i][j] != after->qos_status[i][j] ||
+                before->qos_data[i][j] != after->qos_data[i][j]) {
+                return false;
+            }
+        }
+    }
+
+    if ((before->snapshot_data == NULL) != (after->snapshot_data == NULL)) {
+        return false;
+    }
+
+    if (num_snapshots) {
+        assert(before->snapshot_data != NULL);
+        int size_snapshot = before->params->num_vcs * before->params->radix;
+        for (int i = 0; i < num_snapshots; i++) {
+            assert(after->snapshot_data[i] == NULL);
+
+            for (int j = 0; j < size_snapshot; j++) {
+                if (before->snapshot_data[i][j] != after->snapshot_data[i][j]) {
+                    return false;
+                }
+            }
+        }
+    }
+
+    if ((before->agg_busy_time == NULL) != (after->agg_busy_time == NULL)) {
+        return false;
+    }
+    if ((before->agg_link_traffic == NULL) != (after->agg_link_traffic == NULL)) {
+        return false;
+    }
+
+    if (p->counting_bool > 0) {
+        assert(before->agg_busy_time != NULL && after->agg_busy_time);
+        assert(before->agg_link_traffic != NULL && after->agg_link_traffic);
+        for (int i = 0; i < p->counting_windows; i++) {
+            for (int j = 0; j < radix; j++) {
+                if (before->agg_busy_time[i][j] != after->agg_busy_time[i][j] ||
+                    before->agg_link_traffic[i][j] != after->agg_link_traffic[i][j]) {
+                    return false;
+                }
+            }
+        }
+    }
+
+    //if (before->local_congestion_controller != NULL) {
+    //    if (!check_rlc_state(before->local_congestion_controller, after->local_congestion_controller)) {
+    //        return false;
+    //    }
+    //}
+
+    // Check strings
+    if (strncmp(before->output_buf, after->output_buf, 4096) != 0 ||
+        strncmp(before->output_buf5, after->output_buf5, 4096) != 0 ||
+        strncmp(before->output_buf6, after->output_buf6, 4096) != 0) {
+        return false;
+    }
+
+    // All checks passed
+    return true;
+}
+
+// Original function implemented by Claude
+static void print_router_state(FILE * out, char const * prefix, router_state * state) {
+    dragonfly_param const * p = state->params;
+    int const radix = p->radix;
+    int const num_qos_levels = p->num_qos_levels;
+
+    fprintf(out, "%srouter_state (dragonfly) ->\n", prefix);
+    fprintf(out, "%s  |              router_id = %u\n", prefix, state->router_id);
+    fprintf(out, "%s  |               group_id = %d\n", prefix, state->group_id);
+    fprintf(out, "%s  |               plane_id = %d\n", prefix, state->plane_id);
+    fprintf(out, "%s  |            op_arr_size = %d\n", prefix, state->op_arr_size);
+    fprintf(out, "%s  |           max_arr_size = %d\n", prefix, state->max_arr_size);
+
+    fprintf(out, "%s  | *       global_channel[%d] = [", prefix, radix);
+    for (int i = 0; i < p->num_global_channels; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->global_channel[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  |                connMan = <DragonflyConnectionManager object>\n", prefix);
+
+    char addprefix[] = "  | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+    char * subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    fprintf(out, "%s  | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller);
+    //if (state->local_congestion_controller != NULL) {
+    //    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix);
+    //    print_rlc_state(out, subprefix, state->local_congestion_controller);
+    //}
+    free(subprefix);
+
+    fprintf(out, "%s  | *next_output_available_time[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->next_output_available_time[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *     last_buf_full[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *         busy_time[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *     busy_time_sample = %p\n", prefix, state->busy_time_sample);
+
+    fprintf(out, "%s  | *    stalled_chunks[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *      total_chunks[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]);
+    }
+    fprintf(out, "]\n");
+
+    char addprefix_2[] = "  |   |  |  ";
+    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+
+    fprintf(out, "%s  | ***   pending_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s  |   port %d: [\n", prefix, i);
+        for (int j = 0; j < p->num_vcs; j++) {
+            fprintf(out, "%s  |   |  vcs # %d\n", prefix, j);
+            print_terminal_dally_message_list(out, subprefix, NULL, state->pending_msgs[i][j]);
+        }
+        fprintf(out, "%s  |   ]\n", prefix);
+    }
+    fprintf(out, "%s  | ]\n", prefix);
+
+    fprintf(out, "%s  | ***    pending_msgs_tail = %p\n", prefix, state->pending_msgs_tail);
+
+    fprintf(out, "%s  | ***  queued_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s  |   port %d: [\n", prefix, i);
+        for (int j = 0; j < p->num_vcs; j++) {
+            fprintf(out, "%s  |   |  vcs # %d\n", prefix, j);
+            print_terminal_dally_message_list(out, subprefix, NULL, state->queued_msgs[i][j]);
+        }
+        fprintf(out, "%s  |   ]\n", prefix);
+    }
+    fprintf(out, "%s  | ]\n", prefix);
+    free(subprefix);
+
+    fprintf(out, "%s  | ***     queued_msgs_tail = %p\n", prefix, state->queued_msgs_tail);
+
+    fprintf(out, "%s  | *        in_send_loop[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *        queued_count[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->queued_count[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *                     st = %p\n", prefix, state->st);
+    fprintf(out, "%s  | *                  cc_st = %p\n", prefix, state->cc_st);
+    fprintf(out, "%s  |  workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag);
+
+    fprintf(out, "%s  | *     port_bandwidths[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%g", i ? ", " : "", state->port_bandwidths[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *        vc_max_sizes[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->vc_max_sizes[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | **    vc_occupancy[%d][%d] = [\n", prefix, radix, p->num_vcs);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s  |         port %d: [", prefix, i);
+        for (int j = 0; j < p->num_vcs; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |      ]\n", prefix);
+
+    fprintf(out, "%s  | *        link_traffic[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%ld", i ? ", " : "", state->link_traffic[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | *    link_traffic_sample = %p\n", prefix, state->link_traffic_sample);
+
+    fprintf(out, "%s  |         is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw);
+
+    fprintf(out, "%s  | *        last_qos_lvl[%d] = [", prefix, radix);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]);
+    }
+    fprintf(out, "]\n");
+
+    fprintf(out, "%s  | **      qos_status[%d][%d] = [\n", prefix, radix, num_qos_levels);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s  |          port %d: [", prefix, i);
+        for (int j = 0; j < num_qos_levels; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |       ]\n", prefix);
+
+    fprintf(out, "%s  | **        qos_data[%d][%d] = [\n", prefix, radix, num_qos_levels);
+    for (int i = 0; i < radix; i++) {
+        fprintf(out, "%s  |            port %d: [", prefix, i);
+        for (int j = 0; j < num_qos_levels; j++) {
+            fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]);
+        }
+        fprintf(out, "]\n");
+    }
+    fprintf(out, "%s  |         ]\n", prefix);
+
+    fprintf(out, "%s  | *                   anno = %s\n", prefix, state->anno ? state->anno : "(nil)");
+    fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
+
+    if (num_snapshots) {
+        fprintf(out, "%s  | **   snapshot_data[%d][%d] = [\n", prefix, num_snapshots, radix);
+        int size_snapshot = p->num_vcs * p->radix;
+        for (int i = 0; i < num_snapshots; i++) {
+            fprintf(out, "%s  |  snapshot %d: [", prefix, i);
+            for (int j = 0; j < size_snapshot; j++) {
+                fprintf(out, "%s%d", j ? ", " : "", state->snapshot_data[i][j]);
+            }
+            fprintf(out, "]\n");
+        }
+        fprintf(out, "%s  |  ]\n", prefix);
+    } else {
+        fprintf(out, "%s  | **         snapshot_data = %p\n", prefix, state->snapshot_data);
+    }
+
+    fprintf(out, "%s  |               output_buf = '%.4096s'\n", prefix, state->output_buf);
+    fprintf(out, "%s  | *               rsamples = %p\n", prefix, state->rsamples);
+    fprintf(out, "%s  |               fwd_events = %ld\n", prefix, state->fwd_events);
+    fprintf(out, "%s  |               rev_events = %ld\n", prefix, state->rev_events);
+    fprintf(out, "%s  |              output_buf5 = '%.4096s'\n", prefix, state->output_buf5);
+    fprintf(out, "%s  |              output_buf6 = '%.4096s'\n", prefix, state->output_buf6);
+
+    if(p->counting_bool <= 0)
+    {
+        fprintf(out, "%s  | **         agg_busy_time = %p\n", prefix, state->agg_busy_time);
+        fprintf(out, "%s  | **      agg_link_traffic = %p\n", prefix, state->agg_link_traffic);
+    } else {
+        assert(state->agg_busy_time != NULL);
+        assert(state->agg_link_traffic != NULL);
+        fprintf(out, "%s  | **   agg_busy_time[%d][%d] = [\n", prefix, p->counting_windows, radix);
+        for (int i = 0; i < p->counting_windows; i++) {
+            fprintf(out, "%s  |  window %d: [", prefix, i);
+            for (int j = 0; j < radix; j++) {
+                fprintf(out, "%s%g", j ? ", " : "", state->agg_busy_time[i][j]);
+            }
+            fprintf(out, "]\n");
+        }
+        fprintf(out, "%s  |  ]\n", prefix);
+
+        fprintf(out, "%s  | ** agg_link_traffic[%d][%d] = [\n", prefix, p->counting_windows, radix);
+        for (int i = 0; i < p->counting_windows; i++) {
+            fprintf(out, "%s  |  window %d: [", prefix, i);
+            for (int j = 0; j < radix; j++) {
+                fprintf(out, "%s%lu", j ? ", " : "", state->agg_link_traffic[i][j]);
+            }
+            fprintf(out, "]\n");
+        }
+        fprintf(out, "%s  |  ]\n", prefix);
+    }
+
+    fprintf(out, "%s  |             ross_rsample = <dfly_router_sample object>\n", prefix);
+    fprintf(out, "%s  |                last_time = %g\n", prefix, state->last_time);
+}
+
 char const * const string_event_t(enum event_t type) {
     switch (type) {
         case T_GENERATE:         return "T_GENERATE";
@@ -7561,6 +8062,9 @@ char const * const string_event_t(enum event_t type) {
 bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after) {
     bool is_same = true;
 
+    // Fields that have no effects in the simulation
+    // before->this_router_ptp_latency
+
     // Compare all fields
     is_same &= before->magic == after->magic;
     is_same &= before->travel_start_time == after->travel_start_time;
@@ -7584,7 +8088,6 @@ bool check_terminal_dally_message(struct terminal_dally_message * before, struct
     is_same &= before->my_hops_cur_group == after->my_hops_cur_group;
     is_same &= before->next_stop == after->next_stop;
     is_same &= before->this_router_arrival == after->this_router_arrival;
-    is_same &= before->this_router_ptp_latency == after->this_router_ptp_latency;
     is_same &= before->intm_lp_id == after->intm_lp_id;
     is_same &= before->last_hop == after->last_hop;
     is_same &= before->is_intm_visited == after->is_intm_visited;
@@ -7612,6 +8115,7 @@ bool check_terminal_dally_message(struct terminal_dally_message * before, struct
 // Print fuction originally constructed with help from Claude.ai
 void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg) {
     //terminal_state * ns = (terminal_state *) s;
+    //router_state * ns = (router_state *) s;
 
     fprintf(out, "%sterminal_dally_message ->\n", prefix);
     fprintf(out, "%s  |                      magic = %d\n", prefix, msg->magic);
@@ -7730,12 +8234,12 @@ crv_checkpointer dragonfly_dally_checkpointers[] = {
     {
         &dragonfly_dally_lps[1],
         sizeof(router_state),
-        (save_checkpoint_state_f) NULL,
-        (clean_checkpoint_state_f) NULL,
-        (check_states_f) NULL,
-        (print_lpstate_f) NULL,
-        (print_checkpoint_state_f) NULL,
-        (print_event_f) NULL,
+        (save_checkpoint_state_f) save_router_state,
+        (clean_checkpoint_state_f) clean_router_state,
+        (check_states_f) check_router_state,
+        (print_lpstate_f) print_router_state,
+        (print_checkpoint_state_f) print_router_state,
+        (print_event_f) print_terminal_dally_message,
     },
 };
 

From 244f98af05736192fb0cebdbb894b5b197514827 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 20 Mar 2025 21:26:47 -0400
Subject: [PATCH 129/188] Fixing reversibility bug in router_state

---
 src/networks/model-net/dragonfly-dally.C | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index e33e6da3..38dbe1e6 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -6616,7 +6616,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
     m->magic = router_magic_num;
 
     int msg_size = s->params->chunk_size;
-    if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) {
+    if(((cur_entry->msg.packet_size % s->params->chunk_size) || cur_entry->msg.packet_size == 0) && (cur_entry->msg.chunk_id == num_chunks - 1)) {
         bf->c11 = 1;
         s->link_traffic[output_port] +=  (cur_entry->msg.packet_size % s->params->chunk_size); 
         s->link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size);
@@ -6910,6 +6910,7 @@ static void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_mess
     s->ross_rsample.fwd_events++;
     rc_stack_gc(lp, s->st);
 
+    msg->last_received_time = s->last_time;
     s->last_time = tw_now(lp);
     
     assert(msg->magic == router_magic_num);
@@ -7016,6 +7017,8 @@ static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, term
 static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, 
   terminal_dally_message * msg, tw_lp * lp) 
 {
+    s->last_time = msg->last_received_time;
+
     for(int i = 0; i < msg->num_rngs; i++)
         tw_rand_reverse_unif(lp->rng);
 

From e7e7535f8a825cf5b7d95a0a1e880689147d461d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sat, 22 Mar 2025 20:33:56 -0400
Subject: [PATCH 130/188] Updating tie-breaker related code from ROSS update

---
 src/surrogate/switch.c | 6 +++---
 src/util/rc-stack.c    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index 088de48b..dede0cf5 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -80,9 +80,9 @@ static void rollback_and_cancel_events_pe(tw_pe * pe) {
     tw_stime const gvt = gvt_sig.recv_ts;
     // Backtracking the simulation to GVT
     for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        tw_kp_rollback_to_sig(g_tw_kp[i], gvt_sig);
+        tw_kp_rollback_to_sig(g_tw_kp[i], &gvt_sig);
     }
-    assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0);
+    assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0);
     assert(pe->GVT_sig.recv_ts == gvt);  // redundant but needed because compiler cries that gvt is never used
 #else
     tw_stime const gvt = pe->GVT;
@@ -151,7 +151,7 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         // Filtering events to freeze
         assert(next_event->prev == NULL);
 #ifdef USE_RAND_TIEBREAKER
-        assert(tw_event_sig_compare(next_event->sig, gvt_sig) >= 0);
+        assert(tw_event_sig_compare_ptr(&next_event->sig, &gvt_sig) >= 0);
 #else
         assert(next_event->recv_ts >= gvt);
 #endif
diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c
index 7b0540e7..9491c897 100644
--- a/src/util/rc-stack.c
+++ b/src/util/rc-stack.c
@@ -124,7 +124,7 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) {
     while (ent != &s->head) {
         rc_entry *r = qlist_entry(ent, rc_entry, ql);
 #ifdef USE_RAND_TIEBREAKER
-        if (lp == NULL || tw_event_sig_compare(r->e_sig, lp->pe->GVT_sig) == -1) {
+        if (lp == NULL || tw_event_sig_compare_ptr(&r->e_sig, &lp->pe->GVT_sig) < 0) {
 #else
         if (lp == NULL || r->time < lp->pe->GVT){
 #endif

From e0cc46e9d3569570712b64c797eae4ac923637f9 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 29 May 2025 09:38:34 -0400
Subject: [PATCH 131/188] Finishing missing components to check in
 deep-copy/check/print functions for router_state

---
 src/network-workloads/model-net-mpi-replay.c |   2 +-
 src/networks/model-net/dragonfly-dally.C     | 184 +++++++++++++------
 2 files changed, 127 insertions(+), 59 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 6468d7ff..c1040526 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -82,7 +82,7 @@ static tw_stime mean_interval = 100000;
 static int payload_sz = 1024;
 
 /* Doing LP IO*/
-static void * params = NULL;
+static char * params = NULL;
 static char lp_io_dir[256] = {'\0'};
 static char sampling_dir[32] = {'\0'};
 static char mpi_msg_dir[32] = {'\0'};
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 38dbe1e6..d7792fa2 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -23,6 +23,7 @@
 #include "codes/model-net-lp.h"
 #include "codes/surrogate/init.h"
 #include "codes/net/dragonfly-dally.h"
+#include "quicklist.h"
 #include "sys/file.h"
 #include "codes/quickhash.h"
 #include "codes/rc-stack.h"
@@ -3302,6 +3303,11 @@ static void router_send_snapshot_events(router_state *s, tw_lp *lp)
 
 static void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp)
 {
+    if (msg->packet_ID >= num_snapshots) {
+        fprintf(stderr, "Warning: packet_ID = %llu will not be saved in the snapshot because there are only %d spaces available.\n", msg->packet_ID, num_snapshots);
+        return;
+    }
+
     for(int i = 0; i < s->params->radix; i++)
     {
         for(int j = 0; j < s->params->num_vcs; j++)
@@ -3517,7 +3523,7 @@ static void router_dally_commit(router_state * s,
 
     if (msg->type == R_SNAPSHOT)
     {
-        if (OUTPUT_SNAPSHOT == 1)
+        if (OUTPUT_SNAPSHOT == 1 && msg->packet_ID < num_snapshots)
         {
             char snapshot_line[8192];
             int written;
@@ -7049,17 +7055,103 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf,
 }
 
 //*** ---------- START OF reverse handler checking functions ---------- ***
-bool warn_incomplete_definition_terminal_state_check = false;
+static void copy_rank_tbl(struct qhash_table * into, struct qhash_table const * from) {
+  // YES! This function is very, very slow and so are all the others. This is
+  // the simplest implementation we could come up with without changing how
+  // qhash_table works or replacing it altogether. Both options would need
+  // substantial changes to the dragonfly model
+  for (int i = 0; i < from->table_size; i++) {
+    struct dfly_qhash_entry *entry;
+    qlist_for_each_entry(entry, &from->array[i], hash_link) {
+      struct dfly_qhash_entry *new_entry =
+          (struct dfly_qhash_entry *)malloc(sizeof(struct dfly_qhash_entry));
+      *new_entry = *entry; // There is no need to copy contents of pointer because we don't check it
+      qlist_add(&new_entry->hash_link, &into->array[i]);
+    }
+    }
+}
 
-static void save_terminal_state(terminal_state *into, terminal_state const *from) {
-    if (!warn_incomplete_definition_terminal_state_check) {
-        fprintf(stderr, "Warning: Deep-cloning and comparing has not been fully implemented for the (sub)LP type: `terminal_state` (Running this model under SEQUENTIAL_ROLLBACK_CHECK might not capture issues that arise from its reverse event handler).\n");
-        warn_incomplete_definition_terminal_state_check = true;
+static void clean_rank_tbl(struct qhash_table * rank_tbl) {
+    for (int i=0; i < rank_tbl->table_size; i++) {
+        while(!qlist_empty(&rank_tbl->array[i])) {
+            struct qlist_head *item = qlist_pop(&rank_tbl->array[i]);
+            struct dfly_qhash_entry * entry = qlist_entry(item, struct dfly_qhash_entry, hash_link);
+            free(entry);
+        }
+    }
+}
+
+static bool check_dfly_qhash_entry(struct dfly_qhash_entry * before, struct dfly_qhash_entry * after) {
+    // We ignore the remote data fields because they won't be needed:
+    // - remote_event_size
+    // - remote_event_data
+
+    if (before->key.sender_id != after->key.sender_id ||
+        before->key.message_id != after->key.message_id ||
+        before->num_chunks != after->num_chunks ||
+        before->remaining_packets != after->remaining_packets) {
+        return false;
     }
 
-    // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents
-    // from->rank_tbl
+    return true;
+}
+
+static bool check_rank_tbl(qhash_table const * before, struct qhash_table const * after) {
+    for (int i=0; i < before->table_size; i++) {
+        if (qlist_count(&before->array[i]) != qlist_count(&before->array[i])) {
+            return false;
+        }
+        struct dfly_qhash_entry * before_entry;
+        struct dfly_qhash_entry * after_entry;
+        qlist_for_each_entry(before_entry, &before->array[i], hash_link) {
+            // Yes, this is slow if there are many collisions, but often there won't be any
+            bool found_entry = false;
+            qlist_for_each_entry(after_entry, &after->array[i], hash_link) {
+                if (check_dfly_qhash_entry(before_entry, after_entry)) {
+                    found_entry = true;
+                    break;
+                }
+            }
+            if (!found_entry) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+static void print_rank_tbl(FILE * out, char const * prefix, struct qhash_table * rank_tbl) {
+    fprintf(out, "%stable_size = %d\n", prefix, rank_tbl->table_size);
+    fprintf(out, "%s   compare = %p\n", prefix, rank_tbl->compare);
+    fprintf(out, "%s      hash = %p\n", prefix, rank_tbl->hash);
+    fprintf(out, "%s     array = %p\n", prefix, rank_tbl->array);
+
+    char addprefix[] = "     |  | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1;
+    char * subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix);
+
+    for (int i=0; i < rank_tbl->table_size; i++) {
+        struct dfly_qhash_entry * entry;
+        qlist_for_each_entry(entry, &rank_tbl->array[i], hash_link) {
+            fprintf(out, "%s     | {\n", prefix);
+            fprintf(out, "%s     |      key.message_id = %lu\n", prefix, entry->key.message_id);
+            fprintf(out, "%s     |       key.sender_id = %lu\n", prefix, entry->key.sender_id);
+            fprintf(out, "%s     |          num_chunks = %d\n", prefix, entry->num_chunks);
+            fprintf(out, "%s     |   remaining_packets = %d\n", prefix, entry->remaining_packets);
+            fprintf(out, "%s     |   remote_event_size = %d\n", prefix, entry->remote_event_size);
+            fprintf(out, "%s     | * remote_event_data = %p\n", prefix, entry->remote_event_data);
+            if (entry->remote_event_size) {
+                tw_fprint_binary_array(out, subprefix, entry->remote_event_data, entry->remote_event_size);
+            }
+            fprintf(out, "%s     | },\n", prefix);
+        }
+    }
+
+    free(subprefix);
+}
 
+static void save_terminal_state(terminal_state *into, terminal_state const *from) {
     // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at
     // from->predictor_data
     // from->sample_stat
@@ -7120,6 +7212,9 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
         save_tlc_state(into->local_congestion_controller, from->local_congestion_controller);
     }
 
+    into->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE);
+    copy_rank_tbl(into->rank_tbl, from->rank_tbl);
+
     // I would use the C++ amgic to copy these containers but they don't work as well :S
     new (&into->remaining_sz_packets) map<struct packet_id, uint32_t>();
     new (&into->zombies) set<struct packet_id>();
@@ -7181,6 +7276,9 @@ static void clean_terminal_state(terminal_state *state) {
         free(state->local_congestion_controller);
     }
 
+    clean_rank_tbl(state->rank_tbl);
+    qhash_finalize(state->rank_tbl);
+
     state->remaining_sz_packets.~map();
     state->zombies.~set();
 }
@@ -7280,6 +7378,8 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
         is_same &= check_tlc_state(before->local_congestion_controller, after->local_congestion_controller);
     }
 
+    is_same &= check_rank_tbl(before->rank_tbl, after->rank_tbl);
+
     is_same &= before->remaining_sz_packets == after->remaining_sz_packets;
     is_same &= before->zombies == after->zombies;
 
@@ -7440,7 +7540,16 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 
     fprintf(out, "%s  | *                   anno = %s\n", prefix, state->anno ? state->anno : "(nil)");
     fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
-    fprintf(out, "%s  | *               rank_tbl = %p\n", prefix, state->rank_tbl);
+
+    fprintf(out, "%s  | *               rank_tbl = {\n", prefix);
+    char addprefix_4[] = "  |     ";
+    len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_4) + 1;
+    subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_4);
+    print_rank_tbl(out, subprefix, state->rank_tbl);
+    free(subprefix);
+    fprintf(out, "%s  | }\n", prefix);
+
     fprintf(out, "%s  |             rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop);
     fprintf(out, "%s  |               total_time = %g\n", prefix, state->total_time);
     fprintf(out, "%s  |           total_msg_size = %lu\n", prefix, state->total_msg_size);
@@ -7610,16 +7719,6 @@ static void save_router_state(router_state *into, router_state const *from) {
         }
     }
 
-    into->snapshot_data = NULL;
-    if (num_snapshots) {
-        into->snapshot_data = (int**) malloc(num_snapshots * sizeof(int*));
-        int size_snapshot = from->params->num_vcs * from->params->radix;
-        for (int i = 0; i < num_snapshots; i++) {
-            into->snapshot_data[i] = (int*) malloc(size_snapshot * sizeof(int));
-            memcpy(into->snapshot_data[i], from->snapshot_data[i], size_snapshot * sizeof(int));
-        }
-    }
-
     if (p->counting_bool > 0) {
         assert(from->agg_busy_time != NULL);
         assert(from->agg_link_traffic != NULL);
@@ -7681,13 +7780,6 @@ static void clean_router_state(router_state *state) {
     free(state->pending_msgs);
     free(state->queued_msgs);
 
-    if (num_snapshots) {
-        for (int i = 0; i < num_snapshots; i++) {
-            free(state->snapshot_data[i]);
-        }
-        free(state->snapshot_data);
-    }
-
     if (p->counting_bool > 0) {
         for (int i = 0; i < p->counting_windows; i++) {
             free(state->agg_busy_time[i]);
@@ -7705,6 +7797,13 @@ static void clean_router_state(router_state *state) {
 
 // Original function implemented by Claude
 static bool check_router_state(router_state const *before, router_state const *after) {
+    // The following are not checked because they don't influence any other
+    // components of the router state, ie, they are never used to change
+    // the simulation behavior.
+    // - snapshot_data
+    // - fwd_events
+    // - rev_events
+
     dragonfly_param const * p = before->params;
     int const radix = p->radix;
     int const num_qos_levels = p->num_qos_levels;
@@ -7760,24 +7859,6 @@ static bool check_router_state(router_state const *before, router_state const *a
         }
     }
 
-    if ((before->snapshot_data == NULL) != (after->snapshot_data == NULL)) {
-        return false;
-    }
-
-    if (num_snapshots) {
-        assert(before->snapshot_data != NULL);
-        int size_snapshot = before->params->num_vcs * before->params->radix;
-        for (int i = 0; i < num_snapshots; i++) {
-            assert(after->snapshot_data[i] == NULL);
-
-            for (int j = 0; j < size_snapshot; j++) {
-                if (before->snapshot_data[i][j] != after->snapshot_data[i][j]) {
-                    return false;
-                }
-            }
-        }
-    }
-
     if ((before->agg_busy_time == NULL) != (after->agg_busy_time == NULL)) {
         return false;
     }
@@ -7987,20 +8068,7 @@ static void print_router_state(FILE * out, char const * prefix, router_state * s
     fprintf(out, "%s  | *                   anno = %s\n", prefix, state->anno ? state->anno : "(nil)");
     fprintf(out, "%s  | *                 params = %p\n", prefix, state->params);
 
-    if (num_snapshots) {
-        fprintf(out, "%s  | **   snapshot_data[%d][%d] = [\n", prefix, num_snapshots, radix);
-        int size_snapshot = p->num_vcs * p->radix;
-        for (int i = 0; i < num_snapshots; i++) {
-            fprintf(out, "%s  |  snapshot %d: [", prefix, i);
-            for (int j = 0; j < size_snapshot; j++) {
-                fprintf(out, "%s%d", j ? ", " : "", state->snapshot_data[i][j]);
-            }
-            fprintf(out, "]\n");
-        }
-        fprintf(out, "%s  |  ]\n", prefix);
-    } else {
-        fprintf(out, "%s  | **         snapshot_data = %p\n", prefix, state->snapshot_data);
-    }
+    fprintf(out, "%s  | **         snapshot_data = %p\n", prefix, state->snapshot_data);
 
     fprintf(out, "%s  |               output_buf = '%.4096s'\n", prefix, state->output_buf);
     fprintf(out, "%s  | *               rsamples = %p\n", prefix, state->rsamples);

From 0e7669355c6c5d0da881358d6eecbdb6bf76f626 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 29 May 2025 13:05:19 -0400
Subject: [PATCH 132/188] Moving general PDES code into ROSS

---
 src/surrogate/switch.c | 137 ++---------------------------------------
 1 file changed, 5 insertions(+), 132 deletions(-)

diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c
index dede0cf5..f15aafe8 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/switch.c
@@ -1,6 +1,8 @@
 #include <codes/surrogate/init.h>
 #include <codes/surrogate/switch.h>
 #include <codes/model-net-lp.h>
+#include <ross-extern.h>
+#include <stdio.h>
 
 double surrogate_switching_time = 0.0;
 double time_in_surrogate = 0.0;
@@ -9,36 +11,6 @@ static double surrogate_time_last = 0.0;
 // === Director functionality
 //
 
-
-//static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) {
-//    (void) switch_offset;
-//    (void) gvt;
-//    int events_processed = 0;
-//    int events_modified = 0;
-//    for (unsigned int i = 0; i < g_tw_nkp; i++) {
-//        tw_kp * const this_kp = g_tw_kp[i];
-//
-//        //assert(this_kp->pevent_q.size == 0);
-//        // All events in pevent_q are sent into the future
-//        assert((this_kp->pevent_q.tail == NULL) == (this_kp->pevent_q.size == 0));
-//        tw_event * cur_event = this_kp->pevent_q.tail;
-//        while (cur_event) {
-//            if (!is_workload_event(cur_event) && tw_event_sig_compare(cur_event->sig, gvt) > 0) {
-//                cur_event->recv_ts += switch_offset;
-//                cur_event->sig.recv_ts = cur_event->recv_ts;
-//                events_modified++;
-//            }
-//
-//            cur_event = cur_event->prev;
-//            events_processed++;
-//        }
-//    }
-//    if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) {
-//        printf("PE %lu: Total events from causality modified %d (from total processed %d)\n", g_tw_mynode, events_modified, events_processed);
-//    }
-//}
-
-
 static struct lp_types_switch const * get_type_switch(char const * const name) {
     for (size_t i = 0; i < surr_config.n_lp_types; i++) {
         //printf("THIS %s and %s\n", surr_config.lp_types[i].lpname, name);
@@ -50,72 +22,6 @@ static struct lp_types_switch const * get_type_switch(char const * const name) {
 }
 
 
-// MPI barrier to determine if anyone has a true value `val`. Returns true if anyone says "TRUE"
-static inline bool does_any_pe(bool val) {
-    bool global_val;
-    if(MPI_Allreduce(&val, &global_val, 1, MPI_C_BOOL, MPI_LOR, MPI_COMM_ROSS) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce for custom rollback and cleanup failed");
-    }
-    return global_val;
-}
-
-
-//static tw_event_sig find_sig_smallest_larger_than(double switch_, tw_kp * kp, tw_event_sig gvt) {
-//    //printf("Just testing, I'm here! size=%d\n", kp->pevent_q.size);
-//    tw_event * cur_event = kp->pevent_q.tail;
-//    while (cur_event) {
-//        //printf("Current timestamp to rollback (%e) and gvt (%e)\n", cur_event->sig.recv_ts, gvt.recv_ts);
-//        if (tw_event_sig_compare(cur_event->sig, gvt) < 0 && switch_ <= cur_event->sig.recv_ts) {
-//            gvt = cur_event->sig;
-//        }
-//        cur_event = cur_event->prev;
-//    }
-//    return gvt;
-//}
-
-
-static void rollback_and_cancel_events_pe(tw_pe * pe) {
-#ifdef USE_RAND_TIEBREAKER
-    tw_event_sig const gvt_sig = pe->GVT_sig;
-    tw_stime const gvt = gvt_sig.recv_ts;
-    // Backtracking the simulation to GVT
-    for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        tw_kp_rollback_to_sig(g_tw_kp[i], &gvt_sig);
-    }
-    assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0);
-    assert(pe->GVT_sig.recv_ts == gvt);  // redundant but needed because compiler cries that gvt is never used
-#else
-    tw_stime const gvt = pe->GVT;
-    // Backtracking the simulation to GVT
-    for (unsigned int i = 0; i < g_tw_nkp; i++) {
-        tw_kp_rollback_to(g_tw_kp[i], gvt);
-    }
-    assert(pe->GVT == gvt);
-#endif
-
-    // Making sure that everything gets cleaned up properly (AVL tree should be empty by the end)
-    do {
-        if (tw_nnodes() > 1) {
-            double const start = tw_clock_read();
-            tw_net_read(pe);
-            pe->stats.s_net_read += tw_clock_read() - start;
-        }
-
-        pe->gvt_status = 1;
-        tw_sched_event_q(pe);
-        tw_sched_cancel_q(pe);
-        tw_gvt_step2(pe);
-
-        if (DEBUG_DIRECTOR > 1) {
-            printf("PE %lu: Time stamp at the end of GVT time: %f - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size);
-        }
-    } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0));
-
-    if (DEBUG_DIRECTOR > 1) {
-        printf("PE %lu: All events rolledbacked and cancelled\n", g_tw_mynode);
-    }
-}
-
 static void shift_events_to_future_pe(tw_pe * pe) {
 #ifdef USE_RAND_TIEBREAKER
     tw_event_sig gvt_sig = pe->GVT_sig;
@@ -347,7 +253,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
 
     // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process)
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        rollback_and_cancel_events_pe(pe);
+        tw_scheduler_rollback_and_cancel_events_pe(pe);
     }
 
     assert(lps_events[0] != NULL);
@@ -411,28 +317,11 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
     }
 }
 
-bool hit_trigger(tw_stime gvt) {
-    if ( switch_at.current_i < switch_at.total
-        && g_tw_trigger_gvt_hook.active == GVT_HOOK_triggered) {
-        double const switch_time = switch_at.time_stampts[switch_at.current_i];
-#ifdef USE_RAND_TIEBREAKER
-        assert(g_tw_trigger_gvt_hook.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]);
-#else
-        assert(g_tw_trigger_gvt_hook.at == switch_at.time_stampts[switch_at.current_i]);
-#endif
-        assert(gvt >= switch_time);  // current gvt shouldn't be that far ahead from the point we wanted to trigger it
-
-        return true;
-    } else {
-        return false;
-    }
-}
-
 
 void switch_model(tw_pe * pe) {
     // Rollback if in optimistic mode
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
-        rollback_and_cancel_events_pe(pe);
+        tw_scheduler_rollback_and_cancel_events_pe(pe);
     }
     surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -468,20 +357,8 @@ void director_call(tw_pe * pe) {
             fflush(stdout);
         }
         if (DEBUG_DIRECTOR == 3) {
-            printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt,
+            printf("GVT %d at %f in %s\n", i++, gvt,
                     surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
-
-            switch (g_tw_trigger_gvt_hook.active) {
-                case GVT_HOOK_enabled:
-                    printf("enabled\n");
-                    break;
-                case GVT_HOOK_disabled:
-                    printf("disabled\n");
-                    break;
-                case GVT_HOOK_triggered:
-                    printf("triggered\n");
-                    break;
-            }
         }
     }
 
@@ -501,10 +378,6 @@ void director_call(tw_pe * pe) {
         return;
     }
 
-    // Detecting if we are going to switch
-    if (! hit_trigger(gvt)) {
-        return;
-    }
     // ---- Past this means that we are in fact switching ----
     bool const pre_switch_status = surr_config.director.is_surrogate_on();
 

From 01e6bf61fd43cb825009b6b1ac399d2a606ad86b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 30 May 2025 11:07:55 -0400
Subject: [PATCH 133/188] Renaming surrogate as network-surrogate

---
 codes/surrogate/init.h                        | 12 ++---
 .../{switch.h => network-surrogate.h}         |  5 +-
 .../tutorial-ping-pong-surrogate.conf.in      |  2 +-
 src/CMakeLists.txt                            |  2 +-
 src/networks/model-net/dragonfly-dally.C      | 54 +++++++++----------
 src/surrogate/init.c                          | 31 ++++++-----
 .../{switch.c => network-surrogate.c}         | 10 ++--
 7 files changed, 57 insertions(+), 59 deletions(-)
 rename codes/surrogate/{switch.h => network-surrogate.h} (95%)
 rename src/surrogate/{switch.c => network-surrogate.c} (98%)

diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h
index 11ad5027..3a9a2169 100644
--- a/codes/surrogate/init.h
+++ b/codes/surrogate/init.h
@@ -8,7 +8,7 @@
  * Copyright (c) 2023 Rensselaer Polytechnic Institute
  */
 #include "codes/surrogate/packet-latency-predictor/common.h"
-#include "codes/surrogate/switch.h"
+#include "codes/surrogate/network-surrogate.h"
 
 // A simple macro to clarify code a bit
 #define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); }
@@ -32,7 +32,7 @@ extern "C" {
 
 void print_surrogate_stats(void);
 
-struct surrogate_config {
+struct network_surrogate_config {
     struct director_data director;  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
     int total_terminals;  //!< total number of terminals
     size_t n_lp_types;
@@ -40,14 +40,14 @@ struct surrogate_config {
 };
 
 /** Loads surrogate configuration, including packet latency predictor. */
-void surrogate_configure(
+void network_surrogate_configure(
         char const * const annotation,
-        struct surrogate_config * const config,
+        struct network_surrogate_config * const config,
         struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it
 );
 
-extern struct surrogate_config surr_config;
-extern bool is_surrogate_configured;
+extern struct network_surrogate_config surr_config;
+extern bool is_network_surrogate_configured;
 
 #ifdef __cplusplus
 }
diff --git a/codes/surrogate/switch.h b/codes/surrogate/network-surrogate.h
similarity index 95%
rename from codes/surrogate/switch.h
rename to codes/surrogate/network-surrogate.h
index d23abb00..f941ea9f 100644
--- a/codes/surrogate/switch.h
+++ b/codes/surrogate/network-surrogate.h
@@ -60,9 +60,8 @@ struct switch_at_struct {
 
 extern struct switch_at_struct switch_at;
 
-
-// Switch
-void director_call(tw_pe * pe);
+// Main function responsible for switching between high-fidelity and (network) surrogate
+void network_director(tw_pe * pe);
 
 #ifdef __cplusplus
 }
diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in
index 6d2b3e58..04d2c94f 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf.in
+++ b/doc/example/tutorial-ping-pong-surrogate.conf.in
@@ -58,7 +58,7 @@ PARAMS
 # router buffer occupancy snapshots
    router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} );
 }
-SURROGATE {
+NETWORK_SURROGATE {
 # determines the director switching from surrogate to high-def simulation strategy
    director_mode="at-fixed-virtual-times";
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d82c2584..55e97215 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -56,7 +56,7 @@ list(APPEND SRCS
     util/congestion-controller.C
 
     surrogate/init.c
-    surrogate/switch.c
+    surrogate/network-surrogate.c
     surrogate/packet-latency-predictor/common.c
     surrogate/packet-latency-predictor/average.c
 
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index d7792fa2..3797c8ae 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -189,8 +189,8 @@ static void setup_packet_latency_path(char const * const dir_to_save);
 
 // ==== START OF Parameters to tune surrogate mode ====
 // 
-static bool surrogate_configured = false;
-static bool is_surrogate_on = false;
+static bool dally_surrogate_configured = false;
+static bool is_dally_surrogate_on = false;
 static struct packet_latency_predictor * terminal_predictor = NULL;
 static void switch_surrogate(void);
 static bool is_surrogate_on_fun(void);
@@ -2435,10 +2435,10 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     // START Surrogate configuration
     char director_mode[MAX_NAME_LENGTH];
     director_mode[0] = '\0';
-    int director_mode_len = configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    int director_mode_len = configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
     // if surrogate mode has been set up
     if (director_mode_len > 0) {
-        struct surrogate_config surr_conf = {
+        struct network_surrogate_config surr_conf = {
             .director = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
             .total_terminals = p->total_terminals,
             .n_lp_types = 2,
@@ -2460,9 +2460,9 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                 0
             }
         };
-        surrogate_configure(anno, &surr_conf, &terminal_predictor);
+        network_surrogate_configure(anno, &surr_conf, &terminal_predictor);
         if (terminal_predictor) {
-            surrogate_configured = true;
+            dally_surrogate_configured = true;
         } else {
             tw_error(TW_LOC, "Latency predictor is NULL. Something during surrogate configuration failed.");
         }
@@ -2987,11 +2987,11 @@ static inline void packet_latency_save_to_file(
 // ==== START OF Surrogate functions definition ====
 
 static void switch_surrogate(void) {
-    is_surrogate_on = ! is_surrogate_on;
+    is_dally_surrogate_on = ! is_dally_surrogate_on;
 }
 
 static bool is_surrogate_on_fun(void) {
-    return is_surrogate_on;
+    return is_dally_surrogate_on;
 }
 
 static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t packet_ID, double end_time) {
@@ -3002,8 +3002,8 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t pa
         .next_packet_delay = sent.next_packet_delay,
     };
 
-    packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_surrogate_on, false);
-    if (surrogate_configured && !is_surrogate_on) {
+    packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_dally_surrogate_on, false);
+    if (dally_surrogate_configured && !is_dally_surrogate_on) {
         assert(terminal_predictor != NULL);
         terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &sent.start, &end);
     }
@@ -3085,7 +3085,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
                 latency = 0;
             }
 
-            packet_latency_save_to_file(s->terminal_id, &sent.start, &predicted_end, is_surrogate_on, true);
+            packet_latency_save_to_file(s->terminal_id, &sent.start, &predicted_end, is_dally_surrogate_on, true);
 
             assert(sent.message_data);
             terminal_dally_message * const msg_data = (terminal_dally_message*) sent.message_data;
@@ -3321,7 +3321,7 @@ static void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_da
 }
 
 static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) {
-    if (!packet_latency_f && !surrogate_configured) {
+    if (!packet_latency_f && !dally_surrogate_configured) {
         return;
     }
 
@@ -3379,7 +3379,7 @@ static void terminal_dally_commit(terminal_state * s,
     switch (msg->type) {
         case T_GENERATE:
             if(bf->c10) {  // if the packet was sent as a prediction, store the prediction in memory
-                assert(surrogate_configured);
+                assert(dally_surrogate_configured);
                 auto start = (struct packet_start) {
                     .packet_ID = msg->packet_ID,
                     .dest_terminal_lpid = msg->dest_terminal_lpid,
@@ -3396,7 +3396,7 @@ static void terminal_dally_commit(terminal_state * s,
                     .travel_end_time = msg->travel_end_time,
                     .next_packet_delay = msg->saved_next_packet_delay,
                 };
-                packet_latency_save_to_file(s->terminal_id, &start, &end, is_surrogate_on, true);
+                packet_latency_save_to_file(s->terminal_id, &start, &end, is_dally_surrogate_on, true);
 
                 // If we had latency info for the last packet transmitted, then we have to store it into memory and clean the variable
                 if (s->arrival_of_last_packet.packet_ID != -1) {
@@ -3410,7 +3410,7 @@ static void terminal_dally_commit(terminal_state * s,
                         .next_packet_delay = -1,
                     };
 
-                    packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_surrogate_on, false);
+                    packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_dally_surrogate_on, false);
 
                     s->sent_packets.erase(s->arrival_of_last_packet.packet_ID);
                     s->arrival_of_last_packet.packet_ID = -1;
@@ -5497,7 +5497,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         tmp->remaining_packets--;
 
         //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
-        if (packet_latency_f || surrogate_configured) {
+        if (packet_latency_f || dally_surrogate_configured) {
             notify_src_lp_on_total_latency(lp, msg);
         //} else {
         //    // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact
@@ -5589,7 +5589,7 @@ static void terminal_buf_update(terminal_state * s,
 static void dragonfly_dally_terminal_final( terminal_state * s, 
       tw_lp * lp )
 {
-    if (freeze_network_on_switch && is_surrogate_on) {
+    if (freeze_network_on_switch && is_dally_surrogate_on) {
         dragonfly_dally_terminal_surrogate_to_highdef(s, lp, NULL);
     }
     // printf("terminal id %d\n",s->terminal_id);
@@ -6851,7 +6851,7 @@ terminal_dally_event( terminal_state * s,
     assert(msg->magic == terminal_magic_num);
     //printf("LPID: %llu Event type %d processed at %f\n", lp->gid, msg->type, tw_now(lp));
 
-    if (is_surrogate_on && freeze_network_on_switch) {
+    if (is_dally_surrogate_on && freeze_network_on_switch) {
         // This event will be reversed. It comes from the past, it has been forwarded to the future
         // by the surrogate freezing the network procedure and should not be taken into account
         if (! (msg->type == T_GENERATE || msg->type == T_ARRIVE_PREDICTED || msg->type == T_NOTIFY)) {
@@ -6865,7 +6865,7 @@ terminal_dally_event( terminal_state * s,
     switch(msg->type)
         {
         case T_GENERATE:
-            if (is_surrogate_on) {
+            if (is_dally_surrogate_on) {
                 bf->c10 = 1;
                 packet_generate_predicted(s,bf,msg,lp);
             } else {
@@ -7165,7 +7165,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
     int const num_qos_levels = p->num_qos_levels;
     int const num_rails = p->num_rails;
 
-    if (!is_surrogate_on) {
+    if (!is_dally_surrogate_on) {
         into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*));
         into->terminal_length = (int**) malloc(num_rails * sizeof(int*));
         into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
@@ -7243,7 +7243,7 @@ static void clean_terminal_state(terminal_state *state) {
     int const num_rails = p->num_rails;
     int const num_qos_levels = p->num_qos_levels;
 
-    if (!is_surrogate_on) {
+    if (!is_dally_surrogate_on) {
         for (int i = 0; i < num_rails; i++) {
             free(state->vc_occupancy[i]);
             free(state->terminal_length[i]);
@@ -7343,7 +7343,7 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
         is_same &= (before->anno == after->anno);
     }
 
-    if (!is_surrogate_on) {
+    if (!is_dally_surrogate_on) {
         dragonfly_param const * p = before->params;
         int const num_qos_levels = p->num_qos_levels;
         int const num_rails = p->num_rails;
@@ -7420,7 +7420,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 
     fprintf(out, "%s  |  workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag);
 
-    if (is_surrogate_on) {
+    if (is_dally_surrogate_on) {
         fprintf(out, "%s  | **          vc_occupancy = %p\n", prefix, state->vc_occupancy);
         fprintf(out, "%s  | *terminal_available_time = %p\n", prefix, state->terminal_available_time);
         fprintf(out, "%s  | ***        terminal_msgs = %p\n", prefix, state->terminal_msgs);
@@ -7459,7 +7459,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
 
     fprintf(out, "%s  | ***   terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail);
 
-    if (is_surrogate_on) {
+    if (is_dally_surrogate_on) {
         fprintf(out, "%s  | *          in_send_loop = %p\n", prefix, state->in_send_loop);
     } else {
         fprintf(out, "%s  | *       in_send_loop[%d] = [", prefix, state->params->num_rails);
@@ -7481,7 +7481,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |    ]\n", prefix);
     free(subprefix);
 
-    if (is_surrogate_on) {
+    if (is_dally_surrogate_on) {
         fprintf(out, "%s  | **           qos_status = %p\n", prefix, state->qos_status);
         fprintf(out, "%s  | **             qos_data = %p\n", prefix, state->qos_data);
         fprintf(out, "%s  | *          last_qos_lvl = %p\n", prefix, state->last_qos_lvl);
@@ -7517,7 +7517,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  | *                     st = %p\n", prefix, state->st);
     fprintf(out, "%s  | *                  cc_st = %p\n", prefix, state->cc_st);
 
-    if (is_surrogate_on) {
+    if (is_dally_surrogate_on) {
         fprintf(out, "%s  | *             issueIdle = %p\n", prefix, state->issueIdle);
         fprintf(out, "%s  | **      terminal_length = %p\n", prefix, state->terminal_length);
     } else {
@@ -7558,7 +7558,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
     fprintf(out, "%s  |          finished_chunks = %ld\n", prefix, state->finished_chunks);
     fprintf(out, "%s  |         finished_packets = %ld\n", prefix, state->finished_packets);
 
-    if (is_surrogate_on) {
+    if (is_dally_surrogate_on) {
         fprintf(out, "%s  | **      terminal_length = %p\n", prefix, state->terminal_length);
         fprintf(out, "%s  | *         last_buf_full = %p\n", prefix, state->last_buf_full);
         fprintf(out, "%s  | *             busy_time = %p\n", prefix, state->busy_time);
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 4ed587c6..7c969924 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -1,5 +1,4 @@
 #include <codes/surrogate/init.h>
-#include <codes/surrogate/switch.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
 
 #ifdef USE_TORCH
@@ -7,15 +6,15 @@
 #endif
 
 bool freeze_network_on_switch = true;
-struct surrogate_config surr_config = {0};
-bool is_surrogate_configured = false;
+struct network_surrogate_config surr_config = {0};
+bool is_network_surrogate_configured = false;
 struct switch_at_struct switch_at;
 static struct packet_latency_predictor current_predictor = {0};
 
 
 // === Stats!
 void print_surrogate_stats(void) {
-    if(is_surrogate_configured && g_tw_mynode == 0) {
+    if(is_network_surrogate_configured && g_tw_mynode == 0) {
         printf("\nTotal time spent on surrogate-mode: %.4f\n", (double) time_in_surrogate / g_tw_clock_rate);
         printf("Total time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
     }
@@ -24,14 +23,14 @@ void print_surrogate_stats(void) {
 
 
 // === All things Surrogate Configuration
-void surrogate_configure(
+void network_surrogate_configure(
         char const * const anno,
-        struct surrogate_config * const sc,
+        struct network_surrogate_config * const sc,
         struct packet_latency_predictor ** pl_pred
 ) {
     assert(sc);
     assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
-    is_surrogate_configured = true;
+    is_network_surrogate_configured = true;
 
     // This is the only place where the director data should be loaded and set up
     surr_config = *sc;
@@ -39,14 +38,14 @@ void surrogate_configure(
     // Determining which director mode to set up
     char director_mode[MAX_NAME_LENGTH];
     director_mode[0] = '\0';
-    configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
     if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
-        PRINTF_ONCE("\nSurrogate activated switching at fixed virtual times: ");
+        PRINTF_ONCE("\nNetwork surrogate activated switching at fixed virtual times: ");
 
         // Loading timestamps
         char **timestamps;
         size_t len;
-        configuration_get_multivalue(&config, "SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
+        configuration_get_multivalue(&config, "NETWORK_SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
 
         switch_at.current_i = 0;
         switch_at.total = len;
@@ -64,7 +63,7 @@ void surrogate_configure(
         PRINTF_ONCE("\n");
 
         // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
-        g_tw_gvt_hook = director_call;
+        g_tw_gvt_hook = network_director;
 
         tw_trigger_gvt_hook_at(switch_at.time_stampts[0]);
 
@@ -80,7 +79,7 @@ void surrogate_configure(
     // Determining which predictor to set up and return
     char latency_pred_name[MAX_NAME_LENGTH];
     latency_pred_name[0] = '\0';
-    configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
+    configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
     if (*latency_pred_name) {
         if (strcmp(latency_pred_name, "average") == 0) {
             current_predictor = average_latency_predictor(surr_config.total_terminals);
@@ -90,14 +89,14 @@ void surrogate_configure(
         } else if (strcmp(latency_pred_name, "torch-jit") == 0) {
             char torch_jit_mode[MAX_NAME_LENGTH];
             torch_jit_mode[0] = '\0';
-            configuration_get_value(&config, "SURROGATE", "torch_jit_mode", anno, torch_jit_mode, MAX_NAME_LENGTH);
+            configuration_get_value(&config, "NETWORK_SURROGATE", "torch_jit_mode", anno, torch_jit_mode, MAX_NAME_LENGTH);
             if (strcmp(torch_jit_mode, "single-static-model-for-all-terminals") != 0) {
                 tw_error(TW_LOC, "Unknown torch-jit mode `%s`", torch_jit_mode);
             }
 
             char torch_jit_model_path[MAX_NAME_LENGTH];
             torch_jit_model_path[0] = '\0';
-            configuration_get_value(&config, "SURROGATE", "torch_jit_model_path", anno, torch_jit_model_path, MAX_NAME_LENGTH);
+            configuration_get_value(&config, "NETWORK_SURROGATE", "torch_jit_model_path", anno, torch_jit_model_path, MAX_NAME_LENGTH);
             surrogate_torch_init(torch_jit_model_path);
 
             *pl_pred = &torch_latency_predictor;
@@ -118,7 +117,7 @@ void surrogate_configure(
     }
 
     // Finding out whether to ignore some packet latencies
-    int rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until);
+    int rc = configuration_get_value_double(&config, "NETWORK_SURROGATE", "ignore_until", anno, &ignore_until);
     if (rc) {
         ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
         PRINTF_ONCE("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n");
@@ -129,7 +128,7 @@ void surrogate_configure(
     // Determining which predictor to set up and return
     char network_treatment_name[MAX_NAME_LENGTH];
     network_treatment_name[0] = '\0';
-    configuration_get_value(&config, "SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH);
+    configuration_get_value(&config, "NETWORK_SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH);
     if (*network_treatment_name) {
         if (strcmp(network_treatment_name, "freeze") == 0) {
             freeze_network_on_switch = true;
diff --git a/src/surrogate/switch.c b/src/surrogate/network-surrogate.c
similarity index 98%
rename from src/surrogate/switch.c
rename to src/surrogate/network-surrogate.c
index f15aafe8..38875061 100644
--- a/src/surrogate/switch.c
+++ b/src/surrogate/network-surrogate.c
@@ -1,5 +1,5 @@
 #include <codes/surrogate/init.h>
-#include <codes/surrogate/switch.h>
+#include <codes/surrogate/network-surrogate.h>
 #include <codes/model-net-lp.h>
 #include <ross-extern.h>
 #include <stdio.h>
@@ -31,7 +31,7 @@ static void shift_events_to_future_pe(tw_pe * pe) {
 #endif
     tw_event * next_event = tw_pq_dequeue(pe->pq);
 
-    // If there aren't any events left to process, the simulation has already finished and we have nothing to do
+    // If there aren't any events left to process, then this PE has nothing to do
     if (next_event == NULL) {
         return;
     }
@@ -207,10 +207,10 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
     }
 
-    tw_event *** lps_events = order_events_per_lps(pe);
     printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size);
     shift_events_to_future_pe(pe);
     printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size);
+    tw_event *** lps_events = order_events_per_lps(pe);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -341,8 +341,8 @@ void switch_model(tw_pe * pe) {
 }
 
 
-void director_call(tw_pe * pe) {
-    assert(is_surrogate_configured);
+void network_director(tw_pe * pe) {
+    assert(is_network_surrogate_configured);
 
 #ifdef USE_RAND_TIEBREAKER
     tw_stime gvt = pe->GVT_sig.recv_ts;

From ab3b9511da7fa4f98a438812c908149ddd37a286 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 30 May 2025 15:12:22 -0400
Subject: [PATCH 134/188] Renaming network average predictor to allow for more
 predictors

---
 .../packet-latency-predictor/common.h         | 20 +++++++++----------
 .../packet-latency-predictor/average.c        |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h
index aae0f0d7..61b0283c 100644
--- a/codes/surrogate/packet-latency-predictor/common.h
+++ b/codes/surrogate/packet-latency-predictor/common.h
@@ -36,18 +36,18 @@ struct packet_end {
 };
 
 // Definition of functions needed to define a predictor
-typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
-typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now`
-typedef struct packet_end (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now`
-typedef void (*predict_pred_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
+typedef void (*init_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
+typedef void (*feed_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now`
+typedef struct packet_end (*predict_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now`
+typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
 
-// Each network model defines its own way to setup the packet latency predictor
+// API for packet latency predictors
 struct packet_latency_predictor {
-    init_pred_f        init;
-    feed_pred_f        feed;
-    predict_pred_f     predict;
-    predict_pred_rc_f  predict_rc;
-    size_t             predictor_data_sz; // `predictor_data` size
+    init_pred_lat_f        init;
+    feed_pred_lat_f        feed;
+    predict_pred_lat_f     predict;
+    predict_pred_lat_rc_f  predict_rc;
+    size_t                 predictor_data_sz; // `predictor_data` size
 };
 
 #ifdef __cplusplus
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index 88f084d3..82db7e1c 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -104,10 +104,10 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
 
 struct packet_latency_predictor average_latency_predictor(int num_terminals) {
     return (struct packet_latency_predictor) {
-    .init              = (init_pred_f) init_pred,
-    .feed              = (feed_pred_f) feed_pred,
-    .predict           = (predict_pred_f) predict_latency,
-    .predict_rc        = (predict_pred_rc_f) predict_latency_rc,
+    .init              = (init_pred_lat_f) init_pred,
+    .feed              = (feed_pred_lat_f) feed_pred,
+    .predict           = (predict_pred_lat_f) predict_latency,
+    .predict_rc        = (predict_pred_lat_rc_f) predict_latency_rc,
     .predictor_data_sz = sizeof(struct latency_surrogate) + num_terminals * sizeof(struct aggregated_latency_one_terminal)
     };
 }

From 77964abc300c17e64602824c7ebf46bf8eb69986 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 30 May 2025 15:22:34 -0400
Subject: [PATCH 135/188] Network predictors do not need to allocate memory
 when initialized

It is possible to allocate all memory needed for all predictors within a
PE in the predictors' .c file, even before initializing any predictor
(calling `.init`). Thus a predictor might have size zero
---
 src/networks/model-net/dragonfly-dally.C | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 3797c8ae..0fec30f6 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3704,11 +3704,12 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     s->frozen_state = NULL;
 
     // alloc'ing memory for predictor, calling initiliazer for predictor
-    if (terminal_predictor != NULL && terminal_predictor->predictor_data_sz > 0) {
-        s->predictor_data = calloc(1, terminal_predictor->predictor_data_sz);
+    s->predictor_data = NULL;
+    if (terminal_predictor != NULL) {
+        if (terminal_predictor->predictor_data_sz > 0) {
+            s->predictor_data = calloc(1, terminal_predictor->predictor_data_sz);
+        }
         terminal_predictor->init(s->predictor_data, lp, s->terminal_id);
-    } else {
-        s->predictor_data = NULL;
     }
     s->last_in_queue_time = 0;
     return;

From 8c65ec238ab9c2ca9e390b703efae5ae18b279c4 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 30 May 2025 18:01:59 -0400
Subject: [PATCH 136/188] Each computer node tracks its own workload id

---
 src/network-workloads/model-net-mpi-replay.c | 23 ++++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index c1040526..f456aab0 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -67,7 +67,6 @@ char workload_type[128];
 char workload_name[128];
 char workload_file[8192];
 char offset_file[8192];
-static int wrkld_id;
 static int num_net_traces = 0;
 static int prioritize_collectives = 0;
 static int num_dumpi_traces = 0;
@@ -293,7 +292,7 @@ struct nw_state
 #endif /* if LP_DEBUG */
 	long num_events_per_lp;
 	tw_lpid nw_id;
-	short wrkld_end;
+	short wrkld_id;
     int app_id;
     int local_rank;
     int qos_level;
@@ -1251,18 +1250,18 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message *
     // consuming all events until indicated iteration is reached
     bool reached_end = false;
     while (!reached_end) {
-        codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op);
+        codes_workload_get_next(s->wrkld_id, s->app_id, s->local_rank, &mpi_op);
 
         switch (mpi_op.op_type) {
             case CODES_WK_MARK:
                 if (mpi_op.u.send.tag == resume_at_iter) {
                     reached_end = true;
-                    codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op);
+                    codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, &mpi_op);
                 }
                 break;
             // If we reach the end of simulation, rollback once to allow the operation to be processed normally
             case CODES_WK_END:
-                codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op);
+                codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, &mpi_op);
                 reached_end = true;
                 break;
             default:
@@ -2523,6 +2522,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    s->qos_level = 0; //TODO:  We need a more elegant solution for determining if qos is enabled or not.
                      //       This had been -1 but if qos is not configured (single job no workload conf file)
                      //       then this will error out
+   s->wrkld_id = -1;
 
    char type_name[512];
 
@@ -2721,8 +2721,9 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    }
    else 
    {
-   wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank);
+   s->wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank);
    codes_issue_next_event(lp);
+        printf("my wrkld_id = %d\n", s->wrkld_id);
    }
    if(enable_sampling && sampling_interval > 0)
    {
@@ -2911,7 +2912,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
 
 static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
 {
-    codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, m->mpi_op);
+    codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, m->mpi_op);
 
 	if(m->op_type == CODES_WK_END)
     {
@@ -3022,10 +3023,8 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 {
 		//struct codes_workload_op * mpi_op = malloc(sizeof(struct codes_workload_op));
 //        printf("\n App id %d local rank %d ", s->app_id, s->local_rank);
-    //    struct codes_workload_op mpi_op;
-    //    codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op);
 	    struct codes_workload_op * mpi_op = (struct codes_workload_op*)malloc(sizeof(struct codes_workload_op));
-        codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op);
+        codes_workload_get_next(s->wrkld_id, s->app_id, s->local_rank, mpi_op);
         m->mpi_op = mpi_op; 
         m->op_type = mpi_op->op_type;
 	
@@ -3562,7 +3561,7 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) {
     // Basic fields
     is_same &= (before->num_events_per_lp == after->num_events_per_lp);
     is_same &= (before->nw_id == after->nw_id);
-    is_same &= (before->wrkld_end == after->wrkld_end);
+    is_same &= (before->wrkld_id == after->wrkld_id);
     is_same &= (before->app_id == after->app_id);
     is_same &= (before->local_rank == after->local_rank);
     is_same &= (before->qos_level == after->qos_level);
@@ -3657,7 +3656,7 @@ static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state)
 #endif /* if LP_DE%sBUG */
     fprintf(out, "%s |     num_events_per_lp = %ld\n", prefix, state->num_events_per_lp);
     fprintf(out, "%s |                 nw_id = %lu\n", prefix, state->nw_id);
-    fprintf(out, "%s |             wrkld_end = %d\n", prefix, state->wrkld_end);
+    fprintf(out, "%s |             wrkld_end = %d\n", prefix, state->wrkld_id);
     fprintf(out, "%s |                app_id = %d\n", prefix, state->app_id);
     fprintf(out, "%s |            local_rank = %d\n", prefix, state->local_rank);
     fprintf(out, "%s |             qos_level = %d\n", prefix, state->qos_level);

From d3f75b8bc4e44d7e9f5c022fe3a81ce9d45bcf07 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sat, 31 May 2025 17:09:44 -0400
Subject: [PATCH 137/188] Renaming another variable from surrogate to
 network-surrogate

---
 codes/surrogate/init.h                        |  4 +--
 codes/surrogate/network-surrogate.h           |  6 ++--
 src/surrogate/init.c                          | 30 ++++++++--------
 src/surrogate/network-surrogate.c             | 34 +++++++++----------
 .../packet-latency-predictor/average.c        |  2 +-
 5 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h
index 3a9a2169..6846b2e0 100644
--- a/codes/surrogate/init.h
+++ b/codes/surrogate/init.h
@@ -43,10 +43,10 @@ struct network_surrogate_config {
 void network_surrogate_configure(
         char const * const annotation,
         struct network_surrogate_config * const config,
-        struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it
+        struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor. Caller does not need to free pointer
 );
 
-extern struct network_surrogate_config surr_config;
+extern struct network_surrogate_config net_surr_config;
 extern bool is_network_surrogate_configured;
 
 #ifdef __cplusplus
diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h
index f941ea9f..a6060ea1 100644
--- a/codes/surrogate/network-surrogate.h
+++ b/codes/surrogate/network-surrogate.h
@@ -1,5 +1,5 @@
-#ifndef CODES_SURROGATE_SWITCH_H
-#define CODES_SURROGATE_SWITCH_H
+#ifndef CODES_SURROGATE_NETWORK_SURROGATE_H
+#define CODES_SURROGATE_NETWORK_SURROGATE_H
 
 /**
  * switch.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes
@@ -58,7 +58,7 @@ struct switch_at_struct {
     double * time_stampts; // list of precise timestamps at which to switch
 };
 
-extern struct switch_at_struct switch_at;
+extern struct switch_at_struct switch_network_at;
 
 // Main function responsible for switching between high-fidelity and (network) surrogate
 void network_director(tw_pe * pe);
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 7c969924..63f9ff89 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -6,10 +6,10 @@
 #endif
 
 bool freeze_network_on_switch = true;
-struct network_surrogate_config surr_config = {0};
+struct network_surrogate_config net_surr_config = {0};
 bool is_network_surrogate_configured = false;
-struct switch_at_struct switch_at;
-static struct packet_latency_predictor current_predictor = {0};
+struct switch_at_struct switch_network_at;
+static struct packet_latency_predictor current_net_predictor = {0};
 
 
 // === Stats!
@@ -33,7 +33,7 @@ void network_surrogate_configure(
     is_network_surrogate_configured = true;
 
     // This is the only place where the director data should be loaded and set up
-    surr_config = *sc;
+    net_surr_config = *sc;
 
     // Determining which director mode to set up
     char director_mode[MAX_NAME_LENGTH];
@@ -47,25 +47,25 @@ void network_surrogate_configure(
         size_t len;
         configuration_get_multivalue(&config, "NETWORK_SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
 
-        switch_at.current_i = 0;
-        switch_at.total = len;
-        switch_at.time_stampts = malloc(len * sizeof(double));
+        switch_network_at.current_i = 0;
+        switch_network_at.total = len;
+        switch_network_at.time_stampts = malloc(len * sizeof(double));
 
         for (size_t i = 0; i < len; i++) {
             errno = 0;
-            switch_at.time_stampts[i] = strtod(timestamps[i], NULL);
+            switch_network_at.time_stampts[i] = strtod(timestamps[i], NULL);
             if (errno == ERANGE || errno == EILSEQ){
                 tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
             }
 
-            PRINTF_ONCE("%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", ");
+            PRINTF_ONCE("%g%s", switch_network_at.time_stampts[i], i == len-1 ? "" : ", ");
         }
         PRINTF_ONCE("\n");
 
         // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
         g_tw_gvt_hook = network_director;
 
-        tw_trigger_gvt_hook_at(switch_at.time_stampts[0]);
+        tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]);
 
         // freeing timestamps before it dissapears
         for (size_t i = 0; i < len; i++) {
@@ -82,8 +82,8 @@ void network_surrogate_configure(
     configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
     if (*latency_pred_name) {
         if (strcmp(latency_pred_name, "average") == 0) {
-            current_predictor = average_latency_predictor(surr_config.total_terminals);
-            *pl_pred = &current_predictor;
+            current_net_predictor = average_latency_predictor(net_surr_config.total_terminals);
+            *pl_pred = &current_net_predictor;
 
 #ifdef USE_TORCH
         } else if (strcmp(latency_pred_name, "torch-jit") == 0) {
@@ -111,8 +111,8 @@ void network_surrogate_configure(
                     ")", latency_pred_name);
         }
     } else {
-        current_predictor = average_latency_predictor(surr_config.total_terminals);
-        *pl_pred = &current_predictor;
+        current_net_predictor = average_latency_predictor(net_surr_config.total_terminals);
+        *pl_pred = &current_net_predictor;
         PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n");
     }
 
@@ -146,7 +146,7 @@ void network_surrogate_configure(
 
     //surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
+        fprintf(stderr, "Simulation starting on %s mode\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
 }
 // === END OF All things Surrogate Configuration
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index 38875061..a8bc671c 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -12,10 +12,10 @@ static double surrogate_time_last = 0.0;
 //
 
 static struct lp_types_switch const * get_type_switch(char const * const name) {
-    for (size_t i = 0; i < surr_config.n_lp_types; i++) {
+    for (size_t i = 0; i < net_surr_config.n_lp_types; i++) {
         //printf("THIS %s and %s\n", surr_config.lp_types[i].lpname, name);
-        if (strcmp(surr_config.lp_types[i].lpname, name) == 0) {
-            return &surr_config.lp_types[i];
+        if (strcmp(net_surr_config.lp_types[i].lpname, name) == 0) {
+            return &net_surr_config.lp_types[i];
         }
     }
     return NULL;
@@ -39,15 +39,15 @@ static void shift_events_to_future_pe(tw_pe * pe) {
     // We have to put the events back into the queue after we switch back, but if we never
     // switch back they will never get to be processed and thus we can clean them
     double switch_offset = g_tw_ts_end;
-    if (switch_at.current_i < switch_at.total) {
-        double const next_switch = switch_at.time_stampts[switch_at.current_i + 1];
+    if (switch_network_at.current_i < switch_network_at.total) {
+        double const next_switch = switch_network_at.time_stampts[switch_network_at.current_i + 1];
         double const pre_switch_time = gvt;
         switch_offset = next_switch - pre_switch_time;
         assert(pre_switch_time < next_switch);
         //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
     }
-    assert(0 <= switch_at.current_i && switch_at.current_i < switch_at.total);
-    double const current_switch_time = switch_at.time_stampts[switch_at.current_i];
+    assert(0 <= switch_network_at.current_i && switch_network_at.current_i < switch_network_at.total);
+    double const current_switch_time = switch_network_at.time_stampts[switch_network_at.current_i];
     assert(current_switch_time <= gvt);
 
     tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue
@@ -323,14 +323,14 @@ void switch_model(tw_pe * pe) {
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         tw_scheduler_rollback_and_cancel_events_pe(pe);
     }
-    surr_config.director.switch_surrogate();
+    net_surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        printf("Switching to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
+        printf("Switching to %s\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
 
     // "Freezing" network events and activating LP's switch functions
     if (freeze_network_on_switch) {
-        if (surr_config.director.is_surrogate_on()) {
+        if (net_surr_config.director.is_surrogate_on()) {
             model_net_method_switch_to_surrogate();
             events_high_def_to_surrogate_switch(pe);
         } else {
@@ -358,7 +358,7 @@ void network_director(tw_pe * pe) {
         }
         if (DEBUG_DIRECTOR == 3) {
             printf("GVT %d at %f in %s\n", i++, gvt,
-                    surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+                    net_surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
         }
     }
 
@@ -372,14 +372,14 @@ void network_director(tw_pe * pe) {
     // Do not process if the simulation ended
     if (gvt >= g_tw_ts_end) {
         // If the simulation ended and the surrogate is still on, stop timer checking surrogate time
-        if (surr_config.director.is_surrogate_on()) {
+        if (net_surr_config.director.is_surrogate_on()) {
             time_in_surrogate += tw_clock_read() - surrogate_time_last;
         }
         return;
     }
 
     // ---- Past this means that we are in fact switching ----
-    bool const pre_switch_status = surr_config.director.is_surrogate_on();
+    bool const pre_switch_status = net_surr_config.director.is_surrogate_on();
 
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -395,8 +395,8 @@ void network_director(tw_pe * pe) {
     surrogate_switching_time += end - start;
 
     // Setting trigger for next switch
-    if (++switch_at.current_i < switch_at.total) {
-        double next_switch = switch_at.time_stampts[switch_at.current_i];
+    if (++switch_network_at.current_i < switch_network_at.total) {
+        double next_switch = switch_network_at.time_stampts[switch_network_at.current_i];
         tw_trigger_gvt_hook_at(next_switch);
     }
 
@@ -408,8 +408,8 @@ void network_director(tw_pe * pe) {
     }
 
     // Determining time in surrogate
-    if (pre_switch_status != surr_config.director.is_surrogate_on()) {
-        if (surr_config.director.is_surrogate_on()) {
+    if (pre_switch_status != net_surr_config.director.is_surrogate_on()) {
+        if (net_surr_config.director.is_surrogate_on()) {
             // Start tracking time spent in surrogate mode
             surrogate_time_last = end;
         } else {
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index 82db7e1c..2b8af6ea 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -29,7 +29,7 @@ static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
     assert(data->aggregated_next_packet_delay.total_msgs == 0);
     assert(data->aggregated_next_packet_delay.sum_latency == 0);
 
-    data->num_terminals = surr_config.total_terminals;
+    data->num_terminals = net_surr_config.total_terminals;
 }
 
 static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {

From 9bfa92688ee629fb7db8f4edd5c48a94f238c8a1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 2 Jun 2025 11:47:31 -0400
Subject: [PATCH 138/188] Adding some documentation for nw_state

---
 src/network-workloads/model-net-mpi-replay.c | 47 ++++++++++----------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index f456aab0..9dfcd306 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -284,30 +284,40 @@ typedef struct mpi_msgs_queue mpi_msgs_queue;
 typedef struct completed_requests completed_requests;
 typedef struct pending_waits pending_waits;
 
-/* state of the network LP. It contains the pointers to send/receive lists */
+/*
+ * state of the network LP. It contains the pointers to send/receive lists
+ *
+ * nw-lp's can only run one job! Which all start at time 0
+ *
+ * Three possible states for nw-lp:
+ * - run application (non-synthetic workload)
+ * - run background noise pattern (synthetic workload)
+ * - do nothing
+ **/
 struct nw_state
 {
 #if LP_DEBUG
 	size_t num_events_processed;
 #endif /* if LP_DEBUG */
-	long num_events_per_lp;
-	tw_lpid nw_id;
-	short wrkld_id;
-    int app_id;
-    int local_rank;
-    int qos_level;
 
-    int synthetic_pattern;
-    int is_finished;
-    int num_own_job_ranks_completed; //counted by the root rank 0 of a job
-
-     //array of whether this rank knows other jobs are completed.
-    int * known_completed_jobs;
+    tw_lpid nw_id;  // compute node id, as labeled by the network
+    int local_rank; // id local to the application or synthetic workload, this is the number that the application sees, their phony "MPI rank"
 
+    // Parameters used for non-synthetic workloads
+    short wrkld_id; // workload machinery in charge, e.g, swm
+    int app_id;     // application id, position on the queue for this app to run
+    int * known_completed_jobs; //array of whether this rank knows other jobs are completed.
     struct rc_stack * processed_ops;
     struct rc_stack * processed_wait_op;
     struct rc_stack * matched_reqs;
-//    struct rc_stack * indices;
+    struct pending_waits * wait_op; // Pending wait operation
+
+    // Parameters used for synthetic workload parameters
+    int synthetic_pattern;
+    int is_finished;
+    int num_own_job_ranks_completed; //counted by the root rank 0 of a job
+
+    int qos_level;
 
     /* count of sends, receives, collectives and delays */
 	unsigned long num_sends;
@@ -349,9 +359,6 @@ struct nw_state
 	struct qlist_head completed_reqs;
 
     tw_stime cur_interval_end;
-    
-    /* Pending wait operation */
-    struct pending_waits * wait_op;
 
     /* Message size latency information */
     struct qhash_table * msg_sz_table;
@@ -2663,12 +2670,10 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    rc_stack_create(&s->processed_ops);
    rc_stack_create(&s->processed_wait_op);
    rc_stack_create(&s->matched_reqs);
-//   rc_stack_create(&s->indices);
     
    assert(s->processed_ops != NULL);
    assert(s->processed_wait_op != NULL);
    assert(s->matched_reqs != NULL);
-//   assert(s->indices != NULL);
 
    /* clock starts ticking when the first event is processed */
    s->start_time = tw_now(lp);
@@ -2773,7 +2778,6 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
 
     memset(bf, 0, sizeof(tw_bf));
     rc_stack_gc(lp, s->matched_reqs);
-//    rc_stack_gc(lp, s->indices);
     rc_stack_gc(lp, s->processed_ops);
     rc_stack_gc(lp, s->processed_wait_op);
 
@@ -3286,7 +3290,6 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
 
 		//printf("\n LP %ld Time spent in communication %llu ", lp->gid, total_time - s->compute_time);
 	    rc_stack_destroy(s->matched_reqs);
-//	    rc_stack_destroy(s->indices);
 	    rc_stack_destroy(s->processed_ops);
 	    rc_stack_destroy(s->processed_wait_op);
 
@@ -3559,7 +3562,6 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) {
     bool is_same = true;
 
     // Basic fields
-    is_same &= (before->num_events_per_lp == after->num_events_per_lp);
     is_same &= (before->nw_id == after->nw_id);
     is_same &= (before->wrkld_id == after->wrkld_id);
     is_same &= (before->app_id == after->app_id);
@@ -3654,7 +3656,6 @@ static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state)
 #if LP_DEBUG
     fprintf(out, "%s |  num_events_processed = %zu\n", prefix, state->num_events_processed);
 #endif /* if LP_DE%sBUG */
-    fprintf(out, "%s |     num_events_per_lp = %ld\n", prefix, state->num_events_per_lp);
     fprintf(out, "%s |                 nw_id = %lu\n", prefix, state->nw_id);
     fprintf(out, "%s |             wrkld_end = %d\n", prefix, state->wrkld_id);
     fprintf(out, "%s |                app_id = %d\n", prefix, state->app_id);

From 81099b7eb0790408a9b35d66a85156be7bdb1fbb Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 5 Jun 2025 18:27:48 -0400
Subject: [PATCH 139/188] Initial implementation of director for application
 iteration

Many of the input parameters for the director and predictor are
hardcoded. Things like, number of iterations that the application will
run, number of applications and how often to call the director (how many
GVTs to wait until calling it).
---
 .../app-iteration-predictor/average.h         |  21 +
 .../app-iteration-predictor/common.h          |  75 ++++
 codes/surrogate/application-surrogate.h       |  25 ++
 codes/surrogate/init.h                        |   8 +
 src/CMakeLists.txt                            |   3 +
 src/network-workloads/model-net-mpi-replay.c  |  84 +++-
 .../app-iteration-predictor/average.c         | 398 ++++++++++++++++++
 .../app-iteration-predictor/common.c          |   1 +
 src/surrogate/application-surrogate.c         |  74 ++++
 src/surrogate/init.c                          |  24 ++
 10 files changed, 696 insertions(+), 17 deletions(-)
 create mode 100644 codes/surrogate/app-iteration-predictor/average.h
 create mode 100644 codes/surrogate/app-iteration-predictor/common.h
 create mode 100644 codes/surrogate/application-surrogate.h
 create mode 100644 src/surrogate/app-iteration-predictor/average.c
 create mode 100644 src/surrogate/app-iteration-predictor/common.c
 create mode 100644 src/surrogate/application-surrogate.c

diff --git a/codes/surrogate/app-iteration-predictor/average.h b/codes/surrogate/app-iteration-predictor/average.h
new file mode 100644
index 00000000..0ec22283
--- /dev/null
+++ b/codes/surrogate/app-iteration-predictor/average.h
@@ -0,0 +1,21 @@
+#ifndef CODES_SURROGATE_ITERATION_PREDICTOR_AVERAGE_H
+#define CODES_SURROGATE_ITERATION_PREDICTOR_AVERAGE_H
+
+/**
+ * This predictor collects the time that it takes to complete an iteration, and
+ * uses this information as the prediction. The trigger becomes 
+ */
+
+#include "surrogate/app-iteration-predictor/common.h"
+
+struct avg_app_config {
+    int num_apps;
+    int num_nodes_in_pe;
+    int num_of_iters_to_feed;
+};
+
+struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config *);
+
+void free_avg_app_iteration_predictor(void);
+
+#endif /* end of include guard */
diff --git a/codes/surrogate/app-iteration-predictor/common.h b/codes/surrogate/app-iteration-predictor/common.h
new file mode 100644
index 00000000..d2eabc99
--- /dev/null
+++ b/codes/surrogate/app-iteration-predictor/common.h
@@ -0,0 +1,75 @@
+#ifndef CODES_SURROGATE_ITERATION_PREDICTOR_COMMON_H
+#define CODES_SURROGATE_ITERATION_PREDICTOR_COMMON_H
+
+/**
+ * common.h -- common datatypes and functionality to all application iteration predictors
+ * -Elkin Cruz
+ *
+ * Copyright (c) 2025 Rensselaer Polytechnic Institute
+ */
+#include <ross.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Iteration application prediction machinery. Notice that any of these predictors have to know how many iterations to run in total, thus they need data about the number of steps the application will take.
+ */
+
+struct app_iter_node_config {
+    int app_id;
+    int app_ending_iter;
+};
+
+// This returns how much to skip ahead and when to restart
+struct iteration_pred {
+    int resume_at_iter;
+    double restart_at;
+};
+
+enum FAST_FORWARD {
+    FAST_FORWARD_switching = 0,
+    FAST_FORWARD_restart, // Stop accumulating data (we gain nothing from switching to surrogate-mode) and restart at future point in time
+};
+struct fast_forward_values {
+    enum FAST_FORWARD status;  // Are we switching to surrogate-mode
+    // Only needed for "switching" and "restart"
+    double restarting_at;      // Time at which we will have fully restarted (or expect to)
+};
+
+
+// Model calls to predictor
+typedef void (*init_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config *); // Initializes the predictor (eg, average)
+typedef void (*feed_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id, double iteration_time); // Feeds last iteration time
+typedef void (*end_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, double time); // Tells the predictor that the application has stopped running
+typedef struct iteration_pred (*predict_pred_iter_f) (tw_lp * lp, int nw_id_in_pe); // Get prediction
+typedef void (*predict_pred_iter_rc_f) (tw_lp * lp, int nw_id_in_pe); // Reverse prediction (reverse state of predictor one prediction)
+// Director calls to predictor module
+typedef bool (*have_we_hit_switch_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id); // Are we ready to switch to a future iterationº
+typedef bool (*is_predictor_read_f) (void); // Checking if it is a good time to switch (enough data has been collected or we have received some notification of an application ending, forcing us to restart collecting data). This might trigger an MPI_Allreduce call, thus has to be called by all PEs!
+typedef void (*reset_pred_iter_f) (void); // Resets the predictor (eg, average)
+typedef struct fast_forward_values (*prepare_fast_forward_f) (void); // Checking if it is a good time to switch (enough data has been collected)
+
+// API that predictors have to comply with and 
+struct app_iteration_predictor {
+    struct {
+        init_pred_iter_f        init;
+        feed_pred_iter_f        feed;
+        end_pred_iter_f         ended;
+        predict_pred_iter_f     predict;
+        predict_pred_iter_rc_f  predict_rc;
+        have_we_hit_switch_f    have_we_hit_switch;
+    } model;
+    struct {
+        reset_pred_iter_f       reset;
+        is_predictor_read_f     is_predictor_ready;
+        prepare_fast_forward_f  prepare_fast_forward_jump;
+    } director;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h
new file mode 100644
index 00000000..184a4f98
--- /dev/null
+++ b/codes/surrogate/application-surrogate.h
@@ -0,0 +1,25 @@
+#ifndef CODES_SURROGATE_APP_SURROGATE_H
+#define CODES_SURROGATE_APP_SURROGATE_H
+
+/**
+ * switch.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes for the application level
+ * Elkin Cruz
+ *
+ * Copyright (c) 2025 Rensselaer Polytechnic Institute
+ */
+
+#include <ross.h>
+#include "surrogate/app-iteration-predictor/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Main function responsible for switching between high-fidelity and (application iteration) surrogate
+void application_director_configure(int every_n_gvt, struct app_iteration_predictor *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of include guard */
diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h
index 6846b2e0..28e90a8d 100644
--- a/codes/surrogate/init.h
+++ b/codes/surrogate/init.h
@@ -8,6 +8,7 @@
  * Copyright (c) 2023 Rensselaer Polytechnic Institute
  */
 #include "codes/surrogate/packet-latency-predictor/common.h"
+#include "codes/surrogate/app-iteration-predictor/common.h"
 #include "codes/surrogate/network-surrogate.h"
 
 // A simple macro to clarify code a bit
@@ -49,6 +50,13 @@ void network_surrogate_configure(
 extern struct network_surrogate_config net_surr_config;
 extern bool is_network_surrogate_configured;
 
+void application_surrogate_configure(
+    int num_terminals_on_pe,
+    int num_apps,
+    struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. No need to free pointer
+);
+void free_application_surrogate(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 55e97215..08950e7f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -56,7 +56,10 @@ list(APPEND SRCS
     util/congestion-controller.C
 
     surrogate/init.c
+    surrogate/application-surrogate.c
     surrogate/network-surrogate.c
+    surrogate/app-iteration-predictor/common.c
+    surrogate/app-iteration-predictor/average.c
     surrogate/packet-latency-predictor/common.c
     surrogate/packet-latency-predictor/average.c
 
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 9dfcd306..2d0edd45 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -21,6 +21,7 @@
 #include "codes/codes-jobmap.h"
 #include "codes/congestion-controller-core.h"
 #include "codes/surrogate/init.h"
+#include "surrogate/app-iteration-predictor/common.h"
 
 /* turning on track lp will generate a lot of output messages */
 #define DBG_COMM 1
@@ -42,6 +43,7 @@
 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine
 #define OUTPUT_MARKS 0
 #define LP_DEBUG 0
+#define HARD_CODED_AVG_ITER_PREDICTOR 0
 
 static int msg_size_hash_compare(
             void *key, struct qhash_head *link);
@@ -163,6 +165,9 @@ static double sampling_interval = 5000000;
 static double sampling_end_time = 3000000000;
 static int enable_debug = 0;
 
+// Surrogate variables
+struct app_iteration_predictor *iter_predictor = NULL;
+static int nw_id_counter = 0;
 // We can skip multiple iterations using an average as our predicted iteration time. This will skip ahead to a future step in the simulation
 static struct AvgSurrogateSwitchingTimesForApp *skip_iter_config;
 static size_t skip_iter_config_size = 0;
@@ -301,6 +306,7 @@ struct nw_state
 #endif /* if LP_DEBUG */
 
     tw_lpid nw_id;  // compute node id, as labeled by the network
+    tw_lpid nw_id_in_pe;  // compute node id for this PE
     int local_rank; // id local to the application or synthetic workload, this is the number that the application sees, their phony "MPI rank"
 
     // Parameters used for non-synthetic workloads
@@ -418,6 +424,7 @@ struct nw_message
        int found_match;
        short wait_completed;
        short rend_send;
+       int resume_at_iter;
    } fwd;
 
    // A different struct for each type of MPI_NW_EVENTS
@@ -482,7 +489,7 @@ struct nw_message
            int64_t saved_num_bytes;
        } mpi_ack;
 
-       // Surrogate variables
+        // For SURR_SKIP_ITERATION
        struct {
            struct AvgSurrogateSwitchingTimesForApp * config_used;
        } surr;
@@ -1242,17 +1249,25 @@ static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_sta
 }
 
 static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
-    m->rc.surr.config_used->done = false;
+    if (HARD_CODED_AVG_ITER_PREDICTOR) {
+        m->rc.surr.config_used->done = false;
+    }
 }
 
 static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
 {
     struct codes_workload_op mpi_op;
+    int resume_at_iter;
 
-    struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
-    assert(switch_config != NULL);
-    int const resume_at_iter = switch_config->resume_at_iter;
-    m->rc.surr.config_used = switch_config;
+    if (HARD_CODED_AVG_ITER_PREDICTOR) {
+        struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
+        assert(switch_config != NULL);
+        resume_at_iter = switch_config->resume_at_iter;
+        m->rc.surr.config_used = switch_config;
+        switch_config->done = true;
+    } else {
+        resume_at_iter = m->fwd.resume_at_iter;
+    }
 
     // consuming all events until indicated iteration is reached
     bool reached_end = false;
@@ -1276,8 +1291,6 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message *
         }
     }
 
-    switch_config->done = true;
-
     tw_event *e = tw_event_new(lp->gid, 0.0, lp);
     nw_message* msg = (nw_message*) tw_event_data(e);
     msg->msg_type = MPI_OP_GET_NEXT;
@@ -2516,6 +2529,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
 
    memset(s, 0, sizeof(*s));
    s->nw_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);
+   s->nw_id_in_pe = nw_id_counter++;
    s->mpi_wkld_samples = (struct mpi_workload_sample*)calloc(MAX_STATS, sizeof(struct mpi_workload_sample));
    s->sampling_indx = 0;
    s->is_finished = 0;
@@ -2685,8 +2699,10 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    s->app_id = lid.job;
    s->local_rank = lid.rank;
 
+   bool am_i_synthetic = false;
    if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0)
    {
+        am_i_synthetic = true;
         sscanf(file_name_of_job[lid.job], "synthetic%d", &synthetic_pattern);
         if(synthetic_pattern <=0 || synthetic_pattern > 6)
         {
@@ -2728,7 +2744,6 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    {
    s->wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank);
    codes_issue_next_event(lp);
-        printf("my wrkld_id = %d\n", s->wrkld_id);
    }
    if(enable_sampling && sampling_interval > 0)
    {
@@ -2765,6 +2780,13 @@ void nw_test_init(nw_state* s, tw_lp* lp)
        s->switch_config = NULL;
        s->switch_config_size = 0;
    }
+   if (iter_predictor && !am_i_synthetic) {
+        struct app_iter_node_config conf = {
+            .app_id = s->app_id,
+            .app_ending_iter = s->app_id ? 19 : 20,
+        };
+        iter_predictor->model.init(lp, s->nw_id_in_pe, &conf);
+   }
 
    return;
 }
@@ -3015,6 +3037,9 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
 		break;
 	case CODES_WK_MARK:
 		codes_issue_next_event_rc(lp);
+        if (bf->c13) {
+            iter_predictor->model.predict_rc(lp, s->nw_id_in_pe);
+        }
 		break;
 
 		default:
@@ -3145,15 +3170,30 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
 		case CODES_WK_MARK:
 			{
                 m->rc.mpi_next.mark.saved_marker_time = tw_now(lp);
-
-                // If we have reached the surrogate switch time, skip next iteration(s)
-                if (have_we_hit_surrogate_switch(s, mpi_op)) {
-                    tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp);
-                    nw_message* msg = (nw_message*) tw_event_data(e);
-                    msg->msg_type = SURR_SKIP_ITERATION;
-                    tw_event_send(e);
+                int iteration_i = mpi_op->u.send.tag;
+
+                if (HARD_CODED_AVG_ITER_PREDICTOR) {
+                    // If we have reached the surrogate switch time, skip next iteration(s)
+                    if (have_we_hit_surrogate_switch(s, mpi_op)) {
+                        tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp);
+                        nw_message* msg = (nw_message*) tw_event_data(e);
+                        msg->msg_type = SURR_SKIP_ITERATION;
+                        tw_event_send(e);
+                    } else {
+                        codes_issue_next_event(lp);
+                    }
                 } else {
-                    codes_issue_next_event(lp);
+                    if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) {
+                        bf->c13 = 1;
+                        struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe);
+                        tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp);
+                        nw_message* msg = (nw_message*) tw_event_data(e);
+                        msg->msg_type = SURR_SKIP_ITERATION;
+                        msg->fwd.resume_at_iter = iter_pred.resume_at_iter;
+                        tw_event_send(e);
+                    } else {
+                        codes_issue_next_event(lp);
+                    }
                 }
 			}
 			break;
@@ -3384,10 +3424,16 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
             switch (m->mpi_op->op_type) {
                 case CODES_WK_END:
                     printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.mpi_next.mark.saved_marker_time);
+                    if (iter_predictor) {
+                        iter_predictor->model.ended(lp, s->nw_id_in_pe, m->rc.mpi_next.mark.saved_marker_time);
+                    }
                     break;
 
                 case CODES_WK_MARK:
                     fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time);
+                    if (iter_predictor) {
+                        iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time);
+                    }
 
                     if (OUTPUT_MARKS)
                     {
@@ -4408,6 +4454,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
         assert(ret == 0 || !"lp_io_prepare failure");
     }
 
+   // TODO: read from config whether to load iterator predictor
+   application_surrogate_configure(24, 2, &iter_predictor);
+
    tw_run();
 
     fclose(iteration_log); //Xin
@@ -4489,6 +4538,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    }
 
    print_surrogate_stats();
+   free_application_surrogate();
 
 #ifdef USE_RDAMARIS
     } // end if(g_st_ross_rank)
diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
new file mode 100644
index 00000000..77dfc954
--- /dev/null
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -0,0 +1,398 @@
+#include "surrogate/app-iteration-predictor/average.h"
+#include "codes/codes.h"
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+
+static struct avg_app_config my_config = {0};
+
+struct node_data {
+    int app_id;
+    double acc_iteration_time;
+    double prev_iteration_time;
+    int acc_iters;
+    int last_iter;
+};
+static struct node_data * arr_node_data = NULL; // array containing info for all nodes
+
+enum ENDED_STATUS {
+    ENDED_STATUS_running = 0,
+    ENDED_STATUS_just_ended,       // fully ended in this PE
+    ENDED_STATUS_ended_everywhere, // fully ended on all PEs
+};
+
+struct app_data {
+    int num_nodes;
+    int nodes_with_enough_iters;
+    int ending_iteration;  // last iteration the simulation will run (aka, num of iterations)
+    int nodes_that_have_ended;
+    enum ENDED_STATUS ended;  // use ended to stop accumulating data
+    // To be used when called by the model. Set by `prepare_fast_forward_jump`
+    struct {
+        int jump_at_iter;
+        int resume_at_iter;
+        double restart_at;
+    } pred;
+};
+static struct app_data * arr_app_data = NULL; // array containing info for all apps
+static bool ready_to_skip = false;
+
+
+static void find_max_iter_per_app(int * save_last_iter);
+static inline int app_id_for(int nw_id_in_pe) {
+    return arr_node_data[nw_id_in_pe].app_id;
+}
+
+
+static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) {
+    assert(arr_node_data);
+    if (my_config.num_nodes_in_pe <= nw_id_in_pe) {
+        tw_error(TW_LOC, "Node id relative to PE (%d) is larger than the number of nodes %d", nw_id_in_pe, my_config.num_nodes_in_pe);
+    }
+
+    // Storing node data info
+    arr_node_data[nw_id_in_pe].app_id = config->app_id;
+    arr_node_data[nw_id_in_pe].last_iter = INT_MIN;
+
+    // Storing app data info
+    arr_app_data[config->app_id].num_nodes++;
+    if (arr_app_data[config->app_id].ending_iteration == -1) {
+        arr_app_data[config->app_id].ending_iteration = config->app_ending_iter;
+    } else {
+        if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) {
+            tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration);
+        }
+    }
+}
+
+
+static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
+    (void) lp;
+    assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
+    assert(app_id_for(nw_id_in_pe) != -1);
+    struct node_data * node_data = &arr_node_data[nw_id_in_pe];
+    if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter`
+        return;
+    }
+    node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time;
+    node_data->prev_iteration_time = iteration_time;
+    node_data->acc_iters++;
+    node_data->last_iter = iter;
+    // We've hit the required number of iterations to feed our predictor
+    if (node_data->acc_iters == my_config.num_of_iters_to_feed) {
+        arr_app_data[node_data->app_id].nodes_with_enough_iters++;
+    }
+}
+
+
+static void ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) {
+    assert(app_id_for(nw_id_in_pe) != -1);
+    struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
+    app_data->nodes_that_have_ended++;
+    if (app_data->nodes_that_have_ended == app_data->num_nodes) {
+        app_data->ended = ENDED_STATUS_just_ended;
+    }
+}
+
+
+static struct iteration_pred predict(tw_lp * lp, int nw_id_in_pe) {
+    assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
+    assert(app_id_for(nw_id_in_pe) != -1);
+    struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
+    return (struct iteration_pred) {
+        .resume_at_iter = app_data->pred.resume_at_iter,
+        .restart_at = app_data->pred.restart_at,
+    };
+}
+
+static void predict_rc(tw_lp * lp, int nw_id_in_pe) {}
+
+static void reset_with(bool const * app_just_ended) {
+    ready_to_skip = false;
+    
+    int last_iter[my_config.num_apps];
+    find_max_iter_per_app(last_iter); // We should start tracking iterations from the next iteration
+
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        node_data->acc_iters = 0;
+        node_data->acc_iteration_time = 0;
+        node_data->last_iter = last_iter[node_data->app_id];
+        node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at;
+    }
+    for (int i=0; i < my_config.num_apps; i++) {
+        arr_app_data[i].nodes_with_enough_iters = 0;
+    }
+
+    // If an app just fully ended (ended on all PEs but hasn't been cleaned) then clean it
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (app_just_ended[i]) {
+            arr_app_data[i].ended = ENDED_STATUS_ended_everywhere;
+        }
+    }
+}
+
+static bool have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) {
+    assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
+    int const app_id = app_id_for(nw_id_in_pe);
+    if (ready_to_skip && iteration_id == arr_app_data[app_id].pred.jump_at_iter) {
+        return true;
+    }
+    return false;
+}
+
+static inline bool has_any_app_ended(bool * save_app_just_ended) {
+    // Checking any application has fully ended, in which case we have to restart collecting data
+    bool app_just_ended_here[my_config.num_apps];
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data = &arr_app_data[i];
+        app_just_ended_here[i] = app_data->ended == ENDED_STATUS_just_ended;
+    }
+    if(MPI_Allreduce(&app_just_ended_here, save_app_just_ended, my_config.num_apps, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce call failed!");
+    }
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (save_app_just_ended[i]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static inline bool all_apps_ended(void) {
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data = &arr_app_data[i];
+        if (app_data->ended != ENDED_STATUS_ended_everywhere) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+static inline bool has_everyone_accumulated_enough() {
+    bool everyone = true;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data = &arr_app_data[i];
+        // ignoring apps that have ended already
+        if (app_data->ended != ENDED_STATUS_ended_everywhere) {
+            everyone &= app_data->nodes_with_enough_iters == app_data->num_nodes;
+        }
+    }
+    return everyone;
+}
+
+static bool is_predictor_ready(void) {
+    bool app_just_ended[my_config.num_apps];
+    if (has_any_app_ended(app_just_ended)) {
+        reset_with(app_just_ended);
+        return false;
+    }
+
+    if (all_apps_ended()) {
+        return false;
+    }
+
+    // check that all applications have collected data for enough iterations to jump ahead
+    bool const everyone_ready_here = has_everyone_accumulated_enough();
+    bool everyone_ready;
+    if(MPI_Allreduce(&everyone_ready_here, &everyone_ready, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce call failed!");
+    }
+    return everyone_ready;
+}
+
+
+static void reset(void) {
+    bool app_just_ended[my_config.num_apps];
+    has_any_app_ended(app_just_ended);
+    reset_with(app_just_ended);
+}
+
+static void find_avg_iteration_time(double * save_avg_time) {
+    double acc_iter_time_here[my_config.num_apps];
+    int acc_iters_here[my_config.num_apps];
+    for (int i=0; i < my_config.num_apps; i++) {
+        acc_iter_time_here[i] = 0.0;
+        acc_iters_here[i] = 0;
+    }
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        int const app_id = node_data->app_id;
+        acc_iter_time_here[app_id] += node_data->acc_iteration_time;
+        acc_iters_here[app_id] += node_data->acc_iters;
+    }
+    double acc_iter_time[my_config.num_apps];
+    if(MPI_Allreduce(&acc_iter_time_here, &acc_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+    int acc_iters[my_config.num_apps];
+    if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (acc_iters[i]) {
+            save_avg_time[i] = acc_iter_time[i] / acc_iters[i];
+        }
+    }
+}
+
+static void find_max_iter_per_app(int * save_last_iter) {
+    int last_iter_here[my_config.num_apps];
+    for (int i=0; i < my_config.num_apps; i++) {
+        last_iter_here[i] = -1;
+    }
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        int const app_id = node_data->app_id;
+        if (last_iter_here[app_id] < node_data->last_iter) {
+            last_iter_here[app_id] = node_data->last_iter;
+        }
+    }
+    if(MPI_Allreduce(&last_iter_here, save_last_iter, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't compute maximum");
+    }
+}
+
+static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * last_iter) {
+    int acc_iters_here[my_config.num_apps];
+    double acc_last_iter_time[my_config.num_apps];
+    for (int i=0; i < my_config.num_apps; i++) {
+        acc_iters_here[i] = 0;
+        acc_last_iter_time[i] = 0.0;
+    }
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        int const app_id = node_data->app_id;
+        if (node_data->last_iter == last_iter[app_id]) {
+            acc_last_iter_time[app_id] += node_data->prev_iteration_time;
+            acc_iters_here[app_id]++;
+        }
+    }
+    if(MPI_Allreduce(&acc_last_iter_time, save_last_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+    int acc_iters[my_config.num_apps];
+    if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (acc_iters[i] > 0) {
+            save_last_iter_time[i] /= acc_iters[i];
+        }
+    }
+}
+
+static struct fast_forward_values prepare_fast_forward_jump(void) {
+    // 0. Check if app is still running
+    bool is_running[my_config.num_apps];
+    for (int i=0; i < my_config.num_apps; i++) {
+        is_running[i] = arr_app_data[i].ended != ENDED_STATUS_ended_everywhere;
+    }
+    // 1. Compute end time for each application given current data (pick smallest)
+    //   a. Find avg iteration per app
+    double avg_iter_time[my_config.num_apps];
+    find_avg_iteration_time(avg_iter_time);
+    //   b. Find iteration to start stwich after
+    int last_iter[my_config.num_apps];
+    double last_iter_time[my_config.num_apps];
+    find_max_iter_per_app(last_iter);
+    find_avg_time_for_max_iter(last_iter_time, last_iter);
+    //   c. Compute avg end time for all apps (loop through every node, and add value to avg array)
+    double apps_end_time[my_config.num_apps];
+    for (int i=0; i < my_config.num_apps; i++) {
+        int const iterations_left = arr_app_data[i].ending_iteration - last_iter[i];
+        apps_end_time[i] = last_iter_time[i] + iterations_left * avg_iter_time[i];
+    }
+    //   d. Pick smallest compute end time/time to skip
+    double switch_time = DBL_MAX;
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (is_running[i] && switch_time > apps_end_time[i]) {
+            switch_time = apps_end_time[i];
+        }
+    }
+    // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point
+    //   a. Find iteration to skip to per node
+    double apps_restart_at_time[my_config.num_apps];
+    int apps_restart_at_iter[my_config.num_apps];
+    bool worth_switching = true;
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (!is_running[i]) {
+            continue;
+        }
+        int iters_to_skip = lround((switch_time - last_iter_time[i]) / avg_iter_time[i]);
+        apps_restart_at_time[i] = last_iter_time[i] + iters_to_skip * avg_iter_time[i];
+        apps_restart_at_iter[i] = last_iter[i] + iters_to_skip;
+
+        // if we are not skipping at least two iterations, there is no point in trying to fastforward
+        if (iters_to_skip <= 2) {
+            worth_switching = false;
+        }
+    }
+    //   b. Compute last application to restart (this is restarting_at)
+    double last_to_finish = 0;
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (is_running[i] && last_to_finish < apps_restart_at_time[i]) {
+            last_to_finish = apps_restart_at_time[i];
+        }
+    }
+    //   c. If the number of iterations to skip is zero for any app, force reset of predictor tracking
+    if (!worth_switching) {
+        return (struct fast_forward_values) {
+            .status = FAST_FORWARD_restart,
+            .restarting_at = last_to_finish,
+        };
+    }
+    // 4. Set values for iteration to restart at and iterations to jump for each application
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (!is_running[i]) {
+            continue;
+        }
+        arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1;
+        arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i];
+        arr_app_data[i].pred.restart_at = apps_restart_at_time[i];
+    }
+    ready_to_skip = true;
+
+    return (struct fast_forward_values) {
+        .status = FAST_FORWARD_switching,
+        .restarting_at = last_to_finish,
+    };
+}
+
+struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config * config_) {
+    my_config = *config_;
+    arr_node_data = calloc(my_config.num_nodes_in_pe, sizeof(struct node_data));
+    arr_app_data = calloc(my_config.num_apps, sizeof(struct app_data));
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        arr_node_data[i].app_id = -1;
+    }
+    for (int i=0; i < my_config.num_apps; i++) {
+        arr_app_data[i].ending_iteration = -1;
+    }
+    return (struct app_iteration_predictor) {
+        .model = {
+            .init = init,
+            .feed = feed,
+            .ended = ended,
+            .predict = predict,
+            .predict_rc = predict_rc,
+            .have_we_hit_switch = have_we_hit_switch,
+        },
+        .director = {
+            .reset = reset,
+            .is_predictor_ready = is_predictor_ready,
+            .prepare_fast_forward_jump = prepare_fast_forward_jump,
+        }
+    };
+}
+
+void free_avg_app_iteration_predictor(void) {
+    if (arr_node_data) {
+        free(arr_node_data);
+    }
+    if (arr_app_data) {
+        free(arr_app_data);
+    }
+}
diff --git a/src/surrogate/app-iteration-predictor/common.c b/src/surrogate/app-iteration-predictor/common.c
new file mode 100644
index 00000000..cc8db1a3
--- /dev/null
+++ b/src/surrogate/app-iteration-predictor/common.c
@@ -0,0 +1 @@
+#include "surrogate/app-iteration-predictor/common.h"
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
new file mode 100644
index 00000000..87a8dc74
--- /dev/null
+++ b/src/surrogate/application-surrogate.c
@@ -0,0 +1,74 @@
+#include "surrogate/application-surrogate.h"
+#include <ross-extern.h>
+
+static struct app_iteration_predictor * iter_predictor;
+static int every_n_gvt = 1;
+static enum {
+    PRE_JUMP = 0,
+    POST_JUMP_switched,  // Switched to surrogate-mode
+    POST_JUMP_skipped,   // Did not switch, and skipping until next application finishes
+} director_state;
+
+#ifdef USE_RAND_TIEBREAKER
+#define gvt_for(pe) (pe->GVT_sig.recv_ts)
+#else
+#define gvt_for(pe) (pe->GVT)
+#endif
+
+#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); }
+
+static void application_director_pre_switch(tw_pe * pe) {
+    if (!iter_predictor->director.is_predictor_ready()) {
+        return;
+    }
+    struct fast_forward_values jump_to = iter_predictor->director.prepare_fast_forward_jump();
+    double const restarting_at = jump_to.restarting_at > gvt_for(pe) ? jump_to.restarting_at : gvt_for(pe);
+    switch (jump_to.status) {
+        case FAST_FORWARD_switching:
+            tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward
+            master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+            director_state = POST_JUMP_switched;
+        break;
+
+        case FAST_FORWARD_restart:
+            tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward
+            director_state = POST_JUMP_skipped;
+        break;
+    }
+}
+
+static void application_director_post_switch(tw_pe * pe) {
+    tw_trigger_gvt_hook_every(every_n_gvt);
+    iter_predictor->director.reset();
+
+    if (director_state == POST_JUMP_switched) {
+        master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+    } else {
+        master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+    }
+    director_state = PRE_JUMP;
+}
+
+void application_director(tw_pe * pe) {
+    // Director is not called if the simulation has ended
+    if (gvt_for(pe) >= g_tw_ts_end) {
+        return;
+    }
+    switch (director_state) {
+        case PRE_JUMP:
+            application_director_pre_switch(pe);
+        break;
+        case POST_JUMP_switched:
+        case POST_JUMP_skipped:
+            application_director_post_switch(pe);
+        break;
+    }
+}
+
+void application_director_configure(int every_n_gvt_, struct app_iteration_predictor * iter_predictor_) {
+    every_n_gvt = every_n_gvt_;
+    iter_predictor = iter_predictor_;
+    g_tw_gvt_hook = application_director;
+    director_state = PRE_JUMP;
+    tw_trigger_gvt_hook_every(every_n_gvt);
+}
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 63f9ff89..0a7386a4 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -1,5 +1,7 @@
 #include <codes/surrogate/init.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
+#include <codes/surrogate/application-surrogate.h>
+#include <codes/surrogate/app-iteration-predictor/average.h>
 
 #ifdef USE_TORCH
 #include <codes/surrogate/packet-latency-predictor/torch-jit.h>
@@ -10,6 +12,7 @@ struct network_surrogate_config net_surr_config = {0};
 bool is_network_surrogate_configured = false;
 struct switch_at_struct switch_network_at;
 static struct packet_latency_predictor current_net_predictor = {0};
+static struct app_iteration_predictor current_iter_predictor = {0};
 
 
 // === Stats!
@@ -149,4 +152,25 @@ void network_surrogate_configure(
         fprintf(stderr, "Simulation starting on %s mode\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
 }
+
+void application_surrogate_configure(
+    int num_terminals_in_pe,
+    int num_apps,
+    struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. Caller must free it
+) {
+    // TODO: get configuration settings from common configuration file settings
+    struct avg_app_config predictor_config = {
+        .num_apps = num_apps,
+        .num_nodes_in_pe = num_terminals_in_pe,
+        .num_of_iters_to_feed = 5,
+    };
+    int every_n_gvt = 100;
+    current_iter_predictor = avg_app_iteration_predictor(&predictor_config);
+    application_director_configure(every_n_gvt, &current_iter_predictor);
+    *iter_pred = &current_iter_predictor;
+}
+
+void free_application_surrogate(void) {
+    free_avg_app_iteration_predictor();
+}
 // === END OF All things Surrogate Configuration

From 53f51c478625ebf439b338afeb826c77ab3cd4ae Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 9 Jun 2025 12:17:14 -0400
Subject: [PATCH 140/188] Fixing bug on predictor when app is not fully
 distributed across all PEs

An app/workload was assumed to be distributed on all PEs, which is not
always true. Now it is possible to have a workload in one part of the PE
space than others.
---
 .../app-iteration-predictor/average.c         | 59 ++++++++++++++++---
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index 77dfc954..f8105b2f 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -4,6 +4,8 @@
 #include <limits.h>
 #include <math.h>
 
+#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); }
+
 static struct avg_app_config my_config = {0};
 
 struct node_data {
@@ -56,7 +58,7 @@ static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * conf
 
     // Storing app data info
     arr_app_data[config->app_id].num_nodes++;
-    if (arr_app_data[config->app_id].ending_iteration == -1) {
+    if (arr_app_data[config->app_id].ending_iteration == INT_MIN) {
         arr_app_data[config->app_id].ending_iteration = config->app_ending_iter;
     } else {
         if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) {
@@ -74,6 +76,9 @@ static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
     if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter`
         return;
     }
+    if (arr_app_data[node_data->app_id].ended != ENDED_STATUS_running) {
+        tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured");
+    }
     node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time;
     node_data->prev_iteration_time = iteration_time;
     node_data->acc_iters++;
@@ -117,8 +122,10 @@ static void reset_with(bool const * app_just_ended) {
         struct node_data * node_data = &arr_node_data[i];
         node_data->acc_iters = 0;
         node_data->acc_iteration_time = 0;
-        node_data->last_iter = last_iter[node_data->app_id];
-        node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at;
+        if (node_data->last_iter < arr_app_data[node_data->app_id].pred.resume_at_iter) {
+            node_data->last_iter = last_iter[node_data->app_id];
+            node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at;
+        }
     }
     for (int i=0; i < my_config.num_apps; i++) {
         arr_app_data[i].nodes_with_enough_iters = 0;
@@ -141,6 +148,35 @@ static bool have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) {
     return false;
 }
 
+static inline void post_init_share_ending_iteration(void) {
+    // Sharing ending_iteration results across PEs
+    int ending_iteration_here[my_config.num_apps];
+    for (int i = 0; i < my_config.num_apps; i++) {
+        ending_iteration_here[i] = arr_app_data[i].ending_iteration;
+    }
+    int ending_iteration[my_config.num_apps];
+    if(MPI_Allreduce(ending_iteration_here, ending_iteration, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce call failed!");
+    }
+
+    // Checking that total iterations are the same across nodes
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data = &arr_app_data[i];
+        if (app_data->ending_iteration == INT_MIN) {
+            if (ending_iteration[i] == INT_MIN) {
+                app_data->ended = ENDED_STATUS_ended_everywhere;
+                master_printf("Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i);
+            } else {
+                // The application has "completed" in this PE already!
+                app_data->ended = ENDED_STATUS_just_ended;
+            }
+            app_data->ending_iteration = ending_iteration[i];
+        } else if (ending_iteration[i] != app_data->ending_iteration) {
+            tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data->ending_iteration);
+        }
+    }
+}
+
 static inline bool has_any_app_ended(bool * save_app_just_ended) {
     // Checking any application has fully ended, in which case we have to restart collecting data
     bool app_just_ended_here[my_config.num_apps];
@@ -175,7 +211,9 @@ static inline bool has_everyone_accumulated_enough() {
     for (int i = 0; i < my_config.num_apps; i++) {
         struct app_data * app_data = &arr_app_data[i];
         // ignoring apps that have ended already
-        if (app_data->ended != ENDED_STATUS_ended_everywhere) {
+        bool const app_in_pe = app_data->num_nodes > 0;
+        bool const hasnt_ended = app_data->completed != ENDED_STATUS_ended_everywhere;
+        if (app_in_pe && hasnt_ended) {
             everyone &= app_data->nodes_with_enough_iters == app_data->num_nodes;
         }
     }
@@ -183,6 +221,11 @@ static inline bool has_everyone_accumulated_enough() {
 }
 
 static bool is_predictor_ready(void) {
+    static bool post_init_done = false;
+    if (!post_init_done) {
+        post_init_share_ending_iteration();
+        post_init_done = true;
+    }
     bool app_just_ended[my_config.num_apps];
     if (has_any_app_ended(app_just_ended)) {
         reset_with(app_just_ended);
@@ -241,7 +284,7 @@ static void find_avg_iteration_time(double * save_avg_time) {
 static void find_max_iter_per_app(int * save_last_iter) {
     int last_iter_here[my_config.num_apps];
     for (int i=0; i < my_config.num_apps; i++) {
-        last_iter_here[i] = -1;
+        last_iter_here[i] = INT_MIN;
     }
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
@@ -366,10 +409,12 @@ struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config
     arr_node_data = calloc(my_config.num_nodes_in_pe, sizeof(struct node_data));
     arr_app_data = calloc(my_config.num_apps, sizeof(struct app_data));
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
-        arr_node_data[i].app_id = -1;
+        struct node_data * node_data = &arr_node_data[i];
+        node_data->app_id = -1;
+        node_data->last_iter = INT_MIN;
     }
     for (int i=0; i < my_config.num_apps; i++) {
-        arr_app_data[i].ending_iteration = -1;
+        arr_app_data[i].ending_iteration = INT_MIN;
     }
     return (struct app_iteration_predictor) {
         .model = {

From ffea77be9bd27a5281bbaa44da5b1f3edc1ac95b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 9 Jun 2025 12:20:25 -0400
Subject: [PATCH 141/188] Refactoring/renaming some fields to aid legibility

---
 .../app-iteration-predictor/average.c         | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index f8105b2f..0ecfcc89 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -17,10 +17,10 @@ struct node_data {
 };
 static struct node_data * arr_node_data = NULL; // array containing info for all nodes
 
-enum ENDED_STATUS {
-    ENDED_STATUS_running = 0,
-    ENDED_STATUS_just_ended,       // fully ended in this PE
-    ENDED_STATUS_ended_everywhere, // fully ended on all PEs
+enum APP_STATUS {
+    APP_STATUS_running = 0,
+    APP_STATUS_just_completed,       // fully ended in this PE
+    APP_STATUS_completed_everywhere, // fully ended on all PEs
 };
 
 struct app_data {
@@ -28,7 +28,7 @@ struct app_data {
     int nodes_with_enough_iters;
     int ending_iteration;  // last iteration the simulation will run (aka, num of iterations)
     int nodes_that_have_ended;
-    enum ENDED_STATUS ended;  // use ended to stop accumulating data
+    enum APP_STATUS status;  // use ended to stop accumulating data
     // To be used when called by the model. Set by `prepare_fast_forward_jump`
     struct {
         int jump_at_iter;
@@ -46,7 +46,7 @@ static inline int app_id_for(int nw_id_in_pe) {
 }
 
 
-static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) {
+static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) {
     assert(arr_node_data);
     if (my_config.num_nodes_in_pe <= nw_id_in_pe) {
         tw_error(TW_LOC, "Node id relative to PE (%d) is larger than the number of nodes %d", nw_id_in_pe, my_config.num_nodes_in_pe);
@@ -68,7 +68,7 @@ static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * conf
 }
 
 
-static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
+static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
     (void) lp;
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
     assert(app_id_for(nw_id_in_pe) != -1);
@@ -76,7 +76,7 @@ static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
     if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter`
         return;
     }
-    if (arr_app_data[node_data->app_id].ended != ENDED_STATUS_running) {
+    if (arr_app_data[node_data->app_id].status != APP_STATUS_running) {
         tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured");
     }
     node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time;
@@ -90,17 +90,17 @@ static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
 }
 
 
-static void ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) {
+static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) {
     assert(app_id_for(nw_id_in_pe) != -1);
     struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
     app_data->nodes_that_have_ended++;
     if (app_data->nodes_that_have_ended == app_data->num_nodes) {
-        app_data->ended = ENDED_STATUS_just_ended;
+        app_data->status = APP_STATUS_just_completed;
     }
 }
 
 
-static struct iteration_pred predict(tw_lp * lp, int nw_id_in_pe) {
+static struct iteration_pred model_calls_predict(tw_lp * lp, int nw_id_in_pe) {
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
     assert(app_id_for(nw_id_in_pe) != -1);
     struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
@@ -110,7 +110,7 @@ static struct iteration_pred predict(tw_lp * lp, int nw_id_in_pe) {
     };
 }
 
-static void predict_rc(tw_lp * lp, int nw_id_in_pe) {}
+static void model_calls_predict_rc(tw_lp * lp, int nw_id_in_pe) {}
 
 static void reset_with(bool const * app_just_ended) {
     ready_to_skip = false;
@@ -134,12 +134,12 @@ static void reset_with(bool const * app_just_ended) {
     // If an app just fully ended (ended on all PEs but hasn't been cleaned) then clean it
     for (int i = 0; i < my_config.num_apps; i++) {
         if (app_just_ended[i]) {
-            arr_app_data[i].ended = ENDED_STATUS_ended_everywhere;
+            arr_app_data[i].status = APP_STATUS_completed_everywhere;
         }
     }
 }
 
-static bool have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) {
+static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) {
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
     int const app_id = app_id_for(nw_id_in_pe);
     if (ready_to_skip && iteration_id == arr_app_data[app_id].pred.jump_at_iter) {
@@ -164,11 +164,11 @@ static inline void post_init_share_ending_iteration(void) {
         struct app_data * app_data = &arr_app_data[i];
         if (app_data->ending_iteration == INT_MIN) {
             if (ending_iteration[i] == INT_MIN) {
-                app_data->ended = ENDED_STATUS_ended_everywhere;
+                app_data->status = APP_STATUS_completed_everywhere;
                 master_printf("Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i);
             } else {
                 // The application has "completed" in this PE already!
-                app_data->ended = ENDED_STATUS_just_ended;
+                app_data->status = APP_STATUS_just_completed;
             }
             app_data->ending_iteration = ending_iteration[i];
         } else if (ending_iteration[i] != app_data->ending_iteration) {
@@ -182,7 +182,7 @@ static inline bool has_any_app_ended(bool * save_app_just_ended) {
     bool app_just_ended_here[my_config.num_apps];
     for (int i = 0; i < my_config.num_apps; i++) {
         struct app_data * app_data = &arr_app_data[i];
-        app_just_ended_here[i] = app_data->ended == ENDED_STATUS_just_ended;
+        app_just_ended_here[i] = app_data->status == APP_STATUS_just_completed;
     }
     if(MPI_Allreduce(&app_just_ended_here, save_app_just_ended, my_config.num_apps, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
         tw_error(TW_LOC, "MPI_Allreduce call failed!");
@@ -198,7 +198,7 @@ static inline bool has_any_app_ended(bool * save_app_just_ended) {
 static inline bool all_apps_ended(void) {
     for (int i = 0; i < my_config.num_apps; i++) {
         struct app_data * app_data = &arr_app_data[i];
-        if (app_data->ended != ENDED_STATUS_ended_everywhere) {
+        if (app_data->status != APP_STATUS_completed_everywhere) {
             return false;
         }
     }
@@ -212,7 +212,7 @@ static inline bool has_everyone_accumulated_enough() {
         struct app_data * app_data = &arr_app_data[i];
         // ignoring apps that have ended already
         bool const app_in_pe = app_data->num_nodes > 0;
-        bool const hasnt_ended = app_data->completed != ENDED_STATUS_ended_everywhere;
+        bool const hasnt_ended = app_data->status != APP_STATUS_completed_everywhere;
         if (app_in_pe && hasnt_ended) {
             everyone &= app_data->nodes_with_enough_iters == app_data->num_nodes;
         }
@@ -220,7 +220,7 @@ static inline bool has_everyone_accumulated_enough() {
     return everyone;
 }
 
-static bool is_predictor_ready(void) {
+static bool director_calls_is_predictor_ready(void) {
     static bool post_init_done = false;
     if (!post_init_done) {
         post_init_share_ending_iteration();
@@ -246,7 +246,7 @@ static bool is_predictor_ready(void) {
 }
 
 
-static void reset(void) {
+static void director_calls_reset(void) {
     bool app_just_ended[my_config.num_apps];
     has_any_app_ended(app_just_ended);
     reset_with(app_just_ended);
@@ -327,11 +327,11 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const *
     }
 }
 
-static struct fast_forward_values prepare_fast_forward_jump(void) {
+static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) {
     // 0. Check if app is still running
     bool is_running[my_config.num_apps];
     for (int i=0; i < my_config.num_apps; i++) {
-        is_running[i] = arr_app_data[i].ended != ENDED_STATUS_ended_everywhere;
+        is_running[i] = arr_app_data[i].status != APP_STATUS_completed_everywhere;
     }
     // 1. Compute end time for each application given current data (pick smallest)
     //   a. Find avg iteration per app
@@ -418,17 +418,17 @@ struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config
     }
     return (struct app_iteration_predictor) {
         .model = {
-            .init = init,
-            .feed = feed,
-            .ended = ended,
-            .predict = predict,
-            .predict_rc = predict_rc,
-            .have_we_hit_switch = have_we_hit_switch,
+            .init = model_calls_init,
+            .feed = model_calls_feed,
+            .ended = model_calls_ended,
+            .predict = model_calls_predict,
+            .predict_rc = model_calls_predict_rc,
+            .have_we_hit_switch = model_calls_have_we_hit_switch,
         },
         .director = {
-            .reset = reset,
-            .is_predictor_ready = is_predictor_ready,
-            .prepare_fast_forward_jump = prepare_fast_forward_jump,
+            .reset = director_calls_reset,
+            .is_predictor_ready = director_calls_is_predictor_ready,
+            .prepare_fast_forward_jump = director_calls_prepare_fast_forward_jump,
         }
     };
 }

From 763a71f2640026d09afd548c8d6e0199b2728e3f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 9 Jun 2025 12:21:55 -0400
Subject: [PATCH 142/188] De-harcoding parameters passed down by
 model-net-mpi-replay at init

---
 codes/codes_mapping.h                        |  3 +++
 src/network-workloads/model-net-mpi-replay.c |  5 +++--
 src/util/codes_mapping.c                     | 15 +++++++++++++++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/codes/codes_mapping.h b/codes/codes_mapping.h
index 0e46447b..6d83098c 100644
--- a/codes/codes_mapping.h
+++ b/codes/codes_mapping.h
@@ -23,6 +23,9 @@ extern "C" {
 /* Returns number of LPs on the current PE */
 int codes_mapping_get_lps_for_pe(void);
 
+/* Returns the number of LPs of the given type name */
+tw_lpid codes_mapping_count_lps_of_type(char const lp_type_name[MAX_NAME_LENGTH]);
+
 /* Takes the global LP ID and returns the rank (PE id) on which the LP is mapped.*/
 tw_peid codes_mapping( tw_lpid gid);
 
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 2d0edd45..a75020e4 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4454,8 +4454,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
         assert(ret == 0 || !"lp_io_prepare failure");
     }
 
-   // TODO: read from config whether to load iterator predictor
-   application_surrogate_configure(24, 2, &iter_predictor);
+   tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp");
+   int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
+   application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor);
 
    tw_run();
 
diff --git a/src/util/codes_mapping.c b/src/util/codes_mapping.c
index 9a8554ba..359a6622 100644
--- a/src/util/codes_mapping.c
+++ b/src/util/codes_mapping.c
@@ -519,6 +519,21 @@ static void codes_mapping_init(void)
      return;
 }
 
+tw_lpid codes_mapping_count_lps_of_type(char const lp_type_name[MAX_NAME_LENGTH])
+{
+    tw_lpid count = 0;
+    for (tw_lpid lpid = 0; lpid < g_tw_nlp; lpid ++) {
+        tw_lpid ross_gid = g_tw_lp[lpid]->gid;
+        int grp_id, lpt_id, rep_id, offset;
+        char this_lp_type[MAX_NAME_LENGTH];
+        codes_mapping_get_lp_info(ross_gid, NULL, &grp_id, this_lp_type, &lpt_id, NULL, &rep_id, &offset);  // This lookup could be speed up, but making this call is far simpler rn
+        if (strncmp(lp_type_name, this_lp_type, MAX_NAME_LENGTH)) {
+            count++;
+        }
+    }
+    return count;
+}
+
 /* This function takes the global LP ID, maps it to the local LP ID and returns the LP 
  * lps have global and local LP IDs
  * global LP IDs are unique across all PEs, local LP IDs are unique within a PE */

From 86c25cde28d81fdc38033e374393dfbef1dada43 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 9 Jun 2025 13:05:27 -0400
Subject: [PATCH 143/188] Configuring application surrogate through config file

---
 .../app-iteration-predictor/average.h         |  2 +-
 src/network-workloads/model-net-mpi-replay.c  | 25 ++++++++++++++++---
 .../app-iteration-predictor/average.c         |  2 +-
 src/surrogate/init.c                          |  8 ++++--
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/codes/surrogate/app-iteration-predictor/average.h b/codes/surrogate/app-iteration-predictor/average.h
index 0ec22283..0d3ace0d 100644
--- a/codes/surrogate/app-iteration-predictor/average.h
+++ b/codes/surrogate/app-iteration-predictor/average.h
@@ -11,7 +11,7 @@
 struct avg_app_config {
     int num_apps;
     int num_nodes_in_pe;
-    int num_of_iters_to_feed;
+    int num_iters_to_collect;
 };
 
 struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config *);
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index a75020e4..6c6b0c72 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4103,6 +4103,27 @@ void modelnet_mpi_replay_read_config()
 }
 
 
+void modelnet_mpi_replay_configure_app_surrogate()
+{
+    char app_surrogate_test[MAX_NAME_LENGTH];
+    app_surrogate_test[0] = '\0';
+    int app_surrogate_len = configuration_get_value(&config, "APPLICATION_SURROGATE", "enable", NULL, app_surrogate_test, MAX_NAME_LENGTH);
+
+    // Only configure if APPLICATION_SURROGATE is present and enabled
+    if (app_surrogate_len == 0 || atoi(app_surrogate_test) == 0) {
+        return;
+    }
+
+    tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp");
+    int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
+    application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor);
+
+    if (g_tw_mynode == 0) {
+        printf("Application surrogacy configured with a total of %d jobs\n", num_jobs);
+    }
+}
+
+
 int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 {
   int rank;
@@ -4454,9 +4475,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
         assert(ret == 0 || !"lp_io_prepare failure");
     }
 
-   tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp");
-   int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
-   application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor);
+   modelnet_mpi_replay_configure_app_surrogate();
 
    tw_run();
 
diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index 0ecfcc89..6fc254a6 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -84,7 +84,7 @@ static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double itera
     node_data->acc_iters++;
     node_data->last_iter = iter;
     // We've hit the required number of iterations to feed our predictor
-    if (node_data->acc_iters == my_config.num_of_iters_to_feed) {
+    if (node_data->acc_iters == my_config.num_iters_to_collect) {
         arr_app_data[node_data->app_id].nodes_with_enough_iters++;
     }
 }
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 0a7386a4..47ccc2db 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -158,11 +158,15 @@ void application_surrogate_configure(
     int num_apps,
     struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. Caller must free it
 ) {
-    // TODO: get configuration settings from common configuration file settings
+    char num_iters_str[MAX_NAME_LENGTH];
+    num_iters_str[0] = '\0';
+    int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", "num_iters_to_collect", NULL, num_iters_str, MAX_NAME_LENGTH);
+    int const num_of_iters_to_feed = (rc > 0) ? atoi(num_iters_str) : 5; // default to 5 if not specified
+
     struct avg_app_config predictor_config = {
         .num_apps = num_apps,
         .num_nodes_in_pe = num_terminals_in_pe,
-        .num_of_iters_to_feed = 5,
+        .num_iters_to_collect = num_of_iters_to_feed,
     };
     int every_n_gvt = 100;
     current_iter_predictor = avg_app_iteration_predictor(&predictor_config);

From fa56d854bacfd9923bf9e4915863fb7f78ed718f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 10 Jun 2025 10:50:34 -0400
Subject: [PATCH 144/188] Passing data from non-synthetic workloads to CODES
 through interface

The extended interface asks the workload generator to set extra
information in order to run in application-surrogate mode. If this
information is not given, the surrogate will not run.
---
 codes/codes-workload.h                        |  4 ++
 src/network-workloads/model-net-mpi-replay.c  | 15 ++--
 .../app-iteration-predictor/average.c         |  4 +-
 src/workload/codes-workload.c                 |  7 ++
 .../methods/codes-conc-online-comm-wrkld.C    | 71 ++++++++++++++++++-
 .../methods/codes-online-comm-wrkld.C         | 51 ++++++++++++-
 6 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/codes/codes-workload.h b/codes/codes-workload.h
index 4722b5a4..e83ef3d1 100644
--- a/codes/codes-workload.h
+++ b/codes/codes-workload.h
@@ -354,6 +354,9 @@ int codes_workload_get_time(const char *type,
 		int app_id,
 		int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes);
 
+// Returns the final iteration (positive) after which the workload will stop. If the result is -1, then there is nothing to do
+int codes_workload_get_final_iteration(int wkld_id, int app_id, int rank);
+
 /* implementation structure */
 struct codes_workload_method
 {
@@ -368,6 +371,7 @@ struct codes_workload_method
     int (*codes_workload_finalize)(const char* params, int app_id, int rank);
     /* added for get all read or write time */
     int (*codes_workload_get_time)(const char * params, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes);
+    int (*codes_workload_get_final_iteration)(int app_id, int rank);
 };
 
 
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 6c6b0c72..0468663c 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -2781,11 +2781,16 @@ void nw_test_init(nw_state* s, tw_lp* lp)
        s->switch_config_size = 0;
    }
    if (iter_predictor && !am_i_synthetic) {
-        struct app_iter_node_config conf = {
-            .app_id = s->app_id,
-            .app_ending_iter = s->app_id ? 19 : 20,
-        };
-        iter_predictor->model.init(lp, s->nw_id_in_pe, &conf);
+        int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank);
+        if (ending_iter == -1) {
+            tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. app id=%d", s->app_id);
+        } else {
+            struct app_iter_node_config conf = {
+                .app_id = s->app_id,
+                .app_ending_iter = ending_iter,
+            };
+            iter_predictor->model.init(lp, s->nw_id_in_pe, &conf);
+        }
    }
 
    return;
diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index 6fc254a6..5f04e846 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -71,7 +71,9 @@ static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_c
 static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
     (void) lp;
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
-    assert(app_id_for(nw_id_in_pe) != -1);
+    if (app_id_for(nw_id_in_pe) == -1) {
+        tw_error(TW_LOC, "Predictor for node was not initialized! Node ID (on PE) %d", nw_id_in_pe);
+    }
     struct node_data * node_data = &arr_node_data[nw_id_in_pe];
     if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter`
         return;
diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c
index 45efc8c0..45657be3 100644
--- a/src/workload/codes-workload.c
+++ b/src/workload/codes-workload.c
@@ -368,6 +368,13 @@ int codes_workload_get_rank_cnt(
     return(-1);
 }
 
+int codes_workload_get_final_iteration(int wkld_id, int app_id, int rank) {
+    if (method_array[wkld_id]->codes_workload_get_final_iteration) {
+        return method_array[wkld_id]->codes_workload_get_final_iteration(app_id, rank);
+    }
+    return -1;
+}
+
 void codes_workload_print_op(
         FILE *f,
         struct codes_workload_op *op,
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index 7cb36466..a78f9abf 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -73,6 +73,10 @@ struct shared_context {
     bool isconc;
     ABT_thread      producer;
     std::deque<struct codes_workload_op*> fifo;
+    struct {
+        bool received;
+        int final_iteration;
+    } init_data_from_workload;
 };
 
 struct rank_mpi_context {
@@ -86,8 +90,26 @@ typedef struct rank_mpi_compare {
     int rank;
 } rank_mpi_compare;
 
-
 /* Conceptual online workload implementations */
+
+void UNION_Pass_app_data(struct union_app_data * app_data) {
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err;
+
+    err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+
+    sctx->init_data_from_workload.received = true;
+    sctx->init_data_from_workload.final_iteration = app_data->final_iteration;
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
 void UNION_MPI_Comm_size (UNION_Comm comm, int *size) 
 {
     /* Retreive the shared context state */
@@ -1014,6 +1036,21 @@ void UNION_MPI_Alltoall(const void *sendbuf,
 
 //#ifdef USE_SWM
 
+void SWM_Pass_app_data(struct swm_app_data *app_data) {
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->init_data_from_workload.received = true;
+    sctx->init_data_from_workload.final_iteration = app_data->final_iteration;
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
 /*
  * peer: the receiving peer id 
  * comm_id: the communicator id being used
@@ -1814,6 +1851,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
     my_ctx->sctx.num_ranks = nprocs;
     my_ctx->sctx.wait_id = 0;
     my_ctx->app_id = app_id;
+    my_ctx->sctx.init_data_from_workload.received = false;
 
     // printf("my_ctx nprocs %d\n", my_ctx->sctx.num_ranks);
 
@@ -1943,6 +1981,12 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
             &workload_caller, (void*)&(my_ctx->sctx),
             ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer));
 
+    // Running thread that we just spawn until the producer adds an OP to FIFO or SWM_Mark_total_iterations is called. We use SWM_Mark_total_iterations in order to pass information into CODES from the SWM app.
+    while(my_ctx->sctx.fifo.empty() && !my_ctx->sctx.init_data_from_workload.received)
+    {
+        ABT_thread_yield_to(my_ctx->sctx.producer);
+    }
+
     if(DBG_LINKING)
     {
         printf("\nRank %d create app thread? %d", rank, rcode);
@@ -2049,6 +2093,25 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran
     }                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
     return 0;
 }
+
+static int comm_online_workload_get_final_iteration(int app_id, int rank) {
+    rank_mpi_compare cmp;
+    cmp.app_id = app_id;
+    cmp.rank = rank;
+
+    struct qhash_head * hash_link = qhash_search(rank_tbl, &cmp);
+    if(!hash_link)
+    {
+        printf("Workload/job not found for rank id %d, and app_id %d\n", rank, app_id);
+        return -1;
+    }
+    rank_mpi_context * ctx = qhash_entry(hash_link, rank_mpi_context, hash_link);
+    if (ctx->sctx.init_data_from_workload.received) {
+        return ctx->sctx.init_data_from_workload.final_iteration;
+    }
+    return -1;
+}
+
 extern "C" {
 /* workload method name and function pointers for the CODES workload API */
 struct codes_workload_method conc_online_comm_workload_method =
@@ -2066,7 +2129,11 @@ struct codes_workload_method conc_online_comm_workload_method =
     // .codes_workload_get_rank_cnt
     comm_online_workload_get_rank_cnt,
     // .codes_workload_finalize = 
-    comm_online_workload_finalize
+    comm_online_workload_finalize,
+    // .codes_workload_get_time =
+    NULL,
+    // .codes_workload_get_final_iteration =
+    comm_online_workload_get_final_iteration,
 };
 } // closing brace for extern "C"
 
diff --git a/src/workload/methods/codes-online-comm-wrkld.C b/src/workload/methods/codes-online-comm-wrkld.C
index ca6978c6..8d783403 100644
--- a/src/workload/methods/codes-online-comm-wrkld.C
+++ b/src/workload/methods/codes-online-comm-wrkld.C
@@ -66,6 +66,10 @@ struct shared_context {
     void * swm_obj;
     ABT_thread      producer;
     std::deque<struct codes_workload_op*> fifo;
+    struct {
+        bool received;
+        int final_iteration;
+    } init_data_from_workload;
 };
 
 struct rank_mpi_context {
@@ -79,6 +83,21 @@ typedef struct rank_mpi_compare {
     int rank;
 } rank_mpi_compare;
 
+void SWM_Pass_app_data(struct swm_app_data *app_data) {
+    /* Retreive the shared context state */
+    ABT_thread prod;
+    void * arg;
+    int err = ABT_thread_self(&prod);
+    assert(err == ABT_SUCCESS);
+    err =  ABT_thread_get_arg(prod, &arg);
+    assert(err == ABT_SUCCESS);
+    struct shared_context * sctx = static_cast<shared_context*>(arg);
+    sctx->init_data_from_workload.received = true;
+    sctx->init_data_from_workload.final_iteration = app_data->final_iteration;
+
+    ABT_thread_yield_to(global_prod_thread);
+}
+
 /*
  * peer: the receiving peer id 
  * comm_id: the communicator id being used
@@ -946,6 +965,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
     my_ctx->sctx.num_ranks = nprocs;
     my_ctx->sctx.wait_id = 0;
     my_ctx->app_id = app_id;
+    my_ctx->sctx.init_data_from_workload.received = false;
 
     void** generic_ptrs;
     int array_len = 1;
@@ -1038,6 +1058,12 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
             &workload_caller, (void*)&(my_ctx->sctx),
             ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer));
 
+    // Running thread that we just spawn until the producer adds an OP to FIFO or SWM_Mark_total_iterations is called. We use SWM_Mark_total_iterations in order to pass information into CODES from the SWM app.
+    while(my_ctx->sctx.fifo.empty() && !my_ctx->sctx.init_data_from_workload.received)
+    {
+        ABT_thread_yield_to(my_ctx->sctx.producer);
+    }
+
     rank_mpi_compare cmp;
     cmp.app_id = app_id;
     cmp.rank = rank;
@@ -1112,6 +1138,25 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran
     ABT_thread_free(&(temp_data->sctx.producer));
     return 0;
 }
+
+static int comm_online_workload_get_final_iteration(int app_id, int rank) {
+    rank_mpi_compare cmp;
+    cmp.app_id = app_id;
+    cmp.rank = rank;
+
+    struct qhash_head * hash_link = qhash_search(rank_tbl, &cmp);
+    if(!hash_link)
+    {
+        printf("Workload/job not found for rank id %d, and app_id %d\n", rank, app_id);
+        return -1;
+    }
+    rank_mpi_context * ctx = qhash_entry(hash_link, rank_mpi_context, hash_link);
+    if (ctx->sctx.init_data_from_workload.received) {
+        return ctx->sctx.init_data_from_workload.final_iteration;
+    }
+    return -1;
+}
+
 extern "C" {
 /* workload method name and function pointers for the CODES workload API */
 struct codes_workload_method swm_online_comm_workload_method =
@@ -1129,7 +1174,11 @@ struct codes_workload_method swm_online_comm_workload_method =
     // .codes_workload_get_rank_cnt
     comm_online_workload_get_rank_cnt,
     // .codes_workload_finalize = 
-    comm_online_workload_finalize
+    comm_online_workload_finalize,
+    // .codes_workload_get_time =
+    NULL,
+    // .codes_workload_get_final_iteration
+    comm_online_workload_get_final_iteration,
 };
 } // closing brace for extern "C"
 

From 2433b8ba4dfe41427ccfe1377fcdd525e67b96a2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 10 Jun 2025 15:41:04 -0400
Subject: [PATCH 145/188] Allowing surrogate to run in sequential mode

---
 codes/surrogate/application-surrogate.h      |  17 +++-
 src/network-workloads/model-net-mpi-replay.c |   4 -
 src/surrogate/application-surrogate.c        |  25 ++++-
 src/surrogate/init.c                         | 100 ++++++++++++++++++-
 4 files changed, 134 insertions(+), 12 deletions(-)

diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h
index 184a4f98..111df4ef 100644
--- a/codes/surrogate/application-surrogate.h
+++ b/codes/surrogate/application-surrogate.h
@@ -15,8 +15,23 @@
 extern "C" {
 #endif
 
+enum APP_DIRECTOR_OPTS {
+    APP_DIRECTOR_OPTS_every_n_gvt = 0, // Call director every `n` GVTs
+    APP_DIRECTOR_OPTS_call_every_ns,   // Call director every X (virtual) nanoseconds
+};
+
+struct application_director_config {
+    enum APP_DIRECTOR_OPTS option;
+    union {
+        // To use when APP_DIRECTOR_OPTS_every_n_gvt
+        int every_n_gvt;
+        // To use when APP_DIRECTOR_OPTS_call_every_ns
+        double call_every_ns;
+    };
+};
+
 // Main function responsible for switching between high-fidelity and (application iteration) surrogate
-void application_director_configure(int every_n_gvt, struct app_iteration_predictor *);
+void application_director_configure(struct application_director_config *, struct app_iteration_predictor *);
 
 #ifdef __cplusplus
 }
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 0468663c..4398e50e 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4122,10 +4122,6 @@ void modelnet_mpi_replay_configure_app_surrogate()
     tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp");
     int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
     application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor);
-
-    if (g_tw_mynode == 0) {
-        printf("Application surrogacy configured with a total of %d jobs\n", num_jobs);
-    }
 }
 
 
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
index 87a8dc74..48677cfd 100644
--- a/src/surrogate/application-surrogate.c
+++ b/src/surrogate/application-surrogate.c
@@ -2,7 +2,7 @@
 #include <ross-extern.h>
 
 static struct app_iteration_predictor * iter_predictor;
-static int every_n_gvt = 1;
+static struct application_director_config conf = {.option = APP_DIRECTOR_OPTS_call_every_ns, .every_n_gvt = 1000000};
 static enum {
     PRE_JUMP = 0,
     POST_JUMP_switched,  // Switched to surrogate-mode
@@ -18,6 +18,11 @@ static enum {
 #define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); }
 
 static void application_director_pre_switch(tw_pe * pe) {
+    // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every
+    if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
+        tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
+    }
+
     if (!iter_predictor->director.is_predictor_ready()) {
         return;
     }
@@ -38,7 +43,13 @@ static void application_director_pre_switch(tw_pe * pe) {
 }
 
 static void application_director_post_switch(tw_pe * pe) {
-    tw_trigger_gvt_hook_every(every_n_gvt);
+    // Scheduling next GVT hook call
+    if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
+        tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
+    } else {
+        tw_trigger_gvt_hook_every(conf.every_n_gvt);
+    }
+
     iter_predictor->director.reset();
 
     if (director_state == POST_JUMP_switched) {
@@ -65,10 +76,14 @@ void application_director(tw_pe * pe) {
     }
 }
 
-void application_director_configure(int every_n_gvt_, struct app_iteration_predictor * iter_predictor_) {
-    every_n_gvt = every_n_gvt_;
+void application_director_configure(struct application_director_config * conf_, struct app_iteration_predictor * iter_predictor_) {
+    conf = *conf_;
     iter_predictor = iter_predictor_;
     g_tw_gvt_hook = application_director;
     director_state = PRE_JUMP;
-    tw_trigger_gvt_hook_every(every_n_gvt);
+    if (conf.option == APP_DIRECTOR_OPTS_every_n_gvt) {
+        tw_trigger_gvt_hook_every(conf.every_n_gvt);
+    } else {
+        tw_trigger_gvt_hook_at(conf.call_every_ns);
+    }
 }
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 47ccc2db..c6a0a6aa 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -7,6 +7,8 @@
 #include <codes/surrogate/packet-latency-predictor/torch-jit.h>
 #endif
 
+#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
+
 bool freeze_network_on_switch = true;
 struct network_surrogate_config net_surr_config = {0};
 bool is_network_surrogate_configured = false;
@@ -153,6 +155,86 @@ void network_surrogate_configure(
     }
 }
 
+static int load_and_validate_int_param(const char* param_name, int default_value) {
+    char param_str[MAX_NAME_LENGTH];
+    param_str[0] = '\0';
+    int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", param_name, NULL, param_str, MAX_NAME_LENGTH);
+    int value = (rc > 0) ? atoi(param_str) : default_value;
+
+    if (value <= 0) {
+        tw_warning(TW_LOC, "%s must be a positive integer, got %d. Using default value %d.", param_name, value, default_value);
+        value = default_value;
+    }
+
+    return value;
+}
+
+static struct application_director_config load_director_config(void) {
+    int const default_gvt = 100;
+    int const default_ns = 1000000; // 1ms
+
+    enum {
+        MODE_NOT_SET,
+        MODE_EVERY_N_GVT,
+        MODE_EVERY_N_NANOSECONDS,
+        MODE_UNKNOWN
+    } mode;
+
+    char director_mode[MAX_NAME_LENGTH];
+    director_mode[0] = '\0';
+    int const rc_mode = configuration_get_value(&config, "APPLICATION_SURROGATE", "director_mode", NULL, director_mode, MAX_NAME_LENGTH);
+
+    if (rc_mode == 0) {
+        mode = MODE_NOT_SET;
+    } else if (strcmp(director_mode, "every-n-gvt") == 0) {
+        mode = MODE_EVERY_N_GVT;
+    } else if (strcmp(director_mode, "every-n-nanoseconds") == 0) {
+        mode = MODE_EVERY_N_NANOSECONDS;
+    } else {
+        mode = MODE_UNKNOWN;
+    }
+
+    int every_n_gvt = load_and_validate_int_param("director_num_gvt", default_gvt);
+    int every_n_ns = load_and_validate_int_param("director_num_ns", default_ns);
+
+    bool const is_sequential = (g_tw_synchronization_protocol == SEQUENTIAL ||
+                                g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK);
+
+    struct application_director_config config;
+    switch (mode) {
+        case MODE_EVERY_N_GVT:
+            if (is_sequential) {
+                tw_warning(TW_LOC, "Cannot use 'every-n-gvt' mode in sequential simulation. Forcing 'every-n-nanoseconds' mode.");
+                config.option = APP_DIRECTOR_OPTS_call_every_ns;
+                config.call_every_ns = every_n_ns;
+            } else {
+                config.option = APP_DIRECTOR_OPTS_every_n_gvt;
+                config.every_n_gvt = every_n_gvt;
+            }
+            break;
+
+        case MODE_EVERY_N_NANOSECONDS:
+            config.option = APP_DIRECTOR_OPTS_call_every_ns;
+            config.call_every_ns = every_n_ns;
+            break;
+
+        case MODE_UNKNOWN:
+            tw_warning(TW_LOC, "Unknown director_mode '%s'. Using default mode 'every-n-nanoseconds'.", director_mode);
+            config.option = APP_DIRECTOR_OPTS_call_every_ns;
+            config.call_every_ns = every_n_ns;
+            break;
+
+        case MODE_NOT_SET:
+        default:
+            tw_warning(TW_LOC, "director_mode not set. Using default mode 'every-n-nanoseconds'.");
+            config.option = APP_DIRECTOR_OPTS_call_every_ns;
+            config.call_every_ns = every_n_ns;
+            break;
+    }
+
+    return config;
+}
+
 void application_surrogate_configure(
     int num_terminals_in_pe,
     int num_apps,
@@ -168,10 +250,24 @@ void application_surrogate_configure(
         .num_nodes_in_pe = num_terminals_in_pe,
         .num_iters_to_collect = num_of_iters_to_feed,
     };
-    int every_n_gvt = 100;
+
+    struct application_director_config app_dir_config = load_director_config();
+
     current_iter_predictor = avg_app_iteration_predictor(&predictor_config);
-    application_director_configure(every_n_gvt, &current_iter_predictor);
+    application_director_configure(&app_dir_config, &current_iter_predictor);
     *iter_pred = &current_iter_predictor;
+
+    // Printing configuration summary
+    master_printf("\nApplication surrogate configuration:\n");
+    master_printf("  Predictor - num_apps: %d, num_iters_to_collect: %d\n",
+                  predictor_config.num_apps, predictor_config.num_iters_to_collect);
+
+    if (app_dir_config.option == APP_DIRECTOR_OPTS_every_n_gvt) {
+        master_printf("  Director - mode: every-n-gvt, every_n_gvt: %d\n", app_dir_config.every_n_gvt);
+    } else {
+        master_printf("  Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns);
+    }
+    master_printf("\n");
 }
 
 void free_application_surrogate(void) {

From 26fa2acd5586c26ff62e6c262dad15ad74aa01ad Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 10 Jun 2025 15:48:59 -0400
Subject: [PATCH 146/188] Minor cosmetic change

---
 src/network-workloads/model-net-mpi-replay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 4398e50e..72b8f75f 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -748,7 +748,7 @@ void handle_other_finish(
     assert(ns->app_id == 0); //make sure that only the root workload is getting this notification
     assert(ns->local_rank == 0); //make sure that only the root rank is getting this notification
 
-    printf("App %d: Received finished workload notification",ns->app_id);
+    printf("App %d: Received finished workload notification\n", ns->app_id);
     // if(is_job_synthetic[ns->app_id])
         // return; //nothing for synthetic (background) ranks to do here
     // printf(" And I am not synthetic\n");

From 1b0bdab330212e74ff661cf063daebbe0cebcebd Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 10 Jun 2025 16:17:42 -0400
Subject: [PATCH 147/188] Light refactoring of a large function in the
 application predictor

---
 .../app-iteration-predictor/average.c         | 204 ++++++++++++------
 1 file changed, 136 insertions(+), 68 deletions(-)

diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index 5f04e846..615594a5 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -41,6 +41,12 @@ static bool ready_to_skip = false;
 
 
 static void find_max_iter_per_app(int * save_last_iter);
+static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count);
+static inline void mpi_allreduce_int_sum(int const * local_data, int * result_data, int count);
+static inline void mpi_allreduce_double_sum(double const * local_data, double * result_data, int count);
+static inline void mpi_allreduce_bool_and(bool const * local_data, bool * result_data, int count);
+static inline void init_int_array(int * array, int size, int value);
+static inline void init_double_array(double * array, int size, double value);
 static inline int app_id_for(int nw_id_in_pe) {
     return arr_node_data[nw_id_in_pe].app_id;
 }
@@ -157,9 +163,7 @@ static inline void post_init_share_ending_iteration(void) {
         ending_iteration_here[i] = arr_app_data[i].ending_iteration;
     }
     int ending_iteration[my_config.num_apps];
-    if(MPI_Allreduce(ending_iteration_here, ending_iteration, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce call failed!");
-    }
+    mpi_allreduce_int_max(ending_iteration_here, ending_iteration, my_config.num_apps);
 
     // Checking that total iterations are the same across nodes
     for (int i = 0; i < my_config.num_apps; i++) {
@@ -186,9 +190,7 @@ static inline bool has_any_app_ended(bool * save_app_just_ended) {
         struct app_data * app_data = &arr_app_data[i];
         app_just_ended_here[i] = app_data->status == APP_STATUS_just_completed;
     }
-    if(MPI_Allreduce(&app_just_ended_here, save_app_just_ended, my_config.num_apps, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce call failed!");
-    }
+    mpi_allreduce_bool_and(app_just_ended_here, save_app_just_ended, my_config.num_apps);
     for (int i = 0; i < my_config.num_apps; i++) {
         if (save_app_just_ended[i]) {
             return true;
@@ -241,9 +243,7 @@ static bool director_calls_is_predictor_ready(void) {
     // check that all applications have collected data for enough iterations to jump ahead
     bool const everyone_ready_here = has_everyone_accumulated_enough();
     bool everyone_ready;
-    if(MPI_Allreduce(&everyone_ready_here, &everyone_ready, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce call failed!");
-    }
+    mpi_allreduce_bool_and(&everyone_ready_here, &everyone_ready, 1);
     return everyone_ready;
 }
 
@@ -257,10 +257,8 @@ static void director_calls_reset(void) {
 static void find_avg_iteration_time(double * save_avg_time) {
     double acc_iter_time_here[my_config.num_apps];
     int acc_iters_here[my_config.num_apps];
-    for (int i=0; i < my_config.num_apps; i++) {
-        acc_iter_time_here[i] = 0.0;
-        acc_iters_here[i] = 0;
-    }
+    init_double_array(acc_iter_time_here, my_config.num_apps, 0.0);
+    init_int_array(acc_iters_here, my_config.num_apps, 0);
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
         int const app_id = node_data->app_id;
@@ -268,13 +266,9 @@ static void find_avg_iteration_time(double * save_avg_time) {
         acc_iters_here[app_id] += node_data->acc_iters;
     }
     double acc_iter_time[my_config.num_apps];
-    if(MPI_Allreduce(&acc_iter_time_here, &acc_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
-    }
+    mpi_allreduce_double_sum(acc_iter_time_here, acc_iter_time, my_config.num_apps);
     int acc_iters[my_config.num_apps];
-    if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
-    }
+    mpi_allreduce_int_sum(acc_iters_here, acc_iters, my_config.num_apps);
 
     for (int i=0; i < my_config.num_apps; i++) {
         if (acc_iters[i]) {
@@ -283,11 +277,46 @@ static void find_avg_iteration_time(double * save_avg_time) {
     }
 }
 
+static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't compute maximum");
+    }
+}
+
+static inline void mpi_allreduce_int_sum(int const * local_data, int * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+}
+
+static inline void mpi_allreduce_double_sum(double const * local_data, double * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+}
+
+static inline void mpi_allreduce_bool_and(bool const * local_data, bool * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce call failed!");
+    }
+}
+
+static inline void init_int_array(int * array, int size, int value) {
+    for (int i = 0; i < size; i++) {
+        array[i] = value;
+    }
+}
+
+static inline void init_double_array(double * array, int size, double value) {
+    for (int i = 0; i < size; i++) {
+        array[i] = value;
+    }
+}
+
 static void find_max_iter_per_app(int * save_last_iter) {
     int last_iter_here[my_config.num_apps];
-    for (int i=0; i < my_config.num_apps; i++) {
-        last_iter_here[i] = INT_MIN;
-    }
+    init_int_array(last_iter_here, my_config.num_apps, INT_MIN);
+
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
         int const app_id = node_data->app_id;
@@ -295,18 +324,14 @@ static void find_max_iter_per_app(int * save_last_iter) {
             last_iter_here[app_id] = node_data->last_iter;
         }
     }
-    if(MPI_Allreduce(&last_iter_here, save_last_iter, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't compute maximum");
-    }
+    mpi_allreduce_int_max(last_iter_here, save_last_iter, my_config.num_apps);
 }
 
 static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * last_iter) {
     int acc_iters_here[my_config.num_apps];
     double acc_last_iter_time[my_config.num_apps];
-    for (int i=0; i < my_config.num_apps; i++) {
-        acc_iters_here[i] = 0;
-        acc_last_iter_time[i] = 0.0;
-    }
+    init_int_array(acc_iters_here, my_config.num_apps, 0);
+    init_double_array(acc_last_iter_time, my_config.num_apps, 0.0);
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
         int const app_id = node_data->app_id;
@@ -315,13 +340,9 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const *
             acc_iters_here[app_id]++;
         }
     }
-    if(MPI_Allreduce(&acc_last_iter_time, save_last_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
-    }
+    mpi_allreduce_double_sum(acc_last_iter_time, save_last_iter_time, my_config.num_apps);
     int acc_iters[my_config.num_apps];
-    if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
-        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
-    }
+    mpi_allreduce_int_sum(acc_iters_here, acc_iters, my_config.num_apps);
     for (int i=0; i < my_config.num_apps; i++) {
         if (acc_iters[i] > 0) {
             save_last_iter_time[i] /= acc_iters[i];
@@ -329,40 +350,44 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const *
     }
 }
 
-static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) {
-    // 0. Check if app is still running
-    bool is_running[my_config.num_apps];
-    for (int i=0; i < my_config.num_apps; i++) {
+static void get_running_apps(bool * is_running) {
+    for (int i = 0; i < my_config.num_apps; i++) {
         is_running[i] = arr_app_data[i].status != APP_STATUS_completed_everywhere;
     }
-    // 1. Compute end time for each application given current data (pick smallest)
-    //   a. Find avg iteration per app
-    double avg_iter_time[my_config.num_apps];
-    find_avg_iteration_time(avg_iter_time);
-    //   b. Find iteration to start stwich after
-    int last_iter[my_config.num_apps];
-    double last_iter_time[my_config.num_apps];
-    find_max_iter_per_app(last_iter);
-    find_avg_time_for_max_iter(last_iter_time, last_iter);
-    //   c. Compute avg end time for all apps (loop through every node, and add value to avg array)
+}
+
+static double compute_earliest_end_time(
+    bool const * is_running,
+    double const * avg_iter_time,
+    int const * last_iter,
+    double const * last_iter_time) {
+    // Compute avg end time for all apps (loop through every node, and add value to avg array)
     double apps_end_time[my_config.num_apps];
-    for (int i=0; i < my_config.num_apps; i++) {
+    for (int i = 0; i < my_config.num_apps; i++) {
         int const iterations_left = arr_app_data[i].ending_iteration - last_iter[i];
         apps_end_time[i] = last_iter_time[i] + iterations_left * avg_iter_time[i];
     }
-    //   d. Pick smallest compute end time/time to skip
+    // Pick smallest compute end time/time to skip
     double switch_time = DBL_MAX;
-    for (int i=0; i < my_config.num_apps; i++) {
+    for (int i = 0; i < my_config.num_apps; i++) {
         if (is_running[i] && switch_time > apps_end_time[i]) {
             switch_time = apps_end_time[i];
         }
     }
-    // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point
-    //   a. Find iteration to skip to per node
-    double apps_restart_at_time[my_config.num_apps];
-    int apps_restart_at_iter[my_config.num_apps];
+    return switch_time;
+}
+
+static bool compute_restart_params(
+    bool const * is_running,
+    double const * avg_iter_time,
+    int const * last_iter,
+    double const * last_iter_time,
+    double switch_time,
+    double * apps_restart_at_time,
+    int * apps_restart_at_iter) {
+    // Find iteration to skip to per node
     bool worth_switching = true;
-    for (int i=0; i < my_config.num_apps; i++) {
+    for (int i = 0; i < my_config.num_apps; i++) {
         if (!is_running[i]) {
             continue;
         }
@@ -375,22 +400,27 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void)
             worth_switching = false;
         }
     }
-    //   b. Compute last application to restart (this is restarting_at)
+    return worth_switching;
+}
+
+static double find_latest_restart_time(bool const * is_running, double const * apps_restart_at_time) {
+    // Compute last application to restart (this is restarting_at)
     double last_to_finish = 0;
-    for (int i=0; i < my_config.num_apps; i++) {
+    for (int i = 0; i < my_config.num_apps; i++) {
         if (is_running[i] && last_to_finish < apps_restart_at_time[i]) {
             last_to_finish = apps_restart_at_time[i];
         }
     }
-    //   c. If the number of iterations to skip is zero for any app, force reset of predictor tracking
-    if (!worth_switching) {
-        return (struct fast_forward_values) {
-            .status = FAST_FORWARD_restart,
-            .restarting_at = last_to_finish,
-        };
-    }
-    // 4. Set values for iteration to restart at and iterations to jump for each application
-    for (int i=0; i < my_config.num_apps; i++) {
+    return last_to_finish;
+}
+
+static void set_app_prediction_data(
+    bool const * is_running,
+    int const * last_iter,
+    int const * apps_restart_at_iter,
+    double const * apps_restart_at_time) {
+    // Set values for iteration to restart at and iterations to jump for each application
+    for (int i = 0; i < my_config.num_apps; i++) {
         if (!is_running[i]) {
             continue;
         }
@@ -398,6 +428,44 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void)
         arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i];
         arr_app_data[i].pred.restart_at = apps_restart_at_time[i];
     }
+}
+
+static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) {
+    // 0. Check if app is still running
+    bool is_running[my_config.num_apps];
+    get_running_apps(is_running);
+
+    // 1. Compute end time for each application given current data (pick smallest)
+    //   a. Find avg iteration per app
+    double avg_iter_time[my_config.num_apps];
+    find_avg_iteration_time(avg_iter_time);
+    //   b. Find iteration to start switch after
+    int last_iter[my_config.num_apps];
+    double last_iter_time[my_config.num_apps];
+    find_max_iter_per_app(last_iter);
+    find_avg_time_for_max_iter(last_iter_time, last_iter);
+    //   c. & d. Compute and pick smallest end time/time to skip
+    double switch_time = compute_earliest_end_time(is_running, avg_iter_time, last_iter, last_iter_time);
+
+    // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point
+    //   a. Find iteration to skip to per node
+    double apps_restart_at_time[my_config.num_apps];
+    int apps_restart_at_iter[my_config.num_apps];
+    bool worth_switching = compute_restart_params(is_running, avg_iter_time, last_iter, last_iter_time, switch_time, apps_restart_at_time, apps_restart_at_iter);
+
+    //   b. Compute last application to restart (this is restarting_at)
+    double last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time);
+
+    //   c. If the number of iterations to skip is zero for any app, force reset of predictor tracking
+    if (!worth_switching) {
+        return (struct fast_forward_values) {
+            .status = FAST_FORWARD_restart,
+            .restarting_at = last_to_finish,
+        };
+    }
+
+    // 3. Set values for iteration to restart at and iterations to jump for each application
+    set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time);
     ready_to_skip = true;
 
     return (struct fast_forward_values) {

From 553f4926836cf7afed55d88db5c63264242860e1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 10 Jun 2025 17:02:20 -0400
Subject: [PATCH 148/188] Removing old (hardcoded) application surrogate

---
 src/network-workloads/model-net-mpi-replay.c | 227 ++-----------------
 1 file changed, 13 insertions(+), 214 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 72b8f75f..0c3d9ed1 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -43,7 +43,6 @@
 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine
 #define OUTPUT_MARKS 0
 #define LP_DEBUG 0
-#define HARD_CODED_AVG_ITER_PREDICTOR 0
 
 static int msg_size_hash_compare(
             void *key, struct qhash_head *link);
@@ -106,7 +105,6 @@ int period_count[MAX_JOBS];
 double period_time[MAX_JOBS][MAX_PERIODS_PER_APP];
 float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP];
 char file_name_of_job[MAX_JOBS][8192];
-char skipping_iterations_file[8192];
 
 tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0};
 
@@ -168,9 +166,6 @@ static int enable_debug = 0;
 // Surrogate variables
 struct app_iteration_predictor *iter_predictor = NULL;
 static int nw_id_counter = 0;
-// We can skip multiple iterations using an average as our predicted iteration time. This will skip ahead to a future step in the simulation
-static struct AvgSurrogateSwitchingTimesForApp *skip_iter_config;
-static size_t skip_iter_config_size = 0;
 
 /* set group context */
 struct codes_mctx mapping_context;
@@ -389,10 +384,6 @@ struct nw_state
     char output_buf[512];
     char col_stats[64];
     struct ross_model_sample ross_sample;
-
-    // Configuration to tell the node when to skip some iterations
-    struct AvgSurrogateSwitchingTimesForApp *switch_config;
-    size_t switch_config_size;
 };
 
 /* data for handling reverse computation.
@@ -488,11 +479,6 @@ struct nw_message
        struct {
            int64_t saved_num_bytes;
        } mpi_ack;
-
-        // For SURR_SKIP_ITERATION
-       struct {
-           struct AvgSurrogateSwitchingTimesForApp * config_used;
-       } surr;
    } rc;
 };
 
@@ -1199,75 +1185,13 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
     }
 }
 
-// Surrogate switiching structure
-struct AvgSurrogateSwitchingTimesForApp {
-    int app_id;
-    int skip_at_iter;
-    int resume_at_iter;
-    double time_per_iter;
-    bool done; // This is a flag to indicate whethe we already completed this skipping stage
-};
-
-static int comp_AvgSurrogateSwitchingTimesForApp(
-    struct AvgSurrogateSwitchingTimesForApp *left,
-    struct AvgSurrogateSwitchingTimesForApp *right
-) {
-    if (left->app_id < right->app_id) {
-        return -1;
-    }
-    if (left->app_id > right->app_id) {
-        return 1;
-    }
-    // else: left->app_id == right->app_id
-
-    if (left->skip_at_iter < right->skip_at_iter) {
-        return -1;
-    }
-    if (left->skip_at_iter > right->skip_at_iter) {
-        return 1;
-    }
-
-    return 0;
-}
-
-static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) {
-    return avgSur->resume_at_iter - avgSur->skip_at_iter;
-}
-
-static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) {
-    if (s->switch_config == NULL) {
-        return NULL;
-    }
-    for (int i=0; i < s->switch_config_size; i++) {
-        struct AvgSurrogateSwitchingTimesForApp * jump = &s->switch_config[i];
-        assert(jump->app_id == s->app_id);
-        if (!jump->done) {
-            return jump;
-        }
-    }
-    return NULL;
-}
-
-static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {
-    if (HARD_CODED_AVG_ITER_PREDICTOR) {
-        m->rc.surr.config_used->done = false;
-    }
-}
+// We never rollback all op messages properly. This is because we have not found any situation where we have to fully rollback a SURR_SKIP_ITERATION event. Any event that schedules a SURR_SKIP_ITERATION event will have been completed long before the SURR_SKIP_ITERATION event is processed.
+static void skip_to_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {}
 
 static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m)
 {
     struct codes_workload_op mpi_op;
-    int resume_at_iter;
-
-    if (HARD_CODED_AVG_ITER_PREDICTOR) {
-        struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
-        assert(switch_config != NULL);
-        resume_at_iter = switch_config->resume_at_iter;
-        m->rc.surr.config_used = switch_config;
-        switch_config->done = true;
-    } else {
-        resume_at_iter = m->fwd.resume_at_iter;
-    }
+    int resume_at_iter = m->fwd.resume_at_iter;
 
     // consuming all events until indicated iteration is reached
     bool reached_end = false;
@@ -1297,20 +1221,6 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message *
     tw_event_send(e);
 }
 
-static bool have_we_hit_surrogate_switch(struct nw_state* s, struct codes_workload_op * mpi_op) {
-    struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
-    if (switch_config != NULL) {
-        return mpi_op->u.send.tag == switch_config->skip_at_iter;
-    }
-    return false;
-}
-
-static double time_to_skip_iterations(struct nw_state* s) {
-    struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s);
-    assert(switch_config != NULL);
-    return switch_config->time_per_iter * iters_skipped(switch_config);
-}
-
 /* Debugging functions, may generate unused function warning */
 /*static void print_waiting_reqs(uint32_t * reqs, int count)
 {
@@ -2756,30 +2666,6 @@ void nw_test_init(nw_state* s, tw_lp* lp)
        }
    }
 
-   if (skip_iter_config_size > 0) {
-       size_t size = 0;
-       // Finding number of times to skip for this job
-       for (size_t i = 0; i < skip_iter_config_size; i++) {
-           if (lid.job == skip_iter_config[i].app_id) {
-               size++;
-           }
-       }
-       // Constructing switch_config
-       s->switch_config_size = size;
-       if (size > 0) {
-          s->switch_config = malloc(size * sizeof(struct AvgSurrogateSwitchingTimesForApp));
-          size_t j = 0;
-          for (size_t i = 0; i < skip_iter_config_size; i++) {
-              if (lid.job == skip_iter_config[i].app_id) {
-                  s->switch_config[j] = skip_iter_config[i];
-                  j++;
-              }
-          }
-       }
-   } else {
-       s->switch_config = NULL;
-       s->switch_config_size = 0;
-   }
    if (iter_predictor && !am_i_synthetic) {
         int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank);
         if (ending_iter == -1) {
@@ -3177,28 +3063,16 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
                 m->rc.mpi_next.mark.saved_marker_time = tw_now(lp);
                 int iteration_i = mpi_op->u.send.tag;
 
-                if (HARD_CODED_AVG_ITER_PREDICTOR) {
-                    // If we have reached the surrogate switch time, skip next iteration(s)
-                    if (have_we_hit_surrogate_switch(s, mpi_op)) {
-                        tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp);
-                        nw_message* msg = (nw_message*) tw_event_data(e);
-                        msg->msg_type = SURR_SKIP_ITERATION;
-                        tw_event_send(e);
-                    } else {
-                        codes_issue_next_event(lp);
-                    }
+                if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) {
+                    bf->c13 = 1;
+                    struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe);
+                    tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp);
+                    nw_message* msg = (nw_message*) tw_event_data(e);
+                    msg->msg_type = SURR_SKIP_ITERATION;
+                    msg->fwd.resume_at_iter = iter_pred.resume_at_iter;
+                    tw_event_send(e);
                 } else {
-                    if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) {
-                        bf->c13 = 1;
-                        struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe);
-                        tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp);
-                        nw_message* msg = (nw_message*) tw_event_data(e);
-                        msg->msg_type = SURR_SKIP_ITERATION;
-                        msg->fwd.resume_at_iter = iter_pred.resume_at_iter;
-                        tw_event_send(e);
-                    } else {
-                        codes_issue_next_event(lp);
-                    }
+                    codes_issue_next_event(lp);
                 }
 			}
 			break;
@@ -3337,10 +3211,6 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
 	    rc_stack_destroy(s->matched_reqs);
 	    rc_stack_destroy(s->processed_ops);
 	    rc_stack_destroy(s->processed_wait_op);
-
-    if (s->switch_config != NULL) {
-        free(s->switch_config);
-    }
 }
 
 void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
@@ -3416,7 +3286,7 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l
             break;
 
         case SURR_SKIP_ITERATION:
-            skip_iteration_rc(s, lp, bf, m);
+            skip_to_iteration_rc(s, lp, bf, m);
             break;
 	}
 }
@@ -3669,9 +3539,6 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) {
     is_same &= (strcmp(before->output_buf, after->output_buf) == 0);
     is_same &= (strcmp(before->col_stats, after->col_stats) == 0);
 
-    // Compare switch configuration size
-    is_same &= (before->switch_config_size == after->switch_config_size);
-
     // Complex elements
     is_same &= are_qlist_equal(&before->arrival_queue, &after->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues);
     is_same &= are_qlist_equal(&before->pending_recvs_queue, &after->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues);
@@ -3691,7 +3558,6 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) {
     // - msg_sz_table
     // Pointers used in some data collection (IO) or outside of PDES loop
     // - mpi_wkld_samples
-    // - switch_config
 
     // There is no need to implement msg_sz_table as all values are already
     // accounted for in msg_sz_list. We can safely ignore all values in msg_sz_list
@@ -3819,10 +3685,6 @@ static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state)
     fprintf(out, "%s |    |       comm_time = %g\n", prefix, state->ross_sample.comm_time);
     fprintf(out, "%s |    |        max_time = %g\n", prefix, state->ross_sample.max_time);
     fprintf(out, "%s |    |    avg_msg_time = %g\n", prefix, state->ross_sample.avg_msg_time);
-
-    // Configuration
-    fprintf(out, "%s |*        switch_config = %p\n", prefix, state->switch_config);
-    fprintf(out, "%s |    switch_config_size = %zu\n", prefix, state->switch_config_size);
 }
 
 static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) {
@@ -3927,10 +3789,6 @@ static void print_nw_message(FILE * out, char const * prefix, nw_state* s, struc
             fprintf(out, "%s |   |  mpi_ack.saved_num_bytes = %ld\n", prefix, msg->rc.mpi_ack.saved_num_bytes);
             break;
 
-        case SURR_SKIP_ITERATION:
-            fprintf(out, "%s |   |        surr.config_used = %p\n", prefix, msg->rc.surr.config_used);
-            break;
-
         default:
             break;
     }
@@ -3971,7 +3829,6 @@ const tw_optdef app_opt [] =
 	TWOPT_CHAR("cortex-class", cortex_class, "Python class implementing the CoRtEx translator"),
 	TWOPT_CHAR("cortex-gen", cortex_gen, "Python function to pre-generate MPI events"),
 #endif
-	TWOPT_CHAR("skipping-iterations-file", skipping_iterations_file, "Configuration file name for which steps to skip"),
 	TWOPT_END()
 };
 
@@ -4300,60 +4157,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
         jobmap_ctx = codes_jobmap_configure(CODES_JOBMAP_IDENTITY, &jobmap_ident_p);
     }
 
-
-    // Loading skipping iterations configuration
-    if(strlen(skipping_iterations_file) > 0) {
-        FILE *file = fopen(skipping_iterations_file, "r");
-        if(!file) {
-            tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file);
-        }
-
-        // Finding number of skipping iteration rows
-        int i = 0;
-        for(; !feof(file); i++) {
-            struct AvgSurrogateSwitchingTimesForApp skip_row;
-
-            int ref = fscanf(file, "%d %d %d %lf", &skip_row.app_id, &skip_row.skip_at_iter, &skip_row.resume_at_iter, &skip_row.time_per_iter);
-
-            if (ref != 4) { // We couldn't read all four values
-                fprintf(stderr, "Warning: Couldn't read a row of 'skipping-iterations-file'. Stopping after reading %d rows.\n", i);
-                break;
-            }
-        }
-
-        skip_iter_config_size = i;
-
-        skip_iter_config = malloc(skip_iter_config_size * sizeof(struct AvgSurrogateSwitchingTimesForApp));
-
-        // Loading in memory all times to skip iterations
-        fseek(file, 0, SEEK_SET);
-        for(i = 0; i < skip_iter_config_size; i++) {
-            struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i];
-
-            fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter);
-            skip_row->done = false;
-        }
-        fclose(file);
-
-        // Sorting. To skip iterations we asume that all skips for a specific job appear in increasing order
-        qsort(
-            skip_iter_config,
-            skip_iter_config_size,
-            sizeof(struct AvgSurrogateSwitchingTimesForApp),
-            (int (*)(const void *, const void *)) comp_AvgSurrogateSwitchingTimesForApp);
-
-        // Printing configuration
-        if(!g_tw_mynode && skip_iter_config_size) {
-            printf("\n\nConfiguration for skipping selected iterations of one or more jobs has been loaded.\n");
-            printf("| job_id skip_at_iter resume_at_iter time_per_iter\n");
-            for (size_t i=0; i<skip_iter_config_size; i++) {
-                struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i];
-                printf("| %d %d %d %lf\n", skip_row->app_id, skip_row->skip_at_iter, skip_row->resume_at_iter, skip_row->time_per_iter);
-            }
-            printf("\n");
-        }
-    }
-
     MPI_Comm_rank(MPI_COMM_CODES, &rank);
     MPI_Comm_size(MPI_COMM_CODES, &nprocs);
 
@@ -4554,10 +4357,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    if(alloc_spec)
        codes_jobmap_destroy(jobmap_ctx);
 
-   if (skip_iter_config != NULL) {
-       free(skip_iter_config);
-   }
-
    print_surrogate_stats();
    free_application_surrogate();
 

From 650fd9ea2094e165f218d16a069ab558bbd2ca3c Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 11 Jun 2025 10:48:22 -0400
Subject: [PATCH 149/188] Refactoring strategy to freeze network in network
 director

---
 codes/model-net-lp.h                       |  2 +-
 codes/surrogate/network-surrogate.h        |  6 +-
 src/networks/model-net/core/model-net-lp.c |  4 +-
 src/networks/model-net/dragonfly-dally.C   | 48 +++++-------
 src/surrogate/network-surrogate.c          | 87 ++++------------------
 5 files changed, 39 insertions(+), 108 deletions(-)

diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h
index a7585ce4..0a20a2f9 100644
--- a/codes/model-net-lp.h
+++ b/codes/model-net-lp.h
@@ -136,7 +136,7 @@ void model_net_method_switch_to_highdef(void);
 
 // It will call the function (pointer) on the internal structure/network model.
 // The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype
-void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp, tw_event **), tw_event **);
+void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp, void * data), void * data);
 
 /// The following functions/data structures should not need to be used by
 /// model developers - they are just provided so other internal components can
diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h
index a6060ea1..9a9b2680 100644
--- a/codes/surrogate/network-surrogate.h
+++ b/codes/surrogate/network-surrogate.h
@@ -42,14 +42,16 @@ struct director_data {
 // Parameters: `data` corresponds to the lp sub-state, lp is the lp pointer, and the array of events in queue (to be processed)
 typedef void (*model_switch_f) (void * data, tw_lp * lp, tw_event **);
 typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
+typedef void (*model_check_event_f) (void * state, tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode
 
 struct lp_types_switch {
     char lpname[MAX_NAME_LENGTH];
     bool trigger_idle_modelnet;  // Trigger idle events for model-net (prevents a model to be stuck in a schedule loop if it is to process packets during surrogate-mode). If this is true and the lpname does not start with 'modelnet_', the behaviour is undefined
     model_switch_f        highdef_to_surrogate;
     model_switch_f        surrogate_to_highdef;
-    model_ask_if_freeze_f should_event_be_frozen;  // NULL means event from LP type shouldn't be frozen
-    model_ask_if_freeze_f should_event_be_deleted;  // NULL means event from LP type shouldn't be deleted
+    model_ask_if_freeze_f should_event_be_frozen;  // true means event from LP type shouldn't be frozen
+    model_ask_if_freeze_f should_event_be_deleted;  // true means event from LP type shouldn't be deleted
+    model_check_event_f   check_event_in_queue;
 };
 
 struct switch_at_struct {
diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c
index b513675b..1595f480 100644
--- a/src/networks/model-net/core/model-net-lp.c
+++ b/src/networks/model-net/core/model-net-lp.c
@@ -1498,10 +1498,10 @@ void model_net_method_switch_to_highdef_lp(tw_lp * lp) {
     ns->in_sched_recv_loop |= ns->sched_recv_loop_pre_surrogate;
 }
 
-void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp, tw_event **), tw_event ** lp_events) {
+void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp, void * data), void * data) {
     model_net_base_state * const ns = (model_net_base_state*) lp->cur_state;
 
-    fun(ns->sub_state, lp, lp_events);
+    fun(ns->sub_state, lp, data);
 }
 
 int model_net_get_event_type_lp(model_net_wrap_msg * msg) {
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 0fec30f6..3e2ce29d 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -198,6 +198,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw
 static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp, tw_event **);
 static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event);
 static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event);
+static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( terminal_state * s, tw_lp * lp, tw_event * event);
 //
 // ==== END OF Parameters to tune surrogate mode ====
 
@@ -2449,6 +2450,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                  .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef,
                  .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen,
                  .should_event_be_deleted = NULL,
+                 .check_event_in_queue = (model_check_event_f) dragonfly_dally_terminal_pre_surrogate_switch_event_queue,
                 },
                 {.lpname = "modelnet_dragonfly_dally_router",
                  .trigger_idle_modelnet = false,
@@ -2456,6 +2458,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                  .surrogate_to_highdef = NULL,
                  .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen,
                  .should_event_be_deleted = NULL,
+                 .check_event_in_queue = NULL,
                 },
                 0
             }
@@ -3017,37 +3020,27 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t pa
     }
 }
 
-// Constructs a hashmap with all the T_NOTIFY events to be processed.
-// The key of the list is the GID for the source terminal. The value of the
-// hash is the end time
-static map<uint64_t, double> construct_map_of_NOTIFY_LATENCY_events(
-        tw_lp * lp, tw_event ** const terminal_events) {
-    // hash map to store T_NOTIFY events found (`packet_ID` and `travel_end_time`)
-    map<uint64_t, double> notification_events_map;
-
-    for (size_t i = 0; terminal_events && terminal_events[i] != NULL; i++) {
-        assert(terminal_events[i]->dest_lpid == lp->gid);
-        tw_event * event = terminal_events[i];
-        int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event));
-        // if event is T_NOTIFY, add event relevant data into hash map for T_NOTIFY event
-        if (event_type == MN_BASE_PASS) {
-            terminal_dally_message * msg = (terminal_dally_message *)
-                model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event));
-            if (msg->type == T_NOTIFY) {
-                assert(msg->notify_type == NOTIFY_LATENCY);
-                notification_events_map[msg->packet_ID] = msg->travel_end_time;
-            }
+// We check an event that is in the event queue, thus we do not process it yet
+static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue(
+    terminal_state * s, tw_lp * lp, tw_event * event) {
+    int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event));
+    // if event is T_NOTIFY, add event relevant data into hash map for T_NOTIFY event
+    if (event_type == MN_BASE_PASS) {
+        terminal_dally_message * msg = (terminal_dally_message *)
+            model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event));
+        assert(msg != NULL);
+        if (msg->type == T_NOTIFY) {
+            assert(msg->notify_type == NOTIFY_LATENCY);
+            feed_packet_to_predictor(s, lp, msg->packet_ID, msg->travel_end_time);
+            s->sent_packets.erase(msg->packet_ID);
         }
     }
-
-    return notification_events_map;
 }
 
 // This function never rollsback because it's called at GVT
 static void dragonfly_dally_terminal_highdef_to_surrogate(
         terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
-
-    auto notification_events_map = construct_map_of_NOTIFY_LATENCY_events(lp, terminal_events);
+    (void) terminal_events;
 
     if (s->arrival_of_last_packet.packet_ID != -1) {
         assert(s->sent_packets.count(s->arrival_of_last_packet.packet_ID) == 1); // packet_ID is in s->sent_packets
@@ -3069,13 +3062,8 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
 
         assert(packet_ID == sent.start.packet_ID);
 
-        // Finding out whether the packet-latency is on the list of messages to be processed
-        bool const in_events_to_process = notification_events_map.count(packet_ID) == 1;
-        if (in_events_to_process) {
-            feed_packet_to_predictor(s, lp, packet_ID, notification_events_map[sent.start.packet_ID]);
-
         // The packet has not been delievered. Send directly to destination and notify of zombie event
-        } else if (freeze_network_on_switch) {
+        if (freeze_network_on_switch) {
             struct packet_end predicted_end = 
                 terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &sent.start);
 
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index a8bc671c..8f12ec33 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -69,8 +69,18 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         char const * lp_type_name;
         int rep_id, offset; // unused
         codes_mapping_get_lp_info2(next_event->dest_lpid, NULL, &lp_type_name, NULL, &rep_id, &offset);
+        bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0;
         struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name);
 
+        // "Processing" event
+        if (lp_type_switch && lp_type_switch->check_event_in_queue) {
+            if (is_lp_modelnet) {
+                model_net_method_call_inner(next_event->dest_lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->check_event_in_queue, next_event);
+            } else {
+                lp_type_switch->check_event_in_queue(next_event->dest_lp->cur_state, next_event->dest_lp, next_event);
+            }
+        }
+
         // shifting time stamps to the future for events to freeze
         bool deleted = false;
         if (lp_type_switch && lp_type_switch->should_event_be_frozen
@@ -126,71 +136,6 @@ static void shift_events_to_future_pe(tw_pe * pe) {
 }
 
 
-// Returns an array of size `g_tw_nlp`, where each element is a null-terminated
-// array containing all the events that each LP has for processing
-static tw_event *** order_events_per_lps(tw_pe * pe) {
-    // 0. Create array for linked list of size g_tw_nlp to store events per lp
-    tw_event ** lp_queue_events = (tw_event **) calloc(g_tw_nlp, sizeof(tw_event *));
-    // 0b. Create simple array (size g_tw_lp) to store number of events per lp
-    size_t * num_lp_queue_events = (size_t *) calloc(g_tw_nlp, sizeof(size_t));
-
-    // 1. loop extracting events from queue
-    //   a. check from which local lp does the event belong
-    //   b. add event to reversed linked-list of given lp and increase lp counter
-    tw_event * next_event = tw_pq_dequeue(pe->pq);
-    size_t events_dequeued = 0;
-    while (next_event) {
-        // Filtering events to freeze
-        assert(next_event->prev == NULL);
-
-        // finding out lp type
-        assert(tw_getlocal_lp(next_event->dest_lpid) == next_event->dest_lp);
-        tw_lpid const lpid = next_event->dest_lp->id;
-
-        // store event in lp_queue_events
-        next_event->prev = lp_queue_events[lpid];
-        lp_queue_events[lpid] = next_event;
-        num_lp_queue_events[lpid]++;
-        events_dequeued++;
-
-        next_event = tw_pq_dequeue(pe->pq);
-    }
-
-    // 2. create array (triple pointer type, **) of size `g_tw_nlp + total events`
-    //    to store events per lp, null-terminated
-    tw_event *** lps_events = (tw_event ** *) calloc(g_tw_nlp, sizeof(tw_event **));
-    tw_event ** all_events_mem = (tw_event * *) calloc(g_tw_nlp + events_dequeued, sizeof(tw_event *));
-
-    // 3. loop through each linked-list insert each event back into the
-    //   queue and store address copy into lp array
-    size_t event_i = 0;
-    for (size_t lpid = 0; lpid < g_tw_nlp; lpid++) {
-        lps_events[lpid] = &all_events_mem[event_i];
-
-        tw_event * dequed_events = lp_queue_events[lpid];
-        while (dequed_events) {
-            // event address copy
-            all_events_mem[event_i] = dequed_events;
-
-            // placing back into queue
-            tw_event * const prev_event = dequed_events;
-            dequed_events = dequed_events->prev;
-            prev_event->prev = NULL;
-            tw_pq_enqueue(pe->pq, prev_event);
-
-            event_i++;
-        }
-        event_i++;
-    }
-    assert(event_i == g_tw_nlp + events_dequeued);
-
-    assert(g_tw_nlp > 0 && lps_events[0] == all_events_mem);
-    free(lp_queue_events);
-    free(num_lp_queue_events);
-    return lps_events;
-}
-
-
 // Switching from a (vanilla) high-def simulation to surrogate mode
 // consists of:
 // - Cancel all events that have to be cancelled and clean everything
@@ -210,7 +155,6 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
     printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size);
     shift_events_to_future_pe(pe);
     printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size);
-    tw_event *** lps_events = order_events_per_lps(pe);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -241,11 +185,11 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
                 assert(is_lp_modelnet);
                 model_net_method_switch_to_surrogate_lp(lp);
             }
-            if (lp_type_switch->surrogate_to_highdef) {
+            if (lp_type_switch->highdef_to_surrogate) {
                 if (is_lp_modelnet) {
-                    model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate, lps_events[local_lpid]);
+                    model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->highdef_to_surrogate, NULL);
                 } else {
-                    lp_type_switch->highdef_to_surrogate(lp->cur_state, lp, lps_events[local_lpid]);
+                    lp_type_switch->highdef_to_surrogate(lp->cur_state, lp, NULL);
                 }
             }
         }
@@ -256,9 +200,6 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
         tw_scheduler_rollback_and_cancel_events_pe(pe);
     }
 
-    assert(lps_events[0] != NULL);
-    free(lps_events[0]);
-    free(lps_events);
 }
 
 
@@ -302,7 +243,7 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
             }
             if (lp_type_switch->surrogate_to_highdef) {
                 if (is_lp_modelnet) {
-                    model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef, NULL);
+                    model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->surrogate_to_highdef, NULL);
                 } else {
                     lp_type_switch->surrogate_to_highdef(lp->cur_state, lp, NULL);
                 }

From 7db95be1c612948f517ff1ade84660ee7937cad0 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 11 Jun 2025 13:23:08 -0400
Subject: [PATCH 150/188] Refactor network director to use separate queue for
 frozen events instead of timestamp manipulation

---
 codes/surrogate/network-surrogate.h |   3 +
 src/surrogate/network-surrogate.c   | 135 +++++++++++++++++++---------
 2 files changed, 98 insertions(+), 40 deletions(-)

diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h
index 9a9b2680..2f86ac21 100644
--- a/codes/surrogate/network-surrogate.h
+++ b/codes/surrogate/network-surrogate.h
@@ -65,6 +65,9 @@ extern struct switch_at_struct switch_network_at;
 // Main function responsible for switching between high-fidelity and (network) surrogate
 void network_director(tw_pe * pe);
 
+// Function for application director to use network freezing machinery
+void surrogate_switch_network_model(tw_pe * pe);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index 8f12ec33..13fccb37 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -8,6 +8,10 @@ double surrogate_switching_time = 0.0;
 double time_in_surrogate = 0.0;
 static double surrogate_time_last = 0.0;
 
+// === Frozen events system for separate queue approach
+static tw_event *frozen_events_head = NULL;  // Head of frozen events linked list
+static double frozen_events_switch_time = 0.0;  // Time when we switched to surrogate mode
+
 // === Director functionality
 //
 
@@ -22,13 +26,17 @@ static struct lp_types_switch const * get_type_switch(char const * const name) {
 }
 
 
-static void shift_events_to_future_pe(tw_pe * pe) {
+static void freeze_events_to_separate_queue_pe(tw_pe * pe) {
 #ifdef USE_RAND_TIEBREAKER
     tw_event_sig gvt_sig = pe->GVT_sig;
     tw_stime gvt = gvt_sig.recv_ts;
 #else
     tw_stime gvt = pe->GVT;
 #endif
+
+    // Store the time when we switch to surrogate mode
+    frozen_events_switch_time = gvt;
+
     tw_event * next_event = tw_pq_dequeue(pe->pq);
 
     // If there aren't any events left to process, then this PE has nothing to do
@@ -36,24 +44,16 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         return;
     }
 
-    // We have to put the events back into the queue after we switch back, but if we never
-    // switch back they will never get to be processed and thus we can clean them
-    double switch_offset = g_tw_ts_end;
-    if (switch_network_at.current_i < switch_network_at.total) {
-        double const next_switch = switch_network_at.time_stampts[switch_network_at.current_i + 1];
-        double const pre_switch_time = gvt;
-        switch_offset = next_switch - pre_switch_time;
-        assert(pre_switch_time < next_switch);
-        //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset);
-    }
-    assert(0 <= switch_network_at.current_i && switch_network_at.current_i < switch_network_at.total);
-    double const current_switch_time = switch_network_at.time_stampts[switch_network_at.current_i];
-    assert(current_switch_time <= gvt);
+    tw_event * dequed_events = NULL; // Linked list of non-frozen events, to be placed back in the queue
+    int events_processed = 0; // Total events processed from queue
+    int events_enqueued = 0;  // Events put back in queue
+    int events_frozen = 0;    // Events moved to frozen queue
+    int events_deleted = 0;   // Events deleted
 
-    tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue
-    int events_dequeued = 0;  // for stats on code correctness
     // Traversing all events stored in the queue
     while (next_event) {
+        events_processed++;
+
         // Filtering events to freeze
         assert(next_event->prev == NULL);
 #ifdef USE_RAND_TIEBREAKER
@@ -81,38 +81,35 @@ static void shift_events_to_future_pe(tw_pe * pe) {
             }
         }
 
-        // shifting time stamps to the future for events to freeze
         bool deleted = false;
+        bool frozen = false;
+
+        // Check if event should be frozen (moved to separate queue)
         if (lp_type_switch && lp_type_switch->should_event_be_frozen
                 && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) {
-#ifdef USE_RAND_TIEBREAKER
-            assert(next_event->recv_ts == next_event->sig.recv_ts);
-            next_event->recv_ts += switch_offset;
-            next_event->sig.recv_ts = next_event->recv_ts;
-#else
-            next_event->recv_ts += switch_offset;
-#endif
-            assert(next_event->recv_ts >= current_switch_time);
+            // Add to frozen events linked list (no timestamp manipulation here)
+            next_event->prev = frozen_events_head;
+            frozen_events_head = next_event;
+            frozen = true;
+            events_frozen++;
         // deleting event if we need to
         } else if (lp_type_switch && lp_type_switch->should_event_be_deleted
                 && lp_type_switch->should_event_be_deleted(next_event->dest_lp, next_event)) {
             tw_event_free(pe, next_event);
             deleted = true;
+            events_deleted++;
         }
 
-        // store event in deque_events to inject immediately back to the queue
-        if (!deleted) {
+        // store event in dequed_events to inject immediately back to the queue
+        if (!deleted && !frozen) {
              next_event->prev = dequed_events;
              dequed_events = next_event;
-             events_dequeued++;
-             assert(next_event->recv_ts >= current_switch_time);
         }
 
         next_event = tw_pq_dequeue(pe->pq);
     }
 
-    int events_enqueued = 0;
-    // Reinjecting events into simulation
+    // Reinjecting non-frozen events into simulation
     while (dequed_events) {
         tw_event * const prev_event = dequed_events;
         dequed_events = dequed_events->prev;
@@ -126,13 +123,60 @@ static void shift_events_to_future_pe(tw_pe * pe) {
         events_enqueued++;
     }
 
-    if (DEBUG_DIRECTOR > 0 && events_dequeued != events_enqueued) {
-        printf("PE %lu: Discrepancy on number of events processed %d (%d dequeued and %d enqueued)\n",
-                g_tw_mynode, events_dequeued - events_enqueued, events_dequeued, events_enqueued);
+    if (DEBUG_DIRECTOR > 0) {
+        printf("PE %lu: Processed %d events (%d enqueued, %d frozen, %d deleted)\n",
+                g_tw_mynode, events_processed, events_enqueued, events_frozen, events_deleted);
     }
 
-    // shifting time stamps of events in causality list (one list per KP)
-    // offset_future_events_in_causality_list(switch_offset, gvt);
+    // Sanity check: processed = enqueued + frozen + deleted
+    assert(events_processed == events_enqueued + events_frozen + events_deleted);
+}
+
+static void unfreeze_events_from_separate_queue_pe(tw_pe * pe) {
+#ifdef USE_RAND_TIEBREAKER
+    tw_stime current_gvt = pe->GVT_sig.recv_ts;
+#else
+    tw_stime current_gvt = pe->GVT;
+#endif
+
+    // Calculate offset to adjust timestamps: current_gvt - switch_time
+    double time_offset = current_gvt - frozen_events_switch_time;
+
+    int events_restored = 0;
+
+    // Traverse the frozen events linked list and restore them to the main queue
+    while (frozen_events_head) {
+        tw_event * event_to_restore = frozen_events_head;
+        frozen_events_head = frozen_events_head->prev;
+        event_to_restore->prev = NULL;
+
+        // Adjust timestamp: original_time + time_spent_in_surrogate
+#ifdef USE_RAND_TIEBREAKER
+        assert(event_to_restore->recv_ts == event_to_restore->sig.recv_ts);
+        event_to_restore->recv_ts += time_offset;
+        event_to_restore->sig.recv_ts = event_to_restore->recv_ts;
+#else
+        event_to_restore->recv_ts += time_offset;
+#endif
+
+        // Re-enqueue the event
+        tw_pq_enqueue(pe->pq, event_to_restore);
+
+        // Re-add to hash table if it was a remote event
+        if (event_to_restore->event_id && event_to_restore->state.remote) {
+            tw_hash_insert(pe->hash_t, event_to_restore, event_to_restore->send_pe);
+        }
+
+        events_restored++;
+    }
+
+    if (DEBUG_DIRECTOR > 0 && events_restored > 0) {
+        printf("PE %lu: Restored %d frozen events with time offset %.6f\n",
+                g_tw_mynode, events_restored, time_offset);
+    }
+
+    // Reset frozen events state
+    frozen_events_switch_time = 0.0;
 }
 
 
@@ -152,9 +196,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
     }
 
-    printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size);
-    shift_events_to_future_pe(pe);
-    printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size);
+    printf("PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size);
+    freeze_events_to_separate_queue_pe(pe);
+    printf("PE %lu - AVL size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -210,6 +254,11 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
     tw_stime gvt = pe->GVT;
 #endif
 
+    // Restore frozen events back to the main queue with timestamp adjustment
+    printf("PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size);
+    unfreeze_events_from_separate_queue_pe(pe);
+    printf("PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size);
+
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
         tw_lp * const lp = g_tw_lp[local_lpid];
@@ -259,7 +308,7 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
 }
 
 
-void switch_model(tw_pe * pe) {
+static void switch_model(tw_pe * pe) {
     // Rollback if in optimistic mode
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         tw_scheduler_rollback_and_cancel_events_pe(pe);
@@ -359,6 +408,12 @@ void network_director(tw_pe * pe) {
         }
     }
 }
+
+// === Function for application director to use network freezing machinery
+void surrogate_switch_network_model(tw_pe * pe) {
+    // Simply expose the existing switch_model function for use by application director
+    switch_model(pe);
+}
 //
 // === END OF Director functionality
 // vim: set tabstop=4 shiftwidth=4 expandtab :

From c16965fc4591ceb74e614bdf4529dcaee68e9958 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 11 Jun 2025 17:07:28 -0400
Subject: [PATCH 151/188] Network surrogate should be enabled through a custom
 parameter

---
 src/networks/model-net/dragonfly-dally.C | 12 ++++++++----
 src/surrogate/application-surrogate.c    | 17 ++++++++++++++++-
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 3e2ce29d..9f5a91c4 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -2434,11 +2434,15 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     }
 
     // START Surrogate configuration
-    char director_mode[MAX_NAME_LENGTH];
-    director_mode[0] = '\0';
-    int director_mode_len = configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    char enable_str[MAX_NAME_LENGTH];
+    enable_str[0] = '\0';
+    int const rc_enable = configuration_get_value(&config, "NETWORK_SURROGATE", "enable", anno, enable_str, MAX_NAME_LENGTH);
+    bool enable_network_surrogate = false;
+    if (rc_enable > 0) {
+        enable_network_surrogate = (strcmp(enable_str, "1") == 0 || strcmp(enable_str, "true") == 0);
+    }
     // if surrogate mode has been set up
-    if (director_mode_len > 0) {
+    if (enable_network_surrogate) {
         struct network_surrogate_config surr_conf = {
             .director = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
             .total_terminals = p->total_terminals,
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
index 48677cfd..a95e5e19 100644
--- a/src/surrogate/application-surrogate.c
+++ b/src/surrogate/application-surrogate.c
@@ -15,7 +15,7 @@ static enum {
 #define gvt_for(pe) (pe->GVT)
 #endif
 
-#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); }
+#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
 
 static void application_director_pre_switch(tw_pe * pe) {
     // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every
@@ -32,6 +32,14 @@ static void application_director_pre_switch(tw_pe * pe) {
         case FAST_FORWARD_switching:
             tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward
             master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+
+            // TODO: Fix network surrogate (it's buggy) and enable this code
+            // Freeze network events if configured
+            //if (freeze_network_on_app_switch) {
+            //    master_printf("Freezing network events for application surrogate mode\n");
+            //    surrogate_switch_network_model(pe);
+            //}
+
             director_state = POST_JUMP_switched;
         break;
 
@@ -54,6 +62,13 @@ static void application_director_post_switch(tw_pe * pe) {
 
     if (director_state == POST_JUMP_switched) {
         master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+
+        // Unfreeze network events if they were frozen
+        //if (freeze_network_on_app_switch) {
+        //    master_printf("Unfreezing network events after application surrogate mode\n");
+        //    surrogate_switch_network_model(pe);
+        //    // TODO: reset network predictors
+        //}
     } else {
         master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
     }

From 9835040e66ad720cb5882e260041387fce69c100 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 12 Jun 2025 17:03:52 -0400
Subject: [PATCH 152/188] Bug fix - tw_now has been moved out of commit time

Silly (naive and common) bug has been squashed. This bug is very common
and very easy to make when doing anything in ROSS.
---
 codes/net/dragonfly-dally.h              | 2 ++
 src/networks/model-net/dragonfly-dally.C | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h
index 504446b0..929f6952 100644
--- a/codes/net/dragonfly-dally.h
+++ b/codes/net/dragonfly-dally.h
@@ -134,6 +134,8 @@ struct terminal_dally_message
 
    //Xin: for busy time recording
    tw_stime last_bufupdate_time;
+
+   tw_stime saved_processing_time;
 };
 
 void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg);
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 9f5a91c4..cc0a4c85 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3333,7 +3333,7 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term
     sent.start.packet_ID = msg->packet_ID;
     sent.start.dest_terminal_lpid = msg->dest_terminal_lpid;
     sent.start.dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id;
-    sent.start.travel_start_time = tw_now(lp);
+    sent.start.travel_start_time = msg->saved_processing_time;
     sent.start.workload_injection_time = msg->msg_start_time;
     sent.start.processing_packet_delay = processing_packet_delay;
     sent.start.packet_size = msg->packet_size;
@@ -3458,6 +3458,8 @@ static void terminal_dally_commit(terminal_state * s,
                 uint64_t packet_ID = msg->packet_ID;
 
                 if (s->sent_packets.count(packet_ID) == 1) { // packet_ID is in s->sent_packets
+                    auto sent = s->sent_packets[packet_ID];
+                    assert(msg->travel_end_time > sent.start.travel_start_time);
                     if (packet_ID == s->last_packet_sent_id) { // packet_ID is last, we cannot compute the next_packet_delay
                         assert(s->arrival_of_last_packet.packet_ID == -1);
                         s->arrival_of_last_packet.packet_ID = packet_ID;
@@ -4212,6 +4214,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
 
     s->packet_gen++;
     s->total_gen_size += msg->packet_size;
+    msg->saved_processing_time = tw_now(lp);
 
     tw_stime ts, injection_ts, nic_ts;
 

From bfdfba969209d06eababe4d7c54a3d0337b68aab Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 12 Jun 2025 17:08:10 -0400
Subject: [PATCH 153/188] Hooking network surrogate to application surrogate

---
 codes/surrogate/application-surrogate.h |  1 +
 src/surrogate/application-surrogate.c   | 25 +++++++++++++++----------
 src/surrogate/init.c                    |  7 ++++++-
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h
index 111df4ef..cfda05bf 100644
--- a/codes/surrogate/application-surrogate.h
+++ b/codes/surrogate/application-surrogate.h
@@ -28,6 +28,7 @@ struct application_director_config {
         // To use when APP_DIRECTOR_OPTS_call_every_ns
         double call_every_ns;
     };
+    bool use_network_surrogate;
 };
 
 // Main function responsible for switching between high-fidelity and (application iteration) surrogate
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
index a95e5e19..4e848e23 100644
--- a/src/surrogate/application-surrogate.c
+++ b/src/surrogate/application-surrogate.c
@@ -1,8 +1,13 @@
 #include "surrogate/application-surrogate.h"
 #include <ross-extern.h>
+#include "surrogate/network-surrogate.h"
 
 static struct app_iteration_predictor * iter_predictor;
-static struct application_director_config conf = {.option = APP_DIRECTOR_OPTS_call_every_ns, .every_n_gvt = 1000000};
+static struct application_director_config conf = {
+    .option = APP_DIRECTOR_OPTS_call_every_ns,
+    .every_n_gvt = 1000000,
+    .use_network_surrogate = false
+};
 static enum {
     PRE_JUMP = 0,
     POST_JUMP_switched,  // Switched to surrogate-mode
@@ -35,10 +40,10 @@ static void application_director_pre_switch(tw_pe * pe) {
 
             // TODO: Fix network surrogate (it's buggy) and enable this code
             // Freeze network events if configured
-            //if (freeze_network_on_app_switch) {
-            //    master_printf("Freezing network events for application surrogate mode\n");
-            //    surrogate_switch_network_model(pe);
-            //}
+            if (conf.use_network_surrogate) {
+                master_printf("Switching on network surrogate\n");
+                surrogate_switch_network_model(pe);
+            }
 
             director_state = POST_JUMP_switched;
         break;
@@ -64,11 +69,11 @@ static void application_director_post_switch(tw_pe * pe) {
         master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
 
         // Unfreeze network events if they were frozen
-        //if (freeze_network_on_app_switch) {
-        //    master_printf("Unfreezing network events after application surrogate mode\n");
-        //    surrogate_switch_network_model(pe);
-        //    // TODO: reset network predictors
-        //}
+        if (conf.use_network_surrogate) {
+            master_printf("Switching off network surrogate\n");
+            surrogate_switch_network_model(pe);
+            // TODO: reset network predictors and ask not to gather any data for 1 ms
+        }
     } else {
         master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
     }
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index c6a0a6aa..67ab25ee 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -232,13 +232,15 @@ static struct application_director_config load_director_config(void) {
             break;
     }
 
+    config.use_network_surrogate = is_network_surrogate_configured;
+
     return config;
 }
 
 void application_surrogate_configure(
     int num_terminals_in_pe,
     int num_apps,
-    struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. Caller must free it
+    struct app_iteration_predictor ** iter_pred
 ) {
     char num_iters_str[MAX_NAME_LENGTH];
     num_iters_str[0] = '\0';
@@ -267,6 +269,9 @@ void application_surrogate_configure(
     } else {
         master_printf("  Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns);
     }
+    if (is_network_surrogate_configured) {
+        master_printf("  The network director has been replaced by the application director. The application director will trigger the network surrogate on and off.\n");
+    }
     master_printf("\n");
 }
 

From 8e9521bc8961e1eeb58cce57137dd685ddaae4ca Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 12 Jun 2025 17:10:29 -0400
Subject: [PATCH 154/188] Wrap dummy event logic with compile-time flag for
 simulation reproducibility and determinism

---
 src/networks/model-net/dragonfly-dally.C | 27 +++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index cc0a4c85..c4bc8331 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -76,6 +76,9 @@
 #define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_DALLY_ROUTER])
 #define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_DALLY_ROUTER])
 
+// If we have configured the network surrogate, then we will collect packet delay data, which is done via the scheduling of an event. This additional event will shift the random generator and thus the same model will behave differently from the start when compared with the one where the surrogate is not setup. If one wants to test both scenarios (with and without the surrogate) and maintain determinism in high-fidelity, one has to enable this option
+#define ALWAYS_DETERMINISTIC_NETWORK 0
+
 /* handles terminal and router events like packet generate/send/receive/buffer */
 typedef struct terminal_state terminal_state;
 typedef struct router_state router_state;
@@ -368,7 +371,9 @@ enum event_t
     R_SNAPSHOT, //used for timed statistic outputs
     T_NOTIFY,  // used to notify a source or destination terminal about packets status (useful for informing about latency, zombie packet or delete a zombie packet)
     T_ARRIVE_PREDICTED,  // this event is generated by a latency predictor instead of traversing the network
+#if ALWAYS_DETERMINISTIC_NETWORK
     T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event that allows us to keep the number of events produced in a simulation the same regardless of whether packet latency is activated (can be safely removed)
+#endif /* ALWAYS_DETERMINISTIC_NETWORK */
 };
 
 // Types of notifications between terminals
@@ -3472,8 +3477,10 @@ static void terminal_dally_commit(terminal_state * s,
             }
         break;
 
+#if ALWAYS_DETERMINISTIC_NETWORK
         case T_VACUOUS_EVENT:
         break;
+#endif /* ALWAYS_DETERMINISTIC_NETWORK */
 
         default:
             printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type);
@@ -4904,6 +4911,7 @@ static void process_terminal_notification_event(terminal_state * s, tw_bf * bf,
     }
 }
 
+#if ALWAYS_DETERMINISTIC_NETWORK
 // This function triggers an event that is completely ignored when processed later. The number of events produced by a terminal/router DOES alter the simulation results. (The number of events processed by an LP shouldn't be a parameter to the simulation itself, but it is weirdly).
 static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * msg, tw_lp * lp)
 {
@@ -4917,6 +4925,7 @@ static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * m
     new_msg->magic = terminal_magic_num;
     tw_event_send(e); 
 }
+#endif /* ALWAYS_DETERMINISTIC_NETWORK */
 
 //used by packet_arrive()
 static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size)
@@ -5495,12 +5504,14 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
         //printf("Good day sir, not a zombie! LPID=%d  packet_ID = %d  dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id);
         if (packet_latency_f || dally_surrogate_configured) {
             notify_src_lp_on_total_latency(lp, msg);
-        //} else {
-        //    // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact
-        //    // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce
-        //    // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S
-        //    vacuous_msg_to_itself(s, msg, lp);
+#if ALWAYS_DETERMINISTIC_NETWORK
+        } else {
+            // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact
+            // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce
+            // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S
+            vacuous_msg_to_itself(s, msg, lp);
         }
+#endif /* if ALWAYS_DETERMINISTIC_NETWORK */
     }
 
     // if the message is complete (ie, this `msg` is the last piece of the message)
@@ -6893,8 +6904,10 @@ terminal_dally_event( terminal_state * s,
             process_terminal_notification_event(s, bf, msg, lp);
         break;
 
+#if ALWAYS_DETERMINISTIC_NETWORK
         case T_VACUOUS_EVENT:
         break;
+#endif /* ALWAYS_DETERMINISTIC_NETWORK */
 
         default:
             printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type);
@@ -7004,8 +7017,10 @@ static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, term
             process_terminal_notification_event_rc(s, bf, msg, lp);
         break;
 
+#if ALWAYS_DETERMINISTIC_NETWORK
         case T_VACUOUS_EVENT:
         break;
+#endif /* ALWAYS_DETERMINISTIC_NETWORK */
 
         default:
             tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type);
@@ -8120,7 +8135,9 @@ char const * const string_event_t(enum event_t type) {
         case R_SNAPSHOT:         return "R_SNAPSHOT";
         case T_NOTIFY:           return "T_NOTIFY";
         case T_ARRIVE_PREDICTED: return "T_ARRIVE_PREDICTED";
+#if ALWAYS_DETERMINISTIC_NETWORK
         case T_VACUOUS_EVENT:    return "T_VACUOUS_EVENT";
+#endif /* ALWAYS_DETERMINISTIC_NETWORK */
         default:                 return "UNKNOWN TYPE!!";
     }
 }

From 10edcecd1e9247488b9f4df0f5e1fdbdcc1b662e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 12 Jun 2025 17:26:11 -0400
Subject: [PATCH 155/188] Modifying tests. They all pass now!

---
 doc/example/tutorial-ping-pong-surrogate.conf.in | 2 ++
 tests/CMakeLists.txt                             | 5 ++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in
index 04d2c94f..fd53f4d1 100644
--- a/doc/example/tutorial-ping-pong-surrogate.conf.in
+++ b/doc/example/tutorial-ping-pong-surrogate.conf.in
@@ -59,6 +59,8 @@ PARAMS
    router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} );
 }
 NETWORK_SURROGATE {
+   enable="1"; # Options: 0 or 1
+
 # determines the director switching from surrogate to high-def simulation strategy
    director_mode="at-fixed-virtual-times";
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 238d988f..886dcf59 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -91,9 +91,8 @@ set(test-shell-files
     example-ping-pong-surrogate-2.sh
     example-ping-pong-surrogate-3.sh
     example-ping-pong-no-logging.sh
-    # These are aspirational unit tests. The switching mechanism is not fully deterministic
-    #example-ping-pong-surrogate-determinism-1.sh # bug: not all processed events are commited before the switch happens, this might alter the behaviour of the predictor, thus the simulation diverges at switch (no longer deterministic)
-    #example-ping-pong-surrogate-determinism-2.sh # bug: incoming packets (`T_ARRIVE_PREDICTED` events scheduled by `dragonfly_dally_terminal_highdef_to_surrogate`) might tie some times, the tie is not resolved deterministically
+    example-ping-pong-surrogate-determinism-1.sh
+    example-ping-pong-surrogate-determinism-2.sh
     )
 
 foreach(testname ${test-shell-files})

From 9126863eecc6ced2bd63a24d4b46e591b232f2de Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 13 Jun 2025 09:25:22 -0400
Subject: [PATCH 156/188] Adding missing garbage collection and print statement

---
 src/networks/model-net/dragonfly-dally.C | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index c4bc8331..d9345c8f 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5711,6 +5711,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
         qhash_finalize(s->rank_tbl);
     
     rc_stack_destroy(s->st);
+    rc_stack_destroy(s->cc_st);
     //TODO FREE THESE CORRECTLY
     for(int i = 0; i < s->params->num_rails; i++)
     {
@@ -5778,6 +5779,7 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){
         fclose(dragonfly_rtr_bw_log);
 
     rc_stack_destroy(s->st);
+    rc_stack_destroy(s->cc_st);
     
     const dragonfly_param *p = s->params;
     int written = 0;
@@ -6868,6 +6870,7 @@ terminal_dally_event( terminal_state * s,
         }
     } else {
         rc_stack_gc(lp, s->st);
+        rc_stack_gc(lp, s->cc_st);
     }
     switch(msg->type)
         {
@@ -6924,6 +6927,7 @@ static void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_mess
     s->fwd_events++;
     s->ross_rsample.fwd_events++;
     rc_stack_gc(lp, s->st);
+    rc_stack_gc(lp, s->cc_st);
 
     msg->last_received_time = s->last_time;
     s->last_time = tw_now(lp);
@@ -8270,6 +8274,7 @@ void print_terminal_dally_message(FILE * out, char const * prefix, void * s, str
     fprintf(out, "%s  |      saved_fin_chunks_ross = %g\n", prefix, msg->saved_fin_chunks_ross);
     fprintf(out, "%s  |   saved_last_in_queue_time = %g\n", prefix, msg->saved_last_in_queue_time);
     fprintf(out, "%s  |    saved_next_packet_delay = %g\n", prefix, msg->saved_next_packet_delay);
+    fprintf(out, "%s  |      saved_processing_time = %g\n", prefix, msg->saved_processing_time);
     fprintf(out, "%s  |           msg_new_mn_event = %g\n", prefix, msg->msg_new_mn_event);
     fprintf(out, "%s  |         last_received_time = %g\n", prefix, msg->last_received_time);
     fprintf(out, "%s  |             last_sent_time = %g\n", prefix, msg->last_sent_time);

From cd766b14813f487301f83c70cbbae40da00cec0b Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 13 Jun 2025 11:20:46 -0400
Subject: [PATCH 157/188] Refactoring some of the common values between
 surrogates

---
 codes/surrogate/application-surrogate.h       |  2 +
 codes/surrogate/init.h                        | 24 +++---
 codes/surrogate/network-surrogate.h           | 25 +++----
 doc/example/tutorial-synthetic-ping-pong.c    |  1 +
 src/network-workloads/model-net-mpi-replay.c  |  2 +-
 src/networks/model-net/dragonfly-dally.C      |  5 +-
 src/surrogate/application-surrogate.c         | 20 +++--
 src/surrogate/init.c                          | 73 ++++++++++++-------
 src/surrogate/network-surrogate.c             | 66 +++++++++++------
 .../packet-latency-predictor/average.c        | 15 ++--
 10 files changed, 136 insertions(+), 97 deletions(-)

diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h
index cfda05bf..9fded3db 100644
--- a/codes/surrogate/application-surrogate.h
+++ b/codes/surrogate/application-surrogate.h
@@ -34,6 +34,8 @@ struct application_director_config {
 // Main function responsible for switching between high-fidelity and (application iteration) surrogate
 void application_director_configure(struct application_director_config *, struct app_iteration_predictor *);
 
+void application_director_finalize(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h
index 28e90a8d..f095e29c 100644
--- a/codes/surrogate/init.h
+++ b/codes/surrogate/init.h
@@ -11,9 +11,6 @@
 #include "codes/surrogate/app-iteration-predictor/common.h"
 #include "codes/surrogate/network-surrogate.h"
 
-// A simple macro to clarify code a bit
-#define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); }
-
 // Basic level of debugging is 1. It should be always turned on
 // because it tells us when a switch to or from surrogate-mode happened.
 // It can be deactivated (set to 0) if it ends up being too obnoxious
@@ -31,31 +28,28 @@ extern "C" {
  * Variable definitions
  */
 
-void print_surrogate_stats(void);
+// Time spent switching from high-fidelity to surrogate and viceversa
+extern double surrogate_switching_time;
+// Total time spent in surrogate mode (between switches)
+extern double time_in_surrogate;
+// Time at which we transitioned into surrogate (zero means that we are in high-fidelity)
+extern double surrogate_time_last;
 
-struct network_surrogate_config {
-    struct director_data director;  //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation
-    int total_terminals;  //!< total number of terminals
-    size_t n_lp_types;
-    struct lp_types_switch lp_types[MAX_LP_TYPES];
-};
+void print_surrogate_stats(void);
 
 /** Loads surrogate configuration, including packet latency predictor. */
-void network_surrogate_configure(
+bool network_surrogate_configure(
         char const * const annotation,
         struct network_surrogate_config * const config,
         struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor. Caller does not need to free pointer
 );
 
-extern struct network_surrogate_config net_surr_config;
-extern bool is_network_surrogate_configured;
-
 void application_surrogate_configure(
     int num_terminals_on_pe,
     int num_apps,
     struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. No need to free pointer
 );
-void free_application_surrogate(void);
+void surrogates_finalize(void);
 
 #ifdef __cplusplus
 }
diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h
index 2f86ac21..a550464d 100644
--- a/codes/surrogate/network-surrogate.h
+++ b/codes/surrogate/network-surrogate.h
@@ -16,22 +16,11 @@
 extern "C" {
 #endif
 
-// Time spent switching from high-fidelity to surrogate and viceversa
-extern double surrogate_switching_time;
-// Total time spent in surrogate mode (between switches)
-extern double time_in_surrogate;
-
-// When true (below), the network state will be frozen at switch time (from
-// high-def to surrogate) and later reanimated on the switch back (from
-// surrogate to high-def). If not, all events will be kept in the network while
-// on surrogate mode, which means that the network will vacate completely
-extern bool freeze_network_on_switch;
-
 // Functions that director should have access to
 typedef void (*switch_surrogate_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
 typedef bool (*is_surrogate_on_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C)
 
-struct director_data {
+struct network_model_surrogate {
     switch_surrogate_f  switch_surrogate; // this function switches the model to and from surrogate-mode on a PE basis. It has to be called on all PEs to switch the entire simulation to its surrogate version
     is_surrogate_on_f   is_surrogate_on;  // determines if the model has switched or not
 };
@@ -60,14 +49,20 @@ struct switch_at_struct {
     double * time_stampts; // list of precise timestamps at which to switch
 };
 
-extern struct switch_at_struct switch_network_at;
+struct network_surrogate_config {
+    struct network_model_surrogate model;  //!< functionality needed by the director to switch the model back and forth from high-fidelity to surrogate
+    int total_terminals;  //!< total number of terminals
+    size_t n_lp_types;
+    struct lp_types_switch lp_types[MAX_LP_TYPES];
+};
 
-// Main function responsible for switching between high-fidelity and (network) surrogate
-void network_director(tw_pe * pe);
+void network_director_configure(struct network_surrogate_config *, struct switch_at_struct * switch_network_at, bool freeze_network_on_switch);
 
 // Function for application director to use network freezing machinery
 void surrogate_switch_network_model(tw_pe * pe);
 
+void network_director_finalize(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c
index 1aaf0528..e25ce7bc 100644
--- a/doc/example/tutorial-synthetic-ping-pong.c
+++ b/doc/example/tutorial-synthetic-ping-pong.c
@@ -356,6 +356,7 @@ int main(int argc, char **argv)
     model_net_report_stats(net_id);
 
     // Printing some stats
+    surrogates_finalize();
     print_surrogate_stats();
 
     tw_end();
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 0c3d9ed1..fd28775f 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4357,8 +4357,8 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    if(alloc_spec)
        codes_jobmap_destroy(jobmap_ctx);
 
+   surrogates_finalize();
    print_surrogate_stats();
-   free_application_surrogate();
 
 #ifdef USE_RDAMARIS
     } // end if(g_st_ross_rank)
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index d9345c8f..102a2de7 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -194,6 +194,7 @@ static void setup_packet_latency_path(char const * const dir_to_save);
 // 
 static bool dally_surrogate_configured = false;
 static bool is_dally_surrogate_on = false;
+static bool freeze_network_on_switch = false;
 static struct packet_latency_predictor * terminal_predictor = NULL;
 static void switch_surrogate(void);
 static bool is_surrogate_on_fun(void);
@@ -2449,7 +2450,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
     // if surrogate mode has been set up
     if (enable_network_surrogate) {
         struct network_surrogate_config surr_conf = {
-            .director = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
+            .model = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun},
             .total_terminals = p->total_terminals,
             .n_lp_types = 2,
             .lp_types = {
@@ -2472,7 +2473,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                 0
             }
         };
-        network_surrogate_configure(anno, &surr_conf, &terminal_predictor);
+        freeze_network_on_switch = network_surrogate_configure(anno, &surr_conf, &terminal_predictor);
         if (terminal_predictor) {
             dally_surrogate_configured = true;
         } else {
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
index 4e848e23..870794b6 100644
--- a/src/surrogate/application-surrogate.c
+++ b/src/surrogate/application-surrogate.c
@@ -1,6 +1,7 @@
 #include "surrogate/application-surrogate.h"
 #include <ross-extern.h>
 #include "surrogate/network-surrogate.h"
+#include "surrogate/init.h"
 
 static struct app_iteration_predictor * iter_predictor;
 static struct application_director_config conf = {
@@ -38,13 +39,12 @@ static void application_director_pre_switch(tw_pe * pe) {
             tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward
             master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
 
-            // TODO: Fix network surrogate (it's buggy) and enable this code
-            // Freeze network events if configured
             if (conf.use_network_surrogate) {
-                master_printf("Switching on network surrogate\n");
+                master_printf("Switching network surrogate on\n");
                 surrogate_switch_network_model(pe);
             }
 
+            surrogate_time_last = tw_clock_read();
             director_state = POST_JUMP_switched;
         break;
 
@@ -63,24 +63,29 @@ static void application_director_post_switch(tw_pe * pe) {
         tw_trigger_gvt_hook_every(conf.every_n_gvt);
     }
 
+    double const start = tw_clock_read();
     iter_predictor->director.reset();
+    double const end = tw_clock_read();
+    surrogate_switching_time += end - start;
 
     if (director_state == POST_JUMP_switched) {
         master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
 
-        // Unfreeze network events if they were frozen
         if (conf.use_network_surrogate) {
-            master_printf("Switching off network surrogate\n");
+            master_printf("Switching network surrogate off\n");
             surrogate_switch_network_model(pe);
             // TODO: reset network predictors and ask not to gather any data for 1 ms
         }
+
+        time_in_surrogate += start - surrogate_time_last;
+        surrogate_time_last = 0.0;
     } else {
         master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
     }
     director_state = PRE_JUMP;
 }
 
-void application_director(tw_pe * pe) {
+static void application_director(tw_pe * pe) {
     // Director is not called if the simulation has ended
     if (gvt_for(pe) >= g_tw_ts_end) {
         return;
@@ -107,3 +112,6 @@ void application_director_configure(struct application_director_config * conf_,
         tw_trigger_gvt_hook_at(conf.call_every_ns);
     }
 }
+
+void application_director_finalize(void) {
+}
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 67ab25ee..2e93ed75 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -1,6 +1,7 @@
 #include <codes/surrogate/init.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
 #include <codes/surrogate/application-surrogate.h>
+#include <codes/surrogate/network-surrogate.h>
 #include <codes/surrogate/app-iteration-predictor/average.h>
 
 #ifdef USE_TORCH
@@ -9,16 +10,21 @@
 
 #define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
 
-bool freeze_network_on_switch = true;
-struct network_surrogate_config net_surr_config = {0};
-bool is_network_surrogate_configured = false;
-struct switch_at_struct switch_network_at;
+// Shared times across network and application surrogates
+double surrogate_switching_time = 0.0;
+double time_in_surrogate = 0.0;
+double surrogate_time_last = 0.0;
+
+static bool network_director_enabled = false;
+static bool is_network_surrogate_configured = false;
+static bool is_app_surrogate_configured = false;
 static struct packet_latency_predictor current_net_predictor = {0};
 static struct app_iteration_predictor current_iter_predictor = {0};
 
 
 // === Stats!
 void print_surrogate_stats(void) {
+    // Computing the time in surrogate only makes sense if we can switch the whole simulation all at once (like the network simulation does), and it doesn't work with the application surrogate as this doesn't switch the state of the simulation all at once
     if(is_network_surrogate_configured && g_tw_mynode == 0) {
         printf("\nTotal time spent on surrogate-mode: %.4f\n", (double) time_in_surrogate / g_tw_clock_rate);
         printf("Total time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
@@ -28,7 +34,7 @@ void print_surrogate_stats(void) {
 
 
 // === All things Surrogate Configuration
-void network_surrogate_configure(
+bool network_surrogate_configure(
         char const * const anno,
         struct network_surrogate_config * const sc,
         struct packet_latency_predictor ** pl_pred
@@ -37,21 +43,21 @@ void network_surrogate_configure(
     assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
     is_network_surrogate_configured = true;
 
-    // This is the only place where the director data should be loaded and set up
-    net_surr_config = *sc;
+    struct switch_at_struct switch_network_at;
 
     // Determining which director mode to set up
     char director_mode[MAX_NAME_LENGTH];
     director_mode[0] = '\0';
     configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
     if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
-        PRINTF_ONCE("\nNetwork surrogate activated switching at fixed virtual times: ");
+        master_printf("\nNetwork surrogate activated switching at fixed virtual times: ");
 
         // Loading timestamps
         char **timestamps;
         size_t len;
         configuration_get_multivalue(&config, "NETWORK_SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
 
+        network_director_enabled = true;
         switch_network_at.current_i = 0;
         switch_network_at.total = len;
         switch_network_at.time_stampts = malloc(len * sizeof(double));
@@ -63,20 +69,17 @@ void network_surrogate_configure(
                 tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
             }
 
-            PRINTF_ONCE("%g%s", switch_network_at.time_stampts[i], i == len-1 ? "" : ", ");
+            master_printf("%g%s", switch_network_at.time_stampts[i], i == len-1 ? "" : ", ");
         }
-        PRINTF_ONCE("\n");
-
-        // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT
-        g_tw_gvt_hook = network_director;
-
-        tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]);
+        master_printf("\n");
 
         // freeing timestamps before it dissapears
         for (size_t i = 0; i < len; i++) {
             free(timestamps[i]);
         }
         free(timestamps);
+    } else if (strcmp(director_mode, "delegate-to-app-director") == 0) {
+        master_printf("\nNetwork surrogate enabled but director won't run. Network surrogate will be triggered by app director if present\n");
     } else {
         tw_error(TW_LOC, "Unknown director mode `%s`", director_mode);
     }
@@ -87,7 +90,7 @@ void network_surrogate_configure(
     configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
     if (*latency_pred_name) {
         if (strcmp(latency_pred_name, "average") == 0) {
-            current_net_predictor = average_latency_predictor(net_surr_config.total_terminals);
+            current_net_predictor = average_latency_predictor(sc->total_terminals);
             *pl_pred = &current_net_predictor;
 
 #ifdef USE_TORCH
@@ -116,20 +119,21 @@ void network_surrogate_configure(
                     ")", latency_pred_name);
         }
     } else {
-        current_net_predictor = average_latency_predictor(net_surr_config.total_terminals);
+        current_net_predictor = average_latency_predictor(sc->total_terminals);
         *pl_pred = &current_net_predictor;
-        PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n");
+        master_printf("Enabling average packet latency predictor (default behaviour)\n");
     }
 
     // Finding out whether to ignore some packet latencies
     int rc = configuration_get_value_double(&config, "NETWORK_SURROGATE", "ignore_until", anno, &ignore_until);
     if (rc) {
         ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
-        PRINTF_ONCE("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n");
+        master_printf("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n");
     } else {
-        PRINTF_ONCE("ignore_until=%g a packet delievered before this time stamp will not be used in training any predictor\n", ignore_until);
+        master_printf("ignore_until=%g a packet delievered before this time stamp will not be used in training any predictor\n", ignore_until);
     }
 
+    bool freeze_network_on_switch = true;
     // Determining which predictor to set up and return
     char network_treatment_name[MAX_NAME_LENGTH];
     network_treatment_name[0] = '\0';
@@ -137,22 +141,26 @@ void network_surrogate_configure(
     if (*network_treatment_name) {
         if (strcmp(network_treatment_name, "freeze") == 0) {
             freeze_network_on_switch = true;
-            PRINTF_ONCE("The network will be frozen on switch to surrogate\n");
+            master_printf("The network will be frozen on switch to surrogate\n");
         } else if (strcmp(network_treatment_name, "nothing") == 0) {
             freeze_network_on_switch = false;
-            PRINTF_ONCE("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n");
+            master_printf("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n");
         } else {
             tw_error(TW_LOC, "Unknown network treatment `%s` (possibilities include: frezee or nothing)", network_treatment_name);
         }
     } else {
         freeze_network_on_switch = true;
-        PRINTF_ONCE("The network will be frozen on switch to surrogate (default behaviour)\n");
+        master_printf("The network will be frozen on switch to surrogate (default behaviour)\n");
     }
 
+    network_director_configure(sc, network_director_enabled ? &switch_network_at: NULL, freeze_network_on_switch);
+
     //surr_config.director.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        fprintf(stderr, "Simulation starting on %s mode\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
+        fprintf(stderr, "Simulation starting on network %s mode\n", sc->model.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
+
+    return freeze_network_on_switch;
 }
 
 static int load_and_validate_int_param(const char* param_name, int default_value) {
@@ -258,6 +266,7 @@ void application_surrogate_configure(
     current_iter_predictor = avg_app_iteration_predictor(&predictor_config);
     application_director_configure(&app_dir_config, &current_iter_predictor);
     *iter_pred = &current_iter_predictor;
+    is_app_surrogate_configured = true;
 
     // Printing configuration summary
     master_printf("\nApplication surrogate configuration:\n");
@@ -269,13 +278,23 @@ void application_surrogate_configure(
     } else {
         master_printf("  Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns);
     }
-    if (is_network_surrogate_configured) {
+    if (network_director_enabled) {
         master_printf("  The network director has been replaced by the application director. The application director will trigger the network surrogate on and off.\n");
     }
     master_printf("\n");
 }
 
-void free_application_surrogate(void) {
-    free_avg_app_iteration_predictor();
+void surrogates_finalize(void) {
+    // TODO (helq): check that we are in fact still in surrogate (either network or application)
+    if (surrogate_time_last > 0) { // we likely didn't transitioned back from surrogate mode
+        time_in_surrogate += tw_clock_read() - surrogate_time_last;
+    }
+    if (is_network_surrogate_configured) {
+        network_director_finalize();
+    }
+    if (is_app_surrogate_configured) {
+        application_director_finalize();
+        free_avg_app_iteration_predictor();
+    }
 }
 // === END OF All things Surrogate Configuration
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index 13fccb37..230c6ade 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -4,9 +4,11 @@
 #include <ross-extern.h>
 #include <stdio.h>
 
-double surrogate_switching_time = 0.0;
-double time_in_surrogate = 0.0;
-static double surrogate_time_last = 0.0;
+static bool is_network_surrogate_configured = false;
+static struct switch_at_struct switch_network_at = {0};
+static struct network_surrogate_config net_surr_config = {0};
+static bool freeze_network_on_switch = false;
+static bool network_director_enabled = false;
 
 // === Frozen events system for separate queue approach
 static tw_event *frozen_events_head = NULL;  // Head of frozen events linked list
@@ -313,14 +315,14 @@ static void switch_model(tw_pe * pe) {
     if (g_tw_synchronization_protocol == OPTIMISTIC) {
         tw_scheduler_rollback_and_cancel_events_pe(pe);
     }
-    net_surr_config.director.switch_surrogate();
+    net_surr_config.model.switch_surrogate();
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        printf("Switching to %s\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity");
+        printf("Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "surrogate" : "high-fidelity");
     }
 
     // "Freezing" network events and activating LP's switch functions
     if (freeze_network_on_switch) {
-        if (net_surr_config.director.is_surrogate_on()) {
+        if (net_surr_config.model.is_surrogate_on()) {
             model_net_method_switch_to_surrogate();
             events_high_def_to_surrogate_switch(pe);
         } else {
@@ -333,6 +335,7 @@ static void switch_model(tw_pe * pe) {
 
 void network_director(tw_pe * pe) {
     assert(is_network_surrogate_configured);
+    assert(network_director_enabled);
 
 #ifdef USE_RAND_TIEBREAKER
     tw_stime gvt = pe->GVT_sig.recv_ts;
@@ -348,7 +351,7 @@ void network_director(tw_pe * pe) {
         }
         if (DEBUG_DIRECTOR == 3) {
             printf("GVT %d at %f in %s\n", i++, gvt,
-                    net_surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition");
+                    net_surr_config.model.is_surrogate_on() ? "surrogate-mode" : "high-definition");
         }
     }
 
@@ -361,22 +364,18 @@ void network_director(tw_pe * pe) {
 
     // Do not process if the simulation ended
     if (gvt >= g_tw_ts_end) {
-        // If the simulation ended and the surrogate is still on, stop timer checking surrogate time
-        if (net_surr_config.director.is_surrogate_on()) {
-            time_in_surrogate += tw_clock_read() - surrogate_time_last;
-        }
         return;
     }
 
     // ---- Past this means that we are in fact switching ----
-    bool const pre_switch_status = net_surr_config.director.is_surrogate_on();
+    bool const pre_switch_status = net_surr_config.model.is_surrogate_on();
 
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
         if (DEBUG_DIRECTOR == 2) {
             printf("\n");
         }
-        printf("Switching at %f\n", gvt);
+        printf("Switching network at %f\n", gvt);
     }
 
     double const start = tw_clock_read();
@@ -391,28 +390,49 @@ void network_director(tw_pe * pe) {
     }
 
     if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) {
-        printf("Switch completed!\n");
+        printf("Network switch completed!\n");
     }
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: Switch completed!\n", g_tw_mynode);
     }
 
     // Determining time in surrogate
-    if (pre_switch_status != net_surr_config.director.is_surrogate_on()) {
-        if (net_surr_config.director.is_surrogate_on()) {
-            // Start tracking time spent in surrogate mode
-            surrogate_time_last = end;
-        } else {
-            // We are done tracking time spent in surrogate mode
-            time_in_surrogate += start - surrogate_time_last;
-        }
+    if (net_surr_config.model.is_surrogate_on()) {
+        // Start tracking time spent in surrogate mode
+        surrogate_time_last = end;
+    } else {
+        // We are done tracking time spent in surrogate mode
+        time_in_surrogate += start - surrogate_time_last;
+        surrogate_time_last = 0.0;
+    }
+}
+
+void network_director_configure(struct network_surrogate_config * sc, struct switch_at_struct * switch_network_at_, bool fnos) {
+    is_network_surrogate_configured = true;
+    // Injecting into ROSS the function to be called at GVT
+    if (switch_network_at_) {
+        network_director_enabled = true;
+        g_tw_gvt_hook = network_director;
+        switch_network_at = *switch_network_at_;
+        tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]);
     }
+    net_surr_config = *sc;
+    freeze_network_on_switch = fnos;
 }
 
-// === Function for application director to use network freezing machinery
+void network_director_finalize(void) {
+    if (network_director_enabled) {
+        free(switch_network_at.time_stampts);
+    }
+}
+
+// === Function for application director to use switch to surrogate machinery
 void surrogate_switch_network_model(tw_pe * pe) {
     // Simply expose the existing switch_model function for use by application director
+    double const start = tw_clock_read();
     switch_model(pe);
+    double const end = tw_clock_read();
+    surrogate_switching_time += end - start;
 }
 //
 // === END OF Director functionality
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index 2b8af6ea..c6553563 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -1,7 +1,8 @@
-#include <codes/surrogate/init.h>
 #include <codes/surrogate/packet-latency-predictor/average.h>
+#include <assert.h>
 
 double ignore_until = 0;
+static int num_terminals = 0;
 
 
 // === Average packet latency functionality
@@ -14,22 +15,18 @@ struct aggregated_latency_one_terminal {
 struct latency_surrogate {
     struct aggregated_latency_one_terminal aggregated_next_packet_delay;
     struct aggregated_latency_one_terminal aggregated_latency_for_all;
-    unsigned int num_terminals;
     struct aggregated_latency_one_terminal aggregated_latency[];
 };
 
 static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) {
     (void) lp;
     (void) src_terminal;
-    assert(data->num_terminals == 0);
     assert(data->aggregated_latency_for_all.sum_latency == 0);
     assert(data->aggregated_latency_for_all.total_msgs == 0);
     assert(data->aggregated_latency[0].sum_latency == 0);
     assert(data->aggregated_latency[0].total_msgs == 0);
     assert(data->aggregated_next_packet_delay.total_msgs == 0);
     assert(data->aggregated_next_packet_delay.sum_latency == 0);
-
-    data->num_terminals = net_surr_config.total_terminals;
 }
 
 static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) {
@@ -42,7 +39,7 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int
 
     unsigned int const dest_terminal = start->dfdally_dest_terminal_id;
     double const latency = end->travel_end_time - start->travel_start_time;
-    assert(dest_terminal < data->num_terminals);
+    assert(dest_terminal < num_terminals);
     assert(end->travel_end_time > start->travel_start_time);
 
     // For average latency per terminal
@@ -64,7 +61,7 @@ static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp
     (void) lp;
 
     unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id;
-    assert(dest_terminal < data->num_terminals);
+    assert(dest_terminal < num_terminals);
 
     unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs;
     if (total_total_datapoints == 0) {
@@ -102,7 +99,9 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
 }
 
 
-struct packet_latency_predictor average_latency_predictor(int num_terminals) {
+struct packet_latency_predictor average_latency_predictor(int num_terminals_) {
+    assert(num_terminals_ >= 0);
+    num_terminals = num_terminals_;
     return (struct packet_latency_predictor) {
     .init              = (init_pred_lat_f) init_pred,
     .feed              = (feed_pred_lat_f) feed_pred,

From 4969737b762b82987c6ec36edfd4af19b57d28dc Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 13 Jun 2025 11:26:15 -0400
Subject: [PATCH 158/188] Fixing position of bracket

---
 src/networks/model-net/dragonfly-dally.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 102a2de7..99b292ee 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5511,8 +5511,8 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
             // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce
             // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S
             vacuous_msg_to_itself(s, msg, lp);
-        }
 #endif /* if ALWAYS_DETERMINISTIC_NETWORK */
+        }
     }
 
     // if the message is complete (ie, this `msg` is the last piece of the message)

From b4b6362ec2495b8659dc745cfd2eb7bc3583e47d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 13 Jun 2025 12:39:25 -0400
Subject: [PATCH 159/188] Updating tests

---
 tests/example-ping-pong-surrogate-1.sh             | 2 +-
 tests/example-ping-pong-surrogate-2.sh             | 2 +-
 tests/example-ping-pong-surrogate-3.sh             | 2 +-
 tests/example-ping-pong-surrogate-determinism-1.sh | 2 +-
 tests/example-ping-pong-surrogate-determinism-2.sh | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh
index 7f3a5f6d..6b9e4b8b 100755
--- a/tests/example-ping-pong-surrogate-1.sh
+++ b/tests/example-ping-pong-surrogate-1.sh
@@ -42,7 +42,7 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking that the surrogate switched properly
-grep 'Switch completed' model-output-2.txt
+grep 'Network switch completed' model-output-2.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh
index f987bedf..a37d309a 100755
--- a/tests/example-ping-pong-surrogate-2.sh
+++ b/tests/example-ping-pong-surrogate-2.sh
@@ -42,7 +42,7 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking that the surrogate switched properly
-grep 'Switch completed' model-output-2.txt
+grep 'Network switch completed' model-output-2.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh
index 19212e9e..ca04c245 100755
--- a/tests/example-ping-pong-surrogate-3.sh
+++ b/tests/example-ping-pong-surrogate-3.sh
@@ -43,7 +43,7 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking that the surrogate switched properly
-grep 'Switch completed' model-output-1.txt
+grep 'Network switch completed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh
index cd219272..5726aa0b 100755
--- a/tests/example-ping-pong-surrogate-determinism-1.sh
+++ b/tests/example-ping-pong-surrogate-determinism-1.sh
@@ -40,7 +40,7 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking that the surrogate switched properly
-grep 'Switch completed' model-output-1.txt
+grep 'Network switch completed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh
index b86f0dfd..f7908a27 100755
--- a/tests/example-ping-pong-surrogate-determinism-2.sh
+++ b/tests/example-ping-pong-surrogate-determinism-2.sh
@@ -40,7 +40,7 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking that the surrogate switched properly
-grep 'Switch completed' model-output-1.txt
+grep 'Network switch completed' model-output-1.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 

From 36bc31780342db216a636d592e0cde25f68fe416 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 13 Jun 2025 16:02:27 -0400
Subject: [PATCH 160/188] Adding tests for UNION

---
 tests/CMakeLists.txt                          |   9 +
 .../conceptual.json                           |   6 +-
 .../dfdally-72-par.conf.in                    |  40 +-
 .../jacobi_MILC.workload.conf                 |   0
 .../milc_skeleton.json                        |   2 +-
 .../rand_node0-1d-72-jacobi_MILC.alloc.conf   |   0
 tests/run-test.sh.in                          |   8 +
 .../union-workload-test-surrogate-parallel.sh | 101 +++++
 tests/union-workload-test-surrogate.sh        | 101 +++++
 tmptest/README.md                             | 107 ------
 tmptest/conf/dfdally-72-inter                 | Bin 576 -> 0 bytes
 tmptest/conf/dfdally-72-intra                 | Bin 144 -> 0 bytes
 tmptest/expected/iteration-logs               | 360 ------------------
 .../avg-all-reduce-time                       |   0
 .../dragonfly-cn-stats                        |  73 ----
 .../dragonfly-link-stats                      | 326 ----------------
 .../model-net-category-all                    |  72 ----
 .../model-net-category-high                   |  72 ----
 .../mpi-replay-stats                          |  73 ----
 tmptest/expected/tmptest-jacobiS_MILC.output  | 240 ------------
 20 files changed, 260 insertions(+), 1330 deletions(-)
 rename {tmptest/conf => tests/conf/union-milc-jacobi-workload}/conceptual.json (94%)
 rename tmptest/conf/dfdally-72-par.conf => tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in (51%)
 rename tmptest/conf/jacobi_MILC.conf => tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf (100%)
 rename {tmptest/conf => tests/conf/union-milc-jacobi-workload}/milc_skeleton.json (92%)
 rename tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf => tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf (100%)
 create mode 100644 tests/union-workload-test-surrogate-parallel.sh
 create mode 100644 tests/union-workload-test-surrogate.sh
 delete mode 100644 tmptest/README.md
 delete mode 100644 tmptest/conf/dfdally-72-inter
 delete mode 100644 tmptest/conf/dfdally-72-intra
 delete mode 100644 tmptest/expected/iteration-logs
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
 delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC.output

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 886dcf59..a78e7210 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -95,6 +95,15 @@ set(test-shell-files
     example-ping-pong-surrogate-determinism-2.sh
     )
 
+configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @ONLY)
+
+if(USE_UNION)
+    list(APPEND test-shell-files
+        union-workload-test-surrogate.sh
+        union-workload-test-surrogate-parallel.sh
+    )
+endif()
+
 foreach(testname ${test-shell-files})
     add_test(NAME ${testname}
         COMMAND "${CMAKE_CURRENT_BINARY_DIR}/run-test.sh" "${CMAKE_CURRENT_SOURCE_DIR}/${testname}"
diff --git a/tmptest/conf/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json
similarity index 94%
rename from tmptest/conf/conceptual.json
rename to tests/conf/union-milc-jacobi-workload/conceptual.json
index ad786bce..557c0bce 100644
--- a/tmptest/conf/conceptual.json
+++ b/tests/conf/union-milc-jacobi-workload/conceptual.json
@@ -40,9 +40,9 @@
       "100",
       "100",
       "100",
-      "125000",
-      "5",
-      "2000000",
+      "50000",
+      "39",
+      "200000",
       "barrier"
     ]
   },
diff --git a/tmptest/conf/dfdally-72-par.conf b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
similarity index 51%
rename from tmptest/conf/dfdally-72-par.conf
rename to tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
index 7f4679b6..3b72d00a 100644
--- a/tmptest/conf/dfdally-72-par.conf
+++ b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
@@ -38,16 +38,16 @@ PARAMS
 # bandwidth in GiB/s for compute node-router channels
    cn_bandwidth="5.25";
 # ROSS message size
-   message_size="792";
+   message_size="840";
 # number of compute nodes connected to router, dictated by dragonfly config
 # file
    num_cns_per_router="2";
 # number of global channels per router
    num_global_channels="2";
 # network config file for intra-group connections
-   intra-group-connections="/home/helq/Research/HPC/code/kronos/kronos-merge/codes/tmptest/conf/dfdally-72-intra";
+   intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra";
 # network config file for inter-group connections
-   inter-group-connections="/home/helq/Research/HPC/code/kronos/kronos-merge/codes/tmptest/conf/dfdally-72-inter";
+   inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter";
 # routing protocol to be used
    routing="prog-adaptive";
    minimal-bias="1";
@@ -62,3 +62,37 @@ PARAMS
    #offset for app_id: model-net-mpi-replay is 88, synthetic-dfly-plus is 24
    offset="144";
 }
+NETWORK_SURROGATE {
+   enable="${NETWORK_SURR_ON}"; # either "0" or "1"
+# determines the director switching from surrogate to high-def simulation strategy
+   director_mode="delegate-to-app-director";
+   #director_mode="at-fixed-virtual-times";
+
+# director configuration for: director_mode == "at-fixed-virtual-times"
+# timestamps at which to switch to surrogate-mode and back
+   fixed_switch_timestamps=( "25.0e6", "400.0e6" );
+
+# latency predictor to use. Options: average, torch-jit
+   packet_latency_predictor="average";
+# some workload models need some time to stabilize, a point where the network behaviour stabilizes. The predictor will ignore all packet latencies that arrive during this period
+   ignore_until="10.0e6";
+
+# parameters for torch-jit latency predictor
+   torch_jit_mode="single-static-model-for-all-terminals";
+   torch_jit_model_path="";
+
+# selecting network treatment on switching to surrogate. Options: freeze, nothing
+   network_treatment_on_switch="nothing";  # freeze is buggy sadly. It freezes more events than it should
+}
+APPLICATION_SURROGATE {
+   enable="${APP_SURR_ON}"; # either 0 or 1
+
+   # Configuring director
+   director_mode="${APP_DIRECTOR_MODE}"; # Opts: "every-n-gvt", "every-n-nanoseconds"
+   director_num_gvt="${EVERY_N_GVT}";
+   director_num_ns="${EVERY_NSECS}"; # 1^6 means 1ms
+
+   # Configuring predictor
+   # Minimum number of iterations to collect data from before skipping ahead in the simulation
+   num_iters_to_collect="2";
+}
diff --git a/tmptest/conf/jacobi_MILC.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
similarity index 100%
rename from tmptest/conf/jacobi_MILC.conf
rename to tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
diff --git a/tmptest/conf/milc_skeleton.json b/tests/conf/union-milc-jacobi-workload/milc_skeleton.json
similarity index 92%
rename from tmptest/conf/milc_skeleton.json
rename to tests/conf/union-milc-jacobi-workload/milc_skeleton.json
index b3a42297..c43f648f 100644
--- a/tmptest/conf/milc_skeleton.json
+++ b/tests/conf/union-milc-jacobi-workload/milc_skeleton.json
@@ -4,7 +4,7 @@
         "size": 36,
         "cfg": {
             "app": "milc",
-            "iteration_cnt": 5,
+            "iteration_cnt": 120,
             "compute_delay": 100,
             "dimension_cnt": 4,
             "dimension_sizes": [2,2,3,3],
diff --git a/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
similarity index 100%
rename from tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf
rename to tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
diff --git a/tests/run-test.sh.in b/tests/run-test.sh.in
index 81259559..be706268 100755
--- a/tests/run-test.sh.in
+++ b/tests/run-test.sh.in
@@ -11,6 +11,14 @@ export srcdir="${CMAKE_SOURCE_DIR}"
 export bindir="${CMAKE_BINARY_DIR}"
 export GENERATED_USING_CMAKE=1
 
+# Set Union and SWM install paths if available
+if [ -n "${SWM_DATAROOTDIR}" ]; then
+    export SWM_DATAROOTDIR="${SWM_DATAROOTDIR}"
+fi
+if [ -n "${UNION_DATAROOTDIR}" ]; then
+    export UNION_DATAROOTDIR="${UNION_DATAROOTDIR}"
+fi
+
 # Creating temporal folder in order to save output without colliding with
 # some other process that would like to generate the same output
 mkdir -p testing-output
diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh
new file mode 100644
index 00000000..c16deb8f
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output.txt 2> model-output-error.txt
+
+err=$?
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking both milc and jacobi ran
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'Jacobi3D: Completed 40 iterations' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'App 0: All non-synthetic workloads have completed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned into surrogacy
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned back to high-fidelity
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+exit 0
diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh
new file mode 100644
index 00000000..59c0c067
--- /dev/null
+++ b/tests/union-workload-test-surrogate.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=1
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=1 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output.txt 2> model-output-error.txt
+
+err=$?
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking both milc and jacobi ran
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'Jacobi3D: Completed 40 iterations' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'App 0: All non-synthetic workloads have completed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned into surrogacy
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned back to high-fidelity
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+exit 0
diff --git a/tmptest/README.md b/tmptest/README.md
deleted file mode 100644
index 22a7de5d..00000000
--- a/tmptest/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Union
-Workload Manager for Integration of Conceptual as an Online Workload for CODES
-
-
-# Installation
-
-### Installing Conceptual (mandatory)
-
-Download Conceptual at https://ccsweb.lanl.gov/~pakin/software/conceptual/download.html (version 1.5.1 or greater)
-
-```bash
-tar xvf conceptual-1.5.1.tar.gz
-cd conceptual-1.5.1
-PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm
-make
-make install
-```
-
-### Installing Boost-Python (currently mandatory, we may remove this soon)
-
-Download boost at http://www.boost.org/users/download/ (version 1.68 or greater)
-
-```bash
-tar xvf boost_1_68_0.tar.gz
-cd boost_1_68_0
-./bootstrap.sh --prefix=/path/to/boost/install  --with-libraries=python
-./b2 install
-```
-
-### Installing Union
-```bash
-git clone https://github.com/SPEAR-UIC/Union
-cd union
-./prepare.sh
-./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx
-make
-make install
-```
-
-# Workload Simulation with CODES
-
-### Installing ROSS
-
-```bash
-git clone https://github.com/carothersc/ROSS.git
-mkdir build-ross
-cd build-ross
-cmake -DCMAKE_INSTALL_PREFIX:path=path/to/ross/install -DCMAKE_C_COMPILER=$(which mpicc) -DCMAKE_CXX_COMPILER=$(which mpicxx) ../ROSS
-make install
-```
-
-### Installing Argobots
-
-```bash
-git clone https://github.com/pmodels/argobots.git
-./autogen.sh
-./configure --prefix=/path/to/argobots/install
-make
-make install
-```
-
-### Installing SWM workloads
-
-```bash
-git clone https://github.com/codes-org/SWM-workloads.git
-cd swm
-./prepare.sh
-./configure --with-boost=/path/to/boost/install --prefix=/path/to/swm/install CC=mpicc CXX=mpicxx
-make
-make install
-```
-
-### Installing CODES (kronos-union branch)
-
-```bash
-git clone https://github.com/codes-org/codes.git
-cd codes
-./prepare.sh
-mkdir build
-cd build
-../configure \
-    --disable-shared \
-    --with-online=true \
-    --with-boost=/path/to/boost/install \
-    PKG_CONFIG_PATH=/home/development/kronos/kronos-merge/argobots/build/bin/lib/pkgconfig:/home/development/kronos/kronos-merge/ross/build/bin/lib/pkgconfig:/home/development/kronos/kronos-merge/Union/install/lib/pkgconfig:/home/development/kronos/kronos-merge/swm-workloads/swm/build/bin/lib/pkgconfig \
-    --with-union=true \
-    --prefix=/path/to/codes/install \
-    CC=mpicc CXX=mpicxx
-
-make
-make install
-```
-
-### Run Test Simulations
-The tmptest directory includes all necessary configuration files to run the test simulation.
-
-Copy milc_skeleton.json to /path/to/swm/install/share/
-Copy conceptual.json to /path/to/union/install/share/
-Change the path for "intra-group-connections" and "intra-group-connections" in dfdally-72-par.conf
-Run the following command:
-
-```bash
-/home/helq/Research/HPC/code/kronos/kronos-merge/codes/build/src/model-net-mpi-replay --sync=1 --workload_type=conc-online --lp-io-use-suffix=1 --workload_conf_file="$(realpath ../conf/jacobi_MILC.conf)" --alloc_file="$(realpath ../conf/rand_node0-1d-72-jacobi_MILC.conf)" --lp-io-dir=tmptest-jacobiS_MILC -- "$(realpath ../conf/dfdally-72-par.conf)" > tmptest-jacobiS_MILC.output
-```
-
-
-
diff --git a/tmptest/conf/dfdally-72-inter b/tmptest/conf/dfdally-72-inter
deleted file mode 100644
index f95b989c64812d8936d00541ad6808c590c2a0dd..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 576
zcmXxhSxy5%6hqO107)Q_nR#f){`ZKF)Nbj@{bTvrHO3gr;Cl#H;c3=G^g5hpeTy!_
z=d4roCh*a1xD9;tIot(4`W0kNvqr`$@X=#f2R`~9?gJnF4YGXA+~l8+zJ$lXN56wE
zeC;Hwx60fFKKc>%fsg(LU2M`$vU;n`Q{baN!B%|TOqXt<ldRq<^EL3%OR$yAyO}QC
tLMK_hRi?M#>zmlBTiZ;RZlRN`-YRnn-jsS9TXk!j>C!E9lGR&f{sV<~3E2Pu

diff --git a/tmptest/conf/dfdally-72-intra b/tmptest/conf/dfdally-72-intra
deleted file mode 100644
index 37ea2848b53d14494cf91262078ab9fe67ac4d9d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 144
xcmYj|fe8RG2*bMh|Icf{zyxW~G)Yd^xk7W6|39n)>v*FA+ktgV99Rdo{{RVK03`qb

diff --git a/tmptest/expected/iteration-logs b/tmptest/expected/iteration-logs
deleted file mode 100644
index 83179b2f..00000000
--- a/tmptest/expected/iteration-logs
+++ /dev/null
@@ -1,360 +0,0 @@
-ITERATION 0 node 53 job 1 rank 34 time 4512686.215045
-ITERATION 0 node 54 job 1 rank 18 time 4513749.276363
-ITERATION 0 node 4 job 1 rank 10 time 4517871.443080
-ITERATION 0 node 56 job 1 rank 25 time 4538165.950297
-ITERATION 0 node 51 job 1 rank 29 time 4540989.136935
-ITERATION 0 node 29 job 1 rank 33 time 4542208.517079
-ITERATION 0 node 64 job 1 rank 9 time 4543495.002481
-ITERATION 0 node 34 job 1 rank 31 time 4545035.368923
-ITERATION 0 node 58 job 1 rank 13 time 4545114.472426
-ITERATION 0 node 59 job 1 rank 15 time 4548339.211403
-ITERATION 0 node 35 job 1 rank 21 time 4554419.528244
-ITERATION 0 node 10 job 1 rank 3 time 4561111.532285
-ITERATION 0 node 60 job 1 rank 35 time 4562346.412949
-ITERATION 0 node 30 job 1 rank 27 time 4564867.979829
-ITERATION 0 node 16 job 1 rank 23 time 4567439.860843
-ITERATION 0 node 31 job 1 rank 2 time 4573504.960624
-ITERATION 0 node 22 job 1 rank 11 time 4574627.826180
-ITERATION 0 node 37 job 1 rank 14 time 4579930.283097
-ITERATION 0 node 27 job 1 rank 30 time 4587175.167871
-ITERATION 0 node 20 job 1 rank 5 time 4589753.028610
-ITERATION 0 node 57 job 1 rank 4 time 4593050.336040
-ITERATION 0 node 17 job 1 rank 32 time 4604738.524840
-ITERATION 0 node 19 job 1 rank 28 time 4607122.579619
-ITERATION 0 node 63 job 1 rank 24 time 4616051.899458
-ITERATION 0 node 44 job 1 rank 22 time 4629534.947693
-ITERATION 0 node 33 job 1 rank 12 time 4630801.551756
-ITERATION 0 node 14 job 1 rank 20 time 4634810.847629
-ITERATION 0 node 43 job 1 rank 17 time 4637426.227568
-ITERATION 0 node 50 job 1 rank 19 time 4645719.045367
-ITERATION 0 node 42 job 1 rank 1 time 4657866.214120
-ITERATION 0 node 0 job 1 rank 0 time 4662904.772657
-ITERATION 0 node 36 job 1 rank 26 time 4669557.114654
-ITERATION 0 node 47 job 1 rank 16 time 4682491.551200
-ITERATION 0 node 40 job 1 rank 7 time 4725442.291027
-ITERATION 0 node 41 job 1 rank 6 time 4726998.347349
-ITERATION 0 node 66 job 1 rank 8 time 4766523.879336
-ITERATION 0 node 67 job 0 rank 11 time 5565984.178337
-ITERATION 0 node 5 job 0 rank 23 time 5575875.812174
-ITERATION 0 node 9 job 0 rank 27 time 5579506.018872
-ITERATION 0 node 62 job 0 rank 31 time 5584465.243288
-ITERATION 0 node 45 job 0 rank 15 time 5618600.555287
-ITERATION 0 node 1 job 0 rank 19 time 5717281.741377
-ITERATION 0 node 70 job 0 rank 7 time 5721216.605169
-ITERATION 0 node 3 job 0 rank 6 time 5727197.597463
-ITERATION 0 node 68 job 0 rank 21 time 5728040.094749
-ITERATION 0 node 26 job 0 rank 29 time 5732165.171144
-ITERATION 0 node 46 job 0 rank 25 time 5736556.257391
-ITERATION 0 node 21 job 0 rank 13 time 5736620.081453
-ITERATION 0 node 2 job 0 rank 3 time 5742325.644168
-ITERATION 0 node 55 job 0 rank 17 time 5749700.425050
-ITERATION 0 node 25 job 0 rank 35 time 5767346.740335
-ITERATION 0 node 38 job 0 rank 2 time 5788871.660345
-ITERATION 0 node 28 job 0 rank 33 time 5811545.306529
-ITERATION 0 node 32 job 0 rank 22 time 5813306.845833
-ITERATION 0 node 49 job 0 rank 20 time 5816982.805562
-ITERATION 0 node 61 job 0 rank 24 time 5820868.818799
-ITERATION 0 node 52 job 0 rank 32 time 5824994.468838
-ITERATION 0 node 7 job 0 rank 28 time 5829521.253890
-ITERATION 0 node 23 job 0 rank 26 time 5830679.689756
-ITERATION 0 node 71 job 0 rank 16 time 5830833.289263
-ITERATION 0 node 18 job 0 rank 5 time 5831517.990821
-ITERATION 0 node 13 job 0 rank 34 time 5836011.317489
-ITERATION 0 node 69 job 0 rank 10 time 5848873.775015
-ITERATION 0 node 39 job 0 rank 8 time 5852519.517052
-ITERATION 0 node 65 job 0 rank 4 time 5853828.568648
-ITERATION 0 node 24 job 0 rank 1 time 5854716.580830
-ITERATION 0 node 12 job 0 rank 0 time 5859659.053022
-ITERATION 0 node 6 job 0 rank 18 time 5863641.542198
-ITERATION 0 node 15 job 0 rank 30 time 5891375.888033
-ITERATION 0 node 8 job 0 rank 14 time 5894381.087555
-ITERATION 0 node 48 job 0 rank 12 time 5895134.332795
-ITERATION 0 node 11 job 0 rank 9 time 5942306.311091
-ITERATION 1 node 66 job 1 rank 8 time 8297180.376270
-ITERATION 1 node 14 job 1 rank 20 time 8300140.917016
-ITERATION 1 node 64 job 1 rank 9 time 8300849.048957
-ITERATION 1 node 19 job 1 rank 28 time 8301305.476039
-ITERATION 1 node 63 job 1 rank 24 time 8301305.711761
-ITERATION 1 node 33 job 1 rank 12 time 8301733.793207
-ITERATION 1 node 10 job 1 rank 3 time 8302129.790711
-ITERATION 1 node 42 job 1 rank 1 time 8302470.511443
-ITERATION 1 node 47 job 1 rank 16 time 8303046.409280
-ITERATION 1 node 35 job 1 rank 21 time 8303817.637378
-ITERATION 1 node 37 job 1 rank 14 time 8303944.501174
-ITERATION 1 node 4 job 1 rank 10 time 8303944.806055
-ITERATION 1 node 43 job 1 rank 17 time 8304146.840707
-ITERATION 1 node 56 job 1 rank 25 time 8304344.924724
-ITERATION 1 node 59 job 1 rank 15 time 8305202.188843
-ITERATION 1 node 20 job 1 rank 5 time 8305346.120632
-ITERATION 1 node 31 job 1 rank 2 time 8305427.098141
-ITERATION 1 node 22 job 1 rank 11 time 8305464.857400
-ITERATION 1 node 16 job 1 rank 23 time 8305465.061184
-ITERATION 1 node 58 job 1 rank 13 time 8305909.329333
-ITERATION 1 node 44 job 1 rank 22 time 8306179.488712
-ITERATION 1 node 40 job 1 rank 7 time 8306815.016916
-ITERATION 1 node 36 job 1 rank 26 time 8307242.113385
-ITERATION 1 node 54 job 1 rank 18 time 8307242.230485
-ITERATION 1 node 0 job 1 rank 0 time 8307509.069980
-ITERATION 1 node 27 job 1 rank 30 time 8308069.836665
-ITERATION 1 node 41 job 1 rank 6 time 8308371.073239
-ITERATION 1 node 51 job 1 rank 29 time 8308510.790046
-ITERATION 1 node 57 job 1 rank 4 time 8308643.428061
-ITERATION 1 node 30 job 1 rank 27 time 8308762.164730
-ITERATION 1 node 17 job 1 rank 32 time 8308912.995878
-ITERATION 1 node 53 job 1 rank 34 time 8309964.008052
-ITERATION 1 node 34 job 1 rank 31 time 8311068.775441
-ITERATION 1 node 50 job 1 rank 19 time 8311331.100457
-ITERATION 1 node 29 job 1 rank 33 time 8311707.229825
-ITERATION 1 node 60 job 1 rank 35 time 8317686.769451
-ITERATION 1 node 8 job 0 rank 14 time 11235088.810240
-ITERATION 1 node 18 job 0 rank 5 time 11235317.411859
-ITERATION 1 node 15 job 0 rank 30 time 11237472.894623
-ITERATION 1 node 24 job 0 rank 1 time 11245995.227219
-ITERATION 1 node 61 job 0 rank 24 time 11248511.493893
-ITERATION 1 node 71 job 0 rank 16 time 11249063.598574
-ITERATION 1 node 2 job 0 rank 3 time 11249988.785889
-ITERATION 1 node 11 job 0 rank 9 time 11250588.161742
-ITERATION 1 node 21 job 0 rank 13 time 11251495.763839
-ITERATION 1 node 49 job 0 rank 20 time 11252879.124696
-ITERATION 1 node 38 job 0 rank 2 time 11253481.695522
-ITERATION 1 node 52 job 0 rank 32 time 11254930.185172
-ITERATION 1 node 26 job 0 rank 29 time 11259777.124063
-ITERATION 1 node 39 job 0 rank 8 time 11266061.281928
-ITERATION 1 node 32 job 0 rank 22 time 11270470.549169
-ITERATION 1 node 68 job 0 rank 21 time 11286125.385978
-ITERATION 1 node 48 job 0 rank 12 time 11294811.908230
-ITERATION 1 node 46 job 0 rank 25 time 11300192.747257
-ITERATION 1 node 12 job 0 rank 0 time 11317107.807555
-ITERATION 1 node 7 job 0 rank 28 time 11322189.096862
-ITERATION 1 node 67 job 0 rank 11 time 11333239.622073
-ITERATION 1 node 70 job 0 rank 7 time 11334381.123429
-ITERATION 1 node 45 job 0 rank 15 time 11339259.471444
-ITERATION 1 node 1 job 0 rank 19 time 11340523.986731
-ITERATION 1 node 62 job 0 rank 31 time 11345350.350138
-ITERATION 1 node 69 job 0 rank 10 time 11351306.613121
-ITERATION 1 node 23 job 0 rank 26 time 11361640.366497
-ITERATION 1 node 25 job 0 rank 35 time 11363595.692666
-ITERATION 1 node 3 job 0 rank 6 time 11372326.537226
-ITERATION 1 node 9 job 0 rank 27 time 11388501.387550
-ITERATION 1 node 6 job 0 rank 18 time 11391110.891033
-ITERATION 1 node 5 job 0 rank 23 time 11392483.556208
-ITERATION 1 node 65 job 0 rank 4 time 11392558.633627
-ITERATION 1 node 13 job 0 rank 34 time 11396149.449470
-ITERATION 1 node 55 job 0 rank 17 time 11417714.983024
-ITERATION 1 node 28 job 0 rank 33 time 11421840.318515
-ITERATION 2 node 58 job 1 rank 13 time 12316899.292981
-ITERATION 2 node 35 job 1 rank 21 time 12316899.576287
-ITERATION 2 node 64 job 1 rank 9 time 12316899.606397
-ITERATION 2 node 10 job 1 rank 3 time 12317127.807697
-ITERATION 2 node 33 job 1 rank 12 time 12320196.737845
-ITERATION 2 node 56 job 1 rank 25 time 12320196.913727
-ITERATION 2 node 59 job 1 rank 15 time 12320197.104361
-ITERATION 2 node 43 job 1 rank 17 time 12320197.230276
-ITERATION 2 node 66 job 1 rank 8 time 12320197.233370
-ITERATION 2 node 40 job 1 rank 7 time 12320425.112677
-ITERATION 2 node 31 job 1 rank 2 time 12320425.115126
-ITERATION 2 node 42 job 1 rank 1 time 12320426.025358
-ITERATION 2 node 51 job 1 rank 29 time 12321024.628471
-ITERATION 2 node 16 job 1 rank 23 time 12321024.717490
-ITERATION 2 node 22 job 1 rank 11 time 12321024.945440
-ITERATION 2 node 14 job 1 rank 20 time 12321025.218551
-ITERATION 2 node 41 job 1 rank 6 time 12321981.169000
-ITERATION 2 node 34 job 1 rank 31 time 12323494.411691
-ITERATION 2 node 0 job 1 rank 0 time 12323723.332787
-ITERATION 2 node 19 job 1 rank 28 time 12324322.073336
-ITERATION 2 node 30 job 1 rank 27 time 12324322.252770
-ITERATION 2 node 47 job 1 rank 16 time 12324322.491049
-ITERATION 2 node 29 job 1 rank 33 time 12324322.565767
-ITERATION 2 node 63 job 1 rank 24 time 12324322.568861
-ITERATION 2 node 50 job 1 rank 19 time 12325150.326534
-ITERATION 2 node 44 job 1 rank 22 time 12326063.258270
-ITERATION 2 node 20 job 1 rank 5 time 12326291.816256
-ITERATION 2 node 4 job 1 rank 10 time 12326891.771792
-ITERATION 2 node 37 job 1 rank 14 time 12326892.126731
-ITERATION 2 node 17 job 1 rank 32 time 12328447.826540
-ITERATION 2 node 60 job 1 rank 35 time 12329275.662024
-ITERATION 2 node 57 job 1 rank 4 time 12329589.123686
-ITERATION 2 node 54 job 1 rank 18 time 12330188.984627
-ITERATION 2 node 36 job 1 rank 26 time 12330189.079122
-ITERATION 2 node 27 job 1 rank 30 time 12331017.462222
-ITERATION 2 node 53 job 1 rank 34 time 12332573.069010
-ITERATION 3 node 64 job 1 rank 9 time 16846408.366519
-ITERATION 3 node 35 job 1 rank 21 time 16846408.565540
-ITERATION 3 node 58 job 1 rank 13 time 16848977.637086
-ITERATION 3 node 66 job 1 rank 8 time 16849705.670921
-ITERATION 3 node 56 job 1 rank 25 time 16849705.673849
-ITERATION 3 node 59 job 1 rank 15 time 16849705.765384
-ITERATION 3 node 43 job 1 rank 17 time 16849705.775639
-ITERATION 3 node 42 job 1 rank 1 time 16849934.209991
-ITERATION 3 node 40 job 1 rank 7 time 16849935.003134
-ITERATION 3 node 22 job 1 rank 11 time 16850534.153202
-ITERATION 3 node 16 job 1 rank 23 time 16850535.146788
-ITERATION 3 node 41 job 1 rank 6 time 16851491.059456
-ITERATION 3 node 34 job 1 rank 31 time 16853003.072714
-ITERATION 3 node 51 job 1 rank 29 time 16853102.972576
-ITERATION 3 node 0 job 1 rank 0 time 16853231.517421
-ITERATION 3 node 33 job 1 rank 12 time 16853284.355810
-ITERATION 3 node 47 job 1 rank 16 time 16853830.997593
-ITERATION 3 node 63 job 1 rank 24 time 16853831.006412
-ITERATION 3 node 29 job 1 rank 33 time 16853831.111130
-ITERATION 3 node 19 job 1 rank 28 time 16853831.239840
-ITERATION 3 node 20 job 1 rank 5 time 16854059.856564
-ITERATION 3 node 4 job 1 rank 10 time 16854659.040052
-ITERATION 3 node 44 job 1 rank 22 time 16856400.617505
-ITERATION 3 node 50 job 1 rank 19 time 16856401.051372
-ITERATION 3 node 57 job 1 rank 4 time 16857357.163993
-ITERATION 3 node 37 job 1 rank 14 time 16857724.883755
-ITERATION 3 node 54 job 1 rank 18 time 16857956.265580
-ITERATION 3 node 36 job 1 rank 26 time 16857956.347382
-ITERATION 3 node 17 job 1 rank 32 time 16859697.584190
-ITERATION 3 node 53 job 1 rank 34 time 16860340.349963
-ITERATION 3 node 60 job 1 rank 35 time 16860526.386863
-ITERATION 3 node 30 job 1 rank 27 time 16862439.401431
-ITERATION 3 node 27 job 1 rank 30 time 16862631.359863
-ITERATION 3 node 10 job 1 rank 3 time 16874057.983505
-ITERATION 3 node 31 job 1 rank 2 time 16877355.290935
-ITERATION 3 node 14 job 1 rank 20 time 16894510.197562
-ITERATION 2 node 5 job 0 rank 23 time 17110727.735133
-ITERATION 2 node 32 job 0 rank 22 time 17114852.209292
-ITERATION 2 node 62 job 0 rank 31 time 17114852.599159
-ITERATION 2 node 9 job 0 rank 27 time 17114852.774364
-ITERATION 2 node 68 job 0 rank 21 time 17114853.074428
-ITERATION 2 node 49 job 0 rank 20 time 17118149.308069
-ITERATION 2 node 45 job 0 rank 15 time 17118149.906489
-ITERATION 2 node 46 job 0 rank 25 time 17118150.389777
-ITERATION 2 node 25 job 0 rank 35 time 17118150.407391
-ITERATION 2 node 70 job 0 rank 7 time 17118379.051555
-ITERATION 2 node 15 job 0 rank 30 time 17118977.516634
-ITERATION 2 node 23 job 0 rank 26 time 17118977.597908
-ITERATION 2 node 67 job 0 rank 11 time 17118978.109855
-ITERATION 2 node 26 job 0 rank 29 time 17118978.321161
-ITERATION 2 node 8 job 0 rank 14 time 17121361.601018
-ITERATION 2 node 61 job 0 rank 24 time 17121446.508622
-ITERATION 2 node 11 job 0 rank 9 time 17121447.697106
-ITERATION 2 node 28 job 0 rank 33 time 17121448.334170
-ITERATION 2 node 7 job 0 rank 28 time 17122274.470849
-ITERATION 2 node 13 job 0 rank 34 time 17122274.835607
-ITERATION 2 node 1 job 0 rank 19 time 17122275.742882
-ITERATION 2 node 18 job 0 rank 5 time 17122503.338803
-ITERATION 2 node 2 job 0 rank 3 time 17122504.165793
-ITERATION 2 node 3 job 0 rank 6 time 17122504.387146
-ITERATION 2 node 69 job 0 rank 10 time 17123102.933399
-ITERATION 2 node 21 job 0 rank 13 time 17123103.656652
-ITERATION 2 node 39 job 0 rank 8 time 17124743.815952
-ITERATION 2 node 52 job 0 rank 32 time 17125571.717580
-ITERATION 2 node 55 job 0 rank 17 time 17125573.669661
-ITERATION 2 node 24 job 0 rank 1 time 17125800.744680
-ITERATION 2 node 38 job 0 rank 2 time 17125801.473223
-ITERATION 2 node 48 job 0 rank 12 time 17126399.806339
-ITERATION 2 node 6 job 0 rank 18 time 17126400.171098
-ITERATION 2 node 65 job 0 rank 4 time 17126628.674394
-ITERATION 2 node 71 job 0 rank 16 time 17128869.024909
-ITERATION 2 node 12 job 0 rank 0 time 17129926.080271
-ITERATION 4 node 4 job 1 rank 10 time 21068851.547471
-ITERATION 4 node 44 job 1 rank 22 time 21071037.824821
-ITERATION 4 node 54 job 1 rank 18 time 21073270.434306
-ITERATION 4 node 37 job 1 rank 14 time 21097612.783438
-ITERATION 4 node 27 job 1 rank 30 time 21102424.565486
-ITERATION 4 node 20 job 1 rank 5 time 21108249.546302
-ITERATION 4 node 33 job 1 rank 12 time 21110027.342829
-ITERATION 4 node 42 job 1 rank 1 time 21110256.073660
-ITERATION 4 node 57 job 1 rank 4 time 21111546.853731
-ITERATION 4 node 66 job 1 rank 8 time 21113501.882887
-ITERATION 4 node 14 job 1 rank 20 time 21113973.869283
-ITERATION 4 node 19 job 1 rank 28 time 21114152.678320
-ITERATION 4 node 58 job 1 rank 13 time 21115387.315874
-ITERATION 4 node 35 job 1 rank 21 time 21115387.760180
-ITERATION 4 node 10 job 1 rank 3 time 21115616.245472
-ITERATION 4 node 47 job 1 rank 16 time 21118843.796707
-ITERATION 4 node 63 job 1 rank 24 time 21119441.870921
-ITERATION 4 node 0 job 1 rank 0 time 21119597.570694
-ITERATION 4 node 17 job 1 rank 32 time 21127435.068013
-ITERATION 4 node 51 job 1 rank 29 time 21129060.332514
-ITERATION 4 node 36 job 1 rank 26 time 21143479.925267
-ITERATION 4 node 53 job 1 rank 34 time 21154862.434499
-ITERATION 4 node 31 job 1 rank 2 time 21156014.514993
-ITERATION 4 node 59 job 1 rank 15 time 21161740.594529
-ITERATION 4 node 40 job 1 rank 7 time 21161969.568395
-ITERATION 4 node 41 job 1 rank 6 time 21163525.624717
-ITERATION 4 node 16 job 1 rank 23 time 21173654.388369
-ITERATION 4 node 64 job 1 rank 9 time 21185533.014478
-ITERATION 4 node 43 job 1 rank 17 time 21189278.967485
-ITERATION 4 node 56 job 1 rank 25 time 21189811.925489
-ITERATION 4 node 34 job 1 rank 31 time 21192311.784227
-ITERATION 4 node 29 job 1 rank 33 time 21238412.751629
-ITERATION 4 node 22 job 1 rank 11 time 21253653.349842
-ITERATION 4 node 30 job 1 rank 27 time 21256950.657171
-ITERATION 4 node 50 job 1 rank 19 time 21257778.307985
-ITERATION 4 node 60 job 1 rank 35 time 21356838.612638
-ITERATION 3 node 49 job 0 rank 20 time 22283924.781893
-ITERATION 3 node 32 job 0 rank 22 time 22287221.893064
-ITERATION 3 node 61 job 0 rank 24 time 22287222.081563
-ITERATION 3 node 68 job 0 rank 21 time 22287222.429961
-ITERATION 3 node 7 job 0 rank 28 time 22288050.215038
-ITERATION 3 node 39 job 0 rank 8 time 22290519.388893
-ITERATION 3 node 15 job 0 rank 30 time 22291346.949060
-ITERATION 3 node 23 job 0 rank 26 time 22291347.443031
-ITERATION 3 node 26 job 0 rank 29 time 22291347.569684
-ITERATION 3 node 52 job 0 rank 32 time 22291347.632683
-ITERATION 3 node 24 job 0 rank 1 time 22291576.365970
-ITERATION 3 node 48 job 0 rank 12 time 22292175.550529
-ITERATION 3 node 5 job 0 rank 23 time 22292260.691330
-ITERATION 3 node 46 job 0 rank 25 time 22292261.166656
-ITERATION 3 node 8 job 0 rank 14 time 22293731.033444
-ITERATION 3 node 71 job 0 rank 16 time 22294644.940013
-ITERATION 3 node 13 job 0 rank 34 time 22294645.243202
-ITERATION 3 node 18 job 0 rank 5 time 22294873.423611
-ITERATION 3 node 2 job 0 rank 3 time 22294873.612687
-ITERATION 3 node 69 job 0 rank 10 time 22295472.778521
-ITERATION 3 node 21 job 0 rank 13 time 22295472.905174
-ITERATION 3 node 28 job 0 rank 33 time 22295558.271868
-ITERATION 3 node 11 job 0 rank 9 time 22295558.473985
-ITERATION 3 node 12 job 0 rank 0 time 22295701.701561
-ITERATION 3 node 9 job 0 rank 27 time 22296386.567242
-ITERATION 3 node 62 job 0 rank 31 time 22296386.592726
-ITERATION 3 node 38 job 0 rank 2 time 22298170.920117
-ITERATION 3 node 6 job 0 rank 18 time 22298770.578693
-ITERATION 3 node 65 job 0 rank 4 time 22298998.759202
-ITERATION 3 node 55 job 0 rank 17 time 22299683.607359
-ITERATION 3 node 45 job 0 rank 15 time 22299683.900055
-ITERATION 3 node 25 job 0 rank 35 time 22299683.915598
-ITERATION 3 node 70 job 0 rank 7 time 22299911.908515
-ITERATION 3 node 67 job 0 rank 11 time 22300511.902733
-ITERATION 3 node 1 job 0 rank 19 time 22303809.251089
-ITERATION 3 node 3 job 0 rank 6 time 22304037.244106
-ITERATION 4 node 46 job 0 rank 25 time 27793249.825077
-ITERATION 4 node 68 job 0 rank 21 time 27795614.122037
-ITERATION 4 node 11 job 0 rank 9 time 27795614.960123
-ITERATION 4 node 61 job 0 rank 24 time 27796442.932740
-ITERATION 4 node 9 job 0 rank 27 time 27796462.525210
-ITERATION 4 node 28 job 0 rank 33 time 27798203.258478
-ITERATION 4 node 49 job 0 rank 20 time 27799739.567011
-ITERATION 4 node 26 job 0 rank 29 time 27799739.840442
-ITERATION 4 node 39 job 0 rank 8 time 27799740.240069
-ITERATION 4 node 55 job 0 rank 17 time 27799740.572089
-ITERATION 4 node 67 job 0 rank 11 time 27800568.269665
-ITERATION 4 node 5 job 0 rank 23 time 27800568.290579
-ITERATION 4 node 52 job 0 rank 32 time 27800568.291383
-ITERATION 4 node 23 job 0 rank 26 time 27800568.764657
-ITERATION 4 node 25 job 0 rank 35 time 27800587.792281
-ITERATION 4 node 2 job 0 rank 3 time 27803265.147279
-ITERATION 4 node 21 job 0 rank 13 time 27803865.175933
-ITERATION 4 node 7 job 0 rank 28 time 27803865.393650
-ITERATION 4 node 71 job 0 rank 16 time 27803865.598713
-ITERATION 4 node 13 job 0 rank 34 time 27803865.779107
-ITERATION 4 node 1 job 0 rank 19 time 27803865.949695
-ITERATION 4 node 62 job 0 rank 31 time 27804693.839746
-ITERATION 4 node 69 job 0 rank 10 time 27804694.100148
-ITERATION 4 node 32 job 0 rank 22 time 27804694.147620
-ITERATION 4 node 38 job 0 rank 2 time 27806562.454709
-ITERATION 4 node 24 job 0 rank 1 time 27807391.000446
-ITERATION 4 node 48 job 0 rank 12 time 27807990.729141
-ITERATION 4 node 6 job 0 rank 18 time 27807991.114598
-ITERATION 4 node 45 job 0 rank 15 time 27807991.147076
-ITERATION 4 node 70 job 0 rank 7 time 27808219.847694
-ITERATION 4 node 15 job 0 rank 30 time 27808819.476032
-ITERATION 4 node 8 job 0 rank 14 time 27811203.560416
-ITERATION 4 node 12 job 0 rank 0 time 27811516.336037
-ITERATION 4 node 3 job 0 rank 6 time 27812345.183284
-ITERATION 4 node 18 job 0 rank 5 time 27812345.583060
-ITERATION 4 node 65 job 0 rank 4 time 27816470.918651
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time
deleted file mode 100644
index e69de29b..00000000
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
deleted file mode 100644
index b06aa026..00000000
--- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
+++ /dev/null
@@ -1,73 +0,0 @@
-# Format <LP id> <Terminal ID> <Total Data Sent> <Total Data Received> <Avg packet latency> <Max packet Latency> <Min packet Latency> <# Packets finished> <Avg Hops> <Avg Busy Time (over rails)>
-2 0 39813200 39813200 36107.138499 461417.952062 3297.307430 9770 4.376561 17587784.331581
-3 1 30000200 30000200 39284.494061 434701.820268 2379.827009 7387 3.682144 18356179.307331
-7 2 30000240 30000240 62318.782725 640866.354244 2379.827009 7392 4.470373 19113473.070391
-8 3 30000040 30000040 40925.980015 515505.949898 3291.464725 7367 3.945975 17940780.437224
-12 4 39813520 39813520 34994.103721 469254.313408 3297.307430 9810 4.381040 16536392.029545
-13 5 30000200 30000200 34635.404728 544989.362677 2379.827009 7387 4.055909 14938820.108085
-17 6 30000200 30000200 44767.801473 576653.794469 2384.084484 7387 3.656288 18814126.770707
-18 7 30000200 30000200 51387.382471 546505.844848 3291.464725 7387 4.206444 22205724.648773
-22 8 30000200 30000200 56515.671426 456563.833012 2379.827009 7387 4.338568 15243839.322545
-23 9 30000200 30000200 62747.573596 732304.908878 2464.855722 7387 4.501963 22518624.337887
-27 10 39813600 39813600 38206.513802 595234.314223 3297.307430 9820 4.071996 15912055.432753
-28 11 30000200 30000200 57688.459239 460987.598043 3291.464725 7387 4.624882 21982147.565134
-32 12 30000040 30000040 48120.746995 460788.732751 1553.218006 7367 4.426904 21978848.089413
-33 13 30000200 30000200 48728.737706 528097.477557 2379.827009 7387 4.224042 17300716.492549
-37 14 39813520 39813520 42482.617734 504077.082789 3297.307430 9810 4.500408 16254727.926633
-38 15 30000200 30000200 55858.592637 544988.314605 2379.827009 7387 3.945986 10437047.462660
-42 16 39813520 39813520 42849.647075 408832.282689 2469.279268 9810 4.165036 15447231.594722
-43 17 39813520 39813520 38338.085471 341206.976962 2384.084484 9810 4.015087 16773305.735538
-47 18 30000240 30000240 42373.430620 629220.891776 3291.464725 7392 4.535173 15995328.015708
-48 19 39813520 39813520 40423.608697 315897.493880 2384.084484 9810 4.176860 14328352.862862
-52 20 39813600 39813600 43520.623476 541086.894327 3297.307430 9820 4.555601 13780387.971014
-53 21 30000200 30000200 44476.828958 349439.122320 2379.827009 7387 4.475430 18712483.166522
-57 22 39813520 39813520 46535.756921 308890.720074 3297.307430 9810 4.331906 12926592.020511
-58 23 30000200 30000200 33511.399102 322937.974241 2469.279268 7387 3.786517 18523348.301746
-62 24 30000240 30000240 36774.377417 461518.474685 1553.218006 7392 4.146239 19158269.234675
-63 25 30000200 30000200 41059.457855 278051.087009 2379.827009 7387 4.351157 19270573.497551
-67 26 30000200 30000200 40614.046110 381768.605366 2379.827009 7387 4.125220 19112482.493963
-68 27 39813520 39813520 41995.882873 371667.257947 2469.279268 9810 4.376962 14646355.081486
-72 28 30000200 30000200 31321.605985 365266.392194 2379.827009 7387 4.039123 19400082.478809
-73 29 39813520 39813520 32959.171255 451587.798712 3297.307430 9810 4.179613 16095202.695306
-77 30 39813520 39813520 37427.649193 351365.395737 2469.279268 9810 4.407339 15586679.540493
-78 31 39813200 39813200 41862.918330 387855.048741 3297.307430 9770 4.525077 16586112.004836
-82 32 30000200 30000200 45060.956994 427249.367222 3291.464725 7387 4.514146 19189374.516511
-83 33 39813520 39813520 49395.064664 469382.320131 2384.084484 9810 4.012232 17850768.194652
-87 34 39813520 39813520 44079.438999 342285.995523 2469.279268 9810 4.520387 15144572.054951
-88 35 39813520 39813520 38971.920099 300516.633956 3297.307430 9810 4.351682 16091038.039457
-92 36 39813520 39813520 39426.347282 309758.764907 2643.131510 9810 4.074822 15678132.783902
-93 37 39813520 39813520 34995.061880 346958.053999 2384.084484 9810 3.428746 16876971.125963
-97 38 30000040 30000040 35905.285204 495198.570009 2379.827009 7367 3.970273 22512609.412966
-98 39 30000200 30000200 42447.183602 377731.969067 2469.279268 7387 4.671179 22714730.942416
-102 40 39813600 39813600 34687.355890 335707.416147 1556.056322 9820 3.577800 14259163.848822
-103 41 39813200 39813200 38175.362430 382249.538761 1556.056322 9770 3.537769 13573738.064970
-107 42 39813600 39813600 44532.014760 369370.892239 3297.307430 9820 4.442770 15313358.299072
-108 43 39813520 39813520 38517.783131 536152.927723 2384.084484 9810 3.930887 16854266.067963
-112 44 39813520 39813520 48072.153707 551643.077958 3297.307430 9810 4.406932 13784797.518310
-113 45 30000200 30000200 38361.576186 337612.463862 3291.464725 7387 4.412481 22709598.612240
-117 46 30000200 30000200 47063.934511 323522.751165 3291.464725 7387 4.315690 15178598.555845
-118 47 39813520 39813520 32468.101294 470956.241085 2384.084484 9810 3.637717 16103495.317657
-122 48 30000200 30000200 34763.145246 355505.175562 2379.827009 7387 3.586165 14099343.817718
-123 49 30000200 30000200 32178.416645 415926.699100 1553.218006 7387 3.282523 13642975.324074
-127 50 39813520 39813520 36042.175157 333399.692314 2384.084484 9810 3.894903 15924037.373174
-128 51 39813520 39813520 47426.976684 384894.236551 3297.307430 9810 4.524771 14800606.559240
-132 52 30000200 30000200 50648.202686 683306.228547 3291.464725 7387 4.051712 18051629.450587
-133 53 39813520 39813520 43688.807523 397577.483030 2384.084484 9810 4.367992 15294739.661280
-137 54 39813520 39813520 41274.721778 410620.945162 2384.084484 9810 3.614985 15348607.758912
-138 55 30000200 30000200 47460.273307 393172.959339 3291.464725 7387 4.455665 22327681.850808
-142 56 39813520 39813520 30353.003214 423224.921981 2384.084484 9810 3.627217 14875339.838529
-143 57 39813200 39813200 41978.905805 362220.406331 3297.307430 9770 4.289662 13439725.026830
-147 58 39813520 39813520 32600.221245 394656.517832 1556.056322 9810 3.366769 17388431.077442
-148 59 39813520 39813520 34844.964516 437962.421017 1556.056322 9810 3.642712 15275599.714230
-152 60 39813520 39813520 38828.762813 380963.287056 2469.279268 9810 4.269521 14057550.559304
-153 61 30000200 30000200 38425.563290 437052.170191 3291.464725 7387 4.298227 18690662.547655
-157 62 30000200 30000200 37163.192817 486901.376592 3291.464725 7387 4.302423 22529024.421436
-158 63 39813520 39813520 32177.129431 313859.556601 2384.084484 9810 4.102345 16088278.649605
-162 64 39813520 39813520 36307.283720 473982.706414 2384.084484 9810 3.756575 13805423.148064
-163 65 30000040 30000040 39266.258111 541660.942240 3291.464725 7367 3.542012 22415755.472424
-167 66 39813520 39813520 35784.342668 486343.405857 2384.084484 9810 3.758104 17791245.347997
-168 67 30000200 30000200 35113.840119 558234.396436 2379.827009 7387 3.712739 15175940.132678
-172 68 30000200 30000200 46259.577398 529697.867518 3291.464725 7387 4.381752 11926841.048741
-173 69 30000200 30000200 41583.798735 474531.461375 2384.084484 7387 4.093001 18609998.026356
-177 70 30000240 30000240 30915.717460 551227.348696 2379.827009 7392 3.543561 18582548.588579
-178 71 30000200 30000200 36220.251632 387764.380366 2464.855722 7387 3.623392 14445475.496982
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
deleted file mode 100644
index 7864d91b..00000000
--- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
+++ /dev/null
@@ -1,326 +0,0 @@
-# Format <source_id> <source_type> <dest_id> < dest_type>  <link_type> <link_traffic> <link_saturation> <stalled_chunks>
-
-0 T 0 R CN 40017920 17587784.331581 8837
-1 T 0 R CN 30257152 18356179.307331 6359
-0 R 1 R L 43550704 4874052.951440 4664
-0 R 2 R L 45137320 4738075.959451 5558
-0 R 3 R L 47388976 5040625.986632 6123
-0 R 7 R G 47388976 5040625.986632 6123
-0 R 11 R G 41983848 4420584.424223 4472
-0 R 0 T CN 39813200 1311579.280498 3696
-0 R 1 T CN 30049352 627821.545675 1868
-2 T 1 R CN 30277632 19113473.070391 6755
-3 T 1 R CN 30175232 17940780.437224 6736
-1 R 0 R L 45287672 4986264.334545 5638
-1 R 2 R L 41987648 4053798.568995 5472
-1 R 3 R L 44837432 4310280.411427 4833
-1 R 14 R G 44837432 4310280.411427 4833
-1 R 18 R G 40681472 5840547.653511 5126
-1 R 2 T CN 30049392 1001716.940524 3112
-1 R 3 T CN 30049192 766538.802992 2606
-4 T 2 R CN 40181760 16536392.029545 8204
-5 T 2 R CN 30257152 14938820.108085 6439
-2 R 0 R L 45136984 3987872.001758 3633
-2 R 1 R L 33825320 5132507.548055 3802
-2 R 3 R L 48316064 5229299.015873 6004
-2 R 21 R G 48316064 5229299.015873 6004
-2 R 25 R G 38686264 4817680.782145 4226
-2 R 4 T CN 39813520 1501521.379594 4519
-2 R 5 T CN 30049352 556600.125602 1835
-6 T 3 R CN 30257152 18814126.770707 6964
-7 T 3 R CN 30257152 22205724.648773 6984
-3 R 0 R L 45473184 4549998.796036 5237
-3 R 1 R L 38859592 4033811.151346 4587
-3 R 2 R L 36569560 4293046.016019 3998
-3 R 28 R G 47455304 5109010.661862 5571
-3 R 32 R G 48449992 6234288.585361 7036
-3 R 6 T CN 30049352 821409.664124 2409
-3 R 7 T CN 30049352 969930.916156 3276
-8 T 4 R CN 30257152 15243839.322545 6972
-9 T 4 R CN 30257152 22518624.337887 6513
-4 R 5 R L 36860984 4721092.963003 4346
-4 R 6 R L 32135288 3583080.888814 3568
-4 R 7 R L 58009616 6102408.455236 8020
-4 R 11 R G 58009616 6102408.455236 8020
-4 R 15 R G 52375248 5544874.795086 7249
-4 R 8 T CN 30049352 1128444.125886 3975
-4 R 9 T CN 30049352 932709.088273 3051
-10 T 5 R CN 40222720 15912055.432753 8427
-11 T 5 R CN 30257152 21982147.565134 6710
-5 R 4 R L 53993696 6136982.915861 7661
-5 R 6 R L 42998872 5104126.663520 4877
-5 R 7 R L 46448488 5646843.629053 6270
-5 R 18 R G 46448488 5646843.629053 6270
-5 R 22 R G 51448536 5797029.042031 6137
-5 R 10 T CN 39813600 1349237.793212 4270
-5 R 11 T CN 30049352 1248376.533293 3652
-12 T 6 R CN 30175232 21978848.089413 7147
-13 T 6 R CN 30257152 17300716.492549 6711
-6 R 4 R L 40487440 5812805.144307 5433
-6 R 5 R L 36769744 5264234.631795 4172
-6 R 7 R L 36385608 4752852.854935 3298
-6 R 25 R G 36385608 4752852.854935 3298
-6 R 29 R G 40053384 5005853.637480 4483
-6 R 12 T CN 30049192 903354.038109 3055
-6 R 13 T CN 30049352 712938.420808 2057
-14 T 7 R CN 40181760 16254727.926633 8254
-15 T 7 R CN 30257152 10437047.462660 6653
-7 R 4 R L 34675992 4691030.184385 3995
-7 R 5 R L 43513016 4301021.299787 4870
-7 R 6 R L 35973648 3934225.668027 3041
-7 R 0 R G 42749416 4505780.268095 5345
-7 R 32 R G 38144576 4584423.547847 5166
-7 R 14 T CN 39813520 1387873.491301 4062
-7 R 15 T CN 30049352 1037738.118919 3344
-16 T 8 R CN 40181760 15447231.594722 8402
-17 T 8 R CN 40181760 16773305.735538 9077
-8 R 9 R L 37801352 5059668.991064 4600
-8 R 10 R L 37946560 6152744.148682 4069
-8 R 11 R L 55297048 5864020.845953 7606
-8 R 15 R G 55297048 5864020.845953 7606
-8 R 19 R G 45039696 4606939.448035 4488
-8 R 16 T CN 39813520 1003951.885126 3174
-8 R 17 T CN 39813520 1200737.527034 3904
-18 T 9 R CN 30277632 15995328.015708 7099
-19 T 9 R CN 40181760 14328352.862862 8584
-9 R 8 R L 49385696 6808787.447933 6557
-9 R 10 R L 33253400 5479264.726710 3535
-9 R 11 R L 52364520 6489474.843136 7766
-9 R 22 R G 52364520 6489474.843136 7766
-9 R 26 R G 49267624 6524810.669739 6733
-9 R 18 T CN 30049392 972448.000381 3110
-9 R 19 T CN 39813520 963301.128882 3233
-20 T 10 R CN 40222720 13780387.971014 8866
-21 T 10 R CN 30257152 18712483.166522 6914
-10 R 8 R L 38678080 5240244.555583 4041
-10 R 9 R L 49890448 5799177.766866 6788
-10 R 11 R L 48753776 5150370.905425 6152
-10 R 29 R G 48753776 5150370.905425 6152
-10 R 33 R G 41425648 6389805.183540 5013
-10 R 20 T CN 39813600 1377361.309928 4429
-10 R 21 T CN 30049352 1175058.415210 3308
-22 T 11 R CN 40181760 12926592.020511 8104
-23 T 11 R CN 30257152 18523348.301746 6773
-11 R 8 R L 40560208 4846836.691540 3797
-11 R 9 R L 34124224 4763859.453014 3292
-11 R 10 R L 38605016 5277021.621061 4322
-11 R 0 R G 40478280 5086777.914766 4111
-11 R 4 R G 49943824 5905101.806403 6563
-11 R 22 T CN 39813520 1424222.517660 4265
-11 R 23 T CN 30049352 1028261.496375 3530
-24 T 12 R CN 30277632 19158269.234675 7090
-25 T 12 R CN 30257152 19270573.497551 7116
-12 R 13 R L 32047456 3750265.764293 2849
-12 R 14 R L 50178184 7309130.184921 7564
-12 R 15 R L 58034400 5843698.462804 8436
-12 R 19 R G 58034400 5843698.462804 8436
-12 R 23 R G 44575160 6092245.688980 5725
-12 R 24 T CN 30049392 806566.838442 2811
-12 R 25 T CN 30049352 1213202.131881 3583
-26 T 13 R CN 30257152 19112482.493963 7014
-27 T 13 R CN 40181760 14646355.081486 8988
-13 R 12 R L 44527192 6423030.861185 6482
-13 R 14 R L 46636984 5807093.011567 6138
-13 R 15 R L 46974000 6554687.676421 5454
-13 R 26 R G 46974000 6554687.676421 5454
-13 R 30 R G 52447448 4836595.765817 6980
-13 R 26 T CN 30049352 1125329.914129 3288
-13 R 27 T CN 39813520 1512104.949816 4844
-28 T 14 R CN 30257152 19400082.478809 7092
-29 T 14 R CN 40181760 16095202.695306 9170
-14 R 12 R L 34606080 5950705.752400 4000
-14 R 13 R L 43123232 5680350.634564 5580
-14 R 15 R L 38261328 4981625.686206 3350
-14 R 1 R G 45992208 4930271.366009 5573
-14 R 33 R G 38261328 4981625.686206 3350
-14 R 28 T CN 30049352 637814.555154 2221
-14 R 29 T CN 39813520 1249106.103622 3833
-30 T 15 R CN 40181760 15586679.540493 9411
-31 T 15 R CN 40017920 16586112.004836 9223
-15 R 12 R L 51077496 6550042.514575 7713
-15 R 13 R L 47073208 4832118.412440 5895
-15 R 14 R L 45116952 4837185.355505 4833
-15 R 4 R G 52746688 6709131.321456 8350
-15 R 8 R G 51692976 6953577.442845 7000
-15 R 30 T CN 39813520 1283130.365744 4192
-15 R 31 T CN 39813200 1457038.449892 4765
-32 T 16 R CN 30257152 19189374.516511 7113
-33 T 16 R CN 40181760 17850768.194652 7596
-16 R 17 R L 54025952 5905546.258479 7114
-16 R 18 R L 34160632 4064251.783900 3198
-16 R 19 R L 45741616 6374696.422823 6186
-16 R 23 R G 45741616 6374696.422823 6186
-16 R 27 R G 41321480 5702481.030882 5627
-16 R 32 T CN 30049352 1532449.420789 4360
-16 R 33 T CN 39813520 1199169.584479 3474
-34 T 17 R CN 40181760 15144572.054951 9081
-35 T 17 R CN 40181760 16091038.039457 9000
-17 R 16 R L 46232920 5971728.548924 6388
-17 R 18 R L 44728608 6438892.891589 6745
-17 R 19 R L 51983616 5427979.947518 6365
-17 R 30 R G 51983616 5427979.947518 6365
-17 R 34 R G 44317208 5188154.376478 5087
-17 R 34 T CN 39813520 1485890.868749 4665
-17 R 35 T CN 39813520 1130833.183301 3732
-36 T 18 R CN 40181760 15678132.783902 8490
-37 T 18 R CN 40181760 16876971.125963 8122
-18 R 16 R L 46143600 5514466.847122 6984
-18 R 17 R L 42394152 3865669.619331 4840
-18 R 19 R L 45794064 5340218.948968 5313
-18 R 1 R G 45794064 5340218.948968 5313
-18 R 5 R G 49523608 5463627.902051 6131
-18 R 36 T CN 39813520 852504.013955 2683
-18 R 37 T CN 39813520 805790.300623 2486
-38 T 19 R CN 30175232 22512609.412966 6847
-39 T 19 R CN 30257152 22714730.942416 7121
-19 R 16 R L 34957960 5526707.603444 4004
-19 R 17 R L 47349408 6034758.983252 6333
-19 R 18 R L 46919416 5608445.885587 6075
-19 R 8 R G 44365616 5296334.999713 4604
-19 R 12 R G 56495544 6082973.442639 8366
-19 R 38 T CN 30049192 1092387.377044 3270
-19 R 39 T CN 30049352 1068280.894725 3140
-40 T 20 R CN 40222720 14259163.848822 8729
-41 T 20 R CN 40017920 13573738.064970 8591
-20 R 21 R L 39728944 4697512.855911 5353
-20 R 22 R L 41903320 5045588.986343 5348
-20 R 23 R L 52146088 7359369.800037 7983
-20 R 27 R G 52146088 7359369.800037 7983
-20 R 31 R G 47184128 5131759.262767 5973
-20 R 40 T CN 39813600 883030.699000 2892
-20 R 41 T CN 39813200 784350.806184 2677
-42 T 21 R CN 40222720 15313358.299072 9214
-43 T 21 R CN 40181760 16854266.067963 8718
-21 R 20 R L 39164632 5944643.875686 5577
-21 R 22 R L 38028176 4427084.605373 4537
-21 R 23 R L 34169112 4269694.137535 2856
-21 R 2 R G 48270464 5223801.490295 5763
-21 R 34 R G 34169112 4269694.137535 2856
-21 R 42 T CN 39813600 1364767.638329 4459
-21 R 43 T CN 39813520 857971.397901 2662
-44 T 22 R CN 40181760 13784797.518310 8666
-45 T 22 R CN 30257152 22709598.612240 6328
-22 R 20 R L 41212736 5760616.725519 4330
-22 R 21 R L 44899648 5571184.216750 5490
-22 R 23 R L 48191408 6286261.674063 6674
-22 R 5 R G 48191408 6286261.674063 6674
-22 R 9 R G 51169760 5237963.175649 7226
-22 R 44 T CN 39813520 1542534.243019 4936
-22 R 45 T CN 30049352 1265047.999267 3871
-46 T 23 R CN 30257152 15178598.555845 6169
-47 T 23 R CN 40181760 16103495.317657 8422
-23 R 20 R L 40333504 6603454.297476 5211
-23 R 21 R L 36006888 4843963.251430 3295
-23 R 22 R L 40413664 5142267.725370 4994
-23 R 12 R G 45082392 5304702.183129 5115
-23 R 16 R G 50801776 5131874.483025 7138
-23 R 46 T CN 30049352 1382693.916307 4027
-23 R 47 T CN 39813520 1038551.205006 3128
-48 T 24 R CN 30257152 14099343.817718 6106
-49 T 24 R CN 30257152 13642975.324074 6045
-24 R 25 R L 39016768 3857944.974915 4288
-24 R 26 R L 34965464 5003889.506839 4262
-24 R 27 R L 47190024 5454167.775527 6871
-24 R 31 R G 47190024 5454167.775527 6871
-24 R 35 R G 41646064 5992850.984984 5408
-24 R 48 T CN 30049352 667188.968666 2499
-24 R 49 T CN 30049352 881431.111138 2993
-50 T 25 R CN 40181760 15924037.373174 9323
-51 T 25 R CN 40181760 14800606.559240 8762
-25 R 24 R L 35274088 5828284.233935 3900
-25 R 26 R L 51652152 5817462.082088 7529
-25 R 27 R L 39214576 4326894.031401 4324
-25 R 2 R G 39214576 4326894.031401 4324
-25 R 6 R G 35298432 4524630.058634 3493
-25 R 50 T CN 39813520 966485.561719 2871
-25 R 51 T CN 39813520 1404936.336504 4156
-52 T 26 R CN 30257152 18051629.450587 6976
-53 T 26 R CN 40181760 15294739.661280 9009
-26 R 24 R L 38439736 6450489.640662 5018
-26 R 25 R L 43021968 4912180.431768 4539
-26 R 27 R L 49255576 5506754.761772 6905
-26 R 9 R G 49255576 5506754.761772 6905
-26 R 13 R G 46517152 4931759.758126 5515
-26 R 52 T CN 30049352 1089113.603708 3137
-26 R 53 T CN 39813520 1068108.929478 3544
-54 T 27 R CN 40181760 15348607.758912 8357
-55 T 27 R CN 30257152 22327681.850808 6890
-27 R 24 R L 31797088 5550718.157644 3272
-27 R 25 R L 52961704 5033719.493260 5860
-27 R 26 R L 40313048 4772700.110949 4746
-27 R 16 R G 46071704 4936679.422089 6250
-27 R 20 R G 51076808 5847799.836050 6668
-27 R 54 T CN 39813520 1055478.134521 3643
-27 R 55 T CN 30049352 1389171.083011 3761
-56 T 28 R CN 40181760 14875339.838529 8758
-57 T 28 R CN 40017920 13439725.026830 8197
-28 R 29 R L 36407792 4562925.391117 4531
-28 R 30 R L 54929192 5839356.894003 8374
-28 R 31 R L 47104064 5457596.779440 5298
-28 R 3 R G 44037240 3666217.594051 4157
-28 R 35 R G 47104064 5457596.779440 5298
-28 R 56 T CN 39813520 589240.204071 1810
-28 R 57 T CN 39813200 1253920.506312 4022
-58 T 29 R CN 40181760 17388431.077442 8594
-59 T 29 R CN 40181760 15275599.714230 8708
-29 R 28 R L 36743304 4747217.925107 3404
-29 R 30 R L 39226976 4588611.200791 4500
-29 R 31 R L 43193824 5690009.346772 4949
-29 R 6 R G 43193824 5690009.346772 4949
-29 R 10 R G 46395064 5499095.428036 5989
-29 R 58 T CN 39813520 813051.646961 2322
-29 R 59 T CN 39813520 916003.412645 3024
-60 T 30 R CN 40181760 14057550.559304 8373
-61 T 30 R CN 30257152 18690662.547655 6874
-30 R 28 R L 37206752 5500248.346157 3199
-30 R 29 R L 46057680 5746289.642082 5828
-30 R 31 R L 51234608 5090602.876008 6564
-30 R 13 R G 51234608 5090602.876008 6564
-30 R 17 R G 50215328 5459724.442375 7315
-30 R 60 T CN 39813520 1135713.228615 3800
-30 R 61 T CN 30049352 1610036.399930 4445
-62 T 31 R CN 30257152 22529024.421436 6870
-63 T 31 R CN 40181760 16088278.649605 8550
-31 R 28 R L 54027816 4799689.111254 6056
-31 R 29 R L 38985584 5655226.937628 4649
-31 R 30 R L 47371424 4792407.927307 5839
-31 R 20 R G 46661320 5892242.729819 5976
-31 R 24 R G 49969336 6454067.586342 6779
-31 R 62 T CN 30049352 1152784.313173 3566
-31 R 63 T CN 39813520 803243.110496 2380
-64 T 32 R CN 40181760 13805423.148064 6702
-65 T 32 R CN 30175232 22415755.472424 6932
-32 R 33 R L 39495072 4060223.754083 4027
-32 R 34 R L 38318680 3832552.194477 3756
-32 R 35 R L 51263104 5519808.640647 7544
-32 R 3 R G 51263104 5519808.640647 7544
-32 R 7 R G 39328112 5219095.840084 5052
-32 R 64 T CN 39813520 1210251.667998 3458
-32 R 65 T CN 30049192 630422.140586 2292
-66 T 33 R CN 40181760 17791245.347997 6309
-67 T 33 R CN 30257152 15175940.132678 6274
-33 R 32 R L 55929688 5194979.684028 5947
-33 R 34 R L 32785920 4097967.225898 2747
-33 R 35 R L 39697960 5994401.587308 5430
-33 R 10 R G 39697960 5994401.587308 5430
-33 R 14 R G 38884784 4379182.216837 4004
-33 R 66 T CN 39813520 1107798.113758 3239
-33 R 67 T CN 30049352 811674.087607 2529
-68 T 34 R CN 30257152 11926841.048741 6879
-69 T 34 R CN 30257152 18609998.026356 6837
-34 R 32 R L 39777456 5046916.308178 5315
-34 R 33 R L 34270784 5365204.443651 4172
-34 R 35 R L 42249888 4560827.376285 5055
-34 R 17 R G 42249888 4560827.376285 5055
-34 R 21 R G 36831104 4824175.442723 3634
-34 R 68 T CN 30049352 1078687.231229 3249
-34 R 69 T CN 30049352 739703.645466 2337
-70 T 35 R CN 30277632 18582548.588579 6780
-71 T 35 R CN 30257152 14445475.496982 6863
-35 R 32 R L 39508896 5032306.409923 6202
-35 R 33 R L 34469632 5572253.621625 4066
-35 R 34 R L 33126928 4824327.757457 3462
-35 R 24 R G 41519936 4246448.961771 4535
-35 R 28 R G 43743104 4729620.821349 5151
-35 R 70 T CN 30049392 447139.490875 1640
-35 R 71 T CN 30049352 1020380.529942 3401
\ No newline at end of file
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
deleted file mode 100644
index b2df970f..00000000
--- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
+++ /dev/null
@@ -1,72 +0,0 @@
-lp:2	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:352766743.133932	max_event_size:792
-lp:3	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:290194557.627888	max_event_size:792
-lp:7	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:460660441.899865	max_event_size:792
-lp:8	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:301501694.772019	max_event_size:792
-lp:12	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343292157.501219	max_event_size:792
-lp:13	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:255851734.723889	max_event_size:792
-lp:17	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:330699749.483186	max_event_size:792
-lp:18	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:379598594.313395	max_event_size:792
-lp:22	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:417481264.821944	max_event_size:792
-lp:23	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:463516326.154559	max_event_size:792
-lp:27	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:375187965.536660	max_event_size:792
-lp:28	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:426144648.397676	max_event_size:792
-lp:32	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:354505543.112376	max_event_size:792
-lp:33	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:359959185.432986	max_event_size:792
-lp:37	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:416754479.974724	max_event_size:792
-lp:38	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:412627423.811912	max_event_size:792
-lp:42	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:420355037.806001	max_event_size:792
-lp:43	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:376096618.471747	max_event_size:792
-lp:47	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:313224399.141498	max_event_size:792
-lp:48	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:396555601.315165	max_event_size:792
-lp:52	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:427372522.534280	max_event_size:792
-lp:53	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:328550335.514686	max_event_size:792
-lp:57	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:456515775.394763	max_event_size:792
-lp:58	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:247548705.168367	max_event_size:792
-lp:62	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:271836197.869304	max_event_size:792
-lp:63	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:303306215.174348	max_event_size:792
-lp:67	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:300015958.614822	max_event_size:792
-lp:68	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:411979610.984809	max_event_size:792
-lp:72	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:231372703.409270	max_event_size:792
-lp:73	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:323329470.014433	max_event_size:792
-lp:77	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:367165238.584887	max_event_size:792
-lp:78	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:409000712.088329	max_event_size:792
-lp:82	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:332865289.311507	max_event_size:792
-lp:83	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:484565584.355521	max_event_size:792
-lp:87	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:432419296.580878	max_event_size:792
-lp:88	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:382314536.175355	max_event_size:792
-lp:92	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:386772466.832815	max_event_size:792
-lp:93	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343301557.045121	max_event_size:792
-lp:97	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:264514236.096660	max_event_size:792
-lp:98	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:313557345.269660	max_event_size:792
-lp:102	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:340629834.844444	max_event_size:792
-lp:103	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:372973290.936664	max_event_size:792
-lp:107	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:437304384.944908	max_event_size:792
-lp:108	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:377859452.518662	max_event_size:792
-lp:112	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:471587827.861975	max_event_size:792
-lp:113	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283376963.287888	max_event_size:792
-lp:117	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:347661284.230018	max_event_size:792
-lp:118	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:318512073.696441	max_event_size:792
-lp:122	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:256795353.934300	max_event_size:792
-lp:123	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:237701963.756680	max_event_size:792
-lp:127	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:353573738.286471	max_event_size:792
-lp:128	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:465258641.273453	max_event_size:792
-lp:132	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:374138273.240924	max_event_size:792
-lp:133	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:428587201.804590	max_event_size:792
-lp:137	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:404905020.644931	max_event_size:792
-lp:138	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:350589038.917737	max_event_size:792
-lp:142	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:297762961.526255	max_event_size:792
-lp:143	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:410133909.716090	max_event_size:792
-lp:147	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:319808170.413551	max_event_size:792
-lp:148	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:341829101.902517	max_event_size:792
-lp:152	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:380910163.193983	max_event_size:792
-lp:153	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283849636.026727	max_event_size:792
-lp:157	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:274524505.335948	max_event_size:792
-lp:158	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:315657639.722941	max_event_size:792
-lp:162	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:356174453.289787	max_event_size:792
-lp:163	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:289274523.506280	max_event_size:792
-lp:167	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:351044401.571382	max_event_size:792
-lp:168	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:259385936.957835	max_event_size:792
-lp:172	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:341719498.237854	max_event_size:792
-lp:173	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:307179521.253780	max_event_size:792
-lp:177	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:228528983.462069	max_event_size:792
-lp:178	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:267558998.807704	max_event_size:792
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
deleted file mode 100644
index b2df970f..00000000
--- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
+++ /dev/null
@@ -1,72 +0,0 @@
-lp:2	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:352766743.133932	max_event_size:792
-lp:3	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:290194557.627888	max_event_size:792
-lp:7	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:460660441.899865	max_event_size:792
-lp:8	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:301501694.772019	max_event_size:792
-lp:12	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343292157.501219	max_event_size:792
-lp:13	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:255851734.723889	max_event_size:792
-lp:17	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:330699749.483186	max_event_size:792
-lp:18	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:379598594.313395	max_event_size:792
-lp:22	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:417481264.821944	max_event_size:792
-lp:23	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:463516326.154559	max_event_size:792
-lp:27	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:375187965.536660	max_event_size:792
-lp:28	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:426144648.397676	max_event_size:792
-lp:32	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:354505543.112376	max_event_size:792
-lp:33	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:359959185.432986	max_event_size:792
-lp:37	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:416754479.974724	max_event_size:792
-lp:38	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:412627423.811912	max_event_size:792
-lp:42	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:420355037.806001	max_event_size:792
-lp:43	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:376096618.471747	max_event_size:792
-lp:47	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:313224399.141498	max_event_size:792
-lp:48	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:396555601.315165	max_event_size:792
-lp:52	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:427372522.534280	max_event_size:792
-lp:53	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:328550335.514686	max_event_size:792
-lp:57	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:456515775.394763	max_event_size:792
-lp:58	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:247548705.168367	max_event_size:792
-lp:62	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:271836197.869304	max_event_size:792
-lp:63	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:303306215.174348	max_event_size:792
-lp:67	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:300015958.614822	max_event_size:792
-lp:68	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:411979610.984809	max_event_size:792
-lp:72	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:231372703.409270	max_event_size:792
-lp:73	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:323329470.014433	max_event_size:792
-lp:77	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:367165238.584887	max_event_size:792
-lp:78	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:409000712.088329	max_event_size:792
-lp:82	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:332865289.311507	max_event_size:792
-lp:83	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:484565584.355521	max_event_size:792
-lp:87	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:432419296.580878	max_event_size:792
-lp:88	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:382314536.175355	max_event_size:792
-lp:92	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:386772466.832815	max_event_size:792
-lp:93	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:343301557.045121	max_event_size:792
-lp:97	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:264514236.096660	max_event_size:792
-lp:98	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:313557345.269660	max_event_size:792
-lp:102	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:340629834.844444	max_event_size:792
-lp:103	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:372973290.936664	max_event_size:792
-lp:107	send_count:9820	send_bytes:39813600	send_time:7583542.857142	recv_count:9820	recv_bytes:39813600	recv_time:437304384.944908	max_event_size:792
-lp:108	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:377859452.518662	max_event_size:792
-lp:112	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:471587827.861975	max_event_size:792
-lp:113	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283376963.287888	max_event_size:792
-lp:117	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:347661284.230018	max_event_size:792
-lp:118	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:318512073.696441	max_event_size:792
-lp:122	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:256795353.934300	max_event_size:792
-lp:123	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:237701963.756680	max_event_size:792
-lp:127	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:353573738.286471	max_event_size:792
-lp:128	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:465258641.273453	max_event_size:792
-lp:132	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:374138273.240924	max_event_size:792
-lp:133	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:428587201.804590	max_event_size:792
-lp:137	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:404905020.644931	max_event_size:792
-lp:138	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:350589038.917737	max_event_size:792
-lp:142	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:297762961.526255	max_event_size:792
-lp:143	send_count:9770	send_bytes:39813200	send_time:7583466.666666	recv_count:9770	recv_bytes:39813200	recv_time:410133909.716090	max_event_size:792
-lp:147	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:319808170.413551	max_event_size:792
-lp:148	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:341829101.902517	max_event_size:792
-lp:152	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:380910163.193983	max_event_size:792
-lp:153	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:283849636.026727	max_event_size:792
-lp:157	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:274524505.335948	max_event_size:792
-lp:158	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:315657639.722941	max_event_size:792
-lp:162	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:356174453.289787	max_event_size:792
-lp:163	send_count:7367	send_bytes:30000040	send_time:5714293.333333	recv_count:7367	recv_bytes:30000040	recv_time:289274523.506280	max_event_size:792
-lp:167	send_count:9810	send_bytes:39813520	send_time:7583527.619046	recv_count:9810	recv_bytes:39813520	recv_time:351044401.571382	max_event_size:792
-lp:168	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:259385936.957835	max_event_size:792
-lp:172	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:341719498.237854	max_event_size:792
-lp:173	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:307179521.253780	max_event_size:792
-lp:177	send_count:7392	send_bytes:30000240	send_time:5714331.428571	recv_count:7392	recv_bytes:30000240	recv_time:228528983.462069	max_event_size:792
-lp:178	send_count:7387	send_bytes:30000200	send_time:5714323.809523	recv_count:7387	recv_bytes:30000200	recv_time:267558998.807704	max_event_size:792
diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
deleted file mode 100644
index fed80ff6..00000000
--- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
+++ /dev/null
@@ -1,73 +0,0 @@
-# Format <LP ID> <Terminal ID> <Job ID> <Local Rank> <Total sends> <Total Recvs> <Bytes sent> <Bytes recvd> <Send time> <Comm. time> <Compute time> <Avg msg time> <Max Msg Time>
- 0 0 1 0 90 90 39813200 39813200 89237007.320510 21119347.570794 250.000000 0.000000 0.000000
- 1 1 0 19 67 67 30000200 30000200 56475874.301444 17852333.411932 10000000.000000 0.000000 0.000000
- 5 2 0 3 72 72 30000240 30000240 62621556.489293 17852174.837306 10000000.000000 0.000000 0.000000
- 6 3 0 6 47 47 30000040 30000040 53717217.531453 17853913.164114 10000000.000000 0.000000 0.000000
- 10 4 1 10 130 130 39813520 39813520 82212308.371531 21068601.547571 250.000000 0.000000 0.000000
- 11 5 0 23 67 67 30000200 30000200 56605447.229800 17856451.510284 10000000.000000 0.000000 0.000000
- 15 6 0 18 67 67 30000200 30000200 59377020.615150 17849212.584262 10000000.000000 0.000000 0.000000
- 16 7 0 28 67 67 30000200 30000200 56611204.332201 17853245.667187 10000000.000000 0.000000 0.000000
- 20 8 0 14 67 67 30000200 30000200 58046006.712762 17853330.657890 10000000.000000 0.000000 0.000000
- 21 9 0 27 67 67 30000200 30000200 54666576.648049 17857362.848268 10000000.000000 0.000000 0.000000
- 25 10 1 3 140 140 39813600 39813600 79806532.629068 21115366.245572 250.000000 0.000000 0.000000
- 26 11 0 9 67 67 30000200 30000200 55190353.587108 17851269.424881 10000000.000000 0.000000 0.000000
- 30 12 0 0 47 47 30000040 30000040 56355045.144427 17849847.891871 10000000.000000 0.000000 0.000000
- 31 13 0 34 67 67 30000200 30000200 57319838.732274 17853331.023495 10000000.000000 0.000000 0.000000
- 35 14 1 20 130 130 39813520 39813520 79825582.529869 21113723.869383 250.000000 0.000000 0.000000
- 36 15 0 30 67 67 30000200 30000200 50539798.306285 17855710.850404 10000000.000000 0.000000 0.000000
- 40 16 1 23 130 130 39813520 39813520 96570614.385612 21173404.388469 250.000000 0.000000 0.000000
- 41 17 1 32 130 130 39813520 39813520 104174353.511400 21127185.068113 250.000000 0.000000 0.000000
- 45 18 0 5 72 72 30000240 30000240 60746833.137984 17855387.498509 10000000.000000 0.000000 0.000000
- 46 19 1 28 130 130 39813520 39813520 90532100.834860 21113902.678420 250.000000 0.000000 0.000000
- 50 20 1 5 140 140 39813600 39813600 93215021.859260 21107999.546402 250.000000 0.000000 0.000000
- 51 21 0 13 67 67 30000200 30000200 62524142.281874 17854071.877755 10000000.000000 0.000000 0.000000
- 55 22 1 11 130 130 39813520 39813520 87108820.845512 21253403.349942 250.000000 0.000000 0.000000
- 56 23 0 26 67 67 30000200 30000200 58124303.599644 17850680.445220 10000000.000000 0.000000 0.000000
- 60 24 0 1 72 72 30000240 30000240 73825732.655678 17853237.674558 10000000.000000 0.000000 0.000000
- 61 25 0 35 67 67 30000200 30000200 74632319.393684 17856451.858593 10000000.000000 0.000000 0.000000
- 65 26 0 29 67 67 30000200 30000200 60973047.224376 17856451.550016 10000000.000000 0.000000 0.000000
- 66 27 1 30 130 130 39813520 39813520 97183327.200930 21102174.565586 250.000000 0.000000 0.000000
- 70 28 0 33 67 67 30000200 30000200 69666737.099805 17855617.501467 10000000.000000 0.000000 0.000000
- 71 29 1 33 130 130 39813520 39813520 100178218.078153 21238162.751729 250.000000 0.000000 0.000000
- 75 30 1 27 130 130 39813520 39813520 98877667.184731 21256700.657271 250.000000 0.000000 0.000000
- 76 31 1 2 90 90 39813200 39813200 100942318.305743 21155764.515093 250.000000 0.000000 0.000000
- 80 32 0 22 67 67 30000200 30000200 65591630.997276 17854157.642763 10000000.000000 0.000000 0.000000
- 81 33 1 12 130 130 39813520 39813520 78974801.715336 21109777.342929 250.000000 0.000000 0.000000
- 85 34 1 31 130 130 39813520 39813520 100689518.650071 21192061.784327 250.000000 0.000000 0.000000
- 86 35 1 21 130 130 39813520 39813520 101556407.296841 21115137.760280 250.000000 0.000000 0.000000
- 90 36 1 26 130 130 39813520 39813520 87181078.575814 21143229.925367 250.000000 0.000000 0.000000
- 91 37 1 14 130 130 39813520 39813520 91281082.849771 21097362.783538 250.000000 0.000000 0.000000
- 95 38 0 2 47 47 30000040 30000040 63940848.873793 17847374.542274 10000000.000000 0.000000 0.000000
- 96 39 0 8 67 67 30000200 30000200 71690442.429131 17849121.063364 10000000.000000 0.000000 0.000000
- 100 40 1 7 140 140 39813600 39813600 88967018.559046 21161719.568495 250.000000 0.000000 0.000000
- 101 41 1 6 90 90 39813200 39813200 85527779.462703 21163275.624817 250.000000 0.000000 0.000000
- 105 42 1 1 140 140 39813600 39813600 100457403.678861 21110006.073760 250.000000 0.000000 0.000000
- 106 43 1 17 130 130 39813520 39813520 99253629.669866 21189028.967585 250.000000 0.000000 0.000000
- 110 44 1 22 130 130 39813520 39813520 88807135.248057 21070787.824921 250.000000 0.000000 0.000000
- 111 45 0 15 67 67 30000200 30000200 55657802.114682 17856451.485560 10000000.000000 0.000000 0.000000
- 115 46 0 25 67 67 30000200 30000200 57569424.228786 17853238.309627 10000000.000000 0.000000 0.000000
- 116 47 1 16 130 130 39813520 39813520 94714665.003806 21118593.796807 250.000000 0.000000 0.000000
- 120 48 0 12 67 67 30000200 30000200 49329790.727020 17848300.919883 10000000.000000 0.000000 0.000000
- 121 49 0 20 67 67 30000200 30000200 49076027.353583 17848300.513379 10000000.000000 0.000000 0.000000
- 125 50 1 19 130 130 39813520 39813520 92110607.580828 21257528.308085 250.000000 0.000000 0.000000
- 126 51 1 29 130 130 39813520 39813520 94697768.477917 21128810.332614 250.000000 0.000000 0.000000
- 130 52 0 32 67 67 30000200 30000200 52119612.770477 17853139.356496 10000000.000000 0.000000 0.000000
- 131 53 1 34 130 130 39813520 39813520 87422795.515014 21154612.434599 250.000000 0.000000 0.000000
- 135 54 1 18 130 130 39813520 39813520 78422965.566418 21073020.434406 250.000000 0.000000 0.000000
- 136 55 0 17 67 67 30000200 30000200 53390682.328147 17853238.062964 10000000.000000 0.000000 0.000000
- 140 56 1 25 130 130 39813520 39813520 89493603.447990 21189561.925589 250.000000 0.000000 0.000000
- 141 57 1 4 90 90 39813200 39813200 85436564.237106 21111296.853831 250.000000 0.000000 0.000000
- 145 58 1 13 130 130 39813520 39813520 96802622.211033 21115137.315974 250.000000 0.000000 0.000000
- 146 59 1 15 130 130 39813520 39813520 96583487.482583 21161490.594629 250.000000 0.000000 0.000000
- 150 60 1 35 130 130 39813520 39813520 90266817.624509 21356588.612738 250.000000 0.000000 0.000000
- 151 61 0 24 67 67 30000200 30000200 58285049.554882 17852412.897185 10000000.000000 0.000000 0.000000
- 155 62 0 31 67 67 30000200 30000200 66320862.530225 17860569.932221 10000000.000000 0.000000 0.000000
- 156 63 1 24 130 130 39813520 39813520 98077004.277963 21119191.871021 250.000000 0.000000 0.000000
- 160 64 1 9 130 130 39813520 39813520 71635687.154524 21185283.014578 250.000000 0.000000 0.000000
- 161 65 0 4 47 47 30000040 30000040 55102180.614745 17851269.485238 10000000.000000 0.000000 0.000000
- 165 66 1 8 130 130 39813520 39813520 68646777.969893 21113251.882987 250.000000 0.000000 0.000000
- 166 67 0 11 67 67 30000200 30000200 51878884.318374 17855809.464314 10000000.000000 0.000000 0.000000
- 170 68 0 21 67 67 30000200 30000200 59776460.886694 17852418.990265 10000000.000000 0.000000 0.000000
- 171 69 0 10 67 67 30000200 30000200 57589326.703996 17849795.090486 10000000.000000 0.000000 0.000000
- 175 70 0 7 72 72 30000240 30000240 54267360.186473 17850595.235628 10000000.000000 0.000000 0.000000
- 176 71 0 16 67 67 30000200 30000200 56663635.212701 17849021.295227 10000000.000000 0.000000 0.000000
\ No newline at end of file
diff --git a/tmptest/expected/tmptest-jacobiS_MILC.output b/tmptest/expected/tmptest-jacobiS_MILC.output
deleted file mode 100644
index b8fb808a..00000000
--- a/tmptest/expected/tmptest-jacobiS_MILC.output
+++ /dev/null
@@ -1,240 +0,0 @@
-/home/ac.xwang/install/codes-new/bin/model-net-mpi-replay --sync=1 --workload_type=conc-online --lp-io-use-suffix=1 --workload_conf_file=/home/ac.xwang/tools/codes-new/tmptest/conf/jacobi_MILC.conf --alloc_file=/home/ac.xwang/tools/codes-new/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf --lp-io-dir=tmptest-jacobiS_MILC -- /home/ac.xwang/tools/codes-new/tmptest/conf/dfdally-72-par.conf 
-
-Thu Aug 31 11:16:21 2023
-
-ROSS Version: v8.0.0-dirty
-
-tw_net_start: Found world size to be 1 
-
- num_net_traces 72; num_dumpi_traces 72NIC num injection port not specified, setting to 1
-NIC seq delay not specified, setting to 10.000000
-NIC num copy queues not specified, setting to 1
-Dragonfly rail selection is 3
-within node transfer per byte delay is 0.190476
-
-ROSS Core Configuration: 
-	Total PEs                                                    1
-	Total KPs                                          [Nodes (1) x KPs (16)] 16
-	Total LPs                                                  180
-	Simulation End Time                                3600000000000.00
-	LP-to-PE Mapping                                   model defined
-
-
-ROSS Event Memory Allocation:
-	Model events                                             46081
-	Network events                                              16
-	Total events                                             46096
-
-*** START SEQUENTIAL SIMULATION ***
-
-Jacobi3D: Running Jacobi on 36 processors with (4, 3, 3) elements
-Jacobi3D: Array Dimensions: 400 300 300
-Jacobi3D: Block Dimensions: 100 100 100
-Set num_servers per router 2, servers per injection queue per router 2, servers per node copy queue per node 1, num nics 2
-
- Network node 10 Rank 4 App 1 finished at 21068851.547571 
- Network node 22 Rank 44 App 1 finished at 21071037.824921 
- Network node 18 Rank 54 App 1 finished at 21073270.434406 
- Network node 14 Rank 37 App 1 finished at 21097612.783538 
- Network node 30 Rank 27 App 1 finished at 21102424.565586 
- Network node 5 Rank 20 App 1 finished at 21108249.546402 
- Network node 12 Rank 33 App 1 finished at 21110027.342929 
- Network node 1 Rank 42 App 1 finished at 21110256.073760 
- Network node 4 Rank 57 App 1 finished at 21111546.853831 
- Network node 8 Rank 66 App 1 finished at 21113501.882987 
- Network node 20 Rank 14 App 1 finished at 21113973.869383 
- Network node 28 Rank 19 App 1 finished at 21114152.678420 
- Network node 13 Rank 58 App 1 finished at 21115387.315974 
- Network node 21 Rank 35 App 1 finished at 21115387.760280 
- Network node 3 Rank 10 App 1 finished at 21115616.245572 
- Network node 16 Rank 47 App 1 finished at 21118843.796807 
- Network node 24 Rank 63 App 1 finished at 21119441.871021 
- Network node 0 Rank 0 App 1 finished at 21119597.570794 
- Network node 32 Rank 17 App 1 finished at 21127435.068113 
- Network node 29 Rank 51 App 1 finished at 21129060.332614 
- Network node 26 Rank 36 App 1 finished at 21143479.925367 
- Network node 34 Rank 53 App 1 finished at 21154862.434599 
- Network node 2 Rank 31 App 1 finished at 21156014.515093 
- Network node 15 Rank 59 App 1 finished at 21161740.594629 
- Network node 7 Rank 40 App 1 finished at 21161969.568495 
- Network node 6 Rank 41 App 1 finished at 21163525.624817 
- Network node 23 Rank 16 App 1 finished at 21173654.388469 
- Network node 9 Rank 64 App 1 finished at 21185533.014578 
- Network node 17 Rank 43 App 1 finished at 21189278.967585 
- Network node 25 Rank 56 App 1 finished at 21189811.925589 
- Network node 31 Rank 34 App 1 finished at 21192311.784327 
- Network node 33 Rank 29 App 1 finished at 21238412.751729 
- Network node 11 Rank 22 App 1 finished at 21253653.349942 
- Network node 27 Rank 30 App 1 finished at 21256950.657271 
- Network node 19 Rank 50 App 1 finished at 21257778.308085 
- Network node 35 Rank 60 App 1 finished at 21356838.612738 App 0: Received finished workload notificationThere is still a nonsynethic workload left. 1 != 2
-
- Network node 2 Rank 38 App 0 finished at 27847374.542274 
- Network node 20 Rank 49 App 0 finished at 27848300.513379 
- Network node 12 Rank 48 App 0 finished at 27848300.919883 
- Network node 16 Rank 71 App 0 finished at 27849021.295227 
- Network node 8 Rank 39 App 0 finished at 27849121.063364 
- Network node 18 Rank 6 App 0 finished at 27849212.584262 
- Network node 10 Rank 69 App 0 finished at 27849795.090486 
- Network node 0 Rank 12 App 0 finished at 27849847.891871 
- Network node 7 Rank 70 App 0 finished at 27850595.235628 
- Network node 26 Rank 23 App 0 finished at 27850680.445220 
- Network node 9 Rank 11 App 0 finished at 27851269.424881 
- Network node 4 Rank 65 App 0 finished at 27851269.485238 
- Network node 3 Rank 2 App 0 finished at 27852174.837306 
- Network node 19 Rank 1 App 0 finished at 27852333.411932 
- Network node 24 Rank 61 App 0 finished at 27852412.897185 
- Network node 21 Rank 68 App 0 finished at 27852418.990265 
- Network node 32 Rank 52 App 0 finished at 27853139.356496 
- Network node 1 Rank 24 App 0 finished at 27853237.674558 
- Network node 17 Rank 55 App 0 finished at 27853238.062964 
- Network node 25 Rank 46 App 0 finished at 27853238.309627 
- Network node 28 Rank 7 App 0 finished at 27853245.667187 
- Network node 14 Rank 8 App 0 finished at 27853330.657890 
- Network node 34 Rank 13 App 0 finished at 27853331.023495 
- Network node 6 Rank 3 App 0 finished at 27853913.164114 
- Network node 13 Rank 21 App 0 finished at 27854071.877755 
- Network node 22 Rank 32 App 0 finished at 27854157.642763 
- Network node 5 Rank 18 App 0 finished at 27855387.498509 
- Network node 33 Rank 28 App 0 finished at 27855617.501467 
- Network node 30 Rank 15 App 0 finished at 27855710.850404 
- Network node 11 Rank 67 App 0 finished at 27855809.464314 
- Network node 15 Rank 45 App 0 finished at 27856451.485560 
- Network node 23 Rank 5 App 0 finished at 27856451.510284 
- Network node 29 Rank 26 App 0 finished at 27856451.550016 
- Network node 35 Rank 25 App 0 finished at 27856451.858593 
- Network node 27 Rank 9 App 0 finished at 27857362.848268 
- Network node 31 Rank 62 App 0 finished at 27860569.932221 App 0: Received finished workload notificationApp 0: All non-synthetic workloads have completed
-*** END SIMULATION ***
-
-
-	: Running Time = 35.7901 seconds
-
-TW Library Statistics:
-	Total Events Processed                                11315021
-	Events Aborted (part of RBs)                                 0
-	Events Rolled Back                                           0
-	Event Ties Detected in PE Queues                             0
-	Efficiency                                              100.00 %
-	Total Remote (shared mem) Events Processed                   0
-	Percent Remote Events                                     0.00 %
-	Total Remote (network) Events Processed                      0
-	Percent Remote Events                                     0.00 %
-
-	Total Roll Backs                                             0
-	Primary Roll Backs                                           0
-	Secondary Roll Backs                                         0
-	Fossil Collect Attempts                                      0
-	Total GVT Computations                                       0
-
-	Net Events Processed                                  11315021
-	Event Rate (events/sec)                               316149.8
-	Total Events Scheduled Past End Time                         0
-
-TW Memory Statistics:
-	Events Allocated                                         46097
-	Memory Allocated                                         80000
-	Memory Wasted                                              101
-
-TW Data Structure sizes in bytes (sizeof):
-	PE struct                                                 3888
-	KP struct                                                  960
-	LP struct                                                  960
-	LP Model struct                                             96
-	LP RNGs                                                     80
-	Total LP                                                  1136
-	Event struct                                               976
-	Event struct with Model                                   1768
-
-TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz):
-	Initialization                                          0.3151
-	Priority Queue (enq/deq)                                5.0614
-	AVL Tree (insert/delete)                                0.0000
-	LZ4 (de)compression                                     0.0000
-	Buddy system                                            0.0000
-	Event Processing                                        0.0000
-	Event Cancel                                            0.0000
-	Event Abort                                             0.0000
-
-	GVT                                                     0.0000
-	Fossil Collect                                          0.0000
-	Primary Rollbacks                                       0.0000
-	Network Read                                            0.0000
-	Other Network                                           0.0000
-	Instrumentation (computation)                           0.0000
-	Instrumentation (write)                                 0.0000
-	Total Time (Note: Using Running Time above for Speedup)     74.9855
-
-TW GVT Statistics: MPI AllReduce
-	GVT Interval                                                16
-	GVT Real Time Interval (cycles)                    0
-	GVT Real Time Interval (sec)                        0.00000000
-	Batch Size                                                  16
-
-	Forced GVT                                                   0
-	Total GVT Computations                                       0
-	Total All Reduce Calls                                       0
-	Average Reduction / GVT                                   -nan
-
- Total bytes sent 2513292480 recvd 2513292480 
- max runtime 27860569.932221 ns avg runtime 24501344.282682 
- max comm time 21356588.612738 avg comm time 19501219.282682 
- max send time 104174353.511400 avg send time 74765835.724547 
- max recv time 100173347.933146 avg recv time 74419141.320531 
- max wait time 19215338.524462 avg wait time 16456309.234173 
-
-----------
-Per App Max Elapsed Times:
-	App 0: 27860569.9322
-	App 1: 21356838.6127
-----------
-LP-IO: writing output to tmptest-jacobiS_MILC-25331-1693498581/
-LP-IO: data files:
-   tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats
-   tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats
-   tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all
-   tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high
-   tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time
-   tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats
-
------------------- Dragonfly Dally Parameters ---------
-	num_routers =            4
-	local_bandwidth =        5.25
-	global_bandwidth =       4.70
-	cn_bandwidth =           5.25
-	num_vcs =                4
-	num_qos_levels =         1
-	local_vc_size =          16384
-	global_vc_size =         16384
-	cn_vc_size =             32768
-	chunk_size =             4096
-	num_cn =                 2
-	cn_radix =               2
-	intra_grp_radix =        3
-	num_groups =             9
-	total_groups =           9
-	virtual radix =          7
-	total_routers =          36
-	total_terminals =        72
-	num_global_channels =    2
-	num_injection_queues =   1
-	num_rails =              1
-	num_planes =             1
-	cn_delay =               726.61
-	local_delay =            726.61
-	global_delay =           811.64
-	local credit_delay =     1.42
-	global credit_delay =    1.42
-	cn credit_delay =        1.42
-	router_delay =           100.00
-	routing =                PROG_ADAPTIVE
-	adaptive_threshold =     0
-	max hops notification =  2147483647
-------------------------------------------------------
-
-
-Average number of hops traversed 4.101973 average chunk latency 40.897694 us maximum chunk latency 732.304909 us avg message size 363612.937500 bytes finished messages 6912 finished chunks 618912
-
-ADAPTIVE ROUTING STATS: 349554 chunks routed minimally 269358 chunks routed non-minimally completed packets 618912 
-
-Total packets generated 618912 finished 618912 Locally routed- same router 14696 different-router 56472 Remote (inter-group) 547744 

From 03d7da6f27dc12aa9b4889b616b845950c21aa60 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 16 Jun 2025 16:00:50 -0400
Subject: [PATCH 161/188] Fixing bug where MILC would not work with network
 surrogate when freezing

---
 src/networks/model-net/dragonfly-dally.C | 45 +++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 99b292ee..846d8734 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -495,6 +495,7 @@ struct packet_sent {
     double next_packet_delay; // When the packet is initially sent, this value is -1, when the next packet is sent this value is updated to the actual delay to process the next packet
     void * message_data;  // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way
     void * remote_event_data;  // This and the one above have to be freed. This contains the extra information that the message contains
+    void * local_data;  // This and the one above have to be freed. This contains the extra information that the message contains
 };
 
 struct packet_id {
@@ -604,6 +605,7 @@ struct terminal_state
     // Variables to recover latency of packets sent to other terminals
     // Sent packets (to be populated at by commit handler of packet sender)
     map<uint64_t, struct packet_sent> sent_packets;
+    set<uint64_t> is_pending_local_send;
     int64_t last_packet_sent_id;
     // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency)
     struct {
@@ -3028,6 +3030,9 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t pa
     if (sent.remote_event_data) {
         free(sent.remote_event_data);
     }
+    if (sent.local_data) {
+        free(sent.local_data);
+    }
 }
 
 // We check an event that is in the event queue, thus we do not process it yet
@@ -3116,7 +3121,18 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
             tw_event_send(e);
 
             //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", sent.start.dest_terminal_lpid, sent.start.dfdally_dest_terminal_id);
-            notify_dest_lp_of(s, lp, m, NOTIFY_ZOMBIE);
+            notify_dest_lp_of(s, lp, msg_data, NOTIFY_ZOMBIE);
+
+            if (s->is_pending_local_send.count(packet_ID) == 1) {
+                assert(sent.local_data);
+                assert(msg_data->local_event_size_bytes);
+                double const local_ts = 11;
+                tw_event *e_new = tw_event_new(msg_data->sender_lp, local_ts, lp);
+                void * m_new = tw_event_data(e_new);
+                memcpy(m_new, sent.local_data, msg_data->local_event_size_bytes);
+                tw_event_send(e_new);
+                s->is_pending_local_send.erase(packet_ID);
+            }
 
             // Deallocating memory from packet_start
             if (sent.message_data) {
@@ -3125,9 +3141,13 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
             if (sent.remote_event_data) {
                 free(sent.remote_event_data);
             }
+            if (sent.local_data) {
+                free(sent.local_data);
+            }
         }
     }
     assert(s->sent_packets.empty());
+    assert(s->is_pending_local_send.empty());
 
     // Hide current state and clean current state. Hidding the network information is in principle
     // the same as freezing the state of the network.
@@ -3160,6 +3180,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(
     memcpy(&s->arrival_of_last_packet, &frozen_state->arrival_of_last_packet, sizeof(s->arrival_of_last_packet));
     memcpy(&s->zombies,              &frozen_state->zombies,              sizeof(s->zombies));
     memcpy(&s->sent_packets,         &frozen_state->sent_packets,         sizeof(s->sent_packets));
+    memcpy(&s->is_pending_local_send, &frozen_state->is_pending_local_send, sizeof(s->is_pending_local_send));
     memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets));
 
     s->frozen_state = frozen_state;
@@ -3201,6 +3222,7 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(
     memcpy(&frozen_state->arrival_of_last_packet, &s->arrival_of_last_packet, sizeof(s->arrival_of_last_packet));
     memcpy(&frozen_state->zombies,              &s->zombies,              sizeof(s->zombies));
     memcpy(&frozen_state->sent_packets,         &s->sent_packets,         sizeof(s->sent_packets));
+    memcpy(&frozen_state->is_pending_local_send, &s->is_pending_local_send, sizeof(s->is_pending_local_send));
     memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets));
     memcpy(s, frozen_state, sizeof(terminal_state));
     memset(frozen_state, 0, sizeof(terminal_state));
@@ -3332,6 +3354,11 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term
         remote_data = malloc(msg->remote_event_size_bytes);
         memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes);
     }
+    void * local_data = NULL;
+    if (msg->local_event_size_bytes) {
+        local_data = malloc(msg->local_event_size_bytes);
+        memcpy(local_data, (char *) model_net_method_get_edata(DRAGONFLY_DALLY, msg) + msg->remote_event_size_bytes, msg->local_event_size_bytes);
+    }
     double const processing_packet_delay = msg->saved_next_packet_delay;
 
     // TODO (elkin): In the future, this ugly initialization could be done all in a single "line" instead of setting all values one by one. The reason to do it this way is because some old compilers do not understand other ways of initializing
@@ -3347,8 +3374,12 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term
     sent.next_packet_delay = -1;
     sent.message_data = msg_data;
     sent.remote_event_data = remote_data;
+    sent.local_data = local_data;
 
     s->sent_packets[msg->packet_ID] = sent;
+    if (freeze_network_on_switch && msg->local_event_size_bytes > 0) {
+        s->is_pending_local_send.insert(msg->packet_ID);
+    }
 
     // Set next_packet_delay for the last past sent packet
     if (s->sent_packets.count(s->last_packet_sent_id) == 1) {
@@ -3444,6 +3475,11 @@ static void terminal_dally_commit(terminal_state * s,
         break;
         
         case T_SEND:
+            if (freeze_network_on_switch) {
+                if (bf->c16 && s->is_pending_local_send.count(msg->packet_ID) == 1) {
+                    s->is_pending_local_send.erase(msg->packet_ID);
+                }
+            }
         break;
         
         case T_BUFFER:
@@ -3701,6 +3737,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     s->arrival_of_last_packet.packet_ID = -1;
     s->arrival_of_last_packet.travel_end_time = -1;
     new (&s->sent_packets) map<uint64_t, struct packet_sent>();
+    new (&s->is_pending_local_send) set<uint64_t>();
     new (&s->remaining_sz_packets) map<struct packet_id, uint32_t>();
     new (&s->zombies) set<struct packet_id>();
     s->frozen_state = NULL;
@@ -4763,6 +4800,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
 
     if(cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) 
     {
+        bf->c16 = 1;
+        msg->packet_ID = cur_entry->msg.packet_ID;
         tw_stime local_ts = 0;
         tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp);
         void * m_new = tw_event_data(e_new);
@@ -5681,6 +5720,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
             // Deallocating memory from packet_start
             if (sent.message_data) { free(sent.message_data); }
             if (sent.remote_event_data) { free(sent.remote_event_data); }
+            if (sent.local_data) { free(sent.local_data); }
 
             s->sent_packets.erase(s->arrival_of_last_packet.packet_ID);
             s->arrival_of_last_packet.packet_ID = -1;
@@ -5702,6 +5742,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
             // Deallocating memory from packet_start
             if (sent.message_data) { free(sent.message_data); }
             if (sent.remote_event_data) { free(sent.remote_event_data); }
+            if (sent.local_data) { free(sent.local_data); }
         }
     }
 
@@ -5739,8 +5780,10 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
     for (auto&& kv: s->sent_packets) {
         if (kv.second.message_data) { free(kv.second.message_data); }
         if (kv.second.remote_event_data) { free(kv.second.remote_event_data); }
+        if (kv.second.local_data) { free(kv.second.local_data); }
     }
     s->sent_packets.~map();
+    s->is_pending_local_send.~set();
     s->remaining_sz_packets.~map();
 
     if (s->predictor_data) {

From fcdf824d8f144da3a5164f111c308325be45c05d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 16 Jun 2025 16:37:19 -0400
Subject: [PATCH 162/188] Fixed a bug on reading setting from file

---
 src/surrogate/init.c              | 18 ++++++++++++++++--
 src/surrogate/network-surrogate.c |  2 +-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/surrogate/init.c b/src/surrogate/init.c
index 2e93ed75..dc165eae 100644
--- a/src/surrogate/init.c
+++ b/src/surrogate/init.c
@@ -177,9 +177,23 @@ static int load_and_validate_int_param(const char* param_name, int default_value
     return value;
 }
 
+static double load_and_validate_double_param(const char* param_name, double default_value) {
+    char param_str[MAX_NAME_LENGTH];
+    param_str[0] = '\0';
+    int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", param_name, NULL, param_str, MAX_NAME_LENGTH);
+    double value = (rc > 0) ? strtod(param_str, NULL) : default_value;
+
+    if (value <= 0) {
+        tw_warning(TW_LOC, "%s must be a positive integer, got %d. Using default value %d.", param_name, value, default_value);
+        value = default_value;
+    }
+
+    return value;
+}
+
 static struct application_director_config load_director_config(void) {
     int const default_gvt = 100;
-    int const default_ns = 1000000; // 1ms
+    double const default_ns = 1.0e6; // 1ms
 
     enum {
         MODE_NOT_SET,
@@ -203,7 +217,7 @@ static struct application_director_config load_director_config(void) {
     }
 
     int every_n_gvt = load_and_validate_int_param("director_num_gvt", default_gvt);
-    int every_n_ns = load_and_validate_int_param("director_num_ns", default_ns);
+    double every_n_ns = load_and_validate_double_param("director_num_ns", default_ns);
 
     bool const is_sequential = (g_tw_synchronization_protocol == SEQUENTIAL ||
                                 g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK);
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index 230c6ade..ed7185f1 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -368,7 +368,7 @@ void network_director(tw_pe * pe) {
     }
 
     // ---- Past this means that we are in fact switching ----
-    bool const pre_switch_status = net_surr_config.model.is_surrogate_on();
+    net_surr_config.model.is_surrogate_on();
 
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {

From 1a41fda188bf000322e92e43800205bd593453af Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 16 Jun 2025 17:22:24 -0400
Subject: [PATCH 163/188] Resetting predictor when turning back into full
 fidelity

---
 codes/surrogate/network-surrogate.h              |  1 +
 .../surrogate/packet-latency-predictor/common.h  |  2 ++
 src/networks/model-net/dragonfly-dally.C         | 10 ++++++++++
 src/surrogate/network-surrogate.c                |  7 +++++++
 src/surrogate/packet-latency-predictor/average.c | 16 ++++++++++++++++
 5 files changed, 36 insertions(+)

diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h
index a550464d..4b22e238 100644
--- a/codes/surrogate/network-surrogate.h
+++ b/codes/surrogate/network-surrogate.h
@@ -41,6 +41,7 @@ struct lp_types_switch {
     model_ask_if_freeze_f should_event_be_frozen;  // true means event from LP type shouldn't be frozen
     model_ask_if_freeze_f should_event_be_deleted;  // true means event from LP type shouldn't be deleted
     model_check_event_f   check_event_in_queue;
+    model_switch_f        reset_predictor;
 };
 
 struct switch_at_struct {
diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h
index 61b0283c..3faa7bff 100644
--- a/codes/surrogate/packet-latency-predictor/common.h
+++ b/codes/surrogate/packet-latency-predictor/common.h
@@ -37,6 +37,7 @@ struct packet_end {
 
 // Definition of functions needed to define a predictor
 typedef void (*init_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM)
+typedef void (*reset_pred_lat_f) (void * predictor_data, tw_lp * lp);
 typedef void (*feed_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now`
 typedef struct packet_end (*predict_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now`
 typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction)
@@ -44,6 +45,7 @@ typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Re
 // API for packet latency predictors
 struct packet_latency_predictor {
     init_pred_lat_f        init;
+    reset_pred_lat_f       reset;
     feed_pred_lat_f        feed;
     predict_pred_lat_f     predict;
     predict_pred_lat_rc_f  predict_rc;
diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 846d8734..2175515d 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -203,6 +203,7 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw
 static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event);
 static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event);
 static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( terminal_state * s, tw_lp * lp, tw_event * event);
+static void dragonfly_dally_terminal_reset_predictor(terminal_state * s, tw_lp * lp, void *);
 //
 // ==== END OF Parameters to tune surrogate mode ====
 
@@ -2463,6 +2464,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                  .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen,
                  .should_event_be_deleted = NULL,
                  .check_event_in_queue = (model_check_event_f) dragonfly_dally_terminal_pre_surrogate_switch_event_queue,
+                 .reset_predictor = (model_switch_f) dragonfly_dally_terminal_reset_predictor,
                 },
                 {.lpname = "modelnet_dragonfly_dally_router",
                  .trigger_idle_modelnet = false,
@@ -2471,6 +2473,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params)
                  .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen,
                  .should_event_be_deleted = NULL,
                  .check_event_in_queue = NULL,
+                 .reset_predictor = NULL,
                 },
                 0
             }
@@ -3052,6 +3055,13 @@ static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue(
     }
 }
 
+static void dragonfly_dally_terminal_reset_predictor(terminal_state * s, tw_lp * lp, void * vacuous) {
+    (void) vacuous;
+    if (terminal_predictor != NULL && s->predictor_data != NULL) {
+        terminal_predictor->reset(s->predictor_data, lp);
+    }
+}
+
 // This function never rollsback because it's called at GVT
 static void dragonfly_dally_terminal_highdef_to_surrogate(
         terminal_state * s, tw_lp * lp, tw_event ** terminal_events) {
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index ed7185f1..b7108cc8 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -299,6 +299,13 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
                     lp_type_switch->surrogate_to_highdef(lp->cur_state, lp, NULL);
                 }
             }
+            if (lp_type_switch->reset_predictor) {
+                if (is_lp_modelnet) {
+                    model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->reset_predictor, NULL);
+                } else {
+                    lp_type_switch->reset_predictor(lp->cur_state, lp, NULL);
+                }
+            }
         }
 
 #ifdef USE_RAND_TIEBREAKER
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
index c6553563..4b14aedb 100644
--- a/src/surrogate/packet-latency-predictor/average.c
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -98,12 +98,28 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) {
     (void) lp;
 }
 
+static void reset_pred(struct latency_surrogate * data, tw_lp * lp) {
+    (void) lp;
+
+    data->aggregated_next_packet_delay.sum_latency = 0;
+    data->aggregated_next_packet_delay.total_msgs = 0;
+
+    data->aggregated_latency_for_all.sum_latency = 0;
+    data->aggregated_latency_for_all.total_msgs = 0;
+
+    for (int i = 0; i < num_terminals; i++) {
+        data->aggregated_latency[i].sum_latency = 0;
+        data->aggregated_latency[i].total_msgs = 0;
+    }
+}
+
 
 struct packet_latency_predictor average_latency_predictor(int num_terminals_) {
     assert(num_terminals_ >= 0);
     num_terminals = num_terminals_;
     return (struct packet_latency_predictor) {
     .init              = (init_pred_lat_f) init_pred,
+    .reset             = (reset_pred_lat_f) reset_pred,
     .feed              = (feed_pred_lat_f) feed_pred,
     .predict           = (predict_pred_lat_f) predict_latency,
     .predict_rc        = (predict_pred_lat_rc_f) predict_latency_rc,

From e70f540ec94686ecf2cf6d4aba7e10f2fbf831ad Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 16 Jun 2025 17:55:07 -0400
Subject: [PATCH 164/188] Adding more tests for Union and the application
 surrogacy

---
 tests/CMakeLists.txt                          |   2 +
 ...test-surrogate-parallel-deterministic-1.sh | 112 +++++++++++++++++
 ...test-surrogate-parallel-deterministic-2.sh | 114 ++++++++++++++++++
 .../union-workload-test-surrogate-parallel.sh |   2 +-
 tests/union-workload-test-surrogate.sh        |   2 +-
 5 files changed, 230 insertions(+), 2 deletions(-)
 create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-1.sh
 create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-2.sh

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index a78e7210..e5e11309 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -101,6 +101,8 @@ if(USE_UNION)
     list(APPEND test-shell-files
         union-workload-test-surrogate.sh
         union-workload-test-surrogate-parallel.sh
+        union-workload-test-surrogate-parallel-deterministic-1.sh
+        union-workload-test-surrogate-parallel-deterministic-2.sh
     )
 endif()
 
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
new file mode 100644
index 00000000..c5d145e3
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
new file mode 100644
index 00000000..ab596dd5
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par-1.conf"
+
+export APP_DIRECTOR_MODE=every-n-gvt
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par-2.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par-1.conf" \
+  > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par-2.conf" \
+  > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh
index c16deb8f..f2940d12 100644
--- a/tests/union-workload-test-surrogate-parallel.sh
+++ b/tests/union-workload-test-surrogate-parallel.sh
@@ -80,7 +80,7 @@ grep 'MILC: Iteration 119/120' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
-grep 'Jacobi3D: Completed 40 iterations' model-output.txt
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh
index 59c0c067..032b649e 100644
--- a/tests/union-workload-test-surrogate.sh
+++ b/tests/union-workload-test-surrogate.sh
@@ -80,7 +80,7 @@ grep 'MILC: Iteration 119/120' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
-grep 'Jacobi3D: Completed 40 iterations' model-output.txt
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 

From 9b9a1eda9614abc2033124efa6e9dd886939a2d5 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Jun 2025 00:12:48 -0400
Subject: [PATCH 165/188] Fixing a bug and adding a test to check for different
 sizes of chunks and packets

---
 src/networks/model-net/dragonfly-dally.C      |   3 +-
 tests/CMakeLists.txt                          |   7 +-
 .../conceptual.json                           |   2 +-
 .../dfdally-72-par.conf.in                    |   2 +-
 .../jacobi_MILC.workload.conf                 |   1 -
 .../rand_node0-1d-72-jacobi_MILC.alloc.conf   |   1 -
 tests/union-workload-test-surrogate-fails.sh  | 102 ++++++++++++++++++
 tests/union-workload-test-surrogate.sh        |  23 ++--
 8 files changed, 122 insertions(+), 19 deletions(-)
 create mode 100644 tests/union-workload-test-surrogate-fails.sh

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 2175515d..0014fe88 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -6490,7 +6490,6 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
     if(cur_entry->msg.packet_size < s->params->chunk_size)
         msg_size = cur_entry->msg.packet_size;
 
-    s->qos_data[output_port][vcg] -= msg_size;
     s->next_output_available_time[output_port] = msg->saved_available_time;
 
     if(bf->c11)
@@ -6499,6 +6498,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
         s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
         s->ross_rsample.link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
         s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; 
+        msg_size = cur_entry->msg.packet_size % s->params->chunk_size;
 
         //Xin: reverse link traffic
         if(rolback && current_window >= 0){
@@ -6518,6 +6518,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
         }
     }
 
+    s->qos_data[output_port][vcg] -= msg_size;
     s->total_chunks[output_port]--;
 
     prepend_to_terminal_dally_message_list(s->pending_msgs[output_port],
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e5e11309..e6d46ef8 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -100,9 +100,10 @@ configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union
 if(USE_UNION)
     list(APPEND test-shell-files
         union-workload-test-surrogate.sh
-        union-workload-test-surrogate-parallel.sh
-        union-workload-test-surrogate-parallel-deterministic-1.sh
-        union-workload-test-surrogate-parallel-deterministic-2.sh
+        union-workload-test-surrogate-fails.sh
+        #union-workload-test-surrogate-parallel.sh
+        #union-workload-test-surrogate-parallel-deterministic-1.sh
+        #union-workload-test-surrogate-parallel-deterministic-2.sh
     )
 endif()
 
diff --git a/tests/conf/union-milc-jacobi-workload/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json
index 557c0bce..27a03f6c 100644
--- a/tests/conf/union-milc-jacobi-workload/conceptual.json
+++ b/tests/conf/union-milc-jacobi-workload/conceptual.json
@@ -41,7 +41,7 @@
       "100",
       "100",
       "50000",
-      "39",
+      "1",
       "200000",
       "barrier"
     ]
diff --git a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
index 3b72d00a..11598088 100644
--- a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
+++ b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
@@ -19,7 +19,7 @@ PARAMS
    modelnet_scheduler="fcfs";
 # chunk size in the network (when chunk size = packet size, packets will not be
 # divided into chunks)
-   chunk_size="4096";
+   chunk_size="${CHUNK_SIZE}";
 # modelnet_scheduler="round-robin";
 # number of routers in group
    num_routers="4";
diff --git a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
index 93c60688..cf0a4b93 100644
--- a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
+++ b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
@@ -1,2 +1 @@
 36 conceptual-jacobi3d 1 0
-36 milc 1 0
diff --git a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
index 07e490d0..bae48ac5 100644
--- a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
+++ b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
@@ -1,2 +1 @@
 12 24 38 2 65 18 3 70 39 11 69 67 48 21 8 45 71 55 6 1 49 68 32 5 61 46 23 9 7 26 15 62 52 28 13 25 
-0 42 31 10 57 20 41 40 66 64 4 22 33 58 37 59 47 43 54 50 14 35 44 16 63 56 36 30 19 51 27 34 17 29 53 60 
diff --git a/tests/union-workload-test-surrogate-fails.sh b/tests/union-workload-test-surrogate-fails.sh
new file mode 100644
index 00000000..1d81c4ee
--- /dev/null
+++ b/tests/union-workload-test-surrogate-fails.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=1
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export CHUNK_SIZE=2048
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export APP_SURR_ON=0
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=1 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output.txt 2> model-output-error.txt
+
+err=$?
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking both milc and jacobi ran
+#grep 'MILC: Iteration 119/120' model-output.txt
+#err=$?
+#[[ $err -ne 0 ]] && exit $err
+
+grep 'Jacobi3D: Completed 1 iterations' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'App 0: All non-synthetic workloads have completed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned into surrogacy
+#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+#err=$?
+#[[ $err -ne 0 ]] && exit $err
+
+# it transitioned back to high-fidelity
+#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+#err=$?
+#[[ $err -ne 0 ]] && exit $err
+
+exit 0
diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh
index 032b649e..9cd2c6be 100644
--- a/tests/union-workload-test-surrogate.sh
+++ b/tests/union-workload-test-surrogate.sh
@@ -34,9 +34,10 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
 cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 
 # CODES config file
+export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
-export APP_SURR_ON=1
+export APP_SURR_ON=0
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt
 export EVERY_N_GVT=500
@@ -76,11 +77,11 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking both milc and jacobi ran
-grep 'MILC: Iteration 119/120' model-output.txt
-err=$?
-[[ $err -ne 0 ]] && exit $err
+#grep 'MILC: Iteration 119/120' model-output.txt
+#err=$?
+#[[ $err -ne 0 ]] && exit $err
 
-grep 'Jacobi3D: Completed 39 iterations' model-output.txt
+grep 'Jacobi3D: Completed 1 iterations' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
@@ -89,13 +90,13 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # it transitioned into surrogacy
-grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
-err=$?
-[[ $err -ne 0 ]] && exit $err
+#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+#err=$?
+#[[ $err -ne 0 ]] && exit $err
 
 # it transitioned back to high-fidelity
-grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
-err=$?
-[[ $err -ne 0 ]] && exit $err
+#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+#err=$?
+#[[ $err -ne 0 ]] && exit $err
 
 exit 0

From 66511d98e3777008901156441a08643cd986d501 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Jun 2025 00:50:56 -0400
Subject: [PATCH 166/188] potential/partial fix to unmatched receives bug

---
 src/networks/model-net/dragonfly-dally.C | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 0014fe88..ea442aef 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -5353,10 +5353,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     /* WE do not allow self messages through dragonfly */
     assert(lp->gid != msg->src_terminal_id);
 
-    // TODO (elkin): this is wrong, this is _not_ finding the number of chunks, consider: chunk_size = 2 and packet_size = 5. There should be 3 chunks, but the code outputs 2!
-    uint64_t num_chunks = msg->packet_size / s->params->chunk_size;
-    if (msg->packet_size < s->params->chunk_size)
-        num_chunks++;
+    uint64_t num_chunks = (msg->packet_size + s->params->chunk_size - 1) / s->params->chunk_size;
 
     if(msg->path_type == MINIMAL)
         minimal_count++;   
@@ -5444,8 +5441,9 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     int const chunk_size = s->params->chunk_size;
     if (has_remaining_sz) {
         bf->c28 = 1;
-        assert(s->remaining_sz_packets[packet_key] >= chunk_size);
-        s->remaining_sz_packets[packet_key] -= chunk_size;
+        int const actual_chunk_size = std::min(chunk_size, (int)s->remaining_sz_packets[packet_key]);
+        assert(s->remaining_sz_packets[packet_key] >= actual_chunk_size);
+        s->remaining_sz_packets[packet_key] -= actual_chunk_size;
 
         // if `remaining == 0`, ie, if the packet has been completed
         if (s->remaining_sz_packets[packet_key] == 0) {

From ecae4b87e4c5cf8b03b8924d3de64a09d6d03fdb Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Jun 2025 10:51:31 -0400
Subject: [PATCH 167/188] Fixing bugs that show up with Jacobi and chunk size
 != packet size

---
 src/networks/model-net/dragonfly-dally.C      | 35 +++++--------------
 tests/CMakeLists.txt                          |  8 ++---
 .../conceptual.json                           |  2 +-
 .../jacobi_MILC.workload.conf                 |  1 +
 .../rand_node0-1d-72-jacobi_MILC.alloc.conf   |  1 +
 ...test-surrogate-parallel-deterministic-1.sh |  1 +
 ...test-surrogate-parallel-deterministic-2.sh |  1 +
 .../union-workload-test-surrogate-parallel.sh |  1 +
 ...load-test-surrogate-smaller-chunk-size.sh} | 22 ++++++------
 tests/union-workload-test-surrogate.sh        | 22 ++++++------
 10 files changed, 41 insertions(+), 53 deletions(-)
 rename tests/{union-workload-test-surrogate-fails.sh => union-workload-test-surrogate-smaller-chunk-size.sh} (86%)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index ea442aef..978ac455 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -79,6 +79,8 @@
 // If we have configured the network surrogate, then we will collect packet delay data, which is done via the scheduling of an event. This additional event will shift the random generator and thus the same model will behave differently from the start when compared with the one where the surrogate is not setup. If one wants to test both scenarios (with and without the surrogate) and maintain determinism in high-fidelity, one has to enable this option
 #define ALWAYS_DETERMINISTIC_NETWORK 0
 
+#define num_chunks_for(message_size, chunk_size) ((message_size) ? ((message_size) + (chunk_size) - 1) / (chunk_size) : 1)
+
 /* handles terminal and router events like packet generate/send/receive/buffer */
 typedef struct terminal_state terminal_state;
 typedef struct router_state router_state;
@@ -4213,9 +4215,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     if(bf->c4)
         num_remote_packets--;
 
-    int num_chunks = msg->packet_size/s->params->chunk_size;
-    if(msg->packet_size < s->params->chunk_size)
-        num_chunks++;
+    int const num_chunks = num_chunks_for(msg->packet_size, s->params->chunk_size);
 
     int i;
     int vcg = 0;
@@ -4277,15 +4277,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
     const dragonfly_param *p = s->params;
 
     int total_event_size;
-    uint64_t num_chunks = msg->packet_size / p->chunk_size;
-    
-    double cn_delay = s->params->cn_delay;
-
-    if (msg->packet_size < s->params->chunk_size) 
-        num_chunks++;
-
-    if(msg->packet_size < s->params->chunk_size)
-        cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth);
+    uint64_t const num_chunks = num_chunks_for(msg->packet_size, p->chunk_size);
 
     int dest_router_id;
     if (s->params->num_injection_queues > 1 || netMan.is_link_failures_enabled()) {
@@ -4733,9 +4725,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     msg->saved_vc = vcg;
     terminal_dally_message_list* cur_entry = s->terminal_msgs[msg->rail_id][vcg];
     int data_size = s->params->chunk_size;
-    uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size;
-    if(cur_entry->msg.packet_size < s->params->chunk_size)
-        num_chunks++;
+    uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size);
+
     msg->saved_avg_time = cur_entry->msg.travel_start_time;  // reusing field saved_avg_time. It is only used in another event handler path (arrive). So, no interruptions here
     cur_entry->msg.travel_start_time = tw_now(lp);
 
@@ -5353,7 +5344,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     /* WE do not allow self messages through dragonfly */
     assert(lp->gid != msg->src_terminal_id);
 
-    uint64_t num_chunks = (msg->packet_size + s->params->chunk_size - 1) / s->params->chunk_size;
+    uint64_t const num_chunks = num_chunks_for(msg->packet_size, s->params->chunk_size);
 
     if(msg->path_type == MINIMAL)
         minimal_count++;   
@@ -5484,13 +5475,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message
     if(hash_link)
         tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link);
 
-    uint64_t total_chunks = msg->total_size / s->params->chunk_size;
-
-    if(msg->total_size % s->params->chunk_size)
-          total_chunks++;
-
-    if(!total_chunks)
-          total_chunks = 1;
+    uint64_t const total_chunks = num_chunks_for(msg->total_size, s->params->chunk_size);
 
     /*if(tmp)
     {
@@ -6622,9 +6607,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         bandwidth = s->params->global_bandwidth;
     }
 
-    uint64_t num_chunks = cur_entry->msg.packet_size / s->params->chunk_size;
-    if(cur_entry->msg.packet_size < s->params->chunk_size)
-        num_chunks++;
+    uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size);
 
     /* Injection delay: Time taken for the data to be placed on the link/channel
      *  - Based on bandwidth
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e6d46ef8..3efbaeaa 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -100,10 +100,10 @@ configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union
 if(USE_UNION)
     list(APPEND test-shell-files
         union-workload-test-surrogate.sh
-        union-workload-test-surrogate-fails.sh
-        #union-workload-test-surrogate-parallel.sh
-        #union-workload-test-surrogate-parallel-deterministic-1.sh
-        #union-workload-test-surrogate-parallel-deterministic-2.sh
+        union-workload-test-surrogate-smaller-chunk-size.sh
+        union-workload-test-surrogate-parallel.sh
+        union-workload-test-surrogate-parallel-deterministic-1.sh
+        union-workload-test-surrogate-parallel-deterministic-2.sh
     )
 endif()
 
diff --git a/tests/conf/union-milc-jacobi-workload/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json
index 27a03f6c..557c0bce 100644
--- a/tests/conf/union-milc-jacobi-workload/conceptual.json
+++ b/tests/conf/union-milc-jacobi-workload/conceptual.json
@@ -41,7 +41,7 @@
       "100",
       "100",
       "50000",
-      "1",
+      "39",
       "200000",
       "barrier"
     ]
diff --git a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
index cf0a4b93..93c60688 100644
--- a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
+++ b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf
@@ -1 +1,2 @@
 36 conceptual-jacobi3d 1 0
+36 milc 1 0
diff --git a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
index bae48ac5..07e490d0 100644
--- a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
+++ b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf
@@ -1 +1,2 @@
 12 24 38 2 65 18 3 70 39 11 69 67 48 21 8 45 71 55 6 1 49 68 32 5 61 46 23 9 7 26 15 62 52 28 13 25 
+0 42 31 10 57 20 41 40 66 64 4 22 33 58 37 59 47 43 54 50 14 35 44 16 63 56 36 30 19 51 27 34 17 29 53 60 
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
index c5d145e3..7afae27c 100644
--- a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
@@ -34,6 +34,7 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
 cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 
 # CODES config file
+export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
 export APP_SURR_ON=1
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
index ab596dd5..ca2cb776 100644
--- a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
@@ -34,6 +34,7 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
 cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 
 # CODES config file
+export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
 export APP_SURR_ON=1
diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh
index f2940d12..fae9abd4 100644
--- a/tests/union-workload-test-surrogate-parallel.sh
+++ b/tests/union-workload-test-surrogate-parallel.sh
@@ -34,6 +34,7 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
 cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 
 # CODES config file
+export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
 export APP_SURR_ON=1
diff --git a/tests/union-workload-test-surrogate-fails.sh b/tests/union-workload-test-surrogate-smaller-chunk-size.sh
similarity index 86%
rename from tests/union-workload-test-surrogate-fails.sh
rename to tests/union-workload-test-surrogate-smaller-chunk-size.sh
index 1d81c4ee..e31a0d4d 100644
--- a/tests/union-workload-test-surrogate-fails.sh
+++ b/tests/union-workload-test-surrogate-smaller-chunk-size.sh
@@ -37,7 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=2048
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
-export APP_SURR_ON=0
+export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt
 export EVERY_N_GVT=500
@@ -77,11 +77,11 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking both milc and jacobi ran
-#grep 'MILC: Iteration 119/120' model-output.txt
-#err=$?
-#[[ $err -ne 0 ]] && exit $err
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
 
-grep 'Jacobi3D: Completed 1 iterations' model-output.txt
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
@@ -90,13 +90,13 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # it transitioned into surrogacy
-#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
-#err=$?
-#[[ $err -ne 0 ]] && exit $err
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
 
 # it transitioned back to high-fidelity
-#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
-#err=$?
-#[[ $err -ne 0 ]] && exit $err
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
 
 exit 0
diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh
index 9cd2c6be..64a19ee8 100644
--- a/tests/union-workload-test-surrogate.sh
+++ b/tests/union-workload-test-surrogate.sh
@@ -37,7 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
-export APP_SURR_ON=0
+export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt
 export EVERY_N_GVT=500
@@ -77,11 +77,11 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # Checking both milc and jacobi ran
-#grep 'MILC: Iteration 119/120' model-output.txt
-#err=$?
-#[[ $err -ne 0 ]] && exit $err
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
 
-grep 'Jacobi3D: Completed 1 iterations' model-output.txt
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
 err=$?
 [[ $err -ne 0 ]] && exit $err
 
@@ -90,13 +90,13 @@ err=$?
 [[ $err -ne 0 ]] && exit $err
 
 # it transitioned into surrogacy
-#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
-#err=$?
-#[[ $err -ne 0 ]] && exit $err
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
 
 # it transitioned back to high-fidelity
-#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
-#err=$?
-#[[ $err -ne 0 ]] && exit $err
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
 
 exit 0

From 737c702e7b92a395ea4b2f877a170e09d21e7bf2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Jun 2025 11:29:51 -0400
Subject: [PATCH 168/188] Adding more tests :)

---
 tests/CMakeLists.txt                          |   2 +
 .../dfdally-72-par.conf.in                    |   4 +-
 ...test-surrogate-parallel-deterministic-1.sh |   1 +
 ...test-surrogate-parallel-deterministic-2.sh |   1 +
 ...test-surrogate-parallel-deterministic-3.sh | 114 ++++++++++++++++++
 ...test-surrogate-parallel-deterministic-4.sh | 114 ++++++++++++++++++
 .../union-workload-test-surrogate-parallel.sh |   1 +
 ...kload-test-surrogate-smaller-chunk-size.sh |   1 +
 tests/union-workload-test-surrogate.sh        |   1 +
 9 files changed, 237 insertions(+), 2 deletions(-)
 create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-3.sh
 create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-4.sh

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 3efbaeaa..6390965c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -104,6 +104,8 @@ if(USE_UNION)
         union-workload-test-surrogate-parallel.sh
         union-workload-test-surrogate-parallel-deterministic-1.sh
         union-workload-test-surrogate-parallel-deterministic-2.sh
+        union-workload-test-surrogate-parallel-deterministic-3.sh
+        union-workload-test-surrogate-parallel-deterministic-4.sh
     )
 endif()
 
diff --git a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
index 11598088..ade97ef2 100644
--- a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
+++ b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in
@@ -75,14 +75,14 @@ NETWORK_SURROGATE {
 # latency predictor to use. Options: average, torch-jit
    packet_latency_predictor="average";
 # some workload models need some time to stabilize, a point where the network behaviour stabilizes. The predictor will ignore all packet latencies that arrive during this period
-   ignore_until="10.0e6";
+   ignore_until="2.0e6";
 
 # parameters for torch-jit latency predictor
    torch_jit_mode="single-static-model-for-all-terminals";
    torch_jit_model_path="";
 
 # selecting network treatment on switching to surrogate. Options: freeze, nothing
-   network_treatment_on_switch="nothing";  # freeze is buggy sadly. It freezes more events than it should
+   network_treatment_on_switch="${NETWORK_MODE}";
 }
 APPLICATION_SURROGATE {
    enable="${APP_SURR_ON}"; # either 0 or 1
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
index 7afae27c..22294863 100644
--- a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh
@@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
 export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
index ca2cb776..2a0384fd 100644
--- a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh
@@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
 export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 export EVERY_N_GVT=500
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-3.sh b/tests/union-workload-test-surrogate-parallel-deterministic-3.sh
new file mode 100644
index 00000000..93b74afc
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-3.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=1
+export NETWORK_MODE=nothing
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-4.sh b/tests/union-workload-test-surrogate-parallel-deterministic-4.sh
new file mode 100644
index 00000000..639eed1e
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-4.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+if [[ -z $bindir ]] ; then
+    echo bindir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copying configuration files to keep as documentation
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# CODES config file
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=1
+export NETWORK_MODE=freeze
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# running simulation
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+  --synch=3 \
+  --batch=4 --gvt-interval=256 \
+  --cons-lookahead=$cons_lookahead \
+  --max-opt-lookahead=$opt_lookahead \
+  --workload_type=conc-online \
+  --lp-io-dir=lp-io-dir \
+  --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+  --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+  -- "$expfolder/dfdally-72-par.conf" \
+  > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Setting milc json back
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh
index fae9abd4..f84bccad 100644
--- a/tests/union-workload-test-surrogate-parallel.sh
+++ b/tests/union-workload-test-surrogate-parallel.sh
@@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
 export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt
diff --git a/tests/union-workload-test-surrogate-smaller-chunk-size.sh b/tests/union-workload-test-surrogate-smaller-chunk-size.sh
index e31a0d4d..d7266ccc 100644
--- a/tests/union-workload-test-surrogate-smaller-chunk-size.sh
+++ b/tests/union-workload-test-surrogate-smaller-chunk-size.sh
@@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=2048
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
 export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt
diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh
index 64a19ee8..0a34b2c8 100644
--- a/tests/union-workload-test-surrogate.sh
+++ b/tests/union-workload-test-surrogate.sh
@@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
 export CHUNK_SIZE=4096
 export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
 export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
 export APP_SURR_ON=1
 export APP_DIRECTOR_MODE=every-n-nanoseconds
 #export APP_DIRECTOR_MODE=every-n-gvt

From 5755e06bc75a891068c869dd30c1c2c6079aab90 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Jun 2025 14:35:30 -0400
Subject: [PATCH 169/188] Changed terminal_dally_message_list to work with
 terminal_dally_message_list with terminals

---
 src/networks/model-net/dragonfly-dally.C | 230 +++++++++++++++++------
 1 file changed, 174 insertions(+), 56 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 978ac455..7562e844 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -213,14 +213,16 @@ typedef struct terminal_dally_message_list terminal_dally_message_list;
 struct terminal_dally_message_list {
     terminal_dally_message msg;
     char* event_data;
-    terminal_dally_message_list *next;
-    terminal_dally_message_list *prev;
+    struct qlist_head list;
+    terminal_dally_message_list *next;  // Keep for router compatibility
+    terminal_dally_message_list *prev;  // Keep for router compatibility
 };
 
 static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, 
     terminal_dally_message *inmsg) {
     thisO->msg = *inmsg;
     thisO->event_data = NULL;
+    INIT_QLIST_HEAD(&thisO->list);
     thisO->next = NULL;
     thisO->prev = NULL;
 }
@@ -536,8 +538,7 @@ struct terminal_state
 
     int** vc_occupancy; // vc_occupancies [rail_id][qos_level]
     tw_stime* terminal_available_time; // [rail_id]
-    terminal_dally_message_list ***terminal_msgs; //[rail_id][qos_level]
-    terminal_dally_message_list ***terminal_msgs_tail; //[rail_id][qos_level]
+    struct qlist_head **terminal_msgs; //[rail_id][qos_level] - quicklist heads
     int* in_send_loop; // [rail_id]
     struct mn_stats dragonfly_stats_array[CATEGORY_MAX];
 
@@ -1561,13 +1562,23 @@ static Connection dfdally_get_best_from_k_connections(router_state *s, tw_bf *bf
     return get_absolute_best_connection_from_conns(s, bf, msg, lp, k_conns);
 }
 
+static inline void append_to_qlist(struct qlist_head *head, terminal_dally_message_list *msg)
+{
+    qlist_add_tail(&msg->list, head);
+}
+
+static inline void prepend_to_qlist(struct qlist_head *head, terminal_dally_message_list *msg)
+{
+    qlist_add(&msg->list, head);
+}
+
+// Restore old functions for router compatibility
 static void append_to_terminal_dally_message_list(  
         terminal_dally_message_list ** thisq,
         terminal_dally_message_list ** thistail,
         int index, 
         terminal_dally_message_list *msg) 
 {
-//    printf("\n msg id %d ", msg->msg.packet_ID);
     if (thisq[index] == NULL) {
         thisq[index] = msg;
     } 
@@ -1577,7 +1588,105 @@ static void append_to_terminal_dally_message_list(
         msg->prev = thistail[index];
     } 
     thistail[index] = msg;
-//    printf("\n done adding %d ", msg->msg.packet_ID);
+}
+
+static terminal_dally_message_list* return_head(
+        terminal_dally_message_list ** thisq,
+        terminal_dally_message_list ** thistail,
+        int index)
+{
+    terminal_dally_message_list *head = thisq[index];
+    if (head != NULL) {
+        thisq[index] = head->next;
+        if(head->next != NULL) {
+            head->next->prev = NULL;
+            head->next = NULL;
+        }
+        else {
+            thistail[index] = NULL;
+        }
+    }
+    return head;
+}
+
+static void copy_terminal_dally_message_qlist(struct qlist_head *into_head, struct qlist_head *from_head)
+{
+    if (qlist_empty(from_head)) {
+        return;
+    }
+
+    terminal_dally_message_list *from_entry;
+    qlist_for_each_entry(from_entry, from_head, list) {
+        terminal_dally_message_list *copy_entry = (terminal_dally_message_list *)malloc(sizeof(terminal_dally_message_list));
+
+        // Deep copy the entry
+        memcpy(copy_entry, from_entry, sizeof(terminal_dally_message_list));
+        INIT_QLIST_HEAD(&copy_entry->list);
+
+        if (from_entry->event_data != NULL) {
+            int event_data_sz = from_entry->msg.remote_event_size_bytes + from_entry->msg.local_event_size_bytes;
+            copy_entry->event_data = (char *)malloc(event_data_sz);
+            memcpy(copy_entry->event_data, from_entry->event_data, event_data_sz);
+        }
+
+        append_to_qlist(into_head, copy_entry);
+    }
+}
+
+static void clean_terminal_dally_message_qlist(struct qlist_head *head)
+{
+    terminal_dally_message_list *entry, *tmp;
+    qlist_for_each_entry_safe(entry, tmp, head, list) {
+        qlist_del(&entry->list);
+        if (entry->event_data != NULL) {
+            free(entry->event_data);
+        }
+        free(entry);
+    }
+}
+
+static bool check_terminal_dally_message_qlist(struct qlist_head *before, struct qlist_head *after)
+{
+    bool is_same = true;
+
+    // Check if both are empty
+    if (qlist_empty(before) && qlist_empty(after)) {
+        return true;
+    }
+
+    // If only one is empty, they're different
+    if (qlist_empty(before) || qlist_empty(after)) {
+        return false;
+    }
+
+    terminal_dally_message_list *entry_before, *entry_after;
+    struct qlist_head *pos_before = before->next;
+    struct qlist_head *pos_after = after->next;
+
+    while (pos_before != before && pos_after != after) {
+        entry_before = qlist_entry(pos_before, terminal_dally_message_list, list);
+        entry_after = qlist_entry(pos_after, terminal_dally_message_list, list);
+
+        is_same &= check_terminal_dally_message(&entry_before->msg, &entry_after->msg);
+        is_same &= (entry_before->event_data == NULL) == (entry_after->event_data == NULL);
+
+        if (entry_before->event_data != NULL && entry_after->event_data != NULL) {
+            int const message_size = entry_before->msg.remote_event_size_bytes + entry_before->msg.local_event_size_bytes;
+            int const message_size_after = entry_after->msg.remote_event_size_bytes + entry_after->msg.local_event_size_bytes;
+            is_same &= (message_size == message_size_after);
+            if (message_size > 0) {
+                is_same &= (memcmp(entry_before->event_data, entry_after->event_data, message_size) == 0);
+            }
+        }
+
+        pos_before = pos_before->next;
+        pos_after = pos_after->next;
+    }
+
+    // Check if both reached the end at the same time
+    is_same &= (pos_before == before) && (pos_after == after);
+
+    return is_same;
 }
 
 static void prepend_to_terminal_dally_message_list(  
@@ -1596,23 +1705,24 @@ static void prepend_to_terminal_dally_message_list(
     thisq[index] = msg;
 }
 
-static terminal_dally_message_list* return_head(
-        terminal_dally_message_list ** thisq,
-        terminal_dally_message_list ** thistail,
-        int index) 
+static terminal_dally_message_list* return_head_from_qlist(struct qlist_head *head)
 {
-    terminal_dally_message_list *head = thisq[index];
-    if (head != NULL) {
-        thisq[index] = head->next;
-        if(head->next != NULL) {
-            head->next->prev = NULL;
-            head->next = NULL;
-        } 
-        else {
-            thistail[index] = NULL;
-        }
+    if (qlist_empty(head)) {
+        return NULL;
     }
-    return head;
+
+    struct qlist_head *item = qlist_pop(head);
+    return qlist_entry(item, terminal_dally_message_list, list);
+}
+
+static terminal_dally_message_list* return_tail_from_qlist(struct qlist_head *head)
+{
+    if (qlist_empty(head)) {
+        return NULL;
+    }
+
+    struct qlist_head *item = qlist_pop_back(head);
+    return qlist_entry(item, terminal_dally_message_list, list);
 }
 
 static terminal_dally_message_list* return_tail(
@@ -1718,6 +1828,26 @@ static bool check_terminal_dally_message_list(terminal_dally_message_list * befo
     return is_same;
 }
 
+static void print_terminal_dally_message_qlist(FILE * out, char const * prefix, terminal_state * ns, struct qlist_head * head) {
+    if (qlist_empty(head)) {
+        return;
+    }
+
+    char addprefix_2[] = " | | ";
+    int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1;
+    char * subprefix = (char *) malloc(len_subprefix * sizeof(char));
+    snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
+
+    terminal_dally_message_list * entry;
+    qlist_for_each_entry(entry, head, list) {
+        fprintf(out, "%s terminal_dally_message_list (%p) {\n", prefix, entry);
+        fprintf(out, "%s packet_ID = %llu\n", subprefix, LLU(entry->msg.packet_ID));
+        fprintf(out, "%s }\n", prefix);
+    }
+
+    free(subprefix);
+}
+
 static void print_terminal_dally_message_list(FILE * out, char const * prefix, terminal_state * ns, terminal_dally_message_list * thisq) {
     if (thisq == NULL) {
         return;
@@ -2859,7 +2989,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message *
     
     if(num_qos_levels == 1)
     {
-        if(s->terminal_msgs[msg->rail_id][0] == NULL || s->vc_occupancy[msg->rail_id][0] + s->params->chunk_size > s->params->cn_vc_size)
+        if(qlist_empty(&s->terminal_msgs[msg->rail_id][0]) || s->vc_occupancy[msg->rail_id][0] + s->params->chunk_size > s->params->cn_vc_size)
             return -1;
         else
             return 0;
@@ -2892,7 +3022,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message *
         {
             if(s->qos_status[msg->rail_id][i] == Q_ACTIVE)
             {
-                if(s->terminal_msgs[msg->rail_id][i] != NULL && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size)
+                if(!qlist_empty(&s->terminal_msgs[msg->rail_id][i]) && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size)
                     return i;
             }
         }
@@ -2903,7 +3033,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message *
     /* All vcgs are exceeding their bandwidth limits*/
     for(int i = 0; i < num_qos_levels; i++)
     {
-        if(s->terminal_msgs[msg->rail_id][i] != NULL && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size)
+        if(!qlist_empty(&s->terminal_msgs[msg->rail_id][i]) && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size)
         {
             bf->c2 = 1;
             
@@ -3687,10 +3817,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     s->issueIdle = (int*)calloc(p->num_rails, sizeof(int));
 
     s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE);
-    s->terminal_msgs = 
-        (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**));
-    s->terminal_msgs_tail = 
-        (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**));
+    s->terminal_msgs = (struct qlist_head**)calloc(p->num_rails, sizeof(struct qlist_head*));
 
     s->qos_status = (int**)calloc(p->num_rails, sizeof(int*));
     s->qos_data = (int**)calloc(p->num_rails, sizeof(int*));
@@ -3698,13 +3825,11 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     for(i = 0; i < p->num_rails; i++)
     {
         s->in_send_loop[i] = 0;
-        s->terminal_msgs[i] = (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*));
-        s->terminal_msgs_tail[i] = (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*));
+        s->terminal_msgs[i] = (struct qlist_head*)calloc(num_qos_levels, sizeof(struct qlist_head));
 
         for(int j = 0; j < num_qos_levels; j++)
         {
-            s->terminal_msgs[i][j] = NULL;
-            s->terminal_msgs_tail[i][j] = NULL;
+            INIT_QLIST_HEAD(&s->terminal_msgs[i][j]);
         }
 
         /* Whether the virtual channel group is active or over-bw*/
@@ -4227,7 +4352,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me
     assert(vcg < num_qos_levels);
 
     for(i = 0; i < num_chunks; i++) {
-            delete_terminal_dally_message_list(return_tail(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], vcg));
+            delete_terminal_dally_message_list(return_tail_from_qlist(&s->terminal_msgs[msg->rail_id][vcg]));
             s->terminal_length[msg->rail_id][vcg] -= s->params->chunk_size;
     }
     if(bf->c5) {
@@ -4525,8 +4650,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa
         cur_chunk->msg.output_chan = vcg;
         cur_chunk->msg.chunk_id = i;
         cur_chunk->msg.origin_router_id = s->router_id[msg->rail_id];
-        append_to_terminal_dally_message_list(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id],
-        vcg, cur_chunk);
+        append_to_qlist(&s->terminal_msgs[msg->rail_id][vcg], cur_chunk);
         s->terminal_length[msg->rail_id][vcg] += s->params->chunk_size;
     }
     
@@ -4668,8 +4792,7 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag
 
     s->qos_data[msg->rail_id][vcg] -= data_size;
 
-    prepend_to_terminal_dally_message_list(s->terminal_msgs[msg->rail_id], 
-            s->terminal_msgs_tail[msg->rail_id], vcg, cur_entry);
+    prepend_to_qlist(&s->terminal_msgs[msg->rail_id][vcg], cur_entry);
     
     if(bf->c4) {
         s->in_send_loop[msg->rail_id] = msg->saved_send_loop;
@@ -4723,7 +4846,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     }
 
     msg->saved_vc = vcg;
-    terminal_dally_message_list* cur_entry = s->terminal_msgs[msg->rail_id][vcg];
+    terminal_dally_message_list* cur_entry = return_head_from_qlist(&s->terminal_msgs[msg->rail_id][vcg]);
     int data_size = s->params->chunk_size;
     uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size);
 
@@ -4813,7 +4936,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     }
     
     s->vc_occupancy[msg->rail_id][vcg] += s->params->chunk_size;
-    cur_entry = return_head(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], vcg); 
+
     rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st);
     s->terminal_length[msg->rail_id][vcg] -= s->params->chunk_size;
     s->link_traffic[msg->rail_id] += s->params->chunk_size;
@@ -4825,12 +4948,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message *
     if(num_qos_levels > 1) //I think this one is OK since the default is that terminals have only 1 VC anyway so leaving vcg as 
         next_vcg = get_next_vcg(s, bf, msg, lp);
 
-    cur_entry = NULL;
-    if(next_vcg >= 0)
-        cur_entry = s->terminal_msgs[msg->rail_id][next_vcg];
-
     /* if there is another packet inline then schedule another send event */
-    if(cur_entry != NULL && s->vc_occupancy[msg->rail_id][next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) {
+    if(next_vcg >= 0 && !qlist_empty(&s->terminal_msgs[msg->rail_id][next_vcg]) && s->vc_occupancy[msg->rail_id][next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) {
         terminal_dally_message *m_new;
         e = model_net_method_event_new(lp->gid, injection_ts + gen_noise(lp, &msg->num_rngs), lp, DRAGONFLY_DALLY, (void**)&m_new, NULL);
         m_new->type = T_SEND;
@@ -5612,7 +5731,7 @@ static void terminal_buf_update(terminal_state * s,
     tw_stime ts = 0;
     s->vc_occupancy[msg->rail_id][vcg] -= s->params->chunk_size;
     
-    if(s->in_send_loop[msg->rail_id] == 0 && s->terminal_msgs[msg->rail_id][vcg] != NULL) {
+    if(s->in_send_loop[msg->rail_id] == 0 && !qlist_empty(&s->terminal_msgs[msg->rail_id][vcg])) {
         terminal_dally_message *m;
         bf->c1 = 1;
         tw_event* e = model_net_method_event_new(lp->gid, ts + gen_noise(lp, &msg->num_rngs), lp, DRAGONFLY_DALLY, 
@@ -5688,8 +5807,9 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
 
     for(int i = 0; i < s->params->num_rails; i++)
     {
-        if(s->terminal_msgs[i][0] != NULL) 
-        printf("[%llu] leftover terminal messages \n", LLU(lp->gid));
+        if(!qlist_empty(&s->terminal_msgs[i][0])) {
+            printf("[%llu] leftover terminal messages \n", LLU(lp->gid));
+        }
     }
 
 
@@ -5753,11 +5873,9 @@ static void dragonfly_dally_terminal_final( terminal_state * s,
         free(s->vc_occupancy[i]);
         // TODO: terminal_msgs are not properly freed if there are messages left. Correct this!
         free(s->terminal_msgs[i]);
-        free(s->terminal_msgs_tail[i]);
     }
     free(s->vc_occupancy);
     free(s->terminal_msgs);
-    free(s->terminal_msgs_tail);
 
     // Calling destructors for data. There is no need to free data, the
     // destructors do it themselves. ROSS allocated space for the datatypes and
@@ -7229,7 +7347,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
         into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
         into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t));
         into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime));
-        into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**));
+        into->terminal_msgs = (struct qlist_head**) malloc(num_rails * sizeof(struct qlist_head*));
         into->link_traffic = (uint64_t*) malloc(num_rails * sizeof(uint64_t));
 
         for(int i = 0; i < num_rails; i++) {
@@ -7237,13 +7355,14 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from
             into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int));
             into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
             into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
-            into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*));
+            into->terminal_msgs[i] = (struct qlist_head*) malloc(num_qos_levels * sizeof(struct qlist_head));
             for (int j = 0; j<num_qos_levels; j++) {
                 into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
                 into->terminal_length[i][j] = from->terminal_length[i][j];
                 into->qos_data[i][j] = from->qos_data[i][j];
                 into->qos_status[i][j] = from->qos_status[i][j];
-                copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]);
+                INIT_QLIST_HEAD(&into->terminal_msgs[i][j]);
+                copy_terminal_dally_message_qlist(&into->terminal_msgs[i][j], &from->terminal_msgs[i][j]);
             }
             into->last_buf_full[i] = from->last_buf_full[i];
             into->in_send_loop[i] = from->in_send_loop[i];
@@ -7301,7 +7420,7 @@ static void clean_terminal_state(terminal_state *state) {
             free(state->qos_status[i]);
             free(state->qos_data[i]);
             for (int j = 0; j<num_qos_levels; j++) {
-                clean_terminal_dally_message_list(state->terminal_msgs[i][j]);
+                clean_terminal_dally_message_qlist(&state->terminal_msgs[i][j]);
             }
             free(state->terminal_msgs[i]);
         }
@@ -7405,7 +7524,7 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after)
                 is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]);
                 is_same &= (before->qos_status[i][j] == after->qos_status[i][j]);
                 is_same &= (before->qos_data[i][j] == after->qos_data[i][j]);
-                is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]);
+                is_same &= check_terminal_dally_message_qlist(&before->terminal_msgs[i][j], &after->terminal_msgs[i][j]);
             }
 
             is_same &= (before->last_buf_full[i] == after->last_buf_full[i]);
@@ -7501,14 +7620,13 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state
             fprintf(out, "%s  |   rail %d: [\n", prefix, i);
             for (int j=0; j<state->params->num_qos_levels; j++) {
                 fprintf(out, "%s  |    | qos level %d\n", prefix, j);
-                print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]);
+                print_terminal_dally_message_qlist(out, subprefix, state, &state->terminal_msgs[i][j]);
             }
         }
         fprintf(out, "%s  | ]\n", prefix);
         free(subprefix);
     }
 
-    fprintf(out, "%s  | ***   terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail);
 
     if (is_dally_surrogate_on) {
         fprintf(out, "%s  | *          in_send_loop = %p\n", prefix, state->in_send_loop);

From 1710290fd2941d2fbc7283eb3b2a99f0cb7dfcae Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 17 Jun 2025 16:36:46 -0400
Subject: [PATCH 170/188] Fixing small silent bug at terminal initialization

---
 src/networks/model-net/dragonfly-dally.C | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 7562e844..e30769db 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -3825,9 +3825,9 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp )
     for(i = 0; i < p->num_rails; i++)
     {
         s->in_send_loop[i] = 0;
-        s->terminal_msgs[i] = (struct qlist_head*)calloc(num_qos_levels, sizeof(struct qlist_head));
+        s->terminal_msgs[i] = (struct qlist_head*)calloc(s->params->num_vcs, sizeof(struct qlist_head));
 
-        for(int j = 0; j < num_qos_levels; j++)
+        for(int j = 0; j < s->params->num_vcs; j++)
         {
             INIT_QLIST_HEAD(&s->terminal_msgs[i][j]);
         }

From 3d1b55c061a0d5ff0911979f59ad06e2a642b366 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 18 Jun 2025 04:43:05 -0400
Subject: [PATCH 171/188] Refactoring routers usage of custom double
 linked-list for qlist

---
 src/networks/model-net/dragonfly-dally.C | 332 +++++++++--------------
 1 file changed, 121 insertions(+), 211 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index e30769db..5465605a 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -214,8 +214,6 @@ struct terminal_dally_message_list {
     terminal_dally_message msg;
     char* event_data;
     struct qlist_head list;
-    terminal_dally_message_list *next;  // Keep for router compatibility
-    terminal_dally_message_list *prev;  // Keep for router compatibility
 };
 
 static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, 
@@ -223,8 +221,6 @@ static void init_terminal_dally_message_list(terminal_dally_message_list *thisO,
     thisO->msg = *inmsg;
     thisO->event_data = NULL;
     INIT_QLIST_HEAD(&thisO->list);
-    thisO->next = NULL;
-    thisO->prev = NULL;
 }
 
 static void delete_terminal_dally_message_list(void *thisO) {
@@ -653,10 +649,8 @@ struct router_state
     unsigned long* stalled_chunks; //Counter for when a packet is put into queued messages instead of routing due to full VC
     unsigned long* total_chunks; //Counter for when a packet is sent - per port
 
-    terminal_dally_message_list ***pending_msgs;
-    terminal_dally_message_list ***pending_msgs_tail;
-    terminal_dally_message_list ***queued_msgs;
-    terminal_dally_message_list ***queued_msgs_tail;
+    struct qlist_head **pending_msgs;
+    struct qlist_head **queued_msgs;
     int *in_send_loop;
     int *queued_count;
     struct rc_stack * st;
@@ -1573,41 +1567,7 @@ static inline void prepend_to_qlist(struct qlist_head *head, terminal_dally_mess
 }
 
 // Restore old functions for router compatibility
-static void append_to_terminal_dally_message_list(  
-        terminal_dally_message_list ** thisq,
-        terminal_dally_message_list ** thistail,
-        int index, 
-        terminal_dally_message_list *msg) 
-{
-    if (thisq[index] == NULL) {
-        thisq[index] = msg;
-    } 
-    else {
-        assert(thistail[index] != NULL);
-        thistail[index]->next = msg;
-        msg->prev = thistail[index];
-    } 
-    thistail[index] = msg;
-}
 
-static terminal_dally_message_list* return_head(
-        terminal_dally_message_list ** thisq,
-        terminal_dally_message_list ** thistail,
-        int index)
-{
-    terminal_dally_message_list *head = thisq[index];
-    if (head != NULL) {
-        thisq[index] = head->next;
-        if(head->next != NULL) {
-            head->next->prev = NULL;
-            head->next = NULL;
-        }
-        else {
-            thistail[index] = NULL;
-        }
-    }
-    return head;
-}
 
 static void copy_terminal_dally_message_qlist(struct qlist_head *into_head, struct qlist_head *from_head)
 {
@@ -1689,21 +1649,6 @@ static bool check_terminal_dally_message_qlist(struct qlist_head *before, struct
     return is_same;
 }
 
-static void prepend_to_terminal_dally_message_list(  
-        terminal_dally_message_list ** thisq,
-        terminal_dally_message_list ** thistail,
-        int index, 
-        terminal_dally_message_list *msg) 
-{
-    if (thisq[index] == NULL) {
-        thistail[index] = msg;
-    } 
-    else {
-        thisq[index]->prev = msg;
-        msg->next = thisq[index];
-    } 
-    thisq[index] = msg;
-}
 
 static terminal_dally_message_list* return_head_from_qlist(struct qlist_head *head)
 {
@@ -1725,104 +1670,69 @@ static terminal_dally_message_list* return_tail_from_qlist(struct qlist_head *he
     return qlist_entry(item, terminal_dally_message_list, list);
 }
 
-static terminal_dally_message_list* return_tail(
-        terminal_dally_message_list ** thisq,
-        terminal_dally_message_list ** thistail,
-        int index) 
-{
-    terminal_dally_message_list *tail = thistail[index];
-    assert(tail);
-    if (tail->prev != NULL) {
-        tail->prev->next = NULL;
-        thistail[index] = tail->prev;
-        tail->prev = NULL;
-    } 
-    else {
-        thistail[index] = NULL;
-        thisq[index] = NULL;
-    }
-    return tail;
-}
 
-// Copies a list and returns the tail
-static terminal_dally_message_list * copy_terminal_dally_message_list(terminal_dally_message_list ** into_thisq, terminal_dally_message_list const * from_thisq) {
-    if (from_thisq == NULL) {
-        *into_thisq = NULL;
-        return NULL;
-    }
 
-    terminal_dally_message_list const * from_head = from_thisq;
-    terminal_dally_message_list * prev = NULL;
-    while(from_head != NULL) {
-        terminal_dally_message_list * copy_head = (terminal_dally_message_list *) malloc(sizeof(terminal_dally_message_list));
+static void copy_msgs_qlist(struct qlist_head *into_qlist, struct qlist_head *from_qlist) {
+    INIT_QLIST_HEAD(into_qlist);
 
-        //copy_head->msg = from_head->msg;
-        memcpy(copy_head, from_head, sizeof(terminal_dally_message_list));
-        copy_head->prev = prev;
+    if (qlist_empty(from_qlist)) {
+        return;
+    }
 
-        if (from_head->event_data != NULL) {
-            int const message_size = from_head->msg.remote_event_size_bytes + from_head->msg.local_event_size_bytes;
-            assert(message_size > 0);
-            copy_head->event_data = (char *) malloc(message_size);
-            memcpy(copy_head->event_data, from_head->event_data, message_size);
-        }
+    struct qlist_head *pos;
+    qlist_for_each(pos, from_qlist) {
+        terminal_dally_message_list *from_entry = qlist_entry(pos, terminal_dally_message_list, list);
+        terminal_dally_message_list *copy_entry = (terminal_dally_message_list*) malloc(sizeof(terminal_dally_message_list));
 
-        if (prev == NULL) {
-            *into_thisq = copy_head;
-        } else {
-            prev->next = copy_head;
+        init_terminal_dally_message_list(copy_entry, &from_entry->msg);
+        if (from_entry->event_data != NULL) {
+            copy_entry->event_data = (char*) malloc(from_entry->msg.remote_event_size_bytes);
+            memcpy(copy_entry->event_data, from_entry->event_data, from_entry->msg.remote_event_size_bytes);
         }
 
-        prev = copy_head;
-        from_head = from_head->next;
+        qlist_add_tail(&copy_entry->list, into_qlist);
     }
-    prev->next = NULL;
-
-    return prev;
 }
 
-static void clean_terminal_dally_message_list(terminal_dally_message_list * thisq) {
-    if (thisq == NULL) {
-        return;
+
+
+static bool check_msgs_qlist(struct qlist_head * before, struct qlist_head * after) {
+    bool is_same = true;
+
+    if (qlist_empty(before) && qlist_empty(after)) {
+        return true;
     }
 
-    terminal_dally_message_list * prev = thisq;
-    terminal_dally_message_list * head = prev->next;
-    free(prev->event_data);
-    while (head != NULL) {
-        free(head->event_data);
-        free(prev);
-        prev = head;
-        head = head->next;
+    if (qlist_empty(before) != qlist_empty(after)) {
+        return false;
     }
-    free(prev);
-}
 
-static bool check_terminal_dally_message_list(terminal_dally_message_list * before, terminal_dally_message_list * after) {
-    bool is_same = true;
+    struct qlist_head *pos_before = before->next;
+    struct qlist_head *pos_after = after->next;
 
-    terminal_dally_message_list * head_before = before;
-    terminal_dally_message_list * head_after = after;
-    while (head_before != NULL && head_after != NULL) {
-        is_same &= check_terminal_dally_message(&head_before->msg, &head_after->msg);
-        is_same &= (head_before->event_data == NULL) == (head_after->event_data == NULL);
+    while (pos_before != before && pos_after != after) {
+        terminal_dally_message_list *entry_before = qlist_entry(pos_before, terminal_dally_message_list, list);
+        terminal_dally_message_list *entry_after = qlist_entry(pos_after, terminal_dally_message_list, list);
+
+        is_same &= check_terminal_dally_message(&entry_before->msg, &entry_after->msg);
+        is_same &= (entry_before->event_data == NULL) == (entry_after->event_data == NULL);
 
-        int const message_size = head_before->msg.remote_event_size_bytes + head_before->msg.local_event_size_bytes;
-        int const message_size_after = head_after->msg.remote_event_size_bytes + head_after->msg.local_event_size_bytes;
+        int const message_size = entry_before->msg.remote_event_size_bytes + entry_before->msg.local_event_size_bytes;
+        int const message_size_after = entry_after->msg.remote_event_size_bytes + entry_after->msg.local_event_size_bytes;
         is_same &= message_size == message_size_after;
 
-        if (is_same && head_before->event_data != NULL) {
+        if (is_same && entry_before->event_data != NULL) {
             assert(message_size > 0);
-
-            is_same &= !memcmp(head_before->event_data, head_after->event_data, message_size);
+            is_same &= !memcmp(entry_before->event_data, entry_after->event_data, message_size);
         }
 
-        head_before = head_before->next;
-        head_after = head_after->next;
+        pos_before = pos_before->next;
+        pos_after = pos_after->next;
     }
 
-    if (head_before != NULL || head_after != NULL) {
-        is_same = false; // at least one of them is longer than the other
+    // Check if both reached end
+    if (pos_before != before || pos_after != after) {
+        is_same = false; // different lengths
     }
 
     return is_same;
@@ -1848,8 +1758,9 @@ static void print_terminal_dally_message_qlist(FILE * out, char const * prefix,
     free(subprefix);
 }
 
-static void print_terminal_dally_message_list(FILE * out, char const * prefix, terminal_state * ns, terminal_dally_message_list * thisq) {
-    if (thisq == NULL) {
+
+static void print_msgs_qlist(FILE * out, char const * prefix, struct qlist_head * qlist) {
+    if (qlist_empty(qlist)) {
         return;
     }
 
@@ -1858,25 +1769,17 @@ static void print_terminal_dally_message_list(FILE * out, char const * prefix, t
     char * subprefix = (char *) malloc(len_subprefix * sizeof(char));
     snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2);
 
-    terminal_dally_message_list * head = thisq;
-    while (head != NULL) {
-        fprintf(out, "%s{\n", prefix);
-        fprintf(out, "%s | msg:\n", prefix);
-        print_terminal_dally_message(out, subprefix, ns, &head->msg);
-        fprintf(out, "%s | event_data = %p\n", prefix, head->event_data);
-        int const message_size = head->msg.remote_event_size_bytes + head->msg.local_event_size_bytes;
-        if (head->event_data != NULL) {
-            assert(message_size > 0);
-            tw_fprint_binary_array(out, subprefix, head->event_data, message_size);
-        }
-        fprintf(out, "%s},\n", prefix);
-        head = head->next;
+    struct qlist_head *pos;
+    qlist_for_each(pos, qlist) {
+        terminal_dally_message_list *entry = qlist_entry(pos, terminal_dally_message_list, list);
+        fprintf(out, "%s qlist entry (%p) {\n", prefix, entry);
+        print_terminal_dally_message(out, subprefix, NULL, &entry->msg);
+        fprintf(out, "%s }\n", prefix);
     }
 
     free(subprefix);
 }
 
-
 static tw_stime* buff_time_storage_create(terminal_state *s)
 {
     tw_stime* storage = (tw_stime*)malloc(s->params->num_rails * sizeof(tw_stime));
@@ -3092,7 +2995,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
                 int base_limit = i * vcs_per_qos;
                 for(int k = base_limit; k < base_limit + vcs_per_qos; k ++)
                 {
-                    if(s->pending_msgs[output_port][k] != NULL)
+                    if(!qlist_empty(&s->pending_msgs[output_port][k]))
                         return k;
                 }
             }
@@ -3106,7 +3009,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess
 
     for(int i = 0; i < s->params->num_vcs; i++)
     {
-        if(s->pending_msgs[output_port][next_rr_vc] != NULL)
+        if(!qlist_empty(&s->pending_msgs[output_port][next_rr_vc]))
         {
             s->last_qos_lvl[output_port] = next_rr_vc;
             return next_rr_vc;
@@ -3985,13 +3888,9 @@ static void router_dally_init(router_state * r, tw_lp * lp)
     r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int));
     r->qos_status = (int**)calloc(p->radix, sizeof(int*));
     r->pending_msgs = 
-        (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**));
-    r->pending_msgs_tail = 
-        (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**));
+        (struct qlist_head**)calloc(p->radix, sizeof(struct qlist_head*));
     r->queued_msgs = 
-        (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**));
-    r->queued_msgs_tail = 
-        (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**));
+        (struct qlist_head**)calloc(p->radix, sizeof(struct qlist_head*));
     r->queued_count = (int*)calloc(p->radix, sizeof(int));
     r->last_buf_full = (tw_stime*)calloc(p->radix, sizeof(tw_stime*));
     r->busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime));
@@ -4024,14 +3923,8 @@ static void router_dally_init(router_state * r, tw_lp * lp)
         r->in_send_loop[i] = 0;
         r->vc_occupancy[i] = (int*)calloc(p->num_vcs, sizeof(int));
     //    printf("\n Number of vcs %d for radix %d ", p->num_vcs, p->radix);
-        r->pending_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, 
-            sizeof(terminal_dally_message_list*));
-        r->pending_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs,
-            sizeof(terminal_dally_message_list*));
-        r->queued_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs,
-            sizeof(terminal_dally_message_list*));
-        r->queued_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs,
-            sizeof(terminal_dally_message_list*));
+        r->pending_msgs[i] = (struct qlist_head*)calloc(p->num_vcs, sizeof(struct qlist_head));
+        r->queued_msgs[i] = (struct qlist_head*)calloc(p->num_vcs, sizeof(struct qlist_head));
         r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int));
         r->qos_data[i] = (int*)calloc(num_qos_levels, sizeof(int));
         for(int j = 0; j < num_qos_levels; j++)
@@ -4041,10 +3934,8 @@ static void router_dally_init(router_state * r, tw_lp * lp)
         }
         for(int j = 0; j < p->num_vcs; j++) 
         {
-            r->pending_msgs[i][j] = NULL;
-            r->pending_msgs_tail[i][j] = NULL;
-            r->queued_msgs[i][j] = NULL;
-            r->queued_msgs_tail[i][j] = NULL;
+            INIT_QLIST_HEAD(&r->pending_msgs[i][j]);
+            INIT_QLIST_HEAD(&r->queued_msgs[i][j]);
         }
     }
 
@@ -5920,11 +5811,11 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){
     int i, j;
     for(i = 0; i < s->params->radix; i++) {
         for(j = 0; j < s->params->num_vcs; j++) {
-            if(s->queued_msgs[i][j] != NULL) {
+            if(!qlist_empty(&s->queued_msgs[i][j])) {
                 printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j,
                 s->vc_occupancy[i][j]);
             }
-            if(s->pending_msgs[i][j] != NULL) {
+            if(!qlist_empty(&s->pending_msgs[i][j])) {
                 printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j);
             }
         }
@@ -6282,8 +6173,12 @@ static void router_packet_receive_rc(router_state * s,
         s->is_monitoring_bw = 0;
 
     if(bf->c2) {
-        terminal_dally_message_list * tail = return_tail(s->pending_msgs[output_port], s->pending_msgs_tail[output_port], output_chan);
-        delete_terminal_dally_message_list(tail);
+        if (!qlist_empty(&s->pending_msgs[output_port][output_chan])) {
+            struct qlist_head *last = s->pending_msgs[output_port][output_chan].prev;
+            qlist_del(last);
+            terminal_dally_message_list *tail = qlist_entry(last, terminal_dally_message_list, list);
+            delete_terminal_dally_message_list(tail);
+        }
         s->vc_occupancy[output_port][output_chan] -= s->params->chunk_size;
         if(bf->c3) {
             s->in_send_loop[output_port] = 0;
@@ -6295,8 +6190,12 @@ static void router_packet_receive_rc(router_state * s,
         {
             s->last_buf_full[output_port] = msg->saved_busy_time;
         }
-    delete_terminal_dally_message_list(return_tail(s->queued_msgs[output_port], 
-        s->queued_msgs_tail[output_port], output_chan));
+    if (!qlist_empty(&s->queued_msgs[output_port][output_chan])) {
+        struct qlist_head *last = s->queued_msgs[output_port][output_chan].prev;
+        qlist_del(last);
+        terminal_dally_message_list *tail = qlist_entry(last, terminal_dally_message_list, list);
+        delete_terminal_dally_message_list(tail);
+    }
     s->queued_count[output_port] -= s->params->chunk_size; 
     }
 
@@ -6474,8 +6373,7 @@ static void router_packet_receive( router_state * s,
         assert(output_chan < s->params->num_vcs && output_port < s->params->radix);
         router_credit_send(s, msg, lp, -1, &(msg->num_rngs));
     
-        append_to_terminal_dally_message_list(s->pending_msgs[output_port], s->pending_msgs_tail[output_port],
-                                            output_chan, cur_chunk);
+        qlist_add_tail(&cur_chunk->list, &s->pending_msgs[output_port][output_chan]);
         s->vc_occupancy[output_port][output_chan] += s->params->chunk_size;
         if(s->in_send_loop[output_port] == 0) {
             bf->c3 = 1;
@@ -6499,8 +6397,7 @@ static void router_packet_receive( router_state * s,
         cur_chunk->msg.saved_vc = msg->vc_index;
         cur_chunk->msg.saved_channel = msg->output_chan;
         assert(output_chan < s->params->num_vcs && output_port < s->params->radix);
-        append_to_terminal_dally_message_list( s->queued_msgs[output_port], 
-        s->queued_msgs_tail[output_port], output_chan, cur_chunk);
+        qlist_add_tail(&cur_chunk->list, &s->queued_msgs[output_port][output_chan]);
         s->queued_count[output_port] += s->params->chunk_size;
 
 
@@ -6622,8 +6519,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
     s->qos_data[output_port][vcg] -= msg_size;
     s->total_chunks[output_port]--;
 
-    prepend_to_terminal_dally_message_list(s->pending_msgs[output_port],
-            s->pending_msgs_tail[output_port], output_chan, cur_entry);
+    qlist_add(&cur_entry->list, &s->pending_msgs[output_port][output_chan]);
 
     if (g_congestion_control_enabled) {
         congestion_control_message *cc_msg_rc = (congestion_control_message*)rc_stack_pop(s->cc_st);
@@ -6681,7 +6577,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         return;
     }
 
-    cur_entry = s->pending_msgs[output_port][output_chan];
+    if (!qlist_empty(&s->pending_msgs[output_port][output_chan])) {
+        struct qlist_head *first = s->pending_msgs[output_port][output_chan].next;
+        cur_entry = qlist_entry(first, terminal_dally_message_list, list);
+    } else {
+        cur_entry = NULL;
+    }
     
     msg->dfdally_src_terminal_id = cur_entry->msg.dfdally_src_terminal_id;
 
@@ -6840,8 +6741,8 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         rc_stack_push(lp, cc_msg_rc, cc_msg_rc_storage_delete, s->cc_st);
     }
 
-    cur_entry = return_head(s->pending_msgs[output_port], 
-        s->pending_msgs_tail[output_port], output_chan);
+    struct qlist_head *item = qlist_pop(&s->pending_msgs[output_port][output_chan]);
+    cur_entry = item ? qlist_entry(item, terminal_dally_message_list, list) : NULL;
     rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st);
 
     s->qos_data[output_port][vcg] += msg_size; 
@@ -6856,7 +6757,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         base_limit = i * vcs_per_qos;
         for(int k = base_limit; k < base_limit + vcs_per_qos; k ++)
         {
-            if(s->pending_msgs[output_port][k] != NULL)
+            if(!qlist_empty(&s->pending_msgs[output_port][k]))
             {
                 next_output_chan = k;
                 break;
@@ -6872,7 +6773,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes
         s->in_send_loop[output_port] = 0;
         return;
     }
-    cur_entry = s->pending_msgs[output_port][next_output_chan];
+    if (!qlist_empty(&s->pending_msgs[output_port][next_output_chan])) {
+        struct qlist_head *first = s->pending_msgs[output_port][next_output_chan].next;
+        cur_entry = qlist_entry(first, terminal_dally_message_list, list);
+    } else {
+        cur_entry = NULL;
+    }
     assert(cur_entry != NULL); 
 
     terminal_dally_message *m_new;
@@ -6913,10 +6819,8 @@ static void router_buf_update_rc(router_state * s,
         }
     }
     if(bf->c1) {
-        terminal_dally_message_list* head = return_tail(s->pending_msgs[indx],
-            s->pending_msgs_tail[indx], output_chan);
-        prepend_to_terminal_dally_message_list(s->queued_msgs[indx], 
-            s->queued_msgs_tail[indx], output_chan, head);
+        terminal_dally_message_list* head = return_tail_from_qlist(&s->pending_msgs[indx][output_chan]);
+        qlist_add(&head->list, &s->queued_msgs[indx][output_chan]);
         s->vc_occupancy[indx][output_chan] -= s->params->chunk_size;
         s->queued_count[indx] += s->params->chunk_size;
     }
@@ -6963,12 +6867,12 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa
         s->last_buf_full[indx] = 0.0;
     }
 
-    if(s->queued_msgs[indx][output_chan] != NULL) {
+    if(!qlist_empty(&s->queued_msgs[indx][output_chan])) {
         bf->c1 = 1;
         assert(indx < s->params->radix);
         assert(output_chan < s->params->num_vcs);
-        terminal_dally_message_list *head = return_head(s->queued_msgs[indx],
-            s->queued_msgs_tail[indx], output_chan);
+        struct qlist_head *item = qlist_pop(&s->queued_msgs[indx][output_chan]);
+        terminal_dally_message_list *head = item ? qlist_entry(item, terminal_dally_message_list, list) : NULL;
         /*if(strcmp(head->msg.category, "medium") == 0)
         {
         if(head->msg.saved_channel < 4 || head->msg.saved_channel >= 8)
@@ -6977,13 +6881,12 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa
         }
         }*/
         router_credit_send(s, &head->msg, lp, 1, &(msg->num_rngs)); 
-        append_to_terminal_dally_message_list(s->pending_msgs[indx], 
-        s->pending_msgs_tail[indx], output_chan, head);
+        qlist_add_tail(&head->list, &s->pending_msgs[indx][output_chan]);
         s->vc_occupancy[indx][output_chan] += s->params->chunk_size;
         s->queued_count[indx] -= s->params->chunk_size; 
     }
 
-    if(s->in_send_loop[indx] == 0 && s->pending_msgs[indx][output_chan] != NULL) {
+    if(s->in_send_loop[indx] == 0 && !qlist_empty(&s->pending_msgs[indx][output_chan])) {
         bf->c2 = 1;
         terminal_dally_message *m;
         tw_stime ts = maxd(s->next_output_available_time[indx], tw_now(lp)) - tw_now(lp);
@@ -7854,8 +7757,8 @@ static void save_router_state(router_state *into, router_state const *from) {
     into->vc_occupancy = (int**) malloc(radix * sizeof(int*));
     into->qos_status = (int**) malloc(radix * sizeof(int*));
     into->qos_data = (int**) malloc(radix * sizeof(int*));
-    into->pending_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**));
-    into->queued_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**));
+    into->pending_msgs = (struct qlist_head**) malloc(radix * sizeof(struct qlist_head*));
+    into->queued_msgs = (struct qlist_head**) malloc(radix * sizeof(struct qlist_head*));
 
     for (int i = 0; i < radix; i++) {
         into->next_output_available_time[i] = from->next_output_available_time[i];
@@ -7874,13 +7777,13 @@ static void save_router_state(router_state *into, router_state const *from) {
         into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int));
         into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int));
 
-        into->pending_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*));
-        into->queued_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*));
+        into->pending_msgs[i] = (struct qlist_head*) malloc(p->num_vcs * sizeof(struct qlist_head));
+        into->queued_msgs[i] = (struct qlist_head*) malloc(p->num_vcs * sizeof(struct qlist_head));
 
         for (int j = 0; j < p->num_vcs; j++) {
             into->vc_occupancy[i][j] = from->vc_occupancy[i][j];
-            copy_terminal_dally_message_list(&into->pending_msgs[i][j], from->pending_msgs[i][j]);
-            copy_terminal_dally_message_list(&into->queued_msgs[i][j], from->queued_msgs[i][j]);
+            copy_msgs_qlist(&into->pending_msgs[i][j], &from->pending_msgs[i][j]);
+            copy_msgs_qlist(&into->queued_msgs[i][j], &from->queued_msgs[i][j]);
         }
         for (int j = 0; j < num_qos_levels; j++) {
             into->qos_status[i][j] = from->qos_status[i][j];
@@ -7935,8 +7838,19 @@ static void clean_router_state(router_state *state) {
         free(state->qos_data[i]);
 
         for (int j = 0; j < p->num_vcs; j++) {
-            clean_terminal_dally_message_list(state->pending_msgs[i][j]);
-            clean_terminal_dally_message_list(state->queued_msgs[i][j]);
+            // Clean up qlist entries - remove and free all elements
+            while (!qlist_empty(&state->pending_msgs[i][j])) {
+                struct qlist_head *item = qlist_pop(&state->pending_msgs[i][j]);
+                terminal_dally_message_list *entry = qlist_entry(item, terminal_dally_message_list, list);
+                free(entry->event_data);
+                free(entry);
+            }
+            while (!qlist_empty(&state->queued_msgs[i][j])) {
+                struct qlist_head *item = qlist_pop(&state->queued_msgs[i][j]);
+                terminal_dally_message_list *entry = qlist_entry(item, terminal_dally_message_list, list);
+                free(entry->event_data);
+                free(entry);
+            }
         }
 
         free(state->pending_msgs[i]);
@@ -8014,8 +7928,8 @@ static bool check_router_state(router_state const *before, router_state const *a
                 return false;
             }
 
-            if (!check_terminal_dally_message_list(before->pending_msgs[i][j], after->pending_msgs[i][j]) ||
-                !check_terminal_dally_message_list(before->queued_msgs[i][j], after->queued_msgs[i][j])) {
+            if (!check_msgs_qlist(&before->pending_msgs[i][j], &after->pending_msgs[i][j]) ||
+                !check_msgs_qlist(&before->queued_msgs[i][j], &after->queued_msgs[i][j])) {
                 return false;
             }
         }
@@ -8138,28 +8052,24 @@ static void print_router_state(FILE * out, char const * prefix, router_state * s
         fprintf(out, "%s  |   port %d: [\n", prefix, i);
         for (int j = 0; j < p->num_vcs; j++) {
             fprintf(out, "%s  |   |  vcs # %d\n", prefix, j);
-            print_terminal_dally_message_list(out, subprefix, NULL, state->pending_msgs[i][j]);
+            print_msgs_qlist(out, subprefix, &state->pending_msgs[i][j]);
         }
         fprintf(out, "%s  |   ]\n", prefix);
     }
     fprintf(out, "%s  | ]\n", prefix);
 
-    fprintf(out, "%s  | ***    pending_msgs_tail = %p\n", prefix, state->pending_msgs_tail);
-
     fprintf(out, "%s  | ***  queued_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs);
     for (int i = 0; i < radix; i++) {
         fprintf(out, "%s  |   port %d: [\n", prefix, i);
         for (int j = 0; j < p->num_vcs; j++) {
             fprintf(out, "%s  |   |  vcs # %d\n", prefix, j);
-            print_terminal_dally_message_list(out, subprefix, NULL, state->queued_msgs[i][j]);
+            print_msgs_qlist(out, subprefix, &state->queued_msgs[i][j]);
         }
         fprintf(out, "%s  |   ]\n", prefix);
     }
     fprintf(out, "%s  | ]\n", prefix);
     free(subprefix);
 
-    fprintf(out, "%s  | ***     queued_msgs_tail = %p\n", prefix, state->queued_msgs_tail);
-
     fprintf(out, "%s  | *        in_send_loop[%d] = [", prefix, radix);
     for (int i = 0; i < radix; i++) {
         fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]);

From 274f020483fd0fe7e9f4a42843c0a53c7d0aafd0 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 18 Jun 2025 08:32:26 -0400
Subject: [PATCH 172/188] Allowing director to be called after simulation
 ended, to repopulate network if needed

---
 codes/surrogate/network-surrogate.h          |  2 +-
 src/network-workloads/model-net-mpi-replay.c |  2 +-
 src/surrogate/application-surrogate.c        | 38 +++++++------
 src/surrogate/network-surrogate.c            | 56 +++++++++++++-------
 4 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h
index 4b22e238..b4dae45c 100644
--- a/codes/surrogate/network-surrogate.h
+++ b/codes/surrogate/network-surrogate.h
@@ -60,7 +60,7 @@ struct network_surrogate_config {
 void network_director_configure(struct network_surrogate_config *, struct switch_at_struct * switch_network_at, bool freeze_network_on_switch);
 
 // Function for application director to use network freezing machinery
-void surrogate_switch_network_model(tw_pe * pe);
+void surrogate_switch_network_model(tw_pe * pe, bool is_queue_empty);
 
 void network_director_finalize(void);
 
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index fd28775f..475d1675 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4080,7 +4080,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
             }
                 i++;
         }
-        printf("\n num_net_traces %d; num_dumpi_traces %d", num_net_traces, num_dumpi_traces);
+        printf("\n num_net_traces %d; num_dumpi_traces %d\n", num_net_traces, num_dumpi_traces);
         fclose(name_file);
         assert(strlen(alloc_file) != 0);
         alloc_spec = 1;
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
index 870794b6..736fdd89 100644
--- a/src/surrogate/application-surrogate.c
+++ b/src/surrogate/application-surrogate.c
@@ -23,7 +23,11 @@ static enum {
 
 #define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
 
-static void application_director_pre_switch(tw_pe * pe) {
+static void application_director_pre_switch(tw_pe * pe, bool is_queue_empty) {
+    // No need to switch to surrogate when the simulation has ended
+    if (is_queue_empty || gvt_for(pe) >= g_tw_ts_end) {
+        return;
+    }
     // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every
     if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
         tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
@@ -41,7 +45,7 @@ static void application_director_pre_switch(tw_pe * pe) {
 
             if (conf.use_network_surrogate) {
                 master_printf("Switching network surrogate on\n");
-                surrogate_switch_network_model(pe);
+                surrogate_switch_network_model(pe, is_queue_empty);
             }
 
             surrogate_time_last = tw_clock_read();
@@ -55,12 +59,19 @@ static void application_director_pre_switch(tw_pe * pe) {
     }
 }
 
-static void application_director_post_switch(tw_pe * pe) {
+static void application_director_post_switch(tw_pe * pe, bool is_queue_empty) {
+    // No need to restart high-fidelity simulation if network was not suspended
+    if (is_queue_empty && !conf.use_network_surrogate) {
+        return;
+    }
+
     // Scheduling next GVT hook call
-    if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
-        tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
-    } else {
-        tw_trigger_gvt_hook_every(conf.every_n_gvt);
+    if (!is_queue_empty) {
+        if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
+            tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
+        } else {
+            tw_trigger_gvt_hook_every(conf.every_n_gvt);
+        }
     }
 
     double const start = tw_clock_read();
@@ -73,8 +84,7 @@ static void application_director_post_switch(tw_pe * pe) {
 
         if (conf.use_network_surrogate) {
             master_printf("Switching network surrogate off\n");
-            surrogate_switch_network_model(pe);
-            // TODO: reset network predictors and ask not to gather any data for 1 ms
+            surrogate_switch_network_model(pe, is_queue_empty);
         }
 
         time_in_surrogate += start - surrogate_time_last;
@@ -85,18 +95,14 @@ static void application_director_post_switch(tw_pe * pe) {
     director_state = PRE_JUMP;
 }
 
-static void application_director(tw_pe * pe) {
-    // Director is not called if the simulation has ended
-    if (gvt_for(pe) >= g_tw_ts_end) {
-        return;
-    }
+static void application_director(tw_pe * pe, bool is_queue_empty) {
     switch (director_state) {
         case PRE_JUMP:
-            application_director_pre_switch(pe);
+            application_director_pre_switch(pe, is_queue_empty);
         break;
         case POST_JUMP_switched:
         case POST_JUMP_skipped:
-            application_director_post_switch(pe);
+            application_director_post_switch(pe, is_queue_empty);
         break;
     }
 }
diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c
index b7108cc8..c2278583 100644
--- a/src/surrogate/network-surrogate.c
+++ b/src/surrogate/network-surrogate.c
@@ -4,6 +4,8 @@
 #include <ross-extern.h>
 #include <stdio.h>
 
+#define master_printf(cond, ...) if (cond && g_tw_mynode == 0) { printf(__VA_ARGS__); }
+
 static bool is_network_surrogate_configured = false;
 static struct switch_at_struct switch_network_at = {0};
 static struct network_surrogate_config net_surr_config = {0};
@@ -198,9 +200,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
         tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode");
     }
 
-    printf("PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size);
+    master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size);
     freeze_events_to_separate_queue_pe(pe);
-    printf("PE %lu - AVL size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size);
+    master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -224,7 +226,11 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) {
 
         pe->cur_event = pe->abort_event;
         pe->cur_event->caused_by_me = NULL;
+#ifdef USE_RAND_TIEBREAKER
         pe->cur_event->sig = pe->GVT_sig;
+#else
+        pe->cur_event->recv_ts = pe->GVT;
+#endif
 
         if (lp_type_switch) {
             if (lp_type_switch->trigger_idle_modelnet) {
@@ -257,9 +263,9 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
 #endif
 
     // Restore frozen events back to the main queue with timestamp adjustment
-    printf("PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size);
+    master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size);
     unfreeze_events_from_separate_queue_pe(pe);
-    printf("PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size);
+    master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size);
 
     // Going through all LPs in PE and running their specific functions
     for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) {
@@ -285,7 +291,11 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
 
         pe->cur_event = pe->abort_event;
         pe->cur_event->caused_by_me = NULL;
+#ifdef USE_RAND_TIEBREAKER
         pe->cur_event->sig = pe->GVT_sig;
+#else
+        pe->cur_event->recv_ts = pe->GVT;
+#endif
 
         if (lp_type_switch) {
             if (lp_type_switch->trigger_idle_modelnet) {
@@ -317,19 +327,23 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) {
 }
 
 
-static void switch_model(tw_pe * pe) {
-    // Rollback if in optimistic mode
-    if (g_tw_synchronization_protocol == OPTIMISTIC) {
+static void switch_model(tw_pe * pe, bool is_queue_empty) {
+    // Rollback if in optimistic mode and the simulation has events yet to process (globally)
+    if (g_tw_synchronization_protocol == OPTIMISTIC && !is_queue_empty) {
         tw_scheduler_rollback_and_cancel_events_pe(pe);
     }
-    net_surr_config.model.switch_surrogate();
-    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
-        printf("Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "surrogate" : "high-fidelity");
+    master_printf(DEBUG_DIRECTOR, "Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "high-fidelity": "surrogate");
+
+    bool const is_surrogate_off = !net_surr_config.model.is_surrogate_on();
+    if (is_surrogate_off && is_queue_empty) {
+        master_printf(true, "No need to switch to surrogate when the simulation has no events to process\n");
+        return;
     }
+    net_surr_config.model.switch_surrogate();
 
     // "Freezing" network events and activating LP's switch functions
     if (freeze_network_on_switch) {
-        if (net_surr_config.model.is_surrogate_on()) {
+        if (is_surrogate_off) {
             model_net_method_switch_to_surrogate();
             events_high_def_to_surrogate_switch(pe);
         } else {
@@ -340,7 +354,7 @@ static void switch_model(tw_pe * pe) {
 }
 
 
-void network_director(tw_pe * pe) {
+void network_director(tw_pe * pe, bool is_queue_empty) {
     assert(is_network_surrogate_configured);
     assert(network_director_enabled);
 
@@ -375,7 +389,7 @@ void network_director(tw_pe * pe) {
     }
 
     // ---- Past this means that we are in fact switching ----
-    net_surr_config.model.is_surrogate_on();
+    bool const surrogate_state_pre_switch = net_surr_config.model.is_surrogate_on();
 
     // Asking the director/model to switch
     if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
@@ -386,7 +400,7 @@ void network_director(tw_pe * pe) {
     }
 
     double const start = tw_clock_read();
-    switch_model(pe);
+    switch_model(pe, is_queue_empty);
     double const end = tw_clock_read();
     surrogate_switching_time += end - start;
 
@@ -396,15 +410,19 @@ void network_director(tw_pe * pe) {
         tw_trigger_gvt_hook_at(next_switch);
     }
 
-    if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) {
-        printf("Network switch completed!\n");
+    bool const is_surrogate_on = net_surr_config.model.is_surrogate_on();
+    if (is_surrogate_on == surrogate_state_pre_switch) {
+        // The surrogate was never switched!
+        return;
     }
+
+    master_printf(DEBUG_DIRECTOR == 1, "Network switch completed!\n");
     if (DEBUG_DIRECTOR > 1) {
         printf("PE %lu: Switch completed!\n", g_tw_mynode);
     }
 
     // Determining time in surrogate
-    if (net_surr_config.model.is_surrogate_on()) {
+    if (is_surrogate_on) {
         // Start tracking time spent in surrogate mode
         surrogate_time_last = end;
     } else {
@@ -434,10 +452,10 @@ void network_director_finalize(void) {
 }
 
 // === Function for application director to use switch to surrogate machinery
-void surrogate_switch_network_model(tw_pe * pe) {
+void surrogate_switch_network_model(tw_pe * pe, bool is_queue_empty) {
     // Simply expose the existing switch_model function for use by application director
     double const start = tw_clock_read();
-    switch_model(pe);
+    switch_model(pe, is_queue_empty);
     double const end = tw_clock_read();
     surrogate_switching_time += end - start;
 }

From ba7b826675ff4f690bb9465aa7bf81086b979be7 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 18 Jun 2025 10:03:17 -0400
Subject: [PATCH 173/188] Updating README and compile instructions

---
 CODES-compile-instructions.sh | 132 ++++++++++++++++++++++++++++++
 README.md                     | 149 +++++++++++++++++++++++++++++++++-
 2 files changed, 278 insertions(+), 3 deletions(-)
 create mode 100644 CODES-compile-instructions.sh

diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh
new file mode 100644
index 00000000..ac15c087
--- /dev/null
+++ b/CODES-compile-instructions.sh
@@ -0,0 +1,132 @@
+#!/usr/bin bash -x
+
+# Switches
+swm_enable=1
+union_enable=1
+torch_enable=0
+
+# Uncomment below for MPICH
+#export PATH=/usr/local/mpich-4.1.2/bin/:"$PATH"
+# Note: remember to compile MPICH with nemesis not with UCX support
+
+################## Actual scripts starts from here ##################
+
+# SWM has to be enabled for UNION to work
+if [ $union_enable = 1 ]; then
+    swm_enable=1
+fi
+
+# What to compile
+CUR_DIR="$PWD"
+
+##### Downloading everything #####
+
+git clone https://github.com/codes-org/codes --branch=kronos-develop
+git clone https://github.com/ross-org/ross --depth=20 --branch=at_gvt_arbitrary_function
+
+if [ $swm_enable = 1 ]; then
+    git clone https://github.com/pmodels/argobots --depth=1
+    # This version is one commit ahead
+    git clone https://github.com/helq/swm-workloads --depth=1 --branch=fix-global-variable-rem
+fi
+
+if [ $union_enable = 1 ]; then
+    # Downloading conceptual
+    curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz
+    tar xvf conceptual-1.5.1b.tar.gz
+    # Downloading union
+    git clone https://github.com/SPEAR-UIC/Union
+fi
+
+##### COMPILING #####
+
+mkdir ross/build
+pushd ross/build
+cmake .. -DROSS_BUILD_MODELS=ON -DCMAKE_INSTALL_PREFIX="$(realpath ./bin)" \
+  -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-g -Wall"
+#make VERBOSE=1
+make install -j4
+err=$?
+[[ $err -ne 0 ]] && exit $err
+popd
+
+if [ $swm_enable = 1 ]; then
+    pushd swm-workloads/swm
+    ./prepare.sh
+    mkdir build
+    pushd build
+    ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
+    #make V=1 && make install
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd && popd
+
+    pushd argobots
+    ./autogen.sh
+    mkdir build
+    pushd build
+    #../configure --enable-debug=all --disable-fast --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
+    ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
+    #make V=1 && make install
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd && popd
+fi
+
+if [ $union_enable = 1 ]; then
+    pushd conceptual-1.5.1b
+    PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd
+
+    pushd Union
+    ./prepare.sh
+    ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd
+fi
+
+
+mkdir codes/build
+pushd codes/build
+
+make_args_codes=(
+    -DCMAKE_PREFIX_PATH="$(realpath "$CUR_DIR/ross/build/bin")"
+    -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc
+    -DCMAKE_C_FLAGS="-g -Wall"
+    -DCMAKE_CXX_FLAGS="-g -Wall"
+    -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON
+    -DCMAKE_INSTALL_PREFIX="$(realpath bin)"
+)
+if [ $swm_enable = 1 ]; then
+    make_args_codes=(
+        "${make_args_codes[@]}"
+        -DSWM_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/swm-workloads/swm/build/maint")"
+        -DARGOBOTS_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/argobots/build/maint")"
+    )
+fi
+if [ $union_enable = 1 ]; then
+    make_args_codes=(
+        "${make_args_codes[@]}"
+        -DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")"
+    )
+fi
+if [ $torch_enable = 1 ]; then
+    make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=true)
+else
+    make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false)
+fi
+
+cmake .. "${make_args_codes[@]}"
+#make VERBOSE=1
+make -j4
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+popd
diff --git a/README.md b/README.md
index 3388fad8..a86424be 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,154 @@
 # CODES Discrete-event Simulation Framework
 
-### [Join our CODES user mailing list](https://mailchi.mp/75d0c8aa42c3/codes-user-group) to stay up to date with major changes, events, and news!
+A high-performance discrete-event simulation framework for modeling HPC system architectures, network fabrics, and storage systems. Built on top of ROSS (Rensselaer Optimistic Simulation System) for massively parallel simulation capabilities.
 
-### New? Check out the [Wiki for Installation, Tutorials, and Documentation](https://github.com/codes-org/codes/wiki)
+## Quick Start
 
-Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities. 
+The easiest way to build CODES is using our automated compilation script that handles all dependencies and configurations.
+
+1. **Download the compilation script** [click here](https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh) or:
+
+   ```bash
+   # Download the script to your desired directory
+   wget https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh
+   chmod +x CODES-compile-instructions.sh
+   ```
+
+2. **Edit and Run the script**:
+   ```bash
+   ./CODES-compile-instructions.sh
+   ```
+
+The script will create a new directory with all dependencies and CODES compiled and ready to use.
+
+## Features
+
+CODES provides comprehensive simulation capabilities for:
+
+### Network Topologies
+- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date)
+- **Torus**: Multi-dimensional torus networks
+- **Fat-tree**: Hierarchical tree topologies
+- **Express Mesh**: Enhanced mesh networks
+- **Simple P2P**: Point-to-point networks
+
+### Workload Generation
+- **SWM and UNION**: Workload generation
+- **MPI trace replay**: Support for DUMPI traces
+- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns
+
+### Multi-fidelity Simulation
+- **Network surrogate models**: Switch between high-fidelity and surrogate modes
+- **Application surrogate models**: Accelerate application-level simulation
+- **Adaptive directors**: Intelligent switching between simulation modes
+
+## Prerequisites
+
+- **MPI**: OpenMPI or MPICH for parallel execution
+- **CMake**: Version 3.12 or higher
+- **ROSS**: Rensselaer Optimistic Simulation System (handled by script)
+- **C/C++ compiler**: GCC or Clang with C++11 support
+
+Optional dependencies (automatically handled by script if enabled):
+- **UNION**: For advanced workload generation
+- **SWM**: For structured workload modeling
+- **Argobots**: Threading library for enhanced performance
+- **PyTorch**: For ML model integration (if enabled)
+
+## Manual Installation
+
+For advanced users who prefer manual installation:
+
+```bash
+# 1. Build and install ROSS first
+git clone https://github.com/ross-org/ROSS.git
+cd ROSS && mkdir build && cd build
+cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/ross
+make -j && make install
+cd ../..
+
+# 2. Clone and build CODES
+git clone https://github.com/codes-org/codes.git
+cd codes && mkdir build && cd build
+
+# 3. Configure with CMake
+cmake .. \
+  -DCMAKE_PREFIX_PATH=$HOME/ross \
+  -DCMAKE_C_COMPILER=mpicc \
+  -DCMAKE_CXX_COMPILER=mpicxx \
+  -DCMAKE_BUILD_TYPE=Debug \
+  -DBUILD_TESTING=ON
+
+# 4. Build and test
+make -j
+ctest
+```
+
+## Testing
+
+Check your installation with:
+
+```bash
+# Run all tests
+cd codes/build && ctest
+
+# Run specific tests
+ctest -R modelnet-test-dragonfly
+ctest -R union-workload-test-surrogate
+
+# Keep test output for inspection
+DONT_DELETE_TEST_DIR=1 ctest -R your-test-name
+```
+
+All tests pass to date of writing, including those that require UNION support. Tests verify:
+
+- Network model correctness and determinism
+- Workload generation and replay accuracy
+- Multi-fidelity simulation switching
+- Parallel execution and reverse computation
+- Configuration file parsing and LP setup
+
+## Basic Usage
+
+Running a CODES experiment is tricky due to the large amount of compontents that have to be correctly configured. Please use the [experiments repo](https://github.com/CODES-org/experiments) for examples of simulation you can run.
+
+If you have used the compilation script from above (quick start) run the following (in the folder that contains `CODES-compile-instructions.sh`):
+
+```bash
+git clone https://github.com/CODES-org/experiments
+```
+
+To run an experiment do:
+
+```bash
+cd experiments
+bash run-experiment.sh path-to-experiment/script.sh
+```
+
+A folder will be created under `path-to-experiment/results` containing the result of running the experiment.
+
+## Contributing
+
+Before contributing please run the full test suite. Some tests verify our determinism guarantees (every simulation should be reproducible), i.e, the number of net events processed between two runs in parallel mode should be the same. We want to keep our determinism guarantees forever. Non-deterministic simulations are often the result of faulty reverse handlers, which have caused serious bug failures and hundreds of hours of debugging.
+
+If you find yourself with a model that is not deterministic (two runs with the same initial configuration produce different numbers of net events), then you can check for errors in the reverse handlers via the ROSS feature: reverse handlers check. For this, run your model with `--synch=6`. Make sure that all LPs in the simulation (ie, routers, terminals and others) have implemented proper reversibility checks (defined in a struct of type `crv_checkpointer`).
+
+## License
+
+See LICENSE file for licensing information.
+
+## Credits
+
+Developed by Argonne National Laboratory and Rensselaer Polytechnic Institute, with collaborations from UC Davis and Lawrence Livermore National Laboratory.
+
+## About CODES
+
+Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities.
 
 Our simulations build upon the Rensselaer Optimistic Simulation System (ROSS), a discrete event simulation framework that allows simulations to be run in parallel, decreasing the simulation run time of massive simulations to hours. We are using ROSS to explore topics including large-scale storage systems, I/O workloads, HPC network fabrics, distributed science systems, and data-intensive computation environments.
 
 The CODES project is a collaboration between the Mathematics and Computer Science department at Argonne National Laboratory and Rensselaer Polytechnic Institute. We collaborate with researchers at University of California at Davis to come up with novel methods for analysis and visualizations of large-scale event driven simulations. We also collaborate with Lawrence Livermore National Laboratory for modeling HPC interconnect systems.
+
+## About this README
+
+Claude helped us in templating this doc. Any typos are our own and after the fact.

From 07a4002fbe6b72d66c38839829ee1727d6689914 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 20 Jun 2025 09:17:05 -0400
Subject: [PATCH 174/188] Small print changes

---
 src/network-workloads/model-net-mpi-replay.c        | 2 +-
 src/surrogate/app-iteration-predictor/average.c     | 2 ++
 src/surrogate/application-surrogate.c               | 2 +-
 src/workload/methods/codes-conc-online-comm-wrkld.C | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 475d1675..5d42f2b6 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -37,7 +37,7 @@
 #define MAX_STATS 65536
 #define COL_TAG 1235
 #define BAR_TAG 1234
-#define PRINT_SYNTH_TRAFFIC 1
+#define PRINT_SYNTH_TRAFFIC 0
 #define MAX_JOBS 64
 #define MAX_PERIODS_PER_APP 512
 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine
diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index 615594a5..db098307 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -122,6 +122,8 @@ static void model_calls_predict_rc(tw_lp * lp, int nw_id_in_pe) {}
 
 static void reset_with(bool const * app_just_ended) {
     ready_to_skip = false;
+
+    master_printf("Resetting (average) application predictor at GVT %d time %f\n", g_tw_gvt_done, g_tw_pe->GVT_sig.recv_ts)
     
     int last_iter[my_config.num_apps];
     find_max_iter_per_app(last_iter); // We should start tracking iterations from the next iteration
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
index 736fdd89..fb6044df 100644
--- a/src/surrogate/application-surrogate.c
+++ b/src/surrogate/application-surrogate.c
@@ -90,7 +90,7 @@ static void application_director_post_switch(tw_pe * pe, bool is_queue_empty) {
         time_in_surrogate += start - surrogate_time_last;
         surrogate_time_last = 0.0;
     } else {
-        master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+        master_printf("Resetting network predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
     }
     director_state = PRE_JUMP;
 }
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index a78f9abf..cdd1705d 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -170,6 +170,7 @@ void UNION_MPI_Finalize()
     ABT_thread_yield_to(global_prod_thread);
 }
 
+// cycle_count assumes 1 GHz, meaning, 1 cycle is 1 nanosecond. This is different from SWM_Compute!
 void UNION_Compute(long cycle_count)
 {
     /* Add an event in the shared queue and then yield */

From 8be98f948eb624b5d8249aaed24ed9bb1fe17f0d Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 20 Jun 2025 12:45:24 -0400
Subject: [PATCH 175/188] Allowing conc-online to load json files from config
 path

---
 src/network-workloads/model-net-mpi-replay.c  | 55 ++++++++++++++
 .../methods/codes-conc-online-comm-wrkld.C    | 76 ++++++++++---------
 2 files changed, 94 insertions(+), 37 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 5d42f2b6..0d9eea3d 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -90,10 +90,17 @@ static lp_io_handle io_handle;
 static unsigned int lp_io_use_suffix = 0;
 static int do_lp_io = 0;
 
+/* Workload JSON file mapping structure */
+struct codes_workload_json_mapping {
+    char workload_type[MAX_NAME_LENGTH_WKLD];
+    char json_path[8192];
+};
+
 /* variables for loading multiple applications */
 char workloads_conf_file[8192];
 char workloads_timer_file[8192];
 char workloads_period_file[8192];
+char workload_json_files[8192];
 char alloc_file[8192];
 int num_traces_of_job[MAX_JOBS];
 int is_job_synthetic[MAX_JOBS]; //0 if job is not synthetic 1 if job is
@@ -105,6 +112,8 @@ int period_count[MAX_JOBS];
 double period_time[MAX_JOBS][MAX_PERIODS_PER_APP];
 float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP];
 char file_name_of_job[MAX_JOBS][8192];
+struct codes_workload_json_mapping workload_json_mappings[MAX_JOBS];
+int workload_json_mapping_count;
 
 tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0};
 
@@ -2563,6 +2572,20 @@ void nw_test_init(nw_state* s, tw_lp* lp)
        {
             strcpy(oc_params.workload_name, file_name_of_job[lid.job]);      
        }
+
+       /* Look up custom JSON path for this workload */
+       oc_params.file_path[0] = '\0';
+       char * wrkl_name_settings = oc_params.workload_name;
+       if(strncmp("conceptual", oc_params.workload_name, 10) == 0) {
+            wrkl_name_settings = "conceptual";
+       }
+       for(int i = 0; i < workload_json_mapping_count; i++) {
+           if(strcmp(workload_json_mappings[i].workload_type, wrkl_name_settings) == 0) {
+                strcpy(oc_params.file_path, workload_json_mappings[i].json_path);
+                break;
+           }
+       }
+
        /*TODO: nprocs is different for dumpi and online workload. for
         * online, it is the number of ranks to be simulated. */
        // printf("conc-online num_traces_of_job %d\n", num_traces_of_job[lid.job]);
@@ -2667,6 +2690,7 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    }
 
    if (iter_predictor && !am_i_synthetic) {
+        assert(s->wrkld_id != -1);
         int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank);
         if (ending_iter == -1) {
             tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. app id=%d", s->app_id);
@@ -3802,6 +3826,7 @@ const tw_optdef app_opt [] =
 	TWOPT_CHAR("workload_file", workload_file, "workload file name"),
 	TWOPT_CHAR("alloc_file", alloc_file, "allocation file name"),
 	TWOPT_CHAR("workload_conf_file", workloads_conf_file, "workload config file name"),
+	TWOPT_CHAR("workload_json_files", workload_json_files, "workload json files mapping file name"),
     TWOPT_CHAR("link_failure_file", g_nm_link_failure_filepath, "filepath for override of link failure file from configuration for supporting models"),
 	TWOPT_CHAR("workload_timer_file", workloads_timer_file, "workload timer file name (for starting/pausing/stopping synthetic traffic)"),
 	TWOPT_CHAR("workload_period_file", workloads_period_file, "workload periods file name (for changing the per-job synthetic traffic load at specified periods/times)"),
@@ -4026,9 +4051,12 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 	return -1;
     }
 
+    bool is_conc_enabled = false;
+
     /* Xin: Currently rendezvous protocol cannot work with Conceptual online workloads */
     if(strcmp(workload_type, "conc-online") == 0) {
         EAGER_THRESHOLD = INT64_MAX;
+        is_conc_enabled = true;
     }
 
 	jobmap_ctx = NULL; // make sure it's NULL if it's not used
@@ -4133,6 +4161,33 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
             }
             fclose(period_file);
         }
+
+        /* Load workload JSON files mapping if specified */
+        if(is_conc_enabled && strlen(workload_json_files) > 0)
+        {
+            FILE *json_file = fopen(workload_json_files, "r");
+            if(!json_file)
+                tw_error(TW_LOC, "\n Could not open file %s ", workload_json_files);
+
+            workload_json_mapping_count = 0;
+
+            while(!feof(json_file) && workload_json_mapping_count < MAX_JOBS)
+            {
+                if(fscanf(json_file, "%s %s",
+                    workload_json_mappings[workload_json_mapping_count].workload_type,
+                    workload_json_mappings[workload_json_mapping_count].json_path) == 2)
+                {
+                    workload_json_mapping_count++;
+                }
+            }
+            fclose(json_file);
+
+            if(enable_debug)
+                printf("\n Loaded %d workload JSON mappings\n", workload_json_mapping_count);
+        }
+        if(!is_conc_enabled && strlen(workload_json_files) > 0) {
+            printf("\n Conceptual online worloads will not run, thus, we won't read any json files from --workload_json_files\n");
+        }
     }
     else
     {
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index cdd1705d..96f93764 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -1839,6 +1839,44 @@ static void workload_caller(void * arg)
     }
 }
 
+static void determine_workload_paths(const char* workload_name, const char* custom_json_path, string& swm_path, string& conc_path, bool& isconc)
+{
+    /* First check if custom JSON path is provided through file_path parameter */
+    if(custom_json_path && strlen(custom_json_path) > 0) {
+        if(strncmp(workload_name, "conceptual", 10) == 0) {
+            conc_path.append(custom_json_path);
+            isconc = 1;
+        } else {
+            swm_path.append(custom_json_path);
+        }
+        return;
+    }
+
+    /* Fall back to hardcoded paths */
+    swm_path.append(SWM_DATAROOTDIR);
+    if(strcmp(workload_name, "lammps") == 0) {
+        swm_path.append("/lammps_workload.json");
+    } else if(strcmp(workload_name, "nekbone") == 0) {
+        swm_path.append("/workload.json");
+    } else if(strcmp(workload_name, "milc") == 0) {
+        swm_path.append("/milc_skeleton.json");
+    } else if(strcmp(workload_name, "nearest_neighbor") == 0) {
+        swm_path.append("/skeleton.json");
+    } else if(strcmp(workload_name, "incast") == 0) {
+        swm_path.append("/incast.json");
+    } else if(strcmp(workload_name, "incast1") == 0) {
+        swm_path.append("/incast1.json");
+    } else if(strcmp(workload_name, "incast2") == 0) {
+        swm_path.append("/incast2.json");
+    } else if(strncmp(workload_name, "conceptual", 10) == 0) {
+        conc_path.append(UNION_DATADIR);
+        conc_path.append("/conceptual.json");
+        isconc = 1;
+    } else {
+        tw_error(TW_LOC, "\n Undefined workload type %s ", workload_name);
+    }
+}
+
 static int comm_online_workload_load(const void * params, int app_id, int rank)
 {
     /* LOAD parameters from JSON file*/
@@ -1867,43 +1905,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
     bool isconc=0;
 
     // printf("workload name: %s\n", o_params->workload_name);
-    swm_path.append(SWM_DATAROOTDIR);
-    if(strcmp(o_params->workload_name, "lammps") == 0)
-    {
-        swm_path.append("/lammps_workload.json");
-    }
-    else if(strcmp(o_params->workload_name, "nekbone") == 0)
-    {
-        swm_path.append("/workload.json"); 
-    }
-    else if(strcmp(o_params->workload_name, "milc") == 0)
-    {
-        swm_path.append("/milc_skeleton.json");
-    }
-    else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0)
-    {
-        swm_path.append("/skeleton.json"); 
-    }
-    else if(strcmp(o_params->workload_name, "incast") == 0)
-    {
-        swm_path.append("/incast.json"); 
-    }
-    else if(strcmp(o_params->workload_name, "incast1") == 0)
-    {
-        swm_path.append("/incast1.json"); 
-    }
-    else if(strcmp(o_params->workload_name, "incast2") == 0)
-    {
-        swm_path.append("/incast2.json"); 
-    }    
-    else if(strncmp(o_params->workload_name, "conceptual", 10) == 0)
-    {
-        conc_path.append(UNION_DATADIR);
-        conc_path.append("/conceptual.json");
-        isconc = 1;
-    }
-    else
-        tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name);
+    determine_workload_paths(o_params->workload_name, o_params->file_path, swm_path, conc_path, isconc);
 
     // printf("\nUnion jason path %s\n", conc_path.c_str());
     if(isconc){

From 25ab4c9b081c01f95ae915ad7af14736fd50987a Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Fri, 20 Jun 2025 17:57:31 -0400
Subject: [PATCH 176/188] If we pass on a `workload_json_files` conf file, we
 allow a job to take a different name

The idea of this change is to be able to have a configuration file like:

```
20 milc1 1 0
15 conceptual-jacobi3d-5 1 0
```

While the workload_json_files allow us to tell CODES where to look for
the json configuration files:

```
milc1 path-to/milc1.json
conceptual-jacobi3d-5 path-to/my-conceptual-jacobi3d.json
```
---
 src/network-workloads/model-net-mpi-replay.c  |  6 +--
 .../methods/codes-conc-online-comm-wrkld.C    | 54 ++++++++++++-------
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 0d9eea3d..36bed9f8 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -2575,12 +2575,8 @@ void nw_test_init(nw_state* s, tw_lp* lp)
 
        /* Look up custom JSON path for this workload */
        oc_params.file_path[0] = '\0';
-       char * wrkl_name_settings = oc_params.workload_name;
-       if(strncmp("conceptual", oc_params.workload_name, 10) == 0) {
-            wrkl_name_settings = "conceptual";
-       }
        for(int i = 0; i < workload_json_mapping_count; i++) {
-           if(strcmp(workload_json_mappings[i].workload_type, wrkl_name_settings) == 0) {
+           if(strcmp(workload_json_mappings[i].workload_type, oc_params.workload_name) == 0) {
                 strcpy(oc_params.file_path, workload_json_mappings[i].json_path);
                 break;
            }
diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C
index 96f93764..42b4b5c7 100644
--- a/src/workload/methods/codes-conc-online-comm-wrkld.C
+++ b/src/workload/methods/codes-conc-online-comm-wrkld.C
@@ -1839,41 +1839,41 @@ static void workload_caller(void * arg)
     }
 }
 
-static void determine_workload_paths(const char* workload_name, const char* custom_json_path, string& swm_path, string& conc_path, bool& isconc)
+static void determine_workload_paths(online_comm_params const * o_params, string& swm_path, string& conc_path, bool& isconc)
 {
     /* First check if custom JSON path is provided through file_path parameter */
-    if(custom_json_path && strlen(custom_json_path) > 0) {
-        if(strncmp(workload_name, "conceptual", 10) == 0) {
-            conc_path.append(custom_json_path);
+    if(strlen(o_params->file_path) > 0) {
+        if(strncmp(o_params->workload_name, "conceptual", 10) == 0) {
+            conc_path.append(o_params->file_path);
             isconc = 1;
         } else {
-            swm_path.append(custom_json_path);
+            swm_path.append(o_params->file_path);
         }
         return;
     }
 
     /* Fall back to hardcoded paths */
     swm_path.append(SWM_DATAROOTDIR);
-    if(strcmp(workload_name, "lammps") == 0) {
+    if(strcmp(o_params->workload_name, "lammps") == 0) {
         swm_path.append("/lammps_workload.json");
-    } else if(strcmp(workload_name, "nekbone") == 0) {
+    } else if(strcmp(o_params->workload_name, "nekbone") == 0) {
         swm_path.append("/workload.json");
-    } else if(strcmp(workload_name, "milc") == 0) {
+    } else if(strcmp(o_params->workload_name, "milc") == 0) {
         swm_path.append("/milc_skeleton.json");
-    } else if(strcmp(workload_name, "nearest_neighbor") == 0) {
+    } else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0) {
         swm_path.append("/skeleton.json");
-    } else if(strcmp(workload_name, "incast") == 0) {
+    } else if(strcmp(o_params->workload_name, "incast") == 0) {
         swm_path.append("/incast.json");
-    } else if(strcmp(workload_name, "incast1") == 0) {
+    } else if(strcmp(o_params->workload_name, "incast1") == 0) {
         swm_path.append("/incast1.json");
-    } else if(strcmp(workload_name, "incast2") == 0) {
+    } else if(strcmp(o_params->workload_name, "incast2") == 0) {
         swm_path.append("/incast2.json");
-    } else if(strncmp(workload_name, "conceptual", 10) == 0) {
+    } else if(strncmp(o_params->workload_name, "conceptual", 10) == 0) {
         conc_path.append(UNION_DATADIR);
         conc_path.append("/conceptual.json");
         isconc = 1;
     } else {
-        tw_error(TW_LOC, "\n Undefined workload type %s ", workload_name);
+        tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name);
     }
 }
 
@@ -1905,7 +1905,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
     bool isconc=0;
 
     // printf("workload name: %s\n", o_params->workload_name);
-    determine_workload_paths(o_params->workload_name, o_params->file_path, swm_path, conc_path, isconc);
+    determine_workload_paths(o_params, swm_path, conc_path, isconc);
 
     // printf("\nUnion jason path %s\n", conc_path.c_str());
     if(isconc){
@@ -1915,8 +1915,16 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
 
             // printf("workload_name: %s\n", o_params->workload_name);
             union_bench_param *tmp_params = (union_bench_param *) calloc(1, sizeof(union_bench_param));
-            strcpy(tmp_params->conc_program, &o_params->workload_name[11]);
-            child = root.get_child(tmp_params->conc_program);
+            child = root.get_child(&o_params->workload_name[11]);
+
+            // if we were given a path, we read the type of workload from the config
+            bool const has_path = o_params->file_path[0] != '\0';
+            if (has_path) {
+                strcpy(tmp_params->conc_program, child.get_child("argv").begin()->second.data().c_str());
+            } else {
+                strcpy(tmp_params->conc_program, &o_params->workload_name[11]);
+            }
+
             tmp_params->conc_argc = child.get<int>("argc");
             int i = 0;
             BOOST_FOREACH(boost::property_tree::ptree::value_type &v, child.get_child("argv"))
@@ -1931,7 +1939,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
         }
         catch(std::exception & e)
         {
-            printf("%s \n", e.what());
+            printf("Exception when reading UNION/Conceptual json config %s: %s\n", conc_path.c_str(), e.what());
             return -1;
         }
     }
@@ -1939,12 +1947,18 @@ static int comm_online_workload_load(const void * params, int app_id, int rank)
         try {
             std::ifstream jsonFile(swm_path.c_str());
             boost::property_tree::json_parser::read_json(jsonFile, root);
-            uint32_t process_cnt = root.get<uint32_t>("jobs.size", 1);
             cpu_freq = root.get<double>("jobs.cfg.cpu_freq") / 1e9; 
+
+            // if we were given a path, we read the type of workload from the config
+            bool const has_path = o_params->file_path[0] != '\0';
+            if (has_path) {
+                strcpy(o_params->workload_name, root.get<string>("jobs.cfg.app").c_str());
+                strcpy(my_ctx->sctx.workload_name, o_params->workload_name);
+            }
         }
         catch(std::exception & e)
         {
-            printf("%s \n", e.what());
+            printf("Exception when reading SWM json config %s: %s\n", swm_path.c_str(), e.what());
             return -1;
         }
         my_ctx->sctx.isconc = 0;

From 64c6cce74c3eb03abdaec251e4369eab0bbbd2a7 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Mon, 23 Jun 2025 18:54:51 -0400
Subject: [PATCH 177/188] Extending iterator predictor to predict when to
 restart the simulation

---
 .../app-iteration-predictor/common.h          |  14 +-
 src/network-workloads/model-net-mpi-replay.c  |  55 +++++---
 .../app-iteration-predictor/average.c         | 125 ++++++++++++++----
 3 files changed, 150 insertions(+), 44 deletions(-)

diff --git a/codes/surrogate/app-iteration-predictor/common.h b/codes/surrogate/app-iteration-predictor/common.h
index d2eabc99..5f5e7b96 100644
--- a/codes/surrogate/app-iteration-predictor/common.h
+++ b/codes/surrogate/app-iteration-predictor/common.h
@@ -17,9 +17,19 @@ extern "C" {
  * Iteration application prediction machinery. Notice that any of these predictors have to know how many iterations to run in total, thus they need data about the number of steps the application will take.
  */
 
+enum NODE_TYPE {
+    NODE_TYPE_unassigned = 0,
+    NODE_TYPE_background_noise = 1,
+    NODE_TYPE_app = 2,
+};
+
 struct app_iter_node_config {
     int app_id;
-    int app_ending_iter;
+    enum NODE_TYPE type;
+    union {
+        // To be used by NODE_TYPE_app only
+        int app_ending_iter;
+    };
 };
 
 // This returns how much to skip ahead and when to restart
@@ -45,8 +55,8 @@ typedef void (*feed_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id,
 typedef void (*end_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, double time); // Tells the predictor that the application has stopped running
 typedef struct iteration_pred (*predict_pred_iter_f) (tw_lp * lp, int nw_id_in_pe); // Get prediction
 typedef void (*predict_pred_iter_rc_f) (tw_lp * lp, int nw_id_in_pe); // Reverse prediction (reverse state of predictor one prediction)
-// Director calls to predictor module
 typedef bool (*have_we_hit_switch_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id); // Are we ready to switch to a future iterationº
+// Director calls to predictor module
 typedef bool (*is_predictor_read_f) (void); // Checking if it is a good time to switch (enough data has been collected or we have received some notification of an application ending, forcing us to restart collecting data). This might trigger an MPI_Allreduce call, thus has to be called by all PEs!
 typedef void (*reset_pred_iter_f) (void); // Resets the predictor (eg, average)
 typedef struct fast_forward_values (*prepare_fast_forward_f) (void); // Checking if it is a good time to switch (enough data has been collected)
diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 36bed9f8..539b8299 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -434,6 +434,7 @@ struct nw_message
            int saved_syn_length;
            int saved_perm;  // Used by PERMUTATION
            unsigned long saved_prev_switch;  // Used by PERMUTATION
+           unsigned long long saved_gen_data;
        } gen;
 
        // For CLI_BCKGND_ARRIVE and MPI_SEND_ARRIVED_CB
@@ -907,10 +908,9 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp
         s->saved_perm_dest = m->rc.gen.saved_perm;
         tw_rand_reverse_unif(lp->rng);
     }
-    int i;
-    for (i=0; i < m->rc.gen.saved_syn_length; i++){
+    s->gen_data = m->rc.gen.saved_gen_data;
+    for (int i=0; i < m->rc.gen.saved_syn_length; i++){
         model_net_event_rc2(lp, &m->event_rc);
-        s->gen_data -= payload_sz;
         num_syn_bytes_sent -= payload_sz;
         s->num_bytes_sent -= payload_sz;
         s->ross_sample.num_bytes_sent -= payload_sz;
@@ -925,6 +925,9 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp
         s->saved_perm_dest = m->rc.gen.saved_perm;
         tw_rand_reverse_unif(lp->rng);
     }
+    if (bf->c13) {
+        iter_predictor->model.predict_rc(lp, s->nw_id_in_pe);
+    }
 }
 
 /* generate synthetic traffic */
@@ -1078,6 +1081,9 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
 			length = 0;
 		}
 	}
+
+    m->rc.gen.saved_gen_data = s->gen_data;
+
     if(length > 0)
     {
         // m->event_array_rc = (model_net_event_return) malloc(length * sizeof(model_net_event_return));
@@ -1112,10 +1118,19 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
 
     /* New event after MEAN_INTERVAL */  
     tw_stime ts = mean_interval_of_job[s->app_id];
-    tw_event * e;
-    nw_message * m_new;
-    e = tw_event_new(lp->gid, ts, lp);
-    m_new = (struct nw_message*)tw_event_data(e);
+    if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, 0)) {  // background synthetic lps have no iterations
+        bf->c13 = 1;
+        struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe);
+        double const restarting_background_at = iter_pred.restart_at;
+        // this check is necessary because we don't rely on iteration count for switch like applications do
+        if (restarting_background_at > tw_now(lp)) {
+            long const periods_to_jump = ceil((restarting_background_at - tw_now(lp)) / mean_interval_of_job[s->app_id]);
+            ts *= periods_to_jump;
+            s->gen_data += periods_to_jump * (length + payload_sz);
+        }
+    }
+    tw_event * e = tw_event_new(lp->gid, ts, lp);
+    nw_message * m_new = (struct nw_message*)tw_event_data(e);
     m_new->msg_type = CLI_BCKGND_GEN;
     tw_event_send(e);
     
@@ -2625,9 +2640,6 @@ void nw_test_init(nw_state* s, tw_lp* lp)
    s->compute_time = 0;
    s->elapsed_time = 0;
         
-   s->app_id = lid.job;
-   s->local_rank = lid.rank;
-
    bool am_i_synthetic = false;
    if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0)
    {
@@ -2685,17 +2697,26 @@ void nw_test_init(nw_state* s, tw_lp* lp)
        }
    }
 
-   if (iter_predictor && !am_i_synthetic) {
-        assert(s->wrkld_id != -1);
-        int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank);
-        if (ending_iter == -1) {
-            tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. app id=%d", s->app_id);
-        } else {
+   if (iter_predictor) {
+        if (am_i_synthetic) {
             struct app_iter_node_config conf = {
                 .app_id = s->app_id,
-                .app_ending_iter = ending_iter,
+                .type = NODE_TYPE_background_noise,
             };
             iter_predictor->model.init(lp, s->nw_id_in_pe, &conf);
+        } else {
+            assert(s->wrkld_id != -1);
+            int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank);
+            if (ending_iter == -1) {
+                tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. app id=%d", s->app_id);
+            } else {
+                struct app_iter_node_config conf = {
+                    .app_id = s->app_id,
+                    .type = NODE_TYPE_app,
+                    .app_ending_iter = ending_iter,
+                };
+                iter_predictor->model.init(lp, s->nw_id_in_pe, &conf);
+            }
         }
    }
 
diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index db098307..eb6355bd 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -1,10 +1,13 @@
 #include "surrogate/app-iteration-predictor/average.h"
 #include "codes/codes.h"
+#include "surrogate/app-iteration-predictor/common.h"
 #include <assert.h>
 #include <limits.h>
 #include <math.h>
+#include <ross-extern.h>
 
-#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); }
+#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
+#define master_printf_if(val, ...) if (val && g_tw_mynode == 0) { printf(__VA_ARGS__); }
 
 static struct avg_app_config my_config = {0};
 
@@ -24,7 +27,8 @@ enum APP_STATUS {
 };
 
 struct app_data {
-    int num_nodes;
+    enum NODE_TYPE type;
+    int num_nodes; // nodes in PE
     int nodes_with_enough_iters;
     int ending_iteration;  // last iteration the simulation will run (aka, num of iterations)
     int nodes_that_have_ended;
@@ -39,6 +43,15 @@ struct app_data {
 static struct app_data * arr_app_data = NULL; // array containing info for all apps
 static bool ready_to_skip = false;
 
+static inline char const * string_node_type(enum NODE_TYPE type) {
+    switch (type) {
+        case NODE_TYPE_unassigned:       return "Unassigned app";
+        case NODE_TYPE_background_noise: return "Background noise/synthetic pattern";
+        case NODE_TYPE_app:              return "App that runs on predictable iterations";
+        default:                         return "Unknown type!";
+    }
+}
+
 
 static void find_max_iter_per_app(int * save_last_iter);
 static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count);
@@ -64,27 +77,54 @@ static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_c
 
     // Storing app data info
     arr_app_data[config->app_id].num_nodes++;
+
+    if (arr_app_data[config->app_id].type == NODE_TYPE_unassigned) {
+        arr_app_data[config->app_id].type = config->type;
+    } else if (arr_app_data[config->app_id].type != config->type) {
+        tw_error(TW_LOC, "Two different ranks for application %d have signaded different compute node types. LP ID %d is of type '%s', but app had been configured as '%s'", lp->gid, string_node_type(arr_app_data[config->app_id].type), string_node_type(config->type));
+    }
+
+    if (config->type == NODE_TYPE_background_noise) {
+        return; // nothing left to set for synthetic workloads
+    }
+
     if (arr_app_data[config->app_id].ending_iteration == INT_MIN) {
         arr_app_data[config->app_id].ending_iteration = config->app_ending_iter;
-    } else {
-        if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) {
-            tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration);
-        }
+    } else if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) {
+        tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration);
     }
 }
 
+static inline void assert_app_initialized(int nw_id_in_pe) {
+    int const app_id = app_id_for(nw_id_in_pe);
+    if (app_id == -1) {
+        assert(arr_app_data[app_id].type == NODE_TYPE_unassigned);
+        tw_error(TW_LOC, "Predictor for node was not initialized! Node ID (on PE) %d", nw_id_in_pe);
+    }
+}
 
 static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
     (void) lp;
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
-    if (app_id_for(nw_id_in_pe) == -1) {
-        tw_error(TW_LOC, "Predictor for node was not initialized! Node ID (on PE) %d", nw_id_in_pe);
+    assert_app_initialized(nw_id_in_pe);
+
+    int const app_id = app_id_for(nw_id_in_pe);
+
+    // We should only be handling non-synthetic workloads (aka, no background noise)
+    static bool shown_warning = false;
+    if (!shown_warning && arr_app_data[app_id].type == NODE_TYPE_background_noise) {
+        shown_warning = true;
+        tw_warning(TW_LOC, "`feed` has been called in App %d, which was determined to be Background traffic (aka, a synthetic workload)", app_id);
+        return;
     }
+
+    assert(arr_app_data[app_id].type == NODE_TYPE_app);
     struct node_data * node_data = &arr_node_data[nw_id_in_pe];
-    if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter`
+    // we only collect iteration data past the previous `last_iter`
+    if (node_data->last_iter >= iter) {
         return;
     }
-    if (arr_app_data[node_data->app_id].status != APP_STATUS_running) {
+    if (arr_app_data[app_id].status != APP_STATUS_running) {
         tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured");
     }
     node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time;
@@ -93,13 +133,13 @@ static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double itera
     node_data->last_iter = iter;
     // We've hit the required number of iterations to feed our predictor
     if (node_data->acc_iters == my_config.num_iters_to_collect) {
-        arr_app_data[node_data->app_id].nodes_with_enough_iters++;
+        arr_app_data[app_id].nodes_with_enough_iters++;
     }
 }
 
 
 static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) {
-    assert(app_id_for(nw_id_in_pe) != -1);
+    assert_app_initialized(nw_id_in_pe);
     struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
     app_data->nodes_that_have_ended++;
     if (app_data->nodes_that_have_ended == app_data->num_nodes) {
@@ -110,7 +150,7 @@ static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double iteration_time
 
 static struct iteration_pred model_calls_predict(tw_lp * lp, int nw_id_in_pe) {
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
-    assert(app_id_for(nw_id_in_pe) != -1);
+    assert_app_initialized(nw_id_in_pe);
     struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
     return (struct iteration_pred) {
         .resume_at_iter = app_data->pred.resume_at_iter,
@@ -151,10 +191,24 @@ static void reset_with(bool const * app_just_ended) {
 
 static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) {
     assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
-    int const app_id = app_id_for(nw_id_in_pe);
-    if (ready_to_skip && iteration_id == arr_app_data[app_id].pred.jump_at_iter) {
-        return true;
+    assert_app_initialized(nw_id_in_pe);
+
+    if (!ready_to_skip) {
+        return false;
     }
+
+    struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
+    switch (app_data->type) {
+        case NODE_TYPE_background_noise:
+            return true;
+        case NODE_TYPE_app:
+            if (iteration_id == app_data->pred.jump_at_iter) {
+                return true;
+            }
+        default:
+        break;
+    }
+
     return false;
 }
 
@@ -173,7 +227,7 @@ static inline void post_init_share_ending_iteration(void) {
         if (app_data->ending_iteration == INT_MIN) {
             if (ending_iteration[i] == INT_MIN) {
                 app_data->status = APP_STATUS_completed_everywhere;
-                master_printf("Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i);
+                master_printf_if(app_data->type == NODE_TYPE_unassigned, "Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i);
             } else {
                 // The application has "completed" in this PE already!
                 app_data->status = APP_STATUS_just_completed;
@@ -416,19 +470,39 @@ static double find_latest_restart_time(bool const * is_running, double const * a
     return last_to_finish;
 }
 
+static double find_earliest_restart_time(bool const * is_running, double const * apps_restart_at_time) {
+    // Compute last application to restart (this is restarting_at)
+    double first_to_finish = DBL_MAX;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (is_running[i] && first_to_finish > apps_restart_at_time[i]) {
+            first_to_finish = apps_restart_at_time[i];
+        }
+    }
+    return first_to_finish;
+}
+
 static void set_app_prediction_data(
     bool const * is_running,
     int const * last_iter,
     int const * apps_restart_at_iter,
-    double const * apps_restart_at_time) {
+    double const * apps_restart_at_time,
+    double const earliest_app_restart) {
     // Set values for iteration to restart at and iterations to jump for each application
     for (int i = 0; i < my_config.num_apps; i++) {
-        if (!is_running[i]) {
-            continue;
+        switch (arr_app_data[i].type) {
+            case NODE_TYPE_unassigned:
+            break;
+            case NODE_TYPE_background_noise:
+                arr_app_data[i].pred.restart_at = earliest_app_restart;
+            break;
+            case NODE_TYPE_app:
+            if (is_running[i]) {
+                arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1;
+                arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i];
+                arr_app_data[i].pred.restart_at = apps_restart_at_time[i];
+            }
+            break;
         }
-        arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1;
-        arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i];
-        arr_app_data[i].pred.restart_at = apps_restart_at_time[i];
     }
 }
 
@@ -456,7 +530,8 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void)
     bool worth_switching = compute_restart_params(is_running, avg_iter_time, last_iter, last_iter_time, switch_time, apps_restart_at_time, apps_restart_at_iter);
 
     //   b. Compute last application to restart (this is restarting_at)
-    double last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time);
+    double const last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time);
+    double const first_to_finish = find_earliest_restart_time(is_running, apps_restart_at_time);
 
     //   c. If the number of iterations to skip is zero for any app, force reset of predictor tracking
     if (!worth_switching) {
@@ -467,7 +542,7 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void)
     }
 
     // 3. Set values for iteration to restart at and iterations to jump for each application
-    set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time);
+    set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time, first_to_finish);
     ready_to_skip = true;
 
     return (struct fast_forward_values) {

From b992e4a45abaabea0d16c4d553ab3f7f8024c9e3 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 24 Jun 2025 12:39:57 -0400
Subject: [PATCH 178/188] Making post_init_share_ending_iteration intent
 clearer

---
 .../app-iteration-predictor/average.c         | 57 ++++++++++++++-----
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index eb6355bd..1dfae210 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -7,7 +7,6 @@
 #include <ross-extern.h>
 
 #define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
-#define master_printf_if(val, ...) if (val && g_tw_mynode == 0) { printf(__VA_ARGS__); }
 
 static struct avg_app_config my_config = {0};
 
@@ -212,6 +211,20 @@ static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iter
     return false;
 }
 
+static inline void find_app_types(enum NODE_TYPE * app_type) {
+    int app_type_here[my_config.num_apps];
+    for (int i = 0; i < my_config.num_apps; i++) {
+        app_type_here[i] = arr_app_data[i].type;
+    }
+    int app_type_int[my_config.num_apps];
+    mpi_allreduce_int_max(app_type_here, app_type_int, my_config.num_apps);
+
+    // Convert back to enums
+    for (int i = 0; i < my_config.num_apps; i++) {
+        app_type[i] = app_type_int[i];
+    }
+}
+
 static inline void post_init_share_ending_iteration(void) {
     // Sharing ending_iteration results across PEs
     int ending_iteration_here[my_config.num_apps];
@@ -221,20 +234,38 @@ static inline void post_init_share_ending_iteration(void) {
     int ending_iteration[my_config.num_apps];
     mpi_allreduce_int_max(ending_iteration_here, ending_iteration, my_config.num_apps);
 
+    enum NODE_TYPE app_type[my_config.num_apps];
+    find_app_types(app_type);
+
     // Checking that total iterations are the same across nodes
     for (int i = 0; i < my_config.num_apps; i++) {
-        struct app_data * app_data = &arr_app_data[i];
-        if (app_data->ending_iteration == INT_MIN) {
-            if (ending_iteration[i] == INT_MIN) {
-                app_data->status = APP_STATUS_completed_everywhere;
-                master_printf_if(app_data->type == NODE_TYPE_unassigned, "Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i);
-            } else {
-                // The application has "completed" in this PE already!
-                app_data->status = APP_STATUS_just_completed;
-            }
-            app_data->ending_iteration = ending_iteration[i];
-        } else if (ending_iteration[i] != app_data->ending_iteration) {
-            tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data->ending_iteration);
+        struct app_data * app_data_here = &arr_app_data[i];
+        switch (app_type[i]) {
+            case NODE_TYPE_unassigned:
+                assert(app_data_here->type == NODE_TYPE_unassigned);
+                master_printf("Workload/app %d has not been configured to be tracked by iteration predictor\n", i);
+                app_data_here->status = APP_STATUS_completed_everywhere;
+            break;
+            case NODE_TYPE_background_noise:
+                if (app_data_here->type == NODE_TYPE_app) {
+                    tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have signaled conflicting node type (here: application, other: background noise)", i);
+                }
+                // We assume the background noise stays the same forever, thus we can think of it as not running. But if the background noise were to change, we would have to keep it APP_STATUS_running. And, possibly, we would have to call .ended() from the background process
+                app_data_here->status = APP_STATUS_completed_everywhere;
+                app_data_here->type = NODE_TYPE_background_noise;
+            break;
+            case NODE_TYPE_app:
+                if (app_data_here->type == NODE_TYPE_unassigned) {
+                    // There are no nodes for this application on this PE
+                    app_data_here->status = APP_STATUS_just_completed;
+                } else if (app_data_here->type == NODE_TYPE_background_noise) {
+                    tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have signaled conflicting node type (here: background noise, other: application)", i);
+                } else if (ending_iteration[i] != app_data_here->ending_iteration) {
+                    tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data_here->ending_iteration);
+                }
+                app_data_here->ending_iteration = ending_iteration[i];
+                app_data_here->type = NODE_TYPE_app;
+            break;
         }
     }
 }

From 82a69f81429908a2bc158bdd20e5546cfd102427 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 24 Jun 2025 18:39:37 -0400
Subject: [PATCH 179/188] Fixed cross-platform fscanf EOF handling

Replaced fscanf loop with fgets/sscanf to handle trailing newlines
consistently across systems (this bug was silently showing up in the
GHC200 system). Also added error reporting for malformed
lines.

btw, this code was written by Claude and audited by me ;)
---
 src/network-workloads/model-net-mpi-replay.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 539b8299..27c0ba87 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4094,13 +4094,15 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
             tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file);
 
         int i = 0;
-        char ref = '\n';
-        while(!feof(name_file))
+        char line[1024];
+        while(fgets(line, sizeof(line), name_file))
         {
-            //TODO: can we allow for a 2 item line but with defaults for the last two?
-            ref = fscanf(name_file, "%d %s %d %f", &num_traces_of_job[i], file_name_of_job[i], &qos_level_of_job[i], &mean_interval_of_job[i]);
+            int const fields = sscanf(line, "%d %s %d %f", &num_traces_of_job[i], file_name_of_job[i], &qos_level_of_job[i], &mean_interval_of_job[i]);
+            if(fields != 4) {
+                tw_error(TW_LOC, "Invalid format in %s at line %d: expected 4 fields, got %d", workloads_conf_file, i+1, fields);
+            }
             
-            if(ref != EOF && strncmp(file_name_of_job[i], "synthetic", 9) == 0)
+            if(strncmp(file_name_of_job[i], "synthetic", 9) == 0)
             {
               num_syn_clients = num_traces_of_job[i];
               num_net_traces += num_traces_of_job[i];
@@ -4112,7 +4114,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
 		tw_error(TW_LOC, "BISECTION requires and even number of nodes.");
 
             }
-            else if(ref!=EOF)
+            else
             {
                 if(enable_debug)
                     printf("\n%d traces of app %s (default qos class: %d)\n", num_traces_of_job[i], file_name_of_job[i], qos_level_of_job[i]);

From 3295653de3aa95faeb20cd4011290d4dbb832eb7 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 25 Jun 2025 20:32:11 -0400
Subject: [PATCH 180/188] Fixing some errors found with valgrind

---
 src/networks/model-net/dragonfly-dally.C        |  8 +++++---
 src/surrogate/app-iteration-predictor/average.c | 16 ++++++++++++++--
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C
index 5465605a..91befa1b 100644
--- a/src/networks/model-net/dragonfly-dally.C
+++ b/src/networks/model-net/dragonfly-dally.C
@@ -4647,9 +4647,10 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag
 {
     int num_qos_levels = s->params->num_qos_levels;
 
+    assert(msg->rail_id < s->params->num_rails);
     if(msg->qos_reset1)
         s->qos_status[msg->rail_id][0] = Q_ACTIVE;
-    if(msg->qos_reset2)
+    if(msg->qos_reset2 && s->params->num_qos_levels > 1)
         s->qos_status[msg->rail_id][1] = Q_ACTIVE;
     
     if(msg->last_saved_qos >= 0)
@@ -6435,9 +6436,10 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m
     int src_term_id = msg->dfdally_src_terminal_id;
     int app_id = msg->saved_app_id;
       
+    assert(output_port < s->params->radix);
     if(msg->qos_reset1)
         s->qos_status[output_port][0] = Q_ACTIVE;
-    if(msg->qos_reset2)
+    if(msg->qos_reset2 && s->params->num_qos_levels > 1)
         s->qos_status[output_port][1] = Q_ACTIVE;
     
     if(msg->last_saved_qos)
@@ -8682,7 +8684,7 @@ static Connection dfdally_prog_adaptive_routing(router_state *s, tw_bf *bf, term
     vector< Connection > poss_nonmin_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, fdest_router_id);
 
     Connection best_min_conn, best_nonmin_conn;
-    ConnectionType conn_type_of_mins, conn_type_of_nonmins;
+    ConnectionType conn_type_of_mins = CONN_LOCAL, conn_type_of_nonmins = CONN_LOCAL;
 
     if (poss_min_next_stops.size() > 0)
     {
diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c
index 1dfae210..b529be7f 100644
--- a/src/surrogate/app-iteration-predictor/average.c
+++ b/src/surrogate/app-iteration-predictor/average.c
@@ -169,6 +169,9 @@ static void reset_with(bool const * app_just_ended) {
 
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
+        if (node_data->app_id == -1) {
+            continue;
+        }
         node_data->acc_iters = 0;
         node_data->acc_iteration_time = 0;
         if (node_data->last_iter < arr_app_data[node_data->app_id].pred.resume_at_iter) {
@@ -349,6 +352,9 @@ static void find_avg_iteration_time(double * save_avg_time) {
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
         int const app_id = node_data->app_id;
+        if (app_id == -1) {
+            continue;
+        }
         acc_iter_time_here[app_id] += node_data->acc_iteration_time;
         acc_iters_here[app_id] += node_data->acc_iters;
     }
@@ -407,6 +413,9 @@ static void find_max_iter_per_app(int * save_last_iter) {
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
         int const app_id = node_data->app_id;
+        if (app_id == -1) {
+            continue;
+        }
         if (last_iter_here[app_id] < node_data->last_iter) {
             last_iter_here[app_id] = node_data->last_iter;
         }
@@ -422,6 +431,9 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const *
     for (int i=0; i < my_config.num_nodes_in_pe; i++) {
         struct node_data * node_data = &arr_node_data[i];
         int const app_id = node_data->app_id;
+        if (app_id == -1) {
+            continue;
+        }
         if (node_data->last_iter == last_iter[app_id]) {
             acc_last_iter_time[app_id] += node_data->prev_iteration_time;
             acc_iters_here[app_id]++;
@@ -482,8 +494,8 @@ static bool compute_restart_params(
         apps_restart_at_time[i] = last_iter_time[i] + iters_to_skip * avg_iter_time[i];
         apps_restart_at_iter[i] = last_iter[i] + iters_to_skip;
 
-        // if we are not skipping at least two iterations, there is no point in trying to fastforward
-        if (iters_to_skip <= 2) {
+        // if we are not skipping at least one iteration, there is no point in trying to fastforward
+        if (iters_to_skip <= 1) {
             worth_switching = false;
         }
     }

From 73cdbd54237addce142718e62ebba08d412301fc Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 26 Jun 2025 17:27:59 -0400
Subject: [PATCH 181/188] Updating CODES-compile-instructions.sh

---
 CODES-compile-instructions.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh
index ac15c087..694f0c95 100644
--- a/CODES-compile-instructions.sh
+++ b/CODES-compile-instructions.sh
@@ -21,13 +21,13 @@ CUR_DIR="$PWD"
 
 ##### Downloading everything #####
 
-git clone https://github.com/codes-org/codes --branch=kronos-develop
-git clone https://github.com/ross-org/ross --depth=20 --branch=at_gvt_arbitrary_function
+git clone https://github.com/codes-org/codes --branch=director-app-automatic
+git clone https://github.com/ross-org/ross --depth=100 --branch=gvt-hook-util
 
 if [ $swm_enable = 1 ]; then
     git clone https://github.com/pmodels/argobots --depth=1
     # This version is one commit ahead
-    git clone https://github.com/helq/swm-workloads --depth=1 --branch=fix-global-variable-rem
+    git clone https://github.com/helq/swm-workloads --branch=total-iterations-communication
 fi
 
 if [ $union_enable = 1 ]; then
@@ -35,7 +35,7 @@ if [ $union_enable = 1 ]; then
     curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz
     tar xvf conceptual-1.5.1b.tar.gz
     # Downloading union
-    git clone https://github.com/SPEAR-UIC/Union
+    git clone https://github.com/helq/Union --branch=total-iterations-communication
 fi
 
 ##### COMPILING #####

From 667dc2847306a327ec7280715ece78250e620b7e Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Thu, 26 Jun 2025 20:09:11 -0400
Subject: [PATCH 182/188] Saving to file when an iteration has been skipped by
 the surrogate

---
 src/network-workloads/model-net-mpi-replay.c | 21 ++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 27c0ba87..58a27098 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -427,7 +427,7 @@ struct nw_message
        int resume_at_iter;
    } fwd;
 
-   // A different struct for each type of MPI_NW_EVENTS
+   // A different struct for each type of MPI_NW_EVENTS (it can be used for the commit or the reverse handler)
    union {
        // For CLI_BCKGND_GEN
        struct {
@@ -472,6 +472,7 @@ struct nw_message
                // CODES_WK_END and CODES_WK_MARK
                struct {
                    double saved_marker_time;
+                   bool was_skipped;
                } mark;
            };
        } mpi_next;
@@ -489,6 +490,11 @@ struct nw_message
        struct {
            int64_t saved_num_bytes;
        } mpi_ack;
+
+       // For SURR_SKIP_ITERATION
+       struct {
+           double saved_marker_time;
+       } surr_skip;
    } rc;
 };
 
@@ -1216,6 +1222,7 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message *
 {
     struct codes_workload_op mpi_op;
     int resume_at_iter = m->fwd.resume_at_iter;
+    m->rc.surr_skip.saved_marker_time = tw_now(lp);
 
     // consuming all events until indicated iteration is reached
     bool reached_end = false;
@@ -1242,6 +1249,7 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message *
     tw_event *e = tw_event_new(lp->gid, 0.0, lp);
     nw_message* msg = (nw_message*) tw_event_data(e);
     msg->msg_type = MPI_OP_GET_NEXT;
+    msg->rc.mpi_next.mark.was_skipped = true;
     tw_event_send(e);
 }
 
@@ -1761,6 +1769,7 @@ static void codes_issue_next_event(tw_lp* lp)
    msg = (nw_message*)tw_event_data(e);
 
    msg->msg_type = MPI_OP_GET_NEXT;
+   msg->rc.mpi_next.mark.was_skipped = false;
    tw_event_send(e);
 }
 
@@ -1799,6 +1808,7 @@ static void codes_exec_comp_delay(
 	e = tw_event_new( lp->gid, ts , lp );
 	msg = (nw_message*)tw_event_data(e);
 	msg->msg_type = MPI_OP_GET_NEXT;
+    msg->rc.mpi_next.mark.was_skipped = false;
 	tw_event_send(e);
 
 }
@@ -3346,9 +3356,11 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
                     break;
 
                 case CODES_WK_MARK:
-                    fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time);
-                    if (iter_predictor) {
-                        iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time);
+                    if (! m->rc.mpi_next.mark.was_skipped) {
+                        fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time);
+                        if (iter_predictor) {
+                            iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time);
+                        }
                     }
 
                     if (OUTPUT_MARKS)
@@ -3374,6 +3386,7 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
             free(m->mpi_op);
         break;
         case SURR_SKIP_ITERATION:
+            fprintf(iteration_log, "SKIPPED TO ITERATION %d node %llu job %d rank %d time %lf\n", m->fwd.resume_at_iter, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.surr_skip.saved_marker_time);
             break;
 
         case CLI_BCKGND_CHANGE:

From 789c4693170dd2aa648210dcd39c0e8e24e8a78f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 29 Jun 2025 18:37:16 -0400
Subject: [PATCH 183/188] Updating compilation instructions

---
 CODES-compile-instructions.sh |  4 ++--
 README.md                     | 42 +++++++++++++++++------------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh
index 694f0c95..0856d597 100644
--- a/CODES-compile-instructions.sh
+++ b/CODES-compile-instructions.sh
@@ -21,8 +21,8 @@ CUR_DIR="$PWD"
 
 ##### Downloading everything #####
 
-git clone https://github.com/codes-org/codes --branch=director-app-automatic
-git clone https://github.com/ross-org/ross --depth=100 --branch=gvt-hook-util
+git clone https://github.com/codes-org/codes --branch=develop
+git clone https://github.com/ross-org/ross --depth=100 --branch=develop
 
 if [ $swm_enable = 1 ]; then
     git clone https://github.com/pmodels/argobots --depth=1
diff --git a/README.md b/README.md
index a86424be..7740a222 100644
--- a/README.md
+++ b/README.md
@@ -21,27 +21,6 @@ The easiest way to build CODES is using our automated compilation script that ha
 
 The script will create a new directory with all dependencies and CODES compiled and ready to use.
 
-## Features
-
-CODES provides comprehensive simulation capabilities for:
-
-### Network Topologies
-- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date)
-- **Torus**: Multi-dimensional torus networks
-- **Fat-tree**: Hierarchical tree topologies
-- **Express Mesh**: Enhanced mesh networks
-- **Simple P2P**: Point-to-point networks
-
-### Workload Generation
-- **SWM and UNION**: Workload generation
-- **MPI trace replay**: Support for DUMPI traces
-- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns
-
-### Multi-fidelity Simulation
-- **Network surrogate models**: Switch between high-fidelity and surrogate modes
-- **Application surrogate models**: Accelerate application-level simulation
-- **Adaptive directors**: Intelligent switching between simulation modes
-
 ## Prerequisites
 
 - **MPI**: OpenMPI or MPICH for parallel execution
@@ -127,6 +106,27 @@ bash run-experiment.sh path-to-experiment/script.sh
 
 A folder will be created under `path-to-experiment/results` containing the result of running the experiment.
 
+## Features
+
+CODES provides comprehensive simulation capabilities for:
+
+### Network Topologies
+- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date)
+- **Torus**: Multi-dimensional torus networks
+- **Fat-tree**: Hierarchical tree topologies
+- **Express Mesh**: Enhanced mesh networks
+- **Simple P2P**: Point-to-point networks
+
+### Workload Generation
+- **SWM and UNION**: Workload generation
+- **MPI trace replay**: Support for DUMPI traces
+- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns
+
+### Multi-fidelity Simulation
+- **Network surrogate models**: Switch between high-fidelity and surrogate modes
+- **Application surrogate models**: Accelerate application-level simulation
+- **Adaptive directors**: Intelligent switching between simulation modes
+
 ## Contributing
 
 Before contributing please run the full test suite. Some tests verify our determinism guarantees (every simulation should be reproducible), i.e, the number of net events processed between two runs in parallel mode should be the same. We want to keep our determinism guarantees forever. Non-deterministic simulations are often the result of faulty reverse handlers, which have caused serious bug failures and hundreds of hours of debugging.

From 45453ad0766070ced950e3c4fa634c708b8880c2 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Sun, 29 Jun 2025 18:38:05 -0400
Subject: [PATCH 184/188] Max iteration per app should be computed across all
 MPI ranks

---
 src/network-workloads/model-net-mpi-replay.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c
index 58a27098..018c4337 100644
--- a/src/network-workloads/model-net-mpi-replay.c
+++ b/src/network-workloads/model-net-mpi-replay.c
@@ -4387,6 +4387,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
     double total_avg_send_time, total_max_send_time;
      double total_avg_wait_time, total_max_wait_time;
      double total_avg_recv_time, total_max_recv_time;
+     double g_max_elapsed_time_per_job[MAX_JOBS];
      double g_total_syn_data = 0;
 
     MPI_Reduce(&num_bytes_sent, &total_bytes_sent, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
@@ -4403,6 +4404,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
    MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);
    MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);
    MPI_Reduce(&total_syn_data, &g_total_syn_data, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);  
+   MPI_Reduce(max_elapsed_time_per_job, g_max_elapsed_time_per_job, num_total_jobs, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES);
 
    assert(num_net_traces);
 
@@ -4421,19 +4423,20 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
     printf("Per App Max Elapsed Times:\n");
     for(int i = 0; i < num_total_jobs; i++)
     {
-        printf("\tApp %d: %.4f\n",i,max_elapsed_time_per_job[i]);
+        printf("\tApp %d: %.4f\n",i,g_max_elapsed_time_per_job[i]);
     }
     printf("----------\n");
 
     if(synthetic_pattern == PERMUTATION)
         printf("\n Threshold for random permutation %ld ", perm_switch_thresh);
+
+    if(is_synthetic)
+        printf("\n Synthetic traffic stats: data received per proc %lf bytes \n", g_total_syn_data/num_syn_clients);
    }
     if (do_lp_io){
         int ret = lp_io_flush(io_handle, MPI_COMM_CODES);
         assert(ret == 0 || !"lp_io_flush failure");
     }
-    if(is_synthetic)
-        printf("\n PE%d: Synthetic traffic stats: data received per proc %lf bytes \n",rank, g_total_syn_data/num_syn_clients);
 
    model_net_report_stats(net_id);
    

From 242707e42e32d52ca9f5cc48c2558d61f2752948 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 15 Jul 2025 15:58:43 -0400
Subject: [PATCH 185/188] Updating compilation instructions

---
 CODES-compile-instructions.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh
index 0856d597..76d4c6a1 100644
--- a/CODES-compile-instructions.sh
+++ b/CODES-compile-instructions.sh
@@ -35,7 +35,7 @@ if [ $union_enable = 1 ]; then
     curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz
     tar xvf conceptual-1.5.1b.tar.gz
     # Downloading union
-    git clone https://github.com/helq/Union --branch=total-iterations-communication
+    git clone https://github.com/helq/Union --branch=master
 fi
 
 ##### COMPILING #####
@@ -85,7 +85,7 @@ if [ $union_enable = 1 ]; then
 
     pushd Union
     ./prepare.sh
-    ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx
+    PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx
     make -j4 && make install
     err=$?
     [[ $err -ne 0 ]] && exit $err

From 34275e3a947e6862c28313ed5ae73961f4d1a56f Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 22 Jul 2025 12:04:11 -0400
Subject: [PATCH 186/188] Removing support for Autoconf

Autoconf is now far too outdated and keeping it on synch with the
changes made in the CMakefile
---
 LICENSE.md                     |   22 -
 Make.rules                     |   40 -
 Makefile.am                    |  103 -
 configure.ac                   |  262 --
 m4/ax_check_compile_flag.m4    |   75 -
 m4/ax_compare_version.m4       |  177 -
 m4/ax_cxx_compile_stdcxx.m4    |  972 ----
 m4/ax_prog_bison.m4            |   68 -
 m4/ax_prog_bison_clfeatures.m4 |  137 -
 m4/ax_prog_flex.m4             |   62 -
 m4/libtool.m4                  | 7986 --------------------------------
 m4/ltoptions.m4                |  384 --
 m4/ltsugar.m4                  |  123 -
 m4/ltversion.m4                |   23 -
 m4/lt~obsolete.m4              |   98 -
 m4/m4_ax_boost_base.m4         |  301 --
 m4/m4_ax_boost_filesystem.m4   |  118 -
 m4/m4_ax_boost_system.m4       |  121 -
 m4/pkg.m4                      |  233 -
 maint/codes-net.pc.in          |   12 -
 maint/codes.pc.in              |   32 -
 prepare.sh                     |    4 -
 22 files changed, 11353 deletions(-)
 delete mode 100644 LICENSE.md
 delete mode 100644 Make.rules
 delete mode 100644 Makefile.am
 delete mode 100755 configure.ac
 delete mode 100644 m4/ax_check_compile_flag.m4
 delete mode 100644 m4/ax_compare_version.m4
 delete mode 100644 m4/ax_cxx_compile_stdcxx.m4
 delete mode 100755 m4/ax_prog_bison.m4
 delete mode 100755 m4/ax_prog_bison_clfeatures.m4
 delete mode 100755 m4/ax_prog_flex.m4
 delete mode 100644 m4/libtool.m4
 delete mode 100644 m4/ltoptions.m4
 delete mode 100644 m4/ltsugar.m4
 delete mode 100644 m4/ltversion.m4
 delete mode 100644 m4/lt~obsolete.m4
 delete mode 100644 m4/m4_ax_boost_base.m4
 delete mode 100644 m4/m4_ax_boost_filesystem.m4
 delete mode 100644 m4/m4_ax_boost_system.m4
 delete mode 100644 m4/pkg.m4
 delete mode 100644 maint/codes-net.pc.in
 delete mode 100644 maint/codes.pc.in
 delete mode 100755 prepare.sh

diff --git a/LICENSE.md b/LICENSE.md
deleted file mode 100644
index a6de0500..00000000
--- a/LICENSE.md
+++ /dev/null
@@ -1,22 +0,0 @@
-************** Copyright © 2019, UChicago Argonne, LLC ***************
-
-All Rights Reserved
-
-Software Name: CO-Design of Exascale Storage and Network Architectures (CODES)
-
-By: Argonne National Laboratory, Rensselaer Polytechnic Institute, Lawrence Livermore National Laboratory, and Illinois Institute of Technology
-
-OPEN SOURCE LICENSE
-
-Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-
-
-******************************************************************************************************
-DISCLAIMER
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-***************************************************************************************************
diff --git a/Make.rules b/Make.rules
deleted file mode 100644
index aedcde1e..00000000
--- a/Make.rules
+++ /dev/null
@@ -1,40 +0,0 @@
-# flex & bison deps
-#
-%.c %.h: %.l
-	$(AM_V_GEN)$(LEX) --header-file=$(@:.c=.h) -o $(@:.h=.c) $< \
-	   || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) 
-
-# 
-# specific rule for codesparser generation; we want the header to land in
-# the codes/ directory because it will be installed for use by other repos
-#src/iokernellang/codesparser.c codes/codesparser.h: src/iokernellang/codesparser.y
-#	mkdir -p codes
-#	@test "x$(bison_ok)" != "yes" || echo "*** WARNING *** Bison version might be too old"
-#	$(AM_V_GEN)$(YACC) --defines=codes/codesparser.h -o src/iokernellang/codesparser.c  $< \
-#	   || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1)
-
-
-%.c %.h: %.y
-	@test "x$(bison_ok)" != "yes" || echo "*** WARNING *** Bison version might be too old"
-	$(AM_V_GEN)$(YACC) --defines=$(@:.c=.h) -o $(@:.h=.c)  $< \
-	   || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1)
-
-
-
-# %.y: %.y.in Makefile
-#	$(AM_V_GEN)$(SED) -e 's,[@]CODES_PURE_PARSER_DEFINES[@],$(CODES_PURE_PARSER_DEFINES),g' \
-#	-e 's,[@]CODES_PUSH_PARSER_DEFINES[@],$(CODES_PUSH_PARSER_DEFINES),g' \
-#	< src/common/iokernellang/codesparser.y.in > src/common/iokernellang/codesparser.y
-
-#
-# Output dist version
-#
-.phony: distversion
-distversion:
-	@echo $(VERSION)
-
-#
-# Easy way to build unit tests without running them
-# 
-.phony: tests
-tests: $(check_PROGRAMS)
diff --git a/Makefile.am b/Makefile.am
deleted file mode 100644
index f18d8d63..00000000
--- a/Makefile.am
+++ /dev/null
@@ -1,103 +0,0 @@
-AUTOMAKE_OPTIONS = foreign
-ACLOCAL_AMFLAGS = -I m4
-
-bin_PROGRAMS =
-bin_SCRIPTS =
-noinst_LIBRARIES =
-noinst_PROGRAMS =
-lib_LTLIBRARIES =
-noinst_HEADERS =
-TESTS =
-check_PROGRAMS =
-EXTRA_PROGRAMS =
-CLEANFILES = $(bin_SCRIPTS)
-EXTRA_DIST =
-BUILT_SOURCES =
-AM_LDFLAGS =
-
-
-# pkgconfig files
-pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = maint/codes.pc
-$(pkgconfig_DATA): config.status
-
-EXTRA_DIST += \
-  prepare.sh LICENSE.md configure.ac uc-codes.cfg reformat.sh \
-  misc/README misc/ptrn_loggp-2.4.6.patch CONTRIBUTORS.md \
-  README.md
-
-AM_CPPFLAGS = -I$(top_srcdir)/src ${ROSS_CFLAGS} 
-
-AM_CFLAGS =
-
-AM_CXXFLAGS = $(AM_CFLAGS)
-
-LDADD = $(lib_LTLIBRARIES) $(ROSS_LIBS)
-
-include Make.rules
-
-include $(top_srcdir)/scripts/Makefile.subdir
-include $(top_srcdir)/src/Makefile.subdir
-include $(top_srcdir)/tests/Makefile.subdir
-include $(top_srcdir)/doc/Makefile.subdir
-
-if USE_DEBUG
-AM_CPPFLAGS += -g
-AM_CFLAGS   += -g
-AM_CXXFLAGS += -g
-endif
-
-if USE_DARSHAN
-AM_CPPFLAGS += ${DARSHAN_CFLAGS} -DUSE_DARSHAN=1
-src_libcodes_la_SOURCES += src/workload/methods/codes-darshan3-io-wrkld.c
-LDADD += ${DARSHAN_LIBS}
-TESTS += tests/workload/darshan-dump.sh
-endif
-
-if USE_RECORDER
-AM_CPPFLAGS += ${RECORDER_CPPFLAGS}
-src_libcodes_la_SOURCES += src/workload/methods/codes-recorder-io-wrkld.c
-endif
-
-if USE_ONLINE
-AM_CPPFLAGS += ${ARGOBOTS_CFLAGS} -DUSE_ONLINE=1
-LDADD += ${ARGOBOTS_LIBS}
-if USE_SWM
-AM_CPPFLAGS +=  ${SWM_CFLAGS} -DUSE_SWM=1
-LDADD += ${SWM_LIBS}
-src_libcodes_la_SOURCES += src/workload/methods/codes-online-comm-wrkld.C
-endif
-if USE_UNION
-src_libcodes_la_SOURCES += src/workload/methods/codes-conc-online-comm-wrkld.C
-AM_CPPFLAGS += ${UNION_CFLAGS} ${SWM_CFLAGS} -DUSE_UNION=1
-LDADD += ${UNION_LIBS} ${SWM_LIBS}
-endif
-endif
-
-if USE_DUMPI
-AM_CPPFLAGS += ${DUMPI_CFLAGS} -DUSE_DUMPI=1
-src_libcodes_la_SOURCES += src/workload/methods/codes-dumpi-trace-nw-wrkld.c
-TESTS += tests/modelnet-test-dragonfly-traces.sh \
-		 tests/modelnet-test-dragonfly-custom-traces.sh \
-		 tests/modelnet-test-slimfly-traces.sh	\
-		 tests/modelnet-test-torus-traces.sh
-check_PROGRAMS += src/network-workloads/model-net-mpi-replay
-if USE_CORTEX
-if USE_PYTHON
-if USE_CORTEX_PYTHON
-AM_CPPFLAGS += ${CORTEX_PYTHON_CFLAGS} -DENABLE_CORTEX_PYTHON=1
-LDADD += ${CORTEX_PYTHON_LIBS}
-AM_CPPFLAGS += ${PYTHON_CFLAGS}
-LDADD += ${PYTHON_LIBS}
-endif
-endif
-AM_CPPFLAGS += ${CORTEX_CFLAGS} -DENABLE_CORTEX=1
-LDADD += ${CORTEX_LIBS}
-endif
-LDADD += ${DUMPI_LIBS}
-endif
-
-if USE_RDAMARIS
-AM_CPPFLAGS += ${ROSS_Damaris_CFLAGS} -DUSE_RDAMARIS=1
-LDADD += ${ROSS_Damaris_LIBS}
-endif
diff --git a/configure.ac b/configure.ac
deleted file mode 100755
index 2c4b7fea..00000000
--- a/configure.ac
+++ /dev/null
@@ -1,262 +0,0 @@
-#                                               -*- Autoconf -*-
-# Process this file with autoconf to produce a configure script.
-
-AC_PREREQ([2.67])
-AC_INIT([codes], [1.4.2], [http://trac.mcs.anl.gov/projects/codes/newticket],[],[http://www.mcs.anl.gov/projects/codes/])
-LT_INIT
-
-AC_CANONICAL_TARGET
-AC_CANONICAL_SYSTEM
-AC_CANONICAL_HOST
-
-AM_INIT_AUTOMAKE([foreign subdir-objects -Wall])
-
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
-
-AC_CONFIG_SRCDIR([doc/BUILD_STEPS])
-AC_CONFIG_HEADERS([codes_config.h])
-
-AX_PROG_BISON([],[AC_MSG_ERROR([could not find required package bison])])
-AX_PROG_FLEX([],[AC_MSG_ERROR([could not find required package flex])])
-AC_SUBST([BISON])
-AC_SUBST([FLEX])
-
-# Checks for programs.
-AC_PROG_CC
-AM_PROG_CC_C_O
-AC_PROG_CXX
-AC_PROG_CXXCPP
-AC_PROG_RANLIB
-
-PKG_PROG_PKG_CONFIG
-
-if test -z "$PKG_CONFIG" ; then
-    AC_MSG_ERROR([pkg-config is required. Please install the pkg-config program on your PATH or set the PKG_CONFIG environment variable to the appropriate package.])
-fi
-
-# Check for C99
-AC_PROG_CC_C99
-
-AC_REQUIRE_CPP
-
-# Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS([stdlib.h string.h unistd.h execinfo.h pthread.h malloc.h])
-
-# Checks for typedefs, structures, and compiler characteristics.
-AC_C_CONST
-AC_C_INLINE
-AC_TYPE_INT8_T
-AC_TYPE_INT16_T
-AC_TYPE_INT32_T
-AC_TYPE_INT64_T
-AC_TYPE_UINT8_T
-AC_TYPE_UINT16_T
-AC_TYPE_UINT32_T
-AC_TYPE_UINT64_T
-AC_TYPE_SIZE_T
-AC_TYPE_SSIZE_T
-
-# Add warning flags by default
-AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall"])
-AX_CHECK_COMPILE_FLAG([-Wextra], [CFLAGS="$CFLAGS -Wextra"])
-AX_CHECK_COMPILE_FLAG([-Wshadow], [CFLAGS="$CFLAGS -Wshadow"])
-
-# Checks for library functions.
-AC_CHECK_FUNCS([memset])
-AC_CHECK_LIB([pthread],[pthread_create],,[AC_MSG_ERROR([Could not find pthread_create!])])
-AC_CHECK_LIB([m],[sqrt],,[AC_MSG_ERROR([Could not find sqrt!])])
-
-
-AX_PROG_BISON_CLFEATURES([],[AC_MSG_WARN([Could not find bison])],
-[bison_ok="yes"], [bison_ok="no"])
-AC_SUBST([bison_ok])
-
-dnl Check to see if CC is an MPI compiler
-AC_MSG_CHECKING(whether the mpicc compiler works)
-AC_TRY_COMPILE([#include <mpi.h>], [int ret = MPI_Init(0, (void*)0)],
-    AC_MSG_RESULT(yes),
-    AC_MSG_RESULT(no)
-    AC_MSG_ERROR(CC doesnt appear to be a valid MPI compiler.  See INSTALL document or try adding CC=mpicc to your configure command line.)
-)
-
-# check for ROSS
-PKG_CHECK_MODULES_STATIC([ROSS], [ross], [],
-                  [AC_MSG_ERROR([Could not find working ross installation via pkg-config])])
-
-#check for Damaris
-AC_ARG_WITH([damaris],[AS_HELP_STRING([--with-damaris],
-                        [build with ROSS-Damaris in situ analysis support])],
-                        [use_rdamaris=yes],[use_rdamaris=no])
-if test "x${use_rdamaris}" = xyes ; then
-    PKG_CHECK_MODULES_STATIC([ROSS_Damaris], [ross-damaris], [],
-                             [AC_MSG_ERROR([Could not find working ROSS-Damaris installation via pkg-config])])
-fi
-AM_CONDITIONAL(USE_RDAMARIS, [test "x${use_rdamaris}" = xyes])
-
-# check for enable-g
-AC_ARG_ENABLE([g],[AS_HELP_STRING([--enable-g],
-			[Build with GDB symbols])],
-		[use_debug=yes],[use_debug=no])
-AM_CONDITIONAL(USE_DEBUG, [test "x${use_debug}" = xyes])
-
-# check for Darshan
-AC_ARG_WITH([darshan],[AS_HELP_STRING([--with-darshan],
-                        [Build with the darshan workload support])],
-                      [use_darshan=yes],[use_darshan=no])
-if test "x${use_darshan}" = xyes ; then
-    PKG_CHECK_MODULES_STATIC([DARSHAN], [darshan-util], [],
-                      [AC_MSG_ERROR([Could not find working darshan installation via pkg-config])])
-    DARSHAN_VER=`pkg-config --modversion darshan-util`
-    AX_COMPARE_VERSION([$DARSHAN_VER],[ge],[2.3],[],
-        [AC_MSG_ERROR([Found Darshan $DARSHAN_VER but 2.3 or greater is needed])])
-fi
-AM_CONDITIONAL(USE_DARSHAN, [test "x${use_darshan}" = xyes])
-
-# check for Argobots
-AC_ARG_WITH([online],[AS_HELP_STRING([--with-online@<:@=DIR@:>@],
-                        [Build with the online workloads and argobots support])])
-if test "x${with_online}" != "x" ; then
-    AM_CONDITIONAL(USE_ONLINE, true)
-    AX_BOOST_BASE([1.66])
-    AX_CXX_COMPILE_STDCXX(11, noext, mandatory)
-    PKG_CHECK_MODULES_STATIC([ARGOBOTS], [argobots], [],
-                      [AC_MSG_ERROR([Could not find working argobots installation via pkg-config])])
-else
-    AM_CONDITIONAL(USE_ONLINE, false)
-fi
-
-#check for SWM
-AC_ARG_WITH([swm],[AS_HELP_STRING([--with-swm@<:@=DIR@:>@],
-                        [location of SWM installation])])
-if test "x${with_swm}" != "x" ; then
-    AM_CONDITIONAL(USE_SWM, true)
-    PKG_CHECK_MODULES_STATIC([SWM], [swm], [],
-                      [AC_MSG_ERROR([Could not find working swm installation via pkg-config])])
-    PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [],
-              [AC_MSG_ERROR[Could not find shared directory in SWM]])
-    AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json
-                    data files])
-else
-  AM_CONDITIONAL(USE_SWM, false)
-fi
-
-#check for UNION 
-AC_ARG_WITH([union],[AS_HELP_STRING([--with-union@<:@=DIR@:>@],
-                        [location of Union installation])])
-if test "x${with_union}" != "x" ; then
-    AM_CONDITIONAL(USE_UNION, true)
-    PKG_CHECK_MODULES_STATIC([UNION], [union], [],
-                      [AC_MSG_ERROR([Could not find working Union installation via pkg-config])])
-    PKG_CHECK_VAR([UNION_DATADIR], [union], [datarootdir], [],
-              [AC_MSG_ERROR[Could not find shared directory in UNION]])
-    AC_DEFINE_UNQUOTED([UNION_DATADIR], ["$UNION_DATADIR"], [if using json data files])
-    PKG_CHECK_MODULES_STATIC([SWM], [swm], [],
-                      [AC_MSG_ERROR([Could not find working swm installation via pkg-config])])
-    PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [],
-              [AC_MSG_ERROR[Could not find shared directory in SWM]])
-    AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json
-                    data files])
-else
-    AM_CONDITIONAL(USE_UNION, false)
-fi
-
-
-# check for Recorder
-AM_CONDITIONAL(USE_RECORDER, true)
-RECORDER_CPPFLAGS="-DUSE_RECORDER=1"
-AC_SUBST(RECORDER_CPPFLAGS)
-
-#check for Dumpi
-AC_ARG_WITH([dumpi],[AS_HELP_STRING([--with-dumpi@<:@=DIR@:>@],
-                        [location of Dumpi installation])])
-if test "x${with_dumpi}" != "x" ; then
-	CFLAGS="-I${with_dumpi}/include"
-	LIBS="-L${with_dumpi}/lib/ -lundumpi"
-    AC_CHECK_LIB([undumpi],
-                 [undumpi_open], [], [AC_MSG_ERROR(Could not find dumpi)])
-    AM_CONDITIONAL(USE_DUMPI, true)
-	DUMPI_CFLAGS="-I${with_dumpi}/include"
-	DUMPI_LIBS="-L${with_dumpi}/lib/ -lundumpi"
-    AC_SUBST(DUMPI_LIBS)
-    AC_SUBST(DUMPI_CFLAGS)
-else
-	AM_CONDITIONAL(USE_DUMPI, false)
-fi
-
-# check for Cortex
-AC_ARG_WITH([cortex],[AS_HELP_STRING([--with-cortex@<:@=DIR@:>@],
-			[location of Cortex installation])])
-
-# check for Python
-AC_ARG_WITH([python],[AS_HELP_STRING([--with-python@<:@=DIR@:>@],
-			[location of Python 2.7 installation])])
-
-# check for Boost Python
-AC_ARG_WITH([boost],[AS_HELP_STRING([--with-boost@<:@=DIR@:>@],
-			[location of Boost Python installation])])
-
-if [ test "x${with_python}" != "x" -a "x${with_boost}" != "x"] ; then
-        AC_CHECK_FILES([${with_python}/lib/libpython2.7.so ${with_boost}/lib/libboost_python.a],
-                AM_CONDITIONAL(USE_PYTHON, true),
-                AC_MSG_ERROR(Could not find Python and/or Boost-Python libraries))
-        PYTHON_CFLAGS="-I${with_python}/include -I${with_boost}/include"
-        PYTHON_LIBS="-L${with_boost}/lib -lboost_python -L${with_python}/lib/ -lpython2.7"
-        AC_SUBST(PYTHON_LIBS)
-        AC_SUBST(PYTHON_CFLAGS)
-else
-        AM_CONDITIONAL(USE_PYTHON, false)
-fi
-
-if test "x${with_cortex}" != "x" ; then
-	 AC_CHECK_FILES([${with_cortex}/lib/libcortex.a ${with_cortex}/lib/libcortex-mpich.a],
-		AM_CONDITIONAL(USE_CORTEX, true),
-		AC_MSG_ERROR(Could not find Cortex libraries libcortex.a and/or libcortex-mpich.a))
-	CORTEX_CFLAGS="-I${with_cortex}/include"
-	CORTEX_LIBS="-L${with_cortex}/lib/ -lcortex-mpich -lcortex -lstdc++"
-	AC_SUBST(CORTEX_LIBS)
-	AC_SUBST(CORTEX_CFLAGS)
-else
-	AM_CONDITIONAL(USE_CORTEX, false)
-fi
-
-if [ test "x${with_cortex}" != "x" -a "x${with_python}" != "x" -a "x${with_boost}" != "x"] ; then
-	AC_CHECK_FILE([${with_cortex}/lib/libcortex-python.a],
-		AM_CONDITIONAL(USE_CORTEX_PYTHON, true),
-		AC_MSG_ERROR(Could not find library libcortex-python.a))
-	CORTEX_PYTHON_CFLAGS="-I${with_cortex}/include"
-	CORTEX_PYTHON_LIBS="-L${with_cortex}/lib/ -lcortex-python"
-	AC_SUBST(CORTEX_PYTHON_LIBS)
-	AC_SUBST(CORTEX_PYTHON_CFLAGS)
-else
-	AM_CONDITIONAL(USE_CORTEX_PYTHON, false)
-fi
-
-dnl ======================================================================
-dnl  Try harder to be valgrind safe
-dnl ======================================================================
-AC_ARG_ENABLE(valgrind-clean,
-        [AS_HELP_STRING(
-                [--enable-valgrind-clean],
-                [Try harder to avoid valgrind warnings])
-        ])
-
-AS_IF([test "x$enable_valgrind_clean" = "xyes"], [
-      AC_DEFINE([VALGRIND], [1], [If enabling valgrind-clean build])
-])
-
-
-dnl AC_CONFIG_FILES([src/iokernellang/codesparser.y])
-if test "x$srcdir" != "x."; then
-    AC_CONFIG_LINKS([tests/conf:$srcdir/tests/conf])
-fi
-
-AC_CONFIG_FILES([Makefile])
-
-AC_OUTPUT([maint/codes.pc])
-AC_OUTPUT([src/network-workloads/conf/dragonfly-custom/modelnet-test-dragonfly-1728-nodes.conf])
-AC_OUTPUT([src/network-workloads/conf/dragonfly-plus/modelnet-test-dragonfly-plus.conf])
-AC_OUTPUT([src/network-workloads/conf/dragonfly-dally/modelnet-test-dragonfly-dally.conf])
-AC_OUTPUT([doc/example/tutorial-ping-pong.conf])
-
-
diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4
deleted file mode 100644
index a7680d72..00000000
--- a/m4/ax_check_compile_flag.m4
+++ /dev/null
@@ -1,75 +0,0 @@
-# ===========================================================================
-#   http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
-#
-# DESCRIPTION
-#
-#   Check whether the given FLAG works with the current language's compiler
-#   or gives an error.  (Warnings, however, are ignored)
-#
-#   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
-#   success/failure.
-#
-#   If EXTRA-FLAGS is defined, it is added to the current language's default
-#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
-#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
-#   force the compiler to issue an error when a bad flag is given.
-#
-#   INPUT gives an alternative input source to AC_COMPILE_IFELSE.
-#
-#   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
-#   macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
-#   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
-#
-#   This program is free software: you can redistribute it and/or modify it
-#   under the terms of the GNU General Public License as published by the
-#   Free Software Foundation, either version 3 of the License, or (at your
-#   option) any later version.
-#
-#   This program is distributed in the hope that it will be useful, but
-#   WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-#   Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License along
-#   with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-#   As a special exception, the respective Autoconf Macro's copyright owner
-#   gives unlimited permission to copy, distribute and modify the configure
-#   scripts that are the output of Autoconf when processing the Macro. You
-#   need not follow the terms of the GNU General Public License when using
-#   or distributing such scripts, even though portions of the text of the
-#   Macro appear in them. The GNU General Public License (GPL) does govern
-#   all other use of the material that constitutes the Autoconf Macro.
-#
-#   This special exception to the GPL applies to versions of the Autoconf
-#   Macro released by the Autoconf Archive. When you make and distribute a
-#   modified version of the Autoconf Macro, you may extend this special
-#   exception to the GPL to apply to your modified version as well.
-
-#serial 4
-
-AC_DEFUN([AX_CHECK_COMPILE_FLAG],
-[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
-AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
-AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
-  ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
-  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
-  AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
-    [AS_VAR_SET(CACHEVAR,[yes])],
-    [AS_VAR_SET(CACHEVAR,[no])])
-  _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
-AS_VAR_IF(CACHEVAR,yes,
-  [m4_default([$2], :)],
-  [m4_default([$3], :)])
-AS_VAR_POPDEF([CACHEVAR])dnl
-])dnl AX_CHECK_COMPILE_FLAGS
-
diff --git a/m4/ax_compare_version.m4 b/m4/ax_compare_version.m4
deleted file mode 100644
index 74dc0fdd..00000000
--- a/m4/ax_compare_version.m4
+++ /dev/null
@@ -1,177 +0,0 @@
-# ===========================================================================
-#    http://www.gnu.org/software/autoconf-archive/ax_compare_version.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
-#
-# DESCRIPTION
-#
-#   This macro compares two version strings. Due to the various number of
-#   minor-version numbers that can exist, and the fact that string
-#   comparisons are not compatible with numeric comparisons, this is not
-#   necessarily trivial to do in a autoconf script. This macro makes doing
-#   these comparisons easy.
-#
-#   The six basic comparisons are available, as well as checking equality
-#   limited to a certain number of minor-version levels.
-#
-#   The operator OP determines what type of comparison to do, and can be one
-#   of:
-#
-#    eq  - equal (test A == B)
-#    ne  - not equal (test A != B)
-#    le  - less than or equal (test A <= B)
-#    ge  - greater than or equal (test A >= B)
-#    lt  - less than (test A < B)
-#    gt  - greater than (test A > B)
-#
-#   Additionally, the eq and ne operator can have a number after it to limit
-#   the test to that number of minor versions.
-#
-#    eq0 - equal up to the length of the shorter version
-#    ne0 - not equal up to the length of the shorter version
-#    eqN - equal up to N sub-version levels
-#    neN - not equal up to N sub-version levels
-#
-#   When the condition is true, shell commands ACTION-IF-TRUE are run,
-#   otherwise shell commands ACTION-IF-FALSE are run. The environment
-#   variable 'ax_compare_version' is always set to either 'true' or 'false'
-#   as well.
-#
-#   Examples:
-#
-#     AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8])
-#     AX_COMPARE_VERSION([3.15],[lt],[3.15.8])
-#
-#   would both be true.
-#
-#     AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8])
-#     AX_COMPARE_VERSION([3.15],[gt],[3.15.8])
-#
-#   would both be false.
-#
-#     AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8])
-#
-#   would be true because it is only comparing two minor versions.
-#
-#     AX_COMPARE_VERSION([3.15.7],[eq0],[3.15])
-#
-#   would be true because it is only comparing the lesser number of minor
-#   versions of the two values.
-#
-#   Note: The characters that separate the version numbers do not matter. An
-#   empty string is the same as version 0. OP is evaluated by autoconf, not
-#   configure, so must be a string, not a variable.
-#
-#   The author would like to acknowledge Guido Draheim whose advice about
-#   the m4_case and m4_ifvaln functions make this macro only include the
-#   portions necessary to perform the specific comparison specified by the
-#   OP argument in the final configure script.
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Tim Toolan <toolan@ele.uri.edu>
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved. This file is offered as-is, without any
-#   warranty.
-
-#serial 11
-
-dnl #########################################################################
-AC_DEFUN([AX_COMPARE_VERSION], [
-  AC_REQUIRE([AC_PROG_AWK])
-
-  # Used to indicate true or false condition
-  ax_compare_version=false
-
-  # Convert the two version strings to be compared into a format that
-  # allows a simple string comparison.  The end result is that a version
-  # string of the form 1.12.5-r617 will be converted to the form
-  # 0001001200050617.  In other words, each number is zero padded to four
-  # digits, and non digits are removed.
-  AS_VAR_PUSHDEF([A],[ax_compare_version_A])
-  A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
-                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
-                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-                     -e 's/[[^0-9]]//g'`
-
-  AS_VAR_PUSHDEF([B],[ax_compare_version_B])
-  B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
-                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
-                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-                     -e 's/[[^0-9]]//g'`
-
-  dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary
-  dnl # then the first line is used to determine if the condition is true.
-  dnl # The sed right after the echo is to remove any indented white space.
-  m4_case(m4_tolower($2),
-  [lt],[
-    ax_compare_version=`echo "x$A
-x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"`
-  ],
-  [gt],[
-    ax_compare_version=`echo "x$A
-x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"`
-  ],
-  [le],[
-    ax_compare_version=`echo "x$A
-x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"`
-  ],
-  [ge],[
-    ax_compare_version=`echo "x$A
-x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"`
-  ],[
-    dnl Split the operator from the subversion count if present.
-    m4_bmatch(m4_substr($2,2),
-    [0],[
-      # A count of zero means use the length of the shorter version.
-      # Determine the number of characters in A and B.
-      ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'`
-      ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'`
-
-      # Set A to no more than B's length and B to no more than A's length.
-      A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"`
-      B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"`
-    ],
-    [[0-9]+],[
-      # A count greater than zero means use only that many subversions
-      A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
-      B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
-    ],
-    [.+],[
-      AC_WARNING(
-        [illegal OP numeric parameter: $2])
-    ],[])
-
-    # Pad zeros at end of numbers to make same length.
-    ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`"
-    B="$B`echo $A | sed 's/./0/g'`"
-    A="$ax_compare_version_tmp_A"
-
-    # Check for equality or inequality as necessary.
-    m4_case(m4_tolower(m4_substr($2,0,2)),
-    [eq],[
-      test "x$A" = "x$B" && ax_compare_version=true
-    ],
-    [ne],[
-      test "x$A" != "x$B" && ax_compare_version=true
-    ],[
-      AC_WARNING([illegal OP parameter: $2])
-    ])
-  ])
-
-  AS_VAR_POPDEF([A])dnl
-  AS_VAR_POPDEF([B])dnl
-
-  dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE.
-  if test "$ax_compare_version" = "true" ; then
-    m4_ifvaln([$4],[$4],[:])dnl
-    m4_ifvaln([$5],[else $5])dnl
-  fi
-]) dnl AX_COMPARE_VERSION
diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4
deleted file mode 100644
index 0b6cb3a7..00000000
--- a/m4/ax_cxx_compile_stdcxx.m4
+++ /dev/null
@@ -1,972 +0,0 @@
-# ===========================================================================
-#  https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional])
-#
-# DESCRIPTION
-#
-#   Check for baseline language coverage in the compiler for the specified
-#   version of the C++ standard.  If necessary, add switches to CXX and
-#   CXXCPP to enable support.  VERSION may be '11' (for the C++11 standard)
-#   or '14' (for the C++14 standard).
-#
-#   The second argument, if specified, indicates whether you insist on an
-#   extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
-#   -std=c++11).  If neither is specified, you get whatever works, with
-#   preference for an extended mode.
-#
-#   The third argument, if specified 'mandatory' or if left unspecified,
-#   indicates that baseline support for the specified C++ standard is
-#   required and that the macro should error out if no mode with that
-#   support is found.  If specified 'optional', then configuration proceeds
-#   regardless, after defining HAVE_CXX${VERSION} if and only if a
-#   supporting mode is found.
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
-#   Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
-#   Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
-#   Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
-#   Copyright (c) 2015 Paul Norman <penorman@mac.com>
-#   Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
-#   Copyright (c) 2016, 2018 Krzesimir Nowak <qdlacz@gmail.com>
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved.  This file is offered as-is, without any
-#   warranty.
-
-#serial 9
-
-dnl  This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro
-dnl  (serial version number 13).
-
-AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
-  m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"],
-        [$1], [14], [ax_cxx_compile_alternatives="14 1y"],
-        [$1], [17], [ax_cxx_compile_alternatives="17 1z"],
-        [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl
-  m4_if([$2], [], [],
-        [$2], [ext], [],
-        [$2], [noext], [],
-        [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl
-  m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true],
-        [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true],
-        [$3], [optional], [ax_cxx_compile_cxx$1_required=false],
-        [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])])
-  AC_LANG_PUSH([C++])dnl
-  ac_success=no
-
-  m4_if([$2], [noext], [], [dnl
-  if test x$ac_success = xno; then
-    for alternative in ${ax_cxx_compile_alternatives}; do
-      switch="-std=gnu++${alternative}"
-      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
-      AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
-                     $cachevar,
-        [ac_save_CXX="$CXX"
-         CXX="$CXX $switch"
-         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
-          [eval $cachevar=yes],
-          [eval $cachevar=no])
-         CXX="$ac_save_CXX"])
-      if eval test x\$$cachevar = xyes; then
-        CXX="$CXX $switch"
-        if test -n "$CXXCPP" ; then
-          CXXCPP="$CXXCPP $switch"
-        fi
-        ac_success=yes
-        break
-      fi
-    done
-  fi])
-
-  m4_if([$2], [ext], [], [dnl
-  if test x$ac_success = xno; then
-    dnl HP's aCC needs +std=c++11 according to:
-    dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
-    dnl Cray's crayCC needs "-h std=c++11"
-    for alternative in ${ax_cxx_compile_alternatives}; do
-      for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do
-        cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
-        AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
-                       $cachevar,
-          [ac_save_CXX="$CXX"
-           CXX="$CXX $switch"
-           AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
-            [eval $cachevar=yes],
-            [eval $cachevar=no])
-           CXX="$ac_save_CXX"])
-        if eval test x\$$cachevar = xyes; then
-          CXX="$CXX $switch"
-          if test -n "$CXXCPP" ; then
-            CXXCPP="$CXXCPP $switch"
-          fi
-          ac_success=yes
-          break
-        fi
-      done
-      if test x$ac_success = xyes; then
-        break
-      fi
-    done
-  fi])
-  AC_LANG_POP([C++])
-  if test x$ax_cxx_compile_cxx$1_required = xtrue; then
-    if test x$ac_success = xno; then
-      AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.])
-    fi
-  fi
-  if test x$ac_success = xno; then
-    HAVE_CXX$1=0
-    AC_MSG_NOTICE([No compiler with C++$1 support was found])
-  else
-    HAVE_CXX$1=1
-    AC_DEFINE(HAVE_CXX$1,1,
-              [define if the compiler supports basic C++$1 syntax])
-  fi
-  AC_SUBST(HAVE_CXX$1)
-])
-
-
-dnl  Test body for checking C++11 support
-
-m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11],
-  _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
-)
-
-
-dnl  Test body for checking C++14 support
-
-m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
-  _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
-  _AX_CXX_COMPILE_STDCXX_testbody_new_in_14
-)
-
-m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17],
-  _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
-  _AX_CXX_COMPILE_STDCXX_testbody_new_in_14
-  _AX_CXX_COMPILE_STDCXX_testbody_new_in_17
-)
-
-dnl  Tests for new features in C++11
-
-m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[
-
-// If the compiler admits that it is not ready for C++11, why torture it?
-// Hopefully, this will speed up the test.
-
-#ifndef __cplusplus
-
-#error "This is not a C++ compiler"
-
-#elif __cplusplus < 201103L
-
-#error "This is not a C++11 compiler"
-
-#else
-
-namespace cxx11
-{
-
-  namespace test_static_assert
-  {
-
-    template <typename T>
-    struct check
-    {
-      static_assert(sizeof(int) <= sizeof(T), "not big enough");
-    };
-
-  }
-
-  namespace test_final_override
-  {
-
-    struct Base
-    {
-      virtual void f() {}
-    };
-
-    struct Derived : public Base
-    {
-      virtual void f() override {}
-    };
-
-  }
-
-  namespace test_double_right_angle_brackets
-  {
-
-    template < typename T >
-    struct check {};
-
-    typedef check<void> single_type;
-    typedef check<check<void>> double_type;
-    typedef check<check<check<void>>> triple_type;
-    typedef check<check<check<check<void>>>> quadruple_type;
-
-  }
-
-  namespace test_decltype
-  {
-
-    int
-    f()
-    {
-      int a = 1;
-      decltype(a) b = 2;
-      return a + b;
-    }
-
-  }
-
-  namespace test_type_deduction
-  {
-
-    template < typename T1, typename T2 >
-    struct is_same
-    {
-      static const bool value = false;
-    };
-
-    template < typename T >
-    struct is_same<T, T>
-    {
-      static const bool value = true;
-    };
-
-    template < typename T1, typename T2 >
-    auto
-    add(T1 a1, T2 a2) -> decltype(a1 + a2)
-    {
-      return a1 + a2;
-    }
-
-    int
-    test(const int c, volatile int v)
-    {
-      static_assert(is_same<int, decltype(0)>::value == true, "");
-      static_assert(is_same<int, decltype(c)>::value == false, "");
-      static_assert(is_same<int, decltype(v)>::value == false, "");
-      auto ac = c;
-      auto av = v;
-      auto sumi = ac + av + 'x';
-      auto sumf = ac + av + 1.0;
-      static_assert(is_same<int, decltype(ac)>::value == true, "");
-      static_assert(is_same<int, decltype(av)>::value == true, "");
-      static_assert(is_same<int, decltype(sumi)>::value == true, "");
-      static_assert(is_same<int, decltype(sumf)>::value == false, "");
-      static_assert(is_same<int, decltype(add(c, v))>::value == true, "");
-      return (sumf > 0.0) ? sumi : add(c, v);
-    }
-
-  }
-
-  namespace test_noexcept
-  {
-
-    int f() { return 0; }
-    int g() noexcept { return 0; }
-
-    static_assert(noexcept(f()) == false, "");
-    static_assert(noexcept(g()) == true, "");
-
-  }
-
-  namespace test_constexpr
-  {
-
-    template < typename CharT >
-    unsigned long constexpr
-    strlen_c_r(const CharT *const s, const unsigned long acc) noexcept
-    {
-      return *s ? strlen_c_r(s + 1, acc + 1) : acc;
-    }
-
-    template < typename CharT >
-    unsigned long constexpr
-    strlen_c(const CharT *const s) noexcept
-    {
-      return strlen_c_r(s, 0UL);
-    }
-
-    static_assert(strlen_c("") == 0UL, "");
-    static_assert(strlen_c("1") == 1UL, "");
-    static_assert(strlen_c("example") == 7UL, "");
-    static_assert(strlen_c("another\0example") == 7UL, "");
-
-  }
-
-  namespace test_rvalue_references
-  {
-
-    template < int N >
-    struct answer
-    {
-      static constexpr int value = N;
-    };
-
-    answer<1> f(int&)       { return answer<1>(); }
-    answer<2> f(const int&) { return answer<2>(); }
-    answer<3> f(int&&)      { return answer<3>(); }
-
-    void
-    test()
-    {
-      int i = 0;
-      const int c = 0;
-      static_assert(decltype(f(i))::value == 1, "");
-      static_assert(decltype(f(c))::value == 2, "");
-      static_assert(decltype(f(0))::value == 3, "");
-    }
-
-  }
-
-  namespace test_uniform_initialization
-  {
-
-    struct test
-    {
-      static const int zero {};
-      static const int one {1};
-    };
-
-    static_assert(test::zero == 0, "");
-    static_assert(test::one == 1, "");
-
-  }
-
-  namespace test_lambdas
-  {
-
-    void
-    test1()
-    {
-      auto lambda1 = [](){};
-      auto lambda2 = lambda1;
-      lambda1();
-      lambda2();
-    }
-
-    int
-    test2()
-    {
-      auto a = [](int i, int j){ return i + j; }(1, 2);
-      auto b = []() -> int { return '0'; }();
-      auto c = [=](){ return a + b; }();
-      auto d = [&](){ return c; }();
-      auto e = [a, &b](int x) mutable {
-        const auto identity = [](int y){ return y; };
-        for (auto i = 0; i < a; ++i)
-          a += b--;
-        return x + identity(a + b);
-      }(0);
-      return a + b + c + d + e;
-    }
-
-    int
-    test3()
-    {
-      const auto nullary = [](){ return 0; };
-      const auto unary = [](int x){ return x; };
-      using nullary_t = decltype(nullary);
-      using unary_t = decltype(unary);
-      const auto higher1st = [](nullary_t f){ return f(); };
-      const auto higher2nd = [unary](nullary_t f1){
-        return [unary, f1](unary_t f2){ return f2(unary(f1())); };
-      };
-      return higher1st(nullary) + higher2nd(nullary)(unary);
-    }
-
-  }
-
-  namespace test_variadic_templates
-  {
-
-    template <int...>
-    struct sum;
-
-    template <int N0, int... N1toN>
-    struct sum<N0, N1toN...>
-    {
-      static constexpr auto value = N0 + sum<N1toN...>::value;
-    };
-
-    template <>
-    struct sum<>
-    {
-      static constexpr auto value = 0;
-    };
-
-    static_assert(sum<>::value == 0, "");
-    static_assert(sum<1>::value == 1, "");
-    static_assert(sum<23>::value == 23, "");
-    static_assert(sum<1, 2>::value == 3, "");
-    static_assert(sum<5, 5, 11>::value == 21, "");
-    static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, "");
-
-  }
-
-  // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
-  // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function
-  // because of this.
-  namespace test_template_alias_sfinae
-  {
-
-    struct foo {};
-
-    template<typename T>
-    using member = typename T::member_type;
-
-    template<typename T>
-    void func(...) {}
-
-    template<typename T>
-    void func(member<T>*) {}
-
-    void test();
-
-    void test() { func<foo>(0); }
-
-  }
-
-}  // namespace cxx11
-
-#endif  // __cplusplus >= 201103L
-
-]])
-
-
-dnl  Tests for new features in C++14
-
-m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[
-
-// If the compiler admits that it is not ready for C++14, why torture it?
-// Hopefully, this will speed up the test.
-
-#ifndef __cplusplus
-
-#error "This is not a C++ compiler"
-
-#elif __cplusplus < 201402L
-
-#error "This is not a C++14 compiler"
-
-#else
-
-namespace cxx14
-{
-
-  namespace test_polymorphic_lambdas
-  {
-
-    int
-    test()
-    {
-      const auto lambda = [](auto&&... args){
-        const auto istiny = [](auto x){
-          return (sizeof(x) == 1UL) ? 1 : 0;
-        };
-        const int aretiny[] = { istiny(args)... };
-        return aretiny[0];
-      };
-      return lambda(1, 1L, 1.0f, '1');
-    }
-
-  }
-
-  namespace test_binary_literals
-  {
-
-    constexpr auto ivii = 0b0000000000101010;
-    static_assert(ivii == 42, "wrong value");
-
-  }
-
-  namespace test_generalized_constexpr
-  {
-
-    template < typename CharT >
-    constexpr unsigned long
-    strlen_c(const CharT *const s) noexcept
-    {
-      auto length = 0UL;
-      for (auto p = s; *p; ++p)
-        ++length;
-      return length;
-    }
-
-    static_assert(strlen_c("") == 0UL, "");
-    static_assert(strlen_c("x") == 1UL, "");
-    static_assert(strlen_c("test") == 4UL, "");
-    static_assert(strlen_c("another\0test") == 7UL, "");
-
-  }
-
-  namespace test_lambda_init_capture
-  {
-
-    int
-    test()
-    {
-      auto x = 0;
-      const auto lambda1 = [a = x](int b){ return a + b; };
-      const auto lambda2 = [a = lambda1(x)](){ return a; };
-      return lambda2();
-    }
-
-  }
-
-  namespace test_digit_separators
-  {
-
-    constexpr auto ten_million = 100'000'000;
-    static_assert(ten_million == 100000000, "");
-
-  }
-
-  namespace test_return_type_deduction
-  {
-
-    auto f(int& x) { return x; }
-    decltype(auto) g(int& x) { return x; }
-
-    template < typename T1, typename T2 >
-    struct is_same
-    {
-      static constexpr auto value = false;
-    };
-
-    template < typename T >
-    struct is_same<T, T>
-    {
-      static constexpr auto value = true;
-    };
-
-    int
-    test()
-    {
-      auto x = 0;
-      static_assert(is_same<int, decltype(f(x))>::value, "");
-      static_assert(is_same<int&, decltype(g(x))>::value, "");
-      return x;
-    }
-
-  }
-
-}  // namespace cxx14
-
-#endif  // __cplusplus >= 201402L
-
-]])
-
-
-dnl  Tests for new features in C++17
-
-m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[
-
-// If the compiler admits that it is not ready for C++17, why torture it?
-// Hopefully, this will speed up the test.
-
-#ifndef __cplusplus
-
-#error "This is not a C++ compiler"
-
-#elif __cplusplus <= 201402L
-
-#error "This is not a C++17 compiler"
-
-#else
-
-#if defined(__clang__)
-  #define REALLY_CLANG
-#else
-  #if defined(__GNUC__)
-    #define REALLY_GCC
-  #endif
-#endif
-
-#include <initializer_list>
-#include <utility>
-#include <type_traits>
-
-namespace cxx17
-{
-
-#if !defined(REALLY_CLANG)
-  namespace test_constexpr_lambdas
-  {
-
-    // TODO: test it with clang++ from git
-
-    constexpr int foo = [](){return 42;}();
-
-  }
-#endif // !defined(REALLY_CLANG)
-
-  namespace test::nested_namespace::definitions
-  {
-
-  }
-
-  namespace test_fold_expression
-  {
-
-    template<typename... Args>
-    int multiply(Args... args)
-    {
-      return (args * ... * 1);
-    }
-
-    template<typename... Args>
-    bool all(Args... args)
-    {
-      return (args && ...);
-    }
-
-  }
-
-  namespace test_extended_static_assert
-  {
-
-    static_assert (true);
-
-  }
-
-  namespace test_auto_brace_init_list
-  {
-
-    auto foo = {5};
-    auto bar {5};
-
-    static_assert(std::is_same<std::initializer_list<int>, decltype(foo)>::value);
-    static_assert(std::is_same<int, decltype(bar)>::value);
-  }
-
-  namespace test_typename_in_template_template_parameter
-  {
-
-    template<template<typename> typename X> struct D;
-
-  }
-
-  namespace test_fallthrough_nodiscard_maybe_unused_attributes
-  {
-
-    int f1()
-    {
-      return 42;
-    }
-
-    [[nodiscard]] int f2()
-    {
-      [[maybe_unused]] auto unused = f1();
-
-      switch (f1())
-      {
-      case 17:
-        f1();
-        [[fallthrough]];
-      case 42:
-        f1();
-      }
-      return f1();
-    }
-
-  }
-
-  namespace test_extended_aggregate_initialization
-  {
-
-    struct base1
-    {
-      int b1, b2 = 42;
-    };
-
-    struct base2
-    {
-      base2() {
-        b3 = 42;
-      }
-      int b3;
-    };
-
-    struct derived : base1, base2
-    {
-        int d;
-    };
-
-    derived d1 {{1, 2}, {}, 4};  // full initialization
-    derived d2 {{}, {}, 4};      // value-initialized bases
-
-  }
-
-  namespace test_general_range_based_for_loop
-  {
-
-    struct iter
-    {
-      int i;
-
-      int& operator* ()
-      {
-        return i;
-      }
-
-      const int& operator* () const
-      {
-        return i;
-      }
-
-      iter& operator++()
-      {
-        ++i;
-        return *this;
-      }
-    };
-
-    struct sentinel
-    {
-      int i;
-    };
-
-    bool operator== (const iter& i, const sentinel& s)
-    {
-      return i.i == s.i;
-    }
-
-    bool operator!= (const iter& i, const sentinel& s)
-    {
-      return !(i == s);
-    }
-
-    struct range
-    {
-      iter begin() const
-      {
-        return {0};
-      }
-
-      sentinel end() const
-      {
-        return {5};
-      }
-    };
-
-    void f()
-    {
-      range r {};
-
-      for (auto i : r)
-      {
-        [[maybe_unused]] auto v = i;
-      }
-    }
-
-  }
-
-  namespace test_lambda_capture_asterisk_this_by_value
-  {
-
-    struct t
-    {
-      int i;
-      int foo()
-      {
-        return [*this]()
-        {
-          return i;
-        }();
-      }
-    };
-
-  }
-
-  namespace test_enum_class_construction
-  {
-
-    enum class byte : unsigned char
-    {};
-
-    byte foo {42};
-
-  }
-
-  namespace test_constexpr_if
-  {
-
-    template <bool cond>
-    int f ()
-    {
-      if constexpr(cond)
-      {
-        return 13;
-      }
-      else
-      {
-        return 42;
-      }
-    }
-
-  }
-
-  namespace test_selection_statement_with_initializer
-  {
-
-    int f()
-    {
-      return 13;
-    }
-
-    int f2()
-    {
-      if (auto i = f(); i > 0)
-      {
-        return 3;
-      }
-
-      switch (auto i = f(); i + 4)
-      {
-      case 17:
-        return 2;
-
-      default:
-        return 1;
-      }
-    }
-
-  }
-
-#if !defined(REALLY_CLANG)
-  namespace test_template_argument_deduction_for_class_templates
-  {
-
-    // TODO: test it with clang++ from git
-
-    template <typename T1, typename T2>
-    struct pair
-    {
-      pair (T1 p1, T2 p2)
-        : m1 {p1},
-          m2 {p2}
-      {}
-
-      T1 m1;
-      T2 m2;
-    };
-
-    void f()
-    {
-      [[maybe_unused]] auto p = pair{13, 42u};
-    }
-
-  }
-#endif // !defined(REALLY_CLANG)
-
-  namespace test_non_type_auto_template_parameters
-  {
-
-    template <auto n>
-    struct B
-    {};
-
-    B<5> b1;
-    B<'a'> b2;
-
-  }
-
-#if !defined(REALLY_CLANG)
-  namespace test_structured_bindings
-  {
-
-    // TODO: test it with clang++ from git
-
-    int arr[2] = { 1, 2 };
-    std::pair<int, int> pr = { 1, 2 };
-
-    auto f1() -> int(&)[2]
-    {
-      return arr;
-    }
-
-    auto f2() -> std::pair<int, int>&
-    {
-      return pr;
-    }
-
-    struct S
-    {
-      int x1 : 2;
-      volatile double y1;
-    };
-
-    S f3()
-    {
-      return {};
-    }
-
-    auto [ x1, y1 ] = f1();
-    auto& [ xr1, yr1 ] = f1();
-    auto [ x2, y2 ] = f2();
-    auto& [ xr2, yr2 ] = f2();
-    const auto [ x3, y3 ] = f3();
-
-  }
-#endif // !defined(REALLY_CLANG)
-
-#if !defined(REALLY_CLANG)
-  namespace test_exception_spec_type_system
-  {
-
-    // TODO: test it with clang++ from git
-
-    struct Good {};
-    struct Bad {};
-
-    void g1() noexcept;
-    void g2();
-
-    template<typename T>
-    Bad
-    f(T*, T*);
-
-    template<typename T1, typename T2>
-    Good
-    f(T1*, T2*);
-
-    static_assert (std::is_same_v<Good, decltype(f(g1, g2))>);
-
-  }
-#endif // !defined(REALLY_CLANG)
-
-  namespace test_inline_variables
-  {
-
-    template<class T> void f(T)
-    {}
-
-    template<class T> inline T g(T)
-    {
-      return T{};
-    }
-
-    template<> inline void f<>(int)
-    {}
-
-    template<> int g<>(int)
-    {
-      return 5;
-    }
-
-  }
-
-}  // namespace cxx17
-
-#endif  // __cplusplus <= 201402L
-
-]])
diff --git a/m4/ax_prog_bison.m4 b/m4/ax_prog_bison.m4
deleted file mode 100755
index aa3bb112..00000000
--- a/m4/ax_prog_bison.m4
+++ /dev/null
@@ -1,68 +0,0 @@
-# ===========================================================================
-#          http://www.nongnu.org/autoconf-archive/ax_prog_bison.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_PROG_BISON(ACTION-IF-TRUE,ACTION-IF-FALSE)
-#
-# DESCRIPTION
-#
-#   Check whether bison is the parser generator. Run ACTION-IF-TRUE if
-#   successful, ACTION-IF-FALSE otherwise
-#
-# LICENSE
-#
-#   Copyright (c) 2009 Francesco Salvestrini <salvestrini@users.sourceforge.net>
-#
-#   This program is free software; you can redistribute it and/or modify it
-#   under the terms of the GNU General Public License as published by the
-#   Free Software Foundation; either version 2 of the License, or (at your
-#   option) any later version.
-#
-#   This program is distributed in the hope that it will be useful, but
-#   WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-#   Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License along
-#   with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-#   As a special exception, the respective Autoconf Macro's copyright owner
-#   gives unlimited permission to copy, distribute and modify the configure
-#   scripts that are the output of Autoconf when processing the Macro. You
-#   need not follow the terms of the GNU General Public License when using
-#   or distributing such scripts, even though portions of the text of the
-#   Macro appear in them. The GNU General Public License (GPL) does govern
-#   all other use of the material that constitutes the Autoconf Macro.
-#
-#   This special exception to the GPL applies to versions of the Autoconf
-#   Macro released by the Autoconf Archive. When you make and distribute a
-#   modified version of the Autoconf Macro, you may extend this special
-#   exception to the GPL to apply to your modified version as well.
-
-AC_DEFUN([AX_PROG_BISON], [
-  AC_REQUIRE([AC_PROG_YACC])
-  AC_REQUIRE([AC_PROG_SED])
-
-  AC_CACHE_CHECK([if bison is the parser generator],[ax_cv_prog_bison],[
-    AS_IF([test "`echo \"$YACC\" | $SED 's,^.*\(bison\).*$,\1,'`" = "bison" ],[
-      ax_cv_prog_bison=yes
-    ],[
-      ax_cv_prog_bison=no
-    ])
-  ])
-
-  AC_DEFINE([HAVE_YACC_OLD_PUSH],[0],[If old-style push parser syntax is supported by ${YACC}])
-  AM_CONDITIONAL([HAVE_YACC_OLD_PUSH],[test "x${HAVE_YACC_OLD_PUSH}" == "x1"])
-  AC_DEFINE([HAVE_YACC_OLD_PURE],[0],[If old-style pure reentrant parser syntax is supported by ${YACC}])
-  AM_CONDITIONAL([HAVE_YACC_OLD_PURE],[test "x${HAVE_YACC_OLD_PURE}" == "x1"])
-
-  AS_IF([test "$ax_cv_prog_bison" = yes],[
-    :
-    $1
-  ],[
-    :
-    $2
-  ])
-])
diff --git a/m4/ax_prog_bison_clfeatures.m4 b/m4/ax_prog_bison_clfeatures.m4
deleted file mode 100755
index 46e82665..00000000
--- a/m4/ax_prog_bison_clfeatures.m4
+++ /dev/null
@@ -1,137 +0,0 @@
-AC_DEFUN([AX_PROG_BISON_CLFEATURES], [
-	AC_REQUIRE([AC_PROG_YACC])
-	AC_REQUIRE([AC_PROG_SED])
-
-	AC_CACHE_CHECK([if bison is the parser generator],[ax_cv_prog_bison],[
-  		AS_IF([test "`echo \"$YACC\" | $SED 's,^.*\(bison\).*$,\1,'`" = "bison" ],[
-			ax_cv_prog_bison=yes
-    			],[
-      				ax_cv_prog_bison=no
-    		])
-  	])
-
-cat > conftest.y <<ACEOF
-%{
-    int yylex(void*);
-    void yyerror(const char *s);
-%}
-%pure-parser
-%token  FIRST_TOK
-%token  LAST_TOK
-%start top
-%%
-top: FIRST_TOK LAST_TOK
-%%
-ACEOF
-
-# set up some common variables for testing:
-ac_cv_prog_yacc_root="y.tab"
-ac_compile_yacc='$CC -c $CFLAGS $CPPFLAGS $ac_cv_prog_yacc_root.c >&5'
-
-HAVE_YACC_OLD_PURE=
-HAVE_YACC_OLD_PUSH=
-AC_MSG_CHECKING([if ${YACC} supports pure / reentrant paser features])
-if $YACC -d -t -v conftest.y > /dev/null 2>&1 && eval "$ac_compile_yacc"
-then
-	AC_SUBST([CODES_PURE_PARSER_DEFINES], ["%pure-parser"])
-	AC_MSG_RESULT([old-style])
-	$3
-else
-
-cat > conftest.y <<ACEOF
-%{
-    int yylex(void*);
-    void yyerror(const char *s);
-%}
-%define api.pure
-%token  FIRST_TOK
-%token  LAST_TOK
-%start top
-%%
-top: FIRST_TOK LAST_TOK
-%%
-ACEOF
-
-# set up some common variables for testing:
-ac_cv_prog_yacc_root="y.tab"
-ac_compile_yacc='$CC -c $CFLAGS $CPPFLAGS $ac_cv_prog_yacc_root.c >&5'
-	if $YACC -d -t -v conftest.y > /dev/null 2>&1 && eval "$ac_compile_yacc"
-	then
-		AC_SUBST([CODES_PURE_PARSER_DEFINES], ["%define api.pure"])
-		AC_MSG_RESULT([new-style])
-		$3
-	else
-		AC_MSG_RESULT([feature not supported])
-		BVER=`${YACC} --version | head -n 1`
-		AC_MSG_WARN([${BVER} does not support pure / reentrant parser generation])
-		$4
-	fi
-fi
-
-cat > conftest.y <<ACEOF
-%{
-    int yylex(void*);
-    void yyerror(const char *s);
-%}
-%define api.push_pull "push"
-%token  FIRST_TOK
-%token  LAST_TOK
-%start top
-%%
-top: FIRST_TOK LAST_TOK
-%%
-ACEOF
-
-# set up some common variables for testing:
-ac_cv_prog_yacc_root="y.tab"
-ac_compile_yacc='$CC -c $CFLAGS $CPPFLAGS $ac_cv_prog_yacc_root.c >&5'
-
-AC_MSG_CHECKING([if ${YACC} supports push parser features])
-if $YACC -d -t -v conftest.y > /dev/null 2>&1 && eval "$ac_compile_yacc"
-then
-	AC_SUBST([CODES_PUSH_PARSER_DEFINES], ["%define api.push_pull \"push\""])
-	AC_MSG_RESULT([old-style])
-	$3
-else
- 
-cat > conftest.y <<ACEOF
-%{
-    int yylex(void*);
-    void yyerror(const char *s);
-%}
-%define api.push-pull push
-%token  FIRST_TOK
-%token  LAST_TOK
-%start top
-%%
-top: FIRST_TOK LAST_TOK
-%%
-ACEOF
-
-# set up some common variables for testing:
-ac_cv_prog_yacc_root="y.tab"
-ac_compile_yacc='$CC -c $CFLAGS $CPPFLAGS $ac_cv_prog_yacc_root.c >&5'
-	if $YACC -d -t -v conftest.y > /dev/null 2>&1 && eval "$ac_compile_yacc"
-	then
-		AC_SUBST([CODES_PUSH_PARSER_DEFINES], ["%define api.push-pull push"])
-		AC_MSG_RESULT([new-style])
-		$3
-	else
-		AC_MSG_RESULT([feature not supported])
-		BVER=`${YACC} --version | head -n 1`
-		AC_MSG_WARN([${BVER} does not support push parser generation])
-		$4
-	fi
-fi
-
-  AS_IF([test "$ax_cv_prog_bison" = yes],[
-    :
-    $1
-  ],[
-    :
-    $2
-  ])
-
-  # cleanup bison / yacc tmp files
-  rm -rf y.output y.tab.h y.tab.c y.tab.o
-])
diff --git a/m4/ax_prog_flex.m4 b/m4/ax_prog_flex.m4
deleted file mode 100755
index 6f8c6107..00000000
--- a/m4/ax_prog_flex.m4
+++ /dev/null
@@ -1,62 +0,0 @@
-# ===========================================================================
-#          http://www.nongnu.org/autoconf-archive/ax_prog_flex.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_PROG_FLEX(ACTION-IF-TRUE,ACTION-IF-FALSE)
-#
-# DESCRIPTION
-#
-#   Check whether flex is the scanner generator. Run ACTION-IF-TRUE if
-#   successful, ACTION-IF-FALSE otherwise
-#
-# LICENSE
-#
-#   Copyright (c) 2009 Francesco Salvestrini <salvestrini@users.sourceforge.net>
-#
-#   This program is free software; you can redistribute it and/or modify it
-#   under the terms of the GNU General Public License as published by the
-#   Free Software Foundation; either version 2 of the License, or (at your
-#   option) any later version.
-#
-#   This program is distributed in the hope that it will be useful, but
-#   WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-#   Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License along
-#   with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-#   As a special exception, the respective Autoconf Macro's copyright owner
-#   gives unlimited permission to copy, distribute and modify the configure
-#   scripts that are the output of Autoconf when processing the Macro. You
-#   need not follow the terms of the GNU General Public License when using
-#   or distributing such scripts, even though portions of the text of the
-#   Macro appear in them. The GNU General Public License (GPL) does govern
-#   all other use of the material that constitutes the Autoconf Macro.
-#
-#   This special exception to the GPL applies to versions of the Autoconf
-#   Macro released by the Autoconf Archive. When you make and distribute a
-#   modified version of the Autoconf Macro, you may extend this special
-#   exception to the GPL to apply to your modified version as well.
-
-AC_DEFUN([AX_PROG_FLEX], [
-  AC_REQUIRE([AC_PROG_LEX])
-  AC_REQUIRE([AC_PROG_SED])
-
-  AC_CACHE_CHECK([if flex is the lexer generator],[ax_cv_prog_flex],[
-    AS_IF([test "`echo \"$LEX\" | $SED 's,^.*\(flex\).*$,\1,'`" = "flex"],[
-      ax_cv_prog_flex=yes
-    ],[
-      ax_cv_prog_flex=no
-    ])
-  ])
-  AS_IF([test "$ax_cv_prog_flex" = yes],[
-    :
-    $1
-  ],[
-    :
-    $2
-  ])
-])
diff --git a/m4/libtool.m4 b/m4/libtool.m4
deleted file mode 100644
index 56666f0e..00000000
--- a/m4/libtool.m4
+++ /dev/null
@@ -1,7986 +0,0 @@
-# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
-#
-#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-#                 Foundation, Inc.
-#   Written by Gordon Matzigkeit, 1996
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-m4_define([_LT_COPYING], [dnl
-#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-#                 Foundation, Inc.
-#   Written by Gordon Matzigkeit, 1996
-#
-#   This file is part of GNU Libtool.
-#
-# GNU Libtool is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of
-# the License, or (at your option) any later version.
-#
-# As a special exception to the GNU General Public License,
-# if you distribute this file as part of a program or library that
-# is built using GNU Libtool, you may include this file under the
-# same distribution terms that you use for the rest of that program.
-#
-# GNU Libtool is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Libtool; see the file COPYING.  If not, a copy
-# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
-# obtained by writing to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-])
-
-# serial 57 LT_INIT
-
-
-# LT_PREREQ(VERSION)
-# ------------------
-# Complain and exit if this libtool version is less that VERSION.
-m4_defun([LT_PREREQ],
-[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1,
-       [m4_default([$3],
-		   [m4_fatal([Libtool version $1 or higher is required],
-		             63)])],
-       [$2])])
-
-
-# _LT_CHECK_BUILDDIR
-# ------------------
-# Complain if the absolute build directory name contains unusual characters
-m4_defun([_LT_CHECK_BUILDDIR],
-[case `pwd` in
-  *\ * | *\	*)
-    AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;;
-esac
-])
-
-
-# LT_INIT([OPTIONS])
-# ------------------
-AC_DEFUN([LT_INIT],
-[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT
-AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
-AC_BEFORE([$0], [LT_LANG])dnl
-AC_BEFORE([$0], [LT_OUTPUT])dnl
-AC_BEFORE([$0], [LTDL_INIT])dnl
-m4_require([_LT_CHECK_BUILDDIR])dnl
-
-dnl Autoconf doesn't catch unexpanded LT_ macros by default:
-m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl
-m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl
-dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4
-dnl unless we require an AC_DEFUNed macro:
-AC_REQUIRE([LTOPTIONS_VERSION])dnl
-AC_REQUIRE([LTSUGAR_VERSION])dnl
-AC_REQUIRE([LTVERSION_VERSION])dnl
-AC_REQUIRE([LTOBSOLETE_VERSION])dnl
-m4_require([_LT_PROG_LTMAIN])dnl
-
-_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}])
-
-dnl Parse OPTIONS
-_LT_SET_OPTIONS([$0], [$1])
-
-# This can be used to rebuild libtool when needed
-LIBTOOL_DEPS="$ltmain"
-
-# Always use our own libtool.
-LIBTOOL='$(SHELL) $(top_builddir)/libtool'
-AC_SUBST(LIBTOOL)dnl
-
-_LT_SETUP
-
-# Only expand once:
-m4_define([LT_INIT])
-])# LT_INIT
-
-# Old names:
-AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT])
-AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_PROG_LIBTOOL], [])
-dnl AC_DEFUN([AM_PROG_LIBTOOL], [])
-
-
-# _LT_CC_BASENAME(CC)
-# -------------------
-# Calculate cc_basename.  Skip known compiler wrappers and cross-prefix.
-m4_defun([_LT_CC_BASENAME],
-[for cc_temp in $1""; do
-  case $cc_temp in
-    compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;;
-    distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;;
-    \-*) ;;
-    *) break;;
-  esac
-done
-cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
-])
-
-
-# _LT_FILEUTILS_DEFAULTS
-# ----------------------
-# It is okay to use these file commands and assume they have been set
-# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'.
-m4_defun([_LT_FILEUTILS_DEFAULTS],
-[: ${CP="cp -f"}
-: ${MV="mv -f"}
-: ${RM="rm -f"}
-])# _LT_FILEUTILS_DEFAULTS
-
-
-# _LT_SETUP
-# ---------
-m4_defun([_LT_SETUP],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_CANONICAL_BUILD])dnl
-AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
-AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
-
-_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
-dnl
-_LT_DECL([], [host_alias], [0], [The host system])dnl
-_LT_DECL([], [host], [0])dnl
-_LT_DECL([], [host_os], [0])dnl
-dnl
-_LT_DECL([], [build_alias], [0], [The build system])dnl
-_LT_DECL([], [build], [0])dnl
-_LT_DECL([], [build_os], [0])dnl
-dnl
-AC_REQUIRE([AC_PROG_CC])dnl
-AC_REQUIRE([LT_PATH_LD])dnl
-AC_REQUIRE([LT_PATH_NM])dnl
-dnl
-AC_REQUIRE([AC_PROG_LN_S])dnl
-test -z "$LN_S" && LN_S="ln -s"
-_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl
-dnl
-AC_REQUIRE([LT_CMD_MAX_LEN])dnl
-_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl
-_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl
-dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_CHECK_SHELL_FEATURES])dnl
-m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl
-m4_require([_LT_CMD_RELOAD])dnl
-m4_require([_LT_CHECK_MAGIC_METHOD])dnl
-m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl
-m4_require([_LT_CMD_OLD_ARCHIVE])dnl
-m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
-m4_require([_LT_WITH_SYSROOT])dnl
-
-_LT_CONFIG_LIBTOOL_INIT([
-# See if we are running on zsh, and set the options which allow our
-# commands through without removal of \ escapes INIT.
-if test -n "\${ZSH_VERSION+set}" ; then
-   setopt NO_GLOB_SUBST
-fi
-])
-if test -n "${ZSH_VERSION+set}" ; then
-   setopt NO_GLOB_SUBST
-fi
-
-_LT_CHECK_OBJDIR
-
-m4_require([_LT_TAG_COMPILER])dnl
-
-case $host_os in
-aix3*)
-  # AIX sometimes has problems with the GCC collect2 program.  For some
-  # reason, if we set the COLLECT_NAMES environment variable, the problems
-  # vanish in a puff of smoke.
-  if test "X${COLLECT_NAMES+set}" != Xset; then
-    COLLECT_NAMES=
-    export COLLECT_NAMES
-  fi
-  ;;
-esac
-
-# Global variables:
-ofile=libtool
-can_build_shared=yes
-
-# All known linkers require a `.a' archive for static linking (except MSVC,
-# which needs '.lib').
-libext=a
-
-with_gnu_ld="$lt_cv_prog_gnu_ld"
-
-old_CC="$CC"
-old_CFLAGS="$CFLAGS"
-
-# Set sane defaults for various variables
-test -z "$CC" && CC=cc
-test -z "$LTCC" && LTCC=$CC
-test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
-test -z "$LD" && LD=ld
-test -z "$ac_objext" && ac_objext=o
-
-_LT_CC_BASENAME([$compiler])
-
-# Only perform the check for file, if the check method requires it
-test -z "$MAGIC_CMD" && MAGIC_CMD=file
-case $deplibs_check_method in
-file_magic*)
-  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
-    _LT_PATH_MAGIC
-  fi
-  ;;
-esac
-
-# Use C for the default configuration in the libtool script
-LT_SUPPORTED_TAG([CC])
-_LT_LANG_C_CONFIG
-_LT_LANG_DEFAULT_CONFIG
-_LT_CONFIG_COMMANDS
-])# _LT_SETUP
-
-
-# _LT_PREPARE_SED_QUOTE_VARS
-# --------------------------
-# Define a few sed substitution that help us do robust quoting.
-m4_defun([_LT_PREPARE_SED_QUOTE_VARS],
-[# Backslashify metacharacters that are still active within
-# double-quoted strings.
-sed_quote_subst='s/\([["`$\\]]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\([["`\\]]\)/\\\1/g'
-
-# Sed substitution to delay expansion of an escaped shell variable in a
-# double_quote_subst'ed string.
-delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
-
-# Sed substitution to delay expansion of an escaped single quote.
-delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
-
-# Sed substitution to avoid accidental globbing in evaled expressions
-no_glob_subst='s/\*/\\\*/g'
-])
-
-# _LT_PROG_LTMAIN
-# ---------------
-# Note that this code is called both from `configure', and `config.status'
-# now that we use AC_CONFIG_COMMANDS to generate libtool.  Notably,
-# `config.status' has no value for ac_aux_dir unless we are using Automake,
-# so we pass a copy along to make sure it has a sensible value anyway.
-m4_defun([_LT_PROG_LTMAIN],
-[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl
-_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir'])
-ltmain="$ac_aux_dir/ltmain.sh"
-])# _LT_PROG_LTMAIN
-
-
-## ------------------------------------- ##
-## Accumulate code for creating libtool. ##
-## ------------------------------------- ##
-
-# So that we can recreate a full libtool script including additional
-# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS
-# in macros and then make a single call at the end using the `libtool'
-# label.
-
-
-# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS])
-# ----------------------------------------
-# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later.
-m4_define([_LT_CONFIG_LIBTOOL_INIT],
-[m4_ifval([$1],
-          [m4_append([_LT_OUTPUT_LIBTOOL_INIT],
-                     [$1
-])])])
-
-# Initialize.
-m4_define([_LT_OUTPUT_LIBTOOL_INIT])
-
-
-# _LT_CONFIG_LIBTOOL([COMMANDS])
-# ------------------------------
-# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later.
-m4_define([_LT_CONFIG_LIBTOOL],
-[m4_ifval([$1],
-          [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS],
-                     [$1
-])])])
-
-# Initialize.
-m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS])
-
-
-# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS])
-# -----------------------------------------------------
-m4_defun([_LT_CONFIG_SAVE_COMMANDS],
-[_LT_CONFIG_LIBTOOL([$1])
-_LT_CONFIG_LIBTOOL_INIT([$2])
-])
-
-
-# _LT_FORMAT_COMMENT([COMMENT])
-# -----------------------------
-# Add leading comment marks to the start of each line, and a trailing
-# full-stop to the whole comment if one is not present already.
-m4_define([_LT_FORMAT_COMMENT],
-[m4_ifval([$1], [
-m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])],
-              [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.])
-)])
-
-
-
-## ------------------------ ##
-## FIXME: Eliminate VARNAME ##
-## ------------------------ ##
-
-
-# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?])
-# -------------------------------------------------------------------
-# CONFIGNAME is the name given to the value in the libtool script.
-# VARNAME is the (base) name used in the configure script.
-# VALUE may be 0, 1 or 2 for a computed quote escaped value based on
-# VARNAME.  Any other value will be used directly.
-m4_define([_LT_DECL],
-[lt_if_append_uniq([lt_decl_varnames], [$2], [, ],
-    [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name],
-	[m4_ifval([$1], [$1], [$2])])
-    lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3])
-    m4_ifval([$4],
-	[lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])])
-    lt_dict_add_subkey([lt_decl_dict], [$2],
-	[tagged?], [m4_ifval([$5], [yes], [no])])])
-])
-
-
-# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION])
-# --------------------------------------------------------
-m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])])
-
-
-# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...])
-# ------------------------------------------------
-m4_define([lt_decl_tag_varnames],
-[_lt_decl_filter([tagged?], [yes], $@)])
-
-
-# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..])
-# ---------------------------------------------------------
-m4_define([_lt_decl_filter],
-[m4_case([$#],
-  [0], [m4_fatal([$0: too few arguments: $#])],
-  [1], [m4_fatal([$0: too few arguments: $#: $1])],
-  [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)],
-  [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)],
-  [lt_dict_filter([lt_decl_dict], $@)])[]dnl
-])
-
-
-# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...])
-# --------------------------------------------------
-m4_define([lt_decl_quote_varnames],
-[_lt_decl_filter([value], [1], $@)])
-
-
-# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...])
-# ---------------------------------------------------
-m4_define([lt_decl_dquote_varnames],
-[_lt_decl_filter([value], [2], $@)])
-
-
-# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...])
-# ---------------------------------------------------
-m4_define([lt_decl_varnames_tagged],
-[m4_assert([$# <= 2])dnl
-_$0(m4_quote(m4_default([$1], [[, ]])),
-    m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]),
-    m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))])
-m4_define([_lt_decl_varnames_tagged],
-[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])])
-
-
-# lt_decl_all_varnames([SEPARATOR], [VARNAME1...])
-# ------------------------------------------------
-m4_define([lt_decl_all_varnames],
-[_$0(m4_quote(m4_default([$1], [[, ]])),
-     m4_if([$2], [],
-	   m4_quote(lt_decl_varnames),
-	m4_quote(m4_shift($@))))[]dnl
-])
-m4_define([_lt_decl_all_varnames],
-[lt_join($@, lt_decl_varnames_tagged([$1],
-			lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl
-])
-
-
-# _LT_CONFIG_STATUS_DECLARE([VARNAME])
-# ------------------------------------
-# Quote a variable value, and forward it to `config.status' so that its
-# declaration there will have the same value as in `configure'.  VARNAME
-# must have a single quote delimited value for this to work.
-m4_define([_LT_CONFIG_STATUS_DECLARE],
-[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`'])
-
-
-# _LT_CONFIG_STATUS_DECLARATIONS
-# ------------------------------
-# We delimit libtool config variables with single quotes, so when
-# we write them to config.status, we have to be sure to quote all
-# embedded single quotes properly.  In configure, this macro expands
-# each variable declared with _LT_DECL (and _LT_TAGDECL) into:
-#
-#    <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`'
-m4_defun([_LT_CONFIG_STATUS_DECLARATIONS],
-[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames),
-    [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])])
-
-
-# _LT_LIBTOOL_TAGS
-# ----------------
-# Output comment and list of tags supported by the script
-m4_defun([_LT_LIBTOOL_TAGS],
-[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl
-available_tags="_LT_TAGS"dnl
-])
-
-
-# _LT_LIBTOOL_DECLARE(VARNAME, [TAG])
-# -----------------------------------
-# Extract the dictionary values for VARNAME (optionally with TAG) and
-# expand to a commented shell variable setting:
-#
-#    # Some comment about what VAR is for.
-#    visible_name=$lt_internal_name
-m4_define([_LT_LIBTOOL_DECLARE],
-[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1],
-					   [description])))[]dnl
-m4_pushdef([_libtool_name],
-    m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl
-m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])),
-    [0], [_libtool_name=[$]$1],
-    [1], [_libtool_name=$lt_[]$1],
-    [2], [_libtool_name=$lt_[]$1],
-    [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl
-m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl
-])
-
-
-# _LT_LIBTOOL_CONFIG_VARS
-# -----------------------
-# Produce commented declarations of non-tagged libtool config variables
-# suitable for insertion in the LIBTOOL CONFIG section of the `libtool'
-# script.  Tagged libtool config variables (even for the LIBTOOL CONFIG
-# section) are produced by _LT_LIBTOOL_TAG_VARS.
-m4_defun([_LT_LIBTOOL_CONFIG_VARS],
-[m4_foreach([_lt_var],
-    m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)),
-    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])])
-
-
-# _LT_LIBTOOL_TAG_VARS(TAG)
-# -------------------------
-m4_define([_LT_LIBTOOL_TAG_VARS],
-[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames),
-    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])])
-
-
-# _LT_TAGVAR(VARNAME, [TAGNAME])
-# ------------------------------
-m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])])
-
-
-# _LT_CONFIG_COMMANDS
-# -------------------
-# Send accumulated output to $CONFIG_STATUS.  Thanks to the lists of
-# variables for single and double quote escaping we saved from calls
-# to _LT_DECL, we can put quote escaped variables declarations
-# into `config.status', and then the shell code to quote escape them in
-# for loops in `config.status'.  Finally, any additional code accumulated
-# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded.
-m4_defun([_LT_CONFIG_COMMANDS],
-[AC_PROVIDE_IFELSE([LT_OUTPUT],
-	dnl If the libtool generation code has been placed in $CONFIG_LT,
-	dnl instead of duplicating it all over again into config.status,
-	dnl then we will have config.status run $CONFIG_LT later, so it
-	dnl needs to know what name is stored there:
-        [AC_CONFIG_COMMANDS([libtool],
-            [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])],
-    dnl If the libtool generation code is destined for config.status,
-    dnl expand the accumulated commands and init code now:
-    [AC_CONFIG_COMMANDS([libtool],
-        [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])])
-])#_LT_CONFIG_COMMANDS
-
-
-# Initialize.
-m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT],
-[
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-sed_quote_subst='$sed_quote_subst'
-double_quote_subst='$double_quote_subst'
-delay_variable_subst='$delay_variable_subst'
-_LT_CONFIG_STATUS_DECLARATIONS
-LTCC='$LTCC'
-LTCFLAGS='$LTCFLAGS'
-compiler='$compiler_DEFAULT'
-
-# A function that is used when there is no print builtin or printf.
-func_fallback_echo ()
-{
-  eval 'cat <<_LTECHO_EOF
-\$[]1
-_LTECHO_EOF'
-}
-
-# Quote evaled strings.
-for var in lt_decl_all_varnames([[ \
-]], lt_decl_quote_varnames); do
-    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
-    *[[\\\\\\\`\\"\\\$]]*)
-      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
-      ;;
-    *)
-      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
-      ;;
-    esac
-done
-
-# Double-quote double-evaled strings.
-for var in lt_decl_all_varnames([[ \
-]], lt_decl_dquote_varnames); do
-    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
-    *[[\\\\\\\`\\"\\\$]]*)
-      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
-      ;;
-    *)
-      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
-      ;;
-    esac
-done
-
-_LT_OUTPUT_LIBTOOL_INIT
-])
-
-# _LT_GENERATED_FILE_INIT(FILE, [COMMENT])
-# ------------------------------------
-# Generate a child script FILE with all initialization necessary to
-# reuse the environment learned by the parent script, and make the
-# file executable.  If COMMENT is supplied, it is inserted after the
-# `#!' sequence but before initialization text begins.  After this
-# macro, additional text can be appended to FILE to form the body of
-# the child script.  The macro ends with non-zero status if the
-# file could not be fully written (such as if the disk is full).
-m4_ifdef([AS_INIT_GENERATED],
-[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])],
-[m4_defun([_LT_GENERATED_FILE_INIT],
-[m4_require([AS_PREPARE])]dnl
-[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl
-[lt_write_fail=0
-cat >$1 <<_ASEOF || lt_write_fail=1
-#! $SHELL
-# Generated by $as_me.
-$2
-SHELL=\${CONFIG_SHELL-$SHELL}
-export SHELL
-_ASEOF
-cat >>$1 <<\_ASEOF || lt_write_fail=1
-AS_SHELL_SANITIZE
-_AS_PREPARE
-exec AS_MESSAGE_FD>&1
-_ASEOF
-test $lt_write_fail = 0 && chmod +x $1[]dnl
-m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT
-
-# LT_OUTPUT
-# ---------
-# This macro allows early generation of the libtool script (before
-# AC_OUTPUT is called), incase it is used in configure for compilation
-# tests.
-AC_DEFUN([LT_OUTPUT],
-[: ${CONFIG_LT=./config.lt}
-AC_MSG_NOTICE([creating $CONFIG_LT])
-_LT_GENERATED_FILE_INIT(["$CONFIG_LT"],
-[# Run this file to recreate a libtool stub with the current configuration.])
-
-cat >>"$CONFIG_LT" <<\_LTEOF
-lt_cl_silent=false
-exec AS_MESSAGE_LOG_FD>>config.log
-{
-  echo
-  AS_BOX([Running $as_me.])
-} >&AS_MESSAGE_LOG_FD
-
-lt_cl_help="\
-\`$as_me' creates a local libtool stub from the current configuration,
-for use in further configure time tests before the real libtool is
-generated.
-
-Usage: $[0] [[OPTIONS]]
-
-  -h, --help      print this help, then exit
-  -V, --version   print version number, then exit
-  -q, --quiet     do not print progress messages
-  -d, --debug     don't remove temporary files
-
-Report bugs to <bug-libtool@gnu.org>."
-
-lt_cl_version="\
-m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
-m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
-configured by $[0], generated by m4_PACKAGE_STRING.
-
-Copyright (C) 2011 Free Software Foundation, Inc.
-This config.lt script is free software; the Free Software Foundation
-gives unlimited permision to copy, distribute and modify it."
-
-while test $[#] != 0
-do
-  case $[1] in
-    --version | --v* | -V )
-      echo "$lt_cl_version"; exit 0 ;;
-    --help | --h* | -h )
-      echo "$lt_cl_help"; exit 0 ;;
-    --debug | --d* | -d )
-      debug=: ;;
-    --quiet | --q* | --silent | --s* | -q )
-      lt_cl_silent=: ;;
-
-    -*) AC_MSG_ERROR([unrecognized option: $[1]
-Try \`$[0] --help' for more information.]) ;;
-
-    *) AC_MSG_ERROR([unrecognized argument: $[1]
-Try \`$[0] --help' for more information.]) ;;
-  esac
-  shift
-done
-
-if $lt_cl_silent; then
-  exec AS_MESSAGE_FD>/dev/null
-fi
-_LTEOF
-
-cat >>"$CONFIG_LT" <<_LTEOF
-_LT_OUTPUT_LIBTOOL_COMMANDS_INIT
-_LTEOF
-
-cat >>"$CONFIG_LT" <<\_LTEOF
-AC_MSG_NOTICE([creating $ofile])
-_LT_OUTPUT_LIBTOOL_COMMANDS
-AS_EXIT(0)
-_LTEOF
-chmod +x "$CONFIG_LT"
-
-# configure is writing to config.log, but config.lt does its own redirection,
-# appending to config.log, which fails on DOS, as config.log is still kept
-# open by configure.  Here we exec the FD to /dev/null, effectively closing
-# config.log, so it can be properly (re)opened and appended to by config.lt.
-lt_cl_success=:
-test "$silent" = yes &&
-  lt_config_lt_args="$lt_config_lt_args --quiet"
-exec AS_MESSAGE_LOG_FD>/dev/null
-$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false
-exec AS_MESSAGE_LOG_FD>>config.log
-$lt_cl_success || AS_EXIT(1)
-])# LT_OUTPUT
-
-
-# _LT_CONFIG(TAG)
-# ---------------
-# If TAG is the built-in tag, create an initial libtool script with a
-# default configuration from the untagged config vars.  Otherwise add code
-# to config.status for appending the configuration named by TAG from the
-# matching tagged config vars.
-m4_defun([_LT_CONFIG],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-_LT_CONFIG_SAVE_COMMANDS([
-  m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl
-  m4_if(_LT_TAG, [C], [
-    # See if we are running on zsh, and set the options which allow our
-    # commands through without removal of \ escapes.
-    if test -n "${ZSH_VERSION+set}" ; then
-      setopt NO_GLOB_SUBST
-    fi
-
-    cfgfile="${ofile}T"
-    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
-    $RM "$cfgfile"
-
-    cat <<_LT_EOF >> "$cfgfile"
-#! $SHELL
-
-# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
-# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
-# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
-# NOTE: Changes made to this file will be lost: look at ltmain.sh.
-#
-_LT_COPYING
-_LT_LIBTOOL_TAGS
-
-# ### BEGIN LIBTOOL CONFIG
-_LT_LIBTOOL_CONFIG_VARS
-_LT_LIBTOOL_TAG_VARS
-# ### END LIBTOOL CONFIG
-
-_LT_EOF
-
-  case $host_os in
-  aix3*)
-    cat <<\_LT_EOF >> "$cfgfile"
-# AIX sometimes has problems with the GCC collect2 program.  For some
-# reason, if we set the COLLECT_NAMES environment variable, the problems
-# vanish in a puff of smoke.
-if test "X${COLLECT_NAMES+set}" != Xset; then
-  COLLECT_NAMES=
-  export COLLECT_NAMES
-fi
-_LT_EOF
-    ;;
-  esac
-
-  _LT_PROG_LTMAIN
-
-  # We use sed instead of cat because bash on DJGPP gets confused if
-  # if finds mixed CR/LF and LF-only lines.  Since sed operates in
-  # text mode, it properly converts lines to CR/LF.  This bash problem
-  # is reportedly fixed, but why not run on old versions too?
-  sed '$q' "$ltmain" >> "$cfgfile" \
-     || (rm -f "$cfgfile"; exit 1)
-
-  _LT_PROG_REPLACE_SHELLFNS
-
-   mv -f "$cfgfile" "$ofile" ||
-    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
-  chmod +x "$ofile"
-],
-[cat <<_LT_EOF >> "$ofile"
-
-dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded
-dnl in a comment (ie after a #).
-# ### BEGIN LIBTOOL TAG CONFIG: $1
-_LT_LIBTOOL_TAG_VARS(_LT_TAG)
-# ### END LIBTOOL TAG CONFIG: $1
-_LT_EOF
-])dnl /m4_if
-],
-[m4_if([$1], [], [
-    PACKAGE='$PACKAGE'
-    VERSION='$VERSION'
-    TIMESTAMP='$TIMESTAMP'
-    RM='$RM'
-    ofile='$ofile'], [])
-])dnl /_LT_CONFIG_SAVE_COMMANDS
-])# _LT_CONFIG
-
-
-# LT_SUPPORTED_TAG(TAG)
-# ---------------------
-# Trace this macro to discover what tags are supported by the libtool
-# --tag option, using:
-#    autoconf --trace 'LT_SUPPORTED_TAG:$1'
-AC_DEFUN([LT_SUPPORTED_TAG], [])
-
-
-# C support is built-in for now
-m4_define([_LT_LANG_C_enabled], [])
-m4_define([_LT_TAGS], [])
-
-
-# LT_LANG(LANG)
-# -------------
-# Enable libtool support for the given language if not already enabled.
-AC_DEFUN([LT_LANG],
-[AC_BEFORE([$0], [LT_OUTPUT])dnl
-m4_case([$1],
-  [C],			[_LT_LANG(C)],
-  [C++],		[_LT_LANG(CXX)],
-  [Go],			[_LT_LANG(GO)],
-  [Java],		[_LT_LANG(GCJ)],
-  [Fortran 77],		[_LT_LANG(F77)],
-  [Fortran],		[_LT_LANG(FC)],
-  [Windows Resource],	[_LT_LANG(RC)],
-  [m4_ifdef([_LT_LANG_]$1[_CONFIG],
-    [_LT_LANG($1)],
-    [m4_fatal([$0: unsupported language: "$1"])])])dnl
-])# LT_LANG
-
-
-# _LT_LANG(LANGNAME)
-# ------------------
-m4_defun([_LT_LANG],
-[m4_ifdef([_LT_LANG_]$1[_enabled], [],
-  [LT_SUPPORTED_TAG([$1])dnl
-  m4_append([_LT_TAGS], [$1 ])dnl
-  m4_define([_LT_LANG_]$1[_enabled], [])dnl
-  _LT_LANG_$1_CONFIG($1)])dnl
-])# _LT_LANG
-
-
-m4_ifndef([AC_PROG_GO], [
-############################################################
-# NOTE: This macro has been submitted for inclusion into   #
-#  GNU Autoconf as AC_PROG_GO.  When it is available in    #
-#  a released version of Autoconf we should remove this    #
-#  macro and use it instead.                               #
-############################################################
-m4_defun([AC_PROG_GO],
-[AC_LANG_PUSH(Go)dnl
-AC_ARG_VAR([GOC],     [Go compiler command])dnl
-AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
-_AC_ARG_VAR_LDFLAGS()dnl
-AC_CHECK_TOOL(GOC, gccgo)
-if test -z "$GOC"; then
-  if test -n "$ac_tool_prefix"; then
-    AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
-  fi
-fi
-if test -z "$GOC"; then
-  AC_CHECK_PROG(GOC, gccgo, gccgo, false)
-fi
-])#m4_defun
-])#m4_ifndef
-
-
-# _LT_LANG_DEFAULT_CONFIG
-# -----------------------
-m4_defun([_LT_LANG_DEFAULT_CONFIG],
-[AC_PROVIDE_IFELSE([AC_PROG_CXX],
-  [LT_LANG(CXX)],
-  [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])])
-
-AC_PROVIDE_IFELSE([AC_PROG_F77],
-  [LT_LANG(F77)],
-  [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])])
-
-AC_PROVIDE_IFELSE([AC_PROG_FC],
-  [LT_LANG(FC)],
-  [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])])
-
-dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal
-dnl pulling things in needlessly.
-AC_PROVIDE_IFELSE([AC_PROG_GCJ],
-  [LT_LANG(GCJ)],
-  [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
-    [LT_LANG(GCJ)],
-    [AC_PROVIDE_IFELSE([LT_PROG_GCJ],
-      [LT_LANG(GCJ)],
-      [m4_ifdef([AC_PROG_GCJ],
-	[m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])])
-       m4_ifdef([A][M_PROG_GCJ],
-	[m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])])
-       m4_ifdef([LT_PROG_GCJ],
-	[m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
-
-AC_PROVIDE_IFELSE([AC_PROG_GO],
-  [LT_LANG(GO)],
-  [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
-
-AC_PROVIDE_IFELSE([LT_PROG_RC],
-  [LT_LANG(RC)],
-  [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
-])# _LT_LANG_DEFAULT_CONFIG
-
-# Obsolete macros:
-AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)])
-AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)])
-AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)])
-AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)])
-AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_CXX], [])
-dnl AC_DEFUN([AC_LIBTOOL_F77], [])
-dnl AC_DEFUN([AC_LIBTOOL_FC], [])
-dnl AC_DEFUN([AC_LIBTOOL_GCJ], [])
-dnl AC_DEFUN([AC_LIBTOOL_RC], [])
-
-
-# _LT_TAG_COMPILER
-# ----------------
-m4_defun([_LT_TAG_COMPILER],
-[AC_REQUIRE([AC_PROG_CC])dnl
-
-_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl
-_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl
-_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl
-_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl
-
-# If no C compiler was specified, use CC.
-LTCC=${LTCC-"$CC"}
-
-# If no C compiler flags were specified, use CFLAGS.
-LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
-
-# Allow CC to be a program name with arguments.
-compiler=$CC
-])# _LT_TAG_COMPILER
-
-
-# _LT_COMPILER_BOILERPLATE
-# ------------------------
-# Check for compiler boilerplate output or warnings with
-# the simple compiler test code.
-m4_defun([_LT_COMPILER_BOILERPLATE],
-[m4_require([_LT_DECL_SED])dnl
-ac_outfile=conftest.$ac_objext
-echo "$lt_simple_compile_test_code" >conftest.$ac_ext
-eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_compiler_boilerplate=`cat conftest.err`
-$RM conftest*
-])# _LT_COMPILER_BOILERPLATE
-
-
-# _LT_LINKER_BOILERPLATE
-# ----------------------
-# Check for linker boilerplate output or warnings with
-# the simple link test code.
-m4_defun([_LT_LINKER_BOILERPLATE],
-[m4_require([_LT_DECL_SED])dnl
-ac_outfile=conftest.$ac_objext
-echo "$lt_simple_link_test_code" >conftest.$ac_ext
-eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_linker_boilerplate=`cat conftest.err`
-$RM -r conftest*
-])# _LT_LINKER_BOILERPLATE
-
-# _LT_REQUIRED_DARWIN_CHECKS
-# -------------------------
-m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
-  case $host_os in
-    rhapsody* | darwin*)
-    AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:])
-    AC_CHECK_TOOL([NMEDIT], [nmedit], [:])
-    AC_CHECK_TOOL([LIPO], [lipo], [:])
-    AC_CHECK_TOOL([OTOOL], [otool], [:])
-    AC_CHECK_TOOL([OTOOL64], [otool64], [:])
-    _LT_DECL([], [DSYMUTIL], [1],
-      [Tool to manipulate archived DWARF debug symbol files on Mac OS X])
-    _LT_DECL([], [NMEDIT], [1],
-      [Tool to change global to local symbols on Mac OS X])
-    _LT_DECL([], [LIPO], [1],
-      [Tool to manipulate fat objects and archives on Mac OS X])
-    _LT_DECL([], [OTOOL], [1],
-      [ldd/readelf like tool for Mach-O binaries on Mac OS X])
-    _LT_DECL([], [OTOOL64], [1],
-      [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4])
-
-    AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod],
-      [lt_cv_apple_cc_single_mod=no
-      if test -z "${LT_MULTI_MODULE}"; then
-	# By default we will add the -single_module flag. You can override
-	# by either setting the environment variable LT_MULTI_MODULE
-	# non-empty at configure time, or by adding -multi_module to the
-	# link flags.
-	rm -rf libconftest.dylib*
-	echo "int foo(void){return 1;}" > conftest.c
-	echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
--dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD
-	$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-	  -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
-        _lt_result=$?
-	# If there is a non-empty error log, and "single_module"
-	# appears in it, assume the flag caused a linker warning
-        if test -s conftest.err && $GREP single_module conftest.err; then
-	  cat conftest.err >&AS_MESSAGE_LOG_FD
-	# Otherwise, if the output was created with a 0 exit code from
-	# the compiler, it worked.
-	elif test -f libconftest.dylib && test $_lt_result -eq 0; then
-	  lt_cv_apple_cc_single_mod=yes
-	else
-	  cat conftest.err >&AS_MESSAGE_LOG_FD
-	fi
-	rm -rf libconftest.dylib*
-	rm -f conftest.*
-      fi])
-
-    AC_CACHE_CHECK([for -exported_symbols_list linker flag],
-      [lt_cv_ld_exported_symbols_list],
-      [lt_cv_ld_exported_symbols_list=no
-      save_LDFLAGS=$LDFLAGS
-      echo "_main" > conftest.sym
-      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
-      AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
-	[lt_cv_ld_exported_symbols_list=yes],
-	[lt_cv_ld_exported_symbols_list=no])
-	LDFLAGS="$save_LDFLAGS"
-    ])
-
-    AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
-      [lt_cv_ld_force_load=no
-      cat > conftest.c << _LT_EOF
-int forced_loaded() { return 2;}
-_LT_EOF
-      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD
-      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD
-      echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD
-      $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD
-      echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD
-      $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD
-      cat > conftest.c << _LT_EOF
-int main() { return 0;}
-_LT_EOF
-      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
-      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
-      _lt_result=$?
-      if test -s conftest.err && $GREP force_load conftest.err; then
-	cat conftest.err >&AS_MESSAGE_LOG_FD
-      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
-	lt_cv_ld_force_load=yes
-      else
-	cat conftest.err >&AS_MESSAGE_LOG_FD
-      fi
-        rm -f conftest.err libconftest.a conftest conftest.c
-        rm -rf conftest.dSYM
-    ])
-    case $host_os in
-    rhapsody* | darwin1.[[012]])
-      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
-    darwin1.*)
-      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
-    darwin*) # darwin 5.x on
-      # if running on 10.5 or later, the deployment target defaults
-      # to the OS version, if on x86, and 10.4, the deployment
-      # target defaults to 10.4. Don't you love it?
-      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
-	10.0,*86*-darwin8*|10.0,*-darwin[[91]]*)
-	  _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
-	10.[[012]]*)
-	  _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
-	10.*)
-	  _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
-      esac
-    ;;
-  esac
-    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
-      _lt_dar_single_mod='$single_module'
-    fi
-    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
-      _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
-    else
-      _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
-    fi
-    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
-      _lt_dsymutil='~$DSYMUTIL $lib || :'
-    else
-      _lt_dsymutil=
-    fi
-    ;;
-  esac
-])
-
-
-# _LT_DARWIN_LINKER_FEATURES([TAG])
-# ---------------------------------
-# Checks for linker and compiler features on darwin
-m4_defun([_LT_DARWIN_LINKER_FEATURES],
-[
-  m4_require([_LT_REQUIRED_DARWIN_CHECKS])
-  _LT_TAGVAR(archive_cmds_need_lc, $1)=no
-  _LT_TAGVAR(hardcode_direct, $1)=no
-  _LT_TAGVAR(hardcode_automatic, $1)=yes
-  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
-  if test "$lt_cv_ld_force_load" = "yes"; then
-    _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
-    m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
-                  [FC],  [_LT_TAGVAR(compiler_needs_object, $1)=yes])
-  else
-    _LT_TAGVAR(whole_archive_flag_spec, $1)=''
-  fi
-  _LT_TAGVAR(link_all_deplibs, $1)=yes
-  _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined"
-  case $cc_basename in
-     ifort*) _lt_dar_can_shared=yes ;;
-     *) _lt_dar_can_shared=$GCC ;;
-  esac
-  if test "$_lt_dar_can_shared" = "yes"; then
-    output_verbose_link_cmd=func_echo_all
-    _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
-    _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
-    _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
-    _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
-    m4_if([$1], [CXX],
-[   if test "$lt_cv_apple_cc_single_mod" != "yes"; then
-      _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
-      _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
-    fi
-],[])
-  else
-  _LT_TAGVAR(ld_shlibs, $1)=no
-  fi
-])
-
-# _LT_SYS_MODULE_PATH_AIX([TAGNAME])
-# ----------------------------------
-# Links a minimal program and checks the executable
-# for the system default hardcoded library path. In most cases,
-# this is /usr/lib:/lib, but when the MPI compilers are used
-# the location of the communication and MPI libs are included too.
-# If we don't find anything, use the default library path according
-# to the aix ld manual.
-# Store the results from the different compilers for each TAGNAME.
-# Allow to override them for all tags through lt_cv_aix_libpath.
-m4_defun([_LT_SYS_MODULE_PATH_AIX],
-[m4_require([_LT_DECL_SED])dnl
-if test "${lt_cv_aix_libpath+set}" = set; then
-  aix_libpath=$lt_cv_aix_libpath
-else
-  AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])],
-  [AC_LINK_IFELSE([AC_LANG_PROGRAM],[
-  lt_aix_libpath_sed='[
-      /Import File Strings/,/^$/ {
-	  /^0/ {
-	      s/^0  *\([^ ]*\) *$/\1/
-	      p
-	  }
-      }]'
-  _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
-  # Check for a 64-bit object if we didn't find anything.
-  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
-    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
-  fi],[])
-  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
-    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib"
-  fi
-  ])
-  aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])
-fi
-])# _LT_SYS_MODULE_PATH_AIX
-
-
-# _LT_SHELL_INIT(ARG)
-# -------------------
-m4_define([_LT_SHELL_INIT],
-[m4_divert_text([M4SH-INIT], [$1
-])])# _LT_SHELL_INIT
-
-
-
-# _LT_PROG_ECHO_BACKSLASH
-# -----------------------
-# Find how we can fake an echo command that does not interpret backslash.
-# In particular, with Autoconf 2.60 or later we add some code to the start
-# of the generated configure script which will find a shell with a builtin
-# printf (which we can use as an echo command).
-m4_defun([_LT_PROG_ECHO_BACKSLASH],
-[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
-ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
-ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
-
-AC_MSG_CHECKING([how to print strings])
-# Test print first, because it will be a builtin if present.
-if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
-   test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
-  ECHO='print -r --'
-elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
-  ECHO='printf %s\n'
-else
-  # Use this function as a fallback that always works.
-  func_fallback_echo ()
-  {
-    eval 'cat <<_LTECHO_EOF
-$[]1
-_LTECHO_EOF'
-  }
-  ECHO='func_fallback_echo'
-fi
-
-# func_echo_all arg...
-# Invoke $ECHO with all args, space-separated.
-func_echo_all ()
-{
-    $ECHO "$*" 
-}
-
-case "$ECHO" in
-  printf*) AC_MSG_RESULT([printf]) ;;
-  print*) AC_MSG_RESULT([print -r]) ;;
-  *) AC_MSG_RESULT([cat]) ;;
-esac
-
-m4_ifdef([_AS_DETECT_SUGGESTED],
-[_AS_DETECT_SUGGESTED([
-  test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || (
-    ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
-    ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
-    ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
-    PATH=/empty FPATH=/empty; export PATH FPATH
-    test "X`printf %s $ECHO`" = "X$ECHO" \
-      || test "X`print -r -- $ECHO`" = "X$ECHO" )])])
-
-_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts])
-_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes])
-])# _LT_PROG_ECHO_BACKSLASH
-
-
-# _LT_WITH_SYSROOT
-# ----------------
-AC_DEFUN([_LT_WITH_SYSROOT],
-[AC_MSG_CHECKING([for sysroot])
-AC_ARG_WITH([sysroot],
-[  --with-sysroot[=DIR] Search for dependent libraries within DIR
-                        (or the compiler's sysroot if not specified).],
-[], [with_sysroot=no])
-
-dnl lt_sysroot will always be passed unquoted.  We quote it here
-dnl in case the user passed a directory name.
-lt_sysroot=
-case ${with_sysroot} in #(
- yes)
-   if test "$GCC" = yes; then
-     lt_sysroot=`$CC --print-sysroot 2>/dev/null`
-   fi
-   ;; #(
- /*)
-   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
-   ;; #(
- no|'')
-   ;; #(
- *)
-   AC_MSG_RESULT([${with_sysroot}])
-   AC_MSG_ERROR([The sysroot must be an absolute path.])
-   ;;
-esac
-
- AC_MSG_RESULT([${lt_sysroot:-no}])
-_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl
-[dependent libraries, and in which our libraries should be installed.])])
-
-# _LT_ENABLE_LOCK
-# ---------------
-m4_defun([_LT_ENABLE_LOCK],
-[AC_ARG_ENABLE([libtool-lock],
-  [AS_HELP_STRING([--disable-libtool-lock],
-    [avoid locking (might break parallel builds)])])
-test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
-
-# Some flags need to be propagated to the compiler or linker for good
-# libtool support.
-case $host in
-ia64-*-hpux*)
-  # Find out which ABI we are using.
-  echo 'int i;' > conftest.$ac_ext
-  if AC_TRY_EVAL(ac_compile); then
-    case `/usr/bin/file conftest.$ac_objext` in
-      *ELF-32*)
-	HPUX_IA64_MODE="32"
-	;;
-      *ELF-64*)
-	HPUX_IA64_MODE="64"
-	;;
-    esac
-  fi
-  rm -rf conftest*
-  ;;
-*-*-irix6*)
-  # Find out which ABI we are using.
-  echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext
-  if AC_TRY_EVAL(ac_compile); then
-    if test "$lt_cv_prog_gnu_ld" = yes; then
-      case `/usr/bin/file conftest.$ac_objext` in
-	*32-bit*)
-	  LD="${LD-ld} -melf32bsmip"
-	  ;;
-	*N32*)
-	  LD="${LD-ld} -melf32bmipn32"
-	  ;;
-	*64-bit*)
-	  LD="${LD-ld} -melf64bmip"
-	;;
-      esac
-    else
-      case `/usr/bin/file conftest.$ac_objext` in
-	*32-bit*)
-	  LD="${LD-ld} -32"
-	  ;;
-	*N32*)
-	  LD="${LD-ld} -n32"
-	  ;;
-	*64-bit*)
-	  LD="${LD-ld} -64"
-	  ;;
-      esac
-    fi
-  fi
-  rm -rf conftest*
-  ;;
-
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
-s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
-  # Find out which ABI we are using.
-  echo 'int i;' > conftest.$ac_ext
-  if AC_TRY_EVAL(ac_compile); then
-    case `/usr/bin/file conftest.o` in
-      *32-bit*)
-	case $host in
-	  x86_64-*kfreebsd*-gnu)
-	    LD="${LD-ld} -m elf_i386_fbsd"
-	    ;;
-	  x86_64-*linux*)
-	    LD="${LD-ld} -m elf_i386"
-	    ;;
-	  ppc64-*linux*|powerpc64-*linux*)
-	    LD="${LD-ld} -m elf32ppclinux"
-	    ;;
-	  s390x-*linux*)
-	    LD="${LD-ld} -m elf_s390"
-	    ;;
-	  sparc64-*linux*)
-	    LD="${LD-ld} -m elf32_sparc"
-	    ;;
-	esac
-	;;
-      *64-bit*)
-	case $host in
-	  x86_64-*kfreebsd*-gnu)
-	    LD="${LD-ld} -m elf_x86_64_fbsd"
-	    ;;
-	  x86_64-*linux*)
-	    LD="${LD-ld} -m elf_x86_64"
-	    ;;
-	  ppc*-*linux*|powerpc*-*linux*)
-	    LD="${LD-ld} -m elf64ppc"
-	    ;;
-	  s390*-*linux*|s390*-*tpf*)
-	    LD="${LD-ld} -m elf64_s390"
-	    ;;
-	  sparc*-*linux*)
-	    LD="${LD-ld} -m elf64_sparc"
-	    ;;
-	esac
-	;;
-    esac
-  fi
-  rm -rf conftest*
-  ;;
-
-*-*-sco3.2v5*)
-  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
-  SAVE_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS -belf"
-  AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
-    [AC_LANG_PUSH(C)
-     AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
-     AC_LANG_POP])
-  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
-    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
-    CFLAGS="$SAVE_CFLAGS"
-  fi
-  ;;
-*-*solaris*)
-  # Find out which ABI we are using.
-  echo 'int i;' > conftest.$ac_ext
-  if AC_TRY_EVAL(ac_compile); then
-    case `/usr/bin/file conftest.o` in
-    *64-bit*)
-      case $lt_cv_prog_gnu_ld in
-      yes*)
-        case $host in
-        i?86-*-solaris*)
-          LD="${LD-ld} -m elf_x86_64"
-          ;;
-        sparc*-*-solaris*)
-          LD="${LD-ld} -m elf64_sparc"
-          ;;
-        esac
-        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
-        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
-          LD="${LD-ld}_sol2"
-        fi
-        ;;
-      *)
-	if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
-	  LD="${LD-ld} -64"
-	fi
-	;;
-      esac
-      ;;
-    esac
-  fi
-  rm -rf conftest*
-  ;;
-esac
-
-need_locks="$enable_libtool_lock"
-])# _LT_ENABLE_LOCK
-
-
-# _LT_PROG_AR
-# -----------
-m4_defun([_LT_PROG_AR],
-[AC_CHECK_TOOLS(AR, [ar], false)
-: ${AR=ar}
-: ${AR_FLAGS=cru}
-_LT_DECL([], [AR], [1], [The archiver])
-_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive])
-
-AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file],
-  [lt_cv_ar_at_file=no
-   AC_COMPILE_IFELSE([AC_LANG_PROGRAM],
-     [echo conftest.$ac_objext > conftest.lst
-      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD'
-      AC_TRY_EVAL([lt_ar_try])
-      if test "$ac_status" -eq 0; then
-	# Ensure the archiver fails upon bogus file names.
-	rm -f conftest.$ac_objext libconftest.a
-	AC_TRY_EVAL([lt_ar_try])
-	if test "$ac_status" -ne 0; then
-          lt_cv_ar_at_file=@
-        fi
-      fi
-      rm -f conftest.* libconftest.a
-     ])
-  ])
-
-if test "x$lt_cv_ar_at_file" = xno; then
-  archiver_list_spec=
-else
-  archiver_list_spec=$lt_cv_ar_at_file
-fi
-_LT_DECL([], [archiver_list_spec], [1],
-  [How to feed a file listing to the archiver])
-])# _LT_PROG_AR
-
-
-# _LT_CMD_OLD_ARCHIVE
-# -------------------
-m4_defun([_LT_CMD_OLD_ARCHIVE],
-[_LT_PROG_AR
-
-AC_CHECK_TOOL(STRIP, strip, :)
-test -z "$STRIP" && STRIP=:
-_LT_DECL([], [STRIP], [1], [A symbol stripping program])
-
-AC_CHECK_TOOL(RANLIB, ranlib, :)
-test -z "$RANLIB" && RANLIB=:
-_LT_DECL([], [RANLIB], [1],
-    [Commands used to install an old-style archive])
-
-# Determine commands to create old-style static archives.
-old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
-old_postinstall_cmds='chmod 644 $oldlib'
-old_postuninstall_cmds=
-
-if test -n "$RANLIB"; then
-  case $host_os in
-  openbsd*)
-    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
-    ;;
-  *)
-    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
-    ;;
-  esac
-  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
-fi
-
-case $host_os in
-  darwin*)
-    lock_old_archive_extraction=yes ;;
-  *)
-    lock_old_archive_extraction=no ;;
-esac
-_LT_DECL([], [old_postinstall_cmds], [2])
-_LT_DECL([], [old_postuninstall_cmds], [2])
-_LT_TAGDECL([], [old_archive_cmds], [2],
-    [Commands used to build an old-style archive])
-_LT_DECL([], [lock_old_archive_extraction], [0],
-    [Whether to use a lock for old archive extraction])
-])# _LT_CMD_OLD_ARCHIVE
-
-
-# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
-#		[OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
-# ----------------------------------------------------------------
-# Check whether the given compiler option works
-AC_DEFUN([_LT_COMPILER_OPTION],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_SED])dnl
-AC_CACHE_CHECK([$1], [$2],
-  [$2=no
-   m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
-   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-   lt_compiler_flag="$3"
-   # Insert the option either (1) after the last *FLAGS variable, or
-   # (2) before a word containing "conftest.", or (3) at the end.
-   # Note that $ac_compile itself does not contain backslashes and begins
-   # with a dollar sign (not a hyphen), so the echo should work correctly.
-   # The option is referenced via a variable to avoid confusing sed.
-   lt_compile=`echo "$ac_compile" | $SED \
-   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
-   -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
-   (eval "$lt_compile" 2>conftest.err)
-   ac_status=$?
-   cat conftest.err >&AS_MESSAGE_LOG_FD
-   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
-   if (exit $ac_status) && test -s "$ac_outfile"; then
-     # The compiler can only warn and ignore the option if not recognized
-     # So say no if there are warnings other than the usual output.
-     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
-     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
-     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
-       $2=yes
-     fi
-   fi
-   $RM conftest*
-])
-
-if test x"[$]$2" = xyes; then
-    m4_if([$5], , :, [$5])
-else
-    m4_if([$6], , :, [$6])
-fi
-])# _LT_COMPILER_OPTION
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [])
-
-
-# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
-#                  [ACTION-SUCCESS], [ACTION-FAILURE])
-# ----------------------------------------------------
-# Check whether the given linker option works
-AC_DEFUN([_LT_LINKER_OPTION],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_SED])dnl
-AC_CACHE_CHECK([$1], [$2],
-  [$2=no
-   save_LDFLAGS="$LDFLAGS"
-   LDFLAGS="$LDFLAGS $3"
-   echo "$lt_simple_link_test_code" > conftest.$ac_ext
-   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
-     # The linker can only warn and ignore the option if not recognized
-     # So say no if there are warnings
-     if test -s conftest.err; then
-       # Append any errors to the config.log.
-       cat conftest.err 1>&AS_MESSAGE_LOG_FD
-       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
-       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
-       if diff conftest.exp conftest.er2 >/dev/null; then
-         $2=yes
-       fi
-     else
-       $2=yes
-     fi
-   fi
-   $RM -r conftest*
-   LDFLAGS="$save_LDFLAGS"
-])
-
-if test x"[$]$2" = xyes; then
-    m4_if([$4], , :, [$4])
-else
-    m4_if([$5], , :, [$5])
-fi
-])# _LT_LINKER_OPTION
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [])
-
-
-# LT_CMD_MAX_LEN
-#---------------
-AC_DEFUN([LT_CMD_MAX_LEN],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-# find the maximum length of command line arguments
-AC_MSG_CHECKING([the maximum length of command line arguments])
-AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
-  i=0
-  teststring="ABCD"
-
-  case $build_os in
-  msdosdjgpp*)
-    # On DJGPP, this test can blow up pretty badly due to problems in libc
-    # (any single argument exceeding 2000 bytes causes a buffer overrun
-    # during glob expansion).  Even if it were fixed, the result of this
-    # check would be larger than it should be.
-    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
-    ;;
-
-  gnu*)
-    # Under GNU Hurd, this test is not required because there is
-    # no limit to the length of command line arguments.
-    # Libtool will interpret -1 as no limit whatsoever
-    lt_cv_sys_max_cmd_len=-1;
-    ;;
-
-  cygwin* | mingw* | cegcc*)
-    # On Win9x/ME, this test blows up -- it succeeds, but takes
-    # about 5 minutes as the teststring grows exponentially.
-    # Worse, since 9x/ME are not pre-emptively multitasking,
-    # you end up with a "frozen" computer, even though with patience
-    # the test eventually succeeds (with a max line length of 256k).
-    # Instead, let's just punt: use the minimum linelength reported by
-    # all of the supported platforms: 8192 (on NT/2K/XP).
-    lt_cv_sys_max_cmd_len=8192;
-    ;;
-
-  mint*)
-    # On MiNT this can take a long time and run out of memory.
-    lt_cv_sys_max_cmd_len=8192;
-    ;;
-
-  amigaos*)
-    # On AmigaOS with pdksh, this test takes hours, literally.
-    # So we just punt and use a minimum line length of 8192.
-    lt_cv_sys_max_cmd_len=8192;
-    ;;
-
-  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
-    # This has been around since 386BSD, at least.  Likely further.
-    if test -x /sbin/sysctl; then
-      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
-    elif test -x /usr/sbin/sysctl; then
-      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
-    else
-      lt_cv_sys_max_cmd_len=65536	# usable default for all BSDs
-    fi
-    # And add a safety zone
-    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
-    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
-    ;;
-
-  interix*)
-    # We know the value 262144 and hardcode it with a safety zone (like BSD)
-    lt_cv_sys_max_cmd_len=196608
-    ;;
-
-  os2*)
-    # The test takes a long time on OS/2.
-    lt_cv_sys_max_cmd_len=8192
-    ;;
-
-  osf*)
-    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
-    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
-    # nice to cause kernel panics so lets avoid the loop below.
-    # First set a reasonable default.
-    lt_cv_sys_max_cmd_len=16384
-    #
-    if test -x /sbin/sysconfig; then
-      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
-        *1*) lt_cv_sys_max_cmd_len=-1 ;;
-      esac
-    fi
-    ;;
-  sco3.2v5*)
-    lt_cv_sys_max_cmd_len=102400
-    ;;
-  sysv5* | sco5v6* | sysv4.2uw2*)
-    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
-    if test -n "$kargmax"; then
-      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[	 ]]//'`
-    else
-      lt_cv_sys_max_cmd_len=32768
-    fi
-    ;;
-  *)
-    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
-    if test -n "$lt_cv_sys_max_cmd_len"; then
-      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
-      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
-    else
-      # Make teststring a little bigger before we do anything with it.
-      # a 1K string should be a reasonable start.
-      for i in 1 2 3 4 5 6 7 8 ; do
-        teststring=$teststring$teststring
-      done
-      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
-      # If test is not a shell built-in, we'll probably end up computing a
-      # maximum length that is only half of the actual maximum length, but
-      # we can't tell.
-      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
-	         = "X$teststring$teststring"; } >/dev/null 2>&1 &&
-	      test $i != 17 # 1/2 MB should be enough
-      do
-        i=`expr $i + 1`
-        teststring=$teststring$teststring
-      done
-      # Only check the string length outside the loop.
-      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
-      teststring=
-      # Add a significant safety factor because C++ compilers can tack on
-      # massive amounts of additional arguments before passing them to the
-      # linker.  It appears as though 1/2 is a usable value.
-      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
-    fi
-    ;;
-  esac
-])
-if test -n $lt_cv_sys_max_cmd_len ; then
-  AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
-else
-  AC_MSG_RESULT(none)
-fi
-max_cmd_len=$lt_cv_sys_max_cmd_len
-_LT_DECL([], [max_cmd_len], [0],
-    [What is the maximum length of a command?])
-])# LT_CMD_MAX_LEN
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [])
-
-
-# _LT_HEADER_DLFCN
-# ----------------
-m4_defun([_LT_HEADER_DLFCN],
-[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl
-])# _LT_HEADER_DLFCN
-
-
-# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
-#                      ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
-# ----------------------------------------------------------------
-m4_defun([_LT_TRY_DLOPEN_SELF],
-[m4_require([_LT_HEADER_DLFCN])dnl
-if test "$cross_compiling" = yes; then :
-  [$4]
-else
-  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-  lt_status=$lt_dlunknown
-  cat > conftest.$ac_ext <<_LT_EOF
-[#line $LINENO "configure"
-#include "confdefs.h"
-
-#if HAVE_DLFCN_H
-#include <dlfcn.h>
-#endif
-
-#include <stdio.h>
-
-#ifdef RTLD_GLOBAL
-#  define LT_DLGLOBAL		RTLD_GLOBAL
-#else
-#  ifdef DL_GLOBAL
-#    define LT_DLGLOBAL		DL_GLOBAL
-#  else
-#    define LT_DLGLOBAL		0
-#  endif
-#endif
-
-/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
-   find out it does not work in some platform. */
-#ifndef LT_DLLAZY_OR_NOW
-#  ifdef RTLD_LAZY
-#    define LT_DLLAZY_OR_NOW		RTLD_LAZY
-#  else
-#    ifdef DL_LAZY
-#      define LT_DLLAZY_OR_NOW		DL_LAZY
-#    else
-#      ifdef RTLD_NOW
-#        define LT_DLLAZY_OR_NOW	RTLD_NOW
-#      else
-#        ifdef DL_NOW
-#          define LT_DLLAZY_OR_NOW	DL_NOW
-#        else
-#          define LT_DLLAZY_OR_NOW	0
-#        endif
-#      endif
-#    endif
-#  endif
-#endif
-
-/* When -fvisbility=hidden is used, assume the code has been annotated
-   correspondingly for the symbols needed.  */
-#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
-int fnord () __attribute__((visibility("default")));
-#endif
-
-int fnord () { return 42; }
-int main ()
-{
-  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
-  int status = $lt_dlunknown;
-
-  if (self)
-    {
-      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
-      else
-        {
-	  if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
-          else puts (dlerror ());
-	}
-      /* dlclose (self); */
-    }
-  else
-    puts (dlerror ());
-
-  return status;
-}]
-_LT_EOF
-  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
-    (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
-    lt_status=$?
-    case x$lt_status in
-      x$lt_dlno_uscore) $1 ;;
-      x$lt_dlneed_uscore) $2 ;;
-      x$lt_dlunknown|x*) $3 ;;
-    esac
-  else :
-    # compilation failed
-    $3
-  fi
-fi
-rm -fr conftest*
-])# _LT_TRY_DLOPEN_SELF
-
-
-# LT_SYS_DLOPEN_SELF
-# ------------------
-AC_DEFUN([LT_SYS_DLOPEN_SELF],
-[m4_require([_LT_HEADER_DLFCN])dnl
-if test "x$enable_dlopen" != xyes; then
-  enable_dlopen=unknown
-  enable_dlopen_self=unknown
-  enable_dlopen_self_static=unknown
-else
-  lt_cv_dlopen=no
-  lt_cv_dlopen_libs=
-
-  case $host_os in
-  beos*)
-    lt_cv_dlopen="load_add_on"
-    lt_cv_dlopen_libs=
-    lt_cv_dlopen_self=yes
-    ;;
-
-  mingw* | pw32* | cegcc*)
-    lt_cv_dlopen="LoadLibrary"
-    lt_cv_dlopen_libs=
-    ;;
-
-  cygwin*)
-    lt_cv_dlopen="dlopen"
-    lt_cv_dlopen_libs=
-    ;;
-
-  darwin*)
-  # if libdl is installed we need to link against it
-    AC_CHECK_LIB([dl], [dlopen],
-		[lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
-    lt_cv_dlopen="dyld"
-    lt_cv_dlopen_libs=
-    lt_cv_dlopen_self=yes
-    ])
-    ;;
-
-  *)
-    AC_CHECK_FUNC([shl_load],
-	  [lt_cv_dlopen="shl_load"],
-      [AC_CHECK_LIB([dld], [shl_load],
-	    [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
-	[AC_CHECK_FUNC([dlopen],
-	      [lt_cv_dlopen="dlopen"],
-	  [AC_CHECK_LIB([dl], [dlopen],
-		[lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
-	    [AC_CHECK_LIB([svld], [dlopen],
-		  [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
-	      [AC_CHECK_LIB([dld], [dld_link],
-		    [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
-	      ])
-	    ])
-	  ])
-	])
-      ])
-    ;;
-  esac
-
-  if test "x$lt_cv_dlopen" != xno; then
-    enable_dlopen=yes
-  else
-    enable_dlopen=no
-  fi
-
-  case $lt_cv_dlopen in
-  dlopen)
-    save_CPPFLAGS="$CPPFLAGS"
-    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
-
-    save_LDFLAGS="$LDFLAGS"
-    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
-
-    save_LIBS="$LIBS"
-    LIBS="$lt_cv_dlopen_libs $LIBS"
-
-    AC_CACHE_CHECK([whether a program can dlopen itself],
-	  lt_cv_dlopen_self, [dnl
-	  _LT_TRY_DLOPEN_SELF(
-	    lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
-	    lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
-    ])
-
-    if test "x$lt_cv_dlopen_self" = xyes; then
-      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
-      AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
-	  lt_cv_dlopen_self_static, [dnl
-	  _LT_TRY_DLOPEN_SELF(
-	    lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
-	    lt_cv_dlopen_self_static=no,  lt_cv_dlopen_self_static=cross)
-      ])
-    fi
-
-    CPPFLAGS="$save_CPPFLAGS"
-    LDFLAGS="$save_LDFLAGS"
-    LIBS="$save_LIBS"
-    ;;
-  esac
-
-  case $lt_cv_dlopen_self in
-  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
-  *) enable_dlopen_self=unknown ;;
-  esac
-
-  case $lt_cv_dlopen_self_static in
-  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
-  *) enable_dlopen_self_static=unknown ;;
-  esac
-fi
-_LT_DECL([dlopen_support], [enable_dlopen], [0],
-	 [Whether dlopen is supported])
-_LT_DECL([dlopen_self], [enable_dlopen_self], [0],
-	 [Whether dlopen of programs is supported])
-_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0],
-	 [Whether dlopen of statically linked programs is supported])
-])# LT_SYS_DLOPEN_SELF
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [])
-
-
-# _LT_COMPILER_C_O([TAGNAME])
-# ---------------------------
-# Check to see if options -c and -o are simultaneously supported by compiler.
-# This macro does not hard code the compiler like AC_PROG_CC_C_O.
-m4_defun([_LT_COMPILER_C_O],
-[m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_TAG_COMPILER])dnl
-AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
-  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
-  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
-   $RM -r conftest 2>/dev/null
-   mkdir conftest
-   cd conftest
-   mkdir out
-   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
-   lt_compiler_flag="-o out/conftest2.$ac_objext"
-   # Insert the option either (1) after the last *FLAGS variable, or
-   # (2) before a word containing "conftest.", or (3) at the end.
-   # Note that $ac_compile itself does not contain backslashes and begins
-   # with a dollar sign (not a hyphen), so the echo should work correctly.
-   lt_compile=`echo "$ac_compile" | $SED \
-   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
-   -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
-   (eval "$lt_compile" 2>out/conftest.err)
-   ac_status=$?
-   cat out/conftest.err >&AS_MESSAGE_LOG_FD
-   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
-   if (exit $ac_status) && test -s out/conftest2.$ac_objext
-   then
-     # The compiler can only warn and ignore the option if not recognized
-     # So say no if there are warnings
-     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
-     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
-     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
-       _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
-     fi
-   fi
-   chmod u+w . 2>&AS_MESSAGE_LOG_FD
-   $RM conftest*
-   # SGI C++ compiler will create directory out/ii_files/ for
-   # template instantiation
-   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
-   $RM out/* && rmdir out
-   cd ..
-   $RM -r conftest
-   $RM conftest*
-])
-_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1],
-	[Does compiler simultaneously support -c and -o options?])
-])# _LT_COMPILER_C_O
-
-
-# _LT_COMPILER_FILE_LOCKS([TAGNAME])
-# ----------------------------------
-# Check to see if we can do hard links to lock some files if needed
-m4_defun([_LT_COMPILER_FILE_LOCKS],
-[m4_require([_LT_ENABLE_LOCK])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-_LT_COMPILER_C_O([$1])
-
-hard_links="nottested"
-if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then
-  # do not overwrite the value of need_locks provided by the user
-  AC_MSG_CHECKING([if we can lock with hard links])
-  hard_links=yes
-  $RM conftest*
-  ln conftest.a conftest.b 2>/dev/null && hard_links=no
-  touch conftest.a
-  ln conftest.a conftest.b 2>&5 || hard_links=no
-  ln conftest.a conftest.b 2>/dev/null && hard_links=no
-  AC_MSG_RESULT([$hard_links])
-  if test "$hard_links" = no; then
-    AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe])
-    need_locks=warn
-  fi
-else
-  need_locks=no
-fi
-_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?])
-])# _LT_COMPILER_FILE_LOCKS
-
-
-# _LT_CHECK_OBJDIR
-# ----------------
-m4_defun([_LT_CHECK_OBJDIR],
-[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
-[rm -f .libs 2>/dev/null
-mkdir .libs 2>/dev/null
-if test -d .libs; then
-  lt_cv_objdir=.libs
-else
-  # MS-DOS does not allow filenames that begin with a dot.
-  lt_cv_objdir=_libs
-fi
-rmdir .libs 2>/dev/null])
-objdir=$lt_cv_objdir
-_LT_DECL([], [objdir], [0],
-         [The name of the directory that contains temporary libtool files])dnl
-m4_pattern_allow([LT_OBJDIR])dnl
-AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/",
-  [Define to the sub-directory in which libtool stores uninstalled libraries.])
-])# _LT_CHECK_OBJDIR
-
-
-# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
-# --------------------------------------
-# Check hardcoding attributes.
-m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
-[AC_MSG_CHECKING([how to hardcode library paths into programs])
-_LT_TAGVAR(hardcode_action, $1)=
-if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" ||
-   test -n "$_LT_TAGVAR(runpath_var, $1)" ||
-   test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then
-
-  # We can hardcode non-existent directories.
-  if test "$_LT_TAGVAR(hardcode_direct, $1)" != no &&
-     # If the only mechanism to avoid hardcoding is shlibpath_var, we
-     # have to relink, otherwise we might link with an installed library
-     # when we should be linking with a yet-to-be-installed one
-     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no &&
-     test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then
-    # Linking always hardcodes the temporary library directory.
-    _LT_TAGVAR(hardcode_action, $1)=relink
-  else
-    # We can link without hardcoding, and we can hardcode nonexisting dirs.
-    _LT_TAGVAR(hardcode_action, $1)=immediate
-  fi
-else
-  # We cannot hardcode anything, or else we can only hardcode existing
-  # directories.
-  _LT_TAGVAR(hardcode_action, $1)=unsupported
-fi
-AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])
-
-if test "$_LT_TAGVAR(hardcode_action, $1)" = relink ||
-   test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then
-  # Fast installation is not supported
-  enable_fast_install=no
-elif test "$shlibpath_overrides_runpath" = yes ||
-     test "$enable_shared" = no; then
-  # Fast installation is not necessary
-  enable_fast_install=needless
-fi
-_LT_TAGDECL([], [hardcode_action], [0],
-    [How to hardcode a shared library path into an executable])
-])# _LT_LINKER_HARDCODE_LIBPATH
-
-
-# _LT_CMD_STRIPLIB
-# ----------------
-m4_defun([_LT_CMD_STRIPLIB],
-[m4_require([_LT_DECL_EGREP])
-striplib=
-old_striplib=
-AC_MSG_CHECKING([whether stripping libraries is possible])
-if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
-  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
-  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
-  AC_MSG_RESULT([yes])
-else
-# FIXME - insert some real tests, host_os isn't really good enough
-  case $host_os in
-  darwin*)
-    if test -n "$STRIP" ; then
-      striplib="$STRIP -x"
-      old_striplib="$STRIP -S"
-      AC_MSG_RESULT([yes])
-    else
-      AC_MSG_RESULT([no])
-    fi
-    ;;
-  *)
-    AC_MSG_RESULT([no])
-    ;;
-  esac
-fi
-_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
-_LT_DECL([], [striplib], [1])
-])# _LT_CMD_STRIPLIB
-
-
-# _LT_SYS_DYNAMIC_LINKER([TAG])
-# -----------------------------
-# PORTME Fill in your ld.so characteristics
-m4_defun([_LT_SYS_DYNAMIC_LINKER],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_OBJDUMP])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_CHECK_SHELL_FEATURES])dnl
-AC_MSG_CHECKING([dynamic linker characteristics])
-m4_if([$1],
-	[], [
-if test "$GCC" = yes; then
-  case $host_os in
-    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
-    *) lt_awk_arg="/^libraries:/" ;;
-  esac
-  case $host_os in
-    mingw* | cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;;
-    *) lt_sed_strip_eq="s,=/,/,g" ;;
-  esac
-  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
-  case $lt_search_path_spec in
-  *\;*)
-    # if the path contains ";" then we assume it to be the separator
-    # otherwise default to the standard path separator (i.e. ":") - it is
-    # assumed that no part of a normal pathname contains ";" but that should
-    # okay in the real world where ";" in dirpaths is itself problematic.
-    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
-    ;;
-  *)
-    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
-    ;;
-  esac
-  # Ok, now we have the path, separated by spaces, we can step through it
-  # and add multilib dir if necessary.
-  lt_tmp_lt_search_path_spec=
-  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
-  for lt_sys_path in $lt_search_path_spec; do
-    if test -d "$lt_sys_path/$lt_multi_os_dir"; then
-      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
-    else
-      test -d "$lt_sys_path" && \
-	lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
-    fi
-  done
-  lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
-BEGIN {RS=" "; FS="/|\n";} {
-  lt_foo="";
-  lt_count=0;
-  for (lt_i = NF; lt_i > 0; lt_i--) {
-    if ($lt_i != "" && $lt_i != ".") {
-      if ($lt_i == "..") {
-        lt_count++;
-      } else {
-        if (lt_count == 0) {
-          lt_foo="/" $lt_i lt_foo;
-        } else {
-          lt_count--;
-        }
-      }
-    }
-  }
-  if (lt_foo != "") { lt_freq[[lt_foo]]++; }
-  if (lt_freq[[lt_foo]] == 1) { print lt_foo; }
-}'`
-  # AWK program above erroneously prepends '/' to C:/dos/paths
-  # for these hosts.
-  case $host_os in
-    mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
-      $SED 's,/\([[A-Za-z]]:\),\1,g'` ;;
-  esac
-  sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
-else
-  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
-fi])
-library_names_spec=
-libname_spec='lib$name'
-soname_spec=
-shrext_cmds=".so"
-postinstall_cmds=
-postuninstall_cmds=
-finish_cmds=
-finish_eval=
-shlibpath_var=
-shlibpath_overrides_runpath=unknown
-version_type=none
-dynamic_linker="$host_os ld.so"
-sys_lib_dlsearch_path_spec="/lib /usr/lib"
-need_lib_prefix=unknown
-hardcode_into_libs=no
-
-# when you set need_version to no, make sure it does not cause -set_version
-# flags to be left without arguments
-need_version=unknown
-
-case $host_os in
-aix3*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
-  shlibpath_var=LIBPATH
-
-  # AIX 3 has no versioning support, so we append a major version to the name.
-  soname_spec='${libname}${release}${shared_ext}$major'
-  ;;
-
-aix[[4-9]]*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  hardcode_into_libs=yes
-  if test "$host_cpu" = ia64; then
-    # AIX 5 supports IA64
-    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
-    shlibpath_var=LD_LIBRARY_PATH
-  else
-    # With GCC up to 2.95.x, collect2 would create an import file
-    # for dependence libraries.  The import file would start with
-    # the line `#! .'.  This would cause the generated library to
-    # depend on `.', always an invalid library.  This was fixed in
-    # development snapshots of GCC prior to 3.0.
-    case $host_os in
-      aix4 | aix4.[[01]] | aix4.[[01]].*)
-      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
-	   echo ' yes '
-	   echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
-	:
-      else
-	can_build_shared=no
-      fi
-      ;;
-    esac
-    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
-    # soname into executable. Probably we can add versioning support to
-    # collect2, so additional links can be useful in future.
-    if test "$aix_use_runtimelinking" = yes; then
-      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
-      # instead of lib<name>.a to let people know that these are not
-      # typical AIX shared libraries.
-      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-    else
-      # We preserve .a as extension for shared libraries through AIX4.2
-      # and later when we are not doing run time linking.
-      library_names_spec='${libname}${release}.a $libname.a'
-      soname_spec='${libname}${release}${shared_ext}$major'
-    fi
-    shlibpath_var=LIBPATH
-  fi
-  ;;
-
-amigaos*)
-  case $host_cpu in
-  powerpc)
-    # Since July 2007 AmigaOS4 officially supports .so libraries.
-    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
-    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-    ;;
-  m68k)
-    library_names_spec='$libname.ixlibrary $libname.a'
-    # Create ${libname}_ixlibrary.a entries in /sys/libs.
-    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
-    ;;
-  esac
-  ;;
-
-beos*)
-  library_names_spec='${libname}${shared_ext}'
-  dynamic_linker="$host_os ld.so"
-  shlibpath_var=LIBRARY_PATH
-  ;;
-
-bsdi[[45]]*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
-  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
-  # the default ld.so.conf also contains /usr/contrib/lib and
-  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
-  # libtool to hard-code these into programs
-  ;;
-
-cygwin* | mingw* | pw32* | cegcc*)
-  version_type=windows
-  shrext_cmds=".dll"
-  need_version=no
-  need_lib_prefix=no
-
-  case $GCC,$cc_basename in
-  yes,*)
-    # gcc
-    library_names_spec='$libname.dll.a'
-    # DLL is installed to $(libdir)/../bin by postinstall_cmds
-    postinstall_cmds='base_file=`basename \${file}`~
-      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
-      dldir=$destdir/`dirname \$dlpath`~
-      test -d \$dldir || mkdir -p \$dldir~
-      $install_prog $dir/$dlname \$dldir/$dlname~
-      chmod a+x \$dldir/$dlname~
-      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
-        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
-      fi'
-    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
-      dlpath=$dir/\$dldll~
-       $RM \$dlpath'
-    shlibpath_overrides_runpath=yes
-
-    case $host_os in
-    cygwin*)
-      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
-      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
-m4_if([$1], [],[
-      sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"])
-      ;;
-    mingw* | cegcc*)
-      # MinGW DLLs use traditional 'lib' prefix
-      soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
-      ;;
-    pw32*)
-      # pw32 DLLs use 'pw' prefix rather than 'lib'
-      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
-      ;;
-    esac
-    dynamic_linker='Win32 ld.exe'
-    ;;
-
-  *,cl*)
-    # Native MSVC
-    libname_spec='$name'
-    soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
-    library_names_spec='${libname}.dll.lib'
-
-    case $build_os in
-    mingw*)
-      sys_lib_search_path_spec=
-      lt_save_ifs=$IFS
-      IFS=';'
-      for lt_path in $LIB
-      do
-        IFS=$lt_save_ifs
-        # Let DOS variable expansion print the short 8.3 style file name.
-        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
-        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
-      done
-      IFS=$lt_save_ifs
-      # Convert to MSYS style.
-      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'`
-      ;;
-    cygwin*)
-      # Convert to unix form, then to dos form, then back to unix form
-      # but this time dos style (no spaces!) so that the unix form looks
-      # like /cygdrive/c/PROGRA~1:/cygdr...
-      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
-      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
-      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
-      ;;
-    *)
-      sys_lib_search_path_spec="$LIB"
-      if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then
-        # It is most probably a Windows format PATH.
-        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
-      else
-        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
-      fi
-      # FIXME: find the short name or the path components, as spaces are
-      # common. (e.g. "Program Files" -> "PROGRA~1")
-      ;;
-    esac
-
-    # DLL is installed to $(libdir)/../bin by postinstall_cmds
-    postinstall_cmds='base_file=`basename \${file}`~
-      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
-      dldir=$destdir/`dirname \$dlpath`~
-      test -d \$dldir || mkdir -p \$dldir~
-      $install_prog $dir/$dlname \$dldir/$dlname'
-    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
-      dlpath=$dir/\$dldll~
-       $RM \$dlpath'
-    shlibpath_overrides_runpath=yes
-    dynamic_linker='Win32 link.exe'
-    ;;
-
-  *)
-    # Assume MSVC wrapper
-    library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib'
-    dynamic_linker='Win32 ld.exe'
-    ;;
-  esac
-  # FIXME: first we should search . and the directory the executable is in
-  shlibpath_var=PATH
-  ;;
-
-darwin* | rhapsody*)
-  dynamic_linker="$host_os dyld"
-  version_type=darwin
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
-  soname_spec='${libname}${release}${major}$shared_ext'
-  shlibpath_overrides_runpath=yes
-  shlibpath_var=DYLD_LIBRARY_PATH
-  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-m4_if([$1], [],[
-  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"])
-  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
-  ;;
-
-dgux*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-freebsd* | dragonfly*)
-  # DragonFly does not have aout.  When/if they implement a new
-  # versioning mechanism, adjust this.
-  if test -x /usr/bin/objformat; then
-    objformat=`/usr/bin/objformat`
-  else
-    case $host_os in
-    freebsd[[23]].*) objformat=aout ;;
-    *) objformat=elf ;;
-    esac
-  fi
-  version_type=freebsd-$objformat
-  case $version_type in
-    freebsd-elf*)
-      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
-      need_version=no
-      need_lib_prefix=no
-      ;;
-    freebsd-*)
-      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
-      need_version=yes
-      ;;
-  esac
-  shlibpath_var=LD_LIBRARY_PATH
-  case $host_os in
-  freebsd2.*)
-    shlibpath_overrides_runpath=yes
-    ;;
-  freebsd3.[[01]]* | freebsdelf3.[[01]]*)
-    shlibpath_overrides_runpath=yes
-    hardcode_into_libs=yes
-    ;;
-  freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \
-  freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1)
-    shlibpath_overrides_runpath=no
-    hardcode_into_libs=yes
-    ;;
-  *) # from 4.6 on, and DragonFly
-    shlibpath_overrides_runpath=yes
-    hardcode_into_libs=yes
-    ;;
-  esac
-  ;;
-
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
-haiku*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  dynamic_linker="$host_os runtime_loader"
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LIBRARY_PATH
-  shlibpath_overrides_runpath=yes
-  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
-  hardcode_into_libs=yes
-  ;;
-
-hpux9* | hpux10* | hpux11*)
-  # Give a soname corresponding to the major version so that dld.sl refuses to
-  # link against other versions.
-  version_type=sunos
-  need_lib_prefix=no
-  need_version=no
-  case $host_cpu in
-  ia64*)
-    shrext_cmds='.so'
-    hardcode_into_libs=yes
-    dynamic_linker="$host_os dld.so"
-    shlibpath_var=LD_LIBRARY_PATH
-    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
-    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-    soname_spec='${libname}${release}${shared_ext}$major'
-    if test "X$HPUX_IA64_MODE" = X32; then
-      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
-    else
-      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
-    fi
-    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
-    ;;
-  hppa*64*)
-    shrext_cmds='.sl'
-    hardcode_into_libs=yes
-    dynamic_linker="$host_os dld.sl"
-    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
-    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
-    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-    soname_spec='${libname}${release}${shared_ext}$major'
-    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
-    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
-    ;;
-  *)
-    shrext_cmds='.sl'
-    dynamic_linker="$host_os dld.sl"
-    shlibpath_var=SHLIB_PATH
-    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
-    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-    soname_spec='${libname}${release}${shared_ext}$major'
-    ;;
-  esac
-  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
-  postinstall_cmds='chmod 555 $lib'
-  # or fails outright, so override atomically:
-  install_override_mode=555
-  ;;
-
-interix[[3-9]]*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
-irix5* | irix6* | nonstopux*)
-  case $host_os in
-    nonstopux*) version_type=nonstopux ;;
-    *)
-	if test "$lt_cv_prog_gnu_ld" = yes; then
-		version_type=linux # correct to gnu/linux during the next big refactor
-	else
-		version_type=irix
-	fi ;;
-  esac
-  need_lib_prefix=no
-  need_version=no
-  soname_spec='${libname}${release}${shared_ext}$major'
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
-  case $host_os in
-  irix5* | nonstopux*)
-    libsuff= shlibsuff=
-    ;;
-  *)
-    case $LD in # libtool.m4 will add one of these switches to LD
-    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
-      libsuff= shlibsuff= libmagic=32-bit;;
-    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
-      libsuff=32 shlibsuff=N32 libmagic=N32;;
-    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
-      libsuff=64 shlibsuff=64 libmagic=64-bit;;
-    *) libsuff= shlibsuff= libmagic=never-match;;
-    esac
-    ;;
-  esac
-  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
-  shlibpath_overrides_runpath=no
-  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
-  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
-  hardcode_into_libs=yes
-  ;;
-
-# No shared lib support for Linux oldld, aout, or coff.
-linux*oldld* | linux*aout* | linux*coff*)
-  dynamic_linker=no
-  ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-
-  # Some binutils ld are patched to set DT_RUNPATH
-  AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath],
-    [lt_cv_shlibpath_overrides_runpath=no
-    save_LDFLAGS=$LDFLAGS
-    save_libdir=$libdir
-    eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \
-	 LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\""
-    AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
-      [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null],
-	 [lt_cv_shlibpath_overrides_runpath=yes])])
-    LDFLAGS=$save_LDFLAGS
-    libdir=$save_libdir
-    ])
-  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
-
-  # This implies no fast_install, which is unacceptable.
-  # Some rework will be needed to allow for fast_install
-  # before this can be enabled.
-  hardcode_into_libs=yes
-
-  # Add ABI-specific directories to the system library path.
-  sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib"
-
-  # Append ld.so.conf contents to the search path
-  if test -f /etc/ld.so.conf; then
-    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[	 ]*hwcap[	 ]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
-    sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra"
-
-  fi
-
-  # We used to test for /lib/ld.so.1 and disable shared libraries on
-  # powerpc, because MkLinux only supported shared libraries with the
-  # GNU dynamic linker.  Since this was broken with cross compilers,
-  # most powerpc-linux boxes support dynamic linking these days and
-  # people can always --disable-shared, the test was removed, and we
-  # assume the GNU/Linux dynamic linker is in use.
-  dynamic_linker='GNU/Linux ld.so'
-  ;;
-
-netbsd*)
-  version_type=sunos
-  need_lib_prefix=no
-  need_version=no
-  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
-    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
-    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
-    dynamic_linker='NetBSD (a.out) ld.so'
-  else
-    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-    soname_spec='${libname}${release}${shared_ext}$major'
-    dynamic_linker='NetBSD ld.elf_so'
-  fi
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=yes
-  hardcode_into_libs=yes
-  ;;
-
-newsos6)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=yes
-  ;;
-
-*nto* | *qnx*)
-  version_type=qnx
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='ldqnx.so'
-  ;;
-
-openbsd*)
-  version_type=sunos
-  sys_lib_dlsearch_path_spec="/usr/lib"
-  need_lib_prefix=no
-  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
-  case $host_os in
-    openbsd3.3 | openbsd3.3.*)	need_version=yes ;;
-    *)				need_version=no  ;;
-  esac
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
-  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-    case $host_os in
-      openbsd2.[[89]] | openbsd2.[[89]].*)
-	shlibpath_overrides_runpath=no
-	;;
-      *)
-	shlibpath_overrides_runpath=yes
-	;;
-      esac
-  else
-    shlibpath_overrides_runpath=yes
-  fi
-  ;;
-
-os2*)
-  libname_spec='$name'
-  shrext_cmds=".dll"
-  need_lib_prefix=no
-  library_names_spec='$libname${shared_ext} $libname.a'
-  dynamic_linker='OS/2 ld.exe'
-  shlibpath_var=LIBPATH
-  ;;
-
-osf3* | osf4* | osf5*)
-  version_type=osf
-  need_lib_prefix=no
-  need_version=no
-  soname_spec='${libname}${release}${shared_ext}$major'
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  shlibpath_var=LD_LIBRARY_PATH
-  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
-  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
-  ;;
-
-rdos*)
-  dynamic_linker=no
-  ;;
-
-solaris*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=yes
-  hardcode_into_libs=yes
-  # ldd complains unless libraries are executable
-  postinstall_cmds='chmod +x $lib'
-  ;;
-
-sunos4*)
-  version_type=sunos
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
-  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=yes
-  if test "$with_gnu_ld" = yes; then
-    need_lib_prefix=no
-  fi
-  need_version=yes
-  ;;
-
-sysv4 | sysv4.3*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  case $host_vendor in
-    sni)
-      shlibpath_overrides_runpath=no
-      need_lib_prefix=no
-      runpath_var=LD_RUN_PATH
-      ;;
-    siemens)
-      need_lib_prefix=no
-      ;;
-    motorola)
-      need_lib_prefix=no
-      need_version=no
-      shlibpath_overrides_runpath=no
-      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
-      ;;
-  esac
-  ;;
-
-sysv4*MP*)
-  if test -d /usr/nec ;then
-    version_type=linux # correct to gnu/linux during the next big refactor
-    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
-    soname_spec='$libname${shared_ext}.$major'
-    shlibpath_var=LD_LIBRARY_PATH
-  fi
-  ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
-  version_type=freebsd-elf
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=yes
-  hardcode_into_libs=yes
-  if test "$with_gnu_ld" = yes; then
-    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
-  else
-    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
-    case $host_os in
-      sco3.2v5*)
-        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
-	;;
-    esac
-  fi
-  sys_lib_dlsearch_path_spec='/usr/lib'
-  ;;
-
-tpf*)
-  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
-uts4*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-*)
-  dynamic_linker=no
-  ;;
-esac
-AC_MSG_RESULT([$dynamic_linker])
-test "$dynamic_linker" = no && can_build_shared=no
-
-variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
-if test "$GCC" = yes; then
-  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
-fi
-
-if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
-  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
-fi
-if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
-  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
-fi
-
-_LT_DECL([], [variables_saved_for_relink], [1],
-    [Variables whose values should be saved in libtool wrapper scripts and
-    restored at link time])
-_LT_DECL([], [need_lib_prefix], [0],
-    [Do we need the "lib" prefix for modules?])
-_LT_DECL([], [need_version], [0], [Do we need a version for libraries?])
-_LT_DECL([], [version_type], [0], [Library versioning type])
-_LT_DECL([], [runpath_var], [0],  [Shared library runtime path variable])
-_LT_DECL([], [shlibpath_var], [0],[Shared library path variable])
-_LT_DECL([], [shlibpath_overrides_runpath], [0],
-    [Is shlibpath searched before the hard-coded library search path?])
-_LT_DECL([], [libname_spec], [1], [Format of library name prefix])
-_LT_DECL([], [library_names_spec], [1],
-    [[List of archive names.  First name is the real one, the rest are links.
-    The last name is the one that the linker finds with -lNAME]])
-_LT_DECL([], [soname_spec], [1],
-    [[The coded name of the library, if different from the real name]])
-_LT_DECL([], [install_override_mode], [1],
-    [Permission mode override for installation of shared libraries])
-_LT_DECL([], [postinstall_cmds], [2],
-    [Command to use after installation of a shared archive])
-_LT_DECL([], [postuninstall_cmds], [2],
-    [Command to use after uninstallation of a shared archive])
-_LT_DECL([], [finish_cmds], [2],
-    [Commands used to finish a libtool library installation in a directory])
-_LT_DECL([], [finish_eval], [1],
-    [[As "finish_cmds", except a single script fragment to be evaled but
-    not shown]])
-_LT_DECL([], [hardcode_into_libs], [0],
-    [Whether we should hardcode library paths into libraries])
-_LT_DECL([], [sys_lib_search_path_spec], [2],
-    [Compile-time system search path for libraries])
-_LT_DECL([], [sys_lib_dlsearch_path_spec], [2],
-    [Run-time system search path for libraries])
-])# _LT_SYS_DYNAMIC_LINKER
-
-
-# _LT_PATH_TOOL_PREFIX(TOOL)
-# --------------------------
-# find a file program which can recognize shared library
-AC_DEFUN([_LT_PATH_TOOL_PREFIX],
-[m4_require([_LT_DECL_EGREP])dnl
-AC_MSG_CHECKING([for $1])
-AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
-[case $MAGIC_CMD in
-[[\\/*] |  ?:[\\/]*])
-  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
-  ;;
-*)
-  lt_save_MAGIC_CMD="$MAGIC_CMD"
-  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
-dnl $ac_dummy forces splitting on constant user-supplied paths.
-dnl POSIX.2 word splitting is done only on the output of word expansions,
-dnl not every word.  This closes a longstanding sh security hole.
-  ac_dummy="m4_if([$2], , $PATH, [$2])"
-  for ac_dir in $ac_dummy; do
-    IFS="$lt_save_ifs"
-    test -z "$ac_dir" && ac_dir=.
-    if test -f $ac_dir/$1; then
-      lt_cv_path_MAGIC_CMD="$ac_dir/$1"
-      if test -n "$file_magic_test_file"; then
-	case $deplibs_check_method in
-	"file_magic "*)
-	  file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
-	  MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
-	  if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
-	    $EGREP "$file_magic_regex" > /dev/null; then
-	    :
-	  else
-	    cat <<_LT_EOF 1>&2
-
-*** Warning: the command libtool uses to detect shared libraries,
-*** $file_magic_cmd, produces output that libtool cannot recognize.
-*** The result is that libtool may fail to recognize shared libraries
-*** as such.  This will affect the creation of libtool libraries that
-*** depend on shared libraries, but programs linked with such libtool
-*** libraries will work regardless of this problem.  Nevertheless, you
-*** may want to report the problem to your system manager and/or to
-*** bug-libtool@gnu.org
-
-_LT_EOF
-	  fi ;;
-	esac
-      fi
-      break
-    fi
-  done
-  IFS="$lt_save_ifs"
-  MAGIC_CMD="$lt_save_MAGIC_CMD"
-  ;;
-esac])
-MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
-if test -n "$MAGIC_CMD"; then
-  AC_MSG_RESULT($MAGIC_CMD)
-else
-  AC_MSG_RESULT(no)
-fi
-_LT_DECL([], [MAGIC_CMD], [0],
-	 [Used to examine libraries when file_magic_cmd begins with "file"])dnl
-])# _LT_PATH_TOOL_PREFIX
-
-# Old name:
-AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])
-
-
-# _LT_PATH_MAGIC
-# --------------
-# find a file program which can recognize a shared library
-m4_defun([_LT_PATH_MAGIC],
-[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
-if test -z "$lt_cv_path_MAGIC_CMD"; then
-  if test -n "$ac_tool_prefix"; then
-    _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
-  else
-    MAGIC_CMD=:
-  fi
-fi
-])# _LT_PATH_MAGIC
-
-
-# LT_PATH_LD
-# ----------
-# find the pathname to the GNU or non-GNU linker
-AC_DEFUN([LT_PATH_LD],
-[AC_REQUIRE([AC_PROG_CC])dnl
-AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_CANONICAL_BUILD])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_PROG_ECHO_BACKSLASH])dnl
-
-AC_ARG_WITH([gnu-ld],
-    [AS_HELP_STRING([--with-gnu-ld],
-	[assume the C compiler uses GNU ld @<:@default=no@:>@])],
-    [test "$withval" = no || with_gnu_ld=yes],
-    [with_gnu_ld=no])dnl
-
-ac_prog=ld
-if test "$GCC" = yes; then
-  # Check if gcc -print-prog-name=ld gives a path.
-  AC_MSG_CHECKING([for ld used by $CC])
-  case $host in
-  *-*-mingw*)
-    # gcc leaves a trailing carriage return which upsets mingw
-    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
-  *)
-    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
-  esac
-  case $ac_prog in
-    # Accept absolute paths.
-    [[\\/]]* | ?:[[\\/]]*)
-      re_direlt='/[[^/]][[^/]]*/\.\./'
-      # Canonicalize the pathname of ld
-      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
-      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
-	ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
-      done
-      test -z "$LD" && LD="$ac_prog"
-      ;;
-  "")
-    # If it fails, then pretend we aren't using GCC.
-    ac_prog=ld
-    ;;
-  *)
-    # If it is relative, then search for the first ld in PATH.
-    with_gnu_ld=unknown
-    ;;
-  esac
-elif test "$with_gnu_ld" = yes; then
-  AC_MSG_CHECKING([for GNU ld])
-else
-  AC_MSG_CHECKING([for non-GNU ld])
-fi
-AC_CACHE_VAL(lt_cv_path_LD,
-[if test -z "$LD"; then
-  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
-  for ac_dir in $PATH; do
-    IFS="$lt_save_ifs"
-    test -z "$ac_dir" && ac_dir=.
-    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
-      lt_cv_path_LD="$ac_dir/$ac_prog"
-      # Check to see if the program is GNU ld.  I'd rather use --version,
-      # but apparently some variants of GNU ld only accept -v.
-      # Break only if it was the GNU/non-GNU ld that we prefer.
-      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
-      *GNU* | *'with BFD'*)
-	test "$with_gnu_ld" != no && break
-	;;
-      *)
-	test "$with_gnu_ld" != yes && break
-	;;
-      esac
-    fi
-  done
-  IFS="$lt_save_ifs"
-else
-  lt_cv_path_LD="$LD" # Let the user override the test with a path.
-fi])
-LD="$lt_cv_path_LD"
-if test -n "$LD"; then
-  AC_MSG_RESULT($LD)
-else
-  AC_MSG_RESULT(no)
-fi
-test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
-_LT_PATH_LD_GNU
-AC_SUBST([LD])
-
-_LT_TAGDECL([], [LD], [1], [The linker used to build libraries])
-])# LT_PATH_LD
-
-# Old names:
-AU_ALIAS([AM_PROG_LD], [LT_PATH_LD])
-AU_ALIAS([AC_PROG_LD], [LT_PATH_LD])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_PROG_LD], [])
-dnl AC_DEFUN([AC_PROG_LD], [])
-
-
-# _LT_PATH_LD_GNU
-#- --------------
-m4_defun([_LT_PATH_LD_GNU],
-[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
-[# I'd rather use --version here, but apparently some GNU lds only accept -v.
-case `$LD -v 2>&1 </dev/null` in
-*GNU* | *'with BFD'*)
-  lt_cv_prog_gnu_ld=yes
-  ;;
-*)
-  lt_cv_prog_gnu_ld=no
-  ;;
-esac])
-with_gnu_ld=$lt_cv_prog_gnu_ld
-])# _LT_PATH_LD_GNU
-
-
-# _LT_CMD_RELOAD
-# --------------
-# find reload flag for linker
-#   -- PORTME Some linkers may need a different reload flag.
-m4_defun([_LT_CMD_RELOAD],
-[AC_CACHE_CHECK([for $LD option to reload object files],
-  lt_cv_ld_reload_flag,
-  [lt_cv_ld_reload_flag='-r'])
-reload_flag=$lt_cv_ld_reload_flag
-case $reload_flag in
-"" | " "*) ;;
-*) reload_flag=" $reload_flag" ;;
-esac
-reload_cmds='$LD$reload_flag -o $output$reload_objs'
-case $host_os in
-  cygwin* | mingw* | pw32* | cegcc*)
-    if test "$GCC" != yes; then
-      reload_cmds=false
-    fi
-    ;;
-  darwin*)
-    if test "$GCC" = yes; then
-      reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
-    else
-      reload_cmds='$LD$reload_flag -o $output$reload_objs'
-    fi
-    ;;
-esac
-_LT_TAGDECL([], [reload_flag], [1], [How to create reloadable object files])dnl
-_LT_TAGDECL([], [reload_cmds], [2])dnl
-])# _LT_CMD_RELOAD
-
-
-# _LT_CHECK_MAGIC_METHOD
-# ----------------------
-# how to check for library dependencies
-#  -- PORTME fill in with the dynamic library characteristics
-m4_defun([_LT_CHECK_MAGIC_METHOD],
-[m4_require([_LT_DECL_EGREP])
-m4_require([_LT_DECL_OBJDUMP])
-AC_CACHE_CHECK([how to recognize dependent libraries],
-lt_cv_deplibs_check_method,
-[lt_cv_file_magic_cmd='$MAGIC_CMD'
-lt_cv_file_magic_test_file=
-lt_cv_deplibs_check_method='unknown'
-# Need to set the preceding variable on all platforms that support
-# interlibrary dependencies.
-# 'none' -- dependencies not supported.
-# `unknown' -- same as none, but documents that we really don't know.
-# 'pass_all' -- all dependencies passed with no checks.
-# 'test_compile' -- check by making test program.
-# 'file_magic [[regex]]' -- check by looking for files in library path
-# which responds to the $file_magic_cmd with a given extended regex.
-# If you have `file' or equivalent on your system and you're not sure
-# whether `pass_all' will *always* work, you probably want this one.
-
-case $host_os in
-aix[[4-9]]*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-beos*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-bsdi[[45]]*)
-  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
-  lt_cv_file_magic_cmd='/usr/bin/file -L'
-  lt_cv_file_magic_test_file=/shlib/libc.so
-  ;;
-
-cygwin*)
-  # func_win32_libid is a shell function defined in ltmain.sh
-  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
-  lt_cv_file_magic_cmd='func_win32_libid'
-  ;;
-
-mingw* | pw32*)
-  # Base MSYS/MinGW do not provide the 'file' command needed by
-  # func_win32_libid shell function, so use a weaker test based on 'objdump',
-  # unless we find 'file', for example because we are cross-compiling.
-  # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
-  if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
-    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
-    lt_cv_file_magic_cmd='func_win32_libid'
-  else
-    # Keep this pattern in sync with the one in func_win32_libid.
-    lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
-    lt_cv_file_magic_cmd='$OBJDUMP -f'
-  fi
-  ;;
-
-cegcc*)
-  # use the weaker test based on 'objdump'. See mingw*.
-  lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
-  lt_cv_file_magic_cmd='$OBJDUMP -f'
-  ;;
-
-darwin* | rhapsody*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-freebsd* | dragonfly*)
-  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
-    case $host_cpu in
-    i*86 )
-      # Not sure whether the presence of OpenBSD here was a mistake.
-      # Let's accept both of them until this is cleared up.
-      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library'
-      lt_cv_file_magic_cmd=/usr/bin/file
-      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
-      ;;
-    esac
-  else
-    lt_cv_deplibs_check_method=pass_all
-  fi
-  ;;
-
-gnu*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-haiku*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-hpux10.20* | hpux11*)
-  lt_cv_file_magic_cmd=/usr/bin/file
-  case $host_cpu in
-  ia64*)
-    lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64'
-    lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
-    ;;
-  hppa*64*)
-    [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]']
-    lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
-    ;;
-  *)
-    lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library'
-    lt_cv_file_magic_test_file=/usr/lib/libc.sl
-    ;;
-  esac
-  ;;
-
-interix[[3-9]]*)
-  # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
-  lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$'
-  ;;
-
-irix5* | irix6* | nonstopux*)
-  case $LD in
-  *-32|*"-32 ") libmagic=32-bit;;
-  *-n32|*"-n32 ") libmagic=N32;;
-  *-64|*"-64 ") libmagic=64-bit;;
-  *) libmagic=never-match;;
-  esac
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-netbsd*)
-  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
-    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
-  else
-    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$'
-  fi
-  ;;
-
-newos6*)
-  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
-  lt_cv_file_magic_cmd=/usr/bin/file
-  lt_cv_file_magic_test_file=/usr/lib/libnls.so
-  ;;
-
-*nto* | *qnx*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-openbsd*)
-  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$'
-  else
-    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
-  fi
-  ;;
-
-osf3* | osf4* | osf5*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-rdos*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-solaris*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
-sysv4 | sysv4.3*)
-  case $host_vendor in
-  motorola)
-    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
-    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
-    ;;
-  ncr)
-    lt_cv_deplibs_check_method=pass_all
-    ;;
-  sequent)
-    lt_cv_file_magic_cmd='/bin/file'
-    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
-    ;;
-  sni)
-    lt_cv_file_magic_cmd='/bin/file'
-    lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
-    lt_cv_file_magic_test_file=/lib/libc.so
-    ;;
-  siemens)
-    lt_cv_deplibs_check_method=pass_all
-    ;;
-  pc)
-    lt_cv_deplibs_check_method=pass_all
-    ;;
-  esac
-  ;;
-
-tpf*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-esac
-])
-
-file_magic_glob=
-want_nocaseglob=no
-if test "$build" = "$host"; then
-  case $host_os in
-  mingw* | pw32*)
-    if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
-      want_nocaseglob=yes
-    else
-      file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"`
-    fi
-    ;;
-  esac
-fi
-
-file_magic_cmd=$lt_cv_file_magic_cmd
-deplibs_check_method=$lt_cv_deplibs_check_method
-test -z "$deplibs_check_method" && deplibs_check_method=unknown
-
-_LT_DECL([], [deplibs_check_method], [1],
-    [Method to check whether dependent libraries are shared objects])
-_LT_DECL([], [file_magic_cmd], [1],
-    [Command to use when deplibs_check_method = "file_magic"])
-_LT_DECL([], [file_magic_glob], [1],
-    [How to find potential files when deplibs_check_method = "file_magic"])
-_LT_DECL([], [want_nocaseglob], [1],
-    [Find potential files using nocaseglob when deplibs_check_method = "file_magic"])
-])# _LT_CHECK_MAGIC_METHOD
-
-
-# LT_PATH_NM
-# ----------
-# find the pathname to a BSD- or MS-compatible name lister
-AC_DEFUN([LT_PATH_NM],
-[AC_REQUIRE([AC_PROG_CC])dnl
-AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM,
-[if test -n "$NM"; then
-  # Let the user override the test.
-  lt_cv_path_NM="$NM"
-else
-  lt_nm_to_check="${ac_tool_prefix}nm"
-  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
-    lt_nm_to_check="$lt_nm_to_check nm"
-  fi
-  for lt_tmp_nm in $lt_nm_to_check; do
-    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
-    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
-      IFS="$lt_save_ifs"
-      test -z "$ac_dir" && ac_dir=.
-      tmp_nm="$ac_dir/$lt_tmp_nm"
-      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
-	# Check to see if the nm accepts a BSD-compat flag.
-	# Adding the `sed 1q' prevents false positives on HP-UX, which says:
-	#   nm: unknown option "B" ignored
-	# Tru64's nm complains that /dev/null is an invalid object file
-	case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
-	*/dev/null* | *'Invalid file or object type'*)
-	  lt_cv_path_NM="$tmp_nm -B"
-	  break
-	  ;;
-	*)
-	  case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
-	  */dev/null*)
-	    lt_cv_path_NM="$tmp_nm -p"
-	    break
-	    ;;
-	  *)
-	    lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
-	    continue # so that we can try to find one that supports BSD flags
-	    ;;
-	  esac
-	  ;;
-	esac
-      fi
-    done
-    IFS="$lt_save_ifs"
-  done
-  : ${lt_cv_path_NM=no}
-fi])
-if test "$lt_cv_path_NM" != "no"; then
-  NM="$lt_cv_path_NM"
-else
-  # Didn't find any BSD compatible name lister, look for dumpbin.
-  if test -n "$DUMPBIN"; then :
-    # Let the user override the test.
-  else
-    AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :)
-    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
-    *COFF*)
-      DUMPBIN="$DUMPBIN -symbols"
-      ;;
-    *)
-      DUMPBIN=:
-      ;;
-    esac
-  fi
-  AC_SUBST([DUMPBIN])
-  if test "$DUMPBIN" != ":"; then
-    NM="$DUMPBIN"
-  fi
-fi
-test -z "$NM" && NM=nm
-AC_SUBST([NM])
-_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl
-
-AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface],
-  [lt_cv_nm_interface="BSD nm"
-  echo "int some_variable = 0;" > conftest.$ac_ext
-  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD)
-  (eval "$ac_compile" 2>conftest.err)
-  cat conftest.err >&AS_MESSAGE_LOG_FD
-  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD)
-  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
-  cat conftest.err >&AS_MESSAGE_LOG_FD
-  (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD)
-  cat conftest.out >&AS_MESSAGE_LOG_FD
-  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
-    lt_cv_nm_interface="MS dumpbin"
-  fi
-  rm -f conftest*])
-])# LT_PATH_NM
-
-# Old names:
-AU_ALIAS([AM_PROG_NM], [LT_PATH_NM])
-AU_ALIAS([AC_PROG_NM], [LT_PATH_NM])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_PROG_NM], [])
-dnl AC_DEFUN([AC_PROG_NM], [])
-
-# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
-# --------------------------------
-# how to determine the name of the shared library
-# associated with a specific link library.
-#  -- PORTME fill in with the dynamic library characteristics
-m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB],
-[m4_require([_LT_DECL_EGREP])
-m4_require([_LT_DECL_OBJDUMP])
-m4_require([_LT_DECL_DLLTOOL])
-AC_CACHE_CHECK([how to associate runtime and link libraries],
-lt_cv_sharedlib_from_linklib_cmd,
-[lt_cv_sharedlib_from_linklib_cmd='unknown'
-
-case $host_os in
-cygwin* | mingw* | pw32* | cegcc*)
-  # two different shell functions defined in ltmain.sh
-  # decide which to use based on capabilities of $DLLTOOL
-  case `$DLLTOOL --help 2>&1` in
-  *--identify-strict*)
-    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
-    ;;
-  *)
-    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
-    ;;
-  esac
-  ;;
-*)
-  # fallback: assume linklib IS sharedlib
-  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
-  ;;
-esac
-])
-sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
-test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
-
-_LT_DECL([], [sharedlib_from_linklib_cmd], [1],
-    [Command to associate shared and link libraries])
-])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
-
-
-# _LT_PATH_MANIFEST_TOOL
-# ----------------------
-# locate the manifest tool
-m4_defun([_LT_PATH_MANIFEST_TOOL],
-[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :)
-test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
-AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool],
-  [lt_cv_path_mainfest_tool=no
-  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD
-  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
-  cat conftest.err >&AS_MESSAGE_LOG_FD
-  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
-    lt_cv_path_mainfest_tool=yes
-  fi
-  rm -f conftest*])
-if test "x$lt_cv_path_mainfest_tool" != xyes; then
-  MANIFEST_TOOL=:
-fi
-_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl
-])# _LT_PATH_MANIFEST_TOOL
-
-
-# LT_LIB_M
-# --------
-# check for math library
-AC_DEFUN([LT_LIB_M],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-LIBM=
-case $host in
-*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*)
-  # These system don't have libm, or don't need it
-  ;;
-*-ncr-sysv4.3*)
-  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
-  AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
-  ;;
-*)
-  AC_CHECK_LIB(m, cos, LIBM="-lm")
-  ;;
-esac
-AC_SUBST([LIBM])
-])# LT_LIB_M
-
-# Old name:
-AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_CHECK_LIBM], [])
-
-
-# _LT_COMPILER_NO_RTTI([TAGNAME])
-# -------------------------------
-m4_defun([_LT_COMPILER_NO_RTTI],
-[m4_require([_LT_TAG_COMPILER])dnl
-
-_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
-
-if test "$GCC" = yes; then
-  case $cc_basename in
-  nvcc*)
-    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;;
-  *)
-    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;;
-  esac
-
-  _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
-    lt_cv_prog_compiler_rtti_exceptions,
-    [-fno-rtti -fno-exceptions], [],
-    [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
-fi
-_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1],
-	[Compiler flag to turn off builtin functions])
-])# _LT_COMPILER_NO_RTTI
-
-
-# _LT_CMD_GLOBAL_SYMBOLS
-# ----------------------
-m4_defun([_LT_CMD_GLOBAL_SYMBOLS],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_PROG_CC])dnl
-AC_REQUIRE([AC_PROG_AWK])dnl
-AC_REQUIRE([LT_PATH_NM])dnl
-AC_REQUIRE([LT_PATH_LD])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_TAG_COMPILER])dnl
-
-# Check for command to grab the raw symbol name followed by C symbol from nm.
-AC_MSG_CHECKING([command to parse $NM output from $compiler object])
-AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
-[
-# These are sane defaults that work on at least a few old systems.
-# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
-
-# Character class describing NM global symbol codes.
-symcode='[[BCDEGRST]]'
-
-# Regexp to match symbols that can be accessed directly from C.
-sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
-
-# Define system-specific variables.
-case $host_os in
-aix*)
-  symcode='[[BCDT]]'
-  ;;
-cygwin* | mingw* | pw32* | cegcc*)
-  symcode='[[ABCDGISTW]]'
-  ;;
-hpux*)
-  if test "$host_cpu" = ia64; then
-    symcode='[[ABCDEGRST]]'
-  fi
-  ;;
-irix* | nonstopux*)
-  symcode='[[BCDEGRST]]'
-  ;;
-osf*)
-  symcode='[[BCDEGQRST]]'
-  ;;
-solaris*)
-  symcode='[[BDRT]]'
-  ;;
-sco3.2v5*)
-  symcode='[[DT]]'
-  ;;
-sysv4.2uw2*)
-  symcode='[[DT]]'
-  ;;
-sysv5* | sco5v6* | unixware* | OpenUNIX*)
-  symcode='[[ABDT]]'
-  ;;
-sysv4)
-  symcode='[[DFNSTU]]'
-  ;;
-esac
-
-# If we're using GNU nm, then use its standard symbol codes.
-case `$NM -V 2>&1` in
-*GNU* | *'with BFD'*)
-  symcode='[[ABCDGIRSTW]]' ;;
-esac
-
-# Transform an extracted symbol line into a proper C declaration.
-# Some systems (esp. on ia64) link data and code symbols differently,
-# so use this general approach.
-lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
-
-# Transform an extracted symbol line into symbol name and symbol address
-lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"\2\", (void *) \&\2},/p'"
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/  {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"lib\2\", (void *) \&\2},/p'"
-
-# Handle CRLF in mingw tool chain
-opt_cr=
-case $build_os in
-mingw*)
-  opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
-  ;;
-esac
-
-# Try without a prefix underscore, then with it.
-for ac_symprfx in "" "_"; do
-
-  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
-  symxfrm="\\1 $ac_symprfx\\2 \\2"
-
-  # Write the raw and C identifiers.
-  if test "$lt_cv_nm_interface" = "MS dumpbin"; then
-    # Fake it for dumpbin and say T for any non-static function
-    # and D for any global variable.
-    # Also find C++ and __fastcall symbols from MSVC++,
-    # which start with @ or ?.
-    lt_cv_sys_global_symbol_pipe="$AWK ['"\
-"     {last_section=section; section=\$ 3};"\
-"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
-"     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
-"     \$ 0!~/External *\|/{next};"\
-"     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
-"     {if(hide[section]) next};"\
-"     {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
-"     {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
-"     s[1]~/^[@?]/{print s[1], s[1]; next};"\
-"     s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
-"     ' prfx=^$ac_symprfx]"
-  else
-    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[	 ]]\($symcode$symcode*\)[[	 ]][[	 ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
-  fi
-  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
-
-  # Check to see that the pipe works correctly.
-  pipe_works=no
-
-  rm -f conftest*
-  cat > conftest.$ac_ext <<_LT_EOF
-#ifdef __cplusplus
-extern "C" {
-#endif
-char nm_test_var;
-void nm_test_func(void);
-void nm_test_func(void){}
-#ifdef __cplusplus
-}
-#endif
-int main(){nm_test_var='a';nm_test_func();return(0);}
-_LT_EOF
-
-  if AC_TRY_EVAL(ac_compile); then
-    # Now try to grab the symbols.
-    nlist=conftest.nm
-    if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then
-      # Try sorting and uniquifying the output.
-      if sort "$nlist" | uniq > "$nlist"T; then
-	mv -f "$nlist"T "$nlist"
-      else
-	rm -f "$nlist"T
-      fi
-
-      # Make sure that we snagged all the symbols we need.
-      if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
-	if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
-	  cat <<_LT_EOF > conftest.$ac_ext
-/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
-/* DATA imports from DLLs on WIN32 con't be const, because runtime
-   relocations are performed -- see ld's documentation on pseudo-relocs.  */
-# define LT@&t@_DLSYM_CONST
-#elif defined(__osf__)
-/* This system does not cope well with relocations in const data.  */
-# define LT@&t@_DLSYM_CONST
-#else
-# define LT@&t@_DLSYM_CONST const
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-_LT_EOF
-	  # Now generate the symbol file.
-	  eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
-
-	  cat <<_LT_EOF >> conftest.$ac_ext
-
-/* The mapping between symbol names and symbols.  */
-LT@&t@_DLSYM_CONST struct {
-  const char *name;
-  void       *address;
-}
-lt__PROGRAM__LTX_preloaded_symbols[[]] =
-{
-  { "@PROGRAM@", (void *) 0 },
-_LT_EOF
-	  $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
-	  cat <<\_LT_EOF >> conftest.$ac_ext
-  {0, (void *) 0}
-};
-
-/* This works around a problem in FreeBSD linker */
-#ifdef FREEBSD_WORKAROUND
-static const void *lt_preloaded_setup() {
-  return lt__PROGRAM__LTX_preloaded_symbols;
-}
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-_LT_EOF
-	  # Now try linking the two files.
-	  mv conftest.$ac_objext conftstm.$ac_objext
-	  lt_globsym_save_LIBS=$LIBS
-	  lt_globsym_save_CFLAGS=$CFLAGS
-	  LIBS="conftstm.$ac_objext"
-	  CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)"
-	  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then
-	    pipe_works=yes
-	  fi
-	  LIBS=$lt_globsym_save_LIBS
-	  CFLAGS=$lt_globsym_save_CFLAGS
-	else
-	  echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD
-	fi
-      else
-	echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD
-      fi
-    else
-      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
-    fi
-  else
-    echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD
-    cat conftest.$ac_ext >&5
-  fi
-  rm -rf conftest* conftst*
-
-  # Do not use the global_symbol_pipe unless it works.
-  if test "$pipe_works" = yes; then
-    break
-  else
-    lt_cv_sys_global_symbol_pipe=
-  fi
-done
-])
-if test -z "$lt_cv_sys_global_symbol_pipe"; then
-  lt_cv_sys_global_symbol_to_cdecl=
-fi
-if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
-  AC_MSG_RESULT(failed)
-else
-  AC_MSG_RESULT(ok)
-fi
-
-# Response file support.
-if test "$lt_cv_nm_interface" = "MS dumpbin"; then
-  nm_file_list_spec='@'
-elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then
-  nm_file_list_spec='@'
-fi
-
-_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1],
-    [Take the output of nm and produce a listing of raw symbols and C names])
-_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1],
-    [Transform the output of nm in a proper C declaration])
-_LT_DECL([global_symbol_to_c_name_address],
-    [lt_cv_sys_global_symbol_to_c_name_address], [1],
-    [Transform the output of nm in a C name address pair])
-_LT_DECL([global_symbol_to_c_name_address_lib_prefix],
-    [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1],
-    [Transform the output of nm in a C name address pair when lib prefix is needed])
-_LT_DECL([], [nm_file_list_spec], [1],
-    [Specify filename containing input files for $NM])
-]) # _LT_CMD_GLOBAL_SYMBOLS
-
-
-# _LT_COMPILER_PIC([TAGNAME])
-# ---------------------------
-m4_defun([_LT_COMPILER_PIC],
-[m4_require([_LT_TAG_COMPILER])dnl
-_LT_TAGVAR(lt_prog_compiler_wl, $1)=
-_LT_TAGVAR(lt_prog_compiler_pic, $1)=
-_LT_TAGVAR(lt_prog_compiler_static, $1)=
-
-m4_if([$1], [CXX], [
-  # C++ specific cases for pic, static, wl, etc.
-  if test "$GXX" = yes; then
-    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-
-    case $host_os in
-    aix*)
-      # All AIX code is PIC.
-      if test "$host_cpu" = ia64; then
-	# AIX 5 now supports IA64 processor
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      fi
-      ;;
-
-    amigaos*)
-      case $host_cpu in
-      powerpc)
-            # see comment about AmigaOS4 .so support
-            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-        ;;
-      m68k)
-            # FIXME: we need at least 68020 code to build shared libraries, but
-            # adding the `-m68020' flag to GCC prevents building anything better,
-            # like `-m68040'.
-            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
-        ;;
-      esac
-      ;;
-
-    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
-      # PIC is the default for these OSes.
-      ;;
-    mingw* | cygwin* | os2* | pw32* | cegcc*)
-      # This hack is so that the source file can tell whether it is being
-      # built for inclusion in a dll (and should export symbols for example).
-      # Although the cygwin gcc ignores -fPIC, still need this for old-style
-      # (--disable-auto-import) libraries
-      m4_if([$1], [GCJ], [],
-	[_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
-      ;;
-    darwin* | rhapsody*)
-      # PIC is the default on this platform
-      # Common symbols not allowed in MH_DYLIB files
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
-      ;;
-    *djgpp*)
-      # DJGPP does not support shared libraries at all
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)=
-      ;;
-    haiku*)
-      # PIC is the default for Haiku.
-      # The "-static" flag exists, but is broken.
-      _LT_TAGVAR(lt_prog_compiler_static, $1)=
-      ;;
-    interix[[3-9]]*)
-      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
-      # Instead, we relocate shared libraries at runtime.
-      ;;
-    sysv4*MP*)
-      if test -d /usr/nec; then
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
-      fi
-      ;;
-    hpux*)
-      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
-      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
-      # sets the default TLS model and affects inlining.
-      case $host_cpu in
-      hppa*64*)
-	;;
-      *)
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-	;;
-      esac
-      ;;
-    *qnx* | *nto*)
-      # QNX uses GNU C++, but need to define -shared option too, otherwise
-      # it will coredump.
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
-      ;;
-    *)
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-      ;;
-    esac
-  else
-    case $host_os in
-      aix[[4-9]]*)
-	# All AIX code is PIC.
-	if test "$host_cpu" = ia64; then
-	  # AIX 5 now supports IA64 processor
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	else
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
-	fi
-	;;
-      chorus*)
-	case $cc_basename in
-	cxch68*)
-	  # Green Hills C++ Compiler
-	  # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
-	  ;;
-	esac
-	;;
-      mingw* | cygwin* | os2* | pw32* | cegcc*)
-	# This hack is so that the source file can tell whether it is being
-	# built for inclusion in a dll (and should export symbols for example).
-	m4_if([$1], [GCJ], [],
-	  [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
-	;;
-      dgux*)
-	case $cc_basename in
-	  ec++*)
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	    ;;
-	  ghcx*)
-	    # Green Hills C++ Compiler
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      freebsd* | dragonfly*)
-	# FreeBSD uses GNU C++
-	;;
-      hpux9* | hpux10* | hpux11*)
-	case $cc_basename in
-	  CC*)
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
-	    if test "$host_cpu" != ia64; then
-	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
-	    fi
-	    ;;
-	  aCC*)
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
-	    case $host_cpu in
-	    hppa*64*|ia64*)
-	      # +Z the default
-	      ;;
-	    *)
-	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
-	      ;;
-	    esac
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      interix*)
-	# This is c89, which is MS Visual C++ (no shared libs)
-	# Anyone wants to do a port?
-	;;
-      irix5* | irix6* | nonstopux*)
-	case $cc_basename in
-	  CC*)
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-	    # CC pic flag -KPIC is the default.
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
-	case $cc_basename in
-	  KCC*)
-	    # KAI C++ Compiler
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-	    ;;
-	  ecpc* )
-	    # old Intel C++ for x86_64 which still supported -KPIC.
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-	    ;;
-	  icpc* )
-	    # Intel C++, used to be incompatible with GCC.
-	    # ICC 10 doesn't accept -KPIC any more.
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-	    ;;
-	  pgCC* | pgcpp*)
-	    # Portland Group C++ compiler
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	    ;;
-	  cxx*)
-	    # Compaq C++
-	    # Make sure the PIC flag is empty.  It appears that all Alpha
-	    # Linux and Compaq Tru64 Unix objects are PIC.
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-	    ;;
-	  xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*)
-	    # IBM XL 8.0, 9.0 on PPC and BlueGene
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
-	    ;;
-	  *)
-	    case `$CC -V 2>&1 | sed 5q` in
-	    *Sun\ C*)
-	      # Sun C++ 5.9
-	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
-	      ;;
-	    esac
-	    ;;
-	esac
-	;;
-      lynxos*)
-	;;
-      m88k*)
-	;;
-      mvs*)
-	case $cc_basename in
-	  cxx*)
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall'
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      netbsd*)
-	;;
-      *qnx* | *nto*)
-        # QNX uses GNU C++, but need to define -shared option too, otherwise
-        # it will coredump.
-        _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
-        ;;
-      osf3* | osf4* | osf5*)
-	case $cc_basename in
-	  KCC*)
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
-	    ;;
-	  RCC*)
-	    # Rational C++ 2.4.1
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
-	    ;;
-	  cxx*)
-	    # Digital/Compaq C++
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    # Make sure the PIC flag is empty.  It appears that all Alpha
-	    # Linux and Compaq Tru64 Unix objects are PIC.
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      psos*)
-	;;
-      solaris*)
-	case $cc_basename in
-	  CC* | sunCC*)
-	    # Sun C++ 4.2, 5.x and Centerline C++
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
-	    ;;
-	  gcx*)
-	    # Green Hills C++ Compiler
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      sunos4*)
-	case $cc_basename in
-	  CC*)
-	    # Sun C++ 4.x
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	    ;;
-	  lcc*)
-	    # Lucid
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
-	case $cc_basename in
-	  CC*)
-	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	    ;;
-	esac
-	;;
-      tandem*)
-	case $cc_basename in
-	  NCC*)
-	    # NonStop-UX NCC 3.20
-	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	    ;;
-	  *)
-	    ;;
-	esac
-	;;
-      vxworks*)
-	;;
-      *)
-	_LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
-	;;
-    esac
-  fi
-],
-[
-  if test "$GCC" = yes; then
-    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-
-    case $host_os in
-      aix*)
-      # All AIX code is PIC.
-      if test "$host_cpu" = ia64; then
-	# AIX 5 now supports IA64 processor
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      fi
-      ;;
-
-    amigaos*)
-      case $host_cpu in
-      powerpc)
-            # see comment about AmigaOS4 .so support
-            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-        ;;
-      m68k)
-            # FIXME: we need at least 68020 code to build shared libraries, but
-            # adding the `-m68020' flag to GCC prevents building anything better,
-            # like `-m68040'.
-            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
-        ;;
-      esac
-      ;;
-
-    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
-      # PIC is the default for these OSes.
-      ;;
-
-    mingw* | cygwin* | pw32* | os2* | cegcc*)
-      # This hack is so that the source file can tell whether it is being
-      # built for inclusion in a dll (and should export symbols for example).
-      # Although the cygwin gcc ignores -fPIC, still need this for old-style
-      # (--disable-auto-import) libraries
-      m4_if([$1], [GCJ], [],
-	[_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
-      ;;
-
-    darwin* | rhapsody*)
-      # PIC is the default on this platform
-      # Common symbols not allowed in MH_DYLIB files
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
-      ;;
-
-    haiku*)
-      # PIC is the default for Haiku.
-      # The "-static" flag exists, but is broken.
-      _LT_TAGVAR(lt_prog_compiler_static, $1)=
-      ;;
-
-    hpux*)
-      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
-      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
-      # sets the default TLS model and affects inlining.
-      case $host_cpu in
-      hppa*64*)
-	# +Z the default
-	;;
-      *)
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-	;;
-      esac
-      ;;
-
-    interix[[3-9]]*)
-      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
-      # Instead, we relocate shared libraries at runtime.
-      ;;
-
-    msdosdjgpp*)
-      # Just because we use GCC doesn't mean we suddenly get shared libraries
-      # on systems that don't support them.
-      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
-      enable_shared=no
-      ;;
-
-    *nto* | *qnx*)
-      # QNX uses GNU C++, but need to define -shared option too, otherwise
-      # it will coredump.
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
-      ;;
-
-    sysv4*MP*)
-      if test -d /usr/nec; then
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
-      fi
-      ;;
-
-    *)
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-      ;;
-    esac
-
-    case $cc_basename in
-    nvcc*) # Cuda Compiler Driver 2.2
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
-      if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
-        _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
-      fi
-      ;;
-    esac
-  else
-    # PORTME Check for flag to pass linker flags through the system compiler.
-    case $host_os in
-    aix*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      if test "$host_cpu" = ia64; then
-	# AIX 5 now supports IA64 processor
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      else
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
-      fi
-      ;;
-
-    mingw* | cygwin* | pw32* | os2* | cegcc*)
-      # This hack is so that the source file can tell whether it is being
-      # built for inclusion in a dll (and should export symbols for example).
-      m4_if([$1], [GCJ], [],
-	[_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
-      ;;
-
-    hpux9* | hpux10* | hpux11*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
-      # not for PA HP-UX.
-      case $host_cpu in
-      hppa*64*|ia64*)
-	# +Z the default
-	;;
-      *)
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
-	;;
-      esac
-      # Is there a better lt_prog_compiler_static that works with the bundled CC?
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
-      ;;
-
-    irix5* | irix6* | nonstopux*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      # PIC (with -KPIC) is the default.
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-      ;;
-
-    linux* | k*bsd*-gnu | kopensolaris*-gnu)
-      case $cc_basename in
-      # old Intel for x86_64 which still supported -KPIC.
-      ecc*)
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-        ;;
-      # icc used to be incompatible with GCC.
-      # ICC 10 doesn't accept -KPIC any more.
-      icc* | ifort*)
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-        ;;
-      # Lahey Fortran 8.1.
-      lf95*)
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='--static'
-	;;
-      nagfor*)
-	# NAG Fortran compiler
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,'
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	;;
-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
-        # Portland Group compilers (*not* the Pentium gcc compiler,
-	# which looks to be a dead project)
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-        ;;
-      ccc*)
-        _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-        # All Alpha code is PIC.
-        _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-        ;;
-      xl* | bgxl* | bgf* | mpixl*)
-	# IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
-	;;
-      *)
-	case `$CC -V 2>&1 | sed 5q` in
-	*Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
-	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
-	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	  _LT_TAGVAR(lt_prog_compiler_wl, $1)=''
-	  ;;
-	*Sun\ F* | *Sun*Fortran*)
-	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
-	  ;;
-	*Sun\ C*)
-	  # Sun C 5.9
-	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	  ;;
-        *Intel*\ [[CF]]*Compiler*)
-	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-	  ;;
-	*Portland\ Group*)
-	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
-	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-	  ;;
-	esac
-	;;
-      esac
-      ;;
-
-    newsos6)
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      ;;
-
-    *nto* | *qnx*)
-      # QNX uses GNU C++, but need to define -shared option too, otherwise
-      # it will coredump.
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
-      ;;
-
-    osf3* | osf4* | osf5*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      # All OSF/1 code is PIC.
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-      ;;
-
-    rdos*)
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
-      ;;
-
-    solaris*)
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      case $cc_basename in
-      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';;
-      *)
-	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';;
-      esac
-      ;;
-
-    sunos4*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      ;;
-
-    sysv4 | sysv4.2uw2* | sysv4.3*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      ;;
-
-    sysv4*MP*)
-      if test -d /usr/nec ;then
-	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic'
-	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      fi
-      ;;
-
-    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      ;;
-
-    unicos*)
-      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
-      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
-      ;;
-
-    uts4*)
-      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
-      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
-      ;;
-
-    *)
-      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
-      ;;
-    esac
-  fi
-])
-case $host_os in
-  # For platforms which do not support PIC, -DPIC is meaningless:
-  *djgpp*)
-    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
-    ;;
-  *)
-    _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])"
-    ;;
-esac
-
-AC_CACHE_CHECK([for $compiler option to produce PIC],
-  [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)],
-  [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)])
-_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)
-
-#
-# Check to make sure the PIC flag actually works.
-#
-if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
-  _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works],
-    [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)],
-    [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [],
-    [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in
-     "" | " "*) ;;
-     *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;;
-     esac],
-    [_LT_TAGVAR(lt_prog_compiler_pic, $1)=
-     _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no])
-fi
-_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1],
-	[Additional compiler flags for building library objects])
-
-_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1],
-	[How to pass a linker flag through the compiler])
-#
-# Check to make sure the static flag actually works.
-#
-wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\"
-_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works],
-  _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1),
-  $lt_tmp_static_flag,
-  [],
-  [_LT_TAGVAR(lt_prog_compiler_static, $1)=])
-_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1],
-	[Compiler flag to prevent dynamic linking])
-])# _LT_COMPILER_PIC
-
-
-# _LT_LINKER_SHLIBS([TAGNAME])
-# ----------------------------
-# See if the linker supports building shared libraries.
-m4_defun([_LT_LINKER_SHLIBS],
-[AC_REQUIRE([LT_PATH_LD])dnl
-AC_REQUIRE([LT_PATH_NM])dnl
-m4_require([_LT_PATH_MANIFEST_TOOL])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
-m4_require([_LT_TAG_COMPILER])dnl
-AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
-m4_if([$1], [CXX], [
-  _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
-  _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
-  case $host_os in
-  aix[[4-9]]*)
-    # If we're using GNU nm, then we don't want the "-C" option.
-    # -C means demangle to AIX nm, but means don't demangle with GNU nm
-    # Also, AIX nm treats weak defined symbols like other global defined
-    # symbols, whereas GNU nm marks them as "W".
-    if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
-      _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
-    else
-      _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
-    fi
-    ;;
-  pw32*)
-    _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds"
-    ;;
-  cygwin* | mingw* | cegcc*)
-    case $cc_basename in
-    cl*)
-      _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
-      ;;
-    *)
-      _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
-      _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
-      ;;
-    esac
-    ;;
-  *)
-    _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
-    ;;
-  esac
-], [
-  runpath_var=
-  _LT_TAGVAR(allow_undefined_flag, $1)=
-  _LT_TAGVAR(always_export_symbols, $1)=no
-  _LT_TAGVAR(archive_cmds, $1)=
-  _LT_TAGVAR(archive_expsym_cmds, $1)=
-  _LT_TAGVAR(compiler_needs_object, $1)=no
-  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-  _LT_TAGVAR(export_dynamic_flag_spec, $1)=
-  _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
-  _LT_TAGVAR(hardcode_automatic, $1)=no
-  _LT_TAGVAR(hardcode_direct, $1)=no
-  _LT_TAGVAR(hardcode_direct_absolute, $1)=no
-  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-  _LT_TAGVAR(hardcode_libdir_separator, $1)=
-  _LT_TAGVAR(hardcode_minus_L, $1)=no
-  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
-  _LT_TAGVAR(inherit_rpath, $1)=no
-  _LT_TAGVAR(link_all_deplibs, $1)=unknown
-  _LT_TAGVAR(module_cmds, $1)=
-  _LT_TAGVAR(module_expsym_cmds, $1)=
-  _LT_TAGVAR(old_archive_from_new_cmds, $1)=
-  _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)=
-  _LT_TAGVAR(thread_safe_flag_spec, $1)=
-  _LT_TAGVAR(whole_archive_flag_spec, $1)=
-  # include_expsyms should be a list of space-separated symbols to be *always*
-  # included in the symbol list
-  _LT_TAGVAR(include_expsyms, $1)=
-  # exclude_expsyms can be an extended regexp of symbols to exclude
-  # it will be wrapped by ` (' and `)$', so one must not match beginning or
-  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
-  # as well as any symbol that contains `d'.
-  _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
-  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
-  # platforms (ab)use it in PIC code, but their linkers get confused if
-  # the symbol is explicitly referenced.  Since portable code cannot
-  # rely on this symbol name, it's probably fine to never include it in
-  # preloaded symbol tables.
-  # Exclude shared library initialization/finalization symbols.
-dnl Note also adjust exclude_expsyms for C++ above.
-  extract_expsyms_cmds=
-
-  case $host_os in
-  cygwin* | mingw* | pw32* | cegcc*)
-    # FIXME: the MSVC++ port hasn't been tested in a loooong time
-    # When not using gcc, we currently assume that we are using
-    # Microsoft Visual C++.
-    if test "$GCC" != yes; then
-      with_gnu_ld=no
-    fi
-    ;;
-  interix*)
-    # we just hope/assume this is gcc and not c89 (= MSVC++)
-    with_gnu_ld=yes
-    ;;
-  openbsd*)
-    with_gnu_ld=no
-    ;;
-  esac
-
-  _LT_TAGVAR(ld_shlibs, $1)=yes
-
-  # On some targets, GNU ld is compatible enough with the native linker
-  # that we're better off using the native interface for both.
-  lt_use_gnu_ld_interface=no
-  if test "$with_gnu_ld" = yes; then
-    case $host_os in
-      aix*)
-	# The AIX port of GNU ld has always aspired to compatibility
-	# with the native linker.  However, as the warning in the GNU ld
-	# block says, versions before 2.19.5* couldn't really create working
-	# shared libraries, regardless of the interface used.
-	case `$LD -v 2>&1` in
-	  *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
-	  *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;;
-	  *\ \(GNU\ Binutils\)\ [[3-9]]*) ;;
-	  *)
-	    lt_use_gnu_ld_interface=yes
-	    ;;
-	esac
-	;;
-      *)
-	lt_use_gnu_ld_interface=yes
-	;;
-    esac
-  fi
-
-  if test "$lt_use_gnu_ld_interface" = yes; then
-    # If archive_cmds runs LD, not CC, wlarc should be empty
-    wlarc='${wl}'
-
-    # Set some defaults for GNU ld with shared library support. These
-    # are reset later if shared libraries are not supported. Putting them
-    # here allows them to be overridden if necessary.
-    runpath_var=LD_RUN_PATH
-    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-    # ancient GNU ld didn't support --whole-archive et. al.
-    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
-      _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
-    else
-      _LT_TAGVAR(whole_archive_flag_spec, $1)=
-    fi
-    supports_anon_versioning=no
-    case `$LD -v 2>&1` in
-      *GNU\ gold*) supports_anon_versioning=yes ;;
-      *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11
-      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
-      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
-      *\ 2.11.*) ;; # other 2.11 versions
-      *) supports_anon_versioning=yes ;;
-    esac
-
-    # See if GNU ld supports shared libraries.
-    case $host_os in
-    aix[[3-9]]*)
-      # On AIX/PPC, the GNU linker is very broken
-      if test "$host_cpu" != ia64; then
-	_LT_TAGVAR(ld_shlibs, $1)=no
-	cat <<_LT_EOF 1>&2
-
-*** Warning: the GNU linker, at least up to release 2.19, is reported
-*** to be unable to reliably create shared libraries on AIX.
-*** Therefore, libtool is disabling shared libraries support.  If you
-*** really care for shared libraries, you may want to install binutils
-*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
-*** You will then need to restart the configuration process.
-
-_LT_EOF
-      fi
-      ;;
-
-    amigaos*)
-      case $host_cpu in
-      powerpc)
-            # see comment about AmigaOS4 .so support
-            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-            _LT_TAGVAR(archive_expsym_cmds, $1)=''
-        ;;
-      m68k)
-            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
-            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-            _LT_TAGVAR(hardcode_minus_L, $1)=yes
-        ;;
-      esac
-      ;;
-
-    beos*)
-      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
-	_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-	# Joseph Beckenbach <jrb3@best.com> says some releases of gcc
-	# support --undefined.  This deserves some investigation.  FIXME
-	_LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-      else
-	_LT_TAGVAR(ld_shlibs, $1)=no
-      fi
-      ;;
-
-    cygwin* | mingw* | pw32* | cegcc*)
-      # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
-      # as there is no search path for DLLs.
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
-      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-      _LT_TAGVAR(always_export_symbols, $1)=no
-      _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
-      _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
-      _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
-
-      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
-        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
-	# If the export-symbols file already is a .def file (1st line
-	# is EXPORTS), use it as is; otherwise, prepend...
-	_LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
-	  cp $export_symbols $output_objdir/$soname.def;
-	else
-	  echo EXPORTS > $output_objdir/$soname.def;
-	  cat $export_symbols >> $output_objdir/$soname.def;
-	fi~
-	$CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
-      else
-	_LT_TAGVAR(ld_shlibs, $1)=no
-      fi
-      ;;
-
-    haiku*)
-      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-      _LT_TAGVAR(link_all_deplibs, $1)=yes
-      ;;
-
-    interix[[3-9]]*)
-      _LT_TAGVAR(hardcode_direct, $1)=no
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
-      # Instead, shared libraries are loaded at an image base (0x10000000 by
-      # default) and relocated if they conflict, which is a slow very memory
-      # consuming and fragmenting process.  To avoid this, we pick a random,
-      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
-      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
-      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
-      _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
-      ;;
-
-    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
-      tmp_diet=no
-      if test "$host_os" = linux-dietlibc; then
-	case $cc_basename in
-	  diet\ *) tmp_diet=yes;;	# linux-dietlibc with static linking (!diet-dyn)
-	esac
-      fi
-      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
-	 && test "$tmp_diet" = no
-      then
-	tmp_addflag=' $pic_flag'
-	tmp_sharedflag='-shared'
-	case $cc_basename,$host_cpu in
-        pgcc*)				# Portland Group C compiler
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
-	  tmp_addflag=' $pic_flag'
-	  ;;
-	pgf77* | pgf90* | pgf95* | pgfortran*)
-					# Portland Group f77 and f90 compilers
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
-	  tmp_addflag=' $pic_flag -Mnomain' ;;
-	ecc*,ia64* | icc*,ia64*)	# Intel C compiler on ia64
-	  tmp_addflag=' -i_dynamic' ;;
-	efc*,ia64* | ifort*,ia64*)	# Intel Fortran compiler on ia64
-	  tmp_addflag=' -i_dynamic -nofor_main' ;;
-	ifc* | ifort*)			# Intel Fortran compiler
-	  tmp_addflag=' -nofor_main' ;;
-	lf95*)				# Lahey Fortran 8.1
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)=
-	  tmp_sharedflag='--shared' ;;
-	xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below)
-	  tmp_sharedflag='-qmkshrobj'
-	  tmp_addflag= ;;
-	nvcc*)	# Cuda Compiler Driver 2.2
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
-	  _LT_TAGVAR(compiler_needs_object, $1)=yes
-	  ;;
-	esac
-	case `$CC -V 2>&1 | sed 5q` in
-	*Sun\ C*)			# Sun C 5.9
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
-	  _LT_TAGVAR(compiler_needs_object, $1)=yes
-	  tmp_sharedflag='-G' ;;
-	*Sun\ F*)			# Sun Fortran 8.3
-	  tmp_sharedflag='-G' ;;
-	esac
-	_LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-
-        if test "x$supports_anon_versioning" = xyes; then
-          _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
-	    cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
-	    echo "local: *; };" >> $output_objdir/$libname.ver~
-	    $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
-        fi
-
-	case $cc_basename in
-	xlf* | bgf* | bgxlf* | mpixlf*)
-	  # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-	  _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
-	  if test "x$supports_anon_versioning" = xyes; then
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
-	      cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
-	      echo "local: *; };" >> $output_objdir/$libname.ver~
-	      $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
-	  fi
-	  ;;
-	esac
-      else
-        _LT_TAGVAR(ld_shlibs, $1)=no
-      fi
-      ;;
-
-    netbsd*)
-      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
-	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
-	wlarc=
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-      fi
-      ;;
-
-    solaris*)
-      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
-	_LT_TAGVAR(ld_shlibs, $1)=no
-	cat <<_LT_EOF 1>&2
-
-*** Warning: The releases 2.8.* of the GNU linker cannot reliably
-*** create shared libraries on Solaris systems.  Therefore, libtool
-*** is disabling shared libraries support.  We urge you to upgrade GNU
-*** binutils to release 2.9.1 or newer.  Another option is to modify
-*** your PATH or compiler configuration so that the native linker is
-*** used, and then restart.
-
-_LT_EOF
-      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-      else
-	_LT_TAGVAR(ld_shlibs, $1)=no
-      fi
-      ;;
-
-    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
-      case `$LD -v 2>&1` in
-        *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*)
-	_LT_TAGVAR(ld_shlibs, $1)=no
-	cat <<_LT_EOF 1>&2
-
-*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
-*** reliably create shared libraries on SCO systems.  Therefore, libtool
-*** is disabling shared libraries support.  We urge you to upgrade GNU
-*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
-*** your PATH or compiler configuration so that the native linker is
-*** used, and then restart.
-
-_LT_EOF
-	;;
-	*)
-	  # For security reasons, it is highly recommended that you always
-	  # use absolute paths for naming shared libraries, and exclude the
-	  # DT_RUNPATH tag from executables and libraries.  But doing so
-	  # requires that you compile everything twice, which is a pain.
-	  if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-	  else
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	  fi
-	;;
-      esac
-      ;;
-
-    sunos4*)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
-      wlarc=
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    *)
-      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-      else
-	_LT_TAGVAR(ld_shlibs, $1)=no
-      fi
-      ;;
-    esac
-
-    if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then
-      runpath_var=
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)=
-      _LT_TAGVAR(whole_archive_flag_spec, $1)=
-    fi
-  else
-    # PORTME fill in a description of your system's linker (not GNU ld)
-    case $host_os in
-    aix3*)
-      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-      _LT_TAGVAR(always_export_symbols, $1)=yes
-      _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
-      # Note: this linker hardcodes the directories in LIBPATH if there
-      # are no directories specified by -L.
-      _LT_TAGVAR(hardcode_minus_L, $1)=yes
-      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
-	# Neither direct hardcoding nor static linking is supported with a
-	# broken collect2.
-	_LT_TAGVAR(hardcode_direct, $1)=unsupported
-      fi
-      ;;
-
-    aix[[4-9]]*)
-      if test "$host_cpu" = ia64; then
-	# On IA64, the linker does run time linking by default, so we don't
-	# have to do anything special.
-	aix_use_runtimelinking=no
-	exp_sym_flag='-Bexport'
-	no_entry_flag=""
-      else
-	# If we're using GNU nm, then we don't want the "-C" option.
-	# -C means demangle to AIX nm, but means don't demangle with GNU nm
-	# Also, AIX nm treats weak defined symbols like other global
-	# defined symbols, whereas GNU nm marks them as "W".
-	if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
-	  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
-	else
-	  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
-	fi
-	aix_use_runtimelinking=no
-
-	# Test if we are trying to use run time linking or normal
-	# AIX style linking. If -brtl is somewhere in LDFLAGS, we
-	# need to do runtime linking.
-	case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
-	  for ld_flag in $LDFLAGS; do
-	  if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
-	    aix_use_runtimelinking=yes
-	    break
-	  fi
-	  done
-	  ;;
-	esac
-
-	exp_sym_flag='-bexport'
-	no_entry_flag='-bnoentry'
-      fi
-
-      # When large executables or shared objects are built, AIX ld can
-      # have problems creating the table of contents.  If linking a library
-      # or program results in "error TOC overflow" add -mminimal-toc to
-      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
-      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
-
-      _LT_TAGVAR(archive_cmds, $1)=''
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
-      _LT_TAGVAR(link_all_deplibs, $1)=yes
-      _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
-
-      if test "$GCC" = yes; then
-	case $host_os in aix4.[[012]]|aix4.[[012]].*)
-	# We only want to do this on AIX 4.2 and lower, the check
-	# below for broken collect2 doesn't work under 4.3+
-	  collect2name=`${CC} -print-prog-name=collect2`
-	  if test -f "$collect2name" &&
-	   strings "$collect2name" | $GREP resolve_lib_name >/dev/null
-	  then
-	  # We have reworked collect2
-	  :
-	  else
-	  # We have old collect2
-	  _LT_TAGVAR(hardcode_direct, $1)=unsupported
-	  # It fails to find uninstalled libraries when the uninstalled
-	  # path is not listed in the libpath.  Setting hardcode_minus_L
-	  # to unsupported forces relinking
-	  _LT_TAGVAR(hardcode_minus_L, $1)=yes
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-	  _LT_TAGVAR(hardcode_libdir_separator, $1)=
-	  fi
-	  ;;
-	esac
-	shared_flag='-shared'
-	if test "$aix_use_runtimelinking" = yes; then
-	  shared_flag="$shared_flag "'${wl}-G'
-	fi
-      else
-	# not using gcc
-	if test "$host_cpu" = ia64; then
-	# VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
-	# chokes on -Wl,-G. The following line is correct:
-	  shared_flag='-G'
-	else
-	  if test "$aix_use_runtimelinking" = yes; then
-	    shared_flag='${wl}-G'
-	  else
-	    shared_flag='${wl}-bM:SRE'
-	  fi
-	fi
-      fi
-
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
-      # It seems that -bexpall does not export symbols beginning with
-      # underscore (_), so it is better to generate a list of symbols to export.
-      _LT_TAGVAR(always_export_symbols, $1)=yes
-      if test "$aix_use_runtimelinking" = yes; then
-	# Warning - without using the other runtime loading flags (-brtl),
-	# -berok will link without error, but may produce a broken library.
-	_LT_TAGVAR(allow_undefined_flag, $1)='-berok'
-        # Determine the default libpath from the value encoded in an
-        # empty executable.
-        _LT_SYS_MODULE_PATH_AIX([$1])
-        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
-        _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
-      else
-	if test "$host_cpu" = ia64; then
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
-	  _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
-	  _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
-	else
-	 # Determine the default libpath from the value encoded in an
-	 # empty executable.
-	 _LT_SYS_MODULE_PATH_AIX([$1])
-	 _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
-	  # Warning - without using the other run time loading flags,
-	  # -berok will link without error, but may produce a broken library.
-	  _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
-	  _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
-	  if test "$with_gnu_ld" = yes; then
-	    # We only use this code for GNU lds that support --whole-archive.
-	    _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
-	  else
-	    # Exported symbols can be pulled into shared objects from archives
-	    _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
-	  fi
-	  _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
-	  # This is similar to how AIX traditionally builds its shared libraries.
-	  _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
-	fi
-      fi
-      ;;
-
-    amigaos*)
-      case $host_cpu in
-      powerpc)
-            # see comment about AmigaOS4 .so support
-            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-            _LT_TAGVAR(archive_expsym_cmds, $1)=''
-        ;;
-      m68k)
-            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
-            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-            _LT_TAGVAR(hardcode_minus_L, $1)=yes
-        ;;
-      esac
-      ;;
-
-    bsdi[[45]]*)
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic
-      ;;
-
-    cygwin* | mingw* | pw32* | cegcc*)
-      # When not using gcc, we currently assume that we are using
-      # Microsoft Visual C++.
-      # hardcode_libdir_flag_spec is actually meaningless, as there is
-      # no search path for DLLs.
-      case $cc_basename in
-      cl*)
-	# Native MSVC
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
-	_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-	_LT_TAGVAR(always_export_symbols, $1)=yes
-	_LT_TAGVAR(file_list_spec, $1)='@'
-	# Tell ltmain to make .lib files, not .a files.
-	libext=lib
-	# Tell ltmain to make .dll files, not .so files.
-	shrext_cmds=".dll"
-	# FIXME: Setting linknames here is a bad hack.
-	_LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
-	_LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
-	    sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
-	  else
-	    sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
-	  fi~
-	  $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
-	  linknames='
-	# The linker will not automatically build a static lib if we build a DLL.
-	# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
-	_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
-	_LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
-	_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
-	# Don't use ranlib
-	_LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
-	_LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
-	  lt_tool_outputfile="@TOOL_OUTPUT@"~
-	  case $lt_outputfile in
-	    *.exe|*.EXE) ;;
-	    *)
-	      lt_outputfile="$lt_outputfile.exe"
-	      lt_tool_outputfile="$lt_tool_outputfile.exe"
-	      ;;
-	  esac~
-	  if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
-	    $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
-	    $RM "$lt_outputfile.manifest";
-	  fi'
-	;;
-      *)
-	# Assume MSVC wrapper
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
-	_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-	# Tell ltmain to make .lib files, not .a files.
-	libext=lib
-	# Tell ltmain to make .dll files, not .so files.
-	shrext_cmds=".dll"
-	# FIXME: Setting linknames here is a bad hack.
-	_LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
-	# The linker will automatically build a .lib file if we build a DLL.
-	_LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
-	# FIXME: Should let the user specify the lib program.
-	_LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs'
-	_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
-	;;
-      esac
-      ;;
-
-    darwin* | rhapsody*)
-      _LT_DARWIN_LINKER_FEATURES($1)
-      ;;
-
-    dgux*)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
-    # support.  Future versions do this automatically, but an explicit c++rt0.o
-    # does not break anything, and helps significantly (at the cost of a little
-    # extra space).
-    freebsd2.2*)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
-    freebsd2.*)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_minus_L, $1)=yes
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
-    freebsd* | dragonfly*)
-      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    hpux9*)
-      if test "$GCC" = yes; then
-	_LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
-      fi
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-
-      # hardcode_minus_L: Not really in the search PATH,
-      # but as the default location of the library.
-      _LT_TAGVAR(hardcode_minus_L, $1)=yes
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-      ;;
-
-    hpux10*)
-      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
-      fi
-      if test "$with_gnu_ld" = no; then
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
-	_LT_TAGVAR(hardcode_libdir_separator, $1)=:
-	_LT_TAGVAR(hardcode_direct, $1)=yes
-	_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-	# hardcode_minus_L: Not really in the search PATH,
-	# but as the default location of the library.
-	_LT_TAGVAR(hardcode_minus_L, $1)=yes
-      fi
-      ;;
-
-    hpux11*)
-      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
-	case $host_cpu in
-	hppa*64*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-	ia64*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-	*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-	esac
-      else
-	case $host_cpu in
-	hppa*64*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-	ia64*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-	*)
-	m4_if($1, [], [
-	  # Older versions of the 11.00 compiler do not understand -b yet
-	  # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
-	  _LT_LINKER_OPTION([if $CC understands -b],
-	    _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b],
-	    [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'],
-	    [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])],
-	  [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'])
-	  ;;
-	esac
-      fi
-      if test "$with_gnu_ld" = no; then
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
-	_LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
-	case $host_cpu in
-	hppa*64*|ia64*)
-	  _LT_TAGVAR(hardcode_direct, $1)=no
-	  _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	  ;;
-	*)
-	  _LT_TAGVAR(hardcode_direct, $1)=yes
-	  _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-	  _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-
-	  # hardcode_minus_L: Not really in the search PATH,
-	  # but as the default location of the library.
-	  _LT_TAGVAR(hardcode_minus_L, $1)=yes
-	  ;;
-	esac
-      fi
-      ;;
-
-    irix5* | irix6* | nonstopux*)
-      if test "$GCC" = yes; then
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
-	# Try to use the -exported_symbol ld option, if it does not
-	# work, assume that -exports_file does not work either and
-	# implicitly export all symbols.
-	# This should be the same for all languages, so no per-tag cache variable.
-	AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol],
-	  [lt_cv_irix_exported_symbol],
-	  [save_LDFLAGS="$LDFLAGS"
-	   LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
-	   AC_LINK_IFELSE(
-	     [AC_LANG_SOURCE(
-	        [AC_LANG_CASE([C], [[int foo (void) { return 0; }]],
-			      [C++], [[int foo (void) { return 0; }]],
-			      [Fortran 77], [[
-      subroutine foo
-      end]],
-			      [Fortran], [[
-      subroutine foo
-      end]])])],
-	      [lt_cv_irix_exported_symbol=yes],
-	      [lt_cv_irix_exported_symbol=no])
-           LDFLAGS="$save_LDFLAGS"])
-	if test "$lt_cv_irix_exported_symbol" = yes; then
-          _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
-	fi
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
-      fi
-      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-      _LT_TAGVAR(inherit_rpath, $1)=yes
-      _LT_TAGVAR(link_all_deplibs, $1)=yes
-      ;;
-
-    netbsd*)
-      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
-	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
-      fi
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    newsos6)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    *nto* | *qnx*)
-      ;;
-
-    openbsd*)
-      if test -f /usr/libexec/ld.so; then
-	_LT_TAGVAR(hardcode_direct, $1)=yes
-	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-	if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	  _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	  _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-	else
-	  case $host_os in
-	   openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
-	     _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
-	     _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-	     ;;
-	   *)
-	     _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	     _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	     ;;
-	  esac
-	fi
-      else
-	_LT_TAGVAR(ld_shlibs, $1)=no
-      fi
-      ;;
-
-    os2*)
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-      _LT_TAGVAR(hardcode_minus_L, $1)=yes
-      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-      _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
-      _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
-      ;;
-
-    osf3*)
-      if test "$GCC" = yes; then
-	_LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
-      else
-	_LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-      fi
-      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-      ;;
-
-    osf4* | osf5*)	# as osf3* with the addition of -msym flag
-      if test "$GCC" = yes; then
-	_LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-      else
-	_LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
-	$CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
-
-	# Both c and cxx compiler support -rpath directly
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
-      fi
-      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-      ;;
-
-    solaris*)
-      _LT_TAGVAR(no_undefined_flag, $1)=' -z defs'
-      if test "$GCC" = yes; then
-	wlarc='${wl}'
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
-	  $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
-      else
-	case `$CC -V 2>&1` in
-	*"Compilers 5.0"*)
-	  wlarc=''
-	  _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
-	  _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
-	  $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
-	  ;;
-	*)
-	  wlarc='${wl}'
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
-	  _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
-	  $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
-	  ;;
-	esac
-      fi
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      case $host_os in
-      solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
-      *)
-	# The compiler driver will combine and reorder linker options,
-	# but understands `-z linker_flag'.  GCC discards it without `$wl',
-	# but is careful enough not to reorder.
-	# Supported since Solaris 2.6 (maybe 2.5.1?)
-	if test "$GCC" = yes; then
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
-	else
-	  _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
-	fi
-	;;
-      esac
-      _LT_TAGVAR(link_all_deplibs, $1)=yes
-      ;;
-
-    sunos4*)
-      if test "x$host_vendor" = xsequent; then
-	# Use $CC to link under sequent, because it throws in some extra .o
-	# files that make .init and .fini sections work.
-	_LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
-      fi
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-      _LT_TAGVAR(hardcode_direct, $1)=yes
-      _LT_TAGVAR(hardcode_minus_L, $1)=yes
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    sysv4)
-      case $host_vendor in
-	sni)
-	  _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-	  _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true???
-	;;
-	siemens)
-	  ## LD is ld it makes a PLAMLIB
-	  ## CC just makes a GrossModule.
-	  _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags'
-	  _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs'
-	  _LT_TAGVAR(hardcode_direct, $1)=no
-        ;;
-	motorola)
-	  _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-	  _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie
-	;;
-      esac
-      runpath_var='LD_RUN_PATH'
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    sysv4.3*)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport'
-      ;;
-
-    sysv4*MP*)
-      if test -d /usr/nec; then
-	_LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	runpath_var=LD_RUN_PATH
-	hardcode_runpath_var=yes
-	_LT_TAGVAR(ld_shlibs, $1)=yes
-      fi
-      ;;
-
-    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
-      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
-      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      runpath_var='LD_RUN_PATH'
-
-      if test "$GCC" = yes; then
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-      fi
-      ;;
-
-    sysv5* | sco3.2v5* | sco5v6*)
-      # Note: We can NOT use -z defs as we might desire, because we do not
-      # link with -lc, and that would cause any symbols used from libc to
-      # always be unresolved, which means just about no library would
-      # ever link correctly.  If we're not using GNU ld we use -z text
-      # though, which does catch some bad symbols but isn't as heavy-handed
-      # as -z defs.
-      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
-      _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
-      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
-      _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
-      _LT_TAGVAR(link_all_deplibs, $1)=yes
-      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
-      runpath_var='LD_RUN_PATH'
-
-      if test "$GCC" = yes; then
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-      else
-	_LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-      fi
-      ;;
-
-    uts4*)
-      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
-      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      ;;
-
-    *)
-      _LT_TAGVAR(ld_shlibs, $1)=no
-      ;;
-    esac
-
-    if test x$host_vendor = xsni; then
-      case $host in
-      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
-	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym'
-	;;
-      esac
-    fi
-  fi
-])
-AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
-test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
-
-_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld
-
-_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl
-_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl
-_LT_DECL([], [extract_expsyms_cmds], [2],
-    [The commands to extract the exported symbol list from a shared archive])
-
-#
-# Do we need to explicitly link libc?
-#
-case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in
-x|xyes)
-  # Assume -lc should be added
-  _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
-
-  if test "$enable_shared" = yes && test "$GCC" = yes; then
-    case $_LT_TAGVAR(archive_cmds, $1) in
-    *'~'*)
-      # FIXME: we may have to deal with multi-command sequences.
-      ;;
-    '$CC '*)
-      # Test whether the compiler implicitly links with -lc since on some
-      # systems, -lgcc has to come before -lc. If gcc already passes -lc
-      # to ld, don't add -lc before -lgcc.
-      AC_CACHE_CHECK([whether -lc should be explicitly linked in],
-	[lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1),
-	[$RM conftest*
-	echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
-	if AC_TRY_EVAL(ac_compile) 2>conftest.err; then
-	  soname=conftest
-	  lib=conftest
-	  libobjs=conftest.$ac_objext
-	  deplibs=
-	  wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1)
-	  pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1)
-	  compiler_flags=-v
-	  linker_flags=-v
-	  verstring=
-	  output_objdir=.
-	  libname=conftest
-	  lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1)
-	  _LT_TAGVAR(allow_undefined_flag, $1)=
-	  if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1)
-	  then
-	    lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-	  else
-	    lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes
-	  fi
-	  _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag
-	else
-	  cat conftest.err 1>&5
-	fi
-	$RM conftest*
-	])
-      _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)
-      ;;
-    esac
-  fi
-  ;;
-esac
-
-_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0],
-    [Whether or not to add -lc for building shared libraries])
-_LT_TAGDECL([allow_libtool_libs_with_static_runtimes],
-    [enable_shared_with_static_runtimes], [0],
-    [Whether or not to disallow shared libs when runtime libs are static])
-_LT_TAGDECL([], [export_dynamic_flag_spec], [1],
-    [Compiler flag to allow reflexive dlopens])
-_LT_TAGDECL([], [whole_archive_flag_spec], [1],
-    [Compiler flag to generate shared objects directly from archives])
-_LT_TAGDECL([], [compiler_needs_object], [1],
-    [Whether the compiler copes with passing no objects directly])
-_LT_TAGDECL([], [old_archive_from_new_cmds], [2],
-    [Create an old-style archive from a shared archive])
-_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2],
-    [Create a temporary old-style archive to link instead of a shared archive])
-_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive])
-_LT_TAGDECL([], [archive_expsym_cmds], [2])
-_LT_TAGDECL([], [module_cmds], [2],
-    [Commands used to build a loadable module if different from building
-    a shared archive.])
-_LT_TAGDECL([], [module_expsym_cmds], [2])
-_LT_TAGDECL([], [with_gnu_ld], [1],
-    [Whether we are building with GNU ld or not])
-_LT_TAGDECL([], [allow_undefined_flag], [1],
-    [Flag that allows shared libraries with undefined symbols to be built])
-_LT_TAGDECL([], [no_undefined_flag], [1],
-    [Flag that enforces no undefined symbols])
-_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
-    [Flag to hardcode $libdir into a binary during linking.
-    This must work even if $libdir does not exist])
-_LT_TAGDECL([], [hardcode_libdir_separator], [1],
-    [Whether we need a single "-rpath" flag with a separated argument])
-_LT_TAGDECL([], [hardcode_direct], [0],
-    [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
-    DIR into the resulting binary])
-_LT_TAGDECL([], [hardcode_direct_absolute], [0],
-    [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
-    DIR into the resulting binary and the resulting library dependency is
-    "absolute", i.e impossible to change by setting ${shlibpath_var} if the
-    library is relocated])
-_LT_TAGDECL([], [hardcode_minus_L], [0],
-    [Set to "yes" if using the -LDIR flag during linking hardcodes DIR
-    into the resulting binary])
-_LT_TAGDECL([], [hardcode_shlibpath_var], [0],
-    [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
-    into the resulting binary])
-_LT_TAGDECL([], [hardcode_automatic], [0],
-    [Set to "yes" if building a shared library automatically hardcodes DIR
-    into the library and all subsequent libraries and executables linked
-    against it])
-_LT_TAGDECL([], [inherit_rpath], [0],
-    [Set to yes if linker adds runtime paths of dependent libraries
-    to runtime path list])
-_LT_TAGDECL([], [link_all_deplibs], [0],
-    [Whether libtool must link a program against all its dependency libraries])
-_LT_TAGDECL([], [always_export_symbols], [0],
-    [Set to "yes" if exported symbols are required])
-_LT_TAGDECL([], [export_symbols_cmds], [2],
-    [The commands to list exported symbols])
-_LT_TAGDECL([], [exclude_expsyms], [1],
-    [Symbols that should not be listed in the preloaded symbols])
-_LT_TAGDECL([], [include_expsyms], [1],
-    [Symbols that must always be exported])
-_LT_TAGDECL([], [prelink_cmds], [2],
-    [Commands necessary for linking programs (against libraries) with templates])
-_LT_TAGDECL([], [postlink_cmds], [2],
-    [Commands necessary for finishing linking programs])
-_LT_TAGDECL([], [file_list_spec], [1],
-    [Specify filename containing input files])
-dnl FIXME: Not yet implemented
-dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
-dnl    [Compiler flag to generate thread safe objects])
-])# _LT_LINKER_SHLIBS
-
-
-# _LT_LANG_C_CONFIG([TAG])
-# ------------------------
-# Ensure that the configuration variables for a C compiler are suitably
-# defined.  These variables are subsequently used by _LT_CONFIG to write
-# the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_C_CONFIG],
-[m4_require([_LT_DECL_EGREP])dnl
-lt_save_CC="$CC"
-AC_LANG_PUSH(C)
-
-# Source file extension for C test sources.
-ac_ext=c
-
-# Object file extension for compiled C test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="int some_variable = 0;"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='int main(){return(0);}'
-
-_LT_TAG_COMPILER
-# Save the default compiler, since it gets overwritten when the other
-# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
-compiler_DEFAULT=$CC
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
-  _LT_COMPILER_NO_RTTI($1)
-  _LT_COMPILER_PIC($1)
-  _LT_COMPILER_C_O($1)
-  _LT_COMPILER_FILE_LOCKS($1)
-  _LT_LINKER_SHLIBS($1)
-  _LT_SYS_DYNAMIC_LINKER($1)
-  _LT_LINKER_HARDCODE_LIBPATH($1)
-  LT_SYS_DLOPEN_SELF
-  _LT_CMD_STRIPLIB
-
-  # Report which library types will actually be built
-  AC_MSG_CHECKING([if libtool supports shared libraries])
-  AC_MSG_RESULT([$can_build_shared])
-
-  AC_MSG_CHECKING([whether to build shared libraries])
-  test "$can_build_shared" = "no" && enable_shared=no
-
-  # On AIX, shared libraries and static libraries use the same namespace, and
-  # are all built from PIC.
-  case $host_os in
-  aix3*)
-    test "$enable_shared" = yes && enable_static=no
-    if test -n "$RANLIB"; then
-      archive_cmds="$archive_cmds~\$RANLIB \$lib"
-      postinstall_cmds='$RANLIB $lib'
-    fi
-    ;;
-
-  aix[[4-9]]*)
-    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
-      test "$enable_shared" = yes && enable_static=no
-    fi
-    ;;
-  esac
-  AC_MSG_RESULT([$enable_shared])
-
-  AC_MSG_CHECKING([whether to build static libraries])
-  # Make sure either enable_shared or enable_static is yes.
-  test "$enable_shared" = yes || enable_static=yes
-  AC_MSG_RESULT([$enable_static])
-
-  _LT_CONFIG($1)
-fi
-AC_LANG_POP
-CC="$lt_save_CC"
-])# _LT_LANG_C_CONFIG
-
-
-# _LT_LANG_CXX_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for a C++ compiler are suitably
-# defined.  These variables are subsequently used by _LT_CONFIG to write
-# the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_CXX_CONFIG],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_PATH_MANIFEST_TOOL])dnl
-if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
-    ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
-    (test "X$CXX" != "Xg++"))) ; then
-  AC_PROG_CXXCPP
-else
-  _lt_caught_CXX_error=yes
-fi
-
-AC_LANG_PUSH(C++)
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-_LT_TAGVAR(allow_undefined_flag, $1)=
-_LT_TAGVAR(always_export_symbols, $1)=no
-_LT_TAGVAR(archive_expsym_cmds, $1)=
-_LT_TAGVAR(compiler_needs_object, $1)=no
-_LT_TAGVAR(export_dynamic_flag_spec, $1)=
-_LT_TAGVAR(hardcode_direct, $1)=no
-_LT_TAGVAR(hardcode_direct_absolute, $1)=no
-_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_separator, $1)=
-_LT_TAGVAR(hardcode_minus_L, $1)=no
-_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
-_LT_TAGVAR(hardcode_automatic, $1)=no
-_LT_TAGVAR(inherit_rpath, $1)=no
-_LT_TAGVAR(module_cmds, $1)=
-_LT_TAGVAR(module_expsym_cmds, $1)=
-_LT_TAGVAR(link_all_deplibs, $1)=unknown
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-_LT_TAGVAR(no_undefined_flag, $1)=
-_LT_TAGVAR(whole_archive_flag_spec, $1)=
-_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-
-# Source file extension for C++ test sources.
-ac_ext=cpp
-
-# Object file extension for compiled C++ test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# No sense in running all these tests if we already determined that
-# the CXX compiler isn't working.  Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_caught_CXX_error" != yes; then
-  # Code to be used in simple compile tests
-  lt_simple_compile_test_code="int some_variable = 0;"
-
-  # Code to be used in simple link tests
-  lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }'
-
-  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
-  _LT_TAG_COMPILER
-
-  # save warnings/boilerplate of simple test code
-  _LT_COMPILER_BOILERPLATE
-  _LT_LINKER_BOILERPLATE
-
-  # Allow CC to be a program name with arguments.
-  lt_save_CC=$CC
-  lt_save_CFLAGS=$CFLAGS
-  lt_save_LD=$LD
-  lt_save_GCC=$GCC
-  GCC=$GXX
-  lt_save_with_gnu_ld=$with_gnu_ld
-  lt_save_path_LD=$lt_cv_path_LD
-  if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
-    lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
-  else
-    $as_unset lt_cv_prog_gnu_ld
-  fi
-  if test -n "${lt_cv_path_LDCXX+set}"; then
-    lt_cv_path_LD=$lt_cv_path_LDCXX
-  else
-    $as_unset lt_cv_path_LD
-  fi
-  test -z "${LDCXX+set}" || LD=$LDCXX
-  CC=${CXX-"c++"}
-  CFLAGS=$CXXFLAGS
-  compiler=$CC
-  _LT_TAGVAR(compiler, $1)=$CC
-  _LT_CC_BASENAME([$compiler])
-
-  if test -n "$compiler"; then
-    # We don't want -fno-exception when compiling C++ code, so set the
-    # no_builtin_flag separately
-    if test "$GXX" = yes; then
-      _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
-    else
-      _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
-    fi
-
-    if test "$GXX" = yes; then
-      # Set up default GNU C++ configuration
-
-      LT_PATH_LD
-
-      # Check if GNU C++ uses GNU ld as the underlying linker, since the
-      # archiving commands below assume that GNU ld is being used.
-      if test "$with_gnu_ld" = yes; then
-        _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
-        _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-
-        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-
-        # If archive_cmds runs LD, not CC, wlarc should be empty
-        # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
-        #     investigate it a little bit more. (MM)
-        wlarc='${wl}'
-
-        # ancient GNU ld didn't support --whole-archive et. al.
-        if eval "`$CC -print-prog-name=ld` --help 2>&1" |
-	  $GREP 'no-whole-archive' > /dev/null; then
-          _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
-        else
-          _LT_TAGVAR(whole_archive_flag_spec, $1)=
-        fi
-      else
-        with_gnu_ld=no
-        wlarc=
-
-        # A generic and very simple default shared library creation
-        # command for GNU C++ for the case where it uses the native
-        # linker, instead of GNU ld.  If possible, this setting should
-        # overridden to take advantage of the native linker features on
-        # the platform it is being used on.
-        _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
-      fi
-
-      # Commands to make compiler produce verbose output that lists
-      # what "hidden" libraries, object files and flags are used when
-      # linking a shared library.
-      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-
-    else
-      GXX=no
-      with_gnu_ld=no
-      wlarc=
-    fi
-
-    # PORTME: fill in a description of your system's C++ link characteristics
-    AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
-    _LT_TAGVAR(ld_shlibs, $1)=yes
-    case $host_os in
-      aix3*)
-        # FIXME: insert proper C++ library support
-        _LT_TAGVAR(ld_shlibs, $1)=no
-        ;;
-      aix[[4-9]]*)
-        if test "$host_cpu" = ia64; then
-          # On IA64, the linker does run time linking by default, so we don't
-          # have to do anything special.
-          aix_use_runtimelinking=no
-          exp_sym_flag='-Bexport'
-          no_entry_flag=""
-        else
-          aix_use_runtimelinking=no
-
-          # Test if we are trying to use run time linking or normal
-          # AIX style linking. If -brtl is somewhere in LDFLAGS, we
-          # need to do runtime linking.
-          case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
-	    for ld_flag in $LDFLAGS; do
-	      case $ld_flag in
-	      *-brtl*)
-	        aix_use_runtimelinking=yes
-	        break
-	        ;;
-	      esac
-	    done
-	    ;;
-          esac
-
-          exp_sym_flag='-bexport'
-          no_entry_flag='-bnoentry'
-        fi
-
-        # When large executables or shared objects are built, AIX ld can
-        # have problems creating the table of contents.  If linking a library
-        # or program results in "error TOC overflow" add -mminimal-toc to
-        # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
-        # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
-
-        _LT_TAGVAR(archive_cmds, $1)=''
-        _LT_TAGVAR(hardcode_direct, $1)=yes
-        _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-        _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
-        _LT_TAGVAR(link_all_deplibs, $1)=yes
-        _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
-
-        if test "$GXX" = yes; then
-          case $host_os in aix4.[[012]]|aix4.[[012]].*)
-          # We only want to do this on AIX 4.2 and lower, the check
-          # below for broken collect2 doesn't work under 4.3+
-	  collect2name=`${CC} -print-prog-name=collect2`
-	  if test -f "$collect2name" &&
-	     strings "$collect2name" | $GREP resolve_lib_name >/dev/null
-	  then
-	    # We have reworked collect2
-	    :
-	  else
-	    # We have old collect2
-	    _LT_TAGVAR(hardcode_direct, $1)=unsupported
-	    # It fails to find uninstalled libraries when the uninstalled
-	    # path is not listed in the libpath.  Setting hardcode_minus_L
-	    # to unsupported forces relinking
-	    _LT_TAGVAR(hardcode_minus_L, $1)=yes
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-	    _LT_TAGVAR(hardcode_libdir_separator, $1)=
-	  fi
-          esac
-          shared_flag='-shared'
-	  if test "$aix_use_runtimelinking" = yes; then
-	    shared_flag="$shared_flag "'${wl}-G'
-	  fi
-        else
-          # not using gcc
-          if test "$host_cpu" = ia64; then
-	  # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
-	  # chokes on -Wl,-G. The following line is correct:
-	  shared_flag='-G'
-          else
-	    if test "$aix_use_runtimelinking" = yes; then
-	      shared_flag='${wl}-G'
-	    else
-	      shared_flag='${wl}-bM:SRE'
-	    fi
-          fi
-        fi
-
-        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
-        # It seems that -bexpall does not export symbols beginning with
-        # underscore (_), so it is better to generate a list of symbols to
-	# export.
-        _LT_TAGVAR(always_export_symbols, $1)=yes
-        if test "$aix_use_runtimelinking" = yes; then
-          # Warning - without using the other runtime loading flags (-brtl),
-          # -berok will link without error, but may produce a broken library.
-          _LT_TAGVAR(allow_undefined_flag, $1)='-berok'
-          # Determine the default libpath from the value encoded in an empty
-          # executable.
-          _LT_SYS_MODULE_PATH_AIX([$1])
-          _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
-
-          _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
-        else
-          if test "$host_cpu" = ia64; then
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
-	    _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
-	    _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
-          else
-	    # Determine the default libpath from the value encoded in an
-	    # empty executable.
-	    _LT_SYS_MODULE_PATH_AIX([$1])
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
-	    # Warning - without using the other run time loading flags,
-	    # -berok will link without error, but may produce a broken library.
-	    _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
-	    _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
-	    if test "$with_gnu_ld" = yes; then
-	      # We only use this code for GNU lds that support --whole-archive.
-	      _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
-	    else
-	      # Exported symbols can be pulled into shared objects from archives
-	      _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
-	    fi
-	    _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
-	    # This is similar to how AIX traditionally builds its shared
-	    # libraries.
-	    _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
-          fi
-        fi
-        ;;
-
-      beos*)
-	if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
-	  _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-	  # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
-	  # support --undefined.  This deserves some investigation.  FIXME
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	else
-	  _LT_TAGVAR(ld_shlibs, $1)=no
-	fi
-	;;
-
-      chorus*)
-        case $cc_basename in
-          *)
-	  # FIXME: insert proper C++ library support
-	  _LT_TAGVAR(ld_shlibs, $1)=no
-	  ;;
-        esac
-        ;;
-
-      cygwin* | mingw* | pw32* | cegcc*)
-	case $GXX,$cc_basename in
-	,cl* | no,cl*)
-	  # Native MSVC
-	  # hardcode_libdir_flag_spec is actually meaningless, as there is
-	  # no search path for DLLs.
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
-	  _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-	  _LT_TAGVAR(always_export_symbols, $1)=yes
-	  _LT_TAGVAR(file_list_spec, $1)='@'
-	  # Tell ltmain to make .lib files, not .a files.
-	  libext=lib
-	  # Tell ltmain to make .dll files, not .so files.
-	  shrext_cmds=".dll"
-	  # FIXME: Setting linknames here is a bad hack.
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
-	  _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
-	      $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
-	    else
-	      $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
-	    fi~
-	    $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
-	    linknames='
-	  # The linker will not automatically build a static lib if we build a DLL.
-	  # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
-	  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
-	  # Don't use ranlib
-	  _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
-	  _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
-	    lt_tool_outputfile="@TOOL_OUTPUT@"~
-	    case $lt_outputfile in
-	      *.exe|*.EXE) ;;
-	      *)
-		lt_outputfile="$lt_outputfile.exe"
-		lt_tool_outputfile="$lt_tool_outputfile.exe"
-		;;
-	    esac~
-	    func_to_tool_file "$lt_outputfile"~
-	    if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
-	      $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
-	      $RM "$lt_outputfile.manifest";
-	    fi'
-	  ;;
-	*)
-	  # g++
-	  # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
-	  # as there is no search path for DLLs.
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
-	  _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
-	  _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
-	  _LT_TAGVAR(always_export_symbols, $1)=no
-	  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
-
-	  if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
-	    # If the export-symbols file already is a .def file (1st line
-	    # is EXPORTS), use it as is; otherwise, prepend...
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
-	      cp $export_symbols $output_objdir/$soname.def;
-	    else
-	      echo EXPORTS > $output_objdir/$soname.def;
-	      cat $export_symbols >> $output_objdir/$soname.def;
-	    fi~
-	    $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
-	  else
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	  fi
-	  ;;
-	esac
-	;;
-      darwin* | rhapsody*)
-        _LT_DARWIN_LINKER_FEATURES($1)
-	;;
-
-      dgux*)
-        case $cc_basename in
-          ec++*)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          ghcx*)
-	    # Green Hills C++ Compiler
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          *)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-        esac
-        ;;
-
-      freebsd2.*)
-        # C++ shared libraries reported to be fairly broken before
-	# switch to ELF
-        _LT_TAGVAR(ld_shlibs, $1)=no
-        ;;
-
-      freebsd-elf*)
-        _LT_TAGVAR(archive_cmds_need_lc, $1)=no
-        ;;
-
-      freebsd* | dragonfly*)
-        # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
-        # conventions
-        _LT_TAGVAR(ld_shlibs, $1)=yes
-        ;;
-
-      gnu*)
-        ;;
-
-      haiku*)
-        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-        _LT_TAGVAR(link_all_deplibs, $1)=yes
-        ;;
-
-      hpux9*)
-        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
-        _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-        _LT_TAGVAR(hardcode_direct, $1)=yes
-        _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
-				             # but as the default
-				             # location of the library.
-
-        case $cc_basename in
-          CC*)
-            # FIXME: insert proper C++ library support
-            _LT_TAGVAR(ld_shlibs, $1)=no
-            ;;
-          aCC*)
-            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
-            # Commands to make compiler produce verbose output that lists
-            # what "hidden" libraries, object files and flags are used when
-            # linking a shared library.
-            #
-            # There doesn't appear to be a way to prevent this compiler from
-            # explicitly linking system object files so we need to strip them
-            # from the output so that they don't get included in the library
-            # dependencies.
-            output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
-            ;;
-          *)
-            if test "$GXX" = yes; then
-              _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
-            else
-              # FIXME: insert proper C++ library support
-              _LT_TAGVAR(ld_shlibs, $1)=no
-            fi
-            ;;
-        esac
-        ;;
-
-      hpux10*|hpux11*)
-        if test $with_gnu_ld = no; then
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
-	  _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
-          case $host_cpu in
-            hppa*64*|ia64*)
-              ;;
-            *)
-	      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-              ;;
-          esac
-        fi
-        case $host_cpu in
-          hppa*64*|ia64*)
-            _LT_TAGVAR(hardcode_direct, $1)=no
-            _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-            ;;
-          *)
-            _LT_TAGVAR(hardcode_direct, $1)=yes
-            _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-            _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
-					         # but as the default
-					         # location of the library.
-            ;;
-        esac
-
-        case $cc_basename in
-          CC*)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          aCC*)
-	    case $host_cpu in
-	      hppa*64*)
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	        ;;
-	      ia64*)
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	        ;;
-	      *)
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	        ;;
-	    esac
-	    # Commands to make compiler produce verbose output that lists
-	    # what "hidden" libraries, object files and flags are used when
-	    # linking a shared library.
-	    #
-	    # There doesn't appear to be a way to prevent this compiler from
-	    # explicitly linking system object files so we need to strip them
-	    # from the output so that they don't get included in the library
-	    # dependencies.
-	    output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
-	    ;;
-          *)
-	    if test "$GXX" = yes; then
-	      if test $with_gnu_ld = no; then
-	        case $host_cpu in
-	          hppa*64*)
-	            _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	            ;;
-	          ia64*)
-	            _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	            ;;
-	          *)
-	            _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	            ;;
-	        esac
-	      fi
-	    else
-	      # FIXME: insert proper C++ library support
-	      _LT_TAGVAR(ld_shlibs, $1)=no
-	    fi
-	    ;;
-        esac
-        ;;
-
-      interix[[3-9]]*)
-	_LT_TAGVAR(hardcode_direct, $1)=no
-	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-	# Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
-	# Instead, shared libraries are loaded at an image base (0x10000000 by
-	# default) and relocated if they conflict, which is a slow very memory
-	# consuming and fragmenting process.  To avoid this, we pick a random,
-	# 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
-	# time.  Moving up from 0x10000000 also allows more sbrk(2) space.
-	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
-	;;
-      irix5* | irix6*)
-        case $cc_basename in
-          CC*)
-	    # SGI C++
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-
-	    # Archives containing C++ object files must be created using
-	    # "CC -ar", where "CC" is the IRIX C++ compiler.  This is
-	    # necessary to make sure instantiated templates are included
-	    # in the archive.
-	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs'
-	    ;;
-          *)
-	    if test "$GXX" = yes; then
-	      if test "$with_gnu_ld" = no; then
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
-	      else
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
-	      fi
-	    fi
-	    _LT_TAGVAR(link_all_deplibs, $1)=yes
-	    ;;
-        esac
-        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-        _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-        _LT_TAGVAR(inherit_rpath, $1)=yes
-        ;;
-
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
-        case $cc_basename in
-          KCC*)
-	    # Kuck and Associates, Inc. (KAI) C++ Compiler
-
-	    # KCC will only create a shared library if the output file
-	    # ends with ".so" (or ".sl" for HP-UX), so rename the library
-	    # to its proper name (with version) after linking.
-	    _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
-	    # Commands to make compiler produce verbose output that lists
-	    # what "hidden" libraries, object files and flags are used when
-	    # linking a shared library.
-	    #
-	    # There doesn't appear to be a way to prevent this compiler from
-	    # explicitly linking system object files so we need to strip them
-	    # from the output so that they don't get included in the library
-	    # dependencies.
-	    output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
-
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-
-	    # Archives containing C++ object files must be created using
-	    # "CC -Bstatic", where "CC" is the KAI C++ compiler.
-	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs'
-	    ;;
-	  icpc* | ecpc* )
-	    # Intel C++
-	    with_gnu_ld=yes
-	    # version 8.0 and above of icpc choke on multiply defined symbols
-	    # if we add $predep_objects and $postdep_objects, however 7.1 and
-	    # earlier do not add the objects themselves.
-	    case `$CC -V 2>&1` in
-	      *"Version 7."*)
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
-		_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-		;;
-	      *)  # Version 8.0 or newer
-	        tmp_idyn=
-	        case $host_cpu in
-		  ia64*) tmp_idyn=' -i_dynamic';;
-		esac
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-		_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-		;;
-	    esac
-	    _LT_TAGVAR(archive_cmds_need_lc, $1)=no
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-	    _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
-	    ;;
-          pgCC* | pgcpp*)
-            # Portland Group C++ compiler
-	    case `$CC -V` in
-	    *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*)
-	      _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~
-		rm -rf $tpldir~
-		$CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
-		compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
-	      _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~
-		rm -rf $tpldir~
-		$CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
-		$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
-		$RANLIB $oldlib'
-	      _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~
-		rm -rf $tpldir~
-		$CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
-		$CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
-	      _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~
-		rm -rf $tpldir~
-		$CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
-		$CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
-	      ;;
-	    *) # Version 6 and above use weak symbols
-	      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
-	      _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
-	      ;;
-	    esac
-
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
-	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-	    _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
-            ;;
-	  cxx*)
-	    # Compaq C++
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname  -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
-
-	    runpath_var=LD_RUN_PATH
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
-	    _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
-	    # Commands to make compiler produce verbose output that lists
-	    # what "hidden" libraries, object files and flags are used when
-	    # linking a shared library.
-	    #
-	    # There doesn't appear to be a way to prevent this compiler from
-	    # explicitly linking system object files so we need to strip them
-	    # from the output so that they don't get included in the library
-	    # dependencies.
-	    output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
-	    ;;
-	  xl* | mpixl* | bgxl*)
-	    # IBM XL 8.0 on PPC, with GNU ld
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-	    if test "x$supports_anon_versioning" = xyes; then
-	      _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
-		cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
-		echo "local: *; };" >> $output_objdir/$libname.ver~
-		$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
-	    fi
-	    ;;
-	  *)
-	    case `$CC -V 2>&1 | sed 5q` in
-	    *Sun\ C*)
-	      # Sun C++ 5.9
-	      _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
-	      _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	      _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
-	      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-	      _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
-	      _LT_TAGVAR(compiler_needs_object, $1)=yes
-
-	      # Not sure whether something based on
-	      # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
-	      # would be better.
-	      output_verbose_link_cmd='func_echo_all'
-
-	      # Archives containing C++ object files must be created using
-	      # "CC -xar", where "CC" is the Sun C++ compiler.  This is
-	      # necessary to make sure instantiated templates are included
-	      # in the archive.
-	      _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
-	      ;;
-	    esac
-	    ;;
-	esac
-	;;
-
-      lynxos*)
-        # FIXME: insert proper C++ library support
-	_LT_TAGVAR(ld_shlibs, $1)=no
-	;;
-
-      m88k*)
-        # FIXME: insert proper C++ library support
-        _LT_TAGVAR(ld_shlibs, $1)=no
-	;;
-
-      mvs*)
-        case $cc_basename in
-          cxx*)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-	  *)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-	esac
-	;;
-
-      netbsd*)
-        if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
-	  _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable  -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
-	  wlarc=
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-	  _LT_TAGVAR(hardcode_direct, $1)=yes
-	  _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	fi
-	# Workaround some broken pre-1.5 toolchains
-	output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
-	;;
-
-      *nto* | *qnx*)
-        _LT_TAGVAR(ld_shlibs, $1)=yes
-	;;
-
-      openbsd2*)
-        # C++ shared libraries are fairly broken
-	_LT_TAGVAR(ld_shlibs, $1)=no
-	;;
-
-      openbsd*)
-	if test -f /usr/libexec/ld.so; then
-	  _LT_TAGVAR(hardcode_direct, $1)=yes
-	  _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	  _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
-	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
-	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-	    _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
-	  fi
-	  output_verbose_link_cmd=func_echo_all
-	else
-	  _LT_TAGVAR(ld_shlibs, $1)=no
-	fi
-	;;
-
-      osf3* | osf4* | osf5*)
-        case $cc_basename in
-          KCC*)
-	    # Kuck and Associates, Inc. (KAI) C++ Compiler
-
-	    # KCC will only create a shared library if the output file
-	    # ends with ".so" (or ".sl" for HP-UX), so rename the library
-	    # to its proper name (with version) after linking.
-	    _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
-
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
-	    _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
-	    # Archives containing C++ object files must be created using
-	    # the KAI C++ compiler.
-	    case $host in
-	      osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;;
-	      *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;;
-	    esac
-	    ;;
-          RCC*)
-	    # Rational C++ 2.4.1
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          cxx*)
-	    case $host in
-	      osf3*)
-	        _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-	        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-		;;
-	      *)
-	        _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-	        _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
-	          echo "-hidden">> $lib.exp~
-	          $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp  `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
-	          $RM $lib.exp'
-	        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
-		;;
-	    esac
-
-	    _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
-	    # Commands to make compiler produce verbose output that lists
-	    # what "hidden" libraries, object files and flags are used when
-	    # linking a shared library.
-	    #
-	    # There doesn't appear to be a way to prevent this compiler from
-	    # explicitly linking system object files so we need to strip them
-	    # from the output so that they don't get included in the library
-	    # dependencies.
-	    output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
-	    ;;
-	  *)
-	    if test "$GXX" = yes && test "$with_gnu_ld" = no; then
-	      _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
-	      case $host in
-	        osf3*)
-	          _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
-		  ;;
-	        *)
-	          _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
-		  ;;
-	      esac
-
-	      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
-	      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
-	      # Commands to make compiler produce verbose output that lists
-	      # what "hidden" libraries, object files and flags are used when
-	      # linking a shared library.
-	      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-
-	    else
-	      # FIXME: insert proper C++ library support
-	      _LT_TAGVAR(ld_shlibs, $1)=no
-	    fi
-	    ;;
-        esac
-        ;;
-
-      psos*)
-        # FIXME: insert proper C++ library support
-        _LT_TAGVAR(ld_shlibs, $1)=no
-        ;;
-
-      sunos4*)
-        case $cc_basename in
-          CC*)
-	    # Sun C++ 4.x
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          lcc*)
-	    # Lucid
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          *)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-        esac
-        ;;
-
-      solaris*)
-        case $cc_basename in
-          CC* | sunCC*)
-	    # Sun C++ 4.2, 5.x and Centerline C++
-            _LT_TAGVAR(archive_cmds_need_lc,$1)=yes
-	    _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag}  -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
-	      $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
-	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
-	    _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	    case $host_os in
-	      solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
-	      *)
-		# The compiler driver will combine and reorder linker options,
-		# but understands `-z linker_flag'.
-	        # Supported since Solaris 2.6 (maybe 2.5.1?)
-		_LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
-	        ;;
-	    esac
-	    _LT_TAGVAR(link_all_deplibs, $1)=yes
-
-	    output_verbose_link_cmd='func_echo_all'
-
-	    # Archives containing C++ object files must be created using
-	    # "CC -xar", where "CC" is the Sun C++ compiler.  This is
-	    # necessary to make sure instantiated templates are included
-	    # in the archive.
-	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
-	    ;;
-          gcx*)
-	    # Green Hills C++ Compiler
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
-
-	    # The C++ compiler must be used to create the archive.
-	    _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
-	    ;;
-          *)
-	    # GNU C++ compiler with Solaris linker
-	    if test "$GXX" = yes && test "$with_gnu_ld" = no; then
-	      _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs'
-	      if $CC --version | $GREP -v '^2\.7' > /dev/null; then
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
-	        _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
-		  $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
-	        # Commands to make compiler produce verbose output that lists
-	        # what "hidden" libraries, object files and flags are used when
-	        # linking a shared library.
-	        output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-	      else
-	        # g++ 2.7 appears to require `-G' NOT `-shared' on this
-	        # platform.
-	        _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
-	        _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
-		  $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
-	        # Commands to make compiler produce verbose output that lists
-	        # what "hidden" libraries, object files and flags are used when
-	        # linking a shared library.
-	        output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-	      fi
-
-	      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir'
-	      case $host_os in
-		solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
-		*)
-		  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
-		  ;;
-	      esac
-	    fi
-	    ;;
-        esac
-        ;;
-
-    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
-      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
-      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
-      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-      runpath_var='LD_RUN_PATH'
-
-      case $cc_basename in
-        CC*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	  _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-	*)
-	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	  _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	  ;;
-      esac
-      ;;
-
-      sysv5* | sco3.2v5* | sco5v6*)
-	# Note: We can NOT use -z defs as we might desire, because we do not
-	# link with -lc, and that would cause any symbols used from libc to
-	# always be unresolved, which means just about no library would
-	# ever link correctly.  If we're not using GNU ld we use -z text
-	# though, which does catch some bad symbols but isn't as heavy-handed
-	# as -z defs.
-	_LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
-	_LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
-	_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
-	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
-	_LT_TAGVAR(hardcode_libdir_separator, $1)=':'
-	_LT_TAGVAR(link_all_deplibs, $1)=yes
-	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
-	runpath_var='LD_RUN_PATH'
-
-	case $cc_basename in
-          CC*)
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~
-	      '"$_LT_TAGVAR(old_archive_cmds, $1)"
-	    _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~
-	      '"$_LT_TAGVAR(reload_cmds, $1)"
-	    ;;
-	  *)
-	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
-	    ;;
-	esac
-      ;;
-
-      tandem*)
-        case $cc_basename in
-          NCC*)
-	    # NonStop-UX NCC 3.20
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-          *)
-	    # FIXME: insert proper C++ library support
-	    _LT_TAGVAR(ld_shlibs, $1)=no
-	    ;;
-        esac
-        ;;
-
-      vxworks*)
-        # FIXME: insert proper C++ library support
-        _LT_TAGVAR(ld_shlibs, $1)=no
-        ;;
-
-      *)
-        # FIXME: insert proper C++ library support
-        _LT_TAGVAR(ld_shlibs, $1)=no
-        ;;
-    esac
-
-    AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
-    test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
-
-    _LT_TAGVAR(GCC, $1)="$GXX"
-    _LT_TAGVAR(LD, $1)="$LD"
-
-    ## CAVEAT EMPTOR:
-    ## There is no encapsulation within the following macros, do not change
-    ## the running order or otherwise move them around unless you know exactly
-    ## what you are doing...
-    _LT_SYS_HIDDEN_LIBDEPS($1)
-    _LT_COMPILER_PIC($1)
-    _LT_COMPILER_C_O($1)
-    _LT_COMPILER_FILE_LOCKS($1)
-    _LT_LINKER_SHLIBS($1)
-    _LT_SYS_DYNAMIC_LINKER($1)
-    _LT_LINKER_HARDCODE_LIBPATH($1)
-
-    _LT_CONFIG($1)
-  fi # test -n "$compiler"
-
-  CC=$lt_save_CC
-  CFLAGS=$lt_save_CFLAGS
-  LDCXX=$LD
-  LD=$lt_save_LD
-  GCC=$lt_save_GCC
-  with_gnu_ld=$lt_save_with_gnu_ld
-  lt_cv_path_LDCXX=$lt_cv_path_LD
-  lt_cv_path_LD=$lt_save_path_LD
-  lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
-  lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
-fi # test "$_lt_caught_CXX_error" != yes
-
-AC_LANG_POP
-])# _LT_LANG_CXX_CONFIG
-
-
-# _LT_FUNC_STRIPNAME_CNF
-# ----------------------
-# func_stripname_cnf prefix suffix name
-# strip PREFIX and SUFFIX off of NAME.
-# PREFIX and SUFFIX must not contain globbing or regex special
-# characters, hashes, percent signs, but SUFFIX may contain a leading
-# dot (in which case that matches only a dot).
-#
-# This function is identical to the (non-XSI) version of func_stripname,
-# except this one can be used by m4 code that may be executed by configure,
-# rather than the libtool script.
-m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl
-AC_REQUIRE([_LT_DECL_SED])
-AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])
-func_stripname_cnf ()
-{
-  case ${2} in
-  .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
-  *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
-  esac
-} # func_stripname_cnf
-])# _LT_FUNC_STRIPNAME_CNF
-
-# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME])
-# ---------------------------------
-# Figure out "hidden" library dependencies from verbose
-# compiler output when linking a shared library.
-# Parse the compiler output and extract the necessary
-# objects, libraries and library flags.
-m4_defun([_LT_SYS_HIDDEN_LIBDEPS],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl
-# Dependencies to place before and after the object being linked:
-_LT_TAGVAR(predep_objects, $1)=
-_LT_TAGVAR(postdep_objects, $1)=
-_LT_TAGVAR(predeps, $1)=
-_LT_TAGVAR(postdeps, $1)=
-_LT_TAGVAR(compiler_lib_search_path, $1)=
-
-dnl we can't use the lt_simple_compile_test_code here,
-dnl because it contains code intended for an executable,
-dnl not a library.  It's possible we should let each
-dnl tag define a new lt_????_link_test_code variable,
-dnl but it's only used here...
-m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF
-int a;
-void foo (void) { a = 0; }
-_LT_EOF
-], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF
-class Foo
-{
-public:
-  Foo (void) { a = 0; }
-private:
-  int a;
-};
-_LT_EOF
-], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF
-      subroutine foo
-      implicit none
-      integer*4 a
-      a=0
-      return
-      end
-_LT_EOF
-], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF
-      subroutine foo
-      implicit none
-      integer a
-      a=0
-      return
-      end
-_LT_EOF
-], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF
-public class foo {
-  private int a;
-  public void bar (void) {
-    a = 0;
-  }
-};
-_LT_EOF
-], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
-package foo
-func foo() {
-}
-_LT_EOF
-])
-
-_lt_libdeps_save_CFLAGS=$CFLAGS
-case "$CC $CFLAGS " in #(
-*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
-*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
-*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
-esac
-
-dnl Parse the compiler output and extract the necessary
-dnl objects, libraries and library flags.
-if AC_TRY_EVAL(ac_compile); then
-  # Parse the compiler output and extract the necessary
-  # objects, libraries and library flags.
-
-  # Sentinel used to keep track of whether or not we are before
-  # the conftest object file.
-  pre_test_object_deps_done=no
-
-  for p in `eval "$output_verbose_link_cmd"`; do
-    case ${prev}${p} in
-
-    -L* | -R* | -l*)
-       # Some compilers place space between "-{L,R}" and the path.
-       # Remove the space.
-       if test $p = "-L" ||
-          test $p = "-R"; then
-	 prev=$p
-	 continue
-       fi
-
-       # Expand the sysroot to ease extracting the directories later.
-       if test -z "$prev"; then
-         case $p in
-         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
-         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
-         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
-         esac
-       fi
-       case $p in
-       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
-       esac
-       if test "$pre_test_object_deps_done" = no; then
-	 case ${prev} in
-	 -L | -R)
-	   # Internal compiler library paths should come after those
-	   # provided the user.  The postdeps already come after the
-	   # user supplied libs so there is no need to process them.
-	   if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then
-	     _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}"
-	   else
-	     _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}"
-	   fi
-	   ;;
-	 # The "-l" case would never come before the object being
-	 # linked, so don't bother handling this case.
-	 esac
-       else
-	 if test -z "$_LT_TAGVAR(postdeps, $1)"; then
-	   _LT_TAGVAR(postdeps, $1)="${prev}${p}"
-	 else
-	   _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}"
-	 fi
-       fi
-       prev=
-       ;;
-
-    *.lto.$objext) ;; # Ignore GCC LTO objects
-    *.$objext)
-       # This assumes that the test object file only shows up
-       # once in the compiler output.
-       if test "$p" = "conftest.$objext"; then
-	 pre_test_object_deps_done=yes
-	 continue
-       fi
-
-       if test "$pre_test_object_deps_done" = no; then
-	 if test -z "$_LT_TAGVAR(predep_objects, $1)"; then
-	   _LT_TAGVAR(predep_objects, $1)="$p"
-	 else
-	   _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p"
-	 fi
-       else
-	 if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then
-	   _LT_TAGVAR(postdep_objects, $1)="$p"
-	 else
-	   _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p"
-	 fi
-       fi
-       ;;
-
-    *) ;; # Ignore the rest.
-
-    esac
-  done
-
-  # Clean up.
-  rm -f a.out a.exe
-else
-  echo "libtool.m4: error: problem compiling $1 test program"
-fi
-
-$RM -f confest.$objext
-CFLAGS=$_lt_libdeps_save_CFLAGS
-
-# PORTME: override above test on systems where it is broken
-m4_if([$1], [CXX],
-[case $host_os in
-interix[[3-9]]*)
-  # Interix 3.5 installs completely hosed .la files for C++, so rather than
-  # hack all around it, let's just trust "g++" to DTRT.
-  _LT_TAGVAR(predep_objects,$1)=
-  _LT_TAGVAR(postdep_objects,$1)=
-  _LT_TAGVAR(postdeps,$1)=
-  ;;
-
-linux*)
-  case `$CC -V 2>&1 | sed 5q` in
-  *Sun\ C*)
-    # Sun C++ 5.9
-
-    # The more standards-conforming stlport4 library is
-    # incompatible with the Cstd library. Avoid specifying
-    # it if it's in CXXFLAGS. Ignore libCrun as
-    # -library=stlport4 depends on it.
-    case " $CXX $CXXFLAGS " in
-    *" -library=stlport4 "*)
-      solaris_use_stlport4=yes
-      ;;
-    esac
-
-    if test "$solaris_use_stlport4" != yes; then
-      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
-    fi
-    ;;
-  esac
-  ;;
-
-solaris*)
-  case $cc_basename in
-  CC* | sunCC*)
-    # The more standards-conforming stlport4 library is
-    # incompatible with the Cstd library. Avoid specifying
-    # it if it's in CXXFLAGS. Ignore libCrun as
-    # -library=stlport4 depends on it.
-    case " $CXX $CXXFLAGS " in
-    *" -library=stlport4 "*)
-      solaris_use_stlport4=yes
-      ;;
-    esac
-
-    # Adding this requires a known-good setup of shared libraries for
-    # Sun compiler versions before 5.6, else PIC objects from an old
-    # archive will be linked into the output, leading to subtle bugs.
-    if test "$solaris_use_stlport4" != yes; then
-      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
-    fi
-    ;;
-  esac
-  ;;
-esac
-])
-
-case " $_LT_TAGVAR(postdeps, $1) " in
-*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;;
-esac
- _LT_TAGVAR(compiler_lib_search_dirs, $1)=
-if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then
- _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
-fi
-_LT_TAGDECL([], [compiler_lib_search_dirs], [1],
-    [The directories searched by this compiler when creating a shared library])
-_LT_TAGDECL([], [predep_objects], [1],
-    [Dependencies to place before and after the objects being linked to
-    create a shared library])
-_LT_TAGDECL([], [postdep_objects], [1])
-_LT_TAGDECL([], [predeps], [1])
-_LT_TAGDECL([], [postdeps], [1])
-_LT_TAGDECL([], [compiler_lib_search_path], [1],
-    [The library search path used internally by the compiler when linking
-    a shared library])
-])# _LT_SYS_HIDDEN_LIBDEPS
-
-
-# _LT_LANG_F77_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for a Fortran 77 compiler are
-# suitably defined.  These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_F77_CONFIG],
-[AC_LANG_PUSH(Fortran 77)
-if test -z "$F77" || test "X$F77" = "Xno"; then
-  _lt_disable_F77=yes
-fi
-
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-_LT_TAGVAR(allow_undefined_flag, $1)=
-_LT_TAGVAR(always_export_symbols, $1)=no
-_LT_TAGVAR(archive_expsym_cmds, $1)=
-_LT_TAGVAR(export_dynamic_flag_spec, $1)=
-_LT_TAGVAR(hardcode_direct, $1)=no
-_LT_TAGVAR(hardcode_direct_absolute, $1)=no
-_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_separator, $1)=
-_LT_TAGVAR(hardcode_minus_L, $1)=no
-_LT_TAGVAR(hardcode_automatic, $1)=no
-_LT_TAGVAR(inherit_rpath, $1)=no
-_LT_TAGVAR(module_cmds, $1)=
-_LT_TAGVAR(module_expsym_cmds, $1)=
-_LT_TAGVAR(link_all_deplibs, $1)=unknown
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-_LT_TAGVAR(no_undefined_flag, $1)=
-_LT_TAGVAR(whole_archive_flag_spec, $1)=
-_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-
-# Source file extension for f77 test sources.
-ac_ext=f
-
-# Object file extension for compiled f77 test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# No sense in running all these tests if we already determined that
-# the F77 compiler isn't working.  Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_disable_F77" != yes; then
-  # Code to be used in simple compile tests
-  lt_simple_compile_test_code="\
-      subroutine t
-      return
-      end
-"
-
-  # Code to be used in simple link tests
-  lt_simple_link_test_code="\
-      program t
-      end
-"
-
-  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
-  _LT_TAG_COMPILER
-
-  # save warnings/boilerplate of simple test code
-  _LT_COMPILER_BOILERPLATE
-  _LT_LINKER_BOILERPLATE
-
-  # Allow CC to be a program name with arguments.
-  lt_save_CC="$CC"
-  lt_save_GCC=$GCC
-  lt_save_CFLAGS=$CFLAGS
-  CC=${F77-"f77"}
-  CFLAGS=$FFLAGS
-  compiler=$CC
-  _LT_TAGVAR(compiler, $1)=$CC
-  _LT_CC_BASENAME([$compiler])
-  GCC=$G77
-  if test -n "$compiler"; then
-    AC_MSG_CHECKING([if libtool supports shared libraries])
-    AC_MSG_RESULT([$can_build_shared])
-
-    AC_MSG_CHECKING([whether to build shared libraries])
-    test "$can_build_shared" = "no" && enable_shared=no
-
-    # On AIX, shared libraries and static libraries use the same namespace, and
-    # are all built from PIC.
-    case $host_os in
-      aix3*)
-        test "$enable_shared" = yes && enable_static=no
-        if test -n "$RANLIB"; then
-          archive_cmds="$archive_cmds~\$RANLIB \$lib"
-          postinstall_cmds='$RANLIB $lib'
-        fi
-        ;;
-      aix[[4-9]]*)
-	if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
-	  test "$enable_shared" = yes && enable_static=no
-	fi
-        ;;
-    esac
-    AC_MSG_RESULT([$enable_shared])
-
-    AC_MSG_CHECKING([whether to build static libraries])
-    # Make sure either enable_shared or enable_static is yes.
-    test "$enable_shared" = yes || enable_static=yes
-    AC_MSG_RESULT([$enable_static])
-
-    _LT_TAGVAR(GCC, $1)="$G77"
-    _LT_TAGVAR(LD, $1)="$LD"
-
-    ## CAVEAT EMPTOR:
-    ## There is no encapsulation within the following macros, do not change
-    ## the running order or otherwise move them around unless you know exactly
-    ## what you are doing...
-    _LT_COMPILER_PIC($1)
-    _LT_COMPILER_C_O($1)
-    _LT_COMPILER_FILE_LOCKS($1)
-    _LT_LINKER_SHLIBS($1)
-    _LT_SYS_DYNAMIC_LINKER($1)
-    _LT_LINKER_HARDCODE_LIBPATH($1)
-
-    _LT_CONFIG($1)
-  fi # test -n "$compiler"
-
-  GCC=$lt_save_GCC
-  CC="$lt_save_CC"
-  CFLAGS="$lt_save_CFLAGS"
-fi # test "$_lt_disable_F77" != yes
-
-AC_LANG_POP
-])# _LT_LANG_F77_CONFIG
-
-
-# _LT_LANG_FC_CONFIG([TAG])
-# -------------------------
-# Ensure that the configuration variables for a Fortran compiler are
-# suitably defined.  These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_FC_CONFIG],
-[AC_LANG_PUSH(Fortran)
-
-if test -z "$FC" || test "X$FC" = "Xno"; then
-  _lt_disable_FC=yes
-fi
-
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-_LT_TAGVAR(allow_undefined_flag, $1)=
-_LT_TAGVAR(always_export_symbols, $1)=no
-_LT_TAGVAR(archive_expsym_cmds, $1)=
-_LT_TAGVAR(export_dynamic_flag_spec, $1)=
-_LT_TAGVAR(hardcode_direct, $1)=no
-_LT_TAGVAR(hardcode_direct_absolute, $1)=no
-_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_separator, $1)=
-_LT_TAGVAR(hardcode_minus_L, $1)=no
-_LT_TAGVAR(hardcode_automatic, $1)=no
-_LT_TAGVAR(inherit_rpath, $1)=no
-_LT_TAGVAR(module_cmds, $1)=
-_LT_TAGVAR(module_expsym_cmds, $1)=
-_LT_TAGVAR(link_all_deplibs, $1)=unknown
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-_LT_TAGVAR(no_undefined_flag, $1)=
-_LT_TAGVAR(whole_archive_flag_spec, $1)=
-_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-
-# Source file extension for fc test sources.
-ac_ext=${ac_fc_srcext-f}
-
-# Object file extension for compiled fc test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# No sense in running all these tests if we already determined that
-# the FC compiler isn't working.  Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_disable_FC" != yes; then
-  # Code to be used in simple compile tests
-  lt_simple_compile_test_code="\
-      subroutine t
-      return
-      end
-"
-
-  # Code to be used in simple link tests
-  lt_simple_link_test_code="\
-      program t
-      end
-"
-
-  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
-  _LT_TAG_COMPILER
-
-  # save warnings/boilerplate of simple test code
-  _LT_COMPILER_BOILERPLATE
-  _LT_LINKER_BOILERPLATE
-
-  # Allow CC to be a program name with arguments.
-  lt_save_CC="$CC"
-  lt_save_GCC=$GCC
-  lt_save_CFLAGS=$CFLAGS
-  CC=${FC-"f95"}
-  CFLAGS=$FCFLAGS
-  compiler=$CC
-  GCC=$ac_cv_fc_compiler_gnu
-
-  _LT_TAGVAR(compiler, $1)=$CC
-  _LT_CC_BASENAME([$compiler])
-
-  if test -n "$compiler"; then
-    AC_MSG_CHECKING([if libtool supports shared libraries])
-    AC_MSG_RESULT([$can_build_shared])
-
-    AC_MSG_CHECKING([whether to build shared libraries])
-    test "$can_build_shared" = "no" && enable_shared=no
-
-    # On AIX, shared libraries and static libraries use the same namespace, and
-    # are all built from PIC.
-    case $host_os in
-      aix3*)
-        test "$enable_shared" = yes && enable_static=no
-        if test -n "$RANLIB"; then
-          archive_cmds="$archive_cmds~\$RANLIB \$lib"
-          postinstall_cmds='$RANLIB $lib'
-        fi
-        ;;
-      aix[[4-9]]*)
-	if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
-	  test "$enable_shared" = yes && enable_static=no
-	fi
-        ;;
-    esac
-    AC_MSG_RESULT([$enable_shared])
-
-    AC_MSG_CHECKING([whether to build static libraries])
-    # Make sure either enable_shared or enable_static is yes.
-    test "$enable_shared" = yes || enable_static=yes
-    AC_MSG_RESULT([$enable_static])
-
-    _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu"
-    _LT_TAGVAR(LD, $1)="$LD"
-
-    ## CAVEAT EMPTOR:
-    ## There is no encapsulation within the following macros, do not change
-    ## the running order or otherwise move them around unless you know exactly
-    ## what you are doing...
-    _LT_SYS_HIDDEN_LIBDEPS($1)
-    _LT_COMPILER_PIC($1)
-    _LT_COMPILER_C_O($1)
-    _LT_COMPILER_FILE_LOCKS($1)
-    _LT_LINKER_SHLIBS($1)
-    _LT_SYS_DYNAMIC_LINKER($1)
-    _LT_LINKER_HARDCODE_LIBPATH($1)
-
-    _LT_CONFIG($1)
-  fi # test -n "$compiler"
-
-  GCC=$lt_save_GCC
-  CC=$lt_save_CC
-  CFLAGS=$lt_save_CFLAGS
-fi # test "$_lt_disable_FC" != yes
-
-AC_LANG_POP
-])# _LT_LANG_FC_CONFIG
-
-
-# _LT_LANG_GCJ_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for the GNU Java Compiler compiler
-# are suitably defined.  These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_GCJ_CONFIG],
-[AC_REQUIRE([LT_PROG_GCJ])dnl
-AC_LANG_SAVE
-
-# Source file extension for Java test sources.
-ac_ext=java
-
-# Object file extension for compiled Java test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="class foo {}"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }'
-
-# ltmain only uses $CC for tagged configurations so make sure $CC is set.
-_LT_TAG_COMPILER
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-# Allow CC to be a program name with arguments.
-lt_save_CC=$CC
-lt_save_CFLAGS=$CFLAGS
-lt_save_GCC=$GCC
-GCC=yes
-CC=${GCJ-"gcj"}
-CFLAGS=$GCJFLAGS
-compiler=$CC
-_LT_TAGVAR(compiler, $1)=$CC
-_LT_TAGVAR(LD, $1)="$LD"
-_LT_CC_BASENAME([$compiler])
-
-# GCJ did not exist at the time GCC didn't implicitly link libc in.
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
-  _LT_COMPILER_NO_RTTI($1)
-  _LT_COMPILER_PIC($1)
-  _LT_COMPILER_C_O($1)
-  _LT_COMPILER_FILE_LOCKS($1)
-  _LT_LINKER_SHLIBS($1)
-  _LT_LINKER_HARDCODE_LIBPATH($1)
-
-  _LT_CONFIG($1)
-fi
-
-AC_LANG_RESTORE
-
-GCC=$lt_save_GCC
-CC=$lt_save_CC
-CFLAGS=$lt_save_CFLAGS
-])# _LT_LANG_GCJ_CONFIG
-
-
-# _LT_LANG_GO_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for the GNU Go compiler
-# are suitably defined.  These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_GO_CONFIG],
-[AC_REQUIRE([LT_PROG_GO])dnl
-AC_LANG_SAVE
-
-# Source file extension for Go test sources.
-ac_ext=go
-
-# Object file extension for compiled Go test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="package main; func main() { }"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='package main; func main() { }'
-
-# ltmain only uses $CC for tagged configurations so make sure $CC is set.
-_LT_TAG_COMPILER
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-# Allow CC to be a program name with arguments.
-lt_save_CC=$CC
-lt_save_CFLAGS=$CFLAGS
-lt_save_GCC=$GCC
-GCC=yes
-CC=${GOC-"gccgo"}
-CFLAGS=$GOFLAGS
-compiler=$CC
-_LT_TAGVAR(compiler, $1)=$CC
-_LT_TAGVAR(LD, $1)="$LD"
-_LT_CC_BASENAME([$compiler])
-
-# Go did not exist at the time GCC didn't implicitly link libc in.
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
-  _LT_COMPILER_NO_RTTI($1)
-  _LT_COMPILER_PIC($1)
-  _LT_COMPILER_C_O($1)
-  _LT_COMPILER_FILE_LOCKS($1)
-  _LT_LINKER_SHLIBS($1)
-  _LT_LINKER_HARDCODE_LIBPATH($1)
-
-  _LT_CONFIG($1)
-fi
-
-AC_LANG_RESTORE
-
-GCC=$lt_save_GCC
-CC=$lt_save_CC
-CFLAGS=$lt_save_CFLAGS
-])# _LT_LANG_GO_CONFIG
-
-
-# _LT_LANG_RC_CONFIG([TAG])
-# -------------------------
-# Ensure that the configuration variables for the Windows resource compiler
-# are suitably defined.  These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_RC_CONFIG],
-[AC_REQUIRE([LT_PROG_RC])dnl
-AC_LANG_SAVE
-
-# Source file extension for RC test sources.
-ac_ext=rc
-
-# Object file extension for compiled RC test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'
-
-# Code to be used in simple link tests
-lt_simple_link_test_code="$lt_simple_compile_test_code"
-
-# ltmain only uses $CC for tagged configurations so make sure $CC is set.
-_LT_TAG_COMPILER
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-# Allow CC to be a program name with arguments.
-lt_save_CC="$CC"
-lt_save_CFLAGS=$CFLAGS
-lt_save_GCC=$GCC
-GCC=
-CC=${RC-"windres"}
-CFLAGS=
-compiler=$CC
-_LT_TAGVAR(compiler, $1)=$CC
-_LT_CC_BASENAME([$compiler])
-_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
-
-if test -n "$compiler"; then
-  :
-  _LT_CONFIG($1)
-fi
-
-GCC=$lt_save_GCC
-AC_LANG_RESTORE
-CC=$lt_save_CC
-CFLAGS=$lt_save_CFLAGS
-])# _LT_LANG_RC_CONFIG
-
-
-# LT_PROG_GCJ
-# -----------
-AC_DEFUN([LT_PROG_GCJ],
-[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ],
-  [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ],
-    [AC_CHECK_TOOL(GCJ, gcj,)
-      test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
-      AC_SUBST(GCJFLAGS)])])[]dnl
-])
-
-# Old name:
-AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
-
-
-# LT_PROG_GO
-# ----------
-AC_DEFUN([LT_PROG_GO],
-[AC_CHECK_TOOL(GOC, gccgo,)
-])
-
-
-# LT_PROG_RC
-# ----------
-AC_DEFUN([LT_PROG_RC],
-[AC_CHECK_TOOL(RC, windres,)
-])
-
-# Old name:
-AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([LT_AC_PROG_RC], [])
-
-
-# _LT_DECL_EGREP
-# --------------
-# If we don't have a new enough Autoconf to choose the best grep
-# available, choose the one first in the user's PATH.
-m4_defun([_LT_DECL_EGREP],
-[AC_REQUIRE([AC_PROG_EGREP])dnl
-AC_REQUIRE([AC_PROG_FGREP])dnl
-test -z "$GREP" && GREP=grep
-_LT_DECL([], [GREP], [1], [A grep program that handles long lines])
-_LT_DECL([], [EGREP], [1], [An ERE matcher])
-_LT_DECL([], [FGREP], [1], [A literal string matcher])
-dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too
-AC_SUBST([GREP])
-])
-
-
-# _LT_DECL_OBJDUMP
-# --------------
-# If we don't have a new enough Autoconf to choose the best objdump
-# available, choose the one first in the user's PATH.
-m4_defun([_LT_DECL_OBJDUMP],
-[AC_CHECK_TOOL(OBJDUMP, objdump, false)
-test -z "$OBJDUMP" && OBJDUMP=objdump
-_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper])
-AC_SUBST([OBJDUMP])
-])
-
-# _LT_DECL_DLLTOOL
-# ----------------
-# Ensure DLLTOOL variable is set.
-m4_defun([_LT_DECL_DLLTOOL],
-[AC_CHECK_TOOL(DLLTOOL, dlltool, false)
-test -z "$DLLTOOL" && DLLTOOL=dlltool
-_LT_DECL([], [DLLTOOL], [1], [DLL creation program])
-AC_SUBST([DLLTOOL])
-])
-
-# _LT_DECL_SED
-# ------------
-# Check for a fully-functional sed program, that truncates
-# as few characters as possible.  Prefer GNU sed if found.
-m4_defun([_LT_DECL_SED],
-[AC_PROG_SED
-test -z "$SED" && SED=sed
-Xsed="$SED -e 1s/^X//"
-_LT_DECL([], [SED], [1], [A sed program that does not truncate output])
-_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"],
-    [Sed that helps us avoid accidentally triggering echo(1) options like -n])
-])# _LT_DECL_SED
-
-m4_ifndef([AC_PROG_SED], [
-############################################################
-# NOTE: This macro has been submitted for inclusion into   #
-#  GNU Autoconf as AC_PROG_SED.  When it is available in   #
-#  a released version of Autoconf we should remove this    #
-#  macro and use it instead.                               #
-############################################################
-
-m4_defun([AC_PROG_SED],
-[AC_MSG_CHECKING([for a sed that does not truncate output])
-AC_CACHE_VAL(lt_cv_path_SED,
-[# Loop through the user's path and test for sed and gsed.
-# Then use that list of sed's as ones to test for truncation.
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  for lt_ac_prog in sed gsed; do
-    for ac_exec_ext in '' $ac_executable_extensions; do
-      if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
-        lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
-      fi
-    done
-  done
-done
-IFS=$as_save_IFS
-lt_ac_max=0
-lt_ac_count=0
-# Add /usr/xpg4/bin/sed as it is typically found on Solaris
-# along with /bin/sed that truncates output.
-for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
-  test ! -f $lt_ac_sed && continue
-  cat /dev/null > conftest.in
-  lt_ac_count=0
-  echo $ECHO_N "0123456789$ECHO_C" >conftest.in
-  # Check for GNU sed and select it if it is found.
-  if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then
-    lt_cv_path_SED=$lt_ac_sed
-    break
-  fi
-  while true; do
-    cat conftest.in conftest.in >conftest.tmp
-    mv conftest.tmp conftest.in
-    cp conftest.in conftest.nl
-    echo >>conftest.nl
-    $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break
-    cmp -s conftest.out conftest.nl || break
-    # 10000 chars as input seems more than enough
-    test $lt_ac_count -gt 10 && break
-    lt_ac_count=`expr $lt_ac_count + 1`
-    if test $lt_ac_count -gt $lt_ac_max; then
-      lt_ac_max=$lt_ac_count
-      lt_cv_path_SED=$lt_ac_sed
-    fi
-  done
-done
-])
-SED=$lt_cv_path_SED
-AC_SUBST([SED])
-AC_MSG_RESULT([$SED])
-])#AC_PROG_SED
-])#m4_ifndef
-
-# Old name:
-AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([LT_AC_PROG_SED], [])
-
-
-# _LT_CHECK_SHELL_FEATURES
-# ------------------------
-# Find out whether the shell is Bourne or XSI compatible,
-# or has some other useful features.
-m4_defun([_LT_CHECK_SHELL_FEATURES],
-[AC_MSG_CHECKING([whether the shell understands some XSI constructs])
-# Try some XSI features
-xsi_shell=no
-( _lt_dummy="a/b/c"
-  test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
-      = c,a/b,b/c, \
-    && eval 'test $(( 1 + 1 )) -eq 2 \
-    && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
-  && xsi_shell=yes
-AC_MSG_RESULT([$xsi_shell])
-_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell'])
-
-AC_MSG_CHECKING([whether the shell understands "+="])
-lt_shell_append=no
-( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \
-    >/dev/null 2>&1 \
-  && lt_shell_append=yes
-AC_MSG_RESULT([$lt_shell_append])
-_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append'])
-
-if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
-  lt_unset=unset
-else
-  lt_unset=false
-fi
-_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl
-
-# test EBCDIC or ASCII
-case `echo X|tr X '\101'` in
- A) # ASCII based system
-    # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
-  lt_SP2NL='tr \040 \012'
-  lt_NL2SP='tr \015\012 \040\040'
-  ;;
- *) # EBCDIC based system
-  lt_SP2NL='tr \100 \n'
-  lt_NL2SP='tr \r\n \100\100'
-  ;;
-esac
-_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl
-_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl
-])# _LT_CHECK_SHELL_FEATURES
-
-
-# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY)
-# ------------------------------------------------------
-# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and
-# '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY.
-m4_defun([_LT_PROG_FUNCTION_REPLACE],
-[dnl {
-sed -e '/^$1 ()$/,/^} # $1 /c\
-$1 ()\
-{\
-m4_bpatsubsts([$2], [$], [\\], [^\([	 ]\)], [\\\1])
-} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \
-  && mv -f "$cfgfile.tmp" "$cfgfile" \
-    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-])
-
-
-# _LT_PROG_REPLACE_SHELLFNS
-# -------------------------
-# Replace existing portable implementations of several shell functions with
-# equivalent extended shell implementations where those features are available..
-m4_defun([_LT_PROG_REPLACE_SHELLFNS],
-[if test x"$xsi_shell" = xyes; then
-  _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl
-    case ${1} in
-      */*) func_dirname_result="${1%/*}${2}" ;;
-      *  ) func_dirname_result="${3}" ;;
-    esac])
-
-  _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl
-    func_basename_result="${1##*/}"])
-
-  _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl
-    case ${1} in
-      */*) func_dirname_result="${1%/*}${2}" ;;
-      *  ) func_dirname_result="${3}" ;;
-    esac
-    func_basename_result="${1##*/}"])
-
-  _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl
-    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
-    # positional parameters, so assign one to ordinary parameter first.
-    func_stripname_result=${3}
-    func_stripname_result=${func_stripname_result#"${1}"}
-    func_stripname_result=${func_stripname_result%"${2}"}])
-
-  _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl
-    func_split_long_opt_name=${1%%=*}
-    func_split_long_opt_arg=${1#*=}])
-
-  _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl
-    func_split_short_opt_arg=${1#??}
-    func_split_short_opt_name=${1%"$func_split_short_opt_arg"}])
-
-  _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl
-    case ${1} in
-      *.lo) func_lo2o_result=${1%.lo}.${objext} ;;
-      *)    func_lo2o_result=${1} ;;
-    esac])
-
-  _LT_PROG_FUNCTION_REPLACE([func_xform], [    func_xform_result=${1%.*}.lo])
-
-  _LT_PROG_FUNCTION_REPLACE([func_arith], [    func_arith_result=$(( $[*] ))])
-
-  _LT_PROG_FUNCTION_REPLACE([func_len], [    func_len_result=${#1}])
-fi
-
-if test x"$lt_shell_append" = xyes; then
-  _LT_PROG_FUNCTION_REPLACE([func_append], [    eval "${1}+=\\${2}"])
-
-  _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl
-    func_quote_for_eval "${2}"
-dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \
-    eval "${1}+=\\\\ \\$func_quote_for_eval_result"])
-
-  # Save a `func_append' function call where possible by direct use of '+='
-  sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \
-    && mv -f "$cfgfile.tmp" "$cfgfile" \
-      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-  test 0 -eq $? || _lt_function_replace_fail=:
-else
-  # Save a `func_append' function call even when '+=' is not available
-  sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \
-    && mv -f "$cfgfile.tmp" "$cfgfile" \
-      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-  test 0 -eq $? || _lt_function_replace_fail=:
-fi
-
-if test x"$_lt_function_replace_fail" = x":"; then
-  AC_MSG_WARN([Unable to substitute extended shell functions in $ofile])
-fi
-])
-
-# _LT_PATH_CONVERSION_FUNCTIONS
-# -----------------------------
-# Determine which file name conversion functions should be used by
-# func_to_host_file (and, implicitly, by func_to_host_path).  These are needed
-# for certain cross-compile configurations and native mingw.
-m4_defun([_LT_PATH_CONVERSION_FUNCTIONS],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_CANONICAL_BUILD])dnl
-AC_MSG_CHECKING([how to convert $build file names to $host format])
-AC_CACHE_VAL(lt_cv_to_host_file_cmd,
-[case $host in
-  *-*-mingw* )
-    case $build in
-      *-*-mingw* ) # actually msys
-        lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
-        ;;
-      *-*-cygwin* )
-        lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
-        ;;
-      * ) # otherwise, assume *nix
-        lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
-        ;;
-    esac
-    ;;
-  *-*-cygwin* )
-    case $build in
-      *-*-mingw* ) # actually msys
-        lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
-        ;;
-      *-*-cygwin* )
-        lt_cv_to_host_file_cmd=func_convert_file_noop
-        ;;
-      * ) # otherwise, assume *nix
-        lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
-        ;;
-    esac
-    ;;
-  * ) # unhandled hosts (and "normal" native builds)
-    lt_cv_to_host_file_cmd=func_convert_file_noop
-    ;;
-esac
-])
-to_host_file_cmd=$lt_cv_to_host_file_cmd
-AC_MSG_RESULT([$lt_cv_to_host_file_cmd])
-_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd],
-         [0], [convert $build file names to $host format])dnl
-
-AC_MSG_CHECKING([how to convert $build file names to toolchain format])
-AC_CACHE_VAL(lt_cv_to_tool_file_cmd,
-[#assume ordinary cross tools, or native build.
-lt_cv_to_tool_file_cmd=func_convert_file_noop
-case $host in
-  *-*-mingw* )
-    case $build in
-      *-*-mingw* ) # actually msys
-        lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
-        ;;
-    esac
-    ;;
-esac
-])
-to_tool_file_cmd=$lt_cv_to_tool_file_cmd
-AC_MSG_RESULT([$lt_cv_to_tool_file_cmd])
-_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd],
-         [0], [convert $build files to toolchain format])dnl
-])# _LT_PATH_CONVERSION_FUNCTIONS
diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4
deleted file mode 100644
index 5d9acd8e..00000000
--- a/m4/ltoptions.m4
+++ /dev/null
@@ -1,384 +0,0 @@
-# Helper functions for option handling.                    -*- Autoconf -*-
-#
-#   Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
-#   Inc.
-#   Written by Gary V. Vaughan, 2004
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# serial 7 ltoptions.m4
-
-# This is to help aclocal find these macros, as it can't see m4_define.
-AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
-
-
-# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
-# ------------------------------------------
-m4_define([_LT_MANGLE_OPTION],
-[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
-
-
-# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
-# ---------------------------------------
-# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
-# matching handler defined, dispatch to it.  Other OPTION-NAMEs are
-# saved as a flag.
-m4_define([_LT_SET_OPTION],
-[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
-m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
-        _LT_MANGLE_DEFUN([$1], [$2]),
-    [m4_warning([Unknown $1 option `$2'])])[]dnl
-])
-
-
-# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
-# ------------------------------------------------------------
-# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
-m4_define([_LT_IF_OPTION],
-[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
-
-
-# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
-# -------------------------------------------------------
-# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
-# are set.
-m4_define([_LT_UNLESS_OPTIONS],
-[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
-	    [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
-		      [m4_define([$0_found])])])[]dnl
-m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
-])[]dnl
-])
-
-
-# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
-# ----------------------------------------
-# OPTION-LIST is a space-separated list of Libtool options associated
-# with MACRO-NAME.  If any OPTION has a matching handler declared with
-# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
-# the unknown option and exit.
-m4_defun([_LT_SET_OPTIONS],
-[# Set options
-m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
-    [_LT_SET_OPTION([$1], _LT_Option)])
-
-m4_if([$1],[LT_INIT],[
-  dnl
-  dnl Simply set some default values (i.e off) if boolean options were not
-  dnl specified:
-  _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
-  ])
-  _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
-  ])
-  dnl
-  dnl If no reference was made to various pairs of opposing options, then
-  dnl we run the default mode handler for the pair.  For example, if neither
-  dnl `shared' nor `disable-shared' was passed, we enable building of shared
-  dnl archives by default:
-  _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
-  _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
-  _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
-  _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
-  		   [_LT_ENABLE_FAST_INSTALL])
-  ])
-])# _LT_SET_OPTIONS
-
-
-## --------------------------------- ##
-## Macros to handle LT_INIT options. ##
-## --------------------------------- ##
-
-# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
-# -----------------------------------------
-m4_define([_LT_MANGLE_DEFUN],
-[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
-
-
-# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
-# -----------------------------------------------
-m4_define([LT_OPTION_DEFINE],
-[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
-])# LT_OPTION_DEFINE
-
-
-# dlopen
-# ------
-LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
-])
-
-AU_DEFUN([AC_LIBTOOL_DLOPEN],
-[_LT_SET_OPTION([LT_INIT], [dlopen])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you
-put the `dlopen' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
-
-
-# win32-dll
-# ---------
-# Declare package support for building win32 dll's.
-LT_OPTION_DEFINE([LT_INIT], [win32-dll],
-[enable_win32_dll=yes
-
-case $host in
-*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
-  AC_CHECK_TOOL(AS, as, false)
-  AC_CHECK_TOOL(DLLTOOL, dlltool, false)
-  AC_CHECK_TOOL(OBJDUMP, objdump, false)
-  ;;
-esac
-
-test -z "$AS" && AS=as
-_LT_DECL([], [AS],      [1], [Assembler program])dnl
-
-test -z "$DLLTOOL" && DLLTOOL=dlltool
-_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
-
-test -z "$OBJDUMP" && OBJDUMP=objdump
-_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
-])# win32-dll
-
-AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-_LT_SET_OPTION([LT_INIT], [win32-dll])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you
-put the `win32-dll' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
-
-
-# _LT_ENABLE_SHARED([DEFAULT])
-# ----------------------------
-# implement the --enable-shared flag, and supports the `shared' and
-# `disable-shared' LT_INIT options.
-# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
-m4_define([_LT_ENABLE_SHARED],
-[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
-AC_ARG_ENABLE([shared],
-    [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
-	[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
-    [p=${PACKAGE-default}
-    case $enableval in
-    yes) enable_shared=yes ;;
-    no) enable_shared=no ;;
-    *)
-      enable_shared=no
-      # Look at the argument we got.  We use all the common list separators.
-      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
-      for pkg in $enableval; do
-	IFS="$lt_save_ifs"
-	if test "X$pkg" = "X$p"; then
-	  enable_shared=yes
-	fi
-      done
-      IFS="$lt_save_ifs"
-      ;;
-    esac],
-    [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
-
-    _LT_DECL([build_libtool_libs], [enable_shared], [0],
-	[Whether or not to build shared libraries])
-])# _LT_ENABLE_SHARED
-
-LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
-LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
-
-# Old names:
-AC_DEFUN([AC_ENABLE_SHARED],
-[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
-])
-
-AC_DEFUN([AC_DISABLE_SHARED],
-[_LT_SET_OPTION([LT_INIT], [disable-shared])
-])
-
-AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
-AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_ENABLE_SHARED], [])
-dnl AC_DEFUN([AM_DISABLE_SHARED], [])
-
-
-
-# _LT_ENABLE_STATIC([DEFAULT])
-# ----------------------------
-# implement the --enable-static flag, and support the `static' and
-# `disable-static' LT_INIT options.
-# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
-m4_define([_LT_ENABLE_STATIC],
-[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
-AC_ARG_ENABLE([static],
-    [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
-	[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
-    [p=${PACKAGE-default}
-    case $enableval in
-    yes) enable_static=yes ;;
-    no) enable_static=no ;;
-    *)
-     enable_static=no
-      # Look at the argument we got.  We use all the common list separators.
-      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
-      for pkg in $enableval; do
-	IFS="$lt_save_ifs"
-	if test "X$pkg" = "X$p"; then
-	  enable_static=yes
-	fi
-      done
-      IFS="$lt_save_ifs"
-      ;;
-    esac],
-    [enable_static=]_LT_ENABLE_STATIC_DEFAULT)
-
-    _LT_DECL([build_old_libs], [enable_static], [0],
-	[Whether or not to build static libraries])
-])# _LT_ENABLE_STATIC
-
-LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
-LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
-
-# Old names:
-AC_DEFUN([AC_ENABLE_STATIC],
-[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
-])
-
-AC_DEFUN([AC_DISABLE_STATIC],
-[_LT_SET_OPTION([LT_INIT], [disable-static])
-])
-
-AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
-AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_ENABLE_STATIC], [])
-dnl AC_DEFUN([AM_DISABLE_STATIC], [])
-
-
-
-# _LT_ENABLE_FAST_INSTALL([DEFAULT])
-# ----------------------------------
-# implement the --enable-fast-install flag, and support the `fast-install'
-# and `disable-fast-install' LT_INIT options.
-# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
-m4_define([_LT_ENABLE_FAST_INSTALL],
-[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
-AC_ARG_ENABLE([fast-install],
-    [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
-    [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
-    [p=${PACKAGE-default}
-    case $enableval in
-    yes) enable_fast_install=yes ;;
-    no) enable_fast_install=no ;;
-    *)
-      enable_fast_install=no
-      # Look at the argument we got.  We use all the common list separators.
-      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
-      for pkg in $enableval; do
-	IFS="$lt_save_ifs"
-	if test "X$pkg" = "X$p"; then
-	  enable_fast_install=yes
-	fi
-      done
-      IFS="$lt_save_ifs"
-      ;;
-    esac],
-    [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
-
-_LT_DECL([fast_install], [enable_fast_install], [0],
-	 [Whether or not to optimize for fast installation])dnl
-])# _LT_ENABLE_FAST_INSTALL
-
-LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
-LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
-
-# Old names:
-AU_DEFUN([AC_ENABLE_FAST_INSTALL],
-[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you put
-the `fast-install' option into LT_INIT's first parameter.])
-])
-
-AU_DEFUN([AC_DISABLE_FAST_INSTALL],
-[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you put
-the `disable-fast-install' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
-dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
-
-
-# _LT_WITH_PIC([MODE])
-# --------------------
-# implement the --with-pic flag, and support the `pic-only' and `no-pic'
-# LT_INIT options.
-# MODE is either `yes' or `no'.  If omitted, it defaults to `both'.
-m4_define([_LT_WITH_PIC],
-[AC_ARG_WITH([pic],
-    [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
-	[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
-    [lt_p=${PACKAGE-default}
-    case $withval in
-    yes|no) pic_mode=$withval ;;
-    *)
-      pic_mode=default
-      # Look at the argument we got.  We use all the common list separators.
-      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
-      for lt_pkg in $withval; do
-	IFS="$lt_save_ifs"
-	if test "X$lt_pkg" = "X$lt_p"; then
-	  pic_mode=yes
-	fi
-      done
-      IFS="$lt_save_ifs"
-      ;;
-    esac],
-    [pic_mode=default])
-
-test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
-
-_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
-])# _LT_WITH_PIC
-
-LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
-LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
-
-# Old name:
-AU_DEFUN([AC_LIBTOOL_PICMODE],
-[_LT_SET_OPTION([LT_INIT], [pic-only])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you
-put the `pic-only' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
-
-## ----------------- ##
-## LTDL_INIT Options ##
-## ----------------- ##
-
-m4_define([_LTDL_MODE], [])
-LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
-		 [m4_define([_LTDL_MODE], [nonrecursive])])
-LT_OPTION_DEFINE([LTDL_INIT], [recursive],
-		 [m4_define([_LTDL_MODE], [recursive])])
-LT_OPTION_DEFINE([LTDL_INIT], [subproject],
-		 [m4_define([_LTDL_MODE], [subproject])])
-
-m4_define([_LTDL_TYPE], [])
-LT_OPTION_DEFINE([LTDL_INIT], [installable],
-		 [m4_define([_LTDL_TYPE], [installable])])
-LT_OPTION_DEFINE([LTDL_INIT], [convenience],
-		 [m4_define([_LTDL_TYPE], [convenience])])
diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4
deleted file mode 100644
index 9000a057..00000000
--- a/m4/ltsugar.m4
+++ /dev/null
@@ -1,123 +0,0 @@
-# ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-
-#
-# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
-# Written by Gary V. Vaughan, 2004
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# serial 6 ltsugar.m4
-
-# This is to help aclocal find these macros, as it can't see m4_define.
-AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
-
-
-# lt_join(SEP, ARG1, [ARG2...])
-# -----------------------------
-# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
-# associated separator.
-# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
-# versions in m4sugar had bugs.
-m4_define([lt_join],
-[m4_if([$#], [1], [],
-       [$#], [2], [[$2]],
-       [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
-m4_define([_lt_join],
-[m4_if([$#$2], [2], [],
-       [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
-
-
-# lt_car(LIST)
-# lt_cdr(LIST)
-# ------------
-# Manipulate m4 lists.
-# These macros are necessary as long as will still need to support
-# Autoconf-2.59 which quotes differently.
-m4_define([lt_car], [[$1]])
-m4_define([lt_cdr],
-[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
-       [$#], 1, [],
-       [m4_dquote(m4_shift($@))])])
-m4_define([lt_unquote], $1)
-
-
-# lt_append(MACRO-NAME, STRING, [SEPARATOR])
-# ------------------------------------------
-# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
-# Note that neither SEPARATOR nor STRING are expanded; they are appended
-# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
-# No SEPARATOR is output if MACRO-NAME was previously undefined (different
-# than defined and empty).
-#
-# This macro is needed until we can rely on Autoconf 2.62, since earlier
-# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
-m4_define([lt_append],
-[m4_define([$1],
-	   m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
-
-
-
-# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
-# ----------------------------------------------------------
-# Produce a SEP delimited list of all paired combinations of elements of
-# PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
-# has the form PREFIXmINFIXSUFFIXn.
-# Needed until we can rely on m4_combine added in Autoconf 2.62.
-m4_define([lt_combine],
-[m4_if(m4_eval([$# > 3]), [1],
-       [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
-[[m4_foreach([_Lt_prefix], [$2],
-	     [m4_foreach([_Lt_suffix],
-		]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
-	[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
-
-
-# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
-# -----------------------------------------------------------------------
-# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
-# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
-m4_define([lt_if_append_uniq],
-[m4_ifdef([$1],
-	  [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
-		 [lt_append([$1], [$2], [$3])$4],
-		 [$5])],
-	  [lt_append([$1], [$2], [$3])$4])])
-
-
-# lt_dict_add(DICT, KEY, VALUE)
-# -----------------------------
-m4_define([lt_dict_add],
-[m4_define([$1($2)], [$3])])
-
-
-# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
-# --------------------------------------------
-m4_define([lt_dict_add_subkey],
-[m4_define([$1($2:$3)], [$4])])
-
-
-# lt_dict_fetch(DICT, KEY, [SUBKEY])
-# ----------------------------------
-m4_define([lt_dict_fetch],
-[m4_ifval([$3],
-	m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
-    m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
-
-
-# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
-# -----------------------------------------------------------------
-m4_define([lt_if_dict_fetch],
-[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
-	[$5],
-    [$6])])
-
-
-# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
-# --------------------------------------------------------------
-m4_define([lt_dict_filter],
-[m4_if([$5], [], [],
-  [lt_join(m4_quote(m4_default([$4], [[, ]])),
-           lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
-		      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
-])
diff --git a/m4/ltversion.m4 b/m4/ltversion.m4
deleted file mode 100644
index 07a8602d..00000000
--- a/m4/ltversion.m4
+++ /dev/null
@@ -1,23 +0,0 @@
-# ltversion.m4 -- version numbers			-*- Autoconf -*-
-#
-#   Copyright (C) 2004 Free Software Foundation, Inc.
-#   Written by Scott James Remnant, 2004
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# @configure_input@
-
-# serial 3337 ltversion.m4
-# This file is part of GNU Libtool
-
-m4_define([LT_PACKAGE_VERSION], [2.4.2])
-m4_define([LT_PACKAGE_REVISION], [1.3337])
-
-AC_DEFUN([LTVERSION_VERSION],
-[macro_version='2.4.2'
-macro_revision='1.3337'
-_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
-_LT_DECL(, macro_revision, 0)
-])
diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4
deleted file mode 100644
index c573da90..00000000
--- a/m4/lt~obsolete.m4
+++ /dev/null
@@ -1,98 +0,0 @@
-# lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-
-#
-#   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
-#   Written by Scott James Remnant, 2004.
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# serial 5 lt~obsolete.m4
-
-# These exist entirely to fool aclocal when bootstrapping libtool.
-#
-# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
-# which have later been changed to m4_define as they aren't part of the
-# exported API, or moved to Autoconf or Automake where they belong.
-#
-# The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN
-# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
-# using a macro with the same name in our local m4/libtool.m4 it'll
-# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
-# and doesn't know about Autoconf macros at all.)
-#
-# So we provide this file, which has a silly filename so it's always
-# included after everything else.  This provides aclocal with the
-# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
-# because those macros already exist, or will be overwritten later.
-# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
-#
-# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
-# Yes, that means every name once taken will need to remain here until
-# we give up compatibility with versions before 1.7, at which point
-# we need to keep only those names which we still refer to.
-
-# This is to help aclocal find these macros, as it can't see m4_define.
-AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
-
-m4_ifndef([AC_LIBTOOL_LINKER_OPTION],	[AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
-m4_ifndef([AC_PROG_EGREP],		[AC_DEFUN([AC_PROG_EGREP])])
-m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
-m4_ifndef([_LT_AC_SHELL_INIT],		[AC_DEFUN([_LT_AC_SHELL_INIT])])
-m4_ifndef([_LT_AC_SYS_LIBPATH_AIX],	[AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
-m4_ifndef([_LT_PROG_LTMAIN],		[AC_DEFUN([_LT_PROG_LTMAIN])])
-m4_ifndef([_LT_AC_TAGVAR],		[AC_DEFUN([_LT_AC_TAGVAR])])
-m4_ifndef([AC_LTDL_ENABLE_INSTALL],	[AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
-m4_ifndef([AC_LTDL_PREOPEN],		[AC_DEFUN([AC_LTDL_PREOPEN])])
-m4_ifndef([_LT_AC_SYS_COMPILER],	[AC_DEFUN([_LT_AC_SYS_COMPILER])])
-m4_ifndef([_LT_AC_LOCK],		[AC_DEFUN([_LT_AC_LOCK])])
-m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE],	[AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
-m4_ifndef([_LT_AC_TRY_DLOPEN_SELF],	[AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
-m4_ifndef([AC_LIBTOOL_PROG_CC_C_O],	[AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
-m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
-m4_ifndef([AC_LIBTOOL_OBJDIR],		[AC_DEFUN([AC_LIBTOOL_OBJDIR])])
-m4_ifndef([AC_LTDL_OBJDIR],		[AC_DEFUN([AC_LTDL_OBJDIR])])
-m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
-m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP],	[AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
-m4_ifndef([AC_PATH_MAGIC],		[AC_DEFUN([AC_PATH_MAGIC])])
-m4_ifndef([AC_PROG_LD_GNU],		[AC_DEFUN([AC_PROG_LD_GNU])])
-m4_ifndef([AC_PROG_LD_RELOAD_FLAG],	[AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
-m4_ifndef([AC_DEPLIBS_CHECK_METHOD],	[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
-m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
-m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
-m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
-m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS],	[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
-m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP],	[AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
-m4_ifndef([LT_AC_PROG_EGREP],		[AC_DEFUN([LT_AC_PROG_EGREP])])
-m4_ifndef([LT_AC_PROG_SED],		[AC_DEFUN([LT_AC_PROG_SED])])
-m4_ifndef([_LT_CC_BASENAME],		[AC_DEFUN([_LT_CC_BASENAME])])
-m4_ifndef([_LT_COMPILER_BOILERPLATE],	[AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
-m4_ifndef([_LT_LINKER_BOILERPLATE],	[AC_DEFUN([_LT_LINKER_BOILERPLATE])])
-m4_ifndef([_AC_PROG_LIBTOOL],		[AC_DEFUN([_AC_PROG_LIBTOOL])])
-m4_ifndef([AC_LIBTOOL_SETUP],		[AC_DEFUN([AC_LIBTOOL_SETUP])])
-m4_ifndef([_LT_AC_CHECK_DLFCN],		[AC_DEFUN([_LT_AC_CHECK_DLFCN])])
-m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER],	[AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
-m4_ifndef([_LT_AC_TAGCONFIG],		[AC_DEFUN([_LT_AC_TAGCONFIG])])
-m4_ifndef([AC_DISABLE_FAST_INSTALL],	[AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
-m4_ifndef([_LT_AC_LANG_CXX],		[AC_DEFUN([_LT_AC_LANG_CXX])])
-m4_ifndef([_LT_AC_LANG_F77],		[AC_DEFUN([_LT_AC_LANG_F77])])
-m4_ifndef([_LT_AC_LANG_GCJ],		[AC_DEFUN([_LT_AC_LANG_GCJ])])
-m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
-m4_ifndef([_LT_AC_LANG_C_CONFIG],	[AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
-m4_ifndef([_LT_AC_LANG_CXX_CONFIG],	[AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
-m4_ifndef([_LT_AC_LANG_F77_CONFIG],	[AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
-m4_ifndef([_LT_AC_LANG_GCJ_CONFIG],	[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
-m4_ifndef([_LT_AC_LANG_RC_CONFIG],	[AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
-m4_ifndef([AC_LIBTOOL_CONFIG],		[AC_DEFUN([AC_LIBTOOL_CONFIG])])
-m4_ifndef([_LT_AC_FILE_LTDLL_C],	[AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
-m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS],	[AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
-m4_ifndef([_LT_AC_PROG_CXXCPP],		[AC_DEFUN([_LT_AC_PROG_CXXCPP])])
-m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS],	[AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
-m4_ifndef([_LT_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
-m4_ifndef([_LT_PROG_F77],		[AC_DEFUN([_LT_PROG_F77])])
-m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
-m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
diff --git a/m4/m4_ax_boost_base.m4 b/m4/m4_ax_boost_base.m4
deleted file mode 100644
index 2c789eae..00000000
--- a/m4/m4_ax_boost_base.m4
+++ /dev/null
@@ -1,301 +0,0 @@
-# ===========================================================================
-#      https://www.gnu.org/software/autoconf-archive/ax_boost_base.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-#
-# DESCRIPTION
-#
-#   Test for the Boost C++ libraries of a particular version (or newer)
-#
-#   If no path to the installed boost library is given the macro searchs
-#   under /usr, /usr/local, /opt and /opt/local and evaluates the
-#   $BOOST_ROOT environment variable. Further documentation is available at
-#   <http://randspringer.de/boost/index.html>.
-#
-#   This macro calls:
-#
-#     AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS)
-#
-#   And sets:
-#
-#     HAVE_BOOST
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
-#   Copyright (c) 2009 Peter Adolphs
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved. This file is offered as-is, without any
-#   warranty.
-
-#serial 44
-
-# example boost program (need to pass version)
-m4_define([_AX_BOOST_BASE_PROGRAM],
-          [AC_LANG_PROGRAM([[
-#include <boost/version.hpp>
-]],[[
-(void) ((void)sizeof(char[1 - 2*!!((BOOST_VERSION) < ($1))]));
-]])])
-
-AC_DEFUN([AX_BOOST_BASE],
-[
-AC_ARG_WITH([boost],
-  [AS_HELP_STRING([--with-boost@<:@=ARG@:>@],
-    [use Boost library from a standard location (ARG=yes),
-     from the specified location (ARG=<path>),
-     or disable it (ARG=no)
-     @<:@ARG=yes@:>@ ])],
-    [
-     AS_CASE([$withval],
-       [no],[want_boost="no";_AX_BOOST_BASE_boost_path=""],
-       [yes],[want_boost="yes";_AX_BOOST_BASE_boost_path=""],
-       [want_boost="yes";_AX_BOOST_BASE_boost_path="$withval"])
-    ],
-    [want_boost="yes"])
-
-
-AC_ARG_WITH([boost-libdir],
-  [AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
-    [Force given directory for boost libraries.
-     Note that this will override library path detection,
-     so use this parameter only if default library detection fails
-     and you know exactly where your boost libraries are located.])],
-  [
-   AS_IF([test -d "$withval"],
-         [_AX_BOOST_BASE_boost_lib_path="$withval"],
-    [AC_MSG_ERROR([--with-boost-libdir expected directory name])])
-  ],
-  [_AX_BOOST_BASE_boost_lib_path=""])
-
-BOOST_LDFLAGS=""
-BOOST_CPPFLAGS=""
-AS_IF([test "x$want_boost" = "xyes"],
-      [_AX_BOOST_BASE_RUNDETECT([$1],[$2],[$3])])
-AC_SUBST(BOOST_CPPFLAGS)
-AC_SUBST(BOOST_LDFLAGS)
-])
-
-
-# convert a version string in $2 to numeric and affect to polymorphic var $1
-AC_DEFUN([_AX_BOOST_BASE_TONUMERICVERSION],[
-  AS_IF([test "x$2" = "x"],[_AX_BOOST_BASE_TONUMERICVERSION_req="1.20.0"],[_AX_BOOST_BASE_TONUMERICVERSION_req="$2"])
-  _AX_BOOST_BASE_TONUMERICVERSION_req_shorten=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\.[[0-9]]*\)'`
-  _AX_BOOST_BASE_TONUMERICVERSION_req_major=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\)'`
-  AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_major" = "x"],
-        [AC_MSG_ERROR([You should at least specify libboost major version])])
-  _AX_BOOST_BASE_TONUMERICVERSION_req_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.\([[0-9]]*\)'`
-  AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_minor" = "x"],
-        [_AX_BOOST_BASE_TONUMERICVERSION_req_minor="0"])
-  _AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
-  AS_IF([test "X$_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor" = "X"],
-        [_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor="0"])
-  _AX_BOOST_BASE_TONUMERICVERSION_RET=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req_major \* 100000 \+  $_AX_BOOST_BASE_TONUMERICVERSION_req_minor \* 100 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor`
-  AS_VAR_SET($1,$_AX_BOOST_BASE_TONUMERICVERSION_RET)
-])
-
-dnl Run the detection of boost should be run only if $want_boost
-AC_DEFUN([_AX_BOOST_BASE_RUNDETECT],[
-    _AX_BOOST_BASE_TONUMERICVERSION(WANT_BOOST_VERSION,[$1])
-    succeeded=no
-
-
-    AC_REQUIRE([AC_CANONICAL_HOST])
-    dnl On 64-bit systems check for system libraries in both lib64 and lib.
-    dnl The former is specified by FHS, but e.g. Debian does not adhere to
-    dnl this (as it rises problems for generic multi-arch support).
-    dnl The last entry in the list is chosen by default when no libraries
-    dnl are found, e.g. when only header-only libraries are installed!
-    AS_CASE([${host_cpu}],
-      [x86_64],[libsubdirs="lib64 libx32 lib lib64"],
-      [ppc64|s390x|sparc64|aarch64|ppc64le|riscv64],[libsubdirs="lib64 lib lib64"],
-      [libsubdirs="lib"]
-    )
-
-    dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give
-    dnl them priority over the other paths since, if libs are found there, they
-    dnl are almost assuredly the ones desired.
-    AS_CASE([${host_cpu}],
-      [i?86],[multiarch_libsubdir="lib/i386-${host_os}"],
-      [multiarch_libsubdir="lib/${host_cpu}-${host_os}"]
-    )
-
-    dnl first we check the system location for boost libraries
-    dnl this location ist chosen if boost libraries are installed with the --layout=system option
-    dnl or if you install boost with RPM
-    AS_IF([test "x$_AX_BOOST_BASE_boost_path" != "x"],[
-        AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) includes in "$_AX_BOOST_BASE_boost_path/include"])
-         AS_IF([test -d "$_AX_BOOST_BASE_boost_path/include" && test -r "$_AX_BOOST_BASE_boost_path/include"],[
-           AC_MSG_RESULT([yes])
-           BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include"
-           for _AX_BOOST_BASE_boost_path_tmp in $multiarch_libsubdir $libsubdirs; do
-                AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) lib path in "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"])
-                AS_IF([test -d "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" && test -r "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" ],[
-                        AC_MSG_RESULT([yes])
-                        BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp";
-                        break;
-                ],
-      [AC_MSG_RESULT([no])])
-           done],[
-      AC_MSG_RESULT([no])])
-    ],[
-        if test X"$cross_compiling" = Xyes; then
-            search_libsubdirs=$multiarch_libsubdir
-        else
-            search_libsubdirs="$multiarch_libsubdir $libsubdirs"
-        fi
-        for _AX_BOOST_BASE_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
-            if test -d "$_AX_BOOST_BASE_boost_path_tmp/include/boost" && test -r "$_AX_BOOST_BASE_boost_path_tmp/include/boost" ; then
-                for libsubdir in $search_libsubdirs ; do
-                    if ls "$_AX_BOOST_BASE_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
-                done
-                BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path_tmp/$libsubdir"
-                BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path_tmp/include"
-                break;
-            fi
-        done
-    ])
-
-    dnl overwrite ld flags if we have required special directory with
-    dnl --with-boost-libdir parameter
-    AS_IF([test "x$_AX_BOOST_BASE_boost_lib_path" != "x"],
-          [BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_lib_path"])
-
-    AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION)])
-    CPPFLAGS_SAVED="$CPPFLAGS"
-    CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
-    export CPPFLAGS
-
-    LDFLAGS_SAVED="$LDFLAGS"
-    LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
-    export LDFLAGS
-
-    AC_REQUIRE([AC_PROG_CXX])
-    AC_LANG_PUSH(C++)
-        AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[
-        AC_MSG_RESULT(yes)
-    succeeded=yes
-    found_system=yes
-        ],[
-        ])
-    AC_LANG_POP([C++])
-
-
-
-    dnl if we found no boost with system layout we search for boost libraries
-    dnl built and installed without the --layout=system option or for a staged(not installed) version
-    if test "x$succeeded" != "xyes" ; then
-        CPPFLAGS="$CPPFLAGS_SAVED"
-        LDFLAGS="$LDFLAGS_SAVED"
-        BOOST_CPPFLAGS=
-        if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then
-            BOOST_LDFLAGS=
-        fi
-        _version=0
-        if test -n "$_AX_BOOST_BASE_boost_path" ; then
-            if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path"; then
-                for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do
-                    _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
-                    V_CHECK=`expr $_version_tmp \> $_version`
-                    if test "x$V_CHECK" = "x1" ; then
-                        _version=$_version_tmp
-                    fi
-                    VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
-                    BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include/boost-$VERSION_UNDERSCORE"
-                done
-                dnl if nothing found search for layout used in Windows distributions
-                if test -z "$BOOST_CPPFLAGS"; then
-                    if test -d "$_AX_BOOST_BASE_boost_path/boost" && test -r "$_AX_BOOST_BASE_boost_path/boost"; then
-                        BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path"
-                    fi
-                fi
-                dnl if we found something and BOOST_LDFLAGS was unset before
-                dnl (because "$_AX_BOOST_BASE_boost_lib_path" = ""), set it here.
-                if test -n "$BOOST_CPPFLAGS" && test -z "$BOOST_LDFLAGS"; then
-                    for libsubdir in $libsubdirs ; do
-                        if ls "$_AX_BOOST_BASE_boost_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
-                    done
-                    BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$libsubdir"
-                fi
-            fi
-        else
-            if test "x$cross_compiling" != "xyes" ; then
-                for _AX_BOOST_BASE_boost_path in /usr /usr/local /opt /opt/local ; do
-                    if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path" ; then
-                        for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do
-                            _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
-                            V_CHECK=`expr $_version_tmp \> $_version`
-                            if test "x$V_CHECK" = "x1" ; then
-                                _version=$_version_tmp
-                                best_path=$_AX_BOOST_BASE_boost_path
-                            fi
-                        done
-                    fi
-                done
-
-                VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
-                BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
-                if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then
-                    for libsubdir in $libsubdirs ; do
-                        if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
-                    done
-                    BOOST_LDFLAGS="-L$best_path/$libsubdir"
-                fi
-            fi
-
-            if test -n "$BOOST_ROOT" ; then
-                for libsubdir in $libsubdirs ; do
-                    if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
-                done
-                if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then
-                    version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
-                    stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
-                        stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
-                    V_CHECK=`expr $stage_version_shorten \>\= $_version`
-                    if test "x$V_CHECK" = "x1" && test -z "$_AX_BOOST_BASE_boost_lib_path" ; then
-                        AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
-                        BOOST_CPPFLAGS="-I$BOOST_ROOT"
-                        BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir"
-                    fi
-                fi
-            fi
-        fi
-
-        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
-        export CPPFLAGS
-        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
-        export LDFLAGS
-
-        AC_LANG_PUSH(C++)
-            AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[
-            AC_MSG_RESULT(yes)
-        succeeded=yes
-        found_system=yes
-            ],[
-            ])
-        AC_LANG_POP([C++])
-    fi
-
-    if test "x$succeeded" != "xyes" ; then
-        if test "x$_version" = "x0" ; then
-            AC_MSG_NOTICE([[We could not detect the boost libraries (version $1 or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option.  If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
-        else
-            AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).])
-        fi
-        # execute ACTION-IF-NOT-FOUND (if present):
-        ifelse([$3], , :, [$3])
-    else
-        AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
-        # execute ACTION-IF-FOUND (if present):
-        ifelse([$2], , :, [$2])
-    fi
-
-    CPPFLAGS="$CPPFLAGS_SAVED"
-    LDFLAGS="$LDFLAGS_SAVED"
-
-])
diff --git a/m4/m4_ax_boost_filesystem.m4 b/m4/m4_ax_boost_filesystem.m4
deleted file mode 100644
index c392f9d6..00000000
--- a/m4/m4_ax_boost_filesystem.m4
+++ /dev/null
@@ -1,118 +0,0 @@
-# ===========================================================================
-#   https://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_BOOST_FILESYSTEM
-#
-# DESCRIPTION
-#
-#   Test for Filesystem library from the Boost C++ libraries. The macro
-#   requires a preceding call to AX_BOOST_BASE. Further documentation is
-#   available at <http://randspringer.de/boost/index.html>.
-#
-#   This macro calls:
-#
-#     AC_SUBST(BOOST_FILESYSTEM_LIB)
-#
-#   And sets:
-#
-#     HAVE_BOOST_FILESYSTEM
-#
-# LICENSE
-#
-#   Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de>
-#   Copyright (c) 2009 Michael Tindal
-#   Copyright (c) 2009 Roman Rybalko <libtorrent@romanr.info>
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved. This file is offered as-is, without any
-#   warranty.
-
-#serial 27
-
-AC_DEFUN([AX_BOOST_FILESYSTEM],
-[
-	AC_ARG_WITH([boost-filesystem],
-	AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@],
-                   [use the Filesystem library from boost - it is possible to specify a certain library for the linker
-                        e.g. --with-boost-filesystem=boost_filesystem-gcc-mt ]),
-        [
-        if test "$withval" = "no"; then
-			want_boost="no"
-        elif test "$withval" = "yes"; then
-            want_boost="yes"
-            ax_boost_user_filesystem_lib=""
-        else
-		    want_boost="yes"
-		ax_boost_user_filesystem_lib="$withval"
-		fi
-        ],
-        [want_boost="yes"]
-	)
-
-	if test "x$want_boost" = "xyes"; then
-        AC_REQUIRE([AC_PROG_CC])
-		CPPFLAGS_SAVED="$CPPFLAGS"
-		CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
-		export CPPFLAGS
-
-		LDFLAGS_SAVED="$LDFLAGS"
-		LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
-		export LDFLAGS
-
-		LIBS_SAVED=$LIBS
-		LIBS="$LIBS $BOOST_SYSTEM_LIB"
-		export LIBS
-
-        AC_CACHE_CHECK(whether the Boost::Filesystem library is available,
-					   ax_cv_boost_filesystem,
-        [AC_LANG_PUSH([C++])
-         AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/filesystem/path.hpp>]],
-                                   [[using namespace boost::filesystem;
-                                   path my_path( "foo/bar/data.txt" );
-                                   return 0;]])],
-					       ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no)
-         AC_LANG_POP([C++])
-		])
-		if test "x$ax_cv_boost_filesystem" = "xyes"; then
-			AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available])
-            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
-            if test "x$ax_boost_user_filesystem_lib" = "x"; then
-                for libextension in `ls -r $BOOSTLIBDIR/libboost_filesystem* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do
-                     ax_lib=${libextension}
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
-                                 [link_filesystem="no"])
-				done
-                if test "x$link_filesystem" != "xyes"; then
-                for libextension in `ls -r $BOOSTLIBDIR/boost_filesystem* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do
-                     ax_lib=${libextension}
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
-                                 [link_filesystem="no"])
-				done
-		    fi
-            else
-               for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do
-				      AC_CHECK_LIB($ax_lib, exit,
-                                   [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
-                                   [link_filesystem="no"])
-                  done
-
-            fi
-            if test "x$ax_lib" = "x"; then
-                AC_MSG_ERROR(Could not find a version of the library!)
-            fi
-			if test "x$link_filesystem" != "xyes"; then
-				AC_MSG_ERROR(Could not link against $ax_lib !)
-			fi
-		fi
-
-		CPPFLAGS="$CPPFLAGS_SAVED"
-		LDFLAGS="$LDFLAGS_SAVED"
-		LIBS="$LIBS_SAVED"
-	fi
-])
diff --git a/m4/m4_ax_boost_system.m4 b/m4/m4_ax_boost_system.m4
deleted file mode 100644
index 207d7be8..00000000
--- a/m4/m4_ax_boost_system.m4
+++ /dev/null
@@ -1,121 +0,0 @@
-# ===========================================================================
-#     https://www.gnu.org/software/autoconf-archive/ax_boost_system.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_BOOST_SYSTEM
-#
-# DESCRIPTION
-#
-#   Test for System library from the Boost C++ libraries. The macro requires
-#   a preceding call to AX_BOOST_BASE. Further documentation is available at
-#   <http://randspringer.de/boost/index.html>.
-#
-#   This macro calls:
-#
-#     AC_SUBST(BOOST_SYSTEM_LIB)
-#
-#   And sets:
-#
-#     HAVE_BOOST_SYSTEM
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
-#   Copyright (c) 2008 Michael Tindal
-#   Copyright (c) 2008 Daniel Casimiro <dan.casimiro@gmail.com>
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved. This file is offered as-is, without any
-#   warranty.
-
-#serial 19
-
-AC_DEFUN([AX_BOOST_SYSTEM],
-[
-	AC_ARG_WITH([boost-system],
-	AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@],
-                   [use the System library from boost - it is possible to specify a certain library for the linker
-                        e.g. --with-boost-system=boost_system-gcc-mt ]),
-        [
-        if test "$withval" = "no"; then
-			want_boost="no"
-        elif test "$withval" = "yes"; then
-            want_boost="yes"
-            ax_boost_user_system_lib=""
-        else
-		    want_boost="yes"
-		ax_boost_user_system_lib="$withval"
-		fi
-        ],
-        [want_boost="yes"]
-	)
-
-	if test "x$want_boost" = "xyes"; then
-        AC_REQUIRE([AC_PROG_CC])
-        AC_REQUIRE([AC_CANONICAL_BUILD])
-		CPPFLAGS_SAVED="$CPPFLAGS"
-		CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
-		export CPPFLAGS
-
-		LDFLAGS_SAVED="$LDFLAGS"
-		LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
-		export LDFLAGS
-
-        AC_CACHE_CHECK(whether the Boost::System library is available,
-					   ax_cv_boost_system,
-        [AC_LANG_PUSH([C++])
-			 CXXFLAGS_SAVE=$CXXFLAGS
-			 CXXFLAGS=
-
-			 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/system/error_code.hpp>]],
-				    [[boost::system::error_category *a = 0;]])],
-                   ax_cv_boost_system=yes, ax_cv_boost_system=no)
-			 CXXFLAGS=$CXXFLAGS_SAVE
-             AC_LANG_POP([C++])
-		])
-		if test "x$ax_cv_boost_system" = "xyes"; then
-			AC_SUBST(BOOST_CPPFLAGS)
-
-			AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available])
-            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
-
-			LDFLAGS_SAVE=$LDFLAGS
-            if test "x$ax_boost_user_system_lib" = "x"; then
-                for libextension in `ls -r $BOOSTLIBDIR/libboost_system* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do
-                     ax_lib=${libextension}
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break],
-                                 [link_system="no"])
-				done
-                if test "x$link_system" != "xyes"; then
-                for libextension in `ls -r $BOOSTLIBDIR/boost_system* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do
-                     ax_lib=${libextension}
-				    AC_CHECK_LIB($ax_lib, exit,
-                                 [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break],
-                                 [link_system="no"])
-				done
-                fi
-
-            else
-               for ax_lib in $ax_boost_user_system_lib boost_system-$ax_boost_user_system_lib; do
-				      AC_CHECK_LIB($ax_lib, exit,
-                                   [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break],
-                                   [link_system="no"])
-                  done
-
-            fi
-            if test "x$ax_lib" = "x"; then
-                AC_MSG_ERROR(Could not find a version of the library!)
-            fi
-			if test "x$link_system" = "xno"; then
-				AC_MSG_ERROR(Could not link against $ax_lib !)
-			fi
-		fi
-
-		CPPFLAGS="$CPPFLAGS_SAVED"
-	LDFLAGS="$LDFLAGS_SAVED"
-	fi
-])
diff --git a/m4/pkg.m4 b/m4/pkg.m4
deleted file mode 100644
index 62995f01..00000000
--- a/m4/pkg.m4
+++ /dev/null
@@ -1,233 +0,0 @@
-# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
-# serial 1 (pkg-config-0.24)
-# 
-# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# PKG_PROG_PKG_CONFIG([MIN-VERSION])
-# ----------------------------------
-AC_DEFUN([PKG_PROG_PKG_CONFIG],
-[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
-m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
-m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
-AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
-AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
-AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=m4_default([$1], [0.9.0])
-	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		AC_MSG_RESULT([yes])
-	else
-		AC_MSG_RESULT([no])
-		PKG_CONFIG=""
-	fi
-fi[]dnl
-])# PKG_PROG_PKG_CONFIG
-
-# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-#
-# Check to see whether a particular set of modules exists.  Similar
-# to PKG_CHECK_MODULES(), but does not set variables or print errors.
-#
-# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-# only at the first occurence in configure.ac, so if the first place
-# it's called might be skipped (such as if it is within an "if", you
-# have to call PKG_CHECK_EXISTS manually
-# --------------------------------------------------------------
-AC_DEFUN([PKG_CHECK_EXISTS],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-if test -n "$PKG_CONFIG" && \
-    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
-  m4_default([$2], [:])
-m4_ifvaln([$3], [else
-  $3])dnl
-fi])
-
-# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
-# ---------------------------------------------
-m4_define([_PKG_CONFIG],
-[if test -n "$$1"; then
-    pkg_cv_[]$1="$$1"
- elif test -n "$PKG_CONFIG"; then
-    PKG_CHECK_EXISTS([$3],
-                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
-		      test "x$?" != "x0" && pkg_failed=yes ],
-		     [pkg_failed=yes])
- else
-    pkg_failed=untried
-fi[]dnl
-])# _PKG_CONFIG
-
-# _PKG_SHORT_ERRORS_SUPPORTED
-# -----------------------------
-AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
-        _pkg_short_errors_supported=yes
-else
-        _pkg_short_errors_supported=no
-fi[]dnl
-])# _PKG_SHORT_ERRORS_SUPPORTED
-
-
-# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-# [ACTION-IF-NOT-FOUND])
-#
-#
-# Note that if there is a possibility the first call to
-# PKG_CHECK_MODULES might not happen, you should be sure to include an
-# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
-#
-#
-# --------------------------------------------------------------
-AC_DEFUN([PKG_CHECK_MODULES],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
-AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
-
-pkg_failed=no
-AC_MSG_CHECKING([for $1])
-
-_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
-_PKG_CONFIG([$1][_LIBS], [libs], [$2])
-
-m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
-and $1[]_LIBS to avoid the need to call pkg-config.
-See the pkg-config man page for more details.])
-
-if test $pkg_failed = yes; then
-   	AC_MSG_RESULT([no])
-        _PKG_SHORT_ERRORS_SUPPORTED
-        if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
-        else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
-        fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
-
-	m4_default([$4], [AC_MSG_ERROR(
-[Package requirements ($2) were not met:
-
-$$1_PKG_ERRORS
-
-Consider adjusting the PKG_CONFIG_PATH environment variable if you
-installed software in a non-standard prefix.
-
-_PKG_TEXT])[]dnl
-        ])
-elif test $pkg_failed = untried; then
-     	AC_MSG_RESULT([no])
-	m4_default([$4], [AC_MSG_FAILURE(
-[The pkg-config script could not be found or is too old.  Make sure it
-is in your PATH or set the PKG_CONFIG environment variable to the full
-path to pkg-config.
-
-_PKG_TEXT
-
-To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
-        ])
-else
-	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
-	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
-        AC_MSG_RESULT([yes])
-	$3
-fi[]dnl
-])# PKG_CHECK_MODULES
-
-
-# PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-# [ACTION-IF-NOT-FOUND])
-# ---------------------------------------------------------------------
-# Checks for existence of MODULES and gathers its build flags with
-# static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
-# and VARIABLE-PREFIX_LIBS from --libs.
-#
-# Note that if there is a possibility the first call to
-# PKG_CHECK_MODULES_STATIC might not happen, you should be sure to include
-# an explicit call to PKG_PROG_PKG_CONFIG in your configure.ac.
-AC_DEFUN([PKG_CHECK_MODULES_STATIC],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-_save_PKG_CONFIG=$PKG_CONFIG
-PKG_CONFIG="$PKG_CONFIG --static"
-PKG_CHECK_MODULES($@)
-PKG_CONFIG=$_save_PKG_CONFIG[]dnl
-])
-
-
-# PKG_INSTALLDIR(DIRECTORY)
-# -------------------------
-# Substitutes the variable pkgconfigdir as the location where a module
-# should install pkg-config .pc files. By default the directory is
-# $libdir/pkgconfig, but the default can be changed by passing
-# DIRECTORY. The user can override through the --with-pkgconfigdir
-# parameter.
-AC_DEFUN([PKG_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([pkgconfigdir],
-    [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
-    [with_pkgconfigdir=]pkg_default)
-AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-]) dnl PKG_INSTALLDIR
-
-
-# PKG_NOARCH_INSTALLDIR(DIRECTORY)
-# -------------------------
-# Substitutes the variable noarch_pkgconfigdir as the location where a
-# module should install arch-independent pkg-config .pc files. By
-# default the directory is $datadir/pkgconfig, but the default can be
-# changed by passing DIRECTORY. The user can override through the
-# --with-noarch-pkgconfigdir parameter.
-AC_DEFUN([PKG_NOARCH_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([noarch-pkgconfigdir],
-    [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
-    [with_noarch_pkgconfigdir=]pkg_default)
-AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-]) dnl PKG_NOARCH_INSTALLDIR
-
-
-# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
-# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-# -------------------------------------------
-# Retrieves the value of the pkg-config variable for the given module.
-AC_DEFUN([PKG_CHECK_VAR],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
-
-_PKG_CONFIG([$1], [variable="][$3]["], [$2])
-AS_VAR_COPY([$1], [pkg_cv_][$1])
-
-AS_VAR_IF([$1], [""], [$5], [$4])dnl
-])# PKG_CHECK_VAR
diff --git a/maint/codes-net.pc.in b/maint/codes-net.pc.in
deleted file mode 100644
index 9215d369..00000000
--- a/maint/codes-net.pc.in
+++ /dev/null
@@ -1,12 +0,0 @@
-prefix = @prefix@
-exec_prefix = @exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: codes-net
-Description: Network functionality for CODES storage simulation
-Version: @PACKAGE_VERSION@
-URL: https://github.com/codes-org/codes
-Requires: codes-base
-Libs: -L${libdir} -lcodes-net
-Cflags: -I${includedir}
diff --git a/maint/codes.pc.in b/maint/codes.pc.in
deleted file mode 100644
index 97cf7d1d..00000000
--- a/maint/codes.pc.in
+++ /dev/null
@@ -1,32 +0,0 @@
-prefix = @prefix@
-exec_prefix = @exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-ross_cflags=@ROSS_CFLAGS@
-ross_libs=@ROSS_LIBS@
-darshan_libs=@DARSHAN_LIBS@
-darshan_cflags=@DARSHAN_CFLAGS@
-dumpi_cflags=@DUMPI_CFLAGS@
-dumpi_libs=@DUMPI_LIBS@
-cortex_cflags=@CORTEX_CFLAGS@
-cortex_libs=@CORTEX_LIBS@
-python_cflags=@PYTHON_CFLAGS@
-python_libs=@PYTHON_LIBS@
-boost_cflags=@BOOST_CFLAGS@
-boost_libs=@BOOST_LIBS@
-argobots_libs=@ARGOBOTS_LIBS@
-argobots_cflags=@ARGOBOTS_CFLAGS@
-swm_libs=@SWM_LIBS@
-swm_cflags=@SWM_CFLAGS@
-swm_datarootdir=@SWM_DATAROOTDIR@
-union_libs=@UNION_LIBS@
-union_cflags=@UNION_CFLAGS@
-union_datadir=@UNION_DATADIR@
-
-Name: codes-base
-Description: Base functionality for CODES storage simulation
-Version: @PACKAGE_VERSION@
-URL: https://github.com/codes-org/codes
-Requires:
-Libs: -L${libdir} -lcodes ${ross_libs} ${argobots_libs} ${swm_libs} ${union_libs} ${darshan_libs} ${dumpi_libs} ${cortex_libs}
-Cflags: -I${includedir} -I${swm_datarootdir} ${union_datadir} ${ross_cflags} ${darshan_cflags} ${swm_cflags} ${union_cflags} ${argobots_cflags} ${dumpi_cflags} ${cortex_cflags}
diff --git a/prepare.sh b/prepare.sh
deleted file mode 100755
index 2739136e..00000000
--- a/prepare.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-echo "Regenerating build files..."
-autoreconf -fi -Im4

From 3d2b726b0574e1193ab6013569d6ecdefc417f24 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Tue, 22 Jul 2025 12:05:43 -0400
Subject: [PATCH 187/188] Adding some of Neil's and Elkin's contributions from
 the past 5 years

---
 CONTRIBUTORS.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 39e0bebd..7d8dd5b4 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -20,6 +20,8 @@ Contributors to date (with affiliations at time of contribution)
 - Lee Savoie, Univ. of Arizona 
 - Ning Liu, Rensselaer Polytechnic Institute
 - Jason Cope, Argonne National Laboratory
+- Kevin A. Brown, Argonne National Laboratory
+- Elkin Cruz, Rensselaer Polytechnic Institute
 
 Contributions:
 
@@ -40,6 +42,8 @@ Neil McGlohon (RPI)
     - Merged 1-D dragonfly and 2-D dragonfly network models.
     - Updated adaptive routing in megafly and 1-D dragonfly network models. 
     - Extended slim fly network model's dual-rail mode to arbitrary number of rails (pending).
+    - Implemented Quality of Service (QoS) in 1-D dragonfly network.
+    - Implemented changes needed to allow ROSS's tiebreaker mechanism.
 
 Nikhil Jain, Abhinav Bhatele (LLNL)
     - Improvements in credit-based flow control of CODES dragonfly and torus network models.
@@ -78,3 +82,17 @@ Caitlin Ross (RPI):
     - Added instrumentation so that network models can report sampled
       statistics over virtual time (pending).
     - Bug reporter for CODES models.
+
+Elkin Cruz (RPI)
+    - Added network surrogate for 1-D Dragonfly model (dragonfly-dally).
+    - Added application surrogate for MPI replay (model-net-mpi-replay).
+    - Implemented API to allow network and application surrogates to switch as
+      simulation runs (aka, hybrid simulation).
+    - Added network and application level directors, which coordinate data
+      transference between model and predictor.
+    - Added simple averaged-based network and application predictors (they are
+      given simulation data and are in charge of predicting future states of the
+      simulation, skipping computation).
+    - Implemented necessary scaffolding to check for bugs in reversible
+      computation (to be used with SEQUENTIAL_ROLLBACK_CHECK option in ROSS).
+    - Fixed reversible computation bugs on 1-D Dragonfly network.

From ed9edf5a2f5fb48117e0b7f15c1194a788efd0a1 Mon Sep 17 00:00:00 2001
From: helq <linuxero789@gmail.com>
Date: Wed, 23 Jul 2025 10:50:35 -0400
Subject: [PATCH 188/188] Updating compilation script

---
 CODES-compile-instructions.sh | 16 ++++++++++------
 README.md                     |  6 ++----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh
index 76d4c6a1..14178c99 100644
--- a/CODES-compile-instructions.sh
+++ b/CODES-compile-instructions.sh
@@ -21,13 +21,12 @@ CUR_DIR="$PWD"
 
 ##### Downloading everything #####
 
-git clone https://github.com/codes-org/codes --branch=develop
-git clone https://github.com/ross-org/ross --depth=100 --branch=develop
+git clone https://github.com/codes-org/codes --depth=100 --branch=v1.5.0
+git clone https://github.com/ross-org/ross --depth=100 --branch=v8.1.0
 
 if [ $swm_enable = 1 ]; then
     git clone https://github.com/pmodels/argobots --depth=1
-    # This version is one commit ahead
-    git clone https://github.com/helq/swm-workloads --branch=total-iterations-communication
+    git clone https://github.com/codes-org/swm-workloads --branch=v1.2
 fi
 
 if [ $union_enable = 1 ]; then
@@ -35,7 +34,8 @@ if [ $union_enable = 1 ]; then
     curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz
     tar xvf conceptual-1.5.1b.tar.gz
     # Downloading union
-    git clone https://github.com/helq/Union --branch=master
+    git clone https://github.com/SPEAR-UIC/Union
+    pushd Union && git checkout 99b3df3 && popd
 fi
 
 ##### COMPILING #####
@@ -84,9 +84,13 @@ if [ $union_enable = 1 ]; then
     popd
 
     pushd Union
+    # Python 2 override. Union expects Python 2 ONLY
+    mkdir -p python-override
+    ln -s /usr/bin/python2 python-override/python
+    # compiling
     ./prepare.sh
     PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx
-    make -j4 && make install
+    PATH="$PWD/python-override:$PATH" make -j4 && make install
     err=$?
     [[ $err -ne 0 ]] && exit $err
     popd
diff --git a/README.md b/README.md
index 7740a222..43f64438 100644
--- a/README.md
+++ b/README.md
@@ -9,21 +9,19 @@ The easiest way to build CODES is using our automated compilation script that ha
 1. **Download the compilation script** [click here](https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh) or:
 
    ```bash
-   # Download the script to your desired directory
    wget https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh
-   chmod +x CODES-compile-instructions.sh
    ```
 
 2. **Edit and Run the script**:
    ```bash
-   ./CODES-compile-instructions.sh
+   bash ./CODES-compile-instructions.sh
    ```
 
 The script will create a new directory with all dependencies and CODES compiled and ready to use.
 
 ## Prerequisites
 
-- **MPI**: OpenMPI or MPICH for parallel execution
+- **MPI**: MPICH for parallel execution (OpenMPI is not supported by Union, a dependency)
 - **CMake**: Version 3.12 or higher
 - **ROSS**: Rensselaer Optimistic Simulation System (handled by script)
 - **C/C++ compiler**: GCC or Clang with C++11 support