diff --git a/.gitignore b/.gitignore index 4d3a6923..b023d553 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,10 @@ # generated files from test runs ross.csv -install-mastiff/include/codes/model-net-method.h \ No newline at end of file +install-mastiff/include/codes/model-net-method.h + +# commonly used building stuff +/build*/ +/build* +.cache +compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 86b4b5fe..20afe8d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.17) # set the project name and version project(codes LANGUAGES C CXX VERSION 2.0) @@ -22,12 +22,13 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) set(ROSS_PKG_CONFIG_PATH "" CACHE PATH "Where is ROSS PKG_CONFIG is installed?") set(SWM_PKG_CONFIG_PATH "" CACHE PATH "Where is the SWM PKG_CONFIG installed?") +set(UNION_PKG_CONFIG_PATH "" CACHE PATH "Where is the Union PKG_CONFIG installed?") set(ARGOBOTS_PKG_CONFIG_PATH "" CACHE PATH "Where is argobots PKG_COPNFIG installed? 
Necessary for SWM") set(DAMARIS_PKG_CONFIG_PATH "" CACHE PATH "Where is the damaris PKG_CONFIG installed?") find_package(PkgConfig REQUIRED) -set(ENV{PKG_CONFIG_PATH} "${ROSS_PKG_CONFIG_PATH}:${SWM_PKG_CONFIG_PATH}:${ARGOBOTS_PKG_CONFIG_PATH}") +set(ENV{PKG_CONFIG_PATH} "${ROSS_PKG_CONFIG_PATH}:${SWM_PKG_CONFIG_PATH}:${UNION_PKG_CONFIG_PATH}:${ARGOBOTS_PKG_CONFIG_PATH}") pkg_check_modules(ROSS REQUIRED IMPORTED_TARGET ross) # MPI @@ -57,28 +58,50 @@ else(DUMPI_LIB) set(USE_DUMPI true) endif() -## SWM +# SWM and UNION (both require ARGOBOTS to function) pkg_check_modules(SWM IMPORTED_TARGET swm) if(NOT SWM_FOUND) - message(STATUS "SWM Library Not Found, Online workloads disabled") + message(STATUS "SWM Library Not Found, Online workloads disabled") + else(SWM_FOUND) - message(STATUS "SWM Library Found: ${SWM_LIBRARIES}") - pkg_check_modules(ARGOBOTS REQUIRED IMPORTED_TARGET argobots) - if(NOT ARGOBOTS_FOUND) - message(STATUS "Argobots Library Not Found, Online workloads disabled") - else(ARGOBOTS_FOUND) - message(STATUS "Argobots Library Found: ${ARGOBOTS_LIBRARIES}") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGOBOTS_CFLAGS} -I${ARGOBOTS_INCLUDE}") - - pkg_get_variable(SWM_DATAROOTDIR swm datarootdir) - cmake_print_variables(SWM_DATAROOTDIR) - - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SWM_CFLAGS} -I${SWM_INCLUDE}") - add_definitions(-DUSE_ONLINE=1) - set(USE_ONLINE true) + message(STATUS "SWM Library Found: ${SWM_LIBRARIES}") + pkg_check_modules(ARGOBOTS REQUIRED IMPORTED_TARGET argobots) + if(NOT ARGOBOTS_FOUND) + message(STATUS "Argobots Library Not Found, Online workloads disabled") + + else(ARGOBOTS_FOUND) + message(STATUS "Argobots Library Found: ${ARGOBOTS_LIBRARIES}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGOBOTS_CFLAGS} -I${ARGOBOTS_INCLUDE}") + + pkg_get_variable(SWM_DATAROOTDIR swm datarootdir) + cmake_print_variables(SWM_DATAROOTDIR) + + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SWM_CFLAGS} -I${SWM_INCLUDE}") + add_definitions(-DUSE_ONLINE=1) + set(USE_ONLINE 
true) + + pkg_check_modules(UNION IMPORTED_TARGET union) + if(NOT UNION_FOUND) + message(STATUS "UNION Library Not Found, SWM-only online workloads enabled") + add_definitions(-DUSE_SWM=1) + set(USE_SWM true) + else(UNION_FOUND) + message(STATUS "UNION Library Found: ${UNION_LIBRARIES}") + pkg_get_variable(UNION_DATAROOTDIR union datarootdir) + cmake_print_variables(UNION_DATAROOTDIR) + + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${UNION_INCLUDE}") + foreach(INCLUDE_OPT ${UNION_CFLAGS}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${INCLUDE_OPT}") + endforeach() + + add_definitions(-DUSE_UNION=1) + set(USE_UNION true) endif() + endif() endif() + ## RECORDER option(USE_RECORDER "use recorder io workload" ON) if(USE_RECORDER) @@ -96,11 +119,34 @@ endif() # set(USE_DAMARIS true) # endif() +## TORCH loading ML models +if((NOT DEFINED USE_TORCH) OR USE_TORCH) + find_package(Torch) + if(Torch_FOUND) + set(CMAKE_CXX_STANDARD 17) + add_definitions(-DUSE_TORCH) + set(USE_TORCH true) + message(STATUS "Loading TORCH models enabled.") + else() + set(USE_TORCH false) + message(STATUS "Torch library not found. 
Loading TORCH models disabled.") + endif() +else() + message(STATUS "Loading TORCH models NOT enabled.") +endif() + cmake_print_variables(CMAKE_C_FLAGS) add_subdirectory(src) -configure_file(codes_config.h.in codes_config.h) - +configure_file(codes_config.h.cmake.in codes_config.h) +add_subdirectory(doc/example) +string(COMPARE NOTEQUAL "RELEASE" "${CMAKE_BUILD_TYPE}" not_release) +if(BUILD_TESTING AND not_release) + include(CTest) + set(CODES_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + set(CODES_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") + add_subdirectory(tests) +endif() diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh new file mode 100644 index 00000000..14178c99 --- /dev/null +++ b/CODES-compile-instructions.sh @@ -0,0 +1,136 @@ +#!/usr/bin bash -x + +# Switches +swm_enable=1 +union_enable=1 +torch_enable=0 + +# Uncomment below for MPICH +#export PATH=/usr/local/mpich-4.1.2/bin/:"$PATH" +# Note: remember to compile MPICH with nemesis not with UCX support + +################## Actual scripts starts from here ################## + +# SWM has to be enabled for UNION to work +if [ $union_enable = 1 ]; then + swm_enable=1 +fi + +# What to compile +CUR_DIR="$PWD" + +##### Downloading everything ##### + +git clone https://github.com/codes-org/codes --depth=100 --branch=v1.5.0 +git clone https://github.com/ross-org/ross --depth=100 --branch=v8.1.0 + +if [ $swm_enable = 1 ]; then + git clone https://github.com/pmodels/argobots --depth=1 + git clone https://github.com/codes-org/swm-workloads --branch=v1.2 +fi + +if [ $union_enable = 1 ]; then + # Downloading conceptual + curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz + tar xvf conceptual-1.5.1b.tar.gz + # Downloading union + git clone https://github.com/SPEAR-UIC/Union + pushd Union && git checkout 99b3df3 && popd +fi + +##### COMPILING ##### + +mkdir ross/build +pushd ross/build +cmake .. 
-DROSS_BUILD_MODELS=ON -DCMAKE_INSTALL_PREFIX="$(realpath ./bin)" \ + -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-g -Wall" +#make VERBOSE=1 +make install -j4 +err=$? +[[ $err -ne 0 ]] && exit $err +popd + +if [ $swm_enable = 1 ]; then + pushd swm-workloads/swm + ./prepare.sh + mkdir build + pushd build + ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + #make V=1 && make install + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd && popd + + pushd argobots + ./autogen.sh + mkdir build + pushd build + #../configure --enable-debug=all --disable-fast --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + #make V=1 && make install + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd && popd +fi + +if [ $union_enable = 1 ]; then + pushd conceptual-1.5.1b + PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd + + pushd Union + # Python 2 override. Union expects Python 2 ONLY + mkdir -p python-override + ln -s /usr/bin/python2 python-override/python + # compiling + ./prepare.sh + PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx + PATH="$PWD/python-override:$PATH" make -j4 && make install + err=$? 
+ [[ $err -ne 0 ]] && exit $err + popd +fi + + +mkdir codes/build +pushd codes/build + +make_args_codes=( + -DCMAKE_PREFIX_PATH="$(realpath "$CUR_DIR/ross/build/bin")" + -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc + -DCMAKE_C_FLAGS="-g -Wall" + -DCMAKE_CXX_FLAGS="-g -Wall" + -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON + -DCMAKE_INSTALL_PREFIX="$(realpath bin)" +) +if [ $swm_enable = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DSWM_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/swm-workloads/swm/build/maint")" + -DARGOBOTS_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/argobots/build/maint")" + ) +fi +if [ $union_enable = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")" + ) +fi +if [ $torch_enable = 1 ]; then + make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=true) +else + make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false) +fi + +cmake .. "${make_args_codes[@]}" +#make VERBOSE=1 +make -j4 +err=$? +[[ $err -ne 0 ]] && exit $err + +popd diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 39e0bebd..7d8dd5b4 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -20,6 +20,8 @@ Contributors to date (with affiliations at time of contribution) - Lee Savoie, Univ. of Arizona - Ning Liu, Rensselaer Polytechnic Institute - Jason Cope, Argonne National Laboratory +- Kevin A. Brown, Argonne National Laboratory +- Elkin Cruz, Rensselaer Polytechnic Institute Contributions: @@ -40,6 +42,8 @@ Neil McGlohon (RPI) - Merged 1-D dragonfly and 2-D dragonfly network models. - Updated adaptive routing in megafly and 1-D dragonfly network models. - Extended slim fly network model's dual-rail mode to arbitrary number of rails (pending). + - Implemented Quality of Service (QoS) in 1-D dragonfly network. + - Implemented changes needed to allow ROSS's tiebreaker mechanism. 
Nikhil Jain, Abhinav Bhatele (LLNL) - Improvements in credit-based flow control of CODES dragonfly and torus network models. @@ -78,3 +82,17 @@ Caitlin Ross (RPI): - Added instrumentation so that network models can report sampled statistics over virtual time (pending). - Bug reporter for CODES models. + +Elkin Cruz (RPI) + - Added network surrogate for 1-D Dragonfly model (dragonfly-dally). + - Added application surrogate for MPI replay (model-net-mpi-replay). + - Implemented API to allow network and application surrogates to switch as + simulation runs (aka, hybrid simulation). + - Added network and application level directors, which coordinate data + transference between model and predictor. + - Added simple averaged-based network and application predictors (they are + given simulation data and are in charge of predicting future states of the + simulation, skipping computation). + - Implemented necessary scaffolding to check for bugs in reversible + computation (to be used with SEQUENTIAL_ROLLBACK_CHECK option in ROSS). + - Fixed reversible computation bugs on 1-D Dragonfly network. diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index a6de0500..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,22 +0,0 @@ -************** Copyright © 2019, UChicago Argonne, LLC *************** - -All Rights Reserved - -Software Name: CO-Design of Exascale Storage and Network Architectures (CODES) - -By: Argonne National Laboratory, Rensselaer Polytechnic Institute, Lawrence Livermore National Laboratory, and Illinois Institute of Technology - -OPEN SOURCE LICENSE - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - - -****************************************************************************************************** -DISCLAIMER - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************************************************** diff --git a/Make.rules b/Make.rules deleted file mode 100644 index aedcde1e..00000000 --- a/Make.rules +++ /dev/null @@ -1,40 +0,0 @@ -# flex & bison deps -# -%.c %.h: %.l - $(AM_V_GEN)$(LEX) --header-file=$(@:.c=.h) -o $(@:.h=.c) $< \ - || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) - -# -# specific rule for codesparser generation; we want the header to land in -# the codes/ directory because it will be installed for use by other repos -#src/iokernellang/codesparser.c codes/codesparser.h: src/iokernellang/codesparser.y -# mkdir -p codes -# @test "x$(bison_ok)" != "yes" || echo "*** WARNING *** Bison version might be too old" -# $(AM_V_GEN)$(YACC) --defines=codes/codesparser.h -o src/iokernellang/codesparser.c $< \ -# || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) - - -%.c %.h: %.y - @test "x$(bison_ok)" != "yes" || echo "*** WARNING *** Bison version might be too old" - $(AM_V_GEN)$(YACC) --defines=$(@:.c=.h) -o $(@:.h=.c) $< \ - || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) - - - -# %.y: %.y.in Makefile -# $(AM_V_GEN)$(SED) -e 's,[@]CODES_PURE_PARSER_DEFINES[@],$(CODES_PURE_PARSER_DEFINES),g' \ -# -e 's,[@]CODES_PUSH_PARSER_DEFINES[@],$(CODES_PUSH_PARSER_DEFINES),g' \ -# < src/common/iokernellang/codesparser.y.in > src/common/iokernellang/codesparser.y - -# -# Output dist version -# -.phony: distversion -distversion: - @echo $(VERSION) - -# -# Easy way to build unit tests without running them -# -.phony: tests -tests: $(check_PROGRAMS) diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 28157735..00000000 --- a/Makefile.am +++ /dev/null @@ -1,94 +0,0 @@ -AUTOMAKE_OPTIONS = foreign -ACLOCAL_AMFLAGS = -I m4 - -bin_PROGRAMS = -bin_SCRIPTS = -noinst_LIBRARIES = -noinst_PROGRAMS = -lib_LTLIBRARIES = -noinst_HEADERS = -TESTS = -check_PROGRAMS = -EXTRA_PROGRAMS = -CLEANFILES = $(bin_SCRIPTS) -EXTRA_DIST = -BUILT_SOURCES = 
-AM_LDFLAGS = - - -# pkgconfig files -pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = maint/codes.pc -$(pkgconfig_DATA): config.status - -EXTRA_DIST += \ - prepare.sh LICENSE.md configure.ac uc-codes.cfg reformat.sh \ - misc/README misc/ptrn_loggp-2.4.6.patch CONTRIBUTORS.md \ - README.md - -AM_CPPFLAGS = -I$(top_srcdir)/src ${ROSS_CFLAGS} - -AM_CFLAGS = - -AM_CXXFLAGS = $(AM_CFLAGS) - -LDADD = $(lib_LTLIBRARIES) $(ROSS_LIBS) - -include Make.rules - -include $(top_srcdir)/scripts/Makefile.subdir -include $(top_srcdir)/src/Makefile.subdir -include $(top_srcdir)/tests/Makefile.subdir -include $(top_srcdir)/doc/Makefile.subdir - -if USE_DEBUG -AM_CPPFLAGS += -g -AM_CFLAGS += -g -AM_CXXFLAGS += -g -endif - -if USE_DARSHAN -AM_CPPFLAGS += ${DARSHAN_CFLAGS} -DUSE_DARSHAN=1 -src_libcodes_la_SOURCES += src/workload/methods/codes-darshan3-io-wrkld.c -LDADD += ${DARSHAN_LIBS} -TESTS += tests/workload/darshan-dump.sh -endif - -if USE_RECORDER -AM_CPPFLAGS += ${RECORDER_CPPFLAGS} -src_libcodes_la_SOURCES += src/workload/methods/codes-recorder-io-wrkld.c -endif - -if USE_ONLINE -AM_CPPFLAGS += ${ARGOBOTS_CFLAGS} ${SWM_CFLAGS} -DUSE_ONLINE=1 -LDADD += ${SWM_LIBS} ${ARGOBOTS_LIBS} -src_libcodes_la_SOURCES += src/workload/methods/codes-online-comm-wrkld.C -endif - -if USE_DUMPI -AM_CPPFLAGS += ${DUMPI_CFLAGS} -DUSE_DUMPI=1 -src_libcodes_la_SOURCES += src/workload/methods/codes-dumpi-trace-nw-wrkld.c -TESTS += tests/modelnet-test-dragonfly-traces.sh \ - tests/modelnet-test-dragonfly-custom-traces.sh \ - tests/modelnet-test-slimfly-traces.sh \ - tests/modelnet-test-torus-traces.sh -check_PROGRAMS += src/network-workloads/model-net-mpi-replay -if USE_CORTEX -if USE_PYTHON -if USE_CORTEX_PYTHON -AM_CPPFLAGS += ${CORTEX_PYTHON_CFLAGS} -DENABLE_CORTEX_PYTHON=1 -LDADD += ${CORTEX_PYTHON_LIBS} -AM_CPPFLAGS += ${PYTHON_CFLAGS} -LDADD += ${PYTHON_LIBS} -endif -endif -AM_CPPFLAGS += ${CORTEX_CFLAGS} -DENABLE_CORTEX=1 -LDADD += ${CORTEX_LIBS} -endif -LDADD += ${DUMPI_LIBS} -endif - -if 
USE_RDAMARIS -AM_CPPFLAGS += ${ROSS_Damaris_CFLAGS} -DUSE_RDAMARIS=1 -LDADD += ${ROSS_Damaris_LIBS} -endif diff --git a/README.md b/README.md index 3388fad8..43f64438 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,152 @@ # CODES Discrete-event Simulation Framework -### [Join our CODES user mailing list](https://mailchi.mp/75d0c8aa42c3/codes-user-group) to stay up to date with major changes, events, and news! +A high-performance discrete-event simulation framework for modeling HPC system architectures, network fabrics, and storage systems. Built on top of ROSS (Rensselaer Optimistic Simulation System) for massively parallel simulation capabilities. -### New? Check out the [Wiki for Installation, Tutorials, and Documentation](https://github.com/codes-org/codes/wiki) +## Quick Start -Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities. +The easiest way to build CODES is using our automated compilation script that handles all dependencies and configurations. + +1. **Download the compilation script** [click here](https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh) or: + + ```bash + wget https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh + ``` + +2. **Edit and Run the script**: + ```bash + bash ./CODES-compile-instructions.sh + ``` + +The script will create a new directory with all dependencies and CODES compiled and ready to use. 
+ +## Prerequisites + +- **MPI**: MPICH for parallel execution (OpenMPI is not supported by Union, a dependency) +- **CMake**: Version 3.17 or higher +- **ROSS**: Rensselaer Optimistic Simulation System (handled by script) +- **C/C++ compiler**: GCC or Clang with C++11 support + +Optional dependencies (automatically handled by script if enabled): +- **UNION**: For advanced workload generation +- **SWM**: For structured workload modeling +- **Argobots**: Threading library for enhanced performance +- **PyTorch**: For ML model integration (if enabled) + +## Manual Installation + +For advanced users who prefer manual installation: + +```bash +# 1. Build and install ROSS first +git clone https://github.com/ross-org/ROSS.git +cd ROSS && mkdir build && cd build +cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/ross +make -j && make install +cd ../.. + +# 2. Clone and build CODES +git clone https://github.com/codes-org/codes.git +cd codes && mkdir build && cd build + +# 3. Configure with CMake +cmake .. \ + -DCMAKE_PREFIX_PATH=$HOME/ross \ + -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_CXX_COMPILER=mpicxx \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTING=ON + +# 4. Build and test +make -j +ctest +``` + +## Testing + +Check your installation with: + +```bash +# Run all tests +cd codes/build && ctest + +# Run specific tests +ctest -R modelnet-test-dragonfly +ctest -R union-workload-test-surrogate + +# Keep test output for inspection +DONT_DELETE_TEST_DIR=1 ctest -R your-test-name +``` + +All tests pass as of this writing, including those that require UNION support. Tests verify: + +- Network model correctness and determinism +- Workload generation and replay accuracy +- Multi-fidelity simulation switching +- Parallel execution and reverse computation +- Configuration file parsing and LP setup + +## Basic Usage + +Running a CODES experiment is tricky due to the large number of components that have to be correctly configured. 
Please use the [experiments repo](https://github.com/CODES-org/experiments) for examples of simulations you can run. + +If you have used the compilation script from above (quick start), run the following (in the folder that contains `CODES-compile-instructions.sh`): + +```bash +git clone https://github.com/CODES-org/experiments +``` + +To run an experiment, do: + +```bash +cd experiments +bash run-experiment.sh path-to-experiment/script.sh +``` + +A folder will be created under `path-to-experiment/results` containing the result of running the experiment. + +## Features + +CODES provides comprehensive simulation capabilities for: + +### Network Topologies +- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date) +- **Torus**: Multi-dimensional torus networks +- **Fat-tree**: Hierarchical tree topologies +- **Express Mesh**: Enhanced mesh networks +- **Simple P2P**: Point-to-point networks + +### Workload Generation +- **SWM and UNION**: Workload generation +- **MPI trace replay**: Support for DUMPI traces +- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns + +### Multi-fidelity Simulation +- **Network surrogate models**: Switch between high-fidelity and surrogate modes +- **Application surrogate models**: Accelerate application-level simulation +- **Adaptive directors**: Intelligent switching between simulation modes + +## Contributing + +Before contributing, please run the full test suite. Some tests verify our determinism guarantees (every simulation should be reproducible), i.e., the number of net events processed between two runs in parallel mode should be the same. We want to keep our determinism guarantees forever. Non-deterministic simulations are often the result of faulty reverse handlers, which have caused serious bugs and hundreds of hours of debugging. 
+ +If you find yourself with a model that is not deterministic (two runs with the same initial configuration produce different numbers of net events), then you can check for errors in the reverse handlers via the ROSS reverse-handler check feature. For this, run your model with `--synch=6`. Make sure that all LPs in the simulation (i.e., routers, terminals, and others) have implemented proper reversibility checks (defined in a struct of type `crv_checkpointer`). + +## License + +See LICENSE file for licensing information. + +## Credits + +Developed by Argonne National Laboratory and Rensselaer Polytechnic Institute, with collaborations from UC Davis and Lawrence Livermore National Laboratory. + +## About CODES + +Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities. Our simulations build upon the Rensselaer Optimistic Simulation System (ROSS), a discrete event simulation framework that allows simulations to be run in parallel, decreasing the simulation run time of massive simulations to hours. We are using ROSS to explore topics including large-scale storage systems, I/O workloads, HPC network fabrics, distributed science systems, and data-intensive computation environments. The CODES project is a collaboration between the Mathematics and Computer Science department at Argonne National Laboratory and Rensselaer Polytechnic Institute. We collaborate with researchers at University of California at Davis to come up with novel methods for analysis and visualizations of large-scale event driven simulations. We also collaborate with Lawrence Livermore National Laboratory for modeling HPC interconnect systems. 
+ +## About this README + +Claude helped us in templating this doc. Any typos are our own and after the fact. diff --git a/codes/codes-workload.h b/codes/codes-workload.h index e97ec88c..e83ef3d1 100644 --- a/codes/codes-workload.h +++ b/codes/codes-workload.h @@ -302,7 +302,7 @@ void codes_workload_free_config_return(codes_workload_config_return *c); */ int codes_workload_load( const char* type, - const char* params, + const void* params, int app_id, int rank); @@ -354,6 +354,9 @@ int codes_workload_get_time(const char *type, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes); +// Returns the final iteration (positive) after which the workload will stop. If the result is -1, then there is nothing to do +int codes_workload_get_final_iteration(int wkld_id, int app_id, int rank); + /* implementation structure */ struct codes_workload_method { @@ -361,13 +364,14 @@ struct codes_workload_method void * (*codes_workload_read_config) ( ConfigHandle *handle, char const * section_name, char const * annotation, int num_ranks); - int (*codes_workload_load)(const char* params, int app_id, int rank); + int (*codes_workload_load)(const void* params, int app_id, int rank); void (*codes_workload_get_next)(int app_id, int rank, struct codes_workload_op *op); void (*codes_workload_get_next_rc2)(int app_id, int rank); int (*codes_workload_get_rank_cnt)(const char* params, int app_id); int (*codes_workload_finalize)(const char* params, int app_id, int rank); /* added for get all read or write time */ int (*codes_workload_get_time)(const char * params, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes); + int (*codes_workload_get_final_iteration)(int app_id, int rank); }; @@ -381,6 +385,10 @@ void codes_workload_add_method(struct codes_workload_method const * method); * will shut down automatically once they have issued their last event. 
*/ +/* Printing event :) */ +void fprint_codes_workload_op(FILE * out, char const * prefix, struct codes_workload_op * op); +char const * const op_type_string(enum codes_workload_op_type op_type); + #ifdef __cplusplus } #endif diff --git a/codes/codes_mapping.h b/codes/codes_mapping.h index 0e46447b..6d83098c 100644 --- a/codes/codes_mapping.h +++ b/codes/codes_mapping.h @@ -23,6 +23,9 @@ extern "C" { /* Returns number of LPs on the current PE */ int codes_mapping_get_lps_for_pe(void); +/* Returns the number of LPs of the given type name */ +tw_lpid codes_mapping_count_lps_of_type(char const lp_type_name[MAX_NAME_LENGTH]); + /* Takes the global LP ID and returns the rank (PE id) on which the LP is mapped.*/ tw_peid codes_mapping( tw_lpid gid); diff --git a/codes/congestion-controller-model.h b/codes/congestion-controller-model.h index e8b673b5..ff5f6f8f 100644 --- a/codes/congestion-controller-model.h +++ b/codes/congestion-controller-model.h @@ -156,6 +156,11 @@ typedef struct tlc_state double current_injection_bandwidth_coef; } tlc_state; +void save_tlc_state(tlc_state * into, tlc_state const * from); +void clean_tlc_state(tlc_state * into); +bool check_tlc_state(tlc_state * before, tlc_state * after); +void print_tlc_state(FILE * out, char const * prefix, tlc_state * state); + congestion_control_message* cc_msg_rc_storage_create(); void cc_msg_rc_storage_delete(void * ptr); diff --git a/codes/lp-type-lookup.h b/codes/lp-type-lookup.h index 1fc11483..b8799e2e 100644 --- a/codes/lp-type-lookup.h +++ b/codes/lp-type-lookup.h @@ -14,6 +14,8 @@ extern "C" { #include "ross.h" +#define MAX_LP_TYPES 64 + /* look up the lp type registered through lp_type_register. 
Mostly used * internally */ const tw_lptype* lp_type_lookup(const char* name); diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h index f713caaa..0a20a2f9 100644 --- a/codes/model-net-lp.h +++ b/codes/model-net-lp.h @@ -60,6 +60,16 @@ tw_event * model_net_method_event_new( void **msg_data, void **extra_data); +// Same as `model_net_method_event_new` extended to use user priorities to enforce ordering of some simulatenous events (USE WITH CARE!!) +tw_event * model_net_method_event_new_user_prio( + tw_lpid dest_gid, + tw_stime offset_ts, + tw_lp *sender, + int net_id, + void **msg_data, + void **extra_data, + tw_stime prio); + // Construct a model-net-specific event, similar to model_net_method_event_new. // The primary differences are: // - the event gets sent to final_dest_lp and put on it's receiver queue @@ -118,23 +128,33 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid, void **msg_data, void **extra_data); +// Functions to call when switching from highdef to surrogate, and surrogate to highdef +void model_net_method_switch_to_surrogate_lp(tw_lp * lp); +void model_net_method_switch_to_highdef_lp(tw_lp * lp); +void model_net_method_switch_to_surrogate(void); +void model_net_method_switch_to_highdef(void); + +// It will call the function (pointer) on the internal structure/network model. +// The lp parameter has to be a model-net lp. 
The function pointer has to coincide with the underlying subtype +void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp, void * data), void * data); + /// The following functions/data structures should not need to be used by /// model developers - they are just provided so other internal components can /// use them enum model_net_base_event_type { - MN_BASE_NEW_MSG, + MN_BASE_NEW_MSG = 1, // schedule next packet - MN_BASE_SCHED_NEXT, + MN_BASE_SCHED_NEXT = 2, // gather a sample from the underlying model - MN_BASE_SAMPLE, + MN_BASE_SAMPLE = 4, // message goes directly down to topology-specific event handler - MN_BASE_PASS, + MN_BASE_PASS = 8, /* message goes directly to topology-specific event handler for ending the simulation usefull if there is an infinite heartbeat pattern */ - MN_BASE_END_NOTIF, + MN_BASE_END_NOTIF = 16, // message calls congestion request method on topology specific handler - MN_CONGESTION_EVENT + MN_CONGESTION_EVENT = 32 }; typedef struct model_net_base_msg { @@ -147,6 +167,7 @@ typedef struct model_net_base_msg { // TODO: make this a union for multiple types of parameters mn_sched_params sched_params; model_net_sched_rc rc; // rc for scheduling events + int created_in_surrogate; } model_net_base_msg; typedef struct model_net_wrap_msg { @@ -169,6 +190,12 @@ typedef struct model_net_wrap_msg { } msg; } model_net_wrap_msg; +// Returns the (hidden) event type of the current event +int model_net_get_event_type_lp(model_net_wrap_msg *); + +// Extracting message contained within event MN_BASE_PASS +void * model_net_method_msg_from_tw_event(tw_lp *, model_net_wrap_msg *); + #ifdef __cplusplus } #endif diff --git a/codes/model-net-method.h b/codes/model-net-method.h index fdf09557..eab2dc7c 100644 --- a/codes/model-net-method.h +++ b/codes/model-net-method.h @@ -39,7 +39,8 @@ struct model_net_method void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool 
is_there_another_pckt_in_queue); void (*model_net_method_packet_event_rc)(tw_lp *sender); tw_stime (*model_net_method_recv_msg_event)( const char * category, @@ -70,6 +71,7 @@ struct model_net_method event_f cc_congestion_event_fn; revent_f cc_congestion_event_rc_fn; commit_f cc_congestion_event_commit_fn; + crv_checkpointer * checkpointer; }; extern struct model_net_method * method_array[]; diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h index 9f685b85..576c57eb 100644 --- a/codes/model-net-sched.h +++ b/codes/model-net-sched.h @@ -23,16 +23,16 @@ typedef struct mn_sched_params_s mn_sched_params; #include "model-net-method.h" /// types of schedulers -/// format: enum type, config string, function pointer names +/// format: enum type, config string, function pointer names, crv_checkpointer instance /// fcfs-full eschews packetization #define SCHEDULER_TYPES \ - X(MN_SCHED_FCFS, "fcfs", &fcfs_tab) \ - X(MN_SCHED_FCFS_FULL, "fcfs-full", &fcfs_tab) \ - X(MN_SCHED_RR, "round-robin", &rr_tab) \ - X(MN_SCHED_PRIO, "priority", &prio_tab) \ - X(MAX_SCHEDS, NULL, NULL) + X(MN_SCHED_FCFS, "fcfs", &fcfs_tab, &fcfs_chptr) \ + X(MN_SCHED_FCFS_FULL, "fcfs-full", &fcfs_tab, &fcfs_chptr) \ + X(MN_SCHED_RR, "round-robin", &rr_tab, NULL) \ + X(MN_SCHED_PRIO, "priority", &prio_tab, NULL) \ + X(MAX_SCHEDS, NULL, NULL, NULL) -#define X(a,b,c) a, +#define X(a,b,c,d) a, enum sched_type { SCHEDULER_TYPES }; @@ -197,7 +197,15 @@ void model_net_sched_add_rc( // set default parameters for messages that don't specify any void model_net_sched_set_default_params(mn_sched_params *sched_params); +// Reverse handler functionality +void save_model_net_sched(model_net_sched *before, model_net_sched const *after); +void clean_model_net_sched(model_net_sched *before); +bool check_model_net_sched(model_net_sched *before, model_net_sched *after); +void print_model_net_sched(FILE * out, char const * prefix, model_net_sched *sched); +void print_model_net_sched_checkpoint(FILE * out, char 
const * prefix, model_net_sched *sched); + extern char * sched_names[]; +extern const crv_checkpointer * sched_checkpointers[]; #ifdef __cplusplus } diff --git a/codes/model-net.h b/codes/model-net.h index 577ae5ae..abb630ce 100644 --- a/codes/model-net.h +++ b/codes/model-net.h @@ -130,6 +130,8 @@ typedef struct model_net_request { tw_lpid src_lp; // time the source event was called tw_stime msg_start_time; + // time the event was created + tw_stime msg_new_mn_event; uint64_t msg_size; uint64_t pull_size; uint64_t packet_size; @@ -143,7 +145,7 @@ typedef struct model_net_request { int self_event_size; char category[CATEGORY_NAME_MAX]; - //for counting msg app id + //Xin: passing app id to routers int app_id; } model_net_request; @@ -161,6 +163,12 @@ struct mn_stats long max_event_size; }; +bool check_model_net_request(model_net_request const * before, model_net_request const * after); +void print_model_net_request(FILE * out, char const * prefix, model_net_request * item); + +bool check_mn_stats(struct mn_stats const * before, struct mn_stats const * after); +void print_mn_stats(FILE * out, char const * prefix, struct mn_stats * item); + /* Registers all model-net LPs in ROSS. 
Should be called after * configuration_load, but before codes_mapping_setup */ void model_net_register(); diff --git a/codes/net/dragonfly-custom.h b/codes/net/dragonfly-custom.h index 96875f1d..af38d98f 100644 --- a/codes/net/dragonfly-custom.h +++ b/codes/net/dragonfly-custom.h @@ -87,6 +87,14 @@ struct terminal_custom_message tw_stime msg_start_time; tw_stime saved_busy_time_ross; tw_stime saved_fin_chunks_ross; + + //Yao: for counting msg app id + int app_id; + tw_stime last_received_time; + tw_stime last_sent_time; + //Xin: for busy time recording + tw_stime last_bufupdate_time; + }; #ifdef __cplusplus diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h index 9f89f60b..929f6952 100644 --- a/codes/net/dragonfly-dally.h +++ b/codes/net/dragonfly-dally.h @@ -20,18 +20,18 @@ struct terminal_dally_message { /* magic number */ int magic; - /* flit travel start time*/ + /* message travel start time*/ tw_stime travel_start_time; /* flit travel end time*/ tw_stime travel_end_time; /* packet ID of the flit */ unsigned long long packet_ID; - /* event type of the flit */ - short type; + /* event type of the flit. Actual type is `enum dfdally_event_t` */ + short type; + /* if the type==T_NOTIFY then we have to find out what type of notification is it. 
Actual type is `enum dfdally_notify_t` */ + short notify_type; /* category: comes from codes */ char category[CATEGORY_NAME_MAX]; - /* store category hash in the event */ - uint32_t category_hash; /* final destination LP ID, this comes from codes can be a server or any other LP type*/ tw_lpid final_dest_gid; /*sending LP ID from CODES, can be a server or any other LP type */ @@ -92,7 +92,10 @@ struct terminal_dally_message int path_type; int saved_app_id; - /* for reverse computation */ + // For packet latency predictor (surrogate) + bool is_there_another_pckt_in_queue; + + /* for reverse computation */ short num_rngs; short num_cll; @@ -107,19 +110,37 @@ struct terminal_dally_message unsigned long long * rc_qos_data; int * rc_qos_status; + // TODO (elkin): all these fields to store information for rollback purposes got out of control, the rc_stack was created for things like this! Refactor this out! short saved_send_loop; tw_stime saved_available_time; tw_stime saved_min_lat; tw_stime saved_avg_time; tw_stime saved_rcv_time; - tw_stime saved_busy_time; + tw_stime saved_busy_time; tw_stime saved_total_time; tw_stime saved_sample_time; tw_stime msg_start_time; tw_stime saved_busy_time_ross; tw_stime saved_fin_chunks_ross; + + // To use in rollback calls + tw_stime saved_last_in_queue_time; + tw_stime saved_next_packet_delay; + tw_stime msg_new_mn_event; + + //Yao: for counting msg app id + tw_stime last_received_time; + tw_stime last_sent_time; + + //Xin: for busy time recording + tw_stime last_bufupdate_time; + + tw_stime saved_processing_time; }; +void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg); +bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after); + #ifdef __cplusplus } #endif diff --git a/codes/quicklist.h b/codes/quicklist.h index e2647648..0a73b761 100644 --- a/codes/quicklist.h +++ b/codes/quicklist.h @@ -30,6 +30,8 @@ extern "C" { 
#endif #include +#include +#include struct qlist_head { struct qlist_head *next, *prev; @@ -193,6 +195,14 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head #define qlist_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)((&((type *)0)->member)))) +/** + * QLIST_OFFSET - get offset to the member that holds qlist_header + * @type: the type of the struct this is embedded in. + * @member: the name of the qlist_struct within the struct. + */ +#define QLIST_OFFSET(type, member) \ + (unsigned long)((&((type *)0)->member)) + /** * qlist_for_each - iterate over a qlist * @pos: the &struct qlist_head to use as a loop counter. @@ -252,7 +262,7 @@ static inline int qlist_exists(struct qlist_head *list, struct qlist_head *qlink return 0; } -static inline int qlist_count(struct qlist_head *list) +static inline int qlist_count(struct qlist_head const *list) { struct qlist_head *pos; int count = 0; @@ -268,6 +278,25 @@ static inline int qlist_count(struct qlist_head *list) return count; } +static inline void qlist_add_at_index(struct qlist_head *newi, struct qlist_head *list, int index) +{ + if (index < 0) + { + while(index++) + { + list = list->prev; + } + } + else + { + while(index--) + { + list = list->next; + } + } + __qlist_add(newi, list, list->next); +} + static inline struct qlist_head * qlist_find( struct qlist_head *list, int (*compare)(struct qlist_head *, void *), @@ -284,6 +313,37 @@ static inline struct qlist_head * qlist_find( return NULL; } +/** + * are_qlist_equal - determine if two qlists have the same elements + */ +static inline bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) { + int const num_elems = qlist_count(left); + if (num_elems != qlist_count(right)) { + return false; + } + + // Checking element by element + int i = 0; + struct qlist_head * elem_left = left->next; + struct qlist_head * elem_right = right->next; + 
while (elem_left != left) { + char * entry_left = (char *)(elem_left) - offset_ql; + char * entry_right = (char *)(elem_right) - offset_ql; + + if (!cmp(entry_left, entry_right)) { + return false; + } + + elem_left = elem_left->next; + elem_right = elem_right->next; + i++; + } + assert(i == num_elems); + assert(elem_right == right); + + return true; +} + /* * Local variables: * c-indent-level: 4 diff --git a/codes/surrogate/app-iteration-predictor/average.h b/codes/surrogate/app-iteration-predictor/average.h new file mode 100644 index 00000000..0d3ace0d --- /dev/null +++ b/codes/surrogate/app-iteration-predictor/average.h @@ -0,0 +1,21 @@ +#ifndef CODES_SURROGATE_ITERATION_PREDICTOR_AVERAGE_H +#define CODES_SURROGATE_ITERATION_PREDICTOR_AVERAGE_H + +/** + * This predictor collects the time that it takes to complete an iteration, and + * uses this information as the prediction. The trigger becomes + */ + +#include "surrogate/app-iteration-predictor/common.h" + +struct avg_app_config { + int num_apps; + int num_nodes_in_pe; + int num_iters_to_collect; +}; + +struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config *); + +void free_avg_app_iteration_predictor(void); + +#endif /* end of include guard */ diff --git a/codes/surrogate/app-iteration-predictor/common.h b/codes/surrogate/app-iteration-predictor/common.h new file mode 100644 index 00000000..5f5e7b96 --- /dev/null +++ b/codes/surrogate/app-iteration-predictor/common.h @@ -0,0 +1,85 @@ +#ifndef CODES_SURROGATE_ITERATION_PREDICTOR_COMMON_H +#define CODES_SURROGATE_ITERATION_PREDICTOR_COMMON_H + +/** + * common.h -- common datatypes and functionality to all application iteration predictors + * -Elkin Cruz + * + * Copyright (c) 2025 Rensselaer Polytechnic Institute + */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Iteration application prediction machinery. 
Notice that any of these predictors have to know how many iterations to run in total, thus they need data about the number of steps the application will take. + */ + +enum NODE_TYPE { + NODE_TYPE_unassigned = 0, + NODE_TYPE_background_noise = 1, + NODE_TYPE_app = 2, +}; + +struct app_iter_node_config { + int app_id; + enum NODE_TYPE type; + union { + // To be used by NODE_TYPE_app only + int app_ending_iter; + }; +}; + +// This returns how much to skip ahead and when to restart +struct iteration_pred { + int resume_at_iter; + double restart_at; +}; + +enum FAST_FORWARD { + FAST_FORWARD_switching = 0, + FAST_FORWARD_restart, // Stop accumulating data (we gain nothing from switching to surrogate-mode) and restart at future point in time +}; +struct fast_forward_values { + enum FAST_FORWARD status; // Are we switching to surrogate-mode + // Only needed for "switching" and "restart" + double restarting_at; // Time at which we will have fully restarted (or expect to) +}; + + +// Model calls to predictor +typedef void (*init_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config *); // Initializes the predictor (eg, average) +typedef void (*feed_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id, double iteration_time); // Feeds last iteration time +typedef void (*end_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, double time); // Tells the predictor that the application has stopped running +typedef struct iteration_pred (*predict_pred_iter_f) (tw_lp * lp, int nw_id_in_pe); // Get prediction +typedef void (*predict_pred_iter_rc_f) (tw_lp * lp, int nw_id_in_pe); // Reverse prediction (reverse state of predictor one prediction) +typedef bool (*have_we_hit_switch_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id); // Are we ready to switch to a future iteration? +// Director calls to predictor module +typedef bool (*is_predictor_read_f) (void); // Checking if it is a good time to switch (enough data has been collected or we have received some
notification of an application ending, forcing us to restart collecting data). This might trigger an MPI_Allreduce call, thus has to be called by all PEs! +typedef void (*reset_pred_iter_f) (void); // Resets the predictor (eg, average) +typedef struct fast_forward_values (*prepare_fast_forward_f) (void); // Checking if it is a good time to switch (enough data has been collected) + +// API that predictors have to comply with +struct app_iteration_predictor { + struct { + init_pred_iter_f init; + feed_pred_iter_f feed; + end_pred_iter_f ended; + predict_pred_iter_f predict; + predict_pred_iter_rc_f predict_rc; + have_we_hit_switch_f have_we_hit_switch; + } model; + struct { + reset_pred_iter_f reset; + is_predictor_read_f is_predictor_ready; + prepare_fast_forward_f prepare_fast_forward_jump; + } director; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h new file mode 100644 index 00000000..9fded3db --- /dev/null +++ b/codes/surrogate/application-surrogate.h @@ -0,0 +1,43 @@ +#ifndef CODES_SURROGATE_APP_SURROGATE_H +#define CODES_SURROGATE_APP_SURROGATE_H + +/** + * application-surrogate.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes for the application level + * Elkin Cruz + * + * Copyright (c) 2025 Rensselaer Polytechnic Institute + */ + +#include +#include "surrogate/app-iteration-predictor/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum APP_DIRECTOR_OPTS { + APP_DIRECTOR_OPTS_every_n_gvt = 0, // Call director every `n` GVTs + APP_DIRECTOR_OPTS_call_every_ns, // Call director every X (virtual) nanoseconds +}; + +struct application_director_config { + enum APP_DIRECTOR_OPTS option; + union { + // To use when APP_DIRECTOR_OPTS_every_n_gvt + int every_n_gvt; + // To use when APP_DIRECTOR_OPTS_call_every_ns + double call_every_ns; + }; + bool use_network_surrogate; +}; + +// Main function
responsible for switching between high-fidelity and (application iteration) surrogate +void application_director_configure(struct application_director_config *, struct app_iteration_predictor *); + +void application_director_finalize(void); + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h new file mode 100644 index 00000000..f095e29c --- /dev/null +++ b/codes/surrogate/init.h @@ -0,0 +1,58 @@ +#ifndef CODES_SURROGATE_INIT_H +#define CODES_SURROGATE_INIT_H + +/** + * init.h -- Config/initialization point + * Elkin Cruz + * + * Copyright (c) 2023 Rensselaer Polytechnic Institute + */ +#include "codes/surrogate/packet-latency-predictor/common.h" +#include "codes/surrogate/app-iteration-predictor/common.h" +#include "codes/surrogate/network-surrogate.h" + +// Basic level of debugging is 1. It should always be turned on +// because it tells us when a switch to or from surrogate-mode happened. +// It can be deactivated (set to 0) if it ends up being too obnoxious +// Level 0: don't show anything +// Level 1: show when surrogate-mode is activated and deactivated +// Level 2: level 1 and some information at each GVT +// Level 3: level 1 and show extended information at each GVT +#define DEBUG_DIRECTOR 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Variable declarations + */ + +// Time spent switching from high-fidelity to surrogate and vice versa +extern double surrogate_switching_time; +// Total time spent in surrogate mode (between switches) +extern double time_in_surrogate; +// Time at which we transitioned into surrogate (zero means that we are in high-fidelity) +extern double surrogate_time_last; + +void print_surrogate_stats(void); + +/** Loads surrogate configuration, including packet latency predictor.
*/ +bool network_surrogate_configure( + char const * const annotation, + struct network_surrogate_config * const config, + struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor. Caller does not need to free pointer +); + +void application_surrogate_configure( + int num_terminals_on_pe, + int num_apps, + struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. No need to free pointer +); +void surrogates_finalize(void); + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h new file mode 100644 index 00000000..b4dae45c --- /dev/null +++ b/codes/surrogate/network-surrogate.h @@ -0,0 +1,71 @@ +#ifndef CODES_SURROGATE_NETWORK_SURROGATE_H +#define CODES_SURROGATE_NETWORK_SURROGATE_H + +/** + * network-surrogate.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes + * Elkin Cruz + * + * Copyright (c) 2023 Rensselaer Polytechnic Institute + */ + +#include +#include +#include "codes/codes_mapping.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Functions that director should have access to +typedef void (*switch_surrogate_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C) +typedef bool (*is_surrogate_on_f) (void); // Reports whether the network model (e.g, dragonfly-dally.C) has switched to surrogate mode + +struct network_model_surrogate { + switch_surrogate_f switch_surrogate; // this function switches the model to and from surrogate-mode on a PE basis.
It has to be called on all PEs to switch the entire simulation to its surrogate version + is_surrogate_on_f is_surrogate_on; // determines if the model has switched or not +}; + + +// Switches back and forth from surrogate mode as defined by network model +// (e.g, by dragonfly-dally.C) +// Parameters: `data` corresponds to the lp sub-state, lp is the lp pointer, and the array of events in queue (to be processed) +typedef void (*model_switch_f) (void * data, tw_lp * lp, tw_event **); +typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode +typedef void (*model_check_event_f) (void * state, tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode + +struct lp_types_switch { + char lpname[MAX_NAME_LENGTH]; + bool trigger_idle_modelnet; // Trigger idle events for model-net (prevents a model to be stuck in a schedule loop if it is to process packets during surrogate-mode). 
If this is true and the lpname does not start with 'modelnet_', the behaviour is undefined + model_switch_f highdef_to_surrogate; + model_switch_f surrogate_to_highdef; + model_ask_if_freeze_f should_event_be_frozen; // true means event from LP type shouldn't be frozen + model_ask_if_freeze_f should_event_be_deleted; // true means event from LP type shouldn't be deleted + model_check_event_f check_event_in_queue; + model_switch_f reset_predictor; +}; + +struct switch_at_struct { + size_t current_i; + size_t total; + double * time_stampts; // list of precise timestamps at which to switch +}; + +struct network_surrogate_config { + struct network_model_surrogate model; //!< functionality needed by the director to switch the model back and forth from high-fidelity to surrogate + int total_terminals; //!< total number of terminals + size_t n_lp_types; + struct lp_types_switch lp_types[MAX_LP_TYPES]; +}; + +void network_director_configure(struct network_surrogate_config *, struct switch_at_struct * switch_network_at, bool freeze_network_on_switch); + +// Function for application director to use network freezing machinery +void surrogate_switch_network_model(tw_pe * pe, bool is_queue_empty); + +void network_director_finalize(void); + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/packet-latency-predictor/average.h b/codes/surrogate/packet-latency-predictor/average.h new file mode 100644 index 00000000..f793bfa3 --- /dev/null +++ b/codes/surrogate/packet-latency-predictor/average.h @@ -0,0 +1,27 @@ +#ifndef CODES_SURROGATE_LATENCY_PREDICTOR_AVERAGE_H +#define CODES_SURROGATE_LATENCY_PREDICTOR_AVERAGE_H + +/** + * average.h -- implements a strategy to determine how long will it take for a + * packet to arrive at its destination based on averaging the time that takes + * to send packets from source to destination terminals + * -Elkin Cruz + * + * Copyright (c) 2023 Rensselaer Polytechnic Institute + */ + +#include 
"codes/surrogate/packet-latency-predictor/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern double ignore_until; + +struct packet_latency_predictor average_latency_predictor(int num_terminals); + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h new file mode 100644 index 00000000..3faa7bff --- /dev/null +++ b/codes/surrogate/packet-latency-predictor/common.h @@ -0,0 +1,59 @@ +#ifndef CODES_SURROGATE_LATENCY_PREDICTOR_COMMON_H +#define CODES_SURROGATE_LATENCY_PREDICTOR_COMMON_H + +/** + * common.h -- common datatypes and functionality to all latency predictors + * -Elkin Cruz + * + * Copyright (c) 2023 Rensselaer Polytechnic Institute + */ +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Terminal-to-terminal packet latency prediction machinery + */ + +// Packet latencies +struct packet_start { + uint64_t packet_ID; + tw_lpid dest_terminal_lpid; // ROSS id; LPID for terminal + unsigned int dfdally_dest_terminal_id; // number in [0, total terminals) + double travel_start_time; + double workload_injection_time; // this is when the workload passed down the event to model-net + double processing_packet_delay; // delay for this packet to be processed from previous packet in the queue + uint32_t packet_size; + bool is_there_another_pckt_in_queue; // is there another packet in queue +}; + +struct packet_end { + double travel_end_time; + double next_packet_delay; // Delay to start processing next packet +}; + +// Definition of functions needed to define a predictor +typedef void (*init_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM) +typedef void (*reset_pred_lat_f) (void * predictor_data, tw_lp * lp); +typedef void (*feed_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct 
packet_end const *); // Feeds known latency for packet sent at `now` +typedef struct packet_end (*predict_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now` +typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction) + +// API for packet latency predictors +struct packet_latency_predictor { + init_pred_lat_f init; + reset_pred_lat_f reset; + feed_pred_lat_f feed; + predict_pred_lat_f predict; + predict_pred_lat_rc_f predict_rc; + size_t predictor_data_sz; // `predictor_data` size +}; + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/packet-latency-predictor/torch-jit.h b/codes/surrogate/packet-latency-predictor/torch-jit.h new file mode 100644 index 00000000..80e532a5 --- /dev/null +++ b/codes/surrogate/packet-latency-predictor/torch-jit.h @@ -0,0 +1,19 @@ +#ifndef CODES_SURROGATE_TORCHJIT_H +#define CODES_SURROGATE_TORCHJIT_H + +#include +#include "codes/surrogate/init.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void surrogate_torch_init(char const * dir); + +extern struct packet_latency_predictor torch_latency_predictor; + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes_config.h.in b/codes_config.h.cmake.in similarity index 61% rename from codes_config.h.in rename to codes_config.h.cmake.in index 82554a42..1a687a23 100644 --- a/codes_config.h.in +++ b/codes_config.h.cmake.in @@ -7,6 +7,8 @@ // swm #define SWM_DATAROOTDIR "${SWM_DATAROOTDIR}" +// union +#define UNION_DATADIR "${UNION_DATAROOTDIR}" // damaris @@ -15,4 +17,4 @@ // darshan -// cortex \ No newline at end of file +// cortex diff --git a/configure.ac b/configure.ac deleted file mode 100755 index 1a40e40d..00000000 --- a/configure.ac +++ /dev/null @@ -1,234 +0,0 @@ -# -*- Autoconf -*- -# Process this file with autoconf 
to produce a configure script. - -AC_PREREQ([2.67]) -AC_INIT([codes], [1.4.2], [http://trac.mcs.anl.gov/projects/codes/newticket],[],[http://www.mcs.anl.gov/projects/codes/]) -LT_INIT - -AC_CANONICAL_TARGET -AC_CANONICAL_SYSTEM -AC_CANONICAL_HOST - -AM_INIT_AUTOMAKE([foreign subdir-objects -Wall]) - -m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) - -AC_CONFIG_SRCDIR([doc/BUILD_STEPS]) -AC_CONFIG_HEADERS([codes_config.h]) - -AX_PROG_BISON([],[AC_MSG_ERROR([could not find required package bison])]) -AX_PROG_FLEX([],[AC_MSG_ERROR([could not find required package flex])]) -AC_SUBST([BISON]) -AC_SUBST([FLEX]) - -# Checks for programs. -AC_PROG_CC -AM_PROG_CC_C_O -AC_PROG_CXX -AC_PROG_CXXCPP -AC_PROG_RANLIB - -PKG_PROG_PKG_CONFIG - -if test -z "$PKG_CONFIG" ; then - AC_MSG_ERROR([pkg-config is required. Please install the pkg-config program on your PATH or set the PKG_CONFIG environment variable to the appropriate package.]) -fi - -# Check for C99 -AC_PROG_CC_C99 - -AC_REQUIRE_CPP - -# Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS([stdlib.h string.h unistd.h execinfo.h pthread.h malloc.h]) - -# Checks for typedefs, structures, and compiler characteristics. -AC_C_CONST -AC_C_INLINE -AC_TYPE_INT8_T -AC_TYPE_INT16_T -AC_TYPE_INT32_T -AC_TYPE_INT64_T -AC_TYPE_UINT8_T -AC_TYPE_UINT16_T -AC_TYPE_UINT32_T -AC_TYPE_UINT64_T -AC_TYPE_SIZE_T -AC_TYPE_SSIZE_T - -# Add warning flags by default -AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall"]) -AX_CHECK_COMPILE_FLAG([-Wextra], [CFLAGS="$CFLAGS -Wextra"]) -AX_CHECK_COMPILE_FLAG([-Wshadow], [CFLAGS="$CFLAGS -Wshadow"]) - -# Checks for library functions. 
-AC_CHECK_FUNCS([memset]) -AC_CHECK_LIB([pthread],[pthread_create],,[AC_MSG_ERROR([Could not find pthread_create!])]) -AC_CHECK_LIB([m],[sqrt],,[AC_MSG_ERROR([Could not find sqrt!])]) - - -AX_PROG_BISON_CLFEATURES([],[AC_MSG_WARN([Could not find bison])], -[bison_ok="yes"], [bison_ok="no"]) -AC_SUBST([bison_ok]) - -dnl Check to see if CC is an MPI compiler -AC_MSG_CHECKING(whether the mpicc compiler works) -AC_TRY_COMPILE([#include ], [int ret = MPI_Init(0, (void*)0)], - AC_MSG_RESULT(yes), - AC_MSG_RESULT(no) - AC_MSG_ERROR(CC doesnt appear to be a valid MPI compiler. See INSTALL document or try adding CC=mpicc to your configure command line.) -) - -# check for ROSS -PKG_CHECK_MODULES_STATIC([ROSS], [ross], [], - [AC_MSG_ERROR([Could not find working ross installation via pkg-config])]) - -#check for Damaris -AC_ARG_WITH([damaris],[AS_HELP_STRING([--with-damaris], - [build with ROSS-Damaris in situ analysis support])], - [use_rdamaris=yes],[use_rdamaris=no]) -if test "x${use_rdamaris}" = xyes ; then - PKG_CHECK_MODULES_STATIC([ROSS_Damaris], [ross-damaris], [], - [AC_MSG_ERROR([Could not find working ROSS-Damaris installation via pkg-config])]) -fi -AM_CONDITIONAL(USE_RDAMARIS, [test "x${use_rdamaris}" = xyes]) - -# check for enable-g -AC_ARG_ENABLE([g],[AS_HELP_STRING([--enable-g], - [Build with GDB symbols])], - [use_debug=yes],[use_debug=no]) -AM_CONDITIONAL(USE_DEBUG, [test "x${use_debug}" = xyes]) - -# check for Darshan -AC_ARG_WITH([darshan],[AS_HELP_STRING([--with-darshan], - [Build with the darshan workload support])], - [use_darshan=yes],[use_darshan=no]) -if test "x${use_darshan}" = xyes ; then - PKG_CHECK_MODULES_STATIC([DARSHAN], [darshan-util], [], - [AC_MSG_ERROR([Could not find working darshan installation via pkg-config])]) - DARSHAN_VER=`pkg-config --modversion darshan-util` - AX_COMPARE_VERSION([$DARSHAN_VER],[ge],[2.3],[], - [AC_MSG_ERROR([Found Darshan $DARSHAN_VER but 2.3 or greater is needed])]) -fi -AM_CONDITIONAL(USE_DARSHAN, [test 
"x${use_darshan}" = xyes]) - - -# check for Argobots -AC_ARG_WITH([online],[AS_HELP_STRING([--with-online@<:@=DIR@:>@], - [Build with the online workloads and argobots support])], - [use_online=yes],[use_online=no]) -if test "x${use_online}" != "xno" ; then - AM_CONDITIONAL(USE_ONLINE, true) - AX_BOOST_BASE([1.66]) - AX_CXX_COMPILE_STDCXX(11, noext, mandatory) - PKG_CHECK_MODULES_STATIC([ARGOBOTS], [argobots], [], - [AC_MSG_ERROR([Could not find working argobots installation via pkg-config])]) - PKG_CHECK_MODULES_STATIC([SWM], [swm], [], - [AC_MSG_ERROR([Could not find working swm installation via pkg-config])]) - PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [], - [AC_MSG_ERROR[Could not find shared directory in SWM]]) - AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json - data files]) -else - AM_CONDITIONAL(USE_ONLINE, false) -fi - -# check for Recorder -AM_CONDITIONAL(USE_RECORDER, true) -RECORDER_CPPFLAGS="-DUSE_RECORDER=1" -AC_SUBST(RECORDER_CPPFLAGS) - -#check for Dumpi -AC_ARG_WITH([dumpi],[AS_HELP_STRING([--with-dumpi@<:@=DIR@:>@], - [location of Dumpi installation])]) -if test "x${with_dumpi}" != "x" ; then - CFLAGS="-I${with_dumpi}/include" - LIBS="-L${with_dumpi}/lib/ -lundumpi" - AC_CHECK_LIB([undumpi], - [undumpi_open], [], [AC_MSG_ERROR(Could not find dumpi)]) - AM_CONDITIONAL(USE_DUMPI, true) - DUMPI_CFLAGS="-I${with_dumpi}/include" - DUMPI_LIBS="-L${with_dumpi}/lib/ -lundumpi" - AC_SUBST(DUMPI_LIBS) - AC_SUBST(DUMPI_CFLAGS) -else - AM_CONDITIONAL(USE_DUMPI, false) -fi - -# check for Cortex -AC_ARG_WITH([cortex],[AS_HELP_STRING([--with-cortex@<:@=DIR@:>@], - [location of Cortex installation])]) - -# check for Python -AC_ARG_WITH([python],[AS_HELP_STRING([--with-python@<:@=DIR@:>@], - [location of Python 2.7 installation])]) - -# check for Boost Python -AC_ARG_WITH([boost],[AS_HELP_STRING([--with-boost@<:@=DIR@:>@], - [location of Boost Python installation])]) - -if [ test "x${with_python}" != "x" -a "x${with_boost}" 
!= "x"] ; then - AC_CHECK_FILES([${with_python}/lib/libpython2.7.so ${with_boost}/lib/libboost_python.a], - AM_CONDITIONAL(USE_PYTHON, true), - AC_MSG_ERROR(Could not find Python and/or Boost-Python libraries)) - PYTHON_CFLAGS="-I${with_python}/include -I${with_boost}/include" - PYTHON_LIBS="-L${with_boost}/lib -lboost_python -L${with_python}/lib/ -lpython2.7" - AC_SUBST(PYTHON_LIBS) - AC_SUBST(PYTHON_CFLAGS) -else - AM_CONDITIONAL(USE_PYTHON, false) -fi - -if test "x${with_cortex}" != "x" ; then - AC_CHECK_FILES([${with_cortex}/lib/libcortex.a ${with_cortex}/lib/libcortex-mpich.a], - AM_CONDITIONAL(USE_CORTEX, true), - AC_MSG_ERROR(Could not find Cortex libraries libcortex.a and/or libcortex-mpich.a)) - CORTEX_CFLAGS="-I${with_cortex}/include" - CORTEX_LIBS="-L${with_cortex}/lib/ -lcortex-mpich -lcortex -lstdc++" - AC_SUBST(CORTEX_LIBS) - AC_SUBST(CORTEX_CFLAGS) -else - AM_CONDITIONAL(USE_CORTEX, false) -fi - -if [ test "x${with_cortex}" != "x" -a "x${with_python}" != "x" -a "x${with_boost}" != "x"] ; then - AC_CHECK_FILE([${with_cortex}/lib/libcortex-python.a], - AM_CONDITIONAL(USE_CORTEX_PYTHON, true), - AC_MSG_ERROR(Could not find library libcortex-python.a)) - CORTEX_PYTHON_CFLAGS="-I${with_cortex}/include" - CORTEX_PYTHON_LIBS="-L${with_cortex}/lib/ -lcortex-python" - AC_SUBST(CORTEX_PYTHON_LIBS) - AC_SUBST(CORTEX_PYTHON_CFLAGS) -else - AM_CONDITIONAL(USE_CORTEX_PYTHON, false) -fi - -dnl ====================================================================== -dnl Try harder to be valgrind safe -dnl ====================================================================== -AC_ARG_ENABLE(valgrind-clean, - [AS_HELP_STRING( - [--enable-valgrind-clean], - [Try harder to avoid valgrind warnings]) - ]) - -AS_IF([test "x$enable_valgrind_clean" = "xyes"], [ - AC_DEFINE([VALGRIND], [1], [If enabling valgrind-clean build]) -]) - - -dnl AC_CONFIG_FILES([src/iokernellang/codesparser.y]) -if test "x$srcdir" != "x."; then - AC_CONFIG_LINKS([tests/conf:$srcdir/tests/conf]) -fi - 
-AC_CONFIG_FILES([Makefile]) - -AC_OUTPUT([maint/codes.pc]) -AC_OUTPUT([src/network-workloads/conf/dragonfly-custom/modelnet-test-dragonfly-1728-nodes.conf]) -AC_OUTPUT([src/network-workloads/conf/dragonfly-plus/modelnet-test-dragonfly-plus.conf]) -AC_OUTPUT([src/network-workloads/conf/dragonfly-dally/modelnet-test-dragonfly-dally.conf]) -AC_OUTPUT([doc/example/tutorial-ping-pong.conf]) - - diff --git a/doc/Doxyfile b/doc/Doxyfile new file mode 100644 index 00000000..fce3f842 --- /dev/null +++ b/doc/Doxyfile @@ -0,0 +1,376 @@ +# Doxyfile 1.9.1 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = "CODES" +PROJECT_NUMBER = +PROJECT_BRIEF = +PROJECT_LOGO = +OUTPUT_DIRECTORY = doc +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +OUTPUT_TEXT_DIRECTION = None +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = YES +JAVADOC_BANNER = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +PYTHON_DOCSTRING = YES +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +OPTIMIZE_OUTPUT_SLICE = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +TOC_INCLUDE_HEADINGS = 5 +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT 
= NO +LOOKUP_CACHE_SIZE = 0 +NUM_PROC_THREADS = 1 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES +EXTRACT_PRIV_VIRTUAL = YES +EXTRACT_PACKAGE = YES +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = YES +EXTRACT_ANON_NSPACES = YES +RESOLVE_UNNAMED_PARAMS = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_INCLUDE_FILES = YES +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +INPUT = src/ codes/ doc/example/ +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = *.c \ + *.C \ + *.h \ + *.py +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = 
+EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +ALPHABETICAL_INDEX = YES +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = NO +HTML_DYNAMIC_MENUS = YES +HTML_DYNAMIC_SECTIONS = NO +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO 
+ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = NO +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +HTML_FORMULA_FORMAT = png +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +FORMULA_MACROFILE = +USE_MATHJAX = NO +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +GENERATE_LATEX = NO +LATEX_OUTPUT = latex +LATEX_CMD_NAME = +MAKEINDEX_CMD_NAME = makeindex +LATEX_MAKEINDEX_CMD = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4 +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +LATEX_EXTRA_STYLESHEET = +LATEX_EXTRA_FILES = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_SOURCE_CODE = NO +LATEX_BIB_STYLE = plain +LATEX_TIMESTAMP = NO +LATEX_EMOJI_DIRECTORY = + +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- + +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +RTF_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- + +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_SUBDIR = +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to 
the XML output +#--------------------------------------------------------------------------- + +GENERATE_XML = NO +XML_OUTPUT = xml +XML_PROGRAMLISTING = YES +XML_NS_MEMB_FILE_SCOPE = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- + +GENERATE_DOCBOOK = NO +DOCBOOK_OUTPUT = docbook +DOCBOOK_PROGRAMLISTING = NO + +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = NO +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- + +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +CLASS_DIAGRAMS = YES 
+DIA_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +DOT_NUM_THREADS = 0 +DOT_FONTNAME = Helvetica +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +UML_LIMIT_NUM_FIELDS = 10 +DOT_UML_DETAILS = NO +DOT_WRAP_THRESHOLD = 17 +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = +DOTFILE_DIRS = +MSCFILE_DIRS = +DIAFILE_DIRS = +PLANTUML_JAR_PATH = +PLANTUML_CFG_FILE = +PLANTUML_INCLUDE_PATH = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES diff --git a/doc/example/CMakeLists.txt b/doc/example/CMakeLists.txt new file mode 100644 index 00000000..f665d234 --- /dev/null +++ b/doc/example/CMakeLists.txt @@ -0,0 +1,26 @@ +set(example-files + example + tutorial-synthetic-ping-pong + ) + +foreach(namefile ${example-files}) + add_executable(${namefile} ${namefile}.c) + target_link_libraries(${namefile} PUBLIC codes) +endforeach() + +# Saving default config files to run experiments with +configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.template.conf.in @ONLY) +configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.template.conf.in @ONLY) + +set(single_quote "'") +set(double_quote "\"") + +set(PACKET_SIZE "4096") +set(CHUNK_SIZE "64") +set(NETWORK_TREATMENT "freeze") +set(PACKET_LATENCY_TRACE_PATH "packet-latency-trace/") +set(IGNORE_UNTIL "200e4") +set(PREDICTOR_TYPE "average") +string(REPLACE ${single_quote} ${double_quote} SWITCH_TIMESTAMPS "'1000e4', '8900e4'") +configure_file(tutorial-ping-pong.conf.in tutorial-ping-pong.conf) +configure_file(tutorial-ping-pong-surrogate.conf.in tutorial-ping-pong-surrogate.conf) diff --git a/doc/example/example.conf b/doc/example/example.conf index 161ab626..5f739647 100644 --- 
a/doc/example/example.conf +++ b/doc/example/example.conf @@ -24,7 +24,7 @@ PARAMS # - message_size: ROSS expects you to upper bound your event message size. # Going over this size will crash or otherwise destroy your # simulation. - message_size="368"; + message_size="432"; # - pe_mem_factor: this is a multiplier to the event memory allocation that # ROSS does up front (multiplier is per-PE). Increase this # (or change the associated mem_factor variable in diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in new file mode 100644 index 00000000..fd53f4d1 --- /dev/null +++ b/doc/example/tutorial-ping-pong-surrogate.conf.in @@ -0,0 +1,84 @@ +# Run this example with: +# > cd path-to-codes/build +# > mpirun -np 3 doc/example/tutorial-synthetic-ping-pong --synch=3 --num_messages=10000 --lp-io-dir=codes-output -- doc/example/tutorial-ping-pong-surrogate.conf +LPGROUPS +{ + MODELNET_GRP + { + repetitions="36"; +# name of this lp changes according to the model + nw-lp="2"; +# these lp names will be the same for dragonfly-custom model + modelnet_dragonfly_dally="2"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ +# packet size in the network + packet_size="${PACKET_SIZE}"; + modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); + # scheduler options + modelnet_scheduler="fcfs"; +# chunk size in the network (when chunk size = packet size, packets will not be +# divided into chunks) + chunk_size="${CHUNK_SIZE}"; +# modelnet_scheduler="round-robin"; +# number of routers in group + num_routers="4"; +# number of groups in the network + num_groups="9"; +# buffer size in bytes for local virtual channels + local_vc_size="16384"; +#buffer size in bytes for global virtual channels + global_vc_size="16384"; +#buffer size in bytes for compute node virtual channels + cn_vc_size="32768"; +#bandwidth in GiB/s for local channels + local_bandwidth="2.0"; +# bandwidth in GiB/s for global channels + 
global_bandwidth="2.0"; +# bandwidth in GiB/s for compute node-router channels + cn_bandwidth="2.0"; +# ROSS message size + message_size="440"; +# number of compute nodes connected to router, dictated by dragonfly config +# file + num_cns_per_router="2"; +# number of global channels per router + num_global_channels="2"; +# network config file for intra-group connections + intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; +# network config file for inter-group connections + inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; +# routing protocol to be used + routing="prog-adaptive"; +# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything + save_packet_latency_path="${PACKET_LATENCY_TRACE_PATH}"; +# router buffer occupancy snapshots + router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} ); +} +NETWORK_SURROGATE { + enable="1"; # Options: 0 or 1 + +# determines the director switching from surrogate to high-def simulation strategy + director_mode="at-fixed-virtual-times"; + +# director configuration for: director_mode == "at-fixed-virtual-times" +# timestamps at which to switch to surrogate-mode and back + #fixed_switch_timestamps=( "100e4", "8900e4" ); # the first switch happens at around 100 ping messages, the second at approx. 9900 pings + #fixed_switch_timestamps=( "1000e4", "8900e4" ); # the first switch happens at around 1000 ping messages, the second at approx. 9900 pings + fixed_switch_timestamps=( ${SWITCH_TIMESTAMPS} ); + +# latency predictor to use. Options: average, torch-jit + packet_latency_predictor="${PREDICTOR_TYPE}"; +# some workload models need some time to stabilize, a point where the network behaviour stabilizes. 
The predictor will ignore all packet latencies that arrive during this period + ignore_until="${IGNORE_UNTIL}"; + +# parameters for torch-jit latency predictor + torch_jit_mode="single-static-model-for-all-terminals"; + torch_jit_model_path="${TORCH_JIT_MODEL_PATH}"; + +# selecting network treatment on switching to surrogate. Options: freeze, nothing + network_treatment_on_switch="${NETWORK_TREATMENT}"; +} diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in index d8757459..e8e2ce4e 100644 --- a/doc/example/tutorial-ping-pong.conf.in +++ b/doc/example/tutorial-ping-pong.conf.in @@ -13,13 +13,13 @@ LPGROUPS PARAMS { # packet size in the network - packet_size="4096"; + packet_size="${PACKET_SIZE}"; modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); # scheduler options modelnet_scheduler="fcfs"; # chunk size in the network (when chunk size = packet size, packets will not be # divided into chunks) - chunk_size="4096"; + chunk_size="${CHUNK_SIZE}"; # modelnet_scheduler="round-robin"; # number of routers in group num_routers="4"; @@ -38,16 +38,20 @@ PARAMS # bandwidth in GiB/s for compute node-router channels cn_bandwidth="2.0"; # ROSS message size - message_size="736"; + message_size="440"; # number of compute nodes connected to router, dictated by dragonfly config # file num_cns_per_router="2"; # number of global channels per router num_global_channels="2"; # network config file for intra-group connections - intra-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; + intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; # network config file for inter-group connections - inter-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; + inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; # routing protocol to be used 
routing="prog-adaptive"; +# router buffer occupancy snapshots + router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} ); +# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything + save_packet_latency_path="${PACKET_LATENCY_TRACE_PATH}"; } diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c index 7f219aa3..e25ce7bc 100644 --- a/doc/example/tutorial-synthetic-ping-pong.c +++ b/doc/example/tutorial-synthetic-ping-pong.c @@ -1,14 +1,12 @@ /* * Copyright (C) 2019 Neil McGlohon + * Mantained/edited by Elkin Cruz (2022-2023) * See LICENSE notice in top-level directory */ #include "codes/model-net.h" -#include "codes/lp-io.h" -#include "codes/codes.h" #include "codes/codes_mapping.h" -#include "codes/configuration.h" -#include "codes/lp-type-lookup.h" +#include "codes/surrogate/init.h" // just needed for stats on surrogate-mode static int net_id = 0; @@ -21,6 +19,7 @@ static unsigned int lp_io_use_suffix = 0; static int do_lp_io = 0; static int num_msgs = 20; +static int num_initial_msgs = 1; typedef struct svr_msg svr_msg; typedef struct svr_state svr_state; @@ -34,8 +33,8 @@ static int group_index, lp_type_index, rep_id, offset; enum svr_event { KICKOFF = 1, - PING, - PONG + PING, + PONG }; struct svr_msg @@ -44,15 +43,17 @@ struct svr_msg int sender_id; //ID of the sender workload LP to know who to send a PONG message back to int payload_value; //Some value that we will encode as an example model_net_event_return event_rc; //helper to encode data relating to CODES rng usage + // Used for rollback + tw_stime previous_ts; }; struct svr_state { - int svr_id; /* the ID of this server */ + tw_lpid svr_id; /* the ID of this server */ int ping_msg_sent_count; /* PING messages sent */ int ping_msg_recvd_count; /* PING messages received */ int pong_msg_sent_count; /* PONG messages sent */ - int pong_msg_recvd_count; /* PONG messages received */ + int pong_msg_recvd_count; /* PONG 
messages received */ tw_stime start_ts; /* time that this LP started sending requests */ tw_stime end_ts; /* time that this LP ended sending requests */ int payload_sum; /* the running sum of all payloads received */ @@ -82,6 +83,7 @@ const tw_optdef app_opt [] = { TWOPT_GROUP("Model net synthetic traffic " ), TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "), + TWOPT_UINT("num_initial_messages", num_initial_msgs, "Number of PING messages to be injected initially at the start (larger = more congestion)"), TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "), TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"), TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"), @@ -117,20 +119,28 @@ static void svr_init(svr_state * s, tw_lp * lp) //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window tw_stime kickoff_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001); - tw_event *e; - svr_msg *m; - e = tw_event_new(lp->gid, kickoff_time, lp); //ROSS method to create a new event - m = tw_event_data(e); //Gives you a pointer to the data encoded within event e - m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received - tw_event_send(e); //ROSS method to send off the event e with the encoded data in m + for (int i = 1; i <= num_initial_msgs && i <= num_msgs; i++) { + tw_event *e; + svr_msg *m; + e = tw_event_new(lp->gid, kickoff_time * i, lp); //ROSS method to create a new event + m = tw_event_data(e); //Gives you a pointer to the data encoded within event e + m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received + tw_event_send(e); //ROSS method to send off the event e with the encoded data in m + } + + s->start_ts = kickoff_time; // 
the time when we're starting this LP's work is when the first ping is generated } static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) { - s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW + (void) b; + // This bit is just for testing. It allows to send a PING event only to the first LP/server + //if (lp->gid != 0) { + // return; + //} + + svr_msg ping_msg; - svr_msg * ping_msg = malloc(sizeof(svr_msg)); //allocate memory for new message - tw_lpid local_dest = -1; //ID of a sever, relative to only servers tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs @@ -142,49 +152,51 @@ static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * assert(local_dest < num_nodes); assert(local_dest != s->svr_id); - ping_msg->sender_id = s->svr_id; //encode our server ID into the new ping message - ping_msg->svr_event_type = PING; //set it to type PING - ping_msg->payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10] - + ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message + ping_msg.svr_event_type = PING; //set it to type PING + ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10] + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0); s->ping_msg_sent_count++; - m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)ping_msg, 0, NULL, lp); + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp); } static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) { - 
tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest - tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value; - - s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state + (void) b; model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message + s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state + tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value; + tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest } static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) { + (void) b; s->ping_msg_recvd_count++; //increment the counter for ping messages received int original_sender = m->sender_id; //this is the server we need to send a PONG message back to s->payload_sum += m->payload_value; //increment our running sum of payload values received - svr_msg * pong_msg = malloc(sizeof(svr_msg)); //allocate memory for new message - pong_msg->sender_id = s->svr_id; - pong_msg->svr_event_type = PONG; + svr_msg pong_msg; + pong_msg.sender_id = s->svr_id; + pong_msg.svr_event_type = PONG; // only ping messages contain a payload value - not every value in a message struct must be utilized by all messages! 
codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0); s->pong_msg_sent_count++; - m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)pong_msg, 0, NULL, lp); + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&pong_msg, 0, NULL, lp); } static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) { - s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received - s->payload_sum -= m->payload_value; //undo the increment of the payload sum - + (void) b; model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message + s->pong_msg_sent_count--; + s->payload_sum -= m->payload_value; //undo the increment of the payload sum + s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received } static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) @@ -197,46 +209,50 @@ static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) return; } - //Now we need to send another ping message back to the sender of the pong - int pong_sender = m->sender_id; //this is the sender of the PONG message that we want to send another PING message to - - svr_msg * ping_msg = malloc(sizeof(svr_msg)); //allocate memory for new message - ping_msg->sender_id = s->svr_id; //encode our server ID into the new ping message - ping_msg->svr_event_type = PING; //set it to type PING - ping_msg->payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it - + //Now we need to send another ping message, to someone new (just to spice the simulation) + tw_lpid send_to = 
tw_rand_integer(lp->rng, 1, num_nodes - 2); + send_to = (s->svr_id + send_to) % num_nodes; + + svr_msg ping_msg; + ping_msg.sender_id = s->svr_id; //encode our server ID into the new ping message + ping_msg.svr_event_type = PING; //set it to type PING + ping_msg.payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server - tw_lpid global_dest = codes_mapping_get_lpid_from_relative(pong_sender, group_name, lp_type_name, NULL, 0); + tw_lpid global_dest = codes_mapping_get_lpid_from_relative(send_to, group_name, lp_type_name, NULL, 0); s->ping_msg_sent_count++; - m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)ping_msg, 0, NULL, lp); + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&ping_msg, 0, NULL, lp); } static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) { - s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received - - if (b->c1) //if we flipped the c1 flag in the forward event - return; //then we don't need to undo any rngs or state change + if (! 
b->c1) { //if we didn't flip the c1 flag in the forward event + model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message + s->ping_msg_sent_count--; + tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value + tw_rand_reverse_unif(lp->rng); //undo the rng for the new server to send a ping to + b->c1 = 0; + } - tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value - s->ping_msg_sent_count--; - model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message + s->pong_msg_recvd_count--; //undo the increment of the counter for ping messages received } static void svr_finalize(svr_state * s, tw_lp * lp) { - s->end_ts = tw_now(lp); - int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count; int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent; tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts); - printf("Sever LPID:%llu svr_id:%d sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n", (unsigned long long)lp->gid, s->svr_id, total_msg_size_sent, - time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum); + printf("Sever LPID:%lu svr_id:%lu sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent %d; Payload Sum: %d\n", + (unsigned long)lp->gid, (unsigned long)s->svr_id, total_msg_size_sent, + time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum); } static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) { + m->previous_ts = s->end_ts; + s->end_ts = tw_now(lp); + switch (m->svr_event_type) { case KICKOFF: @@ -271,6 +287,8 @@ static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) tw_error(TW_LOC, "\n Invalid message type %d ", 
m->svr_event_type); break; } + + s->end_ts = m->previous_ts; } /* convert ns to seconds */ @@ -290,6 +308,10 @@ int main(int argc, char **argv) int num_nets; int *net_ids; + /* 1 day of simulation time is drastically huge but it will ensure + that the simulation doesn't try to end before all packets are delivered */ + g_tw_ts_end = s_to_ns(24 * 60 * 60); + tw_opt_add(app_opt); tw_init(&argc, &argv); @@ -316,10 +338,6 @@ int main(int argc, char **argv) net_id = *net_ids; free(net_ids); - /* 1 day of simulation time is drastically huge but it will ensure - that the simulation doesn't try to end before all packets are delivered */ - g_tw_ts_end = s_to_ns(24 * 60 * 60); - num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1); //get the number of nodes so we can use this value during the simulation assert(num_nodes); @@ -337,6 +355,10 @@ int main(int argc, char **argv) } model_net_report_stats(net_id); + // Printing some stats + surrogates_finalize(); + print_surrogate_stats(); + tw_end(); return 0; -} \ No newline at end of file +} diff --git a/doc/workload/union_online_workload.txt b/doc/workload/union_online_workload.txt new file mode 100644 index 00000000..fcd9938f --- /dev/null +++ b/doc/workload/union_online_workload.txt @@ -0,0 +1,68 @@ +This document serves the following purposes: +* CODES updates to accommodate Union online simulations +* Installation tutorial +* Completed Experiments +* Known issues + += CODES updates + +The code modifications are started with comment text "Xin:" + +== Header file + +Added parameters for collecting router traffic data, including: +* codes/model-net.h +* codes/net/dragonfly-custom.h +* codes/net/dragonfly-dally.h + +== Makefile + +Added checking for Union installation in the autoconf configure script configure.ac +Added src/workload/methods/codes-conc-online-comm-wrkld.C to code base if compile with Union in Makefile.am + +== Union online workload + +We add a pluggable workload module 
"src/workload/methods/codes-conc-online-comm-wrkld.C" into CODES workload generator to hold the actual implementation of Union communication events, such that the messages from Union skeletons can be emitted as simulation events in CODES. + +== Router status collection for dragonfly custom and dragonfly dally + +Added supportive functions for collecting traffic data on router port on the following network models: +* dragonfly custom at src/networks/model-net/dragonfly-custom.C +* dragonfly dally at src/networks/model-net/dragonfly-dally.C + +== Updates in MPI replay + +Added Union online workload type in MPI workload replay at src/network-workloads/model-net-mpi-replay.c + +== Configurations + +We added the following items in the CODES configuration file for collecting router traffic information during simulation. + +* counting_bool - flag to enable/disable the collection of router traffic +* counting_start - the start time in microseconds for collecting traffic data +* counting_interval - the time window size in microseconds for collecting traffic data +* counting_windows - the number of time windows for collecting traffic data +* num_apps - the number of applications in the simulation workload +* offset - supportive parameter for getting the application id of each packet + +An example configuration can be found at: https://github.com/SPEAR-IIT/Union/blob/master/test/df1d-72-adp.conf + += Installation tutorial + +Please follow the Readme at: https://github.com/SPEAR-IIT/Union to install Union and run test simulation of Union online workloads. 
+ += Completed Experiments + +We have completed the following experiments with Union online workload simulation: +* simulate Conceptual skeletons alone +* simulate Conceptual and SWM skeletons simultaneously +* simulate Conceptual and SWM skeletons simultaneously with different synthetic traffic patterns + +The above experiments have been done on both dragonfly custom and dragonfly dally network models, with sequential mode and optimistic mode. + += Known Issues + +Currently the rendezvous protocol in MPI replay cannot work with Union online workloads. +The reverse function router_buf_update_rc() does not take care of the cross window reverses for aggregated busytime on port. + + diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 deleted file mode 100644 index a7680d72..00000000 --- a/m4/ax_check_compile_flag.m4 +++ /dev/null @@ -1,75 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) -# -# DESCRIPTION -# -# Check whether the given FLAG works with the current language's compiler -# or gives an error. (Warnings, however, are ignored) -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# INPUT gives an alternative input source to AC_COMPILE_IFELSE. -# -# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this -# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. 
Draheim -# Copyright (c) 2011 Maarten Bosmans -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 4 - -AC_DEFUN([AX_CHECK_COMPILE_FLAG], -[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF -AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl -AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ - ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" - AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[yes])], - [AS_VAR_SET(CACHEVAR,[no])]) - _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) -AS_VAR_IF(CACHEVAR,yes, - [m4_default([$2], :)], - [m4_default([$3], :)]) -AS_VAR_POPDEF([CACHEVAR])dnl -])dnl AX_CHECK_COMPILE_FLAGS - diff --git a/m4/ax_compare_version.m4 b/m4/ax_compare_version.m4 deleted file mode 100644 index 74dc0fdd..00000000 --- a/m4/ax_compare_version.m4 +++ /dev/null @@ -1,177 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_compare_version.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) -# -# DESCRIPTION -# -# This macro compares two version strings. Due to the various number of -# minor-version numbers that can exist, and the fact that string -# comparisons are not compatible with numeric comparisons, this is not -# necessarily trivial to do in a autoconf script. This macro makes doing -# these comparisons easy. -# -# The six basic comparisons are available, as well as checking equality -# limited to a certain number of minor-version levels. 
-# -# The operator OP determines what type of comparison to do, and can be one -# of: -# -# eq - equal (test A == B) -# ne - not equal (test A != B) -# le - less than or equal (test A <= B) -# ge - greater than or equal (test A >= B) -# lt - less than (test A < B) -# gt - greater than (test A > B) -# -# Additionally, the eq and ne operator can have a number after it to limit -# the test to that number of minor versions. -# -# eq0 - equal up to the length of the shorter version -# ne0 - not equal up to the length of the shorter version -# eqN - equal up to N sub-version levels -# neN - not equal up to N sub-version levels -# -# When the condition is true, shell commands ACTION-IF-TRUE are run, -# otherwise shell commands ACTION-IF-FALSE are run. The environment -# variable 'ax_compare_version' is always set to either 'true' or 'false' -# as well. -# -# Examples: -# -# AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8]) -# AX_COMPARE_VERSION([3.15],[lt],[3.15.8]) -# -# would both be true. -# -# AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8]) -# AX_COMPARE_VERSION([3.15],[gt],[3.15.8]) -# -# would both be false. -# -# AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8]) -# -# would be true because it is only comparing two minor versions. -# -# AX_COMPARE_VERSION([3.15.7],[eq0],[3.15]) -# -# would be true because it is only comparing the lesser number of minor -# versions of the two values. -# -# Note: The characters that separate the version numbers do not matter. An -# empty string is the same as version 0. OP is evaluated by autoconf, not -# configure, so must be a string, not a variable. -# -# The author would like to acknowledge Guido Draheim whose advice about -# the m4_case and m4_ifvaln functions make this macro only include the -# portions necessary to perform the specific comparison specified by the -# OP argument in the final configure script. 
-# -# LICENSE -# -# Copyright (c) 2008 Tim Toolan -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 11 - -dnl ######################################################################### -AC_DEFUN([AX_COMPARE_VERSION], [ - AC_REQUIRE([AC_PROG_AWK]) - - # Used to indicate true or false condition - ax_compare_version=false - - # Convert the two version strings to be compared into a format that - # allows a simple string comparison. The end result is that a version - # string of the form 1.12.5-r617 will be converted to the form - # 0001001200050617. In other words, each number is zero padded to four - # digits, and non digits are removed. - AS_VAR_PUSHDEF([A],[ax_compare_version_A]) - A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ - -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/[[^0-9]]//g'` - - AS_VAR_PUSHDEF([B],[ax_compare_version_B]) - B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ - -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/[[^0-9]]//g'` - - dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary - dnl # then the first line is used to determine if the condition is true. - dnl # The sed right after the echo is to remove any indented white space. 
- m4_case(m4_tolower($2), - [lt],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"` - ], - [gt],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"` - ], - [le],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"` - ], - [ge],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"` - ],[ - dnl Split the operator from the subversion count if present. - m4_bmatch(m4_substr($2,2), - [0],[ - # A count of zero means use the length of the shorter version. - # Determine the number of characters in A and B. - ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'` - ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'` - - # Set A to no more than B's length and B to no more than A's length. - A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"` - B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"` - ], - [[0-9]+],[ - # A count greater than zero means use only that many subversions - A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` - B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` - ], - [.+],[ - AC_WARNING( - [illegal OP numeric parameter: $2]) - ],[]) - - # Pad zeros at end of numbers to make same length. - ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`" - B="$B`echo $A | sed 's/./0/g'`" - A="$ax_compare_version_tmp_A" - - # Check for equality or inequality as necessary. - m4_case(m4_tolower(m4_substr($2,0,2)), - [eq],[ - test "x$A" = "x$B" && ax_compare_version=true - ], - [ne],[ - test "x$A" != "x$B" && ax_compare_version=true - ],[ - AC_WARNING([illegal OP parameter: $2]) - ]) - ]) - - AS_VAR_POPDEF([A])dnl - AS_VAR_POPDEF([B])dnl - - dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE. 
- if test "$ax_compare_version" = "true" ; then - m4_ifvaln([$4],[$4],[:])dnl - m4_ifvaln([$5],[else $5])dnl - fi -]) dnl AX_COMPARE_VERSION diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 deleted file mode 100644 index 0b6cb3a7..00000000 --- a/m4/ax_cxx_compile_stdcxx.m4 +++ /dev/null @@ -1,972 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) -# -# DESCRIPTION -# -# Check for baseline language coverage in the compiler for the specified -# version of the C++ standard. If necessary, add switches to CXX and -# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) -# or '14' (for the C++14 standard). -# -# The second argument, if specified, indicates whether you insist on an -# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. -# -std=c++11). If neither is specified, you get whatever works, with -# preference for an extended mode. -# -# The third argument, if specified 'mandatory' or if left unspecified, -# indicates that baseline support for the specified C++ standard is -# required and that the macro should error out if no mode with that -# support is found. If specified 'optional', then configuration proceeds -# regardless, after defining HAVE_CXX${VERSION} if and only if a -# supporting mode is found. 
-# -# LICENSE -# -# Copyright (c) 2008 Benjamin Kosnik -# Copyright (c) 2012 Zack Weinberg -# Copyright (c) 2013 Roy Stogner -# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov -# Copyright (c) 2015 Paul Norman -# Copyright (c) 2015 Moritz Klammler -# Copyright (c) 2016, 2018 Krzesimir Nowak -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 9 - -dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro -dnl (serial version number 13). - -AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl - m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], - [$1], [14], [ax_cxx_compile_alternatives="14 1y"], - [$1], [17], [ax_cxx_compile_alternatives="17 1z"], - [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl - m4_if([$2], [], [], - [$2], [ext], [], - [$2], [noext], [], - [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl - m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], - [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], - [$3], [optional], [ax_cxx_compile_cxx$1_required=false], - [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) - AC_LANG_PUSH([C++])dnl - ac_success=no - - m4_if([$2], [noext], [], [dnl - if test x$ac_success = xno; then - for alternative in ${ax_cxx_compile_alternatives}; do - switch="-std=gnu++${alternative}" - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXX="$CXX" - CXX="$CXX $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXX="$ac_save_CXX"]) - if eval test x\$$cachevar = xyes; then - CXX="$CXX $switch" - if test -n "$CXXCPP" ; then - CXXCPP="$CXXCPP $switch" - fi - 
ac_success=yes - break - fi - done - fi]) - - m4_if([$2], [ext], [], [dnl - if test x$ac_success = xno; then - dnl HP's aCC needs +std=c++11 according to: - dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf - dnl Cray's crayCC needs "-h std=c++11" - for alternative in ${ax_cxx_compile_alternatives}; do - for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXX="$CXX" - CXX="$CXX $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXX="$ac_save_CXX"]) - if eval test x\$$cachevar = xyes; then - CXX="$CXX $switch" - if test -n "$CXXCPP" ; then - CXXCPP="$CXXCPP $switch" - fi - ac_success=yes - break - fi - done - if test x$ac_success = xyes; then - break - fi - done - fi]) - AC_LANG_POP([C++]) - if test x$ax_cxx_compile_cxx$1_required = xtrue; then - if test x$ac_success = xno; then - AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) - fi - fi - if test x$ac_success = xno; then - HAVE_CXX$1=0 - AC_MSG_NOTICE([No compiler with C++$1 support was found]) - else - HAVE_CXX$1=1 - AC_DEFINE(HAVE_CXX$1,1, - [define if the compiler supports basic C++$1 syntax]) - fi - AC_SUBST(HAVE_CXX$1) -]) - - -dnl Test body for checking C++11 support - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 -) - - -dnl Test body for checking C++14 support - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 -) - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 -) - -dnl Tests for new features in 
C++11 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ - -// If the compiler admits that it is not ready for C++11, why torture it? -// Hopefully, this will speed up the test. - -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus < 201103L - -#error "This is not a C++11 compiler" - -#else - -namespace cxx11 -{ - - namespace test_static_assert - { - - template - struct check - { - static_assert(sizeof(int) <= sizeof(T), "not big enough"); - }; - - } - - namespace test_final_override - { - - struct Base - { - virtual void f() {} - }; - - struct Derived : public Base - { - virtual void f() override {} - }; - - } - - namespace test_double_right_angle_brackets - { - - template < typename T > - struct check {}; - - typedef check single_type; - typedef check> double_type; - typedef check>> triple_type; - typedef check>>> quadruple_type; - - } - - namespace test_decltype - { - - int - f() - { - int a = 1; - decltype(a) b = 2; - return a + b; - } - - } - - namespace test_type_deduction - { - - template < typename T1, typename T2 > - struct is_same - { - static const bool value = false; - }; - - template < typename T > - struct is_same - { - static const bool value = true; - }; - - template < typename T1, typename T2 > - auto - add(T1 a1, T2 a2) -> decltype(a1 + a2) - { - return a1 + a2; - } - - int - test(const int c, volatile int v) - { - static_assert(is_same::value == true, ""); - static_assert(is_same::value == false, ""); - static_assert(is_same::value == false, ""); - auto ac = c; - auto av = v; - auto sumi = ac + av + 'x'; - auto sumf = ac + av + 1.0; - static_assert(is_same::value == true, ""); - static_assert(is_same::value == true, ""); - static_assert(is_same::value == true, ""); - static_assert(is_same::value == false, ""); - static_assert(is_same::value == true, ""); - return (sumf > 0.0) ? 
sumi : add(c, v); - } - - } - - namespace test_noexcept - { - - int f() { return 0; } - int g() noexcept { return 0; } - - static_assert(noexcept(f()) == false, ""); - static_assert(noexcept(g()) == true, ""); - - } - - namespace test_constexpr - { - - template < typename CharT > - unsigned long constexpr - strlen_c_r(const CharT *const s, const unsigned long acc) noexcept - { - return *s ? strlen_c_r(s + 1, acc + 1) : acc; - } - - template < typename CharT > - unsigned long constexpr - strlen_c(const CharT *const s) noexcept - { - return strlen_c_r(s, 0UL); - } - - static_assert(strlen_c("") == 0UL, ""); - static_assert(strlen_c("1") == 1UL, ""); - static_assert(strlen_c("example") == 7UL, ""); - static_assert(strlen_c("another\0example") == 7UL, ""); - - } - - namespace test_rvalue_references - { - - template < int N > - struct answer - { - static constexpr int value = N; - }; - - answer<1> f(int&) { return answer<1>(); } - answer<2> f(const int&) { return answer<2>(); } - answer<3> f(int&&) { return answer<3>(); } - - void - test() - { - int i = 0; - const int c = 0; - static_assert(decltype(f(i))::value == 1, ""); - static_assert(decltype(f(c))::value == 2, ""); - static_assert(decltype(f(0))::value == 3, ""); - } - - } - - namespace test_uniform_initialization - { - - struct test - { - static const int zero {}; - static const int one {1}; - }; - - static_assert(test::zero == 0, ""); - static_assert(test::one == 1, ""); - - } - - namespace test_lambdas - { - - void - test1() - { - auto lambda1 = [](){}; - auto lambda2 = lambda1; - lambda1(); - lambda2(); - } - - int - test2() - { - auto a = [](int i, int j){ return i + j; }(1, 2); - auto b = []() -> int { return '0'; }(); - auto c = [=](){ return a + b; }(); - auto d = [&](){ return c; }(); - auto e = [a, &b](int x) mutable { - const auto identity = [](int y){ return y; }; - for (auto i = 0; i < a; ++i) - a += b--; - return x + identity(a + b); - }(0); - return a + b + c + d + e; - } - - int - test3() - { - 
const auto nullary = [](){ return 0; }; - const auto unary = [](int x){ return x; }; - using nullary_t = decltype(nullary); - using unary_t = decltype(unary); - const auto higher1st = [](nullary_t f){ return f(); }; - const auto higher2nd = [unary](nullary_t f1){ - return [unary, f1](unary_t f2){ return f2(unary(f1())); }; - }; - return higher1st(nullary) + higher2nd(nullary)(unary); - } - - } - - namespace test_variadic_templates - { - - template - struct sum; - - template - struct sum - { - static constexpr auto value = N0 + sum::value; - }; - - template <> - struct sum<> - { - static constexpr auto value = 0; - }; - - static_assert(sum<>::value == 0, ""); - static_assert(sum<1>::value == 1, ""); - static_assert(sum<23>::value == 23, ""); - static_assert(sum<1, 2>::value == 3, ""); - static_assert(sum<5, 5, 11>::value == 21, ""); - static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); - - } - - // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae - // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function - // because of this. - namespace test_template_alias_sfinae - { - - struct foo {}; - - template - using member = typename T::member_type; - - template - void func(...) {} - - template - void func(member*) {} - - void test(); - - void test() { func(0); } - - } - -} // namespace cxx11 - -#endif // __cplusplus >= 201103L - -]]) - - -dnl Tests for new features in C++14 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ - -// If the compiler admits that it is not ready for C++14, why torture it? -// Hopefully, this will speed up the test. - -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus < 201402L - -#error "This is not a C++14 compiler" - -#else - -namespace cxx14 -{ - - namespace test_polymorphic_lambdas - { - - int - test() - { - const auto lambda = [](auto&&... args){ - const auto istiny = [](auto x){ - return (sizeof(x) == 1UL) ? 
1 : 0; - }; - const int aretiny[] = { istiny(args)... }; - return aretiny[0]; - }; - return lambda(1, 1L, 1.0f, '1'); - } - - } - - namespace test_binary_literals - { - - constexpr auto ivii = 0b0000000000101010; - static_assert(ivii == 42, "wrong value"); - - } - - namespace test_generalized_constexpr - { - - template < typename CharT > - constexpr unsigned long - strlen_c(const CharT *const s) noexcept - { - auto length = 0UL; - for (auto p = s; *p; ++p) - ++length; - return length; - } - - static_assert(strlen_c("") == 0UL, ""); - static_assert(strlen_c("x") == 1UL, ""); - static_assert(strlen_c("test") == 4UL, ""); - static_assert(strlen_c("another\0test") == 7UL, ""); - - } - - namespace test_lambda_init_capture - { - - int - test() - { - auto x = 0; - const auto lambda1 = [a = x](int b){ return a + b; }; - const auto lambda2 = [a = lambda1(x)](){ return a; }; - return lambda2(); - } - - } - - namespace test_digit_separators - { - - constexpr auto ten_million = 100'000'000; - static_assert(ten_million == 100000000, ""); - - } - - namespace test_return_type_deduction - { - - auto f(int& x) { return x; } - decltype(auto) g(int& x) { return x; } - - template < typename T1, typename T2 > - struct is_same - { - static constexpr auto value = false; - }; - - template < typename T > - struct is_same - { - static constexpr auto value = true; - }; - - int - test() - { - auto x = 0; - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - return x; - } - - } - -} // namespace cxx14 - -#endif // __cplusplus >= 201402L - -]]) - - -dnl Tests for new features in C++17 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ - -// If the compiler admits that it is not ready for C++17, why torture it? -// Hopefully, this will speed up the test. 
- -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus <= 201402L - -#error "This is not a C++17 compiler" - -#else - -#if defined(__clang__) - #define REALLY_CLANG -#else - #if defined(__GNUC__) - #define REALLY_GCC - #endif -#endif - -#include -#include -#include - -namespace cxx17 -{ - -#if !defined(REALLY_CLANG) - namespace test_constexpr_lambdas - { - - // TODO: test it with clang++ from git - - constexpr int foo = [](){return 42;}(); - - } -#endif // !defined(REALLY_CLANG) - - namespace test::nested_namespace::definitions - { - - } - - namespace test_fold_expression - { - - template - int multiply(Args... args) - { - return (args * ... * 1); - } - - template - bool all(Args... args) - { - return (args && ...); - } - - } - - namespace test_extended_static_assert - { - - static_assert (true); - - } - - namespace test_auto_brace_init_list - { - - auto foo = {5}; - auto bar {5}; - - static_assert(std::is_same, decltype(foo)>::value); - static_assert(std::is_same::value); - } - - namespace test_typename_in_template_template_parameter - { - - template typename X> struct D; - - } - - namespace test_fallthrough_nodiscard_maybe_unused_attributes - { - - int f1() - { - return 42; - } - - [[nodiscard]] int f2() - { - [[maybe_unused]] auto unused = f1(); - - switch (f1()) - { - case 17: - f1(); - [[fallthrough]]; - case 42: - f1(); - } - return f1(); - } - - } - - namespace test_extended_aggregate_initialization - { - - struct base1 - { - int b1, b2 = 42; - }; - - struct base2 - { - base2() { - b3 = 42; - } - int b3; - }; - - struct derived : base1, base2 - { - int d; - }; - - derived d1 {{1, 2}, {}, 4}; // full initialization - derived d2 {{}, {}, 4}; // value-initialized bases - - } - - namespace test_general_range_based_for_loop - { - - struct iter - { - int i; - - int& operator* () - { - return i; - } - - const int& operator* () const - { - return i; - } - - iter& operator++() - { - ++i; - return *this; - } - }; - - struct sentinel - { - 
int i; - }; - - bool operator== (const iter& i, const sentinel& s) - { - return i.i == s.i; - } - - bool operator!= (const iter& i, const sentinel& s) - { - return !(i == s); - } - - struct range - { - iter begin() const - { - return {0}; - } - - sentinel end() const - { - return {5}; - } - }; - - void f() - { - range r {}; - - for (auto i : r) - { - [[maybe_unused]] auto v = i; - } - } - - } - - namespace test_lambda_capture_asterisk_this_by_value - { - - struct t - { - int i; - int foo() - { - return [*this]() - { - return i; - }(); - } - }; - - } - - namespace test_enum_class_construction - { - - enum class byte : unsigned char - {}; - - byte foo {42}; - - } - - namespace test_constexpr_if - { - - template - int f () - { - if constexpr(cond) - { - return 13; - } - else - { - return 42; - } - } - - } - - namespace test_selection_statement_with_initializer - { - - int f() - { - return 13; - } - - int f2() - { - if (auto i = f(); i > 0) - { - return 3; - } - - switch (auto i = f(); i + 4) - { - case 17: - return 2; - - default: - return 1; - } - } - - } - -#if !defined(REALLY_CLANG) - namespace test_template_argument_deduction_for_class_templates - { - - // TODO: test it with clang++ from git - - template - struct pair - { - pair (T1 p1, T2 p2) - : m1 {p1}, - m2 {p2} - {} - - T1 m1; - T2 m2; - }; - - void f() - { - [[maybe_unused]] auto p = pair{13, 42u}; - } - - } -#endif // !defined(REALLY_CLANG) - - namespace test_non_type_auto_template_parameters - { - - template - struct B - {}; - - B<5> b1; - B<'a'> b2; - - } - -#if !defined(REALLY_CLANG) - namespace test_structured_bindings - { - - // TODO: test it with clang++ from git - - int arr[2] = { 1, 2 }; - std::pair pr = { 1, 2 }; - - auto f1() -> int(&)[2] - { - return arr; - } - - auto f2() -> std::pair& - { - return pr; - } - - struct S - { - int x1 : 2; - volatile double y1; - }; - - S f3() - { - return {}; - } - - auto [ x1, y1 ] = f1(); - auto& [ xr1, yr1 ] = f1(); - auto [ x2, y2 ] = f2(); - auto& [ xr2, yr2 
] = f2(); - const auto [ x3, y3 ] = f3(); - - } -#endif // !defined(REALLY_CLANG) - -#if !defined(REALLY_CLANG) - namespace test_exception_spec_type_system - { - - // TODO: test it with clang++ from git - - struct Good {}; - struct Bad {}; - - void g1() noexcept; - void g2(); - - template - Bad - f(T*, T*); - - template - Good - f(T1*, T2*); - - static_assert (std::is_same_v); - - } -#endif // !defined(REALLY_CLANG) - - namespace test_inline_variables - { - - template void f(T) - {} - - template inline T g(T) - { - return T{}; - } - - template<> inline void f<>(int) - {} - - template<> int g<>(int) - { - return 5; - } - - } - -} // namespace cxx17 - -#endif // __cplusplus <= 201402L - -]]) diff --git a/m4/ax_prog_bison.m4 b/m4/ax_prog_bison.m4 deleted file mode 100755 index aa3bb112..00000000 --- a/m4/ax_prog_bison.m4 +++ /dev/null @@ -1,68 +0,0 @@ -# =========================================================================== -# http://www.nongnu.org/autoconf-archive/ax_prog_bison.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_PROG_BISON(ACTION-IF-TRUE,ACTION-IF-FALSE) -# -# DESCRIPTION -# -# Check whether bison is the parser generator. Run ACTION-IF-TRUE if -# successful, ACTION-IF-FALSE otherwise -# -# LICENSE -# -# Copyright (c) 2009 Francesco Salvestrini -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . 
-# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -AC_DEFUN([AX_PROG_BISON], [ - AC_REQUIRE([AC_PROG_YACC]) - AC_REQUIRE([AC_PROG_SED]) - - AC_CACHE_CHECK([if bison is the parser generator],[ax_cv_prog_bison],[ - AS_IF([test "`echo \"$YACC\" | $SED 's,^.*\(bison\).*$,\1,'`" = "bison" ],[ - ax_cv_prog_bison=yes - ],[ - ax_cv_prog_bison=no - ]) - ]) - - AC_DEFINE([HAVE_YACC_OLD_PUSH],[0],[If old-style push parser syntax is supported by ${YACC}]) - AM_CONDITIONAL([HAVE_YACC_OLD_PUSH],[test "x${HAVE_YACC_OLD_PUSH}" == "x1"]) - AC_DEFINE([HAVE_YACC_OLD_PURE],[0],[If old-style pure reentrant parser syntax is supported by ${YACC}]) - AM_CONDITIONAL([HAVE_YACC_OLD_PURE],[test "x${HAVE_YACC_OLD_PURE}" == "x1"]) - - AS_IF([test "$ax_cv_prog_bison" = yes],[ - : - $1 - ],[ - : - $2 - ]) -]) diff --git a/m4/ax_prog_bison_clfeatures.m4 b/m4/ax_prog_bison_clfeatures.m4 deleted file mode 100755 index 46e82665..00000000 --- a/m4/ax_prog_bison_clfeatures.m4 +++ /dev/null @@ -1,137 +0,0 @@ -AC_DEFUN([AX_PROG_BISON_CLFEATURES], [ - AC_REQUIRE([AC_PROG_YACC]) - AC_REQUIRE([AC_PROG_SED]) - - AC_CACHE_CHECK([if bison is the parser generator],[ax_cv_prog_bison],[ - AS_IF([test "`echo \"$YACC\" | $SED 's,^.*\(bison\).*$,\1,'`" = "bison" ],[ - 
ax_cv_prog_bison=yes - ],[ - ax_cv_prog_bison=no - ]) - ]) - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" -then - AC_SUBST([CODES_PURE_PARSER_DEFINES], ["%pure-parser"]) - AC_MSG_RESULT([old-style]) - $3 -else - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" - then - AC_SUBST([CODES_PURE_PARSER_DEFINES], ["%define api.pure"]) - AC_MSG_RESULT([new-style]) - $3 - else - AC_MSG_RESULT([feature not supported]) - BVER=`${YACC} --version | head -n 1` - AC_MSG_WARN([${BVER} does not support pure / reentrant parser generation]) - $4 - fi -fi - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" -then - AC_SUBST([CODES_PUSH_PARSER_DEFINES], ["%define api.push_pull \"push\""]) - AC_MSG_RESULT([old-style]) - $3 -else - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" - then - AC_SUBST([CODES_PUSH_PARSER_DEFINES], ["%define api.push-pull push"]) - AC_MSG_RESULT([new-style]) - $3 - else - AC_MSG_RESULT([feature not supported]) - BVER=`${YACC} --version | head -n 1` - AC_MSG_WARN([${BVER} does not support push parser generation]) - $4 - fi -fi - - AS_IF([test "$ax_cv_prog_bison" = yes],[ - : - $1 - ],[ - : - $2 - ]) - - # cleanup bison / yacc tmp files - rm -rf y.output y.tab.h y.tab.c y.tab.o -]) diff --git a/m4/ax_prog_flex.m4 b/m4/ax_prog_flex.m4 deleted file mode 100755 index 6f8c6107..00000000 --- a/m4/ax_prog_flex.m4 +++ /dev/null @@ -1,62 +0,0 @@ -# =========================================================================== -# http://www.nongnu.org/autoconf-archive/ax_prog_flex.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_PROG_FLEX(ACTION-IF-TRUE,ACTION-IF-FALSE) -# -# DESCRIPTION -# -# Check whether flex is the scanner generator. 
Run ACTION-IF-TRUE if -# successful, ACTION-IF-FALSE otherwise -# -# LICENSE -# -# Copyright (c) 2009 Francesco Salvestrini -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -AC_DEFUN([AX_PROG_FLEX], [ - AC_REQUIRE([AC_PROG_LEX]) - AC_REQUIRE([AC_PROG_SED]) - - AC_CACHE_CHECK([if flex is the lexer generator],[ax_cv_prog_flex],[ - AS_IF([test "`echo \"$LEX\" | $SED 's,^.*\(flex\).*$,\1,'`" = "flex"],[ - ax_cv_prog_flex=yes - ],[ - ax_cv_prog_flex=no - ]) - ]) - AS_IF([test "$ax_cv_prog_flex" = yes],[ - : - $1 - ],[ - : - $2 - ]) -]) diff --git a/m4/libtool.m4 b/m4/libtool.m4 deleted file mode 100644 index a6d21ae5..00000000 --- a/m4/libtool.m4 +++ /dev/null @@ -1,8394 +0,0 @@ -# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- -# -# Copyright (C) 1996-2001, 2003-2015 Free Software Foundation, Inc. -# Written by Gordon Matzigkeit, 1996 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -m4_define([_LT_COPYING], [dnl -# Copyright (C) 2014 Free Software Foundation, Inc. -# This is free software; see the source for copying conditions. There is NO -# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -# GNU Libtool is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of of the License, or -# (at your option) any later version. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program or library that is built -# using GNU Libtool, you may include this file under the same -# distribution terms that you use for the rest of that program. -# -# GNU Libtool is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. 
If not, see . -]) - -# serial 58 LT_INIT - - -# LT_PREREQ(VERSION) -# ------------------ -# Complain and exit if this libtool version is less that VERSION. -m4_defun([LT_PREREQ], -[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, - [m4_default([$3], - [m4_fatal([Libtool version $1 or higher is required], - 63)])], - [$2])]) - - -# _LT_CHECK_BUILDDIR -# ------------------ -# Complain if the absolute build directory name contains unusual characters -m4_defun([_LT_CHECK_BUILDDIR], -[case `pwd` in - *\ * | *\ *) - AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; -esac -]) - - -# LT_INIT([OPTIONS]) -# ------------------ -AC_DEFUN([LT_INIT], -[AC_PREREQ([2.62])dnl We use AC_PATH_PROGS_FEATURE_CHECK -AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl -AC_BEFORE([$0], [LT_LANG])dnl -AC_BEFORE([$0], [LT_OUTPUT])dnl -AC_BEFORE([$0], [LTDL_INIT])dnl -m4_require([_LT_CHECK_BUILDDIR])dnl - -dnl Autoconf doesn't catch unexpanded LT_ macros by default: -m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl -m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl -dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 -dnl unless we require an AC_DEFUNed macro: -AC_REQUIRE([LTOPTIONS_VERSION])dnl -AC_REQUIRE([LTSUGAR_VERSION])dnl -AC_REQUIRE([LTVERSION_VERSION])dnl -AC_REQUIRE([LTOBSOLETE_VERSION])dnl -m4_require([_LT_PROG_LTMAIN])dnl - -_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) - -dnl Parse OPTIONS -_LT_SET_OPTIONS([$0], [$1]) - -# This can be used to rebuild libtool when needed -LIBTOOL_DEPS=$ltmain - -# Always use our own libtool. 
-LIBTOOL='$(SHELL) $(top_builddir)/libtool' -AC_SUBST(LIBTOOL)dnl - -_LT_SETUP - -# Only expand once: -m4_define([LT_INIT]) -])# LT_INIT - -# Old names: -AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) -AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_PROG_LIBTOOL], []) -dnl AC_DEFUN([AM_PROG_LIBTOOL], []) - - -# _LT_PREPARE_CC_BASENAME -# ----------------------- -m4_defun([_LT_PREPARE_CC_BASENAME], [ -# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. -func_cc_basename () -{ - for cc_temp in @S|@*""; do - case $cc_temp in - compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; - distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; - \-*) ;; - *) break;; - esac - done - func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` -} -])# _LT_PREPARE_CC_BASENAME - - -# _LT_CC_BASENAME(CC) -# ------------------- -# It would be clearer to call AC_REQUIREs from _LT_PREPARE_CC_BASENAME, -# but that macro is also expanded into generated libtool script, which -# arranges for $SED and $ECHO to be set by different means. -m4_defun([_LT_CC_BASENAME], -[m4_require([_LT_PREPARE_CC_BASENAME])dnl -AC_REQUIRE([_LT_DECL_SED])dnl -AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl -func_cc_basename $1 -cc_basename=$func_cc_basename_result -]) - - -# _LT_FILEUTILS_DEFAULTS -# ---------------------- -# It is okay to use these file commands and assume they have been set -# sensibly after 'm4_require([_LT_FILEUTILS_DEFAULTS])'. 
-m4_defun([_LT_FILEUTILS_DEFAULTS], -[: ${CP="cp -f"} -: ${MV="mv -f"} -: ${RM="rm -f"} -])# _LT_FILEUTILS_DEFAULTS - - -# _LT_SETUP -# --------- -m4_defun([_LT_SETUP], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl -AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl - -_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl -dnl -_LT_DECL([], [host_alias], [0], [The host system])dnl -_LT_DECL([], [host], [0])dnl -_LT_DECL([], [host_os], [0])dnl -dnl -_LT_DECL([], [build_alias], [0], [The build system])dnl -_LT_DECL([], [build], [0])dnl -_LT_DECL([], [build_os], [0])dnl -dnl -AC_REQUIRE([AC_PROG_CC])dnl -AC_REQUIRE([LT_PATH_LD])dnl -AC_REQUIRE([LT_PATH_NM])dnl -dnl -AC_REQUIRE([AC_PROG_LN_S])dnl -test -z "$LN_S" && LN_S="ln -s" -_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl -dnl -AC_REQUIRE([LT_CMD_MAX_LEN])dnl -_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl -_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl -dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_CHECK_SHELL_FEATURES])dnl -m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl -m4_require([_LT_CMD_RELOAD])dnl -m4_require([_LT_CHECK_MAGIC_METHOD])dnl -m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl -m4_require([_LT_CMD_OLD_ARCHIVE])dnl -m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl -m4_require([_LT_WITH_SYSROOT])dnl -m4_require([_LT_CMD_TRUNCATE])dnl - -_LT_CONFIG_LIBTOOL_INIT([ -# See if we are running on zsh, and set the options that allow our -# commands through without removal of \ escapes INIT. -if test -n "\${ZSH_VERSION+set}"; then - setopt NO_GLOB_SUBST -fi -]) -if test -n "${ZSH_VERSION+set}"; then - setopt NO_GLOB_SUBST -fi - -_LT_CHECK_OBJDIR - -m4_require([_LT_TAG_COMPILER])dnl - -case $host_os in -aix3*) - # AIX sometimes has problems with the GCC collect2 program. 
For some - # reason, if we set the COLLECT_NAMES environment variable, the problems - # vanish in a puff of smoke. - if test set != "${COLLECT_NAMES+set}"; then - COLLECT_NAMES= - export COLLECT_NAMES - fi - ;; -esac - -# Global variables: -ofile=libtool -can_build_shared=yes - -# All known linkers require a '.a' archive for static linking (except MSVC, -# which needs '.lib'). -libext=a - -with_gnu_ld=$lt_cv_prog_gnu_ld - -old_CC=$CC -old_CFLAGS=$CFLAGS - -# Set sane defaults for various variables -test -z "$CC" && CC=cc -test -z "$LTCC" && LTCC=$CC -test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS -test -z "$LD" && LD=ld -test -z "$ac_objext" && ac_objext=o - -_LT_CC_BASENAME([$compiler]) - -# Only perform the check for file, if the check method requires it -test -z "$MAGIC_CMD" && MAGIC_CMD=file -case $deplibs_check_method in -file_magic*) - if test "$file_magic_cmd" = '$MAGIC_CMD'; then - _LT_PATH_MAGIC - fi - ;; -esac - -# Use C for the default configuration in the libtool script -LT_SUPPORTED_TAG([CC]) -_LT_LANG_C_CONFIG -_LT_LANG_DEFAULT_CONFIG -_LT_CONFIG_COMMANDS -])# _LT_SETUP - - -# _LT_PREPARE_SED_QUOTE_VARS -# -------------------------- -# Define a few sed substitution that help us do robust quoting. -m4_defun([_LT_PREPARE_SED_QUOTE_VARS], -[# Backslashify metacharacters that are still active within -# double-quoted strings. -sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' - -# Same as above, but do not quote variable references. -double_quote_subst='s/\([["`\\]]\)/\\\1/g' - -# Sed substitution to delay expansion of an escaped shell variable in a -# double_quote_subst'ed string. -delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' - -# Sed substitution to delay expansion of an escaped single quote. 
-delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' - -# Sed substitution to avoid accidental globbing in evaled expressions -no_glob_subst='s/\*/\\\*/g' -]) - -# _LT_PROG_LTMAIN -# --------------- -# Note that this code is called both from 'configure', and 'config.status' -# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, -# 'config.status' has no value for ac_aux_dir unless we are using Automake, -# so we pass a copy along to make sure it has a sensible value anyway. -m4_defun([_LT_PROG_LTMAIN], -[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl -_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) -ltmain=$ac_aux_dir/ltmain.sh -])# _LT_PROG_LTMAIN - - -## ------------------------------------- ## -## Accumulate code for creating libtool. ## -## ------------------------------------- ## - -# So that we can recreate a full libtool script including additional -# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS -# in macros and then make a single call at the end using the 'libtool' -# label. - - -# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) -# ---------------------------------------- -# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. -m4_define([_LT_CONFIG_LIBTOOL_INIT], -[m4_ifval([$1], - [m4_append([_LT_OUTPUT_LIBTOOL_INIT], - [$1 -])])]) - -# Initialize. -m4_define([_LT_OUTPUT_LIBTOOL_INIT]) - - -# _LT_CONFIG_LIBTOOL([COMMANDS]) -# ------------------------------ -# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. -m4_define([_LT_CONFIG_LIBTOOL], -[m4_ifval([$1], - [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], - [$1 -])])]) - -# Initialize. 
-m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) - - -# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) -# ----------------------------------------------------- -m4_defun([_LT_CONFIG_SAVE_COMMANDS], -[_LT_CONFIG_LIBTOOL([$1]) -_LT_CONFIG_LIBTOOL_INIT([$2]) -]) - - -# _LT_FORMAT_COMMENT([COMMENT]) -# ----------------------------- -# Add leading comment marks to the start of each line, and a trailing -# full-stop to the whole comment if one is not present already. -m4_define([_LT_FORMAT_COMMENT], -[m4_ifval([$1], [ -m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], - [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) -)]) - - - -## ------------------------ ## -## FIXME: Eliminate VARNAME ## -## ------------------------ ## - - -# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) -# ------------------------------------------------------------------- -# CONFIGNAME is the name given to the value in the libtool script. -# VARNAME is the (base) name used in the configure script. -# VALUE may be 0, 1 or 2 for a computed quote escaped value based on -# VARNAME. Any other value will be used directly. 
-m4_define([_LT_DECL], -[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], - [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], - [m4_ifval([$1], [$1], [$2])]) - lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) - m4_ifval([$4], - [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) - lt_dict_add_subkey([lt_decl_dict], [$2], - [tagged?], [m4_ifval([$5], [yes], [no])])]) -]) - - -# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) -# -------------------------------------------------------- -m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) - - -# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) -# ------------------------------------------------ -m4_define([lt_decl_tag_varnames], -[_lt_decl_filter([tagged?], [yes], $@)]) - - -# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) -# --------------------------------------------------------- -m4_define([_lt_decl_filter], -[m4_case([$#], - [0], [m4_fatal([$0: too few arguments: $#])], - [1], [m4_fatal([$0: too few arguments: $#: $1])], - [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], - [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], - [lt_dict_filter([lt_decl_dict], $@)])[]dnl -]) - - -# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) -# -------------------------------------------------- -m4_define([lt_decl_quote_varnames], -[_lt_decl_filter([value], [1], $@)]) - - -# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) -# --------------------------------------------------- -m4_define([lt_decl_dquote_varnames], -[_lt_decl_filter([value], [2], $@)]) - - -# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) -# --------------------------------------------------- -m4_define([lt_decl_varnames_tagged], -[m4_assert([$# <= 2])dnl -_$0(m4_quote(m4_default([$1], [[, ]])), - m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), - m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) 
-m4_define([_lt_decl_varnames_tagged], -[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) - - -# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) -# ------------------------------------------------ -m4_define([lt_decl_all_varnames], -[_$0(m4_quote(m4_default([$1], [[, ]])), - m4_if([$2], [], - m4_quote(lt_decl_varnames), - m4_quote(m4_shift($@))))[]dnl -]) -m4_define([_lt_decl_all_varnames], -[lt_join($@, lt_decl_varnames_tagged([$1], - lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl -]) - - -# _LT_CONFIG_STATUS_DECLARE([VARNAME]) -# ------------------------------------ -# Quote a variable value, and forward it to 'config.status' so that its -# declaration there will have the same value as in 'configure'. VARNAME -# must have a single quote delimited value for this to work. -m4_define([_LT_CONFIG_STATUS_DECLARE], -[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) - - -# _LT_CONFIG_STATUS_DECLARATIONS -# ------------------------------ -# We delimit libtool config variables with single quotes, so when -# we write them to config.status, we have to be sure to quote all -# embedded single quotes properly. In configure, this macro expands -# each variable declared with _LT_DECL (and _LT_TAGDECL) into: -# -# ='`$ECHO "$" | $SED "$delay_single_quote_subst"`' -m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], -[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), - [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) - - -# _LT_LIBTOOL_TAGS -# ---------------- -# Output comment and list of tags supported by the script -m4_defun([_LT_LIBTOOL_TAGS], -[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl -available_tags='_LT_TAGS'dnl -]) - - -# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) -# ----------------------------------- -# Extract the dictionary values for VARNAME (optionally with TAG) and -# expand to a commented shell variable setting: -# -# # Some comment about what VAR is for. 
-# visible_name=$lt_internal_name -m4_define([_LT_LIBTOOL_DECLARE], -[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], - [description])))[]dnl -m4_pushdef([_libtool_name], - m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl -m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), - [0], [_libtool_name=[$]$1], - [1], [_libtool_name=$lt_[]$1], - [2], [_libtool_name=$lt_[]$1], - [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl -m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl -]) - - -# _LT_LIBTOOL_CONFIG_VARS -# ----------------------- -# Produce commented declarations of non-tagged libtool config variables -# suitable for insertion in the LIBTOOL CONFIG section of the 'libtool' -# script. Tagged libtool config variables (even for the LIBTOOL CONFIG -# section) are produced by _LT_LIBTOOL_TAG_VARS. -m4_defun([_LT_LIBTOOL_CONFIG_VARS], -[m4_foreach([_lt_var], - m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), - [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) - - -# _LT_LIBTOOL_TAG_VARS(TAG) -# ------------------------- -m4_define([_LT_LIBTOOL_TAG_VARS], -[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), - [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) - - -# _LT_TAGVAR(VARNAME, [TAGNAME]) -# ------------------------------ -m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) - - -# _LT_CONFIG_COMMANDS -# ------------------- -# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of -# variables for single and double quote escaping we saved from calls -# to _LT_DECL, we can put quote escaped variables declarations -# into 'config.status', and then the shell code to quote escape them in -# for loops in 'config.status'. Finally, any additional code accumulated -# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. 
-m4_defun([_LT_CONFIG_COMMANDS], -[AC_PROVIDE_IFELSE([LT_OUTPUT], - dnl If the libtool generation code has been placed in $CONFIG_LT, - dnl instead of duplicating it all over again into config.status, - dnl then we will have config.status run $CONFIG_LT later, so it - dnl needs to know what name is stored there: - [AC_CONFIG_COMMANDS([libtool], - [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], - dnl If the libtool generation code is destined for config.status, - dnl expand the accumulated commands and init code now: - [AC_CONFIG_COMMANDS([libtool], - [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) -])#_LT_CONFIG_COMMANDS - - -# Initialize. -m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], -[ - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -sed_quote_subst='$sed_quote_subst' -double_quote_subst='$double_quote_subst' -delay_variable_subst='$delay_variable_subst' -_LT_CONFIG_STATUS_DECLARATIONS -LTCC='$LTCC' -LTCFLAGS='$LTCFLAGS' -compiler='$compiler_DEFAULT' - -# A function that is used when there is no print builtin or printf. -func_fallback_echo () -{ - eval 'cat <<_LTECHO_EOF -\$[]1 -_LTECHO_EOF' -} - -# Quote evaled strings. -for var in lt_decl_all_varnames([[ \ -]], lt_decl_quote_varnames); do - case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in - *[[\\\\\\\`\\"\\\$]]*) - eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes - ;; - *) - eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" - ;; - esac -done - -# Double-quote double-evaled strings. 
-for var in lt_decl_all_varnames([[ \ -]], lt_decl_dquote_varnames); do - case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in - *[[\\\\\\\`\\"\\\$]]*) - eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes - ;; - *) - eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" - ;; - esac -done - -_LT_OUTPUT_LIBTOOL_INIT -]) - -# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) -# ------------------------------------ -# Generate a child script FILE with all initialization necessary to -# reuse the environment learned by the parent script, and make the -# file executable. If COMMENT is supplied, it is inserted after the -# '#!' sequence but before initialization text begins. After this -# macro, additional text can be appended to FILE to form the body of -# the child script. The macro ends with non-zero status if the -# file could not be fully written (such as if the disk is full). -m4_ifdef([AS_INIT_GENERATED], -[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], -[m4_defun([_LT_GENERATED_FILE_INIT], -[m4_require([AS_PREPARE])]dnl -[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl -[lt_write_fail=0 -cat >$1 <<_ASEOF || lt_write_fail=1 -#! $SHELL -# Generated by $as_me. -$2 -SHELL=\${CONFIG_SHELL-$SHELL} -export SHELL -_ASEOF -cat >>$1 <<\_ASEOF || lt_write_fail=1 -AS_SHELL_SANITIZE -_AS_PREPARE -exec AS_MESSAGE_FD>&1 -_ASEOF -test 0 = "$lt_write_fail" && chmod +x $1[]dnl -m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT - -# LT_OUTPUT -# --------- -# This macro allows early generation of the libtool script (before -# AC_OUTPUT is called), incase it is used in configure for compilation -# tests. 
-AC_DEFUN([LT_OUTPUT], -[: ${CONFIG_LT=./config.lt} -AC_MSG_NOTICE([creating $CONFIG_LT]) -_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], -[# Run this file to recreate a libtool stub with the current configuration.]) - -cat >>"$CONFIG_LT" <<\_LTEOF -lt_cl_silent=false -exec AS_MESSAGE_LOG_FD>>config.log -{ - echo - AS_BOX([Running $as_me.]) -} >&AS_MESSAGE_LOG_FD - -lt_cl_help="\ -'$as_me' creates a local libtool stub from the current configuration, -for use in further configure time tests before the real libtool is -generated. - -Usage: $[0] [[OPTIONS]] - - -h, --help print this help, then exit - -V, --version print version number, then exit - -q, --quiet do not print progress messages - -d, --debug don't remove temporary files - -Report bugs to ." - -lt_cl_version="\ -m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl -m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) -configured by $[0], generated by m4_PACKAGE_STRING. - -Copyright (C) 2011 Free Software Foundation, Inc. -This config.lt script is free software; the Free Software Foundation -gives unlimited permision to copy, distribute and modify it." 
- -while test 0 != $[#] -do - case $[1] in - --version | --v* | -V ) - echo "$lt_cl_version"; exit 0 ;; - --help | --h* | -h ) - echo "$lt_cl_help"; exit 0 ;; - --debug | --d* | -d ) - debug=: ;; - --quiet | --q* | --silent | --s* | -q ) - lt_cl_silent=: ;; - - -*) AC_MSG_ERROR([unrecognized option: $[1] -Try '$[0] --help' for more information.]) ;; - - *) AC_MSG_ERROR([unrecognized argument: $[1] -Try '$[0] --help' for more information.]) ;; - esac - shift -done - -if $lt_cl_silent; then - exec AS_MESSAGE_FD>/dev/null -fi -_LTEOF - -cat >>"$CONFIG_LT" <<_LTEOF -_LT_OUTPUT_LIBTOOL_COMMANDS_INIT -_LTEOF - -cat >>"$CONFIG_LT" <<\_LTEOF -AC_MSG_NOTICE([creating $ofile]) -_LT_OUTPUT_LIBTOOL_COMMANDS -AS_EXIT(0) -_LTEOF -chmod +x "$CONFIG_LT" - -# configure is writing to config.log, but config.lt does its own redirection, -# appending to config.log, which fails on DOS, as config.log is still kept -# open by configure. Here we exec the FD to /dev/null, effectively closing -# config.log, so it can be properly (re)opened and appended to by config.lt. -lt_cl_success=: -test yes = "$silent" && - lt_config_lt_args="$lt_config_lt_args --quiet" -exec AS_MESSAGE_LOG_FD>/dev/null -$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false -exec AS_MESSAGE_LOG_FD>>config.log -$lt_cl_success || AS_EXIT(1) -])# LT_OUTPUT - - -# _LT_CONFIG(TAG) -# --------------- -# If TAG is the built-in tag, create an initial libtool script with a -# default configuration from the untagged config vars. Otherwise add code -# to config.status for appending the configuration named by TAG from the -# matching tagged config vars. -m4_defun([_LT_CONFIG], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -_LT_CONFIG_SAVE_COMMANDS([ - m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl - m4_if(_LT_TAG, [C], [ - # See if we are running on zsh, and set the options that allow our - # commands through without removal of \ escapes. 
- if test -n "${ZSH_VERSION+set}"; then - setopt NO_GLOB_SUBST - fi - - cfgfile=${ofile}T - trap "$RM \"$cfgfile\"; exit 1" 1 2 15 - $RM "$cfgfile" - - cat <<_LT_EOF >> "$cfgfile" -#! $SHELL -# Generated automatically by $as_me ($PACKAGE) $VERSION -# NOTE: Changes made to this file will be lost: look at ltmain.sh. - -# Provide generalized library-building support services. -# Written by Gordon Matzigkeit, 1996 - -_LT_COPYING -_LT_LIBTOOL_TAGS - -# Configured defaults for sys_lib_dlsearch_path munging. -: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} - -# ### BEGIN LIBTOOL CONFIG -_LT_LIBTOOL_CONFIG_VARS -_LT_LIBTOOL_TAG_VARS -# ### END LIBTOOL CONFIG - -_LT_EOF - - cat <<'_LT_EOF' >> "$cfgfile" - -# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE - -_LT_PREPARE_MUNGE_PATH_LIST -_LT_PREPARE_CC_BASENAME - -# ### END FUNCTIONS SHARED WITH CONFIGURE - -_LT_EOF - - case $host_os in - aix3*) - cat <<\_LT_EOF >> "$cfgfile" -# AIX sometimes has problems with the GCC collect2 program. For some -# reason, if we set the COLLECT_NAMES environment variable, the problems -# vanish in a puff of smoke. -if test set != "${COLLECT_NAMES+set}"; then - COLLECT_NAMES= - export COLLECT_NAMES -fi -_LT_EOF - ;; - esac - - _LT_PROG_LTMAIN - - # We use sed instead of cat because bash on DJGPP gets confused if - # if finds mixed CR/LF and LF-only lines. Since sed operates in - # text mode, it properly converts lines to CR/LF. This bash problem - # is reportedly fixed, but why not run on old versions too? - sed '$q' "$ltmain" >> "$cfgfile" \ - || (rm -f "$cfgfile"; exit 1) - - mv -f "$cfgfile" "$ofile" || - (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") - chmod +x "$ofile" -], -[cat <<_LT_EOF >> "$ofile" - -dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded -dnl in a comment (ie after a #). 
-# ### BEGIN LIBTOOL TAG CONFIG: $1 -_LT_LIBTOOL_TAG_VARS(_LT_TAG) -# ### END LIBTOOL TAG CONFIG: $1 -_LT_EOF -])dnl /m4_if -], -[m4_if([$1], [], [ - PACKAGE='$PACKAGE' - VERSION='$VERSION' - RM='$RM' - ofile='$ofile'], []) -])dnl /_LT_CONFIG_SAVE_COMMANDS -])# _LT_CONFIG - - -# LT_SUPPORTED_TAG(TAG) -# --------------------- -# Trace this macro to discover what tags are supported by the libtool -# --tag option, using: -# autoconf --trace 'LT_SUPPORTED_TAG:$1' -AC_DEFUN([LT_SUPPORTED_TAG], []) - - -# C support is built-in for now -m4_define([_LT_LANG_C_enabled], []) -m4_define([_LT_TAGS], []) - - -# LT_LANG(LANG) -# ------------- -# Enable libtool support for the given language if not already enabled. -AC_DEFUN([LT_LANG], -[AC_BEFORE([$0], [LT_OUTPUT])dnl -m4_case([$1], - [C], [_LT_LANG(C)], - [C++], [_LT_LANG(CXX)], - [Go], [_LT_LANG(GO)], - [Java], [_LT_LANG(GCJ)], - [Fortran 77], [_LT_LANG(F77)], - [Fortran], [_LT_LANG(FC)], - [Windows Resource], [_LT_LANG(RC)], - [m4_ifdef([_LT_LANG_]$1[_CONFIG], - [_LT_LANG($1)], - [m4_fatal([$0: unsupported language: "$1"])])])dnl -])# LT_LANG - - -# _LT_LANG(LANGNAME) -# ------------------ -m4_defun([_LT_LANG], -[m4_ifdef([_LT_LANG_]$1[_enabled], [], - [LT_SUPPORTED_TAG([$1])dnl - m4_append([_LT_TAGS], [$1 ])dnl - m4_define([_LT_LANG_]$1[_enabled], [])dnl - _LT_LANG_$1_CONFIG($1)])dnl -])# _LT_LANG - - -m4_ifndef([AC_PROG_GO], [ -############################################################ -# NOTE: This macro has been submitted for inclusion into # -# GNU Autoconf as AC_PROG_GO. When it is available in # -# a released version of Autoconf we should remove this # -# macro and use it instead. 
# -############################################################ -m4_defun([AC_PROG_GO], -[AC_LANG_PUSH(Go)dnl -AC_ARG_VAR([GOC], [Go compiler command])dnl -AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl -_AC_ARG_VAR_LDFLAGS()dnl -AC_CHECK_TOOL(GOC, gccgo) -if test -z "$GOC"; then - if test -n "$ac_tool_prefix"; then - AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) - fi -fi -if test -z "$GOC"; then - AC_CHECK_PROG(GOC, gccgo, gccgo, false) -fi -])#m4_defun -])#m4_ifndef - - -# _LT_LANG_DEFAULT_CONFIG -# ----------------------- -m4_defun([_LT_LANG_DEFAULT_CONFIG], -[AC_PROVIDE_IFELSE([AC_PROG_CXX], - [LT_LANG(CXX)], - [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) - -AC_PROVIDE_IFELSE([AC_PROG_F77], - [LT_LANG(F77)], - [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [LT_LANG(FC)], - [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) - -dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal -dnl pulling things in needlessly. 
-AC_PROVIDE_IFELSE([AC_PROG_GCJ], - [LT_LANG(GCJ)], - [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], - [LT_LANG(GCJ)], - [AC_PROVIDE_IFELSE([LT_PROG_GCJ], - [LT_LANG(GCJ)], - [m4_ifdef([AC_PROG_GCJ], - [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) - m4_ifdef([A][M_PROG_GCJ], - [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) - m4_ifdef([LT_PROG_GCJ], - [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) - -AC_PROVIDE_IFELSE([AC_PROG_GO], - [LT_LANG(GO)], - [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) - -AC_PROVIDE_IFELSE([LT_PROG_RC], - [LT_LANG(RC)], - [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) -])# _LT_LANG_DEFAULT_CONFIG - -# Obsolete macros: -AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) -AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) -AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) -AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) -AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_CXX], []) -dnl AC_DEFUN([AC_LIBTOOL_F77], []) -dnl AC_DEFUN([AC_LIBTOOL_FC], []) -dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) -dnl AC_DEFUN([AC_LIBTOOL_RC], []) - - -# _LT_TAG_COMPILER -# ---------------- -m4_defun([_LT_TAG_COMPILER], -[AC_REQUIRE([AC_PROG_CC])dnl - -_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl -_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl -_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl -_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl - -# If no C compiler was specified, use CC. -LTCC=${LTCC-"$CC"} - -# If no C compiler flags were specified, use CFLAGS. -LTCFLAGS=${LTCFLAGS-"$CFLAGS"} - -# Allow CC to be a program name with arguments. -compiler=$CC -])# _LT_TAG_COMPILER - - -# _LT_COMPILER_BOILERPLATE -# ------------------------ -# Check for compiler boilerplate output or warnings with -# the simple compiler test code. 
-m4_defun([_LT_COMPILER_BOILERPLATE], -[m4_require([_LT_DECL_SED])dnl -ac_outfile=conftest.$ac_objext -echo "$lt_simple_compile_test_code" >conftest.$ac_ext -eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err -_lt_compiler_boilerplate=`cat conftest.err` -$RM conftest* -])# _LT_COMPILER_BOILERPLATE - - -# _LT_LINKER_BOILERPLATE -# ---------------------- -# Check for linker boilerplate output or warnings with -# the simple link test code. -m4_defun([_LT_LINKER_BOILERPLATE], -[m4_require([_LT_DECL_SED])dnl -ac_outfile=conftest.$ac_objext -echo "$lt_simple_link_test_code" >conftest.$ac_ext -eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err -_lt_linker_boilerplate=`cat conftest.err` -$RM -r conftest* -])# _LT_LINKER_BOILERPLATE - -# _LT_REQUIRED_DARWIN_CHECKS -# ------------------------- -m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ - case $host_os in - rhapsody* | darwin*) - AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) - AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) - AC_CHECK_TOOL([LIPO], [lipo], [:]) - AC_CHECK_TOOL([OTOOL], [otool], [:]) - AC_CHECK_TOOL([OTOOL64], [otool64], [:]) - _LT_DECL([], [DSYMUTIL], [1], - [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) - _LT_DECL([], [NMEDIT], [1], - [Tool to change global to local symbols on Mac OS X]) - _LT_DECL([], [LIPO], [1], - [Tool to manipulate fat objects and archives on Mac OS X]) - _LT_DECL([], [OTOOL], [1], - [ldd/readelf like tool for Mach-O binaries on Mac OS X]) - _LT_DECL([], [OTOOL64], [1], - [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) - - AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], - [lt_cv_apple_cc_single_mod=no - if test -z "$LT_MULTI_MODULE"; then - # By default we will add the -single_module flag. You can override - # by either setting the environment variable LT_MULTI_MODULE - # non-empty at configure time, or by adding -multi_module to the - # link flags. 
- rm -rf libconftest.dylib* - echo "int foo(void){return 1;}" > conftest.c - echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ --dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD - $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ - -dynamiclib -Wl,-single_module conftest.c 2>conftest.err - _lt_result=$? - # If there is a non-empty error log, and "single_module" - # appears in it, assume the flag caused a linker warning - if test -s conftest.err && $GREP single_module conftest.err; then - cat conftest.err >&AS_MESSAGE_LOG_FD - # Otherwise, if the output was created with a 0 exit code from - # the compiler, it worked. - elif test -f libconftest.dylib && test 0 = "$_lt_result"; then - lt_cv_apple_cc_single_mod=yes - else - cat conftest.err >&AS_MESSAGE_LOG_FD - fi - rm -rf libconftest.dylib* - rm -f conftest.* - fi]) - - AC_CACHE_CHECK([for -exported_symbols_list linker flag], - [lt_cv_ld_exported_symbols_list], - [lt_cv_ld_exported_symbols_list=no - save_LDFLAGS=$LDFLAGS - echo "_main" > conftest.sym - LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" - AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], - [lt_cv_ld_exported_symbols_list=yes], - [lt_cv_ld_exported_symbols_list=no]) - LDFLAGS=$save_LDFLAGS - ]) - - AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], - [lt_cv_ld_force_load=no - cat > conftest.c << _LT_EOF -int forced_loaded() { return 2;} -_LT_EOF - echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD - $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD - echo "$AR cr libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD - $AR cr libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD - echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD - $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD - cat > conftest.c << _LT_EOF -int main() { return 0;} -_LT_EOF - echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD - $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c 
-Wl,-force_load,./libconftest.a 2>conftest.err - _lt_result=$? - if test -s conftest.err && $GREP force_load conftest.err; then - cat conftest.err >&AS_MESSAGE_LOG_FD - elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then - lt_cv_ld_force_load=yes - else - cat conftest.err >&AS_MESSAGE_LOG_FD - fi - rm -f conftest.err libconftest.a conftest conftest.c - rm -rf conftest.dSYM - ]) - case $host_os in - rhapsody* | darwin1.[[012]]) - _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; - darwin1.*) - _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - darwin*) # darwin 5.x on - # if running on 10.5 or later, the deployment target defaults - # to the OS version, if on x86, and 10.4, the deployment - # target defaults to 10.4. Don't you love it? - case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in - 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; - 10.[[012]][[,.]]*) - _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - 10.*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; - esac - ;; - esac - if test yes = "$lt_cv_apple_cc_single_mod"; then - _lt_dar_single_mod='$single_module' - fi - if test yes = "$lt_cv_ld_exported_symbols_list"; then - _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' - else - _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' - fi - if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then - _lt_dsymutil='~$DSYMUTIL $lib || :' - else - _lt_dsymutil= - fi - ;; - esac -]) - - -# _LT_DARWIN_LINKER_FEATURES([TAG]) -# --------------------------------- -# Checks for linker and compiler features on darwin -m4_defun([_LT_DARWIN_LINKER_FEATURES], -[ - m4_require([_LT_REQUIRED_DARWIN_CHECKS]) - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_automatic, $1)=yes - 
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported - if test yes = "$lt_cv_ld_force_load"; then - _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' - m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], - [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) - else - _LT_TAGVAR(whole_archive_flag_spec, $1)='' - fi - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(allow_undefined_flag, $1)=$_lt_dar_allow_undefined - case $cc_basename in - ifort*|nagfor*) _lt_dar_can_shared=yes ;; - *) _lt_dar_can_shared=$GCC ;; - esac - if test yes = "$_lt_dar_can_shared"; then - output_verbose_link_cmd=func_echo_all - _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" - _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" - _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" - m4_if([$1], [CXX], -[ if test yes != "$lt_cv_apple_cc_single_mod"; then - _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < 
\$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" - fi -],[]) - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi -]) - -# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) -# ---------------------------------- -# Links a minimal program and checks the executable -# for the system default hardcoded library path. In most cases, -# this is /usr/lib:/lib, but when the MPI compilers are used -# the location of the communication and MPI libs are included too. -# If we don't find anything, use the default library path according -# to the aix ld manual. -# Store the results from the different compilers for each TAGNAME. -# Allow to override them for all tags through lt_cv_aix_libpath. -m4_defun([_LT_SYS_MODULE_PATH_AIX], -[m4_require([_LT_DECL_SED])dnl -if test set = "${lt_cv_aix_libpath+set}"; then - aix_libpath=$lt_cv_aix_libpath -else - AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], - [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ - lt_aix_libpath_sed='[ - /Import File Strings/,/^$/ { - /^0/ { - s/^0 *\([^ ]*\) *$/\1/ - p - } - }]' - _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` - # Check for a 64-bit object if we didn't find anything. 
- if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then - _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` - fi],[]) - if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then - _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=/usr/lib:/lib - fi - ]) - aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) -fi -])# _LT_SYS_MODULE_PATH_AIX - - -# _LT_SHELL_INIT(ARG) -# ------------------- -m4_define([_LT_SHELL_INIT], -[m4_divert_text([M4SH-INIT], [$1 -])])# _LT_SHELL_INIT - - - -# _LT_PROG_ECHO_BACKSLASH -# ----------------------- -# Find how we can fake an echo command that does not interpret backslash. -# In particular, with Autoconf 2.60 or later we add some code to the start -# of the generated configure script that will find a shell with a builtin -# printf (that we can use as an echo command). -m4_defun([_LT_PROG_ECHO_BACKSLASH], -[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO -ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO - -AC_MSG_CHECKING([how to print strings]) -# Test print first, because it will be a builtin if present. -if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ - test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then - ECHO='print -r --' -elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then - ECHO='printf %s\n' -else - # Use this function as a fallback that always works. - func_fallback_echo () - { - eval 'cat <<_LTECHO_EOF -$[]1 -_LTECHO_EOF' - } - ECHO='func_fallback_echo' -fi - -# func_echo_all arg... -# Invoke $ECHO with all args, space-separated. 
-func_echo_all () -{ - $ECHO "$*" -} - -case $ECHO in - printf*) AC_MSG_RESULT([printf]) ;; - print*) AC_MSG_RESULT([print -r]) ;; - *) AC_MSG_RESULT([cat]) ;; -esac - -m4_ifdef([_AS_DETECT_SUGGESTED], -[_AS_DETECT_SUGGESTED([ - test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( - ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' - ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO - ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO - PATH=/empty FPATH=/empty; export PATH FPATH - test "X`printf %s $ECHO`" = "X$ECHO" \ - || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) - -_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) -_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) -])# _LT_PROG_ECHO_BACKSLASH - - -# _LT_WITH_SYSROOT -# ---------------- -AC_DEFUN([_LT_WITH_SYSROOT], -[AC_MSG_CHECKING([for sysroot]) -AC_ARG_WITH([sysroot], -[AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], - [Search for dependent libraries within DIR (or the compiler's sysroot - if not specified).])], -[], [with_sysroot=no]) - -dnl lt_sysroot will always be passed unquoted. We quote it here -dnl in case the user passed a directory name. 
-lt_sysroot= -case $with_sysroot in #( - yes) - if test yes = "$GCC"; then - lt_sysroot=`$CC --print-sysroot 2>/dev/null` - fi - ;; #( - /*) - lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` - ;; #( - no|'') - ;; #( - *) - AC_MSG_RESULT([$with_sysroot]) - AC_MSG_ERROR([The sysroot must be an absolute path.]) - ;; -esac - - AC_MSG_RESULT([${lt_sysroot:-no}]) -_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl -[dependent libraries, and where our libraries should be installed.])]) - -# _LT_ENABLE_LOCK -# --------------- -m4_defun([_LT_ENABLE_LOCK], -[AC_ARG_ENABLE([libtool-lock], - [AS_HELP_STRING([--disable-libtool-lock], - [avoid locking (might break parallel builds)])]) -test no = "$enable_libtool_lock" || enable_libtool_lock=yes - -# Some flags need to be propagated to the compiler or linker for good -# libtool support. -case $host in -ia64-*-hpux*) - # Find out what ABI is being produced by ac_compile, and set mode - # options accordingly. - echo 'int i;' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.$ac_objext` in - *ELF-32*) - HPUX_IA64_MODE=32 - ;; - *ELF-64*) - HPUX_IA64_MODE=64 - ;; - esac - fi - rm -rf conftest* - ;; -*-*-irix6*) - # Find out what ABI is being produced by ac_compile, and set linker - # options accordingly. - echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - if test yes = "$lt_cv_prog_gnu_ld"; then - case `/usr/bin/file conftest.$ac_objext` in - *32-bit*) - LD="${LD-ld} -melf32bsmip" - ;; - *N32*) - LD="${LD-ld} -melf32bmipn32" - ;; - *64-bit*) - LD="${LD-ld} -melf64bmip" - ;; - esac - else - case `/usr/bin/file conftest.$ac_objext` in - *32-bit*) - LD="${LD-ld} -32" - ;; - *N32*) - LD="${LD-ld} -n32" - ;; - *64-bit*) - LD="${LD-ld} -64" - ;; - esac - fi - fi - rm -rf conftest* - ;; - -mips64*-*linux*) - # Find out what ABI is being produced by ac_compile, and set linker - # options accordingly. 
- echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - emul=elf - case `/usr/bin/file conftest.$ac_objext` in - *32-bit*) - emul="${emul}32" - ;; - *64-bit*) - emul="${emul}64" - ;; - esac - case `/usr/bin/file conftest.$ac_objext` in - *MSB*) - emul="${emul}btsmip" - ;; - *LSB*) - emul="${emul}ltsmip" - ;; - esac - case `/usr/bin/file conftest.$ac_objext` in - *N32*) - emul="${emul}n32" - ;; - esac - LD="${LD-ld} -m $emul" - fi - rm -rf conftest* - ;; - -x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ -s390*-*linux*|s390*-*tpf*|sparc*-*linux*) - # Find out what ABI is being produced by ac_compile, and set linker - # options accordingly. Note that the listed cases only cover the - # situations where additional linker options are needed (such as when - # doing 32-bit compilation for a host where ld defaults to 64-bit, or - # vice versa); the common cases where no linker options are needed do - # not appear in the list. - echo 'int i;' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in - *32-bit*) - case $host in - x86_64-*kfreebsd*-gnu) - LD="${LD-ld} -m elf_i386_fbsd" - ;; - x86_64-*linux*) - case `/usr/bin/file conftest.o` in - *x86-64*) - LD="${LD-ld} -m elf32_x86_64" - ;; - *) - LD="${LD-ld} -m elf_i386" - ;; - esac - ;; - powerpc64le-*linux*) - LD="${LD-ld} -m elf32lppclinux" - ;; - powerpc64-*linux*) - LD="${LD-ld} -m elf32ppclinux" - ;; - s390x-*linux*) - LD="${LD-ld} -m elf_s390" - ;; - sparc64-*linux*) - LD="${LD-ld} -m elf32_sparc" - ;; - esac - ;; - *64-bit*) - case $host in - x86_64-*kfreebsd*-gnu) - LD="${LD-ld} -m elf_x86_64_fbsd" - ;; - x86_64-*linux*) - LD="${LD-ld} -m elf_x86_64" - ;; - powerpcle-*linux*) - LD="${LD-ld} -m elf64lppc" - ;; - powerpc-*linux*) - LD="${LD-ld} -m elf64ppc" - ;; - s390*-*linux*|s390*-*tpf*) - LD="${LD-ld} -m elf64_s390" - ;; - sparc*-*linux*) - LD="${LD-ld} -m elf64_sparc" - ;; - esac - ;; - esac - fi - rm -rf conftest* - ;; - 
-*-*-sco3.2v5*) - # On SCO OpenServer 5, we need -belf to get full-featured binaries. - SAVE_CFLAGS=$CFLAGS - CFLAGS="$CFLAGS -belf" - AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, - [AC_LANG_PUSH(C) - AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) - AC_LANG_POP]) - if test yes != "$lt_cv_cc_needs_belf"; then - # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf - CFLAGS=$SAVE_CFLAGS - fi - ;; -*-*solaris*) - # Find out what ABI is being produced by ac_compile, and set linker - # options accordingly. - echo 'int i;' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in - *64-bit*) - case $lt_cv_prog_gnu_ld in - yes*) - case $host in - i?86-*-solaris*|x86_64-*-solaris*) - LD="${LD-ld} -m elf_x86_64" - ;; - sparc*-*-solaris*) - LD="${LD-ld} -m elf64_sparc" - ;; - esac - # GNU ld 2.21 introduced _sol2 emulations. Use them if available. - if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then - LD=${LD-ld}_sol2 - fi - ;; - *) - if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then - LD="${LD-ld} -64" - fi - ;; - esac - ;; - esac - fi - rm -rf conftest* - ;; -esac - -need_locks=$enable_libtool_lock -])# _LT_ENABLE_LOCK - - -# _LT_PROG_AR -# ----------- -m4_defun([_LT_PROG_AR], -[AC_CHECK_TOOLS(AR, [ar], false) -: ${AR=ar} -: ${AR_FLAGS=cr} -_LT_DECL([], [AR], [1], [The archiver]) -_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) - -AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], - [lt_cv_ar_at_file=no - AC_COMPILE_IFELSE([AC_LANG_PROGRAM], - [echo conftest.$ac_objext > conftest.lst - lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' - AC_TRY_EVAL([lt_ar_try]) - if test 0 -eq "$ac_status"; then - # Ensure the archiver fails upon bogus file names. 
- rm -f conftest.$ac_objext libconftest.a - AC_TRY_EVAL([lt_ar_try]) - if test 0 -ne "$ac_status"; then - lt_cv_ar_at_file=@ - fi - fi - rm -f conftest.* libconftest.a - ]) - ]) - -if test no = "$lt_cv_ar_at_file"; then - archiver_list_spec= -else - archiver_list_spec=$lt_cv_ar_at_file -fi -_LT_DECL([], [archiver_list_spec], [1], - [How to feed a file listing to the archiver]) -])# _LT_PROG_AR - - -# _LT_CMD_OLD_ARCHIVE -# ------------------- -m4_defun([_LT_CMD_OLD_ARCHIVE], -[_LT_PROG_AR - -AC_CHECK_TOOL(STRIP, strip, :) -test -z "$STRIP" && STRIP=: -_LT_DECL([], [STRIP], [1], [A symbol stripping program]) - -AC_CHECK_TOOL(RANLIB, ranlib, :) -test -z "$RANLIB" && RANLIB=: -_LT_DECL([], [RANLIB], [1], - [Commands used to install an old-style archive]) - -# Determine commands to create old-style static archives. -old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' -old_postinstall_cmds='chmod 644 $oldlib' -old_postuninstall_cmds= - -if test -n "$RANLIB"; then - case $host_os in - bitrig* | openbsd*) - old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" - ;; - *) - old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" - ;; - esac - old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" -fi - -case $host_os in - darwin*) - lock_old_archive_extraction=yes ;; - *) - lock_old_archive_extraction=no ;; -esac -_LT_DECL([], [old_postinstall_cmds], [2]) -_LT_DECL([], [old_postuninstall_cmds], [2]) -_LT_TAGDECL([], [old_archive_cmds], [2], - [Commands used to build an old-style archive]) -_LT_DECL([], [lock_old_archive_extraction], [0], - [Whether to use a lock for old archive extraction]) -])# _LT_CMD_OLD_ARCHIVE - - -# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, -# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) -# ---------------------------------------------------------------- -# Check whether the given compiler option works -AC_DEFUN([_LT_COMPILER_OPTION], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_SED])dnl 
-AC_CACHE_CHECK([$1], [$2], - [$2=no - m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) - echo "$lt_simple_compile_test_code" > conftest.$ac_ext - lt_compiler_flag="$3" ## exclude from sc_useless_quotes_in_assignment - # Insert the option either (1) after the last *FLAGS variable, or - # (2) before a word containing "conftest.", or (3) at the end. - # Note that $ac_compile itself does not contain backslashes and begins - # with a dollar sign (not a hyphen), so the echo should work correctly. - # The option is referenced via a variable to avoid confusing sed. - lt_compile=`echo "$ac_compile" | $SED \ - -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ - -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ - -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) - (eval "$lt_compile" 2>conftest.err) - ac_status=$? - cat conftest.err >&AS_MESSAGE_LOG_FD - echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD - if (exit $ac_status) && test -s "$ac_outfile"; then - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings other than the usual output. - $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp - $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 - if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then - $2=yes - fi - fi - $RM conftest* -]) - -if test yes = "[$]$2"; then - m4_if([$5], , :, [$5]) -else - m4_if([$6], , :, [$6]) -fi -])# _LT_COMPILER_OPTION - -# Old name: -AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) - - -# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, -# [ACTION-SUCCESS], [ACTION-FAILURE]) -# ---------------------------------------------------- -# Check whether the given linker option works -AC_DEFUN([_LT_LINKER_OPTION], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_SED])dnl -AC_CACHE_CHECK([$1], [$2], - [$2=no - save_LDFLAGS=$LDFLAGS - LDFLAGS="$LDFLAGS $3" - echo "$lt_simple_link_test_code" > conftest.$ac_ext - if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then - # The linker can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s conftest.err; then - # Append any errors to the config.log. 
- cat conftest.err 1>&AS_MESSAGE_LOG_FD - $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp - $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 - if diff conftest.exp conftest.er2 >/dev/null; then - $2=yes - fi - else - $2=yes - fi - fi - $RM -r conftest* - LDFLAGS=$save_LDFLAGS -]) - -if test yes = "[$]$2"; then - m4_if([$4], , :, [$4]) -else - m4_if([$5], , :, [$5]) -fi -])# _LT_LINKER_OPTION - -# Old name: -AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) - - -# LT_CMD_MAX_LEN -#--------------- -AC_DEFUN([LT_CMD_MAX_LEN], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -# find the maximum length of command line arguments -AC_MSG_CHECKING([the maximum length of command line arguments]) -AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl - i=0 - teststring=ABCD - - case $build_os in - msdosdjgpp*) - # On DJGPP, this test can blow up pretty badly due to problems in libc - # (any single argument exceeding 2000 bytes causes a buffer overrun - # during glob expansion). Even if it were fixed, the result of this - # check would be larger than it should be. - lt_cv_sys_max_cmd_len=12288; # 12K is about right - ;; - - gnu*) - # Under GNU Hurd, this test is not required because there is - # no limit to the length of command line arguments. - # Libtool will interpret -1 as no limit whatsoever - lt_cv_sys_max_cmd_len=-1; - ;; - - cygwin* | mingw* | cegcc*) - # On Win9x/ME, this test blows up -- it succeeds, but takes - # about 5 minutes as the teststring grows exponentially. - # Worse, since 9x/ME are not pre-emptively multitasking, - # you end up with a "frozen" computer, even though with patience - # the test eventually succeeds (with a max line length of 256k). - # Instead, let's just punt: use the minimum linelength reported by - # all of the supported platforms: 8192 (on NT/2K/XP). 
- lt_cv_sys_max_cmd_len=8192; - ;; - - mint*) - # On MiNT this can take a long time and run out of memory. - lt_cv_sys_max_cmd_len=8192; - ;; - - amigaos*) - # On AmigaOS with pdksh, this test takes hours, literally. - # So we just punt and use a minimum line length of 8192. - lt_cv_sys_max_cmd_len=8192; - ;; - - bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*) - # This has been around since 386BSD, at least. Likely further. - if test -x /sbin/sysctl; then - lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` - elif test -x /usr/sbin/sysctl; then - lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` - else - lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs - fi - # And add a safety zone - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` - ;; - - interix*) - # We know the value 262144 and hardcode it with a safety zone (like BSD) - lt_cv_sys_max_cmd_len=196608 - ;; - - os2*) - # The test takes a long time on OS/2. - lt_cv_sys_max_cmd_len=8192 - ;; - - osf*) - # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure - # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not - # nice to cause kernel panics so lets avoid the loop below. - # First set a reasonable default. 
- lt_cv_sys_max_cmd_len=16384 - # - if test -x /sbin/sysconfig; then - case `/sbin/sysconfig -q proc exec_disable_arg_limit` in - *1*) lt_cv_sys_max_cmd_len=-1 ;; - esac - fi - ;; - sco3.2v5*) - lt_cv_sys_max_cmd_len=102400 - ;; - sysv5* | sco5v6* | sysv4.2uw2*) - kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` - if test -n "$kargmax"; then - lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` - else - lt_cv_sys_max_cmd_len=32768 - fi - ;; - *) - lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` - if test -n "$lt_cv_sys_max_cmd_len" && \ - test undefined != "$lt_cv_sys_max_cmd_len"; then - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` - else - # Make teststring a little bigger before we do anything with it. - # a 1K string should be a reasonable start. - for i in 1 2 3 4 5 6 7 8; do - teststring=$teststring$teststring - done - SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} - # If test is not a shell built-in, we'll probably end up computing a - # maximum length that is only half of the actual maximum length, but - # we can't tell. - while { test X`env echo "$teststring$teststring" 2>/dev/null` \ - = "X$teststring$teststring"; } >/dev/null 2>&1 && - test 17 != "$i" # 1/2 MB should be enough - do - i=`expr $i + 1` - teststring=$teststring$teststring - done - # Only check the string length outside the loop. - lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` - teststring= - # Add a significant safety factor because C++ compilers can tack on - # massive amounts of additional arguments before passing them to the - # linker. It appears as though 1/2 is a usable value. 
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` - fi - ;; - esac -]) -if test -n "$lt_cv_sys_max_cmd_len"; then - AC_MSG_RESULT($lt_cv_sys_max_cmd_len) -else - AC_MSG_RESULT(none) -fi -max_cmd_len=$lt_cv_sys_max_cmd_len -_LT_DECL([], [max_cmd_len], [0], - [What is the maximum length of a command?]) -])# LT_CMD_MAX_LEN - -# Old name: -AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) - - -# _LT_HEADER_DLFCN -# ---------------- -m4_defun([_LT_HEADER_DLFCN], -[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl -])# _LT_HEADER_DLFCN - - -# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, -# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) -# ---------------------------------------------------------------- -m4_defun([_LT_TRY_DLOPEN_SELF], -[m4_require([_LT_HEADER_DLFCN])dnl -if test yes = "$cross_compiling"; then : - [$4] -else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF -[#line $LINENO "configure" -#include "confdefs.h" - -#if HAVE_DLFCN_H -#include -#endif - -#include - -#ifdef RTLD_GLOBAL -# define LT_DLGLOBAL RTLD_GLOBAL -#else -# ifdef DL_GLOBAL -# define LT_DLGLOBAL DL_GLOBAL -# else -# define LT_DLGLOBAL 0 -# endif -#endif - -/* We may have to define LT_DLLAZY_OR_NOW in the command line if we - find out it does not work in some platform. */ -#ifndef LT_DLLAZY_OR_NOW -# ifdef RTLD_LAZY -# define LT_DLLAZY_OR_NOW RTLD_LAZY -# else -# ifdef DL_LAZY -# define LT_DLLAZY_OR_NOW DL_LAZY -# else -# ifdef RTLD_NOW -# define LT_DLLAZY_OR_NOW RTLD_NOW -# else -# ifdef DL_NOW -# define LT_DLLAZY_OR_NOW DL_NOW -# else -# define LT_DLLAZY_OR_NOW 0 -# endif -# endif -# endif -# endif -#endif - -/* When -fvisibility=hidden is used, assume the code has been annotated - correspondingly for the symbols needed. 
*/ -#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) -int fnord () __attribute__((visibility("default"))); -#endif - -int fnord () { return 42; } -int main () -{ - void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); - int status = $lt_dlunknown; - - if (self) - { - if (dlsym (self,"fnord")) status = $lt_dlno_uscore; - else - { - if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; - else puts (dlerror ()); - } - /* dlclose (self); */ - } - else - puts (dlerror ()); - - return status; -}] -_LT_EOF - if AC_TRY_EVAL(ac_link) && test -s "conftest$ac_exeext" 2>/dev/null; then - (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null - lt_status=$? - case x$lt_status in - x$lt_dlno_uscore) $1 ;; - x$lt_dlneed_uscore) $2 ;; - x$lt_dlunknown|x*) $3 ;; - esac - else : - # compilation failed - $3 - fi -fi -rm -fr conftest* -])# _LT_TRY_DLOPEN_SELF - - -# LT_SYS_DLOPEN_SELF -# ------------------ -AC_DEFUN([LT_SYS_DLOPEN_SELF], -[m4_require([_LT_HEADER_DLFCN])dnl -if test yes != "$enable_dlopen"; then - enable_dlopen=unknown - enable_dlopen_self=unknown - enable_dlopen_self_static=unknown -else - lt_cv_dlopen=no - lt_cv_dlopen_libs= - - case $host_os in - beos*) - lt_cv_dlopen=load_add_on - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ;; - - mingw* | pw32* | cegcc*) - lt_cv_dlopen=LoadLibrary - lt_cv_dlopen_libs= - ;; - - cygwin*) - lt_cv_dlopen=dlopen - lt_cv_dlopen_libs= - ;; - - darwin*) - # if libdl is installed we need to link against it - AC_CHECK_LIB([dl], [dlopen], - [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],[ - lt_cv_dlopen=dyld - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ]) - ;; - - tpf*) - # Don't try to run any link tests for TPF. We know it's impossible - # because TPF is a cross-compiler, and we know how we open DSOs. 
- lt_cv_dlopen=dlopen - lt_cv_dlopen_libs= - lt_cv_dlopen_self=no - ;; - - *) - AC_CHECK_FUNC([shl_load], - [lt_cv_dlopen=shl_load], - [AC_CHECK_LIB([dld], [shl_load], - [lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld], - [AC_CHECK_FUNC([dlopen], - [lt_cv_dlopen=dlopen], - [AC_CHECK_LIB([dl], [dlopen], - [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl], - [AC_CHECK_LIB([svld], [dlopen], - [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld], - [AC_CHECK_LIB([dld], [dld_link], - [lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld]) - ]) - ]) - ]) - ]) - ]) - ;; - esac - - if test no = "$lt_cv_dlopen"; then - enable_dlopen=no - else - enable_dlopen=yes - fi - - case $lt_cv_dlopen in - dlopen) - save_CPPFLAGS=$CPPFLAGS - test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" - - save_LDFLAGS=$LDFLAGS - wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" - - save_LIBS=$LIBS - LIBS="$lt_cv_dlopen_libs $LIBS" - - AC_CACHE_CHECK([whether a program can dlopen itself], - lt_cv_dlopen_self, [dnl - _LT_TRY_DLOPEN_SELF( - lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, - lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) - ]) - - if test yes = "$lt_cv_dlopen_self"; then - wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" - AC_CACHE_CHECK([whether a statically linked program can dlopen itself], - lt_cv_dlopen_self_static, [dnl - _LT_TRY_DLOPEN_SELF( - lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, - lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) - ]) - fi - - CPPFLAGS=$save_CPPFLAGS - LDFLAGS=$save_LDFLAGS - LIBS=$save_LIBS - ;; - esac - - case $lt_cv_dlopen_self in - yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; - *) enable_dlopen_self=unknown ;; - esac - - case $lt_cv_dlopen_self_static in - yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; - *) enable_dlopen_self_static=unknown ;; - esac -fi -_LT_DECL([dlopen_support], [enable_dlopen], [0], - [Whether dlopen is supported]) 
-_LT_DECL([dlopen_self], [enable_dlopen_self], [0], - [Whether dlopen of programs is supported]) -_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], - [Whether dlopen of statically linked programs is supported]) -])# LT_SYS_DLOPEN_SELF - -# Old name: -AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) - - -# _LT_COMPILER_C_O([TAGNAME]) -# --------------------------- -# Check to see if options -c and -o are simultaneously supported by compiler. -# This macro does not hard code the compiler like AC_PROG_CC_C_O. -m4_defun([_LT_COMPILER_C_O], -[m4_require([_LT_DECL_SED])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_TAG_COMPILER])dnl -AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], - [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], - [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no - $RM -r conftest 2>/dev/null - mkdir conftest - cd conftest - mkdir out - echo "$lt_simple_compile_test_code" > conftest.$ac_ext - - lt_compiler_flag="-o out/conftest2.$ac_objext" - # Insert the option either (1) after the last *FLAGS variable, or - # (2) before a word containing "conftest.", or (3) at the end. - # Note that $ac_compile itself does not contain backslashes and begins - # with a dollar sign (not a hyphen), so the echo should work correctly. - lt_compile=`echo "$ac_compile" | $SED \ - -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ - -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ - -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) - (eval "$lt_compile" 2>out/conftest.err) - ac_status=$? - cat out/conftest.err >&AS_MESSAGE_LOG_FD - echo "$as_me:$LINENO: \$? 
= $ac_status" >&AS_MESSAGE_LOG_FD - if (exit $ac_status) && test -s out/conftest2.$ac_objext - then - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp - $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 - if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then - _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes - fi - fi - chmod u+w . 2>&AS_MESSAGE_LOG_FD - $RM conftest* - # SGI C++ compiler will create directory out/ii_files/ for - # template instantiation - test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files - $RM out/* && rmdir out - cd .. - $RM -r conftest - $RM conftest* -]) -_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], - [Does compiler simultaneously support -c and -o options?]) -])# _LT_COMPILER_C_O - - -# _LT_COMPILER_FILE_LOCKS([TAGNAME]) -# ---------------------------------- -# Check to see if we can do hard links to lock some files if needed -m4_defun([_LT_COMPILER_FILE_LOCKS], -[m4_require([_LT_ENABLE_LOCK])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -_LT_COMPILER_C_O([$1]) - -hard_links=nottested -if test no = "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" && test no != "$need_locks"; then - # do not overwrite the value of need_locks provided by the user - AC_MSG_CHECKING([if we can lock with hard links]) - hard_links=yes - $RM conftest* - ln conftest.a conftest.b 2>/dev/null && hard_links=no - touch conftest.a - ln conftest.a conftest.b 2>&5 || hard_links=no - ln conftest.a conftest.b 2>/dev/null && hard_links=no - AC_MSG_RESULT([$hard_links]) - if test no = "$hard_links"; then - AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) - need_locks=warn - fi -else - need_locks=no -fi -_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) -])# _LT_COMPILER_FILE_LOCKS - - -# _LT_CHECK_OBJDIR -# ---------------- 
-m4_defun([_LT_CHECK_OBJDIR], -[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], -[rm -f .libs 2>/dev/null -mkdir .libs 2>/dev/null -if test -d .libs; then - lt_cv_objdir=.libs -else - # MS-DOS does not allow filenames that begin with a dot. - lt_cv_objdir=_libs -fi -rmdir .libs 2>/dev/null]) -objdir=$lt_cv_objdir -_LT_DECL([], [objdir], [0], - [The name of the directory that contains temporary libtool files])dnl -m4_pattern_allow([LT_OBJDIR])dnl -AC_DEFINE_UNQUOTED([LT_OBJDIR], "$lt_cv_objdir/", - [Define to the sub-directory where libtool stores uninstalled libraries.]) -])# _LT_CHECK_OBJDIR - - -# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) -# -------------------------------------- -# Check hardcoding attributes. -m4_defun([_LT_LINKER_HARDCODE_LIBPATH], -[AC_MSG_CHECKING([how to hardcode library paths into programs]) -_LT_TAGVAR(hardcode_action, $1)= -if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || - test -n "$_LT_TAGVAR(runpath_var, $1)" || - test yes = "$_LT_TAGVAR(hardcode_automatic, $1)"; then - - # We can hardcode non-existent directories. - if test no != "$_LT_TAGVAR(hardcode_direct, $1)" && - # If the only mechanism to avoid hardcoding is shlibpath_var, we - # have to relink, otherwise we might link with an installed library - # when we should be linking with a yet-to-be-installed one - ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" && - test no != "$_LT_TAGVAR(hardcode_minus_L, $1)"; then - # Linking always hardcodes the temporary library directory. - _LT_TAGVAR(hardcode_action, $1)=relink - else - # We can link without hardcoding, and we can hardcode nonexisting dirs. - _LT_TAGVAR(hardcode_action, $1)=immediate - fi -else - # We cannot hardcode anything, or else we can only hardcode existing - # directories. 
- _LT_TAGVAR(hardcode_action, $1)=unsupported -fi -AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) - -if test relink = "$_LT_TAGVAR(hardcode_action, $1)" || - test yes = "$_LT_TAGVAR(inherit_rpath, $1)"; then - # Fast installation is not supported - enable_fast_install=no -elif test yes = "$shlibpath_overrides_runpath" || - test no = "$enable_shared"; then - # Fast installation is not necessary - enable_fast_install=needless -fi -_LT_TAGDECL([], [hardcode_action], [0], - [How to hardcode a shared library path into an executable]) -])# _LT_LINKER_HARDCODE_LIBPATH - - -# _LT_CMD_STRIPLIB -# ---------------- -m4_defun([_LT_CMD_STRIPLIB], -[m4_require([_LT_DECL_EGREP]) -striplib= -old_striplib= -AC_MSG_CHECKING([whether stripping libraries is possible]) -if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then - test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" - test -z "$striplib" && striplib="$STRIP --strip-unneeded" - AC_MSG_RESULT([yes]) -else -# FIXME - insert some real tests, host_os isn't really good enough - case $host_os in - darwin*) - if test -n "$STRIP"; then - striplib="$STRIP -x" - old_striplib="$STRIP -S" - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - ;; - *) - AC_MSG_RESULT([no]) - ;; - esac -fi -_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) -_LT_DECL([], [striplib], [1]) -])# _LT_CMD_STRIPLIB - - -# _LT_PREPARE_MUNGE_PATH_LIST -# --------------------------- -# Make sure func_munge_path_list() is defined correctly. 
-m4_defun([_LT_PREPARE_MUNGE_PATH_LIST], -[[# func_munge_path_list VARIABLE PATH -# ----------------------------------- -# VARIABLE is name of variable containing _space_ separated list of -# directories to be munged by the contents of PATH, which is string -# having a format: -# "DIR[:DIR]:" -# string "DIR[ DIR]" will be prepended to VARIABLE -# ":DIR[:DIR]" -# string "DIR[ DIR]" will be appended to VARIABLE -# "DIRP[:DIRP]::[DIRA:]DIRA" -# string "DIRP[ DIRP]" will be prepended to VARIABLE and string -# "DIRA[ DIRA]" will be appended to VARIABLE -# "DIR[:DIR]" -# VARIABLE will be replaced by "DIR[ DIR]" -func_munge_path_list () -{ - case x@S|@2 in - x) - ;; - *:) - eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'` \@S|@@S|@1\" - ;; - x:*) - eval @S|@1=\"\@S|@@S|@1 `$ECHO @S|@2 | $SED 's/:/ /g'`\" - ;; - *::*) - eval @S|@1=\"\@S|@@S|@1\ `$ECHO @S|@2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" - eval @S|@1=\"`$ECHO @S|@2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \@S|@@S|@1\" - ;; - *) - eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'`\" - ;; - esac -} -]])# _LT_PREPARE_PATH_LIST - - -# _LT_SYS_DYNAMIC_LINKER([TAG]) -# ----------------------------- -# PORTME Fill in your ld.so characteristics -m4_defun([_LT_SYS_DYNAMIC_LINKER], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_OBJDUMP])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_CHECK_SHELL_FEATURES])dnl -m4_require([_LT_PREPARE_MUNGE_PATH_LIST])dnl -AC_MSG_CHECKING([dynamic linker characteristics]) -m4_if([$1], - [], [ -if test yes = "$GCC"; then - case $host_os in - darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; - *) lt_awk_arg='/^libraries:/' ;; - esac - case $host_os in - mingw* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; - *) lt_sed_strip_eq='s|=/|/|g' ;; - esac - lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` - case $lt_search_path_spec in - *\;*) - # if the path contains 
";" then we assume it to be the separator - # otherwise default to the standard path separator (i.e. ":") - it is - # assumed that no part of a normal pathname contains ";" but that should - # okay in the real world where ";" in dirpaths is itself problematic. - lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` - ;; - *) - lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` - ;; - esac - # Ok, now we have the path, separated by spaces, we can step through it - # and add multilib dir if necessary... - lt_tmp_lt_search_path_spec= - lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` - # ...but if some path component already ends with the multilib dir we assume - # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). - case "$lt_multi_os_dir; $lt_search_path_spec " in - "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) - lt_multi_os_dir= - ;; - esac - for lt_sys_path in $lt_search_path_spec; do - if test -d "$lt_sys_path$lt_multi_os_dir"; then - lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" - elif test -n "$lt_multi_os_dir"; then - test -d "$lt_sys_path" && \ - lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" - fi - done - lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' -BEGIN {RS = " "; FS = "/|\n";} { - lt_foo = ""; - lt_count = 0; - for (lt_i = NF; lt_i > 0; lt_i--) { - if ($lt_i != "" && $lt_i != ".") { - if ($lt_i == "..") { - lt_count++; - } else { - if (lt_count == 0) { - lt_foo = "/" $lt_i lt_foo; - } else { - lt_count--; - } - } - } - } - if (lt_foo != "") { lt_freq[[lt_foo]]++; } - if (lt_freq[[lt_foo]] == 1) { print lt_foo; } -}'` - # AWK program above erroneously prepends '/' to C:/dos/paths - # for these hosts. 
- case $host_os in - mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ - $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; - esac - sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` -else - sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" -fi]) -library_names_spec= -libname_spec='lib$name' -soname_spec= -shrext_cmds=.so -postinstall_cmds= -postuninstall_cmds= -finish_cmds= -finish_eval= -shlibpath_var= -shlibpath_overrides_runpath=unknown -version_type=none -dynamic_linker="$host_os ld.so" -sys_lib_dlsearch_path_spec="/lib /usr/lib" -need_lib_prefix=unknown -hardcode_into_libs=no - -# when you set need_version to no, make sure it does not cause -set_version -# flags to be left without arguments -need_version=unknown - -AC_ARG_VAR([LT_SYS_LIBRARY_PATH], -[User-defined run-time library search path.]) - -case $host_os in -aix3*) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='$libname$release$shared_ext$versuffix $libname.a' - shlibpath_var=LIBPATH - - # AIX 3 has no versioning support, so we append a major version to the name. - soname_spec='$libname$release$shared_ext$major' - ;; - -aix[[4-9]]*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - hardcode_into_libs=yes - if test ia64 = "$host_cpu"; then - # AIX 5 supports IA64 - library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' - shlibpath_var=LD_LIBRARY_PATH - else - # With GCC up to 2.95.x, collect2 would create an import file - # for dependence libraries. The import file would start with - # the line '#! .'. This would cause the generated library to - # depend on '.', always an invalid library. This was fixed in - # development snapshots of GCC prior to 3.0. 
- case $host_os in - aix4 | aix4.[[01]] | aix4.[[01]].*) - if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' - echo ' yes ' - echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then - : - else - can_build_shared=no - fi - ;; - esac - # Using Import Files as archive members, it is possible to support - # filename-based versioning of shared library archives on AIX. While - # this would work for both with and without runtime linking, it will - # prevent static linking of such archives. So we do filename-based - # shared library versioning with .so extension only, which is used - # when both runtime linking and shared linking is enabled. - # Unfortunately, runtime linking may impact performance, so we do - # not want this to be the default eventually. Also, we use the - # versioned .so libs for executables only if there is the -brtl - # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. - # To allow for filename-based versioning support, we need to create - # libNAME.so.V as an archive file, containing: - # *) an Import File, referring to the versioned filename of the - # archive as well as the shared archive member, telling the - # bitwidth (32 or 64) of that shared object, and providing the - # list of exported symbols of that shared object, eventually - # decorated with the 'weak' keyword - # *) the shared object with the F_LOADONLY flag set, to really avoid - # it being seen by the linker. - # At run time we better use the real file rather than another symlink, - # but for link time we create the symlink libNAME.so -> libNAME.so.V - - case $with_aix_soname,$aix_use_runtimelinking in - # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct - # soname into executable. Probably we can add versioning support to - # collect2, so additional links can be useful in future. 
- aix,yes) # traditional libtool - dynamic_linker='AIX unversionable lib.so' - # If using run time linking (on AIX 4.2 or later) use lib.so - # instead of lib.a to let people know that these are not - # typical AIX shared libraries. - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - ;; - aix,no) # traditional AIX only - dynamic_linker='AIX lib.a[(]lib.so.V[)]' - # We preserve .a as extension for shared libraries through AIX4.2 - # and later when we are not doing run time linking. - library_names_spec='$libname$release.a $libname.a' - soname_spec='$libname$release$shared_ext$major' - ;; - svr4,*) # full svr4 only - dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)]" - library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' - # We do not specify a path in Import Files, so LIBPATH fires. - shlibpath_overrides_runpath=yes - ;; - *,yes) # both, prefer svr4 - dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)], lib.a[(]lib.so.V[)]" - library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' - # unpreferred sharedlib libNAME.a needs extra handling - postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' - postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' - # We do not specify a path in Import Files, so LIBPATH fires. 
- shlibpath_overrides_runpath=yes - ;; - *,no) # both, prefer aix - dynamic_linker="AIX lib.a[(]lib.so.V[)], lib.so.V[(]$shared_archive_member_spec.o[)]" - library_names_spec='$libname$release.a $libname.a' - soname_spec='$libname$release$shared_ext$major' - # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling - postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' - postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' - ;; - esac - shlibpath_var=LIBPATH - fi - ;; - -amigaos*) - case $host_cpu in - powerpc) - # Since July 2007 AmigaOS4 officially supports .so libraries. - # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - ;; - m68k) - library_names_spec='$libname.ixlibrary $libname.a' - # Create ${libname}_ixlibrary.a entries in /sys/libs. 
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' - ;; - esac - ;; - -beos*) - library_names_spec='$libname$shared_ext' - dynamic_linker="$host_os ld.so" - shlibpath_var=LIBRARY_PATH - ;; - -bsdi[[45]]*) - version_type=linux # correct to gnu/linux during the next big refactor - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" - sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" - # the default ld.so.conf also contains /usr/contrib/lib and - # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow - # libtool to hard-code these into programs - ;; - -cygwin* | mingw* | pw32* | cegcc*) - version_type=windows - shrext_cmds=.dll - need_version=no - need_lib_prefix=no - - case $GCC,$cc_basename in - yes,*) - # gcc - library_names_spec='$libname.dll.a' - # DLL is installed to $(libdir)/../bin by postinstall_cmds - postinstall_cmds='base_file=`basename \$file`~ - dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ - dldir=$destdir/`dirname \$dlpath`~ - test -d \$dldir || mkdir -p \$dldir~ - $install_prog $dir/$dlname \$dldir/$dlname~ - chmod a+x \$dldir/$dlname~ - if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then - eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; - fi' - postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ - dlpath=$dir/\$dldll~ - $RM \$dlpath' - shlibpath_overrides_runpath=yes - - case $host_os in - cygwin*) - # Cygwin DLLs use 'cyg' prefix rather than 'lib' - soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' -m4_if([$1], [],[ - sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) - ;; - mingw* | cegcc*) - # MinGW DLLs use traditional 'lib' prefix - soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' - ;; - pw32*) - # pw32 DLLs use 'pw' prefix rather than 'lib' - library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' - ;; - esac - dynamic_linker='Win32 ld.exe' - ;; - - *,cl*) - # Native MSVC - libname_spec='$name' - soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' - library_names_spec='$libname.dll.lib' - - case $build_os in - mingw*) - sys_lib_search_path_spec= - lt_save_ifs=$IFS - IFS=';' - for lt_path in $LIB - do - IFS=$lt_save_ifs - # Let DOS variable expansion print the short 8.3 style file name. - lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` - sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" - done - IFS=$lt_save_ifs - # Convert to MSYS style. - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` - ;; - cygwin*) - # Convert to unix form, then to dos form, then back to unix form - # but this time dos style (no spaces!) so that the unix form looks - # like /cygdrive/c/PROGRA~1:/cygdr... 
- sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` - sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` - sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` - ;; - *) - sys_lib_search_path_spec=$LIB - if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then - # It is most probably a Windows format PATH. - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` - else - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` - fi - # FIXME: find the short name or the path components, as spaces are - # common. (e.g. "Program Files" -> "PROGRA~1") - ;; - esac - - # DLL is installed to $(libdir)/../bin by postinstall_cmds - postinstall_cmds='base_file=`basename \$file`~ - dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ - dldir=$destdir/`dirname \$dlpath`~ - test -d \$dldir || mkdir -p \$dldir~ - $install_prog $dir/$dlname \$dldir/$dlname' - postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ - dlpath=$dir/\$dldll~ - $RM \$dlpath' - shlibpath_overrides_runpath=yes - dynamic_linker='Win32 link.exe' - ;; - - *) - # Assume MSVC wrapper - library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' - dynamic_linker='Win32 ld.exe' - ;; - esac - # FIXME: first we should search . 
and the directory the executable is in - shlibpath_var=PATH - ;; - -darwin* | rhapsody*) - dynamic_linker="$host_os dyld" - version_type=darwin - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' - soname_spec='$libname$release$major$shared_ext' - shlibpath_overrides_runpath=yes - shlibpath_var=DYLD_LIBRARY_PATH - shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' -m4_if([$1], [],[ - sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) - sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' - ;; - -dgux*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -freebsd* | dragonfly*) - # DragonFly does not have aout. When/if they implement a new - # versioning mechanism, adjust this. 
- if test -x /usr/bin/objformat; then - objformat=`/usr/bin/objformat` - else - case $host_os in - freebsd[[23]].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi - version_type=freebsd-$objformat - case $version_type in - freebsd-elf*) - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - need_version=no - need_lib_prefix=no - ;; - freebsd-*) - library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' - need_version=yes - ;; - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in - freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[[01]]* | freebsdelf3.[[01]]*) - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ - freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - *) # from 4.6 on, and DragonFly - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - esac - ;; - -haiku*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - dynamic_linker="$host_os runtime_loader" - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LIBRARY_PATH - shlibpath_overrides_runpath=no - sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' - hardcode_into_libs=yes - ;; - -hpux9* | hpux10* | hpux11*) - # Give a soname corresponding to the major version so that dld.sl refuses to - # link against other versions. 
- version_type=sunos - need_lib_prefix=no - need_version=no - case $host_cpu in - ia64*) - shrext_cmds='.so' - hardcode_into_libs=yes - dynamic_linker="$host_os dld.so" - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - if test 32 = "$HPUX_IA64_MODE"; then - sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" - sys_lib_dlsearch_path_spec=/usr/lib/hpux32 - else - sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" - sys_lib_dlsearch_path_spec=/usr/lib/hpux64 - fi - ;; - hppa*64*) - shrext_cmds='.sl' - hardcode_into_libs=yes - dynamic_linker="$host_os dld.sl" - shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH - shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" - sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec - ;; - *) - shrext_cmds='.sl' - dynamic_linker="$host_os dld.sl" - shlibpath_var=SHLIB_PATH - shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - ;; - esac - # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
- postinstall_cmds='chmod 555 $lib' - # or fails outright, so override atomically: - install_override_mode=555 - ;; - -interix[[3-9]]*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - -irix5* | irix6* | nonstopux*) - case $host_os in - nonstopux*) version_type=nonstopux ;; - *) - if test yes = "$lt_cv_prog_gnu_ld"; then - version_type=linux # correct to gnu/linux during the next big refactor - else - version_type=irix - fi ;; - esac - need_lib_prefix=no - need_version=no - soname_spec='$libname$release$shared_ext$major' - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' - case $host_os in - irix5* | nonstopux*) - libsuff= shlibsuff= - ;; - *) - case $LD in # libtool.m4 will add one of these switches to LD - *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") - libsuff= shlibsuff= libmagic=32-bit;; - *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") - libsuff=32 shlibsuff=N32 libmagic=N32;; - *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") - libsuff=64 shlibsuff=64 libmagic=64-bit;; - *) libsuff= shlibsuff= libmagic=never-match;; - esac - ;; - esac - shlibpath_var=LD_LIBRARY${shlibsuff}_PATH - shlibpath_overrides_runpath=no - sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" - sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" - hardcode_into_libs=yes - ;; - -# No shared lib support for Linux oldld, aout, or coff. -linux*oldld* | linux*aout* | linux*coff*) - dynamic_linker=no - ;; - -linux*android*) - version_type=none # Android doesn't support versioned libraries. 
- need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext' - soname_spec='$libname$release$shared_ext' - finish_cmds= - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - - # This implies no fast_install, which is unacceptable. - # Some rework will be needed to allow for fast_install - # before this can be enabled. - hardcode_into_libs=yes - - dynamic_linker='Android linker' - # Don't embed -rpath directories since the linker doesn't support them. - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - ;; - -# This must be glibc/ELF. -linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - - # Some binutils ld are patched to set DT_RUNPATH - AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], - [lt_cv_shlibpath_overrides_runpath=no - save_LDFLAGS=$LDFLAGS - save_libdir=$libdir - eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ - LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" - AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], - [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], - [lt_cv_shlibpath_overrides_runpath=yes])]) - LDFLAGS=$save_LDFLAGS - libdir=$save_libdir - ]) - shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath - - # This implies no fast_install, which is unacceptable. - # Some rework will be needed to allow for fast_install - # before this can be enabled. - hardcode_into_libs=yes - - # Ideally, we could use ldconfig to report *all* directores which are - # searched for libraries, however this is still not possible. 
Aside from not - # being certain /sbin/ldconfig is available, command - # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, - # even though it is searched at run-time. Try to do the best guess by - # appending ld.so.conf contents (and includes) to the search path. - if test -f /etc/ld.so.conf; then - lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` - sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" - fi - - # We used to test for /lib/ld.so.1 and disable shared libraries on - # powerpc, because MkLinux only supported shared libraries with the - # GNU dynamic linker. Since this was broken with cross compilers, - # most powerpc-linux boxes support dynamic linking these days and - # people can always --disable-shared, the test was removed, and we - # assume the GNU/Linux dynamic linker is in use. 
- dynamic_linker='GNU/Linux ld.so' - ;; - -netbsdelf*-gnu) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - dynamic_linker='NetBSD ld.elf_so' - ;; - -netbsd*) - version_type=sunos - need_lib_prefix=no - need_version=no - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - dynamic_linker='NetBSD (a.out) ld.so' - else - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - dynamic_linker='NetBSD ld.elf_so' - fi - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - -newsos6) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - ;; - -*nto* | *qnx*) - version_type=qnx - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - dynamic_linker='ldqnx.so' - ;; - -openbsd* | bitrig*) - version_type=sunos - sys_lib_dlsearch_path_spec=/usr/lib - need_lib_prefix=no - if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then - need_version=no - else - need_version=yes - fi - library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - ;; - -os2*) - libname_spec='$name' - version_type=windows - shrext_cmds=.dll - need_version=no - need_lib_prefix=no - # OS/2 can only load a DLL with a base name of 8 characters or less. - soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; - v=$($ECHO $release$versuffix | tr -d .-); - n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); - $ECHO $n$v`$shared_ext' - library_names_spec='${libname}_dll.$libext' - dynamic_linker='OS/2 ld.exe' - shlibpath_var=BEGINLIBPATH - sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" - sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec - postinstall_cmds='base_file=`basename \$file`~ - dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ - dldir=$destdir/`dirname \$dlpath`~ - test -d \$dldir || mkdir -p \$dldir~ - $install_prog $dir/$dlname \$dldir/$dlname~ - chmod a+x \$dldir/$dlname~ - if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then - eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; - fi' - postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ - dlpath=$dir/\$dldll~ - $RM \$dlpath' - ;; - -osf3* | osf4* | osf5*) - version_type=osf - need_lib_prefix=no - need_version=no - soname_spec='$libname$release$shared_ext$major' - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - shlibpath_var=LD_LIBRARY_PATH - sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" - sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec - ;; - -rdos*) - dynamic_linker=no - ;; - -solaris*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - # ldd complains unless libraries are executable - postinstall_cmds='chmod +x $lib' - ;; - -sunos4*) - version_type=sunos - library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' - finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - if test yes = "$with_gnu_ld"; then - need_lib_prefix=no - fi - need_version=yes - ;; - -sysv4 | sysv4.3*) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LD_LIBRARY_PATH - case $host_vendor in - sni) - shlibpath_overrides_runpath=no - need_lib_prefix=no - runpath_var=LD_RUN_PATH - ;; - siemens) - need_lib_prefix=no - ;; - motorola) - need_lib_prefix=no - need_version=no - shlibpath_overrides_runpath=no - sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' - ;; - esac - ;; - -sysv4*MP*) - if test -d /usr/nec; 
then - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' - soname_spec='$libname$shared_ext.$major' - shlibpath_var=LD_LIBRARY_PATH - fi - ;; - -sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) - version_type=sco - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - if test yes = "$with_gnu_ld"; then - sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' - else - sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' - case $host_os in - sco3.2v5*) - sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" - ;; - esac - fi - sys_lib_dlsearch_path_spec='/usr/lib' - ;; - -tpf*) - # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
- version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - -uts4*) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' - soname_spec='$libname$release$shared_ext$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -*) - dynamic_linker=no - ;; -esac -AC_MSG_RESULT([$dynamic_linker]) -test no = "$dynamic_linker" && can_build_shared=no - -variables_saved_for_relink="PATH $shlibpath_var $runpath_var" -if test yes = "$GCC"; then - variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" -fi - -if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then - sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec -fi - -if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then - sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec -fi - -# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... -configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec - -# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code -func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" - -# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool -configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH - -_LT_DECL([], [variables_saved_for_relink], [1], - [Variables whose values should be saved in libtool wrapper scripts and - restored at link time]) -_LT_DECL([], [need_lib_prefix], [0], - [Do we need the "lib" prefix for modules?]) -_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) -_LT_DECL([], [version_type], [0], [Library versioning type]) -_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) -_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) -_LT_DECL([], [shlibpath_overrides_runpath], [0], - [Is shlibpath searched before the hard-coded library search path?]) -_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) -_LT_DECL([], [library_names_spec], [1], - [[List of archive names. First name is the real one, the rest are links. 
- The last name is the one that the linker finds with -lNAME]]) -_LT_DECL([], [soname_spec], [1], - [[The coded name of the library, if different from the real name]]) -_LT_DECL([], [install_override_mode], [1], - [Permission mode override for installation of shared libraries]) -_LT_DECL([], [postinstall_cmds], [2], - [Command to use after installation of a shared archive]) -_LT_DECL([], [postuninstall_cmds], [2], - [Command to use after uninstallation of a shared archive]) -_LT_DECL([], [finish_cmds], [2], - [Commands used to finish a libtool library installation in a directory]) -_LT_DECL([], [finish_eval], [1], - [[As "finish_cmds", except a single script fragment to be evaled but - not shown]]) -_LT_DECL([], [hardcode_into_libs], [0], - [Whether we should hardcode library paths into libraries]) -_LT_DECL([], [sys_lib_search_path_spec], [2], - [Compile-time system search path for libraries]) -_LT_DECL([sys_lib_dlsearch_path_spec], [configure_time_dlsearch_path], [2], - [Detected run-time system search path for libraries]) -_LT_DECL([], [configure_time_lt_sys_library_path], [2], - [Explicit LT_SYS_LIBRARY_PATH set during ./configure time]) -])# _LT_SYS_DYNAMIC_LINKER - - -# _LT_PATH_TOOL_PREFIX(TOOL) -# -------------------------- -# find a file program that can recognize shared library -AC_DEFUN([_LT_PATH_TOOL_PREFIX], -[m4_require([_LT_DECL_EGREP])dnl -AC_MSG_CHECKING([for $1]) -AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, -[case $MAGIC_CMD in -[[\\/*] | ?:[\\/]*]) - lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. - ;; -*) - lt_save_MAGIC_CMD=$MAGIC_CMD - lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR -dnl $ac_dummy forces splitting on constant user-supplied paths. -dnl POSIX.2 word splitting is done only on the output of word expansions, -dnl not every word. This closes a longstanding sh security hole. - ac_dummy="m4_if([$2], , $PATH, [$2])" - for ac_dir in $ac_dummy; do - IFS=$lt_save_ifs - test -z "$ac_dir" && ac_dir=. 
- if test -f "$ac_dir/$1"; then - lt_cv_path_MAGIC_CMD=$ac_dir/"$1" - if test -n "$file_magic_test_file"; then - case $deplibs_check_method in - "file_magic "*) - file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` - MAGIC_CMD=$lt_cv_path_MAGIC_CMD - if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | - $EGREP "$file_magic_regex" > /dev/null; then - : - else - cat <<_LT_EOF 1>&2 - -*** Warning: the command libtool uses to detect shared libraries, -*** $file_magic_cmd, produces output that libtool cannot recognize. -*** The result is that libtool may fail to recognize shared libraries -*** as such. This will affect the creation of libtool libraries that -*** depend on shared libraries, but programs linked with such libtool -*** libraries will work regardless of this problem. Nevertheless, you -*** may want to report the problem to your system manager and/or to -*** bug-libtool@gnu.org - -_LT_EOF - fi ;; - esac - fi - break - fi - done - IFS=$lt_save_ifs - MAGIC_CMD=$lt_save_MAGIC_CMD - ;; -esac]) -MAGIC_CMD=$lt_cv_path_MAGIC_CMD -if test -n "$MAGIC_CMD"; then - AC_MSG_RESULT($MAGIC_CMD) -else - AC_MSG_RESULT(no) -fi -_LT_DECL([], [MAGIC_CMD], [0], - [Used to examine libraries when file_magic_cmd begins with "file"])dnl -])# _LT_PATH_TOOL_PREFIX - -# Old name: -AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) - - -# _LT_PATH_MAGIC -# -------------- -# find a file program that can recognize a shared library -m4_defun([_LT_PATH_MAGIC], -[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) -if test -z "$lt_cv_path_MAGIC_CMD"; then - if test -n "$ac_tool_prefix"; then - _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) - else - MAGIC_CMD=: - fi -fi -])# _LT_PATH_MAGIC - - -# LT_PATH_LD -# ---------- -# find the pathname to the GNU or non-GNU linker -AC_DEFUN([LT_PATH_LD], -[AC_REQUIRE([AC_PROG_CC])dnl 
-AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_PROG_ECHO_BACKSLASH])dnl - -AC_ARG_WITH([gnu-ld], - [AS_HELP_STRING([--with-gnu-ld], - [assume the C compiler uses GNU ld @<:@default=no@:>@])], - [test no = "$withval" || with_gnu_ld=yes], - [with_gnu_ld=no])dnl - -ac_prog=ld -if test yes = "$GCC"; then - # Check if gcc -print-prog-name=ld gives a path. - AC_MSG_CHECKING([for ld used by $CC]) - case $host in - *-*-mingw*) - # gcc leaves a trailing carriage return, which upsets mingw - ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; - *) - ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; - esac - case $ac_prog in - # Accept absolute paths. - [[\\/]]* | ?:[[\\/]]*) - re_direlt='/[[^/]][[^/]]*/\.\./' - # Canonicalize the pathname of ld - ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` - while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do - ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` - done - test -z "$LD" && LD=$ac_prog - ;; - "") - # If it fails, then pretend we aren't using GCC. - ac_prog=ld - ;; - *) - # If it is relative, then search for the first ld in PATH. - with_gnu_ld=unknown - ;; - esac -elif test yes = "$with_gnu_ld"; then - AC_MSG_CHECKING([for GNU ld]) -else - AC_MSG_CHECKING([for non-GNU ld]) -fi -AC_CACHE_VAL(lt_cv_path_LD, -[if test -z "$LD"; then - lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR - for ac_dir in $PATH; do - IFS=$lt_save_ifs - test -z "$ac_dir" && ac_dir=. - if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then - lt_cv_path_LD=$ac_dir/$ac_prog - # Check to see if the program is GNU ld. I'd rather use --version, - # but apparently some variants of GNU ld only accept -v. - # Break only if it was the GNU/non-GNU ld that we prefer. 
- case `"$lt_cv_path_LD" -v 2>&1 &1 conftest.i -cat conftest.i conftest.i >conftest2.i -: ${lt_DD:=$DD} -AC_PATH_PROGS_FEATURE_CHECK([lt_DD], [dd], -[if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then - cmp -s conftest.i conftest.out \ - && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: -fi]) -rm -f conftest.i conftest2.i conftest.out]) -])# _LT_PATH_DD - - -# _LT_CMD_TRUNCATE -# ---------------- -# find command to truncate a binary pipe -m4_defun([_LT_CMD_TRUNCATE], -[m4_require([_LT_PATH_DD]) -AC_CACHE_CHECK([how to truncate binary pipes], [lt_cv_truncate_bin], -[printf 0123456789abcdef0123456789abcdef >conftest.i -cat conftest.i conftest.i >conftest2.i -lt_cv_truncate_bin= -if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then - cmp -s conftest.i conftest.out \ - && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" -fi -rm -f conftest.i conftest2.i conftest.out -test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q"]) -_LT_DECL([lt_truncate_bin], [lt_cv_truncate_bin], [1], - [Command to truncate a binary pipe]) -])# _LT_CMD_TRUNCATE - - -# _LT_CHECK_MAGIC_METHOD -# ---------------------- -# how to check for library dependencies -# -- PORTME fill in with the dynamic library characteristics -m4_defun([_LT_CHECK_MAGIC_METHOD], -[m4_require([_LT_DECL_EGREP]) -m4_require([_LT_DECL_OBJDUMP]) -AC_CACHE_CHECK([how to recognize dependent libraries], -lt_cv_deplibs_check_method, -[lt_cv_file_magic_cmd='$MAGIC_CMD' -lt_cv_file_magic_test_file= -lt_cv_deplibs_check_method='unknown' -# Need to set the preceding variable on all platforms that support -# interlibrary dependencies. -# 'none' -- dependencies not supported. -# 'unknown' -- same as none, but documents that we really don't know. -# 'pass_all' -- all dependencies passed with no checks. -# 'test_compile' -- check by making test program. 
-# 'file_magic [[regex]]' -- check by looking for files in library path -# that responds to the $file_magic_cmd with a given extended regex. -# If you have 'file' or equivalent on your system and you're not sure -# whether 'pass_all' will *always* work, you probably want this one. - -case $host_os in -aix[[4-9]]*) - lt_cv_deplibs_check_method=pass_all - ;; - -beos*) - lt_cv_deplibs_check_method=pass_all - ;; - -bsdi[[45]]*) - lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' - lt_cv_file_magic_cmd='/usr/bin/file -L' - lt_cv_file_magic_test_file=/shlib/libc.so - ;; - -cygwin*) - # func_win32_libid is a shell function defined in ltmain.sh - lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' - lt_cv_file_magic_cmd='func_win32_libid' - ;; - -mingw* | pw32*) - # Base MSYS/MinGW do not provide the 'file' command needed by - # func_win32_libid shell function, so use a weaker test based on 'objdump', - # unless we find 'file', for example because we are cross-compiling. - if ( file / ) >/dev/null 2>&1; then - lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' - lt_cv_file_magic_cmd='func_win32_libid' - else - # Keep this pattern in sync with the one in func_win32_libid. - lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' - lt_cv_file_magic_cmd='$OBJDUMP -f' - fi - ;; - -cegcc*) - # use the weaker test based on 'objdump'. See mingw*. - lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' - lt_cv_file_magic_cmd='$OBJDUMP -f' - ;; - -darwin* | rhapsody*) - lt_cv_deplibs_check_method=pass_all - ;; - -freebsd* | dragonfly*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then - case $host_cpu in - i*86 ) - # Not sure whether the presence of OpenBSD here was a mistake. - # Let's accept both of them until this is cleared up. 
- lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` - ;; - esac - else - lt_cv_deplibs_check_method=pass_all - fi - ;; - -haiku*) - lt_cv_deplibs_check_method=pass_all - ;; - -hpux10.20* | hpux11*) - lt_cv_file_magic_cmd=/usr/bin/file - case $host_cpu in - ia64*) - lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' - lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so - ;; - hppa*64*) - [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] - lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl - ;; - *) - lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' - lt_cv_file_magic_test_file=/usr/lib/libc.sl - ;; - esac - ;; - -interix[[3-9]]*) - # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' - ;; - -irix5* | irix6* | nonstopux*) - case $LD in - *-32|*"-32 ") libmagic=32-bit;; - *-n32|*"-n32 ") libmagic=N32;; - *-64|*"-64 ") libmagic=64-bit;; - *) libmagic=never-match;; - esac - lt_cv_deplibs_check_method=pass_all - ;; - -# This must be glibc/ELF. 
-linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) - lt_cv_deplibs_check_method=pass_all - ;; - -netbsd* | netbsdelf*-gnu) - if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' - else - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' - fi - ;; - -newos6*) - lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=/usr/lib/libnls.so - ;; - -*nto* | *qnx*) - lt_cv_deplibs_check_method=pass_all - ;; - -openbsd* | bitrig*) - if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' - else - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' - fi - ;; - -osf3* | osf4* | osf5*) - lt_cv_deplibs_check_method=pass_all - ;; - -rdos*) - lt_cv_deplibs_check_method=pass_all - ;; - -solaris*) - lt_cv_deplibs_check_method=pass_all - ;; - -sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) - lt_cv_deplibs_check_method=pass_all - ;; - -sysv4 | sysv4.3*) - case $host_vendor in - motorola) - lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' - lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` - ;; - ncr) - lt_cv_deplibs_check_method=pass_all - ;; - sequent) - lt_cv_file_magic_cmd='/bin/file' - lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' - ;; - sni) - lt_cv_file_magic_cmd='/bin/file' - lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" - lt_cv_file_magic_test_file=/lib/libc.so - ;; - siemens) - lt_cv_deplibs_check_method=pass_all - ;; - pc) - lt_cv_deplibs_check_method=pass_all - ;; - esac - ;; - -tpf*) - 
lt_cv_deplibs_check_method=pass_all - ;; -os2*) - lt_cv_deplibs_check_method=pass_all - ;; -esac -]) - -file_magic_glob= -want_nocaseglob=no -if test "$build" = "$host"; then - case $host_os in - mingw* | pw32*) - if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then - want_nocaseglob=yes - else - file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` - fi - ;; - esac -fi - -file_magic_cmd=$lt_cv_file_magic_cmd -deplibs_check_method=$lt_cv_deplibs_check_method -test -z "$deplibs_check_method" && deplibs_check_method=unknown - -_LT_DECL([], [deplibs_check_method], [1], - [Method to check whether dependent libraries are shared objects]) -_LT_DECL([], [file_magic_cmd], [1], - [Command to use when deplibs_check_method = "file_magic"]) -_LT_DECL([], [file_magic_glob], [1], - [How to find potential files when deplibs_check_method = "file_magic"]) -_LT_DECL([], [want_nocaseglob], [1], - [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) -])# _LT_CHECK_MAGIC_METHOD - - -# LT_PATH_NM -# ---------- -# find the pathname to a BSD- or MS-compatible name lister -AC_DEFUN([LT_PATH_NM], -[AC_REQUIRE([AC_PROG_CC])dnl -AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, -[if test -n "$NM"; then - # Let the user override the test. - lt_cv_path_NM=$NM -else - lt_nm_to_check=${ac_tool_prefix}nm - if test -n "$ac_tool_prefix" && test "$build" = "$host"; then - lt_nm_to_check="$lt_nm_to_check nm" - fi - for lt_tmp_nm in $lt_nm_to_check; do - lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR - for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do - IFS=$lt_save_ifs - test -z "$ac_dir" && ac_dir=. - tmp_nm=$ac_dir/$lt_tmp_nm - if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then - # Check to see if the nm accepts a BSD-compat flag. 
- # Adding the 'sed 1q' prevents false positives on HP-UX, which says: - # nm: unknown option "B" ignored - # Tru64's nm complains that /dev/null is an invalid object file - # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty - case $build_os in - mingw*) lt_bad_file=conftest.nm/nofile ;; - *) lt_bad_file=/dev/null ;; - esac - case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in - *$lt_bad_file* | *'Invalid file or object type'*) - lt_cv_path_NM="$tmp_nm -B" - break 2 - ;; - *) - case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in - */dev/null*) - lt_cv_path_NM="$tmp_nm -p" - break 2 - ;; - *) - lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but - continue # so that we can try to find one that supports BSD flags - ;; - esac - ;; - esac - fi - done - IFS=$lt_save_ifs - done - : ${lt_cv_path_NM=no} -fi]) -if test no != "$lt_cv_path_NM"; then - NM=$lt_cv_path_NM -else - # Didn't find any BSD compatible name lister, look for dumpbin. - if test -n "$DUMPBIN"; then : - # Let the user override the test. 
- else - AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) - case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in - *COFF*) - DUMPBIN="$DUMPBIN -symbols -headers" - ;; - *) - DUMPBIN=: - ;; - esac - fi - AC_SUBST([DUMPBIN]) - if test : != "$DUMPBIN"; then - NM=$DUMPBIN - fi -fi -test -z "$NM" && NM=nm -AC_SUBST([NM]) -_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl - -AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], - [lt_cv_nm_interface="BSD nm" - echo "int some_variable = 0;" > conftest.$ac_ext - (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) - (eval "$ac_compile" 2>conftest.err) - cat conftest.err >&AS_MESSAGE_LOG_FD - (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) - (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) - cat conftest.err >&AS_MESSAGE_LOG_FD - (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) - cat conftest.out >&AS_MESSAGE_LOG_FD - if $GREP 'External.*some_variable' conftest.out > /dev/null; then - lt_cv_nm_interface="MS dumpbin" - fi - rm -f conftest*]) -])# LT_PATH_NM - -# Old names: -AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) -AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_PROG_NM], []) -dnl AC_DEFUN([AC_PROG_NM], []) - -# _LT_CHECK_SHAREDLIB_FROM_LINKLIB -# -------------------------------- -# how to determine the name of the shared library -# associated with a specific link library. 
-# -- PORTME fill in with the dynamic library characteristics -m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], -[m4_require([_LT_DECL_EGREP]) -m4_require([_LT_DECL_OBJDUMP]) -m4_require([_LT_DECL_DLLTOOL]) -AC_CACHE_CHECK([how to associate runtime and link libraries], -lt_cv_sharedlib_from_linklib_cmd, -[lt_cv_sharedlib_from_linklib_cmd='unknown' - -case $host_os in -cygwin* | mingw* | pw32* | cegcc*) - # two different shell functions defined in ltmain.sh; - # decide which one to use based on capabilities of $DLLTOOL - case `$DLLTOOL --help 2>&1` in - *--identify-strict*) - lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib - ;; - *) - lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback - ;; - esac - ;; -*) - # fallback: assume linklib IS sharedlib - lt_cv_sharedlib_from_linklib_cmd=$ECHO - ;; -esac -]) -sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd -test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO - -_LT_DECL([], [sharedlib_from_linklib_cmd], [1], - [Command to associate shared and link libraries]) -])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB - - -# _LT_PATH_MANIFEST_TOOL -# ---------------------- -# locate the manifest tool -m4_defun([_LT_PATH_MANIFEST_TOOL], -[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) -test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt -AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], - [lt_cv_path_mainfest_tool=no - echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD - $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out - cat conftest.err >&AS_MESSAGE_LOG_FD - if $GREP 'Manifest Tool' conftest.out > /dev/null; then - lt_cv_path_mainfest_tool=yes - fi - rm -f conftest*]) -if test yes != "$lt_cv_path_mainfest_tool"; then - MANIFEST_TOOL=: -fi -_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl -])# _LT_PATH_MANIFEST_TOOL - - -# _LT_DLL_DEF_P([FILE]) -# --------------------- -# True iff FILE is a Windows DLL '.def' file. 
-# Keep in sync with func_dll_def_p in the libtool script -AC_DEFUN([_LT_DLL_DEF_P], -[dnl - test DEF = "`$SED -n dnl - -e '\''s/^[[ ]]*//'\'' dnl Strip leading whitespace - -e '\''/^\(;.*\)*$/d'\'' dnl Delete empty lines and comments - -e '\''s/^\(EXPORTS\|LIBRARY\)\([[ ]].*\)*$/DEF/p'\'' dnl - -e q dnl Only consider the first "real" line - $1`" dnl -])# _LT_DLL_DEF_P - - -# LT_LIB_M -# -------- -# check for math library -AC_DEFUN([LT_LIB_M], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -LIBM= -case $host in -*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) - # These system don't have libm, or don't need it - ;; -*-ncr-sysv4.3*) - AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM=-lmw) - AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") - ;; -*) - AC_CHECK_LIB(m, cos, LIBM=-lm) - ;; -esac -AC_SUBST([LIBM]) -])# LT_LIB_M - -# Old name: -AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_CHECK_LIBM], []) - - -# _LT_COMPILER_NO_RTTI([TAGNAME]) -# ------------------------------- -m4_defun([_LT_COMPILER_NO_RTTI], -[m4_require([_LT_TAG_COMPILER])dnl - -_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= - -if test yes = "$GCC"; then - case $cc_basename in - nvcc*) - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; - *) - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; - esac - - _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], - lt_cv_prog_compiler_rtti_exceptions, - [-fno-rtti -fno-exceptions], [], - [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) -fi -_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], - [Compiler flag to turn off builtin functions]) -])# _LT_COMPILER_NO_RTTI - - -# _LT_CMD_GLOBAL_SYMBOLS -# ---------------------- -m4_defun([_LT_CMD_GLOBAL_SYMBOLS], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_PROG_CC])dnl 
-AC_REQUIRE([AC_PROG_AWK])dnl -AC_REQUIRE([LT_PATH_NM])dnl -AC_REQUIRE([LT_PATH_LD])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_TAG_COMPILER])dnl - -# Check for command to grab the raw symbol name followed by C symbol from nm. -AC_MSG_CHECKING([command to parse $NM output from $compiler object]) -AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], -[ -# These are sane defaults that work on at least a few old systems. -# [They come from Ultrix. What could be older than Ultrix?!! ;)] - -# Character class describing NM global symbol codes. -symcode='[[BCDEGRST]]' - -# Regexp to match symbols that can be accessed directly from C. -sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' - -# Define system-specific variables. -case $host_os in -aix*) - symcode='[[BCDT]]' - ;; -cygwin* | mingw* | pw32* | cegcc*) - symcode='[[ABCDGISTW]]' - ;; -hpux*) - if test ia64 = "$host_cpu"; then - symcode='[[ABCDEGRST]]' - fi - ;; -irix* | nonstopux*) - symcode='[[BCDEGRST]]' - ;; -osf*) - symcode='[[BCDEGQRST]]' - ;; -solaris*) - symcode='[[BDRT]]' - ;; -sco3.2v5*) - symcode='[[DT]]' - ;; -sysv4.2uw2*) - symcode='[[DT]]' - ;; -sysv5* | sco5v6* | unixware* | OpenUNIX*) - symcode='[[ABDT]]' - ;; -sysv4) - symcode='[[DFNSTU]]' - ;; -esac - -# If we're using GNU nm, then use its standard symbol codes. -case `$NM -V 2>&1` in -*GNU* | *'with BFD'*) - symcode='[[ABCDGIRSTW]]' ;; -esac - -if test "$lt_cv_nm_interface" = "MS dumpbin"; then - # Gets list of data symbols to import. - lt_cv_sys_global_symbol_to_import="sed -n -e 's/^I .* \(.*\)$/\1/p'" - # Adjust the below global symbol transforms to fixup imported variables. - lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" - lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" - lt_c_name_lib_hook="\ - -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ - -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" -else - # Disable hooks by default. 
- lt_cv_sys_global_symbol_to_import= - lt_cdecl_hook= - lt_c_name_hook= - lt_c_name_lib_hook= -fi - -# Transform an extracted symbol line into a proper C declaration. -# Some systems (esp. on ia64) link data and code symbols differently, -# so use this general approach. -lt_cv_sys_global_symbol_to_cdecl="sed -n"\ -$lt_cdecl_hook\ -" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ -" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" - -# Transform an extracted symbol line into symbol name and symbol address -lt_cv_sys_global_symbol_to_c_name_address="sed -n"\ -$lt_c_name_hook\ -" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ -" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" - -# Transform an extracted symbol line into symbol name with lib prefix and -# symbol address. -lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n"\ -$lt_c_name_lib_hook\ -" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ -" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ -" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" - -# Handle CRLF in mingw tool chain -opt_cr= -case $build_os in -mingw*) - opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp - ;; -esac - -# Try without a prefix underscore, then with it. -for ac_symprfx in "" "_"; do - - # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. - symxfrm="\\1 $ac_symprfx\\2 \\2" - - # Write the raw and C identifiers. - if test "$lt_cv_nm_interface" = "MS dumpbin"; then - # Fake it for dumpbin and say T for any non-static function, - # D for any global variable and I for any imported variable. - # Also find C++ and __fastcall symbols from MSVC++, - # which start with @ or ?. 
- lt_cv_sys_global_symbol_pipe="$AWK ['"\ -" {last_section=section; section=\$ 3};"\ -" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ -" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ -" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ -" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ -" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ -" \$ 0!~/External *\|/{next};"\ -" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ -" {if(hide[section]) next};"\ -" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ -" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ -" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ -" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ -" ' prfx=^$ac_symprfx]" - else - lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" - fi - lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" - - # Check to see that the pipe works correctly. - pipe_works=no - - rm -f conftest* - cat > conftest.$ac_ext <<_LT_EOF -#ifdef __cplusplus -extern "C" { -#endif -char nm_test_var; -void nm_test_func(void); -void nm_test_func(void){} -#ifdef __cplusplus -} -#endif -int main(){nm_test_var='a';nm_test_func();return(0);} -_LT_EOF - - if AC_TRY_EVAL(ac_compile); then - # Now try to grab the symbols. - nlist=conftest.nm - $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&AS_MESSAGE_LOG_FD - if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&AS_MESSAGE_LOG_FD && test -s "$nlist"; then - # Try sorting and uniquifying the output. - if sort "$nlist" | uniq > "$nlist"T; then - mv -f "$nlist"T "$nlist" - else - rm -f "$nlist"T - fi - - # Make sure that we snagged all the symbols we need. 
- if $GREP ' nm_test_var$' "$nlist" >/dev/null; then - if $GREP ' nm_test_func$' "$nlist" >/dev/null; then - cat <<_LT_EOF > conftest.$ac_ext -/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ -#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE -/* DATA imports from DLLs on WIN32 can't be const, because runtime - relocations are performed -- see ld's documentation on pseudo-relocs. */ -# define LT@&t@_DLSYM_CONST -#elif defined __osf__ -/* This system does not cope well with relocations in const data. */ -# define LT@&t@_DLSYM_CONST -#else -# define LT@&t@_DLSYM_CONST const -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -_LT_EOF - # Now generate the symbol file. - eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' - - cat <<_LT_EOF >> conftest.$ac_ext - -/* The mapping between symbol names and symbols. */ -LT@&t@_DLSYM_CONST struct { - const char *name; - void *address; -} -lt__PROGRAM__LTX_preloaded_symbols[[]] = -{ - { "@PROGRAM@", (void *) 0 }, -_LT_EOF - $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext - cat <<\_LT_EOF >> conftest.$ac_ext - {0, (void *) 0} -}; - -/* This works around a problem in FreeBSD linker */ -#ifdef FREEBSD_WORKAROUND -static const void *lt_preloaded_setup() { - return lt__PROGRAM__LTX_preloaded_symbols; -} -#endif - -#ifdef __cplusplus -} -#endif -_LT_EOF - # Now try linking the two files. 
- mv conftest.$ac_objext conftstm.$ac_objext - lt_globsym_save_LIBS=$LIBS - lt_globsym_save_CFLAGS=$CFLAGS - LIBS=conftstm.$ac_objext - CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" - if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then - pipe_works=yes - fi - LIBS=$lt_globsym_save_LIBS - CFLAGS=$lt_globsym_save_CFLAGS - else - echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD - fi - else - echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD - fi - else - echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD - fi - else - echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD - cat conftest.$ac_ext >&5 - fi - rm -rf conftest* conftst* - - # Do not use the global_symbol_pipe unless it works. - if test yes = "$pipe_works"; then - break - else - lt_cv_sys_global_symbol_pipe= - fi -done -]) -if test -z "$lt_cv_sys_global_symbol_pipe"; then - lt_cv_sys_global_symbol_to_cdecl= -fi -if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then - AC_MSG_RESULT(failed) -else - AC_MSG_RESULT(ok) -fi - -# Response file support. 
-if test "$lt_cv_nm_interface" = "MS dumpbin"; then - nm_file_list_spec='@' -elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then - nm_file_list_spec='@' -fi - -_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], - [Take the output of nm and produce a listing of raw symbols and C names]) -_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], - [Transform the output of nm in a proper C declaration]) -_LT_DECL([global_symbol_to_import], [lt_cv_sys_global_symbol_to_import], [1], - [Transform the output of nm into a list of symbols to manually relocate]) -_LT_DECL([global_symbol_to_c_name_address], - [lt_cv_sys_global_symbol_to_c_name_address], [1], - [Transform the output of nm in a C name address pair]) -_LT_DECL([global_symbol_to_c_name_address_lib_prefix], - [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], - [Transform the output of nm in a C name address pair when lib prefix is needed]) -_LT_DECL([nm_interface], [lt_cv_nm_interface], [1], - [The name lister interface]) -_LT_DECL([], [nm_file_list_spec], [1], - [Specify filename containing input files for $NM]) -]) # _LT_CMD_GLOBAL_SYMBOLS - - -# _LT_COMPILER_PIC([TAGNAME]) -# --------------------------- -m4_defun([_LT_COMPILER_PIC], -[m4_require([_LT_TAG_COMPILER])dnl -_LT_TAGVAR(lt_prog_compiler_wl, $1)= -_LT_TAGVAR(lt_prog_compiler_pic, $1)= -_LT_TAGVAR(lt_prog_compiler_static, $1)= - -m4_if([$1], [CXX], [ - # C++ specific cases for pic, static, wl, etc. - if test yes = "$GXX"; then - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - - case $host_os in - aix*) - # All AIX code is PIC. 
- if test ia64 = "$host_cpu"; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - fi - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - m68k) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the '-m68020' flag to GCC prevents building anything better, - # like '-m68040'. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' - ;; - esac - ;; - - beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) - # PIC is the default for these OSes. - ;; - mingw* | cygwin* | os2* | pw32* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - # Although the cygwin gcc ignores -fPIC, still need this for old-style - # (--disable-auto-import) libraries - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - case $host_os in - os2*) - _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' - ;; - esac - ;; - darwin* | rhapsody*) - # PIC is the default on this platform - # Common symbols not allowed in MH_DYLIB files - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' - ;; - *djgpp*) - # DJGPP does not support shared libraries at all - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - ;; - haiku*) - # PIC is the default for Haiku. - # The "-static" flag exists, but is broken. - _LT_TAGVAR(lt_prog_compiler_static, $1)= - ;; - interix[[3-9]]*) - # Interix 3.x gcc -fpic/-fPIC options generate broken code. - # Instead, we relocate shared libraries at runtime. - ;; - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic - fi - ;; - hpux*) - # PIC is the default for 64-bit PA HP-UX, but not for 32-bit - # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag - # sets the default TLS model and affects inlining. - case $host_cpu in - hppa*64*) - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - ;; - *qnx* | *nto*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - else - case $host_os in - aix[[4-9]]*) - # All AIX code is PIC. - if test ia64 = "$host_cpu"; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - else - _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' - fi - ;; - chorus*) - case $cc_basename in - cxch68*) - # Green Hills C++ Compiler - # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" - ;; - esac - ;; - mingw* | cygwin* | os2* | pw32* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). 
- m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - ;; - dgux*) - case $cc_basename in - ec++*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - ;; - ghcx*) - # Green Hills C++ Compiler - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - ;; - *) - ;; - esac - ;; - freebsd* | dragonfly*) - # FreeBSD uses GNU C++ - ;; - hpux9* | hpux10* | hpux11*) - case $cc_basename in - CC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' - if test ia64 != "$host_cpu"; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' - fi - ;; - aCC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' - case $host_cpu in - hppa*64*|ia64*) - # +Z the default - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' - ;; - esac - ;; - *) - ;; - esac - ;; - interix*) - # This is c89, which is MS Visual C++ (no shared libs) - # Anyone wants to do a port? - ;; - irix5* | irix6* | nonstopux*) - case $cc_basename in - CC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - # CC pic flag -KPIC is the default. - ;; - *) - ;; - esac - ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) - case $cc_basename in - KCC*) - # KAI C++ Compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - ecpc* ) - # old Intel C++ for x86_64, which still supported -KPIC. - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - icpc* ) - # Intel C++, used to be incompatible with GCC. - # ICC 10 doesn't accept -KPIC any more. 
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - pgCC* | pgcpp*) - # Portland Group C++ compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - cxx*) - # Compaq C++ - # Make sure the PIC flag is empty. It appears that all Alpha - # Linux and Compaq Tru64 Unix objects are PIC. - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) - # IBM XL 8.0, 9.0 on PPC and BlueGene - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' - ;; - *) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) - # Sun C++ 5.9 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - ;; - esac - ;; - esac - ;; - lynxos*) - ;; - m88k*) - ;; - mvs*) - case $cc_basename in - cxx*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' - ;; - *) - ;; - esac - ;; - netbsd* | netbsdelf*-gnu) - ;; - *qnx* | *nto*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - osf3* | osf4* | osf5*) - case $cc_basename in - KCC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' - ;; - RCC*) - # Rational C++ 2.4.1 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - ;; - cxx*) - # Digital/Compaq C++ - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # Make sure the PIC flag is empty. It appears that all Alpha - # Linux and Compaq Tru64 Unix objects are PIC. 
- _LT_TAGVAR(lt_prog_compiler_pic, $1)= - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - *) - ;; - esac - ;; - psos*) - ;; - solaris*) - case $cc_basename in - CC* | sunCC*) - # Sun C++ 4.2, 5.x and Centerline C++ - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - ;; - gcx*) - # Green Hills C++ Compiler - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - ;; - *) - ;; - esac - ;; - sunos4*) - case $cc_basename in - CC*) - # Sun C++ 4.x - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - lcc*) - # Lucid - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - ;; - *) - ;; - esac - ;; - sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) - case $cc_basename in - CC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - esac - ;; - tandem*) - case $cc_basename in - NCC*) - # NonStop-UX NCC 3.20 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - ;; - *) - ;; - esac - ;; - vxworks*) - ;; - *) - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - ;; - esac - fi -], -[ - if test yes = "$GCC"; then - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - - case $host_os in - aix*) - # All AIX code is PIC. - if test ia64 = "$host_cpu"; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - fi - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - m68k) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the '-m68020' flag to GCC prevents building anything better, - # like '-m68040'. 
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' - ;; - esac - ;; - - beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) - # PIC is the default for these OSes. - ;; - - mingw* | cygwin* | pw32* | os2* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - # Although the cygwin gcc ignores -fPIC, still need this for old-style - # (--disable-auto-import) libraries - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - case $host_os in - os2*) - _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' - ;; - esac - ;; - - darwin* | rhapsody*) - # PIC is the default on this platform - # Common symbols not allowed in MH_DYLIB files - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' - ;; - - haiku*) - # PIC is the default for Haiku. - # The "-static" flag exists, but is broken. - _LT_TAGVAR(lt_prog_compiler_static, $1)= - ;; - - hpux*) - # PIC is the default for 64-bit PA HP-UX, but not for 32-bit - # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag - # sets the default TLS model and affects inlining. - case $host_cpu in - hppa*64*) - # +Z the default - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - ;; - - interix[[3-9]]*) - # Interix 3.x gcc -fpic/-fPIC options generate broken code. - # Instead, we relocate shared libraries at runtime. - ;; - - msdosdjgpp*) - # Just because we use GCC doesn't mean we suddenly get shared libraries - # on systems that don't support them. - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - enable_shared=no - ;; - - *nto* | *qnx*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. 
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic - fi - ;; - - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - - case $cc_basename in - nvcc*) # Cuda Compiler Driver 2.2 - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' - if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" - fi - ;; - esac - else - # PORTME Check for flag to pass linker flags through the system compiler. - case $host_os in - aix*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - if test ia64 = "$host_cpu"; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - else - _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' - fi - ;; - - darwin* | rhapsody*) - # PIC is the default on this platform - # Common symbols not allowed in MH_DYLIB files - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' - case $cc_basename in - nagfor*) - # NAG Fortran compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - esac - ;; - - mingw* | cygwin* | pw32* | os2* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - case $host_os in - os2*) - _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' - ;; - esac - ;; - - hpux9* | hpux10* | hpux11*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but - # not for PA HP-UX. 
- case $host_cpu in - hppa*64*|ia64*) - # +Z the default - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' - ;; - esac - # Is there a better lt_prog_compiler_static that works with the bundled CC? - _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' - ;; - - irix5* | irix6* | nonstopux*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # PIC (with -KPIC) is the default. - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - - linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) - case $cc_basename in - # old Intel for x86_64, which still supported -KPIC. - ecc*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - # flang / f18. f95 an alias for gfortran or flang on Debian - flang* | f18* | f95*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - # icc used to be incompatible with GCC. - # ICC 10 doesn't accept -KPIC any more. - icc* | ifort*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - # Lahey Fortran 8.1. 
- lf95*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' - _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' - ;; - nagfor*) - # NAG Fortran compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - tcc*) - # Fabrice Bellard et al's Tiny C Compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) - # Portland Group compilers (*not* the Pentium gcc compiler, - # which looks to be a dead project) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - ccc*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # All Alpha code is PIC. - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - xl* | bgxl* | bgf* | mpixl*) - # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' - ;; - *) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) - # Sun Fortran 8.3 passes all unrecognized flags to the linker - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='' - ;; - *Sun\ F* | *Sun*Fortran*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - ;; - *Sun\ C*) - # Sun C 5.9 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - ;; - *Intel*\ [[CF]]*Compiler*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - *Portland\ Group*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - esac - ;; - esac - ;; - - newsos6) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - *nto* | *qnx*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - - osf3* | osf4* | osf5*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # All OSF/1 code is PIC. - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - - rdos*) - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - - solaris*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - case $cc_basename in - f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; - *) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; - esac - ;; - - sunos4*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - sysv4 | sysv4.2uw2* | sysv4.3*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - fi - ;; - - sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - unicos*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - ;; - - uts4*) - 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - *) - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - ;; - esac - fi -]) -case $host_os in - # For platforms that do not support PIC, -DPIC is meaningless: - *djgpp*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" - ;; -esac - -AC_CACHE_CHECK([for $compiler option to produce PIC], - [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], - [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) -_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) - -# -# Check to make sure the PIC flag actually works. -# -if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then - _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], - [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], - [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], - [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in - "" | " "*) ;; - *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; - esac], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)= - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) -fi -_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], - [Additional compiler flags for building library objects]) - -_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], - [How to pass a linker flag through the compiler]) -# -# Check to make sure the static flag actually works. 
-# -wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" -_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], - _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), - $lt_tmp_static_flag, - [], - [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) -_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], - [Compiler flag to prevent dynamic linking]) -])# _LT_COMPILER_PIC - - -# _LT_LINKER_SHLIBS([TAGNAME]) -# ---------------------------- -# See if the linker supports building shared libraries. -m4_defun([_LT_LINKER_SHLIBS], -[AC_REQUIRE([LT_PATH_LD])dnl -AC_REQUIRE([LT_PATH_NM])dnl -m4_require([_LT_PATH_MANIFEST_TOOL])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl -m4_require([_LT_TAG_COMPILER])dnl -AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) -m4_if([$1], [CXX], [ - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] - case $host_os in - aix[[4-9]]*) - # If we're using GNU nm, then we don't want the "-C" option. - # -C means demangle to GNU nm, but means don't demangle to AIX nm. - # Without the "-l" option, or with the "-B" option, AIX nm treats - # weak defined symbols like other global defined symbols, whereas - # GNU nm marks them as "W". - # While the 'weak' keyword is ignored in the Export File, we need - # it in the Import File for the 'aix-soname' feature, so we have - # to replace the "-B" option with "-P" for AIX nm. 
- if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then - _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' - else - _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' - fi - ;; - pw32*) - _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds - ;; - cygwin* | mingw* | cegcc*) - case $cc_basename in - cl*) - _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' - ;; - *) - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] - ;; - esac - ;; - linux* | k*bsd*-gnu | gnu*) - _LT_TAGVAR(link_all_deplibs, $1)=no - ;; - *) - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' - ;; - esac -], [ - runpath_var= - _LT_TAGVAR(allow_undefined_flag, $1)= - _LT_TAGVAR(always_export_symbols, $1)=no - _LT_TAGVAR(archive_cmds, $1)= - _LT_TAGVAR(archive_expsym_cmds, $1)= - _LT_TAGVAR(compiler_needs_object, $1)=no - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - _LT_TAGVAR(export_dynamic_flag_spec, $1)= - _LT_TAGVAR(export_symbols_cmds, $1)='$NM 
$libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(hardcode_automatic, $1)=no - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= - _LT_TAGVAR(hardcode_libdir_separator, $1)= - _LT_TAGVAR(hardcode_minus_L, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported - _LT_TAGVAR(inherit_rpath, $1)=no - _LT_TAGVAR(link_all_deplibs, $1)=unknown - _LT_TAGVAR(module_cmds, $1)= - _LT_TAGVAR(module_expsym_cmds, $1)= - _LT_TAGVAR(old_archive_from_new_cmds, $1)= - _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= - _LT_TAGVAR(thread_safe_flag_spec, $1)= - _LT_TAGVAR(whole_archive_flag_spec, $1)= - # include_expsyms should be a list of space-separated symbols to be *always* - # included in the symbol list - _LT_TAGVAR(include_expsyms, $1)= - # exclude_expsyms can be an extended regexp of symbols to exclude - # it will be wrapped by ' (' and ')$', so one must not match beginning or - # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', - # as well as any symbol that contains 'd'. - _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] - # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out - # platforms (ab)use it in PIC code, but their linkers get confused if - # the symbol is explicitly referenced. Since portable code cannot - # rely on this symbol name, it's probably fine to never include it in - # preloaded symbol tables. - # Exclude shared library initialization/finalization symbols. -dnl Note also adjust exclude_expsyms for C++ above. - extract_expsyms_cmds= - - case $host_os in - cygwin* | mingw* | pw32* | cegcc*) - # FIXME: the MSVC++ port hasn't been tested in a loooong time - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. 
- if test yes != "$GCC"; then - with_gnu_ld=no - fi - ;; - interix*) - # we just hope/assume this is gcc and not c89 (= MSVC++) - with_gnu_ld=yes - ;; - openbsd* | bitrig*) - with_gnu_ld=no - ;; - linux* | k*bsd*-gnu | gnu*) - _LT_TAGVAR(link_all_deplibs, $1)=no - ;; - esac - - _LT_TAGVAR(ld_shlibs, $1)=yes - - # On some targets, GNU ld is compatible enough with the native linker - # that we're better off using the native interface for both. - lt_use_gnu_ld_interface=no - if test yes = "$with_gnu_ld"; then - case $host_os in - aix*) - # The AIX port of GNU ld has always aspired to compatibility - # with the native linker. However, as the warning in the GNU ld - # block says, versions before 2.19.5* couldn't really create working - # shared libraries, regardless of the interface used. - case `$LD -v 2>&1` in - *\ \(GNU\ Binutils\)\ 2.19.5*) ;; - *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; - *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; - *) - lt_use_gnu_ld_interface=yes - ;; - esac - ;; - *) - lt_use_gnu_ld_interface=yes - ;; - esac - fi - - if test yes = "$lt_use_gnu_ld_interface"; then - # If archive_cmds runs LD, not CC, wlarc should be empty - wlarc='$wl' - - # Set some defaults for GNU ld with shared library support. These - # are reset later if shared libraries are not supported. Putting them - # here allows them to be overridden if necessary. - runpath_var=LD_RUN_PATH - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' - # ancient GNU ld didn't support --whole-archive et. al. 
- if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then - _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' - else - _LT_TAGVAR(whole_archive_flag_spec, $1)= - fi - supports_anon_versioning=no - case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in - *GNU\ gold*) supports_anon_versioning=yes ;; - *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 - *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... - *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... - *\ 2.11.*) ;; # other 2.11 versions - *) supports_anon_versioning=yes ;; - esac - - # See if GNU ld supports shared libraries. - case $host_os in - aix[[3-9]]*) - # On AIX/PPC, the GNU linker is very broken - if test ia64 != "$host_cpu"; then - _LT_TAGVAR(ld_shlibs, $1)=no - cat <<_LT_EOF 1>&2 - -*** Warning: the GNU linker, at least up to release 2.19, is reported -*** to be unable to reliably create shared libraries on AIX. -*** Therefore, libtool is disabling shared libraries support. If you -*** really care for shared libraries, you may want to install binutils -*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. -*** You will then need to restart the configuration process. 
- -_LT_EOF - fi - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='' - ;; - m68k) - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - ;; - esac - ;; - - beos*) - if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - # Joseph Beckenbach says some releases of gcc - # support --undefined. This deserves some investigation. FIXME - _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - cygwin* | mingw* | pw32* | cegcc*) - # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, - # as there is no search path for DLLs. 
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=no - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] - - if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - # If the export-symbols file already is a .def file, use it as - # is; otherwise, prepend EXPORTS... 
- _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then - cp $export_symbols $output_objdir/$soname.def; - else - echo EXPORTS > $output_objdir/$soname.def; - cat $export_symbols >> $output_objdir/$soname.def; - fi~ - $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - haiku*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - os2*) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - shrext_cmds=.dll - _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ - $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ - $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ - $ECHO EXPORTS >> $output_objdir/$libname.def~ - emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ - $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ - emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ - $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ - $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ - $ECHO EXPORTS >> $output_objdir/$libname.def~ - prefix_cmds="$SED"~ - if test EXPORTS = "`$SED 1q $export_symbols`"; then - prefix_cmds="$prefix_cmds -e 1d"; - fi~ - prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ - cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ - $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags 
$output_objdir/$libname.def~ - emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - ;; - - interix[[3-9]]*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. - # Instead, shared libraries are loaded at an image base (0x10000000 by - # default) and relocated if they conflict, which is a slow very memory - # consuming and fragmenting process. To avoid this, we pick a random, - # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link - # time. Moving up from 0x10000000 also allows more sbrk(2) space. - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - ;; - - gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) - tmp_diet=no - if test linux-dietlibc = "$host_os"; then - case $cc_basename in - diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) - esac - fi - if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ - && test no = "$tmp_diet" - then - tmp_addflag=' $pic_flag' - tmp_sharedflag='-shared' - case $cc_basename,$host_cpu in - pgcc*) # Portland Group C compiler - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' - tmp_addflag=' $pic_flag' - ;; - pgf77* | pgf90* | pgf95* | pgfortran*) - # Portland Group f77 and f90 compilers - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' - tmp_addflag=' $pic_flag -Mnomain' ;; - ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 - tmp_addflag=' -i_dynamic' ;; - efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 - tmp_addflag=' -i_dynamic -nofor_main' ;; - ifc* | ifort*) # Intel Fortran compiler - tmp_addflag=' -nofor_main' ;; - lf95*) # Lahey Fortran 8.1 - _LT_TAGVAR(whole_archive_flag_spec, $1)= - tmp_sharedflag='--shared' ;; - nagfor*) # NAGFOR 5.3 - tmp_sharedflag='-Wl,-shared' ;; - xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) - tmp_sharedflag='-qmkshrobj' - tmp_addflag= ;; - nvcc*) # Cuda Compiler Driver 2.2 - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' - _LT_TAGVAR(compiler_needs_object, $1)=yes - ;; - esac - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) # Sun C 5.9 - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' - _LT_TAGVAR(compiler_needs_object, $1)=yes - tmp_sharedflag='-G' ;; - *Sun\ F*) # Sun Fortran 8.3 - tmp_sharedflag='-G' ;; - esac - _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - - if test yes = "$supports_anon_versioning"; then - 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ - echo "local: *; };" >> $output_objdir/$libname.ver~ - $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' - fi - - case $cc_basename in - tcc*) - _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' - ;; - xlf* | bgf* | bgxlf* | mpixlf*) - # IBM XL Fortran 10.1 on PPC cannot create shared libs itself - _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' - if test yes = "$supports_anon_versioning"; then - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ - echo "local: *; };" >> $output_objdir/$libname.ver~ - $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' - fi - ;; - esac - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - netbsd* | netbsdelf*-gnu) - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' - wlarc= - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - fi - ;; - - solaris*) - if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then - _LT_TAGVAR(ld_shlibs, $1)=no - cat <<_LT_EOF 1>&2 - -*** Warning: The releases 2.8.* of the GNU linker cannot reliably -*** create shared libraries on 
Solaris systems. Therefore, libtool -*** is disabling shared libraries support. We urge you to upgrade GNU -*** binutils to release 2.9.1 or newer. Another option is to modify -*** your PATH or compiler configuration so that the native linker is -*** used, and then restart. - -_LT_EOF - elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) - case `$LD -v 2>&1` in - *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) - _LT_TAGVAR(ld_shlibs, $1)=no - cat <<_LT_EOF 1>&2 - -*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot -*** reliably create shared libraries on SCO systems. Therefore, libtool -*** is disabling shared libraries support. We urge you to upgrade GNU -*** binutils to release 2.16.91.0.3 or newer. Another option is to modify -*** your PATH or compiler configuration so that the native linker is -*** used, and then restart. - -_LT_EOF - ;; - *) - # For security reasons, it is highly recommended that you always - # use absolute paths for naming shared libraries, and exclude the - # DT_RUNPATH tag from executables and libraries. But doing so - # requires that you compile everything twice, which is a pain. 
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - sunos4*) - _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' - wlarc= - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - *) - if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - - if test no = "$_LT_TAGVAR(ld_shlibs, $1)"; then - runpath_var= - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= - _LT_TAGVAR(export_dynamic_flag_spec, $1)= - _LT_TAGVAR(whole_archive_flag_spec, $1)= - fi - else - # PORTME fill in a description of your system's linker (not GNU ld) - case $host_os in - aix3*) - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=yes - _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' - # Note: this linker hardcodes the directories in LIBPATH if there - # are no directories specified by -L. 
- _LT_TAGVAR(hardcode_minus_L, $1)=yes - if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then - # Neither direct hardcoding nor static linking is supported with a - # broken collect2. - _LT_TAGVAR(hardcode_direct, $1)=unsupported - fi - ;; - - aix[[4-9]]*) - if test ia64 = "$host_cpu"; then - # On IA64, the linker does run time linking by default, so we don't - # have to do anything special. - aix_use_runtimelinking=no - exp_sym_flag='-Bexport' - no_entry_flag= - else - # If we're using GNU nm, then we don't want the "-C" option. - # -C means demangle to GNU nm, but means don't demangle to AIX nm. - # Without the "-l" option, or with the "-B" option, AIX nm treats - # weak defined symbols like other global defined symbols, whereas - # GNU nm marks them as "W". - # While the 'weak' keyword is ignored in the Export File, we need - # it in the Import File for the 'aix-soname' feature, so we have - # to replace the "-B" option with "-P" for AIX nm. - if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then - _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' - else - _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' - fi - aix_use_runtimelinking=no - - # Test if we are trying to use run time linking or normal - # AIX style linking. If -brtl is somewhere in LDFLAGS, we - # have runtime linking enabled, and use it for executables. 
- # For shared libraries, we enable/disable runtime linking - # depending on the kind of the shared library created - - # when "with_aix_soname,aix_use_runtimelinking" is: - # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables - # "aix,yes" lib.so shared, rtl:yes, for executables - # lib.a static archive - # "both,no" lib.so.V(shr.o) shared, rtl:yes - # lib.a(lib.so.V) shared, rtl:no, for executables - # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables - # lib.a(lib.so.V) shared, rtl:no - # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables - # lib.a static archive - case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) - for ld_flag in $LDFLAGS; do - if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then - aix_use_runtimelinking=yes - break - fi - done - if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then - # With aix-soname=svr4, we create the lib.so.V shared archives only, - # so we don't have lib.a shared libs to link our executables. - # We have to force runtime linking in this case. - aix_use_runtimelinking=yes - LDFLAGS="$LDFLAGS -Wl,-brtl" - fi - ;; - esac - - exp_sym_flag='-bexport' - no_entry_flag='-bnoentry' - fi - - # When large executables or shared objects are built, AIX ld can - # have problems creating the table of contents. If linking a library - # or program results in "error TOC overflow" add -mminimal-toc to - # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not - # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. - - _LT_TAGVAR(archive_cmds, $1)='' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='$wl-f,' - case $with_aix_soname,$aix_use_runtimelinking in - aix,*) ;; # traditional, no import file - svr4,* | *,yes) # use import file - # The Import File defines what to hardcode. 
- _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=no - ;; - esac - - if test yes = "$GCC"; then - case $host_os in aix4.[[012]]|aix4.[[012]].*) - # We only want to do this on AIX 4.2 and lower, the check - # below for broken collect2 doesn't work under 4.3+ - collect2name=`$CC -print-prog-name=collect2` - if test -f "$collect2name" && - strings "$collect2name" | $GREP resolve_lib_name >/dev/null - then - # We have reworked collect2 - : - else - # We have old collect2 - _LT_TAGVAR(hardcode_direct, $1)=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. Setting hardcode_minus_L - # to unsupported forces relinking - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)= - fi - ;; - esac - shared_flag='-shared' - if test yes = "$aix_use_runtimelinking"; then - shared_flag="$shared_flag "'$wl-G' - fi - # Need to ensure runtime linking is disabled for the traditional - # shared library, or the linker may eventually find shared libraries - # /with/ Import File - we do not want to mix them. - shared_flag_aix='-shared' - shared_flag_svr4='-shared $wl-G' - else - # not using gcc - if test ia64 = "$host_cpu"; then - # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release - # chokes on -Wl,-G. The following line is correct: - shared_flag='-G' - else - if test yes = "$aix_use_runtimelinking"; then - shared_flag='$wl-G' - else - shared_flag='$wl-bM:SRE' - fi - shared_flag_aix='$wl-bM:SRE' - shared_flag_svr4='$wl-G' - fi - fi - - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' - # It seems that -bexpall does not export symbols beginning with - # underscore (_), so it is better to generate a list of symbols to export. 
- _LT_TAGVAR(always_export_symbols, $1)=yes - if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then - # Warning - without using the other runtime loading flags (-brtl), - # -berok will link without error, but may produce a broken library. - _LT_TAGVAR(allow_undefined_flag, $1)='-berok' - # Determine the default libpath from the value encoded in an - # empty executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag - else - if test ia64 = "$host_cpu"; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' - _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" - _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" - else - # Determine the default libpath from the value encoded in an - # empty executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" - # Warning - without using the other run time loading flags, - # -berok will link without error, but may produce a broken library. - _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' - _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' - if test yes = "$with_gnu_ld"; then - # We only use this code for GNU lds that support --whole-archive. 
- _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' - else - # Exported symbols can be pulled into shared objects from archives - _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)=yes - _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' - # -brtl affects multiple linker settings, -berok does not and is overridden later - compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' - if test svr4 != "$with_aix_soname"; then - # This is similar to how AIX traditionally builds its shared libraries. - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' - fi - if test aix != "$with_aix_soname"; then - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' - else - # used by -dlpreopen to get the symbols - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' - fi - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' - fi - fi - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='' - ;; - m68k) - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - ;; - esac - ;; - - bsdi[[45]]*) - _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic - ;; - - cygwin* | mingw* | pw32* | cegcc*) - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. 
- case $cc_basename in - cl*) - # Native MSVC - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='@' - # Tell ltmain to make .lib files, not .a files. - libext=lib - # Tell ltmain to make .dll files, not .so files. - shrext_cmds=.dll - # FIXME: Setting linknames here is a bad hack. - _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' - _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then - cp "$export_symbols" "$output_objdir/$soname.def"; - echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; - else - $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; - fi~ - $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ - linknames=' - # The linker will not automatically build a static lib if we build a DLL. 
- # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' - # Don't use ranlib - _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' - _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ - lt_tool_outputfile="@TOOL_OUTPUT@"~ - case $lt_outputfile in - *.exe|*.EXE) ;; - *) - lt_outputfile=$lt_outputfile.exe - lt_tool_outputfile=$lt_tool_outputfile.exe - ;; - esac~ - if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then - $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; - $RM "$lt_outputfile.manifest"; - fi' - ;; - *) - # Assume MSVC wrapper - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - # Tell ltmain to make .lib files, not .a files. - libext=lib - # Tell ltmain to make .dll files, not .so files. - shrext_cmds=.dll - # FIXME: Setting linknames here is a bad hack. - _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' - # The linker will automatically build a .lib file if we build a DLL. - _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' - # FIXME: Should let the user specify the lib program. 
- _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - ;; - esac - ;; - - darwin* | rhapsody*) - _LT_DARWIN_LINKER_FEATURES($1) - ;; - - dgux*) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor - # support. Future versions do this automatically, but an explicit c++rt0.o - # does not break anything, and helps significantly (at the cost of a little - # extra space). - freebsd2.2*) - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. - freebsd2.*) - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
- freebsd* | dragonfly*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - hpux9*) - if test yes = "$GCC"; then - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' - else - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(hardcode_direct, $1)=yes - - # hardcode_minus_L: Not really in the search PATH, - # but as the default location of the library. - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - ;; - - hpux10*) - if test yes,no = "$GCC,$with_gnu_ld"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' - fi - if test no = "$with_gnu_ld"; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - # hardcode_minus_L: Not really in the search PATH, - # but as the default location of the library. 
- _LT_TAGVAR(hardcode_minus_L, $1)=yes - fi - ;; - - hpux11*) - if test yes,no = "$GCC,$with_gnu_ld"; then - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - else - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' - ;; - *) - m4_if($1, [], [ - # Older versions of the 11.00 compiler do not understand -b yet - # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) - _LT_LINKER_OPTION([if $CC understands -b], - _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], - [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], - [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], - [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) - ;; - esac - fi - if test no = "$with_gnu_ld"; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - case $host_cpu in - hppa*64*|ia64*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - *) - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - - # hardcode_minus_L: Not really in the search PATH, - # but as the default location of the library. 
- _LT_TAGVAR(hardcode_minus_L, $1)=yes - ;; - esac - fi - ;; - - irix5* | irix6* | nonstopux*) - if test yes = "$GCC"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' - # Try to use the -exported_symbol ld option, if it does not - # work, assume that -exports_file does not work either and - # implicitly export all symbols. - # This should be the same for all languages, so no per-tag cache variable. - AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], - [lt_cv_irix_exported_symbol], - [save_LDFLAGS=$LDFLAGS - LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" - AC_LINK_IFELSE( - [AC_LANG_SOURCE( - [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], - [C++], [[int foo (void) { return 0; }]], - [Fortran 77], [[ - subroutine foo - end]], - [Fortran], [[ - subroutine foo - end]])])], - [lt_cv_irix_exported_symbol=yes], - [lt_cv_irix_exported_symbol=no]) - LDFLAGS=$save_LDFLAGS]) - if test yes = "$lt_cv_irix_exported_symbol"; then - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' - fi - _LT_TAGVAR(link_all_deplibs, $1)=no - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' - 
fi - _LT_TAGVAR(archive_cmds_need_lc, $1)='no' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(inherit_rpath, $1)=yes - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - linux*) - case $cc_basename in - tcc*) - # Fabrice Bellard et al's Tiny C Compiler - _LT_TAGVAR(ld_shlibs, $1)=yes - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - ;; - - netbsd* | netbsdelf*-gnu) - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out - else - _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - newsos6) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - *nto* | *qnx*) - ;; - - openbsd* | bitrig*) - if test -f /usr/libexec/ld.so; then - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, 
$1)='$wl-rpath,$libdir' - fi - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - os2*) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - shrext_cmds=.dll - _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ - $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ - $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ - $ECHO EXPORTS >> $output_objdir/$libname.def~ - emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ - $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ - emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ - $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ - $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ - $ECHO EXPORTS >> $output_objdir/$libname.def~ - prefix_cmds="$SED"~ - if test EXPORTS = "`$SED 1q $export_symbols`"; then - prefix_cmds="$prefix_cmds -e 1d"; - fi~ - prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ - cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ - $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ - emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - ;; - - osf3*) - if test yes = "$GCC"; then - _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` 
$wl-update_registry $wl$output_objdir/so_locations -o $lib' - else - _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)='no' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - ;; - - osf4* | osf5*) # as osf3* with the addition of -msym flag - if test yes = "$GCC"; then - _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - else - _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ - $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' - - # Both c and cxx compiler support -rpath directly - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)='no' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - ;; - - solaris*) - 
_LT_TAGVAR(no_undefined_flag, $1)=' -z defs' - if test yes = "$GCC"; then - wlarc='$wl' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' - else - case `$CC -V 2>&1` in - *"Compilers 5.0"*) - wlarc='' - _LT_TAGVAR(archive_cmds, $1)='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' - ;; - *) - wlarc='$wl' - _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' - ;; - esac - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - case $host_os in - solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; - *) - # The compiler driver will combine and reorder linker options, - # but understands '-z linker_flag'. GCC discards it without '$wl', - # but is careful enough not to reorder. - # Supported since Solaris 2.6 (maybe 2.5.1?) 
- if test yes = "$GCC"; then - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' - else - _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' - fi - ;; - esac - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - sunos4*) - if test sequent = "$host_vendor"; then - # Use $CC to link under sequent, because it throws in some extra .o - # files that make .init and .fini sections work. - _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - sysv4) - case $host_vendor in - sni) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? - ;; - siemens) - ## LD is ld it makes a PLAMLIB - ## CC just makes a GrossModule. 
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' - _LT_TAGVAR(hardcode_direct, $1)=no - ;; - motorola) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie - ;; - esac - runpath_var='LD_RUN_PATH' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - sysv4.3*) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' - ;; - - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - runpath_var=LD_RUN_PATH - hardcode_runpath_var=yes - _LT_TAGVAR(ld_shlibs, $1)=yes - fi - ;; - - sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) - _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - runpath_var='LD_RUN_PATH' - - if test yes = "$GCC"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - fi - ;; - - sysv5* | sco3.2v5* | sco5v6*) - # Note: We CANNOT use -z defs as we might desire, because we do not - # link with -lc, and that would cause any symbols used from libc to - # always be unresolved, which means just about no library would - # ever link correctly. 
If we're not using GNU ld we use -z text - # though, which does catch some bad symbols but isn't as heavy-handed - # as -z defs. - _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' - _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' - runpath_var='LD_RUN_PATH' - - if test yes = "$GCC"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - fi - ;; - - uts4*) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - *) - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - - if test sni = "$host_vendor"; then - case $host in - sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Blargedynsym' - ;; - esac - fi - fi -]) -AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) -test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no - -_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld - -_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl -_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl -_LT_DECL([], [extract_expsyms_cmds], [2], - [The commands to extract the exported symbol list from a shared archive]) - -# -# Do we need to explicitly link libc? 
-# -case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in -x|xyes) - # Assume -lc should be added - _LT_TAGVAR(archive_cmds_need_lc, $1)=yes - - if test yes,yes = "$GCC,$enable_shared"; then - case $_LT_TAGVAR(archive_cmds, $1) in - *'~'*) - # FIXME: we may have to deal with multi-command sequences. - ;; - '$CC '*) - # Test whether the compiler implicitly links with -lc since on some - # systems, -lgcc has to come before -lc. If gcc already passes -lc - # to ld, don't add -lc before -lgcc. - AC_CACHE_CHECK([whether -lc should be explicitly linked in], - [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), - [$RM conftest* - echo "$lt_simple_compile_test_code" > conftest.$ac_ext - - if AC_TRY_EVAL(ac_compile) 2>conftest.err; then - soname=conftest - lib=conftest - libobjs=conftest.$ac_objext - deplibs= - wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) - pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) - compiler_flags=-v - linker_flags=-v - verstring= - output_objdir=. - libname=conftest - lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) - _LT_TAGVAR(allow_undefined_flag, $1)= - if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) - then - lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no - else - lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes - fi - _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag - else - cat conftest.err 1>&5 - fi - $RM conftest* - ]) - _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) - ;; - esac - fi - ;; -esac - -_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], - [Whether or not to add -lc for building shared libraries]) -_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], - [enable_shared_with_static_runtimes], [0], - [Whether or not to disallow shared libs when runtime libs are static]) -_LT_TAGDECL([], [export_dynamic_flag_spec], [1], - [Compiler flag to allow reflexive dlopens]) -_LT_TAGDECL([], [whole_archive_flag_spec], [1], - 
[Compiler flag to generate shared objects directly from archives]) -_LT_TAGDECL([], [compiler_needs_object], [1], - [Whether the compiler copes with passing no objects directly]) -_LT_TAGDECL([], [old_archive_from_new_cmds], [2], - [Create an old-style archive from a shared archive]) -_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], - [Create a temporary old-style archive to link instead of a shared archive]) -_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) -_LT_TAGDECL([], [archive_expsym_cmds], [2]) -_LT_TAGDECL([], [module_cmds], [2], - [Commands used to build a loadable module if different from building - a shared archive.]) -_LT_TAGDECL([], [module_expsym_cmds], [2]) -_LT_TAGDECL([], [with_gnu_ld], [1], - [Whether we are building with GNU ld or not]) -_LT_TAGDECL([], [allow_undefined_flag], [1], - [Flag that allows shared libraries with undefined symbols to be built]) -_LT_TAGDECL([], [no_undefined_flag], [1], - [Flag that enforces no undefined symbols]) -_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], - [Flag to hardcode $libdir into a binary during linking. 
- This must work even if $libdir does not exist]) -_LT_TAGDECL([], [hardcode_libdir_separator], [1], - [Whether we need a single "-rpath" flag with a separated argument]) -_LT_TAGDECL([], [hardcode_direct], [0], - [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes - DIR into the resulting binary]) -_LT_TAGDECL([], [hardcode_direct_absolute], [0], - [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes - DIR into the resulting binary and the resulting library dependency is - "absolute", i.e impossible to change by setting $shlibpath_var if the - library is relocated]) -_LT_TAGDECL([], [hardcode_minus_L], [0], - [Set to "yes" if using the -LDIR flag during linking hardcodes DIR - into the resulting binary]) -_LT_TAGDECL([], [hardcode_shlibpath_var], [0], - [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR - into the resulting binary]) -_LT_TAGDECL([], [hardcode_automatic], [0], - [Set to "yes" if building a shared library automatically hardcodes DIR - into the library and all subsequent libraries and executables linked - against it]) -_LT_TAGDECL([], [inherit_rpath], [0], - [Set to yes if linker adds runtime paths of dependent libraries - to runtime path list]) -_LT_TAGDECL([], [link_all_deplibs], [0], - [Whether libtool must link a program against all its dependency libraries]) -_LT_TAGDECL([], [always_export_symbols], [0], - [Set to "yes" if exported symbols are required]) -_LT_TAGDECL([], [export_symbols_cmds], [2], - [The commands to list exported symbols]) -_LT_TAGDECL([], [exclude_expsyms], [1], - [Symbols that should not be listed in the preloaded symbols]) -_LT_TAGDECL([], [include_expsyms], [1], - [Symbols that must always be exported]) -_LT_TAGDECL([], [prelink_cmds], [2], - [Commands necessary for linking programs (against libraries) with templates]) -_LT_TAGDECL([], [postlink_cmds], [2], - [Commands necessary for finishing linking programs]) -_LT_TAGDECL([], [file_list_spec], [1], - [Specify 
filename containing input files]) -dnl FIXME: Not yet implemented -dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], -dnl [Compiler flag to generate thread safe objects]) -])# _LT_LINKER_SHLIBS - - -# _LT_LANG_C_CONFIG([TAG]) -# ------------------------ -# Ensure that the configuration variables for a C compiler are suitably -# defined. These variables are subsequently used by _LT_CONFIG to write -# the compiler configuration to 'libtool'. -m4_defun([_LT_LANG_C_CONFIG], -[m4_require([_LT_DECL_EGREP])dnl -lt_save_CC=$CC -AC_LANG_PUSH(C) - -# Source file extension for C test sources. -ac_ext=c - -# Object file extension for compiled C test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code="int some_variable = 0;" - -# Code to be used in simple link tests -lt_simple_link_test_code='int main(){return(0);}' - -_LT_TAG_COMPILER -# Save the default compiler, since it gets overwritten when the other -# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. -compiler_DEFAULT=$CC - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -## CAVEAT EMPTOR: -## There is no encapsulation within the following macros, do not change -## the running order or otherwise move them around unless you know exactly -## what you are doing... -if test -n "$compiler"; then - _LT_COMPILER_NO_RTTI($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - LT_SYS_DLOPEN_SELF - _LT_CMD_STRIPLIB - - # Report what library types will actually be built - AC_MSG_CHECKING([if libtool supports shared libraries]) - AC_MSG_RESULT([$can_build_shared]) - - AC_MSG_CHECKING([whether to build shared libraries]) - test no = "$can_build_shared" && enable_shared=no - - # On AIX, shared libraries and static libraries use the same namespace, and - # are all built from PIC. 
- case $host_os in - aix3*) - test yes = "$enable_shared" && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - - aix[[4-9]]*) - if test ia64 != "$host_cpu"; then - case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in - yes,aix,yes) ;; # shared object as lib.so file only - yes,svr4,*) ;; # shared object as lib.so archive member only - yes,*) enable_static=no ;; # shared object in lib.a archive as well - esac - fi - ;; - esac - AC_MSG_RESULT([$enable_shared]) - - AC_MSG_CHECKING([whether to build static libraries]) - # Make sure either enable_shared or enable_static is yes. - test yes = "$enable_shared" || enable_static=yes - AC_MSG_RESULT([$enable_static]) - - _LT_CONFIG($1) -fi -AC_LANG_POP -CC=$lt_save_CC -])# _LT_LANG_C_CONFIG - - -# _LT_LANG_CXX_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for a C++ compiler are suitably -# defined. These variables are subsequently used by _LT_CONFIG to write -# the compiler configuration to 'libtool'. 
-m4_defun([_LT_LANG_CXX_CONFIG], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_PATH_MANIFEST_TOOL])dnl -if test -n "$CXX" && ( test no != "$CXX" && - ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || - (test g++ != "$CXX"))); then - AC_PROG_CXXCPP -else - _lt_caught_CXX_error=yes -fi - -AC_LANG_PUSH(C++) -_LT_TAGVAR(archive_cmds_need_lc, $1)=no -_LT_TAGVAR(allow_undefined_flag, $1)= -_LT_TAGVAR(always_export_symbols, $1)=no -_LT_TAGVAR(archive_expsym_cmds, $1)= -_LT_TAGVAR(compiler_needs_object, $1)=no -_LT_TAGVAR(export_dynamic_flag_spec, $1)= -_LT_TAGVAR(hardcode_direct, $1)=no -_LT_TAGVAR(hardcode_direct_absolute, $1)=no -_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= -_LT_TAGVAR(hardcode_libdir_separator, $1)= -_LT_TAGVAR(hardcode_minus_L, $1)=no -_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported -_LT_TAGVAR(hardcode_automatic, $1)=no -_LT_TAGVAR(inherit_rpath, $1)=no -_LT_TAGVAR(module_cmds, $1)= -_LT_TAGVAR(module_expsym_cmds, $1)= -_LT_TAGVAR(link_all_deplibs, $1)=unknown -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds -_LT_TAGVAR(no_undefined_flag, $1)= -_LT_TAGVAR(whole_archive_flag_spec, $1)= -_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - -# Source file extension for C++ test sources. -ac_ext=cpp - -# Object file extension for compiled C++ test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# No sense in running all these tests if we already determined that -# the CXX compiler isn't working. Some variables (like enable_shared) -# are currently assumed to apply to all compilers on this platform, -# and will be corrupted by setting them based on a non-working compiler. 
-if test yes != "$_lt_caught_CXX_error"; then - # Code to be used in simple compile tests - lt_simple_compile_test_code="int some_variable = 0;" - - # Code to be used in simple link tests - lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' - - # ltmain only uses $CC for tagged configurations so make sure $CC is set. - _LT_TAG_COMPILER - - # save warnings/boilerplate of simple test code - _LT_COMPILER_BOILERPLATE - _LT_LINKER_BOILERPLATE - - # Allow CC to be a program name with arguments. - lt_save_CC=$CC - lt_save_CFLAGS=$CFLAGS - lt_save_LD=$LD - lt_save_GCC=$GCC - GCC=$GXX - lt_save_with_gnu_ld=$with_gnu_ld - lt_save_path_LD=$lt_cv_path_LD - if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then - lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx - else - $as_unset lt_cv_prog_gnu_ld - fi - if test -n "${lt_cv_path_LDCXX+set}"; then - lt_cv_path_LD=$lt_cv_path_LDCXX - else - $as_unset lt_cv_path_LD - fi - test -z "${LDCXX+set}" || LD=$LDCXX - CC=${CXX-"c++"} - CFLAGS=$CXXFLAGS - compiler=$CC - _LT_TAGVAR(compiler, $1)=$CC - _LT_CC_BASENAME([$compiler]) - - if test -n "$compiler"; then - # We don't want -fno-exception when compiling C++ code, so set the - # no_builtin_flag separately - if test yes = "$GXX"; then - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' - else - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= - fi - - if test yes = "$GXX"; then - # Set up default GNU C++ configuration - - LT_PATH_LD - - # Check if GNU C++ uses GNU ld as the underlying linker, since the - # archiving commands below assume that GNU ld is being used. 
- if test yes = "$with_gnu_ld"; then - _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' - - # If archive_cmds runs LD, not CC, wlarc should be empty - # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to - # investigate it a little bit more. (MM) - wlarc='$wl' - - # ancient GNU ld didn't support --whole-archive et. al. - if eval "`$CC -print-prog-name=ld` --help 2>&1" | - $GREP 'no-whole-archive' > /dev/null; then - _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' - else - _LT_TAGVAR(whole_archive_flag_spec, $1)= - fi - else - with_gnu_ld=no - wlarc= - - # A generic and very simple default shared library creation - # command for GNU C++ for the case where it uses the native - # linker, instead of GNU ld. If possible, this setting should - # overridden to take advantage of the native linker features on - # the platform it is being used on. - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' - fi - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' - - else - GXX=no - with_gnu_ld=no - wlarc= - fi - - # PORTME: fill in a description of your system's C++ link characteristics - AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) - _LT_TAGVAR(ld_shlibs, $1)=yes - case $host_os in - aix3*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - aix[[4-9]]*) - if test ia64 = "$host_cpu"; then - # On IA64, the linker does run time linking by default, so we don't - # have to do anything special. - aix_use_runtimelinking=no - exp_sym_flag='-Bexport' - no_entry_flag= - else - aix_use_runtimelinking=no - - # Test if we are trying to use run time linking or normal - # AIX style linking. If -brtl is somewhere in LDFLAGS, we - # have runtime linking enabled, and use it for executables. - # For shared libraries, we enable/disable runtime linking - # depending on the kind of the shared library created - - # when "with_aix_soname,aix_use_runtimelinking" is: - # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables - # "aix,yes" lib.so shared, rtl:yes, for executables - # lib.a static archive - # "both,no" lib.so.V(shr.o) shared, rtl:yes - # lib.a(lib.so.V) shared, rtl:no, for executables - # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables - # lib.a(lib.so.V) shared, rtl:no - # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables - # lib.a static archive - case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) - for ld_flag in $LDFLAGS; do - case $ld_flag in - *-brtl*) - aix_use_runtimelinking=yes - break - ;; - esac - done - if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then - # With aix-soname=svr4, we create the lib.so.V shared archives only, - # so we don't have lib.a shared libs to link our executables. - # We have to force runtime linking in this case. 
- aix_use_runtimelinking=yes - LDFLAGS="$LDFLAGS -Wl,-brtl" - fi - ;; - esac - - exp_sym_flag='-bexport' - no_entry_flag='-bnoentry' - fi - - # When large executables or shared objects are built, AIX ld can - # have problems creating the table of contents. If linking a library - # or program results in "error TOC overflow" add -mminimal-toc to - # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not - # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. - - _LT_TAGVAR(archive_cmds, $1)='' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='$wl-f,' - case $with_aix_soname,$aix_use_runtimelinking in - aix,*) ;; # no import file - svr4,* | *,yes) # use import file - # The Import File defines what to hardcode. - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=no - ;; - esac - - if test yes = "$GXX"; then - case $host_os in aix4.[[012]]|aix4.[[012]].*) - # We only want to do this on AIX 4.2 and lower, the check - # below for broken collect2 doesn't work under 4.3+ - collect2name=`$CC -print-prog-name=collect2` - if test -f "$collect2name" && - strings "$collect2name" | $GREP resolve_lib_name >/dev/null - then - # We have reworked collect2 - : - else - # We have old collect2 - _LT_TAGVAR(hardcode_direct, $1)=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. 
Setting hardcode_minus_L - # to unsupported forces relinking - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)= - fi - esac - shared_flag='-shared' - if test yes = "$aix_use_runtimelinking"; then - shared_flag=$shared_flag' $wl-G' - fi - # Need to ensure runtime linking is disabled for the traditional - # shared library, or the linker may eventually find shared libraries - # /with/ Import File - we do not want to mix them. - shared_flag_aix='-shared' - shared_flag_svr4='-shared $wl-G' - else - # not using gcc - if test ia64 = "$host_cpu"; then - # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release - # chokes on -Wl,-G. The following line is correct: - shared_flag='-G' - else - if test yes = "$aix_use_runtimelinking"; then - shared_flag='$wl-G' - else - shared_flag='$wl-bM:SRE' - fi - shared_flag_aix='$wl-bM:SRE' - shared_flag_svr4='$wl-G' - fi - fi - - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' - # It seems that -bexpall does not export symbols beginning with - # underscore (_), so it is better to generate a list of symbols to - # export. - _LT_TAGVAR(always_export_symbols, $1)=yes - if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then - # Warning - without using the other runtime loading flags (-brtl), - # -berok will link without error, but may produce a broken library. - # The "-G" linker flag allows undefined symbols. - _LT_TAGVAR(no_undefined_flag, $1)='-bernotok' - # Determine the default libpath from the value encoded in an empty - # executable. 
- _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" - - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag - else - if test ia64 = "$host_cpu"; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' - _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" - _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" - else - # Determine the default libpath from the value encoded in an - # empty executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" - # Warning - without using the other run time loading flags, - # -berok will link without error, but may produce a broken library. - _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' - _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' - if test yes = "$with_gnu_ld"; then - # We only use this code for GNU lds that support --whole-archive. 
- _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' - else - # Exported symbols can be pulled into shared objects from archives - _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)=yes - _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' - # -brtl affects multiple linker settings, -berok does not and is overridden later - compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' - if test svr4 != "$with_aix_soname"; then - # This is similar to how AIX traditionally builds its shared - # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' - fi - if test aix != "$with_aix_soname"; then - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' - else - # used by -dlpreopen to get the symbols - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' - fi - _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' - fi - fi - ;; - - beos*) - if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - # Joseph Beckenbach says some releases of gcc - # support --undefined. This deserves some investigation. FIXME - _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - chorus*) - case $cc_basename in - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - cygwin* | mingw* | pw32* | cegcc*) - case $GXX,$cc_basename in - ,cl* | no,cl*) - # Native MSVC - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='@' - # Tell ltmain to make .lib files, not .a files. - libext=lib - # Tell ltmain to make .dll files, not .so files. - shrext_cmds=.dll - # FIXME: Setting linknames here is a bad hack. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' - _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then - cp "$export_symbols" "$output_objdir/$soname.def"; - echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; - else - $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; - fi~ - $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ - linknames=' - # The linker will not automatically build a static lib if we build a DLL. - # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - # Don't use ranlib - _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' - _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ - lt_tool_outputfile="@TOOL_OUTPUT@"~ - case $lt_outputfile in - *.exe|*.EXE) ;; - *) - lt_outputfile=$lt_outputfile.exe - lt_tool_outputfile=$lt_tool_outputfile.exe - ;; - esac~ - func_to_tool_file "$lt_outputfile"~ - if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then - $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; - $RM "$lt_outputfile.manifest"; - fi' - ;; - *) - # g++ - # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, - # as there is no search path for DLLs. 
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=no - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - - if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - # If the export-symbols file already is a .def file, use it as - # is; otherwise, prepend EXPORTS... - _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then - cp $export_symbols $output_objdir/$soname.def; - else - echo EXPORTS > $output_objdir/$soname.def; - cat $export_symbols >> $output_objdir/$soname.def; - fi~ - $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - darwin* | rhapsody*) - _LT_DARWIN_LINKER_FEATURES($1) - ;; - - os2*) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - shrext_cmds=.dll - _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ - $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ - $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ - $ECHO EXPORTS >> $output_objdir/$libname.def~ - emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ - $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ - emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY 
${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ - $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ - $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ - $ECHO EXPORTS >> $output_objdir/$libname.def~ - prefix_cmds="$SED"~ - if test EXPORTS = "`$SED 1q $export_symbols`"; then - prefix_cmds="$prefix_cmds -e 1d"; - fi~ - prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ - cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ - $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ - emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - ;; - - dgux*) - case $cc_basename in - ec++*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - ghcx*) - # Green Hills C++ Compiler - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - freebsd-elf*) - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - ;; - - freebsd* | dragonfly*) - # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF - # conventions - _LT_TAGVAR(ld_shlibs, $1)=yes - ;; - - haiku*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - hpux9*) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, - # but as the default - # location of 
the library. - - case $cc_basename in - CC*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - aCC*) - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - ;; - *) - if test yes = "$GXX"; then - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' - else - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - hpux10*|hpux11*) - if test no = "$with_gnu_ld"; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - case $host_cpu in - hppa*64*|ia64*) - ;; - *) - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - ;; - esac - fi - case $host_cpu in - hppa*64*|ia64*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - *) - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the 
search PATH, - # but as the default - # location of the library. - ;; - esac - - case $cc_basename in - CC*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - aCC*) - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - esac - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. 
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - ;; - *) - if test yes = "$GXX"; then - if test no = "$with_gnu_ld"; then - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - esac - fi - else - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - interix[[3-9]]*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. - # Instead, shared libraries are loaded at an image base (0x10000000 by - # default) and relocated if they conflict, which is a slow very memory - # consuming and fragmenting process. To avoid this, we pick a random, - # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link - # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - ;; - irix5* | irix6*) - case $cc_basename in - CC*) - # SGI C++ - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' - - # Archives containing C++ object files must be created using - # "CC -ar", where "CC" is the IRIX C++ compiler. This is - # necessary to make sure instantiated templates are included - # in the archive. 
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' - ;; - *) - if test yes = "$GXX"; then - if test no = "$with_gnu_ld"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' - fi - fi - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - esac - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(inherit_rpath, $1)=yes - ;; - - linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) - case $cc_basename in - KCC*) - # Kuck and Associates, Inc. (KAI) C++ Compiler - - # KCC will only create a shared library if the output file - # ends with ".so" (or ".sl" for HP-UX), so rename the library - # to its proper name (with version) after linking. - _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' - - # Archives containing C++ object files must be created using - # "CC -Bstatic", where "CC" is the KAI C++ compiler. - _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' - ;; - icpc* | ecpc* ) - # Intel C++ - with_gnu_ld=yes - # version 8.0 and above of icpc choke on multiply defined symbols - # if we add $predep_objects and $postdep_objects, however 7.1 and - # earlier do not add the objects themselves. 
- case `$CC -V 2>&1` in - *"Version 7."*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - ;; - *) # Version 8.0 or newer - tmp_idyn= - case $host_cpu in - ia64*) tmp_idyn=' -i_dynamic';; - esac - _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - ;; - esac - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' - ;; - pgCC* | pgcpp*) - # Portland Group C++ compiler - case `$CC -V` in - *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) - _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ - compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' - _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ - $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ - $RANLIB $oldlib' - _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ - $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname 
$wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ - $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - ;; - *) # Version 6 and above use weak symbols - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' - ;; - esac - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl--rpath $wl$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' - ;; - cxx*) - # Compaq C++ - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' - - runpath_var=LD_RUN_PATH - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' - ;; - xl* | mpixl* | bgxl*) - # IBM XL 8.0 on PPC, with GNU ld - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' - _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' - if test yes = "$supports_anon_versioning"; then - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ - echo "local: *; };" >> $output_objdir/$libname.ver~ - $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' - fi - ;; - *) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) - # Sun C++ 5.9 - _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' - _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` $wl--no-whole-archive' - _LT_TAGVAR(compiler_needs_object, $1)=yes - - # Not sure whether something based on - # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 - # would be better. - output_verbose_link_cmd='func_echo_all' - - # Archives containing C++ object files must be created using - # "CC -xar", where "CC" is the Sun C++ compiler. This is - # necessary to make sure instantiated templates are included - # in the archive. - _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' - ;; - esac - ;; - esac - ;; - - lynxos*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - m88k*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - mvs*) - case $cc_basename in - cxx*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' - wlarc= - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - fi - # Workaround some broken pre-1.5 toolchains - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' - ;; - - *nto* | *qnx*) - _LT_TAGVAR(ld_shlibs, $1)=yes - ;; - - openbsd* | bitrig*) - if test -f /usr/libexec/ld.so; then - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; 
then - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' - _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' - fi - output_verbose_link_cmd=func_echo_all - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - osf3* | osf4* | osf5*) - case $cc_basename in - KCC*) - # Kuck and Associates, Inc. (KAI) C++ Compiler - - # KCC will only create a shared library if the output file - # ends with ".so" (or ".sl" for HP-UX), so rename the library - # to its proper name (with version) after linking. - _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Archives containing C++ object files must be created using - # the KAI C++ compiler. 
- case $host in - osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; - *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; - esac - ;; - RCC*) - # Rational C++ 2.4.1 - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - cxx*) - case $host in - osf3*) - _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - ;; - *) - _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ - echo "-hidden">> $lib.exp~ - $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ - $RM $lib.exp' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' - ;; - esac - - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - ;; - *) - if test yes,no = "$GXX,$with_gnu_ld"; then - _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' - case $host in - osf3*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' - ;; - esac - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' - - else - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - psos*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - sunos4*) - case $cc_basename in - CC*) - # Sun C++ 4.x - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - lcc*) - # Lucid - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - solaris*) - case $cc_basename in - CC* | sunCC*) - # Sun C++ 4.2, 5.x and Centerline C++ - _LT_TAGVAR(archive_cmds_need_lc,$1)=yes - _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' - _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - case $host_os in - solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; - *) - # The compiler driver will combine and reorder linker options, - # but understands '-z linker_flag'. - # Supported since Solaris 2.6 (maybe 2.5.1?) - _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' - ;; - esac - _LT_TAGVAR(link_all_deplibs, $1)=yes - - output_verbose_link_cmd='func_echo_all' - - # Archives containing C++ object files must be created using - # "CC -xar", where "CC" is the Sun C++ compiler. This is - # necessary to make sure instantiated templates are included - # in the archive. 
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' - ;; - gcx*) - # Green Hills C++ Compiler - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' - - # The C++ compiler must be used to create the archive. - _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' - ;; - *) - # GNU C++ compiler with Solaris linker - if test yes,no = "$GXX,$with_gnu_ld"; then - _LT_TAGVAR(no_undefined_flag, $1)=' $wl-z ${wl}defs' - if $CC --version | $GREP -v '^2\.7' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' - else - # g++ 2.7 appears to require '-G' NOT '-shared' on this - # platform. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' - fi - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' - case $host_os in - solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; - *) - _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' - ;; - esac - fi - ;; - esac - ;; - - sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) - _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - runpath_var='LD_RUN_PATH' - - case $cc_basename in - CC*) - _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - ;; - - sysv5* | sco3.2v5* | sco5v6*) - # Note: We CANNOT use -z defs as we might desire, because we do not - # link with -lc, and that would cause any symbols used from libc to - # always be unresolved, which means just 
about no library would - # ever link correctly. If we're not using GNU ld we use -z text - # though, which does catch some bad symbols but isn't as heavy-handed - # as -z defs. - _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' - _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' - runpath_var='LD_RUN_PATH' - - case $cc_basename in - CC*) - _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ - '"$_LT_TAGVAR(old_archive_cmds, $1)" - _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ - '"$_LT_TAGVAR(reload_cmds, $1)" - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - ;; - - tandem*) - case $cc_basename in - NCC*) - # NonStop-UX NCC 3.20 - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - vxworks*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - - AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) - test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no - - _LT_TAGVAR(GCC, $1)=$GXX - _LT_TAGVAR(LD, $1)=$LD - - ## CAVEAT EMPTOR: - ## There is no encapsulation within the 
following macros, do not change - ## the running order or otherwise move them around unless you know exactly - ## what you are doing... - _LT_SYS_HIDDEN_LIBDEPS($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) - fi # test -n "$compiler" - - CC=$lt_save_CC - CFLAGS=$lt_save_CFLAGS - LDCXX=$LD - LD=$lt_save_LD - GCC=$lt_save_GCC - with_gnu_ld=$lt_save_with_gnu_ld - lt_cv_path_LDCXX=$lt_cv_path_LD - lt_cv_path_LD=$lt_save_path_LD - lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld - lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld -fi # test yes != "$_lt_caught_CXX_error" - -AC_LANG_POP -])# _LT_LANG_CXX_CONFIG - - -# _LT_FUNC_STRIPNAME_CNF -# ---------------------- -# func_stripname_cnf prefix suffix name -# strip PREFIX and SUFFIX off of NAME. -# PREFIX and SUFFIX must not contain globbing or regex special -# characters, hashes, percent signs, but SUFFIX may contain a leading -# dot (in which case that matches only a dot). -# -# This function is identical to the (non-XSI) version of func_stripname, -# except this one can be used by m4 code that may be executed by configure, -# rather than the libtool script. -m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl -AC_REQUIRE([_LT_DECL_SED]) -AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) -func_stripname_cnf () -{ - case @S|@2 in - .*) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%\\\\@S|@2\$%%"`;; - *) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%@S|@2\$%%"`;; - esac -} # func_stripname_cnf -])# _LT_FUNC_STRIPNAME_CNF - - -# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) -# --------------------------------- -# Figure out "hidden" library dependencies from verbose -# compiler output when linking a shared library. -# Parse the compiler output and extract the necessary -# objects, libraries and library flags. 
-m4_defun([_LT_SYS_HIDDEN_LIBDEPS], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl -# Dependencies to place before and after the object being linked: -_LT_TAGVAR(predep_objects, $1)= -_LT_TAGVAR(postdep_objects, $1)= -_LT_TAGVAR(predeps, $1)= -_LT_TAGVAR(postdeps, $1)= -_LT_TAGVAR(compiler_lib_search_path, $1)= - -dnl we can't use the lt_simple_compile_test_code here, -dnl because it contains code intended for an executable, -dnl not a library. It's possible we should let each -dnl tag define a new lt_????_link_test_code variable, -dnl but it's only used here... -m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF -int a; -void foo (void) { a = 0; } -_LT_EOF -], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF -class Foo -{ -public: - Foo (void) { a = 0; } -private: - int a; -}; -_LT_EOF -], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF - subroutine foo - implicit none - integer*4 a - a=0 - return - end -_LT_EOF -], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF - subroutine foo - implicit none - integer a - a=0 - return - end -_LT_EOF -], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF -public class foo { - private int a; - public void bar (void) { - a = 0; - } -}; -_LT_EOF -], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF -package foo -func foo() { -} -_LT_EOF -]) - -_lt_libdeps_save_CFLAGS=$CFLAGS -case "$CC $CFLAGS " in #( -*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; -*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; -*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; -esac - -dnl Parse the compiler output and extract the necessary -dnl objects, libraries and library flags. -if AC_TRY_EVAL(ac_compile); then - # Parse the compiler output and extract the necessary - # objects, libraries and library flags. - - # Sentinel used to keep track of whether or not we are before - # the conftest object file. 
- pre_test_object_deps_done=no - - for p in `eval "$output_verbose_link_cmd"`; do - case $prev$p in - - -L* | -R* | -l*) - # Some compilers place space between "-{L,R}" and the path. - # Remove the space. - if test x-L = "$p" || - test x-R = "$p"; then - prev=$p - continue - fi - - # Expand the sysroot to ease extracting the directories later. - if test -z "$prev"; then - case $p in - -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; - -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; - -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; - esac - fi - case $p in - =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; - esac - if test no = "$pre_test_object_deps_done"; then - case $prev in - -L | -R) - # Internal compiler library paths should come after those - # provided the user. The postdeps already come after the - # user supplied libs so there is no need to process them. - if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then - _LT_TAGVAR(compiler_lib_search_path, $1)=$prev$p - else - _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} $prev$p" - fi - ;; - # The "-l" case would never come before the object being - # linked, so don't bother handling this case. - esac - else - if test -z "$_LT_TAGVAR(postdeps, $1)"; then - _LT_TAGVAR(postdeps, $1)=$prev$p - else - _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} $prev$p" - fi - fi - prev= - ;; - - *.lto.$objext) ;; # Ignore GCC LTO objects - *.$objext) - # This assumes that the test object file only shows up - # once in the compiler output. 
- if test "$p" = "conftest.$objext"; then - pre_test_object_deps_done=yes - continue - fi - - if test no = "$pre_test_object_deps_done"; then - if test -z "$_LT_TAGVAR(predep_objects, $1)"; then - _LT_TAGVAR(predep_objects, $1)=$p - else - _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" - fi - else - if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then - _LT_TAGVAR(postdep_objects, $1)=$p - else - _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" - fi - fi - ;; - - *) ;; # Ignore the rest. - - esac - done - - # Clean up. - rm -f a.out a.exe -else - echo "libtool.m4: error: problem compiling $1 test program" -fi - -$RM -f confest.$objext -CFLAGS=$_lt_libdeps_save_CFLAGS - -# PORTME: override above test on systems where it is broken -m4_if([$1], [CXX], -[case $host_os in -interix[[3-9]]*) - # Interix 3.5 installs completely hosed .la files for C++, so rather than - # hack all around it, let's just trust "g++" to DTRT. - _LT_TAGVAR(predep_objects,$1)= - _LT_TAGVAR(postdep_objects,$1)= - _LT_TAGVAR(postdeps,$1)= - ;; -esac -]) - -case " $_LT_TAGVAR(postdeps, $1) " in -*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; -esac - _LT_TAGVAR(compiler_lib_search_dirs, $1)= -if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then - _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | $SED -e 's! -L! 
!g' -e 's!^ !!'` -fi -_LT_TAGDECL([], [compiler_lib_search_dirs], [1], - [The directories searched by this compiler when creating a shared library]) -_LT_TAGDECL([], [predep_objects], [1], - [Dependencies to place before and after the objects being linked to - create a shared library]) -_LT_TAGDECL([], [postdep_objects], [1]) -_LT_TAGDECL([], [predeps], [1]) -_LT_TAGDECL([], [postdeps], [1]) -_LT_TAGDECL([], [compiler_lib_search_path], [1], - [The library search path used internally by the compiler when linking - a shared library]) -])# _LT_SYS_HIDDEN_LIBDEPS - - -# _LT_LANG_F77_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for a Fortran 77 compiler are -# suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to 'libtool'. -m4_defun([_LT_LANG_F77_CONFIG], -[AC_LANG_PUSH(Fortran 77) -if test -z "$F77" || test no = "$F77"; then - _lt_disable_F77=yes -fi - -_LT_TAGVAR(archive_cmds_need_lc, $1)=no -_LT_TAGVAR(allow_undefined_flag, $1)= -_LT_TAGVAR(always_export_symbols, $1)=no -_LT_TAGVAR(archive_expsym_cmds, $1)= -_LT_TAGVAR(export_dynamic_flag_spec, $1)= -_LT_TAGVAR(hardcode_direct, $1)=no -_LT_TAGVAR(hardcode_direct_absolute, $1)=no -_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= -_LT_TAGVAR(hardcode_libdir_separator, $1)= -_LT_TAGVAR(hardcode_minus_L, $1)=no -_LT_TAGVAR(hardcode_automatic, $1)=no -_LT_TAGVAR(inherit_rpath, $1)=no -_LT_TAGVAR(module_cmds, $1)= -_LT_TAGVAR(module_expsym_cmds, $1)= -_LT_TAGVAR(link_all_deplibs, $1)=unknown -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds -_LT_TAGVAR(no_undefined_flag, $1)= -_LT_TAGVAR(whole_archive_flag_spec, $1)= -_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - -# Source file extension for f77 test sources. -ac_ext=f - -# Object file extension for compiled f77 test sources. 
-objext=o -_LT_TAGVAR(objext, $1)=$objext - -# No sense in running all these tests if we already determined that -# the F77 compiler isn't working. Some variables (like enable_shared) -# are currently assumed to apply to all compilers on this platform, -# and will be corrupted by setting them based on a non-working compiler. -if test yes != "$_lt_disable_F77"; then - # Code to be used in simple compile tests - lt_simple_compile_test_code="\ - subroutine t - return - end -" - - # Code to be used in simple link tests - lt_simple_link_test_code="\ - program t - end -" - - # ltmain only uses $CC for tagged configurations so make sure $CC is set. - _LT_TAG_COMPILER - - # save warnings/boilerplate of simple test code - _LT_COMPILER_BOILERPLATE - _LT_LINKER_BOILERPLATE - - # Allow CC to be a program name with arguments. - lt_save_CC=$CC - lt_save_GCC=$GCC - lt_save_CFLAGS=$CFLAGS - CC=${F77-"f77"} - CFLAGS=$FFLAGS - compiler=$CC - _LT_TAGVAR(compiler, $1)=$CC - _LT_CC_BASENAME([$compiler]) - GCC=$G77 - if test -n "$compiler"; then - AC_MSG_CHECKING([if libtool supports shared libraries]) - AC_MSG_RESULT([$can_build_shared]) - - AC_MSG_CHECKING([whether to build shared libraries]) - test no = "$can_build_shared" && enable_shared=no - - # On AIX, shared libraries and static libraries use the same namespace, and - # are all built from PIC. 
- case $host_os in - aix3*) - test yes = "$enable_shared" && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - aix[[4-9]]*) - if test ia64 != "$host_cpu"; then - case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in - yes,aix,yes) ;; # shared object as lib.so file only - yes,svr4,*) ;; # shared object as lib.so archive member only - yes,*) enable_static=no ;; # shared object in lib.a archive as well - esac - fi - ;; - esac - AC_MSG_RESULT([$enable_shared]) - - AC_MSG_CHECKING([whether to build static libraries]) - # Make sure either enable_shared or enable_static is yes. - test yes = "$enable_shared" || enable_static=yes - AC_MSG_RESULT([$enable_static]) - - _LT_TAGVAR(GCC, $1)=$G77 - _LT_TAGVAR(LD, $1)=$LD - - ## CAVEAT EMPTOR: - ## There is no encapsulation within the following macros, do not change - ## the running order or otherwise move them around unless you know exactly - ## what you are doing... - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) - fi # test -n "$compiler" - - GCC=$lt_save_GCC - CC=$lt_save_CC - CFLAGS=$lt_save_CFLAGS -fi # test yes != "$_lt_disable_F77" - -AC_LANG_POP -])# _LT_LANG_F77_CONFIG - - -# _LT_LANG_FC_CONFIG([TAG]) -# ------------------------- -# Ensure that the configuration variables for a Fortran compiler are -# suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to 'libtool'. 
-m4_defun([_LT_LANG_FC_CONFIG], -[AC_LANG_PUSH(Fortran) - -if test -z "$FC" || test no = "$FC"; then - _lt_disable_FC=yes -fi - -_LT_TAGVAR(archive_cmds_need_lc, $1)=no -_LT_TAGVAR(allow_undefined_flag, $1)= -_LT_TAGVAR(always_export_symbols, $1)=no -_LT_TAGVAR(archive_expsym_cmds, $1)= -_LT_TAGVAR(export_dynamic_flag_spec, $1)= -_LT_TAGVAR(hardcode_direct, $1)=no -_LT_TAGVAR(hardcode_direct_absolute, $1)=no -_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= -_LT_TAGVAR(hardcode_libdir_separator, $1)= -_LT_TAGVAR(hardcode_minus_L, $1)=no -_LT_TAGVAR(hardcode_automatic, $1)=no -_LT_TAGVAR(inherit_rpath, $1)=no -_LT_TAGVAR(module_cmds, $1)= -_LT_TAGVAR(module_expsym_cmds, $1)= -_LT_TAGVAR(link_all_deplibs, $1)=unknown -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds -_LT_TAGVAR(no_undefined_flag, $1)= -_LT_TAGVAR(whole_archive_flag_spec, $1)= -_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - -# Source file extension for fc test sources. -ac_ext=${ac_fc_srcext-f} - -# Object file extension for compiled fc test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# No sense in running all these tests if we already determined that -# the FC compiler isn't working. Some variables (like enable_shared) -# are currently assumed to apply to all compilers on this platform, -# and will be corrupted by setting them based on a non-working compiler. -if test yes != "$_lt_disable_FC"; then - # Code to be used in simple compile tests - lt_simple_compile_test_code="\ - subroutine t - return - end -" - - # Code to be used in simple link tests - lt_simple_link_test_code="\ - program t - end -" - - # ltmain only uses $CC for tagged configurations so make sure $CC is set. - _LT_TAG_COMPILER - - # save warnings/boilerplate of simple test code - _LT_COMPILER_BOILERPLATE - _LT_LINKER_BOILERPLATE - - # Allow CC to be a program name with arguments. 
- lt_save_CC=$CC - lt_save_GCC=$GCC - lt_save_CFLAGS=$CFLAGS - CC=${FC-"f95"} - CFLAGS=$FCFLAGS - compiler=$CC - GCC=$ac_cv_fc_compiler_gnu - - _LT_TAGVAR(compiler, $1)=$CC - _LT_CC_BASENAME([$compiler]) - - if test -n "$compiler"; then - AC_MSG_CHECKING([if libtool supports shared libraries]) - AC_MSG_RESULT([$can_build_shared]) - - AC_MSG_CHECKING([whether to build shared libraries]) - test no = "$can_build_shared" && enable_shared=no - - # On AIX, shared libraries and static libraries use the same namespace, and - # are all built from PIC. - case $host_os in - aix3*) - test yes = "$enable_shared" && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - aix[[4-9]]*) - if test ia64 != "$host_cpu"; then - case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in - yes,aix,yes) ;; # shared object as lib.so file only - yes,svr4,*) ;; # shared object as lib.so archive member only - yes,*) enable_static=no ;; # shared object in lib.a archive as well - esac - fi - ;; - esac - AC_MSG_RESULT([$enable_shared]) - - AC_MSG_CHECKING([whether to build static libraries]) - # Make sure either enable_shared or enable_static is yes. - test yes = "$enable_shared" || enable_static=yes - AC_MSG_RESULT([$enable_static]) - - _LT_TAGVAR(GCC, $1)=$ac_cv_fc_compiler_gnu - _LT_TAGVAR(LD, $1)=$LD - - ## CAVEAT EMPTOR: - ## There is no encapsulation within the following macros, do not change - ## the running order or otherwise move them around unless you know exactly - ## what you are doing... 
- _LT_SYS_HIDDEN_LIBDEPS($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) - fi # test -n "$compiler" - - GCC=$lt_save_GCC - CC=$lt_save_CC - CFLAGS=$lt_save_CFLAGS -fi # test yes != "$_lt_disable_FC" - -AC_LANG_POP -])# _LT_LANG_FC_CONFIG - - -# _LT_LANG_GCJ_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for the GNU Java Compiler compiler -# are suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to 'libtool'. -m4_defun([_LT_LANG_GCJ_CONFIG], -[AC_REQUIRE([LT_PROG_GCJ])dnl -AC_LANG_SAVE - -# Source file extension for Java test sources. -ac_ext=java - -# Object file extension for compiled Java test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code="class foo {}" - -# Code to be used in simple link tests -lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' - -# ltmain only uses $CC for tagged configurations so make sure $CC is set. -_LT_TAG_COMPILER - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -# Allow CC to be a program name with arguments. -lt_save_CC=$CC -lt_save_CFLAGS=$CFLAGS -lt_save_GCC=$GCC -GCC=yes -CC=${GCJ-"gcj"} -CFLAGS=$GCJFLAGS -compiler=$CC -_LT_TAGVAR(compiler, $1)=$CC -_LT_TAGVAR(LD, $1)=$LD -_LT_CC_BASENAME([$compiler]) - -# GCJ did not exist at the time GCC didn't implicitly link libc in. 
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no - -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds - -## CAVEAT EMPTOR: -## There is no encapsulation within the following macros, do not change -## the running order or otherwise move them around unless you know exactly -## what you are doing... -if test -n "$compiler"; then - _LT_COMPILER_NO_RTTI($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) -fi - -AC_LANG_RESTORE - -GCC=$lt_save_GCC -CC=$lt_save_CC -CFLAGS=$lt_save_CFLAGS -])# _LT_LANG_GCJ_CONFIG - - -# _LT_LANG_GO_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for the GNU Go compiler -# are suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to 'libtool'. -m4_defun([_LT_LANG_GO_CONFIG], -[AC_REQUIRE([LT_PROG_GO])dnl -AC_LANG_SAVE - -# Source file extension for Go test sources. -ac_ext=go - -# Object file extension for compiled Go test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code="package main; func main() { }" - -# Code to be used in simple link tests -lt_simple_link_test_code='package main; func main() { }' - -# ltmain only uses $CC for tagged configurations so make sure $CC is set. -_LT_TAG_COMPILER - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -# Allow CC to be a program name with arguments. -lt_save_CC=$CC -lt_save_CFLAGS=$CFLAGS -lt_save_GCC=$GCC -GCC=yes -CC=${GOC-"gccgo"} -CFLAGS=$GOFLAGS -compiler=$CC -_LT_TAGVAR(compiler, $1)=$CC -_LT_TAGVAR(LD, $1)=$LD -_LT_CC_BASENAME([$compiler]) - -# Go did not exist at the time GCC didn't implicitly link libc in. 
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no - -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds - -## CAVEAT EMPTOR: -## There is no encapsulation within the following macros, do not change -## the running order or otherwise move them around unless you know exactly -## what you are doing... -if test -n "$compiler"; then - _LT_COMPILER_NO_RTTI($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) -fi - -AC_LANG_RESTORE - -GCC=$lt_save_GCC -CC=$lt_save_CC -CFLAGS=$lt_save_CFLAGS -])# _LT_LANG_GO_CONFIG - - -# _LT_LANG_RC_CONFIG([TAG]) -# ------------------------- -# Ensure that the configuration variables for the Windows resource compiler -# are suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to 'libtool'. -m4_defun([_LT_LANG_RC_CONFIG], -[AC_REQUIRE([LT_PROG_RC])dnl -AC_LANG_SAVE - -# Source file extension for RC test sources. -ac_ext=rc - -# Object file extension for compiled RC test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' - -# Code to be used in simple link tests -lt_simple_link_test_code=$lt_simple_compile_test_code - -# ltmain only uses $CC for tagged configurations so make sure $CC is set. -_LT_TAG_COMPILER - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -# Allow CC to be a program name with arguments. 
-lt_save_CC=$CC -lt_save_CFLAGS=$CFLAGS -lt_save_GCC=$GCC -GCC= -CC=${RC-"windres"} -CFLAGS= -compiler=$CC -_LT_TAGVAR(compiler, $1)=$CC -_LT_CC_BASENAME([$compiler]) -_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes - -if test -n "$compiler"; then - : - _LT_CONFIG($1) -fi - -GCC=$lt_save_GCC -AC_LANG_RESTORE -CC=$lt_save_CC -CFLAGS=$lt_save_CFLAGS -])# _LT_LANG_RC_CONFIG - - -# LT_PROG_GCJ -# ----------- -AC_DEFUN([LT_PROG_GCJ], -[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], - [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], - [AC_CHECK_TOOL(GCJ, gcj,) - test set = "${GCJFLAGS+set}" || GCJFLAGS="-g -O2" - AC_SUBST(GCJFLAGS)])])[]dnl -]) - -# Old name: -AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([LT_AC_PROG_GCJ], []) - - -# LT_PROG_GO -# ---------- -AC_DEFUN([LT_PROG_GO], -[AC_CHECK_TOOL(GOC, gccgo,) -]) - - -# LT_PROG_RC -# ---------- -AC_DEFUN([LT_PROG_RC], -[AC_CHECK_TOOL(RC, windres,) -]) - -# Old name: -AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([LT_AC_PROG_RC], []) - - -# _LT_DECL_EGREP -# -------------- -# If we don't have a new enough Autoconf to choose the best grep -# available, choose the one first in the user's PATH. -m4_defun([_LT_DECL_EGREP], -[AC_REQUIRE([AC_PROG_EGREP])dnl -AC_REQUIRE([AC_PROG_FGREP])dnl -test -z "$GREP" && GREP=grep -_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) -_LT_DECL([], [EGREP], [1], [An ERE matcher]) -_LT_DECL([], [FGREP], [1], [A literal string matcher]) -dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too -AC_SUBST([GREP]) -]) - - -# _LT_DECL_OBJDUMP -# -------------- -# If we don't have a new enough Autoconf to choose the best objdump -# available, choose the one first in the user's PATH. 
-m4_defun([_LT_DECL_OBJDUMP], -[AC_CHECK_TOOL(OBJDUMP, objdump, false) -test -z "$OBJDUMP" && OBJDUMP=objdump -_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) -AC_SUBST([OBJDUMP]) -]) - -# _LT_DECL_DLLTOOL -# ---------------- -# Ensure DLLTOOL variable is set. -m4_defun([_LT_DECL_DLLTOOL], -[AC_CHECK_TOOL(DLLTOOL, dlltool, false) -test -z "$DLLTOOL" && DLLTOOL=dlltool -_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) -AC_SUBST([DLLTOOL]) -]) - -# _LT_DECL_SED -# ------------ -# Check for a fully-functional sed program, that truncates -# as few characters as possible. Prefer GNU sed if found. -m4_defun([_LT_DECL_SED], -[AC_PROG_SED -test -z "$SED" && SED=sed -Xsed="$SED -e 1s/^X//" -_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) -_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], - [Sed that helps us avoid accidentally triggering echo(1) options like -n]) -])# _LT_DECL_SED - -m4_ifndef([AC_PROG_SED], [ -############################################################ -# NOTE: This macro has been submitted for inclusion into # -# GNU Autoconf as AC_PROG_SED. When it is available in # -# a released version of Autoconf we should remove this # -# macro and use it instead. # -############################################################ - -m4_defun([AC_PROG_SED], -[AC_MSG_CHECKING([for a sed that does not truncate output]) -AC_CACHE_VAL(lt_cv_path_SED, -[# Loop through the user's path and test for sed and gsed. -# Then use that list of sed's as ones to test for truncation. -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for lt_ac_prog in sed gsed; do - for ac_exec_ext in '' $ac_executable_extensions; do - if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then - lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" - fi - done - done -done -IFS=$as_save_IFS -lt_ac_max=0 -lt_ac_count=0 -# Add /usr/xpg4/bin/sed as it is typically found on Solaris -# along with /bin/sed that truncates output. -for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do - test ! -f "$lt_ac_sed" && continue - cat /dev/null > conftest.in - lt_ac_count=0 - echo $ECHO_N "0123456789$ECHO_C" >conftest.in - # Check for GNU sed and select it if it is found. - if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then - lt_cv_path_SED=$lt_ac_sed - break - fi - while true; do - cat conftest.in conftest.in >conftest.tmp - mv conftest.tmp conftest.in - cp conftest.in conftest.nl - echo >>conftest.nl - $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break - cmp -s conftest.out conftest.nl || break - # 10000 chars as input seems more than enough - test 10 -lt "$lt_ac_count" && break - lt_ac_count=`expr $lt_ac_count + 1` - if test "$lt_ac_count" -gt "$lt_ac_max"; then - lt_ac_max=$lt_ac_count - lt_cv_path_SED=$lt_ac_sed - fi - done -done -]) -SED=$lt_cv_path_SED -AC_SUBST([SED]) -AC_MSG_RESULT([$SED]) -])#AC_PROG_SED -])#m4_ifndef - -# Old name: -AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([LT_AC_PROG_SED], []) - - -# _LT_CHECK_SHELL_FEATURES -# ------------------------ -# Find out whether the shell is Bourne or XSI compatible, -# or has some other useful features. 
-m4_defun([_LT_CHECK_SHELL_FEATURES], -[if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then - lt_unset=unset -else - lt_unset=false -fi -_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl - -# test EBCDIC or ASCII -case `echo X|tr X '\101'` in - A) # ASCII based system - # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr - lt_SP2NL='tr \040 \012' - lt_NL2SP='tr \015\012 \040\040' - ;; - *) # EBCDIC based system - lt_SP2NL='tr \100 \n' - lt_NL2SP='tr \r\n \100\100' - ;; -esac -_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl -_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl -])# _LT_CHECK_SHELL_FEATURES - - -# _LT_PATH_CONVERSION_FUNCTIONS -# ----------------------------- -# Determine what file name conversion functions should be used by -# func_to_host_file (and, implicitly, by func_to_host_path). These are needed -# for certain cross-compile configurations and native mingw. -m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -AC_MSG_CHECKING([how to convert $build file names to $host format]) -AC_CACHE_VAL(lt_cv_to_host_file_cmd, -[case $host in - *-*-mingw* ) - case $build in - *-*-mingw* ) # actually msys - lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 - ;; - *-*-cygwin* ) - lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 - ;; - * ) # otherwise, assume *nix - lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 - ;; - esac - ;; - *-*-cygwin* ) - case $build in - *-*-mingw* ) # actually msys - lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin - ;; - *-*-cygwin* ) - lt_cv_to_host_file_cmd=func_convert_file_noop - ;; - * ) # otherwise, assume *nix - lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin - ;; - esac - ;; - * ) # unhandled hosts (and "normal" native builds) - lt_cv_to_host_file_cmd=func_convert_file_noop - ;; -esac -]) -to_host_file_cmd=$lt_cv_to_host_file_cmd 
-AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) -_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], - [0], [convert $build file names to $host format])dnl - -AC_MSG_CHECKING([how to convert $build file names to toolchain format]) -AC_CACHE_VAL(lt_cv_to_tool_file_cmd, -[#assume ordinary cross tools, or native build. -lt_cv_to_tool_file_cmd=func_convert_file_noop -case $host in - *-*-mingw* ) - case $build in - *-*-mingw* ) # actually msys - lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 - ;; - esac - ;; -esac -]) -to_tool_file_cmd=$lt_cv_to_tool_file_cmd -AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) -_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], - [0], [convert $build files to toolchain format])dnl -])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4 deleted file mode 100644 index 94b08297..00000000 --- a/m4/ltoptions.m4 +++ /dev/null @@ -1,437 +0,0 @@ -# Helper functions for option handling. -*- Autoconf -*- -# -# Copyright (C) 2004-2005, 2007-2009, 2011-2015 Free Software -# Foundation, Inc. -# Written by Gary V. Vaughan, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 8 ltoptions.m4 - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) - - -# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) -# ------------------------------------------ -m4_define([_LT_MANGLE_OPTION], -[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) - - -# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) -# --------------------------------------- -# Set option OPTION-NAME for macro MACRO-NAME, and if there is a -# matching handler defined, dispatch to it. Other OPTION-NAMEs are -# saved as a flag. 
-m4_define([_LT_SET_OPTION], -[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl -m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), - _LT_MANGLE_DEFUN([$1], [$2]), - [m4_warning([Unknown $1 option '$2'])])[]dnl -]) - - -# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) -# ------------------------------------------------------------ -# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. -m4_define([_LT_IF_OPTION], -[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) - - -# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) -# ------------------------------------------------------- -# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME -# are set. -m4_define([_LT_UNLESS_OPTIONS], -[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), - [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), - [m4_define([$0_found])])])[]dnl -m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 -])[]dnl -]) - - -# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) -# ---------------------------------------- -# OPTION-LIST is a space-separated list of Libtool options associated -# with MACRO-NAME. If any OPTION has a matching handler declared with -# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about -# the unknown option and exit. -m4_defun([_LT_SET_OPTIONS], -[# Set options -m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), - [_LT_SET_OPTION([$1], _LT_Option)]) - -m4_if([$1],[LT_INIT],[ - dnl - dnl Simply set some default values (i.e off) if boolean options were not - dnl specified: - _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no - ]) - _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no - ]) - dnl - dnl If no reference was made to various pairs of opposing options, then - dnl we run the default mode handler for the pair. 
For example, if neither - dnl 'shared' nor 'disable-shared' was passed, we enable building of shared - dnl archives by default: - _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) - _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) - _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) - _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], - [_LT_ENABLE_FAST_INSTALL]) - _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], - [_LT_WITH_AIX_SONAME([aix])]) - ]) -])# _LT_SET_OPTIONS - - -## --------------------------------- ## -## Macros to handle LT_INIT options. ## -## --------------------------------- ## - -# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) -# ----------------------------------------- -m4_define([_LT_MANGLE_DEFUN], -[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) - - -# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) -# ----------------------------------------------- -m4_define([LT_OPTION_DEFINE], -[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl -])# LT_OPTION_DEFINE - - -# dlopen -# ------ -LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes -]) - -AU_DEFUN([AC_LIBTOOL_DLOPEN], -[_LT_SET_OPTION([LT_INIT], [dlopen]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the 'dlopen' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) - - -# win32-dll -# --------- -# Declare package support for building win32 dll's. 
-LT_OPTION_DEFINE([LT_INIT], [win32-dll], -[enable_win32_dll=yes - -case $host in -*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) - AC_CHECK_TOOL(AS, as, false) - AC_CHECK_TOOL(DLLTOOL, dlltool, false) - AC_CHECK_TOOL(OBJDUMP, objdump, false) - ;; -esac - -test -z "$AS" && AS=as -_LT_DECL([], [AS], [1], [Assembler program])dnl - -test -z "$DLLTOOL" && DLLTOOL=dlltool -_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl - -test -z "$OBJDUMP" && OBJDUMP=objdump -_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl -])# win32-dll - -AU_DEFUN([AC_LIBTOOL_WIN32_DLL], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -_LT_SET_OPTION([LT_INIT], [win32-dll]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the 'win32-dll' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) - - -# _LT_ENABLE_SHARED([DEFAULT]) -# ---------------------------- -# implement the --enable-shared flag, and supports the 'shared' and -# 'disable-shared' LT_INIT options. -# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. -m4_define([_LT_ENABLE_SHARED], -[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([shared], - [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], - [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_shared=yes ;; - no) enable_shared=no ;; - *) - enable_shared=no - # Look at the argument we got. We use all the common list separators. 
- lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, - for pkg in $enableval; do - IFS=$lt_save_ifs - if test "X$pkg" = "X$p"; then - enable_shared=yes - fi - done - IFS=$lt_save_ifs - ;; - esac], - [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) - - _LT_DECL([build_libtool_libs], [enable_shared], [0], - [Whether or not to build shared libraries]) -])# _LT_ENABLE_SHARED - -LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) - -# Old names: -AC_DEFUN([AC_ENABLE_SHARED], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) -]) - -AC_DEFUN([AC_DISABLE_SHARED], -[_LT_SET_OPTION([LT_INIT], [disable-shared]) -]) - -AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) -AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_ENABLE_SHARED], []) -dnl AC_DEFUN([AM_DISABLE_SHARED], []) - - - -# _LT_ENABLE_STATIC([DEFAULT]) -# ---------------------------- -# implement the --enable-static flag, and support the 'static' and -# 'disable-static' LT_INIT options. -# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. -m4_define([_LT_ENABLE_STATIC], -[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([static], - [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], - [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_static=yes ;; - no) enable_static=no ;; - *) - enable_static=no - # Look at the argument we got. We use all the common list separators. 
- lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, - for pkg in $enableval; do - IFS=$lt_save_ifs - if test "X$pkg" = "X$p"; then - enable_static=yes - fi - done - IFS=$lt_save_ifs - ;; - esac], - [enable_static=]_LT_ENABLE_STATIC_DEFAULT) - - _LT_DECL([build_old_libs], [enable_static], [0], - [Whether or not to build static libraries]) -])# _LT_ENABLE_STATIC - -LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) - -# Old names: -AC_DEFUN([AC_ENABLE_STATIC], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) -]) - -AC_DEFUN([AC_DISABLE_STATIC], -[_LT_SET_OPTION([LT_INIT], [disable-static]) -]) - -AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) -AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_ENABLE_STATIC], []) -dnl AC_DEFUN([AM_DISABLE_STATIC], []) - - - -# _LT_ENABLE_FAST_INSTALL([DEFAULT]) -# ---------------------------------- -# implement the --enable-fast-install flag, and support the 'fast-install' -# and 'disable-fast-install' LT_INIT options. -# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. -m4_define([_LT_ENABLE_FAST_INSTALL], -[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([fast-install], - [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], - [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_fast_install=yes ;; - no) enable_fast_install=no ;; - *) - enable_fast_install=no - # Look at the argument we got. We use all the common list separators. 
- lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, - for pkg in $enableval; do - IFS=$lt_save_ifs - if test "X$pkg" = "X$p"; then - enable_fast_install=yes - fi - done - IFS=$lt_save_ifs - ;; - esac], - [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) - -_LT_DECL([fast_install], [enable_fast_install], [0], - [Whether or not to optimize for fast installation])dnl -])# _LT_ENABLE_FAST_INSTALL - -LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) - -# Old names: -AU_DEFUN([AC_ENABLE_FAST_INSTALL], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you put -the 'fast-install' option into LT_INIT's first parameter.]) -]) - -AU_DEFUN([AC_DISABLE_FAST_INSTALL], -[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you put -the 'disable-fast-install' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) -dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) - - -# _LT_WITH_AIX_SONAME([DEFAULT]) -# ---------------------------------- -# implement the --with-aix-soname flag, and support the `aix-soname=aix' -# and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT -# is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'. 
-m4_define([_LT_WITH_AIX_SONAME], -[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl -shared_archive_member_spec= -case $host,$enable_shared in -power*-*-aix[[5-9]]*,yes) - AC_MSG_CHECKING([which variant of shared library versioning to provide]) - AC_ARG_WITH([aix-soname], - [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], - [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], - [case $withval in - aix|svr4|both) - ;; - *) - AC_MSG_ERROR([Unknown argument to --with-aix-soname]) - ;; - esac - lt_cv_with_aix_soname=$with_aix_soname], - [AC_CACHE_VAL([lt_cv_with_aix_soname], - [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) - with_aix_soname=$lt_cv_with_aix_soname]) - AC_MSG_RESULT([$with_aix_soname]) - if test aix != "$with_aix_soname"; then - # For the AIX way of multilib, we name the shared archive member - # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', - # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. - # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, - # the AIX toolchain works better with OBJECT_MODE set (default 32). - if test 64 = "${OBJECT_MODE-32}"; then - shared_archive_member_spec=shr_64 - else - shared_archive_member_spec=shr - fi - fi - ;; -*) - with_aix_soname=aix - ;; -esac - -_LT_DECL([], [shared_archive_member_spec], [0], - [Shared archive member basename, for filename based shared library versioning on AIX])dnl -])# _LT_WITH_AIX_SONAME - -LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) -LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) -LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) - - -# _LT_WITH_PIC([MODE]) -# -------------------- -# implement the --with-pic flag, and support the 'pic-only' and 'no-pic' -# LT_INIT options. -# MODE is either 'yes' or 'no'. 
If omitted, it defaults to 'both'. -m4_define([_LT_WITH_PIC], -[AC_ARG_WITH([pic], - [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], - [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], - [lt_p=${PACKAGE-default} - case $withval in - yes|no) pic_mode=$withval ;; - *) - pic_mode=default - # Look at the argument we got. We use all the common list separators. - lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, - for lt_pkg in $withval; do - IFS=$lt_save_ifs - if test "X$lt_pkg" = "X$lt_p"; then - pic_mode=yes - fi - done - IFS=$lt_save_ifs - ;; - esac], - [pic_mode=m4_default([$1], [default])]) - -_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl -])# _LT_WITH_PIC - -LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) -LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) - -# Old name: -AU_DEFUN([AC_LIBTOOL_PICMODE], -[_LT_SET_OPTION([LT_INIT], [pic-only]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the 'pic-only' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) - -## ----------------- ## -## LTDL_INIT Options ## -## ----------------- ## - -m4_define([_LTDL_MODE], []) -LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], - [m4_define([_LTDL_MODE], [nonrecursive])]) -LT_OPTION_DEFINE([LTDL_INIT], [recursive], - [m4_define([_LTDL_MODE], [recursive])]) -LT_OPTION_DEFINE([LTDL_INIT], [subproject], - [m4_define([_LTDL_MODE], [subproject])]) - -m4_define([_LTDL_TYPE], []) -LT_OPTION_DEFINE([LTDL_INIT], [installable], - [m4_define([_LTDL_TYPE], [installable])]) -LT_OPTION_DEFINE([LTDL_INIT], [convenience], - [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4 deleted file mode 100644 index 48bc9344..00000000 --- a/m4/ltsugar.m4 +++ /dev/null @@ -1,124 +0,0 @@ -# ltsugar.m4 -- libtool m4 base layer. 
-*-Autoconf-*- -# -# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software -# Foundation, Inc. -# Written by Gary V. Vaughan, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 6 ltsugar.m4 - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) - - -# lt_join(SEP, ARG1, [ARG2...]) -# ----------------------------- -# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their -# associated separator. -# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier -# versions in m4sugar had bugs. -m4_define([lt_join], -[m4_if([$#], [1], [], - [$#], [2], [[$2]], - [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) -m4_define([_lt_join], -[m4_if([$#$2], [2], [], - [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) - - -# lt_car(LIST) -# lt_cdr(LIST) -# ------------ -# Manipulate m4 lists. -# These macros are necessary as long as will still need to support -# Autoconf-2.59, which quotes differently. -m4_define([lt_car], [[$1]]) -m4_define([lt_cdr], -[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], - [$#], 1, [], - [m4_dquote(m4_shift($@))])]) -m4_define([lt_unquote], $1) - - -# lt_append(MACRO-NAME, STRING, [SEPARATOR]) -# ------------------------------------------ -# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. -# Note that neither SEPARATOR nor STRING are expanded; they are appended -# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). -# No SEPARATOR is output if MACRO-NAME was previously undefined (different -# than defined and empty). -# -# This macro is needed until we can rely on Autoconf 2.62, since earlier -# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
-m4_define([lt_append], -[m4_define([$1], - m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) - - - -# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) -# ---------------------------------------------------------- -# Produce a SEP delimited list of all paired combinations of elements of -# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list -# has the form PREFIXmINFIXSUFFIXn. -# Needed until we can rely on m4_combine added in Autoconf 2.62. -m4_define([lt_combine], -[m4_if(m4_eval([$# > 3]), [1], - [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl -[[m4_foreach([_Lt_prefix], [$2], - [m4_foreach([_Lt_suffix], - ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, - [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) - - -# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) -# ----------------------------------------------------------------------- -# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited -# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
-m4_define([lt_if_append_uniq], -[m4_ifdef([$1], - [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], - [lt_append([$1], [$2], [$3])$4], - [$5])], - [lt_append([$1], [$2], [$3])$4])]) - - -# lt_dict_add(DICT, KEY, VALUE) -# ----------------------------- -m4_define([lt_dict_add], -[m4_define([$1($2)], [$3])]) - - -# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) -# -------------------------------------------- -m4_define([lt_dict_add_subkey], -[m4_define([$1($2:$3)], [$4])]) - - -# lt_dict_fetch(DICT, KEY, [SUBKEY]) -# ---------------------------------- -m4_define([lt_dict_fetch], -[m4_ifval([$3], - m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), - m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) - - -# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) -# ----------------------------------------------------------------- -m4_define([lt_if_dict_fetch], -[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], - [$5], - [$6])]) - - -# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) -# -------------------------------------------------------------- -m4_define([lt_dict_filter], -[m4_if([$5], [], [], - [lt_join(m4_quote(m4_default([$4], [[, ]])), - lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), - [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl -]) diff --git a/m4/ltversion.m4 b/m4/ltversion.m4 deleted file mode 100644 index fa04b52a..00000000 --- a/m4/ltversion.m4 +++ /dev/null @@ -1,23 +0,0 @@ -# ltversion.m4 -- version numbers -*- Autoconf -*- -# -# Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. -# Written by Scott James Remnant, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. 
- -# @configure_input@ - -# serial 4179 ltversion.m4 -# This file is part of GNU Libtool - -m4_define([LT_PACKAGE_VERSION], [2.4.6]) -m4_define([LT_PACKAGE_REVISION], [2.4.6]) - -AC_DEFUN([LTVERSION_VERSION], -[macro_version='2.4.6' -macro_revision='2.4.6' -_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) -_LT_DECL(, macro_revision, 0) -]) diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4 deleted file mode 100644 index c6b26f88..00000000 --- a/m4/lt~obsolete.m4 +++ /dev/null @@ -1,99 +0,0 @@ -# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- -# -# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software -# Foundation, Inc. -# Written by Scott James Remnant, 2004. -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 5 lt~obsolete.m4 - -# These exist entirely to fool aclocal when bootstrapping libtool. -# -# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), -# which have later been changed to m4_define as they aren't part of the -# exported API, or moved to Autoconf or Automake where they belong. -# -# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN -# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us -# using a macro with the same name in our local m4/libtool.m4 it'll -# pull the old libtool.m4 in (it doesn't see our shiny new m4_define -# and doesn't know about Autoconf macros at all.) -# -# So we provide this file, which has a silly filename so it's always -# included after everything else. This provides aclocal with the -# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything -# because those macros already exist, or will be overwritten later. -# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. -# -# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. 
-# Yes, that means every name once taken will need to remain here until -# we give up compatibility with versions before 1.7, at which point -# we need to keep only those names which we still refer to. - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) - -m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) -m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) -m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) -m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) -m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) -m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) -m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) -m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) -m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) -m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) -m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) -m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) -m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) -m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) -m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) -m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) -m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) -m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) -m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) -m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) -m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) -m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) -m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) 
-m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) -m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) -m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) -m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) -m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) -m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) -m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) -m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) -m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) -m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) -m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) -m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) -m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) -m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) -m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) -m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) -m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) -m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) -m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) -m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) -m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) -m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) -m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) -m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], 
[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) -m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) -m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) -m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) -m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) -m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) -m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) -m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) -m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) -m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) -m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/m4/m4_ax_boost_base.m4 b/m4/m4_ax_boost_base.m4 deleted file mode 100644 index 2c789eae..00000000 --- a/m4/m4_ax_boost_base.m4 +++ /dev/null @@ -1,301 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_base.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# DESCRIPTION -# -# Test for the Boost C++ libraries of a particular version (or newer) -# -# If no path to the installed boost library is given the macro searchs -# under /usr, /usr/local, /opt and /opt/local and evaluates the -# $BOOST_ROOT environment variable. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) -# -# And sets: -# -# HAVE_BOOST -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2009 Peter Adolphs -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. 
This file is offered as-is, without any -# warranty. - -#serial 44 - -# example boost program (need to pass version) -m4_define([_AX_BOOST_BASE_PROGRAM], - [AC_LANG_PROGRAM([[ -#include -]],[[ -(void) ((void)sizeof(char[1 - 2*!!((BOOST_VERSION) < ($1))])); -]])]) - -AC_DEFUN([AX_BOOST_BASE], -[ -AC_ARG_WITH([boost], - [AS_HELP_STRING([--with-boost@<:@=ARG@:>@], - [use Boost library from a standard location (ARG=yes), - from the specified location (ARG=), - or disable it (ARG=no) - @<:@ARG=yes@:>@ ])], - [ - AS_CASE([$withval], - [no],[want_boost="no";_AX_BOOST_BASE_boost_path=""], - [yes],[want_boost="yes";_AX_BOOST_BASE_boost_path=""], - [want_boost="yes";_AX_BOOST_BASE_boost_path="$withval"]) - ], - [want_boost="yes"]) - - -AC_ARG_WITH([boost-libdir], - [AS_HELP_STRING([--with-boost-libdir=LIB_DIR], - [Force given directory for boost libraries. - Note that this will override library path detection, - so use this parameter only if default library detection fails - and you know exactly where your boost libraries are located.])], - [ - AS_IF([test -d "$withval"], - [_AX_BOOST_BASE_boost_lib_path="$withval"], - [AC_MSG_ERROR([--with-boost-libdir expected directory name])]) - ], - [_AX_BOOST_BASE_boost_lib_path=""]) - -BOOST_LDFLAGS="" -BOOST_CPPFLAGS="" -AS_IF([test "x$want_boost" = "xyes"], - [_AX_BOOST_BASE_RUNDETECT([$1],[$2],[$3])]) -AC_SUBST(BOOST_CPPFLAGS) -AC_SUBST(BOOST_LDFLAGS) -]) - - -# convert a version string in $2 to numeric and affect to polymorphic var $1 -AC_DEFUN([_AX_BOOST_BASE_TONUMERICVERSION],[ - AS_IF([test "x$2" = "x"],[_AX_BOOST_BASE_TONUMERICVERSION_req="1.20.0"],[_AX_BOOST_BASE_TONUMERICVERSION_req="$2"]) - _AX_BOOST_BASE_TONUMERICVERSION_req_shorten=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\.[[0-9]]*\)'` - _AX_BOOST_BASE_TONUMERICVERSION_req_major=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\)'` - AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_major" = "x"], - [AC_MSG_ERROR([You should at least specify 
libboost major version])]) - _AX_BOOST_BASE_TONUMERICVERSION_req_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.\([[0-9]]*\)'` - AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_minor" = "x"], - [_AX_BOOST_BASE_TONUMERICVERSION_req_minor="0"]) - _AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - AS_IF([test "X$_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor" = "X"], - [_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor="0"]) - _AX_BOOST_BASE_TONUMERICVERSION_RET=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req_major \* 100000 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_minor \* 100 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor` - AS_VAR_SET($1,$_AX_BOOST_BASE_TONUMERICVERSION_RET) -]) - -dnl Run the detection of boost should be run only if $want_boost -AC_DEFUN([_AX_BOOST_BASE_RUNDETECT],[ - _AX_BOOST_BASE_TONUMERICVERSION(WANT_BOOST_VERSION,[$1]) - succeeded=no - - - AC_REQUIRE([AC_CANONICAL_HOST]) - dnl On 64-bit systems check for system libraries in both lib64 and lib. - dnl The former is specified by FHS, but e.g. Debian does not adhere to - dnl this (as it rises problems for generic multi-arch support). - dnl The last entry in the list is chosen by default when no libraries - dnl are found, e.g. when only header-only libraries are installed! - AS_CASE([${host_cpu}], - [x86_64],[libsubdirs="lib64 libx32 lib lib64"], - [ppc64|s390x|sparc64|aarch64|ppc64le|riscv64],[libsubdirs="lib64 lib lib64"], - [libsubdirs="lib"] - ) - - dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give - dnl them priority over the other paths since, if libs are found there, they - dnl are almost assuredly the ones desired. 
- AS_CASE([${host_cpu}], - [i?86],[multiarch_libsubdir="lib/i386-${host_os}"], - [multiarch_libsubdir="lib/${host_cpu}-${host_os}"] - ) - - dnl first we check the system location for boost libraries - dnl this location ist chosen if boost libraries are installed with the --layout=system option - dnl or if you install boost with RPM - AS_IF([test "x$_AX_BOOST_BASE_boost_path" != "x"],[ - AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) includes in "$_AX_BOOST_BASE_boost_path/include"]) - AS_IF([test -d "$_AX_BOOST_BASE_boost_path/include" && test -r "$_AX_BOOST_BASE_boost_path/include"],[ - AC_MSG_RESULT([yes]) - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include" - for _AX_BOOST_BASE_boost_path_tmp in $multiarch_libsubdir $libsubdirs; do - AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) lib path in "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"]) - AS_IF([test -d "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" && test -r "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" ],[ - AC_MSG_RESULT([yes]) - BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"; - break; - ], - [AC_MSG_RESULT([no])]) - done],[ - AC_MSG_RESULT([no])]) - ],[ - if test X"$cross_compiling" = Xyes; then - search_libsubdirs=$multiarch_libsubdir - else - search_libsubdirs="$multiarch_libsubdir $libsubdirs" - fi - for _AX_BOOST_BASE_boost_path_tmp in /usr /usr/local /opt /opt/local ; do - if test -d "$_AX_BOOST_BASE_boost_path_tmp/include/boost" && test -r "$_AX_BOOST_BASE_boost_path_tmp/include/boost" ; then - for libsubdir in $search_libsubdirs ; do - if ls "$_AX_BOOST_BASE_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path_tmp/$libsubdir" - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path_tmp/include" - break; - fi - done - ]) - - dnl overwrite ld flags if we have required special directory with - dnl --with-boost-libdir parameter - AS_IF([test 
"x$_AX_BOOST_BASE_boost_lib_path" != "x"], - [BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_lib_path"]) - - AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION)]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_REQUIRE([AC_PROG_CXX]) - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - - - - dnl if we found no boost with system layout we search for boost libraries - dnl built and installed without the --layout=system option or for a staged(not installed) version - if test "x$succeeded" != "xyes" ; then - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - BOOST_CPPFLAGS= - if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then - BOOST_LDFLAGS= - fi - _version=0 - if test -n "$_AX_BOOST_BASE_boost_path" ; then - if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path"; then - for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "x$V_CHECK" = "x1" ; then - _version=$_version_tmp - fi - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include/boost-$VERSION_UNDERSCORE" - done - dnl if nothing found search for layout used in Windows distributions - if test -z "$BOOST_CPPFLAGS"; then - if test -d "$_AX_BOOST_BASE_boost_path/boost" && test -r "$_AX_BOOST_BASE_boost_path/boost"; then - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path" - fi - fi - dnl if we found something and BOOST_LDFLAGS was unset before - dnl (because "$_AX_BOOST_BASE_boost_lib_path" = ""), set it here. 
- if test -n "$BOOST_CPPFLAGS" && test -z "$BOOST_LDFLAGS"; then - for libsubdir in $libsubdirs ; do - if ls "$_AX_BOOST_BASE_boost_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$libsubdir" - fi - fi - else - if test "x$cross_compiling" != "xyes" ; then - for _AX_BOOST_BASE_boost_path in /usr /usr/local /opt /opt/local ; do - if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path" ; then - for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "x$V_CHECK" = "x1" ; then - _version=$_version_tmp - best_path=$_AX_BOOST_BASE_boost_path - fi - done - fi - done - - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" - if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then - for libsubdir in $libsubdirs ; do - if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$best_path/$libsubdir" - fi - fi - - if test -n "$BOOST_ROOT" ; then - for libsubdir in $libsubdirs ; do - if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then - version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` - stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` - stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` - V_CHECK=`expr $stage_version_shorten \>\= $_version` - if test "x$V_CHECK" = "x1" && test -z "$_AX_BOOST_BASE_boost_lib_path" ; then - AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) - BOOST_CPPFLAGS="-I$BOOST_ROOT" - BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" - fi - fi - fi - fi - - 
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - fi - - if test "x$succeeded" != "xyes" ; then - if test "x$_version" = "x0" ; then - AC_MSG_NOTICE([[We could not detect the boost libraries (version $1 or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation.]]) - else - AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).]) - fi - # execute ACTION-IF-NOT-FOUND (if present): - ifelse([$3], , :, [$3]) - else - AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) - # execute ACTION-IF-FOUND (if present): - ifelse([$2], , :, [$2]) - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - -]) diff --git a/m4/m4_ax_boost_filesystem.m4 b/m4/m4_ax_boost_filesystem.m4 deleted file mode 100644 index c392f9d6..00000000 --- a/m4/m4_ax_boost_filesystem.m4 +++ /dev/null @@ -1,118 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_FILESYSTEM -# -# DESCRIPTION -# -# Test for Filesystem library from the Boost C++ libraries. The macro -# requires a preceding call to AX_BOOST_BASE. Further documentation is -# available at . 
-# -# This macro calls: -# -# AC_SUBST(BOOST_FILESYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_FILESYSTEM -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg -# Copyright (c) 2009 Michael Tindal -# Copyright (c) 2009 Roman Rybalko -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 27 - -AC_DEFUN([AX_BOOST_FILESYSTEM], -[ - AC_ARG_WITH([boost-filesystem], - AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@], - [use the Filesystem library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-filesystem=boost_filesystem-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_filesystem_lib="" - else - want_boost="yes" - ax_boost_user_filesystem_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - LIBS_SAVED=$LIBS - LIBS="$LIBS $BOOST_SYSTEM_LIB" - export LIBS - - AC_CACHE_CHECK(whether the Boost::Filesystem library is available, - ax_cv_boost_filesystem, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], - [[using namespace boost::filesystem; - path my_path( "foo/bar/data.txt" ); - return 0;]])], - ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_filesystem" = "xyes"; then - AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_filesystem_lib" = "x"; then - for libextension in `ls -r 
$BOOSTLIBDIR/libboost_filesystem* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - if test "x$link_filesystem" != "xyes"; then - for libextension in `ls -r $BOOSTLIBDIR/boost_filesystem* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - fi - else - for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_filesystem" != "xyes"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - LIBS="$LIBS_SAVED" - fi -]) diff --git a/m4/m4_ax_boost_system.m4 b/m4/m4_ax_boost_system.m4 deleted file mode 100644 index 207d7be8..00000000 --- a/m4/m4_ax_boost_system.m4 +++ /dev/null @@ -1,121 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_system.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_SYSTEM -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . 
-# -# This macro calls: -# -# AC_SUBST(BOOST_SYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_SYSTEM -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Michael Tindal -# Copyright (c) 2008 Daniel Casimiro -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 19 - -AC_DEFUN([AX_BOOST_SYSTEM], -[ - AC_ARG_WITH([boost-system], - AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@], - [use the System library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-system=boost_system-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_system_lib="" - else - want_boost="yes" - ax_boost_user_system_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::System library is available, - ax_cv_boost_system, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - CXXFLAGS= - - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], - [[boost::system::error_category *a = 0;]])], - ax_cv_boost_system=yes, ax_cv_boost_system=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_system" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_system_lib" = "x"; then - for libextension in `ls -r $BOOSTLIBDIR/libboost_system* 2>/dev/null | 
sed 's,.*/lib,,' | sed 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - if test "x$link_system" != "xyes"; then - for libextension in `ls -r $BOOSTLIBDIR/boost_system* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_system_lib boost_system-$ax_boost_user_system_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_system" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/m4/pkg.m4 b/m4/pkg.m4 deleted file mode 100644 index 62995f01..00000000 --- a/m4/pkg.m4 +++ /dev/null @@ -1,233 +0,0 @@ -# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- -# serial 1 (pkg-config-0.24) -# -# Copyright © 2004 Scott James Remnant . -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# PKG_PROG_PKG_CONFIG([MIN-VERSION]) -# ---------------------------------- -AC_DEFUN([PKG_PROG_PKG_CONFIG], -[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) -m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) -m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) -AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) -AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) -AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) - -if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then - AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) -fi -if test -n "$PKG_CONFIG"; then - _pkg_min_version=m4_default([$1], [0.9.0]) - AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) - if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - PKG_CONFIG="" - fi -fi[]dnl -])# PKG_PROG_PKG_CONFIG - -# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# Check to see whether a particular set of modules exists. Similar -# to PKG_CHECK_MODULES(), but does not set variables or print errors. 
-# -# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -# only at the first occurence in configure.ac, so if the first place -# it's called might be skipped (such as if it is within an "if", you -# have to call PKG_CHECK_EXISTS manually -# -------------------------------------------------------------- -AC_DEFUN([PKG_CHECK_EXISTS], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -if test -n "$PKG_CONFIG" && \ - AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then - m4_default([$2], [:]) -m4_ifvaln([$3], [else - $3])dnl -fi]) - -# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) -# --------------------------------------------- -m4_define([_PKG_CONFIG], -[if test -n "$$1"; then - pkg_cv_[]$1="$$1" - elif test -n "$PKG_CONFIG"; then - PKG_CHECK_EXISTS([$3], - [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` - test "x$?" != "x0" && pkg_failed=yes ], - [pkg_failed=yes]) - else - pkg_failed=untried -fi[]dnl -])# _PKG_CONFIG - -# _PKG_SHORT_ERRORS_SUPPORTED -# ----------------------------- -AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then - _pkg_short_errors_supported=yes -else - _pkg_short_errors_supported=no -fi[]dnl -])# _PKG_SHORT_ERRORS_SUPPORTED - - -# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -# [ACTION-IF-NOT-FOUND]) -# -# -# Note that if there is a possibility the first call to -# PKG_CHECK_MODULES might not happen, you should be sure to include an -# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac -# -# -# -------------------------------------------------------------- -AC_DEFUN([PKG_CHECK_MODULES], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl -AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl - -pkg_failed=no -AC_MSG_CHECKING([for $1]) - -_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) -_PKG_CONFIG([$1][_LIBS], [libs], [$2]) - 
-m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS -and $1[]_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details.]) - -if test $pkg_failed = yes; then - AC_MSG_RESULT([no]) - _PKG_SHORT_ERRORS_SUPPORTED - if test $_pkg_short_errors_supported = yes; then - $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else - $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` - fi - # Put the nasty error message in config.log where it belongs - echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD - - m4_default([$4], [AC_MSG_ERROR( -[Package requirements ($2) were not met: - -$$1_PKG_ERRORS - -Consider adjusting the PKG_CONFIG_PATH environment variable if you -installed software in a non-standard prefix. - -_PKG_TEXT])[]dnl - ]) -elif test $pkg_failed = untried; then - AC_MSG_RESULT([no]) - m4_default([$4], [AC_MSG_FAILURE( -[The pkg-config script could not be found or is too old. Make sure it -is in your PATH or set the PKG_CONFIG environment variable to the full -path to pkg-config. - -_PKG_TEXT - -To get pkg-config, see .])[]dnl - ]) -else - $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS - $1[]_LIBS=$pkg_cv_[]$1[]_LIBS - AC_MSG_RESULT([yes]) - $3 -fi[]dnl -])# PKG_CHECK_MODULES - - -# PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -# [ACTION-IF-NOT-FOUND]) -# --------------------------------------------------------------------- -# Checks for existence of MODULES and gathers its build flags with -# static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags -# and VARIABLE-PREFIX_LIBS from --libs. -# -# Note that if there is a possibility the first call to -# PKG_CHECK_MODULES_STATIC might not happen, you should be sure to include -# an explicit call to PKG_PROG_PKG_CONFIG in your configure.ac. 
-AC_DEFUN([PKG_CHECK_MODULES_STATIC], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -_save_PKG_CONFIG=$PKG_CONFIG -PKG_CONFIG="$PKG_CONFIG --static" -PKG_CHECK_MODULES($@) -PKG_CONFIG=$_save_PKG_CONFIG[]dnl -]) - - -# PKG_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable pkgconfigdir as the location where a module -# should install pkg-config .pc files. By default the directory is -# $libdir/pkgconfig, but the default can be changed by passing -# DIRECTORY. The user can override through the --with-pkgconfigdir -# parameter. -AC_DEFUN([PKG_INSTALLDIR], -[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) -m4_pushdef([pkg_description], - [pkg-config installation directory @<:@]pkg_default[@:>@]) -AC_ARG_WITH([pkgconfigdir], - [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, - [with_pkgconfigdir=]pkg_default) -AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) -m4_popdef([pkg_default]) -m4_popdef([pkg_description]) -]) dnl PKG_INSTALLDIR - - -# PKG_NOARCH_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable noarch_pkgconfigdir as the location where a -# module should install arch-independent pkg-config .pc files. By -# default the directory is $datadir/pkgconfig, but the default can be -# changed by passing DIRECTORY. The user can override through the -# --with-noarch-pkgconfigdir parameter. 
-AC_DEFUN([PKG_NOARCH_INSTALLDIR], -[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) -m4_pushdef([pkg_description], - [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) -AC_ARG_WITH([noarch-pkgconfigdir], - [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, - [with_noarch_pkgconfigdir=]pkg_default) -AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) -m4_popdef([pkg_default]) -m4_popdef([pkg_description]) -]) dnl PKG_NOARCH_INSTALLDIR - - -# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, -# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# ------------------------------------------- -# Retrieves the value of the pkg-config variable for the given module. -AC_DEFUN([PKG_CHECK_VAR], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl - -_PKG_CONFIG([$1], [variable="][$3]["], [$2]) -AS_VAR_COPY([$1], [pkg_cv_][$1]) - -AS_VAR_IF([$1], [""], [$5], [$4])dnl -])# PKG_CHECK_VAR diff --git a/maint/codes-net.pc.in b/maint/codes-net.pc.in deleted file mode 100644 index 9215d369..00000000 --- a/maint/codes-net.pc.in +++ /dev/null @@ -1,12 +0,0 @@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: codes-net -Description: Network functionality for CODES storage simulation -Version: @PACKAGE_VERSION@ -URL: https://github.com/codes-org/codes -Requires: codes-base -Libs: -L${libdir} -lcodes-net -Cflags: -I${includedir} diff --git a/maint/codes.pc.in b/maint/codes.pc.in deleted file mode 100644 index 451a6c0f..00000000 --- a/maint/codes.pc.in +++ /dev/null @@ -1,29 +0,0 @@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ -ross_cflags=@ROSS_CFLAGS@ -ross_libs=@ROSS_LIBS@ -darshan_libs=@DARSHAN_LIBS@ -darshan_cflags=@DARSHAN_CFLAGS@ -dumpi_cflags=@DUMPI_CFLAGS@ -dumpi_libs=@DUMPI_LIBS@ -cortex_cflags=@CORTEX_CFLAGS@ -cortex_libs=@CORTEX_LIBS@ -python_cflags=@PYTHON_CFLAGS@ 
-python_libs=@PYTHON_LIBS@ -boost_cflags=@BOOST_CFLAGS@ -boost_libs=@BOOST_LIBS@ -argobots_libs=@ARGOBOTS_LIBS@ -argobots_cflags=@ARGOBOTS_CFLAGS@ -swm_libs=@SWM_LIBS@ -swm_cflags=@SWM_CFLAGS@ -swm_datarootdir=@SWM_DATAROOTDIR@ - -Name: codes-base -Description: Base functionality for CODES storage simulation -Version: @PACKAGE_VERSION@ -URL: https://github.com/codes-org/codes -Requires: -Libs: -L${libdir} -lcodes ${ross_libs} ${argobots_libs} ${swm_libs} ${darshan_libs} ${dumpi_libs} ${cortex_libs} -Cflags: -I${includedir} -I${swm_datarootdir} ${ross_cflags} ${darshan_cflags} ${swm_cflags} ${argobots_cflags} ${dumpi_cflags} ${cortex_cflags} diff --git a/prepare.sh b/prepare.sh deleted file mode 100755 index 2739136e..00000000 --- a/prepare.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -echo "Regenerating build files..." -autoreconf -fi -Im4 diff --git a/scripts/dragonfly-snapshots.py b/scripts/dragonfly-snapshots.py new file mode 100644 index 00000000..fe142ac2 --- /dev/null +++ b/scripts/dragonfly-snapshots.py @@ -0,0 +1,21 @@ +import numpy as np +import matplotlib.pyplot as plt + + +if __name__ == '__main__': + port_utilization = np.loadtxt("dragonfly-snapshots.csv", delimiter=',', dtype=float, skiprows=1) + + # finding all snapshot timestamps + timestamps = np.unique(port_utilization[:, 0]) + assert len(timestamps.shape) == 1 + + # Finding total utilization per snapshot + total_utilization = np.zeros_like(timestamps) + for i, ts in enumerate(timestamps): + total_utilization[i] = port_utilization[port_utilization[:, 0] == ts, 2:].sum() + + # plotting + plt.plot(timestamps, total_utilization) + plt.xlabel('snapshot time (ns)') + plt.ylabel('total buffer port occupancy') + plt.show() diff --git a/scripts/reproducibility-pads23/README.md b/scripts/reproducibility-pads23/README.md new file mode 100644 index 00000000..38751076 --- /dev/null +++ b/scripts/reproducibility-pads23/README.md @@ -0,0 +1,97 @@ +# Reproducing results of PADS23 paper + +This document contains 
the instructions to follow in order to compile, run the experiments +and generate the figures and table that appear in the paper: Hybrid PDES Simulation of +HPC Networks Using Zombie Packets, by Cruz-Camacho et al. 2023. + +The artifacts associated with this submission are: + +- The PDES simulator [ROSS](https://github.com/ross-org/ross) (Licensed under the + BSD-3-clause licence) +- The HPC network simulator [CODES](https://github.com/codes-org/codes) (Licensed under + the BSD-3-clause licence) + +All models included with the simulators are licensed under the same licence, namely +BSD-3-clause. + +A copy of these artifacts is available via [Zenodo](https://about.zenodo.org) with +[doi:10.5281/zenodo.7879224](https://doi.org/10.5281/zenodo.7879224). Zenodo's policies on +long-term storage and availability of the artifacts can be found in: +. + +The code has been tested on two systems: a 20-core IBM Power9 processor (using 9 of its +cores), and an Intel Core i7 vPro 8th Gen (a change in the number of available cores/slots +was needed in `experiments` as the processor does not have 9 available +cores). + +## Build + +To compile CODES (and ROSS), you need CMake, and a C and C++ MPI-aware compiler. + +We have successfully compiled CODES in a system with an XLC_r compiler (version 16.1.1) and +the Spectrum MPI (version 10.4) library, and in an x64 system with GCC (12.2.1) and Open +MPI (4.1.5). + +We assume that all commands are executed under the base CODES directory: + +```bash +cd path-to-this/CODES +``` + +First compile ROSS: + +```bash +mkdir ROSS/build +pushd ROSS/build +cmake .. -DROSS_BUILD_MODELS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DCMAKE_INSTALL_PREFIX="$(pwd -P)/bin" -DCMAKE_CXX_COMPILER=mpicxx \ + -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug +make +make install +popd +``` + +Then compile CODES: + +```bash +mkdir build +pushd build +cmake .. 
-DCMAKE_PREFIX_PATH="$PWD/../ROSS/build/bin" \ + -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX="$(pwd -P)/bin" +make +# make install is NOT necessary +popd +``` + +## Run and generate figures/tables + +The experiments, figure generation and table generation are contained in the script +`reproduce.sh`. The script calls the bash scripts in `experiments` which run the CODES +binary. If there is a need to change any parameter of the experiments (e.g., number of +cores), these files are the place to do so. + +Python 3 is needed to generate the figures. The Python libraries NumPy and Matplotlib are +also required. (Tested on Python 3.10, NumPy 1.24.2 and Matplotlib 3.7.1.) An additional +external tool is `wc`, which is used to count the total number of lines/packets in the +simulation. (Tested on GNU `wc` versions 8.3 and 9.2.) + +To run the script, simply execute: + +```bash +cd scripts/reproducibility-pads23/ +bash -x reproduce.sh +``` + +The total runtime for the script is dependent on machine resources. A runtime of 30 +minutes has been reported for a system running on Intel i9-12900K (16 cores, 5.20 GHz), +while for smaller systems, like Intel i7-8650U (4 cores, 4.2 GHz), the runtime has been +around 2 to 4 hours. The experiments take up to 3 GB of disk space. If CODES was +compiled in a folder other than the one suggested (`build/`), you must change the variable +`CODES_BUILD_DIR` in the script. 
+ +### Results + +The figures can be found in the directory `figures` and the table results in the text file +`results/sumarized-table.txt` diff --git a/scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc b/scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc new file mode 100644 index 00000000..5d1a3e8a --- /dev/null +++ b/scripts/reproducibility-pads23/experiments/conf-files/72-dragonfly-full.alloc @@ -0,0 +1,2 @@ +0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 + diff --git a/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in new file mode 100644 index 00000000..6ed72c9f --- /dev/null +++ b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-surrogate-v5.conf.in @@ -0,0 +1,73 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="36"; +# name of this lp changes according to the model + nw-lp="2"; +# these lp names will be the same for dragonfly-custom model + modelnet_dragonfly_dally="2"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ +# packet size in the network + packet_size="4096"; + modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); + # scheduler options + modelnet_scheduler="fcfs"; +# chunk size in the network (when chunk size = packet size, packets will not be +# divided into chunks) + chunk_size="${CHUNK_SIZE}"; +# modelnet_scheduler="round-robin"; +# number of routers in group + num_routers="4"; +# number of groups in the network + num_groups="9"; +# buffer size in bytes for local virtual channels + local_vc_size="16384"; +#buffer size in bytes for global virtual channels + global_vc_size="16384"; +#buffer size in bytes for compute node virtual channels + 
cn_vc_size="32768"; +#bandwidth in GiB/s for local channels + local_bandwidth="2.0"; +# bandwidth in GiB/s for global channels + global_bandwidth="2.0"; +# bandwidth in GiB/s for compute node-router channels + cn_bandwidth="2.0"; +# ROSS message size + message_size="736"; +# number of compute nodes connected to router, dictated by dragonfly config +# file + num_cns_per_router="2"; +# number of global channels per router + num_global_channels="2"; +# network config file for intra-group connections + intra-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; +# network config file for inter-group connections + inter-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; +# routing protocol to be used + routing="prog-adaptive"; +# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything + save_packet_latency_path="${PACKET_LATENCY_PATH}"; +# router buffer occupancy snapshots + router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} ); +} +SURROGATE { +# determines the director switching from surrogate to high-def simulation strategy + director_mode="at-fixed-virtual-times"; + +# director configuration for: director_mode == "at-fixed-virtual-times" +# timestamps at which to switch to surrogate-mode and back + fixed_switch_timestamps=( ${SWITCH_TIMESTAMPS} ); + +# latency predictor to use + packet_latency_predictor="average"; +# some workload models need some time to stabilize, a point where the network behaviour stabilizes. 
The predictor will ignore all packet latencies that arrive during this period + ignore_until="${IGNORE_UNTIL}"; + +# selecting network treatment on switching to surrogate + network_treatment_on_switch="${NETWORK_TREATMENT}"; +} diff --git a/doc/example/tutorial-ping-pong.conf b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in similarity index 75% rename from doc/example/tutorial-ping-pong.conf rename to scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in index ada5ccbd..9c833f43 100644 --- a/doc/example/tutorial-ping-pong.conf +++ b/scripts/reproducibility-pads23/experiments/conf-files/terminal-dragonfly-72-v5.conf.in @@ -19,7 +19,7 @@ PARAMS modelnet_scheduler="fcfs"; # chunk size in the network (when chunk size = packet size, packets will not be # divided into chunks) - chunk_size="4096"; + chunk_size="${CHUNK_SIZE}"; # modelnet_scheduler="round-robin"; # number of routers in group num_routers="4"; @@ -45,9 +45,13 @@ PARAMS # number of global channels per router num_global_channels="2"; # network config file for intra-group connections - intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; + intra-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; # network config file for inter-group connections - inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; + inter-group-connections="${PATH_TO_CODES_SRC}/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; # routing protocol to be used routing="prog-adaptive"; +# folder path to store packet latency from terminal to terminal, if no value is given it won't save anything + save_packet_latency_path="${PACKET_LATENCY_PATH}"; +# router buffer occupancy snapshots + router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} ); } diff --git a/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh 
b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh new file mode 100644 index 00000000..fb407ebd --- /dev/null +++ b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh @@ -0,0 +1,75 @@ +#!/usr/bin/bash -x + +np=3 + +# CONFIGURATION +# exported env variables are to be used by `envsubst` below +PATH_TO_CODES_BUILD="$1" +export PATH_TO_CODES_SRC="$2" +CONF_FILE_TEMPLATES="$3" +export CHUNK_SIZE=64 + +# configuration file for high-fidelity codes +export BUFFER_SNAPSHOTS='"1e6", "2e6", "3e6", "4e6", "5e6", "6e6", "7e6", "8e6", "9e6", "10e6", "11e6", "12e6", "13e6", "14e6", "15e6", "16e6", "17e6", "18e6", "19e6", "20e6", "21e6", "22e6", "23e6", "24e6", "25e6", "26e6", "27e6", "28e6", "29e6", "30e6", "31e6", "32e6", "33e6", "34e6", "35e6", "36e6", "37e6", "38e6", "39e6", "40e6", "41e6", "42e6", "43e6", "44e6", "45e6", "46e6", "47e6", "48e6", "49e6", "50e6", "51e6", "52e6", "53e6", "54e6", "55e6", "56e6", "57e6", "58e6", "59e6", "60e6", "61e6", "62e6", "63e6", "64e6", "65e6", "66e6", "67e6", "68e6", "69e6", "70e6", "71e6", "72e6", "73e6", "74e6", "75e6", "76e6", "77e6", "78e6", "79e6", "80e6", "81e6", "82e6", "83e6", "84e6", "85e6", "86e6", "87e6", "88e6", "89e6", "90e6", "91e6", "92e6", "93e6", "94e6", "95e6", "96e6", "97e6", "98e6", "99e6", "99.9e6"' +export PACKET_LATENCY_PATH='high-fidelity/packet-latency-trace' +cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-v5.conf.in | envsubst > terminal-dragonfly-72.conf + +# configuration file for hybrid-lite and hybrid codes +#export BUFFER_SNAPSHOTS='"1e6", "2e6", "3e6", "4e6", "5e6", "6e6", "7e6", "8e6", "9e6", "10e6", "11e6", "12e6", "13e6", "14e6", "15e6", "16e6", "17e6", "18e6", "19e6", "91e6", "92e6", "93e6", "94e6", "95e6", "96e6", "97e6", "98e6", "99e6", "99.9e6"' +export IGNORE_UNTIL=10e6 +export SWITCH_TIMESTAMPS='"20e6", "90e6"' +export NETWORK_TREATMENT=freeze +export PACKET_LATENCY_PATH='hybrid/packet-latency-trace' +cat 
"$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid.conf + +# configuration file for hybrid-lite +export NETWORK_TREATMENT=nothing +export PACKET_LATENCY_PATH='hybrid-lite/packet-latency-trace' +cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid-lite.conf + +# yet more configuration files +cp "$CONF_FILE_TEMPLATES"/72-dragonfly-full.alloc . + +# creating dirs +mkdir -p high-fidelity hybrid hybrid-lite + +# RUNNING SIMULATION +period=480 + +# Creating custom/individual configuration files +work_alloc_file="72-dragonfly-period=${period}.synthetic.conf" +cat > "$work_alloc_file" < high-fidelity/model-result.txt 2> high-fidelity/model-result.stderr.txt + +# RUNNING CODES with SURROGATE MODEL +mpirun -np $np \ + "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \ + --workload_type=online --workload_conf_file="$work_alloc_file" \ + --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \ + --alloc_file=72-dragonfly-full.alloc --end='100.001e6' \ + --extramem=$extramem --lp-io-dir=hybrid/codes-output \ + -- terminal-dragonfly-72-hybrid.conf > hybrid/model-result.txt 2> hybrid/model-result.stderr.txt + +# SAME AS BEFORE BUT NONFREEZING +mpirun -np $np \ + "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \ + --workload_type=online --workload_conf_file="$work_alloc_file" \ + --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \ + --alloc_file=72-dragonfly-full.alloc --end='100.001e6' \ + --extramem=$extramem --lp-io-dir=hybrid-lite/codes-output \ + -- terminal-dragonfly-72-hybrid-lite.conf > hybrid-lite/model-result.txt 2> hybrid-lite/model-result.stderr.txt diff --git a/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh new file mode 
100644 index 00000000..2b920be8 --- /dev/null +++ b/scripts/reproducibility-pads23/experiments/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh @@ -0,0 +1,74 @@ +#!/usr/bin/bash -x + +np=3 + +# CONFIGURATION +# exported env variables are to be used by `envsubst` below +PATH_TO_CODES_BUILD="$1" +export PATH_TO_CODES_SRC="$2" +CONF_FILE_TEMPLATES="$3" +export CHUNK_SIZE=64 + +# configuration file for high-fidelity codes +export BUFFER_SNAPSHOTS='"100e3", "200e3", "300e3", "400e3", "500e3", "600e3", "700e3", "800e3", "900e3", "1e6", "1.1e6", "1.2e6", "1.3e6", "1.4e6", "1.5e6", "1.6e6", "1.7e6", "1.8e6", "1.9e6", "2e6", "2.1e6", "2.2e6", "2.3e6", "2.4e6", "2.5e6", "2.6e6", "2.7e6", "2.8e6", "2.9e6", "3e6", "3.1e6", "3.2e6", "3.3e6", "3.4e6", "3.5e6", "3.6e6", "3.7e6", "3.8e6", "3.9e6", "4e6", "4.1e6", "4.2e6", "4.3e6", "4.4e6", "4.5e6", "4.6e6", "4.7e6", "4.8e6", "4.9e6", "5e6", "5.1e6", "5.2e6", "5.3e6", "5.4e6", "5.5e6", "5.6e6", "5.7e6", "5.8e6", "5.9e6", "6e6", "6.1e6", "6.2e6", "6.3e6", "6.4e6", "6.5e6", "6.6e6", "6.7e6", "6.8e6", "6.9e6", "7e6", "7.1e6", "7.2e6", "7.3e6", "7.4e6", "7.5e6", "7.6e6", "7.7e6", "7.8e6", "7.9e6", "8e6", "8.1e6", "8.2e6", "8.3e6", "8.4e6", "8.5e6", "8.6e6", "8.7e6", "8.8e6", "8.9e6", "9e6", "9.1e6", "9.2e6", "9.3e6", "9.4e6", "9.5e6", "9.6e6", "9.7e6", "9.8e6", "9.9e6", "9.990e6"' +export PACKET_LATENCY_PATH='high-fidelity/packet-latency-trace' +cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-v5.conf.in | envsubst > terminal-dragonfly-72.conf + +# configuration file for hybrid-lite and hybrid codes +export IGNORE_UNTIL=2000e3 +export SWITCH_TIMESTAMPS='"3000e3", "8000e3"' +export NETWORK_TREATMENT=freeze +export PACKET_LATENCY_PATH='hybrid/packet-latency-trace' +cat "$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid.conf + +# configuration file for hybrid-lite +export NETWORK_TREATMENT=nothing +export PACKET_LATENCY_PATH='hybrid-lite/packet-latency-trace' +cat 
"$CONF_FILE_TEMPLATES"/terminal-dragonfly-72-surrogate-v5.conf.in | envsubst > terminal-dragonfly-72-hybrid-lite.conf + +# yet more configuration files +cp "$CONF_FILE_TEMPLATES"/72-dragonfly-full.alloc . + +# creating dirs +mkdir -p high-fidelity hybrid hybrid-lite + +# RUNNING SIMULATION +period=480 + +# Creating custom/individual configuration files +work_alloc_file="72-dragonfly-period=${period}.synthetic.conf" +cat > "$work_alloc_file" < high-fidelity/model-result.txt 2> high-fidelity/model-result.stderr.txt + +# RUNNING CODES with SURROGATE MODEL +mpirun -np $np \ + "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \ + --workload_type=online --workload_conf_file="$work_alloc_file" \ + --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \ + --alloc_file=72-dragonfly-full.alloc --end=10000.01e3 \ + --extramem=$extramem --lp-io-dir=hybrid/codes-output \ + -- terminal-dragonfly-72-hybrid.conf > hybrid/model-result.txt 2> hybrid/model-result.stderr.txt + +# SAME AS BEFORE BUT NONFREEZING +mpirun -np $np \ + "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay --synch=3 \ + --workload_type=online --workload_conf_file="$work_alloc_file" \ + --cons-lookahead=$lookahead --max-opt-lookahead=${lookahead%.*} --batch=4 --gvt-interval=256 \ + --alloc_file=72-dragonfly-full.alloc --end=10000.01e3 \ + --extramem=$extramem --lp-io-dir=hybrid-lite/codes-output \ + -- terminal-dragonfly-72-hybrid-lite.conf > hybrid-lite/model-result.txt 2> hybrid-lite/model-result.stderr.txt diff --git a/scripts/reproducibility-pads23/python-scripts/delay_in_window.py b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py new file mode 100644 index 00000000..4bc19f0e --- /dev/null +++ b/scripts/reproducibility-pads23/python-scripts/delay_in_window.py @@ -0,0 +1,219 @@ +from __future__ import annotations + +import glob +import sys +import fileinput +import pathlib +import argparse +from enum import Enum +import typing as t + +import numpy 
as np + + +ndarray: t.TypeAlias = 'np.ndarray[t.Any, np.dtype[np.float64]]' + + +def collect_data_numpy( + path: str, + filepreffix: str | None = None, + filepostfix: str = "-gid=*.txt", + delimiter: str | None = None, + dtype: t.Any = int +) -> tuple[list[str], np.ndarray[t.Any, t.Any]]: + if filepreffix is None: + stat_files = [path] + else: + escaped_path = pathlib.Path(glob.escape(path)) + stat_files = glob.glob(str(escaped_path / f"{filepreffix}{filepostfix}")) + if not stat_files: + print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr) + exit(1) + + data = np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype, + comments='#') + with open(stat_files[0], 'r') as f: + header = f.readline()[1:].split(',') + + return header, data + + +def mean_and_std(array: ndarray) -> tuple[float, float, float]: + return np.mean(array), np.std(array), float(array.shape[0]) # type: ignore + + +def find_mean_and_std_through_window( + delays: ndarray, + n_windows: int = 100, + start_time: float = 0.0, + end_time: float | None = None, + end_time_col: int = 9, + delay_col: int = 10, +) -> tuple[ndarray, ndarray, ndarray, ndarray]: + + if end_time is None: + end_time = delays[:, end_time_col].max() + + window_time = (end_time - start_time) / n_windows + windows = window_time * (np.arange(n_windows) + 1) + mean_and_std_through_windows = np.zeros((n_windows, 3)) + for i in range(n_windows): + delays_within_window = np.bitwise_and(i * window_time <= delays[:, end_time_col], + delays[:, end_time_col] < (i+1) * window_time) + if delays_within_window.sum() > 0: + mean_and_std_through_windows[i] = mean_and_std(delays[delays_within_window, delay_col]) + else: + mean_and_std_through_windows[i] = -1 + + # Removing all windows for which there is no data + good_res = mean_and_std_through_windows[:, 0] != -1 + windows = windows[good_res] + mean_and_std_through_windows = mean_and_std_through_windows[good_res] + + return windows, 
mean_and_std_through_windows[:, 0], mean_and_std_through_windows[:, 1], \ + mean_and_std_through_windows[:, 2].astype(np.int32) + + +class SrcDestRelationship(Enum): + Any = 0 + SameRouter = 1 + SameGroup = 2 + DifferentGroup = 3 + + +def break_delay_data_into( + delays: np.ndarray[t.Any, t.Any], + src_dest_rel: SrcDestRelationship, + nodes_per_router: int = 2, + nodes_per_group: int = 8 +) -> np.ndarray[t.Any, t.Any]: + if src_dest_rel == SrcDestRelationship.Any: + return delays + + elif src_dest_rel == SrcDestRelationship.DifferentGroup: + delays_out_group = (delays[:, 0] // nodes_per_group) != (delays[:, 1] // nodes_per_group) + return delays[delays_out_group] # type: ignore + + else: + + delays_same_router = \ + (delays[:, 0] // nodes_per_router) == (delays[:, 1] // nodes_per_router) + + if src_dest_rel == SrcDestRelationship.SameRouter: + return delays[delays_same_router] # type: ignore + + else: + assert src_dest_rel == SrcDestRelationship.SameGroup + + delays_same_group = np.bitwise_xor( + (delays[:, 0] // nodes_per_group) == (delays[:, 1] // nodes_per_group), + delays_same_router) + + return delays[delays_same_group] # type: ignore + + +class ProcessedPacketLatencyData(t.NamedTuple): + windows: ndarray + means: ndarray + stds: ndarray + n_samples: np.ndarray[t.Any, np.dtype[np.int32]] + header: list[str] | None = None + delays: ndarray | None = None + + +class MainGetDataLatencies(object): + def __init__(self) -> None: + parser = argparse.ArgumentParser() + source_group = parser.add_mutually_exclusive_group(required=True) + source_group.add_argument('--latencies-dir', type=pathlib.Path, + help='Folder to latencies (CSV file)') + source_group.add_argument('--latencies-file', type=pathlib.Path, + help='(CSV) File with latencies') + parser.add_argument('--windows', type=int, help='Total windows to break simulation in', + default=100) + parser.add_argument('--start', type=float, help='Total (virtual) simulation time', + required=True) + 
parser.add_argument('--end', type=float, help='Total (virtual) simulation time', + required=True) + # The following aims to plot different portions of the packet delay data + parser.add_argument('--src-dest-relationship', + help='Process only packets of related relationship', + choices=[rel.name for rel in SrcDestRelationship], default='Any') + parser.add_argument('--nodes-per-group', type=int, help='Assuming a 1-D dragonfly network, ' + 'this indicates the number of nodes per group (only useful with ' + '--src-dest-relationship)', default=8) + parser.add_argument('--nodes-per-router', type=int, help='Assuming a 1-D dragonfly ' + 'network, this indicates the number of nodes per router (only ' + 'useful with --src-dest-relationship)', default=2) + parser.add_argument('--use-cython', action='store_true', + help='Total (virtual) simulation time') + + self.parser = parser + self.args: argparse.Namespace | None = None + + def run( + self, + argv: list[str], + ) -> ProcessedPacketLatencyData: + self.args = args = self.parser.parse_args(argv) + + end_time = args.end + n_windows = args.windows + + dist_type = getattr(SrcDestRelationship, args.src_dest_relationship) + + if args.use_cython: + assert dist_type == SrcDestRelationship.Any + assert args.latencies_dir is not None + import pyximport; pyximport.install(language_level='3str') # noqa: E702 + from file_read_cython.read_mean_std_from_file import \ + load_mean_and_std_through_window + + windows, n_samples, samples = load_mean_and_std_through_window( + str(args.latencies_dir), args.start, args.end, num_windows=args.windows, + max_rows=100000) + means, stds = samples[:, 0], samples[:, 1] + + else: + # Columns within the csv file that matter to us + if args.latencies_dir: + header, delays = collect_data_numpy( + args.latencies_dir, 'packets-delay', delimiter=',', + dtype=np.dtype('float')) + else: + assert args.latencies_file is not None + header, delays = collect_data_numpy( + args.latencies_file, delimiter=',', 
dtype=np.dtype('float')) + # next_packet_delay_col = header.index('next_packet_delay') + end_time_col = header.index('end') + delay_col = header.index('latency') + + # delays = delays[delays[:, next_packet_delay_col] > 0] + delays = delays[delays[:, end_time_col] > 0] + delays = delays[delays[:, end_time_col] < end_time] + delays = break_delay_data_into( + delays, dist_type, + nodes_per_group=args.nodes_per_group, nodes_per_router=args.nodes_per_router) + + # Computing windowed mean and stds + windows, means, stds, n_samples = find_mean_and_std_through_window( + delays, n_windows=n_windows, end_time_col=end_time_col, + delay_col=delay_col, end_time=end_time) + + if 'header' in vars(): + return ProcessedPacketLatencyData(windows, means, stds, n_samples, header, delays) + else: + return ProcessedPacketLatencyData(windows, means, stds, n_samples) + + +if __name__ == '__main__': + main = MainGetDataLatencies() + main.parser.add_argument( + '--output', type=pathlib.Path, help='Directory to save aggregated stats', + required=True) + data = main.run(argv=sys.argv[1:]) + + assert main.args is not None + out_file_name = f"{main.args.output}.npz" + np.savez(out_file_name, + windows=data.windows, means=data.means, stds=data.stds, n_samples=data.n_samples) diff --git a/scripts/reproducibility-pads23/python-scripts/file_read_cython/__init__.py b/scripts/reproducibility-pads23/python-scripts/file_read_cython/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx b/scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx new file mode 100644 index 00000000..d20b51e9 --- /dev/null +++ b/scripts/reproducibility-pads23/python-scripts/file_read_cython/read_mean_std_from_file.pyx @@ -0,0 +1,93 @@ +from pathlib import Path +import glob +import fileinput +import numpy as np + +from libc.math cimport floor, sqrt +cimport cython + + 
+@cython.boundscheck(False) # turn off bounds-checking for entire function +@cython.wraparound(False) # turn off wrapping (negative numbers) for entire function +def load_mean_and_std_through_window( + str filepath, + double start_time, + double end_time, + int num_windows = 100, + int max_rows = 10000 +): + # Two-pass, chunked computation of per-window mean and std of the 'latency' column over all + # "packets-delay-gid=*.txt" files in filepath. Rows are bucketed by their 'end' column into + # num_windows equal windows over [start_time, end_time); at most max_rows rows are parsed at once. + cdef int num_lines + cdef int i + cdef int window_j + cdef double[:] windows + cdef double[:, :] packet_latency_data + cdef double[:, :] samples + cdef int[:] n_samples + cdef double window_time = (end_time - start_time) / num_windows + + samples = np.zeros((num_windows, 2), dtype=np.double) + n_samples = np.zeros((num_windows,), dtype=np.int32) + windows = np.zeros((num_windows,), dtype=np.double) + + stat_files = glob.glob(str(Path(filepath) / "packets-delay-gid=*.txt")) + + with open(stat_files[0], 'r') as f: + header = f.readline()[1:].split(',') + cdef int end_time_col = header.index('end') + cdef int delay_col = header.index('latency') + + # Finding mean of data + raw_files = fileinput.input(stat_files, mode='rb') + while True: + data_raw = np.loadtxt( + raw_files, delimiter=',', dtype=np.double, + comments='#', max_rows=max_rows) + if data_raw.size == 0 or len(data_raw.shape) != 2: + break + + packet_latency_data = data_raw + num_lines = packet_latency_data.shape[0] + + assert(num_lines != 0) + + for i in range(num_lines): + window_j = int(floor((packet_latency_data[i, end_time_col] - start_time) / window_time)) + if window_j < 0 or window_j >= num_windows: + continue + samples[window_j, 0] += packet_latency_data[i, delay_col] + n_samples[window_j] += 1 + raw_files.close() + + for i in range(num_windows): + # Computing mean + if n_samples[i] > 0: + samples[i, 0] /= n_samples[i] + windows[i] = (i+1) * window_time + + # Accumulating squared deviations from the mean (second pass, needed for the std) + # The chunk size must match the first pass: the loop stops on a chunk that is not 2-D, so + # different chunk sizes could make the two passes consume a different set of rows + raw_files = fileinput.input(stat_files, mode='rb') + while True: + data_raw = np.loadtxt( + raw_files, delimiter=',', dtype=np.double, + comments='#', max_rows=max_rows) + if data_raw.size == 0 or len(data_raw.shape) != 
2: + break + + packet_latency_data = data_raw + num_lines = packet_latency_data.shape[0] + + assert(num_lines != 0) + + for i in range(num_lines): + window_j = int(floor((packet_latency_data[i, end_time_col] - start_time) / window_time)) + if window_j < 0 or window_j >= num_windows: + continue + samples[window_j, 1] += (packet_latency_data[i, delay_col] - samples[window_j, 0]) ** 2 + raw_files.close() + + for i in range(num_windows): + # Computing std + if n_samples[i] > 0: + samples[i, 1] = sqrt(samples[i, 1] / n_samples[i]) + + return np.asarray(windows), np.asarray(n_samples), np.asarray(samples) diff --git a/scripts/reproducibility-pads23/python-scripts/generate-table.py b/scripts/reproducibility-pads23/python-scripts/generate-table.py new file mode 100644 index 00000000..58a73bee --- /dev/null +++ b/scripts/reproducibility-pads23/python-scripts/generate-table.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import argparse +import pathlib +from subprocess import check_output +from glob import glob +import csv + +import numpy as np + + +def determine_mse( + condensed: pathlib.Path, cut_off: int = 80, check_last: bool = True +) -> tuple[float, float]: + """Returns in us**2 (not ns**2)""" + data_high_fidelity = np.load(f"{condensed}/packet_latency-high-fidelity.npz") + data_hybrid = np.load(f"{condensed}/packet_latency-hybrid.npz") + data_hybrid_lite = np.load(f"{condensed}/packet_latency-hybrid-lite.npz") + + windows_hf, means_hf = data_high_fidelity['windows'], data_high_fidelity['means'] + windows_hybrid, means_hybrid = data_hybrid['windows'], data_hybrid['means'] + means_hybrid_lite = data_hybrid_lite['means'] + + assert np.all(windows_hf == windows_hybrid) + if check_last: + n_windows = windows_hf.shape[0] + means_hybrid_lite = means_hybrid_lite[:n_windows] + + n = means_hf[cut_off:].shape[0] + mse_hybrid_lite = np.sum((means_hf[cut_off:] - means_hybrid_lite[cut_off:])**2) / n + mse_hybrid = np.sum((means_hf[cut_off:] - means_hybrid[cut_off:])**2) 
/ n + + return mse_hybrid / 1e6, mse_hybrid_lite / 1e6 + + +def get_runtimes(path: pathlib.Path) -> tuple[float, float, float]: + with open(path, newline='') as f: + reader = csv.reader(f) + csv_file = [row for row in reader] + + assert len(csv_file) == 4 + assert csv_file[0][8] == 'runtime' + return float(csv_file[1][8]), float(csv_file[2][8]), float(csv_file[3][8]) + + +def get_total_packets(latencies_dir: pathlib.Path) -> int: + out = check_output( + ['wc', '-l', '--total=always'] + glob(str(latencies_dir / "packets-delay-*")) + ).split() + assert out[-1] == b'total' + return int(out[-2]) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--folder-10ms', type=pathlib.Path, required=True, + help='Execution folder for 10 ms') + parser.add_argument('--folder-100ms', type=pathlib.Path, required=True, + help='Execution folder for 100 ms') + args = parser.parse_args() + + packets_hf = get_total_packets(args.folder_10ms / 'high-fidelity' / 'packet-latency-trace') + packets_hybrid = get_total_packets(args.folder_10ms / 'hybrid' / 'packet-latency-trace') + packets_hybrid_lite = get_total_packets( + args.folder_10ms / 'hybrid-lite' / 'packet-latency-trace') + throughput_hf = packets_hf * 1024 / 1024**3 * 100 + throughput_hybrid = packets_hybrid * 1024 / 1024**3 * 100 + throughput_hybrid_lite = packets_hybrid_lite * 1024 / 1024**3 * 100 + runtime_hf, runtime_hybrid, runtime_hybrid_lite = get_runtimes(args.folder_10ms / 'ross.csv') + throughput_hybrid_dis = (throughput_hybrid / throughput_hf - 1) * 100 + throughput_hybrid_lite_dis = (throughput_hybrid_lite / throughput_hf - 1) * 100 + + mse_hybrid, mse_hybrid_lite = determine_mse(args.folder_10ms / 'condensed') + print("10 ms Results") + print("Throughput (GB/s) high-fidelity:", throughput_hf) + print("Throughput (GB/s) hybrid:", throughput_hybrid) + print("Throughput (GB/s) hybrid-lite:", throughput_hybrid_lite) + print("Throughput (%) hybrid discrepancy:", throughput_hybrid_dis) + 
print("Throughput (%) hybrid-lite discrepancy:", throughput_hybrid_lite_dis) + print("Runtime (s) high-fidelity:", runtime_hf) + print("Runtime (s) hybrid:", runtime_hybrid) + print("Runtime (s) hybrid-lite:", runtime_hybrid_lite) + print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2") + print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2") + print() + + packets_hf = get_total_packets(args.folder_100ms / 'high-fidelity' / 'packet-latency-trace') + packets_hybrid = get_total_packets(args.folder_100ms / 'hybrid' / 'packet-latency-trace') + packets_hybrid_lite = get_total_packets( + args.folder_100ms / 'hybrid-lite' / 'packet-latency-trace') + throughput_hf = packets_hf * 1024 / 1024**3 * 10 + throughput_hybrid = packets_hybrid * 1024 / 1024**3 * 10 + throughput_hybrid_lite = packets_hybrid_lite * 1024 / 1024**3 * 10 + runtime_hf, runtime_hybrid, runtime_hybrid_lite = get_runtimes(args.folder_100ms / 'ross.csv') + throughput_hybrid_dis = (throughput_hybrid / throughput_hf - 1) * 100 + throughput_hybrid_lite_dis = (throughput_hybrid_lite / throughput_hf - 1) * 100 + + print("100 ms Results") + print("Throughput (GB/s) high-fidelity:", throughput_hf) + print("Throughput (GB/s) hybrid:", throughput_hybrid) + print("Throughput (GB/s) hybrid-lite:", throughput_hybrid_lite) + print("Throughput (%) hybrid discrepancy:", throughput_hybrid_dis) + print("Throughput (%) hybrid-lite discrepancy:", throughput_hybrid_lite_dis) + print("Runtime (s) high-fidelity:", runtime_hf) + print("Runtime (s) hybrid:", runtime_hybrid) + print("Runtime (s) hybrid-lite:", runtime_hybrid_lite) + mse_hybrid, mse_hybrid_lite = determine_mse(args.folder_100ms / 'condensed', + cut_off=90, check_last=False) + print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2") + print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2") diff --git a/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py 
b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py new file mode 100644 index 00000000..67c41326 --- /dev/null +++ b/scripts/reproducibility-pads23/python-scripts/plot-packet-latency.py @@ -0,0 +1,221 @@ +from __future__ import annotations + +import argparse +import pathlib +import sys + +import numpy as np +import matplotlib.pyplot as plt +import matplotlib +from matplotlib.ticker import EngFormatter +from delay_in_window import MainGetDataLatencies + + +time_formatter_ns = EngFormatter() +time_formatter_ns.ENG_PREFIXES = {0: 'ns', 3: 'us', 6: 'ms', 9: 's'} + + +if __name__ == '__main__': + this_binary = sys.argv[0] + commands = { + 'plotfromraw': 'Generates a single packet-latency plot given the raw latency data', + 'plotfromzip': 'Generates a single packet-latency plot given a zipped file (NPZ).' + ' (npz file geterated by delay_in_window.py)', + 'pads23': 'Generates plot that appears on PADS23 paper' + } + parser = argparse.ArgumentParser( + usage=f'{this_binary} []\n\n' + 'The available commands are:\n' + + '\n'.join(f' {cmd}\t {desc}' for cmd, desc in commands.items())) + parser.add_argument('command', help='Subcommand to run') + main_args = parser.parse_args(sys.argv[1:2]) + + if main_args.command not in commands: + print("Unrecognized command:", main_args.command, file=sys.stderr) + exit(1) + + +if main_args.command == 'plotfromraw': + main_delay = MainGetDataLatencies() + main_delay.parser.add_argument( + '--std-factor', type=float, default=0.2, help='Size of variance to show as an std factor') + main_delay.parser.add_argument('--scatter-plot', action='store_true') + data = main_delay.run(argv=sys.argv[2:]) + + assert main_delay.args is not None + args = main_delay.args + std_factor = args.std_factor + + fig, ax = plt.subplots() + + if args.scatter_plot: + if args.use_cython: + raise Exception("To scatter-plot raw data, we must have access to raw data. 
" + "This is not possible when loading using Cython.") + assert data.delays is not None and data.header is not None + + end_col = data.header.index('end') + delay_col = data.header.index('latency') + ax.scatter(data.delays[:, end_col], data.delays[:, delay_col]) + else: + # plt.errorbar(windows, means, yerr=std_factor*stds) + ax.plot(data.windows, data.means) + ax.fill_between(data.windows, + data.means - std_factor*data.stds, + data.means + std_factor*data.stds, + color='#00F5') + + ax.set_xlabel('Virtual time') + ax.set_ylabel('Packet Latency' if args.scatter_plot else 'Average Packet Latency') + ax.yaxis.set_major_formatter(time_formatter_ns) + ax.xaxis.set_major_formatter(time_formatter_ns) + + plt.show() # type: ignore + + +if main_args.command == 'plotfromzip': + parser = argparse.ArgumentParser() + parser.add_argument('--latencies', type=pathlib.Path, required=True, + help='NPZ file containing packet-latency data') + parser.add_argument('--std-factor', type=float, default=0.2, + help='Size of variance to show as an std factor') + args = parser.parse_args(sys.argv[2:]) + + std_factor = args.std_factor + + data_npz = np.load(args.latencies) + windows, means, stds = data_npz['windows'], data_npz['means'], data_npz['stds'] + + fig, ax = plt.subplots() + + # plt.errorbar(windows, means, yerr=std_factor*stds) + ax.plot(windows, means) + ax.fill_between(windows, + means - std_factor*stds, + means + std_factor*stds, + color='#00F5') + + ax.set_xlabel('Virtual time') + ax.set_ylabel('Average Packet Latency') + ax.yaxis.set_major_formatter(time_formatter_ns) + ax.xaxis.set_major_formatter(time_formatter_ns) + + plt.show() # type: ignore + + +if main_args.command == 'pads23': + parser = argparse.ArgumentParser() + parser.add_argument('--latencies', type=pathlib.Path, help='Folder with condensed latencies', + required=True) + parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', + default=None) + parser.add_argument('--std-factor', type=float, 
default=0.2, + help='Size of variance to show as an std factor') + parser.add_argument('--started-tracking', type=float, default=2e6) + parser.add_argument('--switch', type=float, default=3e6) + parser.add_argument('--switch-back', type=float, default=8e6) + parser.add_argument('--no-show-legend', dest='show_legend', action='store_false') + args = parser.parse_args(sys.argv[2:]) + + std_factor = args.std_factor + + if args.output: + matplotlib.use("pgf") + matplotlib.rcParams.update({ + "pgf.texsystem": "pdflatex", + 'font.family': 'serif', + 'font.size': 16, + 'text.usetex': True, + 'pgf.rcfonts': False, + }) + + data_high_fidelity = np.load(f"{args.latencies}/packet_latency-high-fidelity.npz") + data_hybrid = np.load(f"{args.latencies}/packet_latency-hybrid.npz") + data_hybrid_lite = np.load(f"{args.latencies}/packet_latency-hybrid-lite.npz") + + windows_hf, means_hf, stds_hf = \ + data_high_fidelity['windows'], data_high_fidelity['means'], data_high_fidelity['stds'] + windows_hybrid, means_hybrid, stds_hybrid = \ + data_hybrid['windows'], data_hybrid['means'], data_hybrid['stds'] + windows_hybrid_lite, means_hybrid_lite, stds_hybrid_lite = \ + data_hybrid_lite['windows'], data_hybrid_lite['means'], data_hybrid_lite['stds'] + + assert np.all(windows_hf == windows_hybrid) + n_windows = windows_hf.shape[0] + windows_hybrid_lite = windows_hybrid_lite[:n_windows] + means_hybrid_lite = means_hybrid_lite[:n_windows] + stds_hybrid_lite = stds_hybrid_lite[:n_windows] + assert np.all(windows_hybrid_lite == windows_hybrid) + + fig, ax = plt.subplots(figsize=(7, 3.8)) + + # plt.errorbar(windows_hf, means_hf, yerr=std_factor*stds_hf) + # plt.errorbar(windows_hybrid, means_hybrid, yerr=std_factor*stds_hybrid) + # plt.errorbar(windows_hybrid_lite, means_hybrid_lite, + # yerr=std_factor*stds_hybrid_lite) + ax.plot(windows_hf, means_hf, label='high-fidelity only') + ax.fill_between(windows_hf, + means_hf - std_factor*stds_hf, + means_hf + std_factor*stds_hf, + color='#00F5') + 
ax.plot(windows_hybrid_lite, means_hybrid_lite, label='hybrid-lite') + ax.fill_between(windows_hybrid_lite, + means_hybrid_lite - std_factor*stds_hybrid, + means_hybrid_lite + std_factor*stds_hybrid, + color='#F005') + ax.plot(windows_hybrid, means_hybrid, label='hybrid') + ax.fill_between(windows_hybrid, + means_hybrid - std_factor*stds_hybrid_lite, + means_hybrid + std_factor*stds_hybrid_lite, + color='#0F05') + + height_plot = ax.get_ylim()[1] + ax.vlines = ax.vlines([args.started_tracking, args.switch, args.switch_back], + -height_plot*0.04, height_plot, color='#AAA', ls='-') + ax.vlines.set_clip_on(False) + # ax.set_ylim((0.0, height_plot)) + + middle = (args.switch + args.switch_back) / 2 + arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'} + ax.annotate("", xy=(args.started_tracking * .95, height_plot*.03), + xytext=(args.started_tracking * .6, height_plot*.1), **arrow_color) + ax.annotate("switch", xy=(args.switch*1.04, height_plot*.03), + xytext=(middle, height_plot*.08), **arrow_color) + ax.annotate("", xy=(args.switch_back * 0.96, height_plot*.03), + xytext=(middle, height_plot*.08), **arrow_color) + ax.text(args.started_tracking * .9, height_plot*.1, "start\ntracking", color='#333', ha='right') + + # ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333', + # rotation=40, rotation_mode='anchor', horizontalalignment='left', + # verticalalignment='center') + # ax.text(args.switch, height_plot, "switch to surrogate", color='#333', rotation=40, + # rotation_mode='anchor', horizontalalignment='left', verticalalignment='center') + # ax.text(args.switch_back, 1.03 * height_plot, "switch to\nhigh-definition", color='#333', + # rotation=40, rotation_mode='anchor', horizontalalignment='left', + # verticalalignment='center') + + ax.set_xlabel('Virtual time') + ax.set_ylabel('Average Packet Latency') + # ax.set_ylim(0, 122e3) + if args.show_legend: + ax.legend(bbox_to_anchor=(.54, .02), loc='lower center', 
borderaxespad=0) + ax.yaxis.set_major_formatter(time_formatter_ns) + ax.xaxis.set_major_formatter(time_formatter_ns) + + # Finding when we switch back to high-fidelity from surrogate + cut_back = np.abs(windows_hf - args.switch_back).argmin() + 1 + + n = means_hf[cut_back:].shape[0] + mse_hybrid_lite = \ + np.sum((means_hf[cut_back:] - means_hybrid_lite[cut_back:])**2) / n + mse_hybrid = \ + np.sum((means_hf[cut_back:] - means_hybrid[cut_back:])**2) / n + print("Mean squared error (MSE) for hybrid:", mse_hybrid, "ns^2") + print("Mean squared error (MSE) for hybrid-lite:", mse_hybrid_lite, "ns^2") + + if args.output: + plt.tight_layout() + plt.savefig(f'{args.output}.pgf', bbox_inches='tight') + plt.savefig(f'{args.output}.pdf', bbox_inches='tight') + else: + plt.show() # type: ignore diff --git a/scripts/reproducibility-pads23/python-scripts/port-occupancy.py b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py new file mode 100644 index 00000000..88a045f3 --- /dev/null +++ b/scripts/reproducibility-pads23/python-scripts/port-occupancy.py @@ -0,0 +1,199 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib +from matplotlib.ticker import EngFormatter + +import pathlib +import argparse +import sys + +from typing import Any +array_type = np.ndarray[Any, Any] + + +time_formatter_ns = EngFormatter() +time_formatter_ns.ENG_PREFIXES = {0: 'ns', 3: 'us', 6: 'ms', 9: 's'} +bytes_formater = EngFormatter(unit='B') + + +def load_aggregated_utilization(filename: str | pathlib.Path) -> tuple[array_type, array_type]: + port_utilization = np.loadtxt(filename, delimiter=',', dtype=float, skiprows=1) + + # finding all snapshot timestamps + timestamps = np.unique(port_utilization[:, 0]) + assert len(timestamps.shape) == 1 + + # Finding total utilization per snapshot + total_utilization = np.zeros_like(timestamps) + for i, ts in enumerate(timestamps): + total_utilization[i] = port_utilization[port_utilization[:, 0] == ts, 2:].sum() + + return 
timestamps, total_utilization + + +if __name__ == '__main__': + this_binary = sys.argv[0] + commands = { + 'singleplot': 'Displays port occupancy plot (needs full path of csv)', + 'multipleplot': 'Displays port occupancy plot (needs full path of csv\'s)', + 'pads23': 'Generates plot that appears on PADS23 paper', + } + parser = argparse.ArgumentParser( + usage=f'{this_binary} []\n\n' + 'The available commands are:\n' + + '\n'.join(f' {cmd}\t {desc}' for cmd, desc in commands.items())) + parser.add_argument('command', help='Subcommand to run') + main_args = parser.parse_args(sys.argv[1:2]) + + if main_args.command not in commands: + print("Unrecognized command:", main_args.command, file=sys.stderr) + exit(1) + + +if main_args.command == 'singleplot': + parser = argparse.ArgumentParser() + parser.add_argument('--csv', type=pathlib.Path, + help='Buffer occupancy CSV results', + required=True) + args = parser.parse_args(sys.argv[2:]) + + ts1, utilization_hf = load_aggregated_utilization(args.csv) + + # plotting + fig, ax = plt.subplots(figsize=(7, 3.8)) + # vlines = ax.vlines([2e6, 3e6, 8e6], -0.4e6, 7.15e6, color='#AAA', ls='-') + # vlines.set_clip_on(False) + + # arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'} + # ax.annotate("", xy=(2.1e6, 0e6), xytext=(3.5e6, 1.1e6), **arrow_color) + # ax.annotate("switch", xy=(3.1e6, 0.1e6), xytext=(4.8e6, 0.5e6), **arrow_color) + # ax.annotate("", xy=(7.9e6, 0.1e6), xytext=(6.0e6, 0.5e6), **arrow_color) + # ax.text(3.5e6, 1.1e6, "start latency tracking", color='#333', ha='left') + + ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue') + + ax.set_xlabel('Virtual time') + ax.set_ylabel('Total Buffer Port Occupancy') + # ax.set_ylim(-0.2e6, 6.9e6) + # ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0) + ax.xaxis.set_major_formatter(time_formatter_ns) + ax.yaxis.set_major_formatter(bytes_formater) + + plt.show() + + +if main_args.command == 'multipleplot': + parser 
= argparse.ArgumentParser() + parser.add_argument('--csv', type=pathlib.Path, + help='Buffer occupancy CSV results (multiple csvs are possible)', + action='append', required=True) + args = parser.parse_args(sys.argv[2:]) + + # plotting + fig, ax = plt.subplots(figsize=(7, 3.8)) + + for csv in args.csv: + ts, utilization_hf = load_aggregated_utilization(csv) + ax.plot(ts, utilization_hf, label="high-fidelity") + + ax.set_xlabel('Virtual time') + ax.set_ylabel('Total Buffer Port Occupancy') + ax.xaxis.set_major_formatter(time_formatter_ns) + ax.yaxis.set_major_formatter(bytes_formater) + + plt.show() + + +if main_args.command == 'pads23': + parser = argparse.ArgumentParser() + parser.add_argument('--experiment-folder', type=pathlib.Path, + help='Folder where experiment was run', + required=True) + parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', + default=False) + parser.add_argument('--started-tracking', type=float, default=2e6) + parser.add_argument('--switch', type=float, default=3e6) + parser.add_argument('--switch-back', type=float, default=8e6) + parser.add_argument('--show-switch-labels', action='store_true') + parser.add_argument('--no-show-legend', dest='show_legend', action='store_false') + args = parser.parse_args(sys.argv[2:]) + + dir_data = args.experiment_folder + # dir_data = pathlib.Path('data/synthetic1') + + if args.output: + matplotlib.use("pgf") + matplotlib.rcParams.update({ + "pgf.texsystem": "pdflatex", + 'font.family': 'serif', + 'font.size': 16, + 'text.usetex': True, + 'pgf.rcfonts': False, + }) + + ts1, utilization_hf = load_aggregated_utilization( + dir_data / "high-fidelity" / "codes-output" / "dragonfly-snapshots.csv") + ts2, utilization_hybrid = load_aggregated_utilization( + dir_data / "hybrid" / "codes-output" / "dragonfly-snapshots.csv") + ts3, utilization_hybrid_lite = load_aggregated_utilization( + dir_data / "hybrid-lite" / "codes-output" / "dragonfly-snapshots.csv") + + # Where to start and finish 
making the dotted line + assert np.all(np.abs(ts1 - ts2) < 1e-6) and np.all(np.abs(ts1 - ts3) < 1e-6) + cut1 = np.abs(ts1 - args.switch).argmin() + 1 # at switch + cut2 = np.abs(ts1 - args.switch_back).argmin() + + # plotting + fig, ax = plt.subplots(figsize=(7, 3.8)) + ax.plot(ts1, utilization_hf, label="high-fidelity", color='blue') + + ax.plot(ts3[:cut1], utilization_hybrid_lite[:cut1], + label="hybrid-lite", color='red') + ax.plot(ts3[cut1-1:cut2+1], utilization_hybrid_lite[cut1-1:cut2+1], + color='red', ls='--') + ax.plot(ts3[cut2:], utilization_hybrid_lite[cut2:], color='red') + + ax.plot(ts2[:cut1], utilization_hybrid[:cut1], label="hybrid", + color='green') + ax.plot(ts2[cut1-1:cut2+1], utilization_hybrid[cut1-1:cut2+1], color='green', ls='--') + ax.plot(ts2[cut2:], utilization_hybrid[cut2:], color='green') + + height_plot = ax.get_ylim()[1] + vlines = ax.vlines([args.started_tracking, args.switch, args.switch_back], + -height_plot*0.04, height_plot, color='#AAA', ls='-') + vlines.set_clip_on(False) + + middle = (args.switch + args.switch_back) / 2 + arrow_color = {'arrowprops': dict(arrowstyle="->", color='#AAA'), 'color': '#333'} + ax.annotate("", xy=(args.started_tracking * .95, 0e6), + xytext=(args.started_tracking * .6, height_plot*.3), **arrow_color) + ax.annotate("switch", xy=(args.switch*1.04, height_plot*.03), + xytext=(middle, height_plot*.08), **arrow_color) + ax.annotate("", xy=(args.switch_back * 0.96, height_plot*.03), + xytext=(middle, height_plot*.08), **arrow_color) + ax.text(args.started_tracking * .9, height_plot*.3, "start\ntracking", color='#333', ha='right') + + if args.show_switch_labels: + ax.text(args.started_tracking, height_plot, "start latency tracking", color='#333', + rotation=40, rotation_mode='anchor', horizontalalignment='left', + verticalalignment='center') + ax.text(args.switch, height_plot, "switch to surrogate", color='#333', rotation=40, + rotation_mode='anchor', horizontalalignment='left', verticalalignment='center') + 
ax.text(args.switch_back, height_plot, "switch to\nhigh-definition", color='#333', + rotation=40, rotation_mode='anchor', horizontalalignment='left', + verticalalignment='center') + + ax.set_xlabel('Virtual time') + ax.set_ylabel('Total Buffer Port Occupancy') + # ax.set_ylim(-0.2e6, 6.9e6) + if args.show_legend: + ax.legend(bbox_to_anchor=(.5, .4), loc='lower center', borderaxespad=0) + ax.xaxis.set_major_formatter(time_formatter_ns) + ax.yaxis.set_major_formatter(bytes_formater) + + if args.output: + plt.tight_layout() + plt.savefig(f'{args.output}.pgf', bbox_inches='tight') + plt.savefig(f'{args.output}.pdf', bbox_inches='tight') + else: + plt.show() diff --git a/scripts/reproducibility-pads23/reproduce.sh b/scripts/reproducibility-pads23/reproduce.sh new file mode 100644 index 00000000..42b5609b --- /dev/null +++ b/scripts/reproducibility-pads23/reproduce.sh @@ -0,0 +1,45 @@ +CODES_SOURCE_DIR="$PWD/../.." +CODES_BUILD_DIR="$PWD/../../build" +EXP_SCRIPTS="$PWD/experiments" + + +# Running experiments +mkdir -p results/10ms results/100ms + +pushd results/10ms +bash -x "$EXP_SCRIPTS"/mpi-replay_72-node-dragonfly_synthetic1-10ms.sh \ + "$CODES_BUILD_DIR" "$CODES_SOURCE_DIR" "$EXP_SCRIPTS/conf-files/" +popd + +pushd results/100ms +bash -x "$EXP_SCRIPTS"/mpi-replay_72-node-dragonfly_synthetic1-100ms.sh \ + "$CODES_BUILD_DIR" "$CODES_SOURCE_DIR" "$EXP_SCRIPTS/conf-files/" +popd + + +# Generating figures +mkdir results/10ms/condensed results/100ms/condensed + +for exp in {10,100}; do + for kind in {high-fidelity,hybrid,hybrid-lite}; do + python python-scripts/delay_in_window.py \ + --latencies-dir results/${exp}ms/$kind/packet-latency-trace \ + --output results/${exp}ms/condensed/packet_latency-$kind \ + --start 0.0 --end ${exp}e6 + done +done + +mkdir figures + +python python-scripts/plot-packet-latency.py pads23 \ + --latencies results/10ms/condensed \ + --output figures/packet_latency-10ms + +python python-scripts/port-occupancy.py pads23 \ + --experiment-folder 
results/10ms --output figures/port-occupancy-10ms + + +# Generating table +python python-scripts/generate-table.py \ + --folder-10ms results/10ms --folder-100ms results/100ms \ + > results/sumarized-table.txt diff --git a/scripts/terminal-to-terminal-latency/determine_mean_std.py b/scripts/terminal-to-terminal-latency/determine_mean_std.py new file mode 100644 index 00000000..09449143 --- /dev/null +++ b/scripts/terminal-to-terminal-latency/determine_mean_std.py @@ -0,0 +1,80 @@ +import numpy as np +import matplotlib.pyplot as plt + + +def mean_and_std(array: np.array) -> tuple[float, float]: + return np.mean(array), np.std(array) # type: ignore + + +if __name__ == '__main__': + delays = np.loadtxt("packets-delay.txt", skiprows=1, delimiter=",") + start_col = 8 + delay_col = 10 + size_col = 5 + + # Filtering data to some interval + delays = delays[delays[:, start_col] > 200e3] + # delays = delays[np.bitwise_and(delays[:, start_col] > 200e3, + # delays[:, start_col] + delays[:, delay_col] < 500e3)] + + # Distribution + delays_same_router = (delays[:, 0] // 2) == (delays[:, 1] // 2) + delays_same_group = np.bitwise_xor( + (delays[:, 0] // 8) == (delays[:, 1] // 8), + delays_same_router) + delays_out_group = (delays[:, 0] // 8) != (delays[:, 1] // 8) + + mean, std = mean_and_std(delays[:, delay_col]) + print(f"total mean: {mean:.2f} std: {std:.2f}") + print() + + delays0 = delays[delays[:, 0] == 0] + mean, std = mean_and_std(delays0[:, delay_col]) + print(f"terminal 0 mean: {mean:.2f} std: {std:.2f}") + print() + + fig, axs = plt.subplots(2, 2) + axs[0, 0].set_title("Latency from all terminals to all") + # axs[0, 0].set_xlabel("latency") + axs[0, 0].hist(delays[:, delay_col], bins=50, density=True, alpha=0.6, color='b') + axs[0, 1].set_title("Latency to terminals in same router") + # axs[0, 1].set_xlabel("latency") + axs[0, 1].hist(delays[delays_same_router, delay_col], bins=50, density=True, alpha=0.6, color='b') + axs[1, 0].set_title("Latency to terminals in same 
group") + axs[1, 0].set_xlabel("latency") + axs[1, 0].hist(delays[delays_same_group, delay_col], bins=50, density=True, alpha=0.6, color='b') + axs[1, 1].set_title("Latency to terminals in other groups") + axs[1, 1].set_xlabel("latency") + axs[1, 1].hist(delays[delays_out_group, delay_col], bins=50, density=True, alpha=0.6, color='b') + plt.show() + + delays01 = delays0[delays0[:, 1] == 1] + delays056 = delays0[delays0[:, 1] == 56] + plt.scatter(delays01[:, size_col], delays01[:, delay_col]) + plt.title("Packet size vs terminal to terminal delay. Terminal 0 to terminal 1") + plt.xlabel("Packet size") + plt.ylabel("Latency") + plt.show() + plt.scatter(delays056[:, size_col], delays056[:, delay_col]) + plt.title("Packet size vs terminal to terminal delay. Terminal 0 to terminal 56") + plt.xlabel("Packet size") + plt.ylabel("Latency") + plt.show() + + buckets = [delays0[delays0[:, 1] == i] for i in range(1, 72)] + buckets_processed = np.array([mean_and_std(b[:, delay_col]) for b in buckets]) + print("Destination, Means and stds for terminal 0") + for i, (mean, std) in enumerate(buckets_processed): + print(f"{i+1}, {mean:.2f}, {std:.2f}") + print() + + mean, std = mean_and_std(delays[delays_same_router, delay_col]) + print(f"same router mean: {mean:.2f} std: {std:.2f}") + print() + + mean, std = mean_and_std(delays[delays_same_group, delay_col]) + print(f"same group mean: {mean:.2f} std: {std:.2f} (excluding same router)") + print() + + mean, std = mean_and_std(delays[delays_out_group, delay_col]) + print(f"other groups mean: {mean:.2f} std: {std:.2f}") diff --git a/scripts/terminal-to-terminal-latency/sort-delays.py b/scripts/terminal-to-terminal-latency/sort-delays.py new file mode 100644 index 00000000..a75d6358 --- /dev/null +++ b/scripts/terminal-to-terminal-latency/sort-delays.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import numpy as np +import glob +import fileinput +import sys +import pathlib + +from typing import Any + + +def 
collect_data_numpy( + path: pathlib.Path | str, + filepreffix: str, + delimiter: str | None = None, + dtype: Any = int +) -> np.ndarray[Any, Any]: + escaped_path = pathlib.Path(glob.escape(path)) # type: ignore + stat_files = glob.glob(str(escaped_path / f"{filepreffix}-gid=*.txt")) + if not stat_files: + print(f"No valid `{filepreffix}` files have been found in path {path}", file=sys.stderr) + exit(1) + + return np.loadtxt(fileinput.input(stat_files), delimiter=delimiter, dtype=dtype, + comments='#') + + +if __name__ == '__main__': + delays = collect_data_numpy('.', 'packets-delay', delimiter=',', + dtype=np.dtype('float')) + # sorting by source terminal and packet id + sorted_indx = np.lexsort((delays[:, 2], delays[:, 0])) + delays = delays[sorted_indx] + + # saving some columns + np.savetxt( + "packets-delay.csv", delays[:, (0, 1, 2, 3, 4, 5, 6, 7, 8, 10)], + fmt="%d,%d,%d,%d,%d,%d,%f,%f,%f,%f", + header='src_terminal,dst_terminal,packet_id,is_surrogate_on,is_predicted,' + 'packet_size,injection_time,delay_at_queue_head,start_time,delay', + comments='') diff --git a/scripts/workload-iteration-times/README.txt b/scripts/workload-iteration-times/README.txt new file mode 100644 index 00000000..bc2d5a16 --- /dev/null +++ b/scripts/workload-iteration-times/README.txt @@ -0,0 +1,6 @@ +To reproduce figures: + +```bash +python print-iterations.py /home/helq/Research/HPC/code/kronos/2024-feb-22/experiments/union/milc-jacobi/results/exp-007/iteration-logs --output figures/milc-jacobi-hf --legends Jacobi MILC +python print-iterations.py /home/helq/Research/HPC/code/kronos/2024-feb-22/experiments/union/milc-jacobi/results/exp-003/iteration-logs --output figures/milc-jacobi-surrogate --legends Jacobi MILC +``` diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py new file mode 100644 index 00000000..efb637e5 --- /dev/null +++ b/scripts/workload-iteration-times/print-iterations.py @@ -0,0 +1,201 @@ +# Adapted 
from example from matplotlib lib + +from typing import Any, TextIO +import argparse +import pathlib +import colorsys + +import matplotlib.pyplot as plt +import matplotlib +from matplotlib.patches import Rectangle +from matplotlib.lines import Line2D +import numpy as np +import matplotlib.colors as mc + + +def adjust_lightness(color: str | tuple[float, float, float], amount: float = 0.5): + """ + Taken from: https://stackoverflow.com/a/49601444 + Smaller than 1 amounts darkness, larger than 1 lightens + Examples: + >> adjust_lightness('g', 1.3) + >> adjust_lightness('#F034A3', 0.6) + >> adjust_lightness((.3,.55,.1), 1.5) + """ + try: + c = mc.cnames[color] # type: ignore[reportArgumentType] + except: + c = color + c = colorsys.rgb_to_hls(*mc.to_rgb(c)) + return colorsys.hls_to_rgb(c[0], max(0, min(1, amount * c[1])), c[2]) + + +def plot_sequence( + ax: Any, + seq: Any, + names: Any, + height: Any, + color: str = 'red', + print_names: bool = True +): + box = Rectangle((0, 0), seq[0], height[0], color=adjust_lightness(color, 1.7)) + ax.add_patch(box) + for start, end, heit in zip(seq, height[1:], height[1:]): + box = Rectangle((start, 0), end, heit, color=adjust_lightness(color, 1.7)) + ax.add_patch(box) + + ax.vlines(seq, 0, height, color=adjust_lightness(color, 1.3)) + + non_zero_height = height != 0 + cleaned_seq = seq[non_zero_height] + cleaned_height = height[non_zero_height] + ax.scatter(cleaned_seq, cleaned_height, marker='.', color=color) + # ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w") + + # annotate lines + if print_names: + cleaned_names = names[non_zero_height] + for d, h, r in zip(cleaned_seq, cleaned_height, cleaned_names): + ax.annotate(r, xy=(d, h), + xytext=(3, np.sign(h)*3), textcoords="offset points", + horizontalalignment="right", + verticalalignment="bottom" if h > 0 else "top") + + +# hardcoded data +def iterations_count_example(): + iterations = np.array([5700202, 11141148, 16735521, 22248304, 28018657, 33344653, 
39131394, 44535575, 49924184, 55265978, 60797003, 65999354, 71477966, 77089252, 82388323, 87510575, 92672984, 97968684, 103413575, 108791049, 114191370, 119281369, 124947369, 130269516, 135814413, 140706572, 146191543, 152244928, 157549505, 163252774]) + names = np.arange(iterations.size) + # height = np.ones_like(names) + height = iterations.astype(np.float64) + height[1:] -= iterations[:-1] + # mean_height = height.mean() + # height /= mean_height + + iterations2 = np.array([4475938, 8527507, 12500772, 16932824, 21122232, 24629352, 28727112, 32812390, 37119760, 40873748, 44831210, 49236742, 53495581, 57186915, 61102874, 65089296, 69034116, 72827668, 77306215, 81505333, 84962239, 88817963, 92788913, 97258245, 101298185, 105234798, 109230081, 113176951, 117033360, 120922482, 125158680, 129445759, 132927795, 136967719, 140707240, 144980904, 148570317, 152949619, 157429076, 161858572, 165599534, 169169124, 172576205, 176267989, 179822127, 183531146, 187147511, 190685445, 194270774, 197863388, 201349592, 204959427, 208557228, 212286717, 215720477, 219201662, 222629090, 226452092, 230156036, 233856397, 237545455, 241265332, 245016561, 248662995, 252212229, 255620388, 259105490, 262543988, 266118703, 269713894, 273230378, 276923706, 280425248, 284046990, 287508037, 291266834, 294812966, 298512239, 302113836, 305636975, 309307151, 312842662, 316463094, 320055020, 323542940, 327139573, 330811189, 334388299, 337788549, 341498322, 345104703, 348880050, 352448690, 356106442, 359506153, 363094952, 366703208, 370233755, 373770752, 377222496]) + names2 = np.arange(iterations2.size) + # height2 = -1 * np.ones_like(names2) + height2 = iterations2.astype(np.float64) + height2[1:] -= iterations2[:-1] + # height2 /= mean_height + height2 *= -1 + + return (iterations, names, height), (iterations2, names2, height2) + + +# class JobAvgIterations(TypedDict): +# iterations: + + +# typing cannot be done for structured arrays :S +def parse_iteration_log(log_file: TextIO): + log_pattern = 
r'ITERATION (\d+) node \d+ job (\d+) rank \d+ time (\d*\.?\d+)\n' + log_iters = np.fromregex(log_file, log_pattern, [('iter', np.int64), ('job', np.int64), ('time', np.float64)]) + + def get_avg_for_iters(job: np.int64): + def avg(it: np.int64) -> np.float64: + matched_iters = log_iters[np.bitwise_and(log_iters['job'] == job, log_iters['iter'] == it)] + return np.mean(matched_iters['time'].astype(np.float64)) + return avg + + jobs: dict[int, np.ndarray[Any, Any]] = {} + for job in np.unique(log_iters['job']): + iterations = np.unique(log_iters[log_iters['job'] == job]['iter']) + # avg_timestamp = np.vectorize(get_avg_for_iters(job), otypes=(np.float64,))(iterations) + avg_timestamp = np.array([get_avg_for_iters(job)(it) for it in iterations]) + assert(iterations.size == avg_timestamp.size) + + # finding time that each iteration took + avg_iter_time = avg_timestamp.copy() + avg_iter_time[1:] -= avg_timestamp[:-1] + # "removing" iterations for which we don't know how much they actually took + to_rem = iterations.copy() + to_rem[1:] -= to_rem[:-1] + 1 + to_rem[0] = 0 # Assuming the first value hasn't been skipped + avg_iter_time[to_rem != 0] = 0 + + combined = np.zeros_like(iterations, dtype=[('iter', np.int64), ('time', np.float64), ('iter_time', np.float64)]) + combined['iter'] = iterations + combined['time'] = avg_timestamp + combined['iter_time'] = avg_iter_time + jobs[int(job)] = combined + + return jobs + + +# if __name__ == "__main__": +# (iterations, names, height), (iterations2, names2, height2) = iterations_count_example() +# fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained") +# plot_sequence(ax, iterations, names, height, 'blue') +# plot_sequence(ax, iterations2, names2, height2, 'red') +# plt.setp(ax.get_xticklabels(), rotation=30, ha="right") +# plt.show() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + _ = parser.add_argument('file', type=argparse.FileType('r')) + _ = parser.add_argument('--output', type=pathlib.Path, 
help='Name of output figure', default=None) + _ = parser.add_argument('--iter-count', dest='iter_count', action='store_true') + _ = parser.add_argument('--legends', nargs='+', help='Application names', required=False) + _ = parser.add_argument('--no-show-plot', dest='show_plot', action='store_false') + args = parser.parse_args() + + if args.output: + matplotlib.use("pgf") + matplotlib.rcParams.update({ + "pgf.texsystem": "pdflatex", + 'font.family': 'serif', + 'font.size': 16, + 'text.usetex': True, + 'pgf.rcfonts': False, + }) + + parsed_logs = parse_iteration_log(args.file) + + final_timestamp = float(max(job['time'].max() for job in parsed_logs.values())) + print("Simulation end =", final_timestamp) + + if not args.show_plot: + exit(0) + + # Creating plot with data + fig, ax = plt.subplots(figsize=(6, 3), layout="constrained") + ax.set_xlabel("Total virtual time (ns)") + ax.set_ylabel("Virtual time \nper iteration (ns)") + #ax.set(title="") + largest_timestamp = max(v['time'].max() for v in parsed_logs.values()) + ax.plot([0, largest_timestamp], [0, 0], "-", color="k", markerfacecolor="w") + + color_table = ['tab:red', 'tab:blue', 'tab:green', 'tab:black'] + for i, job in enumerate(parsed_logs.keys()): + # Flipping second sequence if there are only two jobs + # mul = -1 if len(parsed_logs) == 2 and i == 1 else 1 + mul = 1 + plot_sequence( + ax, + parsed_logs[job]['time'], + parsed_logs[job]['iter'], + mul * parsed_logs[job]['iter_time'], + color=color_table[i], + print_names=args.iter_count) + + plt.setp(ax.get_xticklabels(), rotation=30, ha="right") + + if args.legends: + custom_lines = [] + legends = [] + for legend, color in zip(args.legends, color_table): + # Finding legend for application with ID i + legend: str + legends.append(legend) + custom_lines.append(Line2D([0], [0], color=color)) + ax.legend(custom_lines, legends) + + #ax.margins(y=0.1) + if args.output: + plt.tight_layout() + plt.savefig(f'{args.output}.pgf', bbox_inches='tight') + 
plt.savefig(f'{args.output}.pdf', bbox_inches='tight') + else: + plt.show() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d7d34112..08950e7f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -55,6 +55,14 @@ list(APPEND SRCS util/rc-stack.c util/congestion-controller.C + surrogate/init.c + surrogate/application-surrogate.c + surrogate/network-surrogate.c + surrogate/app-iteration-predictor/common.c + surrogate/app-iteration-predictor/average.c + surrogate/packet-latency-predictor/common.c + surrogate/packet-latency-predictor/average.c + iokernellang/codesparser.h iokernellang/codesparser.c iokernellang/codeslexer.h @@ -89,9 +97,17 @@ if(USE_DUMPI) endif() if(USE_ONLINE) - list(APPEND SRCS workload/methods/codes-online-comm-wrkld.C) - list(APPEND LIBS_TO_LINK PkgConfig::SWM) - list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS) + if(USE_SWM) + list(APPEND SRCS workload/methods/codes-online-comm-wrkld.C) + list(APPEND LIBS_TO_LINK PkgConfig::SWM) + list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS) + endif() + if(USE_UNION) + list(APPEND SRCS workload/methods/codes-conc-online-comm-wrkld.C) + list(APPEND LIBS_TO_LINK PkgConfig::SWM) + list(APPEND LIBS_TO_LINK PkgConfig::UNION) + list(APPEND LIBS_TO_LINK PkgConfig::ARGOBOTS) + endif() endif() @@ -103,6 +119,11 @@ endif() # list(APPEND SRCS workload/methods/codes-darshan3-io-wrkld.c) # endif() +if(USE_TORCH) + list(APPEND SRCS surrogate/packet-latency-predictor/torch-jit.C) + list(APPEND LIBS_TO_LINK ${TORCH_LIBRARIES}) +endif() + add_library(codes STATIC ${SRCS}) list(APPEND LIBS_TO_LINK ${MPI_C_LIBRARIES}) @@ -121,12 +142,20 @@ if(USE_DUMPI) target_include_directories(codes PUBLIC ${DUMPI_INCLUDE}) endif() -#LINK ARGOBOTS and SWM ONLINE +#LINK ARGOBOTS, SWM and UNION # target_link_libraries(codes PUBLIC PkgConfig::ARGOBOTS) if(USE_ONLINE) - target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS}) - # target_link_libraries(codes PUBLIC PkgConfig::SWM) - target_include_directories(codes PUBLIC 
${SWM_INCLUDE_DIRS}) + if(USE_SWM) + target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS}) + # target_link_libraries(codes PUBLIC PkgConfig::SWM) + target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS}) + endif() + if(USE_UNION) + target_include_directories(codes PUBLIC ${ARGOBOTS_INCLUDE_DIRS}) + # target_link_libraries(codes PUBLIC PkgConfig::SWM) + target_include_directories(codes PUBLIC ${SWM_INCLUDE_DIRS}) + target_include_directories(codes PUBLIC ${UNION_INCLUDE_DIRS}) + endif() endif() #LINK ROSS @@ -184,7 +213,7 @@ endforeach() # configure_file(modelconfig/configparser.c ${CMAKE_CURRENT_BINARY_DIR}/modelconfig/configparser.c COPYONLY) # configure_file(modelconfig/configparser.h ${CMAKE_CURRENT_BINARY_DIR}/modelconfig/configparser.h COPYONLY) -install(DIRECTORY "${CMAKE_SOURCE_DIR}/codes" DESTINATION "${CMAKE_BINARY_DIR}") +install(DIRECTORY "${CMAKE_SOURCE_DIR}/codes" DESTINATION include) install(TARGETS ${CODES_TARGETS} DESTINATION bin) diff --git a/src/Makefile.subdir b/src/Makefile.subdir index 9342c919..5721f7df 100644 --- a/src/Makefile.subdir +++ b/src/Makefile.subdir @@ -83,6 +83,7 @@ nobase_include_HEADERS = \ codes/resource-lp.h \ codes/local-storage-model.h \ codes/rc-stack.h \ + codes/surrogate.h \ codes/codes-jobmap.h \ codes/codes-callback.h \ codes/codes-mapping-context.h \ @@ -161,6 +162,7 @@ src_libcodes_la_SOURCES = \ src/workload/methods/codes-iomock-wrkld.c \ codes/rc-stack.h \ src/util/rc-stack.c \ + src/util/surrogate.c \ src/networks/model-net/network-managers/dragonfly-network-manager.C \ src/networks/model-net/core/model-net.c \ src/networks/model-net/common-net.c \ diff --git a/src/iokernellang/codeslexer.c b/src/iokernellang/codeslexer.c index 96f594a7..c4abdb8d 100644 --- a/src/iokernellang/codeslexer.c +++ b/src/iokernellang/codeslexer.c @@ -9,7 +9,7 @@ #define FLEX_SCANNER #define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 +#define YY_FLEX_SUBMINOR_VERSION 37 
#if YY_FLEX_SUBMINOR_VERSION > 0 #define FLEX_BETA #endif @@ -54,7 +54,6 @@ typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; -#endif /* ! C99 */ /* Limits of integral types. */ #ifndef INT8_MIN @@ -85,6 +84,8 @@ typedef unsigned int flex_uint32_t; #define UINT32_MAX (4294967295U) #endif +#endif /* ! C99 */ + #endif /* ! FLEXINT_H */ #ifdef __cplusplus @@ -170,6 +171,11 @@ typedef void* yyscan_t; typedef struct yy_buffer_state *YY_BUFFER_STATE; #endif +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + #define EOB_ACT_CONTINUE_SCAN 0 #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 @@ -205,11 +211,6 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; #define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - #ifndef YY_STRUCT_YY_BUFFER_STATE #define YY_STRUCT_YY_BUFFER_STATE struct yy_buffer_state @@ -227,7 +228,7 @@ struct yy_buffer_state /* Number of characters read into yy_ch_buf, not including EOB * characters. 
*/ - int yy_n_chars; + yy_size_t yy_n_chars; /* Whether we "own" the buffer - i.e., we know we created it, * and can realloc() it to grow it, and should free() it to @@ -306,7 +307,7 @@ static void CodesIOKernel__init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t y YY_BUFFER_STATE CodesIOKernel__scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); YY_BUFFER_STATE CodesIOKernel__scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); +YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner ); void *CodesIOKernel_alloc (yy_size_t ,yyscan_t yyscanner ); void *CodesIOKernel_realloc (void *,yy_size_t ,yyscan_t yyscanner ); @@ -338,7 +339,7 @@ void CodesIOKernel_free (void * ,yyscan_t yyscanner ); /* Begin user sect3 */ -#define CodesIOKernel_wrap(n) 1 +#define CodesIOKernel_wrap(yyscanner) 1 #define YY_SKIP_YYWRAP typedef unsigned char YY_CHAR; @@ -559,7 +560,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[37] = #define YY_USER_ACTION /* no user action */; #endif -#line 563 "../src/iokernellang/codeslexer.c" +#line 564 "../src/iokernellang/codeslexer.c" #define INITIAL 0 @@ -588,8 +589,8 @@ struct yyguts_t size_t yy_buffer_stack_max; /**< capacity of stack. */ YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. 
*/ char yy_hold_char; - int yy_n_chars; - int yyleng_r; + yy_size_t yy_n_chars; + yy_size_t yyleng_r; char *yy_c_buf_p; int yy_init; int yy_start; @@ -646,7 +647,7 @@ FILE *CodesIOKernel_get_out (yyscan_t yyscanner ); void CodesIOKernel_set_out (FILE * out_str ,yyscan_t yyscanner ); -int CodesIOKernel_get_leng (yyscan_t yyscanner ); +yy_size_t CodesIOKernel_get_leng (yyscan_t yyscanner ); char *CodesIOKernel_get_text (yyscan_t yyscanner ); @@ -654,6 +655,10 @@ int CodesIOKernel_get_lineno (yyscan_t yyscanner ); void CodesIOKernel_set_lineno (int line_number ,yyscan_t yyscanner ); +int CodesIOKernel_get_column (yyscan_t yyscanner ); + +void CodesIOKernel_set_column (int column_no ,yyscan_t yyscanner ); + YYSTYPE * CodesIOKernel_get_lval (yyscan_t yyscanner ); void CodesIOKernel_set_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); @@ -713,7 +718,7 @@ static int input (yyscan_t yyscanner ); if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ { \ int c = '*'; \ - unsigned n; \ + size_t n; \ for ( n = 0; n < max_size && \ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ @@ -801,7 +806,7 @@ YY_DECL #line 43 "../src/iokernellang/codeslexer.l" -#line 805 "../src/iokernellang/codeslexer.c" +#line 810 "../src/iokernellang/codeslexer.c" yylval = yylval_param; @@ -879,7 +884,7 @@ YY_DECL if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] ) { - int yyl; + yy_size_t yyl; for ( yyl = 0; yyl < yyleng; ++yyl ) if ( yytext[yyl] == '\n' ) @@ -1112,7 +1117,7 @@ YY_RULE_SETUP #line 117 "../src/iokernellang/codeslexer.l" ECHO; YY_BREAK -#line 1116 "../src/iokernellang/codeslexer.c" +#line 1121 "../src/iokernellang/codeslexer.c" case YY_STATE_EOF(INITIAL): yyterminate(); @@ -1299,21 +1304,21 @@ static int yy_get_next_buffer (yyscan_t yyscanner) else { - int num_to_read = + yy_size_t num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. 
*/ /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER; + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; int yy_c_buf_p_offset = (int) (yyg->yy_c_buf_p - b->yy_ch_buf); if ( b->yy_is_our_buffer ) { - int new_size = b->yy_buf_size * 2; + yy_size_t new_size = b->yy_buf_size * 2; if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8; @@ -1344,7 +1349,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner) /* Read in more data. */ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - yyg->yy_n_chars, (size_t) num_to_read ); + yyg->yy_n_chars, num_to_read ); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; } @@ -1441,6 +1446,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner) yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; yy_is_jam = (yy_current_state == 133); + (void)yyg; return yy_is_jam ? 0 : yy_current_state; } @@ -1469,7 +1475,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner) else { /* need more input */ - int offset = yyg->yy_c_buf_p - yyg->yytext_ptr; + yy_size_t offset = yyg->yy_c_buf_p - yyg->yytext_ptr; ++yyg->yy_c_buf_p; switch ( yy_get_next_buffer( yyscanner ) ) @@ -1640,10 +1646,6 @@ static void CodesIOKernel__load_buffer_state (yyscan_t yyscanner) CodesIOKernel_free((void *) b ,yyscanner ); } -#ifndef __cplusplus -extern int isatty (int ); -#endif /* __cplusplus */ - /* Initializes or reinitializes a buffer. * This function is sometimes called more than once on the same buffer, * such as during a CodesIOKernel_restart() or at EOF. 
@@ -1760,7 +1762,7 @@ void CodesIOKernel_pop_buffer_state (yyscan_t yyscanner) */ static void CodesIOKernel_ensure_buffer_stack (yyscan_t yyscanner) { - int num_to_alloc; + yy_size_t num_to_alloc; struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if (!yyg->yy_buffer_stack) { @@ -1853,17 +1855,17 @@ YY_BUFFER_STATE CodesIOKernel__scan_string (yyconst char * yystr , yyscan_t yysc /** Setup the input buffer state to scan the given bytes. The next call to CodesIOKernel_lex() will * scan from a @e copy of @a bytes. - * @param bytes the byte buffer to scan - * @param len the number of bytes in the buffer pointed to by @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. * @param yyscanner The scanner object. * @return the newly allocated buffer state object. */ -YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner) +YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len , yyscan_t yyscanner) { YY_BUFFER_STATE b; char *buf; yy_size_t n; - int i; + yy_size_t i; /* Get memory for full buffer, including space for trailing EOB's. */ n = _yybytes_len + 2; @@ -1973,7 +1975,7 @@ FILE *CodesIOKernel_get_out (yyscan_t yyscanner) /** Get the length of the current token. * @param yyscanner The scanner object. */ -int CodesIOKernel_get_leng (yyscan_t yyscanner) +yy_size_t CodesIOKernel_get_leng (yyscan_t yyscanner) { struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; return yyleng; @@ -2009,7 +2011,7 @@ void CodesIOKernel_set_lineno (int line_number , yyscan_t yyscanner) /* lineno is only valid if an input buffer exists. */ if (! 
YY_CURRENT_BUFFER ) - yy_fatal_error( "CodesIOKernel_set_lineno called with no buffer" , yyscanner); + YY_FATAL_ERROR( "CodesIOKernel_set_lineno called with no buffer" ); yylineno = line_number; } @@ -2024,7 +2026,7 @@ void CodesIOKernel_set_column (int column_no , yyscan_t yyscanner) /* column is only valid if an input buffer exists. */ if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "CodesIOKernel_set_column called with no buffer" , yyscanner); + YY_FATAL_ERROR( "CodesIOKernel_set_column called with no buffer" ); yycolumn = column_no; } diff --git a/src/iokernellang/codeslexer.h b/src/iokernellang/codeslexer.h index 034abc67..c3de58e9 100644 --- a/src/iokernellang/codeslexer.h +++ b/src/iokernellang/codeslexer.h @@ -13,7 +13,7 @@ #define FLEX_SCANNER #define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 +#define YY_FLEX_SUBMINOR_VERSION 37 #if YY_FLEX_SUBMINOR_VERSION > 0 #define FLEX_BETA #endif @@ -58,7 +58,6 @@ typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; -#endif /* ! C99 */ /* Limits of integral types. */ #ifndef INT8_MIN @@ -89,6 +88,8 @@ typedef unsigned int flex_uint32_t; #define UINT32_MAX (4294967295U) #endif +#endif /* ! C99 */ + #endif /* ! FLEXINT_H */ #ifdef __cplusplus @@ -161,7 +162,7 @@ struct yy_buffer_state /* Number of characters read into yy_ch_buf, not including EOB * characters. 
*/ - int yy_n_chars; + yy_size_t yy_n_chars; /* Whether we "own" the buffer - i.e., we know we created it, * and can realloc() it to grow it, and should free() it to @@ -205,7 +206,7 @@ void CodesIOKernel_pop_buffer_state (yyscan_t yyscanner ); YY_BUFFER_STATE CodesIOKernel__scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); YY_BUFFER_STATE CodesIOKernel__scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); +YY_BUFFER_STATE CodesIOKernel__scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner ); void *CodesIOKernel_alloc (yy_size_t ,yyscan_t yyscanner ); void *CodesIOKernel_realloc (void *,yy_size_t ,yyscan_t yyscanner ); @@ -213,7 +214,7 @@ void CodesIOKernel_free (void * ,yyscan_t yyscanner ); /* Begin user sect3 */ -#define CodesIOKernel_wrap(n) 1 +#define CodesIOKernel_wrap(yyscanner) 1 #define YY_SKIP_YYWRAP #define yytext_ptr yytext_r @@ -260,7 +261,7 @@ FILE *CodesIOKernel_get_out (yyscan_t yyscanner ); void CodesIOKernel_set_out (FILE * out_str ,yyscan_t yyscanner ); -int CodesIOKernel_get_leng (yyscan_t yyscanner ); +yy_size_t CodesIOKernel_get_leng (yyscan_t yyscanner ); char *CodesIOKernel_get_text (yyscan_t yyscanner ); @@ -268,6 +269,10 @@ int CodesIOKernel_get_lineno (yyscan_t yyscanner ); void CodesIOKernel_set_lineno (int line_number ,yyscan_t yyscanner ); +int CodesIOKernel_get_column (yyscan_t yyscanner ); + +void CodesIOKernel_set_column (int column_no ,yyscan_t yyscanner ); + YYSTYPE * CodesIOKernel_get_lval (yyscan_t yyscanner ); void CodesIOKernel_set_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); @@ -340,6 +345,6 @@ extern int CodesIOKernel_lex \ #line 117 "../src/iokernellang/codeslexer.l" -#line 344 "../src/iokernellang/codeslexer.h" +#line 349 "../src/iokernellang/codeslexer.h" #undef CodesIOKernel_IN_HEADER #endif /* CodesIOKernel_HEADER_H */ diff --git a/src/network-workloads/model-net-mpi-replay.c 
b/src/network-workloads/model-net-mpi-replay.c index bfc368ab..018c4337 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #include #include #include "codes/codes-workload.h" @@ -18,8 +20,11 @@ #include "codes/quickhash.h" #include "codes/codes-jobmap.h" #include "codes/congestion-controller-core.h" +#include "codes/surrogate/init.h" +#include "surrogate/app-iteration-predictor/common.h" /* turning on track lp will generate a lot of output messages */ +#define DBG_COMM 1 #define MN_LP_NM "modelnet_dragonfly_custom" #define CONTROL_MSG_SZ 64 #define TRACE -1 @@ -32,10 +37,12 @@ #define MAX_STATS 65536 #define COL_TAG 1235 #define BAR_TAG 1234 -#define PRINT_SYNTH_TRAFFIC 1 +#define PRINT_SYNTH_TRAFFIC 0 #define MAX_JOBS 64 +#define MAX_PERIODS_PER_APP 512 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine #define OUTPUT_MARKS 0 +#define LP_DEBUG 0 static int msg_size_hash_compare( void *key, struct qhash_head *link); @@ -61,7 +68,6 @@ char workload_type[128]; char workload_name[128]; char workload_file[8192]; char offset_file[8192]; -static int wrkld_id; static int num_net_traces = 0; static int prioritize_collectives = 0; static int num_dumpi_traces = 0; @@ -84,10 +90,17 @@ static lp_io_handle io_handle; static unsigned int lp_io_use_suffix = 0; static int do_lp_io = 0; +/* Workload JSON file mapping structure */ +struct codes_workload_json_mapping { + char workload_type[MAX_NAME_LENGTH_WKLD]; + char json_path[8192]; +}; + /* variables for loading multiple applications */ char workloads_conf_file[8192]; char workloads_timer_file[8192]; char workloads_period_file[8192]; +char workload_json_files[8192]; char alloc_file[8192]; int num_traces_of_job[MAX_JOBS]; int is_job_synthetic[MAX_JOBS]; //0 if job is not synthetic 1 if job is @@ -96,9 +109,11 @@ float 
mean_interval_of_job[MAX_JOBS]; long job_timer1[MAX_JOBS]; long job_timer2[MAX_JOBS]; int period_count[MAX_JOBS]; -long period_time[MAX_JOBS][64]; -float period_interval[MAX_JOBS][64]; +double period_time[MAX_JOBS][MAX_PERIODS_PER_APP]; +float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP]; char file_name_of_job[MAX_JOBS][8192]; +struct codes_workload_json_mapping workload_json_mappings[MAX_JOBS]; +int workload_json_mapping_count; tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0}; @@ -134,6 +149,7 @@ static int syn_type = 0; FILE * workload_log = NULL; FILE * msg_size_log = NULL; +FILE * iteration_log = NULL; FILE * workload_agg_log = NULL; FILE * workload_meta_log = NULL; @@ -156,6 +172,10 @@ static double sampling_interval = 5000000; static double sampling_end_time = 3000000000; static int enable_debug = 0; +// Surrogate variables +struct app_iteration_predictor *iter_predictor = NULL; +static int nw_id_counter = 0; + /* set group context */ struct codes_mctx mapping_context; enum MAPPING_CONTEXTS @@ -185,7 +205,9 @@ enum MPI_NW_EVENTS CLI_BCKGND_GEN, CLI_BCKGND_CHANGE, CLI_NBR_FINISH, - CLI_OTHER_FINISH //received when another workload has finished + CLI_OTHER_FINISH, //received when another workload has finished + // Surrogate events + SURR_SKIP_ITERATION, // skips one (several) iteration(s) of simulation }; /* type of synthetic traffic */ @@ -216,7 +238,6 @@ struct mpi_msgs_queue int source_rank; int dest_rank; int64_t num_bytes; - int64_t seq_id; tw_stime req_init_time; dumpi_req_id req_id; struct qlist_head ql; @@ -226,8 +247,8 @@ struct mpi_msgs_queue struct completed_requests { unsigned int req_id; + int index; // for rollbacking struct qlist_head ql; - int index; }; /* for wait operations, store the pending operation and number of completed waits so far. 
*/ @@ -238,7 +259,6 @@ struct pending_waits int num_completed; int count; tw_stime start_time; - struct qlist_head ql; }; struct msg_size_info @@ -273,27 +293,41 @@ typedef struct mpi_msgs_queue mpi_msgs_queue; typedef struct completed_requests completed_requests; typedef struct pending_waits pending_waits; -/* state of the network LP. It contains the pointers to send/receive lists */ +/* + * state of the network LP. It contains the pointers to send/receive lists + * + * nw-lp's can only run one job! Which all start at time 0 + * + * Three possible states for nw-lp: + * - run application (non-synthetic workload) + * - run background noise pattern (synthetic workload) + * - do nothing + **/ struct nw_state { - long num_events_per_lp; - tw_lpid nw_id; - short wrkld_end; - int app_id; - int local_rank; - int qos_level; +#if LP_DEBUG + size_t num_events_processed; +#endif /* if LP_DEBUG */ + + tw_lpid nw_id; // compute node id, as labeled by the network + tw_lpid nw_id_in_pe; // compute node id for this PE + int local_rank; // id local to the application or synthetic workload, this is the number that the application sees, their phony "MPI rank" + + // Parameters used for non-synthetic workloads + short wrkld_id; // workload machinery in charge, e.g, swm + int app_id; // application id, position on the queue for this app to run + int * known_completed_jobs; //array of whether this rank knows other jobs are completed. + struct rc_stack * processed_ops; + struct rc_stack * processed_wait_op; + struct rc_stack * matched_reqs; + struct pending_waits * wait_op; // Pending wait operation + // Parameters used for synthetic workload parameters int synthetic_pattern; int is_finished; int num_own_job_ranks_completed; //counted by the root rank 0 of a job - //array of whether this rank knows other jobs are completed. 
- int * known_completed_jobs; - - struct rc_stack * processed_ops; - struct rc_stack * processed_wait_op; - struct rc_stack * matched_reqs; -// struct rc_stack * indices; + int qos_level; /* count of sends, receives, collectives and delays */ unsigned long num_sends; @@ -335,9 +369,6 @@ struct nw_state struct qlist_head completed_reqs; tw_stime cur_interval_end; - - /* Pending wait operation */ - struct pending_waits * wait_op; /* Message size latency information */ struct qhash_table * msg_sz_table; @@ -371,7 +402,7 @@ struct nw_state struct nw_message { // forward message handler - int msg_type; + enum MPI_NW_EVENTS msg_type; int op_type; int num_rngs; model_net_event_return event_rc; @@ -383,7 +414,6 @@ struct nw_message int dest_rank; int64_t num_bytes; int num_matched; - int data_type; double sim_start_time; // for callbacks - time message was received double msg_send_time; @@ -394,23 +424,77 @@ struct nw_message int found_match; short wait_completed; short rend_send; + int resume_at_iter; } fwd; - struct - { - int saved_perm; - double saved_send_time; - double saved_send_time_sample; - double saved_recv_time; - double saved_recv_time_sample; - double saved_wait_time; - double saved_wait_time_sample; - double saved_delay; - double saved_delay_sample; - double saved_marker_time; - int64_t saved_num_bytes; - int saved_syn_length; - unsigned long saved_prev_switch; - double saved_prev_max_time; + + // A different struct for each type of MPI_NW_EVENTS (it can be used for the commit or the reverse handler) + union { + // For CLI_BCKGND_GEN + struct { + int saved_syn_length; + int saved_perm; // Used by PERMUTATION + unsigned long saved_prev_switch; // Used by PERMUTATION + unsigned long long saved_gen_data; + } gen; + + // For CLI_BCKGND_ARRIVE and MPI_SEND_ARRIVED_CB + struct { + double saved_prev_max_time; + double saved_send_time; + double saved_send_time_sample; + } arrive; + + // For CLI_BCKGND_CHANGE + struct { + double saved_send_time; + double 
saved_marker_time; + } change; + + // For MPI_OP_GET_NEXT there are also different types + struct { + double saved_elapsed_time; + union { + // CODES_WK_ALLREDUCE + struct { + double saved_send_time; + double saved_delay; + } all_reduce; + // CODES_WK_RECV and CODES_WK_IRECV + struct { + double saved_recv_time; + double saved_recv_time_sample; + } recv; + // CODES_WK_DELAY + struct { + double saved_delay; + double saved_delay_sample; + } delay; + // CODES_WK_END and CODES_WK_MARK + struct { + double saved_marker_time; + bool was_skipped; + } mark; + }; + } mpi_next; + + // For MPI_SEND_ARRIVED and MPI_REND_ARRIVED and MPI_SEND_POSTED + struct { + double saved_wait_time; + double saved_wait_time_sample; + double saved_recv_time; + double saved_recv_time_sample; + int64_t saved_num_bytes; + } mpi_send; + + // For MPI_REND_ACK_ARRIVED + struct { + int64_t saved_num_bytes; + } mpi_ack; + + // For SURR_SKIP_ITERATION + struct { + double saved_marker_time; + } surr_skip; } rc; }; @@ -666,7 +750,7 @@ void handle_other_finish( assert(ns->app_id == 0); //make sure that only the root workload is getting this notification assert(ns->local_rank == 0); //make sure that only the root rank is getting this notification - printf("App %d: Received finished workload notification",ns->app_id); + printf("App %d: Received finished workload notification\n", ns->app_id); // if(is_job_synthetic[ns->app_id]) // return; //nothing for synthetic (background) ranks to do here // printf(" And I am not synthetic\n"); @@ -795,6 +879,7 @@ void finish_bckgnd_traffic_rc( (void)lp; ns->is_finished = 0; + ns->elapsed_time = msg->rc.mpi_next.saved_elapsed_time; return; } void finish_bckgnd_traffic( @@ -806,6 +891,7 @@ void finish_bckgnd_traffic( (void)b; (void)msg; ns->is_finished = 1; + msg->rc.mpi_next.saved_elapsed_time = ns->elapsed_time; ns->elapsed_time = tw_now(lp) - ns->start_time; printf("\n LP %llu App %d completed sending data %llu completed at time %lf ", LLU(lp->gid),ns->app_id, 
ns->gen_data, tw_now(lp)); @@ -824,14 +910,13 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp } if(bf->c2) { - s->prev_switch = m->rc.saved_prev_switch; - s->saved_perm_dest = m->rc.saved_perm; + s->prev_switch = m->rc.gen.saved_prev_switch; + s->saved_perm_dest = m->rc.gen.saved_perm; tw_rand_reverse_unif(lp->rng); } - int i; - for (i=0; i < m->rc.saved_syn_length; i++){ + s->gen_data = m->rc.gen.saved_gen_data; + for (int i=0; i < m->rc.gen.saved_syn_length; i++){ model_net_event_rc2(lp, &m->event_rc); - s->gen_data -= payload_sz; num_syn_bytes_sent -= payload_sz; s->num_bytes_sent -= payload_sz; s->ross_sample.num_bytes_sent -= payload_sz; @@ -842,8 +927,13 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp if(bf->c5) finish_bckgnd_traffic_rc(s, bf, m, lp); - if(bf->c7) + if(bf->c7) { + s->saved_perm_dest = m->rc.gen.saved_perm; tw_rand_reverse_unif(lp->rng); + } + if (bf->c13) { + iter_predictor->model.predict_rc(lp, s->nw_id_in_pe); + } } /* generate synthetic traffic */ @@ -883,8 +973,8 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l case PERMUTATION: { - m->rc.saved_prev_switch = s->prev_switch; //for reverse computation - m->rc.saved_perm = s->saved_perm_dest; + m->rc.gen.saved_prev_switch = s->prev_switch; //for reverse computation + m->rc.gen.saved_perm = s->saved_perm_dest; length = 1; dest_svr = (int*) calloc(1, sizeof(int)); @@ -902,7 +992,6 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l { // printf("%d - %d >= %d\n",s->gen_data,s->prev_switch,perm_switch_thresh); bf->c2 = 1; - m->rc.saved_prev_switch = s->prev_switch; s->prev_switch = s->gen_data; //Amount of data pushed at time when switch initiated dest_svr[0] = tw_rand_integer(lp->rng, 0, num_clients - 1); if(dest_svr[0] == s->local_rank) @@ -971,7 +1060,7 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l tw_error(TW_LOC, 
"Undefined traffic pattern"); } /* Record length for reverse handler*/ - m->rc.saved_syn_length = length; + m->rc.gen.saved_syn_length = length; char prio[12]; switch(s->qos_level){ @@ -998,6 +1087,9 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l length = 0; } } + + m->rc.gen.saved_gen_data = s->gen_data; + if(length > 0) { // m->event_array_rc = (model_net_event_return) malloc(length * sizeof(model_net_event_return)); @@ -1032,10 +1124,19 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l /* New event after MEAN_INTERVAL */ tw_stime ts = mean_interval_of_job[s->app_id]; - tw_event * e; - nw_message * m_new; - e = tw_event_new(lp->gid, ts, lp); - m_new = (struct nw_message*)tw_event_data(e); + if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, 0)) { // background synthetic lps have no iterations + bf->c13 = 1; + struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe); + double const restarting_background_at = iter_pred.restart_at; + // this check is necessary because we don't rely on iteration count for switch like applications do + if (restarting_background_at > tw_now(lp)) { + long const periods_to_jump = ceil((restarting_background_at - tw_now(lp)) / mean_interval_of_job[s->app_id]); + ts *= periods_to_jump; + s->gen_data += periods_to_jump * (length + payload_sz); + } + } + tw_event * e = tw_event_new(lp->gid, ts, lp); + nw_message * m_new = (struct nw_message*)tw_event_data(e); m_new->msg_type = CLI_BCKGND_GEN; tw_event_send(e); @@ -1062,23 +1163,24 @@ void arrive_syn_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) num_syn_bytes_recvd -= data; s->num_bytes_recvd -= data; s->ross_sample.num_bytes_recvd -= data; - s->send_time = m->rc.saved_send_time; - s->ross_sample.send_time = m->rc.saved_send_time_sample; - if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time) + s->send_time = m->rc.arrive.saved_send_time; + 
s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample; + if(bf->c0) { - s->max_time = m->rc.saved_prev_max_time; - s->ross_sample.max_time = m->rc.saved_prev_max_time; + s->max_time = m->rc.arrive.saved_prev_max_time; + s->ross_sample.max_time = m->rc.arrive.saved_prev_max_time; } } void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) { (void)bf; (void)lp; - m->rc.saved_send_time = s->send_time; - m->rc.saved_send_time_sample = s->ross_sample.send_time; + m->rc.arrive.saved_send_time = s->send_time; + m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time; if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time) { - m->rc.saved_prev_max_time = s->max_time; + bf->c0 = 1; + m->rc.arrive.saved_prev_max_time = s->max_time; s->max_time = tw_now(lp) - m->fwd.sim_start_time; s->ross_sample.max_time = tw_now(lp) - m->fwd.sim_start_time; } @@ -1112,6 +1214,45 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) } } } + +// We never rollback all op messages properly. This is because we have not found any situation where we have to fully rollback a SURR_SKIP_ITERATION event. Any event that schedules a SURR_SKIP_ITERATION event will have been completed long before the SURR_SKIP_ITERATION event is processed. 
+static void skip_to_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {} + +static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) +{ + struct codes_workload_op mpi_op; + int resume_at_iter = m->fwd.resume_at_iter; + m->rc.surr_skip.saved_marker_time = tw_now(lp); + + // consuming all events until indicated iteration is reached + bool reached_end = false; + while (!reached_end) { + codes_workload_get_next(s->wrkld_id, s->app_id, s->local_rank, &mpi_op); + + switch (mpi_op.op_type) { + case CODES_WK_MARK: + if (mpi_op.u.send.tag == resume_at_iter) { + reached_end = true; + codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, &mpi_op); + } + break; + // If we reach the end of simulation, rollback once to allow the operation to be processed normally + case CODES_WK_END: + codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, &mpi_op); + reached_end = true; + break; + default: + break; + } + } + + tw_event *e = tw_event_new(lp->gid, 0.0, lp); + nw_message* msg = (nw_message*) tw_event_data(e); + msg->msg_type = MPI_OP_GET_NEXT; + msg->rc.mpi_next.mark.was_skipped = true; + tw_event_send(e); +} + /* Debugging functions, may generate unused function warning */ /*static void print_waiting_reqs(uint32_t * reqs, int count) { @@ -1154,39 +1295,24 @@ static int clear_completed_reqs(nw_state * s, (void)s; (void)lp; - int i, matched = 0; + int matched = 0; - for( i = 0; i < count; i++) - { - struct qlist_head * ent = NULL; - struct completed_requests * current = NULL; - struct completed_requests * prev = NULL; + struct qlist_head * ent, * _; + struct completed_requests * current = NULL; - int index = 0; - qlist_for_each(ent, &s->completed_reqs) - { - if(prev) - { - rc_stack_push(lp, prev, free, s->matched_reqs); - prev = NULL; - } - - current = qlist_entry(ent, completed_requests, ql); - current->index = index; - if(current->req_id == reqs[i]) - { + int index = 0; + qlist_for_each_safe(ent, _, 
&s->completed_reqs) { + current = qlist_entry(ent, completed_requests, ql); + for(int i = 0; i < count; i++) { + if(current->req_id == reqs[i]) { + current->index = index; ++matched; - qlist_del(¤t->ql); - prev = current; + qlist_del(ent); + rc_stack_push(lp, current, free, s->matched_reqs); + break; } - ++index; - } - - if(prev) - { - rc_stack_push(lp, prev, free, s->matched_reqs); - prev = NULL; - } + } + index++; } return matched; } @@ -1199,7 +1325,7 @@ static void add_completed_reqs(nw_state * s, { struct completed_requests * req = (struct completed_requests*)rc_stack_pop(s->matched_reqs); // turn on only if wait-all unmatched error arises in optimistic mode. - qlist_add(&req->ql, &s->completed_reqs); + qlist_add_at_index(&req->ql, &s->completed_reqs, req->index - count + i + 1); }//end for } @@ -1228,7 +1354,6 @@ static int notify_posted_wait(nw_state* s, if(op_type == CODES_WK_WAIT && (wait_elem->req_ids[0] == completed_req)) { - m->fwd.wait_completed = 1; wait_completed = 1; } else if(op_type == CODES_WK_WAITALL @@ -1241,6 +1366,7 @@ static int notify_posted_wait(nw_state* s, if(wait_elem->req_ids[i] == completed_req) { wait_elem->num_completed++; + m->fwd.wait_completed++; //This is just the individual request handle - not the entire wait. 
if(wait_elem->num_completed > wait_elem->count) printf("\n Num completed %d count %d LP %llu ", wait_elem->num_completed, @@ -1252,10 +1378,13 @@ static int notify_posted_wait(nw_state* s, if(wait_elem->num_completed >= wait_elem->count) { if(enable_debug) - fprintf(workload_log, "\n(%lf) APP ID %d MPI WAITALL COMPLETED AT %llu ", tw_now(lp), s->app_id, LLU(s->nw_id)); + { + // fprintf(workload_log, "\n(%lf) APP ID %d MPI WAITALL COMPLETED AT %llu ", tw_now(lp), s->app_id, LLU(s->nw_id)); + fprintf(workload_log, "\n (%lf) APP ID %d MPI WAITALL SOURCE %d COMPLETED", + tw_now(lp), s->app_id, s->local_rank); + } wait_completed = 1; } - m->fwd.wait_completed = 1; //This is just the individual request handle - not the entire wait. } } } @@ -1299,7 +1428,12 @@ static void codes_exec_mpi_wait(nw_state* s, tw_bf * bf, nw_message * m, tw_lp* { /* check in the completed receives queue if the request ID has already been completed.*/ -// printf("\n Wait posted rank id %d ", s->nw_id); + if(enable_debug) + { + fprintf(workload_log, "\n (%lf) APP ID %d MPI WAIT POSTED SOURCE %d", + tw_now(lp), s->app_id, s->local_rank); + } + assert(!s->wait_op); unsigned int req_id = mpi_op->u.wait.req_id; @@ -1383,7 +1517,11 @@ static void codes_exec_mpi_wait_all( struct codes_workload_op * mpi_op) { if(enable_debug) - fprintf(workload_log, "\n MPI WAITALL POSTED AT %llu ", LLU(s->nw_id)); + { + // fprintf(workload_log, "\n MPI WAITALL POSTED AT %llu ", LLU(s->nw_id)); + fprintf(workload_log, "\n (%lf) APP ID %d MPI WAITALL POSTED SOURCE %d", + tw_now(lp), s->app_id, s->local_rank); + } if(enable_sampling) { @@ -1488,6 +1626,7 @@ static int rm_matching_rcv(nw_state * ns, && ((qi->source_rank == qitem->source_rank) || qi->source_rank == -1)) { matched = 1; + m->rc.mpi_send.saved_num_bytes = qi->num_bytes; qi->num_bytes = qitem->num_bytes; break; } @@ -1511,8 +1650,8 @@ static int rm_matching_rcv(nw_state * ns, else { bf->c12 = 1; - m->rc.saved_recv_time = ns->recv_time; - 
m->rc.saved_recv_time_sample = ns->ross_sample.recv_time; + m->rc.mpi_send.saved_recv_time = ns->recv_time; + m->rc.mpi_send.saved_recv_time_sample = ns->ross_sample.recv_time; ns->recv_time += (tw_now(lp) - m->fwd.sim_start_time); ns->ross_sample.recv_time += (tw_now(lp) - m->fwd.sim_start_time); } @@ -1579,8 +1718,8 @@ static int rm_matching_send(nw_state * ns, send_ack_back(ns, bf, m, lp, qi, qitem->req_id); } - m->rc.saved_recv_time = ns->recv_time; - m->rc.saved_recv_time_sample = ns->ross_sample.recv_time; + m->rc.mpi_next.recv.saved_recv_time = ns->recv_time; + m->rc.mpi_next.recv.saved_recv_time_sample = ns->ross_sample.recv_time; ns->recv_time += (tw_now(lp) - qitem->req_init_time); ns->ross_sample.recv_time += (tw_now(lp) - qitem->req_init_time); @@ -1630,6 +1769,7 @@ static void codes_issue_next_event(tw_lp* lp) msg = (nw_message*)tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; + msg->rc.mpi_next.mark.was_skipped = false; tw_event_send(e); } @@ -1642,8 +1782,8 @@ static void codes_exec_comp_delay( tw_stime ts; nw_message* msg; - m->rc.saved_delay = s->compute_time; - m->rc.saved_delay_sample = s->ross_sample.compute_time; + m->rc.mpi_next.delay.saved_delay = s->compute_time; + m->rc.mpi_next.delay.saved_delay_sample = s->ross_sample.compute_time; s->compute_time += (mpi_op->u.delay.nsecs/compute_time_speedup); s->ross_sample.compute_time += (mpi_op->u.delay.nsecs/compute_time_speedup); ts = (mpi_op->u.delay.nsecs/compute_time_speedup); @@ -1659,9 +1799,16 @@ static void codes_exec_comp_delay( //ts += g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise); // assert(ts > 0); + if(enable_debug) + { + fprintf(workload_log, "\n (%lf) APP %d MPI DELAY SOURCE %d DURATION %lf", + tw_now(lp), s->app_id, s->local_rank, ts); + } + e = tw_event_new( lp->gid, ts , lp ); msg = (nw_message*)tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; + msg->rc.mpi_next.mark.was_skipped = false; tw_event_send(e); } @@ -1673,8 +1820,8 @@ static void codes_exec_mpi_recv_rc( 
nw_message* m, tw_lp* lp) { - ns->recv_time = m->rc.saved_recv_time; - ns->ross_sample.recv_time = m->rc.saved_recv_time_sample; + ns->recv_time = m->rc.mpi_next.recv.saved_recv_time; + ns->ross_sample.recv_time = m->rc.mpi_next.recv.saved_recv_time_sample; if(bf->c11) codes_issue_next_event_rc(lp); @@ -1684,8 +1831,6 @@ static void codes_exec_mpi_recv_rc( if(m->fwd.found_match >= 0) { - ns->recv_time = m->rc.saved_recv_time; - ns->ross_sample.recv_time = m->rc.saved_recv_time_sample; //int queue_count = qlist_count(&ns->arrival_queue); mpi_msgs_queue * qi = (mpi_msgs_queue*)rc_stack_pop(ns->processed_ops); @@ -1735,9 +1880,8 @@ static void codes_exec_mpi_recv( If no matching isend is found, the receive operation is queued in the pending queue of receive operations. */ - m->rc.saved_recv_time = s->recv_time; - m->rc.saved_recv_time_sample = s->ross_sample.recv_time; - m->rc.saved_num_bytes = mpi_op->u.recv.num_bytes; + m->rc.mpi_next.recv.saved_recv_time = s->recv_time; + m->rc.mpi_next.recv.saved_recv_time_sample = s->ross_sample.recv_time; mpi_msgs_queue * recv_op = (mpi_msgs_queue*) malloc(sizeof(mpi_msgs_queue)); recv_op->req_init_time = tw_now(lp); @@ -1754,6 +1898,20 @@ static void codes_exec_mpi_recv( // printf("\n Receive op posted num bytes %llu source %d ", recv_op->num_bytes, // recv_op->source_rank); + if(enable_debug) + { + if(mpi_op->op_type == CODES_WK_RECV) + { + fprintf(workload_log, "\n (%lf) APP %d MPI RECV SOURCE %d DEST %d BYTES %"PRId64, + tw_now(lp), s->app_id, recv_op->source_rank, recv_op->dest_rank, recv_op->num_bytes); + } + else + { + fprintf(workload_log, "\n (%lf) APP ID %d MPI IRECV SOURCE %d DEST %d BYTES %"PRId64, + tw_now(lp), s->app_id, recv_op->source_rank, recv_op->dest_rank, recv_op->num_bytes); + } + } + int found_matching_sends = rm_matching_send(s, bf, m, lp, recv_op); /* for mpi irecvs, this is a non-blocking receive so just post it and move on with the trace read. 
*/ @@ -1762,6 +1920,8 @@ static void codes_exec_mpi_recv( bf->c6 = 1; codes_issue_next_event(lp); } + + /* save the req id inserted in the completed queue for reverse computation. */ if(found_matching_sends < 0) { @@ -1791,7 +1951,7 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_ int indx = s->sampling_indx; s->mpi_wkld_samples[indx].num_sends_sample--; - s->mpi_wkld_samples[indx].num_bytes_sample -= m->rc.saved_num_bytes; + s->mpi_wkld_samples[indx].num_bytes_sample -= m->rc.mpi_ack.saved_num_bytes; if(bf->c1) { @@ -1817,9 +1977,9 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_ if(bf->c3) { - s->num_bytes_sent -= m->rc.saved_num_bytes; - s->ross_sample.num_bytes_sent -= m->rc.saved_num_bytes; - num_bytes_sent -= m->rc.saved_num_bytes; + s->num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes; + s->ross_sample.num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes; + num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes; } } /* executes MPI send and isend operations */ @@ -1882,7 +2042,7 @@ static void codes_exec_mpi_send(nw_state* s, if(lp->gid == TRACK_LP) printf("\n Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag); - m->rc.saved_num_bytes = mpi_op->u.send.num_bytes; + m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes; /* model-net event */ tw_lpid dest_rank = codes_mapping_get_lpid_from_relative(global_dest_rank, NULL, "nw-lp", NULL, 0); @@ -1982,12 +2142,18 @@ static void codes_exec_mpi_send(nw_state* s, { if(mpi_op->op_type == CODES_WK_ISEND) { - fprintf(workload_log, "\n (%lf) APP %d MPI ISEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64, - tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes); + // fprintf(workload_log, "\n (%lf) APP %d MPI ISEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64, + // tw_now(lp), s->app_id, 
LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes); + fprintf(workload_log, "\n (%lf) APP %d MPI ISEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64, + tw_now(lp), s->app_id, LLU(remote_m.fwd.src_rank), remote_m.fwd.dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes); } else - fprintf(workload_log, "\n (%lf) APP ID %d MPI SEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64, - tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes); + { + // fprintf(workload_log, "\n (%lf) APP ID %d MPI SEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64, + // tw_now(lp), s->app_id, LLU(s->nw_id), global_dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes); + fprintf(workload_log, "\n (%lf) APP ID %d MPI SEND SOURCE %llu DEST %d TAG %d BYTES %"PRId64, + tw_now(lp), s->app_id, LLU(remote_m.fwd.src_rank), remote_m.fwd.dest_rank, mpi_op->u.send.tag, mpi_op->u.send.num_bytes); + } } if(is_rend || is_eager) { @@ -2029,13 +2195,14 @@ static void update_completed_queue_rc(nw_state * s, tw_bf * bf, nw_message * m, { struct pending_waits* wait_elem = (struct pending_waits*)rc_stack_pop(s->processed_wait_op); s->wait_op = wait_elem; - s->wait_time = m->rc.saved_wait_time; - s->ross_sample.wait_time = m->rc.saved_wait_time_sample; + s->wait_time = m->rc.mpi_send.saved_wait_time; + s->ross_sample.wait_time = m->rc.mpi_send.saved_wait_time_sample; add_completed_reqs(s, lp, m->fwd.num_matched); codes_issue_next_event_rc(lp); } - if(m->fwd.wait_completed > 0) - s->wait_op->num_completed--; + if(m->fwd.wait_completed > 0) { + s->wait_op->num_completed -= m->fwd.wait_completed; + } } static void update_completed_queue(nw_state* s, @@ -2070,8 +2237,8 @@ static void update_completed_queue(nw_state* s, bf->c31 = 1; m->fwd.num_matched = clear_completed_reqs(s, lp, s->wait_op->req_ids, s->wait_op->count); - m->rc.saved_wait_time = s->wait_time; - m->rc.saved_wait_time_sample = s->ross_sample.wait_time; + m->rc.mpi_send.saved_wait_time = 
s->wait_time; + m->rc.mpi_send.saved_wait_time_sample = s->ross_sample.wait_time; s->wait_time += (tw_now(lp) - s->wait_op->start_time); s->ross_sample.wait_time += (tw_now(lp) - s->wait_op->start_time); @@ -2165,6 +2332,7 @@ static void update_arrival_queue_rc(nw_state* s, if(m->fwd.found_match >= 0) { mpi_msgs_queue * qi = (mpi_msgs_queue*)rc_stack_pop(s->processed_ops); + qi->num_bytes = m->rc.mpi_send.saved_num_bytes; // int queue_count = qlist_count(&s->pending_recvs_queue); if(m->fwd.found_match == 0) @@ -2187,8 +2355,8 @@ static void update_arrival_queue_rc(nw_state* s, } if(bf->c12) { - s->recv_time = m->rc.saved_recv_time; - s->ross_sample.recv_time = m->rc.saved_recv_time_sample; + s->recv_time = m->rc.mpi_send.saved_recv_time; + s->ross_sample.recv_time = m->rc.mpi_send.saved_recv_time_sample; } //if(bf->c10) @@ -2216,8 +2384,8 @@ static void update_arrival_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp //if(s->local_rank != m->fwd.dest_rank) // printf("\n Dest rank %d local rank %d ", m->fwd.dest_rank, s->local_rank); - m->rc.saved_recv_time = s->recv_time; - m->rc.saved_recv_time_sample = s->ross_sample.recv_time; + m->rc.mpi_send.saved_recv_time = s->recv_time; + m->rc.mpi_send.saved_recv_time_sample = s->ross_sample.recv_time; s->num_bytes_recvd += m->fwd.num_bytes; s->ross_sample.num_bytes_recvd += m->fwd.num_bytes; num_bytes_recvd += m->fwd.num_bytes; @@ -2280,8 +2448,8 @@ static void update_message_time( (void)bf; (void)lp; - m->rc.saved_send_time = s->send_time; - m->rc.saved_send_time_sample = s->ross_sample.send_time; + m->rc.arrive.saved_send_time = s->send_time; + m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time; s->send_time += m->fwd.msg_send_time; s->ross_sample.send_time += m->fwd.msg_send_time; } @@ -2294,8 +2462,8 @@ static void update_message_time_rc( { (void)bf; (void)lp; - s->send_time = m->rc.saved_send_time; - s->ross_sample.send_time = m->rc.saved_send_time_sample; + s->send_time = m->rc.arrive.saved_send_time; 
+ s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample; } /* initializes the network node LP, loads the trace file in the structs, calls the first MPI operation to be executed */ @@ -2305,6 +2473,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) memset(s, 0, sizeof(*s)); s->nw_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + s->nw_id_in_pe = nw_id_counter++; s->mpi_wkld_samples = (struct mpi_workload_sample*)calloc(MAX_STATS, sizeof(struct mpi_workload_sample)); s->sampling_indx = 0; s->is_finished = 0; @@ -2318,6 +2487,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->qos_level = 0; //TODO: We need a more elegant solution for determining if qos is enabled or not. // This had been -1 but if qos is not configured (single job no workload conf file) // then this will error out + s->wrkld_id = -1; char type_name[512]; @@ -2327,6 +2497,8 @@ void nw_test_init(nw_state* s, tw_lp* lp) assert(num_net_traces <= num_mpi_lps); struct codes_jobmap_id lid; + online_comm_params oc_params; + dumpi_trace_params params_d; if(alloc_spec) { @@ -2356,14 +2528,10 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->known_completed_jobs = calloc(num_jobs, sizeof(int)); if (strcmp(workload_type, "dumpi") == 0){ - dumpi_trace_params params_d; - strcpy(params_d.file_name, file_name_of_job[lid.job]); - params_d.num_net_traces = num_traces_of_job[lid.job]; - params_d.nprocs = nprocs; - params = (char*)¶ms_d; strcpy(params_d.file_name, file_name_of_job[lid.job]); params_d.num_net_traces = num_traces_of_job[lid.job]; - params = (char*)¶ms_d; + params_d.nprocs = nprocs; + params = (void*)¶ms_d; strcpy(type_name, "dumpi-trace-workload"); if(strlen(workloads_conf_file) > 0) @@ -2378,10 +2546,8 @@ void nw_test_init(nw_state* s, tw_lp* lp) strcpy(params_d.cortex_gen, cortex_gen); #endif } - else if(strcmp(workload_type, "online") == 0){ - - online_comm_params oc_params; - + else if(strcmp(workload_type, "swm-online") == 0){ + if(strlen(workload_name) > 0) { strcpy(oc_params.workload_name, 
workload_name); @@ -2417,8 +2583,36 @@ void nw_test_init(nw_state* s, tw_lp* lp) /*TODO: nprocs is different for dumpi and online workload. for * online, it is the number of ranks to be simulated. */ oc_params.nprocs = num_traces_of_job[lid.job]; - params = (char*)&oc_params; - strcpy(type_name, "online_comm_workload"); + params = (void*)&oc_params; + strcpy(type_name, "swm_online_comm_workload"); + } + //Xin: add conceputual online workload + else if(strcmp(workload_type, "conc-online") == 0){ + + if(strlen(workload_name) > 0) + { + strcpy(oc_params.workload_name, workload_name); + } + else if(strlen(workloads_conf_file) > 0) + { + strcpy(oc_params.workload_name, file_name_of_job[lid.job]); + } + + /* Look up custom JSON path for this workload */ + oc_params.file_path[0] = '\0'; + for(int i = 0; i < workload_json_mapping_count; i++) { + if(strcmp(workload_json_mappings[i].workload_type, oc_params.workload_name) == 0) { + strcpy(oc_params.file_path, workload_json_mappings[i].json_path); + break; + } + } + + /*TODO: nprocs is different for dumpi and online workload. for + * online, it is the number of ranks to be simulated. 
*/ + // printf("conc-online num_traces_of_job %d\n", num_traces_of_job[lid.job]); + oc_params.nprocs = num_traces_of_job[lid.job]; + params = (void*)&oc_params; + strcpy(type_name, "conc_online_comm_workload"); } int rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", NULL, &num_qos_levels); @@ -2444,12 +2638,10 @@ void nw_test_init(nw_state* s, tw_lp* lp) rc_stack_create(&s->processed_ops); rc_stack_create(&s->processed_wait_op); rc_stack_create(&s->matched_reqs); -// rc_stack_create(&s->indices); assert(s->processed_ops != NULL); assert(s->processed_wait_op != NULL); assert(s->matched_reqs != NULL); -// assert(s->indices != NULL); /* clock starts ticking when the first event is processed */ s->start_time = tw_now(lp); @@ -2458,11 +2650,10 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->compute_time = 0; s->elapsed_time = 0; - s->app_id = lid.job; - s->local_rank = lid.rank; - + bool am_i_synthetic = false; if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0) { + am_i_synthetic = true; sscanf(file_name_of_job[lid.job], "synthetic%d", &synthetic_pattern); if(synthetic_pattern <=0 || synthetic_pattern > 6) { @@ -2482,6 +2673,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) e = tw_event_new(lp->gid, ts, lp); m_new = (nw_message*)tw_event_data(e); m_new->msg_type = CLI_BCKGND_GEN; + printf("\naddress difference = %ld\n", (&m_new->fwd.app_id - (int *)m_new)); tw_event_send(e); is_synthetic = 1; @@ -2493,8 +2685,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) e2 = tw_event_new(lp->gid, ts2, lp); m_new2 = (nw_message*)tw_event_data(e2); m_new2->msg_type = CLI_BCKGND_CHANGE; - m_new2->fwd.msg_send_time = period_interval[lid.job][k]; - m_new2->rc.saved_send_time = mean_interval_of_job[s->app_id]; + m_new2->fwd.msg_send_time = period_interval[lid.job][k]; // Warning: this is overwriting a variable meant for message type MPI_SEND_ARRIVED_CB tw_event_send(e2); } } @@ -2502,7 +2693,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) } else { - wrkld_id = 
codes_workload_load(type_name, params, s->app_id, s->local_rank); + s->wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank); codes_issue_next_event(lp); } if(enable_sampling && sampling_interval > 0) @@ -2515,20 +2706,46 @@ void nw_test_init(nw_state* s, tw_lp* lp) " num_sends num_bytes_sent sample_end_time"); } } + + if (iter_predictor) { + if (am_i_synthetic) { + struct app_iter_node_config conf = { + .app_id = s->app_id, + .type = NODE_TYPE_background_noise, + }; + iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + } else { + assert(s->wrkld_id != -1); + int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank); + if (ending_iter == -1) { + tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. app id=%d", s->app_id); + } else { + struct app_iter_node_config conf = { + .app_id = s->app_id, + .type = NODE_TYPE_app, + .app_ending_iter = ending_iter, + }; + iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + } + } + } + return; } void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) { assert(s->app_id >= 0 && s->local_rank >= 0); +#if LP_DEBUG + s->num_events_processed++; +#endif /* if LP_DEBUG */ - //*(int *)bf = (int)0; + memset(bf, 0, sizeof(tw_bf)); rc_stack_gc(lp, s->matched_reqs); -// rc_stack_gc(lp, s->indices); rc_stack_gc(lp, s->processed_ops); rc_stack_gc(lp, s->processed_wait_op); - switch(m->msg_type) + switch((enum MPI_NW_EVENTS) m->msg_type) { case MPI_SEND_ARRIVED: update_arrival_queue(s, bf, m, lp); @@ -2574,8 +2791,8 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) codes_issue_next_event(lp); } - m->rc.saved_recv_time = s->recv_time; - m->rc.saved_recv_time_sample = s->ross_sample.recv_time; + m->rc.mpi_send.saved_recv_time = s->recv_time; + m->rc.mpi_send.saved_recv_time_sample = s->ross_sample.recv_time; s->recv_time += (tw_now(lp) - m->fwd.sim_start_time); s->ross_sample.recv_time += 
(tw_now(lp) - m->fwd.sim_start_time); @@ -2634,9 +2851,10 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) break; case CLI_BCKGND_CHANGE: - mean_interval_of_job[s->app_id] = m->fwd.msg_send_time; - printf("======== CHANGE [now: %lf] App:%d | Interval: %f\n", tw_now(lp), s->app_id, mean_interval_of_job[s->app_id]); - break; + m->rc.change.saved_send_time = mean_interval_of_job[s->app_id]; // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE + mean_interval_of_job[s->app_id] = m->fwd.msg_send_time; + m->rc.change.saved_marker_time = tw_now(lp); + break; case CLI_BCKGND_ARRIVE: arrive_syn_tr(s, bf, m, lp); @@ -2653,16 +2871,21 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) case CLI_OTHER_FINISH: handle_other_finish(s, lp, bf, m); break; + + case SURR_SKIP_ITERATION: + skip_to_iteration(s, lp, bf, m); + break; } } static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) { - codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, m->mpi_op); + codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, m->mpi_op); if(m->op_type == CODES_WK_END) { s->is_finished = 0; + s->elapsed_time = m->rc.mpi_next.saved_elapsed_time; if(bf->c9) return; @@ -2701,8 +2924,8 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t { // if (bf->c28) // tw_rand_reverse_unif(lp->rng); - s->compute_time = m->rc.saved_delay; - s->ross_sample.compute_time = m->rc.saved_delay_sample; + s->compute_time = m->rc.mpi_next.delay.saved_delay; + s->ross_sample.compute_time = m->rc.mpi_next.delay.saved_delay_sample; } } break; @@ -2711,8 +2934,8 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t if(bf->c27) { s->num_all_reduce--; - s->col_time = m->rc.saved_send_time; - s->all_reduce_time = m->rc.saved_delay; + s->col_time = 
m->rc.mpi_next.all_reduce.saved_send_time; + s->all_reduce_time = m->rc.mpi_next.all_reduce.saved_delay; } else { @@ -2756,6 +2979,9 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t break; case CODES_WK_MARK: codes_issue_next_event_rc(lp); + if (bf->c13) { + iter_predictor->model.predict_rc(lp, s->nw_id_in_pe); + } break; default: @@ -2768,15 +2994,14 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l { //struct codes_workload_op * mpi_op = malloc(sizeof(struct codes_workload_op)); // printf("\n App id %d local rank %d ", s->app_id, s->local_rank); - // struct codes_workload_op mpi_op; - // codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op); struct codes_workload_op * mpi_op = (struct codes_workload_op*)malloc(sizeof(struct codes_workload_op)); - codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op); + codes_workload_get_next(s->wrkld_id, s->app_id, s->local_rank, mpi_op); m->mpi_op = mpi_op; m->op_type = mpi_op->op_type; if(mpi_op->op_type == CODES_WK_END) { + m->rc.mpi_next.saved_elapsed_time = s->elapsed_time; s->elapsed_time = tw_now(lp) - s->start_time; s->is_finished = 1; @@ -2785,12 +3010,11 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l bf->c9 = 1; return; } - + /* Notify ranks from other job that checkpoint traffic has * completed */ - printf("\n Network node %d Rank %llu App %d finished at %lf ", s->local_rank, LLU(s->nw_id), s->app_id, tw_now(lp)); - int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); - + //int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + m->rc.mpi_next.mark.saved_marker_time = tw_now(lp); notify_root_rank(s, lp, bf, m); // printf("Client rank %llu completed workload, local rank %d .\n", s->nw_id, s->local_rank); @@ -2858,9 +3082,9 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l if(s->col_time > 0) { bf->c27 = 1; - m->rc.saved_delay = s->all_reduce_time; + 
m->rc.mpi_next.all_reduce.saved_delay = s->all_reduce_time; s->all_reduce_time += tw_now(lp) - s->col_time; - m->rc.saved_send_time = s->col_time; + m->rc.mpi_next.all_reduce.saved_send_time = s->col_time; s->col_time = 0; s->num_all_reduce++; } @@ -2887,9 +3111,20 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l case CODES_WK_MARK: { - printf("\n MARK_%d node %llu job %d rank %d time %lf ", mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, tw_now(lp)); - m->rc.saved_marker_time = tw_now(lp); - codes_issue_next_event(lp); + m->rc.mpi_next.mark.saved_marker_time = tw_now(lp); + int iteration_i = mpi_op->u.send.tag; + + if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) { + bf->c13 = 1; + struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe); + tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp); + nw_message* msg = (nw_message*) tw_event_data(e); + msg->msg_type = SURR_SKIP_ITERATION; + msg->fwd.resume_at_iter = iter_pred.resume_at_iter; + tw_event_send(e); + } else { + codes_issue_next_event(lp); + } } break; @@ -2921,16 +3156,22 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) return; if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0) avg_msg_time = (s->send_time / s->num_recvs); - else if(strcmp(workload_type, "online") == 0) - codes_workload_finalize("online_comm_workload", params, s->app_id, s->local_rank); + else if(strcmp(workload_type, "swm-online") == 0) + codes_workload_finalize("swm_online_comm_workload", params, s->app_id, s->local_rank); + //Xin: for conceptual online workload + else if(strcmp(workload_type, "conc-online") == 0) + codes_workload_finalize("conc_online_comm_workload", params, s->app_id, s->local_rank); } else { if(s->nw_id >= (tw_lpid)num_net_traces) return; - if(strcmp(workload_type, "online") == 0) - codes_workload_finalize("online_comm_workload", params, s->app_id, s->local_rank); + 
if(strcmp(workload_type, "swm-online") == 0) + codes_workload_finalize("swm_online_comm_workload", params, s->app_id, s->local_rank); + //Xin: for conceptual online workload + if(strcmp(workload_type, "conc-online") == 0) + codes_workload_finalize("conc_online_comm_workload", params, s->app_id, s->local_rank); } struct msg_size_info * tmp_msg = NULL; @@ -3019,13 +3260,16 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) //printf("\n LP %ld Time spent in communication %llu ", lp->gid, total_time - s->compute_time); rc_stack_destroy(s->matched_reqs); -// rc_stack_destroy(s->indices); rc_stack_destroy(s->processed_ops); rc_stack_destroy(s->processed_wait_op); } void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) { +#if LP_DEBUG + s->num_events_processed--; +#endif /* if LP_DEBUG */ + switch(m->msg_type) { case MPI_SEND_ARRIVED: @@ -3059,8 +3303,8 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l if(bf->c8) update_completed_queue_rc(s, bf, m, lp); - s->recv_time = m->rc.saved_recv_time; - s->ross_sample.recv_time = m->rc.saved_recv_time_sample; + s->recv_time = m->rc.mpi_send.saved_recv_time; + s->ross_sample.recv_time = m->rc.mpi_send.saved_recv_time_sample; } break; @@ -3073,7 +3317,7 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l break; case CLI_BCKGND_CHANGE: - mean_interval_of_job[s->app_id] = m->rc.saved_send_time; + mean_interval_of_job[s->app_id] = m->rc.change.saved_send_time; break; case CLI_BCKGND_ARRIVE: @@ -3091,6 +3335,10 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l case CLI_OTHER_FINISH: handle_other_finish_rc(s, lp, bf, m); break; + + case SURR_SKIP_ITERATION: + skip_to_iteration_rc(s, lp, bf, m); + break; } } @@ -3099,25 +3347,504 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp switch(m->msg_type) { case MPI_OP_GET_NEXT: - if (m->mpi_op->op_type == CODES_WK_MARK) { - if 
(OUTPUT_MARKS) - { - int written1; - char marker_filename[128]; - written1 = sprintf(marker_filename, "mpi-replay-marker-tag-times"); - marker_filename[written1] = '\0'; - - char tag_line[32]; - int written; - written = sprintf(tag_line, "%d %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time); - lp_io_write(lp->gid, marker_filename, written, tag_line); - } + switch (m->mpi_op->op_type) { + case CODES_WK_END: + printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.mpi_next.mark.saved_marker_time); + if (iter_predictor) { + iter_predictor->model.ended(lp, s->nw_id_in_pe, m->rc.mpi_next.mark.saved_marker_time); + } + break; + + case CODES_WK_MARK: + if (! m->rc.mpi_next.mark.was_skipped) { + fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time); + if (iter_predictor) { + iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time); + } + } + + if (OUTPUT_MARKS) + { + int written1; + char marker_filename[128]; + written1 = sprintf(marker_filename, "mpi-replay-marker-tag-times"); + marker_filename[written1] = '\0'; + + char tag_line[32]; + int written; + written = sprintf(tag_line, "%llu %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time); + lp_io_write(lp->gid, marker_filename, written, tag_line); + } + break; + + default: + break; } free(m->mpi_op); break; + case SURR_SKIP_ITERATION: + fprintf(iteration_log, "SKIPPED TO ITERATION %d node %llu job %d rank %d time %lf\n", m->fwd.resume_at_iter, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.surr_skip.saved_marker_time); + break; + + case CLI_BCKGND_CHANGE: + printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.change.saved_marker_time, s->app_id, m->fwd.msg_send_time); + break; + } +} + +static void make_qlist_cpy(struct qlist_head * 
into, struct qlist_head const * from, unsigned int sizeof_elem, unsigned int offset_ql) { + assert(sizeof_elem > offset_ql); + + int const num_elems = qlist_count(from); + INIT_QLIST_HEAD(into); + if (num_elems) { + char * pending_recvs = malloc(num_elems * sizeof_elem); + if (pending_recvs == NULL) { + tw_error(TW_LOC, "Malloc failed!"); + } + + char * new_entry = pending_recvs; + int i = 0; + struct qlist_head * ent; + qlist_for_each(ent, from) { + char * entry = ((char*)ent) - offset_ql; + + mempcpy(new_entry, entry, sizeof_elem); + struct qlist_head * new_entry_ql = (void*) (new_entry + offset_ql); + new_entry_ql->prev = (void*)(new_entry - sizeof_elem + offset_ql); + new_entry_ql->next = (void*)(new_entry + sizeof_elem + offset_ql); + i++; + new_entry += sizeof_elem; + } + assert(i == num_elems); + + struct qlist_head * first_ql = (void*)(pending_recvs + offset_ql); + struct qlist_head * last_ql = (void*)(pending_recvs + (num_elems - 1) * sizeof_elem + offset_ql); + into->next = first_ql; + into->prev = last_ql; + first_ql->prev = into; + last_ql->next = into; + } +} + +static void free_qlist_cpy(struct qlist_head * into, unsigned int offset_ql) { + if (! 
qlist_empty(into)) { + void * entry = (char *)(into->next) - offset_ql; + free(entry); + } +} + +bool compare_pending_waits(struct pending_waits const * before, struct pending_waits const * after) { + // if one is null and the other isn't, then they're not equal + if ((before == NULL) != (after == NULL)) { + return false; + } + // only check values if they are not nul + if (before == NULL) { + return true; + } + + bool is_same = true; + + is_same &= before->op_type == after->op_type; + is_same &= before->num_completed == after->num_completed; + is_same &= before->count == after->count; + is_same &= before->start_time == after->start_time; + + for (int i=0; icount; i++) { + is_same &= before->req_ids[i] == after->req_ids[i]; + } + + return is_same; +} + +static bool compare_mpi_msg_queues(mpi_msgs_queue * left, mpi_msgs_queue * right) { + bool is_same = true; + is_same &= left->op_type == right->op_type; + is_same &= left->tag == right->tag; + is_same &= left->source_rank == right->source_rank; + is_same &= left->dest_rank == right->dest_rank; + is_same &= left->num_bytes == right->num_bytes; + is_same &= left->req_init_time == right->req_init_time; + is_same &= left->req_id == right->req_id; + return is_same; +} + +static bool compare_completed_requests(completed_requests * left, completed_requests * right) { + bool is_same = true; + is_same &= left->req_id == right->req_id; + return is_same; +} + +static bool compare_msg_size_info(struct msg_size_info * left, struct msg_size_info * right) { + bool is_same = true; + is_same &= left->msg_size == right->msg_size; + is_same &= left->num_msgs == right->num_msgs; + is_same &= left->agg_latency == right->agg_latency; + is_same &= left->avg_latency == right->avg_latency; + is_same &= left->hash_link.next == right->hash_link.next; // This is not correct, we have to do deep copy this and chek that it is the same + is_same &= left->hash_link.prev == right->hash_link.prev; + return is_same; +} + +// Deep-copy of nw_state!! 
+// Functionality to check for correct implementation of reverse event handler +static void save_nw_lp_state(nw_state * into, nw_state const * from) { + memcpy(into, from, sizeof(nw_state)); + + make_qlist_cpy(&into->arrival_queue, &from->arrival_queue,sizeof(mpi_msgs_queue), QLIST_OFFSET(mpi_msgs_queue, ql)); + make_qlist_cpy(&into->pending_recvs_queue, &from->pending_recvs_queue, sizeof(mpi_msgs_queue), QLIST_OFFSET(mpi_msgs_queue, ql)); + make_qlist_cpy(&into->completed_reqs, &from->completed_reqs, sizeof(completed_requests), QLIST_OFFSET(completed_requests, ql)); + make_qlist_cpy(&into->msg_sz_list, &from->msg_sz_list, sizeof(struct msg_size_info), QLIST_OFFSET(struct msg_size_info, ql)); + // No need to copy msg_sz_table because all data is also in msg_sz_list + + int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + into->known_completed_jobs = malloc(num_jobs * sizeof(int)); + memcpy(into->known_completed_jobs, from->known_completed_jobs, num_jobs * sizeof(int)); + if (from->wait_op != NULL) { + into->wait_op = malloc(sizeof(pending_waits)); + memcpy(into->wait_op, from->wait_op, sizeof(pending_waits)); + } + + // Don't forget to make deep copies of any new complex data types that nw_state points to +} + +static void print_mpi_msgs_queue(FILE * out, char const * prefix, struct qlist_head * head) { + mpi_msgs_queue * current = NULL; + qlist_for_each_entry(current, head, ql) { + fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", prefix, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id); + } +} + +// Cleaning up deep-copy +static void clean_nw_lp_state(nw_state * into) { + free_qlist_cpy(&into->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql)); + free_qlist_cpy(&into->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql)); + free_qlist_cpy(&into->completed_reqs, QLIST_OFFSET(completed_requests, ql)); + 
free_qlist_cpy(&into->msg_sz_list, QLIST_OFFSET(struct msg_size_info, ql)); + free(into->known_completed_jobs); + if (into->wait_op != NULL) { + free(into->wait_op); + } +} + +// Checking that deep-copy is the same as original!! +// Originally filled with a prompt on Claude +static bool check_nw_lp_state(nw_state * before, nw_state const * after) { + bool is_same = true; + + // Basic fields + is_same &= (before->nw_id == after->nw_id); + is_same &= (before->wrkld_id == after->wrkld_id); + is_same &= (before->app_id == after->app_id); + is_same &= (before->local_rank == after->local_rank); + is_same &= (before->qos_level == after->qos_level); + + // Pattern and completion flags + is_same &= (before->synthetic_pattern == after->synthetic_pattern); + is_same &= (before->is_finished == after->is_finished); + is_same &= (before->num_own_job_ranks_completed == after->num_own_job_ranks_completed); + + // Operation counts + is_same &= (before->num_sends == after->num_sends); + is_same &= (before->num_recvs == after->num_recvs); + is_same &= (before->num_cols == after->num_cols); + is_same &= (before->num_delays == after->num_delays); + is_same &= (before->num_wait == after->num_wait); + is_same &= (before->num_waitall == after->num_waitall); + is_same &= (before->num_waitsome == after->num_waitsome); + + // Timing information + is_same &= (before->start_time == after->start_time); + is_same &= (before->col_time == after->col_time); + is_same &= (before->reduce_time == after->reduce_time); + is_same &= (before->num_reduce == after->num_reduce); + is_same &= (before->all_reduce_time == after->all_reduce_time); + is_same &= (before->num_all_reduce == after->num_all_reduce); + is_same &= (before->elapsed_time == after->elapsed_time); + is_same &= (before->compute_time == after->compute_time); + is_same &= (before->send_time == after->send_time); + is_same &= (before->max_time == after->max_time); + is_same &= (before->recv_time == after->recv_time); + is_same &= 
(before->wait_time == after->wait_time); + + // Interval and current state + is_same &= (before->cur_interval_end == after->cur_interval_end); + + // Data statistics + is_same &= (before->num_bytes_sent == after->num_bytes_sent); + is_same &= (before->num_bytes_recvd == after->num_bytes_recvd); + is_same &= (before->syn_data == after->syn_data); + is_same &= (before->gen_data == after->gen_data); + + // Switch and routing information + is_same &= (before->prev_switch == after->prev_switch); + is_same &= (before->saved_perm_dest == after->saved_perm_dest); + is_same &= (before->rc_perm == after->rc_perm); + + // Sampling information + is_same &= (before->sampling_indx == after->sampling_indx); + //is_same &= (before->max_arr_size == after->max_arr_size); + + // Compare string buffers + is_same &= (strcmp(before->output_buf, after->output_buf) == 0); + is_same &= (strcmp(before->col_stats, after->col_stats) == 0); + + // Complex elements + is_same &= are_qlist_equal(&before->arrival_queue, &after->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues); + is_same &= are_qlist_equal(&before->pending_recvs_queue, &after->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues); + is_same &= are_qlist_equal(&before->completed_reqs, &after->completed_reqs, QLIST_OFFSET(completed_requests, ql), (bool (*) (void *, void *)) compare_completed_requests); + is_same &= are_qlist_equal(&before->msg_sz_list, &after->msg_sz_list, QLIST_OFFSET(struct msg_size_info, ql), (bool (*) (void *, void *)) compare_msg_size_info); + + is_same &= !memcmp(&before->ross_sample, &after->ross_sample, sizeof(struct ross_model_sample)); + + int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + is_same &= !memcmp(before->known_completed_jobs, after->known_completed_jobs, num_jobs * sizeof(int)); + is_same &= compare_pending_waits(before->wait_op, after->wait_op); + + // Skipped pointer comparisons (used 
in reverse computation): + // - processed_ops + // - processed_wait_op + // - matched_reqs + // - msg_sz_table + // Pointers used in some data collection (IO) or outside of PDES loop + // - mpi_wkld_samples + + // There is no need to implement msg_sz_table as all values are already + // accounted for in msg_sz_list. We can safely ignore all values in msg_sz_list + + return is_same; +} + +// Originally implemneted with a prompt on Claude.ai (tedious code, easy to check and produce) +static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state) { + int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + + fprintf(out, "%snw-lp state ->\n", prefix); +#if LP_DEBUG + fprintf(out, "%s | num_events_processed = %zu\n", prefix, state->num_events_processed); +#endif /* if LP_DE%sBUG */ + fprintf(out, "%s | nw_id = %lu\n", prefix, state->nw_id); + fprintf(out, "%s | wrkld_end = %d\n", prefix, state->wrkld_id); + fprintf(out, "%s | app_id = %d\n", prefix, state->app_id); + fprintf(out, "%s | local_rank = %d\n", prefix, state->local_rank); + fprintf(out, "%s | qos_level = %d\n", prefix, state->qos_level); + fprintf(out, "%s | synthetic_pattern = %d\n", prefix, state->synthetic_pattern); + fprintf(out, "%s | is_finished = %d\n", prefix, state->is_finished); + fprintf(out, "%s |num_own_job_ranks_completed = %d\n", prefix, state->num_own_job_ranks_completed); + fprintf(out, "%s | known_completed_jobs[%d] = [", prefix, num_jobs); + for(int i=0; iknown_completed_jobs[i], i+1==num_jobs ? 
"" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, "%s | *processed_ops = %p\n", prefix, state->processed_ops); + fprintf(out, "%s | *processed_wait_op = %p\n", prefix, state->processed_wait_op); + fprintf(out, "%s | *matched_reqs = %p\n", prefix, state->matched_reqs); + + // Operation counts + fprintf(out, "%s | num_sends = %lu\n", prefix, state->num_sends); + fprintf(out, "%s | num_recvs = %lu\n", prefix, state->num_recvs); + fprintf(out, "%s | num_cols = %lu\n", prefix, state->num_cols); + fprintf(out, "%s | num_delays = %lu\n", prefix, state->num_delays); + fprintf(out, "%s | num_wait = %lu\n", prefix, state->num_wait); + fprintf(out, "%s | num_waitall = %lu\n", prefix, state->num_waitall); + fprintf(out, "%s | num_waitsome = %lu\n", prefix, state->num_waitsome); + + // Timing information + fprintf(out, "%s | start_time = %g\n", prefix, state->start_time); + fprintf(out, "%s | col_time = %g\n", prefix, state->col_time); + fprintf(out, "%s | reduce_time = %g\n", prefix, state->reduce_time); + fprintf(out, "%s | num_reduce = %d\n", prefix, state->num_reduce); + fprintf(out, "%s | all_reduce_time = %g\n", prefix, state->all_reduce_time); + fprintf(out, "%s | num_all_reduce = %d\n", prefix, state->num_all_reduce); + fprintf(out, "%s | elapsed_time = %g\n", prefix, state->elapsed_time); + fprintf(out, "%s | compute_time = %g\n", prefix, state->compute_time); + fprintf(out, "%s | send_time = %g\n", prefix, state->send_time); + fprintf(out, "%s | max_time = %g\n", prefix, state->max_time); + fprintf(out, "%s | recv_time = %g\n", prefix, state->recv_time); + fprintf(out, "%s | wait_time = %g\n", prefix, state->wait_time); + + // Queue heads + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + + fprintf(out, "%s | arrival_queue[%d] = [\n", prefix, qlist_count(&state->arrival_queue)); + print_mpi_msgs_queue(out, 
subprefix, &state->arrival_queue); + fprintf(out, "%s | ]\n", prefix); + fprintf(out, "%s | pending_recvs_queue[%d] = [\n", prefix, qlist_count(&state->pending_recvs_queue)); + print_mpi_msgs_queue(out, subprefix, &state->pending_recvs_queue); + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | completed_reqs[%d] = [\n", prefix, qlist_count(&state->completed_reqs)); + completed_requests * current = NULL; + qlist_for_each_entry(current, &state->completed_reqs, ql) { + fprintf(out, "%s | | Req: req_id: %u\n", prefix, current->req_id); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | cur_interval_end = %g\n", prefix, state->cur_interval_end); + fprintf(out, "%s | *wait_op = %p\n", prefix, state->wait_op); + if (state->wait_op != NULL) { + fprintf(out, "%s | |.op_type = %d\n", prefix, state->wait_op->op_type); + fprintf(out, "%s | |.req_ids = [", prefix); + for(int i = 0; i < state->wait_op->count; i++) { + fprintf(out, "%d%s", state->wait_op->req_ids[i], i+1==state->wait_op->count ? 
"" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, "%s | |.num_completed = %d\n", prefix, state->wait_op->num_completed); + fprintf(out, "%s | |.count = %d\n", prefix, state->wait_op->count); + fprintf(out, "%s | |.start_time = %g\n", prefix, state->wait_op->start_time); + } + fprintf(out, "%s | msg_sz_list[%d] = [\n", prefix, qlist_count(&state->completed_reqs)); + struct msg_size_info * ms_info = NULL; + qlist_for_each_entry(ms_info, &state->msg_sz_list, ql) { + fprintf(out, "%s | | MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p hash_link.prev: %p\n", prefix, ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev); + } + fprintf(out, "%s | ]\n", prefix); + + // Data statistics + fprintf(out, "%s | num_bytes_sent = %llu\n", prefix, state->num_bytes_sent); + fprintf(out, "%s | num_bytes_recvd = %llu\n", prefix, state->num_bytes_recvd); + fprintf(out, "%s | syn_data = %llu\n", prefix, state->syn_data); + fprintf(out, "%s | gen_data = %llu\n", prefix, state->gen_data); + + fprintf(out, "%s | prev_switch = %lu\n", prefix, state->prev_switch); + fprintf(out, "%s | saved_perm_dest = %d\n", prefix, state->saved_perm_dest); + fprintf(out, "%s | rc_perm = %lu\n", prefix, state->rc_perm); + + // Sampling information + fprintf(out, "%s | sampling_indx = %d\n", prefix, state->sampling_indx); + fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); + fprintf(out, "%s |* mpi_wkld_samples = %p\n", prefix, state->mpi_wkld_samples); + fprintf(out, "%s | output_buf = %.512s...\n", prefix, state->output_buf); + fprintf(out, "%s | col_stats = %.64s...\n", prefix, state->col_stats); + + fprintf(out, "%s |ross_sample.\n", prefix); + fprintf(out, "%s | | nw_id = %lu\n", prefix, state->ross_sample.nw_id); + fprintf(out, "%s | | app_id = %d\n", prefix, state->ross_sample.app_id); + fprintf(out, "%s | | local_rank = %d\n", prefix, 
state->ross_sample.local_rank); + fprintf(out, "%s | | num_sends = %lu\n", prefix, state->ross_sample.num_sends); + fprintf(out, "%s | | num_recvs = %lu\n", prefix, state->ross_sample.num_recvs); + fprintf(out, "%s | | num_bytes_sent = %llu\n", prefix, state->ross_sample.num_bytes_sent); + fprintf(out, "%s | | num_bytes_recvd = %llu\n", prefix, state->ross_sample.num_bytes_recvd); + fprintf(out, "%s | | send_time = %g\n", prefix, state->ross_sample.send_time); + fprintf(out, "%s | | recv_time = %g\n", prefix, state->ross_sample.recv_time); + fprintf(out, "%s | | wait_time = %g\n", prefix, state->ross_sample.wait_time); + fprintf(out, "%s | | compute_time = %g\n", prefix, state->ross_sample.compute_time); + fprintf(out, "%s | | comm_time = %g\n", prefix, state->ross_sample.comm_time); + fprintf(out, "%s | | max_time = %g\n", prefix, state->ross_sample.max_time); + fprintf(out, "%s | | avg_msg_time = %g\n", prefix, state->ross_sample.avg_msg_time); +} + +static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) { + + switch (event_type) { + case MPI_OP_GET_NEXT: return "MPI_OP_GET_NEXT"; + case MPI_SEND_ARRIVED: return "MPI_SEND_ARRIVED"; + case MPI_SEND_ARRIVED_CB: return "MPI_SEND_ARRIVED_CB"; + case MPI_SEND_POSTED: return "MPI_SEND_POSTED"; + case MPI_REND_ARRIVED: return "MPI_REND_ARRIVED"; + case MPI_REND_ACK_ARRIVED: return "MPI_REND_ACK_ARRIVED"; + case CLI_BCKGND_FIN: return "CLI_BCKGND_FIN"; + case CLI_BCKGND_ARRIVE: return "CLI_BCKGND_ARRIVE"; + case CLI_BCKGND_GEN: return "CLI_BCKGND_GEN"; + case CLI_BCKGND_CHANGE: return "CLI_BCKGND_CHANGE"; + case CLI_NBR_FINISH: return "CLI_NBR_FINISH"; + case CLI_OTHER_FINISH: return "CLI_OTHER_FINISH"; + case SURR_SKIP_ITERATION: return "SURR_SKIP_ITERATION"; + default: return "UNKNOWN!!"; + } + +} + +// Original printing function from Claude.ai +static void print_nw_message(FILE * out, char const * prefix, nw_state* s, struct nw_message * msg) { + fprintf(out, "%snw_message ->\n", prefix); + 
fprintf(out, "%s | msg_type = %s\n", prefix, MPI_NW_EVENTS_to_string(msg->msg_type)); + fprintf(out, "%s | op_type = %s\n", prefix, op_type_string(msg->op_type)); + fprintf(out, "%s | num_rngs = %d\n", prefix, msg->num_rngs); + fprintf(out, "%s | event_rc = %d\n", prefix, msg->event_rc); + fprintf(out, "%s | mpi_op = %p\n", prefix, msg->mpi_op); + + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + fprint_codes_workload_op(out, subprefix, msg->mpi_op); + + fprintf(out, "%s | fwd\n", prefix); + fprintf(out, "%s | | src_rank = %lu\n", prefix, msg->fwd.src_rank); + fprintf(out, "%s | | dest_rank = %d\n", prefix, msg->fwd.dest_rank); + fprintf(out, "%s | | num_bytes = %ld\n", prefix, msg->fwd.num_bytes); + fprintf(out, "%s | | num_matched = %d\n", prefix, msg->fwd.num_matched); + fprintf(out, "%s | | sim_start_time = %g\n", prefix, msg->fwd.sim_start_time); + fprintf(out, "%s | | msg_send_time = %g\n", prefix, msg->fwd.msg_send_time); + fprintf(out, "%s | | req_id = %u\n", prefix, msg->fwd.req_id); + fprintf(out, "%s | | matched_req = %d\n", prefix, msg->fwd.matched_req); + fprintf(out, "%s | | tag = %d\n", prefix, msg->fwd.tag); + fprintf(out, "%s | | app_id = %d\n", prefix, msg->fwd.app_id); + fprintf(out, "%s | | found_match = %d\n", prefix, msg->fwd.found_match); + fprintf(out, "%s | | wait_completed = %d\n", prefix, msg->fwd.wait_completed); + fprintf(out, "%s | | rend_send = %d\n", prefix, msg->fwd.rend_send); + + fprintf(out, "%s | rc\n", prefix); + switch(msg->msg_type) { + case CLI_BCKGND_GEN: + fprintf(out, "%s | | gen\n", prefix); + fprintf(out, "%s | | saved_syn_length = %d\n", prefix, msg->rc.gen.saved_syn_length); + fprintf(out, "%s | | saved_perm = %d\n", prefix, msg->rc.gen.saved_perm); + fprintf(out, "%s | | saved_prev_switch = %lu\n", prefix, msg->rc.gen.saved_prev_switch); + break; + + case 
CLI_BCKGND_ARRIVE: + case MPI_SEND_ARRIVED_CB: + fprintf(out, "%s | |arrive.saved_prev_max_time = %g\n", prefix, msg->rc.arrive.saved_prev_max_time); + fprintf(out, "%s | | arrive.saved_send_time = %g\n", prefix, msg->rc.arrive.saved_send_time); + fprintf(out, "%s | |arrive.saved_send_time_sample = %g\n", prefix, msg->rc.arrive.saved_send_time_sample); + break; + + case CLI_BCKGND_CHANGE: + fprintf(out, "%s | | change.saved_send_time = %g\n", prefix, msg->rc.change.saved_send_time); + fprintf(out, "%s | | change.saved_marker_time = %g\n", prefix, msg->rc.change.saved_marker_time); + break; + + case MPI_OP_GET_NEXT: + fprintf(out, "%s | mpi_next\n", prefix); + fprintf(out, "%s | | saved_elapsed_time = %g\n", prefix, msg->rc.mpi_next.saved_elapsed_time); + fprintf(out, "%s | | all_reduce.saved_send_time = %g\n", prefix, msg->rc.mpi_next.all_reduce.saved_send_time); + fprintf(out, "%s | | all_reduce.saved_delay = %g\n", prefix, msg->rc.mpi_next.all_reduce.saved_delay); + + fprintf(out, "%s | | recv.saved_recv_time = %g\n", prefix, msg->rc.mpi_next.recv.saved_recv_time); + fprintf(out, "%s | | recv.saved_recv_time_sample = %g\n", prefix, msg->rc.mpi_next.recv.saved_recv_time_sample); + + fprintf(out, "%s | | delay.saved_delay = %g\n", prefix, msg->rc.mpi_next.delay.saved_delay); + fprintf(out, "%s | | delay.saved_delay_sample = %g\n", prefix, msg->rc.mpi_next.delay.saved_delay_sample); + + fprintf(out, "%s | | mark.saved_marker_time = %g\n", prefix, msg->rc.mpi_next.mark.saved_marker_time); + break; + + case MPI_SEND_ARRIVED: + case MPI_REND_ARRIVED: + case MPI_SEND_POSTED: + fprintf(out, "%s | | mpi_send\n", prefix); + fprintf(out, "%s | | saved_wait_time = %g\n", prefix, msg->rc.mpi_send.saved_wait_time); + fprintf(out, "%s | | saved_wait_time_sample = %g\n", prefix, msg->rc.mpi_send.saved_wait_time_sample); + fprintf(out, "%s | | saved_recv_time = %g\n", prefix, msg->rc.mpi_send.saved_recv_time); + fprintf(out, "%s | | saved_recv_time_sample = %g\n", prefix, 
msg->rc.mpi_send.saved_recv_time_sample); + fprintf(out, "%s | | saved_num_bytes = %lu\n", prefix, msg->rc.mpi_send.saved_num_bytes); + break; + + case MPI_REND_ACK_ARRIVED: + fprintf(out, "%s | | mpi_ack.saved_num_bytes = %ld\n", prefix, msg->rc.mpi_ack.saved_num_bytes); + break; + + default: + break; } } @@ -3129,6 +3856,7 @@ const tw_optdef app_opt [] = TWOPT_CHAR("workload_file", workload_file, "workload file name"), TWOPT_CHAR("alloc_file", alloc_file, "allocation file name"), TWOPT_CHAR("workload_conf_file", workloads_conf_file, "workload config file name"), + TWOPT_CHAR("workload_json_files", workload_json_files, "workload json files mapping file name"), TWOPT_CHAR("link_failure_file", g_nm_link_failure_filepath, "filepath for override of link failure file from configuration for supporting models"), TWOPT_CHAR("workload_timer_file", workloads_timer_file, "workload timer file name (for starting/pausing/stopping synthetic traffic)"), TWOPT_CHAR("workload_period_file", workloads_period_file, "workload periods file name (for changing the per-job synthetic traffic load at specified periods/times)"), @@ -3175,9 +3903,23 @@ const tw_lptype* nw_get_lp_type() return(&nw_lp); } +// ROSS function pointer table to check reverse event handler +crv_checkpointer nw_lp_chkptr = { + &nw_lp, + 0, + (save_checkpoint_state_f) save_nw_lp_state, + (clean_checkpoint_state_f) clean_nw_lp_state, + (check_states_f) check_nw_lp_state, + (print_lpstate_f) print_nw_lp_state, + (print_checkpoint_state_f) print_nw_lp_state, + (print_event_f) print_nw_message, +}; + static void nw_add_lp_type() { lp_type_register("nw-lp", nw_get_lp_type()); + // registering custom print for nw_lp LPs + crv_add_custom_state_checkpoint(&nw_lp_chkptr); } /* setup for the ROSS event tracing @@ -3278,6 +4020,23 @@ void modelnet_mpi_replay_read_config() } +void modelnet_mpi_replay_configure_app_surrogate() +{ + char app_surrogate_test[MAX_NAME_LENGTH]; + app_surrogate_test[0] = '\0'; + int app_surrogate_len = 
configuration_get_value(&config, "APPLICATION_SURROGATE", "enable", NULL, app_surrogate_test, MAX_NAME_LENGTH); + + // Only configure if APPLICATION_SURROGATE is present and enabled + if (app_surrogate_len == 0 || atoi(app_surrogate_test) == 0) { + return; + } + + tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp"); + int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor); +} + + int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) { int rank; @@ -3294,17 +4053,18 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_opt_add(app_opt); tw_opt_add(cc_app_opt); tw_init(argc, argv); + #ifdef USE_RDAMARIS if(g_st_ross_rank) { // keep damaris ranks from running code between here up until tw_end() #endif codes_comm_update(); - - if(strcmp(workload_type, "dumpi") != 0 && strcmp(workload_type, "online") != 0) + //Xin: add conceptual online workload + if(strcmp(workload_type, "dumpi") != 0 && strcmp(workload_type, "swm-online") != 0 && strcmp(workload_type, "conc-online") != 0) { if(tw_ismaster()) printf("Usage: mpirun -np n ./modelnet-mpi-replay --sync=1/3" - " --workload_type=dumpi/online" + " --workload_type=dumpi/swm-online/conc-online" " --workload_conf_file=prefix-workload-file-name" " --workload_timer_file=timer-file" " --workload_period_file=period-file" @@ -3321,6 +4081,14 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) return -1; } + bool is_conc_enabled = false; + + /* Xin: Currently rendezvous protocol cannot work with Conceptual online workloads */ + if(strcmp(workload_type, "conc-online") == 0) { + EAGER_THRESHOLD = INT64_MAX; + is_conc_enabled = true; + } + jobmap_ctx = NULL; // make sure it's NULL if it's not used @@ -3339,13 +4107,15 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file); int i = 0; - char ref = '\n'; - 
while(!feof(name_file)) + char line[1024]; + while(fgets(line, sizeof(line), name_file)) { - //TODO: can we allow for a 2 item line but with defaults for the last two? - ref = fscanf(name_file, "%d %s %d %f", &num_traces_of_job[i], file_name_of_job[i], &qos_level_of_job[i], &mean_interval_of_job[i]); + int const fields = sscanf(line, "%d %s %d %f", &num_traces_of_job[i], file_name_of_job[i], &qos_level_of_job[i], &mean_interval_of_job[i]); + if(fields != 4) { + tw_error(TW_LOC, "Invalid format in %s at line %d: expected 4 fields, got %d", workloads_conf_file, i+1, fields); + } - if(ref != EOF && strncmp(file_name_of_job[i], "synthetic", 9) == 0) + if(strncmp(file_name_of_job[i], "synthetic", 9) == 0) { num_syn_clients = num_traces_of_job[i]; num_net_traces += num_traces_of_job[i]; @@ -3357,7 +4127,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_error(TW_LOC, "BISECTION requires and even number of nodes."); } - else if(ref!=EOF) + else { if(enable_debug) printf("\n%d traces of app %s (default qos class: %d)\n", num_traces_of_job[i], file_name_of_job[i], qos_level_of_job[i]); @@ -3370,14 +4140,13 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) } i++; } - printf("\n num_net_traces %d; num_dumpi_traces %d", num_net_traces, num_dumpi_traces); + printf("\n num_net_traces %d; num_dumpi_traces %d\n", num_net_traces, num_dumpi_traces); fclose(name_file); assert(strlen(alloc_file) != 0); alloc_spec = 1; jobmap_p.alloc_file = alloc_file; jobmap_ctx = codes_jobmap_configure(CODES_JOBMAP_LIST, &jobmap_p); - if(strlen(workloads_timer_file) > 0){ FILE *timer_file = fopen(workloads_timer_file, "r"); if(!timer_file) @@ -3402,18 +4171,55 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) char ref2 = '\n'; while(!feof(period_file)) { + if (j >= MAX_JOBS && !g_tw_mynode) { + tw_error(TW_LOC, "Exceeded number of max workloads in workloads period file. 
Max: %d", MAX_JOBS); + } ref2 = fscanf(period_file, "%d", &period_count[j]); + if (period_count[j] > MAX_PERIODS_PER_APP && !g_tw_mynode) { + tw_error(TW_LOC, "Too many periods for workload app %d", period_count[j]); + } if(ref2 != EOF){ - printf("======== [ID: %d] Period count: %d\n", j, period_count[j]); + if (!g_tw_mynode) { + printf("======== [ID: %d] Period count: %d\n", j, period_count[j]); + } for(int k = 0; k < period_count[j]; k++){ - fscanf(period_file, "%ld:%f", &period_time[j][k], &period_interval[j][k]); - printf("======== [ID: %d] Period time and interval: %ld and %f\n", j, period_time[j][k], period_interval[j][k]); + fscanf(period_file, "%lf:%f", &period_time[j][k], &period_interval[j][k]); + if (!g_tw_mynode) { + printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]); + } } } j++; } fclose(period_file); } + + /* Load workload JSON files mapping if specified */ + if(is_conc_enabled && strlen(workload_json_files) > 0) + { + FILE *json_file = fopen(workload_json_files, "r"); + if(!json_file) + tw_error(TW_LOC, "\n Could not open file %s ", workload_json_files); + + workload_json_mapping_count = 0; + + while(!feof(json_file) && workload_json_mapping_count < MAX_JOBS) + { + if(fscanf(json_file, "%s %s", + workload_json_mappings[workload_json_mapping_count].workload_type, + workload_json_mappings[workload_json_mapping_count].json_path) == 2) + { + workload_json_mapping_count++; + } + } + fclose(json_file); + + if(enable_debug) + printf("\n Loaded %d workload JSON mappings\n", workload_json_mapping_count); + } + if(!is_conc_enabled && strlen(workload_json_files) > 0) { + printf("\n Conceptual online worloads will not run, thus, we won't read any json files from --workload_json_files\n"); + } } else { @@ -3438,7 +4244,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) jobmap_ctx = codes_jobmap_configure(CODES_JOBMAP_IDENTITY, &jobmap_ident_p); } - MPI_Comm_rank(MPI_COMM_CODES, 
&rank); MPI_Comm_size(MPI_COMM_CODES, &nprocs); @@ -3457,6 +4262,29 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) modelnet_mpi_replay_read_config(); + //Xin: output iteration time into log file + + char const iteration_dir[] = "iteration-logs"; + if (!g_tw_mynode) { + int ret = mkdir("iteration-logs", 0775); + if(ret != 0) + { + tw_error(TW_LOC, "mkdir(\"%s/\")", iteration_dir); + } + } + MPI_Barrier(MPI_COMM_CODES); + int buffer_size = snprintf(NULL, 0, "%s/pe=%d.txt", iteration_dir, g_tw_mynode) + 1; + char *iteration_log_path = malloc(buffer_size); + snprintf(iteration_log_path, buffer_size, "%s/pe=%d.txt", iteration_dir, g_tw_mynode); + iteration_log = fopen(iteration_log_path, "w+"); + free(iteration_log_path); + if(!iteration_log) + { + printf("\n Error logging iteration times... quitting "); + MPI_Finalize(); + return -1; + } + if(enable_debug) { workload_log = fopen("mpi-op-logs", "w+"); @@ -3537,8 +4365,13 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_prepare failure"); } + + modelnet_mpi_replay_configure_app_surrogate(); + tw_run(); + fclose(iteration_log); //Xin + if(enable_debug) fclose(workload_log); @@ -3554,6 +4387,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) double total_avg_send_time, total_max_send_time; double total_avg_wait_time, total_max_wait_time; double total_avg_recv_time, total_max_recv_time; + double g_max_elapsed_time_per_job[MAX_JOBS]; double g_total_syn_data = 0; MPI_Reduce(&num_bytes_sent, &total_bytes_sent, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); @@ -3570,6 +4404,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce(&total_syn_data, &g_total_syn_data, 1, 
MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(max_elapsed_time_per_job, g_max_elapsed_time_per_job, num_total_jobs, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); assert(num_net_traces); @@ -3588,19 +4423,20 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) printf("Per App Max Elapsed Times:\n"); for(int i = 0; i < num_total_jobs; i++) { - printf("\tApp %d: %.4f\n",i,max_elapsed_time_per_job[i]); + printf("\tApp %d: %.4f\n",i,g_max_elapsed_time_per_job[i]); } printf("----------\n"); if(synthetic_pattern == PERMUTATION) printf("\n Threshold for random permutation %ld ", perm_switch_thresh); + + if(is_synthetic) + printf("\n Synthetic traffic stats: data received per proc %lf bytes \n", g_total_syn_data/num_syn_clients); } if (do_lp_io){ int ret = lp_io_flush(io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_flush failure"); } - if(is_synthetic) - printf("\n PE%d: Synthetic traffic stats: data received per proc %lf bytes \n",rank, g_total_syn_data/num_syn_clients); model_net_report_stats(net_id); @@ -3611,6 +4447,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) if(alloc_spec) codes_jobmap_destroy(jobmap_ctx); + surrogates_finalize(); + print_surrogate_stats(); + #ifdef USE_RDAMARIS } // end if(g_st_ross_rank) #endif diff --git a/src/network-workloads/model-net-synthetic-dragonfly-all.c b/src/network-workloads/model-net-synthetic-dragonfly-all.c index b763accc..bcebafa4 100644 --- a/src/network-workloads/model-net-synthetic-dragonfly-all.c +++ b/src/network-workloads/model-net-synthetic-dragonfly-all.c @@ -381,15 +381,16 @@ static void handle_kickoff_event( char anno[MAX_NAME_LENGTH]; tw_lpid local_dest = -1, global_dest = -1; - svr_msg * m_local = malloc(sizeof(svr_msg)); - svr_msg * m_remote = malloc(sizeof(svr_msg)); + svr_msg m_local; + svr_msg m_remote; - m_local->svr_event_type = LOCAL; - m_local->src = lp->gid; - m_local->msg_start_time = tw_now(lp); + m_local.svr_event_type = LOCAL; + m_local.src = lp->gid; + 
m_local.msg_start_time = tw_now(lp); - memcpy(m_remote, m_local, sizeof(svr_msg)); - m_remote->svr_event_type = REMOTE; + m_remote.svr_event_type = REMOTE; + m_remote.src = lp->gid; + m_remote.msg_start_time = tw_now(lp); codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, anno, &rep_id, &offset); int local_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); @@ -459,7 +460,7 @@ static void handle_kickoff_event( ns->msg_sent_count++; ns->last_send_ts = tw_now(lp); - m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)m_remote, sizeof(svr_msg), (const void*)m_local, lp); + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp); issue_event(ns, lp); return; } diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index d469fa10..1595f480 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -16,11 +16,14 @@ #define MN_NAME "model_net_base" #define DEBUG 0 +#define MODELNET_LP_DEBUG 0 /**** BEGIN SIMULATION DATA STRUCTURES ****/ int model_net_base_magic; int mn_sample_enabled = 0; +static int is_freezing_on = false; + // message-type specific offsets - don't want to get bitten later by alignment // issues... 
static int msg_offsets[MAX_NETS]; @@ -46,6 +49,9 @@ static int servers_per_node_queue = -1; extern tw_stime codes_cn_delay; typedef struct model_net_base_state { +#if MODELNET_LP_DEBUG + size_t num_events_processed; +#endif /* if MODELNET_LP_DEBUG */ int net_id, nics_per_router; // whether scheduler loop is running int *in_sched_send_loop, in_sched_recv_loop; @@ -63,6 +69,8 @@ typedef struct model_net_base_state { void *sub_state; tw_stime next_available_time; tw_stime *node_copy_next_available_time; + // Copy of in_sched_send_loop before switching to surrogate mode + int * sched_loop_pre_surrogate, sched_recv_loop_pre_surrogate; } model_net_base_state; @@ -127,13 +135,33 @@ tw_lptype model_net_base_lp = { sizeof(model_net_base_state), }; +// Functionality to check for correct implementation of reverse event handler +static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from); +static void clean_state_net_state(model_net_base_state * state); +static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after); +static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state); +static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state); +static void print_event_state(FILE * out, char const * prefix, model_net_base_state * s, model_net_wrap_msg * msg); + +// ROSS function pointer table to check reverse event handler +crv_checkpointer model_net_chkptr = { + &model_net_base_lp, + 0, + (save_checkpoint_state_f) save_state_net_state, + (clean_checkpoint_state_f) clean_state_net_state, + (check_states_f) check_model_net_state, + (print_lpstate_f) print_model_net_state, + (print_checkpoint_state_f) print_model_net_checkpoint, + (print_event_f) print_event_state, +}; + static void model_net_commit_event(model_net_base_state * ns, tw_bf *b, model_net_wrap_msg * m, tw_lp * lp) { if(m->h.event_type == MN_BASE_PASS) { void * sub_msg; sub_msg 
= ((char*)m)+msg_offsets[ns->net_id]; - + if(ns->sub_type->commit != NULL) ns->sub_type->commit(ns->sub_state, b, sub_msg, lp); } @@ -264,6 +292,7 @@ void model_net_base_register(int *do_config_nets){ } } } + crv_add_custom_state_checkpoint(&model_net_chkptr); } static void base_read_config(const char * anno, model_net_base_params *p){ @@ -504,6 +533,7 @@ void model_net_base_lp_init( } ns->in_sched_send_loop = (int *)malloc(ns->params->num_queues * sizeof(int)); + ns->sched_loop_pre_surrogate = (int *)malloc(ns->params->num_queues * sizeof(int)); ns->sched_send = (model_net_sched**)malloc(ns->params->num_queues * sizeof(model_net_sched*)); for(int i = 0; i < ns->params->num_queues; i++) { ns->sched_send[i] = (model_net_sched*)malloc(sizeof(model_net_sched)); @@ -565,12 +595,20 @@ void model_net_base_event( tw_bf * b, model_net_wrap_msg * m, tw_lp * lp){ + memset(b, 0, sizeof(tw_bf)); +#if MODELNET_LP_DEBUG + ns->num_events_processed++; +#endif /* if MODELNET_LP_DEBUG */ if(m->h.magic != model_net_base_magic) printf("\n LP ID mismatched %llu\n", LLU(lp->gid)); assert(m->h.magic == model_net_base_magic); + if(!is_freezing_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) { + return; + } + void * sub_msg; switch (m->h.event_type){ case MN_BASE_NEW_MSG: @@ -613,6 +651,13 @@ void model_net_base_event_rc( model_net_wrap_msg * m, tw_lp * lp){ assert(m->h.magic == model_net_base_magic); +#if MODELNET_LP_DEBUG + ns->num_events_processed--; +#endif /* if MODELNET_LP_DEBUG */ + + if(!is_freezing_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) { + return; + } void * sub_msg; switch (m->h.event_type){ @@ -755,7 +800,9 @@ void handle_new_msg( // simply pass down to the scheduler model_net_request *r = &m->msg.m_base.req; // don't forget to set packet size, now that we're responsible for it! 
+ r->msg_new_mn_event = tw_now(lp); r->packet_size = ns->params->packet_size; + b->c30 = 1; r->msg_id = ns->msg_id++; void * m_data = m+1; void *remote = NULL, *local = NULL; @@ -849,6 +896,10 @@ void handle_new_msg_rc( *in_sched_loop = 0; } model_net_sched_add_rc(ss, &m->msg.m_base.rc, lp); + + if (b->c30) { + ns->msg_id--; + } } /// bitfields used @@ -917,6 +968,26 @@ void handle_sched_next_rc( /**** END IMPLEMENTATIONS ****/ +tw_event * model_net_method_event_new_user_prio( + tw_lpid dest_gid, + tw_stime offset_ts, + tw_lp *sender, + int net_id, + void **msg_data, + void **extra_data, + tw_stime prio){ + tw_event *e = tw_event_new_user_prio(dest_gid, offset_ts, sender, prio); + model_net_wrap_msg *m_wrap = tw_event_data(e); + msg_set_header(model_net_base_magic, MN_BASE_PASS, sender->gid, + &m_wrap->h); + *msg_data = ((char*)m_wrap)+msg_offsets[net_id]; + // extra_data is optional + if (extra_data != NULL){ + *extra_data = m_wrap + 1; + } + return e; +} + tw_event * model_net_method_event_new( tw_lpid dest_gid, tw_stime offset_ts, @@ -1008,6 +1079,7 @@ void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue, &m_wrap->h); m_wrap->msg.m_base.is_from_remote = is_recv_queue; r_wrap->queue_offset = queue_offset; + m_wrap->msg.m_base.created_in_surrogate = is_freezing_on; tw_event_send(e); } @@ -1081,6 +1153,371 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid, } +/* START Checking reverse handler functionality */ +static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from) { + memcpy(into, from, sizeof(model_net_base_state)); + + into->in_sched_send_loop = malloc(from->params->num_queues * sizeof(int)); + for (int i=0; i < from->params->num_queues; i++) { + into->in_sched_send_loop[i] = from->in_sched_send_loop[i]; + } + + into->sched_send = malloc(from->params->num_queues * sizeof(model_net_sched*)); + if (from->params->num_queues > 0) { + model_net_sched * sched_send_array = 
malloc(from->params->num_queues * sizeof(model_net_sched)); + for(int i = 0; i < from->params->num_queues; i++) { + into->sched_send[i] = &sched_send_array[i]; + save_model_net_sched(into->sched_send[i], from->sched_send[i]); + } + } + + into->sched_recv = malloc(sizeof(model_net_sched)); + save_model_net_sched(into->sched_recv, from->sched_recv); + + into->sub_state = NULL; + crv_checkpointer * chptr = method_array[from->net_id]->checkpointer; + if (chptr && chptr->save_lp) { + into->sub_state = calloc(1, from->sub_type->state_sz); + chptr->save_lp(into->sub_state, from->sub_state); + } + + into->node_copy_next_available_time = malloc(from->params->node_copy_queues * sizeof(tw_stime)); + for (int i=0; i < from->params->node_copy_queues; i++) { + into->node_copy_next_available_time[i] = from->node_copy_next_available_time[i]; + } +} + +static void clean_state_net_state(model_net_base_state * state) { + free(state->in_sched_send_loop); + + if (state->params->num_queues > 0) { + for(int i = 0; i < state->params->num_queues; i++) { + clean_model_net_sched(state->sched_send[i]); + } + } + free(state->sched_send[0]); + free(state->sched_send); + clean_model_net_sched(state->sched_recv); + free(state->sched_recv); + + if (state->sub_state != NULL) { + crv_checkpointer * chptr = method_array[state->net_id]->checkpointer; + if (chptr && chptr->clean_lp) { + chptr->clean_lp(state->sub_state); + } + free(state->sub_state); + } + free(state->node_copy_next_available_time); +} + +static bool warned_no_lp_checking_defined[MAX_NETS]; + +static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after) { + bool is_same = true; + is_same &= before->net_id == after->net_id; + is_same &= before->nics_per_router == after->nics_per_router; + for (int i=0; i < before->params->num_queues; i++) { + is_same &= before->in_sched_send_loop[i] == after->in_sched_send_loop[i]; + } + is_same &= before->in_sched_recv_loop == after->in_sched_recv_loop; + is_same &= 
before->msg_id == after->msg_id; + for(int i = 0; i < before->params->num_queues; i++) { + is_same &= check_model_net_sched(before->sched_send[i], after->sched_send[i]); + } + is_same &= check_model_net_sched(before->sched_recv, after->sched_recv); + crv_checkpointer * chptr = method_array[before->net_id]->checkpointer; + if (chptr && before->sub_state != NULL && chptr->check_lps) { + is_same &= chptr->check_lps(before->sub_state, after->sub_state); + // Warning once that checking for LP subtype has not been fully implemented + } else if (!warned_no_lp_checking_defined[before->net_id]) { + fprintf(stderr, "Warning: Network of type \"%s\" has not been fully configured to be checkpointed (Running this model under SEQUENTIAL_ROLLBACK_CHECK won't capture any issues that arise from the reverse event handlers).\n", model_net_method_names[before->net_id]); + warned_no_lp_checking_defined[before->net_id] = true; + } + is_same &= before->next_available_time == after->next_available_time; + for (int i=0; i < before->params->node_copy_queues; i++) { + is_same &= before->node_copy_next_available_time[i] == after->node_copy_next_available_time[i]; + } + + return is_same; +} + +static void __print_model_net(FILE * out, char const * prefix, model_net_base_state * state, bool is_lp_state) { + fprintf(out, "%smodel_net_state ->\n", prefix); +#if MODELNET_LP_DEBUG + fprintf(out, "%s |num_events_processed = %zu\n", prefix, state->num_events_processed); +#endif /* if MODEL%sNET_LP_DEBUG */ + + void (*print_modelnet) (FILE *, char const *, model_net_sched *) = is_lp_state ? 
print_model_net_sched : print_model_net_sched_checkpoint; + + fprintf(out, "%s | net_id = %d\n", prefix, state->net_id); + fprintf(out, "%s | nics_per_router = %d\n", prefix, state->nics_per_router); + fprintf(out, "%s | *in_sched_send_loop[%d] = [", prefix, state->params->num_queues); // deep-all + for (int i=0; i < state->params->num_queues; i++) { + fprintf(out, "%d%s", state->in_sched_send_loop[i], i==state->params->num_queues-1 ? "" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, "%s | in_sched_recv_loop = %d\n", prefix, state->in_sched_recv_loop); + fprintf(out, "%s | msg_id = %lu\n", prefix, state->msg_id); + fprintf(out, "%s | ** sched_send = %p\n", prefix, state->sched_send); // deep-all + // + int len_subprefix = snprintf(NULL, 0, "%s | | ", prefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s | | ", prefix); + for(int i = 0; i < state->params->num_queues; i++) { + fprintf(out, "%ssched_send[%d]:\n", subprefix, i); + print_modelnet(out, subprefix, state->sched_send[i]); + } + // + fprintf(out, "%s | * sched_recv = %p\n", prefix, state->sched_recv); // deep-all + print_modelnet(out, subprefix, state->sched_recv); + fprintf(out, "%s | * params = %p\n", prefix, state->params); + fprintf(out, "%s | * sub_type = %p\n", prefix, state->sub_type); + fprintf(out, "%s | * sub_model_type = %p\n", prefix, state->sub_model_type); + fprintf(out, "%s | * sub_state = %p\n", prefix, state->sub_state); // deep-all + // + crv_checkpointer * chptr = method_array[state->net_id]->checkpointer; + if (chptr && state->sub_state != NULL) { + if (is_lp_state && chptr->print_lp) { + chptr->print_lp(out, subprefix, state->sub_state); + } + if (!is_lp_state && chptr->print_checkpoint) { + chptr->print_checkpoint(out, subprefix, state->sub_state); + } + } + // + fprintf(out, "%s | next_available_time = %f\n", prefix, state->next_available_time); + fprintf(out, "%s | *node_copy_next_available_time[%d] = [", prefix, state->params->num_queues); // 
(done) deep-all + for (int i=0; i < state->params->node_copy_queues; i++) { + fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? "" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, "%s | *sched_loop_pre_surrogate = %p\n", prefix, state->sched_loop_pre_surrogate); // no need to check + fprintf(out, "%s | sched_recv_loop_pre_surrogate = %d\n", prefix, state->sched_recv_loop_pre_surrogate); // no need to check +} + +static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state) { + __print_model_net(out, prefix, state, true); +} +static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state) { + __print_model_net(out, prefix, state, false); +} + +static char const * const event_type_string(enum model_net_base_event_type type) { + switch (type) { + case MN_BASE_NEW_MSG: return "MN_BASE_NEW_MSG"; + case MN_BASE_SCHED_NEXT: return "MN_BASE_SCHED_NEXT"; + case MN_BASE_SAMPLE: return "MN_BASE_SAMPLE"; + case MN_BASE_PASS: return "MN_BASE_PASS"; + case MN_BASE_END_NOTIF: return "MN_BASE_END_NOTIF"; + case MN_CONGESTION_EVENT: return "MN_CONGESTION_EVENT"; + } + return "UNKNOWN TYPE!!"; +} + +// Used Claude for an initial draft of this function +bool check_model_net_request(model_net_request const * before, model_net_request const * after) { + bool is_same = true; + + is_same &= (before->final_dest_lp == after->final_dest_lp); + is_same &= (before->dest_mn_lp == after->dest_mn_lp); + is_same &= (before->src_lp == after->src_lp); + is_same &= (before->msg_start_time == after->msg_start_time); + is_same &= (before->msg_new_mn_event == after->msg_new_mn_event); + is_same &= (before->msg_size == after->msg_size); + is_same &= (before->pull_size == after->pull_size); + is_same &= (before->packet_size == after->packet_size); + is_same &= (before->msg_id == after->msg_id); + is_same &= (before->net_id == after->net_id); + is_same &= (before->is_pull == 
after->is_pull); + is_same &= (before->queue_offset == after->queue_offset); + is_same &= (before->remote_event_size == after->remote_event_size); + is_same &= (before->self_event_size == after->self_event_size); + is_same &= (before->app_id == after->app_id); + is_same &= (strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0); + + return is_same; +} + +void print_model_net_request(FILE * out, char const * prefix, model_net_request * req) { + fprintf(out, "%sfinal_dest_lp = %ld\n", prefix, req->final_dest_lp); + fprintf(out, "%sdest_mn_lp = %ld\n", prefix, req->dest_mn_lp); + fprintf(out, "%ssrc_lp = %ld\n", prefix, req->src_lp); + fprintf(out, "%smsg_start_time = %f\n", prefix, req->msg_start_time); + fprintf(out, "%smsg_new_mn_event = %f\n", prefix, req->msg_new_mn_event); + fprintf(out, "%smsg_size = %ld\n", prefix, req->msg_size); + fprintf(out, "%spull_size = %ld\n", prefix, req->pull_size); + fprintf(out, "%spacket_size = %ld\n", prefix, req->packet_size); + fprintf(out, "%smsg_id = %ld\n", prefix, req->msg_id); + fprintf(out, "%snet_id = %d\n", prefix, req->net_id); + fprintf(out, "%sis_pull = %d\n", prefix, req->is_pull); + fprintf(out, "%squeue_offset = %d\n", prefix, req->queue_offset); + fprintf(out, "%sremote_event_size = %d\n", prefix, req->remote_event_size); + fprintf(out, "%sself_event_size = %d\n", prefix, req->self_event_size); + fprintf(out, "%scategory = '%s'\n", prefix, req->category); + fprintf(out, "%sapp_id = %d\n", prefix, req->app_id); +} + +bool check_mn_stats(struct mn_stats const * before, struct mn_stats const * after) { + bool is_same = true; + + is_same &= (strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0); + is_same &= (before->send_count == after->send_count); + is_same &= (before->send_bytes == after->send_bytes); + is_same &= (before->send_time == after->send_time); + is_same &= (before->recv_count == after->recv_count); + is_same &= (before->recv_bytes == after->recv_bytes); + is_same &= 
(before->recv_time == after->recv_time); + is_same &= (before->max_event_size == after->max_event_size); + + return is_same; +} + +void print_mn_stats(FILE * out, char const * prefix, struct mn_stats * req) { + fprintf(out, "%scategory = '%s'\n", prefix, req->category); + fprintf(out, "%ssend_count = %ld\n", prefix, req->send_count); + fprintf(out, "%ssend_bytes = %ld\n", prefix, req->send_bytes); + fprintf(out, "%ssend_time = %g\n", prefix, req->send_time); + fprintf(out, "%srecv_count = %ld\n", prefix, req->recv_count); + fprintf(out, "%srecv_bytes = %ld\n", prefix, req->recv_bytes); + fprintf(out, "%srecv_time = %g\n", prefix, req->recv_time); + fprintf(out, "%smax_event_size = %ld\n", prefix, req->max_event_size); +} + +static void print_event_state(FILE * out, char const * prefix, model_net_base_state * state, model_net_wrap_msg * msg) { + fprintf(out, "%sh\n", prefix); + fprintf(out, "%s| src = %lu\n", prefix, msg->h.src); + fprintf(out, "%s| event_type = %d (%s)\n", prefix, msg->h.event_type, event_type_string(msg->h.event_type)); + fprintf(out, "%s| magic = %d\n", prefix, msg->h.magic); + + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char subprefix_2[len_subprefix]; + snprintf(subprefix_2, len_subprefix, "%s%s", prefix, addprefix_2); + + crv_checkpointer * chptr; + void * sub_msg; + switch (msg->h.event_type) { + case MN_BASE_NEW_MSG: + case MN_BASE_SCHED_NEXT: + // We can check m_base values + fprintf(out, "%sm_base\n", prefix); + fprintf(out, "%s | req\n", prefix); + print_model_net_request(out, subprefix, &msg->msg.m_base.req); + fprintf(out, "%s | is_from_remote = %d\n", prefix, msg->msg.m_base.is_from_remote); + fprintf(out, "%s | isQueueReq = %d\n", prefix, msg->msg.m_base.isQueueReq); + 
fprintf(out, "%s | save_ts = %f\n", prefix, msg->msg.m_base.save_ts); + fprintf(out, "%s | sched_params.prio = %d\n", prefix, msg->msg.m_base.sched_params.prio); + fprintf(out, "%s | rc\n", prefix); + fprintf(out, "%s | | req\n", prefix); + print_model_net_request(out, subprefix_2, &msg->msg.m_base.rc.req); + fprintf(out, "%s | | sched_params.prio = %d\n", prefix, msg->msg.m_base.rc.sched_params.prio); + fprintf(out, "%s | | rtn = %d\n", prefix, msg->msg.m_base.rc.rtn); + fprintf(out, "%s | | prio = %d\n", prefix, msg->msg.m_base.rc.prio); + fprintf(out, "%s | created_in_surrogate = %d\n", prefix, msg->msg.m_base.created_in_surrogate); + break; + + case MN_BASE_SAMPLE: + case MN_BASE_PASS: + case MN_BASE_END_NOTIF: + // printing sub_msg + fprintf(out, "%ssub_msg ->\n", prefix); + chptr = method_array[state->net_id]->checkpointer; + sub_msg = ((char*)msg)+msg_offsets[state->net_id]; + if (chptr && chptr->print_event) { + char addprefix[] = " | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + chptr->print_event(out, subprefix, state->sub_state, sub_msg); + } else { + fprintf(out, "%s | == cannot print the submessage (event print function not yet defined for network of type %s) ==\n", prefix, model_net_method_names[state->net_id]); + } + break; + + case MN_CONGESTION_EVENT: + // Nothing to print + break; + } +} + +/* END checking reverse handler functionality */ + +void model_net_method_switch_to_surrogate(void) { + is_freezing_on = true; +} + +void model_net_method_switch_to_highdef(void) { + is_freezing_on = false; +} + +void model_net_method_switch_to_surrogate_lp(tw_lp * lp) { + model_net_base_state * const ns = (model_net_base_state*) lp->cur_state; + + //printf("PID %d in_sched_send_loop = [", lp->gid); + for (int i = 0; i < ns->params->num_queues; i++) { + //printf("%d ", ns->in_sched_send_loop[i]); + ns->sched_loop_pre_surrogate[i] = 
ns->in_sched_send_loop[i]; + // scheduling an idle event to prevent getting stuck in the middle of a scheduling loop + if (ns->sched_loop_pre_surrogate[i]) { // <- this can be more finely tuned + // TODO: change zero-offset event for something a bit more sensible + model_net_method_idle_event2(0.0, 0, i, lp); + } + //ns->in_sched_send_loop[i] = 0; + } + //printf("]\n"); + + ns->sched_recv_loop_pre_surrogate = ns->in_sched_recv_loop; + if (ns->in_sched_recv_loop) { + model_net_method_idle_event(0.0, 1, lp); + } + //ns->in_sched_recv_loop = 0; +} + +void model_net_method_switch_to_highdef_lp(tw_lp * lp) { + model_net_base_state * const ns = (model_net_base_state*) lp->cur_state; + + //printf("PID %d in_sched_send_loop = [", lp->gid); + for (int i = 0; i < ns->params->num_queues; i++) { + //printf("%d ", ns->in_sched_send_loop[i]); + // We have to duplicate an idle event that was produced in surrogate-mode, but not yet processed by the time we switch to high-def again, if that event was in the middle of the loop (asking for the next packet to inject) and in no other case + // TODO: Not all LPs need an event like this! 
+ if (ns->sched_loop_pre_surrogate[i] == 0 && ns->in_sched_send_loop[i] == 1) { + model_net_method_idle_event2(0.0, 0, i, lp); + } + ns->in_sched_send_loop[i] |= ns->sched_loop_pre_surrogate[i]; + } + + if (ns->sched_recv_loop_pre_surrogate == 0 && ns->in_sched_recv_loop == 1) { + model_net_method_idle_event(0.0, 1, lp); + } + ns->in_sched_recv_loop |= ns->sched_recv_loop_pre_surrogate; +} + +void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp, void * data), void * data) { + model_net_base_state * const ns = (model_net_base_state*) lp->cur_state; + + fun(ns->sub_state, lp, data); +} + +int model_net_get_event_type_lp(model_net_wrap_msg * msg) { + return msg->h.event_type; +} + +void * model_net_method_msg_from_tw_event(tw_lp * lp, model_net_wrap_msg * msg) { + model_net_base_state * const ns = (model_net_base_state*) lp->cur_state; + + if (msg->h.event_type & MN_BASE_PASS) { // grab sub message + void * const sub_msg = ((char*)msg)+msg_offsets[ns->net_id]; + return sub_msg; + } + return NULL; +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c index 7bcf4d16..ffe71d7a 100644 --- a/src/networks/model-net/core/model-net-sched-impl.c +++ b/src/networks/model-net/core/model-net-sched-impl.c @@ -20,15 +20,14 @@ if (MN_SCHED_DEBUG_VERBOSE) printf(_fmt, ##__VA_ARGS__); \ } while(0) -/// scheduler-specific data structures +/// scheduler-specific data structures typedef struct mn_sched_qitem { model_net_request req; mn_sched_params sched_params; // remaining bytes to send uint64_t rem; - tw_stime entry_time; - // pointers to event structures + // pointers to event structures // sizes are given in the request struct void * remote_event; void * local_event; @@ -56,7 +55,7 @@ typedef struct mn_sched_prio { /// FCFS // void used to avoid ptr-to-ptr conv warnings static void fcfs_init ( - const struct model_net_method * method, + const 
struct model_net_method * method, const model_net_sched_cfg_params * params, int is_recv_queue, void ** sched); @@ -83,10 +82,14 @@ static void fcfs_next_rc( const void * rc_event_save, const model_net_sched_rc * rc, tw_lp * lp); +static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from); +static void clean_state_fcfs_state(mn_sched_queue * into); +static bool check_fcfs_state(mn_sched_queue *before, mn_sched_queue *after); +static void print_fcfs_state(FILE * out, char const * prefix, mn_sched_queue *sched); // ROUND-ROBIN static void rr_init ( - const struct model_net_method * method, + const struct model_net_method * method, const model_net_sched_cfg_params * params, int is_recv_queue, void ** sched); @@ -114,7 +117,7 @@ static void rr_next_rc ( const model_net_sched_rc * rc, tw_lp * lp); static void prio_init ( - const struct model_net_method * method, + const struct model_net_method * method, const model_net_sched_cfg_params * params, int is_recv_queue, void ** sched); @@ -143,23 +146,40 @@ static void prio_next_rc ( tw_lp * lp); /// function tables (names defined by X macro in model-net-sched.h) -static const model_net_sched_interface fcfs_tab = +static const model_net_sched_interface fcfs_tab = { &fcfs_init, &fcfs_destroy, &fcfs_add, &fcfs_add_rc, &fcfs_next, &fcfs_next_rc}; -static const model_net_sched_interface rr_tab = +static const model_net_sched_interface rr_tab = { &rr_init, &rr_destroy, &rr_add, &rr_add_rc, &rr_next, &rr_next_rc}; static const model_net_sched_interface prio_tab = { &prio_init, &prio_destroy, &prio_add, &prio_add_rc, &prio_next, &prio_next_rc}; -#define X(a,b,c) c, +static const crv_checkpointer fcfs_chptr = { + NULL, + sizeof(mn_sched_queue), + (save_checkpoint_state_f) save_state_fcfs_state, + (clean_checkpoint_state_f) clean_state_fcfs_state, + (check_states_f) check_fcfs_state, + (print_lpstate_f) print_fcfs_state, + (print_checkpoint_state_f) print_fcfs_state, + NULL, +}; + +#define X(a,b,c,d) c, 
const model_net_sched_interface * sched_interfaces[] = { SCHEDULER_TYPES }; #undef X -/// FCFS implementation +#define X(a,b,c,d) d, +const crv_checkpointer * sched_checkpointers[] = { + SCHEDULER_TYPES +}; +#undef X + +/// FCFS implementation void fcfs_init( - const struct model_net_method * method, + const struct model_net_method * method, const model_net_sched_cfg_params * params, int is_recv_queue, void ** sched){ @@ -188,15 +208,16 @@ void fcfs_add ( tw_lp * lp){ (void)rc; // unneeded for fcfs mn_sched_qitem *q = malloc(sizeof(mn_sched_qitem)); - q->entry_time = tw_now(lp); q->req = *req; q->sched_params = *sched_params; q->rem = req->msg_size; + assert(req->remote_event_size == remote_event_size); if (remote_event_size > 0){ q->remote_event = malloc(remote_event_size); memcpy(q->remote_event, remote_event, remote_event_size); } else { q->remote_event = NULL; } + assert(req->self_event_size == local_event_size); if (local_event_size > 0){ q->local_event = malloc(local_event_size); memcpy(q->local_event, local_event, local_event_size); @@ -219,7 +240,7 @@ void fcfs_add_rc(void *sched, const model_net_sched_rc *rc, tw_lp *lp){ mn_sched_qitem *q = qlist_entry(ent, mn_sched_qitem, ql); dprintf("%llu (mn): rc adding request from %llu to %llu\n", LLU(lp->gid), LLU(q->req.src_lp), LLU(q->req.final_dest_lp)); - // free'ing NULLs is a no-op + // free'ing NULLs is a no-op free(q->remote_event); free(q->local_event); free(q); @@ -251,6 +272,8 @@ int fcfs_next( is_last_packet = 0; } + bool const is_there_another_pckt_in_queue = !is_last_packet || s->queue_len > 1; + if (s->is_recv_queue){ dprintf("%llu (mn): receiving message of size %llu (of %llu) " "from %llu to %llu at %1.5e (last:%d)\n", @@ -270,7 +293,8 @@ int fcfs_next( LLU(q->req.final_dest_lp), tw_now(lp), is_last_packet); *poffset = s->method->model_net_method_packet_event(&q->req, q->req.msg_size - q->rem, psize, 0.0, &q->sched_params, - q->remote_event, q->local_event, lp, is_last_packet); + q->remote_event, 
q->local_event, lp, is_last_packet, + is_there_another_pckt_in_queue); } // if last packet - remove from list, free, save for rc @@ -361,8 +385,114 @@ void fcfs_next_rc( } } +static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * from) { + into->req = from->req; + into->sched_params = from->sched_params; + into->rem = from->rem; + into->remote_event = NULL; + into->local_event = NULL; + if (from->remote_event != NULL) { + assert(from->req.remote_event_size > 0); + into->remote_event = malloc(from->req.remote_event_size); + memcpy(into->remote_event, from->remote_event, from->req.remote_event_size); + } + if (from->local_event != NULL) { + assert(from->req.self_event_size > 0); + into->local_event = malloc(from->req.self_event_size); + memcpy(into->local_event, from->local_event, from->req.self_event_size); + } +} + +static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from) { + into->method = from->method; + into->is_recv_queue = from->is_recv_queue; + into->queue_len = from->queue_len; + INIT_QLIST_HEAD(&into->reqs); + + mn_sched_qitem * sched_qitem = NULL; + qlist_for_each_entry(sched_qitem, &from->reqs, ql) { + mn_sched_qitem * new_sched_qitem = malloc(sizeof(mn_sched_qitem)); + save_mn_sched_qitem(new_sched_qitem, sched_qitem); + qlist_add_tail(&new_sched_qitem->ql, &into->reqs); + } +} + +static void clean_mn_sched_qitem(mn_sched_qitem * into) { + if (into->remote_event != NULL) { + free(into->remote_event); + } + if (into->local_event != NULL) { + free(into->local_event); + } +} + +static void clean_state_fcfs_state(mn_sched_queue * into) { + mn_sched_qitem * sched_qitem = NULL; + mn_sched_qitem * _ = NULL; + qlist_for_each_entry_safe(sched_qitem, _, &into->reqs, ql) { + clean_mn_sched_qitem(sched_qitem); + qlist_del(&sched_qitem->ql); + free(sched_qitem); + } +} + +static bool check_mn_sched_qitem(mn_sched_qitem * before, mn_sched_qitem * after) { + bool is_same = true; + + is_same &= 
check_model_net_request(&before->req, &after->req); + is_same &= before->sched_params.prio == after->sched_params.prio; + is_same &= before->rem == after->rem; + /* payloads are compared only when everything above matched (so both sides share the same event sizes) and only when there are bytes to compare */ is_same = is_same && !(before->req.remote_event_size > 0 && memcmp(before->remote_event, after->remote_event, before->req.remote_event_size)); + is_same = is_same && !(before->req.self_event_size > 0 && memcmp(before->local_event, after->local_event, before->req.self_event_size)); + return is_same; +} + +static bool check_fcfs_state(mn_sched_queue * before, mn_sched_queue * after) { + bool is_same = true; + + is_same &= before->is_recv_queue == after->is_recv_queue; + is_same &= before->queue_len == after->queue_len; + + /* bail out early when the two request lists differ in length (this used to compare `before` with itself, so it never triggered) */ if (qlist_count(&before->reqs) != qlist_count(&after->reqs)) { + return false; + } + + is_same &= are_qlist_equal(&before->reqs, &after->reqs, QLIST_OFFSET(mn_sched_qitem, ql), (bool (*) (void *, void *)) check_mn_sched_qitem); + + return is_same; +} + +static void print_mn_sched_qitem(FILE * out, char const * prefix, mn_sched_qitem * item) { + int len_subprefix = snprintf(NULL, 0, "%s | | ", prefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s | | ", prefix); + + fprintf(out, "%s mn_sched_qitem\n", prefix); + fprintf(out, "%s | .req\n", prefix); + print_model_net_request(out, subprefix, &item->req); + fprintf(out, "%s | sched_params.prio = %d\n", prefix, item->sched_params.prio); + fprintf(out, "%s | rem = %lu\n", prefix, item->rem); + fprintf(out, "%s | remote_event = %p (contents below)\n", prefix, item->remote_event); + tw_fprint_binary_array(out, subprefix, item->remote_event, item->req.remote_event_size); + fprintf(out, "%s | local_event = %p (contents below)\n", prefix, item->local_event); + tw_fprint_binary_array(out, subprefix, item->local_event, item->req.self_event_size); +} + +static void print_fcfs_state(FILE * out, char const * prefix, mn_sched_queue *sched) { + fprintf(out, "%sFCFS:\n", prefix); + fprintf(out, "%s | .method = %p\n", prefix, sched->method); + fprintf(out, "%s | .is_recv_queue = %d\n", prefix, sched->is_recv_queue); + 
fprintf(out, "%s | .queue_len = %d\n", prefix, sched->queue_len); + fprintf(out, "%s | .reqs[%d] = {\n", prefix, qlist_count(&sched->reqs)); + mn_sched_qitem * sched_qitem = NULL; + qlist_for_each_entry(sched_qitem, &sched->reqs, ql) { + print_mn_sched_qitem(out, prefix, sched_qitem); + } + fprintf(out, "%s | }\n", prefix); +} + void rr_init ( - const struct model_net_method * method, + const struct model_net_method * method, const model_net_sched_cfg_params * params, int is_recv_queue, void ** sched){ @@ -427,7 +557,7 @@ void rr_next_rc ( } void prio_init ( - const struct model_net_method * method, + const struct model_net_method * method, const model_net_sched_cfg_params * params, int is_recv_queue, void ** sched){ @@ -465,7 +595,7 @@ void prio_add ( mn_sched_prio *ss = sched; int prio = sched_params->prio; if (prio == -1){ - // default prio - lowest possible + // default prio - lowest possible prio = ss->params.num_prios-1; } else if (prio >= ss->params.num_prios){ @@ -504,7 +634,7 @@ int prio_next( } } rc->prio = -1; - return -1; // all sub schedulers had no work + return -1; // all sub schedulers had no work } void prio_next_rc ( diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c index ca31659a..4868fcbf 100644 --- a/src/networks/model-net/core/model-net-sched.c +++ b/src/networks/model-net/core/model-net-sched.c @@ -14,7 +14,7 @@ #include "codes/model-net-sched-impl.h" #include "codes/quicklist.h" -#define X(a,b,c) b, +#define X(a,b,c,d) b, char * sched_names [] = { SCHEDULER_TYPES }; @@ -80,6 +80,72 @@ void model_net_sched_set_default_params(mn_sched_params *sched_params){ sched_params->prio = -1; } +/* START Checking reverse handler functionality */ +void save_model_net_sched(model_net_sched *into, model_net_sched const *from) { + into->type = from->type; + + into->dat = NULL; + crv_checkpointer const * chptr = sched_checkpointers[from->type]; + if (chptr && chptr->save_lp) { + into->dat = 
malloc(chptr->sz_storage); + chptr->save_lp(into->dat, from->dat); + } +} + +void clean_model_net_sched(model_net_sched *state) { + if (state->dat) { + crv_checkpointer const * chptr = sched_checkpointers[state->type]; + assert (chptr && chptr->clean_lp); + chptr->clean_lp(state->dat); + free(state->dat); + } +} + +bool check_model_net_sched( + model_net_sched *before, + model_net_sched *after +) { + crv_checkpointer const * chptr = sched_checkpointers[before->type]; + if (before->dat != NULL && chptr && chptr->check_lps) { + return chptr->check_lps(before->dat, after->dat); + } + tw_error(TW_LOC, "Scheduler of type \"%s\" has not been configured to be checkpointed", sched_names[before->type]); + return false; +} + +static void __print_model_net_sched( + FILE * out, + char const * prefix, + model_net_sched *sched, + bool is_lp_state +) { + crv_checkpointer const * chptr = sched_checkpointers[sched->type]; + fprintf(out, "%smodel_net_sched.sched_type = %d\n", prefix, sched->type); + fprintf(out, "%smodel_net_sched.dat = %p\n", prefix, sched->dat); + + int len_subprefix = snprintf(NULL, 0, "%s | ", prefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s | ", prefix); + + if (chptr) { + if (is_lp_state && chptr->print_lp) { + chptr->print_lp(out, subprefix, sched->dat); + } + if (!is_lp_state && chptr->print_checkpoint) { + chptr->print_checkpoint(out, subprefix, sched->dat); + } + } +} + +void print_model_net_sched(FILE * out, char const * prefix, model_net_sched *sched) { + __print_model_net_sched(out, prefix, sched, true); +} + +void print_model_net_sched_checkpoint(FILE * out, char const * prefix, model_net_sched *sched) { + __print_model_net_sched(out, prefix, sched, false); +} +/* STOP Checking reverse handler functionality */ + /* * Local variables: * c-indent-level: 4 diff --git a/src/networks/model-net/core/model-net.c b/src/networks/model-net/core/model-net.c index 5ee14979..51c0e9ba 100644 --- 
a/src/networks/model-net/core/model-net.c +++ b/src/networks/model-net/core/model-net.c @@ -325,7 +325,7 @@ static model_net_event_return model_net_event_impl_base( void const * self_event, tw_lp *sender) { - + if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg) > g_tw_msg_sz){ tw_error(TW_LOC, "Error: model_net trying to transmit an event of size " @@ -378,6 +378,7 @@ static model_net_event_return model_net_event_impl_base( strncpy(r->category, category, CATEGORY_NAME_MAX-1); r->category[CATEGORY_NAME_MAX-1]='\0'; + if (is_msg_params_set[MN_MSG_PARAM_START_TIME]) r->msg_start_time = start_time_param; else @@ -386,10 +387,11 @@ static model_net_event_return model_net_event_impl_base( if (congestion_control_is_jobmap_set()) { //perhaps make jobmap a global set regardless of congestion control struct codes_jobmap_ctx *ctx; ctx = congestion_control_get_jobmap(); - struct codes_jobmap_id jid; + struct codes_jobmap_id jid; jid = codes_jobmap_to_local_id(codes_mapping_get_lp_relative_id(sender->gid, 0, 0), ctx); r->app_id = jid.job; } + // this is an outgoing message m->msg.m_base.is_from_remote = 0; m->msg.m_base.isQueueReq = 1; diff --git a/src/networks/model-net/dragonfly-custom.C b/src/networks/model-net/dragonfly-custom.C index a2deb05b..cf7bf21b 100644 --- a/src/networks/model-net/dragonfly-custom.C +++ b/src/networks/model-net/dragonfly-custom.C @@ -179,6 +179,13 @@ struct dragonfly_param double global_credit_delay; double cn_credit_delay; double router_delay; + + //Xin: parameters for message counters of apps + int counting_bool; + tw_stime counting_start; + tw_stime counting_interval; + int counting_windows; + int num_apps; }; struct dfly_hash_key @@ -369,12 +376,19 @@ struct router_state char output_buf[4096]; char output_buf2[4096]; + //Xin: buffer for output data + char output_buf5[4096]; + char output_buf6[4096]; struct dfly_router_sample * rsamples; long fwd_events; long rev_events; + //Xin: msg couters for apps + tw_stime **agg_busy_time; 
+ int64_t **agg_link_traffic; + /* following used for ROSS model-level stats collection */ tw_stime* busy_time_ross_sample; int64_t * link_traffic_ross_sample; @@ -649,6 +663,23 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params){ routing = -1; } + //Xin: app msgs counting on routers + rc = configuration_get_value_int(&config, "PARAMS", "counting_bool", anno, &p->counting_bool); + if(p->counting_bool) { + int rc1 = configuration_get_value_double(&config, "PARAMS", "counting_start", anno, &p->counting_start); + int rc2 = configuration_get_value_int(&config, "PARAMS", "counting_windows", anno, &p->counting_windows); + int rc3 = configuration_get_value_double(&config, "PARAMS", "counting_interval", anno, &p->counting_interval); + int rc4 = configuration_get_value_int(&config, "PARAMS", "num_apps", anno, &p->num_apps); + if(rc1 || rc2 || rc3 || rc4) + tw_error(TW_LOC, "\n Missing couting values, (counting_start/windows/interval/num_apps) check for config files\n"); + + //convert us to ns + p->counting_start = p->counting_start * 1000; + p->counting_interval = p->counting_interval * 1000; + + //printf("start %f, end %f, interval %f\n", p->counting_start, p->counting_end, p->counting_interval); + } + // rc = configuration_get_value_int(&config, "PARAMS", "num_vcs_override", anno, &p->num_vcs); // if(rc) { // if(routing == PROG_ADAPTIVE) @@ -1155,6 +1186,21 @@ void router_custom_setup(router_state * r, tw_lp * lp) r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + //Xin: msg counters for apps + if(p->counting_bool > 0) + { + r->agg_link_traffic = (int64_t **) calloc(p->counting_windows, sizeof(int64_t *)); + r->agg_busy_time = (tw_stime **) malloc (p->counting_windows * sizeof(tw_stime *)); + + for (int i = 0; i < p->counting_windows; ++i) + { + r->agg_link_traffic[i] = (int64_t*) calloc(p->radix, sizeof(int64_t)); + r->agg_busy_time[i] 
= (tw_stime*) malloc(p->radix * sizeof(tw_stime)); + for(int j = 0; j < p->radix; j++) + r->agg_busy_time[i][j] = 0.0; + } + } + rc_stack_create(&r->st); for(int i = 0; i < p->num_router_rows; i++) @@ -1206,7 +1252,8 @@ static tw_stime dragonfly_custom_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; @@ -2510,6 +2557,35 @@ void dragonfly_custom_router_final(router_state * s, written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d])); lp_io_write(lp->gid, (char*)"dragonfly-router-traffic", written, s->output_buf2); + + //Xin: output link statistics + /* same `> 0` test that guards the allocation of the agg_* arrays, so they are never read when they were not allocated */ if(s->params->counting_bool > 0) + { + // for link traffic + if(!s->router_id){ + written = sprintf(s->output_buf5, "# Format \n"); + lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5); + } + for(int i=0; i < p->counting_windows; i++) { + written = sprintf(s->output_buf5, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval)); + for (int d=0; d < p->radix; d++) + /* counters are int64_t: print them with %lld via LLD(), as done for link_traffic above */ written += sprintf(s->output_buf5 + written, " %lld", LLD(s->agg_link_traffic[i][d])); + lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5); + } + + // for link busy time + if(!s->router_id){ + written = sprintf(s->output_buf6, "# Format \n"); + lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6); + } + for(int i=0; i < p->counting_windows; i++) { + written = sprintf(s->output_buf6, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval)); + for (int d=0; d < p->radix; d++) + written += sprintf(s->output_buf6 + written, " %lf", (s->agg_busy_time[i][d])); + lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6); + 
} + } + } static vector get_intra_router(router_state * s, int src_router_id, int dest_router_id, int num_rtrs_per_grp) @@ -3356,6 +3432,17 @@ static void router_packet_send_rc(router_state * s, terminal_custom_message_list * cur_entry = (terminal_custom_message_list *)rc_stack_pop(s->st); assert(cur_entry); + + //Xin: target window to rollback + bool rolback = false; + int current_window = -1; + const dragonfly_param *p = s->params; + if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) { + current_window = (int) ((msg->last_sent_time-s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + rolback = true; + } + } if(bf->c11) { @@ -3363,6 +3450,10 @@ static void router_packet_send_rc(router_state * s, s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + //Xin: reverse link traffic + if(rolback && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + } } if(bf->c12) { @@ -3370,6 +3461,10 @@ static void router_packet_send_rc(router_state * s, s->link_traffic_sample[output_port] -= s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] -= s->params->chunk_size; s->link_traffic_ross_sample[output_port] -= s->params->chunk_size; + //Xin: reverse link traffic + if(rolback && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] -= s->params->chunk_size; + } } s->next_output_available_time[output_port] = msg->saved_available_time; @@ -3483,6 +3578,18 @@ router_packet_send( router_state * s, m->intm_lp_id = lp->gid; m->magic = router_magic_num; + //Xin: target window to update link traffic + msg->last_sent_time = tw_now(lp); + bool update = 
false; + int current_window = -1; + const dragonfly_param *p = s->params; + if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) { + current_window = (int) ((msg->last_sent_time - s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + update = true; + } + } + if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { bf->c11 = 1; s->link_traffic[output_port] += (cur_entry->msg.packet_size % @@ -3493,14 +3600,27 @@ router_packet_send( router_state * s, s->params->chunk_size); s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); + //Xin: update link traffic data + if(update && current_window >= 0){ + // if (s->router_id==0 && to_terminal) + // printf("Router %d: update port %d with app %d traffic to dest %d\n", s->router_id, output_port, msg->app_id, msg->final_dest_gid); + s->agg_link_traffic[current_window][output_port] += (cur_entry->msg.packet_size % + s->params->chunk_size); + } } else { bf->c12 = 1; s->link_traffic[output_port] += s->params->chunk_size; s->link_traffic_sample[output_port] += s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] += s->params->chunk_size; s->link_traffic_ross_sample[output_port] += s->params->chunk_size; + //Xin: update link traffic data + if(update && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] += s->params->chunk_size; + } } + + if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && cur_entry->msg.src_terminal_id == T_ID) printf("\n Queuing at the router %d ", s->router_id); /* Determine the event type. 
If the packet has arrived at the final @@ -3563,6 +3683,15 @@ static void router_buf_update_rc(router_state * s, s->ross_rsample.busy_time[indx] = msg->saved_sample_time; s->busy_time_ross_sample[indx] = msg->saved_busy_time_ross; s->last_buf_full[indx][output_chan] = msg->saved_busy_time; + + //Xin: reverse agg busytime (not working for cross window reverse) + const dragonfly_param *p = s->params; + if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) { + int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + s->agg_busy_time[current_window][indx] = msg->saved_rcv_time; + } + } } if(bf->c1) { terminal_custom_message_list* head = return_tail(s->pending_msgs[indx], @@ -3596,6 +3725,23 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_custom_mess s->busy_time_sample[indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]); s->ross_rsample.busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]); s->busy_time_ross_sample[indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]); + + //Xin: agg busy time + const dragonfly_param *p = s->params; + msg->last_bufupdate_time = tw_now(lp); + if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) { + int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + int full_window = (int) ((s->last_buf_full[indx][output_chan] - s->params->counting_start)/s->params->counting_interval); + if(full_window==current_window) { + s->agg_busy_time[current_window][indx] += (tw_now(lp) - s->last_buf_full[indx][output_chan]); + } else { /* the busy period began before the current window: split it at the window boundary */ + s->agg_busy_time[current_window][indx] += (tw_now(lp) - (s->params->counting_start+current_window*s->params->counting_interval)); + /* credit the earlier window only when the buffer filled after counting started; otherwise full_window can be negative and would index outside agg_busy_time */ if(s->last_buf_full[indx][output_chan] >= s->params->counting_start) s->agg_busy_time[full_window][indx] += 
((s->params->counting_start+current_window*s->params->counting_interval) - s->last_buf_full[indx][output_chan]); + } + } + } + s->last_buf_full[indx][output_chan] = 0.0; } if(s->queued_msgs[indx][output_chan] != NULL) { @@ -3876,6 +4022,7 @@ struct model_net_method dragonfly_custom_method = NULL,//(final_f)dragonfly_custom_sample_fin custom_dragonfly_register_model_types, custom_dragonfly_get_model_types, + NULL, }; struct model_net_method dragonfly_custom_router_method = @@ -3898,6 +4045,7 @@ struct model_net_method dragonfly_custom_router_method = NULL,//(final_f)dragonfly_custom_rsample_fin custom_router_register_model_types, custom_dfly_router_get_model_types, + NULL, }; #ifdef ENABLE_CORTEX diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index a45c48d3..91befa1b 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -3,7 +3,7 @@ * See LICENSE in top-level directory * * Originally written by Misbah Mubarak - * Updated by Neil McGlohon + * Updated by Neil McGlohon and Elkin Cruz-Camacho * * A 1D specific dragonfly custom model - diverged from dragonfly-custom.C * Differs from dragonfly.C in that it allows for the custom features typically found in @@ -19,10 +19,11 @@ #include "codes/jenkins-hash.h" #include "codes/codes_mapping.h" #include "codes/codes.h" -#include "codes/model-net.h" #include "codes/model-net-method.h" #include "codes/model-net-lp.h" +#include "codes/surrogate/init.h" #include "codes/net/dragonfly-dally.h" +#include "quicklist.h" #include "sys/file.h" #include "codes/quickhash.h" #include "codes/rc-stack.h" @@ -30,6 +31,8 @@ #include #include #include +#include +#include #include "codes/network-manager/dragonfly-network-manager.h" #include "codes/congestion-controller-model.h" @@ -44,7 +47,6 @@ #define DFLY_HASH_TABLE_SIZE 4999 // debugging parameters #define BW_MONITOR 1 -#define DEBUG_LP 892 #define T_ID -1 #define TRACK -1 #define TRACK_PKT -1 
@@ -74,6 +76,15 @@ #define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_DALLY_ROUTER]) #define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_DALLY_ROUTER]) +// If we have configured the network surrogate, then we will collect packet delay data, which is done via the scheduling of an event. This additional event will shift the random generator and thus the same model will behave differently from the start when compared with the one where the surrogate is not setup. If one wants to test both scenarios (with and without the surrogate) and maintain determinism in high-fidelity, one has to enable this option +#define ALWAYS_DETERMINISTIC_NETWORK 0 + +#define num_chunks_for(message_size, chunk_size) ((message_size) ? ((message_size) + (chunk_size) - 1) / (chunk_size) : 1) + +/* handles terminal and router events like packet generate/send/receive/buffer */ +typedef struct terminal_state terminal_state; +typedef struct router_state router_state; + static int max_lvc_src_g = 1; static int max_lvc_intm_g = 3; static int min_gvc_src_g = 0; @@ -89,9 +100,9 @@ static long num_remote_packets = 0; static long global_stalled_chunk_counter = 0; -#define OUTPUT_SNAPSHOT 0 -const static int num_snapshots = 0; -tw_stime snapshot_times[num_snapshots] = {}; +#define OUTPUT_SNAPSHOT 1 +static int num_snapshots = 0; +tw_stime * snapshot_times; char snapshot_filename[128]; /* time in nanosecs */ @@ -175,23 +186,41 @@ static int sample_rtr_bytes_written = 0; static char cn_sample_file[MAX_NAME_LENGTH]; static char router_sample_file[MAX_NAME_LENGTH]; -//don't do overhead here - job of MPI layer -static tw_stime mpi_soft_overhead = 0; +// File to store packet latency from terminal-to-terminal +// NOTE: Only non-predicted latencies are saved to file +static FILE * packet_latency_f = NULL; +static void setup_packet_latency_path(char const * const dir_to_save); + + +// ==== START OF Parameters to tune surrogate mode ==== +// +static bool dally_surrogate_configured = false; 
+static bool is_dally_surrogate_on = false; +static bool freeze_network_on_switch = false; +static struct packet_latency_predictor * terminal_predictor = NULL; +static void switch_surrogate(void); +static bool is_surrogate_on_fun(void); +static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw_lp * lp, tw_event **); +static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp, tw_event **); +static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event); +static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event); +static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( terminal_state * s, tw_lp * lp, tw_event * event); +static void dragonfly_dally_terminal_reset_predictor(terminal_state * s, tw_lp * lp, void *); +// +// ==== END OF Parameters to tune surrogate mode ==== typedef struct terminal_dally_message_list terminal_dally_message_list; struct terminal_dally_message_list { terminal_dally_message msg; char* event_data; - terminal_dally_message_list *next; - terminal_dally_message_list *prev; + struct qlist_head list; }; static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, terminal_dally_message *inmsg) { thisO->msg = *inmsg; thisO->event_data = NULL; - thisO->next = NULL; - thisO->prev = NULL; + INIT_QLIST_HEAD(&thisO->list); } static void delete_terminal_dally_message_list(void *thisO) { @@ -215,6 +244,7 @@ struct dragonfly_param int global_vc_size; /* buffer size of the global channels */ int cn_vc_size; /* buffer size of the compute node channels */ int chunk_size; /* full-sized packets are broken into smaller chunks.*/ + int packet_size; /* maximum size of a packet, although we have no control over it. 
It is model-net who is in charge of generating packets of at most this size */ int global_k_picks; /* k number of connections to select from when doing local adaptive routing */ int adaptive_threshold; int rail_select; // method by which rails are selected @@ -244,6 +274,14 @@ struct dragonfly_param double router_delay; int max_hops_notify; //maximum number of hops allowed before notifying via printout + + //Xin: parameters for message counters of apps + int counting_bool; + tw_stime counting_start; + tw_stime counting_interval; + int counting_windows; + int num_apps; + }; static const dragonfly_param* stored_params; @@ -283,6 +321,7 @@ struct dfly_qhash_entry struct dfly_hash_key key; char * remote_event_data; int num_chunks; + int remaining_packets; int remote_event_size; struct qhash_head hash_link; }; @@ -319,7 +358,7 @@ typedef enum route_scoring_metric_t } route_scoring_metric_t; /* Enumeration of types of events sent between model LPs */ -typedef enum event_t +enum event_t { T_GENERATE=1, T_ARRIVE, @@ -332,7 +371,18 @@ typedef enum event_t R_BW_HALT, T_BANDWIDTH, R_SNAPSHOT, //used for timed statistic outputs -} event_t; + T_NOTIFY, // used to notify a source or destination terminal about packets status (useful for informing about latency, zombie packet or delete a zombie packet) + T_ARRIVE_PREDICTED, // this event is generated by a latency predictor instead of traversing the network +#if ALWAYS_DETERMINISTIC_NETWORK + T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event that allows us to keep the number of events produced in a simulation the same regardless of whether packet latency is activated (can be safely removed) +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ +}; + +// Types of notifications between terminals +enum notify_t { + NOTIFY_LATENCY, // Notifying the source terminal of the total latency to deliver the packet + NOTIFY_ZOMBIE, // Notifying the destination terminal of a packet that should be treated as a zombie 
+}; /* whether the last hop of a packet was global, local or a terminal */ enum last_hop @@ -437,14 +487,35 @@ static bool isRoutingNonminimalExplicit(int alg) return false; } -/* handles terminal and router events like packet generate/send/receive/buffer */ -typedef struct terminal_state terminal_state; -typedef struct router_state router_state; +/** + * Surrogate definitions and data + */ + +struct packet_sent { + struct packet_start start; + double next_packet_delay; // When the packet is initially sent, this value is -1, when the next packet is sent this value is updated to the actual delay to process the next packet + void * message_data; // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. It's wasteful but there is no other way + void * remote_event_data; // This and the one above have to be freed. This contains the extra information that the message contains + void * local_data; // This and the one above have to be freed. This contains the extra information that the message contains +}; + +struct packet_id { + uint64_t packet_ID; + unsigned int dfdally_src_terminal_id; +}; +bool operator<(struct packet_id const &lk, struct packet_id const &rk) { + return lk.packet_ID == rk.packet_ID ? 
lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id : lk.packet_ID < rk.packet_ID; +} +bool operator==(struct packet_id const &lk, struct packet_id const &rk) { + return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id == rk.dfdally_src_terminal_id; +} +// Some more function declarations +static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_message * msg, enum notify_t notification); /* dragonfly compute node data structure */ struct terminal_state { - uint64_t packet_counter; + int64_t packet_counter; int packet_gen; int packet_fin; @@ -459,15 +530,11 @@ struct terminal_state DragonflyConnectionManager connMan; tlc_state *local_congestion_controller; - map workload_lpid_to_app_id; - set app_ids; - int workloads_finished_flag; int** vc_occupancy; // vc_occupancies [rail_id][qos_level] tw_stime* terminal_available_time; // [rail_id] - terminal_dally_message_list ***terminal_msgs; //[rail_id][qos_level] - terminal_dally_message_list ***terminal_msgs_tail; //[rail_id][qos_level] + struct qlist_head **terminal_msgs; //[rail_id][qos_level] - quicklist heads int* in_send_loop; // [rail_id] struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; @@ -534,6 +601,30 @@ struct terminal_state tw_stime fin_chunks_time_ross_sample; tw_stime *busy_time_ross_sample; struct dfly_cn_sample ross_sample; + + // Variables to recover latency of packets sent to other terminals + // Sent packets (to be populated at by commit handler of packet sender) + map sent_packets; + set is_pending_local_send; + int64_t last_packet_sent_id; + // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency) + struct { + int64_t packet_ID; + double travel_end_time; + } arrival_of_last_packet; + // received (and not completed, yet) packets. The value associated to a key is the remaining number of "bytes" to receive before the packet is consumed totally. 
If a packet size == chunk size, this map will never be used/filled + map remaining_sz_packets; + + // Stores the last time in which a packet was processed (time at which a T_GENERATE event was processed) + double last_in_queue_time; + // The predictor kicks in on surrogate mode and predicts the time a packet will take to its destination + void * predictor_data; + + // Zombie events appear when the network traffic is displaced to the future. By then, all packets that were in the network should have been already delievered, thus zombies + set zombies; + + // Variable to save the entire state of the terminal into before switching to surrogate mode. During surrogate-mode, the terminal should not access the state of the network + terminal_state * frozen_state; }; struct router_state @@ -558,10 +649,8 @@ struct router_state unsigned long* stalled_chunks; //Counter for when a packet is put into queued messages instead of routing due to full VC unsigned long* total_chunks; //Counter for when a packet is sent - per port - terminal_dally_message_list ***pending_msgs; - terminal_dally_message_list ***pending_msgs_tail; - terminal_dally_message_list ***queued_msgs; - terminal_dally_message_list ***queued_msgs_tail; + struct qlist_head **pending_msgs; + struct qlist_head **queued_msgs; int *in_send_loop; int *queued_count; struct rc_stack * st; @@ -591,6 +680,13 @@ struct router_state long fwd_events; long rev_events; + //Xin: buffer for output data + char output_buf5[4096]; + char output_buf6[4096]; + //Xin: msg couters for apps + tw_stime **agg_busy_time; + int64_t **agg_link_traffic; + /* following used for ROSS model-level stats collection */ tw_stime* busy_time_ross_sample; int64_t * link_traffic_ross_sample; @@ -1132,6 +1228,8 @@ static tw_stime gen_noise(tw_lp *lp, short* rng_counter) (*rng_counter)++; return noise; #else + (void) lp; + (void) rng_counter; return 0; #endif } @@ -1209,7 +1307,7 @@ static int dfdally_get_assigned_router_id_from_terminal(const 
dragonfly_param *p int num_rails = params->num_rails; int total_routers = params->total_routers; - int total_terminals = params->total_terminals; + //int total_terminals = params->total_terminals; int num_cn_per_router = params->num_cn; if(num_planes == 1) //then all rails go to the same router //TODO: this could change - could be cool! @@ -1228,6 +1326,15 @@ static int dfdally_get_assigned_router_id_from_terminal(const dragonfly_param *p { return (term_gid / num_cn_per_router) + (rail_id * routers_per_plane); } + // NOTE(helq): The compiler has been bothering me about the lack of a + // return statement here, so I added a message to something that + // (hopefully) will never happen. + else + { + tw_error(TW_LOC, "Error: this should have never happened. We couldn't " + "figure out to which router does a terminal belong to :S"); + return -1; + } } } @@ -1449,77 +1556,228 @@ static Connection dfdally_get_best_from_k_connections(router_state *s, tw_bf *bf return get_absolute_best_connection_from_conns(s, bf, msg, lp, k_conns); } -static void append_to_terminal_dally_message_list( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index, - terminal_dally_message_list *msg) +static inline void append_to_qlist(struct qlist_head *head, terminal_dally_message_list *msg) { -// printf("\n msg id %d ", msg->msg.packet_ID); - if (thisq[index] == NULL) { - thisq[index] = msg; - } - else { - assert(thistail[index] != NULL); - thistail[index]->next = msg; - msg->prev = thistail[index]; - } - thistail[index] = msg; -// printf("\n done adding %d ", msg->msg.packet_ID); + qlist_add_tail(&msg->list, head); } -static void prepend_to_terminal_dally_message_list( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index, - terminal_dally_message_list *msg) +static inline void prepend_to_qlist(struct qlist_head *head, terminal_dally_message_list *msg) { - if (thisq[index] == NULL) { - thistail[index] = msg; - } - 
else { - thisq[index]->prev = msg; - msg->next = thisq[index]; - } - thisq[index] = msg; + qlist_add(&msg->list, head); } -static terminal_dally_message_list* return_head( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index) +// Restore old functions for router compatibility + + +static void copy_terminal_dally_message_qlist(struct qlist_head *into_head, struct qlist_head *from_head) { - terminal_dally_message_list *head = thisq[index]; - if (head != NULL) { - thisq[index] = head->next; - if(head->next != NULL) { - head->next->prev = NULL; - head->next = NULL; - } - else { - thistail[index] = NULL; + if (qlist_empty(from_head)) { + return; + } + + terminal_dally_message_list *from_entry; + qlist_for_each_entry(from_entry, from_head, list) { + terminal_dally_message_list *copy_entry = (terminal_dally_message_list *)malloc(sizeof(terminal_dally_message_list)); + + // Deep copy the entry + memcpy(copy_entry, from_entry, sizeof(terminal_dally_message_list)); + INIT_QLIST_HEAD(&copy_entry->list); /* the memcpy also duplicated the source entry's list links; reset them before the copy is appended to into_head */ + + if (from_entry->event_data != NULL) { + int event_data_sz = from_entry->msg.remote_event_size_bytes + from_entry->msg.local_event_size_bytes; + copy_entry->event_data = (char *)malloc(event_data_sz); + memcpy(copy_entry->event_data, from_entry->event_data, event_data_sz); } + + append_to_qlist(into_head, copy_entry); } - return head; } -static terminal_dally_message_list* return_tail( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index) +static void clean_terminal_dally_message_qlist(struct qlist_head *head) { - terminal_dally_message_list *tail = thistail[index]; - assert(tail); - if (tail->prev != NULL) { - tail->prev->next = NULL; - thistail[index] = tail->prev; - tail->prev = NULL; - } - else { - thistail[index] = NULL; - thisq[index] = NULL; + terminal_dally_message_list *entry, *tmp; + qlist_for_each_entry_safe(entry, tmp, head, list) { + qlist_del(&entry->list); + if (entry->event_data 
!= NULL) { + free(entry->event_data); + } + free(entry); + } +} + +static bool check_terminal_dally_message_qlist(struct qlist_head *before, struct qlist_head *after) +{ + bool is_same = true; + + // Check if both are empty + if (qlist_empty(before) && qlist_empty(after)) { + return true; + } + + // If only one is empty, they're different + if (qlist_empty(before) || qlist_empty(after)) { + return false; + } + + terminal_dally_message_list *entry_before, *entry_after; + struct qlist_head *pos_before = before->next; + struct qlist_head *pos_after = after->next; + + while (pos_before != before && pos_after != after) { + entry_before = qlist_entry(pos_before, terminal_dally_message_list, list); + entry_after = qlist_entry(pos_after, terminal_dally_message_list, list); + + is_same &= check_terminal_dally_message(&entry_before->msg, &entry_after->msg); + is_same &= (entry_before->event_data == NULL) == (entry_after->event_data == NULL); + + if (entry_before->event_data != NULL && entry_after->event_data != NULL) { + int const message_size = entry_before->msg.remote_event_size_bytes + entry_before->msg.local_event_size_bytes; + int const message_size_after = entry_after->msg.remote_event_size_bytes + entry_after->msg.local_event_size_bytes; + is_same &= (message_size == message_size_after); + if (message_size > 0) { + is_same &= (memcmp(entry_before->event_data, entry_after->event_data, message_size) == 0); + } + } + + pos_before = pos_before->next; + pos_after = pos_after->next; + } + + // Check if both reached the end at the same time + is_same &= (pos_before == before) && (pos_after == after); + + return is_same; +} + + +static terminal_dally_message_list* return_head_from_qlist(struct qlist_head *head) +{ + if (qlist_empty(head)) { + return NULL; + } + + struct qlist_head *item = qlist_pop(head); + return qlist_entry(item, terminal_dally_message_list, list); +} + +static terminal_dally_message_list* return_tail_from_qlist(struct qlist_head *head) +{ + if 
(qlist_empty(head)) { + return NULL; + } + + struct qlist_head *item = qlist_pop_back(head); + return qlist_entry(item, terminal_dally_message_list, list); +} + + + +static void copy_msgs_qlist(struct qlist_head *into_qlist, struct qlist_head *from_qlist) { + INIT_QLIST_HEAD(into_qlist); + + if (qlist_empty(from_qlist)) { + return; + } + + struct qlist_head *pos; + qlist_for_each(pos, from_qlist) { + terminal_dally_message_list *from_entry = qlist_entry(pos, terminal_dally_message_list, list); + terminal_dally_message_list *copy_entry = (terminal_dally_message_list*) malloc(sizeof(terminal_dally_message_list)); + + init_terminal_dally_message_list(copy_entry, &from_entry->msg); + if (from_entry->event_data != NULL) { + copy_entry->event_data = (char*) malloc(from_entry->msg.remote_event_size_bytes); /* NOTE(review): only the remote event bytes are copied, while check_msgs_qlist compares remote+local bytes - confirm local_event_size_bytes is always 0 for entries in these queues */ + memcpy(copy_entry->event_data, from_entry->event_data, from_entry->msg.remote_event_size_bytes); + } + + qlist_add_tail(&copy_entry->list, into_qlist); /* append so the copy keeps the source queue's order */ + } +} + + + +static bool check_msgs_qlist(struct qlist_head * before, struct qlist_head * after) { + bool is_same = true; + + if (qlist_empty(before) && qlist_empty(after)) { + return true; + } + + if (qlist_empty(before) != qlist_empty(after)) { + return false; + } + + struct qlist_head *pos_before = before->next; + struct qlist_head *pos_after = after->next; + + while (pos_before != before && pos_after != after) { + terminal_dally_message_list *entry_before = qlist_entry(pos_before, terminal_dally_message_list, list); + terminal_dally_message_list *entry_after = qlist_entry(pos_after, terminal_dally_message_list, list); + + is_same &= check_terminal_dally_message(&entry_before->msg, &entry_after->msg); + is_same &= (entry_before->event_data == NULL) == (entry_after->event_data == NULL); + + int const message_size = entry_before->msg.remote_event_size_bytes + entry_before->msg.local_event_size_bytes; + int const message_size_after = entry_after->msg.remote_event_size_bytes + entry_after->msg.local_event_size_bytes; + 
is_same &= message_size == message_size_after; + + if (is_same && entry_before->event_data != NULL) { + assert(message_size > 0); + is_same &= !memcmp(entry_before->event_data, entry_after->event_data, message_size); + } + + pos_before = pos_before->next; + pos_after = pos_after->next; + } + + // Check if both reached end + if (pos_before != before || pos_after != after) { + is_same = false; // different lengths + } + + return is_same; +} + +static void print_terminal_dally_message_qlist(FILE * out, char const * prefix, terminal_state * ns, struct qlist_head * head) { + if (qlist_empty(head)) { + return; + } + + char addprefix_2[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + + terminal_dally_message_list * entry; + qlist_for_each_entry(entry, head, list) { + fprintf(out, "%s terminal_dally_message_list (%p) {\n", prefix, entry); + fprintf(out, "%s packet_ID = %llu\n", subprefix, LLU(entry->msg.packet_ID)); + fprintf(out, "%s }\n", prefix); + } + + free(subprefix); +} + + +static void print_msgs_qlist(FILE * out, char const * prefix, struct qlist_head * qlist) { + if (qlist_empty(qlist)) { + return; + } + + char addprefix_2[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + + struct qlist_head *pos; + qlist_for_each(pos, qlist) { + terminal_dally_message_list *entry = qlist_entry(pos, terminal_dally_message_list, list); + fprintf(out, "%s qlist entry (%p) {\n", prefix, entry); + print_terminal_dally_message(out, subprefix, NULL, &entry->msg); + fprintf(out, "%s }\n", prefix); } - return tail; + + free(subprefix); } static tw_stime* buff_time_storage_create(terminal_state *s) @@ -1630,6 +1888,13 @@ static void 
dragonfly_read_config(const char * anno, dragonfly_param *params) fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size); } + rc = configuration_get_value_int(&config, "PARAMS", "packet_size", anno, &p->packet_size); + if(rc) { + p->packet_size = 512; /* fallback when the key is absent; this used to assign chunk_size, clobbering the value read just above and leaving packet_size unset for the message below */ + if(!myRank) + fprintf(stderr, "Packet size not specificied, it is assumed to be %d\n", p->packet_size); + } + rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth); if(rc) { p->local_bandwidth = 5.25; @@ -1752,7 +2017,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) if (p->num_rails % p->num_planes != 0) tw_error(TW_LOC, "Number of rails not evenly divisible by number of planes!\n"); - char rail_select_str[MAX_NAME_LENGTH]; + char rail_select_str[MAX_NAME_LENGTH] = {'\0'}; rc = configuration_get_value(&config, "PARAMS", "rail_select", anno, rail_select_str, MAX_NAME_LENGTH); if(strcmp(rail_select_str, "dedicated") == 0) @@ -1777,7 +2042,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) fprintf(stderr, "global_k_picks for global adaptive routing not specified, setting to %d\n",p->global_k_picks); } - char scoring_str[MAX_NAME_LENGTH]; + char scoring_str[MAX_NAME_LENGTH] = {'\0'}; configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH); if (strcmp(scoring_str, "alpha") == 0) { scoring = ALPHA; @@ -1810,6 +2075,24 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) if(p->num_qos_levels > 1) p->num_vcs = p->num_qos_levels * p->num_vcs; + //Xin: app msgs counting on routers + rc = configuration_get_value_int(&config, "PARAMS", "counting_bool", anno, &p->counting_bool); if(rc) p->counting_bool = 0; /* key absent: make the disabled state explicit before it is tested */ + if(p->counting_bool) { + int rc1 = configuration_get_value_double(&config, "PARAMS", "counting_start", anno, &p->counting_start); + int rc2 = configuration_get_value_int(&config, "PARAMS", "counting_windows", anno, &p->counting_windows); 
+ int rc3 = configuration_get_value_double(&config, "PARAMS", "counting_interval", anno, &p->counting_interval); + int rc4 = configuration_get_value_int(&config, "PARAMS", "num_apps", anno, &p->num_apps); + if(rc1 || rc2 || rc3 || rc4) + tw_error(TW_LOC, "\n Missing couting values, (counting_start/windows/interval/num_apps) check for config files\n"); + + //convert us to ns + p->counting_start = p->counting_start * 1000; + p->counting_interval = p->counting_interval * 1000; + + //printf("start %f, end %f, interval %f\n", p->counting_start, p->counting_end, p->counting_interval); + } + + rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups); if(rc) { tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n"); @@ -1854,7 +2137,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) // read intra group connections, store from a router's perspective // all links to the same router form a vector - char intraFile[MAX_NAME_LENGTH]; + char intraFile[MAX_NAME_LENGTH] = {'\0'}; configuration_get_value(&config, "PARAMS", "intra-group-connections", anno, intraFile, MAX_NAME_LENGTH); if (strlen(intraFile) <= 0) { @@ -1911,7 +2194,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) // read inter group connections, store from a router's perspective // also create a group level table that tells all the connecting routers - char interFile[MAX_NAME_LENGTH]; + char interFile[MAX_NAME_LENGTH] = {'\0'}; configuration_get_value(&config, "PARAMS", "inter-group-connections", anno, interFile, MAX_NAME_LENGTH); if(strlen(interFile) <= 0) { @@ -1976,7 +2259,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) //read link failure file - char failureFileName[MAX_NAME_LENGTH]; + char failureFileName[MAX_NAME_LENGTH] = {'\0'}; failureFileName[0] = '\0'; if (strlen(g_nm_link_failure_filepath) == 0) //was this defined already via a command line argument? 
@@ -2152,6 +2435,93 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) } // END CONGESTION CONTROL + // Router buffer occupancy configuration + if (OUTPUT_SNAPSHOT) { + char **timestamps; + size_t len; + configuration_get_multivalue(&config, "PARAMS", "router_buffer_snapshots", anno, &timestamps, &len); + if (len) { + assert(timestamps != NULL); + num_snapshots = len; + snapshot_times = (tw_stime*) malloc(len * sizeof(tw_stime)); + + for (size_t i = 0; i < len; i++) { + errno = 0; + snapshot_times[i] = strtod(timestamps[i], NULL); + if (errno == ERANGE || errno == EILSEQ){ + tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]); + } + } + + if(!myRank) { + fprintf(stderr, "\nRouter snaptshots activated for timestamps = "); + for (size_t i = 0; i < len; i++) { + fprintf(stderr, "%g%s", snapshot_times[i], i == len-1 ? "" : ", "); + } + fprintf(stderr, "\n"); + } + + // freeing some memory + for (size_t i = 0; i < len; i++) { + free(timestamps[i]); + } + free(timestamps); + } + } + + // Packet latency path to store configuration + char packet_latency_path[MAX_NAME_LENGTH]; + packet_latency_path[0] = '\0'; + configuration_get_value(&config, "PARAMS", "save_packet_latency_path", anno, packet_latency_path, MAX_NAME_LENGTH); + if(strlen(packet_latency_path) > 0) { + setup_packet_latency_path(packet_latency_path); + } + + // START Surrogate configuration + char enable_str[MAX_NAME_LENGTH]; + enable_str[0] = '\0'; + int const rc_enable = configuration_get_value(&config, "NETWORK_SURROGATE", "enable", anno, enable_str, MAX_NAME_LENGTH); + bool enable_network_surrogate = false; + if (rc_enable > 0) { + enable_network_surrogate = (strcmp(enable_str, "1") == 0 || strcmp(enable_str, "true") == 0); + } + // if surrogate mode has been set up + if (enable_network_surrogate) { + struct network_surrogate_config surr_conf = { + .model = {.switch_surrogate = switch_surrogate, .is_surrogate_on = 
is_surrogate_on_fun}, + .total_terminals = p->total_terminals, + .n_lp_types = 2, + .lp_types = { + {.lpname = "modelnet_dragonfly_dally", + .trigger_idle_modelnet = true, + .highdef_to_surrogate = (model_switch_f) dragonfly_dally_terminal_highdef_to_surrogate, + .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef, + .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen, + .should_event_be_deleted = NULL, + .check_event_in_queue = (model_check_event_f) dragonfly_dally_terminal_pre_surrogate_switch_event_queue, + .reset_predictor = (model_switch_f) dragonfly_dally_terminal_reset_predictor, + }, + {.lpname = "modelnet_dragonfly_dally_router", + .trigger_idle_modelnet = false, + .highdef_to_surrogate = NULL, + .surrogate_to_highdef = NULL, + .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen, + .should_event_be_deleted = NULL, + .check_event_in_queue = NULL, + .reset_predictor = NULL, + }, + 0 + } + }; + freeze_network_on_switch = network_surrogate_configure(anno, &surr_conf, &terminal_predictor); + if (terminal_predictor) { + dally_surrogate_configured = true; + } else { + tw_error(TW_LOC, "Latency predictor is NULL. 
Something during surrogate configuration failed."); + } + } + // END Surrogate configuration + if (PRINT_CONFIG && !myRank) { dragonfly_print_params(p,stderr); } @@ -2175,6 +2545,33 @@ void dragonfly_dally_configure() { #ifdef ENABLE_CORTEX model_net_topology = dragonfly_dally_cortex_topology; #endif + +} + +static void setup_packet_latency_path(char const * const dir_to_save) { + assert(packet_latency_f == NULL); + // checking + int const NO_ERROR = 0; + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(g_tw_mynode == 0 && stat(dir_to_save, &st) == -1) { + int res = mkdir(dir_to_save, 0700); + if (res != NO_ERROR) { + tw_error(TW_LOC, "Error (%d) occurred when attempting to mkdir folder `%s`", errno, dir_to_save); + } + } + MPI_Barrier(MPI_COMM_CODES); + + char const fmt[] = "%s/packets-delay-gid=%lu.txt"; + int sz = snprintf(NULL, 0, fmt, dir_to_save, g_tw_mynode); + char filename_path[sz + 1]; // `+ 1` for terminating null byte + snprintf(filename_path, sizeof(filename_path), fmt, dir_to_save, g_tw_mynode); + packet_latency_f = fopen(filename_path, "w+"); + if(!packet_latency_f) { + tw_error(TW_LOC, "File %s could not be opened", filename_path); + } + + fprintf(packet_latency_f, "#src_terminal,dest_terminal,packet_id,is_surrogate_on,is_predicted,size,workload_injection,next_packet_delay,start,end,latency,is_there_another_pckt_in_queue\n"); } /* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */ @@ -2209,6 +2606,9 @@ void dragonfly_dally_report_stats() // long long total_stalled_chunks; //helpful for debugging and determinism checking // MPI_Reduce( &global_stalled_chunk_counter, &total_stalled_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + if (packet_latency_f) { + fclose(packet_latency_f); + } /* print statistics */ if(!g_tw_mynode) { @@ -2278,7 +2678,7 @@ static void dragonfly_dally_router_congestion_event_commit(router_state *s, tw_b 
cc_router_local_congestion_event_commit(s->local_congestion_controller, bf, msg, lp); } -int get_vcg_from_category(terminal_dally_message * msg) +static int get_vcg_from_category(terminal_dally_message * msg) { if(strcmp(msg->category, "high") == 0) return Q_HIGH; @@ -2331,7 +2731,7 @@ static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int outp return percent_bw; } -void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +static void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { int num_qos_levels = s->params->num_qos_levels; int num_rails = s->params->num_rails; @@ -2355,7 +2755,7 @@ void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_me } /* resets the bandwidth numbers recorded so far */ -void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +static void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { int num_qos_levels = s->params->num_qos_levels; int num_rails = s->params->num_rails; @@ -2399,7 +2799,7 @@ void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_messa } } -void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) +static void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) { int radix = s->params->radix; int num_qos_levels = s->params->num_qos_levels; @@ -2420,7 +2820,8 @@ void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_me msg->rc_is_qos_set = 0; } } -void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) + +static void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) { int radix = s->params->radix; int num_qos_levels = s->params->num_qos_levels; @@ -2491,7 +2892,7 @@ 
static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * if(num_qos_levels == 1) { - if(s->terminal_msgs[msg->rail_id][0] == NULL || s->vc_occupancy[msg->rail_id][0] + s->params->chunk_size > s->params->cn_vc_size) + if(qlist_empty(&s->terminal_msgs[msg->rail_id][0]) || s->vc_occupancy[msg->rail_id][0] + s->params->chunk_size > s->params->cn_vc_size) return -1; else return 0; @@ -2524,7 +2925,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * { if(s->qos_status[msg->rail_id][i] == Q_ACTIVE) { - if(s->terminal_msgs[msg->rail_id][i] != NULL && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) + if(!qlist_empty(&s->terminal_msgs[msg->rail_id][i]) && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) return i; } } @@ -2535,7 +2936,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * /* All vcgs are exceeding their bandwidth limits*/ for(int i = 0; i < num_qos_levels; i++) { - if(s->terminal_msgs[msg->rail_id][i] != NULL && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) + if(!qlist_empty(&s->terminal_msgs[msg->rail_id][i]) && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) { bf->c2 = 1; @@ -2562,7 +2963,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess int chunk_size = s->params->chunk_size; int bw_consumption[num_qos_levels]; /* First make sure the bandwidth consumptions are up to date. 
*/ - if(BW_MONITOR == 1) + if(BW_MONITOR == 1 && num_qos_levels > 1) { for(int k = 0; k < num_qos_levels; k++) { @@ -2594,7 +2995,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess int base_limit = i * vcs_per_qos; for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) { - if(s->pending_msgs[output_port][k] != NULL) + if(!qlist_empty(&s->pending_msgs[output_port][k])) return k; } } @@ -2602,35 +3003,319 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess } /* All vcgs are exceeding their bandwidth limits*/ - msg->last_saved_qos = s->last_qos_lvl[output_port]; - int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; + msg->last_saved_qos = s->last_qos_lvl[output_port]; // last_qos_lvl stores a vc# not a qos# for routers. Terminals store qos# + //int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; + int next_rr_vc = (s->last_qos_lvl[output_port] + 1) % s->params->num_vcs; - for(int i = 0; i < num_qos_levels; i++) + for(int i = 0; i < s->params->num_vcs; i++) { - base_limit = next_rr_vcg * vcs_per_qos; - for(int k = base_limit; k < base_limit + vcs_per_qos; k++) + if(!qlist_empty(&s->pending_msgs[output_port][next_rr_vc])) { - if(s->pending_msgs[output_port][k] != NULL) - { - if(msg->last_saved_qos < 0) - msg->last_saved_qos = s->last_qos_lvl[output_port]; + s->last_qos_lvl[output_port] = next_rr_vc; + return next_rr_vc; + } + next_rr_vc = (next_rr_vc + 1) % s->params->num_vcs; + } + return -1; +} + +static inline void packet_latency_save_to_file( + unsigned int terminal_id, + struct packet_start * start, + struct packet_end * end, + bool surrogate_on, + bool is_predicted +) { + if (!packet_latency_f) { return; } // Don't save if there isn't a file to save to + if (end->travel_end_time > g_tw_ts_end) { return; } // This packet could never arrive to its destination! 
+ fprintf(packet_latency_f, "%u,%u,%lu,%d,%d,%u,%f,%f,%f,%f,%f,%d\n", + terminal_id, start->dfdally_dest_terminal_id, start->packet_ID, + surrogate_on, is_predicted, start->packet_size, + start->workload_injection_time, + end->next_packet_delay, start->travel_start_time, + end->travel_end_time, end->travel_end_time - start->travel_start_time, + start->is_there_another_pckt_in_queue); +} + +// ==== START OF Surrogate functions definition ==== + +static void switch_surrogate(void) { + is_dally_surrogate_on = ! is_dally_surrogate_on; +} + +static bool is_surrogate_on_fun(void) { + return is_dally_surrogate_on; +} + +static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t packet_ID, double end_time) { + assert(s->sent_packets.count(packet_ID) == 1); // packet_ID is in s->sent_packets + auto sent = s->sent_packets[packet_ID]; + struct packet_end end = { + .travel_end_time = end_time, + .next_packet_delay = sent.next_packet_delay, + }; + + packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_dally_surrogate_on, false); + if (dally_surrogate_configured && !is_dally_surrogate_on) { + assert(terminal_predictor != NULL); + terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &sent.start, &end); + } + + // Deallocating memory + if (sent.message_data) { + free(sent.message_data); + } + if (sent.remote_event_data) { + free(sent.remote_event_data); + } + if (sent.local_data) { + free(sent.local_data); + } +} + +// We check an event that is in the event queue, thus we do not process it yet +static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( + terminal_state * s, tw_lp * lp, tw_event * event) { + int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event)); + // if event is T_NOTIFY, add event relevant data into hash map for T_NOTIFY event + if (event_type == MN_BASE_PASS) { + terminal_dally_message * msg = (terminal_dally_message *) + model_net_method_msg_from_tw_event(lp, 
(model_net_wrap_msg *) tw_event_data(event)); + assert(msg != NULL); + if (msg->type == T_NOTIFY) { + assert(msg->notify_type == NOTIFY_LATENCY); + feed_packet_to_predictor(s, lp, msg->packet_ID, msg->travel_end_time); + s->sent_packets.erase(msg->packet_ID); + } + } +} + +static void dragonfly_dally_terminal_reset_predictor(terminal_state * s, tw_lp * lp, void * vacuous) { + (void) vacuous; + if (terminal_predictor != NULL && s->predictor_data != NULL) { + terminal_predictor->reset(s->predictor_data, lp); + } +} + +// This function never rollsback because it's called at GVT +static void dragonfly_dally_terminal_highdef_to_surrogate( + terminal_state * s, tw_lp * lp, tw_event ** terminal_events) { + (void) terminal_events; + + if (s->arrival_of_last_packet.packet_ID != -1) { + assert(s->sent_packets.count(s->arrival_of_last_packet.packet_ID) == 1); // packet_ID is in s->sent_packets + assert(s->sent_packets[s->arrival_of_last_packet.packet_ID].next_packet_delay < 0); // next_packet_delay is -1 - s->last_qos_lvl[output_port] = next_rr_vcg; - return k; + double const travel_end_time = s->arrival_of_last_packet.travel_end_time; + feed_packet_to_predictor(s, lp, s->arrival_of_last_packet.packet_ID, travel_end_time); + s->sent_packets.erase(s->arrival_of_last_packet.packet_ID); + s->arrival_of_last_packet.packet_ID = -1; + } + + // Going through every packet that was sent but not yet received, remove it + // from the list, send it to its destination using the predictor, and + // notify of its zombie status. + // (deleting all elements from s->sent_packets as we go) + for (auto it = s->sent_packets.begin(); it != s->sent_packets.end(); it = s->sent_packets.erase(it)) { + uint64_t packet_ID = it->first; + auto & sent = it->second; + + assert(packet_ID == sent.start.packet_ID); + + // The packet has not been delievered. 
Send directly to destination and notify of zombie event + if (freeze_network_on_switch) { + struct packet_end predicted_end = + terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &sent.start); + + double latency = predicted_end.travel_end_time - tw_now(lp); + if (predicted_end.travel_end_time < tw_now(lp) || latency < 0) { + predicted_end.travel_end_time = tw_now(lp); + latency = 0; + } + + packet_latency_save_to_file(s->terminal_id, &sent.start, &predicted_end, is_dally_surrogate_on, true); + + assert(sent.message_data); + terminal_dally_message * const msg_data = (terminal_dally_message*) sent.message_data; + terminal_dally_message * m; + void * remote_event; + tw_event * const e = model_net_method_event_new( + sent.start.dest_terminal_lpid, latency, lp, DRAGONFLY_DALLY, (void**)&m, &remote_event); + memcpy(m, msg_data, sizeof(terminal_dally_message)); + if (m->remote_event_size_bytes) { + memcpy(remote_event, sent.remote_event_data, m->remote_event_size_bytes); + } + m->magic = terminal_magic_num; + m->type = T_ARRIVE_PREDICTED; + m->src_terminal_id = lp->gid; + //m->travel_start_time = tw_now(lp); + //m->rail_id = msg->rail_id; + //m->vc_index = vcg; + //m->last_hop = TERMINAL; + m->path_type = -1; + m->local_event_size_bytes = 0; + m->is_intm_visited = 0; + m->intm_grp_id = -1; + m->intm_rtr_id = -1; //for legacy prog-adaptive + assert(m->dfdally_src_terminal_id == s->terminal_id); + assert(m->packet_ID == sent.start.packet_ID); + assert(m->dest_terminal_lpid == sent.start.dest_terminal_lpid); + assert(m->dfdally_dest_terminal_id == sent.start.dfdally_dest_terminal_id); + //assert(m->travel_start_time >= sent.start.travel_start_time); + assert(m->packet_size == sent.start.packet_size); + tw_event_send(e); + + //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", sent.start.dest_terminal_lpid, sent.start.dfdally_dest_terminal_id); + notify_dest_lp_of(s, lp, msg_data, NOTIFY_ZOMBIE); + + if (s->is_pending_local_send.count(packet_ID) == 
1) { + assert(sent.local_data); + assert(msg_data->local_event_size_bytes); + double const local_ts = 11; + tw_event *e_new = tw_event_new(msg_data->sender_lp, local_ts, lp); + void * m_new = tw_event_data(e_new); + memcpy(m_new, sent.local_data, msg_data->local_event_size_bytes); + tw_event_send(e_new); + s->is_pending_local_send.erase(packet_ID); + } + + // Deallocating memory from packet_start + if (sent.message_data) { + free(sent.message_data); + } + if (sent.remote_event_data) { + free(sent.remote_event_data); + } + if (sent.local_data) { + free(sent.local_data); } } - next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; - assert(next_rr_vcg < 2); } - return -1; + assert(s->sent_packets.empty()); + assert(s->is_pending_local_send.empty()); + + // Hide current state and clean current state. Hidding the network information is in principle + // the same as freezing the state of the network. + assert(s->frozen_state == NULL); + terminal_state * frozen_state = (terminal_state*) malloc(sizeof(terminal_state)); + memcpy(frozen_state, s, sizeof(terminal_state)); + memset(s, 0, sizeof(terminal_state)); + for (size_t i = 0; i < CATEGORY_MAX; i++) { + s->dragonfly_stats_array[i] = frozen_state->dragonfly_stats_array[i]; + } + s->packet_gen = frozen_state->packet_gen; + s->total_gen_size = frozen_state->total_gen_size; + s->params = frozen_state->params; + s->packet_counter = frozen_state->packet_counter; + s->local_congestion_controller = frozen_state->local_congestion_controller; + s->last_in_queue_time = frozen_state->last_in_queue_time; + s->predictor_data = frozen_state->predictor_data; + s->terminal_id = frozen_state->terminal_id; + s->packet_fin = frozen_state->packet_fin; + s->finished_packets = frozen_state->finished_packets; + s->data_size_sample = frozen_state->data_size_sample; + s->ross_sample.data_size_sample = frozen_state->ross_sample.data_size_sample; + s->data_size_ross_sample = frozen_state->data_size_ross_sample; + s->total_msg_size = 
frozen_state->total_msg_size; + s->finished_msgs = frozen_state->finished_msgs; + s->rank_tbl_pop = frozen_state->rank_tbl_pop; + s->last_packet_sent_id = frozen_state->last_packet_sent_id; + s->rank_tbl = frozen_state->rank_tbl; + s->st = frozen_state->st; + memcpy(&s->arrival_of_last_packet, &frozen_state->arrival_of_last_packet, sizeof(s->arrival_of_last_packet)); + memcpy(&s->zombies, &frozen_state->zombies, sizeof(s->zombies)); + memcpy(&s->sent_packets, &frozen_state->sent_packets, sizeof(s->sent_packets)); + memcpy(&s->is_pending_local_send, &frozen_state->is_pending_local_send, sizeof(s->is_pending_local_send)); + memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets)); + + s->frozen_state = frozen_state; +}; + +// This function never rollsback because it's called at GVT +// Note: this function CANNOT generate any events, because it is to be used in `dragonfly_dally_terminal_final` too +static void dragonfly_dally_terminal_surrogate_to_highdef( + terminal_state * s, tw_lp * lp, tw_event ** terminal_events) { + (void) lp; + (void) terminal_events; + //printf("Terminal %d (PID: %d) switching back to high-def at %e\n", s->terminal_id, lp->gid, tw_now(lp)); + + // Re-instanciating pre-transition (before surrogate was turned on) terminal state + assert(s->frozen_state != NULL); + terminal_state * frozen_state = s->frozen_state; + for (size_t i = 0; i < CATEGORY_MAX; i++) { + frozen_state->dragonfly_stats_array[i] = s->dragonfly_stats_array[i]; + } + frozen_state->packet_gen = s->packet_gen; + frozen_state->total_gen_size = s->total_gen_size; + frozen_state->params = s->params; + frozen_state->packet_counter = s->packet_counter; + frozen_state->local_congestion_controller = s->local_congestion_controller; + frozen_state->last_in_queue_time = s->last_in_queue_time; + frozen_state->predictor_data = s->predictor_data; + frozen_state->terminal_id = s->terminal_id; + frozen_state->packet_fin = s->packet_fin; + 
frozen_state->finished_packets = s->finished_packets; + frozen_state->data_size_sample = s->data_size_sample; + frozen_state->ross_sample.data_size_sample = s->ross_sample.data_size_sample; + frozen_state->data_size_ross_sample = s->data_size_ross_sample; + frozen_state->total_msg_size = s->total_msg_size; + frozen_state->finished_msgs = s->finished_msgs; + frozen_state->rank_tbl_pop = s->rank_tbl_pop; + frozen_state->last_packet_sent_id = s->last_packet_sent_id; + frozen_state->rank_tbl = s->rank_tbl; + frozen_state->st = s->st; + memcpy(&frozen_state->arrival_of_last_packet, &s->arrival_of_last_packet, sizeof(s->arrival_of_last_packet)); + memcpy(&frozen_state->zombies, &s->zombies, sizeof(s->zombies)); + memcpy(&frozen_state->sent_packets, &s->sent_packets, sizeof(s->sent_packets)); + memcpy(&frozen_state->is_pending_local_send, &s->is_pending_local_send, sizeof(s->is_pending_local_send)); + memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets)); + memcpy(s, frozen_state, sizeof(terminal_state)); + memset(frozen_state, 0, sizeof(terminal_state)); + free(frozen_state); + assert(s->frozen_state == NULL); +}; + +static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event) { + (void) lp; + assert(lp->gid == event->dest_lpid); + // Freeze everything except for MN_BASE_NEW_MSG's + int const event_types_to_freeze = MN_BASE_SCHED_NEXT | MN_BASE_SAMPLE | MN_BASE_PASS | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT; + int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event)); + if (event_type & event_types_to_freeze) { // Finding out whether current event type is one of freeze types + return true; + } + return false; +} + +// Freezing all events except for R_SNAPSHOT's!! +static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event) { + assert(lp->gid == event->dest_lpid); + // Freeze everything except for MN_BASE_PASS! 
+ int const event_types_to_freeze = MN_BASE_NEW_MSG | MN_BASE_SCHED_NEXT | MN_BASE_SAMPLE | MN_BASE_END_NOTIF | MN_CONGESTION_EVENT; + int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event)); + if (event_type & event_types_to_freeze) { // Finding out whether current event type is one of freeze types + return true; + } + + // Ignore (ie, do not freeze) R_SNAPSHOT's! + assert(event_type == MN_BASE_PASS); + terminal_dally_message * msg = (terminal_dally_message *) model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event)); + assert(msg != NULL); + if (msg->type == R_SNAPSHOT) { // Snapshots will stay unaltered, never frozen + return false; + } + + return true; } +// +// ==== END OF Surrogate functions definition ==== //Snapshot pattern //Sends a snapshot event - this wakes the router at the specified time to store its data somewhere //this storage place could be in the event or elsewehre so long as the data is over-writeable //in case the event gets rolled back and replayed. //On commit of the snapshot event, the commit function looks where the data was stored and outputs to lpio -void router_send_snapshot_events(router_state *s, tw_lp *lp) +static void router_send_snapshot_events(router_state *s, tw_lp *lp) { int len = sprintf(snapshot_filename, "dragonfly-snapshots.csv"); snapshot_filename[len] = '\0'; @@ -2638,11 +3323,33 @@ void router_send_snapshot_events(router_state *s, tw_lp *lp) { if (OUTPUT_SNAPSHOT) { - char snapshot_line[1024]; - int written; + // Finding size of snapshot line + int line_sz = 28; // This is the size of '#Time of snapshot,Router ID,' + for (int i = 0, j = 0; i < s->params->radix; ) { + int sz = snprintf(NULL, 0, "Port %d VC %d,", i, j); + line_sz += sz; + + j++; + if(j >= s->params->num_vcs) { i++; j = 0; } + } - written = sprintf(snapshot_line, "#Time of snapshot, Router ID, Port 0 VC 0, Port 0 VC 1 ... 
Port N VC M\n#Radix = %d Num VCs = %d\n",s->params->radix, s->params->num_vcs); - lp_io_write(lp->gid, snapshot_filename, written, snapshot_line); + // Creating snapshot line + char snapshot_line[line_sz + 1]; // extra space for '\0' + int offset = 28; + snprintf(snapshot_line, sizeof(snapshot_line), "#Time of snapshot,Router ID,"); + for (int i = 0, j = 0; i < s->params->radix; ) { + int sz = snprintf(snapshot_line + offset, sizeof(snapshot_line) - offset, "Port %d VC %d,", i, j); + offset += sz; + + j++; + if(j >= s->params->num_vcs) { i++; j = 0; } + } + assert(line_sz == offset); + snapshot_line[line_sz - 1] = '\n'; // replacing last ',' for '\n' + snapshot_line[line_sz] = '\0'; // just in case it's treated as a null terminating string + + // "Saving" snapshot line + lp_io_write(lp->gid, snapshot_filename, line_sz, snapshot_line); } } @@ -2659,8 +3366,13 @@ void router_send_snapshot_events(router_state *s, tw_lp *lp) // printf("%d: sending snapshot events\n",s->router_id); } -void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) +static void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) { + if (msg->packet_ID >= num_snapshots) { + fprintf(stderr, "Warning: packet_ID = %llu will not be saved in the snapshot because there are only %d spaces available.\n", msg->packet_ID, num_snapshots); + return; + } + for(int i = 0; i < s->params->radix; i++) { for(int j = 0; j < s->params->num_vcs; j++) @@ -2673,49 +3385,200 @@ void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_dally_mes } } -void terminal_dally_commit(terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - if(msg->type == T_BANDWIDTH) - { - if(msg->rc_is_qos_set == 1) { - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; - } +static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * 
lp) { + if (!packet_latency_f && !dally_surrogate_configured) { + return; } - if(msg->type == T_ARRIVE) - { - if (OUTPUT_END_END_LATENCIES) - { - if (msg->message_id % OUTPUT_LATENCY_MODULO == 0) { - int written1; - char end_end_filename[128]; - written1 = sprintf(end_end_filename, "end-to-end-latency-hops"); - end_end_filename[written1] = '\0'; - - char latency[32]; - int written; - tw_stime lat = msg->travel_end_time-msg->travel_start_time; - written = sprintf(latency, "%d %.5f %d\n",msg->app_id, msg->travel_end_time-msg->travel_start_time,msg->my_N_hop); - lp_io_write(lp->gid, end_end_filename, written, latency); - } - } + // Storing packet info of sent packet. Once packets arrive back, we can compute + // the latency of sending the packet + void * msg_data = malloc(sizeof(terminal_dally_message)); + memcpy(msg_data, msg, sizeof(terminal_dally_message)); + void * remote_data = NULL; + if (msg->remote_event_size_bytes) { + remote_data = malloc(msg->remote_event_size_bytes); + memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes); + } + void * local_data = NULL; + if (msg->local_event_size_bytes) { + local_data = malloc(msg->local_event_size_bytes); + memcpy(local_data, (char *) model_net_method_get_edata(DRAGONFLY_DALLY, msg) + msg->remote_event_size_bytes, msg->local_event_size_bytes); + } + double const processing_packet_delay = msg->saved_next_packet_delay; + + // TODO (elkin): In the future, this ugly initialization could be done all in a single "line" instead of setting all values one by one. 
The reason to do it this way is because some old compilers do not understand other ways of initializing + struct packet_sent sent; + sent.start.packet_ID = msg->packet_ID; + sent.start.dest_terminal_lpid = msg->dest_terminal_lpid; + sent.start.dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id; + sent.start.travel_start_time = msg->saved_processing_time; + sent.start.workload_injection_time = msg->msg_start_time; + sent.start.processing_packet_delay = processing_packet_delay; + sent.start.packet_size = msg->packet_size; + sent.start.is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue; + sent.next_packet_delay = -1; + sent.message_data = msg_data; + sent.remote_event_data = remote_data; + sent.local_data = local_data; + + s->sent_packets[msg->packet_ID] = sent; + if (freeze_network_on_switch && msg->local_event_size_bytes > 0) { + s->is_pending_local_send.insert(msg->packet_ID); + } + + // Set next_packet_delay for the last past sent packet + if (s->sent_packets.count(s->last_packet_sent_id) == 1) { + assert(s->sent_packets[s->last_packet_sent_id].next_packet_delay == -1); + s->sent_packets[s->last_packet_sent_id].next_packet_delay = processing_packet_delay; + } + + // If we already received the (previous) last packet latency, we inject it now into the predictor + if (s->arrival_of_last_packet.packet_ID != -1) { + assert(s->arrival_of_last_packet.packet_ID == s->last_packet_sent_id); + assert(s->arrival_of_last_packet.travel_end_time > 0); + + double const travel_end_time = s->arrival_of_last_packet.travel_end_time; + feed_packet_to_predictor(s, lp, s->arrival_of_last_packet.packet_ID, travel_end_time); + s->sent_packets.erase(s->arrival_of_last_packet.packet_ID); + s->arrival_of_last_packet.packet_ID = -1; } } -void router_dally_commit(router_state * s, +static void terminal_dally_commit(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - if(msg->type == R_BANDWIDTH) - { - if(msg->rc_is_qos_set == 1) { - 
free(msg->rc_qos_data); + + switch (msg->type) { + case T_GENERATE: + if(bf->c10) { // if the packet was sent as a prediction, store the prediction in memory + assert(dally_surrogate_configured); + auto start = (struct packet_start) { + .packet_ID = msg->packet_ID, + .dest_terminal_lpid = msg->dest_terminal_lpid, + .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id, + .travel_start_time = msg->travel_start_time, + .workload_injection_time = msg->msg_start_time, + .processing_packet_delay = -1, + .packet_size = msg->packet_size, + .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue + }; + + // Saving + auto end = (struct packet_end) { + .travel_end_time = msg->travel_end_time, + .next_packet_delay = msg->saved_next_packet_delay, + }; + packet_latency_save_to_file(s->terminal_id, &start, &end, is_dally_surrogate_on, true); + + // If we had latency info for the last packet transmitted, then we have to store it into memory and clean the variable + if (s->arrival_of_last_packet.packet_ID != -1) { + assert(s->arrival_of_last_packet.packet_ID == s->last_packet_sent_id); + assert(s->arrival_of_last_packet.travel_end_time > 0); + + auto sent = s->sent_packets[s->arrival_of_last_packet.packet_ID]; + + struct packet_end end = { + .travel_end_time = s->arrival_of_last_packet.travel_end_time, + .next_packet_delay = -1, + }; + + packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_dally_surrogate_on, false); + + s->sent_packets.erase(s->arrival_of_last_packet.packet_ID); + s->arrival_of_last_packet.packet_ID = -1; + } + + // If the packet info is to be stored in memory to compute terminal delay + } else { + terminal_commit_packet_generate(s, bf, msg, lp); + } + + assert(msg->packet_ID - 1 == s->last_packet_sent_id); + s->last_packet_sent_id = msg->packet_ID; + break; + + case T_ARRIVE: + if (OUTPUT_END_END_LATENCIES) { + if (msg->message_id % OUTPUT_LATENCY_MODULO == 0) { + int written1; + char end_end_filename[128]; + written1 = 
sprintf(end_end_filename, "end-to-end-latency-hops"); + end_end_filename[written1] = '\0'; + + char latency[32]; + int written; + tw_stime lat = msg->travel_end_time-msg->travel_start_time; + written = sprintf(latency, "%d %.5f %d\n",msg->app_id, msg->travel_end_time-msg->travel_start_time,msg->my_N_hop); + lp_io_write(lp->gid, end_end_filename, written, latency); + } + } + break; + + case T_ARRIVE_PREDICTED: + break; + + case T_SEND: + if (freeze_network_on_switch) { + if (bf->c16 && s->is_pending_local_send.count(msg->packet_ID) == 1) { + s->is_pending_local_send.erase(msg->packet_ID); + } + } + break; + + case T_BUFFER: + break; + + case T_BANDWIDTH: + if(msg->rc_is_qos_set == 1) { + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; + } + break; + + case T_NOTIFY: + if(msg->notify_type == NOTIFY_LATENCY) { + assert(lp->gid == msg->src_terminal_id); + assert(s->terminal_id == msg->dfdally_src_terminal_id); + uint64_t packet_ID = msg->packet_ID; + + if (s->sent_packets.count(packet_ID) == 1) { // packet_ID is in s->sent_packets + auto sent = s->sent_packets[packet_ID]; + assert(msg->travel_end_time > sent.start.travel_start_time); + if (packet_ID == s->last_packet_sent_id) { // packet_ID is last, we cannot compute the next_packet_delay + assert(s->arrival_of_last_packet.packet_ID == -1); + s->arrival_of_last_packet.packet_ID = packet_ID; + s->arrival_of_last_packet.travel_end_time = msg->travel_end_time; + } else { + feed_packet_to_predictor(s, lp, packet_ID, msg->travel_end_time); + s->sent_packets.erase(packet_ID); + } + } + } + break; + +#if ALWAYS_DETERMINISTIC_NETWORK + case T_VACUOUS_EVENT: + break; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ + + default: + printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); + tw_error(TW_LOC, "Msg type not supported"); + } +} + +static void router_dally_commit(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + if(msg->type == 
R_BANDWIDTH) + { + if(msg->rc_is_qos_set == 1) { + free(msg->rc_qos_data); free(msg->rc_qos_status); msg->rc_is_qos_set = 0; } @@ -2743,7 +3606,7 @@ void router_dally_commit(router_state * s, if (msg->type == R_SNAPSHOT) { - if (OUTPUT_SNAPSHOT == 1) + if (OUTPUT_SNAPSHOT == 1 && msg->packet_ID < num_snapshots) { char snapshot_line[8192]; int written; @@ -2759,14 +3622,18 @@ void router_dally_commit(router_state * s, written += sprintf(snapshot_line+written, "%d, ", this_vc_snapshot_data); } } - written += sprintf(snapshot_line+written, "\n"); + assert(written <= 8192); + assert(snapshot_line[written - 2] == ','); + snapshot_line[written - 2] = '\n'; // Replacing ',' for new line + written -= 1; + lp_io_write(lp->gid, snapshot_filename, written, snapshot_line); } } } /* initialize a dragonfly compute node terminal */ -void terminal_dally_init( terminal_state * s, tw_lp * lp ) +static void terminal_dally_init( terminal_state * s, tw_lp * lp ) { s->packet_gen = 0; s->packet_fin = 0; @@ -2811,9 +3678,6 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp ) codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 1, s->router_id[i] / num_routers_per_mgrp, s->router_id[i] % num_routers_per_mgrp, &s->router_lp[i]); } - s->workload_lpid_to_app_id = map(); - s->app_ids = set(); - s->terminal_available_time = (tw_stime*)calloc(p->num_rails, sizeof(tw_stime)); s->packet_counter = 0; s->min_latency = INT_MAX; @@ -2855,11 +3719,8 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp ) s->in_send_loop = (int*)calloc(p->num_rails, sizeof(int)); s->issueIdle = (int*)calloc(p->num_rails, sizeof(int)); - s->rank_tbl = NULL; - s->terminal_msgs = - (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**)); - s->terminal_msgs_tail = - (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**)); + s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); + 
s->terminal_msgs = (struct qlist_head**)calloc(p->num_rails, sizeof(struct qlist_head*)); s->qos_status = (int**)calloc(p->num_rails, sizeof(int*)); s->qos_data = (int**)calloc(p->num_rails, sizeof(int*)); @@ -2867,13 +3728,11 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp ) for(i = 0; i < p->num_rails; i++) { s->in_send_loop[i] = 0; - s->terminal_msgs[i] = (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); - s->terminal_msgs_tail[i] = (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); + s->terminal_msgs[i] = (struct qlist_head*)calloc(s->params->num_vcs, sizeof(struct qlist_head)); - for(int j = 0; j < num_qos_levels; j++) + for(int j = 0; j < s->params->num_vcs; j++) { - s->terminal_msgs[i][j] = NULL; - s->terminal_msgs_tail[i][j] = NULL; + INIT_QLIST_HEAD(&s->terminal_msgs[i][j]); } /* Whether the virtual channel group is active or over-bw*/ @@ -2903,16 +3762,41 @@ void terminal_dally_init( terminal_state * s, tw_lp * lp ) fprintf(dragonfly_term_bw_log, "\n term-id time-stamp port-id busy-time"); }*/ + s->local_congestion_controller = NULL; if (g_congestion_control_enabled) { s->local_congestion_controller = (tlc_state*)calloc(1,sizeof(tlc_state)); cc_terminal_local_controller_init(s->local_congestion_controller, lp, s->terminal_id, &s->workloads_finished_flag); } + + // This doesn't allocate any memory, it calls the constructor on the + // previously allocated memory (by ROSS) + // In the future calling the constructor could be done with: + // std::construct_at, for now this syntax suffices and works + // (see https://en.cppreference.com/w/cpp/memory/construct_at) + s->last_packet_sent_id = -1; + s->arrival_of_last_packet.packet_ID = -1; + s->arrival_of_last_packet.travel_end_time = -1; + new (&s->sent_packets) map(); + new (&s->is_pending_local_send) set(); + new (&s->remaining_sz_packets) map(); + new (&s->zombies) set(); + s->frozen_state = NULL; + + // alloc'ing 
memory for predictor, calling initiliazer for predictor + s->predictor_data = NULL; + if (terminal_predictor != NULL) { + if (terminal_predictor->predictor_data_sz > 0) { + s->predictor_data = calloc(1, terminal_predictor->predictor_data_sz); + } + terminal_predictor->init(s->predictor_data, lp, s->terminal_id); + } + s->last_in_queue_time = 0; return; } /* sets up the router virtual channels, global channels, * local channels, compute node channels */ -void router_dally_init(router_state * r, tw_lp * lp) +static void router_dally_init(router_state * r, tw_lp * lp) { char anno[MAX_NAME_LENGTH]; codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, @@ -3004,13 +3888,9 @@ void router_dally_init(router_state * r, tw_lp * lp) r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); r->qos_status = (int**)calloc(p->radix, sizeof(int*)); r->pending_msgs = - (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); - r->pending_msgs_tail = - (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); + (struct qlist_head**)calloc(p->radix, sizeof(struct qlist_head*)); r->queued_msgs = - (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); - r->queued_msgs_tail = - (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); + (struct qlist_head**)calloc(p->radix, sizeof(struct qlist_head*)); r->queued_count = (int*)calloc(p->radix, sizeof(int)); r->last_buf_full = (tw_stime*)calloc(p->radix, sizeof(tw_stime*)); r->busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); @@ -3043,14 +3923,8 @@ void router_dally_init(router_state * r, tw_lp * lp) r->in_send_loop[i] = 0; r->vc_occupancy[i] = (int*)calloc(p->num_vcs, sizeof(int)); // printf("\n Number of vcs %d for radix %d ", p->num_vcs, p->radix); - r->pending_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - 
r->pending_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->queued_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->queued_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); + r->pending_msgs[i] = (struct qlist_head*)calloc(p->num_vcs, sizeof(struct qlist_head)); + r->queued_msgs[i] = (struct qlist_head*)calloc(p->num_vcs, sizeof(struct qlist_head)); r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); r->qos_data[i] = (int*)calloc(num_qos_levels, sizeof(int)); for(int j = 0; j < num_qos_levels; j++) @@ -3060,10 +3934,8 @@ void router_dally_init(router_state * r, tw_lp * lp) } for(int j = 0; j < p->num_vcs; j++) { - r->pending_msgs[i][j] = NULL; - r->pending_msgs_tail[i][j] = NULL; - r->queued_msgs[i][j] = NULL; - r->queued_msgs_tail[i][j] = NULL; + INIT_QLIST_HEAD(&r->pending_msgs[i][j]); + INIT_QLIST_HEAD(&r->queued_msgs[i][j]); } } @@ -3097,6 +3969,7 @@ void router_dally_init(router_state * r, tw_lp * lp) } + r->snapshot_data = NULL; if (num_snapshots) { r->snapshot_data = (int**)calloc(num_snapshots, sizeof(int*)); for(int i = 0; i < num_snapshots; i++) @@ -3106,6 +3979,24 @@ void router_dally_init(router_state * r, tw_lp * lp) router_send_snapshot_events(r, lp); } + //Xin: msg counters for apps + r->agg_link_traffic = NULL; + r->agg_busy_time = NULL; + if(p->counting_bool > 0) + { + r->agg_link_traffic = (int64_t **) calloc(p->counting_windows, sizeof(int64_t *)); + r->agg_busy_time = (tw_stime **) malloc (p->counting_windows * sizeof(tw_stime *)); + + for (int i = 0; i < p->counting_windows; ++i) + { + r->agg_link_traffic[i] = (int64_t*) calloc(p->radix, sizeof(int64_t)); + // r->agg_link_traffic[i] = (int64_t*) calloc(p->radix*p->num_apps, sizeof(int64_t)); + r->agg_busy_time[i] = (tw_stime*) malloc(p->radix * sizeof(tw_stime)); + for(int j = 0; j < p->radix; j++) + 
r->agg_busy_time[i][j] = 0.0; + } + } + return; } @@ -3128,7 +4019,8 @@ static tw_stime dragonfly_dally_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; @@ -3164,8 +4056,10 @@ static tw_stime dragonfly_dally_packet_event( msg->pull_size = req->pull_size; msg->magic = terminal_magic_num; msg->msg_start_time = req->msg_start_time; + msg->msg_new_mn_event = req->msg_new_mn_event; msg->rail_id = req->queue_offset; msg->app_id = req->app_id; + msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue; if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/ { @@ -3187,6 +4081,136 @@ static tw_stime dragonfly_dally_packet_event( return xfer_to_nic_time; } +static void packet_generate_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + struct mn_stats * stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count--; + stat->send_bytes -= msg->packet_size; + stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size; + + s->last_in_queue_time = msg->saved_last_in_queue_time; + + terminal_predictor->predict_rc(s->predictor_data, lp); + + s->packet_counter--; + s->total_gen_size -= msg->packet_size; + s->packet_gen--; + packet_gen--; +} + +/* generates packet at the current dragonfly compute node */ +static void packet_generate_predicted(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { + packet_gen++; + s->packet_gen++; + s->total_gen_size += msg->packet_size; + + assert(lp->gid != msg->dest_terminal_lpid); + const dragonfly_param *p = s->params; + + msg->packet_ID = s->packet_counter; + s->packet_counter++; + + // these actually don't matter because we are bypassing the network + msg->my_N_hop = -1; + msg->my_l_hop = -1; + msg->my_g_hop = -1; + msg->my_hops_cur_group = -1; + + // 
Using predictor to find latency + double const processing_packet_delay = tw_now(lp) - s->last_in_queue_time; + auto start = (struct packet_start) { + .packet_ID = msg->packet_ID, + .dest_terminal_lpid = msg->dest_terminal_lpid, + .dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id, + .travel_start_time = tw_now(lp), + .workload_injection_time = msg->msg_start_time, + .processing_packet_delay = processing_packet_delay, + .packet_size = msg->packet_size, + .is_there_another_pckt_in_queue = msg->is_there_another_pckt_in_queue + }; + + struct packet_end const end = + terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &start); + double const latency = end.travel_end_time - start.travel_start_time; + double const arrival = start.travel_start_time + latency; // this is "equivalent" to end.travel_end_time + assert(arrival >= tw_now(lp)); + + // determining injection delay + //tw_stime injection_ts; + //if (g_congestion_control_enabled) { + // double bandwidth_coef = 1; + // if (cc_terminal_is_abatement_active(s->local_congestion_controller)) { + // bandwidth_coef = cc_terminal_get_current_injection_bandwidth_coef(s->local_congestion_controller); + // } + // injection_ts = bytes_to_ns(msg->packet_size, bandwidth_coef * s->params->cn_bandwidth); + //} + //else { + // injection_ts = bytes_to_ns(msg->packet_size, s->params->cn_bandwidth); + //} + //tw_stime const nic_ts = injection_ts; + // The code above does a good job at limiting the speed in which packets are injected, so it produces good + // results when running in surrogate. A good model should produce similar `nic`s to what the code above + // does (the average predictor does just that!) 
+ double const nic_ts = end.next_packet_delay; + assert(nic_ts > 0); + + // Scheduling idle event for next packet to be processed + bool const is_from_remote = false; + model_net_method_idle_event2(nic_ts, is_from_remote, msg->rail_id, lp); + msg->saved_last_in_queue_time = s->last_in_queue_time; + s->last_in_queue_time = tw_now(lp); + + // Info to be used at commit time to save into file + msg->saved_next_packet_delay = end.next_packet_delay; + msg->travel_start_time = tw_now(lp); + msg->travel_end_time = arrival; + + // Sending packet directly to destination terminal + //tw_stime const ts = 0; + terminal_dally_message * m; + void * remote_event; + void const * const m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + tw_event * const e = model_net_method_event_new( + msg->dest_terminal_lpid, latency, lp, DRAGONFLY_DALLY, (void**)&m, &remote_event); + memcpy(m, msg, sizeof(terminal_dally_message)); + if (msg->remote_event_size_bytes) { + memcpy(remote_event, m_data_src, msg->remote_event_size_bytes); + } + m->magic = terminal_magic_num; + m->type = T_ARRIVE_PREDICTED; + m->src_terminal_id = lp->gid; + m->dfdally_src_terminal_id = s->terminal_id; //m->travel_start_time = tw_now(lp); + //m->rail_id = msg->rail_id; + //m->vc_index = vcg; + //m->last_hop = TERMINAL; + m->path_type = -1; + m->local_event_size_bytes = 0; + m->is_intm_visited = 0; + m->intm_grp_id = -1; + m->intm_rtr_id = -1; //for legacy prog-adaptive + tw_event_send(e); + + const int total_event_size = model_net_get_msg_sz(DRAGONFLY_DALLY) + + msg->remote_event_size_bytes + msg->local_event_size_bytes; + mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count++; + stat->send_bytes += msg->packet_size; + stat->send_time += (1/p->cn_bandwidth) * msg->packet_size; + if(stat->max_event_size < total_event_size) { + stat->max_event_size = total_event_size; + } + + if(msg->local_event_size_bytes > 0) + { + tw_event *e_new = 
tw_event_new(msg->sender_lp, nic_ts, lp); + void *m_new = tw_event_data(e_new); + void *local_event = (char*) model_net_method_get_edata(DRAGONFLY_DALLY, msg) + msg->remote_event_size_bytes; + memcpy(m_new, local_event, msg->local_event_size_bytes); + tw_event_send(e_new); + } +} + static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { int num_qos_levels = s->params->num_qos_levels; @@ -3198,6 +4222,8 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me packet_gen--; s->packet_counter--; + s->last_in_queue_time = msg->saved_last_in_queue_time; + if(bf->c2) num_local_packets_sr--; if(bf->c3) @@ -3205,9 +4231,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me if(bf->c4) num_remote_packets--; - int num_chunks = msg->packet_size/s->params->chunk_size; - if(msg->packet_size < s->params->chunk_size) - num_chunks++; + int const num_chunks = num_chunks_for(msg->packet_size, s->params->chunk_size); int i; int vcg = 0; @@ -3219,7 +4243,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me assert(vcg < num_qos_levels); for(i = 0; i < num_chunks; i++) { - delete_terminal_dally_message_list(return_tail(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], vcg)); + delete_terminal_dally_message_list(return_tail_from_qlist(&s->terminal_msgs[msg->rail_id][vcg])); s->terminal_length[msg->rail_id][vcg] -= s->params->chunk_size; } if(bf->c5) { @@ -3247,6 +4271,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me if(bf->c8) s->last_buf_full[msg->rail_id] = msg->saved_busy_time; } + struct mn_stats* stat; stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); stat->send_count--; @@ -3260,6 +4285,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa s->packet_gen++; s->total_gen_size += msg->packet_size; + msg->saved_processing_time = tw_now(lp); 
tw_stime ts, injection_ts, nic_ts; @@ -3267,15 +4293,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa const dragonfly_param *p = s->params; int total_event_size; - uint64_t num_chunks = msg->packet_size / p->chunk_size; - - double cn_delay = s->params->cn_delay; - - if (msg->packet_size < s->params->chunk_size) - num_chunks++; - - if(msg->packet_size < s->params->chunk_size) - cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); + uint64_t const num_chunks = num_chunks_for(msg->packet_size, p->chunk_size); int dest_router_id; if (s->params->num_injection_queues > 1 || netMan.is_link_failures_enabled()) { @@ -3466,6 +4484,11 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa msg->my_g_hop = 0; msg->my_hops_cur_group = 0; + //assert(tw_now(lp) == msg->travel_start_time); + // This in here is NOT next_packet_delay but processing packet delay!! + msg->saved_next_packet_delay = tw_now(lp) - s->last_in_queue_time; + msg->saved_last_in_queue_time = s->last_in_queue_time; + s->last_in_queue_time = tw_now(lp); //qos stuff int num_qos_levels = s->params->num_qos_levels; @@ -3518,8 +4541,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa cur_chunk->msg.output_chan = vcg; cur_chunk->msg.chunk_id = i; cur_chunk->msg.origin_router_id = s->router_id[msg->rail_id]; - append_to_terminal_dally_message_list(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], - vcg, cur_chunk); + append_to_qlist(&s->terminal_msgs[msg->rail_id][vcg], cur_chunk); s->terminal_length[msg->rail_id][vcg] += s->params->chunk_size; } @@ -3625,12 +4647,13 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag { int num_qos_levels = s->params->num_qos_levels; + assert(msg->rail_id < s->params->num_rails); if(msg->qos_reset1) s->qos_status[msg->rail_id][0] = Q_ACTIVE; - if(msg->qos_reset2) + if(msg->qos_reset2 && 
s->params->num_qos_levels > 1) s->qos_status[msg->rail_id][1] = Q_ACTIVE; - if(msg->last_saved_qos) + if(msg->last_saved_qos >= 0) s->last_qos_lvl[msg->rail_id] = msg->last_saved_qos; if(bf->c1) { @@ -3654,14 +4677,14 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag terminal_dally_message_list* cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st); + cur_entry->msg.travel_start_time = msg->saved_avg_time; int data_size = s->params->chunk_size; if(cur_entry->msg.packet_size < s->params->chunk_size) data_size = cur_entry->msg.packet_size % s->params->chunk_size; s->qos_data[msg->rail_id][vcg] -= data_size; - prepend_to_terminal_dally_message_list(s->terminal_msgs[msg->rail_id], - s->terminal_msgs_tail[msg->rail_id], vcg, cur_entry); + prepend_to_qlist(&s->terminal_msgs[msg->rail_id][vcg], cur_entry); if(bf->c4) { s->in_send_loop[msg->rail_id] = msg->saved_send_loop; @@ -3715,11 +4738,11 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * } msg->saved_vc = vcg; - terminal_dally_message_list* cur_entry = s->terminal_msgs[msg->rail_id][vcg]; + terminal_dally_message_list* cur_entry = return_head_from_qlist(&s->terminal_msgs[msg->rail_id][vcg]); int data_size = s->params->chunk_size; - uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - num_chunks++; + uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size); + + msg->saved_avg_time = cur_entry->msg.travel_start_time; // reusing field saved_avg_time. It is only used in another event handler path (arrive). 
So, no interruptions here cur_entry->msg.travel_start_time = tw_now(lp); double bandwidth_coef = 1; @@ -3784,13 +4807,17 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * tw_event_send(e); +#if DEBUG == 1 if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %llu", cur_entry->msg.packet_ID, s->terminal_id, LLU(cur_entry->msg.dest_terminal_lpid), LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id[msg->rail_id], LLU(router_id)); +#endif if(cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) { + bf->c16 = 1; + msg->packet_ID = cur_entry->msg.packet_ID; tw_stime local_ts = 0; tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); void * m_new = tw_event_data(e_new); @@ -3801,7 +4828,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * } s->vc_occupancy[msg->rail_id][vcg] += s->params->chunk_size; - cur_entry = return_head(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], vcg); + rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); s->terminal_length[msg->rail_id][vcg] -= s->params->chunk_size; s->link_traffic[msg->rail_id] += s->params->chunk_size; @@ -3813,12 +4840,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * if(num_qos_levels > 1) //I think this one is OK since the default is that terminals have only 1 VC anyway so leaving vcg as next_vcg = get_next_vcg(s, bf, msg, lp); - cur_entry = NULL; - if(next_vcg >= 0) - cur_entry = s->terminal_msgs[msg->rail_id][next_vcg]; - /* if there is another packet inline then schedule another send event */ - if(cur_entry != NULL && s->vc_occupancy[msg->rail_id][next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { + if(next_vcg >= 0 && !qlist_empty(&s->terminal_msgs[msg->rail_id][next_vcg]) && 
s->vc_occupancy[msg->rail_id][next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { terminal_dally_message *m_new; e = model_net_method_event_new(lp->gid, injection_ts + gen_noise(lp, &msg->num_rngs), lp, DRAGONFLY_DALLY, (void**)&m_new, NULL); m_new->type = T_SEND; @@ -3830,7 +4853,11 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * bf->c4 = 1; s->in_send_loop[msg->rail_id] = 0; } - if(s->issueIdle[msg->rail_id]) { + // TODO (elkin): The check on vcg only properly works for `num_qos_levels == 1`. Ideally, we should be checking if there is enough + // space for the next packet in the queue (the packet determines in which queue it's going to be injected), but that is not + // possible, because we only know the queue at `packet_generate`. This might not present a big problem for most applications but + // those that are fed at a rate higher than what they can process can see the queue, potentially, grow very large. + if(s->issueIdle[msg->rail_id] && s->terminal_length[msg->rail_id][vcg] < s->params->cn_vc_size) { bf->c5 = 1; s->issueIdle[msg->rail_id] = 0; model_net_method_idle_event2(injection_ts, 0, msg->rail_id, lp); @@ -3853,9 +4880,109 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * return; } +static void notify_dest_lp_of( + terminal_state * s, + tw_lp * lp, + terminal_dally_message * msg, + enum notify_t notification + ) { + assert(NOTIFY_ZOMBIE == notification); + double offset = -1.0; + switch (notification) { + case NOTIFY_ZOMBIE: + offset = 0.0; // Zero-offset events are ugly, but we want to guarantee correctness, so this prevents funky stuff from happening (like in the case of events arriving and being processed before the notification reaches them) + break; + default: + tw_error(TW_LOC, "The notification event with type %d couldn't be created", notification); + } + + terminal_dally_message * new_msg; + // Lower value in priority means that it will be processed first + // This 
event will be processed before any predicted packet arrives (even if scheduled at the same timestamp) + tw_event *e = model_net_method_event_new_user_prio(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL, 1); + + memcpy(new_msg, msg, sizeof(terminal_dally_message)); // Just making sure that if the simulation breaks because we didn't set some value below, it breaks in a spectacular manner (~0 can be -1) + assert(new_msg->dfdally_src_terminal_id == s->terminal_id); + new_msg->type = T_NOTIFY; + new_msg->notify_type = notification; + new_msg->magic = terminal_magic_num; + tw_event_send(e); +} + +static void notify_src_lp_on_total_latency(tw_lp * lp, terminal_dally_message * msg) +{ + terminal_dally_message * new_msg; + tw_event *e = model_net_method_event_new( + msg->src_terminal_id, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL); + + // We copy all the data from the original message but will change the flags for the message + // (Some of the data that we care about: packet_ID, src_terminal_lpid, dest_terminal_lpid, dfdally_src_terminal_id, dfdally_dest_terminal_id) + memcpy(new_msg, msg, sizeof(terminal_dally_message)); + //strcpy(new_msg->category, msg->category); + new_msg->type = T_NOTIFY; + new_msg->notify_type = NOTIFY_LATENCY; + new_msg->magic = terminal_magic_num; + tw_event_send(e); +} + +static void process_terminal_notification_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { + switch ((enum notify_t) msg->notify_type) { + case NOTIFY_LATENCY: + break; + + case NOTIFY_ZOMBIE: + { + struct packet_id const zombie = { + .packet_ID = msg->packet_ID, + .dfdally_src_terminal_id = msg->dfdally_src_terminal_id}; + assert(s->zombies.count(zombie) == 1); + s->zombies.erase(zombie); + } + break; + } +} + +static void process_terminal_notification_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { + switch ((enum notify_t) msg->notify_type) { + case 
NOTIFY_LATENCY: + break; + + case NOTIFY_ZOMBIE: { + // Adding new zombie to LP list of zombies + assert(lp->gid == msg->dest_terminal_lpid); + assert(s->terminal_id == msg->dfdally_dest_terminal_id); + + //printf("INSERTING zombie alert: LPID=%d packet_ID = %d dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id); + struct packet_id const zombie = { + .packet_ID = msg->packet_ID, + .dfdally_src_terminal_id = msg->dfdally_src_terminal_id}; + assert(s->zombies.count(zombie) == 0); + s->zombies.insert(zombie); + } + break; + } +} + +#if ALWAYS_DETERMINISTIC_NETWORK +// This function triggers an event that is completely ignored when processed later. The number of events produced by a terminal/router DOES alter the simulation results. (The number of events processed by an LP shouldn't be a parameter to the simulation itself, but it is weirdly). +static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * msg, tw_lp * lp) +{ + (void) s; + (void) msg; + terminal_dally_message * new_msg; + tw_event *e = model_net_method_event_new( + lp->gid, g_tw_lookahead, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL); + + new_msg->type = T_VACUOUS_EVENT; + new_msg->magic = terminal_magic_num; + tw_event_send(e); +} +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ + //used by packet_arrive() static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size) { + (void) s; void * tmp_ptr = model_net_method_get_edata(DRAGONFLY_DALLY, msg); tw_stime ts = 0; @@ -3883,16 +5010,171 @@ static void send_remote_event(terminal_state * s, terminal_dally_message * msg, return; } +static void packet_arrive_predicted_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + struct dfly_hash_key key = { + .message_id = msg->message_id, + .sender_id = msg->sender_lp, + }; + struct dfly_qhash_entry * tmp = NULL; + struct qhash_head * hash_link = NULL; + + // 
If entry was removed from hash + if(bf->c8) { + struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *) rc_stack_pop(s->st); + qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); + s->rank_tbl_pop++; + + if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + + hash_link = &(d_entry_pop->hash_link); + tmp = d_entry_pop; + // In case it was not deleted, and we accessed it + } else if (bf->c9 || bf->c5) { + assert(!tmp); + hash_link = qhash_search(s->rank_tbl, &key); + + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + } + assert((bf->c9 || bf->c5) == bf->c6); + + if(bf->c4) { + model_net_event_rc2(lp, &msg->event_rc); + } + + if(bf->c7) { + s->finished_msgs--; + s->total_msg_size -= msg->total_size; + total_msg_sz -= msg->total_size; + N_finished_msgs--; + s->data_size_ross_sample -= msg->total_size; + s->ross_sample.data_size_sample -= msg->total_size; + s->data_size_sample -= msg->total_size; + } + + if(bf->c6) { + tmp->remaining_packets++; + } + + if(bf->c5) { + qhash_del(hash_link); + free_tmp(tmp); + s->rank_tbl_pop--; + } +} + +/* packet arrives at the destination terminal */ +static void packet_arrive_predicted(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + assert(lp->gid == msg->dest_terminal_lpid); + /* WE do not allow self messages through dragonfly */ + assert(lp->gid != msg->src_terminal_id); + +#if DEBUG == 1 + if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) + printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop); +#endif + + //record for commit_f file IO + msg->travel_end_time = tw_now(lp); + + // The table has to have been initialized already, if not, what the heck! 
+ struct dfly_hash_key key = { + .message_id = msg->message_id, + .sender_id = msg->sender_lp, + }; + + // Finding out if message is in hash + struct qhash_head * hash_link = qhash_search(s->rank_tbl, &key); + struct dfly_qhash_entry * tmp = NULL; + if(hash_link) { + bf->c9 = 1; + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + // We create an entry into the hash only if it makes sense to do so (ie, only when the message needs multiple packets to be completed) + } else if (msg->total_size > msg->packet_size) { + bf->c5 = 1; + + uint64_t const packet_size = s->params->packet_size; + uint64_t total_packets = msg->total_size / packet_size + (msg->total_size % packet_size ? 1 : 0); + if (total_packets == 0) { total_packets = 1; } + + struct dfly_qhash_entry * const d_entry = (dfly_qhash_entry *) calloc(1, sizeof (struct dfly_qhash_entry)); + d_entry->num_chunks = 0; + d_entry->key = key; + d_entry->remote_event_data = NULL; + d_entry->remote_event_size = 0; + d_entry->remaining_packets = total_packets; + qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); + s->rank_tbl_pop++; + + if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) { + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + } + + hash_link = &(d_entry->hash_link); + tmp = d_entry; + // Just for completion, checking invariant + } else { + // packet sz == message sz + assert(msg->total_size == msg->packet_size); + } + + // Decreasing the number of remaining packets + if (tmp) { + bf->c6 = 1; + tmp->remaining_packets--; + + /* retrieve the event data, all chunks from the same packet carry the `remote_event_data` */ + if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data) + { + /* Now retreieve the number of chunks completed from the hash and update them */ + void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + + /* Retreive the remote event entry */ + tmp->remote_event_data = (char*) calloc(1, msg->remote_event_size_bytes); + 
assert(tmp->remote_event_data); + tmp->remote_event_size = msg->remote_event_size_bytes; + memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); + } + } + + bool const is_msg_completed = tmp ? tmp->remaining_packets == 0 : true; + + if(is_msg_completed) { + bf->c7 = 1; + s->data_size_sample += msg->total_size; + s->ross_sample.data_size_sample += msg->total_size; + s->data_size_ross_sample += msg->total_size; + N_finished_msgs++; + total_msg_sz += msg->total_size; + s->total_msg_size += msg->total_size; + s->finished_msgs++; + + if (tmp) { + // This should always be true. It sends the message to the server/workload or communicates to the model-net layer + if (tmp->remote_event_data && tmp->remote_event_size > 0) { + send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); + } + + bf->c8 = 1; + qhash_del(hash_link); + rc_stack_push(lp, tmp, free_tmp, s->st); + s->rank_tbl_pop--; + } else { // packet sz == message sz + if (msg->remote_event_size_bytes > 0) { + void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + assert(m_data_src); + send_remote_event(s, msg, lp, bf, (char*) m_data_src, msg->remote_event_size_bytes); + } + } + } +} + static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { if (g_congestion_control_enabled) cc_terminal_send_ack_rc(s->local_congestion_controller); - - if(bf->c31) - { - s->packet_fin--; - packet_fin--; - } if(msg->path_type == MINIMAL) minimal_count--; @@ -3932,6 +5214,8 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess if(bf->c1) { + s->packet_fin--; + packet_fin--; stat->recv_count--; stat->recv_bytes -= msg->packet_size; N_finished_packets--; @@ -3944,11 +5228,35 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess { s->max_latency = msg->saved_available_time; } - if(bf->c7) - { + + struct packet_id const packet_key = { + .packet_ID = msg->packet_ID, + 
.dfdally_src_terminal_id = msg->dfdally_src_terminal_id + }; + + if (bf->c28) { + if (bf->c29) { + s->remaining_sz_packets[packet_key] = 0; + } + s->remaining_sz_packets[packet_key] += s->params->chunk_size; + } else { + if (bf->c29) { + s->remaining_sz_packets.erase(packet_key); + } + } + + if (bf->c14) { + s->zombies.emplace((struct packet_id) { + .packet_ID = msg->packet_ID, + .dfdally_src_terminal_id = msg->dfdally_src_terminal_id + }); + } + if (bf->c15) { + return; + } + + if(bf->c7) { //assert(!hash_link); - if(bf->c4) - model_net_event_rc2(lp, &msg->event_rc); N_finished_msgs--; s->finished_msgs--; @@ -3958,6 +5266,10 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess s->ross_sample.data_size_sample -= msg->total_size; s->data_size_ross_sample -= msg->total_size; + if(bf->c4) { + model_net_event_rc2(lp, &msg->event_rc); + } + struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st); qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); s->rank_tbl_pop++; @@ -3973,20 +5285,21 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess assert(tmp); tmp->num_chunks--; + if (bf->c13) { + tmp->remaining_packets++; + } + if(bf->c5) { qhash_del(hash_link); free_tmp(tmp); s->rank_tbl_pop--; } - - return; } /* packet arrives at the destination terminal */ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - // if(isRoutingMinimal(routing) && msg->my_N_hop > 4) // { // printf("TERMINAL RECEIVED A NONMINIMAL LENGTH PACKET\n"); @@ -4008,46 +5321,13 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message if (g_congestion_control_enabled) cc_terminal_send_ack(s->local_congestion_controller, msg->src_terminal_id); - // NIC aggregation - should this be a separate function? 
- // Trigger an event on receiving server - - if(!s->rank_tbl) - s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); - - struct dfly_hash_key key; - key.message_id = msg->message_id; - key.sender_id = msg->sender_lp; - - struct qhash_head *hash_link = NULL; - struct dfly_qhash_entry * tmp = NULL; - - hash_link = qhash_search(s->rank_tbl, &key); - - if(hash_link) - tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); - - uint64_t total_chunks = msg->total_size / s->params->chunk_size; - - if(msg->total_size % s->params->chunk_size) - total_chunks++; - - if(!total_chunks) - total_chunks = 1; - - /*if(tmp) - { - if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) - { - //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); - tw_lp_suspend(lp, 0, 0); - return; - } - }*/ assert(lp->gid == msg->dest_terminal_lpid); +#if DEBUG == 1 if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop); - +#endif + tw_stime ts = s->params->cn_credit_delay; // no method_event here - message going to router @@ -4062,11 +5342,6 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message buf_msg->type = R_BUFFER; tw_event_send(buf_e); - bf->c1 = 0; - bf->c3 = 0; - bf->c4 = 0; - bf->c7 = 0; - /* Total overall finished chunks in simulation */ N_finished_chunks++; /* Finished chunks on a LP basis */ @@ -4080,9 +5355,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message /* WE do not allow self messages through dragonfly */ assert(lp->gid != msg->src_terminal_id); - uint64_t num_chunks = msg->packet_size / s->params->chunk_size; - if (msg->packet_size < s->params->chunk_size) - num_chunks++; + uint64_t const num_chunks = num_chunks_for(msg->packet_size, s->params->chunk_size); if(msg->path_type == MINIMAL) minimal_count++; @@ 
-4090,12 +5363,6 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message if(msg->path_type == NON_MINIMAL) nonmin_count++; - if(msg->chunk_id == num_chunks - 1) - { - bf->c31 = 1; - s->packet_fin++; - packet_fin++; - } if(msg->path_type != MINIMAL && msg->path_type != NON_MINIMAL) printf("\n Wrong message path type %d ", msg->path_type); @@ -4123,6 +5390,20 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message msg->saved_rcv_time = stat->recv_time; stat->recv_time += ete_latency; + // Chunk with the last id has been received (not the last chunk to receive necessarily) + if(msg->chunk_id == num_chunks - 1) + { + bf->c1 = 1; + s->packet_fin++; + packet_fin++; + + stat->recv_count++; + stat->recv_bytes += msg->packet_size; + + N_finished_packets++; + s->finished_packets++; + } + #if DEBUG == 1 if( msg->packet_ID == TRACK && msg->chunk_id == num_chunks-1 @@ -4138,19 +5419,99 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message } #endif - /* Now retreieve the number of chunks completed from the hash and update - * them */ - void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + if(s->min_latency > ete_latency) { + bf->c21 = 1; + msg->saved_min_lat = s->min_latency; + s->min_latency = ete_latency; + } + + if(s->max_latency < ete_latency) { + bf->c22 = 1; + msg->saved_available_time = s->max_latency; + s->max_latency = ete_latency; + } + + struct packet_id const packet_key = { + .packet_ID = msg->packet_ID, + .dfdally_src_terminal_id = msg->dfdally_src_terminal_id + }; + bool const is_zombie = s->zombies.count(packet_key) == 1; + bool const has_remaining_sz = s->remaining_sz_packets.count(packet_key) == 1; + + // Finding out if the packet is complete + bool is_packet_completed = false; + int const chunk_size = s->params->chunk_size; + if (has_remaining_sz) { + bf->c28 = 1; + int const actual_chunk_size = std::min(chunk_size, (int)s->remaining_sz_packets[packet_key]); + 
assert(s->remaining_sz_packets[packet_key] >= actual_chunk_size); + s->remaining_sz_packets[packet_key] -= actual_chunk_size; + + // if `remaining == 0`, ie, if the packet has been completed + if (s->remaining_sz_packets[packet_key] == 0) { + bf->c29 = 1; + is_packet_completed = true; + s->remaining_sz_packets.erase(packet_key); + } + } else { + if (chunk_size < msg->packet_size) { + bf->c29 = 1; + s->remaining_sz_packets[packet_key] = msg->packet_size - chunk_size; + } else { + is_packet_completed = true; + } + } + + // Zombies don't generate delay notifications, and they don't modify the state of `s->rank_tbl` (`packet_arrive_predicted` should have removed the msg entry already) + if (is_zombie) { + //printf("We got a zombie! LPID=%d packet_ID = %d dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id); + if (is_packet_completed) { + s->zombies.erase(packet_key); + bf->c14 = 1; + } + bf->c15 = 1; + return; + } + + struct dfly_hash_key key = { + .message_id = msg->message_id, + .sender_id = msg->sender_lp, + }; + + struct qhash_head *hash_link = NULL; + struct dfly_qhash_entry * tmp = NULL; + + hash_link = qhash_search(s->rank_tbl, &key); + + if(hash_link) + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + + uint64_t const total_chunks = num_chunks_for(msg->total_size, s->params->chunk_size); + + /*if(tmp) + { + if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) + { + //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); + tw_lp_suspend(lp, 0, 0); + return; + } + }*/ /* If an entry does not exist then create one */ if(!tmp) { bf->c5 = 1; + uint64_t const packet_size = s->params->packet_size; + uint64_t total_packets = msg->total_size / packet_size + (msg->total_size % packet_size ? 
1 : 0); + if (total_packets == 0) { total_packets = 1; } + struct dfly_qhash_entry * d_entry = (dfly_qhash_entry *)calloc(1, sizeof (struct dfly_qhash_entry)); d_entry->num_chunks = 0; d_entry->key = key; d_entry->remote_event_data = NULL; d_entry->remote_event_size = 0; + d_entry->remaining_packets = total_packets; qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); s->rank_tbl_pop++; @@ -4164,45 +5525,48 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message assert(tmp); tmp->num_chunks++; - if(msg->chunk_id == num_chunks - 1) - { - bf->c1 = 1; - stat->recv_count++; - stat->recv_bytes += msg->packet_size; - - N_finished_packets++; - s->finished_packets++; - } - - /* if its the last chunk of the packet then handle the remote event data */ + /* retrieve the event data, all chunks from the same packet carry the `remote_event_data` */ if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data) { + /* Now retreieve the number of chunks completed from the hash and update + * them */ + void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + /* Retreive the remote event entry */ tmp->remote_event_data = (char*)calloc(1, msg->remote_event_size_bytes); assert(tmp->remote_event_data); tmp->remote_event_size = msg->remote_event_size_bytes; memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); } - - if(s->min_latency > ete_latency) { - bf->c21 = 1; - msg->saved_min_lat = s->min_latency; - s->min_latency = ete_latency; - } - if(s->max_latency < ete_latency) { - bf->c22 = 1; - msg->saved_available_time = s->max_latency; - s->max_latency = ete_latency; - } - /* If all chunks of a message have arrived then send a remote event to the - * callee*/ - //assert(tmp->num_chunks <= total_chunks); + // if the packet is complete (ie, this `msg` is the last piece of the packet) + if (is_packet_completed) { + bf->c13 = 1; + + tmp->remaining_packets--; + + //printf("Good day sir, not a zombie! 
LPID=%d packet_ID = %d dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id); + if (packet_latency_f || dally_surrogate_configured) { + notify_src_lp_on_total_latency(lp, msg); +#if ALWAYS_DETERMINISTIC_NETWORK + } else { + // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact + // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce + // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S + vacuous_msg_to_itself(s, msg, lp); +#endif /* if ALWAYS_DETERMINISTIC_NETWORK */ + } + } - if(tmp->num_chunks >= total_chunks) + // if the message is complete (ie, this `msg` is the last piece of the message) + /* If all chunks of a message have arrived then send a remote event to the callee */ + //if(tmp->num_chunks >= total_chunks) // this was the test before, it is a good test assumming the network is never frozen + if(tmp->remaining_packets == 0) { bf->c7 = 1; + assert(tmp->num_chunks <= total_chunks); + s->data_size_sample += msg->total_size; s->ross_sample.data_size_sample += msg->total_size; s->data_size_ross_sample += msg->total_size; @@ -4210,7 +5574,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message total_msg_sz += msg->total_size; s->total_msg_size += msg->total_size; s->finished_msgs++; - + //assert(tmp->remote_event_data && tmp->remote_event_size > 0); if(tmp->remote_event_data && tmp->remote_event_size > 0) { send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); @@ -4219,8 +5583,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message qhash_del(hash_link); rc_stack_push(lp, tmp, free_tmp, s->st); s->rank_tbl_pop--; - } - return; + } } static void terminal_buf_update_rc(terminal_state * s, @@ 
-4260,7 +5623,7 @@ static void terminal_buf_update(terminal_state * s, tw_stime ts = 0; s->vc_occupancy[msg->rail_id][vcg] -= s->params->chunk_size; - if(s->in_send_loop[msg->rail_id] == 0 && s->terminal_msgs[msg->rail_id][vcg] != NULL) { + if(s->in_send_loop[msg->rail_id] == 0 && !qlist_empty(&s->terminal_msgs[msg->rail_id][vcg])) { terminal_dally_message *m; bf->c1 = 1; tw_event* e = model_net_method_event_new(lp->gid, ts + gen_noise(lp, &msg->num_rngs), lp, DRAGONFLY_DALLY, @@ -4274,10 +5637,12 @@ static void terminal_buf_update(terminal_state * s, return; } -void -dragonfly_dally_terminal_final( terminal_state * s, +static void dragonfly_dally_terminal_final( terminal_state * s, tw_lp * lp ) { + if (freeze_network_on_switch && is_dally_surrogate_on) { + dragonfly_dally_terminal_surrogate_to_highdef(s, lp, NULL); + } // printf("terminal id %d\n",s->terminal_id); dragonfly_total_time += s->total_time; //increment the PE level time counter @@ -4334,13 +5699,57 @@ dragonfly_dally_terminal_final( terminal_state * s, for(int i = 0; i < s->params->num_rails; i++) { - if(s->terminal_msgs[i][0] != NULL) - printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + if(!qlist_empty(&s->terminal_msgs[i][0])) { + printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + } } lp_io_write(lp->gid, (char*)"dragonfly-cn-stats", written, s->output_buf2); + if (packet_latency_f) { + // If the last packet transmitted actually received a latency notification (was delievered) + if (s->arrival_of_last_packet.packet_ID != -1) { + auto sent = s->sent_packets[s->arrival_of_last_packet.packet_ID]; + assert(s->sent_packets.count(s->arrival_of_last_packet.packet_ID) == 1); // packet_ID is in s->sent_packets + assert(sent.next_packet_delay < 0); // next_packet_delay is -1 + + double const travel_end_time = s->arrival_of_last_packet.travel_end_time; + struct packet_end end = { + .travel_end_time = travel_end_time, + .next_packet_delay = -1, + }; + + 
packet_latency_save_to_file(s->terminal_id, &sent.start, &end, false, false); + + // Deallocating memory from packet_start + if (sent.message_data) { free(sent.message_data); } + if (sent.remote_event_data) { free(sent.remote_event_data); } + if (sent.local_data) { free(sent.local_data); } + + s->sent_packets.erase(s->arrival_of_last_packet.packet_ID); + s->arrival_of_last_packet.packet_ID = -1; + } + + // Storing all other missing packets into io file (deleting all elements from s->sent_packets as we go) + for (auto it = s->sent_packets.begin(); it != s->sent_packets.end(); it = s->sent_packets.erase(it)) { + auto& sent = it->second; + uint64_t packet_ID = it->first; + assert(sent.message_data); + + struct packet_end end = { + .travel_end_time = -1, + .next_packet_delay = sent.next_packet_delay, + }; + + packet_latency_save_to_file(s->terminal_id, &sent.start, &end, false, false); + + // Deallocating memory from packet_start + if (sent.message_data) { free(sent.message_data); } + if (sent.remote_event_data) { free(sent.remote_event_data); } + if (sent.local_data) { free(sent.local_data); } + } + } //if(s->packet_gen != s->packet_fin) // printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin); @@ -4349,16 +5758,40 @@ dragonfly_dally_terminal_final( terminal_state * s, qhash_finalize(s->rank_tbl); rc_stack_destroy(s->st); + rc_stack_destroy(s->cc_st); //TODO FREE THESE CORRECTLY for(int i = 0; i < s->params->num_rails; i++) { free(s->vc_occupancy[i]); + // TODO: terminal_msgs are not properly freed if there are messages left. Correct this! free(s->terminal_msgs[i]); - free(s->terminal_msgs_tail[i]); } free(s->vc_occupancy); free(s->terminal_msgs); - free(s->terminal_msgs_tail); + + // Calling destructors for data. There is no need to free data, the + // destructors do it themselves. 
ROSS allocated space for the datatypes and + // it doesn't need to be freed +#if 0 + // Checking that there aren't any zombies left in the simulation + printf("terminal %d - zombies = [", s->terminal_id); + for (auto&& z: s->zombies) { + printf("(%d %d) ", z.packet_ID, z.dfdally_src_terminal_id); + } + printf("]\n"); +#endif + for (auto&& kv: s->sent_packets) { + if (kv.second.message_data) { free(kv.second.message_data); } + if (kv.second.remote_event_data) { free(kv.second.remote_event_data); } + if (kv.second.local_data) { free(kv.second.local_data); } + } + s->sent_packets.~map(); + s->is_pending_local_send.~set(); + s->remaining_sz_packets.~map(); + + if (s->predictor_data) { + free(s->predictor_data); + } } void dragonfly_dally_router_final(router_state * s, tw_lp * lp){ @@ -4379,11 +5812,11 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){ int i, j; for(i = 0; i < s->params->radix; i++) { for(j = 0; j < s->params->num_vcs; j++) { - if(s->queued_msgs[i][j] != NULL) { + if(!qlist_empty(&s->queued_msgs[i][j])) { printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, s->vc_occupancy[i][j]); } - if(s->pending_msgs[i][j] != NULL) { + if(!qlist_empty(&s->pending_msgs[i][j])) { printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); } } @@ -4393,6 +5826,7 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){ fclose(dragonfly_rtr_bw_log); rc_stack_destroy(s->st); + rc_stack_destroy(s->cc_st); const dragonfly_param *p = s->params; int written = 0; @@ -4478,6 +5912,35 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){ // dragonfly_print_params(s->params); // } // } + + //Xin: output link statistics + if(p->counting_bool) + { + // for link traffic + if(!s->router_id){ + written = sprintf(s->output_buf5, "# Format \n"); + lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5); + } + for(int i=0; i < p->counting_windows; i++) { + written = 
sprintf(s->output_buf5, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval)); + for (int d=0; d < p->radix; d++) + written += sprintf(s->output_buf5 + written, " %d", (s->agg_link_traffic[i][d])); + lp_io_write(lp->gid, (char*)"dragonfly-router-traffic-sample", written, s->output_buf5); + } + + // for link busy time + if(!s->router_id){ + written = sprintf(s->output_buf6, "# Format \n"); + lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6); + } + for(int i=0; i < p->counting_windows; i++) { + written = sprintf(s->output_buf6, "\n %llu %d %d %lf", LLU(lp->gid), s->router_id / p->num_routers, s->router_id , (p->counting_start+(i+1)*p->counting_interval)); + for (int d=0; d < p->radix; d++) + written += sprintf(s->output_buf6 + written, " %lf", (s->agg_busy_time[i][d])); + lp_io_write(lp->gid, (char*)"dragonfly-router-busytime-sample", written, s->output_buf6); + } + } + } static Connection do_dfdally_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) @@ -4711,8 +6174,12 @@ static void router_packet_receive_rc(router_state * s, s->is_monitoring_bw = 0; if(bf->c2) { - terminal_dally_message_list * tail = return_tail(s->pending_msgs[output_port], s->pending_msgs_tail[output_port], output_chan); - delete_terminal_dally_message_list(tail); + if (!qlist_empty(&s->pending_msgs[output_port][output_chan])) { + struct qlist_head *last = s->pending_msgs[output_port][output_chan].prev; + qlist_del(last); + terminal_dally_message_list *tail = qlist_entry(last, terminal_dally_message_list, list); + delete_terminal_dally_message_list(tail); + } s->vc_occupancy[output_port][output_chan] -= s->params->chunk_size; if(bf->c3) { s->in_send_loop[output_port] = 0; @@ -4724,8 +6191,12 @@ static void router_packet_receive_rc(router_state * s, { s->last_buf_full[output_port] = msg->saved_busy_time; } - 
delete_terminal_dally_message_list(return_tail(s->queued_msgs[output_port], - s->queued_msgs_tail[output_port], output_chan)); + if (!qlist_empty(&s->queued_msgs[output_port][output_chan])) { + struct qlist_head *last = s->queued_msgs[output_port][output_chan].prev; + qlist_del(last); + terminal_dally_message_list *tail = qlist_entry(last, terminal_dally_message_list, list); + delete_terminal_dally_message_list(tail); + } s->queued_count[output_port] -= s->params->chunk_size; } @@ -4887,8 +6358,10 @@ static void router_packet_receive( router_state * s, tw_error(TW_LOC, "\n Output channel %d great than available VCs %d", output_chan, s->params->num_vcs - 1); //cur_chunk->msg.packet_ID, output_chan, output_port, s->router_id, dest_router_id, cur_chunk->msg.path_type, src_grp_id, dest_grp_id, msg->src_terminal_id); +#if DEBUG == 1 if(cur_chunk->msg.packet_ID == LLU(TRACK_PKT) && cur_chunk->msg.src_terminal_id == T_ID) printf("\n Packet %llu arrived at router %u next stop %d final stop %d local hops %d global hops %d", cur_chunk->msg.packet_ID, s->router_id, next_stop, dest_router_id, cur_chunk->msg.my_l_hop, cur_chunk->msg.my_g_hop); +#endif if(msg->remote_event_size_bytes > 0) { void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY_ROUTER, msg); @@ -4901,8 +6374,7 @@ static void router_packet_receive( router_state * s, assert(output_chan < s->params->num_vcs && output_port < s->params->radix); router_credit_send(s, msg, lp, -1, &(msg->num_rngs)); - append_to_terminal_dally_message_list(s->pending_msgs[output_port], s->pending_msgs_tail[output_port], - output_chan, cur_chunk); + qlist_add_tail(&cur_chunk->list, &s->pending_msgs[output_port][output_chan]); s->vc_occupancy[output_port][output_chan] += s->params->chunk_size; if(s->in_send_loop[output_port] == 0) { bf->c3 = 1; @@ -4926,8 +6398,7 @@ static void router_packet_receive( router_state * s, cur_chunk->msg.saved_vc = msg->vc_index; cur_chunk->msg.saved_channel = msg->output_chan; assert(output_chan < 
s->params->num_vcs && output_port < s->params->radix); - append_to_terminal_dally_message_list( s->queued_msgs[output_port], - s->queued_msgs_tail[output_port], output_chan, cur_chunk); + qlist_add_tail(&cur_chunk->list, &s->queued_msgs[output_port][output_chan]); s->queued_count[output_port] += s->params->chunk_size; @@ -4965,14 +6436,26 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m int src_term_id = msg->dfdally_src_terminal_id; int app_id = msg->saved_app_id; + assert(output_port < s->params->radix); if(msg->qos_reset1) s->qos_status[output_port][0] = Q_ACTIVE; - if(msg->qos_reset2) + if(msg->qos_reset2 && s->params->num_qos_levels > 1) s->qos_status[output_port][1] = Q_ACTIVE; if(msg->last_saved_qos) s->last_qos_lvl[output_port] = msg->last_saved_qos; - + + //Xin: target window to rollback + bool rolback = false; + int current_window = -1; + const dragonfly_param *p = s->params; + if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) { + current_window = (int) ((msg->last_sent_time-s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + rolback = true; + } + } + if(bf->c1) { s->in_send_loop[output_port] = msg->saved_send_loop; if(bf->c2) { @@ -4980,6 +6463,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m } return; } + s->last_qos_lvl[output_port] = msg->last_saved_qos; int output_chan = msg->saved_channel; if(bf->c8) @@ -4988,6 +6472,11 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m s->busy_time_sample[output_port] = msg->saved_sample_time; s->ross_rsample.busy_time[output_port] = msg->saved_sample_time; s->last_buf_full[output_port] = msg->saved_busy_time; + + //Xin: reverse busy time + if(rolback && current_window >= 0){ + s->agg_busy_time[current_window][output_port] = msg->saved_rcv_time; + } } terminal_dally_message_list * cur_entry = 
(terminal_dally_message_list *)rc_stack_pop(s->st); @@ -5001,7 +6490,6 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m if(cur_entry->msg.packet_size < s->params->chunk_size) msg_size = cur_entry->msg.packet_size; - s->qos_data[output_port][vcg] -= msg_size; s->next_output_available_time[output_port] = msg->saved_available_time; if(bf->c11) @@ -5010,6 +6498,12 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + msg_size = cur_entry->msg.packet_size % s->params->chunk_size; + + //Xin: reverse link traffic + if(rolback && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + } } if(bf->c12) { @@ -5017,12 +6511,17 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m s->link_traffic_sample[output_port] -= s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] -= s->params->chunk_size; s->link_traffic_ross_sample[output_port] -= s->params->chunk_size; + + //Xin: reverse link traffic + if(rolback && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] -= s->params->chunk_size; + } } + s->qos_data[output_port][vcg] -= msg_size; s->total_chunks[output_port]--; - prepend_to_terminal_dally_message_list(s->pending_msgs[output_port], - s->pending_msgs_tail[output_port], output_chan, cur_entry); + qlist_add(&cur_entry->list, &s->pending_msgs[output_port][output_chan]); if (g_congestion_control_enabled) { congestion_control_message *cc_msg_rc = (congestion_control_message*)rc_stack_pop(s->cc_st); @@ -5054,6 +6553,18 @@ static void router_packet_send( router_state * s, tw_bf * bf, 
terminal_dally_mes msg->saved_vc = output_port; msg->saved_channel = output_chan; + + //Xin: target window to update link traffic + msg->last_sent_time = tw_now(lp); + bool update = false; + int current_window = -1; + const dragonfly_param *p = s->params; + if(s->params->counting_bool>0 && msg->last_sent_time >= s->params->counting_start) { + current_window = (int) ((msg->last_sent_time - s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + update = true; + } + } if(output_chan < 0) { @@ -5068,7 +6579,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes return; } - cur_entry = s->pending_msgs[output_port][output_chan]; + if (!qlist_empty(&s->pending_msgs[output_port][output_chan])) { + struct qlist_head *first = s->pending_msgs[output_port][output_chan].next; + cur_entry = qlist_entry(first, terminal_dally_message_list, list); + } else { + cur_entry = NULL; + } msg->dfdally_src_terminal_id = cur_entry->msg.dfdally_src_terminal_id; @@ -5084,6 +6600,11 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes s->busy_time_sample[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); s->ross_rsample.busy_time[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); s->last_buf_full[output_port] = 0.0; + + //Xin: update link busy time + if(update && current_window >= 0){ + s->agg_busy_time[current_window][output_port] += (tw_now(lp) - s->last_buf_full[output_port]); + } } int vcg = 0; @@ -5107,9 +6628,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes bandwidth = s->params->global_bandwidth; } - uint64_t num_chunks = cur_entry->msg.packet_size / s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - num_chunks++; + uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size); /* Injection delay: Time taken for the data to be placed on the 
link/channel * - Based on bandwidth @@ -5169,13 +6688,18 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes m->magic = router_magic_num; int msg_size = s->params->chunk_size; - if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { + if(((cur_entry->msg.packet_size % s->params->chunk_size) || cur_entry->msg.packet_size == 0) && (cur_entry->msg.chunk_id == num_chunks - 1)) { bf->c11 = 1; s->link_traffic[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); s->link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); s->ross_rsample.link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); msg_size = cur_entry->msg.packet_size % s->params->chunk_size; + + //Xin: update link traffic + if(update && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); + } } else { bf->c12 = 1; @@ -5183,12 +6707,19 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes s->link_traffic_sample[output_port] += s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] += s->params->chunk_size; s->link_traffic_ross_sample[output_port] += s->params->chunk_size; + + //Xin: update link traffic + if(update && current_window >= 0){ + s->agg_link_traffic[current_window][output_port] += s->params->chunk_size; + } } s->total_chunks[output_port]++; +#if DEBUG == 1 if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && cur_entry->msg.src_terminal_id == T_ID) printf("\n Queuing at the router %d ", s->router_id); +#endif m->rail_id = msg->rail_id; @@ -5212,15 +6743,31 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes rc_stack_push(lp, cc_msg_rc, cc_msg_rc_storage_delete, s->cc_st); } - cur_entry = 
return_head(s->pending_msgs[output_port], - s->pending_msgs_tail[output_port], output_chan); + struct qlist_head *item = qlist_pop(&s->pending_msgs[output_port][output_chan]); + cur_entry = item ? qlist_entry(item, terminal_dally_message_list, list) : NULL; rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); s->qos_data[output_port][vcg] += msg_size; s->next_output_available_time[output_port] -= s->params->router_delay; injection_ts -= s->params->router_delay; - int next_output_chan = get_next_router_vcg(s, bf, msg, lp); + int next_output_chan = -1; + int base_limit = 0; + int vcs_per_qos = s->params->num_vcs / num_qos_levels; + for(int i = 0; i < num_qos_levels; i++) + { + base_limit = i * vcs_per_qos; + for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) + { + if(!qlist_empty(&s->pending_msgs[output_port][k])) + { + next_output_chan = k; + break; + } + } + if(next_output_chan >= 0) + break; + } if(next_output_chan < 0) { @@ -5228,7 +6775,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes s->in_send_loop[output_port] = 0; return; } - cur_entry = s->pending_msgs[output_port][next_output_chan]; + if (!qlist_empty(&s->pending_msgs[output_port][next_output_chan])) { + struct qlist_head *first = s->pending_msgs[output_port][next_output_chan].next; + cur_entry = qlist_entry(first, terminal_dally_message_list, list); + } else { + cur_entry = NULL; + } assert(cur_entry != NULL); terminal_dally_message *m_new; @@ -5258,12 +6810,19 @@ static void router_buf_update_rc(router_state * s, s->ross_rsample.busy_time[indx] = msg->saved_sample_time; s->busy_time_ross_sample[indx] = msg->saved_busy_time_ross; s->last_buf_full[indx] = msg->saved_busy_time; + + //Xin: reverse agg busytime (not working for cross window reverse) + const dragonfly_param *p = s->params; + if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) { + int current_window = (int) ((msg->last_bufupdate_time - 
s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + s->agg_busy_time[current_window][indx] = msg->saved_rcv_time; + } + } } if(bf->c1) { - terminal_dally_message_list* head = return_tail(s->pending_msgs[indx], - s->pending_msgs_tail[indx], output_chan); - prepend_to_terminal_dally_message_list(s->queued_msgs[indx], - s->queued_msgs_tail[indx], output_chan, head); + terminal_dally_message_list* head = return_tail_from_qlist(&s->pending_msgs[indx][output_chan]); + qlist_add(&head->list, &s->queued_msgs[indx][output_chan]); s->vc_occupancy[indx][output_chan] -= s->params->chunk_size; s->queued_count[indx] += s->params->chunk_size; } @@ -5290,15 +6849,32 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa s->busy_time_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]); s->ross_rsample.busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx]); s->busy_time_ross_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]); + + //Xin: update link busy time + const dragonfly_param *p = s->params; + msg->last_bufupdate_time = tw_now(lp); + if(s->params->counting_bool>0 && msg->last_bufupdate_time >= s->params->counting_start) { + int current_window = (int) ((msg->last_bufupdate_time - s->params->counting_start)/s->params->counting_interval); + if(current_window < s->params->counting_windows) { + int full_window = (int) ((s->last_buf_full[indx] - s->params->counting_start)/s->params->counting_interval); + if(full_window==current_window) { + s->agg_busy_time[current_window][indx] += (tw_now(lp) - s->last_buf_full[indx]); + } else { + s->agg_busy_time[current_window][indx] += (tw_now(lp) - (s->params->counting_start+current_window*s->params->counting_interval)); + s->agg_busy_time[full_window][indx] += ((s->params->counting_start+current_window*s->params->counting_interval) - s->last_buf_full[indx]); + } + } + } + s->last_buf_full[indx] = 0.0; } - if(s->queued_msgs[indx][output_chan] != 
NULL) { + if(!qlist_empty(&s->queued_msgs[indx][output_chan])) { bf->c1 = 1; assert(indx < s->params->radix); assert(output_chan < s->params->num_vcs); - terminal_dally_message_list *head = return_head(s->queued_msgs[indx], - s->queued_msgs_tail[indx], output_chan); + struct qlist_head *item = qlist_pop(&s->queued_msgs[indx][output_chan]); + terminal_dally_message_list *head = item ? qlist_entry(item, terminal_dally_message_list, list) : NULL; /*if(strcmp(head->msg.category, "medium") == 0) { if(head->msg.saved_channel < 4 || head->msg.saved_channel >= 8) @@ -5307,13 +6883,12 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa } }*/ router_credit_send(s, &head->msg, lp, 1, &(msg->num_rngs)); - append_to_terminal_dally_message_list(s->pending_msgs[indx], - s->pending_msgs_tail[indx], output_chan, head); + qlist_add_tail(&head->list, &s->pending_msgs[indx][output_chan]); s->vc_occupancy[indx][output_chan] += s->params->chunk_size; s->queued_count[indx] -= s->params->chunk_size; } - if(s->in_send_loop[indx] == 0 && s->pending_msgs[indx][output_chan] != NULL) { + if(s->in_send_loop[indx] == 0 && !qlist_empty(&s->pending_msgs[indx][output_chan])) { bf->c2 = 1; terminal_dally_message *m; tw_stime ts = maxd(s->next_output_available_time[indx], tw_now(lp)) - tw_now(lp); @@ -5329,7 +6904,7 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa return; } -void +static void terminal_dally_event( terminal_state * s, tw_bf * bf, terminal_dally_message * msg, @@ -5342,17 +6917,38 @@ terminal_dally_event( terminal_state * s, s->ross_sample.fwd_events++; //*(int *)bf = (int)0; assert(msg->magic == terminal_magic_num); - - rc_stack_gc(lp, s->st); + //printf("LPID: %llu Event type %d processed at %f\n", lp->gid, msg->type, tw_now(lp)); + + if (is_dally_surrogate_on && freeze_network_on_switch) { + // This event will be reversed. 
It comes from the past, it has been forwarded to the future + // by the surrogate freezing the network procedure and should not be taken into account + if (! (msg->type == T_GENERATE || msg->type == T_ARRIVE_PREDICTED || msg->type == T_NOTIFY)) { + bf->c20 = 1; + printf("This shouldn't happen! :( (time stamp = %e)\n", tw_now(lp)); + return; + } + } else { + rc_stack_gc(lp, s->st); + rc_stack_gc(lp, s->cc_st); + } switch(msg->type) { case T_GENERATE: - packet_generate(s,bf,msg,lp); + if (is_dally_surrogate_on) { + bf->c10 = 1; + packet_generate_predicted(s,bf,msg,lp); + } else { + packet_generate(s,bf,msg,lp); + } break; case T_ARRIVE: packet_arrive(s,bf,msg,lp); break; + + case T_ARRIVE_PREDICTED: + packet_arrive_predicted(s,bf,msg,lp); + break; case T_SEND: packet_send(s,bf,msg,lp); @@ -5365,13 +6961,23 @@ terminal_dally_event( terminal_state * s, case T_BANDWIDTH: issue_bw_monitor_event(s, bf, msg, lp); break; + + case T_NOTIFY: + process_terminal_notification_event(s, bf, msg, lp); + break; + +#if ALWAYS_DETERMINISTIC_NETWORK + case T_VACUOUS_EVENT: + break; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ + default: printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); tw_error(TW_LOC, "Msg type not supported"); } } -void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, +static void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { msg->num_cll = 0; @@ -5380,7 +6986,9 @@ void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * m s->fwd_events++; s->ross_rsample.fwd_events++; rc_stack_gc(lp, s->st); + rc_stack_gc(lp, s->cc_st); + msg->last_received_time = s->last_time; s->last_time = tw_now(lp); assert(msg->magic == router_magic_num); @@ -5420,20 +7028,32 @@ void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * m } /* Reverse computation handler for a terminal event */ -void 
terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { + s->rev_events++; + s->ross_sample.rev_events++; + + // In case the event was skipped above, skip now + if (bf->c20) { + bf->c20 = 0; + printf("Has been rolledback! :)\n"); + return; + } + for(int i = 0; i < msg->num_rngs; i++) tw_rand_reverse_unif(lp->rng); for(int i = 0; i < msg->num_cll; i++) codes_local_latency_reverse(lp); - s->rev_events++; - s->ross_sample.rev_events++; - switch(msg->type) + switch((enum event_t) msg->type) { case T_GENERATE: - packet_generate_rc(s, bf, msg, lp); + if (bf->c10) { + packet_generate_predicted_rc(s,bf,msg,lp); + } else { + packet_generate_rc(s, bf, msg, lp); + } break; case T_SEND: @@ -5444,6 +7064,10 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da packet_arrive_rc(s, bf, msg, lp); break; + case T_ARRIVE_PREDICTED: + packet_arrive_predicted_rc(s, bf, msg, lp); + break; + case T_BUFFER: terminal_buf_update_rc(s, bf, msg, lp); break; @@ -5451,18 +7075,30 @@ void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_da case T_BANDWIDTH: issue_bw_monitor_event_rc(s,bf, msg, lp); break; + + case T_NOTIFY: + process_terminal_notification_event_rc(s, bf, msg, lp); + break; + +#if ALWAYS_DETERMINISTIC_NETWORK + case T_VACUOUS_EVENT: + break; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ default: tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type); } msg->num_cll = 0; msg->num_rngs = 0; + bf->c10 = 0; } /* Reverse computation handler for a router event */ -void router_dally_rc_event_handler(router_state * s, tw_bf * bf, +static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { + s->last_time = msg->last_received_time; + for(int i = 0; i < msg->num_rngs; i++) tw_rand_reverse_unif(lp->rng); 
@@ -5472,7 +7108,7 @@ void router_dally_rc_event_handler(router_state * s, tw_bf * bf, s->rev_events++; s->ross_rsample.rev_events++; - switch(msg->type) { + switch((enum event_t) msg->type) { case R_SEND: router_packet_send_rc(s, bf, msg, lp); break; @@ -5492,8 +7128,1227 @@ void router_dally_rc_event_handler(router_state * s, tw_bf * bf, msg->num_rngs = 0; } +//*** ---------- START OF reverse handler checking functions ---------- *** +static void copy_rank_tbl(struct qhash_table * into, struct qhash_table const * from) { + // YES! This function is very, very slow and so are all the others. This is + // the simplest implementation we could come up with without changing how + // qhash_table works or replacing it altogether. Both options would need + // substantial changes to the dragonfly model + for (int i = 0; i < from->table_size; i++) { + struct dfly_qhash_entry *entry; + qlist_for_each_entry(entry, &from->array[i], hash_link) { + struct dfly_qhash_entry *new_entry = + (struct dfly_qhash_entry *)malloc(sizeof(struct dfly_qhash_entry)); + *new_entry = *entry; // There is no need to copy contents of pointer because we don't check it + qlist_add(&new_entry->hash_link, &into->array[i]); + } + } +} + +static void clean_rank_tbl(struct qhash_table * rank_tbl) { + for (int i=0; i < rank_tbl->table_size; i++) { + while(!qlist_empty(&rank_tbl->array[i])) { + struct qlist_head *item = qlist_pop(&rank_tbl->array[i]); + struct dfly_qhash_entry * entry = qlist_entry(item, struct dfly_qhash_entry, hash_link); + free(entry); + } + } +} + +static bool check_dfly_qhash_entry(struct dfly_qhash_entry * before, struct dfly_qhash_entry * after) { + // We ignore the remote data fields because they won't be needed: + // - remote_event_size + // - remote_event_data + + if (before->key.sender_id != after->key.sender_id || + before->key.message_id != after->key.message_id || + before->num_chunks != after->num_chunks || + before->remaining_packets != after->remaining_packets) { + return 
false; + } + + return true; +} + +/* Returns true when every hash bucket of `before` holds the same number of entries as the matching bucket of `after`, and each `before` entry has a counterpart in `after` according to check_dfly_qhash_entry. The size comparison must be before-vs-after: the containment loop alone only proves before is a subset of after. */ +static bool check_rank_tbl(qhash_table const * before, struct qhash_table const * after) { + for (int i=0; i < before->table_size; i++) { + if (qlist_count(&before->array[i]) != qlist_count(&after->array[i])) { + return false; + } + struct dfly_qhash_entry * before_entry; + struct dfly_qhash_entry * after_entry; + qlist_for_each_entry(before_entry, &before->array[i], hash_link) { + // Yes, this is slow if there are many collisions, but often there won't be any + bool found_entry = false; + qlist_for_each_entry(after_entry, &after->array[i], hash_link) { + if (check_dfly_qhash_entry(before_entry, after_entry)) { + found_entry = true; + break; + } + } + if (!found_entry) { + return false; + } + } + } + return true; +} + +static void print_rank_tbl(FILE * out, char const * prefix, struct qhash_table * rank_tbl) { + fprintf(out, "%stable_size = %d\n", prefix, rank_tbl->table_size); + fprintf(out, "%s compare = %p\n", prefix, rank_tbl->compare); + fprintf(out, "%s hash = %p\n", prefix, rank_tbl->hash); + fprintf(out, "%s array = %p\n", prefix, rank_tbl->array); + + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + + for (int i=0; i < rank_tbl->table_size; i++) { + struct dfly_qhash_entry * entry; + qlist_for_each_entry(entry, &rank_tbl->array[i], hash_link) { + fprintf(out, "%s | {\n", prefix); + fprintf(out, "%s | key.message_id = %lu\n", prefix, entry->key.message_id); + fprintf(out, "%s | key.sender_id = %lu\n", prefix, entry->key.sender_id); + fprintf(out, "%s | num_chunks = %d\n", prefix, entry->num_chunks); + fprintf(out, "%s | remaining_packets = %d\n", prefix, entry->remaining_packets); + fprintf(out, "%s | remote_event_size = %d\n", prefix, entry->remote_event_size); + fprintf(out, "%s | * remote_event_data = %p\n", prefix, 
entry->remote_event_data); + if (entry->remote_event_size) { + tw_fprint_binary_array(out, subprefix, entry->remote_event_data, entry->remote_event_size); + } + fprintf(out, "%s | },\n", prefix); + } + } + + free(subprefix); +} + +static void save_terminal_state(terminal_state *into, terminal_state const *from) { + // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at + // from->predictor_data + // from->sample_stat + // from->ross_sample + // from->busy_time_ross_sample + // from->busy_time_sample + + memcpy(into, from, sizeof(terminal_state)); + + dragonfly_param const * p = into->params; + int const num_qos_levels = p->num_qos_levels; + int const num_rails = p->num_rails; + + if (!is_dally_surrogate_on) { + into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*)); + into->terminal_length = (int**) malloc(num_rails * sizeof(int*)); + into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->in_send_loop = (int*) malloc(num_rails * sizeof(int)); + into->issueIdle = (int*) malloc(num_rails * sizeof(int)); + into->qos_status = (int**) malloc(num_rails * sizeof(int*)); + into->qos_data = (int**) malloc(num_rails * sizeof(int*)); + into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int)); + into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); + into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); + into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->terminal_msgs = (struct qlist_head**) malloc(num_rails * sizeof(struct qlist_head*)); + into->link_traffic = (uint64_t*) malloc(num_rails * sizeof(uint64_t)); + + for(int i = 0; i < num_rails; i++) { + into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_status[i] = (int*) 
malloc(num_qos_levels * sizeof(int)); + into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->terminal_msgs[i] = (struct qlist_head*) malloc(num_qos_levels * sizeof(struct qlist_head)); + for (int j = 0; jvc_occupancy[i][j] = from->vc_occupancy[i][j]; + into->terminal_length[i][j] = from->terminal_length[i][j]; + into->qos_data[i][j] = from->qos_data[i][j]; + into->qos_status[i][j] = from->qos_status[i][j]; + INIT_QLIST_HEAD(&into->terminal_msgs[i][j]); + copy_terminal_dally_message_qlist(&into->terminal_msgs[i][j], &from->terminal_msgs[i][j]); + } + into->last_buf_full[i] = from->last_buf_full[i]; + into->in_send_loop[i] = from->in_send_loop[i]; + into->issueIdle[i] = from->issueIdle[i]; + into->last_qos_lvl[i] = from->last_qos_lvl[i]; + into->terminal_available_time[i] = from->terminal_available_time[i]; + into->stalled_chunks[i] = from->stalled_chunks[i]; + into->total_chunks[i] = from->total_chunks[i]; + into->busy_time[i] = from->busy_time[i]; + into->link_traffic[i] = from->link_traffic[i]; + } + } + + if (from->local_congestion_controller != NULL) { + assert(g_congestion_control_enabled); + into->local_congestion_controller = (tlc_state*) malloc(sizeof(tlc_state)); + save_tlc_state(into->local_congestion_controller, from->local_congestion_controller); + } + + into->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); + copy_rank_tbl(into->rank_tbl, from->rank_tbl); + + // I would use the C++ amgic to copy these containers but they don't work as well :S + new (&into->remaining_sz_packets) map(); + new (&into->zombies) set(); + + // Sorry const, I promise not to change the state of remaining_sz_packets + map * from_remaining_sz_packets = (map *) &from->remaining_sz_packets; + set * from_zombies = (set *) &from->zombies; + + std::map::iterator it_map; + for (it_map = from_remaining_sz_packets->begin(); it_map != from_remaining_sz_packets->end(); ++it_map) { + into->remaining_sz_packets[it_map->first] 
= it_map->second; + } + + std::set::iterator it_set; + for (it_set = from_zombies->begin(); it_set != from_zombies->end(); ++it_set) { + struct packet_id const zombie = { + .packet_ID = it_set->packet_ID, + .dfdally_src_terminal_id = it_set->dfdally_src_terminal_id}; + into->zombies.insert(zombie); + } +} + +// Partially written by Claude +static void clean_terminal_state(terminal_state *state) { + dragonfly_param const * p = state->params; + int const num_rails = p->num_rails; + int const num_qos_levels = p->num_qos_levels; + + if (!is_dally_surrogate_on) { + for (int i = 0; i < num_rails; i++) { + free(state->vc_occupancy[i]); + free(state->terminal_length[i]); + free(state->qos_status[i]); + free(state->qos_data[i]); + for (int j = 0; jterminal_msgs[i][j]); + } + free(state->terminal_msgs[i]); + } + + free(state->vc_occupancy); + free(state->terminal_length); + free(state->last_buf_full); + free(state->in_send_loop); + free(state->issueIdle); + free(state->qos_status); + free(state->qos_data); + free(state->last_qos_lvl); + free(state->terminal_available_time); + free(state->stalled_chunks); + free(state->total_chunks); + free(state->busy_time); + free(state->link_traffic); + free(state->terminal_msgs); + } + + if (state->local_congestion_controller != NULL) { + clean_tlc_state(state->local_congestion_controller); + free(state->local_congestion_controller); + } + + clean_rank_tbl(state->rank_tbl); + qhash_finalize(state->rank_tbl); + + state->remaining_sz_packets.~map(); + state->zombies.~set(); +} + +static bool check_terminal_state(terminal_state *before, terminal_state *after) { + bool is_same = true; + + // There is no need to deep-copy the following. They're never modified + assert(before->params == after->params); + assert(before->router_lp == after->router_lp); + assert(before->router_id == after->router_id); + + // We ignore the comparison of the following. 
They are not meant to be rolled-back + // before->fwd_events + // before->rev_events + // before->sent_packets + // before->last_packet_sent_id + // before->arrival_of_last_packet + // before->anno + assert(before->frozen_state == after->frozen_state); + + // Comparing all other elements of the struct + is_same &= (before->packet_counter == after->packet_counter); + is_same &= (before->packet_gen == after->packet_gen); + is_same &= (before->packet_fin == after->packet_fin); + is_same &= (before->total_gen_size == after->total_gen_size); + is_same &= (before->terminal_id == after->terminal_id); + is_same &= (before->workloads_finished_flag == after->workloads_finished_flag); + is_same &= (before->is_monitoring_bw == after->is_monitoring_bw); + is_same &= (before->rank_tbl_pop == after->rank_tbl_pop); + is_same &= (before->total_time == after->total_time); + is_same &= (before->total_msg_size == after->total_msg_size); + is_same &= (before->total_hops == after->total_hops); + is_same &= (before->finished_msgs == after->finished_msgs); + is_same &= (before->finished_chunks == after->finished_chunks); + is_same &= (before->finished_packets == after->finished_packets); + is_same &= (before->injected_chunks == after->injected_chunks); + is_same &= (before->ejected_chunks == after->ejected_chunks); + is_same &= (before->max_latency == after->max_latency); + is_same &= (before->min_latency == after->min_latency); + is_same &= (before->fin_chunks_sample == after->fin_chunks_sample); + is_same &= (before->data_size_sample == after->data_size_sample); + is_same &= (before->fin_hops_sample == after->fin_hops_sample); + is_same &= (before->fin_chunks_time == after->fin_chunks_time); + is_same &= (before->op_arr_size == after->op_arr_size); + is_same &= (before->max_arr_size == after->max_arr_size); + is_same &= (before->fin_chunks_ross_sample == after->fin_chunks_ross_sample); + is_same &= (before->data_size_ross_sample == after->data_size_ross_sample); + is_same &= 
(before->fin_hops_ross_sample == after->fin_hops_ross_sample); + is_same &= (before->fin_chunks_time_ross_sample == after->fin_chunks_time_ross_sample); + is_same &= (before->last_in_queue_time == after->last_in_queue_time); + + // Compare string buffers + is_same &= (strncmp(before->output_buf, after->output_buf, 4096) == 0); + is_same &= (strncmp(before->output_buf2, after->output_buf2, 4096) == 0); + is_same &= (strncmp(before->sample_buf, after->sample_buf, 4096) == 0); + + // Compare anno strings (handling NULL case) + if (before->anno && after->anno) { + is_same &= (strcmp(before->anno, after->anno) == 0); + } else { + is_same &= (before->anno == after->anno); + } + + if (!is_dally_surrogate_on) { + dragonfly_param const * p = before->params; + int const num_qos_levels = p->num_qos_levels; + int const num_rails = p->num_rails; + + for (int i = 0; i < num_rails; i++) { + for (int j = 0; j < num_qos_levels; j++) { + is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]); + is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]); + is_same &= (before->qos_status[i][j] == after->qos_status[i][j]); + is_same &= (before->qos_data[i][j] == after->qos_data[i][j]); + is_same &= check_terminal_dally_message_qlist(&before->terminal_msgs[i][j], &after->terminal_msgs[i][j]); + } + + is_same &= (before->last_buf_full[i] == after->last_buf_full[i]); + is_same &= (before->in_send_loop[i] == after->in_send_loop[i]); + is_same &= (before->issueIdle[i] == after->issueIdle[i]); + is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]); + is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]); + is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]); + is_same &= (before->total_chunks[i] == after->total_chunks[i]); + is_same &= (before->busy_time[i] == after->busy_time[i]); + is_same &= (before->link_traffic[i] == after->link_traffic[i]); + } + } + + // Ignoring model statistics. 
In general, we don't care if there are errors in the statistics, as they are only approximate. The statistics don't interfere with the state of the model. There is a bug within the statistics when rolling back though. A parameter is never reversed properly + //for (size_t i = 0; i < CATEGORY_MAX; i++) { + // is_same &= check_mn_stats(&before->dragonfly_stats_array[i], &after->dragonfly_stats_array[i]); + //} + + if (after->local_congestion_controller != NULL) { + is_same &= check_tlc_state(before->local_congestion_controller, after->local_congestion_controller); + } + + is_same &= check_rank_tbl(before->rank_tbl, after->rank_tbl); + + is_same &= before->remaining_sz_packets == after->remaining_sz_packets; + is_same &= before->zombies == after->zombies; + + return is_same; +} + +// Print function originally constructed with help from Claude.ai +static void print_terminal_state(FILE * out, char const * prefix, terminal_state * state) { + fprintf(out, "%sterminal_state (dragonfly-dally) ->\n", prefix); + fprintf(out, "%s | packet_counter = %ld\n", prefix, state->packet_counter); + fprintf(out, "%s | packet_gen = %d\n", prefix, state->packet_gen); + fprintf(out, "%s | packet_fin = %d\n", prefix, state->packet_fin); + fprintf(out, "%s | total_gen_size = %d\n", prefix, state->total_gen_size); + + fprintf(out, "%s | * router_lp[%d] = [", prefix, state->params->num_rails); + for (int i=0; i<state->params->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->router_lp[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * router_id[%d] = [", prefix, state->params->num_rails); + for (int i=0; i<state->params->num_rails; i++) { + fprintf(out, "%s%u", i ? 
", " : "", state->router_id[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | terminal_id = %u\n", prefix, state->terminal_id); + fprintf(out, "%s | connMan = \n", prefix); + + char addprefix[] = " | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + /* Fill the buffer before handing it to print_tlc_state; malloc leaves it uninitialized. */ + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + fprintf(out, "%s | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller); + if (state->local_congestion_controller != NULL) { + print_tlc_state(out, subprefix, state->local_congestion_controller); + } + free(subprefix); + + fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); + + if (is_dally_surrogate_on) { + fprintf(out, "%s | ** vc_occupancy = %p\n", prefix, state->vc_occupancy); + fprintf(out, "%s | *terminal_available_time = %p\n", prefix, state->terminal_available_time); + fprintf(out, "%s | *** terminal_msgs = %p\n", prefix, state->terminal_msgs); + } else { + fprintf(out, "%s | ** vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; i<state->params->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; j<state->params->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | *terminal_available_time[%d] = [", prefix, state->params->num_rails); + for (int i=0; i<state->params->num_rails; i++) { + fprintf(out, "%s%g", i ? 
", " : "", state->terminal_available_time[i]); + } + fprintf(out, "]\n"); + + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + fprintf(out, "%s | *** terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [\n", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s | | qos level %d\n", prefix, j); + print_terminal_dally_message_qlist(out, subprefix, state, &state->terminal_msgs[i][j]); + } + } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); + } + + + if (is_dally_surrogate_on) { + fprintf(out, "%s | * in_send_loop = %p\n", prefix, state->in_send_loop); + } else { + fprintf(out, "%s | * in_send_loop[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? 
", " : "", state->in_send_loop[i]); + } + fprintf(out, "]\n"); + } + + char addprefix_3[] = " | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_3) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_3); + fprintf(out, "%s | dragonfly_stats_array = [\n", prefix); + for (int i = 0; i < CATEGORY_MAX; i++) { + fprintf(out, "%s | %d:\n", prefix, i); + print_mn_stats(out, subprefix, &state->dragonfly_stats_array[i]); + } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); + + if (is_dally_surrogate_on) { + fprintf(out, "%s | ** qos_status = %p\n", prefix, state->qos_status); + fprintf(out, "%s | ** qos_data = %p\n", prefix, state->qos_data); + fprintf(out, "%s | * last_qos_lvl = %p\n", prefix, state->last_qos_lvl); + } else { + fprintf(out, "%s | ** qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | ** qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * last_qos_lvl[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? 
", " : "", state->last_qos_lvl[i]); + } + fprintf(out, "]\n"); + } + + fprintf(out, "%s | is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw); + fprintf(out, "%s | * st = %p\n", prefix, state->st); + fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); + + if (is_dally_surrogate_on) { + fprintf(out, "%s | * issueIdle = %p\n", prefix, state->issueIdle); + fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); + } else { + fprintf(out, "%s | * issueIdle[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->terminal_length[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + } + + fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? 
state->anno : "(nil)"); + fprintf(out, "%s | * params = %p\n", prefix, state->params); + + fprintf(out, "%s | * rank_tbl = {\n", prefix); + char addprefix_4[] = " | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_4) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_4); + print_rank_tbl(out, subprefix, state->rank_tbl); + free(subprefix); + fprintf(out, "%s | }\n", prefix); + + fprintf(out, "%s | rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop); + fprintf(out, "%s | total_time = %g\n", prefix, state->total_time); + fprintf(out, "%s | total_msg_size = %lu\n", prefix, state->total_msg_size); + fprintf(out, "%s | total_hops = %g\n", prefix, state->total_hops); + fprintf(out, "%s | finished_msgs = %ld\n", prefix, state->finished_msgs); + fprintf(out, "%s | finished_chunks = %ld\n", prefix, state->finished_chunks); + fprintf(out, "%s | finished_packets = %ld\n", prefix, state->finished_packets); + + if (is_dally_surrogate_on) { + fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); + fprintf(out, "%s | * last_buf_full = %p\n", prefix, state->last_buf_full); + fprintf(out, "%s | * busy_time = %p\n", prefix, state->busy_time); + fprintf(out, "%s | * link_traffic = %p\n", prefix, state->link_traffic); + fprintf(out, "%s | * total_chunks = %p\n", prefix, state->total_chunks); + fprintf(out, "%s | * stalled_chunks = %p\n", prefix, state->stalled_chunks); + } else { + fprintf(out, "%s | * last_buf_full[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * busy_time[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? 
", " : "", state->busy_time[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * link_traffic[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * total_chunks[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * stalled_chunks[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]); + } + fprintf(out, "]\n"); + } + + fprintf(out, "%s | injected_chunks = %lu\n", prefix, state->injected_chunks); + fprintf(out, "%s | ejected_chunks = %lu\n", prefix, state->ejected_chunks); + fprintf(out, "%s | max_latency = %g\n", prefix, state->max_latency); + fprintf(out, "%s | min_latency = %g\n", prefix, state->min_latency); + fprintf(out, "%s | output_buf = '%.4096s'\n", prefix, state->output_buf); + fprintf(out, "%s | output_buf2 = '%.4096s'\n", prefix, state->output_buf2); + fprintf(out, "%s | fin_chunks_sample = %ld\n", prefix, state->fin_chunks_sample); + fprintf(out, "%s | data_size_sample = %ld\n", prefix, state->data_size_sample); + fprintf(out, "%s | fin_hops_sample = %g\n", prefix, state->fin_hops_sample); + fprintf(out, "%s | fin_chunks_time = %g\n", prefix, state->fin_chunks_time); + fprintf(out, "%s | * busy_time_sample = %p\n", prefix, state->busy_time_sample); + fprintf(out, "%s | sample_buf = '%.4096s'\n", prefix, state->sample_buf); + fprintf(out, "%s | * sample_stat = %p\n", prefix, state->sample_stat); // ingnoring as this part of the code is never used. 
Originally part of instrumentation + fprintf(out, "%s | op_arr_size = %d\n", prefix, state->op_arr_size); + fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); + fprintf(out, "%s | fwd_events = %ld\n", prefix, state->fwd_events); + fprintf(out, "%s | rev_events = %ld\n", prefix, state->rev_events); + fprintf(out, "%s | fin_chunks_ross_sample = %ld\n", prefix, state->fin_chunks_ross_sample); + fprintf(out, "%s | data_size_ross_sample = %ld\n", prefix, state->data_size_ross_sample); + fprintf(out, "%s | fin_hops_ross_sample = %ld\n", prefix, state->fin_hops_ross_sample); + fprintf(out, "%s | fin_chunks_time_ross_sample = %g\n", prefix, state->fin_chunks_time_ross_sample); + fprintf(out, "%s | * busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample); // ingnoring as this part of the code is never used. Originally part of instrumentation + fprintf(out, "%s | ross_sample = \n", prefix); // ingnoring as this part of the code is never used. Originally part of instrumentation + + // modified outside of process and reverse computation (at commit and at surrogate change) + fprintf(out, "%s | sent_packets = \n", prefix); + + fprintf(out, "%s | last_packet_sent_id = %ld\n", prefix, state->last_packet_sent_id); + fprintf(out, "%s | arrival_of_last_packet = {packet_ID: %ld, travel_end_time: %g}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); + + fprintf(out, "%s | remaining_sz_packets = {\n", prefix); + std::map::iterator it_map; + for (it_map = state->remaining_sz_packets.begin(); it_map != state->remaining_sz_packets.end(); ++it_map) { + fprintf(out, "%s | {packet_ID: %lu, dfdally_src_terminal_id: %u} -> %d,\n", prefix, it_map->first.packet_ID, it_map->first.dfdally_src_terminal_id, it_map->second); + } + fprintf(out, "%s | }\n", prefix); + + fprintf(out, "%s | last_in_queue_time = %g\n", prefix, state->last_in_queue_time); + fprintf(out, "%s | * predictor_data = %p\n", prefix, 
state->predictor_data); + + fprintf(out, "%s | zombies = [\n", prefix); + std::set::iterator it; + for (it = state->zombies.begin(); it != state->zombies.end(); ++it) { + fprintf(out, "%s | {packet_ID: %lu, dfdally_src_terminal_id: %u},\n", prefix, it->packet_ID, it->dfdally_src_terminal_id); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * frozen_state = %p\n", prefix, state->frozen_state); +} + +// Original function implemented by Claude +static void save_router_state(router_state *into, router_state const *from) { + // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents + // from->local_congestion_controller + + // Missing deep-clone/comparison/print members. + // from->rsamples + // from->ross_rsample + // from->busy_time_sample + // from->link_traffic_sample + // from->link_traffic_ross_sample + + memcpy(into, from, sizeof(router_state)); + + dragonfly_param const * p = into->params; + int const radix = p->radix; + int const num_qos_levels = p->num_qos_levels; + + into->global_channel = (int*) malloc(p->num_global_channels * sizeof(int)); + + for (int i = 0; i < p->num_global_channels; i++) { + into->global_channel[i] = from->global_channel[i]; + } + + into->next_output_available_time = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->last_buf_full = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->busy_time = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->stalled_chunks = (unsigned long*) malloc(radix * sizeof(unsigned long)); + into->total_chunks = (unsigned long*) malloc(radix * sizeof(unsigned long)); + into->in_send_loop = (int*) malloc(radix * sizeof(int)); + into->queued_count = (int*) malloc(radix * sizeof(int)); + into->port_bandwidths = (double*) malloc(radix * sizeof(double)); + into->vc_max_sizes = (int*) malloc(radix * sizeof(int)); + into->link_traffic = (int64_t*) malloc(radix * sizeof(int64_t)); + into->last_qos_lvl = 
(int*) malloc(radix * sizeof(int)); + into->vc_occupancy = (int**) malloc(radix * sizeof(int*)); + into->qos_status = (int**) malloc(radix * sizeof(int*)); + into->qos_data = (int**) malloc(radix * sizeof(int*)); + into->pending_msgs = (struct qlist_head**) malloc(radix * sizeof(struct qlist_head*)); + into->queued_msgs = (struct qlist_head**) malloc(radix * sizeof(struct qlist_head*)); + + for (int i = 0; i < radix; i++) { + into->next_output_available_time[i] = from->next_output_available_time[i]; + into->last_buf_full[i] = from->last_buf_full[i]; + into->busy_time[i] = from->busy_time[i]; + into->stalled_chunks[i] = from->stalled_chunks[i]; + into->total_chunks[i] = from->total_chunks[i]; + into->in_send_loop[i] = from->in_send_loop[i]; + into->queued_count[i] = from->queued_count[i]; + into->port_bandwidths[i] = from->port_bandwidths[i]; + into->vc_max_sizes[i] = from->vc_max_sizes[i]; + into->link_traffic[i] = from->link_traffic[i]; + into->last_qos_lvl[i] = from->last_qos_lvl[i]; + + into->vc_occupancy[i] = (int*) malloc(p->num_vcs * sizeof(int)); + into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); + + into->pending_msgs[i] = (struct qlist_head*) malloc(p->num_vcs * sizeof(struct qlist_head)); + into->queued_msgs[i] = (struct qlist_head*) malloc(p->num_vcs * sizeof(struct qlist_head)); + + for (int j = 0; j < p->num_vcs; j++) { + into->vc_occupancy[i][j] = from->vc_occupancy[i][j]; + copy_msgs_qlist(&into->pending_msgs[i][j], &from->pending_msgs[i][j]); + copy_msgs_qlist(&into->queued_msgs[i][j], &from->queued_msgs[i][j]); + } + for (int j = 0; j < num_qos_levels; j++) { + into->qos_status[i][j] = from->qos_status[i][j]; + into->qos_data[i][j] = from->qos_data[i][j]; + } + } + + if (p->counting_bool > 0) { + assert(from->agg_busy_time != NULL); + assert(from->agg_link_traffic != NULL); + into->agg_busy_time = (tw_stime**) malloc(p->counting_windows * sizeof(tw_stime*)); + 
into->agg_link_traffic = (int64_t**) malloc(p->counting_windows * sizeof(int64_t*)); + + for (int i = 0; i < p->counting_windows; i++) { + into->agg_busy_time[i] = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->agg_link_traffic[i] = (int64_t*) malloc(radix * sizeof(int64_t)); + memcpy(into->agg_busy_time[i], from->agg_busy_time[i], radix * sizeof(tw_stime)); + memcpy(into->agg_link_traffic[i], from->agg_link_traffic[i], radix * sizeof(int64_t)); + } + } + + //if (from->local_congestion_controller != NULL) { + // assert(g_congestion_control_enabled); + // into->local_congestion_controller = (rlc_state*) malloc(sizeof(rlc_state)); + // save_rlc_state(into->local_congestion_controller, from->local_congestion_controller); + //} +} + +// Original function implemented by Claude +static void clean_router_state(router_state *state) { + dragonfly_param const * p = state->params; + int const radix = p->radix; + + // Free simple arrays + free(state->global_channel); + free(state->next_output_available_time); + free(state->last_buf_full); + free(state->busy_time); + free(state->stalled_chunks); + free(state->total_chunks); + free(state->in_send_loop); + free(state->queued_count); + free(state->port_bandwidths); + free(state->vc_max_sizes); + free(state->link_traffic); + free(state->last_qos_lvl); + + // Clean and free 2D arrays + for (int i = 0; i < radix; i++) { + free(state->vc_occupancy[i]); + free(state->qos_status[i]); + free(state->qos_data[i]); + + for (int j = 0; j < p->num_vcs; j++) { + // Clean up qlist entries - remove and free all elements + while (!qlist_empty(&state->pending_msgs[i][j])) { + struct qlist_head *item = qlist_pop(&state->pending_msgs[i][j]); + terminal_dally_message_list *entry = qlist_entry(item, terminal_dally_message_list, list); + free(entry->event_data); + free(entry); + } + while (!qlist_empty(&state->queued_msgs[i][j])) { + struct qlist_head *item = qlist_pop(&state->queued_msgs[i][j]); + terminal_dally_message_list *entry = 
qlist_entry(item, terminal_dally_message_list, list); + free(entry->event_data); + free(entry); + } + } + + free(state->pending_msgs[i]); + free(state->queued_msgs[i]); + } + + free(state->vc_occupancy); + free(state->qos_status); + free(state->qos_data); + free(state->pending_msgs); + free(state->queued_msgs); + + if (p->counting_bool > 0) { + for (int i = 0; i < p->counting_windows; i++) { + free(state->agg_busy_time[i]); + free(state->agg_link_traffic[i]); + } + free(state->agg_busy_time); + free(state->agg_link_traffic); + } + + //if (state->local_congestion_controller != NULL) { + // clean_rlc_state(state->local_congestion_controller); + // free(state->local_congestion_controller); + //} +} + +// Original function implemented by Claude +static bool check_router_state(router_state const *before, router_state const *after) { + // The following are not checked because they don't influence any other + // components of the router state, ie, they are never used to change + // the simulation behavior. 
+ // - snapshot_data + // - fwd_events + // - rev_events + + dragonfly_param const * p = before->params; + int const radix = p->radix; + int const num_qos_levels = p->num_qos_levels; + + if (before->router_id != after->router_id || + before->group_id != after->group_id || + before->plane_id != after->plane_id || + before->op_arr_size != after->op_arr_size || + before->max_arr_size != after->max_arr_size || + before->workloads_finished_flag != after->workloads_finished_flag || + before->is_monitoring_bw != after->is_monitoring_bw || + before->last_time != after->last_time) { + return false; + } + + for (int i = 0; i < p->num_global_channels; i++) { + if (before->global_channel[i] != after->global_channel[i]) { + return false; + } + } + + for (int i = 0; i < radix; i++) { + if (before->next_output_available_time[i] != after->next_output_available_time[i] || + before->last_buf_full[i] != after->last_buf_full[i] || + before->busy_time[i] != after->busy_time[i] || + before->stalled_chunks[i] != after->stalled_chunks[i] || + before->total_chunks[i] != after->total_chunks[i] || + before->in_send_loop[i] != after->in_send_loop[i] || + before->queued_count[i] != after->queued_count[i] || + before->port_bandwidths[i] != after->port_bandwidths[i] || + before->vc_max_sizes[i] != after->vc_max_sizes[i] || + before->link_traffic[i] != after->link_traffic[i] || + before->last_qos_lvl[i] != after->last_qos_lvl[i]) { + return false; + } + + for (int j = 0; j < p->num_vcs; j++) { + if (before->vc_occupancy[i][j] != after->vc_occupancy[i][j]) { + return false; + } + + if (!check_msgs_qlist(&before->pending_msgs[i][j], &after->pending_msgs[i][j]) || + !check_msgs_qlist(&before->queued_msgs[i][j], &after->queued_msgs[i][j])) { + return false; + } + } + + for (int j = 0; j < num_qos_levels; j++) { + if (before->qos_status[i][j] != after->qos_status[i][j] || + before->qos_data[i][j] != after->qos_data[i][j]) { + return false; + } + } + } + + if ((before->agg_busy_time == NULL) != 
(after->agg_busy_time == NULL)) { + return false; + } + if ((before->agg_link_traffic == NULL) != (after->agg_link_traffic == NULL)) { + return false; + } + + if (p->counting_bool > 0) { + assert(before->agg_busy_time != NULL && after->agg_busy_time); + assert(before->agg_link_traffic != NULL && after->agg_link_traffic); + for (int i = 0; i < p->counting_windows; i++) { + for (int j = 0; j < radix; j++) { + if (before->agg_busy_time[i][j] != after->agg_busy_time[i][j] || + before->agg_link_traffic[i][j] != after->agg_link_traffic[i][j]) { + return false; + } + } + } + } + + //if (before->local_congestion_controller != NULL) { + // if (!check_rlc_state(before->local_congestion_controller, after->local_congestion_controller)) { + // return false; + // } + //} + + // Check strings + if (strncmp(before->output_buf, after->output_buf, 4096) != 0 || + strncmp(before->output_buf5, after->output_buf5, 4096) != 0 || + strncmp(before->output_buf6, after->output_buf6, 4096) != 0) { + return false; + } + + // All checks passed + return true; +} + +// Original function implemented by Claude +static void print_router_state(FILE * out, char const * prefix, router_state * state) { + dragonfly_param const * p = state->params; + int const radix = p->radix; + int const num_qos_levels = p->num_qos_levels; + + fprintf(out, "%srouter_state (dragonfly) ->\n", prefix); + fprintf(out, "%s | router_id = %u\n", prefix, state->router_id); + fprintf(out, "%s | group_id = %d\n", prefix, state->group_id); + fprintf(out, "%s | plane_id = %d\n", prefix, state->plane_id); + fprintf(out, "%s | op_arr_size = %d\n", prefix, state->op_arr_size); + fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); + + // global_channel holds num_global_channels entries (the loop below iterates that many), so label it with that count rather than radix + fprintf(out, "%s | * global_channel[%d] = [", prefix, p->num_global_channels); + for (int i = 0; i < p->num_global_channels; i++) { + fprintf(out, "%s%d", i ?
", " : "", state->global_channel[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | connMan = \n", prefix); + + char addprefix[] = " | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + fprintf(out, "%s | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller); + //if (state->local_congestion_controller != NULL) { + // snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + // print_rlc_state(out, subprefix, state->local_congestion_controller); + //} + free(subprefix); + + fprintf(out, "%s | *next_output_available_time[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->next_output_available_time[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * last_buf_full[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * busy_time[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * busy_time_sample = %p\n", prefix, state->busy_time_sample); + + fprintf(out, "%s | * stalled_chunks[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * total_chunks[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%lu", i ? 
", " : "", state->total_chunks[i]); + } + fprintf(out, "]\n"); + + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + + fprintf(out, "%s | *** pending_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [\n", prefix, i); + for (int j = 0; j < p->num_vcs; j++) { + fprintf(out, "%s | | vcs # %d\n", prefix, j); + print_msgs_qlist(out, subprefix, &state->pending_msgs[i][j]); + } + fprintf(out, "%s | ]\n", prefix); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | *** queued_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [\n", prefix, i); + for (int j = 0; j < p->num_vcs; j++) { + fprintf(out, "%s | | vcs # %d\n", prefix, j); + print_msgs_qlist(out, subprefix, &state->queued_msgs[i][j]); + } + fprintf(out, "%s | ]\n", prefix); + } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); + + fprintf(out, "%s | * in_send_loop[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * queued_count[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->queued_count[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * st = %p\n", prefix, state->st); + fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); + fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); + + fprintf(out, "%s | * port_bandwidths[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? 
", " : "", state->port_bandwidths[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * vc_max_sizes[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->vc_max_sizes[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | ** vc_occupancy[%d][%d] = [\n", prefix, radix, p->num_vcs); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [", prefix, i); + for (int j = 0; j < p->num_vcs; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * link_traffic[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%ld", i ? ", " : "", state->link_traffic[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * link_traffic_sample = %p\n", prefix, state->link_traffic_sample); + + fprintf(out, "%s | is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw); + + fprintf(out, "%s | * last_qos_lvl[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | ** qos_status[%d][%d] = [\n", prefix, radix, num_qos_levels); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [", prefix, i); + for (int j = 0; j < num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | ** qos_data[%d][%d] = [\n", prefix, radix, num_qos_levels); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [", prefix, i); + for (int j = 0; j < num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? 
state->anno : "(nil)"); + fprintf(out, "%s | * params = %p\n", prefix, state->params); + + fprintf(out, "%s | ** snapshot_data = %p\n", prefix, state->snapshot_data); + + fprintf(out, "%s | output_buf = '%.4096s'\n", prefix, state->output_buf); + fprintf(out, "%s | * rsamples = %p\n", prefix, state->rsamples); + fprintf(out, "%s | fwd_events = %ld\n", prefix, state->fwd_events); + fprintf(out, "%s | rev_events = %ld\n", prefix, state->rev_events); + fprintf(out, "%s | output_buf5 = '%.4096s'\n", prefix, state->output_buf5); + fprintf(out, "%s | output_buf6 = '%.4096s'\n", prefix, state->output_buf6); + + if(p->counting_bool <= 0) + { + fprintf(out, "%s | ** agg_busy_time = %p\n", prefix, state->agg_busy_time); + fprintf(out, "%s | ** agg_link_traffic = %p\n", prefix, state->agg_link_traffic); + } else { + assert(state->agg_busy_time != NULL); + assert(state->agg_link_traffic != NULL); + fprintf(out, "%s | ** agg_busy_time[%d][%d] = [\n", prefix, p->counting_windows, radix); + for (int i = 0; i < p->counting_windows; i++) { + fprintf(out, "%s | window %d: [", prefix, i); + for (int j = 0; j < radix; j++) { + fprintf(out, "%s%g", j ? ", " : "", state->agg_busy_time[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | ** agg_link_traffic[%d][%d] = [\n", prefix, p->counting_windows, radix); + for (int i = 0; i < p->counting_windows; i++) { + fprintf(out, "%s | window %d: [", prefix, i); + for (int j = 0; j < radix; j++) { + fprintf(out, "%s%lu", j ? 
", " : "", state->agg_link_traffic[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + } + + fprintf(out, "%s | ross_rsample = \n", prefix); + fprintf(out, "%s | last_time = %g\n", prefix, state->last_time); +} + +char const * const string_event_t(enum event_t type) { + switch (type) { + case T_GENERATE: return "T_GENERATE"; + case T_ARRIVE: return "T_ARRIVE"; + case T_SEND: return "T_SEND"; + case T_BUFFER: return "T_BUFFER"; + case R_SEND: return "R_SEND"; + case R_ARRIVE: return "R_ARRIVE"; + case R_BUFFER: return "R_BUFFER"; + case R_BANDWIDTH: return "R_BANDWIDTH"; + case R_BW_HALT: return "R_BW_HALT"; + case T_BANDWIDTH: return "T_BANDWIDTH"; + case R_SNAPSHOT: return "R_SNAPSHOT"; + case T_NOTIFY: return "T_NOTIFY"; + case T_ARRIVE_PREDICTED: return "T_ARRIVE_PREDICTED"; +#if ALWAYS_DETERMINISTIC_NETWORK + case T_VACUOUS_EVENT: return "T_VACUOUS_EVENT"; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ + default: return "UNKNOWN TYPE!!"; + } +} + +// Built with help of Claude +bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after) { + bool is_same = true; + + // Fields that have no effects in the simulation + // before->this_router_ptp_latency + + // Compare all fields + is_same &= before->magic == after->magic; + is_same &= before->travel_start_time == after->travel_start_time; + is_same &= before->travel_end_time == after->travel_end_time; + is_same &= before->packet_ID == after->packet_ID; + is_same &= before->type == after->type; + is_same &= before->notify_type == after->notify_type; + is_same &= strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0; + is_same &= before->final_dest_gid == after->final_dest_gid; + is_same &= before->sender_lp == after->sender_lp; + is_same &= before->sender_mn_lp == after->sender_mn_lp; + is_same &= before->dest_terminal_lpid == after->dest_terminal_lpid; + is_same &= before->dfdally_src_terminal_id == after->dfdally_src_terminal_id; 
+ is_same &= before->dfdally_dest_terminal_id == after->dfdally_dest_terminal_id; + is_same &= before->src_terminal_id == after->src_terminal_id; + is_same &= before->origin_router_id == after->origin_router_id; + is_same &= before->app_id == after->app_id; + is_same &= before->my_N_hop == after->my_N_hop; + is_same &= before->my_l_hop == after->my_l_hop; + is_same &= before->my_g_hop == after->my_g_hop; + is_same &= before->my_hops_cur_group == after->my_hops_cur_group; + is_same &= before->next_stop == after->next_stop; + is_same &= before->this_router_arrival == after->this_router_arrival; + is_same &= before->intm_lp_id == after->intm_lp_id; + is_same &= before->last_hop == after->last_hop; + is_same &= before->is_intm_visited == after->is_intm_visited; + is_same &= before->intm_rtr_id == after->intm_rtr_id; + is_same &= before->intm_grp_id == after->intm_grp_id; + is_same &= before->chunk_id == after->chunk_id; + is_same &= before->packet_size == after->packet_size; + is_same &= before->message_id == after->message_id; + is_same &= before->total_size == after->total_size; + is_same &= before->remote_event_size_bytes == after->remote_event_size_bytes; + is_same &= before->local_event_size_bytes == after->local_event_size_bytes; + is_same &= before->vc_index == after->vc_index; + is_same &= before->rail_id == after->rail_id; + is_same &= before->output_chan == after->output_chan; + is_same &= before->is_pull == after->is_pull; + is_same &= before->pull_size == after->pull_size; + is_same &= before->path_type == after->path_type; + is_same &= before->is_there_another_pckt_in_queue == after->is_there_another_pckt_in_queue; + is_same &= before->qos_reset1 == after->qos_reset1; + is_same &= before->qos_reset2 == after->qos_reset2; + + return is_same; +} + +// Print function originally constructed with help from Claude.ai +void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg) { + //terminal_state * ns =
(terminal_state *) s; + //router_state * ns = (router_state *) s; + + fprintf(out, "%sterminal_dally_message ->\n", prefix); + fprintf(out, "%s | magic = %d\n", prefix, msg->magic); + fprintf(out, "%s | travel_start_time = %g\n", prefix, msg->travel_start_time); + fprintf(out, "%s | travel_end_time = %g\n", prefix, msg->travel_end_time); + fprintf(out, "%s | packet_ID = %llu\n", prefix, msg->packet_ID); + fprintf(out, "%s | type = %d (%s)\n", prefix, msg->type, string_event_t((enum event_t) msg->type)); + fprintf(out, "%s | notify_type = %d\n", prefix, msg->notify_type); + fprintf(out, "%s | category = %.16s\n", prefix, msg->category); + fprintf(out, "%s | final_dest_gid = %lu\n", prefix, msg->final_dest_gid); + fprintf(out, "%s | sender_lp = %lu\n", prefix, msg->sender_lp); + fprintf(out, "%s | sender_mn_lp = %lu\n", prefix, msg->sender_mn_lp); + fprintf(out, "%s | dest_terminal_lpid = %lu\n", prefix, msg->dest_terminal_lpid); + fprintf(out, "%s | dfdally_src_terminal_id = %u\n", prefix, msg->dfdally_src_terminal_id); + fprintf(out, "%s | dfdally_dest_terminal_id = %u\n", prefix, msg->dfdally_dest_terminal_id); + fprintf(out, "%s | src_terminal_id = %u\n", prefix, msg->src_terminal_id); + fprintf(out, "%s | origin_router_id = %u\n", prefix, msg->origin_router_id); + fprintf(out, "%s | app_id = %d\n", prefix, msg->app_id); + fprintf(out, "%s | my_N_hop = %d\n", prefix, msg->my_N_hop); + fprintf(out, "%s | my_l_hop = %d\n", prefix, msg->my_l_hop); + fprintf(out, "%s | my_g_hop = %d\n", prefix, msg->my_g_hop); + fprintf(out, "%s | my_hops_cur_group = %d\n", prefix, msg->my_hops_cur_group); + fprintf(out, "%s | saved_channel = %d\n", prefix, msg->saved_channel); + fprintf(out, "%s | saved_vc = %d\n", prefix, msg->saved_vc); + fprintf(out, "%s | next_stop = %d\n", prefix, msg->next_stop); + fprintf(out, "%s | this_router_arrival = %g\n", prefix, msg->this_router_arrival); + fprintf(out, "%s | this_router_ptp_latency = %g\n", prefix, msg->this_router_ptp_latency); + 
fprintf(out, "%s | intm_lp_id = %u\n", prefix, msg->intm_lp_id); + fprintf(out, "%s | last_hop = %d\n", prefix, msg->last_hop); + fprintf(out, "%s | is_intm_visited = %d\n", prefix, msg->is_intm_visited); + fprintf(out, "%s | intm_rtr_id = %d\n", prefix, msg->intm_rtr_id); + fprintf(out, "%s | intm_grp_id = %d\n", prefix, msg->intm_grp_id); + fprintf(out, "%s | saved_src_dest = %d\n", prefix, msg->saved_src_dest); + fprintf(out, "%s | saved_src_chan = %d\n", prefix, msg->saved_src_chan); + fprintf(out, "%s | chunk_id = %u\n", prefix, msg->chunk_id); + fprintf(out, "%s | packet_size = %u\n", prefix, msg->packet_size); + fprintf(out, "%s | message_id = %u\n", prefix, msg->message_id); + fprintf(out, "%s | total_size = %u\n", prefix, msg->total_size); + fprintf(out, "%s | remote_event_size_bytes = %d\n", prefix, msg->remote_event_size_bytes); + fprintf(out, "%s | local_event_size_bytes = %d\n", prefix, msg->local_event_size_bytes); + fprintf(out, "%s | vc_index = %d\n", prefix, msg->vc_index); + fprintf(out, "%s | rail_id = %d\n", prefix, msg->rail_id); + fprintf(out, "%s | output_chan = %d\n", prefix, msg->output_chan); + fprintf(out, "%s | event_rc = %d\n", prefix, msg->event_rc); + fprintf(out, "%s | is_pull = %d\n", prefix, msg->is_pull); + fprintf(out, "%s | pull_size = %u\n", prefix, msg->pull_size); + fprintf(out, "%s | path_type = %d\n", prefix, msg->path_type); + fprintf(out, "%s | saved_app_id = %d\n", prefix, msg->saved_app_id); + fprintf(out, "%s | is_there_another_pckt_in_queue = %d\n", prefix, msg->is_there_another_pckt_in_queue); + fprintf(out, "%s | num_rngs = %d\n", prefix, msg->num_rngs); + fprintf(out, "%s | num_cll = %d\n", prefix, msg->num_cll); + fprintf(out, "%s | last_saved_qos = %d\n", prefix, msg->last_saved_qos); + fprintf(out, "%s | qos_reset1 = %d\n", prefix, msg->qos_reset1); + fprintf(out, "%s | qos_reset2 = %d\n", prefix, msg->qos_reset2); + fprintf(out, "%s | rc_is_qos_set = %d\n", prefix, msg->rc_is_qos_set); + fprintf(out, "%s | * 
rc_qos_data = %p\n", prefix, msg->rc_qos_data); + fprintf(out, "%s | * rc_qos_status = %p\n", prefix, msg->rc_qos_status); + fprintf(out, "%s | saved_send_loop = %d\n", prefix, msg->saved_send_loop); + fprintf(out, "%s | saved_available_time = %g\n", prefix, msg->saved_available_time); + fprintf(out, "%s | saved_min_lat = %g\n", prefix, msg->saved_min_lat); + fprintf(out, "%s | saved_avg_time = %g\n", prefix, msg->saved_avg_time); + fprintf(out, "%s | saved_rcv_time = %g\n", prefix, msg->saved_rcv_time); + fprintf(out, "%s | saved_busy_time = %g\n", prefix, msg->saved_busy_time); + fprintf(out, "%s | saved_total_time = %g\n", prefix, msg->saved_total_time); + fprintf(out, "%s | saved_sample_time = %g\n", prefix, msg->saved_sample_time); + fprintf(out, "%s | msg_start_time = %g\n", prefix, msg->msg_start_time); + fprintf(out, "%s | saved_busy_time_ross = %g\n", prefix, msg->saved_busy_time_ross); + fprintf(out, "%s | saved_fin_chunks_ross = %g\n", prefix, msg->saved_fin_chunks_ross); + fprintf(out, "%s | saved_last_in_queue_time = %g\n", prefix, msg->saved_last_in_queue_time); + fprintf(out, "%s | saved_next_packet_delay = %g\n", prefix, msg->saved_next_packet_delay); + fprintf(out, "%s | saved_processing_time = %g\n", prefix, msg->saved_processing_time); + fprintf(out, "%s | msg_new_mn_event = %g\n", prefix, msg->msg_new_mn_event); + fprintf(out, "%s | last_received_time = %g\n", prefix, msg->last_received_time); + fprintf(out, "%s | last_sent_time = %g\n", prefix, msg->last_sent_time); + fprintf(out, "%s | last_bufupdate_time = %g\n", prefix, msg->last_bufupdate_time); +} +//*** ---------- END OF reverse handler checking functions ---------- *** + /* dragonfly compute node and router LP types */ -extern "C" { tw_lptype dragonfly_dally_lps[] = { // Terminal handling functions @@ -5519,7 +8374,29 @@ tw_lptype dragonfly_dally_lps[] = }, {NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0}, }; -} + +crv_checkpointer dragonfly_dally_checkpointers[] = { + { + 
&dragonfly_dally_lps[0], + sizeof(terminal_state), + (save_checkpoint_state_f) save_terminal_state, + (clean_checkpoint_state_f) clean_terminal_state, + (check_states_f) check_terminal_state, + (print_lpstate_f) print_terminal_state, + (print_checkpoint_state_f) print_terminal_state, + (print_event_f) print_terminal_dally_message, + }, + { + &dragonfly_dally_lps[1], + sizeof(router_state), + (save_checkpoint_state_f) save_router_state, + (clean_checkpoint_state_f) clean_router_state, + (check_states_f) check_router_state, + (print_lpstate_f) print_router_state, + (print_checkpoint_state_f) print_router_state, + (print_event_f) print_terminal_dally_message, + }, +}; /* returns the dragonfly lp type for lp registration */ static const tw_lptype* dragonfly_dally_get_cn_lp_type(void) @@ -5807,7 +8684,7 @@ static Connection dfdally_prog_adaptive_routing(router_state *s, tw_bf *bf, term vector< Connection > poss_nonmin_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, fdest_router_id); Connection best_min_conn, best_nonmin_conn; - ConnectionType conn_type_of_mins, conn_type_of_nonmins; + ConnectionType conn_type_of_mins = CONN_LOCAL, conn_type_of_nonmins = CONN_LOCAL; if (poss_min_next_stops.size() > 0) { @@ -6280,8 +9157,10 @@ static tw_lpid get_next_stop_legacy(router_state *s, tw_lp *lp, tw_bf *bf, termi codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, next_stop / num_routers_per_mgrp, next_stop % num_routers_per_mgrp, &router_dest_id); +#if DEBUG == 1 if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) printf("\n Next stop is %d ", next_stop); +#endif return router_dest_id; } @@ -6338,8 +9217,10 @@ static tw_lpid get_next_stop_legacy(router_state *s, tw_lp *lp, tw_bf *bf, termi dest_lp = dests; } +#if DEBUG == 1 if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) printf("\n Next stop is %d ", dest_lp); +#endif codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_lp / 
num_routers_per_mgrp, dest_lp % num_routers_per_mgrp, &router_dest_id); @@ -6745,8 +9626,10 @@ static Connection dfdally_prog_adaptive_legacy_routing(router_state *s, tw_bf *b next_stop = get_next_stop_legacy(s, lp, bf, msg, dest_router_id, adap_chan, do_chan_selection, get_direct_con, &(msg->num_rngs)); +#if DEBUG == 1 if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) printf("\n Packet %llu arrived at router %u next stop %d final stop %d local hops %d global hops %d", msg->packet_ID, s->router_id, next_stop, dest_router_id, msg->my_l_hop, msg->my_g_hop); +#endif output_port = get_output_port_legacy(s, msg, lp, bf, next_stop, &(msg->num_rngs)); assert(output_port >= 0); @@ -6783,6 +9666,7 @@ struct model_net_method dragonfly_dally_method = (event_f)dragonfly_dally_terminal_congestion_event, (revent_f)dragonfly_dally_terminal_congestion_event_rc, (commit_f)dragonfly_dally_terminal_congestion_event_commit, + &dragonfly_dally_checkpointers[0], }; struct model_net_method dragonfly_dally_router_method = @@ -6810,6 +9694,7 @@ struct model_net_method dragonfly_dally_router_method = (event_f)dragonfly_dally_router_congestion_event, (revent_f)dragonfly_dally_router_congestion_event_rc, (commit_f)dragonfly_dally_router_congestion_event_commit, + &dragonfly_dally_checkpointers[1], }; // #ifdef ENABLE_CORTEX diff --git a/src/networks/model-net/dragonfly-plus.C b/src/networks/model-net/dragonfly-plus.C index 7a80d65a..141b7ce8 100644 --- a/src/networks/model-net/dragonfly-plus.C +++ b/src/networks/model-net/dragonfly-plus.C @@ -3151,7 +3151,8 @@ static tw_stime dragonfly_plus_packet_event(model_net_request const *req, void const *remote_event, void const *self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void) message_offset; (void) sched_params; @@ -6618,6 +6619,7 @@ struct model_net_method dragonfly_plus_method = { NULL, //(final_f)dragonfly_plus_sample_fin, dfly_plus_register_model_types, 
dfly_plus_get_model_types, + NULL, }; struct model_net_method dragonfly_plus_router_method = { @@ -6639,6 +6641,7 @@ struct model_net_method dragonfly_plus_router_method = { NULL, //(final_f)dragonfly_plus_rsample_fin, dfly_plus_router_register_model_types, dfly_plus_router_get_model_types, + NULL, }; // #ifdef ENABLE_CORTEX diff --git a/src/networks/model-net/dragonfly.c b/src/networks/model-net/dragonfly.c index eb5e81bb..faee79d8 100644 --- a/src/networks/model-net/dragonfly.c +++ b/src/networks/model-net/dragonfly.c @@ -1087,7 +1087,8 @@ static tw_stime dragonfly_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/express-mesh.C b/src/networks/model-net/express-mesh.C index 1e36afd8..e8b7392b 100644 --- a/src/networks/model-net/express-mesh.C +++ b/src/networks/model-net/express-mesh.C @@ -722,7 +722,8 @@ static tw_stime local_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/fattree.c b/src/networks/model-net/fattree.c index eb1c49b5..e7db6c61 100644 --- a/src/networks/model-net/fattree.c +++ b/src/networks/model-net/fattree.c @@ -1527,7 +1527,8 @@ static tw_stime fattree_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { #if DEBUG_RC packet_event_f++; diff --git a/src/networks/model-net/loggp.c b/src/networks/model-net/loggp.c index 22904287..def3eb22 100644 --- a/src/networks/model-net/loggp.c +++ b/src/networks/model-net/loggp.c @@ -119,7 +119,8 @@ static tw_stime loggp_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int 
is_last_pckt, + bool is_there_another_pckt_in_queue); static void loggp_packet_event_rc(tw_lp *sender); tw_stime loggp_recv_msg_event( @@ -611,7 +612,8 @@ static tw_stime loggp_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; tw_event * e_new; diff --git a/src/networks/model-net/simplenet-upd.c b/src/networks/model-net/simplenet-upd.c index 5b5edc78..5955f228 100644 --- a/src/networks/model-net/simplenet-upd.c +++ b/src/networks/model-net/simplenet-upd.c @@ -100,7 +100,8 @@ static tw_stime simplenet_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void simplenet_packet_event_rc(tw_lp *sender); @@ -523,7 +524,8 @@ static tw_stime simplenet_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; // unused... (void)sched_params; // unused... 
@@ -549,6 +551,7 @@ static tw_stime simplenet_packet_event( msg->event_type = SN_MSG_START; msg->is_pull = req->is_pull; msg->pull_size = req->pull_size; + //msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue; /*Fill in simplenet information*/ if(is_last_pckt) /* Its the last packet so pass in remote event information*/ diff --git a/src/networks/model-net/simplep2p.c b/src/networks/model-net/simplep2p.c index e7609870..6eb9ac0d 100644 --- a/src/networks/model-net/simplep2p.c +++ b/src/networks/model-net/simplep2p.c @@ -132,7 +132,8 @@ static tw_stime simplep2p_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void simplep2p_packet_event_rc(tw_lp *sender); @@ -807,7 +808,8 @@ static tw_stime simplep2p_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c index da122ec6..eee9cd74 100644 --- a/src/networks/model-net/slimfly.c +++ b/src/networks/model-net/slimfly.c @@ -1158,7 +1158,8 @@ static tw_stime slimfly_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { // printf("slim packet event\n"); @@ -4044,6 +4045,7 @@ struct model_net_method slimfly_method = NULL, slimfly_register_model_types, slimfly_get_cn_model_types, + NULL, }; struct model_net_method slimfly_router_method = @@ -4066,6 +4068,7 @@ struct model_net_method slimfly_router_method = NULL, slimfly_router_register_model_types, slimfly_get_router_model_types, + NULL, }; diff --git a/src/networks/model-net/torus.c b/src/networks/model-net/torus.c index 7db338e6..6ae6c7e6 100644 --- a/src/networks/model-net/torus.c +++ 
b/src/networks/model-net/torus.c @@ -498,7 +498,8 @@ static tw_stime torus_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; // not using atm... (void)sched_params; // not using atm... diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c new file mode 100644 index 00000000..b529be7f --- /dev/null +++ b/src/surrogate/app-iteration-predictor/average.c @@ -0,0 +1,633 @@ +#include "surrogate/app-iteration-predictor/average.h" +#include "codes/codes.h" +#include "surrogate/app-iteration-predictor/common.h" +#include +#include +#include +#include + +#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); } + +static struct avg_app_config my_config = {0}; + +struct node_data { + int app_id; + double acc_iteration_time; + double prev_iteration_time; + int acc_iters; + int last_iter; +}; +static struct node_data * arr_node_data = NULL; // array containing info for all nodes + +enum APP_STATUS { + APP_STATUS_running = 0, + APP_STATUS_just_completed, // fully ended in this PE + APP_STATUS_completed_everywhere, // fully ended on all PEs +}; + +struct app_data { + enum NODE_TYPE type; + int num_nodes; // nodes in PE + int nodes_with_enough_iters; + int ending_iteration; // last iteration the simulation will run (aka, num of iterations) + int nodes_that_have_ended; + enum APP_STATUS status; // use ended to stop accumulating data + // To be used when called by the model. 
Set by `prepare_fast_forward_jump`
+    struct {
+        int jump_at_iter;
+        int resume_at_iter;
+        double restart_at;
+    } pred;
+};
+static struct app_data * arr_app_data = NULL; // array containing info for all apps
+static bool ready_to_skip = false;
+
+// Human-readable description of a node type, used when reporting configuration errors
+static inline char const * string_node_type(enum NODE_TYPE type) {
+    switch (type) {
+        case NODE_TYPE_unassigned: return "Unassigned app";
+        case NODE_TYPE_background_noise: return "Background noise/synthetic pattern";
+        case NODE_TYPE_app: return "App that runs on predictable iterations";
+        default: return "Unknown type!";
+    }
+}
+
+
+static void find_max_iter_per_app(int * save_last_iter);
+static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count);
+static inline void mpi_allreduce_int_sum(int const * local_data, int * result_data, int count);
+static inline void mpi_allreduce_double_sum(double const * local_data, double * result_data, int count);
+static inline void mpi_allreduce_bool_and(bool const * local_data, bool * result_data, int count);
+static inline void init_int_array(int * array, int size, int value);
+static inline void init_double_array(double * array, int size, double value);
+// App id recorded for a node of this PE (-1 until `model_calls_init` has stored one)
+static inline int app_id_for(int nw_id_in_pe) {
+    return arr_node_data[nw_id_in_pe].app_id;
+}
+
+
+// Registers node `nw_id_in_pe` as part of application `config->app_id`.
+// Stores the node's app id, counts the node in its app and checks that all nodes of an app
+// on this PE agree on the node type and (for iterative apps) on the ending iteration.
+static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) {
+    assert(arr_node_data);
+    // Both ids are used as array indices below, so they are validated before any access
+    if (nw_id_in_pe < 0 || (size_t) nw_id_in_pe >= (size_t) my_config.num_nodes_in_pe) {
+        tw_error(TW_LOC, "Node id relative to PE (%d) is outside the valid range [0, %d)", nw_id_in_pe, (int) my_config.num_nodes_in_pe);
+    }
+    if (config->app_id < 0 || config->app_id >= (int) my_config.num_apps) {
+        tw_error(TW_LOC, "Application id %d is outside the valid range [0, %d)", config->app_id, (int) my_config.num_apps);
+    }
+
+    // Storing node data info
+    arr_node_data[nw_id_in_pe].app_id = config->app_id;
+    arr_node_data[nw_id_in_pe].last_iter = INT_MIN;
+
+    // Storing app data info
+    arr_app_data[config->app_id].num_nodes++;
+
+    if (arr_app_data[config->app_id].type == NODE_TYPE_unassigned) {
+        arr_app_data[config->app_id].type = config->type;
+    } else if (arr_app_data[config->app_id].type != config->type) {
+        // One argument per conversion: app id, LP id, type reported by this LP, type already stored for the app
+        tw_error(TW_LOC, "Two different ranks for application %d have signaled different compute node types. LP ID %llu is of type '%s', but app had been configured as '%s'", config->app_id, (unsigned long long) lp->gid, string_node_type(config->type), string_node_type(arr_app_data[config->app_id].type));
+    }
+
+    if (config->type == NODE_TYPE_background_noise) {
+        return; // nothing left to set for synthetic workloads
+    }
+
+    // INT_MIN marks "no ending iteration recorded yet" (set by `avg_app_iteration_predictor`)
+    if (arr_app_data[config->app_id].ending_iteration == INT_MIN) {
+        arr_app_data[config->app_id].ending_iteration = config->app_ending_iter;
+    } else if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) {
+        tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration);
+    }
+}
+
+// Aborts if `model_calls_init` was never called for this node.
+// The sentinel app id (-1) must never be used to index `arr_app_data`.
+static inline void assert_app_initialized(int nw_id_in_pe) {
+    int const app_id = app_id_for(nw_id_in_pe);
+    if (app_id == -1) {
+        tw_error(TW_LOC, "Predictor for node was not initialized! Node ID (on PE) %d", nw_id_in_pe);
+    }
+}
+
+// Feeds the predictor with the time at which node `nw_id_in_pe` reached iteration `iter`.
+// Only iterations past the node's previous `last_iter` are accumulated.
+static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) {
+    (void) lp;
+    assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
+    assert_app_initialized(nw_id_in_pe);
+
+    int const app_id = app_id_for(nw_id_in_pe);
+
+    // We should only be handling non-synthetic workloads (aka, no background noise)
+    if (arr_app_data[app_id].type == NODE_TYPE_background_noise) {
+        // The warning is printed once, but every such call is dropped
+        static bool shown_warning = false;
+        if (!shown_warning) {
+            shown_warning = true;
+            tw_warning(TW_LOC, "`feed` has been called in App %d, which was determined to be Background traffic (aka, a synthetic workload)", app_id);
+        }
+        return;
+    }
+
+    assert(arr_app_data[app_id].type == NODE_TYPE_app);
+    struct node_data * node_data = &arr_node_data[nw_id_in_pe];
+    // we only collect iteration data past the previous `last_iter`
+    if (node_data->last_iter >= iter) {
+        return;
+    }
+    if (arr_app_data[app_id].status != APP_STATUS_running) {
+        tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured");
+    }
+    // Accumulate the time elapsed since the previously fed iteration
+    node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time;
+    node_data->prev_iteration_time = iteration_time;
+    node_data->acc_iters++;
+    node_data->last_iter = iter;
+    // We've hit the required number of iterations to feed our predictor
+    if (node_data->acc_iters == my_config.num_iters_to_collect) {
+        arr_app_data[app_id].nodes_with_enough_iters++;
+    }
+}
+
+
+// Records that a node has finished; once all nodes of its app on this PE have finished,
+// the app is marked as `APP_STATUS_just_completed`
+static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) {
+    (void) lp;
+    (void) iteration_time;
+    assert_app_initialized(nw_id_in_pe);
+    struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
+    app_data->nodes_that_have_ended++;
+    if (app_data->nodes_that_have_ended == app_data->num_nodes) {
+        app_data->status = APP_STATUS_just_completed;
+    }
+}
+
+
+// Returns the prediction (`resume_at_iter`, `restart_at`) stored for the app the node belongs to
+static struct iteration_pred model_calls_predict(tw_lp * lp, int nw_id_in_pe) {
+    (void) lp;
+    assert(my_config.num_nodes_in_pe >
(size_t) nw_id_in_pe);
+    assert_app_initialized(nw_id_in_pe);
+    struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
+    return (struct iteration_pred) {
+        .resume_at_iter = app_data->pred.resume_at_iter,
+        .restart_at = app_data->pred.restart_at,
+    };
+}
+
+// Counterpart of `model_calls_predict` (presumably its reverse-computation handler — confirm).
+// Intentionally empty: `model_calls_predict` only reads predictor state.
+static void model_calls_predict_rc(tw_lp * lp, int nw_id_in_pe) {
+    (void) lp;
+    (void) nw_id_in_pe;
+}
+
+// Virtual time of the last GVT on this PE. The field that holds it depends on whether the
+// tie-breaker build is enabled (same selection as `gvt_for` in application-surrogate.c)
+#ifdef USE_RAND_TIEBREAKER
+#define avg_pred_gvt_ts() (g_tw_pe->GVT_sig.recv_ts)
+#else
+#define avg_pred_gvt_ts() (g_tw_pe->GVT)
+#endif
+
+// Restarts data collection on this PE.
+// Clears the per-node accumulators, re-bases nodes that have not reached the predicted resume
+// iteration, and promotes every app flagged in `app_just_ended` to `APP_STATUS_completed_everywhere`.
+static void reset_with(bool const * app_just_ended) {
+    ready_to_skip = false;
+
+    master_printf("Resetting (average) application predictor at GVT %d time %f\n", g_tw_gvt_done, avg_pred_gvt_ts());
+
+    int last_iter[my_config.num_apps];
+    find_max_iter_per_app(last_iter); // We should start tracking iterations from the next iteration
+
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        if (node_data->app_id == -1) {
+            continue;
+        }
+        node_data->acc_iters = 0;
+        node_data->acc_iteration_time = 0;
+        if (node_data->last_iter < arr_app_data[node_data->app_id].pred.resume_at_iter) {
+            node_data->last_iter = last_iter[node_data->app_id];
+            node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at;
+        }
+    }
+    for (int i=0; i < my_config.num_apps; i++) {
+        arr_app_data[i].nodes_with_enough_iters = 0;
+    }
+
+    // If an app just fully ended (ended on all PEs but hasn't been cleaned) then clean it
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (app_just_ended[i]) {
+            arr_app_data[i].status = APP_STATUS_completed_everywhere;
+        }
+    }
+}
+
+// Tells a node whether it should switch now: never before a jump has been prepared, always for
+// background noise, and for iterative apps only when `iteration_id` is the app's `jump_at_iter`
+static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) {
+    (void) lp;
+    assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe);
+    assert_app_initialized(nw_id_in_pe);
+
+    if (!ready_to_skip) {
+        return false;
+    }
+
+    struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)];
+    switch (app_data->type) {
+        case NODE_TYPE_background_noise:
+            return true;
+        case NODE_TYPE_app:
+            if (iteration_id == app_data->pred.jump_at_iter) {
+                return true;
+            }
+            break;
+        default:
+            break;
+    }
+
+    return false;
+}
+
+// Finds the type of every app across all PEs.
+// The reduction is MPI_MAX over the integer value of the enum, so the largest value reported by any PE wins.
+static inline void find_app_types(enum NODE_TYPE * app_type) {
+    int app_type_here[my_config.num_apps];
+    for (int i = 0; i < my_config.num_apps; i++) {
+        app_type_here[i] = arr_app_data[i].type;
+    }
+    int app_type_int[my_config.num_apps];
+    mpi_allreduce_int_max(app_type_here, app_type_int, my_config.num_apps);
+
+    // Convert back to enums
+    for (int i = 0; i < my_config.num_apps; i++) {
+        app_type[i] = app_type_int[i];
+    }
+}
+
+// Shares app type and ending iteration across PEs, aborting on any disagreement, and sets the
+// initial status of apps that are untracked, background noise, or absent from this PE
+static inline void post_init_share_ending_iteration(void) {
+    // Sharing ending_iteration results across PEs
+    int ending_iteration_here[my_config.num_apps];
+    for (int i = 0; i < my_config.num_apps; i++) {
+        ending_iteration_here[i] = arr_app_data[i].ending_iteration;
+    }
+    int ending_iteration[my_config.num_apps];
+    mpi_allreduce_int_max(ending_iteration_here, ending_iteration, my_config.num_apps);
+
+    enum NODE_TYPE app_type[my_config.num_apps];
+    find_app_types(app_type);
+
+    // Checking that total iterations are the same across nodes
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data_here = &arr_app_data[i];
+        switch (app_type[i]) {
+            case NODE_TYPE_unassigned:
+                assert(app_data_here->type == NODE_TYPE_unassigned);
+                master_printf("Workload/app %d has not been configured to be tracked by iteration predictor\n", i);
+                app_data_here->status = APP_STATUS_completed_everywhere;
+                break;
+            case NODE_TYPE_background_noise:
+                if (app_data_here->type == NODE_TYPE_app) {
+                    tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have signaled conflicting node type (here: application, other: background noise)", i);
+                }
+                // We assume the background noise stays the same forever, thus we can think of it as not running. But if the background noise were to change, we would have to keep it APP_STATUS_running. And, possibly, we would have to call .ended() from the background process
+                app_data_here->status = APP_STATUS_completed_everywhere;
+                app_data_here->type = NODE_TYPE_background_noise;
+                break;
+            case NODE_TYPE_app:
+                if (app_data_here->type == NODE_TYPE_unassigned) {
+                    // There are no nodes for this application on this PE
+                    app_data_here->status = APP_STATUS_just_completed;
+                } else if (app_data_here->type == NODE_TYPE_background_noise) {
+                    tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have signaled conflicting node type (here: background noise, other: application)", i);
+                } else if (ending_iteration[i] != app_data_here->ending_iteration) {
+                    tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data_here->ending_iteration);
+                }
+                app_data_here->ending_iteration = ending_iteration[i];
+                app_data_here->type = NODE_TYPE_app;
+                break;
+        }
+    }
+}
+
+// Fills `save_app_just_ended[i]` with whether app `i` is `APP_STATUS_just_completed` on every PE
+// (logical AND across PEs) and returns true if that holds for at least one app
+static inline bool has_any_app_ended(bool * save_app_just_ended) {
+    // Checking any application has fully ended, in which case we have to restart collecting data
+    bool app_just_ended_here[my_config.num_apps];
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data = &arr_app_data[i];
+        app_just_ended_here[i] = app_data->status == APP_STATUS_just_completed;
+    }
+    mpi_allreduce_bool_and(app_just_ended_here, save_app_just_ended, my_config.num_apps);
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (save_app_just_ended[i]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// True when every app is `APP_STATUS_completed_everywhere` (local check, no communication)
+static inline bool all_apps_ended(void) {
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data * app_data = &arr_app_data[i];
+        if (app_data->status != APP_STATUS_completed_everywhere) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+// True when, on this PE, every node of every app that has not ended everywhere has collected enough iterations
+static inline bool has_everyone_accumulated_enough(void) {
+    bool everyone = true;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        struct app_data *
app_data = &arr_app_data[i];
+        // ignoring apps that have ended already
+        bool const app_in_pe = app_data->num_nodes > 0;
+        bool const hasnt_ended = app_data->status != APP_STATUS_completed_everywhere;
+        if (app_in_pe && hasnt_ended) {
+            everyone &= app_data->nodes_with_enough_iters == app_data->num_nodes;
+        }
+    }
+    return everyone;
+}
+
+// Director entry point: is there enough data on all PEs to attempt a fast-forward?
+// The first call also runs the one-time cross-PE consistency check. If an app has just ended on
+// all PEs, the predictor is reset and false is returned.
+static bool director_calls_is_predictor_ready(void) {
+    static bool post_init_done = false;
+    if (!post_init_done) {
+        post_init_share_ending_iteration();
+        post_init_done = true;
+    }
+    bool app_just_ended[my_config.num_apps];
+    if (has_any_app_ended(app_just_ended)) {
+        reset_with(app_just_ended);
+        return false;
+    }
+
+    if (all_apps_ended()) {
+        return false;
+    }
+
+    // check that all applications have collected data for enough iterations to jump ahead
+    bool const everyone_ready_here = has_everyone_accumulated_enough();
+    bool everyone_ready;
+    mpi_allreduce_bool_and(&everyone_ready_here, &everyone_ready, 1);
+    return everyone_ready;
+}
+
+
+// Director entry point: restart data collection, cleaning up any app that ended on all PEs
+static void director_calls_reset(void) {
+    bool app_just_ended[my_config.num_apps];
+    has_any_app_ended(app_just_ended);
+    reset_with(app_just_ended);
+}
+
+// Computes, per app, the average time per iteration over all nodes on all PEs
+// (sum of accumulated iteration time divided by number of accumulated iterations).
+// Apps without any accumulated iteration get 0.0 so that every entry of `save_avg_time` is defined.
+static void find_avg_iteration_time(double * save_avg_time) {
+    double acc_iter_time_here[my_config.num_apps];
+    int acc_iters_here[my_config.num_apps];
+    init_double_array(acc_iter_time_here, my_config.num_apps, 0.0);
+    init_int_array(acc_iters_here, my_config.num_apps, 0);
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        int const app_id = node_data->app_id;
+        if (app_id == -1) {
+            continue;
+        }
+        acc_iter_time_here[app_id] += node_data->acc_iteration_time;
+        acc_iters_here[app_id] += node_data->acc_iters;
+    }
+    double acc_iter_time[my_config.num_apps];
+    mpi_allreduce_double_sum(acc_iter_time_here, acc_iter_time, my_config.num_apps);
+    int acc_iters[my_config.num_apps];
+    mpi_allreduce_int_sum(acc_iters_here, acc_iters, my_config.num_apps);
+
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (acc_iters[i]) {
+            save_avg_time[i] = acc_iter_time[i] / acc_iters[i];
+        } else {
+            save_avg_time[i] = 0.0;
+        }
+    }
+}
+
+// Element-wise maximum of `count` ints across all ranks of MPI_COMM_CODES; aborts on failure
+static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't compute maximum");
+    }
+}
+
+// Element-wise sum of `count` ints across all ranks of MPI_COMM_CODES; aborts on failure
+static inline void mpi_allreduce_int_sum(int const * local_data, int * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+}
+
+// Element-wise sum of `count` doubles across all ranks of MPI_COMM_CODES; aborts on failure
+static inline void mpi_allreduce_double_sum(double const * local_data, double * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up");
+    }
+}
+
+// Element-wise logical AND of `count` bools across all ranks of MPI_COMM_CODES; aborts on failure
+static inline void mpi_allreduce_bool_and(bool const * local_data, bool * result_data, int count) {
+    if(MPI_Allreduce(local_data, result_data, count, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) {
+        tw_error(TW_LOC, "MPI_Allreduce call failed!");
+    }
+}
+
+// Sets the first `size` entries of `array` to `value`
+static inline void init_int_array(int * array, int size, int value) {
+    for (int i = 0; i < size; i++) {
+        array[i] = value;
+    }
+}
+
+// Sets the first `size` entries of `array` to `value`
+static inline void init_double_array(double * array, int size, double value) {
+    for (int i = 0; i < size; i++) {
+        array[i] = value;
+    }
+}
+
+// Finds, per app, the largest `last_iter` of any node on any PE (INT_MIN if the app has no nodes)
+static void find_max_iter_per_app(int * save_last_iter) {
+    int last_iter_here[my_config.num_apps];
+    init_int_array(last_iter_here, my_config.num_apps, INT_MIN);
+
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        int const app_id = node_data->app_id;
+        if (app_id == -1) {
+            continue;
+        }
+        if (last_iter_here[app_id] < node_data->last_iter) {
+            last_iter_here[app_id] = node_data->last_iter;
+        }
+    }
+    mpi_allreduce_int_max(last_iter_here,
save_last_iter, my_config.num_apps);
+}
+
+// Computes, per app, the average time at which the nodes sitting at the app's latest iteration
+// (`last_iter`) reached it. Apps with no such node keep the reduced sum (0.0).
+static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * last_iter) {
+    int acc_iters_here[my_config.num_apps];
+    double acc_last_iter_time[my_config.num_apps];
+    init_int_array(acc_iters_here, my_config.num_apps, 0);
+    init_double_array(acc_last_iter_time, my_config.num_apps, 0.0);
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        int const app_id = node_data->app_id;
+        if (app_id == -1) {
+            continue;
+        }
+        if (node_data->last_iter == last_iter[app_id]) {
+            acc_last_iter_time[app_id] += node_data->prev_iteration_time;
+            acc_iters_here[app_id]++;
+        }
+    }
+    mpi_allreduce_double_sum(acc_last_iter_time, save_last_iter_time, my_config.num_apps);
+    int acc_iters[my_config.num_apps];
+    mpi_allreduce_int_sum(acc_iters_here, acc_iters, my_config.num_apps);
+    for (int i=0; i < my_config.num_apps; i++) {
+        if (acc_iters[i] > 0) {
+            save_last_iter_time[i] /= acc_iters[i];
+        }
+    }
+}
+
+// An app counts as running until it is marked `APP_STATUS_completed_everywhere`
+static void get_running_apps(bool * is_running) {
+    for (int i = 0; i < my_config.num_apps; i++) {
+        is_running[i] = arr_app_data[i].status != APP_STATUS_completed_everywhere;
+    }
+}
+
+// Returns the earliest predicted end time among the running apps (DBL_MAX if none is running).
+// An app's end time is the time of its latest iteration plus the remaining iterations at the average pace.
+static double compute_earliest_end_time(
+        bool const * is_running,
+        double const * avg_iter_time,
+        int const * last_iter,
+        double const * last_iter_time) {
+    double switch_time = DBL_MAX;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        // Apps that are not running are never candidates. Their inputs can be placeholders
+        // (e.g. INT_MIN iterations), so no arithmetic is done on them
+        if (!is_running[i]) {
+            continue;
+        }
+        int const iterations_left = arr_app_data[i].ending_iteration - last_iter[i];
+        double const app_end_time = last_iter_time[i] + iterations_left * avg_iter_time[i];
+        // Pick smallest compute end time/time to skip
+        if (switch_time > app_end_time) {
+            switch_time = app_end_time;
+        }
+    }
+    return switch_time;
+}
+
+// For every running app, computes how many iterations fit before `switch_time` and from that the
+// iteration and time at which the app restarts. Entries of apps that are not running are left untouched.
+// Returns false if any running app would skip at most one iteration.
+static bool compute_restart_params(
+        bool const * is_running,
+        double const * avg_iter_time,
+        int const * last_iter,
+        double const * last_iter_time,
+        double switch_time,
+        double * apps_restart_at_time,
+        int * apps_restart_at_iter) {
+    // Find iteration to skip to per node
+    bool worth_switching = true;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (!is_running[i]) {
+            continue;
+        }
+        // A non-positive average would make the quotient infinite or NaN, which `lround` cannot
+        // convert meaningfully. Treat it as "nothing to skip"
+        int iters_to_skip = 0;
+        if (avg_iter_time[i] > 0.0) {
+            iters_to_skip = lround((switch_time - last_iter_time[i]) / avg_iter_time[i]);
+        }
+        apps_restart_at_time[i] = last_iter_time[i] + iters_to_skip * avg_iter_time[i];
+        apps_restart_at_iter[i] = last_iter[i] + iters_to_skip;
+
+        // The jump happens at `last_iter + 1`, so resuming at `last_iter + 1` (or earlier) skips
+        // nothing and there is no point in trying to fastforward
+        if (iters_to_skip <= 1) {
+            worth_switching = false;
+        }
+    }
+    return worth_switching;
+}
+
+// Latest restart time among the running apps (0 if none is running)
+static double find_latest_restart_time(bool const * is_running, double const * apps_restart_at_time) {
+    // Compute last application to restart (this is restarting_at)
+    double last_to_finish = 0;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (is_running[i] && last_to_finish < apps_restart_at_time[i]) {
+            last_to_finish = apps_restart_at_time[i];
+        }
+    }
+    return last_to_finish;
+}
+
+// Earliest restart time among the running apps (DBL_MAX if none is running)
+static double find_earliest_restart_time(bool const * is_running, double const * apps_restart_at_time) {
+    // Compute first application to restart
+    double first_to_finish = DBL_MAX;
+    for (int i = 0; i < my_config.num_apps; i++) {
+        if (is_running[i] && first_to_finish > apps_restart_at_time[i]) {
+            first_to_finish = apps_restart_at_time[i];
+        }
+    }
+    return first_to_finish;
+}
+
+// Stores the prediction each app will hand to its nodes: background noise restarts at
+// `earliest_app_restart`, running iterative apps get their own jump/resume iteration and restart time
+static void set_app_prediction_data(
+        bool const * is_running,
+        int const * last_iter,
+        int const * apps_restart_at_iter,
+        double const * apps_restart_at_time,
+        double const earliest_app_restart) {
+    // Set values for iteration to restart at and iterations to jump for each application
+    for (int i = 0; i < my_config.num_apps; i++) {
+        switch (arr_app_data[i].type) {
+            case NODE_TYPE_unassigned:
+                break;
+
case NODE_TYPE_background_noise:
+                arr_app_data[i].pred.restart_at = earliest_app_restart;
+                break;
+            case NODE_TYPE_app:
+                if (is_running[i]) {
+                    arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1;
+                    arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i];
+                    arr_app_data[i].pred.restart_at = apps_restart_at_time[i];
+                }
+                break;
+        }
+    }
+}
+
+// Director entry point: decides where every app fast-forwards to.
+// Returns FAST_FORWARD_switching (and arms `ready_to_skip`) when every running app skips more than
+// one iteration, FAST_FORWARD_restart otherwise. `restarting_at` is the latest restart time in both cases.
+static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) {
+    // 0. Check if app is still running
+    bool is_running[my_config.num_apps];
+    get_running_apps(is_running);
+
+    // 1. Compute end time for each application given current data (pick smallest)
+    // a. Find avg iteration per app
+    double avg_iter_time[my_config.num_apps];
+    find_avg_iteration_time(avg_iter_time);
+    // b. Find iteration to start switch after
+    int last_iter[my_config.num_apps];
+    double last_iter_time[my_config.num_apps];
+    find_max_iter_per_app(last_iter);
+    find_avg_time_for_max_iter(last_iter_time, last_iter);
+    // c. & d. Compute and pick smallest end time/time to skip
+    double switch_time = compute_earliest_end_time(is_running, avg_iter_time, last_iter, last_iter_time);
+
+    // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point
+    // a. Find iteration to skip to per node
+    double apps_restart_at_time[my_config.num_apps];
+    int apps_restart_at_iter[my_config.num_apps];
+    bool worth_switching = compute_restart_params(is_running, avg_iter_time, last_iter, last_iter_time, switch_time, apps_restart_at_time, apps_restart_at_iter);
+
+    // b. Compute last application to restart (this is restarting_at)
+    double const last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time);
+    double const first_to_finish = find_earliest_restart_time(is_running, apps_restart_at_time);
+
+    // c. If the number of iterations to skip is zero for any app, force reset of predictor tracking
+    if (!worth_switching) {
+        return (struct fast_forward_values) {
+            .status = FAST_FORWARD_restart,
+            .restarting_at = last_to_finish,
+        };
+    }
+
+    // 3. Set values for iteration to restart at and iterations to jump for each application
+    set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time, first_to_finish);
+    ready_to_skip = true;
+
+    return (struct fast_forward_values) {
+        .status = FAST_FORWARD_switching,
+        .restarting_at = last_to_finish,
+    };
+}
+
+// Creates the "average" application iteration predictor.
+// Copies `config_`, allocates the per-node and per-app tables (nodes start unassigned with app id -1,
+// apps start with no ending iteration) and returns the table of callbacks for models and director.
+struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config * config_) {
+    my_config = *config_;
+    // Release tables from a previous call (both pointers start as NULL, and `free(NULL)` is a no-op)
+    free(arr_node_data);
+    free(arr_app_data);
+    arr_node_data = calloc(my_config.num_nodes_in_pe, sizeof(struct node_data));
+    arr_app_data = calloc(my_config.num_apps, sizeof(struct app_data));
+    // A NULL result is only an error when elements were requested
+    if ((arr_node_data == NULL && my_config.num_nodes_in_pe > 0) || (arr_app_data == NULL && my_config.num_apps > 0)) {
+        tw_error(TW_LOC, "Could not allocate memory for the average application iteration predictor");
+    }
+    for (int i=0; i < my_config.num_nodes_in_pe; i++) {
+        struct node_data * node_data = &arr_node_data[i];
+        node_data->app_id = -1;
+        node_data->last_iter = INT_MIN;
+    }
+    for (int i=0; i < my_config.num_apps; i++) {
+        arr_app_data[i].ending_iteration = INT_MIN;
+    }
+    return (struct app_iteration_predictor) {
+        .model = {
+            .init = model_calls_init,
+            .feed = model_calls_feed,
+            .ended = model_calls_ended,
+            .predict = model_calls_predict,
+            .predict_rc = model_calls_predict_rc,
+            .have_we_hit_switch = model_calls_have_we_hit_switch,
+        },
+        .director = {
+            .reset = director_calls_reset,
+            .is_predictor_ready = director_calls_is_predictor_ready,
+            .prepare_fast_forward_jump = director_calls_prepare_fast_forward_jump,
+        }
+    };
+}
+
+// Releases the predictor tables. The pointers are reset so that a later use fails on
+// `assert(arr_node_data)` instead of touching freed memory, and so that calling this twice is safe.
+void free_avg_app_iteration_predictor(void) {
+    free(arr_node_data);
+    arr_node_data = NULL;
+    free(arr_app_data);
+    arr_app_data = NULL;
+}
diff --git a/src/surrogate/app-iteration-predictor/common.c b/src/surrogate/app-iteration-predictor/common.c
new file mode 100644
index 00000000..cc8db1a3
--- /dev/null
+++ b/src/surrogate/app-iteration-predictor/common.c
@@ -0,0 +1 @@
+#include "surrogate/app-iteration-predictor/common.h"
diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c
new file mode 100644
index 00000000..fb6044df
--- /dev/null
+++ b/src/surrogate/application-surrogate.c
@@ -0,0 +1,123 @@
+#include "surrogate/application-surrogate.h"
+#include
+#include "surrogate/network-surrogate.h"
+#include "surrogate/init.h"
+
+static struct app_iteration_predictor * iter_predictor;
+static struct application_director_config conf = {
+    .option = APP_DIRECTOR_OPTS_call_every_ns,
+    .every_n_gvt = 1000000,
+    .use_network_surrogate = false
+};
+static enum {
+    PRE_JUMP = 0,
+    POST_JUMP_switched, // Switched to surrogate-mode
+    POST_JUMP_skipped, // Did not switch, and skipping until next application finishes
+} director_state;
+
+#ifdef USE_RAND_TIEBREAKER
+#define gvt_for(pe) (pe->GVT_sig.recv_ts)
+#else
+#define gvt_for(pe) (pe->GVT)
+#endif
+
+#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
+
+// Director step while in PRE_JUMP: asks the predictor whether a fast-forward is possible and,
+// if so, schedules the GVT hook for when the fast-forward is over and moves to a POST_JUMP state
+static void application_director_pre_switch(tw_pe * pe, bool is_queue_empty) {
+    // No need to switch to surrogate when the simulation has ended
+    if (is_queue_empty || gvt_for(pe) >= g_tw_ts_end) {
+        return;
+    }
+    // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every
+    if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
+        tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
+    }
+
+    if (!iter_predictor->director.is_predictor_ready()) {
+        return;
+    }
+    struct fast_forward_values jump_to = iter_predictor->director.prepare_fast_forward_jump();
+    // Never schedule the hook in the past: the restart time is clamped to the current GVT
+    double const restarting_at = jump_to.restarting_at > gvt_for(pe) ?
jump_to.restarting_at : gvt_for(pe);
+    switch (jump_to.status) {
+        case FAST_FORWARD_switching:
+            tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward
+            master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+
+            if (conf.use_network_surrogate) {
+                master_printf("Switching network surrogate on\n");
+                surrogate_switch_network_model(pe, is_queue_empty);
+            }
+
+            // Remember when surrogate mode started; read back in `application_director_post_switch`
+            surrogate_time_last = tw_clock_read();
+            director_state = POST_JUMP_switched;
+            break;
+
+        case FAST_FORWARD_restart:
+            tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward
+            director_state = POST_JUMP_skipped;
+            break;
+    }
+}
+
+// Director step while in a POST_JUMP state: re-arms the GVT hook, resets the predictor (timing
+// the reset), switches the network surrogate back off if it was switched on, and returns to PRE_JUMP
+static void application_director_post_switch(tw_pe * pe, bool is_queue_empty) {
+    // No need to restart high-fidelity simulation if network was not suspended
+    if (is_queue_empty && !conf.use_network_surrogate) {
+        return;
+    }
+
+    // Scheduling next GVT hook call
+    if (!is_queue_empty) {
+        if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) {
+            tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns);
+        } else {
+            tw_trigger_gvt_hook_every(conf.every_n_gvt);
+        }
+    }
+
+    // The time spent resetting the predictor is accounted as switching time
+    double const start = tw_clock_read();
+    iter_predictor->director.reset();
+    double const end = tw_clock_read();
+    surrogate_switching_time += end - start;
+
+    if (director_state == POST_JUMP_switched) {
+        master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+
+        if (conf.use_network_surrogate) {
+            master_printf("Switching network surrogate off\n");
+            surrogate_switch_network_model(pe, is_queue_empty);
+        }
+
+        // Surrogate time runs from the clock value stored at the switch until the reset above started
+        time_in_surrogate += start - surrogate_time_last;
+        surrogate_time_last = 0.0;
+    } else {
+        // NOTE(review): what was reset above is the application iteration predictor (`iter_predictor`); the message says "network predictor" — confirm the wording
+        master_printf("Resetting network predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe));
+    }
+    director_state = PRE_JUMP;
+}
+
+// GVT hook installed by `application_director_configure`; dispatches on the director state
+static void application_director(tw_pe * pe, bool is_queue_empty) {
+    switch (director_state) {
+        case PRE_JUMP:
+            application_director_pre_switch(pe, is_queue_empty);
+            break;
+        case POST_JUMP_switched:
+        case POST_JUMP_skipped:
+            application_director_post_switch(pe, is_queue_empty);
+            break;
+    }
+}
+
+// Installs the application director: copies the configuration, keeps a pointer to the predictor
+// (the caller must keep it alive), registers the GVT hook and schedules its first trigger
+void application_director_configure(struct application_director_config * conf_, struct app_iteration_predictor * iter_predictor_) {
+    conf = *conf_;
+    iter_predictor = iter_predictor_;
+    g_tw_gvt_hook = application_director;
+    director_state = PRE_JUMP;
+    if (conf.option == APP_DIRECTOR_OPTS_every_n_gvt) {
+        tw_trigger_gvt_hook_every(conf.every_n_gvt);
+    } else {
+        tw_trigger_gvt_hook_at(conf.call_every_ns);
+    }
+}
+
+// Nothing to release: the director owns no resources
+void application_director_finalize(void) {
+}
diff --git a/src/surrogate/init.c b/src/surrogate/init.c
new file mode 100644
index 00000000..dc165eae
--- /dev/null
+++ b/src/surrogate/init.c
@@ -0,0 +1,314 @@
+#include
+#include
+#include
+#include
+#include
+
+#ifdef USE_TORCH
+#include
+#endif
+
+#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); }
+
+// Shared times across network and application surrogates
+double surrogate_switching_time = 0.0;
+double time_in_surrogate = 0.0;
+double surrogate_time_last = 0.0;
+
+static bool network_director_enabled = false;
+static bool is_network_surrogate_configured = false;
+static bool is_app_surrogate_configured = false;
+static struct packet_latency_predictor current_net_predictor = {0};
+static struct app_iteration_predictor current_iter_predictor = {0};
+
+
+// === Stats!
+// Prints, on rank 0 and only when the network surrogate is configured, the
+// values accumulated in `time_in_surrogate` and `surrogate_switching_time`,
+// both divided by `g_tw_clock_rate`.
+void print_surrogate_stats(void) {
+    // Computing the time in surrogate only makes sense if we can switch the whole simulation all at once (like the network simulation does), and it doesn't work with the application surrogate as this doesn't switch the state of the simulation all at once
+    if(is_network_surrogate_configured && g_tw_mynode == 0) {
+        printf("\nTotal time spent on surrogate-mode: %.4f\n", (double) time_in_surrogate / g_tw_clock_rate);
+        printf("Total time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate);
+    }
+}
+// === END OF Stats!
+
+
+// === All things Surrogate Configuration
+// Reads the NETWORK_SURROGATE section of the configuration and sets up the
+// network surrogate: director mode, packet latency predictor, `ignore_until`
+// and the treatment of the network on switch.
+//  - anno:    annotation forwarded to every configuration lookup
+//  - sc:      surrogate configuration, handed over to network_director_configure
+//  - pl_pred: receives a pointer to the selected packet latency predictor
+// Returns whether the network is frozen on switch to surrogate.
+bool network_surrogate_configure(
+        char const * const anno,
+        struct network_surrogate_config * const sc,
+        struct packet_latency_predictor ** pl_pred
+) {
+    assert(sc);
+    assert(pl_pred);
+    assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES);
+    is_network_surrogate_configured = true;
+
+    // Only filled in (and only passed on) when the director runs at fixed virtual times
+    struct switch_at_struct switch_network_at = {0};
+
+    // Determining which director mode to set up
+    char director_mode[MAX_NAME_LENGTH];
+    director_mode[0] = '\0';
+    configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH);
+    if (strcmp(director_mode, "at-fixed-virtual-times") == 0) {
+        master_printf("\nNetwork surrogate activated switching at fixed virtual times: ");
+
+        // Loading timestamps
+        char **timestamps = NULL;
+        size_t len = 0;
+        configuration_get_multivalue(&config, "NETWORK_SURROGATE", "fixed_switch_timestamps", anno, &timestamps, &len);
+        // The director schedules its first switch from time_stampts[0], so an
+        // empty list cannot be accepted
+        if (len == 0 || timestamps == NULL) {
+            tw_error(TW_LOC, "Director mode `at-fixed-virtual-times` requires at least one value in `fixed_switch_timestamps`");
+        }
+
+        network_director_enabled = true;
+        switch_network_at.current_i = 0;
+        switch_network_at.total = len;
+        switch_network_at.time_stampts = malloc(len * sizeof(double));
+        if (switch_network_at.time_stampts == NULL) {
+            tw_error(TW_LOC, "Could not allocate memory for %zu switch timestamps", len);
+        }
+
+        for (size_t i = 0; i < len; i++) {
+            // strtod reports "nothing was parsed" through the end pointer
+            // (not through errno), and overflow/underflow through ERANGE
+            char * parse_end = NULL;
+            errno = 0;
+            switch_network_at.time_stampts[i] = strtod(timestamps[i], &parse_end);
+            if (errno == ERANGE || parse_end == timestamps[i]) {
+                tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]);
+            }
+
+            master_printf("%g%s", switch_network_at.time_stampts[i], i == len-1 ? "" : ", ");
+        }
+        master_printf("\n");
+
+        // freeing timestamps before it disappears
+        for (size_t i = 0; i < len; i++) {
+            free(timestamps[i]);
+        }
+        free(timestamps);
+    } else if (strcmp(director_mode, "delegate-to-app-director") == 0) {
+        master_printf("\nNetwork surrogate enabled but director won't run. Network surrogate will be triggered by app director if present\n");
+    } else {
+        tw_error(TW_LOC, "Unknown director mode `%s`", director_mode);
+    }
+
+    // Determining which predictor to set up and return
+    char latency_pred_name[MAX_NAME_LENGTH];
+    latency_pred_name[0] = '\0';
+    configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH);
+    if (*latency_pred_name) {
+        if (strcmp(latency_pred_name, "average") == 0) {
+            current_net_predictor = average_latency_predictor(sc->total_terminals);
+            *pl_pred = &current_net_predictor;
+
+#ifdef USE_TORCH
+        } else if (strcmp(latency_pred_name, "torch-jit") == 0) {
+            char torch_jit_mode[MAX_NAME_LENGTH];
+            torch_jit_mode[0] = '\0';
+            configuration_get_value(&config, "NETWORK_SURROGATE", "torch_jit_mode", anno, torch_jit_mode, MAX_NAME_LENGTH);
+            if (strcmp(torch_jit_mode, "single-static-model-for-all-terminals") != 0) {
+                tw_error(TW_LOC, "Unknown torch-jit mode `%s`", torch_jit_mode);
+            }
+
+            char torch_jit_model_path[MAX_NAME_LENGTH];
+            torch_jit_model_path[0] = '\0';
+            configuration_get_value(&config, "NETWORK_SURROGATE", "torch_jit_model_path", anno, torch_jit_model_path, MAX_NAME_LENGTH);
+            surrogate_torch_init(torch_jit_model_path);
+
+            *pl_pred = &torch_latency_predictor;
+#endif
+
+        } else {
+            tw_error(TW_LOC, "Unknown predictor for packet latency `%s` "
+                    "(possibilities include: average"
+#ifdef USE_TORCH
+                    ", torch-jit"
+#endif
+                    ")", latency_pred_name);
+        }
+    } else {
+        // No predictor named in the configuration: fall back to the average predictor
+        current_net_predictor = average_latency_predictor(sc->total_terminals);
+        *pl_pred = &current_net_predictor;
+        master_printf("Enabling average
packet latency predictor (default behaviour)\n");
+    }
+
+    // Finding out whether to ignore some packet latencies
+    int rc = configuration_get_value_double(&config, "NETWORK_SURROGATE", "ignore_until", anno, &ignore_until);
+    if (rc) {
+        ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered
+        master_printf("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n");
+    } else {
+        master_printf("ignore_until=%g a packet delievered before this time stamp will not be used in training any predictor\n", ignore_until);
+    }
+
+    bool freeze_network_on_switch = true;
+    // Determining what to do with the network when switching to surrogate
+    char network_treatment_name[MAX_NAME_LENGTH];
+    network_treatment_name[0] = '\0';
+    configuration_get_value(&config, "NETWORK_SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH);
+    if (*network_treatment_name) {
+        if (strcmp(network_treatment_name, "freeze") == 0) {
+            freeze_network_on_switch = true;
+            master_printf("The network will be frozen on switch to surrogate\n");
+        } else if (strcmp(network_treatment_name, "nothing") == 0) {
+            freeze_network_on_switch = false;
+            master_printf("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n");
+        } else {
+            tw_error(TW_LOC, "Unknown network treatment `%s` (possibilities include: freeze or nothing)", network_treatment_name);
+        }
+    } else {
+        freeze_network_on_switch = true;
+        master_printf("The network will be frozen on switch to surrogate (default behaviour)\n");
+    }
+
+    // The list of switch timestamps is only handed over when the network director itself is enabled
+    network_director_configure(sc, network_director_enabled ? &switch_network_at: NULL, freeze_network_on_switch);
+
+    //surr_config.director.switch_surrogate();
+    if (DEBUG_DIRECTOR && g_tw_mynode == 0) {
+        fprintf(stderr, "Simulation starting on network %s mode\n", sc->model.is_surrogate_on() ? "surrogate" : "high-fidelity");
+    }
+
+    return freeze_network_on_switch;
+}
+
+// Reads the integer parameter `param_name` from the APPLICATION_SURROGATE
+// section. Returns `default_value` when the lookup yields nothing (rc <= 0)
+// or, after a warning, when the parsed value is not strictly positive.
+static int load_and_validate_int_param(const char* param_name, int default_value) {
+    char param_str[MAX_NAME_LENGTH];
+    param_str[0] = '\0';
+    int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", param_name, NULL, param_str, MAX_NAME_LENGTH);
+    int value = (rc > 0) ? atoi(param_str) : default_value;
+
+    if (value <= 0) {
+        tw_warning(TW_LOC, "%s must be a positive integer, got %d. Using default value %d.", param_name, value, default_value);
+        value = default_value;
+    }
+
+    return value;
+}
+
+// Reads the floating point parameter `param_name` from the
+// APPLICATION_SURROGATE section. Returns `default_value` when the lookup
+// yields nothing (rc <= 0) or, after a warning, when the parsed value is not
+// strictly positive.
+static double load_and_validate_double_param(const char* param_name, double default_value) {
+    char param_str[MAX_NAME_LENGTH];
+    param_str[0] = '\0';
+    int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", param_name, NULL, param_str, MAX_NAME_LENGTH);
+    double value = (rc > 0) ? strtod(param_str, NULL) : default_value;
+
+    if (value <= 0) {
+        // Both values are doubles: they have to be printed with a floating point conversion
+        tw_warning(TW_LOC, "%s must be a positive number, got %g. Using default value %g.", param_name, value, default_value);
+        value = default_value;
+    }
+
+    return value;
+}
+
+// Builds the application director configuration from the
+// APPLICATION_SURROGATE section. `director_mode` selects between
+// 'every-n-gvt' and 'every-n-nanoseconds'; any other case falls back to
+// 'every-n-nanoseconds' with a warning.
+static struct application_director_config load_director_config(void) {
+    int const default_gvt = 100;
+    double const default_ns = 1.0e6; // 1ms
+
+    enum {
+        MODE_NOT_SET,
+        MODE_EVERY_N_GVT,
+        MODE_EVERY_N_NANOSECONDS,
+        MODE_UNKNOWN
+    } mode;
+
+    char director_mode[MAX_NAME_LENGTH];
+    director_mode[0] = '\0';
+    int const rc_mode = configuration_get_value(&config, "APPLICATION_SURROGATE", "director_mode", NULL, director_mode, MAX_NAME_LENGTH);
+
+    if (rc_mode == 0) {
+        mode = MODE_NOT_SET;
+    } else if (strcmp(director_mode, "every-n-gvt") == 0) {
+        mode = MODE_EVERY_N_GVT;
+    } else if (strcmp(director_mode, "every-n-nanoseconds") == 0) {
+        mode = MODE_EVERY_N_NANOSECONDS;
+    } else {
+        mode = MODE_UNKNOWN;
+    }
+
+    int every_n_gvt = load_and_validate_int_param("director_num_gvt", default_gvt);
+    double every_n_ns = load_and_validate_double_param("director_num_ns", default_ns);
+
+    bool const is_sequential = (g_tw_synchronization_protocol == SEQUENTIAL ||
+                                g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK);
+
+    // NOTE: from here on `config` names this local struct, not the global configuration handle used above
+    struct application_director_config config;
+    switch (mode) {
+        case MODE_EVERY_N_GVT:
+            if (is_sequential) {
+                tw_warning(TW_LOC, "Cannot use 'every-n-gvt' mode in sequential simulation. Forcing 'every-n-nanoseconds' mode.");
+                config.option = APP_DIRECTOR_OPTS_call_every_ns;
+                config.call_every_ns = every_n_ns;
+            } else {
+                config.option = APP_DIRECTOR_OPTS_every_n_gvt;
+                config.every_n_gvt = every_n_gvt;
+            }
+            break;
+
+        case MODE_EVERY_N_NANOSECONDS:
+            config.option = APP_DIRECTOR_OPTS_call_every_ns;
+            config.call_every_ns = every_n_ns;
+            break;
+
+        case MODE_UNKNOWN:
+            tw_warning(TW_LOC, "Unknown director_mode '%s'.
Using default mode 'every-n-nanoseconds'.", director_mode);
+            config.option = APP_DIRECTOR_OPTS_call_every_ns;
+            config.call_every_ns = every_n_ns;
+            break;
+
+        case MODE_NOT_SET:
+        default:
+            tw_warning(TW_LOC, "director_mode not set. Using default mode 'every-n-nanoseconds'.");
+            config.option = APP_DIRECTOR_OPTS_call_every_ns;
+            config.call_every_ns = every_n_ns;
+            break;
+    }
+
+    config.use_network_surrogate = is_network_surrogate_configured;
+
+    return config;
+}
+
+// Configures the application surrogate from the APPLICATION_SURROGATE
+// section: creates the average iteration predictor, configures the
+// application director and prints a summary on rank 0.
+//  - num_terminals_in_pe: stored as `num_nodes_in_pe` in the predictor configuration
+//  - num_apps:            stored as `num_apps` in the predictor configuration
+//  - iter_pred:           receives a pointer to the iteration predictor
+void application_surrogate_configure(
+    int num_terminals_in_pe,
+    int num_apps,
+    struct app_iteration_predictor ** iter_pred
+) {
+    // Defaults to 5 if not specified; values that are not positive integers
+    // are replaced by 5 with a warning instead of reaching the predictor
+    int const num_of_iters_to_feed = load_and_validate_int_param("num_iters_to_collect", 5);
+
+    struct avg_app_config predictor_config = {
+        .num_apps = num_apps,
+        .num_nodes_in_pe = num_terminals_in_pe,
+        .num_iters_to_collect = num_of_iters_to_feed,
+    };
+
+    struct application_director_config app_dir_config = load_director_config();
+
+    current_iter_predictor = avg_app_iteration_predictor(&predictor_config);
+    application_director_configure(&app_dir_config, &current_iter_predictor);
+    *iter_pred = &current_iter_predictor;
+    is_app_surrogate_configured = true;
+
+    // Printing configuration summary
+    master_printf("\nApplication surrogate configuration:\n");
+    master_printf(" Predictor - num_apps: %d, num_iters_to_collect: %d\n",
+           predictor_config.num_apps, predictor_config.num_iters_to_collect);
+
+    if (app_dir_config.option == APP_DIRECTOR_OPTS_every_n_gvt) {
+        master_printf(" Director - mode: every-n-gvt, every_n_gvt: %d\n", app_dir_config.every_n_gvt);
+    } else {
+        master_printf(" Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns);
+    }
+    if (network_director_enabled) {
+        master_printf(" The network director has been replaced
by the application director. The application director will trigger the network surrogate on and off.\n"); + } + master_printf("\n"); +} + +void surrogates_finalize(void) { + // TODO (helq): check that we are in fact still in surrogate (either network or application) + if (surrogate_time_last > 0) { // we likely didn't transitioned back from surrogate mode + time_in_surrogate += tw_clock_read() - surrogate_time_last; + } + if (is_network_surrogate_configured) { + network_director_finalize(); + } + if (is_app_surrogate_configured) { + application_director_finalize(); + free_avg_app_iteration_predictor(); + } +} +// === END OF All things Surrogate Configuration diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c new file mode 100644 index 00000000..c2278583 --- /dev/null +++ b/src/surrogate/network-surrogate.c @@ -0,0 +1,464 @@ +#include +#include +#include +#include +#include + +#define master_printf(cond, ...) if (cond && g_tw_mynode == 0) { printf(__VA_ARGS__); } + +static bool is_network_surrogate_configured = false; +static struct switch_at_struct switch_network_at = {0}; +static struct network_surrogate_config net_surr_config = {0}; +static bool freeze_network_on_switch = false; +static bool network_director_enabled = false; + +// === Frozen events system for separate queue approach +static tw_event *frozen_events_head = NULL; // Head of frozen events linked list +static double frozen_events_switch_time = 0.0; // Time when we switched to surrogate mode + +// === Director functionality +// + +static struct lp_types_switch const * get_type_switch(char const * const name) { + for (size_t i = 0; i < net_surr_config.n_lp_types; i++) { + //printf("THIS %s and %s\n", surr_config.lp_types[i].lpname, name); + if (strcmp(net_surr_config.lp_types[i].lpname, name) == 0) { + return &net_surr_config.lp_types[i]; + } + } + return NULL; +} + + +static void freeze_events_to_separate_queue_pe(tw_pe * pe) { +#ifdef USE_RAND_TIEBREAKER + tw_event_sig 
gvt_sig = pe->GVT_sig; + tw_stime gvt = gvt_sig.recv_ts; +#else + tw_stime gvt = pe->GVT; +#endif + + // Store the time when we switch to surrogate mode + frozen_events_switch_time = gvt; + + tw_event * next_event = tw_pq_dequeue(pe->pq); + + // If there aren't any events left to process, then this PE has nothing to do + if (next_event == NULL) { + return; + } + + tw_event * dequed_events = NULL; // Linked list of non-frozen events, to be placed back in the queue + int events_processed = 0; // Total events processed from queue + int events_enqueued = 0; // Events put back in queue + int events_frozen = 0; // Events moved to frozen queue + int events_deleted = 0; // Events deleted + + // Traversing all events stored in the queue + while (next_event) { + events_processed++; + + // Filtering events to freeze + assert(next_event->prev == NULL); +#ifdef USE_RAND_TIEBREAKER + assert(tw_event_sig_compare_ptr(&next_event->sig, &gvt_sig) >= 0); +#else + assert(next_event->recv_ts >= gvt); +#endif + if (next_event->event_id && next_event->state.remote) { + tw_hash_remove(pe->hash_t, next_event, next_event->send_pe); + } + + // finding out lp type + char const * lp_type_name; + int rep_id, offset; // unused + codes_mapping_get_lp_info2(next_event->dest_lpid, NULL, &lp_type_name, NULL, &rep_id, &offset); + bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0; + struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); + + // "Processing" event + if (lp_type_switch && lp_type_switch->check_event_in_queue) { + if (is_lp_modelnet) { + model_net_method_call_inner(next_event->dest_lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->check_event_in_queue, next_event); + } else { + lp_type_switch->check_event_in_queue(next_event->dest_lp->cur_state, next_event->dest_lp, next_event); + } + } + + bool deleted = false; + bool frozen = false; + + // Check if event should be frozen (moved to separate queue) + if (lp_type_switch && 
lp_type_switch->should_event_be_frozen + && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) { + // Add to frozen events linked list (no timestamp manipulation here) + next_event->prev = frozen_events_head; + frozen_events_head = next_event; + frozen = true; + events_frozen++; + // deleting event if we need to + } else if (lp_type_switch && lp_type_switch->should_event_be_deleted + && lp_type_switch->should_event_be_deleted(next_event->dest_lp, next_event)) { + tw_event_free(pe, next_event); + deleted = true; + events_deleted++; + } + + // store event in dequed_events to inject immediately back to the queue + if (!deleted && !frozen) { + next_event->prev = dequed_events; + dequed_events = next_event; + } + + next_event = tw_pq_dequeue(pe->pq); + } + + // Reinjecting non-frozen events into simulation + while (dequed_events) { + tw_event * const prev_event = dequed_events; + dequed_events = dequed_events->prev; + prev_event->prev = NULL; + tw_pq_enqueue(pe->pq, prev_event); + + if (prev_event->event_id && prev_event->state.remote) { + tw_hash_insert(pe->hash_t, prev_event, prev_event->send_pe); + } + + events_enqueued++; + } + + if (DEBUG_DIRECTOR > 0) { + printf("PE %lu: Processed %d events (%d enqueued, %d frozen, %d deleted)\n", + g_tw_mynode, events_processed, events_enqueued, events_frozen, events_deleted); + } + + // Sanity check: processed = enqueued + frozen + deleted + assert(events_processed == events_enqueued + events_frozen + events_deleted); +} + +static void unfreeze_events_from_separate_queue_pe(tw_pe * pe) { +#ifdef USE_RAND_TIEBREAKER + tw_stime current_gvt = pe->GVT_sig.recv_ts; +#else + tw_stime current_gvt = pe->GVT; +#endif + + // Calculate offset to adjust timestamps: current_gvt - switch_time + double time_offset = current_gvt - frozen_events_switch_time; + + int events_restored = 0; + + // Traverse the frozen events linked list and restore them to the main queue + while (frozen_events_head) { + tw_event * 
event_to_restore = frozen_events_head; + frozen_events_head = frozen_events_head->prev; + event_to_restore->prev = NULL; + + // Adjust timestamp: original_time + time_spent_in_surrogate +#ifdef USE_RAND_TIEBREAKER + assert(event_to_restore->recv_ts == event_to_restore->sig.recv_ts); + event_to_restore->recv_ts += time_offset; + event_to_restore->sig.recv_ts = event_to_restore->recv_ts; +#else + event_to_restore->recv_ts += time_offset; +#endif + + // Re-enqueue the event + tw_pq_enqueue(pe->pq, event_to_restore); + + // Re-add to hash table if it was a remote event + if (event_to_restore->event_id && event_to_restore->state.remote) { + tw_hash_insert(pe->hash_t, event_to_restore, event_to_restore->send_pe); + } + + events_restored++; + } + + if (DEBUG_DIRECTOR > 0 && events_restored > 0) { + printf("PE %lu: Restored %d frozen events with time offset %.6f\n", + g_tw_mynode, events_restored, time_offset); + } + + // Reset frozen events state + frozen_events_switch_time = 0.0; +} + + +// Switching from a (vanilla) high-def simulation to surrogate mode +// consists of: +// - Cancel all events that have to be cancelled and clean everything +// - Looking at all events in the PE, "freezing" those in the network model +// and letting the workload events be processed further +// - Going through every LP and calling their respective functions +static void events_high_def_to_surrogate_switch(tw_pe * pe) { +#ifdef USE_RAND_TIEBREAKER + tw_event_sig gvt_sig = pe->GVT_sig; +#else + tw_stime gvt = pe->GVT; +#endif + if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL && g_tw_synchronization_protocol != SEQUENTIAL_ROLLBACK_CHECK) { + tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode"); + } + + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size); + freeze_events_to_separate_queue_pe(pe); + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL 
size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size); + + // Going through all LPs in PE and running their specific functions + for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { + tw_lp * const lp = g_tw_lp[local_lpid]; + assert(local_lpid == lp->id); + + // Modifying current time for LPs (technically, KPs) so that they + // coincide with current GVT (the current GVT often does not + // correspond to the (last) time stored in KPs). +#ifdef USE_RAND_TIEBREAKER + lp->kp->last_sig = gvt_sig; +#else + lp->kp->last_time = gvt; +#endif + + char const * lp_type_name; + int rep_id, offset; // unused + codes_mapping_get_lp_info2(lp->gid, NULL, &lp_type_name, NULL, &rep_id, &offset); + bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0; + struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); + + pe->cur_event = pe->abort_event; + pe->cur_event->caused_by_me = NULL; +#ifdef USE_RAND_TIEBREAKER + pe->cur_event->sig = pe->GVT_sig; +#else + pe->cur_event->recv_ts = pe->GVT; +#endif + + if (lp_type_switch) { + if (lp_type_switch->trigger_idle_modelnet) { + assert(is_lp_modelnet); + model_net_method_switch_to_surrogate_lp(lp); + } + if (lp_type_switch->highdef_to_surrogate) { + if (is_lp_modelnet) { + model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->highdef_to_surrogate, NULL); + } else { + lp_type_switch->highdef_to_surrogate(lp->cur_state, lp, NULL); + } + } + } + } + + // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process) + if (g_tw_synchronization_protocol == OPTIMISTIC) { + tw_scheduler_rollback_and_cancel_events_pe(pe); + } + +} + + +static void events_surrogate_to_high_def_switch(tw_pe * pe) { +#ifdef USE_RAND_TIEBREAKER + tw_event_sig gvt_sig = pe->GVT_sig; +#else + tw_stime gvt = pe->GVT; +#endif + + // Restore frozen events back to the main queue with 
timestamp adjustment + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size); + unfreeze_events_from_separate_queue_pe(pe); + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size); + + // Going through all LPs in PE and running their specific functions + for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { + tw_lp * const lp = g_tw_lp[local_lpid]; + assert(local_lpid == lp->id); + + // Modifying current time for LPs (technically, KPs) so that they + // coincide with current GVT (the current GVT often does not + // correspond to the (last) time stored in KPs). +#ifdef USE_RAND_TIEBREAKER + tw_event_sig const previous_sig = lp->kp->last_sig; + lp->kp->last_sig = gvt_sig; +#else + tw_stime const previous_time = lp->kp->last_time; + lp->kp->last_time = gvt; +#endif + + char const * lp_type_name; + int rep_id, offset; // unused + codes_mapping_get_lp_info2(lp->gid, NULL, &lp_type_name, NULL, &rep_id, &offset); + bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0; + struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); + + pe->cur_event = pe->abort_event; + pe->cur_event->caused_by_me = NULL; +#ifdef USE_RAND_TIEBREAKER + pe->cur_event->sig = pe->GVT_sig; +#else + pe->cur_event->recv_ts = pe->GVT; +#endif + + if (lp_type_switch) { + if (lp_type_switch->trigger_idle_modelnet) { + assert(is_lp_modelnet); + model_net_method_switch_to_highdef_lp(lp); + } + if (lp_type_switch->surrogate_to_highdef) { + if (is_lp_modelnet) { + model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->surrogate_to_highdef, NULL); + } else { + lp_type_switch->surrogate_to_highdef(lp->cur_state, lp, NULL); + } + } + if (lp_type_switch->reset_predictor) { + if (is_lp_modelnet) { + model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, 
void *))lp_type_switch->reset_predictor, NULL); + } else { + lp_type_switch->reset_predictor(lp->cur_state, lp, NULL); + } + } + } + +#ifdef USE_RAND_TIEBREAKER + lp->kp->last_sig = previous_sig; +#else + lp->kp->last_time = previous_time; +#endif + } +} + + +static void switch_model(tw_pe * pe, bool is_queue_empty) { + // Rollback if in optimistic mode and the simulation has events yet to process (globally) + if (g_tw_synchronization_protocol == OPTIMISTIC && !is_queue_empty) { + tw_scheduler_rollback_and_cancel_events_pe(pe); + } + master_printf(DEBUG_DIRECTOR, "Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "high-fidelity": "surrogate"); + + bool const is_surrogate_off = !net_surr_config.model.is_surrogate_on(); + if (is_surrogate_off && is_queue_empty) { + master_printf(true, "No need to switch to surrogate when the simulation has no events to process\n"); + return; + } + net_surr_config.model.switch_surrogate(); + + // "Freezing" network events and activating LP's switch functions + if (freeze_network_on_switch) { + if (is_surrogate_off) { + model_net_method_switch_to_surrogate(); + events_high_def_to_surrogate_switch(pe); + } else { + model_net_method_switch_to_highdef(); + events_surrogate_to_high_def_switch(pe); + } + } +} + + +void network_director(tw_pe * pe, bool is_queue_empty) { + assert(is_network_surrogate_configured); + assert(network_director_enabled); + +#ifdef USE_RAND_TIEBREAKER + tw_stime gvt = pe->GVT_sig.recv_ts; +#else + tw_stime gvt = pe->GVT; +#endif + + static int i = 0; + if (g_tw_mynode == 0) { + if (DEBUG_DIRECTOR == 2) { + printf("."); + fflush(stdout); + } + if (DEBUG_DIRECTOR == 3) { + printf("GVT %d at %f in %s\n", i++, gvt, + net_surr_config.model.is_surrogate_on() ? 
"surrogate-mode" : "high-definition"); + } + } + + // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic +#ifdef USE_RAND_TIEBREAKER + assert((g_tw_synchronization_protocol == SEQUENTIAL) || (g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK) || (pe->GVT_sig.recv_ts == gvt)); +#else + assert((g_tw_synchronization_protocol == SEQUENTIAL) || (g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK) || (pe->GVT == gvt)); +#endif + + // Do not process if the simulation ended + if (gvt >= g_tw_ts_end) { + return; + } + + // ---- Past this means that we are in fact switching ---- + bool const surrogate_state_pre_switch = net_surr_config.model.is_surrogate_on(); + + // Asking the director/model to switch + if (DEBUG_DIRECTOR && g_tw_mynode == 0) { + if (DEBUG_DIRECTOR == 2) { + printf("\n"); + } + printf("Switching network at %f\n", gvt); + } + + double const start = tw_clock_read(); + switch_model(pe, is_queue_empty); + double const end = tw_clock_read(); + surrogate_switching_time += end - start; + + // Setting trigger for next switch + if (++switch_network_at.current_i < switch_network_at.total) { + double next_switch = switch_network_at.time_stampts[switch_network_at.current_i]; + tw_trigger_gvt_hook_at(next_switch); + } + + bool const is_surrogate_on = net_surr_config.model.is_surrogate_on(); + if (is_surrogate_on == surrogate_state_pre_switch) { + // The surrogate was never switched! 
+        return;
+    }
+
+    master_printf(DEBUG_DIRECTOR == 1, "Network switch completed!\n");
+    if (DEBUG_DIRECTOR > 1) {
+        printf("PE %lu: Switch completed!\n", g_tw_mynode);
+    }
+
+    // Determining time in surrogate
+    if (is_surrogate_on) {
+        // Start tracking time spent in surrogate mode
+        surrogate_time_last = end;
+    } else {
+        // We are done tracking time spent in surrogate mode
+        time_in_surrogate += start - surrogate_time_last;
+        surrogate_time_last = 0.0;
+    }
+}
+
+// Stores the network surrogate configuration and, when a list of switch
+// timestamps is given, installs `network_director` as the GVT hook and
+// triggers the hook at the first timestamp.
+//  - sc:                 configuration, copied into `net_surr_config`
+//  - switch_network_at_: timestamps to switch at (the struct is copied, the
+//                        timestamp array is later released by
+//                        network_director_finalize); NULL leaves the network
+//                        director disabled
+//  - fnos:               whether to freeze the network on switch
+void network_director_configure(struct network_surrogate_config * sc, struct switch_at_struct * switch_network_at_, bool fnos) {
+    is_network_surrogate_configured = true;
+    // Injecting into ROSS the function to be called at GVT
+    if (switch_network_at_) {
+        // The first trigger is read from time_stampts[0]: an empty list would be read out of bounds
+        if (switch_network_at_->total < 1 || switch_network_at_->time_stampts == NULL) {
+            tw_error(TW_LOC, "The network director needs at least one timestamp to switch at");
+        }
+        network_director_enabled = true;
+        g_tw_gvt_hook = network_director;
+        switch_network_at = *switch_network_at_;
+        tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]);
+    }
+    net_surr_config = *sc;
+    freeze_network_on_switch = fnos;
+}
+
+// Releases the array of switch timestamps taken over in network_director_configure.
+void network_director_finalize(void) {
+    if (network_director_enabled) {
+        free(switch_network_at.time_stampts);
+        // Forgetting the released array so that a second call does not free it again
+        switch_network_at.time_stampts = NULL;
+        switch_network_at.total = 0;
+    }
+}
+
+// === Function for application director to use switch to surrogate machinery
+// Runs `switch_model` and adds the elapsed tw_clock_read() ticks to
+// `surrogate_switching_time`.
+void surrogate_switch_network_model(tw_pe * pe, bool is_queue_empty) {
+    // Simply expose the existing switch_model function for use by application director
+    double const start = tw_clock_read();
+    switch_model(pe, is_queue_empty);
+    double const end = tw_clock_read();
+    surrogate_switching_time += end - start;
+}
+//
+// === END OF Director functionality
+// vim: set tabstop=4 shiftwidth=4 expandtab :
diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c
new file mode 100644
index 00000000..4b14aedb
--- /dev/null
+++ b/src/surrogate/packet-latency-predictor/average.c
@@ -0,0 +1,130 @@
+#include
+#include
+
+double ignore_until = 0;
+static int num_terminals = 0;
+
+
+// === Average packet latency functionality
+//
+struct aggregated_latency_one_terminal {
+ double sum_latency; + unsigned int total_msgs; +}; + +struct latency_surrogate { + struct aggregated_latency_one_terminal aggregated_next_packet_delay; + struct aggregated_latency_one_terminal aggregated_latency_for_all; + struct aggregated_latency_one_terminal aggregated_latency[]; +}; + +static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) { + (void) lp; + (void) src_terminal; + assert(data->aggregated_latency_for_all.sum_latency == 0); + assert(data->aggregated_latency_for_all.total_msgs == 0); + assert(data->aggregated_latency[0].sum_latency == 0); + assert(data->aggregated_latency[0].total_msgs == 0); + assert(data->aggregated_next_packet_delay.total_msgs == 0); + assert(data->aggregated_next_packet_delay.sum_latency == 0); +} + +static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) { + (void) lp; + (void) src_terminal; + + if (end->travel_end_time < ignore_until) { + return; + } + + unsigned int const dest_terminal = start->dfdally_dest_terminal_id; + double const latency = end->travel_end_time - start->travel_start_time; + assert(dest_terminal < num_terminals); + assert(end->travel_end_time > start->travel_start_time); + + // For average latency per terminal + data->aggregated_latency[dest_terminal].sum_latency += latency; + data->aggregated_latency[dest_terminal].total_msgs++; + + // For average total latency (used in case there is no data for a specific node) + data->aggregated_latency_for_all.sum_latency += latency; + data->aggregated_latency_for_all.total_msgs++; + + // We ignore the delay if there are no more packets in the queue + if (start->is_there_another_pckt_in_queue) { + data->aggregated_next_packet_delay.sum_latency += end->next_packet_delay; + data->aggregated_next_packet_delay.total_msgs ++; + } +} + +static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp * lp, unsigned 
int src_terminal, struct packet_start const * packet_dest) { + (void) lp; + + unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id; + assert(dest_terminal < num_terminals); + + unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs; + if (total_total_datapoints == 0) { + // otherwise, we have no data to approximate the latency + tw_error(TW_LOC, "Terminal %u doesn't have any packet delay information available to predict future packet latency!\n", src_terminal); + return (struct packet_end) { + .travel_end_time = -1.0, + .next_packet_delay = -1.0, + }; + } + + // In case we have any data to determine the average for a specific terminal + unsigned int const total_datapoints_for_term = data->aggregated_latency[dest_terminal].total_msgs; + double latency = -1.0; + if (total_datapoints_for_term > 0) { + latency = data->aggregated_latency[dest_terminal].sum_latency / total_datapoints_for_term; + } else { + // If no information for that terminal exists, use average from all message + latency = data->aggregated_latency_for_all.sum_latency / total_total_datapoints; + } + assert(latency >= 0); + + // TODO (Elkin): 10 is an arbitrary small value, but it should be nic_ts as implemented in `packet_getenerate` in dragonfly-dally + double const next_packet_delay = data->aggregated_next_packet_delay.total_msgs == 0 ? 
10 : + data->aggregated_next_packet_delay.sum_latency / data->aggregated_next_packet_delay.total_msgs; + return (struct packet_end) { + .travel_end_time = packet_dest->travel_start_time + latency, + .next_packet_delay = next_packet_delay, + }; +} + +static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) { + (void) data; + (void) lp; +} + +static void reset_pred(struct latency_surrogate * data, tw_lp * lp) { + (void) lp; + + data->aggregated_next_packet_delay.sum_latency = 0; + data->aggregated_next_packet_delay.total_msgs = 0; + + data->aggregated_latency_for_all.sum_latency = 0; + data->aggregated_latency_for_all.total_msgs = 0; + + for (int i = 0; i < num_terminals; i++) { + data->aggregated_latency[i].sum_latency = 0; + data->aggregated_latency[i].total_msgs = 0; + } +} + + +struct packet_latency_predictor average_latency_predictor(int num_terminals_) { + assert(num_terminals_ >= 0); + num_terminals = num_terminals_; + return (struct packet_latency_predictor) { + .init = (init_pred_lat_f) init_pred, + .reset = (reset_pred_lat_f) reset_pred, + .feed = (feed_pred_lat_f) feed_pred, + .predict = (predict_pred_lat_f) predict_latency, + .predict_rc = (predict_pred_lat_rc_f) predict_latency_rc, + .predictor_data_sz = sizeof(struct latency_surrogate) + num_terminals * sizeof(struct aggregated_latency_one_terminal) + }; +} +// +// === END OF Average packet latency functionality diff --git a/src/surrogate/packet-latency-predictor/common.c b/src/surrogate/packet-latency-predictor/common.c new file mode 100644 index 00000000..b78d2c31 --- /dev/null +++ b/src/surrogate/packet-latency-predictor/common.c @@ -0,0 +1 @@ +#include diff --git a/src/surrogate/packet-latency-predictor/torch-jit.C b/src/surrogate/packet-latency-predictor/torch-jit.C new file mode 100644 index 00000000..e2c1384c --- /dev/null +++ b/src/surrogate/packet-latency-predictor/torch-jit.C @@ -0,0 +1,120 @@ +#include +#include +#include +#include + +#include +#include +#include + 
+#include +#include + +static torch::jit::Module packet_latency_model; + + +inline void assert_correct_dims(at::Tensor * t) { + int const dims = t->ndimension(); + + for (int i = 0; i < dims-1; i++) { + assert(at::size(*t, i) == 1); + } + assert(at::size(*t, dims - 1) == 2); +} + + +void surrogate_torch_init(char const * dir) { + std::cout << "Loading Torch-JIT model\n"; + try { + // Deserialize the ScriptModule from a file + packet_latency_model = torch::jit::load(dir); + } + catch (const c10::Error& e) { + tw_error(TW_LOC, "Error loading Torch-JIT model"); + } + + // Configuring to run on a single thread + at::set_num_threads(1); + + // === Checking consistency of model with dummy input + if (packet_latency_model.is_training()) { + std::cerr << "The Torch-JIT model was saved before running .eval(). " + "The output from the model will be as if it was in training mode, " + "meaning, it might be faulty." + << std::endl; + } + + long int data_input[] = {0, 0, 0, 0}; + size_t const n_input = sizeof(data_input) / sizeof(long int); + + std::vector inputs; + torch::NoGradGuard no_grad; + inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kLong)); + + // Predicting value + at::Tensor output = packet_latency_model.forward(inputs).toTensor(); + assert_correct_dims(&output); + // === End of check + std::cout << "Torch-JIT model loaded successfully\n"; +} + + +static struct packet_end surrogate_torch_predict(void *, tw_lp * lp, unsigned int src_terminal, struct packet_start const * packet_dest) { + //auto t_start = std::chrono::high_resolution_clock::now(); + + // Create a vector of inputs. 
+ long int data_input[] = { + src_terminal, + packet_dest->dfdally_dest_terminal_id, + packet_dest->packet_size, + packet_dest->is_there_another_pckt_in_queue + }; + size_t n_input = sizeof(data_input) / sizeof(long int); + + std::vector inputs; + inputs.emplace_back(torch::from_blob(data_input, {1, (int) n_input}, at::kLong)); + + at::Tensor output = packet_latency_model.forward(inputs).toTensor(); + //assert_correct_dims(&output); + + auto *out_data = output.data_ptr(); + return (struct packet_end) { + .travel_end_time = packet_dest->travel_start_time + (out_data[0] > 0 ? out_data[0] : 10), + .next_packet_delay = out_data[1] > 0 ? out_data[1] : 200, + }; + + //auto t_end = std::chrono::high_resolution_clock::now(); + //double total = std::chrono::duration(t_end-t_start).count(); +} + + +// Dummies to use when no actual data is fed +static void init_pred_dummy(void * data, tw_lp * lp, unsigned int src_terminal) { + (void) data; + (void) lp; + (void) src_terminal; +} + + +static void feed_pred_dummy(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) { + (void) data; + (void) lp; + (void) src_terminal; + (void) start; + (void) end; +} + + +static void predict_latency_rc_dummy(struct latency_surrogate * data, tw_lp * lp) { + (void) data; + (void) lp; +} + + +struct packet_latency_predictor torch_latency_predictor = { + .init = (init_pred_f) init_pred_dummy, + .feed = (feed_pred_f) feed_pred_dummy, + .predict = (predict_pred_f) surrogate_torch_predict, + .predict_rc = (predict_pred_rc_f) predict_latency_rc_dummy, + .predictor_data_sz = 0 +}; diff --git a/src/surrogate/zmqml/Makefile b/src/surrogate/zmqml/Makefile new file mode 100644 index 00000000..4c28ed54 --- /dev/null +++ b/src/surrogate/zmqml/Makefile @@ -0,0 +1,27 @@ + +CXX=g++ +CXXFLAGS=-g -Wall -O2 -std=c++11 $(shell pkg-config --cflags libzmq) -Wdeprecated-declarations +LDFLAGS=$(shell pkg-config --libs libzmq) -lm 
+TARGETS=libzmqmlrequester.so demozmqmlrequester + +all: $(TARGETS) + +libzmqmlrequester.so: zmqmlrequester.o + $(CXX) -shared -o $@ $^ + +zmqmlrequester.o: zmqmlrequester.cpp zmqmlrequester.h + $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ + +demozmqmlrequester: demozmqmlrequester.cpp libzmqmlrequester.so + $(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS) -L./ -lzmqmlrequester + +# doxygen +# sphinx + +clean: + rm -f $(TARGETS) + rm -f *.o + rm -f tmptestsend.dat + +distclean: clean + rm -f *~ diff --git a/src/surrogate/zmqml/NOTES.txt b/src/surrogate/zmqml/NOTES.txt new file mode 100644 index 00000000..5e82b2eb --- /dev/null +++ b/src/surrogate/zmqml/NOTES.txt @@ -0,0 +1,15 @@ + +Please copy packets-delay.csv to src/surrogate/zmqml/model/ before +starting the demo. + +You need to open two terminals for this demo. + +In the first terminal, +$ ./zmqmlserver.py + +In the second terminal, +$ ./runcppdemo.sh + + + + diff --git a/src/surrogate/zmqml/demozmqmlrequester.cpp b/src/surrogate/zmqml/demozmqmlrequester.cpp new file mode 100644 index 00000000..95e866a4 --- /dev/null +++ b/src/surrogate/zmqml/demozmqmlrequester.cpp @@ -0,0 +1,135 @@ +#include "zmqmlrequester.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +static void test_blockingcall() { + cout << "* test_blockingcall" << endl; + + vector args = {"sleep", "1"}; + vector result = zmqml_request("execute", args); + + cout << "status:" << result[0] << endl; +} + +static void test_nonblockingcall() { + cout << "* test_nonblockingcall" << endl; + + vector args = {"sleep", "3"}; + vector ret = zmqml_request("launch", args); + + string status = ret[0]; + string id = ret[1]; + cout << "status=" << status << " id=" << id << endl; + + int cnt = 0; + while (true) { + ret = zmqml_request("query", {id}); + status = ret[0]; + cout << "status=" << status << endl; + if (status == "done") { + break; + } + this_thread::sleep_for(chrono::milliseconds(500)); + cnt++; + } + cout << "done cnt=" << 
cnt << endl; +} + +static void test_send_binary() { + cout << "* test_send_binary" << endl; + + string data; + ifstream file("model/ml-model.pt", ios::binary); + + if (file) { + file.seekg(0, ios::end); + data.resize(file.tellg()); + file.seekg(0, ios::beg); + file.read(&data[0], data.size()); + file.close(); + } else { + cerr << "Failed to open the file." << endl; + return; + } + + vector ret = zmqml_request("send", + {"tmptestsend.dat"}, // dest filename + data); + string status = ret[0]; + cout << "status=" << status << endl; +} + +static void measure_latency() { + cout << "* measure_latency" << endl; + + vector tss; + + int n = 1000; + for (int i = 0; i < n; ++i) { + auto start_time = chrono::steady_clock::now(); + vector result = zmqml_request("nothing"); + auto end_time = chrono::steady_clock::now(); + auto duration = chrono::duration(end_time - start_time).count(); + tss.push_back(duration); + } + double sum = 0; + for (double ts : tss) sum += ts; + double mean = sum / tss.size(); + double sum_sq_diff = 0; + for (double ts : tss) sum_sq_diff += (ts - mean) * (ts - mean); + double std_dev = sqrt(sum_sq_diff / tss.size()); + cout << "zmqcmd latency: mean = " << mean << ", std deviation = " << std_dev << endl; +} + + +void test_mlpacketdelay_training() { + std::cout << "* test_mlpacketdelay_training" << std::endl; + + vector args = {"mlpacketdelay_training", + "--method", "MLP", "--epoch", "1", + "--input-file", "model/packets-delay.csv", + "--model-path", "ml-model.pt"}; + + vector ret = zmqml_request("launch", args); + + string status = ret[0]; + string id = ret[1]; + cout << "status=" << status << " id=" << id << endl; + + int cnt = 0; + while (true) { + ret = zmqml_request("query", {id}); + status = ret[0]; + cout << "status=" << status << endl; + if (status == "done") { + break; + } + this_thread::sleep_for(chrono::milliseconds(500)); + cnt++; + } + cout << "done cnt=" << cnt << endl; +} + + + +int main () { + if(0) { + test_send_binary(); + 
test_blockingcall(); + test_nonblockingcall(); + measure_latency(); + } + + test_mlpacketdelay_training(); + + zmqml_request("exit"); + return 0; +} diff --git a/src/surrogate/zmqml/model/ml-model.pt b/src/surrogate/zmqml/model/ml-model.pt new file mode 100644 index 00000000..20080927 Binary files /dev/null and b/src/surrogate/zmqml/model/ml-model.pt differ diff --git a/src/surrogate/zmqml/model/mlpacketdelay.py b/src/surrogate/zmqml/model/mlpacketdelay.py new file mode 100644 index 00000000..4cab7c46 --- /dev/null +++ b/src/surrogate/zmqml/model/mlpacketdelay.py @@ -0,0 +1,258 @@ +import argparse +import os +import random +import time +import warnings +from itertools import product +from pathlib import Path + +import pandas as pd +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from sklearn.preprocessing import MinMaxScaler + + +warnings.filterwarnings("ignore") + +class MLP(nn.Module): + def __init__(self, total_terminals, max_packet_size, h_dim, out_dim, norm_max_list, + norm_min_list, channels=1): + super().__init__() + self.norm_max_list = norm_max_list + self.norm_min_list = norm_min_list + self.total_terminals = total_terminals + self.max_packet_size = max_packet_size + self.channels = channels + self.weights = nn.Parameter(torch.Tensor(channels, total_terminals, total_terminals)) + self.reg = nn.Sequential( + nn.Linear(channels + 2, h_dim), + nn.ReLU(), + nn.Linear(h_dim, out_dim), + ) + + nn.init.uniform_(self.weights, 0, 1) + + def forward(self, input_seq): + tt = self.total_terminals + input_src_terminal = F.one_hot(input_seq[:, 0], num_classes=tt) + input_src_terminal = input_src_terminal.reshape((-1, 1, 1, tt)).float() + input_dest_terminal = F.one_hot(input_seq[:, 1], num_classes=tt) + input_dest_terminal = input_dest_terminal.reshape((-1, 1, tt, 1)).float() + + # assuming the input was a single row, it could be written as matrix + # multiplication as + # combined = input_src_terminal @ self.weights @ 
input_dest_terminal.T + combined = torch.matmul(torch.matmul(input_src_terminal, self.weights), input_dest_terminal) + + input_size = input_seq[:, 2].reshape((-1, 1)).float() / self.max_packet_size + input_is_there_another = input_seq[:, 3].reshape((-1, 1)).float() + + input_seq = torch.concat( + (combined.reshape((-1, self.channels)), + input_size, # size + input_is_there_another, # is_there_another_pckt_in_queue + ), + dim=1 + ).float() + + pred = self.reg(input_seq) + + if not self.training: + pred = self.denormalize(pred) + return pred + + def denormalize(self, pred_norm): + pred = torch.zeros(pred_norm.shape) + for i in range(pred_norm.shape[1]): + pred[:, i] = pred_norm[:, i]*(self.norm_max_list[i] - self.norm_min_list[i]) + self.norm_min_list[i] + + return pred + +def split(data): + # removing packets with no info and shuffle data + noinfo_index = (data['next_packet_delay'] != -1) + noinfo2_index = (data['is_there_another_pckt_in_queue'] != 0) + data = data[np.bitwise_and(noinfo_index, noinfo2_index)] + data = data.sample(frac=1, random_state=1) + + #split data + train_data = data[0:int(0.8*len(data))] + test_data = data[int(0.8*len(data)):] + + return train_data, test_data + +def extract_process_data(train_data, X_columns, Y_columns): + # encode input data with one-hot encoding + # categories = np.unique(train_data[X_columns[0]].values) + # X_train = np.zeros((train_data.shape[0], len(X_columns), len(categories))) + # for i in range(len(X_columns)): + # column = train_data[X_columns[i]].values + # categories = np.unique(column) + # X_train[:, i, :] = np.array([np.array(item == categories, dtype=int) for item in column]) + X_train = train_data[X_columns].values + + # normalize output data with minimax + scaler = MinMaxScaler() + Y_train = scaler.fit_transform(train_data[Y_columns].values) # x_normalized = (x-x_min)/(x_max-x_min) + norm_max_list, norm_min_list = scaler.data_max_, scaler.data_min_ + + return X_train, Y_train, norm_max_list, norm_min_list + 
+def extract_data(test_data, X_columns, Y_columns):
+    """Return the raw (unscaled) values of the X and Y columns of test_data."""
+    X_test = test_data[X_columns].values
+    Y_test = test_data[Y_columns].values
+    return X_test, Y_test
+
+def cal_rmse(pred, test):
+    """Root-mean-square of the element-wise difference pred - test."""
+    return np.sqrt(np.mean(np.square(pred - test)))
+
+def main_func(args):
+    """Train and/or evaluate the predictor selected by args.method; prints its RMSE and the elapsed time."""
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.manual_seed(args.seed)
+        torch.cuda.manual_seed_all(args.seed)
+    os.environ['PYTHONHASHSEED'] = str(args.seed)
+    os.environ['OMP_NUM_THREADS'] = '1'
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+    data = pd.read_csv(args.input_file)
+    # split() drops rows without next-packet information, shuffles, and cuts 80/20
+
+    train_data, test_data = split(data)
+
+    start_time = time.time()
+
+    if args.method == 'MLP':
+        X_columns = ['src_terminal', 'dest_terminal', 'size', 'is_there_another_pckt_in_queue']
+        Y_columns = ['latency', 'next_packet_delay']
+        X_train, Y_train, norm_max_list, norm_min_list = extract_process_data(train_data, X_columns, Y_columns)
+        X_test, Y_test = extract_data(test_data, X_columns, Y_columns)
+        y_dim = Y_train.shape[1]
+
+        if args.load_model:
+            print("Loading model from disk")
+            mlp = torch.jit.load(args.model_path)
+        else:
+            print("Generating model from scratch")
+            mlp = MLP(args.terminals, args.pck_size, args.h_dim, y_dim, torch.FloatTensor(norm_max_list), torch.FloatTensor(norm_min_list))
+
+        optimizer = torch.optim.Adam(mlp.parameters(), lr=0.001)
+        loss_function = nn.MSELoss()
+        mlp.train()
+
+        all_idx = list(range(len(X_train)))
+        random.shuffle(all_idx)
+
+        batch_size = 1024
+        batch_num = len(X_train) // batch_size if len(X_train) % batch_size == 0 else len(X_train) // batch_size + 1
+
+        for i in range(args.epoch):
+            epoch_loss = 0
+            for batch_idx in range(batch_num):
+                is_final_batch = (batch_idx == (batch_num - 1))
+
+                if not is_final_batch:
+                    idx = all_idx[batch_idx * batch_size: (batch_idx + 1) * batch_size]
+                else:
+                    idx = all_idx[batch_idx * batch_size:]
+
+                x, y = X_train[idx], Y_train[idx]
+
+                x, y = torch.LongTensor(x), torch.FloatTensor(y)
+
+                optimizer.zero_grad()
+                y_pred = mlp(x)
+                loss = loss_function(y_pred, y)
+                loss.backward()
+                optimizer.step()
+                epoch_loss += loss.item()  # plain float: no autograd graph is kept alive across batches
+
+            print(i, epoch_loss)
+
+        # EVAL has to be called before saving the state of the network
+        mlp.eval()
+
+        mlp_scripted = torch.jit.script(mlp)
+        mlp_scripted.save(args.model_path)
+
+        X_test = torch.LongTensor(X_test)
+        with torch.no_grad():
+            Y_pred = mlp(X_test).numpy()
+
+        rmse = cal_rmse(Y_pred, Y_test)
+
+    elif args.method == 'Average':
+        train_data = train_data[['src_terminal', 'dest_terminal', 'latency']]
+        test_data = test_data[['src_terminal', 'dest_terminal', 'latency']]
+
+        mean_src_dest = train_data.groupby(['src_terminal', 'dest_terminal']).mean()
+        mean_src = train_data.groupby(['src_terminal']).mean()
+        mean_dest = train_data.groupby(['dest_terminal']).mean()
+        total_avg = train_data['latency'].mean()  # latency only, not the terminal-id columns
+
+        terminal2terminal = np.zeros((args.terminals, args.terminals))
+        # per pair: exact (src, dest) mean if seen; src-only or dest-only mean when just one side was seen; else overall mean
+        for i, j in product(range(args.terminals), range(args.terminals)):
+            if (i, j) in mean_src_dest.index:
+                latency = mean_src_dest.loc[(i, j), 'latency']
+            elif i in mean_src.index and j not in mean_dest.index:
+                latency = mean_src.loc[i, 'latency'].item()
+            elif i not in mean_src.index and j in mean_dest.index:
+                latency = mean_dest.loc[j, 'latency'].item()
+            else:
+                latency = total_avg
+            terminal2terminal[i, j] = latency
+
+        items = test_data[['src_terminal', 'dest_terminal']].values
+        src = items[:, 0]
+        dest = items[:, 1]
+        pred = terminal2terminal[src, dest]
+
+        rmse = cal_rmse(pred, test_data['latency'].values)
+
+    end_time = time.time()
+
+    print('rmse:', rmse)
+    print('Time:', end_time - start_time)
+
+    if args.plot_weights:
+        if args.method == 'MLP':
+            with torch.no_grad():
+                terminal2terminal = mlp.weights.numpy()
+                terminal2terminal = terminal2terminal[0, :, :] # extracting first channel weights
+
+        import matplotlib.pyplot as plt
+        fig, ax = plt.subplots()
+        c = ax.imshow(terminal2terminal, cmap='RdBu', interpolation='nearest')
+        fig.colorbar(c, ax=ax)
+        plt.show()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Delay Prediction")
+    parser.add_argument('--method', type=str, default='MLP', choices=['MLP','Average'])
+    parser.add_argument('--epoch', type=int, default=10, help='epochs to train')
+    parser.add_argument('--h-dim', type=int, default=16, help='dimension of the hidden layer')
+    parser.add_argument('--seed', type=int, default=0)
+    parser.add_argument('--pck_size', type=int, default=4096, help='maximum packet size in simulation')
+    parser.add_argument('--terminals', type=int, default=72, help='total number of terminals in the network')
+    parser.add_argument('--input-file', type=Path, default=Path('packet-delays.txt'))
+#    parser.add_argument('--load-model', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--load-model', action='store_true', default=False,
+                        help='whether to load model from file or start from scratch')
+    parser.add_argument('--model-path', type=Path, default=Path('MLP_Surrogate-combined.pt'))
+#    parser.add_argument('--plot-weights', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--plot-weights', action='store_true', default=False,
+                        help='whether to show weights from source to destination')
+
+    args = parser.parse_args()
+
+    main_func(args)
diff --git a/src/surrogate/zmqml/model/train.sh b/src/surrogate/zmqml/model/train.sh
new file mode 100644
index 00000000..0b52b953
--- /dev/null
+++ b/src/surrogate/zmqml/model/train.sh
@@ -0,0 +1,3 @@
+python mlpacketdelay.py --method MLP --epoch 50 \
+    --input-file data/packets-delay.csv \
+    --model-path ml-model.pt
diff --git a/src/surrogate/zmqml/pydemozmqmlrequester.py b/src/surrogate/zmqml/pydemozmqmlrequester.py
new file mode 100755
index 
00000000..b6929279
--- /dev/null
+++ b/src/surrogate/zmqml/pydemozmqmlrequester.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+
+#
+# pyrequester : a requester sample implementation to ZeroMQ-based ML
+# task dispatching server
+#
+# Written by Kazutomo Yoshii
+#
+
+import zmq
+import json
+import time
+import numpy as np
+import sys
+import re
+
+debug = False # XXX: make this argument
+
+endpoint = "tcp://localhost:5555" # XXX: make this configurable
+
+def zmqml_request(cmd, args=None, bindata=b"None"):
+    """
+    Sends a command to the module-level ``endpoint`` using ZeroMQ and waits for a response.
+
+    The request is the UTF-8 JSON object ``{"cmd": ..., "args": ...}`` followed by a
+    NUL byte and then ``bindata``.
+
+    :param cmd: the command to be sent.
+    :type cmd: str
+    :param args: A list of arguments for the command where the first argument is the function name. Defaults to None.
+    :type args: list, optional
+    :param bindata: raw bytes appended after the NUL delimiter. Defaults to b"None".
+    :type bindata: bytes, optional
+    :return: The reply decoded from JSON; its "status" entry is read here, so it must be present.
+    :rtype: dict
+    :raises zmq.ZMQError: Raises an exception if there is an issue with the ZeroMQ communication.
+
+    Example usage:
+    >>> zmqml_request("execute", ["mlpacketdelay", "param1", "param2"])
+    """
+
+    context = zmq.Context()
+    socket = context.socket(zmq.REQ)
+    try:
+        socket.connect(endpoint)
+
+        # the first arg in args is the function name (e.g., mlpacketdelay)
+        msg = {"cmd":cmd, "args":args}
+        msgencoded = json.dumps(msg).encode('utf-8')
+
+        delimiter = b'\x00'
+        payload = msgencoded + delimiter + bindata
+        socket.send(payload)
+
+        response = socket.recv_json()
+        status = response["status"]
+        if debug:
+            print("status:", status)
+    finally:
+        # Release the socket and the context even when the exchange fails;
+        # linger=0 discards unsent data so that term() cannot block
+        socket.close(linger=0)
+        context.term()
+
+    return response
+
+#
+#
+def measure_latency():
+    print("* measure_latency")
+    tss = []
+    n = 1000
+    for i in range(0,n):
+        st = time.time()
+        zmqml_request("nothing") # blocking
+        tss.append(time.time() - st)
+    print('zmqcmd latency:', np.mean(tss), np.std(tss))
+
+#
+#
+def test_blocking_sleep():
+    print("* test_blocking_sleep")
+
+    target = ["sleep", "1"] # this works like args to main() in C
+
+    ret = zmqml_request("execute", target) # blocking
+    print(f'status={ret["status"]} et={ret["et"]}')
+    print("done")
+
+#
+# Polls "query" for a launched job every 0.5 s until the reply status is
+# "done" or "failed"; returns how many polls saw any other status
+#
+def _poll_until_finished(job_id):
+    cnt = 0
+    while True:
+        ret = zmqml_request("query", [job_id])
+        status = ret["status"]
+        print(f"status={status}")
+        if status in ("done", "failed"):
+            break
+        time.sleep(.5)
+        cnt = cnt + 1
+    return cnt
+
+#
+#
+def test_nonblocking_sleep():
+    print("* test_nonblocking_sleep")
+
+    target = ["sleep", "2"]
+
+    ret = zmqml_request("launch", target)
+    status = ret["status"]
+    id = ret["id"]
+    print(f'status={status} id={id}')
+    if status == "failed":
+        return  # the launch was rejected, so there is no job to poll
+
+    cnt = _poll_until_finished(id)
+    print(f"done cnt={cnt}")
+
+
+#
+#
+def test_mlpacketdelay_training():
+    print("* test_mlpacketdelay_training")
+
+    target = ["mlpacketdelay_training",
+              "--method", "MLP", "--epoch", "1",
+              "--input-file", "model/packets-delay.csv",
+              "--model-path", "ml-model.pt"]
+
+    ret = zmqml_request("launch", target)
+    status = ret["status"]
+    id = ret["id"]
+    print(f'status={status} id={id}')
+    if status == "failed":
+        return  # the launch was rejected, so there is no job to poll
+
+    cnt = _poll_until_finished(id)
+    print(f"done cnt={cnt}")
+
+#
+#
+def test_send_binary():
+    print("* test_send_binary")
+
+    data = b""
+    with open('model/ml-model.pt', 'rb') as f:
+        data = f.read()
+
+    ret = zmqml_request("send", ["tmptestsend.dat"], data)
+    status = ret["status"]
+    print(f"status={status}")
+
+
+if __name__ == "__main__":
+    test_mlpacketdelay_training()
+
+    test_send_binary()
+    test_blocking_sleep()
+    test_nonblocking_sleep()
+    measure_latency()
+
+    zmqml_request("exit")
+    sys.exit(0)
diff --git a/src/surrogate/zmqml/runcppdemo.sh b/src/surrogate/zmqml/runcppdemo.sh
new file mode 100755
index 00000000..fe460392
--- /dev/null
+++ b/src/surrogate/zmqml/runcppdemo.sh
@@ -0,0 +1,2 @@
+make
+LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd` ./demozmqmlrequester
diff --git a/src/surrogate/zmqml/runmlpacketdelay.py b/src/surrogate/zmqml/runmlpacketdelay.py
new file mode 100644
index 00000000..11a1ce33
--- /dev/null
+++ b/src/surrogate/zmqml/runmlpacketdelay.py
@@ -0,0 +1,31 @@
+
+import argparse
+from pathlib import Path
+
+from model import mlpacketdelay
+
+def run_mlpacketdelay_training(args):
+    parser = argparse.ArgumentParser(description="Delay Prediction")
+    parser.add_argument('--method', type=str, default='MLP', choices=['MLP','Average'])
+    parser.add_argument('--epoch', type=int, default=10, help='epochs to train')
+    parser.add_argument('--h-dim', type=int, default=16, help='dimension of the hidden layer')
+    parser.add_argument('--seed', type=int, default=0)
+    parser.add_argument('--pck_size', type=int, default=4096, help='maximum packet size in simulation')
+    parser.add_argument('--terminals', type=int, default=72, help='total number of terminals in the network')
+    parser.add_argument('--input-file', type=Path, default=Path('packet-delays.txt'))
+#    parser.add_argument('--load-model', action=argparse.BooleanOptionalAction, default=False,
+    parser.add_argument('--load-model', action='store_true', default=False,
+                        help='whether to load 
model from file or start from scratch') + parser.add_argument('--model-path', type=Path, default=Path('MLP_Surrogate-combined.pt')) +# parser.add_argument('--plot-weights', action=argparse.BooleanOptionalAction, default=False, + parser.add_argument('--plot-weights', action='store_true', default=False, + help='whether to show weights from source to destination') + + # parsed_args = parser.parse_args(["--method", "MLP", "--epoch", "1", # 50 + # "--input-file", "model/data/packets-delay.csv", + # "--model-path", "model/ml-model.pt"]) + + parsed_args = parser.parse_args(args) + + mlpacketdelay.main_func(parsed_args) + diff --git a/src/surrogate/zmqml/zmqmlrequester.cpp b/src/surrogate/zmqml/zmqmlrequester.cpp new file mode 100644 index 00000000..6f43758c --- /dev/null +++ b/src/surrogate/zmqml/zmqmlrequester.cpp @@ -0,0 +1,124 @@ +#include "zmqmlrequester.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "rapidjson/document.h" +#include "rapidjson/writer.h" +#include "rapidjson/stringbuffer.h" + +using namespace std; +using namespace rapidjson; + +static string endpoint = "tcp://localhost:5555"; +static int debug = 0; + +/** + * See zmqmlrequester.h + */ +vector zmqml_request(const string& cmd, + const vector& args, + const string& bindata) { + zmq::context_t context(1); + zmq::socket_t socket(context, ZMQ_REQ); + socket.connect(endpoint); + + Document msg; + msg.SetObject(); + auto& allocator = msg.GetAllocator(); + + Value cmdValue; + cmdValue.SetString(cmd.c_str(), cmd.length(), msg.GetAllocator()); + msg.AddMember("cmd", cmdValue, msg.GetAllocator()); + + if (args == std::vector()) { + Value argsArray(kArrayType); + argsArray.PushBack(Value("dummy", allocator), allocator); + msg.AddMember("args", argsArray, allocator); + } else { + Value argsArray(kArrayType); + for(const auto& arg: args) { + argsArray.PushBack(Value(arg.c_str(), allocator), allocator); + } + msg.AddMember("args", argsArray, allocator); + } + 
+ StringBuffer buffer; + Writer writer(buffer); + msg.Accept(writer); + + if (debug) cout << buffer.GetString() << endl; + + string bufferstr = buffer.GetString(); + const char delimiter = '\0'; + string jsonbinmsg = bufferstr + delimiter + bindata; + + zmq::message_t reqmsg(jsonbinmsg.begin(), jsonbinmsg.end()); + socket.send(reqmsg, zmq::send_flags::none); + + zmq::message_t reply; + socket.recv(reply); + + string tmp(static_cast(reply.data()), reply.size()); + Document response; + response.Parse(tmp.c_str()); + + vector ret; + + if (response.HasMember("status")) { + ret.push_back(response["status"].GetString()); + + if (response.HasMember("et")) { + ret.push_back(response["et"].GetString()); + } + + if (response.HasMember("id")) { + ret.push_back(response["id"].GetString()); + } + } else { + ret.push_back("failed"); + } + + return ret; +} + + + + +#if 0 +/** + * @brief Finds all occurrences of a regex pattern within a given + * input string and returns them. + * + * This function searches for all matches of the `pattern` within the + * `input` string, extracting the first captured group from each + * match. Each match found by applying the regular expression is added + * to a vector of strings, which is then returned. + * + * @param pattern The regular expression pattern to search for within + * the input string. The pattern should include at least one capturing group. + * @param input The string to search within for the pattern. + * @return A `std::vector` containing all the matches + * found. Each element in the vector is the first captured + * group from a match of the pattern in the input. 
+ */ +static std::vector findall(const std::string& pattern, const std::string& input) { + std::vector matches; + std::regex re(pattern); + auto words_begin = std::sregex_iterator(input.begin(), input.end(), re); + auto words_end = std::sregex_iterator(); + + for (auto it = words_begin; it != words_end; ++it) { + std::smatch match = *it; + matches.push_back(match.str(1)); // Extract the first captured group + } + return matches; +} +#endif diff --git a/src/surrogate/zmqml/zmqmlrequester.h b/src/surrogate/zmqml/zmqmlrequester.h new file mode 100644 index 00000000..220f1a51 --- /dev/null +++ b/src/surrogate/zmqml/zmqmlrequester.h @@ -0,0 +1,38 @@ +#ifndef __ZMQREQUESTER_H_DEFINED__ +#define __ZMQREQUESTER_H_DEFINED__ + +#include +#include + +/** + * @brief Sends a request over ZeroMQ with the specified command and arguments, + * receives a reply + * + * This function constructs a JSON message with the provided command + * and arguments, sends it over a ZeroMQ REQ socket, waits for the + * reply, parses the JSON response, and extracts the 'status', 'et' + * (if present), and 'id' (if present) fields. It constructs a vector + * of strings containing these fields for the return value. If the + * 'status' field is not present in the response, it returns a vector + * containing "failed". + * + * @param cmd zmqml request command: 'query', 'launch', execute', send', 'nothing', 'exit' + * @param args the arguments for launch and execute + * @param bindata binary data from send + * @return vector A vector containing the 'status' field and + * optionally 'et' and 'id'. 'status' is not present, returns + * a vector with "failed". + * + * @exception std::runtime_error Thrown if there are any issues with ZeroMQ communication. + * @exception rapidjson::ParseErrorException Thrown if parsing the JSON response fails. + * @note This function assumes that the 'endpoint' variable (used in + * socket.connect) is defined externally and is accessible + * within this function scope. 
Ensure 'endpoint' is properly
+ *       configured before calling this function.
+ * @note If 'debug' is true, the JSON message sent is printed to standard output.
+ */
+extern std::vector zmqml_request(const std::string& cmd,
+                                 const std::vector& args = std::vector(),
+                                 const std::string& bindata = "None"
+                                 );
+#endif
diff --git a/src/surrogate/zmqml/zmqmlserver.py b/src/surrogate/zmqml/zmqmlserver.py
new file mode 100755
index 00000000..066b0512
--- /dev/null
+++ b/src/surrogate/zmqml/zmqmlserver.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python
+
+#
+# zmqmlserver : ZeroMQ-based ML task dispatching server
+#
+# Written by Kazutomo Yoshii
+#
+
+import zmq
+import json
+import threading
+import sys
+import time
+from itertools import count # generate unit id
+# from dataclasses import dataclass
+
+# TODO: abstract a mechanism to call training
+from runmlpacketdelay import run_mlpacketdelay_training
+
+#import os
+#model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "model"))
+#sys.path.insert(0, model_dir)
+
+endpoint = "tcp://*:5555"
+
+debug = False
+
+#
+#
+#
+launch_id = count(start=1) # unique for launched thread
+launched_threads = {} # id:obj. keep track of active threads. remove the thread once it finished
+
+class LaunchCMD:
+    """One background job: its thread, start time, id and completion event."""
+    def __init__(self):
+        # thread event
+        self.done_ev = threading.Event() # set by the job function when it finishes
+
+    def launch(self, func, func_args):
+        """Run func(done_ev, func_args) in a new thread; return its id, or -1 if the thread could not start."""
+        self.thread = threading.Thread(target=func, args=(self.done_ev, func_args))
+        try:
+            self.thread.start()
+            self.st = time.time()
+            self.id = next(launch_id)
+            launched_threads[self.id] = self
+        except RuntimeError as e:
+            print(f"Failed to launch: {e}")
+            self.id = -1
+
+        return self.id
+
+    def query(self):
+        """Return (status, seconds since launch); a finished job is joined and dropped from launched_threads."""
+        res = self.done_ev.is_set()
+        status = "running"
+        if res:
+            self.thread.join()
+            launched_threads.pop(self.id, None)
+            if debug:
+                print("thread joined")
+            status = "done"
+
+        return (status, time.time() - self.st)
+
+#
+# launchable functions by LaunchCMD here
+#
+def launch_sleep(done_event, args):
+    if debug:
+        print("sleep started")
+    try:
+        time.sleep(int(args[0]))
+        if debug:
+            print("sleep done")
+    finally:
+        # Signal completion even when the job raised (e.g. a bad argument);
+        # otherwise query() would report "running" forever
+        done_event.set()
+
+def launch_mlpacketdelay_training(done_event, args):
+    if debug:
+        print("mlpacketdelay_training started")
+
+    try:
+        run_mlpacketdelay_training(args)
+
+        if debug:
+            print("mlpacketdelay_training done")
+    finally:
+        # Signal completion even when training raised or argument parsing
+        # exited; otherwise query() would report "running" forever
+        done_event.set()
+
+
+list_nonblockingcalls = {
+    "sleep": launch_sleep,
+    "mlpacketdelay_training": launch_mlpacketdelay_training,
+}
+
+#
+# Launches args[0] (a key of list_nonblockingcalls) with args[1:] in a thread
+#
+def nonblockingcall(args):
+    # An empty or missing argument list names no target function
+    if not args:
+        return ("failed", -1)
+
+    func = args[0] # the 1st arg is the target func
+    func_args = args[1:]
+
+    status = "failed"
+
+    threadid = -1
+    if func in list_nonblockingcalls:
+        launchcmd = LaunchCMD()
+
+        threadid = launchcmd.launch(
+            list_nonblockingcalls[func], # func
+            func_args # args
+        )
+        if threadid > 0:
+            # launch() has already registered the job in launched_threads
+            status = "done"
+
+    return (status, threadid)
+
+#
+# define blocking-call functions here
+#
+def func_sleep(args):
+    time.sleep(int(args[0]))
+    return True
+
+#
+# register blocking call functions to list_blockingcalls
+#
+list_blockingcalls = {
+    "sleep" : func_sleep
+}
+
+def blockingcall(args):
+    func = args[0] # the 1st arg is the target func
+    func_args = args[1:]
+
+ 
status = "failed" + st = time.time() + if func in list_blockingcalls: + if list_blockingcalls[func](func_args): # dispatch through the registry instead of hard-coding func_sleep + status = "done" + + elapsed_time = time.time() - st + return (status, elapsed_time) + + +# +# receive bindata +# +def receivedata(args, bindata): + destfn = args[0] + status = "failed" + st = time.time() + with open(destfn, "wb") as f: + f.write(bindata) + status = "done" + + elapsed_time = time.time() - st + return (status, elapsed_time) + + +# +# main listener loop +# XXX: add mechanisms for multiple requesters +# +def zmq_cmd_listener(): + context = zmq.Context() + socket = context.socket(zmq.REP) + socket.bind(endpoint) + + while True: + tmp = socket.recv() + delimiter = b'\x00' + msgraw, bindata = tmp.split(delimiter, 1) + msg = json.loads(msgraw.decode('utf-8')) + cmd = msg["cmd"] + args = msg.get("args",[]) + + if debug: + print(f"Received cmd:{cmd} args:{args}") + + retmsg = {"status":"none"} # empty status + + if cmd == "nothing": # this cmd does nothing. to measure the latency + retmsg = {"status":"done"} + elif cmd == "execute": + (status, et) = blockingcall(args) + retmsg = {"status":status, "et":str(et)} + elif cmd == "launch": + (status, id) = nonblockingcall(args) + retmsg = {"status":status, "id":str(id)} + elif cmd == "query": + targetid = int(args[0]) + (status, et) = launched_threads[targetid].query() if targetid in launched_threads else ("failed", 0.0) # unknown or already-reaped id must not crash the listener + retmsg = {"status":status, "et":str(et)} + elif cmd == "send": + destfn = args[0] + (status, et) = receivedata(args, bindata) + retmsg = {"status":status, "et":str(et)} + + # send response back to the requester + socket.send_json(retmsg) + + if cmd == "exit": + # XXX: add codes to kill active threads + break + +# +# +# +if __name__ == "__main__": + if debug: + print("start zmq_cmd_listener") + + zmq_cmd_listener() + + if debug: + print("done") diff --git a/src/util/codes_mapping.c b/src/util/codes_mapping.c index 9a8554ba..359a6622 100644 --- a/src/util/codes_mapping.c +++ b/src/util/codes_mapping.c @@ -519,6 +519,21 @@ static void 
codes_mapping_init(void) return; } +tw_lpid codes_mapping_count_lps_of_type(char const lp_type_name[MAX_NAME_LENGTH]) +{ + tw_lpid count = 0; + for (tw_lpid lpid = 0; lpid < g_tw_nlp; lpid ++) { + tw_lpid ross_gid = g_tw_lp[lpid]->gid; + int grp_id, lpt_id, rep_id, offset; + char this_lp_type[MAX_NAME_LENGTH]; + codes_mapping_get_lp_info(ross_gid, NULL, &grp_id, this_lp_type, &lpt_id, NULL, &rep_id, &offset); // This lookup could be speed up, but making this call is far simpler rn + if (strncmp(lp_type_name, this_lp_type, MAX_NAME_LENGTH) == 0) { // strncmp returns 0 on a match; count only LPs of the requested type + count++; + } + } + return count; +} + /* This function takes the global LP ID, maps it to the local LP ID and returns the LP * lps have global and local LP IDs * global LP IDs are unique across all PEs, local LP IDs are unique within a PE */ diff --git a/src/util/congestion-controller.C b/src/util/congestion-controller.C index a0dab10c..d8ff1a1a 100644 --- a/src/util/congestion-controller.C +++ b/src/util/congestion-controller.C @@ -906,6 +906,59 @@ static double calculate_bandwidth_usage_percent(int bytes_transmitted, double ma return percent_bw; } +void save_tlc_state(tlc_state * into, tlc_state const * from) { + memcpy(into, from, sizeof(tlc_state)); + into->ejected_rate_windows = (double*) malloc(cc_bandwidth_rolling_window_count * sizeof(double)); + for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) { + into->ejected_rate_windows[i] = from->ejected_rate_windows[i]; + } +} + +void clean_tlc_state(tlc_state * state) { + free(state->ejected_rate_windows); +} + +bool check_tlc_state(tlc_state * before, tlc_state * after) { + bool is_same = true; + + is_same &= before->terminal_id == after->terminal_id; + is_same &= before->app_id == after->app_id; + is_same &= before->abatement_signal_count == after->abatement_signal_count; + is_same &= before->window_epoch == after->window_epoch; + is_same &= before->ejected_packet_bytes == after->ejected_packet_bytes; + + for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) 
{ + is_same &= before->ejected_rate_windows[i] == after->ejected_rate_windows[i]; + } + + is_same &= before->cur_average_rate == after->cur_average_rate; + is_same &= before->is_abatement_active == after->is_abatement_active; + is_same &= *before->workloads_finished_flag_ptr == *after->workloads_finished_flag_ptr; + is_same &= before->current_injection_bandwidth_coef == after->current_injection_bandwidth_coef; + + return is_same; +} + +void print_tlc_state(FILE * out, char const * prefix, tlc_state * state) { + fprintf(out, "%s tlc_state ->\n", prefix); + fprintf(out, "%s | terminal_id = %d\n", prefix, state->terminal_id); + fprintf(out, "%s | app_id = %d\n", prefix, state->app_id); + fprintf(out, "%s | abatement_signal_count = %d\n", prefix, state->abatement_signal_count); + fprintf(out, "%s | window_epoch = %u\n", prefix, state->window_epoch); + fprintf(out, "%s | ejected_packet_bytes = %u\n", prefix, state->ejected_packet_bytes); + + fprintf(out, "%s | ejected_rate_windows[%d] = [", prefix, cc_bandwidth_rolling_window_count); + for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) { + fprintf(out, "%g%s", state->ejected_rate_windows[i], i == cc_bandwidth_rolling_window_count - 1 ? 
"" : ", "); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | cur_average_rate = %g\n", prefix, state->cur_average_rate); + fprintf(out, "%s | is_abatement_active = %d\n", prefix, state->is_abatement_active); + fprintf(out, "%s | workloads_finished_flag_ptr = %d\n", prefix, *state->workloads_finished_flag_ptr); + fprintf(out, "%s | current_injection_bandwidth_coef = %g\n", prefix, state->current_injection_bandwidth_coef); +} + void cc_terminal_process_bandwidth_check(tlc_state *s, congestion_control_message *msg, tw_lp *lp) { double usage_percent = calculate_bandwidth_usage_percent(s->ejected_packet_bytes, s->params->terminal_configured_bandwidth, 1); //multiplier for multiple rails but right now we're just using 1 diff --git a/src/util/jobmap-impl/jobmap-list.c b/src/util/jobmap-impl/jobmap-list.c index 7876727f..5ab3abf4 100644 --- a/src/util/jobmap-impl/jobmap-list.c +++ b/src/util/jobmap-impl/jobmap-list.c @@ -31,6 +31,12 @@ struct jobmap_list { int num_jobs; int *rank_counts; int **global_ids; + + // This is a look up table containing the same info as above, but with O(1) access. + // It is used by `jobmap_list_to_local`. This solves a scalibility bug that appears + // when all jobs combined have many nodes (> 8K nodes) + int highest_global_id; + struct codes_jobmap_id * id_to_jobmap; }; #define COND_REALLOC(_len_expr, _cap_var, _buf_var) \ @@ -148,6 +154,37 @@ static int jobmap_list_configure(void const * params, void ** ctx) } } while (!feof(f)); + // === Building id_to_jobmap lookup table === + // There's some room for improvement (we can probably loop fewer times and fuze some + // loops together), but they are relatively inexpensive when done once at the start + // of the simulation, so this acceptable + + // Finding highest global id. 
Although we should be able to get this from the network + // configuration file, we look it up in here to keep different parts of CODES separated/modularized + lst->highest_global_id = -1; + for(int i=0; i<lst->num_jobs; i++) { + for(int j=0; j < lst->rank_counts[i]; j++) { + if(lst->highest_global_id < lst->global_ids[i][j]) { + lst->highest_global_id = lst->global_ids[i][j]; + } + } + } + lst->id_to_jobmap = calloc(lst->highest_global_id + 1, sizeof(*lst->id_to_jobmap)); + for (int i=0; i<=lst->highest_global_id; i++) { + lst->id_to_jobmap[i].job = -1; + lst->id_to_jobmap[i].rank = -1; + } + // Finally, filling up the table + for(int i=0; i<lst->num_jobs; i++) { + for(int j=0; j < lst->rank_counts[i]; j++) { + int const id = lst->global_ids[i][j]; + lst->id_to_jobmap[id].job = i; + lst->id_to_jobmap[id].rank = j; + } + } + // === === + + // returning if everything went alright if (rc == 0) { fclose(f); free(line_buf); @@ -160,6 +197,7 @@ static int jobmap_list_configure(void const * params, void ** ctx) } free(lst->global_ids); free(lst->rank_counts); + free(lst->id_to_jobmap); free(lst); *ctx = NULL; return -1; @@ -168,23 +206,14 @@ static int jobmap_list_configure(void const * params, void ** ctx) static struct codes_jobmap_id jobmap_list_to_local(int id, void const * ctx) { - struct codes_jobmap_id rtn; - rtn.job = -1; - rtn.rank = -1; - struct jobmap_list const *lst = (struct jobmap_list const *)ctx; - for(int i=0; i<lst->num_jobs; i++) { - for(int j=0; j < lst->rank_counts[i]; j++) { - if(id == lst->global_ids[i][j]) { - rtn.job = i; - rtn.rank = j; - return rtn; - } - } + // invalid id from what we got in the config + if (id < 0 || lst->highest_global_id < id) { + return (struct codes_jobmap_id) { .job = -1, .rank = -1 }; } - return rtn; + return lst->id_to_jobmap[id]; } static int jobmap_list_to_global(struct codes_jobmap_id id, void const * ctx) @@ -221,6 +250,7 @@ static void jobmap_list_destroy(void * ctx) free(lst->global_ids); free(lst->rank_counts); + 
free(lst->id_to_jobmap); free(ctx); } diff --git a/src/util/lp-type-lookup.c b/src/util/lp-type-lookup.c index 8d00e4f5..6bc80136 100644 --- a/src/util/lp-type-lookup.c +++ b/src/util/lp-type-lookup.c @@ -10,8 +10,6 @@ #include "ross.h" #include "codes/lp-type-lookup.h" -#define MAX_LP_TYPES 64 - struct lp_name_mapping { const char* name; diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c index 8df52463..9491c897 100644 --- a/src/util/rc-stack.c +++ b/src/util/rc-stack.c @@ -12,11 +12,16 @@ enum rc_stack_mode { RC_NONOPT, // not in optimistic mode RC_OPT, // optimistic mode - RC_OPT_DBG // optimistic *debug* mode (requires special handling) + RC_OPT_DBG, // optimistic *debug* mode (requires special handling) + RC_SEQ_RV_DBG, // sequential rollback chek, a *debug* mode that requires special handling }; typedef struct rc_entry_s { +#ifdef USE_RAND_TIEBREAKER tw_event_sig e_sig; // ROSS 2D event timestamp (.recv_ts & .event_tiebreaker) +#else + tw_stime time; +#endif void * data; void (*free_fn)(void*); struct qlist_head ql; @@ -36,11 +41,12 @@ void rc_stack_create(struct rc_stack **s){ } switch (g_tw_synchronization_protocol) { case OPTIMISTIC: - ss->mode = RC_OPT; - break; case OPTIMISTIC_REALTIME: ss->mode = RC_OPT; break; + case SEQUENTIAL_ROLLBACK_CHECK: + ss->mode = RC_SEQ_RV_DBG; + break; case OPTIMISTIC_DEBUG: ss->mode = RC_OPT_DBG; break; @@ -63,7 +69,11 @@ void rc_stack_push( if (s->mode != RC_NONOPT || free_fn == NULL) { rc_entry * ent = (rc_entry*)malloc(sizeof(*ent)); assert(ent); +#ifdef USE_RAND_TIEBREAKER ent->e_sig = tw_now_sig(lp); +#else + ent->time = tw_now(lp); +#endif ent->data = data; ent->free_fn = free_fn; qlist_add_tail(&ent->ql, &s->head); @@ -95,11 +105,26 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) { if (s->mode == RC_OPT_DBG) return; + // rollback until only one event is left + if (s->mode == RC_SEQ_RV_DBG) { + struct qlist_head *ent = s->head.next; + while (ent->next != &s->head) { + rc_entry *r = qlist_entry(ent, 
rc_entry, ql); + qlist_del(ent); + if (r->free_fn) r->free_fn(r->data); + free(r); + s->count--; + ent = s->head.next; + } + return; + } + + // Removing all stored rollback events from stack struct qlist_head *ent = s->head.next; while (ent != &s->head) { rc_entry *r = qlist_entry(ent, rc_entry, ql); #ifdef USE_RAND_TIEBREAKER - if (lp == NULL || tw_event_sig_compare(r->e_sig, lp->pe->GVT_sig) == -1) { + if (lp == NULL || tw_event_sig_compare_ptr(&r->e_sig, &lp->pe->GVT_sig) < 0) { #else if (lp == NULL || r->time < lp->pe->GVT){ #endif diff --git a/src/workload/codes-workload-dump.c b/src/workload/codes-workload-dump.c index 18756bba..73631869 100644 --- a/src/workload/codes-workload-dump.c +++ b/src/workload/codes-workload-dump.c @@ -215,7 +215,7 @@ int main(int argc, char *argv[]) wparams = (char*)&d_params; } } - else if(strcmp(type, "online_comm_workload") == 0){ + else if(strcmp(type, "swm_online_comm_workload") == 0 || strcmp(type, "conc_online_comm_workload") == 0){ if (n == -1){ fprintf(stderr, "Expected \"--num-ranks\" argument for online workload\n"); @@ -448,7 +448,7 @@ int main(int argc, char *argv[]) } } while (op.op_type != CODES_WK_END); - if(strcmp(type, "online_comm_workload") == 0) + if(strcmp(type, "swm_online_comm_workload") == 0 || strcmp(type, "conc_online_comm_workload") == 0) { codes_workload_finalize(type, wparams, 0, i); } diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c index 91cd49d4..45657be3 100644 --- a/src/workload/codes-workload.c +++ b/src/workload/codes-workload.c @@ -35,9 +35,14 @@ extern struct codes_workload_method darshan_mpi_io_workload_method; #ifdef USE_RECORDER extern struct codes_workload_method recorder_io_workload_method; #endif -#ifdef USE_ONLINE -extern struct codes_workload_method online_comm_workload_method; + +#ifdef USE_SWM +extern struct codes_workload_method swm_online_comm_workload_method; +#endif +#ifdef USE_UNION +extern struct codes_workload_method conc_online_comm_workload_method; 
#endif + extern struct codes_workload_method checkpoint_workload_method; extern struct codes_workload_method iomock_workload_method; @@ -59,8 +64,11 @@ static struct codes_workload_method const * method_array_default[] = #endif #endif -#ifdef USE_ONLINE - &online_comm_workload_method, +#ifdef USE_SWM + &swm_online_comm_workload_method, +#endif +#ifdef USE_UNION + &conc_online_comm_workload_method, #endif #ifdef USE_RECORDER &recorder_io_workload_method, @@ -166,7 +174,7 @@ void codes_workload_free_config_return(codes_workload_config_return *c) int codes_workload_load( const char* type, - const char* params, + const void* params, int app_id, int rank) { @@ -360,6 +368,13 @@ int codes_workload_get_rank_cnt( return(-1); } +int codes_workload_get_final_iteration(int wkld_id, int app_id, int rank) { + if (method_array[wkld_id]->codes_workload_get_final_iteration) { + return method_array[wkld_id]->codes_workload_get_final_iteration(app_id, rank); + } + return -1; +} + void codes_workload_print_op( FILE *f, struct codes_workload_op *op, @@ -542,6 +557,156 @@ void codes_workload_add_method(struct codes_workload_method const * method) method_array[num_user_methods++] = method; } +char const * const op_type_string(enum codes_workload_op_type op_type) { + switch(op_type) { + case CODES_WK_END: return "CODES_WK_END"; + case CODES_WK_DELAY: return "CODES_WK_DELAY"; + case CODES_WK_BARRIER: return "CODES_WK_BARRIER"; + case CODES_WK_OPEN: return "CODES_WK_OPEN"; + case CODES_WK_CLOSE: return "CODES_WK_CLOSE"; + case CODES_WK_WRITE: return "CODES_WK_WRITE"; + case CODES_WK_READ: return "CODES_WK_READ"; + case CODES_WK_SEND: return "CODES_WK_SEND"; + case CODES_WK_RECV: return "CODES_WK_RECV"; + case CODES_WK_ISEND: return "CODES_WK_ISEND"; + case CODES_WK_IRECV: return "CODES_WK_IRECV"; + case CODES_WK_BCAST: return "CODES_WK_BCAST"; + case CODES_WK_ALLGATHER: return "CODES_WK_ALLGATHER"; + case CODES_WK_ALLGATHERV: return "CODES_WK_ALLGATHERV"; + case CODES_WK_ALLTOALL: return 
"CODES_WK_ALLTOALL"; + case CODES_WK_ALLTOALLV: return "CODES_WK_ALLTOALLV"; + case CODES_WK_REDUCE: return "CODES_WK_REDUCE"; + case CODES_WK_ALLREDUCE: return "CODES_WK_ALLREDUCE"; + case CODES_WK_COL: return "CODES_WK_COL"; + case CODES_WK_WAITALL: return "CODES_WK_WAITALL"; + case CODES_WK_WAIT: return "CODES_WK_WAIT"; + case CODES_WK_WAITSOME: return "CODES_WK_WAITSOME"; + case CODES_WK_WAITANY: return "CODES_WK_WAITANY"; + case CODES_WK_TESTALL: return "CODES_WK_TESTALL"; + case CODES_WK_REQ_FREE: return "CODES_WK_REQ_FREE"; + case CODES_WK_IGNORE: return "CODES_WK_IGNORE"; + case CODES_WK_MPI_OPEN: return "CODES_WK_MPI_OPEN"; + case CODES_WK_MPI_CLOSE: return "CODES_WK_MPI_CLOSE"; + case CODES_WK_MPI_WRITE: return "CODES_WK_MPI_WRITE"; + case CODES_WK_MPI_READ: return "CODES_WK_MPI_READ"; + case CODES_WK_MPI_COLL_OPEN: return "CODES_WK_MPI_COLL_OPEN"; + case CODES_WK_MPI_COLL_WRITE: return "CODES_WK_MPI_COLL_WRITE"; + case CODES_WK_MPI_COLL_READ: return "CODES_WK_MPI_COLL_READ"; + case CODES_WK_MARK: return "CODES_WK_MARK"; + default: return "UNKNOWN!!"; + } +} + +// Initial implementation by Claude.ai +void fprint_codes_workload_op(FILE * out, char const * prefix, struct codes_workload_op * op) { + if (op == NULL) { + return; + } + + // Print common fields first + fprintf(out, "%sop_type = %s\n", prefix, op_type_string(op->op_type)); + + fprintf(out, "%s start_time = %f\n", prefix, op->start_time); + fprintf(out, "%s end_time = %f\n", prefix, op->end_time); + fprintf(out, "%s sim_start_time = %f\n", prefix, op->sim_start_time); + fprintf(out, "%s sequence_id = %ld\n", prefix, op->sequence_id); + + // Print union fields based on op_type + switch(op->op_type) { + case CODES_WK_DELAY: + fprintf(out, "%s delay.seconds = %f\n", prefix, op->u.delay.seconds); + fprintf(out, "%s delay.nsecs = %f\n", prefix, op->u.delay.nsecs); + break; + + case CODES_WK_BARRIER: + fprintf(out, "%s barrier.count = %d\n", prefix, op->u.barrier.count); + fprintf(out, "%s barrier.root 
= %d\n", prefix, op->u.barrier.root); + break; + + case CODES_WK_OPEN: + case CODES_WK_MPI_OPEN: + case CODES_WK_MPI_COLL_OPEN: + fprintf(out, "%s open.file_id = %lu\n", prefix, op->u.open.file_id); + fprintf(out, "%s open.create_flag = %d\n", prefix, op->u.open.create_flag); + break; + + case CODES_WK_WRITE: + case CODES_WK_MPI_WRITE: + case CODES_WK_MPI_COLL_WRITE: + fprintf(out, "%s write.file_id = %lu\n", prefix, op->u.write.file_id); + fprintf(out, "%s write.offset = %ld\n", prefix, op->u.write.offset); + fprintf(out, "%s write.size = %zu\n", prefix, op->u.write.size); + break; + + case CODES_WK_READ: + case CODES_WK_MPI_READ: + case CODES_WK_MPI_COLL_READ: + fprintf(out, "%s read.file_id = %lu\n", prefix, op->u.read.file_id); + fprintf(out, "%s read.offset = %ld\n", prefix, op->u.read.offset); + fprintf(out, "%s read.size = %zu\n", prefix, op->u.read.size); + break; + + case CODES_WK_CLOSE: + case CODES_WK_MPI_CLOSE: + fprintf(out, "%s close.file_id = %lu\n", prefix, op->u.close.file_id); + break; + + case CODES_WK_SEND: + case CODES_WK_ISEND: + fprintf(out, "%s send.source_rank = %d\n", prefix, op->u.send.source_rank); + fprintf(out, "%s send.dest_rank = %d\n", prefix, op->u.send.dest_rank); + fprintf(out, "%s send.num_bytes = %ld\n", prefix, op->u.send.num_bytes); + fprintf(out, "%s send.data_type = %d\n", prefix, op->u.send.data_type); + fprintf(out, "%s send.count = %d\n", prefix, op->u.send.count); + fprintf(out, "%s send.tag = %d\n", prefix, op->u.send.tag); + fprintf(out, "%s send.req_id = %u\n", prefix, op->u.send.req_id); + break; + + case CODES_WK_RECV: + case CODES_WK_IRECV: + fprintf(out, "%s recv.source_rank = %d\n", prefix, op->u.recv.source_rank); + fprintf(out, "%s recv.dest_rank = %d\n", prefix, op->u.recv.dest_rank); + fprintf(out, "%s recv.num_bytes = %ld\n", prefix, op->u.recv.num_bytes); + fprintf(out, "%s recv.data_type = %d\n", prefix, op->u.recv.data_type); + fprintf(out, "%s recv.count = %d\n", prefix, op->u.recv.count); + 
fprintf(out, "%s recv.tag = %d\n", prefix, op->u.recv.tag); + fprintf(out, "%s recv.req_id = %u\n", prefix, op->u.recv.req_id); + break; + + case CODES_WK_COL: + case CODES_WK_BCAST: + case CODES_WK_ALLGATHER: + case CODES_WK_ALLGATHERV: + case CODES_WK_ALLTOALL: + case CODES_WK_ALLTOALLV: + case CODES_WK_REDUCE: + case CODES_WK_ALLREDUCE: + fprintf(out, "%scollective.num_bytes = %d\n", prefix, op->u.collective.num_bytes); + break; + + case CODES_WK_WAITALL: + case CODES_WK_WAITSOME: + case CODES_WK_WAITANY: + case CODES_WK_TESTALL: + fprintf(out, "%s waits.count = %d\n", prefix, op->u.waits.count); + fprintf(out, "%s waits.req_ids = %p\n", prefix, op->u.waits.req_ids); + break; + + case CODES_WK_WAIT: + fprintf(out, "%s wait.req_id = %u\n", prefix, op->u.wait.req_id); + break; + + case CODES_WK_REQ_FREE: + fprintf(out, "%s free.req_id = %u\n", prefix, op->u.free.req_id); + break; + + case CODES_WK_END: + case CODES_WK_IGNORE: + case CODES_WK_MARK: + break; + } +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/workload/methods/codes-checkpoint-wrkld.c b/src/workload/methods/codes-checkpoint-wrkld.c index 672f9bbc..82e5fdb4 100644 --- a/src/workload/methods/codes-checkpoint-wrkld.c +++ b/src/workload/methods/codes-checkpoint-wrkld.c @@ -31,7 +31,7 @@ static void * checkpoint_workload_read_config( char const * section_name, char const * annotation, int num_ranks); -static int checkpoint_workload_load(const char* params, int app_id, int rank); +static int checkpoint_workload_load(const void* params, int app_id, int rank); static void checkpoint_workload_get_next(int app_id, int rank, struct codes_workload_op *op); static void checkpoint_workload_get_next_rc2(int app_id, int rank); @@ -112,7 +112,7 @@ static void * checkpoint_workload_read_config( return p; } -static int checkpoint_workload_load(const char* params, int app_id, int rank) +static int checkpoint_workload_load(const void* params, int app_id, int rank) { checkpoint_wrkld_params *c_params = 
(checkpoint_wrkld_params *)params; struct checkpoint_state* new_state; diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C new file mode 100644 index 00000000..42b4b5c7 --- /dev/null +++ b/src/workload/methods/codes-conc-online-comm-wrkld.C @@ -0,0 +1,2156 @@ +/* + * Copyright (C) 2014 University of Chicago + * See COPYRIGHT notice in top-level directory. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "codes/codes-workload.h" +#include "codes/quickhash.h" +#include "codes/codes-jobmap.h" +#include "codes_config.h" +#include "union_util.h" + +//#ifdef USE_SWM +#include "lammps.h" +#include "nekbone_swm_user_code.h" +#include "nearest_neighbor_swm_user_code.h" +#include "all_to_one_swm_user_code.h" +#include "milc_swm_user_code.h" +#include "abt.h" +//#endif + +#define ALLREDUCE_SHORT_MSG_SIZE 2048 + +#define DBG_COMM 0 +#define DBG_LINKING 0 +#define DBG_TMP 0 +#define CHECKPOINT_HASH_TABLE_SIZE 251 +#define DEFAULT_WR_BUF_SIZE (16 * 1024 * 1024) /* 16 MiB default */ + +#define THISMIN(a,b) ((a) < (b)) ? 
(a) : (b) + +using namespace std; + +static struct qhash_table *rank_tbl = NULL; +static int rank_tbl_pop = 0; +static int total_rank_cnt = 0; +static ABT_thread global_prod_thread = NULL; +static ABT_xstream self_es; +static long cpu_freq = 1.0; +static long num_allreduce = 0; +static long num_isends = 0; +static long num_irecvs = 0; +static long num_barriers = 0; +static long num_sends = 0; +static long num_recvs = 0; +static long num_sendrecv = 0; +static long num_waitalls = 0; + +//static std::map send_count; +//static std::map isend_count; +//static std::map allreduce_count; + +struct shared_context { + int my_rank; + uint32_t wait_id; + int num_ranks; + char workload_name[MAX_NAME_LENGTH_WKLD]; + void * swm_obj; + void * conc_params; + bool isconc; + ABT_thread producer; + std::deque fifo; + struct { + bool received; + int final_iteration; + } init_data_from_workload; +}; + +struct rank_mpi_context { + struct qhash_head hash_link; + int app_id; + struct shared_context sctx; +}; + +typedef struct rank_mpi_compare { + int app_id; + int rank; +} rank_mpi_compare; + +/* Conceptual online workload implementations */ + +void UNION_Pass_app_data(struct union_app_data * app_data) { + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err; + + err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + sctx->init_data_from_workload.received = true; + sctx->init_data_from_workload.final_iteration = app_data->final_iteration; + + ABT_thread_yield_to(global_prod_thread); +} + +void UNION_MPI_Comm_size (UNION_Comm comm, int *size) +{ + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err; + + err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + *size = 
sctx->num_ranks; + // printf("ranks %d\n", sctx->num_ranks); +} + +void UNION_MPI_Comm_rank( UNION_Comm comm, int *rank ) +{ + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err; + + err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + *rank = sctx->my_rank; +} + +void UNION_MPI_Finalize() +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_END; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nUNION FINALIZE src %d ", sctx->my_rank); + printf("\nnum_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n", + num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls); + // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread); + } + + ABT_thread_yield_to(global_prod_thread); +} + +// cycle_count assumes 1 GHz, meaning, 1 cycle is 1 nanosecond. This is different from SWM_Compute! 
+void UNION_Compute(long cycle_count) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_DELAY; + wrkld_per_rank.u.delay.nsecs = cycle_count; + wrkld_per_rank.u.delay.seconds = (cycle_count) / (1000.0 * 1000.0 * 1000.0); + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + if(DBG_COMM){ + printf("\nUNION COMPUTE src %d: %ld ns ", sctx->my_rank, cycle_count); + } + ABT_thread_yield_to(global_prod_thread); +} + +void UNION_Mark_Iteration(UNION_TAG iter_tag) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_MARK; + wrkld_per_rank.u.send.tag = iter_tag; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nUNION MARKITERATION src %d ", sctx->my_rank); + } + + ABT_thread_yield_to(global_prod_thread); +} + + +void UNION_IO_OPEN_FILE(int fid) +{ + struct codes_workload_op op; + op.op_type = CODES_WK_OPEN; + op.u.open.file_id = fid; + op.u.open.create_flag = 1; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&op); + + if(DBG_TMP){ + printf("\nUNION IO OPEN src %d ", sctx->my_rank); + } + 
+ ABT_thread_yield_to(global_prod_thread); + +} + +void UNION_IO_WRITE(int fid, long size) +{ + struct codes_workload_op op; + op.op_type = CODES_WK_WRITE; + op.u.write.file_id = fid; + op.u.write.offset = 0; + op.u.write.size = size; + + /* Retrieve the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast<struct shared_context *>(arg); + sctx->fifo.push_back(&op); + + if(DBG_TMP){ + printf("\nUNION IO WRITE src %d ", sctx->my_rank); + } + + ABT_thread_yield_to(global_prod_thread); +} + +void UNION_IO_READ(int fid, long size) +{ + struct codes_workload_op op; + op.op_type = CODES_WK_READ; + op.u.read.file_id = fid; + op.u.read.offset = 0; + op.u.read.size = size; + + /* Retrieve the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast<struct shared_context *>(arg); + sctx->fifo.push_back(&op); + + if(DBG_TMP){ + printf("\nUNION IO READ src %d ", sctx->my_rank); + } + + ABT_thread_yield_to(global_prod_thread); +} + +void UNION_IO_CLOSE_FILE(int fid) +{ + struct codes_workload_op op; + op.op_type = CODES_WK_CLOSE; + op.u.close.file_id = fid; + + /* Retrieve the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast<struct shared_context *>(arg); + sctx->fifo.push_back(&op); + + if(DBG_TMP){ + printf("\nUNION IO CLOSE src %d ", sctx->my_rank); + } + + ABT_thread_yield_to(global_prod_thread); +} + +void UNION_MPI_Send(const void *buf, + int count, + UNION_Datatype datatype, + int dest, + int tag, + UNION_Comm comm) +{ + /* add an event in the shared queue and then yield */ + struct codes_workload_op 
wrkld_per_rank; + + int datatypesize; + UNION_Type_size(datatype, &datatypesize); + + wrkld_per_rank.op_type = CODES_WK_SEND; + wrkld_per_rank.u.send.tag = tag; + wrkld_per_rank.u.send.count = count; + wrkld_per_rank.u.send.data_type = datatype; + wrkld_per_rank.u.send.num_bytes = count * datatypesize; + wrkld_per_rank.u.send.dest_rank = dest; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + if(DBG_TMP){ + printf("\nUNION SEND src %d dst %d: %lld bytes ", sctx->my_rank, dest, + wrkld_per_rank.u.send.num_bytes); + // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread); + } + int rc = ABT_thread_yield_to(global_prod_thread); + num_sends++; +} + +void UNION_MPI_Recv(void *buf, + int count, + UNION_Datatype datatype, + int source, + int tag, + UNION_Comm comm, + UNION_Status *status) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + int datatypesize; + UNION_Type_size(datatype, &datatypesize); + + wrkld_per_rank.op_type = CODES_WK_RECV; + wrkld_per_rank.u.recv.tag = tag; + wrkld_per_rank.u.recv.source_rank = source; + wrkld_per_rank.u.recv.data_type = datatype; + wrkld_per_rank.u.recv.count = count; + wrkld_per_rank.u.recv.num_bytes = count * datatypesize; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.recv.dest_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + if(DBG_COMM){ + printf("\nUNION RECV src %d dst %d: %lld bytes ", source, 
sctx->my_rank, + wrkld_per_rank.u.recv.num_bytes); + // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread); + } + + ABT_thread_yield_to(global_prod_thread); + num_recvs++; +} + +void UNION_MPI_Sendrecv(const void *sendbuf, + int sendcount, + UNION_Datatype sendtype, + int dest, + int sendtag, + void *recvbuf, + int recvcount, + UNION_Datatype recvtype, + int source, + int recvtag, + UNION_Comm comm, + UNION_Status *status) +{ + /* sendrecv events */ + struct codes_workload_op send_op; + + int datatypesize1, datatypesize2; + UNION_Type_size(sendtype, &datatypesize1); + UNION_Type_size(recvtype, &datatypesize2); + + send_op.op_type = CODES_WK_SEND; + send_op.u.send.tag = sendtag; + send_op.u.send.count = sendcount; + send_op.u.send.data_type = sendtype; + send_op.u.send.num_bytes = sendcount * datatypesize1; + send_op.u.send.dest_rank = dest; + + struct codes_workload_op recv_op; + + recv_op.op_type = CODES_WK_RECV; + recv_op.u.recv.tag = recvtag; + recv_op.u.recv.source_rank = source; + recv_op.u.recv.count = recvcount; + recv_op.u.recv.data_type = recvtype; + recv_op.u.recv.num_bytes = recvcount * datatypesize2; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + /* Add an event in the shared queue and then yield */ + recv_op.u.recv.dest_rank = sctx->my_rank; + send_op.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&send_op); + sctx->fifo.push_back(&recv_op); + if(DBG_COMM){ + printf("\nUNION SENDRECV ssrc %d sdst %d: %lld bytes; rsrc %d rdst %d: %lld bytes ", sctx->my_rank, dest, + send_op.u.send.num_bytes, source, sctx->my_rank, recv_op.u.recv.num_bytes); + } + ABT_thread_yield_to(global_prod_thread); + num_sendrecv++; +} + + +void UNION_MPI_Barrier(UNION_Comm comm) +{ + /* Retreive the shared context state */ 
+ ABT_thread prod; + void * arg; + int err; + int rank, size, src, dest, mask; + + err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + rank = sctx->my_rank; + size = sctx->num_ranks; + mask = 0x1; + + while(mask < size) { + dest = (rank + mask) % size; + src = (rank - mask + size) % size; + + UNION_MPI_Sendrecv(NULL, 0, UNION_Int, dest, 1234, NULL, 0, UNION_Int, src, 1234, + comm, NULL); + + mask <<= 1; + } + num_barriers++; + // if(DBG_COMM){ + // printf("UNION BARRIER src %d\n", sctx->my_rank); + // } +} + +void UNION_MPI_Isend(const void *buf, + int count, + UNION_Datatype datatype, + int dest, + int tag, + UNION_Comm comm, + UNION_Request *request) +{ + /* add an event in the shared queue and then yield */ + // printf("\n Sending to rank %d ", comm_id); + struct codes_workload_op wrkld_per_rank; + + int datatypesize; + UNION_Type_size(datatype, &datatypesize); + + wrkld_per_rank.op_type = CODES_WK_ISEND; + wrkld_per_rank.u.send.tag = tag; + wrkld_per_rank.u.send.count = count; + wrkld_per_rank.u.send.data_type = datatype; + wrkld_per_rank.u.send.num_bytes = count * datatypesize; + wrkld_per_rank.u.send.dest_rank = dest; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + *request = sctx->wait_id; + wrkld_per_rank.u.send.req_id = *request; + sctx->wait_id++; + if(DBG_COMM){ + printf("\nUNION ISEND src %d dst %d: %lld bytes ", sctx->my_rank, dest, + wrkld_per_rank.u.send.num_bytes); + } + + ABT_thread_yield_to(global_prod_thread); + num_isends++; +} + +void UNION_MPI_Irecv(void *buf, + int count, + UNION_Datatype datatype, + int 
source, + int tag, + UNION_Comm comm, + UNION_Request *request) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + int datatypesize; + UNION_Type_size(datatype, &datatypesize); + + wrkld_per_rank.op_type = CODES_WK_IRECV; + wrkld_per_rank.u.recv.tag = tag; + wrkld_per_rank.u.recv.source_rank = source; + wrkld_per_rank.u.recv.count = count; + wrkld_per_rank.u.recv.data_type = datatype; + wrkld_per_rank.u.recv.num_bytes = count * datatypesize; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.recv.dest_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + *request = sctx->wait_id; + wrkld_per_rank.u.recv.req_id = *request; + sctx->wait_id++; + if(DBG_COMM){ + printf("\nUNION IRECV src %d dst %d: %lld bytes ", source, sctx->my_rank, + wrkld_per_rank.u.recv.num_bytes); + } + ABT_thread_yield_to(global_prod_thread); + num_irecvs++; +} + +void UNION_MPI_Wait(UNION_Request *request, + UNION_Status *status) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_WAIT; + wrkld_per_rank.u.wait.req_id = *(UNION_Request *)request; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + if(DBG_COMM){ + printf("\nUNION WAIT src %d ",sctx->my_rank); + } + ABT_thread_yield_to(global_prod_thread); +} + +void UNION_MPI_Waitall(int count, + UNION_Request array_of_requests[], + UNION_Status array_of_statuses[]) +{ + num_waitalls++; + for(int i = 0; i < count; 
i++) + UNION_MPI_Wait(&array_of_requests[i], UNION_STATUSES_IGNORE); + + // if(DBG_COMM){ + // printf("UNION WAITALL count %d\n", count); + // } +} + +void UNION_MPI_Reduce(const void *sendbuf, + void *recvbuf, + int count, + UNION_Datatype datatype, + UNION_Op op, + int root, + UNION_Comm comm) +{ + //todo +} + +void UNION_MPI_Allreduce(const void *sendbuf, + void *recvbuf, + int count, + UNION_Datatype datatype, + UNION_Op op, + UNION_Comm comm) +{ + int comm_size, rank, type_size, i, send_idx, recv_idx, last_idx, send_cnt, recv_cnt; + int pof2, mask, rem, newrank, newdst, dst, *cnts, *disps; + + UNION_MPI_Comm_size(comm, &comm_size); + UNION_MPI_Comm_rank(comm, &rank); + UNION_Type_size(datatype, &type_size); + + cnts = disps = NULL; + + pof2 = 1; + while (pof2 <= comm_size) pof2 <<= 1; + pof2 >>=1; + + rem = comm_size - pof2; + + /* In the non-power-of-two case, all even-numbered + processes of rank < 2*rem send their data to + (rank+1). These even-numbered processes no longer + participate in the algorithm until the very end. The + remaining processes form a nice power-of-two. */ + if (rank < 2*rem) { + if (rank % 2 == 0) { /* even */ + UNION_MPI_Send(NULL, count, datatype, rank+1, -1002, comm); + newrank = -1; + } else { /* odd */ + UNION_MPI_Recv(NULL, count, datatype, rank-1, -1002, comm, NULL); + newrank = rank / 2; + } + } else { + newrank = rank - rem; + } + + /* If op is user-defined or count is less than pof2, use + recursive doubling algorithm. Otherwise do a reduce-scatter + followed by allgather. (If op is user-defined, + derived datatypes are allowed and the user could pass basic + datatypes on one process and derived on another as long as + the type maps are the same. Breaking up derived + datatypes to do the reduce-scatter is tricky, therefore + using recursive doubling in that case.) 
*/ + if (newrank != -1) { + if ((count*type_size <= 81920 ) || (count < pof2)) { + mask = 0x1; + while (mask < pof2) { + newdst = newrank ^ mask; + dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem; + + UNION_MPI_Sendrecv(NULL, count, datatype, dst, -1002, NULL, count, datatype, dst, -1002, comm, NULL); + mask <<= 1; + } + } else { + /* do a reduce-scatter followed by allgather */ + /* for the reduce-scatter, calculate the count that + each process receives and the displacement within + the buffer */ + + cnts = (int*)malloc(pof2*sizeof(int)); + disps = (int*)malloc(pof2*sizeof(int)); + + for (i=0; i<(pof2-1); i++) + cnts[i] = count/pof2; + cnts[pof2-1] = count - (count/pof2)*(pof2-1); + + disps[0] = 0; + for (i=1; i>= 1; + while (mask > 0) { + newdst = newrank ^ mask; + /* find real rank of dest */ + dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem; + + send_cnt = recv_cnt = 0; + if (newrank < newdst) { + if (mask != pof2/2) + last_idx = last_idx + pof2/(mask*2); + + recv_idx = send_idx + pof2/(mask*2); + for (i=send_idx; i newdst) send_idx = recv_idx; + mask >>= 1; + } + } + } + + if(rank < 2*rem) { + if(rank % 2) {/* odd */ + UNION_MPI_Send(NULL, count, datatype, rank-1, -1002, comm); + } else { + UNION_MPI_Recv(NULL, count, datatype, rank+1, -1002, comm, NULL); + } + } + + if(cnts) free(cnts); + if(disps) free(disps); +} + + +void bcast_binomial(void *buffer, + int rank, + int count, + UNION_Datatype datatype, + int root, + UNION_Comm comm) +{ + int comm_size, src, dst, relative_rank, mask; + UNION_Status status; + UNION_MPI_Comm_size(comm, &comm_size); + + relative_rank = (rank >= root) ? 
rank - root : rank - root + comm_size; + + mask = 0x1; + while(mask < comm_size) + { + if(relative_rank & mask) + { + src = rank - mask; + if(src < 0) src += comm_size; + UNION_MPI_Recv(buffer,count,datatype,src,-1005,comm, &status); + break; + } + mask <<= 1; + } + + mask >>=1; + while(mask > 0) + { + if(relative_rank + mask < comm_size) + { + dst = rank + mask; + if(dst >= comm_size) dst -= comm_size; + UNION_MPI_Send(buffer,count,datatype,dst,-1005,comm); + } + mask >>= 1; + } +} + +void bcast_scatter_doubling_allgather(void *buffer, + int rank, + int count, + UNION_Datatype datatype, + int root, + UNION_Comm comm) +{ + int comm_size, dst, relative_rank, mask, scatter_size, curr_size, recvcount, recv_size = 0; + UNION_Status status; + int j, k, i, tmp_mask; + int type_size, nbytes = 0; + int relative_dst, dst_tree_root, my_tree_root, send_offset, recv_offset; + + UNION_Type_size(datatype, &type_size); + UNION_MPI_Comm_size(comm, &comm_size); + + relative_rank = (rank >= root) ? rank - root : rank - root + comm_size; + + if(comm_size == 1) return; + + nbytes = type_size * count; + if(nbytes == 0) return; + + scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */ + curr_size = THISMIN(scatter_size, (nbytes - (relative_rank * scatter_size))); + + if (curr_size < 0) curr_size = 0; + + mask = 0x1; + i = 0; + + while(mask < comm_size) { + relative_dst = relative_rank ^ mask; + dst = (relative_dst + root) % comm_size; + + dst_tree_root = relative_dst >> i; + dst_tree_root <<= i; + + my_tree_root = relative_rank >> i; + my_tree_root <<= i; + + send_offset = my_tree_root * scatter_size; + recv_offset = dst_tree_root * scatter_size; + + if(relative_dst < comm_size) + { + recvcount = (nbytes-recv_offset < 0 ? 
0 : nbytes-recv_offset); + UNION_MPI_Sendrecv(buffer,curr_size,UNION_Byte,dst,-1005,buffer,recvcount,UNION_Byte,dst,-1005,comm,&status); + curr_size += recv_size; + } + + mask <<= 1; + i++; + } +} + +void bcast_scatter_ring_allgather(void *buffer, + int rank, + int count, + UNION_Datatype datatype, + int root, + UNION_Comm comm) +{ + int comm_size, scatter_size, j, i, nbytes, type_size; + int left, right, jnext, curr_size = 0; + int recvd_size; + UNION_Status status; + + UNION_Type_size(datatype, &type_size); + UNION_MPI_Comm_size(comm, &comm_size); + + if(comm_size == 1) return; + + nbytes = type_size * count; + if (nbytes == 0) return; + + scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */ + + curr_size = THISMIN(scatter_size, nbytes - ((rank - root + comm_size) % comm_size) * scatter_size); + if(curr_size < 0) curr_size = 0; + + left = (comm_size + rank - 1) % comm_size; + right = (rank + 1) % comm_size; + j = rank; + jnext = left; + + for (i=1; i(arg); + sctx->init_data_from_workload.received = true; + sctx->init_data_from_workload.final_iteration = app_data->final_iteration; + + ABT_thread_yield_to(global_prod_thread); +} + +/* + * peer: the receiving peer id + * comm_id: the communicator id being used + * tag: tag id + * reqvc: virtual channel being used by the message (to be ignored) + * rspvc: virtual channel being used by the message (to be ignored) + * buf: the address of sender's buffer in memory + * bytes: number of bytes to be sent + * reqrt and rsprt: routing types (to be ignored) */ + +void SWM_Send(SWM_PEER peer, + SWM_COMM_ID comm_id, + SWM_TAG tag, + SWM_VC reqvc, + SWM_VC rspvc, + SWM_BUF buf, + SWM_BYTES bytes, + SWM_BYTES pktrspbytes, + SWM_ROUTING_TYPE reqrt, + SWM_ROUTING_TYPE rsprt) +{ + /* add an event in the shared queue and then yield */ + // printf("\n Sending to rank %d ", comm_id); + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_SEND; + wrkld_per_rank.u.send.tag = tag; + 
wrkld_per_rank.u.send.num_bytes = bytes; + wrkld_per_rank.u.send.dest_rank = peer; + +#ifdef DBG_COMM +/* if(tag != 1235 && tag != 1234) + { + auto it = send_count.find(bytes); + if(it == send_count.end()) + { + send_count.insert(std::make_pair(bytes, 1)); + } + else + { + it->second = it->second + 1; + } + }*/ +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nSWM SEND src %d dst %d: %lld bytes ", sctx->my_rank, peer, + wrkld_per_rank.u.send.num_bytes); + // printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread); + } + + ABT_thread_yield_to(global_prod_thread); + num_sends++; +} + +/* + * @param comm_id: communicator ID (For now, UNION_Comm_World) + * reqvc and rspvc: virtual channel IDs for request and response (ignore for + * our purpose) + * buf: buffer location for the call (ignore for our purpose) + * reqrt and rsprt: routing types, ignore and use routing from config file instead. + * */ +void SWM_Barrier( + SWM_COMM_ID comm_id, + SWM_VC reqvc, + SWM_VC rspvc, + SWM_BUF buf, + SWM_UNKNOWN auto1, + SWM_UNKNOWN2 auto2, + SWM_ROUTING_TYPE reqrt, + SWM_ROUTING_TYPE rsprt) +{ + /* Add an event in the shared queue and then yield */ +#if 0 + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_DELAY; + /* TODO: Check how to convert cycle count into delay? 
*/ + wrkld_per_rank.u.delay.nsecs = 0.1; + +#ifdef DBG_COMM + printf("\n Barrier delay %lf ", wrkld_per_rank.u.delay.nsecs); +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + ABT_thread_yield_to(global_prod_thread); +#endif +#ifdef DBG_COMM +// printf("\n barrier "); +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err; + int rank, size, src, dest, mask; + + err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + rank = sctx->my_rank; + size = sctx->num_ranks; + mask = 0x1; + + while(mask < size) { + dest = (rank + mask) % size; + src = (rank - mask + size) % size; + + SWM_Sendrecv(comm_id, dest, 1234, reqvc, rspvc, 0, 0, 0, + src, 1234, 0, reqrt, rsprt); + mask <<= 1; + } + num_barriers++; +} + +void SWM_Isend(SWM_PEER peer, + SWM_COMM_ID comm_id, + SWM_TAG tag, + SWM_VC reqvc, + SWM_VC rspvc, + SWM_BUF buf, + SWM_BYTES bytes, + SWM_BYTES pktrspbytes, + uint32_t * handle, + SWM_ROUTING_TYPE reqrt, + SWM_ROUTING_TYPE rsprt) +{ + /* add an event in the shared queue and then yield */ + // printf("\n Sending to rank %d ", comm_id); + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_ISEND; + wrkld_per_rank.u.send.tag = tag; + wrkld_per_rank.u.send.num_bytes = bytes; + wrkld_per_rank.u.send.dest_rank = peer; + +#ifdef DBG_COMM +/* if(tag != 1235 && tag != 1234) + { + auto it = isend_count.find(bytes); + if(it == isend_count.end()) + { + isend_count.insert(std::make_pair(bytes, 1)); + } + else + { + it->second = it->second + 1; + } + }*/ +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * 
arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + *handle = sctx->wait_id; + wrkld_per_rank.u.send.req_id = *handle; + sctx->wait_id++; + + if(DBG_COMM){ + printf("\nSWM ISEND src %d dst %d: %lld bytes ", sctx->my_rank, peer, + wrkld_per_rank.u.send.num_bytes); + } + + ABT_thread_yield_to(global_prod_thread); + num_isends++; +} +void SWM_Recv(SWM_PEER peer, + SWM_COMM_ID comm_id, + SWM_TAG tag, + SWM_BUF buf) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_RECV; + wrkld_per_rank.u.recv.tag = tag; + wrkld_per_rank.u.recv.source_rank = peer; + wrkld_per_rank.u.recv.num_bytes = 0; + +#ifdef DBG_COMM + //printf("\n recv op tag: %d source: %d ", tag, peer); +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.recv.dest_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nSWM RECV src %d dst %d: %lld bytes ", peer, sctx->my_rank, + wrkld_per_rank.u.recv.num_bytes); + } + + ABT_thread_yield_to(global_prod_thread); + num_recvs++; +} + +/* handle is for the request ID */ +void SWM_Irecv(SWM_PEER peer, + SWM_COMM_ID comm_id, + SWM_TAG tag, + SWM_BUF buf, + uint32_t* handle) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_IRECV; + wrkld_per_rank.u.recv.tag = tag; + wrkld_per_rank.u.recv.source_rank = peer; + wrkld_per_rank.u.recv.num_bytes = 0; + + /* Retreive the shared context state */ + 
ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + wrkld_per_rank.u.recv.dest_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); + + + *handle = sctx->wait_id; + wrkld_per_rank.u.recv.req_id = *handle; + sctx->wait_id++; + + if(DBG_COMM){ + printf("\nSWM IRECV src %d dst %d: %lld bytes ", peer, sctx->my_rank, + wrkld_per_rank.u.recv.num_bytes); + } + + ABT_thread_yield_to(global_prod_thread); + num_irecvs++; +} + +void SWM_Compute(long cycle_count) +{ + //NM: noting that cpu_frequency has been loaded in comm_online_workload_load() as GHz, e.g. cpu_freq = 2.0 means 2.0GHz + if(!cpu_freq) + cpu_freq = 2.0; + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + double cpu_freq_hz = cpu_freq * 1000.0 * 1000.0 * 1000.0; + double delay_in_seconds = cycle_count / cpu_freq_hz; + double delay_in_ns = delay_in_seconds * 1000.0 * 1000.0 * 1000.0; + + wrkld_per_rank.op_type = CODES_WK_DELAY; + /* TODO: Check how to convert cycle count into delay? 
*/ + wrkld_per_rank.u.delay.nsecs = delay_in_ns; + wrkld_per_rank.u.delay.seconds = delay_in_seconds; +#ifdef DBG_COMM + // printf("\n Compute op delay: %f ", delay_in_ns); +#endif + /* Retrieve the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nSWM COMPUTE src %d: %f ns ", sctx->my_rank, delay_in_ns); /* delay_in_ns is a double, so it needs a floating-point conversion; an integer conversion here is undefined behaviour */ + } + + ABT_thread_yield_to(global_prod_thread); + +} + +void SWM_Wait(uint32_t req_id) +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_WAIT; + /* TODO: Check how to convert cycle count into delay? */ + wrkld_per_rank.u.wait.req_id = req_id; + +#ifdef DBG_COMM +// printf("\n wait op req_id: %"PRIu32"\n", req_id); +// printf("\n wait "); +#endif + /* Retrieve the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nSWM WAIT src %d ",sctx->my_rank); + } + + ABT_thread_yield_to(global_prod_thread); +} + +void SWM_Waitall(int len, uint32_t * req_ids) +{ + num_waitalls++; + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_WAITALL; + /* TODO: Check how to convert cycle count into delay? 
*/ + wrkld_per_rank.u.waits.count = len; + wrkld_per_rank.u.waits.req_ids = (unsigned int*)calloc(len, sizeof(int)); + + for(int i = 0; i < len; i++) + wrkld_per_rank.u.waits.req_ids[i] = req_ids[i]; + +#ifdef DBG_COMM +// for(int i = 0; i < len; i++) +// printf("\n wait op len %d req_id: %"PRIu32"\n", len, req_ids[i]); +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + printf("\nSWM WAITALL src %d: count %d ",sctx->my_rank, len); + } + + ABT_thread_yield_to(global_prod_thread); +} + +void SWM_Sendrecv( + SWM_COMM_ID comm_id, + SWM_PEER sendpeer, + SWM_TAG sendtag, + SWM_VC sendreqvc, + SWM_VC sendrspvc, + SWM_BUF sendbuf, + SWM_BYTES sendbytes, + SWM_BYTES pktrspbytes, + SWM_PEER recvpeer, + SWM_TAG recvtag, + SWM_BUF recvbuf, + SWM_ROUTING_TYPE reqrt, + SWM_ROUTING_TYPE rsprt) +{ + // printf("\n Sending to %d receiving from %d ", sendpeer, recvpeer); + struct codes_workload_op send_op; + + send_op.op_type = CODES_WK_SEND; + send_op.u.send.tag = sendtag; + send_op.u.send.num_bytes = sendbytes; + send_op.u.send.dest_rank = sendpeer; + + /* Add an event in the shared queue and then yield */ + struct codes_workload_op recv_op; + + recv_op.op_type = CODES_WK_RECV; + recv_op.u.recv.tag = recvtag; + recv_op.u.recv.source_rank = recvpeer; + recv_op.u.recv.num_bytes = 0; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + recv_op.u.recv.dest_rank = sctx->my_rank; + send_op.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&send_op); + sctx->fifo.push_back(&recv_op); + + 
if(DBG_COMM){ + printf("\nSWM SENDRECV ssrc %d sdst %d: %d bytes; rsrc %d rdst %d: %lld bytes ", sctx->my_rank, sendpeer, + sendbytes, recvpeer, sctx->my_rank, recv_op.u.recv.num_bytes); + } + + ABT_thread_yield_to(global_prod_thread); + num_sendrecv++; +} + +/* @param count: number of bytes in Allreduce + * @param respbytes: number of bytes to be sent in response (ignore for our + * purpose) + * $params comm_id: communicator ID (MPI_COMM_WORLD for our case) + * @param sendreqvc: virtual channel of the sender request (ignore for our + * purpose) + * @param sendrspvc: virtual channel of the response request (ignore for our + * purpose) + * @param sendbuf and rcvbuf: buffers for send and receive calls (ignore for + * our purpose) */ +void SWM_Allreduce( + SWM_BYTES count, + SWM_BYTES respbytes, + SWM_COMM_ID comm_id, + SWM_VC sendreqvc, + SWM_VC sendrspvc, + SWM_BUF sendbuf, + SWM_BUF rcvbuf) +{ +#if 0 + /* TODO: For now, simulate a constant delay for ALlreduce*/ + // printf("\n Allreduce bytes %d ", bytes); + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_DELAY; + /* TODO: Check how to convert cycle count into delay? 
*/ + wrkld_per_rank.u.delay.nsecs = bytes + 0.1; + +#ifdef DBG_COMM + printf("\n Allreduce delay %lf ", wrkld_per_rank.u.delay.nsecs); +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + ABT_thread_yield_to(global_prod_thread); +#endif + +#ifdef DBG_COMM + /* + auto it = allreduce_count.find(count); + if(it == allreduce_count.end()) + { + allreduce_count.insert(std::make_pair(count, 1)); + } + else + { + it->second = it->second + 1; + } + */ +#endif + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + + int comm_size, i, send_idx, recv_idx, last_idx, send_cnt, recv_cnt; + int pof2, mask, rem, newrank, newdst, dst, *cnts, *disps; + int rank = sctx->my_rank; + comm_size = sctx->num_ranks; + + cnts = disps = NULL; + + pof2 = 1; + while (pof2 <= comm_size) pof2 <<= 1; + pof2 >>=1; + + rem = comm_size - pof2; + + /* In the non-power-of-two case, all even-numbered + processes of rank < 2*rem send their data to + (rank+1). These even-numbered processes no longer + participate in the algorithm until the very end. The + remaining processes form a nice power-of-two. */ + if (rank < 2*rem) { + if (rank % 2 == 0) { /* even */ + SWM_Send(rank+1, comm_id, 1235, sendreqvc, sendrspvc, 0, count, 1, 0, 0); + newrank = -1; + } else { /* odd */ + SWM_Recv(rank-1, comm_id, 1235, 0); + newrank = rank / 2; + } + } else { + newrank = rank - rem; + } + + /* If op is user-defined or count is less than pof2, use + recursive doubling algorithm. Otherwise do a reduce-scatter + followed by allgather. 
(If op is user-defined, + derived datatypes are allowed and the user could pass basic + datatypes on one process and derived on another as long as + the type maps are the same. Breaking up derived + datatypes to do the reduce-scatter is tricky, therefore + using recursive doubling in that case.) */ + if (newrank != -1) { + if ((count <= ALLREDUCE_SHORT_MSG_SIZE) || (count < pof2)) { + + mask = 0x1; + while (mask < pof2) { + newdst = newrank ^ mask; + dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem; + + SWM_Sendrecv(comm_id, dst, 1235, sendreqvc, sendrspvc, 0, + count, 1, dst, 1235, 0, 0, 0); + + mask <<= 1; + } + } else { + /* do a reduce-scatter followed by allgather */ + /* for the reduce-scatter, calculate the count that + each process receives and the displacement within + the buffer */ + + cnts = (int*)malloc(pof2*sizeof(int)); + disps = (int*)malloc(pof2*sizeof(int)); + + for (i=0; i<(pof2-1); i++) + cnts[i] = count/pof2; + cnts[pof2-1] = count - (count/pof2)*(pof2-1); + + disps[0] = 0; + for (i=1; i>= 1; + while (mask > 0) { + newdst = newrank ^ mask; + /* find real rank of dest */ + dst = (newdst < rem) ? 
newdst*2 + 1 : newdst + rem; + + send_cnt = recv_cnt = 0; + if (newrank < newdst) { + if (mask != pof2/2) + last_idx = last_idx + pof2/(mask*2); + + recv_idx = send_idx + pof2/(mask*2); + for (i=send_idx; i newdst) send_idx = recv_idx; + + mask >>= 1; + } + } + } + + if(rank < 2*rem) { + if(rank % 2) {/* odd */ + SWM_Send(rank-1, comm_id, 1235, sendreqvc, sendrspvc, 0, count, 1, 0, 0); + } else { + SWM_Recv(rank+1, comm_id, 1235, 0); + } + } + + if(cnts) free(cnts); + if(disps) free(disps); + + num_allreduce++; +} + +void SWM_Allreduce( + SWM_BYTES bytes, + SWM_BYTES respbytes, + SWM_COMM_ID comm_id, + SWM_VC sendreqvc, + SWM_VC sendrspvc, + SWM_BUF sendbuf, + SWM_BUF rcvbuf, + SWM_UNKNOWN auto1, + SWM_UNKNOWN2 auto2, + SWM_ROUTING_TYPE reqrt, + SWM_ROUTING_TYPE rsprt) +{ + SWM_Allreduce(bytes, respbytes, comm_id, sendreqvc, sendrspvc, sendbuf, rcvbuf); +} + +void SWM_Finalize() +{ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_END; + + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->fifo.push_back(&wrkld_per_rank); + + if(DBG_COMM){ + /* + auto it = allreduce_count.begin(); + for(; it != allreduce_count.end(); it++) + { + cout << "\n Allreduce " << it->first << " " << it->second; + } + + it = send_count.begin(); + for(; it != send_count.end(); it++) + { + cout << "\n Send " << it->first << " " << it->second; + } + + it = isend_count.begin(); + for(; it != isend_count.end(); it++) + { + cout << "\n isend " << it->first << " " << it->second; + }*/ + printf("\nSWM FINALIZE src %d ", sctx->my_rank); + printf("\nnum_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n", + num_sends, num_recvs, num_isends, 
num_irecvs, num_allreduce, num_barriers, num_waitalls); + } + ABT_thread_yield_to(global_prod_thread); +} + +void SWM_Mark_Iteration(SWM_TAG iter_tag) +{ /* Queue a CODES_WK_MARK op carrying iter_tag and this rank, then yield to global_prod_thread. */ + /* Add an event in the shared queue and then yield */ + struct codes_workload_op wrkld_per_rank; + + wrkld_per_rank.op_type = CODES_WK_MARK; + wrkld_per_rank.u.send.tag = iter_tag; + + /* Retrieve the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast<struct shared_context*>(arg); + wrkld_per_rank.u.send.source_rank = sctx->my_rank; + sctx->fifo.push_back(&wrkld_per_rank); /* pushes the address of a local: valid only until this function returns */ + + if(DBG_COMM){ + printf("\nSWM MARKITERATION src %d ", sctx->my_rank); + } + + ABT_thread_yield_to(global_prod_thread); +} + +//#endif + + +static int hash_rank_compare(void *key, struct qhash_head *link) +{ /* Returns 1 when the entry behind link has the same rank and app_id as key, 0 otherwise. */ + rank_mpi_compare *in = (rank_mpi_compare*)key; + rank_mpi_context *tmp; + + tmp = qhash_entry(link, rank_mpi_context, hash_link); + if (tmp->sctx.my_rank == in->rank && tmp->app_id == in->app_id) + return 1; + return 0; +} + +static void workload_caller(void * arg) +{ /* Runs the workload selected by sctx->workload_name; an unrecognised name runs nothing. */ + shared_context* sctx = static_cast<shared_context*>(arg); + + // printf("\n workload name %s ", sctx->workload_name); + if(strncmp(sctx->workload_name, "conceptual", 10) == 0) + { + union_bench_param * conc_params = static_cast<union_bench_param *> (sctx->conc_params); + // printf("program: %s\n",conc_params->conc_program); + // printf("argc: %d\n",conc_params->conc_argc); + int i; + for (i=0; i<conc_params->conc_argc; i++){ /* point each argv slot at the matching config_in string */ + conc_params->conc_argv[i] = conc_params->config_in[i]; + } + // conc_params->argv = &conc_params->conc_argv; + if(DBG_LINKING) + { + printf("\nLoad Union Benchmark: %s: %s", conc_params->conc_program, conc_params->conc_argv[1]); + } + union_conc_bench_load(conc_params->conc_program, + conc_params->conc_argc, + conc_params->conc_argv); + } else if(strcmp(sctx->workload_name, "lammps") == 0) + { + LAMMPS_SWM * lammps_swm = 
static_cast<LAMMPS_SWM*>(sctx->swm_obj); + lammps_swm->call(); + } + else if(strcmp(sctx->workload_name, "nekbone") == 0) + { + NEKBONESWMUserCode * nekbone_swm = static_cast<NEKBONESWMUserCode*>(sctx->swm_obj); + nekbone_swm->call(); + } + else if(strcmp(sctx->workload_name, "milc") == 0) + { + MilcSWMUserCode * milc_swm = static_cast<MilcSWMUserCode*>(sctx->swm_obj); + milc_swm->call(); + } + else if(strcmp(sctx->workload_name, "nearest_neighbor") == 0) + { + NearestNeighborSWMUserCode * nn_swm = static_cast<NearestNeighborSWMUserCode*>(sctx->swm_obj); + nn_swm->call(); + } + else if(strcmp(sctx->workload_name, "incast") == 0 || strcmp(sctx->workload_name, "incast1") == 0 || strcmp(sctx->workload_name, "incast2") == 0) + { + AllToOneSWMUserCode * incast_swm = static_cast<AllToOneSWMUserCode*>(sctx->swm_obj); + incast_swm->call(); + } +} + +static void determine_workload_paths(online_comm_params const * o_params, string& swm_path, string& conc_path, bool& isconc) +{ /* Appends the JSON config path to swm_path or conc_path; sets isconc for "conceptual*" workloads. */ + /* First check if custom JSON path is provided through file_path parameter */ + if(strlen(o_params->file_path) > 0) { + if(strncmp(o_params->workload_name, "conceptual", 10) == 0) { + conc_path.append(o_params->file_path); + isconc = 1; + } else { + swm_path.append(o_params->file_path); + } + return; + } + + /* Fall back to hardcoded paths */ + swm_path.append(SWM_DATAROOTDIR); + if(strcmp(o_params->workload_name, "lammps") == 0) { + swm_path.append("/lammps_workload.json"); + } else if(strcmp(o_params->workload_name, "nekbone") == 0) { + swm_path.append("/workload.json"); + } else if(strcmp(o_params->workload_name, "milc") == 0) { + swm_path.append("/milc_skeleton.json"); + } else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0) { + swm_path.append("/skeleton.json"); + } else if(strcmp(o_params->workload_name, "incast") == 0) { + swm_path.append("/incast.json"); + } else if(strcmp(o_params->workload_name, "incast1") == 0) { + swm_path.append("/incast1.json"); + } else if(strcmp(o_params->workload_name, "incast2") == 0) { + swm_path.append("/incast2.json"); + } else 
if(strncmp(o_params->workload_name, "conceptual", 10) == 0) { + conc_path.append(UNION_DATADIR); + conc_path.append("/conceptual.json"); + isconc = 1; + } else { + tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name); + } +} + +static int comm_online_workload_load(const void * params, int app_id, int rank) +{ + /* LOAD parameters from JSON file*/ + online_comm_params * o_params = (online_comm_params*)params; + int nprocs = o_params->nprocs; + + rank_mpi_context *my_ctx = new rank_mpi_context; + //my_ctx = (rank_mpi_context*)caloc(1, sizeof(rank_mpi_context)); + assert(my_ctx); + my_ctx->sctx.my_rank = rank; + my_ctx->sctx.num_ranks = nprocs; + my_ctx->sctx.wait_id = 0; + my_ctx->app_id = app_id; + my_ctx->sctx.init_data_from_workload.received = false; + + // printf("my_ctx nprocs %d\n", my_ctx->sctx.num_ranks); + + void** generic_ptrs; + int array_len = 1; + generic_ptrs = (void**)calloc(array_len, sizeof(void*)); + generic_ptrs[0] = (void*)&rank; + + strcpy(my_ctx->sctx.workload_name, o_params->workload_name); + boost::property_tree::ptree root, child; + string swm_path, conc_path; + bool isconc=0; + + // printf("workload name: %s\n", o_params->workload_name); + determine_workload_paths(o_params, swm_path, conc_path, isconc); + + // printf("\nUnion jason path %s\n", conc_path.c_str()); + if(isconc){ + try { + std::ifstream jsonFile(conc_path.c_str()); + boost::property_tree::json_parser::read_json(jsonFile, root); + + // printf("workload_name: %s\n", o_params->workload_name); + union_bench_param *tmp_params = (union_bench_param *) calloc(1, sizeof(union_bench_param)); + child = root.get_child(&o_params->workload_name[11]); + + // if we were given a path, we read the type of workload from the config + bool const has_path = o_params->file_path[0] != '\0'; + if (has_path) { + strcpy(tmp_params->conc_program, child.get_child("argv").begin()->second.data().c_str()); + } else { + strcpy(tmp_params->conc_program, &o_params->workload_name[11]); + } 
+ + tmp_params->conc_argc = child.get("argc"); + int i = 0; + BOOST_FOREACH(boost::property_tree::ptree::value_type &v, child.get_child("argv")) + { + assert(v.first.empty()); // array elements have no names + // tmp_params->conc_argv[i] = (char *) v.second.data().c_str(); + strcpy(tmp_params->config_in[i], v.second.data().c_str()); + i += 1; + } + my_ctx->sctx.conc_params = (void*) tmp_params; + my_ctx->sctx.isconc = 1; + } + catch(std::exception & e) + { + printf("Exception when reading UNION/Conceptual json config %s: %s\n", conc_path.c_str(), e.what()); + return -1; + } + } + else { + try { + std::ifstream jsonFile(swm_path.c_str()); + boost::property_tree::json_parser::read_json(jsonFile, root); + cpu_freq = root.get("jobs.cfg.cpu_freq") / 1e9; + + // if we were given a path, we read the type of workload from the config + bool const has_path = o_params->file_path[0] != '\0'; + if (has_path) { + strcpy(o_params->workload_name, root.get("jobs.cfg.app").c_str()); + strcpy(my_ctx->sctx.workload_name, o_params->workload_name); + } + } + catch(std::exception & e) + { + printf("Exception when reading SWM json config %s: %s\n", swm_path.c_str(), e.what()); + return -1; + } + my_ctx->sctx.isconc = 0; + if(strcmp(o_params->workload_name, "lammps") == 0) + { + LAMMPS_SWM * lammps_swm = new LAMMPS_SWM(root, generic_ptrs); + my_ctx->sctx.swm_obj = (void*)lammps_swm; + } + else if(strcmp(o_params->workload_name, "nekbone") == 0) + { + NEKBONESWMUserCode * nekbone_swm = new NEKBONESWMUserCode(root, generic_ptrs); + my_ctx->sctx.swm_obj = (void*)nekbone_swm; + } + else if(strcmp(o_params->workload_name, "milc") == 0) + { + MilcSWMUserCode * milc_swm = new MilcSWMUserCode(root, generic_ptrs); + my_ctx->sctx.swm_obj = (void*)milc_swm; + } + else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0) + { + NearestNeighborSWMUserCode * nn_swm = new NearestNeighborSWMUserCode(root, generic_ptrs); + my_ctx->sctx.swm_obj = (void*)nn_swm; + } + else 
if(strcmp(o_params->workload_name, "incast") == 0 || strcmp(o_params->workload_name, "incast1") == 0 || strcmp(o_params->workload_name, "incast2") == 0) + { + AllToOneSWMUserCode * incast_swm = new AllToOneSWMUserCode(root, generic_ptrs); + my_ctx->sctx.swm_obj = (void*)incast_swm; + } + } + + if(global_prod_thread == NULL) + { + ABT_xstream_self(&self_es); + ABT_thread_self(&global_prod_thread); + } + int rcode = ABT_thread_create_on_xstream(self_es, + &workload_caller, (void*)&(my_ctx->sctx), + ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer)); + + // Running thread that we just spawn until the producer adds an OP to FIFO or SWM_Mark_total_iterations is called. We use SWM_Mark_total_iterations in order to pass information into CODES from the SWM app. + while(my_ctx->sctx.fifo.empty() && !my_ctx->sctx.init_data_from_workload.received) + { + ABT_thread_yield_to(my_ctx->sctx.producer); + } + + if(DBG_LINKING) + { + printf("\nRank %d create app thread? %d", rank, rcode); + } + rank_mpi_compare cmp; + cmp.app_id = app_id; + cmp.rank = rank; + + if(!rank_tbl) + { + rank_tbl = qhash_init(hash_rank_compare, quickhash_64bit_hash, nprocs); + if(!rank_tbl) + return -1; + } + qhash_add(rank_tbl, &cmp, &(my_ctx->hash_link)); + rank_tbl_pop++; + + return 0; +} + +static void comm_online_workload_get_next(int app_id, int rank, struct codes_workload_op * op) +{ + /* At this point, we will use the "call" function. The send/receive/wait + * definitions will be replaced by our own function definitions that will do a + * yield to argobots if an event is not available. 
*/ + /* if shared queue is empty then yield */ + + rank_mpi_context * temp_data; + struct qhash_head * hash_link = NULL; + rank_mpi_compare cmp; + cmp.rank = rank; + cmp.app_id = app_id; + hash_link = qhash_search(rank_tbl, &cmp); + if(!hash_link) + { + printf("\n not found for rank id %d , %d", rank, app_id); + op->op_type = CODES_WK_END; /* unknown (rank, app_id): report end-of-workload to the caller */ + return; + } + temp_data = qhash_entry(hash_link, rank_mpi_context, hash_link); + assert(temp_data); + while(temp_data->sctx.fifo.empty()) /* keep yielding to the producer thread until it has queued an op */ + { + if(DBG_COMM){ + // void * arg; + // int err = ABT_thread_get_arg(temp_data->sctx.producer, &arg); + // assert(err == ABT_SUCCESS); + // struct shared_context * sctx = static_cast<struct shared_context*>(arg); + printf("\nFIFO que empty, yield to rank %d ", rank); + } + int rc = ABT_thread_yield_to(temp_data->sctx.producer); + } + struct codes_workload_op * front_op = temp_data->sctx.fifo.front(); + assert(front_op); + if(DBG_COMM) + { + switch(front_op->op_type) /* debug trace only: each case breaks so exactly one line is printed per op */ + { + case CODES_WK_ISEND: printf("\nFIFO pop operation ISEND src %d ", rank); break; + case CODES_WK_SEND: printf("\nFIFO pop operation SEND src %d ", rank); break; + case CODES_WK_RECV: printf("\nFIFO pop operation RECV src %d ", rank); break; + case CODES_WK_IRECV: printf("\nFIFO pop operation IRECV src %d ", rank); break; + case CODES_WK_DELAY: printf("\nFIFO pop operation COMPUTE src %d ", rank); break; + case CODES_WK_WAIT: printf("\nFIFO pop operation WAIT src %d ", rank); break; + case CODES_WK_WAITALL: printf("\nFIFO pop operation WAITALL src %d ", rank); break; + } + } + *op = *front_op; /* copy the op out before its queue entry is popped */ + temp_data->sctx.fifo.pop_front(); + return; +} +static int comm_online_workload_get_rank_cnt(const char *params, int app_id) +{ /* Returns the nprocs field of the online_comm_params passed as params; app_id is unused. */ + online_comm_params * o_params = (online_comm_params*)params; + int nprocs = o_params->nprocs; + return nprocs; +} + +static int comm_online_workload_finalize(const char* params, int app_id, int rank) +{ + // printf("Rank %d: Finalize workload for app %d\n", rank, app_id); + rank_mpi_context * temp_data; + struct qhash_head * hash_link = NULL; + rank_mpi_compare cmp; + cmp.rank 
= rank; + cmp.app_id = app_id; + hash_link = qhash_search(rank_tbl, &cmp); + if(!hash_link) + { + printf("\n not found for rank id %d , %d ", rank, app_id); + return -1; + } + temp_data = qhash_entry(hash_link, rank_mpi_context, hash_link); + assert(temp_data); + + int rc; + rc = ABT_thread_join(temp_data->sctx.producer); + // printf("thread terminate rc=%d\n", rc); + rc = ABT_thread_free(&(temp_data->sctx.producer)); + // printf("thread free rc=%d\n", rc); + if (temp_data->sctx.isconc){ + // printf("free conceptual params\n"); + free(temp_data->sctx.conc_params); + } + return 0; +} + +static int comm_online_workload_get_final_iteration(int app_id, int rank) { + rank_mpi_compare cmp; + cmp.app_id = app_id; + cmp.rank = rank; + + struct qhash_head * hash_link = qhash_search(rank_tbl, &cmp); + if(!hash_link) + { + printf("Workload/job not found for rank id %d, and app_id %d\n", rank, app_id); + return -1; + } + rank_mpi_context * ctx = qhash_entry(hash_link, rank_mpi_context, hash_link); + if (ctx->sctx.init_data_from_workload.received) { + return ctx->sctx.init_data_from_workload.final_iteration; + } + return -1; +} + +extern "C" { +/* workload method name and function pointers for the CODES workload API */ +struct codes_workload_method conc_online_comm_workload_method = +{ + //.method_name = + (char*)"conc_online_comm_workload", + //.codes_workload_read_config = + NULL, + //.codes_workload_load = + comm_online_workload_load, + //.codes_workload_get_next = + comm_online_workload_get_next, + // .codes_workload_get_next_rc2 = + NULL, + // .codes_workload_get_rank_cnt + comm_online_workload_get_rank_cnt, + // .codes_workload_finalize = + comm_online_workload_finalize, + // .codes_workload_get_time = + NULL, + // .codes_workload_get_final_iteration = + comm_online_workload_get_final_iteration, +}; +} // closing brace for extern "C" + diff --git a/src/workload/methods/codes-darshan3-io-wrkld.c b/src/workload/methods/codes-darshan3-io-wrkld.c index 9e0d60d6..5fc924c1 
100644 --- a/src/workload/methods/codes-darshan3-io-wrkld.c +++ b/src/workload/methods/codes-darshan3-io-wrkld.c @@ -53,7 +53,7 @@ static void * darshan_io_workload_read_config( char const * annotation, int num_ranks); /* Darshan workload generator's implementation of the CODES workload API */ -static int darshan_psx_io_workload_load(const char *params, int app_id, int rank); +static int darshan_psx_io_workload_load(const void *params, int app_id, int rank); static void darshan_psx_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op); static int darshan_psx_io_workload_get_rank_cnt(const char *params, int app_id); static int darshan_rank_hash_compare(void *key, struct qhash_head *link); @@ -179,7 +179,7 @@ static int darshan_psx_io_workload_get_time(const char *params, int app_id, int } /* load the workload generator for this rank, given input params */ -static int darshan_psx_io_workload_load(const char *params, int app_id, int rank) +static int darshan_psx_io_workload_load(const void *params, int app_id, int rank) { darshan_params *d_params = (darshan_params *)params; darshan_fd logfile_fd = NULL; diff --git a/src/workload/methods/codes-dumpi-trace-nw-wrkld.c b/src/workload/methods/codes-dumpi-trace-nw-wrkld.c index f32291da..e6710f52 100644 --- a/src/workload/methods/codes-dumpi-trace-nw-wrkld.c +++ b/src/workload/methods/codes-dumpi-trace-nw-wrkld.c @@ -119,7 +119,7 @@ static inline double time_to_ns_lf(dumpi_clock t){ }*/ /* load the trace */ -static int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank); +static int dumpi_trace_nw_workload_load(const void* params, int app_id, int rank); /* dumpi implementation of get next operation in the workload */ static void dumpi_trace_nw_workload_get_next(int app_id, int rank, struct codes_workload_op *op); @@ -770,7 +770,7 @@ static int hash_rank_compare(void *key, struct qhash_head *link) return 0; } -int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank) 
+int dumpi_trace_nw_workload_load(const void* params, int app_id, int rank) { libundumpi_callbacks callbacks; libundumpi_cbpair callarr[DUMPI_END_OF_STREAM]; diff --git a/src/workload/methods/codes-iolang-wrkld.c b/src/workload/methods/codes-iolang-wrkld.c index 45c647b1..c4920571 100644 --- a/src/workload/methods/codes-iolang-wrkld.c +++ b/src/workload/methods/codes-iolang-wrkld.c @@ -27,7 +27,7 @@ static void * iolang_io_workload_read_config( int num_ranks); /* load the workload file */ -static int iolang_io_workload_load(const char* params, int app_id, int rank); +static int iolang_io_workload_load(const void* params, int app_id, int rank); /* get next operation */ static void iolang_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op); @@ -87,7 +87,7 @@ static void * iolang_io_workload_read_config( } /* loads the workload file for each simulated MPI rank/ compute node LP */ -int iolang_io_workload_load(const char* params, int app_id, int rank) +int iolang_io_workload_load(const void* params, int app_id, int rank) { int t = -1; iolang_params* i_param = (struct iolang_params*)params; diff --git a/src/workload/methods/codes-iomock-wrkld.c b/src/workload/methods/codes-iomock-wrkld.c index 4c9ebc80..90ee6a99 100644 --- a/src/workload/methods/codes-iomock-wrkld.c +++ b/src/workload/methods/codes-iomock-wrkld.c @@ -132,7 +132,7 @@ static void * iomock_workload_read_config( } /* load the workload file */ -static int iomock_workload_load(const char* params, int app_id, int rank) +static int iomock_workload_load(const void* params, int app_id, int rank) { iomock_params const * p = (iomock_params const *) params; diff --git a/src/workload/methods/codes-online-comm-wrkld.C b/src/workload/methods/codes-online-comm-wrkld.C index c9e87406..8d783403 100644 --- a/src/workload/methods/codes-online-comm-wrkld.C +++ b/src/workload/methods/codes-online-comm-wrkld.C @@ -29,7 +29,7 @@ #include "milc_swm_user_code.h" #include "allreduce.h" #include 
"periodic_aggressor.h" -// #include "abt.h" +#include "abt.h" #include "layered_allbroadcast.h" #define ALLREDUCE_SHORT_MSG_SIZE 2048 @@ -66,6 +66,10 @@ struct shared_context { void * swm_obj; ABT_thread producer; std::deque fifo; + struct { + bool received; + int final_iteration; + } init_data_from_workload; }; struct rank_mpi_context { @@ -79,6 +83,21 @@ typedef struct rank_mpi_compare { int rank; } rank_mpi_compare; +void SWM_Pass_app_data(struct swm_app_data *app_data) { + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->init_data_from_workload.received = true; + sctx->init_data_from_workload.final_iteration = app_data->final_iteration; + + ABT_thread_yield_to(global_prod_thread); +} + /* * peer: the receiving peer id * comm_id: the communicator id being used @@ -933,7 +952,7 @@ string get_default_path(online_comm_params * o_params) } -static int comm_online_workload_load(const char * params, int app_id, int rank) +static int comm_online_workload_load(const void * params, int app_id, int rank) { /* LOAD parameters from JSON file*/ online_comm_params * o_params = (online_comm_params*)params; @@ -946,6 +965,7 @@ static int comm_online_workload_load(const char * params, int app_id, int rank) my_ctx->sctx.num_ranks = nprocs; my_ctx->sctx.wait_id = 0; my_ctx->app_id = app_id; + my_ctx->sctx.init_data_from_workload.received = false; void** generic_ptrs; int array_len = 1; @@ -1038,6 +1058,12 @@ static int comm_online_workload_load(const char * params, int app_id, int rank) &workload_caller, (void*)&(my_ctx->sctx), ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer)); + // Running thread that we just spawn until the producer adds an OP to FIFO or SWM_Mark_total_iterations is called. 
We use SWM_Mark_total_iterations in order to pass information into CODES from the SWM app. + while(my_ctx->sctx.fifo.empty() && !my_ctx->sctx.init_data_from_workload.received) + { + ABT_thread_yield_to(my_ctx->sctx.producer); + } + rank_mpi_compare cmp; cmp.app_id = app_id; cmp.rank = rank; @@ -1112,12 +1138,31 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran ABT_thread_free(&(temp_data->sctx.producer)); return 0; } + +static int comm_online_workload_get_final_iteration(int app_id, int rank) { + rank_mpi_compare cmp; + cmp.app_id = app_id; + cmp.rank = rank; + + struct qhash_head * hash_link = qhash_search(rank_tbl, &cmp); + if(!hash_link) + { + printf("Workload/job not found for rank id %d, and app_id %d\n", rank, app_id); + return -1; + } + rank_mpi_context * ctx = qhash_entry(hash_link, rank_mpi_context, hash_link); + if (ctx->sctx.init_data_from_workload.received) { + return ctx->sctx.init_data_from_workload.final_iteration; + } + return -1; +} + extern "C" { /* workload method name and function pointers for the CODES workload API */ -struct codes_workload_method online_comm_workload_method = +struct codes_workload_method swm_online_comm_workload_method = { //.method_name = - (char*)"online_comm_workload", + (char*)"swm_online_comm_workload", //.codes_workload_read_config = NULL, //.codes_workload_load = @@ -1129,7 +1174,11 @@ struct codes_workload_method online_comm_workload_method = // .codes_workload_get_rank_cnt comm_online_workload_get_rank_cnt, // .codes_workload_finalize = - comm_online_workload_finalize + comm_online_workload_finalize, + // .codes_workload_get_time = + NULL, + // .codes_workload_get_final_iteration + comm_online_workload_get_final_iteration, }; } // closing brace for extern "C" diff --git a/src/workload/methods/codes-recorder-io-wrkld.c b/src/workload/methods/codes-recorder-io-wrkld.c index 0f88408e..d6c76a6b 100644 --- a/src/workload/methods/codes-recorder-io-wrkld.c +++ 
b/src/workload/methods/codes-recorder-io-wrkld.c @@ -53,7 +53,7 @@ struct rank_traces_context }; /* CODES workload API functions for workloads generated from recorder traces*/ -static int recorder_io_workload_load(const char *params, int app_id, int rank); +static int recorder_io_workload_load(const void *params, int app_id, int rank); static void recorder_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op); /* helper functions for recorder workload CODES API */ @@ -73,7 +73,7 @@ static struct qhash_table *rank_tbl = NULL; static int rank_tbl_pop = 0; /* load the workload generator for this rank, given input params */ -static int recorder_io_workload_load(const char *params, int app_id, int rank) +static int recorder_io_workload_load(const void *params, int app_id, int rank) { recorder_params *r_params = (recorder_params *) params; struct rank_traces_context *newv = NULL; diff --git a/src/workload/methods/test-workload-method.c b/src/workload/methods/test-workload-method.c index 5081d1c3..c0659982 100644 --- a/src/workload/methods/test-workload-method.c +++ b/src/workload/methods/test-workload-method.c @@ -14,7 +14,7 @@ #include "ross.h" #include "codes/codes-workload.h" -static int test_workload_load(const char* params, int app_id, int rank); +static int test_workload_load(const void* params, int app_id, int rank); static void test_workload_get_next(int app_id, int rank, struct codes_workload_op *op); /* state information for each rank that is retrieving requests */ @@ -38,7 +38,7 @@ struct codes_workload_method test_workload_method = .codes_workload_get_next = test_workload_get_next, }; -static int test_workload_load(const char* params, int app_id, int rank) +static int test_workload_load(const void* params, int app_id, int rank) { /* no params in this case; this example will work with any number of * ranks diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..6390965c --- /dev/null +++ 
b/tests/CMakeLists.txt @@ -0,0 +1,116 @@ +enable_testing() + +configure_file(run-test.sh.in run-test.sh) + +include_directories("${ROSS_INCLUDE_DIRS}" "${CODES_SOURCE_DIR}") + +# Unfortunately, CMake doesn't support iteration over a key-value structure, +# otherwise the following lists could be easily compressed into a single +# list/dictionary/structure. Instead the i-th C file below **MUST** pair with the +# i-th binary name. This would be handled differently if maintaining the +# autoconfig build was not a **MUST** (in which case, the name of the file and +# its binary should match, leaving us with a single list!) +set(test-c-files + lp-io-test.c + mapping_test.c + jobmap-test.c + map-ctx-test.c + modelnet-prio-sched-test.c + modelnet-test-dragonfly.c + modelnet-test.c + resource-test.c + rc-stack-test.c + modelnet-p2p-bw.c + modelnet-simplep2p-test.c + local-storage-model-test.c + ) + +set(test-bin-names + lp-io-test + mapping_test + jobmap-test + map-ctx-test + modelnet-prio-sched-test + modelnet-test-dragonfly + modelnet-test + resource-test + rc-stack-test + modelnet-p2p-bw + modelnet-simplep2p-test + lsm-test + ) + +foreach(testname cfile IN ZIP_LISTS test-bin-names test-c-files) + add_executable(${testname} ${cfile}) + target_link_libraries(${testname} PUBLIC codes) +endforeach() + +# Additional binary which is not self contained +add_executable(codes-workload-test + workload/codes-workload-test.c + workload/codes-workload-test-svr-lp.c + workload/codes-workload-test-cn-lp.c + ) +target_link_libraries(codes-workload-test PUBLIC codes) + +# Tests are also not consistent with the files to compile, but +# that's ok, there are more tests than binary files +set(test-shell-files + # All binaries have an associated sh test script + lp-io-test.sh + mapping_test.sh + jobmap-test.sh + map-ctx-test.sh + modelnet-prio-sched-test.sh + modelnet-test-dragonfly.sh + modelnet-test.sh + resource-test.sh + modelnet-p2p-bw-loggp.sh + modelnet-simplep2p-test.sh + rc-stack-test.sh + 
lsm-test.sh + # These tests correspond to archived models / no binaries are being generated by CMake + #modelnet-test-dragonfly-custom-synthetic.sh + #modelnet-test-dragonfly-dally-synthetic.sh + #modelnet-test-dragonfly-plus-synthetic.sh + modelnet-test-torus.sh + modelnet-test-em.sh + modelnet-test-loggp.sh + modelnet-test-slimfly.sh + # These tests are not supported by the makefile/autoconf machinery either + #modelnet-test-dragonfly-custom-traces.sh + #modelnet-test-dragonfly-traces.sh + #modelnet-test-slimfly-traces.sh + #modelnet-test-torus-traces.sh + modelnet-test-dragonfly-synthetic.sh + modelnet-test-fattree-synthetic.sh + modelnet-test-slimfly-synthetic.sh + workload/codes-workload-test.sh + example-ping-pong-determinism.sh + example-ping-pong-surrogate-1.sh + example-ping-pong-surrogate-2.sh + example-ping-pong-surrogate-3.sh + example-ping-pong-no-logging.sh + example-ping-pong-surrogate-determinism-1.sh + example-ping-pong-surrogate-determinism-2.sh + ) + +configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @ONLY) + +if(USE_UNION) + list(APPEND test-shell-files + union-workload-test-surrogate.sh + union-workload-test-surrogate-smaller-chunk-size.sh + union-workload-test-surrogate-parallel.sh + union-workload-test-surrogate-parallel-deterministic-1.sh + union-workload-test-surrogate-parallel-deterministic-2.sh + union-workload-test-surrogate-parallel-deterministic-3.sh + union-workload-test-surrogate-parallel-deterministic-4.sh + ) +endif() + +foreach(testname ${test-shell-files}) + add_test(NAME ${testname} + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/run-test.sh" "${CMAKE_CURRENT_SOURCE_DIR}/${testname}" + WORKING_DIRECTORY "${CODES_BINARY_DIR}") +endforeach() diff --git a/tests/conf/modelnet-p2p-bw-loggp.conf b/tests/conf/modelnet-p2p-bw-loggp.conf index 93da757b..f4b8030b 100644 --- a/tests/conf/modelnet-p2p-bw-loggp.conf +++ b/tests/conf/modelnet-p2p-bw-loggp.conf @@ -10,7 +10,7 @@ 
LPGROUPS PARAMS { packet_size="2147483648"; - message_size="384"; + message_size="448"; modelnet_order=( "loggp" ); # scheduler options modelnet_scheduler="fcfs"; diff --git a/tests/conf/modelnet-prio-sched-test.conf b/tests/conf/modelnet-prio-sched-test.conf index b6cdd9b2..3a5dc7f6 100644 --- a/tests/conf/modelnet-prio-sched-test.conf +++ b/tests/conf/modelnet-prio-sched-test.conf @@ -10,7 +10,7 @@ LPGROUPS PARAMS { packet_size="512"; - message_size="416"; + message_size="480"; modelnet_order=( "simplenet" ); # scheduler options modelnet_scheduler="priority"; diff --git a/tests/conf/modelnet-test-dragonfly.conf b/tests/conf/modelnet-test-dragonfly.conf index 3c7e9a3b..bf9ee298 100644 --- a/tests/conf/modelnet-test-dragonfly.conf +++ b/tests/conf/modelnet-test-dragonfly.conf @@ -23,6 +23,6 @@ PARAMS local_bandwidth="5.25"; global_bandwidth="4.7"; cn_bandwidth="5.25"; - message_size="400"; + message_size="464"; routing="nonminimal"; } diff --git a/tests/conf/modelnet-test-loggp.conf b/tests/conf/modelnet-test-loggp.conf index 139641fb..a7431e33 100644 --- a/tests/conf/modelnet-test-loggp.conf +++ b/tests/conf/modelnet-test-loggp.conf @@ -9,7 +9,7 @@ LPGROUPS } PARAMS { - message_size="400"; + message_size="464"; modelnet_order=( "loggp" ); # scheduler options modelnet_scheduler="fcfs-full"; diff --git a/tests/conf/modelnet-test-simplep2p.conf b/tests/conf/modelnet-test-simplep2p.conf index b0c22f93..e77c6752 100644 --- a/tests/conf/modelnet-test-simplep2p.conf +++ b/tests/conf/modelnet-test-simplep2p.conf @@ -9,7 +9,7 @@ LPGROUPS } PARAMS { - message_size="312"; + message_size="464"; packet_size="1024"; modelnet_order=("simplep2p"); # scheduler options diff --git a/tests/conf/modelnet-test-slimfly.conf b/tests/conf/modelnet-test-slimfly.conf index 7d01910b..89892c5e 100644 --- a/tests/conf/modelnet-test-slimfly.conf +++ b/tests/conf/modelnet-test-slimfly.conf @@ -30,6 +30,6 @@ PARAMS global_bandwidth="9.0"; cn_bandwidth="9.0"; link_delay = "0"; - 
message_size="400"; + message_size="464"; routing="minimal"; } diff --git a/tests/conf/modelnet-test-torus.conf b/tests/conf/modelnet-test-torus.conf index 2d8d6cdf..da5980ba 100644 --- a/tests/conf/modelnet-test-torus.conf +++ b/tests/conf/modelnet-test-torus.conf @@ -14,7 +14,7 @@ PARAMS # scheduler options modelnet_scheduler="fcfs"; # modelnet_scheduler="round-robin"; - message_size="400"; + message_size="464"; n_dims="3"; dim_length="4,2,2"; link_bandwidth="2.0"; diff --git a/tests/conf/modelnet-test.conf b/tests/conf/modelnet-test.conf index 938b9fe2..7113709f 100644 --- a/tests/conf/modelnet-test.conf +++ b/tests/conf/modelnet-test.conf @@ -10,7 +10,7 @@ LPGROUPS PARAMS { packet_size="512"; - message_size="400"; + message_size="464"; modelnet_order=( "simplenet" ); # scheduler options modelnet_scheduler="fcfs"; diff --git a/tests/conf/union-milc-jacobi-workload/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json new file mode 100644 index 00000000..557c0bce --- /dev/null +++ b/tests/conf/union-milc-jacobi-workload/conceptual.json @@ -0,0 +1,65 @@ +{ + "latency": { + "argc": 5, + "argv": [ + "latency", + "--reps", + "100", + "--maxbytes", + "1M" + ] + }, + "cosmoflow": { + "argc": 7, + "argv": [ + "cosmoflow", + "--msgsize", + "7379200", + "--reps", + "10", + "--compute", + "129" + ] + }, + "cosmo": { + "argc": 5, + "argv": [ + "cosmo", + "5", + "7379200", + "129000000" + ] + }, + "jacobi3d": { + "argc": 11, + "argv": [ + "jacobi3d", + "400", + "300", + "300", + "100", + "100", + "100", + "50000", + "39", + "200000", + "barrier" + ] + }, + "alexnet": { + "argc": 2, + "argv": [ + "alexnet", + "10" + ] + }, + "checkpoint": { + "argc": 4, + "argv": [ + "checkpoint", + "1", + "50000000000", + "100000000" + ] + } +} diff --git a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in new file mode 100644 index 00000000..ade97ef2 --- /dev/null +++ 
b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @@ -0,0 +1,98 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="36"; +# name of this lp changes according to the model + nw-lp="2"; +# these lp names will be the same for dragonfly-custom model + modelnet_dragonfly_dally="2"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ +# packet size in the network + packet_size="4096"; + modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); + # scheduler options + modelnet_scheduler="fcfs"; +# chunk size in the network (when chunk size = packet size, packets will not be +# divided into chunks) + chunk_size="${CHUNK_SIZE}"; +# modelnet_scheduler="round-robin"; +# number of routers in group + num_routers="4"; +# number of groups in the network + num_groups="9"; +# buffer size in bytes for local virtual channels + local_vc_size="16384"; +#buffer size in bytes for global virtual channels + global_vc_size="16384"; +#buffer size in bytes for compute node virtual channels + cn_vc_size="32768"; +#bandwidth in GiB/s for local channels + local_bandwidth="5.25"; +# bandwidth in GiB/s for global channels + global_bandwidth="4.7"; +# bandwidth in GiB/s for compute node-router channels + cn_bandwidth="5.25"; +# ROSS message size + message_size="840"; +# number of compute nodes connected to router, dictated by dragonfly config +# file + num_cns_per_router="2"; +# number of global channels per router + num_global_channels="2"; +# network config file for intra-group connections + intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; +# network config file for inter-group connections + inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; +# routing protocol to be used + routing="prog-adaptive"; + minimal-bias="1"; + df-dally-vc = "1"; +# counting msgs recv to/send from routers + counting_bool="0"; + counting_start="0"; + counting_windows="1800"; + #interval in 
us + counting_interval="300"; + num_apps="2"; + #offset for app_id: model-net-mpi-replay is 88, synthetic-dfly-plus is 24 + offset="144"; +} +NETWORK_SURROGATE { + enable="${NETWORK_SURR_ON}"; # either "0" or "1" +# determines the director switching from surrogate to high-def simulation strategy + director_mode="delegate-to-app-director"; + #director_mode="at-fixed-virtual-times"; + +# director configuration for: director_mode == "at-fixed-virtual-times" +# timestamps at which to switch to surrogate-mode and back + fixed_switch_timestamps=( "25.0e6", "400.0e6" ); + +# latency predictor to use. Options: average, torch-jit + packet_latency_predictor="average"; +# some workload models need some time to stabilize, a point where the network behaviour stabilizes. The predictor will ignore all packet latencies that arrive during this period + ignore_until="2.0e6"; + +# parameters for torch-jit latency predictor + torch_jit_mode="single-static-model-for-all-terminals"; + torch_jit_model_path=""; + +# selecting network treatment on switching to surrogate. 
Options: freeze, nothing + network_treatment_on_switch="${NETWORK_MODE}"; +} +APPLICATION_SURROGATE { + enable="${APP_SURR_ON}"; # either 0 or 1 + + # Configuring director + director_mode="${APP_DIRECTOR_MODE}"; # Opts: "every-n-gvt", "every-n-nanoseconds" + director_num_gvt="${EVERY_N_GVT}"; + director_num_ns="${EVERY_NSECS}"; # 1e6 ns means 1 ms + + # Configuring predictor + # Minimum number of iterations to collect data from before skipping ahead in the simulation + num_iters_to_collect="2"; +} diff --git a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf new file mode 100644 index 00000000..93c60688 --- /dev/null +++ b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf @@ -0,0 +1,2 @@ +36 conceptual-jacobi3d 1 0 +36 milc 1 0 diff --git a/tests/conf/union-milc-jacobi-workload/milc_skeleton.json b/tests/conf/union-milc-jacobi-workload/milc_skeleton.json new file mode 100644 index 00000000..c43f648f --- /dev/null +++ b/tests/conf/union-milc-jacobi-workload/milc_skeleton.json @@ -0,0 +1,17 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/milc.so", + "size": 36, + "cfg": { + "app": "milc", + "iteration_cnt": 120, + "compute_delay": 100, + "dimension_cnt": 4, + "dimension_sizes": [2,2,3,3], + "msg_size": 497664, + "max_dimension_distance": 1, + "randomize_communication_order": false, + "cpu_freq" : 4e9 + } + } +} diff --git a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf new file mode 100644 index 00000000..07e490d0 --- /dev/null +++ b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf @@ -0,0 +1,2 @@ +12 24 38 2 65 18 3 70 39 11 69 67 48 21 8 45 71 55 6 1 49 68 32 5 61 46 23 9 7 26 15 62 52 28 13 25 +0 42 31 10 57 20 41 40 66 64 4 22 33 58 37 59 47 43 54 50 14 35 44 16 63 56 36 30 19 51 27 34 17 29 53 60 diff --git
a/tests/example-ping-pong-determinism.sh b/tests/example-ping-pong-determinism.sh new file mode 100755 index 00000000..58c2c973 --- /dev/null +++ b/tests/example-ping-pong-determinism.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Running simulation twice with the same parameters + +mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \ + --num_messages=10 --payload_sz=8192 \ + -- "$bindir/doc/example/tutorial-ping-pong.conf" \ + > model-output-1.txt 2> model-output-1-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \ + --num_messages=10 --payload_sz=8192 \ + -- "$bindir/doc/example/tutorial-ping-pong.conf" \ + > model-output-2.txt 2> model-output-2-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# This checks for the number of events processed. If they are different, then +# the simulation is not deterministic (so this should fail!). As always, just +# a unit test +diff <(grep 'Net Events Processed' model-output-1.txt) \ + <(grep 'Net Events Processed' model-output-2.txt) +err=$? 
+if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi diff --git a/tests/example-ping-pong-no-logging.sh b/tests/example-ping-pong-no-logging.sh new file mode 100755 index 00000000..0fb0be8d --- /dev/null +++ b/tests/example-ping-pong-no-logging.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Test: checking simulation runs without any problem when "packet latency path" is not given + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Configuring surrogate instance +export PACKET_SIZE=4096 +export CHUNK_SIZE=4096 +export PACKET_LATENCY_TRACE_PATH= +cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf + +# Running simulation twice with the same parameters + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=4096 \ + -- tutorial-ping-pong.conf \ + > model-output-1.txt 2> model-output-1-error.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh new file mode 100755 index 00000000..6b9e4b8b --- /dev/null +++ b/tests/example-ping-pong-surrogate-1.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Test: checking whether surrogate mode behaves the same as high-fidelity +# Should take at most 1 minute to run + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Configuring surrogate instance +export PACKET_SIZE=4096 +export CHUNK_SIZE=64 +export NETWORK_TREATMENT=freeze +export PREDICTOR_TYPE=average +export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/ +export IGNORE_UNTIL=0.0 +export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"' +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf + +export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/ +cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf + +# Running simulation twice with the same parameters + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=16320 \ + -- tutorial-ping-pong.conf \ + > model-output-1.txt 2> model-output-1-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=16320 \ + -- tutorial-ping-pong-surrogate.conf \ + > model-output-2.txt 2> model-output-2-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +# Checking that the surrogate switched properly +grep 'Network switch completed' model-output-2.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# This compares the "Sever LPID:" lines printed by both runs, ignoring the +# ' sent N bytes in T seconds' portion of each line. If they differ, the +# surrogate did not reproduce the high-fidelity result +to_remove_from_output=' sent [0-9]* bytes in [0-9.]* seconds' +diff <(grep "Sever LPID:" model-output-1.txt | sed "s/${to_remove_from_output}//") \ + <(grep "Sever LPID:" model-output-2.txt | sed "s/${to_remove_from_output}//") +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The surrogate gave different results from high-fidelity" + exit $err +fi + +# This checks for an equal number of packets transmitted +diff <(cat packet-latency-surrogate/*.txt | wc -l) <(cat packet-latency-highdef/*.txt | wc -l) +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The surrogate gave different results from high-fidelity on the number of packets transmitted" + exit $err +fi diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh new file mode 100755 index 00000000..a37d309a --- /dev/null +++ b/tests/example-ping-pong-surrogate-2.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Test: checking whether surrogate mode behaves the same as high-fidelity +# Should take at most 1 minute to run + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Configuring surrogate instance +export PACKET_SIZE=128 +export CHUNK_SIZE=64 +export NETWORK_TREATMENT=freeze +export PREDICTOR_TYPE=average +export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate/ +export IGNORE_UNTIL=0.0 +export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"' +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in |
envsubst > tutorial-ping-pong-surrogate.conf + +export PACKET_LATENCY_TRACE_PATH=packet-latency-highdef/ +cat "$bindir/doc/example"/tutorial-ping-pong.template.conf.in | envsubst > tutorial-ping-pong.conf + +# Running the high-fidelity configuration first, then the surrogate one + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=16320 \ + -- tutorial-ping-pong.conf \ + > model-output-1.txt 2> model-output-1-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=16320 \ + -- tutorial-ping-pong-surrogate.conf \ + > model-output-2.txt 2> model-output-2-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that the surrogate switched properly +grep 'Network switch completed' model-output-2.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# This compares the "Sever LPID:" lines printed by both runs, ignoring the +# ' sent N bytes in T seconds' portion of each line. If they differ, the +# surrogate did not reproduce the high-fidelity result +to_remove_from_output=' sent [0-9]* bytes in [0-9.]* seconds' +diff <(grep "Sever LPID:" model-output-1.txt | sed "s/${to_remove_from_output}//") \ + <(grep "Sever LPID:" model-output-2.txt | sed "s/${to_remove_from_output}//") +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The surrogate gave different results from high-fidelity" + exit $err +fi + +# This checks for an equal number of packets transmitted +diff <(cat packet-latency-surrogate/*.txt | wc -l) <(cat packet-latency-highdef/*.txt | wc -l) +err=$?
+if [[ $err -ne 0 ]]; then + >&2 echo "The surrogate gave different results from high-fidelity on the number of packets transmitted" + exit $err +fi diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh new file mode 100755 index 00000000..ca04c245 --- /dev/null +++ b/tests/example-ping-pong-surrogate-3.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Test: checking whether freezing the network works equally well as not freezing the network (in terms of packets processed) +# Should take at most 1 minute to run + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Configuring surrogate instance +export PACKET_SIZE=128 +export CHUNK_SIZE=64 +export NETWORK_TREATMENT=freeze +export PREDICTOR_TYPE=average +export PACKET_LATENCY_TRACE_PATH=packet-latency-freeze/ +export IGNORE_UNTIL=0.0 +export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"' +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate.conf + +export NETWORK_TREATMENT=nothing +export PACKET_LATENCY_TRACE_PATH=packet-latency-non-freeze/ +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-non-freeze.conf + +# Running simulation twice with the same parameters + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=16320 \ + -- tutorial-ping-pong-surrogate.conf \ + > model-output-1.txt 2> model-output-1-error.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +mpirun -np 1 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=1 \ + --num_messages=10 --payload_sz=16320 \ + -- tutorial-ping-pong-surrogate-non-freeze.conf \ + > model-output-2.txt 2> model-output-2-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that the surrogate switched properly +grep 'Network switch completed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# This compares the "Sever LPID:" lines printed by both runs, ignoring the +# ' sent N bytes in T seconds' portion of each line. If they differ, freezing +# the network changed the result +to_remove_from_output=' sent [0-9]* bytes in [0-9.]* seconds' +diff <(grep "Sever LPID:" model-output-1.txt | sed "s/${to_remove_from_output}//") \ + <(grep "Sever LPID:" model-output-2.txt | sed "s/${to_remove_from_output}//") +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "Freezing the network leads to a different result than not doing it" + exit $err +fi + +# This checks for an equal number of packets transmitted +diff <(cat packet-latency-freeze/*.txt | wc -l) <(cat packet-latency-non-freeze/*.txt | wc -l) +err=$?
+if [[ $err -ne 0 ]]; then + >&2 echo "The two modes (freezing and not) are processing a different number of packets" + exit $err +fi diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh new file mode 100755 index 00000000..5726aa0b --- /dev/null +++ b/tests/example-ping-pong-surrogate-determinism-1.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Configuring surrogate instance +export PACKET_SIZE=1024 +export CHUNK_SIZE=1024 +export NETWORK_TREATMENT=nothing +export PREDICTOR_TYPE=average +export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/ +export IGNORE_UNTIL=0.0 +export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"' +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf + +export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/ +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf + +# Running simulation twice with the same parameters + +mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \ + --num_messages=100 --payload_sz=8192 \ + -- tutorial-ping-pong-surrogate-1.conf \ + > model-output-1.txt 2> model-output-1-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \ + --num_messages=100 --payload_sz=8192 \ + -- tutorial-ping-pong-surrogate-2.conf \ + > model-output-2.txt 2> model-output-2-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +# Checking that the surrogate switched properly +grep 'Network switch completed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# This checks for the number of events processed. If they are different, then +# the simulation is not deterministic (so this should fail!). As always, just +# a unit test +diff <(grep 'Net Events Processed' model-output-1.txt) \ + <(grep 'Net Events Processed' model-output-2.txt) +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh new file mode 100755 index 00000000..f7908a27 --- /dev/null +++ b/tests/example-ping-pong-surrogate-determinism-2.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +# Configuring surrogate instance +export PACKET_SIZE=1024 +export CHUNK_SIZE=1024 +export NETWORK_TREATMENT=freeze +export PREDICTOR_TYPE=average +export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-1/ +export IGNORE_UNTIL=0.0 +export SWITCH_TIMESTAMPS='".08e6", ".1e6", ".2e6", ".6e6", ".7e6", ".9e6", "1.0e6", "1.3e6", "1.6e6", "1.7e6", "1.9e6", "2.0e6", "2.3e6", "2.6e6", "2.7e6", "2.9e6", "3.0e6", "3.3e6", "3.6e6", "3.7e6", "3.9e6", "4.0e6", "4.3e6", "4.6e6", "4.7e6", "4.9e6", "5.0e6", "9.8e6"' +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-1.conf + +export PACKET_LATENCY_TRACE_PATH=packet-latency-surrogate-2/ +cat "$bindir/doc/example"/tutorial-ping-pong-surrogate.template.conf.in | envsubst > tutorial-ping-pong-surrogate-2.conf + +# Running simulation twice with the same parameters + +mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \ + --num_messages=100 --payload_sz=8192 \ + -- tutorial-ping-pong-surrogate-1.conf \ + > 
model-output-1.txt 2> model-output-1-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +mpirun -np 3 "$bindir/doc/example/tutorial-synthetic-ping-pong" --sync=3 \ + --num_messages=100 --payload_sz=8192 \ + -- tutorial-ping-pong-surrogate-2.conf \ + > model-output-2.txt 2> model-output-2-error.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# Checking that the surrogate switched properly +grep 'Network switch completed' model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# This checks for the number of events processed. If they are different, then +# the simulation is not deterministic (so this should fail!). As always, just +# a unit test +diff <(grep 'Net Events Processed' model-output-1.txt) \ + <(grep 'Net Events Processed' model-output-2.txt) +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi diff --git a/tests/jobmap-test.sh b/tests/jobmap-test.sh index 21ee0c45..a56e1698 100755 --- a/tests/jobmap-test.sh +++ b/tests/jobmap-test.sh @@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then exit 1 fi -tests/jobmap-test $srcdir/tests/conf/jobmap-test-list.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +"$bindir"/tests/jobmap-test "$srcdir"/tests/conf/jobmap-test-list.conf diff --git a/tests/lp-io-test.sh b/tests/lp-io-test.sh index db932e46..c9a6d4fa 100755 --- a/tests/lp-io-test.sh +++ b/tests/lp-io-test.sh @@ -1,3 +1,7 @@ #!/bin/bash -tests/lp-io-test --sync=1 +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. 
+fi + +mpirun -np 1 "$bindir"/tests/lp-io-test --sync=1 diff --git a/tests/lsm-test.sh b/tests/lsm-test.sh index b703970b..168a2411 100755 --- a/tests/lsm-test.sh +++ b/tests/lsm-test.sh @@ -5,4 +5,8 @@ if [ -z $srcdir ]; then exit 1 fi -tests/lsm-test --sync=1 --conf=$srcdir/tests/conf/lsm-test.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/lsm-test --sync=1 --conf="$srcdir"/tests/conf/lsm-test.conf diff --git a/tests/map-ctx-test.c b/tests/map-ctx-test.c index 1e754089..3ce14b95 100644 --- a/tests/map-ctx-test.c +++ b/tests/map-ctx-test.c @@ -161,6 +161,8 @@ int main(int argc, char *argv[]) CHECK("group_direct_anno"); CHECK("group_direct_anno"); + MPI_Finalize(); + return 0; } diff --git a/tests/map-ctx-test.sh b/tests/map-ctx-test.sh index 8fd7dd0d..def90c7f 100755 --- a/tests/map-ctx-test.sh +++ b/tests/map-ctx-test.sh @@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then exit 1 fi -tests/map-ctx-test $srcdir/tests/conf/map-ctx-test.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/map-ctx-test "$srcdir"/tests/conf/map-ctx-test.conf diff --git a/tests/mapping_test.sh b/tests/mapping_test.sh index 60d233ff..40d62908 100755 --- a/tests/mapping_test.sh +++ b/tests/mapping_test.sh @@ -1,12 +1,17 @@ #!/bin/bash -tst=$srcdir/tests +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +tst="$srcdir/tests" set -e -tests/mapping_test --sync=1 --codes-config=$tst/conf/mapping_test.conf \ + +mpirun -np 1 "$bindir"/tests/mapping_test --sync=1 --codes-config="$tst"/conf/mapping_test.conf \ 2> mapping_test.err \ 1| grep TEST > mapping_test.out -diff $tst/expected/mapping_test.out mapping_test.out +diff "$tst"/expected/mapping_test.out mapping_test.out err=$? 
if [ -s mapping_test.err ] ; then diff --git a/tests/modelnet-p2p-bw-loggp.sh b/tests/modelnet-p2p-bw-loggp.sh index 2972521f..b49c80e9 100755 --- a/tests/modelnet-p2p-bw-loggp.sh +++ b/tests/modelnet-p2p-bw-loggp.sh @@ -1,3 +1,7 @@ #!/bin/bash -tests/modelnet-p2p-bw --sync=1 -- tests/conf/modelnet-p2p-bw-loggp.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/modelnet-p2p-bw --sync=1 -- "$srcdir"/tests/conf/modelnet-p2p-bw-loggp.conf diff --git a/tests/modelnet-prio-sched-test.sh b/tests/modelnet-prio-sched-test.sh index f9939322..1d033d97 100755 --- a/tests/modelnet-prio-sched-test.sh +++ b/tests/modelnet-prio-sched-test.sh @@ -1,14 +1,18 @@ #!/bin/bash -tests/modelnet-prio-sched-test --sync=1 -- \ - tests/conf/modelnet-prio-sched-test.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/modelnet-prio-sched-test --sync=1 -- \ + $srcdir/tests/conf/modelnet-prio-sched-test.conf err=$? if [[ $err -ne 0 ]]; then exit $err fi -mpirun -np 2 tests/modelnet-prio-sched-test --sync=3 -- \ - tests/conf/modelnet-prio-sched-test.conf +mpirun -np 2 "$bindir"/tests/modelnet-prio-sched-test --sync=3 -- \ + $srcdir/tests/conf/modelnet-prio-sched-test.conf err=$? if [[ $err -ne 0 ]]; then exit $err diff --git a/tests/modelnet-simplep2p-test.sh b/tests/modelnet-simplep2p-test.sh new file mode 100755 index 00000000..94e4cada --- /dev/null +++ b/tests/modelnet-simplep2p-test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +if [[ -z $srcdir ]] ; then + echo srcdir variable not set + exit 1 +fi + +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. 
+fi + +mpirun -np 1 "$bindir"/tests/modelnet-simplep2p-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-simplep2p.conf diff --git a/tests/modelnet-test-dragonfly-synthetic.sh b/tests/modelnet-test-dragonfly-synthetic.sh index 8859ecbf..a5b4b072 100755 --- a/tests/modelnet-test-dragonfly-synthetic.sh +++ b/tests/modelnet-test-dragonfly-synthetic.sh @@ -1,3 +1,11 @@ #!/bin/bash -src/network-workloads/model-net-synthetic --sync=1 --num_messages=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf +# Binaries generated by CMake are located in a different place +# to those of autoconf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=src/network-workloads +else + bindir="$bindir"/src +fi + +mpirun -np 1 "$bindir"/model-net-synthetic --sync=1 --num_messages=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-dragonfly.conf diff --git a/tests/modelnet-test-dragonfly.sh b/tests/modelnet-test-dragonfly.sh index 9362a821..8731a87a 100755 --- a/tests/modelnet-test-dragonfly.sh +++ b/tests/modelnet-test-dragonfly.sh @@ -1,5 +1,7 @@ #!/bin/bash -tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-dragonfly.conf - +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi +mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-dragonfly.conf diff --git a/tests/modelnet-test-em.sh b/tests/modelnet-test-em.sh index 158ab272..d4411d6d 100755 --- a/tests/modelnet-test-em.sh +++ b/tests/modelnet-test-em.sh @@ -1,5 +1,7 @@ #!/bin/bash -tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-em.conf - +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. 
+fi +mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-em.conf diff --git a/tests/modelnet-test-fattree-synthetic.sh b/tests/modelnet-test-fattree-synthetic.sh index 9b76acdf..3f7744d9 100755 --- a/tests/modelnet-test-fattree-synthetic.sh +++ b/tests/modelnet-test-fattree-synthetic.sh @@ -5,10 +5,15 @@ if [ -z $srcdir ]; then exit 1 fi -source $srcdir/tests/download-traces.sh +# Binaries generated by CMake are located in a different place +# to those of autoconf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=src/network-workloads +else + bindir="$bindir"/src +fi -src/network-workloads/model-net-synthetic-fattree --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-fattree.conf +mpirun -np 1 "$bindir"/model-net-synthetic-fattree --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-fattree.conf +#source $srcdir/tests/download-traces.sh #src/network-workloads/model-net-mpi-replay --sync=1 --num_net_traces=27 --workload_file=/tmp/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00- --workload_type="dumpi" -- $srcdir/src/network-workloads/conf/modelnet-mpi-test-fattree.conf - - diff --git a/tests/modelnet-test-loggp.sh b/tests/modelnet-test-loggp.sh index 4da704bd..2eef34bd 100755 --- a/tests/modelnet-test-loggp.sh +++ b/tests/modelnet-test-loggp.sh @@ -1,3 +1,7 @@ #!/bin/bash -tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-loggp.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. 
+fi + +mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-loggp.conf diff --git a/tests/modelnet-test-slimfly-synthetic.sh b/tests/modelnet-test-slimfly-synthetic.sh index a610e63a..21ea0458 100755 --- a/tests/modelnet-test-slimfly-synthetic.sh +++ b/tests/modelnet-test-slimfly-synthetic.sh @@ -1,4 +1,11 @@ #!/bin/bash -src/network-workloads/model-net-synthetic-slimfly --sync=1 -- $srcdir/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf +# Binaries generated by CMake are located in a different place +# to those of autoconf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=src/network-workloads +else + bindir="$bindir"/src +fi +mpirun -np 1 "$bindir"/model-net-synthetic-slimfly --sync=1 -- "$srcdir"/src/network-workloads/conf/modelnet-synthetic-slimfly-min.conf diff --git a/tests/modelnet-test-slimfly.sh b/tests/modelnet-test-slimfly.sh index 938802ae..2a5b6a8c 100755 --- a/tests/modelnet-test-slimfly.sh +++ b/tests/modelnet-test-slimfly.sh @@ -1,3 +1,7 @@ #!/bin/bash -tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-slimfly.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-slimfly.conf diff --git a/tests/modelnet-test-torus.sh b/tests/modelnet-test-torus.sh index 942a881e..2b102bcc 100755 --- a/tests/modelnet-test-torus.sh +++ b/tests/modelnet-test-torus.sh @@ -1,4 +1,7 @@ #!/bin/bash -tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-torus.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi +mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test-torus.conf diff --git a/tests/modelnet-test.sh b/tests/modelnet-test.sh index bf327fec..248f5117 100755 --- a/tests/modelnet-test.sh +++ b/tests/modelnet-test.sh @@ -1,3 +1,7 @@ #!/bin/bash -tests/modelnet-test --sync=1 -- tests/conf/modelnet-test.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. 
+fi + +mpirun -np 1 "$bindir"/tests/modelnet-test --sync=1 -- "$srcdir"/tests/conf/modelnet-test.conf diff --git a/tests/rc-stack-test.sh b/tests/rc-stack-test.sh new file mode 100755 index 00000000..ec254542 --- /dev/null +++ b/tests/rc-stack-test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +if [[ -z $srcdir ]] ; then + echo srcdir variable not set + exit 1 +fi + +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/rc-stack-test diff --git a/tests/resource-test.sh b/tests/resource-test.sh index c39213ca..4125e0f8 100755 --- a/tests/resource-test.sh +++ b/tests/resource-test.sh @@ -5,4 +5,8 @@ if [[ -z $srcdir ]] ; then exit 1 fi -tests/resource-test --sync=1 --codes-config=$srcdir/tests/conf/buffer_test.conf +if [ -z $GENERATED_USING_CMAKE ]; then + bindir=. +fi + +mpirun -np 1 "$bindir"/tests/resource-test --sync=1 --codes-config="$srcdir"/tests/conf/buffer_test.conf diff --git a/tests/run-test.sh.in b/tests/run-test.sh.in new file mode 100755 index 00000000..be706268 --- /dev/null +++ b/tests/run-test.sh.in @@ -0,0 +1,47 @@ +#!/bin/bash -x + +# This file is called when running a test using CTest. +# +# To run without deleting test directories set DONT_DELETE_TEST_DIR +# > DONT_DELETE_TEST_DIR=1 ctest +# instead of +# > ctest + +export srcdir="${CMAKE_SOURCE_DIR}" +export bindir="${CMAKE_BINARY_DIR}" +export GENERATED_USING_CMAKE=1 + +# Set Union and SWM install paths if available +if [ -n "${SWM_DATAROOTDIR}" ]; then + export SWM_DATAROOTDIR="${SWM_DATAROOTDIR}" +fi +if [ -n "${UNION_DATAROOTDIR}" ]; then + export UNION_DATAROOTDIR="${UNION_DATAROOTDIR}" +fi + +# Creating temporary folder in order to save output without colliding with +# some other process that would like to generate the same output +mkdir -p testing-output +tmpdir="$(mktemp -d testing-output/test-XXXXXX)" + +pushd "$tmpdir" + +# running experiment +bash -x "$1" + +# checking for exit error +err=$?
+[[ $err -ne 0 ]] && exit $err + +# checking if ross.csv exists and is empty +if [[ -f ross.csv ]] && [[ ! -s ross.csv ]]; then + >&2 echo "The experiment seems to have failed. \`ross.csv' is empty, i.e., the binary died before saving any output" + exit 1 +fi + +popd + +# deleting temporal dir (this should only happen if there were no errors) +if [ -z $DONT_DELETE_TEST_DIR ]; then + rm -r $tmpdir +fi diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh new file mode 100644 index 00000000..22294863 --- /dev/null +++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! 
+tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export CHUNK_SIZE=4096 +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=0 +export NETWORK_MODE=nothing +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mkdir run-1 +pushd run-1 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-1.txt 2> model-output-1-error.txt + +err=$? 
+if [[ $err -ne 0 ]]; then mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"; mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"; rmdir "$tmpdir"; exit $err; fi # restore the backed-up JSON files before bailing out of a failed first run + +popd + +mkdir run-2 +pushd run-2 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-2.txt 2> model-output-2-error.txt + +err=$? + +popd + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' run-1/model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \ + <(grep 'Net Events Processed' run-2/model-output-2.txt) +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi + +exit 0 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh new file mode 100644 index 00000000..2a0384fd --- /dev/null +++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json!
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"  # scratch directory under $PWD that holds the original JSON files
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copy every input configuration into the experiment folder so the results are kept next to the inputs that produced them
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# Environment exported for envsubst, which fills in the CODES config template (dfdally-72-par.conf.in) below
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par-1.conf"
+# Second config: generated from the same template with only APP_DIRECTOR_MODE changed
+export APP_DIRECTOR_MODE=every-n-gvt
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par-2.conf"
+
+# Run the simulation; each run gets its own subdirectory, with stdout and stderr captured to files
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par-1.conf" \
+    > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?  # exit status of the run-1 mpirun
+# Run 1 failed: put the original JSON files back (same steps as after run 2) before exiting
+[[ $err -ne 0 ]] && { popd; mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"; mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"; rmdir "$tmpdir"; exit $err; }
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par-2.conf" \
+    > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Put the original JSON files back before evaluating the result of run 2
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-3.sh b/tests/union-workload-test-surrogate-parallel-deterministic-3.sh
new file mode 100644
index 00000000..93b74afc
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-3.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+if [[ -z $bindir || -z $srcdir ]] ; then
+    echo bindir or srcdir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Back up the milc/conceptual JSON files found in the data dirs and put the test versions in their place
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"  # scratch directory under $PWD that holds the original JSON files
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copy every input configuration into the experiment folder so the results are kept next to the inputs that produced them
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# Environment exported for envsubst, which fills in the CODES config template (dfdally-72-par.conf.in) below
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=1
+export NETWORK_MODE=nothing
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# Run the simulation; each run gets its own subdirectory, with stdout and stderr captured to files
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?  # exit status of the run-1 mpirun
+# Run 1 failed: put the original JSON files back (same steps as after run 2) before exiting
+[[ $err -ne 0 ]] && { popd; mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"; mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"; rmdir "$tmpdir"; exit $err; }
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Put the original JSON files back before evaluating the result of run 2
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-4.sh b/tests/union-workload-test-surrogate-parallel-deterministic-4.sh
new file mode 100644
index 00000000..639eed1e
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel-deterministic-4.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+if [[ -z $bindir || -z $srcdir ]] ; then
+    echo bindir or srcdir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Back up the milc/conceptual JSON files found in the data dirs and put the test versions in their place
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"  # scratch directory under $PWD that holds the original JSON files
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copy every input configuration into the experiment folder so the results are kept next to the inputs that produced them
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# Environment exported for envsubst, which fills in the CODES config template (dfdally-72-par.conf.in) below
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=1
+export NETWORK_MODE=freeze
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# Run the simulation; each run gets its own subdirectory, with stdout and stderr captured to files
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mkdir run-1
+pushd run-1
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output-1.txt 2> model-output-1-error.txt
+
+err=$?  # exit status of the run-1 mpirun
+# Run 1 failed: put the original JSON files back (same steps as after run 2) before exiting
+[[ $err -ne 0 ]] && { popd; mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"; mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"; rmdir "$tmpdir"; exit $err; }
+popd
+
+mkdir run-2
+pushd run-2
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output-2.txt 2> model-output-2-error.txt
+
+err=$?
+
+popd
+
+# Put the original JSON files back before evaluating the result of run 2
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# Checking that there is actual output
+grep 'Net Events Processed' run-1/model-output-1.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \
+    <(grep 'Net Events Processed' run-2/model-output-2.txt)
+err=$?
+if [[ $err -ne 0 ]]; then
+    >&2 echo "The number of net events processed does not coincide, ie," \
+        "the simulation is not deterministic"
+    exit $err
+fi
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh
new file mode 100644
index 00000000..f84bccad
--- /dev/null
+++ b/tests/union-workload-test-surrogate-parallel.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+if [[ -z $bindir || -z $srcdir ]] ; then
+    echo bindir or srcdir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=3
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Back up the milc/conceptual JSON files found in the data dirs and put the test versions in their place
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"  # scratch directory under $PWD that holds the original JSON files
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copy every input configuration into the experiment folder so the results are kept next to the inputs that produced them
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# Environment exported for envsubst, which fills in the CODES config template (dfdally-72-par.conf.in) below
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# Run the simulation; stdout and stderr are captured to files in the current directory
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=3 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output.txt 2> model-output-error.txt
+
+err=$?  # exit status of mpirun, checked only after the restore below
+
+# Put the original JSON files back before evaluating the result, so a failed run does not leave the test copies in place
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# The output must contain the 'Net Events Processed' line
+grep 'Net Events Processed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking both milc and jacobi ran
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'App 0: All non-synthetic workloads have completed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned into surrogacy
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned back to high-fidelity
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+exit 0
diff --git a/tests/union-workload-test-surrogate-smaller-chunk-size.sh b/tests/union-workload-test-surrogate-smaller-chunk-size.sh
new file mode 100644
index 00000000..d7266ccc
--- /dev/null
+++ b/tests/union-workload-test-surrogate-smaller-chunk-size.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+if [[ -z $bindir || -z $srcdir ]] ; then
+    echo bindir or srcdir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=1
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Back up the milc/conceptual JSON files found in the data dirs and put the test versions in their place
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"  # scratch directory under $PWD that holds the original JSON files
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copy every input configuration into the experiment folder so the results are kept next to the inputs that produced them
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# Environment exported for envsubst, which fills in the CODES config template (dfdally-72-par.conf.in) below
+export CHUNK_SIZE=2048
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# Run the simulation; stdout and stderr are captured to files in the current directory
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=1 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output.txt 2> model-output-error.txt
+
+err=$?  # exit status of mpirun, checked only after the restore below
+
+# Put the original JSON files back before evaluating the result, so a failed run does not leave the test copies in place
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# The output must contain the 'Net Events Processed' line
+grep 'Net Events Processed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking both milc and jacobi ran
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'App 0: All non-synthetic workloads have completed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned into surrogacy
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned back to high-fidelity
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+exit 0
diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh
new file mode 100644
index 00000000..0a34b2c8
--- /dev/null
+++ b/tests/union-workload-test-surrogate.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+if [[ -z $bindir || -z $srcdir ]] ; then
+    echo bindir or srcdir variable not set
+    exit 1
+fi
+
+if [[ -z $UNION_DATAROOTDIR ]] ; then
+    echo UNION_DATAROOTDIR variable not set
+    exit 1
+fi
+
+if [[ -z $SWM_DATAROOTDIR ]] ; then
+    echo SWM_DATAROOTDIR variable not set
+    exit 1
+fi
+
+np=1
+
+expfolder="$PWD"
+export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload"
+
+# Back up the milc/conceptual JSON files found in the data dirs and put the test versions in their place
+tmpdir="$(TMPDIR="$PWD" mktemp -d)"  # scratch directory under $PWD that holds the original JSON files
+mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json"
+cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json"
+cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+
+# Copy every input configuration into the experiment folder so the results are kept next to the inputs that produced them
+cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder"
+cp "$CONFIGS_PATH/conceptual.json" "$expfolder"
+cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder"
+cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder"
+
+# Environment exported for envsubst, which fills in the CODES config template (dfdally-72-par.conf.in) below
+export CHUNK_SIZE=4096
+export PATH_TO_CONNECTIONS="$CONFIGS_PATH"
+export NETWORK_SURR_ON=0
+export NETWORK_MODE=nothing
+export APP_SURR_ON=1
+export APP_DIRECTOR_MODE=every-n-nanoseconds
+#export APP_DIRECTOR_MODE=every-n-gvt
+export EVERY_N_GVT=500
+export EVERY_NSECS=1e6
+envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf"
+
+# Run the simulation; stdout and stderr are captured to files in the current directory
+cons_lookahead=200
+opt_lookahead=600
+
+export PATH_TO_CODES_BUILD="$bindir"
+
+mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \
+    --synch=1 \
+    --batch=4 --gvt-interval=256 \
+    --cons-lookahead=$cons_lookahead \
+    --max-opt-lookahead=$opt_lookahead \
+    --workload_type=conc-online \
+    --lp-io-dir=lp-io-dir \
+    --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \
+    --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \
+    -- "$expfolder/dfdally-72-par.conf" \
+    > model-output.txt 2> model-output-error.txt
+
+err=$?  # exit status of mpirun, checked only after the restore below
+
+# Put the original JSON files back before evaluating the result, so a failed run does not leave the test copies in place
+mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json"
+mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json"
+rmdir "$tmpdir"
+
+[[ $err -ne 0 ]] && exit $err
+
+# The output must contain the 'Net Events Processed' line
+grep 'Net Events Processed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# Checking both milc and jacobi ran
+grep 'MILC: Iteration 119/120' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'Jacobi3D: Completed 39 iterations' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+grep 'App 0: All non-synthetic workloads have completed' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned into surrogacy
+grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+# it transitioned back to high-fidelity
+grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+exit 0
diff --git a/tests/workload/codes-workload-test.sh b/tests/workload/codes-workload-test.sh
index 1b58abfb..a594716b 100755
--- a/tests/workload/codes-workload-test.sh
+++ b/tests/workload/codes-workload-test.sh
@@ -1,3 +1,11 @@
 #!/bin/bash
 
-tests/workload/codes-workload-test --sync=1 $srcdir/tests/workload/codes-workload-test.conf
+# CMake places the test binary under $bindir/tests, whereas autoconf builds
+# leave it in tests/workload; GENERATED_USING_CMAKE selects between the two
+if [ -z "$GENERATED_USING_CMAKE" ]; then
+    bindir=tests/workload
+else
+    bindir="$bindir/tests"
+fi
+
+"$bindir"/codes-workload-test --sync=1 "$srcdir"/tests/workload/codes-workload-test.conf