From 0e71565b32bab14045ed6cd86446b18caefae2c8 Mon Sep 17 00:00:00 2001 From: narek-cliqz Date: Mon, 22 Aug 2016 10:01:58 +0200 Subject: [PATCH] Merge commit 'dfdf0e6595f19c2f5e1229cd022df140ddd98e5d' # Please enter a commit message to explain why this merge is necessary, # especially if it merges an updated upstream into a topic branch. # # Lines starting with '#' will be ignored, and an empty message aborts # the commit. --- keyvi/3rdparty/tpie/.travis.yml | 17 +- keyvi/3rdparty/tpie/README.md | 3 + keyvi/3rdparty/tpie/appveyor.yml | 54 ++ keyvi/3rdparty/tpie/doc/CMakeLists.txt | 4 +- keyvi/3rdparty/tpie/doc/authors.dox | 1 + keyvi/3rdparty/tpie/doc/index.dox | 2 +- keyvi/3rdparty/tpie/doc/pipelining.dox | 55 +- keyvi/3rdparty/tpie/doc/pipelining_nodes.dox | 1 + keyvi/3rdparty/tpie/doc/setup.dox | 3 +- .../tpie/test/speed_regression/btree.cpp | 6 +- .../tpie/test/speed_regression/stream2.cpp | 2 +- .../tpie/test/speed_regression/testtime.h | 2 +- keyvi/3rdparty/tpie/test/unit/CMakeLists.txt | 22 +- keyvi/3rdparty/tpie/test/unit/merge_sort.h | 15 +- .../tpie/test/unit/test_ami_stream.cpp | 2 +- keyvi/3rdparty/tpie/test/unit/test_btree.cpp | 8 +- .../tpie/test/unit/test_close_file.cpp | 47 +- .../tpie/test/unit/test_compressed_stream.cpp | 40 +- .../tpie/test/unit/test_file_count.cpp | 3 +- .../tpie/test/unit/test_merge_sort.cpp | 12 +- .../tpie/test/unit/test_pipelining.cpp | 323 ++++++- .../test/unit/test_pipelining_runtime.cpp | 299 ++++++- .../test/unit/test_serialization_sort.cpp | 6 +- keyvi/3rdparty/tpie/tpie/CMakeLists.txt | 8 +- keyvi/3rdparty/tpie/tpie/array.h | 4 + keyvi/3rdparty/tpie/tpie/btree.h | 1 + keyvi/3rdparty/tpie/tpie/btree/base.h | 19 +- keyvi/3rdparty/tpie/tpie/btree/btree.h | 8 + .../3rdparty/tpie/tpie/btree/btree_builder.h | 18 +- .../3rdparty/tpie/tpie/btree/external_store.h | 19 +- .../tpie/tpie/btree/external_store_base.h | 2 +- .../3rdparty/tpie/tpie/btree/internal_store.h | 12 + keyvi/3rdparty/tpie/tpie/btree/node.h | 2 +- 
.../tpie/tpie/btree/serialized_store.h | 413 +++++++++ keyvi/3rdparty/tpie/tpie/compressed/request.h | 17 +- keyvi/3rdparty/tpie/tpie/compressed/stream.h | 45 +- keyvi/3rdparty/tpie/tpie/compressed/thread.h | 4 +- keyvi/3rdparty/tpie/tpie/exception.h | 18 + .../3rdparty/tpie/tpie/file_accessor/posix.h | 1 + .../tpie/tpie/file_accessor/posix.inl | 27 +- .../tpie/tpie/file_accessor/stdio.inl | 6 +- .../file_accessor/stream_accessor_base.inl | 4 +- .../3rdparty/tpie/tpie/file_accessor/win32.h | 5 + .../tpie/tpie/file_accessor/win32.inl | 21 +- keyvi/3rdparty/tpie/tpie/file_count.cpp | 69 -- keyvi/3rdparty/tpie/tpie/file_count.h | 56 -- keyvi/3rdparty/tpie/tpie/file_manager.cpp | 70 ++ keyvi/3rdparty/tpie/tpie/file_manager.h | 93 ++ keyvi/3rdparty/tpie/tpie/hash_map.h | 4 +- keyvi/3rdparty/tpie/tpie/memory.cpp | 92 +- keyvi/3rdparty/tpie/tpie/memory.h | 101 +-- keyvi/3rdparty/tpie/tpie/pipelining/buffer.h | 3 + .../3rdparty/tpie/tpie/pipelining/container.h | 88 ++ .../3rdparty/tpie/tpie/pipelining/exception.h | 14 +- .../tpie/tpie/pipelining/factory_base.h | 29 +- .../tpie/tpie/pipelining/file_stream.h | 212 +++-- keyvi/3rdparty/tpie/tpie/pipelining/helpers.h | 73 +- keyvi/3rdparty/tpie/tpie/pipelining/merge.h | 8 +- .../tpie/tpie/pipelining/merge_sorter.h | 208 +++-- keyvi/3rdparty/tpie/tpie/pipelining/node.cpp | 141 +-- keyvi/3rdparty/tpie/tpie/pipelining/node.h | 249 ++++-- keyvi/3rdparty/tpie/tpie/pipelining/numeric.h | 37 +- .../3rdparty/tpie/tpie/pipelining/pipe_base.h | 10 + .../tpie/tpie/pipelining/pipeline.cpp | 54 +- .../3rdparty/tpie/tpie/pipelining/pipeline.h | 99 ++- keyvi/3rdparty/tpie/tpie/pipelining/reverse.h | 2 + .../3rdparty/tpie/tpie/pipelining/runtime.cpp | 805 +++++++++++++----- keyvi/3rdparty/tpie/tpie/pipelining/runtime.h | 56 +- .../tpie/tpie/pipelining/serialization.h | 9 +- .../tpie/tpie/pipelining/serialization_sort.h | 13 +- keyvi/3rdparty/tpie/tpie/pipelining/sort.h | 71 +- .../tpie/tpie/pipelining/sort_parameters.h | 15 +- 
keyvi/3rdparty/tpie/tpie/pipelining/split.h | 118 +++ .../3rdparty/tpie/tpie/pipelining/std_glue.h | 71 +- .../tpie/tpie/pipelining/subpipeline.h | 167 ++++ .../3rdparty/tpie/tpie/pipelining/tokens.cpp | 13 + keyvi/3rdparty/tpie/tpie/pipelining/tokens.h | 39 +- keyvi/3rdparty/tpie/tpie/pipelining/visit.h | 68 ++ keyvi/3rdparty/tpie/tpie/priority_queue.h | 17 +- keyvi/3rdparty/tpie/tpie/priority_queue.inl | 31 +- .../tpie/tpie/progress_indicator_base.cpp | 1 - .../tpie/progress_indicator_subindicator.h | 2 + keyvi/3rdparty/tpie/tpie/resource_manager.cpp | 104 +++ keyvi/3rdparty/tpie/tpie/resource_manager.h | 132 +++ keyvi/3rdparty/tpie/tpie/resources.cpp | 34 + keyvi/3rdparty/tpie/tpie/resources.h | 48 ++ keyvi/3rdparty/tpie/tpie/serialization2.h | 18 +- .../3rdparty/tpie/tpie/serialization_sorter.h | 127 ++- .../tpie/tpie/serialization_stream.cpp | 2 +- keyvi/3rdparty/tpie/tpie/sort_manager.h | 2 +- keyvi/3rdparty/tpie/tpie/stack.h | 3 + keyvi/3rdparty/tpie/tpie/stats.h | 4 +- keyvi/3rdparty/tpie/tpie/stream.h | 4 +- keyvi/3rdparty/tpie/tpie/stream_old.h | 5 +- keyvi/3rdparty/tpie/tpie/tpie.cpp | 6 + keyvi/3rdparty/tpie/tpie/tpie.h | 7 +- keyvi/3rdparty/tpie/tpie/unittest.h | 4 +- 97 files changed, 4166 insertions(+), 1123 deletions(-) create mode 100644 keyvi/3rdparty/tpie/appveyor.yml create mode 100644 keyvi/3rdparty/tpie/tpie/btree/serialized_store.h delete mode 100644 keyvi/3rdparty/tpie/tpie/file_count.cpp delete mode 100644 keyvi/3rdparty/tpie/tpie/file_count.h create mode 100644 keyvi/3rdparty/tpie/tpie/file_manager.cpp create mode 100644 keyvi/3rdparty/tpie/tpie/file_manager.h create mode 100644 keyvi/3rdparty/tpie/tpie/pipelining/split.h create mode 100644 keyvi/3rdparty/tpie/tpie/pipelining/subpipeline.h create mode 100644 keyvi/3rdparty/tpie/tpie/pipelining/visit.h create mode 100644 keyvi/3rdparty/tpie/tpie/resource_manager.cpp create mode 100644 keyvi/3rdparty/tpie/tpie/resource_manager.h create mode 100644 keyvi/3rdparty/tpie/tpie/resources.cpp 
create mode 100644 keyvi/3rdparty/tpie/tpie/resources.h diff --git a/keyvi/3rdparty/tpie/.travis.yml b/keyvi/3rdparty/tpie/.travis.yml index 67debefa..2ffaceff 100644 --- a/keyvi/3rdparty/tpie/.travis.yml +++ b/keyvi/3rdparty/tpie/.travis.yml @@ -1,8 +1,17 @@ +matrix: + include: + - os: linux + compiler: gcc + - os: linux + compiler: clang + - os: osx + compiler: clang language: cpp -compiler: -- gcc +before install: + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install boost snappy; fi install: -- if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi + - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi addons: apt: sources: @@ -17,4 +26,4 @@ addons: - libboost-system1.55-dev - libsnappy-dev script: mkdir build && cd build && cmake .. && make -j2 && make test -sudo: false \ No newline at end of file +sudo: false diff --git a/keyvi/3rdparty/tpie/README.md b/keyvi/3rdparty/tpie/README.md index 2fb09b74..4f6b58b8 100644 --- a/keyvi/3rdparty/tpie/README.md +++ b/keyvi/3rdparty/tpie/README.md @@ -1,6 +1,9 @@ TPIE - The Templated Portable I/O Environment ============================================= +NIX: [![Build Status](https://travis-ci.org/thomasmoelhave/tpie.svg?branch=master)](https://travis-ci.org/thomasmoelhave/tpie) +Windows: [![Build status Windows](https://ci.appveyor.com/api/projects/status/570fltgf1ko5twna/branch/master?svg=true)](https://ci.appveyor.com/project/antialize/tpie/branch/master) + The TPIE (Templated Portable I/O Environment) library is a tool box providing efficient and convenient tools to ease the implementation of algorithm and data structures on very large sets of data. 
diff --git a/keyvi/3rdparty/tpie/appveyor.yml b/keyvi/3rdparty/tpie/appveyor.yml new file mode 100644 index 00000000..0c14827b --- /dev/null +++ b/keyvi/3rdparty/tpie/appveyor.yml @@ -0,0 +1,54 @@ +# Specify version format +version: "{build}" + +# Operating system (build VM template) +os: Visual Studio 2015 + +# build platform, i.e. Win32 (instead of x86), x64, Any CPU. This setting is optional. +platform: + - x64 + +# specify custom environment variables +environment: + BOOST_ROOT: C:\Libraries\boost_1_60_0 + BOOST_LIBRARYDIR: C:\Libraries\boost_1_60_0\lib64-msvc-14.0 + +# build configuration, i.e. Debug, Release, etc. +configuration: + - Debug + - Release + +# scripts that are called at very beginning, before repo cloning +init: + - cmd: cmake --version + - cmd: msbuild /version + +# clone directory +clone_folder: C:\projects\tpie + +# branches to build +branches: + # whitelist + # only: + # - master + # blacklist + except: + - gh-pages + +# scripts to run before build +before_build: + - cmd: cd C:\projects\tpie + - cmd: md build + - cmd: cd build + - cmd: if "%platform%"=="Win32" set CMAKE_GENERATOR_NAME=Visual Studio 14 2015 + - cmd: if "%platform%"=="x64" set CMAKE_GENERATOR_NAME=Visual Studio 14 2015 Win64 + - cmd: cmake -G "%CMAKE_GENERATOR_NAME%" -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT="%BOOST_ROOT%" -DBOOST_LIBRARYDIR="%BOOST_LIBRARYDIR%" .. 
+ +build: + parallel: true # enable MSBuild parallel builds + project: C:\projects\tpie\build\tpie.sln # path to Visual Studio solution or project + +test_script: + - cmd: cd C:\projects\tpie + - cmd: cd build + - cmd: ctest --timeout 30 -j 2 --output-on-failure -C "%CONFIG%" ctest diff --git a/keyvi/3rdparty/tpie/doc/CMakeLists.txt b/keyvi/3rdparty/tpie/doc/CMakeLists.txt index 1d05c418..d2943247 100644 --- a/keyvi/3rdparty/tpie/doc/CMakeLists.txt +++ b/keyvi/3rdparty/tpie/doc/CMakeLists.txt @@ -18,8 +18,8 @@ else (INTERNAL_DOCS) endif(INTERNAL_DOCS) ADD_CUSTOM_TARGET(apidocs - sh -c "sed -e \"s/@VERSION@/`git describe --always`/g\" ${PROJECT_SOURCE_DIR}/doc/header.html > ${PROJECT_BINARY_DIR}/doc/header.html" - COMMAND sh -c "sed -e \"s/@VERSION@/`git describe --always`/g\" ${PROJECT_SOURCE_DIR}/doc/footer.html > ${PROJECT_BINARY_DIR}/doc/footer.html" + sh -c "sed -e \"s/\@VERSION\@/`git describe --always`/g\" ${PROJECT_SOURCE_DIR}/doc/header.html > ${PROJECT_BINARY_DIR}/doc/header.html" + COMMAND sh -c "sed -e \"s/\@VERSION\@/`git describe --always`/g\" ${PROJECT_SOURCE_DIR}/doc/footer.html > ${PROJECT_BINARY_DIR}/doc/footer.html" COMMAND ${DOXYGEN} ${DOXYFILE} VERBATIM) diff --git a/keyvi/3rdparty/tpie/doc/authors.dox b/keyvi/3rdparty/tpie/doc/authors.dox index 33acfb59..6fc50512 100644 --- a/keyvi/3rdparty/tpie/doc/authors.dox +++ b/keyvi/3rdparty/tpie/doc/authors.dox @@ -6,6 +6,7 @@ The TPIE development team consists of the following people: \section sec_current Current contributors - Andrew Danner + - Asger Hautop Drewsen - Jakob Truelsen - Jan Vahrenhold - Lars Arge diff --git a/keyvi/3rdparty/tpie/doc/index.dox b/keyvi/3rdparty/tpie/doc/index.dox index 3e88361b..aae873f1 100644 --- a/keyvi/3rdparty/tpie/doc/index.dox +++ b/keyvi/3rdparty/tpie/doc/index.dox @@ -57,6 +57,6 @@ as a group of contributors in Europe and the US. A full list of current and past contributors can be found in the \ref authors section. The current maintainer is -Mathias Rav. 
+Asger Hautop Drewsen. */ diff --git a/keyvi/3rdparty/tpie/doc/pipelining.dox b/keyvi/3rdparty/tpie/doc/pipelining.dox index 730f608a..a84ceebe 100644 --- a/keyvi/3rdparty/tpie/doc/pipelining.dox +++ b/keyvi/3rdparty/tpie/doc/pipelining.dox @@ -74,7 +74,7 @@ private: dest_t dest; }; -typedef tp::pipe_middle > hello_world; +typedef tp::pipe_middle > hello_world; \endcode A node implementation may supply the following extra information to the framework @@ -185,7 +185,7 @@ private: dest_t dest; }; -typedef tp::pipe_middle > hello_world; +typedef tp::pipe_middle > hello_world; \endcode \section sec_pull Pull nodes @@ -207,28 +207,23 @@ The implementation details of pull nodes are similar to regular nodes. The follo template class pull_hello_world_type : public node { public: - typedef int item_type; - - pull_hello_world_type(source_t source) : source(std::move(source)) { - add_pull_source(source); - } - - item_type pull() { - int item = source.can_pull(); - if (item % 2 == 0) - return item/2; - return (3 * item + 1); - } - - bool can_pull() { - return source.can_pull(); - } - + typedef int item_type; + pull_hello_world_type(source_t source) : source(std::move(source)) { + add_pull_source(source); + } + item_type pull() { + int item = source.pull(); + if (item % 2 == 0) + return item/2; + return (3 * item + 1); + } + bool can_pull() { + return source.can_pull(); + } private: - source_t source; + source_t source; }; - -typedef pullpipe_middle > pull_hello_world; +typedef pullpipe_middle > pull_hello_world; \endcode \section sec_forming_pipelines Forming pipelines @@ -359,8 +354,8 @@ public: }; template -inline pipe_begin, source_t> > square(const source_t & source) { - return tempfactory_1, source_t >(source); +inline pipe_begin, source_t> > square(const source_t & source) { + return tempfactory, source_t >(source); } \endcode \code @@ -671,7 +666,7 @@ instantiate the node implementations. 
Usually, the built-in factories contained in \c factory_helpers.h will suffice: \code -typedef tpie::pipelining::factory_0 hello_world_factory; +typedef tpie::pipelining::factory hello_world_factory; \endcode but in some cases it is helpful to implement one's own factory. @@ -731,7 +726,7 @@ To use the above defined factories, we might write the following: \code using namespace tpie; using namespace tpie::pipelining; -factory_1 &> fact0(inputstream); +factory &> fact0(inputstream); hello_world_factory fact1; goodbye_world_factory fact2; pipeline p = fact0.construct(fact1.construct(fact2.construct())); @@ -743,14 +738,14 @@ helper classes to ease the construction of pipelines, namely the descendants of \c pipe_base which are called \c pipe_begin, \c pipe_middle and \c pipe_end. \code -inline pipe_middle > +inline pipe_middle > hello_world() { - return factory_0(); + return factory(); } -inline pipe_end > +inline pipe_end > goodbye_world() { - return termfactory_0(); + return termfactory(); } \endcode diff --git a/keyvi/3rdparty/tpie/doc/pipelining_nodes.dox b/keyvi/3rdparty/tpie/doc/pipelining_nodes.dox index 664816e6..3d6a19d2 100644 --- a/keyvi/3rdparty/tpie/doc/pipelining_nodes.dox +++ b/keyvi/3rdparty/tpie/doc/pipelining_nodes.dox @@ -44,6 +44,7 @@ \ref tpie::pipelining::store_sort \ref tpie::pipelining::tee \ref tpie::pipelining::unzip +\ref tpie::pipelining::visit \ref tpie::pipelining::zip \endsecreflist diff --git a/keyvi/3rdparty/tpie/doc/setup.dox b/keyvi/3rdparty/tpie/doc/setup.dox index ea850e29..a134d75b 100644 --- a/keyvi/3rdparty/tpie/doc/setup.dox +++ b/keyvi/3rdparty/tpie/doc/setup.dox @@ -68,8 +68,7 @@ sudo tar xvf path/to/tpie/build/tpie-1.0.0-Linux.tar.gz \endverbatim To link your application with TPIE, may manually specify the linker flags --ltpie -lboost_system -lboost_filesystem -lboost_date_time --lboost_thread, or you may use the example \c CMakeLists.txt from the +-ltpie -lboost_system -lboost_filesystem -lboost_date_time, or you may use 
the example \c CMakeLists.txt from the \c tpie/example/ directory to find the TPIE installation using CMake.

Windows

diff --git a/keyvi/3rdparty/tpie/test/speed_regression/btree.cpp b/keyvi/3rdparty/tpie/test/speed_regression/btree.cpp index 26076a50..5c1f3a86 100644 --- a/keyvi/3rdparty/tpie/test/speed_regression/btree.cpp +++ b/keyvi/3rdparty/tpie/test/speed_regression/btree.cpp @@ -59,10 +59,10 @@ void test(size_t times, size_t size) { btree tree(tmp.path()); // pre-protocol - int x[count]; + std::vector x(count); for(size_t i = 0; i < count; ++i) x[i] = i; - std::random_shuffle(x, x + count); + std::random_shuffle(x.begin(), x.end()); // insertion getTestRealtime(start); @@ -81,7 +81,7 @@ void test(size_t times, size_t size) { s(testRealtimeDiff(start,end)); // deletion - std::random_shuffle(x, x + count); + std::random_shuffle(x.begin(), x.end()); getTestRealtime(start); for(size_t i = 0; i < count; ++i) { diff --git a/keyvi/3rdparty/tpie/test/speed_regression/stream2.cpp b/keyvi/3rdparty/tpie/test/speed_regression/stream2.cpp index 0ffed330..87619fbe 100644 --- a/keyvi/3rdparty/tpie/test/speed_regression/stream2.cpp +++ b/keyvi/3rdparty/tpie/test/speed_regression/stream2.cpp @@ -63,7 +63,7 @@ class series_crtp : public series_base { self().end(n); } - virtual const char * name() const { return self().get_name(); } + const char * name() const override { return self().get_name(); } }; class series_random : public series_crtp { diff --git a/keyvi/3rdparty/tpie/test/speed_regression/testtime.h b/keyvi/3rdparty/tpie/test/speed_regression/testtime.h index 7ced3a91..372bb610 100644 --- a/keyvi/3rdparty/tpie/test/speed_regression/testtime.h +++ b/keyvi/3rdparty/tpie/test/speed_regression/testtime.h @@ -90,7 +90,7 @@ namespace tpie { /// Calculate real time difference in micro seconds /////////////////////////////////////////////////////////////////// inline uint_fast64_t testRealtimeDiff(const test_realtime_t a, const test_realtime_t b) { - return tpie::test_millisecs(a, b); + return (uint_fast64_t)tpie::test_millisecs(a, b); } } } diff --git 
a/keyvi/3rdparty/tpie/test/unit/CMakeLists.txt b/keyvi/3rdparty/tpie/test/unit/CMakeLists.txt index e7461d9a..954d003e 100644 --- a/keyvi/3rdparty/tpie/test/unit/CMakeLists.txt +++ b/keyvi/3rdparty/tpie/test/unit/CMakeLists.txt @@ -53,6 +53,8 @@ add_unittest(compressed_stream odd_block_size write_only write_peek + + lockstep_reverse ) add_unittest(btree internal_augment @@ -95,6 +97,7 @@ add_unittest(merge_sort evacuate_before_merge evacuate_before_report sort_upper_bound + sort_faulty_upper_bound temp_file_usage tall_tree ) @@ -110,6 +113,7 @@ add_unittest(serialization_sort small_final_fanout evacuate_before_merge evacuate_before_report + file_limit ) add_unittest(stats simple) add_unittest(stream @@ -151,6 +155,9 @@ add_unittest(pipelining merger_memory bound_fetch_forward fetch_forward + forward_multiple_pipelines + forward_unique_ptr + pipe_base_forward virtual virtual_cref_item_type virtual_fork @@ -165,16 +172,27 @@ add_unittest(pipelining parallel_push_in_end node_map join + split copy_ctor datastructures phase_priority_test set_flush_priority_test node_map + subpipeline + file_limit_sort + passive_virtual_management + join_split_dealloc ) -add_unittest(pipelining_runtime evacuate get_phase_graph) +add_unittest(pipelining_runtime evacuate get_phase_graph evacuate_phase_graph) add_unittest(pipelining_serialization basic reverse sort) add_unittest(maybe basic unique_ptr) -add_unittest(close_file internal serialization_writer_close serialization_writer_dtor serialization_reader_dtor) +add_unittest(close_file + internal + serialization_writer_close + serialization_writer_dtor + serialization_reader_dtor + file_limit_enforcement +) add_unittest(node_name gcc msvc) add_unittest(snappy basic) diff --git a/keyvi/3rdparty/tpie/test/unit/merge_sort.h b/keyvi/3rdparty/tpie/test/unit/merge_sort.h index 76c15ecb..c9d4ef72 100644 --- a/keyvi/3rdparty/tpie/test/unit/merge_sort.h +++ b/keyvi/3rdparty/tpie/test/unit/merge_sort.h @@ -75,7 +75,8 @@ static bool 
sort_test(memory_size_type m1, double mb_data, memory_size_type extraMemory = 0, bool evacuateBeforeMerge = false, - bool evacuateBeforeReport = false) + bool evacuateBeforeReport = false, + memory_size_type file_limit = 0) { m1 *= 1024*1024; m2 *= 1024*1024; @@ -88,6 +89,7 @@ static bool sort_test(memory_size_type m1, relative_memory_usage m(extraMemory); sorter s; s.set_available_memory(m1, m2, m3); + s.set_available_files(file_limit); log_debug() << "Begin phase 1" << std::endl; m.set_threshold(m1); @@ -177,6 +179,13 @@ static bool evacuate_before_report_test() { return sort_test(20,20,20,50, 0, false, true); } +static bool file_limit_test(int limit) { + get_file_manager().set_limit(limit); + get_file_manager().set_enforcement(file_manager::ENFORCE_THROW); + + return sort_test(15,15,15,40, 0, false, false, limit); +} + public: static tests & add_all(tests & t) { @@ -192,6 +201,10 @@ static tests & add_all(tests & t) { ; } +static tests & add_file_limit_test(tests & t, int limit) { + return t.test(file_limit_test, "file_limit", "limit", limit); +} + }; #endif // TPIE_TEST_MERGE_SORT_H diff --git a/keyvi/3rdparty/tpie/test/unit/test_ami_stream.cpp b/keyvi/3rdparty/tpie/test/unit/test_ami_stream.cpp index f200bf48..02e5c7a7 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_ami_stream.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_ami_stream.cpp @@ -109,7 +109,7 @@ bool basic() { s.seek(idx); if (i%2 == 0) { - uint64_t *read; + uint64_t *read = nullptr; s.read_item(&read); if (*read != data[idx]) { tpie::log_error() << "Expected element " << idx << " to be " << data[idx] << ", got " << *read << std::endl; diff --git a/keyvi/3rdparty/tpie/test/unit/test_btree.cpp b/keyvi/3rdparty/tpie/test/unit/test_btree.cpp index 1dd9d249..6a3e9863 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_btree.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_btree.cpp @@ -477,6 +477,11 @@ bool external_bound_test() { return bound_test(TA(), tmp.path()); } +bool serialized_build_test() { + temp_file 
tmp; + return build_test(TA(), tmp.path()); +} + int main(int argc, char **argv) { return tpie::tests(argc, argv) .test(internal_basic_test, "internal_basic") @@ -492,7 +497,8 @@ int main(int argc, char **argv) { .test(external_key_and_comparator_test, "external_key_and_compare") .test(external_augment_test, "external_augment") .test(external_build_test, "external_build") - .test(external_bound_test, "external_bound"); + .test(external_bound_test, "external_bound") + .test(serialized_build_test, "serialized_build"); } diff --git a/keyvi/3rdparty/tpie/test/unit/test_close_file.cpp b/keyvi/3rdparty/tpie/test/unit/test_close_file.cpp index 98a53895..28fad0cf 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_close_file.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_close_file.cpp @@ -27,6 +27,8 @@ #include #include #include +#include +#include #ifdef WIN32 class open_file_monitor { @@ -66,11 +68,11 @@ class open_file_monitor { public: open_file_monitor() { - m_openFiles = tpie::open_file_count(); + m_openFiles = tpie::get_file_manager().used(); } bool ensure_closed_and_delete(std::string fileName) { - tpie::memory_size_type openFiles = tpie::open_file_count(); + tpie::memory_size_type openFiles = tpie::get_file_manager().used(); tpie::log_debug() << "Open file count was " << m_openFiles << "; is now " << openFiles << std::endl; if (openFiles != m_openFiles) { @@ -86,7 +88,7 @@ class open_file_monitor { tpie::log_error() << "Failed to unlink file: " << ::strerror(errno) << std::endl; return false; } - if (tpie::open_file_count() != m_openFiles) { + if (tpie::get_file_manager().used() != m_openFiles) { tpie::log_error() << "ensure_closed_and_delete: Even after unlink, " "file count does not match." 
<< std::endl; } @@ -101,12 +103,13 @@ bool test_test() { tpie::log_debug() << "Temporary file is " << fileName << std::endl; boost::filesystem::remove(fileName); open_file_monitor m; - FILE * fp = ::fopen(fileName.c_str(), "w"); + tpie::file_accessor::raw_file_accessor fa; + fa.open_wo(fileName); TEST_ENSURE(boost::filesystem::exists(fileName), "fopen did not create file"); TEST_ENSURE(!m.ensure_closed_and_delete(fileName), "ensure_closed_and_delete is wrong"); - ::fclose(fp); + fa.close_i(); TEST_ENSURE(m.ensure_closed_and_delete(fileName), "ensure_closed_and_delete is wrong"); return true; @@ -158,11 +161,45 @@ bool serialization_reader_dtor_test() { return true; } +bool file_limit_enforcement_test() { + int limit = 5; + int should_error = limit; + + tpie::get_file_manager().set_limit(limit); + tpie::get_file_manager().set_enforcement(tpie::file_manager::ENFORCE_THROW); + + std::vector fas(should_error + 1); + int i = 0; + for (auto &fa : fas) { + std::string fileName = tpie::tempname::tpie_name(); + try { + fa.open_wo(fileName); + } catch(const tpie::out_of_resource_error &e) { + if (i == should_error) { + continue; + } else { + return false; + } + } + if (i == should_error) { + return false; + } + i++; + } + + for (auto &fa : fas) { + fa.close_i(); + } + + return true; +} + int main(int argc, char ** argv) { return tpie::tests(argc, argv) .test(test_test, "internal") .test(serialization_writer_close_test, "serialization_writer_close") .test(serialization_writer_dtor_test, "serialization_writer_dtor") .test(serialization_reader_dtor_test, "serialization_reader_dtor") + .test(file_limit_enforcement_test, "file_limit_enforcement") ; } diff --git a/keyvi/3rdparty/tpie/test/unit/test_compressed_stream.cpp b/keyvi/3rdparty/tpie/test/unit/test_compressed_stream.cpp index 5ddf3b30..8ef5cae6 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_compressed_stream.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_compressed_stream.cpp @@ -949,6 +949,44 @@ bool write_only_test() { 
return success; } +bool stack_test() { + constexpr uint32_t block_items = 4; + auto bof = tpie::file_stream::calculate_block_factor(block_items); + tpie::file_stream fs_a(bof), fs_b(bof), fs_c(bof); + fs_a.open(tpie::compression_none); + fs_b.open(tpie::compression_none); + fs_c.open(tpie::compression_all); + + constexpr uint32_t cnt = block_items * 2; + + for (uint32_t i=0; i < cnt; ++i) { + fs_a.write(i); + fs_b.write(i); + fs_c.write(i); + } + + for (uint32_t i=cnt-1; i < cnt; --i) { + uint32_t val; + val = fs_a.read_back(); + if (i != val) { + tpie::log_error() << "Bad value " << val << " instead of " << i << std::endl; + return false; + } + val = fs_b.read_back(); + if (i != val) { + tpie::log_error() << "Bad value " << val << " instead of " << i << std::endl; + return false; + } + val = fs_c.read_back(); + if (i != val) { + tpie::log_error() << "Bad value " << val << " instead of " << i << std::endl; + return false; + } + } + + return true; +} + template tpie::tests & add_tests(tpie::tests & t, std::string suffix) { typedef tests T; @@ -991,5 +1029,5 @@ int main(int argc, char ** argv) { .test(write_peek_test, "write_peek", "n", static_cast(1 << 23)) /* .test(read_only_test, "read_only") */ .test(write_only_test, "write_only") - ; + .test(stack_test, "lockstep_reverse"); } diff --git a/keyvi/3rdparty/tpie/test/unit/test_file_count.cpp b/keyvi/3rdparty/tpie/test/unit/test_file_count.cpp index a0947289..f98d7010 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_file_count.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_file_count.cpp @@ -19,7 +19,6 @@ #include "common.h" #include #include -#include #include #include @@ -27,7 +26,7 @@ using namespace tpie; bool file_count_test() { temp_file tmp; - memory_size_type avail = available_files(); + memory_size_type avail = get_file_manager().available(); memory_size_type itemSize; memory_size_type blockSize; memory_size_type userDataSize = 0; diff --git a/keyvi/3rdparty/tpie/test/unit/test_merge_sort.cpp 
b/keyvi/3rdparty/tpie/test/unit/test_merge_sort.cpp index 2b40a12b..531e966a 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_merge_sort.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_merge_sort.cpp @@ -53,7 +53,7 @@ class use_merge_sort { } }; -bool sort_upper_bound_test() { +bool sort_upper_bound_test_base(memory_size_type dataUpperBound) { typedef use_merge_sort Traits; typedef Traits::sorter sorter; typedef Traits::test_t test_t; @@ -62,7 +62,6 @@ bool sort_upper_bound_test() { memory_size_type m2 = 20 *1024*1024; memory_size_type m3 = 20 *1024*1024; memory_size_type dataSize = 15*1024*1024; - memory_size_type dataUpperBound = 80*1024*1024; memory_size_type items = dataSize / sizeof(test_t); @@ -86,6 +85,14 @@ bool sort_upper_bound_test() { return true; } +bool sort_upper_bound_test() { + return sort_upper_bound_test_base(80*1024*1024); +} + +bool sort_faulty_upper_bound_test() { + return sort_upper_bound_test_base(400); // much lower than dataSize +} + bool temp_file_usage_test() { bool result = true; const stream_size_type initialUsage = get_temp_file_usage(); @@ -156,6 +163,7 @@ int main(int argc, char ** argv) { return sort_tester::add_all(t) .test(sort_upper_bound_test, "sort_upper_bound") + .test(sort_faulty_upper_bound_test, "sort_faulty_upper_bound") .test(temp_file_usage_test, "temp_file_usage") .test(tall_tree_test, "tall_tree", "fanout", static_cast(6), "height", static_cast(1)) ; diff --git a/keyvi/3rdparty/tpie/test/unit/test_pipelining.cpp b/keyvi/3rdparty/tpie/test/unit/test_pipelining.cpp index dfdcec09..8fe0ca6b 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_pipelining.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_pipelining.cpp @@ -19,13 +19,18 @@ #include "common.h" #include +#include #include #include #include +#include #include #include +#include #include #include +#include +#include using namespace tpie; using namespace tpie::pipelining; @@ -821,6 +826,119 @@ bool bound_fetch_forward_test() { return true; } +bool forward_unique_ptr_result = 
true; + +template +struct FUP1 : public node { + dest_t dest; + FUP1(dest_t dest) : dest(std::move(dest)) {} + + virtual void propagate() override { + forward("item", std::unique_ptr(new int(293))); + } + + virtual void go() override { + } +}; + +struct FUP2 : public node { + virtual void propagate() override { + if (!can_fetch("item")) { + log_error() << "Cannot fetch item" << std::endl; + forward_unique_ptr_result = false; + return; + } + auto &p = fetch>("item"); + if (*p != 293) { + log_error() << "Expected 293, not " << *p << std::endl; + forward_unique_ptr_result = false; + return; + } + } +}; + +bool forward_unique_ptr_test() { + std::unique_ptr ptr(new int(1337)); + pipeline p = make_pipe_begin() + | make_pipe_end(); + p.plot(log_info()); + p.forward("ptr", std::move(ptr)); + p(); + if (!forward_unique_ptr_result) return false; + if (!p.can_fetch("ptr")) { + log_error() << "Cannot fetch ptr" << std::endl; + return false; + } + auto &ptr2 = p.fetch>("ptr"); + if (*ptr2 != 1337) { + log_error() << "Expected 1337, not " << *ptr2 << std::endl; + return false; + } + return true; +} + +bool forward_multiple_pipelines_test() { + passive_sorter ps; + pipeline p = input_vector(std::vector{3, 2, 1}) | ps.input(); + p.forward("test", 8); + pipeline p_ = input_vector(std::vector{5, 6, 7}) | add_pairs(ps.output()) | null_sink(); + p(); + int val = p_.fetch("test"); + return val == 8; +} + +bool pipe_base_forward_result = true; + +struct PBF_base : public node { + int n; + PBF_base(int n) : n(n) {} + + virtual void prepare() override { + for (int i = 1; i <= 3; i++) { + std::string item = "item" + std::to_string(i); + bool fetchable = can_fetch(item); + if (n < i) { + if (fetchable) { + log_error() << "Pipe segment " << n + << " could fetch item " << i << "." << std::endl; + pipe_base_forward_result = false; + } + } else { + if (!fetchable) { + log_error() << "Pipe segment " << n + << " couldn't fetch item " << i << "." 
<< std::endl; + pipe_base_forward_result = false; + } + int value = fetch(item); + if (value != i) { + log_error() << "Pipe segment " << n + << " fetched item " << i + << " with value " << value << "." << std::endl; + pipe_base_forward_result = false; + } + } + } + } +}; + +template +struct PBF : public PBF_base { + dest_t dest; + PBF(dest_t dest, int n) : PBF_base(n), dest(std::move(dest)) {} + + virtual void go() override { + } +}; + +bool pipe_base_forward_test() { + pipeline p = make_pipe_begin(1).forward("item1", 1) + | make_pipe_middle(2).forward("item2", 2) + | make_pipe_end(3).forward("item3", 3); + p(); + + return pipe_base_forward_result; +} + // Assume that dest_t::item_type is a reference type. // Push a dereferenced zero pointer to the destination. template @@ -1552,7 +1670,7 @@ class node_map_tester_factory : public factory_base { if (edges.size() != nodes*nodes) throw std::invalid_argument("edges has wrong size"); } - node_map_tester construct() const { + node_map_tester construct() { std::vector nodes; node_map_tester node; this->init_node(node); @@ -1587,17 +1705,17 @@ class node_map_tester_factory : public factory_base { } }; -bool node_map_test(size_t nodes, bool hasInitiator, const std::string & edges) { +bool node_map_test(size_t nodes, bool acyclic, const std::string & edges) { node_map_tester_factory fact(nodes, edges); pipeline p = tpie::pipelining::bits::pipeline_impl(fact); p.plot(log_info()); try { p(); - } catch (const no_initiator_node &) { - return !hasInitiator; + } catch (const exception &) { + return !acyclic; } - return hasInitiator; + return acyclic; } void node_map_multi_test(teststream & ts) { @@ -1633,7 +1751,7 @@ void node_map_multi_test(teststream & ts) { "<.")); ts << "item_cycle" << result (node_map_test - (3, true, + (3, false, ".><" "..>" "...")); @@ -1668,6 +1786,35 @@ bool join_test() { return true; } +bool split_test() { + std::vector i(10); + std::vector o1, o2; + + for (int j = 0; j < 10; ++j) { + i[j] = j; + } + + 
split j; + pipeline p1 = input_vector(i) | j.sink(); + pipeline p2 = j.source() | output_vector(o1); + pipeline p3 = j.source() | output_vector(o2); + + p3.plot(log_info()); + p3(); + + if (o1.size() != 10 || o2.size() != 10) { + log_error() << "Wrong output size " << o1.size() << " " << o2.size() << " expected 10" << std::endl; + return false; + } + + for (int i = 0; i < 10; ++i) { + if (o1[i] == i % 10 && o2[i] == i % 10) continue; + log_error() << "Wrong output item got " << o1[i] << " " << o2[i] << " expected " << i%10 << std::endl; + return false; + } + return true; +} + bool copy_ctor_test() { std::vector i(10); std::vector j; @@ -1926,6 +2073,162 @@ bool phase_priority_test() { return true; } +template +class subpipe_tester_type: public node { +public: + struct dest_pusher: public node { + dest_pusher(dest_t & dest, int first): first(first), dest(dest) {} + void push(int second) { + dest.push(std::make_pair(first, second)); + } + int first; + dest_t & dest; + }; + + subpipeline sp; + int first; + subpipe_tester_type(dest_t dest): dest(std::move(dest)) { + set_memory_fraction(2); + } + + void prepare() override { + first = 1234; + } + + void push(std::pair i) { + if (i.first != first) { + if (first != 1234) + sp.end(); + first = i.first; + sp = sort() | pipe_end>(dest, first); + sp.begin(get_available_memory()); + } + sp.push(i.second); + } + + void end() override { + if (first != 1234) + sp.end(); + } + + dest_t dest; +}; + +typedef pipe_middle > subpipe_tester; + +bool subpipeline_test() { + constexpr int outer_size = 10; + constexpr int inner_size = 3169; //Must be prime + std::vector > items; + for (int i=0; i < outer_size; ++i) { + for (int j=0; j < inner_size; ++j) + items.push_back(std::make_pair(i, (j*13) % inner_size)); + } + + std::vector > items2; + + pipeline p = input_vector(items) | subpipe_tester() | output_vector(items2); + p(); + if (items2.size() != items.size()) return false; + + int cnt=0; + for (int i=0; i < outer_size; ++i) + for (int 
j=0; j < inner_size; ++j) + if (items2[cnt++] != std::make_pair(i, j)) return false; + + return true; +} + +bool file_limit_sort_test() { + int N = 1000000; + int B = 10000; + // Merge sort needs at least 3 open files for binary merge sort + // + 2 open files to store stream_position objects for sorted runs. + int F = 5; + + get_memory_manager().set_limit(5000000); + + set_block_size(B * sizeof(int)); + get_file_manager().set_limit(F); + get_file_manager().set_enforcement(file_manager::ENFORCE_THROW); + + std::vector items; + for (int i = 0; i < N; i++) { + items.push_back(N - i); + } + + std::vector items2; + + pipeline p = input_vector(items) | sort() | output_vector(items2); + p(); + + return true; +} + +template +virtual_chunk passive_virtual_chunk() { + T passive; + return virtual_chunk(fork(passive.input()) + | buffer() + | merge(passive.output())); +} + +template +void passive_virtual_test(teststream & ts, const char * cname, const std::vector & input, const std::vector & expected_output) { + ts << cname; + + auto vc = passive_virtual_chunk(); + + std::vector output; + pipeline p = virtual_chunk_begin(input_vector(input)) + | vc + | virtual_chunk_end(output_vector(output)); + p(); + + // Output is interleaved with the input + auto expected = expected_output; + for (size_t i = 0; i < input.size(); i++) { + expected.insert(expected.begin() + i * 2, input[i]); + } + + ts << result(output == expected); +} + +void passive_virtual_test_multi(teststream & ts) { + std::vector input = {3, 4, 1, 2}; + std::vector reversed = {2, 1, 4, 3}; + std::vector sorted = {1, 2, 3, 4}; + +#define TEST(T, expected) passive_virtual_test>(ts, #T, input, expected) + + TEST(passive_sorter, sorted); + TEST(passive_buffer, input); + TEST(passive_reverser, reversed); + TEST(serialization_passive_sorter, sorted); + TEST(passive_serialization_buffer, input); + TEST(passive_serialization_reverser, reversed); + +#undef TEST +} + +bool join_split_dealloc_test() { + std::vector v{1, 2, 3}; + 
pipeline p, p1, p2, p3; + + { + join j; + split s; + + p = input_vector(v) | s.sink(); + p1 = s.source() | j.sink(); + p2 = s.source() | j.sink(); + p3 = j.source() | null_sink(); + } + + p(); + + return true; +} int main(int argc, char ** argv) { return tpie::tests(argc, argv) @@ -1949,6 +2252,9 @@ int main(int argc, char ** argv) { .test(merger_memory_test, "merger_memory", "n", static_cast(10)) .test(fetch_forward_test, "fetch_forward") .test(bound_fetch_forward_test, "bound_fetch_forward") + .test(forward_unique_ptr_test, "forward_unique_ptr") + .test(forward_multiple_pipelines_test, "forward_multiple_pipelines") + .test(pipe_base_forward_test, "pipe_base_forward") .test(virtual_test, "virtual") .test(virtual_fork_test, "virtual_fork") .test(virtual_cref_item_type_test, "virtual_cref_item_type") @@ -1963,10 +2269,15 @@ int main(int argc, char ** argv) { .test(parallel_own_buffer_test, "parallel_own_buffer") .test(parallel_push_in_end_test, "parallel_push_in_end") .test(join_test, "join") + .test(split_test, "split") + .test(subpipeline_test, "subpipeline") .multi_test(node_map_multi_test, "node_map") .test(copy_ctor_test, "copy_ctor") .test(set_flush_priority_test, "set_flush_priority_test") .test(phase_priority_test, "phase_priority_test") .multi_test(datastructure_test_multi, "datastructures") + .test(file_limit_sort_test, "file_limit_sort") + .multi_test(passive_virtual_test_multi, "passive_virtual_management") + .test(join_split_dealloc_test, "join_split_dealloc") ; } diff --git a/keyvi/3rdparty/tpie/test/unit/test_pipelining_runtime.cpp b/keyvi/3rdparty/tpie/test/unit/test_pipelining_runtime.cpp index 82ed28fc..f0e0f82d 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_pipelining_runtime.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_pipelining_runtime.cpp @@ -36,6 +36,14 @@ class evac_node : public node { } }; +class no_evac_node : public node { +public: + + virtual bool can_evacuate() override { + return false; + } +}; + bool evacuate_test() { const size_t N 
= 7; evac_node nodes[N]; @@ -51,49 +59,49 @@ bool evacuate_test() { graph phaseGraph; for (size_t i = 0; i < N; ++i) phaseGraph.add_node(i); - phaseGraph.add_edge(0, 1); - phaseGraph.add_edge(0, 2); - phaseGraph.add_edge(1, 3); - phaseGraph.add_edge(2, 3); - phaseGraph.add_edge(3, 4); - phaseGraph.add_edge(3, 5); - phaseGraph.add_edge(4, 6); - phaseGraph.add_edge(5, 6); - + std::vector > edges{{0,1}, {0,2}, {1,3}, {2,3}, {3,4}, {3,5}, {4,6}, {5,6}}; + + for (auto e: edges) { + phaseGraph.add_edge(e.first, e.second); + log_info() << nodes[e.second].get_id() << " " << nodes[e.first].get_id() << std::endl; + nodes[e.second].add_memory_share_dependency(nodes[e.first]); + } + // 0 -- 1 ---- 3 -- 4 ---- 6 // \ / \ / // `---- 2 ´ `---- 5 ´ // - // Since the result of 1 and 4 are not needed in 2 and 5 resp., - // (that is, there is no edge 1-2 or 4-5,) - // 1 and 4 should be evacuated when they are done. - - std::vector expect(7); - expect[1] = expect[4] = true; - std::vector evacuateWhenDone; + std::unordered_set evacuateWhenDone; std::vector > phases; { runtime rt(nodeMap); rt.get_phases(phaseMap, phaseGraph, evacuateWhenDone, phases); } - + + std::unordered_map nodePhases; + for (size_t i=0; i < phases.size(); ++i) + for (node * n: phases[i]) + nodePhases.emplace((evac_node*)n-nodes, i); + bool bad = false; - for (size_t i = 0; i < N; ++i) { - if (evacuateWhenDone[i] == expect[i]) { - log_debug() << "Node " << i << ": " - << (expect[i] ? "should evacuate" : "don't evacuate") - << std::endl; - } else { - log_error() << "Node " << i << ": Expected " - << (expect[i] ? "should evacuate" : "don't evacuate") - << ", got " - << (evacuateWhenDone[i] ? 
"should evacuate" : "don't evacuate") - << std::endl; + for (size_t i=0; i < N; ++i) { + size_t evac = 0; + for (auto e: edges) { + if (e.first != i) continue; + if (nodePhases[i]+1 == nodePhases[e.second]) continue; + evac = 1; + } + if (evacuateWhenDone.count(nodes[i].get_id()) != evac) { + if (evac) + log_error() << "Evac of node " << i << " should be evaced but is not" << std::endl; + else + log_error() << "Evac of node " << i << " is evacuated but should not be" << std::endl; bad = true; } } + return !bad; } @@ -165,9 +173,244 @@ bool get_phase_graph_test() { return true; } +// See tpie::pipelining::bits::runtime::get_phases for description of edge colors +enum edge_color { + BLACK, + RED, + GREEN, +}; +const char * edge_color_names[] = { + "black", + "red", + "green", +}; + +struct edge_t { + edge_color color; + size_t from; + size_t to; +}; + +void evacuate_phase_graph_test(teststream & ts, + bool should_fail, + size_t expected_satisfied_reds, + const char * name, + const std::vector & edges) { + ts << name << std::endl; + + std::vector nodeList; + std::map> nodes; + std::map can_evac_node; + + // Make sure red edges come from nodes that can be evacuated + // and green edges come from nodes that can't + // Black edge can come from both types + for (const edge_t & e : edges) { + if (e.color == BLACK) continue; + + bool can_evac = e.color == RED; + + if (can_evac_node.find(e.from) != can_evac_node.end()) { + tp_assert(can_evac == can_evac_node[e.from], + "Red and green edge with from same node not possible: " + std::to_string(e.from)); + } + + can_evac_node[e.from] = can_evac; + } + + for (const auto & p : can_evac_node) { + node * n = p.second? static_cast(new evac_node()): static_cast(new no_evac_node()); + nodes[p.first] = std::shared_ptr(n); + nodeList.push_back(n); + } + + // For all nodes with neither a red or green edge coming from it + // we make evacuatable nodes. 
+ for (const edge_t & e : edges) { + for (size_t i : {e.from, e.to}) { + if (nodes.find(i) == nodes.end()) { + node * n = new evac_node(); + nodes[i] = std::shared_ptr(n); + nodeList.push_back(n); + } + } + } + + std::map revNodes; + for (const auto & p : nodes) { + revNodes.insert({p.second.get(), p.first}); + } + + size_t N = nodeList.size(); + + node_map::ptr nodeMap = nodeList[0]->get_node_map(); + for (auto n : nodeList) n->get_node_map()->union_set(nodeMap); + nodeMap = nodeMap->find_authority(); + + // In our model, each node is its own phase. + std::map phaseMap; + for (const auto & p : nodes) phaseMap[p.second.get()] = p.first; + + graph phaseGraph; + for (size_t i = 0; i < N; ++i) phaseGraph.add_node(i); + + for (const edge_t & e : edges) { + phaseGraph.add_edge(e.from, e.to); + log_info() << e.from << " -(" << edge_color_names[e.color] << ")-> " << e.to << std::endl; + if (e.color == BLACK) { + nodes[e.to]->add_dependency(*nodes[e.from]); + } else { + nodes[e.to]->add_memory_share_dependency(*nodes[e.from]); + } + } + + std::unordered_set evacuateWhenDone; + std::vector> phases; + + { + runtime rt(nodeMap); + try { + rt.get_phases(phaseMap, phaseGraph, evacuateWhenDone, phases); + } catch (exception & e) { + if (!should_fail) { + log_error() << e.what() << std::endl; + } + ts << result(should_fail); + return; + } + } + + if (should_fail) { + log_error() << "Constructed phase ordering successfully" << std::endl; + } + + log_info() << "Phase order: "; + std::vector phaseOrder; + for (const auto & phase : phases) { + size_t i = revNodes[phase[0]]; + phaseOrder.push_back(i); + log_info() << i << ", "; + } + log_info() << std::endl; + + log_info() << "Evacuated nodes: "; + std::unordered_set evacuatedNodes; + for (auto id : evacuateWhenDone) { + size_t i = revNodes[nodeMap->get(id)]; + evacuatedNodes.insert(i); + log_info() << i << ", "; + } + log_info() << std::endl; + + if (should_fail) { + ts << result(false); + return; + } + + bool bad = false; + size_t 
satisfied_reds = 0; + size_t reds = 0; + for (const edge_t & e : edges) { + auto from = std::find(phaseOrder.begin(), phaseOrder.end(), e.from); + auto to = std::find(phaseOrder.begin(), phaseOrder.end(), e.to); + bool satisfied = to - from == 1; + + if (e.color == GREEN) { + if (evacuatedNodes.count(e.from) != 0) { + log_error() << "Evacuated a node with a green edge going out: " << e.from << std::endl; + bad = true; + } + if (!satisfied) { + log_error() << "Phases with green edge between not consecutive: " + << e.from << " -> " << e.to << std::endl; + bad = true; + } + } else if (e.color == RED) { + reds++; + if (satisfied) satisfied_reds++; + } + } + + log_info() << "Satisfied " << satisfied_reds << " out of " << reds << " red edges" << std::endl; + + if (satisfied_reds != expected_satisfied_reds) { + log_error() << "Satisfied " << satisfied_reds << " red edges, expected " << expected_satisfied_reds << std::endl; + ts << result(false); + return; + } + + ts << result(!bad); +} + +void evacuate_phase_graph_multi(teststream & ts) { + evacuate_phase_graph_test(ts, true, 0, "Simple fail", { + {BLACK, 0, 1}, + {BLACK, 1, 2}, + {GREEN, 0, 2}, + }); + evacuate_phase_graph_test(ts, false, 0, "Diamond working", { + {GREEN, 0, 1}, + {BLACK, 1, 3}, + {BLACK, 0, 2}, + {GREEN, 2, 3}, + }); + evacuate_phase_graph_test(ts, true, 0, "Diamond failing", { + {GREEN, 0, 1}, + {GREEN, 1, 3}, + {BLACK, 0, 2}, + {BLACK, 2, 3}, + }); + evacuate_phase_graph_test(ts, false, 0, "Green path", { + {GREEN, 2, 3}, + {GREEN, 1, 2}, + {GREEN, 3, 4}, + {GREEN, 0, 1}, + }); + evacuate_phase_graph_test(ts, false, 0, "Green bridges", { + {GREEN, 0, 1}, + {BLACK, 1, 2}, + {BLACK, 2, 4}, + {BLACK, 1, 3}, + {BLACK, 3, 4}, + {GREEN, 4, 5}, + {BLACK, 5, 6}, + {BLACK, 6, 8}, + {BLACK, 5, 7}, + {BLACK, 7, 8}, + {GREEN, 8, 9}, + {BLACK, 9, 10}, + {GREEN, 10, 11}, + }); + evacuate_phase_graph_test(ts, false, 2, "Red diamond", { + {RED, 0, 1}, + {RED, 1, 3}, + {RED, 0, 2}, + {RED, 2, 3}, + }); + 
evacuate_phase_graph_test(ts, false, 2, "3/4 Red diamond 1", { + {RED, 0, 1}, + {RED, 1, 3}, + {BLACK, 0, 2}, + {RED, 2, 3}, + }); + evacuate_phase_graph_test(ts, false, 2, "3/4 Red diamond 2", { + {BLACK, 0, 1}, + {RED, 1, 3}, + {RED, 0, 2}, + {RED, 2, 3}, + }); + evacuate_phase_graph_test(ts, false, 1, "Contracted node w/ outgoing red & black", { + {GREEN, 0, 1}, + {BLACK, 0, 2}, + {RED, 1, 2}, + {BLACK, 0, 3}, + }); +} + int main(int argc, char ** argv) { return tpie::tests(argc, argv) .test(evacuate_test, "evacuate") .test(get_phase_graph_test, "get_phase_graph") + .multi_test(evacuate_phase_graph_multi, "evacuate_phase_graph") ; } diff --git a/keyvi/3rdparty/tpie/test/unit/test_serialization_sort.cpp b/keyvi/3rdparty/tpie/test/unit/test_serialization_sort.cpp index 65d502e6..9e65c2d9 100644 --- a/keyvi/3rdparty/tpie/test/unit/test_serialization_sort.cpp +++ b/keyvi/3rdparty/tpie/test/unit/test_serialization_sort.cpp @@ -69,7 +69,7 @@ class use_serialization_sorter { int main(int argc, char ** argv) { tests t(argc, argv); - return - sort_tester::add_all(t) - ; + sort_tester::add_all(t); + sort_tester::add_file_limit_test(t, 3); + return t; } diff --git a/keyvi/3rdparty/tpie/tpie/CMakeLists.txt b/keyvi/3rdparty/tpie/tpie/CMakeLists.txt index 3dfe66aa..fab41812 100644 --- a/keyvi/3rdparty/tpie/tpie/CMakeLists.txt +++ b/keyvi/3rdparty/tpie/tpie/CMakeLists.txt @@ -34,6 +34,7 @@ set (HEADERS file.h file_base.h file_base_crtp.h + file_manager.h file_stream.h file_stream_base.h file_accessor/byte_stream_accessor.h @@ -41,7 +42,6 @@ set (HEADERS file_accessor/stream_accessor.h file_accessor/stream_accessor_base.h file_accessor/stream_accessor_base.inl - file_count.h execution_time_predictor.h internal_sort.h internal_queue.h @@ -107,6 +107,8 @@ set (HEADERS progress_indicator_null.h progress_indicator_terminal.h queue.h + resource_manager.h + resources.h serialization.h serialization2.h serialization_stream.h @@ -157,7 +159,7 @@ set (SOURCES compressed/thread.cpp 
cpu_timer.cpp file_base.cpp - file_count.cpp + file_manager.cpp file_stream_base.cpp execution_time_predictor.cpp fractional_progress.cpp @@ -174,6 +176,8 @@ set (SOURCES prime.cpp progress_indicator_base.cpp progress_indicator_subindicator.cpp + resource_manager.cpp + resources.cpp serialization_stream.cpp hash.cpp tempname.cpp diff --git a/keyvi/3rdparty/tpie/tpie/array.h b/keyvi/3rdparty/tpie/tpie/array.h index 8baf99df..7cdc307c 100644 --- a/keyvi/3rdparty/tpie/tpie/array.h +++ b/keyvi/3rdparty/tpie/tpie/array.h @@ -535,6 +535,10 @@ class array : public linear_memory_base > { /////////////////////////////////////////////////////////////////////////// const T * get() const {return m_elements;} + /////////////////////////////////////////////////////////////////////////// + /// \brief Return copy of the allocator + /////////////////////////////////////////////////////////////////////////// + Allocator get_allocator() const {return m_allocator;} private: friend struct bits::allocator_usage; diff --git a/keyvi/3rdparty/tpie/tpie/btree.h b/keyvi/3rdparty/tpie/tpie/btree.h index bed653ef..50aab1ad 100644 --- a/keyvi/3rdparty/tpie/tpie/btree.h +++ b/keyvi/3rdparty/tpie/tpie/btree.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/keyvi/3rdparty/tpie/tpie/btree/base.h b/keyvi/3rdparty/tpie/tpie/btree/base.h index 31d8a542..f7d615d1 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/base.h +++ b/keyvi/3rdparty/tpie/tpie/btree/base.h @@ -71,6 +71,7 @@ struct int_opt {static const int O=i;}; static const int f_internal = 1; static const int f_static = 2; static const int f_unordered = 4; +static const int f_serialized = 8; } //namespace bbits @@ -98,8 +99,12 @@ using btree_dynamic = bbits::int_opt<0>; using btree_unordered = bbits::int_opt; using btree_ordered = bbits::int_opt<0>; +using btree_serialized = bbits::int_opt; +using btree_not_serialized = bbits::int_opt<0>; + namespace bbits { +//O = flags, a, b = B-tree parameters, C = 
comparator, K = key extractor, A = augmenter template struct Opt { static const int O=O_; @@ -193,6 +198,9 @@ class internal_store; template class external_store; +template +class serialized_store; + struct enab {}; template @@ -210,7 +218,9 @@ class tree_state { static const bool is_internal = O::O & bbits::f_internal; static const bool is_static = O::O & bbits::f_static; static const bool is_ordered = ! (O::O & bbits::f_unordered); - + static const bool is_serialized = O::O & bbits::f_serialized; + static_assert(!is_serialized || is_static, "Serialized B-tree cannot be dynamic."); + typedef typename std::conditional< is_ordered, typename O::K, @@ -259,7 +269,12 @@ class tree_state { typedef typename std::conditional< is_internal, bbits::internal_store, - bbits::external_store >::type store_type; + typename std::conditional< + is_serialized, + bbits::serialized_store, + bbits::external_store + >::type + >::type store_type; typedef typename store_type::internal_type internal_type; typedef typename store_type::leaf_type leaf_type; diff --git a/keyvi/3rdparty/tpie/tpie/btree/btree.h b/keyvi/3rdparty/tpie/tpie/btree/btree.h index c4bced29..0018a356 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/btree.h +++ b/keyvi/3rdparty/tpie/tpie/btree/btree.h @@ -577,6 +577,14 @@ class tree { return m_state.store().size() == 0; } + void set_metadata(const std::string & data) { + m_state.store().set_metadata(data); + } + + std::string get_metadata() { + return m_state.store().get_metadata(); + } + /** * Construct a btree with the given storage */ diff --git a/keyvi/3rdparty/tpie/tpie/btree/btree_builder.h b/keyvi/3rdparty/tpie/tpie/btree/btree_builder.h index bfe7efff..61366d10 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/btree_builder.h +++ b/keyvi/3rdparty/tpie/tpie/btree/btree_builder.h @@ -90,7 +90,7 @@ class builder { } leaf.augment = m_state.m_augmenter(node_type(&m_state, leaf.leaf)); - + m_state.store().flush(); m_leaves.push_back(leaf); } @@ -108,7 +108,8 @@ class builder { } 
internal.augment = m_state.m_augmenter(node_type(&m_state, internal.internal)); - + m_state.store().flush(); + // push the internal node to the deque of nodes if(m_internal_nodes.size() < 1) m_internal_nodes.push_back(std::deque()); m_internal_nodes[0].push_back(internal); @@ -126,6 +127,7 @@ class builder { m_internal_nodes[level].pop_front(); } internal.augment = m_state.m_augmenter(node_type(&m_state, internal.internal)); + m_state.store().flush(); // push the internal node to the deque of nodes if(m_internal_nodes.size() < level+2) m_internal_nodes.push_back(std::deque()); @@ -187,7 +189,7 @@ class builder { */ template explicit builder(std::string path, comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(), enable =enab() ) - : m_state(store_type(path), std::move(augmenter), typename state_type::keyextract_type()) + : m_state(store_type(path, true), std::move(augmenter), typename state_type::keyextract_type()) , m_comp(comp) {} @@ -209,11 +211,11 @@ class builder { if(m_items.size() < leaf_tipping_point()) return; extract_nodes(); } - + /** * \brief Constructs and returns a btree from the value that was pushed to the builder. The btree builder should not be used again after this point. 
*/ - tree_type build() { + tree_type build(const std::string & metadata = std::string()) { // finish building the tree by traversing all levels and constructing leaves/nodes // construct one or two leaves if neccesary @@ -257,7 +259,11 @@ class builder { else m_state.store().set_root(m_internal_nodes.back().front().internal); } - + if (metadata.size()) { + m_state.store().flush(); + m_state.store().set_metadata(metadata); + } + m_state.store().finalize_build(); return tree_type(std::move(m_state), std::move(m_comp)); } diff --git a/keyvi/3rdparty/tpie/tpie/btree/external_store.h b/keyvi/3rdparty/tpie/tpie/btree/external_store.h index ce8a19c7..f9036b26 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/external_store.h +++ b/keyvi/3rdparty/tpie/tpie/btree/external_store.h @@ -110,7 +110,7 @@ class external_store : public external_store_base { /** * \brief Construct a new empty btree storage */ - explicit external_store(const std::string & path) + explicit external_store(const std::string & path, bool /*write_only*/=false) //TODO maybe use this? 
: external_store_base(path) { m_collection = std::make_shared( @@ -318,7 +318,7 @@ class external_store : public external_store_base { for (size_t i=0; i < *(dstInter.count); ++i) if (dstInter.values[i].handle == child.handle) return i; tp_assert(false, "Leaf not found"); - __builtin_unreachable(); + tpie_unreachable(); } size_t index(internal_type child, internal_type node) const { @@ -328,7 +328,7 @@ class external_store : public external_store_base { for (size_t i=0; i < *(dstInter.count); ++i) if (dstInter.values[i].handle == child.handle) return i; tp_assert(false, "Node not found"); - __builtin_unreachable(); + tpie_unreachable(); } void set_augment(blocks::block_handle child, internal_type node, augment_type augment) { @@ -345,7 +345,7 @@ class external_store : public external_store_base { } tp_assert(false, "Not found"); - __builtin_unreachable(); + tpie_unreachable(); } @@ -379,6 +379,17 @@ class external_store : public external_store_base { void set_size(size_t size) throw() { m_size = size; } + + void flush() {} + void finalize_build() {} + + void set_metadata(const std::string & data) { + throw exception("Not yet implemnted."); + } + + std::string get_metadata() { + throw exception("Not yet implemnted."); + } std::shared_ptr m_collection; diff --git a/keyvi/3rdparty/tpie/tpie/btree/external_store_base.h b/keyvi/3rdparty/tpie/tpie/btree/external_store_base.h index e3da800c..cb532c46 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/external_store_base.h +++ b/keyvi/3rdparty/tpie/tpie/btree/external_store_base.h @@ -41,7 +41,7 @@ class external_store_base { ~external_store_base(); protected: - blocks::block_handle m_root; + blocks::block_handle m_root; std::string m_path; size_t m_height; size_t m_size; diff --git a/keyvi/3rdparty/tpie/tpie/btree/internal_store.h b/keyvi/3rdparty/tpie/tpie/btree/internal_store.h index 75e72f2d..2b4ac800 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/internal_store.h +++ b/keyvi/3rdparty/tpie/tpie/btree/internal_store.h @@ -236,10 
+236,22 @@ class internal_store { void set_size(size_t size) throw() { m_size = size; } + + void flush() {} + void finalize_build() {} + void set_metadata(const std::string & data) { + metadata = data; + } + + std::string get_metadata() { + return metadata; + } + void * m_root; size_t m_height; size_t m_size; + std::string metadata; template friend class ::tpie::btree_node; diff --git a/keyvi/3rdparty/tpie/tpie/btree/node.h b/keyvi/3rdparty/tpie/tpie/btree/node.h index d5bb3164..10ab1d15 100644 --- a/keyvi/3rdparty/tpie/tpie/btree/node.h +++ b/keyvi/3rdparty/tpie/tpie/btree/node.h @@ -149,7 +149,7 @@ class btree_node { * * Requires is_leaf() */ - value_type value(size_t i) const { + const value_type & value(size_t i) const { tp_assert(m_is_leaf, "Not leaf"); return m_state->store().get(m_leaf, i); } diff --git a/keyvi/3rdparty/tpie/tpie/btree/serialized_store.h b/keyvi/3rdparty/tpie/tpie/btree/serialized_store.h new file mode 100644 index 00000000..d74145e3 --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/btree/serialized_store.h @@ -0,0 +1,413 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// Copyright 2014 The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. 
If not, see + +#ifndef _TPIE_BTREE_SERIALIZED_STORE_H_ +#define _TPIE_BTREE_SERIALIZED_STORE_H_ + +#include +#include +#include +#include +#include +#include + +namespace tpie { +namespace bbits { + +/** + * \brief Serializing store + * + * \tparam T the type of value stored + * \tparam A the type of augmentation + * \tparam a the minimum fanout of a node + * \tparam b the maximum fanout of a node + */ +template +class serialized_store { +public: + static const size_t a = a_?a_:2; + static const size_t b = b_?b_:4; + + /** + * \brief Type of value of items stored + */ + typedef T value_type; + + /** + * \brief Type of augmentation stored + */ + typedef A augment_type; + + + typedef size_t size_type; + + typedef uint64_t off_t; + + + serialized_store(const serialized_store & o) = delete; + serialized_store & operator=(const serialized_store & o) = delete; + serialized_store(serialized_store && o) = default; + + serialized_store & operator=(serialized_store && o) { + this->~serialized_store(); + new (this) serialized_store(o); + return this; + } + +private: + struct internal_content { + off_t offset; + A augment; + + static const bool is_trivially_serializable=true; + }; + + struct internal { + off_t my_offset; //NOTE not serialized + size_t count; + internal_content values[b]; + + template + friend void serialize(S & s, const internal & i) { + using tpie::serialize; + serialize(s, i.count); + serialize(s, i.values, i.values + i.count); + } + + template + friend void unserialize(D & d, internal & i) { + using tpie::unserialize; + unserialize(d, i.count); + assert(i.count <= b); + unserialize(d, i.values, i.values + i.count); + } + }; + + struct leaf { + off_t my_offset; //NOTE not serialized + size_t count; + T values[b]; + + template + friend void serialize(S & s, const leaf & i) { + using tpie::serialize; + serialize(s, i.count); + serialize(s, i.values, i.values + i.count); + } + + template + friend void unserialize(D & d, leaf & i) { + using tpie::unserialize; + 
unserialize(d, i.count); + assert(i.count <= b); + unserialize(d, i.values, i.values + i.count); + } + }; + + struct header { + static constexpr uint64_t good_magic = 0x8bbd51bfe5e3d477, current_version = 0; + uint64_t magic; + uint64_t version; // 0 + off_t root; // offset of root + size_t height; // tree height (internal and leaf levels) + size_t size; // number of items (from btree) + off_t metadata_offset; + off_t metadata_size; + }; + + typedef std::shared_ptr internal_type; + typedef std::shared_ptr leaf_type; + + /** + * \brief Construct a new empty btree storage + */ + explicit serialized_store(const std::string & path, bool write_only=false): + m_height(0), m_size(0), metadata_offset(0), metadata_size(0), path(path) { + f.reset(new std::fstream()); + header h; + if (write_only) { + f->open(path, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary); + if (!f->is_open()) + throw invalid_file_exception("Open failed"); + memset(&h, 0, sizeof(h)); + f->write(reinterpret_cast(&h), sizeof(h)); + } else { + f->open(path, std::ios_base::in | std::ios_base::binary); + if (!f->is_open()) + throw invalid_file_exception("Open failed"); + f->read(reinterpret_cast(&h), sizeof(h)); + if (!*f) + throw invalid_file_exception("Unable to read header"); + + if (h.magic != header::good_magic) + throw invalid_file_exception("Bad magic"); + + if (h.version != header::current_version) + throw invalid_file_exception("Bad version"); + + m_height = h.height; + m_size = h.size; + metadata_offset = h.metadata_offset; + metadata_size = h.metadata_size; + if (m_height == 1) { + root_leaf = std::make_shared(); + root_leaf->my_offset = h.root; + f->seekg(h.root); + unserialize(*f, *root_leaf); + } else if (m_height > 1) { + root_internal = std::make_shared(); + root_internal->my_offset = h.root; + f->seekg(h.root); + unserialize(*f, *root_internal); + } + } + } + + static constexpr size_t min_internal_size() {return a;} + static constexpr size_t max_internal_size() {return 
b;} + + static constexpr size_t min_leaf_size() {return a;} + static constexpr size_t max_leaf_size() {return b;} + + void move(internal_type src, size_t src_i, + internal_type dst, size_t dst_i) { + dst->values[dst_i] = src->values[src_i]; + } + + void move(leaf_type src, size_t src_i, + leaf_type dst, size_t dst_i) { + dst->values[dst_i] = src->values[src_i]; + } + + void set(leaf_type dst, size_t dst_i, T c) { + assert(dst == current_leaf); + dst->values[dst_i] = c; + } + + void set(internal_type node, size_t i, internal_type c) { + assert(node == current_internal); + node->values[i].offset = c->my_offset; + } + + void set(internal_type node, size_t i, leaf_type c) { + assert(node == current_internal); + node->values[i].offset = c->my_offset; + } + + const T & get(leaf_type l, size_t i) const { + return l->values[i]; + } + + size_t count(internal_type node) const { + return node->count; + } + + size_t count(leaf_type node) const { + return node->count; + } + + void set_count(internal_type node, size_t i) { + node->count = i; + } + + void set_count(leaf_type node, size_t i) { + node->count = i; + } + + leaf_type create_leaf() { + assert(!current_internal && !current_leaf); + current_leaf = std::make_shared(); + current_leaf->my_offset = (stream_size_type)f->tellp(); + return current_leaf; + } + leaf_type create(leaf_type) {return create_leaf();} + internal_type create_internal() { + assert(!current_internal && !current_leaf); + current_internal = std::make_shared(); + current_internal->my_offset = (stream_size_type)f->tellp(); + return current_internal; + } + internal_type create(internal_type) {return create_internal();} + + void set_root(internal_type node) {root_internal = node;} + void set_root(leaf_type node) {root_leaf = node;} + + internal_type get_root_internal() const { + return root_internal; + } + + leaf_type get_root_leaf() const { + return root_leaf; + } + + internal_type get_child_internal(internal_type node, size_t i) const { + internal_type child 
= std::make_shared(); + assert(i < node->count); + child->my_offset = node->values[i].offset; + f->seekg(child->my_offset); + unserialize(*f, *child); + return child; + } + + leaf_type get_child_leaf(internal_type node, size_t i) const { + leaf_type child = std::make_shared(); + assert(i < node->count); + child->my_offset = node->values[i].offset; + f->seekg(child->my_offset); + unserialize(*f, *child); + return child; + } + + size_t index(off_t my_offset, internal_type node) const { + for (size_t i=0; i < node->count; ++i) + if (node->values[i].offset == my_offset) return i; + tp_assert(false, "Not found"); + tpie_unreachable(); + } + + size_t index(leaf_type l, internal_type node) const { + return index(l->my_offset, node); + } + + size_t index(internal_type i, internal_type node) const { + return index(i->my_offset, node); + } + + void set_augment(leaf_type l, internal_type p, augment_type ag) { + size_t idx = index(l->my_offset, p); + p->values[idx].augment = ag; + } + + void set_augment(internal_type i, internal_type p, augment_type ag) { + size_t idx = index(i->my_offset, p); + p->values[idx].augment = ag; + } + + const augment_type & augment(internal_type p, size_t i) const { + return p->values[i].augment; + } + + size_t height() const throw() { + return m_height; + } + + void set_height(size_t height) throw() { + m_height = height; + } + + size_t size() const throw() { + return m_size; + } + + void set_size(size_t size) throw() { + m_size = size; + } + + void flush() { + if (current_internal) { + assert(!current_leaf); + assert((stream_size_type)f->tellp() == current_internal->my_offset); + serialize(*f, *current_internal); + current_internal.reset(); + } + if (current_leaf) { + assert((stream_size_type)f->tellp() == current_leaf->my_offset); + serialize(*f, *current_leaf); + current_leaf.reset(); + } + } + + void finalize_build() { + // Should call flush() first. 
+ assert(!current_internal && !current_leaf); + + header h; + h.magic = header::good_magic; + h.version = header::current_version; + h.root = 0; + if (root_internal) { + h.root = root_internal->my_offset; + } else if (root_leaf) { + h.root = root_leaf->my_offset; + } else { + assert(m_size == 0); + } + h.height = m_height; + h.size = m_size; + h.metadata_offset = metadata_offset; + h.metadata_size = metadata_size; + f->seekp(0); + f->write(reinterpret_cast(&h), sizeof(h)); + f->close(); + + f->open(path, std::ios_base::in | std::ios_base::binary); + if (!f->is_open()) + throw invalid_file_exception("Open failed"); + } + + void set_metadata(const std::string & data) { + assert(!current_internal && !current_leaf); + assert(f->is_open()); + metadata_offset = (stream_size_type)f->tellp(); + metadata_size = data.size(); + f->write(data.c_str(), data.size()); + } + + std::string get_metadata() { + assert(f->is_open()); + if (metadata_offset == 0 || metadata_size == 0) + return {}; + std::string data(metadata_size, '\0'); + f->read(&data[0], metadata_size); + return data; + } + + size_t m_height; + size_t m_size; + off_t metadata_offset, metadata_size; + + std::string path; + std::unique_ptr f; + internal_type current_internal, root_internal; + leaf_type current_leaf, root_leaf; + + template + friend class ::tpie::btree_node; + + template + friend class ::tpie::btree_iterator; + + template + friend class bbits::tree; + + template + friend class bbits::tree_state; + + template + friend class bbits::builder; + +}; + +} //namespace bbits +} //namespace tpie +#endif /*_TPIE_BTREE_SERIALIZED_STORE_H_*/ diff --git a/keyvi/3rdparty/tpie/tpie/compressed/request.h b/keyvi/3rdparty/tpie/tpie/compressed/request.h index c4d9dbe4..077bd60d 100644 --- a/keyvi/3rdparty/tpie/tpie/compressed/request.h +++ b/keyvi/3rdparty/tpie/tpie/compressed/request.h @@ -51,10 +51,10 @@ namespace tpie { class compressor_response { public: compressor_response() - : 
m_blockNumber(std::numeric_limits::max()) + : m_done(false) + , m_blockNumber(std::numeric_limits::max()) , m_readOffset(0) , m_blockSize(0) - , m_done(false) , m_endOfStream(false) , m_nextReadOffset(0) , m_nextBlockSize(0) @@ -131,7 +131,12 @@ class compressor_response { return m_readOffset; } - // read, stream + // any, thread + void set_done() { + m_done = true; + } + + // any, stream bool done() { return m_done; } @@ -151,13 +156,15 @@ class compressor_response { private: std::condition_variable m_changed; + // Information about either read or write + bool m_done; + // Information about the write stream_size_type m_blockNumber; stream_size_type m_readOffset; memory_size_type m_blockSize; // Information about the read - bool m_done; bool m_endOfStream; stream_size_type m_nextReadOffset; memory_size_type m_nextBlockSize; @@ -292,11 +299,13 @@ class write_request : public request_base { // must have lock! void update_recorded_size() { + m_response->set_done(); if (m_tempFile != NULL) m_tempFile->update_recorded_size(m_fileAccessor->file_size()); } // must have lock! void update_recorded_size(stream_size_type fileSize) { + m_response->set_done(); if (m_tempFile != NULL) m_tempFile->update_recorded_size(fileSize); } diff --git a/keyvi/3rdparty/tpie/tpie/compressed/stream.h b/keyvi/3rdparty/tpie/tpie/compressed/stream.h index 0baa46e7..d3b47fed 100644 --- a/keyvi/3rdparty/tpie/tpie/compressed/stream.h +++ b/keyvi/3rdparty/tpie/tpie/compressed/stream.h @@ -69,7 +69,7 @@ struct open { friend inline open::type operator^(open::type a, open::type b) { return (open::type) ((int) a ^ (int) b); } friend inline open::type operator~(open::type a) - { return (open::type) (int) ~a; } + { return (open::type) ~(int) a; } static type translate(access_type accessType, cache_hint cacheHint, compression_flags compressionFlags) { return (type) (( @@ -418,6 +418,13 @@ class compressed_stream_base { /** Response from compressor thread; protected by compressor thread mutex. 
*/ compressor_response m_response; + /** When use_compression() is true: + * Indicates whether m_response is the response to a write request. + * Used for knowing where to read next in read/read_back. + * */ + bool m_updateReadOffsetFromWrite = false; + stream_size_type m_lastWriteBlockNumber; + seek_state::type m_seekState; /** Position relating to the currently loaded buffer. @@ -528,7 +535,7 @@ class file_stream : public compressed_stream_base { close(); } catch (std::exception & e) { log_error() << "Someone threw an error in file_stream::~file_stream: " << e.what() << std::endl; - throw; + abort(); } } @@ -617,6 +624,7 @@ class file_stream : public compressed_stream_base { void seek(stream_offset_type offset, offset_type whence=beginning) { tp_assert(is_open(), "seek: !is_open"); uncache_read_writes(); + m_updateReadOffsetFromWrite = false; if (!use_compression()) { // Handle uncompressed case by delegating to set_position. switch (whence) { @@ -721,6 +729,7 @@ class file_stream : public compressed_stream_base { // No need to flush block m_buffer.reset(); m_response.clear_block_info(); + m_updateReadOffsetFromWrite = false; compressor_thread_lock l(compressor()); finish_requests(l); get_buffer(l, 0); @@ -794,6 +803,7 @@ class file_stream : public compressed_stream_base { m_currentFileSize = std::numeric_limits::max(); compressor_thread_lock l(compressor()); m_response.clear_block_info(); + m_updateReadOffsetFromWrite = false; } m_size = offset; if (offset < m_offset) { @@ -872,6 +882,8 @@ class file_stream : public compressed_stream_base { /// \c get_position. /////////////////////////////////////////////////////////////////////////// void set_position(const stream_position & pos) { + m_updateReadOffsetFromWrite = false; + // If the code is correct, short circuiting is not necessary; // if the code is not correct, short circuiting might mask faults. 
/* @@ -1042,6 +1054,7 @@ class file_stream : public compressed_stream_base { if (m_nextItem == m_bufferEnd) { compressor_thread_lock lock(compressor()); if (m_bufferDirty) { + m_updateReadOffsetFromWrite = true; flush_block(lock); } if (offset() == size()) { @@ -1064,8 +1077,10 @@ class file_stream : public compressed_stream_base { if (m_nextItem == m_bufferEnd) { compressor_thread_lock l(compressor()); - if (m_bufferDirty) + if (m_bufferDirty) { + m_updateReadOffsetFromWrite = true; flush_block(l); + } get_buffer(l, m_streamBlocks); m_nextItem = m_bufferBegin; } @@ -1104,6 +1119,8 @@ class file_stream : public compressed_stream_base { uncache_read_writes(); compressor_thread_lock l(compressor()); + + m_updateReadOffsetFromWrite = false; if (this->m_bufferDirty) flush_block(l); @@ -1266,11 +1283,28 @@ class file_stream : public compressed_stream_base { compressor().request(r); m_bufferDirty = false; + if (m_updateReadOffsetFromWrite) { + m_lastWriteBlockNumber = blockNumber; + } + if (blockNumber == m_streamBlocks) { ++m_streamBlocks; } } + void maybe_update_read_offset(compressor_thread_lock & lock) { + if (m_updateReadOffsetFromWrite && use_compression()) { + while (!m_response.done()) { + m_response.wait(lock); + } + if (m_response.has_block_info(m_lastWriteBlockNumber)) { + m_readOffset = m_response.get_read_offset(m_lastWriteBlockNumber); + m_nextReadOffset = m_readOffset + m_response.get_block_size(m_lastWriteBlockNumber); + } + m_updateReadOffsetFromWrite = false; + } + } + /////////////////////////////////////////////////////////////////////////// /// \brief Reads next block according to nextReadOffset/nextBlockSize. 
/// @@ -1280,6 +1314,8 @@ class file_stream : public compressed_stream_base { uncache_read_writes(); get_buffer(lock, blockNumber); + maybe_update_read_offset(lock); + stream_size_type readOffset; if (m_buffer->get_state() == compressor_buffer_state::clean) { m_readOffset = m_buffer->get_read_offset(); @@ -1336,6 +1372,9 @@ class file_stream : public compressed_stream_base { uncache_read_writes(); tp_assert(use_compression(), "read_previous_block: !use_compression"); get_buffer(lock, blockNumber); + + maybe_update_read_offset(lock); + if (m_buffer->get_state() == compressor_buffer_state::clean) { m_readOffset = m_buffer->get_read_offset(); m_nextReadOffset = m_readOffset + m_buffer->get_block_size(); diff --git a/keyvi/3rdparty/tpie/tpie/compressed/thread.h b/keyvi/3rdparty/tpie/tpie/compressed/thread.h index fd5c2b5f..1a9ad670 100644 --- a/keyvi/3rdparty/tpie/tpie/compressed/thread.h +++ b/keyvi/3rdparty/tpie/tpie/compressed/thread.h @@ -77,9 +77,9 @@ class compressor_thread_lock { ~compressor_thread_lock() { ptime t3 = ptime::now(); // Time blocked - increment_user(0, ptime::seconds(t1, t2)*1000000); + increment_user(0, (stream_size_type)(ptime::seconds(t1, t2)*1000000)); // Time held - increment_user(1, ptime::seconds(t2, t3)*1000000); + increment_user(1, (stream_size_type)(ptime::seconds(t2, t3)*1000000)); } lock_t & get_lock() { diff --git a/keyvi/3rdparty/tpie/tpie/exception.h b/keyvi/3rdparty/tpie/tpie/exception.h index f966edef..a18ffb8a 100644 --- a/keyvi/3rdparty/tpie/tpie/exception.h +++ b/keyvi/3rdparty/tpie/tpie/exception.h @@ -67,5 +67,23 @@ struct job_manager_exception: public exception { job_manager_exception(): exception("") {}; }; +/////////////////////////////////////////////////////////////////////////////// +/// \brief Thrown when trying to allocate too much of a resource. +/// +/// When the resource limit is exceeded and the resource limit enforcement policy +/// is set to THROW, this error is thrown by the resource subsystem. 
+/////////////////////////////////////////////////////////////////////////////// +struct out_of_resource_error: public exception { + out_of_resource_error(const std::string & s): exception(s) {} +}; + +struct out_of_memory_error: public out_of_resource_error { + out_of_memory_error(const std::string & s): out_of_resource_error(s) {} +}; + +struct out_of_files_error: public out_of_resource_error { + out_of_files_error(const std::string & s): out_of_resource_error(s) {} +}; + } #endif //__TPIE_EXCEPTION_H__ diff --git a/keyvi/3rdparty/tpie/tpie/file_accessor/posix.h b/keyvi/3rdparty/tpie/tpie/file_accessor/posix.h index 2d7a1c88..03761787 100644 --- a/keyvi/3rdparty/tpie/tpie/file_accessor/posix.h +++ b/keyvi/3rdparty/tpie/tpie/file_accessor/posix.h @@ -63,6 +63,7 @@ class posix { inline void set_cache_hint(cache_hint cacheHint); private: + inline void _open(const std::string & path, int flags, mode_t mode); inline void give_advice(); }; diff --git a/keyvi/3rdparty/tpie/tpie/file_accessor/posix.inl b/keyvi/3rdparty/tpie/tpie/file_accessor/posix.inl index d5d83f86..9188ae07 100644 --- a/keyvi/3rdparty/tpie/tpie/file_accessor/posix.inl +++ b/keyvi/3rdparty/tpie/tpie/file_accessor/posix.inl @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -105,32 +105,37 @@ inline stream_size_type posix::file_size_i() { return static_cast(buf.st_size); } +void posix::_open(const std::string & path, int flags, mode_t mode = 0755) { + m_fd = ::open(path.c_str(), flags, mode); + if (m_fd == -1) { + return; + } + get_file_manager().increment_open_file_count(); + give_advice(); +} + void posix::open_wo(const std::string & path) { - m_fd = ::open(path.c_str(), O_RDWR | O_TRUNC | O_CREAT, 0666); + _open(path, O_RDWR | O_TRUNC | O_CREAT, 0666); if (m_fd == -1) throw_errno(path); - give_advice(); } void posix::open_ro(const std::string & path) { - m_fd = ::open(path.c_str(), O_RDONLY); + _open(path, O_RDONLY); if (m_fd == -1) throw_errno(path); - 
give_advice(); } bool posix::try_open_rw(const std::string & path) { - m_fd = ::open(path.c_str(), O_RDWR); + _open(path.c_str(), O_RDWR); if (m_fd == -1) { if (errno != ENOENT) throw_errno(path); return false; } - give_advice(); return true; } void posix::open_rw_new(const std::string & path) { - m_fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666); + _open(path.c_str(), O_RDWR | O_CREAT, 0666); if (m_fd == -1) throw_errno(path); - give_advice(); } bool posix::is_open() const { @@ -139,7 +144,9 @@ bool posix::is_open() const { void posix::close_i() { if (m_fd != 0) { - ::close(m_fd); + if (::close(m_fd) == 0) { + get_file_manager().decrement_open_file_count(); + } } m_fd=0; } diff --git a/keyvi/3rdparty/tpie/tpie/file_accessor/stdio.inl b/keyvi/3rdparty/tpie/tpie/file_accessor/stdio.inl index 1ffc5529..89c0b79c 100644 --- a/keyvi/3rdparty/tpie/tpie/file_accessor/stdio.inl +++ b/keyvi/3rdparty/tpie/tpie/file_accessor/stdio.inl @@ -21,7 +21,7 @@ //#include #include #include -#include +#include #include #include @@ -98,7 +98,7 @@ void stdio::open(const std::string & path, write_header(false); } } - increment_open_file_count(); + get_file_manager().increment_open_file_count(); setvbuf(m_fd, NULL, _IONBF, 0); } @@ -106,7 +106,7 @@ void stdio::close() { if (m_fd && m_write) write_header(true); if (m_fd != 0) { ::fclose(m_fd); - decrement_open_file_count(); + get_file_manager().decrement_open_file_count(); } m_fd=0; } diff --git a/keyvi/3rdparty/tpie/tpie/file_accessor/stream_accessor_base.inl b/keyvi/3rdparty/tpie/tpie/file_accessor/stream_accessor_base.inl index 7b6df453..d3bcf210 100644 --- a/keyvi/3rdparty/tpie/tpie/file_accessor/stream_accessor_base.inl +++ b/keyvi/3rdparty/tpie/tpie/file_accessor/stream_accessor_base.inl @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include namespace tpie { namespace file_accessor { @@ -151,7 +151,6 @@ void stream_accessor_base::open(const std::string 
& path, write_header(false); } } - increment_open_file_count(); m_open = true; if (write && m_maxUserDataSize < maxUserDataSize) { close(); @@ -166,7 +165,6 @@ void stream_accessor_base::close() { if (m_write) write_header(true); m_fileAccessor.close_i(); - decrement_open_file_count(); m_open = false; } diff --git a/keyvi/3rdparty/tpie/tpie/file_accessor/win32.h b/keyvi/3rdparty/tpie/tpie/file_accessor/win32.h index ad99b67e..980b2b24 100644 --- a/keyvi/3rdparty/tpie/tpie/file_accessor/win32.h +++ b/keyvi/3rdparty/tpie/tpie/file_accessor/win32.h @@ -28,6 +28,8 @@ #include #include +#include +#include #undef NO_ERROR #include @@ -61,6 +63,9 @@ class win32 { inline bool is_open() const; inline void set_cache_hint(cache_hint cacheHint); + +private: + inline void _open(const std::string & path, DWORD access, DWORD create_mode); }; } diff --git a/keyvi/3rdparty/tpie/tpie/file_accessor/win32.inl b/keyvi/3rdparty/tpie/tpie/file_accessor/win32.inl index 4e463e3d..e91e7c6c 100644 --- a/keyvi/3rdparty/tpie/tpie/file_accessor/win32.inl +++ b/keyvi/3rdparty/tpie/tpie/file_accessor/win32.inl @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include @@ -88,18 +88,25 @@ void win32::set_cache_hint(cache_hint cacheHint) { } } +void win32::_open(const std::string & path, DWORD access, DWORD create_mode) { + m_fd = CreateFile(path.c_str(), access, shared_flags, 0, create_mode, m_creationFlag, 0); + if (m_fd == INVALID_HANDLE_VALUE) return; + + get_file_manager().increment_open_file_count(); +} + void win32::open_wo(const std::string & path) { - m_fd = CreateFile(path.c_str(), GENERIC_WRITE, shared_flags, 0, CREATE_ALWAYS, m_creationFlag, 0); + _open(path, GENERIC_WRITE, CREATE_ALWAYS); if (m_fd == INVALID_HANDLE_VALUE) throw_getlasterror(); } void win32::open_ro(const std::string & path) { - m_fd = CreateFile(path.c_str(), GENERIC_READ, shared_flags, 0, OPEN_EXISTING, m_creationFlag, 0); + _open(path, GENERIC_READ, OPEN_EXISTING); if (m_fd == 
INVALID_HANDLE_VALUE) throw_getlasterror(); } bool win32::try_open_rw(const std::string & path) { - m_fd = CreateFile(path.c_str(), GENERIC_READ | GENERIC_WRITE , shared_flags, 0, OPEN_EXISTING, m_creationFlag, 0); + _open(path, GENERIC_READ | GENERIC_WRITE, OPEN_EXISTING); if (m_fd == INVALID_HANDLE_VALUE) { if (GetLastError() != ERROR_FILE_NOT_FOUND) throw_getlasterror(); return false; @@ -108,7 +115,7 @@ bool win32::try_open_rw(const std::string & path) { } void win32::open_rw_new(const std::string & path) { - m_fd = CreateFile(path.c_str(), GENERIC_READ | GENERIC_WRITE , shared_flags, 0, CREATE_NEW, m_creationFlag, 0); + _open(path, GENERIC_READ | GENERIC_WRITE, CREATE_NEW); if (m_fd == INVALID_HANDLE_VALUE) throw_getlasterror(); } @@ -118,7 +125,9 @@ bool win32::is_open() const { void win32::close_i() { if (m_fd != INVALID_HANDLE_VALUE) { - CloseHandle(m_fd); + if(CloseHandle(m_fd)) { + get_file_manager().decrement_open_file_count(); + } } m_fd=INVALID_HANDLE_VALUE; } diff --git a/keyvi/3rdparty/tpie/tpie/file_count.cpp b/keyvi/3rdparty/tpie/tpie/file_count.cpp deleted file mode 100644 index 8701cfd8..00000000 --- a/keyvi/3rdparty/tpie/tpie/file_count.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; c-file-style: "stroustrup"; -*- -// vi:set ts=4 sts=4 sw=4 noet : -// Copyright 2010, 2012, The TPIE development team -// -// This file is part of TPIE. -// -// TPIE is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the -// Free Software Foundation, either version 3 of the License, or (at your -// option) any later version. -// -// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -// License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public License -// along with TPIE. If not, see -#include -#include -#ifndef _WIN32 -#include -#include -#include -#endif - -namespace { - -inline int get_maximum_open_files() { -#ifdef _WIN32 - return 512; -#else - return getdtablesize(); -#endif -} - -} - -namespace tpie { - -memory_size_type available_files() { -#ifdef _WIN32 - return get_maximum_open_files(); -#else - // skip to the first unused file descriptor - int nextfd = dup(0); - if (nextfd == -1) { - // all files must be in use - return 0; - } - close(nextfd); - - int count = 1; - ++nextfd; // `nextfd' is a fd that is not in use - - int maxfd = get_maximum_open_files(); - while (nextfd < maxfd) { - if (-1 == fcntl(nextfd, F_GETFD)) ++count; - ++nextfd; - } - return count; -#endif -} - -memory_size_type open_file_count() { - return get_maximum_open_files()-available_files(); -} - -} diff --git a/keyvi/3rdparty/tpie/tpie/file_count.h b/keyvi/3rdparty/tpie/tpie/file_count.h deleted file mode 100644 index d5a3162e..00000000 --- a/keyvi/3rdparty/tpie/tpie/file_count.h +++ /dev/null @@ -1,56 +0,0 @@ -// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; c-file-style: "stroustrup"; -*- -// vi:set ts=4 sts=4 sw=4 noet : -// Copyright 2010, 2012, The TPIE development team -// -// This file is part of TPIE. -// -// TPIE is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the -// Free Software Foundation, either version 3 of the License, or (at your -// option) any later version. -// -// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -// License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with TPIE. 
If not, see -#ifndef __TPIE_FILE_CONUT_H__ -#define __TPIE_FILE_CONUT_H__ -#include -#include -//////////////////////////////////////////////////////////////////////////////// -/// \file file_count.h -/// \brief Count the number of open files. -//////////////////////////////////////////////////////////////////////////////// - -namespace tpie { - -/////////////////////////////////////////////////////////////////////////////// -/// \brief Deprecated; does nothing. -/////////////////////////////////////////////////////////////////////////////// -TPIE_DEPRECATED(inline void increment_open_file_count()) { - // Does nothing. -} - -/////////////////////////////////////////////////////////////////////////////// -/// \brief Deprecated; does nothing. -/////////////////////////////////////////////////////////////////////////////// -TPIE_DEPRECATED(inline void decrement_open_file_count()) { - // Does nothing. -} - -//////////////////////////////////////////////////////////////////////////////// -/// \brief Return the current number of open files. -/// \return The current number of open files. -//////////////////////////////////////////////////////////////////////////////// -memory_size_type open_file_count(); - -//////////////////////////////////////////////////////////////////////////////// -/// \brief Return the additional number of files that can be opened before -/// running out of file descriptors. 
-//////////////////////////////////////////////////////////////////////////////// -memory_size_type available_files(); -} -#endif //__TPIE_FILE_CONUT_H__ diff --git a/keyvi/3rdparty/tpie/tpie/file_manager.cpp b/keyvi/3rdparty/tpie/tpie/file_manager.cpp new file mode 100644 index 00000000..e6830138 --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/file_manager.cpp @@ -0,0 +1,70 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- + // vi:set ts=4 sts=4 sw=4 noet : +// +// Copyright 2011, 2014, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. 
If not, see + +#include "file_manager.h" +#include +#include +#include "tpie_log.h" +#include +#include +#include "pretty_print.h" +#ifndef _WIN32 +#include +#include +#endif + +namespace tpie { + +size_t get_maximum_open_files() { +#ifdef _WIN32 + return _getmaxstdio(); +#else + struct rlimit limits; + if (getrlimit(RLIMIT_NOFILE, &limits) == -1) { + return 256; + } + return limits.rlim_cur; +#endif +} + +file_manager * fm = 0; + +file_manager::file_manager(): resource_manager(FILES) {} + +void init_file_manager() { + const size_t reserved_files = 42; + + fm = new file_manager(); + fm->set_limit(get_maximum_open_files() - reserved_files); +} + +void finish_file_manager() { + delete fm; + fm = 0; +} + +file_manager & get_file_manager() { +#ifndef TPIE_NDEBUG + if (fm == 0) throw std::runtime_error("File management not initialized"); +#endif + return * fm; +} + +} //namespace tpie diff --git a/keyvi/3rdparty/tpie/tpie/file_manager.h b/keyvi/3rdparty/tpie/tpie/file_manager.h new file mode 100644 index 00000000..116af43a --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/file_manager.h @@ -0,0 +1,93 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// +// Copyright 2011, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE.
If not, see + +/////////////////////////////////////////////////////////////////////////// +/// \file tpie/file_manager.h File management subsystem. +/////////////////////////////////////////////////////////////////////////// + +#ifndef __TPIE_FILE_MANAGER_H__ +#define __TPIE_FILE_MANAGER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace tpie { + +/////////////////////////////////////////////////////////////////////////////// +/// \brief File management object used to track file usage. +/////////////////////////////////////////////////////////////////////////////// +class file_manager final : public resource_manager { +public: + /////////////////////////////////////////////////////////////////////////// + /// \internal + /// Construct the file manager object. + /////////////////////////////////////////////////////////////////////////// + file_manager(); + + void increment_open_file_count() { + register_increased_usage(1); + } + + void decrement_open_file_count() { + register_decreased_usage(1); + } + + std::string amount_with_unit(size_t amount) const override { + std::ostringstream os; + if (amount == 1) { + os << "a file"; + } else { + os << amount << " files"; + } + return os.str(); + } + +protected: + void throw_out_of_resource_error(const std::string & s) override { + throw out_of_files_error(s); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +/// \internal \brief Used by tpie_init to initialize the file manager. +/////////////////////////////////////////////////////////////////////////////// +void init_file_manager(); + +/////////////////////////////////////////////////////////////////////////////// +/// \internal \brief Used by tpie_finish to deinitialize the file manager.
+/////////////////////////////////////////////////////////////////////////////// +void finish_file_manager(); + +/////////////////////////////////////////////////////////////////////////////// +/// \brief Return a reference to the file manager. +/// May only be called when init_file_manager has been called. +/// See \ref tpie_init(). +/////////////////////////////////////////////////////////////////////////////// +file_manager & get_file_manager(); + +} //namespace tpie + +#endif //__TPIE_FILE_MANAGER_H__ diff --git a/keyvi/3rdparty/tpie/tpie/hash_map.h b/keyvi/3rdparty/tpie/tpie/hash_map.h index c6962002..27b1cfcc 100644 --- a/keyvi/3rdparty/tpie/tpie/hash_map.h +++ b/keyvi/3rdparty/tpie/tpie/hash_map.h @@ -595,7 +595,7 @@ class hash_map: public linear_memory_base< hash_map, typename index_t=size_t, template class table_t=linear_probing_hash_table> -class hash_set { +class hash_set : public linear_memory_base< hash_set > { private: typedef table_t tbl_t; tbl_t tbl; diff --git a/keyvi/3rdparty/tpie/tpie/memory.cpp b/keyvi/3rdparty/tpie/tpie/memory.cpp index dd118d07..ea54c2ab 100644 --- a/keyvi/3rdparty/tpie/tpie/memory.cpp +++ b/keyvi/3rdparty/tpie/tpie/memory.cpp @@ -28,86 +28,9 @@ namespace tpie { -inline void segfault() { - std::abort(); -} - memory_manager * mm = 0; -memory_manager::memory_manager() - : m_used(0), m_limit(0), m_maxExceeded(0), m_nextWarning(0), m_enforce(ENFORCE_WARN) {} - -size_t memory_manager::used() const throw() { - return m_used.load(); -} - -size_t memory_manager::available() const throw() { - size_t used = m_used.load(); - size_t limit = m_limit; - if (used < limit) return limit-used; - return 0; -} - -} // namespace tpie - -void tpie_print_memory_complaint(std::ostream & os, size_t bytes, size_t usage, size_t limit) { - os << "Memory limit exceeded by " << tpie::bits::pretty_print::size_type(usage - limit) - << " (" << (usage-limit) * 100 / limit << "%), while trying to allocate " << tpie::bits::pretty_print::size_type(bytes) << "."
- << " Limit is " << tpie::bits::pretty_print::size_type(limit) << ", but " << tpie::bits::pretty_print::size_type(usage) << " would be used."; -} - -namespace tpie { - -void memory_manager::register_allocation(size_t bytes) { - switch(m_enforce) { - case ENFORCE_IGNORE: - m_used.fetch_add(bytes); - break; - case ENFORCE_THROW: { - size_t usage = m_used.fetch_add(bytes); - if (usage > m_limit && m_limit > 0) { - std::stringstream ss; - tpie_print_memory_complaint(ss, bytes, usage, m_limit); - throw out_of_memory_error(ss.str().c_str()); - } - break; } - case ENFORCE_DEBUG: - case ENFORCE_WARN: { - size_t usage = m_used.fetch_add(bytes); - if (usage > m_limit && usage - m_limit > m_maxExceeded && m_limit > 0) { - m_maxExceeded = usage - m_limit; - if (m_maxExceeded >= m_nextWarning) { - m_nextWarning = m_maxExceeded + m_maxExceeded/8; - std::ostream & os = (m_enforce == ENFORCE_DEBUG) ? log_debug() : log_warning(); - tpie_print_memory_complaint(os, bytes, usage, m_limit); - os << std::endl; - } - } - break; } - }; -} - -void memory_manager::register_deallocation(size_t bytes) { -#ifndef TPIE_NDEBUG - size_t usage = m_used.fetch_sub(bytes); - if (bytes > usage) { - log_error() << "Error in deallocation, trying to deallocate " << bytes << " bytes, while only " << - usage << " were allocated" << std::endl; - segfault(); - } -#else - m_used.fetch_sub(bytes); -#endif -} - - -void memory_manager::set_limit(size_t new_limit) { - m_limit = new_limit; -} - -void memory_manager::set_enforcement(enforce_t e) { - m_enforce = e; -} +memory_manager::memory_manager(): resource_manager(MEMORY) {} /////////////////////////////////////////////////////////////////////////////// /// \internal \brief Buffers messages to the debug log. 
@@ -182,6 +105,9 @@ std::pair memory_manager::__allocate_consecutive(size_t upper return std::make_pair(res, best); } +void memory_manager::throw_out_of_resource_error(const std::string & s) { + throw out_of_memory_error(s); +} #ifndef TPIE_NDEBUG void memory_manager::register_pointer(void * p, size_t size, const std::type_info & t) { @@ -193,7 +119,7 @@ void memory_manager::__register_pointer(void * p, size_t size, const std::type_i if (m_pointers.count(p) != 0) { log_error() << "Trying to register pointer " << p << " of size " << size << " which is already registered" << std::endl; - segfault(); + std::abort(); } m_pointers[p] = std::make_pair(size, &t);; } @@ -208,17 +134,17 @@ void memory_manager::__unregister_pointer(void * p, size_t size, const std::type if (i == m_pointers.end()) { log_error() << "Trying to deregister pointer " << p << " of size " << size << " which was never registered" << std::endl; - segfault(); + std::abort(); } else { if (i->second.first != size) { log_error() << "Trying to deregister pointer " << p << " of size " << size << " which was registered with size " << i->second.first << std::endl; - segfault(); + std::abort(); } if (*i->second.second != t) { log_error() << "Trying to deregister pointer " << p << " of type " << t.name() << " which was registered with size " << i->second.second->name() << std::endl; - segfault(); + std::abort(); } m_pointers.erase(i); } @@ -232,7 +158,7 @@ void memory_manager::assert_tpie_ptr(void * p) { void memory_manager::__assert_tpie_ptr(void * p) { if (!p || m_pointers.count(p)) return; log_error() << p << " has not been allocated with tpie_new" << std::endl; - segfault(); + std::abort(); } void memory_manager::complain_about_unfreed_memory() { diff --git a/keyvi/3rdparty/tpie/tpie/memory.h b/keyvi/3rdparty/tpie/tpie/memory.h index b4c84ed0..090cbb80 100644 --- a/keyvi/3rdparty/tpie/tpie/memory.h +++ b/keyvi/3rdparty/tpie/tpie/memory.h @@ -27,6 +27,8 @@ #include #include +#include +#include #include 
#include #include @@ -36,88 +38,11 @@ namespace tpie { -/////////////////////////////////////////////////////////////////////////////// -/// \brief Thrown when trying to allocate too much memory. -/// -/// When the memory limit is exceeded and the memory limit enforcement policy -/// is set to THROW, this error is thrown by the memory subsystem. -/////////////////////////////////////////////////////////////////////////////// -struct out_of_memory_error : public std::bad_alloc { - const char * msg; - out_of_memory_error(const char * s) : msg(s) { } - virtual const char* what() const throw() {return msg;} -}; - - /////////////////////////////////////////////////////////////////////////////// /// \brief Memory management object used to track memory usage. /////////////////////////////////////////////////////////////////////////////// -class memory_manager { +class memory_manager final : public resource_manager { public: - /////////////////////////////////////////////////////////////////////////// - /// Memory limit enforcement policies. - /////////////////////////////////////////////////////////////////////////// - enum enforce_t { - /** Ignore when running out of memory. */ - ENFORCE_IGNORE, - /** \brief Log to debug log when the memory limit is exceeded. - * Note that not all violations will be logged. */ - ENFORCE_DEBUG, - /** \brief Log a warning when the memory limit is exceeded. Note that - * not all violations will be logged. */ - ENFORCE_WARN, - /** Throw an out_of_memory_error when the memory limit is exceeded. */ - ENFORCE_THROW - }; - - /////////////////////////////////////////////////////////////////////////// - /// Return the current amount of memory used. - /////////////////////////////////////////////////////////////////////////// - size_t used() const throw(); - - /////////////////////////////////////////////////////////////////////////// - /// Return the amount of memory still available to allocation. 
- /////////////////////////////////////////////////////////////////////////// - size_t available() const throw(); - - /////////////////////////////////////////////////////////////////////////// - /// Return the memory limit. - /////////////////////////////////////////////////////////////////////////// - size_t limit() const throw() {return m_limit;} - - /////////////////////////////////////////////////////////////////////////// - /// \brief Update the memory limit. - /// If the memory limit is exceeded by decreasing the limit, - /// no exception will be thrown. - /// \param new_limit The new memory limit in bytes. - /////////////////////////////////////////////////////////////////////////// - void set_limit(size_t new_limit); - - /////////////////////////////////////////////////////////////////////////// - /// \brief Set the memory limit enforcement policy. - /// \param e The new enforcement policy. - /////////////////////////////////////////////////////////////////////////// - void set_enforcement(enforce_t e); - - /////////////////////////////////////////////////////////////////////////// - /// \brief Return the current memory limit enforcement policy. - /////////////////////////////////////////////////////////////////////////// - enforce_t enforcement() {return m_enforce;} - - /////////////////////////////////////////////////////////////////////////// - /// \internal - /// Register that more memory has been used. - /// Possibly throws a warning or an exception if the memory limit is - /// exceeded, depending on the enforcement. - /////////////////////////////////////////////////////////////////////////// - void register_allocation(size_t bytes); - - /////////////////////////////////////////////////////////////////////////// - /// \internal - /// Register that some memory has been freed. 
- /////////////////////////////////////////////////////////////////////////// - void register_deallocation(size_t bytes); - /////////////////////////////////////////////////////////////////////////// /// \internal /// Construct the memory manager object. @@ -130,6 +55,18 @@ class memory_manager { /////////////////////////////////////////////////////////////////////////// std::pair __allocate_consecutive(size_t upper_bound, size_t granularity); + void register_allocation(size_t bytes) { + register_increased_usage(bytes); + } + + void register_deallocation(size_t bytes) { + register_decreased_usage(bytes); + } + + std::string amount_with_unit(size_t amount) const override { + return bits::pretty_print::size_type(amount); + } + #ifndef TPIE_NDEBUG // The following methods take the mutex before calling the private doubly // underscored equivalent. @@ -139,14 +76,10 @@ class memory_manager { void complain_about_unfreed_memory(); #endif +protected: + void throw_out_of_resource_error(const std::string & s) override; private: - std::atomic m_used; - size_t m_limit; - size_t m_maxExceeded; - size_t m_nextWarning; - enforce_t m_enforce; - #ifndef TPIE_NDEBUG std::mutex m_mutex; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/buffer.h b/keyvi/3rdparty/tpie/tpie/pipelining/buffer.h index f722c71f..13ed1a42 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/buffer.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/buffer.h @@ -48,6 +48,7 @@ class buffer_pull_output_t: public node { add_dependency(input_token); set_name("Fetching items", PRIORITY_SIGNIFICANT); set_minimum_memory(file_stream::memory_usage()); + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED); } @@ -88,6 +89,7 @@ class buffer_input_t: public node { { set_name("Storing items", PRIORITY_INSIGNIFICANT); set_minimum_memory(tpie::file_stream::memory_usage()); + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED | PLOT_SIMPLIFIED_HIDE); } @@ -123,6 +125,7 @@ class buffer_output_t: public 
node { add_dependency(input_token); add_push_destination(this->dest); set_minimum_memory(tpie::file_stream::memory_usage()); + set_minimum_resource_usage(FILES, 1); set_name("Buffer", PRIORITY_INSIGNIFICANT); set_plot_options(PLOT_BUFFERED); } diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/container.h b/keyvi/3rdparty/tpie/tpie/pipelining/container.h index e6fb6043..d7c71d80 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/container.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/container.h @@ -21,6 +21,8 @@ #define __TPIE_PIPELINING_PIPE_CONTAINER_H__ #include +#include +#include namespace tpie { namespace pipelining { @@ -138,6 +140,92 @@ F container_construct_copy(container & cont, T2 && ... a) { return bits::dispatch_gen::type::template run_copy(cont, std::move(a)...); } +namespace bits { + +class any_noncopyable_cont_base { +public: + virtual ~any_noncopyable_cont_base() {}; + virtual const std::type_info & type() const { + return typeid(void); + } +}; + +template +class any_noncopyable_cont: public any_noncopyable_cont_base { +public: + any_noncopyable_cont(T value): value(move_if_movable(value)) {} + T value; + const std::type_info & type() const override { + return typeid(value); + } +}; + +} //namespace bits + +class bad_any_noncopyable_cast: public std::bad_cast { +public: + const char * what() const noexcept override {return "bad any_noncopyable cast";} +}; + +class any_noncopyable { +public: + template + explicit any_noncopyable(T t) { + cont = std::unique_ptr( + new bits::any_noncopyable_cont(move_if_movable(t))); + } + + any_noncopyable() = default; + any_noncopyable(const any_noncopyable &) = delete; + any_noncopyable(any_noncopyable &&) = default; + any_noncopyable & operator=(const any_noncopyable & o) = delete; + any_noncopyable & operator=(any_noncopyable && o) = default; + + template + any_noncopyable & operator=(T t) { + cont = std::unique_ptr( + new bits::any_noncopyable_cont(move_if_movable(t))); + return *this; + } + + explicit operator 
bool() {return (bool)cont;} + + void reset() {cont.reset();} + + template + friend const T & any_cast(const any_noncopyable & a); + + template + friend T & any_cast(any_noncopyable & a); + + friend void swap(any_noncopyable & l, any_noncopyable & r); + + const std::type_info & type() const { + if (!cont) return typeid(void); + auto val = cont.get(); + return val->type(); + } +private: + std::unique_ptr cont; +}; + +template +const T & any_cast(const any_noncopyable & a) { + if (!a.cont) throw bad_any_noncopyable_cast(); + auto val = a.cont.get(); + if (typeid(*val) != typeid(bits::any_noncopyable_cont)) throw bad_any_noncopyable_cast(); + return static_cast*>(val)->value; +} + +template +T & any_cast(any_noncopyable & a) { + if (!a.cont) throw bad_any_noncopyable_cast(); + auto val = a.cont.get(); + if (typeid(*val) != typeid(bits::any_noncopyable_cont)) throw bad_any_noncopyable_cast(); + return static_cast*>(val)->value; +} + +inline void swap(any_noncopyable & l, any_noncopyable & r) {std::swap(l.cont, r.cont);} } //namespace pipelining } //namespace tpie diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/exception.h b/keyvi/3rdparty/tpie/tpie/pipelining/exception.h index 4579d4f3..bcad3a30 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/exception.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/exception.h @@ -24,39 +24,39 @@ namespace tpie { -class merge_sort_not_ready : exception { +class merge_sort_not_ready : public exception { public: inline merge_sort_not_ready() : tpie::exception("Merge sort did not have memory assigned") {} }; namespace pipelining { -class not_initiator_node : tpie::exception { +class not_initiator_node : public tpie::exception { public: inline not_initiator_node() : tpie::exception("Not an initiator node") {} }; -class no_initiator_node : tpie::exception { +class no_initiator_node : public tpie::exception { public: no_initiator_node() : tpie::exception("Phase has no initiator node") {} }; -class virtual_chunk_not_ready : tpie::exception { 
+class virtual_chunk_not_ready : public tpie::exception { public: inline virtual_chunk_not_ready() : tpie::exception("Virtual receiver is missing a destination") {} }; -class virtual_chunk_missing_begin : tpie::exception { +class virtual_chunk_missing_begin : public tpie::exception { public: inline virtual_chunk_missing_begin() : tpie::exception("Virtual begin chunk contains no pipes") {} }; -class virtual_chunk_missing_middle : tpie::exception { +class virtual_chunk_missing_middle : public tpie::exception { public: inline virtual_chunk_missing_middle() : tpie::exception("Virtual middle chunk contains no pipes, and input type is not output type") {} }; -class virtual_chunk_missing_end : tpie::exception { +class virtual_chunk_missing_end : public tpie::exception { public: inline virtual_chunk_missing_end() : tpie::exception("Virtual end chunk contains no pipes") {} }; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/factory_base.h b/keyvi/3rdparty/tpie/tpie/pipelining/factory_base.h index 7554f64f..d6ba2e5b 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/factory_base.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/factory_base.h @@ -24,9 +24,6 @@ #ifndef __TPIE_PIPELINING_FACTORY_BASE_H__ #define __TPIE_PIPELINING_FACTORY_BASE_H__ #include - -// XXX remove when init_segment is removed -#include #include #include @@ -124,15 +121,6 @@ class factory_base { } } - /////////////////////////////////////////////////////////////////////////// - /// \brief Deprecated alias of init_node. - /////////////////////////////////////////////////////////////////////////// - inline void init_segment(node & r) const { - log_fatal() << "init_segment has been renamed to init_node" << std::endl; - backtrace(log_fatal()); - init_node(r); - } - /////////////////////////////////////////////////////////////////////////// /// \brief Initialize node constructed in a subclass. 
/// @@ -143,7 +131,7 @@ class factory_base { /// If more than one node is constructed in the subclass in \c construct(), /// the implementation should use \c init_sub_node instead. /////////////////////////////////////////////////////////////////////////// - inline void init_node(node & r) const { + inline void init_node(node & r) { if (!m_name.empty()) { r.set_name(m_name, m_namePriority); } @@ -166,7 +154,7 @@ class factory_base { /// If just one node is constructed in the subclass in \c construct(), /// the implementation should use \c init_node instead. /////////////////////////////////////////////////////////////////////////// - void init_sub_node(node & r) const { + void init_sub_node(node & r) { if (m_breadcrumbs.empty()) { if (m_name.empty()) { // no op @@ -309,13 +297,23 @@ class factory_base { m_add_relations.push_back(std::make_pair(s,bits::depends)); } + void forward(const std::string & key, any_noncopyable value) { + m_forwards.push_back({key, std::move(value)}); + } + private: - void init_common(node & r) const { + void init_common(node & r) { if (m_set) r.set_memory_fraction(memory()); for (size_t i = 0; i < m_hooks.size(); ++i) { m_hooks[i]->init_node(r); } + + auto nodeMap = r.get_node_map()->find_authority(); + + for (auto &p : m_forwards) { + nodeMap->forward_from_pipe_base(r.get_id(), p.first, std::move(p.second)); + } } double m_amount; @@ -327,6 +325,7 @@ class factory_base { std::vector m_hooks; std::vector m_add_to_set; std::vector > m_add_relations; + std::vector > m_forwards; }; } // namespace pipelining diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/file_stream.h b/keyvi/3rdparty/tpie/tpie/pipelining/file_stream.h index 8f2ac785..69d55092 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/file_stream.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/file_stream.h @@ -26,11 +26,19 @@ #include #include #include +#include namespace tpie { - namespace pipelining { +enum stream_option { + STREAM_RESET=1, + STREAM_CLOSE=2 +}; + 
+TPIE_DECLARE_OPERATORS_FOR_FLAGS(stream_option) +typedef tpie::flags stream_options; + namespace bits { /////////////////////////////////////////////////////////////////////////////// @@ -43,19 +51,21 @@ class input_t : public node { public: typedef typename push_type::type item_type; - inline input_t(dest_t dest, file_stream & fs) : dest(std::move(dest)), fs(fs) { + input_t(dest_t dest, file_stream & fs, stream_options options) : options(options), fs(fs), dest(std::move(dest)) { add_push_destination(this->dest); set_name("Read", PRIORITY_INSIGNIFICANT); set_minimum_memory(fs.memory_usage()); } virtual void propagate() override { + if (options & STREAM_RESET) fs.seek(0); + if (fs.is_open()) { - forward("items", fs.size()); + forward("items", fs.size() - fs.offset()); + set_steps(fs.size() - fs.offset()); } else { forward("items", 0); } - set_steps(fs.size()); } virtual void go() override { @@ -67,11 +77,53 @@ class input_t : public node { } } + virtual void end() override { + if (options & STREAM_CLOSE) fs.close(); + } + private: - dest_t dest; + stream_options options; file_stream & fs; + dest_t dest; }; +/////////////////////////////////////////////////////////////////////////////// +/// \class input_t +/// +/// file_stream input generator. 
+/////////////////////////////////////////////////////////////////////////////// +template +class named_input_t : public node { +public: + typedef typename push_type::type item_type; + + named_input_t(dest_t dest, std::string path) : dest(std::move(dest)), path(path) { + add_push_destination(this->dest); + set_name("Read", PRIORITY_INSIGNIFICANT); + set_minimum_memory(file_stream::memory_usage()); + } + + virtual void propagate() override { + fs.construct(); + fs->open(path, access_read); + forward("items", fs->size()); + set_steps(fs->size()); + } + + virtual void go() override { + while (fs->can_read()) { + dest.push(fs->read()); + step(); + } + fs.destruct(); + } +private: + dest_t dest; + maybe > fs; + std::string path; +}; + + /////////////////////////////////////////////////////////////////////////////// /// \class pull_input_t /// @@ -82,25 +134,32 @@ class pull_input_t : public node { public: typedef T item_type; - inline pull_input_t(file_stream & fs) : fs(fs) { + pull_input_t(file_stream & fs, stream_options options) : options(options), fs(fs) { set_name("Read", PRIORITY_INSIGNIFICANT); set_minimum_memory(fs.memory_usage()); } virtual void propagate() override { - forward("items", fs.size()); - set_steps(fs.size()); + if (options & STREAM_RESET) fs.seek(0); + forward("items", fs.size()-fs.offset()); + set_steps(fs.size()-fs.offset()); } - inline T pull() { + T pull() { step(); return fs.read(); } - inline bool can_pull() { + bool can_pull() { return fs.can_read(); } + virtual void end() override { + if (options & STREAM_CLOSE) fs.close(); + } + +private: + stream_options options; file_stream & fs; }; @@ -114,14 +173,15 @@ class pull_reverse_input_t : public node { public: typedef T item_type; - inline pull_reverse_input_t(file_stream & fs) : fs(fs) { + pull_reverse_input_t(file_stream & fs, stream_options options) : options(options), fs(fs) { set_name("Read", PRIORITY_INSIGNIFICANT); set_minimum_memory(fs.memory_usage()); } virtual void propagate() 
override { - forward("items", fs.size()); - set_steps(fs.size()); + if (options & STREAM_RESET) fs.seek(0, file_stream::end); + forward("items", fs.offset()); + set_steps(fs.offset()); } inline T pull() { @@ -133,6 +193,12 @@ class pull_reverse_input_t : public node { return fs.can_read_back(); } + virtual void end() override { + if (options & STREAM_CLOSE) fs.close(); + } + +private: + stream_options options; file_stream & fs; }; @@ -187,12 +253,12 @@ class output_t : public node { public: typedef T item_type; - inline output_t(file_stream & fs) : fs(fs) { + output_t(file_stream & fs) : fs(fs) { set_name("Write", PRIORITY_INSIGNIFICANT); set_minimum_memory(fs.memory_usage()); } - inline void push(const T & item) { + void push(const T & item) { fs.write(item); } private: @@ -243,7 +309,7 @@ class pull_output_t : public node { public: typedef typename pull_type::type item_type; - inline pull_output_t(source_t source, file_stream & fs) : source(std::move(source)), fs(fs) { + pull_output_t(source_t source, file_stream & fs) : source(std::move(source)), fs(fs) { add_pull_source(this->source); set_name("Write", PRIORITY_INSIGNIFICANT); set_minimum_memory(fs.memory_usage()); @@ -256,56 +322,49 @@ class pull_output_t : public node { } source.end(); } - + +private: source_t source; file_stream & fs; }; -template -class tee_t { +template +class tee_t: public node { public: - template - class type: public node { - public: - typedef T item_type; - type(dest_t dest, file_stream & fs): fs(fs), dest(std::move(dest)) { - set_minimum_memory(fs.memory_usage()); - } - - void push(const item_type & i) { - fs.write(i); - dest.push(i); - } - private: - file_stream & fs; - dest_t dest; - }; + typedef T item_type; + tee_t(dest_t dest, file_stream & fs): fs(fs), dest(std::move(dest)) { + set_minimum_memory(fs.memory_usage()); + } + + void push(const item_type & i) { + fs.write(i); + dest.push(i); + } +private: + file_stream & fs; + dest_t dest; }; -template +template class pull_tee_t { 
public: - template - class type: public node { - public: - typedef T item_type; - type(source_t source, file_stream & fs): fs(fs), source(std::move(source)) { - set_minimum_memory(fs.memory_usage()); - } - - bool can_pull() { - return source.can_pull(); - } + typedef T item_type; + pull_tee_t(source_t source, file_stream & fs): fs(fs), source(std::move(source)) { + set_minimum_memory(fs.memory_usage()); + } - item_type pull() { - item_type i = source.pull(); - fs.write(i); - return i; - } - private: - file_stream & fs; - source_t source; - }; + bool can_pull() { + return source.can_pull(); + } + + item_type pull() { + item_type i = source.pull(); + fs.write(i); + return i; + } +private: + file_stream & fs; + source_t source; }; } // namespace bits @@ -314,29 +373,44 @@ class pull_tee_t { /// \brief Pipelining nodes that pushes the contents of the given file stream /// to the next node in the pipeline. /// \param fs The file stream from which it pushes items +/// \param options Stream options /////////////////////////////////////////////////////////////////////////////// template -inline pipe_begin &> > input(file_stream & fs) { - return factory &>(fs); +inline pipe_begin &, stream_options> > input(file_stream & fs, + stream_options options=stream_options()) { + return {fs, options}; } +/////////////////////////////////////////////////////////////////////////////// +/// \brief Pipelining nodes that pushes the contents of the named file stream +/// to the next node in the pipeline. +/// \param path The file stream from which it pushes items +/////////////////////////////////////////////////////////////////////////////// +typedef pipe_begin > named_input; + /////////////////////////////////////////////////////////////////////////////// /// \brief A pipelining pull-node that reads items from the given file_stream /// \param fs The file stream from which it reads items. 
+/// \param options Stream options /////////////////////////////////////////////////////////////////////////////// template -inline pullpipe_begin, file_stream &> > pull_input(file_stream & fs) { - return termfactory, file_stream &>(fs); +inline pullpipe_begin, file_stream &, stream_options> > pull_input( + file_stream & fs, + stream_options options=stream_options()) { + return {fs, options}; } /////////////////////////////////////////////////////////////////////////////// /// \brief A pipelining pull-node that reads items in reverse order from the /// given file_stream /// \param fs The file stream from which it reads items. +/// \param options Stream options /////////////////////////////////////////////////////////////////////////////// template -inline pullpipe_begin, file_stream &> > pull_reverse_input(file_stream & fs) { - return termfactory, file_stream &>(fs); +inline pullpipe_begin, file_stream &, stream_options> > pull_reverse_input( + file_stream & fs, + stream_options options=stream_options()) { + return {fs, options}; } /////////////////////////////////////////////////////////////////////////////// @@ -355,7 +429,7 @@ inline pullpipe_begin, std::string> > na /////////////////////////////////////////////////////////////////////////////// template inline pipe_end, file_stream &> > output(file_stream & fs) { - return termfactory, file_stream &>(fs); + return {fs}; } /////////////////////////////////////////////////////////////////////////////// @@ -374,7 +448,7 @@ inline pipe_end, std::string> > named_output /////////////////////////////////////////////////////////////////////////////// template inline pullpipe_end &> > pull_output(file_stream & fs) { - return factory &>(fs); + return {fs}; } /////////////////////////////////////////////////////////////////////////////// @@ -383,8 +457,9 @@ inline pullpipe_end &> > pull_output /// \param fs The file stream that items should be written to 
/////////////////////////////////////////////////////////////////////////////// template -inline pipe_middle::type>::template type, T &> > -tee(T & fs) {return factory::type>::template type, T &>(fs);} +inline pipe_middle, T &> > tee(T & fs) { + return {fs}; +} /////////////////////////////////////////////////////////////////////////////// /// \brief A pull-pipe node that when pulled from will pull from its source, @@ -392,8 +467,9 @@ tee(T & fs) {return factory::type>::template t /// \param fs The file stream that items should be written to /////////////////////////////////////////////////////////////////////////////// template -inline pullpipe_middle::type>::template type, T &> > -pull_tee(T & fs) {return factory::type>::template type, T &>(fs);} +inline pullpipe_middle, T &> > pull_tee(T & fs) { + return {fs}; +} } // namespace pipelining diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/helpers.h b/keyvi/3rdparty/tpie/tpie/pipelining/helpers.h index c0e831ae..f39a0434 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/helpers.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/helpers.h @@ -130,7 +130,7 @@ class fork_t { public: typedef typename push_type::type item_type; - type(dest_t dest, fact2_t && fact2) : dest(std::move(dest)), dest2(std::move(fact2.construct())) { + type(dest_t dest, fact2_t fact2) : dest(std::move(dest)), dest2(fact2.construct()) { add_push_destination(this->dest); add_push_destination(dest2); } @@ -146,6 +146,33 @@ class fork_t { }; }; + +template +class pull_fork_t: public node { +public: + typedef typename pull_type::type item_type; + + pull_fork_t(source_t source, dest_fact_t dest_fact) + : dest(dest_fact.construct()) + , source(std::move(source)) { + add_pull_source(this->source); + add_push_destination(dest); + } + + bool can_pull() {return source.can_pull();} + + item_type pull() { + item_type i=source.pull(); + dest.push(i); + return i; + } + +private: + typename dest_fact_t::constructed_type dest; + source_t source; +}; + + template class 
null_sink_t: public node { public: @@ -351,7 +378,7 @@ struct unzip_t { typedef typename push_type::type second_type; typedef std::pair item_type; - type(dest1_t dest1, fact2_t && fact2) : dest1(std::move(dest1)), dest2(fact2.construct()) { + type(dest1_t dest1, fact2_t fact2) : dest1(std::move(dest1)), dest2(fact2.construct()) { add_push_destination(this->dest1); add_push_destination(dest2); } @@ -389,8 +416,8 @@ class pull_source_t { typedef typename fact_t::constructed_type source_t; typedef typename push_type::type item_type; - type(dest_t && dest, fact_t && fact) - : dest(std::move(dest)), src(std::move(fact.construct())) { + type(dest_t dest, fact_t fact) + : dest(std::move(dest)), src(fact.construct()) { add_push_destination(dest); add_pull_source(src); } @@ -418,7 +445,7 @@ class unique_t : public node { public: typedef typename push_type::type item_type; - unique_t(dest_t && dest, equal_t equal) + unique_t(dest_t dest, equal_t equal) : equal(equal), dest(std::move(dest)) {} void begin() override { @@ -464,9 +491,21 @@ typedef pullpipe_middle > pull_peek; /// to the destination and then to "to" /////////////////////////////////////////////////////////////////////////////// template -pipe_middle, fact_t &&> > -fork(pipe_end && to) { - return tempfactory, fact_t &&>(std::move(to.factory)); +pipe_middle, fact_t> > +fork(pipe_end to) { + return {std::move(to.factory)}; +} + +/////////////////////////////////////////////////////////////////////////////// +/// \brief Create a pulling fork pipe node. 
+/// +/// Whenever an element e is pulled from fork node, e is first pushed +/// into the destination +/////////////////////////////////////////////////////////////////////////////// +template +pullpipe_middle, dest_fact_t> > +pull_fork(dest_fact_t dest_fact) { + return {std::move(dest_fact)}; } /////////////////////////////////////////////////////////////////////////////// @@ -476,9 +515,9 @@ fork(pipe_end && to) { /// a is pushed to its destination, and then b is pushed to "to" /////////////////////////////////////////////////////////////////////////////// template -pipe_middle, fact_t &&> > -unzip(pipe_end && to) { - return tempfactory, fact_t &&>(std::move(to.factory)); +pipe_middle, fact_t> > +unzip(pipe_end to) { + return {std::move(to.factory)}; } /////////////////////////////////////////////////////////////////////////////// @@ -489,9 +528,9 @@ unzip(pipe_end && to) { /// and std::make_pair(a,b) is pushed to the destination /////////////////////////////////////////////////////////////////////////////// template -pipe_middle, fact_t &&> > -zip(pullpipe_begin && from) { - return tempfactory, fact_t &&>(std::move(from.factory)); +pipe_middle, fact_t> > +zip(pullpipe_begin from) { + return {std::move(from.factory)}; } /////////////////////////////////////////////////////////////////////////////// @@ -622,9 +661,9 @@ pipe_middle > > item_type() { /// \param The pull source, and the source forwards the number of items, "items" /////////////////////////////////////////////////////////////////////////////// template -pipe_begin, fact_t &&> > -pull_source(pullpipe_begin && from) { - return tempfactory, fact_t &&>(std::move(from.factory)); +pipe_begin, fact_t> > +pull_source(pullpipe_begin from) { + return {std::move(from.factory)}; } /////////////////////////////////////////////////////////////////////////////// diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/merge.h b/keyvi/3rdparty/tpie/tpie/pipelining/merge.h index de9c74d7..48b4e217 100644 --- 
a/keyvi/3rdparty/tpie/tpie/pipelining/merge.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/merge.h @@ -47,7 +47,7 @@ class merge_t { public: typedef typename push_type::type item_type; - type(dest_t dest, fact_t && fact) : dest(std::move(dest)), with(fact.construct()) { + type(dest_t dest, fact_t fact) : dest(std::move(dest)), with(fact.construct()) { add_push_destination(this->dest); add_pull_source(with); } @@ -69,9 +69,9 @@ class merge_t { /// for each item pushed to it. /////////////////////////////////////////////////////////////////////////////// template -inline pipe_middle::template type, pull_t &&> > -merge(pullpipe_begin && with) { - return factory::template type, pull_t &&>(std::move(with.factory)); +inline pipe_middle::template type, pull_t> > +merge(pullpipe_begin with) { + return {std::move(with.factory)}; } } // namespace pipelining diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/merge_sorter.h b/keyvi/3rdparty/tpie/tpie/pipelining/merge_sorter.h index 7558e376..863aee43 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/merge_sorter.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/merge_sorter.h @@ -161,17 +161,24 @@ class merge_sorter { typedef std::shared_ptr ptr; typedef progress_types Progress; - static const memory_size_type maximumFanout = 250; // arbitrary. 
TODO: run experiments to find threshold + static const memory_size_type defaultFiles = 253; // Default number of files available, when not using set_available_files + static const memory_size_type minimumFilesPhase1 = 1; + static const memory_size_type maximumFilesPhase1 = 1; + static const memory_size_type minimumFilesPhase2 = 5; + static const memory_size_type maximumFilesPhase2 = std::numeric_limits::max(); + static const memory_size_type minimumFilesPhase3 = 5; + static const memory_size_type maximumFilesPhase3 = std::numeric_limits::max(); inline merge_sorter(pred_t pred = pred_t(), store_t store = store_t()) : m_bucketPtr(new memory_bucket()) , m_bucket(memory_bucket_ref(m_bucketPtr.get())) - , m_state(stParameters) + , m_state(stNotStarted) , p() , m_parametersSet(false) , m_store(store.template get_specific()) , m_merger(pred, m_store, m_bucket) , m_currentRunItems(m_bucket) + , m_maxItems(std::numeric_limits::max()) , pred(pred) , m_evacuated(false) , m_finalMergeInitialized(false) @@ -183,7 +190,7 @@ class merge_sorter { /// purposes). /////////////////////////////////////////////////////////////////////////// inline void set_parameters(memory_size_type runLength, memory_size_type fanout) { - tp_assert(m_state == stParameters, "Merge sorting already begun"); + tp_assert(m_state == stNotStarted, "Merge sorting already begun"); p.runLength = p.internalReportThreshold = runLength; p.fanout = p.finalFanout = fanout; m_parametersSet = true; @@ -192,12 +199,35 @@ class merge_sorter { log_debug() << "Fanout = " << p.fanout << " (uses memory " << fanout_memory_usage(p.fanout) << ")" << std::endl; } + /////////////////////////////////////////////////////////////////////////// + /// \brief Calculate parameters from given amount of files. 
+ /// \param f Files available for phase 1, 2 and 3 + /////////////////////////////////////////////////////////////////////////// + inline void set_available_files(memory_size_type f) { + p.filesPhase1 = p.filesPhase2 = p.filesPhase3 = f; + check_not_started(); + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Calculate parameters from given amount of files. + /// \param f1 Files available for phase 1 + /// \param f2 Files available for phase 2 + /// \param f3 Files available for phase 3 + /////////////////////////////////////////////////////////////////////////// + inline void set_available_files(memory_size_type f1, memory_size_type f2, memory_size_type f3) { + p.filesPhase1 = f1; + p.filesPhase2 = f2; + p.filesPhase3 = f3; + check_not_started(); + } + /////////////////////////////////////////////////////////////////////////// /// \brief Calculate parameters from given memory amount. - /// \param m Memory available for phase 2, 3 and 4 + /// \param m Memory available for phase 1, 2 and 3 /////////////////////////////////////////////////////////////////////////// inline void set_available_memory(memory_size_type m) { - calculate_parameters(m, m, m); + p.memoryPhase1 = p.memoryPhase2 = p.memoryPhase3 = m; + check_not_started(); } /////////////////////////////////////////////////////////////////////////// @@ -207,44 +237,57 @@ class merge_sorter { /// \param m3 Memory available for phase 3 /////////////////////////////////////////////////////////////////////////// inline void set_available_memory(memory_size_type m1, memory_size_type m2, memory_size_type m3) { - calculate_parameters(m1, m2, m3); + p.memoryPhase1 = m1; + p.memoryPhase2 = m2; + p.memoryPhase3 = m3; + check_not_started(); } private: - // set_phase_?_memory helper - inline void maybe_calculate_parameters() { - if (m_state != stParameters) - throw tpie::exception("Bad state in maybe_calculate_parameters"); - if (p.memoryPhase1 > 0 && - p.memoryPhase2 > 0 && 
- p.memoryPhase3 > 0) - calculate_parameters(p.memoryPhase1, - p.memoryPhase2, - p.memoryPhase3); + // Checks if we should still be able to change parameters + inline void check_not_started() { + if (m_state != stNotStarted) { + throw tpie::exception("Can't change parameters after merge sorting has started"); + } } public: + inline void set_phase_1_files(memory_size_type f1) { + p.filesPhase1 = f1; + check_not_started(); + } + + inline void set_phase_2_files(memory_size_type f2) { + p.filesPhase2 = f2; + check_not_started(); + } + + inline void set_phase_3_files(memory_size_type f3) { + p.filesPhase3 = f3; + check_not_started(); + } + inline void set_phase_1_memory(memory_size_type m1) { p.memoryPhase1 = m1; - maybe_calculate_parameters(); + check_not_started(); } inline void set_phase_2_memory(memory_size_type m2) { p.memoryPhase2 = m2; - maybe_calculate_parameters(); + check_not_started(); } inline void set_phase_3_memory(memory_size_type m3) { p.memoryPhase3 = m3; - maybe_calculate_parameters(); + check_not_started(); } /////////////////////////////////////////////////////////////////////////// /// \brief Initiate phase 1: Formation of input runs. 
/////////////////////////////////////////////////////////////////////////// inline void begin() { - tp_assert(m_state == stParameters, "Merge sorting already begun"); - if (!m_parametersSet) throw merge_sort_not_ready(); + tp_assert(m_state == stNotStarted, "Merge sorting already begun"); + if (!m_parametersSet) calculate_parameters(); log_debug() << "Start forming input runs" << std::endl; m_currentRunItems = array(0, allocator(m_bucket)); m_currentRunItems.resize((size_t)p.runLength); @@ -325,6 +368,11 @@ class merge_sorter { m_state = stMerge; } + inline bool is_calc_free() const { + tp_assert(m_state == stMerge, "Wrong phase"); + return m_reportInternal || m_finishedRuns <= p.fanout; + } + /////////////////////////////////////////////////////////////////////////// /// \brief Perform phase 2: Performing all merges in the merge tree except /// the last one. @@ -578,10 +626,10 @@ class merge_sorter { // Thus, we assume the largest fanout, meaning we might overshoot. // If we do overshoot, we will just spend the extra bytes on a run length // longer than 1, which is probably what the user wants anyway. 
- sort_parameters p((sort_parameters())); - p.runLength = 1; - p.fanout = calculate_fanout(std::numeric_limits::max()); - return memory_usage_phase_1(p); + sort_parameters tmp_p((sort_parameters())); + tmp_p.runLength = 1; + tmp_p.fanout = calculate_fanout(std::numeric_limits::max(), 0); + return memory_usage_phase_1(tmp_p); } static memory_size_type memory_usage_phase_2(const sort_parameters & params) { @@ -589,7 +637,7 @@ class merge_sorter { } static memory_size_type minimum_memory_phase_2() { - return fanout_memory_usage(calculate_fanout(0)); + return fanout_memory_usage(calculate_fanout(0, 0)); } static memory_size_type memory_usage_phase_3(const sort_parameters & params) { @@ -597,11 +645,11 @@ class merge_sorter { } static memory_size_type minimum_memory_phase_3() { - return fanout_memory_usage(calculate_fanout(0)); + return fanout_memory_usage(calculate_fanout(0, 0)); } static memory_size_type maximum_memory_phase_3() { - return fanout_memory_usage(maximumFanout); + return std::numeric_limits::max(); } memory_size_type actual_memory_phase_3() { @@ -618,18 +666,36 @@ class merge_sorter { } private: + static memory_size_type clamp(memory_size_type lo, memory_size_type val, memory_size_type hi) { + return std::max(lo, std::min(val, hi)); + } + /////////////////////////////////////////////////////////////////////////// /// \brief Calculate parameters from given memory amount. 
- /// \param m1 Memory available for phase 1 - /// \param m2 Memory available for phase 2 - /// \param m3 Memory available for phase 3 /////////////////////////////////////////////////////////////////////////// - inline void calculate_parameters(const memory_size_type m1, const memory_size_type m2, const memory_size_type m3) { - tp_assert(m_state == stParameters, "Merge sorting already begun"); - - p.memoryPhase1 = m1; - p.memoryPhase2 = m2; - p.memoryPhase3 = m3; + inline void calculate_parameters() { + tp_assert(m_state == stNotStarted, "Merge sorting already begun"); + + if(!p.filesPhase1) + p.filesPhase1 = clamp(minimumFilesPhase1, defaultFiles, maximumFilesPhase1); + if(!p.filesPhase2) + p.filesPhase2 = clamp(minimumFilesPhase2, defaultFiles, maximumFilesPhase2); + if(!p.filesPhase3) + p.filesPhase3 = clamp(minimumFilesPhase3, defaultFiles, maximumFilesPhase3); + + if(p.filesPhase1 < minimumFilesPhase1) + throw tpie::exception("file limit for phase 1 too small (" + std::to_string(p.filesPhase1) + " < " + std::to_string(minimumFilesPhase1) + ")"); + if(p.filesPhase2 < minimumFilesPhase2) + throw tpie::exception("file limit for phase 2 too small (" + std::to_string(p.filesPhase2) + " < " + std::to_string(minimumFilesPhase2) + ")"); + if(p.filesPhase3 < minimumFilesPhase3) + throw tpie::exception("file limit for phase 3 too small (" + std::to_string(p.filesPhase3) + " < " + std::to_string(minimumFilesPhase3) + ")"); + + if (!p.memoryPhase1) + throw tpie::exception("memory limit for phase 1 not set"); + if (!p.memoryPhase2) + throw tpie::exception("memory limit for phase 2 not set"); + if (!p.memoryPhase3) + throw tpie::exception("memory limit for phase 3 not set"); // We must set aside memory for temp_files in m_runFiles. // m_runFiles contains fanout*2 temp_files, so calculate fanout before run length. @@ -638,7 +704,7 @@ class merge_sorter { // Run length: unbounded // Fanout: determined by the size of our merge heap and the stream memory usage. 
log_debug() << "Phase 2: " << p.memoryPhase2 << " b available memory\n"; - p.fanout = calculate_fanout(p.memoryPhase2); + p.fanout = calculate_fanout(p.memoryPhase2, p.filesPhase2); if (fanout_memory_usage(p.fanout) > p.memoryPhase2) { log_debug() << "Not enough memory for fanout " << p.fanout << "! (" << p.memoryPhase2 << " < " << fanout_memory_usage(p.fanout) << ")\n"; p.memoryPhase2 = fanout_memory_usage(p.fanout); @@ -648,7 +714,7 @@ class merge_sorter { // Run length: unbounded // Fanout: determined by the stream memory usage. log_debug() << "Phase 3: " << p.memoryPhase3 << " b available memory\n"; - p.finalFanout = calculate_fanout(p.memoryPhase3); + p.finalFanout = calculate_fanout(p.memoryPhase3, p.filesPhase3); if (p.finalFanout > p.fanout) p.finalFanout = p.fanout; @@ -682,36 +748,38 @@ class merge_sorter { m_parametersSet = true; + set_items(m_maxItems); + log_debug() << "Calculated merge sort parameters\n"; p.dump(log_debug()); log_debug() << std::endl; log_debug() << "Merge sort phase 1: " - << m1 << " b available, " << memory_usage_phase_1(p) << " b expected" << std::endl; - if (memory_usage_phase_1(p) > m1) { + << p.memoryPhase1 << " b available, " << memory_usage_phase_1(p) << " b expected" << std::endl; + if (memory_usage_phase_1(p) > p.memoryPhase1) { log_warning() << "Merge sort phase 1 exceeds the alloted memory usage: " - << m1 << " b available, but " << memory_usage_phase_1(p) << " b expected" << std::endl; + << p.memoryPhase1 << " b available, but " << memory_usage_phase_1(p) << " b expected" << std::endl; } log_debug() << "Merge sort phase 2: " - << m2 << " b available, " << memory_usage_phase_2(p) << " b expected" << std::endl; - if (memory_usage_phase_2(p) > m2) { + << p.memoryPhase2 << " b available, " << memory_usage_phase_2(p) << " b expected" << std::endl; + if (memory_usage_phase_2(p) > p.memoryPhase2) { log_warning() << "Merge sort phase 2 exceeds the alloted memory usage: " - << m2 << " b available, but " << memory_usage_phase_2(p) 
<< " b expected" << std::endl; + << p.memoryPhase2 << " b available, but " << memory_usage_phase_2(p) << " b expected" << std::endl; } log_debug() << "Merge sort phase 3: " - << m3 << " b available, " << memory_usage_phase_3(p) << " b expected" << std::endl; - if (memory_usage_phase_3(p) > m3) { + << p.memoryPhase3 << " b available, " << memory_usage_phase_3(p) << " b expected" << std::endl; + if (memory_usage_phase_3(p) > p.memoryPhase3) { log_warning() << "Merge sort phase 3 exceeds the alloted memory usage: " - << m3 << " b available, but " << memory_usage_phase_3(p) << " b expected" << std::endl; + << p.memoryPhase3 << " b available, but " << memory_usage_phase_3(p) << " b expected" << std::endl; } } /////////////////////////////////////////////////////////////////////////// /// calculate_parameters helper /////////////////////////////////////////////////////////////////////////// - static inline memory_size_type calculate_fanout(memory_size_type availableMemory) { + static inline memory_size_type calculate_fanout(memory_size_type availableMemory, memory_size_type availableFiles) { memory_size_type fanout_lo = 2; - memory_size_type fanout_hi = maximumFanout + 1; + memory_size_type fanout_hi = availableFiles - 2; // binary search while (fanout_lo < fanout_hi - 1) { memory_size_type mid = fanout_lo + (fanout_hi-fanout_lo)/2; @@ -744,17 +812,35 @@ class merge_sorter { /// mode. 
/////////////////////////////////////////////////////////////////////////// void set_items(stream_size_type n) { - if (!m_parametersSet) - throw exception("Wrong state in set_items: parameters not set"); - if (m_state != stParameters) - throw exception("Wrong state in set_items: state is not stParameters"); + if (m_state != stNotStarted) + throw exception("Wrong state in set_items: state is not stNotStarted"); + + m_maxItems = n; - if(n < p.runLength) { - p.runLength = p.internalReportThreshold; + if (!m_parametersSet) { + // We will handle this later in calculate_parameters + return; + } + + // If the item upper bound is less than a run, + // then it might pay off to decrease the length of a run + // so that we can avoid I/O altogether. + if (m_maxItems < p.runLength) { + memory_size_type newRunLength = + std::max(memory_size_type(m_maxItems), p.internalReportThreshold); log_debug() << "Decreasing run length from " << p.runLength - << " to " << p.internalReportThreshold << std::endl; - - log_debug() << "New merge sort parameters\n"; + << " to " << newRunLength + << " since at most " << m_maxItems << " items will be pushed," + << " and the internal report threshold is " + << p.internalReportThreshold + << ". New merge sort parameters:\n"; + // In principle, we could decrease runLength to m_maxItems, + // but setting runLength below internalReportThreshold does not + // give additional benefits. + // Furthermore, buggy code could call set_items with a very low + // upper bound, leading to unacceptable performance in practice; + // thus, internalReportThreshold is used as a stopgap/failsafe. 
+ p.runLength = newRunLength; p.dump(log_debug()); log_debug() << std::endl; } @@ -807,7 +893,7 @@ class merge_sorter { } enum state_type { - stParameters, + stNotStarted, stRunFormation, stMerge, stReport @@ -849,6 +935,8 @@ class merge_sorter { stream_size_type m_itemCount; + stream_size_type m_maxItems; + pred_t pred; bool m_evacuated; bool m_finalMergeInitialized; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/node.cpp b/keyvi/3rdparty/tpie/tpie/pipelining/node.cpp index bc426a0e..47035df5 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/node.cpp +++ b/keyvi/3rdparty/tpie/tpie/pipelining/node.cpp @@ -45,31 +45,6 @@ void proxy_progress_indicator::refresh() { } // namespace bits -node_parameters::node_parameters() - : minimumMemory(0) - , maximumMemory(std::numeric_limits::max()) - , memoryFraction(0.0) - , name() - , namePriority(PRIORITY_NO_NAME) - , phaseName() - , phaseNamePriority(PRIORITY_NO_NAME) - , stepsTotal(0) -{ -} - -void node::set_memory_fraction(double f) { - switch (get_state()) { - case STATE_IN_PROPAGATE: - case STATE_AFTER_PROPAGATE: - case STATE_FRESH: - case STATE_IN_PREPARE: - break; - default: - throw call_order_exception("set_memory_fraction"); - } - m_parameters.memoryFraction = f; -} - const std::string & node::get_name() { if (m_parameters.name.empty()) { m_parameters.name = bits::extract_pipe_name(typeid(*this).name()); @@ -94,7 +69,6 @@ void node::set_phase_name(const std::string & name, priority_type priority) { node::node() : token(this) - , m_availableMemory(0) , m_flushPriority(0) , m_stepsLeft(0) , m_pi(0) @@ -106,7 +80,6 @@ node::node() node::node(node && other) : token(other.token, this) , m_parameters(std::move(other.m_parameters)) - , m_availableMemory(std::move(other.m_availableMemory)) , m_buckets(std::move(other.m_buckets)) , m_flushPriority(std::move(other.m_flushPriority)) , m_stepsLeft(std::move(other.m_stepsLeft)) @@ -128,7 +101,6 @@ node & node::operator=(node && other) { node::node(const node_token & token) : 
token(token, this, true) , m_parameters() - , m_availableMemory(0) , m_flushPriority(0) , m_stepsLeft(0) , m_pi(0) @@ -170,37 +142,52 @@ void node::add_dependency(const node & dest) { add_dependency(dest.token); } -void node::set_minimum_memory(memory_size_type minimumMemory) { - switch (get_state()) { - case STATE_IN_PROPAGATE: - case STATE_AFTER_PROPAGATE: - case STATE_FRESH: - case STATE_IN_PREPARE: - break; - default: - throw call_order_exception("set_minimum_memory"); - } - m_parameters.minimumMemory = minimumMemory; +void node::add_memory_share_dependency(const node_token & dest) { + bits::node_map::ptr m = token.map_union(dest); + m->add_relation(token.id(), dest.id(), bits::memory_share_depends); } -void node::set_maximum_memory(memory_size_type maximumMemory) { - switch (get_state()) { - case STATE_IN_PROPAGATE: - case STATE_AFTER_PROPAGATE: - case STATE_FRESH: - case STATE_IN_PREPARE: - break; - default: - throw call_order_exception("set_maximum_memory"); - } - m_parameters.maximumMemory = maximumMemory; +void node::add_memory_share_dependency(const node & dest) { + add_memory_share_dependency(dest.token); } -void node::set_available_memory(memory_size_type availableMemory) { - m_availableMemory = availableMemory; + +#define TPIE_RESOURCE_SETTER(setter_name, param_type, param_name) \ + void node::setter_name(resource_type type, param_type value) { \ + switch (get_state()) { \ + case STATE_IN_PROPAGATE: \ + case STATE_AFTER_PROPAGATE: \ + case STATE_FRESH: \ + case STATE_IN_PREPARE: \ + break; \ + default: \ + resource_type t = get_resource_being_assigned(); \ + /* If the changed resource is being assigned later, + * allow changing it + */ \ + if (t != NO_RESOURCE && type > t) \ + break; \ + throw call_order_exception(#setter_name); \ + } \ + m_parameters.resource_parameters[type].param_name = value; \ + } + +TPIE_RESOURCE_SETTER(set_minimum_resource_usage, memory_size_type, minimum); +TPIE_RESOURCE_SETTER(set_maximum_resource_usage, memory_size_type, 
maximum); +TPIE_RESOURCE_SETTER(set_resource_fraction, double, fraction); + +#undef TPIE_RESOURCE_SETTER + +void node::_internal_set_available_of_resource(resource_type type, memory_size_type available) { + m_parameters.resource_parameters[type].available = available; + resource_available_changed(type, available); + if (type == MEMORY) { + // Legacy interface + set_available_memory(available); + } } -void node::forward_any(std::string key, boost::any value, memory_size_type k) { +void node::forward_any(std::string key, any_noncopyable value, memory_size_type k) { switch (get_state()) { case STATE_FRESH: case STATE_IN_PREPARE: @@ -223,7 +210,7 @@ void node::forward_any(std::string key, boost::any value, memory_size_type k) { break; } - add_forwarded_data(key, value, true); + m_forwardedFromHere[key] = std::move(value); bits::node_map::ptr nodeMap = get_node_map()->find_authority(); @@ -231,25 +218,49 @@ void node::forward_any(std::string key, boost::any value, memory_size_type k) { std::vector successors; nodeMap->get_successors(get_id(), successors, k, true); for (auto i : successors) { - nodeMap->get(i)->add_forwarded_data(key, value, false); + nodeMap->get(i)->add_forwarded_data(key, get_id()); } } -void node::add_forwarded_data(std::string key, boost::any value, bool explicitForward) { - if (m_values.count(key) && - !explicitForward && m_values[key].second) return; - m_values[key].first = value; - m_values[key].second = explicitForward; +void node::add_forwarded_data(std::string key, node_token::id_t from_node) { + m_forwardedToHere[key] = from_node; +} + +node::maybeany_t node::fetch_maybe(std::string key) { + auto nodeMap = get_node_map()->find_authority(); + + auto it = m_forwardedToHere.find(key); + if (it == m_forwardedToHere.end()) { + // Try to lookup the key in the node map + return nodeMap->fetch_maybe(key); + } + + auto fetch_from_id = it->second; + node *fetch_from = nodeMap->get(fetch_from_id); + if (!fetch_from) { + return maybeany_t(); + } + + 
return fetch_from->get_forwarded_data_maybe(key); } -boost::any node::fetch_any(std::string key) { - if (m_values.count(key) != 0) { - return m_values[key].first; - } else { +any_noncopyable & node::fetch_any(std::string key) { + maybeany_t value = fetch_maybe(key); + if (!value) { std::stringstream ss; - ss << "Tried to fetch nonexistent key '" << key << '\''; + ss << "Tried to fetch nonexistent key '" << key << "'" + << " in " << get_name() << " of type " << typeid(*this).name(); throw invalid_argument_exception(ss.str()); } + return *value; +} + +node::maybeany_t node::get_forwarded_data_maybe(std::string key) { + auto it = m_forwardedFromHere.find(key); + if (it == m_forwardedFromHere.end()) { + return maybeany_t(); + } + return maybeany_t(it->second); } void node::set_steps(stream_size_type steps) { diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/node.h b/keyvi/3rdparty/tpie/tpie/pipelining/node.h index cfe0168c..04ea2d9c 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/node.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/node.h @@ -24,13 +24,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include namespace tpie { @@ -49,20 +49,24 @@ class proxy_progress_indicator : public tpie::progress_indicator_base { } // namespace bits -struct node_parameters { - node_parameters(); +struct node_resource_parameters { + memory_size_type minimum = 0; + memory_size_type maximum = std::numeric_limits::max(); + double fraction = 0.0; + + memory_size_type available = 0; +}; - memory_size_type minimumMemory; - memory_size_type maximumMemory; - double memoryFraction; +struct node_parameters { + node_resource_parameters resource_parameters[resource_type::TOTAL_RESOURCE_TYPES]; std::string name; - priority_type namePriority; + priority_type namePriority = PRIORITY_NO_NAME; std::string phaseName; - priority_type phaseNamePriority; + priority_type phaseNamePriority = PRIORITY_NO_NAME; - stream_size_type stepsTotal; + stream_size_type 
stepsTotal = 0; }; /////////////////////////////////////////////////////////////////////////////// @@ -73,6 +77,8 @@ struct node_parameters { /////////////////////////////////////////////////////////////////////////////// class node { public: + typedef boost::optional maybeany_t; + /////////////////////////////////////////////////////////////////////////// /// \brief Options for how to plot this node ////////////////////////////////////////////////////////////////////////// @@ -103,12 +109,74 @@ class node { /////////////////////////////////////////////////////////////////////////// virtual ~node() {} + /////////////////////////////////////////////////////////////////////////// + /// \brief Get the minimum amount of the resource declared by this node. + /// Defaults to zero when no minimum has been set. + /////////////////////////////////////////////////////////////////////////// + inline memory_size_type get_minimum_resource_usage(resource_type type) const { + return m_parameters.resource_parameters[type].minimum; + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Get the maximum amount of the resource declared by this node. + /// Defaults to maxint when no maximum has been set. + /////////////////////////////////////////////////////////////////////////// + inline memory_size_type get_maximum_resource_usage(resource_type type) const { + return m_parameters.resource_parameters[type].maximum; + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Get the priority for the specific resource of this node. + /////////////////////////////////////////////////////////////////////////// + inline double get_resource_fraction(resource_type type) const { + return m_parameters.resource_parameters[type].fraction; + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Get the amount of the specific resource assigned to this node. 
+ /////////////////////////////////////////////////////////////////////////// + inline memory_size_type get_available_of_resource(resource_type type) const { + return m_parameters.resource_parameters[type].available; + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Called by implementers to declare minimum resource requirements. + /////////////////////////////////////////////////////////////////////////// + void set_minimum_resource_usage(resource_type type, memory_size_type usage); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Called by implementers to declare maximum resource requirements. + /// + /// To signal that you don't want to use this resource, + /// set minimum resource usage and the resource fraction to zero. + /////////////////////////////////////////////////////////////////////////// + void set_maximum_resource_usage(resource_type type, memory_size_type usage); + + /////////////////////////////////////////////////////////////////////////// + /// \Brief Set the resource priority of this node. Resources are + /// distributed proportionally to the priorities of the nodes in the given + /// phase. + /////////////////////////////////////////////////////////////////////////// + void set_resource_fraction(resource_type type, double f); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Called by the resource manager to notify the node's available + /// amount of resource has changed. + /////////////////////////////////////////////////////////////////////////// + virtual void resource_available_changed(resource_type, memory_size_type) { + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Used internally to assign the available resource to the node. 
+ /////////////////////////////////////////////////////////////////////////// + void _internal_set_available_of_resource(resource_type type, memory_size_type available); + /////////////////////////////////////////////////////////////////////////// /// \brief Get the minimum amount of memory declared by this node. /// Defaults to zero when no minimum has been set. /////////////////////////////////////////////////////////////////////////// inline memory_size_type get_minimum_memory() const { - return m_parameters.minimumMemory; + return get_minimum_resource_usage(MEMORY); } /////////////////////////////////////////////////////////////////////////// @@ -116,37 +184,64 @@ class node { /// Defaults to maxint when no maximum has been set. /////////////////////////////////////////////////////////////////////////// inline memory_size_type get_maximum_memory() const { - return m_parameters.maximumMemory; + return get_maximum_resource_usage(MEMORY); + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Get the memory priority of this node. + /////////////////////////////////////////////////////////////////////////// + inline double get_memory_fraction() const { + return get_resource_fraction(MEMORY); } /////////////////////////////////////////////////////////////////////////// /// \brief Get the amount of memory assigned to this node. /////////////////////////////////////////////////////////////////////////// inline memory_size_type get_available_memory() const { - return m_availableMemory; + return get_available_of_resource(MEMORY); } /////////////////////////////////////////////////////////////////////////// - /// \brief Get the amount of memory assigned to this node. + /// \brief Called by implementers to declare minimum memory requirements. 
/////////////////////////////////////////////////////////////////////////// - inline memory_size_type get_used_memory() const { - memory_size_type ans=0; - for (const auto & p: m_buckets) - if (p) ans += p->count; - return ans; + void set_minimum_memory(memory_size_type minimumMemory) { + set_minimum_resource_usage(MEMORY, minimumMemory); + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Called by implementers to declare maximum memory requirements. + /// + /// To signal that you don't want any memory, set minimum memory and the + /// memory fraction to zero. + /////////////////////////////////////////////////////////////////////////// + void set_maximum_memory(memory_size_type maximumMemory) { + set_maximum_resource_usage(MEMORY, maximumMemory); } /////////////////////////////////////////////////////////////////////////// /// \Brief Set the memory priority of this node. Memory is distributed /// proportionally to the priorities of the nodes in the given phase. /////////////////////////////////////////////////////////////////////////// - void set_memory_fraction(double f); + void set_memory_fraction(double f) { + set_resource_fraction(MEMORY, f); + } /////////////////////////////////////////////////////////////////////////// - /// \brief Get the memory priority of this node. + /// \brief Called by the memory manager to set the amount of memory + /// assigned to this node. /////////////////////////////////////////////////////////////////////////// - inline double get_memory_fraction() const { - return m_parameters.memoryFraction; + virtual void set_available_memory(memory_size_type availableMemory) { + unused(availableMemory); + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Get the amount of memory currently used by this node. 
+ /////////////////////////////////////////////////////////////////////////// + inline memory_size_type get_used_memory() const { + memory_size_type ans=0; + for (const auto & p: m_buckets) + if (p) ans += p->count; + return ans; } /////////////////////////////////////////////////////////////////////////// @@ -204,6 +299,8 @@ class node { virtual void begin() { } + virtual bool is_go_free() const {return false;} + /////////////////////////////////////////////////////////////////////////// /// \brief For initiator nodes, execute this phase by pushing all items /// to be pushed. For non-initiator nodes, the default implementation @@ -333,6 +430,20 @@ class node { m_state = s; } + /////////////////////////////////////////////////////////////////////////// + /// \brief Used internally to check order of method calls. + /////////////////////////////////////////////////////////////////////////// + resource_type get_resource_being_assigned() const { + return m_resourceBeingAssigned; + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Used internally to check order of method calls. + /////////////////////////////////////////////////////////////////////////// + void set_resource_being_assigned(resource_type type) { + m_resourceBeingAssigned = type; + } + /////////////////////////////////////////////////////////////////////////// /// \brief Get options specified for plot(), as a combination of /// \c node::PLOT values. @@ -383,7 +494,7 @@ class node { #ifdef _WIN32 #pragma warning( pop ) #endif // _WIN32 - +public: /////////////////////////////////////////////////////////////////////////// /// \brief Called by implementers to declare a push destination. 
/////////////////////////////////////////////////////////////////////////// @@ -419,24 +530,21 @@ class node { void add_dependency(const node & dest); /////////////////////////////////////////////////////////////////////////// - /// \brief Called by implementers to declare minimum memory requirements. + /// \brief Called by implementers to declare a node memory share + /// dependency, that is, a requirement that another node has end() called + /// before the begin() of this node, and memory shared between end() and + /// begin() unless evacuate() is called /////////////////////////////////////////////////////////////////////////// - void set_minimum_memory(memory_size_type minimumMemory); + void add_memory_share_dependency(const node_token & dest); /////////////////////////////////////////////////////////////////////////// - /// \brief Called by implementers to declare maximum memory requirements. - /// - /// To signal that you don't want any memory, set minimum memory and the - /// memory fraction to zero. + /// \brief Called by implementers to declare a node memory share + /// dependency, that is, a requirement that another node has end() called + /// before the begin() of this node, and memory shared between end() and + /// begin() unless evacuate() is called /////////////////////////////////////////////////////////////////////////// - void set_maximum_memory(memory_size_type maximumMemory); + void add_memory_share_dependency(const node & dest); - /////////////////////////////////////////////////////////////////////////// - /// \brief Called by the memory manager to set the amount of memory - /// assigned to this node. - /////////////////////////////////////////////////////////////////////////// - virtual void set_available_memory(memory_size_type availableMemory); -public: /////////////////////////////////////////////////////////////////////////// /// \brief Called by implementers to forward auxiliary data to successors. 
/// If explicitForward is false, the data will not override data forwarded @@ -455,57 +563,60 @@ class node { /////////////////////////////////////////////////////////////////////////// template void forward(std::string key, T value, memory_size_type k = std::numeric_limits::max()) { - forward_any(key, boost::any(value), k); + forward_any(key, any_noncopyable(std::move(value)), k); } /////////////////////////////////////////////////////////////////////////// /// \brief See \ref node::forward. /////////////////////////////////////////////////////////////////////////// - void forward_any(std::string key, boost::any value, memory_size_type k = std::numeric_limits::max()); + void forward_any(std::string key, any_noncopyable value, memory_size_type k = std::numeric_limits::max()); private: /////////////////////////////////////////////////////////////////////////// /// \brief Called by forward_any to add forwarded data. - // - /// If explicitForward is false, the data will not override data forwarded - /// with explicitForward == true. /////////////////////////////////////////////////////////////////////////// - void add_forwarded_data(std::string key, boost::any value, bool explicitForward); + void add_forwarded_data(std::string key, node_token::id_t from_node); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Called by fetch_any to get data forwarded from this node. + /////////////////////////////////////////////////////////////////////////// + maybeany_t get_forwarded_data_maybe(std::string key); public: /////////////////////////////////////////////////////////////////////////// /// \brief Find out if there is a piece of auxiliary data forwarded with a /// given name. 
/////////////////////////////////////////////////////////////////////////// - inline bool can_fetch(std::string key) { - return m_values.count(key) != 0; + bool can_fetch(std::string key) { + return bool(fetch_maybe(key)); } /////////////////////////////////////////////////////////////////////////// - /// \brief Fetch piece of auxiliary data as boost::any (the internal + /// \brief Fetch piece of auxiliary data as any_noncopyable (the internal + /// representation) wrapped in a boost::optional which is unitialized + /// if the key is not found. + /////////////////////////////////////////////////////////////////////////// + maybeany_t fetch_maybe(std::string key); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Fetch piece of auxiliary data as any_noncopyable (the internal /// representation). /////////////////////////////////////////////////////////////////////////// - boost::any fetch_any(std::string key); + any_noncopyable & fetch_any(std::string key); /////////////////////////////////////////////////////////////////////////// /// \brief Fetch piece of auxiliary data, expecting a given value type. /////////////////////////////////////////////////////////////////////////// template - inline T fetch(std::string key) { - if (m_values.count(key) == 0) { - std::stringstream ss; - ss << "Tried to fetch nonexistent key '" << key - << "' of type " << typeid(T).name() - << " in " << get_name() << " of type " << typeid(*this).name(); - throw invalid_argument_exception(ss.str()); - } + inline T & fetch(std::string key) { + any_noncopyable &item = fetch_any(key); try { - return boost::any_cast(m_values[key].first); - } catch (boost::bad_any_cast m) { + return any_cast(item); + } catch (bad_any_noncopyable_cast m) { std::stringstream ss; ss << "Trying to fetch key '" << key << "' of type " << typeid(T).name() << " but forwarded data was of type " - << m_values[key].first.type().name() << ". 
Message was: " << m.what(); + << item.type().name() << ". Message was: " << m.what(); throw invalid_argument_exception(ss.str()); } } @@ -605,13 +716,13 @@ class node { /////////////////////////////////////////////////////////////////////////////// template void set_datastructure(const std::string & name, T datastructure) { - bits::node_map::datastructuremap_t & structures = get_node_map()->get_datastructures(); + bits::node_map::datastructuremap_t & structures = get_node_map()->find_authority()->get_datastructures(); bits::node_map::datastructuremap_t::iterator i = structures.find(name); if(i == structures.end()) throw tpie::exception("attempted to set non-registered datastructure"); - i->second.second = datastructure; + i->second.second = move_if_movable(datastructure); } /////////////////////////////////////////////////////////////////////////////// @@ -620,16 +731,24 @@ class node { /// \tparam the type of the datastructure /////////////////////////////////////////////////////////////////////////////// template - T get_datastructure(const std::string & name) { - bits::node_map::datastructuremap_t & structures = get_node_map()->get_datastructures(); + T & get_datastructure(const std::string & name) { + bits::node_map::datastructuremap_t & structures = get_node_map()->find_authority()->get_datastructures(); bits::node_map::datastructuremap_t::iterator i = structures.find(name); if(i == structures.end()) throw tpie::exception("attempted to get non-registered datastructure"); - return boost::any_cast(i->second.second); + return any_cast(i->second.second); } + void unset_datastructure(const std::string & name) { + bits::node_map::datastructuremap_t & structures = get_node_map()->find_authority()->get_datastructures(); + bits::node_map::datastructuremap_t::iterator i = structures.find(name); + + if(i == structures.end()) return; + i->second.second.reset(); + } + private: struct datastructure_info_t { datastructure_info_t() : min(0), max(std::numeric_limits::max()) {} 
@@ -692,17 +811,17 @@ class node { node_token token; node_parameters m_parameters; - memory_size_type m_availableMemory; std::vector > m_buckets; - typedef std::map > valuemap; - valuemap m_values; + std::map m_forwardedFromHere; + std::map m_forwardedToHere; datastructuremap_t m_datastructures; memory_size_type m_flushPriority; stream_size_type m_stepsLeft; progress_indicator_base * m_pi; STATE m_state; + resource_type m_resourceBeingAssigned = NO_RESOURCE; std::unique_ptr m_piProxy; flags m_plotOptions; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/numeric.h b/keyvi/3rdparty/tpie/tpie/pipelining/numeric.h index 9ca2294b..d4d079cc 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/numeric.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/numeric.h @@ -36,7 +36,7 @@ class linear_t : public node { public: typedef typename push_type::type item_type; - inline linear_t(dest_t dest, item_type factor, item_type term) : dest(std::move(dest)), factor(factor), term(term) { + inline linear_t(dest_t dest, item_type factor, item_type term) : factor(factor), term(term), dest(std::move(dest)) { add_push_destination(this->dest); set_name("Linear transform", PRIORITY_INSIGNIFICANT); } @@ -44,9 +44,33 @@ class linear_t : public node { dest.push(item*factor+term); } private: + item_type factor, term; + dest_t dest; +}; + +template +class range_t : public node { +public: + typedef typename push_type::type item_type; + + range_t(dest_t dest, item_type from, item_type to, item_type increment) : from(from), to(to), increment(increment), dest(std::move(dest)) {} + + void propagate() { + stream_size_type items = (from - to) / increment; + set_steps(items); + forward("items", items); + } + + virtual void go() override { + for (item_type i=from; i < to; i += increment) { + dest.push(i); + step(1); + } + } + +private: + item_type from, to, increment; dest_t dest; - item_type factor; - item_type term; }; } // namespace bits @@ -63,6 +87,13 @@ linear(T factor, T term) { return factory(factor, 
term); } +template +inline pipe_begin > +range(T from, T to, T increment = 1) { + return factory(from, to, increment); +} + + } // namespace pipelining } // namespace tpie diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/pipe_base.h b/keyvi/3rdparty/tpie/tpie/pipelining/pipe_base.h index 03f6f615..8e98618d 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/pipe_base.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/pipe_base.h @@ -151,6 +151,16 @@ class pipe_base { return std::move(self()); } + child_t forward_any(const std::string & key, any_noncopyable value) { + self().factory.forward(key, std::move(value)); + return std::move(self()); + } + + template + child_t forward(const std::string & key, T value) { + return forward_any(key, any_noncopyable(value)); + } + protected: inline child_t & self() {return *static_cast(this);} inline const child_t & self() const {return *static_cast(this);} diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.cpp b/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.cpp index 939b208d..7577b80a 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.cpp +++ b/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.cpp @@ -18,6 +18,7 @@ // along with TPIE. 
If not, see #include +#include #include #include #include @@ -51,7 +52,7 @@ namespace bits { typedef std::unordered_map nodes_t; -void pipeline_base::plot_impl(std::ostream & out, bool full) { +void pipeline_base_base::plot_impl(std::ostream & out, bool full) { typedef tpie::pipelining::bits::node_map::id_t id_t; node_map::ptr nodeMap = m_nodeMap->find_authority(); @@ -99,17 +100,22 @@ void pipeline_base::plot_impl(std::ostream & out, bool full) { case no_forward_depends: out << '"' << name(nodeMap, s) << "\" -> \"" << name(nodeMap, t) << "\" [arrowhead=none,arrowtail=normal,dir=both,style=dotted];\n"; break; + case memory_share_depends: + out << '"' << name(nodeMap, s) << "\" -> \"" << name(nodeMap, t) << "\" [arrowhead=none,arrowtail=normal,dir=both,style=tapered];\n"; + break; + } } out << '}' << std::endl; } void pipeline_base::operator()(stream_size_type items, progress_indicator_base & pi, + const memory_size_type initialFiles, const memory_size_type initialMemory, const char * file, const char * function) { node_map::ptr map = m_nodeMap->find_authority(); runtime rt(map); - rt.go(items, pi, initialMemory, file, function); + rt.go(items, pi, initialFiles, initialMemory, file, function); /* typedef std::vector phases_t; @@ -138,17 +144,11 @@ void pipeline_base::operator()(stream_size_type items, progress_indicator_base & */ } -void pipeline_base::forward_any(std::string key, const boost::any & value) { - node_map::ptr map = m_nodeMap->find_authority(); - runtime rt(map); - std::vector sources; - rt.get_item_sources(sources); - for (size_t j = 0; j < sources.size(); ++j) { - sources[j]->forward_any(key, value); - } +void pipeline_base_base::forward_any(std::string key, any_noncopyable value) { + get_node_map()->find_authority()->forward(key, std::move(value)); } -bool pipeline_base::can_fetch(std::string key) { +bool pipeline_base_base::can_fetch(std::string key) { node_map::ptr map = m_nodeMap->find_authority(); runtime rt(map); std::vector sinks; @@ -159,7 
+159,7 @@ bool pipeline_base::can_fetch(std::string key) { return false; } -boost::any pipeline_base::fetch_any(std::string key) { +any_noncopyable & pipeline_base_base::fetch_any(std::string key) { node_map::ptr map = m_nodeMap->find_authority(); runtime rt(map); std::vector sinks; @@ -205,14 +205,8 @@ void pipeline_base::order_before(pipeline_base & other) { } } - - -} // namespace bits - -pipeline * pipeline::m_current = NULL; - -void pipeline::output_memory(std::ostream & o) const { - bits::node_map::ptr nodeMap = p->get_node_map()->find_authority(); +void pipeline_base_base::output_memory(std::ostream & o) const { + bits::node_map::ptr nodeMap = get_node_map()->find_authority(); for (bits::node_map::mapit i = nodeMap->begin(); i != nodeMap->end(); ++i) { bits::node_map::val_t p = nodeMap->get(i->first); o << p->get_name() << ": min=" << p->get_minimum_memory() << "; max=" << p->get_available_memory() << "; prio=" << p->get_memory_fraction() << ";" << std::endl; @@ -220,6 +214,26 @@ void pipeline::output_memory(std::ostream & o) const { } } +void subpipeline_base::begin(stream_size_type items, progress_indicator_base & pi, + memory_size_type filesAvailable, memory_size_type mem, + const char * file, const char * function) { + rt.reset(new runtime(m_nodeMap->find_authority())); + gc = rt->go_init(items, pi, filesAvailable, mem, file, function); + rt->go_until(gc.get(), frontNode); +} + +void subpipeline_base::end() { + rt->go_until(gc.get(), nullptr); + gc.reset(); + rt.reset(); +} + + +} // namespace bits + +pipeline * pipeline::m_current = NULL; + + } // namespace pipelining } // namespace tpie diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.h b/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.h index 81ef90ed..3a5e9f58 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/pipeline.h @@ -20,11 +20,11 @@ #ifndef __TPIE_PIPELINING_PIPELINE_H__ #define __TPIE_PIPELINING_PIPELINE_H__ -#include #include #include 
#include #include +#include namespace tpie { @@ -34,16 +34,10 @@ namespace bits { /////////////////////////////////////////////////////////////////////////////// /// \class pipeline_base -/// Virtual superclass for pipelines implementing the function call operator. +/// Virtual superclass for pipelines and subpipelines /////////////////////////////////////////////////////////////////////////////// -class pipeline_base { +class pipeline_base_base { public: - /////////////////////////////////////////////////////////////////////////// - /// \brief Invoke the pipeline. - /////////////////////////////////////////////////////////////////////////// - void operator()(stream_size_type items, progress_indicator_base & pi, memory_size_type mem, - const char * file, const char * function); - /////////////////////////////////////////////////////////////////////////// /// \brief Generate a GraphViz plot of the pipeline /// @@ -73,32 +67,60 @@ class pipeline_base { /////////////////////////////////////////////////////////////////////////// void plot_full(std::ostream & out) {plot_impl(out, true);} - double memory() const { - return m_memory; - } - /////////////////////////////////////////////////////////////////////////// /// \brief Virtual dtor. 
/////////////////////////////////////////////////////////////////////////// - virtual ~pipeline_base() {} + virtual ~pipeline_base_base() {} + + void forward_any(std::string key, any_noncopyable value); + + bool can_fetch(std::string key); + + any_noncopyable & fetch_any(std::string key); node_map::ptr get_node_map() const { return m_nodeMap; } - void forward_any(std::string key, const boost::any & value); + void output_memory(std::ostream & o) const; +protected: + node_map::ptr m_nodeMap; - bool can_fetch(std::string key); +private: + void plot_impl(std::ostream & out, bool full); +}; + + +/////////////////////////////////////////////////////////////////////////////// +/// \class pipeline_base +/// Virtual superclass for pipelines implementing the function call operator. +/////////////////////////////////////////////////////////////////////////////// +class pipeline_base: public pipeline_base_base { +public: + /////////////////////////////////////////////////////////////////////////// + /// \brief Invoke the pipeline. + /////////////////////////////////////////////////////////////////////////// + void operator()(stream_size_type items, progress_indicator_base & pi, + memory_size_type filesAvailable, memory_size_type mem, + const char * file, const char * function); - boost::any fetch_any(std::string key); + /////////////////////////////////////////////////////////////////////////// + /// \brief Invoke the pipeline with amount of available files automatically + /// configured. 
+ /////////////////////////////////////////////////////////////////////////// + void operator()(stream_size_type items, progress_indicator_base & pi, + memory_size_type mem, + const char * file, const char * function) { + operator()(items, pi, get_file_manager().available(), mem, file, function); + } - void order_before(pipeline_base & other); + double memory() const { + return m_memory; + } + void order_before(pipeline_base & other); protected: - node_map::ptr m_nodeMap; double m_memory; -private: - void plot_impl(std::ostream & out, bool full); }; /////////////////////////////////////////////////////////////////////////////// @@ -171,19 +193,27 @@ class pipeline { void operator()() { CurrentPipeSetter _(this); progress_indicator_null pi; - (*p)(1, pi, get_memory_manager().available(), nullptr, nullptr); + (*p)(1, pi, get_file_manager().available(), get_memory_manager().available(), nullptr, nullptr); + } + + void operator()(stream_size_type items, progress_indicator_base & pi, + const char * file, const char * function) { + CurrentPipeSetter _(this); + (*p)(items, pi, get_file_manager().available(), get_memory_manager().available(), file, function); } void operator()(stream_size_type items, progress_indicator_base & pi, + memory_size_type mem, const char * file, const char * function) { CurrentPipeSetter _(this); - (*p)(items, pi, get_memory_manager().available(), file, function); + (*p)(items, pi, get_file_manager().available(), mem, file, function); } - void operator()(stream_size_type items, progress_indicator_base & pi, memory_size_type mem, + void operator()(stream_size_type items, progress_indicator_base & pi, + memory_size_type filesAvailable, memory_size_type mem, const char * file, const char * function) { CurrentPipeSetter _(this); - (*p)(items, pi, mem, file, function); + (*p)(items, pi, filesAvailable, mem, file, function); } void plot(std::ostream & os = std::cout) { @@ -197,7 +227,8 @@ class pipeline { inline double memory() const { return 
p->memory(); } - inline bits::node_map::ptr get_node_map() const { + + bits::node_map::ptr get_node_map() const { return p->get_node_map(); } @@ -205,23 +236,23 @@ class pipeline { return p->can_fetch(key); } - boost::any fetch_any(std::string key) { + any_noncopyable & fetch_any(std::string key) { return p->fetch_any(key); } template - T fetch(std::string key) { - boost::any a = fetch_any(key); - return *boost::any_cast(&a); + T & fetch(std::string key) { + any_noncopyable &a = fetch_any(key); + return any_cast(a); } - void forward_any(std::string key, const boost::any & value) { - return p->forward_any(key, value); + void forward_any(std::string key, any_noncopyable value) { + p->forward_any(key, std::move(value)); } template void forward(std::string key, T value) { - forward_any(key, boost::any(value)); + forward_any(key, any_noncopyable(std::move(value))); } pipeline & then(pipeline & other) { @@ -229,7 +260,7 @@ class pipeline { return other; } - void output_memory(std::ostream & o) const; + void output_memory(std::ostream & o) const {p->output_memory(o);} static pipeline * current() {return m_current;} private: diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/reverse.h b/keyvi/3rdparty/tpie/tpie/pipelining/reverse.h index 9658fc63..64e7f6ff 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/reverse.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/reverse.h @@ -45,6 +45,7 @@ class reverser_input_t: public node { { set_name("Store items", PRIORITY_SIGNIFICANT); set_minimum_memory(stack::memory_usage()); + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED | PLOT_SIMPLIFIED_HIDE); } @@ -115,6 +116,7 @@ class reverser_output_t: public node { add_push_destination(this->dest); set_name("Output reversed", PRIORITY_INSIGNIFICANT); set_minimum_memory(this->m_stack->memory_usage()); + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED); } diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/runtime.cpp 
b/keyvi/3rdparty/tpie/tpie/pipelining/runtime.cpp index b52bc4e9..ffc2e15a 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/runtime.cpp +++ b/keyvi/3rdparty/tpie/tpie/pipelining/runtime.cpp @@ -31,6 +31,10 @@ namespace pipelining { namespace bits { +struct not_a_dag_exception : public exception { + not_a_dag_exception(const std::string &s) : exception(s) {} +}; + /////////////////////////////////////////////////////////////////////////////// /// \brief Directed graph with nodes of type T. /// @@ -52,6 +56,10 @@ class graph { m_edgeLists[u].push_back(v); } + const std::set & get_node_set() const { + return m_nodes; + } + const std::vector & get_edge_list(const T & i) const { return m_edgeLists.find(i)->second; } @@ -104,7 +112,12 @@ class graph { } size_t visit(T u) { - if (m_finishTime.count(u)) return m_finishTime[u]; + if (m_finishTime.count(u)) { + if (m_finishTime[u] == 0) { + throw not_a_dag_exception("Cycle detected in graph"); + } + return m_finishTime[u]; + } m_finishTime[u] = 0; ++m_time; const std::vector & edgeList = get_edge_list(u); @@ -126,46 +139,142 @@ class graph { }; }; -/////////////////////////////////////////////////////////////////////////////// -/// Helper methods for memory assignment. -/// The memory assignment algorithm is in runtime::get_memory_factor. 
-/////////////////////////////////////////////////////////////////////////////// -class memory_runtime { +class resource_runtime { public: - memory_runtime(const std::vector & nodes); + resource_runtime(const std::vector & nodes, resource_type type) + : m_nodes(nodes) + , m_minimumUsage(0) + , m_maximumUsage(0) + , m_fraction(0.0) + , m_type(type) + { + const size_t N = m_nodes.size(); + for (size_t i = 0; i < N; ++i) { + m_minimumUsage += minimum_usage(i); + m_maximumUsage += maximum_usage(i); + m_fraction += fraction(i); + } + } // Node accessors - memory_size_type minimum_memory(size_t i) const; - memory_size_type maximum_memory(size_t i) const; - double fraction(size_t i) const; + memory_size_type minimum_usage(size_t i) const { + return m_nodes[i]->get_minimum_resource_usage(m_type); + }; + memory_size_type maximum_usage(size_t i) const { + return m_nodes[i]->get_maximum_resource_usage(m_type); + }; + double fraction(size_t i) const { + return m_nodes[i]->get_resource_fraction(m_type); + }; // Node accessor aggregates - memory_size_type sum_minimum_memory() const; - memory_size_type sum_maximum_memory() const; - double sum_fraction() const; + memory_size_type sum_minimum_usage() const { + return m_minimumUsage; + }; + memory_size_type sum_maximum_usage() const { + return m_maximumUsage; + }; + double sum_fraction() const { + return m_fraction; + }; // Node mutator - void set_memory(size_t i, memory_size_type mem); + void set_usage(size_t i, memory_size_type usage) { + m_nodes[i]->_internal_set_available_of_resource(m_type, usage); + }; - void assign_memory(double factor); + void assign_usage(double factor) { + for (size_t i = 0; i < m_nodes.size(); ++i) + set_usage(i, get_assigned_usage(i, factor)); + }; - // Special case of assign_memory when factor is zero. - void assign_minimum_memory(); + // Special case of assign_usage when factor is zero. 
+ void assign_minimum_resource() { + for (size_t i = 0; i < m_nodes.size(); ++i) + set_usage(i, minimum_usage(i)); + }; - memory_size_type sum_assigned_memory(double factor) const; + memory_size_type sum_assigned_usage(double factor) const { + memory_size_type total = 0; + for (size_t i = 0; i < m_nodes.size(); ++i) + total += get_assigned_usage(i, factor); + return total; + }; - memory_size_type get_assigned_memory(size_t i, double factor) const; + memory_size_type get_assigned_usage(size_t i, double factor) const { + return clamp(minimum_usage(i), maximum_usage(i), factor * fraction(i)); + }; static memory_size_type clamp(memory_size_type lo, memory_size_type hi, - double v); + double v) { + if (v < lo) return lo; + if (v > hi) return hi; + return static_cast(v); + }; + + void print_usage(double c, std::ostream & os) { + size_t cw = 12; + size_t prec_frac = 2; + std::string sep(2, ' '); + + os << "\nPipelining phase " << m_type << " assigned\n" + << std::setw(cw) << "Minimum" + << std::setw(cw) << "Maximum" + << std::setw(cw) << "Fraction" + << std::setw(cw) << "Assigned" + << sep << "Name\n"; + + for (size_t i = 0; i < m_nodes.size(); ++i) { + std::string frac; + { + std::stringstream ss; + ss << std::fixed << std::setprecision(prec_frac) + << fraction(i); + frac = ss.str(); + } - void print_memory(double c, std::ostream & os); + stream_size_type lo = minimum_usage(i); + stream_size_type hi = maximum_usage(i); + stream_size_type assigned = get_assigned_usage(i, c); -private: + os << std::setw(cw) << lo; + if (hi == std::numeric_limits::max()) { + os << std::setw(cw) << "inf"; + } else { + os << std::setw(cw) << hi; + } + os << std::setw(cw) << frac + << std::setw(cw) << assigned + << sep + << m_nodes[i]->get_name().substr(0, 50) << '\n'; + } + os << std::endl; + } + +protected: const std::vector & m_nodes; - memory_size_type m_minimumMemory; - memory_size_type m_maximumMemory; + memory_size_type m_minimumUsage; + memory_size_type m_maximumUsage; double 
m_fraction; + resource_type m_type; +}; + +/////////////////////////////////////////////////////////////////////////////// +/// Helper methods for file assignment. +/// The file assignment algorithm is in runtime::get_files_factor. +/////////////////////////////////////////////////////////////////////////////// +class file_runtime : public resource_runtime { +public: + file_runtime(const std::vector & nodes) : resource_runtime(nodes, FILES) {} +}; + +/////////////////////////////////////////////////////////////////////////////// +/// Helper methods for memory assignment. +/// The memory assignment algorithm is in runtime::get_memory_factor. +/////////////////////////////////////////////////////////////////////////////// +class memory_runtime : public resource_runtime { +public: + memory_runtime(const std::vector & nodes) : resource_runtime(nodes, MEMORY) {} }; /////////////////////////////////////////////////////////////////////////////// @@ -183,6 +292,16 @@ class datastructure_runtime { memory_size_type sum_assigned_memory(size_t phase) const; // sum the assigned memory for datastructures used in the phase using the factors given to the minimize_factor method void assign_memory(); + void free_datastructures(size_t phase) { + auto & ds = m_nodeMap.get_datastructures(); + for (auto & p: m_datastructures) { + if (p.second.right_most_phase != phase) continue; + auto it = ds.find(p.first); + if (it == ds.end()) continue; + it->second.second.reset(); + } + } + //void print_memory(double c, std::ostream & os); private: static memory_size_type clamp(memory_size_type lo, memory_size_type hi, double v); @@ -209,6 +328,27 @@ class datastructure_runtime { node_map & m_nodeMap; }; +std::string get_phase_name(const std::vector & phase) { + priority_type highest = std::numeric_limits::lowest(); + size_t highest_node = 0; + for (size_t i = 0; i < phase.size(); ++i) { + if (phase[i]->get_phase_name_priority() > highest && phase[i]->get_phase_name().size()) { + highest_node = i; + 
highest = phase[i]->get_phase_name_priority(); + } + } + std::string n = phase[highest_node]->get_phase_name(); + if (!n.empty()) return n; + + highest_node = 0; + for (size_t i = 0; i < phase.size(); ++i) { + if (phase[i]->get_name_priority() > highest) { + highest_node = i; + highest = phase[i]->get_name_priority(); + } + } + return phase[highest_node]->get_name(); +} /////////////////////////////////////////////////////////////////////////////// @@ -222,7 +362,15 @@ class datastructure_runtime { /////////////////////////////////////////////////////////////////////////////// class progress_indicators { public: - progress_indicators(): fp(nullptr) {} + progress_indicators(): fp(nullptr), m_nulls(false) {} + + progress_indicators(const progress_indicators & o) = delete; + progress_indicators & operator =(const progress_indicators & o) = delete; + progress_indicators & operator =(const progress_indicators && o) = delete; + progress_indicators(progress_indicators && o): fp(o.fp), m_nulls(o.m_nulls), m_progressIndicators(std::move(o.m_progressIndicators)) { + o.fp = nullptr; + o.m_progressIndicators.clear(); + } ~progress_indicators() { if (fp) fp->done(); @@ -243,10 +391,12 @@ class progress_indicators { m_progressIndicators.resize(N); fp = nullptr; if (!file|| !function) { + m_nulls = true; for (size_t i = 0; i < N; ++i) m_progressIndicators[i] = new progress_indicator_null(); return; } + m_nulls = false; fp = new fractional_progress(&pi); std::size_t uuid = 0; @@ -264,31 +414,10 @@ class progress_indicators { } private: - std::string get_phase_name(const std::vector & phase) { - priority_type highest = std::numeric_limits::min(); - size_t highest_node = 0; - for (size_t i = 0; i < phase.size(); ++i) { - if (phase[i]->get_phase_name_priority() > highest) { - highest_node = i; - highest = phase[i]->get_phase_name_priority(); - } - } - std::string n = phase[highest_node]->get_phase_name(); - if (!n.empty()) return n; - - highest_node = 0; - for (size_t i = 0; i < 
phase.size(); ++i) { - if (phase[i]->get_name_priority() > highest) { - highest_node = i; - highest = phase[i]->get_name_priority(); - } - } - return phase[highest_node]->get_name(); - } - friend class phase_progress_indicator; fractional_progress * fp; + bool m_nulls; std::vector m_progressIndicators; }; @@ -298,10 +427,24 @@ class progress_indicators { /////////////////////////////////////////////////////////////////////////////// class phase_progress_indicator { public: + phase_progress_indicator() : m_pi(nullptr) {} + phase_progress_indicator(const phase_progress_indicator &) = delete; + phase_progress_indicator(phase_progress_indicator && o): m_pi(o.m_pi) {o.m_pi = nullptr;} + + phase_progress_indicator & operator=(const phase_progress_indicator &) = delete; + phase_progress_indicator & operator=(phase_progress_indicator && o) { + if (m_pi) m_pi->done(); + m_pi = o.m_pi; + o.m_pi = nullptr; + return *this; + } + phase_progress_indicator(progress_indicators & pi, size_t phaseNumber, - const std::vector & nodes) + const std::vector & nodes, bool emptyFace) : m_pi(pi.m_progressIndicators[phaseNumber]) { + if (emptyFace && !pi.m_nulls) + static_cast(m_pi)->set_crumb(""); stream_size_type steps = 0; for (size_t j = 0; j < nodes.size(); ++j) { steps += nodes[j]->get_steps(); @@ -310,7 +453,7 @@ class phase_progress_indicator { } ~phase_progress_indicator() { - m_pi->done(); + if (m_pi) m_pi->done(); } progress_indicator_base & get() { @@ -350,125 +493,6 @@ class begin_end { std::vector m_topologicalOrder; }; -memory_runtime::memory_runtime(const std::vector & nodes) - : m_nodes(nodes) - , m_minimumMemory(0) - , m_maximumMemory(0) - , m_fraction(0.0) -{ - const size_t N = m_nodes.size(); - for (size_t i = 0; i < N; ++i) { - m_minimumMemory += minimum_memory(i); - m_maximumMemory += maximum_memory(i); - m_fraction += fraction(i); - } -} - -// Node accessors -memory_size_type memory_runtime::minimum_memory(size_t i) const { - return m_nodes[i]->get_minimum_memory(); -} 
- -memory_size_type memory_runtime::maximum_memory(size_t i) const { - return m_nodes[i]->get_maximum_memory(); -} - -double memory_runtime::fraction(size_t i) const { - return m_nodes[i]->get_memory_fraction(); -} - -// Node accessor aggregates -memory_size_type memory_runtime::sum_minimum_memory() const { - return m_minimumMemory; -} - -memory_size_type memory_runtime::sum_maximum_memory() const { - return m_maximumMemory; -} - -double memory_runtime::sum_fraction() const { - return m_fraction; -} - - -// Node mutator -void memory_runtime::set_memory(size_t i, memory_size_type mem) { - m_nodes[i]->set_available_memory(mem); -} - -void memory_runtime::assign_memory(double factor) { - for (size_t i = 0; i < m_nodes.size(); ++i) - set_memory(i, get_assigned_memory(i, factor)); -} - -// Special case of assign_memory when factor is zero. -void memory_runtime::assign_minimum_memory() { - for (size_t i = 0; i < m_nodes.size(); ++i) - set_memory(i, minimum_memory(i)); -} - -memory_size_type memory_runtime::sum_assigned_memory(double factor) const { - memory_size_type memoryAssigned = 0; - for (size_t i = 0; i < m_nodes.size(); ++i) - memoryAssigned += get_assigned_memory(i, factor); - return memoryAssigned; -} - -memory_size_type memory_runtime::get_assigned_memory(size_t i, - double factor) const { - return clamp(minimum_memory(i), maximum_memory(i), - factor * fraction(i)); -} - -/*static*/ -memory_size_type memory_runtime::clamp(memory_size_type lo, - memory_size_type hi, - double v) -{ - if (v < lo) return lo; - if (v > hi) return hi; - return static_cast(v); -} - -void memory_runtime::print_memory(double c, std::ostream & os) { - size_t cw = 12; - size_t prec_frac = 2; - std::string sep(2, ' '); - - os << "\nPipelining phase memory assigned\n" - << std::setw(cw) << "Minimum" - << std::setw(cw) << "Maximum" - << std::setw(cw) << "Fraction" - << std::setw(cw) << "Assigned" - << sep << "Name\n"; - - for (size_t i = 0; i < m_nodes.size(); ++i) { - std::string frac; - { 
- std::stringstream ss; - ss << std::fixed << std::setprecision(prec_frac) - << fraction(i); - frac = ss.str(); - } - - stream_size_type lo = minimum_memory(i); - stream_size_type hi = maximum_memory(i); - stream_size_type assigned = get_assigned_memory(i, c); - - os << std::setw(cw) << lo; - if (hi == std::numeric_limits::max()) { - os << std::setw(cw) << "inf"; - } else { - os << std::setw(cw) << hi; - } - os << std::setw(cw) << frac - << std::setw(cw) << assigned - << sep - << m_nodes[i]->get_name().substr(0, 50) << '\n'; - } - os << std::endl; -} - datastructure_runtime::datastructure_runtime(const std::vector > & phases, node_map & nodeMap) : m_nodeMap(nodeMap) { @@ -562,18 +586,26 @@ memory_size_type datastructure_runtime::clamp(memory_size_type lo, memory_size_t void datastructure_runtime::assign_memory() { for(std::map::iterator i = m_datastructures.begin(); i != m_datastructures.end(); ++i) { memory_size_type mem = clamp(i->second.min, i->second.max, i->second.factor * i->second.priority); - m_nodeMap.get_datastructures().insert(std::make_pair(i->first, std::make_pair(mem, boost::any()))); + m_nodeMap.get_datastructures().insert(std::make_pair(i->first, std::make_pair(mem, any_noncopyable()))); } } -runtime::runtime(node_map::ptr nodeMap) - : m_nodeMap(*nodeMap) -{ -} - -size_t runtime::get_node_count() { - return m_nodeMap.size(); -} +struct gocontext { + std::map phaseMap; + std::vector flushPriorities; + graph phaseGraph; + graph orderedPhaseGraph; + std::vector > phases; + std::unordered_set evacuateWhenDone; + std::vector > itemFlow; + std::vector > actor; + datastructure_runtime drt; + progress_indicators pi; + size_t i; + memory_size_type files; + memory_size_type memory; + phase_progress_indicator phaseProgress; +}; size_t calculate_recursive_flush_priority(size_t phase, std::vector > & mem, const std::vector & flushPriorities, const graph & phaseGraph) { if(mem[phase].second) @@ -604,6 +636,17 @@ class flush_priority_greater_comp { const 
std::vector > & m_priorities; }; + +runtime::runtime(node_map::ptr nodeMap) + : m_nodeMap(*nodeMap) +{ +} + +size_t runtime::get_node_count() { + return m_nodeMap.size(); +} + + void runtime::get_ordered_graph(const std::vector & flushPriorities, const graph & phaseGraph, graph & orderedPhaseGraph) { std::vector > recursiveFlushPriorites; recursiveFlushPriorites.resize(phaseGraph.size()); @@ -637,11 +680,13 @@ void runtime::get_flush_priorities(const std::map & phaseMap, st } } -void runtime::go(stream_size_type items, - progress_indicator_base & progress, - memory_size_type memory, - const char * file, - const char * function) { +void gocontextdel::operator()(void * p) {delete static_cast(p);} + +gocontext_ptr runtime::go_init(stream_size_type items, + progress_indicator_base & progress, + memory_size_type files, + memory_size_type memory, + const char * file, const char * function) { if (get_node_count() == 0) throw tpie::exception("no nodes in pipelining graph"); @@ -667,7 +712,7 @@ void runtime::go(stream_size_type items, // Build phases vector std::vector > phases; - std::vector evacuateWhenDone; + std::unordered_set evacuateWhenDone; get_phases(phaseMap, orderedPhaseGraph, evacuateWhenDone, phases); // Build item flow graph and actor graph for each phase @@ -676,9 +721,10 @@ void runtime::go(stream_size_type items, std::vector > actor; get_actor_graphs(phases, actor); - // Check that each phase has at least one initiator - ensure_initiators(phases); - + // Make the nodeMap forward all the forwards calls + // made on pipe_bases + m_nodeMap.forward_pipe_base_forwards(); + // Toposort item flow graph for each phase // and call node::prepare in item source to item sink order prepare_all(itemFlow); @@ -686,6 +732,9 @@ void runtime::go(stream_size_type items, // build the datastructure runtime datastructure_runtime drt(phases, m_nodeMap); + // Gather node file requirements and assign files to each phase + assign_files(phases, files); + // Gather node memory 
requirements and assign memory to each phase assign_memory(phases, memory, drt); @@ -706,28 +755,91 @@ void runtime::go(stream_size_type items, progress_indicators pi; pi.init(items, progress, phases, file, function); - for (size_t i = 0; i < phases.size(); ++i) { + return gocontext_ptr(new gocontext{ + std::move(phaseMap), + std::move(flushPriorities), + std::move(phaseGraph), + std::move(orderedPhaseGraph), + std::move(phases), + std::move(evacuateWhenDone), + std::move(itemFlow), + std::move(actor), + std::move(drt), + std::move(pi), + 0, + files, + memory, + phase_progress_indicator()}); +} + + +void runtime::go_until(gocontext * gc, node * node) { + if (gc->i > gc->phases.size()) return; + + if (gc->i != 0) { + begin_end beginEnd(gc->actor[gc->i-1]); + beginEnd.end(); + } + + for (; gc->i < gc->phases.size(); ++gc->i) { // Run each phase: // Evacuate previous if necessary - if (i > 0 && evacuateWhenDone[i-1]) evacuate_all(phases[i-1]); + auto & phase = gc->phases[gc->i]; + log_debug() << "Running pipe phase " << get_phase_name(phase) << std::endl; + + if (gc->i > 0) evacuate_all(gc->phases[gc->i-1], gc->evacuateWhenDone); + // call propagate in item source to item sink order - propagate_all(itemFlow[i]); + propagate_all(gc->itemFlow[gc->i]); + // reassign files to all nodes in the phase + reassign_files(gc->phases, gc->i, gc->files); // reassign memory to all nodes in the phase - reassign_memory(phases, i, memory, drt); + reassign_memory(gc->phases, gc->i, gc->memory, gc->drt); + + bool emptyFace = true; + for (auto n: phase) + if (is_initiator(n) && !n->is_go_free()) + emptyFace = false; + // sum number of steps and call pi.init() - phase_progress_indicator phaseProgress(pi, i, phases[i]); + gc->phaseProgress = phase_progress_indicator(gc->pi, gc->i, phase, emptyFace); + // set progress indicators on each node - set_progress_indicators(phases[i], phaseProgress.get()); + set_progress_indicators(phase, gc->phaseProgress.get()); // call begin in leaf to root 
actor order - begin_end beginEnd(actor[i]); + begin_end beginEnd(gc->actor[gc->i]); beginEnd.begin(); + // call go on initiators - go_initiators(phases[i]); + for (auto n: phase) + if (n == node) { + gc->i++; + return; + } + go_initiators(gc->phases[gc->i]); + // call end in root to leaf actor order beginEnd.end(); + + gc->drt.free_datastructures(gc->i); + // call pi.done in ~phase_progress_indicator + gc->phaseProgress = phase_progress_indicator(); } // call fp->done in ~progress_indicators + gc->i++; +} + +void runtime::go(stream_size_type items, + progress_indicator_base & progress, + memory_size_type filesAvailable, + memory_size_type memory, + const char * file, + const char * function) { + gocontext_ptr gc = go_init(items, progress, filesAvailable, memory, file, function); + // Check that each phase has at least one initiator + ensure_initiators(gc->phases); + go_until(gc.get(), nullptr); } void runtime::get_item_sources(std::vector & itemSources) { @@ -749,6 +861,7 @@ void runtime::get_item_sources(std::vector & itemSources) { case pulls: case depends: case no_forward_depends: + case memory_share_depends: possibleSources.erase(from); break; } @@ -778,6 +891,7 @@ void runtime::get_item_sinks(std::vector & itemSinks) { case pulls: case depends: case no_forward_depends: + case memory_share_depends: possibleSinks.erase(to); break; } @@ -804,7 +918,9 @@ void runtime::get_phase_map(std::map & phaseMap) { const node_map::relmap_t & relations = m_nodeMap.get_relations(); for (node_map::relmapit i = relations.begin(); i != relations.end(); ++i) { - if (i->second.second != depends && i->second.second != no_forward_depends) + if (i->second.second != depends + && i->second.second != no_forward_depends + && i->second.second != memory_share_depends) unionFind.union_set(numbering[i->first], numbering[i->second.first]); } @@ -830,7 +946,9 @@ void runtime::get_phase_graph(const std::map & phaseMap, const node_map::relmap_t & relations = m_nodeMap.get_relations(); for 
(node_map::relmapit i = relations.begin(); i != relations.end(); ++i) { - if (i->second.second == depends || i->second.second == no_forward_depends) + if (i->second.second == depends + || i->second.second == no_forward_depends + || i->second.second == memory_share_depends) phaseGraph.add_edge(phaseMap.find(m_nodeMap.get(i->second.first))->second, phaseMap.find(m_nodeMap.get(i->first))->second); } @@ -856,11 +974,163 @@ std::vector runtime::inverse_permutation(const std::vector & f) void runtime::get_phases(const std::map & phaseMap, const graph & phaseGraph, - std::vector & evacuateWhenDone, + std::unordered_set & evacuateWhenDone, std::vector > & phases) { + /* + * We have a dependency edge saying that a node in one phase shares memory with a node in another phase. + * If these two phases are not executed consecutively the shared memory will have to be evacuated to disk, + * since some other phase running between the two phases could need the memory. + * Obviously we want to minimize the number of evacuations, but how? + * + * Let a normal dependency between two phase be represented by a black edge + * and let a memory sharing dependency be represented by a red edge if the memory can be evacuated + * and green if it cannot be evacuated. + * We say that a non-black edge is satisfied if its two end points are consecutive in the topological order, + * so the objective is to maximize the number of satisfied edges. + * Also we must satisfy ALL green edges, if this is not possible the input is malformed, + * and someone has to implement an evacuate method somewhere. + * + * First note that a non-black edges cannot be satisfied + * if there exists an alternative path from its source to its destination (with length at least 2), + * so any such red edge can be recolored to black, if there is any such green edge the input is invalid. + * + * Next note that for any node we can satisfy at most one outgoing edge, and at most one incoming edge. 
+ */ + + std::vector> blackEdges; + std::vector> redEdges; + std::unordered_map greenEdges; + std::unordered_map revGreenEdges; + + const node_map::relmap_t & relations = m_nodeMap.find_authority()->get_relations(); + for (node_map::relmapit i = relations.begin(); i != relations.end(); ++i) { + // from and to is swapped in the relationship so that + // to depends on from, meaning from should be run before to + node *from = m_nodeMap.get(i->second.first); + node *to = m_nodeMap.get(i->first); + + size_t fromPhase = phaseMap.find(from)->second; + size_t toPhase = phaseMap.find(to)->second; + bits::node_relation rel = i->second.second; + + if (fromPhase == toPhase) { + // Not an edge between two different phases + continue; + } + + if (rel != memory_share_depends) { + // Black edge + log_debug() << "Black edge: " << fromPhase << " -> " << toPhase << std::endl; + blackEdges.push_back({fromPhase, toPhase}); + continue; + } + + if (from->can_evacuate()) { + // Red edge + log_debug() << "Red edge: " << fromPhase << " -> " << toPhase << std::endl; + redEdges.push_back({fromPhase, toPhase}); + } else { + // Green edge + log_debug() << "Green edge: " << fromPhase << " -> " << toPhase << std::endl; + + // Check if we already have a green edge from fromPhase or to toPhase + // If so one of edges can't be satisfied, but all green edges must be satisfied + if (greenEdges.find(fromPhase) != greenEdges.end() || + revGreenEdges.find(toPhase) != revGreenEdges.end()) { + throw tpie::exception("get_phases: can't satisfy all green edges"); + } + greenEdges[fromPhase] = toPhase; + revGreenEdges[toPhase] = fromPhase; + } + } + + disjoint_sets contractedNodes(phaseGraph.size()); + for (size_t i : phaseGraph.get_node_set()) { + contractedNodes.make_set(i); + } + + for (const auto & p : greenEdges) { + contractedNodes.union_set(p.first, p.second); + } + + std::unordered_map> greenPaths; + for (const auto & p : greenEdges) { + size_t i = contractedNodes.find_set(p.first); + 
greenPaths[i].add_edge(p.first, p.second); + } + + graph contractedGraph; + for (size_t i : phaseGraph.get_node_set()) { + contractedGraph.add_node(contractedNodes.find_set(i)); + } + + /* + * Greedily prefer red edges over black in the topological order. + * First we add all black edges to the graph then all the red. + * If there is both a black edge and red edge between the same contracted node, + * we shall consider the edge as a red edge. + * This ensure that dfs in the topological order implementation + * will visit red edges later than black edges. + */ + std::set> redEdgesSet; + for (auto p : redEdges) { + p.first = contractedNodes.find_set(p.first); + p.second = contractedNodes.find_set(p.second); + redEdgesSet.insert(p); + } + + for (bool addingRedEdge : {false, true}) { + const auto * edges = addingRedEdge? &redEdges: &blackEdges; + for (const auto & p : *edges) { + size_t i = contractedNodes.find_set(p.first); + size_t j = contractedNodes.find_set(p.second); + + /* + * If we have an edge from one contracted node to another + * it must either be a green edge or an edge going + * in the same direction as the green path, because the graph is a DAG. + * So if we find a topological order for new the graph, + * the topological order without contractions will also satisfy this edge. + */ + if (i == j) { + continue; + } + + /* + * If we are adding black edges, but there is also a red edge + * between the same nodes, we should first add it after adding all black edges. 
+ */ + if (!addingRedEdge && redEdgesSet.count({i, j})) { + continue; + } + + if (!contractedGraph.has_edge(i, j)) { + contractedGraph.add_edge(i, j); + } + } + } + std::vector topologicalOrder; - phaseGraph.rootfirst_topological_order(topologicalOrder); + try { + contractedGraph.rootfirst_topological_order(topologicalOrder); + } catch(not_a_dag_exception & e) { + throw tpie::exception("get_phases: can't satisfy all green edges"); + } + + // Expand contracted edges in topologicalOrder + for (const auto & p : greenPaths) { + size_t i = p.first; + const graph & g = p.second; + + std::vector path; + g.topological_order(path); + + auto it = std::find(topologicalOrder.begin(), topologicalOrder.end(), i); + *it = *path.rbegin(); + topologicalOrder.insert(it, path.begin(), path.end() - 1); + } + // topologicalOrder[0] is the first phase to run, // topologicalOrder[1] the next, and so on. @@ -876,10 +1146,21 @@ void runtime::get_phases(const std::map & phaseMap, phases[topoOrderMap[i->second]].push_back(i->first); } - evacuateWhenDone.resize(phases.size(), false); - for (size_t i = 0; i + 1 < phases.size(); ++i) { - if (!phaseGraph.has_edge(topologicalOrder[i], topologicalOrder[i+1])) - evacuateWhenDone[i] = true; + std::unordered_set previousNodes; + bits::node_map::ptr nodeMap = (phases.front().front())->get_node_map()->find_authority(); + for (const auto & phase : phases) { + for (const auto node : phase) { + const auto range = nodeMap->get_relations().equal_range(node->get_id()); + for (auto it = range.first ; it != range.second ; ++it) { + if (it->second.second != memory_share_depends) continue; + if (previousNodes.count(it->second.first) != 0) continue; + evacuateWhenDone.emplace(it->second.first); + } + } + previousNodes.clear(); + for (const auto node : phase) { + previousNodes.emplace(node->get_id()); + } } } @@ -911,7 +1192,9 @@ void runtime::get_graph(std::vector & phase, graph & result, for (relmapit j = edges.first; j != edges.second; ++j) { node * u = 
m_nodeMap.get(j->first); node * v = m_nodeMap.get(j->second.first); - if (j->second.second == depends || j->second.second == no_forward_depends) continue; + if (j->second.second == depends) continue; + if (j->second.second == no_forward_depends) continue; + if (j->second.second == memory_share_depends) continue; if (itemFlow && j->second.second == pulls) std::swap(u, v); result.add_edge(u, v); } @@ -948,11 +1231,16 @@ void runtime::prepare_all(const std::vector > & itemFlow) { } } -void runtime::evacuate_all(const std::vector & phase) { - for (size_t i = 0; i < phase.size(); ++i) { - if (phase[i]->can_evacuate()) { - phase[i]->evacuate(); - tpie::log_debug() << "Evacuated node " << phase[i]->get_id() << std::endl; +void runtime::evacuate_all(const std::vector & phase, + const std::unordered_set & evacuateWhenDone) { + for (auto node : phase) { + if (evacuateWhenDone.count(node->get_id()) == 0) + continue; + if (node->can_evacuate()) { + node->evacuate(); + tpie::log_debug() << "Evacuated node " << node->get_id() << std::endl; + } else { + tpie::log_warning() << "Need to evacuate but not possible." 
<< node->get_id() << std::endl; } } } @@ -984,6 +1272,89 @@ void runtime::go_initiators(const std::vector & phase) { } } +/*static*/ +void runtime::set_resource_being_assigned(const std::vector & nodes, + resource_type type) { + for (node * n : nodes) + n->set_resource_being_assigned(type); +} + +/*static*/ +void runtime::assign_files(const std::vector > & phases, + memory_size_type files) { + for (size_t phase = 0; phase < phases.size(); ++phase) { + file_runtime frt(phases[phase]); + + double c = get_files_factor(files, frt); +#ifndef TPIE_NDEBUG + frt.print_usage(c, log_debug()); +#endif // TPIE_NDEBUG + set_resource_being_assigned(phases[phase], FILES); + frt.assign_usage(c); + set_resource_being_assigned(phases[phase], NO_RESOURCE); + } +} + +/*static*/ +void runtime::reassign_files(const std::vector > & phases, + size_t phase, + memory_size_type files) { + file_runtime frt(phases[phase]); + double c = get_files_factor(files, frt); +#ifndef TPIE_NDEBUG + frt.print_usage(c, log_debug()); +#endif // TPIE_NDEBUG + set_resource_being_assigned(phases[phase], FILES); + frt.assign_usage(c); + set_resource_being_assigned(phases[phase], NO_RESOURCE); +} + +/*static*/ +double runtime::get_files_factor(memory_size_type files, const file_runtime & frt) { + memory_size_type min = frt.sum_minimum_usage(); + if (min > files) { + log_warning() << "Not enough files for pipelining phase (" + << min << " > " << files << ")" + << std::endl; + return 0.0; + } + + // This case is handled specially to avoid dividing by zero later on. 
+ double fraction_sum = frt.sum_fraction(); + if (fraction_sum < 1e-9) { + return 0.0; + } + + double c_lo = 0.0; + double c_hi = 1.0; + // Exponential search + memory_size_type oldFilesAssigned = 0; + while (true) { + double factor = files * c_hi / fraction_sum; + memory_size_type filesAssigned = frt.sum_assigned_usage(factor); + if (filesAssigned < files && filesAssigned != oldFilesAssigned) + c_hi *= 2; + else + break; + oldFilesAssigned = filesAssigned; + } + + // Binary search + while (c_hi - c_lo > 1e-6) { + double c = c_lo + (c_hi-c_lo)/2; + double factor = files * c / fraction_sum; + memory_size_type filesAssigned = frt.sum_assigned_usage(factor); + + if (filesAssigned > files) { + c_hi = c; + } else { + c_lo = c; + } + } + + return files * c_lo / fraction_sum; +} + /*static*/ void runtime::assign_memory(const std::vector > & phases, memory_size_type memory, @@ -1000,9 +1371,11 @@ void runtime::assign_memory(const std::vector > & phases, double c = get_memory_factor(memory, phase, mrt, drt, true); #ifndef TPIE_NDEBUG - mrt.print_memory(c, log_debug()); + mrt.print_usage(c, log_debug()); #endif // TPIE_NDEBUG - mrt.assign_memory(c); + set_resource_being_assigned(phases[phase], MEMORY); + mrt.assign_usage(c); + set_resource_being_assigned(phases[phase], NO_RESOURCE); } drt.assign_memory(); } @@ -1015,14 +1388,16 @@ void runtime::reassign_memory(const std::vector > & phases, memory_runtime mrt(phases[phase]); double c = get_memory_factor(memory, phase, mrt, drt, true); #ifndef TPIE_NDEBUG - mrt.print_memory(c, log_debug()); + mrt.print_usage(c, log_debug()); #endif // TPIE_NDEBUG - mrt.assign_memory(c); + set_resource_being_assigned(phases[phase], MEMORY); + mrt.assign_usage(c); + set_resource_being_assigned(phases[phase], NO_RESOURCE); } /*static*/ double runtime::get_memory_factor(memory_size_type memory, memory_size_type phase, const memory_runtime & mrt, const datastructure_runtime & drt, bool datastructures_locked) { - memory_size_type min = 
mrt.sum_minimum_memory() + drt.sum_minimum_memory(phase); + memory_size_type min = mrt.sum_minimum_usage() + drt.sum_minimum_memory(phase); if (min > memory) { log_warning() << "Not enough memory for pipelining phase (" << min << " > " << memory << ")" @@ -1042,7 +1417,7 @@ double runtime::get_memory_factor(memory_size_type memory, memory_size_type phas memory_size_type oldMemoryAssigned = 0; while (true) { double factor = memory * c_hi / fraction_sum; - memory_size_type memoryAssigned = mrt.sum_assigned_memory(factor) + (datastructures_locked ? drt.sum_assigned_memory(phase) : drt.sum_assigned_memory(factor, phase)); + memory_size_type memoryAssigned = mrt.sum_assigned_usage(factor) + (datastructures_locked ? drt.sum_assigned_memory(phase) : drt.sum_assigned_memory(factor, phase)); if (memoryAssigned < memory && memoryAssigned != oldMemoryAssigned) c_hi *= 2; else @@ -1054,7 +1429,7 @@ double runtime::get_memory_factor(memory_size_type memory, memory_size_type phas while (c_hi - c_lo > 1e-6) { double c = c_lo + (c_hi-c_lo)/2; double factor = memory * c / fraction_sum; - memory_size_type memoryAssigned = mrt.sum_assigned_memory(factor) + (datastructures_locked ? drt.sum_assigned_memory(phase) : drt.sum_assigned_memory(factor, phase)); + memory_size_type memoryAssigned = mrt.sum_assigned_usage(factor) + (datastructures_locked ? 
drt.sum_assigned_memory(phase) : drt.sum_assigned_memory(factor, phase)); if (memoryAssigned > memory) { c_hi = c; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/runtime.h b/keyvi/3rdparty/tpie/tpie/pipelining/runtime.h index fbf8475e..2976d14d 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/runtime.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/runtime.h @@ -22,7 +22,9 @@ #include #include +#include #include +#include namespace tpie { @@ -32,8 +34,16 @@ namespace bits { template class graph; +class file_runtime; class memory_runtime; class datastructure_runtime; + +struct gocontext; +struct gocontextdel { + void operator()(void *); +}; +typedef std::unique_ptr gocontext_ptr; + /////////////////////////////////////////////////////////////////////////////// /// \brief Execute the pipeline contained in a node_map. @@ -56,6 +66,14 @@ class runtime { /////////////////////////////////////////////////////////////////////////// size_t get_node_count(); + gocontext_ptr go_init(stream_size_type items, + progress_indicator_base & progress, + memory_size_type files, + memory_size_type memory, + const char * file, const char * function); + + void go_until(gocontext * gc, node * node=nullptr); + /////////////////////////////////////////////////////////////////////////// /// \brief Execute the pipeline. /// @@ -73,6 +91,7 @@ class runtime { /////////////////////////////////////////////////////////////////////////// void go(stream_size_type items, progress_indicator_base & progress, + memory_size_type files, memory_size_type memory, const char * file, const char * function); @@ -123,12 +142,12 @@ class runtime { /// \brief Compute topological phase order. /// /// The vector phases[i] will contain the nodes in the ith phase to run. - /// If no node in phases[i] has a dependency to a node in phases[i-1], - /// evacuateWhenDone[i] is set to true. 
+ /// For each node in phase[i], if the node has a memory share dependency to + /// any node not in phases[i-1], the node is contained in evacuateWhenDone. /////////////////////////////////////////////////////////////////////////// void get_phases(const std::map & phaseMap, const graph & phaseGraph, - std::vector & evacuateWhenDone, + std::unordered_set & evacuateWhenDone, std::vector > & phases); /////////////////////////////////////////////////////////////////////////// @@ -175,9 +194,11 @@ class runtime { void prepare_all(const std::vector > & itemFlow); /////////////////////////////////////////////////////////////////////////// - /// \brief Call evacuate on all nodes for which can_evacuate() is true. + /// \brief Call evacuate on all nodes in evacuateWhenDone for which + /// can_evacuate() is true. /////////////////////////////////////////////////////////////////////////// - void evacuate_all(const std::vector & phase); + void evacuate_all(const std::vector & phase, + const std::unordered_set & evacuateWhenDone); /////////////////////////////////////////////////////////////////////////// /// \brief Call propagate on all nodes in item source to sink order. @@ -196,6 +217,31 @@ class runtime { /////////////////////////////////////////////////////////////////////////// void go_initiators(const std::vector & phase); + /////////////////////////////////////////////////////////////////////////// + /// \brief Internal method used by go(). + /////////////////////////////////////////////////////////////////////////// + static void set_resource_being_assigned(const std::vector & nodes, + resource_type type); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Internal method used by go(). 
+ /////////////////////////////////////////////////////////////////////////// + static void assign_files(const std::vector > & phases, + memory_size_type files); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Internal method used by go(). + /////////////////////////////////////////////////////////////////////////// + static void reassign_files(const std::vector > & phases, + memory_size_type phase, + memory_size_type files); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Internal method used by assign_memory(). + /////////////////////////////////////////////////////////////////////////// + static double get_files_factor(memory_size_type files, + const file_runtime & frt); + /////////////////////////////////////////////////////////////////////////// /// \brief Internal method used by go(). /////////////////////////////////////////////////////////////////////////// diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/serialization.h b/keyvi/3rdparty/tpie/tpie/pipelining/serialization.h index 3ec5d210..5e0f6fa1 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/serialization.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/serialization.h @@ -51,6 +51,7 @@ class input_t : public node { set_name("Serialization reader"); add_push_destination(this->dest); set_minimum_memory(rd->memory_usage()); + set_minimum_resource_usage(FILES, 1); } virtual void propagate() override { @@ -86,6 +87,7 @@ class output_t : public node { { set_name("Serialization writer"); set_minimum_memory(wr->memory_usage()); + set_minimum_resource_usage(FILES, 1); } void push(const T & x) { @@ -134,6 +136,7 @@ class reverser_input_t : public node { { this->set_name("Serialization reverse writer"); //TODO memory + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED | PLOT_SIMPLIFIED_HIDE); } @@ -175,6 +178,7 @@ class reverser_output_t : public node { add_dependency(input_token); 
add_push_destination(this->dest); //TODO memory + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED); } @@ -218,8 +222,8 @@ class reverser_pull_output_t : public node { { set_name("Serialization reverse reader"); add_dependency(input_token); - add_push_destination(this->dest); //TODO memory + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED); } @@ -267,6 +271,7 @@ class buffer_input_t : public node { , items(0) { set_name("Serialization buffer writer"); //TODO memory + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED | PLOT_SIMPLIFIED_HIDE); } @@ -307,6 +312,7 @@ class buffer_output_t : public node { add_dependency(input_token); add_push_destination(this->dest); //TODO MEMORY + set_minimum_resource_usage(FILES, 1); set_name("Serialization buffer reader"); set_plot_options(PLOT_BUFFERED); } @@ -352,6 +358,7 @@ class buffer_pull_output_t: public node { add_dependency(input_token); set_name("Fetching items", PRIORITY_SIGNIFICANT); //TODO memory + set_minimum_resource_usage(FILES, 1); set_plot_options(PLOT_BUFFERED); } diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/serialization_sort.h b/keyvi/3rdparty/tpie/tpie/pipelining/serialization_sort.h index beea7570..238b4f8c 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/serialization_sort.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/serialization_sort.h @@ -63,7 +63,7 @@ class sort_output_base : public node { } void set_calc_node(node & calc) { - add_dependency(calc); + add_memory_share_dependency(calc); } virtual void propagate() override { @@ -86,12 +86,11 @@ class sort_output_base : public node { } void add_calc_dependency(node_token tkn) { - add_dependency(tkn); + add_memory_share_dependency(tkn); } protected: virtual void set_available_memory(memory_size_type availableMemory) override { - node::set_available_memory(availableMemory); if (!m_propagate_called) m_sorter->set_phase_3_memory(availableMemory); } @@ -231,6 +230,8 @@ class sort_calc_t : public node { 
m_sorter->set_owner(this); } + virtual bool is_go_free() const override {return m_sorter->is_merge_runs_free();} + virtual void go() override { progress_indicator_base * pi = proxy_progress_indicator(); log_debug() << "TODO: Progress information during merging." << std::endl; @@ -259,12 +260,11 @@ class sort_calc_t : public node { } void set_input_node(node & input) { - add_dependency(input); + add_memory_share_dependency(input); } protected: virtual void set_available_memory(memory_size_type availableMemory) override { - node::set_available_memory(availableMemory); if (!m_propagate_called) m_sorter->set_phase_2_memory(availableMemory); } @@ -330,7 +330,6 @@ class sort_input_t : public node { protected: virtual void set_available_memory(memory_size_type availableMemory) override { - node::set_available_memory(availableMemory); if (!m_propagate_called) m_sorter->set_phase_1_memory(availableMemory); } @@ -358,7 +357,7 @@ class sort_factory_base : public factory_base { }; template - typename constructed::type construct(dest_t dest) const { + typename constructed::type construct(dest_t dest) { typedef typename push_type::type item_type; typedef typename constructed::Traits Traits; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/sort.h b/keyvi/3rdparty/tpie/tpie/pipelining/sort.h index 8a99e22d..889ebc0c 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/sort.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/sort.h @@ -70,14 +70,20 @@ class sort_output_base : public node { } void add_calc_dependency(node_token tkn) { - add_dependency(tkn); + add_memory_share_dependency(tkn); } protected: - virtual void set_available_memory(memory_size_type availableMemory) override { - node::set_available_memory(availableMemory); - if (!m_propagate_called) - m_sorter->set_phase_3_memory(availableMemory); + virtual void resource_available_changed(resource_type type, memory_size_type available) override { + // TODO: Handle changing parameters of sorter after data structures has been frozen, i.e. 
after propagate + if (m_propagate_called) + return; + + if (type == MEMORY) + m_sorter->set_phase_3_memory(available); + else if (type == FILES) { + m_sorter->set_phase_3_files(available); + } } sort_output_base(sorterptr sorter) @@ -109,6 +115,8 @@ class sort_pull_output_t : public sort_output_base { sort_pull_output_t(sorterptr sorter) : sort_output_base(sorter) { + this->set_minimum_resource_usage(FILES, sorter_t::minimumFilesPhase3); + this->set_resource_fraction(FILES, 1.0); this->set_minimum_memory(sorter_t::minimum_memory_phase_3()); this->set_maximum_memory(sorter_t::maximum_memory_phase_3()); this->set_name("Write sorted output", PRIORITY_INSIGNIFICANT); @@ -168,6 +176,8 @@ class sort_output_t : public sort_output_base::type, , dest(std::move(dest)) { this->add_push_destination(dest); + this->set_minimum_resource_usage(FILES, sorter_t::minimumFilesPhase3); + this->set_resource_fraction(FILES, 1.0); this->set_minimum_memory(sorter_t::minimum_memory_phase_3()); this->set_maximum_memory(sorter_t::maximum_memory_phase_3()); this->set_name("Write sorted output", PRIORITY_INSIGNIFICANT); @@ -231,6 +241,8 @@ class sort_calc_t : public node { } void init() { + set_minimum_resource_usage(FILES, sorter_t::minimumFilesPhase2); + set_resource_fraction(FILES, 1.0); set_minimum_memory(sorter_t::minimum_memory_phase_2()); set_name("Perform merge heap", PRIORITY_SIGNIFICANT); set_memory_fraction(1.0); @@ -252,6 +264,8 @@ class sort_calc_t : public node { m_sorter.reset(); } + virtual bool is_go_free() const override {return m_sorter->is_calc_free();} + virtual void go() override { progress_indicator_base * pi = proxy_progress_indicator(); m_sorter->calc(*pi); @@ -271,14 +285,20 @@ class sort_calc_t : public node { } void set_input_node(node & input) { - add_dependency(input); + add_memory_share_dependency(input); } protected: - virtual void set_available_memory(memory_size_type availableMemory) override { - node::set_available_memory(availableMemory); - if 
(!m_propagate_called) - m_sorter->set_phase_2_memory(availableMemory); + virtual void resource_available_changed(resource_type type, memory_size_type available) override { + // TODO: Handle changing parameters of sorter after data structures has been frozen, i.e. after propagate + if (m_propagate_called) + return; + + if (type == MEMORY) + m_sorter->set_phase_2_memory(available); + else if (type == FILES) { + m_sorter->set_phase_2_files(available); + } } private: @@ -310,8 +330,10 @@ class sort_input_t : public node { , dest(std::move(dest)) { this->dest.set_input_node(*this); - set_minimum_memory(sorter_t::minimum_memory_phase_1()); set_name("Form input runs", PRIORITY_SIGNIFICANT); + set_minimum_resource_usage(FILES, sorter_t::minimumFilesPhase1); + set_resource_fraction(FILES, 0.0); + set_minimum_memory(m_sorter->minimum_memory_phase_1()); set_memory_fraction(1.0); set_plot_options(PLOT_BUFFERED | PLOT_SIMPLIFIED_HIDE); } @@ -319,7 +341,6 @@ class sort_input_t : public node { virtual void propagate() override { if (this->can_fetch("items")) m_sorter->set_items(this->fetch("items")); - m_sorter->begin(); m_propagate_called = true; } @@ -332,6 +353,7 @@ class sort_input_t : public node { } void begin() override { + m_sorter->begin(); m_sorter->set_owner(this); } @@ -353,10 +375,16 @@ class sort_input_t : public node { } protected: - virtual void set_available_memory(memory_size_type availableMemory) override { - node::set_available_memory(availableMemory); - if (!m_propagate_called) - m_sorter->set_phase_1_memory(availableMemory); + virtual void resource_available_changed(resource_type type, memory_size_type available) override { + // TODO: Handle changing parameters of sorter after data structures has been frozen, i.e. 
after propagate + if (m_propagate_called) + return; + + if (type == MEMORY) + m_sorter->set_phase_1_memory(available); + else if (type == FILES) { + m_sorter->set_phase_1_files(available); + } } private: sorterptr m_sorter; @@ -381,7 +409,7 @@ class sort_factory_base : public factory_base { }; template - typename constructed::type construct(dest_t dest) const { + typename constructed::type construct(dest_t dest) { typedef typename push_type::type item_type; typedef typename store_t::template element_type::type element_type; typedef typename constructed::pred_type pred_type; @@ -563,7 +591,6 @@ class passive_sorter_factory_output : public factory_base { /////////////////////////////////////////////////////////////////////////////// /// \brief Pipelined sorter with push input and pull output. /// Get the input pipe with \c input() and the output pullpipe with \c output(). -/// input() must not be called after output(). /// \tparam T The type of item to sort /// \tparam pred_t The predicate (e.g. std::less) indicating the predicate /// on which to order an item before another. @@ -598,20 +625,20 @@ class passive_sorter { /// \brief Get the input push node. /////////////////////////////////////////////////////////////////////////// input_pipe_t input() { - tp_assert(m_sorterInput, "Output called more then once"); + tp_assert(m_sorterInput, "input() called more than once"); auto ret = bits::passive_sorter_factory_input( std::move(m_sorterInput), m_calc_token); - return std::move(ret); + return {std::move(ret)}; } /////////////////////////////////////////////////////////////////////////// /// \brief Get the output pull node. 
/////////////////////////////////////////////////////////////////////////// output_pipe_t output() { - tp_assert(m_sorterOutput, "Output called more then once"); + tp_assert(m_sorterOutput, "output() called more than once"); auto ret = bits::passive_sorter_factory_output( std::move(m_sorterOutput), m_calc_token); - return std::move(ret); + return {std::move(ret)}; } private: diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/sort_parameters.h b/keyvi/3rdparty/tpie/tpie/pipelining/sort_parameters.h index b977299c..2a1eb3fe 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/sort_parameters.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/sort_parameters.h @@ -26,10 +26,16 @@ namespace tpie { struct sort_parameters { - /** Memory available while forming sorted runs. */ + /** files available while forming sorted runs. */ + memory_size_type filesPhase1; + /** memory available while forming sorted runs. */ memory_size_type memoryPhase1; + /** files available while merging runs. */ + memory_size_type filesPhase2; /** Memory available while merging runs. */ memory_size_type memoryPhase2; + /** files available during output phase. */ + memory_size_type filesPhase3; /** Memory available during output phase. */ memory_size_type memoryPhase3; /** Run length, subject to memory restrictions during phase 2. @@ -41,17 +47,20 @@ struct sort_parameters { /** Maximum item count for internal reporting, subject to memory * restrictions in all phases. Less or equal to runLength. */ memory_size_type internalReportThreshold; - /** Fanout of merge tree during phase 3. */ + /** Fanout of merge tree during phase 2. */ memory_size_type fanout; - /** Fanout of merge tree during phase 4. Less or equal to fanout. */ + /** Fanout of merge tree during phase 3. Less or equal to fanout. 
*/ memory_size_type finalFanout; void dump(std::ostream & out) const { out << "Merge sort parameters\n" + << "Phase 1 files: " << filesPhase1 << '\n' << "Phase 1 memory: " << memoryPhase1 << '\n' << "Run length: " << runLength << '\n' + << "Phase 2 files: " << filesPhase2 << '\n' << "Phase 2 memory: " << memoryPhase2 << '\n' << "Fanout: " << fanout << '\n' + << "Phase 3 files: " << filesPhase3 << '\n' << "Phase 3 memory: " << memoryPhase3 << '\n' << "Final merge level fanout: " << finalFanout << '\n' << "Internal report threshold: " << internalReportThreshold << '\n'; diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/split.h b/keyvi/3rdparty/tpie/tpie/pipelining/split.h new file mode 100644 index 00000000..501bbf93 --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/pipelining/split.h @@ -0,0 +1,118 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// Copyright 2016, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. If not, see + +#ifndef TPIE_PIPELINING_SPLIT_H +#define TPIE_PIPELINING_SPLIT_H + +#include +#include +#include + +namespace tpie { +namespace pipelining { + +/////////////////////////////////////////////////////////////////////////////// +/// \brief Split one push streams into multiple. 
+/// +/// A single pipe can push into \c sink(), and multiple pipes +/// can be constructed using \c source(). Every element pushed into \c sink() +/// will be pushed into every \c source(). +/// +/// \tparam T The type of item pushed +/////////////////////////////////////////////////////////////////////////////// +template +class split { +public: + class source_base : public node { + public: + source_base() = default; + source_base(source_base &&) = default; + + virtual void push(const T & v) = 0; + + protected: + ~source_base() {} + }; + + template + class source_impl : public source_base { + public: + source_impl(dest_t dest, node_token sink_token, std::vector & the_sources) + : the_sources(the_sources) + , dest(std::move(dest)) + { + this->set_name("Split source", PRIORITY_INSIGNIFICANT); + this->add_push_destination(this->dest); + + this->get_node_map()->union_set(sink_token.get_map()); + bits::node_map::ptr m = this->get_node_map()->find_authority(); + m->add_relation(sink_token.id(), this->get_token().id(), bits::pushes); + } + + source_impl(source_impl &&) = default; + + virtual void prepare() override { + the_sources.push_back(this); + }; + + virtual void push(const T & v) override { + dest.push(v); + } + + private: + std::vector & the_sources; + dest_t dest; + }; + + pipe_begin &> > source() { + return {sink_token, the_sources}; + } + + class sink_impl : public node { + public: + typedef T item_type; + + sink_impl(node_token sink_token, std::vector & the_sources) + : node(sink_token), the_sources(the_sources) + { + set_name("Join sink", PRIORITY_INSIGNIFICANT); + } + + void push(const T & v) { + for (auto & source : the_sources) + source->push(v); + } + + private: + std::vector & the_sources; + }; + + pipe_end&> > sink() { + return {sink_token, the_sources}; + } + +private: + std::vector the_sources; + node_token sink_token; +}; + +} // namespace pipelining +} // namespace tpie + +#endif // TPIE_PIPELINING_SPLIT_H diff --git 
a/keyvi/3rdparty/tpie/tpie/pipelining/std_glue.h b/keyvi/3rdparty/tpie/tpie/pipelining/std_glue.h index 8fa321a1..166a6c50 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/std_glue.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/std_glue.h @@ -32,47 +32,68 @@ namespace pipelining { namespace bits { -template +template class input_vector_t : public node { public: - typedef typename push_type::type item_type; + typedef T item_type; - inline input_vector_t(dest_t dest, const std::vector & input) : dest(std::move(dest)), input(input) { + input_vector_t(dest_t dest, const std::vector & input) : dest(std::move(dest)), input(input) { add_push_destination(this->dest); } - virtual void propagate() override { + void propagate() override { forward("items", static_cast(input.size())); set_steps(input.size()); } - virtual void go() override { - typedef typename std::vector::const_iterator IT; - for (IT i = input.begin(); i != input.end(); ++i) { - dest.push(*i); + void go() override { + for (auto & i: input) { + dest.push(i); step(); } } private: dest_t dest; - const std::vector & input; + const std::vector & input; }; -template -class output_vector_t : public node { +template +class pull_input_vector_t : public node { public: typedef T item_type; + + pull_input_vector_t(const std::vector & input) : input(input) {} - inline output_vector_t(std::vector & output) : output(output) { + void propagate() override { + forward("items", static_cast(input.size())); } - inline void push(const T & item) { + void begin() override {idx=0;}; + bool can_pull() const {return idx < input.size();} + const T & peek() const {return input[idx];} + const T & pull() {return input[idx++];} + +private: + size_t idx; + const std::vector & input; +}; + + +template +class output_vector_t : public node { +public: + typedef T item_type; + + output_vector_t(std::vector & output) : output(output) {} + + void push(const T & item) { output.push_back(item); } private: - std::vector & output; + std::vector & output; 
}; + template class lambda_t { public: @@ -122,18 +143,28 @@ class exclude_lambda_t { /// next node in the pipeline. /// \param input The vector from which it pushes items /////////////////////////////////////////////////////////////////////////////// -template -inline pipe_begin &> > input_vector(const std::vector & input) { - return factory &>(input); +template +inline pipe_begin, const std::vector &> > input_vector(const std::vector & input) { + return {input}; +} + +/////////////////////////////////////////////////////////////////////////////// +/// \brief Pipelining nodes that pushes the contents of the given vector to the +/// next node in the pipeline. +/// \param input The vector from which it pushes items +/////////////////////////////////////////////////////////////////////////////// +template +inline pullpipe_begin, const std::vector &> > pull_input_vector(const std::vector & input) { + return {input}; } /////////////////////////////////////////////////////////////////////////////// /// \brief Pipelining node that pushes items to the given vector. 
/// \param output The vector to push items to /////////////////////////////////////////////////////////////////////////////// -template -inline pipe_end, std::vector &> > output_vector(std::vector & output) { - return termfactory, std::vector &>(output); +template +inline pipe_end, std::vector &> > output_vector(std::vector & output) { + return termfactory, std::vector &>(output); } /////////////////////////////////////////////////////////////////////////////// diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/subpipeline.h b/keyvi/3rdparty/tpie/tpie/pipelining/subpipeline.h new file mode 100644 index 00000000..78a9a378 --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/pipelining/subpipeline.h @@ -0,0 +1,167 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// Copyright 2016, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. 
If not, see +#ifndef __TPIE_PIPELINING_SUBPIPELINE_H__ +#define __TPIE_PIPELINING_SUBPIPELINE_H__ + +#include +#include + +namespace tpie { +namespace pipelining { +namespace bits { + +class subpipeline_base: public pipeline_base_base { +public: + void begin(stream_size_type items, progress_indicator_base & pi, + memory_size_type filesAvailable, memory_size_type mem, + const char * file, const char * function); + void begin(stream_size_type items, progress_indicator_base & pi, + memory_size_type mem, + const char * file, const char * function) { + begin(items, pi, get_file_manager().available(), mem, file, function); + } + void end(); +protected: + node * frontNode; +private: + gocontext_ptr gc; + std::unique_ptr rt; +}; + +template +struct subpipeline_virt: public subpipeline_base { + progress_indicator_null pi; + + virtual void push(const item_type &) = 0; +}; + +template +struct subpipeline_impl: public subpipeline_virt { + typename fact_t::constructed_type front; + + subpipeline_impl(fact_t fact): front(fact.construct()) { + this->m_nodeMap = front.get_node_map(); + this->frontNode = &front; + } + + subpipeline_impl(const subpipeline_impl &) = delete; + subpipeline_impl & operator=(const subpipeline_impl &) = delete; + subpipeline_impl(subpipeline_impl &&) = delete; + subpipeline_impl & operator=(subpipeline_impl &&) = delete; + + void push(const item_type & item) override { + front.push(item); + }; + +}; +} //namespace bits + +/////////////////////////////////////////////////////////////////////////////// +/// \class pipeline +/// +/// Container class for a subpipeline +/////////////////////////////////////////////////////////////////////////////// +template +struct subpipeline { + subpipeline() {} + subpipeline(subpipeline &&) = default; + subpipeline(const subpipeline &) = default; + subpipeline & operator=(subpipeline &&) = default; + subpipeline & operator=(const subpipeline &) = default; + subpipeline(const std::shared_ptr> & p): p(p) {} + + template + 
subpipeline(T from) { + *this = std::move(from); + } + + template + subpipeline & operator=(T from) { + p.reset(new bits::subpipeline_impl(std::move(from))); + return *this; + } + + void push(const item_type & item) {p->push(item);} + + void begin(size_t filesAvailable, size_t memory) { + p->begin(1, p->pi, filesAvailable, memory, nullptr, nullptr); + } + + void begin(size_t memory) { + begin(get_file_manager().available(), memory); + } + + void begin(stream_size_type items, progress_indicator_base & pi, + memory_size_type filesAvailable, memory_size_type mem, + const char * file, const char * function) { + p->begin(items, pi, filesAvailable, mem, file, function); + } + + void begin(stream_size_type items, progress_indicator_base & pi, + memory_size_type mem, + const char * file, const char * function) { + begin(items, pi, get_file_manager().available(), mem, file, function); + } + + void end() {p->end();} + + void plot(std::ostream & os = std::cout) { + p->plot(os); + } + + void plot_full(std::ostream & os = std::cout) { + p->plot_full(os); + } + + bits::node_map::ptr get_node_map() const { + return p->get_node_map(); + } + + bool can_fetch(std::string key) { + return p->can_fetch(key); + } + + any_noncopyable fetch_any(std::string key) { + return p->fetch_any(key); + } + + template + T fetch(std::string key) { + any_noncopyable a = fetch_any(key); + return *any_cast(&a); + } + + void forward_any(std::string key, const any_noncopyable & value) { + return p->forward_any(key, value); + } + + template + void forward(std::string key, T value) { + forward_any(key, any_noncopyable(value)); + } + + void output_memory(std::ostream & o) const {p->output_memory(o);} +private: + std::shared_ptr> p; +}; + +} //namespace pipelining +} //namespace tpie + +#endif //__TPIE_PIPELINING_SUBPIPELINE_H__ diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/tokens.cpp b/keyvi/3rdparty/tpie/tpie/pipelining/tokens.cpp index b1a39337..9123f576 100644 --- 
a/keyvi/3rdparty/tpie/tpie/pipelining/tokens.cpp +++ b/keyvi/3rdparty/tpie/tpie/pipelining/tokens.cpp @@ -50,6 +50,11 @@ void node_map::link(node_map::ptr target) { for (relmapit i = target->m_relationsInv.begin(); i != target->m_relationsInv.end(); ++i) { m_relationsInv.insert(*i); } + for (auto i = target->m_pipelineForwards.begin(); i != m_pipelineForwards.end(); ++i) { + m_pipelineForwards[i->first] = std::move(i->second); + } + std::move(target->m_pipeBaseForwards.begin(), target->m_pipeBaseForwards.end(), + std::back_inserter(m_pipeBaseForwards)); target->m_tokens.clear(); target->m_authority = this; @@ -130,6 +135,7 @@ void node_map::get_successors(id_t from, std::vector & successors, memory_ case pulls: case depends: case no_forward_depends: + case memory_share_depends: break; } } @@ -142,6 +148,7 @@ void node_map::get_successors(id_t from, std::vector & successors, memory_ break; case pulls: case depends: + case memory_share_depends: q.push(std::make_pair(i->second.first, d+1)); break; case no_forward_depends: @@ -154,6 +161,12 @@ void node_map::get_successors(id_t from, std::vector & successors, memory_ } } +void node_map::forward_pipe_base_forwards() { + for (auto &t : m_pipeBaseForwards) { + get(t.from)->forward(t.key, std::move(t.value)); + } +} + } // namespace bits } // namespace pipelining diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/tokens.h b/keyvi/3rdparty/tpie/tpie/pipelining/tokens.h index a0ab97b1..a3a17875 100644 --- a/keyvi/3rdparty/tpie/tpie/pipelining/tokens.h +++ b/keyvi/3rdparty/tpie/tpie/pipelining/tokens.h @@ -74,13 +74,15 @@ #include #include #include +#include #include #include #include #include #include #include -#include +#include +#include namespace tpie { @@ -92,7 +94,8 @@ enum node_relation { pushes, pulls, depends, - no_forward_depends + no_forward_depends, + memory_share_depends }; class node_map { @@ -106,10 +109,19 @@ class node_map { typedef std::multimap > relmap_t; typedef relmap_t::const_iterator relmapit; - typedef 
std::map > datastructuremap_t; + typedef std::unordered_map > datastructuremap_t; + + typedef boost::optional maybeany_t; + typedef std::unordered_map forwardmap_t; typedef boost::intrusive_ptr ptr; + struct pipe_base_forward_t { + id_t from; + std::string key; + any_noncopyable value; + }; + static ptr create() { return ptr(new node_map); } @@ -191,6 +203,23 @@ class node_map { /////////////////////////////////////////////////////////////////////////// void get_successors(id_t from, std::vector & successors, memory_size_type k, bool forward_only=false); + void forward(std::string key, any_noncopyable value) { + m_pipelineForwards[key] = std::move(value); + } + + maybeany_t fetch_maybe(std::string key) { + auto it = m_pipelineForwards.find(key); + if (it == m_pipelineForwards.end()) { + return maybeany_t(); + } + return maybeany_t(it->second); + } + + void forward_from_pipe_base(id_t from, std::string key, any_noncopyable value) { + m_pipeBaseForwards.push_back({from, key, std::move(value)}); + } + + void forward_pipe_base_forwards(); friend void intrusive_ptr_add_ref(node_map * m) { m->m_refCnt++; @@ -206,6 +235,8 @@ class node_map { relmap_t m_relations; relmap_t m_relationsInv; datastructuremap_t m_datastructures; + forwardmap_t m_pipelineForwards; + std::vector m_pipeBaseForwards; size_t out_degree(const relmap_t & map, id_t from, node_relation rel) const; size_t out_degree(const relmap_t & map, id_t from) const; @@ -234,7 +265,7 @@ class node_token { typedef bits::node_map::val_t val_t; // Use for the simple case in which a node owns its own token - inline node_token(val_t owner) + explicit node_token(val_t owner) : m_tokens(bits::node_map::create()) , m_id(m_tokens->add_token(owner)) , m_free(false) diff --git a/keyvi/3rdparty/tpie/tpie/pipelining/visit.h b/keyvi/3rdparty/tpie/tpie/pipelining/visit.h new file mode 100644 index 00000000..a996e229 --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/pipelining/visit.h @@ -0,0 +1,68 @@ +// -*- mode: c++; tab-width: 4; 
indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// Copyright 2016 The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. If not, see + +#ifndef __TPIE_PIPELINING_VISIT_H__ +#define __TPIE_PIPELINING_VISIT_H__ + +#include +#include +#include +#include + +namespace tpie { +namespace pipelining { +namespace bits { + +template +class visit_t { +public: + template + class type: public node { + private: + F functor; + dest_t dest; + public: + typedef typename std::decay::argument_type>::type item_type; + type(dest_t dest, const F & functor): + functor(functor), dest(std::move(dest)) { + set_name(bits::extract_pipe_name(typeid(F).name()), PRIORITY_NO_NAME); + } + + void push(const item_type & item) { + functor(item); + dest.push(item); + } + }; +}; + +} //namespace bits + +/////////////////////////////////////////////////////////////////////////////// +/// \brief A pipelining node that applies a functor to elements pushed to it. 
+/// \param functor The visitor to use +/////////////////////////////////////////////////////////////////////////////// +template +pipe_middle, F> > visit(const F & functor) { + return tempfactory, F >(functor); +} + +} //namespace pipelining +} //namespace terrastream + +#endif //__TPIE_PIPELINING_VISIT_H__ diff --git a/keyvi/3rdparty/tpie/tpie/priority_queue.h b/keyvi/3rdparty/tpie/tpie/priority_queue.h index 20bd96e4..d5191bcc 100644 --- a/keyvi/3rdparty/tpie/tpie/priority_queue.h +++ b/keyvi/3rdparty/tpie/tpie/priority_queue.h @@ -79,21 +79,30 @@ class priority_queue { typedef memory_size_type group_type; typedef memory_size_type slot_type; public: + static constexpr float default_blocksize = 0.0625; + /////////////////////////////////////////////////////////////////////////// /// \brief Constructor. /// /// \param f Factor of memory that the priority queue is allowed to use. /// \param b Block factor /////////////////////////////////////////////////////////////////////////// - priority_queue(double f=1.0, float b=0.0625); + priority_queue(double f=1.0, float b=default_blocksize, stream_size_type n = std::numeric_limits::max()); #ifndef DOXYGEN // \param mmavail Number of bytes the priority queue is allowed to use. 
// \param b Block factor - priority_queue(memory_size_type mm_avail, float b=0.0625); + priority_queue(memory_size_type mm_avail, float b=default_blocksize, stream_size_type n = std::numeric_limits::max()); #endif - + ///////////////////////////////////////////////////////// + /// + /// Compute the maximal amount of memory it makes sence + /// to give a queue that will contain atmount n elements + /// + ///////////////////////////////////////////////////////// + static memory_size_type memory_usage(stream_size_type n, float b=default_blocksize); + ///////////////////////////////////////////////////////// /// /// Destructor @@ -205,7 +214,7 @@ class priority_queue { float block_factor; - void init(memory_size_type mm_avail); + void init(memory_size_type mm_avail, stream_size_type n = std::numeric_limits::max() ); void slot_start_set(slot_type slot, memory_size_type n); memory_size_type slot_start(slot_type slot) const; diff --git a/keyvi/3rdparty/tpie/tpie/priority_queue.inl b/keyvi/3rdparty/tpie/tpie/priority_queue.inl index 01c82883..79ab439a 100644 --- a/keyvi/3rdparty/tpie/tpie/priority_queue.inl +++ b/keyvi/3rdparty/tpie/tpie/priority_queue.inl @@ -18,7 +18,7 @@ // along with TPIE. 
If not, see template -priority_queue::priority_queue(double f, float b) : +priority_queue::priority_queue(double f, float b, stream_size_type n) : block_factor(b) { // constructor mem fraction assert(f<= 1.0 && f > 0); assert(b > 0.0); @@ -27,29 +27,48 @@ block_factor(b) { // constructor mem fraction << mm_avail/1024/1024 << "mb(" << mm_avail << "bytes)" << "\n"); mm_avail = static_cast(static_cast(mm_avail)*f); - init(mm_avail); + init(mm_avail, n); } #ifndef DOXYGEN template -priority_queue::priority_queue(memory_size_type mm_avail, float b) : +priority_queue::priority_queue(memory_size_type mm_avail, float b, stream_size_type n) : block_factor(b) { // constructor absolute mem assert(mm_avail <= get_memory_manager().limit() && mm_avail > 0); assert(b > 0.0); TP_LOG_DEBUG("priority_queue: Memory limit: " << mm_avail/1024/1024 << "mb(" << mm_avail << "bytes)" << "\n"); - init(mm_avail); + init(mm_avail, n); } #endif + template -void priority_queue::init(memory_size_type mm_avail) { // init +memory_size_type priority_queue::memory_usage(stream_size_type n, float) { + if ( std::numeric_limits::max() / sizeof(T) < n) + return std::numeric_limits::max(); + + return n * sizeof(T); +} + + +template +void priority_queue::init(memory_size_type mm_avail, stream_size_type n) { // init #ifdef _WIN32 #ifndef _WIN64 mm_avail = std::min(mm_avail, static_cast(1024*1024*512)); #endif //_WIN64 #endif //_WIN32 + if (memory_usage(n, block_factor) <= mm_avail) { + // Internal allocation + opq.reset(tpie_new(n)); + current_r = 0; + m_size = 0; // total size of priority queue + buffer_size = 0; + buffer_start = 0; + return; + } TP_LOG_DEBUG("m_for_queue: " << mm_avail << "\n"); @@ -124,7 +143,7 @@ void priority_queue::init(memory_size_type mm_avail) { / setting_k = static_cast(nominator/denominator); //Set fanout // Don't open too many files - setting_k = std::min(available_files()-40, setting_k); + setting_k = std::min(get_file_manager().available(), setting_k); // Performance degrades 
with more than around 250 open files setting_k = std::min(static_cast(250), setting_k); diff --git a/keyvi/3rdparty/tpie/tpie/progress_indicator_base.cpp b/keyvi/3rdparty/tpie/tpie/progress_indicator_base.cpp index 120a131b..17bb56c1 100644 --- a/keyvi/3rdparty/tpie/tpie/progress_indicator_base.cpp +++ b/keyvi/3rdparty/tpie/tpie/progress_indicator_base.cpp @@ -35,7 +35,6 @@ class ptime { static double seconds(const ptime & t1, const ptime & t2) { if (t1.m_ptime.is_special() || t2.m_ptime.is_special()) { - tp_assert(false, "ptime::seconds: Special ptime"); return 0.0; } return (t2.m_ptime - t1.m_ptime).total_microseconds() / 1000000.0; diff --git a/keyvi/3rdparty/tpie/tpie/progress_indicator_subindicator.h b/keyvi/3rdparty/tpie/tpie/progress_indicator_subindicator.h index 2355fd1a..f929a0d7 100644 --- a/keyvi/3rdparty/tpie/tpie/progress_indicator_subindicator.h +++ b/keyvi/3rdparty/tpie/tpie/progress_indicator_subindicator.h @@ -44,6 +44,8 @@ class progress_indicator_subindicator: public progress_indicator_base { virtual void pop_breadcrumb(); virtual void init(stream_size_type range); virtual void done(); + + void set_crumb(const std::string & c) {m_crumb = c;} void setup(progress_indicator_base * parent, stream_size_type outerRange, diff --git a/keyvi/3rdparty/tpie/tpie/resource_manager.cpp b/keyvi/3rdparty/tpie/tpie/resource_manager.cpp new file mode 100644 index 00000000..0c48f83f --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/resource_manager.cpp @@ -0,0 +1,104 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// +// Copyright 2011, 2014, The TPIE development team +// +// This file is part of TPIE. 
+// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. If not, see + +#include "memory.h" +#include +#include +#include "tpie_log.h" +#include +#include "pretty_print.h" + +namespace tpie { + +resource_manager::resource_manager(resource_type type) + : m_used(0), m_limit(0), m_maxExceeded(0), m_nextWarning(0), m_enforce(ENFORCE_WARN), resource_managed(type) {} + +size_t resource_manager::used() const noexcept { + return m_used.load(); +} + +size_t resource_manager::available() const noexcept { + size_t used = m_used.load(); + size_t limit = m_limit; + if (used < limit) return limit-used; + return 0; +} + +void resource_manager::print_resource_complaint(std::ostream & os, size_t amount, size_t usage) { + size_t diff = usage - m_limit; + + os << "Resource " << resource_managed << " limit exceeded by " << amount_with_unit(diff) + << " (" << (diff * 100 / m_limit) << "%), while trying to increase usage by " << amount_with_unit(amount) << "." 
+ << " Limit is " << amount_with_unit(m_limit) << ", but " << amount_with_unit(usage) << " would be used."; +} + +void resource_manager::register_increased_usage(size_t amount) { + switch(m_enforce) { + case ENFORCE_IGNORE: + m_used.fetch_add(amount); + break; + case ENFORCE_THROW: { + size_t usage = m_used.fetch_add(amount) + amount; + if (usage > m_limit && m_limit > 0) { + std::stringstream ss; + print_resource_complaint(ss, amount, usage); + throw_out_of_resource_error(ss.str()); + throw out_of_resource_error(ss.str()); + } + break; } + case ENFORCE_DEBUG: + case ENFORCE_WARN: { + size_t usage = m_used.fetch_add(amount) + amount; + if (usage > m_limit && usage - m_limit > m_maxExceeded && m_limit > 0) { + m_maxExceeded = usage - m_limit; + if (m_maxExceeded >= m_nextWarning) { + m_nextWarning = m_maxExceeded + m_maxExceeded/8; + std::ostream & os = (m_enforce == ENFORCE_DEBUG) ? log_debug() : log_warning(); + print_resource_complaint(os, amount, usage); + os << std::endl; + } + } + break; } + }; +} + +void resource_manager::register_decreased_usage(size_t amount) { +#ifndef TPIE_NDEBUG + size_t usage = m_used.fetch_sub(amount); + if (amount > usage) { + log_error() << "Error in decrease_usage, trying to decrease by " + << amount_with_unit(amount) << " , while only " + << amount_with_unit(usage) << " were allocated" << std::endl; + std::abort(); + } +#else + m_used.fetch_sub(amount); +#endif +} + +void resource_manager::set_limit(size_t new_limit) { + m_limit = new_limit; +} + +void resource_manager::set_enforcement(enforce_t e) { + m_enforce = e; +} + +} //namespace tpie diff --git a/keyvi/3rdparty/tpie/tpie/resource_manager.h b/keyvi/3rdparty/tpie/tpie/resource_manager.h new file mode 100644 index 00000000..57cd615e --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/resource_manager.h @@ -0,0 +1,132 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : 
+// +// Copyright 2011, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. If not, see + +/////////////////////////////////////////////////////////////////////////// +/// \file tpie/memory.h Memory management subsystem. +/////////////////////////////////////////////////////////////////////////// + +#ifndef __TPIE_RESOURCE_MANAGER_H__ +#define __TPIE_RESOURCE_MANAGER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace tpie { + +/////////////////////////////////////////////////////////////////////////////// +/// \brief Resource management object used to track resource usage. +/////////////////////////////////////////////////////////////////////////////// +class resource_manager { +public: + /////////////////////////////////////////////////////////////////////////// + /// Memory limit enforcement policies. + /////////////////////////////////////////////////////////////////////////// + enum enforce_t { + /** Ignore when running out of the resource. */ + ENFORCE_IGNORE, + /** \brief Log to debug log when the resource limit is exceeded. + * Note that not all violations will be logged. */ + ENFORCE_DEBUG, + /** \brief Log a warning when the resource limit is exceeded. Note that + * not all violations will be logged. 
*/ + ENFORCE_WARN, + /** Throw an out_of_resource_error when the resource limit is exceeded. */ + ENFORCE_THROW + }; + + /////////////////////////////////////////////////////////////////////////// + /// Return the current amount of the resource used. + /////////////////////////////////////////////////////////////////////////// + size_t used() const noexcept; + + /////////////////////////////////////////////////////////////////////////// + /// Return the amount of the resource still available to be assigned. + /////////////////////////////////////////////////////////////////////////// + size_t available() const noexcept; + + /////////////////////////////////////////////////////////////////////////// + /// Return the resource limit. + /////////////////////////////////////////////////////////////////////////// + size_t limit() const noexcept {return m_limit;} + + /////////////////////////////////////////////////////////////////////////// + /// \brief Update the resource limit. + /// If the resource limit is exceeded by decreasing the limit, + /// no exception will be thrown. + /// \param new_limit The new resource limit. + /////////////////////////////////////////////////////////////////////////// + void set_limit(size_t new_limit); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Set the resource limit enforcement policy. + /// \param e The new enforcement policy. + /////////////////////////////////////////////////////////////////////////// + void set_enforcement(enforce_t e); + + /////////////////////////////////////////////////////////////////////////// + /// \brief Return the current resource limit enforcement policy. 
+ /////////////////////////////////////////////////////////////////////////// + enforce_t enforcement() const noexcept {return m_enforce;} + + void register_increased_usage(size_t amount); + + void register_decreased_usage(size_t amount); + + virtual std::string amount_with_unit(size_t amount) const { + std::ostringstream os; + os << amount << " amount of " << resource_managed; + return os.str(); + } + + /////////////////////////////////////////////////////////////////////////// + /// \internal + /// Construct the resource manager object. + /////////////////////////////////////////////////////////////////////////// + resource_manager(resource_type type); + + virtual ~resource_manager() = default; + +private: + void print_resource_complaint(std::ostream & os, size_t amount, size_t usage); +protected: + virtual void throw_out_of_resource_error(const std::string & s) = 0; + + std::atomic m_used; + size_t m_limit; + size_t m_maxExceeded; + size_t m_nextWarning; + enforce_t m_enforce; + + resource_type resource_managed; +}; + +} //namespace tpie + +#endif //__TPIE_RESOURCE_MANAGER_H__ diff --git a/keyvi/3rdparty/tpie/tpie/resources.cpp b/keyvi/3rdparty/tpie/tpie/resources.cpp new file mode 100644 index 00000000..98954270 --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/resources.cpp @@ -0,0 +1,34 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// +// Copyright 2016, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. 
+// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. If not, see + +#include + +namespace tpie { + +std::ostream & operator<<(std::ostream & os, const resource_type t) { + switch (t) { + case FILES: os << "files"; break; + case MEMORY: os << "memory"; break; + default: os << "unknown"; break; + } + return os; +} + +} //namespace tpie diff --git a/keyvi/3rdparty/tpie/tpie/resources.h b/keyvi/3rdparty/tpie/tpie/resources.h new file mode 100644 index 00000000..c8fc324e --- /dev/null +++ b/keyvi/3rdparty/tpie/tpie/resources.h @@ -0,0 +1,48 @@ +// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*- +// vi:set ts=4 sts=4 sw=4 noet : +// +// Copyright 2016, The TPIE development team +// +// This file is part of TPIE. +// +// TPIE is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. +// +// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +// License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with TPIE. If not, see + +/////////////////////////////////////////////////////////////////////////// +/// \file tpie/resources.h Defines all types of managed resources. +/// Currently only FILES and MEMORY. 
+/////////////////////////////////////////////////////////////////////////// + +#ifndef __TPIE_RESOURCES_H__ +#define __TPIE_RESOURCES_H__ + +#include + +namespace tpie { + +enum resource_type { + // These should be ordered by when the resource + // assigned at runtime + FILES, + MEMORY, + + // Special values for internal use + TOTAL_RESOURCE_TYPES, + NO_RESOURCE +}; + +std::ostream & operator<<(std::ostream & os, const resource_type t); + +} //namespace tpie + +#endif //__TPIE_RESOURCES_H__ diff --git a/keyvi/3rdparty/tpie/tpie/serialization2.h b/keyvi/3rdparty/tpie/tpie/serialization2.h index 147190cf..9c7ec2e4 100644 --- a/keyvi/3rdparty/tpie/tpie/serialization2.h +++ b/keyvi/3rdparty/tpie/tpie/serialization2.h @@ -94,7 +94,7 @@ struct is_trivially_serializable { static long magic(...); public: static bool const value= - std::is_pod::value || sizeof(magic((T*)0))==sizeof(char); + (std::is_pod::value || sizeof(magic((T*)nullptr))==sizeof(char)) && !std::is_pointer::value; }; /////////////////////////////////////////////////////////////////////////////// @@ -102,8 +102,7 @@ struct is_trivially_serializable { /////////////////////////////////////////////////////////////////////////////// template void serialize(D & dst, const T & v, - typename std::enable_if::value - && !std::is_pointer::value >::type * = 0) { + typename std::enable_if::value>::type * = 0) { dst.write((const char *)&v, sizeof(T)); } @@ -112,8 +111,7 @@ void serialize(D & dst, const T & v, /////////////////////////////////////////////////////////////////////////////// template void unserialize(S & src, T & v, - typename std::enable_if::value - && !std::is_pointer::value>::type * = 0) { + typename std::enable_if::value>::type * = 0) { src.read((char *)&v, sizeof(T)); } @@ -125,8 +123,7 @@ namespace bits { /////////////////////////////////////////////////////////////////////////////// template ::value, - bool is_pod=std::is_pod::value_type>::value, - bool 
is_pointer=std::is_pointer::value_type>::value> + bool is_ts=is_trivially_serializable::value_type>::value> struct array_encode_magic { void operator()(D & dst, T start, T end) { using tpie::serialize; @@ -135,7 +132,7 @@ struct array_encode_magic { }; template -struct array_encode_magic { +struct array_encode_magic { void operator()(D & d, T start, T end) { if (start == end) { // Do not dereference two iterators pointing to null @@ -153,8 +150,7 @@ struct array_encode_magic { /////////////////////////////////////////////////////////////////////////////// template ::value, - bool is_pod=std::is_pod::value_type>::value, - bool is_pointer=std::is_pointer::value_type>::value> + bool is_ts=is_trivially_serializable::value_type>::value> struct array_decode_magic { void operator()(D & dst, T start, T end) { using tpie::unserialize; @@ -163,7 +159,7 @@ struct array_decode_magic { }; template -struct array_decode_magic { +struct array_decode_magic { void operator()(D & d, T start, T end) { if (start == end) { // Do not dereference two iterators pointing to null diff --git a/keyvi/3rdparty/tpie/tpie/serialization_sorter.h b/keyvi/3rdparty/tpie/tpie/serialization_sorter.h index 2f4e157a..011c5f96 100644 --- a/keyvi/3rdparty/tpie/tpie/serialization_sorter.h +++ b/keyvi/3rdparty/tpie/tpie/serialization_sorter.h @@ -40,10 +40,16 @@ namespace tpie { namespace serialization_bits { struct sort_parameters { - /** Memory available while forming sorted runs. */ + /** files available while forming sorted runs. */ + memory_size_type filesPhase1; + /** memory available while forming sorted runs. */ memory_size_type memoryPhase1; + /** files available while merging runs. */ + memory_size_type filesPhase2; /** Memory available while merging runs. */ memory_size_type memoryPhase2; + /** files available during output phase. */ + memory_size_type filesPhase3; /** Memory available during output phase. */ memory_size_type memoryPhase3; /** Minimum size of serialized items. 
*/ @@ -53,8 +59,11 @@ struct sort_parameters { void dump(std::ostream & out) const { out << "Serialization merge sort parameters\n" + << "Phase 1 files: " << filesPhase1 << '\n' << "Phase 1 memory: " << memoryPhase1 << '\n' + << "Phase 2 files: " << filesPhase2 << '\n' << "Phase 2 memory: " << memoryPhase2 << '\n' + << "Phase 3 files: " << filesPhase3 << '\n' << "Phase 3 memory: " << memoryPhase3 << '\n' << "Minimum item size: " << minimumItemSize << '\n' << "Temporary directory: " << tempDir << '\n'; @@ -513,6 +522,16 @@ class serialization_sorter { bool m_reportInternal; const T * m_nextInternalItem; + static const memory_size_type defaultFiles = 253; // Default number of files available, when not using set_available_files + static const memory_size_type minimumFilesPhase1 = 1; + static const memory_size_type maximumFilesPhase1 = 1; + static const memory_size_type minimumFilesPhase2 = 3; + static const memory_size_type maximumFilesPhase2 = std::numeric_limits::max(); + static const memory_size_type minimumFilesPhase3 = 3; + static const memory_size_type maximumFilesPhase3 = std::numeric_limits::max(); + + const int defaultMaxFiles = 253; + public: serialization_sorter(memory_size_type minimumItemSize = sizeof(T), pred_t pred = pred_t()) : m_buffer_bucket_ptr(new memory_bucket()) @@ -529,6 +548,9 @@ class serialization_sorter { , m_reportInternal(false) , m_nextInternalItem(0) { + m_params.filesPhase1 = 0; + m_params.filesPhase2 = 0; + m_params.filesPhase3 = 0; m_params.memoryPhase1 = 0; m_params.memoryPhase2 = 0; m_params.memoryPhase3 = 0; @@ -536,30 +558,64 @@ class serialization_sorter { } private: - // set_phase_?_memory helper - inline void maybe_calculate_parameters() { - if (m_state != state_initial) - throw tpie::exception("Bad state in maybe_calculate_parameters"); - if (m_params.memoryPhase1 > 0 && - m_params.memoryPhase2 > 0 && - m_params.memoryPhase3 > 0) - calculate_parameters(); + // Checks if we should still be able to change parameters + inline 
void check_not_started() { + if (m_state != state_initial) { + throw tpie::exception("Can't change parameters after sorting has started"); + } } public: + inline void set_phase_1_files(memory_size_type f1) { + m_params.filesPhase1 = f1; + check_not_started(); + } + + inline void set_phase_2_files(memory_size_type f2) { + m_params.filesPhase2 = f2; + check_not_started(); + } + + inline void set_phase_3_files(memory_size_type f3) { + m_params.filesPhase3 = f3; + check_not_started(); + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Calculate parameters from given amount of files. + /// \param f Files available for phase 1, 2 and 3 + /////////////////////////////////////////////////////////////////////////// + inline void set_available_files(memory_size_type f) { + m_params.filesPhase1 = m_params.filesPhase2 = m_params.filesPhase3 = f; + check_not_started(); + } + + /////////////////////////////////////////////////////////////////////////// + /// \brief Calculate parameters from given amount of files. 
+ /// \param f1 Files available for phase 1 + /// \param f2 Files available for phase 2 + /// \param f3 Files available for phase 3 + /////////////////////////////////////////////////////////////////////////// + inline void set_available_files(memory_size_type f1, memory_size_type f2, memory_size_type f3) { + m_params.filesPhase1 = f1; + m_params.filesPhase2 = f2; + m_params.filesPhase3 = f3; + check_not_started(); + } + void set_phase_1_memory(memory_size_type m1) { m_params.memoryPhase1 = m1; - maybe_calculate_parameters(); + check_not_started(); } void set_phase_2_memory(memory_size_type m2) { m_params.memoryPhase2 = m2; - maybe_calculate_parameters(); + check_not_started(); } void set_phase_3_memory(memory_size_type m3) { m_params.memoryPhase3 = m3; - maybe_calculate_parameters(); + check_not_started(); } void set_available_memory(memory_size_type m) { @@ -610,10 +666,28 @@ class serialization_sorter { m_owning_node = n; } private: + static memory_size_type clamp(memory_size_type lo, memory_size_type val, memory_size_type hi) { + return std::max(lo, std::min(val, hi)); + } + void calculate_parameters() { if (m_state != state_initial) throw tpie::exception("Bad state in calculate_parameters"); + if(!m_params.filesPhase1) + m_params.filesPhase1 = clamp(minimumFilesPhase1, defaultFiles, maximumFilesPhase1); + if(!m_params.filesPhase2) + m_params.filesPhase2 = clamp(minimumFilesPhase2, defaultFiles, maximumFilesPhase2); + if(!m_params.filesPhase3) + m_params.filesPhase3 = clamp(minimumFilesPhase3, defaultFiles, maximumFilesPhase3); + + if(m_params.filesPhase1 < minimumFilesPhase1) + throw tpie::exception("file limit for phase 1 too small (" + std::to_string(m_params.filesPhase1) + " < " + std::to_string(minimumFilesPhase1) + ")"); + if(m_params.filesPhase2 < minimumFilesPhase2) + throw tpie::exception("file limit for phase 2 too small (" + std::to_string(m_params.filesPhase2) + " < " + std::to_string(minimumFilesPhase2) + ")"); + if(m_params.filesPhase3 < 
minimumFilesPhase3) + throw tpie::exception("file limit for phase 3 too small (" + std::to_string(m_params.filesPhase3) + " < " + std::to_string(minimumFilesPhase3) + ")"); + memory_size_type memAvail1 = m_params.memoryPhase1; if (memAvail1 <= serialization_writer::memory_usage()) { log_error() << "Not enough memory for run formation; have " << memAvail1 @@ -657,7 +731,7 @@ class serialization_sorter { memory_size_type perFanout = m_params.minimumItemSize + serialization_reader::memory_usage(); // Floored division to compute the largest possible fanout. - memory_size_type fanout = fanoutMemory / perFanout; + memory_size_type fanout = std::min(fanoutMemory / perFanout, m_params.filesPhase2 - 1); if (fanout < 2) { log_error() << "Not enough memory for merging, even when minimum item size is assumed. " << "mem avail = " << memForMerge @@ -680,7 +754,7 @@ class serialization_sorter { public: void begin() { if (!m_parametersSet) - throw tpie::exception("Parameters not set in serialization_sorter"); + calculate_parameters(); if (m_state != state_initial) throw tpie::exception("Bad state in begin"); m_state = state_1; @@ -784,6 +858,24 @@ class serialization_sorter { return 0; } + + bool is_merge_runs_free() { + if (m_state != state_2) + throw tpie::exception("Bad state in end"); + if (m_reportInternal) return true; + + memory_size_type largestItem = m_sorter.get_largest_item_size(); + memory_size_type fanoutMemory = m_params.memoryPhase2 - serialization_writer::memory_usage(); + memory_size_type perFanout = largestItem + serialization_reader::memory_usage(); + memory_size_type fanout = std::min(m_params.filesPhase2 - 1, fanoutMemory / perFanout); + + memory_size_type finalFanoutMemory = m_params.memoryPhase3; + memory_size_type finalFanout = std::min( + {m_params.filesPhase3 - 1, fanout, finalFanoutMemory / perFanout}); + + return m_files.next_level_runs() <= finalFanout; + } + void merge_runs() { if (m_state != state_2) throw tpie::exception("Bad state in end"); @@ 
-808,7 +900,7 @@ class serialization_sorter { // Only change the item size to largestItem rather than minimumItemSize. memory_size_type fanoutMemory = m_params.memoryPhase2 - serialization_writer::memory_usage(); memory_size_type perFanout = largestItem + serialization_reader::memory_usage(); - memory_size_type fanout = fanoutMemory / perFanout; + memory_size_type fanout = std::min(fanoutMemory / perFanout, m_params.filesPhase2 - 1); if (fanout < 2) { log_error() << "Not enough memory for merging. " @@ -820,9 +912,8 @@ class serialization_sorter { } memory_size_type finalFanoutMemory = m_params.memoryPhase3; - memory_size_type finalFanout = - std::min(fanout, - finalFanoutMemory / perFanout); + memory_size_type finalFanout = std::min( + {m_params.filesPhase3 - 1, fanout, finalFanoutMemory / perFanout}); if (finalFanout < 2) { log_error() << "Not enough memory for merging (final fanout < 2). " diff --git a/keyvi/3rdparty/tpie/tpie/serialization_stream.cpp b/keyvi/3rdparty/tpie/tpie/serialization_stream.cpp index 14870b91..128b2925 100644 --- a/keyvi/3rdparty/tpie/tpie/serialization_stream.cpp +++ b/keyvi/3rdparty/tpie/tpie/serialization_stream.cpp @@ -404,7 +404,7 @@ void serialization_reverse_reader::open(temp_file & tempFile) { stream_size_type serialization_reverse_reader::offset() { if (m_blockSize == 0) - return size(); + return 0; // size of blocks not read at all stream_size_type remainingBlocks = m_blockNumber * block_size(); diff --git a/keyvi/3rdparty/tpie/tpie/sort_manager.h b/keyvi/3rdparty/tpie/tpie/sort_manager.h index ba7bd274..458ff12b 100644 --- a/keyvi/3rdparty/tpie/tpie/sort_manager.h +++ b/keyvi/3rdparty/tpie/tpie/sort_manager.h @@ -397,7 +397,7 @@ void sort_manager::compute_sort_params(void){ // number of substreams we want. It may not be able to due to // operating system restrictions, such as on the number of regions // that can be mmap()ed in, max number of file descriptors, etc. 
- int availableStreams = static_cast(available_files()); + int availableStreams = static_cast(get_file_manager().available()); // Merging requires an available stream/file decriptor for // each of the mrgArity input strems. We need one additional file descriptor diff --git a/keyvi/3rdparty/tpie/tpie/stack.h b/keyvi/3rdparty/tpie/tpie/stack.h index 5c5254ee..b6fbb628 100644 --- a/keyvi/3rdparty/tpie/tpie/stack.h +++ b/keyvi/3rdparty/tpie/tpie/stack.h @@ -289,6 +289,9 @@ class stack { return size(); } + tpie::stack & underlying_stack() { + return m_ulate; + } private: temp_file m_tempFile; diff --git a/keyvi/3rdparty/tpie/tpie/stats.h b/keyvi/3rdparty/tpie/tpie/stats.h index 018f2eac..7b5ffad8 100644 --- a/keyvi/3rdparty/tpie/tpie/stats.h +++ b/keyvi/3rdparty/tpie/tpie/stats.h @@ -74,7 +74,7 @@ class ptime { static ptime now() {return clock::now();} static double seconds(const ptime & t1, const ptime & t2) { - return std::chrono::duration_cast( + return std::chrono::duration_cast>( t2.m_ptime - t1.m_ptime).count(); } @@ -94,7 +94,7 @@ class stat_timer { ~stat_timer() { ptime t2 = ptime::now(); - increment_user(i, ptime::seconds(t1, t2)*1000000); + increment_user(i, (stream_size_type)(ptime::seconds(t1, t2)*1000000)); } private: diff --git a/keyvi/3rdparty/tpie/tpie/stream.h b/keyvi/3rdparty/tpie/tpie/stream.h index ac9354e4..facb3b16 100644 --- a/keyvi/3rdparty/tpie/tpie/stream.h +++ b/keyvi/3rdparty/tpie/tpie/stream.h @@ -138,6 +138,7 @@ class stream { m_stream.seek(offset); } catch(const stream_exception &e) { TP_LOG_WARNING_ID("BTE error - seek failed: " << e.what()); + tpie::unused(e); return BTE_ERROR; } return NO_ERROR; @@ -152,6 +153,7 @@ class stream { m_stream.truncate(offset); } catch(const stream_exception & e) { TP_LOG_WARNING_ID("BTE error - truncate failed: " << e.what()); + tpie::unused(e); return BTE_ERROR; } return NO_ERROR; @@ -182,7 +184,7 @@ class stream { } size_t available_streams(void) { - return available_files(); + return 
get_file_manager().available(); } memory_size_type chunk_size(void) const { diff --git a/keyvi/3rdparty/tpie/tpie/stream_old.h b/keyvi/3rdparty/tpie/tpie/stream_old.h index 4ecc9f20..eda84ac3 100644 --- a/keyvi/3rdparty/tpie/tpie/stream_old.h +++ b/keyvi/3rdparty/tpie/tpie/stream_old.h @@ -36,7 +36,6 @@ #include #include -#include #include @@ -44,6 +43,8 @@ #include +#include + namespace tpie { namespace ami { @@ -291,7 +292,7 @@ class stream_old { /// of streams currently opened by TPIE. //////////////////////////////////////////////////////////////////////////// size_t available_streams(void) { - return available_files(); + return get_file_manager().available(); } //////////////////////////////////////////////////////////////////////////// diff --git a/keyvi/3rdparty/tpie/tpie/tpie.cpp b/keyvi/3rdparty/tpie/tpie/tpie.cpp index 628cefa7..563bf280 100644 --- a/keyvi/3rdparty/tpie/tpie/tpie.cpp +++ b/keyvi/3rdparty/tpie/tpie/tpie.cpp @@ -36,6 +36,9 @@ static tpie::memory_size_type the_block_size=0; namespace tpie { void tpie_init(flags subsystems) { + if (subsystems & FILE_MANAGER) + init_file_manager(); + if (subsystems & MEMORY_MANAGER) init_memory_manager(); @@ -88,6 +91,9 @@ void tpie_finish(flags subsystems) { if (subsystems & MEMORY_MANAGER) finish_memory_manager(); + if (subsystems & FILE_MANAGER) + finish_file_manager(); + if (subsystems & TEMPFILE) finish_tempfile(); } diff --git a/keyvi/3rdparty/tpie/tpie/tpie.h b/keyvi/3rdparty/tpie/tpie/tpie.h index b25f67a1..7b92e37e 100644 --- a/keyvi/3rdparty/tpie/tpie/tpie.h +++ b/keyvi/3rdparty/tpie/tpie/tpie.h @@ -56,8 +56,11 @@ enum subsystem { HASH=128, /** \brief Generate temporary files */ TEMPFILE=256, + /** \brief Needed for working with files and implicitly by all + * TPIE algorithm and data structure implementations. */ + FILE_MANAGER=512, /** \brief Alias for all default subsystems. 
*/ - ALL=MEMORY_MANAGER | DEFAULT_LOGGING | PROGRESS | PRIMEDB | JOB_MANAGER | STREAMS | HASH | TEMPFILE + ALL=MEMORY_MANAGER | DEFAULT_LOGGING | PROGRESS | PRIMEDB | JOB_MANAGER | STREAMS | HASH | TEMPFILE | FILE_MANAGER }; TPIE_DECLARE_OPERATORS_FOR_FLAGS(subsystem) @@ -88,7 +91,7 @@ memory_size_type get_block_size(); /////////////////////////////////////////////////////////////////////////////// /// \brief Set the TPIE block size. /// -/// It is not safe to change the block size once TPIE has been initialized. +/// It is not safe to change the block size when any streams are open. /////////////////////////////////////////////////////////////////////////////// void set_block_size(memory_size_type block_size); diff --git a/keyvi/3rdparty/tpie/tpie/unittest.h b/keyvi/3rdparty/tpie/tpie/unittest.h index f5cd2552..1cd09869 100644 --- a/keyvi/3rdparty/tpie/tpie/unittest.h +++ b/keyvi/3rdparty/tpie/tpie/unittest.h @@ -36,11 +36,11 @@ typedef std::chrono::high_resolution_clock test_clock; typedef std::chrono::time_point test_time; inline test_time test_now() {return test_clock::now();} inline double test_millisecs(const test_time & from, const test_time & to) { - return std::chrono::duration_cast(to-from).count(); + return std::chrono::duration_cast >(to-from).count(); } inline double test_secs(const test_time & from, const test_time & to) { - return std::chrono::duration_cast(to-from).count(); + return std::chrono::duration_cast>(to-from).count(); } class teststream_buf: public std::basic_streambuf > {