Skip to content
This repository has been archived by the owner on Mar 3, 2020. It is now read-only.

Commit

Permalink
daemon randomly crashes and it's good this time!
Browse files Browse the repository at this point in the history
  • Loading branch information
isabelsavannah committed Sep 13, 2018
1 parent 2dead1f commit 9dc3d5d
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 4 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,6 @@ add_subdirectory(swarm)
add_subdirectory(utils)
add_subdirectory(audit)
add_subdirectory(pbft)
add_subdirectory(chaos)

include(cmake/static_analysis.cmake)
6 changes: 6 additions & 0 deletions chaos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Library target for the chaos (deliberate crash injection) module.
add_library(chaos chaos.hpp chaos.cpp)

# No link dependencies are declared for this target.
target_link_libraries(chaos)
79 changes: 79 additions & 0 deletions chaos/chaos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,83 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include <random>
#include <boost/format.hpp>
#include <chaos/chaos.hpp>

using namespace bzn;

namespace stub
{
    // Master switch for the chaos module; when false no crash is scheduled or triggered.
    constexpr bool chaos_enabled = true;

    /*
     * Parameters of the Weibull distribution from which the time until the
     * crash (in hours) is drawn. With these values, approximately:
     *   10% of nodes will fail within a couple minutes
     *   20% of nodes will fail within the first hour
     *   40% of nodes will last 1-12 hours
     *   20% of nodes will last 12-48 hours
     *   10% of nodes will last 48+ hours
     */
    constexpr double weibull_shape = 0.5;
    constexpr double weibull_scale_hours = 10;
}

// Builds the chaos module: keeps the io_context and the options reference, and
// asks the io_context for the steady timer that will later deliver the crash.
chaos::chaos(std::shared_ptr<bzn::asio::io_context_base> io_context, const bzn::options_base& options)
    : io_context(io_context)
    , options(options)
    , crash_timer(io_context->make_unique_steady_timer())
{
    // Cryptographic strength is not required, but the seed must be of reasonable
    // quality and must differ between processes, so take it from std::random_device.
    this->random.seed(std::random_device{}());
}

// Arms the crash timer. May be called repeatedly; start_once ensures the timer
// is only set up by the first call.
void
chaos::start()
{
    std::call_once(this->start_once, &chaos::start_crash_timer, this);
}

// Draws a random time-to-crash from the configured Weibull distribution and
// schedules handle_crash_timer to run after that delay. Does nothing when the
// chaos module is disabled.
void
chaos::start_crash_timer()
{
    if (!stub::chaos_enabled)
    {
        return;
    }

    // Sample how long this process gets to live, in hours.
    std::weibull_distribution<double> lifetime_hours(stub::weibull_shape, stub::weibull_scale_hours);
    const double hours_until_crash = lifetime_hours(this->random);
    LOG(info) << boost::format("Chaos module will trigger this node crashing in %1$.2f hours") % hours_until_crash;

    // Keep the delay as fractional hours first, then convert to the milliseconds the timer is given.
    using fractional_hours = std::chrono::duration<double, std::chrono::hours::period>;
    const fractional_hours delay{hours_until_crash};
    LOG(info) << delay.count();

    const auto delay_ms = std::chrono::duration_cast<std::chrono::milliseconds>(delay);
    LOG(info) << delay_ms.count();

    this->crash_timer->expires_from_now(delay_ms);

    // Because the crash is delivered through this timer, it can only happen at a point
    // where boost schedules a new callback to run, rather than truly at random.
    this->crash_timer->async_wait(std::bind(&chaos::handle_crash_timer, shared_from_this(), std::placeholders::_1));
}

// Completion handler for the crash timer: deliberately terminates the process
// with std::abort() once the scheduled delay has elapsed.
//
// ec is the completion status of the timer wait. A non-zero value means the wait
// did not run to completion (for example because the timer was cancelled), in
// which case no crash is triggered.
void
chaos::handle_crash_timer(const boost::system::error_code& ec)
{
    if (!stub::chaos_enabled)
    {
        return;
    }

    if (ec)
    {
        // The wait ended early instead of expiring at the scheduled time, so this is
        // not the planned crash; aborting here would kill the process at the wrong moment.
        LOG(info) << "Chaos crash timer wait did not complete, not crashing: " << ec.message();
        return;
    }

    // This log message may not actually be printed, because std::abort() below
    // ends the process abruptly.
    LOG(fatal) << "Chaos module triggering node crash";

    // Intentionally crashing abruptly, with no orderly shutdown.
    std::abort();
}
22 changes: 19 additions & 3 deletions chaos/chaos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,30 @@

#pragma once

#include <mutex>
#include <options/options_base.hpp>
#include <include/boost_asio_beast.hpp>
#include <random>

namespace bzn
{
// Chaos module: schedules a timer whose expiry deliberately crashes the process.
// NOTE(review): this listing comes from a diff view, so the bare `class chaos` line and the
// first constructor/start() declarations below appear to be the pre-change lines — confirm
// against the actual header.
class chaos
class chaos : public std::enable_shared_from_this<chaos>
{
public:
chaos(const bzn::options_base& options);
start();
// Stores io_context and options; the definition also creates crash_timer from io_context
// and seeds the random engine.
chaos(std::shared_ptr<bzn::asio::io_context_base> io_context, const bzn::options_base& options);
// Arms the crash timer; guarded by start_once so the timer is only set up once.
void start();

private:
// Draws the time until the crash and schedules handle_crash_timer on crash_timer.
void start_crash_timer();
// Timer completion handler; logs a fatal message and calls std::abort().
void handle_crash_timer(const boost::system::error_code&);

// Used with std::call_once in start().
std::once_flag start_once;

// io_context supplied at construction; the constructor obtains crash_timer from it.
const std::shared_ptr<bzn::asio::io_context_base> io_context;
// Reference to the options object supplied at construction; that object must outlive this instance.
const bzn::options_base& options;

// Timer whose completion invokes handle_crash_timer.
std::unique_ptr<bzn::asio::steady_timer_base> crash_timer;

// Pseudo-random engine seeded from std::random_device in the constructor; used to sample the time until crash.
std::mt19937 random;
};
}
2 changes: 1 addition & 1 deletion swarm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
add_executable(swarm main.cpp)
add_dependencies(swarm jsoncpp) # rocksdb
target_include_directories(swarm PRIVATE ${JSONCPP_INCLUDE_DIRS})
target_link_libraries(swarm node http raft pbft audit crud options ethereum bootstrap storage proto protobuf.a status ${Boost_LIBRARIES} ${JSONCPP_LIBRARIES} pthread)
target_link_libraries(swarm node http raft pbft audit crud options ethereum bootstrap storage proto protobuf.a status chaos ${Boost_LIBRARIES} ${JSONCPP_LIBRARIES} pthread)
3 changes: 3 additions & 0 deletions swarm/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <pbft/pbft.hpp>
#include <pbft/pbft_failure_detector.hpp>
#include <raft/raft.hpp>
#include <chaos/chaos.hpp>


void
Expand Down Expand Up @@ -216,9 +217,11 @@ main(int argc, const char* argv[])
auto websocket = std::make_shared<bzn::beast::websocket>();
auto node = std::make_shared<bzn::node>(io_context, websocket, options.get_ws_idle_timeout(), boost::asio::ip::tcp::endpoint{options.get_listener()});
auto audit = std::make_shared<bzn::audit>(io_context, node, options.get_monitor_endpoint(io_context), options.get_uuid(), options.get_audit_mem_size(), options.pbft_enabled());
auto chaos = std::make_shared<bzn::chaos>(io_context, options);

node->start();
audit->start();
chaos->start();

if (options.pbft_enabled())
{
Expand Down

0 comments on commit 9dc3d5d

Please sign in to comment.