Skip to content

Commit

Permalink
Merge pull request #590 from Sonnexo/master
Browse files Browse the repository at this point in the history
Introduce subsystems for memory and loop
  • Loading branch information
lukasrothenberger committed Jun 5, 2024
2 parents 07db7a4 + a7d38d0 commit 5a4ae6e
Show file tree
Hide file tree
Showing 101 changed files with 6,253 additions and 2,338 deletions.
6 changes: 3 additions & 3 deletions DiscoPoP/DiscoPoP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2004,7 +2004,7 @@ void DiscoPoP::dp_reduction_insert_functions() {
llvm::FunctionType *loop_incr_fn_type = llvm::FunctionType::get(
llvm::Type::getVoidTy(*ctx_), loop_incr_fn_args, false);
FunctionCallee incr_loop_counter_callee =
module_->getOrInsertFunction("incr_loop_counter", loop_incr_fn_type);
module_->getOrInsertFunction("__dp_loop_incr", loop_incr_fn_type);

for (auto const &loop_info : loops_) {
llvm::Value *val =
Expand All @@ -2019,11 +2019,11 @@ void DiscoPoP::dp_reduction_insert_functions() {
loop_metadata_file.close();

// add a function to output the final data
// loop_counter_output
// dp_loop_output
llvm::FunctionType *output_fn_type =
llvm::FunctionType::get(llvm::Type::getVoidTy(*ctx_), false);
FunctionCallee loop_counter_output_callee =
module_->getOrInsertFunction("loop_counter_output", output_fn_type);
module_->getOrInsertFunction("__dp_loop_output", output_fn_type);
FunctionCallee cu_taken_branch_counter_output_callee =
module_->getOrInsertFunction("__dp_taken_branch_counter_output",
output_fn_type);
Expand Down
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ target_sources(
DiscoPoP_BM
PRIVATE
benchmarks.cpp
memory_region_tree/benchmark_memory_region_tree.cpp
perfect_shadow/benchmark_perfect_shadow.cpp
scope/benchmark_scope.cpp)

Expand Down
193 changes: 193 additions & 0 deletions benchmark/memory_region_tree/benchmark_memory_region_tree.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* This file is part of the DiscoPoP software
* (http://www.discopop.tu-darmstadt.de)
*
* Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
*
* This software may be modified and distributed under the terms of
* the 3-Clause BSD License. See the LICENSE file in the package base
* directory for details.
*
*/

#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

#include "../../rtlib/memory/MemoryRegionTree.hpp"

// General functions

static std::vector<ADDR> convert_to_address(const std::int64_t number_iterations) {
auto mt = std::mt19937{0};
auto uid = std::uniform_int_distribution<ADDR>{0, 0x7FFFFFFFFFFFFFFF};

auto addresses = std::vector<ADDR>{};
addresses.resize(number_iterations);

for (auto i = std::int64_t(0); i < number_iterations; i++) {
addresses[i] = uid(mt);
}

std::sort(addresses.begin(), addresses.end());

return addresses;
}

// Benchmarks for the old version (i.e., establishing a baseline)

/// Measures filling an initially empty MemoryRegionTree with `state.range(0)`
/// regions via allocate_region().
///
/// Only the allocate_region() calls are timed: constructing the tree and
/// handing it off afterwards both happen while the timer is paused.
static void benchmark_mrt_allocate_region(benchmark::State& state) {
  const auto region_count = state.range(0);
  const auto address_count = region_count * 2;

  // Two sorted addresses per region; consecutive pairs are passed to
  // allocate_region() together with an odd number derived from the index.
  const auto addresses = convert_to_address(address_count);

  // Finished trees are parked here, so their destructors run after the
  // measurement loop instead of inside it.
  std::vector<__dp::MemoryRegionTree> finished_trees{};

  for (auto _ : state) {
    state.PauseTiming();
    __dp::MemoryRegionTree tree{};
    state.ResumeTiming();

    for (int idx = 0; idx < address_count; idx += 2) {
      tree.allocate_region(addresses[idx], addresses[idx + 1], idx + 1);
    }

    state.PauseTiming();
    finished_trees.push_back(std::move(tree));
    state.ResumeTiming();
  }
}

/// Measures get_memory_region_id() on a tree holding `state.range(0)` regions.
///
/// The tree is populated once before the measurement starts. Every benchmark
/// iteration then looks up each generated address exactly once.
static void benchmark_mrt_get_memory_region_id(benchmark::State& state) {
  const auto region_count = state.range(0);
  const auto address_count = region_count * 2;

  const auto addresses = convert_to_address(address_count);

  // Untimed setup: one region per consecutive pair of addresses.
  __dp::MemoryRegionTree tree{};
  for (int idx = 0; idx < address_count; idx += 2) {
    tree.allocate_region(addresses[idx], addresses[idx + 1], idx + 1);
  }

  for (auto _ : state) {
    for (const auto& address : addresses) {
      // DoNotOptimize keeps the compiler from discarding the unused result.
      benchmark::DoNotOptimize(tree.get_memory_region_id(address));
    }
  }
}

/// Measures get_memory_region_id_string() for addresses that were used as
/// region boundaries when the tree was populated.
///
/// `state.range(0)` is the number of regions. Two addresses are generated per
/// region and each of them is looked up once per benchmark iteration.
static void benchmark_mrt_get_memory_region_id_string_found(benchmark::State& state) {
  const auto number_iterations = state.range(0);
  const auto number_addresses = number_iterations * 2;

  const auto addresses = convert_to_address(number_addresses);

  auto tree = __dp::MemoryRegionTree{};

  for (auto i = 0; i < number_addresses; i += 2) {
    tree.allocate_region(addresses[i], addresses[i + 1], i + 1);
  }

  // The returned strings are parked here so that neither their destruction nor
  // a reallocation of the vector happens inside the timed region. One pass
  // produces one string per address, so room for all of them is reserved.
  auto dumping_ground = std::vector<std::string>{};
  dumping_ground.reserve(number_addresses);

  for (auto _ : state) {
    for (auto i = 0; i < number_addresses; i++) {
      dumping_ground.emplace_back(tree.get_memory_region_id_string(addresses[i], "fallback"));
    }

    // Drop the strings while the timer is paused. clear() keeps the capacity,
    // so the next pass does not reallocate and memory use stays bounded.
    state.PauseTiming();
    dumping_ground.clear();
    state.ResumeTiming();
  }
}

/// Measures get_memory_region_id_string() for addresses shifted by one away
/// from the boundaries that were used to populate the tree.
///
/// `state.range(0)` is the number of regions. Two addresses are generated per
/// region and one shifted lookup is issued per address and benchmark iteration.
static void benchmark_mrt_get_memory_region_id_string_fallback(benchmark::State& state) {
  const auto number_iterations = state.range(0);
  const auto number_addresses = number_iterations * 2;

  const auto addresses = convert_to_address(number_addresses);

  auto tree = __dp::MemoryRegionTree{};

  for (auto i = 0; i < number_addresses; i += 2) {
    tree.allocate_region(addresses[i], addresses[i + 1], i + 1);
  }

  // The returned strings are parked here so that neither their destruction nor
  // a reallocation of the vector happens inside the timed region. One pass
  // produces one string per address, so room for all of them is reserved.
  auto dumping_ground = std::vector<std::string>{};
  dumping_ground.reserve(number_addresses);

  for (auto _ : state) {
    for (auto i = 0; i < number_addresses; i++) {
      const auto base_address = addresses[i];
      // Even indices were passed as the first argument of allocate_region() and
      // odd indices as the second; the query is moved one below the former and
      // one above the latter.
      // NOTE(review): presumably this places the query just outside the
      // region so that the fallback string is returned - confirm against
      // MemoryRegionTree.
      const auto address = (i % 2 == 0) ? base_address - 1 : base_address + 1;

      dumping_ground.emplace_back(tree.get_memory_region_id_string(address, "fallback"));
    }

    // Drop the strings while the timer is paused. clear() keeps the capacity,
    // so the next pass does not reallocate and memory use stays bounded.
    state.PauseTiming();
    dumping_ground.clear();
    state.ResumeTiming();
  }
}

/// Measures destroying a MemoryRegionTree that holds `state.range(0)` regions.
///
/// The tree is built while the timer is paused. The timer is resumed as the
/// last statement of the loop body, so the tree going out of scope at the end
/// of the iteration falls into the timed region.
static void benchmark_mrt_destructor(benchmark::State& state) {
  const auto region_count = state.range(0);
  const auto address_count = region_count * 2;

  const auto addresses = convert_to_address(address_count);

  for (auto _ : state) {
    state.PauseTiming();

    // Untimed setup: one region per consecutive pair of addresses.
    __dp::MemoryRegionTree tree{};
    for (int idx = 0; idx < address_count; idx += 2) {
      tree.allocate_region(addresses[idx], addresses[idx + 1], idx + 1);
    }

    state.ResumeTiming();
    // `tree` is destroyed here, with the timer running.
  }
}

/// Measures free_region() on a tree holding `state.range(0)` regions.
///
/// Every benchmark iteration builds a fresh tree while the timer is paused,
/// then times one free_region() call per region, using the address that was
/// passed as the first argument of the matching allocate_region() call.
static void benchmark_mrt_free_region(benchmark::State& state) {
  const auto number_iterations = state.range(0);

  const auto addresses = convert_to_address(number_iterations * 2);

  // This exists so that the destructor call does not interfere with the timing
  auto dumping_ground = std::vector<__dp::MemoryRegionTree>{};

  for (auto _ : state) {
    // Untimed setup: the tree has to be rebuilt for every iteration because
    // the timed part below frees all of its regions again.
    state.PauseTiming();
    auto tree = __dp::MemoryRegionTree{};
    for (auto i = 0; i < number_iterations * 2; i += 2) {
      tree.allocate_region(addresses[i], addresses[i + 1], i + 1);
    }
    state.ResumeTiming();

    for (auto i = 0; i < number_iterations * 2; i += 2) {
      tree.free_region(addresses[i]);
    }

    // Park the tree so that its destructor runs after the measurement loop.
    state.PauseTiming();
    dumping_ground.emplace_back(std::move(tree));
    state.ResumeTiming();
  }
}

// Benchmark registrations.
// Every benchmark is registered twice: once with argument 32 and once with
// argument 1024. The benchmark functions read the argument via state.range(0)
// and use it as the number of regions. Each registration runs a fixed 100
// iterations and reports in milliseconds.

// Populating an empty tree
BENCHMARK(benchmark_mrt_allocate_region)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_allocate_region)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

// Looking up region ids
BENCHMARK(benchmark_mrt_get_memory_region_id)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_get_memory_region_id)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

// Looking up region id strings with the generated addresses
BENCHMARK(benchmark_mrt_get_memory_region_id_string_found)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_get_memory_region_id_string_found)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

// Looking up region id strings with addresses shifted by one
BENCHMARK(benchmark_mrt_get_memory_region_id_string_fallback)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_get_memory_region_id_string_fallback)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

// Destroying a populated tree
BENCHMARK(benchmark_mrt_destructor)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_destructor)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

// Freeing every region of a populated tree
BENCHMARK(benchmark_mrt_free_region)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_free_region)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);
2 changes: 1 addition & 1 deletion benchmark/perfect_shadow/benchmark_perfect_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <cstdint>
#include <vector>

#include "../../rtlib/perfect_shadow.hpp"
#include "../../rtlib/memory/PerfectShadow.hpp"

// General functions

Expand Down
2 changes: 1 addition & 1 deletion benchmark/scope/benchmark_scope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <cstdint>
#include <vector>

#include "../../rtlib/scope.hpp"
#include "../../rtlib/memory/Scope.hpp"

// General functions

Expand Down
78 changes: 39 additions & 39 deletions docs/setup/discopop.md
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
---
layout: default
title: DiscoPoP
parent: Setup
nav_order: 1
---

# DiscoPoP Setup
## Prerequisites
- LLVM/clang version 11
- Python version 3.6 or greater

## Setup
```
git clone git@github.com:discopop-project/discopop.git
cd discopop
mkdir build
```

## Build libraries and install Python modules
```
cd build
cmake .. <CMAKE_FLAGS>
make
cd ..
```

where `<CMAKE_FLAGS>` can consist of any combination of the following flags and commonly used CMAKE_FLAGS:
- In case you want to use a specific Version of LLVM, it is possible to specify the `-DUSE_LLVM_VERSION=<version>` flag.
- In case you want to use a specific LLVM installation, specify the location via the `-DLLVM_DIST_PATH=<llvm_base_dir>` flag.
- In case your application uses PThreads, please specify `-DDP_PTHREAD_COMPATIBILITY_MODE=[0|1]`. Note, however, that this can influence the runtime of the profiling.
- In case you require a more verbose output of the runtime library, specify the `-DDP_RTLIB_VERBOSE=[0|1]` flag.
- In case you want to specify the number of Workers available for the profiling step, specify the `-DDP_NUM_WORKERS=<int>` flag. By default, `3` worker threads are used to analyze the observed memory accesses. `0` might be used to disable the creation of additional threads for the analysis.

## Testing the installation
To test the installation, it is possible to execute the provided set of unit tests.
```
python -m unittest -v
```
---
layout: default
title: DiscoPoP
parent: Setup
nav_order: 1
---

# DiscoPoP Setup
## Prerequisites
- LLVM/clang version 11
- Python version 3.6 or greater

## Setup
```
git clone git@github.com:discopop-project/discopop.git
cd discopop
mkdir build
```

## Build libraries and install Python modules
```
cd build
cmake .. <CMAKE_FLAGS>
make
cd ..
```

where `<CMAKE_FLAGS>` can consist of any combination of the following flags and commonly used CMAKE_FLAGS:
- In case you want to use a specific version of LLVM, specify the `-DUSE_LLVM_VERSION=<version>` flag.
- In case you want to use a specific LLVM installation, specify the location via the `-DLLVM_DIST_PATH=<llvm_base_dir>` flag.
- In case your application uses PThreads, please specify `-DDP_PTHREAD_COMPATIBILITY_MODE=[0|1]`. Note, however, that this can influence the runtime of the profiling.
- In case you require a more verbose output of the runtime library, specify the `-DDP_RTLIB_VERBOSE=[0|1]` flag.
- In case you want to specify the number of worker threads available for the profiling step, specify the `-DDP_NUM_WORKERS=<int>` flag. By default, `3` worker threads are used to analyze the observed memory accesses. `0` can be used to disable the creation of additional threads for the analysis.

## Testing the installation
To test the installation, it is possible to execute the provided set of unit tests.
```
python -m unittest -v
```
45 changes: 23 additions & 22 deletions rtlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,29 @@
set(DiscoPoP_SOURCES
iFunctions.cpp
iFunctionsGlobals.cpp
iFunctionsTypes.cpp
signature.cpp
loop_counter.cpp
cu_taken_branch_counter.cpp
../share/lib/timer.cpp
MemoryRegionTree.cpp

memory/MemoryManager.cpp
memory/Signature.cpp

functions/dp_add_bb_deps.cpp
functions/dp_alloca.cpp
functions/dp_call.cpp
functions/dp_decl.cpp
functions/dp_delete.cpp
functions/dp_finalize.cpp
functions/dp_func_entry.cpp
functions/dp_func_exit.cpp
functions/dp_loop_entry.cpp
functions/dp_loop_exit.cpp
functions/dp_new.cpp
functions/dp_read.cpp
functions/dp_report_bb.cpp
functions/dp_report_bb_pair.cpp
functions/dp_write.cpp
injected_functions/dp_add_bb_deps.cpp
injected_functions/dp_alloca.cpp
injected_functions/dp_call.cpp
injected_functions/dp_decl.cpp
injected_functions/dp_delete.cpp
injected_functions/dp_finalize.cpp
injected_functions/dp_func_entry.cpp
injected_functions/dp_func_exit.cpp
injected_functions/dp_incr_taken_branch_counter.cpp
injected_functions/dp_loop_entry.cpp
injected_functions/dp_loop_exit.cpp
injected_functions/dp_loop_incr.cpp
injected_functions/dp_loop_output.cpp
injected_functions/dp_new.cpp
injected_functions/dp_read.cpp
injected_functions/dp_report_bb.cpp
injected_functions/dp_report_bb_pair.cpp
injected_functions/dp_taken_branch_counter_output.cpp
injected_functions/dp_write.cpp
)

set(CMAKE_CXX_FLAGS
Expand Down Expand Up @@ -70,4 +71,4 @@ install(TARGETS DiscoPoP_RT ARCHIVE DESTINATION lib)
# compile simple-alias-detection
#exec_program(${CMAKE_CURRENT_SOURCE_DIR}/simple-alias-detection/compile.sh
# ARGS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
#)
#)
6 changes: 6 additions & 0 deletions rtlib/DPTypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@

// #define SKIP_DUP_INSTR 1

// To manually enable/disable internal timing
// #define DP_SKIP_INTERNAL_TIMER
#ifndef DP_SKIP_INTERNAL_TIMER
#define DP_INTERNAL_TIMER
#endif

typedef std::int64_t LID;
typedef std::int64_t ADDR;
typedef std::int64_t sigElement;
Expand Down

0 comments on commit 5a4ae6e

Please sign in to comment.