Skip to content

Commit

Permalink
separable compilation for Rust (#244)
Browse files Browse the repository at this point in the history
separable compilation for Rust #244
  • Loading branch information
vhnatyk committed Nov 16, 2023
1 parent 29cad66 commit 7baea7c
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 63 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/main-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,11 @@ jobs:
cuda: '12.0.0'
method: 'network'
# https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
sub-packages: '["cudart", "nvcc", "thrust"]'
sub-packages: '["cudart", "nvcc", "thrust", "visual_studio_integration"]'
- name: Build Rust Targets
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
env:
CUDA_PATH: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}
run: cargo build --release --verbose

build-golang-linux:
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/main-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,5 @@ jobs:
- name: Checkout
uses: actions/checkout@v3
- name: Check clang-format
run: |
if [[ $(find ./ -path ./icicle/build -prune -o -path ./target -prune -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; then echo "Please run clang-format"; exit 1; fi
run: unformatted_files=$(find ./ -path ./icicle/build -prune -o -path ./target -prune -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file >&2); if [[ $unformatted_files ]]; then echo $unformatted_files; echo "Please run clang-format"; exit 1; fi

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point

[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
cmake = "0.1.50"

[dev-dependencies]
"criterion" = "0.4.0"
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ The script does the following:
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
- Creates a test file with the curve name in ``src``.

Also files from ``./icicle/curves/<curve_name>/supported_operations.cu`` should be added individually to ``add_library`` section of [``./icicle/CMakeLists.txt``][CMAKELISTS]

Testing the new curve could be done by running the tests in ``tests_curve_name`` (e.g. ``tests_bls12_381``).

## Docker
Expand Down Expand Up @@ -207,6 +209,12 @@ If you are changing code, please make sure to change your [git hooks path][HOOKS
git config core.hooksPath ./scripts/hooks
```

In case `clang-format` is missing on your system, you can install it using the following command:

```sh
sudo apt install clang-format
```

This will ensure our custom hooks are run and will make it easier to follow our coding guidelines.

### Hall of Fame
Expand Down Expand Up @@ -241,6 +249,7 @@ See [LICENSE-MIT][LMIT] for details.
[googletest]: https://github.com/google/googletest/
[HOOKS_DOCS]: https://git-scm.com/docs/githooks
[HOOKS_PATH]: ./scripts/hooks/
[CMAKELISTS]: https://github.com/ingonyama-zk/icicle/blob/f0e6b465611227b858ec4590f4de5432e892748d/icicle/CMakeLists.txt#L28
[GOOGLE_COLAB_ICICLE]: https://github.com/gkigiermo/rust-cuda-colab
[GRANT_PROGRAM]: https://docs.google.com/forms/d/e/1FAIpQLSc967TnNwxZZ4akejcSi4KOUmGrEc68ZZV-FHLfo8KnP1wbpg/viewform

Expand Down
60 changes: 41 additions & 19 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,53 @@
use std::env;
use std::env::{self, var};

use cmake::Config;

fn main() {
//TODO: check cargo features selected
//TODO: can conflict/duplicate with make ?
let cargo_dir = var("CARGO_MANIFEST_DIR").unwrap();
let profile = var("PROFILE").unwrap();

let target_output_dir = format!("{}/target/{}", cargo_dir, profile);
let build_output_dir = format!("{}/build", target_output_dir);

println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=./icicle");
println!("cargo:rerun-if-changed=./target/{}", profile); // without this it ignores manual changes to build folder

let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));

let mut arch = String::from("-arch=");
arch.push_str(&arch_type);
let mut stream = String::from("-default-stream=");
stream.push_str(&stream_type);
let mut cmake = Config::new("./icicle");
cmake
.define("BUILD_TESTS", "OFF")
.out_dir(&target_output_dir)
.build_target("icicle");

let mut nvcc = cc::Build::new();
let target_profile: &str = if profile == "release" { "Release" } else { "Debug" };

println!("Compiling icicle library using arch: {}", &arch);
cmake.define("CMAKE_BUILD_TYPE", target_profile);

if cfg!(feature = "g2") {
nvcc.define("G2_DEFINED", None);
cmake.define("G2_DEFINED", "");
}

cmake.build();

if cfg!(unix) {
if let Ok(cuda_path) = var("CUDA_HOME") {
println!("cargo:rustc-link-search=native={}/lib64", cuda_path);
} else {
println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
}
} else if cfg!(windows) {
let build_output_dir_cmake = format!("{}/{}", build_output_dir, target_profile);

println!("cargo:rustc-link-search={}", &build_output_dir_cmake);
}

println!("cargo:rustc-link-search={}", &build_output_dir);
println!("cargo:rustc-link-search={}", &target_output_dir);
println!("cargo:rustc-link-lib=ingo_icicle");
println!("cargo:rustc-link-lib=dylib=cuda");
println!("cargo:rustc-link-lib=dylib=cudart");

if cfg!(unix) {
println!("cargo:rustc-link-lib=dylib=stdc++");
}
nvcc.cuda(true);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag(&stream);
nvcc.files(["./icicle/curves/index.cu"]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}
117 changes: 91 additions & 26 deletions icicle/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,109 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# add the target cuda architectures
# each additional architecture increases the compilation time and output file size
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
if(${CMAKE_VERSION} VERSION_LESS "3.24.0")
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
else()
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
endif ()
find_program(_nvidia_smi "nvidia-smi")

if(_nvidia_smi)
set(DETECT_GPU_COUNT_NVIDIA_SMI 0)

# execute nvidia-smi -L to get a short list of GPUs available
exec_program(${_nvidia_smi_path} ARGS -L
OUTPUT_VARIABLE _nvidia_smi_out
RETURN_VALUE _nvidia_smi_ret)

# process the stdout of nvidia-smi
if(_nvidia_smi_ret EQUAL 0)
# convert string with newlines to list of strings
string(REGEX REPLACE "\n" ";" _nvidia_smi_out "${_nvidia_smi_out}")

foreach(_line ${_nvidia_smi_out})
if(_line MATCHES "^GPU [0-9]+:")
math(EXPR DETECT_GPU_COUNT_NVIDIA_SMI "${DETECT_GPU_COUNT_NVIDIA_SMI}+1")

# the UUID is not very useful for the user, remove it
string(REGEX REPLACE " \\(UUID:.*\\)" "" _gpu_info "${_line}")

if(NOT _gpu_info STREQUAL "")
list(APPEND DETECT_GPU_INFO "${_gpu_info}")
endif()
endif()
endforeach()

check_num_gpu_info(${DETECT_GPU_COUNT_NVIDIA_SMI} DETECT_GPU_INFO)
set(DETECT_GPU_COUNT ${DETECT_GPU_COUNT_NVIDIA_SMI})
endif()
endif()

# ##
if(DETECT_GPU_COUNT GREATER 0)
set(CMAKE_CUDA_ARCHITECTURES native) # do native
else()
# no GPUs found, like on Github CI runners
set(CMAKE_CUDA_ARCHITECTURES 50) # some safe value
endif()
endif()

project(icicle LANGUAGES CUDA CXX)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS_RELEASE "")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")

include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
option(BUILD_TESTS "Build tests" OFF)

# boosting lib
include_directories("/home/miner/include/boost_1_80_0")
if(NOT BUILD_TESTS)
add_library(
icicle STATIC
curves/bn254/lde.cu
curves/bn254/msm.cu
curves/bn254/projective.cu
curves/bn254/ve_mod_mult.cu
curves/bls12_377/lde.cu
curves/bls12_377/msm.cu
curves/bls12_377/projective.cu
curves/bls12_377/ve_mod_mult.cu
curves/bls12_381/lde.cu
curves/bls12_381/msm.cu
curves/bls12_381/projective.cu
curves/bls12_381/ve_mod_mult.cu
curves/bls12_381/poseidon.cu
)

set_target_properties(icicle PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(icicle PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
set_target_properties(icicle PROPERTIES OUTPUT_NAME "ingo_icicle")
target_compile_options(icicle PRIVATE -c)

else()
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
)

set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

enable_testing()
enable_testing()

add_executable(
primitives_test
primitives/test.cu
)
target_link_libraries(
primitives_test
GTest::gtest_main
)
add_executable(
primitives_test
primitives/test.cu
)
target_link_libraries(
primitives_test
GTest::gtest_main
)

include(GoogleTest)
set_target_properties(primitives_test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
include(GoogleTest)
set_target_properties(primitives_test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

gtest_discover_tests(primitives_test)
gtest_discover_tests(primitives_test)
endif()
2 changes: 1 addition & 1 deletion icicle/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mkdir -p build; cmake -S . -B build; cmake --build build; cd build && ctest; cd

Before proceeding, make sure the following software installed:

1. CMake at least version 3.16, which can be downloaded from [cmake.org](https://cmake.org/files/)
1. CMake at least version 3.18, which can be downloaded from [cmake.org](https://cmake.org/files/)
It is recommended to have the latest version installed.
2. [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu) version 12.0 or newer.
3. GCC - version 9 or newer recommended.
Expand Down
16 changes: 7 additions & 9 deletions icicle/appUtils/msm/msm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,8 @@ __global__ void add_ones_kernel(A* points, S* scalars, P* results, const unsigne
results[tid] = sum;
}

template <typename S>
__global__ void
find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_length, S* fake_param, unsigned* result)
__global__ __forceinline__ void
find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_length, unsigned* result)
{
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned nof_threads = (size + run_length - 1) / run_length;
Expand All @@ -168,9 +167,8 @@ find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_len
if (tid == 0 && v[size - 1] > cutoff) { result[0] = size; }
}

template <typename S>
__global__ void find_max_size(
unsigned* bucket_sizes, unsigned* single_bucket_indices, unsigned c, S* fake_param, unsigned* largest_bucket_size)
__global__ __forceinline__ void
find_max_size(unsigned* bucket_sizes, unsigned* single_bucket_indices, unsigned c, unsigned* largest_bucket_size)
{
for (int i = 0;; i++) {
if (single_bucket_indices[i] & ((1 << c) - 1)) {
Expand Down Expand Up @@ -528,14 +526,14 @@ void bucket_method_msm(
NUM_THREADS = min(1 << 5, cutoff_nof_runs);
NUM_BLOCKS = (cutoff_nof_runs + NUM_THREADS - 1) / NUM_THREADS;
find_cutoff_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(
sorted_bucket_sizes, h_nof_buckets_to_compute, bucket_th, cutoff_run_length, d_scalars, nof_large_buckets);
sorted_bucket_sizes, h_nof_buckets_to_compute, bucket_th, cutoff_run_length, nof_large_buckets);

unsigned h_nof_large_buckets;
cudaMemcpyAsync(&h_nof_large_buckets, nof_large_buckets, sizeof(unsigned), cudaMemcpyDeviceToHost, stream);

unsigned* max_res;
cudaMallocAsync(&max_res, sizeof(unsigned) * 2, stream);
find_max_size<<<1, 1, 0, stream>>>(sorted_bucket_sizes, sorted_single_bucket_indices, c, d_scalars, max_res);
find_max_size<<<1, 1, 0, stream>>>(sorted_bucket_sizes, sorted_single_bucket_indices, c, max_res);

unsigned h_max_res[2];
cudaMemcpyAsync(h_max_res, max_res, sizeof(unsigned) * 2, cudaMemcpyDeviceToHost, stream);
Expand Down Expand Up @@ -967,7 +965,7 @@ void reference_msm(S* scalars, A* a_points, unsigned size)
std::cout << P::to_affine(res) << std::endl;
}

unsigned get_optimal_c(const unsigned size)
unsigned inline get_optimal_c(const unsigned size)
{
if (size < 17) return 1;
// return 17;
Expand Down
6 changes: 3 additions & 3 deletions icicle/utils/error_handler.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__)
template <typename T>
void check(T err, const char* const func, const char* const file, const int line)
void inline check(T err, const char* const func, const char* const file, const int line)
{
if (err != cudaSuccess) {
std::cerr << "CUDA Runtime Error at: " << file << ":" << line << std::endl;
Expand All @@ -12,7 +12,7 @@ void check(T err, const char* const func, const char* const file, const int line
}

#define CHECK_LAST_CUDA_ERROR() checkLast(__FILE__, __LINE__)
void checkLast(const char* const file, const int line)
void inline checkLast(const char* const file, const int line)
{
cudaError_t err{cudaGetLastError()};
if (err != cudaSuccess) {
Expand All @@ -22,7 +22,7 @@ void checkLast(const char* const file, const int line)
}

#define CHECK_SYNC_DEVICE_ERROR() syncDevice(__FILE__, __LINE__)
void syncDevice(const char* const file, const int line)
void inline syncDevice(const char* const file, const int line)
{
cudaError_t err{cudaDeviceSynchronize()};
if (err != cudaSuccess) {
Expand Down
6 changes: 5 additions & 1 deletion scripts/hooks/pre-push
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
status=0
# Run clang-format on CUDA, C, and CPP files
# clang-format writes to stderr in dry-run mode. In order to capture the output to detect if there are changes needed we redirect stderr to stdin
if [[ $(find ./ -path ./icicle/build -prune -o -path ./target -prune -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]];
# to print list of files
unformatted_files=$(find ./ -path ./icicle/build -prune -o -path ./target -prune -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1)

if [[ $unformatted_files ]];
then
echo "🚨 There are files in Icicle Core that need formatting."
echo $unformatted_files
echo "Please format all .c, .cpp, .h, .cu, .cuh files using the following command:"
echo "find ./ -path ./icicle/build -prune -o -path ./target -prune -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format -i -style=file"
status=1
Expand Down

0 comments on commit 7baea7c

Please sign in to comment.