Skip to content

Commit

Permalink
Polynomial API and CUDA backend
Browse files Browse the repository at this point in the history
(1) C++ template API that is backend (GPU, ZPU or other) agnostic
(polynomials.h)
(2) concrete CUDA implementation (polynomial_cuda_backend.cu/cuh)
(3) C API for FFI
(4) Groth16 example (test)
(5) icicle library is now built for tests as well. Polynomial tests are
linked to icicle lib.
  • Loading branch information
yshekel committed Apr 14, 2024
1 parent 0c34092 commit 6e3eecd
Show file tree
Hide file tree
Showing 27 changed files with 3,154 additions and 118 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/cpp_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ jobs:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Build curve
working-directory: ./icicle/tests
working-directory: ./icicle
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
run: |
mkdir -p build
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=${{ matrix.curve }} -DG2=ON -S . -B build
cmake --build build
mkdir -p build && rm -rf build/*
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DCURVE=${{ matrix.curve }} -DG2=ON -S . -B build
cmake --build build -j
- name: Run C++ curve Tests
working-directory: ./icicle/tests/build
working-directory: ./icicle/build/tests
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
run: ctest

Expand All @@ -62,13 +62,13 @@ jobs:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Build field
working-directory: ./icicle/tests
working-directory: ./icicle
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
run: |
mkdir -p build
cmake -DCMAKE_BUILD_TYPE=Release -DFIELD=${{ matrix.field }} -DEXT_FIELD=ON -S . -B build
cmake --build build
mkdir -p build && rm -rf build/*
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DFIELD=${{ matrix.field }} -DEXT_FIELD=ON -S . -B build
cmake --build build -j
- name: Run C++ field Tests
working-directory: ./icicle/tests/build
working-directory: ./icicle/build/tests
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
run: ctest
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
**/Cargo.lock
**/icicle/build/
**/wrappers/rust/icicle-cuda-runtime/src/bindings.rs
**/build
**/build*
**/icicle/appUtils/large_ntt/work
icicle/appUtils/large_ntt/work/test_ntt
27 changes: 27 additions & 0 deletions docs/docs/icicle/polynomials/ffi.uml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
@startuml
' Component diagram of the FFI layering drawn below: a C++ template component,
' a C API wrapper component, a Rust code component, and the static library
' artifact that the wrapper compiles into and the Rust code links against.
skinparam componentStyle uml2

' Define Components
component "C++ Template\nComponent" as CppTemplate {
[Parameterizable Interface]
}
component "C API Wrapper\nComponent" as CApiWrapper {
[C API Interface]
}
component "Rust Code\nComponent" as RustCode {
[Macro Interface\n(Template Instantiation)]
}

' Define Artifact
artifact "Static Library\n«artifact»" as StaticLib

' Connections
' Solid arrows (-->) and the dotted arrow (..>) carry the labels given after ':'.
CppTemplate -down-> CApiWrapper : Instantiates
CApiWrapper .down.> StaticLib : Compiles into
RustCode -left-> StaticLib : Links against\nand calls via FFI

' Notes
note right of CppTemplate : Generic C++\ntemplate implementation
note right of CApiWrapper : Exposes C API for FFI\nto Rust/Go
note right of RustCode : Uses macros to\ninstantiate templates
@enduml
86 changes: 86 additions & 0 deletions docs/docs/icicle/polynomials/hw_backends.uml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
@startuml
' Class diagram of the polynomial API design drawn below: PolynomialAPI holds
' two strategy interfaces (IPolynomialBackend for operations, IPolynomialContext
' for state), each with GPU, ZPU and Tracer implementations.

' Define Interface for Polynomial Backend Operations
interface IPolynomialBackend {
+add()
+subtract()
+multiply()
+divide()
+evaluate()
}

' Define Interface for Polynomial Context (State Management)
interface IPolynomialContext {
+initFromCoeffs()
+initFromEvals()
+getCoeffs()
+getEvals()
}

' PolynomialAPI now uses two strategies: Backend and Context
' The strategy fields and their setters are private ('-'); the arithmetic
' operations are public ('+').
class PolynomialAPI {
-backendStrategy: IPolynomialBackend
-contextStrategy: IPolynomialContext
-setBackendStrategy(IPolynomialBackend)
-setContextStrategy(IPolynomialContext)
+add()
+subtract()
+multiply()
+divide()
+evaluate()
}

' Backend Implementations
' Each backend exposes the same five operations as IPolynomialBackend plus one
' protected ('#') resource/data member.
class GPUPolynomialBackend implements IPolynomialBackend {
#gpuResources: Resource
+add()
+subtract()
+multiply()
+divide()
+evaluate()
}

class ZPUPolynomialBackend implements IPolynomialBackend {
#zpuResources: Resource
+add()
+subtract()
+multiply()
+divide()
+evaluate()
}

class TracerPolynomialBackend implements IPolynomialBackend {
#traceData: Data
+add()
+subtract()
+multiply()
+divide()
+evaluate()
}

' Context Implementations (Placeholder for actual implementation)
class GPUContext implements IPolynomialContext {
+initFromCoeffs()
+initFromEvals()
+getCoeffs()
+getEvals()
}

class ZPUContext implements IPolynomialContext {
+initFromCoeffs()
+initFromEvals()
+getCoeffs()
+getEvals()
}

class TracerContext implements IPolynomialContext {
+initFromCoeffs()
+initFromEvals()
+getCoeffs()
+getEvals()
}

' Relationships
' PolynomialAPI aggregates (o--) one backend strategy and one context strategy.
PolynomialAPI o-- IPolynomialBackend : uses
PolynomialAPI o-- IPolynomialContext : uses
@enduml
40 changes: 30 additions & 10 deletions icicle/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,39 +1,59 @@
cmake_minimum_required(VERSION 3.18)

project(icicle LANGUAGES CUDA CXX)

include(cmake/Common.cmake)
include(cmake/FieldsCommon.cmake)
include(cmake/CurvesCommon.cmake)

set_env()
set_gpu_env()

project(icicle LANGUAGES CUDA CXX)

option(DEVMODE "Enable development mode" OFF)
if (DEVMODE)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O0 --ptxas-options=-O0 --ptxas-options=-allow-expensive-optimizations=false -DDEVMODE=ON")
endif ()
option(EXT_FIELD "Build extension field" OFF)
option(G2 "Build G2" OFF)
option(ECNTT "Build ECNTT" OFF)
option(BUILD_HASH "Build hash functions" OFF)
option(BUILD_TESTS "Build unit tests" OFF)
option(BUILD_BENCHMARKS "Build benchmarks" OFF)
# add options here

if((DEFINED CURVE) AND (DEFINED FIELD))
message( FATAL_ERROR "CURVE and FIELD cannot be defined at the same time" )
endif ()

option(EXT_FIELD "Build extension field" OFF)
if (DEVMODE)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O0 --ptxas-options=-O0 --ptxas-options=-allow-expensive-optimizations=false -DDEVMODE=ON")
endif ()

if(DEFINED FIELD)
check_field()
add_subdirectory(src/fields)
endif ()

option(G2 "Build G2" OFF)
option(ECNTT "Build ECNTT" OFF)
if(DEFINED CURVE)
check_curve()
set(FIELD ${CURVE})
add_subdirectory(src/fields)
add_subdirectory(src/curves)
endif ()

option(BUILD_HASH "Build hash functions" OFF)
if (G2)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DG2")
endif ()

if (EXT_FIELD)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
endif ()

if(BUILD_HASH)
add_subdirectory(src/hash)
endif ()
endif ()

if (BUILD_TESTS)
add_subdirectory(tests)
endif()

if (BUILD_BENCHMARKS)
add_subdirectory(benchmarks)
endif()
30 changes: 2 additions & 28 deletions icicle/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,31 +1,5 @@
cmake_minimum_required(VERSION 3.18)

include(../cmake/Common.cmake)
include(../cmake/CurvesCommon.cmake)
include(../cmake/FieldsCommon.cmake)

set_env()
set_gpu_env()

project(icicle_benchmarks LANGUAGES CUDA CXX)

if(DEFINED CURVE)
if(DEFINED FIELD)
message( FATAL_ERROR "CURVE and FIELD cannot be defined at the same time" )
endif ()
check_curve()
elseif(DEFINED FIELD)
if (EXT_FIELD)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
endif ()
check_field()
endif ()

add_executable(
benches
benches.cu
)

add_executable(benches benches.cu)
target_link_libraries(benches benchmark::benchmark)

target_include_directories(benches PUBLIC ${CMAKE_SOURCE_DIR}/include/)
find_package(benchmark REQUIRED)
5 changes: 4 additions & 1 deletion icicle/include/curves/projective.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ public:
return {point.x * denom, point.y * denom};
}

static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point) { return {point.x, point.y, FF::one()}; }
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point)
{
  // The affine zero point has no (x, y, 1) embedding here; it maps to the projective zero instead.
  if (point == Affine<FF>::zero()) { return zero(); }

  // Every other affine point is lifted by setting the third coordinate to one.
  return Projective{point.x, point.y, FF::one()};
}

static HOST_DEVICE_INLINE Projective ToMontgomery(const Projective& point)
{
Expand Down
2 changes: 1 addition & 1 deletion icicle/include/msm/msm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ namespace msm {
* @return Default value of [MSMConfig](@ref MSMConfig).
*/
template <typename A>
MSMConfig DefaultMSMConfig();
MSMConfig DefaultMSMConfig(const device_context::DeviceContext& ctx = device_context::get_default_device_context());

/**
* A function that computes MSM: \f$ MSM(s_i, P_i) = \sum_{i=1}^N s_i \cdot P_i \f$.
Expand Down
12 changes: 11 additions & 1 deletion icicle/include/ntt/ntt.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ namespace ntt {
template <typename S>
cudaError_t ReleaseDomain(device_context::DeviceContext& ctx);

/**
 * Returns the basic root of unity Wn corresponding to the basic root used to initialize the domain.
 * Useful when computing NTT on cosets. In that case we must use the root W_2n that is between W_n and W_n+1.
 * @tparam S Type of the returned root of unity.
 * @param logn log size of the required root.
 * @param ctx Details related to the device such as its id and stream id.
 * @return Wn root of unity corresponding to logn and the basic root used for initDomain(root)
 */
template <typename S>
S GetRootOfUnity(uint64_t logn, device_context::DeviceContext& ctx);

/**
* @enum NTTDir
* Whether to perform normal forward NTT, or inverse NTT (iNTT). Mathematically, forward NTT computes polynomial
Expand Down Expand Up @@ -129,7 +138,8 @@ namespace ntt {
* @return Default value of [NTTConfig](@ref NTTConfig).
*/
template <typename S>
NTTConfig<S> DefaultNTTConfig();
NTTConfig<S>
DefaultNTTConfig(const device_context::DeviceContext& ctx = device_context::get_default_device_context());

/**
* A function that computes NTT or iNTT in-place. It's necessary to call [InitDomain](@ref InitDomain) with an
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#pragma once

#include "gpu-utils/device_context.cuh"
#include "fields/field_config.cuh"
#include "polynomials/polynomials.h"

using device_context::DeviceContext;

namespace polynomials {
/**
 * @brief CUDA implementation of `AbstractPolynomialFactory`.
 *
 * Creates the polynomial context and backend objects through the overridden
 * `create_context()` / `create_backend()` methods, and keeps per-device state
 * (one `DeviceContext` and one `cudaStream_t` per device id).
 *
 * The factory is non-copyable: as noted on `m_device_streams`, each stored
 * `DeviceContext` refers to a stream held by this object, so a copied factory
 * would hold contexts that refer into the source factory's stream storage.
 *
 * @tparam C Type of the coefficients (defaults to `scalar_t`).
 * @tparam D Domain type (defaults to `C`).
 * @tparam I Image type (defaults to `C`).
 */
template <typename C = scalar_t, typename D = C, typename I = C>
class CUDAPolynomialFactory : public AbstractPolynomialFactory<C, D, I>
{
  std::vector<DeviceContext> m_device_contexts; // device-id --> device context
  std::vector<cudaStream_t> m_device_streams;   // device-id --> device stream. Storing the streams here as workaround
                                                // since DeviceContext has a reference to a stream.

public:
  CUDAPolynomialFactory();
  // NOTE(review): defined out of line; presumably releases the per-device streams -- confirm in the .cu file.
  ~CUDAPolynomialFactory() override;

  // Copying would duplicate DeviceContext entries that still reference the source object's streams.
  CUDAPolynomialFactory(const CUDAPolynomialFactory&) = delete;
  CUDAPolynomialFactory& operator=(const CUDAPolynomialFactory&) = delete;

  /** @brief Creates and returns a shared pointer to a polynomial context. */
  std::shared_ptr<IPolynomialContext<C, D, I>> create_context() override;

  /** @brief Creates and returns a shared pointer to a polynomial backend. */
  std::shared_ptr<IPolynomialBackend<C, D, I>> create_backend() override;
};
} // namespace polynomials
49 changes: 49 additions & 0 deletions icicle/include/polynomials/polynomial_abstract_factory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma once

#include "polynomial_context.h"
#include "polynomial_backend.h"
#include <memory> // For std::shared_ptr

namespace polynomials {

/**
* @brief Abstract factory for creating polynomial contexts and backends.
*
* The `AbstractPolynomialFactory` serves as an interface for factories capable of creating
* instances of `IPolynomialContext` and `IPolynomialBackend`. This design allows for the
* decoupling of object creation from their usage, facilitating the implementation of various
* computational strategies (e.g., GPU, ZPU) without altering client code. Each concrete factory
* is expected to provide tailored implementations of polynomial contexts and backends that
* are optimized for specific computational environments.
*
* @tparam C Type of the coefficients.
* @tparam D Domain type, representing the input space of the polynomial.
* @tparam I Image type, representing the output space of the polynomial.
*/
template <typename C, typename D, typename I>
class AbstractPolynomialFactory
{
public:
/**
* @brief Creates and returns a shared pointer to an `IPolynomialContext` instance.
*
* @return std::shared_ptr<IPolynomialContext<C, D, I>> A shared pointer to the created
* polynomial context instance.
*/
virtual std::shared_ptr<IPolynomialContext<C, D, I>> create_context() = 0;

/**
* @brief Creates and returns a shared pointer to an `IPolynomialBackend` instance.
*
* @return std::shared_ptr<IPolynomialBackend<C, D, I>> A shared pointer to the created
* polynomial backend instance.
*/
virtual std::shared_ptr<IPolynomialBackend<C, D, I>> create_backend() = 0;

/**
* @brief Virtual destructor for the `AbstractPolynomialFactory`.
*/
virtual ~AbstractPolynomialFactory() = default;
};

} // namespace polynomials

0 comments on commit 6e3eecd

Please sign in to comment.