Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for cublas Lt #182

Merged
merged 11 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cargo-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ jobs:
- uses: actions-rs/cargo@v1
with:
command: check
args: --no-default-features --features ci-check,no-std,cudnn,cublas,nvrtc,driver,curand,nccl
args: --no-default-features --features ci-check,no-std,cudnn,cublas,cublaslt,nvrtc,driver,curand,nccl
2 changes: 1 addition & 1 deletion .github/workflows/cargo-clippy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ jobs:
- uses: actions-rs/cargo@v1
with:
command: clippy
args: --no-default-features --features ci-check,no-std,cudnn,cublas,nvrtc,driver,curand,nccl -- -D warnings
args: --no-default-features --features ci-check,no-std,cudnn,cublas,cublaslt,nvrtc,driver,curand,nccl -- -D warnings
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ default = ["std", "driver", "nvrtc", "cublas", "curand"]
nvrtc = []
driver = ["nvrtc"]
cublas = ["driver"]
cublaslt = ["driver"]
cudnn = ["driver"]
curand = ["driver"]
nccl = ["driver"]
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Safe abstractions over:
2. [NVRTC API](https://docs.nvidia.com/cuda/nvrtc/index.html)
3. [cuRAND API](https://docs.nvidia.com/cuda/curand/index.html)
4. [cuBLAS API](https://docs.nvidia.com/cuda/cublas/index.html)
5. [cuBLASLt API](https://docs.nvidia.com/cuda/cublas/#using-the-cublaslt-api)

**Pre-alpha state**, expect breaking changes and not all cuda functions
contain a safe wrapper. **Contributions welcome for any that aren't included!**
Expand All @@ -27,6 +28,7 @@ To that end there are three levels to each wrapper (by default the safe api is e
use cudarc::driver::{safe, result, sys};
use cudarc::nvrtc::{safe, result, sys};
use cudarc::cublas::{safe, result, sys};
use cudarc::cublaslt::{safe, result, sys};
use cudarc::curand::{safe, result, sys};
```

Expand Down
26 changes: 18 additions & 8 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,37 @@ fn link_cuda() {

#[cfg(feature = "driver")]
println!("cargo:rustc-link-lib=dylib=cuda");
#[cfg(feature = "nvrtc")]
println!("cargo:rustc-link-lib=dylib=nvrtc");
#[cfg(feature = "curand")]
println!("cargo:rustc-link-lib=dylib=curand");
#[cfg(feature = "nccl")]
println!("cargo:rustc-link-lib=dylib=nccl");

#[cfg(feature = "static-linking")]
{
#[cfg(feature = "cublas")]
println!("cargo:rustc-link-lib=dylib=stdc++");
#[cfg(any(feature = "cublas", feature = "cublaslt"))] {
println!("cargo:rustc-link-lib=dylib=cudart");
println!("cargo:rustc-link-lib=static=cublasLt_static");
}
#[cfg(feature = "cublas")]
println!("cargo:rustc-link-lib=static=cublas_static");
#[cfg(feature = "cublas")]
println!("cargo:rustc-link-lib=static=cublasLt_static");
#[cfg(feature = "curand")] {
println!("cargo:rustc-link-lib=dylib=culibos");
println!("cargo:rustc-link-lib=static=curand_static");
}
#[cfg(feature = "nvrtc")] {
println!("cargo:rustc-link-lib=static=nvrtc_static");
println!("cargo:rustc-link-lib=static=nvptxcompiler_static");
println!("cargo:rustc-link-lib=static=nvrtc-builtins_static");
}
}
#[cfg(not(feature = "static-linking"))]
{
#[cfg(feature = "nvrtc")]
println!("cargo:rustc-link-lib=dylib=nvrtc");
#[cfg(feature = "curand")]
println!("cargo:rustc-link-lib=dylib=curand");
#[cfg(feature = "cublas")]
println!("cargo:rustc-link-lib=dylib=cublas");
#[cfg(feature = "cublas")]
#[cfg(any(feature = "cublas", feature = "cublaslt"))]
println!("cargo:rustc-link-lib=dylib=cublasLt");
}

Expand Down
17 changes: 17 additions & 0 deletions src/cublaslt/bindgen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Regenerates `sys.rs` from `wrapper.h` with rust-bindgen.
# Types, variables and functions whose names match `^cublasLt.*` are allowlisted,
# enums are emitted as Rust enums, and `core` is used instead of `std`.
# CUDA headers are looked up in /usr/local/cuda/include.
# Requires rust-bindgen 0.68.1 or newer.
# -e: stop at the first failing command, -x: echo each command, -u: unset variables are errors.
set -exu
# BINDGEN_EXTRA_CLANG_ARGS passes the extra preprocessor define through to clang.
BINDGEN_EXTRA_CLANG_ARGS="-D__CUDA_BF16_TYPES_EXIST__" \
bindgen \
--allowlist-type="^cublasLt.*" \
--allowlist-var="^cublasLt.*" \
--allowlist-function="^cublasLt.*" \
--default-enum-style=rust \
--no-doc-comments \
--with-derive-default \
--with-derive-eq \
--with-derive-hash \
--with-derive-ord \
--use-core \
wrapper.h -- -I/usr/local/cuda/include \
> sys.rs
6 changes: 6 additions & 0 deletions src/cublaslt/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// Thin wrappers over the raw bindings that convert status codes into `Result`s.
pub mod result;
/// Safe abstractions; everything in here is re-exported at the module root below.
pub mod safe;
// Lints are silenced for `sys` because `bindgen.sh` writes that file's contents
// (it redirects bindgen's output to `sys.rs`) rather than it being hand-written.
#[allow(warnings)]
pub mod sys;

// Expose the safe API directly from this module.
pub use safe::*;
233 changes: 233 additions & 0 deletions src/cublaslt/result.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
use super::sys;
use crate::cublaslt::sys::cublasLtMatmulAlgo_t;
use core::ffi::c_void;
use core::mem::MaybeUninit;

/// Error type wrapping the raw [`sys::cublasStatus_t`] reported by a cuBLASLt call.
///
/// The status code is exposed through the public tuple field so callers can match on it.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct CublasError(pub sys::cublasStatus_t);

impl sys::cublasStatus_t {
    /// Converts this status code into a `Result`.
    ///
    /// `CUBLAS_STATUS_SUCCESS` becomes `Ok(())`; every other status is wrapped in a
    /// [`CublasError`] and returned as `Err`.
    pub fn result(self) -> Result<(), CublasError> {
        if matches!(self, sys::cublasStatus_t::CUBLAS_STATUS_SUCCESS) {
            Ok(())
        } else {
            Err(CublasError(self))
        }
    }
}

#[cfg(feature = "std")]
impl std::fmt::Display for CublasError {
    /// Writes the error using its `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Formatting goes through fresh `format_args!`, so width/alternate flags given to
        // the outer formatter are not forwarded to the `Debug` output.
        f.write_fmt(format_args!("{:?}", self))
    }
}

// Marks `CublasError` as a standard error type when the `std` feature is enabled.
// The impl is empty, so all `Error` methods keep their default implementations.
#[cfg(feature = "std")]
impl std::error::Error for CublasError {}

/// Allocates a new cuBLASLt library handle. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltcreate)
///
/// # Errors
///
/// Any non-success status returned by `cublasLtCreate` is reported as a [`CublasError`].
pub fn create_handle() -> Result<sys::cublasLtHandle_t, CublasError> {
    let mut slot = MaybeUninit::<sys::cublasLtHandle_t>::uninit();
    // SAFETY: `slot` is a live, writable location for the out-parameter.
    let status = unsafe { sys::cublasLtCreate(slot.as_mut_ptr()) };
    status.result()?;
    // SAFETY: only reached after a success status, at which point the library is
    // expected to have stored the handle in `slot`.
    Ok(unsafe { slot.assume_init() })
}

/// Releases a cuBLASLt handle obtained from [create_handle()]. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltdestroy)
///
/// # Safety
///
/// `handle` must not have been freed already.
pub unsafe fn destroy_handle(handle: sys::cublasLtHandle_t) -> Result<(), CublasError> {
    let status = sys::cublasLtDestroy(handle);
    status.result()
}

/// Allocates a matrix layout descriptor for a matrix of the given element type,
/// dimensions and leading dimension. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatrixlayoutcreate)
///
/// # Errors
///
/// Any non-success status returned by `cublasLtMatrixLayoutCreate` is reported as a
/// [`CublasError`].
pub fn create_matrix_layout(
    matrix_type: sys::cudaDataType,
    rows: u64,
    cols: u64,
    ld: i64,
) -> Result<sys::cublasLtMatrixLayout_t, CublasError> {
    let mut slot = MaybeUninit::<sys::cublasLtMatrixLayout_t>::uninit();
    // SAFETY: `slot` is a live, writable location for the out-parameter; the remaining
    // arguments are plain values.
    let status = unsafe {
        sys::cublasLtMatrixLayoutCreate(slot.as_mut_ptr(), matrix_type, rows, cols, ld)
    };
    status.result()?;
    // SAFETY: only reached after a success status, at which point the library is
    // expected to have stored the descriptor in `slot`.
    Ok(unsafe { slot.assume_init() })
}

/// Releases a matrix layout descriptor obtained from [create_matrix_layout(...)]. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatrixlayoutdestroy)
///
/// # Safety
///
/// `matrix_layout` must not have been freed already.
pub unsafe fn destroy_matrix_layout(
    matrix_layout: sys::cublasLtMatrixLayout_t,
) -> Result<(), CublasError> {
    let status = sys::cublasLtMatrixLayoutDestroy(matrix_layout);
    status.result()
}

/// Allocates a matrix multiply descriptor for the given compute and scale types. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmuldesccreate)
///
/// # Errors
///
/// Any non-success status returned by `cublasLtMatmulDescCreate` is reported as a
/// [`CublasError`].
pub fn create_matmul_desc(
    compute_type: sys::cublasComputeType_t,
    scale_type: sys::cudaDataType,
) -> Result<sys::cublasLtMatmulDesc_t, CublasError> {
    let mut slot = MaybeUninit::<sys::cublasLtMatmulDesc_t>::uninit();
    // SAFETY: `slot` is a live, writable location for the out-parameter; the remaining
    // arguments are plain values.
    let status =
        unsafe { sys::cublasLtMatmulDescCreate(slot.as_mut_ptr(), compute_type, scale_type) };
    status.result()?;
    // SAFETY: only reached after a success status, at which point the library is
    // expected to have stored the descriptor in `slot`.
    Ok(unsafe { slot.assume_init() })
}

/// Sets the value of the specified attribute belonging to a previously created matrix multiply
/// descriptor. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmuldescsetattribute)
///
/// The arguments are forwarded unchanged to `cublasLtMatmulDescSetAttribute`.
///
/// # Errors
///
/// Any non-success status returned by the library is reported as a [`CublasError`].
///
/// # Safety
///
/// - `matmul_desc` must be a descriptor that has been created and not yet destroyed.
/// - `buf` must be valid for reads of `buf_size` bytes.
/// - The bytes behind `buf` must hold a value of the type and size that the cuBLASLt
///   documentation specifies for `attr`.
pub unsafe fn set_matmul_desc_attribute(
    matmul_desc: sys::cublasLtMatmulDesc_t,
    attr: sys::cublasLtMatmulDescAttributes_t,
    buf: *const c_void,
    buf_size: usize,
) -> Result<(), CublasError> {
    let status = sys::cublasLtMatmulDescSetAttribute(matmul_desc, attr, buf, buf_size);
    status.result()
}

/// Releases a matrix multiply descriptor obtained from [create_matmul_desc(...)]. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmuldescdestroy)
///
/// # Safety
///
/// `matmul_desc` must not have been freed already.
pub unsafe fn destroy_matmul_desc(
    matmul_desc: sys::cublasLtMatmulDesc_t,
) -> Result<(), CublasError> {
    let status = sys::cublasLtMatmulDescDestroy(matmul_desc);
    status.result()
}

/// Allocates a matrix multiply heuristic search preferences descriptor. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmulpreferencecreate)
///
/// # Errors
///
/// Any non-success status returned by `cublasLtMatmulPreferenceCreate` is reported as a
/// [`CublasError`].
pub fn create_matmul_pref() -> Result<sys::cublasLtMatmulPreference_t, CublasError> {
    let mut slot = MaybeUninit::<sys::cublasLtMatmulPreference_t>::uninit();
    // SAFETY: `slot` is a live, writable location for the out-parameter.
    let status = unsafe { sys::cublasLtMatmulPreferenceCreate(slot.as_mut_ptr()) };
    status.result()?;
    // SAFETY: only reached after a success status, at which point the library is
    // expected to have stored the descriptor in `slot`.
    Ok(unsafe { slot.assume_init() })
}

/// Sets the value of the specified attribute belonging to a previously created matrix multiply
/// preferences descriptor. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmulpreferencesetattribute)
///
/// The arguments are forwarded unchanged to `cublasLtMatmulPreferenceSetAttribute`.
///
/// # Errors
///
/// Any non-success status returned by the library is reported as a [`CublasError`].
///
/// # Safety
///
/// - `matmul_pref` must be a preferences descriptor that has been created and not yet
///   destroyed.
/// - `buf` must be valid for reads of `buf_size` bytes.
/// - The bytes behind `buf` must hold a value of the type and size that the cuBLASLt
///   documentation specifies for `attr`.
pub unsafe fn set_matmul_pref_attribute(
    matmul_pref: sys::cublasLtMatmulPreference_t,
    attr: sys::cublasLtMatmulPreferenceAttributes_t,
    buf: *const c_void,
    buf_size: usize,
) -> Result<(), CublasError> {
    let status = sys::cublasLtMatmulPreferenceSetAttribute(matmul_pref, attr, buf, buf_size);
    status.result()
}

/// Releases a matrix multiply preferences descriptor obtained from
/// [create_matmul_pref()]. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmulpreferencedestroy)
///
/// # Safety
///
/// `matmul_pref` must not have been freed already.
pub unsafe fn destroy_matmul_pref(
    matmul_pref: sys::cublasLtMatmulPreference_t,
) -> Result<(), CublasError> {
    let status = sys::cublasLtMatmulPreferenceDestroy(matmul_pref);
    status.result()
}

/// Retrieves the fastest possible algorithm for the matrix multiply operation function
/// given input matrices A, B and C and the output matrix D. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmulalgogetheuristic)
pub fn get_matmul_algo_heuristic(
handle: sys::cublasLtHandle_t,
matmul_desc: sys::cublasLtMatmulDesc_t,
a_layout: sys::cublasLtMatrixLayout_t,
b_layout: sys::cublasLtMatrixLayout_t,
c_layout: sys::cublasLtMatrixLayout_t,
d_layout: sys::cublasLtMatrixLayout_t,
matmul_pref: sys::cublasLtMatmulPreference_t,
) -> Result<sys::cublasLtMatmulHeuristicResult_t, CublasError> {
let mut matmul_heuristic = MaybeUninit::uninit();
let mut algo_count = 0;

unsafe {
sys::cublasLtMatmulAlgoGetHeuristic(
handle,

Check failure on line 166 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
matmul_desc,

Check failure on line 167 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
a_layout,

Check failure on line 168 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
b_layout,

Check failure on line 169 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
c_layout,

Check failure on line 170 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
d_layout,

Check failure on line 171 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
matmul_pref,

Check failure on line 172 in src/cublaslt/result.rs

View workflow job for this annotation

GitHub Actions / clippy

this public function might dereference a raw pointer but is not marked `unsafe`
1, // only select the fastest algo
matmul_heuristic.as_mut_ptr(),
&mut algo_count,
)
.result()?;

if algo_count == 0 {
return Err(CublasError(
sys::cublasStatus_t::CUBLAS_STATUS_NOT_SUPPORTED,
));
}

let matmul_heuristic = matmul_heuristic.assume_init();
matmul_heuristic.state.result()?;

Ok(matmul_heuristic)
}
}

/// Computes the matrix multiplication of matrices A and B to produce the output matrix D,
/// according to the following operation: D = alpha*(A*B) + beta*(C)
/// where A, B, and C are input matrices, and alpha and beta are input scalars. See
/// [nvidia docs](https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmul)
///
/// Note that this wrapper takes `alpha` and `beta` next to each other, while
/// `cublasLtMatmul` expects `beta` after the `b` arguments; the values are reordered
/// when forwarded.
///
/// # Errors
///
/// Any non-success status returned by `cublasLtMatmul` is reported as a [`CublasError`].
///
/// # Safety
///
/// Every argument is forwarded unchanged to `cublasLtMatmul`, so the caller must meet
/// the requirements the cuBLASLt documentation places on each of them. In particular:
///
/// - `handle`, `matmul_desc` and the four layouts must have been created and not yet
///   destroyed.
/// - `alpha`, `beta`, `a`, `b`, `c`, `d` and `algo` must be valid pointers of the kind
///   cuBLASLt expects for the corresponding parameter, and the matrix buffers must be
///   large enough for the matrices described by their layouts.
/// - `workspace` must be valid for `workspace_size` bytes.
/// - `stream` must be a stream that is valid for the duration of the call.
pub unsafe fn matmul(
    handle: sys::cublasLtHandle_t,
    matmul_desc: sys::cublasLtMatmulDesc_t,
    alpha: *const c_void,
    beta: *const c_void,
    a: *const c_void,
    a_layout: sys::cublasLtMatrixLayout_t,
    b: *const c_void,
    b_layout: sys::cublasLtMatrixLayout_t,
    c: *const c_void,
    c_layout: sys::cublasLtMatrixLayout_t,
    d: *mut c_void,
    d_layout: sys::cublasLtMatrixLayout_t,
    algo: *const cublasLtMatmulAlgo_t,
    workspace: *mut c_void,
    workspace_size: usize,
    stream: sys::cudaStream_t,
) -> Result<(), CublasError> {
    let status = sys::cublasLtMatmul(
        handle,
        matmul_desc,
        alpha,
        a,
        a_layout,
        b,
        b_layout,
        beta,
        c,
        c_layout,
        d,
        d_layout,
        algo,
        workspace,
        workspace_size,
        stream,
    );
    status.result()
}
Loading
Loading