Skip to content

Commit

Permalink
Merge pull request #7994 from kmaehashi/softlink-driver
Browse files Browse the repository at this point in the history
Softlink CUDA Driver
  • Loading branch information
emcastillo committed Dec 1, 2023
2 parents b81289d + f35dadd commit d9fd6de
Show file tree
Hide file tree
Showing 14 changed files with 236 additions and 225 deletions.
216 changes: 153 additions & 63 deletions cupy_backends/cuda/api/_driver_extern.pxi
Original file line number Diff line number Diff line change
@@ -1,66 +1,156 @@
cdef extern from '../../cupy_backend.h' nogil:
# Error handling
int cuGetErrorName(Result error, const char** pStr)
int cuGetErrorString(Result error, const char** pStr)

# Primary context management
int cuDevicePrimaryCtxRelease(Device dev)

# Context management
int cuCtxGetCurrent(Context* pctx)
int cuCtxSetCurrent(Context ctx)
int cuCtxCreate(Context* pctx, unsigned int flags, Device dev)
int cuCtxDestroy(Context ctx)
int cuCtxGetDevice(Device*)

# Module load and kernel execution
int cuLinkCreate(unsigned int numOptions, CUjit_option* options,
void** optionValues, LinkState* stateOut)
int cuLinkAddData(LinkState state, CUjitInputType type, void* data,
size_t size, const char* name, unsigned int numOptions,
CUjit_option* options, void** optionValues)
int cuLinkAddFile(LinkState state, CUjitInputType type, const char* path,
unsigned int numOptions, CUjit_option* options, void**
optionValues)
int cuLinkComplete(LinkState state, void** cubinOut, size_t* sizeOut)
int cuLinkDestroy(LinkState state)
int cuModuleLoad(Module* module, char* fname)
int cuModuleLoadData(Module* module, void* image)
int cuModuleUnload(Module hmod)
int cuModuleGetFunction(Function* hfunc, Module hmod,
char* name)
int cuModuleGetGlobal(Deviceptr* dptr, size_t* bytes, Module hmod,
char* name)
int cuLaunchKernel(
Function f, unsigned int gridDimX, unsigned int gridDimY,
unsigned int gridDimZ, unsigned int blockDimX,
unsigned int blockDimY, unsigned int blockDimZ,
unsigned int sharedMemBytes, Stream hStream,
void** kernelParams, void** extra)

int cuLaunchCooperativeKernel(
Function f, unsigned int gridDimX, unsigned int gridDimY,
unsigned int gridDimZ, unsigned int blockDimX,
unsigned int blockDimY, unsigned int blockDimZ,
unsigned int sharedMemBytes, Stream hStream,
void** kernelParams)

# Kernel attributes
int cuFuncGetAttribute(int *pi, CUfunction_attribute attrib,
Function hfunc)

int cuFuncSetAttribute(Function hfunc, CUfunction_attribute attrib,
int value)

# Occupancy
int cuOccupancyMaxActiveBlocksPerMultiprocessor(
int* numBlocks, Function func, int blockSize, size_t dynamicSMemSize)
int cuOccupancyMaxPotentialBlockSize(
int* minGridSize, int* blockSize, Function func, CUoccupancyB2DSize
block2shmem, size_t dynamicSMemSize, int blockSizeLimit)

# Stream
int cuStreamGetCtx(Stream hStream, Context* pctx)
import sys as _sys

from cupy_backends.cuda._softlink cimport SoftLink


# Error handling
ctypedef int (*F_cuGetErrorName)(Result error, const char** pStr) nogil
cdef F_cuGetErrorName cuGetErrorName
ctypedef int (*F_cuGetErrorString)(Result error, const char** pStr) nogil
cdef F_cuGetErrorString cuGetErrorString

# Primary context management
ctypedef int (*F_cuDevicePrimaryCtxRelease)(Device dev) nogil
cdef F_cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease

# Context management
ctypedef int (*F_cuCtxGetCurrent)(Context* pctx) nogil
cdef F_cuCtxGetCurrent cuCtxGetCurrent
ctypedef int (*F_cuCtxSetCurrent)(Context ctx) nogil
cdef F_cuCtxSetCurrent cuCtxSetCurrent
ctypedef int (*F_cuCtxCreate)(Context* pctx, unsigned int flags, Device dev) nogil # NOQA
cdef F_cuCtxCreate cuCtxCreate
ctypedef int (*F_cuCtxDestroy)(Context ctx) nogil
cdef F_cuCtxDestroy cuCtxDestroy
ctypedef int (*F_cuCtxGetDevice)(Device*) nogil
cdef F_cuCtxGetDevice cuCtxGetDevice

# Module load and kernel execution
ctypedef int (*F_cuLinkCreate)(unsigned int numOptions, CUjit_option* options, void** optionValues, LinkState* stateOut) nogil # NOQA
cdef F_cuLinkCreate cuLinkCreate
ctypedef int (*F_cuLinkAddData)(LinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues) nogil # NOQA
cdef F_cuLinkAddData cuLinkAddData
ctypedef int (*F_cuLinkAddFile)(LinkState state, CUjitInputType type, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues) nogil # NOQA
cdef F_cuLinkAddFile cuLinkAddFile
ctypedef int (*F_cuLinkComplete)(LinkState state, void** cubinOut, size_t* sizeOut) nogil # NOQA
cdef F_cuLinkComplete cuLinkComplete
ctypedef int (*F_cuLinkDestroy)(LinkState state) nogil
cdef F_cuLinkDestroy cuLinkDestroy
ctypedef int (*F_cuModuleLoad)(Module* module, char* fname) nogil
cdef F_cuModuleLoad cuModuleLoad
ctypedef int (*F_cuModuleLoadData)(Module* module, void* image) nogil
cdef F_cuModuleLoadData cuModuleLoadData
ctypedef int (*F_cuModuleUnload)(Module hmod) nogil
cdef F_cuModuleUnload cuModuleUnload
ctypedef int (*F_cuModuleGetFunction)(Function* hfunc, Module hmod, char* name) nogil # NOQA
cdef F_cuModuleGetFunction cuModuleGetFunction
ctypedef int (*F_cuModuleGetGlobal)(Deviceptr* dptr, size_t* bytes, Module hmod, char* name) nogil # NOQA
cdef F_cuModuleGetGlobal cuModuleGetGlobal
ctypedef int (*F_cuLaunchKernel)(Function f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, Stream hStream, void** kernelParams, void** extra) nogil # NOQA
cdef F_cuLaunchKernel cuLaunchKernel
ctypedef int (*F_cuLaunchCooperativeKernel)(Function f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, Stream hStream, void** kernelParams) nogil # NOQA
cdef F_cuLaunchCooperativeKernel cuLaunchCooperativeKernel

# Kernel attributes
ctypedef int (*F_cuFuncGetAttribute)(int *pi, CUfunction_attribute attrib, Function hfunc) nogil # NOQA
cdef F_cuFuncGetAttribute cuFuncGetAttribute

ctypedef int (*F_cuFuncSetAttribute)(Function hfunc, CUfunction_attribute attrib, int value) nogil # NOQA
cdef F_cuFuncSetAttribute cuFuncSetAttribute

# Occupancy
ctypedef int (*F_cuOccupancyMaxActiveBlocksPerMultiprocessor)(int* numBlocks, Function func, int blockSize, size_t dynamicSMemSize) nogil # NOQA
cdef F_cuOccupancyMaxActiveBlocksPerMultiprocessor cuOccupancyMaxActiveBlocksPerMultiprocessor # NOQA
ctypedef int (*F_cuOccupancyMaxPotentialBlockSize)(int* minGridSize, int* blockSize, Function func, CUoccupancyB2DSize block2shmem, size_t dynamicSMemSize, int blockSizeLimit) nogil # NOQA
cdef F_cuOccupancyMaxPotentialBlockSize cuOccupancyMaxPotentialBlockSize

# Stream
ctypedef int (*F_cuStreamGetCtx)(Stream hStream, Context* pctx) nogil
cdef F_cuStreamGetCtx cuStreamGetCtx


cdef extern from '../../cupy_backend.h' nogil:
# Build-time version
enum: CUDA_VERSION


cdef SoftLink _L = None
cdef inline void initialize() except *:
global _L
if _L is not None:
return
_initialize()


cdef void _initialize() except *:
global _L
_L = _get_softlink()

cdef str version = '_v2' if CUPY_CUDA_VERSION != 0 else ''

global cuGetErrorName
cuGetErrorName = <F_cuGetErrorName>_L.get('GetErrorName')
global cuGetErrorString
cuGetErrorString = <F_cuGetErrorString>_L.get('GetErrorString')
global cuDevicePrimaryCtxRelease
cuDevicePrimaryCtxRelease = <F_cuDevicePrimaryCtxRelease>_L.get(f'DevicePrimaryCtxRelease{version}') # NOQA
global cuCtxGetCurrent
cuCtxGetCurrent = <F_cuCtxGetCurrent>_L.get('CtxGetCurrent')
global cuCtxSetCurrent
cuCtxSetCurrent = <F_cuCtxSetCurrent>_L.get('CtxSetCurrent')
global cuCtxCreate
cuCtxCreate = <F_cuCtxCreate>_L.get(f'CtxCreate{version}')
global cuCtxDestroy
cuCtxDestroy = <F_cuCtxDestroy>_L.get(f'CtxDestroy{version}')
global cuCtxGetDevice
cuCtxGetDevice = <F_cuCtxGetDevice>_L.get('CtxGetDevice')
global cuLinkCreate
cuLinkCreate = <F_cuLinkCreate>_L.get(f'LinkCreate{version}')
global cuLinkAddData
cuLinkAddData = <F_cuLinkAddData>_L.get(f'LinkAddData{version}')
global cuLinkAddFile
cuLinkAddFile = <F_cuLinkAddFile>_L.get(f'LinkAddFile{version}')
global cuLinkComplete
cuLinkComplete = <F_cuLinkComplete>_L.get('LinkComplete')
global cuLinkDestroy
cuLinkDestroy = <F_cuLinkDestroy>_L.get('LinkDestroy')
global cuModuleLoad
cuModuleLoad = <F_cuModuleLoad>_L.get('ModuleLoad')
global cuModuleLoadData
cuModuleLoadData = <F_cuModuleLoadData>_L.get('ModuleLoadData')
global cuModuleUnload
cuModuleUnload = <F_cuModuleUnload>_L.get('ModuleUnload')
global cuModuleGetFunction
cuModuleGetFunction = <F_cuModuleGetFunction>_L.get('ModuleGetFunction')
global cuModuleGetGlobal
cuModuleGetGlobal = <F_cuModuleGetGlobal>_L.get(f'ModuleGetGlobal{version}') # NOQA
global cuLaunchKernel
cuLaunchKernel = <F_cuLaunchKernel>_L.get('LaunchKernel')
global cuLaunchCooperativeKernel
cuLaunchCooperativeKernel = <F_cuLaunchCooperativeKernel>_L.get('LaunchCooperativeKernel') # NOQA
global cuFuncGetAttribute
cuFuncGetAttribute = <F_cuFuncGetAttribute>_L.get('FuncGetAttribute')
global cuFuncSetAttribute
cuFuncSetAttribute = <F_cuFuncSetAttribute>_L.get('FuncSetAttribute')
global cuOccupancyMaxActiveBlocksPerMultiprocessor
cuOccupancyMaxActiveBlocksPerMultiprocessor = <F_cuOccupancyMaxActiveBlocksPerMultiprocessor>_L.get('OccupancyMaxActiveBlocksPerMultiprocessor') # NOQA
global cuOccupancyMaxPotentialBlockSize
cuOccupancyMaxPotentialBlockSize = <F_cuOccupancyMaxPotentialBlockSize>_L.get('OccupancyMaxPotentialBlockSize') # NOQA
global cuStreamGetCtx
cuStreamGetCtx = <F_cuStreamGetCtx>_L.get('StreamGetCtx')


cdef SoftLink _get_softlink():
cdef str prefix = 'cu'
cdef object libname = None

if CUPY_CUDA_VERSION != 0:
if _sys.platform == 'linux':
libname = 'libcuda.so.1'
else:
libname = 'nvcuda.dll'
elif CUPY_HIP_VERSION != 0:
# Use CUDA-to-HIP layer defined in the header.
libname = __file__

return SoftLink(libname, prefix, mandatory=True)
4 changes: 2 additions & 2 deletions cupy_backends/cuda/api/_driver_typedef.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ cdef extern from *:

ctypedef void* Context 'CUcontext'
ctypedef void* Deviceptr 'CUdeviceptr'
ctypedef void* Event 'cudaEvent_t'
ctypedef void* Event 'CUevent'
ctypedef void* Function 'CUfunction'
ctypedef void* Module 'CUmodule'
ctypedef void* Stream 'cudaStream_t'
ctypedef void* Stream 'CUstream'
ctypedef void* LinkState 'CUlinkState'

ctypedef int CUjit_option 'CUjit_option'
Expand Down
1 change: 1 addition & 0 deletions cupy_backends/cuda/api/_runtime_typedef.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ cdef extern from *:
ctypedef int DeviceAttr 'cudaDeviceAttr'
ctypedef int MemoryAdvise 'cudaMemoryAdvise'

ctypedef void* Stream 'cudaStream_t'
ctypedef void _StreamCallbackDef(
driver.Stream stream, Error status, void* userData)
ctypedef _StreamCallbackDef* StreamCallback 'cudaStreamCallback_t'
Expand Down

0 comments on commit d9fd6de

Please sign in to comment.