Skip to content

Commit 8cb5e1e

Browse files
committed
feat: Support cdpSimpleQuicksort on CUDA SDK
Add the implicitly called runtime registration function, __cudaRegisterVar. However, its relationship to dynamic parallelism still needs to be clarified.
1 parent b49c5f0 commit 8cb5e1e

File tree

4 files changed

+95
-2
lines changed

4 files changed

+95
-2
lines changed

qcu-device/hw/misc/virtio-qcuda.c

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ CUcontext cudaContext;
6767

6868
int cudaContext_count;
6969

70-
#define cudaFunctionMaxNum 8
70+
#define cudaFunctionMaxNum 512
7171
uint32_t cudaFunctionNum;
7272

7373
#define cudaEventMaxNum 32
@@ -254,6 +254,12 @@ static void qcu_cudaRegisterFatBinary(VirtioQCArg *arg)
254254

255255
}
256256

257+
/*
 * Device-side handler dispatched for VIRTQC_cudaRegisterVar.
 *
 * Currently a stub: it only emits a trace message. The request in `arg`
 * is neither read nor modified (in particular arg->cmd is left untouched).
 */
static void qcu_cudaRegisterVar(VirtioQCArg *arg)
{
    (void)arg; /* intentionally unused by this stub */

    ptrace("call cudaRegisterVar\n");
}
261+
262+
257263
static void qcu_cudaUnregisterFatBinary(VirtioQCArg *arg)
258264
{
259265
uint32_t i;
@@ -298,7 +304,7 @@ static void qcu_cudaRegisterFunction(VirtioQCArg *arg)
298304
memcpy(devicesKernels[cudaFunctionNum].functionName, functionName, arg->pBSize);
299305
devicesKernels[cudaFunctionNum].funcId = funcId;
300306

301-
ptrace("fatBin= %16p ,name= '%s'\n", fatBin, functionName);
307+
ptrace("fatBin= %16p ,name= '%s', cudaFunctionNum = %d\n", fatBin, functionName, cudaFunctionNum);
302308

303309

304310
int i = totalDevices;
@@ -743,6 +749,20 @@ static void qcu_cudaDeviceReset(VirtioQCArg *arg)
743749
arg->cmd = err;
744750
}
745751

752+
/*
 * Device-side handler dispatched for VIRTQC_cudaDeviceSetLimit.
 *
 * Input:
 *   arg->pA  limit selector, passed as the first argument of
 *            cudaDeviceSetLimit()
 *   arg->pB  new value for that limit, passed as the second argument
 *
 * Output:
 *   arg->cmd the cudaError_t returned by cudaDeviceSetLimit()
 */
static void qcu_cudaDeviceSetLimit(VirtioQCArg *arg)
{
    cudaError_t err;

    pfunc();

    /* cudaError() wraps the call; err keeps the raw status for the reply. */
    cudaError(err = cudaDeviceSetLimit(arg->pA, arg->pB));

    arg->cmd = err;
}
764+
765+
746766
////////////////////////////////////////////////////////////////////////////////
747767
/// Version Management
748768
////////////////////////////////////////////////////////////////////////////////
@@ -1266,6 +1286,10 @@ static void virtio_qcuda_cmd_handle(VirtIODevice *vdev, VirtQueue *vq)
12661286
qcu_cudaUnregisterFatBinary(arg);
12671287
break;
12681288

1289+
case VIRTQC_cudaRegisterVar:
1290+
qcu_cudaRegisterVar(arg);
1291+
break;
1292+
12691293
case VIRTQC_cudaRegisterFunction:
12701294
qcu_cudaRegisterFunction(arg);
12711295
break;
@@ -1320,6 +1344,11 @@ static void virtio_qcuda_cmd_handle(VirtIODevice *vdev, VirtQueue *vq)
13201344
qcu_cudaDeviceReset(arg);
13211345
break;
13221346

1347+
case VIRTQC_cudaDeviceSetLimit:
1348+
qcu_cudaDeviceSetLimit(arg);
1349+
break;
1350+
1351+
13231352
// Version Management (runtime API)
13241353
case VIRTQC_cudaDriverGetVersion:
13251354
qcu_cudaDriverGetVersion(arg);

qcu-driver/qcuda_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum
3636
VIRTQC_cudaRegisterFatBinary = 200,
3737
VIRTQC_cudaUnregisterFatBinary,
3838
VIRTQC_cudaRegisterFunction,
39+
VIRTQC_cudaRegisterVar,
3940
VIRTQC_cudaLaunch,
4041

4142
// Memory Management (runtime API)
@@ -52,6 +53,7 @@ enum
5253
VIRTQC_cudaSetDevice,
5354
VIRTQC_cudaDeviceSynchronize,
5455
VIRTQC_cudaDeviceReset,
56+
VIRTQC_cudaDeviceSetLimit,
5557

5658
// Version Management (runtime API)
5759
VIRTQC_cudaDriverGetVersion,

qcu-driver/qcuda_driver.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,12 @@ void qcu_cudaRegisterFunction(VirtioQCArg *arg)
357357
kfree_gpa(arg->pB, arg->pBSize);
358358
}
359359

360+
/*
 * Driver-side handler for VIRTQC_cudaRegisterVar (dispatched from
 * qcu_misc_ioctl). The request carries nothing that is translated here,
 * so it is handed to qcu_misc_send_cmd() as-is.
 */
void qcu_cudaRegisterVar(VirtioQCArg *arg)
{
    qcu_misc_send_cmd(arg);
}
364+
365+
360366
void qcu_cudaLaunch(VirtioQCArg *arg)
361367
{ // pA: cuda kernel configuration
362368
// pB: cuda kernel parameters
@@ -692,6 +698,13 @@ void qcu_cudaDeviceReset(VirtioQCArg *arg)
692698
qcu_misc_send_cmd(arg);
693699
}
694700

701+
/*
 * Driver-side handler for VIRTQC_cudaDeviceSetLimit (dispatched from
 * qcu_misc_ioctl). Traces the call, then passes the request through
 * qcu_misc_send_cmd() without modifying it.
 */
void qcu_cudaDeviceSetLimit(VirtioQCArg *arg)
{
    pfunc();
    qcu_misc_send_cmd(arg);
}
707+
695708
////////////////////////////////////////////////////////////////////////////////
696709
/// Version Management
697710
////////////////////////////////////////////////////////////////////////////////
@@ -1109,6 +1122,10 @@ static long qcu_misc_ioctl(struct file *filp, unsigned int _cmd, unsigned long _
11091122
qcu_cudaRegisterFunction(arg);
11101123
break;
11111124

1125+
case VIRTQC_cudaRegisterVar:
1126+
qcu_cudaRegisterVar(arg);
1127+
break;
1128+
11121129
case VIRTQC_cudaLaunch:
11131130
qcu_cudaLaunch(arg);
11141131
break;
@@ -1159,6 +1176,10 @@ static long qcu_misc_ioctl(struct file *filp, unsigned int _cmd, unsigned long _
11591176
qcu_cudaDeviceReset(arg);
11601177
break;
11611178

1179+
case VIRTQC_cudaDeviceSetLimit:
1180+
qcu_cudaDeviceSetLimit(arg);
1181+
break;
1182+
11621183
//case VIRTQC_checkCudaCapabilities:
11631184
// qcu_checkCudaCapabilities(arg);
11641185
// break;

qcu-library/libcudart.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,30 @@ void __cudaRegisterFunction(
301301
time_end(t_RegFunc);
302302
}
303303

304+
void __cudaRegisterVar(
305+
void **fatCubinHandle,
306+
char *hostVar,
307+
char *deviceAddress,
308+
const char *deviceName,
309+
int ext,
310+
int size,
311+
int constant,
312+
int global
313+
)
314+
{
315+
pfunc();
316+
317+
ptrace("fatCubinHandle= %p, value= %p\n", fatCubinHandle, *fatCubinHandle);
318+
ptrace("hostVar= %s (%p)\n", hostVar, hostVar);
319+
ptrace("deviceAddress= %s (%p)\n", deviceAddress, deviceAddress);
320+
ptrace("deviceName= %s\n", deviceName);
321+
ptrace("ext= %d, size = %d, constant = %d, global = %d\n", ext, size, constant, global);
322+
323+
VirtioQCArg arg;
324+
send_cmd_to_device( VIRTQC_cudaRegisterVar, &arg);
325+
}
326+
327+
304328
cudaError_t cudaConfigureCall(
305329
dim3 gridDim,
306330
dim3 blockDim,
@@ -655,6 +679,23 @@ cudaError_t cudaDeviceReset(void)
655679
return (cudaError_t)arg.cmd;
656680
}
657681

682+
683+
cudaError_t cudaDeviceSetLimit (enum cudaLimit limit, size_t value )
684+
{
685+
VirtioQCArg arg;
686+
pfunc();
687+
688+
memset(&arg, 0, sizeof(VirtioQCArg));
689+
690+
ptr( arg.pA, limit, 0);
691+
ptr( arg.pB, value, 0);
692+
693+
send_cmd_to_device( VIRTQC_cudaDeviceSetLimit, &arg);
694+
695+
return (cudaError_t)arg.cmd;
696+
}
697+
698+
658699
////////////////////////////////////////////////////////////////////////////////
659700
/// Version Management
660701
////////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)