diff --git a/Orochi/Orochi.cpp b/Orochi/Orochi.cpp index 0b320ce..669f345 100644 --- a/Orochi/Orochi.cpp +++ b/Orochi/Orochi.cpp @@ -338,6 +338,9 @@ oroError OROAPI oroGetDeviceProperties(oroDeviceProp* props, oroDevice dev) e = cuDeviceGetAttribute( &props->computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, deviceId ); e = cuDeviceGetAttribute( &props->concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, deviceId ); e = cuDeviceGetAttribute( &props->ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, deviceId ); + e = cuDeviceGetAttribute( &props->major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, deviceId ); + e = cuDeviceGetAttribute( &props->minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, deviceId ); + return oroSuccess; } return oroErrorUnknown; diff --git a/UnitTest/bitcodes/generate_bitcodes.bat b/UnitTest/bitcodes/generate_bitcodes.bat new file mode 100644 index 0000000..84334f0 --- /dev/null +++ b/UnitTest/bitcodes/generate_bitcodes.bat @@ -0,0 +1,2 @@ +call hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp +call hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp diff --git a/UnitTest/bitcodes/generate_bitcodes.sh b/UnitTest/bitcodes/generate_bitcodes.sh index 6b24187..320189c 100644 --- a/UnitTest/bitcodes/generate_bitcodes.sh +++ b/UnitTest/bitcodes/generate_bitcodes.sh @@ -1,30 
+1,2 @@ hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp - hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp -hipcc --offload-arch=gfx1030 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1031 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1032 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1033 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1034 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1035 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1036 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1010 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1011 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1012 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1013 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx900 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx906 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only - -hipcc --offload-arch=gfx1030 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc 
--offload-arch=gfx1031 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1032 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1033 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1034 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1035 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1036 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1010 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1011 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1012 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1013 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx900 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx906 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only diff --git a/UnitTest/bitcodes/generate_bitcodes_gfx1100.bat b/UnitTest/bitcodes/generate_bitcodes_gfx1100.bat deleted file mode 100644 index dc4b3fa..0000000 --- a/UnitTest/bitcodes/generate_bitcodes_gfx1100.bat +++ /dev/null @@ -1,7 +0,0 @@ -call hipcc --cuda-device-only --offload-arch=gfx1100 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ..\moduleTestKernel.cpp - -call hipcc --cuda-device-only --offload-arch=gfx1100 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 
--offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ..\moduleTestFunc.cpp - -call hipcc --offload-arch=gfx1100 ..\moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only - -call hipcc --offload-arch=gfx1100 ..\moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only diff --git a/UnitTest/bitcodes/generate_bitcodes_gfx1100.sh b/UnitTest/bitcodes/generate_bitcodes_gfx1100.sh deleted file mode 100755 index 5be261b..0000000 --- a/UnitTest/bitcodes/generate_bitcodes_gfx1100.sh +++ /dev/null @@ -1,6 +0,0 @@ -hipcc --cuda-device-only --offload-arch=gfx1100 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp - -hipcc --cuda-device-only --offload-arch=gfx1100 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp - -hipcc --offload-arch=gfx1100 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only -hipcc --offload-arch=gfx1100 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only diff --git a/UnitTest/bitcodes/generate_bitcodes_gfx1102.bat b/UnitTest/bitcodes/generate_bitcodes_gfx1102.bat deleted file mode 100644 index ce27e68..0000000 --- a/UnitTest/bitcodes/generate_bitcodes_gfx1102.bat +++ /dev/null @@ -1,7 +0,0 @@ -call hipcc --cuda-device-only --offload-arch=gfx1102 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 
--offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ..\moduleTestKernel.cpp - -call hipcc --cuda-device-only --offload-arch=gfx1102 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ..\moduleTestFunc.cpp - -call hipcc --offload-arch=gfx1102 ..\moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only - -call hipcc --offload-arch=gfx1102 ..\moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only diff --git a/UnitTest/bitcodes/generate_bitcodes_gfx1102.sh b/UnitTest/bitcodes/generate_bitcodes_gfx1102.sh deleted file mode 100755 index 3752150..0000000 --- a/UnitTest/bitcodes/generate_bitcodes_gfx1102.sh +++ /dev/null @@ -1,6 +0,0 @@ -hipcc --cuda-device-only --offload-arch=gfx1102 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp - -hipcc --cuda-device-only --offload-arch=gfx1102 --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp - -hipcc --offload-arch=gfx1102 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only 
-hipcc --offload-arch=gfx1102 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only diff --git a/UnitTest/bitcodes/generate_bitcodes_nvidia.bat b/UnitTest/bitcodes/generate_bitcodes_nvidia.bat new file mode 100644 index 0000000..6eb0ea0 --- /dev/null +++ b/UnitTest/bitcodes/generate_bitcodes_nvidia.bat @@ -0,0 +1,2 @@ +nvcc -x cu -fatbin --device-c -arch=all ../moduleTestFunc.cpp +nvcc -x cu -fatbin --device-c -arch=all ../moduleTestKernel.cpp diff --git a/UnitTest/bitcodes/generate_bitcodes_nvidia.sh b/UnitTest/bitcodes/generate_bitcodes_nvidia.sh index 1315219..bed8af6 100644 --- a/UnitTest/bitcodes/generate_bitcodes_nvidia.sh +++ b/UnitTest/bitcodes/generate_bitcodes_nvidia.sh @@ -1,4 +1,3 @@ -nvcc -arch=compute_80 -code="sm_80,sm_86,sm_87" -fatbin --device-c ../moduleTestFunc.cu -nvcc -arch=compute_80 -code="sm_80,sm_86,sm_87" -fatbin --device-c ../moduleTestKernel.cu -nvcc -cubin --device-c -arch=sm_80 ../moduleTestFunc.cu -nvcc -cubin --device-c -arch=sm_80 ../moduleTestKernel.cu +# Inputs are the shared .cpp sources (the .cu copies were removed); -x cu makes nvcc compile them as CUDA. +nvcc -x cu -fatbin --device-c -arch=all ../moduleTestFunc.cpp +nvcc -x cu -fatbin --device-c -arch=all ../moduleTestKernel.cpp diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1010.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1010.bc deleted file mode 100644 index f80e54c..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1010.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1011.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1011.bc deleted file mode 100644 index 499d130..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1011.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1012.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1012.bc deleted file mode 100644 index c6e04b7..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1012.bc and
/dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1013.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1013.bc deleted file mode 100644 index b28684f..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1013.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1030.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1030.bc deleted file mode 100644 index 4184f79..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1030.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1031.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1031.bc deleted file mode 100644 index df9fd1f..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1031.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1032.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1032.bc deleted file mode 100644 index 3b0ffe9..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1032.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1033.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1033.bc deleted file mode 100644 index 59a0326..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1033.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1034.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1034.bc deleted file mode 100644 index 16014a4..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1034.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1035.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1035.bc deleted file mode 100644 index 14cb944..0000000 
Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1035.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1036.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1036.bc deleted file mode 100644 index adcc7da..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx1036.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx900.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx900.bc deleted file mode 100644 index 3009b18..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx900.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx906.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx906.bc deleted file mode 100644 index 7c4f9a9..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa-gfx906.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa.bc b/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa.bc deleted file mode 100644 index f96d05c..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc.cubin b/UnitTest/bitcodes/moduleTestFunc.cubin deleted file mode 100644 index daa0f3c..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc.cubin and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestFunc.fatbin b/UnitTest/bitcodes/moduleTestFunc.fatbin deleted file mode 100644 index bd985d8..0000000 Binary files a/UnitTest/bitcodes/moduleTestFunc.fatbin and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1010.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1010.bc deleted file mode 100644 index 1150d40..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1010.bc and /dev/null differ 
diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1011.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1011.bc deleted file mode 100644 index b906a9c..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1011.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1012.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1012.bc deleted file mode 100644 index 33413f6..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1012.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1013.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1013.bc deleted file mode 100644 index 9ebaf56..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1013.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1030.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1030.bc deleted file mode 100644 index a2d554e..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1030.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1031.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1031.bc deleted file mode 100644 index 685cfa1..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1031.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1032.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1032.bc deleted file mode 100644 index 5f8fdfa..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1032.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1033.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1033.bc deleted file mode 100644 index 
d81982a..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1033.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1034.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1034.bc deleted file mode 100644 index d2eb0c5..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1034.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1035.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1035.bc deleted file mode 100644 index 58f1bfb..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1035.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1036.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1036.bc deleted file mode 100644 index 6079d0e..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx1036.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx900.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx900.bc deleted file mode 100644 index 6c94614..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx900.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx906.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx906.bc deleted file mode 100644 index 999d184..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa-gfx906.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa.bc b/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa.bc deleted file mode 100644 index cd1b97c..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel-hip-amdgcn-amd-amdhsa.bc and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel.cubin 
b/UnitTest/bitcodes/moduleTestKernel.cubin deleted file mode 100644 index 18c4ad1..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel.cubin and /dev/null differ diff --git a/UnitTest/bitcodes/moduleTestKernel.fatbin b/UnitTest/bitcodes/moduleTestKernel.fatbin deleted file mode 100644 index 27011de..0000000 Binary files a/UnitTest/bitcodes/moduleTestKernel.fatbin and /dev/null differ diff --git a/UnitTest/main.cpp b/UnitTest/main.cpp index e2601ae..9bea12b 100644 --- a/UnitTest/main.cpp +++ b/UnitTest/main.cpp @@ -149,7 +149,7 @@ void loadFile( const char* path, std::vector& dst ) f.close(); } } - +#if 0 TEST_F( OroTestBase, linkBc ) { oroDeviceProp props; @@ -228,7 +228,7 @@ TEST_F( OroTestBase, linkBc ) ORORTCCHECK( oroModuleUnload( module ) ); } } - +#endif TEST_F( OroTestBase, link ) { oroDeviceProp props; @@ -237,8 +237,10 @@ TEST_F( OroTestBase, link ) std::vector data1; const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP; + std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" ); + std::vector opts = isAmd ? std::vector({ "-fgpu-rdc", "-c", "--cuda-device-only" }) - : std::vector({ "--device-c", "-arch=sm_80" }); + : std::vector({ "--device-c", arch.c_str() }); { std::string code; OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code ); @@ -310,7 +312,7 @@ TEST_F( OroTestBase, link ) ORORTCCHECK( oroModuleUnload( module ) ); } } - +#if 0 TEST_F( OroTestBase, link_addFile ) { oroDeviceProp props; @@ -385,6 +387,7 @@ TEST_F( OroTestBase, link_addFile ) ORORTCCHECK( oroModuleUnload( module ) ); } } +#endif TEST_F( OroTestBase, link_null_name ) { @@ -393,9 +396,9 @@ TEST_F( OroTestBase, link_null_name ) std::vector data0; std::vector data1; const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP; - + std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" ); std::vector opts = isAmd ? 
std::vector({ "-fgpu-rdc", "-c", "--cuda-device-only" }) - : std::vector({ "--device-c", "-arch=sm_80" }); + : std::vector({ "--device-c", arch.c_str() }); { std::string code; OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code ); @@ -526,14 +529,14 @@ TEST_F( OroTestBase, link_bundledBc_with_bc ) std::vector data0; std::vector data1; const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP; - + std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" ); { std::string bcFile = isAmd ? "../UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa.bc" : "../UnitTest/bitcodes/moduleTestFunc.fatbin"; loadFile( bcFile.c_str(), data1 ); } { std::vector opts = isAmd ? std::vector({ "-fgpu-rdc", "-c", "--cuda-device-only" }) - : std::vector({ "--device-c", "-arch=sm_80" }); + : std::vector({ "--device-c", arch.c_str() }); std::string code; OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code ); OrochiUtils::getData( m_device, code.c_str(), "../UnitTest/moduleTestKernel.h", &opts, data0 ); @@ -607,6 +610,7 @@ TEST_F( OroTestBase, link_bundledBc_with_bc_loweredName ) std::vector data0; std::vector data1; const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP; + std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" ); const char* funcName = "testKernel<0>"; std::string loweredNameStr; orortcProgram prog; @@ -616,7 +620,8 @@ TEST_F( OroTestBase, link_bundledBc_with_bc_loweredName ) loadFile( bcFile.c_str(), data1 ); } { - std::vector opts = isAmd ? std::vector( { "-fgpu-rdc", "-c", "--cuda-device-only" } ) : std::vector( { "--device-c", "-arch=sm_80" } ); + std::vector opts = isAmd ? 
std::vector( { "-fgpu-rdc", "-c", "--cuda-device-only" } ) + : std::vector( { "--device-c", arch.c_str() } ); std::string code; OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel_loweredName.h", code ); diff --git a/UnitTest/moduleTestFunc.cpp b/UnitTest/moduleTestFunc.cpp index 04854f4..ef7333b 100644 --- a/UnitTest/moduleTestFunc.cpp +++ b/UnitTest/moduleTestFunc.cpp @@ -1,4 +1,6 @@ +#if !defined( __CUDACC__ ) #include +#endif __device__ void setInfo( int *x ) { diff --git a/UnitTest/moduleTestFunc.cu b/UnitTest/moduleTestFunc.cu deleted file mode 100644 index 6985602..0000000 --- a/UnitTest/moduleTestFunc.cu +++ /dev/null @@ -1,5 +0,0 @@ - __device__ void setInfo( int *x ) -{ - int tid = threadIdx.x; - atomicAdd( x, tid ); -} diff --git a/UnitTest/moduleTestKernel.cu b/UnitTest/moduleTestKernel.cu deleted file mode 100644 index 3c20131..0000000 --- a/UnitTest/moduleTestKernel.cu +++ /dev/null @@ -1,6 +0,0 @@ -extern __device__ void setInfo( int *x ); - -extern "C" __global__ void testKernel( int *x ) -{ - setInfo(x); -} diff --git a/UnitTest/premake5.lua b/UnitTest/premake5.lua index f721898..def48fb 100644 --- a/UnitTest/premake5.lua +++ b/UnitTest/premake5.lua @@ -19,3 +19,7 @@ project "Unittest" files { "../contrib/gtest-1.6.0/gtest-all.cc" } sysincludedirs{ "../contrib/gtest-1.6.0/" } defines { "GTEST_HAS_TR1_TUPLE=0" } + if _OPTIONS["kernelcompile"] then + os.execute( "cd ./bitcodes/ && generate_bitcodes.bat" ) + os.execute( "cd ./bitcodes/ && generate_bitcodes_nvidia.bat" ) + end diff --git a/contrib/hipew/src/hipew.cpp b/contrib/hipew/src/hipew.cpp index a5ab931..83018a4 100644 --- a/contrib/hipew/src/hipew.cpp +++ b/contrib/hipew/src/hipew.cpp @@ -480,8 +480,8 @@ void hipewInit( int* resultDriver, int* resultRtc, hipuint32_t flags ) _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetProgramLog ); _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetProgramLogSize ); _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetCode ); - // _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcodeSize 
); - // _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcode ); + _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcodeSize ); + _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcode ); _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetCodeSize ); _LIBRARY_FIND_CHECKED( rtcLib, hiprtcLinkCreate ); _LIBRARY_FIND_CHECKED( rtcLib, hiprtcLinkAddFile ); diff --git a/premake5.lua b/premake5.lua index bc58c22..2b414ab 100644 --- a/premake5.lua +++ b/premake5.lua @@ -8,6 +8,11 @@ newoption { description = "Use precompiled kernels" } +newoption { + trigger = "kernelcompile", + description = "Compile kernels used for unit test" +} + function copydir(src_dir, dst_dir, filter, single_dst_dir) if not os.isdir(src_dir) then printError("'%s' is not an existing directory!", src_dir)