Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/oro 0 only bundle #50

Merged
merged 3 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Orochi/Orochi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ oroError OROAPI oroGetDeviceProperties(oroDeviceProp* props, oroDevice dev)
e = cuDeviceGetAttribute( &props->computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, deviceId );
e = cuDeviceGetAttribute( &props->concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, deviceId );
e = cuDeviceGetAttribute( &props->ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, deviceId );
e = cuDeviceGetAttribute( &props->major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, deviceId );
e = cuDeviceGetAttribute( &props->minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, deviceId );

return oroSuccess;
}
return oroErrorUnknown;
Expand Down
2 changes: 2 additions & 0 deletions UnitTest/bitcodes/generate_bitcodes.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
call hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp
call hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp
28 changes: 0 additions & 28 deletions UnitTest/bitcodes/generate_bitcodes.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,2 @@
hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp

hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp
hipcc --offload-arch=gfx1030 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1031 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1032 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1033 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1034 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1035 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1036 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1010 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1011 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1012 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1013 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx900 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx906 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only

hipcc --offload-arch=gfx1030 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1031 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1032 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1033 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1034 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1035 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1036 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1010 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1011 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1012 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1013 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx900 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx906 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
7 changes: 0 additions & 7 deletions UnitTest/bitcodes/generate_bitcodes_gfx1100.bat

This file was deleted.

6 changes: 0 additions & 6 deletions UnitTest/bitcodes/generate_bitcodes_gfx1100.sh

This file was deleted.

7 changes: 0 additions & 7 deletions UnitTest/bitcodes/generate_bitcodes_gfx1102.bat

This file was deleted.

6 changes: 0 additions & 6 deletions UnitTest/bitcodes/generate_bitcodes_gfx1102.sh

This file was deleted.

2 changes: 2 additions & 0 deletions UnitTest/bitcodes/generate_bitcodes_nvidia.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestFunc.cpp
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestKernel.cpp
6 changes: 2 additions & 4 deletions UnitTest/bitcodes/generate_bitcodes_nvidia.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
nvcc -arch=compute_80 -code="sm_80,sm_86,sm_87" -fatbin --device-c ../moduleTestFunc.cu
nvcc -arch=compute_80 -code="sm_80,sm_86,sm_87" -fatbin --device-c ../moduleTestKernel.cu
nvcc -cubin --device-c -arch=sm_80 ../moduleTestFunc.cu
nvcc -cubin --device-c -arch=sm_80 ../moduleTestKernel.cu
# Build the fat binaries loaded by the link unit tests (moduleTestFunc.fatbin / moduleTestKernel.fatbin).
# The kernels live in the shared .cpp sources (the .cu copies no longer exist), so -x cu tells nvcc
# to compile them as CUDA, matching generate_bitcodes_nvidia.bat.
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestFunc.cpp
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestKernel.cpp
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestFunc.cubin
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestFunc.fatbin
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestKernel.cubin
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestKernel.fatbin
Binary file not shown.
23 changes: 14 additions & 9 deletions UnitTest/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void loadFile( const char* path, std::vector<char>& dst )
f.close();
}
}

#if 0
TEST_F( OroTestBase, linkBc )
{
oroDeviceProp props;
Expand Down Expand Up @@ -228,7 +228,7 @@ TEST_F( OroTestBase, linkBc )
ORORTCCHECK( oroModuleUnload( module ) );
}
}

#endif
TEST_F( OroTestBase, link )
{
oroDeviceProp props;
Expand All @@ -237,8 +237,10 @@ TEST_F( OroTestBase, link )
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;

std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );

std::vector<const char*> opts = isAmd ? std::vector<const char *>({ "-fgpu-rdc", "-c", "--cuda-device-only" })
: std::vector<const char *>({ "--device-c", "-arch=sm_80" });
: std::vector<const char *>({ "--device-c", arch.c_str() });
{
std::string code;
OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code );
Expand Down Expand Up @@ -310,7 +312,7 @@ TEST_F( OroTestBase, link )
ORORTCCHECK( oroModuleUnload( module ) );
}
}

#if 0
TEST_F( OroTestBase, link_addFile )
{
oroDeviceProp props;
Expand Down Expand Up @@ -385,6 +387,7 @@ TEST_F( OroTestBase, link_addFile )
ORORTCCHECK( oroModuleUnload( module ) );
}
}
#endif

TEST_F( OroTestBase, link_null_name )
{
Expand All @@ -393,9 +396,9 @@ TEST_F( OroTestBase, link_null_name )
std::vector<char> data0;
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;

std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );
std::vector<const char*> opts = isAmd ? std::vector<const char *>({ "-fgpu-rdc", "-c", "--cuda-device-only" })
: std::vector<const char *>({ "--device-c", "-arch=sm_80" });
: std::vector<const char *>({ "--device-c", arch.c_str() });
{
std::string code;
OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code );
Expand Down Expand Up @@ -526,14 +529,14 @@ TEST_F( OroTestBase, link_bundledBc_with_bc )
std::vector<char> data0;
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;

std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );
{
std::string bcFile = isAmd ? "../UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa.bc" : "../UnitTest/bitcodes/moduleTestFunc.fatbin";
loadFile( bcFile.c_str(), data1 );
}
{
std::vector<const char*> opts = isAmd ? std::vector<const char *>({ "-fgpu-rdc", "-c", "--cuda-device-only" })
: std::vector<const char *>({ "--device-c", "-arch=sm_80" });
: std::vector<const char *>({ "--device-c", arch.c_str() });
std::string code;
OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code );
OrochiUtils::getData( m_device, code.c_str(), "../UnitTest/moduleTestKernel.h", &opts, data0 );
Expand Down Expand Up @@ -607,6 +610,7 @@ TEST_F( OroTestBase, link_bundledBc_with_bc_loweredName )
std::vector<char> data0;
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;
std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );
const char* funcName = "testKernel<0>";
std::string loweredNameStr;
orortcProgram prog;
Expand All @@ -616,7 +620,8 @@ TEST_F( OroTestBase, link_bundledBc_with_bc_loweredName )
loadFile( bcFile.c_str(), data1 );
}
{
std::vector<const char*> opts = isAmd ? std::vector<const char*>( { "-fgpu-rdc", "-c", "--cuda-device-only" } ) : std::vector<const char*>( { "--device-c", "-arch=sm_80" } );
std::vector<const char*> opts = isAmd ? std::vector<const char*>( { "-fgpu-rdc", "-c", "--cuda-device-only" } )
: std::vector<const char*>( { "--device-c", arch.c_str() } );
std::string code;

OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel_loweredName.h", code );
Expand Down
2 changes: 2 additions & 0 deletions UnitTest/moduleTestFunc.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#if !defined( __CUDACC__ )
#include <hip/hip_runtime.h>
#endif

__device__ void setInfo( int *x )
{
Expand Down
5 changes: 0 additions & 5 deletions UnitTest/moduleTestFunc.cu

This file was deleted.

6 changes: 0 additions & 6 deletions UnitTest/moduleTestKernel.cu

This file was deleted.

4 changes: 4 additions & 0 deletions UnitTest/premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ project "Unittest"
files { "../contrib/gtest-1.6.0/gtest-all.cc" }
sysincludedirs{ "../contrib/gtest-1.6.0/" }
defines { "GTEST_HAS_TR1_TUPLE=0" }
-- When the "kernelcompile" option is given, regenerate the precompiled kernels
-- by running the generator scripts inside ./bitcodes/.
-- The .bat scripts can only be launched from the Windows shell, so every other
-- host runs the .sh siblings through sh (no executable bit or shebang required).
if _OPTIONS["kernelcompile"] then
	local generators = { "generate_bitcodes", "generate_bitcodes_nvidia" }
	for _, generator in ipairs( generators ) do
		if os.ishost( "windows" ) then
			os.execute( "cd ./bitcodes/ && " .. generator .. ".bat" )
		else
			os.execute( "cd ./bitcodes/ && sh ./" .. generator .. ".sh" )
		end
	end
end
4 changes: 2 additions & 2 deletions contrib/hipew/src/hipew.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,8 @@ void hipewInit( int* resultDriver, int* resultRtc, hipuint32_t flags )
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetProgramLog );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetProgramLogSize );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetCode );
// _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcodeSize );
// _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcode );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcodeSize );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcode );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetCodeSize );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcLinkCreate );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcLinkAddFile );
Expand Down
5 changes: 5 additions & 0 deletions premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ newoption {
description = "Use precompiled kernels"
}

newoption {
trigger = "kernelcompile",
description = "Compile kernels used for unit test"
}

function copydir(src_dir, dst_dir, filter, single_dst_dir)
if not os.isdir(src_dir) then
printError("'%s' is not an existing directory!", src_dir)
Expand Down