From eade62da547ab549c0c8364df111f18b6e540fef Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Sat, 27 May 2017 12:39:24 +0100 Subject: [PATCH] finally come up with a principled way of detecting byvalued-ness --- src/patch_hostside.cpp | 95 +++++++++++++++++++++++++++----------- src/patch_hostside.h | 5 +- test/tf/random_op_gpu.meta | 24 ++++++++++ 3 files changed, 97 insertions(+), 27 deletions(-) create mode 100644 test/tf/random_op_gpu.meta diff --git a/src/patch_hostside.cpp b/src/patch_hostside.cpp index 68e8299e..ae9242d5 100644 --- a/src/patch_hostside.cpp +++ b/src/patch_hostside.cpp @@ -344,7 +344,7 @@ void addMetadata(Instruction *value, string tag) { value->setMetadata(tag, mdnode); } -llvm::Instruction *PatchHostside::addSetKernelArgInst_byvaluestruct(llvm::Instruction *lastInst, llvm::Value *structPointer) { +llvm::Instruction *PatchHostside::addSetKernelArgInst_byvaluestruct(llvm::Instruction *lastInst, ParamInfo *paramInfo, llvm::Value *structPointer) { // // what this is going to do is: // - create a call to pass the hostside buffer to hostside_opencl_funcs, at runtime @@ -359,8 +359,35 @@ llvm::Instruction *PatchHostside::addSetKernelArgInst_byvaluestruct(llvm::Instru PointerType *structPointerType = cast(structPointer->getType()); // structPointerType->dump(); - // indentor << isa(structPointer->getType()) << endl; - // structPointerType->getPointerElementType()->dump(); + indentor << "structPointerType is pointer? " << isa(structPointerType) << endl; + indentor << "isa struct pointer? " << isa(structPointer->getType()) << endl; + + if(!isa(structPointerType->getPointerElementType())) { + indentor << "doesnt actually point to a struct, currenlty, in hostside bytecode => adding a bitcast" << endl; + // paramInfo->typeDevicesideFn + // string deviceSideTypeName = paramInfo->typeDevicesideFn->getName().str(): + // indentor << "deviceSideTypeName: " << deviceSideTypeName << endl; + paramInfo->typeDevicesideFn->dump(); + indentor << "dump type on this: " << typeDumper.dumpType(paramInfo->typeDevicesideFn) << endl; + // BitCastInst *bitcast = new BitCastInst(structPointer, paramInfo->typeDevicesideFn, 0); + // PointerType *pointerStruct = + BitCastInst *bitcast = new BitCastInst(structPointer, paramInfo->typeDevicesideFn); + bitcast->insertAfter(lastInst); + bitcast->dump(); + indentor << "after creating bitcast" << endl; + structPointer = bitcast; + lastInst = bitcast; + + structPointerType = cast(structPointer->getType()); + indentor << "structPointerType is pointer? " << isa(structPointerType) << endl; + indentor << "isa struct pointer? " << isa(structPointer->getType()) << endl; + } + + indentor << "addSetKernelArgInst_byvaluestruct structPointerType->dump():" << endl; + structPointerType->dump(); + indentor << "addSetKernelArgInst_byvaluestruct structPointerType->getPointerElementType()->dump():" << endl; + structPointerType->getPointerElementType()->dump(); + StructType *structType = cast(structPointerType->getPointerElementType()); // outs() << "structType:\n"; // structType->dump(); @@ -451,19 +478,30 @@ llvm::Instruction *PatchHostside::addSetKernelArgInst(llvm::Instruction *lastIns This figures out the type of the argument, and dispatches appropriately. All arguments pass through this function, including primitives, pointers, structs, ... */ - Indentor indentor; // Type *typeHostsideFn = paramInfo->typeHostsideFn; Value *value = paramInfo->value; Value *valueAsPointerInstr = paramInfo->pointer; + + Indentor indentor_; + indentor_ << "addSetKErnelArgInst arg=" << paramInfo->paramIndex << endl; + + Indentor indentor; + // int size = paramInfo->size; // bool definitelyNotAPointer = paramInfo->size != 4 && paramInfo->size != 8; // bool couldBePrimitiveInt = paramInfo->size == 4 || paramInfo->size == 8; // bool couldBePrimitiveFloat = paramInfo->size == 4 || paramInfo->size == 8; // bool couldBePointer = paramInfo->size == 4 || paramInfo->size == 8; - bool clearlyAByValueStruct = paramInfo->size > 8; + // bool clearlyAByValueStruct = paramInfo->size > 8; // bool definitelyNotAPointer = false; // indentor << "size=" << paramInfo->size << endl; - if(clearlyAByValueStruct) { + + Type *devicesideType = paramInfo->typeDevicesideFn; + + if(paramInfo->devicesideByVal && + isa(devicesideType) && + isa(devicesideType->getPointerElementType())) { + indentor << "deviceside is by value struct, so we will pass by value, as a struct" << endl; // indentor << "clearly a by value struct" << endl; // value->dump(); // valueAsPointerInstr->dump(); @@ -479,7 +517,7 @@ llvm::Instruction *PatchHostside::addSetKernelArgInst(llvm::Instruction *lastIns } // lastInst = PatchHostside::addSetKernelArgInst_byvaluestruct(lastInst, value); - lastInst = PatchHostside::addSetKernelArgInst_byvaluestruct(lastInst, valueAsPointerInstr); + lastInst = PatchHostside::addSetKernelArgInst_byvaluestruct(lastInst, paramInfo, valueAsPointerInstr); } else if(IntegerType *intType = dyn_cast(value->getType())) { indentor << "addSetKernelArgInst: int primitive" << endl; lastInst = PatchHostside::addSetKernelArgInst_int(lastInst, value, intType); @@ -502,7 +540,7 @@ llvm::Instruction *PatchHostside::addSetKernelArgInst(llvm::Instruction *lastIns } else if(isa(value->getType())) { // cout << "structtype" << endl; indentor << "addSetKernelArgInst: byvalue struct" << endl; - lastInst = PatchHostside::addSetKernelArgInst_byvaluestruct(lastInst, valueAsPointerInstr); + lastInst = PatchHostside::addSetKernelArgInst_byvaluestruct(lastInst, paramInfo, valueAsPointerInstr); } else if(isa(value->getType())) { // cout << "got vector arg type" << endl; // value->dump(); @@ -608,24 +646,29 @@ void PatchHostside::getLaunchTypes( break; } indentor << "params[" << i << "]" << endl; - ParamInfo *paramInfo = info->params[i].get(); - - paramInfo->hostsideArg = hostsideArgs[i]; - paramInfo->devicesideArg = devicesideArgs[i]; - paramInfo->hostsideByVal = paramInfo->hostsideArg->hasByValAttr(); - paramInfo->devicesideByVal = paramInfo->devicesideArg->hasByValAttr(); - - indentor << "hostside byval " << paramInfo->hostsideArg->hasByValAttr() << endl; - indentor << "deviceside byval " << paramInfo->devicesideArg->hasByValAttr() << endl; - // indentor << "hostsidearg" << endl; - // hostsideArg->dump(); - // indentor << "devicesideArg" << endl; - // devicesideArg->dump(); - - paramInfo->typeHostsideFn = typeHostsideFn; - paramInfo->typeDevicesideFn = deviceFnType->getParamType(i); - indentor << " hostside type " << typeDumper.dumpType(paramInfo->typeHostsideFn) << endl; - indentor << " deviceside type " << typeDumper.dumpType(paramInfo->typeDevicesideFn) << endl; + { + Indentor indentor; + ParamInfo *paramInfo = info->params[i].get(); + + paramInfo->paramIndex = i; + + paramInfo->hostsideArg = hostsideArgs[i]; + paramInfo->devicesideArg = devicesideArgs[i]; + paramInfo->hostsideByVal = paramInfo->hostsideArg->hasByValAttr(); + paramInfo->devicesideByVal = paramInfo->devicesideArg->hasByValAttr(); + + indentor << "hostside byval " << paramInfo->hostsideArg->hasByValAttr() << endl; + indentor << "deviceside byval " << paramInfo->devicesideArg->hasByValAttr() << endl; + // indentor << "hostsidearg" << endl; + // hostsideArg->dump(); + // indentor << "devicesideArg" << endl; + // devicesideArg->dump(); + + paramInfo->typeHostsideFn = typeHostsideFn; + paramInfo->typeDevicesideFn = deviceFnType->getParamType(i); + indentor << "hostside type " << typeDumper.dumpType(paramInfo->typeHostsideFn) << endl; + indentor << "deviceside type " << typeDumper.dumpType(paramInfo->typeDevicesideFn) << endl; + } i++; } } diff --git a/src/patch_hostside.h b/src/patch_hostside.h index 3ad69248..4758029b 100644 --- a/src/patch_hostside.h +++ b/src/patch_hostside.h @@ -87,6 +87,8 @@ class ParamInfo { llvm::Argument *devicesideArg = 0; int size = 0; // from CudaLaunch function call declaration + + int paramIndex = 0; // in the original function, nto in our hacked around kernel function }; // noncopyable(const noncopyable&) =delete; @@ -197,7 +199,8 @@ class PatchHostside { // // this patch_hostside addSetKernelArgInst_byvaluestruct function is going to handle walking the struct, and sending // the other pointers through using additional method calls, likely to setKernelArgGpuBuffer - static llvm::Instruction *addSetKernelArgInst_byvaluestruct(llvm::Instruction *lastInst, llvm::Value *valueAsPointerInstr); + static llvm::Instruction *addSetKernelArgInst_byvaluestruct( + llvm::Instruction *lastInst, cocl::ParamInfo *paramInfo, llvm::Value *valueAsPointerInstr); // this needs to do the same as addSetKernelArgInst_byvaluestruct , but it passes the struct pointer into the // setKernelArgGpuBuffer function, rather than the setKernelArgHostsideBuffer diff --git a/test/tf/random_op_gpu.meta b/test/tf/random_op_gpu.meta new file mode 100644 index 00000000..b7b4205b --- /dev/null +++ b/test/tf/random_op_gpu.meta @@ -0,0 +1,24 @@ +kernel: _ZN10tensorflow7functor28FillPhiloxRandomKernelLaunchINS_6random27TruncatedNormalDistributionINS2_19SingleSampleAdapterINS2_12PhiloxRandomEEEfEEEEvS5_PNT_17ResultElementTypeExS8_ + +in device-noopt: +``` +define void @_ZN10tensorflow7functor28FillPhiloxRandomKernelLaunchINS_6random27TruncatedNormalDistributionINS2_19SingleSampleAdapterINS2_12PhiloxRandomEEEfEEEEvS5_PNT_17ResultElementTypeExS8_( + %"class.tensorflow::random::PhiloxRandom"* byval nocapture readonly align 4, + float* nocapture, + i64, + %"class.tensorflow::random::TruncatedNormalDistribution"* byval nocapture readonly align 4 +) local_unnamed_addr #1 comdat { +``` + +in hostraw: +``` +define linkonce_odr void @_ZN10tensorflow7functor28FillPhiloxRandomKernelLaunchINS_6random27TruncatedNormalDistributionINS2_19SingleSampleAdapterINS2_12PhiloxRandomEEEfEEEEvS5_PNT_17ResultElementTypeExS8_( + %"class.tensorflow::random::PhiloxRandom"* byval align 8, + float*, + i64, + float) #0 { +``` + +And the original launch call, in hostraw: +``` +```