diff --git a/src/new_instruction_dumper.cpp b/src/new_instruction_dumper.cpp index af3c0576..bcbbd738 100644 --- a/src/new_instruction_dumper.cpp +++ b/src/new_instruction_dumper.cpp @@ -909,16 +909,19 @@ void NewInstructionDumper::dumpInsertValue(cocl::LocalValueInfo *localValueInfo) // return ""; } -void NewInstructionDumper::dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo) { +// this will be slowtastic, but at least it gets things working... +void NewInstructionDumper::dumpMemcpy(LocalValueInfo *localValueInfo, int align) { // std::string gencode = ""; localValueInfo->clWriter.reset(new NoExpressionClWriter(localValueInfo)); Instruction *instr = cast(localValueInfo->value); int totalLength = cast(instr->getOperand(2))->getSExtValue(); - int align = cast(instr->getOperand(3))->getSExtValue(); + // int align = cast(instr->getOperand(3))->getSExtValue(); string dstAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(0)->getType()); string srcAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(1)->getType()); string elementTypeString = ""; - if(align == 4) { + if(align == 1) { + elementTypeString = "char"; + } else if(align == 4) { elementTypeString = "int"; } else if(align == 8) { elementTypeString = "int2"; @@ -942,6 +945,42 @@ void NewInstructionDumper::dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo // localValueInfo } +// void NewInstructionDumper::dumpMemcpy(LocalValueInfo *localValueInfo, int align) { +// // std::string gencode = ""; +// localValueInfo->clWriter.reset(new NoExpressionClWriter(localValueInfo)); +// Instruction *instr = cast(localValueInfo->value); +// int totalLength = cast(instr->getOperand(2))->getSExtValue(); +// // int align = cast(instr->getOperand(3))->getSExtValue(); +// // int align = 1; +// string dstAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(0)->getType()); +// string srcAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(1)->getType()); +// string elementTypeString = ""; +// if(align == 1) { +// elementTypeString = "char"; +// } else if(align == 4) { +// elementTypeString = "int"; +// } else if(align == 8) { +// elementTypeString = "int2"; +// } else if(align == 16) { +// elementTypeString = "int4"; +// } else { +// throw runtime_error("not implemented dumpmemcpy for align " + easycl::toString(align)); +// } +// int numElements = totalLength / align; +// if(numElements >1) { +// // localValueInfo->inlineCl.push_back("#pragma unroll"); +// localValueInfo->inlineCl.push_back("for(int __i=0; __i < " + easycl::toString(numElements) + "; __i++) {"); +// localValueInfo->inlineCl.push_back(" ((" + dstAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(0))->getExpr() + ")[__i] = " + +// "((" + srcAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(1))->getExpr() + ")[__i]"); +// localValueInfo->inlineCl.push_back("}\n"); +// } else { +// localValueInfo->inlineCl.push_back("((" + dstAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(0))->getExpr() + ")[0] = " + +// "((" + srcAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(1))->getExpr() + ")[0]"); +// } +// // return gencode; +// // localValueInfo +// } + void NewInstructionDumper::dumpCall(LocalValueInfo *localValueInfo, const std::map &returnTypeByFunction) { localValueInfo->clWriter.reset(new CallClWriter(localValueInfo)); // ClWriter *clWriter = cast(localValueInfo->clWriter.get()); @@ -1122,7 +1161,11 @@ void NewInstructionDumper::dumpCall(LocalValueInfo *localValueInfo, const std::m localValueInfo->setAddressSpace(0); return; } else if(functionName == "llvm.memcpy.p0i8.p0i8.i64") { - dumpMemcpyCharCharLong(localValueInfo); // just ignore for now + int align = cast(instr->getOperand(3))->getSExtValue(); + dumpMemcpy(localValueInfo, align); + return; + } else if(functionName == "_Z6memcpyPvPKvm") { + dumpMemcpy(localValueInfo, 1); return; } else if(functionNamesMap->isMappedFunction(functionName)) { functionName = functionNamesMap->getFunctionMappedName(functionName); diff --git a/src/new_instruction_dumper.h b/src/new_instruction_dumper.h index fb0618fe..00a5f79f 100644 --- a/src/new_instruction_dumper.h +++ b/src/new_instruction_dumper.h @@ -104,7 +104,8 @@ class NewInstructionDumper { LocalValueInfo *dumpConstant(llvm::Constant *constant); void dumpConstantExpr(LocalValueInfo *localValueInfo); // LocalValueInfo *CreateConstantInfo(Consant *constant); - void dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo); + // void dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo); + void dumpMemcpy(LocalValueInfo *localValueInfo, int align); void dumpCall(LocalValueInfo *localValueInfo, const std::map &returnTypeByFunction); void runGeneration(LocalValueInfo *localValueInfo, const std::map &returnTypeByFunction); diff --git a/test/test_maths.py b/test/test_maths.py index 1cb0be78..ad7f6808 100644 --- a/test/test_maths.py +++ b/test/test_maths.py @@ -1,4 +1,4 @@ -# Copyright Hugh Perkins 2016 +# Copyright Hugh Perkins 2016, 2017 """ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/test_memcpy.py b/test/test_memcpy.py new file mode 100644 index 00000000..656685b9 --- /dev/null +++ b/test/test_memcpy.py @@ -0,0 +1,50 @@ +# Copyright Hugh Perkins 2017 +""" +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import numpy as np +import pyopencl as cl +import os +import math +import pytest +from test import test_common +from test.test_common import offset_type + + +def test_memcpy(context, q, int_data, int_data_gpu): + ll_code = """ +declare void @_Z6memcpyPvPKvm(i8*, i8*, i64) + +define void @mykernel(i32* %data) { + %1 = bitcast i32* %data to i8* + + %2 = getelementptr i32, i32* %data, i32 8 + %3 = bitcast i32* %2 to i8* + + call void @_Z6memcpyPvPKvm(i8 *%3, i8 *%1, i64 32) + ret void +} +""" + cl_code = test_common.ll_to_cl(ll_code, 'mykernel', num_clmems=1) + print('cl_code', cl_code) + for i in range(8): + int_data[i] = 3 + i + cl.enqueue_copy(q, int_data_gpu, int_data) + kernel = test_common.build_kernel(context, cl_code, 'mykernel') + kernel(q, (32,), (32,), int_data_gpu, offset_type(0), cl.LocalMemory(32)) + from_gpu = np.copy(int_data) + cl.enqueue_copy(q, from_gpu, int_data_gpu) + q.finish() + for i in range(8): + print(i, from_gpu[8 + i]) + assert from_gpu[8 + i] == 3 + i