Skip to content

Commit

Permalink
add slowtastic memcpy implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
hughperkins committed May 27, 2017
1 parent 1d9e0e8 commit c24f828
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 6 deletions.
51 changes: 47 additions & 4 deletions src/new_instruction_dumper.cpp
Expand Up @@ -909,16 +909,19 @@ void NewInstructionDumper::dumpInsertValue(cocl::LocalValueInfo *localValueInfo)
// return "";
}

void NewInstructionDumper::dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo) {
// this will be slowtastic, but at least it gets things working...
void NewInstructionDumper::dumpMemcpy(LocalValueInfo *localValueInfo, int align) {
// std::string gencode = "";
localValueInfo->clWriter.reset(new NoExpressionClWriter(localValueInfo));
Instruction *instr = cast<Instruction>(localValueInfo->value);
int totalLength = cast<ConstantInt>(instr->getOperand(2))->getSExtValue();
int align = cast<ConstantInt>(instr->getOperand(3))->getSExtValue();
// int align = cast<ConstantInt>(instr->getOperand(3))->getSExtValue();
string dstAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(0)->getType());
string srcAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(1)->getType());
string elementTypeString = "";
if(align == 4) {
if(align == 1) {
elementTypeString = "char";
} else if(align == 4) {
elementTypeString = "int";
} else if(align == 8) {
elementTypeString = "int2";
Expand All @@ -942,6 +945,42 @@ void NewInstructionDumper::dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo
// localValueInfo
}

// void NewInstructionDumper::dumpMemcpy(LocalValueInfo *localValueInfo, int align) {
// // std::string gencode = "";
// localValueInfo->clWriter.reset(new NoExpressionClWriter(localValueInfo));
// Instruction *instr = cast<Instruction>(localValueInfo->value);
// int totalLength = cast<ConstantInt>(instr->getOperand(2))->getSExtValue();
// // int align = cast<ConstantInt>(instr->getOperand(3))->getSExtValue();
// // int align = 1;
// string dstAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(0)->getType());
// string srcAddressSpaceStr = typeDumper->dumpAddressSpace(instr->getOperand(1)->getType());
// string elementTypeString = "";
// if(align == 1) {
// elementTypeString = "char";
// } else if(align == 4) {
// elementTypeString = "int";
// } else if(align == 8) {
// elementTypeString = "int2";
// } else if(align == 16) {
// elementTypeString = "int4";
// } else {
// throw runtime_error("not implemented dumpmemcpy for align " + easycl::toString(align));
// }
// int numElements = totalLength / align;
// if(numElements >1) {
// // localValueInfo->inlineCl.push_back("#pragma unroll");
// localValueInfo->inlineCl.push_back("for(int __i=0; __i < " + easycl::toString(numElements) + "; __i++) {");
// localValueInfo->inlineCl.push_back(" ((" + dstAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(0))->getExpr() + ")[__i] = " +
// "((" + srcAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(1))->getExpr() + ")[__i]");
// localValueInfo->inlineCl.push_back("}\n");
// } else {
// localValueInfo->inlineCl.push_back("((" + dstAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(0))->getExpr() + ")[0] = " +
// "((" + srcAddressSpaceStr + " " + elementTypeString + " *)" + getOperand(instr->getOperand(1))->getExpr() + ")[0]");
// }
// // return gencode;
// // localValueInfo
// }

void NewInstructionDumper::dumpCall(LocalValueInfo *localValueInfo, const std::map<llvm::Function *, llvm::Type *> &returnTypeByFunction) {
localValueInfo->clWriter.reset(new CallClWriter(localValueInfo));
// ClWriter *clWriter = cast<ClWriter>(localValueInfo->clWriter.get());
Expand Down Expand Up @@ -1122,7 +1161,11 @@ void NewInstructionDumper::dumpCall(LocalValueInfo *localValueInfo, const std::m
localValueInfo->setAddressSpace(0);
return;
} else if(functionName == "llvm.memcpy.p0i8.p0i8.i64") {
dumpMemcpyCharCharLong(localValueInfo); // just ignore for now
int align = cast<ConstantInt>(instr->getOperand(3))->getSExtValue();
dumpMemcpy(localValueInfo, align);
return;
} else if(functionName == "_Z6memcpyPvPKvm") {
dumpMemcpy(localValueInfo, 1);
return;
} else if(functionNamesMap->isMappedFunction(functionName)) {
functionName = functionNamesMap->getFunctionMappedName(functionName);
Expand Down
3 changes: 2 additions & 1 deletion src/new_instruction_dumper.h
Expand Up @@ -104,7 +104,8 @@ class NewInstructionDumper {
LocalValueInfo *dumpConstant(llvm::Constant *constant);
void dumpConstantExpr(LocalValueInfo *localValueInfo);
// LocalValueInfo *CreateConstantInfo(Consant *constant);
void dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo);
// void dumpMemcpyCharCharLong(LocalValueInfo *localValueInfo);
void dumpMemcpy(LocalValueInfo *localValueInfo, int align);
void dumpCall(LocalValueInfo *localValueInfo, const std::map<llvm::Function *, llvm::Type *> &returnTypeByFunction);

void runGeneration(LocalValueInfo *localValueInfo, const std::map<llvm::Function *, llvm::Type *> &returnTypeByFunction);
Expand Down
2 changes: 1 addition & 1 deletion test/test_maths.py
@@ -1,4 +1,4 @@
# Copyright Hugh Perkins 2016
# Copyright Hugh Perkins 2016, 2017
"""
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
50 changes: 50 additions & 0 deletions test/test_memcpy.py
@@ -0,0 +1,50 @@
# Copyright Hugh Perkins 2017
"""
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
import pyopencl as cl
import os
import math
import pytest
from test import test_common
from test.test_common import offset_type


def test_memcpy(context, q, int_data, int_data_gpu):
ll_code = """
declare void @_Z6memcpyPvPKvm(i8*, i8*, i64)
define void @mykernel(i32* %data) {
%1 = bitcast i32* %data to i8*
%2 = getelementptr i32, i32* %data, i32 8
%3 = bitcast i32* %2 to i8*
call void @_Z6memcpyPvPKvm(i8 *%3, i8 *%1, i64 32)
ret void
}
"""
cl_code = test_common.ll_to_cl(ll_code, 'mykernel', num_clmems=1)
print('cl_code', cl_code)
for i in range(8):
int_data[i] = 3 + i
cl.enqueue_copy(q, int_data_gpu, int_data)
kernel = test_common.build_kernel(context, cl_code, 'mykernel')
kernel(q, (32,), (32,), int_data_gpu, offset_type(0), cl.LocalMemory(32))
from_gpu = np.copy(int_data)
cl.enqueue_copy(q, from_gpu, int_data_gpu)
q.finish()
for i in range(8):
print(i, from_gpu[8 + i])
assert from_gpu[8 + i] == 3 + i

0 comments on commit c24f828

Please sign in to comment.