always use normalized sampler to read image3d with literal sampler (#…

…1215) We cannot use unormalized sampled with 3D images. This commit intend to fix that for literal sampler (static sampler defined in the kernel). When detecting a case of reading a image3d with a literal sampler using unnormalized coordinate: - create a equivalent sampler using normalized coordinate - normalized the coordinate Ref #1202
google · Sep 7, 2023 · 3e3c954 · 3e3c954
1 parent 6bfb8fb
commit 3e3c954
Show file tree

Hide file tree

Showing 11 changed files with 286 additions and 30 deletions.
diff --git a/docs/OpenCLCOnVulkan.md b/docs/OpenCLCOnVulkan.md
@@ -823,6 +823,13 @@ describe:
 
 All supported.
 
+Reading 3D images with a unormalized sampler is not allowed in Vulkan. Thus
+OpenCL source code performing this operation is emulated by normalizing the
+coordinate before reading the image using a normalized sampler.
+
+The normalization of the coordinate can introduce an accuracy issue as the
+floating point division used is not correctly rounded.
+
 #### cl_khr_subgroups extension
 
 The OpenCL extension `cl_khr_subgroups` requires SPIR-V 1.3 or greater and

diff --git a/lib/BuiltinsEnum.h b/lib/BuiltinsEnum.h
@@ -29,6 +29,7 @@ enum BuiltinType : unsigned int {
   kSpirvCopyMemory,
   kClspvSamplerVarLiteral,
   kClspvCompositeConstruct,
+  kClspvGetImageSizes,
   kType_Clspv_End,
 
   kType_Async_Start,

diff --git a/lib/BuiltinsMap.inc b/lib/BuiltinsMap.inc
@@ -1117,6 +1117,7 @@ static std::unordered_map<const char *, Builtins::BuiltinType, cstr_hash,
         {"clspv.sampler_var_literal", Builtins::kClspvSamplerVarLiteral},
         {"clspv.composite_construct", Builtins::kClspvCompositeConstruct},
 
+        {"clspv.get_image_sizes", Builtins::kClspvGetImageSizes},
 };
 
 #endif // CLSPV_LIB_BUILTINSMAP_INC_
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
@@ -77,6 +77,7 @@ add_library(clspv_passes OBJECT
   ${CMAKE_CURRENT_SOURCE_DIR}/ReplacePointerBitcastPass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/RewriteInsertsPass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/RewritePackedStructs.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/SamplerUtils.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ScalarizePass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ShareModuleScopeVariables.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/SignedCompareFixupPass.cpp

diff --git a/lib/ReplaceOpenCLBuiltinPass.cpp b/lib/ReplaceOpenCLBuiltinPass.cpp
@@ -34,12 +34,14 @@
 
 #include "clspv/AddressSpace.h"
 #include "clspv/Option.h"
+#include "clspv/Sampler.h"
 
 #include "Builtins.h"
 #include "Constants.h"
 #include "MemFence.h"
 #include "ReplaceOpenCLBuiltinPass.h"
 #include "SPIRVOp.h"
+#include "SamplerUtils.h"
 #include "Types.h"
 
 using namespace clspv;
@@ -387,6 +389,9 @@ PreservedAnalyses ReplaceOpenCLBuiltinPass::run(Module &M,
       func_list.push_front(&F);
     }
   }
+
+  removeUnusedSamplers(M);
+
   if (func_list.size() != 0) {
     // recursively convert functions, but first remove dead
     for (auto *F : func_list) {
@@ -400,6 +405,23 @@ PreservedAnalyses ReplaceOpenCLBuiltinPass::run(Module &M,
   return PA;
 }
 
+void ReplaceOpenCLBuiltinPass::removeUnusedSamplers(Module &M) {
+  auto F = M.getFunction(clspv::TranslateSamplerInitializerFunction());
+  if (!F) {
+    return;
+  }
+  SmallVector<CallInst *> ToErase;
+  for (auto U : F->users()) {
+    auto Call = dyn_cast<CallInst>(U);
+    if (Call != nullptr && Call->getNumUses() == 0) {
+      ToErase.push_back(Call);
+    }
+  }
+  for (auto Call : ToErase) {
+    Call->eraseFromParent();
+  }
+}
+
 bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
   auto &FI = Builtins::Lookup(&F);
   auto builtin = FI.getType();
@@ -713,9 +735,8 @@ bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
   case Builtins::kReadImagef:
   case Builtins::kReadImagei:
   case Builtins::kReadImageui: {
-    if (FI.getParameter(1).isSampler() &&
-        FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
-      return replaceSampledReadImageWithIntCoords(F);
+    if (FI.getParameter(1).isSampler()) {
+      return replaceSampledReadImage(F);
     }
     break;
   }
@@ -3271,44 +3292,84 @@ bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
   });
 }
 
-bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
-    Function &F) {
-  // convert read_image with int coords to float coords
+bool ReplaceOpenCLBuiltinPass::replaceSampledReadImage(Function &F) {
   Module &M = *F.getParent();
   return replaceCallsWithValue(F, [&](CallInst *CI) {
+    bool changed = false;
+    auto &FI = Builtins::Lookup(&F);
     // The image.
-    auto Arg0 = CI->getOperand(0);
+    auto Img = CI->getOperand(0);
 
     // The sampler.
-    auto Arg1 = CI->getOperand(1);
+    auto Sampler = CI->getOperand(1);
 
-    // The coordinate (integer type that we can't handle).
-    auto Arg2 = CI->getOperand(2);
+    // The coordinate
+    auto Coord = CI->getOperand(2);
 
-    auto *image_ty = InferType(Arg0, M.getContext(), &InferredTypeCache);
-    uint32_t dim = clspv::ImageNumDimensions(image_ty);
-    uint32_t components = dim + (clspv::IsArrayImageType(image_ty) ? 1 : 0);
-    Type *float_ty = nullptr;
-    if (components == 1) {
-      float_ty = Type::getFloatTy(M.getContext());
-    } else {
-      float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
-                                      cast<VectorType>(Arg2->getType())
-                                          ->getElementCount()
-                                          .getKnownMinValue());
-    }
+    FunctionType *NewFType = F.getFunctionType();
+    std::string NewFName = F.getName().str();
 
-    auto NewFType = FunctionType::get(
-        CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
+    auto *image_ty = InferType(Img, M.getContext(), &InferredTypeCache);
+    if (FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
+      uint32_t dim = clspv::ImageNumDimensions(image_ty);
+      uint32_t components = dim + (clspv::IsArrayImageType(image_ty) ? 1 : 0);
+      Type *float_ty = nullptr;
+      if (components == 1) {
+        float_ty = Type::getFloatTy(M.getContext());
+      } else {
+        float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
+                                        cast<VectorType>(Coord->getType())
+                                            ->getElementCount()
+                                            .getKnownMinValue());
+      }
 
-    std::string NewFName = F.getName().str();
-    NewFName[NewFName.length() - 1] = 'f';
+      Coord = CastInst::Create(Instruction::SIToFP, Coord, float_ty, "", CI);
+      NewFType = FunctionType::get(
+          CI->getType(), {Img->getType(), Sampler->getType(), float_ty}, false);
+      NewFName[NewFName.length() - 1] = 'f';
+      changed = true;
+    }
 
-    auto NewF = M.getOrInsertFunction(NewFName, NewFType);
+    FunctionCallee SamplerFct;
+    uint64_t SamplerInitValue;
+    auto IsUnnormalizedStaticSampler = [&Sampler, &SamplerFct,
+                                        &SamplerInitValue, &M]() {
+      auto SamplerCall = dyn_cast<CallInst>(Sampler);
+      if (SamplerCall == nullptr ||
+          !SamplerCall->getCalledFunction()->getName().contains(
+              TranslateSamplerInitializerFunction())) {
+        return false;
+      }
+      SamplerFct =
+          M.getOrInsertFunction(SamplerCall->getCalledFunction()->getName(),
+                                SamplerCall->getFunctionType());
+      auto cst = dyn_cast<ConstantInt>(SamplerCall->getOperand(0));
+      if (cst == nullptr) {
+        return false;
+      }
+      SamplerInitValue = cst->getZExtValue();
+      return (SamplerInitValue & kSamplerNormalizedCoordsMask) ==
+             CLK_NORMALIZED_COORDS_FALSE;
+    };
+    if (clspv::ImageDimensionality(image_ty) == spv::Dim3D &&
+        IsUnnormalizedStaticSampler()) {
+      IRBuilder<> B(CI);
+      // normalized coordinate
+      Coord = NormalizedCoordinate(M, B, Coord, Img, image_ty);
+      // copy the sampler but using normalized coordinate
+      Sampler = CallInst::Create(
+          SamplerFct,
+          {B.getInt32(SamplerInitValue | CLK_NORMALIZED_COORDS_TRUE)}, "",
+          dyn_cast<Instruction>(Sampler));
+      changed = true;
+    }
 
-    auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
+    if (!changed) {
+      return (CallInst *)nullptr;
+    }
 
-    return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
+    auto NewF = M.getOrInsertFunction(NewFName, NewFType);
+    return CallInst::Create(NewF, {Img, Sampler, Coord}, "", CI);
   });
 }
 

diff --git a/lib/ReplaceOpenCLBuiltinPass.h b/lib/ReplaceOpenCLBuiltinPass.h
@@ -34,6 +34,7 @@ struct ReplaceOpenCLBuiltinPass
 
 private:
   bool runOnFunction(llvm::Function &F);
+  void removeUnusedSamplers(llvm::Module &M);
   bool replaceAbs(llvm::Function &F);
   bool replaceAbsDiff(llvm::Function &F, bool is_signed);
   bool replaceCopysign(llvm::Function &F);
@@ -86,7 +87,7 @@ struct ReplaceOpenCLBuiltinPass
   bool replaceVstoreHalf16(llvm::Function &F);
   bool replaceHalfReadImage(llvm::Function &F);
   bool replaceHalfWriteImage(llvm::Function &F);
-  bool replaceSampledReadImageWithIntCoords(llvm::Function &F);
+  bool replaceSampledReadImage(llvm::Function &F);
   bool replaceAtomics(llvm::Function &F, spv::Op Op);
   bool replaceAtomics(llvm::Function &F, llvm::AtomicRMWInst::BinOp Op);
   bool replaceAtomicLoad(llvm::Function &F);

diff --git a/lib/SPIRVProducerPass.cpp b/lib/SPIRVProducerPass.cpp
@@ -3667,6 +3667,37 @@ SPIRVProducerPassImpl::GenerateClspvInstruction(CallInst *Call,
     }
     break;
   }
+  case Builtins::kClspvGetImageSizes: {
+    addCapability(spv::CapabilityImageQuery);
+    Value *Image = Call->getArgOperand(0);
+    auto *ImageTy = InferType(Image, module->getContext(), &InferredTypeCache);
+    if (ImageDimensionality(ImageTy) != spv::Dim3D ||
+        !IsSampledImageType(ImageTy)) {
+      llvm_unreachable("Unexpected Image in Builtins::kClspvGetImageSizes");
+    }
+
+    SPIRVOperandVec Ops;
+
+    Ops << getSPIRVType(
+               FixedVectorType::get(Type::getInt32Ty(module->getContext()), 3))
+        << Image << getSPIRVInt32Constant(0);
+    RID = addSPIRVInst(spv::OpImageQuerySizeLod, Ops);
+
+    Ops.clear();
+    auto int4Ty =
+        FixedVectorType::get(Type::getInt32Ty(module->getContext()), 4);
+    Ops << getSPIRVType(int4Ty) << RID
+        << getSPIRVConstant(ConstantInt::get(int4Ty, (uint64_t)1)) << 0 << 1
+        << 2 << 4;
+    RID = addSPIRVInst(spv::OpVectorShuffle, Ops);
+
+    Ops.clear();
+    Ops << getSPIRVType(
+               FixedVectorType::get(Type::getFloatTy(module->getContext()), 4))
+        << RID;
+    RID = addSPIRVInst(spv::OpConvertUToF, Ops);
+    break;
+  }
   default:
     llvm_unreachable("Unknown CLSPV Instruction");
     break;

diff --git a/lib/SamplerUtils.cpp b/lib/SamplerUtils.cpp
@@ -0,0 +1,31 @@
+// Copyright 2023 The Clspv Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "SamplerUtils.h"
+
+using namespace llvm;
+
+namespace clspv {
+
+Value *NormalizedCoordinate(Module &M, IRBuilder<> &B, Value *Coord, Value *Img,
+                            Type *ImgTy) {
+  auto getImageSizesFct = M.getOrInsertFunction(
+      "clspv.get_image_sizes",
+      FunctionType::get(Coord->getType(), {ImgTy}, false));
+  Value *ImgSizes = B.CreateCall(getImageSizesFct, {Img});
+
+  return B.CreateFDiv(Coord, ImgSizes);
+}
+
+} // namespace clspv
diff --git a/lib/SamplerUtils.h b/lib/SamplerUtils.h
@@ -0,0 +1,29 @@
+// Copyright 2023 The Clspv Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _CLSPV_SAMPLER_UTILS_H
+#define _CLSPV_SAMPLER_UTILS_H
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Value.h"
+
+namespace clspv {
+
+llvm::Value *NormalizedCoordinate(llvm::Module &M, llvm::IRBuilder<> &B,
+                                  llvm::Value *Coord, llvm::Value *Img,
+                                  llvm::Type *ImgTy);
+
+} // namespace clspv
+
+#endif
diff --git a/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.cl b/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.cl
@@ -0,0 +1,30 @@
+// RUN: clspv %s -o %t.spv --cl-native-math
+// RUN: spirv-dis %t.spv -o %t.spvasm
+// RUN: spirv-val %t.spv --target-env spv1.0
+// RUN: FileCheck %s < %t.spvasm
+
+// CHECK-DAG:  [[uint:%[^ ]+]] = OpTypeInt 32 0
+// CHECK-DAG:  [[float:%[^ ]+]] = OpTypeFloat 32
+// CHECK-DAG:  [[uint3:%[^ ]+]] = OpTypeVector [[uint]] 3
+// CHECK-DAG:  [[uint4:%[^ ]+]] = OpTypeVector [[uint]] 4
+// CHECK-DAG:  [[float4:%[^ ]+]] = OpTypeVector [[float]] 4
+// CHECK-DAG:  [[float_0:%[^ ]+]] = OpConstant [[float]] 0
+// CHECK-DAG:  [[uint_0:%[^ ]+]] = OpConstant [[uint]] 0
+// CHECK-DAG:  [[uint_1:%[^ ]+]] = OpConstant [[uint]] 1
+// CHECK-DAG:  [[uint_21:%[^ ]+]] = OpConstant [[uint]] 21
+// CHECK-DAG:  [[vec:%[^ ]+]] = OpConstantComposite [[uint4]] [[uint_1]] [[uint_1]] [[uint_1]] [[uint_1]]
+
+// CHECK:  [[sizes:%[^ ]+]] = OpImageQuerySizeLod [[uint3]] {{.*}} [[uint_0]]
+// CHECK:  [[shuffle:%[^ ]+]] = OpVectorShuffle [[uint4]] [[sizes]] [[vec]] 0 1 2 4
+// CHECK:  [[convert:%[^ ]+]] = OpConvertUToF [[float4]] [[shuffle]]
+// CHECK:  [[div:%[^ ]+]] = OpFDiv [[float4]] %44 [[convert]]
+// CHECK:  OpImageSampleExplicitLod [[float4]] {{.*}} [[div]] Lod [[float_0]]
+
+// CHECK:  OpExtInst %void {{.*}} LiteralSampler [[uint_0]] [[uint_0]] [[uint_21]]
+
+static const sampler_t my_sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;
+
+void kernel foo(read_only image3d_t img, global float4 *out, int i)
+{
+    *out = read_imagef(img, my_sampler, (int4)(2, 3, 4, 5));
+}