apache · tqchen · Jun 22, 2018 · Apr 12, 2018 · Apr 12, 2018 · Apr 13, 2018
diff --git a/include/tvm/build_module.h b/include/tvm/build_module.h
@@ -157,9 +157,9 @@ EXPORT Target rasp(const std::vector<std::string>& options =
 EXPORT Target mali(const std::vector<std::string>& options =
                    std::vector<std::string>());
 
-/*! \return A target for Intel GPU */
-EXPORT Target intel_gpu(const std::vector<std::string>& options =
-                   std::vector<std::string>());
+/*! \return A target for Intel Graphics */
+EXPORT Target intel_graphics(const std::vector<std::string>& options =
+                             std::vector<std::string>());
 
 /*! \return A target for stackvm */
 EXPORT Target stackvm(const std::vector<std::string>& options =

diff --git a/python/tvm/target.py b/python/tvm/target.py
@@ -76,7 +76,7 @@ class Target(NodeBase):
     - :any:`tvm.target.cuda` create CUDA target
     - :any:`tvm.target.rocm` create ROCM target
     - :any:`tvm.target.mali` create Mali target
-    - :any:`tvm.target.intel_gpu` create Intel GPU target
+    - :any:`tvm.target.intel_graphics` create Intel Graphics target
     """
     def __init__(self, handle):
         super(Target, self).__init__(handle)
@@ -402,15 +402,15 @@ def mali(options=None):
     return _api_internal._TargetCreate("opencl", *opts)
 
 
-def intel_gpu(options=None):
-    """Returns an Intel GPU target.
+def intel_graphics(options=None):
+    """Returns an Intel Graphics target.
 
     Parameters
     ----------
     options : str or list of str
         Additional options
     """
-    opts = ["-device=intel_gpu"]
+    opts = ["-device=intel_graphics"]
     opts = _merge_opts(opts, options)
     return _api_internal._TargetCreate("opencl", *opts)
 

diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc
@@ -76,7 +76,7 @@ Target CreateTarget(const std::string& target_name,
     t->keys_array.push_back(ir::StringImm::make("rocm"));
     t->keys_array.push_back(ir::StringImm::make("gpu"));
     t->max_num_threads = 256;
-    if (t->device_name == "intel_gpu") {
+    if (t->device_name == "intel_graphics") {
       t->thread_warp_size = 16;
     }
   } else if (target_name == "metal" || target_name == "vulkan") {
@@ -274,9 +274,9 @@ Target mali(const std::vector<std::string>& options) {
   }));
 }
 
-Target intel_gpu(const std::vector<std::string>& options) {
+Target intel_graphics(const std::vector<std::string>& options) {
   return CreateTarget("opencl", MergeOptions(options, {
-    "-device=intel_gpu"
+    "-device=intel_graphics"
   }));
 }
 

diff --git a/src/codegen/codegen_opencl.cc b/src/codegen/codegen_opencl.cc
@@ -159,7 +159,7 @@ void CodeGenOpenCL::PrintStorageSync(const Call* op) {
   const std::string& sync = op->args[0].as<StringImm>()->value;
   if (sync == "warp") {
     this->PrintIndent();
-    this->stream << "sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n";
+    this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";
   } else if (sync == "shared") {
     this->PrintIndent();
     this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";

diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
@@ -40,7 +40,7 @@ void OpenCLWorkspace::GetAttr(
     }
     case kWarpSize: {
       /* TODO: the warp size of OpenCL device is not always 1
-               e.g. Intel GPU has a sub group concept which contains 8 - 32 work items,
-               corresponding to the number of SIMD entries the heardware configures.
+               e.g. Intel Graphics has a sub group concept which contains 8 - 32 work items,
+               corresponding to the number of SIMD entries the hardware configures.
                We need to figure out a way to query this information from the hardware.
       */

diff --git a/tests/python/integration/test_ewise.py b/tests/python/integration/test_ewise.py
@@ -34,7 +34,7 @@ def check_device(device, host="stackvm"):
         np.testing.assert_allclose(
             b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)
 
-    check_device("opencl -device=intel_gpu")
+    check_device("opencl -device=intel_graphics")
     check_device("cuda", "llvm")
     check_device("vulkan")
 

diff --git a/tests/python/unittest/test_lang_target.py b/tests/python/unittest/test_lang_target.py
@@ -47,7 +47,7 @@ def test_target_string_parse():
     assert str(target) == str(tvm.target.cuda("-libs=cublas,cudnn"))
 
 
-    assert tvm.target.intel_gpu().device_name == "intel_gpu"
+    assert tvm.target.intel_graphics().device_name == "intel_graphics"
 
 if __name__ == "__main__":
     test_target_dispatch()

diff --git a/topi/python/topi/__init__.py b/topi/python/topi/__init__.py
@@ -26,6 +26,7 @@
 from . import cuda
 from . import rasp
 from . import mali
+from . import intel_graphics
 from . import opengl
 from . import util
 from . import rocm

diff --git a/topi/python/topi/cuda/pooling.py b/topi/python/topi/cuda/pooling.py
@@ -33,9 +33,8 @@ def _schedule(Pool):
         else:
             Out = outs[0].op.output(0)
             s[Pool].set_scope("local")
-        i, c, h, w = s[Out].op.axis
-        by, ty = s[Out].split(i, factor=num_thread)
-        bx, tx = s[Out].split(c, factor=num_thread)
+        by, ty = s[Out].split(s[Out].op.axis[0], factor=num_thread)
+        bx, tx = s[Out].split(s[Out].op.axis[1], factor=num_thread)
         s[Out].reorder(by, bx, ty, tx)
         s[Out].bind(ty, thread_y)
         s[Out].bind(tx, thread_x)

diff --git a/topi/python/topi/intel_graphics/__init__.py b/topi/python/topi/intel_graphics/__init__.py
@@ -0,0 +1,5 @@
+# pylint: disable=redefined-builtin, wildcard-import
+"""Intel Gen9 GPU specific declaration and schedules."""
+from __future__ import absolute_import as _abs
+
+from .conv2d import *