From c9d78edcadb555b4be7eb7d871fa121c3106ea75 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov@intel.com>
Date: Thu, 26 Dec 2019 17:35:57 +0300
Subject: [PATCH 1/3] [SYCL] Partially revert intel/llvm#960

The only file left is
`sycl/test/regression/fp16-with-unnamed-lambda.cpp`

Signed-off-by: Alexey Sachkov <alexey.sachkov@intel.com>
---
 clang/lib/AST/ItaniumMangle.cpp               | 71 -------------------
 .../CodeGenSYCL/half-with-unnamed-lambda.cpp  | 68 ------------------
 2 files changed, 139 deletions(-)
 delete mode 100644 clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 2ed006255c846..465f8ec23462b 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -2456,67 +2456,6 @@ static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty,
   return true;
 }
 
-namespace {
-struct DeclContextDesc {
-  Decl::Kind DeclKind;
-  StringRef Name;
-};
-} // namespace
-
-// For Scopes argument, the only supported Decl::Kind values are:
-// - Namespace
-// - CXXRecord
-// - ClassTemplateSpecialization
-static bool matchQualifiedTypeName(const QualType &Ty,
-                            ArrayRef<DeclContextDesc> Scopes) {
-  // The idea: check the declaration context chain starting from the type
-  // itself. At each step check the context is of expected kind
-  // (namespace) and name.
-  const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
-
-  if (!RecTy)
-    return false; // only classes/structs supported
-  const auto *Ctx = dyn_cast<DeclContext>(RecTy);
-
-  for (const auto &Scope : llvm::reverse(Scopes)) {
-    Decl::Kind DK = Ctx->getDeclKind();
-    StringRef Name = "";
-
-    if (DK != Scope.DeclKind)
-      return false;
-
-    switch (DK) {
-    case Decl::Kind::ClassTemplateSpecialization:
-      // ClassTemplateSpecializationDecl inherits from CXXRecordDecl
-    case Decl::Kind::CXXRecord:
-      Name = cast<CXXRecordDecl>(Ctx)->getName();
-      break;
-    case Decl::Kind::Namespace:
-      Name = cast<NamespaceDecl>(Ctx)->getName();
-      break;
-    default:
-      return false;
-    }
-    if (Name != Scope.Name)
-      return false;
-    Ctx = Ctx->getParent();
-  }
-  return Ctx->isTranslationUnit();
-}
-
-static bool isSYCLHostHalfType(const Type *Ty) {
-  // FIXME: this is not really portable, since the bunch of namespace below
-  // is not specified by the SYCL standard and highly depends on particular
-  // implementation
-  static const std::array<DeclContextDesc, 5> Scopes = {
-      DeclContextDesc{Decl::Kind::Namespace, "cl"},
-      DeclContextDesc{Decl::Kind::Namespace, "sycl"},
-      DeclContextDesc{Decl::Kind::Namespace, "detail"},
-      DeclContextDesc{Decl::Kind::Namespace, "half_impl"},
-      DeclContextDesc{Decl::Kind::CXXRecord, "half"}};
-  return matchQualifiedTypeName(QualType(Ty, 0), Scopes);
-}
-
 void CXXNameMangler::mangleType(QualType T) {
   // If our type is instantiation-dependent but not dependent, we mangle
   // it as it was written in the source, removing any top-level sugar.
@@ -2576,11 +2515,6 @@ void CXXNameMangler::mangleType(QualType T) {
 
   bool isSubstitutable =
     isTypeSubstitutable(quals, ty, Context.getASTContext());
-  if (Context.isUniqueNameMangler() && isSYCLHostHalfType(ty)) {
-    // Set isSubstitutable to false for cl::sycl::detail::half_impl::half
-    // to achieve the same mangling for other components
-    isSubstitutable = false;
-  }
   if (isSubstitutable && mangleSubstitution(T))
     return;
 
@@ -3057,11 +2991,6 @@ void CXXNameMangler::mangleType(const RecordType *T) {
   mangleType(static_cast<const TagType*>(T));
 }
 void CXXNameMangler::mangleType(const TagType *T) {
-  if (Context.isUniqueNameMangler() && isSYCLHostHalfType(T)) {
-    // Mangle cl::sycl::detail::half_imple::half as _Float16
-    mangleType(Context.getASTContext().Float16Ty);
-    return;
-  }
   mangleName(T->getDecl());
 }
 
diff --git a/clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp b/clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp
deleted file mode 100644
index f5b757f750d3e..0000000000000
--- a/clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -emit-llvm %s -o %t1.bc
-// RUN: llvm-dis %t1.bc -o - | FileCheck %s
-// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -emit-llvm %s -DUSE_WRAPPER=1 -o %t2.bc
-// RUN: llvm-dis %t2.bc -o - | FileCheck %s
-
-// Mangling of kernel lambda must be the same for both versions of half
-// CHECK: __unique_stable_name{{.*}} = private unnamed_addr constant [52 x i8] c"_ZTSN2cl4sycl6bufferINS0_4pairIDF16_NS0_5dummyEEEEE\00"
-
-// Helper function to get string returned by __unique_stable_name in LLVM IR
-template <typename T>
-void print() {
-  auto temp = __unique_stable_name(T);
-}
-
-// Helper function to get "print" emitted in device code
-template<typename T, typename F>
-__attribute__((sycl_kernel)) void helper(F f) {
-  print<T>();
-  f();
-}
-
-// Half wrapper, as it defined in SYCL headers
-namespace cl {
-namespace sycl {
-namespace detail {
-namespace half_impl {
-class half {
-public:
-  half operator=(int) {return *this;}
-};
-} // namespace half_impl
-} // namespace detail
-} // namespace sycl
-} // namespace cl
-
-#ifndef USE_WRAPPER
-using half = _Float16;
-#else
-using half = cl::sycl::detail::half_impl::half;
-#endif
-
-// A few more fake data types to complicate the mangling
-namespace cl {
-namespace sycl {
-struct dummy {
-  int a;
-};
-template<typename T1, typename T2>
-struct pair {
-  T1 a;
-  T2 b;
-};
-template <typename T>
-class buffer {
-public:
-  T &operator[](int) const { return value; }
-  mutable T value;
-};
-} // namespace sycl
-} // namespace cl
-
-int main() {
-  cl::sycl::buffer<cl::sycl::pair<half, cl::sycl::dummy>> B1;
-
-  helper<decltype(B1)>([](){});
-
-  return 0;
-}

From 898694da775a6faf14cb7fa5e3c93f648237bcc4 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov@intel.com>
Date: Thu, 26 Dec 2019 17:37:17 +0300
Subject: [PATCH 2/3] [SYCL] Partially revert intel/llvm#185

Changes to tests are preserved

Signed-off-by: Alexey Sachkov <alexey.sachkov@intel.com>
---
 sycl/include/CL/sycl/detail/kernel_desc.hpp | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp
index 9051c4ea98f42..ee6af56db4e67 100644
--- a/sycl/include/CL/sycl/detail/kernel_desc.hpp
+++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp
@@ -15,22 +15,10 @@ __SYCL_INLINE namespace cl {
 namespace sycl {
 namespace detail {
 
-namespace half_impl {
-
-class half;
-// Half type is defined as _Float16 on device and as manually implemented half
-// type on host. Integration header is generated by device compiler so it sees
-// half type as _Float16 and it will add _Float16 to integration header if it
-// is used in kernel name template parameters. To avoid errors in host
-// compilation we remove _Float16 from integration header using following macro.
-// Same thing goes about bool type which is defined as _Bool.
 #ifndef __SYCL_DEVICE_ONLY__
-#define _Float16 cl::sycl::detail::half_impl::half
 #define _Bool bool
 #endif
 
-} // namespace half_impl
-
 // kernel parameter kinds
 enum class kernel_param_kind_t {
   kind_accessor,

From 5d8ac2068580868503ba6315702755aeae22da53 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov@intel.com>
Date: Tue, 24 Dec 2019 22:46:52 +0300
Subject: [PATCH 3/3] [SYCL] Rework half implementation

Because of the fact, that `half` type is not a standard C++ type and it is
not supported everywhere, its implementation differs between host and
device: C++ class with overloaded arithmetic operators is used on host
and `_Float16` is used on device side.

Previously, the switch between two version was implemented as
preprocessor macro and having two different types caused some problems
with integration header and unnamed lambda feature, see intel/llvm#185 and
intel/llvm#960.

This patch redesigned `half` implementation in a way, that single
wrapper data type is used as `half` representation on both host and
device sides; differentiation between actual host and device
implementations is done under the hood of this wrapper.

Signed-off-by: Alexey Sachkov <alexey.sachkov@intel.com>
---
 sycl/include/CL/sycl/aliases.hpp              |  11 +-
 sycl/include/CL/sycl/detail/boolean.hpp       |   2 +-
 .../CL/sycl/detail/generic_type_lists.hpp     |   9 +-
 .../CL/sycl/detail/generic_type_traits.hpp    |  46 ++++++-
 sycl/include/CL/sycl/detail/type_traits.hpp   |  43 +++---
 sycl/include/CL/sycl/half_type.hpp            | 127 +++++++++++++++++-
 sycl/include/CL/sycl/intel/sub_group.hpp      |   6 +-
 sycl/include/CL/sycl/multi_ptr.hpp            |   6 +-
 sycl/include/CL/sycl/types.hpp                |  56 +++-----
 sycl/source/half_type.cpp                     |   2 +-
 sycl/test/basic_tests/generic_type_traits.cpp |   4 +
 sycl/test/regression/vec-to-half.cpp          |  11 ++
 12 files changed, 235 insertions(+), 88 deletions(-)
 create mode 100644 sycl/test/regression/vec-to-half.cpp

diff --git a/sycl/include/CL/sycl/aliases.hpp b/sycl/include/CL/sycl/aliases.hpp
index 24b7696d364bf..79eabbc94282a 100644
--- a/sycl/include/CL/sycl/aliases.hpp
+++ b/sycl/include/CL/sycl/aliases.hpp
@@ -24,11 +24,11 @@ class half;
 } // namespace sycl
 } // namespace cl
 
-#ifdef __SYCL_DEVICE_ONLY__
-using half = _Float16;
-#else
+// FIXME: line below exports 'half' into global namespace, which seems incorrect
+// However, SYCL 1.2.1 spec considers 'half' to be a fundamental C++ data type
+// which doesn't exist within the 'cl::sycl' namespace.
+// Related spec issue: KhronosGroup/SYCL-Docs#40
 using half = cl::sycl::detail::half_impl::half;
-#endif
 
 #define MAKE_VECTOR_ALIAS(ALIAS, TYPE, N)                                      \
   using ALIAS##N = cl::sycl::vec<TYPE, N>;
@@ -80,7 +80,8 @@ using ulong = unsigned long;
 using longlong = long long;
 using ulonglong = unsigned long long;
 // TODO cl::sycl::half is not in SYCL specification, but is used by Khronos CTS.
-using half = half;
+// Related tests issue: KhronosGroup/SYCL-CTS#37
+using half = cl::sycl::detail::half_impl::half;
 using cl_bool = bool;
 using cl_char = std::int8_t;
 using cl_uchar = std::uint8_t;
diff --git a/sycl/include/CL/sycl/detail/boolean.hpp b/sycl/include/CL/sycl/detail/boolean.hpp
index f59fb4fe860c2..4414ebb316228 100644
--- a/sycl/include/CL/sycl/detail/boolean.hpp
+++ b/sycl/include/CL/sycl/detail/boolean.hpp
@@ -99,7 +99,7 @@ template <int N> struct Boolean {
 
 private:
   template <int Num> friend struct Assigner;
-  alignas(VectorAlignment<bool, N>::value) DataType value;
+  alignas(detail::vector_alignment<bool, N>::value) DataType value;
 };
 
 template <> struct Boolean<1> {
diff --git a/sycl/include/CL/sycl/detail/generic_type_lists.hpp b/sycl/include/CL/sycl/detail/generic_type_lists.hpp
index 3adde29c82648..5a7226183d6a3 100644
--- a/sycl/include/CL/sycl/detail/generic_type_lists.hpp
+++ b/sycl/include/CL/sycl/detail/generic_type_lists.hpp
@@ -11,15 +11,20 @@
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/detail/stl_type_traits.hpp>
 #include <CL/sycl/detail/type_list.hpp>
-#include <CL/sycl/half_type.hpp>
 
 // Generic type name description, which serves as a description for all valid
 // types of parameters to kernel functions
 
-// Forward declaration
+// Forward declarations
 __SYCL_INLINE namespace cl {
 namespace sycl {
 template <typename T, int N> class vec;
+namespace detail {
+namespace half_impl {
+class half;
+}
+} // namespace detail
+using half = detail::half_impl::half;
 } // namespace sycl
 } // namespace cl
 
diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp
index db1511281d2e3..49dd336835660 100644
--- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp
+++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp
@@ -232,6 +232,32 @@ using is_genptr = bool_constant<
 
 template <typename T> using is_nan_type = is_contained<T, gtl::nan_list>;
 
+// nan_types
+template <typename T, typename Enable = void> struct nan_types;
+
+template <typename T>
+struct nan_types<
+    T, enable_if_t<is_contained<T, gtl::unsigned_short_list>::value, T>> {
+  using ret_type = change_base_type_t<T, half>;
+  using arg_type = find_same_size_type_t<gtl::scalar_unsigned_short_list, half>;
+};
+
+template <typename T>
+struct nan_types<
+    T, enable_if_t<is_contained<T, gtl::unsigned_int_list>::value, T>> {
+  using ret_type = change_base_type_t<T, float>;
+  using arg_type = find_same_size_type_t<gtl::scalar_unsigned_int_list, float>;
+};
+
+template <typename T>
+struct nan_types<
+    T,
+    enable_if_t<is_contained<T, gtl::unsigned_long_integer_list>::value, T>> {
+  using ret_type = change_base_type_t<T, double>;
+  using arg_type =
+      find_same_size_type_t<gtl::scalar_unsigned_long_integer_list, double>;
+};
+
 template <typename T> using nan_return_t = typename nan_types<T, T>::ret_type;
 
 template <typename T>
@@ -364,10 +390,14 @@ using select_cl_scalar_intergal_t =
 // select_cl_scalar_t picks corresponding cl_* type for input
 // scalar T or returns T if T is not scalar.
 template <typename T>
-using select_cl_scalar_t =
-    conditional_t<std::is_integral<T>::value, select_cl_scalar_intergal_t<T>,
-                  conditional_t<std::is_floating_point<T>::value,
-                                select_cl_scalar_float_t<T>, T>>;
+using select_cl_scalar_t = conditional_t<
+    std::is_integral<T>::value, select_cl_scalar_intergal_t<T>,
+    conditional_t<
+        std::is_floating_point<T>::value, select_cl_scalar_float_t<T>,
+        // half is a special case: it is implemented differently on host and
+        // device and therefore, might lower to different types
+        conditional_t<std::is_same<T, half>::value,
+                      cl::sycl::detail::half_impl::BIsRepresentationT, T>>>;
 
 // select_cl_vector_or_scalar does cl_* type selection for element type of
 // a vector type T and does scalar type substitution.  If T is not
@@ -378,7 +408,13 @@ template <typename T>
 struct select_cl_vector_or_scalar<
     T, typename std::enable_if<is_vgentype<T>::value>::type> {
   using type =
-      vec<select_cl_scalar_t<typename T::element_type>, T::get_count()>;
+      // select_cl_scalar_t returns _Float16, so, we try to instantiate vec
+      // class with _Float16 DataType, which is not expected there
+      // So, leave vector<half, N> as-is
+      vec<conditional_t<std::is_same<typename T::element_type, half>::value,
+                        typename T::element_type,
+                        select_cl_scalar_t<typename T::element_type>>,
+          T::get_count()>;
 };
 
 template <typename T>
diff --git a/sycl/include/CL/sycl/detail/type_traits.hpp b/sycl/include/CL/sycl/detail/type_traits.hpp
index 3a50940c25a03..c11e443f63ed9 100644
--- a/sycl/include/CL/sycl/detail/type_traits.hpp
+++ b/sycl/include/CL/sycl/detail/type_traits.hpp
@@ -12,12 +12,17 @@
 #include <CL/sycl/detail/generic_type_lists.hpp>
 #include <CL/sycl/detail/stl_type_traits.hpp>
 #include <CL/sycl/detail/type_list.hpp>
-#include <CL/sycl/half_type.hpp>
 
 #include <type_traits>
 
 __SYCL_INLINE namespace cl {
 namespace sycl {
+namespace detail {
+namespace half_impl {
+class half;
+}
+} // namespace detail
+using half = detail::half_impl::half;
 
 // Forward declaration
 template <typename ElementType, access::address_space Space> class multi_ptr;
@@ -38,6 +43,16 @@ struct vector_size_impl<vec<T, N>> : int_constant<N> {};
 template <typename T>
 struct vector_size : vector_size_impl<remove_cv_t<remove_reference_t<T>>> {};
 
+// 4.10.2.6 Memory layout and alignment
+template <typename T, int N>
+struct vector_alignment_impl
+    : conditional_t<N == 3, int_constant<sizeof(T) * 4>,
+                    int_constant<sizeof(T) * N>> {};
+
+template <typename T, int N>
+struct vector_alignment
+    : vector_alignment_impl<remove_cv_t<remove_reference_t<T>>, N> {};
+
 // vector_element
 template <typename T> struct vector_element_impl;
 template <typename T>
@@ -233,32 +248,6 @@ template <typename T, int N, typename TL> struct make_type_impl<vec<T, N>, TL> {
 template <typename T, typename TL>
 using make_type_t = typename make_type_impl<T, TL>::type;
 
-// nan_types
-template <typename T, typename Enable = void> struct nan_types;
-
-template <typename T>
-struct nan_types<
-    T, enable_if_t<is_contained<T, gtl::unsigned_short_list>::value, T>> {
-  using ret_type = change_base_type_t<T, half>;
-  using arg_type = find_same_size_type_t<gtl::scalar_unsigned_short_list, half>;
-};
-
-template <typename T>
-struct nan_types<
-    T, enable_if_t<is_contained<T, gtl::unsigned_int_list>::value, T>> {
-  using ret_type = change_base_type_t<T, float>;
-  using arg_type = find_same_size_type_t<gtl::scalar_unsigned_int_list, float>;
-};
-
-template <typename T>
-struct nan_types<
-    T,
-    enable_if_t<is_contained<T, gtl::unsigned_long_integer_list>::value, T>> {
-  using ret_type = change_base_type_t<T, double>;
-  using arg_type =
-      find_same_size_type_t<gtl::scalar_unsigned_long_integer_list, double>;
-};
-
 // make_larger_t
 template <typename T, typename Enable = void> struct make_larger_impl;
 template <typename T>
diff --git a/sycl/include/CL/sycl/half_type.hpp b/sycl/include/CL/sycl/half_type.hpp
index 85584690a1644..08d51e3580e14 100644
--- a/sycl/include/CL/sycl/half_type.hpp
+++ b/sycl/include/CL/sycl/half_type.hpp
@@ -9,7 +9,9 @@
 #pragma once
 
 #include <CL/sycl/detail/defines.hpp>
+#include <CL/sycl/detail/type_traits.hpp>
 
+#include <array>
 #include <cmath>
 #include <cstdint>
 #include <functional>
@@ -19,7 +21,7 @@
 __SYCL_INLINE namespace cl {
 namespace sycl {
 namespace detail {
-namespace half_impl {
+namespace host_half_impl {
 
 class half {
 public:
@@ -71,6 +73,125 @@ class half {
 private:
   uint16_t Buf;
 };
+
+} // namespace host_half_impl
+
+namespace half_impl {
+class half;
+
+// Several aliases are defined below:
+// - StorageT: actual representation of half data type. It is used by scalar
+//   half values and by 'cl::sycl::vec' class. On device side, it points to some
+//   native half data type, while on host some custom data type is used to
+//   emulate operations of 16-bit floating-point values
+//
+// - BIsRepresentationT: data type which is used by built-in functions. It is
+//   distinguished from StorageT, because on host, we can still operate on the
+//   wrapper itself and there is no sense in direct usage of underlying data
+//   type (too many changes required for BIs implementation without any
+//   foreseeable profits)
+//
+// - VecNStorageT - representation of N-element vector of halfs. Follows the
+//   same logic as StorageT
+#ifdef __SYCL_DEVICE_ONLY__
+  using StorageT = _Float16;
+  using BIsRepresentationT = _Float16;
+
+  using Vec2StorageT = StorageT __attribute__((ext_vector_type(2)));
+  using Vec3StorageT = StorageT __attribute__((ext_vector_type(3)));
+  using Vec4StorageT = StorageT __attribute__((ext_vector_type(4)));
+  using Vec8StorageT = StorageT __attribute__((ext_vector_type(8)));
+  using Vec16StorageT = StorageT __attribute__((ext_vector_type(16)));
+#else
+  using StorageT = detail::host_half_impl::half;
+  // No need to extract underlying data type for built-in functions operating on
+  // host
+  using BIsRepresentationT = half;
+
+  // On the host side we cannot use OpenCL cl_half# types as an underlying type
+  // for vec because they are actually defined as an integer type under the
+  // hood. As a result half values will be converted to the integer and passed
+  // as a kernel argument which is expected to be floating point number.
+  template <int NumElements> struct half_vec {
+    alignas(detail::vector_alignment<StorageT, NumElements>::value)
+      std::array<StorageT, NumElements> s;
+  };
+
+  using Vec2StorageT = half_vec<2>;
+  using Vec3StorageT = half_vec<3>;
+  using Vec4StorageT = half_vec<4>;
+  using Vec8StorageT = half_vec<8>;
+  using Vec16StorageT = half_vec<16>;
+#endif
+
+class half {
+public:
+  half() = default;
+  half(const half &) = default;
+  half(half &&) = default;
+
+  half(const float &rhs) : Data(rhs) {}
+
+  half &operator=(const half &rhs) = default;
+
+#ifndef __SYCL_DEVICE_ONLY__
+  // Since StorageT and BIsRepresentationT are different on host, these two
+  // helpers are required for 'vec' class
+  half(const detail::host_half_impl::half &rhs) : Data(rhs) {};
+  operator detail::host_half_impl::half() const { return Data; }
+#endif // __SYCL_DEVICE_ONLY__
+
+  // Operator +=, -=, *=, /=
+  half &operator+=(const half &rhs) {
+    Data += rhs.Data;
+    return *this;
+  }
+
+  half &operator-=(const half &rhs) {
+    Data -= rhs.Data;
+    return *this;
+  }
+
+  half &operator*=(const half &rhs) {
+    Data *= rhs.Data;
+    return *this;
+  }
+
+  half &operator/=(const half &rhs) {
+    Data /= rhs.Data;
+    return *this;
+  }
+
+  // Operator ++, --
+  half &operator++() {
+    *this += 1;
+    return *this;
+  }
+
+  half operator++(int) {
+    half ret(*this);
+    operator++();
+    return ret;
+  }
+
+  half &operator--() {
+    *this -= 1;
+    return *this;
+  }
+
+  half operator--(int) {
+    half ret(*this);
+    operator--();
+    return ret;
+  }
+
+  // Operator float
+  operator float() const { return static_cast<float>(Data); }
+
+  template <typename Key> friend struct std::hash;
+private:
+  StorageT Data;
+};
 } // namespace half_impl
 
 // Accroding to C++ standard math functions from cmath/math.h should work only
@@ -90,11 +211,7 @@ inline float cast_if_host_half(half_impl::half val) {
 } // namespace sycl
 } // namespace cl
 
-#ifdef __SYCL_DEVICE_ONLY__
-using half = _Float16;
-#else
 using half = cl::sycl::detail::half_impl::half;
-#endif
 
 // Partial specialization of some functions in namespace `std`
 namespace std {
diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/intel/sub_group.hpp
index e7eeb9d8844d9..e8379dc8536db 100644
--- a/sycl/include/CL/sycl/intel/sub_group.hpp
+++ b/sycl/include/CL/sycl/intel/sub_group.hpp
@@ -168,9 +168,9 @@ __SYCL_SG_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum<T>)
 __SYCL_SG_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum<T>)
 __SYCL_SG_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum<T>)
 __SYCL_SG_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum<T>)
-__SYCL_SG_CALC_OVERLOAD(GroupOpISigned, IAdd<T>, intel::plus<T>)
-__SYCL_SG_CALC_OVERLOAD(GroupOpIUnsigned, IAdd<T>, intel::plus<T>)
-__SYCL_SG_CALC_OVERLOAD(GroupOpFP, FAdd<T>, intel::plus<T>)
+__SYCL_SG_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus<T>)
+__SYCL_SG_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus<T>)
+__SYCL_SG_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus<T>)
 
 #undef __SYCL_SG_CALC_OVERLOAD
 
diff --git a/sycl/include/CL/sycl/multi_ptr.hpp b/sycl/include/CL/sycl/multi_ptr.hpp
index d67390b96831f..c3a05f08cc580 100644
--- a/sycl/include/CL/sycl/multi_ptr.hpp
+++ b/sycl/include/CL/sycl/multi_ptr.hpp
@@ -10,6 +10,7 @@
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/detail/common.hpp>
+#include <CL/sycl/detail/stl_type_traits.hpp>
 #include <cassert>
 #include <cstddef>
 
@@ -22,7 +23,10 @@ class accessor;
 
 template <typename ElementType, access::address_space Space> class multi_ptr {
 public:
-  using element_type = ElementType;
+  using element_type =
+      detail::conditional_t<std::is_same<ElementType, half>::value,
+                    cl::sycl::detail::half_impl::BIsRepresentationT,
+                    ElementType>;
   using difference_type = std::ptrdiff_t;
 
   // Implementation defined pointer and reference types that correspond to
diff --git a/sycl/include/CL/sycl/types.hpp b/sycl/include/CL/sycl/types.hpp
index 1d8b43f6bc771..2f420b69d4685 100644
--- a/sycl/include/CL/sycl/types.hpp
+++ b/sycl/include/CL/sycl/types.hpp
@@ -268,15 +268,6 @@ detail::enable_if_t<is_float_to_int<T, R>::value, R> convertImpl(T Value) {
 #endif
 }
 
-// 4.10.2.6 Memory layout and alignment
-template <int N> struct VectorLength { constexpr static int value = N; };
-
-template <> struct VectorLength<3> { constexpr static int value = 4; };
-
-template <typename T, int N> struct VectorAlignment {
-  constexpr static int value = sizeof(T) * VectorLength<N>::value;
-};
-
 } // namespace detail
 
 #if defined(_WIN32) && (_MSC_VER)
@@ -441,10 +432,16 @@ template <typename Type, int NumElements> class vec {
 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
   template <typename T = void>
   using EnableIfNotHostHalf = typename std::enable_if<
-      !std::is_same<DataT, cl::sycl::detail::half_impl::half>::value, T>::type;
+      !std::is_same<DataT, cl::sycl::detail::half_impl::half>::value ||
+          !std::is_same<cl::sycl::detail::half_impl::StorageT,
+                            cl::sycl::detail::host_half_impl::half>::value,
+      T>::type;
   template <typename T = void>
   using EnableIfHostHalf = typename std::enable_if<
-      std::is_same<DataT, cl::sycl::detail::half_impl::half>::value, T>::type;
+      std::is_same<DataT, cl::sycl::detail::half_impl::half>::value &&
+          std::is_same<cl::sycl::detail::half_impl::StorageT,
+                       cl::sycl::detail::host_half_impl::half>::value,
+      T>::type;
 
   template <typename Ty = DataT>
   explicit vec(const EnableIfNotHostHalf<Ty> &arg) {
@@ -1066,7 +1063,7 @@ template <typename Type, int NumElements> class vec {
   // For MSVC compiler max alignment is 64, e.g. vec<double, 16> required
   // alignment of 128 and MSVC compiler cann't align a parameter with requested
   // alignment of 128.
-  SYCL_ALIGNAS((detail::VectorAlignment<DataT, NumElements>::value))
+  SYCL_ALIGNAS((detail::vector_alignment<DataT, NumElements>::value))
       DataType m_Data;
 
   // friends
@@ -1798,14 +1795,9 @@ DECLARE_TYPE_VIA_CL_T(long);
 DECLARE_TYPE_VIA_CL_T(ulong);
 DECLARE_TYPE_T(longlong);
 DECLARE_TYPE_T(ulonglong);
+// Note: halfs are not declared here, because they have different representation
+// between host and device, see separate handling below
 DECLARE_TYPE_VIA_CL_T(float);
-// Half type is defined as custom class for host and _Float16 for device.
-// The ext_vector_type attribute is only applicable to integral and float
-// scalars so it's not possible to use attribute ext_vector_type for half on
-// host.
-#ifdef __SYCL_DEVICE_ONLY__
-DECLARE_TYPE_VIA_CL_T(half);
-#endif
 DECLARE_TYPE_VIA_CL_T(double);
 
 #define GET_CL_TYPE(target, num) __##target##num##_vec_t
@@ -1818,24 +1810,12 @@ DECLARE_TYPE_VIA_CL_T(double);
 #define GET_SCALAR_CL_TYPE(target) ::cl_##target
 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
 
-// On the host side we cannot use OpenCL cl_half# types as an underlying type
-// for vec because they are actually defined as an integer type under the hood.
-// As a result half values will be converted to the integer and passed as a
-// kernel argument which is expected to be floating point number.
-#ifndef __SYCL_DEVICE_ONLY__
-template <int NumElements> struct half_vec {
-  alignas(cl::sycl::detail::VectorAlignment<half, NumElements>::value)
-      std::array<half, NumElements> s;
-};
-
-using __half_t = half;
-using __half2_vec_t = half_vec<2>;
-using __half3_vec_t = half_vec<3>;
-using __half4_vec_t = half_vec<4>;
-using __half8_vec_t = half_vec<8>;
-using __half16_vec_t = half_vec<16>;
-#endif
-
+using __half_t = cl::sycl::detail::half_impl::StorageT;
+using __half2_vec_t = cl::sycl::detail::half_impl::Vec2StorageT;
+using __half3_vec_t = cl::sycl::detail::half_impl::Vec3StorageT;
+using __half4_vec_t = cl::sycl::detail::half_impl::Vec4StorageT;
+using __half8_vec_t = cl::sycl::detail::half_impl::Vec8StorageT;
+using __half16_vec_t = cl::sycl::detail::half_impl::Vec16StorageT;
 #define GET_CL_HALF_TYPE(target, num) __##target##num##_vec_t
 
 __SYCL_INLINE namespace cl {
@@ -1961,7 +1941,7 @@ using select_apply_cl_t =
   DECLARE_HALF_CONVERTER(base, 16)                                             \
   template <> class BaseCLTypeConverter<base, 1> {                             \
   public:                                                                      \
-    using DataType = half;                                                     \
+    using DataType = __half_t;                                                 \
   };                                                                           \
   } // namespace detail
 
diff --git a/sycl/source/half_type.cpp b/sycl/source/half_type.cpp
index 9a5e8565a809c..9e2db56e53991 100644
--- a/sycl/source/half_type.cpp
+++ b/sycl/source/half_type.cpp
@@ -116,7 +116,7 @@ static float half2Float(const uint16_t &Val) {
   return Result;
 }
 
-namespace half_impl {
+namespace host_half_impl {
 
 half::half(const float &RHS) : Buf(float2Half(RHS)) {}
 
diff --git a/sycl/test/basic_tests/generic_type_traits.cpp b/sycl/test/basic_tests/generic_type_traits.cpp
index e4912fd2a8ef5..adea81a8d1705 100644
--- a/sycl/test/basic_tests/generic_type_traits.cpp
+++ b/sycl/test/basic_tests/generic_type_traits.cpp
@@ -2,6 +2,7 @@
 
 #include <CL/sycl.hpp>
 #include <CL/sycl/detail/common.hpp>
+#include <CL/sycl/half_type.hpp>
 #include <cassert>
 #include <iostream>
 
@@ -260,6 +261,9 @@ int main() {
           value,
       "");
 #endif
+  static_assert(std::is_same<d::ConvertToOpenCLType_t<s::half>,
+                             d::half_impl::BIsRepresentationT>::value,
+                "");
 
   s::multi_ptr<int, s::access::address_space::global_space> mp;
   int *dp = mp;
diff --git a/sycl/test/regression/vec-to-half.cpp b/sycl/test/regression/vec-to-half.cpp
new file mode 100644
index 0000000000000..a9aca6503bde2
--- /dev/null
+++ b/sycl/test/regression/vec-to-half.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang -O0 -fsyntax-only -Xclang -verify %s
+// expected-no-diagnostics
+
+#include <CL/sycl.hpp>
+
+int main() {
+  cl::sycl::vec<cl::sycl::half, 1> V(1.0);
+  cl::sycl::vec<cl::sycl::half, 1> V2 = V.template convert<cl::sycl::half>();
+
+  return 0;
+}