From dea31d9fd3779bf95a41b36609df66938c593236 Mon Sep 17 00:00:00 2001
From: Aelphy <aelphy@google.com>
Date: Tue, 6 Dec 2022 12:07:20 +0100
Subject: [PATCH] Add Select and LT support for float16_t in the Xtensa backend

---
 src/CodeGen_Xtensa.cpp | 12 +++++++++++-
 src/XtensaOptimize.cpp |  6 ++++++
 src/XtensaOptimize.h   |  3 +++
 3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/src/CodeGen_Xtensa.cpp b/src/CodeGen_Xtensa.cpp
index 78daa4ef00f2..594a84ba8a0d 100644
--- a/src/CodeGen_Xtensa.cpp
+++ b/src/CodeGen_Xtensa.cpp
@@ -252,6 +252,8 @@ using int64x16_t = xb_vecN_2x64w;
 using uint1x16_t = vboolN_2;
 using uint1x32_t = vboolN;
 using uint1x64_t = vbool2N;
+using float16x16_t = xb_vecN_2xf16;
+using float16x32_t = xb_vecNxf16;
 using float32x16_t = xb_vecN_2xf32;
 #elif XCHAL_VISION_TYPE == 8
 using int8x128_t = xb_vec2Nx8;
@@ -269,6 +271,8 @@ using uint48x64_t = xb_vecNx48;
 using uint1x32_t = vboolN_2;
 using uint1x64_t = vboolN;
 using uint1x128_t = vbool2N;
+using float16x32_t = xb_vecN_2xf16;
+using float16x64_t = xb_vecNxf16;
 using float32x32_t = xb_vecN_2xf32;
 using int64x32_t = xb_vecN_2x64w;
 #endif
@@ -462,6 +466,7 @@ using float32x128_t = MultipleOfNativeVector<float32x32_t, 4>;
 #define VECTOR_WIDTH_U8 128
 #define VECTOR_WIDTH_I16 64
 #define VECTOR_WIDTH_U16 64
+#define VECTOR_WIDTH_F16 64
 #define VECTOR_WIDTH_I32 32
 #define VECTOR_WIDTH_U32 32
 #define VECTOR_WIDTH_F32 32
@@ -2636,7 +2641,7 @@ class ScopedDmaInitializer {
             Type(Type::Int, 48, target.natural_vector_size<int16_t>()),
             Type(Type::UInt, 48, target.natural_vector_size<uint16_t>()),
             Type(Type::Int, 64, target.natural_vector_size<int32_t>()),
-            Type(Type::Float, 16, target.natural_vector_size<int16_t>()),
+            Type(Type::Float, 16, target.natural_vector_size<float16_t>()),
             Type(Type::Float, 32, target.natural_vector_size<float>()),
         };
 
@@ -2644,6 +2649,7 @@ class ScopedDmaInitializer {
             Int(8, 4),
             UInt(8, 4),
             UInt(8, 8),
+            Float(16, 16)
         };
 
         std::set<Type> multiple_of_native_types;
@@ -3047,6 +3053,8 @@ void CodeGen_Xtensa::visit(const Select *op) {
             rhs << "IVP_MOVN_2X32T(" << true_val << ", " << false_val << ", " << cond << ")";
         } else if (is_native_xtensa_vector<uint32_t>(op->type, target)) {
             rhs << "IVP_MOVN_2X32UT(" << true_val << ", " << false_val << ", " << cond << ")";
+        } else if (is_native_xtensa_vector<float16_t>(op->type, target)) {
+            rhs << "IVP_MOVNXF16T(" << true_val << ", " << false_val << ", " << cond << ")";
         } else if (is_native_xtensa_vector<float>(op->type, target)) {
             rhs << "IVP_MOVN_2XF32T(" << true_val << ", " << false_val << ", " << cond << ")";
         } else {
@@ -3177,6 +3185,8 @@ void CodeGen_Xtensa::visit(const LT *op) {
         print_assignment(op->type, "IVP_LTN_2X32(" + sa + ", " + sb + ")");
     } else if (is_native_xtensa_vector<uint32_t>(op->a.type(), target)) {
         print_assignment(op->type, "IVP_LTUN_2X32U(" + sa + ", " + sb + ")");
+    } else if (is_native_xtensa_vector<float16_t>(op->a.type(), target)) {
+        print_assignment(op->type, "IVP_OLTNXF16(" + sa + ", " + sb + ")");
     } else if (is_native_xtensa_vector<float>(op->a.type(), target)) {
         print_assignment(op->type, "IVP_OLTN_2XF32(" + sa + ", " + sb + ")");
     } else {
diff --git a/src/XtensaOptimize.cpp b/src/XtensaOptimize.cpp
index 054f00048579..b9b586c771b2 100644
--- a/src/XtensaOptimize.cpp
+++ b/src/XtensaOptimize.cpp
@@ -69,6 +69,12 @@ bool is_native_xtensa_vector<uint32_t>(const Type &t, const Target &target) {
     return t.is_uint() && (t.bits() == 32) && (t.lanes() == vector_size);
 }
 
+// float16_t case: true when t is a 16-bit float whose lanes() equals target.natural_vector_size<float16_t>().
+template<>
+bool is_native_xtensa_vector<float16_t>(const Type &t, const Target &target) {
+    return (target.natural_vector_size<float16_t>() == t.lanes()) && t.is_float() && (t.bits() == 16);
+}
+
 template<>
 bool is_native_xtensa_vector<float>(const Type &t, const Target &target) {
     int vector_size = target.natural_vector_size<float>();
diff --git a/src/XtensaOptimize.h b/src/XtensaOptimize.h
index e92b9f213f29..06349f49295a 100644
--- a/src/XtensaOptimize.h
+++ b/src/XtensaOptimize.h
@@ -35,6 +35,9 @@ bool is_native_xtensa_vector<int64_t>(const Type &t, const Target &target);
 template<>
 bool is_native_xtensa_vector<uint32_t>(const Type &t, const Target &target);
 
+template<>
+bool is_native_xtensa_vector<float16_t>(const Type &t, const Target &target);  // float16_t specialization; defined in XtensaOptimize.cpp
+
 template<>
 bool is_native_xtensa_vector<float>(const Type &t, const Target &target);