From 188f44b2228ef3b830a981e0843457885be463c6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 19 Feb 2022 09:36:27 -0800 Subject: [PATCH 1/5] Updating the RyuJIT casting behavior to be IEEE 754 compliant and to saturate on overflow --- docs/design/coreclr/botr/readytorun-format.md | 28 +- src/coreclr/inc/corinfo.h | 28 +- src/coreclr/inc/jithelpers.h | 52 +- src/coreclr/inc/readytorun.h | 28 +- src/coreclr/inc/readytorunhelpers.h | 28 +- src/coreclr/jit/codegenxarch.cpp | 101 ++-- src/coreclr/jit/compiler.h | 21 +- src/coreclr/jit/gentree.cpp | 248 ++++++++- src/coreclr/jit/hwintrinsicarm64.cpp | 42 +- src/coreclr/jit/hwintrinsicxarch.cpp | 13 +- src/coreclr/jit/importer.cpp | 132 +++++ src/coreclr/jit/lower.cpp | 3 +- src/coreclr/jit/lower.h | 2 +- src/coreclr/jit/lowerarmarch.cpp | 184 ++++++- src/coreclr/jit/lowerxarch.cpp | 500 +++++++++++++++-- src/coreclr/jit/lsraxarch.cpp | 9 +- src/coreclr/jit/morph.cpp | 213 ++++---- src/coreclr/jit/simdashwintrinsic.cpp | 40 +- src/coreclr/jit/utils.cpp | 343 +++++++++--- src/coreclr/jit/utils.h | 17 +- src/coreclr/jit/valuenum.cpp | 162 +++--- src/coreclr/jit/valuenumfuncs.h | 20 + src/coreclr/nativeaot/Runtime/MathHelpers.cpp | 331 ++++++++++-- .../Runtime/CompilerHelpers/MathHelpers.cs | 203 +++++-- .../Internal/Runtime/ReadyToRunConstants.cs | 28 +- .../Common/JitInterface/CorInfoHelpFunc.cs | 38 +- .../SOS/DacTableGen/MapSymbolProvider.cs | 2 +- .../ILCompiler.Compiler/Compiler/JitHelper.cs | 68 ++- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 64 ++- .../ReadyToRunSignature.cs | 74 ++- .../JitInterface/CorInfoImpl.RyuJit.cs | 65 ++- src/coreclr/vm/i386/jithelp.S | 81 --- src/coreclr/vm/i386/jithelp.asm | 181 ------- src/coreclr/vm/i386/jitinterfacex86.cpp | 37 -- src/coreclr/vm/jithelpers.cpp | 506 +++++++++++++++--- src/coreclr/vm/jitinterface.h | 8 - src/tests/JIT/CodeGenBringUpTests/DblCast.cs | 284 ++++++++++ .../JIT/CodeGenBringUpTests/DblCast_d.csproj | 13 + .../JIT/CodeGenBringUpTests/DblCast_do.csproj | 13 + .../JIT/CodeGenBringUpTests/DblCast_r.csproj | 13 + .../JIT/CodeGenBringUpTests/DblCast_ro.csproj | 13 + src/tests/JIT/CodeGenBringUpTests/FltCast.cs | 284 ++++++++++ .../JIT/CodeGenBringUpTests/FltCast_d.csproj | 13 + .../JIT/CodeGenBringUpTests/FltCast_do.csproj | 13 + .../JIT/CodeGenBringUpTests/FltCast_r.csproj | 13 + .../JIT/CodeGenBringUpTests/FltCast_ro.csproj | 13 + .../out_of_range_fp_to_int_conversions.cs | 26 +- 47 files changed, 3483 insertions(+), 1115 deletions(-) create mode 100644 src/tests/JIT/CodeGenBringUpTests/DblCast.cs create mode 100644 src/tests/JIT/CodeGenBringUpTests/DblCast_d.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/DblCast_do.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/DblCast_r.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/DblCast_ro.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/FltCast.cs create mode 100644 src/tests/JIT/CodeGenBringUpTests/FltCast_d.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/FltCast_do.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/FltCast_r.csproj create mode 100644 src/tests/JIT/CodeGenBringUpTests/FltCast_ro.csproj diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md index 6bc9a5068d9e7..6680e70c0e86b 100644 --- a/docs/design/coreclr/botr/readytorun-format.md +++ b/docs/design/coreclr/botr/readytorun-format.md @@ -743,8 +743,8 @@ enum ReadyToRunHelper READYTORUN_HELPER_LLsh = 0xC7, READYTORUN_HELPER_LRsh = 0xC8, 
READYTORUN_HELPER_LRsz = 0xC9, - READYTORUN_HELPER_Lng2Dbl = 0xCA, - READYTORUN_HELPER_ULng2Dbl = 0xCB, + READYTORUN_HELPER_Int64ToDouble = 0xCA, + READYTORUN_HELPER_UInt64ToDouble = 0xCB, // 32-bit division helpers READYTORUN_HELPER_Div = 0xCC, @@ -753,14 +753,22 @@ enum ReadyToRunHelper READYTORUN_HELPER_UMod = 0xCF, // Floating point conversions - READYTORUN_HELPER_Dbl2Int = 0xD0, - READYTORUN_HELPER_Dbl2IntOvf = 0xD1, - READYTORUN_HELPER_Dbl2Lng = 0xD2, - READYTORUN_HELPER_Dbl2LngOvf = 0xD3, - READYTORUN_HELPER_Dbl2UInt = 0xD4, - READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, - READYTORUN_HELPER_Dbl2ULng = 0xD6, - READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, + READYTORUN_HELPER_DoubleToInt32 = 0xD0, + READYTORUN_HELPER_DoubleToInt32Ovf = 0xD1, + READYTORUN_HELPER_DoubleToInt64 = 0xD2, + READYTORUN_HELPER_DoubleToInt64Ovf = 0xD3, + READYTORUN_HELPER_DoubleToUInt32 = 0xD4, + READYTORUN_HELPER_DoubleToUInt32Ovf = 0xD5, + READYTORUN_HELPER_DoubleToUInt64 = 0xD6, + READYTORUN_HELPER_DoubleToUInt64Ovf = 0xD7, + READYTORUN_HELPER_DoubleToInt8 = 0xD8, + READYTORUN_HELPER_DoubleToInt8Ovf = 0xD9, + READYTORUN_HELPER_DoubleToInt16 = 0xDA, + READYTORUN_HELPER_DoubleToInt16Ovf = 0xDB, + READYTORUN_HELPER_DoubleToUInt8 = 0xDC, + READYTORUN_HELPER_DoubleToUInt8Ovf = 0xDD, + READYTORUN_HELPER_DoubleToUInt16 = 0xDE, + READYTORUN_HELPER_DoubleToUInt16Ovf = 0xDF, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 99dd1fb7c2831..9c5e413286005 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -384,16 +384,24 @@ enum CorInfoHelpFunc CORINFO_HELP_LMOD, CORINFO_HELP_ULDIV, CORINFO_HELP_ULMOD, - CORINFO_HELP_LNG2DBL, // Convert a signed int64 to a double - CORINFO_HELP_ULNG2DBL, // Convert a unsigned int64 to a double - CORINFO_HELP_DBL2INT, - CORINFO_HELP_DBL2INT_OVF, - CORINFO_HELP_DBL2LNG, - CORINFO_HELP_DBL2LNG_OVF, - CORINFO_HELP_DBL2UINT, - CORINFO_HELP_DBL2UINT_OVF, - CORINFO_HELP_DBL2ULNG, - CORINFO_HELP_DBL2ULNG_OVF, + CORINFO_HELP_Int64ToDouble, + CORINFO_HELP_UInt64ToDouble, + CORINFO_HELP_DoubleToInt8, + CORINFO_HELP_DoubleToInt8_OVF, + CORINFO_HELP_DoubleToInt16, + CORINFO_HELP_DoubleToInt16_OVF, + CORINFO_HELP_DoubleToInt32, + CORINFO_HELP_DoubleToInt32_OVF, + CORINFO_HELP_DoubleToInt64, + CORINFO_HELP_DoubleToInt64_OVF, + CORINFO_HELP_DoubleToUInt8, + CORINFO_HELP_DoubleToUInt8_OVF, + CORINFO_HELP_DoubleToUInt16, + CORINFO_HELP_DoubleToUInt16_OVF, + CORINFO_HELP_DoubleToUInt32, + CORINFO_HELP_DoubleToUInt32_OVF, + CORINFO_HELP_DoubleToUInt64, + CORINFO_HELP_DoubleToUInt64_OVF, CORINFO_HELP_FLTREM, CORINFO_HELP_DBLREM, CORINFO_HELP_FLTROUND, diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e2c4e0e8fcbf0..bb3b2f7f2a7b2 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -35,8 +35,6 @@ JITHELPER(CORINFO_HELP_UDIV, JIT_UDiv, CORINFO_HELP_SIG_8_STACK) JITHELPER(CORINFO_HELP_UMOD, JIT_UMod, CORINFO_HELP_SIG_8_STACK) - // CORINFO_HELP_DBL2INT, CORINFO_HELP_DBL2UINT, and CORINFO_HELP_DBL2LONG get - // patched for CPUs that support SSE2 (P4 and above). 
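[Editor's note: for reference, the JIT_DoubleTo* helpers named in the table below implement the saturating semantics described in the commit message: NaN converts to zero and out-of-range values clamp to the destination type's minimum or maximum. A minimal C++ sketch of those semantics for the int32 case; the actual implementations live in src/coreclr/vm/jithelpers.cpp and may be structured differently:

    #include <cstdint>
    #include <cmath>

    // Sketch of the saturating conversion behavior behind JIT_DoubleToInt32.
    int32_t DoubleToInt32Saturating(double value)
    {
        if (std::isnan(value))
        {
            return 0; // NaN saturates to zero
        }
        if (value <= -2147483648.0)
        {
            return INT32_MIN; // clamp below the representable range
        }
        if (value >= 2147483648.0)
        {
            return INT32_MAX; // clamp above the representable range
        }
        return (int32_t)value; // in range: ordinary truncation toward zero
    }

The *_OVF variants correspond to checked conversions and raise an overflow exception for the same out-of-range inputs instead of clamping.]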
#ifndef TARGET_64BIT JITHELPER(CORINFO_HELP_LLSH, JIT_LLsh, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_LRSH, JIT_LRsh, CORINFO_HELP_SIG_REG_ONLY) @@ -46,27 +44,35 @@ JITHELPER(CORINFO_HELP_LRSH, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) JITHELPER(CORINFO_HELP_LRSZ, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) #endif // TARGET_64BIT - JITHELPER(CORINFO_HELP_LMUL, JIT_LMul, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_LMUL_OVF, JIT_LMulOvf, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_ULMUL_OVF, JIT_ULMulOvf, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_LDIV, JIT_LDiv, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_LMOD, JIT_LMod, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_ULDIV, JIT_ULDiv, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_ULMOD, JIT_ULMod, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_LNG2DBL, JIT_Lng2Dbl, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_ULNG2DBL, JIT_ULng2Dbl, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_DBL2INT, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2INT_OVF, JIT_Dbl2IntOvf, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2LNG_OVF, JIT_Dbl2LngOvf, CORINFO_HELP_SIG_8_STACK) - DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT, JIT_Dbl2Lng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2UINT_OVF, JIT_Dbl2UIntOvf, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2ULNG, JIT_Dbl2ULng, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBL2ULNG_OVF, JIT_Dbl2ULngOvf, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_FLTREM, JIT_FltRem, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBLREM, JIT_DblRem, CORINFO_HELP_SIG_16_STACK) - JITHELPER(CORINFO_HELP_FLTROUND, JIT_FloatRound, CORINFO_HELP_SIG_8_STACK) - JITHELPER(CORINFO_HELP_DBLROUND, JIT_DoubleRound, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_LMUL, JIT_LMul, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_LMUL_OVF, JIT_LMulOvf, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_ULMUL_OVF, JIT_ULMulOvf, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_LDIV, JIT_LDiv, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_LMOD, JIT_LMod, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_ULDIV, JIT_ULDiv, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_ULMOD, JIT_ULMod, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_Int64ToDouble, JIT_Int64ToDouble, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_UInt64ToDouble, JIT_UInt64ToDouble, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt8, JIT_DoubleToInt8, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt8_OVF, JIT_DoubleToInt8Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt16, JIT_DoubleToInt16, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt16_OVF, JIT_DoubleToInt16Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt32, JIT_DoubleToInt32, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt32_OVF, JIT_DoubleToInt32Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt64, JIT_DoubleToInt64, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToInt64_OVF, JIT_DoubleToInt64Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt8, JIT_DoubleToUInt8, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt8_OVF, JIT_DoubleToUInt8Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt16, JIT_DoubleToUInt16, 
CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt16_OVF, JIT_DoubleToUInt16Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt32, JIT_DoubleToUInt32, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt32_OVF, JIT_DoubleToUInt32Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt64, JIT_DoubleToUInt64, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DoubleToUInt64_OVF, JIT_DoubleToUInt64Ovf, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_FLTREM, JIT_FltRem, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DBLREM, JIT_DblRem, CORINFO_HELP_SIG_16_STACK) + JITHELPER(CORINFO_HELP_FLTROUND, JIT_FloatRound, CORINFO_HELP_SIG_8_STACK) + JITHELPER(CORINFO_HELP_DBLROUND, JIT_DoubleRound, CORINFO_HELP_SIG_16_STACK) // Allocating a new object JITHELPER(CORINFO_HELP_NEWFAST, JIT_New, CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 25bd45d376e8f..7fce76a3eb453 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -320,8 +320,8 @@ enum ReadyToRunHelper READYTORUN_HELPER_LLsh = 0xC7, READYTORUN_HELPER_LRsh = 0xC8, READYTORUN_HELPER_LRsz = 0xC9, - READYTORUN_HELPER_Lng2Dbl = 0xCA, - READYTORUN_HELPER_ULng2Dbl = 0xCB, + READYTORUN_HELPER_Int64ToDouble = 0xCA, + READYTORUN_HELPER_UInt64ToDouble = 0xCB, // 32-bit division helpers READYTORUN_HELPER_Div = 0xCC, @@ -330,14 +330,22 @@ enum ReadyToRunHelper READYTORUN_HELPER_UMod = 0xCF, // Floating point conversions - READYTORUN_HELPER_Dbl2Int = 0xD0, - READYTORUN_HELPER_Dbl2IntOvf = 0xD1, - READYTORUN_HELPER_Dbl2Lng = 0xD2, - READYTORUN_HELPER_Dbl2LngOvf = 0xD3, - READYTORUN_HELPER_Dbl2UInt = 0xD4, - READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, - READYTORUN_HELPER_Dbl2ULng = 0xD6, - READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, + READYTORUN_HELPER_DoubleToInt32 = 0xD0, + READYTORUN_HELPER_DoubleToInt32Ovf = 0xD1, + READYTORUN_HELPER_DoubleToInt64 = 0xD2, + READYTORUN_HELPER_DoubleToInt64Ovf = 0xD3, + READYTORUN_HELPER_DoubleToUInt32 = 0xD4, + READYTORUN_HELPER_DoubleToUInt32Ovf = 0xD5, + READYTORUN_HELPER_DoubleToUInt64 = 0xD6, + READYTORUN_HELPER_DoubleToUInt64Ovf = 0xD7, + READYTORUN_HELPER_DoubleToInt8 = 0xD8, + READYTORUN_HELPER_DoubleToInt8Ovf = 0xD9, + READYTORUN_HELPER_DoubleToInt16 = 0xDA, + READYTORUN_HELPER_DoubleToInt16Ovf = 0xDB, + READYTORUN_HELPER_DoubleToUInt8 = 0xDC, + READYTORUN_HELPER_DoubleToUInt8Ovf = 0xDD, + READYTORUN_HELPER_DoubleToUInt16 = 0xDE, + READYTORUN_HELPER_DoubleToUInt16Ovf = 0xDF, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index 66e2d4a3b164e..d6b4516a1b658 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -65,22 +65,30 @@ HELPER(READYTORUN_HELPER_ULMod, CORINFO_HELP_ULMOD, HELPER(READYTORUN_HELPER_LLsh, CORINFO_HELP_LLSH, ) HELPER(READYTORUN_HELPER_LRsh, CORINFO_HELP_LRSH, ) HELPER(READYTORUN_HELPER_LRsz, CORINFO_HELP_LRSZ, ) -HELPER(READYTORUN_HELPER_Lng2Dbl, CORINFO_HELP_LNG2DBL, ) -HELPER(READYTORUN_HELPER_ULng2Dbl, CORINFO_HELP_ULNG2DBL, ) +HELPER(READYTORUN_HELPER_Int64ToDouble, CORINFO_HELP_Int64ToDouble, ) +HELPER(READYTORUN_HELPER_UInt64ToDouble, CORINFO_HELP_UInt64ToDouble, ) HELPER(READYTORUN_HELPER_Div, CORINFO_HELP_DIV, ) HELPER(READYTORUN_HELPER_Mod, CORINFO_HELP_MOD, ) HELPER(READYTORUN_HELPER_UDiv, CORINFO_HELP_UDIV, ) HELPER(READYTORUN_HELPER_UMod, CORINFO_HELP_UMOD, ) -HELPER(READYTORUN_HELPER_Dbl2Int, CORINFO_HELP_DBL2INT, ) 
-HELPER(READYTORUN_HELPER_Dbl2IntOvf, CORINFO_HELP_DBL2INT_OVF, )
-HELPER(READYTORUN_HELPER_Dbl2Lng, CORINFO_HELP_DBL2LNG, )
-HELPER(READYTORUN_HELPER_Dbl2LngOvf, CORINFO_HELP_DBL2LNG_OVF, )
-HELPER(READYTORUN_HELPER_Dbl2UInt, CORINFO_HELP_DBL2UINT, )
-HELPER(READYTORUN_HELPER_Dbl2UIntOvf, CORINFO_HELP_DBL2UINT_OVF, )
-HELPER(READYTORUN_HELPER_Dbl2ULng, CORINFO_HELP_DBL2ULNG, )
-HELPER(READYTORUN_HELPER_Dbl2ULngOvf, CORINFO_HELP_DBL2ULNG_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToInt32, CORINFO_HELP_DoubleToInt32, )
+HELPER(READYTORUN_HELPER_DoubleToInt32Ovf, CORINFO_HELP_DoubleToInt32_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToInt64, CORINFO_HELP_DoubleToInt64, )
+HELPER(READYTORUN_HELPER_DoubleToInt64Ovf, CORINFO_HELP_DoubleToInt64_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToUInt32, CORINFO_HELP_DoubleToUInt32, )
+HELPER(READYTORUN_HELPER_DoubleToUInt32Ovf, CORINFO_HELP_DoubleToUInt32_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToUInt64, CORINFO_HELP_DoubleToUInt64, )
+HELPER(READYTORUN_HELPER_DoubleToUInt64Ovf, CORINFO_HELP_DoubleToUInt64_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToInt8, CORINFO_HELP_DoubleToInt8, )
+HELPER(READYTORUN_HELPER_DoubleToInt8Ovf, CORINFO_HELP_DoubleToInt8_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToInt16, CORINFO_HELP_DoubleToInt16, )
+HELPER(READYTORUN_HELPER_DoubleToInt16Ovf, CORINFO_HELP_DoubleToInt16_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToUInt8, CORINFO_HELP_DoubleToUInt8, )
+HELPER(READYTORUN_HELPER_DoubleToUInt8Ovf, CORINFO_HELP_DoubleToUInt8_OVF, )
+HELPER(READYTORUN_HELPER_DoubleToUInt16, CORINFO_HELP_DoubleToUInt16, )
+HELPER(READYTORUN_HELPER_DoubleToUInt16Ovf, CORINFO_HELP_DoubleToUInt16_OVF, )
 HELPER(READYTORUN_HELPER_FltRem, CORINFO_HELP_FLTREM, )
 HELPER(READYTORUN_HELPER_DblRem, CORINFO_HELP_DBLREM, )

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index e2dad3d7cc6ed..aea816c87894f 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -6763,11 +6763,6 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     var_types srcType = op1->TypeGet();
     assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
 
-#if !defined(TARGET_64BIT)
-    // We expect morph to replace long to float/double casts with helper calls
-    noway_assert(!varTypeIsLong(srcType));
-#endif // !defined(TARGET_64BIT)
-
     // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
     // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
     // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
@@ -6796,10 +6791,18 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
     emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
     noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
-    // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
-    // here since they should have been lowered apropriately.
+#if defined(TARGET_X86)
+    // x86 doesn't expect to see int64/uint32/uint64 -> float/double here since they should have been
+    // replaced with helper calls by the front end.
+ noway_assert(!varTypeIsLong(srcType)); noway_assert(srcType != TYP_UINT); - noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); +#endif // TARGET_X86 + +#if defined(TARGET_AMD64) + // x64 shouldn't see a uint64 -> float/double as it should have been lowered to an alternative + // sequence -or- converted to a helper call by the front end. + noway_assert(srcType != TYP_ULONG); +#endif // TARGET_AMD64 // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used // which does a partial write to lower 4/8 bytes of xmm register keeping the other @@ -6815,46 +6823,27 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // Note that here we need to specify srcType that will determine // the size of source reg/mem operand and rex.w prefix. - instruction ins = ins_FloatConv(dstType, TYP_INT); - GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); - - // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction - // will interpret ULONG value as LONG. Hence we need to adjust the - // result if sign-bit of srcType is set. - if (srcType == TYP_ULONG) - { - // The instruction sequence below is less accurate than what clang - // and gcc generate. However, we keep the current sequence for backward compatibility. - // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule - // should be also updated for consistent conversion result. - assert(dstType == TYP_DOUBLE); - assert(op1->isUsedFromReg()); - // Set the flags without modifying op1. - // test op1Reg, op1Reg - inst_RV_RV(INS_test, op1->GetRegNum(), op1->GetRegNum(), srcType); + var_types fromType = srcType; - // No need to adjust result if op1 >= 0 i.e. positive - // Jge label - BasicBlock* label = genCreateTempLabel(); - inst_JMP(EJ_jge, label); - - // Adjust the result - // result = result + 0x43f00000 00000000 - // addsd resultReg, 0x43f00000 00000000 - CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask; - if (*cns == nullptr) - { - double d; - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x43f0000000000000LL; +#if defined(TARGET_AMD64) + if (fromType == TYP_UINT) + { + // There isn't an instruction that directly allows conversion from TYP_UINT + // so we convert from TYP_LONG instead. - *cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE); - } - GetEmitter()->emitIns_R_C(INS_addsd, EA_8BYTE, treeNode->GetRegNum(), *cns, 0); + fromType = TYP_LONG; - genDefineTempLabel(label); + // We require the value to come from a register so the upper bits here + // will be zero and we can know we'll get a correct result. + assert(op1->isUsedFromReg()); } +#endif // TARGET_AMD64 + + assert(!varTypeIsUnsigned(fromType)); + + instruction ins = ins_FloatConv(dstType, fromType); + GetEmitter()->emitInsBinary(ins, emitTypeSize(fromType), treeNode, op1); genProduceReg(treeNode); } @@ -6873,8 +6862,6 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // The treeNode must have an assigned register. 
// SrcType=float/double and DstType= int32/uint32/int64/uint64
 //
-// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
-//
 void CodeGen::genFloatToIntCast(GenTree* treeNode)
 {
     // we don't expect to see overflow detecting float/double --> int type conversions here
@@ -6893,9 +6880,9 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     }
 #endif
 
+    var_types srcType = treeNode->CastFromType();
     var_types dstType = treeNode->CastToType();
-    var_types srcType = op1->TypeGet();
-    assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+    assert(varTypeIsFloating(srcType) && varTypeIsIntegral(dstType));
 
     // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
     // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
@@ -6905,17 +6892,23 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
     noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
-    // We shouldn't be seeing uint64 here as it should have been converted
-    // into a helper call by either front-end or lowering phase.
-    noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+#if defined(TARGET_X86)
+    // x86 shouldn't see casts to int64/uint32/uint64 as they should have been converted into
+    // helper calls by the front-end.
 
-    // If the dstType is TYP_UINT, we have 32-bits to encode the
-    // float number. Any of 33rd or above bits can be the sign bit.
-    // To achieve it we pretend as if we are converting it to a long.
-    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+    noway_assert(dstType != TYP_UINT);
+    noway_assert(!varTypeIsLong(dstType));
+#else
+    // x64 shouldn't see casts to uint64 as they should have been lowered into an alternative sequence
+    noway_assert(dstType != TYP_ULONG);
+
+    // If the dstType is TYP_UINT, we can convert to TYP_LONG and implicitly take
+    // the lower 32-bits as the result.
+    if ((dstType == TYP_UINT) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
     {
         dstType = TYP_LONG;
     }
+#endif // TARGET_X86
 
     // Note that we need to specify dstType here so that it will determine
     // the size of destination integer register and also the rex.w prefix.
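[Editor's note: in scalar terms, the two register-width tricks used by genIntToFloatCast and genFloatToIntCast above look like the sketch below. It covers in-range inputs only; NaN and out-of-range inputs are handled by the saturating lowering and helpers introduced elsewhere in this change:

    #include <cstdint>

    // uint32 -> double: there is no unsigned form of cvtsi2sd, but every
    // uint32 fits in a non-negative int64, so zero-extending to 64 bits and
    // converting as signed gives the exact result.
    double UInt32ToDouble(uint32_t value)
    {
        int64_t widened = (int64_t)value; // zero-extended, always >= 0
        return (double)widened;           // 64-bit cvtsi2sd
    }

    // double -> uint32: converting to int64 first leaves room for the sign
    // bit above bit 31, so any value in [0, 2^32) converts exactly and the
    // low 32 bits of the result are the uint32 answer.
    uint32_t DoubleToUInt32(double value)
    {
        int64_t wide = (int64_t)value; // 64-bit cvttsd2si
        return (uint32_t)wide;         // implicitly take the lower 32 bits
    }

Both rely on the value sitting in a register with known upper bits, which is why the codegen above asserts op1->isUsedFromReg().]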
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f9647e92c562f..94797a2dd991d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3279,6 +3279,24 @@ class Compiler GenTree* gtNewSimdCreateBroadcastNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + GenTree* gtNewSimdCvtToDoubleNode( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + + GenTree* gtNewSimdCvtToInt32Node( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + + GenTree* gtNewSimdCvtToInt64Node( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + + GenTree* gtNewSimdCvtToSingleNode( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + + GenTree* gtNewSimdCvtToUInt32Node( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + + GenTree* gtNewSimdCvtToUInt64Node( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); + GenTree* gtNewSimdDotProdNode(var_types type, GenTree* op1, GenTree* op2, @@ -5572,9 +5590,6 @@ class Compiler // Does value-numbering for a call. We interpret some helper calls. void fgValueNumberCall(GenTreeCall* call); - // Does value-numbering for a helper representing a cast operation. - void fgValueNumberCastHelper(GenTreeCall* call); - // Does value-numbering for a helper "call" that has a VN function symbol "vnf". void fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueNumPair vnpExc); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7cb6166f21191..ed73e31812701 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13478,13 +13478,13 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_FLOAT: case TYP_DOUBLE: - if (tree->IsUnsigned() && (lval1 < 0)) + if (tree->IsUnsigned()) { - d1 = FloatingPointUtils::convertUInt64ToDouble((unsigned __int64)lval1); + d1 = FloatingPointUtils::convertUInt64ToDouble((uint64_t)lval1); } else { - d1 = (double)lval1; + d1 = FloatingPointUtils::convertInt64ToDouble((int64_t)lval1); } if (tree->CastToType() == TYP_FLOAT) @@ -13543,31 +13543,31 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) switch (tree->CastToType()) { case TYP_BYTE: - i1 = INT32(INT8(d1)); + i1 = FloatingPointUtils::convertDoubleToInt8(d1); goto CNS_INT; - case TYP_SHORT: - i1 = INT32(INT16(d1)); + case TYP_UBYTE: + i1 = FloatingPointUtils::convertDoubleToUInt8(d1); goto CNS_INT; - case TYP_USHORT: - i1 = INT32(UINT16(d1)); + case TYP_SHORT: + i1 = FloatingPointUtils::convertDoubleToInt16(d1); goto CNS_INT; - case TYP_UBYTE: - i1 = INT32(UINT8(d1)); + case TYP_USHORT: + i1 = FloatingPointUtils::convertDoubleToUInt16(d1); goto CNS_INT; case TYP_INT: - i1 = INT32(d1); + i1 = FloatingPointUtils::convertDoubleToInt32(d1); goto CNS_INT; case TYP_UINT: - i1 = forceCastToUInt32(d1); + i1 = FloatingPointUtils::convertDoubleToUInt32(d1); goto CNS_INT; case TYP_LONG: - lval1 = INT64(d1); + lval1 = FloatingPointUtils::convertDoubleToInt64(d1); goto CNS_LONG; case TYP_ULONG: @@ -19896,6 +19896,228 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } +GenTree* Compiler::gtNewSimdCvtToDoubleNode( + 
    var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic)
+{
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+
+    assert(varTypeIsSIMD(type));
+    assert(getSIMDTypeForSize(simdSize) == type);
+
+    assert(op1 != nullptr);
+    assert(op1->TypeIs(type));
+
+    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+    assert((simdBaseType == TYP_LONG) || (simdBaseType == TYP_ULONG));
+
+    NamedIntrinsic intrinsic = NI_Illegal;
+
+#if defined(TARGET_XARCH)
+    unreached();
+#elif defined(TARGET_ARM64)
+    intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble;
+    return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+}
+
+GenTree* Compiler::gtNewSimdCvtToInt32Node(
+    var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic)
+{
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+
+    assert(varTypeIsSIMD(type));
+    assert(getSIMDTypeForSize(simdSize) == type);
+
+    assert(op1 != nullptr);
+    assert(op1->TypeIs(type));
+
+    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+    assert(simdBaseType == TYP_FLOAT);
+
+    NamedIntrinsic intrinsic = NI_Illegal;
+
+#if defined(TARGET_XARCH)
+    CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+
+    GenTree* dup1;
+    GenTree* dup2;
+    GenTree* tmp;
+    GenTree* msk;
+
+    // First we need to clear any NaN values to 0. We do that by comparing the value against
+    // itself which will give all bits set for non-NaN and zero for NaN. We then and that
+    // with the original input which will clear NaN values to zero.
+
+    op1 = impCloneExpr(op1, &dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                       nullptr DEBUGARG("Clone op1 for vector convert to int32"));
+    op1 = impCloneExpr(op1, &dup2, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                       nullptr DEBUGARG("Clone op1 for vector convert to int32"));
+
+    tmp = gtNewSimdCmpOpNode(GT_EQ, type, op1, dup1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+    op1 = gtNewSimdBinOpNode(GT_AND, type, tmp, dup2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+
+    // Next we need to clamp the value to the saturation values. For the lower bound, this is trivial
+    // as we can just take the maximum of the input and the lower bound since the lower bound is a
+    // power of two and exactly representable. For the upper bound, however, it's not exactly representable
+    // and so we'd end up having to take the minimum of the input and the next smallest representable value;
+    // this would be problematic since we'd clamp to 2147483520, rather than 2147483647.
+    //
+    // So, instead, we need to compare against the upper bound to create a mask, do the truncation, and
+    // blend in the correct results. This will take an extra instruction but will give us a correct result.
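[Editor's note: a scalar rendering of the algorithm described in the comment above, as a sketch of the reasoning rather than the emitted vector code:

    #include <cstdint>

    // Scalar model of the vector float -> int32 saturating conversion.
    int32_t ConvertToInt32Saturating(float value)
    {
        // (value == value) is false only for NaN, so this is the scalar
        // analogue of the compare-against-self-then-AND step.
        if (!(value == value))
        {
            value = 0.0f;
        }

        // The lower bound, -2^31, is a power of two and exactly representable
        // as a float, so a simple max handles it.
        if (value < -2147483648.0f)
        {
            value = -2147483648.0f;
        }

        // 2^31 - 1 is not exactly representable as a float (the nearest float
        // below 2^31 is 2147483520.0f), so clamping with a min would saturate
        // to the wrong value. Instead, compare against 2^31 to build the mask...
        bool tooBig = (value >= 2147483648.0f);

        // ...then truncate, and blend in INT32_MAX where the mask was set.
        return tooBig ? INT32_MAX : (int32_t)value;
    }

The vector form below does the same thing element-wise, with the comparison producing the per-element mask that feeds BlendVariable, or the conditional-select fallback on pre-SSE4.1 hardware.]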
+
+    tmp = gtNewDconNode(-2147483648.0f, TYP_FLOAT);
+    tmp = gtNewSimdCreateBroadcastNode(type, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+    op1 = gtNewSimdMaxNode(type, op1, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+
+    op1 = impCloneExpr(op1, &dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                       nullptr DEBUGARG("Clone op1 for vector convert to int32"));
+
+    tmp = gtNewDconNode(+2147483648.0f, TYP_FLOAT);
+    tmp = gtNewSimdCreateBroadcastNode(type, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+    msk = gtNewSimdCmpOpNode(GT_GE, type, dup1, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+
+    intrinsic =
+        (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation : NI_SSE2_ConvertToVector128Int32WithTruncation;
+
+    op1 = gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+
+    tmp = gtNewIconNode(2147483647);
+    tmp = gtNewSimdCreateBroadcastNode(type, tmp, CORINFO_TYPE_INT, simdSize, isSimdAsHWIntrinsic);
+
+    // TODO-XARCH-CQ: Ideally gtNewSimdCndSelNode would handle this itself, but until that happens manually
+    // prefer BlendVariable since we know that it's "all bits set" or "no bits set" per element.
+
+    if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+    {
+        // The operand order here is "right, left, condition" and the condition is greater than or equal to
+        // the upper bound so, left needs to be the saturated value and right should be the truncated value.
+        return gtNewSimdHWIntrinsicNode(type, op1, tmp, msk, NI_SSE41_BlendVariable, CORINFO_TYPE_INT, simdSize,
+                                        isSimdAsHWIntrinsic);
+    }
+    else
+    {
+        // The operand order here is "condition, left, right" and the condition is greater than or equal to
+        // the upper bound so, left needs to be the saturated value and right should be the truncated value.
+        return gtNewSimdCndSelNode(type, msk, tmp, op1, CORINFO_TYPE_INT, simdSize, isSimdAsHWIntrinsic);
+    }
+#elif defined(TARGET_ARM64)
+    intrinsic = NI_AdvSimd_ConvertToInt32RoundToZero;
+    return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+}
+
+GenTree* Compiler::gtNewSimdCvtToInt64Node(
+    var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic)
+{
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+
+    assert(varTypeIsSIMD(type));
+    assert(getSIMDTypeForSize(simdSize) == type);
+
+    assert(op1 != nullptr);
+    assert(op1->TypeIs(type));
+
+    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+    assert(simdBaseType == TYP_DOUBLE);
+
+    NamedIntrinsic intrinsic = NI_Illegal;
+
+#if defined(TARGET_XARCH)
+    unreached();
+#elif defined(TARGET_ARM64)
+    intrinsic =
+        (simdSize == 8) ?
NI_AdvSimd_Arm64_ConvertToInt64RoundToZeroScalar : NI_AdvSimd_Arm64_ConvertToInt64RoundToZero; + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + +GenTree* Compiler::gtNewSimdCvtToSingleNode( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)); + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + assert(simdBaseType == TYP_INT); + intrinsic = (simdSize == 32) ? NI_AVX_ConvertToVector256Single : NI_SSE2_ConvertToVector128Single; +#elif defined(TARGET_ARM64) + intrinsic = NI_AdvSimd_ConvertToSingle; +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); +} + +GenTree* Compiler::gtNewSimdCvtToUInt32Node( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert(simdBaseType == TYP_FLOAT); + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + unreached(); +#elif defined(TARGET_ARM64) + intrinsic = NI_AdvSimd_ConvertToUInt32RoundToZero; + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + +GenTree* Compiler::gtNewSimdCvtToUInt64Node( + var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert(simdBaseType == TYP_DOUBLE); + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + unreached(); +#elif defined(TARGET_ARM64) + intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToUInt64RoundToZeroScalar + : NI_AdvSimd_Arm64_ConvertToUInt64RoundToZero; + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + GenTree* Compiler::gtNewSimdDotProdNode(var_types type, GenTree* op1, GenTree* op2, diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index f1ac37611c5f8..6ccc04360db22 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -478,12 +478,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToDouble: { assert(sig->numArgs == 1); - assert((simdBaseType == TYP_LONG) || (simdBaseType == TYP_ULONG)); - intrinsic = (simdSize == 8) ? 
NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble; + op1 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + retNode = + gtNewSimdCvtToDoubleNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); break; } @@ -491,11 +490,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToInt32: { assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); op1 = impSIMDPopStack(retType); - retNode = - gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToInt32RoundToZero, simdBaseJitType, simdSize); + + retNode = gtNewSimdCvtToInt32Node(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); break; } @@ -503,13 +501,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToInt64: { assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToInt64RoundToZeroScalar - : NI_AdvSimd_Arm64_ConvertToInt64RoundToZero; + op1 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewSimdCvtToInt64Node(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); break; } @@ -517,10 +512,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToSingle: { assert(sig->numArgs == 1); - assert((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)); - op1 = impSIMDPopStack(retType); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseJitType, simdSize); + op1 = impSIMDPopStack(retType); + + retNode = + gtNewSimdCvtToSingleNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); break; } @@ -528,11 +524,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToUInt32: { assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - op1 = impSIMDPopStack(retType); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToUInt32RoundToZero, simdBaseJitType, - simdSize); + op1 = impSIMDPopStack(retType); + + retNode = + gtNewSimdCvtToUInt32Node(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); break; } @@ -540,13 +536,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_ConvertToUInt64: { assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToUInt64RoundToZeroScalar - : NI_AdvSimd_Arm64_ConvertToUInt64RoundToZero; + op1 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + retNode = + gtNewSimdCvtToUInt64Node(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); break; } diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index aec1be705f3db..6883b5a3c38d9 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -852,13 +852,10 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Vector256_ConvertToInt32: { assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - intrinsic = (simdSize == 32) ? 
NI_AVX_ConvertToVector256Int32WithTruncation
-                                             : NI_SSE2_ConvertToVector128Int32WithTruncation;
+            op1 = impSIMDPopStack(retType);
 
-            op1     = impSIMDPopStack(retType);
-            retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+            retNode = gtNewSimdCvtToInt32Node(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
             break;
         }
 
@@ -869,10 +866,10 @@
 
             if (simdBaseType == TYP_INT)
             {
-                intrinsic = (simdSize == 32) ? NI_AVX_ConvertToVector256Single : NI_SSE2_ConvertToVector128Single;
+                op1 = impSIMDPopStack(retType);
 
-                op1     = impSIMDPopStack(retType);
-                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+                retNode =
+                    gtNewSimdCvtToSingleNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
             }
             else
             {
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index c22b6915675f7..11765442f04ac 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -14052,6 +14052,138 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     // Try and fold the introduced cast
                     op1 = gtFoldExprConst(op1);
                 }
+
+#if defined(TARGET_XARCH)
+                if (!ovfl && op1->OperIs(GT_CAST) && varTypeIsFloating(op1->AsCast()->CastOp()) &&
+                    varTypeIsIntegral(lclTyp))
+                {
+                    // We are going to transform the conversion into effectively:
+                    //     var result = platform_cast(value);
+                    //     return (result != sentinel) ? result : saturating_cast(value);
+
+                    var_types castFromType = op1->AsCast()->CastFromType();
+                    var_types castToType   = op1->AsCast()->CastToType();
+
+                    GenTree* value;
+                    GenTree* valueDup;
+                    GenTree* platformResult;
+                    GenTree* platformResultDup;
+                    GenTree* saturatedResult;
+                    GenTree* sentinel;
+                    GenTree* comparison;
+                    GenTree* qmark;
+                    GenTree* colon;
+
+                    // The result of the cast is currently op1, with the value being op1 of that cast
+                    platformResult = op1;
+                    value          = op1->AsCast()->CastOp();
+
+                    // We need the input value twice, once for the platform and once for the saturating cast
+                    value =
+                        impCloneExpr(value, &valueDup, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                     nullptr DEBUGARG("Clone value for the saturated fp2int conversion fallback"));
+                    platformResult->AsCast()->CastOp() = value;
+
+                    // We also need the platform result twice, once for the comparison and once for the qmark colon
+                    platformResult =
+                        impCloneExpr(platformResult, &platformResultDup, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                     nullptr DEBUGARG("Clone result for the saturated fp2int conversion fallback"));
+
+                    // We need to check slightly different sentinels depending on whether the platform result
+                    // is a 64-bit or a 32-bit integer
+                    if (type == TYP_LONG)
+                    {
+                        sentinel = gtNewLconNode(INT64(0x8000000000000000));
+                    }
+                    else
+                    {
+                        assert(type == TYP_INT);
+                        sentinel = gtNewIconNode(INT32(0x80000000), TYP_INT);
+                    }
+
+                    comparison = gtNewOperNode(GT_EQ, TYP_INT, platformResult, sentinel);
+
+                    // Get the saturated result via a call to the appropriate helper function
+                    unsigned helper;
+
+                    switch (castToType)
+                    {
+                        case TYP_BYTE:
+                        {
+                            helper = CORINFO_HELP_DoubleToInt8;
+                            break;
+                        }
+
+                        case TYP_UBYTE:
+                        {
+                            helper = CORINFO_HELP_DoubleToUInt8;
+                            break;
+                        }
+
+                        case TYP_SHORT:
+                        {
+                            helper = CORINFO_HELP_DoubleToInt16;
+                            break;
+                        }
+
+                        case TYP_USHORT:
+                        {
+                            helper = CORINFO_HELP_DoubleToUInt16;
+                            break;
+                        }
+
+                        case TYP_INT:
+                        {
+                            helper = CORINFO_HELP_DoubleToInt32;
+                            break;
+                        }
+
+                        case TYP_UINT:
+                        {
+                            helper = CORINFO_HELP_DoubleToUInt32;
+                            break;
+                        }
+
+                        case TYP_LONG:
+                        {
+                            helper = CORINFO_HELP_DoubleToInt64;
+                            break;
+                        }
+
+                        case TYP_ULONG:
+                        {
+                            helper = CORINFO_HELP_DoubleToUInt64;
+                            break;
+                        }
+
+                        default:
+                        {
+                            unreached();
+                        }
+                    }
+
+                    if (castFromType == TYP_FLOAT)
+                    {
+                        // We only provide helpers for double to integer conversions so cast the value to double
+                        valueDup = gtNewCastNode(TYP_DOUBLE, valueDup, false, TYP_DOUBLE);
+                    }
+
+                    saturatedResult = gtNewHelperCallNode(helper, type, gtNewCallArgs(valueDup));
+
+                    // Construct the qmark given the two possible results and the relevant condition
+                    colon = new (this, GT_COLON) GenTreeColon(type, saturatedResult, platformResultDup);
+                    qmark = gtNewQmarkNode(type, comparison, colon->AsColon());
+
+                    // Ensure that the qmark is in a local to ensure it meets the "top level" requirements
+                    unsigned temp = lvaGrabTemp(true DEBUGARG("QMARK required to be top level: fp2int conversion"));
+
+                    impAssignTempGen(temp, qmark, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, nullptr,
+                                     impCurStmtDI);
+                    var_types tempType = genActualType(lvaTable[temp].TypeGet());
+
+                    op1 = gtNewLclvNode(temp, tempType);
+                }
+#endif // TARGET_XARCH
             }
 
             impPushOnStack(op1, tiRetVal);
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index eaac88aa6df4f..4ee5a94675090 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -251,8 +251,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
             break;
 
         case GT_CAST:
-            LowerCast(node);
-            break;
+            return LowerCast(node->AsCast());
 
 #if defined(TARGET_XARCH) || defined(TARGET_ARM64)
         case GT_BOUNDS_CHECK:
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 7f2d0ff999408..73cb9ddd71157 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -318,7 +318,7 @@ class Lowering final : public Phase
     bool TryLowerSwitchToBitTest(
         BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue);
 
-    void LowerCast(GenTree* node);
+    GenTree* LowerCast(GenTreeCast* node);
 
 #if !CPU_LOAD_STORE_ARCH
     bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index e7d74f303da0b..acaa23b5dbbe2 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -552,29 +552,191 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT
 // don't expect to see them here.
 //    i) GT_CAST(float/double, int type with overflow detection)
 //
-void Lowering::LowerCast(GenTree* tree)
+GenTree* Lowering::LowerCast(GenTreeCast* tree)
 {
-    assert(tree->OperGet() == GT_CAST);
-
     JITDUMP("LowerCast for: ");
     DISPNODE(tree);
     JITDUMP("\n");
 
-    GenTree* op1 = tree->AsOp()->gtOp1;
-    var_types dstType = tree->CastToType();
-    var_types srcType = genActualType(op1->TypeGet());
+    GenTree*  castOp       = tree->CastOp();
+    var_types castToType   = tree->CastToType();
+    var_types castFromType = tree->CastFromType();
+    var_types tmpType      = TYP_UNDEF;
+
+    // force the castFromType to unsigned if GT_UNSIGNED flag is set
+    if (tree->IsUnsigned())
+    {
+        castFromType = varTypeToUnsigned(castFromType);
+    }
+
+    // We should never see the following casts, as they are expected to be converted into helper calls by front-end.
+    // castFromType = float/double castToType = overflow detecting cast
+    // Reason: must be converted to a helper call
+    if (varTypeIsFloating(castFromType))
+    {
+        noway_assert(!tree->gtOverflow());
+    }
 
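[Editor's note: stated as scalar code, the importer transformation constructed above (the qmark whose condition compares the platform result against the sentinel) behaves like the sketch below. SaturatingDoubleToInt32 is a hypothetical stand-in for the CORINFO_HELP_DoubleToInt32 helper call:

    #include <cstdint>

    extern int32_t SaturatingDoubleToInt32(double value); // stand-in for the helper

    int32_t CastDoubleToInt32(double value)
    {
        // On xarch, cvttsd2si produces the "integer indefinite" value
        // 0x80000000 for NaN and for anything outside the int32 range, so
        // that one sentinel covers every input needing the slow path. The
        // expression below models that instruction in well-defined C++.
        int32_t platformResult = (value >= -2147483648.0 && value < 2147483648.0)
                                     ? (int32_t)value
                                     : INT32_MIN; // the sentinel

        // The sentinel is also the legitimate result for exactly INT32_MIN;
        // in that case the helper simply returns the same value again.
        return (platformResult != INT32_MIN) ? platformResult
                                             : SaturatingDoubleToInt32(value);
    }

The fast path therefore costs one extra compare and branch, and the helper is only invoked for inputs that actually saturate.]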
-    if (varTypeIsFloating(srcType))
+    // Case of src is a small type and dst is a floating point type.
+    if (varTypeIsSmall(castFromType) && varTypeIsFloating(castToType))
     {
+        // These conversions can never be overflow detecting ones.
         noway_assert(!tree->gtOverflow());
-        assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small
-                                          // int.
+        tmpType = TYP_INT;
+    }
+#if defined(FEATURE_SIMD) && defined(FEATURE_HW_INTRINSICS)
+    // case of src is a floating point type and dst is a small type.
+    else if (varTypeIsFloating(castFromType) && varTypeIsSmall(castToType))
+    {
+        // We are casting to a small type and need to saturate the result, so generate effectively
+        //   fmov s1, wzr
+        //   fmax s0, s1, s0
+        //   mov  w8, #1
+        //   fmov s1, w8
+        //   fmin s0, s1, s0
+        //
+        // Where, s0 = castOp
+        // Where, #1 = +255.0 -or- +65535.0
+        //
+        // -or, for signed values
+        //   mov  w8, #0
+        //   fmov s1, w8
+        //   fmax s0, s1, s0
+        //   mov  w8, #1
+        //   fmov s1, w8
+        //   fmin s0, s1, s0
+        //
+        // Where, s0 = castOp
+        // Where, #0 = -128.0 -or- -32768.0
+        // Where, #1 = +127.0 -or- +32767.0
+
+        CorInfoType    simdBaseJitType;
+        NamedIntrinsic simdMaxIntrinId;
+        NamedIntrinsic simdMinIntrinId;
+
+        if (castFromType == TYP_DOUBLE)
+        {
+            simdBaseJitType = CORINFO_TYPE_DOUBLE;
+        }
+        else
+        {
+            assert(castFromType == TYP_FLOAT);
+            simdBaseJitType = CORINFO_TYPE_FLOAT;
+        }
+
+        simdMaxIntrinId = NI_AdvSimd_Arm64_MaxScalar;
+        simdMinIntrinId = NI_AdvSimd_Arm64_MinScalar;
+
+        // We max ourselves against the lower bound
+        GenTree* lowerBound;
+
+        if (varTypeIsUnsigned(castToType))
+        {
+            lowerBound = comp->gtNewSimdZeroNode(TYP_SIMD16, simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ false);
+            BlockRange().InsertAfter(castOp, lowerBound);
+            LowerNode(lowerBound);
+        }
+        else
+        {
+            GenTree* lowerBoundCns;
+
+            if (castToType == TYP_BYTE)
+            {
+                lowerBoundCns = comp->gtNewDconNode(-128.0, castFromType);
+            }
+            else
+            {
+                assert(castToType == TYP_SHORT);
+                lowerBoundCns = comp->gtNewDconNode(-32768.0, castFromType);
+            }
+
+            BlockRange().InsertAfter(castOp, lowerBoundCns);
+
+            lowerBound = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, lowerBoundCns, NI_Vector128_CreateScalarUnsafe,
+                                                        simdBaseJitType, 16);
+            BlockRange().InsertAfter(lowerBoundCns, lowerBound);
+            LowerNode(lowerBound);
+        }
+
+        GenTree* simdMax =
+            comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, castOp, lowerBound, simdMaxIntrinId, simdBaseJitType, 16);
+        BlockRange().InsertAfter(lowerBound, simdMax);
+        LowerNode(simdMax);
+
+        // We min ourselves against the upper bound
+        GenTree* upperBound;
+        GenTree* upperBoundCns;
+
+        switch (castToType)
+        {
+            case TYP_BYTE:
+            {
+                upperBoundCns = comp->gtNewDconNode(+127.0, castFromType);
+                break;
+            }
+
+            case TYP_UBYTE:
+            {
+                upperBoundCns = comp->gtNewDconNode(+255.0, castFromType);
+                break;
+            }
+
+            case TYP_SHORT:
+            {
+                upperBoundCns = comp->gtNewDconNode(+32767.0, castFromType);
+                break;
+            }
+
+            case TYP_USHORT:
+            {
+                upperBoundCns = comp->gtNewDconNode(+65535.0, castFromType);
+                break;
+            }
+
+            default:
+            {
+                unreached();
+            }
+        }
+
+        BlockRange().InsertAfter(simdMax, upperBoundCns);
+
+        upperBound = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, upperBoundCns, NI_Vector128_CreateScalarUnsafe,
+                                                    simdBaseJitType, 16);
+        BlockRange().InsertAfter(upperBoundCns, upperBound);
+        LowerNode(upperBound);
+
+        GenTree* simdMin =
+            comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, simdMax, upperBound, simdMinIntrinId, simdBaseJitType, 16);
+        BlockRange().InsertAfter(upperBound, simdMin);
+        LowerNode(simdMin);
+
+        // Replace the castOp with the properly saturated value
+        castOp =
comp->gtNewSimdHWIntrinsicNode(castFromType, simdMin, NI_Vector128_ToScalar, simdBaseJitType, 16); + BlockRange().InsertAfter(simdMin, castOp); + LowerNode(castOp); + + tree->CastOp() = castOp; + tree->CastToType() = TYP_INT; } +#else // !FEATURE_SIMD || !FEATURE_HW_INTRINSICS + assert(!varTypeIsFloating(castFromType) || !varTypeIsSmall(castToType)); +#endif // FEATURE_SIMD && FEATURE_HW_INTRINSICS - assert(!varTypeIsSmall(srcType)); + if (tmpType != TYP_UNDEF) + { + GenTree* tmp = comp->gtNewCastNode(tmpType, castOp, tree->IsUnsigned(), tmpType); + tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + tree->gtFlags &= ~GTF_UNSIGNED; + tree->AsOp()->gtOp1 = tmp; + BlockRange().InsertAfter(castOp, tmp); + ContainCheckCast(tmp->AsCast()); + } // Now determine if we have operands that should be contained. - ContainCheckCast(tree->AsCast()); + ContainCheckCast(tree); + + return tree->gtNext; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 2bf009026bd89..d82a2a1c765fd 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -656,11 +656,10 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double) * * SSE2 conversion instructions operate on signed integers. casts from Uint32/Uint64 - * are morphed as follows by front-end and hence should not be seen here. + * are morphed as follows by front-end and hence should not be seen here for 32-bit. * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double) * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float) * - * * Similarly casts from float/double to a smaller int type are transformed as follows: * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) @@ -671,74 +670,475 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) * integer. The above transformations help us to leverage those instructions. * * Note that for the following conversions we still depend on helper calls and - * don't expect to see them here. + * don't expect to see them here for 32-bit. * i) GT_CAST(float/double, uint64) * ii) GT_CAST(float/double, int type with overflow detection) * - * TODO-XArch-CQ: (Low-pri): Jit64 generates in-line code of 8 instructions for (i) above. - * There are hardly any occurrences of this conversion operation in platform - * assemblies or in CQ perf benchmarks (1 occurrence in corelib, microsoft.jscript, - * 1 occurrence in Roslyn and no occurrences in system, system.core, system.numerics - * system.windows.forms, scimark, fractals, bio mums). If we ever find evidence that - * doing this optimization is a win, should consider generating in-lined code. 
 */
-void Lowering::LowerCast(GenTree* tree)
+GenTree* Lowering::LowerCast(GenTreeCast* tree)
 {
-    assert(tree->OperGet() == GT_CAST);
-
-    GenTree* castOp = tree->AsCast()->CastOp();
-    var_types castToType = tree->CastToType();
-    var_types srcType = castOp->TypeGet();
-    var_types tmpType = TYP_UNDEF;
+    GenTree*  castOp       = tree->CastOp();
+    var_types castToType   = tree->CastToType();
+    var_types castFromType = tree->CastFromType();
+    var_types tmpType      = TYP_UNDEF;
 
-    // force the srcType to unsigned if GT_UNSIGNED flag is set
-    if (tree->gtFlags & GTF_UNSIGNED)
+    // force the castFromType to unsigned if GT_UNSIGNED flag is set
+    if (tree->IsUnsigned())
     {
-        srcType = varTypeToUnsigned(srcType);
+        castFromType = varTypeToUnsigned(castFromType);
     }
 
-    // We should never see the following casts as they are expected to be lowered
-    // apropriately or converted into helper calls by front-end.
-    // srcType = float/double castToType = * and overflow detecting cast
+    // We should never see the following casts, as they are expected to be converted into helper calls by front-end.
+    // castFromType = float/double castToType = overflow detecting cast
+    // Reason: must be converted to a helper call
+    //
+    // The same goes for these on 32-bit specifically.
+    // castFromType = float/double, castToType = int64/uint32/uint64
     // Reason: must be converted to a helper call
-    // srcType = float/double, castToType = ulong
+    // castFromType = int64/uint32/uint64 castToType = float/double
     // Reason: must be converted to a helper call
-    // srcType = uint castToType = float/double
-    // Reason: uint -> float/double = uint -> long -> float/double
-    // srcType = ulong castToType = float
-    // Reason: ulong -> float = ulong -> double -> float
-    if (varTypeIsFloating(srcType))
+    if (varTypeIsFloating(castFromType))
     {
         noway_assert(!tree->gtOverflow());
-        noway_assert(castToType != TYP_ULONG);
+#if defined(TARGET_X86)
+        noway_assert(!varTypeIsLong(castToType));
+        noway_assert(castToType != TYP_UINT);
+#endif // TARGET_X86
     }
-    else if (srcType == TYP_UINT)
+#if defined(TARGET_X86)
+    else if (tree->IsUnsigned() || (castFromType == TYP_LONG))
     {
         noway_assert(!varTypeIsFloating(castToType));
     }
-    else if (srcType == TYP_ULONG)
-    {
-        noway_assert(castToType != TYP_FLOAT);
-    }
+#endif // TARGET_X86
 
     // Case of src is a small type and dst is a floating point type.
-    if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType))
+    if (varTypeIsSmall(castFromType) && varTypeIsFloating(castToType))
     {
         // These conversions can never be overflow detecting ones.
         noway_assert(!tree->gtOverflow());
         tmpType = TYP_INT;
     }
-    // case of src is a floating point type and dst is a small type.
-    else if (varTypeIsFloating(srcType) && varTypeIsSmall(castToType))
+#if defined(FEATURE_SIMD) && defined(FEATURE_HW_INTRINSICS)
+    // case of src is a floating point type and dst is a small type or unsigned.
+    else if (varTypeIsFloating(castFromType) && (varTypeIsSmall(castToType) || varTypeIsUnsigned(castToType)))
     {
-        tmpType = TYP_INT;
+        // We are casting to a type where we need to saturate the result, so generate effectively
+        //   vmovsd xmm0, qword ptr [rdx]
+        //   xorps  xmm1, xmm1, xmm1
+        //   vmaxss xmm0, xmm1, xmm0
+        //   vmovss xmm1, dword ptr [reloc @RWD00]
+        //   vminss xmm0, xmm1, xmm0
+        //
+        // Where, rdx = castOp
+        // Where, reloc @RWD00 = +255.0 -or- +65535.0
+        //
+        // -or, for signed values
+        //   vmovsd xmm0, qword ptr [rdx]
+        //   vmovss xmm1, dword ptr [reloc @RWD00]
+        //   vmaxss xmm0, xmm1, xmm0
+        //   vmovss xmm1, dword ptr [reloc @RWD04]
+        //   vminss xmm0, xmm1, xmm0
+        //
+        // Where, rdx = castOp
+        // Where, reloc @RWD00 = -128.0 -or- -32768.0
+        // Where, reloc @RWD04 = +127.0 -or- +32767.0
+
+        // ** NOTE **
+        // vmaxsd, vmaxss, vminsd, and vminss all have special behavior for NaN
+        // In particular, if either input is NaN, the second parameter is returned
+        //
+        // This means we require the second parameter to be the user input, rather
+        // than it being the lower or upper bounds to ensure NaN still becomes 0
+        // ** NOTE **
+
+        CorInfoType    simdBaseJitType;
+        NamedIntrinsic simdMaxIntrinId;
+        NamedIntrinsic simdMinIntrinId;
+
+        if (castFromType == TYP_DOUBLE)
+        {
+            simdBaseJitType = CORINFO_TYPE_DOUBLE;
+
+            simdMaxIntrinId = NI_SSE2_MaxScalar;
+            simdMinIntrinId = NI_SSE2_MinScalar;
+        }
+        else
+        {
+            assert(castFromType == TYP_FLOAT);
+            simdBaseJitType = CORINFO_TYPE_FLOAT;
+
+            simdMaxIntrinId = NI_SSE_MaxScalar;
+            simdMinIntrinId = NI_SSE_MinScalar;
+        }
+
+        // We max ourselves against the lower bound
+        GenTree* lowerBound;
+
+        if (varTypeIsUnsigned(castToType))
+        {
+            lowerBound = comp->gtNewSimdZeroNode(TYP_SIMD16, simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ false);
+            BlockRange().InsertAfter(castOp, lowerBound);
+            LowerNode(lowerBound);
+        }
+        else
+        {
+            GenTree* lowerBoundCns;
+
+            if (castToType == TYP_BYTE)
+            {
+                lowerBoundCns = comp->gtNewDconNode(-128.0, castFromType);
+            }
+            else
+            {
+                assert(castToType == TYP_SHORT);
+                lowerBoundCns = comp->gtNewDconNode(-32768.0, castFromType);
+            }
+
+            BlockRange().InsertAfter(castOp, lowerBoundCns);
+
+            lowerBound = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, lowerBoundCns, NI_Vector128_CreateScalarUnsafe,
+                                                        simdBaseJitType, 16);
+            BlockRange().InsertAfter(lowerBoundCns, lowerBound);
+            LowerNode(lowerBound);
+        }
+
+        GenTree* simdMax =
+            comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, lowerBound, castOp, simdMaxIntrinId, simdBaseJitType, 16);
+        BlockRange().InsertAfter(lowerBound, simdMax);
+        LowerNode(simdMax);
+
+        // We min ourselves against the upper bound
+        GenTree* upperBound;
+        GenTree* upperBoundCns;
+
+        switch (castToType)
+        {
+            case TYP_BYTE:
+            {
+                upperBoundCns = comp->gtNewDconNode(+127.0, castFromType);
+                break;
+            }
+
+            case TYP_UBYTE:
+            {
+                upperBoundCns = comp->gtNewDconNode(+255.0, castFromType);
+                break;
+            }
+
+            case TYP_SHORT:
+            {
+                upperBoundCns = comp->gtNewDconNode(+32767.0, castFromType);
+                break;
+            }
+
+            case TYP_USHORT:
+            {
+                upperBoundCns = comp->gtNewDconNode(+65535.0, castFromType);
+                break;
+            }
+
+            case TYP_UINT:
+            {
+                upperBoundCns = comp->gtNewDconNode(+4294967295.0, castFromType);
+                break;
+            }
+
+            case TYP_ULONG:
+            {
+                upperBoundCns = comp->gtNewDconNode(+18446744073709551615.0, castFromType);
+                break;
+            }
+
+            default:
+            {
+                unreached();
+            }
+        }
+
+        BlockRange().InsertAfter(simdMax, upperBoundCns);
+
+        upperBound = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, upperBoundCns, NI_Vector128_CreateScalarUnsafe,
+                                                    simdBaseJitType, 16);
16); + BlockRange().InsertAfter(upperBoundCns, upperBound); + LowerNode(upperBound); + + GenTree* simdMin = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, upperBound, simdMax, simdMinIntrinId, simdBaseJitType, 16); + BlockRange().InsertAfter(upperBound, simdMin); + LowerNode(simdMin); + + // Replace the castOp with the properly saturated value + castOp = comp->gtNewSimdHWIntrinsicNode(castFromType, simdMin, NI_Vector128_ToScalar, simdBaseJitType, 16); + BlockRange().InsertAfter(simdMin, castOp); + LowerNode(castOp); + + tree->CastOp() = castOp; + + if (varTypeIsSmall(castToType)) + { + tree->CastToType() = TYP_INT; + } } +#else // !FEATURE_SIMD || !FEATURE_HW_INTRINSICS + assert(!varTypeIsFloating(castFromType) || !varTypeIsSmall(castToType)); +#endif // FEATURE_SIMD && FEATURE_HW_INTRINSICS + +#if defined(TARGET_AMD64) + if ((varTypeIsFloating(castFromType) && (castToType == TYP_ULONG)) || + ((castFromType == TYP_FLOAT) && (castToType == TYP_UINT))) + { + // This is based on the codegen by Clang/LLVM, Apache-2.0 WITH LLVM-exception + + // We are generating a tree that will effectively be the following assembly + // vmovss xmm0, dword ptr [rdx] + // vcvttss2si eax, xmm0 + // mov r8d, eax + // sar r8d, 31 + // vsubss xmm0, xmm0, dword ptr [reloc @RWD00] + // vcvttss2si r9d, xmm0 + // and r9d, r8d + // or r9d, eax + // + // Where, rdx = castOp + // Where, reloc @RWD00 = 2147483648.0 + // + // -or- + // + // vmovsd xmm0, qword ptr [rdx] + // vcvttsd2si rax, xmm0 + // mov r8, rax + // sar r8, 63 + // vsubsd xmm0, xmm0, qword ptr [reloc @RWD00] + // vcvttsd2si r9, xmm0 + // and r9, r8 + // or r9, rax + // + // Where, rdx = castOp + // Where, reloc @RWD00 = 9223372036854775808.0 + + var_types signedType; + int shiftCnsVal; + double magicCnsVal; + + if (castToType == TYP_UINT) + { + signedType = TYP_INT; + shiftCnsVal = 31; + magicCnsVal = 2147483648.0; + } + else + { + assert(castToType == TYP_ULONG); + + signedType = TYP_LONG; + shiftCnsVal = 63; + magicCnsVal = 9223372036854775808.0; + } + + // We clone the input as we'll need it twice + + LIR::Use castOpUse(BlockRange(), &tree->gtOp1, tree); + ReplaceWithLclVar(castOpUse); + castOp = tree->gtOp1; + + GenTree* castOpDup = comp->gtClone(castOp); + BlockRange().InsertAfter(castOp, castOpDup); + + // We first cast to the signed type, TYP_INT or TYP_LONG + + GenTree* firstCast = comp->gtNewCastNode(signedType, castOp, /* unsigned */ false, signedType); + BlockRange().InsertAfter(castOpDup, firstCast); + LowerNode(firstCast); + + // We insert an arithmetic right shift by 31 or 63 to propagate the sign + + GenTree* shiftCns = comp->gtNewIconNode(shiftCnsVal, TYP_INT); + BlockRange().InsertAfter(firstCast, shiftCns); + + GenTree* propagateSign = comp->gtNewOperNode(GT_RSH, signedType, firstCast, shiftCns); + BlockRange().InsertAfter(shiftCns, propagateSign); + LowerNode(propagateSign); + + // We clone the firstCast result and replace it with a local, doing it here so we have a valid use + + LIR::Use firstCastUse(BlockRange(), &propagateSign->AsOp()->gtOp1, propagateSign); + ReplaceWithLclVar(firstCastUse); + firstCast = propagateSign->AsOp()->gtOp1; + + GenTree* firstCastDup = comp->gtClone(firstCast); + BlockRange().InsertAfter(firstCast, firstCastDup); + + // We subtract a "magic number" from the original input + + GenTree* magicCns = comp->gtNewDconNode(magicCnsVal, castFromType); + BlockRange().InsertAfter(propagateSign, magicCns); + LowerNode(magicCns); + + GenTree* subtract = comp->gtNewOperNode(GT_SUB, castFromType, castOpDup, magicCns);
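For reference, a scalar C# sketch of the whole branchless double -> uint64 sequence being assembled here. This is a sketch only: `CvttSd2Si` is a hypothetical stand-in that models the x64 `cvttsd2si` "integer indefinite" (0x8000000000000000) result for out-of-range inputs, and the input is assumed to have already been clamped by the saturating step above, so NaN never reaches this path.

    using System;

    static class DoubleToUInt64Sketch
    {
        // Models x64 cvttsd2si: NaN and out-of-range inputs produce 0x8000000000000000.
        private static long CvttSd2Si(double d) =>
            (double.IsNaN(d) || d >= 9223372036854775808.0 || d < -9223372036854775808.0)
                ? long.MinValue
                : (long)d;

        // Mirrors the tree above: convert, propagate the sign bit as a mask,
        // convert (val - 2^63), then merge the two candidate results.
        internal static ulong Convert(double val)
        {
            long first  = CvttSd2Si(val);                          // exact for 0 <= val < 2^63
            long mask   = first >> 63;                             // all-ones iff the first convert overflowed
            long second = CvttSd2Si(val - 9223372036854775808.0);  // exact for 2^63 <= val < 2^64
            return (ulong)((second & mask) | first);
        }

        static void Main()
        {
            Console.WriteLine(Convert(1.0));                    // 1
            Console.WriteLine(Convert(9223372036854775808.0));  // 9223372036854775808
            Console.WriteLine(Convert(18446744073709549568.0)); // 18446744073709549568
        }
    }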
+ BlockRange().InsertAfter(magicCns, subtract); + LowerNode(subtract); + + // We cast the result of that to the signed type, TYP_INT or TYP_LONG + + GenTree* secondCast = comp->gtNewCastNode(signedType, subtract, /* unsigned */ false, signedType); + BlockRange().InsertAfter(subtract, secondCast); + LowerNode(secondCast); + + // We AND the result with the propagatedSign + + GenTree* masked = comp->gtNewOperNode(GT_AND, signedType, secondCast, propagateSign); + BlockRange().InsertAfter(secondCast, masked); + LowerNode(masked); + + // We or that with the firstCast result for the final result + + GenTree* result = comp->gtNewOperNode(GT_OR, signedType, firstCastDup, masked); + BlockRange().InsertAfter(masked, result); + LowerNode(result); + + // Finally we replace the original tree's use and remove it + + LIR::Use use; + if (BlockRange().TryGetUse(tree, &use)) + { + use.ReplaceWith(result); + } + + BlockRange().Remove(tree); + return result->gtNext; + } +#if defined(FEATURE_SIMD) && defined(FEATURE_HW_INTRINSICS) + else if ((castFromType == TYP_ULONG) && varTypeIsFloating(castToType)) + { + // This is based on the codegen by Clang/LLVM, Apache-2.0 WITH LLVM-exception + + // We are generating a tree that will effectively be the following assembly + // vmovq xmm0, qword ptr [rdx] + // vpunpckldq xmm0, xmm0, xmmword ptr [reloc @RWD00] + // vsubpd xmm0, xmm0, xmmword ptr [reloc @RWD16] + // vpermilpd xmm1, xmm0, 1 + // vaddsd xmm0, xmm0, xmm1 + // + // Where, rdx = castOp + // Where, reloc @RWD00 = { 0x43300000, 0x45300000, 0x00000000, 0x00000000 } + // Where, reloc @RWD16 = { 0x4330000000000000, 0x4530000000000000 } + // + // Where, vpermilpd = "pshufd xmm1, xmm0, 78" on downlevel hardware + + assert(castToType == TYP_DOUBLE); + + // We move the value into a SIMD register + + GenTree* simd = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, castOp, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_ULONG, 16); + BlockRange().InsertAfter(castOp, simd); + LowerNode(simd); + + // We generate a magic constant that represents the upper 32-bits of 2^52 and 2^84 for each half + + VectorConstant vecCns1 = {}; + + vecCns1.i32[0] = 0x43300000; + vecCns1.i32[1] = 0x45300000; + + UNATIVE_OFFSET cnum1 = comp->GetEmitter()->emitDataConst(&vecCns1, 16, 16, TYP_SIMD16); + CORINFO_FIELD_HANDLE hnd1 = comp->eeFindJitDataOffs(cnum1); + GenTree* clsVarAddr1 = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd1, nullptr); + BlockRange().InsertAfter(simd, clsVarAddr1); + + GenTree* vecCnsInd1 = comp->gtNewIndir(TYP_SIMD16, clsVarAddr1); + BlockRange().InsertAfter(clsVarAddr1, vecCnsInd1); + LowerNode(vecCnsInd1); + + // We then unpack the original input with the magic constant to create two doubles, each with a 32-bit mantissa + + GenTree* unpack = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, simd, vecCnsInd1, NI_SSE2_UnpackLow, CORINFO_TYPE_INT, 16); + BlockRange().InsertAfter(vecCnsInd1, unpack); + LowerNode(unpack); + + // We generate a magic constant that represents 2^52 and 2^84, respectively + + VectorConstant vecCns2 = {}; + + vecCns2.i64[0] = 0x4330000000000000; + vecCns2.i64[1] = 0x4530000000000000; + + UNATIVE_OFFSET cnum2 = comp->GetEmitter()->emitDataConst(&vecCns2, 16, 16, TYP_SIMD16); + CORINFO_FIELD_HANDLE hnd2 = comp->eeFindJitDataOffs(cnum2); + GenTree* clsVarAddr2 = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd2, nullptr); + BlockRange().InsertAfter(unpack, clsVarAddr2); + + GenTree* vecCnsInd2 = comp->gtNewIndir(TYP_SIMD16, clsVarAddr2); +
BlockRange().InsertAfter(clsVarAddr2, vecCnsInd2); + LowerNode(vecCnsInd2); + + // We subtract 2^52 and 2^84, respectively, from each half of the result + + GenTree* subtract = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, unpack, vecCnsInd2, NI_SSE2_Subtract, CORINFO_TYPE_DOUBLE, 16); + BlockRange().InsertAfter(vecCnsInd2, subtract); + LowerNode(subtract); + + // We swap the upper and lower doubles + + int shufCnsVal = 0b01001110; + NamedIntrinsic shuffleIntrinsic = NI_SSE2_Shuffle; + CorInfoType shuffleType = CORINFO_TYPE_INT; + + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + shufCnsVal = 1; + shuffleIntrinsic = NI_AVX_Permute; + shuffleType = CORINFO_TYPE_DOUBLE; + } + + GenTree* shufCns = comp->gtNewIconNode(shufCnsVal, TYP_INT); + BlockRange().InsertAfter(subtract, shufCns); + + GenTreeHWIntrinsic* shuffle = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, subtract, shufCns, shuffleIntrinsic, shuffleType, 16); + BlockRange().InsertAfter(shufCns, shuffle); + LowerNode(shuffle); + + // We clone the subtract result and replace it with a local, doing it here so we have a valid use + + LIR::Use subtractUse(BlockRange(), &shuffle->Op(1), shuffle); + ReplaceWithLclVar(subtractUse); + subtract = shuffle->Op(1); + + GenTree* subtractDup = comp->gtClone(subtract); + BlockRange().InsertAfter(subtract, subtractDup); + + // We add the shuffle and the subtract result together, giving the result + + GenTree* result = comp->gtNewOperNode(GT_ADD, TYP_DOUBLE, subtractDup, shuffle); + BlockRange().InsertAfter(shuffle, result); + LowerNode(result); + + // Finally we replace the original tree's use and remove it + + LIR::Use use; + if (BlockRange().TryGetUse(tree, &use)) + { + use.ReplaceWith(result); + } + + BlockRange().Remove(tree); + return result->gtNext; + } +#else // !FEATURE_SIMD || !FEATURE_HW_INTRINSICS + // We don't expect to see uint64 -> float/double as it should have been converted + // to a helper call by the front-end + noway_assert((castFromType != TYP_ULONG) || !varTypeIsFloating(castToType)); +#endif // FEATURE_SIMD && FEATURE_HW_INTRINSICS +#endif // TARGET_AMD64 if (tmpType != TYP_UNDEF) { GenTree* tmp = comp->gtNewCastNode(tmpType, castOp, tree->IsUnsigned(), tmpType); tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->gtFlags &= ~GTF_UNSIGNED; tree->AsOp()->gtOp1 = tmp; BlockRange().InsertAfter(castOp, tmp); @@ -746,7 +1146,9 @@ void Lowering::LowerCast(GenTree* tree) } // Now determine if we have operands that should be contained. - ContainCheckCast(tree->AsCast()); + ContainCheckCast(tree); + + return tree->gtNext; } #ifdef FEATURE_SIMD @@ -4860,8 +5262,21 @@ void Lowering::ContainCheckCast(GenTreeCast* node) } #endif // DEBUG - // U8 -> R8 conversion requires that the operand be in a register. - if (srcType != TYP_ULONG) +#if defined(TARGET_X86) + // x86 doesn't expect to see int64/uint32/uint64 -> float/double here since they should have been + // replaced with helper calls by the front end. + noway_assert(!varTypeIsLong(srcType)); + noway_assert(srcType != TYP_UINT); +#endif // TARGET_X86 + +#if defined(TARGET_AMD64) + // x64 shouldn't see a uint64 -> float/double as it should have been lowered to an alternative + // sequence -or- converted to a helper call by the front end.
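And a scalar C# sketch of the uint64 -> double trick that alternative sequence uses. Again illustrative only: it reproduces the 2^52/2^84 mantissa-packing math (the hex constants match vecCns1/vecCns2 above), not the JIT's actual output.

    using System;

    static class UInt64ToDoubleSketch
    {
        // Packs each 32-bit half of the input into a double's mantissa and
        // subtracts the corresponding magic constant (2^52 and 2^84) back out.
        internal static double Convert(ulong value)
        {
            // 0x4330000000000000 is 2^52; OR-ing in the low 32 bits yields 2^52 + lo exactly.
            double lo = BitConverter.Int64BitsToDouble(unchecked((long)(0x4330000000000000UL | (value & 0xFFFFFFFFUL))));
            // 0x4530000000000000 is 2^84; OR-ing in the high 32 bits yields 2^84 + hi * 2^32 exactly.
            double hi = BitConverter.Int64BitsToDouble(unchecked((long)(0x4530000000000000UL | (value >> 32))));
            // Both subtractions are exact, so only the final add rounds: a correctly rounded result.
            return (hi - 19342813113834066795298816.0) + (lo - 4503599627370496.0);
        }

        static void Main()
        {
            Console.WriteLine(Convert(1UL));            // 1
            Console.WriteLine(Convert(ulong.MaxValue)); // 1.8446744073709552E+19 (2^64, rounded)
        }
    }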
+ noway_assert(srcType != TYP_ULONG); + + // U4 -> R4/R8 conversion requires that the operand be in a register as this forces + // a zero extension and ensures the upper bits are zero so we can emit: cvtsi2sd xmm0, rax + if (srcType != TYP_UINT) { if ((IsContainableMemoryOp(castOp) && IsSafeToContainMem(node, castOp)) || castOp->IsCnsNonZeroFltOrDbl()) { @@ -4874,6 +5289,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node) castOp->SetRegOptional(); } } +#endif // TARGET_AMD64 } #if !defined(TARGET_64BIT) if (varTypeIsLong(srcType)) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index f9e69773d70cb..947c5f1285040 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2504,7 +2504,14 @@ int LinearScan::BuildCast(GenTreeCast* cast) // rather require it to be different from operand's reg. buildInternalIntRegisterDefForNode(cast); } -#endif +#if !defined(FEATURE_SIMD) || !defined(FEATURE_HW_INTRINSICS) + else if ((srcType == TYP_ULONG) && varTypeIsFloating(castType)) + { + // We need a temporary register that's different from the operand + buildInternalIntRegisterDefForNode(cast); + } +#endif // !FEATURE_SIMD || !FEATURE_HW_INTRINSICS +#endif // TARGET_X86 int srcCount = BuildOperandUses(src, candidates); buildInternalRegisterUses(); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index cc532365a6e8f..2aa88bd33a039 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -184,12 +184,12 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() #elif defined(TARGET_AMD64) - // Amd64: src = float, dst = uint64 or overflow conversion. + // Amd64: src = float, dst = overflow conversion. // This goes through helper and hence src needs to be converted to double. - && (tree->gtOverflow() || (dstType == TYP_ULONG)) + && tree->gtOverflow() #elif defined(TARGET_ARM) - // Arm: src = float, dst = int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType)) + // Arm: src = float, dst = int8/int16/int64/uint8/uint16/uint64 or overflow conversion. + && (tree->gtOverflow() || varTypeIsLong(dstType) || varTypeIsSmall(dstType)) #else // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT)) @@ -199,66 +199,86 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); } - // Do we need to do it in two steps R -> I -> smallType? - if (dstSize < genTypeSize(TYP_INT)) - { - oper = gtNewCastNodeL(TYP_INT, oper, /* fromUnsigned */ false, TYP_INT); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->AsCast()->CastOp() = oper; - // We must not mistreat the original cast, which was from a floating point type, - // as from an unsigned type, since we now have a TYP_INT node for the source and - // CAST_OVF(BYTE <- INT) != CAST_OVF(BYTE <- UINT). - assert(!tree->IsUnsigned()); - } - else + if (!tree->gtOverflow()) { - if (!tree->gtOverflow()) + switch (dstType) { -#ifdef TARGET_ARM64 // ARM64 supports all non-overflow checking conversions directly. 
- return nullptr; +#if defined(FEATURE_SIMD) && defined(FEATURE_HW_INTRINSICS) + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + // When SIMD && HW_INTRINSICS are available we support int8, int16, uint8, and uint16 conversions + // via lowering + return nullptr; +#else // !FEATURE_SIMD || !FEATURE_HW_INTRINSICS + case TYP_BYTE: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt8, oper); + + case TYP_UBYTE: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt8, oper); + + case TYP_SHORT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt16, oper); + + case TYP_USHORT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt16, oper); +#endif // FEATURE_SIMD && FEATURE_HW_INTRINSICS + + case TYP_INT: + // AMD64, ARM, ARM64, and x86 support int32 conversions directly + return nullptr; + +#if defined(TARGET_X86) + case TYP_UINT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt32, oper); +#else // TARGET_ARM || TARGET_ARM64 || TARGET_AMD64 + case TYP_UINT: + // ARM and ARM64 support uint32 conversions directly + // AMD64 supports uint32 conversions via lowering + return nullptr; +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_AMD64 + +#if defined(TARGET_64BIT) + case TYP_LONG: + case TYP_ULONG: + // ARM64 supports int64 and uint64 conversions directly + // AMD64 supports int64 and uint64 conversions via lowering + return nullptr; #else - switch (dstType) - { - case TYP_INT: - return nullptr; - - case TYP_UINT: -#if defined(TARGET_ARM) || defined(TARGET_AMD64) - return nullptr; -#else // TARGET_X86 - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); -#endif // TARGET_X86 + case TYP_LONG: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt64, oper); - case TYP_LONG: -#ifdef TARGET_AMD64 - // SSE2 has instructions to convert a float/double directly to a long - return nullptr; -#else // !TARGET_AMD64 - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); -#endif // !TARGET_AMD64 - - case TYP_ULONG: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); - default: - unreached(); - } -#endif // TARGET_ARM64 + case TYP_ULONG: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt64, oper); +#endif // !TARGET_64BIT + + default: + unreached(); } - else - { - switch (dstType) - { - case TYP_INT: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper); - case TYP_UINT: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper); - case TYP_LONG: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper); - case TYP_ULONG: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper); - default: - unreached(); - } + } + else + { + switch (dstType) + { + case TYP_BYTE: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt8_OVF, oper); + case TYP_UBYTE: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt8_OVF, oper); + case TYP_SHORT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt16_OVF, oper); + case TYP_USHORT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt16_OVF, oper); + case TYP_INT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt32_OVF, oper); + case TYP_UINT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt32_OVF, oper); + case TYP_LONG: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToInt64_OVF, oper); + case TYP_ULONG: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DoubleToUInt64_OVF, oper); + default: + unreached(); } } } @@ -275,15 +295,22 @@ GenTree*
Compiler::fgMorphExpandCast(GenTreeCast* tree) } #endif //! TARGET_64BIT -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_AMD64) // AArch, unlike x86/amd64, has instructions that can cast directly from // all integers (except for longs on AArch32 of course) to floats. + // + // AMD64, however, has efficient alternatives that can directly handle + // the cases that it doesn't support (uint and ulong). For u64->f32, however, + // we want to keep the behavior as `u64->f64->f32`, as it's simpler + // // Because there is no IL instruction conv.r4.un, uint/ulong -> float // casts are always imported as CAST(float <- CAST(double <- uint/ulong)). // We can eliminate the redundant intermediate cast as an optimization. else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && oper->OperIs(GT_CAST) -#ifdef TARGET_ARM - && !varTypeIsLong(oper->AsCast()->CastOp()) +#if defined(TARGET_ARM) + && !varTypeIsLong(oper->AsCast()->CastFromType()) +#elif defined(TARGET_AMD64) + && (!varTypeIsLong(oper->AsCast()->CastFromType()) || !oper->AsCast()->IsUnsigned()) #endif ) { @@ -292,7 +319,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) return fgMorphTree(oper); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_AMD64 #ifdef TARGET_ARM // converts long/ulong --> float/double casts into helper calls. @@ -312,50 +339,11 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) return fgMorphTree(tree); } if (tree->gtFlags & GTF_UNSIGNED) - return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); - return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + return fgMorphCastIntoHelper(tree, CORINFO_HELP_UInt64ToDouble, oper); + return fgMorphCastIntoHelper(tree, CORINFO_HELP_Int64ToDouble, oper); } #endif // TARGET_ARM -#ifdef TARGET_AMD64 - // Do we have to do two step U4/8 -> R4/8 ? - // Codegen supports the following conversion as one-step operation - // a) Long -> R4/R8 - // b) U8 -> R8 - // - // The following conversions are performed as two-step operations using above. - // U4 -> R4/8 = U4-> Long -> R4/8 - // U8 -> R4 = U8 -> R8 -> R4 - else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) - { - srcType = varTypeToUnsigned(srcType); - - if (srcType == TYP_ULONG) - { - if (dstType == TYP_FLOAT) - { - // Codegen can handle U8 -> R8 conversion. - // U8 -> R4 = U8 -> R8 -> R4 - // - change the dsttype to double - // - insert a cast from double to float - // - recurse into the resulting tree - tree->CastToType() = TYP_DOUBLE; - tree->gtType = TYP_DOUBLE; - tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); - - return fgMorphTree(tree); - } - } - else if (srcType == TYP_UINT) - { - oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->ClearUnsigned(); - tree->CastOp() = oper; - } - } -#endif // TARGET_AMD64 - #ifdef TARGET_X86 // Do we have to do two step U4/8 -> R4/8 ?
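A worked example of why the u64 -> f64 -> f32 shape mentioned above is not always the same as a single correctly rounded u64 -> f32 conversion. Hedged: the comments give the mathematically expected values under IEEE 754 round-to-nearest-even; which path a plain C# cast takes can vary by runtime and hardware, so only the explicit two-step form is asserted.

    using System;

    static class DoubleRoundingDemo
    {
        static void Main()
        {
            // 2^63 + 2^39 + 1 sits just above the float tie point at 2^63 + 2^39.
            ulong x = (1UL << 63) + (1UL << 39) + 1;

            // u64 -> f64 first drops the trailing +1 (it is under half a double ulp, 2^10),
            // leaving the exact tie 2^63 + 2^39, which f64 -> f32 then rounds to even: 2^63.
            // A single correctly rounded u64 -> f32 would instead round up to 2^63 + 2^40.
            float twoStep = (float)(double)x;

            Console.WriteLine(twoStep == 9223372036854775808.0f); // True: the double-rounded 2^63
        }
    }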
else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) @@ -364,19 +352,19 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (srcType == TYP_ULONG) { - return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); + return fgMorphCastIntoHelper(tree, CORINFO_HELP_UInt64ToDouble, oper); } else if (srcType == TYP_UINT) { oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); tree->gtFlags &= ~GTF_UNSIGNED; - return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + return fgMorphCastIntoHelper(tree, CORINFO_HELP_Int64ToDouble, oper); } } - else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) + else if (!tree->IsUnsigned() && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) { - oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_Int64ToDouble, oper); // Since we don't have a Jit Helper that converts to a TYP_FLOAT // we just use the one that converts to a TYP_DOUBLE @@ -400,7 +388,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) return oper; } } -#endif // TARGET_X86 +#elif defined(TARGET_AMD64) && (!defined(FEATURE_SIMD) || !defined(FEATURE_HW_INTRINSICS)) + else if (tree->IsUnsigned() && (varTypeToUnsigned(srcType) == TYP_ULONG) && varTypeIsFloating(dstType)) + { + // For x64, if SIMD or HW_INTRINSICS are disabled, we just want uint64 -> float/double + // to fall back to the helper call. + return fgMorphCastIntoHelper(tree, CORINFO_HELP_UInt64ToDouble, oper); + } +#endif else if (varTypeIsGC(srcType) != varTypeIsGC(dstType)) { // We are casting away GC information. we would like to just diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 32a1ce5b66de1..aa4ce542e5776 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -702,20 +702,14 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_VectorT128_ConvertToInt32: case NI_VectorT256_ConvertToInt32: { - assert(simdBaseType == TYP_FLOAT); - NamedIntrinsic convert = (simdSize == 32) ? NI_AVX_ConvertToVector256Int32WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; - return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); + return gtNewSimdCvtToInt32Node(retType, op1, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_ConvertToSingle: case NI_VectorT256_ConvertToSingle: { - assert(simdBaseType == TYP_INT); - NamedIntrinsic convert = - (simdSize == 32) ?
NI_AVX_ConvertToVector256Single : NI_SSE2_ConvertToVector128Single; - return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize, + return gtNewSimdCvtToSingleNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); } @@ -746,44 +740,38 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_VectorT128_ConvertToDouble: { - assert((simdBaseType == TYP_LONG) || (simdBaseType == TYP_ULONG)); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_Arm64_ConvertToDouble, simdBaseJitType, - simdSize, /* isSimdAsHWIntrinsic */ true); + return gtNewSimdCvtToDoubleNode(retType, op1, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_ConvertToInt32: { - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToInt32RoundToZero, simdBaseJitType, - simdSize, /* isSimdAsHWIntrinsic */ true); + return gtNewSimdCvtToInt32Node(retType, op1, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_ConvertToInt64: { - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_Arm64_ConvertToInt64RoundToZero, - simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); + return gtNewSimdCvtToInt64Node(retType, op1, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_ConvertToSingle: { - assert((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseJitType, simdSize, + return gtNewSimdCvtToSingleNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_ConvertToUInt32: { - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToUInt32RoundToZero, - simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); + return gtNewSimdCvtToUInt32Node(retType, op1, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_ConvertToUInt64: { - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_Arm64_ConvertToUInt64RoundToZero, - simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); + return gtNewSimdCvtToUInt64Node(retType, op1, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } case NI_VectorT128_Sum: diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 529c6538699f5..d0875ebc633c4 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1211,12 +1211,16 @@ void HelperCallProperties::init() case CORINFO_HELP_LRSH: case CORINFO_HELP_LRSZ: case CORINFO_HELP_LMUL: - case CORINFO_HELP_LNG2DBL: - case CORINFO_HELP_ULNG2DBL: - case CORINFO_HELP_DBL2INT: - case CORINFO_HELP_DBL2LNG: - case CORINFO_HELP_DBL2UINT: - case CORINFO_HELP_DBL2ULNG: + case CORINFO_HELP_Int64ToDouble: + case CORINFO_HELP_UInt64ToDouble: + case CORINFO_HELP_DoubleToInt8: + case CORINFO_HELP_DoubleToInt16: + case CORINFO_HELP_DoubleToInt32: + case CORINFO_HELP_DoubleToInt64: + case CORINFO_HELP_DoubleToUInt8: + case CORINFO_HELP_DoubleToUInt16: + case CORINFO_HELP_DoubleToUInt32: + case CORINFO_HELP_DoubleToUInt64: case CORINFO_HELP_FLTREM: case CORINFO_HELP_DBLREM: case CORINFO_HELP_FLTROUND: @@ -1248,10 +1252,14 @@ void HelperCallProperties::init() case CORINFO_HELP_LMUL_OVF: case CORINFO_HELP_ULMUL_OVF: - case CORINFO_HELP_DBL2INT_OVF: - case CORINFO_HELP_DBL2LNG_OVF: - case CORINFO_HELP_DBL2UINT_OVF: - case 
CORINFO_HELP_DBL2ULNG_OVF: + case CORINFO_HELP_DoubleToInt8_OVF: + case CORINFO_HELP_DoubleToInt16_OVF: + case CORINFO_HELP_DoubleToInt32_OVF: + case CORINFO_HELP_DoubleToInt64_OVF: + case CORINFO_HELP_DoubleToUInt8_OVF: + case CORINFO_HELP_DoubleToUInt16_OVF: + case CORINFO_HELP_DoubleToUInt32_OVF: + case CORINFO_HELP_DoubleToUInt64_OVF: isPure = true; break; @@ -1892,79 +1900,278 @@ unsigned CountDigits(double num, unsigned base /* = 10 */) #endif // DEBUG -double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal) -{ - __int64 s64 = uIntVal; - double d; - if (s64 < 0) - { -#if defined(TARGET_XARCH) - // RyuJIT codegen and clang (or gcc) may produce different results for casting uint64 to - // double, and the clang result is more accurate. For example, - // 1) (double)0x84595161401484A0UL --> 43e08b2a2c280290 (RyuJIT codegen or VC++) - // 2) (double)0x84595161401484A0UL --> 43e08b2a2c280291 (clang or gcc) - // If the folding optimization below is implemented by simple casting of (double)uint64_val - // and it is compiled by clang, casting result can be inconsistent, depending on whether - // the folding optimization is triggered or the codegen generates instructions for casting. // - // The current solution is to force the same math as the codegen does, so that casting - // result is always consistent. - - // d = (double)(int64_t)uint64 + 0x1p64 - uint64_t adjHex = 0x43F0000000000000UL; - d = (double)s64 + *(double*)&adjHex; -#else - d = (double)uIntVal; -#endif +double FloatingPointUtils::convertInt64ToDouble(int64_t val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_Int64ToDouble + // This should be kept in sync with RhpInt64ToDouble + // ** NOTE ** + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return (double)val; +} + +double FloatingPointUtils::convertUInt64ToDouble(uint64_t val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_UInt64ToDouble + // This should be kept in sync with RhpUInt64ToDouble + // ** NOTE ** + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return (double)val; +} + +int8_t FloatingPointUtils::convertDoubleToInt8(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt8 + // This should be kept in sync with RhpDoubleToInt8 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; } - else + + if (val <= -129.0) { - d = (double)uIntVal; + // Too small should saturate to INT8_MIN + return INT8_MIN; } - return d; + + if (val >= +128.0) + { + // Too large should saturate to INT8_MAX + return INT8_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct.
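A managed sketch of the saturating contract convertDoubleToInt8 implements, for cross-checking the bounds (illustrative only: `SaturatingDoubleToInt8` is not an API added by this change):

    using System;

    static class SaturateDemo
    {
        // Mirrors the bounds used by convertDoubleToInt8 above.
        static sbyte SaturatingDoubleToInt8(double val)
        {
            if (double.IsNaN(val)) return 0;           // NaN -> 0
            if (val <= -129.0) return sbyte.MinValue;  // too small -> -128
            if (val >= +128.0) return sbyte.MaxValue;  // too large -> +127
            return (sbyte)(int)val;                    // in range: truncate toward zero
        }

        static void Main()
        {
            Console.WriteLine(SaturatingDoubleToInt8(double.NaN)); // 0
            Console.WriteLine(SaturatingDoubleToInt8(-1000.0));    // -128
            Console.WriteLine(SaturatingDoubleToInt8(127.9));      // 127
        }
    }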
+ + return INT8(val); } -float FloatingPointUtils::convertUInt64ToFloat(unsigned __int64 u64) +int16_t FloatingPointUtils::convertDoubleToInt16(double val) { - double d = convertUInt64ToDouble(u64); - return (float)d; + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt16 + // This should be kept in sync with RhpDoubleToInt16 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; + } + + if (val <= -32769.0) + { + // Too small should saturate to INT16_MIN + return INT16_MIN; + } + + if (val >= +32768.0) + { + // Too large should saturate to INT16_MAX + return INT16_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return INT16(val); } -unsigned __int64 FloatingPointUtils::convertDoubleToUInt64(double d) +int32_t FloatingPointUtils::convertDoubleToInt32(double val) { - unsigned __int64 u64; - if (d >= 0.0) + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt32 + // This should be kept in sync with RhpDoubleToInt32 + // ** NOTE ** + + if (_isnan(val)) { - // Work around a C++ issue where it doesn't properly convert large positive doubles - const double two63 = 2147483648.0 * 4294967296.0; - if (d < two63) - { - u64 = UINT64(d); - } - else - { - // subtract 0x8000000000000000, do the convert then add it back again - u64 = INT64(d - two63) + I64(0x8000000000000000); - } - return u64; + // NAN should return 0 + return 0; + } + + if (val <= -2147483649.0) + { + // Too small should saturate to INT32_MIN + return INT32_MIN; + } + + if (val >= +2147483648.0) + { + // Too large should saturate to INT32_MAX + return INT32_MAX; } -#ifdef TARGET_XARCH + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. - // While the Ecma spec does not specifically call this out, - // the case of conversion from negative double to unsigned integer is - // effectively an overflow and therefore the result is unspecified. - // With MSVC for x86/x64, such a conversion results in the bit-equivalent - // unsigned value of the conversion to integer. Other compilers convert - // negative doubles to zero when the target is unsigned. - // To make the behavior consistent across OS's on TARGET_XARCH, - // this double cast is needed to conform MSVC behavior. + return INT32(val); +} - u64 = UINT64(INT64(d)); -#else - u64 = UINT64(d); -#endif // TARGET_XARCH +int64_t FloatingPointUtils::convertDoubleToInt64(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt64 + // This should be kept in sync with RhpDoubleToInt64 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; + } + + if (val <= -9223372036854777856.0) + { + // Too small should saturate to INT64_MIN + return INT64_MIN; + } + + if (val >= +9223372036854775808.0) + { + // Too large should saturate to INT64_MAX + return INT64_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. 
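The asymmetric int64 bounds used above can be sanity-checked from managed code (`Math.BitDecrement` is only used here to display the neighboring double; the expected values follow from the IEEE 754 double format):

    using System;

    static class Int64BoundsDemo
    {
        static void Main()
        {
            // long.MaxValue (2^63 - 1) is not representable as a double; it rounds up to 2^63.
            Console.WriteLine((double)long.MaxValue == 9223372036854775808.0); // True

            // The next double below -2^63 is -2^63 - 2048, hence the -9223372036854777856.0 bound.
            Console.WriteLine(Math.BitDecrement(-9223372036854775808.0));      // -9.223372036854778E+18
        }
    }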
+ + return INT64(val); +} + +uint8_t FloatingPointUtils::convertDoubleToUInt8(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt8 + // This should be kept in sync with RhpDoubleToUInt8 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) + { + // Too small should saturate to 0 (UINT8_MIN) + return 0; + } + + if (val >= +256.0) + { + // Too large should saturate to UINT8_MAX + return UINT8_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return UINT8(val); +} + +uint16_t FloatingPointUtils::convertDoubleToUInt16(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt16 + // This should be kept in sync with RhpDoubleToUInt16 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) + { + // Too small should saturate to 0 (UINT16_MIN) + return 0; + } + + if (val >= +65536.0) + { + // Too large should saturate to UINT16_MAX + return UINT16_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return UINT16(val); +} + +uint32_t FloatingPointUtils::convertDoubleToUInt32(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt32 + // This should be kept in sync with RhpDoubleToUInt32 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) + { + // Too small should saturate to 0 (UINT32_MIN) + return 0; + } + + if (val >= +4294967296.0) + { + // Too large should saturate to UINT32_MAX + return UINT32_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return UINT32(val); +} + +uint64_t FloatingPointUtils::convertDoubleToUInt64(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt64 + // This should be kept in sync with RhpDoubleToUInt64 + // ** NOTE ** + + if (_isnan(val)) + { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) + { + // Too small should saturate to 0 (UINT64_MIN) + return 0; + } + + if (val >= +18446744073709551616.0) + { + // Too large should saturate to UINT64_MAX + return UINT64_MAX; + } + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct.
- return u64; + return UINT64(val); } // Rounds a double-precision floating-point value to the nearest integer, diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 39985f1cee9b5..a71beb9b44444 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -669,11 +669,18 @@ unsigned CountDigits(double num, unsigned base = 10); class FloatingPointUtils { public: - static double convertUInt64ToDouble(unsigned __int64 u64); - - static float convertUInt64ToFloat(unsigned __int64 u64); - - static unsigned __int64 convertDoubleToUInt64(double d); + static double convertInt64ToDouble(int64_t val); + static double convertUInt64ToDouble(uint64_t val); + + static int8_t convertDoubleToInt8(double val); + static int16_t convertDoubleToInt16(double val); + static int32_t convertDoubleToInt32(double val); + static int64_t convertDoubleToInt64(double val); + + static uint8_t convertDoubleToUInt8(double val); + static uint16_t convertDoubleToUInt16(double val); + static uint32_t convertDoubleToUInt32(double val); + static uint64_t convertDoubleToUInt64(double val); static double round(double x); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index e52b3226f3bd8..fb3f59a38bb9c 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -3164,21 +3164,25 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu assert(typ == TYP_FLOAT); if (srcIsUnsigned) { - return VNForFloatCon(FloatingPointUtils::convertUInt64ToFloat(UINT64(arg0Val))); + double d = FloatingPointUtils::convertUInt64ToDouble(UINT64(arg0Val)); + return VNForFloatCon(forceCastToFloat(d)); } else { - return VNForFloatCon(float(arg0Val)); + double d = FloatingPointUtils::convertInt64ToDouble(arg0Val); + return VNForFloatCon(forceCastToFloat(d)); } case TYP_DOUBLE: assert(typ == TYP_DOUBLE); if (srcIsUnsigned) { - return VNForDoubleCon(FloatingPointUtils::convertUInt64ToDouble(UINT64(arg0Val))); + double d = FloatingPointUtils::convertUInt64ToDouble(UINT64(arg0Val)); + return VNForDoubleCon(d); } else { - return VNForDoubleCon(double(arg0Val)); + double d = FloatingPointUtils::convertInt64ToDouble(arg0Val); + return VNForDoubleCon(d); } default: unreached(); @@ -10060,81 +10064,6 @@ void Compiler::fgValueNumberCall(GenTreeCall* call) } } -void Compiler::fgValueNumberCastHelper(GenTreeCall* call) -{ - CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); - var_types castToType = TYP_UNDEF; - var_types castFromType = TYP_UNDEF; - bool srcIsUnsigned = false; - bool hasOverflowCheck = false; - - switch (helpFunc) - { - case CORINFO_HELP_LNG2DBL: - castToType = TYP_DOUBLE; - castFromType = TYP_LONG; - break; - - case CORINFO_HELP_ULNG2DBL: - castToType = TYP_DOUBLE; - castFromType = TYP_LONG; - srcIsUnsigned = true; - break; - - case CORINFO_HELP_DBL2INT: - castToType = TYP_INT; - castFromType = TYP_DOUBLE; - break; - - case CORINFO_HELP_DBL2INT_OVF: - castToType = TYP_INT; - castFromType = TYP_DOUBLE; - hasOverflowCheck = true; - break; - - case CORINFO_HELP_DBL2LNG: - castToType = TYP_LONG; - castFromType = TYP_DOUBLE; - break; - - case CORINFO_HELP_DBL2LNG_OVF: - castToType = TYP_LONG; - castFromType = TYP_DOUBLE; - hasOverflowCheck = true; - break; - - case CORINFO_HELP_DBL2UINT: - castToType = TYP_UINT; - castFromType = TYP_DOUBLE; - break; - - case CORINFO_HELP_DBL2UINT_OVF: - castToType = TYP_UINT; - castFromType = TYP_DOUBLE; - hasOverflowCheck = true; - break; - - case CORINFO_HELP_DBL2ULNG: - castToType = TYP_ULONG; - castFromType = 
TYP_DOUBLE; - break; - - case CORINFO_HELP_DBL2ULNG_OVF: - castToType = TYP_ULONG; - castFromType = TYP_DOUBLE; - hasOverflowCheck = true; - break; - - default: - unreached(); - } - - ValueNumPair argVNP = call->fgArgInfo->GetArgNode(0)->gtVNPair; - ValueNumPair castVNP = vnStore->VNPairForCast(argVNP, castToType, castFromType, srcIsUnsigned, hasOverflowCheck); - - call->SetVNs(castVNP); -} - VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc) { assert(s_helperCallProperties.IsPure(helpFunc) || s_helperCallProperties.IsAllocator(helpFunc)); @@ -10198,6 +10127,62 @@ VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc) vnf = VNF_DblRound; break; // Is this the right thing? + case CORINFO_HELP_Int64ToDouble: + vnf = VNF_Int64ToDouble; + break; + case CORINFO_HELP_UInt64ToDouble: + vnf = VNF_UInt64ToDouble; + break; + + case CORINFO_HELP_DoubleToInt8: + vnf = VNF_DoubleToInt8; + break; + case CORINFO_HELP_DoubleToInt8_OVF: + vnf = VNF_DoubleToInt8Ovf; + break; + case CORINFO_HELP_DoubleToInt16: + vnf = VNF_DoubleToInt16; + break; + case CORINFO_HELP_DoubleToInt16_OVF: + vnf = VNF_DoubleToInt16Ovf; + break; + case CORINFO_HELP_DoubleToInt32: + vnf = VNF_DoubleToInt32; + break; + case CORINFO_HELP_DoubleToInt32_OVF: + vnf = VNF_DoubleToInt32Ovf; + break; + case CORINFO_HELP_DoubleToInt64: + vnf = VNF_DoubleToInt64; + break; + case CORINFO_HELP_DoubleToInt64_OVF: + vnf = VNF_DoubleToInt64Ovf; + break; + case CORINFO_HELP_DoubleToUInt8: + vnf = VNF_DoubleToUInt8; + break; + case CORINFO_HELP_DoubleToUInt8_OVF: + vnf = VNF_DoubleToUInt8Ovf; + break; + case CORINFO_HELP_DoubleToUInt16: + vnf = VNF_DoubleToUInt16; + break; + case CORINFO_HELP_DoubleToUInt16_OVF: + vnf = VNF_DoubleToUInt16Ovf; + break; + case CORINFO_HELP_DoubleToUInt32: + vnf = VNF_DoubleToUInt32; + break; + case CORINFO_HELP_DoubleToUInt32_OVF: + vnf = VNF_DoubleToUInt32Ovf; + break; + case CORINFO_HELP_DoubleToUInt64: + vnf = VNF_DoubleToUInt64; + break; + case CORINFO_HELP_DoubleToUInt64_OVF: + vnf = VNF_DoubleToUInt64Ovf; + break; + // These allocation operations probably require some augmentation -- perhaps allocSiteId, // something about array length... 
case CORINFO_HELP_NEWFAST: @@ -10392,25 +10377,6 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); - switch (helpFunc) - { - case CORINFO_HELP_LNG2DBL: - case CORINFO_HELP_ULNG2DBL: - case CORINFO_HELP_DBL2INT: - case CORINFO_HELP_DBL2INT_OVF: - case CORINFO_HELP_DBL2LNG: - case CORINFO_HELP_DBL2LNG_OVF: - case CORINFO_HELP_DBL2UINT: - case CORINFO_HELP_DBL2UINT_OVF: - case CORINFO_HELP_DBL2ULNG: - case CORINFO_HELP_DBL2ULNG_OVF: - fgValueNumberCastHelper(call); - return false; - - default: - break; - } - bool pure = s_helperCallProperties.IsPure(helpFunc); bool isAlloc = s_helperCallProperties.IsAllocator(helpFunc); bool modHeap = s_helperCallProperties.MutatesHeap(helpFunc); diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 74e8b90ff19bb..bbdf9d18a1471 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -73,6 +73,26 @@ ValueNumFuncDef(HelperMultipleExc, 0, false, false, false) // Represents one or ValueNumFuncDef(FltRound, 1, false, false, false) ValueNumFuncDef(DblRound, 1, false, false, false) +ValueNumFuncDef(Int64ToDouble, 1, false, false, false) +ValueNumFuncDef(UInt64ToDouble, 1, false, false, false) + +ValueNumFuncDef(DoubleToInt8, 1, false, false, false) +ValueNumFuncDef(DoubleToInt8Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToInt16, 1, false, false, false) +ValueNumFuncDef(DoubleToInt16Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToInt32, 1, false, false, false) +ValueNumFuncDef(DoubleToInt32Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToInt64, 1, false, false, false) +ValueNumFuncDef(DoubleToInt64Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt8, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt8Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt16, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt16Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt32, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt32Ovf, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt64, 1, false, false, false) +ValueNumFuncDef(DoubleToUInt64Ovf, 1, false, false, false) + ValueNumFuncDef(Abs, 1, false, false, false) ValueNumFuncDef(Acos, 1, false, false, false) ValueNumFuncDef(Acosh, 1, false, false, false) diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index 1209e9c554362..dda663b5111e2 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -9,14 +9,310 @@ // Floating point and 64-bit integer math helpers. // -EXTERN_C REDHAWK_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val) -{ - return((uint64_t)val); -} - #undef min #undef max #include <cmath> +#include <limits> + +double PlatformInt64ToDouble(int64_t val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct.
+ + return double(val); +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpInt64ToDouble(int64_t val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_Int64ToDouble + // This should be kept in sync with FloatingPointUtils::convertInt64ToDouble + // ** NOTE ** + + return PlatformInt64ToDouble(val); +} + +double PlatformUInt64ToDouble(uint64_t val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return double(val); +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpUInt64ToDouble(uint64_t val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_UInt64ToDouble + // This should be kept in sync with FloatingPointUtils::convertUInt64ToDouble + // ** NOTE ** + + return PlatformUInt64ToDouble(val); +} + +int8_t PlatformDoubleToInt8(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return int8_t(val); +} + +EXTERN_C REDHAWK_API int8_t REDHAWK_CALLCONV RhpDoubleToInt8(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt8 + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt8 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -129.0) { + // Too small should saturate to int8::min + return std::numeric_limits<int8_t>::min(); + } + + if (val >= +128.0) { + // Too large should saturate to int8::max + return std::numeric_limits<int8_t>::max(); + } + + return PlatformDoubleToInt8(val); +} + +int16_t PlatformDoubleToInt16(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return int16_t(val); +} + +EXTERN_C REDHAWK_API int16_t REDHAWK_CALLCONV RhpDoubleToInt16(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt16 + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt16 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -32769.0) { + // Too small should saturate to int16::min + return std::numeric_limits<int16_t>::min(); + } + + if (val >= +32768.0) { + // Too large should saturate to int16::max + return std::numeric_limits<int16_t>::max(); + } + + return PlatformDoubleToInt16(val); +} + +int32_t PlatformDoubleToInt32(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct.
+ + return int32_t(val); +} + +EXTERN_C REDHAWK_API int32_t REDHAWK_CALLCONV RhpDoubleToInt32(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt32 + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt32 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -2147483649.0) { + // Too small should saturate to int32::min + return std::numeric_limits<int32_t>::min(); + } + + if (val >= +2147483648.0) { + // Too large should saturate to int32::max + return std::numeric_limits<int32_t>::max(); + } + + return PlatformDoubleToInt32(val); +} + +int64_t PlatformDoubleToInt64(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return int64_t(val); +} + +EXTERN_C REDHAWK_API int64_t REDHAWK_CALLCONV RhpDoubleToInt64(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToInt64 + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt64 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -9223372036854777856.0) { + // Too small should saturate to int64::min + return std::numeric_limits<int64_t>::min(); + } + + if (val >= +9223372036854775808.0) { + // Too large should saturate to int64::max + return std::numeric_limits<int64_t>::max(); + } + + return PlatformDoubleToInt64(val); +} + +uint8_t PlatformDoubleToUInt8(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return uint8_t(val); +} + +EXTERN_C REDHAWK_API uint8_t REDHAWK_CALLCONV RhpDoubleToUInt8(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt8 + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt8 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to uint8::min + return std::numeric_limits<uint8_t>::min(); + } + + if (val >= +256.0) { + // Too large should saturate to uint8::max + return std::numeric_limits<uint8_t>::max(); + } + + return PlatformDoubleToUInt8(val); +} + +uint16_t PlatformDoubleToUInt16(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return uint16_t(val); +} + +EXTERN_C REDHAWK_API uint16_t REDHAWK_CALLCONV RhpDoubleToUInt16(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt16 + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt16 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to uint16::min + return std::numeric_limits<uint16_t>::min(); + } + + if (val >= +65536.0) { + // Too large should saturate to uint16::max + return std::numeric_limits<uint16_t>::max(); + } + + return PlatformDoubleToUInt16(val); +} + +uint32_t PlatformDoubleToUInt32(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct.
+ + return uint32_t(val); +} + +EXTERN_C REDHAWK_API uint32_t REDHAWK_CALLCONV RhpDoubleToUInt32(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt32 + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt32 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to uint32::min + return std::numeric_limits<uint32_t>::min(); + } + + if (val >= +4294967296.0) { + // Too large should saturate to uint32::max + return std::numeric_limits<uint32_t>::max(); + } + + return PlatformDoubleToUInt32(val); +} + +uint64_t PlatformDoubleToUInt64(double val) +{ + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return uint64_t(val); +} + +EXTERN_C REDHAWK_API uint64_t REDHAWK_CALLCONV RhpDoubleToUInt64(double val) +{ + // ** NOTE ** + // This should be kept in sync with CORINFO_HELP_DoubleToUInt64 + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt64 + // ** NOTE ** + + if (std::isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to uint64::min + return std::numeric_limits<uint64_t>::min(); + } + + if (val >= +18446744073709551616.0) { + // Too large values should saturate to uint64::max + return std::numeric_limits<uint64_t>::max(); + } + + return PlatformDoubleToUInt64(val); +} EXTERN_C REDHAWK_API float REDHAWK_CALLCONV RhpFltRem(float dividend, float divisor) { @@ -151,29 +447,4 @@ EXTERN_C REDHAWK_API int64_t REDHAWK_CALLCONV RhpLLsh(int64_t i, int32_t j) return i << j; } -EXTERN_C REDHAWK_API int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val) -{ - return (int64_t)val; -} - -EXTERN_C REDHAWK_API int32_t REDHAWK_CALLCONV RhpDbl2Int(double val) -{ - return (int32_t)val; -} - -EXTERN_C REDHAWK_API uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val) -{ - return (uint32_t)val; -} - -EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val) -{ - return (double)val; -} - -EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpULng2Dbl(uint64_t val) -{ - return (double)val; -} - #endif // HOST_ARM diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs index 5f19f6b01e197..8cde748b31131 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs @@ -214,76 +214,161 @@ private static long ThrowLngArithExc() } #endif // TARGET_64BIT - [RuntimeExport("Dbl2IntOvf")] - public static int Dbl2IntOvf(double val) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static sbyte PlatformDoubleToInt8(double val) { - const double two31 = 2147483648.0; + return (sbyte)val; + } - // Note that this expression also works properly for val = NaN case - if (val > -two31 - 1 && val < two31) - return unchecked((int)val); + [RuntimeExport("DoubleToInt8Ovf")] + public static sbyte DoubleToInt8Ovf(double val) + { + if (val > -129.0 && val < 128.0) + { + // -129.0 and +128.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToInt8(val); + } - return ThrowIntOvf(); + return ThrowInt8OverflowException(); } - [RuntimeExport("Dbl2UIntOvf")] - public
static uint Dbl2UIntOvf(double val) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static byte PlatformDoubleToUInt8(double val) { - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < 4294967296.0) - return unchecked((uint)val); + return (byte)val; + } - return ThrowUIntOvf(); + [RuntimeExport("DoubleToUInt8Ovf")] + public static byte DoubleToUInt8Ovf(double val) + { + if (val > -1.0 && val < +256.0) + { + // -1.0 and +256.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt8(val); + } + + return ThrowUInt8OverflowException(); } - [RuntimeExport("Dbl2LngOvf")] - public static long Dbl2LngOvf(double val) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static short PlatformDoubleToInt16(double val) { - const double two63 = 2147483648.0 * 4294967296.0; + return (short)val; + } - // Note that this expression also works properly for val = NaN case - // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. - if (val > -two63 - 0x402 && val < two63) - return unchecked((long)val); + [RuntimeExport("DoubleToInt16Ovf")] + public static short DoubleToInt16Ovf(double val) + { + if (val > -32769.0 && val < +32768.0) + { + // -32769.0 and +32768.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToInt16(val); + } - return ThrowLngOvf(); + return ThrowInt16OverflowException(); } - [RuntimeExport("Dbl2ULngOvf")] - public static ulong Dbl2ULngOvf(double val) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ushort PlatformDoubleToUInt16(double val) { - const double two64 = 2.0 * 2147483648.0 * 4294967296.0; + return (ushort)val; + } - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < two64) - return unchecked((ulong)val); + [RuntimeExport("DoubleToUInt16Ovf")] + public static ushort DoubleToUInt16Ovf(double val) + { + if (val > -1.0 && val < +65536.0) + { + // -1.0 and +65536.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt16(val); + } - return ThrowULngOvf(); + return ThrowUInt16OverflowException(); } - [RuntimeExport("Flt2IntOvf")] - public static int Flt2IntOvf(float val) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int PlatformDoubleToInt32(double val) { - const double two31 = 2147483648.0; + return (int)val; + } + + [RuntimeExport("DoubleToInt32Ovf")] + public static int DoubleToInt32Ovf(double val) + { + if (val > -2147483649.0 && val < +2147483648.0) + { + // -2147483649.0 and +2147483648.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToInt32(val); + } - // Note that this expression also works properly for val = NaN case - if (val > -two31 - 1 && val < two31) - return ((int)val); + return ThrowInt32OverflowException(); + } - return ThrowIntOvf(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint PlatformDoubleToUInt32(double val) + { + return (uint)val; } - [RuntimeExport("Flt2LngOvf")] - public static long Flt2LngOvf(float val) + [RuntimeExport("DoubleToUInt32Ovf")] + public static uint DoubleToUInt32Ovf(double val) { - const double two63 = 2147483648.0 * 4294967296.0; + if (val > -1.0 && val < +4294967296.0) + { + // -1.0 and +4294967296.0 are exactly representable + // 
Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt32(val); + } - // Note that this expression also works properly for val = NaN case - // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. - if (val > -two63 - 0x402 && val < two63) - return ((long)val); + return ThrowUInt32OverflowException(); + } - return ThrowIntOvf(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static long PlatformDoubleToInt64(double val) + { + return (long)val; + } + + [RuntimeExport("DoubleToInt64Ovf")] + public static long DoubleToInt64Ovf(double val) + { + if (val > -9223372036854777856.0 && val < +9223372036854775808.0) + { + // +9223372036854775808.0 is exactly representable + // + // -9223372036854775809.0 however, is not, and rounds to -9223372036854775808.0 + // we use -9223372036854777856.0 instead, which is the next representable value smaller + // than -9223372036854775808.0 + // + // Note that this expression also works properly for val = NaN case + return PlatformDoubleToInt64(val); + } + + return ThrowInt64OverflowException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ulong PlatformDoubleToUInt64(double val) + { + return (ulong)val; + } + + [RuntimeExport("DoubleToUInt64Ovf")] + public static ulong DoubleToUInt64Ovf(double val) + { + if (val > -1.0 && val < +18446744073709551616.0) + { + // -1.0 and +18446744073709551616.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt64(val); + } + + return ThrowUInt64OverflowException(); } #if TARGET_ARM @@ -344,25 +429,49 @@ public static long UMod(uint i, uint j) // [MethodImpl(MethodImplOptions.NoInlining)] - private static int ThrowIntOvf() + private static sbyte ThrowInt8OverflowException() + { + throw new OverflowException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static byte ThrowUInt8OverflowException() + { + throw new OverflowException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static short ThrowInt16OverflowException() + { + throw new OverflowException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ushort ThrowUInt16OverflowException() + { + throw new OverflowException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int ThrowInt32OverflowException() { throw new OverflowException(); } [MethodImpl(MethodImplOptions.NoInlining)] - private static uint ThrowUIntOvf() + private static uint ThrowUInt32OverflowException() { throw new OverflowException(); } [MethodImpl(MethodImplOptions.NoInlining)] - private static long ThrowLngOvf() + private static long ThrowInt64OverflowException() { throw new OverflowException(); } [MethodImpl(MethodImplOptions.NoInlining)] - private static ulong ThrowULngOvf() + private static ulong ThrowUInt64OverflowException() { throw new OverflowException(); } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 30f7caa363636..70e0a92c11587 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -236,8 +236,8 @@ public enum ReadyToRunHelper LLsh = 0xC7, LRsh = 0xC8, LRsz = 0xC9, - Lng2Dbl = 0xCA, - ULng2Dbl = 0xCB, + Int64ToDouble = 0xCA, + UInt64ToDouble = 0xCB, // 32-bit division helpers Div = 0xCC, @@ -246,14 +246,22 @@ public enum
ReadyToRunHelper UMod = 0xCF, // Floating point conversions - Dbl2Int = 0xD0, - Dbl2IntOvf = 0xD1, - Dbl2Lng = 0xD2, - Dbl2LngOvf = 0xD3, - Dbl2UInt = 0xD4, - Dbl2UIntOvf = 0xD5, - Dbl2ULng = 0xD6, - Dbl2ULngOvf = 0xD7, + DoubleToInt32 = 0xD0, + DoubleToInt32Ovf = 0xD1, + DoubleToInt64 = 0xD2, + DoubleToInt64Ovf = 0xD3, + DoubleToUInt32 = 0xD4, + DoubleToUInt32Ovf = 0xD5, + DoubleToUInt64 = 0xD6, + DoubleToUInt64Ovf = 0xD7, + DoubleToInt8 = 0xD8, + DoubleToInt8Ovf = 0xD9, + DoubleToInt16 = 0xDA, + DoubleToInt16Ovf = 0xDB, + DoubleToUInt8 = 0xDC, + DoubleToUInt8Ovf = 0xDD, + DoubleToUInt16 = 0xDE, + DoubleToUInt16Ovf = 0xDF, // Floating point ops DblRem = 0xE0, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index e39535645034e..60856fe61a24f 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -32,22 +32,30 @@ public enum CorInfoHelpFunc CORINFO_HELP_LMOD, CORINFO_HELP_ULDIV, CORINFO_HELP_ULMOD, - CORINFO_HELP_LNG2DBL, // Convert a signed int64 to a double - CORINFO_HELP_ULNG2DBL, // Convert a unsigned int64 to a double - CORINFO_HELP_DBL2INT, - CORINFO_HELP_DBL2INT_OVF, - CORINFO_HELP_DBL2LNG, - CORINFO_HELP_DBL2LNG_OVF, - CORINFO_HELP_DBL2UINT, - CORINFO_HELP_DBL2UINT_OVF, - CORINFO_HELP_DBL2ULNG, - CORINFO_HELP_DBL2ULNG_OVF, + CORINFO_HELP_Int64ToDouble, + CORINFO_HELP_UInt64ToDouble, + CORINFO_HELP_DoubleToInt8, + CORINFO_HELP_DoubleToInt8_OVF, + CORINFO_HELP_DoubleToInt16, + CORINFO_HELP_DoubleToInt16_OVF, + CORINFO_HELP_DoubleToInt32, + CORINFO_HELP_DoubleToInt32_OVF, + CORINFO_HELP_DoubleToInt64, + CORINFO_HELP_DoubleToInt64_OVF, + CORINFO_HELP_DoubleToUInt8, + CORINFO_HELP_DoubleToUInt8_OVF, + CORINFO_HELP_DoubleToUInt16, + CORINFO_HELP_DoubleToUInt16_OVF, + CORINFO_HELP_DoubleToUInt32, + CORINFO_HELP_DoubleToUInt32_OVF, + CORINFO_HELP_DoubleToUInt64, + CORINFO_HELP_DoubleToUInt64_OVF, CORINFO_HELP_FLTREM, CORINFO_HELP_DBLREM, CORINFO_HELP_FLTROUND, CORINFO_HELP_DBLROUND, - /* Allocating a new object. Always use ICorClassInfo::getNewHelper() to decide + /* Allocating a new object. Always use ICorClassInfo::getNewHelper() to decide which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_NEWFAST, @@ -81,7 +89,7 @@ public enum CorInfoHelpFunc CORINFO_HELP_CHKCASTARRAY, CORINFO_HELP_CHKCASTCLASS, CORINFO_HELP_CHKCASTANY, - CORINFO_HELP_CHKCASTCLASS_SPECIAL, // Optimized helper for classes. Assumes that the trivial cases + CORINFO_HELP_CHKCASTCLASS_SPECIAL, // Optimized helper for classes. Assumes that the trivial cases // has been taken care of by the inlined check CORINFO_HELP_BOX, @@ -120,7 +128,7 @@ public enum CorInfoHelpFunc CORINFO_HELP_MON_EXIT_STATIC, CORINFO_HELP_GETCLASSFROMMETHODPARAM, // Given a generics method handle, returns a class handle - CORINFO_HELP_GETSYNCFROMCLASSHANDLE, // Given a generics class handle, returns the sync monitor + CORINFO_HELP_GETSYNCFROMCLASSHANDLE, // Given a generics class handle, returns the sync monitor // in its ManagedClassObject /* GC support */ @@ -166,7 +174,7 @@ public enum CorInfoHelpFunc CORINFO_HELP_GETSTATICFIELDADDR_TLS, // Helper for PE TLS fields - // There are a variety of specialized helpers for accessing static fields. The JIT should use + // There are a variety of specialized helpers for accessing static fields. 
The JIT should use // ICorClassInfo::getSharedStaticsOrCCtorHelper to determine which helper to use // Helpers for regular statics @@ -275,7 +283,7 @@ public enum CorInfoHelpFunc CORINFO_HELP_JIT_PINVOKE_BEGIN, // Transition to preemptive mode before a P/Invoke, frame is the first argument CORINFO_HELP_JIT_PINVOKE_END, // Transition to cooperative mode after a P/Invoke, frame is the first argument - CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, // Transition to cooperative mode in reverse P/Invoke prolog, frame is the first argument + CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, // Transition to cooperative mode in reverse P/Invoke prolog, frame is the first argument CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER_TRACK_TRANSITIONS, // Transition to cooperative mode and track transitions in reverse P/Invoke prolog. CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT, // Transition to preemptive mode in reverse P/Invoke epilog, frame is the first argument CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, // Transition to preemptive mode and track transitions in reverse P/Invoke prolog. diff --git a/src/coreclr/tools/SOS/DacTableGen/MapSymbolProvider.cs b/src/coreclr/tools/SOS/DacTableGen/MapSymbolProvider.cs index 70ce5a74c7aa8..f45076ad8a3a6 100644 --- a/src/coreclr/tools/SOS/DacTableGen/MapSymbolProvider.cs +++ b/src/coreclr/tools/SOS/DacTableGen/MapSymbolProvider.cs @@ -133,7 +133,7 @@ enum WindowsSymbolTypes { // GlobalVarName2 // Example: @JIT_WriteBarrier@ // (or) _JIT_FltRem@ - // (or) _JIT_Dbl2Lng@ + // (or) _JIT_DoubleToInt64@ // (or) _JIT_LLsh@ Reg_MapAddress + Reg_ExWhiteSpaces + diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 1a775400feafd..845d367b9ffd4 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -12,7 +12,7 @@ namespace ILCompiler internal class JitHelper { /// <summary> - /// Returns JIT helper entrypoint. JIT helpers can be either implemented by entrypoint with given mangled name or + /// Returns JIT helper entrypoint. JIT helpers can be either implemented by entrypoint with given mangled name or /// by a method in class library.
/// </summary> public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, out string mangledName, out MethodDesc methodDesc) @@ -136,37 +136,63 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, mangledName = "RhTypeCast_AreTypesEquivalent"; break; - case ReadyToRunHelper.Lng2Dbl: - mangledName = "RhpLng2Dbl"; + case ReadyToRunHelper.Int64ToDouble: + mangledName = "RhpInt64ToDouble"; break; - case ReadyToRunHelper.ULng2Dbl: - mangledName = "RhpULng2Dbl"; + case ReadyToRunHelper.UInt64ToDouble: + mangledName = "RhpUInt64ToDouble"; break; - case ReadyToRunHelper.Dbl2Lng: - mangledName = "RhpDbl2Lng"; + case ReadyToRunHelper.DoubleToInt32: + mangledName = "RhpDoubleToInt32"; break; - case ReadyToRunHelper.Dbl2ULng: - mangledName = "RhpDbl2ULng"; + case ReadyToRunHelper.DoubleToInt64: + mangledName = "RhpDoubleToInt64"; break; - case ReadyToRunHelper.Dbl2Int: - mangledName = "RhpDbl2Int"; + case ReadyToRunHelper.DoubleToUInt32: + mangledName = "RhpDoubleToUInt32"; break; - case ReadyToRunHelper.Dbl2UInt: - mangledName = "RhpDbl2UInt"; + case ReadyToRunHelper.DoubleToUInt64: + mangledName = "RhpDoubleToUInt64"; break; - case ReadyToRunHelper.Dbl2IntOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2IntOvf"); + case ReadyToRunHelper.DoubleToInt8: + mangledName = "RhpDoubleToInt8"; break; - case ReadyToRunHelper.Dbl2UIntOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2UIntOvf"); + case ReadyToRunHelper.DoubleToInt16: + mangledName = "RhpDoubleToInt16"; break; - case ReadyToRunHelper.Dbl2LngOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2LngOvf"); + case ReadyToRunHelper.DoubleToUInt8: + mangledName = "RhpDoubleToUInt8"; break; - case ReadyToRunHelper.Dbl2ULngOvf: - methodDesc = context.GetHelperEntryPoint("MathHelpers", "Dbl2ULngOvf"); + case ReadyToRunHelper.DoubleToUInt16: + mangledName = "RhpDoubleToUInt16"; + break; + + case ReadyToRunHelper.DoubleToInt32Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToInt32Ovf"); + break; + case ReadyToRunHelper.DoubleToInt64Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToInt64Ovf"); + break; + case ReadyToRunHelper.DoubleToUInt32Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToUInt32Ovf"); + break; + case ReadyToRunHelper.DoubleToUInt64Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToUInt64Ovf"); + break; + + case ReadyToRunHelper.DoubleToInt8Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToInt8Ovf"); + break; + case ReadyToRunHelper.DoubleToInt16Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToInt16Ovf"); + break; + case ReadyToRunHelper.DoubleToUInt8Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToUInt8Ovf"); + break; + case ReadyToRunHelper.DoubleToUInt16Ovf: + methodDesc = context.GetHelperEntryPoint("MathHelpers", "DoubleToUInt16Ovf"); break; case ReadyToRunHelper.DblRem: diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 2bd78b6cb4192..f3e4a4d479c01 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -746,11 +746,11 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_LRSZ: id =
ReadyToRunHelper.LRsz; break; - case CorInfoHelpFunc.CORINFO_HELP_LNG2DBL: - id = ReadyToRunHelper.Lng2Dbl; + case CorInfoHelpFunc.CORINFO_HELP_Int64ToDouble: + id = ReadyToRunHelper.Int64ToDouble; break; - case CorInfoHelpFunc.CORINFO_HELP_ULNG2DBL: - id = ReadyToRunHelper.ULng2Dbl; + case CorInfoHelpFunc.CORINFO_HELP_UInt64ToDouble: + id = ReadyToRunHelper.UInt64ToDouble; break; case CorInfoHelpFunc.CORINFO_HELP_DIV: @@ -766,29 +766,53 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) id = ReadyToRunHelper.UMod; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2INT: - id = ReadyToRunHelper.Dbl2Int; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt32: + id = ReadyToRunHelper.DoubleToInt32; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2INT_OVF: - id = ReadyToRunHelper.Dbl2IntOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt32_OVF: + id = ReadyToRunHelper.DoubleToInt32Ovf; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2LNG: - id = ReadyToRunHelper.Dbl2Lng; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt64: + id = ReadyToRunHelper.DoubleToInt64; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2LNG_OVF: - id = ReadyToRunHelper.Dbl2LngOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt64_OVF: + id = ReadyToRunHelper.DoubleToInt64Ovf; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT: - id = ReadyToRunHelper.Dbl2UInt; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt32: + id = ReadyToRunHelper.DoubleToUInt32; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF: - id = ReadyToRunHelper.Dbl2UIntOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt32_OVF: + id = ReadyToRunHelper.DoubleToUInt32Ovf; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2ULNG: - id = ReadyToRunHelper.Dbl2ULng; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt64: + id = ReadyToRunHelper.DoubleToUInt64; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2ULNG_OVF: - id = ReadyToRunHelper.Dbl2ULngOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt64_OVF: + id = ReadyToRunHelper.DoubleToUInt64Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt8: + id = ReadyToRunHelper.DoubleToInt8; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt8_OVF: + id = ReadyToRunHelper.DoubleToInt8Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt16: + id = ReadyToRunHelper.DoubleToInt16; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt16_OVF: + id = ReadyToRunHelper.DoubleToInt16Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt8: + id = ReadyToRunHelper.DoubleToUInt8; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt8_OVF: + id = ReadyToRunHelper.DoubleToUInt8Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt16: + id = ReadyToRunHelper.DoubleToUInt16; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt16_OVF: + id = ReadyToRunHelper.DoubleToUInt16Ovf; break; case CorInfoHelpFunc.CORINFO_HELP_FLTREM: diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 8ec3cf31f332e..9c70602bd6011 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -14,7 +14,7 @@ namespace ILCompiler.Reflection.ReadyToRun { /// - /// This represents all possible signatures that is + /// This represents all possible signatures that is /// public abstract class ReadyToRunSignature { @@ -1812,12 +1812,12 @@ private void ParseHelper(StringBuilder builder) 
builder.Append("LRSZ"); break; - case ReadyToRunHelper.Lng2Dbl: - builder.Append("LNG2DBL"); + case ReadyToRunHelper.Int64ToDouble: + builder.Append("INT64TODOUBLE"); break; - case ReadyToRunHelper.ULng2Dbl: - builder.Append("ULNG2DBL"); + case ReadyToRunHelper.UInt64ToDouble: + builder.Append("UINT64TODOUBLE"); break; // 32-bit division helpers @@ -1838,36 +1838,68 @@ private void ParseHelper(StringBuilder builder) break; // Floating point conversions - case ReadyToRunHelper.Dbl2Int: - builder.Append("DBL2INT"); + case ReadyToRunHelper.DoubleToInt32: + builder.Append("DOUBLETOINT32"); break; - case ReadyToRunHelper.Dbl2IntOvf: - builder.Append("DBL2INTOVF"); + case ReadyToRunHelper.DoubleToInt32Ovf: + builder.Append("DOUBLETOINT32OVF"); break; - case ReadyToRunHelper.Dbl2Lng: - builder.Append("DBL2LNG"); + case ReadyToRunHelper.DoubleToInt64: + builder.Append("DOUBLETOINT64"); break; - case ReadyToRunHelper.Dbl2LngOvf: - builder.Append("DBL2LNGOVF"); + case ReadyToRunHelper.DoubleToInt64Ovf: + builder.Append("DOUBLETOINT64OVF"); break; - case ReadyToRunHelper.Dbl2UInt: - builder.Append("DBL2UINT"); + case ReadyToRunHelper.DoubleToUInt32: + builder.Append("DOUBLETOUINT32"); break; - case ReadyToRunHelper.Dbl2UIntOvf: - builder.Append("DBL2UINTOVF"); + case ReadyToRunHelper.DoubleToUInt32Ovf: + builder.Append("DOUBLETOUINT32OVF"); break; - case ReadyToRunHelper.Dbl2ULng: - builder.Append("DBL2ULNG"); + case ReadyToRunHelper.DoubleToUInt64: + builder.Append("DOUBLETOUINT64"); break; - case ReadyToRunHelper.Dbl2ULngOvf: - builder.Append("DBL2ULNGOVF"); + case ReadyToRunHelper.DoubleToUInt64Ovf: + builder.Append("DOUBLETOUINT64OVF"); + break; + + case ReadyToRunHelper.DoubleToInt8: + builder.Append("DOUBLETOINT8"); + break; + + case ReadyToRunHelper.DoubleToInt8Ovf: + builder.Append("DOUBLETOINT8OVF"); + break; + + case ReadyToRunHelper.DoubleToInt16: + builder.Append("DOUBLETOINT16"); + break; + + case ReadyToRunHelper.DoubleToInt16Ovf: + builder.Append("DOUBLETOINT16OVF"); + break; + + case ReadyToRunHelper.DoubleToUInt8: + builder.Append("DOUBLETOUINT8"); + break; + + case ReadyToRunHelper.DoubleToUInt8Ovf: + builder.Append("DOUBLETOUINT8OVF"); + break; + + case ReadyToRunHelper.DoubleToUInt16: + builder.Append("DOUBLETOUINT16"); + break; + + case ReadyToRunHelper.DoubleToUInt16Ovf: + builder.Append("DOUBLETOUINT16OVF"); break; // Floating point ops diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 6c231eff007cd..3d37496164304 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -561,11 +561,11 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_LRSZ: id = ReadyToRunHelper.LRsz; break; - case CorInfoHelpFunc.CORINFO_HELP_LNG2DBL: - id = ReadyToRunHelper.Lng2Dbl; + case CorInfoHelpFunc.CORINFO_HELP_Int64ToDouble: + id = ReadyToRunHelper.Int64ToDouble; break; - case CorInfoHelpFunc.CORINFO_HELP_ULNG2DBL: - id = ReadyToRunHelper.ULng2Dbl; + case CorInfoHelpFunc.CORINFO_HELP_UInt64ToDouble: + id = ReadyToRunHelper.UInt64ToDouble; break; case CorInfoHelpFunc.CORINFO_HELP_DIV: @@ -581,29 +581,54 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) id = ReadyToRunHelper.UMod; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2INT: - id = ReadyToRunHelper.Dbl2Int; + case 
CorInfoHelpFunc.CORINFO_HELP_DoubleToInt32: + id = ReadyToRunHelper.DoubleToInt32; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2INT_OVF: - id = ReadyToRunHelper.Dbl2IntOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt32_OVF: + id = ReadyToRunHelper.DoubleToInt32Ovf; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2LNG: - id = ReadyToRunHelper.Dbl2Lng; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt64: + id = ReadyToRunHelper.DoubleToInt64; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2LNG_OVF: - id = ReadyToRunHelper.Dbl2LngOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt64_OVF: + id = ReadyToRunHelper.DoubleToInt64Ovf; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT: - id = ReadyToRunHelper.Dbl2UInt; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt32: + id = ReadyToRunHelper.DoubleToUInt32; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2UINT_OVF: - id = ReadyToRunHelper.Dbl2UIntOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt32_OVF: + id = ReadyToRunHelper.DoubleToUInt32Ovf; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2ULNG: - id = ReadyToRunHelper.Dbl2ULng; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt64: + id = ReadyToRunHelper.DoubleToUInt64; break; - case CorInfoHelpFunc.CORINFO_HELP_DBL2ULNG_OVF: - id = ReadyToRunHelper.Dbl2ULngOvf; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt64_OVF: + id = ReadyToRunHelper.DoubleToUInt64Ovf; + break; + + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt8: + id = ReadyToRunHelper.DoubleToInt8; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt8_OVF: + id = ReadyToRunHelper.DoubleToInt8Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt16: + id = ReadyToRunHelper.DoubleToInt16; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToInt16_OVF: + id = ReadyToRunHelper.DoubleToInt16Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt8: + id = ReadyToRunHelper.DoubleToUInt8; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt8_OVF: + id = ReadyToRunHelper.DoubleToUInt8Ovf; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt16: + id = ReadyToRunHelper.DoubleToUInt16; + break; + case CorInfoHelpFunc.CORINFO_HELP_DoubleToUInt16_OVF: + id = ReadyToRunHelper.DoubleToUInt16Ovf; break; case CorInfoHelpFunc.CORINFO_HELP_FLTREM: diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S index e1733810a1a76..b99af3c0ecbb5 100644 --- a/src/coreclr/vm/i386/jithelp.S +++ b/src/coreclr/vm/i386/jithelp.S @@ -551,87 +551,6 @@ LOCAL_LABEL(LRszMORE32): ret LEAF_END JIT_LRsz, _TEXT -// *********************************************************************/ -// JIT_Dbl2LngP4x87 -// -// Purpose: -// converts a double to a long truncating toward zero (C semantics) -// -// uses stdcall calling conventions -// -// This code is faster on a P4 than the Dbl2Lng code above, but is -// slower on a PIII. Hence we choose this code when on a P4 or above. 
-// -LEAF_ENTRY JIT_Dbl2LngP4x87, _TEXT - // get some local space - sub esp, 8 - - #define arg1 [esp + 0x0C] - fld QWORD PTR arg1 // fetch arg - fnstcw WORD PTR arg1 // store FPCW - movzx eax, WORD PTR arg1 // zero extend - wide - or ah, 0x0C // turn on OE and DE flags - mov DWORD PTR [esp], eax // store new FPCW bits - fldcw WORD PTR [esp] // reload FPCW with new bits - fistp QWORD PTR [esp] // convert - - // reload FP result - mov eax, DWORD PTR [esp] - mov edx, DWORD PTR [esp + 4] - - // reload original FPCW value - fldcw WORD PTR arg1 - #undef arg1 - - // restore stack - add esp, 8 - - ret -LEAF_END JIT_Dbl2LngP4x87, _TEXT - -// *********************************************************************/ -// JIT_Dbl2LngSSE3 -// -// Purpose: -// converts a double to a long truncating toward zero (C semantics) -// -// uses stdcall calling conventions -// -// This code is faster than the above P4 x87 code for Intel processors -// equal or later than Core2 and Atom that have SSE3 support -// -LEAF_ENTRY JIT_Dbl2LngSSE3, _TEXT - // get some local space - sub esp, 8 - - fld QWORD PTR [esp + 0x0C] // fetch arg - fisttp QWORD PTR [esp] // convert - mov eax, DWORD PTR [esp] // reload FP result - mov edx, DWORD PTR [esp + 4] - - // restore stack - add esp, 8 - - ret -LEAF_END JIT_Dbl2LngSSE3, _TEXT - -// *********************************************************************/ -// JIT_Dbl2IntSSE2 -// -// Purpose: -// converts a double to a long truncating toward zero (C semantics) -// -// uses stdcall calling conventions -// -// This code is even faster than the P4 x87 code for Dbl2LongP4x87, -// but only returns a 32 bit value (only good for int). -// -LEAF_ENTRY JIT_Dbl2IntSSE2, _TEXT - movsd xmm0, [esp + 4] - cvttsd2si eax, xmm0 - ret -LEAF_END JIT_Dbl2IntSSE2, _TEXT - // *********************************************************************/ // JIT_StackProbe // diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm index 3650b3f2afd6d..5a98257caace1 100644 --- a/src/coreclr/vm/i386/jithelp.asm +++ b/src/coreclr/vm/i386/jithelp.asm @@ -36,11 +36,6 @@ JIT_LLsh TEXTEQU <_JIT_LLsh@0> JIT_LRsh TEXTEQU <_JIT_LRsh@0> JIT_LRsz TEXTEQU <_JIT_LRsz@0> JIT_LMul TEXTEQU <@JIT_LMul@16> -JIT_Dbl2LngOvf TEXTEQU <@JIT_Dbl2LngOvf@8> -JIT_Dbl2Lng TEXTEQU <@JIT_Dbl2Lng@8> -JIT_Dbl2IntSSE2 TEXTEQU <@JIT_Dbl2IntSSE2@8> -JIT_Dbl2LngP4x87 TEXTEQU <@JIT_Dbl2LngP4x87@8> -JIT_Dbl2LngSSE3 TEXTEQU <@JIT_Dbl2LngSSE3@8> JIT_InternalThrowFromHelper TEXTEQU <@JIT_InternalThrowFromHelper@4> JIT_WriteBarrierReg_PreGrow TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0> JIT_WriteBarrierReg_PostGrow TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0> @@ -635,182 +630,6 @@ LMul_hard: JIT_LMul ENDP -;*********************************************************************/ -; JIT_Dbl2LngOvf - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; with check for overflow -; -; uses stdcall calling conventions -; -PUBLIC JIT_Dbl2LngOvf -JIT_Dbl2LngOvf PROC - fnclex - fld qword ptr [esp+4] - push ecx - push ecx - fstp qword ptr [esp] - call JIT_Dbl2Lng - mov ecx,eax - fnstsw ax - test ax,01h - jnz Dbl2LngOvf_throw - mov eax,ecx - ret 8 - -Dbl2LngOvf_throw: - mov ECX, CORINFO_OverflowException_ASM - call JIT_InternalThrowFromHelper - ret 8 -JIT_Dbl2LngOvf ENDP - -;*********************************************************************/ -; JIT_Dbl2Lng - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; note that changing the rounding mode is 
very expensive. This -; routine basiclly does the truncation sematics without changing -; the rounding mode, resulting in a win. -; -PUBLIC JIT_Dbl2Lng -JIT_Dbl2Lng PROC - fld qword ptr[ESP+4] ; fetch arg - lea ecx,[esp-8] - sub esp,16 ; allocate frame - and ecx,-8 ; align pointer on boundary of 8 - fld st(0) ; duplciate top of stack - fistp qword ptr[ecx] ; leave arg on stack, also save in temp - fild qword ptr[ecx] ; arg, round(arg) now on stack - mov edx,[ecx+4] ; high dword of integer - mov eax,[ecx] ; low dword of integer - test eax,eax - je integer_QNaN_or_zero - -arg_is_not_integer_QNaN: - fsubp st(1),st ; TOS=d-round(d), - ; { st(1)=st(1)-st & pop ST } - test edx,edx ; what's sign of integer - jns positive - ; number is negative - ; dead cycle - ; dead cycle - fstp dword ptr[ecx] ; result of subtraction - mov ecx,[ecx] ; dword of difference(single precision) - add esp,16 - xor ecx,80000000h - add ecx,7fffffffh ; if difference>0 then increment integer - adc eax,0 ; inc eax (add CARRY flag) - adc edx,0 ; propagate carry flag to upper bits - ret 8 - -positive: - fstp dword ptr[ecx] ;17-18 ; result of subtraction - mov ecx,[ecx] ; dword of difference (single precision) - add esp,16 - add ecx,7fffffffh ; if difference<0 then decrement integer - sbb eax,0 ; dec eax (subtract CARRY flag) - sbb edx,0 ; propagate carry flag to upper bits - ret 8 - -integer_QNaN_or_zero: - test edx,7fffffffh - jnz arg_is_not_integer_QNaN - fstp st(0) ;; pop round(arg) - fstp st(0) ;; arg - add esp,16 - ret 8 -JIT_Dbl2Lng ENDP - -;*********************************************************************/ -; JIT_Dbl2LngP4x87 - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; This code is faster on a P4 than the Dbl2Lng code above, but is -; slower on a PIII. Hence we choose this code when on a P4 or above. -; -PUBLIC JIT_Dbl2LngP4x87 -JIT_Dbl2LngP4x87 PROC -arg1 equ <[esp+0Ch]> - - sub esp, 8 ; get some local space - - fld qword ptr arg1 ; fetch arg - fnstcw word ptr arg1 ; store FPCW - movzx eax, word ptr arg1 ; zero extend - wide - or ah, 0Ch ; turn on OE and DE flags - mov dword ptr [esp], eax ; store new FPCW bits - fldcw word ptr [esp] ; reload FPCW with new bits - fistp qword ptr [esp] ; convert - mov eax, dword ptr [esp] ; reload FP result - mov edx, dword ptr [esp+4] ; - fldcw word ptr arg1 ; reload original FPCW value - - add esp, 8 ; restore stack - - ret 8 -JIT_Dbl2LngP4x87 ENDP - -;*********************************************************************/ -; JIT_Dbl2LngSSE3 - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; This code is faster than the above P4 x87 code for Intel processors -; equal or later than Core2 and Atom that have SSE3 support -; -.686P -.XMM -PUBLIC JIT_Dbl2LngSSE3 -JIT_Dbl2LngSSE3 PROC -arg1 equ <[esp+0Ch]> - - sub esp, 8 ; get some local space - - fld qword ptr arg1 ; fetch arg - fisttp qword ptr [esp] ; convert - mov eax, dword ptr [esp] ; reload FP result - mov edx, dword ptr [esp+4] - - add esp, 8 ; restore stack - - ret 8 -JIT_Dbl2LngSSE3 ENDP -.586 - -;*********************************************************************/ -; JIT_Dbl2IntSSE2 - -;Purpose: -; converts a double to a long truncating toward zero (C semantics) -; -; uses stdcall calling conventions -; -; This code is even faster than the P4 x87 code for Dbl2LongP4x87, -; but only returns a 32 bit value (only good for int). 
-; -.686P -.XMM -PUBLIC JIT_Dbl2IntSSE2 -JIT_Dbl2IntSSE2 PROC - $movsd xmm0, [esp+4] - cvttsd2si eax, xmm0 - ret 8 -JIT_Dbl2IntSSE2 ENDP -.586 - - ;*********************************************************************/ ; This is the small write barrier thunk we use when we know the ; ephemeral generation is higher in memory than older generations. diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index 0467f347aaacb..9196b2a5f435b 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -94,26 +94,6 @@ extern "C" void STDCALL WriteBarrierAssert(BYTE* ptr, Object* obj) #endif // _DEBUG -#ifndef TARGET_UNIX - -HCIMPL1_V(INT32, JIT_Dbl2IntOvf, double val) -{ - FCALL_CONTRACT; - - INT64 ret = HCCALL1_V(JIT_Dbl2Lng, val); - - if (ret != (INT32) ret) - goto THROW; - - return (INT32) ret; - -THROW: - FCThrow(kOverflowException); -} -HCIMPLEND -#endif // TARGET_UNIX - - FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_); @@ -983,23 +963,6 @@ void InitJITHelpers1() popad } - // If bit 26 (SSE2) is set, then we can use the SSE2 flavors - // and faster x87 implementation for the P4 of Dbl2Lng. - if (dwCPUFeaturesEDX & (1<<26)) - { - SetJitHelperFunction(CORINFO_HELP_DBL2INT, JIT_Dbl2IntSSE2); - if (dwCPUFeaturesECX & 1) // check SSE3 - { - SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngSSE3); - SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngSSE3); - } - else - { - SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngP4x87); // SSE2 only for signed - SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngP4x87); - } - } - if (!(TrackAllocationsEnabled() || LoggingOn(LF_GCALLOC, LL_INFO10) #ifdef _DEBUG diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 11b9ff7bf7f5d..e5ac602c9ac81 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -487,29 +487,6 @@ HCIMPLEND #include -/*********************************************************************/ -// -HCIMPL1_V(double, JIT_ULng2Dbl, UINT64 val) -{ - FCALL_CONTRACT; - - double conv = (double) ((INT64) val); - if (conv < 0) - conv += (4294967296.0 * 4294967296.0); // add 2^64 - _ASSERTE(conv >= 0); - return(conv); -} -HCIMPLEND - -/*********************************************************************/ -// needed for ARM and RyuJIT-x86 -HCIMPL1_V(double, JIT_Lng2Dbl, INT64 val) -{ - FCALL_CONTRACT; - return double(val); -} -HCIMPLEND - //-------------------------------------------------------------------------- template ftype modftype(ftype value, ftype *iptr); @@ -542,7 +519,6 @@ ftype BankersRound(ftype value) value); } - /*********************************************************************/ // round double to nearest int (as double) HCIMPL1_V(double, JIT_DoubleRound, double val) @@ -562,118 +538,496 @@ HCIMPL1_V(float, JIT_FloatRound, float val) HCIMPLEND /*********************************************************************/ -// Call fast Dbl2Lng conversion - used by functions below -FORCEINLINE INT64 FastDbl2Lng(double val) + +FORCEINLINE double PlatformInt64ToDouble(INT64 val) { -#ifdef TARGET_X86 FCALL_CONTRACT; - return HCCALL1_V(JIT_Dbl2Lng, val); -#else + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. 
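+ //
+ // For example, INT64_MAX (2^63 - 1) is not exactly representable as a double: under the
+ // default round-to-nearest mode it converts to exactly +9223372036854775808.0 (2^63),
+ // and the plain compiler cast below is expected to honor that.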
+ + return double(val); +} + +HCIMPL1_V(double, JIT_Int64ToDouble, INT64 val) +{ FCALL_CONTRACT; - return((__int64) val); -#endif + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertInt64ToDouble + // This should be kept in sync with RhpInt64ToDouble + // ** NOTE ** + + return PlatformInt64ToDouble(val); } +HCIMPLEND /*********************************************************************/ -HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val) + +FORCEINLINE double PlatformUInt64ToDouble(UINT64 val) { FCALL_CONTRACT; - // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < 4294967296.0) - return((UINT32)FastDbl2Lng(val)); + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return double(val); +} + +HCIMPL1_V(double, JIT_UInt64ToDouble, UINT64 val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertUInt64ToDouble + // This should be kept in sync with RhpUInt64ToDouble + // ** NOTE ** + + return PlatformUInt64ToDouble(val); +} +HCIMPLEND + +/*********************************************************************/ + +FORCEINLINE INT8 PlatformDoubleToInt8(double val) +{ + FCALL_CONTRACT; + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return INT8(val); +} + +HCIMPL1_V(INT8, JIT_DoubleToInt8, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt8 + // This should be kept in sync with RhpDoubleToInt8 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -129.0) { + // Too small should saturate to INT8_MIN + return INT8_MIN; + } + + if (val >= +128.0) { + // Too large should saturate to INT8_MAX + return INT8_MAX; + } + + return PlatformDoubleToInt8(val); +} +HCIMPLEND + +HCIMPL1_V(int, JIT_DoubleToInt8Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -129.0 && val < 128.0) { + // -129.0 and +128.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToInt8(val); + } FCThrow(kOverflowException); } HCIMPLEND /*********************************************************************/ -HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) + +FORCEINLINE INT16 PlatformDoubleToInt16(double val) { FCALL_CONTRACT; - const double two63 = 2147483648.0 * 4294967296.0; - UINT64 ret; - if (val < two63) { - ret = FastDbl2Lng(val); + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. 
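+ //
+ // The callers below range-check the input first (NaN, values <= -32769.0, and values
+ // >= +32768.0 never reach this point), so the cast here only ever truncates a value
+ // that fits in an INT16.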
+ + return INT16(val); +} + +HCIMPL1_V(INT16, JIT_DoubleToInt16, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt16 + // This should be kept in sync with RhpDoubleToInt16 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; } - else { - // subtract 0x8000000000000000, do the convert then add it back again - ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); + + if (val <= -32769.0) { + // Too small should saturate to INT16_MIN + return INT16_MIN; } - return ret; + + if (val >= +32768.0) { + // Too large should saturate to INT16_MAX + return INT16_MAX; + } + + return PlatformDoubleToInt16(val); +} +HCIMPLEND + +HCIMPL1_V(int, JIT_DoubleToInt16Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -32769.0 && val < +32768.0) { + // -32769.0 and +32768.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToInt16(val); + } + + FCThrow(kOverflowException); +} +HCIMPLEND + +/*********************************************************************/ + +FORCEINLINE INT32 PlatformDoubleToInt32(double val) +{ + FCALL_CONTRACT; + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return INT32(val); +} + +HCIMPL1_V(INT32, JIT_DoubleToInt32, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt32 + // This should be kept in sync with RhpDoubleToInt32 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -2147483649.0) { + // Too small should saturate to INT32_MIN + return INT32_MIN; + } + + if (val >= +2147483648.0) { + // Too large should saturate to INT32_MAX + return INT32_MAX; + } + + return PlatformDoubleToInt32(val); +} +HCIMPLEND + +HCIMPL1_V(INT32, JIT_DoubleToInt32Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -2147483649.0 && val < +2147483648.0) { + // -2147483649.0 and +2147483648.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToInt32(val); + } + + FCThrow(kOverflowException); } HCIMPLEND /*********************************************************************/ -HCIMPL1_V(UINT64, JIT_Dbl2ULngOvf, double val) + +FORCEINLINE INT64 PlatformDoubleToInt64(double val) { FCALL_CONTRACT; - const double two64 = 4294967296.0 * 4294967296.0; + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. 
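+ //
+ // On x86 this conversion previously dispatched to JIT_Dbl2Lng via FastDbl2Lng; those
+ // hand-written x87/SSE helpers are removed by this change in favor of the compiler cast.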
+ + return INT64(val); + } + +HCIMPL1_V(INT64, JIT_DoubleToInt64, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToInt64 + // This should be kept in sync with RhpDoubleToInt64 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -9223372036854777856.0) { + // Too small should saturate to INT64_MIN + return INT64_MIN; + } + + if (val >= +9223372036854775808.0) { + // Too large should saturate to INT64_MAX + return INT64_MAX; + } + + return PlatformDoubleToInt64(val); +} +HCIMPLEND + +HCIMPL1_V(INT64, JIT_DoubleToInt64Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -9223372036854777856.0 && val < +9223372036854775808.0) { + // +9223372036854775808.0 is exactly representable + // + // -9223372036854775809.0 however, is not, and rounds to -9223372036854775808.0 + // we use -9223372036854777856.0 instead, which is the next representable value smaller + // than -9223372036854775808.0 + // // Note that this expression also works properly for val = NaN case - if (val > -1.0 && val < two64) { - const double two63 = 2147483648.0 * 4294967296.0; - UINT64 ret; - if (val < two63) { - ret = FastDbl2Lng(val); - } - else { - // subtract 0x8000000000000000, do the convert then add it back again - ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); - } -#ifdef _DEBUG - // since no overflow can occur, the value always has to be within 1 - double roundTripVal = HCCALL1_V(JIT_ULng2Dbl, ret); - _ASSERTE(val - 1.0 <= roundTripVal && roundTripVal <= val + 1.0); -#endif // _DEBUG - return ret; + return PlatformDoubleToInt64(val); } FCThrow(kOverflowException); } HCIMPLEND +/*********************************************************************/ -#if !defined(TARGET_X86) || defined(TARGET_UNIX) +FORCEINLINE UINT8 PlatformDoubleToUInt8(double val) +{ + FCALL_CONTRACT; + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. + + return UINT8(val); +} + +HCIMPL1_V(UINT8, JIT_DoubleToUInt8, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt8 + // This should be kept in sync with RhpDoubleToUInt8 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to UINT8_MIN + return UINT8_MIN; + } + + if (val >= +256.0) { + // Too large should saturate to UINT8_MAX + return UINT8_MAX; + } + + return PlatformDoubleToUInt8(val); +} +HCIMPLEND -HCIMPL1_V(INT64, JIT_Dbl2Lng, double val) +HCIMPL1_V(UINT8, JIT_DoubleToUInt8Ovf, double val) { FCALL_CONTRACT; - return((INT64)val); + if (val > -1.0 && val < +256.0) { + // -1.0 and +256.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt8(val); + } + + FCThrow(kOverflowException); } HCIMPLEND -HCIMPL1_V(int, JIT_Dbl2IntOvf, double val) +/*********************************************************************/ + +FORCEINLINE UINT16 PlatformDoubleToUInt16(double val) { FCALL_CONTRACT; - const double two31 = 2147483648.0; + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct.
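+ //
+ // As with the other Platform* wrappers, callers guarantee the value is in (-1.0, +65536.0),
+ // so truncation toward zero always lands in [0, 65535].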
- // Note that this expression also works properly for val = NaN case - if (val > -two31 - 1 && val < two31) - return((INT32)val); + return UINT16(val); +} + +HCIMPL1_V(UINT16, JIT_DoubleToUInt16, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt16 + // This should be kept in sync with RhpDoubleToUInt16 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to UINT16_MIN + return UINT16_MIN; + } + + if (val >= +65536.0) { + // Too large should saturate to UINT16_MAX + return UINT16_MAX; + } + + return PlatformDoubleToUInt16(val); +} +HCIMPLEND + +HCIMPL1_V(UINT16, JIT_DoubleToUInt16Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -1.0 && val < +65536.0) { + // -1.0 and +65536.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt16(val); + } FCThrow(kOverflowException); } HCIMPLEND -HCIMPL1_V(INT64, JIT_Dbl2LngOvf, double val) +/*********************************************************************/ + +FORCEINLINE UINT32 PlatformDoubleToUInt32(double val) { FCALL_CONTRACT; - const double two63 = 2147483648.0 * 4294967296.0; + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. - // Note that this expression also works properly for val = NaN case - // We need to compare with the very next double to two63. 0x402 is epsilon to get us there. - if (val > -two63 - 0x402 && val < two63) - return((INT64)val); + return UINT32(val); +} + +HCIMPL1_V(UINT32, JIT_DoubleToUInt32, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt32 + // This should be kept in sync with RhpDoubleToUInt32 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to UINT32_MIN + return UINT32_MIN; + } + + if (val >= +4294967296.0) { + // Too large should saturate to UINT32_MAX + return UINT32_MAX; + } + + return PlatformDoubleToUInt32(val); +} +HCIMPLEND + +HCIMPL1_V(UINT32, JIT_DoubleToUInt32Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -1.0 && val < +4294967296.0) { + // -1.0 and +4294967296.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt32(val); + } FCThrow(kOverflowException); } HCIMPLEND +/*********************************************************************/ + +FORCEINLINE UINT64 PlatformDoubleToUInt64(double val) +{ + FCALL_CONTRACT; + + // Previous versions of compilers have had incorrect implementations here, however + // all currently supported compiler implementations are believed to be correct. 
+ + return UINT64(val); +} + +HCIMPL1_V(UINT64, JIT_DoubleToUInt64, double val) +{ + FCALL_CONTRACT; + + // ** NOTE ** + // This should be kept in sync with FloatingPointUtils::convertDoubleToUInt64 + // This should be kept in sync with RhpDoubleToUInt64 + // ** NOTE ** + + if (_isnan(val)) { + // NAN should return 0 + return 0; + } + + if (val <= -1.0) { + // Too small should saturate to UINT64_MIN + return UINT64_MIN; + } + + if (val >= +18446744073709551616.0) { + // Too large values should saturate to UINT64_MAX + return UINT64_MAX; + } + + return PlatformDoubleToUInt64(val); +} +HCIMPLEND + +HCIMPL1_V(UINT64, JIT_DoubleToUInt64Ovf, double val) +{ + FCALL_CONTRACT; + + if (val > -1.0 && val < +18446744073709551616.0) { + // -1.0 and +18446744073709551616.0 are exactly representable + // Note that the above condition also works properly for val = NaN case + return PlatformDoubleToUInt64(val); + } + + FCThrow(kOverflowException); +} +HCIMPLEND + +/*********************************************************************/ + +#if !defined(TARGET_X86) || defined(TARGET_UNIX) + HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor) { FCALL_CONTRACT; diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 4c35ac5713c0f..96b5143f5fbd5 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -315,14 +315,6 @@ EXTERN_C FCDECL2(Object*, JIT_NewArr1OBJ_MP_InlineGetThread, CORINFO_CLASS_HANDL EXTERN_C FCDECL2_VV(INT64, JIT_LMul, INT64 val1, INT64 val2); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2Lng, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2IntSSE2, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngP4x87, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngSSE3, double val); -EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngOvf, double val); - -EXTERN_C FCDECL1_V(INT32, JIT_Dbl2IntOvf, double val); - EXTERN_C FCDECL2_VV(float, JIT_FltRem, float dividend, float divisor); EXTERN_C FCDECL2_VV(double, JIT_DblRem, double dividend, double divisor); diff --git a/src/tests/JIT/CodeGenBringUpTests/DblCast.cs b/src/tests/JIT/CodeGenBringUpTests/DblCast.cs new file mode 100644 index 0000000000000..230eccefc2431 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/DblCast.cs @@ -0,0 +1,284 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; + +public class BringUpTest_DblCast +{ + const int Pass = 100; + const int Fail = -1; + + public static int Main() + { + // Each of the below scenarios tests a given value in both the checked and unchecked contexts. If all scenarios pass + // Validate(...) returns 0. Otherwise it returns a positive number for each failed scenario. 
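+ // For example, for Double -> Int8 the harness checks both forms:
+ //   Unchecked.DoubleToInt8(double.NaN) must return 0 (the saturating cast never throws)
+ //   Unchecked.DoubleToInt8(-129.0) must return sbyte.MinValue
+ //   Checked.DoubleToInt8(-129.0) must throw OverflowException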
+ // + // Each conversion group validates the following scenarios: + // * NaN, which should return 0 or overflow + // * + // * NegativeInfinity, which should return T.MinValue or overflow + // * PositiveInfinity, which should return T.MaxValue or overflow + // * + // * The nearest value to T.MinValue which does overflow, which should return T.MinValue + // * The nearest value to T.MaxValue which does overflow, which should return T.MaxValue + // * + // * The nearest value to T.MinValue which does not overflow, which should return T.MinValue + // * The nearest value to T.MaxValue which does not overflow, which should return T.MaxValue + // * + // * T.MinValue, which should return T.MinValue and not overflow + // * T.MaxValue, which should return T.MaxValue and not overflow + // * - Int64/UInt64 are a special case where this will overflow as T.MaxValue is not representable and rounds up to (T.MaxValue + 1) + // * + // * NegativePi, which should return -3 but which will overflow for unsigned values and should return 0 instead + // * PositivePi, which should return +3 and not overflow + + int numFailing = 0; + + // Double -> Int8 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + + numFailing += Validate(double.NegativeInfinity, sbyte.MinValue, expectsOverflow: true, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + numFailing += Validate(double.PositiveInfinity, sbyte.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + + numFailing += Validate(-129.0, sbyte.MinValue, expectsOverflow: true, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + numFailing += Validate(+128.0, sbyte.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + + numFailing += Validate(-128.99999999999997, sbyte.MinValue, expectsOverflow: false, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + numFailing += Validate(+127.99999999999999, sbyte.MaxValue, expectsOverflow: false, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + + numFailing += Validate(-128.0, sbyte.MinValue, expectsOverflow: false, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + numFailing += Validate(+127.0, sbyte.MaxValue, expectsOverflow: false, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + + numFailing += Validate(-Math.PI, -3, expectsOverflow: false, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToInt8, Checked.DoubleToInt8); + + // Double -> Int16 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + + numFailing += Validate(double.NegativeInfinity, short.MinValue, expectsOverflow: true, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + numFailing += Validate(double.PositiveInfinity, short.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + + numFailing += Validate(-32769.0, short.MinValue, expectsOverflow: true, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + numFailing += Validate(+32768.0, short.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + + numFailing += Validate(-32768.999999999990, short.MinValue, expectsOverflow: false, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + numFailing += Validate(+32767.999999999996, short.MaxValue, expectsOverflow: false, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + + numFailing += Validate(-32768.0, short.MinValue, expectsOverflow: false, Unchecked.DoubleToInt16, 
Checked.DoubleToInt16); + numFailing += Validate(+32767.0, short.MaxValue, expectsOverflow: false, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + + numFailing += Validate(-Math.PI, -3, expectsOverflow: false, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToInt16, Checked.DoubleToInt16); + + // Double -> Int32 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + + numFailing += Validate(double.NegativeInfinity, int.MinValue, expectsOverflow: true, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + numFailing += Validate(double.PositiveInfinity, int.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + + numFailing += Validate(-2147483649.0, int.MinValue, expectsOverflow: true, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + numFailing += Validate(+2147483648.0, int.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + + numFailing += Validate(-2147483648.9999995, int.MinValue, expectsOverflow: false, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + numFailing += Validate(+2147483647.9999998, int.MaxValue, expectsOverflow: false, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + + numFailing += Validate(-2147483648.0, int.MinValue, expectsOverflow: false, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + numFailing += Validate(+2147483647.0, int.MaxValue, expectsOverflow: false, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + + numFailing += Validate(-Math.PI, -3, expectsOverflow: false, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToInt32, Checked.DoubleToInt32); + + // Double -> Int64 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + + numFailing += Validate(double.NegativeInfinity, long.MinValue, expectsOverflow: true, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + numFailing += Validate(double.PositiveInfinity, long.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + + numFailing += Validate(-9223372036854777856.0, long.MinValue, expectsOverflow: true, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + numFailing += Validate(+9223372036854775808.0, long.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + + numFailing += Validate(-9223372036854775808.0, long.MinValue, expectsOverflow: false, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + numFailing += Validate(+9223372036854774784.0, 9223372036854774784, expectsOverflow: false, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + + numFailing += Validate(-9223372036854775808.0, long.MinValue, expectsOverflow: false, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + numFailing += Validate(+9223372036854775807.0, long.MaxValue, expectsOverflow: true, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + + numFailing += Validate(-Math.PI, -3, expectsOverflow: false, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToInt64, Checked.DoubleToInt64); + + // Double -> UInt8 + + numFailing += Validate(double.NegativeInfinity, byte.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + numFailing += 
Validate(double.PositiveInfinity, byte.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + + numFailing += Validate(-1.000, byte.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + numFailing += Validate(+256.0, byte.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + + numFailing += Validate(-0.9999999999999999, byte.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + numFailing += Validate(+255.99999999999997, byte.MaxValue, expectsOverflow: false, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + + numFailing += Validate(-0.000, byte.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + numFailing += Validate(+255.0, byte.MaxValue, expectsOverflow: false, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + + numFailing += Validate(-Math.PI, -0, expectsOverflow: true, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToUInt8, Checked.DoubleToUInt8); + + // Double -> UInt16 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + + numFailing += Validate(double.NegativeInfinity, ushort.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + numFailing += Validate(double.PositiveInfinity, ushort.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + + numFailing += Validate(-1.00000, ushort.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + numFailing += Validate(+65536.0, ushort.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + + numFailing += Validate(-0.9999999999999999, ushort.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + numFailing += Validate(+65535.999999999990, ushort.MaxValue, expectsOverflow: false, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + + numFailing += Validate(-0.00000, ushort.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + numFailing += Validate(+65535.0, ushort.MaxValue, expectsOverflow: false, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + + numFailing += Validate(-Math.PI, -0, expectsOverflow: true, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToUInt16, Checked.DoubleToUInt16); + + // Double -> UInt32 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + + numFailing += Validate(double.NegativeInfinity, uint.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + numFailing += Validate(double.PositiveInfinity, uint.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + + numFailing += Validate(-1.0000000000, uint.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + numFailing += Validate(+4294967296.0, uint.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + + numFailing += Validate(-0.9999999999999999, uint.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + numFailing += Validate(+4294967295.9999995, uint.MaxValue, expectsOverflow: false, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + + numFailing += Validate(-0.0000000000, uint.MinValue, 
expectsOverflow: false, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + numFailing += Validate(+4294967295.0, uint.MaxValue, expectsOverflow: false, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + + numFailing += Validate(-Math.PI, -0, expectsOverflow: true, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToUInt32, Checked.DoubleToUInt32); + + // Double -> UInt64 + + numFailing += Validate(double.NaN, 0, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + + numFailing += Validate(double.NegativeInfinity, ulong.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + numFailing += Validate(double.PositiveInfinity, ulong.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + + numFailing += Validate(-1.00000000000000000000, ulong.MinValue, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + numFailing += Validate(+18446744073709551616.0, ulong.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + + numFailing += Validate(-0.99999999999999990000, ulong.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + numFailing += Validate(+18446744073709549568.0, 18446744073709549568, expectsOverflow: false, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + + numFailing += Validate(-0.00000000000000000000, ulong.MinValue, expectsOverflow: false, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + numFailing += Validate(+18446744073709551615.0, ulong.MaxValue, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + + numFailing += Validate(-Math.PI, -0, expectsOverflow: true, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + numFailing += Validate(+Math.PI, +3, expectsOverflow: false, Unchecked.DoubleToUInt64, Checked.DoubleToUInt64); + + return (numFailing == 0) ? 
Pass : Fail; + } + + public static int Validate<T>(double x, T expected, bool expectsOverflow, Func<double, T> uncheckedFunc, Func<double, T> checkedFunc) + where T : IEquatable<T> + { + int numFailing = 0; + + T uncheckedResult = uncheckedFunc(x); + + if (!uncheckedResult.Equals(expected)) + { + Console.WriteLine($"Unchecked conversion for Double -> {typeof(T)} failed; Input: {x}; Expected {expected}; Actual {uncheckedResult}"); + numFailing += 1; + } + + var caughtOverflow = false; + + try + { + T checkedResult = checkedFunc(x); + if (!checkedResult.Equals(expected)) + { + Console.WriteLine($"Checked conversion for Double -> {typeof(T)} failed; Input: {x}; Expected {expected}; Actual {checkedResult}"); + numFailing += 1; + } + } + catch (OverflowException) + { + caughtOverflow = true; + } + + if (caughtOverflow != expectsOverflow) + { + Console.WriteLine($"Checked conversion for Double -> {typeof(T)} failed; Input: {x}; Expected Overflow {expectsOverflow}; Caught Overflow {caughtOverflow}"); + numFailing += 1; + } + + return numFailing; + } +} + +public class Unchecked +{ + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static sbyte DoubleToInt8(double x) => unchecked((sbyte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static short DoubleToInt16(double x) => unchecked((short)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static int DoubleToInt32(double x) => unchecked((int)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static long DoubleToInt64(double x) => unchecked((long)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static byte DoubleToUInt8(double x) => unchecked((byte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ushort DoubleToUInt16(double x) => unchecked((ushort)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static uint DoubleToUInt32(double x) => unchecked((uint)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ulong DoubleToUInt64(double x) => unchecked((ulong)(x)); +} + +public class Checked +{ + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static sbyte DoubleToInt8(double x) => checked((sbyte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static short DoubleToInt16(double x) => checked((short)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static int DoubleToInt32(double x) => checked((int)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static long DoubleToInt64(double x) => checked((long)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static byte DoubleToUInt8(double x) => checked((byte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ushort DoubleToUInt16(double x) => checked((ushort)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static uint DoubleToUInt32(double x) => checked((uint)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ulong DoubleToUInt64(double x) => checked((ulong)(x)); +} diff --git a/src/tests/JIT/CodeGenBringUpTests/DblCast_d.csproj b/src/tests/JIT/CodeGenBringUpTests/DblCast_d.csproj new file mode 100644 index 0000000000000..bf45fc918774e --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/DblCast_d.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>Full</DebugType> + <Optimize>False</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="DblCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/DblCast_do.csproj b/src/tests/JIT/CodeGenBringUpTests/DblCast_do.csproj new file mode 
100644 index 0000000000000..8338bd165b9f1 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/DblCast_do.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>Full</DebugType> + <Optimize>True</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="DblCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/DblCast_r.csproj b/src/tests/JIT/CodeGenBringUpTests/DblCast_r.csproj new file mode 100644 index 0000000000000..60312f1bf1bfe --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/DblCast_r.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>PdbOnly</DebugType> + <Optimize>False</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="DblCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/DblCast_ro.csproj b/src/tests/JIT/CodeGenBringUpTests/DblCast_ro.csproj new file mode 100644 index 0000000000000..cdc7cbba09f66 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/DblCast_ro.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>PdbOnly</DebugType> + <Optimize>True</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="DblCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/FltCast.cs b/src/tests/JIT/CodeGenBringUpTests/FltCast.cs new file mode 100644 index 0000000000000..53f5d828ba656 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/FltCast.cs @@ -0,0 +1,284 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; + +public class BringUpTest_FltCast +{ + const int Pass = 100; + const int Fail = -1; + + public static int Main() + { + // Each of the below scenarios tests a given value in both the checked and unchecked contexts. If all scenarios pass + // Validate(...) returns 0. Otherwise it returns a positive number for each failed scenario. + // + // Each conversion group validates the following scenarios: + // * NaN, which should return 0 or overflow + // * + // * NegativeInfinity, which should return T.MinValue or overflow + // * PositiveInfinity, which should return T.MaxValue or overflow + // * + // * The nearest value to T.MinValue which does overflow, which should return T.MinValue + // * The nearest value to T.MaxValue which does overflow, which should return T.MaxValue + // * + // * The nearest value to T.MinValue which does not overflow, which should return T.MinValue + // * The nearest value to T.MaxValue which does not overflow, which should return T.MaxValue + // * + // * T.MinValue, which should return T.MinValue and not overflow + // * T.MaxValue, which should return T.MaxValue and not overflow + // * - Int32/Int64/UInt32/UInt64 are a special case where this will overflow as T.MaxValue is not representable and rounds up to (T.MaxValue + 1) + // * + // * NegativePi, which should return -3 but which will overflow for unsigned values and should return 0 instead + // * PositivePi, which should return +3 and not overflow + + int numFailing = 0; + + // Single -> Int8 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToInt8, Checked.SingleToInt8); + + numFailing += Validate(float.NegativeInfinity, sbyte.MinValue, expectsOverflow: true, Unchecked.SingleToInt8, Checked.SingleToInt8); + numFailing += Validate(float.PositiveInfinity, sbyte.MaxValue, expectsOverflow: true, Unchecked.SingleToInt8, Checked.SingleToInt8); + + numFailing += Validate(-129.0f, sbyte.MinValue, expectsOverflow: true, Unchecked.SingleToInt8, Checked.SingleToInt8); + numFailing += Validate(+128.0f, sbyte.MaxValue, expectsOverflow: true, Unchecked.SingleToInt8, Checked.SingleToInt8); + + numFailing += Validate(-128.99998f, sbyte.MinValue, expectsOverflow: false, Unchecked.SingleToInt8, Checked.SingleToInt8); + numFailing += Validate(+127.99999f, sbyte.MaxValue, expectsOverflow: false, 
Unchecked.SingleToInt8, Checked.SingleToInt8); + + numFailing += Validate(-128.0f, sbyte.MinValue, expectsOverflow: false, Unchecked.SingleToInt8, Checked.SingleToInt8); + numFailing += Validate(+127.0f, sbyte.MaxValue, expectsOverflow: false, Unchecked.SingleToInt8, Checked.SingleToInt8); + + numFailing += Validate(-MathF.PI, -3, expectsOverflow: false, Unchecked.SingleToInt8, Checked.SingleToInt8); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToInt8, Checked.SingleToInt8); + + // Single -> Int16 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToInt16, Checked.SingleToInt16); + + numFailing += Validate(float.NegativeInfinity, short.MinValue, expectsOverflow: true, Unchecked.SingleToInt16, Checked.SingleToInt16); + numFailing += Validate(float.PositiveInfinity, short.MaxValue, expectsOverflow: true, Unchecked.SingleToInt16, Checked.SingleToInt16); + + numFailing += Validate(-32769.0f, short.MinValue, expectsOverflow: true, Unchecked.SingleToInt16, Checked.SingleToInt16); + numFailing += Validate(+32768.0f, short.MaxValue, expectsOverflow: true, Unchecked.SingleToInt16, Checked.SingleToInt16); + + numFailing += Validate(-32768.996f, short.MinValue, expectsOverflow: false, Unchecked.SingleToInt16, Checked.SingleToInt16); + numFailing += Validate(+32767.998f, short.MaxValue, expectsOverflow: false, Unchecked.SingleToInt16, Checked.SingleToInt16); + + numFailing += Validate(-32768.0f, short.MinValue, expectsOverflow: false, Unchecked.SingleToInt16, Checked.SingleToInt16); + numFailing += Validate(+32767.0f, short.MaxValue, expectsOverflow: false, Unchecked.SingleToInt16, Checked.SingleToInt16); + + numFailing += Validate(-MathF.PI, -3, expectsOverflow: false, Unchecked.SingleToInt16, Checked.SingleToInt16); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToInt16, Checked.SingleToInt16); + + // Single -> Int32 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToInt32, Checked.SingleToInt32); + + numFailing += Validate(float.NegativeInfinity, int.MinValue, expectsOverflow: true, Unchecked.SingleToInt32, Checked.SingleToInt32); + numFailing += Validate(float.PositiveInfinity, int.MaxValue, expectsOverflow: true, Unchecked.SingleToInt32, Checked.SingleToInt32); + + numFailing += Validate(-2147483904.0f, int.MinValue, expectsOverflow: true, Unchecked.SingleToInt32, Checked.SingleToInt32); + numFailing += Validate(+2147483648.0f, int.MaxValue, expectsOverflow: true, Unchecked.SingleToInt32, Checked.SingleToInt32); + + numFailing += Validate(-2147483648.0f, int.MinValue, expectsOverflow: false, Unchecked.SingleToInt32, Checked.SingleToInt32); + numFailing += Validate(+2147483520.0f, 2147483520, expectsOverflow: false, Unchecked.SingleToInt32, Checked.SingleToInt32); + + numFailing += Validate(-2147483648.0f, int.MinValue, expectsOverflow: false, Unchecked.SingleToInt32, Checked.SingleToInt32); + numFailing += Validate(+2147483647.0f, int.MaxValue, expectsOverflow: true, Unchecked.SingleToInt32, Checked.SingleToInt32); + + numFailing += Validate(-MathF.PI, -3, expectsOverflow: false, Unchecked.SingleToInt32, Checked.SingleToInt32); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToInt32, Checked.SingleToInt32); + + // Single -> Int64 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToInt64, Checked.SingleToInt64); + + numFailing += Validate(float.NegativeInfinity, long.MinValue, expectsOverflow: true, 
Unchecked.SingleToInt64, Checked.SingleToInt64); + numFailing += Validate(float.PositiveInfinity, long.MaxValue, expectsOverflow: true, Unchecked.SingleToInt64, Checked.SingleToInt64); + + numFailing += Validate(-9223373136366403584.0f, long.MinValue, expectsOverflow: true, Unchecked.SingleToInt64, Checked.SingleToInt64); + numFailing += Validate(+9223372036854775808.0f, long.MaxValue, expectsOverflow: true, Unchecked.SingleToInt64, Checked.SingleToInt64); + + numFailing += Validate(-9223372036854775808.0f, long.MinValue, expectsOverflow: false, Unchecked.SingleToInt64, Checked.SingleToInt64); + numFailing += Validate(+9223371487098961920.0f, 9223371487098961920, expectsOverflow: false, Unchecked.SingleToInt64, Checked.SingleToInt64); + + numFailing += Validate(-9223372036854775808.0f, long.MinValue, expectsOverflow: false, Unchecked.SingleToInt64, Checked.SingleToInt64); + numFailing += Validate(+9223372036854775807.0f, long.MaxValue, expectsOverflow: true, Unchecked.SingleToInt64, Checked.SingleToInt64); + + numFailing += Validate(-MathF.PI, -3, expectsOverflow: false, Unchecked.SingleToInt64, Checked.SingleToInt64); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToInt64, Checked.SingleToInt64); + + // Single -> UInt8 + + numFailing += Validate(float.NegativeInfinity, byte.MinValue, expectsOverflow: true, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + numFailing += Validate(float.PositiveInfinity, byte.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + + numFailing += Validate(-1.000f, byte.MinValue, expectsOverflow: true, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + numFailing += Validate(+256.0f, byte.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + + numFailing += Validate(-0.99999994f, byte.MinValue, expectsOverflow: false, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + numFailing += Validate(+255.999980f, byte.MaxValue, expectsOverflow: false, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + + numFailing += Validate(-0.000f, byte.MinValue, expectsOverflow: false, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + numFailing += Validate(+255.0f, byte.MaxValue, expectsOverflow: false, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + + numFailing += Validate(-MathF.PI, -0, expectsOverflow: true, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToUInt8, Checked.SingleToUInt8); + + // Single -> UInt16 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + + numFailing += Validate(float.NegativeInfinity, ushort.MinValue, expectsOverflow: true, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + numFailing += Validate(float.PositiveInfinity, ushort.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + + numFailing += Validate(-1.00000f, ushort.MinValue, expectsOverflow: true, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + numFailing += Validate(+65536.0f, ushort.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + + numFailing += Validate(-0.99999994f, ushort.MinValue, expectsOverflow: false, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + numFailing += Validate(+65535.9960f, ushort.MaxValue, expectsOverflow: false, Unchecked.SingleToUInt16, 
Checked.SingleToUInt16); + + numFailing += Validate(-0.00000f, ushort.MinValue, expectsOverflow: false, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + numFailing += Validate(+65535.0f, ushort.MaxValue, expectsOverflow: false, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + + numFailing += Validate(-MathF.PI, -0, expectsOverflow: true, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToUInt16, Checked.SingleToUInt16); + + // Single -> UInt32 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + + numFailing += Validate(float.NegativeInfinity, uint.MinValue, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + numFailing += Validate(float.PositiveInfinity, uint.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + + numFailing += Validate(-1.0000000000f, uint.MinValue, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + numFailing += Validate(+4294967296.0f, uint.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + + numFailing += Validate(-0.9999999400f, uint.MinValue, expectsOverflow: false, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + numFailing += Validate(+4294967040.0f, 4294967040, expectsOverflow: false, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + + numFailing += Validate(-0.0000000000f, uint.MinValue, expectsOverflow: false, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + numFailing += Validate(+4294967295.0f, uint.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + + numFailing += Validate(-MathF.PI, -0, expectsOverflow: true, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToUInt32, Checked.SingleToUInt32); + + // Single -> UInt64 + + numFailing += Validate(float.NaN, 0, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + + numFailing += Validate(float.NegativeInfinity, ulong.MinValue, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + numFailing += Validate(float.PositiveInfinity, ulong.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + + numFailing += Validate(-1.00000000000000000000f, ulong.MinValue, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + numFailing += Validate(+18446744073709551616.0f, ulong.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + + numFailing += Validate(-0.99999994000000000000f, ulong.MinValue, expectsOverflow: false, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + numFailing += Validate(+18446742974197923840.0f, 18446742974197923840, expectsOverflow: false, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + + numFailing += Validate(-0.00000000000000000000f, ulong.MinValue, expectsOverflow: false, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + numFailing += Validate(+18446744073709551615.0f, ulong.MaxValue, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + + numFailing += Validate(-MathF.PI, -0, expectsOverflow: true, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + numFailing += Validate(+MathF.PI, +3, expectsOverflow: false, Unchecked.SingleToUInt64, Checked.SingleToUInt64); + + return (numFailing == 0) ? 
Pass : Fail; + } + + public static int Validate<T>(float x, T expected, bool expectsOverflow, Func<float, T> uncheckedFunc, Func<float, T> checkedFunc) + where T : IEquatable<T> + { + int numFailing = 0; + + T uncheckedResult = uncheckedFunc(x); + + if (!uncheckedResult.Equals(expected)) + { + Console.WriteLine($"Unchecked conversion for Single -> {typeof(T)} failed; Input: {x}; Expected {expected}; Actual {uncheckedResult}"); + numFailing += 1; + } + + var caughtOverflow = false; + + try + { + T checkedResult = checkedFunc(x); + if (!checkedResult.Equals(expected)) + { + Console.WriteLine($"Checked conversion for Single -> {typeof(T)} failed; Input: {x}; Expected {expected}; Actual {checkedResult}"); + numFailing += 1; + } + } + catch (OverflowException) + { + caughtOverflow = true; + } + + if (caughtOverflow != expectsOverflow) + { + Console.WriteLine($"Checked conversion for Single -> {typeof(T)} failed; Input: {x}; Expected Overflow {expectsOverflow}; Caught Overflow {caughtOverflow}"); + numFailing += 1; + } + + return numFailing; + } +} + +public class Unchecked +{ + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static sbyte SingleToInt8(float x) => unchecked((sbyte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static short SingleToInt16(float x) => unchecked((short)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static int SingleToInt32(float x) => unchecked((int)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static long SingleToInt64(float x) => unchecked((long)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static byte SingleToUInt8(float x) => unchecked((byte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ushort SingleToUInt16(float x) => unchecked((ushort)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static uint SingleToUInt32(float x) => unchecked((uint)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ulong SingleToUInt64(float x) => unchecked((ulong)(x)); +} + +public class Checked +{ + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static sbyte SingleToInt8(float x) => checked((sbyte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static short SingleToInt16(float x) => checked((short)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static int SingleToInt32(float x) => checked((int)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static long SingleToInt64(float x) => checked((long)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static byte SingleToUInt8(float x) => checked((byte)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ushort SingleToUInt16(float x) => checked((ushort)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static uint SingleToUInt32(float x) => checked((uint)(x)); + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static ulong SingleToUInt64(float x) => checked((ulong)(x)); +} diff --git a/src/tests/JIT/CodeGenBringUpTests/FltCast_d.csproj b/src/tests/JIT/CodeGenBringUpTests/FltCast_d.csproj new file mode 100644 index 0000000000000..eeac5d8d72678 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/FltCast_d.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>Full</DebugType> + <Optimize>False</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="FltCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/FltCast_do.csproj b/src/tests/JIT/CodeGenBringUpTests/FltCast_do.csproj new file mode 100644 index 
0000000000000..09059fb274c20 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/FltCast_do.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>Full</DebugType> + <Optimize>True</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="FltCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/FltCast_r.csproj b/src/tests/JIT/CodeGenBringUpTests/FltCast_r.csproj new file mode 100644 index 0000000000000..7e2e5edd54076 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/FltCast_r.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>PdbOnly</DebugType> + <Optimize>False</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="FltCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/CodeGenBringUpTests/FltCast_ro.csproj b/src/tests/JIT/CodeGenBringUpTests/FltCast_ro.csproj new file mode 100644 index 0000000000000..5950ec2504272 --- /dev/null +++ b/src/tests/JIT/CodeGenBringUpTests/FltCast_ro.csproj @@ -0,0 +1,13 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <OutputType>Exe</OutputType> + <CLRTestPriority>0</CLRTestPriority> + </PropertyGroup> + <PropertyGroup> + <DebugType>PdbOnly</DebugType> + <Optimize>True</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="FltCast.cs" /> + </ItemGroup> +</Project> diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index 954fda640816c..2c437af9a6f51 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -312,7 +312,7 @@ static void TestBitValue(uint value, double? dblValNullable = null, FPtoIntegerC failures++; Console.WriteLine($"Managed.ConvertDoubleToUInt64(dblVal, t) != Native.ConvertDoubleToUInt64(dblVal, t) {t} {value} {dblVal} {Managed.ConvertDoubleToUInt64(dblVal, t)} != {Native.ConvertDoubleToUInt64(dblVal, t)}"); } - + if (t == ManagedConversionRule) { if (Managed.ConvertDoubleToInt32(dblVal, FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR) != Managed.ConvertDoubleToInt32(dblVal, t)) @@ -332,7 +332,7 @@ static void TestBitValue(uint value, double? dblValNullable = null, FPtoIntegerC failures++; Console.WriteLine($"ConvertDoubleToInt64 NativeCompilerBehavior(managed) {t} {value} {dblVal} {Managed.ConvertDoubleToInt64(dblVal, FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR)} != {Managed.ConvertDoubleToInt64(dblVal, t)}"); } - + if (Managed.ConvertDoubleToUInt64(dblVal, FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR) != Managed.ConvertDoubleToUInt64(dblVal, t)) { failures++; @@ -373,30 +373,18 @@ static void TestBitValue(uint value, double? 
dblValNullable = null, FPtoIntegerC static int Main(string[] args) { - switch (RuntimeInformation.ProcessArchitecture) - { - case Architecture.X86: - case Architecture.X64: - Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64; - break; - - case Architecture.Arm: - Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32; - break; - - case Architecture.Arm64: - Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_SATURATING; - break; - } - Console.WriteLine($"Expected managed float behavior is {Program.ManagedConversionRule} Execute with parameter to adjust"); + Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_SATURATING; + if (args.Length > 0) { - if (!Enum.TryParse(args[0], out ManagedConversionRule)) + if (!Enum.TryParse(args[0], out Program.ManagedConversionRule)) { Console.WriteLine($"Unable to parse {args[0]}"); return 1; } } + + Console.WriteLine($"Expected managed float behavior is {Program.ManagedConversionRule}"); Console.WriteLine("Specific test cases"); TestBitValue(0, 9223372036854777856.0); From 1e61a4037ca4d31e965f80461f8a67b2ae8a6f22 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 19 Feb 2022 09:41:47 -0800 Subject: [PATCH 2/5] Updating Mono casting behavior to be IEEE 754 compliant and to saturate on overflow --- src/mono/mono/metadata/icall-signatures.h | 9 + src/mono/mono/metadata/jit-icall-reg.h | 29 +- src/mono/mono/mini/aot-runtime.h | 2 +- src/mono/mono/mini/decompose.c | 17 - src/mono/mono/mini/interp/interp.c | 125 ++--- src/mono/mono/mini/jit-icalls.c | 528 +++++++++++++++------- src/mono/mono/mini/jit-icalls.h | 84 +++- src/mono/mono/mini/mini-amd64.h | 4 - src/mono/mono/mini/mini-arm.h | 7 - src/mono/mono/mini/mini-arm64.h | 7 - src/mono/mono/mini/mini-mips.h | 6 - src/mono/mono/mini/mini-ppc.h | 7 - src/mono/mono/mini/mini-riscv.h | 8 - src/mono/mono/mini/mini-runtime.c | 84 ++-- src/mono/mono/mini/mini-sparc.h | 6 - src/mono/mono/mini/mini-wasm.h | 2 - src/mono/mono/mini/mini-x86.h | 3 - src/mono/mono/utils/mono-math.h | 91 +++- 18 files changed, 632 insertions(+), 387 deletions(-) diff --git a/src/mono/mono/metadata/icall-signatures.h b/src/mono/mono/metadata/icall-signatures.h index 080d4724d8945..7d429ba48af26 100644 --- a/src/mono/mono/metadata/icall-signatures.h +++ b/src/mono/mono/metadata/icall-signatures.h @@ -153,15 +153,22 @@ ICALL_SIG (1, (void)) \ ICALL_SIG (2, (double, double)) \ ICALL_SIG (2, (double, int32)) \ ICALL_SIG (2, (double, long)) \ +ICALL_SIG (2, (double, uint32)) \ +ICALL_SIG (2, (double, ulong)) \ ICALL_SIG (2, (double, ptr)) \ +ICALL_SIG (2, (float, double)) \ +ICALL_SIG (2, (float, int32)) \ ICALL_SIG (2, (float, long)) \ ICALL_SIG (2, (int, obj)) \ ICALL_SIG (2, (int16, double)) \ ICALL_SIG (2, (int32, double)) \ +ICALL_SIG (2, (int16, float)) \ +ICALL_SIG (2, (int32, float)) \ ICALL_SIG (2, (int32, obj)) \ ICALL_SIG (2, (int32, object)) \ ICALL_SIG (2, (int8, double)) \ ICALL_SIG (2, (long, double)) \ +ICALL_SIG (2, (int8, float)) \ ICALL_SIG (2, (long, float)) \ ICALL_SIG (2, (obj, ptr)) \ ICALL_SIG (2, (object, int)) \ @@ -174,9 +181,11 @@ ICALL_SIG (2, (ptr, object)) \ ICALL_SIG (2, (ptr, ptr)) \ ICALL_SIG (2, (uint16, double)) \ ICALL_SIG (2, (uint32, double)) \ +ICALL_SIG (2, (uint16, float)) \ ICALL_SIG (2, (uint32, float)) \ ICALL_SIG (2, (uint8, double)) \ ICALL_SIG (2, (ulong, double)) \ +ICALL_SIG (2, (uint8, float)) \ ICALL_SIG (2, (ulong, float)) \ ICALL_SIG (2, (void, int)) \ ICALL_SIG 
(2, (void, int32)) \ diff --git a/src/mono/mono/metadata/jit-icall-reg.h b/src/mono/mono/metadata/jit-icall-reg.h index d4ca204068329..0c3e2e905ad17 100644 --- a/src/mono/mono/metadata/jit-icall-reg.h +++ b/src/mono/mono/metadata/jit-icall-reg.h @@ -74,7 +74,13 @@ MONO_JIT_ICALL (__emul_fconv_to_i1) \ MONO_JIT_ICALL (__emul_fconv_to_i2) \ MONO_JIT_ICALL (__emul_fconv_to_i4) \ MONO_JIT_ICALL (__emul_fconv_to_i8) \ +MONO_JIT_ICALL (__emul_fconv_to_ovf_i1) \ +MONO_JIT_ICALL (__emul_fconv_to_ovf_i2) \ +MONO_JIT_ICALL (__emul_fconv_to_ovf_i4) \ MONO_JIT_ICALL (__emul_fconv_to_ovf_i8) \ +MONO_JIT_ICALL (__emul_fconv_to_ovf_u1) \ +MONO_JIT_ICALL (__emul_fconv_to_ovf_u2) \ +MONO_JIT_ICALL (__emul_fconv_to_ovf_u4) \ MONO_JIT_ICALL (__emul_fconv_to_ovf_u8) \ MONO_JIT_ICALL (__emul_fconv_to_r4) \ MONO_JIT_ICALL (__emul_fconv_to_u) \ @@ -87,9 +93,9 @@ MONO_JIT_ICALL (__emul_fmul) \ MONO_JIT_ICALL (__emul_fneg) \ MONO_JIT_ICALL (__emul_frem) \ MONO_JIT_ICALL (__emul_fsub) \ -MONO_JIT_ICALL (__emul_iconv_to_r_un) \ MONO_JIT_ICALL (__emul_iconv_to_r4) \ MONO_JIT_ICALL (__emul_iconv_to_r8) \ +MONO_JIT_ICALL (__emul_iconv_to_r8_un) \ MONO_JIT_ICALL (__emul_lconv_to_r4) \ MONO_JIT_ICALL (__emul_lconv_to_r8) \ MONO_JIT_ICALL (__emul_lconv_to_r8_un) \ @@ -112,11 +118,24 @@ MONO_JIT_ICALL (__emul_op_imul_ovf_un) \ MONO_JIT_ICALL (__emul_op_imul_ovf_un_oom) \ MONO_JIT_ICALL (__emul_op_irem) \ MONO_JIT_ICALL (__emul_op_irem_un) \ -MONO_JIT_ICALL (__emul_rconv_to_i8) \ -MONO_JIT_ICALL (__emul_rconv_to_ovf_i8) \ -MONO_JIT_ICALL (__emul_rconv_to_ovf_u8) \ +MONO_JIT_ICALL (__emul_rconv_to_i) \ +MONO_JIT_ICALL (__emul_rconv_to_i1) \ +MONO_JIT_ICALL (__emul_rconv_to_i2) \ +MONO_JIT_ICALL (__emul_rconv_to_i4) \ +MONO_JIT_ICALL (__emul_rconv_to_i8) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_i1) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_i2) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_i4) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_i8) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_u1) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_u2) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_u4) \ +MONO_JIT_ICALL (__emul_rconv_to_ovf_u8) \ +MONO_JIT_ICALL (__emul_rconv_to_u) \ +MONO_JIT_ICALL (__emul_rconv_to_u1) \ +MONO_JIT_ICALL (__emul_rconv_to_u2) \ MONO_JIT_ICALL (__emul_rconv_to_u4) \ -MONO_JIT_ICALL (__emul_rconv_to_u8) \ +MONO_JIT_ICALL (__emul_rconv_to_u8) \ MONO_JIT_ICALL (__emul_rrem) \ MONO_JIT_ICALL (cominterop_get_ccw) \ MONO_JIT_ICALL (cominterop_get_ccw_object) \ diff --git a/src/mono/mono/mini/aot-runtime.h b/src/mono/mono/mini/aot-runtime.h index 4cb34374fe08b..45df7fd446d55 100644 --- a/src/mono/mono/mini/aot-runtime.h +++ b/src/mono/mono/mini/aot-runtime.h @@ -11,7 +11,7 @@ #include "mini.h" /* Version number of the AOT file format */ -#define MONO_AOT_FILE_VERSION 183 +#define MONO_AOT_FILE_VERSION 184 #define MONO_AOT_TRAMP_PAGE_SIZE 16384 diff --git a/src/mono/mono/mini/decompose.c b/src/mono/mono/mini/decompose.c index 8e6db0073f589..97b5eb7b048d9 100644 --- a/src/mono/mono/mini/decompose.c +++ b/src/mono/mono/mini/decompose.c @@ -520,10 +520,8 @@ mono_decompose_opcode (MonoCompile *cfg, MonoInst *ins) } break; case OP_ICONV_TO_R_UN: -#ifdef MONO_ARCH_EMULATE_CONV_R8_UN if (!COMPILE_LLVM (cfg)) emulate = TRUE; -#endif break; default: emulate = TRUE; @@ -698,21 +696,6 @@ mono_decompose_long_opts (MonoCompile *cfg) case OP_LCONV_TO_U: MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, tree->dreg, MONO_LVREG_LS (tree->sreg1)); break; -#ifndef MONO_ARCH_EMULATE_LCONV_TO_R8 - case OP_LCONV_TO_R8: - MONO_EMIT_NEW_BIALU (cfg, OP_LCONV_TO_R8_2, tree->dreg, MONO_LVREG_LS 
(tree->sreg1), MONO_LVREG_MS (tree->sreg1)); - break; -#endif -#ifndef MONO_ARCH_EMULATE_LCONV_TO_R4 - case OP_LCONV_TO_R4: - MONO_EMIT_NEW_BIALU (cfg, OP_LCONV_TO_R4_2, tree->dreg, MONO_LVREG_LS (tree->sreg1), MONO_LVREG_MS (tree->sreg1)); - break; -#endif -#ifndef MONO_ARCH_EMULATE_LCONV_TO_R8_UN - case OP_LCONV_TO_R_UN: - MONO_EMIT_NEW_BIALU (cfg, OP_LCONV_TO_R_UN_2, tree->dreg, MONO_LVREG_LS (tree->sreg1), MONO_LVREG_MS (tree->sreg1)); - break; -#endif case OP_LCONV_TO_OVF_I1: { MonoBasicBlock *is_negative, *end_label; diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 44d9d8ab55768..62027cf8a501d 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -5136,18 +5136,11 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I1_R4) - LOCAL_VAR (ip [1], gint32) = (gint8) (gint32) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], gint32) = mono_fconv_i1 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I1_R8) - /* without gint32 cast, C compiler is allowed to use undefined - * behaviour if data.f is bigger than >255. See conv.fpint section - * in C standard: - * > The conversion truncates; that is, the fractional part - * > is discarded. The behavior is undefined if the truncated - * > value cannot be represented in the destination type. - * */ - LOCAL_VAR (ip [1], gint32) = (gint8) (gint32) LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], gint32) = mono_fconv_i1 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U1_I4) @@ -5159,11 +5152,11 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U1_R4) - LOCAL_VAR (ip [1], gint32) = (guint8) (guint32) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], gint32) = mono_fconv_u1 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U1_R8) - LOCAL_VAR (ip [1], gint32) = (guint8) (guint32) LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], gint32) = mono_fconv_u1 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I2_I4) @@ -5175,11 +5168,11 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I2_R4) - LOCAL_VAR (ip [1], gint32) = (gint16) (gint32) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], gint32) = mono_fconv_i2 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I2_R8) - LOCAL_VAR (ip [1], gint32) = (gint16) (gint32) LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], gint32) = mono_fconv_i2 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U2_I4) @@ -5191,35 +5184,27 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U2_R4) - LOCAL_VAR (ip [1], gint32) = (guint16) (guint32) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], gint32) = mono_fconv_u2 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U2_R8) - LOCAL_VAR (ip [1], gint32) = (guint16) (guint32) LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], gint32) = mono_fconv_u2 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I4_R4) - LOCAL_VAR (ip [1], gint32) = (gint32) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], gint32) = mono_fconv_i4 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I4_R8) - LOCAL_VAR (ip [1], gint32) = (gint32) 
LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], gint32) = mono_fconv_i4 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U4_R4) -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U4 - LOCAL_VAR (ip [1], gint32) = mono_rconv_u4 (LOCAL_VAR (ip [2], float)); -#else - LOCAL_VAR (ip [1], gint32) = (guint32) LOCAL_VAR (ip [2], float); -#endif + LOCAL_VAR (ip [1], gint32) = mono_fconv_u4 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U4_R8) -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U4 LOCAL_VAR (ip [1], gint32) = mono_fconv_u4 (LOCAL_VAR (ip [2], double)); -#else - LOCAL_VAR (ip [1], gint32) = (guint32) LOCAL_VAR (ip [2], double); -#endif ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I8_I4) @@ -5231,51 +5216,43 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I8_R4) - LOCAL_VAR (ip [1], gint64) = (gint64) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], gint64) = mono_fconv_i8 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_I8_R8) - LOCAL_VAR (ip [1], gint64) = (gint64) LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], gint64) = mono_fconv_i8 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R4_I4) - LOCAL_VAR (ip [1], float) = (float) LOCAL_VAR (ip [2], gint32); + LOCAL_VAR (ip [1], float) = mono_conv_to_r4 (LOCAL_VAR (ip [2], gint32)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R4_I8) - LOCAL_VAR (ip [1], float) = (float) LOCAL_VAR (ip [2], gint64); + LOCAL_VAR (ip [1], float) = mono_lconv_to_r4 (LOCAL_VAR (ip [2], gint64)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R4_R8) - LOCAL_VAR (ip [1], float) = (float) LOCAL_VAR (ip [2], double); + LOCAL_VAR (ip [1], float) = mono_fconv_r4 (LOCAL_VAR (ip [2], double)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R8_I4) - LOCAL_VAR (ip [1], double) = (double) LOCAL_VAR (ip [2], gint32); + LOCAL_VAR (ip [1], double) = mono_conv_to_r8 (LOCAL_VAR (ip [2], gint32)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R8_I8) - LOCAL_VAR (ip [1], double) = (double) LOCAL_VAR (ip [2], gint64); + LOCAL_VAR (ip [1], double) = mono_lconv_to_r8 (LOCAL_VAR (ip [2], gint64)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R8_R4) - LOCAL_VAR (ip [1], double) = (double) LOCAL_VAR (ip [2], float); + LOCAL_VAR (ip [1], double) = LOCAL_VAR (ip [2], float); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U8_R4) -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U8 - LOCAL_VAR (ip [1], gint64) = mono_rconv_u8 (LOCAL_VAR (ip [2], float)); -#else - LOCAL_VAR (ip [1], gint64) = (guint64) LOCAL_VAR (ip [2], float); -#endif + LOCAL_VAR (ip [1], gint64) = mono_fconv_u8 (LOCAL_VAR (ip [2], float)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_U8_R8) -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U8 LOCAL_VAR (ip [1], gint64) = mono_fconv_u8 (LOCAL_VAR (ip [2], double)); -#else - LOCAL_VAR (ip [1], gint64) = (guint64) LOCAL_VAR (ip [2], double); -#endif ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CPOBJ) { @@ -5588,11 +5565,11 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_R_UN_I4) - LOCAL_VAR (ip [1], double) = (double)LOCAL_VAR (ip [2], guint32); + LOCAL_VAR (ip [1], double) = mono_conv_to_r8_un (LOCAL_VAR (ip [2], guint32)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_CONV_R_UN_I8) - LOCAL_VAR (ip [1], double) = (double)LOCAL_VAR (ip [2], guint64); + LOCAL_VAR (ip [1], double) = mono_lconv_to_r8_un (LOCAL_VAR (ip [2], guint64)); ip += 3; MINT_IN_BREAK; MINT_IN_CASE(MINT_UNBOX) { @@ 
-5842,28 +5819,28 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U8_R4) { float val = LOCAL_VAR (ip [2], float); - if (!mono_try_trunc_u64 (val, (guint64*)(locals + ip [1]))) + if (!mono_try_trunc_u8 (val, (guint64*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U8_R8) { double val = LOCAL_VAR (ip [2], double); - if (!mono_try_trunc_u64 (val, (guint64*)(locals + ip [1]))) + if (!mono_try_trunc_u8 (val, (guint64*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I8_R4) { float val = LOCAL_VAR (ip [2], float); - if (!mono_try_trunc_i64 (val, (gint64*)(locals + ip [1]))) + if (!mono_try_trunc_i8 (val, (gint64*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I8_R8) { double val = LOCAL_VAR (ip [2], double); - if (!mono_try_trunc_i64 (val, (gint64*)(locals + ip [1]))) + if (!mono_try_trunc_i8 (val, (gint64*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; @@ -6174,19 +6151,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I4_R4) { float val = LOCAL_VAR (ip [2], float); - double val_r8 = (double)val; - if (val_r8 > ((double)G_MININT32 - 1) && val_r8 < ((double)G_MAXINT32 + 1)) - LOCAL_VAR (ip [1], gint32) = (gint32) val; - else + if (!mono_try_trunc_i4 (val, (gint32*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I4_R8) { double val = LOCAL_VAR (ip [2], double); - if (val > ((double)G_MININT32 - 1) && val < ((double)G_MAXINT32 + 1)) - LOCAL_VAR (ip [1], gint32) = (gint32) val; - else + if (!mono_try_trunc_i4 (val, (gint32*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; @@ -6209,19 +6181,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U4_R4) { float val = LOCAL_VAR (ip [2], float); - double val_r8 = val; - if (val_r8 > -1.0 && val_r8 < ((double)G_MAXUINT32 + 1)) - LOCAL_VAR (ip [1], gint32) = (guint32)val; - else + if (!mono_try_trunc_u4 (val, (guint32*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U4_R8) { double val = LOCAL_VAR (ip [2], double); - if (val > -1.0 && val < ((double)G_MAXUINT32 + 1)) - LOCAL_VAR (ip [1], gint32) = (guint32)val; - else + if (!mono_try_trunc_u4 (val, (guint32*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; @@ -6260,18 +6227,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I2_R4) { float val = LOCAL_VAR (ip [2], float); - if (val > (G_MININT16 - 1) && val < (G_MAXINT16 + 1)) - LOCAL_VAR (ip [1], gint32) = (gint16) val; - else + if (!mono_try_trunc_i2 (val, (gint16*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I2_R8) { double val = LOCAL_VAR (ip [2], double); - if (val > (G_MININT16 - 1) && val < (G_MAXINT16 + 1)) - LOCAL_VAR (ip [1], gint32) = (gint16) val; - else + if (!mono_try_trunc_i2 (val, (gint16*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; 
MINT_IN_BREAK; @@ -6294,18 +6257,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U2_R4) { float val = LOCAL_VAR (ip [2], float); - if (val > -1.0f && val < (G_MAXUINT16 + 1)) - LOCAL_VAR (ip [1], gint32) = (guint16) val; - else + if (!mono_try_trunc_u2 (val, (guint16*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U2_R8) { double val = LOCAL_VAR (ip [2], double); - if (val > -1.0 && val < (G_MAXUINT16 + 1)) - LOCAL_VAR (ip [1], gint32) = (guint16) val; - else + if (!mono_try_trunc_u2 (val, (guint16*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; @@ -6344,18 +6303,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I1_R4) { float val = LOCAL_VAR (ip [2], float); - if (val > (G_MININT8 - 1) && val < (G_MAXINT8 + 1)) - LOCAL_VAR (ip [1], gint32) = (gint8) val; - else + if (!mono_try_trunc_i1 (val, (gint8*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_I1_R8) { double val = LOCAL_VAR (ip [2], double); - if (val > (G_MININT8 - 1) && val < (G_MAXINT8 + 1)) - LOCAL_VAR (ip [1], gint32) = (gint8) val; - else + if (!mono_try_trunc_i1 (val, (gint8*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; @@ -6378,18 +6333,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U1_R4) { float val = LOCAL_VAR (ip [2], float); - if (val > -1.0f && val < (G_MAXUINT8 + 1)) - LOCAL_VAR (ip [1], gint32) = (guint8)val; - else + if (!mono_try_trunc_u1 (val, (guint8*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; } MINT_IN_CASE(MINT_CONV_OVF_U1_R8) { double val = LOCAL_VAR (ip [2], double); - if (val > -1.0 && val < (G_MAXUINT8 + 1)) - LOCAL_VAR (ip [1], gint32) = (guint8)val; - else + if (!mono_try_trunc_u1 (val, (guint8*)(locals + ip [1]))) THROW_EX (interp_get_exception_overflow (frame, ip), ip); ip += 3; MINT_IN_BREAK; diff --git a/src/mono/mono/mini/jit-icalls.c b/src/mono/mono/mini/jit-icalls.c index d40ff1f321cfc..b4f8242a7cc94 100644 --- a/src/mono/mono/mini/jit-icalls.c +++ b/src/mono/mono/mini/jit-icalls.c @@ -606,54 +606,6 @@ mono_fneg (double a) return -a; } -double -mono_fconv_r4 (double a) -{ - return (float)a; -} - -double -mono_conv_to_r8 (int a) -{ - return (double)a; -} - -double -mono_conv_to_r4 (int a) -{ - return (double)(float)a; -} - -gint8 -mono_fconv_i1 (double a) -{ - return (gint8)a; -} - -gint16 -mono_fconv_i2 (double a) -{ - return (gint16)a; -} - -gint32 -mono_fconv_i4 (double a) -{ - return (gint32)a; -} - -guint8 -mono_fconv_u1 (double a) -{ - return (guint8)a; -} - -guint16 -mono_fconv_u2 (double a) -{ - return (guint16)a; -} - gboolean mono_fcmp_eq (double a, double b) { @@ -918,185 +870,435 @@ mono_ldtoken_wrapper_generic_shared (MonoImage *image, int token, MonoMethod *me return mono_ldtoken_wrapper (image, token, generic_context); } -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U8 -guint64 -mono_fconv_u8 (double v) +float +mono_conv_to_r4 (gint32 v) { -#if defined(TARGET_X86) || defined(TARGET_AMD64) - const double two63 = 2147483648.0 * 4294967296.0; - if (v < two63) { - return (gint64)v; - } else { - return (gint64)(v - two63) + ((guint64)1 << 63); - } -#else - if (mono_isinf (v) || mono_isnan (v)) + return 
(float)v; +} + +float +mono_conv_to_r4_un (guint32 v) +{ + return (float)v; +} + +double +mono_conv_to_r8 (gint32 v) +{ + return (double)v; +} + +double +mono_conv_to_r8_un (guint32 v) +{ + return (double)v; +} + +gint8 +mono_fconv_i1 (double v) +{ + if (mono_isnan (v)) { + // NAN should return 0 return 0; - return (guint64)v; -#endif + } + + if (v <= -129.0) { + // Too small should saturate to int8::min + return G_MININT8; + } + + if (v >= +128.0) { + // Too large should saturate to int8::max + return G_MAXINT8; + } + + return (gint8)v; } -guint64 -mono_rconv_u8 (float v) +gint16 +mono_fconv_i2 (double v) { -#if defined(TARGET_X86) || defined(TARGET_AMD64) - const float two63 = 2147483648.0 * 4294967296.0; - if (v < two63) { - return (gint64)v; - } else { - return (gint64)(v - two63) + ((guint64)1 << 63); + if (mono_isnan (v)) { + // NAN should return 0 + return 0; } -#else - if (mono_isinf (v) || mono_isnan (v)) + + if (v <= -32769.0) { + // Too small should saturate to int16::min + return G_MININT16; + } + + if (v >= +32768.0) { + // Too large should saturate to int16::max + return G_MAXINT16; + } + + return (gint16)v; +} + +gint32 +mono_fconv_i4 (double v) +{ + if (mono_isnan (v)) { + // NAN should return 0 return 0; - return (guint64)v; -#endif + } + + if (v <= -2147483649.0) { + // Too small should saturate to int32::min + return G_MININT32; + } + + if (v >= +2147483648.0) { + // Too large should saturate to int32::max + return G_MAXINT32; + } + + return (gint32)v; } -#endif -#ifdef MONO_ARCH_EMULATE_FCONV_TO_I8 gint64 mono_fconv_i8 (double v) { + if (mono_isnan (v)) { + // NAN should return 0 + return 0; + } + + if (v <= -9223372036854777856.0) { + // Too small should saturate to int64::min + return G_MININT64; + } + + if (v >= +9223372036854775808.0) { + // Too large should saturate to int64::max + return G_MAXINT64; + } + return (gint64)v; } -#endif -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U4 +float +mono_fconv_r4 (double v) +{ + return (float)v; +} + +guint8 +mono_fconv_u1 (double v) +{ + if (mono_isnan (v)) { + // NAN should return 0 + return 0; + } + + if (v <= -1.0) { + // Too small should saturate to uint8::min + return 0; // G_MINUINT8 + } + + if (v >= +256.0) { + // Too large should saturate to uint8::max + return G_MAXUINT8; + } + + return (guint8)v; +} + +guint16 +mono_fconv_u2 (double v) +{ + if (mono_isnan (v)) { + // NAN should return 0 + return 0; + } + + if (v <= -1.0) { + // Too small should saturate to uint16::min + return 0; // G_MINUINT16 + } + + if (v >= +65536.0) { + // Too large should saturate to uint16::max + return G_MAXUINT16; + } + + return (guint16)v; +} + guint32 mono_fconv_u4 (double v) { - /* MS.NET behaves like this for some reason */ - if (mono_isinf (v) || mono_isnan (v)) + if (mono_isnan (v)) { + // NAN should return 0 return 0; + } + + if (v <= -1.0) { + // Too small should saturate to uint32::min + return 0; // G_MINUINT32 + } + + if (v >= +4294967296.0) { + // Too large should saturate to uint32::max + return G_MAXUINT32; + } + return (guint32)v; } -guint32 -mono_rconv_u4 (float v) +guint64 +mono_fconv_u8 (double v) { - if (mono_isinf (v) || mono_isnan (v)) + if (mono_isnan (v)) { + // NAN should return 0 return 0; - return (guint32) v; + } + + if (v <= -1.0) { + // Too small should saturate to uint64::min + return 0; // G_MINUINT64 + } + + if (v >= +18446744073709551616.0) { + // Too large values should saturate to uint64::max + return G_MAXUINT64; + } + + return (guint64)v; +} + +gint8 +mono_fconv_ovf_i1 (double v) +{ + gint8 r; + if
(mono_try_trunc_i1(v, &r)) { + return r; + } + + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; +} + +gint16 +mono_fconv_ovf_i2 (double v) +{ + gint16 r; + if (mono_try_trunc_i2(v, &r)) { + return r; + } + + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; +} + +gint32 +mono_fconv_ovf_i4 (double v) +{ + gint32 r; + if (mono_try_trunc_i4(v, &r)) { + return r; + } + + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; } -#endif gint64 mono_fconv_ovf_i8 (double v) { - const gint64 res = (gint64)v; + gint64 r; + if (mono_try_trunc_i8(v, &r)) { + return r; + } - if (mono_isnan (v) || mono_trunc (v) != res) { - ERROR_DECL (error); - mono_error_set_overflow (error); - mono_error_set_pending_exception (error); - return 0; + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; +} + +guint8 +mono_fconv_ovf_u1 (double v) +{ + guint8 r; + if (mono_try_trunc_u1(v, &r)) { + return r; } - return res; + + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; } -guint64 -mono_fconv_ovf_u8 (double v) +guint16 +mono_fconv_ovf_u2 (double v) { - guint64 res; + guint16 r; + if (mono_try_trunc_u2(v, &r)) { + return r; + } -/* - * The soft-float implementation of some ARM devices have a buggy guin64 to double - * conversion that it looses precision even when the integer if fully representable - * as a double. - * - * This was found with 4294967295ull, converting to double and back looses one bit of precision. - * - * To work around this issue we test for value boundaries instead. 
- */ -#if defined(__arm__) && defined(MONO_ARCH_SOFT_FLOAT_FALLBACK) - if (mono_isnan (v) || !(v >= -0.5 && v <= ULLONG_MAX+0.5)) { - ERROR_DECL (error); - mono_error_set_overflow (error); - mono_error_set_pending_exception (error); - return 0; + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; +} + +guint32 +mono_fconv_ovf_u4 (double v) +{ + guint32 r; + if (mono_try_trunc_u4(v, &r)) { + return r; } - res = (guint64)v; -#else - res = (guint64)v; - if (mono_isnan (v) || mono_trunc (v) != res) { - ERROR_DECL (error); - mono_error_set_overflow (error); - mono_error_set_pending_exception (error); - return 0; + + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; +} + +guint64 +mono_fconv_ovf_u8 (double v) +{ + guint64 r; + if (mono_try_trunc_u8(v, &r)) { + return r; } -#endif - return res; + + ERROR_DECL (error); + mono_error_set_overflow (error); + mono_error_set_pending_exception (error); + return 0; +} + +float +mono_lconv_to_r4 (gint64 v) +{ + return (float)v; +} + +float +mono_lconv_to_r4_un (guint64 v) +{ + return (float)v; +} + +double +mono_lconv_to_r8 (gint64 v) +{ + return (double)v; +} + +double +mono_lconv_to_r8_un (guint64 v) +{ + return (double)v; +} + +gint8 +mono_rconv_i1 (float v) +{ + return mono_fconv_i1(v); +} + +gint16 +mono_rconv_i2 (float v) +{ + return mono_fconv_i2(v); +} + +gint32 +mono_rconv_i4 (float v) +{ + return mono_fconv_i4(v); } -#ifdef MONO_ARCH_EMULATE_FCONV_TO_I8 gint64 mono_rconv_i8 (float v) { - return (gint64)v; + return mono_fconv_i8(v); } -#endif -gint64 -mono_rconv_ovf_i8 (float v) +guint8 +mono_rconv_u1 (float v) { - const gint64 res = (gint64)v; + return mono_fconv_u1(v); +} - if (mono_isnan (v) || mono_trunc (v) != res) { - ERROR_DECL (error); - mono_error_set_overflow (error); - mono_error_set_pending_exception (error); - return 0; - } - return res; +guint16 +mono_rconv_u2 (float v) +{ + return mono_fconv_u2(v); +} + +guint32 +mono_rconv_u4 (float v) +{ + return mono_fconv_u4(v); } guint64 -mono_rconv_ovf_u8 (float v) +mono_rconv_u8 (float v) { - guint64 res; + return mono_fconv_u8(v); +} - res = (guint64)v; - if (mono_isnan (v) || mono_trunc (v) != res) { - ERROR_DECL (error); - mono_error_set_overflow (error); - mono_error_set_pending_exception (error); - return 0; - } - return res; +gint8 +mono_rconv_ovf_i1 (float v) +{ + return mono_fconv_ovf_i1(v); } -#ifdef MONO_ARCH_EMULATE_LCONV_TO_R8 -double -mono_lconv_to_r8 (gint64 a) +gint16 +mono_rconv_ovf_i2 (float v) { - return (double)a; + return mono_fconv_ovf_i2(v); } -#endif -#ifdef MONO_ARCH_EMULATE_LCONV_TO_R4 -float -mono_lconv_to_r4 (gint64 a) +gint32 +mono_rconv_ovf_i4 (float v) { - return (float)a; + return mono_fconv_ovf_i4(v); } -#endif -#ifdef MONO_ARCH_EMULATE_CONV_R8_UN -double -mono_conv_to_r8_un (guint32 a) +gint64 +mono_rconv_ovf_i8 (float v) { - return (double)a; + return mono_fconv_ovf_i8(v); } -#endif -#ifdef MONO_ARCH_EMULATE_LCONV_TO_R8_UN -double -mono_lconv_to_r8_un (guint64 a) +guint8 +mono_rconv_ovf_u1 (float v) { - return (double)a; + return mono_fconv_ovf_u1(v); +} + +guint16 +mono_rconv_ovf_u2 (float v) +{ + return mono_fconv_ovf_u2(v); +} + +guint32 +mono_rconv_ovf_u4 (float v) +{ + return mono_fconv_ovf_u4(v); +} + +guint64 +mono_rconv_ovf_u8 (float v) +{ + return mono_fconv_ovf_u8(v); } -#endif #ifdef MONO_ARCH_EMULATE_FREM // Wrapper to avoid taking address of overloaded function. 
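The saturating helpers added to jit-icalls.c above all follow the same shape: NaN converts to zero, any input at or below (T.MinValue - 1) saturates to T.MinValue, any input at or above (T.MaxValue + 1) saturates to T.MaxValue, and everything in between truncates toward zero. As a rough stand-alone sketch of that rule (the C# method name below is illustrative only and appears nowhere in this patch), the double -> int32 case looks like:

    static int SaturatingDoubleToInt32(double v)
    {
        if (double.IsNaN(v))
            return 0;              // NaN converts to zero
        if (v <= -2147483649.0)
            return int.MinValue;   // at or below (int.MinValue - 1) saturates down
        if (v >= +2147483648.0)
            return int.MaxValue;   // at or above (int.MaxValue + 1) saturates up
        return (int)v;             // in-range values truncate toward zero
    }

The checked (_ovf) variants use the same bounds but report an overflow (a pending OverflowException in Mono) instead of clamping.
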
diff --git a/src/mono/mono/mini/jit-icalls.h b/src/mono/mono/mini/jit-icalls.h index be54726728584..78dd0e8b17cb7 100644 --- a/src/mono/mono/mini/jit-icalls.h +++ b/src/mono/mono/mini/jit-icalls.h @@ -76,37 +76,87 @@ ICALL_EXPORT gpointer mono_ldtoken_wrapper (MonoImage *image, int token, MonoGen ICALL_EXPORT gpointer mono_ldtoken_wrapper_generic_shared (MonoImage *image, int token, MonoMethod *method); -ICALL_EXPORT guint64 mono_fconv_u8 (double v); +ICALL_EXPORT float mono_conv_to_r4 (gint32 v); -ICALL_EXPORT guint64 mono_rconv_u8 (float v); +ICALL_EXPORT float mono_conv_to_r4_un (guint32 v); + +ICALL_EXPORT double mono_conv_to_r8 (gint32 v); + +ICALL_EXPORT double mono_conv_to_r8_un (guint32 v); + +ICALL_EXPORT gint8 mono_fconv_i1 (double v); + +ICALL_EXPORT gint16 mono_fconv_i2 (double v); + +ICALL_EXPORT gint32 mono_fconv_i4 (double v); ICALL_EXPORT gint64 mono_fconv_i8 (double v); +ICALL_EXPORT float mono_fconv_r4 (double v); + +ICALL_EXPORT guint8 mono_fconv_u1 (double v); + +ICALL_EXPORT guint16 mono_fconv_u2 (double v); + ICALL_EXPORT guint32 mono_fconv_u4 (double v); -ICALL_EXPORT guint32 mono_rconv_u4 (float v); +ICALL_EXPORT guint64 mono_fconv_u8 (double v); + +ICALL_EXPORT gint8 mono_fconv_ovf_i1 (double v); + +ICALL_EXPORT gint16 mono_fconv_ovf_i2 (double v); + +ICALL_EXPORT gint32 mono_fconv_ovf_i4 (double v); ICALL_EXPORT gint64 mono_fconv_ovf_i8 (double v); +ICALL_EXPORT guint8 mono_fconv_ovf_u1 (double v); + +ICALL_EXPORT guint16 mono_fconv_ovf_u2 (double v); + +ICALL_EXPORT guint32 mono_fconv_ovf_u4 (double v); + ICALL_EXPORT guint64 mono_fconv_ovf_u8 (double v); +ICALL_EXPORT float mono_lconv_to_r4 (gint64 v); + +ICALL_EXPORT float mono_lconv_to_r4_un (guint64 v); + +ICALL_EXPORT double mono_lconv_to_r8 (gint64 v); + +ICALL_EXPORT double mono_lconv_to_r8_un (guint64 v); + +ICALL_EXPORT gint8 mono_rconv_i1 (float v); + +ICALL_EXPORT gint16 mono_rconv_i2 (float v); + +ICALL_EXPORT gint32 mono_rconv_i4 (float v); + ICALL_EXPORT gint64 mono_rconv_i8 (float v); -ICALL_EXPORT gint64 mono_rconv_ovf_i8 (float v); +ICALL_EXPORT guint8 mono_rconv_u1 (float v); -ICALL_EXPORT guint64 mono_rconv_ovf_u8 (float v); +ICALL_EXPORT guint16 mono_rconv_u2 (float v); + +ICALL_EXPORT guint32 mono_rconv_u4 (float v); + +ICALL_EXPORT guint64 mono_rconv_u8 (float v); -ICALL_EXPORT double mono_lconv_to_r8 (gint64 a); +ICALL_EXPORT gint8 mono_rconv_ovf_i1 (float v); -ICALL_EXPORT double mono_conv_to_r8 (gint32 a); +ICALL_EXPORT gint16 mono_rconv_ovf_i2 (float v); -ICALL_EXPORT double mono_conv_to_r4 (gint32 a); +ICALL_EXPORT gint32 mono_rconv_ovf_i4 (float v); -ICALL_EXPORT float mono_lconv_to_r4 (gint64 a); +ICALL_EXPORT gint64 mono_rconv_ovf_i8 (float v); + +ICALL_EXPORT guint8 mono_rconv_ovf_u1 (float v); -ICALL_EXPORT double mono_conv_to_r8_un (guint32 a); +ICALL_EXPORT guint16 mono_rconv_ovf_u2 (float v); -ICALL_EXPORT double mono_lconv_to_r8_un (guint64 a); +ICALL_EXPORT guint32 mono_rconv_ovf_u4 (float v); + +ICALL_EXPORT guint64 mono_rconv_ovf_u8 (float v); ICALL_EXPORT gpointer mono_helper_compile_generic_method (MonoObject *obj, MonoMethod *method, gpointer *this_arg); @@ -124,18 +174,6 @@ ICALL_EXPORT double mono_fmul (double a, double b); ICALL_EXPORT double mono_fneg (double a); -ICALL_EXPORT double mono_fconv_r4 (double a); - -ICALL_EXPORT gint8 mono_fconv_i1 (double a); - -ICALL_EXPORT gint16 mono_fconv_i2 (double a); - -ICALL_EXPORT gint32 mono_fconv_i4 (double a); - -ICALL_EXPORT guint8 mono_fconv_u1 (double a); - -ICALL_EXPORT guint16 mono_fconv_u2 (double a); - ICALL_EXPORT 
gboolean mono_fcmp_eq (double a, double b); ICALL_EXPORT gboolean mono_fcmp_ge (double a, double b); diff --git a/src/mono/mono/mini/mini-amd64.h b/src/mono/mono/mini/mini-amd64.h index 8872ece7ee97e..03bc31fe055a0 100644 --- a/src/mono/mono/mini/mini-amd64.h +++ b/src/mono/mono/mini/mini-amd64.h @@ -409,10 +409,6 @@ typedef struct { #define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS #define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS -#define MONO_ARCH_EMULATE_CONV_R8_UN 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 -// x64 FullAOT+LLVM fails to pass the basic-float tests without this. -#define MONO_ARCH_EMULATE_FCONV_TO_U4 1 #define MONO_ARCH_EMULATE_FREM 1 #define MONO_ARCH_HAVE_IS_INT_OVERFLOW 1 #define MONO_ARCH_HAVE_INVALIDATE_METHOD 1 diff --git a/src/mono/mono/mini/mini-arm.h b/src/mono/mono/mini/mini-arm.h index 12581b13992b3..4bbcd5debee89 100644 --- a/src/mono/mono/mini/mini-arm.h +++ b/src/mono/mono/mini/mini-arm.h @@ -310,15 +310,8 @@ typedef struct MonoCompileArch { int thunks_size; } MonoCompileArch; -#define MONO_ARCH_EMULATE_FCONV_TO_U4 1 -#define MONO_ARCH_EMULATE_FCONV_TO_I8 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R4 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN 1 #define MONO_ARCH_EMULATE_FREM 1 #define MONO_ARCH_EMULATE_DIV 1 -#define MONO_ARCH_EMULATE_CONV_R8_UN 1 #define MONO_ARCH_EMULATE_MUL_OVF 1 #define ARM_FIRST_ARG_REG 0 diff --git a/src/mono/mono/mini/mini-arm64.h b/src/mono/mono/mini/mini-arm64.h index 8feacf8f81e49..0874135035a4d 100644 --- a/src/mono/mono/mini/mini-arm64.h +++ b/src/mono/mono/mini/mini-arm64.h @@ -125,19 +125,12 @@ typedef struct { int thunks_size; } MonoCompileArch; -#define MONO_ARCH_EMULATE_FCONV_TO_U4 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 #ifdef MONO_ARCH_ILP32 /* For the watch (starting with series 4), a new ABI is introduced: arm64_32. * We can still use the older AOT compiler to produce bitcode, because it's * "offset compatible". However, since it is targeting arm7k, it makes certain * assumptions that we need to align here. 
*/ -#define MONO_ARCH_EMULATE_FCONV_TO_I8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R4 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN 1 #define MONO_ARCH_EMULATE_DIV 1 -#define MONO_ARCH_EMULATE_CONV_R8_UN 1 #else #define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS 1 #define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS 1 diff --git a/src/mono/mono/mini/mini-mips.h b/src/mono/mono/mini/mini-mips.h index b3a9920adf4e7..bccc3db390a15 100644 --- a/src/mono/mono/mini/mini-mips.h +++ b/src/mono/mono/mini/mini-mips.h @@ -229,15 +229,9 @@ typedef struct MonoCompileArch { } MonoCompileArch; #if SIZEOF_REGISTER == 4 -#define MONO_ARCH_EMULATE_FCONV_TO_I8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R4 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN 1 #define MONO_ARCH_EMULATE_FREM 1 #endif -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 - /* * mips backend misses some instructions that enable emitting of optimal * code on other targets and, additionally, the register allocator gets diff --git a/src/mono/mono/mini/mini-ppc.h b/src/mono/mono/mini/mini-ppc.h index 88a5415c5a925..8e6b6acf60048 100644 --- a/src/mono/mono/mini/mini-ppc.h +++ b/src/mono/mono/mini/mini-ppc.h @@ -97,15 +97,8 @@ typedef struct MonoCompileArch { /* 32 and 64 bit AIX use function descriptors */ #define PPC_USES_FUNCTION_DESCRIPTOR #endif - -#define MONO_ARCH_EMULATE_FCONV_TO_I8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R4 1 #endif -#define MONO_ARCH_EMULATE_FCONV_TO_U4 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN 1 #define MONO_ARCH_EMULATE_FREM 1 #define MONO_ARCH_GC_MAPS_SUPPORTED 1 diff --git a/src/mono/mono/mini/mini-riscv.h b/src/mono/mono/mini/mini-riscv.h index 3691718be689c..64bed3f9360cb 100644 --- a/src/mono/mono/mini/mini-riscv.h +++ b/src/mono/mono/mini/mini-riscv.h @@ -114,14 +114,6 @@ #endif -#define MONO_ARCH_EMULATE_CONV_R8_UN (1) -#define MONO_ARCH_EMULATE_FCONV_TO_U8 (1) -#define MONO_ARCH_EMULATE_FCONV_TO_U4 (1) -#define MONO_ARCH_EMULATE_FCONV_TO_I8 (1) -#define MONO_ARCH_EMULATE_LCONV_TO_R8 (1) -#define MONO_ARCH_EMULATE_LCONV_TO_R4 (1) -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN (1) - #define MONO_ARCH_NEED_DIV_CHECK (1) #define MONO_ARCH_HAVE_OP_TAIL_CALL (1) diff --git a/src/mono/mono/mini/mini-runtime.c b/src/mono/mono/mini/mini-runtime.c index e9f466491d983..9964dd96c0cb7 100644 --- a/src/mono/mono/mini/mini-runtime.c +++ b/src/mono/mono/mini/mini-runtime.c @@ -4761,36 +4761,64 @@ register_icalls (void) register_opcode_emulation (OP_FDIV, __emul_fdiv, mono_icall_sig_double_double_double, mono_fdiv, FALSE); #endif -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U8 - register_opcode_emulation (OP_FCONV_TO_U8, __emul_fconv_to_u8, mono_icall_sig_ulong_double, mono_fconv_u8, FALSE); - register_opcode_emulation (OP_RCONV_TO_U8, __emul_rconv_to_u8, mono_icall_sig_ulong_float, mono_rconv_u8, FALSE); -#endif -#ifdef MONO_ARCH_EMULATE_FCONV_TO_U4 + register_opcode_emulation (OP_FCONV_TO_I1, __emul_fconv_to_i1, mono_icall_sig_int8_double, mono_fconv_i1, FALSE); + register_opcode_emulation (OP_FCONV_TO_I2, __emul_fconv_to_i2, mono_icall_sig_int16_double, mono_fconv_i2, FALSE); + register_opcode_emulation (OP_FCONV_TO_I4, __emul_fconv_to_i4, mono_icall_sig_int32_double, mono_fconv_i4, FALSE); + register_opcode_emulation (OP_FCONV_TO_I8, __emul_fconv_to_i8, mono_icall_sig_long_double, mono_fconv_i8, FALSE); + register_opcode_emulation (OP_FCONV_TO_R4, __emul_fconv_to_r4, mono_icall_sig_float_double, mono_fconv_r4, 
FALSE); + register_opcode_emulation (OP_FCONV_TO_U1, __emul_fconv_to_u1, mono_icall_sig_uint8_double, mono_fconv_u1, FALSE); + register_opcode_emulation (OP_FCONV_TO_U2, __emul_fconv_to_u2, mono_icall_sig_uint16_double, mono_fconv_u2, FALSE); register_opcode_emulation (OP_FCONV_TO_U4, __emul_fconv_to_u4, mono_icall_sig_uint32_double, mono_fconv_u4, FALSE); - register_opcode_emulation (OP_RCONV_TO_U4, __emul_rconv_to_u4, mono_icall_sig_uint32_float, mono_rconv_u4, FALSE); + register_opcode_emulation (OP_FCONV_TO_U8, __emul_fconv_to_u8, mono_icall_sig_ulong_double, mono_fconv_u8, FALSE); + +#if TARGET_SIZEOF_VOID_P == 4 + register_opcode_emulation (OP_FCONV_TO_I, __emul_fconv_to_i, mono_icall_sig_int32_double, mono_fconv_i4, FALSE); +#else + register_opcode_emulation (OP_FCONV_TO_I, __emul_fconv_to_i, mono_icall_sig_long_double, mono_fconv_i8, FALSE); #endif + + register_opcode_emulation (OP_FCONV_TO_OVF_I1, __emul_fconv_to_ovf_i1, mono_icall_sig_int8_double, mono_fconv_ovf_i1, FALSE); + register_opcode_emulation (OP_FCONV_TO_OVF_I2, __emul_fconv_to_ovf_i2, mono_icall_sig_int16_double, mono_fconv_ovf_i2, FALSE); + register_opcode_emulation (OP_FCONV_TO_OVF_I4, __emul_fconv_to_ovf_i4, mono_icall_sig_int32_double, mono_fconv_ovf_i4, FALSE); register_opcode_emulation (OP_FCONV_TO_OVF_I8, __emul_fconv_to_ovf_i8, mono_icall_sig_long_double, mono_fconv_ovf_i8, FALSE); + register_opcode_emulation (OP_FCONV_TO_OVF_U1, __emul_fconv_to_ovf_u1, mono_icall_sig_uint8_double, mono_fconv_ovf_u1, FALSE); + register_opcode_emulation (OP_FCONV_TO_OVF_U2, __emul_fconv_to_ovf_u2, mono_icall_sig_uint16_double, mono_fconv_ovf_u2, FALSE); + register_opcode_emulation (OP_FCONV_TO_OVF_U4, __emul_fconv_to_ovf_u4, mono_icall_sig_uint32_double, mono_fconv_ovf_u4, FALSE); register_opcode_emulation (OP_FCONV_TO_OVF_U8, __emul_fconv_to_ovf_u8, mono_icall_sig_ulong_double, mono_fconv_ovf_u8, FALSE); - register_opcode_emulation (OP_RCONV_TO_OVF_I8, __emul_rconv_to_ovf_i8, mono_icall_sig_long_float, mono_rconv_ovf_i8, FALSE); - register_opcode_emulation (OP_RCONV_TO_OVF_U8, __emul_rconv_to_ovf_u8, mono_icall_sig_ulong_float, mono_rconv_ovf_u8, FALSE); -#ifdef MONO_ARCH_EMULATE_FCONV_TO_I8 - register_opcode_emulation (OP_FCONV_TO_I8, __emul_fconv_to_i8, mono_icall_sig_long_double, mono_fconv_i8, FALSE); - register_opcode_emulation (OP_RCONV_TO_I8, __emul_rconv_to_i8, mono_icall_sig_long_float, mono_rconv_i8, FALSE); -#endif + register_opcode_emulation (OP_ICONV_TO_R4, __emul_iconv_to_r4, mono_icall_sig_float_int32, mono_conv_to_r4, FALSE); + register_opcode_emulation (OP_ICONV_TO_R8, __emul_iconv_to_r8, mono_icall_sig_double_int32, mono_conv_to_r8, FALSE); -#ifdef MONO_ARCH_EMULATE_CONV_R8_UN - register_opcode_emulation (OP_ICONV_TO_R_UN, __emul_iconv_to_r_un, mono_icall_sig_double_int32, mono_conv_to_r8_un, FALSE); -#endif -#ifdef MONO_ARCH_EMULATE_LCONV_TO_R8 - register_opcode_emulation (OP_LCONV_TO_R8, __emul_lconv_to_r8, mono_icall_sig_double_long, mono_lconv_to_r8, FALSE); -#endif -#ifdef MONO_ARCH_EMULATE_LCONV_TO_R4 register_opcode_emulation (OP_LCONV_TO_R4, __emul_lconv_to_r4, mono_icall_sig_float_long, mono_lconv_to_r4, FALSE); + register_opcode_emulation (OP_LCONV_TO_R8, __emul_lconv_to_r8, mono_icall_sig_double_long, mono_lconv_to_r8, FALSE); + + register_opcode_emulation (OP_ICONV_TO_R_UN, __emul_iconv_to_r8_un, mono_icall_sig_double_uint32, mono_conv_to_r8_un, FALSE); + register_opcode_emulation (OP_LCONV_TO_R_UN, __emul_lconv_to_r8_un, mono_icall_sig_double_ulong, mono_lconv_to_r8_un, FALSE); + + 
register_opcode_emulation (OP_RCONV_TO_I1, __emul_rconv_to_i1, mono_icall_sig_int8_float, mono_rconv_i1, FALSE); + register_opcode_emulation (OP_RCONV_TO_I2, __emul_rconv_to_i2, mono_icall_sig_int16_float, mono_rconv_i2, FALSE); + register_opcode_emulation (OP_RCONV_TO_I4, __emul_rconv_to_i4, mono_icall_sig_int32_float, mono_rconv_i4, FALSE); + register_opcode_emulation (OP_RCONV_TO_I8, __emul_rconv_to_i8, mono_icall_sig_long_float, mono_rconv_i8, FALSE); + register_opcode_emulation (OP_RCONV_TO_U1, __emul_rconv_to_u1, mono_icall_sig_uint8_float, mono_rconv_u1, FALSE); + register_opcode_emulation (OP_RCONV_TO_U2, __emul_rconv_to_u2, mono_icall_sig_uint16_float, mono_rconv_u2, FALSE); + register_opcode_emulation (OP_RCONV_TO_U4, __emul_rconv_to_u4, mono_icall_sig_uint32_float, mono_rconv_u4, FALSE); + register_opcode_emulation (OP_RCONV_TO_U8, __emul_rconv_to_u8, mono_icall_sig_ulong_float, mono_rconv_u8, FALSE); + +#if TARGET_SIZEOF_VOID_P == 4 + register_opcode_emulation (OP_RCONV_TO_I, __emul_rconv_to_i, mono_icall_sig_int32_float, mono_rconv_i4, FALSE); +#else + register_opcode_emulation (OP_RCONV_TO_I, __emul_rconv_to_i, mono_icall_sig_long_float, mono_rconv_i8, FALSE); #endif -#ifdef MONO_ARCH_EMULATE_LCONV_TO_R8_UN - register_opcode_emulation (OP_LCONV_TO_R_UN, __emul_lconv_to_r8_un, mono_icall_sig_double_long, mono_lconv_to_r8_un, FALSE); -#endif + + register_opcode_emulation (OP_RCONV_TO_OVF_I1, __emul_rconv_to_ovf_i1, mono_icall_sig_int8_float, mono_rconv_ovf_i1, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_I2, __emul_rconv_to_ovf_i2, mono_icall_sig_int16_float, mono_rconv_ovf_i2, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_I4, __emul_rconv_to_ovf_i4, mono_icall_sig_int32_float, mono_rconv_ovf_i4, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_I8, __emul_rconv_to_ovf_i8, mono_icall_sig_long_float, mono_rconv_ovf_i8, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_U1, __emul_rconv_to_ovf_u1, mono_icall_sig_uint8_float, mono_rconv_ovf_u1, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_U2, __emul_rconv_to_ovf_u2, mono_icall_sig_uint16_float, mono_rconv_ovf_u2, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_U4, __emul_rconv_to_ovf_u4, mono_icall_sig_uint32_float, mono_rconv_ovf_u4, FALSE); + register_opcode_emulation (OP_RCONV_TO_OVF_U8, __emul_rconv_to_ovf_u8, mono_icall_sig_ulong_float, mono_rconv_ovf_u8, FALSE); + #ifdef MONO_ARCH_EMULATE_FREM register_opcode_emulation (OP_FREM, __emul_frem, mono_icall_sig_double_double_double, mono_fmod, FALSE); register_opcode_emulation (OP_RREM, __emul_rrem, mono_icall_sig_float_float_float, fmodf, FALSE); @@ -4802,18 +4830,6 @@ register_icalls (void) register_opcode_emulation (OP_FADD, __emul_fadd, mono_icall_sig_double_double_double, mono_fadd, FALSE); register_opcode_emulation (OP_FMUL, __emul_fmul, mono_icall_sig_double_double_double, mono_fmul, FALSE); register_opcode_emulation (OP_FNEG, __emul_fneg, mono_icall_sig_double_double, mono_fneg, FALSE); - register_opcode_emulation (OP_ICONV_TO_R8, __emul_iconv_to_r8, mono_icall_sig_double_int32, mono_conv_to_r8, FALSE); - register_opcode_emulation (OP_ICONV_TO_R4, __emul_iconv_to_r4, mono_icall_sig_double_int32, mono_conv_to_r4, FALSE); - register_opcode_emulation (OP_FCONV_TO_R4, __emul_fconv_to_r4, mono_icall_sig_double_double, mono_fconv_r4, FALSE); - register_opcode_emulation (OP_FCONV_TO_I1, __emul_fconv_to_i1, mono_icall_sig_int8_double, mono_fconv_i1, FALSE); - register_opcode_emulation (OP_FCONV_TO_I2, __emul_fconv_to_i2, 
mono_icall_sig_int16_double, mono_fconv_i2, FALSE); - register_opcode_emulation (OP_FCONV_TO_I4, __emul_fconv_to_i4, mono_icall_sig_int32_double, mono_fconv_i4, FALSE); - register_opcode_emulation (OP_FCONV_TO_U1, __emul_fconv_to_u1, mono_icall_sig_uint8_double, mono_fconv_u1, FALSE); - register_opcode_emulation (OP_FCONV_TO_U2, __emul_fconv_to_u2, mono_icall_sig_uint16_double, mono_fconv_u2, FALSE); - -#if TARGET_SIZEOF_VOID_P == 4 - register_opcode_emulation (OP_FCONV_TO_I, __emul_fconv_to_i, mono_icall_sig_int32_double, mono_fconv_i4, FALSE); -#endif register_opcode_emulation (OP_FBEQ, __emul_fcmp_eq, mono_icall_sig_uint32_double_double, mono_fcmp_eq, FALSE); register_opcode_emulation (OP_FBLT, __emul_fcmp_lt, mono_icall_sig_uint32_double_double, mono_fcmp_lt, FALSE); diff --git a/src/mono/mono/mini/mini-sparc.h b/src/mono/mono/mini/mini-sparc.h index 1d96c2f0360ce..6390c7361f9e7 100644 --- a/src/mono/mono/mini/mini-sparc.h +++ b/src/mono/mono/mini/mini-sparc.h @@ -95,12 +95,6 @@ typedef struct MonoCompileArch { /*#define MONO_ARCH_SIGSEGV_ON_ALTSTACK*/ #endif -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 -#define MONO_ARCH_EMULATE_FCONV_TO_I8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R4 1 -#define MONO_ARCH_EMULATE_CONV_R8_UN 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN 1 #define MONO_ARCH_EMULATE_FREM 1 #define MONO_ARCH_NEED_DIV_CHECK 1 #define MONO_ARCH_IMT_REG sparc_g1 diff --git a/src/mono/mono/mini/mini-wasm.h b/src/mono/mono/mini/mini-wasm.h index d307077de1c33..ab87349a70ab1 100644 --- a/src/mono/mono/mini/mini-wasm.h +++ b/src/mono/mono/mini/mini-wasm.h @@ -27,8 +27,6 @@ #define MONO_ARCH_NO_CODEMAN 1 #define MONO_ARCH_EMULATE_FREM 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U4 1 #define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS 1 #define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS 1 #define MONO_ARCH_FLOAT32_SUPPORTED 1 diff --git a/src/mono/mono/mini/mini-x86.h b/src/mono/mono/mini/mini-x86.h index 7e5f7a25a9583..6f074f2a5ce5e 100644 --- a/src/mono/mono/mini/mini-x86.h +++ b/src/mono/mono/mini/mini-x86.h @@ -172,9 +172,6 @@ typedef struct { /* Enables OP_LSHL, OP_LSHL_IMM, OP_LSHR, OP_LSHR_IMM, OP_LSHR_UN, OP_LSHR_UN_IMM */ #define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS -#define MONO_ARCH_EMULATE_FCONV_TO_U8 1 -#define MONO_ARCH_EMULATE_FCONV_TO_U4 1 - #define MONO_ARCH_NEED_DIV_CHECK 1 #define MONO_ARCH_HAVE_IS_INT_OVERFLOW 1 #define MONO_ARCH_HAVE_INVALIDATE_METHOD 1 diff --git a/src/mono/mono/utils/mono-math.h b/src/mono/mono/utils/mono-math.h index a24a0a303a9ef..01e84a026b16a 100644 --- a/src/mono/mono/utils/mono-math.h +++ b/src/mono/mono/utils/mono-math.h @@ -101,11 +101,52 @@ mono_round_to_even (double x) } static inline gboolean -mono_try_trunc_i64 (double val, gint64 *out) +mono_try_trunc_i1 (double val, gint8 *out) { - const double two63 = 2147483648.0 * 4294967296.0; - // 0x402 is epsilon used to get us to the next value - if (val > (-two63 - 0x402) && val < two63) { + if (val > -129.0 && val < 128.0) { + // -129.0 and +128.0 are exactly representable + // Note that this expression also works properly for val = NaN case + *out = (gint8)val; + return TRUE; + } + return FALSE; +} + +static inline gboolean +mono_try_trunc_i2 (double val, gint16 *out) +{ + if (val > -32769.0 && val < +32768.0) { + // -32769.0 and +32768.0 are exactly representable + // Note that this expression also works properly for val = NaN case + *out = (gint16)val; + return TRUE; + } + return FALSE; +} + +static inline gboolean +mono_try_trunc_i4 
(double val, gint32 *out)
+{
+	if (val > -2147483649.0 && val < +2147483648.0) {
+		// -2147483649.0 and +2147483648.0 are exactly representable
+		// Note that this expression also works properly for val = NaN case
+		*out = (gint32)val;
+		return TRUE;
+	}
+	return FALSE;
+}
+
+static inline gboolean
+mono_try_trunc_i8 (double val, gint64 *out)
+{
+	if (val > -9223372036854777856.0 && val < +9223372036854775808.0) {
+		// +9223372036854775808.0 is exactly representable
+		//
+		// -9223372036854775809.0, however, is not and rounds to -9223372036854775808.0;
+		// we use -9223372036854777856.0 instead, which is the next representable value smaller
+		// than -9223372036854775808.0
+		//
+		// Note that this expression also works properly for val = NaN case
 		*out = (gint64)val;
 		return TRUE;
 	}
@@ -113,10 +154,46 @@ mono_try_trunc_i64 (double val, gint64 *out)
 }
 
 static inline gboolean
-mono_try_trunc_u64 (double val, guint64 *out)
+mono_try_trunc_u1 (double val, guint8 *out)
+{
+	if (val > -1.0 && val < +256.0) {
+		// -1.0 and +256.0 are exactly representable
+		// Note that the above condition also works properly for val = NaN case
+		return TRUE;
+	}
+	return FALSE;
+}
+
+static inline gboolean
+mono_try_trunc_u2 (double val, guint16 *out)
+{
+	if (val > -1.0 && val < +65536.0) {
+		// -1.0 and +65536.0 are exactly representable
+		// Note that the above condition also works properly for val = NaN case
+		*out = (guint16)val;
+		return TRUE;
+	}
+	return FALSE;
+}
+
+static inline gboolean
+mono_try_trunc_u4 (double val, guint32 *out)
+{
+	if (val > -1.0 && val < +4294967296.0) {
+		// -1.0 and +4294967296.0 are exactly representable
+		// Note that the above condition also works properly for val = NaN case
+		*out = (guint32)val;
+		return TRUE;
+	}
+	return FALSE;
+}
+
+static inline gboolean
+mono_try_trunc_u8 (double val, guint64 *out)
 {
-	const double two64 = 4294967296.0 * 4294967296.0;
-	if (val > -1.0 && val < two64) {
+	if (val > -1.0 && val < +18446744073709551616.0) {
+		// -1.0 and +18446744073709551616.0 are exactly representable
+		// Note that the above condition also works properly for val = NaN case
 		*out = (guint64)val;
 		return TRUE;
 	}

From f20d0f33b88acd1442182f9c7d6ce9597e2a9f09 Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Wed, 2 Mar 2022 10:59:05 -0800
Subject: [PATCH 3/5] Fixing the mono interp to assign locals as gint32

---
 src/mono/mono/mini/interp/interp.c | 32 ++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 62027cf8a501d..9e8388076c203 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -6227,15 +6227,19 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_I2_R4) {
 	float val = LOCAL_VAR (ip [2], float);
-	if (!mono_try_trunc_i2 (val, (gint16*)(locals + ip [1])))
+	gint16 res;
+	if (!mono_try_trunc_i2 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_I2_R8) {
 	double val = LOCAL_VAR (ip [2], double);
-	if (!mono_try_trunc_i2 (val, (gint16*)(locals + ip [1])))
+	gint16 res;
+	if (!mono_try_trunc_i2 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
@@ -6257,15 +6261,19 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_U2_R4) {
 	float val = LOCAL_VAR (ip [2], float);
-	if (!mono_try_trunc_u2 (val, (guint16*)(locals + ip [1])))
+	guint16 res;
+	if (!mono_try_trunc_u2 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_U2_R8) {
 	double val = LOCAL_VAR (ip [2], double);
-	if (!mono_try_trunc_u2 (val, (guint16*)(locals + ip [1])))
+	guint16 res;
+	if (!mono_try_trunc_u2 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
@@ -6303,15 +6311,19 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_I1_R4) {
 	float val = LOCAL_VAR (ip [2], float);
-	if (!mono_try_trunc_i1 (val, (gint8*)(locals + ip [1])))
+	gint8 res;
+	if (!mono_try_trunc_i1 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_I1_R8) {
 	double val = LOCAL_VAR (ip [2], double);
-	if (!mono_try_trunc_i1 (val, (gint8*)(locals + ip [1])))
+	gint8 res;
+	if (!mono_try_trunc_i1 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
@@ -6333,15 +6345,19 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_U1_R4) {
 	float val = LOCAL_VAR (ip [2], float);
-	if (!mono_try_trunc_u1 (val, (guint8*)(locals + ip [1])))
+	guint8 res;
+	if (!mono_try_trunc_u1 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }
 MINT_IN_CASE(MINT_CONV_OVF_U1_R8) {
 	double val = LOCAL_VAR (ip [2], double);
-	if (!mono_try_trunc_u1 (val, (guint8*)(locals + ip [1])))
+	guint8 res;
+	if (!mono_try_trunc_u1 (val, &res))
 		THROW_EX (interp_get_exception_overflow (frame, ip), ip);
+	LOCAL_VAR (ip [1], gint32) = res;
 	ip += 3;
 	MINT_IN_BREAK;
 }

From 5d972fb7d0e300e1be09ca9d18d6cb367c545e9b Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Thu, 3 Mar 2022 17:11:32 -0800
Subject: [PATCH 4/5] Ensure that lowering can take advantage of baseline
 hardware intrinsics for specialized code paths

---
 src/coreclr/jit/compiler.h                  | 46 +++++++++++++++++++++
 src/coreclr/jit/hwintrinsiccodegenxarch.cpp |  5 ++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 94797a2dd991d..5ee14a6235694 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8795,6 +8795,52 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 */
 
+    bool IsBaselineSimdIsa(CORINFO_InstructionSet isa)
+    {
+#ifdef FEATURE_SIMD
+        switch (isa)
+        {
+#if defined(TARGET_XARCH)
+            case InstructionSet_X86Base:
+            case InstructionSet_SSE:
+            case InstructionSet_SSE2:
+            case InstructionSet_Vector128:
+            {
+                return true;
+            }
+#endif // TARGET_XARCH
+
+#if defined(TARGET_AMD64)
+            case InstructionSet_X86Base_X64:
+            case InstructionSet_SSE_X64:
+            case InstructionSet_SSE2_X64:
+            {
+                return true;
+            }
+#endif // TARGET_AMD64
+
+#if defined(TARGET_ARM64)
+            case InstructionSet_ArmBase:
+            case InstructionSet_AdvSimd:
+            case InstructionSet_Vector64:
+            case InstructionSet_Vector128:
+            case InstructionSet_ArmBase_Arm64:
+            case InstructionSet_AdvSimd_Arm64:
+            {
+                return true;
+            }
+#endif // TARGET_ARM64
+
+            default:
+            {
+                return false;
+            }
+        }
+#else
+        return false;
+#endif
+    }
+
     bool IsBaselineSimdIsaSupported()
     {
 #ifdef FEATURE_SIMD
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 094325a4ca4ee..e843766c78da0 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -97,7 +97,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
     size_t numArgs = node->GetOperandCount();
 
     // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics
-    assert(compiler->compIsaSupportedDebugOnly(isa));
+    // However, we specially allow any "baseline" SIMD ISAs, as it greatly simplifies some patterns we
+    // want to introduce in lowering, since it allows us to reuse all the existing logic for various opts
+    assert(compiler->compIsaSupportedDebugOnly(isa) || compiler->IsBaselineSimdIsa(isa));
 
     int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compiler->compOpportunisticallyDependsOn(InstructionSet_AVX));
 
@@ -848,7 +850,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
     regNumber targetReg = node->GetRegNum();
     var_types baseType  = node->GetSimdBaseType();
 
-    assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE));
     assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE));
 
     GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr;

From b4d8630f30e090c1db692171b71435029497cdea Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Sun, 6 Mar 2022 15:23:25 -0800
Subject: [PATCH 5/5] Ensure mono_try_trunc_u1 correctly handles assigning its
 out parameter

---
 src/mono/mono/utils/mono-math.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mono/mono/utils/mono-math.h b/src/mono/mono/utils/mono-math.h
index 01e84a026b16a..42593c32f12d7 100644
--- a/src/mono/mono/utils/mono-math.h
+++ b/src/mono/mono/utils/mono-math.h
@@ -159,6 +159,7 @@ mono_try_trunc_u1 (double val, guint8 *out)
 	if (val > -1.0 && val < +256.0) {
 		// -1.0 and +256.0 are exactly representable
 		// Note that the above condition also works properly for val = NaN case
+		*out = (guint8)val;
 		return TRUE;
 	}
 	return FALSE;
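
/*
 * Closing illustration (not part of the patch series): a standalone check,
 * assuming IEEE 754 binary64 doubles, of the two subtleties the mono-math.h
 * changes rely on. try_trunc_u1 below is a hypothetical mirror of the fixed
 * mono_try_trunc_u1, using standard C types instead of glib's.
 */
#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool try_trunc_u1 (double val, uint8_t *out)
{
	if (val > -1.0 && val < 256.0) {
		*out = (uint8_t)val; /* the assignment restored by PATCH 5/5 */
		return true;
	}
	return false;
}

int main (void)
{
	/* Subtlety 1 (the mono_try_trunc_i8 bounds): 2^63 is an exact double, but
	 * INT64_MIN - 1 is not -- its literal rounds back up to -2^63 -- so the
	 * next representable double below -2^63 serves as the exclusive lower bound. */
	assert (-9223372036854775809.0 == -9223372036854775808.0);
	assert (-9223372036854777856.0 < -9223372036854775808.0);

	/* Subtlety 2 (PATCH 5/5): a successful call must also write *out. */
	uint8_t r = 0xAA; /* sentinel that success must overwrite */
	assert (try_trunc_u1 (255.0, &r) && r == 255);
	assert (try_trunc_u1 (-0.5, &r) && r == 0); /* truncation toward zero */
	assert (!try_trunc_u1 (256.0, &r));         /* exclusive upper bound */
	assert (!try_trunc_u1 (-1.0, &r));          /* exclusive lower bound */
	assert (!try_trunc_u1 (NAN, &r));           /* NaN fails both compares */

	puts ("boundary checks passed");
	return 0;
}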