apache · gengliangwang · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/CastUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/CastUtils.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions;
+
+import org.apache.spark.QueryContext;
+import org.apache.spark.sql.errors.QueryExecutionErrors;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Decimal;
+
+/**
+ * Static helpers called from {@code Cast} (interpreted eval and generated
+ * code) for ANSI overflow-checked numeric narrowing and for decimal
+ * precision changes. The {@link DataType} constants named in overflow
+ * errors are {@code private static final} fields, so callers pass only the
+ * value and generated code needs no {@code references[]} entries for them.
+ */
+public final class CastUtils {
+
+  private CastUtils() {}
+
+  private static final DataType SHORT = DataTypes.ShortType;
+  private static final DataType INT = DataTypes.IntegerType;
+  private static final DataType LONG = DataTypes.LongType;
+  private static final DataType BYTE = DataTypes.ByteType;
+  private static final DataType FLOAT = DataTypes.FloatType;
+  private static final DataType DOUBLE = DataTypes.DoubleType;
+
+  // ----- integral narrowing (ANSI: throw unless v survives the round-trip cast) -----
+
+  public static byte shortToByteExact(short v) {
+    if (v == (byte) v) return (byte) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, SHORT, BYTE);
+  }
+
+  public static byte intToByteExact(int v) {
+    if (v == (byte) v) return (byte) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, INT, BYTE);
+  }
+
+  public static byte longToByteExact(long v) {
+    if (v == (byte) v) return (byte) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, LONG, BYTE);
+  }
+
+  public static short intToShortExact(int v) {
+    if (v == (short) v) return (short) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, INT, SHORT);
+  }
+
+  public static short longToShortExact(long v) {
+    if (v == (short) v) return (short) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, LONG, SHORT);
+  }
+
+  public static int longToIntExact(long v) {
+    if (v == (int) v) return (int) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, LONG, INT);
+  }
+
+  // ----- fractional -> byte/short/int (ANSI: throw on overflow or NaN) -----
+  // The cast truncates toward zero, so v fits iff floor(v) <= MAX && ceil(v) >= MIN.
+
+  public static byte floatToByteExact(float v) {
+    if (Math.floor(v) <= Byte.MAX_VALUE && Math.ceil(v) >= Byte.MIN_VALUE) return (byte) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, FLOAT, BYTE);
+  }
+
+  public static byte doubleToByteExact(double v) {
+    if (Math.floor(v) <= Byte.MAX_VALUE && Math.ceil(v) >= Byte.MIN_VALUE) return (byte) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, DOUBLE, BYTE);
+  }
+
+  public static short floatToShortExact(float v) {
+    if (Math.floor(v) <= Short.MAX_VALUE && Math.ceil(v) >= Short.MIN_VALUE) return (short) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, FLOAT, SHORT);
+  }
+
+  public static short doubleToShortExact(double v) {
+    if (Math.floor(v) <= Short.MAX_VALUE && Math.ceil(v) >= Short.MIN_VALUE) return (short) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, DOUBLE, SHORT);
+  }
+
+  public static int floatToIntExact(float v) {
+    if (Math.floor(v) <= Integer.MAX_VALUE && Math.ceil(v) >= Integer.MIN_VALUE) return (int) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, FLOAT, INT);
+  }
+
+  public static int doubleToIntExact(double v) {
+    if (Math.floor(v) <= Integer.MAX_VALUE && Math.ceil(v) >= Integer.MIN_VALUE) return (int) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, DOUBLE, INT);
+  }
+
+  // ----- fractional -> long (ANSI: throw on overflow; NOTE: v == 2^63 saturates to MAX) -----
+
+  public static long floatToLongExact(float v) {
+    if (Math.floor(v) <= Long.MAX_VALUE && Math.ceil(v) >= Long.MIN_VALUE) return (long) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, FLOAT, LONG);
+  }
+
+  public static long doubleToLongExact(double v) {
+    if (Math.floor(v) <= Long.MAX_VALUE && Math.ceil(v) >= Long.MIN_VALUE) return (long) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, DOUBLE, LONG);
+  }
+
+  // ----- decimal precision adjustment -----
+  // Used by Cast.changePrecision (BinaryArithmetic / DivModLike to follow in
+  // later PRs). d is mutated in place and returned on success; on failure the
+  // Exact variant throws cannotChangeDecimalPrecisionError, OrNull returns null.
+
+  public static Decimal changePrecisionExact(
+      Decimal d, int precision, int scale, QueryContext context) {
+    if (d.changePrecision(precision, scale)) return d;
+    throw QueryExecutionErrors.cannotChangeDecimalPrecisionError(d, precision, scale, context);
+  }
+
+  public static Decimal changePrecisionOrNull(Decimal d, int precision, int scale) {
+    return d.changePrecision(precision, scale) ? d : null;
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -897,6 +897,10 @@ case class Cast(
       buildCast[Long](_, t => timestampToLong(t))
     case _: TimeType =>
       buildCast[Long](_, t => timeToLong(t))
+    case FloatType if ansiEnabled =>
+      b => CastUtils.floatToLongExact(b.asInstanceOf[Float])
+    case DoubleType if ansiEnabled =>
+      b => CastUtils.doubleToLongExact(b.asInstanceOf[Double])
     case x: NumericType if ansiEnabled =>
       val exactNumeric = PhysicalNumericType.exactNumeric(x)
       b => exactNumeric.toLong(b)
@@ -939,6 +943,12 @@ case class Cast(
       })
     case _: TimeType =>
       buildCast[Long](_, t => timeToLong(t).toInt)
+    case LongType if ansiEnabled =>
+      b => CastUtils.longToIntExact(b.asInstanceOf[Long])
+    case FloatType if ansiEnabled =>
+      b => CastUtils.floatToIntExact(b.asInstanceOf[Float])
+    case DoubleType if ansiEnabled =>
+      b => CastUtils.doubleToIntExact(b.asInstanceOf[Double])
     case x: NumericType if ansiEnabled =>
       val exactNumeric = PhysicalNumericType.exactNumeric(x)
       b => exactNumeric.toInt(b)
@@ -984,6 +994,14 @@ case class Cast(
           errorOrNull(t, from, ShortType)
         }
       })
+    case IntegerType if ansiEnabled =>
+      b => CastUtils.intToShortExact(b.asInstanceOf[Int])
+    case LongType if ansiEnabled =>
+      b => CastUtils.longToShortExact(b.asInstanceOf[Long])
+    case FloatType if ansiEnabled =>
+      b => CastUtils.floatToShortExact(b.asInstanceOf[Float])
+    case DoubleType if ansiEnabled =>
+      b => CastUtils.doubleToShortExact(b.asInstanceOf[Double])
     case x: NumericType if ansiEnabled =>
       val exactNumeric = PhysicalNumericType.exactNumeric(x)
       b =>
@@ -1040,6 +1058,16 @@ case class Cast(
           errorOrNull(t, from, ByteType)
         }
       })
+    case ShortType if ansiEnabled =>
+      b => CastUtils.shortToByteExact(b.asInstanceOf[Short])
+    case IntegerType if ansiEnabled =>
+      b => CastUtils.intToByteExact(b.asInstanceOf[Int])
+    case LongType if ansiEnabled =>
+      b => CastUtils.longToByteExact(b.asInstanceOf[Long])
+    case FloatType if ansiEnabled =>
+      b => CastUtils.floatToByteExact(b.asInstanceOf[Float])
+    case DoubleType if ansiEnabled =>
+      b => CastUtils.doubleToByteExact(b.asInstanceOf[Double])
     case x: NumericType if ansiEnabled =>
       val exactNumeric = PhysicalNumericType.exactNumeric(x)
       b =>
@@ -1079,15 +1107,11 @@ case class Cast(
       value: Decimal,
       decimalType: DecimalType,
       nullOnOverflow: Boolean): Decimal = {
-    if (value.changePrecision(decimalType.precision, decimalType.scale)) {
-      value
+    if (nullOnOverflow) {
+      CastUtils.changePrecisionOrNull(value, decimalType.precision, decimalType.scale)
     } else {
-      if (nullOnOverflow) {
-        null
-      } else {
-        throw QueryExecutionErrors.cannotChangeDecimalPrecisionError(
-          value, decimalType.precision, decimalType.scale, getContextOrNull())
-      }
+      CastUtils.changePrecisionExact(
+        value, decimalType.precision, decimalType.scale, getContextOrNull())
     }
   }
 
@@ -1540,23 +1564,21 @@ case class Cast(
          |$d.changePrecision(${decimalType.precision}, ${decimalType.scale});
          |$evPrim = $d;
        """.stripMargin
-    } else {
-      val errorContextCode = getContextOrNullCode(ctx, !nullOnOverflow)
-      val overflowCode = if (nullOnOverflow) {
-        s"$evNull = true;"
-      } else {
-        s"""
-           |throw QueryExecutionErrors.cannotChangeDecimalPrecisionError(
-           |  $d, ${decimalType.precision}, ${decimalType.scale}, $errorContextCode);
-         """.stripMargin
-      }
+    } else if (nullOnOverflow) {
       code"""
          |if ($d.changePrecision(${decimalType.precision}, ${decimalType.scale})) {
          |  $evPrim = $d;
          |} else {
-         |  $overflowCode
+         |  $evNull = true;
          |}
        """.stripMargin
+    } else {
+      val errorContextCode = getContextOrNullCode(ctx, !nullOnOverflow)
+      val castUtils = classOf[CastUtils].getName
+      code"""
+         |$evPrim = $castUtils.changePrecisionExact(
+         |  $d, ${decimalType.precision}, ${decimalType.scale}, $errorContextCode);
+       """.stripMargin
     }
   }
 
@@ -1982,33 +2004,26 @@ case class Cast(
     }
   }
 
+  private[this] def integralPrefix(from: DataType): String = from match {
+    case ShortType => "short" // selects CastUtils.shortTo<Target>Exact
+    case IntegerType => "int" // selects CastUtils.intTo<Target>Exact
+    case LongType => "long" // selects CastUtils.longTo<Target>Exact
+  } // no default case: any other source type raises scala.MatchError
+
+  private[this] def fractionalPrefix(from: DataType): String = from match {
+    case FloatType => "float" // selects CastUtils.floatTo<Target>Exact
+    case DoubleType => "double" // selects CastUtils.doubleTo<Target>Exact
+  } // no default case: any other source type raises scala.MatchError
+
   private[this] def castIntegralTypeToIntegralTypeExactCode(
       ctx: CodegenContext,
       integralType: String,
       from: DataType,
       to: DataType): CastFunction = {
     assert(ansiEnabled)
-    val fromDt = ctx.addReferenceObj("from", from, from.getClass.getName)
-    val toDt = ctx.addReferenceObj("to", to, to.getClass.getName)
-    (c, evPrim, _) =>
-      code"""
-        if ($c == ($integralType) $c) {
-          $evPrim = ($integralType) $c;
-        } else {
-          throw QueryExecutionErrors.castingCauseOverflowError($c, $fromDt, $toDt);
-        }
-      """
-  }
-
-
-  private[this] def lowerAndUpperBound(integralType: String): (String, String) = {
-    val (min, max, typeIndicator) = integralType.toLowerCase(Locale.ROOT) match {
-      case "long" => (Long.MinValue, Long.MaxValue, "L")
-      case "int" => (Int.MinValue, Int.MaxValue, "")
-      case "short" => (Short.MinValue, Short.MaxValue, "")
-      case "byte" => (Byte.MinValue, Byte.MaxValue, "")
-    }
-    (min.toString + typeIndicator, max.toString + typeIndicator)
+    val castUtils = classOf[CastUtils].getName // NOTE(review): ctx and to are now unused
+    val method = s"${integralPrefix(from)}To${integralType.capitalize}Exact" // e.g. longToIntExact
+    (c, evPrim, _) => code"$evPrim = $castUtils.$method($c);" // CastUtils checks range and throws
   }
 
   private[this] def castFractionToIntegralTypeCode(
@@ -2017,23 +2032,9 @@ case class Cast(
       from: DataType,
       to: DataType): CastFunction = {
     assert(ansiEnabled)
-    val (min, max) = lowerAndUpperBound(integralType)
-    val mathClass = classOf[Math].getName
-    val fromDt = ctx.addReferenceObj("from", from, from.getClass.getName)
-    val toDt = ctx.addReferenceObj("to", to, to.getClass.getName)
-    // When casting floating values to integral types, Spark uses the method `Numeric.toInt`
-    // Or `Numeric.toLong` directly. For positive floating values, it is equivalent to `Math.floor`;
-    // for negative floating values, it is equivalent to `Math.ceil`.
-    // So, we can use the condition `Math.floor(x) <= upperBound && Math.ceil(x) >= lowerBound`
-    // to check if the floating value x is in the range of an integral type after rounding.
-    (c, evPrim, _) =>
-      code"""
-        if ($mathClass.floor($c) <= $max && $mathClass.ceil($c) >= $min) {
-          $evPrim = ($integralType) $c;
-        } else {
-          throw QueryExecutionErrors.castingCauseOverflowError($c, $fromDt, $toDt);
-        }
-      """
+    val castUtils = classOf[CastUtils].getName
+    val method = s"${fractionalPrefix(from)}To${integralType.capitalize}Exact"
+    (c, evPrim, _) => code"$evPrim = $castUtils.$method($c);"
   }
 
   private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match {