From a1a9a2b70fc04b9ba55aeacfd98b87a133838113 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Fri, 29 May 2026 16:36:16 -0400
Subject: [PATCH 1/2] Fix lossless_negate for casts from unsigned inner types

lossless_negate tried to negate cast(outer, inner) by negating inner
and casting back, regardless of inner's signedness. This is unsound
when inner is unsigned: unsigned negation wraps modularly, so
cast(outer, -uint8(x)) != -cast(outer, uint8(x)) in general.

For example, -uint8(65) = uint8(191) (wrapping), but -int16(65) = -65,
so int16(uint8(-x)) gives 191 while the correct -int16(uint8(x)) = -65.

This caused FindIntrinsics to produce incorrect vectorized code: when
lowering Sub(int16(A), int16(B_uint8)) it would compute the negation
of int16(B_uint8) by negating inside uint8 arithmetic, turning a
subtraction into an incorrect addition.

The fix is to guard the Cast case: only attempt to push negation
through a cast when the inner type is not unsigned. Signed integers
are safe (overflow is checked via lossless_cast) and floats are safe
(negation is an exact sign-bit flip with no wrapping).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/IROperator.cpp                 | 15 ++++++---
 test/correctness/lossless_cast.cpp | 49 ++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/src/IROperator.cpp b/src/IROperator.cpp
index c729539daa29..2f4b9393bbc2 100644
--- a/src/IROperator.cpp
+++ b/src/IROperator.cpp
@@ -609,12 +609,17 @@ Expr lossless_negate(const Expr &x) {
     } else if (const FloatImm *f = x.as<FloatImm>()) {
         return FloatImm::make(f->type, -f->value);
     } else if (const Cast *c = x.as<Cast>()) {
-        Expr value = lossless_negate(c->value);
-        if (value.defined()) {
-            // This logic is only sound if we know the cast can't overflow.
-            value = lossless_cast(c->type, value);
+        // Only safe to negate through a cast when the inner type is signed.
+        // For unsigned inner types, negation wraps modularly (e.g., -uint8(65)
+        // = uint8(191)), so cast(outer, -inner) != -cast(outer, inner).
+        if (c->value.type().is_int()) {
+            Expr value = lossless_negate(c->value);
             if (value.defined()) {
-                return value;
+                // This logic is only sound if we know the cast can't overflow.
+                value = lossless_cast(c->type, value);
+                if (value.defined()) {
+                    return value;
+                }
             }
         }
     } else if (const Ramp *r = x.as<Ramp>()) {
diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp
index b3e055ce3fe3..43f06c95cd75 100644
--- a/test/correctness/lossless_cast.cpp
+++ b/test/correctness/lossless_cast.cpp
@@ -103,6 +103,55 @@ int main(int argc, char **argv) {
     e = make_reduce(UInt(8), VectorReduce::Max);
     found_error |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::Or));
 
+    // Runtime test: verify that lossless_cast of a widening_sub expression
+    // evaluates correctly when vectorized. This is a regression test for a bug
+    // in lossless_negate where it incorrectly negated through an unsigned-to-signed
+    // cast, causing FindIntrinsics to generate wrong code for the vectorized case.
+    {
+        Var x("x");
+        Buffer<uint8_t> buf(1024, "buf");
+        for (int i = 0; i < 1024; i++) {
+            buf(i) = (uint8_t)i;
+        }
+
+        // A = int8(-16 + 32 / int8(buf(x)))
+        Expr a = cast(Int(8), -16) + cast(Int(8), 32) / cast(Int(8), cast(UInt(8), buf(x)));
+        // inner = (a / -33_i8) * -33_i8  (in int8 Euclidean arithmetic)
+        Expr inner = (a / cast(Int(8), -33)) * cast(Int(8), -33);
+        // b = 223_u8 * uint8(inner)
+        Expr b = cast(UInt(8), 223) * cast(UInt(8), inner);
+
+        // e1: (int64)widening_sub(int32(int16(a)), int32(uint16(b)))
+        Expr e1 = cast(Int(64), widening_sub(cast(Int(32), cast(Int(16), a)),
+                                             cast(Int(32), cast(UInt(16), b))));
+
+        // lossless_cast to int16 - the returned expression should evaluate
+        // identically to e1 when vectorized.
+        Expr e2 = lossless_cast(Int(16), e1);
+        if (!e2.defined()) {
+            std::cerr << "Runtime regression test: lossless_cast unexpectedly returned undefined\n";
+            found_error = true;
+        } else {
+            Func f;
+            f(x) = {e1, cast(Int(64), e2)};
+            f.vectorize(x, 4, TailStrategy::RoundUp);
+
+            Buffer<int64_t> out1(1024), out2(1024);
+            Pipeline p(f);
+            p.realize({out1, out2});
+
+            for (int i = 0; i < 1024; i++) {
+                if (out1(i) != out2(i)) {
+                    std::cerr << "Runtime regression test: mismatch at x=" << i
+                              << ": original=" << out1(i)
+                              << " lossless_cast=" << out2(i) << "\n";
+                    found_error = true;
+                    break;
+                }
+            }
+        }
+    }
+
     if (found_error) {
         return 1;
     }

From 1765a4bd7497f12139feb0dc2ab3a2dfcd22c555 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Sat, 30 May 2026 10:59:56 -0400
Subject: [PATCH 2/2] more precise check for negation

---
 src/IROperator.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/IROperator.cpp b/src/IROperator.cpp
index 2f4b9393bbc2..bd575234421b 100644
--- a/src/IROperator.cpp
+++ b/src/IROperator.cpp
@@ -609,10 +609,12 @@ Expr lossless_negate(const Expr &x) {
     } else if (const FloatImm *f = x.as<FloatImm>()) {
         return FloatImm::make(f->type, -f->value);
     } else if (const Cast *c = x.as<Cast>()) {
-        // Only safe to negate through a cast when the inner type is signed.
+        // Only safe to negate through a cast when the inner type is not unsigned.
         // For unsigned inner types, negation wraps modularly (e.g., -uint8(65)
         // = uint8(191)), so cast(outer, -inner) != -cast(outer, inner).
-        if (c->value.type().is_int()) {
+        // Signed integers and floats are fine: signed negation is checked for
+        // overflow below via lossless_cast, and float negation is exact.
+        if (!c->value.type().is_uint()) {
             Expr value = lossless_negate(c->value);
             if (value.defined()) {
                 // This logic is only sound if we know the cast can't overflow.