diff --git a/src/IROperator.cpp b/src/IROperator.cpp index c729539daa29..bd575234421b 100644 --- a/src/IROperator.cpp +++ b/src/IROperator.cpp @@ -609,12 +609,19 @@ Expr lossless_negate(const Expr &x) { } else if (const FloatImm *f = x.as()) { return FloatImm::make(f->type, -f->value); } else if (const Cast *c = x.as()) { - Expr value = lossless_negate(c->value); - if (value.defined()) { - // This logic is only sound if we know the cast can't overflow. - value = lossless_cast(c->type, value); + // Only safe to negate through a cast when the inner type is not unsigned. + // For unsigned inner types, negation wraps modularly (e.g., -uint8(65) + // = uint8(191)), so cast(outer, -inner) != -cast(outer, inner). + // Signed integers and floats are fine: signed negation is checked for + // overflow below via lossless_cast, and float negation is exact. + if (!c->value.type().is_uint()) { + Expr value = lossless_negate(c->value); if (value.defined()) { - return value; + // This logic is only sound if we know the cast can't overflow. + value = lossless_cast(c->type, value); + if (value.defined()) { + return value; + } } } } else if (const Ramp *r = x.as()) { diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp index b3e055ce3fe3..43f06c95cd75 100644 --- a/test/correctness/lossless_cast.cpp +++ b/test/correctness/lossless_cast.cpp @@ -103,6 +103,55 @@ int main(int argc, char **argv) { e = make_reduce(UInt(8), VectorReduce::Max); found_error |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::Or)); + // Runtime test: verify that lossless_cast of a widening_sub expression + // evaluates correctly when vectorized. This is a regression test for a bug + // in lossless_negate where it incorrectly negated through an unsigned-to-signed + // cast, causing FindIntrinsics to generate wrong code for the vectorized case. + { + Var x("x"); + Buffer buf(1024, "buf"); + for (int i = 0; i < 1024; i++) { + buf(i) = (uint8_t)i; + } + + // A = int8(-16 + 32 / int8(buf(x))) + Expr a = cast(Int(8), -16) + cast(Int(8), 32) / cast(Int(8), cast(UInt(8), buf(x))); + // inner = (a / -33_i8) * -33_i8 (in int8 Euclidean arithmetic) + Expr inner = (a / cast(Int(8), -33)) * cast(Int(8), -33); + // b = 223_u8 * uint8(inner) + Expr b = cast(UInt(8), 223) * cast(UInt(8), inner); + + // e1: (int64)widening_sub(int32(int16(a)), int32(uint16(b))) + Expr e1 = cast(Int(64), widening_sub(cast(Int(32), cast(Int(16), a)), + cast(Int(32), cast(UInt(16), b)))); + + // lossless_cast to int16 - the returned expression should evaluate + // identically to e1 when vectorized. + Expr e2 = lossless_cast(Int(16), e1); + if (!e2.defined()) { + std::cerr << "Runtime regression test: lossless_cast unexpectedly returned undefined\n"; + found_error = true; + } else { + Func f; + f(x) = {e1, cast(Int(64), e2)}; + f.vectorize(x, 4, TailStrategy::RoundUp); + + Buffer out1(1024), out2(1024); + Pipeline p(f); + p.realize({out1, out2}); + + for (int i = 0; i < 1024; i++) { + if (out1(i) != out2(i)) { + std::cerr << "Runtime regression test: mismatch at x=" << i + << ": original=" << out1(i) + << " lossless_cast=" << out2(i) << "\n"; + found_error = true; + break; + } + } + } + } + if (found_error) { return 1; }