From c7dd87094a86bf16a63a8000db235d7dcbcf64f5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 14:59:57 -0700 Subject: [PATCH 01/16] fix --- src/wasm/literal.cpp | 68 +++++++++++++------ test/passes/fuzz-exec_O.txt | 21 ++++++ test/passes/fuzz-exec_O.wast | 19 +++++- .../passes/translate-to-fuzz_all-features.txt | 4 +- 4 files changed, 90 insertions(+), 22 deletions(-) diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 39a4cea3cbc..9062e2dfea2 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -759,10 +759,35 @@ Literal Literal::add(const Literal& other) const { return Literal(uint32_t(i32) + uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) + uint64_t(other.i64)); - case Type::f32: - return Literal(getf32() + other.getf32()); - case Type::f64: - return Literal(getf64() + other.getf64()); + case Type::f32: { + // Special-case addition of 1. nan + 1 can change nan bits per the + // wasm spec, but it is ok to just return that original nan, and we + // do that here so that we are consistent with the optimization of + // removing the * 1 and leaving just the nan. That is, if we just + // do a normal multiply and the CPU decides to change the bits, we'd + // give a different result on optimized code, which would look like + // it was a bad optimization. So out of all the valid results to + // return here, return the simplest one that is consistent with + // our optimization for the case of 1. + float lhs = getf32(), rhs = other.getf32(); + if (lhs == 1) { + return Literal(rhs); + } + if (rhs == 1) { + return Literal(lhs); + } + return Literal(lhs + rhs); + } + case Type::f64: { + double lhs = getf64(), rhs = other.getf64(); + if (lhs == 1) { + return Literal(rhs); + } + if (rhs == 1) { + return Literal(lhs); + } + return Literal(lhs + rhs); + } case Type::v128: case Type::funcref: case Type::externref: @@ -781,10 +806,22 @@ Literal Literal::sub(const Literal& other) const { return Literal(uint32_t(i32) - uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) - uint64_t(other.i64)); - case Type::f32: - return Literal(getf32() - other.getf32()); - case Type::f64: - return Literal(getf64() - other.getf64()); + case Type::f32: { + float lhs = getf32(), rhs = other.getf32(); + // As with addition, make sure to not change NaN bits in trivial + // operations. + if (rhs == 0) { + return Literal(lhs); + } + return Literal(lhs - rhs); + } + case Type::f64: { + double lhs = getf64(), rhs = other.getf64(); + if (rhs == 0) { + return Literal(lhs); + } + return Literal(lhs - rhs); + } case Type::v128: case Type::funcref: case Type::externref: @@ -875,16 +912,9 @@ Literal Literal::mul(const Literal& other) const { case Type::i64: return Literal(uint64_t(i64) * uint64_t(other.i64)); case Type::f32: { - // Special-case multiplication by 1. nan * 1 can change nan bits per the - // wasm spec, but it is ok to just return that original nan, and we - // do that here so that we are consistent with the optimization of - // removing the * 1 and leaving just the nan. That is, if we just - // do a normal multiply and the CPU decides to change the bits, we'd - // give a different result on optimized code, which would look like - // it was a bad optimization. So out of all the valid results to - // return here, return the simplest one that is consistent with - // our optimization for the case of 1. float lhs = getf32(), rhs = other.getf32(); + // As with addition, make sure to not change NaN bits in trivial + // operations. 
if (rhs == 1) { return Literal(lhs); } @@ -940,7 +970,8 @@ Literal Literal::div(const Literal& other) const { case FP_INFINITE: // fallthrough case FP_NORMAL: // fallthrough case FP_SUBNORMAL: - // Special-case division by 1, similar to multiply from earlier. + // As with addition, make sure to not change NaN bits in trivial + // operations. if (rhs == 1) { return Literal(lhs); } @@ -972,7 +1003,6 @@ Literal Literal::div(const Literal& other) const { case FP_INFINITE: // fallthrough case FP_NORMAL: // fallthrough case FP_SUBNORMAL: - // See above comment on f32. if (rhs == 1) { return Literal(lhs); } diff --git a/test/passes/fuzz-exec_O.txt b/test/passes/fuzz-exec_O.txt index ef8e165bbc2..f2bad9c74da 100644 --- a/test/passes/fuzz-exec_O.txt +++ b/test/passes/fuzz-exec_O.txt @@ -36,17 +36,29 @@ [fuzz-exec] note result: mul1 => -nan:0x34546d [fuzz-exec] calling mul2 [fuzz-exec] note result: mul2 => -nan:0x34546d +[fuzz-exec] calling add1 +[fuzz-exec] note result: add1 => -nan:0x74546d +[fuzz-exec] calling add2 +[fuzz-exec] note result: add2 => -nan:0x74546d +[fuzz-exec] calling sub +[fuzz-exec] note result: sub => -nan:0x34546d (module (type $none_=>_f32 (func (result f32))) (export "div" (func $0)) (export "mul1" (func $1)) (export "mul2" (func $1)) + (export "add1" (func $3)) + (export "add2" (func $3)) + (export "sub" (func $1)) (func $0 (; has Stack IR ;) (result f32) (f32.const -nan:0x23017a) ) (func $1 (; has Stack IR ;) (result f32) (f32.const -nan:0x34546d) ) + (func $3 (; has Stack IR ;) (result f32) + (f32.const -nan:0x74546d) + ) ) [fuzz-exec] calling div [fuzz-exec] note result: div => -nan:0x23017a @@ -54,6 +66,15 @@ [fuzz-exec] note result: mul1 => -nan:0x34546d [fuzz-exec] calling mul2 [fuzz-exec] note result: mul2 => -nan:0x34546d +[fuzz-exec] calling add1 +[fuzz-exec] note result: add1 => -nan:0x74546d +[fuzz-exec] calling add2 +[fuzz-exec] note result: add2 => -nan:0x74546d +[fuzz-exec] calling sub +[fuzz-exec] note result: sub => -nan:0x34546d +[fuzz-exec] comparing add1 +[fuzz-exec] comparing add2 [fuzz-exec] comparing div [fuzz-exec] comparing mul1 [fuzz-exec] comparing mul2 +[fuzz-exec] comparing sub diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast index 5c739c548a4..7aac4b11478 100644 --- a/test/passes/fuzz-exec_O.wast +++ b/test/passes/fuzz-exec_O.wast @@ -39,5 +39,22 @@ (f32.const -nan:0x34546d) ) ) + (func "add1" (result f32) + (f32.add + (f32.const -nan:0x34546d) + (f32.const 0) + ) + ) + (func "add2" (result f32) + (f32.add + (f32.const 0) + (f32.const -nan:0x34546d) + ) + ) + (func "sub" (result f32) + (f32.sub + (f32.const -nan:0x34546d) + (f32.const 0) + ) + ) ) - diff --git a/test/passes/translate-to-fuzz_all-features.txt b/test/passes/translate-to-fuzz_all-features.txt index 80552e2d5e4..20cf5754e15 100644 --- a/test/passes/translate-to-fuzz_all-features.txt +++ b/test/passes/translate-to-fuzz_all-features.txt @@ -402,7 +402,7 @@ (block $label$6 (call $log-f32 (f32.min - (f32.const -1) + (f32.const -1.1754943508222875e-38) (f32.demote_f64 (f64.copysign (f64.const 28) @@ -523,7 +523,7 @@ (call $log-v128 (i32x4.ne (local.get $4) - (v128.const i32x4 0xffe3e76d 0x41dfffff 0xd70a3d70 0x3ffb70a3) + (v128.const i32x4 0xffe3e76d 0x41dfffff 0xae147ae1 0x3fe6e147) ) ) (br $label$3) From 71e9ecb096c78aab62382f2503255ce63acc6a09 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 15:02:41 -0700 Subject: [PATCH 02/16] fix comment --- src/wasm/literal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 9062e2dfea2..6064692db18 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -760,7 +760,7 @@ Literal Literal::add(const Literal& other) const { case Type::i64: return Literal(uint64_t(i64) + uint64_t(other.i64)); case Type::f32: { - // Special-case addition of 1. nan + 1 can change nan bits per the + // Special-case addition of 0. nan + 0 can change nan bits per the // wasm spec, but it is ok to just return that original nan, and we // do that here so that we are consistent with the optimization of // removing the * 1 and leaving just the nan. That is, if we just From 95a27c019ed9a82b0c98d107610087fb118c600d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 15:03:15 -0700 Subject: [PATCH 03/16] fix comment --- src/wasm/literal.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 6064692db18..02e95b10159 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -763,8 +763,8 @@ Literal Literal::add(const Literal& other) const { // Special-case addition of 0. nan + 0 can change nan bits per the // wasm spec, but it is ok to just return that original nan, and we // do that here so that we are consistent with the optimization of - // removing the * 1 and leaving just the nan. That is, if we just - // do a normal multiply and the CPU decides to change the bits, we'd + // removing the + 0 and leaving just the nan. That is, if we just + // do a normal add and the CPU decides to change the bits, we'd // give a different result on optimized code, which would look like // it was a bad optimization. So out of all the valid results to // return here, return the simplest one that is consistent with From 87aec7d4318a119b2342a114014d935e6c5e6ffe Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 15:12:54 -0700 Subject: [PATCH 04/16] fix --- src/wasm/literal.cpp | 48 ++++++------------- .../passes/translate-to-fuzz_all-features.txt | 4 +- 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 02e95b10159..d6603062e91 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -759,35 +759,10 @@ Literal Literal::add(const Literal& other) const { return Literal(uint32_t(i32) + uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) + uint64_t(other.i64)); - case Type::f32: { - // Special-case addition of 0. nan + 0 can change nan bits per the - // wasm spec, but it is ok to just return that original nan, and we - // do that here so that we are consistent with the optimization of - // removing the + 0 and leaving just the nan. That is, if we just - // do a normal add and the CPU decides to change the bits, we'd - // give a different result on optimized code, which would look like - // it was a bad optimization. So out of all the valid results to - // return here, return the simplest one that is consistent with - // our optimization for the case of 1. 
- float lhs = getf32(), rhs = other.getf32(); - if (lhs == 1) { - return Literal(rhs); - } - if (rhs == 1) { - return Literal(lhs); - } - return Literal(lhs + rhs); - } - case Type::f64: { - double lhs = getf64(), rhs = other.getf64(); - if (lhs == 1) { - return Literal(rhs); - } - if (rhs == 1) { - return Literal(lhs); - } - return Literal(lhs + rhs); - } + case Type::f32: + return Literal(getf32() + other.getf32()); + case Type::f64: + return Literal(getf64() + other.getf64()); case Type::v128: case Type::funcref: case Type::externref: @@ -808,16 +783,23 @@ Literal Literal::sub(const Literal& other) const { return Literal(uint64_t(i64) - uint64_t(other.i64)); case Type::f32: { float lhs = getf32(), rhs = other.getf32(); - // As with addition, make sure to not change NaN bits in trivial - // operations. - if (rhs == 0) { + // Special-case subtraction of 0. nan - 0 can change nan bits per the + // wasm spec, but it is ok to just return that original nan, and we + // do that here so that we are consistent with the optimization of + // removing the - 0 and leaving just the nan. That is, if we just + // do a normal add and the CPU decides to change the bits, we'd + // give a different result on optimized code, which would look like + // it was a bad optimization. So out of all the valid results to + // return here, return the simplest one that is consistent with + // our optimization for the case of 1. + if (rhs == 0 && !std::signbit(rhs)) { return Literal(lhs); } return Literal(lhs - rhs); } case Type::f64: { double lhs = getf64(), rhs = other.getf64(); - if (rhs == 0) { + if (rhs == 0 && !std::signbit(rhs)) { return Literal(lhs); } return Literal(lhs - rhs); diff --git a/test/passes/translate-to-fuzz_all-features.txt b/test/passes/translate-to-fuzz_all-features.txt index 20cf5754e15..80552e2d5e4 100644 --- a/test/passes/translate-to-fuzz_all-features.txt +++ b/test/passes/translate-to-fuzz_all-features.txt @@ -402,7 +402,7 @@ (block $label$6 (call $log-f32 (f32.min - (f32.const -1.1754943508222875e-38) + (f32.const -1) (f32.demote_f64 (f64.copysign (f64.const 28) @@ -523,7 +523,7 @@ (call $log-v128 (i32x4.ne (local.get $4) - (v128.const i32x4 0xffe3e76d 0x41dfffff 0xae147ae1 0x3fe6e147) + (v128.const i32x4 0xffe3e76d 0x41dfffff 0xd70a3d70 0x3ffb70a3) ) ) (br $label$3) From 5f85c5997ac15af30ae077cf367fbfe0619ab39c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 15:15:50 -0700 Subject: [PATCH 05/16] fix --- src/wasm/literal.cpp | 44 +++++++++++++++++++++++++----------- test/passes/fuzz-exec_O.wast | 16 +++++++++++-- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index d6603062e91..0c1276940b9 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -759,10 +759,35 @@ Literal Literal::add(const Literal& other) const { return Literal(uint32_t(i32) + uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) + uint64_t(other.i64)); - case Type::f32: - return Literal(getf32() + other.getf32()); - case Type::f64: - return Literal(getf64() + other.getf64()); + case Type::f32: { + // Special-case addition of -0. nan + -0 can change nan bits per the + // wasm spec, but it is ok to just return that original nan, and we + // do that here so that we are consistent with the optimization of + // removing the + -0 and leaving just the nan. 
That is, if we just + // do a normal add and the CPU decides to change the bits, we'd + // give a different result on optimized code, which would look like + // it was a bad optimization. So out of all the valid results to + // return here, return the simplest one that is consistent with + // our optimization for the case of 1. + float lhs = getf32(), rhs = other.getf32(); + if (lhs == 0 && std::signbit(lhs)) { + return Literal(rhs); + } + if (rhs == 0 && std::signbit(rhs)) { + return Literal(lhs); + } + return Literal(lhs + rhs); + } + case Type::f64: { + double lhs = getf64(), rhs = other.getf64(); + if (lhs == 0 && std::signbit(lhs)) { + return Literal(rhs); + } + if (rhs == 0 && std::signbit(rhs)) { + return Literal(lhs); + } + return Literal(lhs + rhs); + } case Type::v128: case Type::funcref: case Type::externref: @@ -783,15 +808,8 @@ Literal Literal::sub(const Literal& other) const { return Literal(uint64_t(i64) - uint64_t(other.i64)); case Type::f32: { float lhs = getf32(), rhs = other.getf32(); - // Special-case subtraction of 0. nan - 0 can change nan bits per the - // wasm spec, but it is ok to just return that original nan, and we - // do that here so that we are consistent with the optimization of - // removing the - 0 and leaving just the nan. That is, if we just - // do a normal add and the CPU decides to change the bits, we'd - // give a different result on optimized code, which would look like - // it was a bad optimization. So out of all the valid results to - // return here, return the simplest one that is consistent with - // our optimization for the case of 1. + // As with addition, make sure to not change NaN bits in trivial + // operations. if (rhs == 0 && !std::signbit(rhs)) { return Literal(lhs); } diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast index 7aac4b11478..32e2af25dc5 100644 --- a/test/passes/fuzz-exec_O.wast +++ b/test/passes/fuzz-exec_O.wast @@ -42,13 +42,19 @@ (func "add1" (result f32) (f32.add (f32.const -nan:0x34546d) - (f32.const 0) + (f32.const -0) ) ) (func "add2" (result f32) (f32.add - (f32.const 0) + (f32.const -0) + (f32.const -nan:0x34546d) + ) + ) + (func "add3" (result f32) + (f32.add (f32.const -nan:0x34546d) + (f32.const 0) ) ) (func "sub" (result f32) @@ -57,4 +63,10 @@ (f32.const 0) ) ) + (func "sub" (result f32) + (f32.sub + (f32.const -nan:0x34546d) + (f32.const -0) + ) + ) ) From b6a9f3cbe9e7ed0e2a85ac699c2942c8becd8e37 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 15:17:42 -0700 Subject: [PATCH 06/16] fix spec test --- test/spec/old_float_exprs.wast | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/spec/old_float_exprs.wast b/test/spec/old_float_exprs.wast index ca031114f77..52fc7926e12 100644 --- a/test/spec/old_float_exprs.wast +++ b/test/spec/old_float_exprs.wast @@ -72,8 +72,10 @@ (f64.sub (local.get $x) (f64.const 0.0))) ) -(assert_return (invoke "f32.no_fold_sub_zero" (f32.const nan:0x200000)) (f32.const nan:0x600000)) -(assert_return (invoke "f64.no_fold_sub_zero" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000)) +;; XXX BINARYEN: pick the same NaN pattern as the input here, to match the +;; interpreter +(assert_return (invoke "f32.no_fold_sub_zero" (f32.const nan:0x200000)) (f32.const nan:0x200000)) +(assert_return (invoke "f64.no_fold_sub_zero" (f64.const nan:0x4000000000000)) (f64.const nan:0x4000000000000)) ;; Test that x*0.0 is not folded to 0.0. 
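At this point in the series the interpreter folds the trivial float identities itself: adding -0, subtracting +0, and multiplying or dividing by 1 all return the other operand unchanged, so a NaN payload survives bit-for-bit and agrees with what the optimizer produces when it removes the operation entirely. Below is a minimal, self-contained C++ sketch of the subtraction rule; the function name and the test harness are invented for illustration and are not code from the patches.

#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>

// Fold x - (+0.0) to x. +0 is the only right identity of f32 subtraction
// that holds for every input: x - (+0) == x even when x is -0, and for a
// NaN the fold hands back the payload verbatim instead of letting the CPU
// pick new bits, which the wasm spec would allow.
float subPreservingNaNBits(float lhs, float rhs) {
  if (rhs == 0 && !std::signbit(rhs)) {
    return lhs;
  }
  return lhs - rhs;
}

int main() {
  // Bit pattern of -nan:0x34546d from the tests above: sign bit set,
  // exponent all ones, 23-bit payload 0x34546d.
  uint32_t bits = 0xffb4546d;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  float result = subPreservingNaNBits(f, 0.0f);
  uint32_t out;
  std::memcpy(&out, &result, sizeof(out));
  std::cout << std::hex << out << '\n'; // typically prints ffb4546d
}

Note that rhs == -0.0 deliberately falls through to the real subtraction: x - (-0) behaves like x + 0 and maps an x of -0 to +0, so it is not an identity and cannot be folded.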
From 9ec23eeb23083c40ae4aba77784087655c018255 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 18 Sep 2020 15:21:05 -0700 Subject: [PATCH 07/16] update --- test/passes/fuzz-exec_O.txt | 44 +++++++++++++++++++++++++----------- test/passes/fuzz-exec_O.wast | 10 ++++++-- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/test/passes/fuzz-exec_O.txt b/test/passes/fuzz-exec_O.txt index f2bad9c74da..6bf8ba50bac 100644 --- a/test/passes/fuzz-exec_O.txt +++ b/test/passes/fuzz-exec_O.txt @@ -37,26 +37,35 @@ [fuzz-exec] calling mul2 [fuzz-exec] note result: mul2 => -nan:0x34546d [fuzz-exec] calling add1 -[fuzz-exec] note result: add1 => -nan:0x74546d +[fuzz-exec] note result: add1 => -nan:0x34546d [fuzz-exec] calling add2 -[fuzz-exec] note result: add2 => -nan:0x74546d -[fuzz-exec] calling sub -[fuzz-exec] note result: sub => -nan:0x34546d +[fuzz-exec] note result: add2 => -nan:0x34546d +[fuzz-exec] calling add3 +[fuzz-exec] note result: add3 => -nan:0x74546d +[fuzz-exec] calling add4 +[fuzz-exec] note result: add4 => -nan:0x74546d +[fuzz-exec] calling sub1 +[fuzz-exec] note result: sub1 => -nan:0x34546d +[fuzz-exec] calling sub2 +[fuzz-exec] note result: sub2 => -nan:0x74546d (module (type $none_=>_f32 (func (result f32))) (export "div" (func $0)) (export "mul1" (func $1)) (export "mul2" (func $1)) - (export "add1" (func $3)) - (export "add2" (func $3)) - (export "sub" (func $1)) + (export "add1" (func $1)) + (export "add2" (func $1)) + (export "add3" (func $5)) + (export "add4" (func $5)) + (export "sub1" (func $1)) + (export "sub2" (func $5)) (func $0 (; has Stack IR ;) (result f32) (f32.const -nan:0x23017a) ) (func $1 (; has Stack IR ;) (result f32) (f32.const -nan:0x34546d) ) - (func $3 (; has Stack IR ;) (result f32) + (func $5 (; has Stack IR ;) (result f32) (f32.const -nan:0x74546d) ) ) @@ -67,14 +76,23 @@ [fuzz-exec] calling mul2 [fuzz-exec] note result: mul2 => -nan:0x34546d [fuzz-exec] calling add1 -[fuzz-exec] note result: add1 => -nan:0x74546d +[fuzz-exec] note result: add1 => -nan:0x34546d [fuzz-exec] calling add2 -[fuzz-exec] note result: add2 => -nan:0x74546d -[fuzz-exec] calling sub -[fuzz-exec] note result: sub => -nan:0x34546d +[fuzz-exec] note result: add2 => -nan:0x34546d +[fuzz-exec] calling add3 +[fuzz-exec] note result: add3 => -nan:0x74546d +[fuzz-exec] calling add4 +[fuzz-exec] note result: add4 => -nan:0x74546d +[fuzz-exec] calling sub1 +[fuzz-exec] note result: sub1 => -nan:0x34546d +[fuzz-exec] calling sub2 +[fuzz-exec] note result: sub2 => -nan:0x74546d [fuzz-exec] comparing add1 [fuzz-exec] comparing add2 +[fuzz-exec] comparing add3 +[fuzz-exec] comparing add4 [fuzz-exec] comparing div [fuzz-exec] comparing mul1 [fuzz-exec] comparing mul2 -[fuzz-exec] comparing sub +[fuzz-exec] comparing sub1 +[fuzz-exec] comparing sub2 diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast index 32e2af25dc5..0532fc8df08 100644 --- a/test/passes/fuzz-exec_O.wast +++ b/test/passes/fuzz-exec_O.wast @@ -57,13 +57,19 @@ (f32.const 0) ) ) - (func "sub" (result f32) + (func "add4" (result f32) + (f32.add + (f32.const 0) + (f32.const -nan:0x34546d) + ) + ) + (func "sub1" (result f32) (f32.sub (f32.const -nan:0x34546d) (f32.const 0) ) ) - (func "sub" (result f32) + (func "sub2" (result f32) (f32.sub (f32.const -nan:0x34546d) (f32.const -0) From 80270a1920ec604e7825bf71e02a07cf2c36ef42 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Sep 2020 09:14:42 -0700 Subject: [PATCH 08/16] rework --- src/pass.h | 5 ++ src/passes/OptimizeInstructions.cpp | 15 
++--- src/wasm/literal.cpp | 87 ++++------------------------- test/passes/fuzz-exec_O.wast | 8 +-- test/spec/old_float_exprs.wast | 12 ++-- 5 files changed, 33 insertions(+), 94 deletions(-) diff --git a/src/pass.h b/src/pass.h index a3ee41d61fc..27e7ee37f96 100644 --- a/src/pass.h +++ b/src/pass.h @@ -102,6 +102,11 @@ struct PassOptions { // many cases. bool lowMemoryUnused = false; enum { LowMemoryBound = 1024 }; + // Whether to allow "loose" math semantics, ignoring corner cases with NaNs + // and assuming math follows the algebraic rules for associativity and so + // forth (which IEEE floats do not, strictly speaking). This is inspired by + // gcc/clang's -ffast-math flag. + bool fastMath = false; // Whether to try to preserve debug info through, which are special calls. bool debugInfo = false; // Arbitrary string arguments from the commandline, which we forward to diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 1c2445d5d73..beda5eb26bd 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -161,7 +161,10 @@ struct OptimizeInstructions #endif } + bool fastMath; + void doWalkFunction(Function* func) { + fastMath = getPassOptions().fastMath; // first, scan locals { LocalScanner scanner(localInfo, getPassOptions()); @@ -1369,7 +1372,7 @@ struct OptimizeInstructions } { double value; - if (matches(curr, binary(Abstract::Sub, any(), fval(&value))) && + if (fastMath && matches(curr, binary(Abstract::Sub, any(), fval(&value))) && value == 0.0) { // x - (-0.0) ==> x + 0.0 if (std::signbit(value)) { @@ -1385,19 +1388,17 @@ struct OptimizeInstructions { // x + (-0.0) ==> x double value; - if (matches(curr, binary(Abstract::Add, any(), fval(&value))) && + if (fastMath && matches(curr, binary(Abstract::Add, any(), fval(&value))) && value == 0.0 && std::signbit(value)) { return curr->left; } } - // Note that this is correct even on floats with a NaN on the left, - // as a NaN would skip the computation and just return the NaN, - // and that is precisely what we do here. but, the same with -1 - // (change to a negation) would be incorrect for that reason. if (matches(curr, binary(Abstract::Mul, any(&left), constant(1))) || matches(curr, binary(Abstract::DivS, any(&left), constant(1))) || matches(curr, binary(Abstract::DivU, any(&left), constant(1)))) { - return left; + if (!curr->type.isFloat() || fastMath) { + return left; + } } return nullptr; } diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 8d621794c57..dbed8f5cf52 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -775,35 +775,10 @@ Literal Literal::add(const Literal& other) const { return Literal(uint32_t(i32) + uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) + uint64_t(other.i64)); - case Type::f32: { - // Special-case addition of -0. nan + -0 can change nan bits per the - // wasm spec, but it is ok to just return that original nan, and we - // do that here so that we are consistent with the optimization of - // removing the + -0 and leaving just the nan. That is, if we just - // do a normal add and the CPU decides to change the bits, we'd - // give a different result on optimized code, which would look like - // it was a bad optimization. So out of all the valid results to - // return here, return the simplest one that is consistent with - // our optimization for the case of 1. 
- float lhs = getf32(), rhs = other.getf32(); - if (lhs == 0 && std::signbit(lhs)) { - return Literal(rhs); - } - if (rhs == 0 && std::signbit(rhs)) { - return Literal(lhs); - } - return Literal(lhs + rhs); - } - case Type::f64: { - double lhs = getf64(), rhs = other.getf64(); - if (lhs == 0 && std::signbit(lhs)) { - return Literal(rhs); - } - if (rhs == 0 && std::signbit(rhs)) { - return Literal(lhs); - } - return Literal(lhs + rhs); - } + case Type::f32: + return Literal(getf32() + other.getf32()); + case Type::f64: + return Literal(getf64() + other.getf64()); case Type::v128: case Type::funcref: case Type::externref: @@ -824,22 +799,10 @@ Literal Literal::sub(const Literal& other) const { return Literal(uint32_t(i32) - uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) - uint64_t(other.i64)); - case Type::f32: { - float lhs = getf32(), rhs = other.getf32(); - // As with addition, make sure to not change NaN bits in trivial - // operations. - if (rhs == 0 && !std::signbit(rhs)) { - return Literal(lhs); - } - return Literal(lhs - rhs); - } - case Type::f64: { - double lhs = getf64(), rhs = other.getf64(); - if (rhs == 0 && !std::signbit(rhs)) { - return Literal(lhs); - } - return Literal(lhs - rhs); - } + case Type::f32: + return Literal(getf32() - other.getf32()); + case Type::f64: + return Literal(getf64() - other.getf64()); case Type::v128: case Type::funcref: case Type::externref: @@ -931,28 +894,10 @@ Literal Literal::mul(const Literal& other) const { return Literal(uint32_t(i32) * uint32_t(other.i32)); case Type::i64: return Literal(uint64_t(i64) * uint64_t(other.i64)); - case Type::f32: { - float lhs = getf32(), rhs = other.getf32(); - // As with addition, make sure to not change NaN bits in trivial - // operations. - if (rhs == 1) { - return Literal(lhs); - } - if (lhs == 1) { - return Literal(rhs); - } - return Literal(lhs * rhs); - } - case Type::f64: { - double lhs = getf64(), rhs = other.getf64(); - if (rhs == 1) { - return Literal(lhs); - } - if (lhs == 1) { - return Literal(rhs); - } - return Literal(lhs * rhs); - } + case Type::f32: + return Literal(getf32() * other.getf32()); + case Type::f64: + return Literal(getf64() * other.getf64()); case Type::v128: case Type::funcref: case Type::externref: @@ -992,11 +937,6 @@ Literal Literal::div(const Literal& other) const { case FP_INFINITE: // fallthrough case FP_NORMAL: // fallthrough case FP_SUBNORMAL: - // As with addition, make sure to not change NaN bits in trivial - // operations. - if (rhs == 1) { - return Literal(lhs); - } return Literal(lhs / rhs); default: WASM_UNREACHABLE("invalid fp classification"); @@ -1025,9 +965,6 @@ Literal Literal::div(const Literal& other) const { case FP_INFINITE: // fallthrough case FP_NORMAL: // fallthrough case FP_SUBNORMAL: - if (rhs == 1) { - return Literal(lhs); - } return Literal(lhs / rhs); default: WASM_UNREACHABLE("invalid fp classification"); diff --git a/test/passes/fuzz-exec_O.wast b/test/passes/fuzz-exec_O.wast index 0532fc8df08..b34dc2e8f7a 100644 --- a/test/passes/fuzz-exec_O.wast +++ b/test/passes/fuzz-exec_O.wast @@ -22,10 +22,10 @@ ) (module (func "div" (result f32) - (f32.div ;; div by 1 can be removed, leaving this nan - (f32.const -nan:0x23017a) ;; as it is. wasm semantics allow nan bits to - (f32.const 1) ;; change, but the interpreter should not do so, - ) ;; so that it does not fail on that opt. 
+ (f32.div + (f32.const -nan:0x23017a) + (f32.const 1) + ) ) (func "mul1" (result f32) (f32.mul diff --git a/test/spec/old_float_exprs.wast b/test/spec/old_float_exprs.wast index 52fc7926e12..f4b3098078a 100644 --- a/test/spec/old_float_exprs.wast +++ b/test/spec/old_float_exprs.wast @@ -105,10 +105,8 @@ (f64.mul (local.get $x) (f64.const 1.0))) ) -;; XXX BINARYEN: disable this test, as we have testing for the more strict property -;; of not changing the bits at all in our interpreter -;; (assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000)) -;; (assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000)) +(assert_return (invoke "f32.no_fold_mul_one" (f32.const nan:0x200000)) (f32.const nan:0x600000)) +(assert_return (invoke "f64.no_fold_mul_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000)) ;; Test that 0.0/x is not folded to 0.0. @@ -137,10 +135,8 @@ (f64.div (local.get $x) (f64.const 1.0))) ) -;; XXX BINARYEN: disable this test, as we have testing for the more strict property -;; of not changing the bits at all in our interpreter -;; (assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:arithmetic)) -;; (assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:arithmetic)) +(assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:arithmetic)) +(assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:arithmetic)) ;; Test that x/-1.0 is not folded to -x. From 26c1e06c2a62685b8ed0b3a38e1d815d9cc8c32b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Sep 2020 09:15:11 -0700 Subject: [PATCH 09/16] format --- src/passes/OptimizeInstructions.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index beda5eb26bd..6c43e8a268a 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1372,7 +1372,8 @@ struct OptimizeInstructions } { double value; - if (fastMath && matches(curr, binary(Abstract::Sub, any(), fval(&value))) && + if (fastMath && + matches(curr, binary(Abstract::Sub, any(), fval(&value))) && value == 0.0) { // x - (-0.0) ==> x + 0.0 if (std::signbit(value)) { @@ -1388,7 +1389,8 @@ struct OptimizeInstructions { // x + (-0.0) ==> x double value; - if (fastMath && matches(curr, binary(Abstract::Add, any(), fval(&value))) && + if (fastMath && + matches(curr, binary(Abstract::Add, any(), fval(&value))) && value == 0.0 && std::signbit(value)) { return curr->left; } From e7f5a9ac8cb2991b36ca1d497710b53d36953b7a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Sep 2020 09:23:47 -0700 Subject: [PATCH 10/16] fix --- src/tools/optimization-options.h | 7 +++++++ test/spec/old_float_exprs.wast | 10 ++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h index 72f478329a3..7c12f4d0e2b 100644 --- a/src/tools/optimization-options.h +++ b/src/tools/optimization-options.h @@ -187,6 +187,13 @@ struct OptimizationOptions : public ToolOptions { Options::Arguments::Zero, [this](Options*, const std::string&) { passOptions.lowMemoryUnused = true; + }) + .add("--fast-math", + "-ffm", + "Optimize floats without handling corner cases of NaNs and rounding", + Options::Arguments::Zero, + [this](Options*, const std::string&) { + 
passOptions.fastMath = true; }); // add passes in registry for (const auto& p : PassRegistry::get()->getRegisteredNames()) { diff --git a/test/spec/old_float_exprs.wast b/test/spec/old_float_exprs.wast index f4b3098078a..7900832b0a8 100644 --- a/test/spec/old_float_exprs.wast +++ b/test/spec/old_float_exprs.wast @@ -72,10 +72,8 @@ (f64.sub (local.get $x) (f64.const 0.0))) ) -;; XXX BINARYEN: pick the same NaN pattern as the input here, to match the -;; interpreter -(assert_return (invoke "f32.no_fold_sub_zero" (f32.const nan:0x200000)) (f32.const nan:0x200000)) -(assert_return (invoke "f64.no_fold_sub_zero" (f64.const nan:0x4000000000000)) (f64.const nan:0x4000000000000)) +(assert_return (invoke "f32.no_fold_sub_zero" (f32.const nan:0x200000)) (f32.const nan:0x600000)) +(assert_return (invoke "f64.no_fold_sub_zero" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000)) ;; Test that x*0.0 is not folded to 0.0. @@ -135,8 +133,8 @@ (f64.div (local.get $x) (f64.const 1.0))) ) -(assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:arithmetic)) -(assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:arithmetic)) +(assert_return (invoke "f32.no_fold_div_one" (f32.const nan:0x200000)) (f32.const nan:0x600000)) +(assert_return (invoke "f64.no_fold_div_one" (f64.const nan:0x4000000000000)) (f64.const nan:0xc000000000000)) ;; Test that x/-1.0 is not folded to -x. From 2936b8ab3f2252775944311d6ace14a8b8b8ebda Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Sep 2020 09:24:09 -0700 Subject: [PATCH 11/16] format --- src/tools/optimization-options.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h index 7c12f4d0e2b..5b6a643e64d 100644 --- a/src/tools/optimization-options.h +++ b/src/tools/optimization-options.h @@ -188,13 +188,12 @@ struct OptimizationOptions : public ToolOptions { [this](Options*, const std::string&) { passOptions.lowMemoryUnused = true; }) - .add("--fast-math", - "-ffm", - "Optimize floats without handling corner cases of NaNs and rounding", - Options::Arguments::Zero, - [this](Options*, const std::string&) { - passOptions.fastMath = true; - }); + .add( + "--fast-math", + "-ffm", + "Optimize floats without handling corner cases of NaNs and rounding", + Options::Arguments::Zero, + [this](Options*, const std::string&) { passOptions.fastMath = true; }); // add passes in registry for (const auto& p : PassRegistry::get()->getRegisteredNames()) { (*this).add( From 301c14e2ede10994f0f98597f80b947e0a15cf81 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Sep 2020 09:30:52 -0700 Subject: [PATCH 12/16] more --- test/passes/O_fast-math.txt | 21 +++++++ test/passes/O_fast-math.wast | 57 +++++++++++++++++++ test/passes/fuzz-exec_O.txt | 35 ++++++------ .../optimize-instructions_all-features.txt | 48 ++++++++++++---- 4 files changed, 130 insertions(+), 31 deletions(-) create mode 100644 test/passes/O_fast-math.txt create mode 100644 test/passes/O_fast-math.wast diff --git a/test/passes/O_fast-math.txt b/test/passes/O_fast-math.txt new file mode 100644 index 00000000000..1b454c68eae --- /dev/null +++ b/test/passes/O_fast-math.txt @@ -0,0 +1,21 @@ +(module + (type $none_=>_f32 (func (result f32))) + (export "div" (func $0)) + (export "mul1" (func $1)) + (export "mul2" (func $2)) + (export "add1" (func $1)) + (export "add2" (func $2)) + (export "add3" (func $2)) + (export "add4" (func $2)) + (export "sub1" (func 
$1)) + (export "sub2" (func $2)) + (func $0 (; has Stack IR ;) (result f32) + (f32.const -nan:0x23017a) + ) + (func $1 (; has Stack IR ;) (result f32) + (f32.const -nan:0x34546d) + ) + (func $2 (; has Stack IR ;) (result f32) + (f32.const -nan:0x74546d) + ) +) diff --git a/test/passes/O_fast-math.wast b/test/passes/O_fast-math.wast new file mode 100644 index 00000000000..2317f782d9b --- /dev/null +++ b/test/passes/O_fast-math.wast @@ -0,0 +1,57 @@ +;; with fast-math we can optimize some of these patterns +(module + (func "div" (result f32) + (f32.div + (f32.const -nan:0x23017a) + (f32.const 1) + ) + ) + (func "mul1" (result f32) + (f32.mul + (f32.const -nan:0x34546d) + (f32.const 1) + ) + ) + (func "mul2" (result f32) + (f32.mul + (f32.const 1) + (f32.const -nan:0x34546d) + ) + ) + (func "add1" (result f32) + (f32.add + (f32.const -nan:0x34546d) + (f32.const -0) + ) + ) + (func "add2" (result f32) + (f32.add + (f32.const -0) + (f32.const -nan:0x34546d) + ) + ) + (func "add3" (result f32) + (f32.add + (f32.const -nan:0x34546d) + (f32.const 0) + ) + ) + (func "add4" (result f32) + (f32.add + (f32.const 0) + (f32.const -nan:0x34546d) + ) + ) + (func "sub1" (result f32) + (f32.sub + (f32.const -nan:0x34546d) + (f32.const 0) + ) + ) + (func "sub2" (result f32) + (f32.sub + (f32.const -nan:0x34546d) + (f32.const -0) + ) + ) +) diff --git a/test/passes/fuzz-exec_O.txt b/test/passes/fuzz-exec_O.txt index 6bf8ba50bac..f17b0465030 100644 --- a/test/passes/fuzz-exec_O.txt +++ b/test/passes/fuzz-exec_O.txt @@ -31,21 +31,21 @@ [fuzz-exec] comparing func_0 [fuzz-exec] comparing func_1 [fuzz-exec] calling div -[fuzz-exec] note result: div => -nan:0x23017a +[fuzz-exec] note result: div => -nan:0x63017a [fuzz-exec] calling mul1 -[fuzz-exec] note result: mul1 => -nan:0x34546d +[fuzz-exec] note result: mul1 => -nan:0x74546d [fuzz-exec] calling mul2 -[fuzz-exec] note result: mul2 => -nan:0x34546d +[fuzz-exec] note result: mul2 => -nan:0x74546d [fuzz-exec] calling add1 -[fuzz-exec] note result: add1 => -nan:0x34546d +[fuzz-exec] note result: add1 => -nan:0x74546d [fuzz-exec] calling add2 -[fuzz-exec] note result: add2 => -nan:0x34546d +[fuzz-exec] note result: add2 => -nan:0x74546d [fuzz-exec] calling add3 [fuzz-exec] note result: add3 => -nan:0x74546d [fuzz-exec] calling add4 [fuzz-exec] note result: add4 => -nan:0x74546d [fuzz-exec] calling sub1 -[fuzz-exec] note result: sub1 => -nan:0x34546d +[fuzz-exec] note result: sub1 => -nan:0x74546d [fuzz-exec] calling sub2 [fuzz-exec] note result: sub2 => -nan:0x74546d (module @@ -55,36 +55,33 @@ (export "mul2" (func $1)) (export "add1" (func $1)) (export "add2" (func $1)) - (export "add3" (func $5)) - (export "add4" (func $5)) + (export "add3" (func $1)) + (export "add4" (func $1)) (export "sub1" (func $1)) - (export "sub2" (func $5)) + (export "sub2" (func $1)) (func $0 (; has Stack IR ;) (result f32) - (f32.const -nan:0x23017a) + (f32.const -nan:0x63017a) ) (func $1 (; has Stack IR ;) (result f32) - (f32.const -nan:0x34546d) - ) - (func $5 (; has Stack IR ;) (result f32) (f32.const -nan:0x74546d) ) ) [fuzz-exec] calling div -[fuzz-exec] note result: div => -nan:0x23017a +[fuzz-exec] note result: div => -nan:0x63017a [fuzz-exec] calling mul1 -[fuzz-exec] note result: mul1 => -nan:0x34546d +[fuzz-exec] note result: mul1 => -nan:0x74546d [fuzz-exec] calling mul2 -[fuzz-exec] note result: mul2 => -nan:0x34546d +[fuzz-exec] note result: mul2 => -nan:0x74546d [fuzz-exec] calling add1 -[fuzz-exec] note result: add1 => -nan:0x34546d +[fuzz-exec] note result: add1 => 
-nan:0x74546d [fuzz-exec] calling add2 -[fuzz-exec] note result: add2 => -nan:0x34546d +[fuzz-exec] note result: add2 => -nan:0x74546d [fuzz-exec] calling add3 [fuzz-exec] note result: add3 => -nan:0x74546d [fuzz-exec] calling add4 [fuzz-exec] note result: add4 => -nan:0x74546d [fuzz-exec] calling sub1 -[fuzz-exec] note result: sub1 => -nan:0x34546d +[fuzz-exec] note result: sub1 => -nan:0x74546d [fuzz-exec] calling sub2 [fuzz-exec] note result: sub2 => -nan:0x74546d [fuzz-exec] comparing add1 diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt index 8c21b8494d6..648eb6f33a1 100644 --- a/test/passes/optimize-instructions_all-features.txt +++ b/test/passes/optimize-instructions_all-features.txt @@ -2811,10 +2811,16 @@ (local.get $x64) ) (drop - (local.get $y32) + (f32.mul + (local.get $y32) + (f32.const 1) + ) ) (drop - (local.get $y64) + (f64.mul + (local.get $y64) + (f64.const 1) + ) ) (drop (i32.const 0) @@ -2847,10 +2853,16 @@ (local.get $x64) ) (drop - (local.get $y32) + (f32.div + (local.get $y32) + (f32.const 1) + ) ) (drop - (local.get $y64) + (f64.div + (local.get $y64) + (f64.const 1) + ) ) (drop (f32.div @@ -3584,27 +3596,39 @@ ) (func $const-float-zero (param $fx f32) (param $fy f64) (drop - (local.get $fx) + (f32.sub + (local.get $fx) + (f32.const 0) + ) ) (drop - (local.get $fy) + (f64.sub + (local.get $fy) + (f64.const 0) + ) ) (drop - (local.get $fx) + (f32.add + (local.get $fx) + (f32.const -0) + ) ) (drop - (local.get $fy) + (f64.add + (local.get $fy) + (f64.const -0) + ) ) (drop - (f32.add + (f32.sub (local.get $fx) - (f32.const 0) + (f32.const -0) ) ) (drop - (f64.add + (f64.sub (local.get $fy) - (f64.const 0) + (f64.const -0) ) ) (drop From e661403ac674f7c4d943776c2ac823d78bd9a7b2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Sep 2020 09:53:35 -0700 Subject: [PATCH 13/16] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 590d756fdf9..63d887b6927 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ full changeset diff at the end of each section. Current Trunk ------------- +- Add `--fast-math` mode. 
(#3155) + v97 --- From 10a814aac12a1966ecc2d4503b374299b10801b7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 30 Sep 2020 11:52:45 -0700 Subject: [PATCH 14/16] Update src/passes/OptimizeInstructions.cpp Co-authored-by: Thomas Lively <7121787+tlively@users.noreply.github.com> --- src/passes/OptimizeInstructions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 6c43e8a268a..30ead48c331 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1398,7 +1398,7 @@ struct OptimizeInstructions if (matches(curr, binary(Abstract::Mul, any(&left), constant(1))) || matches(curr, binary(Abstract::DivS, any(&left), constant(1))) || matches(curr, binary(Abstract::DivU, any(&left), constant(1)))) { - if (!curr->type.isFloat() || fastMath) { + if (curr->type.isInteger() || fastMath) { return left; } } From c4cfcba1b7eb0f0d65a9d78117211d20574292bc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 30 Sep 2020 11:55:38 -0700 Subject: [PATCH 15/16] another case --- src/passes/OptimizeInstructions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index f1abdfaaabf..01af45bcdac 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1413,7 +1413,7 @@ struct OptimizeInstructions curr->op = Abstract::getBinary(type, Abstract::Add); right->value = right->value.neg(); return curr; - } else { + } else if (fastMath) { // x - 0.0 ==> x return curr->left; } From dfef85aacf554af98199441b34617d338ecc0633 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 30 Sep 2020 12:04:03 -0700 Subject: [PATCH 16/16] test --- test/passes/optimize-instructions_all-features.txt | 6 ++++++ test/passes/optimize-instructions_all-features.wast | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/test/passes/optimize-instructions_all-features.txt b/test/passes/optimize-instructions_all-features.txt index d9531672ce4..ad09a502ff0 100644 --- a/test/passes/optimize-instructions_all-features.txt +++ b/test/passes/optimize-instructions_all-features.txt @@ -3738,6 +3738,12 @@ (f64.const 0) ) ) + (drop + (f32.sub + (f32.const -nan:0x34546d) + (f32.const 0) + ) + ) ) (func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64) (drop diff --git a/test/passes/optimize-instructions_all-features.wast b/test/passes/optimize-instructions_all-features.wast index 1afa3e5b535..a5649c48aa3 100644 --- a/test/passes/optimize-instructions_all-features.wast +++ b/test/passes/optimize-instructions_all-features.wast @@ -4233,6 +4233,10 @@ (local.get $fy) ;; skip (f64.const 0) )) + (drop (f32.sub + (f32.const -nan:0x34546d) ;; skip + (f32.const 0) + )) ) (func $rhs-is-neg-one (param $x i32) (param $y i64) (param $fx f32) (param $fy f64) (drop (i32.sub