From 35924d415e950ce2f0225d1a514a2f2cb950215b Mon Sep 17 00:00:00 2001
From: Christopher Wellons <wellons@nullprogram.com>
Date: Tue, 14 Feb 2023 14:02:12 -0500
Subject: [PATCH] Use Kahan summation for fsum and fsumf

This compensated summation gives a more precise result than the original
algorithm, as demonstrated through extra precision in the tests. It also
drops recursion and the extra branching.

I used double precision internally for fsumf since I assume its purpose
is to interface with single precision, not necessary constrain itself to
only using single precision. If needed, it can be trivially changed to
use single precision internally while still passing the tests.

In fsumf, naive summation using double precision is likely sufficient,
and more precise than single precision Kaham summation. In other words,
Kahan summation with double precision in fsumf may be overkill.

In the fsumf test, neither 10000.1 nor 1.1 can be represented exactly.
The former is rounded down and the latter is rounded up (by less) to the
nearest representable value. As a result, the most precise sum is just
shy of the "obvious" result.
---
 libc/tinymath/fsum.c           | 14 ++++++++++----
 libc/tinymath/fsumf.c          | 14 ++++++++++----
 test/libc/tinymath/fsum_test.c |  8 ++++----
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/libc/tinymath/fsum.c b/libc/tinymath/fsum.c
index cdfdd228530..817c45b1e0e 100644
--- a/libc/tinymath/fsum.c
+++ b/libc/tinymath/fsum.c
@@ -24,8 +24,14 @@
  */
 double fsum(const double *p, size_t n) {
   size_t i;
-  double s;
-  if (n > 8) return fsum(p, n / 2) + fsum(p + n / 2, n - n / 2);
-  for (s = i = 0; i < n; ++i) s += p[i];
-  return s;
+  double err, sum, t, y;
+
+  sum = err = 0;
+  for (i = 0; i < n; ++i) {
+    y = p[i] - err;
+    t = sum + y;
+    err = (t - sum) - y;
+    sum = t;
+  }
+  return sum;
 }
diff --git a/libc/tinymath/fsumf.c b/libc/tinymath/fsumf.c
index c0c42ba7eb8..18359ffda51 100644
--- a/libc/tinymath/fsumf.c
+++ b/libc/tinymath/fsumf.c
@@ -23,9 +23,15 @@
  * Adds floats in array.
  */
 float fsumf(const float *p, size_t n) {
-  float s;
   size_t i;
-  if (n > 8) return fsumf(p, n / 2) + fsumf(p + n / 2, n - n / 2);
-  for (s = i = 0; i < n; ++i) s += p[i];
-  return s;
+  double err, sum, t, y;
+
+  sum = err = 0;
+  for (i = 0; i < n; ++i) {
+    y = p[i] - err;
+    t = sum + y;
+    err = (t - sum) - y;
+    sum = t;
+  }
+  return sum;
 }
diff --git a/test/libc/tinymath/fsum_test.c b/test/libc/tinymath/fsum_test.c
index e4772fc709c..65f1d1930c3 100644
--- a/test/libc/tinymath/fsum_test.c
+++ b/test/libc/tinymath/fsum_test.c
@@ -31,21 +31,21 @@ double D[N];
 void SetUp(void) {
   int i;
   for (i = 0; i < N / 2; ++i) {
-    D[i * 2 + 0] = 1000000000.1;
+    D[i * 2 + 0] = 10000000000.1;
     D[i * 2 + 1] = 1.1;
   }
   for (i = 0; i < N / 2; ++i) {
-    F[i * 2 + 0] = 1000.1;
+    F[i * 2 + 0] = 10000.1;
     F[i * 2 + 1] = 1.1;
   }
 }
 
 TEST(fsum, test) {
-  EXPECT_STREQ("500000000.6", _gc(xasprintf("%.15g", fsum(D, N) / N)));
+  EXPECT_STREQ("5000000000.6", _gc(xasprintf("%.16g", fsum(D, N) / N)));
 }
 
 TEST(fsumf, test) {
-  EXPECT_STREQ("500.6", _gc(xasprintf("%.7g", fsumf(F, N) / N)));
+  EXPECT_STREQ("5000.5996", _gc(xasprintf("%.8g", fsumf(F, N) / N)));
 }
 
 BENCH(fsum, bench) {