coderive-lang · DanexCodr · Apr 17, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,30 @@
 
 All notable changes to Coderive are documented in this file.
 
+## [v0.9.2] - Why Slow? - April 17, 2026
+
+### 🔬 Lexer/Parser Throughput Baseline (New)
+- Added a dedicated cross-language lexer/parser throughput suite under `benchmarks/lexer_parser/`.
+- New runner: `bash benchmarks/lexer_parser/run_lexer_parser_benchmark.sh <runs> <iterations>`.
+- Current baseline (median of `5` runs, `1000` iterations per run):
+
+| Language | Median ms | Throughput MB/s |
+|----------|-----------|-----------------|
+| Coderive | 632       | 2.22            |
+| Java     | 111       | 12.61           |
+| Go       | 17        | 82.35           |
+| Kotlin   | 162       | 8.64            |
+| Python   | 270       | 5.19            |
+| Lua      | 228       | 6.14            |
+
+### 🧭 What This Suggests To Improve Next
+- **Parser construction/validation overhead is dominant** in Coderive relative to pure scanner+light-parser baselines.
+- **Tokenizer object churn remains significant** (token allocation and parser handoff pressure).
+- **Near-term focus**:
+  1. add a lexer-only throughput mode in Coderive benchmarks to isolate lexing cost from parse cost,
+  2. reduce parser backtracking/rewind-heavy paths in `MainParser` declaration probing,
+  3. introduce low-allocation token-stream views for parser hot paths.
+
 ## [v0.9.0] - Platform Snapshot - April 13, 2026
 
 ### 🔀 Merge Coverage for This Snapshot

diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,53 @@
+# Cross-language benchmark suite
+
+This benchmark compares **Coderive** against:
+
+- Java
+- C
+- C++
+- Rust
+- Python
+
+## Workload
+
+Each implementation runs the same deterministic kernels:
+
+1. Sum of squares from `1..2,000,000`
+2. Iterative `fib(35)`, with its result counted `2,000` times toward the checksum
+3. Naive prime counting in `2..5,000`
+
+Each program prints a single line:
+
+`CHECKSUM:<value>`
+
+Expected checksum:
+
+`2666668685121930669`
+
+## Run
+
+From repository root:
+
+```bash
+bash benchmarks/run_cross_language_benchmark.sh
+```
+
+Optionally pass the number of runs per language (the median time is reported):
+
+```bash
+bash benchmarks/run_cross_language_benchmark.sh 5
+```
+
+The runner:
+
+- compiles required binaries
+- runs each language multiple times
+- verifies checksum consistency
+- prints median time in milliseconds
+- skips languages missing toolchains
+
+## Lexer/Parser throughput benchmark
+
+See:
+
+`benchmarks/lexer_parser/README.md`
diff --git a/benchmarks/c/cross_language_benchmark.c b/benchmarks/c/cross_language_benchmark.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdio.h>
+
+static int64_t fib(int n) { /* Iterative Fibonacci: returns F(n), with F(0) = 0 and F(1) = 1. */
+    if (n == 0) return 0;
+    int64_t a = 0; /* F(i - 1) at the top of iteration i; starts as F(0) */
+    int64_t b = 1; /* F(i) at the top of iteration i; starts as F(1) */
+    for (int i = 1; i <= n; i++) { /* does not execute for n < 1, so such n also return 0 */
+        int64_t next = a + b;
+        a = b;
+        b = next;
+    }
+    return a; /* after n iterations a holds F(n) */
+}
+
+static int64_t sum_squares(int limit) { /* Returns the sum of i*i for i in 1..limit (0 when limit < 1). */
+    int64_t total = 0;
+    for (int i = 1; i <= limit; i++) {
+        total += (int64_t)i * (int64_t)i; /* widen both operands so the product is computed in 64 bits */
+    }
+    return total;
+}
+
+static int64_t prime_count(int limit) { /* Counts the primes in 2..limit using naive trial division. */
+    int64_t count = 0;
+    for (int candidate = 2; candidate <= limit; candidate++) {
+        int is_prime = 1; /* assumed prime until a divisor is found */
+        if (candidate > 2) { /* 2 is counted as prime without running the divisor loop */
+            for (int divisor = 2; divisor < candidate; divisor++) { /* tries every value below candidate */
+                if (candidate % divisor == 0) {
+                    is_prime = 0;
+                    break; /* one divisor is enough to reject the candidate */
+                }
+            }
+        }
+        if (is_prime) {
+            count++;
+        }
+    }
+    return count;
+}
+
+int main(void) { /* Runs the three kernels and prints their combined checksum on one line. */
+    int64_t sum_part = sum_squares(2000000);
+    int64_t fib_part = fib(35) * 2000; /* fib(35) is evaluated once; its value is scaled by 2000 */
+    int64_t prime_part = prime_count(5000);
+    int64_t checksum = sum_part + fib_part + prime_part;
+    printf("CHECKSUM:%lld\n", (long long)checksum); /* cast so the argument matches the %lld conversion */
+    return 0;
+}
diff --git a/benchmarks/coderive/CrossLanguageBenchmark.cod b/benchmarks/coderive/CrossLanguageBenchmark.cod
@@ -0,0 +1,57 @@
+unit benchmarks.coderive
+
+share CrossLanguageBenchmark { // Coderive version of the cross-language benchmark kernels
+    share fib(n: int) :: value: int { // iterative Fibonacci: yields F(n) with F(0) = 0, F(1) = 1
+        if n == 0 { ~> (value: 0) }
+        a: int = 0
+        b: int = 1
+        for i of 1 to n { // NOTE(review): range presumably includes n, like `i <= n` in the C version - confirm
+            nextValue := a + b
+            a = b
+            b = nextValue
+        }
+        ~> (value: a)
+    }
+
+    share sumSquares(limit: int) :: value: int { // sum of i * i for i in 1 to limit
+        total: int = 0
+        for i of 1 to limit {
+            total = total + (i * i)
+        }
+        ~> (value: total)
+    }
+
+    share primeCount(limit: int) :: value: int { // counts primes in 2 to limit by naive trial division
+        count: int = 0
+        for candidate of 2 to limit {
+            isPrime: bool = true
+            if candidate > 2 { // keeps the divisor range from being evaluated as `2 to 1` when candidate is 2
+                for divisor of 2 to candidate - 1 {
+                    if candidate % divisor == 0 {
+                        isPrime = false
+                        break
+                    }
+                }
+            }
+            if isPrime {
+                count = count + 1
+            }
+        }
+        ~> (value: count)
+    }
+
+    share main() { // runs the three kernels and prints their combined checksum
+        sumPart := CrossLanguageBenchmark.sumSquares(2M) // NOTE(review): 2M presumably means 2,000,000 as in the C version - confirm
+
+        fibValue := CrossLanguageBenchmark.fib(35)
+        fibTotal: int = 0
+        for i of 1 to 2000 { // fib(35) is computed once above; only its value is accumulated here
+            fibTotal = fibTotal + fibValue
+        }
+
+        primePart := CrossLanguageBenchmark.primeCount(5000)
+
+        checksum := sumPart + fibTotal + primePart
+        out("CHECKSUM:" + checksum)
+    }
+}
diff --git a/benchmarks/cpp/cross_language_benchmark.cpp b/benchmarks/cpp/cross_language_benchmark.cpp
@@ -0,0 +1,50 @@
+#include <cstdint>
+#include <iostream>
+
+static std::int64_t fib(int n) {  // Iterative Fibonacci: returns F(n), with F(0) = 0 and F(1) = 1.
+    if (n == 0) return 0;
+    std::int64_t a = 0;  // F(i - 1) at the top of iteration i; starts as F(0)
+    std::int64_t b = 1;  // F(i) at the top of iteration i; starts as F(1)
+    for (int i = 1; i <= n; ++i) {  // does not execute for n < 1, so such n also return 0
+        std::int64_t next = a + b;
+        a = b;
+        b = next;
+    }
+    return a;  // after n iterations a holds F(n)
+}
+
+static std::int64_t sumSquares(int limit) {  // Returns the sum of i*i for i in 1..limit (0 when limit < 1).
+    std::int64_t total = 0;
+    for (int i = 1; i <= limit; ++i) {
+        total += static_cast<std::int64_t>(i) * static_cast<std::int64_t>(i);  // widen first: 64-bit product
+    }
+    return total;
+}
+
+static std::int64_t primeCount(int limit) {  // Counts the primes in 2..limit using naive trial division.
+    std::int64_t count = 0;
+    for (int candidate = 2; candidate <= limit; ++candidate) {
+        bool isPrime = true;  // assumed prime until a divisor is found
+        if (candidate > 2) {  // 2 is counted as prime without running the divisor loop
+            for (int divisor = 2; divisor < candidate; ++divisor) {  // tries every value below candidate
+                if (candidate % divisor == 0) {
+                    isPrime = false;
+                    break;  // one divisor is enough to reject the candidate
+                }
+            }
+        }
+        if (isPrime) {
+            ++count;
+        }
+    }
+    return count;
+}
+
+int main() {  // Runs the three kernels and prints their combined checksum on one line.
+    std::int64_t sumPart = sumSquares(2000000);
+    std::int64_t fibPart = fib(35) * 2000;  // fib(35) is evaluated once; its value is scaled by 2000
+    std::int64_t primePart = primeCount(5000);
+    std::int64_t checksum = sumPart + fibPart + primePart;
+    std::cout << "CHECKSUM:" << checksum << '\n';
+    return 0;
+}
diff --git a/benchmarks/java/CrossLanguageBenchmark.java b/benchmarks/java/CrossLanguageBenchmark.java
@@ -0,0 +1,48 @@
+public final class CrossLanguageBenchmark { // Java version of the cross-language benchmark kernels
+    private CrossLanguageBenchmark() {} // private constructor: the class only exposes static members
+
+    private static long fib(int n) { // iterative Fibonacci: returns F(n) with F(0) = 0, F(1) = 1
+        if (n == 0) return 0L;
+        long a = 0L; // F(i - 1) at the top of iteration i; starts as F(0)
+        long b = 1L; // F(i) at the top of iteration i; starts as F(1)
+        for (int i = 1; i <= n; i++) { // does not execute for n < 1, so such n also return 0
+            long next = a + b;
+            a = b;
+            b = next;
+        }
+        return a; // after n iterations a holds F(n)
+    }
+
+    private static long sumSquares(int limit) { // sum of i*i for i in 1..limit (0 when limit < 1)
+        long total = 0L;
+        for (int i = 1; i <= limit; i++) {
+            total += (long) i * (long) i; // widen both operands so the product is computed as long
+        }
+        return total;
+    }
+
+    private static long primeCount(int limit) { // counts the primes in 2..limit using naive trial division
+        long count = 0L;
+        for (int candidate = 2; candidate <= limit; candidate++) {
+            boolean isPrime = true; // assumed prime until a divisor is found
+            if (candidate > 2) { // 2 is counted as prime without running the divisor loop
+                for (int divisor = 2; divisor < candidate; divisor++) { // tries every value below candidate
+                    if (candidate % divisor == 0) {
+                        isPrime = false;
+                        break; // one divisor is enough to reject the candidate
+                    }
+                }
+            }
+            if (isPrime) count++;
+        }
+        return count;
+    }
+
+    public static void main(String[] args) { // runs the three kernels and prints their combined checksum
+        long sumPart = sumSquares(2_000_000);
+        long fibPart = fib(35) * 2_000L; // fib(35) is evaluated once; its value is scaled by 2000
+        long primePart = primeCount(5_000);
+        long checksum = sumPart + fibPart + primePart;
+        System.out.println("CHECKSUM:" + checksum);
+    }
+}
diff --git a/benchmarks/lexer_parser/README.md b/benchmarks/lexer_parser/README.md
@@ -0,0 +1,32 @@
+# Lexer/Parser Throughput Benchmark
+
+This suite benchmarks lexer/parser throughput for:
+
+- Coderive (real `MainLexer` + `MainParser`)
+- Java
+- Go
+- Kotlin
+- Python
+- Lua
+
+## Run
+
+From repository root:
+
+```bash
+bash benchmarks/lexer_parser/run_lexer_parser_benchmark.sh
+```
+
+Optional:
+
+```bash
+bash benchmarks/lexer_parser/run_lexer_parser_benchmark.sh <runs> <iterations>
+```
+
+- `runs`: number of timed runs per language; the median is reported (default `3`)
+- `iterations`: full corpus passes per run (default `20`)
+
+Output columns:
+
+- **Median ms**: median wall-clock time per language, in milliseconds
+- **Throughput MB/s**: corpus data processed per second, in megabytes