From efb83b4c7971714fc0787daefff0f09681a8670a Mon Sep 17 00:00:00 2001
From: tannevaled
Date: Thu, 25 Jun 2026 20:57:01 +0200
Subject: [PATCH] test: add TruffleRuby to the conformance + performance validation

Add TruffleRuby as a third independent reference implementation alongside MRI
and JRuby:

- scripts/oracle.sh: now a 4-way differential oracle (rbgo vs MRI vs JRuby vs
  TruffleRuby). Factor the per-reference comparison into diff_one() so each
  reference (incl. the new TRUFFLE=truffleruby) is checked uniformly and a
  divergence names every implementation it disagrees with. Absent references
  are still skipped, not failed.
- bench/run.sh: add JRuby and TruffleRuby columns to the performance table via
  a best_ref() helper that reports a time, 'n/a' (not installed) or 'diff'
  (output diverges from MRI). The harness previously timed only MRI(+YJIT).

Both gracefully skip a reference that is not installed, so the validation runs
with whichever implementations are present.

Co-Authored-By: Claude Opus 4.8
---
Review notes (free-form; text between the "---" line and the diffstat is not
part of the commit message):

- bench/run.sh: best_ref() prints exactly one of "n/a", "diff" or "<seconds>s".
  It appends the "s" unit itself, which is why the row printf uses a bare %s
  for the JRuby and TruffleRuby columns but %ss for rbgo, MRI and MRI+YJIT.
- bench/run.sh: best_ref() runs the reference once with stderr discarded and
  compares its stdout with the MRI output passed as the third argument before
  handing the timed runs to best().
- scripts/oracle.sh: diff_one() reads the globals rbgo_out and st and updates
  st in place; status_of() resets st to "agree" and then calls it once per
  reference (mri, jruby, truffle).
- scripts/oracle.sh: a reference is treated as absent when its captured output
  contains the text "not installed". NOTE(review): a snippet whose own last
  output line contains that text would presumably be skipped too; confirm
  against run(), whose body is outside this diff.
- NOTE(review): indentation and runs of spaces inside the hunks below may not
  match the target files byte-for-byte; check with `git apply --check` before
  applying.

 bench/run.sh      | 34 +++++++++++++++++--------
 scripts/oracle.sh | 64 +++++++++++++++++++++++++++++------------------
 2 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/bench/run.sh b/bench/run.sh
index 3142f7b..4e19396 100755
--- a/bench/run.sh
+++ b/bench/run.sh
@@ -1,18 +1,22 @@
 #!/usr/bin/env bash
 # Differential performance harness: runs each bench/*.rb under rbgo (interpreter),
-# rbgo+AOT (a native binary from `rbgo build`), MRI, and MRI+YJIT, takes the best
-# of N runs (least noise), and prints a Markdown table of wall-clock times and
-# ratios. Correctness (same stdout) is checked first; a program is skipped if any
-# runtime disagrees with MRI.
+# rbgo+AOT (a native binary from `rbgo build`), MRI, MRI+YJIT, JRuby and
+# TruffleRuby, takes the best of N runs (least noise), and prints a Markdown table
+# of wall-clock times and ratios. Correctness (same stdout as MRI) is checked
+# first; a program is skipped if rbgo disagrees with MRI. JRuby and TruffleRuby
+# are extra reference runtimes, shown as "n/a" when not installed and "diff" when
+# their output diverges from MRI.
 #
 # Usage: bench/run.sh [runs] (default 5)
 #
-# The AOT column needs the Go toolchain and a module checkout (so `rbgo build`
-# can compile + link). Set AOT=0 to skip it (e.g. with only an installed rbgo).
+# Env: RBGO (./rbgo), RUBY (ruby), JRUBY (jruby), TRUFFLE (truffleruby), AOT (1).
+# The AOT column needs the Go toolchain + a module checkout; set AOT=0 to skip it.
 set -u
 RUNS="${1:-5}"
 RBGO="${RBGO:-./rbgo}"
 RUBY="${RUBY:-ruby}"
+JRUBY="${JRUBY:-jruby}"
+TRUFFLE="${TRUFFLE:-truffleruby}"
 AOT="${AOT:-1}"
 HERE="$(cd "$(dirname "$0")" && pwd)"
 TMP="$(mktemp -d)"
@@ -27,16 +31,26 @@ best() { # best() prog args... → minimal real seconds over $RUNS
   echo "$b"
 }
 
-printf '| Benchmark | rbgo | rbgo+AOT | MRI | MRI+YJIT | AOT/MRI | AOT/YJIT |\n'
-printf '| --- | ---: | ---: | ---: | ---: | ---: | ---: |\n'
+# best_ref times an optional reference runtime: "Ns" on success, "n/a" when the
+# binary is absent, "diff" when its output diverges from MRI's.
+best_ref() { # bin, prog, expected_output
+  command -v "$1" >/dev/null 2>&1 || { echo "n/a"; return; }
+  [ "$("$1" "$2" 2>/dev/null)" = "$3" ] || { echo "diff"; return; }
+  echo "$(best "$1" "$2")s"
+}
+
+printf '| Benchmark | rbgo | rbgo+AOT | MRI | MRI+YJIT | JRuby | TruffleRuby | AOT/MRI | AOT/YJIT |\n'
+printf '| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |\n'
 
 for f in "$HERE"/*.rb; do
   name=$(basename "$f" .rb)
   ro=$("$RBGO" "$f" 2>/dev/null); mo=$("$RUBY" "$f" 2>/dev/null)
-  [ "$ro" != "$mo" ] && { printf '| %s | (output differs, skipped) ||||||\n' "$name"; continue; }
+  [ "$ro" != "$mo" ] && { printf '| %s | (output differs, skipped) | | | | | | | |\n' "$name"; continue; }
   rb=$(best "$RBGO" "$f")
   mr=$(best "$RUBY" "$f")
   yj=$(best "$RUBY" --yjit "$f")
+  jr=$(best_ref "$JRUBY" "$f" "$mo")
+  tr=$(best_ref "$TRUFFLE" "$f" "$mo")
 
   # AOT: build a specialised native binary for this program, then time it.
   at="n/a"; am="—"; ay="—"
@@ -56,5 +70,5 @@ for f in "$HERE"/*.rb; do
     fi
   fi
 
-  printf '| %s | %ss | %s | %ss | %ss | %s | %s |\n' "$name" "$rb" "$at" "$mr" "$yj" "$am" "$ay"
+  printf '| %s | %ss | %s | %ss | %ss | %s | %s | %s | %s |\n' "$name" "$rb" "$at" "$mr" "$yj" "$jr" "$tr" "$am" "$ay"
 done
diff --git a/scripts/oracle.sh b/scripts/oracle.sh
index 2974d9c..97e8365 100755
--- a/scripts/oracle.sh
+++ b/scripts/oracle.sh
@@ -1,10 +1,10 @@
 #!/bin/sh
-# oracle.sh — three-way differential oracle for go-embedded-ruby.
+# oracle.sh — differential oracle for go-embedded-ruby.
 #
-# Runs Ruby through rbgo, MRI (CRuby) and JRuby and reports whether they agree.
-# MRI and JRuby are independent reference implementations of Ruby 4.0; rbgo is the
-# pure-Go implementation under test, so a divergence from either is a conformance
-# signal. Exits non-zero on any divergence.
+# Runs Ruby through rbgo, MRI (CRuby), JRuby and TruffleRuby and reports whether
+# they agree. MRI, JRuby and TruffleRuby are independent reference implementations
+# of Ruby; rbgo is the pure-Go implementation under test, so a divergence from any
+# reference is a conformance signal. Exits non-zero on any divergence.
 #
 # Usage:
 # scripts/oracle.sh -e 'p [1, 2, 3].sum' # one snippet
@@ -13,14 +13,16 @@
 # # per line (# = comment)
 #
 # Environment overrides: RBGO (default: /tmp/rbgo, else built from ./cmd/rbgo),
-# MRI (default: ruby), JRUBY (default: jruby). A reference that is not installed
-# is skipped with a note rather than failing the run.
+# MRI (default: ruby), JRUBY (default: jruby), TRUFFLE (default: truffleruby). A
+# reference that is not installed is skipped with a note rather than failing the
+# run, so the oracle still works with whichever references are present.
 set -eu
 
 here=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
 RBGO=${RBGO:-}
 MRI=${MRI:-ruby}
 JRUBY=${JRUBY:-jruby}
+TRUFFLE=${TRUFFLE:-truffleruby}
 
 if [ -z "$RBGO" ]; then
   if [ -x /tmp/rbgo ]; then
@@ -48,34 +50,47 @@ run() { # binary, args...
   "$bin" "$@" 2>&1 | tail -1
 }
 
-# eval_three / file_three fill rbgo_out, mri_out, jruby_out for a snippet / file.
-eval_three() {
+# eval_all / file_all fill rbgo_out, mri_out, jruby_out, truffle_out for a
+# snippet / file.
+eval_all() {
   rbgo_out=$("$RBGO" run -e "$1" 2>&1 | tail -1)
   mri_out=$(run "$MRI" -e "$1")
   jruby_out=$(run "$JRUBY" -e "$1")
+  truffle_out=$(run "$TRUFFLE" -e "$1")
 }
-file_three() {
+file_all() {
   rbgo_out=$("$RBGO" run "$1" 2>&1 | tail -1)
   mri_out=$(run "$MRI" "$1")
   jruby_out=$(run "$JRUBY" "$1")
+  truffle_out=$(run "$TRUFFLE" "$1")
+}
+
+# diff_one compares rbgo against one reference, skipping it when it is absent.
+# Appends the reference label to $st on a divergence. Args: ref-output, label.
+diff_one() { # ref_out, label
+  case "$1" in
+  *'not installed'*) return ;;
+  esac
+  if [ "$rbgo_out" != "$1" ]; then
+    if [ "$st" = agree ]; then st="DIVERGE-from-$2"; else st="$st,$2"; fi
+  fi
 }
 
 # status_of echoes "agree" or a DIVERGE-* label, judged against whichever
 # references actually ran.
 status_of() {
   st=agree
-  case "$mri_out" in *'not installed'*) : ;; *) [ "$rbgo_out" = "$mri_out" ] || st=DIVERGE-from-mri ;; esac
-  case "$jruby_out" in
-  *'not installed'*) : ;;
-  *) [ "$rbgo_out" = "$jruby_out" ] || { [ "$st" = agree ] && st=DIVERGE-from-jruby || st="$st,jruby"; } ;;
-  esac
+  diff_one "$mri_out" mri
+  diff_one "$jruby_out" jruby
+  diff_one "$truffle_out" truffle
   echo "$st"
 }
 
 report_one() { # heading already printed by caller
-  printf 'rbgo : %s\n' "$rbgo_out"
-  printf 'mri : %s\n' "$mri_out"
-  printf 'jruby: %s\n' "$jruby_out"
+  printf 'rbgo : %s\n' "$rbgo_out"
+  printf 'mri : %s\n' "$mri_out"
+  printf 'jruby : %s\n' "$jruby_out"
+  printf 'truffle: %s\n' "$truffle_out"
   printf '=> %s\n' "$(status_of)"
   [ "$(status_of)" = agree ]
 }
@@ -83,7 +98,7 @@ report_one() { # heading already printed by caller
 case "${1:-}" in
 -e)
   [ $# -ge 2 ] || usage
-  eval_three "$2"
+  eval_all "$2"
   report_one
   ;;
 -b | --batch)
@@ -98,16 +113,17 @@ case "${1:-}" in
   while IFS= read -r line || [ -n "$line" ]; do
     case "$line" in '' | \#*) continue ;; esac # skip blank lines and comments
     total=$((total + 1))
-    eval_three "$line"
+    eval_all "$line"
     st=$(status_of)
     if [ "$st" = agree ]; then
       printf 'ok %s\n' "$line"
     else
       diverged=$((diverged + 1))
       printf 'DIFF %s\n' "$line"
-      printf ' rbgo : %s\n' "$rbgo_out"
-      printf ' mri : %s\n' "$mri_out"
-      printf ' jruby: %s\n' "$jruby_out"
+      printf ' rbgo : %s\n' "$rbgo_out"
+      printf ' mri : %s\n' "$mri_out"
+      printf ' jruby : %s\n' "$jruby_out"
+      printf ' truffle: %s\n' "$truffle_out"
     fi
   done <"$corpus"
   printf '\n%d/%d agree (%d diverge)\n' "$((total - diverged))" "$total" "$diverged"
@@ -117,7 +133,7 @@ case "${1:-}" in
   usage
   ;;
 *)
-  file_three "$1"
+  file_all "$1"
   report_one
   ;;
 esac