From b7d3441ca4cfbc93f0e063d6846e2dfe6a870624 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sun, 26 Apr 2026 09:18:11 +0200 Subject: [PATCH] fix(interpreter): defensive cleanup for my-hash/my-array on fallback The bytecode-interpreter opcodes SCOPE_EXIT_CLEANUP_HASH and SCOPE_EXIT_CLEANUP_ARRAY blindly cast their register slot to RuntimeHash / RuntimeArray. If a control-flow path skipped the my-hash / my-array initialisation (early `return`, `last`, `goto`, or a short-circuit guarding the declaration), the register could still hold a transient RuntimeScalar produced by an unrelated CREATE_LIST that recycled the same slot. The unconditional cast then threw class RuntimeScalar cannot be cast to class RuntimeHash class RuntimeScalar cannot be cast to class RuntimeArray at sub/scope exit, even when the user's logic completed normally. This only surfaces in the interpreter-fallback path (the JIT/JVM backend uses a different code generator), so it stayed hidden until something forced fallback. `use Moose;` is the canonical trigger: Sub::Exporter::Progressive::import contains `goto \&Exporter::import`, which forces JIT->interpreter fallback for that whole sub, and the sub also has lexical hashes/arrays. The scalar variant SCOPE_EXIT_CLEANUP already had the same defensive `instanceof` check (with a long explanatory comment); this commit mirrors that pattern for the hash and array variants and adds an extensive comment explaining the invariants, the trigger, and a minimal repro so future maintainers don't "clean up" the check. Adds src/test/resources/unit/interpreter_myhash_myarray_scope_exit.t which exercises every variant on the interpreter-fallback path (verified via JPERL_SHOW_FALLBACK=1). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../backend/bytecode/BytecodeInterpreter.java | 104 ++++++++++++++- .../org/perlonjava/core/Configuration.java | 4 +- .../interpreter_myhash_myarray_scope_exit.t | 118 ++++++++++++++++++ 3 files changed, 220 insertions(+), 6 deletions(-) create mode 100644 src/test/resources/unit/interpreter_myhash_myarray_scope_exit.t diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 1e81c7be4..b5f0f8f2e 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -206,16 +206,112 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.SCOPE_EXIT_CLEANUP_HASH -> { - // Scope-exit cleanup for a my-hash register + // Scope-exit cleanup for a my-hash register. + // + // !!! DO NOT REMOVE THE `instanceof` CHECK BELOW !!! + // + // Reverting to a blind `(RuntimeHash) registers[reg]` + // cast WILL bring back the bug fixed by this code: + // a `ClassCastException: RuntimeScalar cannot be + // cast to RuntimeHash` thrown at scope/sub exit. + // + // Why a register slot for a `my %h` can hold a + // RuntimeScalar: + // + // 1. The JIT (JVM) backend can fall back to the + // bytecode interpreter for individual subs + // that use features it doesn't support — e.g. + // dynamic `goto $coderef` / `goto \&sub`. See + // SubroutineParser interpreter-fallback path + // and JPERL_SHOW_FALLBACK=1 for diagnostics. + // + // 2. The same register file is shared across + // every basic block in the sub. The compiler + // reuses (recycles) registers between + // independent statements, so a slot that the + // compiler later reserves for `my %h` may + // previously have been used as the + // destination of an unrelated CREATE_LIST, + // assignment or arithmetic op which leaves a + // RuntimeScalar in it. + // + // 3. Any control-flow path that *skips* the + // MY_HASH initialisation for that slot + // (e.g. an early `return`, `last`, `goto`, + // or a short-circuited `&&`/`||` guarding + // the `my %h = (...)` declaration) will + // leave the stale RuntimeScalar behind. + // + // 4. SCOPE_EXIT_CLEANUP_HASH runs + // unconditionally for every declared + // my-hash, regardless of whether step 3 was + // taken. With a blind cast this throws + // ClassCastException and unwinds out of the + // sub even when the user's logic completed + // normally. + // + // Why ignoring the slot is correct: + // + // The user can never have observed `%h` on a + // path that skipped its initialisation, so + // there is nothing user-visible to clean up. + // `MortalList.scopeExitCleanupHash` only has + // real work to do on actual RuntimeHash + // instances (releasing tied magic, decrementing + // tracked-element ref counts, etc.); a non-hash + // slot simply has no cleanup obligation. + // + // Minimal regression test (must keep passing): + // + // sub t { + // my %h = (a=>1); # SCOPE_EXIT_CLEANUP_HASH emitted + // print "ok\n"; + // return; # exits before goto -> JIT fallback + // my $f = sub {1}; + // goto $f; # forces interpreter fallback + // } + // t(); + // + // Originally surfaced by `use Moose;` -> + // Sub::Exporter::Progressive::import (uses + // `goto \&Exporter::import`). int reg = bytecode[pc++]; - MortalList.scopeExitCleanupHash((RuntimeHash) registers[reg]); + RuntimeBase slot = registers[reg]; + if (slot instanceof RuntimeHash rh) { + MortalList.scopeExitCleanupHash(rh); + } registers[reg] = null; } case Opcodes.SCOPE_EXIT_CLEANUP_ARRAY -> { - // Scope-exit cleanup for a my-array register + // Scope-exit cleanup for a my-array register. + // + // !!! DO NOT REMOVE THE `instanceof` CHECK BELOW !!! + // + // See the long comment on SCOPE_EXIT_CLEANUP_HASH + // above — the exact same reasoning applies here, + // just with RuntimeArray instead of RuntimeHash. + // Reverting to a blind `(RuntimeArray) registers[reg]` + // cast will reintroduce ClassCastException at + // sub/scope exit for any my-array whose + // initialisation was skipped by control flow on + // an interpreter-fallback sub. + // + // Minimal regression test (must keep passing): + // + // sub t { + // my @a = ('x'); # SCOPE_EXIT_CLEANUP_ARRAY emitted + // print "ok\n"; + // return; + // my $f = sub {1}; + // goto $f; # forces JIT->interpreter fallback + // } + // t(); int reg = bytecode[pc++]; - MortalList.scopeExitCleanupArray((RuntimeArray) registers[reg]); + RuntimeBase slot = registers[reg]; + if (slot instanceof RuntimeArray ra) { + MortalList.scopeExitCleanupArray(ra); + } registers[reg] = null; } diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index da6b8686c..5b8f11f02 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "0443cf987"; + public static final String gitCommitId = "0663a8089"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 25 2026 22:05:08"; + public static final String buildTimestamp = "Apr 26 2026 09:17:16"; // Prevent instantiation private Configuration() { diff --git a/src/test/resources/unit/interpreter_myhash_myarray_scope_exit.t b/src/test/resources/unit/interpreter_myhash_myarray_scope_exit.t new file mode 100644 index 000000000..7ad65ef1e --- /dev/null +++ b/src/test/resources/unit/interpreter_myhash_myarray_scope_exit.t @@ -0,0 +1,118 @@ +use strict; +use warnings; +print "1..6\n"; + +# Regression test for a bytecode-interpreter bug where SCOPE_EXIT_CLEANUP_HASH +# and SCOPE_EXIT_CLEANUP_ARRAY blindly cast their register slot to +# RuntimeHash / RuntimeArray. If a control-flow path skipped the +# my-hash / my-array initialisation (e.g. an early `return`, `last`, +# `goto`, or a short-circuit `&&`/`||` guarding the `my %h = (...)` / +# `my @a = (...)` declaration), the register could still hold a +# transient RuntimeScalar produced by an unrelated CREATE_LIST that +# recycled the same slot. The unconditional cast then threw +# "class RuntimeScalar cannot be cast to class RuntimeHash" +# "class RuntimeScalar cannot be cast to class RuntimeArray" +# at sub/scope exit, even though the user's logic completed normally. +# +# This test only triggers in the bytecode-interpreter backend (the +# JIT/JVM backend uses a different code path), so we have to coerce +# the offending sub onto the interpreter fallback path. The most +# reliable trigger today is `goto $coderef` (dynamic goto EXPR), which +# the JIT cannot currently emit and which forces the entire enclosing +# sub to be compiled to InterpretedCode and run by BytecodeInterpreter. +# +# Originally surfaced by `use Moose;`, which loads +# Sub::Exporter::Progressive::import — that sub uses +# `goto \&Exporter::import` and contains lexical hashes/arrays. +# Without this fix, every Moose-based test died at `use Moose;` with +# the ClassCastException above. +# +# !!! If this test starts failing, do NOT delete it. The fix lives in +# !!! BytecodeInterpreter.java around the SCOPE_EXIT_CLEANUP_HASH / +# !!! SCOPE_EXIT_CLEANUP_ARRAY opcodes (defensive instanceof checks). + +# --- Test 1: my-hash + early return + goto-fallback ------------------ +sub hash_then_return { + my %h = (a => 1, b => 2); + return "got=" . $h{a}; + # Unreachable code that forces the JIT to fall back to the + # bytecode interpreter for this whole sub: + my $f = sub { 1 }; + goto $f; +} +print "not " unless hash_then_return() eq 'got=1'; +print "ok 1 - my %h + early return survives interpreter fallback\n"; + +# --- Test 2: my-array + early return + goto-fallback ----------------- +sub array_then_return { + my @a = ('x', 'y', 'z'); + return "len=" . scalar(@a); + my $f = sub { 1 }; + goto $f; +} +print "not " unless array_then_return() eq 'len=3'; +print "ok 2 - my \@a + early return survives interpreter fallback\n"; + +# --- Test 3: both my-hash and my-array in the same sub --------------- +sub mixed { + my %h = (k => 'v'); + my @a = (10, 20); + return "h=" . $h{k} . ";a=" . $a[1]; + my $f = sub { 1 }; + goto $f; +} +print "not " unless mixed() eq 'h=v;a=20'; +print "ok 3 - mixed my %h and my \@a both cleaned up safely\n"; + +# --- Test 4: the real-world Moose-style trigger ---------------------- +# This mirrors the line in Sub::Exporter::Progressive::import that +# originally exposed the bug: a complex `for` loop that aliases $_ +# to multiple list elements, combined with a `goto \&...` later in +# the same sub. +sub progressive_like { + my @exports = ('foo', 'bar'); + my @defaults = (':all', 'baz'); + my %tags = (default => ['x', 'y'], other => ['-all', 'z']); + @{$_} = map { /\A[:-]all\z/ ? @exports : $_ } @{$_} + for \@defaults, values %tags; + return scalar(@defaults) . ',' . scalar(@{ $tags{other} }); + # Force interpreter fallback: + my $f = sub { 1 }; + goto $f; +} +print "not " unless progressive_like() eq '3,3'; +print "ok 4 - Sub::Exporter::Progressive-style for-loop pattern works\n"; + +# --- Test 5: many invocations, register-reuse stress ----------------- +# Call the sub repeatedly so any lingering register reuse across +# invocations is exercised. +my $ok = 1; +for my $i (1 .. 100) { + my $r = mixed(); + $ok = 0 unless defined $r && $r eq 'h=v;a=20'; +} +print "not " unless $ok; +print "ok 5 - 100 iterations without scope-exit ClassCastException\n"; + +# --- Test 6: short-circuit-skipped my-hash + interpreter fallback ---- +# Combine the short-circuit pattern (which my_short_circuit_scope_exit.t +# covers for scalars) with a my-hash on the interpreter path. +sub short_circuit_hash { + my $arg = shift; + if ( ref($arg) + and UNIVERSAL::isa($arg, 'HASH') + and defined( (my %copy = %$arg) + ? $copy{key} + : undef ) ) + { + return "k=" . $copy{key}; + } + return 'skipped'; + # Force interpreter fallback regardless of which branch ran: + my $f = sub { 1 }; + goto $f; +} +my $r1 = short_circuit_hash(undef); +my $r2 = short_circuit_hash({ key => 'v' }); +print "not " unless $r1 eq 'skipped' and $r2 eq 'k=v'; +print "ok 6 - short-circuit-skipped my %h cleaned up safely\n";