# Locate the repository root by walking upward from the directory that holds
# this script ($0). A directory qualifies when it contains both a regular file
# named `jperl` and a `.git` directory.
#
# Returns the absolute path of the first qualifying directory.
# Dies when the walk can go no higher (the parent cannot be resolved, or it
# resolves to the directory already being inspected) without a match.
sub find_repo_root {
    my $candidate = abs_path(dirname($0));

    while (1) {
        my $launcher = File::Spec->catfile($candidate, 'jperl');
        my $git_dir  = File::Spec->catdir($candidate, '.git');
        return $candidate if -f $launcher && -d $git_dir;

        my $above = abs_path(File::Spec->catdir($candidate, File::Spec->updir()));

        # An unresolvable parent, or a parent identical to the current
        # directory, means there is nothing left to search.
        die "Unable to locate repo root\n"
            unless defined($above) && $above ne $candidate;

        $candidate = $above;
    }
}
# /usr/bin/time is the only measurement tool this script drives, so give up
# immediately when it is not executable.
die "Error: /usr/bin/time not found. This script requires GNU time or BSD time.\n"
    unless -x '/usr/bin/time';

# Work out which flavour of time(1) is installed by running it with each
# verbose flag and looking for its resident-set-size line. The BSD (macOS)
# flag -l is probed first, then the GNU flag -v.
my $time_format;
for my $probe ([ '-l', qr/maximum resident set size/, 'bsd' ],
               [ '-v', qr/Maximum resident set size/, 'gnu' ]) {
    my ($flag, $marker, $flavour) = @$probe;

    my $time_test = `/usr/bin/time $flag echo test 2>&1`;
    next unless $time_test =~ $marker;

    $time_format = $flavour;
    last;
}

die "Error: Unable to determine time format. Need GNU time or BSD time.\n"
    unless defined $time_format;

print "Detected time format: $time_format\n\n";
# Run benchmarks: measure every workload under both interpreters and collect
# the numbers for the summary table printed afterwards.
my @results;

for my $workload (@workloads) {
    print "Running: $workload->{name}\n";

    # Assemble the program to measure. The workload table defines its source
    # under code_create (build the data) and code_after (consume it). Reading
    # only a `code` key yields undef for every entry in that table, which
    # makes both interpreters run an empty file. A `code` key is still
    # honoured when a workload supplies one.
    #
    # code_before is deliberately left out: it prints READY and its
    # `my $line = ;` statement looks like a truncated read from STDIN --
    # presumably a handshake for an interactive measurement (TODO confirm).
    # get_memory_usage only runs the file under /usr/bin/time and never
    # feeds it input.
    my $code = $workload->{code};
    if (!defined $code) {
        $code = join "\n",
            grep { defined } @{$workload}{qw(code_create code_after)};
        $code .= "\n";
    }

    # Run with perl
    my $perl_mem = get_memory_usage('perl', $code);
    print "  perl:  " . format_memory($perl_mem) . "\n";

    # Run with jperl
    my $jperl_mem = get_memory_usage($jperl, $code);
    print "  jperl: " . format_memory($jperl_mem) . "\n";

    # Ratio is jperl / perl; format_ratio returns "N/A" when either side is
    # missing or the perl figure is not positive.
    my $ratio = format_ratio($perl_mem, $jperl_mem);
    print "  ratio: $ratio\n\n";

    push @results, {
        name      => $workload->{name},
        perl_mem  => $perl_mem,
        jperl_mem => $jperl_mem,
        ratio     => $ratio,
    };
}
# Render a size given in kilobytes as a short human-readable string.
#
#   undef               -> "N/A"
#   below 1024 KB       -> whole kilobytes,        e.g. "512 KB"
#   below 1024 * 1024   -> megabytes, one decimal, e.g. "1.5 MB"
#   anything larger     -> gigabytes, two decimals, e.g. "3.50 GB"
sub format_memory {
    my ($kb) = @_;

    my $kb_per_mb = 1024;
    my $kb_per_gb = 1024 * 1024;

    return "N/A" if !defined $kb;
    return sprintf("%d KB", $kb) if $kb < $kb_per_mb;
    return sprintf("%.1f MB", $kb / $kb_per_mb) if $kb < $kb_per_gb;
    return sprintf("%.2f GB", $kb / $kb_per_gb);
}
my $sum = 0; $sum += $_ for @arr; + print "RESULT:$sum\n"; + }, + }, + { + name => "Hash creation (2M entries)", + code => q{ + my $mem_before = get_current_memory(); + my %hash; $hash{$_} = $_ * 2 for (1..2_000_000); + # Force memory measurement while hash is still in scope + my $mem_after = get_current_memory(); + my $delta = $mem_after - $mem_before; + print "DELTA:$delta\n"; + # Use the hash to prevent GC optimization + my $sum = 0; $sum += $hash{$_} for keys %hash; + print "RESULT:$sum\n"; + }, + }, + { + name => "String buffer (100M chars)", + code => q{ + my $mem_before = get_current_memory(); + my $str = "x" x 100000; + my $result = ""; + for (1..1000) { $result .= $str; } + # Force memory measurement while strings are still in scope + my $mem_after = get_current_memory(); + my $delta = $mem_after - $mem_before; + print "DELTA:$delta\n"; + # Use the result to prevent GC optimization + print "RESULT:" . length($result) . "\n"; + }, + }, + { + name => "Nested data structures (30K objects)", + code => q{ + my $mem_before = get_current_memory(); + my @data; + for my $i (1..30_000) { + push @data, { id => $i, values => [1..100] }; + } + # Force memory measurement while data is still in scope + my $mem_after = get_current_memory(); + my $delta = $mem_after - $mem_before; + print "DELTA:$delta\n"; + # Use the data to prevent GC optimization + my $sum = 0; + for my $item (@data) { + $sum += scalar(@{$item->{values}}); + } + print "RESULT:$sum\n"; + }, + }, +); + +my $mode_name = $use_interpreter ? 
# Run one workload under the given interpreter and report what it printed.
#
# Parameters:
#   $interpreter - command used to launch the script. It is interpolated into
#                  a shell command line, so it may carry extra flags
#                  (e.g. "/path/to/jperl --interpreter").
#   $code        - workload source; it is expected to print a "DELTA:<kb>"
#                  line and a "RESULT:<n>" line.
#
# Returns ($delta, $result). Either element is undef when the matching line
# is absent from the child's combined stdout/stderr.
sub run_benchmark {
    my ($interpreter, $code) = @_;

    # Prelude prepended to every workload so that it can call
    # get_current_memory() inside the child process.
    my $full_code = q{
use strict;
use warnings;

sub get_current_memory {
    if ($^O eq 'darwin') {
        my $pid = $$;
        my $output = `ps -o rss= -p $pid`;
        chomp $output;
        $output =~ s/^\s+//;
        return $output;
    } elsif ($^O eq 'linux') {
        open my $fh, '<', '/proc/self/status' or return undef;
        while (<$fh>) {
            if (/^VmRSS:\s+(\d+)\s+kB/) {
                close $fh;
                return $1;
            }
        }
        close $fh;
        return undef;
    } else {
        return undef;
    }
}

} . $code;

    # File::Temp creates the script under an unpredictable name in the
    # system temp directory, instead of a guessable /tmp/..._$$ path, and
    # removes it at exit even if something below dies.
    require File::Temp;
    my ($fh, $tmpfile) = File::Temp::tempfile(
        'perlbench_delta_XXXXXX',
        SUFFIX => '.pl',
        TMPDIR => 1,
        UNLINK => 1,
    );
    print {$fh} $full_code or die "Cannot write to $tmpfile: $!";
    close $fh or die "Cannot write to $tmpfile: $!";

    # $interpreter stays unquoted on purpose: it may contain flags.
    my $output = `$interpreter "$tmpfile" 2>&1`;
    unlink $tmpfile;
    $output = '' unless defined $output;

    # The delta is a difference of two RSS readings and can be negative when
    # the process shrinks between them. Accept a leading minus sign so that
    # case is not mistaken for a run that printed no DELTA line.
    my ($delta)  = $output =~ /DELTA:(-?\d+)/;
    my ($result) = $output =~ /RESULT:(\d+)/;

    return ($delta, $result);
}
" (result: $jperl_result)\n"; + + my $ratio = "N/A"; + if (defined $perl_delta && defined $jperl_delta && $perl_delta > 0) { + $ratio = sprintf("%.2fx", $jperl_delta / $perl_delta); + } + print " ratio: $ratio\n\n"; + + push @results, { + name => $workload->{name}, + perl_delta => $perl_delta, + jperl_delta => $jperl_delta, + ratio => $ratio, + }; +} + +# Print summary table +print "\n# Summary\n\n"; +print "| Workload | Perl 5 Delta | PerlOnJava Delta | Ratio (jperl/perl) |\n"; +print "|----------|--------------|------------------|--------------------|\n"; + +for my $result (@results) { + printf "| %-40s | %12s | %16s | %18s |\n", + $result->{name}, + format_memory($result->{perl_delta}), + format_memory($result->{jperl_delta}), + $result->{ratio}; +} + +print "\n"; +print "Note: Delta measurements show memory increase during data creation.\n"; +print "This excludes interpreter/JVM startup overhead.\n"; +print "Measures actual memory used by data structures.\n"; diff --git a/dev/interpreter/SKILL.md b/dev/interpreter/SKILL.md index 77cd2d038..95b7828b6 100644 --- a/dev/interpreter/SKILL.md +++ b/dev/interpreter/SKILL.md @@ -1,5 +1,7 @@ # PerlOnJava Interpreter Developer Guide +- name all test files /tmp/test.pl + ## Quick Reference **Performance:** 46.84M ops/sec (1.75x slower than compiler ✓) diff --git a/dev/presentations/German_Perl_Raku_Workshop_2026/Makefile b/dev/presentations/German_Perl_Raku_Workshop_2026/Makefile index 043cf4aa4..a3b77a76a 100644 --- a/dev/presentations/German_Perl_Raku_Workshop_2026/Makefile +++ b/dev/presentations/German_Perl_Raku_Workshop_2026/Makefile @@ -6,6 +6,7 @@ help: @echo "" @echo " make serve - Start web server and view presentation" @echo " make open - Just open browser (server must be running)" + @echo " make stats - Collect statistics and benchmarks for slide numbers" @echo " make pdf - Instructions for PDF export" @echo " make clean - Clean temporary files" @echo "" @@ -43,6 +44,10 @@ open: @command -v xdg-open >/dev/null 
2>&1 && xdg-open http://localhost:8000 || true @echo "If browser didn't open, go to: http://localhost:8000" +# Collect statistics and benchmarks for slide numbers +stats: + @perl ./collect_slide_numbers.pl + # PDF export instructions pdf: @echo "To export to PDF:" diff --git a/dev/presentations/German_Perl_Raku_Workshop_2026/collect_slide_numbers.pl b/dev/presentations/German_Perl_Raku_Workshop_2026/collect_slide_numbers.pl index d368d7e3f..05b307470 100644 --- a/dev/presentations/German_Perl_Raku_Workshop_2026/collect_slide_numbers.pl +++ b/dev/presentations/German_Perl_Raku_Workshop_2026/collect_slide_numbers.pl @@ -6,7 +6,6 @@ use File::Basename qw(dirname); use File::Spec; use Getopt::Long qw(GetOptions); -use Time::HiRes qw(time); my %opt = ( stats => 1, @@ -15,8 +14,6 @@ eval_iterations => 1_000_000, eval_payload_len => 50, print_cmd => 0, - startup_runs => 30, - startup_warmup => 5, ); GetOptions( @@ -26,8 +23,6 @@ 'eval-iterations=i' => \$opt{eval_iterations}, 'eval-payload-len=i' => \$opt{eval_payload_len}, 'print-cmd!' => \$opt{print_cmd}, - 'startup-runs=i' => \$opt{startup_runs}, - 'startup-warmup=i' => \$opt{startup_warmup}, ) or die "Invalid options\n"; sub find_repo_root { @@ -160,49 +155,6 @@ sub bench_command_seconds { return 0 + $1; } -sub wall_time_cmd_seconds { - my (%args) = @_; - my $cmd = $args{cmd}; - my $env = $args{env} || {}; - - if ($opt{print_cmd}) { - if (%$env) { - my @pairs; - for my $k (sort keys %$env) { - push @pairs, $k . '=' . $env->{$k}; - } - print "CMD: " . join(' ', @pairs) . 
" $cmd\n"; - } else { - print "CMD: $cmd\n"; - } - } - - my $t0 = time(); - my ($exit, $out) = run_cmd(cmd => $cmd, env => $env); - my $t1 = time(); - die "Command failed (exit=$exit):\n$cmd\n$out\n" if $exit != 0; - return $t1 - $t0; -} - -sub mean { - my ($vals) = @_; - return undef if !$vals || !@$vals; - my $sum = 0; - $sum += $_ for @$vals; - return $sum / scalar(@$vals); -} - -sub median { - my ($vals) = @_; - return undef if !$vals || !@$vals; - my @s = sort { $a <=> $b } @$vals; - my $n = scalar(@s); - if ($n % 2) { - return $s[int($n / 2)]; - } - return ($s[$n/2 - 1] + $s[$n/2]) / 2; -} - sub print_markdown_table { my (%args) = @_; my $headers = $args{headers}; @@ -307,45 +259,4 @@ sub print_markdown_table { print "Notes:\n"; print "- Force eval STRING to use the interpreter backend via: JPERL_EVAL_USE_INTERPRETER=1 ./jperl ...\n"; print "- You can tune --eval-iterations and --iterations for runtime on slower machines.\n"; - - my $startup_runs = $opt{startup_runs}; - my $startup_warmup = $opt{startup_warmup}; - if (!defined $startup_runs || $startup_runs < 1) { - $startup_runs = 1; - } - if (!defined $startup_warmup || $startup_warmup < 0) { - $startup_warmup = 0; - } - - my $startup_perl = q{perl -e 'print "hello, World!\n"' > /dev/null}; - my $startup_jperl = $jperl . q{ -e 'print "hello, World!\n"' > /dev/null}; - - my @startup_perl_times; - my @startup_jperl_times; - - for (1 .. $startup_warmup) { - wall_time_cmd_seconds(cmd => $startup_perl); - wall_time_cmd_seconds(cmd => $startup_jperl); - } - for (1 .. 
$startup_runs) { - push @startup_perl_times, wall_time_cmd_seconds(cmd => $startup_perl); - push @startup_jperl_times, wall_time_cmd_seconds(cmd => $startup_jperl); - } - - my $startup_perl_median = median(\@startup_perl_times); - my $startup_jperl_median = median(\@startup_jperl_times); - - print "\n"; - print "# Startup benchmark (hello world, wall time)\n\n"; - print "Runs: $startup_runs (warmup: $startup_warmup)\n\n"; - - my $startup_vs_perl = format_vs_baseline(baseline => $startup_perl_median, candidate => $startup_jperl_median); - - print_markdown_table( - headers => ['Implementation', 'Median', 'Mean', 'vs Perl 5 (median)'], - rows => [ - ['Perl 5', format_seconds($startup_perl_median), format_seconds(mean(\@startup_perl_times)), 'baseline'], - ['PerlOnJava', format_seconds($startup_jperl_median), format_seconds(mean(\@startup_jperl_times)), $startup_vs_perl], - ], - ); } diff --git a/dev/prompts/compound-assignment-investigation.md b/dev/prompts/compound-assignment-investigation.md new file mode 100644 index 000000000..0713b9ad6 --- /dev/null +++ b/dev/prompts/compound-assignment-investigation.md @@ -0,0 +1,157 @@ +# Investigation: Compound Assignments in eval STRING + +## The Critical Bug Found and Fixed + +### Problem +Compound assignments (`+=`, `-=`, `.=`, `&=`, etc.) inside `eval STRING` were not modifying the outer variable: + +```perl +my $x = 10; +eval '$x += 5'; +print "$x\n"; # Printed 10, should print 15 +``` + +### Root Cause + +The interpreter's compound assignment opcodes were **replacing register references** instead of **modifying RuntimeScalar objects in place**. + +When eval STRING captures a parent variable: +1. EvalStringHandler captures the actual RuntimeScalar object from parent's register +2. Places it into child eval's register (e.g., register 3) +3. Both parent and child now have references to the SAME RuntimeScalar object +4. 
Modifications must happen **on the object**, not by **replacing the reference** + +### The Bug in BytecodeInterpreter.java + +**BEFORE (Broken)**: +```java +case Opcodes.ADD_ASSIGN: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = MathOperators.add( // ❌ REPLACES REFERENCE! + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + break; +} +``` + +This creates a NEW RuntimeScalar and replaces `registers[rd]` with it. The parent's register still points to the OLD object, so it doesn't see the change. + +### How Compiler Does It (from --disassemble) + +``` +ALOAD 7 # Load $x +DUP # Duplicate reference +ALOAD 8 # Load value +INVOKESTATIC stringConcat # Call operator -> result +INVOKEVIRTUAL set # Call x.set(result) - modifies IN PLACE +POP # Discard return value +``` + +The key pattern: **DUP the reference, call operator, call set() on original reference**. + +### The Fix + +**AFTER (Fixed)**: +```java +case Opcodes.ADD_ASSIGN: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + MathOperators.addAssign( // ✓ Modifies in place! + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + // Don't reassign registers[rd] - it's already modified + break; +} +``` + +`MathOperators.addAssign()` internally does: +1. Computes result: `result = add(arg1, arg2)` +2. **Modifies arg1 in place**: `arg1.set(result)` +3. Returns arg1 (same object) + +### Opcodes Fixed + +1. **ADD_ASSIGN**: Now uses `MathOperators.addAssign()` (modifies in place) +2. **STRING_CONCAT_ASSIGN**: Calls `stringConcat()` then `set()` on original +3. **BITWISE_AND_ASSIGN**: Calls bitwise op then `set()` on original +4. **BITWISE_OR_ASSIGN**: Calls bitwise op then `set()` on original +5. **BITWISE_XOR_ASSIGN**: Calls bitwise op then `set()` on original +6. 
**ADD_ASSIGN_INT**: Calls `add()` then `set()` on original + +**Already Correct** (were using *Assign methods): +- SUBTRACT_ASSIGN +- MULTIPLY_ASSIGN +- DIVIDE_ASSIGN +- MODULUS_ASSIGN + +### Testing Results + +**Before Fix**: +```perl +my $x = 10; eval '$x += 5'; print "$x\n"; # Output: 10 ❌ +my $y = 12; eval '$y &= 10'; print "$y\n"; # Output: 12 ❌ +my $z = "hi"; eval '$z .= "!"'; print "$z\n"; # Output: hi ❌ +``` + +**After Fix**: +```perl +my $x = 10; eval '$x += 5'; print "$x\n"; # Output: 15 ✓ +my $y = 12; eval '$y &= 10'; print "$y\n"; # Output: 8 ✓ +my $z = "hi"; eval '$z .= "!"'; print "$z\n"; # Output: hi! ✓ +``` + +## Why Tests Still Fail + +Despite this critical fix, op/bop.t and op/hashassign.t still show as "incomplete". Investigation shows: + +### op/bop.t Error +``` +Internal error: $expected &= $y failed: Unsupported operator: binary&= at (eval 272) line 1 +``` + +**Analysis**: The error mentions "binary&=" (not just "&="). This might be: +1. A stale error message (test caching the error from before the fix) +2. The parser creating a node with operator name "binary&=" in some contexts +3. A nested eval scenario we haven't covered + +**Action Needed**: Run individual failing test cases to see if the error is real or stale. + +### op/hashassign.t Error +``` +'@temp = ("\x{3c}" => undef)' gave at ... +``` + +**Analysis**: This is NOT a compound assignment issue. It's about hash/array assignment edge cases. + +### op/tr.t Error +``` +Unsupported operator: tr at (eval 151) line 1 +``` + +**Analysis**: The `tr` operator in eval STRING context is a separate issue (not related to compound assignments). 
+ +## Commits + +**Commit f7fbea78**: "fix: Modify compound assignments in place for captured variables" +- Fixed all compound assignment opcodes in BytecodeInterpreter.java +- Added comprehensive investigation document + +## Expected Impact + +While the specific test files still show as incomplete (likely due to other issues), the compound assignment fix is **fundamental and correct**. It will enable: + +1. **Correct eval STRING behavior** for all compound assignments +2. **Variable capture** working as designed +3. **Compatibility with compiler mode** (both modes now handle captured variables identically) + +The remaining test failures are due to OTHER issues (tr operator, hash assignment edge cases, etc.), not compound assignments. + +## Next Steps + +1. **Verify the fix independently**: Create isolated test cases showing compound assignments work +2. **Investigate the op/bop.t failure at `(eval 272) line 1`**: Run the test case that triggers that eval STRING (the 272nd eval, not source line 272) to see if the error is real +3. **tr operator**: Separate investigation needed for tr in eval STRING +4. **op/hashassign.t**: Investigate the hash/array assignment issue (not compound assignment related) diff --git a/dev/prompts/interpreter-operator-plan.md b/dev/prompts/interpreter-operator-plan.md new file mode 100644 index 000000000..70186655d --- /dev/null +++ b/dev/prompts/interpreter-operator-plan.md @@ -0,0 +1,97 @@ +# Comprehensive Plan: Fix Remaining Interpreter Issues + +## Analysis Summary + +After analyzing all failing tests, I've identified that the issues are NOT primarily missing operators. The main problems are: + +### 1. **Compound Assignments in eval STRING Don't Work** (CRITICAL) +**Affects**: op/bop.t (-322 tests), op/hashassign.t (-257 tests) + +**Problem**: Compound assignments like `$x += 5`, `$x &= 10` inside eval STRING don't modify the outer variable. 
+ +**Test showing the issue**: +```perl +my $x = 10; +eval '$x += 5'; +print "$x\n"; # Still prints 10, should print 15 +``` + +**Root Cause**: The `handleCompoundAssignment()` method in BytecodeCompiler only handles lexical variables (`hasVariable(varName)`), but variables captured from outer scope in eval STRING aren't in the local scope map. + +**Solution**: +1. Modify `handleCompoundAssignment()` to check if variable is captured (similar to how regular assignment handles it) +2. Add logic to emit compound assignment opcodes for captured variables +3. May need to use global variable path if not in local scope + +**Files to modify**: +- `src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java` (handleCompoundAssignment method ~line 966) + +### 2. **tr Operator in eval STRING** +**Affects**: op/tr.t (-187 tests) + +**Problem**: The tr operator is not recognized in eval STRING context. + +**Status**: tr works in normal code but reports "Unsupported operator: tr" in eval STRING. + +**Root Cause**: The tr operator might be parsed differently in eval STRING, or the BytecodeCompiler doesn't handle the `=~` operator with tr on the right side. + +**Solution**: Need to investigate how tr is represented in the AST and add handling for it. + +### 3. **Regex Engine Limitations** (NOT FIXABLE by adding operators) +**Affects**: Multiple re/*.t files (~3000+ tests total) + +These failures are due to regex features not implemented in the regex engine: +- Conditional patterns `(?(...)...)` +- Code blocks `(?{...})` +- Lookbehind >255 chars +- Various advanced regex features + +**These cannot be fixed by adding interpreter opcodes** - they require regex engine enhancements. + +### 4. 
**Other Test Issues** +- **op/stat_errors.t**: File I/O edge cases (not missing operators) +- **op/decl-refs.t**: "my list declaration requires identifier" - parser/compiler issue +- **uni/variables.t, uni/fold.t**: Likely unicode/regex edge cases + +## Implementation Priority + +### Phase 1: Fix Compound Assignments in eval STRING (HIGH IMPACT) +**Expected gain**: +500-600 tests (op/bop.t, op/hashassign.t) + +1. Analyze how regular assignment (`=`) handles captured variables in eval STRING +2. Apply same pattern to `handleCompoundAssignment()` method +3. Test with all compound operators: +=, -=, *=, /=, %=, .=, &=, |=, ^= + +### Phase 2: Fix tr Operator in eval STRING (MEDIUM IMPACT) +**Expected gain**: +100-150 tests (op/tr.t) + +1. Investigate how tr is parsed in eval STRING +2. Add handling in BinaryOperatorNode visitor for =~ with tr on right side +3. May need to emit TR opcode or handle it specially + +### Phase 3: Investigate Remaining op/ Test Failures (LOW IMPACT) +**Expected gain**: +50-100 tests + +- op/decl-refs.t: Fix list declaration issue +- op/stat_errors.t: May not need operator additions + +## Why Previous Approach Was Inefficient + +I was implementing operators one-by-one without analyzing the REAL bottlenecks: +- Added index/rindex: +351 tests ✓ (good) +- Added pos: +15 tests (minimal because regex engine limits) +- Added bitwise &=, |=, ^=: +0 tests (because compound assignments don't work in eval STRING anyway) + +## Expected Total Impact After Phase 1+2 + +- **Current**: ~9,000 failing tests across 16 files +- **After Phase 1**: ~8,400 failing tests (-600) +- **After Phase 2**: ~8,200 failing tests (-200) +- **Remaining**: ~8,200 tests (mostly regex engine limitations) + +## Next Steps + +1. **Implement Phase 1**: Fix compound assignments in eval STRING +2. **Implement Phase 2**: Fix tr operator +3. **Re-run all tests** to verify impact +4. 
**Report findings**: Document that remaining failures are mostly regex engine limitations diff --git a/dev/prompts/interpreter_remaining_failures_investigation.md b/dev/prompts/interpreter_remaining_failures_investigation.md new file mode 100644 index 000000000..bcb332a1e --- /dev/null +++ b/dev/prompts/interpreter_remaining_failures_investigation.md @@ -0,0 +1,194 @@ +# Investigation: Interpreter Mode Remaining Test Failures + +## Current Status +- **Compiler mode**: 153/173 tests passing (88.5%) +- **Interpreter mode**: 147/173 tests passing (85.0%) +- **Gap**: 6 tests to reach compiler parity + +## Test Failures Summary + +### Group 1: Self-Recursive Eval with Lexical Variables (5 tests) +**Failing Tests**: 34, 37, 38, 59, 63 + +**Root Cause**: Critical bug in lexical variable capture when eval STRING contains: +1. A print statement with string interpolation referencing a lexical variable (e.g., `print "# level $l\n"`) +2. A recursive call to the same function passing that lexical variable (e.g., `recurse($l)`) + +**Symptom**: +```perl +sub recurse { + my $l = shift; + eval 'print "# level $l\n"; recurse($l);'; +} +``` + +Compiler output: `# level 42` +Interpreter output: `# level main::STDOUT` + +The variable `$l` is being incorrectly resolved to "main::STDOUT" in the interpreter. + +**Investigation Results**: +- Simple cases work fine: `eval 'print "l=$l\n"'` ✓ +- Non-recursive cases work: `eval 'print "l=$l\n"; other_function($l);'` ✓ +- Recursive cases WITHOUT print work: `eval 'recurse($l+1);'` ✓ +- **FAILS**: `eval 'print "# level $l\n"; recurse($l);'` ✗ + +**Technical Analysis**: +The issue occurs during compilation of the eval STRING in interpreter mode. When both: +1. String interpolation uses `$l` +2. Function call argument uses `$l` +3. 
Function called is the currently executing function (self-recursion) + +The variable lookup mechanism in BytecodeCompiler incorrectly resolves `$l`, possibly due to: +- Confusion between lexical scope variable and filehandle in print context +- Incorrect captured variable registry when the same eval STRING is compiled multiple times with different call frames +- Variable name resolution falling back to global lookup instead of captured lexicals + +**Location**: +- `RuntimeCode.evalStringWithInterpreter()` - lines 754-796 (adjustedRegistry building) +- `BytecodeCompiler` variable resolution for captured variables + +### Group 2: Strict Subs Error Not Setting $@ (1 test) +**Failing Test**: 168 + +**Test Code**: +```perl +use strict; use warnings; +$SIG{__DIE__} = sub { die "X" }; +eval { eval "bar"; print "after eval $@"; }; +if ($@) { print "outer eval $@" } +``` + +**Expected**: `after eval X at - line 1.` +**Got (interpreter)**: `after eval ` (empty $@) + +**Root Cause**: The interpreter's `evalStringWithInterpreter()` is not catching and setting $@ for "Bareword not allowed while strict subs in use" errors. + +**Investigation Results**: +- Syntax errors (e.g., `eval "1+;"`) properly set $@ ✓ +- Bareword strict subs errors do NOT set $@ in interpreter ✗ +- Compiler mode correctly sets $@ for bareword errors ✓ + +**Technical Analysis**: +The error handling in `RuntimeCode.evalStringWithInterpreter()` (lines 798-843) catches compilation exceptions and sets $@. However, strict subs violations may be: +1. Caught by a different exception path that doesn't set $@ +2. Not being thrown as exceptions during BytecodeCompiler.compile() +3. 
Being silently ignored somewhere in the compilation pipeline + +**Location**: +- `RuntimeCode.evalStringWithInterpreter()` - error handling block (lines 798-843) +- `BytecodeCompiler.compile()` - strict subs enforcement + +### Group 3: Line Number Tracking in Eval (1 test) +**Failing Test**: 136 + +**Test Code**: +```perl +eval "\${\nfoobar\n} = 10; warn q{should be line 3}"; +# Expected: "should be line 3 at (eval 1) line 3.\n" +# Got: undef +``` + +**Root Cause**: Line number tracking is not properly maintained when compiling multi-line eval STRING in interpreter mode. + +**Technical Analysis**: +The interpreter needs to: +1. Track source line numbers during parsing of eval STRING +2. Map bytecode positions to source lines +3. Report correct line numbers in error messages + +This likely requires enhancing `BytecodeCompiler` to store line number mapping information that can be used by error reporting. + +**Location**: +- `BytecodeCompiler` - line number tracking during compilation +- `InterpretedCode.pcToTokenIndex` mapping +- Error message generation in BytecodeInterpreter + +## Fixes Completed So Far + +### Fix 1: Context Propagation (commit 4a4fa943) +- Fixed BytecodeCompiler to preserve context for last statement in blocks +- Only non-last statements use VOID context +- **Tests fixed**: 107-108 + +### Fix 2: Post-Increment/Decrement (commit 1248a54b) +- Removed incorrect STORE_GLOBAL_SCALAR after POST_AUTO* opcodes +- Fixed BytecodeInterpreter to store return values from post-increment/decrement +- **Tests fixed**: 12-13 + +### Fix 3: Local Variable Support (commit 93ce1df6) +- Added dynamic variable restoration in evalStringWithInterpreter +- Implemented local($var)=value assignment pattern support +- **Tests fixed**: 13 (additional improvement) + +## Recommended Fix Priority + +### Priority 1: Test 168 (Strict Subs Error Handling) - QUICKEST WIN +**Estimated Effort**: Low +**Impact**: 1 test + +**Approach**: +1. 
Debug why bareword strict subs errors aren't being caught +2. Ensure all compilation errors properly set $@ in evalStringWithInterpreter +3. Add specific handling for PerlCompilerException from strict violations + +**Files to Modify**: +- `RuntimeCode.evalStringWithInterpreter()` - error handling + +### Priority 2: Test 136 (Line Number Tracking) - MEDIUM EFFORT +**Estimated Effort**: Medium +**Impact**: 1 test + +**Approach**: +1. Enhance BytecodeCompiler to properly track source line numbers +2. Store line-to-bytecode mapping in InterpretedCode +3. Use mapping in error reporting + +**Files to Modify**: +- `BytecodeCompiler` - add line tracking +- `InterpretedCode` - enhance pcToTokenIndex +- Error message generation code + +### Priority 3: Tests 34, 37, 38, 59, 63 (Self-Recursive Eval) - COMPLEX +**Estimated Effort**: High +**Impact**: 5 tests (would exceed compiler parity) + +**Approach** (complex - requires deep debugging): +1. Add extensive logging to variable capture mechanism +2. Debug adjustedRegistry building in evalStringWithInterpreter +3. Investigate why print + recursive call causes variable confusion +4. Possibly requires architectural change to how variables are captured per eval execution + +**Files to Modify**: +- `RuntimeCode.evalStringWithInterpreter()` - variable capture (lines 762-796) +- `BytecodeCompiler` - variable resolution for captured variables +- Possibly: eval STRING compilation caching mechanism + +## Path to Compiler Parity (153 tests) + +To reach 153 tests passing (compiler parity), we need to fix **6 more tests**. + +**Recommended Strategy**: +1. Fix Test 168 (strict subs $@ setting) - **+1 test** → 148/173 +2. Fix Test 136 (line numbers) - **+1 test** → 149/173 +3. Investigate and fix self-recursive eval bug for remaining tests + +**Alternative Strategy**: +Since the self-recursive eval bug is complex, we could: +1. Fix simpler issues in other test categories (tests 45-46, 95-97, 99-102, 121-122, 125-126, 130-131, 146-151) +2. 
Look for quick wins in those 20 other failing tests +3. Pick the 4 easiest to reach 153 total + +## Next Steps + +1. **Immediate**: Fix test 168 by ensuring strict subs errors set $@ properly +2. **Short-term**: Fix test 136 line number tracking +3. **Medium-term**: Debug self-recursive eval variable capture or find 4 other easy wins +4. **Goal**: Reach 153 tests passing to achieve compiler parity and enable PR merge + +## Notes + +- The self-recursive eval bug is a fundamental issue with how the interpreter captures lexical variables in recursive eval contexts +- Fixing it may require significant refactoring of the variable capture mechanism +- Consider whether reaching parity via other simpler test fixes is more pragmatic +- All fixes should maintain existing passing tests (regression prevention) diff --git a/src/main/java/org/perlonjava/codegen/EmitEval.java b/src/main/java/org/perlonjava/codegen/EmitEval.java index 078b6f517..13daa0ecc 100644 --- a/src/main/java/org/perlonjava/codegen/EmitEval.java +++ b/src/main/java/org/perlonjava/codegen/EmitEval.java @@ -562,7 +562,15 @@ private static void emitEvalInterpreterPath(EmitterVisitor emitterVisitor, Strin // Stack: [RuntimeScalar(String), String, Object[], RuntimeArray(@_)] // Push the calling context (scalar, list, or void) - emitterVisitor.pushCallContext(); + // For eval, use the context determined by how the eval result is used + // This matches the compiler path which uses a compile-time constant + if (emitterVisitor.ctx.contextType == RuntimeContextType.RUNTIME) { + // If context is RUNTIME, load it from wantarray variable + mv.visitVarInsn(Opcodes.ILOAD, emitterVisitor.ctx.symbolTable.getVariableIndex("wantarray")); + } else { + // Otherwise use the compile-time constant (LIST/SCALAR/VOID) + mv.visitLdcInsn(emitterVisitor.ctx.contextType); + } // Stack: [RuntimeScalar(String), String, Object[], RuntimeArray(@_), int] // Call evalStringWithInterpreter which returns RuntimeList directly diff --git 
a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index 0931bbc40..b09570038 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -36,13 +36,14 @@ public class BytecodeCompiler implements Visitor { // Each scope is a Map mapping variable names to register indices private final Stack> variableScopes = new Stack<>(); + // Symbol table for package/class tracking + // Tracks current package, class flag, and package versions like the compiler does + private final ScopedSymbolTable symbolTable = new ScopedSymbolTable(); + // Stack to save/restore register state when entering/exiting scopes private final Stack savedNextRegister = new Stack<>(); private final Stack savedBaseRegister = new Stack<>(); - // Track current package name (for global variables) - private String currentPackage = "main"; - // Token index tracking for error reporting private final TreeMap pcToTokenIndex = new TreeMap<>(); private int currentTokenIndex = -1; // Track current token for error reporting @@ -50,6 +51,9 @@ public class BytecodeCompiler implements Visitor { // Error reporting private final ErrorMessageUtil errorUtil; + // EmitterContext for strict checks and other compile-time options + private EmitterContext emitterContext; + // Register allocation private int nextRegister = 3; // 0=this, 1=@_, 2=wantarray private int baseRegisterForStatement = 3; // Reset point after each statement @@ -66,6 +70,14 @@ public class BytecodeCompiler implements Visitor { private String[] capturedVarNames; // Parallel array of names private Map capturedVarIndices; // Name → register index + // Track ALL variables ever declared (for variableRegistry) + // This is needed because inner scopes get popped before variableRegistry is built + private final Map allDeclaredVariables = new HashMap<>(); + + // BEGIN support for named subroutine 
closures + private int currentSubroutineBeginId = 0; // BEGIN ID for current named subroutine (0 = not in named sub) + private Set currentSubroutineClosureVars = new HashSet<>(); // Variables captured from outer scope + // Source information private final String sourceName; private final int sourceLine; @@ -175,6 +187,7 @@ private int getVariableRegister(String name) { private int addVariable(String name, String declType) { int reg = allocateRegister(); variableScopes.peek().put(name, reg); + allDeclaredVariables.put(name, reg); // Track for variableRegistry return reg; } @@ -211,9 +224,10 @@ private void exitScope() { /** * Helper: Get current package name for global variable resolution. + * Uses symbolTable for proper package/class tracking. */ private String getCurrentPackage() { - return currentPackage; + return symbolTable.getCurrentPackage(); } /** @@ -270,13 +284,22 @@ public InterpretedCode compile(Node node) { * @return InterpretedCode ready for execution */ public InterpretedCode compile(Node node, EmitterContext ctx) { + // Store context for strict checks and other compile-time options + this.emitterContext = ctx; + // Detect closure variables if context is provided if (ctx != null) { detectClosureVariables(node, ctx); + // Use the calling context from EmitterContext for top-level expressions + // This is crucial for eval STRING to propagate context correctly + currentCallContext = ctx.contextType; } // If we have captured variables, allocate registers for them - if (capturedVars != null && capturedVars.length > 0) { + // BUT: If we were constructed with a parentRegistry (for eval STRING), + // nextRegister is already correctly set by the constructor. + // Only reset it if we have runtime capturedVars but no parentRegistry. 
+ if (capturedVars != null && capturedVars.length > 0 && capturedVarIndices == null) { // Registers 0-2 are reserved (this, @_, wantarray) // Registers 3+ are captured variables nextRegister = 3 + capturedVars.length; @@ -285,13 +308,39 @@ public InterpretedCode compile(Node node, EmitterContext ctx) { // Visit the node to generate bytecode node.accept(this); - // Emit RETURN with last result register (or register 0 for empty) + // Convert result to scalar context if needed (for eval STRING) + if (currentCallContext == RuntimeContextType.SCALAR && lastResultReg >= 0) { + RuntimeBase lastResult = null; // Can't access at compile time + // Use ARRAY_SIZE to convert arrays/lists to scalar count + int scalarReg = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emitReg(scalarReg); + emitReg(lastResultReg); + lastResultReg = scalarReg; + } + + // Emit RETURN with last result register + // If no result was produced, return undef instead of register 0 ("this") + int returnReg; + if (lastResultReg >= 0) { + returnReg = lastResultReg; + } else { + // No result - allocate register for undef + returnReg = allocateRegister(); + emit(Opcodes.LOAD_UNDEF); + emitReg(returnReg); + } + emit(Opcodes.RETURN); - emit(lastResultReg >= 0 ? 
lastResultReg : 0); + emitReg(returnReg); // Build variable registry for eval STRING support - // This maps variable names to their register indices for variable capture + // Use allDeclaredVariables which tracks ALL variables ever declared, + // not variableScopes which loses variables when scopes are popped Map variableRegistry = new HashMap<>(); + variableRegistry.putAll(allDeclaredVariables); + + // Also include variables from current scopes (in case of nested contexts) for (Map scope : variableScopes) { variableRegistry.putAll(scope); } @@ -323,6 +372,13 @@ public InterpretedCode compile(Node node, EmitterContext ctx) { * @param ctx EmitterContext containing symbol table and eval context */ private void detectClosureVariables(Node ast, EmitterContext ctx) { + // If capturedVarIndices was already set by the constructor (for eval STRING + // with parentRegistry), don't overwrite it. The constructor has already set up + // the correct captured variable mappings from the parent scope. + if (capturedVarIndices != null) { + return; // Already set up by constructor with parentRegistry + } + // Step 1: Collect all variable references in AST Set referencedVars = collectReferencedVariables(ast); @@ -430,7 +486,10 @@ public void visit(BlockNode node) { enterScope(); // Visit each statement in the block - for (Node stmt : node.elements) { + int numStatements = node.elements.size(); + for (int i = 0; i < numStatements; i++) { + Node stmt = node.elements.get(i); + // Track line number for this statement (like codegen's setDebugInfoLineNumber) if (stmt != null) { int tokenIndex = stmt.getIndex(); @@ -442,7 +501,9 @@ public void visit(BlockNode node) { int savedContext = currentCallContext; // If this is not an assignment or other value-using construct, use VOID context - if (!(stmt instanceof BinaryOperatorNode && ((BinaryOperatorNode) stmt).operator.equals("="))) { + // EXCEPT for the last statement in a block, which should use the block's context + boolean isLastStatement 
= (i == numStatements - 1); + if (!isLastStatement && !(stmt instanceof BinaryOperatorNode && ((BinaryOperatorNode) stmt).operator.equals("="))) { currentCallContext = RuntimeContextType.VOID; } @@ -552,9 +613,30 @@ public void visit(IdentifierNode node) { } if (!found) { + // Not a lexical variable - could be a global or a bareword + // Check for strict subs violation (bareword without sigil) + if (!varName.startsWith("$") && !varName.startsWith("@") && !varName.startsWith("%")) { + // This is a bareword (no sigil) + if (emitterContext != null && emitterContext.symbolTable != null && + emitterContext.symbolTable.isStrictOptionEnabled(org.perlonjava.perlmodule.Strict.HINT_STRICT_SUBS)) { + throwCompilerException("Bareword \"" + varName + "\" not allowed while \"strict subs\" in use"); + } + // Not strict - treat bareword as string literal + int rd = allocateRegister(); + emit(Opcodes.LOAD_STRING); + emitReg(rd); + int strIdx = addToStringPool(varName); + emit(strIdx); + lastResultReg = rd; + return; + } + // Global variable + // Strip sigil and normalize name (e.g., "$x" → "main::x") + String bareVarName = varName.substring(1); // Remove sigil + String normalizedName = NameNormalizer.normalizeVariableName(bareVarName, getCurrentPackage()); int rd = allocateRegister(); - int nameIdx = addToStringPool(varName); + int nameIdx = addToStringPool(normalizedName); emit(Opcodes.LOAD_GLOBAL_SCALAR); emitReg(rd); @@ -895,36 +977,60 @@ private void handleHashSlice(BinaryOperatorNode node, OperatorNode leftOp) { private void handleCompoundAssignment(BinaryOperatorNode node) { String op = node.operator; - // Get the left operand register (the variable being assigned to) - // The left side must be a variable reference - if (!(node.left instanceof OperatorNode)) { - throwCompilerException("Compound assignment requires variable on left side"); - return; - } + // Compile the right operand first (the value to add/subtract/etc.) 
+ int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + node.right.accept(this); + int valueReg = lastResultReg; - OperatorNode leftOp = (OperatorNode) node.left; - if (!leftOp.operator.equals("$")) { - throwCompilerException("Compound assignment currently only supports scalar variables"); - return; - } + // Get the left operand register (the variable or expression being assigned to) + int targetReg; + boolean isGlobal = false; - if (!(leftOp.operand instanceof IdentifierNode)) { - throwCompilerException("Compound assignment requires simple variable"); - return; - } + // Check if left side is a simple variable reference + if (node.left instanceof OperatorNode) { + OperatorNode leftOp = (OperatorNode) node.left; - String varName = "$" + ((IdentifierNode) leftOp.operand).name; + if (leftOp.operator.equals("$") && leftOp.operand instanceof IdentifierNode) { + // Simple scalar variable: $x += 5 + String varName = "$" + ((IdentifierNode) leftOp.operand).name; - // Get the variable's register - if (!hasVariable(varName)) { - throwCompilerException("Undefined variable: " + varName); - return; - } - int targetReg = getVariableRegister(varName); + if (hasVariable(varName)) { + // Lexical variable - use its register directly + targetReg = getVariableRegister(varName); + } else { + // Global variable - need to load it first + isGlobal = true; + targetReg = allocateRegister(); + String normalizedName = NameNormalizer.normalizeVariableName(varName, getCurrentPackage()); + int nameIdx = addToStringPool(normalizedName); + emit(Opcodes.LOAD_GLOBAL_SCALAR); + emitReg(targetReg); + emit(nameIdx); + } + } else { + // Other operator (not simple variable) - compile as expression in SCALAR context + node.left.accept(this); + targetReg = lastResultReg; + } + } else { + // Not an OperatorNode (could be BinaryOperatorNode like ($x &= $y)) + // Compile the left side as an expression in SCALAR context + node.left.accept(this); + targetReg = lastResultReg; 
- // Compile the right operand (the value to add/subtract/etc.) - node.right.accept(this); - int valueReg = lastResultReg; + // Convert to scalar if it's a list + if (!(lastResultReg == targetReg)) { + // Already handled + } else { + // May need to convert list to scalar + int scalarReg = allocateRegister(); + emit(Opcodes.LIST_TO_SCALAR); + emitReg(scalarReg); + emitReg(targetReg); + targetReg = scalarReg; + } + } // Emit the appropriate compound assignment opcode switch (op) { @@ -934,8 +1040,15 @@ private void handleCompoundAssignment(BinaryOperatorNode node) { case "/=" -> emit(Opcodes.DIVIDE_ASSIGN); case "%=" -> emit(Opcodes.MODULUS_ASSIGN); case ".=" -> emit(Opcodes.STRING_CONCAT_ASSIGN); + case "&=", "binary&=" -> emit(Opcodes.BITWISE_AND_ASSIGN); // Numeric bitwise AND + case "|=", "binary|=" -> emit(Opcodes.BITWISE_OR_ASSIGN); // Numeric bitwise OR + case "^=", "binary^=" -> emit(Opcodes.BITWISE_XOR_ASSIGN); // Numeric bitwise XOR + case "&.=" -> emit(Opcodes.STRING_BITWISE_AND_ASSIGN); // String bitwise AND + case "|.=" -> emit(Opcodes.STRING_BITWISE_OR_ASSIGN); // String bitwise OR + case "^.=" -> emit(Opcodes.STRING_BITWISE_XOR_ASSIGN); // String bitwise XOR default -> { throwCompilerException("Unknown compound assignment operator: " + op); + currentCallContext = savedContext; return; } } @@ -943,8 +1056,20 @@ private void handleCompoundAssignment(BinaryOperatorNode node) { emitReg(targetReg); emitReg(valueReg); + // If it's a global variable, store it back + if (isGlobal) { + OperatorNode leftOp = (OperatorNode) node.left; + String varName = "$" + ((IdentifierNode) leftOp.operand).name; + String normalizedName = NameNormalizer.normalizeVariableName(varName, getCurrentPackage()); + int nameIdx = addToStringPool(normalizedName); + emit(Opcodes.STORE_GLOBAL_SCALAR); + emit(nameIdx); + emitReg(targetReg); + } + // The result is stored in targetReg lastResultReg = targetReg; + currentCallContext = savedContext; } /** @@ -1210,6 +1335,7 @@ private void 
compileAssignmentOperator(BinaryOperatorNode node) { // Track this variable - map the name to the register we already allocated variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry lastResultReg = reg; return; } @@ -1257,7 +1383,19 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { // Track this variable - map the name to the register we already allocated variableScopes.peek().put(varName, arrayReg); - lastResultReg = arrayReg; + allDeclaredVariables.put(varName, arrayReg); // Track for variableRegistry + + // In scalar context, return the count of elements assigned + // In list/void context, return the array + if (currentCallContext == RuntimeContextType.SCALAR) { + int countReg = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emitReg(countReg); + emitReg(listReg); + lastResultReg = countReg; + } else { + lastResultReg = arrayReg; + } return; } @@ -1278,7 +1416,17 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { emitReg(arrayReg); emitReg(listReg); - lastResultReg = arrayReg; + // In scalar context, return the count of elements assigned + // In list/void context, return the array + if (currentCallContext == RuntimeContextType.SCALAR) { + int countReg = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emitReg(countReg); + emitReg(listReg); + lastResultReg = countReg; + } else { + lastResultReg = arrayReg; + } return; } else if (sigilOp.operator.equals("%") && sigilOp.operand instanceof IdentifierNode) { // Handle my %hash = ... 
@@ -1307,6 +1455,7 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { // Track this variable - map the name to the register we already allocated variableScopes.peek().put(varName, hashReg); + allDeclaredVariables.put(varName, hashReg); // Track for variableRegistry lastResultReg = hashReg; return; } @@ -1377,22 +1526,58 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { if (sigilOp.operand instanceof IdentifierNode) { String varName = sigil + ((IdentifierNode) sigilOp.operand).name; - // Declare the variable - int varReg = addVariable(varName, "my"); - - // Initialize based on sigil - switch (sigil) { - case "$" -> { - emit(Opcodes.LOAD_UNDEF); - emitReg(varReg); + int varReg; + + // Check if this variable is captured by named subs (Parser marks with id) + if (sigilOp.id != 0) { + // This variable is captured - use RETRIEVE_BEGIN to get persistent storage + int beginId = sigilOp.id; + int nameIdx = addToStringPool(varName); + varReg = allocateRegister(); + + switch (sigil) { + case "$" -> { + emitWithToken(Opcodes.RETRIEVE_BEGIN_SCALAR, node.getIndex()); + emitReg(varReg); + emit(nameIdx); + emit(beginId); + } + case "@" -> { + emitWithToken(Opcodes.RETRIEVE_BEGIN_ARRAY, node.getIndex()); + emitReg(varReg); + emit(nameIdx); + emit(beginId); + } + case "%" -> { + emitWithToken(Opcodes.RETRIEVE_BEGIN_HASH, node.getIndex()); + emitReg(varReg); + emit(nameIdx); + emit(beginId); + } } - case "@" -> { - emit(Opcodes.NEW_ARRAY); - emitReg(varReg); - } - case "%" -> { - emit(Opcodes.NEW_HASH); - emitReg(varReg); + + // Track this variable + variableScopes.peek().put(varName, varReg); + allDeclaredVariables.put(varName, varReg); // Track for variableRegistry + } else { + // Regular lexical variable (not captured) + // Declare the variable + varReg = addVariable(varName, "my"); + + // Initialize based on sigil + switch (sigil) { + case "$" -> { + emit(Opcodes.LOAD_UNDEF); + emitReg(varReg); + } + case "@" -> { + emit(Opcodes.NEW_ARRAY); + 
emitReg(varReg); + } + case "%" -> { + emit(Opcodes.NEW_HASH); + emitReg(varReg); + } } } @@ -1410,9 +1595,17 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { // Assign to variable if (sigil.equals("$")) { - emit(Opcodes.MOVE); - emitReg(varReg); - emitReg(elemReg); + if (sigilOp.id != 0) { + // Captured variable - use SET_SCALAR to preserve aliasing + emit(Opcodes.SET_SCALAR); + emitReg(varReg); + emitReg(elemReg); + } else { + // Regular variable - use MOVE + emit(Opcodes.MOVE); + emitReg(varReg); + emitReg(elemReg); + } } else if (sigil.equals("@")) { emit(Opcodes.ARRAY_SET_FROM_LIST); emitReg(varReg); @@ -1567,6 +1760,99 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { lastResultReg = hashReg; return; } + } else if (localOperand instanceof ListNode) { + // Handle local($x) = value or local($x, $y) = (v1, v2) + ListNode listNode = (ListNode) localOperand; + + // Special case: single element list local($x) = value + if (listNode.elements.size() == 1) { + Node element = listNode.elements.get(0); + if (element instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) element; + if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { + String varName = "$" + ((IdentifierNode) sigilOp.operand).name; + + // Check if it's a lexical variable (should not be localized) + if (hasVariable(varName)) { + throwCompilerException("Can't localize lexical variable " + varName); + return; + } + + // Compile RHS first + node.right.accept(this); + int valueReg = lastResultReg; + + // Get the global variable and localize it + String packageName = getCurrentPackage(); + String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + int nameIdx = addToStringPool(globalVarName); + + int localReg = allocateRegister(); + emitWithToken(Opcodes.LOCAL_SCALAR, node.getIndex()); + emitReg(localReg); + emit(nameIdx); + + // Assign value to the localized variable + emit(Opcodes.SET_SCALAR); + 
emitReg(localReg); + emitReg(valueReg); + + lastResultReg = localReg; + return; + } + } + } + + // Multi-element case: local($x, $y) = (v1, v2) + // Compile RHS first + node.right.accept(this); + int valueReg = lastResultReg; + + // For each element in the list, localize and assign + for (int i = 0; i < listNode.elements.size(); i++) { + Node element = listNode.elements.get(i); + + if (element instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) element; + if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { + String varName = "$" + ((IdentifierNode) sigilOp.operand).name; + + // Check if it's a lexical variable (should not be localized) + if (hasVariable(varName)) { + throwCompilerException("Can't localize lexical variable " + varName); + return; + } + + // Get the global variable + String packageName = getCurrentPackage(); + String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + int nameIdx = addToStringPool(globalVarName); + + int localReg = allocateRegister(); + emitWithToken(Opcodes.LOCAL_SCALAR, node.getIndex()); + emitReg(localReg); + emit(nameIdx); + + // Extract element from RHS list + int elemReg = allocateRegister(); + emit(Opcodes.ARRAY_GET); + emitReg(elemReg); + emitReg(valueReg); + emitInt(i); + + // Assign to the localized variable + emit(Opcodes.SET_SCALAR); + emitReg(localReg); + emitReg(elemReg); + + if (i == 0) { + // Return the first localized variable + lastResultReg = localReg; + } + } + } + } + return; } } } @@ -1641,7 +1927,10 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { lastResultReg = targetReg; } else { // Global variable - int nameIdx = addToStringPool(varName); + // Strip sigil and normalize name (e.g., "$x" → "main::x") + String bareVarName = varName.substring(1); // Remove sigil + String normalizedName = NameNormalizer.normalizeVariableName(bareVarName, getCurrentPackage()); + int nameIdx = addToStringPool(normalizedName); 
emit(Opcodes.STORE_GLOBAL_SCALAR); emit(nameIdx); emitReg(valueReg); @@ -1802,6 +2091,18 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { emitReg(valueReg); lastResultReg = globReg; + } else if (leftOp.operator.equals("pos")) { + // pos($var) = value - lvalue assignment to regex position + // pos() returns a PosLvalueScalar that can be assigned to + node.left.accept(this); + int lvalueReg = lastResultReg; + + // Use SET_SCALAR to assign through the lvalue + emit(Opcodes.SET_SCALAR); + emitReg(lvalueReg); + emitReg(valueReg); + + lastResultReg = valueReg; } else { throwCompilerException("Assignment to unsupported operator: " + leftOp.operator); } @@ -1816,8 +2117,9 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { emitReg(valueReg); lastResultReg = targetReg; } else { - // Global variable - int nameIdx = addToStringPool(varName); + // Global variable (varName has no sigil here) + String normalizedName = NameNormalizer.normalizeVariableName(varName, getCurrentPackage()); + int nameIdx = addToStringPool(normalizedName); emit(Opcodes.STORE_GLOBAL_SCALAR); emit(nameIdx); emitReg(valueReg); @@ -2220,7 +2522,10 @@ private void compileAssignmentOperator(BinaryOperatorNode node) { emitReg(elementReg); } } else { - int nameIdx = addToStringPool(varName); + // Normalize global variable name (remove sigil, add package) + String bareVarName = varName.substring(1); // Remove "$" + String normalizedName = NameNormalizer.normalizeVariableName(bareVarName, getCurrentPackage()); + int nameIdx = addToStringPool(normalizedName); emit(Opcodes.STORE_GLOBAL_SCALAR); emit(nameIdx); emitReg(elementReg); @@ -2586,7 +2891,7 @@ private int compileBinaryOperatorSwitch(String operator, int rs1, int rs2, int t emitReg(rd); emitReg(rs2); // List register emitReg(rs1); // Closure register - emitInt(addToStringPool(currentPackage)); // Package name for sort + emitInt(addToStringPool(getCurrentPackage())); // Package name for sort } case "split" -> { // Split 
operator: split pattern, string @@ -2648,6 +2953,48 @@ private int compileBinaryOperatorSwitch(String operator, int rs1, int rs2, int t emitReg(rs2); emit(currentCallContext); } + case "&", "binary&" -> { + // Numeric bitwise AND: rs1 & rs2 + emit(Opcodes.BITWISE_AND_BINARY); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "|", "binary|" -> { + // Numeric bitwise OR: rs1 | rs2 + emit(Opcodes.BITWISE_OR_BINARY); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "^", "binary^" -> { + // Numeric bitwise XOR: rs1 ^ rs2 + emit(Opcodes.BITWISE_XOR_BINARY); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "&." -> { + // String bitwise AND: rs1 &. rs2 + emit(Opcodes.STRING_BITWISE_AND); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "|." -> { + // String bitwise OR: rs1 |. rs2 + emit(Opcodes.STRING_BITWISE_OR); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "^." -> { + // String bitwise XOR: rs1 ^. rs2 + emit(Opcodes.STRING_BITWISE_XOR); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } default -> throwCompilerException("Unsupported operator: " + operator, tokenIndex); } @@ -2688,10 +3035,13 @@ public void visit(BinaryOperatorNode node) { return; } - // Handle compound assignment operators (+=, -=, *=, /=, %=, .=) + // Handle compound assignment operators (+=, -=, *=, /=, %=, .=, &=, |=, ^=, &.=, |.=, ^.=, binary&=, binary|=, binary^=) if (node.operator.equals("+=") || node.operator.equals("-=") || node.operator.equals("*=") || node.operator.equals("/=") || - node.operator.equals("%=") || node.operator.equals(".=")) { + node.operator.equals("%=") || node.operator.equals(".=") || + node.operator.equals("&=") || node.operator.equals("|=") || node.operator.equals("^=") || + node.operator.equals("&.=") || node.operator.equals("|.=") || node.operator.equals("^.=") || + node.operator.startsWith("binary")) { // Handle binary&=, binary|=, binary^= handleCompoundAssignment(node); return; } @@ -3169,6 +3519,7 @@ private void 
compileVariableDeclaration(OperatorNode node, String op) { emit(sigilOp.id); // Track this as a captured variable - map to the register we allocated variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry } case "@" -> { emitWithToken(Opcodes.RETRIEVE_BEGIN_ARRAY, node.getIndex()); @@ -3176,6 +3527,7 @@ private void compileVariableDeclaration(OperatorNode node, String op) { emit(nameIdx); emit(sigilOp.id); variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry } case "%" -> { emitWithToken(Opcodes.RETRIEVE_BEGIN_HASH, node.getIndex()); @@ -3183,6 +3535,7 @@ private void compileVariableDeclaration(OperatorNode node, String op) { emit(nameIdx); emit(sigilOp.id); variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry } default -> throwCompilerException("Unsupported variable type: " + sigil); } @@ -3240,6 +3593,7 @@ private void compileVariableDeclaration(OperatorNode node, String op) { emit(nameIdx); emit(sigilOp.id); variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry } case "@" -> { emitWithToken(Opcodes.RETRIEVE_BEGIN_ARRAY, node.getIndex()); @@ -3247,6 +3601,7 @@ private void compileVariableDeclaration(OperatorNode node, String op) { emit(nameIdx); emit(sigilOp.id); variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry } case "%" -> { emitWithToken(Opcodes.RETRIEVE_BEGIN_HASH, node.getIndex()); @@ -3254,6 +3609,7 @@ private void compileVariableDeclaration(OperatorNode node, String op) { emit(nameIdx); emit(sigilOp.id); variableScopes.peek().put(varName, reg); + allDeclaredVariables.put(varName, reg); // Track for variableRegistry } default -> throwCompilerException("Unsupported variable type in list declaration: " + sigil); } @@ -3462,7 +3818,19 @@ private void 
compileVariableReference(OperatorNode node, String op) { if (node.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) node.operand).name; - if (hasVariable(varName)) { + // Check if this is a closure variable captured from outer scope via PersistentVariable + if (currentSubroutineBeginId != 0 && currentSubroutineClosureVars.contains(varName)) { + // This is a closure variable - use RETRIEVE_BEGIN_SCALAR + int rd = allocateRegister(); + int nameIdx = addToStringPool(varName); + + emitWithToken(Opcodes.RETRIEVE_BEGIN_SCALAR, node.getIndex()); + emitReg(rd); + emit(nameIdx); + emit(currentSubroutineBeginId); + + lastResultReg = rd; + } else if (hasVariable(varName)) { // Lexical variable - use existing register lastResultReg = getVariableRegister(varName); } else { @@ -3471,7 +3839,7 @@ private void compileVariableReference(OperatorNode node, String op) { String globalVarName = varName.substring(1); // Remove $ sigil first if (!globalVarName.contains("::")) { // Add package prefix - globalVarName = "main::" + globalVarName; + globalVarName = getCurrentPackage() + "::" + globalVarName; } int rd = allocateRegister(); @@ -3524,9 +3892,18 @@ private void compileVariableReference(OperatorNode node, String op) { return; } - // Check if it's a lexical array + // Check if this is a closure variable captured from outer scope via PersistentVariable int arrayReg; - if (hasVariable(varName)) { + if (currentSubroutineBeginId != 0 && currentSubroutineClosureVars.contains(varName)) { + // This is a closure variable - use RETRIEVE_BEGIN_ARRAY + arrayReg = allocateRegister(); + int nameIdx = addToStringPool(varName); + + emitWithToken(Opcodes.RETRIEVE_BEGIN_ARRAY, node.getIndex()); + emitReg(arrayReg); + emit(nameIdx); + emit(currentSubroutineBeginId); + } else if (hasVariable(varName)) { // Lexical array - use existing register arrayReg = getVariableRegister(varName); } else { @@ -3737,18 +4114,28 @@ public void visit(OperatorNode node) { 
throwCompilerException("scalar operator requires an operand"); } return; - } else if (op.equals("package")) { - // Package declaration: package Foo; + } else if (op.equals("package") || op.equals("class")) { + // Package/Class declaration: package Foo; or class Foo; // This updates the current package context for subsequent variable declarations if (node.operand instanceof IdentifierNode) { String packageName = ((IdentifierNode) node.operand).name; - // Update the current package for this compilation scope - currentPackage = packageName; + // Check if this is a class declaration (either "class" operator or isClass annotation) + Boolean isClassAnnotation = (Boolean) node.getAnnotation("isClass"); + boolean isClass = op.equals("class") || (isClassAnnotation != null && isClassAnnotation); + + // Update the current package/class in symbol table + // This tracks package name, isClass flag, and version + symbolTable.setCurrentPackage(packageName, isClass); + + // Register as Perl 5.38+ class for proper stringification if needed + if (isClass) { + org.perlonjava.runtime.ClassRegistry.registerClass(packageName); + } lastResultReg = -1; // No runtime value } else { - throwCompilerException("package operator requires an identifier"); + throwCompilerException(op + " operator requires an identifier"); } } else if (op.equals("say") || op.equals("print")) { // say/print $x @@ -3778,6 +4165,44 @@ public void visit(OperatorNode node) { } else { throwCompilerException("NOT operator requires operand"); } + } else if (op.equals("~") || op.equals("binary~")) { + // Bitwise NOT operator: ~$x or binary~$x + // Evaluate operand and emit BITWISE_NOT_BINARY opcode + if (node.operand != null) { + node.operand.accept(this); + int rs = lastResultReg; + + // Allocate result register + int rd = allocateRegister(); + + // Emit BITWISE_NOT_BINARY opcode + emit(Opcodes.BITWISE_NOT_BINARY); + emitReg(rd); + emitReg(rs); + + lastResultReg = rd; + } else { + throwCompilerException("Bitwise NOT 
operator requires operand"); + } + } else if (op.equals("~.")) { + // String bitwise NOT operator: ~.$x + // Evaluate operand and emit BITWISE_NOT_STRING opcode + if (node.operand != null) { + node.operand.accept(this); + int rs = lastResultReg; + + // Allocate result register + int rd = allocateRegister(); + + // Emit BITWISE_NOT_STRING opcode + emit(Opcodes.BITWISE_NOT_STRING); + emitReg(rd); + emitReg(rs); + + lastResultReg = rd; + } else { + throwCompilerException("String bitwise NOT operator requires operand"); + } } else if (op.equals("defined")) { // Defined operator: defined($x) // Check if value is defined (not undef) @@ -3902,11 +4327,16 @@ public void visit(OperatorNode node) { // Use optimized autoincrement/decrement opcodes if (isPostfix) { // Postfix: returns old value before modifying + // Need TWO registers: one for result (old value), one for variable + int resultReg = allocateRegister(); if (isIncrement) { emit(Opcodes.POST_AUTOINCREMENT); } else { emit(Opcodes.POST_AUTODECREMENT); } + emitReg(resultReg); // Destination for old value + emitReg(varReg); // Variable to modify in-place + lastResultReg = resultReg; } else { // Prefix: returns new value after modifying if (isIncrement) { @@ -3914,10 +4344,9 @@ public void visit(OperatorNode node) { } else { emit(Opcodes.PRE_AUTODECREMENT); } + emitReg(varReg); + lastResultReg = varReg; } - emitReg(varReg); - - lastResultReg = varReg; } else { throwCompilerException("Increment/decrement of non-lexical variable not yet supported"); } @@ -3932,29 +4361,32 @@ public void visit(OperatorNode node) { // Use optimized autoincrement/decrement opcodes if (isPostfix) { + // Postfix: returns old value before modifying + // Need TWO registers: one for result (old value), one for variable + int resultReg = allocateRegister(); if (isIncrement) { emit(Opcodes.POST_AUTOINCREMENT); } else { emit(Opcodes.POST_AUTODECREMENT); } + emitReg(resultReg); // Destination for old value + emitReg(varReg); // Variable to modify 
in-place + lastResultReg = resultReg; } else { if (isIncrement) { emit(Opcodes.PRE_AUTOINCREMENT); } else { emit(Opcodes.PRE_AUTODECREMENT); } + emitReg(varReg); + lastResultReg = varReg; } - emitReg(varReg); - - lastResultReg = varReg; } else { // Global variable increment/decrement - // Add package prefix if not present - String globalVarName = varName; - if (!globalVarName.contains("::")) { - globalVarName = "main::" + varName.substring(1); - } - int nameIdx = addToStringPool(globalVarName); + // Normalize global variable name (remove sigil, add package) + String bareVarName = varName.substring(1); // Remove "$" + String normalizedName = NameNormalizer.normalizeVariableName(bareVarName, getCurrentPackage()); + int nameIdx = addToStringPool(normalizedName); // Load global variable int globalReg = allocateRegister(); @@ -3964,28 +4396,37 @@ public void visit(OperatorNode node) { // Apply increment/decrement if (isPostfix) { + // Postfix: returns old value before modifying + // Need TWO registers: one for result (old value), one for variable + int resultReg = allocateRegister(); if (isIncrement) { emit(Opcodes.POST_AUTOINCREMENT); } else { emit(Opcodes.POST_AUTODECREMENT); } + emitReg(resultReg); // Destination for old value + emitReg(globalReg); // Variable to modify in-place + lastResultReg = resultReg; } else { if (isIncrement) { emit(Opcodes.PRE_AUTOINCREMENT); } else { emit(Opcodes.PRE_AUTODECREMENT); } + emitReg(globalReg); + lastResultReg = globalReg; } - emitReg(globalReg); - // Store back to global variable - emit(Opcodes.STORE_GLOBAL_SCALAR); - emit(nameIdx); - emitReg(globalReg); - - lastResultReg = globalReg; + // NOTE: Do NOT store back to global variable! + // The POST/PRE_AUTO* opcodes modify the global variable directly + // and return the appropriate value (old for postfix, new for prefix). + // Storing back would overwrite the modification with the return value. 
} + } else { + throwCompilerException("Invalid operand for increment/decrement operator"); } + } else { + throwCompilerException("Increment/decrement operator requires operand"); } } else if (op.equals("return")) { // return $expr; @@ -4062,6 +4503,181 @@ public void visit(OperatorNode node) { } lastResultReg = rd; + } else if (op.equals("study")) { + // study $var + // In modern Perl, study is a no-op that always returns true + // We evaluate the operand for side effects, then return 1 + + if (node.operand != null) { + // Evaluate operand for side effects (though typically there are none) + node.operand.accept(this); + } + + // Return 1 (true) + int rd = allocateRegister(); + emit(Opcodes.LOAD_INT); + emitReg(rd); + emitInt(1); + + lastResultReg = rd; + } else if (op.equals("require")) { + // require MODULE_NAME or require VERSION + // Evaluate operand in scalar context + int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + try { + node.operand.accept(this); + int operandReg = lastResultReg; + + // Call ModuleOperators.require() + int rd = allocateRegister(); + emit(Opcodes.REQUIRE); + emitReg(rd); + emitReg(operandReg); + + lastResultReg = rd; + } finally { + currentCallContext = savedContext; + } + } else if (op.equals("pos")) { + // pos($var) - get or set regex match position + // Returns an lvalue that can be assigned to + int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + try { + node.operand.accept(this); + int operandReg = lastResultReg; + + // Call RuntimeScalar.pos() + int rd = allocateRegister(); + emit(Opcodes.POS); + emitReg(rd); + emitReg(operandReg); + + lastResultReg = rd; + } finally { + currentCallContext = savedContext; + } + } else if (op.equals("index") || op.equals("rindex")) { + // index(str, substr, pos?) or rindex(str, substr, pos?) 
+ if (node.operand instanceof ListNode) { + ListNode args = (ListNode) node.operand; + + int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + try { + // Evaluate first arg (string) + if (args.elements.isEmpty()) { + throwCompilerException("Not enough arguments for " + op); + } + args.elements.get(0).accept(this); + int strReg = lastResultReg; + + // Evaluate second arg (substring) + if (args.elements.size() < 2) { + throwCompilerException("Not enough arguments for " + op); + } + args.elements.get(1).accept(this); + int substrReg = lastResultReg; + + // Evaluate third arg (position) - optional, defaults to undef + int posReg; + if (args.elements.size() >= 3) { + args.elements.get(2).accept(this); + posReg = lastResultReg; + } else { + posReg = allocateRegister(); + emit(Opcodes.LOAD_UNDEF); + emitReg(posReg); + } + + // Call index or rindex + int rd = allocateRegister(); + emit(op.equals("index") ? Opcodes.INDEX : Opcodes.RINDEX); + emitReg(rd); + emitReg(strReg); + emitReg(substrReg); + emitReg(posReg); + + lastResultReg = rd; + } finally { + currentCallContext = savedContext; + } + } else { + throwCompilerException(op + " requires a list of arguments"); + } + } else if (op.equals("stat") || op.equals("lstat")) { + // stat FILE or lstat FILE + int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + try { + node.operand.accept(this); + int operandReg = lastResultReg; + + int rd = allocateRegister(); + emit(op.equals("stat") ? Opcodes.STAT : Opcodes.LSTAT); + emitReg(rd); + emitReg(operandReg); + emit(savedContext); // Pass calling context + + lastResultReg = rd; + } finally { + currentCallContext = savedContext; + } + } else if (op.startsWith("-") && op.length() == 2) { + // File test operators: -r, -w, -x, etc. 
+ int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + try { + node.operand.accept(this); + int operandReg = lastResultReg; + + int rd = allocateRegister(); + + // Map operator to opcode + char testChar = op.charAt(1); + short opcode; + switch (testChar) { + case 'r': opcode = Opcodes.FILETEST_R; break; + case 'w': opcode = Opcodes.FILETEST_W; break; + case 'x': opcode = Opcodes.FILETEST_X; break; + case 'o': opcode = Opcodes.FILETEST_O; break; + case 'R': opcode = Opcodes.FILETEST_R_REAL; break; + case 'W': opcode = Opcodes.FILETEST_W_REAL; break; + case 'X': opcode = Opcodes.FILETEST_X_REAL; break; + case 'O': opcode = Opcodes.FILETEST_O_REAL; break; + case 'e': opcode = Opcodes.FILETEST_E; break; + case 'z': opcode = Opcodes.FILETEST_Z; break; + case 's': opcode = Opcodes.FILETEST_S; break; + case 'f': opcode = Opcodes.FILETEST_F; break; + case 'd': opcode = Opcodes.FILETEST_D; break; + case 'l': opcode = Opcodes.FILETEST_L; break; + case 'p': opcode = Opcodes.FILETEST_P; break; + case 'S': opcode = Opcodes.FILETEST_S_UPPER; break; + case 'b': opcode = Opcodes.FILETEST_B; break; + case 'c': opcode = Opcodes.FILETEST_C; break; + case 't': opcode = Opcodes.FILETEST_T; break; + case 'u': opcode = Opcodes.FILETEST_U; break; + case 'g': opcode = Opcodes.FILETEST_G; break; + case 'k': opcode = Opcodes.FILETEST_K; break; + case 'T': opcode = Opcodes.FILETEST_T_UPPER; break; + case 'B': opcode = Opcodes.FILETEST_B_UPPER; break; + case 'M': opcode = Opcodes.FILETEST_M; break; + case 'A': opcode = Opcodes.FILETEST_A; break; + case 'C': opcode = Opcodes.FILETEST_C_UPPER; break; + default: + throwCompilerException("Unsupported file test operator: " + op); + return; + } + + emit(opcode); + emitReg(rd); + emitReg(operandReg); + + lastResultReg = rd; + } finally { + currentCallContext = savedContext; + } } else if (op.equals("die")) { // die $message; if (node.operand != null) { @@ -4124,6 +4740,72 @@ public void visit(OperatorNode node) { 
emitReg(locationReg); } lastResultReg = -1; // No result after die + } else if (op.equals("warn")) { + // warn $message; + if (node.operand != null) { + // Evaluate warn message + node.operand.accept(this); + int msgReg = lastResultReg; + + // Precompute location message at compile time + String locationMsg; + // Use annotation from AST node which has the correct line number + Object lineObj = node.getAnnotation("line"); + Object fileObj = node.getAnnotation("file"); + if (lineObj != null && fileObj != null) { + String fileName = fileObj.toString(); + int lineNumber = Integer.parseInt(lineObj.toString()); + locationMsg = " at " + fileName + " line " + lineNumber; + } else if (errorUtil != null) { + // Fallback to errorUtil if annotations not available + String fileName = errorUtil.getFileName(); + int lineNumber = errorUtil.getLineNumberAccurate(node.getIndex()); + locationMsg = " at " + fileName + " line " + lineNumber; + } else { + // Final fallback if neither available + locationMsg = " at " + sourceName + " line " + sourceLine; + } + + int locationReg = allocateRegister(); + emit(Opcodes.LOAD_STRING); + emitReg(locationReg); + emit(addToStringPool(locationMsg)); + + // Emit WARN with both message and precomputed location + emitWithToken(Opcodes.WARN, node.getIndex()); + emitReg(msgReg); + emitReg(locationReg); + } else { + // warn; (no message - use $@) + int undefReg = allocateRegister(); + emit(Opcodes.LOAD_UNDEF); + emitReg(undefReg); + + // Precompute location message for bare warn + String locationMsg; + if (errorUtil != null) { + String fileName = errorUtil.getFileName(); + int lineNumber = errorUtil.getLineNumber(node.getIndex()); + locationMsg = " at " + fileName + " line " + lineNumber; + } else { + locationMsg = " at " + sourceName + " line " + sourceLine; + } + + int locationReg = allocateRegister(); + emit(Opcodes.LOAD_STRING); + emitReg(locationReg); + emitInt(addToStringPool(locationMsg)); + + emitWithToken(Opcodes.WARN, node.getIndex()); + 
emitReg(undefReg); + emitReg(locationReg); + } + // warn returns 1 (true) in Perl + int resultReg = allocateRegister(); + emit(Opcodes.LOAD_INT); + emitReg(resultReg); + emitInt(1); + lastResultReg = resultReg; } else if (op.equals("eval")) { // eval $string; if (node.operand != null) { @@ -5017,6 +5699,15 @@ public void visit(OperatorNode node) { } else { throwCompilerException("unary + operator requires an operand"); } + } else if (op.equals("wantarray")) { + // wantarray operator: returns undef in VOID, false in SCALAR, true in LIST + // Read register 2 (wantarray context) and convert to Perl convention + int rd = allocateRegister(); + emit(Opcodes.WANTARRAY); + emitReg(rd); + emitReg(2); // Register 2 contains the calling context + + lastResultReg = rd; } else { throwCompilerException("Unsupported operator: " + op); } @@ -5346,18 +6037,61 @@ private void visitNamedSubroutine(SubroutineNode node) { } } + // If there are closure variables, we need to store them in PersistentVariable globals + // so the named sub can retrieve them using RETRIEVE_BEGIN opcodes + int beginId = 0; + if (!closureVarIndices.isEmpty()) { + // Assign a unique BEGIN ID for this subroutine + beginId = org.perlonjava.codegen.EmitterMethodCreator.classCounter++; + + // Store each closure variable in PersistentVariable globals + for (int i = 0; i < closureVarNames.size(); i++) { + String varName = closureVarNames.get(i); + int varReg = closureVarIndices.get(i); + + // Get the variable type from the sigil + String sigil = varName.substring(0, 1); + String bareVarName = varName.substring(1); + String beginVarName = org.perlonjava.runtime.PersistentVariable.beginPackage(beginId) + "::" + bareVarName; + + // Store the variable value in PersistentVariable global + int nameIdx = addToStringPool(beginVarName); + switch (sigil) { + case "$" -> { + emit(Opcodes.STORE_GLOBAL_SCALAR); + emit(nameIdx); + emitReg(varReg); + } + case "@" -> { + emit(Opcodes.STORE_GLOBAL_ARRAY); + emit(nameIdx); + 
emitReg(varReg); + } + case "%" -> { + emit(Opcodes.STORE_GLOBAL_HASH); + emit(nameIdx); + emitReg(varReg); + } + } + } + } + // Step 3: Create a new BytecodeCompiler for the subroutine body - BytecodeCompiler subCompiler = new BytecodeCompiler(this.sourceName, node.getIndex(), this.errorUtil); + BytecodeCompiler subCompiler = new BytecodeCompiler( + this.sourceName, + node.getIndex(), + this.errorUtil + ); - // Step 4: Pre-populate sub-compiler's variable scope with captured variables - for (String varName : closureVarNames) { - subCompiler.addVariable(varName, "my"); - } + // Set the BEGIN ID in the sub-compiler so it knows to use RETRIEVE_BEGIN opcodes + subCompiler.currentSubroutineBeginId = beginId; + subCompiler.currentSubroutineClosureVars = new HashSet<>(closureVarNames); - // Step 5: Compile the subroutine body + // Step 4: Compile the subroutine body + // Sub-compiler will use RETRIEVE_BEGIN opcodes for closure variables InterpretedCode subCode = subCompiler.compile(node.block); - // Step 6: Emit bytecode to create closure with captured variables at RUNTIME + // Step 5: Emit bytecode to create closure or simple code ref int codeReg = allocateRegister(); if (closureVarIndices.isEmpty()) { @@ -5367,20 +6101,18 @@ private void visitNamedSubroutine(SubroutineNode node) { emitReg(codeReg); emit(constIdx); } else { - int templateIdx = addToConstantPool(subCode); - emit(Opcodes.CREATE_CLOSURE); + // Store the InterpretedCode directly (closures are handled via PersistentVariable) + RuntimeScalar codeScalar = new RuntimeScalar((RuntimeCode) subCode); + int constIdx = addToConstantPool(codeScalar); + emit(Opcodes.LOAD_CONST); emitReg(codeReg); - emit(templateIdx); - emit(closureVarIndices.size()); - for (int regIdx : closureVarIndices) { - emit(regIdx); - } + emit(constIdx); } - // Step 7: Store in global namespace + // Step 6: Store in global namespace String fullName = node.name; if (!fullName.contains("::")) { - fullName = "main::" + fullName; + fullName = 
getCurrentPackage() + "::" + fullName; // Use getCurrentPackage() for proper package tracking } int nameIdx = addToStringPool(fullName); @@ -5424,17 +6156,30 @@ private void visitAnonymousSubroutine(SubroutineNode node) { } // Step 3: Create a new BytecodeCompiler for the subroutine body - BytecodeCompiler subCompiler = new BytecodeCompiler(this.sourceName, node.getIndex(), this.errorUtil); - - // Step 4: Pre-populate sub-compiler's variable scope with captured variables - for (String varName : closureVarNames) { - subCompiler.addVariable(varName, "my"); + // Build a variable registry from current scope to pass to sub-compiler + // This allows nested closures to see grandparent scope variables + Map parentRegistry = new HashMap<>(); + parentRegistry.put("this", 0); + parentRegistry.put("@_", 1); + parentRegistry.put("wantarray", 2); + + // Add captured variables with adjusted indices (starting at 3) + for (int i = 0; i < closureVarNames.size(); i++) { + parentRegistry.put(closureVarNames.get(i), 3 + i); } - // Step 5: Compile the subroutine body + BytecodeCompiler subCompiler = new BytecodeCompiler( + this.sourceName, + node.getIndex(), + this.errorUtil, + parentRegistry // Pass parent variable registry for nested closure support + ); + + // Step 4: Compile the subroutine body + // Sub-compiler will use parentRegistry to resolve captured variables InterpretedCode subCode = subCompiler.compile(node.block); - // Step 6: Create closure or simple code ref + // Step 5: Create closure or simple code ref int codeReg = allocateRegister(); if (closureVarIndices.isEmpty()) { diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index 519480d88..7114dd6dd 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -1100,10 +1100,10 @@ public static RuntimeList execute(InterpretedCode code, 
RuntimeArray args, int c } case Opcodes.ADD_ASSIGN: { - // Add and assign: rd = rd + rs + // Add and assign: rd += rs (modifies rd in place) int rd = bytecode[pc++]; int rs = bytecode[pc++]; - registers[rd] = MathOperators.add( + MathOperators.addAssign( (RuntimeScalar) registers[rd], (RuntimeScalar) registers[rs] ); @@ -1111,22 +1111,394 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.ADD_ASSIGN_INT: { - // Add immediate and assign: rd = rd + imm + // Add immediate and assign: rd += imm (modifies rd in place) int rd = bytecode[pc++]; int immediate = readInt(bytecode, pc); pc += 2; - registers[rd] = MathOperators.add((RuntimeScalar) registers[rd], immediate); + RuntimeScalar result = MathOperators.add((RuntimeScalar) registers[rd], immediate); + ((RuntimeScalar) registers[rd]).set(result); break; } case Opcodes.STRING_CONCAT_ASSIGN: { - // String concatenation and assign: rd .= rs + // String concatenation and assign: rd .= rs (modifies rd in place) int rd = bytecode[pc++]; int rs = bytecode[pc++]; - registers[rd] = StringOperators.stringConcat( + RuntimeScalar result = StringOperators.stringConcat( + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + ((RuntimeScalar) registers[rd]).set(result); + break; + } + + case Opcodes.BITWISE_AND_ASSIGN: { + // Bitwise AND assignment: rd &= rs (modifies rd in place) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar result = org.perlonjava.operators.BitwiseOperators.bitwiseAndBinary( + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + ((RuntimeScalar) registers[rd]).set(result); + break; + } + + case Opcodes.BITWISE_OR_ASSIGN: { + // Bitwise OR assignment: rd |= rs (modifies rd in place) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar result = org.perlonjava.operators.BitwiseOperators.bitwiseOrBinary( + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + ((RuntimeScalar) 
registers[rd]).set(result); + break; + } + + case Opcodes.BITWISE_XOR_ASSIGN: { + // Bitwise XOR assignment: rd ^= rs (modifies rd in place) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar result = org.perlonjava.operators.BitwiseOperators.bitwiseXorBinary( + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + ((RuntimeScalar) registers[rd]).set(result); + break; + } + + case Opcodes.STRING_BITWISE_AND_ASSIGN: { + // String bitwise AND assignment: rd &.= rs (modifies rd in place) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar result = org.perlonjava.operators.BitwiseOperators.bitwiseAndDot( + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + ((RuntimeScalar) registers[rd]).set(result); + break; + } + + case Opcodes.STRING_BITWISE_OR_ASSIGN: { + // String bitwise OR assignment: rd |.= rs (modifies rd in place) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar result = org.perlonjava.operators.BitwiseOperators.bitwiseOrDot( (RuntimeScalar) registers[rd], (RuntimeScalar) registers[rs] ); + ((RuntimeScalar) registers[rd]).set(result); + break; + } + + case Opcodes.STRING_BITWISE_XOR_ASSIGN: { + // String bitwise XOR assignment: rd ^.= rs (modifies rd in place) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar result = org.perlonjava.operators.BitwiseOperators.bitwiseXorDot( + (RuntimeScalar) registers[rd], + (RuntimeScalar) registers[rs] + ); + ((RuntimeScalar) registers[rd]).set(result); + break; + } + + case Opcodes.BITWISE_AND_BINARY: { + // Numeric bitwise AND: rd = rs1 binary& rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseAndBinary( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + + case Opcodes.BITWISE_OR_BINARY: { + // Numeric bitwise OR: rd = rs1 binary| rs2 + int rd = bytecode[pc++]; + int rs1 = 
bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseOrBinary( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + + case Opcodes.BITWISE_XOR_BINARY: { + // Numeric bitwise XOR: rd = rs1 binary^ rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseXorBinary( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + + case Opcodes.STRING_BITWISE_AND: { + // String bitwise AND: rd = rs1 &. rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseAndDot( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + + case Opcodes.STRING_BITWISE_OR: { + // String bitwise OR: rd = rs1 |. rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseOrDot( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + + case Opcodes.STRING_BITWISE_XOR: { + // String bitwise XOR: rd = rs1 ^. rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseXorDot( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + + case Opcodes.BITWISE_NOT_BINARY: { + // Numeric bitwise NOT: rd = binary~ rs + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseNotBinary( + (RuntimeScalar) registers[rs] + ); + break; + } + + case Opcodes.BITWISE_NOT_STRING: { + // String bitwise NOT: rd = ~. 
rs + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.BitwiseOperators.bitwiseNotDot( + (RuntimeScalar) registers[rs] + ); + break; + } + + // File test and stat operations + case Opcodes.STAT: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + int ctx = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.Stat.stat((RuntimeScalar) registers[rs], ctx); + break; + } + + case Opcodes.LSTAT: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + int ctx = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.Stat.lstat((RuntimeScalar) registers[rs], ctx); + break; + } + + case Opcodes.FILETEST_R: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-r", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_W: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-w", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_X: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-x", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_O: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-o", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_R_REAL: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-R", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_W_REAL: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-W", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_X_REAL: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = 
org.perlonjava.operators.FileTestOperator.fileTest("-X", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_O_REAL: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-O", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_E: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-e", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_Z: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-z", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_S: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-s", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_F: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-f", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_D: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-d", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_L: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-l", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_P: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-p", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_S_UPPER: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-S", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_B: { + int rd = 
bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-b", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_C: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-c", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_T: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-t", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_U: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-u", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_G: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-g", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_K: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-k", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_T_UPPER: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-T", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_B_UPPER: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-B", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_M: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-M", (RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.FILETEST_A: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-A", (RuntimeScalar) 
registers[rs]); + break; + } + + case Opcodes.FILETEST_C_UPPER: { + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.FileTestOperator.fileTest("-C", (RuntimeScalar) registers[rs]); break; } @@ -1168,14 +1540,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.MATCH_REGEX: { - // Match regex: rd = RuntimeRegex.matchRegex(string, regex, ctx) + // Match regex: rd = RuntimeRegex.matchRegex(quotedRegex, string, ctx) int rd = bytecode[pc++]; int stringReg = bytecode[pc++]; int regexReg = bytecode[pc++]; int ctx = bytecode[pc++]; registers[rd] = org.perlonjava.regex.RuntimeRegex.matchRegex( - (RuntimeScalar) registers[stringReg], - (RuntimeScalar) registers[regexReg], + (RuntimeScalar) registers[regexReg], // quotedRegex first + (RuntimeScalar) registers[stringReg], // string second ctx ); break; @@ -1189,6 +1561,59 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.WANTARRAY: { + // Get wantarray context: rd = Operator.wantarray(wantarrayReg) + int rd = bytecode[pc++]; + int wantarrayReg = bytecode[pc++]; + int ctx = ((RuntimeScalar) registers[wantarrayReg]).getInt(); + registers[rd] = org.perlonjava.operators.Operator.wantarray(ctx); + break; + } + + case Opcodes.REQUIRE: { + // Require module or version: rd = ModuleOperators.require(rs) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.ModuleOperators.require((RuntimeScalar) registers[rs]); + break; + } + + case Opcodes.POS: { + // Get regex position: rd = rs.pos() + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + registers[rd] = ((RuntimeScalar) registers[rs]).pos(); + break; + } + + case Opcodes.INDEX: { + // Find substring position: rd = StringOperators.index(str, substr, pos) + int rd = bytecode[pc++]; + int strReg = bytecode[pc++]; + int substrReg = bytecode[pc++]; + int posReg = bytecode[pc++]; + registers[rd] 
= org.perlonjava.operators.StringOperators.index( + (RuntimeScalar) registers[strReg], + (RuntimeScalar) registers[substrReg], + (RuntimeScalar) registers[posReg] + ); + break; + } + + case Opcodes.RINDEX: { + // Find substring position from end: rd = StringOperators.rindex(str, substr, pos) + int rd = bytecode[pc++]; + int strReg = bytecode[pc++]; + int substrReg = bytecode[pc++]; + int posReg = bytecode[pc++]; + registers[rd] = org.perlonjava.operators.StringOperators.rindex( + (RuntimeScalar) registers[strReg], + (RuntimeScalar) registers[substrReg], + (RuntimeScalar) registers[posReg] + ); + break; + } + case Opcodes.PRE_AUTOINCREMENT: { // Pre-increment: ++rd int rd = bytecode[pc++]; @@ -1197,9 +1622,11 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.POST_AUTOINCREMENT: { - // Post-increment: rd++ - int rd = bytecode[pc++]; - ((RuntimeScalar) registers[rd]).postAutoIncrement(); + // Post-increment: rd = rs++ + // The postAutoIncrement() method increments the variable and returns the OLD value + int rd = bytecode[pc++]; // Destination register for old value + int rs = bytecode[pc++]; // Source variable register + registers[rd] = ((RuntimeScalar) registers[rs]).postAutoIncrement(); break; } @@ -1211,9 +1638,11 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.POST_AUTODECREMENT: { - // Post-decrement: rd-- - int rd = bytecode[pc++]; - ((RuntimeScalar) registers[rd]).postAutoDecrement(); + // Post-decrement: rd = rs-- + // The postAutoDecrement() method decrements the variable and returns the OLD value + int rd = bytecode[pc++]; // Destination register for old value + int rs = bytecode[pc++]; // Source variable register + registers[rd] = ((RuntimeScalar) registers[rs]).postAutoDecrement(); break; } @@ -1236,17 +1665,13 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.WARN: { - // Warn with message: warn(rs) - int 
warnRs = bytecode[pc++]; - RuntimeBase message = registers[warnRs]; - - // Get token index for this warn location if available - Integer tokenIndex = code.pcToTokenIndex != null - ? code.pcToTokenIndex.get(pc - 2) // PC before we read register - : null; + // Warn with message and precomputed location: warn(msgReg, locationReg) + int msgReg = bytecode[pc++]; + int locationReg = bytecode[pc++]; + RuntimeBase message = registers[msgReg]; + RuntimeScalar where = (RuntimeScalar) registers[locationReg]; - // Call WarnDie.warn() with proper parameters - RuntimeScalar where = new RuntimeScalar(" at " + code.sourceName + " line " + code.sourceLine); + // Call WarnDie.warn() with precomputed location WarnDie.warn(message, where, code.sourceName, code.sourceLine); break; } diff --git a/src/main/java/org/perlonjava/interpreter/EvalStringHandler.java b/src/main/java/org/perlonjava/interpreter/EvalStringHandler.java index 09a7c072c..dc455149a 100644 --- a/src/main/java/org/perlonjava/interpreter/EvalStringHandler.java +++ b/src/main/java/org/perlonjava/interpreter/EvalStringHandler.java @@ -127,7 +127,7 @@ public static RuntimeScalar evalString(String perlCode, errorUtil, adjustedRegistry // Pass adjusted registry for variable capture ); - InterpretedCode evalCode = compiler.compile(ast); + InterpretedCode evalCode = compiler.compile(ast, ctx); // Pass ctx for context propagation // Step 5: Attach captured variables to eval'd code if (capturedVars.length > 0) { @@ -197,7 +197,7 @@ public static RuntimeScalar evalString(String perlCode, sourceName + " (eval)", sourceLine ); - InterpretedCode evalCode = compiler.compile(ast); + InterpretedCode evalCode = compiler.compile(ast, ctx); // Pass ctx for context propagation // Attach captured variables evalCode = evalCode.withCapturedVars(capturedVars); diff --git a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java index 37299ede4..a44329265 100644 --- 
a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java +++ b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java @@ -452,6 +452,229 @@ public String disassemble() { rs = bytecode[pc++]; sb.append("STRING_CONCAT_ASSIGN r").append(rd).append(" .= r").append(rs).append("\n"); break; + case Opcodes.BITWISE_AND_ASSIGN: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("BITWISE_AND_ASSIGN r").append(rd).append(" &= r").append(rs).append("\n"); + break; + case Opcodes.BITWISE_OR_ASSIGN: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("BITWISE_OR_ASSIGN r").append(rd).append(" |= r").append(rs).append("\n"); + break; + case Opcodes.BITWISE_XOR_ASSIGN: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("BITWISE_XOR_ASSIGN r").append(rd).append(" ^= r").append(rs).append("\n"); + break; + case Opcodes.STRING_BITWISE_AND_ASSIGN: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("STRING_BITWISE_AND_ASSIGN r").append(rd).append(" &.= r").append(rs).append("\n"); + break; + case Opcodes.STRING_BITWISE_OR_ASSIGN: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("STRING_BITWISE_OR_ASSIGN r").append(rd).append(" |.= r").append(rs).append("\n"); + break; + case Opcodes.STRING_BITWISE_XOR_ASSIGN: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("STRING_BITWISE_XOR_ASSIGN r").append(rd).append(" ^.= r").append(rs).append("\n"); + break; + case Opcodes.BITWISE_AND_BINARY: + rd = bytecode[pc++]; + int andRs1 = bytecode[pc++]; + int andRs2 = bytecode[pc++]; + sb.append("BITWISE_AND_BINARY r").append(rd).append(" = r").append(andRs1).append(" & r").append(andRs2).append("\n"); + break; + case Opcodes.BITWISE_OR_BINARY: + rd = bytecode[pc++]; + int orRs1 = bytecode[pc++]; + int orRs2 = bytecode[pc++]; + sb.append("BITWISE_OR_BINARY r").append(rd).append(" = r").append(orRs1).append(" | r").append(orRs2).append("\n"); + break; + case Opcodes.BITWISE_XOR_BINARY: + rd = bytecode[pc++]; + int xorRs1 = bytecode[pc++]; + int 
xorRs2 = bytecode[pc++]; + sb.append("BITWISE_XOR_BINARY r").append(rd).append(" = r").append(xorRs1).append(" ^ r").append(xorRs2).append("\n"); + break; + case Opcodes.STRING_BITWISE_AND: + rd = bytecode[pc++]; + int strAndRs1 = bytecode[pc++]; + int strAndRs2 = bytecode[pc++]; + sb.append("STRING_BITWISE_AND r").append(rd).append(" = r").append(strAndRs1).append(" &. r").append(strAndRs2).append("\n"); + break; + case Opcodes.STRING_BITWISE_OR: + rd = bytecode[pc++]; + int strOrRs1 = bytecode[pc++]; + int strOrRs2 = bytecode[pc++]; + sb.append("STRING_BITWISE_OR r").append(rd).append(" = r").append(strOrRs1).append(" |. r").append(strOrRs2).append("\n"); + break; + case Opcodes.STRING_BITWISE_XOR: + rd = bytecode[pc++]; + int strXorRs1 = bytecode[pc++]; + int strXorRs2 = bytecode[pc++]; + sb.append("STRING_BITWISE_XOR r").append(rd).append(" = r").append(strXorRs1).append(" ^. r").append(strXorRs2).append("\n"); + break; + case Opcodes.BITWISE_NOT_BINARY: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("BITWISE_NOT_BINARY r").append(rd).append(" = ~r").append(rs).append("\n"); + break; + case Opcodes.BITWISE_NOT_STRING: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("BITWISE_NOT_STRING r").append(rd).append(" = ~.r").append(rs).append("\n"); + break; + case Opcodes.STAT: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + int statCtx = bytecode[pc++]; + sb.append("STAT r").append(rd).append(" = stat(r").append(rs).append(", ctx=").append(statCtx).append(")\n"); + break; + case Opcodes.LSTAT: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + int lstatCtx = bytecode[pc++]; + sb.append("LSTAT r").append(rd).append(" = lstat(r").append(rs).append(", ctx=").append(lstatCtx).append(")\n"); + break; + case Opcodes.FILETEST_R: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_R r").append(rd).append(" = -r r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_W: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + 
sb.append("FILETEST_W r").append(rd).append(" = -w r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_X: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_X r").append(rd).append(" = -x r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_O: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_O r").append(rd).append(" = -o r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_R_REAL: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_R_REAL r").append(rd).append(" = -R r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_W_REAL: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_W_REAL r").append(rd).append(" = -W r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_X_REAL: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_X_REAL r").append(rd).append(" = -X r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_O_REAL: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_O_REAL r").append(rd).append(" = -O r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_E: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_E r").append(rd).append(" = -e r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_Z: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_Z r").append(rd).append(" = -z r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_S: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_S r").append(rd).append(" = -s r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_F: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_F r").append(rd).append(" = -f r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_D: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_D r").append(rd).append(" = -d r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_L: + rd = bytecode[pc++]; + rs = 
bytecode[pc++]; + sb.append("FILETEST_L r").append(rd).append(" = -l r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_P: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_P r").append(rd).append(" = -p r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_S_UPPER: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_S_UPPER r").append(rd).append(" = -S r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_B: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_B r").append(rd).append(" = -b r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_C: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_C r").append(rd).append(" = -c r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_T: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_T r").append(rd).append(" = -t r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_U: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_U r").append(rd).append(" = -u r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_G: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_G r").append(rd).append(" = -g r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_K: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_K r").append(rd).append(" = -k r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_T_UPPER: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_T_UPPER r").append(rd).append(" = -T r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_B_UPPER: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_B_UPPER r").append(rd).append(" = -B r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_M: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_M r").append(rd).append(" = -M r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_A: + rd = bytecode[pc++]; + 
rs = bytecode[pc++]; + sb.append("FILETEST_A r").append(rd).append(" = -A r").append(rs).append("\n"); + break; + case Opcodes.FILETEST_C_UPPER: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("FILETEST_C_UPPER r").append(rd).append(" = -C r").append(rs).append("\n"); + break; case Opcodes.PUSH_LOCAL_VARIABLE: rs = bytecode[pc++]; sb.append("PUSH_LOCAL_VARIABLE r").append(rs).append("\n"); @@ -485,6 +708,37 @@ public String disassemble() { rs = bytecode[pc++]; sb.append("CHOMP r").append(rd).append(" = chomp(r").append(rs).append(")\n"); break; + case Opcodes.WANTARRAY: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("WANTARRAY r").append(rd).append(" = wantarray(r").append(rs).append(")\n"); + break; + case Opcodes.REQUIRE: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("REQUIRE r").append(rd).append(" = require(r").append(rs).append(")\n"); + break; + case Opcodes.POS: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("POS r").append(rd).append(" = pos(r").append(rs).append(")\n"); + break; + case Opcodes.INDEX: { + rd = bytecode[pc++]; + int idxStrReg = bytecode[pc++]; + int idxSubstrReg = bytecode[pc++]; + int idxPosReg = bytecode[pc++]; + sb.append("INDEX r").append(rd).append(" = index(r").append(idxStrReg).append(", r").append(idxSubstrReg).append(", r").append(idxPosReg).append(")\n"); + break; + } + case Opcodes.RINDEX: { + rd = bytecode[pc++]; + int ridxStrReg = bytecode[pc++]; + int ridxSubstrReg = bytecode[pc++]; + int ridxPosReg = bytecode[pc++]; + sb.append("RINDEX r").append(rd).append(" = rindex(r").append(ridxStrReg).append(", r").append(ridxSubstrReg).append(", r").append(ridxPosReg).append(")\n"); + break; + } case Opcodes.PRE_AUTOINCREMENT: rd = bytecode[pc++]; sb.append("PRE_AUTOINCREMENT ++r").append(rd).append("\n"); diff --git a/src/main/java/org/perlonjava/interpreter/Opcodes.java b/src/main/java/org/perlonjava/interpreter/Opcodes.java index 713bbaff9..dc085188b 100644 --- 
a/src/main/java/org/perlonjava/interpreter/Opcodes.java +++ b/src/main/java/org/perlonjava/interpreter/Opcodes.java @@ -662,8 +662,152 @@ public class Opcodes { * Format: CHOMP rd rs */ public static final short CHOMP = 168; - // ================================================================= - // OPCODES 169-32767: RESERVED FOR FUTURE OPERATIONS + /** Get wantarray context: rd = Operator.wantarray(wantarrayReg) + * Format: WANTARRAY rd wantarrayReg */ + public static final short WANTARRAY = 169; + + /** Require module or version: rd = ModuleOperators.require(rs) + * Format: REQUIRE rd rs */ + public static final short REQUIRE = 170; + + /** Get regex position: rd = rs.pos() (returns lvalue for assignment) + * Format: POS rd rs */ + public static final short POS = 171; + + /** Find substring position: rd = StringOperators.index(str, substr, pos) + * Format: INDEX rd str substr pos */ + public static final short INDEX = 172; + + /** Find substring position from end: rd = StringOperators.rindex(str, substr, pos) + * Format: RINDEX rd str substr pos */ + public static final short RINDEX = 173; + + /** Bitwise AND assignment: target &= value + * Format: BITWISE_AND_ASSIGN target value */ + public static final short BITWISE_AND_ASSIGN = 174; + + /** Bitwise OR assignment: target |= value + * Format: BITWISE_OR_ASSIGN target value */ + public static final short BITWISE_OR_ASSIGN = 175; + + /** Bitwise XOR assignment: target ^= value + * Format: BITWISE_XOR_ASSIGN target value */ + public static final short BITWISE_XOR_ASSIGN = 176; + + /** String bitwise AND assignment: target &.= value + * Format: STRING_BITWISE_AND_ASSIGN target value */ + public static final short STRING_BITWISE_AND_ASSIGN = 177; + + /** String bitwise OR assignment: target |.= value + * Format: STRING_BITWISE_OR_ASSIGN target value */ + public static final short STRING_BITWISE_OR_ASSIGN = 178; + + /** String bitwise XOR assignment: target ^.= value + * Format: STRING_BITWISE_XOR_ASSIGN target value 
*/ + public static final short STRING_BITWISE_XOR_ASSIGN = 179; + + /** Numeric bitwise AND: rd = rs1 binary& rs2 + * Format: BITWISE_AND_BINARY rd rs1 rs2 */ + public static final short BITWISE_AND_BINARY = 180; + + /** Numeric bitwise OR: rd = rs1 binary| rs2 + * Format: BITWISE_OR_BINARY rd rs1 rs2 */ + public static final short BITWISE_OR_BINARY = 181; + + /** Numeric bitwise XOR: rd = rs1 binary^ rs2 + * Format: BITWISE_XOR_BINARY rd rs1 rs2 */ + public static final short BITWISE_XOR_BINARY = 182; + + /** String bitwise AND: rd = rs1 &. rs2 + * Format: STRING_BITWISE_AND rd rs1 rs2 */ + public static final short STRING_BITWISE_AND = 183; + + /** String bitwise OR: rd = rs1 |. rs2 + * Format: STRING_BITWISE_OR rd rs1 rs2 */ + public static final short STRING_BITWISE_OR = 184; + + /** String bitwise XOR: rd = rs1 ^. rs2 + * Format: STRING_BITWISE_XOR rd rs1 rs2 */ + public static final short STRING_BITWISE_XOR = 185; + + /** Numeric bitwise NOT: rd = binary~ rs + * Format: BITWISE_NOT_BINARY rd rs */ + public static final short BITWISE_NOT_BINARY = 186; + + /** String bitwise NOT: rd = ~. 
rs + * Format: BITWISE_NOT_STRING rd rs */ + public static final short BITWISE_NOT_STRING = 187; + + // ================================================================= + // FILE TEST AND STAT OPERATIONS (188-216) + // ================================================================= + + /** stat operator: rd = stat(rs) [context] + * Format: STAT rd rs ctx */ + public static final short STAT = 188; + + /** lstat operator: rd = lstat(rs) [context] + * Format: LSTAT rd rs ctx */ + public static final short LSTAT = 189; + + // File test operators (unary operators returning boolean or value) + /** -r FILE: readable */ + public static final short FILETEST_R = 190; + /** -w FILE: writable */ + public static final short FILETEST_W = 191; + /** -x FILE: executable */ + public static final short FILETEST_X = 192; + /** -o FILE: owned by effective uid */ + public static final short FILETEST_O = 193; + /** -R FILE: readable by real uid */ + public static final short FILETEST_R_REAL = 194; + /** -W FILE: writable by real uid */ + public static final short FILETEST_W_REAL = 195; + /** -X FILE: executable by real uid */ + public static final short FILETEST_X_REAL = 196; + /** -O FILE: owned by real uid */ + public static final short FILETEST_O_REAL = 197; + /** -e FILE: exists */ + public static final short FILETEST_E = 198; + /** -z FILE: zero size */ + public static final short FILETEST_Z = 199; + /** -s FILE: size in bytes */ + public static final short FILETEST_S = 200; + /** -f FILE: plain file */ + public static final short FILETEST_F = 201; + /** -d FILE: directory */ + public static final short FILETEST_D = 202; + /** -l FILE: symbolic link */ + public static final short FILETEST_L = 203; + /** -p FILE: named pipe */ + public static final short FILETEST_P = 204; + /** -S FILE: socket */ + public static final short FILETEST_S_UPPER = 205; + /** -b FILE: block special */ + public static final short FILETEST_B = 206; + /** -c FILE: character special */ + public static 
final short FILETEST_C = 207; + /** -t FILE: tty */ + public static final short FILETEST_T = 208; + /** -u FILE: setuid */ + public static final short FILETEST_U = 209; + /** -g FILE: setgid */ + public static final short FILETEST_G = 210; + /** -k FILE: sticky bit */ + public static final short FILETEST_K = 211; + /** -T FILE: text file */ + public static final short FILETEST_T_UPPER = 212; + /** -B FILE: binary file */ + public static final short FILETEST_B_UPPER = 213; + /** -M FILE: modification age (days) */ + public static final short FILETEST_M = 214; + /** -A FILE: access age (days) */ + public static final short FILETEST_A = 215; + /** -C FILE: inode change age (days) */ + public static final short FILETEST_C_UPPER = 216; + + // ================================================================= + // OPCODES 217-32767: RESERVED FOR FUTURE OPERATIONS // ================================================================= // See PHASE3_OPERATOR_PROMOTIONS.md for promotion strategy. // All SLOWOP_* constants have been removed - use direct opcodes 114-154 instead. 
diff --git a/src/main/java/org/perlonjava/runtime/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/RuntimeCode.java index 7ede786ef..6b5d63b5d 100644 --- a/src/main/java/org/perlonjava/runtime/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/RuntimeCode.java @@ -669,6 +669,12 @@ public static RuntimeList evalStringWithInterpreter( ); evalRuntimeContext.set(runtimeCtx); + InterpretedCode interpretedCode = null; + RuntimeList result; + + // Save dynamic variable level to restore after eval + int dynamicVarLevel = DynamicVariableManager.getLocalLevel(); + try { String evalString = code.toString(); @@ -729,69 +735,164 @@ public static RuntimeList evalStringWithInterpreter( } } - // Parse the eval string - Lexer lexer = new Lexer(evalString); - List tokens = lexer.tokenize(); - - // Create parser context - ScopedSymbolTable parseSymbolTable = capturedSymbolTable.snapShot(); - EmitterContext evalCtx = new EmitterContext( - new JavaClassInfo(), - parseSymbolTable, - null, - null, - ctx.contextType, - true, - new ErrorMessageUtil(evalCompilerOptions.fileName, tokens), - evalCompilerOptions, - ctx.unitcheckBlocks); - - Parser parser = new Parser(evalCtx, tokens); - Node ast = parser.parse(); - - // Run UNITCHECK blocks - runUnitcheckBlocks(evalCtx.unitcheckBlocks); - - // Build adjusted registry for captured variables - // Map variable names to register indices (3+ for captured variables) - Map adjustedRegistry = new HashMap<>(); - adjustedRegistry.put("this", 0); - adjustedRegistry.put("@_", 1); - adjustedRegistry.put("wantarray", 2); - - // Add captured variables starting at register 3 - int captureIndex = 3; - Map capturedVariables = capturedSymbolTable.getAllVisibleVariables(); - for (Map.Entry entry : capturedVariables.entrySet()) { - int index = entry.getKey(); - if (index >= 3) { // Skip reserved registers - String varName = entry.getValue().name(); - adjustedRegistry.put(varName, captureIndex); - captureIndex++; + try { + // Parse the eval string + 
Lexer lexer = new Lexer(evalString); + List tokens = lexer.tokenize(); + + // Create parser context + ScopedSymbolTable parseSymbolTable = capturedSymbolTable.snapShot(); + EmitterContext evalCtx = new EmitterContext( + new JavaClassInfo(), + parseSymbolTable, + null, + null, + callContext, // Use the runtime calling context, not the saved one! + true, + new ErrorMessageUtil(evalCompilerOptions.fileName, tokens), + evalCompilerOptions, + ctx.unitcheckBlocks); + + Parser parser = new Parser(evalCtx, tokens); + Node ast = parser.parse(); + + // Run UNITCHECK blocks + runUnitcheckBlocks(evalCtx.unitcheckBlocks); + + // Build adjusted registry for captured variables + // Map variable names to register indices (3+ for captured variables) + Map adjustedRegistry = new HashMap<>(); + adjustedRegistry.put("this", 0); + adjustedRegistry.put("@_", 1); + adjustedRegistry.put("wantarray", 2); + + // Add captured variables starting at register 3 + int captureIndex = 3; + Map capturedVariables = capturedSymbolTable.getAllVisibleVariables(); + for (Map.Entry entry : capturedVariables.entrySet()) { + int index = entry.getKey(); + if (index >= 3) { // Skip reserved registers + String varName = entry.getValue().name(); + adjustedRegistry.put(varName, captureIndex); + captureIndex++; + } + } + + // Compile to InterpretedCode with variable registry + BytecodeCompiler compiler = new BytecodeCompiler( + evalCompilerOptions.fileName, + 1, + evalCtx.errorUtil, + adjustedRegistry); + interpretedCode = compiler.compile(ast, evalCtx); + + // Set captured variables + if (runtimeValues.length > 0) { + RuntimeBase[] capturedVars2 = new RuntimeBase[runtimeValues.length]; + for (int i = 0; i < runtimeValues.length; i++) { + capturedVars2[i] = (RuntimeBase) runtimeValues[i]; + } + interpretedCode = interpretedCode.withCapturedVars(capturedVars2); + } + + } catch (Throwable e) { + // Compilation error in eval-string + // Set the global error variable "$@" + RuntimeScalar err = 
GlobalVariable.getGlobalVariable("main::@"); + err.set(e.getMessage()); + + // Check if $SIG{__DIE__} handler is defined + RuntimeScalar sig = GlobalVariable.getGlobalHash("main::SIG").get("__DIE__"); + if (sig.getDefinedBoolean()) { + // Call the $SIG{__DIE__} handler (similar to what die() does) + RuntimeScalar sigHandler = new RuntimeScalar(sig); + + // Undefine $SIG{__DIE__} before calling to avoid infinite recursion + int level = DynamicVariableManager.getLocalLevel(); + DynamicVariableManager.pushLocalVariable(sig); + + try { + RuntimeArray handlerArgs = new RuntimeArray(); + RuntimeArray.push(handlerArgs, new RuntimeScalar(err)); + apply(sigHandler, handlerArgs, RuntimeContextType.SCALAR); + } catch (Throwable handlerException) { + // If the handler dies, use its payload as the new error + if (handlerException instanceof RuntimeException && handlerException.getCause() instanceof PerlDieException) { + PerlDieException pde = (PerlDieException) handlerException.getCause(); + RuntimeBase handlerPayload = pde.getPayload(); + if (handlerPayload != null) { + err.set(handlerPayload.getFirst()); + } + } else if (handlerException instanceof PerlDieException) { + PerlDieException pde = (PerlDieException) handlerException; + RuntimeBase handlerPayload = pde.getPayload(); + if (handlerPayload != null) { + err.set(handlerPayload.getFirst()); + } + } + // If handler throws other exceptions, ignore them (keep original error in $@) + } finally { + // Restore $SIG{__DIE__} + DynamicVariableManager.popToLocalLevel(level); + } } - } - // Compile to InterpretedCode with variable registry - BytecodeCompiler compiler = new BytecodeCompiler( - evalCompilerOptions.fileName, - 1, - evalCtx.errorUtil, - adjustedRegistry); - InterpretedCode interpretedCode = compiler.compile(ast); - - // Set captured variables - if (runtimeValues.length > 0) { - RuntimeBase[] capturedVars2 = new RuntimeBase[runtimeValues.length]; - for (int i = 0; i < runtimeValues.length; i++) { - capturedVars2[i] = 
(RuntimeBase) runtimeValues[i]; + // Return undef/empty list to signal compilation failure + if (callContext == RuntimeContextType.LIST) { + return new RuntimeList(); + } else { + return new RuntimeList(new RuntimeScalar()); } - interpretedCode = interpretedCode.withCapturedVars(capturedVars2); } - // Execute directly and return result - return interpretedCode.apply(args, callContext); + // Execute the interpreted code + try { + result = interpretedCode.apply(args, callContext); + + // Clear $@ on successful execution + RuntimeScalar err = GlobalVariable.getGlobalVariable("main::@"); + err.set(""); + + return result; + + } catch (PerlDieException e) { + // Runtime error - set $@ and return undef/empty list + RuntimeScalar err = GlobalVariable.getGlobalVariable("main::@"); + RuntimeBase payload = e.getPayload(); + if (payload != null) { + err.set(payload.getFirst()); + } else { + err.set("Died"); + } + + // Return undef/empty list + if (callContext == RuntimeContextType.LIST) { + return new RuntimeList(); + } else { + return new RuntimeList(new RuntimeScalar()); + } + + } catch (Throwable e) { + // Other runtime errors - set $@ and return undef/empty list + RuntimeScalar err = GlobalVariable.getGlobalVariable("main::@"); + String message = e.getMessage(); + if (message == null || message.isEmpty()) { + message = e.getClass().getSimpleName(); + } + err.set(message); + + // Return undef/empty list + if (callContext == RuntimeContextType.LIST) { + return new RuntimeList(); + } else { + return new RuntimeList(new RuntimeScalar()); + } + } } finally { + // Restore dynamic variables (local) to their state before eval + DynamicVariableManager.popToLocalLevel(dynamicVarLevel); + // Clean up ThreadLocal evalRuntimeContext.remove(); } @@ -1278,7 +1379,7 @@ public static RuntimeList apply(RuntimeScalar runtimeScalar, String subroutineNa // Method to apply (execute) a subroutine reference (legacy method for compatibility) public static RuntimeList apply(RuntimeScalar 
runtimeScalar, String subroutineName, RuntimeBase list, int callContext) { - + // WORKAROUND for eval-defined subs not filling lexical forward declarations: // If the RuntimeScalar is undef (forward declaration never filled), // silently return undef so tests can continue running.