From f70bac58a63a7b9181b342126b2208768797587d Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 29 Apr 2026 15:16:52 +0200 Subject: [PATCH] fix(values,GCString): hash values in scalar context, and split GCString shim Two fixes uncovered by `jcpan -t User::Identity`: 1. `values %h` returned the LAST value (instead of the element count) when used as the final expression of a sub called in scalar context. `RuntimeHash.keys()` already set `scalarContextSize` on the result so `getList()` wrapped (rather than copied) it; `values()` did not, so the resulting list scalarized to its last element. Mirror keys() by setting `scalarContextSize` in both the plain and TIED_HASH paths of `RuntimeHash.values()`. Fixes both JVM and interpreter backends. 2. The pure-Perl `Unicode::GCString` shim used by Text::vCard et al was defined as a second package inside `lib/Unicode/LineBreak.pm`. Two consequences: * `use Unicode::GCString` (without first loading LineBreak) could not find a .pm file. String::Print does exactly that. * MakeMaker's "skip files bundled in the PerlOnJava JAR" logic only matched `Unicode/LineBreak.pm`, so installing CPAN's Unicode-LineBreak distribution would shadow the shim with the broken XS-needing version. Move the shim into its own file `src/main/perl/lib/Unicode/GCString.pm` and have LineBreak.pm `require` it. Now both files are visible under jar:PERL5LIB and the SKIP logic preserves both. Together these unblock the full User::Identity dependency chain (Log::Report -> Log::Report::Util -> String::Print -> Unicode::GCString) so `jcpan -t User::Identity` now passes all 114 subtests. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +- .../runtime/runtimetypes/RuntimeHash.java | 9 ++- src/main/perl/lib/Unicode/GCString.pm | 78 +++++++++++++++++++ src/main/perl/lib/Unicode/LineBreak.pm | 60 +++----------- 4 files changed, 97 insertions(+), 54 deletions(-) create mode 100644 src/main/perl/lib/Unicode/GCString.pm diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index e5f940598..11d36492f 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "ff1da2bbb"; + public static final String gitCommitId = "775a74956"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 29 2026 13:59:18"; + public static final String buildTimestamp = "Apr 29 2026 15:16:52"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java index 4d80cbda7..d1fc24099 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java @@ -923,7 +923,12 @@ public RuntimeArray values() { } isKey = !isKey; } - hashIterator = null; // keys resets the iterator + hashIterator = null; // values resets the iterator + // Set scalarContextSize so that values() in scalar context returns the count. + // Without this, when `values %h` is the last expression of a sub called in + // scalar context, getList() copies the elements into a RuntimeList whose + // scalar() yields the LAST element instead of the count. + list.scalarContextSize = list.elements.size(); return list; } @@ -936,6 +941,8 @@ public RuntimeArray values() { list.elements.add(value); // push an alias to the value (direct reference, not a copy) } hashIterator = null; // values resets the iterator + // Mirror keys(): mark this array so scalar context returns the count, not the last value. + list.scalarContextSize = list.elements.size(); return list; } diff --git a/src/main/perl/lib/Unicode/GCString.pm b/src/main/perl/lib/Unicode/GCString.pm new file mode 100644 index 000000000..95b4790be --- /dev/null +++ b/src/main/perl/lib/Unicode/GCString.pm @@ -0,0 +1,78 @@ +package Unicode::GCString; + +# Minimal pure-Perl shim of Unicode::GCString for PerlOnJava. +# +# The original module is part of the XS-based Unicode::LineBreak +# distribution and provides a grapheme-cluster string API. PerlOnJava +# ships only the tiny subset of GCString that downstream modules +# (String::Print, Text::vCard, ...) actually use. 
+# +# If a CPAN install of Unicode::LineBreak would otherwise overwrite +# this file with the XS-needing version, MakeMaker.pm in PerlOnJava +# detects the bundled copy in jar:PERL5LIB/Unicode/GCString.pm and +# skips it, preserving this shim. +# +# If you need the full functionality, please open an issue. + +use strict; +use warnings; + +our $VERSION = '2019.001'; + +sub new { + my ($class, $str) = @_; + $str = '' unless defined $str; + my @clusters = ($str =~ /(\X)/gs); + return bless { str => $str, clusters => \@clusters }, $class; +} + +sub length { return scalar @{ $_[0]->{clusters} }; } + +sub as_string { return $_[0]->{str}; } + +sub substr { + my ($self, $start, $len) = @_; + my @c = @{ $self->{clusters} }; + my $total = scalar @c; + $start = 0 if !defined $start; + if ($start < 0) { $start = $total + $start; } + $start = 0 if $start < 0; + $start = $total if $start > $total; + my $end; + if (!defined $len) { + $end = $total; + } elsif ($len < 0) { + $end = $total + $len; + } else { + $end = $start + $len; + } + $end = $start if $end < $start; + $end = $total if $end > $total; + my $piece = join '', @c[$start .. $end - 1]; + return Unicode::GCString->new($piece); +} + +# Approximate column width (1 per grapheme cluster). +sub columns { return scalar @{ $_[0]->{clusters} }; } + +use overload + '""' => \&as_string, + 'bool' => sub { CORE::length( $_[0]->{str} ) > 0 }, + '0+' => \&length, + fallback => 1; + +1; + +__END__ + +=head1 NAME + +Unicode::GCString - Minimal PerlOnJava shim + +=head1 DESCRIPTION + +Provides just enough of L<Unicode::GCString> for modules like +L<String::Print> and L<Text::vCard> that only need basic grapheme +cluster splitting. + +=cut diff --git a/src/main/perl/lib/Unicode/LineBreak.pm b/src/main/perl/lib/Unicode/LineBreak.pm index 46c781d17..ca1be2ead 100644 --- a/src/main/perl/lib/Unicode/LineBreak.pm +++ b/src/main/perl/lib/Unicode/LineBreak.pm @@ -39,57 +39,15 @@ sub break { return defined $str ? 
$str : ''; } -package Unicode::GCString; - -# Minimal grapheme-cluster string class. Uses \X to split the string -# into grapheme clusters. Only the methods used by Text::vCard et al -# are implemented: new, length, substr, as_string, columns. - -use strict; -use warnings; - -sub new { - my ($class, $str) = @_; - $str = '' unless defined $str; - my @clusters = ($str =~ /(\X)/gs); - return bless { str => $str, clusters => \@clusters }, $class; -} - -sub length { return scalar @{ $_[0]->{clusters} }; } - -sub as_string { return $_[0]->{str}; } - -# String overload would be nice, but keep it explicit. -sub substr { - my ($self, $start, $len) = @_; - my @c = @{ $self->{clusters} }; - my $total = scalar @c; - $start = 0 if !defined $start; - if ($start < 0) { $start = $total + $start; } - $start = 0 if $start < 0; - $start = $total if $start > $total; - my $end; - if (!defined $len) { - $end = $total; - } elsif ($len < 0) { - $end = $total + $len; - } else { - $end = $start + $len; - } - $end = $start if $end < $start; - $end = $total if $end > $total; - my $piece = join '', @c[$start .. $end - 1]; - return Unicode::GCString->new($piece); -} - -# Approximate column width (1 per grapheme cluster). -sub columns { return scalar @{ $_[0]->{clusters} }; } - -use overload - '""' => \&as_string, - 'bool' => sub { CORE::length( $_[0]->{str} ) > 0 }, - '0+' => \&length, - fallback => 1; +# The Unicode::GCString package now lives in its own file +# (lib/Unicode/GCString.pm) so that: +# * `use Unicode::GCString` works without first loading +# Unicode::LineBreak (e.g. String::Print does this); +# * the MakeMaker SKIP-bundled-file logic detects +# jar:PERL5LIB/Unicode/GCString.pm and refuses to overwrite the +# pure-Perl shim with the XS-needing version from CPAN's +# Unicode-LineBreak distribution. +require Unicode::GCString; 1;