diff --git a/src/main/java/org/perlonjava/codegen/Dereference.java b/src/main/java/org/perlonjava/codegen/Dereference.java index 98370fb27..3ad70927a 100644 --- a/src/main/java/org/perlonjava/codegen/Dereference.java +++ b/src/main/java/org/perlonjava/codegen/Dereference.java @@ -4,6 +4,7 @@ import org.objectweb.asm.Opcodes; import org.perlonjava.astnode.*; import org.perlonjava.astvisitor.EmitterVisitor; +import org.perlonjava.perlmodule.Strict; import org.perlonjava.runtime.PerlCompilerException; import org.perlonjava.runtime.RuntimeContextType; @@ -469,16 +470,32 @@ public static void handleArrowArrayDeref(EmitterVisitor emitterVisitor, BinaryOp Node elem = right.elements.getFirst(); elem.accept(emitterVisitor.with(RuntimeContextType.SCALAR)); - String methodName = switch (arrayOperation) { - case "get" -> "arrayDerefGet"; - case "delete" -> "arrayDerefDelete"; - case "exists" -> "arrayDerefExists"; - default -> - throw new PerlCompilerException(node.tokenIndex, "Not implemented: array operation: " + arrayOperation, emitterVisitor.ctx.errorUtil); - }; - - emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", - methodName, "(Lorg/perlonjava/runtime/RuntimeScalar;)Lorg/perlonjava/runtime/RuntimeScalar;", false); + // Check if strict refs is enabled at compile time + if (emitterVisitor.ctx.symbolTable.isStrictOptionEnabled(Strict.HINT_STRICT_REFS)) { + // Use strict version (throws error on symbolic references) + String methodName = switch (arrayOperation) { + case "get" -> "arrayDerefGet"; + case "delete" -> "arrayDerefDelete"; + case "exists" -> "arrayDerefExists"; + default -> + throw new PerlCompilerException(node.tokenIndex, "Not implemented: array operation: " + arrayOperation, emitterVisitor.ctx.errorUtil); + }; + emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", + methodName, "(Lorg/perlonjava/runtime/RuntimeScalar;)Lorg/perlonjava/runtime/RuntimeScalar;", false); + } else { + // Use non-strict version (allows symbolic references) + String methodName = switch (arrayOperation) { + case "get" -> "arrayDerefGetNonStrict"; + case "delete" -> "arrayDerefDeleteNonStrict"; + case "exists" -> "arrayDerefExistsNonStrict"; + default -> + throw new PerlCompilerException(node.tokenIndex, "Not implemented: array operation: " + arrayOperation, emitterVisitor.ctx.errorUtil); + }; + // Push the current package name for symbolic reference resolution + emitterVisitor.pushCurrentPackage(); + emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", + methodName, "(Lorg/perlonjava/runtime/RuntimeScalar;Ljava/lang/String;)Lorg/perlonjava/runtime/RuntimeScalar;", false); + } } else { // Multiple indices: use slice method (only for get operation) if (!arrayOperation.equals("get")) { @@ -526,15 +543,32 @@ public static void handleArrowHashDeref(EmitterVisitor emitterVisitor, BinaryOpe emitterVisitor.ctx.logDebug("visit -> (HashLiteralNode) autoquote " + node.right); nodeRight.accept(emitterVisitor.with(RuntimeContextType.SCALAR)); - String methodName = switch (hashOperation) { - case "get" -> "hashDerefGet"; - case "delete" -> "hashDerefDelete"; - case "exists" -> "hashDerefExists"; - default -> - throw new PerlCompilerException(node.tokenIndex, "Not implemented: hash operation: " + hashOperation, emitterVisitor.ctx.errorUtil); - }; - - emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", methodName, "(Lorg/perlonjava/runtime/RuntimeScalar;)Lorg/perlonjava/runtime/RuntimeScalar;", false); + // Check if strict refs is enabled at compile time + if (emitterVisitor.ctx.symbolTable.isStrictOptionEnabled(Strict.HINT_STRICT_REFS)) { + // Use strict version (throws error on symbolic references) + String methodName = switch (hashOperation) { + case "get" -> "hashDerefGet"; + case "delete" -> "hashDerefDelete"; + case "exists" -> "hashDerefExists"; + default -> + throw new PerlCompilerException(node.tokenIndex, "Not implemented: hash operation: " + hashOperation, emitterVisitor.ctx.errorUtil); + }; + emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", + methodName, "(Lorg/perlonjava/runtime/RuntimeScalar;)Lorg/perlonjava/runtime/RuntimeScalar;", false); + } else { + // Use non-strict version (allows symbolic references) + String methodName = switch (hashOperation) { + case "get" -> "hashDerefGetNonStrict"; + case "delete" -> "hashDerefDeleteNonStrict"; + case "exists" -> "hashDerefExistsNonStrict"; + default -> + throw new PerlCompilerException(node.tokenIndex, "Not implemented: hash operation: " + hashOperation, emitterVisitor.ctx.errorUtil); + }; + // Push the current package name for symbolic reference resolution + emitterVisitor.pushCurrentPackage(); + emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", + methodName, "(Lorg/perlonjava/runtime/RuntimeScalar;Ljava/lang/String;)Lorg/perlonjava/runtime/RuntimeScalar;", false); + } EmitOperator.handleVoidContext(emitterVisitor); } } diff --git a/src/main/java/org/perlonjava/codegen/EmitVariable.java b/src/main/java/org/perlonjava/codegen/EmitVariable.java index 493c8c775..35ca97600 100644 --- a/src/main/java/org/perlonjava/codegen/EmitVariable.java +++ b/src/main/java/org/perlonjava/codegen/EmitVariable.java @@ -315,6 +315,9 @@ static void handleVariableOperator(EmitterVisitor emitterVisitor, OperatorNode n emitterVisitor.pushCurrentPackage(); mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeScalar", "hashDerefNonStrict", "(Ljava/lang/String;)Lorg/perlonjava/runtime/RuntimeHash;", false); } + if (emitterVisitor.ctx.contextType == RuntimeContextType.SCALAR) { + mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/RuntimeHash", "scalar", "()Lorg/perlonjava/runtime/RuntimeScalar;", false); + } return; case "$": // `$$a` diff --git a/src/main/java/org/perlonjava/operators/RuntimeTransliterate.java b/src/main/java/org/perlonjava/operators/RuntimeTransliterate.java index 857f135d6..7bebfd82a 100644 --- a/src/main/java/org/perlonjava/operators/RuntimeTransliterate.java +++ b/src/main/java/org/perlonjava/operators/RuntimeTransliterate.java @@ -56,9 +56,10 @@ public RuntimeScalar transliterate(RuntimeScalar originalString, int ctx) { for (int i = 0; i < input.length(); i++) { int codePoint = input.codePointAt(i); - // Handle surrogate pairs for Unicode - if (Character.isHighSurrogate(input.charAt(i)) && i + 1 < input.length()) { - i++; // Skip the low surrogate + // Handle surrogate pairs for Unicode - only skip if it's a valid supplementary code point + // codePointAt() already combines surrogate pairs, so we just need to skip the second char unit + if (Character.isSupplementaryCodePoint(codePoint)) { + i++; // Skip the low surrogate of a valid surrogate pair } boolean matched = false; @@ -395,23 +396,35 @@ private int parseCharAt(String input, int pos, List result) { if (pos + 2 < input.length() && input.charAt(pos + 2) == '{') { int closePos = input.indexOf('}', pos + 3); if (closePos > pos + 3) { - String content = input.substring(pos + 3, closePos); - - // Check if it's a Unicode code point \N{U+XXXX} - if (content.startsWith("U+")) { - try { - int codePoint = Integer.parseInt(content.substring(2), 16); - result.add(codePoint); - return closePos - pos + 1; - } catch (NumberFormatException e) { - // Invalid format - } + String content = input.substring(pos + 3, closePos).trim(); + + // Check for empty character name + if (content.isEmpty()) { + throw new RuntimeException("Unknown charname ''"); } - // For named characters, we'd need a lookup table - // For now, throw error for named sequences - throw new RuntimeException("\\" + "N{" + content + - "} must not be a named sequence in transliteration operator"); + // Try to resolve the Unicode character name + try { + int codePoint = org.perlonjava.regex.UnicodeResolver.getCodePointFromName(content); + result.add(codePoint); + return closePos - pos + 1; + } catch (IllegalArgumentException e) { + // Check if it's a named sequence (multi-character) + // Named sequences are not allowed in tr/// + String errorMsg = e.getMessage(); + if (errorMsg != null && errorMsg.contains("named sequence")) { + throw new RuntimeException("\\" + "N{" + content + + "} must not be a named sequence in transliteration operator"); + } + // For any other error (invalid or unknown name), also reject as named sequence + // because ICU4J returns -1 for both cases and we can't distinguish them easily + // Perl 5 gives a specific error for named sequences, but we'll be conservative + throw new RuntimeException("\\" + "N{" + content + + "} must not be a named sequence in transliteration operator"); + } + } else if (closePos == pos + 3) { + // Empty \N{} - this is the case where closePos is immediately after { + throw new RuntimeException("Unknown charname ''"); } } result.add((int) 'N'); diff --git a/src/main/java/org/perlonjava/regex/UnicodeResolver.java b/src/main/java/org/perlonjava/regex/UnicodeResolver.java index 53d9b2480..3357c9278 100644 --- a/src/main/java/org/perlonjava/regex/UnicodeResolver.java +++ b/src/main/java/org/perlonjava/regex/UnicodeResolver.java @@ -18,7 +18,7 @@ public class UnicodeResolver { * * @param name The name of the Unicode character. * @return The Unicode code point. - * @throws IllegalArgumentException If the name is invalid or not found. + * @throws IllegalArgumentException If the name is invalid, not found, or is a named sequence. */ public static int getCodePointFromName(String name) { int codePoint; @@ -48,12 +48,40 @@ public static int getCodePointFromName(String name) { }; if (codePoint == -1) { + // Check if this is a named sequence (multi-character sequence) + // Named sequences are not supported in some contexts like tr/// + if (isNamedSequence(name)) { + throw new IllegalArgumentException("named sequence: " + name); + } throw new IllegalArgumentException("Invalid Unicode character name: " + name); } } return codePoint; } + /** + * Checks if a given name refers to a Unicode named character sequence. + * Named sequences are multi-character sequences with Unicode-assigned names. + * + * @param name The name to check. + * @return true if it's a named sequence, false otherwise. + */ + private static boolean isNamedSequence(String name) { + // ICU4J's UCharacter.getCharFromName() returns -1 for both invalid names + // and named sequences. Unfortunately, there's no easy way to distinguish + // between them without maintaining our own list of named sequences. + // + // For now, we conservatively treat all failures as potential named sequences + // in the context of tr///, which is the safest approach. + // + // Common named sequences include things like: + // - "KATAKANA LETTER AINU P" (U+31F7 U+309A) + // - "LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW" (U+0045 U+0329) + // + // This is left as a placeholder for future enhancement if needed. + return false; + } + /** * Parses a user-defined property definition string and returns a character class pattern. * The format is hex ranges separated by tabs/newlines: diff --git a/src/main/java/org/perlonjava/runtime/RuntimeBaseProxy.java b/src/main/java/org/perlonjava/runtime/RuntimeBaseProxy.java index b77edae12..b6315d5d1 100644 --- a/src/main/java/org/perlonjava/runtime/RuntimeBaseProxy.java +++ b/src/main/java/org/perlonjava/runtime/RuntimeBaseProxy.java @@ -200,6 +200,55 @@ public RuntimeScalar hashDerefExists(RuntimeScalar index) { return ret; } + // Non-strict versions (allow symbolic references) + // Note: We don't call vivify() here because for symbolic references (STRING/BYTE_STRING), + // we're not modifying the scalar - we're just using its string value to look up a global variable. + // For RuntimeScalarReadOnly (immutable scalars like string literals), lvalue is null and we can't vivify. + // Instead, we call the method directly on 'this' which will handle symbolic references correctly. + public RuntimeScalar hashDerefGetNonStrict(RuntimeScalar index, String currentPackage) { + if (lvalue == null) { + // For read-only scalars, call the method on 'this' directly + // This will use the RuntimeScalar implementation which handles symbolic references + return super.hashDerefGetNonStrict(index, currentPackage); + } + return lvalue.hashDerefGetNonStrict(index, currentPackage); + } + + public RuntimeScalar hashDerefDeleteNonStrict(RuntimeScalar index, String currentPackage) { + if (lvalue == null) { + return super.hashDerefDeleteNonStrict(index, currentPackage); + } + return lvalue.hashDerefDeleteNonStrict(index, currentPackage); + } + + public RuntimeScalar hashDerefExistsNonStrict(RuntimeScalar index, String currentPackage) { + if (lvalue == null) { + return super.hashDerefExistsNonStrict(index, currentPackage); + } + return lvalue.hashDerefExistsNonStrict(index, currentPackage); + } + + public RuntimeScalar arrayDerefGetNonStrict(RuntimeScalar index, String currentPackage) { + if (lvalue == null) { + return super.arrayDerefGetNonStrict(index, currentPackage); + } + return lvalue.arrayDerefGetNonStrict(index, currentPackage); + } + + public RuntimeScalar arrayDerefDeleteNonStrict(RuntimeScalar index, String currentPackage) { + if (lvalue == null) { + return super.arrayDerefDeleteNonStrict(index, currentPackage); + } + return lvalue.arrayDerefDeleteNonStrict(index, currentPackage); + } + + public RuntimeScalar arrayDerefExistsNonStrict(RuntimeScalar index, String currentPackage) { + if (lvalue == null) { + return super.arrayDerefExistsNonStrict(index, currentPackage); + } + return lvalue.arrayDerefExistsNonStrict(index, currentPackage); + } + /** * Performs a pre-increment operation on the underlying scalar. * diff --git a/src/main/java/org/perlonjava/runtime/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/RuntimeScalar.java index 3894c354d..ec4115f83 100644 --- a/src/main/java/org/perlonjava/runtime/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/RuntimeScalar.java @@ -730,31 +730,61 @@ public RuntimeScalar hashDerefDelete(RuntimeScalar index) { return this.hashDeref().delete(index); } + // Method to implement `delete $v->{key}`, when "no strict refs" is in effect + public RuntimeScalar hashDerefDeleteNonStrict(RuntimeScalar index, String packageName) { + return this.hashDerefNonStrict(packageName).delete(index); + } + // Method to implement `exists $v->{key}` public RuntimeScalar hashDerefExists(RuntimeScalar index) { return this.hashDeref().exists(index); } + // Method to implement `exists $v->{key}`, when "no strict refs" is in effect + public RuntimeScalar hashDerefExistsNonStrict(RuntimeScalar index, String packageName) { + return this.hashDerefNonStrict(packageName).exists(index); + } + // Method to implement `$v->[10]` public RuntimeScalar arrayDerefGet(RuntimeScalar index) { return this.arrayDeref().get(index); } + // Method to implement `$v->[10]`, when "no strict refs" is in effect + public RuntimeScalar arrayDerefGetNonStrict(RuntimeScalar index, String packageName) { + return this.arrayDerefNonStrict(packageName).get(index); + } + // Method to implement `$v->[10, 20]` (slice) public RuntimeList arrayDerefGetSlice(RuntimeList indices) { return this.arrayDeref().getSlice(indices); } + // Method to implement `$v->[10, 20]` (slice), when "no strict refs" is in effect + public RuntimeList arrayDerefGetSliceNonStrict(RuntimeList indices, String packageName) { + return this.arrayDerefNonStrict(packageName).getSlice(indices); + } + // Method to implement `delete $v->[10]` public RuntimeScalar arrayDerefDelete(RuntimeScalar index) { return this.arrayDeref().delete(index); } + // Method to implement `delete $v->[10]`, when "no strict refs" is in effect + public RuntimeScalar arrayDerefDeleteNonStrict(RuntimeScalar index, String packageName) { + return this.arrayDerefNonStrict(packageName).delete(index); + } + // Method to implement `exists $v->[10]` public RuntimeScalar arrayDerefExists(RuntimeScalar index) { return this.arrayDeref().exists(index); } + // Method to implement `exists $v->[10]`, when "no strict refs" is in effect + public RuntimeScalar arrayDerefExistsNonStrict(RuntimeScalar index, String packageName) { + return this.arrayDerefNonStrict(packageName).exists(index); + } + // Method to implement `@$v` public RuntimeArray arrayDeref() { // Check if object is eligible for overloading @@ -942,12 +972,18 @@ public RuntimeHash hashDerefNonStrict(String packageName) { yield AutovivificationHash.createAutovivifiedHash(this); } case HASHREFERENCE -> (RuntimeHash) value; - case TIED_SCALAR -> tiedFetch().hashDerefNonStrict(packageName); - default -> { + case GLOB -> { + // When dereferencing a typeglob as a hash, return the hash slot + RuntimeGlob glob = (RuntimeGlob) value; + yield GlobalVariable.getGlobalHash(glob.globName); + } + case STRING, BYTE_STRING -> { // Symbolic reference: treat the scalar's string value as a variable name String varName = NameNormalizer.normalizeVariableName(this.toString(), packageName); yield GlobalVariable.getGlobalHash(varName); } + case TIED_SCALAR -> tiedFetch().hashDerefNonStrict(packageName); + default -> throw new PerlCompilerException("Not a HASH reference"); }; } @@ -978,12 +1014,18 @@ public RuntimeArray arrayDerefNonStrict(String packageName) { yield AutovivificationArray.createAutovivifiedArray(this); } case ARRAYREFERENCE -> (RuntimeArray) value; - case TIED_SCALAR -> tiedFetch().arrayDerefNonStrict(packageName); - default -> { + case GLOB -> { + // When dereferencing a typeglob as an array, return the array slot + RuntimeGlob glob = (RuntimeGlob) value; + yield GlobalVariable.getGlobalArray(glob.globName); + } + case STRING, BYTE_STRING -> { // Symbolic reference: treat the scalar's string value as a variable name String varName = NameNormalizer.normalizeVariableName(this.toString(), packageName); yield GlobalVariable.getGlobalArray(varName); } + case TIED_SCALAR -> tiedFetch().arrayDerefNonStrict(packageName); + default -> throw new PerlCompilerException("Not an ARRAY reference"); }; }