diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java index 2ead729e4..ec263bbf1 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java @@ -1767,6 +1767,10 @@ static void handleCreateReference(EmitterVisitor emitterVisitor, OperatorNode no } else if (node.operand instanceof OperatorNode op && op.operator.equals("$")) { // Scalar variable - use SCALAR context contextType = RuntimeContextType.SCALAR; + } else if (node.operand instanceof OperatorNode op && op.operator.equals("*")) { + // *{EXPR} — EXPR is evaluated in scalar context (e.g. Symbol::qualify_to_ref's + // \*{ qualify $_[0], ... }). LIST context breaks the comma/ternary inside braces. + contextType = RuntimeContextType.SCALAR; } node.operand.accept(emitterVisitor.with(contextType)); diff --git a/src/main/java/org/perlonjava/frontend/parser/ListParser.java b/src/main/java/org/perlonjava/frontend/parser/ListParser.java index 82b2180be..77f3286ba 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ListParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/ListParser.java @@ -36,6 +36,15 @@ public class ListParser { * @throws PerlCompilerException If the syntax is incorrect or the minimum number of items is not met. */ static ListNode parseZeroOrOneList(Parser parser, int minItems) { + return parseZeroOrOneList(parser, minItems, null); + } + + /** + * @param tooManyArgsForBuiltin if non-null and more than one parenthesized argument is parsed, + * emit {@code Too many arguments for } (Perl builtin wording) + * instead of a generic syntax error. + */ + static ListNode parseZeroOrOneList(Parser parser, int minItems, String tooManyArgsForBuiltin) { if (looksLikeEmptyList(parser)) { // Return an empty list if it looks like an empty list if (minItems > 0) { @@ -52,7 +61,11 @@ static ListNode parseZeroOrOneList(Parser parser, int minItems) { TokenUtils.consume(parser); expr = new ListNode(parseList(parser, ")", 0), parser.tokenIndex); if (expr.elements.size() > 1) { - parser.throwError("syntax error"); + if (tooManyArgsForBuiltin != null) { + parser.throwError("Too many arguments for " + tooManyArgsForBuiltin); + } else { + parser.throwError("syntax error"); + } } } else if (token.type == LexerTokenType.EOF || isListTerminator(parser, token) || token.text.equals(",") || (token.text.equals("isa") && token.type == LexerTokenType.IDENTIFIER @@ -60,6 +73,10 @@ static ListNode parseZeroOrOneList(Parser parser, int minItems) { // No argument // 'isa' when enabled as a feature is an infix operator, not a bareword argument expr = new ListNode(parser.tokenIndex); + } else if (token.text.equals("?")) { + // `defined ? expr : expr` (zero-arg defined uses $_), `rand ? expr : expr`, etc. + // Do not parse the ternary `?` as the unary operator's optional operand. + expr = new ListNode(parser.tokenIndex); } else { // Argument without parentheses expr = ListNode.makeList(parser.parseExpression(parser.getPrecedence("isa") + 1)); @@ -328,7 +345,9 @@ public static boolean looksLikeEmptyList(Parser parser) { List savedHeredocNodes = ParseHeredoc.saveHeredocState(parser); LexerToken token = TokenUtils.consume(parser); - LexerToken token1 = parser.tokens.get(parser.tokenIndex); // Next token including spaces + LexerToken token1 = parser.tokenIndex < parser.tokens.size() + ? parser.tokens.get(parser.tokenIndex) + : new LexerToken(LexerTokenType.EOF, ""); LexerToken nextToken = TokenUtils.peek(parser); // After spaces // Check if this is a list terminator, but we need to restore position for the check @@ -385,7 +404,12 @@ public static boolean looksLikeEmptyList(Parser parser) { if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("parseZeroOrMoreList looks like regex"); } else { // Subroutine call with zero arguments, followed by infix operator: `pos = 3` - if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + parser.tokens.get(parser.tokenIndex) + "`"); + if (CompilerOptions.DEBUG_ENABLED) { + String dbgTok = parser.tokenIndex < parser.tokens.size() + ? String.valueOf(parser.tokens.get(parser.tokenIndex)) + : "EOF"; + parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + dbgTok + "`"); + } // if (LVALUE_INFIX_OP.contains(token.text)) { // throw new PerlCompilerException(tokenIndex, "Can't modify non-lvalue subroutine call", ctx.errorUtil); // } diff --git a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java index b18f9e3df..cdd09961f 100644 --- a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java @@ -639,7 +639,7 @@ static OperatorNode parseOperatorWithOneOptionalArgument(Parser parser, LexerTok Node operand; // Handle operators with one optional argument String text = token.text; - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, text); if (((ListNode) operand).elements.isEmpty()) { switch (text) { case "sleep": @@ -869,7 +869,7 @@ static OperatorNode parseDefined(Parser parser, LexerToken token, int currentInd // Handle 'defined' operator with special parsing context boolean parsingTakeReference = parser.parsingTakeReference; parser.parsingTakeReference = true; // don't call `&subr` while parsing "Take reference" - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, "defined"); parser.parsingTakeReference = parsingTakeReference; if (operand.elements.isEmpty()) { // `defined` without arguments means `defined $_` @@ -890,7 +890,7 @@ static OperatorNode parseUndef(Parser parser, LexerToken token, int currentIndex // Similar to 'defined', we need to prevent &subr from being auto-called boolean parsingTakeReference = parser.parsingTakeReference; parser.parsingTakeReference = true; // don't call `&subr` while parsing "Take reference" - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, "undef"); parser.parsingTakeReference = parsingTakeReference; if (operand.elements.isEmpty()) { // `undef` without arguments returns undef @@ -1291,7 +1291,7 @@ static BinaryOperatorNode parseSeek(Parser parser, LexerToken token, int current static OperatorNode parseReadpipe(Parser parser) { Node operand; // Handle 'readpipe' operator with one optional argument - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, "readpipe"); if (((ListNode) operand).elements.isEmpty()) { // Create `$_` variable if no argument is provided operand = ParserNodeUtils.scalarUnderscore(parser); @@ -1300,9 +1300,10 @@ static OperatorNode parseReadpipe(Parser parser) { } static OperatorNode parsePack(Parser parser, LexerToken token, int currentIndex) { - Node operand; - // Handle 'pack' operator with one or more arguments - operand = ListParser.parseZeroOrMoreList(parser, 1, false, true, false, false); + ListNode operand = ListParser.parseZeroOrMoreList(parser, 0, false, true, false, false); + if (operand.elements.isEmpty()) { + parser.throwError("Not enough arguments for pack"); + } return new OperatorNode(token.text, operand, currentIndex); } diff --git a/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java b/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java index 41950ac80..321d8954a 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java +++ b/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java @@ -47,7 +47,23 @@ static OperatorNode parseHeredoc(Parser parser, String tokenText) { } } else if (tokenText.length() == 1 && "'`\"".contains(tokenText)) { delimiter = tokenText; - } else if (token.type == LexerTokenType.IDENTIFIER) { + // Lexer tokenizes <<`LABEL` as: ` LABEL ` (opening/closing backticks are separate tokens). + // parseRawString("q") does not handle a NUMBER in the middle, so grab LABEL here and skip q(). + if ("`".equals(delimiter) + && token.type == LexerTokenType.OPERATOR + && parser.tokenIndex + 2 < parser.tokens.size()) { + LexerToken mid = parser.tokens.get(parser.tokenIndex + 1); + LexerToken end = parser.tokens.get(parser.tokenIndex + 2); + if ((mid.type == LexerTokenType.IDENTIFIER || mid.type == LexerTokenType.NUMBER) + && end.type == LexerTokenType.OPERATOR + && "`".equals(end.text)) { + TokenUtils.consume(parser); + identifier = mid.text; + TokenUtils.consume(parser); + TokenUtils.consume(parser); + } + } + } else if (token.type == LexerTokenType.IDENTIFIER || token.type == LexerTokenType.NUMBER) { delimiter = "\""; identifier = tokenText; TokenUtils.consume(parser); @@ -234,10 +250,10 @@ else if (currentIndex >= tokens.size() || operand = new StringNode(string, newlineIndex); break; case "\"": - operand = interpolateString(parser, string, newlineIndex); + operand = interpolateString(parser, string, newlineIndex, true); break; case "`": - Node interpolated = interpolateString(parser, string, newlineIndex); + Node interpolated = interpolateString(parser, string, newlineIndex, false); List elements = new ArrayList<>(); elements.add(interpolated); ListNode list = new ListNode(elements, newlineIndex); @@ -263,7 +279,7 @@ else if (currentIndex >= tokens.size() || parser.tokenIndex = newlineIndex; } - private static Node interpolateString(Parser parser, String string, int newlineIndex) { + private static Node interpolateString(Parser parser, String string, int newlineIndex, boolean preprocessBracedBackslashQuotes) { ArrayList buffers = new ArrayList<>(); buffers.add(string); StringParser.ParsedString rawStr = new StringParser.ParsedString(newlineIndex, newlineIndex, buffers, ' ', ' ', ' ', ' '); @@ -273,7 +289,8 @@ private static Node interpolateString(Parser parser, String string, int newlineI List heredocContext = new ArrayList<>(); // Parse the string with the new context, preserving the original parser context - Node result = StringDoubleQuoted.parseDoubleQuotedString(parser.ctx, rawStr, true, true, false, heredocContext, parser); + Node result = StringDoubleQuoted.parseDoubleQuotedString(parser.ctx, rawStr, true, true, false, heredocContext, parser, + preprocessBracedBackslashQuotes); // After parsing, any heredocs declared in this context need to be added to the parent parser.getHeredocNodes().addAll(heredocContext); diff --git a/src/main/java/org/perlonjava/frontend/parser/Parser.java b/src/main/java/org/perlonjava/frontend/parser/Parser.java index 85952b419..d1c0dfcfc 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Parser.java +++ b/src/main/java/org/perlonjava/frontend/parser/Parser.java @@ -65,6 +65,14 @@ public class Parser { // re-tokenized string content and __LINE__ should use this as the base line, // counting newlines from the inner token list to offset from it. public int baseLineNumber = 0; + /** + * When {@code true} (qq and normal strings), {@link Variable#parseBracedVariable} may rewrite + * {@code \"} before {@code "} inside {@code ${...}} so patterns like {@code "${\"name\"}"} + * interpolate {@code $name}. When {@code false} (qx / command heredocs), keep the backslash so + * {@code ${\"hello"}} parses as a reference to the string {@code hello} and dereferences it, + * matching Perl 5. + */ + public boolean preprocessBracedBackslashQuotesInInterpolation = true; /** * Constructs a Parser with the given context and tokens. diff --git a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java index 4fa3a4302..b838bcd8b 100644 --- a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java +++ b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java @@ -1,11 +1,14 @@ package org.perlonjava.frontend.parser; import org.perlonjava.frontend.astnode.*; +import org.perlonjava.frontend.lexer.Lexer; import org.perlonjava.frontend.lexer.LexerToken; import org.perlonjava.frontend.lexer.LexerTokenType; import org.perlonjava.runtime.runtimetypes.GlobalVariable; import org.perlonjava.runtime.runtimetypes.PerlCompilerException; +import java.util.List; + import static org.perlonjava.frontend.parser.ListParser.consumeCommas; import static org.perlonjava.frontend.parser.ListParser.isComma; import static org.perlonjava.frontend.parser.ParserNodeUtils.scalarUnderscore; @@ -245,21 +248,27 @@ static ListNode consumeArgsWithPrototype(Parser parser, String prototype, boolea // Check for too many arguments without parentheses only if prototype expects 2+ args if (!hasParentheses && countPrototypeArgs(prototype) >= 2) { - // If we see a comma after parsing all required args, check if it's a trailing comma - if (isComma(TokenUtils.peek(parser))) { - // Consume the comma and check what follows - int saveIndex = parser.tokenIndex; - consumeCommas(parser); - LexerToken nextToken = TokenUtils.peek(parser); - // If followed by a statement terminator, it's a trailing comma (allowed) - // Otherwise, it's too many arguments - if (!Parser.isExpressionTerminator(nextToken) && - nextToken.type != LexerTokenType.EOF && - !nextToken.text.equals(")")) { + // Do not use TokenUtils.peek here: it runs Whitespace.skipWhitespace(), which + // processes NEWLINE and may fill in a pending << heredoc before arguments are done. + List tokens = parser.tokens; + int i = skipHorizontalWhitespaceTokens(tokens, parser.tokenIndex); + if (i < tokens.size() && isComma(tokens.get(i))) { + int j = skipHorizontalWhitespaceTokens(tokens, i + 1); + LexerToken nextToken = tokenAtOrEof(tokens, j); + // Trailing comma before the newline that starts a pending << heredoc body is valid + // (see op/exec.t package o block). A newline with no pending heredoc is an extra arg. + boolean trailingCommaBeforeHeredoc = + nextToken.type == LexerTokenType.NEWLINE && !parser.getHeredocNodes().isEmpty(); + if (!trailingCommaBeforeHeredoc + && !Parser.isExpressionTerminator(nextToken) + && nextToken.type != LexerTokenType.EOF + && !nextToken.text.equals(")")) { throwTooManyArgumentsError(parser); } - // Restore position - the comma will be handled by the caller - parser.tokenIndex = saveIndex; + if (trailingCommaBeforeHeredoc) { + parser.tokenIndex = i; + consumeCommas(parser); + } } } } @@ -300,6 +309,26 @@ static ListNode consumeArgsWithPrototype(Parser parser, String prototype, boolea return args; } + /** Advance past SPACE/TAB-only whitespace tokens; never consume NEWLINE (heredoc triggers). */ + private static int skipHorizontalWhitespaceTokens(List tokens, int i) { + while (i < tokens.size()) { + LexerToken t = tokens.get(i); + if (t.type == LexerTokenType.WHITESPACE) { + i++; + continue; + } + break; + } + return i; + } + + private static LexerToken tokenAtOrEof(List tokens, int i) { + if (i >= tokens.size()) { + return new LexerToken(LexerTokenType.EOF, Lexer.EOF); + } + return tokens.get(i); + } + private static int firstNonCodeArgIndexAfterAmpersandPrototype(String prototype, ListNode args) { if (prototype == null || args.elements.size() < 2) { return -1; @@ -415,6 +444,12 @@ private static void parsePrototypeArguments(Parser parser, ListNode args, String parser.throwError("syntax error"); } + // Builtin Perl parsing (no parentheses): `symlink qw(a b)`, `atan2 qw(1 2)`, etc. + // One parenthesis-free list literal fills successive leading `$` / `_` prototype slots. + if (tryConsumeParenFreeWordListForLeadingScalars(parser, args, prototype, hasParentheses)) { + return; + } + // If prototype starts with ';' and we're at a terminator or single comma, all arguments are optional if (prototype.startsWith(";") && (isArgumentTerminator(parser) || isComma(TokenUtils.peek(parser)))) { return; @@ -470,6 +505,71 @@ private static void parsePrototypeArguments(Parser parser, ListNode args, String } } + /** + * Perl builtins accept {@code symlink qw(a b)} without commas: a single qw list fills successive + * leading scalar prototype slots (see {@code perl -MO=Deparse -e 'symlink qw(/x /y)'}). + */ + private static boolean tryConsumeParenFreeWordListForLeadingScalars( + Parser parser, ListNode args, String prototype, boolean hasParentheses) { + if (hasParentheses || prototype == null || prototype.isEmpty()) { + return false; + } + int slots = countLeadingConsecutiveDollarPrototypeSlots(prototype); + if (slots < 2) { + return false; + } + int saved = parser.tokenIndex; + List savedHeredocs = ParseHeredoc.saveHeredocState(parser); + Node expr = parser.parseExpression(parser.getPrecedence(",")); + if (expr instanceof ListNode ln + && ln.elements.size() == slots + && isPlainStringWordList(ln)) { + for (Node word : ln.elements) { + Node scalarArg = ParserNodeUtils.toScalarContext(word); + copyArgumentStartIndex(word, scalarArg); + scalarArg.setAnnotation("context", "SCALAR"); + args.elements.add(scalarArg); + } + return true; + } + parser.tokenIndex = saved; + parser.getHeredocNodes().clear(); + parser.getHeredocNodes().addAll(savedHeredocs); + return false; + } + + /** + * Counts leading {@code $} slots only (skipping whitespace). Used for the parenthesis-free + * {@code qw(...)} merge optimization: prototypes like {@code $_} (template + implicit {@code $_}) + * must not run that lookahead — it would invoke {@code parseExpression} at a terminator and yield a + * bogus syntax error instead of {@code Not enough arguments for unpack}. + */ + private static int countLeadingConsecutiveDollarPrototypeSlots(String prototype) { + int count = 0; + for (int i = 0; i < prototype.length(); i++) { + char c = prototype.charAt(i); + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + continue; + } + if (c == '$') { + count++; + continue; + } + break; + } + return count; + } + + /** True if every element is a {@link StringNode} (parenthesis-free qw word list). */ + private static boolean isPlainStringWordList(ListNode ln) { + for (Node n : ln.elements) { + if (!(n instanceof StringNode)) { + return false; + } + } + return true; + } + /** * Parses an argument with optional comma handling. * diff --git a/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java b/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java index 8634ed1e5..ab84fc571 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java @@ -91,7 +91,7 @@ private StringDoubleQuoted(EmitterContext ctx, List tokens, Parser p * @return An AST node representing the parsed string (StringNode, BinaryOperatorNode for join, etc.) */ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement) { - return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, null); + return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, null, null, true); } /** @@ -109,7 +109,7 @@ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedStrin * @return An AST node representing the parsed string (StringNode, BinaryOperatorNode for join, etc.) */ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List sharedHeredocNodes) { - return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, sharedHeredocNodes, null); + return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, sharedHeredocNodes, null, true); } /** @@ -125,6 +125,14 @@ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedStrin * @return An AST node representing the parsed string */ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List sharedHeredocNodes, Parser originalParser) { + return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, + sharedHeredocNodes, originalParser, true); + } + + /** + * @param preprocessBracedBackslashQuotes See {@link Parser#preprocessBracedBackslashQuotesInInterpolation}. + */ + static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List sharedHeredocNodes, Parser originalParser, boolean preprocessBracedBackslashQuotes) { // Extract the first buffer (double-quoted strings don't have multiple parts like here-docs) var input = rawStr.buffers.getFirst(); var tokenIndex = rawStr.next; @@ -142,6 +150,8 @@ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedStrin new Parser(ctx, tokens, sharedHeredocNodes) : new Parser(ctx, tokens); + parser.preprocessBracedBackslashQuotesInInterpolation = preprocessBracedBackslashQuotes; + // Preserve context flags from original parser if provided if (originalParser != null) { parser.isInMethod = originalParser.isInMethod; diff --git a/src/main/java/org/perlonjava/frontend/parser/StringParser.java b/src/main/java/org/perlonjava/frontend/parser/StringParser.java index 01bcddf29..6daed2977 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringParser.java @@ -586,7 +586,7 @@ public static OperatorNode parseRegexMatch(EmitterContext ctx, String operator, public static OperatorNode parseSystemCommand(EmitterContext ctx, String operator, ParsedString rawStr) { operator = "qx"; // Parse as interpolated string (like double quotes) - Node parsed = StringDoubleQuoted.parseDoubleQuotedString(ctx, rawStr, true, true, false); + Node parsed = StringDoubleQuoted.parseDoubleQuotedString(ctx, rawStr, true, true, false, null, null, false); List elements = new ArrayList<>(); elements.add(parsed); ListNode list = new ListNode(elements, rawStr.index); diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 4b0379889..71ed9640b 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -201,15 +201,20 @@ static Node parseSubroutineCall(Parser parser, boolean isMethod) { boolean prototypeHasGlob = prototype != null && prototype.contains("*"); - // If a package name follows, then it looks like a indirect method - // Unless the subName looks like an operator - // Unless the subName has a prototype with `*` - // // Note: feature-gated core keywords (`try`, `catch`, `finally`) should // participate in indirect-object parsing when their feature is *off* — // this is how Error.pm's classic // try { ... } catch Error::Simple with { ... } // idiom is recognised (parses as `Error::Simple->catch(with {...})`). + + // If a package name follows, it may be indirect-object syntax (INVOCANT->METHOD(LIST)). + // + // Do not skip this probe just because the callee has a scalar-leading prototype; that + // heuristic mis-parsed `new Some::Long::Name LIST` inside a package that defines its own + // `sub new ($...)` as a direct arity-checked call to the wrong sub (Parse::RecDescent). + // Perl does not treat `new` specially here — the method bareword is arbitrary. + // + // Ambiguous parses rely on the rejection/backtracking logic inside this block. if (peek(parser).type == LexerTokenType.IDENTIFIER && isValidIndirectMethod(subName, parser) && !prototypeHasGlob) { diff --git a/src/main/java/org/perlonjava/frontend/parser/Variable.java b/src/main/java/org/perlonjava/frontend/parser/Variable.java index a7329934c..7e8ec643a 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Variable.java +++ b/src/main/java/org/perlonjava/frontend/parser/Variable.java @@ -99,8 +99,9 @@ public static boolean isFieldInClassHierarchy(Parser parser, String fieldName) { */ public static Node parseVariable(Parser parser, String sigil) { Node operand; - LexerToken nextToken = parser.tokenIndex < parser.tokens.size() - ? parser.tokens.get(parser.tokenIndex) + int nextTokIdx = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + LexerToken nextToken = nextTokIdx < parser.tokens.size() + ? parser.tokens.get(nextTokIdx) : new LexerToken(LexerTokenType.EOF, ""); // Special case 1: $${...} - nested scalar dereference @@ -831,8 +832,48 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt return new OperatorNode(sigil, new StringNode("", parser.tokenIndex), parser.tokenIndex); } - // For string interpolation, preprocess \" sequences IN PLACE - if (isStringInterpolation) { + // *{EXPR}: either a glob NAME (`*{P2::ISA}`) or a full expression (`*{ qualify $_[0], ... }`). + // Never parse a lone qualified identifier with ParseBlock — strict subs rejects it as a + // useless statement (perl5_t/t/mro/basic.t). When `{` is not followed by a single + // identifier up to `}`, fall back to ParseBlock like Perl's expression-in-braces form. + if ("*".equals(sigil)) { + int savedIdx = parser.tokenIndex; + parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + String globInnerName = IdentifierParser.parseComplexIdentifierInner(parser, true, true); + if (globInnerName != null && !globInnerName.isEmpty()) { + if (!isMaybeOperator(globInnerName, parser) + && !isBuiltinFunctionFollowedByArrow(globInnerName, parser)) { + parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + if (TokenUtils.peek(parser).text.equals("}")) { + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); + int idx = parser.tokenIndex; + return new OperatorNode(sigil, new IdentifierNode(globInnerName, idx), idx); + } + } + } + parser.tokenIndex = savedIdx; + + boolean savedInsideBracedDereference = parser.insideBracedDereference; + boolean savedParsingTakeReference = parser.parsingTakeReference; + parser.parsingTakeReference = false; + try { + BlockNode block = ParseBlock.parseBlock(parser); + if (!TokenUtils.peek(parser).text.equals("}")) { + throw new PerlCompilerException( + parser.tokenIndex, + "Missing closing brace in *{...} construct", + parser.ctx.errorUtil); + } + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); + return new OperatorNode(sigil, block, parser.tokenIndex); + } finally { + parser.insideBracedDereference = savedInsideBracedDereference; + parser.parsingTakeReference = savedParsingTakeReference; + } + } + + // For qq-like interpolation, preprocess \" sequences IN PLACE (not qx/command strings). + if (isStringInterpolation && parser.preprocessBracedBackslashQuotesInInterpolation) { int startIndex = parser.tokenIndex; int braceLevel = 1; diff --git a/src/main/java/org/perlonjava/frontend/parser/Whitespace.java b/src/main/java/org/perlonjava/frontend/parser/Whitespace.java index e8959f77f..44c30b805 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Whitespace.java +++ b/src/main/java/org/perlonjava/frontend/parser/Whitespace.java @@ -33,7 +33,10 @@ public static int skipWhitespace(Parser parser, int tokenIndex, List case NEWLINE: if (!parser.getHeredocNodes().isEmpty()) { - // Process heredocs before advancing past the NEWLINE + // parseHeredocAfterNewline reads parser.tokenIndex as the newline position. + // This loop advances local tokenIndex across WHITESPACE without syncing the parser, + // so align before processing pending heredocs. + parser.tokenIndex = tokenIndex; ParseHeredoc.parseHeredocAfterNewline(parser); tokenIndex = parser.tokenIndex; } else if (parser.heredocNewlineIndex == tokenIndex && parser.heredocSkipToIndex > tokenIndex) { diff --git a/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java b/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java index 05e8ebc45..8d420e9f4 100644 --- a/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java @@ -7,10 +7,8 @@ import org.perlonjava.runtime.perlmodule.BHooksEndOfScope; import org.perlonjava.runtime.runtimetypes.*; -import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; @@ -153,6 +151,9 @@ private static RuntimeBase doFile(RuntimeScalar runtimeScalar, boolean setINC, b String code = null; String actualFileName = null; + /** Raw bytes from {@code jar:PERL5LIB} before decoding — decoded once {@link CompilerOptions} exists. */ + byte[] jarPrefetchedBytes = null; + // Variables for handling array references with state RuntimeCode codeRef = null; RuntimeArray stateArgs = null; @@ -570,16 +571,11 @@ else if (code == null) { actualFileName = GlobalContext.JAR_PERLLIB + "/" + fileName; fullName = Paths.get(resourcePath); // Just for compatibility - try (InputStream is = resource.openStream(); - BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { - StringBuilder content = new StringBuilder(); - String line = null; - while ((line = reader.readLine()) != null) { - content.append(line).append("\n"); - } - code = content.toString(); + try (InputStream is = resource.openStream()) { + jarPrefetchedBytes = is.readAllBytes(); break; } catch (IOException e1) { + jarPrefetchedBytes = null; // Continue to next directory } } @@ -635,6 +631,9 @@ else if (code == null) { parsedArgs.applySourceFilters = shouldApplyFilters; // Enable source filter preprocessing if needed parsedArgs.disassembleEnabled = RuntimeCode.DISASSEMBLE; parsedArgs.useInterpreter = RuntimeCode.USE_INTERPRETER; + if (jarPrefetchedBytes != null) { + code = FileUtils.decodePerlSourceBytes(jarPrefetchedBytes, parsedArgs); + } if (code == null) { try { // Use the absolute fullName for file I/O (parsedArgs.fileName may be relative) diff --git a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java index 600670f87..27516df15 100644 --- a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java @@ -1,5 +1,6 @@ package org.perlonjava.runtime.operators; +import org.perlonjava.backend.bytecode.InterpreterState; import org.perlonjava.runtime.ForkOpenCompleteException; import org.perlonjava.runtime.ForkOpenState; import org.perlonjava.runtime.mro.InheritanceResolver; @@ -17,9 +18,12 @@ import java.util.regex.Pattern; import static org.perlonjava.runtime.runtimetypes.GlobalContext.encodeSpecialVar; +import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalCodeRef; import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalVariable; +import static org.perlonjava.runtime.runtimetypes.GlobalVariable.isGlobalCodeRefDefined; import static org.perlonjava.runtime.runtimetypes.GlobalVariable.setGlobalVariable; import static org.perlonjava.runtime.runtimetypes.RuntimeIO.flushAllHandles; +import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarFalse; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarUndef; /** @@ -44,6 +48,24 @@ public class SystemOperator { * @throws PerlCompilerException if an error occurs during command execution or stream handling. */ public static RuntimeBase systemCommand(RuntimeScalar command, int ctx) { + // Perl dispatches qx//, `` and readpipe() through the package readpipe CV when defined + // (use subs + sub readpipe), not straight to the shell. + // Use InterpreterState (updated by JVM `package` / scoped blocks), not caller() — + // caller() from inside this runtime helper resolves the wrong package and skips the override. + String pkg = InterpreterState.currentPackage.get().toString(); + if (pkg == null || pkg.isEmpty()) { + pkg = "main"; + } + String fqReadpipe = pkg.endsWith("::") ? pkg + "readpipe" : pkg + "::readpipe"; + if (isGlobalCodeRefDefined(fqReadpipe)) { + RuntimeScalar cv = getGlobalCodeRef(fqReadpipe); + if (cv.value instanceof RuntimeCode rc && rc.defined()) { + RuntimeArray argv = new RuntimeArray(); + argv.add(command); + return rc.apply(argv, RuntimeCode.effectiveCallContext(ctx)); + } + } + String cmd = command.toString(); CommandResult result; @@ -552,7 +574,9 @@ public static RuntimeScalar exec(RuntimeList args, boolean hasHandle, int ctx) { List flattenedArgs = flattenToStringList(args.elements); if (flattenedArgs.isEmpty()) { - throw new PerlCompilerException("exec: no command specified"); + // Perl returns false and sets errno (typically ENOENT) — does not die. + getGlobalVariable("main::!").set(2); + return scalarFalse; } // Check for pending fork-open emulation diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java b/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java index 542e067cc..fe0a5700f 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java @@ -3,8 +3,6 @@ import org.perlonjava.backend.jvm.EmitterMethodCreator; import org.perlonjava.runtime.runtimetypes.*; -import static org.perlonjava.runtime.runtimetypes.RuntimeContextType.SCALAR; - /** * The Symbol class provides functionalities for symbol manipulation in a Perl-like environment. * It extends PerlModuleBase to leverage module initialization and method registration. @@ -134,7 +132,7 @@ public static RuntimeList qualify(RuntimeArray args, int ctx) { if (args.size() > 1) { packageName = args.get(1); } else { - RuntimeList callerList = RuntimeCode.caller(new RuntimeList(), SCALAR); + RuntimeList callerList = RuntimeCode.caller(new RuntimeList(), RuntimeContextType.SCALAR); packageName = callerList.scalar(); } RuntimeScalar result; @@ -161,16 +159,26 @@ public static RuntimeList qualify_to_ref(RuntimeArray args, int ctx) { if (args.size() < 1 || args.size() > 2) { throw new IllegalStateException("Bad number of arguments for qualify_to_ref()"); } - RuntimeScalar object = qualify(args, ctx).scalar(); + RuntimeScalar object; + if (args.size() == 1) { + RuntimeArray qa = new RuntimeArray(); + qa.push(args.get(0)); + // Prefer perl-compatible caller(); InterpreterState can diverge from caller inside + // closures invoked from another package (qualify_to_ref must match embedded qualify). + qa.push(RuntimeCode.caller(new RuntimeList(), RuntimeContextType.SCALAR).scalar()); + object = qualify(qa, ctx).scalar(); + } else { + object = qualify(args, ctx).scalar(); + } RuntimeScalar result; if (!object.isString()) { // Already a glob reference or similar — return as-is result = object; } else { - // Create a named RuntimeGlob and return a GLOBREFERENCE to it. - // This mirrors Perl's \*{name}: the caller gets a reference whose - // hash slot (and other slots) delegate to the global symbol table. - result = new RuntimeGlob(object.toString()).createReference(); + // Use the canonical stash glob (vivifying if needed), not a detached RuntimeGlob. + // new RuntimeGlob(name).createReference() pointed at an orphan glob — slots like + // ARRAY never saw @Pkg::name (Symbol::qualify_to_ref, FindBin::libs path). + result = GlobalVariable.getGlobalIO(object.toString()).createReference(); } // System.out.println("qualify_to_ref returns " + result.type); RuntimeList list = new RuntimeList(); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java b/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java index 601716945..1409d6045 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java @@ -15,15 +15,14 @@ public class FileUtils { /** - * Reads a file with automatic encoding detection based on BOM (Byte Order Mark). - * Supports UTF-8, UTF-16BE, UTF-16LE, and defaults to UTF-8 if no BOM is found. + * Decodes Perl module / script bytes (from disk or from a JAR resource) using the same rules as + * {@link #readFileWithEncodingDetection(Path, CompilerOptions)}: BOM detection, charset selection, + * {@link CompilerOptions#rawCodeBytes} for {@code __DATA__}, and newline normalization for the lexer. * - * @param filePath The path to the file to read - * @return The decoded string content of the file - * @throws IOException if the file cannot be read + *

Jar loading previously used {@code InputStreamReader} + {@code readLine()}, which could diverge + * from file reads (charset, raw bytes); keep one path so identical sources compile identically. */ - public static String readFileWithEncodingDetection(Path filePath, CompilerOptions parsedArgs) throws IOException { - byte[] bytes = Files.readAllBytes(filePath); + public static String decodePerlSourceBytes(byte[] bytes, CompilerOptions parsedArgs) { String content = detectEncodingAndDecode(bytes, parsedArgs); // Normalize line endings: \r\n → \n, bare \r → \n // This must happen for source files so the Lexer sees clean \n line endings. @@ -34,6 +33,19 @@ public static String readFileWithEncodingDetection(Path filePath, CompilerOption return content; } + /** + * Reads a file with automatic encoding detection based on BOM (Byte Order Mark). + * Supports UTF-8, UTF-16BE, UTF-16LE, and defaults to UTF-8 if no BOM is found. + * + * @param filePath The path to the file to read + * @return The decoded string content of the file + * @throws IOException if the file cannot be read + */ + public static String readFileWithEncodingDetection(Path filePath, CompilerOptions parsedArgs) throws IOException { + byte[] bytes = Files.readAllBytes(filePath); + return decodePerlSourceBytes(bytes, parsedArgs); + } + /** * Detects the encoding of file content based on BOM and heuristics, then decodes it. * diff --git a/src/test/resources/unit/eval_string_lexical_args_use.t b/src/test/resources/unit/eval_string_lexical_args_use.t new file mode 100644 index 000000000..128470a3e --- /dev/null +++ b/src/test/resources/unit/eval_string_lexical_args_use.t @@ -0,0 +1,21 @@ +# Regression: eval STRING must close over outer lexicals like perl(1) +# (Test::More::use_ok passes \@imports via my @args and uses \@{$args[0]} in eval). + +use strict; +use warnings; +use Test::More tests => 1; + +sub run_eval { + my ( $code, @args ) = @_; + my $out = eval $code; + die $@ if $@; + return $out; +} + +my @want = qw(alpha beta gamma); +my $got = run_eval( + q{ join '|', @{$args[0]} }, + \@want, +); + +is( $got, join( '|', @want ), 'eval STRING sees outer @args in @{$args[0]}' ); diff --git a/src/test/resources/unit/symbol_star_brace_qualify_to_ref.t b/src/test/resources/unit/symbol_star_brace_qualify_to_ref.t new file mode 100644 index 000000000..08e16d051 --- /dev/null +++ b/src/test/resources/unit/symbol_star_brace_qualify_to_ref.t @@ -0,0 +1,35 @@ +# Regression: *{ qualify EXPR } must parse EXPR as code (Symbol::qualify_to_ref). +# +# Run: perl src/test/resources/unit/symbol_star_brace_qualify_to_ref.t +# ./jperl src/test/resources/unit/symbol_star_brace_qualify_to_ref.t + +use strict; +use warnings; +use Test::More tests => 3; + +use Symbol qw( qualify qualify_to_ref ); + +package Testophile; + +no strict 'refs'; +*{ 'bin' } = [ 'one', 'two' ]; + +package main; + +is( + Symbol::qualify( 'bin', 'Testophile' ), + 'Testophile::bin', + 'qualify(bin, Testophile)', +); + +my $r = do { + package Testophile; + Symbol::qualify_to_ref('bin'); +}; +is( ref($r), 'GLOB', 'qualify_to_ref (1-arg, caller Testophile) returns GLOB ref' ); + +is( + scalar( @{ *{$r} } ), + 2, + '@{ *qualify_to_ref(...) } aliases @Testophile::bin', +);