Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/java/org/perlonjava/backend/jvm/EmitOperator.java
Original file line number Diff line number Diff line change
Expand Up @@ -1767,6 +1767,10 @@ static void handleCreateReference(EmitterVisitor emitterVisitor, OperatorNode no
} else if (node.operand instanceof OperatorNode op && op.operator.equals("$")) {
// Scalar variable - use SCALAR context
contextType = RuntimeContextType.SCALAR;
} else if (node.operand instanceof OperatorNode op && op.operator.equals("*")) {
// *{EXPR} — EXPR is evaluated in scalar context (e.g. Symbol::qualify_to_ref's
// \*{ qualify $_[0], ... }). LIST context breaks the comma/ternary inside braces.
contextType = RuntimeContextType.SCALAR;
}

node.operand.accept(emitterVisitor.with(contextType));
Expand Down
30 changes: 27 additions & 3 deletions src/main/java/org/perlonjava/frontend/parser/ListParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ public class ListParser {
* @throws PerlCompilerException If the syntax is incorrect or the minimum number of items is not met.
*/
static ListNode parseZeroOrOneList(Parser parser, int minItems) {
// Convenience overload: forwards to the three-argument variant with a null builtin name.
return parseZeroOrOneList(parser, minItems, null);
}

/**
* @param tooManyArgsForBuiltin if non-null and more than one parenthesized argument is parsed,
* emit {@code Too many arguments for <name>} (Perl builtin wording)
* instead of a generic syntax error.
*/
static ListNode parseZeroOrOneList(Parser parser, int minItems, String tooManyArgsForBuiltin) {
if (looksLikeEmptyList(parser)) {
// Return an empty list if it looks like an empty list
if (minItems > 0) {
Expand All @@ -52,14 +61,22 @@ static ListNode parseZeroOrOneList(Parser parser, int minItems) {
TokenUtils.consume(parser);
expr = new ListNode(parseList(parser, ")", 0), parser.tokenIndex);
if (expr.elements.size() > 1) {
parser.throwError("syntax error");
if (tooManyArgsForBuiltin != null) {
parser.throwError("Too many arguments for " + tooManyArgsForBuiltin);
} else {
parser.throwError("syntax error");
}
}
} else if (token.type == LexerTokenType.EOF || isListTerminator(parser, token) || token.text.equals(",")
|| (token.text.equals("isa") && token.type == LexerTokenType.IDENTIFIER
&& parser.ctx.symbolTable.isFeatureCategoryEnabled("isa"))) {
// No argument
// 'isa' when enabled as a feature is an infix operator, not a bareword argument
expr = new ListNode(parser.tokenIndex);
} else if (token.text.equals("?")) {
// `defined ? expr : expr` (zero-arg defined uses $_), `rand ? expr : expr`, etc.
// Do not parse the ternary `?` as the unary operator's optional operand.
expr = new ListNode(parser.tokenIndex);
} else {
// Argument without parentheses
expr = ListNode.makeList(parser.parseExpression(parser.getPrecedence("isa") + 1));
Expand Down Expand Up @@ -328,7 +345,9 @@ public static boolean looksLikeEmptyList(Parser parser) {
List<OperatorNode> savedHeredocNodes = ParseHeredoc.saveHeredocState(parser);

LexerToken token = TokenUtils.consume(parser);
LexerToken token1 = parser.tokens.get(parser.tokenIndex); // Next token including spaces
LexerToken token1 = parser.tokenIndex < parser.tokens.size()
? parser.tokens.get(parser.tokenIndex)
: new LexerToken(LexerTokenType.EOF, "");
LexerToken nextToken = TokenUtils.peek(parser); // After spaces

// Check if this is a list terminator, but we need to restore position for the check
Expand Down Expand Up @@ -385,7 +404,12 @@ public static boolean looksLikeEmptyList(Parser parser) {
if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("parseZeroOrMoreList looks like regex");
} else {
// Subroutine call with zero arguments, followed by infix operator: `pos = 3`
if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + parser.tokens.get(parser.tokenIndex) + "`");
if (CompilerOptions.DEBUG_ENABLED) {
String dbgTok = parser.tokenIndex < parser.tokens.size()
? String.valueOf(parser.tokens.get(parser.tokenIndex))
: "EOF";
parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + dbgTok + "`");
}
// if (LVALUE_INFIX_OP.contains(token.text)) {
// throw new PerlCompilerException(tokenIndex, "Can't modify non-lvalue subroutine call", ctx.errorUtil);
// }
Expand Down
15 changes: 8 additions & 7 deletions src/main/java/org/perlonjava/frontend/parser/OperatorParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ static OperatorNode parseOperatorWithOneOptionalArgument(Parser parser, LexerTok
Node operand;
// Handle operators with one optional argument
String text = token.text;
operand = ListParser.parseZeroOrOneList(parser, 0);
operand = ListParser.parseZeroOrOneList(parser, 0, text);
if (((ListNode) operand).elements.isEmpty()) {
switch (text) {
case "sleep":
Expand Down Expand Up @@ -869,7 +869,7 @@ static OperatorNode parseDefined(Parser parser, LexerToken token, int currentInd
// Handle 'defined' operator with special parsing context
boolean parsingTakeReference = parser.parsingTakeReference;
parser.parsingTakeReference = true; // don't call `&subr` while parsing "Take reference"
operand = ListParser.parseZeroOrOneList(parser, 0);
operand = ListParser.parseZeroOrOneList(parser, 0, "defined");
parser.parsingTakeReference = parsingTakeReference;
if (operand.elements.isEmpty()) {
// `defined` without arguments means `defined $_`
Expand All @@ -890,7 +890,7 @@ static OperatorNode parseUndef(Parser parser, LexerToken token, int currentIndex
// Similar to 'defined', we need to prevent &subr from being auto-called
boolean parsingTakeReference = parser.parsingTakeReference;
parser.parsingTakeReference = true; // don't call `&subr` while parsing "Take reference"
operand = ListParser.parseZeroOrOneList(parser, 0);
operand = ListParser.parseZeroOrOneList(parser, 0, "undef");
parser.parsingTakeReference = parsingTakeReference;
if (operand.elements.isEmpty()) {
// `undef` without arguments returns undef
Expand Down Expand Up @@ -1291,7 +1291,7 @@ static BinaryOperatorNode parseSeek(Parser parser, LexerToken token, int current
static OperatorNode parseReadpipe(Parser parser) {
Node operand;
// Handle 'readpipe' operator with one optional argument
operand = ListParser.parseZeroOrOneList(parser, 0);
operand = ListParser.parseZeroOrOneList(parser, 0, "readpipe");
if (((ListNode) operand).elements.isEmpty()) {
// Create `$_` variable if no argument is provided
operand = ParserNodeUtils.scalarUnderscore(parser);
Expand All @@ -1300,9 +1300,10 @@ static OperatorNode parseReadpipe(Parser parser) {
}

/**
 * Parses the argument list of the {@code pack} operator and wraps it in an operator node.
 *
 * <p>The list is parsed with a minimum of zero items and the empty case is rejected here,
 * so that a missing argument list is reported as {@code Not enough arguments for pack}.
 *
 * @param parser       the parser positioned just after the {@code pack} token
 * @param token        the operator token; its text becomes the operator name of the result
 * @param currentIndex the token index recorded on the resulting node
 * @return an {@link OperatorNode} whose operand is the parsed argument list
 */
static OperatorNode parsePack(Parser parser, LexerToken token, int currentIndex) {
    // Single declaration and single parse of the argument list.
    ListNode operand = ListParser.parseZeroOrMoreList(parser, 0, false, true, false, false);
    if (operand.elements.isEmpty()) {
        parser.throwError("Not enough arguments for pack");
    }
    return new OperatorNode(token.text, operand, currentIndex);
}

Expand Down
27 changes: 22 additions & 5 deletions src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,23 @@ static OperatorNode parseHeredoc(Parser parser, String tokenText) {
}
} else if (tokenText.length() == 1 && "'`\"".contains(tokenText)) {
delimiter = tokenText;
} else if (token.type == LexerTokenType.IDENTIFIER) {
// Lexer tokenizes <<`LABEL` as: ` LABEL ` (opening/closing backticks are separate tokens).
// parseRawString("q") does not handle a NUMBER in the middle, so grab LABEL here and skip q().
if ("`".equals(delimiter)
&& token.type == LexerTokenType.OPERATOR
&& parser.tokenIndex + 2 < parser.tokens.size()) {
LexerToken mid = parser.tokens.get(parser.tokenIndex + 1);
LexerToken end = parser.tokens.get(parser.tokenIndex + 2);
if ((mid.type == LexerTokenType.IDENTIFIER || mid.type == LexerTokenType.NUMBER)
&& end.type == LexerTokenType.OPERATOR
&& "`".equals(end.text)) {
TokenUtils.consume(parser);
identifier = mid.text;
TokenUtils.consume(parser);
TokenUtils.consume(parser);
}
}
} else if (token.type == LexerTokenType.IDENTIFIER || token.type == LexerTokenType.NUMBER) {
delimiter = "\"";
identifier = tokenText;
TokenUtils.consume(parser);
Expand Down Expand Up @@ -234,10 +250,10 @@ else if (currentIndex >= tokens.size() ||
operand = new StringNode(string, newlineIndex);
break;
case "\"":
operand = interpolateString(parser, string, newlineIndex);
operand = interpolateString(parser, string, newlineIndex, true);
break;
case "`":
Node interpolated = interpolateString(parser, string, newlineIndex);
Node interpolated = interpolateString(parser, string, newlineIndex, false);
List<Node> elements = new ArrayList<>();
elements.add(interpolated);
ListNode list = new ListNode(elements, newlineIndex);
Expand All @@ -263,7 +279,7 @@ else if (currentIndex >= tokens.size() ||
parser.tokenIndex = newlineIndex;
}

private static Node interpolateString(Parser parser, String string, int newlineIndex) {
private static Node interpolateString(Parser parser, String string, int newlineIndex, boolean preprocessBracedBackslashQuotes) {
ArrayList<String> buffers = new ArrayList<>();
buffers.add(string);
StringParser.ParsedString rawStr = new StringParser.ParsedString(newlineIndex, newlineIndex, buffers, ' ', ' ', ' ', ' ');
Expand All @@ -273,7 +289,8 @@ private static Node interpolateString(Parser parser, String string, int newlineI
List<OperatorNode> heredocContext = new ArrayList<>();

// Parse the string with the new context, preserving the original parser context
Node result = StringDoubleQuoted.parseDoubleQuotedString(parser.ctx, rawStr, true, true, false, heredocContext, parser);
Node result = StringDoubleQuoted.parseDoubleQuotedString(parser.ctx, rawStr, true, true, false, heredocContext, parser,
preprocessBracedBackslashQuotes);

// After parsing, any heredocs declared in this context need to be added to the parent
parser.getHeredocNodes().addAll(heredocContext);
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/org/perlonjava/frontend/parser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ public class Parser {
// re-tokenized string content and __LINE__ should use this as the base line,
// counting newlines from the inner token list to offset from it.
public int baseLineNumber = 0;
/**
* When {@code true} (qq and normal strings), {@link Variable#parseBracedVariable} may rewrite
* {@code \"} before {@code "} inside {@code ${...}} so patterns like {@code "${\"name\"}"}
* interpolate {@code $name}. When {@code false} (qx / command heredocs), keep the backslash so
* {@code ${\"hello"}} parses as a reference to the string {@code hello} and dereferences it,
* matching Perl 5.
*/
public boolean preprocessBracedBackslashQuotesInInterpolation = true;

/**
* Constructs a Parser with the given context and tokens.
Expand Down
126 changes: 113 additions & 13 deletions src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package org.perlonjava.frontend.parser;

import org.perlonjava.frontend.astnode.*;
import org.perlonjava.frontend.lexer.Lexer;
import org.perlonjava.frontend.lexer.LexerToken;
import org.perlonjava.frontend.lexer.LexerTokenType;
import org.perlonjava.runtime.runtimetypes.GlobalVariable;
import org.perlonjava.runtime.runtimetypes.PerlCompilerException;

import java.util.List;

import static org.perlonjava.frontend.parser.ListParser.consumeCommas;
import static org.perlonjava.frontend.parser.ListParser.isComma;
import static org.perlonjava.frontend.parser.ParserNodeUtils.scalarUnderscore;
Expand Down Expand Up @@ -245,21 +248,27 @@ static ListNode consumeArgsWithPrototype(Parser parser, String prototype, boolea

// Check for too many arguments without parentheses only if prototype expects 2+ args
if (!hasParentheses && countPrototypeArgs(prototype) >= 2) {
// If we see a comma after parsing all required args, check if it's a trailing comma
if (isComma(TokenUtils.peek(parser))) {
// Consume the comma and check what follows
int saveIndex = parser.tokenIndex;
consumeCommas(parser);
LexerToken nextToken = TokenUtils.peek(parser);
// If followed by a statement terminator, it's a trailing comma (allowed)
// Otherwise, it's too many arguments
if (!Parser.isExpressionTerminator(nextToken) &&
nextToken.type != LexerTokenType.EOF &&
!nextToken.text.equals(")")) {
// Do not use TokenUtils.peek here: it runs Whitespace.skipWhitespace(), which
// processes NEWLINE and may fill in a pending << heredoc before arguments are done.
List<LexerToken> tokens = parser.tokens;
int i = skipHorizontalWhitespaceTokens(tokens, parser.tokenIndex);
if (i < tokens.size() && isComma(tokens.get(i))) {
int j = skipHorizontalWhitespaceTokens(tokens, i + 1);
LexerToken nextToken = tokenAtOrEof(tokens, j);
// Trailing comma before the newline that starts a pending << heredoc body is valid
// (see op/exec.t package o block). A newline with no pending heredoc is an extra arg.
boolean trailingCommaBeforeHeredoc =
nextToken.type == LexerTokenType.NEWLINE && !parser.getHeredocNodes().isEmpty();
if (!trailingCommaBeforeHeredoc
&& !Parser.isExpressionTerminator(nextToken)
&& nextToken.type != LexerTokenType.EOF
&& !nextToken.text.equals(")")) {
throwTooManyArgumentsError(parser);
}
// Restore position - the comma will be handled by the caller
parser.tokenIndex = saveIndex;
if (trailingCommaBeforeHeredoc) {
parser.tokenIndex = i;
consumeCommas(parser);
}
}
}
}
Expand Down Expand Up @@ -300,6 +309,26 @@ static ListNode consumeArgsWithPrototype(Parser parser, String prototype, boolea
return args;
}

/**
 * Returns the index of the first token at or after {@code i} whose type is not
 * {@code WHITESPACE}, or {@code tokens.size()} when only such tokens remain.
 * Only the {@code WHITESPACE} type is skipped, so a {@code NEWLINE} token ends the scan
 * and is left untouched for the caller.
 */
private static int skipHorizontalWhitespaceTokens(List<LexerToken> tokens, int i) {
    int pos = i;
    while (pos < tokens.size() && tokens.get(pos).type == LexerTokenType.WHITESPACE) {
        pos++;
    }
    return pos;
}

/**
 * Returns the token stored at index {@code i}, or a freshly created EOF token when
 * {@code i} lies at or beyond the end of {@code tokens}.
 */
private static LexerToken tokenAtOrEof(List<LexerToken> tokens, int i) {
    return i < tokens.size()
            ? tokens.get(i)
            : new LexerToken(LexerTokenType.EOF, Lexer.EOF);
}

private static int firstNonCodeArgIndexAfterAmpersandPrototype(String prototype, ListNode args) {
if (prototype == null || args.elements.size() < 2) {
return -1;
Expand Down Expand Up @@ -415,6 +444,12 @@ private static void parsePrototypeArguments(Parser parser, ListNode args, String
parser.throwError("syntax error");
}

// Builtin Perl parsing (no parentheses): `symlink qw(a b)`, `atan2 qw(1 2)`, etc.
// One parenthesis-free list literal fills successive leading `$` / `_` prototype slots.
if (tryConsumeParenFreeWordListForLeadingScalars(parser, args, prototype, hasParentheses)) {
return;
}

// If prototype starts with ';' and we're at a terminator or single comma, all arguments are optional
if (prototype.startsWith(";") && (isArgumentTerminator(parser) || isComma(TokenUtils.peek(parser)))) {
return;
Expand Down Expand Up @@ -470,6 +505,71 @@ private static void parsePrototypeArguments(Parser parser, ListNode args, String
}
}

/**
 * Tries to fill the leading {@code $} prototype slots from one parenthesis-free word list,
 * as in {@code symlink qw(a b)}.
 *
 * <p>One expression is parsed speculatively. It is accepted only when it is a
 * {@link ListNode} made up entirely of {@link StringNode}s whose element count equals the
 * number of leading {@code $} slots. Each accepted word is converted to scalar context,
 * annotated with {@code context = SCALAR} and appended to {@code args}. In every other case
 * the token position and the pending heredoc list are restored to their state before the
 * attempt.
 *
 * @param parser         the parser positioned at the first argument token
 * @param args           the argument list that receives the scalar arguments on success
 * @param prototype      the prototype string; {@code null} or empty disables the attempt
 * @param hasParentheses {@code true} when the call was written with parentheses, which
 *                       disables the attempt
 * @return {@code true} if the word list was consumed into {@code args}, {@code false} if
 *         nothing was consumed
 */
private static boolean tryConsumeParenFreeWordListForLeadingScalars(
        Parser parser, ListNode args, String prototype, boolean hasParentheses) {
    if (hasParentheses || prototype == null || prototype.isEmpty()) {
        return false;
    }
    int scalarSlotCount = countLeadingConsecutiveDollarPrototypeSlots(prototype);
    if (scalarSlotCount < 2) {
        // A single slot never needs a list to be spread over it.
        return false;
    }

    // Snapshot everything the speculative parse may change.
    int startIndex = parser.tokenIndex;
    List<OperatorNode> heredocSnapshot = ParseHeredoc.saveHeredocState(parser);

    Node parsed = parser.parseExpression(parser.getPrecedence(","));
    if (!(parsed instanceof ListNode wordList)
            || wordList.elements.size() != scalarSlotCount
            || !isPlainStringWordList(wordList)) {
        // Not a matching word list: rewind so the regular argument parsing can run.
        parser.tokenIndex = startIndex;
        parser.getHeredocNodes().clear();
        parser.getHeredocNodes().addAll(heredocSnapshot);
        return false;
    }

    for (Node word : wordList.elements) {
        Node scalarArg = ParserNodeUtils.toScalarContext(word);
        copyArgumentStartIndex(word, scalarArg);
        scalarArg.setAnnotation("context", "SCALAR");
        args.elements.add(scalarArg);
    }
    return true;
}

/**
 * Counts the {@code $} characters at the start of a prototype, ignoring space, tab,
 * newline and carriage-return characters between them. Counting stops at the first
 * character that is neither {@code $} nor one of those whitespace characters.
 *
 * <p>Only {@code $} is counted, so a prototype such as {@code $_} yields 1. That keeps it
 * below the two-slot threshold of the parenthesis-free word-list lookahead.
 *
 * @param prototype the prototype string to inspect; must not be {@code null}
 * @return the number of leading {@code $} slots
 */
private static int countLeadingConsecutiveDollarPrototypeSlots(String prototype) {
    int dollarSlots = 0;
    for (int pos = 0, len = prototype.length(); pos < len; pos++) {
        switch (prototype.charAt(pos)) {
            case ' ', '\t', '\n', '\r' -> {
                // Whitespace between slots does not end the run.
            }
            case '$' -> dollarSlots++;
            default -> {
                return dollarSlots;
            }
        }
    }
    return dollarSlots;
}

/**
 * Reports whether {@code ln} consists solely of {@link StringNode} elements.
 * An empty list satisfies the condition.
 */
private static boolean isPlainStringWordList(ListNode ln) {
    return ln.elements.stream().allMatch(StringNode.class::isInstance);
}

/**
* Parses an argument with optional comma handling.
*
Expand Down
Loading
Loading