Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ public class BytecodeCompiler implements Visitor {
// Token index tracking for error reporting
private final TreeMap<Integer, Integer> pcToTokenIndex = new TreeMap<>();
int currentTokenIndex = -1; // Track current token for error reporting
// Callsite ID counter for /o modifier support (unique across all compilations)
private static int nextCallsiteId = 1;
// Track last result register for expression chaining
int lastResultReg = -1;
// Target output register for ALIAS elimination (same save/restore pattern as currentCallContext).
Expand Down Expand Up @@ -3605,6 +3607,14 @@ int allocateRegister() {
return reg;
}

/**
 * Hands out the next callsite ID used by regex operations compiled with the /o modifier.
 * The ID is taken from the static {@code nextCallsiteId} counter, which is then advanced
 * by one, so successive calls return consecutive values.
 *
 * NOTE(review): the counter is a plain static int and the read-then-increment below is
 * not synchronized; confirm that compilations never run concurrently.
 *
 * @return the callsite ID that was current before this call advanced the counter
 */
int allocateCallsiteId() {
    int id = nextCallsiteId;
    nextCallsiteId = id + 1;
    return id;
}

int allocateOutputRegister() {
if (targetOutputReg >= 0) {
int reg = targetOutputReg;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c
// =================================================================

case Opcodes.DEFINED, Opcodes.REF, Opcodes.BLESS, Opcodes.ISA, Opcodes.PROTOTYPE,
Opcodes.QUOTE_REGEX -> {
Opcodes.QUOTE_REGEX, Opcodes.QUOTE_REGEX_O -> {
pc = executeTypeOps(opcode, bytecode, pc, registers, code);
}

Expand Down Expand Up @@ -1858,6 +1858,14 @@ private static int executeTypeOps(int opcode, int[] bytecode, int pc,
registers[rd] = RuntimeRegex.getQuotedRegex(registers[patternReg].scalar(), registers[flagsReg].scalar());
return pc;
}
case Opcodes.QUOTE_REGEX_O -> {
int rd = bytecode[pc++];
int patternReg = bytecode[pc++];
int flagsReg = bytecode[pc++];
int callsiteId = bytecode[pc++];
registers[rd] = RuntimeRegex.getQuotedRegex(registers[patternReg].scalar(), registers[flagsReg].scalar(), callsiteId);
return pc;
}
default -> throw new RuntimeException("Unknown type opcode: " + opcode);
}
}
Expand Down
59 changes: 47 additions & 12 deletions src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java
Original file line number Diff line number Diff line change
Expand Up @@ -297,21 +297,39 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode
bytecodeCompiler.throwCompilerException("quoteRegex requires pattern and flags");
}

// Check if /o modifier is used (flags are typically a StringNode)
boolean hasOModifier = false;
Node flagsNode = operand.elements.get(1);
if (flagsNode instanceof StringNode) {
hasOModifier = ((StringNode) flagsNode).value.contains("o");
}

// Compile pattern and flags
operand.elements.get(0).accept(bytecodeCompiler); // Pattern
int patternReg = bytecodeCompiler.lastResultReg;

operand.elements.get(1).accept(bytecodeCompiler); // Flags
flagsNode.accept(bytecodeCompiler); // Flags
int flagsReg = bytecodeCompiler.lastResultReg;

// Allocate result register
int rd = bytecodeCompiler.allocateOutputRegister();

// Emit QUOTE_REGEX opcode
bytecodeCompiler.emit(Opcodes.QUOTE_REGEX);
bytecodeCompiler.emitReg(rd);
bytecodeCompiler.emitReg(patternReg);
bytecodeCompiler.emitReg(flagsReg);
// Emit appropriate opcode based on /o modifier
if (hasOModifier) {
// Use QUOTE_REGEX_O with callsite ID for /o modifier
int callsiteId = bytecodeCompiler.allocateCallsiteId();
bytecodeCompiler.emit(Opcodes.QUOTE_REGEX_O);
bytecodeCompiler.emitReg(rd);
bytecodeCompiler.emitReg(patternReg);
bytecodeCompiler.emitReg(flagsReg);
bytecodeCompiler.emitReg(callsiteId);
} else {
// Normal QUOTE_REGEX
bytecodeCompiler.emit(Opcodes.QUOTE_REGEX);
bytecodeCompiler.emitReg(rd);
bytecodeCompiler.emitReg(patternReg);
bytecodeCompiler.emitReg(flagsReg);
}

bytecodeCompiler.lastResultReg = rd;
} else if (op.equals("++") || op.equals("--") || op.equals("++postfix") || op.equals("--postfix")) {
Expand Down Expand Up @@ -1995,20 +2013,37 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode
bytecodeCompiler.throwCompilerException("matchRegex requires pattern and flags");
}

// Check if /o modifier is used (flags are typically a StringNode)
boolean hasOModifier = false;
Node flagsNode = args.elements.get(1);
if (flagsNode instanceof StringNode) {
hasOModifier = ((StringNode) flagsNode).value.contains("o");
}

// Compile pattern
args.elements.get(0).accept(bytecodeCompiler);
int patternReg = bytecodeCompiler.lastResultReg;

// Compile flags
args.elements.get(1).accept(bytecodeCompiler);
flagsNode.accept(bytecodeCompiler);
int flagsReg = bytecodeCompiler.lastResultReg;

// Create quoted regex using QUOTE_REGEX opcode
// Create quoted regex using appropriate opcode
int regexReg = bytecodeCompiler.allocateRegister();
bytecodeCompiler.emit(Opcodes.QUOTE_REGEX);
bytecodeCompiler.emitReg(regexReg);
bytecodeCompiler.emitReg(patternReg);
bytecodeCompiler.emitReg(flagsReg);
if (hasOModifier) {
// Use QUOTE_REGEX_O with callsite ID for /o modifier
int callsiteId = bytecodeCompiler.allocateCallsiteId();
bytecodeCompiler.emit(Opcodes.QUOTE_REGEX_O);
bytecodeCompiler.emitReg(regexReg);
bytecodeCompiler.emitReg(patternReg);
bytecodeCompiler.emitReg(flagsReg);
bytecodeCompiler.emitReg(callsiteId);
} else {
bytecodeCompiler.emit(Opcodes.QUOTE_REGEX);
bytecodeCompiler.emitReg(regexReg);
bytecodeCompiler.emitReg(patternReg);
bytecodeCompiler.emitReg(flagsReg);
}

// Check if a string was provided (from =~ binding)
if (args.elements.size() > 2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,14 @@ public String disassemble() {
sb.append("QUOTE_REGEX r").append(rd).append(" = qr{r").append(patternReg)
.append("}r").append(flagsReg).append("\n");
break;
case Opcodes.QUOTE_REGEX_O:
rd = bytecode[pc++];
patternReg = bytecode[pc++];
flagsReg = bytecode[pc++];
int callsiteId = bytecode[pc++];
sb.append("QUOTE_REGEX_O r").append(rd).append(" = qr{r").append(patternReg)
.append("}r").append(flagsReg).append(" callsite=").append(callsiteId).append("\n");
break;
case Opcodes.ITERATOR_CREATE:
rd = bytecode[pc++];
rs = bytecode[pc++];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,14 @@ public static int executeStringConcatAssign(int[] bytecode, int pc, RuntimeBase[
if (BytecodeInterpreter.isImmutableProxy(registers[rd])) {
registers[rd] = BytecodeInterpreter.ensureMutableScalar(registers[rd]);
}
RuntimeScalar target = (RuntimeScalar) registers[rd];
RuntimeScalar result = StringOperators.stringConcat(
(RuntimeScalar) registers[rd],
target,
(RuntimeScalar) registers[rs]
);
((RuntimeScalar) registers[rd]).set(result);
target.set(result);
// Invalidate pos() - any string modification should reset pos to undef
RuntimePosLvalue.invalidatePos(target);
return pc;
}

Expand Down
6 changes: 6 additions & 0 deletions src/main/java/org/perlonjava/backend/bytecode/Opcodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -1816,6 +1816,12 @@ public class Opcodes {
*/
public static final short TIMES = 373;

/**
* Quote regex with /o modifier support: rd = RuntimeRegex.getQuotedRegex(pattern_reg, flags_reg, callsite_id)
* Format: QUOTE_REGEX_O rd pattern_reg flags_reg callsite_id
*/
public static final short QUOTE_REGEX_O = 374;

private Opcodes() {
} // Utility class - no instantiation
}
11 changes: 11 additions & 0 deletions src/main/java/org/perlonjava/backend/jvm/EmitBinaryOperator.java
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,17 @@ static void handleCompoundAssignment(EmitterVisitor emitterVisitor, BinaryOperat
}
// assign to the Lvalue
mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/runtimetypes/RuntimeScalar", "set", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false);

// For string concat assign (.=), invalidate pos() since string was modified
if (node.operator.equals(".=")) {
mv.visitInsn(Opcodes.DUP);
mv.visitMethodInsn(Opcodes.INVOKESTATIC,
"org/perlonjava/runtime/runtimetypes/RuntimePosLvalue",
"invalidatePos",
"(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)V",
false);
}

EmitOperator.handleVoidContext(emitterVisitor);
}
}
Expand Down
29 changes: 23 additions & 6 deletions src/main/java/org/perlonjava/backend/jvm/EmitRegex.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
* transliteration and replacement.
*/
public class EmitRegex {
// Callsite ID counter for /o modifier support (unique across all JVM compilations)
private static int nextCallsiteId = 100000; // Start at 100000 to avoid collision with interpreter IDs

/**
* Handles the binding regex operation where a variable is bound to a regex operation.
Expand Down Expand Up @@ -247,14 +249,29 @@ static void handleMatchRegex(EmitterVisitor emitterVisitor, OperatorNode node) {
ListNode operand = (ListNode) node.operand;
EmitterVisitor scalarVisitor = emitterVisitor.with(RuntimeContextType.SCALAR);

// Check if /o modifier is present
boolean hasOModifier = false;
Node flagsNode = operand.elements.get(1);
if (flagsNode instanceof StringNode) {
hasOModifier = ((StringNode) flagsNode).value.contains("o");
}

// Process pattern and flags
operand.elements.get(0).accept(scalarVisitor); // Pattern
operand.elements.get(1).accept(scalarVisitor); // Flags

// Create the regex matcher
emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC,
"org/perlonjava/runtime/regex/RuntimeRegex", "getQuotedRegex",
"(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false);
flagsNode.accept(scalarVisitor); // Flags

// Create the regex matcher (use 3-argument version for /o)
if (hasOModifier) {
int callsiteId = nextCallsiteId++;
emitterVisitor.ctx.mv.visitLdcInsn(callsiteId);
emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC,
"org/perlonjava/runtime/regex/RuntimeRegex", "getQuotedRegex",
"(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;I)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false);
} else {
emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC,
"org/perlonjava/runtime/regex/RuntimeRegex", "getQuotedRegex",
"(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false);
}

int regexSlot = emitterVisitor.ctx.javaClassInfo.acquireSpillSlot();
boolean pooledRegex = regexSlot >= 0;
Expand Down
60 changes: 60 additions & 0 deletions src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ protected boolean removeEldestEntry(Map.Entry<String, RuntimeRegex> eldest) {
return size() > MAX_REGEX_CACHE_SIZE;
}
};
// Cache for /o modifier - maps callsite ID to compiled regex (only first compilation is used)
private static final Map<Integer, RuntimeScalar> optimizedRegexCache = new LinkedHashMap<>();
// Global matcher used for regex operations
public static Matcher globalMatcher; // Provides Perl regex variables like %+, %-
public static String globalMatchString; // Provides Perl regex variables like $&
Expand Down Expand Up @@ -314,6 +316,37 @@ public static RuntimeScalar getQuotedRegex(RuntimeScalar patternString, RuntimeS
return new RuntimeScalar(compile(patternString.toString(), modifierStr));
}

/**
 * Callsite-aware variant of getQuotedRegex that implements the /o modifier.
 * <p>
 * If the modifier string does not contain 'o', this simply delegates to the
 * two-argument getQuotedRegex. If it does, the first regex produced for a given
 * callsiteId is stored in optimizedRegexCache and returned on every later call
 * with that same ID; the pattern string passed on later calls is not consulted.
 *
 * @param patternString The regex pattern string.
 * @param modifiers     Modifiers for the regex pattern (may include 'o').
 * @param callsiteId    Identifier of the calling site, used as the /o cache key.
 * @return A RuntimeScalar representing the compiled regex.
 */
public static RuntimeScalar getQuotedRegex(RuntimeScalar patternString, RuntimeScalar modifiers, int callsiteId) {
    boolean compileOnce = modifiers.toString().contains("o");
    if (!compileOnce) {
        // No /o modifier: compile through the regular path, no per-callsite caching
        return getQuotedRegex(patternString, modifiers);
    }

    // /o modifier: reuse whatever this callsite produced the first time around
    RuntimeScalar regex = optimizedRegexCache.get(callsiteId);
    if (regex == null) {
        // First execution of this callsite - compile now and remember the result
        regex = getQuotedRegex(patternString, modifiers);
        optimizedRegexCache.put(callsiteId, regex);
    }
    return regex;
}

/**
* Internal variant of qr// that includes a `replacement`.
* This is the internal representation of the `s///` operation.
Expand Down Expand Up @@ -409,6 +442,33 @@ public static RuntimeBase matchRegex(RuntimeScalar quotedRegex, RuntimeScalar st
private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeScalar string, int ctx) {
RuntimeRegex regex = resolveRegex(quotedRegex);
regex = ensureCompiledForRuntime(regex);

// Save original flags before potentially changing regex
RegexFlags originalFlags = regex.regexFlags;

// Handle empty pattern - reuse last successful pattern or use empty pattern
if (regex.patternString == null || regex.patternString.isEmpty()) {
if (lastSuccessfulPattern != null) {
// Use the pattern from last successful match
// But keep the current flags (especially /g and /i)
Pattern pattern = lastSuccessfulPattern.pattern;
// Re-apply current flags if they differ
if (originalFlags != null && !originalFlags.equals(lastSuccessfulPattern.regexFlags)) {
// Need to recompile with current flags
int newFlags = originalFlags.toPatternFlags();
pattern = Pattern.compile(lastSuccessfulPattern.patternString, newFlags);
}
// Create a temporary regex with the right pattern and current flags
RuntimeRegex tempRegex = new RuntimeRegex();
tempRegex.pattern = pattern;
tempRegex.patternString = lastSuccessfulPattern.patternString;
tempRegex.hasPreservesMatch = lastSuccessfulPattern.hasPreservesMatch || (originalFlags != null && originalFlags.preservesMatch());
tempRegex.regexFlags = originalFlags;
tempRegex.useGAssertion = originalFlags != null && originalFlags.useGAssertion();
regex = tempRegex;
}
// If no previous pattern, the empty pattern matches empty string at start (default behavior)
}

// Debug logging
if (DEBUG_REGEX) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,21 @@ public static RuntimeScalar pos(RuntimeScalar perlVariable) {
return position;
}

/**
 * Drops any stored pos() state for the given scalar.
 * Intended to be called after the scalar's string value has been modified
 * (for example by .=), so that no position entry remains cached for it.
 * A null argument is ignored.
 *
 * @param perlVariable the scalar whose pos should be invalidated; may be null
 */
public static void invalidatePos(RuntimeScalar perlVariable) {
    if (perlVariable != null) {
        // Removing the entry outright (rather than resetting it) leaves nothing cached for this scalar
        positionCache.remove(perlVariable);
    }
}

private static void clearZeroLengthMatchTracking(RuntimeScalar perlVariable) {
CacheEntry cachedEntry = positionCache.get(perlVariable);
if (cachedEntry != null) {
Expand Down