diff --git a/dev/design/TAINT_MODE.md b/dev/design/TAINT_MODE.md new file mode 100644 index 000000000..6593e7d9a --- /dev/null +++ b/dev/design/TAINT_MODE.md @@ -0,0 +1,387 @@ +# Taint Mode Implementation Plan + +## Overview + +Perl's taint mode (`-T` flag) tracks data from external sources (environment variables, command line arguments, file input, etc.) and prevents their use in potentially dangerous operations like `system()` calls without explicit validation. + +## Requirements + +1. **No extra storage for normal scalars** - RuntimeScalar size must not increase +2. **No extra runtime checks for normal scalars** - Only tainted scalars incur overhead +3. **Gradual implementation** - Each phase delivers working functionality + +## Design: TAINTED Type (Wrapper Pattern) + +Add a `TAINTED` type to RuntimeScalarType, following the existing TIED_SCALAR pattern: + +```java +// In RuntimeScalarType.java +public static final int TAINTED = 17; // Next available type + +// A tainted scalar: +// - type = TAINTED +// - value = RuntimeScalar (the actual scalar with its own type) +``` + +**How it meets requirements:** +- Normal scalars unchanged (no extra fields) +- Only tainted scalars have `type == TAINTED` +- Taint check is alongside existing TIED_SCALAR check (not a new check pattern) +- Follows established wrapper pattern in the codebase + +**Key methods:** + +```java +// In RuntimeScalar.java + +public boolean isTainted() { + return type == TAINTED; +} + +// Get the actual scalar (unwrap if tainted) +public RuntimeScalar getActualScalar() { + return (type == TAINTED) ? (RuntimeScalar) value : this; +} + +// Create a tainted wrapper +public static RuntimeScalar taint(RuntimeScalar scalar) { + if (scalar.type == TAINTED) return scalar; // Already tainted + RuntimeScalar tainted = new RuntimeScalar(); + tainted.type = TAINTED; + tainted.value = scalar; + return tainted; +} +``` + +**Taint propagation in set():** + +```java +public RuntimeScalar set(RuntimeScalar value) { + if (value == null) { ... } + if (value.type == TIED_SCALAR) { + return set(value.tiedFetch()); + } + if (this.type == TIED_SCALAR) { + return this.tiedStore(value); + } + // Taint propagation - preserve taint wrapper + if (value.type == TAINTED) { + RuntimeScalar inner = (RuntimeScalar) value.value; + this.type = TAINTED; + this.value = new RuntimeScalar(inner); + return this; + } + this.type = value.type; + this.value = value.value; + return this; +} +``` + +**Value access (unwrap when needed):** + +```java +// Methods that need the actual value unwrap first +public int getInt() { + if (type == TAINTED) { + return ((RuntimeScalar) value).getInt(); + } + // ... existing implementation +} + +public String toString() { + if (type == TAINTED) { + return ((RuntimeScalar) value).toString(); + } + // ... existing implementation +} +``` + +--- + +## Phase 1: Minimal Fix for IPC::System::Simple + +**Goal:** Make `t/10_formatting.t` pass by refusing external commands in taint mode. + +**Approach:** Check `${^TAINT}` at dangerous operations rather than tracking propagation. + +### Changes + +1. **Modify bundled IPC::System::Simple** (`src/main/perl/lib/IPC/System/Simple.pm`): + ```perl + # In _check_taint or at the start of system/capture operations: + if (${^TAINT}) { + croak("Insecure dependency while running with -T switch"); + } + ``` + +2. **Keep existing infrastructure:** + - `-T` flag parsing (already done) + - `${^TAINT}` variable (already done) + +### Testing +- `t/10_formatting.t` - should pass (command refused in taint mode) + +### Limitations +- Not true taint semantics +- All external commands blocked in `-T` mode +- Cannot untaint values + +--- + +## Phase 2: TAINTED Type Infrastructure + +**Goal:** Add TAINTED type and basic taint detection. + +### Changes + +1. **Add TAINTED constant to RuntimeScalarType.java:** + ```java + public static final int TAINTED = 17; + ``` + +2. **Add helper methods to RuntimeScalar.java:** + ```java + public boolean isTainted() { + return type == TAINTED; + } + + public RuntimeScalar getActualScalar() { + return (type == TAINTED) ? (RuntimeScalar) value : this; + } + + public static RuntimeScalar taint(RuntimeScalar scalar) { + if (scalar.type == TAINTED) return scalar; + RuntimeScalar tainted = new RuntimeScalar(); + tainted.type = TAINTED; + tainted.value = new RuntimeScalar(scalar); // Copy to avoid aliasing + return tainted; + } + ``` + +3. **Mark tainted sources in GlobalContext.java:** + ```java + // $^X + if (compilerOptions.taintMode) { + RuntimeScalar exec = RuntimeScalar.taint(new RuntimeScalar(perlExecutable)); + GlobalVariable.aliasGlobalVariable("main::\030", exec); + } + + // %ENV + if (compilerOptions.taintMode) { + env.put(k, RuntimeScalar.taint(new RuntimeScalar(v))); + } + ``` + +4. **Update ScalarUtil.tainted():** + ```java + public static RuntimeList tainted(RuntimeArray args, int ctx) { + return new RuntimeScalar(args.get(0).isTainted()).getList(); + } + ``` + +### Testing +- `tainted($^X)` returns true when `-T` is used +- `tainted($ENV{PATH})` returns true when `-T` is used +- `tainted("constant")` returns false + +--- + +## Phase 3: Taint Propagation + +**Goal:** Taint propagates through assignment and operations. + +### Changes + +1. **Update set() to propagate taint:** + ```java + public RuntimeScalar set(RuntimeScalar value) { + // ... existing null and TIED_SCALAR checks ... + + // Propagate taint + if (value.type == TAINTED) { + RuntimeScalar inner = (RuntimeScalar) value.value; + this.type = TAINTED; + this.value = new RuntimeScalar(inner); + return this; + } + + this.type = value.type; + this.value = value.value; + return this; + } + ``` + +2. **Update value access methods to unwrap:** + ```java + public int getInt() { + if (type == TAINTED) return ((RuntimeScalar) value).getInt(); + // ... existing + } + + public double getDouble() { + if (type == TAINTED) return ((RuntimeScalar) value).getDouble(); + // ... existing + } + + public String toString() { + if (type == TAINTED) return ((RuntimeScalar) value).toString(); + // ... existing + } + + public boolean getBoolean() { + if (type == TAINTED) return ((RuntimeScalar) value).getBoolean(); + // ... existing + } + ``` + +3. **Update operations to propagate taint:** + + For binary operations, result is tainted if either operand is tainted: + ```java + // Example: string concatenation + public RuntimeScalar concat(RuntimeScalar other) { + boolean resultTainted = this.isTainted() || other.isTainted(); + RuntimeScalar thisActual = this.getActualScalar(); + RuntimeScalar otherActual = other.getActualScalar(); + + RuntimeScalar result = new RuntimeScalar(thisActual.toString() + otherActual.toString()); + + return resultTainted ? RuntimeScalar.taint(result) : result; + } + ``` + +### Testing +- `my $x = $^X; tainted($x)` returns true +- `my $y = $^X . ""; tainted($y)` returns true +- `tainted($clean . $tainted)` returns true + +--- + +## Phase 4: Dangerous Operation Enforcement + +**Goal:** Tainted data causes errors in dangerous operations. + +### Operations to Protect + +1. **Process execution:** + - `system()`, `exec()`, `qx//`, backticks + - `open()` with pipe + +2. **Code execution:** + - `eval($string)`, `require($file)`, `do($file)` + +3. **File system:** + - `unlink()`, `mkdir()`, `rmdir()` + - `chmod()`, `chown()`, `chdir()` + - `rename()`, `link()`, `symlink()` + +### Implementation + +```java +// Helper method +public static void checkTaint(RuntimeScalar scalar, String operation) { + if (scalar.isTainted()) { + throw new PerlCompilerException( + "Insecure dependency in " + operation + " while running with -T switch" + ); + } +} + +// In SystemOperator.java +public static RuntimeList system(RuntimeArray args, int ctx) { + for (RuntimeScalar arg : args.elements) { + checkTaint(arg, "system"); + } + // ... existing implementation +} +``` + +--- + +## Phase 5: Untainting via Regex + +**Goal:** Allow validated data to be untainted via regex captures. + +### Perl Semantics + +```perl +if ($tainted =~ /^([\w\/]+)$/) { + my $clean = $1; # $1 is NOT tainted +} +``` + +### Implementation + +Regex captures create normal RuntimeScalar, not tainted: + +```java +// In RuntimeRegex capture handling +// Always create non-tainted scalars for captures +RuntimeScalar capture = new RuntimeScalar(matchedText); +// The captured value is untainted regardless of source +``` + +--- + +## Files to Modify by Phase + +### Phase 1 +- `src/main/perl/lib/IPC/System/Simple.pm` - Add ${^TAINT} check + +### Phase 2 +- `RuntimeScalarType.java` - Add TAINTED constant +- `RuntimeScalar.java` - Add isTainted(), getActualScalar(), taint() +- `GlobalContext.java` - Create tainted scalars for $^X, %ENV, @ARGV +- `ScalarUtil.java` - Use isTainted() method +- `Builtin.java` - Update is_tainted() + +### Phase 3 +- `RuntimeScalar.java` - Update set(), getInt(), getDouble(), toString(), getBoolean() +- String/arithmetic operator classes - Propagate taint in operations + +### Phase 4 +- `SystemOperator.java` - Taint checks +- `FileOperator.java` - Taint checks +- `Eval.java` - Taint checks + +### Phase 5 +- `RuntimeRegex.java` - Ensure captures are not tainted + +--- + +## Cleanup + +After implementing the TAINTED type approach: +- Remove `RuntimeScalarTaint.java` (no longer needed) +- Remove any WeakHashMap-based taint tracking code + +--- + +## Progress Tracking + +### Current Status: Phase 1 complete + +### Completed Phases + +- [x] **Phase 1: Minimal Fix for IPC::System::Simple** (2026-03-24) + - Modified `src/main/perl/lib/IPC/System/Simple.pm` `_check_taint()` to block ALL external commands when `${^TAINT}` is set + - Added `isTainted()` method to RuntimeScalar.java (returns false, ready for Phase 2) + - Updated `ScalarUtil.tainted()` to use `isTainted()` method + - **Bonus fix**: Reset `$?` to 0 before END blocks in SpecialBlock.java (Perl semantics) - this fixed spurious "Looks like your test exited with X" warnings from Test::Builder + - **Test results**: IPC::System::Simple 15/17 test programs pass, 169/181 subtests (93%) + +### Infrastructure Complete +- [x] `-T` flag parsing +- [x] `${^TAINT}` variable +- [x] `isTainted()` method stub + +### Next Steps (Phase 2) +1. Add TAINTED type constant to RuntimeScalarType.java +2. Implement `taint()` and `getActualScalar()` methods +3. Mark `$^X`, `%ENV`, `@ARGV` as tainted sources +4. Update `tainted()` to return true for TAINTED type + +### Open Questions +- Should @ARGV be tainted? (Yes in Perl) +- Handle taint in hash/array element access? +- Taint and references - should $$ref propagate taint? diff --git a/dev/import-perl5/config.yaml b/dev/import-perl5/config.yaml index 04c0fdcce..483bb8de7 100644 --- a/dev/import-perl5/config.yaml +++ b/dev/import-perl5/config.yaml @@ -637,6 +637,15 @@ imports: - source: perl5/lib/Class/Struct.pm target: src/main/perl/lib/Class/Struct.pm + # From core distribution + - source: perl5/dist/constant/lib/constant.pm + target: src/main/perl/lib/constant.pm + + # Tests for distribution + - source: perl5/dist/constant/t + target: perl5_t/constant + type: directory + # Add more imports below as needed # Example with minimal fields: # - source: perl5/lib/SomeModule.pm diff --git a/src/main/java/org/perlonjava/app/cli/ArgumentParser.java b/src/main/java/org/perlonjava/app/cli/ArgumentParser.java index 13474ab53..14bcf948f 100644 --- a/src/main/java/org/perlonjava/app/cli/ArgumentParser.java +++ b/src/main/java/org/perlonjava/app/cli/ArgumentParser.java @@ -384,12 +384,12 @@ private static int processClusteredSwitches(String[] args, CompilerOptions parse case 'e': // Handle inline code specified with -e index = handleInlineCode(args, parsedArgs, index, j, arg); - break; + return index; case 'E': // Handle inline code specified with -E parsedArgs.useVersion = true; index = handleInlineCode(args, parsedArgs, index, j, arg); - break; + return index; case 'f': // No-op: don't do $sitelib/sitecustomize.pl at startup break; @@ -890,19 +890,24 @@ private static int handleInputRecordSeparator(String[] args, CompilerOptions par * @return The updated index after processing the inline code. */ private static int handleInlineCode(String[] args, CompilerOptions parsedArgs, int index, int j, String arg) { - if (j == arg.length() - 1 && index + 1 < args.length) { + String newCode; + if (j < arg.length() - 1) { + // If there's code specified immediately after -e (e.g., -e1, -e'print 1'), use it + newCode = arg.substring(j + 1); + } else if (index + 1 < args.length) { // If -e is the last character in the switch and there's a subsequent argument, treat it as code - String newCode = args[++index]; - if (parsedArgs.code == null) { - parsedArgs.code = newCode; - } else { - parsedArgs.code += "\n" + newCode; - } - parsedArgs.fileName = "-e"; // Indicate that the code was provided inline + newCode = args[++index]; } else { System.err.println("No code specified for -e."); System.exit(1); + return index; + } + if (parsedArgs.code == null) { + parsedArgs.code = newCode; + } else { + parsedArgs.code += "\n" + newCode; } + parsedArgs.fileName = "-e"; // Indicate that the code was provided inline return index; } diff --git a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java index 00bc90918..65d16a6fc 100644 --- a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java +++ b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java @@ -330,7 +330,7 @@ private static RuntimeList executeCode(RuntimeCode runtimeCode, EmitterContext c throw e; } catch (Throwable t) { if (isMainProgram) { - runEndBlocks(); + runEndBlocks(false); // Don't reset $? on exception path } RuntimeIO.closeAllHandles(); if (t instanceof RuntimeException runtimeException) { diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 8a3c0cb28..9afaeb3e3 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -5015,6 +5015,34 @@ public void visit(For3Node node) { @Override public void visit(IfNode node) { + // Try to evaluate the condition at compile time for dead code elimination + String currentPackage = symbolTable.getCurrentPackage(); + Boolean constantValue = getConstantConditionValue(node.condition, currentPackage); + + // For "unless", invert the condition + if (constantValue != null && "unless".equals(node.operator)) { + constantValue = !constantValue; + } + + // If we have a constant condition, we can eliminate dead code + if (constantValue != null) { + if (constantValue) { + // Condition is constant true - compile only the then branch + if (node.thenBranch != null) { + node.thenBranch.accept(this); + } + } else { + // Condition is constant false - compile only the else branch + if (node.elseBranch != null) { + node.elseBranch.accept(this); + } else { + lastResultReg = -1; + } + } + return; + } + + // Non-constant condition - compile normal if/else bytecode compileNode(node.condition, -1, RuntimeContextType.SCALAR); int condReg = lastResultReg; @@ -5067,6 +5095,110 @@ public void visit(IfNode node) { } } + /** + * Tries to determine if a condition node is a compile-time constant. + * This enables dead code elimination for patterns like: + * use constant WINDOWS => 0; + * if (WINDOWS) { ... Windows-only code ... } + * + * @param condition The condition node to evaluate + * @param currentPackage The current package name for resolving identifiers + * @return Boolean.TRUE if constant true, Boolean.FALSE if constant false, null if not constant + */ + private static Boolean getConstantConditionValue(Node condition, String currentPackage) { + // Handle literal numbers (e.g., if (0), if (1)) + if (condition instanceof NumberNode numNode) { + try { + double value = Double.parseDouble(numNode.value); + return value != 0; + } catch (NumberFormatException e) { + // Non-numeric value, treat as non-constant + return null; + } + } + + // Handle literal strings (e.g., if (""), if ("0"), if ("true")) + if (condition instanceof StringNode strNode) { + String value = strNode.value; + // Perl false: "", "0" + return !value.isEmpty() && !value.equals("0"); + } + + // Handle bare identifiers that might be constant subroutines (e.g., if (WINDOWS)) + if (condition instanceof IdentifierNode idNode) { + String fullName = NameNormalizer.normalizeVariableName(idNode.name, currentPackage); + RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName); + if (codeRef != null && codeRef.value instanceof RuntimeCode code) { + if (code.constantValue != null) { + // This is a constant subroutine - evaluate its value + RuntimeList constList = code.constantValue; + if (constList.elements.isEmpty()) { + return false; // Empty list is false + } + RuntimeBase firstElement = constList.elements.getFirst(); + if (firstElement instanceof RuntimeScalar scalar) { + return scalar.getBoolean(); + } + } + } + } + + // Handle explicit subroutine calls like WINDOWS() - check if it's a call to a constant sub + // The AST for WINDOWS() or WINDOWS looks like: + // BinaryOperatorNode("(", OperatorNode("&", IdentifierNode("WINDOWS")), ListNode()) + if (condition instanceof BinaryOperatorNode binNode && "(".equals(binNode.operator)) { + // Check if the left side is a subroutine reference: OperatorNode("&", IdentifierNode) + if (binNode.left instanceof OperatorNode opNode && "&".equals(opNode.operator)) { + if (opNode.operand instanceof IdentifierNode idNode) { + // Check if the arguments are empty (no-arg call like CONSTANT()) + boolean hasNoArgs = binNode.right == null + || (binNode.right instanceof ListNode listNode && listNode.elements.isEmpty()); + if (hasNoArgs) { + String fullName = NameNormalizer.normalizeVariableName(idNode.name, currentPackage); + RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName); + if (codeRef != null && codeRef.value instanceof RuntimeCode code) { + if (code.constantValue != null) { + RuntimeList constList = code.constantValue; + if (constList.elements.isEmpty()) { + return false; + } + RuntimeBase firstElement = constList.elements.getFirst(); + if (firstElement instanceof RuntimeScalar scalar) { + return scalar.getBoolean(); + } + } + } + } + } + } + // Also handle the case where left is a bare IdentifierNode (older AST representation) + if (binNode.left instanceof IdentifierNode idNode) { + // Check if the arguments are empty (no-arg call like CONSTANT()) + boolean hasNoArgs = binNode.right == null + || (binNode.right instanceof ListNode listNode && listNode.elements.isEmpty()); + if (hasNoArgs) { + String fullName = NameNormalizer.normalizeVariableName(idNode.name, currentPackage); + RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName); + if (codeRef != null && codeRef.value instanceof RuntimeCode code) { + if (code.constantValue != null) { + RuntimeList constList = code.constantValue; + if (constList.elements.isEmpty()) { + return false; + } + RuntimeBase firstElement = constList.elements.getFirst(); + if (firstElement instanceof RuntimeScalar scalar) { + return scalar.getBoolean(); + } + } + } + } + } + } + + // Not a constant we can evaluate at compile time + return null; + } + @Override public void visit(TernaryOperatorNode node) { // condition ? true_expr : false_expr diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java index d07486300..7e8174aa7 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java @@ -7,11 +7,8 @@ import org.objectweb.asm.Opcodes; import org.perlonjava.frontend.analysis.EmitterVisitor; import org.perlonjava.frontend.analysis.RegexUsageDetector; -import org.perlonjava.frontend.astnode.For3Node; -import org.perlonjava.frontend.astnode.IfNode; -import org.perlonjava.frontend.astnode.OperatorNode; -import org.perlonjava.frontend.astnode.TryNode; -import org.perlonjava.runtime.runtimetypes.RuntimeContextType; +import org.perlonjava.frontend.astnode.*; +import org.perlonjava.runtime.runtimetypes.*; import java.util.ArrayList; import java.util.List; @@ -37,8 +34,113 @@ public static void emitSignalCheck(MethodVisitor mv) { false); } + /** + * Tries to determine if a condition node is a compile-time constant. + * This enables dead code elimination for patterns like: + * use constant WINDOWS => 0; + * if (WINDOWS) { ... Windows-only code ... } + * + * @param condition The condition node to evaluate + * @param currentPackage The current package name for resolving identifiers + * @return Boolean.TRUE if constant true, Boolean.FALSE if constant false, null if not constant + */ + private static Boolean getConstantConditionValue(Node condition, String currentPackage) { + // Handle literal numbers (e.g., if (0), if (1)) + if (condition instanceof NumberNode numNode) { + try { + double value = Double.parseDouble(numNode.value); + return value != 0; + } catch (NumberFormatException e) { + // Non-numeric value, treat as non-constant + return null; + } + } + + // Handle literal strings (e.g., if (""), if ("0"), if ("true")) + if (condition instanceof StringNode strNode) { + String value = strNode.value; + // Perl false: "", "0" + return !value.isEmpty() && !value.equals("0"); + } + + // Handle bare identifiers that might be constant subroutines (e.g., if (WINDOWS)) + if (condition instanceof IdentifierNode idNode) { + String fullName = NameNormalizer.normalizeVariableName(idNode.name, currentPackage); + RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName); + if (codeRef != null && codeRef.value instanceof RuntimeCode code) { + if (code.constantValue != null) { + // This is a constant subroutine - evaluate its value + RuntimeList constList = code.constantValue; + if (constList.elements.isEmpty()) { + return false; // Empty list is false + } + RuntimeBase firstElement = constList.elements.getFirst(); + if (firstElement instanceof RuntimeScalar scalar) { + return scalar.getBoolean(); + } + } + } + } + + // Handle explicit subroutine calls like WINDOWS() - check if it's a call to a constant sub + // The AST for WINDOWS() or WINDOWS looks like: + // BinaryOperatorNode("(", OperatorNode("&", IdentifierNode("WINDOWS")), ListNode()) + if (condition instanceof BinaryOperatorNode binNode && "(".equals(binNode.operator)) { + // Check if the left side is a subroutine reference: OperatorNode("&", IdentifierNode) + if (binNode.left instanceof OperatorNode opNode && "&".equals(opNode.operator)) { + if (opNode.operand instanceof IdentifierNode idNode) { + // Check if the arguments are empty (no-arg call like CONSTANT()) + boolean hasNoArgs = binNode.right == null + || (binNode.right instanceof ListNode listNode && listNode.elements.isEmpty()); + if (hasNoArgs) { + String fullName = NameNormalizer.normalizeVariableName(idNode.name, currentPackage); + RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName); + if (codeRef != null && codeRef.value instanceof RuntimeCode code) { + if (code.constantValue != null) { + RuntimeList constList = code.constantValue; + if (constList.elements.isEmpty()) { + return false; + } + RuntimeBase firstElement = constList.elements.getFirst(); + if (firstElement instanceof RuntimeScalar scalar) { + return scalar.getBoolean(); + } + } + } + } + } + } + // Also handle the case where left is a bare IdentifierNode (older AST representation) + if (binNode.left instanceof IdentifierNode idNode) { + // Check if the arguments are empty (no-arg call like CONSTANT()) + boolean hasNoArgs = binNode.right == null + || (binNode.right instanceof ListNode listNode && listNode.elements.isEmpty()); + if (hasNoArgs) { + String fullName = NameNormalizer.normalizeVariableName(idNode.name, currentPackage); + RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName); + if (codeRef != null && codeRef.value instanceof RuntimeCode code) { + if (code.constantValue != null) { + RuntimeList constList = code.constantValue; + if (constList.elements.isEmpty()) { + return false; + } + RuntimeBase firstElement = constList.elements.getFirst(); + if (firstElement instanceof RuntimeScalar scalar) { + return scalar.getBoolean(); + } + } + } + } + } + } + + // Not a constant we can evaluate at compile time + return null; + } + /** * Emits bytecode for an if statement, including support for 'unless'. + * Performs dead code elimination when the condition is a compile-time constant. * * @param emitterVisitor The visitor used for code emission. * @param node The if node representing the if statement. @@ -46,6 +148,62 @@ public static void emitSignalCheck(MethodVisitor mv) { public static void emitIf(EmitterVisitor emitterVisitor, IfNode node) { if (CompilerOptions.DEBUG_ENABLED) emitterVisitor.ctx.logDebug("IF start: " + node.operator); + // Try to evaluate the condition at compile time for dead code elimination + String currentPackage = emitterVisitor.ctx.symbolTable.getCurrentPackage(); + Boolean constantValue = getConstantConditionValue(node.condition, currentPackage); + + // For "unless", invert the condition + if (constantValue != null && "unless".equals(node.operator)) { + constantValue = !constantValue; + } + + // If we have a constant condition, we can eliminate dead code + if (constantValue != null) { + if (CompilerOptions.DEBUG_ENABLED) { + emitterVisitor.ctx.logDebug("IF constant folding: condition is " + constantValue); + } + + if (constantValue) { + // Condition is constant true - emit only the then branch + // Still need to set up scope and labels for potential nested constructs + List branchLabels = new ArrayList<>(); + EmitBlock.collectIfChainLabels(node, branchLabels); + int branchLabelsPushed = EmitBlock.pushNewGotoLabels(emitterVisitor.ctx.javaClassInfo, branchLabels); + + int scopeIndex = emitterVisitor.ctx.symbolTable.enterScope(); + node.thenBranch.accept(emitterVisitor); + emitterVisitor.ctx.symbolTable.exitScope(scopeIndex); + + for (int i = 0; i < branchLabelsPushed; i++) { + emitterVisitor.ctx.javaClassInfo.popGotoLabels(); + } + } else { + // Condition is constant false - emit only the else branch + if (node.elseBranch != null) { + List branchLabels = new ArrayList<>(); + EmitBlock.collectIfChainLabels(node, branchLabels); + int branchLabelsPushed = EmitBlock.pushNewGotoLabels(emitterVisitor.ctx.javaClassInfo, branchLabels); + + int scopeIndex = emitterVisitor.ctx.symbolTable.enterScope(); + node.elseBranch.accept(emitterVisitor); + emitterVisitor.ctx.symbolTable.exitScope(scopeIndex); + + for (int i = 0; i < branchLabelsPushed; i++) { + emitterVisitor.ctx.javaClassInfo.popGotoLabels(); + } + } else { + // No else branch - emit undef if not void context + if (emitterVisitor.ctx.contextType != RuntimeContextType.VOID) { + EmitOperator.emitUndef(emitterVisitor.ctx.mv); + } + } + } + + if (CompilerOptions.DEBUG_ENABLED) emitterVisitor.ctx.logDebug("IF end (constant folded)"); + return; + } + + // Non-constant condition - emit normal if/else code List branchLabels = new ArrayList<>(); EmitBlock.collectIfChainLabels(node, branchLabels); int branchLabelsPushed = EmitBlock.pushNewGotoLabels(emitterVisitor.ctx.javaClassInfo, branchLabels); diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index ba9d3e49f..e669f8608 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,14 +33,14 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "204b2f32c"; + public static final String gitCommitId = "9470032eb"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitDate = "2026-03-24"; + public static final String gitCommitDate = "2026-03-25"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/ParserTables.java b/src/main/java/org/perlonjava/frontend/parser/ParserTables.java index f20c588c8..679f2354d 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ParserTables.java +++ b/src/main/java/org/perlonjava/frontend/parser/ParserTables.java @@ -27,14 +27,14 @@ public class ParserTables { public static final Set OVERRIDABLE_OP = Set.of( "caller", "chdir", "close", "connect", "die", "do", - "exit", + "exec", "exit", "fork", "getpwuid", "glob", "hex", "kill", "oct", "open", "readline", "readpipe", "rename", "require", - "stat", + "stat", "system", "time", "uc", "warn" diff --git a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java index bb64443f6..f268e2786 100644 --- a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java +++ b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java @@ -459,13 +459,16 @@ private static int handleTypeGlobArgument(Parser parser, ListNode args, boolean } // Handle my/our/state with multiple variables: pipe(my ($r, $w)) - // These should flatten to multiple arguments + // These should flatten to multiple arguments while preserving the declaration if (expr instanceof OperatorNode opNode && (opNode.operator.equals("my") || opNode.operator.equals("our") || opNode.operator.equals("state")) && opNode.operand instanceof ListNode listNode && listNode.elements.size() > 1) { - // Flatten all elements into args + // Flatten all elements into args, wrapping each in the same declaration type + String declOp = opNode.operator; for (Node element : listNode.elements) { - Node scalarArg = ParserNodeUtils.toScalarContext(element); + // Wrap each element in the same declaration type (my/our/state) + Node declNode = new OperatorNode(declOp, element, element.getIndex()); + Node scalarArg = ParserNodeUtils.toScalarContext(declNode); scalarArg.setAnnotation("context", "SCALAR"); args.elements.add(scalarArg); } diff --git a/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java b/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java index b8a92561a..2e0661fae 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java @@ -1017,11 +1017,12 @@ private boolean isValidArrayVariableStart(LexerToken token) { * @return true if this character should prevent interpolation */ private boolean isNonInterpolatingCharacter(String text) { - return switch (text) { - case "%", "|", "#", "\"", "\\", - "?" -> true; - default -> false; - }; + // Note: Special punctuation variables like $?, $|, $%, $", $\, $# etc. + // are all valid Perl special variables and SHOULD be interpolated. + // Previously this list incorrectly included these characters, preventing + // interpolation of valid special variables like $? (child error status). + // These characters are handled correctly by IdentifierParser.parseComplexIdentifier(). + return false; } /** diff --git a/src/main/java/org/perlonjava/runtime/operators/KillOperator.java b/src/main/java/org/perlonjava/runtime/operators/KillOperator.java index c5e110416..55e03d091 100644 --- a/src/main/java/org/perlonjava/runtime/operators/KillOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/KillOperator.java @@ -30,8 +30,10 @@ public static RuntimeScalar kill(int ctx, RuntimeBase... args) { int signal; // Handle named signals (e.g., "TERM", "KILL", "HUP") - if (signalArg.isString()) { - signal = getSignalNumber(signalArg.toString()); + // But first check if it's a numeric string like "9" from @ARGV + String strVal = signalArg.toString(); + if (signalArg.isString() && !isNumericString(strVal)) { + signal = getSignalNumber(strVal); if (signal == -1) { // Invalid signal name setErrno(22); // EINVAL @@ -252,6 +254,25 @@ private static void setErrno(int errno) { getGlobalVariable("main::!").set(new RuntimeScalar(errno)); } + // Check if a string represents a numeric value + private static boolean isNumericString(String s) { + if (s == null || s.isEmpty()) { + return false; + } + // Handle optional leading minus sign + int start = 0; + if (s.charAt(0) == '-') { + if (s.length() == 1) return false; + start = 1; + } + for (int i = start; i < s.length(); i++) { + if (!Character.isDigit(s.charAt(i))) { + return false; + } + } + return true; + } + /** * Check if a signal should terminate the process by default. * These are signals that Perl terminates on when no handler is set. diff --git a/src/main/java/org/perlonjava/runtime/operators/Operator.java b/src/main/java/org/perlonjava/runtime/operators/Operator.java index fc1c91d7a..6c50ce746 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Operator.java +++ b/src/main/java/org/perlonjava/runtime/operators/Operator.java @@ -280,15 +280,16 @@ private static RuntimeScalar substrImpl(int ctx, boolean warnEnabled, RuntimeBas if (offset < 0) { offset = strLength + offset; // When computed offset goes negative (before string start): - // - Clip offset to 0 - // - Reduce length by the overshoot amount - // Example: substr("a", -2, 2) -> offset=-1, clip to 0, length=2+(-1)=1, returns "a" - // But: substr("hello", -10, 1) -> offset=-5, length=1+(-5)=-4 → warn and return undef + // - If adjusted length is negative, warn and return undef (too much overshoot) + // - If adjusted length is >= 0, clip offset to 0 and return substring (no warning) + // Example: substr("hello", -10, 1) -> offset=-5, adjustedLen=-4 -> warn + undef + // Example: substr("a", -2, 1) -> offset=-1, adjustedLen=0 -> "" (no warning) + // Example: substr("a", -2, 2) -> offset=-1, adjustedLen=1, returns "a" (no warning) if (offset < 0) { - // Check if adjusted length would be non-positive (Perl warns in this case) + // Adjust length by the overshoot (negative offset value) int adjustedLength = length + offset; - if (adjustedLength <= 0) { - // Warn and return undef (same as positive offset out of bounds) + if (adjustedLength < 0) { + // Adjusted length is negative - warn and return undef if (warnEnabled) { WarnDie.warn(new RuntimeScalar("substr outside of string"), RuntimeScalarCache.scalarEmptyString); @@ -301,7 +302,14 @@ private static RuntimeScalar substrImpl(int ctx, boolean warnEnabled, RuntimeBas lvalue.value = null; return lvalue; } - // Reduce length by the overshoot (negative offset value) + if (adjustedLength == 0) { + // Adjusted length is exactly zero - return empty string (defined), no warning + if (replacement != null) { + return new RuntimeScalar(""); + } + return new RuntimeSubstrLvalue((RuntimeScalar) args[0], "", 0, 0); + } + // Reduce length by the overshoot, no warning length = adjustedLength; offset = 0; } diff --git a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java index 931124058..af8681ed7 100644 --- a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java @@ -33,25 +33,39 @@ public class SystemOperator { * Executes a system command and returns the output as a RuntimeBase. * This implements Perl's backtick operator (`command`). * + * Like Perl's native qx/backticks, this bypasses the shell for simple commands + * without metacharacters, and uses the shell only when necessary. + * * @param command The command to execute as a RuntimeScalar. * @param ctx The context type, determining the return type (list or scalar). * @return The output of the command as a RuntimeBase. * @throws PerlCompilerException if an error occurs during command execution or stream handling. */ public static RuntimeBase systemCommand(RuntimeScalar command, int ctx) { - CommandResult result = executeCommand(command.toString(), true); + String cmd = command.toString(); + CommandResult result; + + // Check for shell metacharacters - if none, execute directly without shell + // This matches native Perl behavior where simple commands bypass the shell + if (SHELL_METACHARACTERS.matcher(cmd).find()) { + // Has shell metacharacters, use shell + result = executeCommand(cmd, true); + } else { + // No shell metacharacters, split into words and execute directly + String[] words = cmd.trim().split("\\s+"); + result = executeCommandDirectCapture(Arrays.asList(words)); + } // Set $? to the exit status + // Note: result.exitCode is already in wait status format (from waitForProcessWithStatus) if (result.exitCode == -1) { // Command failed to execute getGlobalVariable("main::?").set(-1); getGlobalVariable(encodeSpecialVar("CHILD_ERROR_NATIVE")).set(-1); } else { - // Normal exit - put exit code in upper byte (Perl wait status convention) - int waitStatus = result.exitCode << 8; - getGlobalVariable("main::?").set(waitStatus); - // ${^CHILD_ERROR_NATIVE} also stores the wait status format - getGlobalVariable(encodeSpecialVar("CHILD_ERROR_NATIVE")).set(waitStatus); + // Wait status is already in correct format (exit_code << 8 or signal in lower bits) + getGlobalVariable("main::?").set(result.exitCode); + getGlobalVariable(encodeSpecialVar("CHILD_ERROR_NATIVE")).set(result.exitCode); } return processOutput(result.output, ctx); @@ -76,7 +90,19 @@ public static RuntimeScalar system(RuntimeList args, boolean hasHandle, int ctx) CommandResult result; - if (!hasHandle && flattenedArgs.size() == 1) { + if (hasHandle && flattenedArgs.size() >= 2) { + // Indirect object syntax: system { $program } @args + // In Perl, @args[0] becomes argv[0] (process name), @args[1:] are actual arguments + // Java's ProcessBuilder can't set argv[0] separately, so we skip it + // flattenedArgs = [$program, $argv0, $arg1, $arg2, ...] + // We want to execute: $program with arguments [$arg1, $arg2, ...] + String program = flattenedArgs.get(0); + // Skip flattenedArgs[1] (the custom argv[0]) since Java can't use it + List actualArgs = new ArrayList<>(); + actualArgs.add(program); + actualArgs.addAll(flattenedArgs.subList(2, flattenedArgs.size())); + result = executeCommandDirect(actualArgs); + } else if (!hasHandle && flattenedArgs.size() == 1) { // Single argument - check for shell metacharacters String command = flattenedArgs.getFirst(); if (SHELL_METACHARACTERS.matcher(command).find()) { @@ -294,6 +320,72 @@ private static CommandResult executeCommandDirect(List commandArgs) { return new CommandResult("", exitCode); } + /** + * Executes a command directly without shell interpretation and captures output. + * This is used by backticks/qx for commands without shell metacharacters. + * + * @param commandArgs List of command and arguments. + * @return CommandResult containing captured output and exit code. + */ + private static CommandResult executeCommandDirectCapture(List commandArgs) { + StringBuilder output = new StringBuilder(); + Process process = null; + int exitCode = -1; + + try { + flushAllHandles(); + + ProcessBuilder processBuilder = new ProcessBuilder(commandArgs); + String userDir = System.getProperty("user.dir"); + processBuilder.directory(new File(userDir)); + + // Copy %ENV to the subprocess environment + copyPerlEnvToProcessBuilder(processBuilder); + + // Inherit stderr (goes to terminal like Perl's backticks) + processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT); + + process = processBuilder.start(); + + final Process finalProcess = process; + final StringBuilder finalOutput = output; + + // Capture stdout + Thread stdoutThread = new Thread(() -> { + try (java.io.InputStream is = finalProcess.getInputStream()) { + byte[] buffer = new byte[8192]; + int bytesRead; + java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream(); + while ((bytesRead = is.read(buffer)) != -1) { + baos.write(buffer, 0, bytesRead); + } + synchronized (finalOutput) { + finalOutput.append(baos.toString()); + } + } catch (IOException e) { + // Stream closed - this is normal when process terminates + } + }); + + stdoutThread.start(); + exitCode = waitForProcessWithStatus(process); + stdoutThread.join(); + } catch (IOException e) { + // Command failed to start - return -1 as per Perl spec + setGlobalVariable("main::!", e.getMessage()); + exitCode = -1; + } catch (InterruptedException e) { + PerlSignalQueue.checkPendingSignals(); + Thread.interrupted(); + } finally { + if (process != null) { + process.destroy(); + } + } + + return new CommandResult(output.toString(), exitCode); + } + /** * Waits for a process to complete and returns the full wait status. * On POSIX systems, uses native waitpid() to get signal information. @@ -406,7 +498,16 @@ public static RuntimeScalar exec(RuntimeList args, boolean hasHandle, int ctx) { int exitCode; - if (!hasHandle && flattenedArgs.size() == 1) { + if (hasHandle && flattenedArgs.size() >= 2) { + // Indirect object syntax: exec { $program } @args + // In Perl, @args[0] becomes argv[0] (process name), @args[1:] are actual arguments + // Java's ProcessBuilder can't set argv[0] separately, so we skip it + String program = flattenedArgs.get(0); + List actualArgs = new ArrayList<>(); + actualArgs.add(program); + actualArgs.addAll(flattenedArgs.subList(2, flattenedArgs.size())); + exitCode = execCommandDirect(actualArgs); + } else if (!hasHandle && flattenedArgs.size() == 1) { // Single argument - check for shell metacharacters String command = flattenedArgs.getFirst(); if (SHELL_METACHARACTERS.matcher(command).find()) { diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Internals.java b/src/main/java/org/perlonjava/runtime/perlmodule/Internals.java index 1284e5f28..721955929 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Internals.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Internals.java @@ -107,10 +107,21 @@ public static RuntimeList svReadonly(RuntimeArray args, int ctx) { RuntimeScalarReadOnly readonlyScalar; if (targetScalar.type == RuntimeScalarType.INTEGER) { readonlyScalar = new RuntimeScalarReadOnly(targetScalar.getInt()); + } else if (targetScalar.type == RuntimeScalarType.DOUBLE) { + readonlyScalar = new RuntimeScalarReadOnly(targetScalar.getDouble()); } else if (targetScalar.type == RuntimeScalarType.BOOLEAN) { readonlyScalar = new RuntimeScalarReadOnly(targetScalar.getBoolean()); - } else if (targetScalar.type == RuntimeScalarType.STRING) { + } else if (targetScalar.type == RuntimeScalarType.STRING || targetScalar.type == RuntimeScalarType.BYTE_STRING) { readonlyScalar = new RuntimeScalarReadOnly(targetScalar.toString()); + } else if (targetScalar.type == RuntimeScalarType.ARRAYREFERENCE || + targetScalar.type == RuntimeScalarType.HASHREFERENCE || + targetScalar.type == RuntimeScalarType.REFERENCE || + targetScalar.type == RuntimeScalarType.CODE || + targetScalar.type == RuntimeScalarType.GLOBREFERENCE) { + // For reference types, don't modify the value - just mark as readonly + // In Perl, making a reference readonly prevents reassignment of the variable + // but doesn't change the referenced data + return new RuntimeList(); } else { readonlyScalar = new RuntimeScalarReadOnly(); // undef } diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/ScalarUtil.java b/src/main/java/org/perlonjava/runtime/perlmodule/ScalarUtil.java index 29acd1c58..2f6a348d6 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/ScalarUtil.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/ScalarUtil.java @@ -293,7 +293,7 @@ public static RuntimeList set_prototype(RuntimeArray args, int ctx) { /** - * Placeholder for the tainted functionality. + * Checks if a scalar is tainted (contains data from external sources). * * @param args The arguments passed to the method. * @param ctx The context in which the method is called. @@ -303,7 +303,6 @@ public static RuntimeList tainted(RuntimeArray args, int ctx) { if (args.size() != 1) { throw new IllegalStateException("Bad number of arguments for tainted() method"); } - // Placeholder for tainted functionality - return new RuntimeScalar(false).getList(); + return new RuntimeScalar(args.get(0).isTainted()).getList(); } } diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java b/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java index bd73c35b5..1a45e0be8 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/GlobalContext.java @@ -216,6 +216,7 @@ public static void initializeGlobals(CompilerOptions compilerOptions) { // Initialize built-in Perl classes DiamondIO.initialize(compilerOptions); Universal.initialize(); + Mro.initialize(); // mro functions available without 'use mro' Vars.initialize(); Subs.initialize(); Builtin.initialize(); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java index bcde47f4d..90bf555fb 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java @@ -593,6 +593,7 @@ public static RuntimeIO openPipe(RuntimeList runtimeList) { String arg = strings.getFirst(); String mode = null; String ioLayers = ""; + boolean noShell = false; // Flag to bypass shell interpretation if (strings.size() > 1) { if (arg.startsWith("|-")) { @@ -620,22 +621,31 @@ public static RuntimeIO openPipe(RuntimeList runtimeList) { } } + // Check for :noshell layer - bypasses shell for single-arg pipe open + // Usage: open($fh, "-|:noshell", $cmd) to execute $cmd literally without shell + if (ioLayers.contains(":noshell")) { + noShell = true; + ioLayers = ioLayers.replace(":noshell", ""); + } + if (arg.isEmpty()) { strings.removeFirst(); } else { strings.set(0, arg); } - // System.out.println("open pipe: mode=" + mode + " cmd=" + strings + " layers=" + ioLayers); + // System.out.println("open pipe: mode=" + mode + " cmd=" + strings + " layers=" + ioLayers + " noShell=" + noShell); if (">".equals(mode)) { - if (strings.size() == 1) { + // When noShell is true, always use list constructor to bypass shell + if (strings.size() == 1 && !noShell) { fh.ioHandle = new PipeOutputChannel(strings.getFirst()); } else { fh.ioHandle = new PipeOutputChannel(strings); } } else if ("<".equals(mode)) { - if (strings.size() == 1) { + // When noShell is true, always use list constructor to bypass shell + if (strings.size() == 1 && !noShell) { fh.ioHandle = new PipeInputChannel(strings.getFirst()); } else { fh.ioHandle = new PipeInputChannel(strings); @@ -648,8 +658,10 @@ public static RuntimeIO openPipe(RuntimeList runtimeList) { // Add the handle to the LRU cache addHandle(fh.ioHandle); - // Apply any I/O layers - fh.binmode(ioLayers); + // Apply any I/O layers (excluding the already-processed :noshell) + if (!ioLayers.isEmpty()) { + fh.binmode(ioLayers); + } } catch (IOException e) { handleIOException(e, "open failed"); fh = null; diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java index 3f89a0f05..d99f9b23e 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java @@ -44,11 +44,12 @@ public RuntimeList(RuntimeScalar value) { /** * Constructs a RuntimeList from another RuntimeList. + * Creates a shallow copy of the elements list to prevent mutation of the original. * * @param value The RuntimeList to initialize this list with. */ public RuntimeList(RuntimeList value) { - this.elements = value.elements; + this.elements = new ArrayList<>(value.elements); } /** diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index c4837dab3..ccc97ffef 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -625,6 +625,16 @@ public RuntimeScalar scalar() { return this; } + /** + * Returns whether this scalar is tainted. + * Will be updated to check type == TAINTED when taint mode is fully implemented. + * + * @return false for regular scalars, true for tainted scalars + */ + public boolean isTainted() { + return false; + } + // Add itself to a RuntimeArray. public void addToArray(RuntimeArray runtimeArray) { switch (runtimeArray.type) { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalarReadOnly.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalarReadOnly.java index 55e577b8f..4f97d3101 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalarReadOnly.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalarReadOnly.java @@ -77,6 +77,21 @@ public RuntimeScalarReadOnly(String s) { this.type = RuntimeScalarType.STRING; } + /** + * Constructs a RuntimeScalarReadOnly representing a double value. + * + * @param d the double value + */ + public RuntimeScalarReadOnly(double d) { + super(); + this.b = (d != 0.0); + this.i = (int) d; + this.s = ScalarUtils.formatLikePerl(d); + this.d = d; + this.value = d; + this.type = RuntimeScalarType.DOUBLE; + } + /** * Throws an exception as this scalar is immutable and cannot be modified. * diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/SpecialBlock.java b/src/main/java/org/perlonjava/runtime/runtimetypes/SpecialBlock.java index fe835df33..ee3bf5df0 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/SpecialBlock.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/SpecialBlock.java @@ -1,5 +1,7 @@ package org.perlonjava.runtime.runtimetypes; +import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalVariable; + /** * The SpecialBlock class manages different types of code blocks (end, init, check, and unitcheck) * that can be saved and executed in a specific order. This class provides methods to save and run @@ -48,8 +50,17 @@ public static void saveCheckBlock(RuntimeScalar codeRef) { /** * Executes all code blocks stored in the endBlocks array in LIFO order. + * + * @param resetChildStatus if true, reset $? to 0 before running END blocks (normal exit). + * if false, preserve $? (die/exception path). */ - public static void runEndBlocks() { + public static void runEndBlocks(boolean resetChildStatus) { + if (resetChildStatus) { + // Reset $? to 0 before END blocks run (Perl semantics for normal exit) + // This ensures END blocks see $? = 0 unless they explicitly set it + getGlobalVariable("main::?").set(0); + } + while (!endBlocks.isEmpty()) { RuntimeScalar block = RuntimeArray.pop(endBlocks); if (block.getDefinedBoolean()) { @@ -58,6 +69,14 @@ public static void runEndBlocks() { } } + /** + * Executes all code blocks stored in the endBlocks array in LIFO order. + * Resets $? to 0 before running (normal exit behavior). + */ + public static void runEndBlocks() { + runEndBlocks(true); + } + /** * Executes all code blocks stored in the initBlocks array in FIFO order. */ diff --git a/src/main/perl/lib/Config.pm b/src/main/perl/lib/Config.pm index be61c83c6..4b805639c 100644 --- a/src/main/perl/lib/Config.pm +++ b/src/main/perl/lib/Config.pm @@ -175,13 +175,16 @@ $os_name =~ s/\s+/_/g; d_getprotobyname => 'define', d_getservbyname => 'define', - # Signal handling - sig_name => 'HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV USR2 PIPE ALRM TERM', - sig_num => '1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', + # Signal handling - signal 0 is ZERO (used for process existence checks) + # Note: Signal names vary by OS. This is a common POSIX subset. + # The index in the space-separated list corresponds to the signal number. + sig_name => 'ZERO HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV USR2 PIPE ALRM TERM', + sig_num => '0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', # Executable exe_ext => $os_name =~ /win/ ? '.exe' : '', _exe => $os_name =~ /win/ ? '.exe' : '', + perlpath => $^X, # Path to the perl interpreter (jperl) # Version info version => '5.42.0', diff --git a/src/main/perl/lib/IPC/System/Simple.pm b/src/main/perl/lib/IPC/System/Simple.pm new file mode 100644 index 000000000..3e38c5c39 --- /dev/null +++ b/src/main/perl/lib/IPC/System/Simple.pm @@ -0,0 +1,269 @@ +package IPC::System::Simple; + +# PerlOnJava native implementation of IPC::System::Simple +# +# This is a simplified implementation that provides core functionality +# without the Windows-specific code that causes issues in PerlOnJava. +# The original module uses constants in dead code branches that PerlOnJava +# doesn't optimize away, causing "Bareword not allowed" errors. + +use strict; +use warnings; +use Carp; +use Config; +use List::Util qw(first); +use Scalar::Util qw(tainted); + +our $VERSION = '1.30'; +our @ISA = qw(Exporter); +our @EXPORT_OK = qw( + capture capturex + run runx + system systemx + $EXITVAL EXIT_ANY +); + +our $EXITVAL = -1; + +use constant EXIT_ANY_CONST => -1; +use constant EXIT_ANY => [ EXIT_ANY_CONST ]; + +# Platform detection constants (for compatibility with tests) +use constant WINDOWS => ($^O eq 'MSWin32'); +use constant VMS => ($^O eq 'VMS'); + +# Error message templates +use constant FAIL_START => q{"%s" failed to start: "%s"}; +use constant FAIL_SIGNAL => q{"%s" died to signal "%s" (%d)%s}; +use constant FAIL_BADEXIT => q{"%s" unexpectedly returned exit value %d}; +use constant FAIL_UNDEF => q{%s called with undefined command}; +use constant FAIL_TAINT => q{%s called with tainted argument "%s"}; +use constant FAIL_TAINT_ENV => q{%s called with tainted environment $ENV{%s}}; +use constant FAIL_INTERNAL => q{IPC::System::Simple Internal error: %s}; + +# Signal name lookup +my @Signal_from_number = split(' ', $Config{sig_name}); + +# Environment variables to check for taint +my @Check_tainted_env = qw(PATH IFS CDPATH ENV BASH_ENV); + +# system simply calls run +no warnings 'once'; +*system = \&run; +*systemx = \&runx; +use warnings; + +sub run { + _check_taint(@_); + my ($valid_returns, $command, @args) = _process_args(@_); + + if (@args) { + return systemx($valid_returns, $command, @args); + } + + # Single-arg system call (uses shell) + { + no warnings 'exec'; + CORE::system($command); + } + + return _process_child_error($?, $command, $valid_returns); +} + +sub runx { + _check_taint(@_); + my ($valid_returns, $command, @args) = _process_args(@_); + + # Use indirect object syntax to NEVER invoke the shell + # system { $program } $program, @args + # This forces Perl to treat $command as a literal program name + no warnings; + CORE::system { $command } $command, @args; + + return _process_child_error($?, $command, $valid_returns); +} + +sub capture { + _check_taint(@_); + my ($valid_returns, $command, @args) = _process_args(@_); + + if (@args) { + return capturex($valid_returns, $command, @args); + } + + $EXITVAL = -1; + my $wantarray = wantarray(); + + no warnings 'exec'; + + if ($wantarray) { + my @results = qx($command); + _process_child_error($?, $command, $valid_returns); + return @results; + } + + my $results = qx($command); + _process_child_error($?, $command, $valid_returns); + return $results; +} + +sub capturex { + _check_taint(@_); + my ($valid_returns, $command, @args) = _process_args(@_); + + $EXITVAL = -1; + my $wantarray = wantarray(); + + # Use :noshell layer to bypass shell interpretation completely + # This treats the command as a literal program name, not a shell command + # For multi-arg calls, the list form already bypasses shell + my $fh; + my $mode = @args ? "-|" : "-|:noshell"; + if (!open($fh, $mode, $command, @args)) { + croak sprintf(FAIL_START, $command, $!); + } + + my @results; + my $results; + + if ($wantarray) { + @results = <$fh>; + } else { + local $/; + $results = <$fh>; + } + + close($fh); + _process_child_error($?, $command, $valid_returns); + + return $wantarray ? @results : $results; +} + +# Quote a command and its arguments for shell execution +sub _quote_command { + my ($cmd, @args) = @_; + + # Quote each argument to protect special characters + my @quoted; + for my $arg ($cmd, @args) { + # Use single quotes and escape any single quotes in the argument + my $quoted = $arg; + $quoted =~ s/'/'\\''/g; + push @quoted, "'$quoted'"; + } + + return join(' ', @quoted); +} + +sub _check_taint { + return if not ${^TAINT}; + + # Phase 1 taint mode: block ALL external commands when -T is active + # This is a minimal implementation - future phases will implement + # proper taint propagation and allow untainting via regex captures + my $caller = (caller(1))[3]; + croak("Insecure dependency in $caller while running with -T switch"); +} + +sub _process_child_error { + my ($child_error, $command, $valid_returns) = @_; + + $EXITVAL = -1; + + if ($child_error == -1) { + croak sprintf(FAIL_START, $command, $!); + } elsif (($child_error & 0x7f) == 0) { + # WIFEXITED - normal exit + $EXITVAL = ($child_error >> 8) & 0xff; # WEXITSTATUS + return _check_exit($command, $EXITVAL, $valid_returns); + } elsif (($child_error & 0x7f) > 0 && ($child_error & 0x7f) < 0x7f) { + # WIFSIGNALED - killed by signal + my $signal_no = $child_error & 0x7f; # WTERMSIG + my $signal_name = $Signal_from_number[$signal_no] || "UNKNOWN"; + my $coredump = ($child_error & 0x80) ? " and dumped core" : ""; + croak sprintf(FAIL_SIGNAL, $command, $signal_name, $signal_no, $coredump); + } + + croak "'$command' ran without exit value or signal"; +} + +sub _check_exit { + my ($command, $exitval, $valid_returns) = @_; + + # EXIT_ANY accepts any exit value + if (@$valid_returns == 1 && $valid_returns->[0] == EXIT_ANY_CONST) { + return $exitval; + } + + if (not defined first { $_ == $exitval } @$valid_returns) { + croak sprintf(FAIL_BADEXIT, $command, $exitval); + } + return $exitval; +} + +sub _process_args { + my $valid_returns = [0]; + my $caller = (caller(1))[3]; + + if (not @_) { + croak "$caller called with no arguments"; + } + + if (ref $_[0] eq "ARRAY") { + $valid_returns = shift(@_); + } + + if (not @_) { + croak "$caller called with no command"; + } + + my $command = shift(@_); + + if (not defined $command) { + croak sprintf(FAIL_UNDEF, $caller); + } + + return ($valid_returns, $command, @_); +} + +# Alias for POSIX compatibility +sub WIFEXITED { (($_[0] // 0) & 0x7f) == 0 } +sub WEXITSTATUS { (($_[0] // 0) >> 8) & 0xff } +sub WIFSIGNALED { my $s = ($_[0] // 0) & 0x7f; $s > 0 && $s < 0x7f } +sub WTERMSIG { ($_[0] // 0) & 0x7f } + +# Windows-only function - dies on non-Windows platforms +sub _spawn_or_die { + if (not WINDOWS) { + croak sprintf(FAIL_INTERNAL, "_spawn_or_die called when not under Win32"); + } + # Windows implementation would go here, but PerlOnJava on JVM + # doesn't support Windows-specific Win32::Process APIs + croak sprintf(FAIL_INTERNAL, "_spawn_or_die not implemented on this platform"); +} + +1; + +__END__ + +=head1 NAME + +IPC::System::Simple - Run commands simply, with detailed diagnostics + +=head1 SYNOPSIS + + use IPC::System::Simple qw(system capture run); + + # Run a command, die on failure + run("some_command"); + + # Capture output + my $output = capture("some_command"); + my @lines = capture("some_command"); + +=head1 DESCRIPTION + +This is a PerlOnJava-native implementation of IPC::System::Simple that +provides the core functionality without Windows-specific code. + +=cut diff --git a/src/main/perl/lib/POSIX.pm b/src/main/perl/lib/POSIX.pm index b3b8fec9c..0b13a14d0 100644 --- a/src/main/perl/lib/POSIX.pm +++ b/src/main/perl/lib/POSIX.pm @@ -15,6 +15,22 @@ use Exporter (); use XSLoader; XSLoader::load('POSIX'); +# Define O_* constants directly (same values as Fcntl.pm) +# These are needed by many modules that use POSIX +use constant O_RDONLY => 0; +use constant O_WRONLY => 1; +use constant O_RDWR => 2; +use constant O_CREAT => 0100; # 64 in decimal +use constant O_EXCL => 0200; # 128 +use constant O_NOCTTY => 0400; # 256 +use constant O_TRUNC => 01000; # 512 +use constant O_APPEND => 02000; # 1024 +use constant O_NONBLOCK => 04000; # 2048 + +# Wait constants +use constant WNOHANG => 1; +use constant WUNTRACED => 2; + # Custom import to support legacy foo_h form (without colon) # This rewrites locale_h to :locale_h, errno_h to :errno_h, etc. sub import { @@ -27,7 +43,15 @@ sub import { } # Export tags for different groups of functions/constants -our @EXPORT = (); # Default to exporting nothing +# Native Perl's POSIX exports many constants by default +# Only export constants that are actually implemented in this module +our @EXPORT = qw( + O_RDONLY O_WRONLY O_RDWR O_CREAT O_EXCL O_NOCTTY O_TRUNC O_APPEND O_NONBLOCK + WEXITSTATUS WIFEXITED WIFSIGNALED WIFSTOPPED WSTOPSIG WTERMSIG WCOREDUMP + WNOHANG WUNTRACED + SEEK_CUR SEEK_END SEEK_SET + F_OK R_OK W_OK X_OK +); our @EXPORT_OK = qw( # Process functions _exit abort access alarm chdir chmod chown close ctermid dup dup2 @@ -310,15 +334,14 @@ sub strerror { POSIX::_strerror(@_) } sub signal { POSIX::_signal(@_) } sub raise { POSIX::_raise(@_) } -# Constants - generate subs for each constant +# Constants - generate subs for each constant that has Java implementation +# Note: O_* and WNOHANG/WUNTRACED are defined with 'use constant' above for my $const (qw( EINTR ENOENT ESRCH EIO ENXIO E2BIG ENOEXEC EBADF ECHILD EAGAIN ENOMEM EACCES EFAULT ENOTBLK EBUSY EEXIST EXDEV ENODEV ENOTDIR EISDIR EINVAL ENFILE EMFILE ENOTTY ETXTBSY EFBIG ENOSPC ESPIPE EROFS EMLINK EPIPE EDOM ERANGE EPERM - O_RDONLY O_WRONLY O_RDWR O_CREAT O_EXCL O_NOCTTY O_TRUNC O_APPEND O_NONBLOCK - SEEK_SET SEEK_CUR SEEK_END F_OK R_OK W_OK X_OK @@ -326,8 +349,6 @@ for my $const (qw( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGCHLD SIGCONT SIGSTOP SIGTSTP - - WNOHANG WUNTRACED )) { no strict 'refs'; *{$const} = eval "sub () { POSIX::_const_$const() }"; diff --git a/src/main/perl/lib/constant.pm b/src/main/perl/lib/constant.pm index adffa761f..dd86be2b2 100644 --- a/src/main/perl/lib/constant.pm +++ b/src/main/perl/lib/constant.pm @@ -1,7 +1,6 @@ package constant; use strict; -use Symbol 'qualify_to_ref'; sub import { my $class = shift; @@ -23,9 +22,10 @@ sub import { sub _define_constant { my ($package, $name, $value) = @_; - my $full_name = "${package}::$name"; - my $ref = qualify_to_ref($full_name); - *$ref = sub () { $value }; + no strict 'refs'; + # Store directly in stash as a reference - this creates a proper constant + # that RuntimeStashEntry recognizes and sets constantValue on the RuntimeCode + ${"${package}::"}{$name} = \$value; } 1; diff --git a/src/test/resources/unit/string_interpolation.t b/src/test/resources/unit/string_interpolation.t index 278cbdf67..543b3edab 100644 --- a/src/test/resources/unit/string_interpolation.t +++ b/src/test/resources/unit/string_interpolation.t @@ -59,6 +59,35 @@ subtest 'Special variables and array access' => sub { # This tests complex variable access patterns }; +subtest 'Special punctuation variable interpolation' => sub { + # Test that special punctuation variables interpolate correctly + # These were previously blocked by isNonInterpolatingCharacter + + # $? - child process status (should be 0 or empty initially) + system("true") if $^O ne 'MSWin32'; # Set $? to 0 + my $child_status = "$?"; + like($child_status, qr/^\d*$/, "\$? interpolates as numeric value"); + + # $| - autoflush + local $| = 1; + is("$|", "1", "\$| interpolates correctly"); + + # $% - page number + is("$%", "0", "\$% interpolates correctly"); + + # $\ - output record separator + local $\ = ""; + is("$\\", "", "\$\\ interpolates correctly"); + + # $( - real group ID + my $gid = "$("; + like($gid, qr/^\d+/, "\$( interpolates as numeric GID"); + + # $) - effective group ID + my $egid = "$)"; + like($egid, qr/^\d+/, "\$) interpolates as numeric EGID"); +}; + subtest 'Array reference interpolation' => sub { is("@{[123]}", "123", "Single element array ref interpolation"); is("@{[123, 456]}", "123 456", "Multiple element array ref interpolation");