From d81a980face7f7ede5add9b1a8d7b9b481167779 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Thu, 30 Apr 2026 17:19:09 +0200 Subject: [PATCH] Refactor persistent globals to live in RuntimeStack --- README.md | 1 + src/main/java/io/jawk/backend/AVM.java | 480 ++++++++++++++++-- .../java/io/jawk/backend/RuntimeStack.java | 88 +++- src/site/markdown/java-advanced.md | 15 + src/test/java/io/jawk/AwkTest.java | 199 ++++++++ 5 files changed, 740 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 439711bf..1123cf61 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ String result = awk.script("{ print toupper($0) }").input("hello world").execute ``` When writing custom extensions, annotate associative array parameters with `@JawkAssocArray` and declare them as `Map` values rather than concrete map implementations. +If you embed Jawk through [`AVM`](https://jawk.io/apidocs/io/jawk/backend/AVM.html), use `executePersistingGlobals(...)` when you want user-defined globals to survive across sequential runs on the same runtime instance. ## Documentation diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java index f9538311..09a0a35f 100644 --- a/src/main/java/io/jawk/backend/AVM.java +++ b/src/main/java/io/jawk/backend/AVM.java @@ -25,6 +25,7 @@ import java.io.Closeable; import java.io.IOException; import java.io.PrintStream; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.Objects; import java.util.ArrayDeque; @@ -133,6 +134,7 @@ private void push(Object o) { private boolean inputSourceFilelistAssignmentsApplied; private InputSource resolvedInputSource; private AwkExpression installedEvalExpression; + private boolean mergedGlobalLayoutActive; /** * Construct the interpreter. 
@@ -335,9 +337,7 @@ public void execute( AwkProgram compiledProgram = Objects.requireNonNull(program, "program"); InputSource resolvedSource = Objects.requireNonNull(inputSource, "inputSource"); resetRuntimeState(runtimeArguments, variableOverrides); - globalVariableOffsets = compiledProgram.getGlobalVariableOffsetMap(); - globalVariableArrays = compiledProgram.getGlobalVariableAarrayMap(); - functionNames = compiledProgram.getFunctionNameSet(); + installProgramMetadata(compiledProgram); jrt.prepareForExecution(settings.getFieldSeparator(), settings.getDefaultRS()); if (!executionSpecialVariables.isEmpty()) { @@ -347,6 +347,94 @@ public void execute( executeTuples(compiledProgram.top()); } + /** + * Executes a compiled AWK program while persisting user-defined global + * variables across repeated executions on this AVM instance. + *
<p>
+ * Before the new program starts, this method imports any user-defined + * globals currently materialized in the AVM and remaps them onto the + * incoming program's compiled global slots. + * + * @param program compiled program to execute + * @param inputSource input source providing records + * @throws ExitException when the program terminates via {@code exit} + * @throws IOException if execution fails + */ + public void executePersistingGlobals(AwkProgram program, InputSource inputSource) + throws ExitException, + IOException { + executePersistingGlobals(program, inputSource, Collections.emptyList(), null); + } + + /** + * Executes a compiled AWK program while persisting user-defined global + * variables across repeated executions on this AVM instance. + *
<p>
+ * Before the new program starts, this method imports any user-defined + * globals currently materialized in the AVM and remaps them onto the + * incoming program's compiled global slots. + * + * @param program compiled program to execute + * @param inputSource input source providing records + * @param runtimeArguments name=value or filename entries from the command line + * @throws ExitException when the program terminates via {@code exit} + * @throws IOException if execution fails + */ + public void executePersistingGlobals( + AwkProgram program, + InputSource inputSource, + List runtimeArguments) + throws ExitException, + IOException { + executePersistingGlobals(program, inputSource, runtimeArguments, null); + } + + /** + * Executes a compiled AWK program while persisting user-defined global + * variables across repeated executions on this AVM instance. + *
<p>
+ * Before the new program starts, this method imports any user-defined + * globals currently materialized in the AVM and remaps them onto the + * incoming program's compiled global slots. + * + * @param program compiled program to execute + * @param inputSource input source providing records + * @param runtimeArguments name=value or filename entries from the command line + * @param variableOverrides additional variable assignments applied on top of + * the settings-level variables (may be {@code null}) + * @throws ExitException when the program terminates via {@code exit} + * @throws IOException if execution fails + */ + public void executePersistingGlobals( + AwkProgram program, + InputSource inputSource, + List runtimeArguments, + Map variableOverrides) + throws ExitException, + IOException { + AwkProgram compiledProgram = Objects.requireNonNull(program, "program"); + InputSource resolvedSource = Objects.requireNonNull(inputSource, "inputSource"); + mergeRuntimeState(runtimeArguments, variableOverrides, compiledProgram); + + jrt.prepareForExecution(settings.getFieldSeparator(), settings.getDefaultRS()); + if (!executionSpecialVariables.isEmpty()) { + jrt.applySpecialVariables(executionSpecialVariables); + } + rebindResolvedInputSource(resolvedSource); + executeTuples(compiledProgram.top()); + } + + /** + * Clears the user-defined globals retained in the current runtime stack. + *
<p>
+ * The next {@link #executePersistingGlobals(AwkProgram, InputSource, List, Map)} + * call will therefore start from an empty persistent global bank. + */ + public void clearPersistentGlobals() { + runtimeStack.clearGlobals(); + mergedGlobalLayoutActive = false; + } + private void initExtensions() { if (extensionInstances.isEmpty()) { return; @@ -459,6 +547,11 @@ private boolean prepareForEval( } private void resetRuntimeState(List runtimeArguments, Map variableOverrides) { + resetTransientRuntimeState(runtimeArguments, variableOverrides); + runtimeStack.clearGlobals(); + } + + private void resetTransientRuntimeState(List runtimeArguments, Map variableOverrides) { // Reset the AVM-owned state that must not leak across executions. operandStack.clear(); environOffset = NULL_OFFSET; @@ -475,27 +568,12 @@ private void resetRuntimeState(List runtimeArguments, Map(runtimeArguments) : Collections.emptyList(); - - if (variableOverrides == null || variableOverrides.isEmpty()) { - executionInitialVariables = baseInitialVariables; - executionSpecialVariables = baseSpecialVariables; - } else { - executionInitialVariables = new HashMap(baseInitialVariables); - executionInitialVariables.putAll(variableOverrides); - - Map specialOverrides = JRT.copySpecialVariables(variableOverrides); - if (specialOverrides.isEmpty()) { - executionSpecialVariables = baseSpecialVariables; - } else { - executionSpecialVariables = new HashMap(baseSpecialVariables); - executionSpecialVariables.putAll(specialOverrides); - } - } + prepareExecutionInputs(runtimeArguments, variableOverrides); } private void installExpressionMetadata(AwkExpression compiledExpression) { @@ -508,6 +586,12 @@ private void installExpressionMetadata(AwkExpression compiledExpression) { installedEvalExpression = compiledExpression; } + private void installProgramMetadata(AwkProgram compiledProgram) { + globalVariableOffsets = compiledProgram.getGlobalVariableOffsetMap(); + globalVariableArrays = 
compiledProgram.getGlobalVariableAarrayMap(); + functionNames = compiledProgram.getFunctionNameSet(); + } + private void rebindResolvedInputSource(InputSource resolvedSource) { InputSource previousResolvedSource = resolvedInputSource; if (previousResolvedSource != null && previousResolvedSource != resolvedSource) { @@ -524,6 +608,315 @@ private boolean hasCompatibleEvalGlobalLayout(long numGlobals) { && Objects.equals(initializedEvalGlobalVariableArrays, globalVariableArrays); } + /** + * Resets transient execution state, installs the new program metadata, and + * merges the previously retained user globals into the new compiled global + * layout. + * + * @param runtimeArguments name=value or filename entries for this execution + * @param variableOverrides per-call variable overrides for this execution + * @param compiledProgram program whose global layout should become active + */ + private void mergeRuntimeState( + List runtimeArguments, + Map variableOverrides, + AwkProgram compiledProgram) { + Map carriedGlobals = collectPersistentGlobalValues(); + resetTransientRuntimeState(runtimeArguments, variableOverrides); + installProgramMetadata(compiledProgram); + + Map basePersistentSeeds = collectBasePersistentGlobalSeeds(); + Map executionUserSeeds = collectExecutionUserGlobalSeeds(runtimeArguments, variableOverrides); + List mergedGlobalNamesByOffset = buildMergedGlobalNamesByOffset( + carriedGlobals, + basePersistentSeeds, + executionUserSeeds); + + runtimeStack.rebindGlobals(mergedGlobalNamesByOffset); + restoreNamedGlobals(carriedGlobals); + seedMissingNamedGlobals(carriedGlobals, basePersistentSeeds); + applyNamedGlobalOverrides(executionUserSeeds); + mergedGlobalLayoutActive = true; + } + + /** + * Returns whether the current runtime stack already contains a merged global + * layout for the compiled program about to execute. + *
<p>
+ * Persistent execution may append previously retained globals after the + * compiled globals of the incoming program. The tuple stream only dereferences + * the prefix defined by {@code SET_NUM_GLOBALS}, so appended globals are valid + * as long as the compiled prefix still matches name-for-name and offset-for-offset. + * + * @param numGlobals number of globals compiled into the active program + * @return {@code true} when the merged layout is compatible with the active + * program + */ + private boolean hasCompatiblePersistentGlobalLayout(long numGlobals) { + Object[] globals = runtimeStack.getNumGlobals(); + if (!mergedGlobalLayoutActive + || globals == null + || globalVariableOffsets == null + || globals.length < numGlobals) { + return false; + } + for (Map.Entry entry : globalVariableOffsets.entrySet()) { + int offset = entry.getValue().intValue(); + if (offset < 0 || offset >= globals.length || !entry.getKey().equals(runtimeStack.getGlobalName(offset))) { + return false; + } + } + return true; + } + + /** + * Prepares the per-execution runtime arguments and variable overrides. + *
<p>
+ * Base settings-level variables remain the default source. Per-call overrides + * are layered on top without mutating the base snapshot held by this AVM. + * + * @param runtimeArguments name=value or filename entries for this execution + * @param variableOverrides per-call variable overrides for this execution + */ + private void prepareExecutionInputs( + List runtimeArguments, + Map variableOverrides) { + this.arguments = runtimeArguments != null ? new ArrayList<>(runtimeArguments) : Collections.emptyList(); + + if (variableOverrides == null || variableOverrides.isEmpty()) { + executionInitialVariables = baseInitialVariables; + executionSpecialVariables = baseSpecialVariables; + } else { + executionInitialVariables = new HashMap(baseInitialVariables); + executionInitialVariables.putAll(variableOverrides); + + Map specialOverrides = JRT.copySpecialVariables(variableOverrides); + if (specialOverrides.isEmpty()) { + executionSpecialVariables = baseSpecialVariables; + } else { + executionSpecialVariables = new HashMap(baseSpecialVariables); + executionSpecialVariables.putAll(specialOverrides); + } + } + } + + /** + * Collects the current user-defined globals retained in the runtime stack. + * + * @return retained user globals keyed by name, in current runtime order + */ + private Map collectPersistentGlobalValues() { + Map retainedGlobals = new LinkedHashMap(); + for (Map.Entry entry : runtimeStack.snapshotGlobalVariables().entrySet()) { + if (isPersistentEligibleGlobal(entry.getKey())) { + retainedGlobals.put(entry.getKey(), entry.getValue()); + } + } + return retainedGlobals; + } + + /** + * Collects the AVM-wide baseline variables that should seed persistent + * globals when no retained value exists yet for the same name. 
+ * + * @return baseline user globals keyed by name + */ + private Map collectBasePersistentGlobalSeeds() { + Map basePersistentSeeds = new LinkedHashMap(); + for (Map.Entry entry : baseInitialVariables.entrySet()) { + String name = entry.getKey(); + if (isPersistentEligibleGlobal(name)) { + validateSeededGlobal(name, entry.getValue()); + basePersistentSeeds.put(name, entry.getValue()); + } + } + return basePersistentSeeds; + } + + /** + * Collects the user-defined variables that should override the retained + * global bank for the current persistent execution. + *
<p>
+ * Only user globals are included here. JRT-managed special variables still + * flow through the normal execution setup. + * + * @param runtimeArguments name=value or filename entries for this execution + * @param variableOverrides per-call variable overrides for this execution + * @return insertion-ordered overriding seed values keyed by variable name + */ + private Map collectExecutionUserGlobalSeeds( + List runtimeArguments, + Map variableOverrides) { + Map executionUserSeeds = new LinkedHashMap(); + if (variableOverrides != null) { + for (Map.Entry entry : variableOverrides.entrySet()) { + String name = entry.getKey(); + if (isPersistentEligibleGlobal(name)) { + validateSeededGlobal(name, entry.getValue()); + executionUserSeeds.put(name, entry.getValue()); + } + } + } + for (String argument : runtimeArguments != null ? runtimeArguments : Collections.emptyList()) { + if (argument.indexOf('=') <= 0) { + continue; + } + NameValueAssignment assignment = parseNameValueAssignment(argument); + if (isPersistentEligibleGlobal(assignment.name)) { + validateSeededGlobal(assignment.name, assignment.value); + executionUserSeeds.put(assignment.name, assignment.value); + } + } + return executionUserSeeds; + } + + /** + * Builds the slot order for the next persistent execution. + *
<p>
+ * The compiled globals are always installed first in their compiled offset + * order. Retained globals and seeded user globals that are not compiled by + * the incoming program are appended afterwards so future runs can still reuse + * them without changing the current program's compiled offsets. + * + * @param carriedGlobals retained user globals from the previous execution + * @param basePersistentSeeds baseline user globals coming from the AVM settings + * @param executionUserSeeds per-call user overrides for this execution + * @return merged slot-to-name layout for the next persistent run + */ + private List buildMergedGlobalNamesByOffset( + Map carriedGlobals, + Map basePersistentSeeds, + Map executionUserSeeds) { + LinkedHashSet orderedNames = new LinkedHashSet(); + List> compiledGlobals = new ArrayList>( + globalVariableOffsets.entrySet()); + Collections.sort(compiledGlobals, (left, right) -> left.getValue().compareTo(right.getValue())); + for (Map.Entry entry : compiledGlobals) { + orderedNames.add(entry.getKey()); + } + for (String name : carriedGlobals.keySet()) { + orderedNames.add(name); + } + for (String name : basePersistentSeeds.keySet()) { + orderedNames.add(name); + } + for (String name : executionUserSeeds.keySet()) { + orderedNames.add(name); + } + return new ArrayList(orderedNames); + } + + /** + * Restores retained global values into the runtime stack after the merged + * layout has been installed. + * + * @param carriedGlobals retained user globals from the previous execution + */ + private void restoreNamedGlobals(Map carriedGlobals) { + for (Map.Entry entry : carriedGlobals.entrySet()) { + runtimeStack.setGlobalVariable(entry.getKey(), entry.getValue()); + } + } + + /** + * Applies baseline user globals when no retained value exists yet for the + * same name. 
+ * + * @param carriedGlobals retained user globals from the previous execution + * @param basePersistentSeeds baseline user globals coming from the AVM settings + */ + private void seedMissingNamedGlobals( + Map carriedGlobals, + Map basePersistentSeeds) { + for (Map.Entry entry : basePersistentSeeds.entrySet()) { + if (!carriedGlobals.containsKey(entry.getKey())) { + runtimeStack.setGlobalVariable(entry.getKey(), entry.getValue()); + } + } + } + + /** + * Applies per-execution user overrides on top of the merged runtime globals. + * + * @param executionUserSeeds per-call user overrides for this execution + */ + private void applyNamedGlobalOverrides(Map executionUserSeeds) { + for (Map.Entry entry : executionUserSeeds.entrySet()) { + runtimeStack.setGlobalVariable(entry.getKey(), entry.getValue()); + } + } + + /** + * Returns whether the given global name should participate in persistent + * memory. + * + * @param name global variable name + * @return {@code true} when the variable should persist across runs + */ + private boolean isPersistentEligibleGlobal(String name) { + return name != null + && !JRT.isJrtManagedSpecialVariable(name) + && !"ARGV".equals(name) + && !"ARGC".equals(name) + && !"ENVIRON".equals(name) + && !"RSTART".equals(name) + && !"RLENGTH".equals(name) + && !"IGNORECASE".equals(name); + } + + /** + * Validates that a seeded global value is compatible with the compiled + * metadata of the current program. 
+ * + * @param name variable name to validate + * @param value proposed seeded value + */ + private void validateSeededGlobal(String name, Object value) { + if (functionNames.contains(name)) { + throw new IllegalArgumentException("Cannot assign a scalar to a function name (" + name + ")."); + } + Boolean arrayObj = globalVariableArrays.get(name); + if (Boolean.TRUE.equals(arrayObj) && !(value instanceof Map)) { + throw new IllegalArgumentException("Cannot assign a scalar to a non-scalar variable (" + name + ")."); + } + } + + /** + * Parses a runtime {@code name=value} assignment. + * + * @param nameValue raw assignment text + * @return parsed assignment + */ + private NameValueAssignment parseNameValueAssignment(String nameValue) { + int eqIdx = nameValue.indexOf('='); + if (eqIdx == 0) { + throw new IllegalArgumentException( + "Must have a non-blank variable name in a name=value variable assignment argument."); + } + String name = nameValue.substring(0, eqIdx); + String value = nameValue.substring(eqIdx + 1); + return new NameValueAssignment(name, coerceVariableAssignmentValue(value)); + } + + /** + * Coerces a runtime assignment value using the same scalar rules as the + * existing command-line handling: integer first, then double, then string. + * + * @param value raw text to coerce + * @return coerced scalar value + */ + private Object coerceVariableAssignmentValue(String value) { + try { + return Integer.parseInt(value); + } catch (NumberFormatException nfe) { + try { + return Double.parseDouble(value); + } catch (NumberFormatException nfe2) { + return value; + } + } + } + /** * Executes the tuple stream after the runtime has been fully prepared. 
* @@ -1943,8 +2336,13 @@ private void executeTuples(PositionTracker position) case SET_NUM_GLOBALS: { // arg[0] = # of globals Object[] globals = runtimeStack.getNumGlobals(); - if (globals == null) { - runtimeStack.setNumGlobals(position.intArg(0)); + if (mergedGlobalLayoutActive) { + if (!hasCompatiblePersistentGlobalLayout(position.intArg(0))) { + throw new IllegalStateException( + "AVM globals are already initialized for an incompatible persistent layout."); + } + } else if (globals == null) { + runtimeStack.setNumGlobals(position.intArg(0), globalVariableOffsets); initializedEvalGlobalVariableOffsets = globalVariableOffsets; initializedEvalGlobalVariableArrays = globalVariableArrays; @@ -2618,24 +3016,9 @@ public final Object getSUBSEP() { */ @SuppressWarnings("unused") private void setFilelistVariable(String nameValue) { - int eqIdx = nameValue.indexOf('='); - // variable name should be non-blank - if (eqIdx == 0) { - throw new IllegalArgumentException( - "Must have a non-blank variable name in a name=value variable assignment argument."); - } - String name = nameValue.substring(0, eqIdx); - String value = nameValue.substring(eqIdx + 1); - Object obj; - try { - obj = Integer.parseInt(value); - } catch (NumberFormatException nfe) { - try { - obj = Double.parseDouble(value); - } catch (NumberFormatException nfe2) { - obj = value; - } - } + NameValueAssignment assignment = parseNameValueAssignment(nameValue); + String name = assignment.name; + Object obj = assignment.value; // make sure we're not receiving funcname=value assignments if (functionNames.contains(name)) { @@ -2651,8 +3034,9 @@ private void setFilelistVariable(String nameValue) { } else { runtimeStack.setFilelistVariable(offsetObj.intValue(), obj); } + } else if (runtimeStack.hasGlobalVariable(name)) { + runtimeStack.setGlobalVariable(name, obj); } - // otherwise, do nothing } /** {@inheritDoc} */ @@ -2682,6 +3066,8 @@ public final void assignVariable(String name, Object obj) { } else { 
runtimeStack.setFilelistVariable(offsetObj.intValue(), obj); } + } else if (runtimeStack.hasGlobalVariable(name)) { + runtimeStack.setGlobalVariable(name, obj); } } @@ -2847,6 +3233,16 @@ private Map ensureArrayInArray(Map map, Object k private static final UninitializedObject BLANK = new UninitializedObject(); + private static final class NameValueAssignment { + private final String name; + private final Object value; + + private NameValueAssignment(String name, Object value) { + this.name = name; + this.value = value; + } + } + private static final class SingleRecordInputSource implements InputSource { private final String record; diff --git a/src/main/java/io/jawk/backend/RuntimeStack.java b/src/main/java/io/jawk/backend/RuntimeStack.java index f45dfa6c..a881c816 100644 --- a/src/main/java/io/jawk/backend/RuntimeStack.java +++ b/src/main/java/io/jawk/backend/RuntimeStack.java @@ -23,8 +23,13 @@ */ import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Deque; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import io.jawk.intermediate.UninitializedObject; /** @@ -36,6 +41,8 @@ class RuntimeStack { private static final Object[] NULL_LOCALS_SENTINEL = new Object[0]; private Object[] globals = null; + private List globalNamesByOffset = Collections.emptyList(); + private Map globalOffsetsByName = Collections.emptyMap(); private Object[] locals = null; private Deque localsStack = new ArrayDeque(); private Deque returnIndexes = new ArrayDeque(); @@ -43,6 +50,7 @@ class RuntimeStack { @SuppressWarnings("unused") public void dump() { System.out.println("globals = " + Arrays.toString(globals)); + System.out.println("globalNamesByOffset = " + globalNamesByOffset); System.out.println("locals = " + Arrays.toString(locals)); System.out.println("localsStack = " + localsStack); System.out.println("returnIndexes = " + returnIndexes); @@ -53,7 +61,17 @@ Object[] 
getNumGlobals() { } void reset() { + clearGlobals(); + resetTransientState(); + } + + void clearGlobals() { globals = null; + globalNamesByOffset = Collections.emptyList(); + globalOffsetsByName = Collections.emptyMap(); + } + + void resetTransientState() { locals = null; localsStack.clear(); returnIndexes.clear(); @@ -61,11 +79,12 @@ void reset() { } /** Must be one of the first methods executed. */ - void setNumGlobals(long l) { + void setNumGlobals(long l, Map offsetsByName) { globals = new Object[(int) l]; for (int i = 0; i < l; i++) { globals[i] = null; } + setGlobalLayoutMetadata((int) l, offsetsByName); // must accept multiple executions // expandFrameIfNecessary(num_globals); } @@ -84,6 +103,48 @@ void setNumGlobals(long l) { * } */ + void rebindGlobals(List namesByOffset) { + globals = new Object[namesByOffset.size()]; + for (int i = 0; i < globals.length; i++) { + globals[i] = null; + } + setGlobalLayoutMetadata(namesByOffset); + } + + Map snapshotGlobalVariables() { + Map snapshot = new LinkedHashMap(); + if (globals == null) { + return snapshot; + } + for (int i = 0; i < globals.length && i < globalNamesByOffset.size(); i++) { + String name = globalNamesByOffset.get(i); + if (name != null) { + snapshot.put(name, globals[i]); + } + } + return snapshot; + } + + boolean hasGlobalVariable(String name) { + return globalOffsetsByName.containsKey(name); + } + + Object getGlobalVariable(String name) { + Integer offset = globalOffsetsByName.get(name); + return offset == null ? null : globals[offset.intValue()]; + } + + void setGlobalVariable(String name, Object value) { + Integer offset = globalOffsetsByName.get(name); + if (offset != null) { + globals[offset.intValue()] = value; + } + } + + String getGlobalName(int offset) { + return offset >= 0 && offset < globalNamesByOffset.size() ? 
globalNamesByOffset.get(offset) : null; + } + Object getVariable(long offset, boolean isGlobal) { if (isGlobal) { return globals[(int) offset]; @@ -152,4 +213,29 @@ Object getReturnValue() { returnValue = null; return retval; } + + private void setGlobalLayoutMetadata(int numGlobals, Map offsetsByName) { + List namesByOffset = new ArrayList(Collections.nCopies(numGlobals, (String) null)); + if (offsetsByName != null) { + for (Map.Entry entry : offsetsByName.entrySet()) { + int offset = entry.getValue().intValue(); + if (offset >= 0 && offset < numGlobals) { + namesByOffset.set(offset, entry.getKey()); + } + } + } + setGlobalLayoutMetadata(namesByOffset); + } + + private void setGlobalLayoutMetadata(List namesByOffset) { + globalNamesByOffset = new ArrayList(namesByOffset); + Map offsetsByName = new LinkedHashMap(); + for (int i = 0; i < namesByOffset.size(); i++) { + String name = namesByOffset.get(i); + if (name != null) { + offsetsByName.put(name, Integer.valueOf(i)); + } + } + globalOffsetsByName = offsetsByName; + } } diff --git a/src/site/markdown/java-advanced.md b/src/site/markdown/java-advanced.md index 7d6a03b7..744ddc22 100644 --- a/src/site/markdown/java-advanced.md +++ b/src/site/markdown/java-advanced.md @@ -52,6 +52,21 @@ try (AVM avm = awk.createAvm()) { Each `execute(...)` resets the AWK execution state before the program starts again, but it still reuses the same interpreter instance and runtime infrastructure. 
+When you want user-defined globals to survive into a later program run on the same runtime, use `executePersistingGlobals(...)` for the run where you want the remapping to happen: + +```java +Awk awk = new Awk(); +AwkProgram first = awk.compile("BEGIN { total = 5 }"); +AwkProgram second = awk.compile("BEGIN { print total }"); + +try (AVM avm = awk.createAvm()) { + avm.execute(first, firstSource); + avm.executePersistingGlobals(second, secondSource); +} +``` + +`executePersistingGlobals(...)` first imports the user-defined globals currently materialized in that `AVM`, then remaps them onto the next program's compiled global slots. The persistent memory lives only for that `AVM` instance. Built-in runtime variables such as `NR`, `NF`, `FS`, and `RS` still reset between runs. + ## Why Stateful Eval Is Powerful and Dangerous Raw repeated eval against one runtime is intentionally stateful: diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 79047407..5f671c37 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -45,6 +45,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.junit.Test; @@ -1437,6 +1438,204 @@ public void sandboxAllowsReadingArgv() throws Exception { .runAndAssert(); } + @Test + public void executePersistingGlobalsSharesUserGlobalsAcrossPrograms() throws Exception { + Awk awk = new Awk(); + AwkProgram first = awk.compile("BEGIN { A = 1; B = 2; C = 3; print A, B, C }"); + AwkProgram second = awk.compile("BEGIN { D = C + 1; C = C + 10; print C, D }"); + AwkProgram third = awk.compile("BEGIN { print A, B, C, D }"); + + try (AVM avm = awk.createAvm()) { + assertEquals("1 2 3\n", executePersistent(avm, first)); + assertEquals("13 4\n", executePersistent(avm, second)); + assertEquals("1 2 13 4\n", executePersistent(avm, third)); + } + } + + @Test + public void 
executePersistingGlobalsRetainsGlobalsAcrossRepeatedRunsOfSameProgram() throws Exception { + AwkProgram program = AWK.compile("BEGIN { count++; print count }"); + + try (AVM avm = AWK.createAvm()) { + assertEquals("1\n", executePersistent(avm, program)); + assertEquals("2\n", executePersistent(avm, program)); + } + } + + @Test + public void existingExecuteDoesNotPersistGlobals() throws Exception { + AwkProgram assign = AWK.compile("BEGIN { A = 1 }"); + AwkProgram read = AWK.compile("BEGIN { print A }"); + + try (AVM avm = AWK.createAvm()) { + assertEquals("", executeNormally(avm, assign)); + assertEquals("\n", executeNormally(avm, read)); + } + } + + @Test + public void executePersistingGlobalsImportsGlobalsFromPreviousExecute() throws Exception { + AwkProgram first = AWK.compile("BEGIN { A = 1; B = 2; C = 3 }"); + AwkProgram second = AWK.compile("BEGIN { D = C + 1; C = C + 10 }"); + AwkProgram third = AWK.compile("BEGIN { print A, B, C, D }"); + + try (AVM avm = AWK.createAvm()) { + assertEquals("", executeNormally(avm, first)); + assertEquals("", executePersistent(avm, second)); + assertEquals("1 2 13 4\n", executePersistent(avm, third)); + } + } + + @Test + public void executePersistingGlobalsPersistsSeededCompiledAndUncompiledUserGlobals() throws Exception { + AwkProgram seedNothing = AWK.compile("BEGIN { }"); + AwkProgram readValues = AWK.compile("BEGIN { print x, y }"); + Map overrides = new LinkedHashMap(); + overrides.put("x", Long.valueOf(4L)); + + try (AVM avm = AWK.createAvm()) { + assertEquals( + "", + executePersistent(avm, seedNothing, Collections.singletonList("y=5"), overrides, emptyInputSource())); + assertEquals("4 5\n", executePersistent(avm, readValues)); + } + } + + @Test + public void executePersistingGlobalsDoesNotPersistBuiltInsAndKeepsArraysOnlyForGlobals() throws Exception { + AwkProgram writeBuiltIn = AWK.compile("BEGIN { NR = 99; print NR }"); + AwkProgram readBuiltIn = AWK.compile("BEGIN { print NR }"); + AwkProgram seedArrayAndLocal = 
AWK + .compile( + "function seed(tmp) { tmp = 7; arr[\"x\"] = 9 } BEGIN { seed() }"); + AwkProgram readArrayAndLocal = AWK.compile("BEGIN { print arr[\"x\"]; print tmp }"); + + try (AVM avm = AWK.createAvm()) { + assertEquals("99\n", executePersistent(avm, writeBuiltIn)); + assertEquals("0\n", executePersistent(avm, readBuiltIn)); + assertEquals("", executePersistent(avm, seedArrayAndLocal)); + assertEquals("9\n\n", executePersistent(avm, readArrayAndLocal)); + } + } + + @Test + public void executePersistingGlobalsPersistsMutationsBeforeExitAndIoFailure() throws Exception { + AwkProgram exitProgram = AWK.compile("BEGIN { A = 3; exit 7 }"); + AwkProgram increment = AWK.compile("{ A++ }"); + AwkProgram readA = AWK.compile("BEGIN { print A }"); + + try (AVM avm = AWK.createAvm()) { + avm.setAwkSink(new AppendableAwkSink(new StringBuilder(), java.util.Locale.US)); + ExitException exit = assertThrows( + ExitException.class, + () -> avm.executePersistingGlobals(exitProgram, emptyInputSource())); + assertEquals(7, exit.getCode()); + assertEquals("3\n", executePersistent(avm, readA)); + + avm.clearPersistentGlobals(); + avm.setAwkSink(new AppendableAwkSink(new StringBuilder(), java.util.Locale.US)); + IOException io = assertThrows( + IOException.class, + () -> avm.executePersistingGlobals(increment, new ThrowingAfterFirstRecordInputSource())); + assertEquals("boom", io.getMessage()); + assertEquals("1\n", executePersistent(avm, readA)); + } + } + + @Test + public void clearPersistentGlobalsRemovesOnlyPersistentUserGlobals() throws Exception { + AwkProgram assign = AWK.compile("BEGIN { A = 1 }"); + AwkProgram read = AWK.compile("BEGIN { print A }"); + + try (AVM avm = AWK.createAvm()) { + assertEquals("", executePersistent(avm, assign)); + avm.clearPersistentGlobals(); + assertEquals("\n", executePersistent(avm, read)); + assertEquals("\n", executeNormally(avm, read)); + } + } + + private static String executePersistent(AVM avm, AwkProgram program) throws Exception { + 
return executePersistent( + avm, + program, + Collections.emptyList(), + Collections.emptyMap(), + emptyInputSource()); + } + + private static String executePersistent( + AVM avm, + AwkProgram program, + List arguments, + Map variableOverrides, + InputSource inputSource) + throws Exception { + StringBuilder out = new StringBuilder(); + avm.setAwkSink(new AppendableAwkSink(out, java.util.Locale.US)); + avm.executePersistingGlobals(program, inputSource, arguments, variableOverrides); + return out.toString(); + } + + private static String executeNormally(AVM avm, AwkProgram program) throws Exception { + StringBuilder out = new StringBuilder(); + avm.setAwkSink(new AppendableAwkSink(out, java.util.Locale.US)); + avm.execute(program, emptyInputSource(), Collections.emptyList(), null); + return out.toString(); + } + + private static InputSource emptyInputSource() { + return new InputSource() { + @Override + public boolean nextRecord() { + return false; + } + + @Override + public String getRecordText() { + return null; + } + + @Override + public List getFields() { + return null; + } + + @Override + public boolean isFromFilenameList() { + return false; + } + }; + } + + private static final class ThrowingAfterFirstRecordInputSource implements InputSource { + private boolean firstRecordAvailable = true; + + @Override + public boolean nextRecord() throws IOException { + if (firstRecordAvailable) { + firstRecordAvailable = false; + return true; + } + throw new IOException("boom"); + } + + @Override + public String getRecordText() { + return "row"; + } + + @Override + public List getFields() { + return null; + } + + @Override + public boolean isFromFilenameList() { + return false; + } + } + private static final class StructuredOutputSink extends AwkSink { private final List> printedValues = new ArrayList<>();