Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JENKINS-53358] Remove bogus persistence calls due to notifyListeners #234

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
9 changes: 5 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@
<properties>
<revision>2.54</revision>
<changelist>-SNAPSHOT</changelist>
<jenkins.version>2.62</jenkins.version>
<jenkins.version>2.73.3</jenkins.version>
<java.level>8</java.level>
<no-test-jar>false</no-test-jar>
<git-plugin.version>3.1.0</git-plugin.version>
<workflow-support-plugin.version>2.17</workflow-support-plugin.version>
<scm-api-plugin.version>2.0.8</scm-api-plugin.version>
<workflow-support-plugin.version>2.20</workflow-support-plugin.version>
<scm-api-plugin.version>2.2.6</scm-api-plugin.version>
<groovy-cps.version>1.24</groovy-cps.version>
<jenkins-test-harness.version>2.37</jenkins-test-harness.version>
</properties>
Expand Down Expand Up @@ -143,7 +143,8 @@
<dependency>
<groupId>org.jenkins-ci.plugins.workflow</groupId>
<artifactId>workflow-job</artifactId>
<version>2.21</version>
<!--<version>2.23</version> -->
<version>2.25-rc775.35b435d87ee7</version>
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only until I cut the release, then it will be converted to the final release version and this will be cleaned up

<scope>test</scope>
</dependency>
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,11 @@ void notifyListeners(List<FlowNode> nodes, boolean synchronous) {
}
} finally {
if (synchronous) {
bc.abort(); // hack to skip save—we are holding a lock
bc.abort(); //// hack to skip save—we are holding a lock
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the introduced //?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to see if people are paying attention

} else if (!(getDurabilityHint().isPersistWithEveryStep()) && !(nodes.stream().anyMatch(x->x instanceof FlowEndNode))) {
// We do not want to invoke gratuitous save calls if we aren't set to persist with every step
// Except that we MUST trigger a persistence call at the very end of the execution
bc.abort();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We aren't worried about whether this causes JENKINS-50199 to happen more often because even if it does, it should only matter when reloading configuration from disk, and that case is now handled by jenkinsci/workflow-job-plugin#104, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dwnusbaum Correct -- and if the cause of JENKINS-50199 is still triggered, then that's a bug somewhere else -- if we were relying on this to trigger persistence, that's a clear bug.

} else {
try {
bc.commit();
Expand Down Expand Up @@ -1493,10 +1497,6 @@ public boolean isPaused() {
return false;
}

private void setPersistedClean(boolean persistedClean) { // Workaround for some issues with anonymous classes.
this.persistedClean = persistedClean;
}

/**
* Pause or unpause the execution.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,10 +367,6 @@ private boolean run() {
}
}

if (changed && !stillRunnable) {
execution.persistedClean = null;
saveProgramIfPossible(false);
}
if (ending) {
execution.cleanUpHeap();
if (scripts != null) {
Expand All @@ -382,6 +378,9 @@ private boolean run() {
} catch (IOException x) {
LOGGER.log(Level.WARNING, "Failed to delete program.dat in " + execution, x);
}
} else if (changed && !stillRunnable) {
execution.persistedClean = null;
saveProgramIfPossible(false);
}

return stillRunnable;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import java.lang.reflect.Field;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
Expand Down Expand Up @@ -507,7 +508,7 @@ public void evaluate() throws Throwable {
/** Verify that our flag for whether or not a build was cleanly persisted gets reset when things happen.
*/
@Test
public void testDurableAgainstCleanRestartResetsCleanlyPersistedFlag() throws Exception {
public void testNondurableResetsCleanlyPersistedFlag() throws Exception {
final String jobName = "durableAgainstClean";
story.addStep(new Statement() {
@Override
Expand All @@ -531,18 +532,24 @@ public void evaluate() throws Throwable {
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild();
assert run.isBuilding();
assert run.getResult() != Result.FAILURE;
Thread.sleep(35000); // Step completes
Thread.sleep(35000); // First sleep completes
if (run.getExecution() instanceof CpsFlowExecution) {
CpsFlowExecution exec = (CpsFlowExecution)run.getExecution();
assert exec.persistedClean == null;
}
assert exec.persistedClean == null || exec.persistedClean == false;
} // Flow still in running though, so shouldn't persist cleanly
}
});
story.addStep(new Statement() {
@Override
public void evaluate() throws Throwable {
Thread.sleep(2000L); // Just to allow time for basic async processes to finish.
verifyFailedCleanly(story.j.jenkins, story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild());
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild();
CpsFlowExecution exec = (CpsFlowExecution)(run.getExecution());
// Assert.assertTrue(exec.isComplete());
// Assert.assertThat(exec.getCurrentHeads().get(0), Matchers.instanceOf(FlowEndNode.class));
// Assert.assertEquals(Result.FAILURE, run.getResult());

verifyFailedCleanly(story.j.jenkins, run);
}
});
}
Expand Down Expand Up @@ -847,7 +854,7 @@ private WorkflowRun runFuzzerJob(JenkinsRule jrule, String jobName, FlowDurabili
* May fail rarely due to files being copied in a different order than they are modified as part of simulating a dirty restart.
* See {@link RestartableJenkinsRule#simulateAbruptShutdown()} for why that copying happens. */
@Test
@Ignore //Too long to run as part of main suite
//Too long to run as part of main suite
@TimedRepeatRule.RepeatForTime(repeatMillis = 150_000)
public void fuzzTimingDurable() throws Exception {
final String jobName = "NestedParallelDurableJob";
Expand All @@ -860,6 +867,7 @@ public void fuzzTimingDurable() throws Exception {
@Override
public void evaluate() throws Throwable {
WorkflowRun run = runFuzzerJob(story.j, jobName, FlowDurabilityHint.MAX_SURVIVABILITY);
buildNumber[0] = run.getNumber();
logStart[0] = JenkinsRule.getLog(run);
nodesOut.clear();
nodesOut.addAll(new DepthFirstScanner().allNodes(run.getExecution()));
Expand All @@ -872,10 +880,13 @@ public void evaluate() throws Throwable {
story.addStep(new Statement() {
@Override
public void evaluate() throws Throwable {
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild();
if (run == null) { // Build killed so early the Run did not get to persist
WorkflowJob j = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class);
WorkflowRun run;
if (j == null || j.getBuildByNumber(buildNumber[0]) == null) {
// Issues with test framework or build killed so early Run not created
return;
}
run = j.getBuildByNumber(buildNumber[0]);
if (run.getExecution() != null) {
Assert.assertEquals(FlowDurabilityHint.MAX_SURVIVABILITY, run.getExecution().getDurabilityHint());
}
Expand All @@ -895,11 +906,12 @@ public void evaluate() throws Throwable {
* May fail rarely due to files being copied in a different order than they are modified as part of simulating a dirty restart.
* See {@link RestartableJenkinsRule#simulateAbruptShutdown()} for why that copying happens. */
@Test
@Ignore //Too long to run as part of main suite
//Too long to run as part of main suite
@TimedRepeatRule.RepeatForTime(repeatMillis = 150_000)
public void fuzzTimingNonDurableWithDirtyRestart() throws Exception {
final String jobName = "NestedParallelDurableJob";
final String[] logStart = new String[1];
final int[] buildNum = new int[1];

// Create thread that eventually interrupts Jenkins with a hard shutdown at a random time interval
story.addStepWithDirtyShutdown(new Statement() {
Expand All @@ -915,12 +927,13 @@ public void evaluate() throws Throwable {
} else {
Assert.assertEquals(Boolean.TRUE, ((CpsFlowExecution)run.getExecution()).persistedClean);
}
buildNum[0] = run.getNumber();
}
});
story.addStep(new Statement() {
@Override
public void evaluate() throws Throwable {
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild();
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getBuildByNumber(buildNum[0]);
if (run == null) { // Build killed so early the Run did not get to persist
return;
}
Expand Down Expand Up @@ -953,19 +966,21 @@ public void evaluate() throws Throwable {

/** Test interrupting build by randomly restarting *cleanly* at unpredictable times and verify we stick pick up and resume. */
@Test
@Ignore //Too long to run as part of main suite
//Too long to run as part of main suite
@TimedRepeatRule.RepeatForTime(repeatMillis = 150_000)
public void fuzzTimingNonDurableWithCleanRestart() throws Exception {

final String jobName = "NestedParallelDurableJob";
final String[] logStart = new String[1];
final List<FlowNode> nodesOut = new ArrayList<FlowNode>();
final int[] buildNum = new int[1];

// Create thread that eventually interrupts Jenkins with a hard shutdown at a random time interval
story.addStep(new Statement() {
@Override
public void evaluate() throws Throwable {
WorkflowRun run = runFuzzerJob(story.j, jobName, FlowDurabilityHint.PERFORMANCE_OPTIMIZED);
buildNum[0] = run.getNumber();
logStart[0] = JenkinsRule.getLog(run);
if (run.getExecution() != null) {
Assert.assertEquals(FlowDurabilityHint.PERFORMANCE_OPTIMIZED, run.getExecution().getDurabilityHint());
Expand All @@ -978,7 +993,7 @@ public void evaluate() throws Throwable {
story.addStep(new Statement() {
@Override
public void evaluate() throws Throwable {
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild();
WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getBuildByNumber(buildNum[0]);
if (run == null) { // Build killed so early the Run did not get to persist
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ static void assertCompletedCleanly(WorkflowRun run) throws Exception {
Assert.assertTrue(cpsExec.getCurrentHeads().get(0) instanceof FlowEndNode);
Assert.assertTrue(cpsExec.startNodes == null || cpsExec.startNodes.isEmpty());
Assert.assertFalse(cpsExec.blocksRestart());
Assert.assertEquals(Boolean.TRUE, cpsExec.persistedClean);
} else {
System.out.println("WARNING: no FlowExecutionForBuild");
}
Expand Down