Skip to content
Permalink
Browse files

[FIXED JENKINS-27565] Refactor the Queue and Nodes to use a consisten…

…t locking strategy

The test system I set up to verify resolution of customer(s)' issues driving this change, required
additional changes in order to fully resolve the issues at hand. As a result I am bundling these
changes:

- Moves nodes to being store in separate config files outside of the main config file (improves performance) [FIXED JENKINS-27562]
- Makes the Jenkins is loading screen not block on the extensions loading lock [FIXED JENKINS-27563]
- Removes race condition rendering the list of executors [FIXED JENKINS-27564] [FIXED JENKINS-15355]
- Tidy up the locks that were causing deadlocks with the once retention strategy in durable tasks [FIXED JENKINS-27476]
- Remove any requirement from Jenkins Core to lock on the Queue when rendering the Jenkins UI [FIXED-JENKINS-27566]
  • Loading branch information...
stephenc committed Mar 23, 2015
1 parent e9bf4b7 commit 92147c3597308bc05e6448ccc41409fcc7c05fd7
@@ -28,6 +28,7 @@
import hudson.cli.CLICommand;
import hudson.console.ConsoleAnnotationDescriptor;
import hudson.console.ConsoleAnnotatorFactory;
import hudson.init.InitMilestone;
import hudson.model.AbstractProject;
import hudson.model.Action;
import hudson.model.Describable;
@@ -201,7 +202,12 @@ public static String xsDate(Calendar cal) {
public static String rfc822Date(Calendar cal) {
return Util.RFC822_DATETIME_FORMATTER.format(cal.getTime());
}


public static boolean isExtensionsAvailable() {
final Jenkins jenkins = Jenkins.getInstance();
return jenkins == null ? false : jenkins.getInitLevel().compareTo(InitMilestone.EXTENSIONS_AUGMENTED) >= 0;
}

public static void initPageVariables(JellyContext context) {
StaplerRequest currentRequest = Stapler.getCurrentRequest();
String rootURL = currentRequest.getContextPath();
@@ -48,8 +48,6 @@

private static final Logger LOGGER = Logger.getLogger(AbstractCIBase.class.getName());

private final transient Object updateComputerLock = new Object();

/**
* If you are calling this on Hudson something is wrong.
*
@@ -137,15 +135,20 @@ private void updateComputer(Node n, Map<String,Computer> byNameMap, Set<Computer
used.add(c);
}

/*package*/ void removeComputer(Computer computer) {
Map<Node,Computer> computers = getComputerMap();
for (Map.Entry<Node, Computer> e : computers.entrySet()) {
if (e.getValue() == computer) {
computers.remove(e.getKey());
computer.onRemoved();
return;
/*package*/ void removeComputer(final Computer computer) {
Queue.withLock(new Runnable() {
@Override
public void run() {
Map<Node,Computer> computers = getComputerMap();
for (Map.Entry<Node, Computer> e : computers.entrySet()) {
if (e.getValue() == computer) {
computers.remove(e.getKey());
computer.onRemoved();
return;
}
}
}
}
});
}

/*package*/ @CheckForNull Computer getComputer(Node n) {
@@ -160,37 +163,40 @@ private void updateComputer(Node n, Map<String,Computer> byNameMap, Set<Computer
* This method tries to reuse existing {@link Computer} objects
* so that we won't upset {@link Executor}s running in it.
*/
protected void updateComputerList(boolean automaticSlaveLaunch) throws IOException {
Map<Node,Computer> computers = getComputerMap();
synchronized(updateComputerLock) {// just so that we don't have two code updating computer list at the same time
Map<String,Computer> byName = new HashMap<String,Computer>();
for (Computer c : computers.values()) {
Node node = c.getNode();
if (node == null)
continue; // this computer is gone
byName.put(node.getNodeName(),c);
}
protected void updateComputerList(final boolean automaticSlaveLaunch) {
final Map<Node,Computer> computers = getComputerMap();
Queue.withLock(new Runnable() {
@Override
public void run() {
Map<String,Computer> byName = new HashMap<String,Computer>();
for (Computer c : computers.values()) {
Node node = c.getNode();
if (node == null)
continue; // this computer is gone
byName.put(node.getNodeName(),c);
}

final Set<Computer> old = new HashSet<Computer>(computers.values());
Set<Computer> used = new HashSet<Computer>();
final Set<Computer> old = new HashSet<Computer>(computers.values());
Set<Computer> used = new HashSet<Computer>();

updateComputer(this, byName, used, automaticSlaveLaunch);
for (Node s : getNodes()) {
long start = System.currentTimeMillis();
updateComputer(s, byName, used, automaticSlaveLaunch);
if(LOG_STARTUP_PERFORMANCE)
LOGGER.info(String.format("Took %dms to update node %s",
System.currentTimeMillis()-start, s.getNodeName()));
}
updateComputer(AbstractCIBase.this, byName, used, automaticSlaveLaunch);
for (Node s : getNodes()) {
long start = System.currentTimeMillis();
updateComputer(s, byName, used, automaticSlaveLaunch);
if(LOG_STARTUP_PERFORMANCE)
LOGGER.info(String.format("Took %dms to update node %s",
System.currentTimeMillis()-start, s.getNodeName()));
}

// find out what computers are removed, and kill off all executors.
// when all executors exit, it will be removed from the computers map.
// so don't remove too quickly
old.removeAll(used);
for (Computer c : old) {
killComputer(c);
// find out what computers are removed, and kill off all executors.
// when all executors exit, it will be removed from the computers map.
// so don't remove too quickly
old.removeAll(used);
for (Computer c : old) {
killComputer(c);
}
}
}
});
getQueue().scheduleMaintenance();
for (ComputerListener cl : ComputerListener.all())
cl.onConfigurationChange();
@@ -176,6 +176,21 @@

protected final Object statusChangeLock = new Object();

private transient final List<RuntimeException> terminatedBy = Collections.synchronizedList(new ArrayList
<RuntimeException>());

public void recordTermination() {
try {
throw new RuntimeException(String.format("Termination requested by %s", Thread.currentThread()));
} catch (RuntimeException e) {
terminatedBy.add(e);
}
}

public List<RuntimeException> getTerminatedBy() {
return new ArrayList<RuntimeException>(terminatedBy);
}

public Computer(Node node) {
setNode(node);
}
@@ -404,6 +419,7 @@ public final long getConnectTime() {
* @since 1.320
*/
public Future<?> disconnect(OfflineCause cause) {
recordTermination();
offlineCause = cause;
if (Util.isOverridden(Computer.class,getClass(),"disconnect"))
return disconnect(); // legacy subtypes that extend disconnect().
@@ -419,6 +435,7 @@ public final long getConnectTime() {
* Use {@link #disconnect(OfflineCause)} and specify the cause.
*/
public Future<?> disconnect() {
recordTermination();
if (Util.isOverridden(Computer.class,getClass(),"disconnect",OfflineCause.class))
// if the subtype already derives disconnect(OfflineCause), delegate to it
return disconnect(null);
@@ -808,6 +825,21 @@ public final int countExecutors() {
return new ArrayList<OneOffExecutor>(oneOffExecutors);
}

public List<DisplayExecutor> getDisplayExecutors() {
List<DisplayExecutor> result = new ArrayList<DisplayExecutor>(executors.size()+oneOffExecutors.size());
int index = 0;
for (Executor e: executors) {
result.add(new DisplayExecutor(Integer.toString(index+1), String.format("executors/%d", index), e));
index++;
}
index = 0;
for (OneOffExecutor e: oneOffExecutors) {
result.add(new DisplayExecutor("", String.format("oneOffExecutors/%d", index), e));
index++;
}
return result;
}

/**
* Returns true if all the executors of this computer are idle.
*/
@@ -867,14 +899,21 @@ public final long getDemandStartMilliseconds() {
/**
* Called by {@link Executor} to kill excessive executors from this computer.
*/
/*package*/ synchronized void removeExecutor(Executor e) {
executors.remove(e);
addNewExecutorIfNecessary();
if(!isAlive())
{
AbstractCIBase ciBase = Jenkins.getInstance();
ciBase.removeComputer(this);
}
/*package*/ void removeExecutor(final Executor e) {
Queue.withLock(new Runnable() {
@Override
public void run() {
synchronized (Computer.this) {
executors.remove(e);
addNewExecutorIfNecessary();
if(!isAlive())
{
AbstractCIBase ciBase = Jenkins.getInstance();
ciBase.removeComputer(Computer.this);
}
}
}
});
}

/**
@@ -1413,6 +1452,71 @@ public boolean accept(File dir, String name) {
}
}

public static class DisplayExecutor implements ModelObject {

@Nonnull
private final String displayName;
@Nonnull
private final String url;
@Nonnull
private final Executor executor;

public DisplayExecutor(@Nonnull String displayName, @Nonnull String url, @Nonnull Executor executor) {
this.displayName = displayName;
this.url = url;
this.executor = executor;
}

@Override
@Nonnull
public String getDisplayName() {
return displayName;
}

@Nonnull
public String getUrl() {
return url;
}

@Nonnull
public Executor getExecutor() {
return executor;
}

@Override
public String toString() {
final StringBuilder sb = new StringBuilder("DisplayExecutor{");
sb.append("displayName='").append(displayName).append('\'');
sb.append(", url='").append(url).append('\'');
sb.append(", executor=").append(executor);
sb.append('}');
return sb.toString();
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}

DisplayExecutor that = (DisplayExecutor) o;

if (!executor.equals(that.executor)) {
return false;
}

return true;
}

@Override
public int hashCode() {
return executor.hashCode();
}
}

public static final PermissionGroup PERMISSIONS = new PermissionGroup(Computer.class,Messages._Computer_Permissions_Title());
public static final Permission CONFIGURE = new Permission(PERMISSIONS,"Configure", Messages._Computer_ConfigurePermission_Description(), Permission.CONFIGURE, PermissionScope.COMPUTER);
/**

0 comments on commit 92147c3

Please sign in to comment.
You can’t perform that action at this time.