Skip to content

Commit

Permalink
Fix cloud tracking (#383)
Browse files Browse the repository at this point in the history
* Removing a recently added integ test (need some refactoring to make it work)

* Added a warning to cloud name description to prevent users from modifying it

* Deleting files related to cloud instance tracking and reassignments

* [fix] Remove references of oldId and EC2FleetCloudAware class, remove tracking of cloud instance

[refactor] Added instanceId to FleetNode for clarity, added getDescriptor to return sub type

[refactor] Dont provision if Jenkins is quieting down and terminating

Fix #360
Fix #322
  • Loading branch information
pdk27 authored Jul 17, 2023
1 parent bb2c33d commit a0d67cf
Show file tree
Hide file tree
Showing 27 changed files with 311 additions and 702 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,4 @@ protected AbstractEC2FleetCloud(String name) {
public abstract boolean hasExcessCapacity();

public abstract boolean scheduleToTerminate(String instanceId, boolean ignoreMinConstraints, EC2AgentTerminationReason reason);

public abstract String getOldId();

public abstract String getFleet();
}
81 changes: 37 additions & 44 deletions src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import com.amazon.jenkins.ec2fleet.aws.RegionHelper;
import com.amazon.jenkins.ec2fleet.fleet.EC2Fleet;
import com.amazon.jenkins.ec2fleet.fleet.EC2Fleets;
import com.amazon.jenkins.ec2fleet.utils.EC2FleetCloudAwareUtils;
import com.amazonaws.services.ec2.AmazonEC2;
import com.amazonaws.services.ec2.model.Instance;
import com.amazonaws.services.ec2.model.InstanceStateName;
Expand All @@ -13,6 +12,7 @@
import hudson.Extension;
import hudson.model.Computer;
import hudson.model.Descriptor;
import hudson.model.Failure;
import hudson.model.Label;
import hudson.model.Node;
import hudson.model.Queue;
Expand Down Expand Up @@ -86,25 +86,6 @@ public class EC2FleetCloud extends AbstractEC2FleetCloud {
private static final Logger LOGGER = Logger.getLogger(EC2FleetCloud.class.getName());
private static final ScheduledExecutorService EXECUTOR = Executors.newSingleThreadScheduledExecutor();

// Counter to keep track of planned nodes per EC2FleetCloud, used in node's display name
private transient AtomicInteger plannedNodeCounter = new AtomicInteger(1);

/**
* Provide unique identifier for this instance of {@link EC2FleetCloud}, <code>transient</code>
* will not be stored. Not available for customer, instead use {@link EC2FleetCloud#name}
* will be used only during Jenkins configuration update <code>config.jelly</code>,
* when new instance of same cloud is created and we need to find old instance and
* repoint resources like {@link Computer} {@link Node} etc.
* <p>
* It's lazy to support old versions which don't have this field at all.
* <p>
* However it's stable, as soon as it will be created and called first uuid will be same
* for all future calls to the same instances of lazy uuid.
*
* @see EC2FleetCloudAware
*/
private transient LazyUuid id;

/**
* Replaced with {@link EC2FleetCloud#awsCredentialsId}
* <p>
Expand Down Expand Up @@ -178,9 +159,11 @@ public class EC2FleetCloud extends AbstractEC2FleetCloud {

private transient ArrayList<ScheduledFuture<?>> plannedNodeScheduledFutures;

// Counter to keep track of planned nodes per EC2FleetCloud, used in node's display name
private transient AtomicInteger plannedNodeCounter = new AtomicInteger(1);

@DataBoundConstructor
public EC2FleetCloud(final String name,
final String oldId,
final String awsCredentialsId,
final @Deprecated String credentialsId,
final String region,
Expand Down Expand Up @@ -238,8 +221,6 @@ public EC2FleetCloud(final String name,
if (fleet != null) {
this.stats = EC2Fleets.get(fleet).getState(
getAwsCredentialsId(), region, endpoint, getFleet());
// Reassign existing nodes/computer with new reference of cloud
EC2FleetCloudAwareUtils.reassign(fleet, this);
}
}

Expand All @@ -259,16 +240,6 @@ public String getAwsCredentialsId() {
return StringUtils.isNotBlank(awsCredentialsId) ? awsCredentialsId : credentialsId;
}

/**
* Called old as will be used by new instance of cloud, for
* which this id is old (not current)
*
* @return id of current cloud
*/
public String getOldId() {
return id.getValue();
}

public boolean isDisableTaskResubmit() {
return disableTaskResubmit;
}
Expand Down Expand Up @@ -298,7 +269,6 @@ public String getEndpoint() {
return endpoint;
}

@Override
public String getFleet() {
return fleet;
}
Expand Down Expand Up @@ -419,6 +389,15 @@ private synchronized int getNextPlannedNodeCounter() {

@Override
public synchronized Collection<NodeProvisioner.PlannedNode> provision(@Nonnull final Cloud.CloudState cloudState, final int excessWorkload) {
Jenkins jenkinsInstance = Jenkins.get();
if (jenkinsInstance.isQuietingDown()) {

Check warning on line 393 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 393 is only partially covered, one branch is missing
LOGGER.log(Level.FINE, "Not provisioning nodes, Jenkins instance is quieting down");

Check warning on line 394 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 394 is not covered by tests
return Collections.emptyList();

Check warning on line 395 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 395 is not covered by tests
} else if (jenkinsInstance.isTerminating()) {

Check warning on line 396 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 396 is only partially covered, one branch is missing
LOGGER.log(Level.FINE, "Not provisioning nodes, Jenkins instance is terminating");

Check warning on line 397 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 397 is not covered by tests
return Collections.emptyList();

Check warning on line 398 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 398 is not covered by tests
}

fine("excessWorkload %s", excessWorkload);

if (stats == null) {
Expand Down Expand Up @@ -469,6 +448,7 @@ public synchronized Collection<NodeProvisioner.PlannedNode> provision(@Nonnull f
// This protects us from leaving planned nodes stranded within Jenkins NodeProvisioner when the Fleet
// is updated or removed before it can scale. After scaling, EC2FleetOnlineChecker will cancel the future
// if something happens to the Fleet.
// TODO: refactor to consolidate logic with EC2FleetOnlineChecker

Check warning on line 451 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Open Tasks Scanner

TODO

NORMAL: refactor to consolidate logic with EC2FleetOnlineChecker
final ScheduledFuture<?> scheduledFuture = EXECUTOR.schedule(() -> {
if (completableFuture.isDone()) {
return;
Expand Down Expand Up @@ -644,7 +624,7 @@ public void run() {

final Set<String> jenkinsInstances = new HashSet<>();
for (final Node node : jenkins.getNodes()) {
if (node instanceof EC2FleetNode && ((EC2FleetNode) node).getCloud().getFleet().equals(fleet)) {
if (node instanceof EC2FleetNode && ((EC2FleetCloud)((EC2FleetNode) node).getCloud()).getFleet().equals(fleet)) {

Check warning on line 627 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 627 is only partially covered, 2 branches are missing
jenkinsInstances.add(node.getNodeName());
}
}
Expand Down Expand Up @@ -776,7 +756,6 @@ public synchronized boolean scheduleToTerminate(final String instanceId, final b
@Override
public boolean canProvision(final Cloud.CloudState cloudState) {
final Label label = cloudState.getLabel();
fine("CanProvision called on fleet: \"" + this.labelString + "\" wanting: \"" + (label == null ? "(unspecified)" : label.getName()) + "\".");
if (fleet == null) {
fine("Fleet/ASG for cloud is null, returning false");
return false;
Expand All @@ -786,7 +765,7 @@ public boolean canProvision(final Cloud.CloudState cloudState) {
return false;
}
if (label != null && !Label.parse(this.labelString).containsAll(label.listAtoms())) {
fine("Label '%s' not found within Fleet's labels '%s', returning false", label, this.labelString);
finer("Label '%s' not found within Fleet's labels '%s', returning false", label, this.labelString);
return false;
}
return true;
Expand All @@ -798,8 +777,6 @@ private Object readResolve() {
}

private void init() {
id = new LazyUuid();

plannedNodesCache = new HashSet<>();
instanceIdsToTerminate = new HashMap<>();
plannedNodeScheduledFutures = new ArrayList<>();
Expand Down Expand Up @@ -864,7 +841,7 @@ private void addNewAgent(final AmazonEC2 ec2, final Instance instance, FleetStat
final Node.Mode nodeMode = restrictUsage ? Node.Mode.EXCLUSIVE : Node.Mode.NORMAL;
final EC2FleetNode node = new EC2FleetNode(instanceId, "Fleet agent for " + instanceId,
effectiveFsRoot, effectiveNumExecutors, nodeMode, labelString, new ArrayList<NodeProperty<?>>(),
this, computerLauncher, maxTotalUses);
this.name, computerLauncher, maxTotalUses);

// Initialize our retention strategy
node.setRetentionStrategy(new EC2RetentionStrategy());
Expand Down Expand Up @@ -905,7 +882,7 @@ private int getCurrentSpareInstanceCount(final FleetStateStats currentState, fin
}

// Do not count computer if it is not a part of the given fleet
if (!Objects.equals(((EC2FleetNodeComputer) computer).getCloud().getFleet(), currentState.getFleetId())) {
if (!Objects.equals(((EC2FleetCloud)((EC2FleetNodeComputer) computer).getCloud()).getFleet(), currentState.getFleetId())) {

Check warning on line 885 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 885 is only partially covered, 2 branches are missing
continue;
}
currentBusyInstances++;
Expand All @@ -928,6 +905,10 @@ private void fine(final String msg, final Object... args) {
LOGGER.fine(getLogPrefix() + String.format(msg, args));
}

private void finer(final String msg, final Object... args) {
LOGGER.finer(getLogPrefix() + String.format(msg, args));
}

private void warning(final String msg, final Object... args) {
LOGGER.warning(getLogPrefix() + String.format(msg, args));
}
Expand All @@ -936,6 +917,11 @@ private void warning(final Throwable t, final String msg, final Object... args)
LOGGER.log(Level.WARNING, getLogPrefix() + String.format(msg, args), t);
}

@Override
public DescriptorImpl getDescriptor() {
return (DescriptorImpl) super.getDescriptor();
}

@Extension
@SuppressWarnings("unused")
public static class DescriptorImpl extends Descriptor<Cloud> {
Expand Down Expand Up @@ -972,6 +958,15 @@ public FormValidation doCheckMaxTotalUses(@QueryParameter String value) {
return FormValidation.error("Maximum Total Uses must be greater or equal to -1");
}

public FormValidation doCheckCloudName(@QueryParameter final String name) {
try {
Jenkins.checkGoodName(name);

Check warning on line 963 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 963 is not covered by tests
} catch (Failure e) {

Check warning on line 964 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 964 is not covered by tests
return FormValidation.error(e.getMessage());

Check warning on line 965 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 965 is not covered by tests
}

Check warning on line 966 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 966 is not covered by tests
return FormValidation.ok();

Check warning on line 967 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 967 is not covered by tests
}

public ListBoxModel doFillFleetItems(@QueryParameter final boolean showAllFleets,
@QueryParameter final String region,
@QueryParameter final String endpoint,
Expand All @@ -985,7 +980,7 @@ public ListBoxModel doFillFleetItems(@QueryParameter final boolean showAllFleets
awsCredentialsId, region, endpoint, model, fleet, showAllFleets);
}
} catch (final Exception ex) {
LOGGER.log(Level.WARNING, String.format("Cannot describe fleets in %s or by endpoint %s", region, endpoint), ex);
LOGGER.log(Level.WARNING, String.format("Cannot describe fleets in '%s' or by endpoint '%s'", region, endpoint), ex);
return model;
}

Expand Down Expand Up @@ -1026,7 +1021,5 @@ public boolean configure(final StaplerRequest req, final JSONObject formData) th
save();
return super.configure(req, formData);
}

}

}
23 changes: 0 additions & 23 deletions src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloudAware.java

This file was deleted.

58 changes: 15 additions & 43 deletions src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import com.amazon.jenkins.ec2fleet.aws.RegionHelper;
import com.amazon.jenkins.ec2fleet.fleet.EC2Fleets;
import com.amazon.jenkins.ec2fleet.fleet.EC2SpotFleet;
import com.amazon.jenkins.ec2fleet.utils.EC2FleetCloudAwareUtils;
import com.amazonaws.services.cloudformation.AmazonCloudFormation;
import com.amazonaws.services.cloudformation.model.StackStatus;
import com.amazonaws.services.ec2.AmazonEC2;
Expand All @@ -16,7 +15,6 @@
import com.cloudbees.jenkins.plugins.awscredentials.AWSCredentialsHelper;
import hudson.Extension;
import hudson.model.AbstractProject;
import hudson.model.Computer;
import hudson.model.Descriptor;
import hudson.model.Item;
import hudson.model.Label;
Expand Down Expand Up @@ -75,22 +73,6 @@ public class EC2FleetLabelCloud extends AbstractEC2FleetCloud {
private static final SimpleFormatter sf = new SimpleFormatter();
private static final Logger LOGGER = Logger.getLogger(EC2FleetLabelCloud.class.getName());

/**
* Provide unique identifier for this instance of {@link EC2FleetLabelCloud}, <code>transient</code>
* will not be stored. Not available for customer, instead use {@link EC2FleetLabelCloud#name}
* will be used only during Jenkins configuration update <code>config.jelly</code>,
* when new instance of same cloud is created and we need to find old instance and
* repoint resources like {@link Computer} {@link Node} etc.
* <p>
* It's lazy to support old versions which don't have this field at all.
* <p>
* However it's stable, as soon as it will be created and called first uuid will be same
* for all future calls to the same instances of lazy uuid.
*
* @see EC2FleetCloudAware
*/
private transient LazyUuid id;

private final String awsCredentialsId;
private final String region;
private final String endpoint;
Expand Down Expand Up @@ -123,7 +105,6 @@ public class EC2FleetLabelCloud extends AbstractEC2FleetCloud {

@DataBoundConstructor
public EC2FleetLabelCloud(final String name,
final String oldId,
final String awsCredentialsId,
final String region,
final String endpoint,
Expand Down Expand Up @@ -162,12 +143,6 @@ public EC2FleetLabelCloud(final String name,
this.cloudStatusIntervalSec = cloudStatusIntervalSec;
this.noDelayProvision = noDelayProvision;
this.ec2KeyPairName = ec2KeyPairName;

if (StringUtils.isNotEmpty(oldId)) {
// existent cloud was modified, let's re-assign all dependencies of old cloud instance
// to new one
EC2FleetCloudAwareUtils.reassign(oldId, this);
}
}

public String getEc2KeyPairName() {
Expand All @@ -182,22 +157,6 @@ public String getAwsCredentialsId() {
return awsCredentialsId;
}

/**
* Called old as will be used by new instance of cloud, for
* which this id is old (not current)
*
* @return id of current cloud
*/
public String getOldId() {
return id.getValue();
}

@Override
public String getFleet() {
// TODO: We need a way to map existing node/computer's cloud reference rather than relying on oldId
return null;
}

public boolean isDisableTaskResubmit() {
return disableTaskResubmit;
}
Expand Down Expand Up @@ -295,6 +254,15 @@ public synchronized boolean hasExcessCapacity() {

@Override
public synchronized Collection<NodeProvisioner.PlannedNode> provision(@Nonnull final Cloud.CloudState cloudState, int excessWorkload) {
Jenkins jenkinsInstance = Jenkins.get();
if (jenkinsInstance.isQuietingDown()) {

Check warning on line 258 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 258 is only partially covered, one branch is missing
LOGGER.log(Level.FINE, "Not provisioning nodes, Jenkins instance is quieting down");

Check warning on line 259 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 259 is not covered by tests
return Collections.emptyList();

Check warning on line 260 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 260 is not covered by tests
} else if (jenkinsInstance.isTerminating()) {

Check warning on line 261 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 261 is only partially covered, one branch is missing
LOGGER.log(Level.FINE, "Not provisioning nodes, Jenkins instance is terminating");

Check warning on line 262 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 262 is not covered by tests
return Collections.emptyList();

Check warning on line 263 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 263 is not covered by tests
}

info("excessWorkload %s", excessWorkload);

final Label label = cloudState.getLabel();
Expand Down Expand Up @@ -608,7 +576,6 @@ private Object readResolve() {
}

private void init() {
id = new LazyUuid();
states = new HashMap<>();
}

Expand Down Expand Up @@ -653,7 +620,7 @@ private void addNewAgent(
//TODO: Add maxTotalUses to EC2FleetLabelCloud similar to EC2FleetCloud

Check warning on line 620 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Open Tasks Scanner

TODO

NORMAL: Add maxTotalUses to EC2FleetLabelCloud similar to EC2FleetCloud
final EC2FleetNode node = new EC2FleetNode(instanceId, "Fleet agent for " + instanceId,
effectiveFsRoot, effectiveNumExecutors, nodeMode, labelString, new ArrayList<NodeProperty<?>>(),
this, computerLauncher, -1);
this.name, computerLauncher, -1);

// Initialize our retention strategy
node.setRetentionStrategy(new EC2RetentionStrategy());
Expand Down Expand Up @@ -812,6 +779,11 @@ public void run() {
}
}

@Override
public EC2FleetLabelCloud.DescriptorImpl getDescriptor() {
return (EC2FleetLabelCloud.DescriptorImpl) super.getDescriptor();

Check warning on line 784 in src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetLabelCloud.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 784 is not covered by tests
}

@Extension
@SuppressWarnings("unused")
public static class DescriptorImpl extends Descriptor<Cloud> {
Expand Down
Loading

0 comments on commit a0d67cf

Please sign in to comment.