Skip to content

Commit

Permalink
Fix nodes not being created ("at or above maximum VM count") after a failure during deployment (#289)
Browse files Browse the repository at this point in the history
Co-authored-by: Georges M. Zwingelstein <gmzwingelstein@rockwellautomation.com>
Co-authored-by: Tim Jacomb <21194782+timja@users.noreply.github.com>
  • Loading branch information
3 people committed Jun 17, 2021
1 parent 5877a4d commit b41dc00
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 31 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ To use this plugin to create VM agents, first you need to have an Azure Service
## Configure the Plugin

### Add a New Azure VM Agents Cloud
1. Within the Jenkins dashboard, click Manage Jenkins -> Configure System -> Scroll to the bottom of the page
and find the section with the dropdown "Add a new cloud" -> click on it and select "Azure VM Agents"
1. Click Manage Jenkins -> Manage Nodes and Clouds -> in the left pane, click "Configure Clouds".
2. Provide a name for the cloud (plugin will generate one for you if you leave it empty, but it's recommended to give it a meaningful name).
3. Select an existing account from the Azure Credentials dropdown or add new "Microsoft Azure Service Principal" credentials in the Credentials Management page by filling out the Subscription ID, Client ID, Client Secret and the OAuth 2.0 Token Endpoint.
4. Click on “Verify configuration” to make sure that the profile configuration is done correctly.
Expand Down Expand Up @@ -228,4 +227,4 @@ def myCloud = new AzureVMCloudBuilder()
.build()
Jenkins.getInstance().clouds.add(myCloud)
```
This sample only contains a few arguments of builder, please find all the arguments in folder [builders](src/main/java/com/microsoft/azure/vmagent/builders).
This sample uses only a few of the builder's arguments; you can find all of them in the [builders](src/main/java/com/microsoft/azure/vmagent/builders) folder.
45 changes: 25 additions & 20 deletions src/main/java/com/microsoft/azure/vmagent/AzureVMCloud.java
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,8 @@ public AzureVMAgent createProvisionedAgent(
String deploymentName) throws AzureCloudException {

LOGGER.log(Level.INFO,
"AzureVMCloud: createProvisionedAgent: Waiting for deployment {0} to be completed",
deploymentName);
"AzureVMCloud: createProvisionedAgent: Waiting for deployment {0} with VM {1} to be completed",
new Object[]{deploymentName, vmName});

final int sleepTimeInSeconds = 30;
final int timeoutInSeconds = getDeploymentTimeout();
Expand Down Expand Up @@ -689,8 +689,7 @@ public Collection<PlannedNode> provision(Label label, int workLoad) {
getServiceDelegate().setVirtualMachineDetails(
agentNode, template);
Jenkins.getInstance().addNode(agentNode);
if (agentNode.getAgentLaunchMethod()
.equalsIgnoreCase("SSH")) {
if (agentNode.getAgentLaunchMethod().equalsIgnoreCase("SSH")) {
retrySshConnect(azureComputer);
} else { // Wait until node is online
waitUntilJNLPNodeIsOnline(agentNode);
Expand Down Expand Up @@ -738,16 +737,17 @@ public Collection<PlannedNode> provision(Label label, int workLoad) {
int adjustedNumberOfAgents = adjustVirtualMachineCount(numberOfAgents);
if (adjustedNumberOfAgents == 0) {
LOGGER.log(Level.INFO,
"Not able to create any new nodes, at or above maximum VM count of {0}",
getMaxVirtualMachinesLimit());
"Not able to create {0} nodes, at or above maximum VM count of {1} and already {2} VM(s)",
new Object[]{numberOfAgents, getMaxVirtualMachinesLimit(),
getApproximateVirtualMachineCount()});
return plannedNodes;
} else if (adjustedNumberOfAgents < numberOfAgents) {
LOGGER.log(Level.INFO,
"Able to create new nodes, but can only create {0} (desired {1})",
new Object[]{adjustedNumberOfAgents, numberOfAgents});
}
doProvision(adjustedNumberOfAgents, plannedNodes, template);
// wait for deployment completion ant than check for created nodes
// wait for deployment completion and then check for created nodes
} catch (Exception e) {
LOGGER.log(
Level.SEVERE,
Expand Down Expand Up @@ -810,6 +810,7 @@ public Node call() throws AzureCloudException {
try {
info = deploymentFuture.get();
} catch (InterruptedException | ExecutionException e) {
handleFailure(template, null, e, FailureStage.DEPLOYMENT);
throw AzureCloudException.create(e);
}

Expand Down Expand Up @@ -896,17 +897,19 @@ private void handleFailure(
String vmName,
Exception e,
FailureStage stage) {
// Attempt to terminate whatever was created
try {
getServiceDelegate().terminateVirtualMachine(
vmName,
template.getResourceGroupName());
} catch (AzureCloudException terminateEx) {
LOGGER.log(
Level.SEVERE,
String.format("Failure terminating previous failed agent '%s'", vmName),
terminateEx);
// Do not throw to avoid it being recorded
// Attempt to terminate whatever was created, if anything
if (vmName != null) {
try {
getServiceDelegate().terminateVirtualMachine(
vmName,
template.getResourceGroupName());
} catch (AzureCloudException terminateEx) {
LOGGER.log(
Level.SEVERE,
String.format("Failure terminating previous failed agent '%s'", vmName),
terminateEx);
// Do not throw to avoid it being recorded
}
}
template.retrieveAzureCloudReference().adjustVirtualMachineCount(-1);
// Update the template status given this new issue.
Expand Down Expand Up @@ -964,9 +967,11 @@ public String call() {
// 30 minutes is a decent amount of time to wait for the node to be alive
final int timeoutInMinutes = 30;
String result = future.get(timeoutInMinutes, TimeUnit.MINUTES);
LOGGER.log(Level.INFO, "Azure Cloud: waitUntilOnline: node is alive , result {0}", result);
LOGGER.log(Level.INFO, "Azure Cloud: waitUntilOnline: node {0} is alive, result {1}",
new Object[]{agent.getDisplayName(), result});
} catch (Exception ex) {
throw AzureCloudException.create("Azure Cloud: waitUntilOnline: Failure waiting till online", ex);
throw AzureCloudException.create(String.format("Azure Cloud: waitUntilOnline: "
+ "Failure waiting {0} till online", agent.getDisplayName()), ex);
} finally {
future.cancel(true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ public AzureVMDeploymentInfo createDeployment(
String scriptUri = null;
try {
LOGGER.log(Level.INFO,
"AzureVMManagementServiceDelegate: createDeployment: Initializing deployment for agentTemplate {0}",
template.getTemplateName());
"AzureVMManagementServiceDelegate: createDeployment: Initializing deployment for {0} agentTemplate(s) {1}",
new Object[]{numberOfAgents, template.getTemplateName()});

Map<String, Object> properties = AzureVMAgentTemplate.getTemplateProperties(template);

Expand All @@ -230,8 +230,8 @@ public AzureVMDeploymentInfo createDeployment(
}
LOGGER.log(Level.INFO,
"AzureVMManagementServiceDelegate: createDeployment:"
+ " Creating a new deployment {0} with VM base name {1}",
new Object[]{deploymentName, vmBaseName});
+ " Creating a new deployment {0} with VM base name {1} for {2} VM(s)",
new Object[]{deploymentName, vmBaseName, numberOfAgents});
final String resourceGroupName = template.getResourceGroupName();
final String resourceGroupReferenceType = template.getResourceGroupReferenceType();

Expand Down Expand Up @@ -587,7 +587,11 @@ public AzureVMDeploymentInfo createDeployment(
.beginCreate();
return new AzureVMDeploymentInfo(deploymentName, vmBaseName, numberOfAgents);
} catch (Exception e) {
LOGGER.log(Level.SEVERE, "AzureVMManagementServiceDelegate: deployment: Unable to deploy", e);
LOGGER.log(Level.SEVERE,
String.format(
"AzureVMManagementServiceDelegate: deployment: Unable to deploy %d %s",
numberOfAgents, template.getTemplateName()),
e);
// Pass the info off to the template so that it can be queued for update.
template.handleTemplateProvisioningFailure(e.getMessage(), FailureStage.PROVISIONING);
try {
Expand Down Expand Up @@ -906,7 +910,7 @@ public void setVirtualMachineDetails(
azureAgent.setPrivateIP(privateIP);

LOGGER.log(Level.INFO, "Azure agent details:\n"
+ "nodeName{0}\n"
+ "nodeName={0}\n"
+ "adminUserName={1}\n"
+ "shutdownOnIdle={2}\n"
+ "retentionTimeInMin={3}\n"
Expand Down Expand Up @@ -2427,7 +2431,7 @@ private void createAzureResourceGroup(
} catch (Exception e) {
throw AzureCloudException.create(
String.format(
" Failed to create resource group with group name %s, location %s",
"Failed to create resource group with group name %s, location %s",
resourceGroupName, locationName),
e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
public enum FailureStage {

VALIDATION,
PREPROVISIONING,
DEPLOYMENT,
PROVISIONING,
POSTPROVISIONING

Expand Down

0 comments on commit b41dc00

Please sign in to comment.