Skip to content
Permalink
Browse files

Tag deployed resources and remove them if they leak [FIXED JENKINS-41…

…330]
  • Loading branch information
clguiman authored and arroyc committed Feb 7, 2017
1 parent ec4d96c commit 2f70f8df6d40f3d400d4b560569982eb51e612a4
@@ -15,6 +15,7 @@
*/
package com.microsoft.azure.vmagent;

import com.microsoft.azure.PagedList;
import com.microsoft.azure.vmagent.Messages;
import java.io.IOException;
import java.util.concurrent.Callable;
@@ -23,19 +24,30 @@
import com.microsoft.azure.vmagent.exceptions.AzureCloudException;
import com.microsoft.azure.management.Azure;
import com.microsoft.azure.management.resources.Deployment;
import com.microsoft.azure.management.resources.GenericResource;
import com.microsoft.azure.util.AzureCredentials.ServicePrincipal;
import com.microsoft.azure.vmagent.retry.DefaultRetryStrategy;
import com.microsoft.azure.vmagent.util.AzureUtil;
import com.microsoft.azure.vmagent.util.ExecutionEngine;
import com.microsoft.azure.vmagent.util.CleanUpAction;
import com.microsoft.azure.vmagent.util.Constants;
import com.microsoft.azure.vmagent.util.TokenCache;

import jenkins.model.Jenkins;
import hudson.Extension;
import hudson.model.AsyncPeriodicWork;
import hudson.model.TaskListener;
import hudson.model.Computer;
import java.net.URI;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.logging.Level;
import org.apache.commons.lang.StringUtils;
import org.joda.time.DateTime;

@Extension
@@ -93,6 +105,10 @@ public void registerDeployment(String cloudName, String resourceGroupName, Strin
DeploymentInfo newDeploymentToClean = new DeploymentInfo(cloudName, resourceGroupName, deploymentName, maxDeleteAttempts);
deploymentsToClean.add(newDeploymentToClean);
}

/**
 * Builds a new deployment tag via the no-argument {@link AzureUtil.DeploymentTag} constructor.
 *
 * @return a freshly constructed deployment tag
 */
public AzureUtil.DeploymentTag getDeploymentTag() {
    final AzureUtil.DeploymentTag freshTag = new AzureUtil.DeploymentTag();
    return freshTag;
}
}

public void cleanDeployments() {
@@ -185,7 +201,121 @@ else if(state.equalsIgnoreCase("succeeded") && diffTimeInMinutes > successTimeou
}
LOGGER.log(Level.INFO, "AzureVMAgentCleanUpTask: cleanDeployments: Done cleaning deployments");
}


/* There are some edge-cases where we might lose track of the provisioned resources:
1. the process stops right after we start provisioning
2. some Azure error blocks us from deleting the resource
This method will look into the resource group and remove all resources that have our tag and are not accounted for.
*/
/**
 * Runs the leaked-resource cleanup once for every {@link AzureVMCloud} registered with Jenkins.
 * Returns without doing anything when no Jenkins instance is available.
 */
public void cleanLeakedResources() {
    final Jenkins jenkins = Jenkins.getInstance();
    if (jenkins != null) {
        for (AzureVMCloud azureCloud : jenkins.clouds.getAll(AzureVMCloud.class)) {
            final String groupName = azureCloud.getResourceGroupName();
            final ServicePrincipal principal = azureCloud.getServicePrincipal();
            final String nameOfCloud = azureCloud.name;
            // Each cloud gets its own registrar instance, exactly one per cleanup call.
            cleanLeakedResources(groupName, principal, nameOfCloud, new DeploymentRegistrar());
        }
    }
}

/**
 * Collects the names of all Jenkins computers that are Azure VM computers whose node belongs to the given cloud.
 *
 * @param cloudName name of the cloud the agents must belong to
 * @return the matching computer names; empty when there is no Jenkins instance or nothing matches
 */
public List<String> getValidVMs(final String cloudName) {
    final List<String> vmNames = new ArrayList<>();
    final Jenkins jenkins = Jenkins.getInstance();
    if (jenkins == null) {
        return vmNames;
    }
    for (Computer candidate : jenkins.getComputers()) {
        if (!(candidate instanceof AzureVMComputer)) {
            continue;
        }
        final AzureVMAgent node = ((AzureVMComputer) candidate).getNode();
        // Computers without a node, or whose node is owned by another cloud, are not ours to report.
        if (node == null || !node.getCloudName().equals(cloudName)) {
            continue;
        }
        vmNames.add(candidate.getName());
    }
    return vmNames;
}

/**
 * Deletes resources in the given resource group that carry a matching deployment tag but do not belong
 * to any agent currently known to Jenkins.
 *
 * <p>A resource is deleted when all of the following hold:
 * <ul>
 *   <li>it has the {@code Constants.AZURE_RESOURCES_TAG_NAME} tag and the registrar's current deployment tag
 *       {@code matches} it (same instance id and a timestamp difference above the deployment timeout);</li>
 *   <li>its name does not contain the name of any VM returned by {@link #getValidVMs(String)};</li>
 *   <li>its type is neither a storage account nor a virtual network.</li>
 * </ul>
 * Deletion order is virtual machines, then network interfaces, then IP addresses, then everything else.
 * For virtual machines the OS disk blob is removed after the VM itself.
 *
 * <p>This method never throws: every failure is logged and swallowed so the periodic task keeps running.
 *
 * @param resourceGroup       name of the resource group to scan
 * @param servicePrincipal    credentials used to obtain the Azure client
 * @param cloudName           name of the cloud whose agents are considered valid
 * @param deploymentRegistrar source of the deployment tag used for matching
 */
public void cleanLeakedResources(
        final String resourceGroup,
        final ServicePrincipal servicePrincipal,
        final String cloudName,
        final DeploymentRegistrar deploymentRegistrar) {
    try {
        final List<String> validVMs = getValidVMs(cloudName);
        final Azure azureClient = TokenCache.getInstance(servicePrincipal).getAzureClient();
        //can't use listByTag because for some reason that method strips all the tags from the outputted resources (https://github.com/Azure/azure-sdk-for-java/issues/1436)
        final PagedList<GenericResource> resources = azureClient.genericResources().listByGroup(resourceGroup);

        // PriorityQueue rejects an initial capacity below 1, so an empty resource group must not be
        // passed through as capacity 0 (that would be reported as a cleanup failure on every run).
        final int initialCapacity = Math.max(1, resources.size());
        final PriorityQueue<GenericResource> resourcesMarkedForDeletion = new PriorityQueue<>(initialCapacity, new Comparator<GenericResource>() {
            @Override
            public int compare(GenericResource o1, GenericResource o2) {
                return Integer.compare(getPriority(o1), getPriority(o2));
            }

            // Lower value == deleted earlier: VMs first, then their NICs, then public IPs, then the rest.
            private int getPriority(final GenericResource resource) {
                final String type = resource.type();
                if (StringUtils.containsIgnoreCase(type, "virtualMachine")) {
                    return 1;
                }
                if (StringUtils.containsIgnoreCase(type, "networkInterface")) {
                    return 2;
                }
                if (StringUtils.containsIgnoreCase(type, "IPAddress")) {
                    return 3;
                }
                return 4;
            }
        });

        for (GenericResource resource : resources) {
            final Map<String, String> tags = resource.tags();
            // Defensive: a resource without a tag map cannot carry our tag, so it is never ours to delete.
            if (tags == null || !tags.containsKey(Constants.AZURE_RESOURCES_TAG_NAME)) {
                continue;
            }
            final AzureUtil.DeploymentTag resourceTag = new AzureUtil.DeploymentTag(tags.get(Constants.AZURE_RESOURCES_TAG_NAME));
            if (!deploymentRegistrar.getDeploymentTag().matches(resourceTag)) {
                continue;
            }
            boolean shouldSkipDeletion = false;
            for (String validVM : validVMs) {
                if (resource.name().contains(validVM)) {
                    shouldSkipDeletion = true;
                    break;
                }
            }
            // we're not removing storage accounts or networks - someone else might be using them
            if (shouldSkipDeletion
                    || StringUtils.containsIgnoreCase(resource.type(), "StorageAccounts")
                    || StringUtils.containsIgnoreCase(resource.type(), "virtualNetworks")) {
                continue;
            }
            resourcesMarkedForDeletion.add(resource);
        }

        while (!resourcesMarkedForDeletion.isEmpty()) {
            try {
                final GenericResource resource = resourcesMarkedForDeletion.poll();
                if (resource == null) {
                    continue;
                }

                URI osDiskURI = null;
                if (StringUtils.containsIgnoreCase(resource.type(), "virtualMachine")) {
                    final String osDiskVhd = azureClient.virtualMachines().getById(resource.id()).osDiskVhdUri();
                    // A missing VHD URI must not prevent the VM itself from being deleted;
                    // there is simply no blob to clean up afterwards in that case.
                    if (StringUtils.isNotBlank(osDiskVhd)) {
                        osDiskURI = new URI(osDiskVhd);
                    }
                }

                LOGGER.log(Level.INFO, "cleanLeakedResources: deleting {0} from resource group {1}", new Object[]{resource.name(), resourceGroup});
                azureClient.genericResources().deleteById(resource.id());
                if (osDiskURI != null) {
                    AzureVMManagementServiceDelegate.removeStorageBlob(azureClient, osDiskURI, resourceGroup);
                }
            } catch (Exception e) {
                // Best effort per resource: log and carry on with the remaining ones.
                LOGGER.log(Level.INFO, "AzureVMAgentCleanUpTask: cleanLeakedResources: failed to clean resource ", e);
            }
        }
    } catch (Exception e) {
        // No need to throw exception back, just log and move on.
        LOGGER.log(Level.INFO, "AzureVMAgentCleanUpTask: cleanLeakedResources: failed to clean leaked resources ", e);
    }
}

/** Delegates to {@code cleanVMs(ExecutionEngine)} with a newly created {@link ExecutionEngine}. */
private void cleanVMs() {
    final ExecutionEngine engine = new ExecutionEngine();
    cleanVMs(engine);
}
@@ -293,6 +423,8 @@ public void execute(TaskListener arg0) throws InterruptedException {
cleanVMs();
// Clean up the deployments
cleanDeployments();

cleanLeakedResources();
}

@Override
@@ -203,6 +203,8 @@ public static AzureVMDeploymentInfo createDeployment(final AzureVMAgentTemplate

ObjectNode.class.cast(tmp.get("variables")).put("vmName", vmBaseName);
ObjectNode.class.cast(tmp.get("variables")).put("location", locationName);
ObjectNode.class.cast(tmp.get("variables")).put("jenkinsTag", Constants.AZURE_JENKINS_TAG_VALUE);
ObjectNode.class.cast(tmp.get("variables")).put("resourceTag", deploymentRegistrar.getDeploymentTag().get());

if (StringUtils.isNotBlank(template.getImagePublisher())) {
ObjectNode.class.cast(tmp.get("variables")).put("imagePublisher", template.getImagePublisher());
@@ -836,25 +838,7 @@ public static void terminateVirtualMachine(

// Now remove the disks
for (URI diskUri : diskUrisToRemove) {
// Obtain container, storage account, and blob name
String storageAccountName = diskUri.getHost().split("\\.")[0];
String containerName = PathUtility.getContainerNameFromUri(diskUri, false);
String blobName = PathUtility.getBlobNameFromURI(diskUri, false);

LOGGER.log(Level.INFO, "AzureVMManagementServiceDelegate: terminateVirtualMachine: Removing disk blob {0}, in container {1} of storage account {2}",
new Object[]{blobName, containerName, storageAccountName});

List<StorageAccountKey> storageKeys = azureClient.storageAccounts()
.getByGroup(resourceGroupName, storageAccountName)
.getKeys();
if (!storageKeys.isEmpty()) {
String storageAccountKey = storageKeys.get(0).value();
CloudStorageAccount account = new CloudStorageAccount(new StorageCredentialsAccountAndKey(storageAccountName, storageAccountKey));
CloudBlobClient blobClient = account.createCloudBlobClient();
blobClient.getContainerReference(containerName)
.getBlockBlobReference(blobName)
.deleteIfExists();
}
AzureVMManagementServiceDelegate.removeStorageBlob(azureClient, diskUri, resourceGroupName);
}
}
} catch (Exception e) {
@@ -882,6 +866,28 @@ public Void call() throws Exception {
}
}

/**
 * Deletes the blob addressed by {@code blobURI}, if it exists.
 *
 * <p>The storage account name is taken from the first label of the URI host; container and blob names are
 * extracted from the URI via {@code PathUtility}. The first key of the storage account is used to
 * authenticate. When the account reports no keys, nothing is deleted.
 *
 * @param azureClient       Azure client used to look up the storage account keys
 * @param blobURI           URI of the blob to delete
 * @param resourceGroupName resource group that contains the storage account
 * @throws Exception if the key lookup or the blob deletion fails
 */
public static void removeStorageBlob(final Azure azureClient, final URI blobURI, final String resourceGroupName) throws Exception {
    // Obtain storage account, container, and blob name from the URI
    final String accountName = blobURI.getHost().split("\\.")[0];
    final String container = PathUtility.getContainerNameFromUri(blobURI, false);
    final String blob = PathUtility.getBlobNameFromURI(blobURI, false);

    LOGGER.log(Level.INFO, "removeStorageBlob: Removing disk blob {0}, in container {1} of storage account {2}",
            new Object[]{blob, container, accountName});

    final List<StorageAccountKey> keys = azureClient.storageAccounts()
            .getByGroup(resourceGroupName, accountName)
            .getKeys();
    if (keys.isEmpty()) {
        // Without a key there is no way to authenticate against the storage account.
        return;
    }

    final String firstKey = keys.get(0).value();
    final CloudStorageAccount storageAccount =
            new CloudStorageAccount(new StorageCredentialsAccountAndKey(accountName, firstKey));
    final CloudBlobClient client = storageAccount.createCloudBlobClient();
    client.getContainerReference(container)
            .getBlockBlobReference(blob)
            .deleteIfExists();
}

/**
* Remove the IP name
*
@@ -396,4 +396,65 @@ public static boolean isValidTimeOut(String deploymentTimeout) {
return false;
return true;
}

/**
 * Immutable tag attached to deployed Azure resources, made of a Jenkins instance id and a timestamp.
 * Its string form is {@code "<id>/<timestamp>"}.
 */
public static class DeploymentTag {

    /** Instance id used when the Jenkins instance id cannot be obtained. */
    private static final String FALLBACK_INSTANCE_ID = "AzureJenkins000";

    private final String instanceId;
    private final long timestamp;

    /** Creates a tag for the current Jenkins instance stamped with the current time in seconds. */
    public DeploymentTag() {
        this(System.currentTimeMillis() / 1000);
    }

    /**
     * Parses a tag from its string form.
     *
     * <p>Expects a string in this format: {@code "<id>/<timestamp>"}.
     * If the id is omitted it is replaced with an empty string.
     * If the timestamp is omitted, is not a number, or is negative, it is replaced with 0.
     *
     * @param tag the string form of the tag; may be {@code null} or empty
     */
    public DeploymentTag(final String tag) {
        String id = "";
        long ts = 0;

        if (tag != null && !tag.isEmpty()) {
            final String[] parts = tag.split("/");
            if (parts.length >= 1) {
                id = parts[0];
            }
            if (parts.length >= 2) {
                try {
                    ts = Math.max(0L, Long.parseLong(parts[1]));
                } catch (NumberFormatException e) {
                    // Malformed timestamp: fall back to 0 as documented.
                    ts = 0;
                }
            }
        }
        this.instanceId = id;
        this.timestamp = ts;
    }

    /**
     * Creates a tag for the current Jenkins instance with an explicit timestamp.
     * Falls back to a fixed instance id when Jenkins is unavailable or provides no id.
     *
     * @param timestamp the timestamp to store in the tag
     */
    protected DeploymentTag(long timestamp) {
        String id = null;
        try {
            final Jenkins jenkins = Jenkins.getInstance();
            if (jenkins != null) {
                id = jenkins.getLegacyInstanceId();
            }
        } catch (Exception e) {
            // Best effort: any failure while asking Jenkins for its id results in the fallback id.
            id = null;
        }
        // Never store null: get() and matches() rely on a non-null instance id.
        this.instanceId = (id != null) ? id : FALLBACK_INSTANCE_ID;
        this.timestamp = timestamp;
    }

    /**
     * @return the string form of this tag, {@code "<id>/<timestamp>"}
     */
    public String get() {
        return instanceId + "/" + Long.toString(timestamp);
    }

    /**
     * Two tags match if they have the same instance id and the timestamp difference is greater than
     * {@code Constants.AZURE_DEPLOYMENT_TIMEOUT}.
     *
     * @param rhs the tag to compare against; {@code null} never matches
     * @return {@code true} when the tags match
     */
    public boolean matches(final DeploymentTag rhs) {
        return matches(rhs, Constants.AZURE_DEPLOYMENT_TIMEOUT);
    }

    /**
     * Two tags match if they have the same instance id and the absolute timestamp difference is strictly
     * greater than {@code timeout}.
     *
     * @param rhs     the tag to compare against; {@code null} never matches
     * @param timeout threshold the timestamp difference must exceed, in the same unit as the timestamps
     * @return {@code true} when the tags match
     */
    public boolean matches(final DeploymentTag rhs, long timeout) {
        if (rhs == null || !instanceId.equals(rhs.instanceId)) {
            return false;
        }
        return Math.abs(timestamp - rhs.timestamp) > timeout;
    }
}
}
@@ -139,4 +139,12 @@
public static final String DEFAULT_RESOURCE_GROUP_PATTERN = "^[a-zA-Z0-9][a-zA-Z\\-_0-9]{0,62}[a-zA-Z0-9]$";

public static final HttpLoggingInterceptor.Level DEFAULT_AZURE_SDK_LOGGING_LEVEL = HttpLoggingInterceptor.Level.NONE;

// Tag name. NOTE(review): presumably the key under which AZURE_JENKINS_TAG_VALUE is stored on deployed
// resources - confirm against the deployment templates.
public static final String AZURE_JENKINS_TAG_NAME = "JenkinsManagedTag";

// Value supplied to the deployment template as its "jenkinsTag" variable.
public static final String AZURE_JENKINS_TAG_VALUE = "ManagedByAzureVMAgents";

// Name of the resource tag whose value ("<id>/<timestamp>") is parsed into an AzureUtil.DeploymentTag
// when scanning a resource group for leaked resources.
public static final String AZURE_RESOURCES_TAG_NAME = "JenkinsResourceTag";

// Default threshold used by AzureUtil.DeploymentTag.matches(DeploymentTag): the timestamp difference
// must exceed this value for two tags to match.
public static final long AZURE_DEPLOYMENT_TIMEOUT = 2 * 60 * 60;//in seconds
}

0 comments on commit 2f70f8d

Please sign in to comment.
You can’t perform that action at this time.