Skip to content

Commit

Permalink
Fix: EC2 concurrent start bug (#20)
Browse files Browse the repository at this point in the history
* fix: fixed bug when the deployment plan failed when updating a stack if the stack was previously updated without a client request token.

* fix: will now never separate ec2 or ecs instances for the same runner
  • Loading branch information
oscarostrand committed May 24, 2023
1 parent b58b11d commit 23a3e4e
Show file tree
Hide file tree
Showing 18 changed files with 224 additions and 199 deletions.
2 changes: 0 additions & 2 deletions attini-action/pom.xml
Expand Up @@ -158,7 +158,6 @@
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>lambda</artifactId>
<version>${aws.version}</version>
<exclusions>
<exclusion>
<groupId>software.amazon.awssdk</groupId>
Expand Down Expand Up @@ -187,7 +186,6 @@
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>organizations</artifactId>
<version>${aws.version}</version>
<exclusions>
<exclusion>
<groupId>software.amazon.awssdk</groupId>
Expand Down
10 changes: 5 additions & 5 deletions attini-action/src/main/java/attini/action/App.java
Expand Up @@ -35,7 +35,7 @@
import attini.action.facades.artifactstore.ArtifactStoreFacade;
import attini.action.facades.deployorigin.DeployOriginFacade;
import attini.action.facades.stackdata.DeploymentPlanDataFacade;
import attini.action.facades.stackdata.StackDataFacade;
import attini.action.facades.stackdata.ResourceStateFacade;
import attini.action.facades.stepfunction.StepFunctionFacade;
import attini.action.actions.runner.CouldNotParseInputException;
import attini.action.licence.LicenceAgreementHandler;
Expand All @@ -58,7 +58,7 @@ public class App implements RequestHandler<Map<String, Object>, Map<String, Obje
private final ObjectMapper quarkusMapper;
private final ArtifactStoreFacade artifactStoreFacade;
private final ImportHandler importHandler;
private final StackDataFacade stackDataFacade;
private final ResourceStateFacade resourceStateFacade;

private final DeploymentPlanDataFacade deploymentPlanDataFacade;
private final CdkRunnerAdapter cdkRunnerAdapter;
Expand All @@ -76,7 +76,7 @@ public App(CfnHandler cfnHandler,
ObjectMapper quarkusMapper,
ArtifactStoreFacade artifactStoreFacade,
ImportHandler importHandler,
StackDataFacade stackDataFacade,
ResourceStateFacade resourceStateFacade,
DeploymentPlanDataFacade deploymentPlanDataFacade,
CdkRunnerAdapter cdkRunnerAdapter, SamPackageRunnerAdapter samPackageRunnerAdapter) {
this.cfnHandler = requireNonNull(cfnHandler, "deployCfnStackService");
Expand All @@ -91,7 +91,7 @@ public App(CfnHandler cfnHandler,
this.quarkusMapper = requireNonNull(quarkusMapper, "quarkusMapper");
this.artifactStoreFacade = requireNonNull(artifactStoreFacade, "artifactStoreFacade");
this.importHandler = requireNonNull(importHandler, "readOutputHandler");
this.stackDataFacade = requireNonNull(stackDataFacade, "stackDataFacade");
this.resourceStateFacade = requireNonNull(resourceStateFacade, "stackDataFacade");
this.deploymentPlanDataFacade = requireNonNull(deploymentPlanDataFacade, "deploymentPlanDataFacade");
this.cdkRunnerAdapter = requireNonNull(cdkRunnerAdapter, "cdkRunnerAdapter");
this.samPackageRunnerAdapter = requireNonNull(samPackageRunnerAdapter, "samPackageRunnerAdapter");
Expand Down Expand Up @@ -187,7 +187,7 @@ public Map<String, Object> handleRequest(Map<String, Object> input, Context cont
DeploymentPlanExecutionMetadata deploymentPlanExecutionMetadata = quarkusMapper.convertValue(input.get(
"deploymentPlanExecutionMetadata"),
DeploymentPlanExecutionMetadata.class);
stackDataFacade.saveManualApprovalData(deploymentPlanExecutionMetadata, deploymentOriginData);
resourceStateFacade.saveManualApprovalData(deploymentPlanExecutionMetadata, deploymentOriginData);

}

Expand Down
Expand Up @@ -5,7 +5,7 @@
import org.jboss.logging.Logger;

import attini.action.actions.deploycloudformation.stackconfig.StackConfiguration;
import attini.action.facades.stackdata.StackDataFacade;
import attini.action.facades.stackdata.ResourceStateFacade;
import attini.action.facades.stepfunction.StepFunctionFacade;
import attini.action.facades.stepguard.StepGuardFacade;
import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
Expand All @@ -17,22 +17,22 @@ public class CfnErrorHandler {

private final CfnStackFacade cfnStackFacade;
private final StepGuardFacade stepGuardFacade;
private final StackDataFacade stackDataFacade;
private final ResourceStateFacade resourceStateFacade;
private final StepFunctionFacade stepFunctionFacade;
public CfnErrorHandler(CfnStackFacade cfnStackFacade,
StepGuardFacade stepGuardFacade,
StackDataFacade stackDataFacade,
ResourceStateFacade resourceStateFacade,
StepFunctionFacade stepFunctionFacade) {
this.cfnStackFacade = requireNonNull(cfnStackFacade, "cfnStackFacade");
this.stepGuardFacade = requireNonNull(stepGuardFacade, "stepGuardFacade");
this.stackDataFacade = requireNonNull(stackDataFacade, "stackDataFacade");
this.resourceStateFacade = requireNonNull(resourceStateFacade, "stackDataFacade");
this.stepFunctionFacade = requireNonNull(stepFunctionFacade, "stepFunctionFacade");
}

protected void handleNoUpdatesToPerformedState(StackData stackData, String resourceStatus) {
String stackName = stackData.getStackConfiguration().getStackName();
logger.info(String.format("No updates are to be performed to stack %s", stackName));
stackDataFacade.saveStackData(stackData);
resourceStateFacade.saveStackData(stackData);
stepGuardFacade.notifyStepCompleted(stackData, resourceStatus);


Expand Down
Expand Up @@ -8,7 +8,7 @@
import org.jboss.logging.Logger;

import attini.action.domain.DesiredState;
import attini.action.facades.stackdata.StackDataFacade;
import attini.action.facades.stackdata.ResourceStateFacade;
import attini.action.facades.stepfunction.StepFunctionFacade;
import attini.action.facades.stepguard.StepGuardFacade;
import software.amazon.awssdk.services.cloudformation.model.CloudFormationException;
Expand All @@ -19,19 +19,19 @@ public class DeployCfnCrossRegionService {

private final CfnStackFacade cfnStackFacade;
private final StepGuardFacade stepGuardFacade;
private final StackDataFacade stackDataFacade;
private final ResourceStateFacade resourceStateFacade;
private final StepFunctionFacade stepFunctionFacade;
private final CfnErrorHandler cfnErrorHandler;


public DeployCfnCrossRegionService(CfnStackFacade cfnStackFacade,
StepGuardFacade stepGuardFacade,
StackDataFacade stackDataFacade,
ResourceStateFacade resourceStateFacade,
StepFunctionFacade stepFunctionFacade,
CfnErrorHandler cfnErrorHandler) {
this.cfnStackFacade = requireNonNull(cfnStackFacade, "cfnStackFacade");
this.stepGuardFacade = requireNonNull(stepGuardFacade, "stepGuardFacade");
this.stackDataFacade = requireNonNull(stackDataFacade, "stackDataFacade");
this.resourceStateFacade = requireNonNull(resourceStateFacade, "stackDataFacade");
this.stepFunctionFacade = requireNonNull(stepFunctionFacade, "stepFunctionFacade");
this.cfnErrorHandler = requireNonNull(cfnErrorHandler, "cfnErrorHandler");
}
Expand Down Expand Up @@ -59,9 +59,9 @@ public void deployWithPolling(StackData stackData) {
if (isNewExecution(stackData) && shouldDeleteStack(stackData)) {
logger.info("New request for stack deletion received. Will delete stack");
cfnStackFacade.deleteStack(stackData);
stackDataFacade.saveStackData(stackData);
stackDataFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
resourceStateFacade.saveStackData(stackData);
resourceStateFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
stepFunctionFacade.sendError(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
"Is in progress",
"IsExecuting");
Expand All @@ -72,9 +72,9 @@ public void deployWithPolling(StackData stackData) {
if (isNewExecution(stackData)) {
logger.info("New request for stack update received. Will update stack");
String stackId = cfnStackFacade.updateStackCrossRegion(stackData);
stackDataFacade.saveStackData(stackData, stackId);
stackDataFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
resourceStateFacade.saveStackData(stackData, stackId);
resourceStateFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
stepFunctionFacade.sendError(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
"Is in progress",
"IsExecuting");
Expand All @@ -93,9 +93,9 @@ public void deployWithPolling(StackData stackData) {
if (!shouldDeleteStack(stackData)) {
logger.info("No stack found. Will create the stack");
String stackId = cfnStackFacade.createStackCrossRegion(stackData);
stackDataFacade.saveStackData(stackData, stackId);
stackDataFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
resourceStateFacade.saveStackData(stackData, stackId);
resourceStateFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
stepFunctionFacade.sendError(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
"Is in progress",
"IsExecuting");
Expand Down Expand Up @@ -141,7 +141,7 @@ private static boolean isSameExecution(StackData stackData, String clientRequest
}

private boolean isNewExecution(StackData stackData) {
Optional<SfnExecutionArn> stacksExecutionId = stackDataFacade.getStacksSfnExecutionArn(stackData.getStackConfiguration());
Optional<SfnExecutionArn> stacksExecutionId = resourceStateFacade.getStacksSfnExecutionArn(stackData.getStackConfiguration());
return stacksExecutionId.filter(sfnExecutionArn -> stackData.getDeploymentPlanExecutionMetadata()
.executionArn()
.equals(sfnExecutionArn))
Expand Down
Expand Up @@ -12,25 +12,25 @@

import attini.action.actions.deploycloudformation.CloudFormationErrorResolver.CloudFormationError;
import attini.action.domain.DesiredState;
import attini.action.facades.stackdata.StackDataFacade;
import attini.action.facades.stackdata.ResourceStateFacade;
import attini.action.facades.stepfunction.StepFunctionFacade;
import software.amazon.awssdk.services.cloudformation.model.AlreadyExistsException;
import software.amazon.awssdk.services.cloudformation.model.CloudFormationException;

public class DeployCfnService {
private static final Logger logger = Logger.getLogger(DeployCfnService.class);
private final CfnStackFacade cfnStackFacade;
private final StackDataFacade stackDataFacade;
private final ResourceStateFacade resourceStateFacade;
private final StepFunctionFacade stepFunctionFacade;
private final CfnErrorHandler cfnErrorHandler;


public DeployCfnService(CfnStackFacade cfnStackFacade,
StackDataFacade stackDataFacade,
ResourceStateFacade resourceStateFacade,
StepFunctionFacade stepFunctionFacade,
CfnErrorHandler cfnErrorHandler) {
this.cfnStackFacade = requireNonNull(cfnStackFacade, "cfnStackFacade");
this.stackDataFacade = requireNonNull(stackDataFacade, "stackDataFacade");
this.resourceStateFacade = requireNonNull(resourceStateFacade, "stackDataFacade");
this.stepFunctionFacade = requireNonNull(stepFunctionFacade, "stepFunctionFacade");
this.cfnErrorHandler = requireNonNull(cfnErrorHandler, "cfnErrorHandler");
}
Expand All @@ -53,9 +53,9 @@ private void deleteStack(StackData stackData) {
cfnStackFacade.deleteStack(stackData);
logger.info("Stack " + stackData.getStackConfiguration()
.getStackName() + " is being deleted using callback strategy");
stackDataFacade.saveStackData(stackData);
stackDataFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
resourceStateFacade.saveStackData(stackData);
resourceStateFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());


} catch (CloudFormationException e) {
Expand All @@ -77,10 +77,10 @@ private void deleteStack(StackData stackData) {
private void deployWithCallback(StackData stackData) {
try {
String stackId = cfnStackFacade.updateCfnStack(stackData);
stackDataFacade.saveStackData(stackData, stackId);
resourceStateFacade.saveStackData(stackData, stackId);

stackDataFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
resourceStateFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());

} catch (CloudFormationException e) {
CloudFormationError cloudFormationError = resolveError(e);
Expand Down Expand Up @@ -114,10 +114,10 @@ private void createStack(StackData stackData) {

try {
String stackId = cfnStackFacade.createCfnStack(stackData);
stackDataFacade.saveStackData(stackData, stackId);
resourceStateFacade.saveStackData(stackData, stackId);

stackDataFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());
resourceStateFacade.saveToken(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
stackData.getStackConfiguration());

} catch (AlreadyExistsException e) {
stepFunctionFacade.sendError(stackData.getDeploymentPlanExecutionMetadata().sfnToken(),
Expand Down
@@ -0,0 +1,4 @@
package attini.action.actions.runner;

public class AcquireEc2StartLockException extends RuntimeException{
}
@@ -0,0 +1,4 @@
package attini.action.actions.runner;

public class AcquireEcsStartLockException extends RuntimeException {
}
Expand Up @@ -14,6 +14,7 @@
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

import attini.action.facades.stackdata.ResourceStateFacade;
import attini.action.system.EnvironmentVariables;
import attini.domain.polling.Poller;
import attini.domain.polling.PollingResult;
Expand Down Expand Up @@ -47,6 +48,7 @@ public class Ec2Facade {
private final SsmClient ssmClient;
private final ObjectMapper objectMapper;
private final EcsFacade ecsFacade;
private final ResourceStateFacade resourceStateFacade;

private final static Map<String, String> imageIdMap = Map.of("AmazonLinux2",
"/aws/service/ecs/optimized-ami/amazon-linux-2/kernel-5.10/recommended",
Expand All @@ -67,17 +69,23 @@ public Ec2Facade(Ec2Client ec2Client,
EnvironmentVariables environmentVariables,
SsmClient ssmClient,
ObjectMapper objectMapper,
EcsFacade ecsFacade) {
EcsFacade ecsFacade,
ResourceStateFacade resourceStateFacade) {
this.ec2Client = requireNonNull(ec2Client, "ec2Client");
this.environmentVariables = requireNonNull(environmentVariables, "environmentVariables");
this.ssmClient = requireNonNull(ssmClient, "ssmClient");
this.objectMapper = requireNonNull(objectMapper, "objectMapper");
this.ecsFacade = requireNonNull(ecsFacade, "ecsFacade");
this.resourceStateFacade = requireNonNull(resourceStateFacade, "stackDataDynamoFacade");
}

public String startInstance(Ec2 ec2, RunnerData runnerData) {

logger.info("Starting new ec2 instance");
if (!resourceStateFacade.acquireEc2StartLock(runnerData)) {
throw new AcquireEc2StartLockException();
}


String imageId = getImageId(ec2.getEc2Config().ami().orElse("AmazonLinux2"));
String deviceName = ec2Client.describeImages(DescribeImagesRequest.builder().imageIds(imageId).build())
Expand Down Expand Up @@ -221,7 +229,7 @@ public void waitForStart(String instanceId, RunnerData runnerData, Ec2 ec2) {
.build());

Poller.builder(() -> new PollingResult<>(ecsFacade.isRegisterWithCluster(instanceId, runnerData), null))
.setCalls(90)
.setCalls(120)
.setInterval(2, TimeUnit.SECONDS)
.setTimeoutExceptionSupplier(() -> {
InstanceStatus instanceStatus = getInstanceStatus(instanceId);
Expand Down

0 comments on commit 23a3e4e

Please sign in to comment.