Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
33d6e76
fix(deploy): better handle more exceptions in monitor server job
evansiroky Jul 24, 2020
be81ea6
fix(deploy): use fresh ec2 client and catch terminate instance except…
evansiroky Jul 26, 2020
f08b5e1
fix(deploy): use nonce to ensure integrity of otp-runner status file
evansiroky Jul 27, 2020
9bfb521
test(deploy): make deterministic nonce values
evansiroky Jul 27, 2020
660c9c6
fix(deploy): check for null nonce in status file
evansiroky Jul 27, 2020
7503f0e
fix(deploy): properly check for null otp-runner nonce
evansiroky Jul 27, 2020
004f6de
fix(deploy): correctly create longer-lasting AWS sessions
evansiroky Jul 27, 2020
579719c
refactor(deploy): remove code that customized credential session time
evansiroky Jul 27, 2020
d8799af
test(FeedStore): add test to verify file name of temp GTFS file
landonreed Jul 31, 2020
09fd9e4
fix(FeedStore): write temp GTFS files with .zip suffix
landonreed Jul 31, 2020
f6f020a
test(FeedStoreTest): add javadoc, clean up test
landonreed Aug 3, 2020
bdc3e6b
Merge pull request #334 from ibi-group/fix-tmp-gtfs-creation
landonreed Aug 3, 2020
cc83994
refactor(deploy): correct graph build timeout wait time
evansiroky Aug 6, 2020
928b610
refactor(deploy): upload otp-runner manifest, build/router-config.jso…
evansiroky Aug 6, 2020
e20e306
Merge branch 'graph-build-image-job' into improve-monitorable-server-…
evansiroky Aug 14, 2020
56083f7
Merge branch 'graph-build-image-job' into improve-monitorable-server-…
evansiroky Aug 14, 2020
e27b982
Merge branch 'graph-build-image-job' into improve-monitorable-server-…
evansiroky Aug 17, 2020
35f4f04
Merge branch 'dev' into improve-monitorable-server-job-exception-hand…
evansiroky Aug 17, 2020
6fe3bfd
refactor(deploy): make sure to exit deployjob after graph building in…
evansiroky Aug 17, 2020
144defa
refactor: introduce TimeTracker class for tracking task timeouts
evansiroky Aug 17, 2020
529401d
refactor: various improvements after code review
evansiroky Aug 19, 2020
564df01
test: update snapshots
evansiroky Aug 20, 2020
6223b9b
refactor: fix typo
landonreed Aug 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions src/main/java/com/conveyal/datatools/common/utils/AWSUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,19 @@ public static String downloadFromS3(AmazonS3 s3, String bucket, String filename,
* https://docs.aws.amazon.com/IAM/latest/UserGuide/tutorial_cross-account-with-roles.html). The credentials can be
* then used for creating a temporary S3 or EC2 client.
*/
public static AWSStaticCredentialsProvider getCredentialsForRole(String role, String sessionName) {
public static AWSStaticCredentialsProvider getCredentialsForRole(
String role,
String sessionName
) {
String roleSessionName = "data-tools-session";
if (role == null) return null;
if (sessionName != null) roleSessionName = String.join("-", roleSessionName, sessionName);
STSAssumeRoleSessionCredentialsProvider sessionProvider = new STSAssumeRoleSessionCredentialsProvider.Builder(
role,
roleSessionName
).build();
STSAssumeRoleSessionCredentialsProvider sessionProvider = new STSAssumeRoleSessionCredentialsProvider
.Builder(
role,
roleSessionName
)
.build();
AWSSessionCredentials credentials = sessionProvider.getCredentials();
return new AWSStaticCredentialsProvider(new BasicSessionCredentials(
credentials.getAWSAccessKeyId(), credentials.getAWSSecretKey(),
Expand Down
265 changes: 200 additions & 65 deletions src/main/java/com/conveyal/datatools/manager/jobs/DeployJob.java

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ public class OtpRunnerManifest {
public String buildLogFile;
public String graphObjUrl;
public String graphsFolder;
public List<String> gtfsAndOsmUrls;
public String jarFile;
public String jarUrl;
public String nonce;
public String otpRunnerLogFile;
public boolean prefixLogUploadsWithInstanceId;
public String routerConfigJSON;
public List<String> routerFolderDownloads;
public String routerName;
public boolean runServer;
public String s3UploadPath;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public class OtpRunnerStatus {
public boolean graphUploaded;
public boolean serverStarted;
public String message;
public String nonce;
public int numFilesDownloaded;
public double pctProgress;
public int totalFilesToDownload;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package com.conveyal.datatools.manager.jobs;

import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.ec2.AmazonEC2;
import com.amazonaws.services.ec2.model.AmazonEC2Exception;
import com.amazonaws.services.ec2.model.CreateImageRequest;
import com.amazonaws.services.ec2.model.CreateImageResult;
import com.amazonaws.services.ec2.model.DeregisterImageRequest;
Expand All @@ -9,11 +11,13 @@
import com.amazonaws.services.ec2.model.Image;
import com.amazonaws.services.ec2.model.Instance;
import com.conveyal.datatools.common.status.MonitorableJob;
import com.conveyal.datatools.common.utils.AWSUtils;
import com.conveyal.datatools.manager.DataManager;
import com.conveyal.datatools.manager.auth.Auth0UserProfile;
import com.conveyal.datatools.manager.controllers.api.ServerController;
import com.conveyal.datatools.manager.models.OtpServer;
import com.conveyal.datatools.manager.persistence.Persistence;
import com.conveyal.datatools.manager.utils.TimeTracker;

import java.util.List;
import java.util.concurrent.TimeUnit;
Expand All @@ -22,23 +26,33 @@

/**
* Job that is dispatched during a {@link DeployJob} that spins up EC2 instances. This handles waiting for a graph build
* image to be created after a graph build has completed.
* image to be created after a graph build has completed. If an error occurs, or if the image was created successfully
* and has a separate graph build instance type, the EC2 instance will be terminated.
*/
public class RecreateBuildImageJob extends MonitorableJob {
private final AmazonEC2 ec2;
private final List<Instance> graphBuildingInstances;
private final OtpServer otpServer;

public RecreateBuildImageJob(
DeployJob parentDeployJob,
Auth0UserProfile owner,
List<Instance> graphBuildingInstances,
OtpServer otpServer,
AmazonEC2 ec2
List<Instance> graphBuildingInstances
) {
super(owner, String.format("Recreating build image for %s", otpServer.name), JobType.RECREATE_BUILD_IMAGE);
super(
owner,
String.format("Recreating build image for %s", parentDeployJob.getOtpServer().name),
JobType.RECREATE_BUILD_IMAGE
);
this.otpServer = parentDeployJob.getOtpServer();
this.graphBuildingInstances = graphBuildingInstances;
this.otpServer = otpServer;
this.ec2 = ec2;
AWSCredentialsProvider credentials = AWSUtils.getCredentialsForRole(
otpServer.role,
"recreate-build-image"
);
ec2 = parentDeployJob.getCustomRegion() == null
? AWSUtils.getEC2ClientForCredentials(credentials)
: AWSUtils.getEC2ClientForCredentials(credentials, parentDeployJob.getCustomRegion());
}

@Override
Expand All @@ -57,6 +71,7 @@ public void jobLogic() throws Exception {
DescribeImagesRequest describeImagesRequest = new DescribeImagesRequest()
.withImageIds(createdImageId);
// wait for the image to be created. Also, make sure the parent DeployJob hasn't failed this job already.
TimeTracker imageCreationTracker = new TimeTracker(1, TimeUnit.HOURS);
while (!imageCreated && !status.error) {
DescribeImagesResult describeImagesResult = ec2.describeImages(describeImagesRequest);
for (Image image : describeImagesResult.getImages()) {
Expand All @@ -65,7 +80,7 @@ public void jobLogic() throws Exception {
// See https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/ec2/model/ImageState.html
String imageState = image.getState().toLowerCase();
if (imageState.equals("pending")) {
if (System.currentTimeMillis() - status.startTime > TimeUnit.HOURS.toMillis(1)) {
if (imageCreationTracker.hasTimedOut()) {
terminateInstanceAndFailWithMessage(
"It has taken over an hour for the graph build image to be created! Check the AWS console to see if the image was created successfully."
);
Expand Down Expand Up @@ -118,7 +133,14 @@ public void jobLogic() throws Exception {
// terminate graph building instance if needed
if (otpServer.ec2Info.hasSeparateGraphBuildConfig()) {
status.message = "Terminating graph building instance";
ServerController.terminateInstances(ec2, graphBuildingInstances);
try {
ServerController.terminateInstances(ec2, graphBuildingInstances);
} catch (AmazonEC2Exception e) {
status.fail(
"Graph build image successfully created, but failed to terminate graph building instance!",
e
);
}
}
status.completeSuccessfully("Graph build image successfully created!");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,12 @@ private void copyVersionToLatest(File version, FeedSource feedSource) throws IOE
FileUtils.copyFile(version, latest, true);
}

private File createTempFile (String name, InputStream in) throws IOException {
Path path = Files.createTempFile(name, null);
protected File createTempFile (String name, InputStream in) throws IOException {
// Create temp file in such a way that filename is preserved (no tmp suffix added).
final File tempFile = new File(new File(System.getProperty("java.io.tmpdir")), name);
LOG.info("Storing temp GTFS file at {}", tempFile.getAbsolutePath());
// FIXME: Figure out how to manage temp files created here. Currently, we just call deleteOnExit, but
// this will only delete the file once the java process stops.
File tempFile = path.toFile();
tempFile.deleteOnExit();
ByteStreams.copy(in, new FileOutputStream(tempFile));
return tempFile;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.conveyal.datatools.manager.utils;

import java.util.concurrent.TimeUnit;

/**
* A helper class to track whether a task has timed out.
*/
public class TimeTracker {
private final long maxEndTime;
private final long startTime = System.currentTimeMillis();

/**
* @param maxDuration The maximum duration that this task should take.
* @param timeUnit the unit of time that the duration is in
*/
public TimeTracker(long maxDuration, TimeUnit timeUnit) {
maxEndTime = startTime + timeUnit.toMillis(maxDuration);
}


public boolean hasTimedOut() {
return System.currentTimeMillis() > maxEndTime;
}

public long elapsedSeconds() {
return (System.currentTimeMillis() - startTime) / 1000;
}
}
2 changes: 1 addition & 1 deletion src/test/java/com/conveyal/datatools/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public static FeedVersion createFeedVersionFromGtfsZip(FeedSource source, String
/**
* Helper to get a File for the given file or folder that should be in the gtfs folder of the test resources
*/
private static String getGtfsResourcePath(String gtfsFileName) {
public static String getGtfsResourcePath(String gtfsFileName) {
return TestUtils.class.getResource("gtfs/" + gtfsFileName).getFile();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import com.conveyal.datatools.manager.persistence.Persistence;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -93,7 +92,13 @@ public void canMakeGraphBuildUserDataScript () {
"test-deploy",
DeployJob.DeployType.REPLACE
);
assertThat(deployJob.constructUserData(false), matchesSnapshot());
assertThat(
replaceNonce(
deployJob.constructManifestAndUserData(false, true),
"canMakeGraphBuildUserDataScript"
),
matchesSnapshot()
);
}

/**
Expand All @@ -108,7 +113,23 @@ public void canMakeServerOnlyUserDataScript () {
"test-deploy",
DeployJob.DeployType.REPLACE
);
assertThat(deployJob.constructUserData(true), matchesSnapshot());
assertThat(
replaceNonce(
deployJob.constructManifestAndUserData(true, true),
"canMakeServerOnlyUserDataScript"
),
matchesSnapshot()
);
}

/**
* Replaces the nonce in the user data with a deterministic value
*/
private String replaceNonce(String userData, String replacementNonce) {
return userData.replaceFirst(
"nonce\":\"[\\w-]*",
String.format("nonce\":\"%s", replacementNonce)
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.conveyal.datatools.manager.persistence;

import com.conveyal.datatools.DatatoolsTest;
import com.conveyal.datatools.UnitTest;
import com.conveyal.datatools.manager.models.FeedVersion;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import static com.conveyal.datatools.TestUtils.getGtfsResourcePath;

public class FeedStoreTest extends UnitTest {
private static final Logger LOG = LoggerFactory.getLogger(FeedStoreTest.class);

@BeforeClass
public static void setUp() throws Exception {
DatatoolsTest.setUp();
LOG.info("{} setup", FeedStoreTest.class.getSimpleName());
}

/**
* Verify that {@link FeedStore} will write a temp file with the input file name.
*/
@Test
public void canCreateTempGtfsFile() throws IOException {
final String gtfsFileName = "gtfs.zip";
File gtfsFile = new File(getGtfsResourcePath("bart_new.zip"));
FileInputStream fileInputStream = new FileInputStream(gtfsFile);
File tempFile = FeedVersion.feedStore.createTempFile(gtfsFileName, fileInputStream);
LOG.info("Feed store wrote temp file to: {}", tempFile.getAbsolutePath());
Assert.assertEquals(tempFile.getName(), gtfsFileName);
}
}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"#!/bin/bash\nexport PATH=\"$PATH:/home/ubuntu/.yarn/bin\"\nexport PATH=\"$PATH:/home/ubuntu/.nvm/versions/node/v12.16.3/bin\"\nrm /usr/share/nginx/client/status.json || echo '' > /dev/null\nrm /var/otp/otp-runner-manifest.json || echo '' > /dev/null\nrm /opt/otp/otp-latest-trimet-dev || echo '' > /dev/null\nrm /var/log/otp-runner.log || echo '' > /dev/null\nrm /var/log/otp-build.log || echo '' > /dev/null\nrm /var/log/otp-server.log || echo '' > /dev/null\necho $'{\"buildConfigJSON\":\"{ \\\\\\\"hello\\\\\\\": \\\\\\\"th\\\\ne\\'r\\\\te\\\\\\\" }\",\"buildGraph\":true,\"graphObjUrl\":\"s3://datatools-dev/test-deploy/Graph.obj\",\"graphsFolder\":\"/var/otp/graphs\",\"jarFile\":\"/opt/otp/otp-latest-trimet-dev\",\"jarUrl\":\"https://opentripplanner-builds.s3.amazonaws.com/otp-latest-trimet-dev.jar\",\"otpRunnerLogFile\":\"/var/log/otp-runner.log\",\"prefixLogUploadsWithInstanceId\":true,\"routerConfigJSON\":\"{ \\\\\\\"hi\\\\\\\": \\\\\\\"th\\\\ne\\'r\\\\te\\\\\\\" }\",\"runServer\":true,\"s3UploadPath\":\"s3://datatools-dev/test-deploy\",\"serverStartupTimeoutSeconds\":3300,\"statusFileLocation\":\"/usr/share/nginx/client/status.json\",\"uploadGraphBuildLogs\":true,\"uploadGraphBuildReport\":true,\"uploadGraph\":true,\"uploadOtpRunnerLogs\":true,\"uploadServerStartupLogs\":true}' > /var/otp/otp-runner-manifest.json\nyarn global add https://github.com/ibi-group/otp-runner.git\notp-runner /var/otp/otp-runner-manifest.json"
"#!/bin/bash\nexport PATH=\"$PATH:/home/ubuntu/.yarn/bin\"\nexport PATH=\"$PATH:/home/ubuntu/.nvm/versions/node/v12.16.3/bin\"\nrm /usr/share/nginx/client/status.json || echo '' > /dev/null\nrm /var/otp/otp-runner-manifest.json || echo '' > /dev/null\nrm /opt/otp/otp-latest-trimet-dev || echo '' > /dev/null\nrm /var/log/otp-runner.log || echo '' > /dev/null\nrm /var/log/otp-build.log || echo '' > /dev/null\nrm /var/log/otp-server.log || echo '' > /dev/null\naws s3 cp s3://datatools-dev/test-deploy/otp-runner-graph-build-manifest.json /var/otp/otp-runner-manifest.json\nyarn global add https://github.com/ibi-group/otp-runner.git#master\notp-runner /var/otp/otp-runner-manifest.json"
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"#!/bin/bash\nexport PATH=\"$PATH:/home/ubuntu/.yarn/bin\"\nexport PATH=\"$PATH:/home/ubuntu/.nvm/versions/node/v12.16.3/bin\"\nrm /usr/share/nginx/client/status.json || echo '' > /dev/null\nrm /var/otp/otp-runner-manifest.json || echo '' > /dev/null\nrm /opt/otp/otp-latest-trimet-dev || echo '' > /dev/null\nrm /var/log/otp-runner.log || echo '' > /dev/null\nrm /var/log/otp-build.log || echo '' > /dev/null\nrm /var/log/otp-server.log || echo '' > /dev/null\necho $'{\"buildGraph\":false,\"graphObjUrl\":\"s3://datatools-dev/test-deploy/Graph.obj\",\"graphsFolder\":\"/var/otp/graphs\",\"jarFile\":\"/opt/otp/otp-latest-trimet-dev\",\"jarUrl\":\"https://opentripplanner-builds.s3.amazonaws.com/otp-latest-trimet-dev.jar\",\"otpRunnerLogFile\":\"/var/log/otp-runner.log\",\"prefixLogUploadsWithInstanceId\":true,\"routerConfigJSON\":\"{ \\\\\\\"hi\\\\\\\": \\\\\\\"th\\\\ne\\'r\\\\te\\\\\\\" }\",\"runServer\":true,\"s3UploadPath\":\"s3://datatools-dev/test-deploy\",\"serverStartupTimeoutSeconds\":3300,\"statusFileLocation\":\"/usr/share/nginx/client/status.json\",\"uploadGraphBuildLogs\":false,\"uploadGraphBuildReport\":false,\"uploadGraph\":false,\"uploadOtpRunnerLogs\":true,\"uploadServerStartupLogs\":true}' > /var/otp/otp-runner-manifest.json\nyarn global add https://github.com/ibi-group/otp-runner.git\notp-runner /var/otp/otp-runner-manifest.json"
"#!/bin/bash\nexport PATH=\"$PATH:/home/ubuntu/.yarn/bin\"\nexport PATH=\"$PATH:/home/ubuntu/.nvm/versions/node/v12.16.3/bin\"\nrm /usr/share/nginx/client/status.json || echo '' > /dev/null\nrm /var/otp/otp-runner-manifest.json || echo '' > /dev/null\nrm /opt/otp/otp-latest-trimet-dev || echo '' > /dev/null\nrm /var/log/otp-runner.log || echo '' > /dev/null\nrm /var/log/otp-build.log || echo '' > /dev/null\nrm /var/log/otp-server.log || echo '' > /dev/null\naws s3 cp s3://datatools-dev/test-deploy/otp-runner-server-manifest.json /var/otp/otp-runner-manifest.json\nyarn global add https://github.com/ibi-group/otp-runner.git#master\notp-runner /var/otp/otp-runner-manifest.json"