From ab22ecc23d246aabc0f3d4b59fea6c06be21eb5b Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Fri, 26 Jan 2024 23:55:01 +0800 Subject: [PATCH 01/16] factor out method to get a single cutoff grid --- .../RegionalAnalysisController.java | 154 ++++++++++-------- 1 file changed, 83 insertions(+), 71 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index b08cb8a9b..28c8bc65f 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -39,6 +39,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.Locale; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; @@ -156,6 +157,79 @@ private int getIntQueryParameter (Request req, String parameterName, int default } } + private FileStorageKey getSingleCutoffGrid ( + RegionalAnalysis analysis, + int cutoffIndex, + String destinationPointSetId, + int percentile, + FileStorageFormat fileFormat + ) throws IOException { + String regionalAnalysisId = analysis._id; + int cutoffMinutes = analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes; + LOG.debug( + "Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {} in format {}.", + cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId, fileFormat + ); + // Analysis grids now have the percentile and cutoff in their S3 key, because there can be many of each. + // We do this even for results generated by older workers, so they will be re-extracted with the new name. + // These grids are reasonably small, we may be able to just send all cutoffs to the UI instead of selecting. + String singleCutoffKey = String.format( + "%s_%s_P%d_C%d.%s", + regionalAnalysisId, destinationPointSetId, percentile, cutoffMinutes, + fileFormat.extension.toLowerCase(Locale.ROOT) + ); + FileStorageKey singleCutoffFileStorageKey = new FileStorageKey(RESULTS, singleCutoffKey); + if (!fileStorage.exists(singleCutoffFileStorageKey)) { + // An accessibility grid for this particular cutoff has apparently never been extracted from the + // regional results file before. Extract one and save it for future reuse. Older regional analyses + // did not have arrays allowing multiple cutoffs, percentiles, or destination pointsets. The + // filenames of such regional accessibility results will not have a percentile or pointset ID. + // First try the newest form of regional results: multi-percentile, multi-destination-grid. + String multiCutoffKey = String.format("%s_%s_P%d.access", regionalAnalysisId, destinationPointSetId, percentile); + FileStorageKey multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (!fileStorage.exists(multiCutoffFileStorageKey)) { + LOG.warn("Falling back to older file name formats for regional results file: " + multiCutoffKey); + // Fall back to second-oldest form: multi-percentile, single destination grid. 
+ multiCutoffKey = String.format("%s_P%d.access", regionalAnalysisId, percentile); + multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (fileStorage.exists(multiCutoffFileStorageKey)) { + checkArgument(analysis.destinationPointSetIds.length == 1); + } else { + // Fall back on oldest form of results, single-percentile, single-destination-grid. + multiCutoffKey = regionalAnalysisId + ".access"; + multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (fileStorage.exists(multiCutoffFileStorageKey)) { + checkArgument(analysis.travelTimePercentiles.length == 1); + checkArgument(analysis.destinationPointSetIds.length == 1); + } else { + throw AnalysisServerException.notFound("Cannot find original source regional analysis output."); + } + } + } + LOG.debug("Single-cutoff grid {} not found on S3, deriving it from {}.", singleCutoffKey, multiCutoffKey); + + InputStream multiCutoffInputStream = new FileInputStream(fileStorage.getFile(multiCutoffFileStorageKey)); + Grid grid = new SelectingGridReducer(cutoffIndex).compute(multiCutoffInputStream); + + File localFile = FileUtils.createScratchFile(fileFormat.toString()); + FileOutputStream fos = new FileOutputStream(localFile); + + switch (fileFormat) { + case GRID: + grid.write(new GZIPOutputStream(fos)); + break; + case PNG: + grid.writePng(fos); + break; + case GEOTIFF: + grid.writeGeotiff(fos); + break; + } + + fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); + } + } + /** * This used to extract a particular percentile of a regional analysis as a grid file. * Now it just gets the single percentile that exists for any one analysis, either from the local buffer file @@ -167,7 +241,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException // The UUID of the regional analysis for which we want the output data final String regionalAnalysisId = req.params("_id"); // The response file format: PNG, TIFF, or GRID - final String fileFormatExtension = req.params("format"); + final String fileFormatExtension = req.params("format"); // FIXME this may be case sensitive (could make multiple files for different requests) RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start("_id").is(req.params("_id")).get(), @@ -186,8 +260,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException // For newer analyses that have multiple cutoffs, percentiles, or destination pointsets, these initial values // are coming from deprecated fields, are not meaningful and will be overwritten below from query parameters. int percentile = analysis.travelTimePercentile; - int cutoffMinutes = analysis.cutoffMinutes; - int cutoffIndex = 0; + int cutoffIndex = 0; // For old single-cutoff outputs, we can still select the zeroth grid. String destinationPointSetId = analysis.grid; // Handle newer regional analyses with multiple cutoffs in an array. 
@@ -196,7 +269,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException if (analysis.cutoffsMinutes != null) { int nCutoffs = analysis.cutoffsMinutes.length; checkState(nCutoffs > 0, "Regional analysis has no cutoffs."); - cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]); + int cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]); cutoffIndex = new TIntArrayList(analysis.cutoffsMinutes).indexOf(cutoffMinutes); checkState(cutoffIndex >= 0, "Travel time cutoff for this regional analysis must be taken from this list: (%s)", @@ -228,83 +301,22 @@ private Object getRegionalResults (Request req, Response res) throws IOException "Destination gridId must be one of: %s", String.join(",", analysis.destinationPointSetIds)); } - // We started implementing the ability to retrieve and display partially completed analyses. // We eventually decided these should not be available here at the same endpoint as complete, immutable results. - if (broker.findJob(regionalAnalysisId) != null) { throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available."); } - // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there - // aren't any (only CSV, because origins are freeform). - // How can we determine whether this analysis is expected to have no gridded results and cleanly return a 404? - - // The analysis has already completed, results should be stored and retrieved from S3 via redirects. - LOG.debug("Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {}.", - cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId); + // aren't any (only CSV, because origins are freeform). + // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404? FileStorageFormat format = FileStorageFormat.valueOf(fileFormatExtension.toUpperCase()); if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); } - - // Analysis grids now have the percentile and cutoff in their S3 key, because there can be many of each. - // We do this even for results generated by older workers, so they will be re-extracted with the new name. - // These grids are reasonably small, we may be able to just send all cutoffs to the UI instead of selecting. - String singleCutoffKey = - String.format("%s_%s_P%d_C%d.%s", regionalAnalysisId, destinationPointSetId, percentile, cutoffMinutes, fileFormatExtension); - - // A lot of overhead here - UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. - FileStorageKey singleCutoffFileStorageKey = new FileStorageKey(RESULTS, singleCutoffKey); - if (!fileStorage.exists(singleCutoffFileStorageKey)) { - // An accessibility grid for this particular cutoff has apparently never been extracted from the - // regional results file before. Extract one and save it for future reuse. Older regional analyses - // did not have arrays allowing multiple cutoffs, percentiles, or destination pointsets. The - // filenames of such regional accessibility results will not have a percentile or pointset ID. - // First try the newest form of regional results: multi-percentile, multi-destination-grid. 
- String multiCutoffKey = String.format("%s_%s_P%d.access", regionalAnalysisId, destinationPointSetId, percentile); - FileStorageKey multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - if (!fileStorage.exists(multiCutoffFileStorageKey)) { - LOG.warn("Falling back to older file name formats for regional results file: " + multiCutoffKey); - // Fall back to second-oldest form: multi-percentile, single destination grid. - multiCutoffKey = String.format("%s_P%d.access", regionalAnalysisId, percentile); - multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - if (fileStorage.exists(multiCutoffFileStorageKey)) { - checkArgument(analysis.destinationPointSetIds.length == 1); - } else { - // Fall back on oldest form of results, single-percentile, single-destination-grid. - multiCutoffKey = regionalAnalysisId + ".access"; - multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - if (fileStorage.exists(multiCutoffFileStorageKey)) { - checkArgument(analysis.travelTimePercentiles.length == 1); - checkArgument(analysis.destinationPointSetIds.length == 1); - } else { - throw AnalysisServerException.notFound("Cannot find original source regional analysis output."); - } - } - } - LOG.debug("Single-cutoff grid {} not found on S3, deriving it from {}.", singleCutoffKey, multiCutoffKey); - - InputStream multiCutoffInputStream = new FileInputStream(fileStorage.getFile(multiCutoffFileStorageKey)); - Grid grid = new SelectingGridReducer(cutoffIndex).compute(multiCutoffInputStream); - - File localFile = FileUtils.createScratchFile(format.toString()); - FileOutputStream fos = new FileOutputStream(localFile); - - switch (format) { - case GRID: - grid.write(new GZIPOutputStream(fos)); - break; - case PNG: - grid.writePng(fos); - break; - case GEOTIFF: - grid.writeGeotiff(fos); - break; - } - - fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); - } + // Process has a lot of overhead: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. 
+ FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( + analysis, cutoffIndex, destinationPointSetId, percentile, format + ); return JsonUtil.toJsonString( JsonUtil.objectNode().put("url", fileStorage.getURL(singleCutoffFileStorageKey)) ); From 67fbb896c13aacf327d03e8ee8943514c2a35d97 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 27 Jan 2024 01:32:17 +0800 Subject: [PATCH 02/16] proof of concept: return URL of zipped rasters --- .../RegionalAnalysisController.java | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 28c8bc65f..1ca232832 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -21,6 +21,7 @@ import com.conveyal.r5.analyst.PointSetCache; import com.conveyal.r5.analyst.cluster.RegionalTask; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.net.HttpHeaders; import com.google.common.primitives.Ints; import com.mongodb.QueryBuilder; import gnu.trove.list.array.TIntArrayList; @@ -35,11 +36,18 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URI; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; @@ -166,7 +174,7 @@ private FileStorageKey getSingleCutoffGrid ( ) throws IOException { String regionalAnalysisId = analysis._id; int cutoffMinutes = analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes; - LOG.debug( + LOG.info( "Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {} in format {}.", cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId, fileFormat ); @@ -228,6 +236,58 @@ private FileStorageKey getSingleCutoffGrid ( fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); } + return singleCutoffFileStorageKey; + } + + private Object getAllRegionalResults (Request req, Response res) throws IOException { + // Get the UUID of the regional analysis for which we want the output data. 
+ final String regionalAnalysisId = req.params("_id"); + RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( + QueryBuilder.start("_id").is(req.params("_id")).get(), + DBProjection.exclude("request.scenario.modifications"), + UserPermissions.from(req) + ).iterator().next(); + if (analysis == null || analysis.deleted) { + throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted."); + } + List fileStorageKeys = new ArrayList<>(); + if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { + throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); + } + for (String destinationPointSetId : analysis.destinationPointSetIds) { + for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) { + for (int percentile : analysis.travelTimePercentiles) { + FileStorageKey fileStorageKey = getSingleCutoffGrid( + analysis, + cutoffIndex, + destinationPointSetId, + percentile, + FileStorageFormat.GEOTIFF + ); + fileStorageKeys.add(fileStorageKey); + } + } + } + File tempZipFile = File.createTempFile("regional", ".zip"); + tempZipFile.delete(); // FIXME: zipfs doesn't work on existing empty files, the file has to not exist. + tempZipFile.deleteOnExit(); + Map env = Map.of("create", "true"); + URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath()); + try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) { + for (FileStorageKey fileStorageKey : fileStorageKeys) { + Path storagePath = fileStorage.getFile(fileStorageKey).toPath(); + Path zipPath = zipFilesystem.getPath(storagePath.getFileName().toString()); + Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING); + } + } + FileStorageKey fileStorageKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip"); + fileStorage.moveIntoStorage(fileStorageKey, tempZipFile); + String humanReadableName = analysis.name + ".zip"; + res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableName)); + return JsonUtil.toJsonString(JsonUtil.objectNode() + .put("url", fileStorage.getURL(fileStorageKey)) + .put("name", humanReadableName) + ); } /** @@ -563,6 +623,7 @@ public void registerEndpoints (spark.Service sparkService) { sparkService.path("/api/regional", () -> { // For grids, no transformer is supplied: render raw bytes or input stream rather than transforming to JSON. 
sparkService.get("/:_id", this::getRegionalAnalysis); + sparkService.get("/:_id/all", this::getAllRegionalResults); sparkService.get("/:_id/grid/:format", this::getRegionalResults); sparkService.get("/:_id/csv/:resultType", this::getCsvResults); sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl); From c0bd7ea622225f898182761a31f5abb016879ced Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 27 Jan 2024 01:32:53 +0800 Subject: [PATCH 03/16] partially fix MIME type lookup by file extension this is not particularly efficient but it's only used in local operation --- .../analysis/controllers/LocalFilesController.java | 4 +++- .../java/com/conveyal/file/FileStorageFormat.java | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java b/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java index c92fbbb33..a5ef44f74 100644 --- a/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java +++ b/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java @@ -33,7 +33,9 @@ private Object getFile (Request req, Response res) throws Exception { FileStorageKey key = new FileStorageKey(category, filename); File file = fileStorage.getFile(key); FileStorageFormat format = FileStorageFormat.fromFilename(filename); - res.type(format.mimeType); + if (format != null) { + res.type(format.mimeType); + } // If the content-encoding is set to gzip, Spark automatically gzips the response. This double-gzips anything // that was already gzipped. Some of our files are already gzipped, and we rely on the the client browser to diff --git a/src/main/java/com/conveyal/file/FileStorageFormat.java b/src/main/java/com/conveyal/file/FileStorageFormat.java index c33569de9..e3b6e0fe0 100644 --- a/src/main/java/com/conveyal/file/FileStorageFormat.java +++ b/src/main/java/com/conveyal/file/FileStorageFormat.java @@ -1,5 +1,7 @@ package com.conveyal.file; +import java.util.Locale; + /** * An enumeration of all the file types we handle as uploads, derived internal data, or work products. * Really this should be a union of several enumerated types (upload/internal/product) but Java does not allow this. 
@@ -37,7 +39,12 @@ public enum FileStorageFormat { } public static FileStorageFormat fromFilename (String filename) { - String extension = filename.substring(filename.lastIndexOf(".") + 1); - return FileStorageFormat.valueOf(extension.toUpperCase()); + String extension = filename.substring(filename.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT); + for (FileStorageFormat format : FileStorageFormat.values()) { + if (format.extension.equals(extension)) { + return format; + } + } + return null; } } From 980d60eb4d761d9d4812b1fa024580a58364ece8 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 27 Jan 2024 23:56:02 +0800 Subject: [PATCH 04/16] replace UUIDs with human readable filenames --- .../RegionalAnalysisController.java | 118 +++++++++++++----- .../analysis/results/BaseResultWriter.java | 1 + 2 files changed, 87 insertions(+), 32 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 1ca232832..7f6189322 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -45,9 +45,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; @@ -165,6 +168,11 @@ private int getIntQueryParameter (Request req, String parameterName, int default } } + /** + * Get a regional analysis results raster for a single (percentile, cutoff, destination) combination, in one of + * several image file formats. This method was factored out for use from two different API endpoints, one for + * fetching a single grid, and another for fetching grids for all combinations of parameters at once. + */ private FileStorageKey getSingleCutoffGrid ( RegionalAnalysis analysis, int cutoffIndex, @@ -242,54 +250,100 @@ private FileStorageKey getSingleCutoffGrid ( private Object getAllRegionalResults (Request req, Response res) throws IOException { // Get the UUID of the regional analysis for which we want the output data. 
final String regionalAnalysisId = req.params("_id"); + final UserPermissions userPermissions = UserPermissions.from(req); RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start("_id").is(req.params("_id")).get(), DBProjection.exclude("request.scenario.modifications"), - UserPermissions.from(req) + userPermissions ).iterator().next(); if (analysis == null || analysis.deleted) { throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted."); } - List fileStorageKeys = new ArrayList<>(); - if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { - throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); - } - for (String destinationPointSetId : analysis.destinationPointSetIds) { - for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) { - for (int percentile : analysis.travelTimePercentiles) { - FileStorageKey fileStorageKey = getSingleCutoffGrid( - analysis, - cutoffIndex, - destinationPointSetId, - percentile, - FileStorageFormat.GEOTIFF + FileStorageKey zippedResultsKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip"); + // Keep only alphanumeric characters and underscores from user-specified analysis name. + String friendlyAnalysisName = filenameCleanString(analysis.name); + if (!fileStorage.exists(zippedResultsKey)) { + List fileStorageKeys = new ArrayList<>(); + if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { + throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); + } + // Map from cryptic UUIDs to human readable strings that can replace them. + Map friendlyReplacements = new HashMap<>(); + { + // Replace the regional analysis ID with its name. + friendlyReplacements.put(analysis._id, friendlyAnalysisName); + // Replace each destination point set ID with its name. + int d = 0; + for (String destinationPointSetId : analysis.destinationPointSetIds) { + OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdIfPermitted( + destinationPointSetId, + userPermissions ); - fileStorageKeys.add(fileStorageKey); + checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database."); + // Avoid name collisions, prepend an integer. + String friendlyDestinationName = "D%d_%s".formatted(d++, filenameCleanString(opportunityDataset.name)); + friendlyReplacements.put(destinationPointSetId, friendlyDestinationName); } } - } - File tempZipFile = File.createTempFile("regional", ".zip"); - tempZipFile.delete(); // FIXME: zipfs doesn't work on existing empty files, the file has to not exist. - tempZipFile.deleteOnExit(); - Map env = Map.of("create", "true"); - URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath()); - try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) { - for (FileStorageKey fileStorageKey : fileStorageKeys) { - Path storagePath = fileStorage.getFile(fileStorageKey).toPath(); - Path zipPath = zipFilesystem.getPath(storagePath.getFileName().toString()); - Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING); + // Iterate over all combinations and generate one geotiff grid output for each one. + // TODO check whether origins are freeform: if (analysis.request.originPointSetKey == null) ? 
+ for (String destinationPointSetId : analysis.destinationPointSetIds) { + for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) { + for (int percentile : analysis.travelTimePercentiles) { + FileStorageKey fileStorageKey = getSingleCutoffGrid( + analysis, + cutoffIndex, + destinationPointSetId, + percentile, + FileStorageFormat.GEOTIFF + ); + fileStorageKeys.add(fileStorageKey); + } + } + } + // Also include any CSV files that were generated. TODO these are gzipped, decompress and re-compress. + // for (String csvStorageName : analysis.resultStorage.values()) { + // LOG.info("Including {} in results zip file.", csvStorageName); + // FileStorageKey csvKey = new FileStorageKey(RESULTS, csvStorageName); + // fileStorageKeys.add(csvKey); + // } + File tempZipFile = File.createTempFile("regional", ".zip"); + // Zipfs can't open existing empty files, the file has to not exist. FIXME: Non-dangerous race condition + // Examining ZipFileSystemProvider reveals a "useTempFile" env parameter, but this is for the individual entries. + // May be better to just use zipOutputStream which would also allow gzip - zip CSV conversion. + tempZipFile.delete(); + Map env = Map.of("create", "true"); + URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath()); + try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) { + for (FileStorageKey fileStorageKey : fileStorageKeys) { + Path storagePath = fileStorage.getFile(fileStorageKey).toPath(); + String nameInZip = storagePath.getFileName().toString(); + // This is so inefficient but it should work. + for (String replacementKey : friendlyReplacements.keySet()) { + nameInZip = nameInZip.replace(replacementKey, friendlyReplacements.get(replacementKey)); + } + Path zipPath = zipFilesystem.getPath(nameInZip); + Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING); + } } + fileStorage.moveIntoStorage(zippedResultsKey, tempZipFile); } - FileStorageKey fileStorageKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip"); - fileStorage.moveIntoStorage(fileStorageKey, tempZipFile); - String humanReadableName = analysis.name + ".zip"; - res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableName)); + String humanReadableZipName = friendlyAnalysisName + ".zip"; + res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName)); return JsonUtil.toJsonString(JsonUtil.objectNode() - .put("url", fileStorage.getURL(fileStorageKey)) - .put("name", humanReadableName) + .put("url", fileStorage.getURL(zippedResultsKey)) + .put("name", humanReadableZipName) ); } + private String filenameCleanString (String original) { + String ret = original.replaceAll("\\W+", "_"); + if (ret.length() > 20) { + ret = ret.substring(0, 20); + } + return ret; + } + /** * This used to extract a particular percentile of a regional analysis as a grid file. 
* Now it just gets the single percentile that exists for any one analysis, either from the local buffer file diff --git a/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java b/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java index df289c9fe..8bcf94d26 100644 --- a/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java +++ b/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java @@ -61,6 +61,7 @@ protected synchronized void finish (String fileName) throws IOException { // There's probably a more elegant way to do this with NIO and without closing the buffer. // That would be Files.copy(File.toPath(),X) or ByteStreams.copy. + // Perhaps better: we could wrap the output buffer in a gzip output stream and zip as we write out. InputStream is = new BufferedInputStream(new FileInputStream(bufferFile)); OutputStream os = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(gzippedResultFile))); ByteStreams.copy(is, os); From 0bee308a87321313a00d307ca1925455b8e30309 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Fri, 2 Feb 2024 22:43:58 +0800 Subject: [PATCH 05/16] human-readable name together with download URLs This enables #673, but should be backward compatible with existing UI. JSON responses containing a URL have an accompanying humanName field. This can be used as the download attribute of an HTML link, or as the attachment name in a content-disposition header. CSV download still returns text/plain (requires JSON parsing on UI side) --- .../AggregationAreaController.java | 8 ++- .../OpportunityDatasetController.java | 30 ++++----- .../RegionalAnalysisController.java | 67 +++++++++---------- .../java/com/conveyal/file/FileStorage.java | 5 ++ .../com/conveyal/file/UrlWithHumanName.java | 39 +++++++++++ 5 files changed, 94 insertions(+), 55 deletions(-) create mode 100644 src/main/java/com/conveyal/file/UrlWithHumanName.java diff --git a/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java b/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java index 286fa5593..fae1637b6 100644 --- a/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java +++ b/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java @@ -10,6 +10,7 @@ import com.conveyal.analysis.persistence.AnalysisDB; import com.conveyal.analysis.util.JsonUtil; import com.conveyal.file.FileStorage; +import com.conveyal.file.UrlWithHumanName; import com.conveyal.r5.analyst.progress.Task; import com.fasterxml.jackson.databind.node.ObjectNode; import org.bson.conversions.Bson; @@ -27,6 +28,7 @@ import static com.google.common.base.Preconditions.checkNotNull; import static com.mongodb.client.model.Filters.and; import static com.mongodb.client.model.Filters.eq; +import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON; /** * Stores vector aggregationAreas (used to define the region of a weighted average accessibility metric). @@ -98,10 +100,10 @@ private Collection getAggregationAreas (Request req, Response r } /** Returns a JSON-wrapped URL for the mask grid of the aggregation area whose id matches the path parameter. 
*/ - private ObjectNode getAggregationAreaGridUrl (Request req, Response res) { + private UrlWithHumanName getAggregationAreaGridUrl (Request req, Response res) { AggregationArea aggregationArea = aggregationAreaCollection.findPermittedByRequestParamId(req); - String url = fileStorage.getURL(aggregationArea.getStorageKey()); - return JsonUtil.objectNode().put("url", url); + res.type(APPLICATION_JSON.asString()); + return fileStorage.getJsonUrl(aggregationArea.getStorageKey(), aggregationArea.name, "grid"); } @Override diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index b1afcfc05..c28363e0a 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -18,6 +18,7 @@ import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; import com.conveyal.file.FileUtils; +import com.conveyal.file.UrlWithHumanName; import com.conveyal.r5.analyst.FreeFormPointSet; import com.conveyal.r5.analyst.Grid; import com.conveyal.r5.analyst.PointSet; @@ -61,6 +62,7 @@ import static com.conveyal.file.FileCategory.GRIDS; import static com.conveyal.r5.analyst.WebMercatorExtents.parseZoom; import static com.conveyal.r5.analyst.progress.WorkProductType.OPPORTUNITY_DATASET; +import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON; /** * Controller that handles fetching opportunity datasets (grids and other pointset formats). @@ -94,10 +96,6 @@ public OpportunityDatasetController ( /** Store upload status objects FIXME trivial Javadoc */ private final List uploadStatuses = new ArrayList<>(); - private ObjectNode getJsonUrl (FileStorageKey key) { - return JsonUtil.objectNode().put("url", fileStorage.getURL(key)); - } - private void addStatusAndRemoveOldStatuses(OpportunityDatasetUploadStatus status) { uploadStatuses.add(status); LocalDateTime now = LocalDateTime.now(); @@ -113,10 +111,11 @@ private Collection getRegionDatasets(Request req, Response r ); } - private Object getOpportunityDataset(Request req, Response res) { + private UrlWithHumanName getOpportunityDataset(Request req, Response res) { OpportunityDataset dataset = Persistence.opportunityDatasets.findByIdFromRequestIfPermitted(req); if (dataset.format == FileStorageFormat.GRID) { - return getJsonUrl(dataset.getStorageKey()); + res.type(APPLICATION_JSON.asString()); + return fileStorage.getJsonUrl(dataset.getStorageKey(), dataset.sourceName + "_" + dataset.name, "grid"); } else { // Currently the UI can only visualize grids, not other kinds of datasets (freeform points). // We do generate a rasterized grid for each of the freeform pointsets we create, so ideally we'd redirect @@ -564,9 +563,10 @@ private List createGridsFromShapefile(List fileItems, * Respond to a request with a redirect to a downloadable file. 
* @param req should specify regionId, opportunityDatasetId, and an available download format (.tiff or .grid) */ - private Object downloadOpportunityDataset (Request req, Response res) throws IOException { + private UrlWithHumanName downloadOpportunityDataset (Request req, Response res) throws IOException { FileStorageFormat downloadFormat; String format = req.params("format"); + res.type(APPLICATION_JSON.asString()); try { downloadFormat = FileStorageFormat.valueOf(format.toUpperCase()); } catch (IllegalArgumentException iae) { @@ -576,38 +576,32 @@ private Object downloadOpportunityDataset (Request req, Response res) throws IOE String regionId = req.params("_id"); String gridKey = format; FileStorageKey storageKey = new FileStorageKey(GRIDS, String.format("%s/%s.grid", regionId, gridKey)); - return getJsonUrl(storageKey); + return fileStorage.getJsonUrl(storageKey, gridKey, "grid"); + } + if (FileStorageFormat.GRID.equals(downloadFormat)) { + return getOpportunityDataset(req, res); } - - if (FileStorageFormat.GRID.equals(downloadFormat)) return getOpportunityDataset(req, res); - final OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdFromRequestIfPermitted(req); - FileStorageKey gridKey = opportunityDataset.getStorageKey(FileStorageFormat.GRID); FileStorageKey formatKey = opportunityDataset.getStorageKey(downloadFormat); - // if this grid is not on S3 in the requested format, try to get the .grid format if (!fileStorage.exists(gridKey)) { throw AnalysisServerException.notFound("Requested grid does not exist."); } - if (!fileStorage.exists(formatKey)) { // get the grid and convert it to the requested format File gridFile = fileStorage.getFile(gridKey); Grid grid = Grid.read(new GZIPInputStream(new FileInputStream(gridFile))); // closes input stream File localFile = FileUtils.createScratchFile(downloadFormat.toString()); FileOutputStream fos = new FileOutputStream(localFile); - if (FileStorageFormat.PNG.equals(downloadFormat)) { grid.writePng(fos); } else if (FileStorageFormat.GEOTIFF.equals(downloadFormat)) { grid.writeGeotiff(fos); } - fileStorage.moveIntoStorage(formatKey, localFile); } - - return getJsonUrl(formatKey); + return fileStorage.getJsonUrl(formatKey, opportunityDataset.sourceName + "_" + opportunityDataset.name, downloadFormat.extension); } /** diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 7f6189322..2d169187f 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -8,6 +8,7 @@ import com.conveyal.analysis.models.AnalysisRequest; import com.conveyal.analysis.models.OpportunityDataset; import com.conveyal.analysis.models.RegionalAnalysis; +import com.conveyal.file.UrlWithHumanName; import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.results.CsvResultType; import com.conveyal.analysis.util.JsonUtil; @@ -20,8 +21,6 @@ import com.conveyal.r5.analyst.PointSet; import com.conveyal.r5.analyst.PointSetCache; import com.conveyal.r5.analyst.cluster.RegionalTask; -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.net.HttpHeaders; import com.google.common.primitives.Ints; import com.mongodb.QueryBuilder; import gnu.trove.list.array.TIntArrayList; @@ -46,20 +45,21 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; 
-import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; import static com.conveyal.file.FileCategory.BUNDLES; import static com.conveyal.file.FileCategory.RESULTS; +import static com.conveyal.file.UrlWithHumanName.filenameCleanString; import static com.conveyal.r5.transit.TransportNetworkCache.getScenarioFilename; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; +import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON; +import static org.eclipse.jetty.http.MimeTypes.Type.TEXT_HTML; /** * Spark HTTP handler methods that allow launching new regional analyses, as well as deleting them and fetching @@ -328,28 +328,15 @@ private Object getAllRegionalResults (Request req, Response res) throws IOExcept } fileStorage.moveIntoStorage(zippedResultsKey, tempZipFile); } - String humanReadableZipName = friendlyAnalysisName + ".zip"; - res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName)); - return JsonUtil.toJsonString(JsonUtil.objectNode() - .put("url", fileStorage.getURL(zippedResultsKey)) - .put("name", humanReadableZipName) - ); - } - - private String filenameCleanString (String original) { - String ret = original.replaceAll("\\W+", "_"); - if (ret.length() > 20) { - ret = ret.substring(0, 20); - } - return ret; +// res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName)); + res.type(APPLICATION_JSON.asString()); + return fileStorage.getJsonUrl(zippedResultsKey, analysis.name, "zip"); } /** * This used to extract a particular percentile of a regional analysis as a grid file. - * Now it just gets the single percentile that exists for any one analysis, either from the local buffer file - * for an analysis still in progress, or from S3 for a completed analysis. */ - private Object getRegionalResults (Request req, Response res) throws IOException { + private UrlWithHumanName getRegionalResults (Request req, Response res) throws IOException { // Get some path parameters out of the URL. // The UUID of the regional analysis for which we want the output data @@ -431,12 +418,20 @@ private Object getRegionalResults (Request req, Response res) throws IOException FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( analysis, cutoffIndex, destinationPointSetId, percentile, format ); - return JsonUtil.toJsonString( - JsonUtil.objectNode().put("url", fileStorage.getURL(singleCutoffFileStorageKey)) - ); + res.type(APPLICATION_JSON.asString()); + // Substitute human readable name and shorten destination UUID. + // TODO better system for resolving UUID to human readable names in single and multi-grid cases + // Or maybe only allow multi-grid download for saving by end user, and single grid is purely internal. 
+ int firstUnderscore = singleCutoffFileStorageKey.path.indexOf('_'); + int secondUnderscore = singleCutoffFileStorageKey.path.indexOf('_', firstUnderscore + 1); + int lastDot = singleCutoffFileStorageKey.path.lastIndexOf('.'); + String humanName = analysis.name + + singleCutoffFileStorageKey.path.substring(firstUnderscore, firstUnderscore + 6) + + singleCutoffFileStorageKey.path.substring(secondUnderscore, lastDot); + return fileStorage.getJsonUrl(singleCutoffFileStorageKey, humanName, format.extension); } - private String getCsvResults (Request req, Response res) { + private Object getCsvResults (Request req, Response res) { final String regionalAnalysisId = req.params("_id"); final CsvResultType resultType = CsvResultType.valueOf(req.params("resultType").toUpperCase()); // If the resultType parameter received on the API is unrecognized, valueOf throws IllegalArgumentException @@ -458,7 +453,10 @@ private String getCsvResults (Request req, Response res) { FileStorageKey fileStorageKey = new FileStorageKey(RESULTS, storageKey); - res.type("text/plain"); + // TODO handle JSON with human name on UI side + // res.type(APPLICATION_JSON.asString()); + // return fileStorage.getJsonUrl(fileStorageKey, analysis.name, resultType + ".csv"); + res.type(TEXT_HTML.asString()); return fileStorage.getURL(fileStorageKey); } @@ -652,7 +650,7 @@ private RegionalAnalysis updateRegionalAnalysis (Request request, Response respo * Return a JSON-wrapped URL for the file in FileStorage containing the JSON representation of the scenario for * the given regional analysis. */ - private JsonNode getScenarioJsonUrl (Request request, Response response) { + private UrlWithHumanName getScenarioJsonUrl (Request request, Response response) { RegionalAnalysis regionalAnalysis = Persistence.regionalAnalyses.findByIdIfPermitted( request.params("_id"), DBProjection.exclude("request.scenario.modifications"), @@ -663,9 +661,9 @@ private JsonNode getScenarioJsonUrl (Request request, Response response) { final String scenarioId = regionalAnalysis.request.scenarioId; checkNotNull(networkId, "RegionalAnalysis did not contain a network ID."); checkNotNull(scenarioId, "RegionalAnalysis did not contain an embedded request with scenario ID."); - String scenarioUrl = fileStorage.getURL( - new FileStorageKey(BUNDLES, getScenarioFilename(regionalAnalysis.bundleId, scenarioId))); - return JsonUtil.objectNode().put("url", scenarioUrl); + FileStorageKey scenarioKey = new FileStorageKey(BUNDLES, getScenarioFilename(regionalAnalysis.bundleId, scenarioId)); + response.type(APPLICATION_JSON.asString()); + return fileStorage.getJsonUrl(scenarioKey, regionalAnalysis.name, "scenario.json"); } @Override @@ -676,11 +674,12 @@ public void registerEndpoints (spark.Service sparkService) { }); sparkService.path("/api/regional", () -> { // For grids, no transformer is supplied: render raw bytes or input stream rather than transforming to JSON. + // Wait, do we have any endpoints that write grids into responses? It looks like they all return URLs now. 
sparkService.get("/:_id", this::getRegionalAnalysis); - sparkService.get("/:_id/all", this::getAllRegionalResults); - sparkService.get("/:_id/grid/:format", this::getRegionalResults); - sparkService.get("/:_id/csv/:resultType", this::getCsvResults); - sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl); + sparkService.get("/:_id/all", this::getAllRegionalResults, toJson); + sparkService.get("/:_id/grid/:format", this::getRegionalResults, toJson); + sparkService.get("/:_id/csv/:resultType", this::getCsvResults, toJson); + sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl, toJson); sparkService.delete("/:_id", this::deleteRegionalAnalysis, toJson); sparkService.post("", this::createRegionalAnalysis, toJson); sparkService.put("/:_id", this::updateRegionalAnalysis, toJson); diff --git a/src/main/java/com/conveyal/file/FileStorage.java b/src/main/java/com/conveyal/file/FileStorage.java index 52de21316..8c5ef0122 100644 --- a/src/main/java/com/conveyal/file/FileStorage.java +++ b/src/main/java/com/conveyal/file/FileStorage.java @@ -94,4 +94,9 @@ default InputStream getInputStream (FileCategory fileCategory, String fileName) } } + default UrlWithHumanName getJsonUrl (FileStorageKey key, String rawHumanName, String humanExtension) { + String url = this.getURL(key); + return UrlWithHumanName.fromCleanedName(url, rawHumanName, humanExtension); + } + } diff --git a/src/main/java/com/conveyal/file/UrlWithHumanName.java b/src/main/java/com/conveyal/file/UrlWithHumanName.java new file mode 100644 index 000000000..c80815b93 --- /dev/null +++ b/src/main/java/com/conveyal/file/UrlWithHumanName.java @@ -0,0 +1,39 @@ +package com.conveyal.file; + +/** + * Combines a url for downloading a file, which might include a globally unique but human-annoying UUID, with a + * suggested human-readable name for that file when saved by an end user. The humanName may not be globally unique, + * so is only appropriate for cases where it doesn't need to be machine discoverable using a UUID. The humanName can + * be used as the download attribute of an HTML link, or as the attachment name in a content-disposition header. + * Instances of this record are intended to be serialized to JSON as an HTTP API response. + * // TODO make this into a class with factory methods and move static cleanFilename method here. + */ +public class UrlWithHumanName { + public final String url; + public final String humanName; + + public UrlWithHumanName (String url, String humanName) { + this.url = url; + this.humanName = humanName; + } + + private static final int TRUNCATE_FILENAME_CHARS = 220; + + /** + * Given an arbitrary string, make it safe for use as a friendly human-readable filename. + * This can yield non-unique strings and is intended for files downloaded by end users that do not need to be + * machine-discoverable by unique IDs. + */ + public static String filenameCleanString (String original) { + String ret = original.replaceAll("\\W+", "_"); + if (ret.length() > TRUNCATE_FILENAME_CHARS) { + ret = ret.substring(0, TRUNCATE_FILENAME_CHARS); + } + return ret; + } + + public static UrlWithHumanName fromCleanedName (String url, String rawHumanName, String humanExtension) { + String humanName = UrlWithHumanName.filenameCleanString(rawHumanName) + "." 
+ humanExtension; + return new UrlWithHumanName(url, humanName); + } +} From 06d9beed01daae96a45780ba73a9a3af70954704 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Mon, 6 Nov 2023 17:43:07 +0800 Subject: [PATCH 06/16] include route, stop names in regional path results --- .../java/com/conveyal/r5/analyst/cluster/PathResult.java | 2 +- .../java/com/conveyal/r5/transit/path/RouteSequence.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index c2366ac1a..99601291e 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -108,7 +108,7 @@ public ArrayList[] summarizeIterations(Stat stat) { int nIterations = iterations.size(); checkState(nIterations > 0, "A path was stored without any iterations"); String waits = null, transfer = null, totalTime = null; - String[] path = routeSequence.detailsWithGtfsIds(transitLayer); + String[] path = routeSequence.detailsWithGtfsIds(transitLayer, true); double targetValue; IntStream totalWaits = iterations.stream().mapToInt(i -> i.waitTimes.sum()); if (stat == Stat.MINIMUM) { diff --git a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java index 6ed2eb73c..a5bdd71ae 100644 --- a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java +++ b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java @@ -28,15 +28,15 @@ public RouteSequence(PatternSequence patternSequence, TransitLayer transitLayer) } /** Returns details summarizing this route sequence, using GTFS ids stored in the supplied transitLayer. */ - public String[] detailsWithGtfsIds(TransitLayer transitLayer){ + public String[] detailsWithGtfsIds(TransitLayer transitLayer, boolean includeNames){ StringJoiner routeIds = new StringJoiner("|"); StringJoiner boardStopIds = new StringJoiner("|"); StringJoiner alightStopIds = new StringJoiner("|"); StringJoiner rideTimes = new StringJoiner("|"); for (int i = 0; i < routes.size(); i++) { - routeIds.add(transitLayer.routeString(routes.get(i), false)); - boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), false)); - alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), false)); + routeIds.add(transitLayer.routeString(routes.get(i), includeNames)); + boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), includeNames)); + alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), includeNames)); rideTimes.add(String.format("%.1f", stopSequence.rideTimesSeconds.get(i) / 60f)); } String accessTime = stopSequence.access == null ? 
null : String.format("%.1f", stopSequence.access.time / 60f); From d5f5becc8d2f9d0b14e8cafdc4468775f75cf9ba Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 3 Feb 2024 01:06:20 +0800 Subject: [PATCH 07/16] enable/disable ids and names csv with string flags --- .../analysis/models/AnalysisRequest.java | 7 +++ .../analyst/cluster/AnalysisWorkerTask.java | 7 +++ .../r5/analyst/cluster/PathResult.java | 19 ++++++- .../com/conveyal/r5/transit/TransitLayer.java | 57 ++++++++++++++----- .../r5/transit/path/RouteSequence.java | 17 +++--- 5 files changed, 86 insertions(+), 21 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java index 80d5e507a..f2249c7fe 100644 --- a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java +++ b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.EnumSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; /** @@ -175,6 +176,11 @@ public class AnalysisRequest { */ public int dualAccessibilityThreshold = 0; + /** + * Freeform (untyped) flags for enabling experimental, undocumented, or arcane behavior in backend or workers. + * This should be used to replace all previous special behavior flags that were embedded inside analysis names etc. + */ + public Set flags; /** * Create the R5 `Scenario` from this request. @@ -281,6 +287,7 @@ public void populateTask (AnalysisWorkerTask task, UserPermissions userPermissio task.includeTemporalDensity = includeTemporalDensity; task.dualAccessibilityThreshold = dualAccessibilityThreshold; + task.flags = flags; } private EnumSet getEnumSetFromString (String s) { diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java index 30266f171..d64996a15 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java @@ -15,6 +15,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; import java.util.Arrays; +import java.util.Set; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; @@ -177,6 +178,12 @@ public abstract class AnalysisWorkerTask extends ProfileRequest { */ public ChaosParameters injectFault; + /** + * Freeform (untyped) flags for enabling experimental, undocumented, or arcane worker behavior. + * This should be used to replace all previous special behavior flags that were embedded inside analysis names etc. + */ + public Set flags; + /** * Is this a single point or regional request? Needed to encode types in JSON serialization. Can that type field be * added automatically with a serializer annotation instead of by defining a getter method and two dummy methods? 
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index 99601291e..36e71699f 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -19,6 +19,9 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_ONLY; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_AND_NAME; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.NAME_ONLY; import static com.google.common.base.Preconditions.checkState; /** @@ -47,8 +50,11 @@ public class PathResult { * With additional changes, patterns could be collapsed further to route combinations or modes. */ public final Multimap[] iterationsForPathTemplates; + private final TransitLayer transitLayer; + private TransitLayer.EntityRepresentation nameOrId; + public static final String[] DATA_COLUMNS = new String[]{ "routes", "boardStops", @@ -76,6 +82,17 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) { } iterationsForPathTemplates = new Multimap[nDestinations]; this.transitLayer = transitLayer; + if (task.flags == null) { + nameOrId = ID_ONLY; + } else { + boolean includeEntityNames = task.flags.contains("csv_names"); + boolean includeEntityIds = !task.flags.contains("csv_no_ids"); + if (includeEntityIds && includeEntityNames) { + this.nameOrId = ID_AND_NAME; + } else if (includeEntityNames) { + this.nameOrId = NAME_ONLY; + } + } } /** @@ -108,7 +125,7 @@ public ArrayList[] summarizeIterations(Stat stat) { int nIterations = iterations.size(); checkState(nIterations > 0, "A path was stored without any iterations"); String waits = null, transfer = null, totalTime = null; - String[] path = routeSequence.detailsWithGtfsIds(transitLayer, true); + String[] path = routeSequence.detailsWithGtfsIds(transitLayer, nameOrId); double targetValue; IntStream totalWaits = iterations.stream().mapToInt(i -> i.waitTimes.sum()); if (stat == Stat.MINIMUM) { diff --git a/src/main/java/com/conveyal/r5/transit/TransitLayer.java b/src/main/java/com/conveyal/r5/transit/TransitLayer.java index 871491ff2..a025ba4d0 100644 --- a/src/main/java/com/conveyal/r5/transit/TransitLayer.java +++ b/src/main/java/com/conveyal/r5/transit/TransitLayer.java @@ -54,6 +54,9 @@ import java.util.stream.IntStream; import java.util.stream.StreamSupport; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_ONLY; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.NAME_ONLY; + /** * A key simplifying factor is that we don't handle overnight trips. This is fine for analysis at usual times of day. @@ -815,31 +818,59 @@ public TIntSet findStopsInGeometry (Geometry geometry) { return stops; } + public enum EntityRepresentation { + ID_ONLY, NAME_ONLY, ID_AND_NAME + } + /** * For the given pattern index, returns the GTFS routeId. If includeName is true, the returned string will * also include a route_short_name or route_long_name (if they are not null). 
*/ - public String routeString(int routeIndex, boolean includeName) { + public String routeString (int routeIndex, EntityRepresentation nameOrId) { RouteInfo routeInfo = routes.get(routeIndex); - String route = routeInfo.route_id; - if (includeName) { - if (routeInfo.route_short_name != null) { - route += " (" + routeInfo.route_short_name + ")"; - } else if (routeInfo.route_long_name != null){ - route += " (" + routeInfo.route_long_name + ")"; + String name = routeInfo.route_short_name; + String id = routeInfo.route_id; + // If we might actually use the name, check some fallbacks. + if (nameOrId != ID_ONLY) { + if (name == null) { + name = routeInfo.route_long_name; + } + if (name == null) { + name = routeInfo.route_id; } } - return route; + return switch (nameOrId) { + case NAME_ONLY -> name; + case ID_AND_NAME -> id + " (" + name + ")"; + default -> id; + }; } /** * For the given stop index, returns the GTFS stopId (stripped of R5's feedId prefix) and, if includeName is true, * stopName. */ - public String stopString(int stopIndex, boolean includeName) { - // TODO use a compact feed index, instead of splitting to remove feedIds - String stop = stopIdForIndex.get(stopIndex) == null ? "[new]" : stopIdForIndex.get(stopIndex).split(":")[1]; - if (includeName) stop += " (" + stopNames.get(stopIndex) + ")"; - return stop; + public String stopString(int stopIndex, EntityRepresentation nameOrId) { + String stopId = stopIdForIndex.get(stopIndex); + String stopName = stopNames.get(stopIndex); + // I'd trust the JVM JIT to optimize out these assignments on different code paths, but not the split call. + if (nameOrId != NAME_ONLY) { + if (stopId == null) { + stopId = "[new]"; + } else { + // TODO use a compact feed ID instead of splitting to remove feedIds (or put feedId into another CSV field) + stopId = stopId.split(":")[1]; + } + } + if (nameOrId != ID_ONLY) { + if (stopName == null) { + stopName = "[new]"; + } + } + return switch (nameOrId) { + case NAME_ONLY -> stopName; + case ID_AND_NAME -> stopId + " (" + stopName + ")"; + default -> stopId; + }; } } diff --git a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java index a5bdd71ae..3bb4f512c 100644 --- a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java +++ b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java @@ -1,6 +1,7 @@ package com.conveyal.r5.transit.path; import com.conveyal.r5.transit.TransitLayer; +import com.conveyal.r5.transit.TransitLayer.EntityRepresentation; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; @@ -9,6 +10,8 @@ import java.util.Objects; import java.util.StringJoiner; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_AND_NAME; + /** A door-to-door path that includes the routes ridden between stops */ public class RouteSequence { @@ -28,15 +31,15 @@ public RouteSequence(PatternSequence patternSequence, TransitLayer transitLayer) } /** Returns details summarizing this route sequence, using GTFS ids stored in the supplied transitLayer. 
 */ - public String[] detailsWithGtfsIds(TransitLayer transitLayer, boolean includeNames){ + public String[] detailsWithGtfsIds (TransitLayer transitLayer, EntityRepresentation nameOrId){ StringJoiner routeIds = new StringJoiner("|"); StringJoiner boardStopIds = new StringJoiner("|"); StringJoiner alightStopIds = new StringJoiner("|"); StringJoiner rideTimes = new StringJoiner("|"); for (int i = 0; i < routes.size(); i++) { - routeIds.add(transitLayer.routeString(routes.get(i), includeNames)); - boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), includeNames)); - alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), includeNames)); + routeIds.add(transitLayer.routeString(routes.get(i), nameOrId)); + boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), nameOrId)); + alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), nameOrId)); rideTimes.add(String.format("%.1f", stopSequence.rideTimesSeconds.get(i) / 60f)); } String accessTime = stopSequence.access == null ? null : String.format("%.1f", stopSequence.access.time / 60f); @@ -55,9 +58,9 @@ public String[] detailsWithGtfsIds(TransitLayer transitLayer, boolean includeNam public Collection transitLegs(TransitLayer transitLayer) { Collection transitLegs = new ArrayList<>(); for (int i = 0; i < routes.size(); i++) { - String routeString = transitLayer.routeString(routes.get(i), true); - String boardStop = transitLayer.stopString(stopSequence.boardStops.get(i), true); - String alightStop = transitLayer.stopString(stopSequence.alightStops.get(i), true); + String routeString = transitLayer.routeString(routes.get(i), ID_AND_NAME); + String boardStop = transitLayer.stopString(stopSequence.boardStops.get(i), ID_AND_NAME); + String alightStop = transitLayer.stopString(stopSequence.alightStops.get(i), ID_AND_NAME); transitLegs.add(new TransitLeg(routeString, stopSequence.rideTimesSeconds.get(i), boardStop, alightStop)); } return transitLegs; From 5e1ccd0c197958a06dad2c542724d7cf1c7ca10a Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Fri, 16 Feb 2024 23:55:47 +0800 Subject: [PATCH 08/16] Remove obsolete comment --- src/main/java/com/conveyal/file/UrlWithHumanName.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/conveyal/file/UrlWithHumanName.java b/src/main/java/com/conveyal/file/UrlWithHumanName.java index c80815b93..8cf447c75 100644 --- a/src/main/java/com/conveyal/file/UrlWithHumanName.java +++ b/src/main/java/com/conveyal/file/UrlWithHumanName.java @@ -6,7 +6,6 @@ * so is only appropriate for cases where it doesn't need to be machine discoverable using a UUID. The humanName can * be used as the download attribute of an HTML link, or as the attachment name in a content-disposition header. * Instances of this record are intended to be serialized to JSON as an HTTP API response. - * // TODO make this into a class with factory methods and move static cleanFilename method here. */ public class UrlWithHumanName { public final String url; From e11fc19e5af413da331bce40c6ced687be5ba8c2 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 17 Feb 2024 00:45:23 +0800 Subject: [PATCH 09/16] Nest CsvResultOptions in requests. Reverse the order of name and ID in output and in the enum. String flags are maintained in the request but are no longer used; the deleted code serves as an example of how to use flags.
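For readers tracing the history, the retired flag handling amounts to one small mapping, sketched below. This is an illustration only, using the flag spellings from the deleted code ("csv_names", "csv_no_ids") and the enum constant as renamed by this patch; unlike the deleted field assignments, it always returns a non-null value:

    import java.util.Set;
    import com.conveyal.r5.transit.TransitLayer.EntityRepresentation;
    import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.*;

    class LegacyCsvFlags {
        // How the retired string flags selected an EntityRepresentation for CSV output.
        static EntityRepresentation fromFlags (Set<String> flags) {
            if (flags == null) return ID_ONLY;           // no flags supplied: default to IDs
            boolean names = flags.contains("csv_names"); // opt in to human-readable names
            boolean ids = !flags.contains("csv_no_ids"); // opt out of IDs
            if (ids && names) return NAME_AND_ID;
            if (names) return NAME_ONLY;
            return ID_ONLY; // IDs only, also covering the degenerate "no ids, no names" case
        }
    }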
--- .../analysis/models/AnalysisRequest.java | 6 ++++++ .../analysis/models/CsvResultOptions.java | 18 +++++++++++++++++ .../analyst/cluster/AnalysisWorkerTask.java | 4 ++++ .../r5/analyst/cluster/PathResult.java | 20 ++++--------------- .../com/conveyal/r5/transit/TransitLayer.java | 6 +++--- .../r5/transit/path/RouteSequence.java | 17 ++++++++-------- 6 files changed, 44 insertions(+), 27 deletions(-) create mode 100644 src/main/java/com/conveyal/analysis/models/CsvResultOptions.java diff --git a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java index f2249c7fe..3bff888e8 100644 --- a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java +++ b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java @@ -12,8 +12,10 @@ import com.conveyal.r5.analyst.scenario.Scenario; import com.conveyal.r5.api.util.LegMode; import com.conveyal.r5.api.util.TransitModes; +import com.conveyal.r5.transit.TransitLayer; import com.mongodb.QueryBuilder; import org.apache.commons.codec.digest.DigestUtils; +import org.checkerframework.checker.units.qual.C; import java.time.LocalDate; import java.util.ArrayList; @@ -182,6 +184,9 @@ public class AnalysisRequest { */ public Set flags; + /** Control the details of CSV regional analysis output, including whether to output IDs, names, or both. */ + public CsvResultOptions csvResultOptions = new CsvResultOptions(); + /** * Create the R5 `Scenario` from this request. */ @@ -288,6 +293,7 @@ public void populateTask (AnalysisWorkerTask task, UserPermissions userPermissio task.includeTemporalDensity = includeTemporalDensity; task.dualAccessibilityThreshold = dualAccessibilityThreshold; task.flags = flags; + task.csvResultOptions = csvResultOptions; } private EnumSet getEnumSetFromString (String s) { diff --git a/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java b/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java new file mode 100644 index 000000000..c21b52aa3 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java @@ -0,0 +1,18 @@ +package com.conveyal.analysis.models; + +import com.conveyal.r5.transit.TransitLayer; +import com.conveyal.r5.transit.TransitLayer.EntityRepresentation; + +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_ONLY; + +/** + * API model type included in analysis requests to control details of CSV regional analysis output. + * This type is shared between AnalysisRequest (Frontend -> Broker) and AnalysisWorkerTask (Broker -> Workers). + * There is precedent for nested compound types shared across those top level request types (see DecayFunction). + */ +public class CsvResultOptions { + public EntityRepresentation routeRepresentation = ID_ONLY; + public EntityRepresentation stopRepresentation = ID_ONLY; + // Only feed ID representation is allowed to be null (no feed IDs at all, the default). 
+ public EntityRepresentation feedRepresentation = null; +} diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java index d64996a15..2698905fa 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java @@ -1,5 +1,6 @@ package com.conveyal.r5.analyst.cluster; +import com.conveyal.analysis.models.CsvResultOptions; import com.conveyal.r5.analyst.FreeFormPointSet; import com.conveyal.r5.analyst.Grid; import com.conveyal.r5.analyst.GridTransformWrapper; @@ -184,6 +185,9 @@ public abstract class AnalysisWorkerTask extends ProfileRequest { */ public Set flags; + /** Control the details of CSV regional analysis output, including whether to output IDs, names, or both. */ + public CsvResultOptions csvResultOptions; + /** * Is this a single point or regional request? Needed to encode types in JSON serialization. Can that type field be * added automatically with a serializer annotation instead of by defining a getter method and two dummy methods? diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index 36e71699f..dd42972b1 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -1,5 +1,6 @@ package com.conveyal.r5.analyst.cluster; +import com.conveyal.analysis.models.CsvResultOptions; import com.conveyal.r5.analyst.StreetTimesAndModes; import com.conveyal.r5.transit.TransitLayer; import com.conveyal.r5.transit.path.Path; @@ -19,9 +20,6 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_ONLY; -import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_AND_NAME; -import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.NAME_ONLY; import static com.google.common.base.Preconditions.checkState; /** @@ -53,7 +51,7 @@ public class PathResult { private final TransitLayer transitLayer; - private TransitLayer.EntityRepresentation nameOrId; + private final CsvResultOptions csvOptions; public static final String[] DATA_COLUMNS = new String[]{ "routes", @@ -82,17 +80,7 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) { } iterationsForPathTemplates = new Multimap[nDestinations]; this.transitLayer = transitLayer; - if (task.flags == null) { - nameOrId = ID_ONLY; - } else { - boolean includeEntityNames = task.flags.contains("csv_names"); - boolean includeEntityIds = !task.flags.contains("csv_no_ids"); - if (includeEntityIds && includeEntityNames) { - this.nameOrId = ID_AND_NAME; - } else if (includeEntityNames) { - this.nameOrId = NAME_ONLY; - } - } + this.csvOptions = task.csvResultOptions; } /** @@ -125,7 +113,7 @@ public ArrayList[] summarizeIterations(Stat stat) { int nIterations = iterations.size(); checkState(nIterations > 0, "A path was stored without any iterations"); String waits = null, transfer = null, totalTime = null; - String[] path = routeSequence.detailsWithGtfsIds(transitLayer, nameOrId); + String[] path = routeSequence.detailsWithGtfsIds(transitLayer, csvOptions); double targetValue; IntStream totalWaits = iterations.stream().mapToInt(i -> i.waitTimes.sum()); if (stat == Stat.MINIMUM) { diff --git a/src/main/java/com/conveyal/r5/transit/TransitLayer.java 
b/src/main/java/com/conveyal/r5/transit/TransitLayer.java index a025ba4d0..7ec85e6fa 100644 --- a/src/main/java/com/conveyal/r5/transit/TransitLayer.java +++ b/src/main/java/com/conveyal/r5/transit/TransitLayer.java @@ -819,7 +819,7 @@ public TIntSet findStopsInGeometry (Geometry geometry) { } public enum EntityRepresentation { - ID_ONLY, NAME_ONLY, ID_AND_NAME + ID_ONLY, NAME_ONLY, NAME_AND_ID } /** @@ -841,7 +841,7 @@ public String routeString (int routeIndex, EntityRepresentation nameOrId) { } return switch (nameOrId) { case NAME_ONLY -> name; - case ID_AND_NAME -> id + " (" + name + ")"; + case NAME_AND_ID -> name + " (" + id + ")"; default -> id; }; } @@ -869,7 +869,7 @@ public String stopString(int stopIndex, EntityRepresentation nameOrId) { } return switch (nameOrId) { case NAME_ONLY -> stopName; - case ID_AND_NAME -> stopId + " (" + stopName + ")"; + case NAME_AND_ID -> stopName + " (" + stopId + ")"; default -> stopId; }; } diff --git a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java index 3bb4f512c..1e60478f3 100644 --- a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java +++ b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java @@ -1,5 +1,6 @@ package com.conveyal.r5.transit.path; +import com.conveyal.analysis.models.CsvResultOptions; import com.conveyal.r5.transit.TransitLayer; import com.conveyal.r5.transit.TransitLayer.EntityRepresentation; import gnu.trove.list.TIntList; @@ -10,7 +11,7 @@ import java.util.Objects; import java.util.StringJoiner; -import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_AND_NAME; +import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.NAME_AND_ID; /** A door-to-door path that includes the routes ridden between stops */ public class RouteSequence { @@ -31,15 +32,15 @@ public RouteSequence(PatternSequence patternSequence, TransitLayer transitLayer) } /** Returns details summarizing this route sequence, using GTFS ids stored in the supplied transitLayer. */ - public String[] detailsWithGtfsIds (TransitLayer transitLayer, EntityRepresentation nameOrId){ + public String[] detailsWithGtfsIds (TransitLayer transitLayer, CsvResultOptions csvOptions){ StringJoiner routeIds = new StringJoiner("|"); StringJoiner boardStopIds = new StringJoiner("|"); StringJoiner alightStopIds = new StringJoiner("|"); StringJoiner rideTimes = new StringJoiner("|"); for (int i = 0; i < routes.size(); i++) { - routeIds.add(transitLayer.routeString(routes.get(i), nameOrId)); - boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), nameOrId)); - alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), nameOrId)); + routeIds.add(transitLayer.routeString(routes.get(i), csvOptions.routeRepresentation)); + boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), csvOptions.stopRepresentation)); + alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), csvOptions.stopRepresentation)); rideTimes.add(String.format("%.1f", stopSequence.rideTimesSeconds.get(i) / 60f)); } String accessTime = stopSequence.access == null ? 
null : String.format("%.1f", stopSequence.access.time / 60f); @@ -58,9 +59,9 @@ public String[] detailsWithGtfsIds (TransitLayer transitLayer, EntityRepresentat public Collection transitLegs(TransitLayer transitLayer) { Collection transitLegs = new ArrayList<>(); for (int i = 0; i < routes.size(); i++) { - String routeString = transitLayer.routeString(routes.get(i), ID_AND_NAME); - String boardStop = transitLayer.stopString(stopSequence.boardStops.get(i), ID_AND_NAME); - String alightStop = transitLayer.stopString(stopSequence.alightStops.get(i), ID_AND_NAME); + String routeString = transitLayer.routeString(routes.get(i), NAME_AND_ID); + String boardStop = transitLayer.stopString(stopSequence.boardStops.get(i), NAME_AND_ID); + String alightStop = transitLayer.stopString(stopSequence.alightStops.get(i), NAME_AND_ID); transitLegs.add(new TransitLeg(routeString, stopSequence.rideTimesSeconds.get(i), boardStop, alightStop)); } return transitLegs; From 119ffc149a5c7ebe0b2b0e033f9560652f10b2c9 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 29 Feb 2024 11:46:55 +0900 Subject: [PATCH 10/16] Update src/main/java/com/conveyal/analysis/models/AnalysisRequest.java Co-authored-by: Anson Stewart --- src/main/java/com/conveyal/analysis/models/AnalysisRequest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java index 3bff888e8..95944f262 100644 --- a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java +++ b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java @@ -12,7 +12,6 @@ import com.conveyal.r5.analyst.scenario.Scenario; import com.conveyal.r5.api.util.LegMode; import com.conveyal.r5.api.util.TransitModes; -import com.conveyal.r5.transit.TransitLayer; import com.mongodb.QueryBuilder; import org.apache.commons.codec.digest.DigestUtils; import org.checkerframework.checker.units.qual.C; From c92bf7ba245d25c83e35055e045c4b0f81a47b39 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 29 Feb 2024 11:48:40 +0900 Subject: [PATCH 11/16] Update src/main/java/com/conveyal/analysis/models/CsvResultOptions.java Co-authored-by: Anson Stewart --- src/main/java/com/conveyal/analysis/models/CsvResultOptions.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java b/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java index c21b52aa3..e925e5ff3 100644 --- a/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java +++ b/src/main/java/com/conveyal/analysis/models/CsvResultOptions.java @@ -1,6 +1,5 @@ package com.conveyal.analysis.models; -import com.conveyal.r5.transit.TransitLayer; import com.conveyal.r5.transit.TransitLayer.EntityRepresentation; import static com.conveyal.r5.transit.TransitLayer.EntityRepresentation.ID_ONLY; From eca7051f1e59db545bff9249ad9d2aafd6c12ccc Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 29 Feb 2024 11:48:46 +0900 Subject: [PATCH 12/16] Update src/main/java/com/conveyal/analysis/models/AnalysisRequest.java Co-authored-by: Anson Stewart --- src/main/java/com/conveyal/analysis/models/AnalysisRequest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java index 95944f262..cda79c9b8 100644 --- a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java +++ 
b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java @@ -14,7 +14,6 @@ import com.conveyal.r5.api.util.TransitModes; import com.mongodb.QueryBuilder; import org.apache.commons.codec.digest.DigestUtils; -import org.checkerframework.checker.units.qual.C; import java.time.LocalDate; import java.util.ArrayList; From ac21c90362c100b0c0e84754d5e55e6e4c2b551f Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 21 Mar 2024 00:25:39 +0800 Subject: [PATCH 13/16] human readable names without string chopping also use last instead of first five chars of UUID --- .../RegionalAnalysisController.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 2d169187f..0b1af4d35 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -267,7 +267,7 @@ private Object getAllRegionalResults (Request req, Response res) throws IOExcept if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); } - // Map from cryptic UUIDs to human readable strings that can replace them. + // Map from cryptic UUIDs to human-readable strings that can replace them. Map friendlyReplacements = new HashMap<>(); { // Replace the regional analysis ID with its name. @@ -414,21 +414,22 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); } - // Process has a lot of overhead: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. + // Significant overhead here: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( analysis, cutoffIndex, destinationPointSetId, percentile, format ); + // Create alternative human-readable filename. Similar to internal storage key name, but using a shortened + // ID for the destinations and the user-specified name for the project. This project name may contain invalid + // characters, but will be sanitized by getJsonUrl. We use the end of the dataset ID rather than the beginning + // because it's more likely to be different when several datasets are created in rapid succession. + String shortDestinationId = destinationPointSetId.substring( + destinationPointSetId.length() - 5, destinationPointSetId.length() + ); + int cutoffMinutes = + analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes; + String rawHumanName = String.format("%s_%s_P%d_C%d", analysis.name, shortDestinationId, percentile, cutoffMinutes); res.type(APPLICATION_JSON.asString()); - // Substitute human readable name and shorten destination UUID. - // TODO better system for resolving UUID to human readable names in single and multi-grid cases - // Or maybe only allow multi-grid download for saving by end user, and single grid is purely internal. 
- int firstUnderscore = singleCutoffFileStorageKey.path.indexOf('_'); - int secondUnderscore = singleCutoffFileStorageKey.path.indexOf('_', firstUnderscore + 1); - int lastDot = singleCutoffFileStorageKey.path.lastIndexOf('.'); - String humanName = analysis.name + - singleCutoffFileStorageKey.path.substring(firstUnderscore, firstUnderscore + 6) + - singleCutoffFileStorageKey.path.substring(secondUnderscore, lastDot); - return fileStorage.getJsonUrl(singleCutoffFileStorageKey, humanName, format.extension); + return fileStorage.getJsonUrl(singleCutoffFileStorageKey, rawHumanName, format.extension.toLowerCase(Locale.ROOT)); } private Object getCsvResults (Request req, Response res) { From e42274e93bd16acbbc2acebe896029fe65bda192 Mon Sep 17 00:00:00 2001 From: Anson Stewart Date: Wed, 20 Mar 2024 21:57:04 -0400 Subject: [PATCH 14/16] Add feed id column to path results (sequel) (#936) * Write feedIds as a separate column; feedIds are derived from boarding stops * Add "group" column in CSV path results, for future use (see #924, for example) --- .../r5/analyst/cluster/PathResult.java | 9 ++++- .../com/conveyal/r5/transit/TransitLayer.java | 7 ++++ .../r5/transit/path/RouteSequence.java | 40 +++++++++++++------ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index dd42972b1..43e617920 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -57,13 +57,15 @@ public class PathResult { "routes", "boardStops", "alightStops", "rideTimes", + "feedIds", "accessTime", "egressTime", "transferTime", "waitTimes", "totalTime", - "nIterations" + "nIterations", + "group" }; public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) { @@ -140,7 +142,10 @@ public ArrayList[] summarizeIterations(Stat stat) { score = thisScore; } } - String[] row = ArrayUtils.addAll(path, transfer, waits, totalTime, String.valueOf(nIterations)); + String group = ""; // Reserved for future use + String[] row = ArrayUtils.addAll( + path, transfer, waits, totalTime, String.valueOf(nIterations), group + ); checkState(row.length == DATA_COLUMNS.length); summary[d].add(row); } diff --git a/src/main/java/com/conveyal/r5/transit/TransitLayer.java b/src/main/java/com/conveyal/r5/transit/TransitLayer.java index 7ec85e6fa..f978d5a5e 100644 --- a/src/main/java/com/conveyal/r5/transit/TransitLayer.java +++ b/src/main/java/com/conveyal/r5/transit/TransitLayer.java @@ -873,4 +873,11 @@ public String stopString(int stopIndex, EntityRepresentation nameOrId) { default -> stopId; }; } + + /** + * For a supplied stopIndex in the transit layer, return the feed id (which we prepend to the GTFS stop id). + */ + public String feedFromStop(int stopIndex) { + return stopIdForIndex.get(stopIndex) == null ? "[new]" : stopIdForIndex.get(stopIndex).split(":")[0]; + } } diff --git a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java index 1e60478f3..62e6cfd34 100644 --- a/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java +++ b/src/main/java/com/conveyal/r5/transit/path/RouteSequence.java @@ -31,25 +31,39 @@ public RouteSequence(PatternSequence patternSequence, TransitLayer transitLayer) } } - /** Returns details summarizing this route sequence, using GTFS ids stored in the supplied transitLayer.
*/ + /** + * Returns details summarizing this route sequence, using GTFS ids stored in the supplied transitLayer. + * @param csvOptions indicates whether names or IDs should be returned for certain fields. + * @return array of pipe-concatenated strings, with the route, board stop, alight stop, ride time, and feed for + * each transit leg, as well as the access and egress time. + * + * If csvOptions.feedRepresentation is not null, the feed values will be R5-generated UUID for boarding stop of + * each leg. We are grabbing the feed ID from the stop rather than the route (which might seem like a better + * representative of the leg) because stops happen to have a readily available feed ID. + */ public String[] detailsWithGtfsIds (TransitLayer transitLayer, CsvResultOptions csvOptions){ - StringJoiner routeIds = new StringJoiner("|"); - StringJoiner boardStopIds = new StringJoiner("|"); - StringJoiner alightStopIds = new StringJoiner("|"); - StringJoiner rideTimes = new StringJoiner("|"); + StringJoiner routeJoiner = new StringJoiner("|"); + StringJoiner boardStopJoiner = new StringJoiner("|"); + StringJoiner alightStopJoiner = new StringJoiner("|"); + StringJoiner feedJoiner = new StringJoiner("|"); + StringJoiner rideTimeJoiner = new StringJoiner("|"); for (int i = 0; i < routes.size(); i++) { - routeIds.add(transitLayer.routeString(routes.get(i), csvOptions.routeRepresentation)); - boardStopIds.add(transitLayer.stopString(stopSequence.boardStops.get(i), csvOptions.stopRepresentation)); - alightStopIds.add(transitLayer.stopString(stopSequence.alightStops.get(i), csvOptions.stopRepresentation)); - rideTimes.add(String.format("%.1f", stopSequence.rideTimesSeconds.get(i) / 60f)); + routeJoiner.add(transitLayer.routeString(routes.get(i), csvOptions.routeRepresentation)); + boardStopJoiner.add(transitLayer.stopString(stopSequence.boardStops.get(i), csvOptions.stopRepresentation)); + alightStopJoiner.add(transitLayer.stopString(stopSequence.alightStops.get(i), csvOptions.stopRepresentation)); + if (csvOptions.feedRepresentation != null) { + feedJoiner.add(transitLayer.feedFromStop(stopSequence.boardStops.get(i))); + } + rideTimeJoiner.add(String.format("%.1f", stopSequence.rideTimesSeconds.get(i) / 60f)); } String accessTime = stopSequence.access == null ? null : String.format("%.1f", stopSequence.access.time / 60f); String egressTime = stopSequence.egress == null ? null : String.format("%.1f", stopSequence.egress.time / 60f); return new String[]{ - routeIds.toString(), - boardStopIds.toString(), - alightStopIds.toString(), - rideTimes.toString(), + routeJoiner.toString(), + boardStopJoiner.toString(), + alightStopJoiner.toString(), + rideTimeJoiner.toString(), + feedJoiner.toString(), accessTime, egressTime }; From b8f359ab042f855c8b326a836f4071a3d4fa4e02 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 21 Mar 2024 17:48:48 +0800 Subject: [PATCH 15/16] convert and validate FileStorageFormat immediately Eliminates poorly named intermediate string variable and need for explanation in outdated comments. 
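The pattern being applied (parse the raw ":format" path parameter into a typed value and validate it before doing any other work) can be sketched on its own. The method name and exception wording below are illustrative, not the exact controller code:

    import java.util.EnumSet;
    import java.util.Locale;
    import com.conveyal.file.FileStorageFormat;

    class RasterFormatParam {
        private static final EnumSet<FileStorageFormat> RASTER_FORMATS =
                EnumSet.of(FileStorageFormat.GRID, FileStorageFormat.PNG, FileStorageFormat.GEOTIFF);

        // Convert and validate immediately, so all later code handles only a known-good typed value.
        static FileStorageFormat parseRasterFormat (String rawParam) {
            final FileStorageFormat format;
            try {
                format = FileStorageFormat.valueOf(rawParam.toUpperCase(Locale.ROOT));
            } catch (IllegalArgumentException e) {
                throw new IllegalArgumentException("Unrecognized format: " + rawParam);
            }
            if (!RASTER_FORMATS.contains(format)) {
                throw new IllegalArgumentException("Request format must be \"grid\", \"png\", or \"geotiff\".");
            }
            return format;
        }
    }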
--- .../RegionalAnalysisController.java | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 0b1af4d35..896b53801 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -341,8 +341,13 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I // Get some path parameters out of the URL. // The UUID of the regional analysis for which we want the output data final String regionalAnalysisId = req.params("_id"); - // The response file format: PNG, TIFF, or GRID - final String fileFormatExtension = req.params("format"); // FIXME this may be case sensitive (could make multiple files for different requests) + // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there + // aren't any (only CSV, because origins are freeform). + // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404? + FileStorageFormat format = FileStorageFormat.valueOf(req.params("format").toUpperCase()); + if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { + throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\"."); + } RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start("_id").is(req.params("_id")).get(), @@ -407,13 +412,6 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I if (broker.findJob(regionalAnalysisId) != null) { throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available."); } - // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there - // aren't any (only CSV, because origins are freeform). - // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404? - FileStorageFormat format = FileStorageFormat.valueOf(fileFormatExtension.toUpperCase()); - if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { - throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); - } // Significant overhead here: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( analysis, cutoffIndex, destinationPointSetId, percentile, format @@ -674,8 +672,6 @@ public void registerEndpoints (spark.Service sparkService) { sparkService.get("/:regionId/regional/running", this::getRunningAnalyses, toJson); }); sparkService.path("/api/regional", () -> { - // For grids, no transformer is supplied: render raw bytes or input stream rather than transforming to JSON. - // Wait, do we have any endpoints that write grids into responses? It looks like they all return URLs now. 
sparkService.get("/:_id", this::getRegionalAnalysis); sparkService.get("/:_id/all", this::getAllRegionalResults, toJson); sparkService.get("/:_id/grid/:format", this::getRegionalResults, toJson); From b4933c609e9df751e1d11232bc50a02abf4f5b2a Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 21 Mar 2024 22:41:20 +0800 Subject: [PATCH 16/16] reusable logic to derive human readable names --- .../RegionalAnalysisController.java | 189 +++++++++--------- .../java/com/conveyal/file/FileStorage.java | 6 + .../com/conveyal/file/UrlWithHumanName.java | 13 +- 3 files changed, 105 insertions(+), 103 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 896b53801..a60eccd2b 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -6,9 +6,9 @@ import com.conveyal.analysis.components.broker.Broker; import com.conveyal.analysis.components.broker.JobStatus; import com.conveyal.analysis.models.AnalysisRequest; +import com.conveyal.analysis.models.Model; import com.conveyal.analysis.models.OpportunityDataset; import com.conveyal.analysis.models.RegionalAnalysis; -import com.conveyal.file.UrlWithHumanName; import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.results.CsvResultType; import com.conveyal.analysis.util.JsonUtil; @@ -16,6 +16,7 @@ import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; import com.conveyal.file.FileUtils; +import com.conveyal.file.UrlWithHumanName; import com.conveyal.r5.analyst.FreeFormPointSet; import com.conveyal.r5.analyst.Grid; import com.conveyal.r5.analyst.PointSet; @@ -44,7 +45,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -168,20 +168,33 @@ private int getIntQueryParameter (Request req, String parameterName, int default } } + /** + * Associate a storage key with a human-readable name. + * Currently, this record type is only used within the RegionalAnalysisController class. + */ + private record HumanKey(FileStorageKey storageKey, String humanName) { }; + /** * Get a regional analysis results raster for a single (percentile, cutoff, destination) combination, in one of * several image file formats. This method was factored out for use from two different API endpoints, one for * fetching a single grid, and another for fetching grids for all combinations of parameters at once. + * It returns the unique FileStorageKey for those results, associated with a non-unique human-readable name. */ - private FileStorageKey getSingleCutoffGrid ( + private HumanKey getSingleCutoffGrid ( RegionalAnalysis analysis, - int cutoffIndex, - String destinationPointSetId, + OpportunityDataset destinations, + int cutoffMinutes, int percentile, FileStorageFormat fileFormat ) throws IOException { - String regionalAnalysisId = analysis._id; - int cutoffMinutes = analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes; + final String regionalAnalysisId = analysis._id; + final String destinationPointSetId = destinations._id; + // Selecting the zeroth cutoff still makes sense for older analyses that don't allow an array of N cutoffs. 
+ int cutoffIndex = 0; + if (analysis.cutoffsMinutes != null) { + cutoffIndex = new TIntArrayList(analysis.cutoffsMinutes).indexOf(cutoffMinutes); + checkState(cutoffIndex >= 0); + } LOG.info( "Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {} in format {}.", cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId, fileFormat @@ -244,69 +257,40 @@ private FileStorageKey getSingleCutoffGrid ( fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); } - return singleCutoffFileStorageKey; + String analysisHumanName = humanNameForEntity(analysis); + String destinationHumanName = humanNameForEntity(destinations); + String resultHumanFilename = filenameCleanString( + String.format("%s_%s_P%d_C%d", analysisHumanName, destinationHumanName, percentile, cutoffMinutes) + ) + "." + fileFormat.extension.toLowerCase(Locale.ROOT); + // Note that the returned human filename already contains the appropriate extension. + return new HumanKey(singleCutoffFileStorageKey, resultHumanFilename); } private Object getAllRegionalResults (Request req, Response res) throws IOException { - // Get the UUID of the regional analysis for which we want the output data. final String regionalAnalysisId = req.params("_id"); final UserPermissions userPermissions = UserPermissions.from(req); - RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( - QueryBuilder.start("_id").is(req.params("_id")).get(), - DBProjection.exclude("request.scenario.modifications"), - userPermissions - ).iterator().next(); - if (analysis == null || analysis.deleted) { - throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted."); + final RegionalAnalysis analysis = getAnalysis(regionalAnalysisId, userPermissions); + if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { + throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); + } + if (analysis.request.originPointSetKey != null) { + throw AnalysisServerException.badRequest("Batch result download only available for gridded origins."); } FileStorageKey zippedResultsKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip"); - // Keep only alphanumeric characters and underscores from user-specified analysis name. - String friendlyAnalysisName = filenameCleanString(analysis.name); if (!fileStorage.exists(zippedResultsKey)) { - List fileStorageKeys = new ArrayList<>(); - if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { - throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); - } - // Map from cryptic UUIDs to human-readable strings that can replace them. - Map friendlyReplacements = new HashMap<>(); - { - // Replace the regional analysis ID with its name. - friendlyReplacements.put(analysis._id, friendlyAnalysisName); - // Replace each destination point set ID with its name. - int d = 0; - for (String destinationPointSetId : analysis.destinationPointSetIds) { - OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdIfPermitted( - destinationPointSetId, - userPermissions - ); - checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database."); - // Avoid name collisions, prepend an integer. 
- String friendlyDestinationName = "D%d_%s".formatted(d++, filenameCleanString(opportunityDataset.name)); - friendlyReplacements.put(destinationPointSetId, friendlyDestinationName); - } - } - // Iterate over all combinations and generate one geotiff grid output for each one. - // TODO check whether origins are freeform: if (analysis.request.originPointSetKey == null) ? + // Iterate over all dest, cutoff, percentile combinations and generate one geotiff grid output for each one. + List humanKeys = new ArrayList<>(); for (String destinationPointSetId : analysis.destinationPointSetIds) { - for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) { + OpportunityDataset destinations = getDestinations(destinationPointSetId, userPermissions); + for (int cutoffMinutes : analysis.cutoffsMinutes) { for (int percentile : analysis.travelTimePercentiles) { - FileStorageKey fileStorageKey = getSingleCutoffGrid( - analysis, - cutoffIndex, - destinationPointSetId, - percentile, - FileStorageFormat.GEOTIFF + HumanKey gridKey = getSingleCutoffGrid( + analysis, destinations, cutoffMinutes, percentile, FileStorageFormat.GEOTIFF ); - fileStorageKeys.add(fileStorageKey); + humanKeys.add(gridKey); } } } - // Also include any CSV files that were generated. TODO these are gzipped, decompress and re-compress. - // for (String csvStorageName : analysis.resultStorage.values()) { - // LOG.info("Including {} in results zip file.", csvStorageName); - // FileStorageKey csvKey = new FileStorageKey(RESULTS, csvStorageName); - // fileStorageKeys.add(csvKey); - // } File tempZipFile = File.createTempFile("regional", ".zip"); // Zipfs can't open existing empty files, the file has to not exist. FIXME: Non-dangerous race condition // Examining ZipFileSystemProvider reveals a "useTempFile" env parameter, but this is for the individual entries. @@ -315,49 +299,68 @@ private Object getAllRegionalResults (Request req, Response res) throws IOExcept Map env = Map.of("create", "true"); URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath()); try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) { - for (FileStorageKey fileStorageKey : fileStorageKeys) { - Path storagePath = fileStorage.getFile(fileStorageKey).toPath(); - String nameInZip = storagePath.getFileName().toString(); - // This is so inefficient but it should work. - for (String replacementKey : friendlyReplacements.keySet()) { - nameInZip = nameInZip.replace(replacementKey, friendlyReplacements.get(replacementKey)); - } - Path zipPath = zipFilesystem.getPath(nameInZip); + for (HumanKey key : humanKeys) { + Path storagePath = fileStorage.getFile(key.storageKey).toPath(); + Path zipPath = zipFilesystem.getPath(key.humanName); Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING); } } fileStorage.moveIntoStorage(zippedResultsKey, tempZipFile); } -// res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName)); res.type(APPLICATION_JSON.asString()); - return fileStorage.getJsonUrl(zippedResultsKey, analysis.name, "zip"); + String analysisHumanName = humanNameForEntity(analysis); + return fileStorage.getJsonUrl(zippedResultsKey, analysisHumanName, "zip"); } /** - * This used to extract a particular percentile of a regional analysis as a grid file. + * Given an Entity, make a human-readable name for the entity composed of its user-supplied name as well as + * the most rapidly changing digits of its ID to disambiguate in case multiple entities have the same name. 
+ * It is also possible to find the exact entity in many web UI fields using this suffix of its ID. */ - private UrlWithHumanName getRegionalResults (Request req, Response res) throws IOException { + private static String humanNameForEntity (Model entity) { + // Most or all IDs encountered are MongoDB ObjectIDs. The first four and middle five bytes are slow-changing + // and would not disambiguate between data sets. Only the 3-byte counter at the end will be sure to change. + // See https://www.mongodb.com/docs/manual/reference/method/ObjectId/ + final String id = entity._id; + checkArgument(id.length() > 6, "ID had too few characters."); + String shortId = id.substring(id.length() - 6, id.length()); + String humanName = "%s_%s".formatted(filenameCleanString(entity.name), shortId); + return humanName; + } - // Get some path parameters out of the URL. - // The UUID of the regional analysis for which we want the output data - final String regionalAnalysisId = req.params("_id"); - // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there - // aren't any (only CSV, because origins are freeform). - // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404? - FileStorageFormat format = FileStorageFormat.valueOf(req.params("format").toUpperCase()); - if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { - throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\"."); - } + /** Fetch destination OpportunityDataset from database, followed by a check that it was present. */ + private static OpportunityDataset getDestinations (String destinationPointSetId, UserPermissions userPermissions) { + OpportunityDataset opportunityDataset = + Persistence.opportunityDatasets.findByIdIfPermitted(destinationPointSetId, userPermissions); + checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database."); + return opportunityDataset; + } + /** Fetch RegionalAnalysis from database by ID, followed by a check that it was present and not deleted. */ + private static RegionalAnalysis getAnalysis (String analysisId, UserPermissions userPermissions) { RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( - QueryBuilder.start("_id").is(req.params("_id")).get(), + QueryBuilder.start("_id").is(analysisId).get(), DBProjection.exclude("request.scenario.modifications"), - UserPermissions.from(req) + userPermissions ).iterator().next(); - if (analysis == null || analysis.deleted) { throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted."); } + return analysis; + } + + /** Extract a particular percentile and cutoff of a regional analysis in one of several different raster formats. */ + private UrlWithHumanName getRegionalResults (Request req, Response res) throws IOException { + // It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there + // aren't any (only CSV, because origins are freeform). How should we determine whether this analysis is + // expected to have no gridded results and cleanly return a 404? 
+ final String regionalAnalysisId = req.params("_id"); + FileStorageFormat format = FileStorageFormat.valueOf(req.params("format").toUpperCase()); + if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { + throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\"."); + } + final UserPermissions userPermissions = UserPermissions.from(req); + RegionalAnalysis analysis = getAnalysis(regionalAnalysisId, userPermissions); // Which channel to extract from results with multiple values per origin (for different travel time cutoffs) // and multiple output files per analysis (for different percentiles of travel time and/or different @@ -366,7 +369,7 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I // For newer analyses that have multiple cutoffs, percentiles, or destination pointsets, these initial values // are coming from deprecated fields, are not meaningful and will be overwritten below from query parameters. int percentile = analysis.travelTimePercentile; - int cutoffIndex = 0; // For old single-cutoff outputs, we can still select the zeroth grid. + int cutoffMinutes = analysis.cutoffMinutes; String destinationPointSetId = analysis.grid; // Handle newer regional analyses with multiple cutoffs in an array. @@ -375,9 +378,8 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I if (analysis.cutoffsMinutes != null) { int nCutoffs = analysis.cutoffsMinutes.length; checkState(nCutoffs > 0, "Regional analysis has no cutoffs."); - int cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]); - cutoffIndex = new TIntArrayList(analysis.cutoffsMinutes).indexOf(cutoffMinutes); - checkState(cutoffIndex >= 0, + cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]); + checkArgument(new TIntArrayList(analysis.cutoffsMinutes).contains(cutoffMinutes), "Travel time cutoff for this regional analysis must be taken from this list: (%s)", Ints.join(", ", analysis.cutoffsMinutes) ); @@ -413,21 +415,10 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available."); } // Significant overhead here: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. - FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( - analysis, cutoffIndex, destinationPointSetId, percentile, format - ); - // Create alternative human-readable filename. Similar to internal storage key name, but using a shortened - // ID for the destinations and the user-specified name for the project. This project name may contain invalid - // characters, but will be sanitized by getJsonUrl. We use the end of the dataset ID rather than the beginning - // because it's more likely to be different when several datasets are created in rapid succession. - String shortDestinationId = destinationPointSetId.substring( - destinationPointSetId.length() - 5, destinationPointSetId.length() - ); - int cutoffMinutes = - analysis.cutoffsMinutes != null ? 
analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes; - String rawHumanName = String.format("%s_%s_P%d_C%d", analysis.name, shortDestinationId, percentile, cutoffMinutes); + OpportunityDataset destinations = getDestinations(destinationPointSetId, userPermissions); + HumanKey gridKey = getSingleCutoffGrid(analysis, destinations, cutoffMinutes, percentile, format); res.type(APPLICATION_JSON.asString()); - return fileStorage.getJsonUrl(singleCutoffFileStorageKey, rawHumanName, format.extension.toLowerCase(Locale.ROOT)); + return fileStorage.getJsonUrl(gridKey.storageKey, gridKey.humanName); } private Object getCsvResults (Request req, Response res) { diff --git a/src/main/java/com/conveyal/file/FileStorage.java b/src/main/java/com/conveyal/file/FileStorage.java index 8c5ef0122..f9bd687cc 100644 --- a/src/main/java/com/conveyal/file/FileStorage.java +++ b/src/main/java/com/conveyal/file/FileStorage.java @@ -99,4 +99,10 @@ default UrlWithHumanName getJsonUrl (FileStorageKey key, String rawHumanName, St return UrlWithHumanName.fromCleanedName(url, rawHumanName, humanExtension); } + /** This assumes the humanFileName is already a complete filename (cleaned and truncated with any extension). */ + default UrlWithHumanName getJsonUrl (FileStorageKey key, String humanFileName) { + String url = this.getURL(key); + return new UrlWithHumanName(url, humanFileName); + } + } diff --git a/src/main/java/com/conveyal/file/UrlWithHumanName.java b/src/main/java/com/conveyal/file/UrlWithHumanName.java index 8cf447c75..f7a49c933 100644 --- a/src/main/java/com/conveyal/file/UrlWithHumanName.java +++ b/src/main/java/com/conveyal/file/UrlWithHumanName.java @@ -19,14 +19,19 @@ public UrlWithHumanName (String url, String humanName) { private static final int TRUNCATE_FILENAME_CHARS = 220; /** - * Given an arbitrary string, make it safe for use as a friendly human-readable filename. - * This can yield non-unique strings and is intended for files downloaded by end users that do not need to be - * machine-discoverable by unique IDs. + * Given an arbitrary string, make it safe for use in a friendly human-readable filename. This can yield non-unique + * strings and is intended for files downloaded by end users that do not need to be machine-discoverable by unique + * IDs. A length of up to 255 characters will work with most filesystems and within ZIP files. In all names we + * generate, the end of the name more uniquely identifies it (contains a fragment of a hex object ID or contains + * the distinguishing factors such as cutoff and percentile for files within a ZIP archive). Therefore, we truncate + * to a suffix rather than a prefix when the name is too long. We keep the length somewhat under 255 in case some + * other short suffix needs to be appended before use as a filename. + * Note that this will strip dot characters out of the string, so any dot and extension must be suffixed later. */ public static String filenameCleanString (String original) { String ret = original.replaceAll("\\W+", "_"); if (ret.length() > TRUNCATE_FILENAME_CHARS) { - ret = ret.substring(0, TRUNCATE_FILENAME_CHARS); + ret = ret.substring(ret.length() - TRUNCATE_FILENAME_CHARS, ret.length()); } return ret; }
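To see the effect of the suffix-keeping truncation above, here is a toy run of the same replace-then-truncate logic with a deliberately small limit (10 characters here, purely for illustration; the real constant is 220):

    class TruncationDemo {
        // Same logic as filenameCleanString, with the limit passed in so the example stays short.
        static String clean (String original, int maxChars) {
            String ret = original.replaceAll("\\W+", "_");
            if (ret.length() > maxChars) {
                ret = ret.substring(ret.length() - maxChars);
            }
            return ret;
        }

        public static void main (String[] args) {
            // The disambiguating ID fragment at the end survives; the generic prefix is dropped.
            System.out.println(clean("My Analysis 65f2c1a7beef", 10)); // prints "f2c1a7beef"
        }
    }

Truncating a prefix instead would have kept "My_Analysi", discarding exactly the characters that distinguish one download from another.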