diff --git a/.travis.yml b/.travis.yml
index 88754db05..4cfbbd451 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -43,6 +43,12 @@ after_success:
   # Upload the shaded jar. It's named with git describe, so begins with the last tag name, so begins with the letter v.
   # On the other hand, the main un-shaded artifact will begin with the project name 'r5'.
   - aws s3 cp --recursive --exclude "*" --include "v*.jar" /home/travis/build/conveyal/r5/target s3://r5-builds
+  # Copy Javadoc to S3.
+  # The syntax below sets JAVADOC_TARGET to TRAVIS_TAG if defined, otherwise TRAVIS_BRANCH.
+  # If TRAVIS_TAG is defined, TRAVIS_BRANCH will be undefined per the Travis docs, but Travis will run two
+  # builds: one for the tag and one for the commit to the branch.
+  - JAVADOC_TARGET=${TRAVIS_TAG:-$TRAVIS_BRANCH}
+  - if [[ "$TRAVIS_PULL_REQUEST" = false ]]; then mvn javadoc:javadoc; aws s3 cp --cache-control max-age=300 --recursive /home/travis/build/conveyal/r5/target/site/apidocs s3://javadoc.conveyal.com/r5/${JAVADOC_TARGET}/; fi

 sudo: true # sudo: true gets us more memory, which we need, at the expense of slower builds
@@ -55,4 +61,3 @@ cache:
 notifications:
   slack: conveyal:vqrkN7708qU1OTL9XkbMqQFV
-
diff --git a/LICENSE b/LICENSE
index 65cebebfb..3cb70de14 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)

-Copyright (c) 2014-2016 Conveyal
+Copyright (c) 2014-2017 Conveyal

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 878fda962..5f448190e 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,8 @@ on this particular point [here](http://conveyal.com/blog/2015/05/04/variation-in
 Please follow the Conveyal Java style guide at https://github.com/conveyal/JavaStyle/

+Javadoc for the project is built automatically after every change and published at http://javadoc.conveyal.com/r5/master/
+
 ## History
 R5 grew out of several open-source projects. The focus on analytic applications, and the core development team behind R5,
diff --git a/pom.xml b/pom.xml
index 0375c9880..77cdb9aff 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,7 +8,7 @@
   com.conveyal
   r5
-  2.2.0-SNAPSHOT
+  2.5.0-SNAPSHOT
   jar
@@ -72,6 +72,7 @@
   true
   **/*.properties
+  worker.sh
   debug-plan/**
   fares/**
diff --git a/src/main/java/com/conveyal/r5/R5Main.java b/src/main/java/com/conveyal/r5/R5Main.java
index 0b6af2608..e86454159 100644
--- a/src/main/java/com/conveyal/r5/R5Main.java
+++ b/src/main/java/com/conveyal/r5/R5Main.java
@@ -17,6 +17,13 @@
  */
 public class R5Main {
     public static void main (String... args) throws Exception {
+        System.out.println("__________________\n" +
+                "___ __ \\__ ____/\n" +
+                "__ /_/ /_____ \\ \n" +
+                "_ _, _/ ____/ / \n" +
+                "/_/ |_| /_____/ \n" +
+                " ");
+
         // Pull argument 0 off as the sub-command,
         // then pass the remaining args (1..n) on to that subcommand.
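+        // A hypothetical invocation (the sub-command name here is invented for illustration, not taken from this diff):
+        //   java -cp r5-shaded.jar com.conveyal.r5.R5Main point --help
+        // args[0] = "point" selects the sub-command, and the remaining args (here just "--help") are forwarded to it.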
        String command = args[0];
diff --git a/src/main/java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java
new file mode 100644
index 000000000..e5b022241
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java
@@ -0,0 +1,85 @@
+package com.conveyal.r5.analyst;
+
+/**
+ * Compute p-values that the two regional analysis results differ due to systematic variation (change in transit network,
+ * percentile of interest, land use, time window, etc.) rather than due to random variation from the Monte Carlo search
+ * process. This uses the confidence interval method of bootstrap hypothesis testing described on page 214 of Efron and
+ * Tibshirani (1993). We perform a two-tailed test, so we are not making any assumptions about whether the change was
+ * positive or negative; a significant value indicates that there was a change, and that the sign is correct.
+ *
+ * This technique works by constructing a confidence interval that has one end at 0 and the other end placed wherever it
+ * must lie to make the confidence interval symmetric (i.e. with the same amount of density in both tails). We use the
+ * percentile method to calculate this confidence interval; while we are aware there are issues with this, the better
+ * bootstrap confidence interval methods require us to store additional data so we can compute a jackknife estimate of
+ * acceleration (Efron and Tibshirani 1993, ch. 14).
+ *
+ * While it might initially seem that we could use the permutation test described earlier in that chapter, we in fact
+ * cannot. It attractively promises to compare two distributions, which is exactly what we have, but in fact it compares
+ * only a statistic of those distributions. We're not interested in the difference in means of the sampling
+ * distributions; we're interested in the difference of the full sampling distributions. To use the permutation test, we
+ * would have to apply it one level up the stack, when computing the regional results, and compute two regional runs
+ * simultaneously so we could permute them when we still had the travel times for all iterations in memory.
+ *
+ * References
+ * Efron, B., & Tibshirani, R. J. (1993). An Introduction to the Bootstrap. Boca Raton, FL: Chapman and Hall/CRC.
+ */
+public class BootstrapPercentileHypothesisTestGridStatisticComputer extends DualGridStatisticComputer {
+    @Override
+    protected double computeValuesForOrigin(int x, int y, int[] aValues, int[] bValues) {
+        // compute the value
+        int nBelowZero = 0;
+        int nZero = 0;
+        int nAboveZero = 0;
+        int nTotal = 0;
+
+        // get the point estimate of the difference
+        int pointEstimate = bValues[0] - aValues[0];
+        if (pointEstimate == 0) return 0; // no difference, not statistically significant
+
+        // subtract every value in a from every value in b
+        // this creates a bootstrapped sampling distribution of the differences, since each bootstrap sample in each
+        // analysis is independent of all others (we've taken a lot of care to ensure this is the case).
+        for (int aIdx = 1; aIdx < aValues.length; aIdx++) {
+            // TODO a and b values are used more than once. This doesn't create bootstrap dependence, correct?
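+            // Illustrative example with invented values: given aValues = [pe, 10, 12] and bValues = [pe, 11, 9]
+            // (index 0 holds the point estimates and is skipped), the four differences bVal - aVal are
+            // 1, -1, -1, -3, yielding nAboveZero = 1, nBelowZero = 3, nZero = 0, nTotal = 4.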
+            int aVal = aValues[aIdx];
+            for (int bIdx = 1; bIdx < bValues.length; bIdx++, nTotal++) {
+                int bVal = bValues[bIdx];
+                int difference = bVal - aVal;
+
+                if (difference > 0) nAboveZero++;
+                else if (difference < 0) nBelowZero++;
+                else nZero++;
+            }
+        }
+
+        double pVal;
+
+        // if the point estimate was less than zero, assume the confidence interval lies below zero.
+        // This could be wrong if the point estimate does not lie on the same side of zero as the majority of the density.
+        // TODO Efron and Tibshirani don't really discuss how to handle that case, and in particular don't discuss two-
+        // tailed tests at all.
+        if (pointEstimate < 0) {
+            // compute the density that lies at or above zero. We have changed the technique slightly from what is
+            // described in Efron and Tibshirani 1993 to explicitly handle values that are exactly 0 (important because
+            // we are working with discretized, integer data).
+            double densityAtOrAboveZero = (double) (nZero + nAboveZero) / nTotal;
+            // two-tailed test: take this much density off the other end as well, and that's the p-value for this test.
+            pVal = 2 * densityAtOrAboveZero;
+        } else {
+            // compute the density that lies at or below zero
+            double densityAtOrBelowZero = (double) (nBelowZero + nZero) / nTotal;
+            // two-tailed test
+            pVal = 2 * densityAtOrBelowZero;
+        }
+
+        // fix up the case where the point estimate does not lie on the same side of 0 as the majority of the density.
+        if (pVal > 1) pVal = 1;
+
+        // scale the probability to 0 - 100,000.
+        // Note that without the fix-up above, the return value could be less than 0 when a majority of the density lies
+        // on the opposite side of zero from the point estimate. Maybe we shouldn't be looking at the point estimate.
+        // NB for historical reasons, the probability grids show the probability of change, not the probability of no
+        // change (i.e. the p-value). Invert the values (i.e. replace p with alpha).
+        return (1 - pVal) * 1e5;
+    }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java
new file mode 100644
index 000000000..c0ec86232
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java
@@ -0,0 +1,130 @@
+package com.conveyal.r5.analyst;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * A DualGridStatisticComputer computes statistics based on two grids.
+ */
+public abstract class DualGridStatisticComputer {
+    private static final AmazonS3 s3 = new AmazonS3Client();
+    /** Version of the access grid format we read */
+    private static final int ACCESS_GRID_VERSION = 0;
+
+    /**
+     * Calculate the probability at each origin that a random individual sample from regional analysis B is larger
+     * than one from regional analysis A. We do this empirically and exhaustively by looping, for each origin, over
+     * every possible combination of samples and taking a difference, then evaluating the number that yielded results
+     * greater than zero.
+     *
+     * The regional analysis access grids must be of identical size and zoom level, and a Grid object (the same as is
+     * used for destination grids) will be returned, with probabilities scaled from 0 to 100,000.
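+     *
+     * A minimal usage sketch (the bucket and key names are hypothetical, and assume gzipped access grids):
+     * <pre>
+     *   DualGridStatisticComputer computer = new BootstrapPercentileHypothesisTestGridStatisticComputer();
+     *   Grid result = computer.computeImprovementProbability(
+     *           "analysis-results", "regional/base.access", "regional/scenario.access");
+     *   result.writePng(new FileOutputStream("comparison.png"));
+     * </pre>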
+ */ + public Grid computeImprovementProbability (String resultBucket, String regionalAnalysisAKey, String regionalAnalysisBKey) throws IOException { + S3Object aGrid = s3.getObject(resultBucket, regionalAnalysisAKey); + S3Object bGrid = s3.getObject(resultBucket, regionalAnalysisBKey); + return computeImprovementProbability(aGrid.getObjectContent(), bGrid.getObjectContent()); + } + + public Grid computeImprovementProbability(InputStream a, InputStream b) throws IOException { + LittleEndianDataInputStream aIn = new LittleEndianDataInputStream(new GZIPInputStream(a)); + LittleEndianDataInputStream bIn = new LittleEndianDataInputStream(new GZIPInputStream(b)); + + validateHeaderAndVersion(aIn); + validateHeaderAndVersion(bIn); + + int aZoom = aIn.readInt(); + int aWest = aIn.readInt(); + int aNorth = aIn.readInt(); + int aWidth = aIn.readInt(); + int aHeight = aIn.readInt(); + + int bZoom = bIn.readInt(); + int bWest = bIn.readInt(); + int bNorth = bIn.readInt(); + int bWidth = bIn.readInt(); + int bHeight = bIn.readInt(); + + if (aZoom != bZoom || + aWest != bWest || + aNorth != bNorth || + aWidth != bWidth || + aHeight != bHeight) { + throw new IllegalArgumentException("Grid sizes for comparison must be identical!"); + } + + // number of iterations need not be equal, the computed probability is still valid even if they are not + // as the probability of choosing any particular sample is still uniform within each scenario. + int aIterations = aIn.readInt(); + int bIterations = bIn.readInt(); + + Grid out = new Grid(aZoom, aWidth, aHeight, aNorth, aWest); + + // pixels are in row-major order, iterate over y on outside + for (int y = 0; y < aHeight; y++) { + for (int x = 0; x < aWidth; x++) { + int[] aValues = new int[aIterations]; + int[] bValues = new int[bIterations]; + + for (int iteration = 0, val = 0; iteration < aIterations; iteration++) { + aValues[iteration] = (val += aIn.readInt()); + } + + for (int iteration = 0, val = 0; iteration < bIterations; iteration++) { + bValues[iteration] = (val += bIn.readInt()); + } + + out.grid[x][y] = computeValuesForOrigin(x, y, aValues, bValues); + } + } + + return out; + } + + private static void validateHeaderAndVersion(LittleEndianDataInputStream input) throws IOException { + char[] header = new char[8]; + for (int i = 0; i < 8; i++) { + header[i] = (char) input.readByte(); + } + + if (!"ACCESSGR".equals(new String(header))) { + throw new IllegalArgumentException("Input not in access grid format!"); + } + + int version = input.readInt(); + + if (version != ACCESS_GRID_VERSION) { + throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version)); + } + } + + /** Given the origin coordinates and the values from the two grids, compute a value for the output grid */ + protected abstract double computeValuesForOrigin (int x, int y, int[] aValues, int[] bValues); + + public static void main (String... 
args) throws IOException {
+        DualGridStatisticComputer comp;
+        if ("--two-tailed".equals(args[0])) {
+            comp = new BootstrapPercentileHypothesisTestGridStatisticComputer();
+        } else {
+            throw new RuntimeException("Unknown grid statistic computer " + args[0]);
+        }
+
+        FileInputStream a = new FileInputStream(args[1]);
+        FileInputStream b = new FileInputStream(args[2]);
+        Grid grid = comp.computeImprovementProbability(a, b);
+
+        if (args[3].endsWith(".grid"))
+            grid.write(new FileOutputStream(args[3]));
+        else if (args[3].endsWith(".png"))
+            grid.writePng(new FileOutputStream(args[3]));
+        else if (args[3].endsWith(".tif"))
+            grid.writeGeotiff(new FileOutputStream(args[3]));
+    }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java
new file mode 100644
index 000000000..28a174470
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java
@@ -0,0 +1,38 @@
+package com.conveyal.r5.analyst;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.conveyal.r5.analyst.scenario.GridStatisticComputer;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Access grids are three-dimensional arrays, with the first two dimensions consisting of the x and y coordinates of
+ * origins within the regional analysis, and the third dimension reflecting multiple values of the indicator of
+ * interest. These could be instantaneous accessibility results for each Monte Carlo draw when computing average
+ * instantaneous accessibility (i.e. Owen-style accessibility), or multiple bootstrap replications of the sampling
+ * distribution of accessibility given median travel time (see Conway, M. W., Byrd, A. and van Eggermond, M.
+ * "A Statistical Approach to Comparing Accessibility Results: Including Uncertainty in Public Transport Sketch
+ * Planning," paper presented at the 2017 World Symposium of Transport and Land Use Research, Brisbane, QLD,
+ * Australia, Jul 3-6.)
+ *
+ * An ExtractingGridStatisticComputer simply grabs the value at a particular index within each origin.
+ * When storing bootstrap replications of travel time, we also store the point estimate (using all Monte Carlo draws
+ * equally weighted) as the first value, so an ExtractingGridStatisticComputer(0) can be used to retrieve the point
+ * estimate.
+ */
+public class ExtractingGridStatisticComputer extends GridStatisticComputer {
+    public final int index;
+
+    /** Initialize with the index to extract */
+    public ExtractingGridStatisticComputer (int index) {
+        this.index = index;
+    }
+
+    @Override
+    protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) {
+        return valuesThisOrigin[index];
+    }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java
index 4b7c64873..7192aa205 100644
--- a/src/main/java/com/conveyal/r5/analyst/Grid.java
+++ b/src/main/java/com/conveyal/r5/analyst/Grid.java
@@ -122,12 +122,24 @@
     public Grid (int zoom, int width, int height, int north, int west) {
         this.grid = new double[width][height];
     }

+    /**
+     * Version of getPixelWeights which returns the weights relative to the total area of the input geometry (i.e.
+     * the weight at a pixel is the proportion of the input geometry that falls within that pixel).
+     */
+    public TObjectDoubleMap getPixelWeights (Geometry geometry) {
+        return getPixelWeights(geometry, false);
+    }
+
     /**
      * Get the proportions of an input polygon feature that overlap each grid cell, in the format [x, y] => weight.
      * This weight object can then be fed into the incrementFromPixelWeights function to actually burn a polygon into
      * the grid.
+     *
+     * If relativeToPixels is true, the weights are the proportion of the pixel that is covered. Otherwise they are the
+     * portion of this polygon which is within the given grid cell. If using incrementFromPixelWeights, this should be
+     * set to false.
      */
-    public TObjectDoubleMap getPixelWeights (Geometry geometry) {
+    public TObjectDoubleMap getPixelWeights (Geometry geometry, boolean relativeToPixels) {
         // No need to convert to a local coordinate system
         // Both the supplied polygon and the web mercator pixel geometries are left in WGS84 geographic coordinates.
         // Both are distorted equally along the X axis at a given latitude so the proportion of the geometry within
@@ -151,7 +163,8 @@
                 Geometry pixel = getPixelGeometry(x + west, y + north, zoom);
                 Geometry intersection = pixel.intersection(geometry);
-                double weight = intersection.getArea() / area;
+                double denominator = relativeToPixels ? pixel.getArea() : area;
+                double weight = intersection.getArea() / denominator;
                 weights.put(new int[] { x, y }, weight);
             }
         }
@@ -345,20 +358,34 @@ public void writeShapefile (String fileName, String fieldName) {

     /* functions below from http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Mathematics */

+    /** Return the pixel the given longitude falls within */
     public static int lonToPixel (double lon, int zoom) {
         return (int) ((lon + 180) / 360 * Math.pow(2, zoom) * 256);
     }

+    /** Return the west side of the given pixel (assuming an integer pixel; noninteger pixels will return the appropriate location within the pixel) */
     public static double pixelToLon (double pixel, int zoom) {
         return pixel / (Math.pow(2, zoom) * 256) * 360 - 180;
     }

+    /** Return the longitude of the center of the given pixel */
+    public static double pixelToCenterLon (int pixel, int zoom) {
+        return pixelToLon(pixel + 0.5, zoom);
+    }
+
+    /** Return the pixel the given latitude falls within */
     public static int latToPixel (double lat, int zoom) {
         double latRad = FastMath.toRadians(lat);
         return (int) ((1 - log(tan(latRad) + 1 / cos(latRad)) / Math.PI) * Math.pow(2, zoom - 1) * 256);
     }

-    // We're using FastMath here, because the built-in math functions were taking a laarge amount of time in profiling.
+    /** Return the latitude of the center of the given pixel */
+    public static double pixelToCenterLat (int pixel, int zoom) {
+        return pixelToLat(pixel + 0.5, zoom);
+    }
+
+    // We're using FastMath here, because the built-in math functions were taking a large amount of time in profiling.
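+    // Worked example of the pixel math above: at zoom 10 there are 2^10 * 256 = 262,144 pixels across 360 degrees of
+    // longitude, so lonToPixel(0, 10) = 131,072, and pixelToCenterLon(131072, 10) = pixelToLon(131072.5, 10),
+    // which is approximately 0.000687 degrees, the longitude of the center of that pixel.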
+    /** Return the north side of the given pixel (assuming an integer pixel; noninteger pixels will return the appropriate location within the pixel) */
     public static double pixelToLat (double pixel, int zoom) {
         return FastMath.toDegrees(atan(sinh(Math.PI - (pixel / 256d) / Math.pow(2, zoom) * 2 * Math.PI)));
     }
diff --git a/src/main/java/com/conveyal/r5/analyst/GridBounds.java b/src/main/java/com/conveyal/r5/analyst/GridBounds.java
new file mode 100644
index 000000000..c3d15e202
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/GridBounds.java
@@ -0,0 +1,8 @@
+package com.conveyal.r5.analyst;
+
+/**
+ * We have a lot of classes which have zoom, west, north, width, and height attributes. We would like to be able to
+ * pass them into functions without having to break each of these values out individually.
+ */
+public class GridBounds {
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/GridComputer.java b/src/main/java/com/conveyal/r5/analyst/GridComputer.java
index e23e1599a..2f8d039f8 100644
--- a/src/main/java/com/conveyal/r5/analyst/GridComputer.java
+++ b/src/main/java/com/conveyal/r5/analyst/GridComputer.java
@@ -6,40 +6,111 @@
 import com.amazonaws.services.sqs.model.SendMessageRequest;
 import com.conveyal.r5.analyst.cluster.GridRequest;
 import com.conveyal.r5.analyst.cluster.Origin;
-import com.conveyal.r5.analyst.cluster.TaskStatistics;
 import com.conveyal.r5.api.util.LegMode;
+import com.conveyal.r5.point_to_point.builder.PointToPointQuery;
 import com.conveyal.r5.profile.FastRaptorWorker;
-import com.conveyal.r5.profile.Propagater;
+import com.conveyal.r5.profile.PerTargetPropagater;
 import com.conveyal.r5.profile.RaptorWorker;
 import com.conveyal.r5.profile.StreetMode;
 import com.conveyal.r5.streets.LinkedPointSet;
 import com.conveyal.r5.streets.StreetRouter;
 import com.conveyal.r5.transit.TransportNetwork;
-import com.google.common.io.LittleEndianDataOutputStream;
 import gnu.trove.iterator.TIntIntIterator;
+import gnu.trove.list.TIntList;
+import gnu.trove.list.array.TIntArrayList;
 import gnu.trove.map.TIntIntMap;
+import gnu.trove.map.hash.TIntIntHashMap;
+import org.apache.commons.math3.random.MersenneTwister;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Base64;
-import java.util.stream.Stream;
+import java.util.stream.DoubleStream;

 /**
  * Computes an accessibility indicator at a single cell in a Web Mercator grid, using destination densities from
- * a Web Mercator density grid. Both grids must be at the same zoom level. The accessibility indicator is calculated
- * separately for each iteration of the range RAPTOR algorithm (each departure minute and randomization of the schedules
- * for the frequency-based routes) and all of these different values of the indicator are retained to allow
- * probabilistic scenario comparison. This does freeze the travel time cutoff and destination grid in the interest of
- * keeping the results down to an acceptable size. The results are placed on an Amazon SQS queue for collation by
- * a GridResultConsumer and a GridResultAssembler.
+ * a Web Mercator density grid. Both grids must be at the same zoom level. This class computes accessibility given median
+ * travel time (described below and in Conway, Byrd and van Eggermond 2017).
  *
- * These requests are enqueued by the frontend one for each origin in a regional analysis.
+ * The accessibility is calculated using median travel time to each destination (there are plans to change this to
+ * allow use of arbitrary percentiles in the future). In order to facilitate probabilistic comparison of scenarios,
+ * many accessibility values are returned, representing the sampling distribution of the accessibility. The first
+ * value is the value computed using all the Monte Carlo frequency draws with equal weight (i.e. it is our best point
+ * estimate).
+ *
+ * Subsequent values are produced by bootstrapping the iterations (combinations of a departure minute and a Monte
+ * Carlo draw) to simulate the sampling distribution. For each value returned, we select _with replacement_ a
+ * different set of Monte Carlo draws to use, to approximate the effects of repeating the analysis with completely
+ * new Monte Carlo draws. This yields the same number of Monte Carlo draws, but some of the draws from the original
+ * analysis are not present, and some are duplicated. Sounds crazy but it works.
+ *
+ * First, some terminology: a bootstrap replication is the statistic of interest defined on one bootstrap sample,
+ * which is a collection of values sampled with replacement from the original Monte Carlo draws.
+ * For each of the bootstrap samples we compute, we define which Monte Carlo draws should be included. We want
+ * to choose the Monte Carlo draws included in a particular bootstrap sample here, rather than choosing different
+ * Monte Carlo draws at each destination. The most intuitive explanation for this is that it matches the
+ * way our accessibility indicator is defined: each Monte Carlo draw is used at every destination, rather than
+ * computing separate Monte Carlo draws for each destination. Sampling Monte Carlo draws independently at
+ * each destination causes insufficient variance in the resulting sampling distribution. Extreme values of travel
+ * time tend to be correlated across many destinations; a particular Monte Carlo draw is likely to affect the travel
+ * time to all destinations reached by it simultaneously; in the most extreme case, a change in the Monte Carlo draw
+ * on the line serving the origin could make the whole network reachable, or not, within the travel time threshold.
+ *
+ * This leads us to a more theoretical justification. One of the tenets of the bootstrap is that the
+ * bootstrap samples be independent (Efron and Tibshirani 1993, 46). In situations where the data are not
+ * independent and identically distributed, a number of techniques, e.g. the moving block bootstrap, have been
+ * developed (see Lahiri 2003). Most of these methods consist of changes to the bootstrap sampling technique to
+ * ensure the samples are independent, with the dependence structure of the data wrapped up within a sample. While
+ * none of the off-the-shelf approaches for dependent data appear to be helpful for our use case, since we know (or
+ * can speculate about) the correlation properties of our data, we can come up with our own sampling technique. Since
+ * there is a dependence among destinations (many or all being affected by the same Monte Carlo draw), we use the
+ * same bootstrap samples (consisting of Monte Carlo draws) to compute the travel times to each destination.
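+ *
+ * As an illustrative sketch (draws invented for exposition): with original Monte Carlo draws d1, d2, d3, d4, one
+ * bootstrap sample might select {d2, d2, d3, d4}; d1 is omitted and d2 duplicated, yet the sample size still equals
+ * the original number of draws, and the same selection is reused at every destination.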
+ *
+ * There is also dependence in the departure minutes; Monte Carlo draws from the same departure minute will have
+ * more similar travel times, and therefore accessibility values, than those from different departure minutes, due
+ * to the deterministic and constant effect of the scheduled network. Adjacent departure minutes will additionally
+ * have correlated travel times and accessibility values because transit service is similar (you might have to wait
+ * one more minute to catch the same bus). There are also likely periodic effects at the transit service frequency
+ * (e.g. 15 minutes, 30 minutes, etc.). In testing in Atlanta we did not find ignoring this dependence to be an
+ * issue, but patching it up is fairly simple. Rather than drawing n Monte Carlo draws with replacement for each
+ * bootstrap sample, we draw n / m draws for each departure minute, where n is the number of Monte Carlo draws done
+ * in the original analysis, and m is the number of departure minutes. That is, we ensure that the number of Monte
+ * Carlo draws using a particular departure minute is identical in each bootstrap sample and in the original,
+ * non-bootstrapped point estimate. This also means that all the variation in the sampling distributions is due to
+ * the effect of frequency-based lines, rather than to variation in departure time (which is held constant).
+ *
+ * We initially did not think that the change to sampling from within the departure minutes would be that significant
+ * (see http://rpubs.com/mattwigway/bootstrap-dependence). However, it in fact is. It produces narrower sampling
+ * distributions that result entirely from the Monte Carlo simulation of frequency lines, without mixing in
+ * systematic variation that comes from different departure times. This also should yield p-values that are more
+ * correct; because there is no systematic influence from scheduled lines, 5% of the values should in fact be found
+ * statistically significant when testing the same scenario (which suggests we should probably be using a stricter
+ * significance threshold, since we are doing thousands of computations; see Wasserstein and Lazar 2016, "The ASA's
+ * Statement on p-Values," http://dx.doi.org/10.1080/00031305.2016.1154108, and Ioannidis 2005, "Why Most Published
+ * Research Findings Are False," http://dx.doi.org/10.1371/journal.pmed.0020124).
+ *
+ * For more information on the bootstrap and its application to the computation of accessibility given median travel
+ * time, see
+ *
+ * Efron, Bradley, and Robert J. Tibshirani. An Introduction to the Bootstrap. Boca Raton, FL: Chapman and Hall/CRC, 1993.
+ * Lahiri, S. N. Resampling Methods for Dependent Data. New York: Springer, 2003.
+ * Conway, M. W., Byrd, A. and van Eggermond, M. "A Statistical Approach to Comparing Accessibility Results: Including
+ * Uncertainty in Public Transport Sketch Planning," paper presented at the 2017 World Symposium of Transport and Land
+ * Use Research, Brisbane, QLD, Australia, Jul 3-6.
+ *
+ * The results are placed on an Amazon SQS queue for collation by a GridResultConsumer and a GridResultAssembler.
 */
public class GridComputer {
    private static final Logger LOG = LoggerFactory.getLogger(GridComputer.class);

+    /** The number of bootstrap replications used to bootstrap the sampling distribution of the percentiles */
+    public static final int N_BOOTSTRAP_REPLICATIONS = 1000;
+
    /** SQS client. TODO: async?
*/ private static final AmazonSQS sqs = new AmazonSQSClient(); @@ -65,82 +136,232 @@ public void run() throws IOException { // ensure they both have the same zoom level if (request.zoom != grid.zoom) throw new IllegalArgumentException("grid zooms do not match!"); - // NB this will skip all destinations that lie outside query bounds. This is intentional. - final WebMercatorGridPointSet targets = - pointSetCache.get(request.zoom, request.west, request.north, request.width, request.height); - // use the middle of the grid cell - request.request.fromLat = Grid.pixelToLat(request.north + request.y + 0.5, request.zoom); - request.request.fromLon = Grid.pixelToLon(request.west + request.x + 0.5, request.zoom); + request.request.fromLat = Grid.pixelToCenterLat(request.north + request.y, request.zoom); + request.request.fromLon = Grid.pixelToCenterLon(request.west + request.x, request.zoom); - // Run the raptor algorithm to get times at each destination for each iteration + // Use the extent of the opportunity density grid as the destinations; this avoids having to convert between + // coordinate systems, and avoids including thousands of extra points in the weeds where there happens to be a + // transit stop. It also means that all data will be included in the analysis even if the user is only analyzing + // a small part of the city. + WebMercatorGridPointSet destinations = pointSetCache.get(grid); + // TODO recast using egress mode - // first, find the access stops - StreetMode mode; - if (request.request.accessModes.contains(LegMode.CAR)) mode = StreetMode.CAR; - else if (request.request.accessModes.contains(LegMode.BICYCLE)) mode = StreetMode.BICYCLE; - else mode = StreetMode.WALK; + StreetMode accessMode = LegMode.getDominantStreetMode(request.request.accessModes); + StreetMode directMode = LegMode.getDominantStreetMode(request.request.directModes); - LOG.info("Maximum number of rides: {}", request.request.maxRides); - LOG.info("Maximum trip duration: {}", request.request.maxTripDurationMinutes); + if (request.request.transitModes.isEmpty()) { + // non-transit search + LinkedPointSet linkedDestinations = destinations.link(network.streetLayer, directMode); - final LinkedPointSet linkedTargets = targets.link(network.streetLayer, mode); + StreetRouter sr = new StreetRouter(network.streetLayer); + sr.timeLimitSeconds = request.request.maxTripDurationMinutes * 60; + sr.streetMode = directMode; + sr.profileRequest = request.request; + sr.setOrigin(request.request.fromLat, request.request.fromLon); + sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS; + sr.route(); - StreetRouter sr = new StreetRouter(network.streetLayer); - sr.distanceLimitMeters = 2000; - sr.setOrigin(request.request.fromLat, request.request.fromLon); - sr.dominanceVariable = StreetRouter.State.RoutingVariable.DISTANCE_MILLIMETERS; - sr.route(); + int offstreetWalkSpeedMillimetersPerSecond = (int) (request.request.getSpeed(directMode) * 1000); + int[] travelTimes = linkedDestinations.eval(sr::getTravelTimeToVertex, offstreetWalkSpeedMillimetersPerSecond).travelTimes; - TIntIntMap reachedStops = sr.getReachedStops(); + double accessibility = 0; + for (int y = 0, index1d = 0; y < grid.height; y++) { + for (int x = 0; x < grid.width; x++, index1d++) { + if (travelTimes[index1d] < request.cutoffMinutes * 60) { + accessibility += grid.grid[x][y]; + } + } + } - // convert millimeters to seconds - int millimetersPerSecond = (int) (request.request.walkSpeed * 1000); - for (TIntIntIterator it = 
reachedStops.iterator(); it.hasNext();) {
-            it.advance();
-            it.setValue(it.value() / millimetersPerSecond);
-        }
+            finish(new int[] { (int) accessibility }); // no uncertainty so no bootstraps
+        } else {
+            // always walk at egress
+            LinkedPointSet linkedDestinationsEgress = destinations.link(network.streetLayer, StreetMode.WALK);
+            // if the access mode is also walk, the link function will use its cache to return the same LinkedPointSet
+            // NB Should use direct mode but then we'd have to run the street search twice.
+            if (!request.request.directModes.equals(request.request.accessModes)) {
+                LOG.warn("Disparate direct modes and access modes are not supported in analysis mode.");
+            }
+            LinkedPointSet linkedDestinationsAccess = destinations.link(network.streetLayer, accessMode);

-        FastRaptorWorker router = new FastRaptorWorker(network.transitLayer, request.request, reachedStops);
-
-        // Run the raptor algorithm
-        int[][] timesAtStopsEachIteration = router.route();
-
-        // Do propagation
-        int[] nonTransferTravelTimesToStops = linkedTargets.eval(sr::getTravelTimeToVertex).travelTimes;
-        Propagater propagater =
-                new Propagater(timesAtStopsEachIteration, nonTransferTravelTimesToStops, linkedTargets, request.request);
-
-        // save the instantaneous accessibility at each minute/iteration, later we will use this to compute probabilities
-        // of improvement.
-        // This means we have the fungibility issue described in AndrewOwenMeanGridStatisticComputer.
-        int[] accessibilityPerIteration = propagater.propagate(times -> {
-            int access = 0;
-            // times in row-major order, convert to grid coordinates
-            // TODO use consistent grids for all data in a project
-            for (int gridy = 0; gridy < grid.height; gridy++) {
-                int reqy = gridy + grid.north - request.north;
-                if (reqy < 0 || reqy >= request.height) continue; // outside of project bounds
-
-                for (int gridx = 0; gridx < grid.width; gridx++) {
-                    int reqx = gridx + grid.west - request.west;
-                    if (reqx < 0 || reqx >= request.width) continue; // outside of project bounds
-
-                    int index = reqy * request.width + reqx;
-                    if (times[index] < request.cutoffMinutes * 60) {
-                        access += grid.grid[gridx][gridy];
-                    }
+            // Transit search: run the raptor algorithm to get times at each destination for each iteration
+            LOG.info("Maximum number of rides: {}", request.request.maxRides);
+            LOG.info("Maximum trip duration: {}", request.request.maxTripDurationMinutes);
+
+            // first, find the access stops
+            StreetRouter sr = new StreetRouter(network.streetLayer);
+            sr.profileRequest = request.request;
+
+            TIntIntMap accessTimes;
+            int offstreetTravelSpeedMillimetersPerSecond = (int) (request.request.getSpeed(accessMode) * 1000);
+            int[] nonTransitTravelTimesToDestinations;
+
+            if (request.request.accessModes.contains(LegMode.CAR_PARK)) {
+                // Currently the first search from the origin to the P+R lots is hardcoded to use time as the dominance
+                // variable, limited by the maximum car time in seconds.
+                // The second search from P+R lots to stops is not actually a search; we just return the list of all
+                // stops reached from each P+R lot found. If multiple P+R lots reach the same stop, only the one with
+                // the shortest time is returned. Stops were found during the graph building phase.
+                // The time to a stop is the time from the CAR street router to the P+R lot, plus the car park time,
+                // plus the time to walk to the stop at the request's walk speed.
+                // By default 20 car parks are found; this can be changed with the sr.maxVertices variable.
+                sr = PointToPointQuery.findParkRidePath(request.request, sr, network.transitLayer);
+
+                if (sr == null) {
+                    // Origin not found. Return an empty access times map, as is done by the other conditions for other modes.
+                    // TODO this is ugly. we should have a way to break out of the search early (here and in other methods).
+                    accessTimes = new TIntIntHashMap();
+                } else {
+                    accessTimes = sr.getReachedStops();
+                }
+
+                // disallow non-transit access
+                // TODO should we allow non-transit access with park and ride?
+                nonTransitTravelTimesToDestinations = new int[linkedDestinationsAccess.size()];
+                Arrays.fill(nonTransitTravelTimesToDestinations, RaptorWorker.UNREACHED);
+            } else if (accessMode == StreetMode.WALK) {
+                // Special handling for the walk search: find distances, then divide by walk speed to get times,
+                // matching the behavior at egress (in the stop trees). For bike/car searches this is immaterial as
+                // the access searches are already asymmetric.
+                sr.streetMode = accessMode;
+                sr.distanceLimitMeters = 2000; // TODO hardwired, same as TravelTimeSurfaceComputer
+                sr.setOrigin(request.request.fromLat, request.request.fromLon);
+                sr.dominanceVariable = StreetRouter.State.RoutingVariable.DISTANCE_MILLIMETERS;
+                sr.route();
+
+                // Get the travel times to all stops reached in the initial on-street search. Convert distances to
+                // times: getReachedStops returns distances in this case since the dominance variable is millimeters,
+                // so divide by speed in the loop below.
+                accessTimes = sr.getReachedStops();
+                for (TIntIntIterator it = accessTimes.iterator(); it.hasNext(); ) {
+                    it.advance();
+                    it.setValue(it.value() / offstreetTravelSpeedMillimetersPerSecond);
+                }
+
+                // again, use distance / speed rather than time for symmetry with other searches
+                final StreetRouter effectivelyFinalSr = sr;
+                nonTransitTravelTimesToDestinations =
+                        linkedDestinationsAccess.eval(v -> {
+                            StreetRouter.State state = effectivelyFinalSr.getStateAtVertex(v);
+                            if (state == null) return RaptorWorker.UNREACHED;
+                            else return state.distance / offstreetTravelSpeedMillimetersPerSecond;
+                        },
+                        offstreetTravelSpeedMillimetersPerSecond).travelTimes;
+            } else {
+                // Other modes are already asymmetric with the egress/stop trees, so just do a time-based on-street
+                // search and don't worry about distance limiting.
+                sr.streetMode = accessMode;
+                sr.timeLimitSeconds = request.request.getMaxAccessTime(accessMode);
+                sr.setOrigin(request.request.fromLat, request.request.fromLon);
+                sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS;
+                sr.route();
+                accessTimes = sr.getReachedStops(); // already in seconds
+                nonTransitTravelTimesToDestinations =
+                        linkedDestinationsAccess.eval(sr::getTravelTimeToVertex, offstreetTravelSpeedMillimetersPerSecond)
+                                .travelTimes;
            }

-            return Math.round(access);
-        });
+            FastRaptorWorker router = new FastRaptorWorker(network.transitLayer, request.request, accessTimes);
+
+            // Run the raptor algorithm
+            int[][] timesAtStopsEachIteration = router.route();
+
+            // Compute bootstrap weights; see the class Javadoc for how these weights are chosen.
+            // The Mersenne Twister is a fast, high-quality RNG well-suited to Monte Carlo situations.
+            MersenneTwister twister = new MersenneTwister();
+
+            // This stores the number of times each Monte Carlo draw is included in each bootstrap sample, which could be
+            // 0, 1 or more. We store the weights on each iteration rather than a list of iterations because it allows
+            // us to easily construct the weights s.t. they sum to the original number of MC draws.
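+            // For example (small numbers for illustration): with 2 departure minutes and 3 draws per minute, one
+            // bootstrap sample could have weights [0, 2, 1, 1, 0, 2]. Each minute's weights still sum to 3, so the
+            // number of draws per departure minute matches the original analysis while individual draws are omitted
+            // or repeated.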
+ int[][] bootstrapWeights = new int[N_BOOTSTRAP_REPLICATIONS + 1][router.nMinutes * router.monteCarloDrawsPerMinute]; + + Arrays.fill(bootstrapWeights[0], 1); // equal weight to all observations for first sample + + for (int bootstrap = 1; bootstrap < bootstrapWeights.length; bootstrap++) { + for (int minute = 0; minute < router.nMinutes; minute++) { + for (int draw = 0; draw < router.monteCarloDrawsPerMinute; draw++) { + int iteration = minute * router.monteCarloDrawsPerMinute + twister.nextInt(router.monteCarloDrawsPerMinute); + bootstrapWeights[bootstrap][iteration]++; + } + } + } + + // the minimum number of times a destination must be reachable in a single bootstrap sample to be considered + // reachable. + int minCount = (int) (router.nMinutes * router.monteCarloDrawsPerMinute * (request.travelTimePercentile / 100d)); + + // Do propagation of travel times from transit stops to the destinations + PerTargetPropagater propagater = + new PerTargetPropagater(timesAtStopsEachIteration, nonTransitTravelTimesToDestinations, linkedDestinationsEgress, request.request, request.cutoffMinutes * 60); + + // store the accessibility results for each bootstrap replication + double[] bootstrapReplications = new double[N_BOOTSTRAP_REPLICATIONS + 1]; + + // The lambda will be called with a boolean array of whether the target is reachable within the cutoff at each + // Monte Carlo draw. These are then bootstrapped to create the sampling distribution. + propagater.propagate((target, reachable) -> { + int gridx = target % grid.width; + int gridy = target / grid.width; + double opportunityCountAtTarget = grid.grid[gridx][gridy]; + + // as an optimization, don't even bother to compute the sampling distribution at cells that contain no + // opportunities. + if (opportunityCountAtTarget < 1e-6) return; + + // index the Monte Carlo iterations in which the destination was reached within the travel time cutoff, + // so we can skip over the non-reachable ones in bootstrap computations. + // this improves computation speed (verified) + TIntList reachableInIterationsList = new TIntArrayList(); + + for (int i = 0; i < reachable.length; i++) { + if (reachable[i]) reachableInIterationsList.add(i); + } + + int[] reachableInIterations = reachableInIterationsList.toArray(); + + boolean isAlwaysReachableWithinTravelTimeCutoff = + reachableInIterations.length == timesAtStopsEachIteration.length; + boolean isNeverReachableWithinTravelTimeCutoff = reachableInIterations.length == 0; + + // Optimization: only bootstrap if some of the travel times are above the cutoff and some below. + // If a destination is always reachable, it will perforce be reachable always in every bootstrap + // sample, so there is no need to compute the bootstraps, and similarly if it is never reachable. + if (isAlwaysReachableWithinTravelTimeCutoff) { + // this destination is always reachable and will be included in all bootstrap samples, no need to do the + // bootstrapping + // possible optimization: have a variable that persists between calls to this lambda and increment + // that single value, then add that value to each replication at the end; reduces the number of additions. 
+ for (int i = 0; i < bootstrapReplications.length; i++) + bootstrapReplications[i] += grid.grid[gridx][gridy]; + } else if (isNeverReachableWithinTravelTimeCutoff) { + // do nothing, never reachable, does not impact accessibility + } else { + // This origin is sometimes reachable within the time window, do bootstrapping to determine + // the distribution of how often + for (int bootstrap = 0; bootstrap < N_BOOTSTRAP_REPLICATIONS + 1; bootstrap++) { + int count = 0; + for (int iteration : reachableInIterations) { + count += bootstrapWeights[bootstrap][iteration]; + } + + // TODO sigmoidal rolloff here, to avoid artifacts from large destinations that jump a few seconds + // in or out of the cutoff. + if (count > minCount) { + bootstrapReplications[bootstrap] += opportunityCountAtTarget; + } + } + } + }); + + // round (not cast/floor) these all to ints. + int[] intReplications = DoubleStream.of(bootstrapReplications).mapToInt(d -> (int) Math.round(d)).toArray(); + finish(intReplications); + } + } + + private void finish (int[] samples) throws IOException { // now construct the output // these things are tiny, no problem storing in memory ByteArrayOutputStream baos = new ByteArrayOutputStream(); - new Origin(request, accessibilityPerIteration).write(baos); + new Origin(request, samples).write(baos); // send this origin to an SQS queue as a binary payload; it will be consumed by GridResultConsumer // and GridResultAssembler diff --git a/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java index 492a70565..f2e751250 100644 --- a/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java +++ b/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java @@ -14,107 +14,28 @@ * used to create grid B will produce a higher accessibility value than the scenario used to compute grid A. Since this * uses instantaneous accessibility (based on the accessibility at a single minute and Monte Carlo draw), it is in the same * family as the AndrewOwenMeanGridStatisticComputer, and has the same fungibility concerns documented there. + * + * Note that this does not extend GridStatisticComputer as it works with two access grids simultaneously. */ -public class ImprovementProbabilityGridStatisticComputer { - private static final AmazonS3 s3 = new AmazonS3Client(); - - /** Version of the access grid format we read */ - private static final int ACCESS_GRID_VERSION = 0; - - /** - * Calculate the probability at each origin that a random individual sample from regional analysis B is larger than one from regional - * analysis A. We do this empirically and exhaustively by for each origin looping over every possible combination of - * samples and taking a difference, then evaluating the number that yielded results greater than zero. - * - * The regional analysis access grids must be of identical size and zoom level, and a Grid object (the same as is used - * for destination grids) will be returned, with probabilities scaled from 0 to 100,000. 
- */ - public static Grid computeImprovementProbability (String resultBucket, String regionalAnalysisAKey, String regionalAnalysisBKey) throws IOException { - S3Object baseGrid = s3.getObject(resultBucket, regionalAnalysisAKey); - S3Object scenarioGrid = s3.getObject(resultBucket, regionalAnalysisBKey); - LittleEndianDataInputStream base = new LittleEndianDataInputStream(new GZIPInputStream(baseGrid.getObjectContent())); - LittleEndianDataInputStream scenario = new LittleEndianDataInputStream(new GZIPInputStream(scenarioGrid.getObjectContent())); - validateHeaderAndVersion(base); - validateHeaderAndVersion(scenario); - - int zoom = base.readInt(); - int west = base.readInt(); - int north = base.readInt(); - int width = base.readInt(); - int height = base.readInt(); - - int scenarioZoom = scenario.readInt(); - int scenarioWest = scenario.readInt(); - int scenarioNorth = scenario.readInt(); - int scenarioWidth = scenario.readInt(); - int scenarioHeight = scenario.readInt(); - - if (zoom != scenarioZoom || - west != scenarioWest || - north != scenarioNorth || - width != scenarioWidth || - height != scenarioHeight) { - throw new IllegalArgumentException("Grid sizes for comparison must be identical!"); - } - - // number of iterations need not be equal, the computed probability is still valid even if they are not - // as the probability of choosing any particular sample is still uniform within each scenario. - int baseIterations = base.readInt(); - int scenarioIterations = scenario.readInt(); - - Grid out = new Grid(zoom, width, height, north, west); - - // pixels are in row-major order, iterate over y on outside - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - int[] baseValues = new int[baseIterations]; - int[] scenarioValues = new int[scenarioIterations]; - int positive = 0; - - for (int iteration = 0, val = 0; iteration < baseIterations; iteration++) { - baseValues[iteration] = (val += base.readInt()); - } - - for (int iteration = 0, val = 0; iteration < scenarioIterations; iteration++) { - scenarioValues[iteration] = (val += scenario.readInt()); - } - - // loop over all scenarion and base values for this cell and determine how many have positive difference - // this is effectively a convolution, we are calculating the difference (a special case of the sum) for - // two empirical distributions. At some point in the future we could save the probability distribution of - // changes rather than the probability of improvement. - for (int scenarioVal : scenarioValues) { - for (int baseVal : baseValues) { - if (scenarioVal > baseVal) positive++; - } - } - - // Store as probabilities on 0 to 100,000 scale, and store in the grid. - // We are using the same classes used for opportunity grids here, so they are stored as doubles, and - // will be cast to ints when saved. This is to avoid rounding error when projecting opportunities into - // the grids, which we're not doing here, but these grids are small enough the use of doubles instead of ints - // is relatively harmless. 
- out.grid[x][y] = (double) positive / (scenarioIterations * baseIterations) * 100_000; +public class ImprovementProbabilityGridStatisticComputer extends DualGridStatisticComputer { + @Override + protected double computeValuesForOrigin(int x, int y, int[] aValues, int[] bValues) { + int positive = 0; + // loop over all scenario and base values for this cell and determine how many have positive difference + // this is effectively a convolution, we are calculating the difference (a special case of the sum) for + // two empirical distributions. At some point in the future we could save the probability distribution of + // changes rather than the probability of improvement. + for (int scenarioVal : bValues) { + for (int baseVal : aValues) { + if (scenarioVal > baseVal) positive++; } } - return out; - } - - private static void validateHeaderAndVersion(LittleEndianDataInputStream input) throws IOException { - char[] header = new char[8]; - for (int i = 0; i < 8; i++) { - header[i] = (char) input.readByte(); - } - - if (!"ACCESSGR".equals(new String(header))) { - throw new IllegalArgumentException("Input not in access grid format!"); - } - - int version = input.readInt(); - - if (version != ACCESS_GRID_VERSION) { - throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version)); - } + // Store as probabilities on 0 to 100,000 scale, and store in the grid. + // We are using the same classes used for opportunity grids here, so they are stored as doubles, and + // will be cast to ints when saved. This is to avoid rounding error when projecting opportunities into + // the grids, which we're not doing here, but these grids are small enough the use of doubles instead of ints + // is relatively harmless. + return (double) positive / (aValues.length * bValues.length) * 100_000; } } diff --git a/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java index 39d4cbba5..fee905e55 100644 --- a/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java +++ b/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java @@ -3,6 +3,7 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.S3Object; +import com.conveyal.r5.analyst.scenario.GridStatisticComputer; import com.google.common.io.LittleEndianDataInputStream; import com.google.common.primitives.Ints; import org.apache.commons.math3.stat.descriptive.rank.Percentile; @@ -16,70 +17,22 @@ * Take an access grid, which is a grid containing a vector of accessibility at each origin, and collapse to grid of * scalars containing a percentile of accessibility for each origin. 
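 *
 * For example (values invented): with five sorted values at an origin and percentile = 90, the interpolated index is
 * 0.90 * (5 - 1) = 3.6, so lowerIndex = 3, upperIndex = 4, remainder = 0.6, and the result is
 * values[4] * 0.6 + values[3] * 0.4, linearly interpolating between the two nearest samples.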
*/ -public class PercentileGridStatisticComputer { - private static final AmazonS3 s3 = new AmazonS3Client(); +public class PercentileGridStatisticComputer extends GridStatisticComputer { + public final double percentile; - /** Version of the access grid format we read */ - private static final int ACCESS_GRID_VERSION = 0; - - /** Compute a particular percentile of a grid (percentile between 0 and 100) */ - public static Grid computePercentile (String resultsBucket, String key, double percentile) throws IOException { - percentile /= 100; - - if (percentile < 0 || percentile > 1) { - throw new IllegalArgumentException("Invalid percentile!"); - } - - S3Object accessGrid = s3.getObject(resultsBucket, key); - - LittleEndianDataInputStream input = new LittleEndianDataInputStream(new GZIPInputStream(accessGrid.getObjectContent())); - - char[] header = new char[8]; - for (int i = 0; i < 8; i++) { - header[i] = (char) input.readByte(); - } - - if (!"ACCESSGR".equals(new String(header))) { - throw new IllegalArgumentException("Input not in access grid format!"); - } - - int version = input.readInt(); - - if (version != ACCESS_GRID_VERSION) { - throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version)); - } - - int zoom = input.readInt(); - int west = input.readInt(); - int north = input.readInt(); - int width = input.readInt(); - int height = input.readInt(); - int nIterations = input.readInt(); - - Grid grid = new Grid(zoom, width, height, north, west); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - int[] values = new int[nIterations]; - - for (int iteration = 0, val = 0; iteration < nIterations; iteration++) { - values[iteration] = (val += input.readInt()); - } - - // compute percentiles - Arrays.sort(values); - double index = percentile * (values.length - 1); - int lowerIndex = (int) Math.floor(index); - int upperIndex = (int) Math.ceil(index); - double remainder = index % 1; - // weight the upper and lower values by where the percentile falls between them (linear interpolation) - double val = values[upperIndex] * remainder + values[lowerIndex] * (1 - remainder); - grid.grid[x][y] = val; - } - } - - input.close(); + public PercentileGridStatisticComputer (double percentile) { + this.percentile = percentile; + } - return grid; + @Override + protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) { + // compute percentiles + Arrays.sort(valuesThisOrigin); + double index = percentile / 100 * (valuesThisOrigin.length - 1); + int lowerIndex = (int) Math.floor(index); + int upperIndex = (int) Math.ceil(index); + double remainder = index % 1; + // weight the upper and lower values by where the percentile falls between them (linear interpolation) + return valuesThisOrigin[upperIndex] * remainder + valuesThisOrigin[lowerIndex] * (1 - remainder); } } diff --git a/src/main/java/com/conveyal/r5/analyst/TravelTimeSurfaceComputer.java b/src/main/java/com/conveyal/r5/analyst/TravelTimeSurfaceComputer.java new file mode 100644 index 000000000..a3fdd031b --- /dev/null +++ b/src/main/java/com/conveyal/r5/analyst/TravelTimeSurfaceComputer.java @@ -0,0 +1,224 @@ +package com.conveyal.r5.analyst; + +import com.conveyal.r5.analyst.cluster.AccessGridWriter; +import com.conveyal.r5.analyst.cluster.TravelTimeSurfaceRequest; +import com.conveyal.r5.analyst.error.TaskError; +import com.conveyal.r5.api.util.LegMode; +import com.conveyal.r5.common.JsonUtilities; +import 
com.conveyal.r5.point_to_point.builder.PointToPointQuery; +import com.conveyal.r5.profile.FastRaptorWorker; +import com.conveyal.r5.profile.PerTargetPropagater; +import com.conveyal.r5.profile.RaptorWorker; +import com.conveyal.r5.profile.StreetMode; +import com.conveyal.r5.streets.LinkedPointSet; +import com.conveyal.r5.streets.StreetRouter; +import com.conveyal.r5.transit.TransportNetwork; +import gnu.trove.iterator.TIntIntIterator; +import gnu.trove.map.TIntIntMap; +import gnu.trove.map.hash.TIntIntHashMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.stream.IntStream; + +/** + * This computes a travel time surface and returns it in access grid format, with each pixel of the grid containing + * different percentiles of travel time requested by the frontend. + */ +public class TravelTimeSurfaceComputer { + private static final Logger LOG = LoggerFactory.getLogger(TravelTimeSurfaceComputer.class); + private static final WebMercatorGridPointSetCache pointSetCache = new WebMercatorGridPointSetCache(); + + public final TravelTimeSurfaceRequest request; + public final TransportNetwork network; + + public TravelTimeSurfaceComputer (TravelTimeSurfaceRequest request, TransportNetwork network) { + this.request = request; + this.network = network; + } + + public void write (OutputStream os) throws IOException { + StreetMode accessMode = LegMode.getDominantStreetMode(request.request.accessModes); + StreetMode directMode = LegMode.getDominantStreetMode(request.request.directModes); + + int nPercentiles = request.percentiles.length; + + WebMercatorGridPointSet destinations = pointSetCache.get(request.zoom, request.west, request.north, request.width, request.height); + + AccessGridWriter output; + try { + output = new AccessGridWriter(request.zoom, request.west, request.north, request.width, request.height, nPercentiles); + } catch (IOException e) { + // in memory, should not be able to throw this + throw new RuntimeException(e); + } + + if (request.request.transitModes.isEmpty()) { + // non transit search + StreetRouter sr = new StreetRouter(network.streetLayer); + sr.timeLimitSeconds = request.request.maxTripDurationMinutes * 60; + sr.streetMode = directMode; + sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS; + sr.profileRequest = request.request; + sr.setOrigin(request.request.fromLat, request.request.fromLon); + sr.route(); + + int offstreetTravelSpeedMillimetersPerSecond = (int) (request.request.getSpeed(directMode) * 1000); + + LinkedPointSet linkedDestinations = destinations.link(network.streetLayer, directMode); + + int[] travelTimesToTargets = linkedDestinations.eval(sr::getTravelTimeToVertex, offstreetTravelSpeedMillimetersPerSecond).travelTimes; + for (int target = 0; target < travelTimesToTargets.length; target++) { + int x = target % request.width; + int y = target / request.width; + + final int travelTimeMinutes = + travelTimesToTargets[target] == RaptorWorker.UNREACHED ? RaptorWorker.UNREACHED : travelTimesToTargets[target] / 60; + // the frontend expects percentiles of travel time. There is no variation in nontransit travel time so + // just replicate the same number repeatedly. This could be improved, but at least it will compress well. + // int divide (floor) used below as well. TODO is this wise? 
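+                    // e.g. a nontransit travel time of 59m30s floors to 59 minutes, and every requested percentile
+                    // receives that same value, since a single street search produces no variation to summarize.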
+ int[] results = IntStream.range(0, nPercentiles).map(i -> travelTimeMinutes).toArray();
+ try {
+ output.writePixel(x, y, results);
+ } catch (IOException e) {
+ // can't happen as we're not using a file system backed output
+ throw new RuntimeException(e);
+ }
+ }
+ } else {
+ // We always walk to egress from transit, but we may have a different access mode.
+ // If the access mode is also walk, the pointset's linkage cache will return two references to the same
+ // linkage below.
+ // TODO use directMode? Is that a resource limiting issue?
+ // Also, GridComputer uses accessMode to avoid running two street searches.
+ LinkedPointSet linkedDestinationsAccess = destinations.link(network.streetLayer, accessMode);
+ LinkedPointSet linkedDestinationsEgress = destinations.link(network.streetLayer, StreetMode.WALK);
+
+ if (!request.request.directModes.equals(request.request.accessModes)) {
+ LOG.warn("Disparate direct modes and access modes are not supported in analysis mode.");
+ }
+
+ // Perform street search to find transit stops and non-transit times.
+ StreetRouter sr = new StreetRouter(network.streetLayer);
+ sr.profileRequest = request.request;
+
+ TIntIntMap accessTimes;
+ int offstreetTravelSpeedMillimetersPerSecond = (int) (request.request.getSpeed(accessMode) * 1000);
+ int[] nonTransitTravelTimesToDestinations;
+
+ if (request.request.accessModes.contains(LegMode.CAR_PARK)) {
+ // The first search, from the origin to park-and-ride lots, is hardcoded to use travel time as the dominance
+ // variable, limited by the maximum car time. The second search, from the lots to transit stops, is not
+ // actually a search; we simply return the stops reached from each lot found (these were identified during
+ // the graph building phase). If multiple lots reach the same stop, only the one with the shortest time is kept.
+ // Time to a stop is the CAR StreetRouter time to the lot, plus the park-and-ride switchover time, plus the
+ // walk time to the stop at the request's walk speed.
+ // By default 20 park-and-ride lots are found; this can be changed with the sr.maxVertices variable.
+ sr = PointToPointQuery.findParkRidePath(request.request, sr, network.transitLayer);
+
+ if (sr == null) {
+ // Origin not found. Return an empty access times map, as is done by the other conditions for other modes.
+ // TODO this is ugly. we should have a way to break out of the search early (here and in other methods).
+ accessTimes = new TIntIntHashMap();
+ } else {
+ accessTimes = sr.getReachedStops();
+ }
+
+ // disallow non-transit access
+ // TODO should we allow non transit access with park and ride?
+ nonTransitTravelTimesToDestinations = new int[linkedDestinationsAccess.size()];
+ Arrays.fill(nonTransitTravelTimesToDestinations, RaptorWorker.UNREACHED);
+ } else if (accessMode == StreetMode.WALK) {
+ // Special handling for the walk search: find distances, then divide by walk speed to get times in seconds,
+ // matching the behavior at egress (in the stop trees). For bike/car searches this is immaterial as the access
+ // searches are already asymmetric.
+ sr.streetMode = accessMode;
+ sr.distanceLimitMeters = 2000; // TODO hardwired same as gridcomputer
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DISTANCE_MILLIMETERS;
+ sr.route();
+
+ // Get the travel times to all stops reached in the initial on-street search. Convert distances to times.
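Review note on this comment block: a minimal sketch, assuming a walk speed in m/s, of the millimeters-to-seconds conversion being described (hypothetical helper, not part of this PR):

    public class WalkTimeSketch {
        /** Convert a street distance in millimeters to a travel time in seconds at the given walk speed. */
        static int secondsFromDistance (int distanceMillimeters, double walkSpeedMetersPerSecond) {
            int speedMillimetersPerSecond = (int) (walkSpeedMetersPerSecond * 1000); // e.g. 1.3 m/s -> 1300 mm/s
            return distanceMillimeters / speedMillimetersPerSecond; // integer divide floors, matching the egress treatment
        }

        public static void main (String... args) {
            System.out.println(secondsFromDistance(2_000_000, 1.3)); // 2 km at 1.3 m/s -> 1538 seconds
        }
    }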
+ // getReachedStops returns distances in this case since dominance variable is millimeters, + // so convert to times in the loop below + accessTimes = sr.getReachedStops(); + for (TIntIntIterator it = accessTimes.iterator(); it.hasNext(); ) { + it.advance(); + it.setValue(it.value() / offstreetTravelSpeedMillimetersPerSecond); + } + + // again, use distance / speed rather than time for symmetry with other searches + final StreetRouter effectivelyFinalSr = sr; + nonTransitTravelTimesToDestinations = + linkedDestinationsAccess.eval(v -> { + StreetRouter.State state = effectivelyFinalSr.getStateAtVertex(v); + if (state == null) return RaptorWorker.UNREACHED; + else return state.distance / offstreetTravelSpeedMillimetersPerSecond; + }, + offstreetTravelSpeedMillimetersPerSecond).travelTimes; + } else { + // Other modes are already asymmetric with the egress/stop trees, so just do a time-based on street + // search and don't worry about distance limiting. + sr.streetMode = accessMode; + sr.timeLimitSeconds = request.request.getMaxAccessTime(accessMode); + sr.setOrigin(request.request.fromLat, request.request.fromLon); + sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS; + sr.route(); + accessTimes = sr.getReachedStops(); // already in seconds + nonTransitTravelTimesToDestinations = + linkedDestinationsAccess.eval(sr::getTravelTimeToVertex, offstreetTravelSpeedMillimetersPerSecond) + .travelTimes; + } + + // Create a new Raptor Worker. + FastRaptorWorker worker = new FastRaptorWorker(network.transitLayer, request.request, accessTimes); + + // Run the main RAPTOR algorithm to find paths and travel times to all stops in the network. + int[][] transitTravelTimesToStops = worker.route(); + + PerTargetPropagater perTargetPropagater = new PerTargetPropagater(transitTravelTimesToStops, nonTransitTravelTimesToDestinations, linkedDestinationsEgress, request.request, 120 * 60); + perTargetPropagater.propagateTimes((target, times) -> { + // sort the times at each target and read off percentiles + Arrays.sort(times); + int[] results = new int[nPercentiles]; + + for (int i = 0; i < nPercentiles; i++) { + int offset = (int) Math.round(request.percentiles[i] / 100d * times.length); + // Int divide will floor; this is correct because value 0 has travel times of up to one minute, etc. + // This means that anything less than a cutoff of (say) 60 minutes (in seconds) will have value 59, + // which is what we want. But maybe this is tying the backend and frontend too closely. + results[i] = times[offset] == RaptorWorker.UNREACHED ? 
RaptorWorker.UNREACHED : times[offset] / 60; + } + + int x = target % request.width; + int y = target / request.width; + try { + output.writePixel(x, y, results); + } catch (IOException e) { + // can't happen as we're not using a file system backed output + throw new RuntimeException(e); + } + }); + } + + LOG.info("Travel time surface of size {}kb complete", output.getBytes().length / 1000); + + os.write(output.getBytes()); + + LOG.info("Travel time surface written, appending metadata with {} warnings", network.scenarioApplicationWarnings.size()); + + // Append scenario application warning JSON to result + ResultMetadata metadata = new ResultMetadata(); + metadata.scenarioApplicationWarnings = network.scenarioApplicationWarnings; + JsonUtilities.objectMapper.writeValue(os, metadata); + + LOG.info("Done writing"); + + os.close(); + } + + private static class ResultMetadata { + public Collection scenarioApplicationWarnings; + } +} diff --git a/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java b/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java index 69143f973..e165c0e6a 100644 --- a/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java +++ b/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java @@ -27,6 +27,10 @@ public WebMercatorGridPointSet get (int zoom, int west, int north, int width, in return cache.computeIfAbsent(key, GridKey::toPointset); } + public WebMercatorGridPointSet get(Grid grid) { + return get(grid.zoom, grid.west, grid.north, grid.width, grid.height); + } + private static class GridKey { public int zoom; public int west; diff --git a/src/main/java/com/conveyal/r5/analyst/broker/Broker.java b/src/main/java/com/conveyal/r5/analyst/broker/Broker.java index 2263c7edc..acdc0527d 100644 --- a/src/main/java/com/conveyal/r5/analyst/broker/Broker.java +++ b/src/main/java/com/conveyal/r5/analyst/broker/Broker.java @@ -12,6 +12,7 @@ import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; +import com.google.common.io.ByteStreams; import gnu.trove.map.TIntObjectMap; import gnu.trove.map.TObjectLongMap; import gnu.trove.map.hash.TIntObjectHashMap; @@ -24,6 +25,7 @@ import org.slf4j.LoggerFactory; import java.io.*; +import java.text.MessageFormat; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.*; @@ -182,7 +184,7 @@ public Broker (Properties brokerConfig, String addr, int port) { workerConfig.setProperty("auto-shutdown", "true"); } - // TODO what are these for? + // Tags for the workers workerName = brokerConfig.getProperty("worker-name") != null ? brokerConfig.getProperty("worker-name") : "analyst-worker"; project = brokerConfig.getProperty("project") != null ? brokerConfig.getProperty("project") : "analyst"; @@ -349,7 +351,7 @@ public void createWorkersInCategory (WorkerCategory category) { return; } - // TODO: compute + // TODO: should we start multiple workers on large jobs? int nWorkers = 1; // There are no workers on this graph with the right worker commit, start some. @@ -365,13 +367,10 @@ public void createWorkersInCategory (WorkerCategory category) { // It's fine to just modify the worker config without a protective copy because this method is synchronized. workerConfig.setProperty("initial-graph-id", category.graphId); - workerConfig.setProperty("worker-version", category.workerVersion); // Tell the worker where to get its R5 JAR. 
This is a Conveyal S3 bucket with HTTP access turned on. String workerDownloadUrl = String.format("https://r5-builds.s3.amazonaws.com/%s.jar", category.workerVersion); - workerConfig.setProperty("download-url", workerDownloadUrl); - // This is the R5 broker, so always start R5 workers (rather than OTP workers). - workerConfig.setProperty("main-class", AnalystWorker.class.getName()); + ByteArrayOutputStream cfg = new ByteArrayOutputStream(); try { workerConfig.store(cfg, "Worker config"); @@ -380,9 +379,29 @@ public void createWorkersInCategory (WorkerCategory category) { throw new RuntimeException(e); } - // Send the config to the new workers as EC2 "user data" - String userData = new String(Base64.getEncoder().encode(cfg.toByteArray())); - req.setUserData(userData); + // Read in the startup script + // We used to just pass the config to custom AMI, but by constructing a startup script that initializes a stock + // Amazon Linux AMI, we don't have to worry about maintaining and keeping our AMI up to date. Amazon Linux applies + // important security updates on startup automatically. + try { + String workerConfigString = cfg.toString(); + InputStream scriptIs = Broker.class.getClassLoader().getResourceAsStream("worker.sh"); + ByteArrayOutputStream scriptBaos = new ByteArrayOutputStream(); + ByteStreams.copy(scriptIs, scriptBaos); + scriptIs.close(); + scriptBaos.close(); + String scriptTemplate = scriptBaos.toString(); + + String logGroup = workerConfig.getProperty("log-group"); + + String script = MessageFormat.format(scriptTemplate, workerDownloadUrl, logGroup, workerConfigString); + + // Send the config to the new workers as EC2 "user data" + String userData = new String(Base64.getEncoder().encode(script.getBytes())); + req.setUserData(userData); + } catch (IOException e) { + throw new RuntimeException(e); + } if (brokerConfig.getProperty("worker-iam-role") != null) req.setIamInstanceProfile(new IamInstanceProfileSpecification().withArn(brokerConfig.getProperty("worker-iam-role"))); @@ -398,7 +417,7 @@ public void createWorkersInCategory (WorkerCategory category) { RunInstancesResult res = ec2.runInstances(req); res.getReservation().getInstances().forEach(i -> { Collection tags = Arrays.asList( - new Tag("name", workerName), + new Tag("Name", workerName), new Tag("project", project) ); i.setTags(tags); diff --git a/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java b/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java index 54a9308b2..5282841fc 100644 --- a/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java +++ b/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java @@ -205,6 +205,8 @@ else if (request.getMethod() == Method.POST && "complete".equals(command)) { // believes to be a reliable work result. suspendedProducerResponse.setStatus(HttpStatus.OK_200); suspendedProducerResponse.setContentType(request.getContentType()); + String contentEncoding = request.getHeader("Content-Encoding"); + if (contentEncoding != null) suspendedProducerResponse.setHeader("Content-Encoding", contentEncoding); } else { // The worker is providing an error message because it spotted something wrong with the request. 
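Review note on the Broker change above: worker.sh itself is not shown in this diff, so the template below is hypothetical, but it illustrates the MessageFormat contract the broker now relies on ({0} = R5 jar URL, {1} = log group, {2} = worker config). One caveat worth documenting: in MessageFormat patterns a single quote starts a quoted literal, so any literal quote the shell script needs must be written as '' in the template.

    import java.text.MessageFormat;

    public class UserDataSketch {
        public static void main (String... args) {
            // hypothetical stand-in for worker.sh; note the doubled quotes around 'starting'
            String template = "#!/bin/bash\nwget {0}\necho ''starting'' >> /var/log/{1}.log\ncat > worker.conf <<EOF\n{2}\nEOF\n";
            String script = MessageFormat.format(template,
                    "https://r5-builds.s3.amazonaws.com/v2.5.0.jar", // workerDownloadUrl
                    "analyst-workers",                               // logGroup
                    "initial-graph-id=abc123");                      // workerConfigString
            System.out.println(script);
        }
    }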
suspendedProducerResponse.setStatus(Integer.parseInt(successOrError));
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AccessGridWriter.java b/src/main/java/com/conveyal/r5/analyst/cluster/AccessGridWriter.java
new file mode 100644
index 000000000..42b6d19aa
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/AccessGridWriter.java
@@ -0,0 +1,151 @@
+package com.conveyal.r5.analyst.cluster;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+/**
+ * Random-access access grid writer - write arrays into offsets in an access grid format file (3-d array).
+ * Access grids look like this:
+ * Header (ASCII text "ACCESSGR") (note that this header is eight bytes, so the full grid can be mapped into a
+ * JavaScript typed array if desired)
+ * (4 byte int) version of the access grid format
+ * (4 byte int) Web mercator zoom level
+ * (4 byte int) west (x) edge of the grid, i.e. how many pixels this grid is east of the left edge of the world
+ * (4 byte int) north (y) edge of the grid, i.e. how many pixels this grid is south of the top edge of the world
+ * (4 byte int) width of the grid in pixels
+ * (4 byte int) height of the grid in pixels
+ * (4 byte int) number of values per pixel
+ * (repeated 4-byte int) values of each pixel in row major order. Values within a given pixel are delta coded.
+ */
+public class AccessGridWriter {
+ public static final Logger LOG = LoggerFactory.getLogger(AccessGridWriter.class);
+
+ /** The version of the access grids we produce */
+ public static final int VERSION = 0;
+
+ /** The version of the origins we consume */
+ public static final int ORIGIN_VERSION = 0;
+
+ /** The offset to get to the data section of the access grid file */
+ public static final long HEADER_SIZE = 9 * 4;
+
+ private BufferAbstraction buffer;
+
+ // store as longs so we can use them with impunity without fear of overflow
+ private final long zoom, west, north, width, height, nValuesPerPixel;
+
+ /** Create a new access grid writer for a width x height x nValuesPerPixel array.
*/ + public AccessGridWriter (int zoom, int west, int north, int width, int height, int nValuesPerPixel) throws IOException { + this(null, zoom, west, north, width, height, nValuesPerPixel); + } + + /** Create a file-backed AccessGridWriter */ + public AccessGridWriter (File gridFile, int zoom, int west, int north, int width, int height, int nValuesPerPixel) throws IOException { + this.zoom = zoom; + this.west = west; + this.north = north; + this.width = width; + this.height = height; + this.nValuesPerPixel = nValuesPerPixel; + + long nBytes = (long) width * height * nValuesPerPixel * 4 + HEADER_SIZE; + this.buffer = new BufferAbstraction(gridFile, nBytes); + + buffer.writeString(0, "ACCESSGR"); + buffer.writeInt(8, VERSION); + buffer.writeInt(12, zoom); + buffer.writeInt(16, west); + buffer.writeInt(20, north); + buffer.writeInt(24, width); + buffer.writeInt(28, height); + buffer.writeInt(32, nValuesPerPixel); + } + + public void writePixel (int x, int y, int[] pixelValues) throws IOException { + if (pixelValues.length != nValuesPerPixel) throw new IllegalArgumentException("Incorrect pixel size!"); + + long index1d = (y * width + x) * nValuesPerPixel * 4 + 36; + + int prev = 0; + for (int i : pixelValues) { + buffer.writeInt(index1d, i - prev); + prev = i; + index1d += 4; + } + } + + public void close () throws IOException { + this.buffer.close(); + } + + public byte[] getBytes () { + return buffer.getBytes(); + } + + private static class BufferAbstraction { + private RandomAccessFile file; + private byte[] buffer; + + public BufferAbstraction (File file, long length) throws IOException { + if (file != null) { + this.file = new RandomAccessFile(file, "rw"); + this.file.setLength(length); + // clear the contents of the file, don't accidentally leak disk contents + for (long i = 0; i < length; i++) { + this.file.writeByte(0); + } + } else { + if (length > Integer.MAX_VALUE) throw new IllegalArgumentException("Attempt to create in memory buffer larger than 2GB"); + this.buffer = new byte[(int) length]; + } + } + + public void writeInt (long offset, int value) throws IOException { + byte a = (byte) value; + byte b = (byte) (value >> 8); + byte c = (byte) (value >> 16); + byte d = (byte) (value >> 24); + + if (file != null) { + file.seek(offset); + // can't use file.writeInt, as it is in typical Java fashion big-endian . . . 
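Review note on the comment above: an equivalent little-endian encoding can be had from java.nio.ByteBuffer; a minimal sketch (not part of this PR) for comparison with the manual byte shifts below:

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    public class LittleEndianSketch {
        /** Encode an int as four little-endian bytes, as the manual shifts below do. */
        static byte[] littleEndianBytes (int value) {
            return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(value).array();
        }

        public static void main (String... args) {
            // 1 encodes as 01 00 00 00
            for (byte b : littleEndianBytes(1)) System.out.printf("%02x ", b & 0xff);
        }
    }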
+ file.writeByte(a); + file.writeByte(b); + file.writeByte(c); + file.writeByte(d); + } else { + int offsetInt = (int) offset; + buffer[offsetInt] = a; + buffer[offsetInt + 1] = b; + buffer[offsetInt + 2] = c; + buffer[offsetInt + 3] = d; + } + } + + public void writeString (long offset, String value) throws IOException { + if (file != null) { + file.seek(offset); + file.writeBytes(value); + } else { + int offsetInt = (int) offset; + byte[] bytes = value.getBytes(); + for (byte byt : bytes) { + buffer[offsetInt++] = byt; + } + } + } + + public byte[] getBytes () { + if (buffer != null) return buffer; + else throw new UnsupportedOperationException("Attempt to retrieve bytes for on-disk access grid."); + } + + public void close () throws IOException { + if (this.file != null) this.file.close(); + } + } +} diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java index 8b8db548f..7de3256f3 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java @@ -6,6 +6,7 @@ import com.amazonaws.services.s3.AmazonS3Client; import com.conveyal.r5.analyst.GridCache; import com.conveyal.r5.analyst.GridComputer; +import com.conveyal.r5.analyst.TravelTimeSurfaceComputer; import com.conveyal.r5.analyst.error.ScenarioApplicationException; import com.conveyal.r5.analyst.error.TaskError; import com.conveyal.r5.api.util.LegMode; @@ -248,8 +249,10 @@ public AnalystWorker(Properties config, TransportNetworkCache cache) { public void run() { // Create executors with up to one thread per processor. + // fix size of highPriorityExecutor to avoid broken pipes when threads are killed off before they are done sending data + // (not confirmed if this actually works) int nP = Runtime.getRuntime().availableProcessors(); - highPriorityExecutor = new ThreadPoolExecutor(1, nP, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(255)); + highPriorityExecutor = new ThreadPoolExecutor(nP, nP, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(255)); highPriorityExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy()); batchExecutor = new ThreadPoolExecutor(1, nP, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(nP * 2)); batchExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.AbortPolicy()); @@ -266,8 +269,11 @@ public void run() { // so the worker needs to not come online until it's ready to process requests. if (networkId != null) { LOG.info("Pre-loading or building network with ID {}", networkId); - transportNetworkCache.getNetwork(networkId); - LOG.info("Done pre-loading network {}", networkId); + if (transportNetworkCache.getNetwork(networkId) == null) { + LOG.error("Failed to pre-load transport network {}", networkId); + } else { + LOG.info("Done pre-loading network {}", networkId); + } } // Start filling the work queues. 
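Review note ahead of the next hunk: the handleTravelTimeSurfaceRequest method added below streams gzipped results to the broker through a pipe, with the HTTP POST running on a separate thread. A minimal self-contained sketch of that producer/consumer pattern (hypothetical payload; in the worker the consumer is the POST in finishPriorityTask):

    import java.io.ByteArrayOutputStream;
    import java.io.PipedInputStream;
    import java.io.PipedOutputStream;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.zip.GZIPOutputStream;

    public class PipedGzipSketch {
        public static void main (String... args) throws Exception {
            PipedInputStream pis = new PipedInputStream();
            GZIPOutputStream gzos = new GZIPOutputStream(new PipedOutputStream(pis));

            // consumer thread: in the worker this is the POST back to the broker
            ExecutorService exec = Executors.newSingleThreadExecutor();
            Future<byte[]> sent = exec.submit(() -> {
                ByteArrayOutputStream received = new ByteArrayOutputStream();
                byte[] buf = new byte[4096];
                for (int n; (n = pis.read(buf)) != -1; ) received.write(buf, 0, n);
                return received.toByteArray();
            });

            // producer: highly repetitive surfaces compress on the order of 100x
            for (int i = 0; i < 100_000; i++) gzos.write(0);
            gzos.close(); // flushes the gzip trailer so the consumer sees EOF

            System.out.println("compressed bytes: " + sent.get().length);
            exec.shutdown();
        }
    }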
@@ -404,6 +410,8 @@ private void handleOneRequest(GenericClusterRequest clusterRequest) { this.handleStaticMetadataRequest((StaticMetadata.MetadataRequest) clusterRequest, transportNetwork, ts); } else if (clusterRequest instanceof StaticMetadata.StopTreeRequest) { this.handleStaticStopTrees((StaticMetadata.StopTreeRequest) clusterRequest, transportNetwork, ts); + } else if (clusterRequest instanceof TravelTimeSurfaceRequest) { + this.handleTravelTimeSurfaceRequest((TravelTimeSurfaceRequest) clusterRequest, transportNetwork, ts); } else if (clusterRequest instanceof GridRequest) { this.handleGridRequest((GridRequest) clusterRequest, transportNetwork, ts); } else { @@ -510,6 +518,26 @@ private void handleStaticStopTrees (StaticMetadata.StopTreeRequest request, Tran } } + /** handle a request for a travel time surface */ + public void handleTravelTimeSurfaceRequest (TravelTimeSurfaceRequest request, TransportNetwork network, TaskStatistics ts) { + TravelTimeSurfaceComputer computer = new TravelTimeSurfaceComputer(request, network); + try { + PipedInputStream pis = new PipedInputStream(); + + // gzip the data before sending it to the broker. Compression ratios here are extreme (100x is not uncommon) + // so this will help avoid broken pipes, connection reset by peer, buffer overflows, etc. since these types + // of errors are more common on large files. + GZIPOutputStream pos = new GZIPOutputStream(new PipedOutputStream(pis)); + + finishPriorityTask(request, pis, "application/octet-stream", "gzip"); + + computer.write(pos); + pos.close(); + } catch (IOException e) { + LOG.error("Error writing travel time surface to broker", e); + } + } + /** Handle a request for access from a Web Mercator grid to a web mercator opportunity density grid (used for regional analysis) */ private void handleGridRequest (GridRequest request, TransportNetwork network, TaskStatistics ts) { try { @@ -735,25 +763,32 @@ private void saveBatchTaskResults (AnalystClusterRequest clusterRequest, ResultE } } - /** - * We have two kinds of output from a worker: we can either write to an object in a bucket on S3, or we can stream - * output over HTTP to a waiting web service caller. This function handles the latter case. It connects to the - * cluster broker, signals that the task with a certain ID is being completed, and posts the result back through the - * broker. The broker then passes the result on to the original requester (usually the analysis web UI). - * - * This function will run the HTTP Post operation in a new thread so that this function can return, allowing its - * caller to write data to the input stream it passed in. This arrangement avoids broken pipes that can happen - * when the calling thread dies. TODO clarify when and how which thread can die. - */ public void finishPriorityTask(GenericClusterRequest clusterRequest, InputStream result, String contentType) { + finishPriorityTask(clusterRequest, result, contentType, null); + } + + /** + * We have two kinds of output from a worker: we can either write to an object in a bucket on S3, or we can stream + * output over HTTP to a waiting web service caller. This function handles the latter case. It connects to the + * cluster broker, signals that the task with a certain ID is being completed, and posts the result back through the + * broker. The broker then passes the result on to the original requester (usually the analysis web UI). 
+ * + * This function will run the HTTP Post operation in a new thread so that this function can return, allowing its + * caller to write data to the input stream it passed in. This arrangement avoids broken pipes that can happen + * when the calling thread dies. TODO clarify when and how which thread can die. + */ + public void finishPriorityTask(GenericClusterRequest clusterRequest, InputStream result, String contentType, String contentEncoding) { //CountingInputStream is = new CountingInputStream(result); + LOG.info("Returning high-priority results for task {}", clusterRequest.taskId); + String url = BROKER_BASE_URL + String.format("/complete/success/%s", clusterRequest.taskId); HttpPost httpPost = new HttpPost(url); // TODO reveal any errors etc. that occurred on the worker. httpPost.setEntity(new InputStreamEntity(result)); httpPost.setHeader("Content-Type", contentType); + if (contentEncoding != null) httpPost.setHeader("Content-Encoding", contentEncoding); taskDeliveryExecutor.execute(() -> { try { HttpResponse response = httpClient.execute(httpPost); diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java b/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java index 90dd18644..0dbaf4e46 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java @@ -21,7 +21,8 @@ @JsonSubTypes.Type(name = "analyst", value = AnalystClusterRequest.class), @JsonSubTypes.Type(name = "grid", value = GridRequest.class), @JsonSubTypes.Type(name = "static-metadata", value = StaticMetadata.MetadataRequest.class), - @JsonSubTypes.Type(name = "static-stop-trees", value = StaticMetadata.StopTreeRequest.class) + @JsonSubTypes.Type(name = "static-stop-trees", value = StaticMetadata.StopTreeRequest.class), + @JsonSubTypes.Type(name = "travel-time-surface", value = TravelTimeSurfaceRequest.class) }) public abstract class GenericClusterRequest implements Serializable { diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java b/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java index da2e9e280..d626645a6 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java @@ -1,6 +1,7 @@ package com.conveyal.r5.analyst.cluster; import com.conveyal.r5.profile.ProfileRequest; +import com.fasterxml.jackson.annotation.JsonInclude; import java.util.List; @@ -29,6 +30,10 @@ public class GridRequest extends GenericClusterRequest { /** Travel time cutoff (minutes) */ public int cutoffMinutes; + /** Percentile of travel time */ + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public int travelTimePercentile = 50; + public final String type = "grid"; /** Where should output of this job be saved? 
*/ diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java b/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java index 5bfb0dd63..9744e21df 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java @@ -2,8 +2,6 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClient; import com.amazonaws.services.sqs.model.Message; import com.conveyal.r5.analyst.LittleEndianIntOutputStream; import com.google.common.io.ByteStreams; @@ -24,7 +22,6 @@ import java.io.RandomAccessFile; import java.util.Base64; import java.util.BitSet; -import java.util.Locale; import java.util.zip.GZIPOutputStream; /** @@ -134,18 +131,20 @@ public void handleMessage (Message message) { ByteArrayInputStream bais = new ByteArrayInputStream(body); Origin origin = Origin.read(bais); - // if this is not the first request, make sure that we have the correct number of iterations - if (buffer != null && origin.accessibilityPerIteration.length != this.nIterations) { - LOG.error("Origin {}, {} has {} iterations, expected {}", - origin.x, origin.y, origin.accessibilityPerIteration.length, this.nIterations); + // if this is not the first request, make sure that we have the correct number of accessibility + // samples (either instantaneous accessibility values or bootstrap replications of accessibility given median + // travel time, depending on worker version) + if (buffer != null && origin.samples.length != this.nIterations) { + LOG.error("Origin {}, {} has {} samples, expected {}", + origin.x, origin.y, origin.samples.length, this.nIterations); error = true; } // convert to a delta coded byte array - ByteArrayOutputStream pixelByteOutputStream = new ByteArrayOutputStream(origin.accessibilityPerIteration.length * 4); + ByteArrayOutputStream pixelByteOutputStream = new ByteArrayOutputStream(origin.samples.length * 4); LittleEndianDataOutputStream pixelDataOutputStream = new LittleEndianDataOutputStream(pixelByteOutputStream); - for (int i = 0, prev = 0; i < origin.accessibilityPerIteration.length; i++) { - int current = origin.accessibilityPerIteration[i]; + for (int i = 0, prev = 0; i < origin.samples.length; i++) { + int current = origin.samples[i]; pixelDataOutputStream.writeInt(current - prev); prev = current; } @@ -154,7 +153,7 @@ public void handleMessage (Message message) { // write to the proper subregion of the buffer for this origin // use a synchronized block to ensure no threading issues synchronized (this) { - if (buffer == null) this.initialize(origin.accessibilityPerIteration.length); + if (buffer == null) this.initialize(origin.samples.length); // The origins we receive have 2d coordinates. // Flatten them to compute file offsets and for the origin checklist. 
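Review note on the delta coding in this hunk: a minimal sketch of the encode side (hypothetical class; the assembler above does the same with Guava's LittleEndianDataOutputStream):

    import com.google.common.io.LittleEndianDataOutputStream;

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    public class DeltaCodeSketch {
        /** Store each sample as its difference from the previous one; near-monotone samples yield tiny, gzip-friendly deltas. */
        static byte[] deltaCode (int[] samples) throws IOException {
            ByteArrayOutputStream byteStream = new ByteArrayOutputStream(samples.length * 4);
            LittleEndianDataOutputStream dataStream = new LittleEndianDataOutputStream(byteStream);
            int prev = 0;
            for (int current : samples) {
                dataStream.writeInt(current - prev);
                prev = current;
            }
            dataStream.flush();
            return byteStream.toByteArray();
        }

        public static void main (String... args) throws IOException {
            // encodes as deltas 1000, 5, -2, 7 (16 bytes)
            System.out.println(deltaCode(new int[] {1000, 1005, 1003, 1010}).length);
        }
    }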
int index1d = origin.y * request.width + origin.x; diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java b/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java index 0afb422b2..99bfd69bd 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java @@ -1,6 +1,5 @@ package com.conveyal.r5.analyst.cluster; -import com.conveyal.r5.publish.StaticSiteRequest; import com.google.common.io.LittleEndianDataInputStream; import com.google.common.io.LittleEndianDataOutputStream; import org.slf4j.Logger; @@ -26,14 +25,17 @@ public class Origin { /** Y coordinate of origin within regional analysis */ public int y; - /** The instantaneous accessibility for each iteration */ - public int[] accessibilityPerIteration; + /** + * Samples of accessibility. Depending on worker version, this could be bootstrap replications of accessibility given + * median travel time, or with older workers would be instantaneous accessibility for each departure minute. + */ + public int[] samples; /** Construct an origin given a grid request and the instantaneous accessibility computed for each iteration */ - public Origin (GridRequest request, int[] accessibilityPerIteration) { + public Origin (GridRequest request, int[] samples) { this.x = request.x; this.y = request.y; - this.accessibilityPerIteration = accessibilityPerIteration; + this.samples = samples; } /** allow construction of blank origin for static read method */ @@ -56,9 +58,9 @@ public void write(OutputStream out) throws IOException { data.writeInt(y); // write the number of iterations - data.writeInt(accessibilityPerIteration.length); + data.writeInt(samples.length); - for (int i : accessibilityPerIteration) { + for (int i : samples) { // don't bother to delta code, these are small and we're not gzipping data.writeInt(i); } @@ -91,11 +93,11 @@ public static Origin read (InputStream inputStream) throws IOException { origin.x = data.readInt(); origin.y = data.readInt(); - origin.accessibilityPerIteration = new int[data.readInt()]; + origin.samples = new int[data.readInt()]; - for (int iteration = 0; iteration < origin.accessibilityPerIteration.length; iteration++) { + for (int iteration = 0; iteration < origin.samples.length; iteration++) { // de-delta-code the origin - origin.accessibilityPerIteration[iteration] = data.readInt(); + origin.samples[iteration] = data.readInt(); } data.close(); diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/TravelTimeSurfaceRequest.java b/src/main/java/com/conveyal/r5/analyst/cluster/TravelTimeSurfaceRequest.java new file mode 100644 index 000000000..ddb8ef373 --- /dev/null +++ b/src/main/java/com/conveyal/r5/analyst/cluster/TravelTimeSurfaceRequest.java @@ -0,0 +1,36 @@ +package com.conveyal.r5.analyst.cluster; + +import com.conveyal.r5.profile.ProfileRequest; +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * Request a travel time surface containing travel times to all destinations for several percentiles of travel time. 
+ */ +public class TravelTimeSurfaceRequest extends GenericClusterRequest { + /** The profile request to use */ + public ProfileRequest request; + + public int zoom; + public int west; + public int north; + public int width; + public int height; + + /** Which percentiles to calculate */ + public double[] percentiles = new double[] { 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95 }; + + /** The grid key on S3 to compute access to */ + public String grid; + + public final String type = "travel-time-surface"; + + @Override + public ProfileRequest extractProfileRequest() { + return request; + } + + @Override + public boolean isHighPriority() { + return true; // travel time surfaces used in high priority requests only + } +} diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java index 168d0c7bc..c725e87a8 100644 --- a/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java +++ b/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java @@ -26,60 +26,11 @@ * Planning Using Interactive Accessibility Methods on Combined Schedule and Headway-Based Networks,” under review for the * the 96th Annual Meeting of the Transportation Research Board, January 2017. */ -public class AndrewOwenMeanGridStatisticComputer { - private static final AmazonS3 s3 = new AmazonS3Client(); - - /** Version of the access grid format we read */ - private static final int ACCESS_GRID_VERSION = 0; - - /** Compute a particular percentile of a grid (percentile between 0 and 100) */ - public static Grid computeMean (String resultsBucket, String key) throws IOException { - S3Object accessGrid = s3.getObject(resultsBucket, key); - - LittleEndianDataInputStream input = new LittleEndianDataInputStream(new GZIPInputStream(accessGrid.getObjectContent())); - - char[] header = new char[8]; - for (int i = 0; i < 8; i++) { - header[i] = (char) input.readByte(); - } - - if (!"ACCESSGR".equals(new String(header))) { - throw new IllegalArgumentException("Input not in access grid format!"); - } - - int version = input.readInt(); - - if (version != ACCESS_GRID_VERSION) { - throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version)); - } - - int zoom = input.readInt(); - int west = input.readInt(); - int north = input.readInt(); - int width = input.readInt(); - int height = input.readInt(); - int nIterations = input.readInt(); - - Grid grid = new Grid(zoom, width, height, north, west); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - - int sum = 0; - int count = 0; - - for (int iteration = 0, val = 0; iteration < nIterations; iteration++) { - sum += (val += input.readInt()); - count++; - } - - // compute percentiles - grid.grid[x][y] = (double) sum / count; - } - } - - input.close(); - - return grid; +public class AndrewOwenMeanGridStatisticComputer extends GridStatisticComputer { + @Override + protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) { + int sum = 0; + for (int i = 0; i < valuesThisOrigin.length; i++) sum += valuesThisOrigin[i]; + return (double) sum / (double) valuesThisOrigin.length; } } diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/GridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/scenario/GridStatisticComputer.java new file mode 100644 index 
000000000..e8aaf45fe --- /dev/null +++ b/src/main/java/com/conveyal/r5/analyst/scenario/GridStatisticComputer.java @@ -0,0 +1,117 @@ +package com.conveyal.r5.analyst.scenario; + +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.S3Object; +import com.conveyal.r5.analyst.BootstrapPercentileHypothesisTestGridStatisticComputer; +import com.conveyal.r5.analyst.DualGridStatisticComputer; +import com.conveyal.r5.analyst.ExtractingGridStatisticComputer; +import com.conveyal.r5.analyst.Grid; +import com.google.common.io.LittleEndianDataInputStream; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.GZIPInputStream; + +/** + * This contains the base functions for reading access grids, which are three-dimensional arrays, with the first + * two dimensions consisting of x and y coordinates of origins within the regional analysis, and the third + * dimension reflecting multiple values of the indicator of interest. This could be instantaneous + * accessibility results for each Monte Carlo draw when computing average instantaneous accessibility (i.e. + * Owen-style accessibility), or it could be multiple bootstrap replications of the sampling distribution of accessibility + * given median travel time (see Conway, M. W., Byrd, A. and van Eggermond, M. "A Statistical Approach to Comparing + * Accessibility Results: Including Uncertainty in Public Transport Sketch Planning," paper presented at the 2017 World + * Symposium of Transport and Land Use Research, Brisbane, QLD, Australia, Jul 3-6.) + */ +public abstract class GridStatisticComputer { + private static final AmazonS3 s3 = new AmazonS3Client(); + + /** Version of the access grid format we read */ + private static final int ACCESS_GRID_VERSION = 0; + + public Grid compute(String resultsBucket, String key) throws IOException { + S3Object accessGrid = s3.getObject(resultsBucket, key); + + return compute(accessGrid.getObjectContent()); + } + + public Grid compute (InputStream rawInput) throws IOException { + LittleEndianDataInputStream input = new LittleEndianDataInputStream(new GZIPInputStream(rawInput)); + + char[] header = new char[8]; + for (int i = 0; i < 8; i++) { + header[i] = (char) input.readByte(); + } + + if (!"ACCESSGR".equals(new String(header))) { + throw new IllegalArgumentException("Input not in access grid format!"); + } + + int version = input.readInt(); + + if (version != ACCESS_GRID_VERSION) { + throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version)); + } + + int zoom = input.readInt(); + int west = input.readInt(); + int north = input.readInt(); + int width = input.readInt(); + int height = input.readInt(); + + // The number of samples stored at each origin; these could be instantaneous accessibility values for each + // Monte Carlo draw, or they could be bootstrap replications of a sampling distribution of accessibility given + // median travel time. 
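Review note: the abstract computeValueForOrigin hook added at the end of this file makes new statistics nearly free to implement; a hypothetical subclass (not part of this PR) that extracts the maximum sample at each origin:

    package com.conveyal.r5.analyst.scenario;

    /** Hypothetical example subclass: the largest sample at each origin. */
    public class MaxGridStatisticComputer extends GridStatisticComputer {
        @Override
        protected double computeValueForOrigin (int x, int y, int[] valuesThisOrigin) {
            int max = Integer.MIN_VALUE;
            for (int value : valuesThisOrigin) max = Math.max(max, value);
            return max;
        }
    }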
+ int nSamples = input.readInt();
+
+ Grid outputGrid = new Grid(zoom, width, height, north, west);
+
+ int[] valuesThisOrigin = new int[nSamples];
+
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ // input values are delta-coded per origin, so use val to keep track of the current value
+ for (int iteration = 0, val = 0; iteration < nSamples; iteration++) {
+ valuesThisOrigin[iteration] = (val += input.readInt());
+ }
+
+ // compute the statistic for this origin
+ outputGrid.grid[x][y] = computeValueForOrigin(x, y, valuesThisOrigin);
+ }
+ }
+
+ input.close();
+
+ return outputGrid;
+ }
+
+ /** Subclasses should override this method to compute the single value at a particular grid cell given the samples at that origin. */
+ protected abstract double computeValueForOrigin (int x, int y, int[] valuesThisOrigin);
+
+ public static void main (String... args) throws IOException {
+ GridStatisticComputer comp;
+ InputStream in;
+ if ("--two-tailed".equals(args[0])) {
+ DualGridStatisticComputer.main(args);
+ return;
+ } else if ("--extract".equals(args[0])) {
+ int which = Integer.parseInt(args[1]);
+ in = new FileInputStream(args[2]);
+ comp = new ExtractingGridStatisticComputer(which);
+ } else {
+ throw new RuntimeException("Unknown grid statistic computer " + args[0]);
+ }
+
+ Grid grid = comp.compute(in);
+
+ if (args[3].endsWith(".grid"))
+ grid.write(new FileOutputStream(args[3]));
+ else if (args[3].endsWith(".png"))
+ grid.writePng(new FileOutputStream(args[3]));
+ else if (args[3].endsWith(".tif"))
+ grid.writeGeotiff(new FileOutputStream(args[3]));
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/api/ProfileResponse.java b/src/main/java/com/conveyal/r5/api/ProfileResponse.java
index 71d73913a..8dff49542 100644
--- a/src/main/java/com/conveyal/r5/api/ProfileResponse.java
+++ b/src/main/java/com/conveyal/r5/api/ProfileResponse.java
@@ -210,4 +210,9 @@ public void generateStreetTransfers(TransportNetwork transportNetwork, ProfileRe
 }
 request.reverseSearch = prevReverseSearch;
 }
+
+ /** Recompute stats for all options; should be done once all options have been added */
+ public void recomputeStats (ProfileRequest request) {
+ this.options.forEach(o -> o.recomputeStats(request));
+ }
 }
diff --git a/src/main/java/com/conveyal/r5/api/util/LegMode.java b/src/main/java/com/conveyal/r5/api/util/LegMode.java
index aa70d39ed..75cb533d5 100644
--- a/src/main/java/com/conveyal/r5/api/util/LegMode.java
+++ b/src/main/java/com/conveyal/r5/api/util/LegMode.java
@@ -1,5 +1,9 @@
 package com.conveyal.r5.api.util;
+import com.conveyal.r5.profile.StreetMode;
+
+import java.util.Set;
+
 /**
 * Modes of transport on ingress egress legs
 */
@@ -9,5 +13,12 @@ public enum LegMode {
 //Renting a bicycle
 BICYCLE_RENT,
 //Park & Ride
- CAR_PARK
+ CAR_PARK;
+
+ /** Return the heaviest/fastest StreetMode for use in stop finding */
+ public static StreetMode getDominantStreetMode(Set modes) {
+ if (modes.contains(LegMode.CAR)) return StreetMode.CAR;
+ else if (modes.contains(LegMode.BICYCLE)) return StreetMode.BICYCLE;
+ else return StreetMode.WALK;
+ }
 }
diff --git a/src/main/java/com/conveyal/r5/api/util/ProfileOption.java b/src/main/java/com/conveyal/r5/api/util/ProfileOption.java
index 954357647..076c52767 100644
--- a/src/main/java/com/conveyal/r5/api/util/ProfileOption.java
+++ b/src/main/java/com/conveyal/r5/api/util/ProfileOption.java
@@ -1,6 +1,9 @@
 package com.conveyal.r5.api.util;
+import com.conveyal.r5.profile.Path;
 import com.conveyal.r5.profile.PathWithTimes;
+import
com.conveyal.r5.profile.ProfileRequest; +import com.conveyal.r5.profile.StatsCalculator; import com.conveyal.r5.transit.TransitLayer; import com.google.common.base.Joiner; import com.google.common.collect.Lists; @@ -8,6 +11,8 @@ import org.slf4j.LoggerFactory; import java.time.*; +import java.time.temporal.TemporalAccessor; +import java.time.temporal.TemporalField; import java.util.*; import java.util.stream.Collectors; @@ -34,6 +39,18 @@ public class ProfileOption { private transient Map accessIndexes = new HashMap<>(); private transient Map egressIndexes = new HashMap<>(); + /** + * contains full itineraries needed to compute statistics. We don't present these to the client though. + * + * There is no way to compute the statistics by combining the statistics of constituent paths. Suppose there + * are two patterns each running every 15 minutes; we don't know if they are coming in and out of phase. + * Ergo, we save all itineraries so we can compute fresh stats when we're done. + * + * Since all transit paths here will have the same start and end stops, we can save all itineraries individually, + * we don't need to save their start and end stops as well. + */ + private transient List fullItineraries = new ArrayList<>(); + @Override public String toString() { return "ProfileOption{" + " transit=\n " + transit + @@ -154,6 +171,7 @@ public void addTransit(TransitLayer transitLayer, PathWithTimes currentTransitPa } } + fullItineraries.addAll(currentTransitPath.itineraries); } /** @@ -281,6 +299,29 @@ public void addMiddle(StreetSegment streetSegment, Transfer transfer) { } } + /** recompute wait and ride stats for all transit itineraries, should be done once all transit paths are added */ + public void recomputeStats (ProfileRequest req) { + if (!fullItineraries.isEmpty()) { + StatsCalculator.StatsCollection collection = + StatsCalculator.computeStatistics( + req, + // TODO is this intended to handle multiple access modes to the same stop? + // do these ever have more or less than one value? + access.get(itinerary.get(0).connection.access).duration, + egress.get(itinerary.get(0).connection.egress).duration, + transit.size(), + fullItineraries); + + this.stats = collection.stats; + + for (int leg = 0; leg < transit.size(); leg++) { + TransitSegment segment = transit.get(leg); + segment.rideStats = collection.rideStats[leg]; + segment.waitStats = collection.waitStats[leg]; + } + } + } + public List getFares() { return fares == null ? null : fares.stream().collect(Collectors.toList()); } diff --git a/src/main/java/com/conveyal/r5/common/JsonUtilities.java b/src/main/java/com/conveyal/r5/common/JsonUtilities.java index b91b51c28..db5788792 100644 --- a/src/main/java/com/conveyal/r5/common/JsonUtilities.java +++ b/src/main/java/com/conveyal/r5/common/JsonUtilities.java @@ -21,13 +21,19 @@ */ public abstract class JsonUtilities { public static final ObjectMapper objectMapper = createBaseObjectMapper(); + public static final ObjectMapper lenientObjectMapper = createBaseObjectMapper(); static { // If we receive a JSON object containing a field that we don't recognize, fail. This should catch misspellings. + // This is used on the broker which should always use the latest R5 so that fields aren't silently dropped because + // the broker does not support them. 
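Review note on the JsonUtilities change in this hunk: the strict/lenient split boils down to one DeserializationFeature flag; a minimal sketch:

    import com.fasterxml.jackson.databind.DeserializationFeature;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class MapperSketch {
        // broker: parse strictly so typos and unsupported fields fail fast
        static final ObjectMapper strict = new ObjectMapper()
                .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
        // worker: parse leniently so an older worker doesn't crash on fields added by a newer broker
        static final ObjectMapper lenient = new ObjectMapper()
                .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    }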
objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
- }
- public static final ObjectMapper lenientObjectMapper = createBaseObjectMapper();
+ // On the worker, we want to use an ObjectMapper that will ignore unknown properties, so that it doesn't crash
+ // when an older worker is connected to a newer broker.
+ // TODO figure out how to warn users when they use features not supported by their worker
+ lenientObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+ }
 private static ObjectMapper createBaseObjectMapper () {
 ObjectMapper objectMapper = new ObjectMapper();
diff --git a/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java b/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java
index b19d23e66..8101caf48 100644
--- a/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java
+++ b/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java
@@ -12,6 +12,8 @@ import org.slf4j.LoggerFactory;
 import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -28,6 +30,10 @@ public class LevelOfTrafficStressLabeler {
 /** match OSM speeds, from http://wiki.openstreetmap.org/wiki/Key:maxspeed */
 private static final Pattern speedPattern = Pattern.compile("^([0-9][\\.0-9]*?) ?(km/h|kmh|kph|mph|knots)?$");
+ Set badMaxspeedValues = new HashSet<>();
+
+ Set badLaneValues = new HashSet<>();
+
 /** Set the LTS for this way in the provided flags (not taking into account any intersection LTS at the moment) */
 public void label (Way way, EnumSet forwardFlags, EnumSet backFlags) {
 // the general idea behind this function is that we progress from low-stress to higher-stress, bailing out as we go.
@@ -76,18 +82,23 @@ public void label (Way way, EnumSet forwardFlags, EnumSet backFlags) {
+ if (badMaxspeedValues.size() > 0) LOG.warn("Unrecognized values for maxspeed tag: {}", badMaxspeedValues);
+ if (badLaneValues.size() > 0) LOG.warn("Unrecognized values for lane tag: {}", badLaneValues);
+ }
 }
diff --git a/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java b/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java
index bc2d0bd27..dbf93b757 100644
--- a/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java
+++ b/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java
@@ -283,7 +283,7 @@ private void applyOnewayHierarchically(Node node, OneWay oneWay, EnumMap getTreeForWay (Way way) {
 String highway = way.getTag("highway");
 EnumMap tree = getDefaultTree();
@@ -431,17 +431,24 @@ private EnumMap getDirectionalTree (Way way) {
 applyOnewayHierarchically(Node.BICYCLE, sidepath, tree);
 }
- // there are in fact one way pedestrian paths, believe it or not, but usually pedestrians don't inherit any oneway
- // restrictions from more general modes
+ // Usually pedestrians don't inherit any oneway restrictions from other modes.
+ // However there are some one-way pedestrian paths, e.g. entrances with turnstiles.
+ // Most such tags are oneway:foot=no though.
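Review note on the fix below: the old code checked for an oneway:foot tag but then read the generic oneway tag. A sketch of the corrected per-mode lookup, using the labeler's own Way and OneWay types (hypothetical helper, not part of this PR):

    /** Pedestrian oneway treatment: a mode-specific oneway:foot tag wins; otherwise pedestrians ignore oneway. */
    static OneWay pedestrianOneWay (Way way) {
        if (way.hasTag("oneway:foot")) return OneWay.fromTag(way.getTag("oneway:foot"));
        return OneWay.NO;
    }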
applyOnewayHierarchically(Node.FOOT, OneWay.NO, tree); - if (way.hasTag("oneway:foot")) applyOnewayHierarchically(Node.FOOT, OneWay.fromTag(way.getTag("oneway")), tree); + if (way.hasTag("oneway:foot")) { + applyOnewayHierarchically(Node.FOOT, OneWay.fromTag(way.getTag("oneway:foot")), tree); + } return tree; } - /** We are using such a small subset of the tree that we can just code it up manually here */ + /** + * We apply access permissions recursively to sub-trees of modes. + * https://wiki.openstreetmap.org/wiki/Computing_access_restrictions#Transport_mode_hierarchy + * We are using such a small subset of the tree on the wiki that we code up the parent/child relationships manually. + */ protected enum Node { ACCESS, // root FOOT, VEHICLE, // level 1 diff --git a/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java b/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java index d7771501c..dc6595b89 100644 --- a/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java +++ b/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java @@ -10,6 +10,7 @@ import com.conveyal.r5.streets.StreetRouter; import com.conveyal.r5.streets.VertexStore; import com.conveyal.r5.transit.RouteInfo; +import com.conveyal.r5.transit.TransitLayer; import com.conveyal.r5.transit.TransportNetwork; import com.conveyal.r5.transit.TripPattern; import gnu.trove.iterator.TIntIntIterator; @@ -185,6 +186,8 @@ public ProfileResponse getPlan(ProfileRequest request) { profileResponse.generateStreetTransfers(transportNetwork, request); } + profileResponse.recomputeStats(request); + LOG.info("Returned {} options", profileResponse.getOptions().size()); LOG.info("Took {} ms", System.currentTimeMillis() - startRouting); @@ -295,7 +298,7 @@ private HashMap findAccessPaths(ProfileRequest request) { StreetRouter streetRouter = new StreetRouter(transportNetwork.streetLayer); streetRouter.profileRequest = request; if (mode == LegMode.CAR_PARK) { - streetRouter = findParkRidePath(request, streetRouter); + streetRouter = findParkRidePath(request, streetRouter, transportNetwork.transitLayer); if (streetRouter != null) { accessRouter.put(LegMode.CAR_PARK, streetRouter); } else { @@ -346,7 +349,7 @@ private HashMap findAccessPaths(ProfileRequest request) { * @param streetRouter where profileRequest was already set * @return null if path isn't found */ - private StreetRouter findParkRidePath(ProfileRequest request, StreetRouter streetRouter) { + public static StreetRouter findParkRidePath(ProfileRequest request, StreetRouter streetRouter, TransitLayer transitLayer) { streetRouter.streetMode = StreetMode.CAR; streetRouter.timeLimitSeconds = request.maxCarTime * 60; streetRouter.flagSearch = VertexStore.VertexFlag.PARK_AND_RIDE; @@ -357,7 +360,7 @@ private StreetRouter findParkRidePath(ProfileRequest request, StreetRouter stree LOG.info("CAR PARK: Found {} car parks", carParks.size()); ParkRideRouter parkRideRouter = new ParkRideRouter(streetRouter.streetLayer); parkRideRouter.profileRequest = request; - parkRideRouter.addParks(carParks, transportNetwork.transitLayer); + parkRideRouter.addParks(carParks, transitLayer); parkRideRouter.previousRouter = streetRouter; return parkRideRouter; /*StreetRouter walking = new StreetRouter(transportNetwork.streetLayer); diff --git a/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java b/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java index ca61a9dfc..b03ffc869 100644 --- 
a/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java +++ b/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java @@ -40,6 +40,8 @@ public class FastRaptorWorker { /** Minimum wait for boarding to account for schedule variation */ private static final int MINIMUM_BOARD_WAIT_SEC = 60; + public final int nMinutes; + public final int monteCarloDrawsPerMinute; // Variables to track time spent, all in nanoseconds (some of the operations we're timing are significantly submillisecond) // (although I suppose using ms would be fine because the number of times we cross a millisecond boundary would be proportional @@ -107,6 +109,12 @@ public FastRaptorWorker (TransitLayer transitLayer, ProfileRequest request, TInt for (int i = 1; i < this.scheduleState.length; i++) this.scheduleState[i].previous = this.scheduleState[i - 1]; offsets = new FrequencyRandomOffsets(transitLayer); + + // compute number of minutes for scheduled search + nMinutes = (request.toTime - request.fromTime) / DEPARTURE_STEP_SEC; + + // how many monte carlo draws per minute of scheduled search to get desired total iterations? + monteCarloDrawsPerMinute = (int) Math.ceil((double) request.monteCarloDraws / nMinutes); } /** For each iteration, return the travel time to each transit stop */ @@ -117,16 +125,10 @@ public int[][] route () { prefilterPatterns(); - // compute number of minutes for scheduled search - int nMinutes = (request.toTime - request.fromTime) / DEPARTURE_STEP_SEC; - - // how many monte carlo draws per minute of scheduled search to get desired total iterations? - int monteCarloDrawsPerMinute = (int) Math.ceil((double) request.monteCarloDraws / nMinutes); - LOG.info("Performing {} scheduled iterations each with {} Monte Carlo draws for a total of {} iterations", nMinutes, monteCarloDrawsPerMinute, nMinutes * monteCarloDrawsPerMinute); - int[][] results = new int[transit.hasFrequencies ? nMinutes * monteCarloDrawsPerMinute : nMinutes][]; + int[][] results = new int[nMinutes * monteCarloDrawsPerMinute][]; int currentIteration = 0; // main loop over departure times @@ -314,9 +316,16 @@ private int[][] runRaptorForMinute (int departureTime, int iterationsPerMinute) return result; } else { - // no frequencies, return result of scheduled search - if (saveAllStates) statesEachIteration.add(scheduleState[request.maxRides].deepCopy()); - return new int[][] { scheduleState[request.maxRides].bestNonTransferTimes }; + // No frequencies, return result of scheduled search, but multiplied by the number of + // MC draws so that the scheduled search accessibility avoids potential bugs where assumptions + // are made about how many results will be returned from a search, e.g., in + // https://github.com/conveyal/r5/issues/306 + int[][] result = new int[iterationsPerMinute][]; + for (int i = 0; i < monteCarloDrawsPerMinute; i++) { + if (saveAllStates) statesEachIteration.add(scheduleState[request.maxRides].deepCopy()); + result[i] = scheduleState[request.maxRides].bestNonTransferTimes; + } + return result; } } @@ -408,7 +417,11 @@ private void doScheduledSearchForRound(RaptorState inputState, RaptorState outpu } } - private void doFrequencySearchForRound(RaptorState inputState, RaptorState outputState, boolean bound) { + /** Do a frequency search. If computeDeterministicUpperBound is true, worst-case frequency boarding time will be used + * so that the output of this function can be used in a range-RAPTOR search. Otherwise Monte Carlo schedules will be + * used to improve upon the output of the range-RAPTOR bounds search. 
+ */ + private void doFrequencySearchForRound(RaptorState inputState, RaptorState outputState, boolean computeDeterministicUpperBound) { BitSet patternsTouched = getPatternsTouchedForStops(inputState, frequencyIndexForOriginalPatternIndex); for (int patternIndex = patternsTouched.nextSetBit(0); patternIndex >= 0; patternIndex = patternsTouched.nextSetBit(patternIndex + 1)) { @@ -445,9 +458,12 @@ private void doFrequencySearchForRound(RaptorState inputState, RaptorState outpu if (inputState.bestStopsTouched.get(stop)) { int earliestBoardTime = inputState.bestTimes[stop] + MINIMUM_BOARD_WAIT_SEC; - int newBoardingDepartureTimeAtStop = bound ? - getRandomFrequencyDepartureTime(schedule, stopPositionInPattern, offset, frequencyEntryIdx, earliestBoardTime) : - getWorstCaseFrequencyDepartureTime(schedule, stopPositionInPattern, frequencyEntryIdx, earliestBoardTime); + // if we're computing the upper bound, we want the worst case. This is the only thing that is + // valid in a range RAPTOR search; using random schedule draws in range RAPTOR would be problematic + // because they need to be independent across minutes. + int newBoardingDepartureTimeAtStop = computeDeterministicUpperBound ? + getWorstCaseFrequencyDepartureTime(schedule, stopPositionInPattern, frequencyEntryIdx, earliestBoardTime) : + getRandomFrequencyDepartureTime(schedule, stopPositionInPattern, offset, frequencyEntryIdx, earliestBoardTime); int remainOnBoardDepartureTimeAtStop = Integer.MAX_VALUE; diff --git a/src/main/java/com/conveyal/r5/profile/PathWithTimes.java b/src/main/java/com/conveyal/r5/profile/PathWithTimes.java index 648a8681d..66e86eb26 100644 --- a/src/main/java/com/conveyal/r5/profile/PathWithTimes.java +++ b/src/main/java/com/conveyal/r5/profile/PathWithTimes.java @@ -21,7 +21,6 @@ public class PathWithTimes extends Path { /** Wait stats for each leg */ public Stats[] waitStats; - /** Ride stats for each leg */ public Stats[] rideStats; @@ -175,69 +174,12 @@ private void sortAndFilterItineraries () { * we just compute how long the path takes at every possible departure minute. There's probably an * elegant theoretical way to do this, but I prefer pragmatism over theory. */ - private void computeStatistics (ProfileRequest req, int accessTime, int egressTime) { - this.stats = new Stats(); - this.rideStats = IntStream.range(0, this.patterns.length).mapToObj(i -> new Stats()).toArray(Stats[]::new); - this.waitStats = IntStream.range(0, this.patterns.length).mapToObj(i -> new Stats()).toArray(Stats[]::new); - - for (int start = req.fromTime; start < req.toTime; start += 60) { - // TODO should board slack be applied at the origin stop? Is this done in RaptorWorker? 
- int timeAtOriginStop = start + accessTime + RaptorWorker.BOARD_SLACK_SECONDS; - int bestTimeAtDestinationStop = Integer.MAX_VALUE; - - Itinerary bestItinerary = null; - for (Itinerary itin : this.itineraries) { - // itinerary cannot be used at this time - if (itin.boardTimes[0] < timeAtOriginStop) continue; - - if (itin.alightTimes[this.length - 1] < bestTimeAtDestinationStop) { - bestTimeAtDestinationStop = itin.alightTimes[this.length - 1]; - bestItinerary = itin; - } - } - - if (bestItinerary == null) continue; // cannot use this trip at this time - - int bestTimeAtDestination = bestTimeAtDestinationStop + egressTime; - - int travelTime = bestTimeAtDestination - start; - - stats.num++; - stats.avg += travelTime; - stats.min = Math.min(stats.min, travelTime); - stats.max = Math.max(stats.max, travelTime); - - // accumulate stats for each leg - for (int leg = 0; leg < this.patterns.length; leg++) { - Stats ride = rideStats[leg]; - int rideLen = bestItinerary.alightTimes[leg] - bestItinerary.boardTimes[leg]; - ride.num++; - ride.avg += rideLen; - ride.min = Math.min(ride.min, rideLen); - ride.max = Math.max(ride.max, rideLen); - - Stats wait = waitStats[leg]; - - int arriveAtStopTime = leg == 0 ? timeAtOriginStop : bestItinerary.arriveAtBoardStopTimes[leg]; - - int waitTime = bestItinerary.boardTimes[leg] - arriveAtStopTime; - - wait.num++; - wait.avg += waitTime; - wait.min = Math.min(waitTime, wait.min); - wait.max = Math.max(waitTime, wait.max); - } - } - - if (stats.num == 0) throw new IllegalStateException("No valid itineraries found for path computed in RaptorWorker"); - - stats.avg /= stats.num; - - for (Stats[] statSet : new Stats[][] { rideStats, waitStats }) { - for (Stats stats : statSet) { - stats.avg /= stats.num; - } - } + public void computeStatistics (ProfileRequest req, int accessTime, int egressTime) { + StatsCalculator.StatsCollection coll = + StatsCalculator.computeStatistics(req, accessTime, egressTime, length, itineraries); + this.stats = coll.stats; + this.waitStats = coll.waitStats; + this.rideStats = coll.rideStats; } /** itineraries for this path, which can be sorted by departure time */ diff --git a/src/main/java/com/conveyal/r5/profile/PerTargetPropagater.java b/src/main/java/com/conveyal/r5/profile/PerTargetPropagater.java new file mode 100644 index 000000000..e6e946bcf --- /dev/null +++ b/src/main/java/com/conveyal/r5/profile/PerTargetPropagater.java @@ -0,0 +1,159 @@ +package com.conveyal.r5.profile; + +import com.conveyal.r5.streets.LinkedPointSet; +import gnu.trove.map.TIntIntMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; + +/** + * This class propagates from times at transit stops to times at destinations (targets). It is called with a function + * that will be called with the target index (which is the row-major 1D index of the destination that is being + * propagated to) and the array of whether the target was reached within the travel time cutoff in each Monte Carlo + * draw. This is used in GridComputer to perform bootstrapping of accessibility given median travel time. This function + * is only called for targets that were ever reached. + * + * It may seem needlessly generic to use a lambda function, but it allows us to confine the bootstrapping code to GridComputer. + * Perhaps this should be refactored to be a BootstrappingPropagater that just returns bootstrapped accessibility values. 
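+ * + * A hypothetical usage sketch (signatures as defined below; the 3600-second cutoff and the hits accumulator are + * illustrative, not part of this class): + * <pre> + * new PerTargetPropagater(timesAtStops, nonTransitTimes, linkedTargets, request, 3600) + *     .propagate((target, reached) -> { for (boolean r : reached) if (r) hits[target]++; }); + * </pre> + * Note the reached array is a reused buffer, so callers should consume it rather than retain a reference to it.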
+ */ +public class PerTargetPropagater { + private static final Logger LOG = LoggerFactory.getLogger(PerTargetPropagater.class); + + /** Times at transit stops for each iteration */ + public final int[][] travelTimesToStopsEachIteration; + + /** Times at targets using the street network */ + public final int[] nonTransitTravelTimesToTargets; + + /** The travel time cutoff in this regional analysis */ + public final int cutoffSeconds; + + /** The linked targets */ + public final LinkedPointSet targets; + + /** the profilerequest (used for walk speed etc.) */ + public final ProfileRequest request; + + public PerTargetPropagater (int[][] travelTimesToStopsEachIteration, int[] nonTransitTravelTimesToTargets, LinkedPointSet targets, ProfileRequest request, int cutoffSeconds) { + this.travelTimesToStopsEachIteration = travelTimesToStopsEachIteration; + this.nonTransitTravelTimesToTargets = nonTransitTravelTimesToTargets; + this.targets = targets; + this.request = request; + this.cutoffSeconds = cutoffSeconds; + } + + private void propagate (Reducer reducer, TravelTimeReducer travelTimeReducer) { + boolean saveTravelTimes = travelTimeReducer != null; + targets.makePointToStopDistanceTablesIfNeeded(); + + long startTimeMillis = System.currentTimeMillis(); + // avoid float math in loop below + // float math was previously observed to slow down this loop, however it's debatable whether that was due to + // casts, the operations themselves, or the fact that the operations were being completed with doubles rather + // than floats. + int speedMillimetersPerSecond = (int) (request.walkSpeed * 1000); + + boolean[] perIterationResults = new boolean[travelTimesToStopsEachIteration.length]; + int[] perIterationTravelTimes = saveTravelTimes ? new int[travelTimesToStopsEachIteration.length] : null; + + // Invert the travel times to stops array, to provide better memory locality in the tight loop below. Confirmed + // that this provides a significant speedup, which makes sense; Java doesn't have true multidimensional arrays + // but rather represents int[][] as Object[int[]], which means that each of the arrays "on the inside" is stored + // separately in memory and may not be contiguous, also meaning the CPU can't efficiently predict and prefetch + // what we need next. When we invert the array, we then have all travel times to a particular stop for all iterations + // in a single array. The CPU will only page the data that is relevant for the current target, i.e. the travel + // times to nearby stops. Since we are also looping over the targets in a geographic manner (in row-major order), + // it is likely the stops relevant to a particular target will already be in memory from the previous target. + // This should not increase memory consumption very much as we're only storing the travel times to stops. The + // Netherlands has about 70,000 stops, if you do 1,000 iterations to 70,000 stops, the array being duplicated is + // only 70,000 * 1000 * 4 bytes per int ~= 267 megabytes. I don't think it will be worthwhile to change the + // algorithm to output already-transposed data as that will create other memory locality problems (since the + // pathfinding algorithm solves one iteration for all stops simultaneously). 
+ int[][] invertedTravelTimesToStops = new int[travelTimesToStopsEachIteration[0].length][travelTimesToStopsEachIteration.length]; + + for (int iteration = 0; iteration < travelTimesToStopsEachIteration.length; iteration++) { + for (int stop = 0; stop < travelTimesToStopsEachIteration[0].length; stop++) { + invertedTravelTimesToStops[stop][iteration] = travelTimesToStopsEachIteration[iteration][stop]; + } + } + + for (int targetIdx = 0; targetIdx < targets.size(); targetIdx++) { + // clear previous results, fill with whether target is reached within the cutoff without transit (which does + // not vary with monte carlo draw) + boolean targetReachedWithoutTransit = nonTransitTravelTimesToTargets[targetIdx] < cutoffSeconds; + Arrays.fill(perIterationResults, targetReachedWithoutTransit); + if (saveTravelTimes) { + Arrays.fill(perIterationTravelTimes, nonTransitTravelTimesToTargets[targetIdx]); + } + + if (targetReachedWithoutTransit && !saveTravelTimes) { + // if the target is reached without transit, there's no need to do any propagation as the array cannot + // change + reducer.accept(targetIdx, perIterationResults); + continue; + } + + TIntIntMap pointToStopDistanceTable = targets.pointToStopDistanceTables.get(targetIdx); + + // all variables used in lambdas must be "effectively final"; arrays are effectively final even if their + // values change + boolean[] targetEverReached = new boolean[] { nonTransitTravelTimesToTargets[targetIdx] <= cutoffSeconds }; + + // don't try to propagate transit if there are no nearby transit stops, + // but still call the reducer below with the non-transit times, because you can walk even where there is no + // transit + if (pointToStopDistanceTable != null) { + pointToStopDistanceTable.forEachEntry((stop, distanceMillimeters) -> { + for (int iteration = 0; iteration < perIterationResults.length; iteration++) { + int timeAtStop = invertedTravelTimesToStops[stop][iteration]; + + if (timeAtStop > cutoffSeconds || saveTravelTimes && timeAtStop > perIterationTravelTimes[iteration]) continue; // avoid overflow + + int timeAtTargetThisStop = timeAtStop + distanceMillimeters / speedMillimetersPerSecond; + + if (timeAtTargetThisStop < cutoffSeconds) { + if (saveTravelTimes) { + if (timeAtTargetThisStop < perIterationTravelTimes[iteration]) { + perIterationTravelTimes[iteration] = timeAtTargetThisStop; + targetEverReached[0] = true; + } + } else { + perIterationResults[iteration] = true; + targetEverReached[0] = true; + } + } + } + return true; + }); + } + + if (saveTravelTimes) travelTimeReducer.accept(targetIdx, perIterationTravelTimes); + else if (targetEverReached[0]) reducer.accept(targetIdx, perIterationResults); + } + + long totalTimeMillis = System.currentTimeMillis() - startTimeMillis; + LOG.info("Propagating {} iterations from {} stops to {} targets took {}s", + travelTimesToStopsEachIteration.length, + travelTimesToStopsEachIteration[0].length, + targets.size(), + totalTimeMillis / 1000d + ); + } + + public void propagate (Reducer reducer) { + propagate(reducer, null); + } + + public void propagateTimes (TravelTimeReducer reducer) { + propagate(null, reducer); + } + + public static interface Reducer { + public void accept (int targetIndex, boolean[] targetReachedWithinCutoff); + } + + public interface TravelTimeReducer { + public void accept (int targetIndex, int[] travelTimesForTargets); + } +} diff --git a/src/main/java/com/conveyal/r5/profile/ProfileRequest.java b/src/main/java/com/conveyal/r5/profile/ProfileRequest.java index dad02f187..79cb62005 
100644 --- a/src/main/java/com/conveyal/r5/profile/ProfileRequest.java +++ b/src/main/java/com/conveyal/r5/profile/ProfileRequest.java @@ -9,6 +9,7 @@ import com.conveyal.r5.api.util.TransitModes; import com.conveyal.r5.model.json_serialization.*; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import graphql.schema.DataFetchingEnvironment; @@ -236,6 +237,7 @@ timings in the schedule. Such an option does not overlap (temporally) its altern //If true current search is reverse search AKA we are looking for a path from destination to origin in reverse //It differs from searchType because it is used as egress search + @JsonInclude(JsonInclude.Include.NON_DEFAULT) public boolean reverseSearch = false; /** maximum fare. If nonnegative, fares will be used in routing. */ @@ -325,6 +327,20 @@ public float getSpeed(StreetMode streetMode) { throw new IllegalArgumentException("getSpeed(): Invalid mode " + streetMode); } + @JsonIgnore + public int getMaxAccessTime (StreetMode mode) { + switch (mode) { + case CAR: + return maxCarTime; + case BICYCLE: + return maxBikeTime; + case WALK: + return maxWalkTime; + default: + throw new IllegalArgumentException("Invalid mode"); + } + } + /** * * @return true if there is any transitMode in transitModes (Safe to call if transitModes is null) diff --git a/src/main/java/com/conveyal/r5/profile/Propagater.java b/src/main/java/com/conveyal/r5/profile/Propagater.java deleted file mode 100644 index 29948dd2e..000000000 --- a/src/main/java/com/conveyal/r5/profile/Propagater.java +++ /dev/null @@ -1,101 +0,0 @@ -package com.conveyal.r5.profile; - -import com.conveyal.r5.streets.LinkedPointSet; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Arrays; -import java.util.function.ToIntFunction; -import java.util.stream.IntStream; - -/** - * A propagater does propagation, like how an alligator does allegation. - * - * This takes a matrix of times at each transit stop for each RAPTOR iteration, and propagates those times - * to the street network. A matrix of travel times to points at each iteration can be too large in very large - * graphs (e.g. NYC or NL), thus this works in a streaming fashion, calling a function passed into the propagate - * call with the results of each iteration, which can then be summarized or written to disk as the caller desires. - */ -public class Propagater { - private static final Logger LOG = LoggerFactory.getLogger(Propagater.class); - - /** Times at transit stops for each iteration */ - public final int[][] travelTimesToStopsEachIteration; - - /** Times at targets using the street network */ - public final int[] nonTransferTravelTimesToTargets; - - /** The linked targets */ - public final LinkedPointSet targets; - - /** the profilerequest (used for walk speed etc.) */ - public final ProfileRequest request; - - public Propagater (int[][] travelTimesToStopsEachIteration, int[] nonTransferTravelTimesToTargets, LinkedPointSet targets, ProfileRequest request) { - this.travelTimesToStopsEachIteration = travelTimesToStopsEachIteration; - this.nonTransferTravelTimesToTargets = nonTransferTravelTimesToTargets; - this.targets = targets; - this.request = request; - } - - /** - * perform the propagation from transit stops to targets, and call the reducer with the travel time to each target - * for each iteration. 
- * - * For large networks, making a 2-D array of all the travel times to every target is too large to fit in memory, so - * we call the reducer function with the times for each iteration, and it can summarize on the fly, write to disk, - * &c. as desired by the caller. - */ - public int[] propagate (ToIntFunction reducer) { - long startTime = System.currentTimeMillis(); - // propagate the transit times - // NB if this is slow, parallelize; however, it's observed to be quick even in large networks, and for regional - // analyses, the entire request is already parallelized at the origin level (i.e. we're computing the accessibility - // for multiple origins simultaneously). - // This can use something akin to range-RAPTOR where we only update the travel time if it changes from one - // iteration to the next, but that requires working on clock times, so we'd have to pass more information down - // into this function; I'm not sure it's worth it. - int[] result = new int[travelTimesToStopsEachIteration.length]; - for (int iteration = 0; iteration < travelTimesToStopsEachIteration.length; iteration++) { - int[] travelTimesToTargetsThisIteration = Arrays.copyOf(nonTransferTravelTimesToTargets, nonTransferTravelTimesToTargets.length); - int[] travelTimesToStopsThisIteration = travelTimesToStopsEachIteration[iteration]; - - // avoid float math in loop below - // float math was previously observed to slow down this loop, however it's debatable whether that was due to - // casts, the operations themselves, or the fact that the operations were being completed with doubles rather - // than floats. - int speedMillimetersPerSecond = (int) (request.walkSpeed * 1000); - - // Loop over each stop and propagate - for (int stop = 0; stop < travelTimesToStopsThisIteration.length; stop++) { - int travelTimeToStop = travelTimesToStopsThisIteration[stop]; - - if (travelTimeToStop == RaptorWorker.UNREACHED) continue; - - int[] stopToTargetTree = targets.stopToPointDistanceTables.get(stop); - if (stopToTargetTree == null) continue; // stop is unlinked - for (int idx = 0; idx < stopToTargetTree.length; idx += 2) { - int target = stopToTargetTree[idx]; - int distanceMillimeters = stopToTargetTree[idx + 1]; - int timeSeconds = distanceMillimeters / speedMillimetersPerSecond; - int timeToTargetIncludingTransit = travelTimeToStop + timeSeconds; - - if (timeToTargetIncludingTransit < travelTimesToTargetsThisIteration[target]) { - travelTimesToTargetsThisIteration[target] = timeToTargetIncludingTransit; - } - } - } - - result[iteration] = reducer.applyAsInt(travelTimesToTargetsThisIteration); - } - - LOG.info("Propagation from {} stops to {} targets for {} iterations took {}s", - travelTimesToStopsEachIteration[0].length, - targets.size(), - travelTimesToStopsEachIteration.length, - (System.currentTimeMillis() - startTime) / 1000d - ); - - return result; - } -} diff --git a/src/main/java/com/conveyal/r5/profile/StatsCalculator.java b/src/main/java/com/conveyal/r5/profile/StatsCalculator.java new file mode 100644 index 000000000..52d27a69d --- /dev/null +++ b/src/main/java/com/conveyal/r5/profile/StatsCalculator.java @@ -0,0 +1,97 @@ +package com.conveyal.r5.profile; + +import com.conveyal.r5.api.util.Itinerary; +import com.conveyal.r5.api.util.Stats; + +import java.util.Collection; +import java.util.stream.IntStream; + +/** + * Created by matthewc on 4/4/17. + */ +public class StatsCalculator { + /** + * Compute statistics for this particular path over the time window. 
This is super simple, + * we just compute how long the path takes at every possible departure minute. There's probably an + * elegant theoretical way to do this, but I prefer pragmatism over theory. + */ + public static StatsCollection computeStatistics (ProfileRequest req, int accessTime, int egressTime, + int nSegments, Collection itineraries) { + Stats stats = new Stats(); + Stats[] rideStats = IntStream.range(0, nSegments).mapToObj(i -> new Stats()).toArray(Stats[]::new); + Stats[] waitStats = IntStream.range(0, nSegments).mapToObj(i -> new Stats()).toArray(Stats[]::new); + + for (int start = req.fromTime; start < req.toTime; start += 60) { + // TODO should board slack be applied at the origin stop? Is this done in RaptorWorker? + int timeAtOriginStop = start + accessTime + RaptorWorker.BOARD_SLACK_SECONDS; + int bestTimeAtDestinationStop = Integer.MAX_VALUE; + + PathWithTimes.Itinerary bestItinerary = null; + for (PathWithTimes.Itinerary itin : itineraries) { + // itinerary cannot be used at this time + if (itin.boardTimes[0] < timeAtOriginStop) continue; + + if (itin.alightTimes[nSegments - 1] < bestTimeAtDestinationStop) { + bestTimeAtDestinationStop = itin.alightTimes[nSegments - 1]; + bestItinerary = itin; + } + } + + if (bestItinerary == null) continue; // cannot use this trip at this time + + int bestTimeAtDestination = bestTimeAtDestinationStop + egressTime; + + int travelTime = bestTimeAtDestination - start; + + stats.num++; + stats.avg += travelTime; + stats.min = Math.min(stats.min, travelTime); + stats.max = Math.max(stats.max, travelTime); + + // accumulate stats for each leg + for (int leg = 0; leg < nSegments; leg++) { + Stats ride = rideStats[leg]; + int rideLen = bestItinerary.alightTimes[leg] - bestItinerary.boardTimes[leg]; + ride.num++; + ride.avg += rideLen; + ride.min = Math.min(ride.min, rideLen); + ride.max = Math.max(ride.max, rideLen); + + Stats wait = waitStats[leg]; + + int arriveAtStopTime = leg == 0 ? timeAtOriginStop : bestItinerary.arriveAtBoardStopTimes[leg]; + + int waitTime = bestItinerary.boardTimes[leg] - arriveAtStopTime; + + wait.num++; + wait.avg += waitTime; + wait.min = Math.min(waitTime, wait.min); + wait.max = Math.max(waitTime, wait.max); + } + } + + if (stats.num == 0) throw new IllegalStateException("No valid itineraries found for path computed in RaptorWorker"); + + stats.avg /= stats.num; + + for (Stats[] statSet : new Stats[][] { rideStats, waitStats }) { + for (Stats s : statSet) { + s.avg /= s.num; + } + } + + return new StatsCollection(stats, waitStats, rideStats); + } + + public static class StatsCollection { + public Stats stats; + public Stats[] waitStats; + public Stats[] rideStats; + + public StatsCollection (Stats stats, Stats[] waitStats, Stats[] rideStats) { + this.stats = stats; + this.rideStats = rideStats; + this.waitStats = waitStats; + } + } +} diff --git a/src/main/java/com/conveyal/r5/streets/EdgeStore.java b/src/main/java/com/conveyal/r5/streets/EdgeStore.java index 8beaf19a2..0f45d7108 100644 --- a/src/main/java/com/conveyal/r5/streets/EdgeStore.java +++ b/src/main/java/com/conveyal/r5/streets/EdgeStore.java @@ -1071,9 +1071,12 @@ private EdgeStore() { * lists. * We don't use clone() and make sure every field copy is explicit below, avoiding any unintentional shallow-copying * of collections or referenced data structures. + * + * @param copiedStreetLayer the copy of the street layer that this edgeStore will reference. 
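+ * + * For example, StreetLayer.scenarioCopy (updated below in this same commit) now passes the copied layer in: + * <pre> + * copy.edgeStore = edgeStore.extendOnlyCopy(copy); + * </pre>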
*/ - public EdgeStore extendOnlyCopy() { + public EdgeStore extendOnlyCopy(StreetLayer copiedStreetLayer) { EdgeStore copy = new EdgeStore(); + copy.layer = copiedStreetLayer; copy.firstModifiableEdge = this.nEdges(); // The Edge store references a vertex store, and the StreetLayer should also hold the same reference. // So the StreetLayer that makes this copy needs to grab a pointer to the new extend only VertexStore diff --git a/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java b/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java index 289a4f7af..df2aec114 100644 --- a/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java +++ b/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java @@ -71,6 +71,12 @@ public class LinkedPointSet implements Serializable { /** For each transit stop, the distances to nearby PointSet points as packed (point_index, distance) pairs. */ public List stopToPointDistanceTables; + /** + * For each pointset point, the stops reachable without using transit, as a map from StopID to distance in millimeters. + * Inverted version of stopToPointDistanceTables. + */ + public transient List pointToStopDistanceTables; + /** It is preferred to specify a mode when linking TODO remove this. */ @Deprecated public LinkedPointSet(PointSet pointSet, StreetLayer streetLayer) { @@ -379,4 +385,29 @@ public void makeStopToPointDistanceTables(Geometry treeRebuildZone) { counter.done(); } + public synchronized void makePointToStopDistanceTablesIfNeeded () { + if (pointToStopDistanceTables != null) return; + + synchronized (this) { + // check again in case they were built while waiting on this synchronized block + if (pointToStopDistanceTables != null) return; + if (stopToPointDistanceTables == null) makeStopToPointDistanceTables(null); + TIntIntMap[] result = new TIntIntMap[size()]; + + for (int stop = 0; stop < stopToPointDistanceTables.size(); stop++) { + int[] stopToPointDistanceTable = stopToPointDistanceTables.get(stop); + if (stopToPointDistanceTable == null) continue; + + for (int idx = 0; idx < stopToPointDistanceTable.length; idx += 2) { + int point = stopToPointDistanceTable[idx]; + int distance = stopToPointDistanceTable[idx + 1]; + if (result[point] == null) result[point] = new TIntIntHashMap(); + result[point].put(stop, distance); + } + } + pointToStopDistanceTables = Arrays.asList(result); + } + } + + } diff --git a/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java b/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java index cbbeee3b1..d6185f662 100644 --- a/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java +++ b/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java @@ -15,8 +15,12 @@ */ public class ParkRideRouter extends StreetRouter { + //Key is stop index, value is duration to reach it for getReachedStops TIntIntMap transitStopIndexDurationMap; + //Key is streetVertex of stops, value is duration to reach it for getTravelTimeToVertex + TIntIntMap streetVertexDurationMap; + //Key is streetVertex value is state from park ride to this stop for making path TIntObjectMap transitStopStreetIndexParkRideState; public ParkRideRouter(StreetLayer streetLayer) { @@ -35,6 +39,7 @@ public ParkRideRouter(StreetLayer streetLayer) { */ public void addParks(TIntObjectMap carParks, TransitLayer transitLayer) { transitStopIndexDurationMap = new TIntIntHashMap(carParks.size() * 3); + streetVertexDurationMap = new TIntIntHashMap(carParks.size() * 3); TIntObjectMap parkRideLocationsMap = streetLayer.parkRideLocationsMap; 
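An editorial aside on LinkedPointSet.makePointToStopDistanceTablesIfNeeded above: because the method itself is declared synchronized, the inner synchronized (this) block reacquires the same monitor reentrantly, and the second null check can never observe a concurrent change. A minimal sketch of the simpler equivalent, keeping the method-level lock and dropping the redundant inner block (the table-building body is elided):

public synchronized void makePointToStopDistanceTablesIfNeeded () {
    if (pointToStopDistanceTables != null) return; // we hold the lock, so a single check suffices
    if (stopToPointDistanceTables == null) makeStopToPointDistanceTables(null);
    // ... invert stopToPointDistanceTables into pointToStopDistanceTables exactly as above ...
}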
transitStopStreetIndexParkRideState = new TIntObjectHashMap<>(carParks.size()); final double walkSpeedMillimetersPerSecond = profileRequest.walkSpeed * 1000; @@ -56,12 +61,14 @@ public void addParks(TIntObjectMap carParks, TransitLayer transitLayer) { if (!transitStopIndexDurationMap.containsKey(toStop)) { transitStopIndexDurationMap.put(toStop, totalTime); transitStopStreetIndexParkRideState.put(stopStreetVertexIdx, pr_state); + streetVertexDurationMap.put(stopStreetVertexIdx, totalTime); // ELSE we only add time and update P+R if new time is shorter then previously saved one } else { int savedTime = transitStopIndexDurationMap.get(toStop); if (totalTime < savedTime) { transitStopIndexDurationMap.put(toStop, totalTime); transitStopStreetIndexParkRideState.put(stopStreetVertexIdx, pr_state); + streetVertexDurationMap.put(stopStreetVertexIdx, totalTime); } } @@ -102,4 +109,18 @@ public State getStateAtVertex(int vertexIndex) { return null; } } + + /** + * Returns travel time to this vertex. Only returns time to stops, since only those times are saved + * @param vertexIndex + * @return + */ + @Override + public int getTravelTimeToVertex(int vertexIndex) { + if (this.streetVertexDurationMap.containsKey(vertexIndex)) { + return streetVertexDurationMap.get(vertexIndex); + } else { + return Integer.MAX_VALUE; + } + } } diff --git a/src/main/java/com/conveyal/r5/streets/StreetLayer.java b/src/main/java/com/conveyal/r5/streets/StreetLayer.java index c7c2f61e5..81e5f1c7e 100644 --- a/src/main/java/com/conveyal/r5/streets/StreetLayer.java +++ b/src/main/java/com/conveyal/r5/streets/StreetLayer.java @@ -24,7 +24,6 @@ import com.vividsolutions.jts.operation.union.UnaryUnionOp; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; -import gnu.trove.map.TIntIntMap; import gnu.trove.map.TIntObjectMap; import gnu.trove.map.TLongIntMap; import gnu.trove.map.hash.TIntObjectHashMap; @@ -268,6 +267,7 @@ public void loadFromOsm(OSM osm, boolean removeIslands, boolean saveVertexIndex) } } } + stressLabeler.logErrors(); // summarize LTS statistics Edge cursor = edgeStore.getCursor(); @@ -592,6 +592,24 @@ else if ("to".equals(member.role)) { else if ("via".equals(member.role)) { via.add(member); } + + // Osmosis may produce situations where referential integrity is violated, probably at the edge of the + // bounding box where half a turn restriction is outside the box. + if (member.type == OSMEntity.Type.WAY) { + if (!osm.ways.containsKey(member.id)) { + LOG.warn("Turn restriction relation {} references nonexistent way {}, dropping this relation", + id, + member.id); + return; + } + } else if (member.type == OSMEntity.Type.NODE) { + if (!osm.nodes.containsKey(member.id)) { + LOG.warn("Turn restriction relation {} references nonexistent node {}, dropping this relation", + id, + member.id); + return; + } + } } @@ -922,6 +940,7 @@ void addReverseTurnRestriction(TurnRestriction turnRestriction, int index) { /** * Get or create mapping from a global long OSM ID to an internal street vertex ID, creating the vertex as needed. + * @return the internal ID for the street vertex that was found or created, or -1 if there was no such OSM node. */ private int getVertexIndexForOsmNode(long osmNodeId) { int vertexIndex = vertexIndexForOsmNode.get(osmNodeId); @@ -929,13 +948,15 @@ private int getVertexIndexForOsmNode(long osmNodeId) { // Register a new vertex, incrementing the index starting from zero. 
// Store node coordinates for this new street vertex Node node = osm.nodes.get(osmNodeId); - vertexIndex = vertexStore.addVertex(node.getLat(), node.getLon()); - - VertexStore.Vertex v = vertexStore.getCursor(vertexIndex); - if (node.hasTag("highway", "traffic_signals")) - v.setFlag(VertexStore.VertexFlag.TRAFFIC_SIGNAL); - - vertexIndexForOsmNode.put(osmNodeId, vertexIndex); + if (node == null) { + LOG.warn("OSM data references an undefined node. This is often the result of extracting a bounding box in Osmosis without the completeWays option."); + } else { + vertexIndex = vertexStore.addVertex(node.getLat(), node.getLon()); + VertexStore.Vertex v = vertexStore.getCursor(vertexIndex); + if (node.hasTag("highway", "traffic_signals")) + v.setFlag(VertexStore.VertexFlag.TRAFFIC_SIGNAL); + vertexIndexForOsmNode.put(osmNodeId, vertexIndex); + } } return vertexIndex; } @@ -981,6 +1002,10 @@ private void makeEdge(Way way, int beginIdx, int endIdx, Long osmID) { for (int n = beginIdx; n <= endIdx; n++) { long nodeId = way.nodes[n]; Node node = osm.nodes.get(nodeId); + if (node == null) { + LOG.warn("Not creating street segment that references an undefined node."); + return; + } envelope.expandToInclude(node.getLon(), node.getLat()); nodes.add(node); } @@ -1513,7 +1538,7 @@ public StreetLayer scenarioCopy(TransportNetwork newScenarioNetwork, boolean wil // Indicate that the content of the new StreetLayer will be changed by giving it the scenario's scenarioId. // If the copy will not be modified, scenarioId remains unchanged to allow cached pointset linkage reuse. copy.scenarioId = newScenarioNetwork.scenarioId; - copy.edgeStore = edgeStore.extendOnlyCopy(); + copy.edgeStore = edgeStore.extendOnlyCopy(copy); // The extend-only copy of the EdgeStore also contains a new extend-only copy of the VertexStore. copy.vertexStore = copy.edgeStore.vertexStore; copy.temporaryEdgeIndex = new IntHashGrid(); diff --git a/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java b/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java index 2110c8489..10fc17f41 100644 --- a/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java +++ b/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java @@ -15,9 +15,9 @@ */ public class DCFareCalculator { - private static final FareTable METRORAIL = new FareTable("fares/dc/metrorail.csv"); - private static final FareTable MARC = new FareTable("fares/dc/marc.csv"); - private static final FareTable VRE = new FareTable("fares/dc/vre.csv"); + private static final FareTable METRORAIL = new FareTable("fares/dc/metrorail.csv", true); + private static final FareTable MARC = new FareTable("fares/dc/marc.csv", true); + private static final FareTable VRE = new FareTable("fares/dc/vre.csv", true); private static final String[] metroExpress = { "J7", "J9", "P17", "P19", "W13", "W19", "11Y", "17A", "17B", "17G", "17H", "17K", "17L", "17M", "18E", "18G", "18H", "18P", "29E", "29G", "29H", "29X" }; @@ -40,7 +40,6 @@ public class DCFareCalculator { private static RideType classify (RouteInfo route) { // NOTE the agencyId string of the route's agencyAndId is not the same as the one given by route.getAgency. // The former is the same for all routes in the feed. The latter is the true agency of the feed. - String agency = route.agency_id; String agency_url = route.agency_url == null ? 
null : route.agency_url.toString(); // this is used in single-agency feeds so it should work String short_name = route.route_short_name; diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java index 9d2ff0d25..df5ee12e3 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java @@ -40,6 +40,12 @@ public class TransportNetwork implements Serializable { public TransitLayer transitLayer; + /** + * This stores any number of lightweight scenario networks built upon the current base network. + * FIXME that sounds like a memory leak, should be a WeighingCache or at least size-limited. + */ + public transient Map scenarios = new HashMap<>(); + /** * A grid point set that covers the full extent of this transport network. The PointSet itself then caches linkages * to street networks (the baseline street network, or ones with various scenarios applied). If they have been diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index 7d0b62865..1d34c2557 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -6,7 +6,6 @@ import com.amazonaws.services.s3.model.S3Object; import com.conveyal.gtfs.GTFSCache; import com.conveyal.osmlib.OSMCache; -import com.conveyal.r5.analyst.WebMercatorGridPointSet; import com.conveyal.r5.analyst.cluster.BundleManifest; import com.conveyal.r5.analyst.scenario.Scenario; import com.conveyal.r5.common.JsonUtilities; @@ -14,17 +13,21 @@ import com.conveyal.r5.point_to_point.builder.TNBuilderConfig; import com.conveyal.r5.profile.ProfileRequest; import com.conveyal.r5.streets.StreetLayer; -import com.google.common.collect.Sets; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.cache.RemovalListener; import com.google.common.io.ByteStreams; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -32,8 +35,6 @@ * This holds one or more TransportNetworks keyed on unique strings. * Because (de)serialization is now much faster than building networks from scratch, built graphs are cached on the * local filesystem and on S3 for later re-use. - * Actually this currently only holds one single TransportNetwork, but that will eventually change. - * FIXME the synchronization is kind of primitive and will need to be more sophisticated when a worker has multiple loaded networks. */ public class TransportNetworkCache { @@ -44,11 +45,11 @@ public class TransportNetworkCache { private final File cacheDir; private final String sourceBucket; + private final String bucketFolder; - String currentNetworkId = null; - - TransportNetwork currentNetwork = null; + private static final int DEFAULT_CACHE_SIZE = 1; + private final LoadingCache cache; private final GTFSCache gtfsCache; private final OSMCache osmCache; @@ -59,51 +60,51 @@ public TransportNetworkCache(String sourceBucket) { /** Create a transport network cache. 
If source bucket is null, will work offline. */ public TransportNetworkCache(String sourceBucket, File cacheDir) { + this(sourceBucket, cacheDir, DEFAULT_CACHE_SIZE, null); + } + + public TransportNetworkCache(String sourceBucket, File cacheDir, String bucketFolder) { + this(sourceBucket, cacheDir, DEFAULT_CACHE_SIZE, bucketFolder); + } + + /** Create a transport network cache. If source bucket is null, will work offline. */ + public TransportNetworkCache(String sourceBucket, File cacheDir, int cacheSize, String bucketFolder) { this.cacheDir = cacheDir; this.sourceBucket = sourceBucket; + this.bucketFolder = bucketFolder != null ? bucketFolder.replaceAll("\\/","") : null; + this.cache = createCache(cacheSize); this.gtfsCache = new GTFSCache(sourceBucket, cacheDir); this.osmCache = new OSMCache(sourceBucket, cacheDir); } public TransportNetworkCache(GTFSCache gtfsCache, OSMCache osmCache) { + this(gtfsCache, osmCache, DEFAULT_CACHE_SIZE); + } + + public TransportNetworkCache(GTFSCache gtfsCache, OSMCache osmCache, int cacheSize) { + this(gtfsCache, osmCache, cacheSize, null); + } + + public TransportNetworkCache(GTFSCache gtfsCache, OSMCache osmCache, int cacheSize, String bucketFolder) { this.gtfsCache = gtfsCache; this.osmCache = osmCache; + this.cache = createCache(cacheSize); this.cacheDir = gtfsCache.cacheDir; this.sourceBucket = gtfsCache.bucket; + // we don't necessarily want to put r5 networks in the gtfsCache bucketFolder + // (e.g., "s3://bucket/gtfs"), but we'll go ahead and use the same bucket + this.bucketFolder = bucketFolder; } - /** - * This stores any number of lightweight scenario networks built upon the current base network. - * FIXME that sounds like a memory leak, should be a WeighingCache or at least size-limited. - */ - private Map scenarioNetworkCache = new HashMap<>(); - - /** - * Return the graph for the given unique identifier for graph builder inputs on S3. - * If this is the same as the last graph built, just return the pre-built graph. - * If not, build the graph from the inputs, fetching them from S3 to the local cache as needed. - */ + /** Convenience method that returns transport network from cache. */ public synchronized TransportNetwork getNetwork (String networkId) { - - LOG.info("Finding or building a TransportNetwork for ID {} and R5 version {}", networkId, R5Version.version); - - if (networkId.equals(currentNetworkId)) { - LOG.info("Network ID has not changed. Reusing the last one that was built."); - return currentNetwork; - } - - TransportNetwork network = checkCached(networkId); - if (network == null) { - LOG.info("Cached transport network for id {} and R5 version {} was not found. Building the network from scratch.", - networkId, R5Version.version); - network = buildNetwork(networkId); + try { + return cache.get(networkId); + } catch (Exception e) { + LOG.error("Exception while loading a transport network into the cache: {}", e.toString()); + e.printStackTrace(); + return null; } - - currentNetwork = network; - currentNetworkId = networkId; - scenarioNetworkCache.clear(); // We cache only scenario graphs built upon the currently active base graph. - - return network; } /** @@ -122,7 +123,10 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, Pr // The following call clears the scenarioNetworkCache if the current base graph changes. 
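A hypothetical construction and lookup, to make the cache-backed API above concrete (the bucket name, cache directory, and network ID are illustrative, not from the commit):

TransportNetworkCache networks = new TransportNetworkCache("analysis-graphs", new File("/var/cache/r5"), 2, "networks");
TransportNetwork network = networks.getNetwork("my-network-id"); // built or fetched on first use; LRU-evicted beyond 2 entries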
TransportNetwork baseNetwork = this.getNetwork(networkId); - TransportNetwork scenarioNetwork = scenarioNetworkCache.get(scenarioId); + if (baseNetwork.scenarios == null) { + baseNetwork.scenarios = new HashMap<>(); + } + TransportNetwork scenarioNetwork = baseNetwork.scenarios.get(scenarioId); // DEBUG force scenario re-application // scenarioNetwork = null; @@ -135,12 +139,11 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, Pr // resolve scenario LOG.info("Retrieving scenario stored separately on S3 rather than in the ProfileRequest"); - String scenarioKey = String.format("%s_%s.json", networkId, scenarioId); - File scenarioFile = new File(cacheDir, scenarioKey); + File scenarioFile = new File(cacheDir, getScenarioFilename(networkId, scenarioId)); if (!scenarioFile.exists()) { try { - S3Object obj = s3.getObject(sourceBucket, scenarioKey); + S3Object obj = s3.getObject(sourceBucket, getScenarioKey(networkId, scenarioId)); InputStream is = obj.getObjectContent(); OutputStream os = new BufferedOutputStream(new FileOutputStream(scenarioFile)); ByteStreams.copy(is, os); @@ -172,18 +175,26 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, Pr // scenario.modifications.add(0, new InactiveTripsFilter(baseNetwork, clusterRequest.profileRequest)); scenarioNetwork = scenario.applyToTransportNetwork(baseNetwork); LOG.info("Done applying scenario. Caching the resulting network."); - scenarioNetworkCache.put(scenario.id, scenarioNetwork); + baseNetwork.scenarios.put(scenario.id, scenarioNetwork); } else { LOG.info("Reusing cached TransportNetwork for scenario {}.", scenarioId); } return scenarioNetwork; } + private String getScenarioFilename(String networkId, String scenarioId) { + return String.format("%s_%s.json", networkId, scenarioId); + } + + private String getScenarioKey(String networkId, String scenarioId) { + String filename = getScenarioFilename(networkId, scenarioId); + return bucketFolder != null ? String.join("/", bucketFolder, filename) : filename; + } + /** If this transport network is already built and cached, fetch it quick */ private TransportNetwork checkCached (String networkId) { try { - String filename = networkId + "_" + R5Version.version + ".dat"; - File cacheLocation = new File(cacheDir, networkId + "_" + R5Version.version + ".dat"); + File cacheLocation = new File(cacheDir, getR5NetworkFilename(networkId)); if (cacheLocation.exists()) LOG.info("Found locally-cached TransportNetwork at {}", cacheLocation); else { @@ -193,7 +204,7 @@ private TransportNetwork checkCached (String networkId) { LOG.info("Checking for cached transport network on S3."); S3Object tn; try { - tn = s3.getObject(sourceBucket, filename); + tn = s3.getObject(sourceBucket, getR5NetworkKey(networkId)); } catch (AmazonServiceException ex) { LOG.info("No cached transport network was found in S3. It will be built from scratch."); return null; @@ -222,6 +233,16 @@ private TransportNetwork checkCached (String networkId) { } } + + private String getR5NetworkKey(String networkId) { + String filename = getR5NetworkFilename(networkId); + return bucketFolder != null ? 
String.join("/", bucketFolder, filename) : filename; + } + + private String getR5NetworkFilename(String networkId) { + return networkId + "_" + R5Version.version + ".dat"; + } + /** If we did not find a cached network, build one */ public TransportNetwork buildNetwork (String networkId) { @@ -247,8 +268,7 @@ public TransportNetwork buildNetwork (String networkId) { network.rebuildLinkedGridPointSet(); // Cache the network. - String filename = networkId + "_" + R5Version.version + ".dat"; - File cacheLocation = new File(cacheDir, networkId + "_" + R5Version.version + ".dat"); + File cacheLocation = new File(cacheDir, getR5NetworkFilename(networkId)); try { // Serialize TransportNetwork to local cache on this worker @@ -256,7 +276,7 @@ public TransportNetwork buildNetwork (String networkId) { // Upload the serialized TransportNetwork to S3 if (sourceBucket != null) { LOG.info("Uploading the serialized TransportNetwork to S3 for use by other workers."); - s3.putObject(sourceBucket, filename, cacheLocation); + s3.putObject(sourceBucket, getR5NetworkKey(networkId), cacheLocation); LOG.info("Done uploading the serialized TransportNetwork to S3."); } else { LOG.info("Network saved to cache directory, not uploading to S3 while working offline."); @@ -279,7 +299,8 @@ private TransportNetwork buildNetworkFromBundleZip (String networkId) { if (sourceBucket != null) { LOG.info("Downloading graph input files from S3."); dataDirectory.mkdirs(); - S3Object graphDataZipObject = s3.getObject(sourceBucket, networkId + ".zip"); + String networkZipKey = bucketFolder != null ? String.join("/", bucketFolder, networkId + ".zip") : networkId + ".zip"; + S3Object graphDataZipObject = s3.getObject(sourceBucket, networkZipKey); ZipInputStream zis = new ZipInputStream(graphDataZipObject.getObjectContent()); try { ZipEntry entry; @@ -329,13 +350,14 @@ private TransportNetwork buildNetworkFromBundleZip (String networkId) { * It contains the unique IDs of the GTFS feeds and OSM extract. */ private TransportNetwork buildNetworkFromManifest (String networkId) { - String manifestFileName = GTFSCache.cleanId(networkId) + ".json"; + String manifestFileName = getManifestFilename(networkId); File manifestFile = new File(cacheDir, manifestFileName); // TODO handle manifest not in S3 if (!manifestFile.exists() && sourceBucket != null) { LOG.info("Manifest file not found locally, downloading from S3"); - s3.getObject(new GetObjectRequest(sourceBucket, manifestFileName), manifestFile); + String manifestKey = getManifestKey(networkId); + s3.getObject(new GetObjectRequest(sourceBucket, manifestKey), manifestFile); } BundleManifest manifest; @@ -369,17 +391,77 @@ private TransportNetwork buildNetworkFromManifest (String networkId) { network.rebuildTransientIndexes(); - new TransferFinder(network).findTransfers(); + TransferFinder transferFinder = new TransferFinder(network); + transferFinder.findTransfers(); + transferFinder.findParkRideTransfer(); + + return network; + } + + private String getManifestKey(String networkId) { + String filename = getManifestFilename(networkId); + return bucketFolder != null ? 
String.join("/", bucketFolder, filename) : filename; + } + + private String getManifestFilename(String networkId) { + return GTFSCache.cleanId(networkId) + ".json"; + } + + private LoadingCache createCache(int size) { + RemovalListener removalListener = removalNotification -> { + String id = removalNotification.getKey(); + + // delete local files ONLY if using s3 + if (sourceBucket != null) { + String[] extensions = {".db", ".db.p", ".zip"}; + // delete local cache files (including zip) when feed removed from cache + for (String type : extensions) { + File file = new File(cacheDir, id + type); + file.delete(); + } + } + }; + return CacheBuilder.newBuilder() + .maximumSize(size) + .removalListener(removalListener) + .build(new CacheLoader() { + public TransportNetwork load(Object s) throws Exception { + // Thanks, java, for making me use a cast here. If I put generic arguments to new CacheLoader + // due to type erasure it can't be sure I'm using types correctly. + return loadNetwork((String) s); + } + }); + } + + /** + * Return the graph for the given unique identifier for graph builder inputs on S3. + * If this is the same as the last graph built, just return the pre-built graph. + * If not, build the graph from the inputs, fetching them from S3 to the local cache as needed. + */ + private TransportNetwork loadNetwork(String networkId) { + + LOG.info("Finding or building a TransportNetwork for ID {} and R5 version {}", networkId, R5Version.version); + + TransportNetwork network = checkCached(networkId); + if (network == null) { + LOG.info("Cached transport network for id {} and R5 version {} was not found. Building the network from scratch.", + networkId, R5Version.version); + network = buildNetwork(networkId); + } + cache.put(networkId, network); return network; } public Set getLoadedNetworkIds() { - if (currentNetwork == null) return Collections.emptySet(); - else return Sets.newHashSet(currentNetwork.scenarioId); + return cache.asMap().keySet(); } public Set getAppliedScenarios() { - return scenarioNetworkCache.keySet(); + return cache.asMap().values().stream() + .filter(network -> network.scenarios != null) + .map(network -> network.scenarios.keySet()) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); } } diff --git a/src/main/java/com/conveyal/r5/transit/fare/FareTable.java b/src/main/java/com/conveyal/r5/transit/fare/FareTable.java index f60e52a24..b70d75a0f 100644 --- a/src/main/java/com/conveyal/r5/transit/fare/FareTable.java +++ b/src/main/java/com/conveyal/r5/transit/fare/FareTable.java @@ -22,7 +22,14 @@ public class FareTable { private Map, Fare> fares = Maps.newHashMap(); + private boolean ignoreAgencyId; + public FareTable (String name) { + this(name, false); + } + + public FareTable (String name, boolean ignoreAgencyId) { + this.ignoreAgencyId = ignoreAgencyId; InputStream is = FareTable.class.getClassLoader().getResourceAsStream(name); CsvReader reader = new CsvReader(is, ',', Charset.forName("UTF-8")); try { @@ -42,12 +49,17 @@ public FareTable (String name) { } public Fare lookup (String from, String to) { - return new Fare(fares.get(new P2(from, to))); // defensive copy, in case the caller discounts + return new Fare(fares.get(new P2<>(from, to))); // defensive copy, in case the caller discounts } public Fare lookup (Stop from, Stop to) { - //TODO: how does this works WRT different GTFS feeds having Stops with same Ids? 
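An editorial note on createCache above: the cast (and the raw CacheLoader) can be avoided by supplying type arguments, as in this sketch, which assumes removalListener is declared as RemovalListener<String, TransportNetwork>:

return CacheBuilder.newBuilder()
        .maximumSize(size)
        .removalListener(removalListener)
        .build(new CacheLoader<String, TransportNetwork>() {
            @Override
            public TransportNetwork load(String networkId) {
                return loadNetwork(networkId); // typed, no cast required
            }
        });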
- return lookup(from.stopId, to.stopId); + if (this.ignoreAgencyId) { + String fromWithoutFeedId = from.stopId.split(":", 2)[1]; + String toWithoutFeedId = to.stopId.split(":", 2)[1]; + return lookup(fromWithoutFeedId, toWithoutFeedId); + } else { + return lookup(from.stopId, to.stopId); + } } } diff --git a/src/main/java/com/conveyal/r5/util/P2.java b/src/main/java/com/conveyal/r5/util/P2.java index 1d353ab6a..3a737cace 100644 --- a/src/main/java/com/conveyal/r5/util/P2.java +++ b/src/main/java/com/conveyal/r5/util/P2.java @@ -6,18 +6,41 @@ /** * Tuple of two elements with same type */ -public class P2<E> extends Pair<E, E> { +public class P2<E> { + public final E a; + + public final E b; + /** * Creates a new pair * - * @param key The key for this pair - * @param value The value to use for this pair + * @param a The key for this pair + * @param b The value to use for this pair */ public P2( - @NamedArg("key") - E key, - @NamedArg("value") - E value) { - super(key, value); + E a, + E b) { + this.a = a; + this.b = b; + } + + @Override + public String toString() { + return String.format("P2<%s %s>", a, b); + } + + @Override + public int hashCode() { + return (a != null ? a.hashCode() : 0) + + (b != null ? b.hashCode() * 31 : 0); + } + + @Override + public boolean equals(Object o) { + if(!(o instanceof P2)) return false; + P2 other = (P2) o; + boolean aIsEqual = (a == null) ? other.a == null : a.equals(other.a); + boolean bIsEqual = (b == null) ? other.b == null : b.equals(other.b); + return aIsEqual && bIsEqual; } } diff --git a/src/main/resources/worker.sh b/src/main/resources/worker.sh new file mode 100644 index 000000000..33f4314b5 --- /dev/null +++ b/src/main/resources/worker.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# Downloads and runs an analyst worker. +# This shell script undergoes variable substitution via the Java MessageFormat class before being passed to newly +# started worker machines via the AWS EC2 user data. MessageFormat will replace special tokens (consisting of numbers +# inside curly braces) with configuration information specific to the worker being started. These are: +# 0: the URL to grab the worker JAR from +# 1: the AWS log group to use +# 2: the worker configuration to use +# If you are reading this comment inside the EC2 user data field, this variable substitution has already happened. +# The string instance_id in curly brackets is substituted by EC2 at startup, not by our Java code. It and any shell +# variable references that contain brackets are single-quoted to tell Java's MessageFormat not to substitute them.
+ +# prep the system: install log agent, java +yum -y install awslogs java-1.8.0-openjdk + +# first things first: set up logging +LOGFILE=/var/log/analyst-worker.log + +echo Starting analyst worker at `date` > $LOGFILE + +# make it so that the worker can write to the logfile +chown ec2-user:ec2-user $LOGFILE +chmod 664 $LOGFILE # Log agent needs to read log file + +# using a shell "herefile" or "heredoc", pipe the data between the EOF markers into each config file +cat > /etc/awslogs/awslogs.conf <<EOF +# (log agent configuration elided) +EOF +cat > /etc/awslogs/awscli.conf <<EOF +# (AWS region configuration elided) +EOF +cat > ~ec2-user/worker.conf <<EOF +# (worker configuration elided) +EOF +service awslogs start >> $LOGFILE 2>&1 + +# Figure out how much memory to give the worker in kilobytes +TOTAL_MEM=`grep MemTotal /proc/meminfo | sed 's/[^0-9]//g'` +# 2097152 kb is 2GB, leave that much for the OS +MEM=`echo $TOTAL_MEM - 2097152 | bc` + +# Start the worker +# run in ec2-user's home directory, in the subshell +{ + cd ~ec2-user + sudo -u ec2-user java8 -Xmx$'{MEM}'k -jar r5.jar worker worker.conf >> $LOGFILE 2>&1 + + # If the worker exits or doesn't start, wait a few minutes so that the CloudWatch log agent grabs + # the logs + sleep 120 + halt -p +} &
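For reference, a hypothetical sketch (class and variable names invented) of the server-side substitution described in the header comment of worker.sh. MessageFormat replaces the {0}-{2} tokens and strips the single quotes that protect literal shell braces such as '{instance_id}':

import java.text.MessageFormat;

public class WorkerUserDataExample {
    /** template is the raw text of worker.sh; the three arguments fill tokens 0, 1, and 2. */
    static String render(String template, String jarUrl, String logGroup, String workerConfig) {
        return MessageFormat.format(template, jarUrl, logGroup, workerConfig);
    }
}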